1   package org.kuali.ole.utility.callnumber;
2   
3   
4   
5   
6   
7   
8   
9   
10  
11  import com.ibm.icu.lang.UCharacter;
12  import org.apache.commons.lang.StringUtils;
13  import org.marc4j.ErrorHandler;
14  
15  import java.text.DecimalFormat;
16  import java.util.HashMap;
17  import java.util.Map;
18  import java.util.regex.Matcher;
19  import java.util.regex.Pattern;
20  
21  
22  
23  
24  
25  
26  
27  
28  
29  public final class CallNumUtils {
30  
31  
32  
33  
34  
35  
36      
37  
38  
    /**
     * Private constructor: this final class is a holder for static constants
     * and static helper methods and is not meant to be instantiated.
     */
    private CallNumUtils() {
    }
41  
    /**
     * Matches text that starts with one to three digits, optionally followed
     * by a period and more digits, followed by anything. Used by
     * {@link #isValidDewey(String)} with {@code matches()}.
     */
    public static final Pattern DEWEY_PATTERN = Pattern.compile("^\\d{1,3}(\\.\\d+)?.*");

    /**
     * Regular expression for the required part of an LC classification: one
     * uppercase letter other than I, O, W, X or Y, up to two more uppercase
     * letters, optional spaces, one or more digits and an optional decimal
     * fraction. Contains one capturing group (the decimal fraction).
     */
    public static final String LC_CLASS_REQ_REGEX = "[A-Z&&[^IOWXY]]{1}[A-Z]{0,2} *\\d+(\\.\\d+)?";

    /**
     * Regular expression for text that is not a cutter. Two alternatives,
     * each its own capturing group: a digit or lowercase letter followed by
     * word characters, or an uppercase letter followed by at least one
     * non-digit and then word characters.
     * NOTE(review): the alternation is not wrapped in a group of its own, so
     * when this string is concatenated inside a larger group the {@code |}
     * splits that enclosing group -- confirm this is intended.
     */
    public static final String NOT_CUTTER = "([\\da-z]\\w*)|([A-Z]\\D+[\\w]*)";

    /**
     * LC classification followed by an optional suffix, wrapped in a
     * capturing group. The expression holds five capturing groups in total,
     * so the first group appended after it is group 6.
     */
    public static final String LC_CLASS_W_SUFFIX = "(" + LC_CLASS_REQ_REGEX + "( +" + NOT_CUTTER + ")?)";

    /**
     * Regular expression for a cutter: one uppercase letter followed by one
     * or more digits. Contains no capturing group.
     */
    public static final String CUTTER_REGEX = "[A-Z]\\d+";

    /**
     * LC classification (with optional suffix), optional spaces, an optional
     * period and then a cutter.
     */
    public static final String LC_CLASS_N_CUTTER = LC_CLASS_W_SUFFIX + " *\\.?" + CUTTER_REGEX;
    /** Compiled {@link #LC_CLASS_N_CUTTER} followed by any trailing text. */
    public static final Pattern LC_CLASS_N_CUTTER_PATTERN = Pattern.compile(LC_CLASS_N_CUTTER + ".*");

    /**
     * Regular expression for a Dewey classification: one to three digits with
     * an optional decimal fraction. Contains one capturing group (the
     * fraction).
     */
    public static final String DEWEY_CLASS_REGEX = "\\d{1,3}(\\.\\d+)?";

    /** A single uppercase letter: the shortest form accepted as a Dewey cutter. */
    public static final String DEWEY_MIN_CUTTER_LETTER_REGEX = "[A-Z]";
    /** An uppercase letter followed by one to three digits. */
    public static final String DEWEY_MIN_CUTTER_REGEX = "[A-Z]\\d{1,3}";
    /** {@link #DEWEY_MIN_CUTTER_REGEX} immediately followed by one or more uppercase letters. */
    public static final String DEWEY_CUTTER_TRAILING_LETTERS_REGEX = DEWEY_MIN_CUTTER_REGEX + "[A-Z]+";
    /** {@link #DEWEY_MIN_CUTTER_REGEX}, one or more spaces, then one or more uppercase letters. */
    public static final String DEWEY_CUTTER_SPACE_TRAILING_LETTERS_REGEX = DEWEY_MIN_CUTTER_REGEX + " +[A-Z]+";
    /**
     * {@link #DEWEY_MIN_CUTTER_REGEX}, optional spaces, then any number of
     * uppercase letters matched possessively ({@code *+}).
     */
    public static final String DEWEY_FULL_CUTTER_REGEX = DEWEY_MIN_CUTTER_REGEX + " *[A-Z]*+";

    /**
     * Dewey classification, optional spaces, an optional period and then a
     * full Dewey cutter.
     */
    public static final String DEWEY_CLASS_N_CUTTER_REGEX = DEWEY_CLASS_REGEX + " *\\.?" + DEWEY_FULL_CUTTER_REGEX;
    /** Compiled {@link #DEWEY_CLASS_N_CUTTER_REGEX} followed by any trailing text. */
    public static final Pattern DEWEY_CLASS_N_CUTTER_PATTERN = Pattern.compile(DEWEY_CLASS_N_CUTTER_REGEX + ".*");

    /**
     * Matches either a run of characters other than A-Z and 0-9, or the
     * zero-width boundary between an uppercase letter and a digit (in either
     * order).
     * NOTE(review): presumably used as a split delimiter for SuDoc call
     * numbers; no use is visible in this part of the file.
     */
    public static final String SUDOC_REGEX = "[^A-Z0-9]+|(?<=[A-Z])(?=[0-9])|(?<=[0-9])(?=[A-Z])";
106 
107 
108     private static Map<Character, Character> alphanumReverseMap = new HashMap<Character, Character>();
109 
110     static {
111         alphanumReverseMap.put('0', 'Z');
112         alphanumReverseMap.put('1', 'Y');
113         alphanumReverseMap.put('2', 'X');
114         alphanumReverseMap.put('3', 'W');
115         alphanumReverseMap.put('4', 'V');
116         alphanumReverseMap.put('5', 'U');
117         alphanumReverseMap.put('6', 'T');
118         alphanumReverseMap.put('7', 'S');
119         alphanumReverseMap.put('8', 'R');
120         alphanumReverseMap.put('9', 'Q');
121         alphanumReverseMap.put('A', 'P');
122         alphanumReverseMap.put('B', 'O');
123         alphanumReverseMap.put('C', 'N');
124         alphanumReverseMap.put('D', 'M');
125         alphanumReverseMap.put('E', 'L');
126         alphanumReverseMap.put('F', 'K');
127         alphanumReverseMap.put('G', 'J');
128         alphanumReverseMap.put('H', 'I');
129         alphanumReverseMap.put('I', 'H');
130         alphanumReverseMap.put('J', 'G');
131         alphanumReverseMap.put('K', 'F');
132         alphanumReverseMap.put('L', 'E');
133         alphanumReverseMap.put('M', 'D');
134         alphanumReverseMap.put('N', 'C');
135         alphanumReverseMap.put('O', 'B');
136         alphanumReverseMap.put('P', 'A');
137         alphanumReverseMap.put('Q', '9');
138         alphanumReverseMap.put('R', '8');
139         alphanumReverseMap.put('S', '7');
140         alphanumReverseMap.put('T', '6');
141         alphanumReverseMap.put('U', '5');
142         alphanumReverseMap.put('V', '4');
143         alphanumReverseMap.put('W', '3');
144         alphanumReverseMap.put('X', '2');
145         alphanumReverseMap.put('Y', '1');
146         alphanumReverseMap.put('Z', '0');
147     }
148 
149 
150     
151 
152 
153     public static char SORT_FIRST_CHAR = Character.MIN_VALUE;
154     public static StringBuilder reverseDefault = new StringBuilder(75);
155 
156     static {
157         for (int i = 0; i < 50; i++)
158 
159 
160 
161             reverseDefault.append(Character.toChars('~'));
162     }
163 
164 
165 
166     
167 
168 
169 
170     public static final boolean isValidLC(String possLCval) {
171         if (possLCval != null && LC_CLASS_N_CUTTER_PATTERN.matcher(possLCval.trim()).matches())
172             return true;
173         return false;
174     }
175 
176     
177 
178 
179 
180     public static final boolean isValidDeweyWithCutter(String possDeweyVal) {
181         if (possDeweyVal != null && DEWEY_CLASS_N_CUTTER_PATTERN.matcher(possDeweyVal.trim()).matches())
182             return true;
183         return false;
184     }
185 
186     
187 
188 
189 
190     public static final boolean isValidDewey(String possDeweyVal) {
191         if (possDeweyVal != null && DEWEY_PATTERN.matcher(possDeweyVal.trim()).matches())
192             return true;
193         return false;
194     }
195 
196     
197 
198 
199 
200     public static final String getPortionBeforeCutter(String callnum) {
201 
202         
203         
204         
205         
206         
207         
208         String beginCutterRegex = "( +|(\\.[A-Z])| */)";
209 
210         String[] pieces = callnum.split(beginCutterRegex);
211         if (pieces.length == 0 || pieces[0] == null || pieces[0].length() == 0)
212             return null;
213         else
214             return pieces[0].trim();
215     }
216 
217     
218 
219 
220 
221     public static final String getLCB4FirstCutter(String callnum) {
222         String result = null;
223 
224         String cutter = getFirstLCcutter(callnum);
225         if (cutter != null && cutter.length() > 0) {
226             
227             int ix = callnum.indexOf(cutter);
228             String lets = getLCstartLetters(callnum);
229             if (ix < lets.length())
230                 ix = callnum.indexOf(cutter, lets.length());
231 
232             if (ix > 0) {
233                 result = callnum.substring(0, ix).trim();
234                 if (result.endsWith("."))
235                     result = result.substring(0, result.length() - 1).trim();
236             } else
237                 result = callnum;
238         } else 
239             result = callnum;
240 
241         return result;
242     }
243 
244     
245 
246 
247 
248     public static String getLCstartLetters(String rawLCcallnum) {
249         String result = null;
250         if (rawLCcallnum != null && rawLCcallnum.length() > 0) {
251             String[] lcClass = rawLCcallnum.split("[^A-Z]+");
252             if (lcClass.length > 0)
253                 result = lcClass[0];
254         }
255         return result;
256     }
257 
258     
259 
260 
261 
262 
263 
264 
265     public static String getLCClassDigits(String rawLCcallnum) {
266         String result = null;
267 
268         String rawClass = getLCB4FirstCutter(rawLCcallnum);
269         if (rawClass != null && rawClass.length() > 0) {
270             String[] pieces = rawClass.split("[A-Z ]+");
271             if (pieces.length > 1)
272                 result = pieces[1].trim();
273         }
274         return result;
275     }
276 
277     
278 
279 
280 
281 
282 
283     public static String getLCClassSuffix(String rawLCcallnum) {
284         String result = null;
285 
286         String b4cutter = getLCB4FirstCutter(rawLCcallnum);
287         if (b4cutter == null || b4cutter.length() == 0)
288             return null;
289 
290         String classDigits = getLCClassDigits(rawLCcallnum);
291 
292         if (classDigits != null && classDigits.length() > 0) {
293             int reqClassLen = b4cutter.indexOf(classDigits) + classDigits.length();
294 
295             if (b4cutter.length() > reqClassLen)
296                 result = b4cutter.substring(reqClassLen).trim();
297         }
298 
299         return result;
300     }
301 
302     
303 
304 
305 
306 
307 
308 
309     public static String getFirstLCcutter(String rawCallnum) {
310         String result = null;
311 
312         String regex = LC_CLASS_W_SUFFIX + " *\\.?(" + CUTTER_REGEX + ")";
313         Pattern pattern = Pattern.compile(regex);
314         Matcher matcher = pattern.matcher(rawCallnum);
315 
316         if (matcher.find())
317             result = matcher.group(6).trim();
318 
319         
320         
321 
322 
323 
324 
325 
326 
327 
328 
329 
330 
331 
332         return result;
333     }
334 
335     
336 
337 
338 
339 
340 
341     public static String getFirstLCcutterSuffix(String rawLCcallnum) {
342         String result = null;
343 
344         String regex = LC_CLASS_N_CUTTER + " *(" + NOT_CUTTER + ")*";
345         Pattern pattern = Pattern.compile(regex);
346         Matcher matcher = pattern.matcher(rawLCcallnum);
347 
348         
349         if (matcher.find() && matcher.groupCount() > 5
350                 && matcher.group(6) != null && matcher.group(6).length() > 0) {
351 
352             
353             
354             result = matcher.group(6).trim();
355 
356             
357             
358             int endLastIx = matcher.end(6); 
359             if (endLastIx < rawLCcallnum.length()) {
360                 
361                 Pattern cutterPat = Pattern.compile(" *\\." + CUTTER_REGEX);
362                 matcher.usePattern(cutterPat);
363                 if (matcher.find(endLastIx)) {
364                     if (endLastIx < matcher.start())
365                         result = result.trim() + " " + rawLCcallnum.substring(endLastIx, matcher.start()).trim();
366                 } else
367                     result = result + rawLCcallnum.substring(endLastIx);
368             }
369         } else {
370             
371             
372             
373             
374             String afterLCclassNCutter = rawLCcallnum.replaceFirst(LC_CLASS_N_CUTTER + " *", "");
375             String cutterRegex = LC_CLASS_N_CUTTER + " *(.*)\\." + CUTTER_REGEX;
376 
377             pattern = Pattern.compile(cutterRegex);
378             matcher = pattern.matcher(rawLCcallnum);
379 
380             if (matcher.find() && matcher.groupCount() > 5
381                     && matcher.group(6) != null && matcher.group(6).length() > 0)
382                 
383                 result = matcher.group(6).trim();
384             else {
385                 regex = LC_CLASS_N_CUTTER + " \\.\\.\\.$";
386                 pattern = Pattern.compile(regex);
387                 matcher = pattern.matcher(rawLCcallnum);
388                 if (matcher.find())
389                     result = " ...";
390             }
391         }
392         return result;
393     }
394 
395     
396 
397 
398 
399 
400 
401 
402     public static String getSecondLCcutter(String rawLCcallnum) {
403         String result = null;
404 
405         String firstCutSuffix = getFirstLCcutterSuffix(rawLCcallnum);
406         if (firstCutSuffix == null || firstCutSuffix.length() == 0) {
407             
408             String regex = LC_CLASS_N_CUTTER + " *\\.?(" + CUTTER_REGEX + ")";
409             Pattern pattern = Pattern.compile(regex);
410             Matcher matcher = pattern.matcher(rawLCcallnum);
411             if (matcher.find() && matcher.groupCount() > 5
412                     && matcher.group(6) != null && matcher.group(6).length() > 0) {
413                 result = matcher.group(6).trim();
414             }
415         } else {
416             
417             
418             int ix = rawLCcallnum.indexOf(firstCutSuffix) + firstCutSuffix.length();
419             if (ix < rawLCcallnum.length()) {
420                 String remaining = rawLCcallnum.substring(ix).trim();
421                 Pattern pattern = Pattern.compile("(" + CUTTER_REGEX + ")");
422                 Matcher matcher = pattern.matcher(remaining);
423                 if (matcher.find() && matcher.group(1) != null && matcher.group(1).length() > 0) {
424                     result = matcher.group(1).trim();
425                 }
426             }
427             
428             if (result == null) {
429                 Pattern pattern = Pattern.compile("\\.(" + CUTTER_REGEX + ")");
430                 Matcher matcher = pattern.matcher(firstCutSuffix);
431                 if (matcher.find() && matcher.group(1) != null && matcher.group(1).length() > 0) {
432                     result = matcher.group(1).trim();
433                 }
434             }
435         }
436         return result;
437     }
438 
439     
440 
441 
442 
443 
444 
445     public static String getSecondLCcutterSuffix(String rawLCcallnum) {
446         String result = null;
447 
448         String secondCutter = getSecondLCcutter(rawLCcallnum);
449         if (secondCutter != null && secondCutter.length() > 0) {
450             
451             int ix = rawLCcallnum.indexOf(secondCutter) + secondCutter.length();
452             if (ix < rawLCcallnum.length())
453                 result = rawLCcallnum.substring(ix).trim();
454         }
455 
456         return result;
457     }
458 
459     
460 
461 
462 
463 
464 
465 
466 
467     public static String getSecondLCcutterYearSuffix(String rawLCcallnum) {
468         String result = null;
469 
470         String regex = LC_CLASS_N_CUTTER + " *(" + NOT_CUTTER + ")*";
471         Pattern pattern = Pattern.compile(regex);
472         Matcher matcher = pattern.matcher(rawLCcallnum);
473 
474         if (matcher.find() && matcher.groupCount() > 5
475                 && matcher.group(6) != null && matcher.group(6).length() > 0) {
476 
477             
478             
479             result = matcher.group(6);
480 
481             
482             
483             int endLastIx = matcher.end(6); 
484             if (endLastIx < rawLCcallnum.length()) {
485                 Pattern cutterPat = Pattern.compile(" *\\.?" + CUTTER_REGEX + ".*");
486                 matcher.usePattern(cutterPat);
487                 if (matcher.find(endLastIx)) {
488                     if (endLastIx < matcher.start())
489                         result = result.trim() + " " + rawLCcallnum.substring(endLastIx, matcher.start()).trim();
490                 } else
491                     result = result.trim() + rawLCcallnum.substring(endLastIx);
492             }
493         }
494 
495         return result;
496     }
497 
498     
499 
500 
501 
502     public static final String getDeweyB4Cutter(String callnum) {
503         String result = null;
504 
505         String entireCallNumRegex = "(" + DEWEY_CLASS_REGEX + ").*";
506         Pattern pattern = Pattern.compile(entireCallNumRegex);
507         Matcher matcher = pattern.matcher(callnum);
508         if (matcher.find())
509             result = matcher.group(1).trim();
510 
511         return result;
512     }
513 
514     
515 
516 
517 
518 
519 
520 
    /**
     * Returns the cutter of a Dewey call number.
     *
     * <p>Seven expressions are tried. Each starts with the Dewey
     * classification, optional spaces and an optional period, captures the
     * cutter as group 2 and what follows it as group 3. Expressions 1-3
     * require the cutter to be followed by space-separated non-cutter text;
     * expressions 4-7 accept anything after the cutter and go from the most
     * specific cutter form down to a single letter.
     *
     * @param rawCallnum the Dewey call number; must not be null
     * @return the trimmed cutter, or null when none of the expressions match
     */
    public static String getDeweyCutter(String rawCallnum) {
        String result = null;

        // Cutter forms: letter + 1-3 digits, optionally followed by letters
        // either directly or after spaces.
        String regex1 = DEWEY_CLASS_REGEX + " *\\.?(" + DEWEY_CUTTER_TRAILING_LETTERS_REGEX + ")( +" + NOT_CUTTER + ".*)";
        String regex2 = DEWEY_CLASS_REGEX + " *\\.?(" + DEWEY_MIN_CUTTER_REGEX + ")( +" + NOT_CUTTER + ".*)";
        String regex3 = DEWEY_CLASS_REGEX + " *\\.?(" + DEWEY_CUTTER_SPACE_TRAILING_LETTERS_REGEX + ")( +" + NOT_CUTTER + ".*)";
        String regex4 = DEWEY_CLASS_REGEX + " *\\.?(" + DEWEY_CUTTER_TRAILING_LETTERS_REGEX + ")(.*)";
        String regex5 = DEWEY_CLASS_REGEX + " *\\.?(" + DEWEY_MIN_CUTTER_REGEX + ")(.*)";
        String regex6 = DEWEY_CLASS_REGEX + " *\\.?(" + DEWEY_CUTTER_SPACE_TRAILING_LETTERS_REGEX + ")(.*)";
        String regex7 = DEWEY_CLASS_REGEX + " *\\.?(" + DEWEY_MIN_CUTTER_LETTER_REGEX + ")(.*)";
        Pattern pat1 = Pattern.compile(regex1);
        Pattern pat2 = Pattern.compile(regex2);
        Pattern pat3 = Pattern.compile(regex3);
        Pattern pat4 = Pattern.compile(regex4);
        Pattern pat5 = Pattern.compile(regex5);
        Pattern pat6 = Pattern.compile(regex6);
        Pattern pat7 = Pattern.compile(regex7);

        // Pick the first of expressions 1-3 that finds something; the
        // matcher for expression 3 is left unsearched at this point.
        Matcher matcher = pat1.matcher(rawCallnum);
        if (!matcher.find()) {
            matcher = pat2.matcher(rawCallnum);
            if (!matcher.find()) {
                matcher = pat3.matcher(rawCallnum);
            }
        }

        // NOTE(review): when expression 1 or 2 already matched above, this is
        // a second find() on the same matcher, so it looks for a *subsequent*
        // match instead of re-reading the first one; getDeweyCutterSuffix
        // uses find(0) at the equivalent point. Confirm whether the
        // difference is intended before changing it -- the results of this
        // method depend on it.
        if (matcher.find()) {
            String cutter = matcher.group(2);
            String suffix = matcher.group(3);
            if (suffix.length() == 0)
                result = cutter.trim();
            else {
                // A space between cutter and suffix means the cutter is
                // complete; otherwise drop whatever follows the last space
                // inside the captured cutter.
                if (suffix.startsWith(" ") || cutter.endsWith(" "))
                    result = cutter.trim();
                else {
                    int ix = cutter.lastIndexOf(' ');
                    if (ix != -1)
                        result = cutter.substring(0, ix);
                    else
                        result = cutter.trim();
                }
            }
        } else {
            // Fall back to the looser expressions 4-7, in order.
            matcher = pat4.matcher(rawCallnum);
            if (matcher.find())
                result = matcher.group(2);
            else {
                matcher = pat5.matcher(rawCallnum);
                if (matcher.find())
                    result = matcher.group(2);
                else {
                    matcher = pat6.matcher(rawCallnum);
                    if (matcher.find())
                        result = matcher.group(2);
                    else {
                        matcher = pat7.matcher(rawCallnum);
                        if (matcher.find())
                            result = matcher.group(2);
                    }
                }
            }
        }
        if (result != null)
            return result.trim();
        return result;
    }
590 
591     
592 
593 
594 
595 
596     public static String getDeweyCutterSuffix(String rawCallnum) {
597         if (rawCallnum == null || rawCallnum.length() == 0)
598             return null;
599         String result = null;
600 
601         String cutter = getDeweyCutter(rawCallnum);
602         if (cutter != null) {
603             int ix = rawCallnum.indexOf(cutter) + cutter.length();
604             result = rawCallnum.substring(ix).trim();
605         }
606 
607         if (result == null || result.length() == 0) {
608             
609             String regex1 = DEWEY_CLASS_REGEX + " *\\.?(" + DEWEY_CUTTER_TRAILING_LETTERS_REGEX + ")( +" + NOT_CUTTER + ".*)";
610             String regex2 = DEWEY_CLASS_REGEX + " *\\.?(" + DEWEY_MIN_CUTTER_REGEX + ")( +" + NOT_CUTTER + ".*)";
611             String regex3 = DEWEY_CLASS_REGEX + " *\\.?(" + DEWEY_CUTTER_SPACE_TRAILING_LETTERS_REGEX + ")( +" + NOT_CUTTER + ".*)";
612             String regex4 = DEWEY_CLASS_REGEX + " *\\.?(" + DEWEY_CUTTER_TRAILING_LETTERS_REGEX + ")(.*)";
613             String regex5 = DEWEY_CLASS_REGEX + " *\\.?(" + DEWEY_MIN_CUTTER_REGEX + ")(.*)";
614             String regex6 = DEWEY_CLASS_REGEX + " *\\.?(" + DEWEY_CUTTER_SPACE_TRAILING_LETTERS_REGEX + ")(.*)";
615             String regex7 = DEWEY_CLASS_REGEX + " *\\.?(" + DEWEY_MIN_CUTTER_LETTER_REGEX + ")(.*)";
616             Pattern pat1 = Pattern.compile(regex1);
617             Pattern pat2 = Pattern.compile(regex2);
618             Pattern pat3 = Pattern.compile(regex3);
619             Pattern pat4 = Pattern.compile(regex4);
620             Pattern pat5 = Pattern.compile(regex5);
621             Pattern pat6 = Pattern.compile(regex6);
622             Pattern pat7 = Pattern.compile(regex7);
623 
624             Matcher matcher = pat1.matcher(rawCallnum);
625             if (!matcher.find()) {
626                 matcher = pat2.matcher(rawCallnum);
627                 if (!matcher.find()) {
628                     matcher = pat3.matcher(rawCallnum);
629                     if (!matcher.find()) {
630                         matcher = pat4.matcher(rawCallnum);
631                         if (!matcher.find()) {
632                             matcher = pat5.matcher(rawCallnum);
633                             if (!matcher.find()) {
634                                 matcher = pat6.matcher(rawCallnum);
635                                 if(!matcher.find()){
636                                     matcher = pat7.matcher(rawCallnum);
637                                 }
638                             }
639                         }
640                     }
641                 }
642             }
643 
644             if (matcher.find(0)) {
645                 cutter = matcher.group(2);
646                 String suffix = matcher.group(3);
647                 if (suffix.trim().length() > 0) {
648                     
649                     
650                     if (suffix.startsWith(" ") || cutter.endsWith(" "))
651                         result = suffix;
652                     else {
653                         int ix = cutter.lastIndexOf(' ');
654                         if (ix != -1)
655                             result = cutter.substring(ix) + suffix;
656                         else
657                             result = suffix;
658                     }
659                 }
660             }
661         }
662         if (result != null)
663             result = result.trim();
664         if (result == null || result.trim().length() == 0)
665             return null;
666         else
667             return result;
668     }
669 
670 
671     
672 
673 
674 
675 
676 
677 
678 
679     public static String normalizeCallnum(String rawCallnum) {
680 
681         
682         String normalizedCallnum = rawCallnum.trim().replaceAll("\\s\\s+", " ");
683         
684         normalizedCallnum = normalizedCallnum.replaceAll("\\. \\.", " .");
685         
686         normalizedCallnum = normalizedCallnum.replaceAll("(\\d+\\.) ([A-Z])", "$1$2");
687         
688         if (normalizedCallnum.endsWith("."))
689             normalizedCallnum = normalizedCallnum.substring(0, normalizedCallnum.length() - 1).trim();
690 
691         
692 
693         
694         
695         return normalizedCallnum;
696     }
697 
698     
699 
700 
701 
702     static String normalizeLCcallnum(String rawLCcallnum) {
703         String normCallnum = normalizeCallnum(rawLCcallnum);
704         
705         return normCallnum.replaceAll("^([A-Z][A-Z]?[A-Z]?) ([0-9])", "$1$2");
706     }
707 
708 
709 
710 
711     
712 
713 
714 
715     public static String getLCShelfkey(String rawLCcallnum, String recid) {
716         return (getLCShelfkey(rawLCcallnum, recid, null));
717     }
718 
719     
720 
721 
722 
723     public static String getLCShelfkey(String rawLCcallnum, String recid, ErrorHandler errors) {
724         StringBuilder resultBuf = new StringBuilder();
725         String upcaseLCcallnum = rawLCcallnum.toUpperCase();
726 
727 
728 
729 
730         
731         StringBuilder initLetBuf = new StringBuilder("    ");
732         String lets = getLCstartLetters(upcaseLCcallnum);
733         if (lets != null) {
734             initLetBuf.replace(0, lets.length(), lets);
735         } else {
736             if ((recid != null) && (!rawLCcallnum.startsWith("XX"))) 
737             {
738                 if (errors == null) {
739                     System.err.println("Problem creating shelfkey for record " + recid + "; call number: " + rawLCcallnum);
740                 } else {
741                     errors.addError(ErrorHandler.ERROR_TYPO, "Problem creating shelfkey for record " + recid + "; call number: " + rawLCcallnum);
742                 }
743             }
744             return (rawLCcallnum + " ");
745         }
746         resultBuf.append(initLetBuf);
747 
748         try {
749             
750             
751             String digitStr = getLCClassDigits(upcaseLCcallnum);
752             if (digitStr != null)
753                 resultBuf.append(normalizeFloat(digitStr, 4, 6));
754             else
755                 resultBuf.append(normalizeFloat("0", 4, 6));
756 
757             
758             String classSuffix = getLCClassSuffix(upcaseLCcallnum);
759             if (classSuffix != null)
760                 resultBuf.append(" " + normalizeSuffix(classSuffix));
761 
762             
763             String firstCutter = getFirstLCcutter(upcaseLCcallnum);
764             if (firstCutter != null) {
765                 resultBuf.append(" " + normalizeCutter(firstCutter, 6));
766 
767                 
768                 String firstCutterSuffix = getFirstLCcutterSuffix(upcaseLCcallnum);
769                 if (firstCutterSuffix != null)
770                     resultBuf.append(" " + normalizeSuffix(firstCutterSuffix));
771 
772                 
773                 String secondCutter = getSecondLCcutter(upcaseLCcallnum);
774                 if (secondCutter != null) {
775                     resultBuf.append(" " + normalizeCutter(secondCutter, 6));
776 
777                     String secondCutterSuffix = getSecondLCcutterSuffix(upcaseLCcallnum);
778                     if (secondCutterSuffix != null)
779                         resultBuf.append(" " + normalizeSuffix(secondCutterSuffix));
780                 }
781             }
782         } catch (NumberFormatException e) {
783 
784             if ((recid != null) && (!rawLCcallnum.startsWith("XX"))) 
785             {
786                 if (errors == null) {
787                     System.err.println("Problem creating shelfkey for record " + recid + "; call number: " + rawLCcallnum);
788                 } else {
789                     errors.addError(ErrorHandler.ERROR_TYPO, "Problem creating shelfkey for record " + recid + "; call number: " + rawLCcallnum);
790                 }
791             }
792             
793             resultBuf = new StringBuilder();
794         }
795 
796         if (resultBuf.length() == 0)
797             resultBuf.append(upcaseLCcallnum);
798 
799         return resultBuf.toString().trim();
800     }
801 
802     
803 
804 
805 
806     private static String normalizeCutter(String cutter, int numDigits) {
807         String result = null;
808         if (cutter != null && cutter.length() > 0) {
809             String cutLets = getLCstartLetters(cutter);
810             String cutDigs = cutter.substring(cutLets.length());
811             String norm = null;
812             if (cutDigs != null && cutDigs.length() > 0) {
813                 try {
814                     
815                     Integer.parseInt(cutDigs);
816                     norm = normalizeFloat("." + cutDigs, 1, numDigits);
817                 } catch (NumberFormatException e) {
818                     norm = cutDigs;
819                 }
820             } else if (cutDigs.length() == 0 && cutLets.length() == 1)
821                 
822                 norm = normalizeFloat("0", 1, numDigits);
823 
824             result = cutLets + norm;
825         }
826         return result;
827     }
828 
829     
830 
831 
832 
833     public static String normalizeSuffix(String suffix) {
834         if (suffix != null && suffix.length() > 0) {
835             StringBuilder resultBuf = new StringBuilder(suffix.length());
836             
837             String[] digitStrs = suffix.split("[\\D]+");
838             int len = digitStrs.length;
839             if (digitStrs != null && len != 0) {
840                 int s = 0;
841                 for (int d = 0; d < len; d++) {
842                     String digitStr = digitStrs[d];
843                     int ix = suffix.indexOf(digitStr, s);
844                     
845                     if (s < ix) {
846                         String text = suffix.substring(s, ix);
847                         resultBuf.append(text);
848                     }
849                     if (digitStr != null && digitStr.length() != 0) {
850                         
851                         resultBuf.append(normalizeFloat(digitStr, 6, 0));
852                         s = ix + digitStr.length();
853                     }
854 
855                 }
856                 
857                 resultBuf.append(suffix.substring(s));
858                 return resultBuf.toString();
859             }
860         }
861 
862         return suffix;
863     }
864 
865     
866 
867 
868 
869 
870     public static String getReverseShelfKey(String shelfkey) {
871         StringBuilder resultBuf = new StringBuilder(reverseDefault);
872         if (shelfkey != null && shelfkey.length() > 0)
873             resultBuf.replace(0, shelfkey.length(), reverseAlphanum(shelfkey));
874         return resultBuf.toString();
875     }
876 
877     
878 
879 
880 
881     private static String reverseAlphanum(String orig) {
882 
883 
884 
885 
886 
887 
888 
889 
890 
891 
892 
893 
894 
895 
896 
897         StringBuilder reverse = new StringBuilder();
898         for (int ix = 0; ix < orig.length(); ) {
899             int codePoint = Character.toUpperCase(orig.codePointAt(ix));
900             char[] chs = Character.toChars(codePoint);
901 
902             if (Character.isLetterOrDigit(codePoint)) {
903                 if (chs.length == 1) {
904                     char c = chs[0];
905                     if (alphanumReverseMap.containsKey(c))
906                         reverse.append(alphanumReverseMap.get(c));
907                     else {
908                         
909 
910                         
911                         char foldC;
912 
913                         if (UCharacter.UnicodeBlock.of(c) != UCharacter.UnicodeBlock.COMBINING_DIACRITICAL_MARKS &&
914                                 UCharacter.UnicodeBlock.of(c) != UCharacter.UnicodeBlock.SPACING_MODIFIER_LETTERS &&
915                                 (foldC = Utils.foldDiacriticLatinChar(c)) != 0x00)
916                             
917                             reverse.append(alphanumReverseMap.get(foldC));
918                         else
919                             
920                             
921                             reverse.append(SORT_FIRST_CHAR);
922                     }
923                 } else {
924                     
925                     
926                     reverse.append(SORT_FIRST_CHAR);
927                 }
928             } else 
929                 reverse.append(reverseNonAlphanum(chs[0]));
930 
931             ix += chs.length;
932         }
933 
934         return new String(reverse);
935     }
936 
937     
938 
939 
940 
941     public static char[] reverseNonAlphanum(char ch) {
942         
943         switch (ch) {
944             case '.':
945                 return Character.toChars('}');
946             case '{':
947             case '|':
948             case '}':
949             case '~':
950 
951 
952 
953                 return Character.toChars(' ');
954             default:
955 
956                 return Character.toChars('~');
957         }
958     }
959 
960     
961 
962 
963 
964     public static String getDeweyShelfKey(String rawDeweyCallnum) {
965         StringBuilder resultBuf = new StringBuilder();
966 
967         
968         
969         
970         String classNum = normalizeFloat(getDeweyB4Cutter(rawDeweyCallnum), 3, 8);
971         resultBuf.append(classNum);
972 
973         
974         
975         
976 
977         
978         String cutter = getDeweyCutter(rawDeweyCallnum);
979         if (cutter != null)
980             resultBuf.append(" " + cutter);
981 
982         
983         String cutterSuffix = getDeweyCutterSuffix(rawDeweyCallnum);
984         if (cutterSuffix != null)
985             resultBuf.append(" " + normalizeSuffix(cutterSuffix));
986 
987 
988         if (resultBuf.length() == 0)
989             resultBuf.append(rawDeweyCallnum);
990 
991         return resultBuf.toString().trim();
992     }
993 
994 
995     
996 
997 
998 
999 
1000 
1001 
1002 
1003 
1004 
1005 
1006 
1007 
1008 
1009 
1010 
1011     public static String normalizeFloat(String floatStr, int digitsB4, int digitsAfter) {
1012         double value = Double.valueOf(floatStr).doubleValue();
1013 
1014         String formatStr = getFormatString(digitsB4) + '.' + getFormatString(digitsAfter);
1015 
1016         DecimalFormat normFormat = new DecimalFormat(formatStr);
1017         String norm = normFormat.format(value);
1018         if (norm.endsWith("."))
1019             norm = norm.substring(0, norm.length() - 1);
1020         return norm;
1021     }
1022 
1023     private static String PUNCT_PREFIX = "([\\.:\\/])?";
1024     private static String NS_PREFIX = "(n\\.s\\.?\\,? ?)?";
1025     private static String MONTHS = "jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec";
1026     private static String VOL_LETTERS = "[\\:\\/]?(bd|iss|jahrg|new ser|no|part|pts?|ser|t|v|vols?|vyp" + "|" + MONTHS + ")";
1027     private static String VOL_NUMBERS = "\\d+([\\/-]\\d+)?( \\d{4}([\\/-]\\d{4})?)?( ?suppl\\.?)?";
1028     private static String VOL_NUMBERS_LOOSER = "\\d+.*";
1029     private static String VOL_NUM_AS_LETTERS = "[A-Z]([\\/-]\\[A-Z]+)?.*";
1030 
1031     private static Pattern VOL_PATTERN = Pattern.compile(PUNCT_PREFIX + NS_PREFIX + VOL_LETTERS + "\\.? ?" + VOL_NUMBERS, Pattern.CASE_INSENSITIVE);
1032     private static Pattern VOL_PATTERN_LOOSER = Pattern.compile(PUNCT_PREFIX + NS_PREFIX + VOL_LETTERS + "\\.? ?" + VOL_NUMBERS_LOOSER, Pattern.CASE_INSENSITIVE);
1033     private static Pattern VOL_PATTERN_LETTERS = Pattern.compile(PUNCT_PREFIX + NS_PREFIX + VOL_LETTERS + "[\\/\\. ]" + VOL_NUM_AS_LETTERS, Pattern.CASE_INSENSITIVE);
1034 
1035     
1036 
1037 
1038 
1039 
1040 
1041 
1042 
1043     public static String removeLCVolSuffix(String rawLCcallnum) {
1044         
1045         String suffix = getSecondLCcutterSuffix(rawLCcallnum);
1046         if (suffix == null || suffix.length() == 0) {
1047             String cut1suffix = getFirstLCcutterSuffix(rawLCcallnum);
1048             if (cut1suffix != null) {
1049                 
1050                 String cut2 = getSecondLCcutter(rawLCcallnum);
1051                 if (cut2 != null) {
1052                     int ix = cut1suffix.indexOf(cut2);
1053                     if (ix != -1)
1054                         suffix = cut1suffix.substring(0, ix);
1055                     else
1056                         suffix = cut1suffix;
1057                 } else
1058                     suffix = cut1suffix;
1059             }
1060         }
1061 
1062         
1063         if (suffix != null && suffix.length() > 0) {
1064             Matcher matcher = VOL_PATTERN.matcher(suffix);
1065             if (!matcher.find()) {
1066                 matcher = VOL_PATTERN_LOOSER.matcher(suffix);
1067                 if (!matcher.find()) {
1068                     matcher = VOL_PATTERN_LETTERS.matcher(suffix);
1069                 }
1070             }
1071 
1072             if (matcher.find(0)) {
1073                 
1074                 int ix = rawLCcallnum.indexOf(suffix) + matcher.start();
1075                 if (ix != -1 && ix < rawLCcallnum.length()) {
1076                     return rawLCcallnum.substring(0, ix).trim();
1077                 }
1078             }
1079         }
1080         return rawLCcallnum;
1081     }
1082 
1083 
1084     
1085 
1086 
1087 
1088 
1089 
1090 
1091 
1092     public static String removeDeweyVolSuffix(String rawDeweyCallnum) {
1093         String cutSuffix = getDeweyCutterSuffix(rawDeweyCallnum);
1094 
1095         if (cutSuffix == null || cutSuffix.length() == 0)
1096             return rawDeweyCallnum;
1097 
1098         Matcher matcher = VOL_PATTERN.matcher(cutSuffix);
1099         if (!matcher.find()) {
1100             matcher = VOL_PATTERN_LOOSER.matcher(cutSuffix);
1101             if (!matcher.find()) {
1102                 matcher = VOL_PATTERN_LETTERS.matcher(cutSuffix);
1103             }
1104         }
1105 
1106         if (matcher.find(0)) {
1107             
1108             int ix = rawDeweyCallnum.indexOf(cutSuffix) + matcher.start();
1109             if (ix != -1 && ix < rawDeweyCallnum.length()) {
1110                 return rawDeweyCallnum.substring(0, ix).trim();
1111             }
1112         }
1113         return rawDeweyCallnum;
1114     }
1115 
1116 
1117     
1118 
1119 
1120 
1121 
1122 
1123     public static String addLeadingZeros(String deweyCallNum) {
1124         String result = deweyCallNum;
1125         String b4Cutter = getPortionBeforeCutter(deweyCallNum);
1126 
1127         
1128         
1129 
1130         String b4dec = null;
1131         int decIx = b4Cutter.indexOf(".");
1132         if (decIx >= 0)
1133             b4dec = deweyCallNum.substring(0, decIx).trim();
1134         else
1135             b4dec = b4Cutter.trim();
1136 
1137         if (b4dec != null) {
1138             switch (b4dec.length()) {
1139                 case 1:
1140                     result = "00" + deweyCallNum;
1141                     break;
1142                 case 2:
1143                     result = "0" + deweyCallNum;
1144             }
1145         }
1146 
1147         return result;
1148     }
1149 
1150     
1151 
1152 
1153 
1154 
1155 
1156 
1157     private static String getFormatString(int numDigits) {
1158         StringBuilder b4 = new StringBuilder();
1159         if (numDigits < 0)
1160             b4.append("############");
1161         else if (numDigits > 0) {
1162             for (int i = 0; i < numDigits; i++) {
1163                 b4.append('0');
1164             }
1165         }
1166         return b4.toString();
1167     }
1168 
1169     
1170 
1171 
1172 
1173     public static String getSuDocShelfKey(String callNumber) {
1174         String upcaseSuDoccallnum = callNumber.toUpperCase();
1175         StringBuffer shelfKey = new StringBuffer();
1176         
1177         String[] cNumSub = upcaseSuDoccallnum.split(SUDOC_REGEX);
1178         for (String str : cNumSub) {
1179             if (StringUtils.isNumeric(str)) {   
1180                 
1181                 str = StringUtils.leftPad(str, 5, "0"); 
1182                 shelfKey.append(str);
1183                 shelfKey.append(" ");
1184             } else {                     
1185                 
1186                 str = StringUtils.rightPad(str, 5);  
1187                 shelfKey.append(str);
1188                 shelfKey.append(" ");
1189             }
1190         }
1191         return shelfKey.toString().trim();
1192     }
1193 }