1   package org.kuali.ole.utility.callnumber;
2   
3   
4   
5   
6   
7   
8   
9   
10  
11  import com.ibm.icu.lang.UCharacter;
12  import org.apache.commons.lang.StringUtils;
13  import org.kuali.ole.docstore.common.exception.DocstoreResources;
14  import org.kuali.ole.docstore.common.exception.DocstoreValidationException;
15  import org.marc4j.ErrorHandler;
16  import org.slf4j.Logger;
17  import org.slf4j.LoggerFactory;
18  
19  import java.text.DecimalFormat;
20  import java.util.HashMap;
21  import java.util.Map;
22  import java.util.regex.Matcher;
23  import java.util.regex.Pattern;
24  
25  
26  
27  
28  
29  
30  
31  
32  
33  public final class CallNumUtils {
34  
35  
36  
37  
38  
39  
40      
41  
42  
    /** Private constructor: prevents instantiation from outside this class. */
    private CallNumUtils() {
    }
    /** Logger for this class. */
    private static final Logger LOG = LoggerFactory.getLogger(CallNumUtils.class);
    /**
     * Matches a string that begins with one to three digits, optionally followed by a
     * decimal fraction, and then any trailing characters.
     */
    public static final Pattern DEWEY_PATTERN = Pattern.compile("^\\d{1,3}(\\.\\d+)?.*");
47      
48  
49  
50  
51  
52  
53  
54  
    /**
     * Regex for the required part of an LC classification: one uppercase letter other than
     * I, O, W, X or Y, up to two further uppercase letters, optional spaces, one or more
     * digits, and an optional decimal fraction. Contains one capturing group (the fraction).
     */
    public static final String LC_CLASS_REQ_REGEX = "[A-Z&&[^IOWXY]]{1}[A-Z]{0,2} *\\d+(\\.\\d+)?";

    /**
     * Regex for text that is treated as "not a cutter": either a digit or lowercase letter
     * followed by word characters, or an uppercase letter followed by one or more non-digits
     * and then word characters. Contains two capturing groups, joined by a top-level
     * alternation (note this when concatenating it into larger expressions).
     */
    public static final String NOT_CUTTER = "([\\da-z]\\w*)|([A-Z]\\D+[\\w]*)";

    /**
     * Regex for an LC classification with an optional suffix built from {@link #NOT_CUTTER}.
     * Contributes five capturing groups in total, which is why callers in this class read the
     * first group they append themselves as group 6.
     */
    public static final String LC_CLASS_W_SUFFIX = "(" + LC_CLASS_REQ_REGEX + "( +" + NOT_CUTTER + ")?)";

    /**
     * Regex for a minimal cutter: one uppercase letter followed by one or more digits.
     * Contains no capturing group.
     */
    public static final String CUTTER_REGEX = "[A-Z]\\d+";

    /**
     * Regex for an LC classification (with optional suffix), optional spaces, an optional
     * period and then a cutter.
     */
    public static final String LC_CLASS_N_CUTTER = LC_CLASS_W_SUFFIX + " *\\.?" + CUTTER_REGEX;
    /** Compiled form of {@link #LC_CLASS_N_CUTTER} followed by any trailing characters. */
    public static final Pattern LC_CLASS_N_CUTTER_PATTERN = Pattern.compile(LC_CLASS_N_CUTTER + ".*");
79  
80      
81  
82  
83  
84  
    /**
     * Regex for a Dewey classification: one to three digits with an optional decimal
     * fraction. Contains one capturing group (the fraction).
     */
    public static final String DEWEY_CLASS_REGEX = "\\d{1,3}(\\.\\d+)?";

    /** Regex for the shortest accepted Dewey cutter start: a single uppercase letter. */
    public static final String DEWEY_MIN_CUTTER_LETTER_REGEX = "[A-Z]";
    /** Regex for a minimal Dewey cutter: an uppercase letter followed by one to three digits. */
    public static final String DEWEY_MIN_CUTTER_REGEX = "[A-Z]\\d{1,3}";
    /** Minimal Dewey cutter immediately followed by one or more uppercase letters. */
    public static final String DEWEY_CUTTER_TRAILING_LETTERS_REGEX = DEWEY_MIN_CUTTER_REGEX + "[A-Z]+";
    /** Minimal Dewey cutter followed by one or more spaces and then one or more uppercase letters. */
    public static final String DEWEY_CUTTER_SPACE_TRAILING_LETTERS_REGEX = DEWEY_MIN_CUTTER_REGEX + " +[A-Z]+";
    /**
     * Minimal Dewey cutter followed by optional spaces and optional uppercase letters
     * (the letter run uses a possessive quantifier, so it never gives characters back).
     */
    public static final String DEWEY_FULL_CUTTER_REGEX = DEWEY_MIN_CUTTER_REGEX + " *[A-Z]*+";

    /**
     * Regex for a Dewey classification, optional spaces, an optional period and a full
     * Dewey cutter.
     */
    public static final String DEWEY_CLASS_N_CUTTER_REGEX = DEWEY_CLASS_REGEX + " *\\.?" + DEWEY_FULL_CUTTER_REGEX;
    /** Compiled form of {@link #DEWEY_CLASS_N_CUTTER_REGEX} followed by any trailing characters. */
    public static final Pattern DEWEY_CLASS_N_CUTTER_PATTERN = Pattern.compile(DEWEY_CLASS_N_CUTTER_REGEX + ".*");
103 
104     
105 
106 
107 
108 
    /**
     * Regex that matches either a run of characters other than A-Z and 0-9, or the zero-width
     * position between an uppercase letter and a digit (in either order).
     * NOTE(review): presumably used as a split delimiter for SuDoc call numbers; no use is
     * visible in this part of the file - confirm.
     */
    public static final String SUDOC_REGEX = "[^A-Z0-9]+|(?<=[A-Z])(?=[0-9])|(?<=[0-9])(?=[A-Z])";
110 
111 
    /**
     * Character substitution table used when building reverse shelf keys. Taking the
     * sequence 0-9 followed by A-Z, each character maps to the one at the mirrored
     * position: '0' to 'Z', '9' to 'Q', 'A' to 'P', 'P' to 'A', 'Q' to '9', 'Z' to '0'.
     * Only digits and uppercase ASCII letters are present as keys.
     */
    private static Map<Character, Character> alphanumReverseMap = new HashMap<Character, Character>();

    static {
        // digits map onto the tail of the alphabet
        alphanumReverseMap.put('0', 'Z');
        alphanumReverseMap.put('1', 'Y');
        alphanumReverseMap.put('2', 'X');
        alphanumReverseMap.put('3', 'W');
        alphanumReverseMap.put('4', 'V');
        alphanumReverseMap.put('5', 'U');
        alphanumReverseMap.put('6', 'T');
        alphanumReverseMap.put('7', 'S');
        alphanumReverseMap.put('8', 'R');
        alphanumReverseMap.put('9', 'Q');
        // A-P map onto P-A
        alphanumReverseMap.put('A', 'P');
        alphanumReverseMap.put('B', 'O');
        alphanumReverseMap.put('C', 'N');
        alphanumReverseMap.put('D', 'M');
        alphanumReverseMap.put('E', 'L');
        alphanumReverseMap.put('F', 'K');
        alphanumReverseMap.put('G', 'J');
        alphanumReverseMap.put('H', 'I');
        alphanumReverseMap.put('I', 'H');
        alphanumReverseMap.put('J', 'G');
        alphanumReverseMap.put('K', 'F');
        alphanumReverseMap.put('L', 'E');
        alphanumReverseMap.put('M', 'D');
        alphanumReverseMap.put('N', 'C');
        alphanumReverseMap.put('O', 'B');
        alphanumReverseMap.put('P', 'A');
        // Q-Z map onto the digits 9-0
        alphanumReverseMap.put('Q', '9');
        alphanumReverseMap.put('R', '8');
        alphanumReverseMap.put('S', '7');
        alphanumReverseMap.put('T', '6');
        alphanumReverseMap.put('U', '5');
        alphanumReverseMap.put('V', '4');
        alphanumReverseMap.put('W', '3');
        alphanumReverseMap.put('X', '2');
        alphanumReverseMap.put('Y', '1');
        alphanumReverseMap.put('Z', '0');
    }
152 
153 
154     
155 
156 
    /**
     * Character appended to a reverse key for letters/digits that have no entry in the
     * substitution table; set to {@link Character#MIN_VALUE}.
     * NOTE(review): public and non-final, so it can be reassigned by any caller - confirm intended.
     */
    public static char SORT_FIRST_CHAR = Character.MIN_VALUE;
    /**
     * Template for reverse shelf keys: after class initialization it holds 50 '~' characters
     * (the builder is created with an initial capacity of 75).
     * NOTE(review): public and mutable; {@code getReverseShelfKey} copies it before editing.
     */
    public static StringBuilder reverseDefault = new StringBuilder(75);

    static {
        // fill the template with 50 tildes
        for (int i = 0; i < 50; i++)
            reverseDefault.append(Character.toChars('~'));
    }
167 
168 
169 
170     
171 
172 
173 
174     public static final boolean isValidLC(String possLCval) {
175         if (possLCval != null && LC_CLASS_N_CUTTER_PATTERN.matcher(possLCval.trim()).matches())
176             return true;
177         return false;
178     }
179 
180     
181 
182 
183 
184     public static final boolean isValidDeweyWithCutter(String possDeweyVal) {
185         if (possDeweyVal != null && DEWEY_CLASS_N_CUTTER_PATTERN.matcher(possDeweyVal.trim()).matches())
186             return true;
187         return false;
188     }
189 
190     
191 
192 
193 
194     public static final boolean isValidDewey(String possDeweyVal) {
195         if (possDeweyVal != null && DEWEY_PATTERN.matcher(possDeweyVal.trim()).matches())
196             return true;
197         return false;
198     }
199 
200     
201 
202 
203 
204     public static final String getPortionBeforeCutter(String callnum) {
205 
206         
207         
208         
209         
210         
211         
212         String beginCutterRegex = "( +|(\\.[A-Z])| */)";
213 
214         String[] pieces = callnum.split(beginCutterRegex);
215         if (pieces.length == 0 || pieces[0] == null || pieces[0].length() == 0)
216             return null;
217         else
218             return pieces[0].trim();
219     }
220 
221     
222 
223 
224 
225     public static final String getLCB4FirstCutter(String callnum) {
226         String result = null;
227 
228         String cutter = getFirstLCcutter(callnum);
229         if (cutter != null && cutter.length() > 0) {
230             
231             int ix = callnum.indexOf(cutter);
232             String lets = getLCstartLetters(callnum);
233             if (ix < lets.length())
234                 ix = callnum.indexOf(cutter, lets.length());
235 
236             if (ix > 0) {
237                 result = callnum.substring(0, ix).trim();
238                 if (result.endsWith("."))
239                     result = result.substring(0, result.length() - 1).trim();
240             } else
241                 result = callnum;
242         } else 
243             result = callnum;
244 
245         return result;
246     }
247 
248     
249 
250 
251 
252     public static String getLCstartLetters(String rawLCcallnum) {
253         String result = null;
254         if (rawLCcallnum != null && rawLCcallnum.length() > 0) {
255             String[] lcClass = rawLCcallnum.split("[^A-Z]+");
256             if (lcClass.length > 0)
257                 result = lcClass[0];
258         }
259         return result;
260     }
261 
262     
263 
264 
265 
266 
267 
268 
269     public static String getLCClassDigits(String rawLCcallnum) {
270         String result = null;
271 
272         String rawClass = getLCB4FirstCutter(rawLCcallnum);
273         if (rawClass != null && rawClass.length() > 0) {
274             String[] pieces = rawClass.split("[A-Z ]+");
275             if (pieces.length > 1)
276                 result = pieces[1].trim();
277         }
278         return result;
279     }
280 
281     
282 
283 
284 
285 
286 
287     public static String getLCClassSuffix(String rawLCcallnum) {
288         String result = null;
289 
290         String b4cutter = getLCB4FirstCutter(rawLCcallnum);
291         if (b4cutter == null || b4cutter.length() == 0)
292             return null;
293 
294         String classDigits = getLCClassDigits(rawLCcallnum);
295 
296         if (classDigits != null && classDigits.length() > 0) {
297             int reqClassLen = b4cutter.indexOf(classDigits) + classDigits.length();
298 
299             if (b4cutter.length() > reqClassLen)
300                 result = b4cutter.substring(reqClassLen).trim();
301         }
302 
303         return result;
304     }
305 
306     
307 
308 
309 
310 
311 
312 
313     public static String getFirstLCcutter(String rawCallnum) {
314         String result = null;
315 
316         String regex = LC_CLASS_W_SUFFIX + " *\\.?(" + CUTTER_REGEX + ")";
317         Pattern pattern = Pattern.compile(regex);
318         Matcher matcher = pattern.matcher(rawCallnum);
319 
320         if (matcher.find())
321             result = matcher.group(6).trim();
322 
323         
324         
325 
326 
327 
328 
329 
330 
331 
332 
333 
334 
335 
336         return result;
337     }
338 
339     
340 
341 
342 
343 
344 
345     public static String getFirstLCcutterSuffix(String rawLCcallnum) {
346         String result = null;
347 
348         String regex = LC_CLASS_N_CUTTER + " *(" + NOT_CUTTER + ")*";
349         Pattern pattern = Pattern.compile(regex);
350         Matcher matcher = pattern.matcher(rawLCcallnum);
351 
352         
353         if (matcher.find() && matcher.groupCount() > 5
354                 && matcher.group(6) != null && matcher.group(6).length() > 0) {
355 
356             
357             
358             result = matcher.group(6).trim();
359 
360             
361             
362             int endLastIx = matcher.end(6); 
363             if (endLastIx < rawLCcallnum.length()) {
364                 
365                 Pattern cutterPat = Pattern.compile(" *\\." + CUTTER_REGEX);
366                 matcher.usePattern(cutterPat);
367                 if (matcher.find(endLastIx)) {
368                     if (endLastIx < matcher.start())
369                         result = result.trim() + " " + rawLCcallnum.substring(endLastIx, matcher.start()).trim();
370                 } else
371                     result = result + rawLCcallnum.substring(endLastIx);
372             }
373         } else {
374             
375             
376             
377             
378             String afterLCclassNCutter = rawLCcallnum.replaceFirst(LC_CLASS_N_CUTTER + " *", "");
379             String cutterRegex = LC_CLASS_N_CUTTER + " *(.*)\\." + CUTTER_REGEX;
380 
381             pattern = Pattern.compile(cutterRegex);
382             matcher = pattern.matcher(rawLCcallnum);
383 
384             if (matcher.find() && matcher.groupCount() > 5
385                     && matcher.group(6) != null && matcher.group(6).length() > 0)
386                 
387                 result = matcher.group(6).trim();
388             else {
389                 regex = LC_CLASS_N_CUTTER + " \\.\\.\\.$";
390                 pattern = Pattern.compile(regex);
391                 matcher = pattern.matcher(rawLCcallnum);
392                 if (matcher.find())
393                     result = " ...";
394             }
395         }
396         return result;
397     }
398 
399     
400 
401 
402 
403 
404 
405 
406     public static String getSecondLCcutter(String rawLCcallnum) {
407         String result = null;
408 
409         String firstCutSuffix = getFirstLCcutterSuffix(rawLCcallnum);
410         if (firstCutSuffix == null || firstCutSuffix.length() == 0) {
411             
412             String regex = LC_CLASS_N_CUTTER + " *\\.?(" + CUTTER_REGEX + ")";
413             Pattern pattern = Pattern.compile(regex);
414             Matcher matcher = pattern.matcher(rawLCcallnum);
415             if (matcher.find() && matcher.groupCount() > 5
416                     && matcher.group(6) != null && matcher.group(6).length() > 0) {
417                 result = matcher.group(6).trim();
418             }
419         } else {
420             
421             
422             int ix = rawLCcallnum.indexOf(firstCutSuffix) + firstCutSuffix.length();
423             if (ix < rawLCcallnum.length()) {
424                 String remaining = rawLCcallnum.substring(ix).trim();
425                 Pattern pattern = Pattern.compile("(" + CUTTER_REGEX + ")");
426                 Matcher matcher = pattern.matcher(remaining);
427                 if (matcher.find() && matcher.group(1) != null && matcher.group(1).length() > 0) {
428                     result = matcher.group(1).trim();
429                 }
430             }
431             
432             if (result == null) {
433                 Pattern pattern = Pattern.compile("\\.(" + CUTTER_REGEX + ")");
434                 Matcher matcher = pattern.matcher(firstCutSuffix);
435                 if (matcher.find() && matcher.group(1) != null && matcher.group(1).length() > 0) {
436                     result = matcher.group(1).trim();
437                 }
438             }
439         }
440         return result;
441     }
442 
443     
444 
445 
446 
447 
448 
449     public static String getSecondLCcutterSuffix(String rawLCcallnum) {
450         String result = null;
451 
452         String secondCutter = getSecondLCcutter(rawLCcallnum);
453         if (secondCutter != null && secondCutter.length() > 0) {
454             
455             int ix = rawLCcallnum.indexOf(secondCutter) + secondCutter.length();
456             if (ix < rawLCcallnum.length())
457                 result = rawLCcallnum.substring(ix).trim();
458         }
459 
460         return result;
461     }
462 
463     
464 
465 
466 
467 
468 
469 
470 
    /**
     * Extracts a suffix using the same leading match as the first-cutter suffix lookup:
     * class + first cutter followed by zero or more {@link #NOT_CUTTER} pieces, of which the
     * last captured piece (group 6) is the starting result.
     *
     * <p>If text remains after that piece, a following cutter (optional spaces, optional
     * period, cutter, rest of line) is searched for from that position. Text found between
     * the piece and that cutter is appended after a space; if no cutter follows, the whole
     * remainder is appended directly.
     *
     * @param rawLCcallnum the LC call number; must not be {@code null}
     * @return the assembled suffix, or {@code null} when the leading match yields no
     *         non-empty group 6
     */
    public static String getSecondLCcutterYearSuffix(String rawLCcallnum) {
        String result = null;

        // groups 1-5 come from LC_CLASS_N_CUTTER; group 6 is the (repeated) suffix piece
        String regex = LC_CLASS_N_CUTTER + " *(" + NOT_CUTTER + ")*";
        Pattern pattern = Pattern.compile(regex);
        Matcher matcher = pattern.matcher(rawLCcallnum);

        if (matcher.find() && matcher.groupCount() > 5
                && matcher.group(6) != null && matcher.group(6).length() > 0) {

            // a repeated group reports only its last repetition; note it is not trimmed here
            result = matcher.group(6);

            // anything after that piece may still belong to the suffix
            int endLastIx = matcher.end(6); 
            if (endLastIx < rawLCcallnum.length()) {
                // reuse the matcher with a new pattern and search from the end of group 6
                Pattern cutterPat = Pattern.compile(" *\\.?" + CUTTER_REGEX + ".*");
                matcher.usePattern(cutterPat);
                if (matcher.find(endLastIx)) {
                    // keep only the text lying between group 6 and the following cutter
                    if (endLastIx < matcher.start())
                        result = result.trim() + " " + rawLCcallnum.substring(endLastIx, matcher.start()).trim();
                } else
                    // no further cutter: the whole remainder is part of the suffix
                    result = result.trim() + rawLCcallnum.substring(endLastIx);
            }
        }

        return result;
    }
501 
502     
503 
504 
505 
506     public static final String getDeweyB4Cutter(String callnum) {
507         String result = null;
508 
509         String entireCallNumRegex = "(" + DEWEY_CLASS_REGEX + ").*";
510         Pattern pattern = Pattern.compile(entireCallNumRegex);
511         Matcher matcher = pattern.matcher(callnum);
512         if (matcher.find())
513             result = matcher.group(1).trim();
514 
515         return result;
516     }
517 
518     
519 
520 
521 
522 
523 
524 
    /**
     * Returns the cutter of a Dewey call number.
     *
     * <p>Seven patterns are built, each consisting of the Dewey class, optional spaces, an
     * optional period, a cutter variant (group 2) and the remainder (group 3). Patterns 1-3
     * require the remainder to start like a {@link #NOT_CUTTER} suffix; patterns 4-7 accept
     * any remainder and are tried in order as fallbacks.
     *
     * @param rawCallnum the Dewey call number; must not be {@code null}
     * @return the trimmed cutter, or {@code null} when no pattern matches
     */
    public static String getDeweyCutter(String rawCallnum) {
        String result = null;

        // group 1 is the class fraction, group 2 the cutter, group 3 the remainder
        String regex1 = DEWEY_CLASS_REGEX + " *\\.?(" + DEWEY_CUTTER_TRAILING_LETTERS_REGEX + ")( +" + NOT_CUTTER + ".*)";
        String regex2 = DEWEY_CLASS_REGEX + " *\\.?(" + DEWEY_MIN_CUTTER_REGEX + ")( +" + NOT_CUTTER + ".*)";
        String regex3 = DEWEY_CLASS_REGEX + " *\\.?(" + DEWEY_CUTTER_SPACE_TRAILING_LETTERS_REGEX + ")( +" + NOT_CUTTER + ".*)";
        String regex4 = DEWEY_CLASS_REGEX + " *\\.?(" + DEWEY_CUTTER_TRAILING_LETTERS_REGEX + ")(.*)";
        String regex5 = DEWEY_CLASS_REGEX + " *\\.?(" + DEWEY_MIN_CUTTER_REGEX + ")(.*)";
        String regex6 = DEWEY_CLASS_REGEX + " *\\.?(" + DEWEY_CUTTER_SPACE_TRAILING_LETTERS_REGEX + ")(.*)";
        String regex7 = DEWEY_CLASS_REGEX + " *\\.?(" + DEWEY_MIN_CUTTER_LETTER_REGEX + ")(.*)";
        Pattern pat1 = Pattern.compile(regex1);
        Pattern pat2 = Pattern.compile(regex2);
        Pattern pat3 = Pattern.compile(regex3);
        Pattern pat4 = Pattern.compile(regex4);
        Pattern pat5 = Pattern.compile(regex5);
        Pattern pat6 = Pattern.compile(regex6);
        Pattern pat7 = Pattern.compile(regex7);

        // probe patterns 1-3 in order; the matcher is left on the first one that finds a match
        Matcher matcher = pat1.matcher(rawCallnum);
        if (!matcher.find()) {
            matcher = pat2.matcher(rawCallnum);
            if (!matcher.find()) {
                matcher = pat3.matcher(rawCallnum);
            }
        }

        // NOTE(review): Matcher.find() continues after the previous match, so when pat1 or
        // pat2 already matched in the probing above this call looks for a *further* match
        // rather than re-reading the first one; only a pat3 matcher arrives here fresh.
        // getDeweyCutterSuffix uses find(0) for the equivalent step - confirm which is intended.
        if (matcher.find()) {
            String cutter = matcher.group(2);
            String suffix = matcher.group(3);
            if (suffix.length() == 0)
                result = cutter.trim();
            else {
                // a clean space boundary between cutter and remainder: keep the whole cutter
                if (suffix.startsWith(" ") || cutter.endsWith(" "))
                    result = cutter.trim();
                else {
                    // otherwise drop the part of the cutter after its last space, if any
                    int ix = cutter.lastIndexOf(' ');
                    if (ix != -1)
                        result = cutter.substring(0, ix);
                    else
                        result = cutter.trim();
                }
            }
        } else {
            // fallbacks: same cutter variants without any constraint on the remainder
            matcher = pat4.matcher(rawCallnum);
            if (matcher.find())
                result = matcher.group(2);
            else {
                matcher = pat5.matcher(rawCallnum);
                if (matcher.find())
                    result = matcher.group(2);
                else {
                    matcher = pat6.matcher(rawCallnum);
                    if (matcher.find())
                        result = matcher.group(2);
                    else {
                        // last resort: a single uppercase letter as the cutter
                        matcher = pat7.matcher(rawCallnum);
                        if (matcher.find())
                            result = matcher.group(2);
                    }
                }
            }
        }
        if (result != null)
            return result.trim();
        return result;
    }
594 
595     
596 
597 
598 
599 
    /**
     * Returns the text that follows the cutter of a Dewey call number.
     *
     * <p>The primary route takes everything after the first occurrence of the cutter reported
     * by {@link #getDeweyCutter(String)}. Only when that yields nothing are the seven
     * class/cutter/remainder patterns tried, and the remainder (group 3) of the first
     * matching one is used.
     *
     * @param rawCallnum the Dewey call number; may be {@code null}
     * @return the trimmed suffix, or {@code null} when the input is {@code null}/empty or no
     *         non-blank suffix is found
     */
    public static String getDeweyCutterSuffix(String rawCallnum) {
        if (rawCallnum == null || rawCallnum.length() == 0)
            return null;
        String result = null;

        String cutter = getDeweyCutter(rawCallnum);
        if (cutter != null) {
            // everything after the first occurrence of the cutter text
            int ix = rawCallnum.indexOf(cutter) + cutter.length();
            result = rawCallnum.substring(ix).trim();
        }

        if (result == null || result.length() == 0) {
            // group 1 is the class fraction, group 2 the cutter, group 3 the remainder
            String regex1 = DEWEY_CLASS_REGEX + " *\\.?(" + DEWEY_CUTTER_TRAILING_LETTERS_REGEX + ")( +" + NOT_CUTTER + ".*)";
            String regex2 = DEWEY_CLASS_REGEX + " *\\.?(" + DEWEY_MIN_CUTTER_REGEX + ")( +" + NOT_CUTTER + ".*)";
            String regex3 = DEWEY_CLASS_REGEX + " *\\.?(" + DEWEY_CUTTER_SPACE_TRAILING_LETTERS_REGEX + ")( +" + NOT_CUTTER + ".*)";
            String regex4 = DEWEY_CLASS_REGEX + " *\\.?(" + DEWEY_CUTTER_TRAILING_LETTERS_REGEX + ")(.*)";
            String regex5 = DEWEY_CLASS_REGEX + " *\\.?(" + DEWEY_MIN_CUTTER_REGEX + ")(.*)";
            String regex6 = DEWEY_CLASS_REGEX + " *\\.?(" + DEWEY_CUTTER_SPACE_TRAILING_LETTERS_REGEX + ")(.*)";
            String regex7 = DEWEY_CLASS_REGEX + " *\\.?(" + DEWEY_MIN_CUTTER_LETTER_REGEX + ")(.*)";
            Pattern pat1 = Pattern.compile(regex1);
            Pattern pat2 = Pattern.compile(regex2);
            Pattern pat3 = Pattern.compile(regex3);
            Pattern pat4 = Pattern.compile(regex4);
            Pattern pat5 = Pattern.compile(regex5);
            Pattern pat6 = Pattern.compile(regex6);
            Pattern pat7 = Pattern.compile(regex7);

            // probe the patterns in order; the matcher is left on the first one that matches
            // (or on pat7, unprobed, when none of the first six does)
            Matcher matcher = pat1.matcher(rawCallnum);
            if (!matcher.find()) {
                matcher = pat2.matcher(rawCallnum);
                if (!matcher.find()) {
                    matcher = pat3.matcher(rawCallnum);
                    if (!matcher.find()) {
                        matcher = pat4.matcher(rawCallnum);
                        if (!matcher.find()) {
                            matcher = pat5.matcher(rawCallnum);
                            if (!matcher.find()) {
                                matcher = pat6.matcher(rawCallnum);
                                if(!matcher.find()){
                                    matcher = pat7.matcher(rawCallnum);
                                }
                            }
                        }
                    }
                }
            }

            // find(0) resets the matcher so the first match is read again from the start
            if (matcher.find(0)) {
                cutter = matcher.group(2);
                String suffix = matcher.group(3);
                if (suffix.trim().length() > 0) {
                    // a clean space boundary between cutter and remainder: the remainder is
                    // the suffix
                    if (suffix.startsWith(" ") || cutter.endsWith(" "))
                        result = suffix;
                    else {
                        // otherwise the part of the cutter after its last space belongs to
                        // the suffix as well
                        int ix = cutter.lastIndexOf(' ');
                        if (ix != -1)
                            result = cutter.substring(ix) + suffix;
                        else
                            result = suffix;
                    }
                }
            }
        }
        if (result != null)
            result = result.trim();
        if (result == null || result.trim().length() == 0)
            return null;
        else
            return result;
    }
673 
674 
675     
676 
677 
678 
679 
680 
681 
682 
683     public static String normalizeCallnum(String rawCallnum) {
684 
685         
686         String normalizedCallnum = rawCallnum.trim().replaceAll("\\s\\s+", " ");
687         
688         normalizedCallnum = normalizedCallnum.replaceAll("\\. \\.", " .");
689         
690         normalizedCallnum = normalizedCallnum.replaceAll("(\\d+\\.) ([A-Z])", "$1$2");
691         
692         if (normalizedCallnum.endsWith("."))
693             normalizedCallnum = normalizedCallnum.substring(0, normalizedCallnum.length() - 1).trim();
694 
695         
696 
697         
698         
699         return normalizedCallnum;
700     }
701 
702     
703 
704 
705 
706     static String normalizeLCcallnum(String rawLCcallnum) {
707         String normCallnum = normalizeCallnum(rawLCcallnum);
708         
709         return normCallnum.replaceAll("^([A-Z][A-Z]?[A-Z]?) ([0-9])", "$1$2");
710     }
711 
712 
713 
714 
715     
716 
717 
718 
719     public static String getLCShelfkey(String rawLCcallnum, String recid) {
720         return (getLCShelfkey(rawLCcallnum, recid, null));
721     }
722 
723     
724 
725 
726 
727     public static String getLCShelfkey(String rawLCcallnum, String recid, ErrorHandler errors) {
728         StringBuilder resultBuf = new StringBuilder();
729         String upcaseLCcallnum = rawLCcallnum.toUpperCase();
730 
731 
732 
733 
734         
735         StringBuilder initLetBuf = new StringBuilder("    ");
736         String lets = getLCstartLetters(upcaseLCcallnum);
737         if (lets != null) {
738             initLetBuf.replace(0, lets.length(), lets);
739         } else {
740             if ((recid != null) && (!rawLCcallnum.startsWith("XX"))) 
741             {
742                 if (errors == null) {
743                     System.err.println("Problem creating shelfkey for record " + recid + "; call number: " + rawLCcallnum);
744                 } else {
745                     errors.addError(ErrorHandler.ERROR_TYPO, "Problem creating shelfkey for record " + recid + "; call number: " + rawLCcallnum);
746                 }
747             }
748             return (rawLCcallnum + " ");
749         }
750         resultBuf.append(initLetBuf);
751 
752         try {
753             
754             
755             String digitStr = getLCClassDigits(upcaseLCcallnum);
756             if (digitStr != null)
757                 resultBuf.append(normalizeFloat(digitStr, 4, 6));
758             else
759                 resultBuf.append(normalizeFloat("0", 4, 6));
760 
761             
762             String classSuffix = getLCClassSuffix(upcaseLCcallnum);
763             if (classSuffix != null)
764                 resultBuf.append(" " + normalizeSuffix(classSuffix));
765 
766             
767             String firstCutter = getFirstLCcutter(upcaseLCcallnum);
768             if (firstCutter != null) {
769                 resultBuf.append(" " + normalizeCutter(firstCutter, 6));
770 
771                 
772                 String firstCutterSuffix = getFirstLCcutterSuffix(upcaseLCcallnum);
773                 if (firstCutterSuffix != null)
774                     resultBuf.append(" " + normalizeSuffix(firstCutterSuffix));
775 
776                 
777                 String secondCutter = getSecondLCcutter(upcaseLCcallnum);
778                 if (secondCutter != null) {
779                     resultBuf.append(" " + normalizeCutter(secondCutter, 6));
780 
781                     String secondCutterSuffix = getSecondLCcutterSuffix(upcaseLCcallnum);
782                     if (secondCutterSuffix != null)
783                         resultBuf.append(" " + normalizeSuffix(secondCutterSuffix));
784                 }
785             }
786         } catch (NumberFormatException e) {
787 
788             if ((recid != null) && (!rawLCcallnum.startsWith("XX"))) 
789             {
790                 if (errors == null) {
791                     System.err.println("Problem creating shelfkey for record " + recid + "; call number: " + rawLCcallnum);
792                 } else {
793                     errors.addError(ErrorHandler.ERROR_TYPO, "Problem creating shelfkey for record " + recid + "; call number: " + rawLCcallnum);
794                 }
795             }
796             
797             resultBuf = new StringBuilder();
798         }
799 
800         if (resultBuf.length() == 0)
801             resultBuf.append(upcaseLCcallnum);
802 
803         return resultBuf.toString().trim();
804     }
805 
806     
807 
808 
809 
810     private static String normalizeCutter(String cutter, int numDigits) {
811         String result = null;
812         if (cutter != null && cutter.length() > 0) {
813             String cutLets = getLCstartLetters(cutter);
814             String cutDigs = cutter.substring(cutLets.length());
815             String norm = null;
816             if (cutDigs != null && cutDigs.length() > 0) {
817                 try {
818                     
819                     Integer.parseInt(cutDigs);
820                     norm = normalizeFloat("." + cutDigs, 1, numDigits);
821                 } catch (NumberFormatException e) {
822                     norm = cutDigs;
823                 }
824             } else if (cutDigs.length() == 0 && cutLets.length() == 1)
825                 
826                 norm = normalizeFloat("0", 1, numDigits);
827 
828             result = cutLets + norm;
829         }
830         return result;
831     }
832 
833     
834 
835 
836 
837     public static String normalizeSuffix(String suffix) {
838         if (suffix != null && suffix.length() > 0) {
839             StringBuilder resultBuf = new StringBuilder(suffix.length());
840             
841             String[] digitStrs = suffix.split("[\\D]+");
842             int len = digitStrs.length;
843             if (digitStrs != null && len != 0) {
844                 int s = 0;
845                 for (int d = 0; d < len; d++) {
846                     String digitStr = digitStrs[d];
847                     int ix = suffix.indexOf(digitStr, s);
848                     
849                     if (s < ix) {
850                         String text = suffix.substring(s, ix);
851                         resultBuf.append(text);
852                     }
853                     if (digitStr != null && digitStr.length() != 0) {
854                         
855                         resultBuf.append(normalizeFloat(digitStr, 6, 0));
856                         s = ix + digitStr.length();
857                     }
858 
859                 }
860                 
861                 resultBuf.append(suffix.substring(s));
862                 return resultBuf.toString();
863             }
864         }
865 
866         return suffix;
867     }
868 
869     
870 
871 
872 
873 
874     public static String getReverseShelfKey(String shelfkey) {
875         StringBuilder resultBuf = new StringBuilder(reverseDefault);
876         if (shelfkey != null && shelfkey.length() > 0)
877             resultBuf.replace(0, shelfkey.length(), reverseAlphanum(shelfkey));
878         return resultBuf.toString();
879     }
880 
881     
882 
883 
884 
885     private static String reverseAlphanum(String orig) {
886 
887 
888 
889 
890 
891 
892 
893 
894 
895 
896 
897 
898 
899 
900 
901         StringBuilder reverse = new StringBuilder();
902         for (int ix = 0; ix < orig.length(); ) {
903             int codePoint = Character.toUpperCase(orig.codePointAt(ix));
904             char[] chs = Character.toChars(codePoint);
905 
906             if (Character.isLetterOrDigit(codePoint)) {
907                 if (chs.length == 1) {
908                     char c = chs[0];
909                     if (alphanumReverseMap.containsKey(c))
910                         reverse.append(alphanumReverseMap.get(c));
911                     else {
912                         
913 
914                         
915                         char foldC;
916 
917                         if (UCharacter.UnicodeBlock.of(c) != UCharacter.UnicodeBlock.COMBINING_DIACRITICAL_MARKS &&
918                                 UCharacter.UnicodeBlock.of(c) != UCharacter.UnicodeBlock.SPACING_MODIFIER_LETTERS &&
919                                 (foldC = Utils.foldDiacriticLatinChar(c)) != 0x00)
920                             
921                             reverse.append(alphanumReverseMap.get(foldC));
922                         else
923                             
924                             
925                             reverse.append(SORT_FIRST_CHAR);
926                     }
927                 } else {
928                     
929                     
930                     reverse.append(SORT_FIRST_CHAR);
931                 }
932             } else 
933                 reverse.append(reverseNonAlphanum(chs[0]));
934 
935             ix += chs.length;
936         }
937 
938         return new String(reverse);
939     }
940 
941     
942 
943 
944 
945     public static char[] reverseNonAlphanum(char ch) {
946         
947         switch (ch) {
948             case '.':
949                 return Character.toChars('}');
950             case '{':
951             case '|':
952             case '}':
953             case '~':
954 
955 
956 
957                 return Character.toChars(' ');
958             default:
959 
960                 return Character.toChars('~');
961         }
962     }
963 
964     
965 
966 
967 
968     public static String getDeweyShelfKey(String rawDeweyCallnum) {
969         StringBuilder resultBuf = new StringBuilder();
970 
971         
972         
973         
974         String classNum = normalizeFloat(getDeweyB4Cutter(rawDeweyCallnum), 3, 8);
975         resultBuf.append(classNum);
976 
977         
978         
979         
980 
981         
982         String cutter = getDeweyCutter(rawDeweyCallnum);
983         if (cutter != null)
984             resultBuf.append(" " + cutter);
985 
986         
987         String cutterSuffix = getDeweyCutterSuffix(rawDeweyCallnum);
988         if (cutterSuffix != null)
989             resultBuf.append(" " + normalizeSuffix(cutterSuffix));
990 
991 
992         if (resultBuf.length() == 0)
993             resultBuf.append(rawDeweyCallnum);
994 
995         return resultBuf.toString().trim();
996     }
997 
998 
999     
1000 
1001 
1002 
1003 
1004 
1005 
1006 
1007 
1008 
1009 
1010 
1011 
1012 
1013 
1014 
1015     public static String normalizeFloat(String floatStr, int digitsB4, int digitsAfter) {
1016         String norm = null;
1017         try {
1018             double value = Double.valueOf(floatStr).doubleValue();
1019             String formatStr = getFormatString(digitsB4) + '.' + getFormatString(digitsAfter);
1020             DecimalFormat normFormat = new DecimalFormat(formatStr);
1021             norm = normFormat.format(value);
1022             if (norm.endsWith("."))
1023                 norm = norm.substring(0, norm.length() - 1);
1024 
1025         } catch (NumberFormatException e) {
1026             LOG.error("Exception while Normalizing Call Number",e);
1027             throw new DocstoreValidationException(e);
1028         }
1029         return norm;
1030     }
1031 
1032     private static String PUNCT_PREFIX = "([\\.:\\/])?";
1033     private static String NS_PREFIX = "(n\\.s\\.?\\,? ?)?";
1034     private static String MONTHS = "jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec";
1035     private static String VOL_LETTERS = "[\\:\\/]?(bd|iss|jahrg|new ser|no|part|pts?|ser|t|v|vols?|vyp" + "|" + MONTHS + ")";
1036     private static String VOL_NUMBERS = "\\d+([\\/-]\\d+)?( \\d{4}([\\/-]\\d{4})?)?( ?suppl\\.?)?";
1037     private static String VOL_NUMBERS_LOOSER = "\\d+.*";
1038     private static String VOL_NUM_AS_LETTERS = "[A-Z]([\\/-]\\[A-Z]+)?.*";
1039 
1040     private static Pattern VOL_PATTERN = Pattern.compile(PUNCT_PREFIX + NS_PREFIX + VOL_LETTERS + "\\.? ?" + VOL_NUMBERS, Pattern.CASE_INSENSITIVE);
1041     private static Pattern VOL_PATTERN_LOOSER = Pattern.compile(PUNCT_PREFIX + NS_PREFIX + VOL_LETTERS + "\\.? ?" + VOL_NUMBERS_LOOSER, Pattern.CASE_INSENSITIVE);
1042     private static Pattern VOL_PATTERN_LETTERS = Pattern.compile(PUNCT_PREFIX + NS_PREFIX + VOL_LETTERS + "[\\/\\. ]" + VOL_NUM_AS_LETTERS, Pattern.CASE_INSENSITIVE);
1043 
1044     
1045 
1046 
1047 
1048 
1049 
1050 
1051 
1052     public static String removeLCVolSuffix(String rawLCcallnum) {
1053         
1054         String suffix = getSecondLCcutterSuffix(rawLCcallnum);
1055         if (suffix == null || suffix.length() == 0) {
1056             String cut1suffix = getFirstLCcutterSuffix(rawLCcallnum);
1057             if (cut1suffix != null) {
1058                 
1059                 String cut2 = getSecondLCcutter(rawLCcallnum);
1060                 if (cut2 != null) {
1061                     int ix = cut1suffix.indexOf(cut2);
1062                     if (ix != -1)
1063                         suffix = cut1suffix.substring(0, ix);
1064                     else
1065                         suffix = cut1suffix;
1066                 } else
1067                     suffix = cut1suffix;
1068             }
1069         }
1070 
1071         
1072         if (suffix != null && suffix.length() > 0) {
1073             Matcher matcher = VOL_PATTERN.matcher(suffix);
1074             if (!matcher.find()) {
1075                 matcher = VOL_PATTERN_LOOSER.matcher(suffix);
1076                 if (!matcher.find()) {
1077                     matcher = VOL_PATTERN_LETTERS.matcher(suffix);
1078                 }
1079             }
1080 
1081             if (matcher.find(0)) {
1082                 
1083                 int ix = rawLCcallnum.indexOf(suffix) + matcher.start();
1084                 if (ix != -1 && ix < rawLCcallnum.length()) {
1085                     return rawLCcallnum.substring(0, ix).trim();
1086                 }
1087             }
1088         }
1089         return rawLCcallnum;
1090     }
1091 
1092 
1093     
1094 
1095 
1096 
1097 
1098 
1099 
1100 
1101     public static String removeDeweyVolSuffix(String rawDeweyCallnum) {
1102         String cutSuffix = getDeweyCutterSuffix(rawDeweyCallnum);
1103 
1104         if (cutSuffix == null || cutSuffix.length() == 0)
1105             return rawDeweyCallnum;
1106 
1107         Matcher matcher = VOL_PATTERN.matcher(cutSuffix);
1108         if (!matcher.find()) {
1109             matcher = VOL_PATTERN_LOOSER.matcher(cutSuffix);
1110             if (!matcher.find()) {
1111                 matcher = VOL_PATTERN_LETTERS.matcher(cutSuffix);
1112             }
1113         }
1114 
1115         if (matcher.find(0)) {
1116             
1117             int ix = rawDeweyCallnum.indexOf(cutSuffix) + matcher.start();
1118             if (ix != -1 && ix < rawDeweyCallnum.length()) {
1119                 return rawDeweyCallnum.substring(0, ix).trim();
1120             }
1121         }
1122         return rawDeweyCallnum;
1123     }
1124 
1125 
1126     
1127 
1128 
1129 
1130 
1131 
1132     public static String addLeadingZeros(String deweyCallNum) {
1133         String result = deweyCallNum;
1134         String b4Cutter = getPortionBeforeCutter(deweyCallNum);
1135 
1136         
1137         
1138 
1139         String b4dec = null;
1140         int decIx = b4Cutter.indexOf(".");
1141         if (decIx >= 0)
1142             b4dec = deweyCallNum.substring(0, decIx).trim();
1143         else
1144             b4dec = b4Cutter.trim();
1145 
1146         if (b4dec != null) {
1147             switch (b4dec.length()) {
1148                 case 1:
1149                     result = "00" + deweyCallNum;
1150                     break;
1151                 case 2:
1152                     result = "0" + deweyCallNum;
1153             }
1154         }
1155 
1156         return result;
1157     }
1158 
1159     
1160 
1161 
1162 
1163 
1164 
1165 
1166     private static String getFormatString(int numDigits) {
1167         StringBuilder b4 = new StringBuilder();
1168         if (numDigits < 0)
1169             b4.append("############");
1170         else if (numDigits > 0) {
1171             for (int i = 0; i < numDigits; i++) {
1172                 b4.append('0');
1173             }
1174         }
1175         return b4.toString();
1176     }
1177 
1178     
1179 
1180 
1181 
1182     public static String getSuDocShelfKey(String callNumber) {
1183         String upcaseSuDoccallnum = callNumber.toUpperCase();
1184         StringBuffer shelfKey = new StringBuffer();
1185         
1186         String[] cNumSub = upcaseSuDoccallnum.split(SUDOC_REGEX);
1187         for (String str : cNumSub) {
1188             if (StringUtils.isNumeric(str)) {   
1189                 
1190                 str = StringUtils.leftPad(str, 5, "0"); 
1191                 shelfKey.append(str);
1192                 shelfKey.append(" ");
1193             } else {                     
1194                 
1195                 str = StringUtils.rightPad(str, 5);  
1196                 shelfKey.append(str);
1197                 shelfKey.append(" ");
1198             }
1199         }
1200         return shelfKey.toString().trim();
1201     }
1202 }