View Javadoc
1   package org.kuali.ole.utility.callnumber;
2   
3   /**
4    * Created with IntelliJ IDEA.
5    * User: ?
6    * Date: 19/2/13
7    * Time: 7:49 PM
8    * To change this template use File | Settings | File Templates.
9    */
10  
11  import com.ibm.icu.lang.UCharacter;
12  import org.apache.commons.lang.StringUtils;
13  import org.kuali.ole.docstore.common.exception.DocstoreResources;
14  import org.kuali.ole.docstore.common.exception.DocstoreValidationException;
15  import org.marc4j.ErrorHandler;
16  import org.slf4j.Logger;
17  import org.slf4j.LoggerFactory;
18  
19  import java.text.DecimalFormat;
20  import java.util.HashMap;
21  import java.util.Map;
22  import java.util.regex.Matcher;
23  import java.util.regex.Pattern;
24  
25  //import com.solrmarc.icu.lang.UCharacter;
26  
27  /**
28   * Call number utility functions for solrmarc
29   *
30   * @author Naomi Dushay, Stanford University
31   */
32  
33  public final class CallNumUtils {
34  
35  
36  // TODO:  should have LCcallnum and DeweyCallnum classes, with the call number
37  //   pieces as fields.  Then parsing would happen once per call number, not
38  //   all over the place and some parsing repeated.
39  
40      /**
41       * Default Constructor: private, so it can't be instantiated by other objects
42       */
43      private CallNumUtils() {
44      }
45      private static final Logger LOG = LoggerFactory.getLogger(CallNumUtils.class);
46      public static final Pattern DEWEY_PATTERN = Pattern.compile("^\\d{1,3}(\\.\\d+)?.*");
47      /**
48       * regular expression string for the required portion of the LC classification
49       * LC classification is
50       * 1-3 capital letters followed by  float number (may be an integer)
51       * optionally followed by a space and then a year or other number,
52       * e.g. "1987" "15th"
53       * LC call numbers can't begin with I, O, W, X, or Y
54       */
55      public static final String LC_CLASS_REQ_REGEX = "[A-Z&&[^IOWXY]]{1}[A-Z]{0,2} *\\d+(\\.\\d+)?";
56  
57      /**
58       * non-cutter text that can appear before or after cutters
59       */
60      public static final String NOT_CUTTER = "([\\da-z]\\w*)|([A-Z]\\D+[\\w]*)";
61  
62      /**
63       * the full LC classification string (can have an optional suffix after LC class)
64       */
65      public static final String LC_CLASS_W_SUFFIX = "(" + LC_CLASS_REQ_REGEX + "( +" + NOT_CUTTER + ")?)";
66  
67      /**
68       * regular expression string for the cutter, without preceding characters
69       * (such as the "required" period, which is sometimes missing, or spaces).
70       * A Cutter is a single letter followed by digits.
71       */
72      public static final String CUTTER_REGEX = "[A-Z]\\d+";
73  
74      /**
75       * the full LC classification string, followed by the first cutter
76       */
77      public static final String LC_CLASS_N_CUTTER = LC_CLASS_W_SUFFIX + " *\\.?" + CUTTER_REGEX;
78      public static final Pattern LC_CLASS_N_CUTTER_PATTERN = Pattern.compile(LC_CLASS_N_CUTTER + ".*");
79  
80      /**
81       * regular expression for Dewey classification.
82       * Dewey classification is a three digit number (possibly missing leading
83       * zeros) with an optional fraction portion.
84       */
85      public static final String DEWEY_CLASS_REGEX = "\\d{1,3}(\\.\\d+)?";
86  
87      /**
88       * Dewey cutters start with a letter, followed by a one to three digit
89       * number. The number may be followed immediately (i.e. without space) by
90       * letters, or followed first by a space and then letters.
91       */
92      public static final String DEWEY_MIN_CUTTER_LETTER_REGEX = "[A-Z]";
93      public static final String DEWEY_MIN_CUTTER_REGEX = "[A-Z]\\d{1,3}";
94      public static final String DEWEY_CUTTER_TRAILING_LETTERS_REGEX = DEWEY_MIN_CUTTER_REGEX + "[A-Z]+";
95      public static final String DEWEY_CUTTER_SPACE_TRAILING_LETTERS_REGEX = DEWEY_MIN_CUTTER_REGEX + " +[A-Z]+";
96      public static final String DEWEY_FULL_CUTTER_REGEX = DEWEY_MIN_CUTTER_REGEX + " *[A-Z]*+";
97  
98      /**
99       * the full Dewey classification string, followed by the first cutter
100      */
101     public static final String DEWEY_CLASS_N_CUTTER_REGEX = DEWEY_CLASS_REGEX + " *\\.?" + DEWEY_FULL_CUTTER_REGEX;
102     public static final Pattern DEWEY_CLASS_N_CUTTER_PATTERN = Pattern.compile(DEWEY_CLASS_N_CUTTER_REGEX + ".*");
103 
104     /**
105      * regular expression string for complete SuDoc classification
106      * Splits the based on continuous numbers and alphabets
107      * Ignore any special char and spaces.
108      */
109     public static final String SUDOC_REGEX = "[^A-Z0-9]+|(?<=[A-Z])(?=[0-9])|(?<=[0-9])(?=[A-Z])";
110 
111 
112     private static Map<Character, Character> alphanumReverseMap = new HashMap<Character, Character>();
113 
114     static {
115         alphanumReverseMap.put('0', 'Z');
116         alphanumReverseMap.put('1', 'Y');
117         alphanumReverseMap.put('2', 'X');
118         alphanumReverseMap.put('3', 'W');
119         alphanumReverseMap.put('4', 'V');
120         alphanumReverseMap.put('5', 'U');
121         alphanumReverseMap.put('6', 'T');
122         alphanumReverseMap.put('7', 'S');
123         alphanumReverseMap.put('8', 'R');
124         alphanumReverseMap.put('9', 'Q');
125         alphanumReverseMap.put('A', 'P');
126         alphanumReverseMap.put('B', 'O');
127         alphanumReverseMap.put('C', 'N');
128         alphanumReverseMap.put('D', 'M');
129         alphanumReverseMap.put('E', 'L');
130         alphanumReverseMap.put('F', 'K');
131         alphanumReverseMap.put('G', 'J');
132         alphanumReverseMap.put('H', 'I');
133         alphanumReverseMap.put('I', 'H');
134         alphanumReverseMap.put('J', 'G');
135         alphanumReverseMap.put('K', 'F');
136         alphanumReverseMap.put('L', 'E');
137         alphanumReverseMap.put('M', 'D');
138         alphanumReverseMap.put('N', 'C');
139         alphanumReverseMap.put('O', 'B');
140         alphanumReverseMap.put('P', 'A');
141         alphanumReverseMap.put('Q', '9');
142         alphanumReverseMap.put('R', '8');
143         alphanumReverseMap.put('S', '7');
144         alphanumReverseMap.put('T', '6');
145         alphanumReverseMap.put('U', '5');
146         alphanumReverseMap.put('V', '4');
147         alphanumReverseMap.put('W', '3');
148         alphanumReverseMap.put('X', '2');
149         alphanumReverseMap.put('Y', '1');
150         alphanumReverseMap.put('Z', '0');
151     }
152 
153 
154     /**
155      * this character will sort first
156      */
157     public static char SORT_FIRST_CHAR = Character.MIN_VALUE;
158     public static StringBuilder reverseDefault = new StringBuilder(75);
159 
160     static {
161         for (int i = 0; i < 50; i++)
162 // N.B.:  this char is tough to deal with in a variety of contexts.
163 // Hopefully diacritics and non-latin won't bite us in the butt.
164 //          reverseDefault.append(Character.toChars(Character.MAX_CODE_POINT));
165             reverseDefault.append(Character.toChars('~'));
166     }
167 
168 //------ public methods --------
169 
170     /**
171      * given a possible Library of Congress call number value, determine if it
172      * matches the pattern of an LC call number
173      */
174     public static final boolean isValidLC(String possLCval) {
175         if (possLCval != null && LC_CLASS_N_CUTTER_PATTERN.matcher(possLCval.trim()).matches())
176             return true;
177         return false;
178     }
179 
180     /**
181      * given a possible Dewey call number value, determine if it
182      * matches the pattern of an Dewey call number
183      */
184     public static final boolean isValidDeweyWithCutter(String possDeweyVal) {
185         if (possDeweyVal != null && DEWEY_CLASS_N_CUTTER_PATTERN.matcher(possDeweyVal.trim()).matches())
186             return true;
187         return false;
188     }
189 
190     /**
191      * given a possible Dewey call number value, determine if it
192      * matches the pattern of an Dewey call number
193      */
194     public static final boolean isValidDewey(String possDeweyVal) {
195         if (possDeweyVal != null && DEWEY_PATTERN.matcher(possDeweyVal.trim()).matches())
196             return true;
197         return false;
198     }
199 
200     /**
201      * return the portion of the call number string that occurs before the
202      * Cutter, NOT including any class suffixes occuring before the cutter
203      */
204     public static final String getPortionBeforeCutter(String callnum) {
205 
206         // cutter is a single letter followed by digits.
207         // there may be a space before a cutter
208         // there should be a period, which is followed by a single letter
209         //   the period is sometimes missing
210         // For Dewey callnumber, there may be a slash instead of a cutter,
211         //  or there might be NO cutter
212         String beginCutterRegex = "( +|(\\.[A-Z])| */)";
213 
214         String[] pieces = callnum.split(beginCutterRegex);
215         if (pieces.length == 0 || pieces[0] == null || pieces[0].length() == 0)
216             return null;
217         else
218             return pieces[0].trim();
219     }
220 
221     /**
222      * return the portion of the LC call number string that occurs before the
223      * Cutter.
224      */
225     public static final String getLCB4FirstCutter(String callnum) {
226         String result = null;
227 
228         String cutter = getFirstLCcutter(callnum);
229         if (cutter != null && cutter.length() > 0) {
230             // lc class can start with same chars as first cutter: (G384 G3)
231             int ix = callnum.indexOf(cutter);
232             String lets = getLCstartLetters(callnum);
233             if (ix < lets.length())
234                 ix = callnum.indexOf(cutter, lets.length());
235 
236             if (ix > 0) {
237                 result = callnum.substring(0, ix).trim();
238                 if (result.endsWith("."))
239                     result = result.substring(0, result.length() - 1).trim();
240             } else
241                 result = callnum;
242         } else // no cutter
243             result = callnum;
244 
245         return result;
246     }
247 
248     /**
249      * Given a raw LC call number, return the initial letters (before any
250      * numbers)
251      */
252     public static String getLCstartLetters(String rawLCcallnum) {
253         String result = null;
254         if (rawLCcallnum != null && rawLCcallnum.length() > 0) {
255             String[] lcClass = rawLCcallnum.split("[^A-Z]+");
256             if (lcClass.length > 0)
257                 result = lcClass[0];
258         }
259         return result;
260     }
261 
262     /**
263      * return the numeric portion of the required portion of the LC classification.
264      * LC classification requires
265      * 1-3 capital letters followed by  float number (may be an integer)
266      *
267      * @param rawLCcallnum
268      */
269     public static String getLCClassDigits(String rawLCcallnum) {
270         String result = null;
271 
272         String rawClass = getLCB4FirstCutter(rawLCcallnum);
273         if (rawClass != null && rawClass.length() > 0) {
274             String[] pieces = rawClass.split("[A-Z ]+");
275             if (pieces.length > 1)
276                 result = pieces[1].trim();
277         }
278         return result;
279     }
280 
281     /**
282      * return the string between the LC class number and the cutter, if it
283      * starts with a digit, null otherwise
284      *
285      * @param rawLCcallnum - the entire LC call number, as a string
286      */
287     public static String getLCClassSuffix(String rawLCcallnum) {
288         String result = null;
289 
290         String b4cutter = getLCB4FirstCutter(rawLCcallnum);
291         if (b4cutter == null || b4cutter.length() == 0)
292             return null;
293 
294         String classDigits = getLCClassDigits(rawLCcallnum);
295 
296         if (classDigits != null && classDigits.length() > 0) {
297             int reqClassLen = b4cutter.indexOf(classDigits) + classDigits.length();
298 
299             if (b4cutter.length() > reqClassLen)
300                 result = b4cutter.substring(reqClassLen).trim();
301         }
302 
303         return result;
304     }
305 
306     /**
307      * return the first cutter in the LC call number, without the preceding
308      * characters (such as the "required" period, which is sometimes missing,
309      * or spaces), or any suffixes
310      *
311      * @param rawCallnum - the entire call number, as a string
312      */
313     public static String getFirstLCcutter(String rawCallnum) {
314         String result = null;
315 
316         String regex = LC_CLASS_W_SUFFIX + " *\\.?(" + CUTTER_REGEX + ")";
317         Pattern pattern = Pattern.compile(regex);
318         Matcher matcher = pattern.matcher(rawCallnum);
319 
320         if (matcher.find())
321             result = matcher.group(6).trim();
322 
323         // if no well formed cutter, take the chunk after last period or space
324         //  if it begins with a letter
325 //        if (result == null) {
326 //            int i = rawCallnum.trim().lastIndexOf('.');  // period
327 //            if (i == -1)
328 //                i = rawCallnum.trim().lastIndexOf(' ');  // space
329 //            if (rawCallnum.trim().length() > i + 1) {
330 //                String possible = rawCallnum.trim().substring(i + 1).trim();
331 //                if (Character.isLetter(possible.charAt(0)))
332 //                    result = possible;
333 //            }
334 //        }
335 
336         return result;
337     }
338 
339     /**
340      * return the suffix after the first cutter, if there is one.  This occurs
341      * before the second cutter, if there is one.
342      *
343      * @param rawLCcallnum - the entire LC call number, as a string
344      */
345     public static String getFirstLCcutterSuffix(String rawLCcallnum) {
346         String result = null;
347 
348         String regex = LC_CLASS_N_CUTTER + " *(" + NOT_CUTTER + ")*";
349         Pattern pattern = Pattern.compile(regex);
350         Matcher matcher = pattern.matcher(rawLCcallnum);
351 
352         // non cutter string optionally followed by cutter preceded by a period
353         if (matcher.find() && matcher.groupCount() > 5
354                 && matcher.group(6) != null && matcher.group(6).length() > 0) {
355 
356             // this only grabs the FIRST non-cutter string it encounters after
357             //   the first cutter
358             result = matcher.group(6).trim();
359 
360             // this is to cope with additional non-cutter strings after the
361             //  first cutter  (e.g. M211 .M93 K.240 1988)
362             int endLastIx = matcher.end(6); // end of previous match
363             if (endLastIx < rawLCcallnum.length()) {
364                 // if there is a suffix, there must be a period before second cutter
365                 Pattern cutterPat = Pattern.compile(" *\\." + CUTTER_REGEX);
366                 matcher.usePattern(cutterPat);
367                 if (matcher.find(endLastIx)) {
368                     if (endLastIx < matcher.start())
369                         result = result.trim() + " " + rawLCcallnum.substring(endLastIx, matcher.start()).trim();
370                 } else
371                     result = result + rawLCcallnum.substring(endLastIx);
372             }
373         } else {
374             // string after first cutter looks like a second cutter, but is
375             //  not because further on there is a second cutter preceded by
376             //  a period.
377             // look for period before second cutter
378             String afterLCclassNCutter = rawLCcallnum.replaceFirst(LC_CLASS_N_CUTTER + " *", "");
379             String cutterRegex = LC_CLASS_N_CUTTER + " *(.*)\\." + CUTTER_REGEX;
380 
381             pattern = Pattern.compile(cutterRegex);
382             matcher = pattern.matcher(rawLCcallnum);
383 
384             if (matcher.find() && matcher.groupCount() > 5
385                     && matcher.group(6) != null && matcher.group(6).length() > 0)
386                 // there is a second cutter preceded by a period
387                 result = matcher.group(6).trim();
388             else {
389                 regex = LC_CLASS_N_CUTTER + " \\.\\.\\.$";
390                 pattern = Pattern.compile(regex);
391                 matcher = pattern.matcher(rawLCcallnum);
392                 if (matcher.find())
393                     result = " ...";
394             }
395         }
396         return result;
397     }
398 
399     /**
400      * return the second cutter in the call number, without the preceding
401      * characters (such as the "required" period, which is sometimes missing,
402      * or spaces), or any suffixes
403      *
404      * @param rawLCcallnum - the entire call number, as a string
405      */
406     public static String getSecondLCcutter(String rawLCcallnum) {
407         String result = null;
408 
409         String firstCutSuffix = getFirstLCcutterSuffix(rawLCcallnum);
410         if (firstCutSuffix == null || firstCutSuffix.length() == 0) {
411             // look for second cutter
412             String regex = LC_CLASS_N_CUTTER + " *\\.?(" + CUTTER_REGEX + ")";
413             Pattern pattern = Pattern.compile(regex);
414             Matcher matcher = pattern.matcher(rawLCcallnum);
415             if (matcher.find() && matcher.groupCount() > 5
416                     && matcher.group(6) != null && matcher.group(6).length() > 0) {
417                 result = matcher.group(6).trim();
418             }
419         } else {
420             // get the text AFTER the first cutter suffix, then parse out
421             //   cutter text from any potential following text.
422             int ix = rawLCcallnum.indexOf(firstCutSuffix) + firstCutSuffix.length();
423             if (ix < rawLCcallnum.length()) {
424                 String remaining = rawLCcallnum.substring(ix).trim();
425                 Pattern pattern = Pattern.compile("(" + CUTTER_REGEX + ")");
426                 Matcher matcher = pattern.matcher(remaining);
427                 if (matcher.find() && matcher.group(1) != null && matcher.group(1).length() > 0) {
428                     result = matcher.group(1).trim();
429                 }
430             }
431             // if we still have nothing, look for 2nd cutter in first cutter suffix
432             if (result == null) {
433                 Pattern pattern = Pattern.compile("\\.(" + CUTTER_REGEX + ")");
434                 Matcher matcher = pattern.matcher(firstCutSuffix);
435                 if (matcher.find() && matcher.group(1) != null && matcher.group(1).length() > 0) {
436                     result = matcher.group(1).trim();
437                 }
438             }
439         }
440         return result;
441     }
442 
443     /**
444      * return the suffix after the first cutter, if there is one.  This occurs
445      * before the second cutter, if there is one.
446      *
447      * @param rawLCcallnum - the entire LC call number, as a string
448      */
449     public static String getSecondLCcutterSuffix(String rawLCcallnum) {
450         String result = null;
451 
452         String secondCutter = getSecondLCcutter(rawLCcallnum);
453         if (secondCutter != null && secondCutter.length() > 0) {
454             // get the call number after the 2nd cutter
455             int ix = rawLCcallnum.indexOf(secondCutter) + secondCutter.length();
456             if (ix < rawLCcallnum.length())
457                 result = rawLCcallnum.substring(ix).trim();
458         }
459 
460         return result;
461     }
462 
463     /**
464      * return the suffix after the first cutter, if there is one.  This occurs
465      * before the second cutter, if there is one.
466      *
467      * @param rawLCcallnum - the entire LC call number, as a string
468      * @deprecated
469      */
470 // do we want to separate out year suffixes?  for all or just here? - unused
471     public static String getSecondLCcutterYearSuffix(String rawLCcallnum) {
472         String result = null;
473 
474         String regex = LC_CLASS_N_CUTTER + " *(" + NOT_CUTTER + ")*";
475         Pattern pattern = Pattern.compile(regex);
476         Matcher matcher = pattern.matcher(rawLCcallnum);
477 
478         if (matcher.find() && matcher.groupCount() > 5
479                 && matcher.group(6) != null && matcher.group(6).length() > 0) {
480 
481             // this only grabs the FIRST non-cutter string it encounters after
482             //   the first cutter
483             result = matcher.group(6);
484 
485             // this is to cope with additional non-cutter strings after the
486             //  first cutter  (e.g. M211 .M93 K.240 1988)
487             int endLastIx = matcher.end(6); // end of previous match
488             if (endLastIx < rawLCcallnum.length()) {
489                 Pattern cutterPat = Pattern.compile(" *\\.?" + CUTTER_REGEX + ".*");
490                 matcher.usePattern(cutterPat);
491                 if (matcher.find(endLastIx)) {
492                     if (endLastIx < matcher.start())
493                         result = result.trim() + " " + rawLCcallnum.substring(endLastIx, matcher.start()).trim();
494                 } else
495                     result = result.trim() + rawLCcallnum.substring(endLastIx);
496             }
497         }
498 
499         return result;
500     }
501 
502     /**
503      * return the portion of the Dewey call number string that occurs before the
504      * Cutter.
505      */
506     public static final String getDeweyB4Cutter(String callnum) {
507         String result = null;
508 
509         String entireCallNumRegex = "(" + DEWEY_CLASS_REGEX + ").*";
510         Pattern pattern = Pattern.compile(entireCallNumRegex);
511         Matcher matcher = pattern.matcher(callnum);
512         if (matcher.find())
513             result = matcher.group(1).trim();
514 
515         return result;
516     }
517 
518     /**
519      * return the first cutter in the call number, without the preceding
520      * characters (such as the "required" period, which is sometimes missing,
521      * or spaces).
522      *
523      * @param rawCallnum - the entire call number, as a string
524      */
525     public static String getDeweyCutter(String rawCallnum) {
526         String result = null;
527 
528         // dewey cutters can have trailing letters, preceded by a space or not
529         String regex1 = DEWEY_CLASS_REGEX + " *\\.?(" + DEWEY_CUTTER_TRAILING_LETTERS_REGEX + ")( +" + NOT_CUTTER + ".*)";
530         String regex2 = DEWEY_CLASS_REGEX + " *\\.?(" + DEWEY_MIN_CUTTER_REGEX + ")( +" + NOT_CUTTER + ".*)";
531         String regex3 = DEWEY_CLASS_REGEX + " *\\.?(" + DEWEY_CUTTER_SPACE_TRAILING_LETTERS_REGEX + ")( +" + NOT_CUTTER + ".*)";
532         String regex4 = DEWEY_CLASS_REGEX + " *\\.?(" + DEWEY_CUTTER_TRAILING_LETTERS_REGEX + ")(.*)";
533         String regex5 = DEWEY_CLASS_REGEX + " *\\.?(" + DEWEY_MIN_CUTTER_REGEX + ")(.*)";
534         String regex6 = DEWEY_CLASS_REGEX + " *\\.?(" + DEWEY_CUTTER_SPACE_TRAILING_LETTERS_REGEX + ")(.*)";
535         String regex7 = DEWEY_CLASS_REGEX + " *\\.?(" + DEWEY_MIN_CUTTER_LETTER_REGEX + ")(.*)";
536         Pattern pat1 = Pattern.compile(regex1);
537         Pattern pat2 = Pattern.compile(regex2);
538         Pattern pat3 = Pattern.compile(regex3);
539         Pattern pat4 = Pattern.compile(regex4);
540         Pattern pat5 = Pattern.compile(regex5);
541         Pattern pat6 = Pattern.compile(regex6);
542         Pattern pat7 = Pattern.compile(regex7);
543 
544         Matcher matcher = pat1.matcher(rawCallnum);
545         if (!matcher.find()) {
546             matcher = pat2.matcher(rawCallnum);
547             if (!matcher.find()) {
548                 matcher = pat3.matcher(rawCallnum);
549             }
550         }
551 
552         if (matcher.find()) {
553             String cutter = matcher.group(2);
554             String suffix = matcher.group(3);
555             if (suffix.length() == 0)
556                 result = cutter.trim();
557             else {
558                 // check if there are letters in the cutter that should be assigned
559                 //  to the suffix
560                 if (suffix.startsWith(" ") || cutter.endsWith(" "))
561                     result = cutter.trim();
562                 else {
563                     int ix = cutter.lastIndexOf(' ');
564                     if (ix != -1)
565                         result = cutter.substring(0, ix);
566                     else
567                         result = cutter.trim();
568                 }
569             }
570         } else {
571             matcher = pat4.matcher(rawCallnum);
572             if (matcher.find())
573                 result = matcher.group(2);
574             else {
575                 matcher = pat5.matcher(rawCallnum);
576                 if (matcher.find())
577                     result = matcher.group(2);
578                 else {
579                     matcher = pat6.matcher(rawCallnum);
580                     if (matcher.find())
581                         result = matcher.group(2);
582                     else {
583                         matcher = pat7.matcher(rawCallnum);
584                         if (matcher.find())
585                             result = matcher.group(2);
586                     }
587                 }
588             }
589         }
590         if (result != null)
591             return result.trim();
592         return result;
593     }
594 
595     /**
596      * return suffix to the first cutter in the dewey call number
597      *
598      * @param rawCallnum - the entire call number, as a string
599      */
600     public static String getDeweyCutterSuffix(String rawCallnum) {
601         if (rawCallnum == null || rawCallnum.length() == 0)
602             return null;
603         String result = null;
604 
605         String cutter = getDeweyCutter(rawCallnum);
606         if (cutter != null) {
607             int ix = rawCallnum.indexOf(cutter) + cutter.length();
608             result = rawCallnum.substring(ix).trim();
609         }
610 
611         if (result == null || result.length() == 0) {
612             // dewey cutters can have trailing letters, preceded by a space or not
613             String regex1 = DEWEY_CLASS_REGEX + " *\\.?(" + DEWEY_CUTTER_TRAILING_LETTERS_REGEX + ")( +" + NOT_CUTTER + ".*)";
614             String regex2 = DEWEY_CLASS_REGEX + " *\\.?(" + DEWEY_MIN_CUTTER_REGEX + ")( +" + NOT_CUTTER + ".*)";
615             String regex3 = DEWEY_CLASS_REGEX + " *\\.?(" + DEWEY_CUTTER_SPACE_TRAILING_LETTERS_REGEX + ")( +" + NOT_CUTTER + ".*)";
616             String regex4 = DEWEY_CLASS_REGEX + " *\\.?(" + DEWEY_CUTTER_TRAILING_LETTERS_REGEX + ")(.*)";
617             String regex5 = DEWEY_CLASS_REGEX + " *\\.?(" + DEWEY_MIN_CUTTER_REGEX + ")(.*)";
618             String regex6 = DEWEY_CLASS_REGEX + " *\\.?(" + DEWEY_CUTTER_SPACE_TRAILING_LETTERS_REGEX + ")(.*)";
619             String regex7 = DEWEY_CLASS_REGEX + " *\\.?(" + DEWEY_MIN_CUTTER_LETTER_REGEX + ")(.*)";
620             Pattern pat1 = Pattern.compile(regex1);
621             Pattern pat2 = Pattern.compile(regex2);
622             Pattern pat3 = Pattern.compile(regex3);
623             Pattern pat4 = Pattern.compile(regex4);
624             Pattern pat5 = Pattern.compile(regex5);
625             Pattern pat6 = Pattern.compile(regex6);
626             Pattern pat7 = Pattern.compile(regex7);
627 
628             Matcher matcher = pat1.matcher(rawCallnum);
629             if (!matcher.find()) {
630                 matcher = pat2.matcher(rawCallnum);
631                 if (!matcher.find()) {
632                     matcher = pat3.matcher(rawCallnum);
633                     if (!matcher.find()) {
634                         matcher = pat4.matcher(rawCallnum);
635                         if (!matcher.find()) {
636                             matcher = pat5.matcher(rawCallnum);
637                             if (!matcher.find()) {
638                                 matcher = pat6.matcher(rawCallnum);
639                                 if(!matcher.find()){
640                                     matcher = pat7.matcher(rawCallnum);
641                                 }
642                             }
643                         }
644                     }
645                 }
646             }
647 
648             if (matcher.find(0)) {
649                 cutter = matcher.group(2);
650                 String suffix = matcher.group(3);
651                 if (suffix.trim().length() > 0) {
652                     // check if there are letters in the cutter that should be assigned
653                     //  to the suffix
654                     if (suffix.startsWith(" ") || cutter.endsWith(" "))
655                         result = suffix;
656                     else {
657                         int ix = cutter.lastIndexOf(' ');
658                         if (ix != -1)
659                             result = cutter.substring(ix) + suffix;
660                         else
661                             result = suffix;
662                     }
663                 }
664             }
665         }
666         if (result != null)
667             result = result.trim();
668         if (result == null || result.trim().length() == 0)
669             return null;
670         else
671             return result;
672     }
673 
674 
675     /**
676      * Used to improve call num sorting and volume lopping.
677      * Remove leading and trailing whitespace, ensure whitespace is always a
678      * single space, remove spaces after periods, remove trailing periods
679      *
680      * @param rawCallnum - a non-null String containing a Dewey call number
681      * @return normalized form of a call number
682      */
683     public static String normalizeCallnum(String rawCallnum) {
684 
685         // reduce multiple whitespace chars to a single space
686         String normalizedCallnum = rawCallnum.trim().replaceAll("\\s\\s+", " ");
687         // reduce double periods to a single period
688         normalizedCallnum = normalizedCallnum.replaceAll("\\. \\.", " .");
689         // remove space after a period if period is after digits and before letters
690         normalizedCallnum = normalizedCallnum.replaceAll("(\\d+\\.) ([A-Z])", "$1$2");
691         // remove trailing period and any spaces before it
692         if (normalizedCallnum.endsWith("."))
693             normalizedCallnum = normalizedCallnum.substring(0, normalizedCallnum.length() - 1).trim();
694 
695         // cutter could be missing preceding period, but we are leaving that as is
696 
697         // there should be a single space before the cutter - the above should
698         //  ensure this in nearly all cases
699         return normalizedCallnum;
700     }
701 
702     /**
703      * reduce multiple whitespace to single, remove spaces before or after
704      * periods, remove spaces between letters and class digits
705      */
706     static String normalizeLCcallnum(String rawLCcallnum) {
707         String normCallnum = normalizeCallnum(rawLCcallnum);
708         // remove space between class letters and digits
709         return normCallnum.replaceAll("^([A-Z][A-Z]?[A-Z]?) ([0-9])", "$1$2");
710     }
711 
712 
713 // TODO:  method to normalize year and immediate following chars (no space)?   <-- stupid?
714 
715     /**
716      * given a raw LC call number, return the shelf key - a sortable version
717      * of the call number
718      */
719     public static String getLCShelfkey(String rawLCcallnum, String recid) {
720         return (getLCShelfkey(rawLCcallnum, recid, null));
721     }
722 
723     /**
724      * given a raw LC call number, return the shelf key - a sortable version
725      * of the call number
726      */
727     public static String getLCShelfkey(String rawLCcallnum, String recid, ErrorHandler errors) {
728         StringBuilder resultBuf = new StringBuilder();
729         String upcaseLCcallnum = rawLCcallnum.toUpperCase();
730 
731 // TODO: don't repeat same parsing -- some of these methods could take the
732 //   portion of the callnumber before the cutter as the input string.
733 
734         // pad initial letters with trailing blanks to be 4 chars long
735         StringBuilder initLetBuf = new StringBuilder("    ");
736         String lets = getLCstartLetters(upcaseLCcallnum);
737         if (lets != null) {
738             initLetBuf.replace(0, lets.length(), lets);
739         } else {
740             if ((recid != null) && (!rawLCcallnum.startsWith("XX"))) // Stanford mod
741             {
742                 if (errors == null) {
743                     System.err.println("Problem creating shelfkey for record " + recid + "; call number: " + rawLCcallnum);
744                 } else {
745                     errors.addError(ErrorHandler.ERROR_TYPO, "Problem creating shelfkey for record " + recid + "; call number: " + rawLCcallnum);
746                 }
747             }
748             return (rawLCcallnum + " ");
749         }
750         resultBuf.append(initLetBuf);
751 
752         try {
753             // normalize first numeric portion to a constant length:
754             //  four digits before decimal, 6 digits after
755             String digitStr = getLCClassDigits(upcaseLCcallnum);
756             if (digitStr != null)
757                 resultBuf.append(normalizeFloat(digitStr, 4, 6));
758             else
759                 resultBuf.append(normalizeFloat("0", 4, 6));
760 
761             // optional string b/t class and first cutter
762             String classSuffix = getLCClassSuffix(upcaseLCcallnum);
763             if (classSuffix != null)
764                 resultBuf.append(" " + normalizeSuffix(classSuffix));
765 
766             // normalize first cutter  - treat number as a fraction
767             String firstCutter = getFirstLCcutter(upcaseLCcallnum);
768             if (firstCutter != null) {
769                 resultBuf.append(" " + normalizeCutter(firstCutter, 6));
770 
771                 // normalize optional first cutter suffix
772                 String firstCutterSuffix = getFirstLCcutterSuffix(upcaseLCcallnum);
773                 if (firstCutterSuffix != null)
774                     resultBuf.append(" " + normalizeSuffix(firstCutterSuffix));
775 
776                 // optional second cutter - normalize
777                 String secondCutter = getSecondLCcutter(upcaseLCcallnum);
778                 if (secondCutter != null) {
779                     resultBuf.append(" " + normalizeCutter(secondCutter, 6));
780 
781                     String secondCutterSuffix = getSecondLCcutterSuffix(upcaseLCcallnum);
782                     if (secondCutterSuffix != null)
783                         resultBuf.append(" " + normalizeSuffix(secondCutterSuffix));
784                 }
785             }
786         } catch (NumberFormatException e) {
787 //              if (recid != null)
788             if ((recid != null) && (!rawLCcallnum.startsWith("XX"))) // Stanford mod
789             {
790                 if (errors == null) {
791                     System.err.println("Problem creating shelfkey for record " + recid + "; call number: " + rawLCcallnum);
792                 } else {
793                     errors.addError(ErrorHandler.ERROR_TYPO, "Problem creating shelfkey for record " + recid + "; call number: " + rawLCcallnum);
794                 }
795             }
796             //e.printStackTrace();
797             resultBuf = new StringBuilder();
798         }
799 
800         if (resultBuf.length() == 0)
801             resultBuf.append(upcaseLCcallnum);
802 
803         return resultBuf.toString().trim();
804     }
805 
806     /**
807      * normalize the cutter string for shelf list sorting - make number into
808      * decimal of the number of digits indicated by param
809      */
810     private static String normalizeCutter(String cutter, int numDigits) {
811         String result = null;
812         if (cutter != null && cutter.length() > 0) {
813             String cutLets = getLCstartLetters(cutter);
814             String cutDigs = cutter.substring(cutLets.length());
815             String norm = null;
816             if (cutDigs != null && cutDigs.length() > 0) {
817                 try {
818                     // make sure part after letters is an integer
819                     Integer.parseInt(cutDigs);
820                     norm = normalizeFloat("." + cutDigs, 1, numDigits);
821                 } catch (NumberFormatException e) {
822                     norm = cutDigs;
823                 }
824             } else if (cutDigs.length() == 0 && cutLets.length() == 1)
825                 // if no digits in cutter, want it to sort first
826                 norm = normalizeFloat("0", 1, numDigits);
827 
828             result = cutLets + norm;
829         }
830         return result;
831     }
832 
833     /**
834      * normalize a suffix for shelf list sorting by changing all digit
835      * substrings to a constant length (left padding with zeros).
836      */
837     public static String normalizeSuffix(String suffix) {
838         if (suffix != null && suffix.length() > 0) {
839             StringBuilder resultBuf = new StringBuilder(suffix.length());
840             // get digit substrings
841             String[] digitStrs = suffix.split("[\\D]+");
842             int len = digitStrs.length;
843             if (digitStrs != null && len != 0) {
844                 int s = 0;
845                 for (int d = 0; d < len; d++) {
846                     String digitStr = digitStrs[d];
847                     int ix = suffix.indexOf(digitStr, s);
848                     // add the non-digit chars before, if they exist
849                     if (s < ix) {
850                         String text = suffix.substring(s, ix);
851                         resultBuf.append(text);
852                     }
853                     if (digitStr != null && digitStr.length() != 0) {
854                         // add the normalized digit chars, if they exist
855                         resultBuf.append(normalizeFloat(digitStr, 6, 0));
856                         s = ix + digitStr.length();
857                     }
858 
859                 }
860                 // add any chars after the last digStr
861                 resultBuf.append(suffix.substring(s));
862                 return resultBuf.toString();
863             }
864         }
865 
866         return suffix;
867     }
868 
869     /**
870      * given a shelfkey (a lexicaly sortable call number), return the reverse
871      * shelf key - a sortable version of the call number that will give the
872      * reverse order (for getting "previous" call numbers in a list)
873      */
874     public static String getReverseShelfKey(String shelfkey) {
875         StringBuilder resultBuf = new StringBuilder(reverseDefault);
876         if (shelfkey != null && shelfkey.length() > 0)
877             resultBuf.replace(0, shelfkey.length(), reverseAlphanum(shelfkey));
878         return resultBuf.toString();
879     }
880 
881     /**
882      * return the reverse String value, mapping A --> 9, B --> 8, ...
883      * 9 --> A and also non-alphanum to sort properly (before or after alphanum)
884      */
885     private static String reverseAlphanum(String orig) {
886 
887 /*
888         char[] origArray = orig.toCharArray();
889 
890         char[] reverse = new char[origArray.length];
891         for (int i = 0; i < origArray.length; i++) {
892             Character ch = origArray[i];
893             if (ch != null) {
894                 if (Character.isLetterOrDigit(ch))
895                     reverse[i] = alphanumReverseMap.get(ch);
896                 else
897                     reverse[i] = reverseNonAlphanum(ch);
898             }
899         }
900 */
901         StringBuilder reverse = new StringBuilder();
902         for (int ix = 0; ix < orig.length(); ) {
903             int codePoint = Character.toUpperCase(orig.codePointAt(ix));
904             char[] chs = Character.toChars(codePoint);
905 
906             if (Character.isLetterOrDigit(codePoint)) {
907                 if (chs.length == 1) {
908                     char c = chs[0];
909                     if (alphanumReverseMap.containsKey(c))
910                         reverse.append(alphanumReverseMap.get(c));
911                     else {
912                         // not an ASCII letter or digit
913 
914                         // map latin chars with diacritic to char without
915                         char foldC;
916 
917                         if (UCharacter.UnicodeBlock.of(c) != UCharacter.UnicodeBlock.COMBINING_DIACRITICAL_MARKS &&
918                                 UCharacter.UnicodeBlock.of(c) != UCharacter.UnicodeBlock.SPACING_MODIFIER_LETTERS &&
919                                 (foldC = Utils.foldDiacriticLatinChar(c)) != 0x00)
920                             // we mapped a latin char w diacritic to plain ascii
921                             reverse.append(alphanumReverseMap.get(foldC));
922                         else
923                             // single char, but non-latin, non-digit
924                             // ... view it as after Z in regular alphabet, for now
925                             reverse.append(SORT_FIRST_CHAR);
926                     }
927                 } else {
928                     // multiple 16 bit character unicode letter
929                     // ... view it as after Z in regular alphabet, for now
930                     reverse.append(SORT_FIRST_CHAR);
931                 }
932             } else // not a letter or a digit
933                 reverse.append(reverseNonAlphanum(chs[0]));
934 
935             ix += chs.length;
936         }
937 
938         return new String(reverse);
939     }
940 
941     /**
942      * for non alpha numeric characters, return a character that will sort
943      * first or last, whichever is the opposite of the original character.
944      */
945     public static char[] reverseNonAlphanum(char ch) {
946         // use punctuation before or after alphanum as appropriate
947         switch (ch) {
948             case '.':
949                 return Character.toChars('}');
950             case '{':
951             case '|':
952             case '}':
953             case '~':
954 // N.B.:  these are tough to deal with in a variety of contexts.
955 // Hopefully diacritics and non-latin won't bite us in the butt.
956 //              return Character.toChars(Character.MIN_CODE_POINT);
957                 return Character.toChars(' ');
958             default:
959 //              return Character.toChars(Character.MAX_CODE_POINT);
960                 return Character.toChars('~');
961         }
962     }
963 
964     /**
965      * given a raw Dewey call number, return the shelf key - a sortable
966      * version of the call number
967      */
968     public static String getDeweyShelfKey(String rawDeweyCallnum) {
969         StringBuilder resultBuf = new StringBuilder();
970 
971         // class
972         // float number, normalized to have 3 leading zeros
973         //   and trailing zeros if blank doesn't sort before digits
974         String classNum = normalizeFloat(getDeweyB4Cutter(rawDeweyCallnum), 3, 8);
975         resultBuf.append(classNum);
976 
977         // cutter   1-3 digits
978         // optional cutter letters suffix
979         //   letters preceded by space or not.
980 
981         // normalize cutter  - treat number as a fraction.
982         String cutter = getDeweyCutter(rawDeweyCallnum);
983         if (cutter != null)
984             resultBuf.append(" " + cutter);
985 
986         // optional suffix (year, part, volume, edition) ...
987         String cutterSuffix = getDeweyCutterSuffix(rawDeweyCallnum);
988         if (cutterSuffix != null)
989             resultBuf.append(" " + normalizeSuffix(cutterSuffix));
990 
991 
992         if (resultBuf.length() == 0)
993             resultBuf.append(rawDeweyCallnum);
994 
995         return resultBuf.toString().trim();
996     }
997 
998 
999     /**
1000      * normalizes numbers (can have decimal portion) to (digitsB4) before
1001      * the decimal (adding leading zeroes as necessary) and (digitsAfter
1002      * after the decimal.  In the case of a whole number, there will be no
1003      * decimal point.
1004      *
1005      * @param floatStr,   the number, as a String
1006      * @param digitsB4    - the number of characters the result should have before the
1007      *                    decimal point (leading zeroes will be added as necessary). A negative
1008      *                    number means leave whatever digits encountered as is; don't pad with leading zeroes.
1009      * @param digitsAfter - the number of characters the result should have after
1010      *                    the decimal point.  A negative number means leave whatever fraction
1011      *                    encountered as is; don't pad with trailing zeroes (trailing zeroes in
1012      *                    this case will be removed)
1013      * @throws NumberFormatException if string can't be parsed as a number
1014      */
1015     public static String normalizeFloat(String floatStr, int digitsB4, int digitsAfter) {
1016         String norm = null;
1017         try {
1018             double value = Double.valueOf(floatStr).doubleValue();
1019             String formatStr = getFormatString(digitsB4) + '.' + getFormatString(digitsAfter);
1020             DecimalFormat normFormat = new DecimalFormat(formatStr);
1021             norm = normFormat.format(value);
1022             if (norm.endsWith("."))
1023                 norm = norm.substring(0, norm.length() - 1);
1024 
1025         } catch (NumberFormatException e) {
1026             LOG.error("Exception while Normalizing Call Number",e);
1027             throw new DocstoreValidationException(e);
1028         }
1029         return norm;
1030     }
1031 
1032     private static String PUNCT_PREFIX = "([\\.:\\/])?";
1033     private static String NS_PREFIX = "(n\\.s\\.?\\,? ?)?";
1034     private static String MONTHS = "jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec";
1035     private static String VOL_LETTERS = "[\\:\\/]?(bd|iss|jahrg|new ser|no|part|pts?|ser|t|v|vols?|vyp" + "|" + MONTHS + ")";
1036     private static String VOL_NUMBERS = "\\d+([\\/-]\\d+)?( \\d{4}([\\/-]\\d{4})?)?( ?suppl\\.?)?";
1037     private static String VOL_NUMBERS_LOOSER = "\\d+.*";
1038     private static String VOL_NUM_AS_LETTERS = "[A-Z]([\\/-]\\[A-Z]+)?.*";
1039 
1040     private static Pattern VOL_PATTERN = Pattern.compile(PUNCT_PREFIX + NS_PREFIX + VOL_LETTERS + "\\.? ?" + VOL_NUMBERS, Pattern.CASE_INSENSITIVE);
1041     private static Pattern VOL_PATTERN_LOOSER = Pattern.compile(PUNCT_PREFIX + NS_PREFIX + VOL_LETTERS + "\\.? ?" + VOL_NUMBERS_LOOSER, Pattern.CASE_INSENSITIVE);
1042     private static Pattern VOL_PATTERN_LETTERS = Pattern.compile(PUNCT_PREFIX + NS_PREFIX + VOL_LETTERS + "[\\/\\. ]" + VOL_NUM_AS_LETTERS, Pattern.CASE_INSENSITIVE);
1043 
1044     /**
1045      * remove volume information from LC call number if it is present as a
1046      * suffix
1047      *
1048      * @param rawLCcallnum
1049      * @return call number without the volume information, or full call number
1050      *         if no volume information was present.
1051      */
1052     public static String removeLCVolSuffix(String rawLCcallnum) {
1053         // get suffix to last occurring cutter, if there is one
1054         String suffix = getSecondLCcutterSuffix(rawLCcallnum);
1055         if (suffix == null || suffix.length() == 0) {
1056             String cut1suffix = getFirstLCcutterSuffix(rawLCcallnum);
1057             if (cut1suffix != null) {
1058                 // first cutter suffix may contain second cutter
1059                 String cut2 = getSecondLCcutter(rawLCcallnum);
1060                 if (cut2 != null) {
1061                     int ix = cut1suffix.indexOf(cut2);
1062                     if (ix != -1)
1063                         suffix = cut1suffix.substring(0, ix);
1064                     else
1065                         suffix = cut1suffix;
1066                 } else
1067                     suffix = cut1suffix;
1068             }
1069         }
1070 
1071         // could put last ditch effort with tightest pattern, but don't want to take out too much
1072         if (suffix != null && suffix.length() > 0) {
1073             Matcher matcher = VOL_PATTERN.matcher(suffix);
1074             if (!matcher.find()) {
1075                 matcher = VOL_PATTERN_LOOSER.matcher(suffix);
1076                 if (!matcher.find()) {
1077                     matcher = VOL_PATTERN_LETTERS.matcher(suffix);
1078                 }
1079             }
1080 // look for first / last match, not any match (subroutine?)?
1081             if (matcher.find(0)) {
1082                 // return orig call number with matcher part lopped off.
1083                 int ix = rawLCcallnum.indexOf(suffix) + matcher.start();
1084                 if (ix != -1 && ix < rawLCcallnum.length()) {
1085                     return rawLCcallnum.substring(0, ix).trim();
1086                 }
1087             }
1088         }
1089         return rawLCcallnum;
1090     }
1091 
1092 
1093     /**
1094      * remove volume information from Dewey call number if it is present as a
1095      * suffix
1096      *
1097      * @param rawDeweyCallnum
1098      * @return call number without the volume information, or full call number
1099      *         if no volume information was present.
1100      */
1101     public static String removeDeweyVolSuffix(String rawDeweyCallnum) {
1102         String cutSuffix = getDeweyCutterSuffix(rawDeweyCallnum);
1103 
1104         if (cutSuffix == null || cutSuffix.length() == 0)
1105             return rawDeweyCallnum;
1106 
1107         Matcher matcher = VOL_PATTERN.matcher(cutSuffix);
1108         if (!matcher.find()) {
1109             matcher = VOL_PATTERN_LOOSER.matcher(cutSuffix);
1110             if (!matcher.find()) {
1111                 matcher = VOL_PATTERN_LETTERS.matcher(cutSuffix);
1112             }
1113         }
1114 
1115         if (matcher.find(0)) {
1116             // return orig call number with matcher part lopped off.
1117             int ix = rawDeweyCallnum.indexOf(cutSuffix) + matcher.start();
1118             if (ix != -1 && ix < rawDeweyCallnum.length()) {
1119                 return rawDeweyCallnum.substring(0, ix).trim();
1120             }
1121         }
1122         return rawDeweyCallnum;
1123     }
1124 
1125 
1126     /**
1127      * adds leading zeros to a dewey call number, when they're missing.
1128      *
1129      * @param deweyCallNum
1130      * @return the dewey call number with leading zeros
1131      */
1132     public static String addLeadingZeros(String deweyCallNum) {
1133         String result = deweyCallNum;
1134         String b4Cutter = getPortionBeforeCutter(deweyCallNum);
1135 
1136         // TODO: could call Utils.normalizeFloat(b4Cutter.trim(), 3, -1);
1137         // but still need to add back part after cutter
1138 
1139         String b4dec = null;
1140         int decIx = b4Cutter.indexOf(".");
1141         if (decIx >= 0)
1142             b4dec = deweyCallNum.substring(0, decIx).trim();
1143         else
1144             b4dec = b4Cutter.trim();
1145 
1146         if (b4dec != null) {
1147             switch (b4dec.length()) {
1148                 case 1:
1149                     result = "00" + deweyCallNum;
1150                     break;
1151                 case 2:
1152                     result = "0" + deweyCallNum;
1153             }
1154         }
1155 
1156         return result;
1157     }
1158 
1159     /**
1160      * return a format string corresponding to the number of digits specified
1161      *
1162      * @param numDigits - the number of characters the result should have (to be padded
1163      *                  with zeroes as necessary). A negative number means leave whatever digits
1164      *                  encountered as is; don't pad with zeroes -- up to 12 characters.
1165      */
1166     private static String getFormatString(int numDigits) {
1167         StringBuilder b4 = new StringBuilder();
1168         if (numDigits < 0)
1169             b4.append("############");
1170         else if (numDigits > 0) {
1171             for (int i = 0; i < numDigits; i++) {
1172                 b4.append('0');
1173             }
1174         }
1175         return b4.toString();
1176     }
1177 
1178     /**
1179      * @param callNumber
1180      * @return
1181      */
1182     public static String getSuDocShelfKey(String callNumber) {
1183         String upcaseSuDoccallnum = callNumber.toUpperCase();
1184         StringBuffer shelfKey = new StringBuffer();
1185         //split the call number based on numbers and alphabets
1186         String[] cNumSub = upcaseSuDoccallnum.split(SUDOC_REGEX);
1187         for (String str : cNumSub) {
1188             if (StringUtils.isNumeric(str)) {   // numbers
1189                 // append zeros to sort Ordinal
1190                 str = StringUtils.leftPad(str, 5, "0"); // constant length 5
1191                 shelfKey.append(str);
1192                 shelfKey.append(" ");
1193             } else {                     // alphabets
1194                 // append spaces to sort Lexicographic
1195                 str = StringUtils.rightPad(str, 5);  // constant length 5
1196                 shelfKey.append(str);
1197                 shelfKey.append(" ");
1198             }
1199         }
1200         return shelfKey.toString().trim();
1201     }
1202 }