1 package org.kuali.ole.utility.callnumber;
2
3
4
5
6
7
8
9
10
import com.ibm.icu.lang.UCharacter;
import org.apache.commons.lang.StringUtils;
import org.marc4j.ErrorHandler;

import java.text.DecimalFormat;
import java.text.DecimalFormatSymbols;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
20
21
22
23
24
25
26
27
28
29 public final class CallNumUtils {
30
31
32
33
34
35
36
37
38
/**
 * Private constructor: the class is final and every member is static, so it is never instantiated.
 */
private CallNumUtils() {
}
41
/**
 * Matches text that starts with a Dewey class number: one to three digits, an optional
 * decimal fraction, then any trailing text.
 */
public static final Pattern DEWEY_PATTERN = Pattern.compile("^\\d{1,3}(\\.\\d+)?.*");

/**
 * Regex for the required part of an LC class: a first capital letter that is not I, O, W, X or Y,
 * up to two more capital letters, optional spaces, then digits with an optional decimal fraction.
 * Contains one capturing group (the decimal fraction).
 */
public static final String LC_CLASS_REQ_REGEX = "[A-Z&&[^IOWXY]]{1}[A-Z]{0,2} *\\d+(\\.\\d+)?";

/**
 * Regex for text that is not a cutter: either a digit or lowercase letter followed by word
 * characters, or a capital letter followed by at least one non-digit and then word characters.
 * Contains two capturing groups (one per alternative).
 */
public static final String NOT_CUTTER = "([\\da-z]\\w*)|([A-Z]\\D+[\\w]*)";

/**
 * Regex for an LC class optionally followed by spaces and a {@link #NOT_CUTTER} suffix.
 * It contributes five capturing groups in total (outer group, decimal fraction, suffix, and the
 * two NOT_CUTTER alternatives); methods that append one more group therefore read group 6.
 */
public static final String LC_CLASS_W_SUFFIX = "(" + LC_CLASS_REQ_REGEX + "( +" + NOT_CUTTER + ")?)";

/**
 * Regex for a cutter: one capital letter followed by one or more digits.
 */
public static final String CUTTER_REGEX = "[A-Z]\\d+";

/**
 * Regex for an LC class (with optional suffix) followed by optional spaces, an optional period
 * and a cutter.
 */
public static final String LC_CLASS_N_CUTTER = LC_CLASS_W_SUFFIX + " *\\.?" + CUTTER_REGEX;
/** Compiled form of {@link #LC_CLASS_N_CUTTER} that also accepts any trailing text. */
public static final Pattern LC_CLASS_N_CUTTER_PATTERN = Pattern.compile(LC_CLASS_N_CUTTER + ".*");

/**
 * Regex for a Dewey class number: one to three digits with an optional decimal fraction.
 * Contains one capturing group (the decimal fraction).
 */
public static final String DEWEY_CLASS_REGEX = "\\d{1,3}(\\.\\d+)?";

/** Regex for the shortest Dewey cutter form: a capital letter followed by one to three digits. */
public static final String DEWEY_MIN_CUTTER_REGEX = "[A-Z]\\d{1,3}";
/** Minimal Dewey cutter immediately followed by one or more capital letters. */
public static final String DEWEY_CUTTER_TRAILING_LETTERS_REGEX = DEWEY_MIN_CUTTER_REGEX + "[A-Z]+";
/** Minimal Dewey cutter followed by spaces and then one or more capital letters. */
public static final String DEWEY_CUTTER_SPACE_TRAILING_LETTERS_REGEX = DEWEY_MIN_CUTTER_REGEX + " +[A-Z]+";
/** Minimal Dewey cutter followed by optional spaces and any capital letters (possessive match). */
public static final String DEWEY_FULL_CUTTER_REGEX = DEWEY_MIN_CUTTER_REGEX + " *[A-Z]*+";

/**
 * Regex for a Dewey class followed by optional spaces, an optional period and a full cutter.
 */
public static final String DEWEY_CLASS_N_CUTTER_REGEX = DEWEY_CLASS_REGEX + " *\\.?" + DEWEY_FULL_CUTTER_REGEX;
/** Compiled form of {@link #DEWEY_CLASS_N_CUTTER_REGEX} that also accepts any trailing text. */
public static final Pattern DEWEY_CLASS_N_CUTTER_PATTERN = Pattern.compile(DEWEY_CLASS_N_CUTTER_REGEX + ".*");

/**
 * Split delimiter used by {@code getSuDocShelfKey}: a run of characters that are neither A-Z nor
 * 0-9, or the zero-width boundary between a capital letter and a digit (in either order).
 */
public static final String SUDOC_REGEX = "[^A-Z0-9]+|(?<=[A-Z])(?=[0-9])|(?<=[0-9])(?=[A-Z])";
105
106
107 private static Map<Character, Character> alphanumReverseMap = new HashMap<Character, Character>();
108
109 static {
110 alphanumReverseMap.put('0', 'Z');
111 alphanumReverseMap.put('1', 'Y');
112 alphanumReverseMap.put('2', 'X');
113 alphanumReverseMap.put('3', 'W');
114 alphanumReverseMap.put('4', 'V');
115 alphanumReverseMap.put('5', 'U');
116 alphanumReverseMap.put('6', 'T');
117 alphanumReverseMap.put('7', 'S');
118 alphanumReverseMap.put('8', 'R');
119 alphanumReverseMap.put('9', 'Q');
120 alphanumReverseMap.put('A', 'P');
121 alphanumReverseMap.put('B', 'O');
122 alphanumReverseMap.put('C', 'N');
123 alphanumReverseMap.put('D', 'M');
124 alphanumReverseMap.put('E', 'L');
125 alphanumReverseMap.put('F', 'K');
126 alphanumReverseMap.put('G', 'J');
127 alphanumReverseMap.put('H', 'I');
128 alphanumReverseMap.put('I', 'H');
129 alphanumReverseMap.put('J', 'G');
130 alphanumReverseMap.put('K', 'F');
131 alphanumReverseMap.put('L', 'E');
132 alphanumReverseMap.put('M', 'D');
133 alphanumReverseMap.put('N', 'C');
134 alphanumReverseMap.put('O', 'B');
135 alphanumReverseMap.put('P', 'A');
136 alphanumReverseMap.put('Q', '9');
137 alphanumReverseMap.put('R', '8');
138 alphanumReverseMap.put('S', '7');
139 alphanumReverseMap.put('T', '6');
140 alphanumReverseMap.put('U', '5');
141 alphanumReverseMap.put('V', '4');
142 alphanumReverseMap.put('W', '3');
143 alphanumReverseMap.put('X', '2');
144 alphanumReverseMap.put('Y', '1');
145 alphanumReverseMap.put('Z', '0');
146 }
147
148
149
150
151
/** Character appended to reverse keys for input characters that cannot be mapped. */
public static char SORT_FIRST_CHAR = Character.MIN_VALUE;
/** Template for reverse keys: fifty '~' characters (initial capacity 75). */
public static StringBuilder reverseDefault = new StringBuilder(75);

static {
    int remaining = 50;
    while (remaining-- > 0) {
        reverseDefault.append('~');
    }
}
162
163
164
165
166
167
168
169 public static final boolean isValidLC(String possLCval) {
170 if (possLCval != null && LC_CLASS_N_CUTTER_PATTERN.matcher(possLCval.trim()).matches())
171 return true;
172 return false;
173 }
174
175
176
177
178
179 public static final boolean isValidDeweyWithCutter(String possDeweyVal) {
180 if (possDeweyVal != null && DEWEY_CLASS_N_CUTTER_PATTERN.matcher(possDeweyVal.trim()).matches())
181 return true;
182 return false;
183 }
184
185
186
187
188
189 public static final boolean isValidDewey(String possDeweyVal) {
190 if (possDeweyVal != null && DEWEY_PATTERN.matcher(possDeweyVal.trim()).matches())
191 return true;
192 return false;
193 }
194
195
196
197
198
199 public static final String getPortionBeforeCutter(String callnum) {
200
201
202
203
204
205
206
207 String beginCutterRegex = "( +|(\\.[A-Z])| */)";
208
209 String[] pieces = callnum.split(beginCutterRegex);
210 if (pieces.length == 0 || pieces[0] == null || pieces[0].length() == 0)
211 return null;
212 else
213 return pieces[0].trim();
214 }
215
216
217
218
219
220 public static final String getLCB4FirstCutter(String callnum) {
221 String result = null;
222
223 String cutter = getFirstLCcutter(callnum);
224 if (cutter != null && cutter.length() > 0) {
225
226 int ix = callnum.indexOf(cutter);
227 String lets = getLCstartLetters(callnum);
228 if (ix < lets.length())
229 ix = callnum.indexOf(cutter, lets.length());
230
231 if (ix > 0) {
232 result = callnum.substring(0, ix).trim();
233 if (result.endsWith("."))
234 result = result.substring(0, result.length() - 1).trim();
235 } else
236 result = callnum;
237 } else
238 result = callnum;
239
240 return result;
241 }
242
243
244
245
246
247 public static String getLCstartLetters(String rawLCcallnum) {
248 String result = null;
249 if (rawLCcallnum != null && rawLCcallnum.length() > 0) {
250 String[] lcClass = rawLCcallnum.split("[^A-Z]+");
251 if (lcClass.length > 0)
252 result = lcClass[0];
253 }
254 return result;
255 }
256
257
258
259
260
261
262
263
264 public static String getLCClassDigits(String rawLCcallnum) {
265 String result = null;
266
267 String rawClass = getLCB4FirstCutter(rawLCcallnum);
268 if (rawClass != null && rawClass.length() > 0) {
269 String[] pieces = rawClass.split("[A-Z ]+");
270 if (pieces.length > 1)
271 result = pieces[1].trim();
272 }
273 return result;
274 }
275
276
277
278
279
280
281
282 public static String getLCClassSuffix(String rawLCcallnum) {
283 String result = null;
284
285 String b4cutter = getLCB4FirstCutter(rawLCcallnum);
286 if (b4cutter == null || b4cutter.length() == 0)
287 return null;
288
289 String classDigits = getLCClassDigits(rawLCcallnum);
290
291 if (classDigits != null && classDigits.length() > 0) {
292 int reqClassLen = b4cutter.indexOf(classDigits) + classDigits.length();
293
294 if (b4cutter.length() > reqClassLen)
295 result = b4cutter.substring(reqClassLen).trim();
296 }
297
298 return result;
299 }
300
301
302
303
304
305
306
307
308 public static String getFirstLCcutter(String rawCallnum) {
309 String result = null;
310
311 String regex = LC_CLASS_W_SUFFIX + " *\\.?(" + CUTTER_REGEX + ")";
312 Pattern pattern = Pattern.compile(regex);
313 Matcher matcher = pattern.matcher(rawCallnum);
314
315 if (matcher.find())
316 result = matcher.group(6).trim();
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331 return result;
332 }
333
334
335
336
337
338
339
340 public static String getFirstLCcutterSuffix(String rawLCcallnum) {
341 String result = null;
342
343 String regex = LC_CLASS_N_CUTTER + " *(" + NOT_CUTTER + ")*";
344 Pattern pattern = Pattern.compile(regex);
345 Matcher matcher = pattern.matcher(rawLCcallnum);
346
347
348 if (matcher.find() && matcher.groupCount() > 5
349 && matcher.group(6) != null && matcher.group(6).length() > 0) {
350
351
352
353 result = matcher.group(6).trim();
354
355
356
357 int endLastIx = matcher.end(6);
358 if (endLastIx < rawLCcallnum.length()) {
359
360 Pattern cutterPat = Pattern.compile(" *\\." + CUTTER_REGEX);
361 matcher.usePattern(cutterPat);
362 if (matcher.find(endLastIx)) {
363 if (endLastIx < matcher.start())
364 result = result.trim() + " " + rawLCcallnum.substring(endLastIx, matcher.start()).trim();
365 } else
366 result = result + rawLCcallnum.substring(endLastIx);
367 }
368 } else {
369
370
371
372
373 String afterLCclassNCutter = rawLCcallnum.replaceFirst(LC_CLASS_N_CUTTER + " *", "");
374 String cutterRegex = LC_CLASS_N_CUTTER + " *(.*)\\." + CUTTER_REGEX;
375
376 pattern = Pattern.compile(cutterRegex);
377 matcher = pattern.matcher(rawLCcallnum);
378
379 if (matcher.find() && matcher.groupCount() > 5
380 && matcher.group(6) != null && matcher.group(6).length() > 0)
381
382 result = matcher.group(6).trim();
383 else {
384 regex = LC_CLASS_N_CUTTER + " \\.\\.\\.$";
385 pattern = Pattern.compile(regex);
386 matcher = pattern.matcher(rawLCcallnum);
387 if (matcher.find())
388 result = " ...";
389 }
390 }
391 return result;
392 }
393
394
395
396
397
398
399
400
401 public static String getSecondLCcutter(String rawLCcallnum) {
402 String result = null;
403
404 String firstCutSuffix = getFirstLCcutterSuffix(rawLCcallnum);
405 if (firstCutSuffix == null || firstCutSuffix.length() == 0) {
406
407 String regex = LC_CLASS_N_CUTTER + " *\\.?(" + CUTTER_REGEX + ")";
408 Pattern pattern = Pattern.compile(regex);
409 Matcher matcher = pattern.matcher(rawLCcallnum);
410 if (matcher.find() && matcher.groupCount() > 5
411 && matcher.group(6) != null && matcher.group(6).length() > 0) {
412 result = matcher.group(6).trim();
413 }
414 } else {
415
416
417 int ix = rawLCcallnum.indexOf(firstCutSuffix) + firstCutSuffix.length();
418 if (ix < rawLCcallnum.length()) {
419 String remaining = rawLCcallnum.substring(ix).trim();
420 Pattern pattern = Pattern.compile("(" + CUTTER_REGEX + ")");
421 Matcher matcher = pattern.matcher(remaining);
422 if (matcher.find() && matcher.group(1) != null && matcher.group(1).length() > 0) {
423 result = matcher.group(1).trim();
424 }
425 }
426
427 if (result == null) {
428 Pattern pattern = Pattern.compile("\\.(" + CUTTER_REGEX + ")");
429 Matcher matcher = pattern.matcher(firstCutSuffix);
430 if (matcher.find() && matcher.group(1) != null && matcher.group(1).length() > 0) {
431 result = matcher.group(1).trim();
432 }
433 }
434 }
435 return result;
436 }
437
438
439
440
441
442
443
444 public static String getSecondLCcutterSuffix(String rawLCcallnum) {
445 String result = null;
446
447 String secondCutter = getSecondLCcutter(rawLCcallnum);
448 if (secondCutter != null && secondCutter.length() > 0) {
449
450 int ix = rawLCcallnum.indexOf(secondCutter) + secondCutter.length();
451 if (ix < rawLCcallnum.length())
452 result = rawLCcallnum.substring(ix).trim();
453 }
454
455 return result;
456 }
457
458
459
460
461
462
463
464
465
/**
 * Returns the non-cutter text that follows the first cutter, extended up to the next cutter.
 * NOTE(review): the name suggests this is meant for a year suffix, but the code does not
 * check for a year - confirm against callers.
 *
 * @param rawLCcallnum the LC call number
 * @return the suffix text, or null when class + cutter is not followed by non-cutter text
 */
public static String getSecondLCcutterYearSuffix(String rawLCcallnum) {
    String result = null;

    // Class + first cutter, then repeated non-cutter text captured as group 6
    // (LC_CLASS_N_CUTTER contributes groups 1-5).
    String regex = LC_CLASS_N_CUTTER + " *(" + NOT_CUTTER + ")*";
    Pattern pattern = Pattern.compile(regex);
    Matcher matcher = pattern.matcher(rawLCcallnum);

    if (matcher.find() && matcher.groupCount() > 5
            && matcher.group(6) != null && matcher.group(6).length() > 0) {

        result = matcher.group(6);

        // If text remains after group 6, extend the suffix up to the next cutter (with an
        // optional period), or to the end of the string when no cutter follows.
        int endLastIx = matcher.end(6);
        if (endLastIx < rawLCcallnum.length()) {
            Pattern cutterPat = Pattern.compile(" *\\.?" + CUTTER_REGEX + ".*");
            // Reuse the matcher with the cutter pattern; find(int) resets it and searches
            // from endLastIx.
            matcher.usePattern(cutterPat);
            if (matcher.find(endLastIx)) {
                if (endLastIx < matcher.start())
                    result = result.trim() + " " + rawLCcallnum.substring(endLastIx, matcher.start()).trim();
            } else
                result = result.trim() + rawLCcallnum.substring(endLastIx);
        }
    }

    return result;
}
496
497
498
499
500
501 public static final String getDeweyB4Cutter(String callnum) {
502 String result = null;
503
504 String entireCallNumRegex = "(" + DEWEY_CLASS_REGEX + ").*";
505 Pattern pattern = Pattern.compile(entireCallNumRegex);
506 Matcher matcher = pattern.matcher(callnum);
507 if (matcher.find())
508 result = matcher.group(1).trim();
509
510 return result;
511 }
512
513
514
515
516
517
518
519
520 public static String getDeweyCutter(String rawCallnum) {
521 String result = null;
522
523
524 String regex1 = DEWEY_CLASS_REGEX + " *\\.?(" + DEWEY_CUTTER_TRAILING_LETTERS_REGEX + ")( +" + NOT_CUTTER + ".*)";
525 String regex2 = DEWEY_CLASS_REGEX + " *\\.?(" + DEWEY_MIN_CUTTER_REGEX + ")( +" + NOT_CUTTER + ".*)";
526 String regex3 = DEWEY_CLASS_REGEX + " *\\.?(" + DEWEY_CUTTER_SPACE_TRAILING_LETTERS_REGEX + ")( +" + NOT_CUTTER + ".*)";
527 String regex4 = DEWEY_CLASS_REGEX + " *\\.?(" + DEWEY_CUTTER_TRAILING_LETTERS_REGEX + ")(.*)";
528 String regex5 = DEWEY_CLASS_REGEX + " *\\.?(" + DEWEY_MIN_CUTTER_REGEX + ")(.*)";
529 String regex6 = DEWEY_CLASS_REGEX + " *\\.?(" + DEWEY_CUTTER_SPACE_TRAILING_LETTERS_REGEX + ")(.*)";
530 Pattern pat1 = Pattern.compile(regex1);
531 Pattern pat2 = Pattern.compile(regex2);
532 Pattern pat3 = Pattern.compile(regex3);
533 Pattern pat4 = Pattern.compile(regex4);
534 Pattern pat5 = Pattern.compile(regex5);
535 Pattern pat6 = Pattern.compile(regex6);
536
537 Matcher matcher = pat1.matcher(rawCallnum);
538 if (!matcher.find()) {
539 matcher = pat2.matcher(rawCallnum);
540 if (!matcher.find()) {
541 matcher = pat3.matcher(rawCallnum);
542 }
543 }
544
545 if (matcher.find()) {
546 String cutter = matcher.group(2);
547 String suffix = matcher.group(3);
548 if (suffix.length() == 0)
549 result = cutter.trim();
550 else {
551
552
553 if (suffix.startsWith(" ") || cutter.endsWith(" "))
554 result = cutter.trim();
555 else {
556 int ix = cutter.lastIndexOf(' ');
557 if (ix != -1)
558 result = cutter.substring(0, ix);
559 else
560 result = cutter.trim();
561 }
562 }
563 } else {
564 matcher = pat4.matcher(rawCallnum);
565 if (matcher.find())
566 result = matcher.group(2);
567 else {
568 matcher = pat5.matcher(rawCallnum);
569 if (matcher.find())
570 result = matcher.group(2);
571 else {
572 matcher = pat6.matcher(rawCallnum);
573 if (matcher.find())
574 result = matcher.group(2);
575 }
576 }
577 }
578 if (result != null)
579 return result.trim();
580 return result;
581 }
582
583
584
585
586
587
588 public static String getDeweyCutterSuffix(String rawCallnum) {
589 if (rawCallnum == null || rawCallnum.length() == 0)
590 return null;
591 String result = null;
592
593 String cutter = getDeweyCutter(rawCallnum);
594 if (cutter != null) {
595 int ix = rawCallnum.indexOf(cutter) + cutter.length();
596 result = rawCallnum.substring(ix).trim();
597 }
598
599 if (result == null || result.length() == 0) {
600
601 String regex1 = DEWEY_CLASS_REGEX + " *\\.?(" + DEWEY_CUTTER_TRAILING_LETTERS_REGEX + ")( +" + NOT_CUTTER + ".*)";
602 String regex2 = DEWEY_CLASS_REGEX + " *\\.?(" + DEWEY_MIN_CUTTER_REGEX + ")( +" + NOT_CUTTER + ".*)";
603 String regex3 = DEWEY_CLASS_REGEX + " *\\.?(" + DEWEY_CUTTER_SPACE_TRAILING_LETTERS_REGEX + ")( +" + NOT_CUTTER + ".*)";
604 String regex4 = DEWEY_CLASS_REGEX + " *\\.?(" + DEWEY_CUTTER_TRAILING_LETTERS_REGEX + ")(.*)";
605 String regex5 = DEWEY_CLASS_REGEX + " *\\.?(" + DEWEY_MIN_CUTTER_REGEX + ")(.*)";
606 String regex6 = DEWEY_CLASS_REGEX + " *\\.?(" + DEWEY_CUTTER_SPACE_TRAILING_LETTERS_REGEX + ")(.*)";
607 Pattern pat1 = Pattern.compile(regex1);
608 Pattern pat2 = Pattern.compile(regex2);
609 Pattern pat3 = Pattern.compile(regex3);
610 Pattern pat4 = Pattern.compile(regex4);
611 Pattern pat5 = Pattern.compile(regex5);
612 Pattern pat6 = Pattern.compile(regex6);
613
614 Matcher matcher = pat1.matcher(rawCallnum);
615 if (!matcher.find()) {
616 matcher = pat2.matcher(rawCallnum);
617 if (!matcher.find()) {
618 matcher = pat3.matcher(rawCallnum);
619 if (!matcher.find()) {
620 matcher = pat4.matcher(rawCallnum);
621 if (!matcher.find()) {
622 matcher = pat5.matcher(rawCallnum);
623 if (!matcher.find()) {
624 matcher = pat6.matcher(rawCallnum);
625 }
626 }
627 }
628 }
629 }
630
631 if (matcher.find(0)) {
632 cutter = matcher.group(2);
633 String suffix = matcher.group(3);
634 if (suffix.trim().length() > 0) {
635
636
637 if (suffix.startsWith(" ") || cutter.endsWith(" "))
638 result = suffix;
639 else {
640 int ix = cutter.lastIndexOf(' ');
641 if (ix != -1)
642 result = cutter.substring(ix) + suffix;
643 else
644 result = suffix;
645 }
646 }
647 }
648 }
649 if (result != null)
650 result = result.trim();
651 if (result == null || result.trim().length() == 0)
652 return null;
653 else
654 return result;
655 }
656
657
658
659
660
661
662
663
664
665
666 public static String normalizeCallnum(String rawCallnum) {
667
668
669 String normalizedCallnum = rawCallnum.trim().replaceAll("\\s\\s+", " ");
670
671 normalizedCallnum = normalizedCallnum.replaceAll("\\. \\.", " .");
672
673 normalizedCallnum = normalizedCallnum.replaceAll("(\\d+\\.) ([A-Z])", "$1$2");
674
675 if (normalizedCallnum.endsWith("."))
676 normalizedCallnum = normalizedCallnum.substring(0, normalizedCallnum.length() - 1).trim();
677
678
679
680
681
682 return normalizedCallnum;
683 }
684
685
686
687
688
689 static String normalizeLCcallnum(String rawLCcallnum) {
690 String normCallnum = normalizeCallnum(rawLCcallnum);
691
692 return normCallnum.replaceAll("^([A-Z][A-Z]?[A-Z]?) ([0-9])", "$1$2");
693 }
694
695
696
697
698
699
700
701
702 public static String getLCShelfkey(String rawLCcallnum, String recid) {
703 return (getLCShelfkey(rawLCcallnum, recid, null));
704 }
705
706
707
708
709
710 public static String getLCShelfkey(String rawLCcallnum, String recid, ErrorHandler errors) {
711 StringBuilder resultBuf = new StringBuilder();
712 String upcaseLCcallnum = rawLCcallnum.toUpperCase();
713
714
715
716
717
718 StringBuilder initLetBuf = new StringBuilder(" ");
719 String lets = getLCstartLetters(upcaseLCcallnum);
720 if (lets != null) {
721 initLetBuf.replace(0, lets.length(), lets);
722 } else {
723 if ((recid != null) && (!rawLCcallnum.startsWith("XX")))
724 {
725 if (errors == null) {
726 System.err.println("Problem creating shelfkey for record " + recid + "; call number: " + rawLCcallnum);
727 } else {
728 errors.addError(ErrorHandler.ERROR_TYPO, "Problem creating shelfkey for record " + recid + "; call number: " + rawLCcallnum);
729 }
730 }
731 return (rawLCcallnum + " ");
732 }
733 resultBuf.append(initLetBuf);
734
735 try {
736
737
738 String digitStr = getLCClassDigits(upcaseLCcallnum);
739 if (digitStr != null)
740 resultBuf.append(normalizeFloat(digitStr, 4, 6));
741 else
742 resultBuf.append(normalizeFloat("0", 4, 6));
743
744
745 String classSuffix = getLCClassSuffix(upcaseLCcallnum);
746 if (classSuffix != null)
747 resultBuf.append(" " + normalizeSuffix(classSuffix));
748
749
750 String firstCutter = getFirstLCcutter(upcaseLCcallnum);
751 if (firstCutter != null) {
752 resultBuf.append(" " + normalizeCutter(firstCutter, 6));
753
754
755 String firstCutterSuffix = getFirstLCcutterSuffix(upcaseLCcallnum);
756 if (firstCutterSuffix != null)
757 resultBuf.append(" " + normalizeSuffix(firstCutterSuffix));
758
759
760 String secondCutter = getSecondLCcutter(upcaseLCcallnum);
761 if (secondCutter != null) {
762 resultBuf.append(" " + normalizeCutter(secondCutter, 6));
763
764 String secondCutterSuffix = getSecondLCcutterSuffix(upcaseLCcallnum);
765 if (secondCutterSuffix != null)
766 resultBuf.append(" " + normalizeSuffix(secondCutterSuffix));
767 }
768 }
769 } catch (NumberFormatException e) {
770
771 if ((recid != null) && (!rawLCcallnum.startsWith("XX")))
772 {
773 if (errors == null) {
774 System.err.println("Problem creating shelfkey for record " + recid + "; call number: " + rawLCcallnum);
775 } else {
776 errors.addError(ErrorHandler.ERROR_TYPO, "Problem creating shelfkey for record " + recid + "; call number: " + rawLCcallnum);
777 }
778 }
779
780 resultBuf = new StringBuilder();
781 }
782
783 if (resultBuf.length() == 0)
784 resultBuf.append(upcaseLCcallnum);
785
786 return resultBuf.toString().trim();
787 }
788
789
790
791
792
793 private static String normalizeCutter(String cutter, int numDigits) {
794 String result = null;
795 if (cutter != null && cutter.length() > 0) {
796 String cutLets = getLCstartLetters(cutter);
797 String cutDigs = cutter.substring(cutLets.length());
798 String norm = null;
799 if (cutDigs != null && cutDigs.length() > 0) {
800 try {
801
802 Integer.parseInt(cutDigs);
803 norm = normalizeFloat("." + cutDigs, 1, numDigits);
804 } catch (NumberFormatException e) {
805 norm = cutDigs;
806 }
807 } else if (cutDigs.length() == 0 && cutLets.length() == 1)
808
809 norm = normalizeFloat("0", 1, numDigits);
810
811 result = cutLets + norm;
812 }
813 return result;
814 }
815
816
817
818
819
820 public static String normalizeSuffix(String suffix) {
821 if (suffix != null && suffix.length() > 0) {
822 StringBuilder resultBuf = new StringBuilder(suffix.length());
823
824 String[] digitStrs = suffix.split("[\\D]+");
825 int len = digitStrs.length;
826 if (digitStrs != null && len != 0) {
827 int s = 0;
828 for (int d = 0; d < len; d++) {
829 String digitStr = digitStrs[d];
830 int ix = suffix.indexOf(digitStr, s);
831
832 if (s < ix) {
833 String text = suffix.substring(s, ix);
834 resultBuf.append(text);
835 }
836 if (digitStr != null && digitStr.length() != 0) {
837
838 resultBuf.append(normalizeFloat(digitStr, 6, 0));
839 s = ix + digitStr.length();
840 }
841
842 }
843
844 resultBuf.append(suffix.substring(s));
845 return resultBuf.toString();
846 }
847 }
848
849 return suffix;
850 }
851
852
853
854
855
856
857 public static String getReverseShelfKey(String shelfkey) {
858 StringBuilder resultBuf = new StringBuilder(reverseDefault);
859 if (shelfkey != null && shelfkey.length() > 0)
860 resultBuf.replace(0, shelfkey.length(), reverseAlphanum(shelfkey));
861 return resultBuf.toString();
862 }
863
864
865
866
867
868 private static String reverseAlphanum(String orig) {
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884 StringBuilder reverse = new StringBuilder();
885 for (int ix = 0; ix < orig.length(); ) {
886 int codePoint = Character.toUpperCase(orig.codePointAt(ix));
887 char[] chs = Character.toChars(codePoint);
888
889 if (Character.isLetterOrDigit(codePoint)) {
890 if (chs.length == 1) {
891 char c = chs[0];
892 if (alphanumReverseMap.containsKey(c))
893 reverse.append(alphanumReverseMap.get(c));
894 else {
895
896
897
898 char foldC;
899
900 if (UCharacter.UnicodeBlock.of(c) != UCharacter.UnicodeBlock.COMBINING_DIACRITICAL_MARKS &&
901 UCharacter.UnicodeBlock.of(c) != UCharacter.UnicodeBlock.SPACING_MODIFIER_LETTERS &&
902 (foldC = Utils.foldDiacriticLatinChar(c)) != 0x00)
903
904 reverse.append(alphanumReverseMap.get(foldC));
905 else
906
907
908 reverse.append(SORT_FIRST_CHAR);
909 }
910 } else {
911
912
913 reverse.append(SORT_FIRST_CHAR);
914 }
915 } else
916 reverse.append(reverseNonAlphanum(chs[0]));
917
918 ix += chs.length;
919 }
920
921 return new String(reverse);
922 }
923
924
925
926
927
928 public static char[] reverseNonAlphanum(char ch) {
929
930 switch (ch) {
931 case '.':
932 return Character.toChars('}');
933 case '{':
934 case '|':
935 case '}':
936 case '~':
937
938
939
940 return Character.toChars(' ');
941 default:
942
943 return Character.toChars('~');
944 }
945 }
946
947
948
949
950
951 public static String getDeweyShelfKey(String rawDeweyCallnum) {
952 StringBuilder resultBuf = new StringBuilder();
953
954
955
956
957 String classNum = normalizeFloat(getDeweyB4Cutter(rawDeweyCallnum), 3, 8);
958 resultBuf.append(classNum);
959
960
961
962
963
964
965 String cutter = getDeweyCutter(rawDeweyCallnum);
966 if (cutter != null)
967 resultBuf.append(" " + cutter);
968
969
970 String cutterSuffix = getDeweyCutterSuffix(rawDeweyCallnum);
971 if (cutterSuffix != null)
972 resultBuf.append(" " + normalizeSuffix(cutterSuffix));
973
974
975 if (resultBuf.length() == 0)
976 resultBuf.append(rawDeweyCallnum);
977
978 return resultBuf.toString().trim();
979 }
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998 public static String normalizeFloat(String floatStr, int digitsB4, int digitsAfter) {
999 double value = Double.valueOf(floatStr).doubleValue();
1000
1001 String formatStr = getFormatString(digitsB4) + '.' + getFormatString(digitsAfter);
1002
1003 DecimalFormat normFormat = new DecimalFormat(formatStr);
1004 String norm = normFormat.format(value);
1005 if (norm.endsWith("."))
1006 norm = norm.substring(0, norm.length() - 1);
1007 return norm;
1008 }
1009
1010 private static String PUNCT_PREFIX = "([\\.:\\/])?";
1011 private static String NS_PREFIX = "(n\\.s\\.?\\,? ?)?";
1012 private static String MONTHS = "jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec";
1013 private static String VOL_LETTERS = "[\\:\\/]?(bd|iss|jahrg|new ser|no|part|pts?|ser|t|v|vols?|vyp" + "|" + MONTHS + ")";
1014 private static String VOL_NUMBERS = "\\d+([\\/-]\\d+)?( \\d{4}([\\/-]\\d{4})?)?( ?suppl\\.?)?";
1015 private static String VOL_NUMBERS_LOOSER = "\\d+.*";
1016 private static String VOL_NUM_AS_LETTERS = "[A-Z]([\\/-]\\[A-Z]+)?.*";
1017
1018 private static Pattern VOL_PATTERN = Pattern.compile(PUNCT_PREFIX + NS_PREFIX + VOL_LETTERS + "\\.? ?" + VOL_NUMBERS, Pattern.CASE_INSENSITIVE);
1019 private static Pattern VOL_PATTERN_LOOSER = Pattern.compile(PUNCT_PREFIX + NS_PREFIX + VOL_LETTERS + "\\.? ?" + VOL_NUMBERS_LOOSER, Pattern.CASE_INSENSITIVE);
1020 private static Pattern VOL_PATTERN_LETTERS = Pattern.compile(PUNCT_PREFIX + NS_PREFIX + VOL_LETTERS + "[\\/\\. ]" + VOL_NUM_AS_LETTERS, Pattern.CASE_INSENSITIVE);
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030 public static String removeLCVolSuffix(String rawLCcallnum) {
1031
1032 String suffix = getSecondLCcutterSuffix(rawLCcallnum);
1033 if (suffix == null || suffix.length() == 0) {
1034 String cut1suffix = getFirstLCcutterSuffix(rawLCcallnum);
1035 if (cut1suffix != null) {
1036
1037 String cut2 = getSecondLCcutter(rawLCcallnum);
1038 if (cut2 != null) {
1039 int ix = cut1suffix.indexOf(cut2);
1040 if (ix != -1)
1041 suffix = cut1suffix.substring(0, ix);
1042 else
1043 suffix = cut1suffix;
1044 } else
1045 suffix = cut1suffix;
1046 }
1047 }
1048
1049
1050 if (suffix != null && suffix.length() > 0) {
1051 Matcher matcher = VOL_PATTERN.matcher(suffix);
1052 if (!matcher.find()) {
1053 matcher = VOL_PATTERN_LOOSER.matcher(suffix);
1054 if (!matcher.find()) {
1055 matcher = VOL_PATTERN_LETTERS.matcher(suffix);
1056 }
1057 }
1058
1059 if (matcher.find(0)) {
1060
1061 int ix = rawLCcallnum.indexOf(suffix) + matcher.start();
1062 if (ix != -1 && ix < rawLCcallnum.length()) {
1063 return rawLCcallnum.substring(0, ix).trim();
1064 }
1065 }
1066 }
1067 return rawLCcallnum;
1068 }
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079 public static String removeDeweyVolSuffix(String rawDeweyCallnum) {
1080 String cutSuffix = getDeweyCutterSuffix(rawDeweyCallnum);
1081
1082 if (cutSuffix == null || cutSuffix.length() == 0)
1083 return rawDeweyCallnum;
1084
1085 Matcher matcher = VOL_PATTERN.matcher(cutSuffix);
1086 if (!matcher.find()) {
1087 matcher = VOL_PATTERN_LOOSER.matcher(cutSuffix);
1088 if (!matcher.find()) {
1089 matcher = VOL_PATTERN_LETTERS.matcher(cutSuffix);
1090 }
1091 }
1092
1093 if (matcher.find(0)) {
1094
1095 int ix = rawDeweyCallnum.indexOf(cutSuffix) + matcher.start();
1096 if (ix != -1 && ix < rawDeweyCallnum.length()) {
1097 return rawDeweyCallnum.substring(0, ix).trim();
1098 }
1099 }
1100 return rawDeweyCallnum;
1101 }
1102
1103
1104
1105
1106
1107
1108
1109
1110 public static String addLeadingZeros(String deweyCallNum) {
1111 String result = deweyCallNum;
1112 String b4Cutter = getPortionBeforeCutter(deweyCallNum);
1113
1114
1115
1116
1117 String b4dec = null;
1118 int decIx = b4Cutter.indexOf(".");
1119 if (decIx >= 0)
1120 b4dec = deweyCallNum.substring(0, decIx).trim();
1121 else
1122 b4dec = b4Cutter.trim();
1123
1124 if (b4dec != null) {
1125 switch (b4dec.length()) {
1126 case 1:
1127 result = "00" + deweyCallNum;
1128 break;
1129 case 2:
1130 result = "0" + deweyCallNum;
1131 }
1132 }
1133
1134 return result;
1135 }
1136
1137
1138
1139
1140
1141
1142
1143
1144 private static String getFormatString(int numDigits) {
1145 StringBuilder b4 = new StringBuilder();
1146 if (numDigits < 0)
1147 b4.append("############");
1148 else if (numDigits > 0) {
1149 for (int i = 0; i < numDigits; i++) {
1150 b4.append('0');
1151 }
1152 }
1153 return b4.toString();
1154 }
1155
1156
1157
1158
1159
1160 public static String getSuDocShelfKey(String callNumber) {
1161 String upcaseSuDoccallnum = callNumber.toUpperCase();
1162 StringBuffer shelfKey = new StringBuffer();
1163
1164 String[] cNumSub = upcaseSuDoccallnum.split(SUDOC_REGEX);
1165 for (String str : cNumSub) {
1166 if (StringUtils.isNumeric(str)) {
1167
1168 str = StringUtils.leftPad(str, 5, "0");
1169 shelfKey.append(str);
1170 shelfKey.append(" ");
1171 } else {
1172
1173 str = StringUtils.rightPad(str, 5);
1174 shelfKey.append(str);
1175 shelfKey.append(" ");
1176 }
1177 }
1178 return shelfKey.toString().trim();
1179 }
1180 }