1 package org.kuali.ole.utility.callnumber;
2
3
4
5
6
7
8
9
10
import com.ibm.icu.lang.UCharacter;
import org.apache.commons.lang.StringUtils;
import org.kuali.ole.docstore.common.exception.DocstoreResources;
import org.kuali.ole.docstore.common.exception.DocstoreValidationException;
import org.marc4j.ErrorHandler;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.text.DecimalFormat;
import java.text.DecimalFormatSymbols;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
24
25
26
27
28
29
30
31
32
33 public final class CallNumUtils {
34
35
36
37
38
39
40
41
42
/**
 * Private constructor: the class is final and is never instantiated.
 */
private CallNumUtils() {
}
/** SLF4J logger for this class. */
private static final Logger LOG = LoggerFactory.getLogger(CallNumUtils.class);
/**
 * Matches text that starts with one to three digits, optionally followed by a period and
 * further digits, and then any remaining characters. Used by {@link #isValidDewey(String)}.
 */
public static final Pattern DEWEY_PATTERN = Pattern.compile("^\\d{1,3}(\\.\\d+)?.*");
47
48
49
50
51
52
53
54
/**
 * Required part of an LC class: one capital letter other than I, O, W, X or Y, up to two more
 * capital letters, optional spaces, one or more digits, and an optional decimal fraction
 * (the fraction is a capturing group).
 */
public static final String LC_CLASS_REQ_REGEX = "[A-Z&&[^IOWXY]]{1}[A-Z]{0,2} *\\d+(\\.\\d+)?";




/**
 * Text that is not a cutter. Two capturing alternatives: a digit or lowercase letter followed
 * by word characters, or a capital letter followed by at least one non-digit and then word
 * characters. The alternation is NOT wrapped in a group of its own, so any regex that embeds
 * this constant must supply the surrounding parentheses.
 */
public static final String NOT_CUTTER = "([\\da-z]\\w*)|([A-Z]\\D+[\\w]*)";




/**
 * LC class with an optional class suffix, all inside one capturing group. Because
 * {@link #NOT_CUTTER} carries a bare alternation, the optional inner group matches either
 * "spaces + first NOT_CUTTER alternative" or the second alternative with no leading spaces.
 * The constant contributes five capturing groups in total, so a group appended directly after
 * it is group 6.
 */
public static final String LC_CLASS_W_SUFFIX = "(" + LC_CLASS_REQ_REGEX + "( +" + NOT_CUTTER + ")?)";






/** A cutter: one capital letter followed by one or more digits. */
public static final String CUTTER_REGEX = "[A-Z]\\d+";




/**
 * LC class (with optional suffix), optional spaces, an optional period and then a cutter.
 * The cutter itself is not captured.
 */
public static final String LC_CLASS_N_CUTTER = LC_CLASS_W_SUFFIX + " *\\.?" + CUTTER_REGEX;
/** {@link #LC_CLASS_N_CUTTER} followed by any remaining characters; used by {@link #isValidLC(String)}. */
public static final Pattern LC_CLASS_N_CUTTER_PATTERN = Pattern.compile(LC_CLASS_N_CUTTER + ".*");
79
80
81
82
83
84
/**
 * Dewey class number: one to three digits with an optional decimal fraction (the fraction is
 * capturing group 1 of any regex that starts with this constant).
 */
public static final String DEWEY_CLASS_REGEX = "\\d{1,3}(\\.\\d+)?";






/** Shortest accepted Dewey cutter: a single capital letter. */
public static final String DEWEY_MIN_CUTTER_LETTER_REGEX = "[A-Z]";
/** Dewey cutter made of a capital letter followed by one to three digits. */
public static final String DEWEY_MIN_CUTTER_REGEX = "[A-Z]\\d{1,3}";
/** Letter-and-digits cutter immediately followed by one or more capital letters. */
public static final String DEWEY_CUTTER_TRAILING_LETTERS_REGEX = DEWEY_MIN_CUTTER_REGEX + "[A-Z]+";
/** Letter-and-digits cutter, one or more spaces, then one or more capital letters. */
public static final String DEWEY_CUTTER_SPACE_TRAILING_LETTERS_REGEX = DEWEY_MIN_CUTTER_REGEX + " +[A-Z]+";
/**
 * Letter-and-digits cutter, optional spaces, then zero or more capital letters. The trailing
 * {@code *+} is a possessive quantifier, so the letters are never given back on backtracking.
 */
public static final String DEWEY_FULL_CUTTER_REGEX = DEWEY_MIN_CUTTER_REGEX + " *[A-Z]*+";




/** Dewey class, optional spaces, an optional period and then a full cutter. */
public static final String DEWEY_CLASS_N_CUTTER_REGEX = DEWEY_CLASS_REGEX + " *\\.?" + DEWEY_FULL_CUTTER_REGEX;
/**
 * {@link #DEWEY_CLASS_N_CUTTER_REGEX} followed by any remaining characters; used by
 * {@link #isValidDeweyWithCutter(String)}.
 */
public static final Pattern DEWEY_CLASS_N_CUTTER_PATTERN = Pattern.compile(DEWEY_CLASS_N_CUTTER_REGEX + ".*");
103
104
105
106
107
108
/**
 * Split expression used by {@link #getSuDocShelfKey(String)}: it matches any run of characters
 * other than A-Z and 0-9, and also the zero-width boundary between a capital letter and a
 * digit (in either order), so letters and numbers end up in separate pieces.
 */
public static final String SUDOC_REGEX = "[^A-Z0-9]+|(?<=[A-Z])(?=[0-9])|(?<=[0-9])(?=[A-Z])";
110
111
112 private static Map<Character, Character> alphanumReverseMap = new HashMap<Character, Character>();
113
114 static {
115 alphanumReverseMap.put('0', 'Z');
116 alphanumReverseMap.put('1', 'Y');
117 alphanumReverseMap.put('2', 'X');
118 alphanumReverseMap.put('3', 'W');
119 alphanumReverseMap.put('4', 'V');
120 alphanumReverseMap.put('5', 'U');
121 alphanumReverseMap.put('6', 'T');
122 alphanumReverseMap.put('7', 'S');
123 alphanumReverseMap.put('8', 'R');
124 alphanumReverseMap.put('9', 'Q');
125 alphanumReverseMap.put('A', 'P');
126 alphanumReverseMap.put('B', 'O');
127 alphanumReverseMap.put('C', 'N');
128 alphanumReverseMap.put('D', 'M');
129 alphanumReverseMap.put('E', 'L');
130 alphanumReverseMap.put('F', 'K');
131 alphanumReverseMap.put('G', 'J');
132 alphanumReverseMap.put('H', 'I');
133 alphanumReverseMap.put('I', 'H');
134 alphanumReverseMap.put('J', 'G');
135 alphanumReverseMap.put('K', 'F');
136 alphanumReverseMap.put('L', 'E');
137 alphanumReverseMap.put('M', 'D');
138 alphanumReverseMap.put('N', 'C');
139 alphanumReverseMap.put('O', 'B');
140 alphanumReverseMap.put('P', 'A');
141 alphanumReverseMap.put('Q', '9');
142 alphanumReverseMap.put('R', '8');
143 alphanumReverseMap.put('S', '7');
144 alphanumReverseMap.put('T', '6');
145 alphanumReverseMap.put('U', '5');
146 alphanumReverseMap.put('V', '4');
147 alphanumReverseMap.put('W', '3');
148 alphanumReverseMap.put('X', '2');
149 alphanumReverseMap.put('Y', '1');
150 alphanumReverseMap.put('Z', '0');
151 }
152
153
154
155
156
/** Character appended by the reverse-key code for input it cannot map; the lowest char value. */
public static char SORT_FIRST_CHAR = Character.MIN_VALUE;
/**
 * Template for reverse shelf keys: fifty '~' characters (the builder is allocated with room
 * for 75). NOTE(review): this is a public, mutable builder; callers are expected to copy it.
 */
public static StringBuilder reverseDefault = new StringBuilder(75);

static {
    // Fill the (initially empty) template up to its fifty-character default length.
    while (reverseDefault.length() < 50) {
        reverseDefault.append('~');
    }
}
167
168
169
170
171
172
173
174 public static final boolean isValidLC(String possLCval) {
175 if (possLCval != null && LC_CLASS_N_CUTTER_PATTERN.matcher(possLCval.trim()).matches())
176 return true;
177 return false;
178 }
179
180
181
182
183
184 public static final boolean isValidDeweyWithCutter(String possDeweyVal) {
185 if (possDeweyVal != null && DEWEY_CLASS_N_CUTTER_PATTERN.matcher(possDeweyVal.trim()).matches())
186 return true;
187 return false;
188 }
189
190
191
192
193
194 public static final boolean isValidDewey(String possDeweyVal) {
195 if (possDeweyVal != null && DEWEY_PATTERN.matcher(possDeweyVal.trim()).matches())
196 return true;
197 return false;
198 }
199
200
201
202
203
204 public static final String getPortionBeforeCutter(String callnum) {
205
206
207
208
209
210
211
212 String beginCutterRegex = "( +|(\\.[A-Z])| */)";
213
214 String[] pieces = callnum.split(beginCutterRegex);
215 if (pieces.length == 0 || pieces[0] == null || pieces[0].length() == 0)
216 return null;
217 else
218 return pieces[0].trim();
219 }
220
221
222
223
224
225 public static final String getLCB4FirstCutter(String callnum) {
226 String result = null;
227
228 String cutter = getFirstLCcutter(callnum);
229 if (cutter != null && cutter.length() > 0) {
230
231 int ix = callnum.indexOf(cutter);
232 String lets = getLCstartLetters(callnum);
233 if (ix < lets.length())
234 ix = callnum.indexOf(cutter, lets.length());
235
236 if (ix > 0) {
237 result = callnum.substring(0, ix).trim();
238 if (result.endsWith("."))
239 result = result.substring(0, result.length() - 1).trim();
240 } else
241 result = callnum;
242 } else
243 result = callnum;
244
245 return result;
246 }
247
248
249
250
251
252 public static String getLCstartLetters(String rawLCcallnum) {
253 String result = null;
254 if (rawLCcallnum != null && rawLCcallnum.length() > 0) {
255 String[] lcClass = rawLCcallnum.split("[^A-Z]+");
256 if (lcClass.length > 0)
257 result = lcClass[0];
258 }
259 return result;
260 }
261
262
263
264
265
266
267
268
269 public static String getLCClassDigits(String rawLCcallnum) {
270 String result = null;
271
272 String rawClass = getLCB4FirstCutter(rawLCcallnum);
273 if (rawClass != null && rawClass.length() > 0) {
274 String[] pieces = rawClass.split("[A-Z ]+");
275 if (pieces.length > 1)
276 result = pieces[1].trim();
277 }
278 return result;
279 }
280
281
282
283
284
285
286
287 public static String getLCClassSuffix(String rawLCcallnum) {
288 String result = null;
289
290 String b4cutter = getLCB4FirstCutter(rawLCcallnum);
291 if (b4cutter == null || b4cutter.length() == 0)
292 return null;
293
294 String classDigits = getLCClassDigits(rawLCcallnum);
295
296 if (classDigits != null && classDigits.length() > 0) {
297 int reqClassLen = b4cutter.indexOf(classDigits) + classDigits.length();
298
299 if (b4cutter.length() > reqClassLen)
300 result = b4cutter.substring(reqClassLen).trim();
301 }
302
303 return result;
304 }
305
306
307
308
309
310
311
312
313 public static String getFirstLCcutter(String rawCallnum) {
314 String result = null;
315
316 String regex = LC_CLASS_W_SUFFIX + " *\\.?(" + CUTTER_REGEX + ")";
317 Pattern pattern = Pattern.compile(regex);
318 Matcher matcher = pattern.matcher(rawCallnum);
319
320 if (matcher.find())
321 result = matcher.group(6).trim();
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336 return result;
337 }
338
339
340
341
342
343
344
345 public static String getFirstLCcutterSuffix(String rawLCcallnum) {
346 String result = null;
347
348 String regex = LC_CLASS_N_CUTTER + " *(" + NOT_CUTTER + ")*";
349 Pattern pattern = Pattern.compile(regex);
350 Matcher matcher = pattern.matcher(rawLCcallnum);
351
352
353 if (matcher.find() && matcher.groupCount() > 5
354 && matcher.group(6) != null && matcher.group(6).length() > 0) {
355
356
357
358 result = matcher.group(6).trim();
359
360
361
362 int endLastIx = matcher.end(6);
363 if (endLastIx < rawLCcallnum.length()) {
364
365 Pattern cutterPat = Pattern.compile(" *\\." + CUTTER_REGEX);
366 matcher.usePattern(cutterPat);
367 if (matcher.find(endLastIx)) {
368 if (endLastIx < matcher.start())
369 result = result.trim() + " " + rawLCcallnum.substring(endLastIx, matcher.start()).trim();
370 } else
371 result = result + rawLCcallnum.substring(endLastIx);
372 }
373 } else {
374
375
376
377
378 String afterLCclassNCutter = rawLCcallnum.replaceFirst(LC_CLASS_N_CUTTER + " *", "");
379 String cutterRegex = LC_CLASS_N_CUTTER + " *(.*)\\." + CUTTER_REGEX;
380
381 pattern = Pattern.compile(cutterRegex);
382 matcher = pattern.matcher(rawLCcallnum);
383
384 if (matcher.find() && matcher.groupCount() > 5
385 && matcher.group(6) != null && matcher.group(6).length() > 0)
386
387 result = matcher.group(6).trim();
388 else {
389 regex = LC_CLASS_N_CUTTER + " \\.\\.\\.$";
390 pattern = Pattern.compile(regex);
391 matcher = pattern.matcher(rawLCcallnum);
392 if (matcher.find())
393 result = " ...";
394 }
395 }
396 return result;
397 }
398
399
400
401
402
403
404
405
/**
 * Returns the second cutter of an LC call number.
 *
 * <p>When the first cutter has no suffix, the second cutter is the cutter that directly
 * follows the class and first cutter (optional spaces and period allowed). Otherwise the
 * first cutter found in the text after the first cutter's suffix is used and, failing that,
 * a period-prefixed cutter found inside the suffix itself.
 *
 * @param rawLCcallnum the LC call number
 * @return the second cutter without a leading period, or null when none is found
 */
public static String getSecondLCcutter(String rawLCcallnum) {
    String result = null;

    String firstCutSuffix = getFirstLCcutterSuffix(rawLCcallnum);
    if (firstCutSuffix == null || firstCutSuffix.length() == 0) {
        // No suffix after the first cutter: group 6 is the cutter right after it.
        String regex = LC_CLASS_N_CUTTER + " *\\.?(" + CUTTER_REGEX + ")";
        Pattern pattern = Pattern.compile(regex);
        Matcher matcher = pattern.matcher(rawLCcallnum);
        if (matcher.find() && matcher.groupCount() > 5
                && matcher.group(6) != null && matcher.group(6).length() > 0) {
            result = matcher.group(6).trim();
        }
    } else {
        // Look for a cutter in whatever follows the first cutter's suffix.
        // NOTE(review): indexOf can return -1 here, because getFirstLCcutterSuffix may return
        // text that was re-spaced or synthesised and so is not a literal substring; ix is then
        // just firstCutSuffix.length() - 1. Confirm the intended behaviour before relying on it.
        int ix = rawLCcallnum.indexOf(firstCutSuffix) + firstCutSuffix.length();
        if (ix < rawLCcallnum.length()) {
            String remaining = rawLCcallnum.substring(ix).trim();
            Pattern pattern = Pattern.compile("(" + CUTTER_REGEX + ")");
            Matcher matcher = pattern.matcher(remaining);
            if (matcher.find() && matcher.group(1) != null && matcher.group(1).length() > 0) {
                result = matcher.group(1).trim();
            }
        }
        // Fall back to a period-prefixed cutter inside the suffix itself.
        if (result == null) {
            Pattern pattern = Pattern.compile("\\.(" + CUTTER_REGEX + ")");
            Matcher matcher = pattern.matcher(firstCutSuffix);
            if (matcher.find() && matcher.group(1) != null && matcher.group(1).length() > 0) {
                result = matcher.group(1).trim();
            }
        }
    }
    return result;
}
442
443
444
445
446
447
448
449 public static String getSecondLCcutterSuffix(String rawLCcallnum) {
450 String result = null;
451
452 String secondCutter = getSecondLCcutter(rawLCcallnum);
453 if (secondCutter != null && secondCutter.length() > 0) {
454
455 int ix = rawLCcallnum.indexOf(secondCutter) + secondCutter.length();
456 if (ix < rawLCcallnum.length())
457 result = rawLCcallnum.substring(ix).trim();
458 }
459
460 return result;
461 }
462
463
464
465
466
467
468
469
470
471 public static String getSecondLCcutterYearSuffix(String rawLCcallnum) {
472 String result = null;
473
474 String regex = LC_CLASS_N_CUTTER + " *(" + NOT_CUTTER + ")*";
475 Pattern pattern = Pattern.compile(regex);
476 Matcher matcher = pattern.matcher(rawLCcallnum);
477
478 if (matcher.find() && matcher.groupCount() > 5
479 && matcher.group(6) != null && matcher.group(6).length() > 0) {
480
481
482
483 result = matcher.group(6);
484
485
486
487 int endLastIx = matcher.end(6);
488 if (endLastIx < rawLCcallnum.length()) {
489 Pattern cutterPat = Pattern.compile(" *\\.?" + CUTTER_REGEX + ".*");
490 matcher.usePattern(cutterPat);
491 if (matcher.find(endLastIx)) {
492 if (endLastIx < matcher.start())
493 result = result.trim() + " " + rawLCcallnum.substring(endLastIx, matcher.start()).trim();
494 } else
495 result = result.trim() + rawLCcallnum.substring(endLastIx);
496 }
497 }
498
499 return result;
500 }
501
502
503
504
505
506 public static final String getDeweyB4Cutter(String callnum) {
507 String result = null;
508
509 String entireCallNumRegex = "(" + DEWEY_CLASS_REGEX + ").*";
510 Pattern pattern = Pattern.compile(entireCallNumRegex);
511 Matcher matcher = pattern.matcher(callnum);
512 if (matcher.find())
513 result = matcher.group(1).trim();
514
515 return result;
516 }
517
518
519
520
521
522
523
524
/**
 * Returns the cutter of a Dewey call number.
 *
 * <p>Seven patterns are tried. Each starts with the Dewey class, optional spaces and an
 * optional period; capturing group 2 is a cutter form and group 3 is what follows it (group 1
 * is the fraction group inside {@link #DEWEY_CLASS_REGEX}). Patterns 1-3 require following
 * text built from {@link #NOT_CUTTER}; patterns 4-7 accept any following text.
 *
 * @param rawCallnum the Dewey call number
 * @return the trimmed cutter, or null when none of the patterns match
 */
public static String getDeweyCutter(String rawCallnum) {
    String result = null;

    // Cutter forms: letter+digits+letters, letter+digits, letter+digits+spaces+letters, and
    // (pattern 7 only) a single letter.
    String regex1 = DEWEY_CLASS_REGEX + " *\\.?(" + DEWEY_CUTTER_TRAILING_LETTERS_REGEX + ")( +" + NOT_CUTTER + ".*)";
    String regex2 = DEWEY_CLASS_REGEX + " *\\.?(" + DEWEY_MIN_CUTTER_REGEX + ")( +" + NOT_CUTTER + ".*)";
    String regex3 = DEWEY_CLASS_REGEX + " *\\.?(" + DEWEY_CUTTER_SPACE_TRAILING_LETTERS_REGEX + ")( +" + NOT_CUTTER + ".*)";
    String regex4 = DEWEY_CLASS_REGEX + " *\\.?(" + DEWEY_CUTTER_TRAILING_LETTERS_REGEX + ")(.*)";
    String regex5 = DEWEY_CLASS_REGEX + " *\\.?(" + DEWEY_MIN_CUTTER_REGEX + ")(.*)";
    String regex6 = DEWEY_CLASS_REGEX + " *\\.?(" + DEWEY_CUTTER_SPACE_TRAILING_LETTERS_REGEX + ")(.*)";
    String regex7 = DEWEY_CLASS_REGEX + " *\\.?(" + DEWEY_MIN_CUTTER_LETTER_REGEX + ")(.*)";
    Pattern pat1 = Pattern.compile(regex1);
    Pattern pat2 = Pattern.compile(regex2);
    Pattern pat3 = Pattern.compile(regex3);
    Pattern pat4 = Pattern.compile(regex4);
    Pattern pat5 = Pattern.compile(regex5);
    Pattern pat6 = Pattern.compile(regex6);
    Pattern pat7 = Pattern.compile(regex7);

    // Select the first of patterns 1-3 that matches (pattern 3 is left untested here).
    Matcher matcher = pat1.matcher(rawCallnum);
    if (!matcher.find()) {
        matcher = pat2.matcher(rawCallnum);
        if (!matcher.find()) {
            matcher = pat3.matcher(rawCallnum);
        }
    }

    // NOTE(review): when pattern 1 or 2 already matched above, this find() searches for a
    // FURTHER match after the first one (getDeweyCutterSuffix uses find(0) instead). In that
    // case it normally fails and control drops to the pattern 4-7 chain below. Switching to
    // find(0) appears to change results for inputs such as "123 A12BCD", so confirm the intent
    // before changing it.
    if (matcher.find()) {
        String cutter = matcher.group(2);
        String suffix = matcher.group(3);
        if (suffix.length() == 0)
            result = cutter.trim();
        else {
            // A space between cutter and suffix means the cutter is complete as matched;
            // otherwise anything after the cutter's last space is treated as suffix text.
            if (suffix.startsWith(" ") || cutter.endsWith(" "))
                result = cutter.trim();
            else {
                int ix = cutter.lastIndexOf(' ');
                if (ix != -1)
                    result = cutter.substring(0, ix);
                else
                    result = cutter.trim();
            }
        }
    } else {
        // Looser patterns, longest cutter form first; the first match supplies the cutter.
        matcher = pat4.matcher(rawCallnum);
        if (matcher.find())
            result = matcher.group(2);
        else {
            matcher = pat5.matcher(rawCallnum);
            if (matcher.find())
                result = matcher.group(2);
            else {
                matcher = pat6.matcher(rawCallnum);
                if (matcher.find())
                    result = matcher.group(2);
                else {
                    matcher = pat7.matcher(rawCallnum);
                    if (matcher.find())
                        result = matcher.group(2);
                }
            }
        }
    }
    if (result != null)
        return result.trim();
    return result;
}
594
595
596
597
598
599
/**
 * Returns the text that follows the cutter of a Dewey call number.
 *
 * <p>The suffix is first taken as everything after the first occurrence of the cutter
 * reported by {@link #getDeweyCutter(String)}. Only when that yields nothing are the seven
 * class-plus-cutter patterns tried directly, using capturing group 3 (the text after the
 * cutter) of the first pattern that matches.
 *
 * @param rawCallnum the Dewey call number; may be null
 * @return the trimmed suffix, or null when the input is null or empty or no non-blank suffix
 *         is found
 */
public static String getDeweyCutterSuffix(String rawCallnum) {
    if (rawCallnum == null || rawCallnum.length() == 0)
        return null;
    String result = null;

    String cutter = getDeweyCutter(rawCallnum);
    if (cutter != null) {
        // Everything after the first occurrence of the cutter text.
        int ix = rawCallnum.indexOf(cutter) + cutter.length();
        result = rawCallnum.substring(ix).trim();
    }

    if (result == null || result.length() == 0) {
        // Same seven patterns as getDeweyCutter: group 2 is the cutter, group 3 what follows.
        String regex1 = DEWEY_CLASS_REGEX + " *\\.?(" + DEWEY_CUTTER_TRAILING_LETTERS_REGEX + ")( +" + NOT_CUTTER + ".*)";
        String regex2 = DEWEY_CLASS_REGEX + " *\\.?(" + DEWEY_MIN_CUTTER_REGEX + ")( +" + NOT_CUTTER + ".*)";
        String regex3 = DEWEY_CLASS_REGEX + " *\\.?(" + DEWEY_CUTTER_SPACE_TRAILING_LETTERS_REGEX + ")( +" + NOT_CUTTER + ".*)";
        String regex4 = DEWEY_CLASS_REGEX + " *\\.?(" + DEWEY_CUTTER_TRAILING_LETTERS_REGEX + ")(.*)";
        String regex5 = DEWEY_CLASS_REGEX + " *\\.?(" + DEWEY_MIN_CUTTER_REGEX + ")(.*)";
        String regex6 = DEWEY_CLASS_REGEX + " *\\.?(" + DEWEY_CUTTER_SPACE_TRAILING_LETTERS_REGEX + ")(.*)";
        String regex7 = DEWEY_CLASS_REGEX + " *\\.?(" + DEWEY_MIN_CUTTER_LETTER_REGEX + ")(.*)";
        Pattern pat1 = Pattern.compile(regex1);
        Pattern pat2 = Pattern.compile(regex2);
        Pattern pat3 = Pattern.compile(regex3);
        Pattern pat4 = Pattern.compile(regex4);
        Pattern pat5 = Pattern.compile(regex5);
        Pattern pat6 = Pattern.compile(regex6);
        Pattern pat7 = Pattern.compile(regex7);

        // Keep the matcher of the first pattern that matches (pattern 7 is left untested).
        Matcher matcher = pat1.matcher(rawCallnum);
        if (!matcher.find()) {
            matcher = pat2.matcher(rawCallnum);
            if (!matcher.find()) {
                matcher = pat3.matcher(rawCallnum);
                if (!matcher.find()) {
                    matcher = pat4.matcher(rawCallnum);
                    if (!matcher.find()) {
                        matcher = pat5.matcher(rawCallnum);
                        if (!matcher.find()) {
                            matcher = pat6.matcher(rawCallnum);
                            if(!matcher.find()){
                                matcher = pat7.matcher(rawCallnum);
                            }
                        }
                    }
                }
            }
        }

        // find(0) resets the matcher, so the selected pattern is applied from the start again.
        if (matcher.find(0)) {
            cutter = matcher.group(2);
            String suffix = matcher.group(3);
            if (suffix.trim().length() > 0) {
                // With a space between cutter and suffix the suffix stands alone; otherwise
                // any text after the cutter's last space is prepended to the suffix.
                if (suffix.startsWith(" ") || cutter.endsWith(" "))
                    result = suffix;
                else {
                    int ix = cutter.lastIndexOf(' ');
                    if (ix != -1)
                        result = cutter.substring(ix) + suffix;
                    else
                        result = suffix;
                }
            }
        }
    }
    if (result != null)
        result = result.trim();
    if (result == null || result.trim().length() == 0)
        return null;
    else
        return result;
}
673
674
675
676
677
678
679
680
681
682
683 public static String normalizeCallnum(String rawCallnum) {
684
685
686 String normalizedCallnum = rawCallnum.trim().replaceAll("\\s\\s+", " ");
687
688 normalizedCallnum = normalizedCallnum.replaceAll("\\. \\.", " .");
689
690 normalizedCallnum = normalizedCallnum.replaceAll("(\\d+\\.) ([A-Z])", "$1$2");
691
692 if (normalizedCallnum.endsWith("."))
693 normalizedCallnum = normalizedCallnum.substring(0, normalizedCallnum.length() - 1).trim();
694
695
696
697
698
699 return normalizedCallnum;
700 }
701
702
703
704
705
706 static String normalizeLCcallnum(String rawLCcallnum) {
707 String normCallnum = normalizeCallnum(rawLCcallnum);
708
709 return normCallnum.replaceAll("^([A-Z][A-Z]?[A-Z]?) ([0-9])", "$1$2");
710 }
711
712
713
714
715
716
717
718
719 public static String getLCShelfkey(String rawLCcallnum, String recid) {
720 return (getLCShelfkey(rawLCcallnum, recid, null));
721 }
722
723
724
725
726
727 public static String getLCShelfkey(String rawLCcallnum, String recid, ErrorHandler errors) {
728 StringBuilder resultBuf = new StringBuilder();
729 String upcaseLCcallnum = rawLCcallnum.toUpperCase();
730
731
732
733
734
735 StringBuilder initLetBuf = new StringBuilder(" ");
736 String lets = getLCstartLetters(upcaseLCcallnum);
737 if (lets != null) {
738 initLetBuf.replace(0, lets.length(), lets);
739 } else {
740 if ((recid != null) && (!rawLCcallnum.startsWith("XX")))
741 {
742 if (errors == null) {
743 System.err.println("Problem creating shelfkey for record " + recid + "; call number: " + rawLCcallnum);
744 } else {
745 errors.addError(ErrorHandler.ERROR_TYPO, "Problem creating shelfkey for record " + recid + "; call number: " + rawLCcallnum);
746 }
747 }
748 return (rawLCcallnum + " ");
749 }
750 resultBuf.append(initLetBuf);
751
752 try {
753
754
755 String digitStr = getLCClassDigits(upcaseLCcallnum);
756 if (digitStr != null)
757 resultBuf.append(normalizeFloat(digitStr, 4, 6));
758 else
759 resultBuf.append(normalizeFloat("0", 4, 6));
760
761
762 String classSuffix = getLCClassSuffix(upcaseLCcallnum);
763 if (classSuffix != null)
764 resultBuf.append(" " + normalizeSuffix(classSuffix));
765
766
767 String firstCutter = getFirstLCcutter(upcaseLCcallnum);
768 if (firstCutter != null) {
769 resultBuf.append(" " + normalizeCutter(firstCutter, 6));
770
771
772 String firstCutterSuffix = getFirstLCcutterSuffix(upcaseLCcallnum);
773 if (firstCutterSuffix != null)
774 resultBuf.append(" " + normalizeSuffix(firstCutterSuffix));
775
776
777 String secondCutter = getSecondLCcutter(upcaseLCcallnum);
778 if (secondCutter != null) {
779 resultBuf.append(" " + normalizeCutter(secondCutter, 6));
780
781 String secondCutterSuffix = getSecondLCcutterSuffix(upcaseLCcallnum);
782 if (secondCutterSuffix != null)
783 resultBuf.append(" " + normalizeSuffix(secondCutterSuffix));
784 }
785 }
786 } catch (NumberFormatException e) {
787
788 if ((recid != null) && (!rawLCcallnum.startsWith("XX")))
789 {
790 if (errors == null) {
791 System.err.println("Problem creating shelfkey for record " + recid + "; call number: " + rawLCcallnum);
792 } else {
793 errors.addError(ErrorHandler.ERROR_TYPO, "Problem creating shelfkey for record " + recid + "; call number: " + rawLCcallnum);
794 }
795 }
796
797 resultBuf = new StringBuilder();
798 }
799
800 if (resultBuf.length() == 0)
801 resultBuf.append(upcaseLCcallnum);
802
803 return resultBuf.toString().trim();
804 }
805
806
807
808
809
810 private static String normalizeCutter(String cutter, int numDigits) {
811 String result = null;
812 if (cutter != null && cutter.length() > 0) {
813 String cutLets = getLCstartLetters(cutter);
814 String cutDigs = cutter.substring(cutLets.length());
815 String norm = null;
816 if (cutDigs != null && cutDigs.length() > 0) {
817 try {
818
819 Integer.parseInt(cutDigs);
820 norm = normalizeFloat("." + cutDigs, 1, numDigits);
821 } catch (NumberFormatException e) {
822 norm = cutDigs;
823 }
824 } else if (cutDigs.length() == 0 && cutLets.length() == 1)
825
826 norm = normalizeFloat("0", 1, numDigits);
827
828 result = cutLets + norm;
829 }
830 return result;
831 }
832
833
834
835
836
837 public static String normalizeSuffix(String suffix) {
838 if (suffix != null && suffix.length() > 0) {
839 StringBuilder resultBuf = new StringBuilder(suffix.length());
840
841 String[] digitStrs = suffix.split("[\\D]+");
842 int len = digitStrs.length;
843 if (digitStrs != null && len != 0) {
844 int s = 0;
845 for (int d = 0; d < len; d++) {
846 String digitStr = digitStrs[d];
847 int ix = suffix.indexOf(digitStr, s);
848
849 if (s < ix) {
850 String text = suffix.substring(s, ix);
851 resultBuf.append(text);
852 }
853 if (digitStr != null && digitStr.length() != 0) {
854
855 resultBuf.append(normalizeFloat(digitStr, 6, 0));
856 s = ix + digitStr.length();
857 }
858
859 }
860
861 resultBuf.append(suffix.substring(s));
862 return resultBuf.toString();
863 }
864 }
865
866 return suffix;
867 }
868
869
870
871
872
873
874 public static String getReverseShelfKey(String shelfkey) {
875 StringBuilder resultBuf = new StringBuilder(reverseDefault);
876 if (shelfkey != null && shelfkey.length() > 0)
877 resultBuf.replace(0, shelfkey.length(), reverseAlphanum(shelfkey));
878 return resultBuf.toString();
879 }
880
881
882
883
884
885 private static String reverseAlphanum(String orig) {
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901 StringBuilder reverse = new StringBuilder();
902 for (int ix = 0; ix < orig.length(); ) {
903 int codePoint = Character.toUpperCase(orig.codePointAt(ix));
904 char[] chs = Character.toChars(codePoint);
905
906 if (Character.isLetterOrDigit(codePoint)) {
907 if (chs.length == 1) {
908 char c = chs[0];
909 if (alphanumReverseMap.containsKey(c))
910 reverse.append(alphanumReverseMap.get(c));
911 else {
912
913
914
915 char foldC;
916
917 if (UCharacter.UnicodeBlock.of(c) != UCharacter.UnicodeBlock.COMBINING_DIACRITICAL_MARKS &&
918 UCharacter.UnicodeBlock.of(c) != UCharacter.UnicodeBlock.SPACING_MODIFIER_LETTERS &&
919 (foldC = Utils.foldDiacriticLatinChar(c)) != 0x00)
920
921 reverse.append(alphanumReverseMap.get(foldC));
922 else
923
924
925 reverse.append(SORT_FIRST_CHAR);
926 }
927 } else {
928
929
930 reverse.append(SORT_FIRST_CHAR);
931 }
932 } else
933 reverse.append(reverseNonAlphanum(chs[0]));
934
935 ix += chs.length;
936 }
937
938 return new String(reverse);
939 }
940
941
942
943
944
945 public static char[] reverseNonAlphanum(char ch) {
946
947 switch (ch) {
948 case '.':
949 return Character.toChars('}');
950 case '{':
951 case '|':
952 case '}':
953 case '~':
954
955
956
957 return Character.toChars(' ');
958 default:
959
960 return Character.toChars('~');
961 }
962 }
963
964
965
966
967
968 public static String getDeweyShelfKey(String rawDeweyCallnum) {
969 StringBuilder resultBuf = new StringBuilder();
970
971
972
973
974 String classNum = normalizeFloat(getDeweyB4Cutter(rawDeweyCallnum), 3, 8);
975 resultBuf.append(classNum);
976
977
978
979
980
981
982 String cutter = getDeweyCutter(rawDeweyCallnum);
983 if (cutter != null)
984 resultBuf.append(" " + cutter);
985
986
987 String cutterSuffix = getDeweyCutterSuffix(rawDeweyCallnum);
988 if (cutterSuffix != null)
989 resultBuf.append(" " + normalizeSuffix(cutterSuffix));
990
991
992 if (resultBuf.length() == 0)
993 resultBuf.append(rawDeweyCallnum);
994
995 return resultBuf.toString().trim();
996 }
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015 public static String normalizeFloat(String floatStr, int digitsB4, int digitsAfter) {
1016 String norm = null;
1017 try {
1018 double value = Double.valueOf(floatStr).doubleValue();
1019 String formatStr = getFormatString(digitsB4) + '.' + getFormatString(digitsAfter);
1020 DecimalFormat normFormat = new DecimalFormat(formatStr);
1021 norm = normFormat.format(value);
1022 if (norm.endsWith("."))
1023 norm = norm.substring(0, norm.length() - 1);
1024
1025 } catch (NumberFormatException e) {
1026 LOG.error("Exception while Normalizing Call Number",e);
1027 throw new DocstoreValidationException(e);
1028 }
1029 return norm;
1030 }
1031
1032 private static String PUNCT_PREFIX = "([\\.:\\/])?";
1033 private static String NS_PREFIX = "(n\\.s\\.?\\,? ?)?";
1034 private static String MONTHS = "jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec";
1035 private static String VOL_LETTERS = "[\\:\\/]?(bd|iss|jahrg|new ser|no|part|pts?|ser|t|v|vols?|vyp" + "|" + MONTHS + ")";
1036 private static String VOL_NUMBERS = "\\d+([\\/-]\\d+)?( \\d{4}([\\/-]\\d{4})?)?( ?suppl\\.?)?";
1037 private static String VOL_NUMBERS_LOOSER = "\\d+.*";
1038 private static String VOL_NUM_AS_LETTERS = "[A-Z]([\\/-]\\[A-Z]+)?.*";
1039
1040 private static Pattern VOL_PATTERN = Pattern.compile(PUNCT_PREFIX + NS_PREFIX + VOL_LETTERS + "\\.? ?" + VOL_NUMBERS, Pattern.CASE_INSENSITIVE);
1041 private static Pattern VOL_PATTERN_LOOSER = Pattern.compile(PUNCT_PREFIX + NS_PREFIX + VOL_LETTERS + "\\.? ?" + VOL_NUMBERS_LOOSER, Pattern.CASE_INSENSITIVE);
1042 private static Pattern VOL_PATTERN_LETTERS = Pattern.compile(PUNCT_PREFIX + NS_PREFIX + VOL_LETTERS + "[\\/\\. ]" + VOL_NUM_AS_LETTERS, Pattern.CASE_INSENSITIVE);
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052 public static String removeLCVolSuffix(String rawLCcallnum) {
1053
1054 String suffix = getSecondLCcutterSuffix(rawLCcallnum);
1055 if (suffix == null || suffix.length() == 0) {
1056 String cut1suffix = getFirstLCcutterSuffix(rawLCcallnum);
1057 if (cut1suffix != null) {
1058
1059 String cut2 = getSecondLCcutter(rawLCcallnum);
1060 if (cut2 != null) {
1061 int ix = cut1suffix.indexOf(cut2);
1062 if (ix != -1)
1063 suffix = cut1suffix.substring(0, ix);
1064 else
1065 suffix = cut1suffix;
1066 } else
1067 suffix = cut1suffix;
1068 }
1069 }
1070
1071
1072 if (suffix != null && suffix.length() > 0) {
1073 Matcher matcher = VOL_PATTERN.matcher(suffix);
1074 if (!matcher.find()) {
1075 matcher = VOL_PATTERN_LOOSER.matcher(suffix);
1076 if (!matcher.find()) {
1077 matcher = VOL_PATTERN_LETTERS.matcher(suffix);
1078 }
1079 }
1080
1081 if (matcher.find(0)) {
1082
1083 int ix = rawLCcallnum.indexOf(suffix) + matcher.start();
1084 if (ix != -1 && ix < rawLCcallnum.length()) {
1085 return rawLCcallnum.substring(0, ix).trim();
1086 }
1087 }
1088 }
1089 return rawLCcallnum;
1090 }
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101 public static String removeDeweyVolSuffix(String rawDeweyCallnum) {
1102 String cutSuffix = getDeweyCutterSuffix(rawDeweyCallnum);
1103
1104 if (cutSuffix == null || cutSuffix.length() == 0)
1105 return rawDeweyCallnum;
1106
1107 Matcher matcher = VOL_PATTERN.matcher(cutSuffix);
1108 if (!matcher.find()) {
1109 matcher = VOL_PATTERN_LOOSER.matcher(cutSuffix);
1110 if (!matcher.find()) {
1111 matcher = VOL_PATTERN_LETTERS.matcher(cutSuffix);
1112 }
1113 }
1114
1115 if (matcher.find(0)) {
1116
1117 int ix = rawDeweyCallnum.indexOf(cutSuffix) + matcher.start();
1118 if (ix != -1 && ix < rawDeweyCallnum.length()) {
1119 return rawDeweyCallnum.substring(0, ix).trim();
1120 }
1121 }
1122 return rawDeweyCallnum;
1123 }
1124
1125
1126
1127
1128
1129
1130
1131
1132 public static String addLeadingZeros(String deweyCallNum) {
1133 String result = deweyCallNum;
1134 String b4Cutter = getPortionBeforeCutter(deweyCallNum);
1135
1136
1137
1138
1139 String b4dec = null;
1140 int decIx = b4Cutter.indexOf(".");
1141 if (decIx >= 0)
1142 b4dec = deweyCallNum.substring(0, decIx).trim();
1143 else
1144 b4dec = b4Cutter.trim();
1145
1146 if (b4dec != null) {
1147 switch (b4dec.length()) {
1148 case 1:
1149 result = "00" + deweyCallNum;
1150 break;
1151 case 2:
1152 result = "0" + deweyCallNum;
1153 }
1154 }
1155
1156 return result;
1157 }
1158
1159
1160
1161
1162
1163
1164
1165
1166 private static String getFormatString(int numDigits) {
1167 StringBuilder b4 = new StringBuilder();
1168 if (numDigits < 0)
1169 b4.append("############");
1170 else if (numDigits > 0) {
1171 for (int i = 0; i < numDigits; i++) {
1172 b4.append('0');
1173 }
1174 }
1175 return b4.toString();
1176 }
1177
1178
1179
1180
1181
1182 public static String getSuDocShelfKey(String callNumber) {
1183 String upcaseSuDoccallnum = callNumber.toUpperCase();
1184 StringBuffer shelfKey = new StringBuffer();
1185
1186 String[] cNumSub = upcaseSuDoccallnum.split(SUDOC_REGEX);
1187 for (String str : cNumSub) {
1188 if (StringUtils.isNumeric(str)) {
1189
1190 str = StringUtils.leftPad(str, 5, "0");
1191 shelfKey.append(str);
1192 shelfKey.append(" ");
1193 } else {
1194
1195 str = StringUtils.rightPad(str, 5);
1196 shelfKey.append(str);
1197 shelfKey.append(" ");
1198 }
1199 }
1200 return shelfKey.toString().trim();
1201 }
1202 }