1 package org.kuali.ole.utility.callnumber;
2
3
4
5
6
7
8
9
10
import com.ibm.icu.lang.UCharacter;
import org.apache.commons.lang.StringUtils;
import org.marc4j.ErrorHandler;

import java.text.DecimalFormat;
import java.text.DecimalFormatSymbols;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
20
21
22
23
24
25
26
27
28
29 public final class CallNumUtils {
30
31
32
33
34
35
36
37
38
/** Private so the class cannot be instantiated from outside; every member is static. */
private CallNumUtils() {
}
41
/**
 * Matches input that begins with one to three digits, optionally followed by a period
 * and more digits; any text may follow.
 */
public static final Pattern DEWEY_PATTERN = Pattern.compile("^\\d{1,3}(\\.\\d+)?.*");

/**
 * Regex for the mandatory part of an LC class: one capital letter other than I, O, W, X
 * or Y, up to two further capital letters, optional spaces, then digits with an optional
 * decimal fraction.
 */
public static final String LC_CLASS_REQ_REGEX = "[A-Z&&[^IOWXY]]{1}[A-Z]{0,2} *\\d+(\\.\\d+)?";

/**
 * Regex for text treated as "not a cutter": either a token that starts with a digit or a
 * lowercase letter followed by word characters, or a capital letter followed by at least
 * one non-digit and then word characters.
 */
public static final String NOT_CUTTER = "([\\da-z]\\w*)|([A-Z]\\D+[\\w]*)";

/**
 * {@link #LC_CLASS_REQ_REGEX} optionally followed by spaces and a {@link #NOT_CUTTER}
 * suffix, all wrapped in one capturing group.
 */
public static final String LC_CLASS_W_SUFFIX = "(" + LC_CLASS_REQ_REGEX + "( +" + NOT_CUTTER + ")?)";

/** Regex for a cutter: one capital letter followed by one or more digits. */
public static final String CUTTER_REGEX = "[A-Z]\\d+";

/**
 * {@link #LC_CLASS_W_SUFFIX}, then optional spaces and an optional period, then a
 * {@link #CUTTER_REGEX} cutter.
 */
public static final String LC_CLASS_N_CUTTER = LC_CLASS_W_SUFFIX + " *\\.?" + CUTTER_REGEX;
/** Compiled {@link #LC_CLASS_N_CUTTER} that also accepts any trailing text. */
public static final Pattern LC_CLASS_N_CUTTER_PATTERN = Pattern.compile(LC_CLASS_N_CUTTER + ".*");

/** Regex for a Dewey class number: one to three digits with an optional decimal fraction. */
public static final String DEWEY_CLASS_REGEX = "\\d{1,3}(\\.\\d+)?";

/** Shortest Dewey cutter form accepted by this class: a single capital letter. */
public static final String DEWEY_MIN_CUTTER_LETTER_REGEX = "[A-Z]";
/** A capital letter followed by one to three digits. */
public static final String DEWEY_MIN_CUTTER_REGEX = "[A-Z]\\d{1,3}";
/** {@link #DEWEY_MIN_CUTTER_REGEX} immediately followed by one or more capital letters. */
public static final String DEWEY_CUTTER_TRAILING_LETTERS_REGEX = DEWEY_MIN_CUTTER_REGEX + "[A-Z]+";
/** {@link #DEWEY_MIN_CUTTER_REGEX}, one or more spaces, then one or more capital letters. */
public static final String DEWEY_CUTTER_SPACE_TRAILING_LETTERS_REGEX = DEWEY_MIN_CUTTER_REGEX + " +[A-Z]+";
/**
 * {@link #DEWEY_MIN_CUTTER_REGEX}, optional spaces, then zero or more capital letters
 * (the letters are matched with a possessive quantifier).
 */
public static final String DEWEY_FULL_CUTTER_REGEX = DEWEY_MIN_CUTTER_REGEX + " *[A-Z]*+";

/**
 * {@link #DEWEY_CLASS_REGEX}, then optional spaces and an optional period, then
 * {@link #DEWEY_FULL_CUTTER_REGEX}.
 */
public static final String DEWEY_CLASS_N_CUTTER_REGEX = DEWEY_CLASS_REGEX + " *\\.?" + DEWEY_FULL_CUTTER_REGEX;
/** Compiled {@link #DEWEY_CLASS_N_CUTTER_REGEX} that also accepts any trailing text. */
public static final Pattern DEWEY_CLASS_N_CUTTER_PATTERN = Pattern.compile(DEWEY_CLASS_N_CUTTER_REGEX + ".*");

/**
 * Delimiter regex used by {@code getSuDocShelfKey} to split an upper-cased call number:
 * a run of characters other than A-Z and 0-9, or the zero-width boundary between a
 * capital letter and a digit (in either order).
 */
public static final String SUDOC_REGEX = "[^A-Z0-9]+|(?<=[A-Z])(?=[0-9])|(?<=[0-9])(?=[A-Z])";
106
107
108 private static Map<Character, Character> alphanumReverseMap = new HashMap<Character, Character>();
109
110 static {
111 alphanumReverseMap.put('0', 'Z');
112 alphanumReverseMap.put('1', 'Y');
113 alphanumReverseMap.put('2', 'X');
114 alphanumReverseMap.put('3', 'W');
115 alphanumReverseMap.put('4', 'V');
116 alphanumReverseMap.put('5', 'U');
117 alphanumReverseMap.put('6', 'T');
118 alphanumReverseMap.put('7', 'S');
119 alphanumReverseMap.put('8', 'R');
120 alphanumReverseMap.put('9', 'Q');
121 alphanumReverseMap.put('A', 'P');
122 alphanumReverseMap.put('B', 'O');
123 alphanumReverseMap.put('C', 'N');
124 alphanumReverseMap.put('D', 'M');
125 alphanumReverseMap.put('E', 'L');
126 alphanumReverseMap.put('F', 'K');
127 alphanumReverseMap.put('G', 'J');
128 alphanumReverseMap.put('H', 'I');
129 alphanumReverseMap.put('I', 'H');
130 alphanumReverseMap.put('J', 'G');
131 alphanumReverseMap.put('K', 'F');
132 alphanumReverseMap.put('L', 'E');
133 alphanumReverseMap.put('M', 'D');
134 alphanumReverseMap.put('N', 'C');
135 alphanumReverseMap.put('O', 'B');
136 alphanumReverseMap.put('P', 'A');
137 alphanumReverseMap.put('Q', '9');
138 alphanumReverseMap.put('R', '8');
139 alphanumReverseMap.put('S', '7');
140 alphanumReverseMap.put('T', '6');
141 alphanumReverseMap.put('U', '5');
142 alphanumReverseMap.put('V', '4');
143 alphanumReverseMap.put('W', '3');
144 alphanumReverseMap.put('X', '2');
145 alphanumReverseMap.put('Y', '1');
146 alphanumReverseMap.put('Z', '0');
147 }
148
149
150
151
152
/** Character emitted in reversed keys for symbols that cannot be mapped; the lowest char value. */
public static char SORT_FIRST_CHAR = Character.MIN_VALUE;
/** Template for reversed shelf keys; the static initializer below fills it with 50 '~' characters. */
public static StringBuilder reverseDefault = new StringBuilder(75);

static {
    int remaining = 50;
    while (remaining-- > 0) {
        reverseDefault.append('~');
    }
}
163
164
165
166
167
168
169
170 public static final boolean isValidLC(String possLCval) {
171 if (possLCval != null && LC_CLASS_N_CUTTER_PATTERN.matcher(possLCval.trim()).matches())
172 return true;
173 return false;
174 }
175
176
177
178
179
180 public static final boolean isValidDeweyWithCutter(String possDeweyVal) {
181 if (possDeweyVal != null && DEWEY_CLASS_N_CUTTER_PATTERN.matcher(possDeweyVal.trim()).matches())
182 return true;
183 return false;
184 }
185
186
187
188
189
190 public static final boolean isValidDewey(String possDeweyVal) {
191 if (possDeweyVal != null && DEWEY_PATTERN.matcher(possDeweyVal.trim()).matches())
192 return true;
193 return false;
194 }
195
196
197
198
199
200 public static final String getPortionBeforeCutter(String callnum) {
201
202
203
204
205
206
207
208 String beginCutterRegex = "( +|(\\.[A-Z])| */)";
209
210 String[] pieces = callnum.split(beginCutterRegex);
211 if (pieces.length == 0 || pieces[0] == null || pieces[0].length() == 0)
212 return null;
213 else
214 return pieces[0].trim();
215 }
216
217
218
219
220
221 public static final String getLCB4FirstCutter(String callnum) {
222 String result = null;
223
224 String cutter = getFirstLCcutter(callnum);
225 if (cutter != null && cutter.length() > 0) {
226
227 int ix = callnum.indexOf(cutter);
228 String lets = getLCstartLetters(callnum);
229 if (ix < lets.length())
230 ix = callnum.indexOf(cutter, lets.length());
231
232 if (ix > 0) {
233 result = callnum.substring(0, ix).trim();
234 if (result.endsWith("."))
235 result = result.substring(0, result.length() - 1).trim();
236 } else
237 result = callnum;
238 } else
239 result = callnum;
240
241 return result;
242 }
243
244
245
246
247
248 public static String getLCstartLetters(String rawLCcallnum) {
249 String result = null;
250 if (rawLCcallnum != null && rawLCcallnum.length() > 0) {
251 String[] lcClass = rawLCcallnum.split("[^A-Z]+");
252 if (lcClass.length > 0)
253 result = lcClass[0];
254 }
255 return result;
256 }
257
258
259
260
261
262
263
264
265 public static String getLCClassDigits(String rawLCcallnum) {
266 String result = null;
267
268 String rawClass = getLCB4FirstCutter(rawLCcallnum);
269 if (rawClass != null && rawClass.length() > 0) {
270 String[] pieces = rawClass.split("[A-Z ]+");
271 if (pieces.length > 1)
272 result = pieces[1].trim();
273 }
274 return result;
275 }
276
277
278
279
280
281
282
283 public static String getLCClassSuffix(String rawLCcallnum) {
284 String result = null;
285
286 String b4cutter = getLCB4FirstCutter(rawLCcallnum);
287 if (b4cutter == null || b4cutter.length() == 0)
288 return null;
289
290 String classDigits = getLCClassDigits(rawLCcallnum);
291
292 if (classDigits != null && classDigits.length() > 0) {
293 int reqClassLen = b4cutter.indexOf(classDigits) + classDigits.length();
294
295 if (b4cutter.length() > reqClassLen)
296 result = b4cutter.substring(reqClassLen).trim();
297 }
298
299 return result;
300 }
301
302
303
304
305
306
307
308
309 public static String getFirstLCcutter(String rawCallnum) {
310 String result = null;
311
312 String regex = LC_CLASS_W_SUFFIX + " *\\.?(" + CUTTER_REGEX + ")";
313 Pattern pattern = Pattern.compile(regex);
314 Matcher matcher = pattern.matcher(rawCallnum);
315
316 if (matcher.find())
317 result = matcher.group(6).trim();
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332 return result;
333 }
334
335
336
337
338
339
340
341 public static String getFirstLCcutterSuffix(String rawLCcallnum) {
342 String result = null;
343
344 String regex = LC_CLASS_N_CUTTER + " *(" + NOT_CUTTER + ")*";
345 Pattern pattern = Pattern.compile(regex);
346 Matcher matcher = pattern.matcher(rawLCcallnum);
347
348
349 if (matcher.find() && matcher.groupCount() > 5
350 && matcher.group(6) != null && matcher.group(6).length() > 0) {
351
352
353
354 result = matcher.group(6).trim();
355
356
357
358 int endLastIx = matcher.end(6);
359 if (endLastIx < rawLCcallnum.length()) {
360
361 Pattern cutterPat = Pattern.compile(" *\\." + CUTTER_REGEX);
362 matcher.usePattern(cutterPat);
363 if (matcher.find(endLastIx)) {
364 if (endLastIx < matcher.start())
365 result = result.trim() + " " + rawLCcallnum.substring(endLastIx, matcher.start()).trim();
366 } else
367 result = result + rawLCcallnum.substring(endLastIx);
368 }
369 } else {
370
371
372
373
374 String afterLCclassNCutter = rawLCcallnum.replaceFirst(LC_CLASS_N_CUTTER + " *", "");
375 String cutterRegex = LC_CLASS_N_CUTTER + " *(.*)\\." + CUTTER_REGEX;
376
377 pattern = Pattern.compile(cutterRegex);
378 matcher = pattern.matcher(rawLCcallnum);
379
380 if (matcher.find() && matcher.groupCount() > 5
381 && matcher.group(6) != null && matcher.group(6).length() > 0)
382
383 result = matcher.group(6).trim();
384 else {
385 regex = LC_CLASS_N_CUTTER + " \\.\\.\\.$";
386 pattern = Pattern.compile(regex);
387 matcher = pattern.matcher(rawLCcallnum);
388 if (matcher.find())
389 result = " ...";
390 }
391 }
392 return result;
393 }
394
395
396
397
398
399
400
401
402 public static String getSecondLCcutter(String rawLCcallnum) {
403 String result = null;
404
405 String firstCutSuffix = getFirstLCcutterSuffix(rawLCcallnum);
406 if (firstCutSuffix == null || firstCutSuffix.length() == 0) {
407
408 String regex = LC_CLASS_N_CUTTER + " *\\.?(" + CUTTER_REGEX + ")";
409 Pattern pattern = Pattern.compile(regex);
410 Matcher matcher = pattern.matcher(rawLCcallnum);
411 if (matcher.find() && matcher.groupCount() > 5
412 && matcher.group(6) != null && matcher.group(6).length() > 0) {
413 result = matcher.group(6).trim();
414 }
415 } else {
416
417
418 int ix = rawLCcallnum.indexOf(firstCutSuffix) + firstCutSuffix.length();
419 if (ix < rawLCcallnum.length()) {
420 String remaining = rawLCcallnum.substring(ix).trim();
421 Pattern pattern = Pattern.compile("(" + CUTTER_REGEX + ")");
422 Matcher matcher = pattern.matcher(remaining);
423 if (matcher.find() && matcher.group(1) != null && matcher.group(1).length() > 0) {
424 result = matcher.group(1).trim();
425 }
426 }
427
428 if (result == null) {
429 Pattern pattern = Pattern.compile("\\.(" + CUTTER_REGEX + ")");
430 Matcher matcher = pattern.matcher(firstCutSuffix);
431 if (matcher.find() && matcher.group(1) != null && matcher.group(1).length() > 0) {
432 result = matcher.group(1).trim();
433 }
434 }
435 }
436 return result;
437 }
438
439
440
441
442
443
444
445 public static String getSecondLCcutterSuffix(String rawLCcallnum) {
446 String result = null;
447
448 String secondCutter = getSecondLCcutter(rawLCcallnum);
449 if (secondCutter != null && secondCutter.length() > 0) {
450
451 int ix = rawLCcallnum.indexOf(secondCutter) + secondCutter.length();
452 if (ix < rawLCcallnum.length())
453 result = rawLCcallnum.substring(ix).trim();
454 }
455
456 return result;
457 }
458
459
460
461
462
463
464
465
466
467 public static String getSecondLCcutterYearSuffix(String rawLCcallnum) {
468 String result = null;
469
470 String regex = LC_CLASS_N_CUTTER + " *(" + NOT_CUTTER + ")*";
471 Pattern pattern = Pattern.compile(regex);
472 Matcher matcher = pattern.matcher(rawLCcallnum);
473
474 if (matcher.find() && matcher.groupCount() > 5
475 && matcher.group(6) != null && matcher.group(6).length() > 0) {
476
477
478
479 result = matcher.group(6);
480
481
482
483 int endLastIx = matcher.end(6);
484 if (endLastIx < rawLCcallnum.length()) {
485 Pattern cutterPat = Pattern.compile(" *\\.?" + CUTTER_REGEX + ".*");
486 matcher.usePattern(cutterPat);
487 if (matcher.find(endLastIx)) {
488 if (endLastIx < matcher.start())
489 result = result.trim() + " " + rawLCcallnum.substring(endLastIx, matcher.start()).trim();
490 } else
491 result = result.trim() + rawLCcallnum.substring(endLastIx);
492 }
493 }
494
495 return result;
496 }
497
498
499
500
501
502 public static final String getDeweyB4Cutter(String callnum) {
503 String result = null;
504
505 String entireCallNumRegex = "(" + DEWEY_CLASS_REGEX + ").*";
506 Pattern pattern = Pattern.compile(entireCallNumRegex);
507 Matcher matcher = pattern.matcher(callnum);
508 if (matcher.find())
509 result = matcher.group(1).trim();
510
511 return result;
512 }
513
514
515
516
517
518
519
520
521 public static String getDeweyCutter(String rawCallnum) {
522 String result = null;
523
524
525 String regex1 = DEWEY_CLASS_REGEX + " *\\.?(" + DEWEY_CUTTER_TRAILING_LETTERS_REGEX + ")( +" + NOT_CUTTER + ".*)";
526 String regex2 = DEWEY_CLASS_REGEX + " *\\.?(" + DEWEY_MIN_CUTTER_REGEX + ")( +" + NOT_CUTTER + ".*)";
527 String regex3 = DEWEY_CLASS_REGEX + " *\\.?(" + DEWEY_CUTTER_SPACE_TRAILING_LETTERS_REGEX + ")( +" + NOT_CUTTER + ".*)";
528 String regex4 = DEWEY_CLASS_REGEX + " *\\.?(" + DEWEY_CUTTER_TRAILING_LETTERS_REGEX + ")(.*)";
529 String regex5 = DEWEY_CLASS_REGEX + " *\\.?(" + DEWEY_MIN_CUTTER_REGEX + ")(.*)";
530 String regex6 = DEWEY_CLASS_REGEX + " *\\.?(" + DEWEY_CUTTER_SPACE_TRAILING_LETTERS_REGEX + ")(.*)";
531 String regex7 = DEWEY_CLASS_REGEX + " *\\.?(" + DEWEY_MIN_CUTTER_LETTER_REGEX + ")(.*)";
532 Pattern pat1 = Pattern.compile(regex1);
533 Pattern pat2 = Pattern.compile(regex2);
534 Pattern pat3 = Pattern.compile(regex3);
535 Pattern pat4 = Pattern.compile(regex4);
536 Pattern pat5 = Pattern.compile(regex5);
537 Pattern pat6 = Pattern.compile(regex6);
538 Pattern pat7 = Pattern.compile(regex7);
539
540 Matcher matcher = pat1.matcher(rawCallnum);
541 if (!matcher.find()) {
542 matcher = pat2.matcher(rawCallnum);
543 if (!matcher.find()) {
544 matcher = pat3.matcher(rawCallnum);
545 }
546 }
547
548 if (matcher.find()) {
549 String cutter = matcher.group(2);
550 String suffix = matcher.group(3);
551 if (suffix.length() == 0)
552 result = cutter.trim();
553 else {
554
555
556 if (suffix.startsWith(" ") || cutter.endsWith(" "))
557 result = cutter.trim();
558 else {
559 int ix = cutter.lastIndexOf(' ');
560 if (ix != -1)
561 result = cutter.substring(0, ix);
562 else
563 result = cutter.trim();
564 }
565 }
566 } else {
567 matcher = pat4.matcher(rawCallnum);
568 if (matcher.find())
569 result = matcher.group(2);
570 else {
571 matcher = pat5.matcher(rawCallnum);
572 if (matcher.find())
573 result = matcher.group(2);
574 else {
575 matcher = pat6.matcher(rawCallnum);
576 if (matcher.find())
577 result = matcher.group(2);
578 else {
579 matcher = pat7.matcher(rawCallnum);
580 if (matcher.find())
581 result = matcher.group(2);
582 }
583 }
584 }
585 }
586 if (result != null)
587 return result.trim();
588 return result;
589 }
590
591
592
593
594
595
596 public static String getDeweyCutterSuffix(String rawCallnum) {
597 if (rawCallnum == null || rawCallnum.length() == 0)
598 return null;
599 String result = null;
600
601 String cutter = getDeweyCutter(rawCallnum);
602 if (cutter != null) {
603 int ix = rawCallnum.indexOf(cutter) + cutter.length();
604 result = rawCallnum.substring(ix).trim();
605 }
606
607 if (result == null || result.length() == 0) {
608
609 String regex1 = DEWEY_CLASS_REGEX + " *\\.?(" + DEWEY_CUTTER_TRAILING_LETTERS_REGEX + ")( +" + NOT_CUTTER + ".*)";
610 String regex2 = DEWEY_CLASS_REGEX + " *\\.?(" + DEWEY_MIN_CUTTER_REGEX + ")( +" + NOT_CUTTER + ".*)";
611 String regex3 = DEWEY_CLASS_REGEX + " *\\.?(" + DEWEY_CUTTER_SPACE_TRAILING_LETTERS_REGEX + ")( +" + NOT_CUTTER + ".*)";
612 String regex4 = DEWEY_CLASS_REGEX + " *\\.?(" + DEWEY_CUTTER_TRAILING_LETTERS_REGEX + ")(.*)";
613 String regex5 = DEWEY_CLASS_REGEX + " *\\.?(" + DEWEY_MIN_CUTTER_REGEX + ")(.*)";
614 String regex6 = DEWEY_CLASS_REGEX + " *\\.?(" + DEWEY_CUTTER_SPACE_TRAILING_LETTERS_REGEX + ")(.*)";
615 String regex7 = DEWEY_CLASS_REGEX + " *\\.?(" + DEWEY_MIN_CUTTER_LETTER_REGEX + ")(.*)";
616 Pattern pat1 = Pattern.compile(regex1);
617 Pattern pat2 = Pattern.compile(regex2);
618 Pattern pat3 = Pattern.compile(regex3);
619 Pattern pat4 = Pattern.compile(regex4);
620 Pattern pat5 = Pattern.compile(regex5);
621 Pattern pat6 = Pattern.compile(regex6);
622 Pattern pat7 = Pattern.compile(regex7);
623
624 Matcher matcher = pat1.matcher(rawCallnum);
625 if (!matcher.find()) {
626 matcher = pat2.matcher(rawCallnum);
627 if (!matcher.find()) {
628 matcher = pat3.matcher(rawCallnum);
629 if (!matcher.find()) {
630 matcher = pat4.matcher(rawCallnum);
631 if (!matcher.find()) {
632 matcher = pat5.matcher(rawCallnum);
633 if (!matcher.find()) {
634 matcher = pat6.matcher(rawCallnum);
635 if(!matcher.find()){
636 matcher = pat7.matcher(rawCallnum);
637 }
638 }
639 }
640 }
641 }
642 }
643
644 if (matcher.find(0)) {
645 cutter = matcher.group(2);
646 String suffix = matcher.group(3);
647 if (suffix.trim().length() > 0) {
648
649
650 if (suffix.startsWith(" ") || cutter.endsWith(" "))
651 result = suffix;
652 else {
653 int ix = cutter.lastIndexOf(' ');
654 if (ix != -1)
655 result = cutter.substring(ix) + suffix;
656 else
657 result = suffix;
658 }
659 }
660 }
661 }
662 if (result != null)
663 result = result.trim();
664 if (result == null || result.trim().length() == 0)
665 return null;
666 else
667 return result;
668 }
669
670
671
672
673
674
675
676
677
678
679 public static String normalizeCallnum(String rawCallnum) {
680
681
682 String normalizedCallnum = rawCallnum.trim().replaceAll("\\s\\s+", " ");
683
684 normalizedCallnum = normalizedCallnum.replaceAll("\\. \\.", " .");
685
686 normalizedCallnum = normalizedCallnum.replaceAll("(\\d+\\.) ([A-Z])", "$1$2");
687
688 if (normalizedCallnum.endsWith("."))
689 normalizedCallnum = normalizedCallnum.substring(0, normalizedCallnum.length() - 1).trim();
690
691
692
693
694
695 return normalizedCallnum;
696 }
697
698
699
700
701
702 static String normalizeLCcallnum(String rawLCcallnum) {
703 String normCallnum = normalizeCallnum(rawLCcallnum);
704
705 return normCallnum.replaceAll("^([A-Z][A-Z]?[A-Z]?) ([0-9])", "$1$2");
706 }
707
708
709
710
711
712
713
714
715 public static String getLCShelfkey(String rawLCcallnum, String recid) {
716 return (getLCShelfkey(rawLCcallnum, recid, null));
717 }
718
719
720
721
722
723 public static String getLCShelfkey(String rawLCcallnum, String recid, ErrorHandler errors) {
724 StringBuilder resultBuf = new StringBuilder();
725 String upcaseLCcallnum = rawLCcallnum.toUpperCase();
726
727
728
729
730
731 StringBuilder initLetBuf = new StringBuilder(" ");
732 String lets = getLCstartLetters(upcaseLCcallnum);
733 if (lets != null) {
734 initLetBuf.replace(0, lets.length(), lets);
735 } else {
736 if ((recid != null) && (!rawLCcallnum.startsWith("XX")))
737 {
738 if (errors == null) {
739 System.err.println("Problem creating shelfkey for record " + recid + "; call number: " + rawLCcallnum);
740 } else {
741 errors.addError(ErrorHandler.ERROR_TYPO, "Problem creating shelfkey for record " + recid + "; call number: " + rawLCcallnum);
742 }
743 }
744 return (rawLCcallnum + " ");
745 }
746 resultBuf.append(initLetBuf);
747
748 try {
749
750
751 String digitStr = getLCClassDigits(upcaseLCcallnum);
752 if (digitStr != null)
753 resultBuf.append(normalizeFloat(digitStr, 4, 6));
754 else
755 resultBuf.append(normalizeFloat("0", 4, 6));
756
757
758 String classSuffix = getLCClassSuffix(upcaseLCcallnum);
759 if (classSuffix != null)
760 resultBuf.append(" " + normalizeSuffix(classSuffix));
761
762
763 String firstCutter = getFirstLCcutter(upcaseLCcallnum);
764 if (firstCutter != null) {
765 resultBuf.append(" " + normalizeCutter(firstCutter, 6));
766
767
768 String firstCutterSuffix = getFirstLCcutterSuffix(upcaseLCcallnum);
769 if (firstCutterSuffix != null)
770 resultBuf.append(" " + normalizeSuffix(firstCutterSuffix));
771
772
773 String secondCutter = getSecondLCcutter(upcaseLCcallnum);
774 if (secondCutter != null) {
775 resultBuf.append(" " + normalizeCutter(secondCutter, 6));
776
777 String secondCutterSuffix = getSecondLCcutterSuffix(upcaseLCcallnum);
778 if (secondCutterSuffix != null)
779 resultBuf.append(" " + normalizeSuffix(secondCutterSuffix));
780 }
781 }
782 } catch (NumberFormatException e) {
783
784 if ((recid != null) && (!rawLCcallnum.startsWith("XX")))
785 {
786 if (errors == null) {
787 System.err.println("Problem creating shelfkey for record " + recid + "; call number: " + rawLCcallnum);
788 } else {
789 errors.addError(ErrorHandler.ERROR_TYPO, "Problem creating shelfkey for record " + recid + "; call number: " + rawLCcallnum);
790 }
791 }
792
793 resultBuf = new StringBuilder();
794 }
795
796 if (resultBuf.length() == 0)
797 resultBuf.append(upcaseLCcallnum);
798
799 return resultBuf.toString().trim();
800 }
801
802
803
804
805
806 private static String normalizeCutter(String cutter, int numDigits) {
807 String result = null;
808 if (cutter != null && cutter.length() > 0) {
809 String cutLets = getLCstartLetters(cutter);
810 String cutDigs = cutter.substring(cutLets.length());
811 String norm = null;
812 if (cutDigs != null && cutDigs.length() > 0) {
813 try {
814
815 Integer.parseInt(cutDigs);
816 norm = normalizeFloat("." + cutDigs, 1, numDigits);
817 } catch (NumberFormatException e) {
818 norm = cutDigs;
819 }
820 } else if (cutDigs.length() == 0 && cutLets.length() == 1)
821
822 norm = normalizeFloat("0", 1, numDigits);
823
824 result = cutLets + norm;
825 }
826 return result;
827 }
828
829
830
831
832
833 public static String normalizeSuffix(String suffix) {
834 if (suffix != null && suffix.length() > 0) {
835 StringBuilder resultBuf = new StringBuilder(suffix.length());
836
837 String[] digitStrs = suffix.split("[\\D]+");
838 int len = digitStrs.length;
839 if (digitStrs != null && len != 0) {
840 int s = 0;
841 for (int d = 0; d < len; d++) {
842 String digitStr = digitStrs[d];
843 int ix = suffix.indexOf(digitStr, s);
844
845 if (s < ix) {
846 String text = suffix.substring(s, ix);
847 resultBuf.append(text);
848 }
849 if (digitStr != null && digitStr.length() != 0) {
850
851 resultBuf.append(normalizeFloat(digitStr, 6, 0));
852 s = ix + digitStr.length();
853 }
854
855 }
856
857 resultBuf.append(suffix.substring(s));
858 return resultBuf.toString();
859 }
860 }
861
862 return suffix;
863 }
864
865
866
867
868
869
870 public static String getReverseShelfKey(String shelfkey) {
871 StringBuilder resultBuf = new StringBuilder(reverseDefault);
872 if (shelfkey != null && shelfkey.length() > 0)
873 resultBuf.replace(0, shelfkey.length(), reverseAlphanum(shelfkey));
874 return resultBuf.toString();
875 }
876
877
878
879
880
881 private static String reverseAlphanum(String orig) {
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897 StringBuilder reverse = new StringBuilder();
898 for (int ix = 0; ix < orig.length(); ) {
899 int codePoint = Character.toUpperCase(orig.codePointAt(ix));
900 char[] chs = Character.toChars(codePoint);
901
902 if (Character.isLetterOrDigit(codePoint)) {
903 if (chs.length == 1) {
904 char c = chs[0];
905 if (alphanumReverseMap.containsKey(c))
906 reverse.append(alphanumReverseMap.get(c));
907 else {
908
909
910
911 char foldC;
912
913 if (UCharacter.UnicodeBlock.of(c) != UCharacter.UnicodeBlock.COMBINING_DIACRITICAL_MARKS &&
914 UCharacter.UnicodeBlock.of(c) != UCharacter.UnicodeBlock.SPACING_MODIFIER_LETTERS &&
915 (foldC = Utils.foldDiacriticLatinChar(c)) != 0x00)
916
917 reverse.append(alphanumReverseMap.get(foldC));
918 else
919
920
921 reverse.append(SORT_FIRST_CHAR);
922 }
923 } else {
924
925
926 reverse.append(SORT_FIRST_CHAR);
927 }
928 } else
929 reverse.append(reverseNonAlphanum(chs[0]));
930
931 ix += chs.length;
932 }
933
934 return new String(reverse);
935 }
936
937
938
939
940
941 public static char[] reverseNonAlphanum(char ch) {
942
943 switch (ch) {
944 case '.':
945 return Character.toChars('}');
946 case '{':
947 case '|':
948 case '}':
949 case '~':
950
951
952
953 return Character.toChars(' ');
954 default:
955
956 return Character.toChars('~');
957 }
958 }
959
960
961
962
963
964 public static String getDeweyShelfKey(String rawDeweyCallnum) {
965 StringBuilder resultBuf = new StringBuilder();
966
967
968
969
970 String classNum = normalizeFloat(getDeweyB4Cutter(rawDeweyCallnum), 3, 8);
971 resultBuf.append(classNum);
972
973
974
975
976
977
978 String cutter = getDeweyCutter(rawDeweyCallnum);
979 if (cutter != null)
980 resultBuf.append(" " + cutter);
981
982
983 String cutterSuffix = getDeweyCutterSuffix(rawDeweyCallnum);
984 if (cutterSuffix != null)
985 resultBuf.append(" " + normalizeSuffix(cutterSuffix));
986
987
988 if (resultBuf.length() == 0)
989 resultBuf.append(rawDeweyCallnum);
990
991 return resultBuf.toString().trim();
992 }
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011 public static String normalizeFloat(String floatStr, int digitsB4, int digitsAfter) {
1012 double value = Double.valueOf(floatStr).doubleValue();
1013
1014 String formatStr = getFormatString(digitsB4) + '.' + getFormatString(digitsAfter);
1015
1016 DecimalFormat normFormat = new DecimalFormat(formatStr);
1017 String norm = normFormat.format(value);
1018 if (norm.endsWith("."))
1019 norm = norm.substring(0, norm.length() - 1);
1020 return norm;
1021 }
1022
1023 private static String PUNCT_PREFIX = "([\\.:\\/])?";
1024 private static String NS_PREFIX = "(n\\.s\\.?\\,? ?)?";
1025 private static String MONTHS = "jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec";
1026 private static String VOL_LETTERS = "[\\:\\/]?(bd|iss|jahrg|new ser|no|part|pts?|ser|t|v|vols?|vyp" + "|" + MONTHS + ")";
1027 private static String VOL_NUMBERS = "\\d+([\\/-]\\d+)?( \\d{4}([\\/-]\\d{4})?)?( ?suppl\\.?)?";
1028 private static String VOL_NUMBERS_LOOSER = "\\d+.*";
1029 private static String VOL_NUM_AS_LETTERS = "[A-Z]([\\/-]\\[A-Z]+)?.*";
1030
1031 private static Pattern VOL_PATTERN = Pattern.compile(PUNCT_PREFIX + NS_PREFIX + VOL_LETTERS + "\\.? ?" + VOL_NUMBERS, Pattern.CASE_INSENSITIVE);
1032 private static Pattern VOL_PATTERN_LOOSER = Pattern.compile(PUNCT_PREFIX + NS_PREFIX + VOL_LETTERS + "\\.? ?" + VOL_NUMBERS_LOOSER, Pattern.CASE_INSENSITIVE);
1033 private static Pattern VOL_PATTERN_LETTERS = Pattern.compile(PUNCT_PREFIX + NS_PREFIX + VOL_LETTERS + "[\\/\\. ]" + VOL_NUM_AS_LETTERS, Pattern.CASE_INSENSITIVE);
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043 public static String removeLCVolSuffix(String rawLCcallnum) {
1044
1045 String suffix = getSecondLCcutterSuffix(rawLCcallnum);
1046 if (suffix == null || suffix.length() == 0) {
1047 String cut1suffix = getFirstLCcutterSuffix(rawLCcallnum);
1048 if (cut1suffix != null) {
1049
1050 String cut2 = getSecondLCcutter(rawLCcallnum);
1051 if (cut2 != null) {
1052 int ix = cut1suffix.indexOf(cut2);
1053 if (ix != -1)
1054 suffix = cut1suffix.substring(0, ix);
1055 else
1056 suffix = cut1suffix;
1057 } else
1058 suffix = cut1suffix;
1059 }
1060 }
1061
1062
1063 if (suffix != null && suffix.length() > 0) {
1064 Matcher matcher = VOL_PATTERN.matcher(suffix);
1065 if (!matcher.find()) {
1066 matcher = VOL_PATTERN_LOOSER.matcher(suffix);
1067 if (!matcher.find()) {
1068 matcher = VOL_PATTERN_LETTERS.matcher(suffix);
1069 }
1070 }
1071
1072 if (matcher.find(0)) {
1073
1074 int ix = rawLCcallnum.indexOf(suffix) + matcher.start();
1075 if (ix != -1 && ix < rawLCcallnum.length()) {
1076 return rawLCcallnum.substring(0, ix).trim();
1077 }
1078 }
1079 }
1080 return rawLCcallnum;
1081 }
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092 public static String removeDeweyVolSuffix(String rawDeweyCallnum) {
1093 String cutSuffix = getDeweyCutterSuffix(rawDeweyCallnum);
1094
1095 if (cutSuffix == null || cutSuffix.length() == 0)
1096 return rawDeweyCallnum;
1097
1098 Matcher matcher = VOL_PATTERN.matcher(cutSuffix);
1099 if (!matcher.find()) {
1100 matcher = VOL_PATTERN_LOOSER.matcher(cutSuffix);
1101 if (!matcher.find()) {
1102 matcher = VOL_PATTERN_LETTERS.matcher(cutSuffix);
1103 }
1104 }
1105
1106 if (matcher.find(0)) {
1107
1108 int ix = rawDeweyCallnum.indexOf(cutSuffix) + matcher.start();
1109 if (ix != -1 && ix < rawDeweyCallnum.length()) {
1110 return rawDeweyCallnum.substring(0, ix).trim();
1111 }
1112 }
1113 return rawDeweyCallnum;
1114 }
1115
1116
1117
1118
1119
1120
1121
1122
1123 public static String addLeadingZeros(String deweyCallNum) {
1124 String result = deweyCallNum;
1125 String b4Cutter = getPortionBeforeCutter(deweyCallNum);
1126
1127
1128
1129
1130 String b4dec = null;
1131 int decIx = b4Cutter.indexOf(".");
1132 if (decIx >= 0)
1133 b4dec = deweyCallNum.substring(0, decIx).trim();
1134 else
1135 b4dec = b4Cutter.trim();
1136
1137 if (b4dec != null) {
1138 switch (b4dec.length()) {
1139 case 1:
1140 result = "00" + deweyCallNum;
1141 break;
1142 case 2:
1143 result = "0" + deweyCallNum;
1144 }
1145 }
1146
1147 return result;
1148 }
1149
1150
1151
1152
1153
1154
1155
1156
1157 private static String getFormatString(int numDigits) {
1158 StringBuilder b4 = new StringBuilder();
1159 if (numDigits < 0)
1160 b4.append("############");
1161 else if (numDigits > 0) {
1162 for (int i = 0; i < numDigits; i++) {
1163 b4.append('0');
1164 }
1165 }
1166 return b4.toString();
1167 }
1168
1169
1170
1171
1172
1173 public static String getSuDocShelfKey(String callNumber) {
1174 String upcaseSuDoccallnum = callNumber.toUpperCase();
1175 StringBuffer shelfKey = new StringBuffer();
1176
1177 String[] cNumSub = upcaseSuDoccallnum.split(SUDOC_REGEX);
1178 for (String str : cNumSub) {
1179 if (StringUtils.isNumeric(str)) {
1180
1181 str = StringUtils.leftPad(str, 5, "0");
1182 shelfKey.append(str);
1183 shelfKey.append(" ");
1184 } else {
1185
1186 str = StringUtils.rightPad(str, 5);
1187 shelfKey.append(str);
1188 shelfKey.append(" ");
1189 }
1190 }
1191 return shelfKey.toString().trim();
1192 }
1193 }