1 package org.kuali.ole.docstore.engine.service.index.solr;
2
3 import org.apache.commons.collections.CollectionUtils;
4 import org.apache.commons.lang.StringUtils;
5 import org.apache.solr.client.solrj.SolrQuery;
6 import org.apache.solr.client.solrj.SolrServer;
7 import org.apache.solr.client.solrj.SolrServerException;
8 import org.apache.solr.client.solrj.response.QueryResponse;
9 import org.apache.solr.client.solrj.response.UpdateResponse;
10 import org.apache.solr.common.SolrDocument;
11 import org.apache.solr.common.SolrInputDocument;
12 import org.kuali.ole.docstore.OleException;
13 import org.kuali.ole.docstore.common.document.*;
14 import org.kuali.ole.docstore.common.document.config.DocumentSearchConfig;
15 import org.kuali.ole.docstore.common.document.content.bib.marc.*;
16 import org.kuali.ole.docstore.common.document.content.bib.marc.xstream.BibMarcRecordProcessor;
17 import org.kuali.ole.docstore.common.exception.DocstoreIndexException;
18 import org.kuali.ole.docstore.common.util.ReindexBatchStatistics;
19 import org.kuali.ole.docstore.discovery.service.SolrServerManager;
20 import org.kuali.ole.docstore.indexer.solr.DocumentLocalId;
21 import org.kuali.ole.docstore.model.enums.DocCategory;
22 import org.kuali.ole.docstore.model.enums.DocFormat;
23 import org.kuali.ole.docstore.model.enums.DocType;
24 import org.kuali.ole.docstore.utility.ISBNUtil;
25 import org.kuali.ole.utility.Constants;
26 import org.slf4j.Logger;
27 import org.slf4j.LoggerFactory;
28 import org.springframework.util.StopWatch;
29
30 import java.io.IOException;
31 import java.text.DateFormat;
32 import java.text.Normalizer;
33 import java.text.ParseException;
34 import java.text.SimpleDateFormat;
35 import java.util.*;
36 import java.util.regex.Matcher;
37 import java.util.regex.Pattern;
38
39
40
41
42
43
44
45
46 public class BibMarcIndexer extends DocstoreSolrIndexService implements BibConstants {
47
48
49
50
51 private static final String SEPERATOR_DATA_FIELD = ", ";
52 private static final String SEPERATOR_SUB_FIELD = " ";
53 private static final String PATTERN_CHAR = "*";
54 private static final String SEPERATOR_HYPHEN = " - ";
55 private static final String SEPERATOR_DOUBLE_HYPHEN = " -- ";
56 private static final String DYNAMIC_FIELD_PREFIX = "mdf_";
57 private static final String BIB_IDENTIFIER = "bibIdentifier";
58 private static final String HOLDINGS_IDENTIFIER = "holdingsIdentifier";
59 private static final String ITEM_IDENTIFIER = "itemIdentifier";
60 private String publicationDateRegex = "[0-9]{4}";
61 private static final Logger LOG = LoggerFactory
62 .getLogger(BibMarcIndexer.class);
63
64 private static BibMarcIndexer bibMarcIndexer = null;
65
66 public static BibMarcRecordProcessor recordProcessor = new BibMarcRecordProcessor();
67 private static DocumentSearchConfig documentSearchConfig = null;
68
69
70
71
72
73
74
75 public static BibMarcIndexer getInstance() {
76 if (bibMarcIndexer == null) {
77 bibMarcIndexer = new BibMarcIndexer();
78 }
79 documentSearchConfig = DocumentSearchConfig.getDocumentSearchConfig();
80 return bibMarcIndexer;
81 }
82
83
84 @Override
85 public void createTree(Object object) {
86 List<SolrInputDocument> solrInputDocuments = new ArrayList<>();
87 buildSolrDocsForBibTree((BibTree) object, solrInputDocuments);
88 indexSolrDocuments(solrInputDocuments, true);
89 }
90
91
92
93
94
95
96 private void buildSolrDocsForBibTree(BibTree bibTree, List<SolrInputDocument> solrInputDocuments) {
97 Bib bib = bibTree.getBib();
98 if (bib.getId() != null && bib.getId().contains("wbm")) {
99 BibMarcRecords bibMarcRecords = recordProcessor.fromXML(bib.getContent());
100 SolrInputDocument bibSolrDoc = buildSolrInputDocument(bibMarcRecords.getRecords().get(0));
101 setCommonFields(bib, bibSolrDoc);
102 solrInputDocuments.add(bibSolrDoc);
103 for (HoldingsTree holdingsTree : bibTree.getHoldingsTrees()) {
104 buildSolrDocsForHoldingsTree(solrInputDocuments, bib, bibSolrDoc, holdingsTree);
105 }
106 }
107 }
108
109
110
111
112
113
114
115
116
117
118 private void buildSolrDocsForHoldingsTree(List<SolrInputDocument> solrInputDocuments, Bib bib, SolrInputDocument bibSolrDoc, HoldingsTree holdingsTree) {
119 if (holdingsTree.getHoldings() != null) {
120 HoldingsOlemlIndexer holdingsOlemlIndexer = HoldingsOlemlIndexer.getInstance();
121 if (holdingsTree.getHoldings().getContent() != null || holdingsTree.getHoldings().getContentObject() != null) {
122 SolrInputDocument holdingsSolrInputDoc = holdingsOlemlIndexer.getSolrInputFieldsForHoldings(holdingsTree.getHoldings());
123 linkHoldingsWithBib(bibSolrDoc, holdingsSolrInputDoc,bib.getId(),solrInputDocuments,holdingsTree.getHoldings().getId());
124 holdingsSolrInputDoc.addField(BIB_IDENTIFIER, bib.getId());
125 List<Item> itemDocuments = holdingsTree.getItems();
126 List<String> itemIds = new ArrayList<String>();
127 holdingsSolrInputDoc.addField(ITEM_IDENTIFIER, itemIds);
128 ItemOlemlIndexer itemOlemlIndexer = ItemOlemlIndexer.getInstance();
129 for (Item itemDocument : itemDocuments) {
130 itemIds.add(itemDocument.getId());
131 SolrInputDocument itemSolrInputDoc = itemOlemlIndexer.getSolrInputFieldsForItem(itemDocument);
132 itemSolrInputDoc.addField(HOLDINGS_IDENTIFIER, holdingsTree.getHoldings().getId());
133 itemSolrInputDoc.addField(BIB_IDENTIFIER, bib.getId());
134 addBibInfoForHoldingsOrItems(itemSolrInputDoc, holdingsSolrInputDoc);
135 addHoldingsInfoToItem(itemSolrInputDoc, holdingsSolrInputDoc);
136 solrInputDocuments.add(itemSolrInputDoc);
137 }
138
139 solrInputDocuments.add(holdingsSolrInputDoc);
140 } else if (StringUtils.isNotEmpty(holdingsTree.getHoldings().getId())) {
141 bibSolrDoc.addField(HOLDINGS_IDENTIFIER, "who-" + holdingsTree.getHoldings().getId());
142 }
143 }
144 }
145
146 private void linkHoldingsWithBib(SolrInputDocument bibSolrDoc,SolrInputDocument holdingsSolrInputDoc,String bibId,List<SolrInputDocument> solrInputDocuments,String holdingsId ){
147 if (bibSolrDoc == null) {
148 SolrDocument bibSolrDocument = getSolrDocumentByUUID(bibId);
149 bibSolrDoc = buildSolrInputDocFromSolrDoc(bibSolrDocument);
150 solrInputDocuments.add(bibSolrDoc);
151 }
152 bibSolrDoc.addField(HOLDINGS_IDENTIFIER, holdingsId);
153 addBibInfoForHoldingsOrItems(holdingsSolrInputDoc, bibSolrDoc);
154 }
155
156 @Override
157 public void createTrees(Object object) {
158 BibTrees bibTreesObj = (BibTrees) object;
159 List<BibTree> bibTrees = bibTreesObj.getBibTrees();
160 List<SolrInputDocument> solrInputDocuments = new ArrayList<>();
161 for (BibTree bibTree : bibTrees) {
162 buildSolrDocsForBibTree(bibTree, solrInputDocuments);
163 }
164
165 indexSolrDocuments(solrInputDocuments, true);
166 }
167
168
169
170
171
172
173
174 @Override
175 public void processBibTrees(BibTrees bibTrees) {
176 List<SolrInputDocument> solrInputDocuments = new ArrayList<>();
177 List<String> idsToDelete = new ArrayList<>();
178 for (BibTree bibTree : bibTrees.getBibTrees()) {
179 processBibTree(bibTree, solrInputDocuments, idsToDelete);
180 }
181 LOG.info("Solr Input Documents Size : " + solrInputDocuments.size());
182 indexAndDelete(solrInputDocuments, idsToDelete, true);
183 }
184
185
186
187
188
189
190
191 private void processBibTree(BibTree bibTree, List<SolrInputDocument> solrInputDocuments, List<String> idsToDelete) {
192 Bib bib = bibTree.getBib();
193 if (null != bib) {
194 SolrInputDocument bibSolrInputDocument = new SolrInputDocument();
195 if (Bib.ResultType.SUCCESS.equals(bib.getResult())) {
196 if (bib.getId() != null) {
197 if (Bib.OperationType.CREATE.equals(bib.getOperation())) {
198 createBibTreeDocforSolr(bibTree, solrInputDocuments);
199 } else if (Bib.OperationType.UPDATE.equals(bib.getOperation())) {
200 updateBibDocument(bib, solrInputDocuments, bibSolrInputDocument);
201 processHoldingsTrees(bibTree.getHoldingsTrees(), bibSolrInputDocument, solrInputDocuments, idsToDelete);
202 } else if (Bib.OperationType.DELETE.equals(bib.getOperation())) {
203 idsToDelete.add(bib.getId());
204 }
205 }
206 } else if (bib.getOperation() == null || StringUtils.isBlank(bib.getOperation().name())) {
207 processHoldingsTrees(bibTree.getHoldingsTrees(), bibSolrInputDocument, solrInputDocuments, idsToDelete);
208 }
209 }
210 }
211
212
213
214
215
216
217 private void createBibTreeDocforSolr(BibTree bibTree, List<SolrInputDocument> solrInputDocuments) {
218 buildSolrDocsForBibTree(bibTree, solrInputDocuments);
219 }
220
221
222
223
224
225
226
227
228 private void processHoldingsTrees(List<HoldingsTree> holdingsTrees, SolrInputDocument bibSolrInputDocument, List<SolrInputDocument> solrInputDocuments, List<String> idsToDelete) {
229 for (HoldingsTree holdingsTree : holdingsTrees) {
230 processHoldingsTree(holdingsTree, bibSolrInputDocument, solrInputDocuments, idsToDelete);
231 }
232 }
233
234
235
236
237
238
239
240
241
242
243 private void processHoldingsTree(HoldingsTree holdingsTree, SolrInputDocument bibSolrInputDocument, List<SolrInputDocument> solrInputDocuments, List<String> idsToDelete) {
244 HoldingsOlemlIndexer holdingsOlemlIndexer = HoldingsOlemlIndexer.getInstance();
245 Holdings holdings = holdingsTree.getHoldings();
246 SolrInputDocument holdingsSolrInputDocument = new SolrInputDocument();
247
248 if (Holdings.ResultType.SUCCESS.equals(holdings.getResult())) {
249 if (holdings.getId() != null) {
250 if (Holdings.OperationType.CREATE.equals(holdings.getOperation())) {
251 Bib bib = holdings.getBib();
252 if (null != bib && null != bib.getId()) {
253 buildSolrDocsForHoldingsTree(solrInputDocuments, bib, bibSolrInputDocument, holdingsTree);
254 }
255 } else if (Holdings.OperationType.UPDATE.equals(holdings.getOperation())) {
256 holdingsOlemlIndexer.processHoldingSolrDocumentForUpdate(holdings, solrInputDocuments, holdingsSolrInputDocument);
257 processItems(holdingsTree.getItems(), solrInputDocuments, holdingsSolrInputDocument, idsToDelete);
258 } else if (Holdings.OperationType.DELETE.equals(holdings.getOperation())) {
259 idsToDelete.add(holdings.getId());
260 holdingsOlemlIndexer.processDelete(holdings.getId(), solrInputDocuments);
261 }
262 }
263 } else if ((holdings.getOperation() == null || StringUtils.isBlank(holdings.getOperation().name()))) {
264 processItems(holdingsTree.getItems(), solrInputDocuments, holdingsSolrInputDocument, idsToDelete);
265 }
266
267 }
268
269
270
271
272
273
274
275
276
277
278 private void processItems(List<Item> items, List<SolrInputDocument> solrInputDocuments, SolrInputDocument holdingsSolrInputDocument, List<String> idsToDelete) {
279 ItemOlemlIndexer itemOlemlIndexer = ItemOlemlIndexer.getInstance();
280 for (Item item : items) {
281 if (Item.ResultType.SUCCESS.equals(item.getResult())) {
282 if (item.getId() != null) {
283 if (Item.OperationType.CREATE.equals(item.getOperation())) {
284 itemOlemlIndexer.buildSolrInputDocumentForBatchProcess(item, solrInputDocuments, holdingsSolrInputDocument);
285 } else if (Item.OperationType.UPDATE.equals(item.getOperation())) {
286 itemOlemlIndexer.updateRecordInSolr(item, solrInputDocuments);
287 } else if (Item.OperationType.DELETE.equals(item.getOperation())) {
288 idsToDelete.add(item.getId());
289 itemOlemlIndexer.processDelete(item.getId(), solrInputDocuments);
290 }
291 }
292 }
293 }
294 }
295
296 public void createTrees(Object object, ReindexBatchStatistics reindexBatchStatistics) {
297 BibTrees bibTreesObj = (BibTrees) object;
298 List<BibTree> bibTrees = bibTreesObj.getBibTrees();
299 List<SolrInputDocument> solrInputDocuments = new ArrayList<>();
300 StopWatch stopWatch = new StopWatch();
301
302 stopWatch.start();
303 for (BibTree bibTree : bibTrees) {
304 buildSolrDocsForBibTree(bibTree, solrInputDocuments);
305 }
306 stopWatch.stop();
307 reindexBatchStatistics.addBuildSolrDocsTime(stopWatch.getTotalTimeMillis());
308
309 indexSolrDocuments(solrInputDocuments, true, reindexBatchStatistics);
310 }
311
312 protected void indexSolrDocuments(List<SolrInputDocument> solrDocs, boolean isCommit, ReindexBatchStatistics reindexBatchStatistics) {
313 SolrServer solr = null;
314 try {
315 solr = SolrServerManager.getInstance().getSolrServer();
316 StopWatch stopWatch = new StopWatch();
317 stopWatch.start("add");
318 UpdateResponse response = solr.add(solrDocs);
319 stopWatch.stop();
320 reindexBatchStatistics.addRecToAddInSolr(stopWatch.getLastTaskTimeMillis());
321 if (isCommit) {
322 stopWatch.start("commit");
323 solr.commit(false, false);
324 stopWatch.stop();
325 reindexBatchStatistics.addCommitTime(stopWatch.getLastTaskTimeMillis());
326 }
327 } catch (SolrServerException e) {
328 LOG.info("Exception :", e);
329 rollback(solr);
330 throw new DocstoreIndexException(e.getMessage());
331 } catch (IOException e) {
332 LOG.info("Exception :", e);
333 rollback(solr);
334 throw new DocstoreIndexException(e.getMessage());
335 }
336 }
337
338
339 protected void buildSolrInputDocument(Object object, List<SolrInputDocument> solrInputDocuments) {
340 Bib bib = (Bib) object;
341 BibMarcRecords bibMarcRecords = recordProcessor.fromXML(bib.getContent());
342 SolrInputDocument solrInputDocument = buildSolrInputDocument(bibMarcRecords.getRecords().get(0));
343
344 setCommonFields(bib, solrInputDocument);
345
346 solrInputDocuments.add(solrInputDocument);
347
348 }
349
350 protected void setCommonFields(Bib bib, SolrInputDocument solrInputDocument) {
351 solrInputDocument.setField(ID, bib.getId());
352 solrInputDocument.addField(LOCALID_SEARCH, DocumentLocalId.getDocumentId(bib.getId()));
353 solrInputDocument.addField(LOCALID_DISPLAY, DocumentLocalId.getDocumentIdDisplay(bib.getId()));
354 solrInputDocument.addField(UNIQUE_ID, bib.getId());
355 solrInputDocument.setField(DOC_CATEGORY, DocCategory.WORK.getCode());
356 solrInputDocument.setField(BIB_ID, bib.getId());
357
358 solrInputDocument.setField(STATUS_SEARCH, bib.getStatus());
359 solrInputDocument.setField(STATUS_DISPLAY, bib.getStatus());
360
361 if (StringUtils.isNotEmpty(bib.getStatusUpdatedOn())) {
362 solrInputDocument.setField(STATUS_UPDATED_ON, getDate(bib.getStatusUpdatedOn()));
363 }
364
365 solrInputDocument.addField(STAFF_ONLY_FLAG, bib.isStaffOnly());
366
367 String createdBy = bib.getCreatedBy();
368 solrInputDocument.setField(CREATED_BY, createdBy);
369 solrInputDocument.setField(UPDATED_BY, createdBy);
370
371 Date date = new Date();
372 Date createdDate = null;
373
374 if (StringUtils.isNotBlank(bib.getCreatedOn())) {
375 createdDate = getDate(bib.getCreatedOn());
376 solrInputDocument.setField(DATE_ENTERED, createdDate);
377 } else {
378 solrInputDocument.setField(DATE_ENTERED, date);
379 }
380
381 if (StringUtils.isNotBlank(bib.getUpdatedOn())) {
382 solrInputDocument.setField(DATE_UPDATED, getDate(bib.getUpdatedOn()));
383 } else {
384 if (StringUtils.isNotBlank(bib.getCreatedOn())) {
385
386 solrInputDocument.setField(DATE_UPDATED, createdDate);
387 } else {
388 solrInputDocument.setField(DATE_UPDATED, date);
389 }
390 }
391 }
392
393 protected void updateRecordInSolr(Object object, List<SolrInputDocument> solrInputDocuments) {
394 Bib bib = (Bib) object;
395 List<SolrDocument> solrDocumentList = getSolrDocumentBySolrId(bib.getId());
396 SolrDocument solrDocument = solrDocumentList.get(0);
397 SolrInputDocument solrInputDocument = new SolrInputDocument();
398 if (bib.getContent() != null) {
399 BibMarcRecord workBibMarcRecord = recordProcessor.fromXML(bib.getContent()).getRecords().get(0);
400 solrInputDocument = buildSolrInputDocument(workBibMarcRecord);
401 if (solrDocument != null && solrDocument.getFieldValue(HOLDINGS_IDENTIFIER) != null) {
402 addBibInfoToHoldings(solrInputDocuments, solrInputDocument, solrDocument);
403 }
404 if (StringUtils.isNotEmpty(bib.getStatusUpdatedOn())) {
405 solrInputDocument.setField(STATUS_UPDATED_ON, getDate(bib.getStatusUpdatedOn()));
406 }
407 } else {
408 buildSolrInputDocFromSolrDoc(solrDocument, solrInputDocument);
409 }
410 setCommonFieldsForSolrDoc(solrInputDocument, bib, solrDocument);
411 solrInputDocuments.add(solrInputDocument);
412 }
413
414
415
416
417
418
419
420
421 protected void updateBibDocument(Object object, List<SolrInputDocument> solrInputDocuments, SolrInputDocument solrbibInputDocument) {
422 Bib bib = (Bib) object;
423 List<SolrDocument> solrDocumentList = getSolrDocumentBySolrId(bib.getId());
424 SolrDocument solrDocument = solrDocumentList.get(0);
425
426 if (bib.getContent() != null) {
427 BibMarcRecord workBibMarcRecord = recordProcessor.fromXML(bib.getContent()).getRecords().get(0);
428 solrbibInputDocument = buildSolrInputDocument(workBibMarcRecord, solrbibInputDocument);
429
430 if (solrDocument != null && solrDocument.getFieldValue(HOLDINGS_IDENTIFIER) != null) {
431 addBibInfoToHoldings(solrInputDocuments, solrbibInputDocument, solrDocument);
432 }
433 if (StringUtils.isNotEmpty(bib.getStatus()) || StringUtils.isNotEmpty(bib.getStatusUpdatedOn())) {
434 solrbibInputDocument.setField(STATUS_UPDATED_ON, getDate(bib.getStatusUpdatedOn()));
435 }
436 } else {
437 buildSolrInputDocFromSolrDoc(solrDocument, solrbibInputDocument);
438 }
439 setCommonFieldsForSolrDoc(solrbibInputDocument, bib, solrDocument);
440 solrInputDocuments.add(solrbibInputDocument);
441
442 }
443
444 private void addBibInfoToHoldings(List<SolrInputDocument> solrInputDocuments, SolrInputDocument bibSolrDoc, SolrDocument solrDocument ) {
445 Object instanceIdentifier = solrDocument.getFieldValue(HOLDINGS_IDENTIFIER);
446 bibSolrDoc.addField(HOLDINGS_IDENTIFIER, instanceIdentifier);
447 List<String> holdinsgsIds = new ArrayList<>();
448 if(instanceIdentifier instanceof String) {
449 holdinsgsIds.add((String) instanceIdentifier);
450 }
451 else {
452 holdinsgsIds.addAll((List<String>) instanceIdentifier);
453 }
454
455 for(String holdingsId : holdinsgsIds) {
456 List<SolrDocument> solrDocumentList = getSolrDocumentBySolrId(holdingsId);
457 if (CollectionUtils.isNotEmpty(solrDocumentList)) {
458 SolrDocument holdingsSolrDocument = solrDocumentList.get(0);
459 SolrInputDocument holdingsSolrInputDocument = new SolrInputDocument();
460 buildSolrInputDocFromSolrDoc(holdingsSolrDocument, holdingsSolrInputDocument);
461 removeFieldFromSolrInputDocument(holdingsSolrInputDocument);
462 addBibInfoForHoldingsOrItems(holdingsSolrInputDocument, bibSolrDoc);
463 List<String> itemIds = new ArrayList<>();
464
465 Object itemIdentifier = holdingsSolrDocument.getFieldValue(ITEM_IDENTIFIER);
466 if (itemIdentifier != null) {
467 if (itemIdentifier instanceof String) {
468 itemIds.add((String) itemIdentifier);
469 } else {
470 itemIds.addAll((List<String>) itemIdentifier);
471 }
472 }
473
474 for (String itemId : itemIds) {
475
476 List<SolrDocument> itemDocumentList = getSolrDocumentBySolrId(itemId);
477 SolrDocument itemSolrDocument = itemDocumentList.get(0);
478 SolrInputDocument itemSolrInputDocument = new SolrInputDocument();
479 buildSolrInputDocFromSolrDoc(itemSolrDocument, itemSolrInputDocument);
480 removeFieldFromSolrInputDocument(itemSolrInputDocument);
481 addBibInfoForHoldingsOrItems(itemSolrInputDocument, bibSolrDoc);
482 addHoldingsInfoToItem(itemSolrInputDocument, bibSolrDoc);
483 solrInputDocuments.add(itemSolrInputDocument);
484 }
485 solrInputDocuments.add(holdingsSolrInputDocument);
486
487 }
488 }
489
490 }
491
492 protected void deleteRecordInSolr(SolrServer solrServer, String id) throws IOException, SolrServerException {
493 String query = "bibIdentifier:" + id + " OR " + "id:" + id;
494 UpdateResponse updateResponse = solrServer.deleteByQuery(query);
495 LOG.info("updateResponse " + updateResponse);
496
497 String newId = id + "_d";
498 SolrInputDocument solrInputDocument = new SolrInputDocument();
499 solrInputDocument.setField("DocType", "bibliographic_delete");
500 solrInputDocument.setField("dateUpdated", new Date());
501 solrInputDocument.setField("uniqueId", newId);
502 solrInputDocument.setField("id", newId);
503 solrInputDocument.setField("LocalId_display", DocumentLocalId.getDocumentIdDisplay(id));
504 UpdateResponse updateResponseForBib = solrServer.add(solrInputDocument);
505 LOG.debug("updateResponse " + updateResponseForBib);
506
507 }
508
509 private void setCommonFieldsForSolrDoc(SolrInputDocument solrInputDocument, Bib bib, SolrDocument solrDocument) {
510 solrInputDocument.setField(ID, bib.getId());
511 solrInputDocument.addField(UNIQUE_ID, bib.getId());
512 solrInputDocument.setField(DOC_CATEGORY, DocCategory.WORK.getCode());
513 String updatedBy = bib.getUpdatedBy();
514 solrInputDocument.setField(UPDATED_BY, updatedBy);
515 solrInputDocument.setField(DATE_UPDATED, new Date());
516 solrInputDocument.setField(CREATED_BY, solrDocument.getFieldValue(CREATED_BY));
517 solrInputDocument.setField(DATE_ENTERED, solrDocument.getFieldValue(DATE_ENTERED));
518 solrInputDocument.setField(BIB_ID, bib.getId());
519 solrInputDocument.addField(LOCALID_SEARCH, DocumentLocalId.getDocumentId(bib.getId()));
520 solrInputDocument.addField(LOCALID_DISPLAY, DocumentLocalId.getDocumentIdDisplay(bib.getId()));
521 solrInputDocument.addField(STAFF_ONLY_FLAG, bib.isStaffOnly());
522 solrInputDocument.setField(STATUS_SEARCH, bib.getStatus());
523 solrInputDocument.setField(STATUS_DISPLAY, bib.getStatus());
524 }
525
526
527 private Date getDate(String dateStr) {
528 DateFormat format = new SimpleDateFormat(Constants.DATE_FORMAT);
529 try {
530 if (StringUtils.isNotEmpty(dateStr)) {
531 return format.parse(dateStr);
532 } else {
533 return new Date();
534 }
535
536 } catch (ParseException e) {
537 LOG.info("Exception : " + dateStr + " for format:: " + Constants.DATE_FORMAT, e);
538 return new Date();
539 }
540 }
541
542
543
544
545
546
547
548
549 public SolrInputDocument buildSolrInputDocument(BibMarcRecord record) {
550 SolrInputDocument solrDoc = new SolrInputDocument();
551 buildSolrInputDocument(record,solrDoc);
552 return solrDoc;
553 }
554
555 public SolrInputDocument buildSolrInputDocument(BibMarcRecord record,SolrInputDocument solrDoc ) {
556
557 solrDoc.addField(LEADER, record.getLeader());
558
559
560 List<ControlField> controlFieldList = record.getControlFields();
561
562 for (ControlField cf : controlFieldList) {
563 solrDoc.addField("controlfield_" + cf.getTag(), cf.getValue());
564 }
565
566 solrDoc.addField(DOC_TYPE, DocType.BIB.getDescription());
567 solrDoc.addField(DOC_FORMAT, DocFormat.MARC.getDescription());
568
569 for (String field : documentSearchConfig.FIELDS_TO_TAGS_2_INCLUDE_MAP.keySet()) {
570 if (!field.equalsIgnoreCase("mdf_035a") && !field.startsWith("Local")) {
571 addFieldToSolrDoc(record, field, buildFieldValue(field, record), solrDoc);
572 }
573
574 }
575 addFieldToSolrDoc(record, ALL_TEXT, getAllText(record), solrDoc);
576 addGeneralFieldsToSolrDoc(record, solrDoc);
577 if(record.getLeader() == null || ((record.getLeader().length() >= 8) && (record.getLeader().charAt(7) != 's'))) {
578 solrDoc.removeField(JOURNAL_TITLE_SEARCH);
579 solrDoc.removeField(JOURNAL_TITLE_DISPLAY);
580 solrDoc.removeField(JOURNAL_TITLE_SORT);
581 }
582 return solrDoc;
583 }
584
585 private void addFieldToSolrDoc(BibMarcRecord record, String fieldName, Object value,
586 SolrInputDocument solrDoc) {
587 int ind2Value = 0;
588 if (value instanceof List) {
589 if (fieldName.toLowerCase().endsWith("_sort"))
590 {
591 ind2Value = getSecondIndicator(record, fieldName);
592 LOG.debug("field name -->" + fieldName + "----->" + ind2Value);
593 if (ind2Value > 0) {
594 solrDoc.addField(fieldName, ((List) value).get(0).toString().substring(ind2Value));
595 } else {
596 solrDoc.addField(fieldName, ((List) value).get(0));
597 }
598
599 } else if (fieldName.endsWith("_facet")) {
600 solrDoc.addField(fieldName, getSortString((List) value));
601 } else {
602 if (((List) value).size() > 0) {
603 for (Object obj : (List<Object>) value)
604
605 {
606 solrDoc.addField(fieldName, obj);
607 }
608 } else {
609 solrDoc.addField(fieldName, null);
610 }
611 }
612 } else {
613 if (fieldName.toLowerCase().endsWith("_sort"))
614 {
615 ind2Value = getSecondIndicator(record, fieldName);
616 LOG.debug("field name -->" + fieldName + "----->" + ind2Value);
617 if (value != null && ind2Value > 0) {
618 String fieldValue = value.toString();
619 try {
620 fieldValue = value.toString().substring(ind2Value);
621 }
622 catch (Exception e) {
623 LOG.error("Exception while getting value:" + value.toString() + " for field:" + fieldName + ". Exception:" + e.toString());
624
625 }
626 solrDoc.addField(fieldName, fieldValue);
627 } else {
628 solrDoc.addField(fieldName, value);
629 }
630 } else if (fieldName.endsWith("_facet")) {
631 if (value != null) {
632 solrDoc.addField(fieldName, getSortString(value.toString()));
633 }
634 } else {
635 solrDoc.addField(fieldName, value);
636 }
637 }
638 }
639
640
641
642
643
644
645
646
647 public Object buildFieldValue(String fieldName, BibMarcRecord record) {
648 List<ControlField> controlFieldList = record.getControlFields();
649 List<DataField> dataFields = record.getDataFields();
650 String includeTags = documentSearchConfig.FIELDS_TO_TAGS_2_INCLUDE_MAP.get(fieldName);
651 if ((includeTags != null) && (includeTags.length() > 0)) {
652 String excludeTags = documentSearchConfig.FIELDS_TO_TAGS_2_EXCLUDE_MAP.get(fieldName);
653 if (excludeTags == null) {
654 excludeTags = "";
655 }
656 if (fieldName.startsWith("Subject_")) {
657 return getDataFieldValue(includeTags, excludeTags, record, true, fieldName);
658 } else {
659 if (fieldName.equals(ISBN_SEARCH))
660 return normalizeIsbn(getDataFieldValue(includeTags, excludeTags, record, false, fieldName));
661 else
662 return getDataFieldValue(includeTags, excludeTags, record, false, fieldName);
663 }
664 } else if (fieldName.equals(PUBLICATIONDATE_DISPLAY) || fieldName.equals(PUBLICATIONDATE_SEARCH) || fieldName.equals(PUBLICATIONDATE_FACET)
665 || fieldName.equals(PUBLICATIONDATE_SORT)) {
666 String publicationDate = "";
667 String publicationEndDate = "";
668 Object publicationDateValue = null;
669 for (ControlField controlField : controlFieldList) {
670 if (controlField.getTag().equalsIgnoreCase("008")) {
671 String controlField008 = controlField.getValue();
672 if (controlField008 != null && controlField008.length() > 10) {
673 publicationDate = controlField008.substring(7, 11);
674 publicationDate = extractPublicationDateWithRegex(publicationDate);
675 if (controlField008.length() > 14) {
676 publicationEndDate = controlField008.substring(11, 15);
677 publicationEndDate = extractPublicationDateWithRegex(publicationEndDate);
678 }
679 }
680 }
681 }
682 if (publicationDate == null || publicationDate.trim().length() == 0) {
683 if (getDataFieldValue("260-c", "", record, true, fieldName) instanceof String) {
684 publicationDate = (String) getDataFieldValue("260-c", "", record, true, fieldName);
685 } else if (getDataFieldValue("260-c", "", record, true, fieldName) instanceof List) {
686 publicationDate = ((List<String>) getDataFieldValue("260-c", "", record, true, fieldName)).get(0);
687 }
688 publicationDate = extractPublicationDateWithRegex(publicationDate);
689 }
690 if (fieldName.equals(PUBLICATIONDATE_FACET)) {
691 if (publicationDate.equalsIgnoreCase("")) {
692 publicationDateValue = "Date could not be determined";
693 } else {
694 publicationDateValue = buildPublicationDateFacetValue(publicationDate, publicationEndDate);
695 }
696 return publicationDateValue;
697 }
698 return publicationDate;
699 } else if (fieldName.equals(LANGUAGE_DISPLAY) || fieldName.equals(LANGUAGE_SEARCH) || fieldName.equals(LANGUAGE_FACET)) {
700 List<Object> langs = new ArrayList<Object>();
701 for (ControlField controlField : controlFieldList) {
702 if (controlField.getTag().equalsIgnoreCase("008")) {
703 String cf8 = controlField.getValue();
704 if (cf8 != null && cf8.length() > 37) {
705 String lang = Languages.getInstance(Languages.ISO_639_3).getLanguageDescription(
706 cf8.substring(35, 38));
707 langs.add(lang == null ? "Undefined" : lang);
708 }
709 }
710 }
711 if (fieldName.equals(LANGUAGE_SEARCH) || fieldName.equals(LANGUAGE_FACET)) {
712 for (DataField df : dataFields) {
713 if (df.getTag().equals("546")) {
714 try {
715 for (SubField subfield : df.getSubFields()) {
716 if (subfield.getCode().equalsIgnoreCase("a")) {
717 langs.add(subfield.getValue());
718 }
719 }
720 } catch (RuntimeException re) {
721 LOG.info("Exception :", re);
722 }
723 }
724 }
725 }
726 return langs;
727 } else if (fieldName.equals(FORMAT_DISPLAY) || fieldName.equals(FORMAT_SEARCH) || fieldName.equals(FORMAT_FACET)) {
728 return getRecordFormat(record);
729 } else if (fieldName.equals(RESOURCETYPE_DISPLAY) || fieldName.equals(RESOURCETYPE_SEARCH)) {
730 return getRecordFormat_ResourceType(record);
731 } else if (fieldName.equals(CARRIER_DISPLAY) || fieldName.equals(CARRIER_SEARCH)) {
732 return getRecordFormat_Carrier(record);
733 } else if(fieldName.equals(DESCRIPTION_SEARCH)) {
734 String excludeTags = documentSearchConfig.FIELDS_TO_TAGS_2_EXCLUDE_MAP.get(fieldName);
735 if (excludeTags == null) {
736 excludeTags = "";
737 }
738 if (includeTags == null) {
739 includeTags = "";
740 }
741 return getDataFieldValue(includeTags, excludeTags, record, false, fieldName);
742 } else {
743 throw new RuntimeException("Unknown field named:" + fieldName);
744 }
745 }
746
747
748
749
750
751
752
753 public String getAllText(BibMarcRecord record) {
754 StringBuilder allText = new StringBuilder();
755 allText.append(record.getLeader());
756 allText.append(SEPERATOR_DATA_FIELD);
757 for (ControlField cf : record.getControlFields()) {
758 allText.append(cf.getValue());
759 allText.append(SEPERATOR_DATA_FIELD);
760 }
761 for (DataField df : record.getDataFields()) {
762 for (SubField sf : df.getSubFields()) {
763 allText.append(sf.getValue());
764 allText.append(SEPERATOR_SUB_FIELD);
765 }
766 allText.append(SEPERATOR_DATA_FIELD);
767 }
768 return allText.toString();
769 }
770
771
772
773
774
775
776
777 public String getRecordFormat(BibMarcRecord record) {
778 String format = null;
779 String cF7 = null;
780 String cF8 = null;
781 String formatData = "";
782 char cF8Ch21 = ' ';
783 char cF8Ch22 = ' ';
784 char cF8Ch28 = ' ';
785 char cF7Ch0 = ' ';
786 int cFIndex = record.getControlFields().indexOf(new ControlField("007"));
787 if (cFIndex != -1) {
788 cF7 = record.getControlFields().get(cFIndex).getValue();
789 }
790 cFIndex = record.getControlFields().indexOf(new ControlField("008"));
791 if (cFIndex != -1) {
792 cF8 = record.getControlFields().get(cFIndex).getValue();
793 }
794 Object tmp = null;
795 tmp = getDataFieldValue("111-a", "", record, false, "");
796 String dF111a = tmp != null ? tmp.toString() : null;
797 tmp = getDataFieldValue("254-h", "", record, false, "");
798 String dF254h = tmp != null ? tmp.toString() : null;
799 tmp = getDataFieldValue("254-k", "", record, false, "");
800 String dF254k = tmp != null ? tmp.toString() : null;
801 tmp = getDataFieldValue("260-b", "", record, false, "");
802 String dF260b = tmp != null ? tmp.toString() : null;
803 tmp = getDataFieldValue("502-a", "", record, false, "");
804 String dF502a = tmp != null ? tmp.toString() : null;
805 tmp = getDataFieldValue("711-a", "", record, false, "");
806 String dF711a = tmp != null ? tmp.toString() : null;
807
808 if (cF8 != null && cF8.length() > 22) {
809 cF8Ch21 = cF8.charAt(21);
810 cF8Ch22 = cF8.charAt(22);
811 }
812 if (cF8 != null && cF8.length() > 28) {
813 cF8Ch28 = cF8.charAt(28);
814 }
815 if (cF7 != null) {
816 cF7Ch0 = cF7.charAt(0);
817 }
818 if (record.getLeader() != null && record.getLeader().length() > 8) {
819 formatData = record.getLeader().substring(6, 8);
820 }
821
822 if (dF254h != null && dF254h.contains("micro")) {
823 format = "Microformat";
824 } else if (formatData.equals("tm") && dF502a != null) {
825 format = "Thesis/Dissertation";
826 } else if (dF111a != null || dF711a != null) {
827 format = "Conference/Event";
828 } else if (formatData.equals("aa") || formatData.equals("am") || formatData.equals("ac") || formatData
829 .equals("tm")) {
830 if (dF254k != null && dF254k.contains("kit")) {
831 format = "Other";
832 } else {
833 format = "Book";
834 }
835 } else if (formatData.equals("im") || formatData.equals("jm") || formatData.equals("jc")
836 || formatData.equals("jd") || formatData.equals("js")) {
837 format = "Sound recording";
838 } else if (formatData.equals("cm") || formatData.equals("dm") || formatData.equals("ca")
839 || formatData.equals("cb") || formatData.equals("cd") || formatData.equals("cs")) {
840 format = "Musical score";
841 } else if (formatData.equals("fm") || ("".equals(formatData) && formatData.startsWith("e"))) {
842 format = "Map/Atlas";
843 } else if (formatData.equals("gm") || (cF7 != null && (cF7Ch0 == ('v')))) {
844 format = "Video";
845 } else if (formatData.equals("gm") || (cF7 != null && (cF7Ch0 == ('g')))) {
846 format = "Projected graphic";
847 } else if (formatData.equals("as") || formatData.equals("gs")) {
848 format = "Journal/Periodical";
849 } else if (formatData.equals("km")) {
850 format = "Image";
851 } else if (formatData.equals("mm")) {
852 format = "Datafile";
853 } else if (formatData.equals("as") && (cF8Ch21 == 'n' || cF8Ch22 == 'e')) {
854 format = "Newspaper";
855 } else if ("".equals(formatData) && formatData.startsWith("r")) {
856 format = "3D object";
857 } else if (formatData != "" && formatData.endsWith("i")) {
858 format = "Database/Website";
859 } else if (("".equals(formatData) && (!formatData.startsWith("c") || !formatData.startsWith("d")
860 || !formatData.startsWith("i") || !formatData.startsWith("j"))) && (
861 (cF8Ch28 == 'f' || cF8Ch28 == 'i' || cF8Ch28 == 'o') && (dF260b != null && !dF260b
862 .contains("press")))) {
863 format = "Government document";
864 } else {
865 format = "Other";
866 }
867 return format;
868 }
869
870
871
872
873
874
875
876 public String getRecordFormat_ResourceType(BibMarcRecord record) {
877 String format = null;
878 char leader6 = ' ';
879 char leader7 = ' ';
880 if (record.getLeader() != null) {
881 String leader = record.getLeader().trim();
882 if (StringUtils.isNotBlank(leader)) {
883 if (leader.length() >= 7) {
884 leader6 = leader.charAt(6);
885 }
886 if (leader.length() >= 8) {
887 leader7 = leader.charAt(7);
888 }
889
890 if ((leader6 == 'a' || leader6 == 't') && leader7 == 'm') {
891 format = "Book";
892 }
893 if (leader6 == 'a' && leader7 == 's') {
894 format = "Serial";
895 }
896 if (leader6 == 'c' || leader6 == 'd') {
897 format = "Score";
898 }
899 if (leader6 == 'j' || leader6 == 'i') {
900 format = "Sound recording";
901 }
902 if (leader6 == 'e' || leader6 == 'f') {
903 format = "Map";
904 }
905 if (leader6 == 'g') {
906 format = "Motion picture";
907 }
908 if (leader6 == 'k') {
909 format = "Photo/Print";
910 }
911 if (leader6 == 'm') {
912 format = "Computer file";
913 }
914 if (leader6 == 'p') {
915 format = "Archival materials";
916 }
917 if (leader6 == 'r') {
918 format = "Artifacts";
919 }
920 }
921 }
922 return format;
923 }
924
925 public String getRecordFormat_Carrier(BibMarcRecord record) {
926 String format = null;
927 String cF7 = null;
928 String cF8 = null;
929 char cF70 = ' ';
930 char cF71 = ' ';
931 char cF823 = ' ';
932 char cF829 = ' ';
933 char leader06 = ' ';
934 String leader ="";
935 if (record.getLeader() != null) {
936 leader = record.getLeader();
937 }
938 for (ControlField controlField : record.getControlFields()) {
939 if (controlField.getTag().equals("007")) {
940 cF7 = controlField.getValue();
941 }else if(controlField.getTag().equals("008")){
942 cF8 = controlField.getValue();
943 }
944 }
945
946 if(StringUtils.isNotBlank(cF7) && cF7.length() >= 1){
947 cF70 = cF7.charAt(0);
948 }
949 if(StringUtils.isNotBlank(cF7) && cF7.length() >= 2){
950 cF71 = cF7.charAt(1);
951 }
952 if(StringUtils.isNotBlank(cF8) && cF8.length() >= 24){
953 cF823 = cF8.charAt(23);
954 }
955 if(StringUtils.isNotBlank(cF8) && cF8.length() >= 30){
956 cF829 = cF8.charAt(29);
957 }
958 if(StringUtils.isNotBlank(leader) && leader.length() >= 7){
959 leader06 = leader.charAt(6);
960 }
961
962 if(cF70 == 'h'){
963 format = "Microform";
964 return format;
965 }
966 if(cF70 == 'c' && cF71 == 'r'){
967 format = "Remote e-resource";
968 return format;
969 }
970 if(cF70 == 'c' && cF71 != 'r'){
971 format = "Direct access 3-resource";
972 return format;
973 }
974 if((leader06 == 'a' || leader06 == 'c' || leader06 == 'd' || leader06 == 'p' || leader06 == 't') && (cF823 == 'd' || cF823 == 'f' || cF823 == 'r' || cF823 == ' ')){
975 format = "Print";
976 }
977 if((leader06 == 'e' || leader06 == 'f' || leader06 == 'k') && (cF829 == 'd' || cF829 == 'r' || cF829 == ' ')){
978 format = "Print";
979 }
980 return format;
981 }
982
983
984
985
986
987
988
989
990
991
992
993
994 private Object getDataFieldValue(String includeTags, String excludeTags, BibMarcRecord record,
995 boolean isHyphenSeperatorFirst, String fieldName) {
996 List<Object> fieldValues = new ArrayList<Object>();
997 StringTokenizer includeTagsTokenizer = new StringTokenizer(includeTags, ",");
998
999 while (includeTagsTokenizer.hasMoreElements()) {
1000 String tag = includeTagsTokenizer.nextToken();
1001 tag = tag.trim();
1002 int subFieldIdx = tag.indexOf('-');
1003 String tagNum = (subFieldIdx == -1) ? tag : tag.substring(0, subFieldIdx);
1004
1005 for (int i = 0; i < record.getDataFields().size(); i++) {
1006 DataField dataField = record.getDataFields().get(i);
1007 if (isValidTag(dataField.getTag(), tagNum)) {
1008 StringBuilder fieldValue = new StringBuilder();
1009 List<SubField> subFields = dataField.getSubFields();
1010 if (subFieldIdx != -1) {
1011 if (!excludeTags.contains(tag)) {
1012 String subFieldCodes = tag.substring(subFieldIdx + 1, tag.length());
1013 boolean isHyphenCodedOnce = false;
1014 for (SubField subField : subFields) {
1015 if (subFieldCodes.contains(subField.getCode())) {
1016 if (fieldValue.length() != 0) {
1017 if (!isHyphenSeperatorFirst || isHyphenCodedOnce || (
1018 dataField.getTag().endsWith("00") || dataField.getTag().endsWith("10")
1019 || dataField.getTag().endsWith("11"))) {
1020 fieldValue.append(SEPERATOR_SUB_FIELD);
1021 } else {
1022 fieldValue.append(SEPERATOR_HYPHEN);
1023 isHyphenCodedOnce = true;
1024 }
1025 }
1026 fieldValue.append(subField.getValue());
1027 }
1028 }
1029 }
1030 } else {
1031 boolean isHyphenCodedOnce = false;
1032 boolean isFirstSubField = false;
1033 for (SubField subField : subFields) {
1034 if (!excludeTags.contains(dataField.getTag() + "-" + subField.getCode()) && !excludeTags
1035 .contains(tagNum + "-" + subField.getCode())) {
1036 if (fieldValue.length() != 0) {
1037 if (!isHyphenSeperatorFirst || isHyphenCodedOnce || (
1038 dataField.getTag().endsWith("00") || dataField.getTag().endsWith("10")
1039 || dataField.getTag().endsWith("11"))) {
1040 fieldValue.append(SEPERATOR_SUB_FIELD);
1041 } else if (fieldName != null && (fieldName.equalsIgnoreCase(SUBJECT_FACET)
1042 || fieldName.equalsIgnoreCase(SUBJECT_DISPLAY))) {
1043 if (dataField.getTag().equalsIgnoreCase("630")) {
1044 if (subField.getCode().equals("v") || subField.getCode().equals("x")
1045 || subField.getCode().equals("y") || subField.getCode().equals("z")) {
1046 fieldValue.append(SEPERATOR_DOUBLE_HYPHEN);
1047 }
1048 } else if (dataField.getTag().equalsIgnoreCase("650") || dataField.getTag()
1049 .equalsIgnoreCase(
1050 "651")) {
1051 if (isFirstSubField && fieldName.equalsIgnoreCase(SUBJECT_FACET)) {
1052 fieldValues.add(fieldValue.toString().trim());
1053 }
1054 fieldValue.append(SEPERATOR_DOUBLE_HYPHEN);
1055 isFirstSubField = true;
1056 } else {
1057 fieldValue.append(SEPERATOR_SUB_FIELD);
1058 }
1059 } else {
1060 if (fieldName.startsWith("Subject_")) {
1061 fieldValue.append(SEPERATOR_SUB_FIELD);
1062 } else {
1063 fieldValue.append(SEPERATOR_HYPHEN);
1064 isHyphenCodedOnce = true;
1065 }
1066 }
1067 }
1068 fieldValue.append(subField.getValue());
1069 }
1070 }
1071 }
1072 if ((dataField.getTag().equalsIgnoreCase("650") || dataField.getTag().equalsIgnoreCase("651"))
1073 && fieldValue != null && fieldValue.length() > 1 && fieldValue.toString().trim().length() > 1) {
1074 String fieldVal = fieldValue.toString().trim();
1075 String lastChar = String.valueOf(fieldVal.charAt(fieldVal.length() - 1));
1076 if (!lastChar.equalsIgnoreCase(".")) {
1077 fieldValue.append(".");
1078 }
1079 }
1080 fieldValues.add(fieldValue.toString().trim());
1081 }
1082 }
1083 }
1084 if (fieldValues.size() == 1) {
1085 return fieldValues.get(0);
1086 } else if (fieldValues.size() > 0) {
1087 return fieldValues;
1088 } else {
1089 return null;
1090 }
1091 }
1092
1093
1094
1095
1096
1097
1098
1099
1100 private boolean isValidTag(String tag, String tagFormat) {
1101 try {
1102 if (!tagFormat.contains(PATTERN_CHAR)) {
1103 return tagFormat.equals(tag);
1104 } else {
1105 int idx = tagFormat.lastIndexOf(PATTERN_CHAR);
1106 return isValidTag(tag.substring(0, idx) + tag.substring(idx + PATTERN_CHAR.length(), tag.length()), tagFormat.substring(0, idx)
1107 + tagFormat.substring(idx + PATTERN_CHAR.length(), tagFormat.length()));
1108 }
1109 } catch (Exception e) {
1110 LOG.info("Exception :", e);
1111 return false;
1112 }
1113 }
1114
1115 private void addGeneralFieldsToSolrDoc(BibMarcRecord record, SolrInputDocument solrDoc) {
1116 String isbnDataFields = documentSearchConfig.FIELDS_TO_TAGS_2_INCLUDE_MAP.get(ISBN_SEARCH);
1117 for (DataField dataField : record.getDataFields()) {
1118 String tag = dataField.getTag();
1119 for (SubField subField : dataField.getSubFields()) {
1120 String subFieldKey = subField.getCode();
1121 String subFieldValue = subField.getValue();
1122 String key = tag + subFieldKey;
1123 subFieldValue = processGeneralFieldValue(tag, subFieldKey, subFieldValue, isbnDataFields);
1124 solrDoc.addField(DYNAMIC_FIELD_PREFIX + key, subFieldValue);
1125 }
1126 }
1127 }
1128
1129 private String processGeneralFieldValue(String tag, String subFieldKey, String subFieldValue, String isbnKey) {
1130 String value = subFieldValue;
1131 if (isbnKey.contains(tag) && isbnKey.contains(subFieldKey)) {
1132 value = (String) normalizeIsbn(subFieldValue);
1133 }
1134 return value;
1135 }
1136
1137 private Object normalizeIsbn(Object isbnValue) {
1138 Object result = null;
1139 ISBNUtil isbnUtil = new ISBNUtil();
1140 if (isbnValue != null) {
1141 if (isbnValue instanceof List) {
1142 result = new ArrayList<String>();
1143 for (Object obj : (List<Object>) isbnValue) {
1144 if (((String) obj).length() > 0) {
1145 try {
1146 ((List<String>) result).add(isbnUtil.normalizeISBN(obj));
1147 } catch (OleException e) {
1148
1149 ((List<String>) result).add((String) obj + " " + ISBN_NOT_NORMALIZED);
1150 }
1151 } else {
1152 ((List<String>) result).add((String) obj);
1153 }
1154 }
1155 } else {
1156 if (((String) isbnValue).length() > 0) {
1157 try {
1158 result = isbnUtil.normalizeISBN(isbnValue);
1159 } catch (OleException e) {
1160
1161 result = isbnValue + " " + ISBN_NOT_NORMALIZED;
1162 }
1163 } else {
1164 result = isbnValue;
1165 }
1166 }
1167 }
1168 return result;
1169 }
1170
1171
1172
1173
1174
1175
1176 private void pubCentury(int pubCen, List<String> pubList) {
1177 String pubCentury = String.valueOf(pubCen);
1178 if (pubCentury.endsWith("1")) {
1179 if (pubCentury.equalsIgnoreCase("11")) {
1180 pubList.add(pubCentury + "th Century");
1181 } else {
1182 pubList.add(pubCentury + "st Century");
1183 }
1184 } else if (pubCentury.endsWith("2")) {
1185 if (pubCentury.equalsIgnoreCase("12")) {
1186 pubList.add(pubCentury + "th Century");
1187 } else {
1188 pubList.add(pubCentury + "nd Century");
1189 }
1190 } else if (pubCentury.endsWith("3")) {
1191 if (pubCentury.equalsIgnoreCase("13")) {
1192 pubList.add(pubCentury + "th Century");
1193 } else {
1194 pubList.add(pubCentury + "rd Century");
1195 }
1196 } else {
1197 pubList.add(pubCentury + "th Century");
1198 }
1199
1200 }
1201
1202
1203 public String extractPublicationDateWithRegex(String publicationDate) {
1204 Pattern pattern = Pattern.compile(publicationDateRegex);
1205 Matcher matcher = pattern.matcher(publicationDate);
1206 if (matcher.find()) {
1207 if (matcher.group(0).equalsIgnoreCase("0000")) {
1208 return "";
1209 }
1210 return matcher.group(0);
1211 } else {
1212 return "";
1213 }
1214
1215
1216 }
1217
1218
1219
1220
1221
1222
1223 public Object buildPublicationDateFacetValue(String publicationDate, String publicationEndDate) {
1224 int pubDat = 0;
1225 List<String> pubList = new ArrayList<String>();
1226 Calendar cal = Calendar.getInstance();
1227 int year = cal.get(Calendar.YEAR);
1228 if (publicationDate != null && publicationDate.length() == 4 && Integer.parseInt(publicationDate) <= year) {
1229 int pubStartDate = Integer.parseInt(publicationDate);
1230 if (publicationEndDate != null && publicationEndDate.length() == 4 && pubStartDate < Integer
1231 .parseInt(publicationEndDate)) {
1232 if (Integer.parseInt(publicationEndDate) > year) {
1233 publicationEndDate = String.valueOf(year);
1234 }
1235 int pubEndDate = Integer.parseInt(publicationEndDate);
1236 while (pubStartDate < pubEndDate) {
1237 pubStartDate = (pubStartDate / 10) * 10;
1238 if (pubStartDate == 0) {
1239 pubList.add("Date could not be determined");
1240 } else {
1241 pubList.add(String.valueOf(pubStartDate) + "s");
1242 }
1243 pubStartDate = pubStartDate + 10;
1244 }
1245 pubStartDate = Integer.parseInt(publicationDate);
1246 pubEndDate = Integer.parseInt(publicationEndDate);
1247 while (pubStartDate < pubEndDate) {
1248 pubStartDate = (pubStartDate) / 100;
1249 pubDat = (pubStartDate) + 1;
1250 pubCentury(pubDat, pubList);
1251 pubStartDate = pubStartDate * 100 + 100;
1252 }
1253 } else {
1254 pubDat = (pubStartDate / 10) * 10;
1255 int pubCen = ((pubStartDate) / 100) + 1;
1256 if (pubDat == 0) {
1257 pubList.add("Date could not be determined");
1258 } else {
1259 pubList.add(String.valueOf(pubDat) + "s");
1260 pubCentury(pubCen, pubList);
1261 }
1262 }
1263 } else {
1264 pubList.add("Date could not be determined");
1265 }
1266 return pubList;
1267 }
1268
1269
1270
1271
1272
1273
1274
1275
1276 public List<String> buildPublicationDateFacetValues(List<String> publicationDates) {
1277 List<String> valueList = null;
1278 if (!CollectionUtils.isEmpty(publicationDates)) {
1279 valueList = new ArrayList<String>(publicationDates.size());
1280 for (int i = 0; i < publicationDates.size(); i++) {
1281 String pubDate = publicationDates.get(i);
1282 Object pubDt = buildPublicationDateFacetValue(pubDate, "");
1283 if (pubDt instanceof String) {
1284 valueList.add((String) pubDt);
1285 } else if (pubDt instanceof List) {
1286 List<String> pubDateList = (List<String>) pubDt;
1287 for (String pubDtVal : pubDateList) {
1288 valueList.add(pubDtVal);
1289 }
1290 }
1291 }
1292 }
1293 return valueList;
1294 }
1295
1296
1297 public String getSortString(String str) {
1298 String ret = "";
1299 StringBuffer sortString = new StringBuffer();
1300 ret = str.toLowerCase();
1301 ret = ret.replaceAll("[\\-\\/]", " ");
1302 ret = ret.replace("<", "");
1303 ret = ret.replace(">", "");
1304 ret = ret.replaceAll("[\\.\\,\\;\\:\\(\\)\\{\\}\\'\\!\\?\\\"\\<\\>\\[\\]]", "");
1305 ret = Normalizer.normalize(ret, Normalizer.Form.NFD).replaceAll("\\p{InCombiningDiacriticalMarks}+", "");
1306 ret = ret.replaceAll("\\s+", " ");
1307 sortString.append(ret);
1308 sortString.append(" /r/n!@#$");
1309 sortString.append(str);
1310 return sortString.toString();
1311 }
1312
1313 public List<String> getSortString(List<String> list) {
1314 List<String> sortStringList = new ArrayList<String>();
1315 for (String str : list) {
1316 sortStringList.add(getSortString(str));
1317 }
1318 return sortStringList;
1319 }
1320
1321 private int getSecondIndicator(BibMarcRecord record, String fieldName) {
1322 int ind2Value = 0;
1323 String fieldTags = documentSearchConfig.FIELDS_TO_TAGS_2_INCLUDE_MAP.get(fieldName);
1324 String[] tagValueList = null;
1325 if (fieldTags != null) {
1326 tagValueList = fieldTags.split(",");
1327 List<DataField> dataFieldList = record.getDataFields();
1328 String ind2 = null;
1329 boolean isVisit = true;
1330 for (DataField dataField : dataFieldList) {
1331 String tag = dataField.getTag();
1332 for (String tagValue : tagValueList) {
1333 StringBuffer sb = null;
1334 if (fieldName.equalsIgnoreCase(AUTHOR_SORT) || fieldName.equalsIgnoreCase(TITLE_SORT)) {
1335 sb = getTagValues(dataField, tag, tagValue);
1336 if (sb != null && sb.toString().length() > 0 && isVisit) {
1337 ind2 = dataField.getInd2();
1338 isVisit = false;
1339 }
1340
1341 }
1342 }
1343 }
1344 try {
1345 if (ind2 != null)
1346 ind2Value = Integer.parseInt(ind2);
1347
1348 } catch (Exception e) {
1349 ind2Value = -1;
1350 }
1351
1352 }
1353 return ind2Value;
1354 }
1355
1356 private StringBuffer getTagValues(DataField dataField, String tag, String tagValue) {
1357 StringBuffer sb = new StringBuffer();
1358 String[] tags = tagValue.split("-");
1359 for (String tagName : tags) {
1360 if (tag.equalsIgnoreCase(tagName)) {
1361 List<SubField> subFieldList = dataField.getSubFields();
1362 for (SubField subField : subFieldList) {
1363 sb.append(subField.getValue() + " ");
1364 }
1365
1366 }
1367 }
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377 return sb;
1378 }
1379
1380 public void bind(String holdingsId, List<String> bibIds) throws SolrServerException, IOException {
1381 List<SolrInputDocument> solrInputDocumentList = new ArrayList<SolrInputDocument>();
1382 updateInstanceDocument(holdingsId, bibIds, solrInputDocumentList);
1383 updateBibDocument(holdingsId, bibIds, solrInputDocumentList);
1384 LOG.info("solrInputDocumentList-->" + solrInputDocumentList);
1385 SolrServer server = SolrServerManager.getInstance().getSolrServer();
1386 UpdateResponse updateResponse = server.add(solrInputDocumentList);
1387 server.commit();
1388 }
1389
1390 private void updateBibDocument(String holdingsId, List<String> bibIds, List<SolrInputDocument> solrInputDocumentList) {
1391 for (String bibId : bibIds) {
1392 SolrDocument bibSolrDocument = getSolrDocumentByUUID(bibId);
1393 List<String> holdingsIdentifierList = new ArrayList<String>();
1394 Object holdingsIdentifier = bibSolrDocument.getFieldValue(HOLDINGS_IDENTIFIER);
1395 if (holdingsIdentifier instanceof List) {
1396 holdingsIdentifierList = (List<String>) bibSolrDocument.getFieldValue(HOLDINGS_IDENTIFIER);
1397 } else if (holdingsIdentifier instanceof String) {
1398 holdingsIdentifierList.add((String) holdingsIdentifier);
1399 }
1400 holdingsIdentifierList.add(holdingsId);
1401 bibSolrDocument.setField(HOLDINGS_IDENTIFIER, holdingsIdentifierList);
1402 solrInputDocumentList.add(buildSolrInputDocFromSolrDoc(bibSolrDocument));
1403 }
1404 }
1405
1406 private void updateInstanceDocument(String holdingsId, List<String> bibIds, List<SolrInputDocument> solrInputDocumentList) throws SolrServerException {
1407 SolrQuery solrQuery = new SolrQuery();
1408 SolrServer server = SolrServerManager.getInstance().getSolrServer();
1409 solrQuery.setQuery(("id:" + holdingsId + " AND DocType:" + DocType.HOLDINGS.getCode()));
1410 QueryResponse response = server.query(solrQuery);
1411 List<SolrDocument> solrDocumentList = response.getResults();
1412 LOG.debug("response.getResults()-->" + response.getResults());
1413 for (SolrDocument solrDocument : solrDocumentList) {
1414 List<String> bibIdentifierList = new ArrayList<String>();
1415
1416
1417 Object bibIdentifier = solrDocument.getFieldValue(BIB_IDENTIFIER);
1418 if (bibIdentifier instanceof List) {
1419 bibIdentifierList = (List<String>) solrDocument.getFieldValue(BIB_IDENTIFIER);
1420
1421 } else if (bibIdentifier instanceof String) {
1422 bibIdentifierList.add((String) bibIdentifier);
1423 }
1424 LOG.info("bibIdentifierList-->" + bibIdentifierList);
1425
1426 for (String bibId : bibIds) {
1427 bibIdentifierList.add(bibId);
1428 }
1429 solrDocument.setField("isBoundwith", true);
1430 solrDocument.setField(BIB_IDENTIFIER, bibIdentifierList);
1431 solrInputDocumentList.add(buildSolrInputDocFromSolrDoc(solrDocument));
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448 Object itemIdentifier = solrDocument.getFieldValue(ITEM_IDENTIFIER);
1449 List<String> itemIdentifierList = new ArrayList<String>();
1450 if (itemIdentifier instanceof List) {
1451 itemIdentifierList = (List<String>) solrDocument.getFieldValue(ITEM_IDENTIFIER);
1452
1453 } else if (itemIdentifier instanceof String) {
1454 itemIdentifierList.add((String) itemIdentifier);
1455 }
1456
1457 for (String itemId : itemIdentifierList) {
1458 SolrDocument itemSolrDocument = getSolrDocumentByUUID(itemId);
1459 itemSolrDocument.setField(BIB_IDENTIFIER, bibIdentifierList);
1460 solrInputDocumentList.add(buildSolrInputDocFromSolrDoc(itemSolrDocument));
1461
1462 }
1463 }
1464 }
1465
1466 public void bindAnalytics(String seriesHoldingsId, List<String> itemIds, String createOrBreak) throws SolrServerException, IOException {
1467 List<SolrInputDocument> solrInputDocumentList = new ArrayList<SolrInputDocument>();
1468 updateHoldingsDocument(seriesHoldingsId, itemIds, solrInputDocumentList, createOrBreak);
1469 updateItemDocument(seriesHoldingsId, itemIds, solrInputDocumentList, createOrBreak);
1470 LOG.info("solrInputDocumentList-->" + solrInputDocumentList);
1471 SolrServer server = SolrServerManager.getInstance().getSolrServer();
1472 UpdateResponse updateResponse = server.add(solrInputDocumentList);
1473 server.commit();
1474 }
1475
/**
 * Updates the series holdings document (and the holdings of the affected items) when an
 * analytic relation is created or broken.
 * <p>
 * CREATE: the item ids are added to the series holdings' {@code ITEM_IDENTIFIER} list,
 * {@code isSeries} is set to true, and every holdings referenced by the given items gets
 * {@code isAnalytic} set to true.
 * <p>
 * BREAK: the item ids are removed from the list; if none of the remaining items has
 * {@code isAnalytic} set to true, {@code isSeries} is set to false and every holdings
 * referenced by the given items gets {@code isAnalytic} set to false.
 * <p>
 * Both branches only run when the series holdings already lists at least one item. The
 * series holdings document itself is always queued.
 *
 * @param seriesHoldingsId      id of the series holdings document
 * @param itemIds               ids of the items being linked or unlinked
 * @param solrInputDocumentList list that receives the documents to write
 * @param createOrBreak         "CREATE" or "BREAK", compared ignoring case
 * @throws SolrServerException when the Solr query fails
 */
private void updateHoldingsDocument(String seriesHoldingsId, List<String> itemIds, List<SolrInputDocument> solrInputDocumentList, String createOrBreak) throws SolrServerException {
    SolrQuery solrQuery = new SolrQuery();
    SolrServer server = SolrServerManager.getInstance().getSolrServer();
    solrQuery.setQuery(("id:" + seriesHoldingsId + " AND DocType:" + DocType.HOLDINGS.getCode()));
    QueryResponse response = server.query(solrQuery);
    List<SolrDocument> solrDocumentList = response.getResults();
    LOG.debug("response.getResults()-->" + response.getResults());
    // Both lists are declared outside the loops, so single-string values appended to them
    // accumulate across iterations.
    List<String> itemIdentifierList = new ArrayList<String>();
    List<String> holdingsIdentifierList = new ArrayList<String>();
    for (SolrDocument holdingsSolrDocument : solrDocumentList) {
        // The stored item identifier is either a list or a single string.
        Object itemIdentifier = holdingsSolrDocument.getFieldValue(ITEM_IDENTIFIER);
        if (itemIdentifier instanceof List) {
            itemIdentifierList = (List<String>) holdingsSolrDocument.getFieldValue(ITEM_IDENTIFIER);
        } else if (itemIdentifier instanceof String) {
            itemIdentifierList.add((String) itemIdentifier);
        }
        if (!CollectionUtils.isEmpty(itemIdentifierList) && createOrBreak.equalsIgnoreCase("CREATE")) {
            // CREATE: attach the items to the series holdings and mark it as a series.
            itemIdentifierList.addAll(itemIds);
            holdingsSolrDocument.setField(ITEM_IDENTIFIER, itemIdentifierList);
            holdingsSolrDocument.setField("isSeries", Boolean.TRUE);
            if (!CollectionUtils.isEmpty(itemIds)) {
                for (String itemId : itemIds) {
                    SolrDocument itemSolrDocument = getSolrDocumentByUUID(itemId);

                    Object holdingsIdentifier = itemSolrDocument.getFieldValue(HOLDINGS_IDENTIFIER);
                    if (holdingsIdentifier instanceof List) {
                        holdingsIdentifierList = (List<String>) itemSolrDocument.getFieldValue(HOLDINGS_IDENTIFIER);
                    } else if (holdingsIdentifier instanceof String) {
                        holdingsIdentifierList.add((String) holdingsIdentifier);
                    }
                    // Flag every holdings referenced by the item as analytic.
                    if (!CollectionUtils.isEmpty(holdingsIdentifierList)) {
                        for (String holdingId : holdingsIdentifierList) {
                            SolrDocument holdingSolrDocument = getSolrDocumentByUUID(holdingId);
                            holdingSolrDocument.setField("isAnalytic", Boolean.TRUE);
                            solrInputDocumentList.add(buildSolrInputDocFromSolrDoc(holdingSolrDocument));
                        }
                    }
                }
            }

            LOG.info("itemIdentifierList-->" + itemIdentifierList);
        } else if (!CollectionUtils.isEmpty(itemIdentifierList) && createOrBreak.equalsIgnoreCase("BREAK")) {
            // BREAK: detach the items from the series holdings.
            itemIdentifierList.removeAll(itemIds);
            holdingsSolrDocument.setField(ITEM_IDENTIFIER, itemIdentifierList);
            boolean hasAnalytic = false;
            // NOTE(review): when no items remain after the removal this whole block is
            // skipped, so isSeries keeps its previous value - confirm this is intended.
            if (!CollectionUtils.isEmpty(itemIdentifierList)) {
                // Look for any remaining item that is still flagged as analytic.
                for (String itemId : itemIdentifierList) {
                    SolrDocument itemSolrDocument = getSolrDocumentByUUID(itemId);
                    if (itemSolrDocument.getFieldValue("isAnalytic") instanceof Boolean) {
                        hasAnalytic = (Boolean) itemSolrDocument.getFieldValue("isAnalytic");
                        if (hasAnalytic) {
                            break;
                        }
                    }
                }
                if (!hasAnalytic) {
                    // No analytic item left: the holdings is no longer a series.
                    holdingsSolrDocument.setField("isSeries", Boolean.FALSE);

                    if (!CollectionUtils.isEmpty(itemIds)) {
                        for (String itemId : itemIds) {
                            SolrDocument itemSolrDocument = getSolrDocumentByUUID(itemId);

                            Object holdingsIdentifier = itemSolrDocument.getFieldValue(HOLDINGS_IDENTIFIER);
                            if (holdingsIdentifier instanceof List) {
                                holdingsIdentifierList = (List<String>) itemSolrDocument.getFieldValue(HOLDINGS_IDENTIFIER);
                            } else if (holdingsIdentifier instanceof String) {
                                holdingsIdentifierList.add((String) holdingsIdentifier);
                            }
                            // Clear the analytic flag on every holdings referenced by the item.
                            if (!CollectionUtils.isEmpty(holdingsIdentifierList)) {
                                for (String holdingId : holdingsIdentifierList) {
                                    SolrDocument holdingSolrDocument = getSolrDocumentByUUID(holdingId);
                                    holdingSolrDocument.setField("isAnalytic", Boolean.FALSE);
                                    solrInputDocumentList.add(buildSolrInputDocFromSolrDoc(holdingSolrDocument));
                                }
                            }
                        }
                    }

                }
            }
            LOG.info("itemIdentifierList-->" + itemIdentifierList);
        }
        // The series holdings document is queued whether or not a branch ran.
        solrInputDocumentList.add(buildSolrInputDocFromSolrDoc(holdingsSolrDocument));
    }
}
1561
1562 private void updateItemDocument(String seriesHoldingsId, List<String> itemIds, List<SolrInputDocument> solrInputDocumentList, String createOrBreak) throws SolrServerException {
1563 for (String itemId : itemIds) {
1564 SolrDocument itemSolrDocument = getSolrDocumentByUUID(itemId);
1565 List<String> holdingsIdentifierList = new ArrayList<String>();
1566 Object holdingsIdentifier = itemSolrDocument.getFieldValue(HOLDINGS_IDENTIFIER);
1567 if (holdingsIdentifier instanceof List) {
1568 holdingsIdentifierList = (List<String>) itemSolrDocument.getFieldValue(HOLDINGS_IDENTIFIER);
1569 } else if (holdingsIdentifier instanceof String) {
1570 holdingsIdentifierList.add((String) holdingsIdentifier);
1571 }
1572 if (!CollectionUtils.isEmpty(holdingsIdentifierList) && createOrBreak.equalsIgnoreCase("CREATE")) {
1573 holdingsIdentifierList.add(seriesHoldingsId);
1574 itemSolrDocument.setField(HOLDINGS_IDENTIFIER, holdingsIdentifierList);
1575 itemSolrDocument.setField("isAnalytic", Boolean.TRUE);
1576 } else if (!CollectionUtils.isEmpty(holdingsIdentifierList) && createOrBreak.equalsIgnoreCase("BREAK")) {
1577 holdingsIdentifierList.remove(seriesHoldingsId);
1578 itemSolrDocument.setField(HOLDINGS_IDENTIFIER, holdingsIdentifierList);
1579 itemSolrDocument.setField("isAnalytic", Boolean.FALSE);
1580 }
1581 solrInputDocumentList.add(buildSolrInputDocFromSolrDoc(itemSolrDocument));
1582 }
1583 }
1584
1585
1586
1587
1588 }