1 package org.kuali.ole.docstore.engine.service.index.solr;
2
3 import org.apache.commons.collections.CollectionUtils;
4 import org.apache.commons.lang.StringUtils;
5 import org.apache.solr.client.solrj.SolrQuery;
6 import org.apache.solr.client.solrj.SolrServer;
7 import org.apache.solr.client.solrj.SolrServerException;
8 import org.apache.solr.client.solrj.response.QueryResponse;
9 import org.apache.solr.client.solrj.response.UpdateResponse;
10 import org.apache.solr.common.SolrDocument;
11 import org.apache.solr.common.SolrInputDocument;
12 import org.kuali.ole.docstore.OleException;
13 import org.kuali.ole.docstore.common.document.*;
14 import org.kuali.ole.docstore.common.document.config.DocumentSearchConfig;
15 import org.kuali.ole.docstore.common.document.content.bib.marc.*;
16 import org.kuali.ole.docstore.common.document.content.bib.marc.xstream.BibMarcRecordProcessor;
17 import org.kuali.ole.docstore.common.exception.DocstoreIndexException;
18 import org.kuali.ole.docstore.common.util.ReindexBatchStatistics;
19 import org.kuali.ole.docstore.discovery.service.SolrServerManager;
20 import org.kuali.ole.docstore.indexer.solr.DocumentLocalId;
21 import org.kuali.ole.docstore.model.enums.DocCategory;
22 import org.kuali.ole.docstore.model.enums.DocFormat;
23 import org.kuali.ole.docstore.model.enums.DocType;
24 import org.kuali.ole.docstore.utility.ISBNUtil;
25 import org.kuali.ole.utility.Constants;
26 import org.slf4j.Logger;
27 import org.slf4j.LoggerFactory;
28 import org.springframework.util.StopWatch;
29
30 import java.io.IOException;
31 import java.text.DateFormat;
32 import java.text.Normalizer;
33 import java.text.ParseException;
34 import java.text.SimpleDateFormat;
35 import java.util.*;
36 import java.util.regex.Matcher;
37 import java.util.regex.Pattern;
38
39
40
41
42
43
44
45
46 public class BibMarcIndexer extends DocstoreSolrIndexService implements BibConstants {
47
48
49
50
51 private static final String SEPERATOR_DATA_FIELD = ", "; // appended after the leader, each control field and each data field in getAllText
52 private static final String SEPERATOR_SUB_FIELD = " "; // appended after each subfield value in getAllText
53 private static final String PATTERN_CHAR = "*";
54 private static final String SEPERATOR_HYPHEN = " - ";
55 private static final String SEPERATOR_DOUBLE_HYPHEN = " -- ";
56 private static final String DYNAMIC_FIELD_PREFIX = "mdf_";
57 private static final String BIB_IDENTIFIER = "bibIdentifier"; // Solr field written on holdings and item documents to reference their bib
58 private static final String HOLDINGS_IDENTIFIER = "holdingsIdentifier"; // Solr field written on bib and item documents to reference holdings
59 private static final String ITEM_IDENTIFIER = "itemIdentifier"; // Solr field written on holdings documents listing their item ids
60 private String publicationDateRegex = "[0-9]{4}"; // matches four consecutive digits; NOTE(review): presumably consumed by extractPublicationDateWithRegex - confirm
61 private static final Logger LOG = LoggerFactory
62 .getLogger(BibMarcIndexer.class);
63
64 private static BibMarcIndexer bibMarcIndexer = null; // shared instance, created lazily by getInstance()
65
66 public static BibMarcRecordProcessor recordProcessor = new BibMarcRecordProcessor(); // converts bib XML content into BibMarcRecords via fromXML
67 private static DocumentSearchConfig documentSearchConfig = null; // re-assigned on every getInstance() call
68
69
70
71
72
73
74
75 public static BibMarcIndexer getInstance() {
76 if (bibMarcIndexer == null) {
77 bibMarcIndexer = new BibMarcIndexer();
78 }
79 documentSearchConfig = DocumentSearchConfig.getDocumentSearchConfig();
80 return bibMarcIndexer;
81 }
82
83
84 @Override
85 public void createTree(Object object) {
86 List<SolrInputDocument> solrInputDocuments = new ArrayList<>();
87 buildSolrDocsForBibTree((BibTree) object, solrInputDocuments);
88 indexSolrDocuments(solrInputDocuments, true);
89 }
90
91
92
93
94
95
96 private void buildSolrDocsForBibTree(BibTree bibTree, List<SolrInputDocument> solrInputDocuments) {
97 Bib bib = bibTree.getBib();
98 if (bib.getId() != null && bib.getId().contains("wbm")) {
99 BibMarcRecords bibMarcRecords = recordProcessor.fromXML(bib.getContent());
100 SolrInputDocument bibSolrDoc = buildSolrInputDocument(bibMarcRecords.getRecords().get(0));
101 setCommonFields(bib, bibSolrDoc);
102 solrInputDocuments.add(bibSolrDoc);
103 for (HoldingsTree holdingsTree : bibTree.getHoldingsTrees()) {
104 buildSolrDocsForHoldingsTree(solrInputDocuments, bib, bibSolrDoc, holdingsTree);
105 }
106 }
107 }
108
109
110
111
112
113
114
115
116
117
118 private void buildSolrDocsForHoldingsTree(List<SolrInputDocument> solrInputDocuments, Bib bib, SolrInputDocument bibSolrDoc, HoldingsTree holdingsTree) {
119 if (holdingsTree.getHoldings() != null) {
120 HoldingsOlemlIndexer holdingsOlemlIndexer = HoldingsOlemlIndexer.getInstance();
121 if (holdingsTree.getHoldings().getContent() != null || holdingsTree.getHoldings().getContentObject() != null) {
122 SolrInputDocument holdingsSolrInputDoc = holdingsOlemlIndexer.getSolrInputFieldsForHoldings(holdingsTree.getHoldings());
123 linkHoldingsWithBib(bibSolrDoc, holdingsSolrInputDoc,bib.getId(),solrInputDocuments,holdingsTree.getHoldings().getId());
124 holdingsSolrInputDoc.addField(BIB_IDENTIFIER, bib.getId());
125 List<Item> itemDocuments = holdingsTree.getItems();
126 List<String> itemIds = new ArrayList<String>();
127 holdingsSolrInputDoc.addField(ITEM_IDENTIFIER, itemIds);
128 ItemOlemlIndexer itemOlemlIndexer = ItemOlemlIndexer.getInstance();
129 for (Item itemDocument : itemDocuments) {
130 itemIds.add(itemDocument.getId());
131 SolrInputDocument itemSolrInputDoc = itemOlemlIndexer.getSolrInputFieldsForItem(itemDocument);
132 itemSolrInputDoc.addField(HOLDINGS_IDENTIFIER, holdingsTree.getHoldings().getId());
133 itemSolrInputDoc.addField(BIB_IDENTIFIER, bib.getId());
134 addBibInfoForHoldingsOrItems(itemSolrInputDoc, holdingsSolrInputDoc);
135 addHoldingsInfoToItem(itemSolrInputDoc, holdingsSolrInputDoc);
136 solrInputDocuments.add(itemSolrInputDoc);
137 }
138
139 solrInputDocuments.add(holdingsSolrInputDoc);
140 } else if (StringUtils.isNotEmpty(holdingsTree.getHoldings().getId())) {
141 bibSolrDoc.addField(HOLDINGS_IDENTIFIER, "who-" + holdingsTree.getHoldings().getId());
142 }
143 }
144 }
145
146 private void linkHoldingsWithBib(SolrInputDocument bibSolrDoc,SolrInputDocument holdingsSolrInputDoc,String bibId,List<SolrInputDocument> solrInputDocuments,String holdingsId ){
147 if (bibSolrDoc == null) {
148 SolrDocument bibSolrDocument = getSolrDocumentByUUID(bibId);
149 bibSolrDoc = buildSolrInputDocFromSolrDoc(bibSolrDocument);
150 solrInputDocuments.add(bibSolrDoc);
151 }
152 bibSolrDoc.addField(HOLDINGS_IDENTIFIER, holdingsId);
153 addBibInfoForHoldingsOrItems(holdingsSolrInputDoc, bibSolrDoc);
154 }
155
156 @Override
157 public void createTrees(Object object) {
158 BibTrees bibTreesObj = (BibTrees) object;
159 List<BibTree> bibTrees = bibTreesObj.getBibTrees();
160 List<SolrInputDocument> solrInputDocuments = new ArrayList<>();
161 for (BibTree bibTree : bibTrees) {
162 buildSolrDocsForBibTree(bibTree, solrInputDocuments);
163 }
164
165 indexSolrDocuments(solrInputDocuments, true);
166 }
167
168
169
170
171
172
173
174 @Override
175 public void processBibTrees(BibTrees bibTrees) {
176 List<SolrInputDocument> solrInputDocuments = new ArrayList<>();
177 List<String> idsToDelete = new ArrayList<>();
178 for (BibTree bibTree : bibTrees.getBibTrees()) {
179 processBibTree(bibTree, solrInputDocuments, idsToDelete);
180 }
181 LOG.info("Solr Input Documents Size : " + solrInputDocuments.size());
182 indexAndDelete(solrInputDocuments, idsToDelete, true);
183 }
184
185
186
187
188
189
190
191 private void processBibTree(BibTree bibTree, List<SolrInputDocument> solrInputDocuments, List<String> idsToDelete) {
192 Bib bib = bibTree.getBib();
193 if (null != bib) {
194 SolrInputDocument bibSolrInputDocument = new SolrInputDocument();
195 if (Bib.ResultType.SUCCESS.equals(bib.getResult())) {
196 if (bib.getId() != null) {
197 if (Bib.OperationType.CREATE.equals(bib.getOperation())) {
198 createBibTreeDocforSolr(bibTree, solrInputDocuments);
199 } else if (Bib.OperationType.UPDATE.equals(bib.getOperation())) {
200 updateBibDocument(bib, solrInputDocuments, bibSolrInputDocument);
201 processHoldingsTrees(bibTree.getHoldingsTrees(), bibSolrInputDocument, solrInputDocuments, idsToDelete);
202 } else if (Bib.OperationType.DELETE.equals(bib.getOperation())) {
203 idsToDelete.add(bib.getId());
204 }
205 }
206 } else if (bib.getOperation() == null || StringUtils.isBlank(bib.getOperation().name())) {
207 processHoldingsTrees(bibTree.getHoldingsTrees(), bibSolrInputDocument, solrInputDocuments, idsToDelete);
208 }
209 }
210 }
211
212
213
214
215
216
217 private void createBibTreeDocforSolr(BibTree bibTree, List<SolrInputDocument> solrInputDocuments) {
218 buildSolrDocsForBibTree(bibTree, solrInputDocuments);
219 }
220
221
222
223
224
225
226
227
228 private void processHoldingsTrees(List<HoldingsTree> holdingsTrees, SolrInputDocument bibSolrInputDocument, List<SolrInputDocument> solrInputDocuments, List<String> idsToDelete) {
229 for (HoldingsTree holdingsTree : holdingsTrees) {
230 processHoldingsTree(holdingsTree, bibSolrInputDocument, solrInputDocuments, idsToDelete);
231 }
232 }
233
234
235
236
237
238
239
240
241
242
243 private void processHoldingsTree(HoldingsTree holdingsTree, SolrInputDocument bibSolrInputDocument, List<SolrInputDocument> solrInputDocuments, List<String> idsToDelete) {
244 HoldingsOlemlIndexer holdingsOlemlIndexer = HoldingsOlemlIndexer.getInstance();
245 Holdings holdings = holdingsTree.getHoldings();
246 SolrInputDocument holdingsSolrInputDocument = new SolrInputDocument();
247
248 if (Holdings.ResultType.SUCCESS.equals(holdings.getResult())) {
249 if (holdings.getId() != null) {
250 if (Holdings.OperationType.CREATE.equals(holdings.getOperation())) {
251 Bib bib = holdings.getBib();
252 if (null != bib && null != bib.getId()) {
253 buildSolrDocsForHoldingsTree(solrInputDocuments, bib, bibSolrInputDocument, holdingsTree);
254 }
255 } else if (Holdings.OperationType.UPDATE.equals(holdings.getOperation())) {
256 holdingsOlemlIndexer.processHoldingSolrDocumentForUpdate(holdings, solrInputDocuments, holdingsSolrInputDocument);
257 processItems(holdingsTree.getItems(), solrInputDocuments, holdingsSolrInputDocument, idsToDelete);
258 } else if (Holdings.OperationType.DELETE.equals(holdings.getOperation())) {
259 idsToDelete.add(holdings.getId());
260 holdingsOlemlIndexer.processDelete(holdings.getId(), solrInputDocuments);
261 }
262 }
263 } else if ((holdings.getOperation() == null || StringUtils.isBlank(holdings.getOperation().name()))) {
264 processItems(holdingsTree.getItems(), solrInputDocuments, holdingsSolrInputDocument, idsToDelete);
265 }
266
267 }
268
269
270
271
272
273
274
275
276
277
278 private void processItems(List<Item> items, List<SolrInputDocument> solrInputDocuments, SolrInputDocument holdingsSolrInputDocument, List<String> idsToDelete) {
279 ItemOlemlIndexer itemOlemlIndexer = ItemOlemlIndexer.getInstance();
280 for (Item item : items) {
281 if (Item.ResultType.SUCCESS.equals(item.getResult())) {
282 if (item.getId() != null) {
283 if (Item.OperationType.CREATE.equals(item.getOperation())) {
284 itemOlemlIndexer.buildSolrInputDocumentForBatchProcess(item, solrInputDocuments, holdingsSolrInputDocument);
285 } else if (Item.OperationType.UPDATE.equals(item.getOperation())) {
286 itemOlemlIndexer.updateRecordInSolr(item, solrInputDocuments);
287 } else if (Item.OperationType.DELETE.equals(item.getOperation())) {
288 idsToDelete.add(item.getId());
289 itemOlemlIndexer.processDelete(item.getId(), solrInputDocuments);
290 }
291 }
292 }
293 }
294 }
295
296 public void createTrees(Object object, ReindexBatchStatistics reindexBatchStatistics) {
297 BibTrees bibTreesObj = (BibTrees) object;
298 List<BibTree> bibTrees = bibTreesObj.getBibTrees();
299 List<SolrInputDocument> solrInputDocuments = new ArrayList<>();
300 StopWatch stopWatch = new StopWatch();
301
302 stopWatch.start();
303 for (BibTree bibTree : bibTrees) {
304 buildSolrDocsForBibTree(bibTree, solrInputDocuments);
305 }
306 stopWatch.stop();
307 reindexBatchStatistics.addBuildSolrDocsTime(stopWatch.getTotalTimeMillis());
308
309 indexSolrDocuments(solrInputDocuments, true, reindexBatchStatistics);
310 }
311
312 protected void indexSolrDocuments(List<SolrInputDocument> solrDocs, boolean isCommit, ReindexBatchStatistics reindexBatchStatistics) {
313 SolrServer solr = null;
314 try {
315 solr = SolrServerManager.getInstance().getSolrServer();
316 StopWatch stopWatch = new StopWatch();
317 stopWatch.start("add");
318 UpdateResponse response = solr.add(solrDocs);
319 stopWatch.stop();
320 reindexBatchStatistics.addRecToAddInSolr(stopWatch.getLastTaskTimeMillis());
321 if (isCommit) {
322 stopWatch.start("commit");
323 solr.commit(false, false);
324 stopWatch.stop();
325 reindexBatchStatistics.addCommitTime(stopWatch.getLastTaskTimeMillis());
326 }
327 } catch (SolrServerException e) {
328 LOG.info("Exception :", e);
329 rollback(solr);
330 throw new DocstoreIndexException(e.getMessage());
331 } catch (IOException e) {
332 LOG.info("Exception :", e);
333 rollback(solr);
334 throw new DocstoreIndexException(e.getMessage());
335 }
336 }
337
338
339 protected void buildSolrInputDocument(Object object, List<SolrInputDocument> solrInputDocuments) {
340 Bib bib = (Bib) object;
341 BibMarcRecords bibMarcRecords = recordProcessor.fromXML(bib.getContent());
342 SolrInputDocument solrInputDocument = buildSolrInputDocument(bibMarcRecords.getRecords().get(0));
343
344 setCommonFields(bib, solrInputDocument);
345
346 solrInputDocuments.add(solrInputDocument);
347
348 }
349
350 protected void setCommonFields(Bib bib, SolrInputDocument solrInputDocument) {
351 solrInputDocument.setField(ID, bib.getId());
352 solrInputDocument.addField(LOCALID_SEARCH, DocumentLocalId.getDocumentId(bib.getId()));
353 solrInputDocument.addField(LOCALID_DISPLAY, DocumentLocalId.getDocumentIdDisplay(bib.getId()));
354 solrInputDocument.addField(UNIQUE_ID, bib.getId());
355 solrInputDocument.setField(DOC_CATEGORY, DocCategory.WORK.getCode());
356 solrInputDocument.setField(BIB_ID, bib.getId());
357
358 solrInputDocument.setField(STATUS_SEARCH, bib.getStatus());
359 solrInputDocument.setField(STATUS_DISPLAY, bib.getStatus());
360
361 if (StringUtils.isNotEmpty(bib.getStatusUpdatedOn())) {
362 solrInputDocument.setField(STATUS_UPDATED_ON, getDate(bib.getStatusUpdatedOn()));
363 }
364
365 solrInputDocument.addField(STAFF_ONLY_FLAG, bib.isStaffOnly());
366
367 String createdBy = bib.getCreatedBy();
368 solrInputDocument.setField(CREATED_BY, createdBy);
369 solrInputDocument.setField(UPDATED_BY, createdBy);
370
371 Date date = new Date();
372 Date createdDate = null;
373
374 if (StringUtils.isNotBlank(bib.getCreatedOn())) {
375 createdDate = getDate(bib.getCreatedOn());
376 solrInputDocument.setField(DATE_ENTERED, createdDate);
377 } else {
378 solrInputDocument.setField(DATE_ENTERED, date);
379 }
380
381 if (StringUtils.isNotBlank(bib.getUpdatedOn())) {
382 solrInputDocument.setField(DATE_UPDATED, getDate(bib.getUpdatedOn()));
383 } else {
384 if (StringUtils.isNotBlank(bib.getCreatedOn())) {
385
386 solrInputDocument.setField(DATE_UPDATED, createdDate);
387 } else {
388 solrInputDocument.setField(DATE_UPDATED, date);
389 }
390 }
391 }
392
393 protected void updateRecordInSolr(Object object, List<SolrInputDocument> solrInputDocuments) {
394 Bib bib = (Bib) object;
395 List<SolrDocument> solrDocumentList = getSolrDocumentBySolrId(bib.getId());
396 SolrDocument solrDocument = solrDocumentList.get(0);
397 SolrInputDocument solrInputDocument = new SolrInputDocument();
398 if (bib.getContent() != null) {
399 BibMarcRecord workBibMarcRecord = recordProcessor.fromXML(bib.getContent()).getRecords().get(0);
400 solrInputDocument = buildSolrInputDocument(workBibMarcRecord);
401 if (solrDocument != null && solrDocument.getFieldValue(HOLDINGS_IDENTIFIER) != null) {
402 addBibInfoToHoldings(solrInputDocuments, solrInputDocument, solrDocument);
403 }
404 if (StringUtils.isNotEmpty(bib.getStatusUpdatedOn())) {
405 solrInputDocument.setField(STATUS_UPDATED_ON, getDate(bib.getStatusUpdatedOn()));
406 }
407 } else {
408 buildSolrInputDocFromSolrDoc(solrDocument, solrInputDocument);
409 }
410 setCommonFieldsForSolrDoc(solrInputDocument, bib, solrDocument);
411 solrInputDocuments.add(solrInputDocument);
412 }
413
414
415
416
417
418
419
420
421 protected void updateBibDocument(Object object, List<SolrInputDocument> solrInputDocuments, SolrInputDocument solrbibInputDocument) {
422 Bib bib = (Bib) object;
423 List<SolrDocument> solrDocumentList = getSolrDocumentBySolrId(bib.getId());
424 SolrDocument solrDocument = solrDocumentList.get(0);
425
426 if (bib.getContent() != null) {
427 BibMarcRecord workBibMarcRecord = recordProcessor.fromXML(bib.getContent()).getRecords().get(0);
428 solrbibInputDocument = buildSolrInputDocument(workBibMarcRecord, solrbibInputDocument);
429
430 if (solrDocument != null && solrDocument.getFieldValue(HOLDINGS_IDENTIFIER) != null) {
431 addBibInfoToHoldings(solrInputDocuments, solrbibInputDocument, solrDocument);
432 }
433 if (StringUtils.isNotEmpty(bib.getStatus()) || StringUtils.isNotEmpty(bib.getStatusUpdatedOn())) {
434 solrbibInputDocument.setField(STATUS_UPDATED_ON, getDate(bib.getStatusUpdatedOn()));
435 }
436 } else {
437 buildSolrInputDocFromSolrDoc(solrDocument, solrbibInputDocument);
438 }
439 setCommonFieldsForSolrDoc(solrbibInputDocument, bib, solrDocument);
440 solrInputDocuments.add(solrbibInputDocument);
441
442 }
443
444 private void addBibInfoToHoldings(List<SolrInputDocument> solrInputDocuments, SolrInputDocument bibSolrDoc, SolrDocument solrDocument ) {
445 Object instanceIdentifier = solrDocument.getFieldValue(HOLDINGS_IDENTIFIER);
446 bibSolrDoc.addField(HOLDINGS_IDENTIFIER, instanceIdentifier);
447 List<String> holdinsgsIds = new ArrayList<>();
448 if(instanceIdentifier instanceof String) {
449 holdinsgsIds.add((String) instanceIdentifier);
450 }
451 else {
452 holdinsgsIds.addAll((List<String>) instanceIdentifier);
453 }
454
455 for(String holdingsId : holdinsgsIds) {
456 List<SolrDocument> solrDocumentList = getSolrDocumentBySolrId(holdingsId);
457 if (CollectionUtils.isNotEmpty(solrDocumentList)) {
458 SolrDocument holdingsSolrDocument = solrDocumentList.get(0);
459 SolrInputDocument holdingsSolrInputDocument = new SolrInputDocument();
460 buildSolrInputDocFromSolrDoc(holdingsSolrDocument, holdingsSolrInputDocument);
461 removeFieldFromSolrInputDocument(holdingsSolrInputDocument);
462 addBibInfoForHoldingsOrItems(holdingsSolrInputDocument, bibSolrDoc);
463 List<String> itemIds = new ArrayList<>();
464
465 Object itemIdentifier = holdingsSolrDocument.getFieldValue(ITEM_IDENTIFIER);
466 if (itemIdentifier != null) {
467 if (itemIdentifier instanceof String) {
468 itemIds.add((String) itemIdentifier);
469 } else {
470 itemIds.addAll((List<String>) itemIdentifier);
471 }
472 }
473
474 for (String itemId : itemIds) {
475
476 List<SolrDocument> itemDocumentList = getSolrDocumentBySolrId(itemId);
477 SolrDocument itemSolrDocument = itemDocumentList.get(0);
478 SolrInputDocument itemSolrInputDocument = new SolrInputDocument();
479 buildSolrInputDocFromSolrDoc(itemSolrDocument, itemSolrInputDocument);
480 removeFieldFromSolrInputDocument(itemSolrInputDocument);
481 addBibInfoForHoldingsOrItems(itemSolrInputDocument, bibSolrDoc);
482 addHoldingsInfoToItem(itemSolrInputDocument, bibSolrDoc);
483 solrInputDocuments.add(itemSolrInputDocument);
484 }
485 solrInputDocuments.add(holdingsSolrInputDocument);
486
487 }
488 }
489
490 }
491
492 protected void deleteRecordInSolr(SolrServer solrServer, String id) throws IOException, SolrServerException {
493 String query = "bibIdentifier:" + id + " OR " + "id:" + id;
494 UpdateResponse updateResponse = solrServer.deleteByQuery(query);
495 LOG.info("updateResponse " + updateResponse);
496
497 String newId = id + "_d";
498 SolrInputDocument solrInputDocument = new SolrInputDocument();
499 solrInputDocument.setField("DocType", "bibliographic_delete");
500 solrInputDocument.setField("dateUpdated", new Date());
501 solrInputDocument.setField("uniqueId", newId);
502 solrInputDocument.setField("id", newId);
503 solrInputDocument.setField("LocalId_display", DocumentLocalId.getDocumentIdDisplay(id));
504 UpdateResponse updateResponseForBib = solrServer.add(solrInputDocument);
505 LOG.debug("updateResponse " + updateResponseForBib);
506
507 }
508
509 private void setCommonFieldsForSolrDoc(SolrInputDocument solrInputDocument, Bib bib, SolrDocument solrDocument) {
510 solrInputDocument.setField(ID, bib.getId());
511 solrInputDocument.addField(UNIQUE_ID, bib.getId());
512 solrInputDocument.setField(DOC_CATEGORY, DocCategory.WORK.getCode());
513 String updatedBy = bib.getUpdatedBy();
514 solrInputDocument.setField(UPDATED_BY, updatedBy);
515 solrInputDocument.setField(DATE_UPDATED, new Date());
516 solrInputDocument.setField(CREATED_BY, solrDocument.getFieldValue(CREATED_BY));
517 solrInputDocument.setField(DATE_ENTERED, solrDocument.getFieldValue(DATE_ENTERED));
518 solrInputDocument.setField(BIB_ID, bib.getId());
519 solrInputDocument.addField(LOCALID_SEARCH, DocumentLocalId.getDocumentId(bib.getId()));
520 solrInputDocument.addField(LOCALID_DISPLAY, DocumentLocalId.getDocumentIdDisplay(bib.getId()));
521 solrInputDocument.addField(STAFF_ONLY_FLAG, bib.isStaffOnly());
522 solrInputDocument.setField(STATUS_SEARCH, bib.getStatus());
523 solrInputDocument.setField(STATUS_DISPLAY, bib.getStatus());
524 }
525
526
527 private Date getDate(String dateStr) {
528 DateFormat format = new SimpleDateFormat(Constants.DATE_FORMAT);
529 try {
530 if (StringUtils.isNotEmpty(dateStr)) {
531 return format.parse(dateStr);
532 } else {
533 return new Date();
534 }
535
536 } catch (ParseException e) {
537 LOG.info("Exception : " + dateStr + " for format:: " + Constants.DATE_FORMAT, e);
538 return new Date();
539 }
540 }
541
542
543
544
545
546
547
548
549 public SolrInputDocument buildSolrInputDocument(BibMarcRecord record) {
550 SolrInputDocument solrDoc = new SolrInputDocument();
551 buildSolrInputDocument(record,solrDoc);
552 return solrDoc;
553 }
554
555 public SolrInputDocument buildSolrInputDocument(BibMarcRecord record,SolrInputDocument solrDoc ) {
556
557 solrDoc.addField(LEADER, record.getLeader());
558
559
560 List<ControlField> controlFieldList = record.getControlFields();
561
562 for (ControlField cf : controlFieldList) {
563 solrDoc.addField("controlfield_" + cf.getTag(), cf.getValue());
564 }
565
566 solrDoc.addField(DOC_TYPE, DocType.BIB.getDescription());
567 solrDoc.addField(DOC_FORMAT, DocFormat.MARC.getDescription());
568
569 for (String field : documentSearchConfig.FIELDS_TO_TAGS_2_INCLUDE_MAP.keySet()) {
570 if (!field.startsWith("Local")) {
571 addFieldToSolrDoc(record, field, buildFieldValue(field, record), solrDoc);
572 }
573
574 }
575 addFieldToSolrDoc(record, ALL_TEXT, getAllText(record), solrDoc);
576 addGeneralFieldsToSolrDoc(record, solrDoc);
577 if(record.getLeader() == null || ((record.getLeader().length() >= 8) && (record.getLeader().charAt(7) != 's'))) {
578 solrDoc.removeField(JOURNAL_TITLE_SEARCH);
579 solrDoc.removeField(JOURNAL_TITLE_DISPLAY);
580 solrDoc.removeField(JOURNAL_TITLE_SORT);
581 }
582 return solrDoc;
583 }
584
585 private void addFieldToSolrDoc(BibMarcRecord record, String fieldName, Object value,
586 SolrInputDocument solrDoc) {
587 int ind2Value = 0;
588 if (value instanceof List) {
589 if (fieldName.toLowerCase().endsWith("_sort"))
590 {
591 ind2Value = getSecondIndicator(record, fieldName);
592 LOG.debug("field name -->" + fieldName + "----->" + ind2Value);
593 if (ind2Value > 0) {
594 solrDoc.addField(fieldName, ((List) value).get(0).toString().substring(ind2Value));
595 } else {
596 solrDoc.addField(fieldName, ((List) value).get(0));
597 }
598
599 } else if (fieldName.endsWith("_facet")) {
600 solrDoc.addField(fieldName, getSortString((List) value));
601 } else {
602 if (((List) value).size() > 0) {
603 for (Object obj : (List<Object>) value)
604
605 {
606 solrDoc.addField(fieldName, obj);
607 }
608 } else {
609 solrDoc.addField(fieldName, null);
610 }
611 }
612 } else {
613 if (fieldName.toLowerCase().endsWith("_sort"))
614 {
615 ind2Value = getSecondIndicator(record, fieldName);
616 LOG.debug("field name -->" + fieldName + "----->" + ind2Value);
617 if (value != null && ind2Value > 0) {
618 String fieldValue = value.toString();
619 try {
620 fieldValue = value.toString().substring(ind2Value);
621 }
622 catch (Exception e) {
623 LOG.error("Exception while getting value:" + value.toString() + " for field:" + fieldName + ". Exception:" + e.toString());
624
625 }
626 solrDoc.addField(fieldName, fieldValue);
627 } else {
628 solrDoc.addField(fieldName, value);
629 }
630 } else if (fieldName.endsWith("_facet")) {
631 if (value != null) {
632 solrDoc.addField(fieldName, getSortString(value.toString()));
633 }
634 } else {
635 solrDoc.addField(fieldName, value);
636 }
637 }
638 }
639
640
641
642
643
644
645
646
647 public Object buildFieldValue(String fieldName, BibMarcRecord record) {
648 List<ControlField> controlFieldList = record.getControlFields();
649 List<DataField> dataFields = record.getDataFields();
650 String includeTags = documentSearchConfig.FIELDS_TO_TAGS_2_INCLUDE_MAP.get(fieldName);
651 if ((includeTags != null) && (includeTags.length() > 0)) {
652 String excludeTags = documentSearchConfig.FIELDS_TO_TAGS_2_EXCLUDE_MAP.get(fieldName);
653 if (excludeTags == null) {
654 excludeTags = "";
655 }
656 if (fieldName.startsWith("Subject_")) {
657 return getDataFieldValue(includeTags, excludeTags, record, true, fieldName);
658 } else {
659 if (fieldName.equals(ISBN_SEARCH))
660 return normalizeIsbn(getDataFieldValue(includeTags, excludeTags, record, false, fieldName));
661 else
662 return getDataFieldValue(includeTags, excludeTags, record, false, fieldName);
663 }
664 } else if (fieldName.equals(PUBLICATIONDATE_DISPLAY) || fieldName.equals(PUBLICATIONDATE_SEARCH) || fieldName.equals(PUBLICATIONDATE_FACET)
665 || fieldName.equals(PUBLICATIONDATE_SORT)) {
666 String publicationDate = "";
667 String publicationEndDate = "";
668 Object publicationDateValue = null;
669 for (ControlField controlField : controlFieldList) {
670 if (controlField.getTag().equalsIgnoreCase("008")) {
671 String controlField008 = controlField.getValue();
672 if (controlField008 != null && controlField008.length() > 10) {
673 publicationDate = controlField008.substring(7, 11);
674 publicationDate = extractPublicationDateWithRegex(publicationDate);
675 if (controlField008.length() > 14) {
676 publicationEndDate = controlField008.substring(11, 15);
677 publicationEndDate = extractPublicationDateWithRegex(publicationEndDate);
678 }
679 }
680 }
681 }
682 if (publicationDate == null || publicationDate.trim().length() == 0) {
683 if (getDataFieldValue("260-c", "", record, true, fieldName) instanceof String) {
684 publicationDate = (String) getDataFieldValue("260-c", "", record, true, fieldName);
685 } else if (getDataFieldValue("260-c", "", record, true, fieldName) instanceof List) {
686 publicationDate = ((List<String>) getDataFieldValue("260-c", "", record, true, fieldName)).get(0);
687 }
688 publicationDate = extractPublicationDateWithRegex(publicationDate);
689 }
690 if (fieldName.equals(PUBLICATIONDATE_FACET)) {
691 if (publicationDate.equalsIgnoreCase("")) {
692 publicationDateValue = "Date could not be determined";
693 } else {
694 publicationDateValue = buildPublicationDateFacetValue(publicationDate, publicationEndDate);
695 }
696 return publicationDateValue;
697 }
698 return publicationDate;
699 } else if (fieldName.equals(LANGUAGE_DISPLAY) || fieldName.equals(LANGUAGE_SEARCH) || fieldName.equals(LANGUAGE_FACET)) {
700 List<Object> langs = new ArrayList<Object>();
701 for (ControlField controlField : controlFieldList) {
702 if (controlField.getTag().equalsIgnoreCase("008")) {
703 String cf8 = controlField.getValue();
704 if (cf8 != null && cf8.length() > 37) {
705 String lang = Languages.getInstance(Languages.ISO_639_3).getLanguageDescription(
706 cf8.substring(35, 38));
707 langs.add(lang == null ? "Undefined" : lang);
708 }
709 }
710 }
711 if (fieldName.equals(LANGUAGE_SEARCH) || fieldName.equals(LANGUAGE_FACET)) {
712 for (DataField df : dataFields) {
713 if (df.getTag().equals("546")) {
714 try {
715 for (SubField subfield : df.getSubFields()) {
716 if (subfield.getCode().equalsIgnoreCase("a")) {
717 langs.add(subfield.getValue());
718 }
719 }
720 } catch (RuntimeException re) {
721 LOG.info("Exception :", re);
722 }
723 }
724 }
725 }
726 return langs;
727 } else if (fieldName.equals(FORMAT_DISPLAY) || fieldName.equals(FORMAT_SEARCH) || fieldName.equals(FORMAT_FACET)) {
728 return getRecordFormat(record);
729 } else if(fieldName.equals(DESCRIPTION_SEARCH)) {
730 String excludeTags = documentSearchConfig.FIELDS_TO_TAGS_2_EXCLUDE_MAP.get(fieldName);
731 if (excludeTags == null) {
732 excludeTags = "";
733 }
734 if (includeTags == null) {
735 includeTags = "";
736 }
737 return getDataFieldValue(includeTags, excludeTags, record, false, fieldName);
738 } else {
739 throw new RuntimeException("Unknown field named:" + fieldName);
740 }
741 }
742
743
744
745
746
747
748
749 public String getAllText(BibMarcRecord record) {
750 StringBuilder allText = new StringBuilder();
751 allText.append(record.getLeader());
752 allText.append(SEPERATOR_DATA_FIELD);
753 for (ControlField cf : record.getControlFields()) {
754 allText.append(cf.getValue());
755 allText.append(SEPERATOR_DATA_FIELD);
756 }
757 for (DataField df : record.getDataFields()) {
758 for (SubField sf : df.getSubFields()) {
759 allText.append(sf.getValue());
760 allText.append(SEPERATOR_SUB_FIELD);
761 }
762 allText.append(SEPERATOR_DATA_FIELD);
763 }
764 return allText.toString();
765 }
766
767
768
769
770
771
772
773 public String getRecordFormat(BibMarcRecord record) {
774 String format = null;
775 String cF7 = null;
776 String cF8 = null;
777 String formatData = "";
778 char cF8Ch21 = ' ';
779 char cF8Ch22 = ' ';
780 char cF8Ch28 = ' ';
781 char cF7Ch0 = ' ';
782 int cFIndex = record.getControlFields().indexOf(new ControlField("007"));
783 if (cFIndex != -1) {
784 cF7 = record.getControlFields().get(cFIndex).getValue();
785 }
786 cFIndex = record.getControlFields().indexOf(new ControlField("008"));
787 if (cFIndex != -1) {
788 cF8 = record.getControlFields().get(cFIndex).getValue();
789 }
790 Object tmp = null;
791 tmp = getDataFieldValue("111-a", "", record, false, "");
792 String dF111a = tmp != null ? tmp.toString() : null;
793 tmp = getDataFieldValue("254-h", "", record, false, "");
794 String dF254h = tmp != null ? tmp.toString() : null;
795 tmp = getDataFieldValue("254-k", "", record, false, "");
796 String dF254k = tmp != null ? tmp.toString() : null;
797 tmp = getDataFieldValue("260-b", "", record, false, "");
798 String dF260b = tmp != null ? tmp.toString() : null;
799 tmp = getDataFieldValue("502-a", "", record, false, "");
800 String dF502a = tmp != null ? tmp.toString() : null;
801 tmp = getDataFieldValue("711-a", "", record, false, "");
802 String dF711a = tmp != null ? tmp.toString() : null;
803
804 if (cF8 != null && cF8.length() > 22) {
805 cF8Ch21 = cF8.charAt(21);
806 cF8Ch22 = cF8.charAt(22);
807 }
808 if (cF8 != null && cF8.length() > 28) {
809 cF8Ch28 = cF8.charAt(28);
810 }
811 if (cF7 != null) {
812 cF7Ch0 = cF7.charAt(0);
813 }
814 if (record.getLeader() != null && record.getLeader().length() > 8) {
815 formatData = record.getLeader().substring(6, 8);
816 }
817
818 if (dF254h != null && dF254h.contains("micro")) {
819 format = "Microformat";
820 } else if (formatData.equals("tm") && dF502a != null) {
821 format = "Thesis/Dissertation";
822 } else if (dF111a != null || dF711a != null) {
823 format = "Conference/Event";
824 } else if (formatData.equals("aa") || formatData.equals("am") || formatData.equals("ac") || formatData
825 .equals("tm")) {
826 if (dF254k != null && dF254k.contains("kit")) {
827 format = "Other";
828 } else {
829 format = "Book";
830 }
831 } else if (formatData.equals("im") || formatData.equals("jm") || formatData.equals("jc")
832 || formatData.equals("jd") || formatData.equals("js")) {
833 format = "Sound recording";
834 } else if (formatData.equals("cm") || formatData.equals("dm") || formatData.equals("ca")
835 || formatData.equals("cb") || formatData.equals("cd") || formatData.equals("cs")) {
836 format = "Musical score";
837 } else if (formatData.equals("fm") || ("".equals(formatData) && formatData.startsWith("e"))) {
838 format = "Map/Atlas";
839 } else if (formatData.equals("gm") || (cF7 != null && (cF7Ch0 == ('v')))) {
840 format = "Video";
841 } else if (formatData.equals("gm") || (cF7 != null && (cF7Ch0 == ('g')))) {
842 format = "Projected graphic";
843 } else if (formatData.equals("as") || formatData.equals("gs")) {
844 format = "Journal/Periodical";
845 } else if (formatData.equals("km")) {
846 format = "Image";
847 } else if (formatData.equals("mm")) {
848 format = "Datafile";
849 } else if (formatData.equals("as") && (cF8Ch21 == 'n' || cF8Ch22 == 'e')) {
850 format = "Newspaper";
851 } else if ("".equals(formatData) && formatData.startsWith("r")) {
852 format = "3D object";
853 } else if (formatData != "" && formatData.endsWith("i")) {
854 format = "Database/Website";
855 } else if (("".equals(formatData) && (!formatData.startsWith("c") || !formatData.startsWith("d")
856 || !formatData.startsWith("i") || !formatData.startsWith("j"))) && (
857 (cF8Ch28 == 'f' || cF8Ch28 == 'i' || cF8Ch28 == 'o') && (dF260b != null && !dF260b
858 .contains("press")))) {
859 format = "Government document";
860 } else {
861 format = "Other";
862 }
863 return format;
864 }
865
866
867
868
869
870
871
872
873
874
875
876
877 private Object getDataFieldValue(String includeTags, String excludeTags, BibMarcRecord record,
878 boolean isHyphenSeperatorFirst, String fieldName) {
879 List<Object> fieldValues = new ArrayList<Object>();
880 StringTokenizer includeTagsTokenizer = new StringTokenizer(includeTags, ",");
881
882 while (includeTagsTokenizer.hasMoreElements()) {
883 String tag = includeTagsTokenizer.nextToken();
884 tag = tag.trim();
885 int subFieldIdx = tag.indexOf('-');
886 String tagNum = (subFieldIdx == -1) ? tag : tag.substring(0, subFieldIdx);
887
888 for (int i = 0; i < record.getDataFields().size(); i++) {
889 DataField dataField = record.getDataFields().get(i);
890 if (isValidTag(dataField.getTag(), tagNum)) {
891 StringBuilder fieldValue = new StringBuilder();
892 List<SubField> subFields = dataField.getSubFields();
893 if (subFieldIdx != -1) {
894 if (!excludeTags.contains(tag)) {
895 String subFieldCodes = tag.substring(subFieldIdx + 1, tag.length());
896 boolean isHyphenCodedOnce = false;
897 for (SubField subField : subFields) {
898 if (subFieldCodes.contains(subField.getCode())) {
899 if (fieldValue.length() != 0) {
900 if (!isHyphenSeperatorFirst || isHyphenCodedOnce || (
901 dataField.getTag().endsWith("00") || dataField.getTag().endsWith("10")
902 || dataField.getTag().endsWith("11"))) {
903 fieldValue.append(SEPERATOR_SUB_FIELD);
904 } else {
905 fieldValue.append(SEPERATOR_HYPHEN);
906 isHyphenCodedOnce = true;
907 }
908 }
909 fieldValue.append(subField.getValue());
910 }
911 }
912 }
913 } else {
914 boolean isHyphenCodedOnce = false;
915 boolean isFirstSubField = false;
916 for (SubField subField : subFields) {
917 if (!excludeTags.contains(dataField.getTag() + "-" + subField.getCode()) && !excludeTags
918 .contains(tagNum + "-" + subField.getCode())) {
919 if (fieldValue.length() != 0) {
920 if (!isHyphenSeperatorFirst || isHyphenCodedOnce || (
921 dataField.getTag().endsWith("00") || dataField.getTag().endsWith("10")
922 || dataField.getTag().endsWith("11"))) {
923 fieldValue.append(SEPERATOR_SUB_FIELD);
924 } else if (fieldName != null && (fieldName.equalsIgnoreCase(SUBJECT_FACET)
925 || fieldName.equalsIgnoreCase(SUBJECT_DISPLAY))) {
926 if (dataField.getTag().equalsIgnoreCase("630")) {
927 if (subField.getCode().equals("v") || subField.getCode().equals("x")
928 || subField.getCode().equals("y") || subField.getCode().equals("z")) {
929 fieldValue.append(SEPERATOR_DOUBLE_HYPHEN);
930 }
931 } else if (dataField.getTag().equalsIgnoreCase("650") || dataField.getTag()
932 .equalsIgnoreCase(
933 "651")) {
934 if (isFirstSubField && fieldName.equalsIgnoreCase(SUBJECT_FACET)) {
935 fieldValues.add(fieldValue.toString().trim());
936 }
937 fieldValue.append(SEPERATOR_DOUBLE_HYPHEN);
938 isFirstSubField = true;
939 } else {
940 fieldValue.append(SEPERATOR_SUB_FIELD);
941 }
942 } else {
943 if (fieldName.startsWith("Subject_")) {
944 fieldValue.append(SEPERATOR_SUB_FIELD);
945 } else {
946 fieldValue.append(SEPERATOR_HYPHEN);
947 isHyphenCodedOnce = true;
948 }
949 }
950 }
951 fieldValue.append(subField.getValue());
952 }
953 }
954 }
955 if ((dataField.getTag().equalsIgnoreCase("650") || dataField.getTag().equalsIgnoreCase("651"))
956 && fieldValue != null && fieldValue.length() > 1 && fieldValue.toString().trim().length() > 1) {
957 String fieldVal = fieldValue.toString().trim();
958 String lastChar = String.valueOf(fieldVal.charAt(fieldVal.length() - 1));
959 if (!lastChar.equalsIgnoreCase(".")) {
960 fieldValue.append(".");
961 }
962 }
963 fieldValues.add(fieldValue.toString().trim());
964 }
965 }
966 }
967 if (fieldValues.size() == 1) {
968 return fieldValues.get(0);
969 } else if (fieldValues.size() > 0) {
970 return fieldValues;
971 } else {
972 return null;
973 }
974 }
975
976
977
978
979
980
981
982
983 private boolean isValidTag(String tag, String tagFormat) {
984 try {
985 if (!tagFormat.contains(PATTERN_CHAR)) {
986 return tagFormat.equals(tag);
987 } else {
988 int idx = tagFormat.lastIndexOf(PATTERN_CHAR);
989 return isValidTag(tag.substring(0, idx) + tag.substring(idx + PATTERN_CHAR.length(), tag.length()), tagFormat.substring(0, idx)
990 + tagFormat.substring(idx + PATTERN_CHAR.length(), tagFormat.length()));
991 }
992 } catch (Exception e) {
993 LOG.info("Exception :", e);
994 return false;
995 }
996 }
997
998 private void addGeneralFieldsToSolrDoc(BibMarcRecord record, SolrInputDocument solrDoc) {
999 String isbnDataFields = documentSearchConfig.FIELDS_TO_TAGS_2_INCLUDE_MAP.get(ISBN_SEARCH);
1000 for (DataField dataField : record.getDataFields()) {
1001 String tag = dataField.getTag();
1002 for (SubField subField : dataField.getSubFields()) {
1003 String subFieldKey = subField.getCode();
1004 String subFieldValue = subField.getValue();
1005 String key = tag + subFieldKey;
1006 subFieldValue = processGeneralFieldValue(tag, subFieldKey, subFieldValue, isbnDataFields);
1007 solrDoc.addField(DYNAMIC_FIELD_PREFIX + key, subFieldValue);
1008 }
1009 }
1010 }
1011
1012 private String processGeneralFieldValue(String tag, String subFieldKey, String subFieldValue, String isbnKey) {
1013 String value = subFieldValue;
1014 if (isbnKey.contains(tag) && isbnKey.contains(subFieldKey)) {
1015 value = (String) normalizeIsbn(subFieldValue);
1016 }
1017 return value;
1018 }
1019
1020 private Object normalizeIsbn(Object isbnValue) {
1021 Object result = null;
1022 ISBNUtil isbnUtil = new ISBNUtil();
1023 if (isbnValue != null) {
1024 if (isbnValue instanceof List) {
1025 result = new ArrayList<String>();
1026 for (Object obj : (List<Object>) isbnValue) {
1027 if (((String) obj).length() > 0) {
1028 try {
1029 ((List<String>) result).add(isbnUtil.normalizeISBN(obj));
1030 } catch (OleException e) {
1031
1032 ((List<String>) result).add((String) obj + " " + ISBN_NOT_NORMALIZED);
1033 }
1034 } else {
1035 ((List<String>) result).add((String) obj);
1036 }
1037 }
1038 } else {
1039 if (((String) isbnValue).length() > 0) {
1040 try {
1041 result = isbnUtil.normalizeISBN(isbnValue);
1042 } catch (OleException e) {
1043
1044 result = isbnValue + " " + ISBN_NOT_NORMALIZED;
1045 }
1046 } else {
1047 result = isbnValue;
1048 }
1049 }
1050 }
1051 return result;
1052 }
1053
1054
1055
1056
1057
1058
1059 private void pubCentury(int pubCen, List<String> pubList) {
1060 String pubCentury = String.valueOf(pubCen);
1061 if (pubCentury.endsWith("1")) {
1062 if (pubCentury.equalsIgnoreCase("11")) {
1063 pubList.add(pubCentury + "th Century");
1064 } else {
1065 pubList.add(pubCentury + "st Century");
1066 }
1067 } else if (pubCentury.endsWith("2")) {
1068 if (pubCentury.equalsIgnoreCase("12")) {
1069 pubList.add(pubCentury + "th Century");
1070 } else {
1071 pubList.add(pubCentury + "nd Century");
1072 }
1073 } else if (pubCentury.endsWith("3")) {
1074 if (pubCentury.equalsIgnoreCase("13")) {
1075 pubList.add(pubCentury + "th Century");
1076 } else {
1077 pubList.add(pubCentury + "rd Century");
1078 }
1079 } else {
1080 pubList.add(pubCentury + "th Century");
1081 }
1082
1083 }
1084
1085
1086 public String extractPublicationDateWithRegex(String publicationDate) {
1087 Pattern pattern = Pattern.compile(publicationDateRegex);
1088 Matcher matcher = pattern.matcher(publicationDate);
1089 if (matcher.find()) {
1090 if (matcher.group(0).equalsIgnoreCase("0000")) {
1091 return "";
1092 }
1093 return matcher.group(0);
1094 } else {
1095 return "";
1096 }
1097
1098
1099 }
1100
1101
1102
1103
1104
1105
1106 public Object buildPublicationDateFacetValue(String publicationDate, String publicationEndDate) {
1107 int pubDat = 0;
1108 List<String> pubList = new ArrayList<String>();
1109 Calendar cal = Calendar.getInstance();
1110 int year = cal.get(Calendar.YEAR);
1111 if (publicationDate != null && publicationDate.length() == 4 && Integer.parseInt(publicationDate) <= year) {
1112 int pubStartDate = Integer.parseInt(publicationDate);
1113 if (publicationEndDate != null && publicationEndDate.length() == 4 && pubStartDate < Integer
1114 .parseInt(publicationEndDate)) {
1115 if (Integer.parseInt(publicationEndDate) > year) {
1116 publicationEndDate = String.valueOf(year);
1117 }
1118 int pubEndDate = Integer.parseInt(publicationEndDate);
1119 while (pubStartDate < pubEndDate) {
1120 pubStartDate = (pubStartDate / 10) * 10;
1121 if (pubStartDate == 0) {
1122 pubList.add("Date could not be determined");
1123 } else {
1124 pubList.add(String.valueOf(pubStartDate) + "s");
1125 }
1126 pubStartDate = pubStartDate + 10;
1127 }
1128 pubStartDate = Integer.parseInt(publicationDate);
1129 pubEndDate = Integer.parseInt(publicationEndDate);
1130 while (pubStartDate < pubEndDate) {
1131 pubStartDate = (pubStartDate) / 100;
1132 pubDat = (pubStartDate) + 1;
1133 pubCentury(pubDat, pubList);
1134 pubStartDate = pubStartDate * 100 + 100;
1135 }
1136 } else {
1137 pubDat = (pubStartDate / 10) * 10;
1138 int pubCen = ((pubStartDate) / 100) + 1;
1139 if (pubDat == 0) {
1140 pubList.add("Date could not be determined");
1141 } else {
1142 pubList.add(String.valueOf(pubDat) + "s");
1143 pubCentury(pubCen, pubList);
1144 }
1145 }
1146 } else {
1147 pubList.add("Date could not be determined");
1148 }
1149 return pubList;
1150 }
1151
1152
1153
1154
1155
1156
1157
1158
1159 public List<String> buildPublicationDateFacetValues(List<String> publicationDates) {
1160 List<String> valueList = null;
1161 if (!CollectionUtils.isEmpty(publicationDates)) {
1162 valueList = new ArrayList<String>(publicationDates.size());
1163 for (int i = 0; i < publicationDates.size(); i++) {
1164 String pubDate = publicationDates.get(i);
1165 Object pubDt = buildPublicationDateFacetValue(pubDate, "");
1166 if (pubDt instanceof String) {
1167 valueList.add((String) pubDt);
1168 } else if (pubDt instanceof List) {
1169 List<String> pubDateList = (List<String>) pubDt;
1170 for (String pubDtVal : pubDateList) {
1171 valueList.add(pubDtVal);
1172 }
1173 }
1174 }
1175 }
1176 return valueList;
1177 }
1178
1179
1180 public String getSortString(String str) {
1181 String ret = "";
1182 StringBuffer sortString = new StringBuffer();
1183 ret = str.toLowerCase();
1184 ret = ret.replaceAll("[\\-\\/]", " ");
1185 ret = ret.replace("<", "");
1186 ret = ret.replace(">", "");
1187 ret = ret.replaceAll("[\\.\\,\\;\\:\\(\\)\\{\\}\\'\\!\\?\\\"\\<\\>\\[\\]]", "");
1188 ret = Normalizer.normalize(ret, Normalizer.Form.NFD).replaceAll("\\p{InCombiningDiacriticalMarks}+", "");
1189 ret = ret.replaceAll("\\s+", " ");
1190 sortString.append(ret);
1191 sortString.append(" /r/n!@#$");
1192 sortString.append(str);
1193 return sortString.toString();
1194 }
1195
1196 public List<String> getSortString(List<String> list) {
1197 List<String> sortStringList = new ArrayList<String>();
1198 for (String str : list) {
1199 sortStringList.add(getSortString(str));
1200 }
1201 return sortStringList;
1202 }
1203
1204 private int getSecondIndicator(BibMarcRecord record, String fieldName) {
1205 int ind2Value = 0;
1206 String fieldTags = documentSearchConfig.FIELDS_TO_TAGS_2_INCLUDE_MAP.get(fieldName);
1207 String[] tagValueList = null;
1208 if (fieldTags != null) {
1209 tagValueList = fieldTags.split(",");
1210 List<DataField> dataFieldList = record.getDataFields();
1211 String ind2 = null;
1212 boolean isVisit = true;
1213 for (DataField dataField : dataFieldList) {
1214 String tag = dataField.getTag();
1215 for (String tagValue : tagValueList) {
1216 StringBuffer sb = null;
1217 if (fieldName.equalsIgnoreCase(AUTHOR_SORT) || fieldName.equalsIgnoreCase(TITLE_SORT)) {
1218 sb = getTagValues(dataField, tag, tagValue);
1219 if (sb != null && sb.toString().length() > 0 && isVisit) {
1220 ind2 = dataField.getInd2();
1221 isVisit = false;
1222 }
1223
1224 }
1225 }
1226 }
1227 try {
1228 if (ind2 != null)
1229 ind2Value = Integer.parseInt(ind2);
1230
1231 } catch (Exception e) {
1232 ind2Value = -1;
1233 }
1234
1235 }
1236 return ind2Value;
1237 }
1238
1239 private StringBuffer getTagValues(DataField dataField, String tag, String tagValue) {
1240 StringBuffer sb = new StringBuffer();
1241 String[] tags = tagValue.split("-");
1242 for (String tagName : tags) {
1243 if (tag.equalsIgnoreCase(tagName)) {
1244 List<SubField> subFieldList = dataField.getSubFields();
1245 for (SubField subField : subFieldList) {
1246 sb.append(subField.getValue() + " ");
1247 }
1248
1249 }
1250 }
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260 return sb;
1261 }
1262
1263 public void bind(String holdingsId, List<String> bibIds) throws SolrServerException, IOException {
1264 List<SolrInputDocument> solrInputDocumentList = new ArrayList<SolrInputDocument>();
1265 updateInstanceDocument(holdingsId, bibIds, solrInputDocumentList);
1266 updateBibDocument(holdingsId, bibIds, solrInputDocumentList);
1267 LOG.info("solrInputDocumentList-->" + solrInputDocumentList);
1268 SolrServer server = SolrServerManager.getInstance().getSolrServer();
1269 UpdateResponse updateResponse = server.add(solrInputDocumentList);
1270 server.commit();
1271 }
1272
1273 private void updateBibDocument(String holdingsId, List<String> bibIds, List<SolrInputDocument> solrInputDocumentList) {
1274 for (String bibId : bibIds) {
1275 SolrDocument bibSolrDocument = getSolrDocumentByUUID(bibId);
1276 List<String> holdingsIdentifierList = new ArrayList<String>();
1277 Object holdingsIdentifier = bibSolrDocument.getFieldValue(HOLDINGS_IDENTIFIER);
1278 if (holdingsIdentifier instanceof List) {
1279 holdingsIdentifierList = (List<String>) bibSolrDocument.getFieldValue(HOLDINGS_IDENTIFIER);
1280 } else if (holdingsIdentifier instanceof String) {
1281 holdingsIdentifierList.add((String) holdingsIdentifier);
1282 }
1283 holdingsIdentifierList.add(holdingsId);
1284 bibSolrDocument.setField(HOLDINGS_IDENTIFIER, holdingsIdentifierList);
1285 solrInputDocumentList.add(buildSolrInputDocFromSolrDoc(bibSolrDocument));
1286 }
1287 }
1288
1289 private void updateInstanceDocument(String holdingsId, List<String> bibIds, List<SolrInputDocument> solrInputDocumentList) throws SolrServerException {
1290 SolrQuery solrQuery = new SolrQuery();
1291 SolrServer server = SolrServerManager.getInstance().getSolrServer();
1292 solrQuery.setQuery(("id:" + holdingsId + " AND DocType:" + DocType.HOLDINGS.getCode()));
1293 QueryResponse response = server.query(solrQuery);
1294 List<SolrDocument> solrDocumentList = response.getResults();
1295 LOG.debug("response.getResults()-->" + response.getResults());
1296 for (SolrDocument solrDocument : solrDocumentList) {
1297 List<String> bibIdentifierList = new ArrayList<String>();
1298
1299
1300 Object bibIdentifier = solrDocument.getFieldValue(BIB_IDENTIFIER);
1301 if (bibIdentifier instanceof List) {
1302 bibIdentifierList = (List<String>) solrDocument.getFieldValue(BIB_IDENTIFIER);
1303
1304 } else if (bibIdentifier instanceof String) {
1305 bibIdentifierList.add((String) bibIdentifier);
1306 }
1307 LOG.info("bibIdentifierList-->" + bibIdentifierList);
1308
1309 for (String bibId : bibIds) {
1310 bibIdentifierList.add(bibId);
1311 }
1312 solrDocument.setField("isBoundwith", true);
1313 solrDocument.setField(BIB_IDENTIFIER, bibIdentifierList);
1314 solrInputDocumentList.add(buildSolrInputDocFromSolrDoc(solrDocument));
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331 Object itemIdentifier = solrDocument.getFieldValue(ITEM_IDENTIFIER);
1332 List<String> itemIdentifierList = new ArrayList<String>();
1333 if (itemIdentifier instanceof List) {
1334 itemIdentifierList = (List<String>) solrDocument.getFieldValue(ITEM_IDENTIFIER);
1335
1336 } else if (itemIdentifier instanceof String) {
1337 itemIdentifierList.add((String) itemIdentifier);
1338 }
1339
1340 for (String itemId : itemIdentifierList) {
1341 SolrDocument itemSolrDocument = getSolrDocumentByUUID(itemId);
1342 itemSolrDocument.setField(BIB_IDENTIFIER, bibIdentifierList);
1343 solrInputDocumentList.add(buildSolrInputDocFromSolrDoc(itemSolrDocument));
1344
1345 }
1346 }
1347 }
1348
1349 public void bindAnalytics(String seriesHoldingsId, List<String> itemIds, String createOrBreak) throws SolrServerException, IOException {
1350 List<SolrInputDocument> solrInputDocumentList = new ArrayList<SolrInputDocument>();
1351 updateHoldingsDocument(seriesHoldingsId, itemIds, solrInputDocumentList, createOrBreak);
1352 updateItemDocument(seriesHoldingsId, itemIds, solrInputDocumentList, createOrBreak);
1353 LOG.info("solrInputDocumentList-->" + solrInputDocumentList);
1354 SolrServer server = SolrServerManager.getInstance().getSolrServer();
1355 UpdateResponse updateResponse = server.add(solrInputDocumentList);
1356 server.commit();
1357 }
1358
1359 private void updateHoldingsDocument(String seriesHoldingsId, List<String> itemIds, List<SolrInputDocument> solrInputDocumentList, String createOrBreak) throws SolrServerException {
1360 SolrQuery solrQuery = new SolrQuery();
1361 SolrServer server = SolrServerManager.getInstance().getSolrServer();
1362 solrQuery.setQuery(("id:" + seriesHoldingsId + " AND DocType:" + DocType.HOLDINGS.getCode()));
1363 QueryResponse response = server.query(solrQuery);
1364 List<SolrDocument> solrDocumentList = response.getResults();
1365 LOG.debug("response.getResults()-->" + response.getResults());
1366 List<String> itemIdentifierList = new ArrayList<String>();
1367 List<String> holdingsIdentifierList = new ArrayList<String>();
1368 for (SolrDocument holdingsSolrDocument : solrDocumentList) {
1369 Object itemIdentifier = holdingsSolrDocument.getFieldValue(ITEM_IDENTIFIER);
1370 if (itemIdentifier instanceof List) {
1371 itemIdentifierList = (List<String>) holdingsSolrDocument.getFieldValue(ITEM_IDENTIFIER);
1372 } else if (itemIdentifier instanceof String) {
1373 itemIdentifierList.add((String) itemIdentifier);
1374 }
1375 if (!CollectionUtils.isEmpty(itemIdentifierList) && createOrBreak.equalsIgnoreCase("CREATE")) {
1376 itemIdentifierList.addAll(itemIds);
1377 holdingsSolrDocument.setField(ITEM_IDENTIFIER, itemIdentifierList);
1378 holdingsSolrDocument.setField("isSeries", Boolean.TRUE);
1379 if (!CollectionUtils.isEmpty(itemIds)) {
1380 for (String itemId : itemIds) {
1381 SolrDocument itemSolrDocument = getSolrDocumentByUUID(itemId);
1382
1383 Object holdingsIdentifier = itemSolrDocument.getFieldValue(HOLDINGS_IDENTIFIER);
1384 if (holdingsIdentifier instanceof List) {
1385 holdingsIdentifierList = (List<String>) itemSolrDocument.getFieldValue(HOLDINGS_IDENTIFIER);
1386 } else if (holdingsIdentifier instanceof String) {
1387 holdingsIdentifierList.add((String) holdingsIdentifier);
1388 }
1389 if (!CollectionUtils.isEmpty(holdingsIdentifierList)) {
1390 for (String holdingId : holdingsIdentifierList) {
1391 SolrDocument holdingSolrDocument = getSolrDocumentByUUID(holdingId);
1392 holdingSolrDocument.setField("isAnalytic", Boolean.TRUE);
1393 solrInputDocumentList.add(buildSolrInputDocFromSolrDoc(holdingSolrDocument));
1394 }
1395 }
1396 }
1397 }
1398
1399 LOG.info("itemIdentifierList-->" + itemIdentifierList);
1400 } else if (!CollectionUtils.isEmpty(itemIdentifierList) && createOrBreak.equalsIgnoreCase("BREAK")) {
1401 itemIdentifierList.removeAll(itemIds);
1402 holdingsSolrDocument.setField(ITEM_IDENTIFIER, itemIdentifierList);
1403 boolean hasAnalytic = false;
1404 if (!CollectionUtils.isEmpty(itemIdentifierList)) {
1405 for (String itemId : itemIdentifierList) {
1406 SolrDocument itemSolrDocument = getSolrDocumentByUUID(itemId);
1407 if (itemSolrDocument.getFieldValue("isAnalytic") instanceof Boolean) {
1408 hasAnalytic = (Boolean) itemSolrDocument.getFieldValue("isAnalytic");
1409 if (hasAnalytic) {
1410 break;
1411 }
1412 }
1413 }
1414 if (!hasAnalytic) {
1415 holdingsSolrDocument.setField("isSeries", Boolean.FALSE);
1416
1417 if (!CollectionUtils.isEmpty(itemIds)) {
1418 for (String itemId : itemIds) {
1419 SolrDocument itemSolrDocument = getSolrDocumentByUUID(itemId);
1420
1421 Object holdingsIdentifier = itemSolrDocument.getFieldValue(HOLDINGS_IDENTIFIER);
1422 if (holdingsIdentifier instanceof List) {
1423 holdingsIdentifierList = (List<String>) itemSolrDocument.getFieldValue(HOLDINGS_IDENTIFIER);
1424 } else if (holdingsIdentifier instanceof String) {
1425 holdingsIdentifierList.add((String) holdingsIdentifier);
1426 }
1427 if (!CollectionUtils.isEmpty(holdingsIdentifierList)) {
1428 for (String holdingId : holdingsIdentifierList) {
1429 SolrDocument holdingSolrDocument = getSolrDocumentByUUID(holdingId);
1430 holdingSolrDocument.setField("isAnalytic", Boolean.FALSE);
1431 solrInputDocumentList.add(buildSolrInputDocFromSolrDoc(holdingSolrDocument));
1432 }
1433 }
1434 }
1435 }
1436
1437 }
1438 }
1439 LOG.info("itemIdentifierList-->" + itemIdentifierList);
1440 }
1441 solrInputDocumentList.add(buildSolrInputDocFromSolrDoc(holdingsSolrDocument));
1442 }
1443 }
1444
1445 private void updateItemDocument(String seriesHoldingsId, List<String> itemIds, List<SolrInputDocument> solrInputDocumentList, String createOrBreak) throws SolrServerException {
1446 for (String itemId : itemIds) {
1447 SolrDocument itemSolrDocument = getSolrDocumentByUUID(itemId);
1448 List<String> holdingsIdentifierList = new ArrayList<String>();
1449 Object holdingsIdentifier = itemSolrDocument.getFieldValue(HOLDINGS_IDENTIFIER);
1450 if (holdingsIdentifier instanceof List) {
1451 holdingsIdentifierList = (List<String>) itemSolrDocument.getFieldValue(HOLDINGS_IDENTIFIER);
1452 } else if (holdingsIdentifier instanceof String) {
1453 holdingsIdentifierList.add((String) holdingsIdentifier);
1454 }
1455 if (!CollectionUtils.isEmpty(holdingsIdentifierList) && createOrBreak.equalsIgnoreCase("CREATE")) {
1456 holdingsIdentifierList.add(seriesHoldingsId);
1457 itemSolrDocument.setField(HOLDINGS_IDENTIFIER, holdingsIdentifierList);
1458 itemSolrDocument.setField("isAnalytic", Boolean.TRUE);
1459 } else if (!CollectionUtils.isEmpty(holdingsIdentifierList) && createOrBreak.equalsIgnoreCase("BREAK")) {
1460 holdingsIdentifierList.remove(seriesHoldingsId);
1461 itemSolrDocument.setField(HOLDINGS_IDENTIFIER, holdingsIdentifierList);
1462 itemSolrDocument.setField("isAnalytic", Boolean.FALSE);
1463 }
1464 solrInputDocumentList.add(buildSolrInputDocFromSolrDoc(itemSolrDocument));
1465 }
1466 }
1467
1468
1469
1470
1471 }