1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16 package org.kuali.ole.docstore.discovery.service;
17
18 import org.apache.commons.io.FileUtils;
19 import org.apache.commons.lang.time.StopWatch;
20 import org.apache.solr.client.solrj.SolrQuery;
21 import org.apache.solr.client.solrj.SolrServer;
22 import org.apache.solr.client.solrj.SolrServerException;
23 import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer;
24 import org.apache.solr.client.solrj.response.QueryResponse;
25 import org.apache.solr.client.solrj.response.UpdateResponse;
26 import org.apache.solr.client.solrj.util.ClientUtils;
27 import org.apache.solr.common.SolrDocument;
28 import org.apache.solr.common.SolrInputDocument;
29 import org.apache.solr.common.SolrInputField;
30 import org.kuali.ole.docstore.discovery.solr.security.patron.oleml.SecurityPatronOlemlDocBuilder;
31 import org.kuali.ole.docstore.discovery.solr.work.bib.WorkBibCommonFields;
32 import org.kuali.ole.docstore.discovery.solr.work.bib.dublin.WorkBibDublinDocBuilder;
33 import org.kuali.ole.docstore.discovery.solr.work.bib.dublin.unqualified.WorkBibDublinUnQualifiedDocBuilder;
34 import org.kuali.ole.docstore.discovery.solr.work.bib.marc.WorkBibMarcDocBuilder;
35 import org.kuali.ole.docstore.discovery.solr.work.instance.oleml.WorkInstanceOlemlDocBuilder;
36 import org.kuali.ole.docstore.discovery.util.PropertyUtil;
37 import org.kuali.ole.docstore.model.enums.DocCategory;
38 import org.kuali.ole.docstore.model.enums.DocFormat;
39 import org.kuali.ole.docstore.model.enums.DocType;
40 import org.kuali.ole.docstore.model.xmlpojo.ingest.RequestDocument;
41 import org.kuali.ole.docstore.model.xmlpojo.work.bib.dublin.WorkBibDublinRecord;
42 import org.kuali.ole.docstore.model.xmlpojo.work.instance.oleml.InstanceCollection;
43 import org.kuali.ole.docstore.model.xmlpojo.work.instance.oleml.OleInstance;
44 import org.kuali.ole.docstore.model.xmlpojo.work.instance.oleml.ResourceIdentifier;
45 import org.kuali.ole.docstore.model.xstream.work.bib.dublin.WorkBibDublinRecordProcessor;
46 import org.kuali.ole.docstore.model.xstream.work.bib.dublin.unqualified.WorkBibDublinUnQualifiedRecordProcessor;
47 import org.kuali.ole.docstore.model.xstream.work.bib.marc.WorkBibMarcRecordProcessor;
48
49 import javax.xml.stream.XMLInputFactory;
50 import javax.xml.stream.XMLStreamConstants;
51 import javax.xml.stream.XMLStreamReader;
52 import javax.xml.transform.OutputKeys;
53 import javax.xml.transform.Transformer;
54 import javax.xml.transform.TransformerFactory;
55 import javax.xml.transform.stax.StAXSource;
56 import javax.xml.transform.stream.StreamResult;
57 import java.io.*;
58 import java.net.HttpURLConnection;
59 import java.net.MalformedURLException;
60 import java.net.URL;
61 import java.util.*;
62
63
64
65
66 public class IndexerServiceImpl
67 implements IndexerService {
68 private static final org.apache.log4j.Logger LOG = org.apache.log4j.Logger
69 .getLogger(IndexerServiceImpl.class);
70 public static final String UUID_FILE_NAME_SUFFIX = "_UUID_.xml";
71
72
73 private static IndexerService indexerService = null;
74 public static final String ID_FIELD_PREFIX = "id_disc_";
75 public static final int BATCH_SIZE = 10000;
76 private final String BIBLIOGRAPHIC = "bibliographic";
77 private final String DOC_TYPE = "DocType";
78 private final String DOC_FORMAT = "DocFormat";
79 private final String HOLDINGS_IDENTIFIER = "holdingsIdentifier";
80 private final String ITEM_IDENTIFIER = "itemIdentifier";
81 private final String INSTANCE = "instance";
82
83
/**
 * Private constructor; instances are handed out by {@link #getInstance()}.
 * Runs {@link #init()} as part of construction.
 */
private IndexerServiceImpl() {
    this.init();
}
87
88 public static IndexerService getInstance() {
89 if (null == indexerService) {
90 indexerService = new IndexerServiceImpl();
91 }
92 return indexerService;
93 }
94
95 protected void init() {
96 LOG.debug("IndexerServiceImpl init ");
97
98
99
100
101 }
102
103 public String deleteDocuments(String docCategory, List<String> uuidList)
104 throws MalformedURLException, SolrServerException {
105 String result = deleteDocumentsByUUIDList(uuidList, docCategory);
106 return result;
107 }
108
109 public String deleteDocument(String docCategory, String uuid) {
110 String result = deleteDocumentByUUID(uuid, docCategory);
111 return result;
112 }
113
114 protected String indexSolrDocuments(List<SolrInputDocument> solrDocs) {
115 String result = null;
116 StopWatch timer = new StopWatch();
117 timer.start();
118 try {
119 result = indexSolrDocuments(solrDocs, true, false);
120 timer.stop();
121 LOG.info("Time taken for indexing Solr docs:" + timer.toString());
122 }
123 catch (Exception e) {
124 result = buildFailureMsg(null, "Indexing failed. " + e.getMessage());
125 LOG.error(result, e);
126 }
127 return result;
128 }
129
130 @Override
131 public String indexDocumentsFromDirBySolrDoc(String docCategory, String docType, String docFormat, String dataDir) {
132 String result = null;
133 String xmlContent = "";
134
135 File srcDir = new File(dataDir);
136 if ((null == srcDir) || !srcDir.isDirectory()) {
137 result = buildFailureMsg(null, "Invalid data directory:" + dataDir);
138 return result;
139 }
140 FilenameFilter filter = new FilenameFilter() {
141 public boolean accept(File dir, String name) {
142 return (!name.startsWith(".") && (name.endsWith(".xml")));
143 }
144 };
145
146 String[] srcFileNames = srcDir.list(filter);
147 if ((null == srcFileNames) || (srcFileNames.length == 0)) {
148 result = buildFailureMsg(null, "No data files found in data dir:" + dataDir);
149 return result;
150 }
151 List<File> fileList = new ArrayList<File>(srcFileNames.length);
152 for (int i = 0; i < srcFileNames.length; i++) {
153 File srcFile = new File(dataDir + File.separator + srcFileNames[i]);
154 fileList.add(srcFile);
155 }
156 return indexDocumentsFromFiles(docCategory, docType, docFormat, fileList);
157 }
158
159 @Override
160 public String indexDocumentsFromStringBySolrDoc(String docCategory, String docType, String docFormat, String data)
161 throws IOException {
162
163 File file = File.createTempFile("marc.xml", ".tmp");
164 FileUtils.writeStringToFile(file, data, "UTF-8");
165 String filePath = file.getAbsolutePath();
166 return indexDocumentsFromFileBySolrDoc(docCategory, docType, docFormat,
167 filePath);
168 }
169
170 @Override
171 public String indexDocumentsFromFileBySolrDoc(String docCategory, String docType, String docFormat,
172 String filePath) {
173 List<File> fileList = new ArrayList<File>(0);
174 fileList.add(new File(filePath));
175 return indexDocumentsFromFiles(docCategory, docType, docFormat, fileList);
176 }
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191 @Override
192 public String indexDocumentsFromFiles(String docCategory, String docType, String docFormat, List<File> fileList) {
193
194 String result = null;
195 String xmlContent = "";
196 try {
197 StopWatch indexingTimer = new StopWatch();
198 StopWatch conversionTimer = new StopWatch();
199 StopWatch fileIOTimer = new StopWatch();
200 StopWatch totalTimer = new StopWatch();
201 totalTimer.start();
202 fileIOTimer.start();
203 fileIOTimer.suspend();
204
205 if ((null == fileList) || (fileList.size() == 0)) {
206 result = buildFailureMsg(null, "No files found in data dir:" + fileList);
207 return result;
208 }
209 int numFiles = fileList.size();
210 int numDocs = 0;
211 SolrServer solr = SolrServerManager.getInstance().getSolrServer();
212 TransformerFactory tf = new com.sun.org.apache.xalan.internal.xsltc.trax.TransformerFactoryImpl();
213 Transformer t = tf.newTransformer();
214 t.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
215 t.setOutputProperty(OutputKeys.INDENT, "yes");
216 conversionTimer.start();
217 conversionTimer.suspend();
218 indexingTimer.start();
219 indexingTimer.suspend();
220 for (int i = 0; i < fileList.size(); i++) {
221 File srcFile = fileList.get(i);
222 LOG.info("Processing File: " + srcFile.getAbsolutePath());
223 String srcFileName = srcFile.getName();
224
225
226 String idFromFileName = null;
227 List<String> idFromFileNameList = null;
228 int suffixIndex = srcFileName.indexOf(UUID_FILE_NAME_SUFFIX);
229 if (suffixIndex > 0) {
230 idFromFileName = srcFileName.substring(0, suffixIndex);
231 idFromFileNameList = new ArrayList<String>(1);
232 idFromFileNameList.add(idFromFileName);
233 }
234
235 int recordsProcessedInFile = 0;
236 try {
237 XMLInputFactory xif = XMLInputFactory.newInstance();
238 XMLStreamReader xsr = xif.createXMLStreamReader(new FileReader(srcFile));
239 xsr.nextTag();
240 recordsProcessedInFile = 0;
241 List<SolrInputDocument> solrDocsToAdd = new ArrayList<SolrInputDocument>();
242 List<SolrInputDocument> solrDocs = null;
243 while (xsr.hasNext()) {
244 int eventType = xsr.next();
245 if (eventType == XMLStreamConstants.START_ELEMENT) {
246 if (DocFormat.MARC.isEqualTo(docFormat)) {
247 recordsProcessedInFile++;
248 LOG.debug("Processing Record(" + recordsProcessedInFile + ") of File: " + srcFileName);
249 fileIOTimer.resume();
250 StringWriter str = new StringWriter();
251 str.append("<collection>");
252 t.transform(new StAXSource(xsr), new StreamResult(str));
253 str.append("</collection>");
254 xmlContent = str.getBuffer().toString();
255 fileIOTimer.suspend();
256 conversionTimer.resume();
257 solrDocs = convertToSolrDocs(docCategory, docType, docFormat, xmlContent);
258 if ((null == solrDocs) || (solrDocs.size() == 0)) {
259 continue;
260 }
261 if (idFromFileName == null) {
262 assignUUIDs(solrDocs, null);
263 }
264 else {
265 assignUUIDs(solrDocs.subList(0, 1), idFromFileNameList);
266 }
267 conversionTimer.suspend();
268 numDocs += solrDocs.size();
269 }
270 else if (DocFormat.DUBLIN_CORE.isEqualTo(docFormat)) {
271
272 conversionTimer.resume();
273 solrDocs = convertToSolrDocs(docCategory, docType, docFormat,
274 FileUtils.readFileToString(srcFile, "UTF-8"));
275 assignUUIDs(solrDocs, null);
276 conversionTimer.suspend();
277 solrDocsToAdd.addAll(solrDocs);
278 numDocs += solrDocs.size();
279 break;
280 }
281 else if (DocFormat.DUBLIN_UNQUALIFIED.isEqualTo(docFormat)) {
282 if (xsr.getName().getLocalPart().equalsIgnoreCase("record")) {
283 conversionTimer.resume();
284 solrDocs = new ArrayList<SolrInputDocument>();
285 StringWriter str = new StringWriter();
286 str.append("<OAI-PMH><ListRecords>");
287 t.transform(new StAXSource(xsr), new StreamResult(str));
288 str.append("</ListRecords></OAI-PMH>");
289 str.close();
290 xmlContent = str.getBuffer().toString();
291 solrDocs = convertToSolrDocs(docCategory, docType, docFormat, xmlContent);
292 str.flush();
293 assignUUIDs(solrDocs, null);
294 conversionTimer.suspend();
295 numDocs += solrDocs.size();
296 }
297 }
298 else {
299 throw new Exception("Unsupported Document Format: " + docFormat);
300 }
301 }
302 else {
303 continue;
304 }
305
306 if (solrDocs != null) {
307 solrDocsToAdd.addAll(solrDocs);
308 }
309 if (solrDocsToAdd.size() < 500) {
310
311 continue;
312 }
313 indexingTimer.resume();
314 solr.add(solrDocsToAdd);
315 indexingTimer.suspend();
316 solrDocsToAdd.clear();
317 if (recordsProcessedInFile % 10000 == 0) {
318 totalTimer.split();
319 LOG.info("Records processed in file " + srcFileName + ":" + recordsProcessedInFile
320 + "; Time elapsed:" + totalTimer.toSplitString());
321 }
322 if (idFromFileName != null || DocFormat.DUBLIN_CORE.isEqualTo(docFormat)) {
323 break;
324 }
325 }
326 if (solrDocsToAdd.size() > 0) {
327 indexingTimer.resume();
328 solr.add(solrDocsToAdd);
329 indexingTimer.suspend();
330 solrDocsToAdd.clear();
331 }
332 }
333 catch (Exception ex) {
334 String message = "Failure while processing file '" + srcFile.getAbsolutePath() + "' \nat Record: "
335 + recordsProcessedInFile + "\n" + xmlContent;
336 ex.printStackTrace();
337 LOG.error(message);
338 solr.rollback();
339 throw ex;
340 }
341 totalTimer.split();
342 if (recordsProcessedInFile > 0) {
343
344 LOG.info("Records processed in file " + srcFileName + ":" + recordsProcessedInFile
345 + "; Time elapsed:" + totalTimer.toSplitString());
346 }
347 }
348
349 if (numDocs > 0) {
350 indexingTimer.resume();
351 solr.commit();
352 indexingTimer.suspend();
353 }
354
355 conversionTimer.stop();
356 fileIOTimer.stop();
357 indexingTimer.stop();
358 totalTimer.stop();
359 LOG.info("Num of files processed:" + numFiles + "; Num of documents processed:" + numDocs);
360 LOG.info("Time taken for reading files:" + fileIOTimer.toString()
361 + "; Time taken for parsing and converting to Solr Docs:" + conversionTimer.toString());
362 LOG.info(
363 "Time taken for indexing Solr docs:" + indexingTimer.toString() + "; Total time taken:" + totalTimer
364 .toString());
365 result = SUCCESS + "-" + numDocs;
366 }
367 catch (Exception e) {
368 result = buildFailureMsg(null, "Indexing failed. " + e.getMessage());
369 LOG.error(result, e);
370 }
371 return result;
372 }
373
374
375
376
377
378
379
380
381 public String indexDocument(RequestDocument requestDocument) {
382 List<RequestDocument> requestDocuments = null;
383 if (requestDocument != null) {
384 requestDocuments = new ArrayList<RequestDocument>(1);
385 requestDocuments.add(requestDocument);
386 }
387 return indexDocuments(requestDocuments);
388 }
389
390 public String indexDocuments(List<RequestDocument> requestDocuments) {
391 String result = null;
392 StopWatch timer = new StopWatch();
393 timer.start();
394 List<SolrInputDocument> solrInputDocuments = new ArrayList<SolrInputDocument>();
395 try {
396 for (RequestDocument requestDocument : requestDocuments) {
397 if (requestDocument == null) {
398 continue;
399 }
400 if (DocCategory.WORK.isEqualTo(requestDocument.getCategory())) {
401 if (DocType.BIB.isEqualTo(requestDocument.getType())) {
402 if (DocFormat.MARC.isEqualTo(requestDocument.getFormat())) {
403 new WorkBibMarcDocBuilder().buildSolrInputDocument(requestDocument, solrInputDocuments);
404 }
405 else if(DocFormat.DUBLIN_CORE.isEqualTo(requestDocument.getFormat())){
406 new WorkBibDublinDocBuilder().buildSolrInputDocument(requestDocument,solrInputDocuments);
407 }
408 else if (DocFormat.DUBLIN_UNQUALIFIED.isEqualTo(requestDocument.getFormat())) {
409 new WorkBibDublinUnQualifiedDocBuilder().buildSolrInputDocument(requestDocument,solrInputDocuments);
410
411 }
412 else {
413 throw new Exception(
414 "Unsupported Document Format : " + requestDocument.getFormat() + " Called.");
415 }
416 }
417 else if (DocType.INSTANCE.isEqualTo(requestDocument.getType())) {
418 if (DocFormat.OLEML.isEqualTo(requestDocument.getFormat())) {
419 new WorkInstanceOlemlDocBuilder().buildSolrInputDocument(requestDocument, solrInputDocuments);
420 }
421 else {
422 throw new Exception(
423 "Unsupported Document Format : " + requestDocument.getFormat() + " Called.");
424 }
425 }
426 else if (DocType.HOLDINGS.isEqualTo(requestDocument.getType())) {
427 if (DocFormat.OLEML.isEqualTo(requestDocument.getFormat())) {
428 new WorkInstanceOlemlDocBuilder().buildSolrInputDocument(requestDocument, solrInputDocuments);
429 }
430 else {
431 throw new Exception(
432 "Unsupported Document Format : " + requestDocument.getFormat() + " Called.");
433 }
434 }
435 else if (DocType.ITEM.isEqualTo(requestDocument.getType())) {
436 if (DocFormat.OLEML.isEqualTo(requestDocument.getFormat())) {
437 new WorkInstanceOlemlDocBuilder().buildSolrInputDocument(requestDocument, solrInputDocuments);
438 }
439 else {
440 throw new Exception(
441 "Unsupported Document Format : " + requestDocument.getFormat() + " Called.");
442 }
443 }
444 else {
445 throw new Exception(
446 "Unsupported Document Type : " + requestDocument.getFormat() + " Called.");
447 }
448 }
449 else if (DocCategory.SECURITY.isEqualTo(requestDocument.getCategory())) {
450 if (DocType.PATRON.isEqualTo(requestDocument.getType())) {
451 if (DocFormat.OLEML.isEqualTo(requestDocument.getFormat())) {
452 new SecurityPatronOlemlDocBuilder()
453 .buildSolrInputDocument(requestDocument, solrInputDocuments);
454 }
455 else {
456 throw new Exception(
457 "Unsupported Document Format : " + requestDocument.getFormat() + " Called.");
458 }
459 }
460 else {
461 throw new Exception("Unsupported Document Type : " + requestDocument.getType() + " Called.");
462 }
463 }
464 else {
465
466 throw new Exception(
467 "Unsupported Document Category : " + requestDocument.getCategory() + " Called.");
468 }
469 assignUUIDs(solrInputDocuments, null);
470 }
471 }
472 catch (Exception e1) {
473 result = buildFailureMsg(null, "Indexing failed. " + e1.getMessage());
474 LOG.error(result, e1);
475 }
476 timer.stop();
477 if ((null == solrInputDocuments) || (solrInputDocuments.isEmpty())) {
478 result = buildFailureMsg(null, "No valid documents found in input.");
479 return result;
480 }
481 int numDocs = solrInputDocuments.size();
482 LOG.info("Conversion to Solr docs- Num:" + numDocs + ": Time taken:" + timer.toString());
483 result = indexSolrDocuments(solrInputDocuments);
484 return result;
485 }
486
487 public String bulkIndexDocuments(List<RequestDocument> requestDocuments) {
488 String result = "success";
489 Map<String, SolrInputDocument> bibIdToDocMap = new HashMap<String, SolrInputDocument>();
490 if (requestDocuments != null && requestDocuments.size() > 0) {
491 StopWatch timer = new StopWatch();
492 timer.start();
493 List<SolrInputDocument> solrInputDocuments = new ArrayList<SolrInputDocument>();
494 try {
495 if (DocCategory.WORK.isEqualTo(requestDocuments.get(0).getCategory())) {
496 if (DocType.BIB.isEqualTo(requestDocuments.get(0).getType())) {
497 if (DocFormat.MARC.isEqualTo(requestDocuments.get(0).getFormat())) {
498 WorkBibMarcDocBuilder marcBuilder = new WorkBibMarcDocBuilder();
499 for (RequestDocument requestDocument : requestDocuments) {
500 marcBuilder.buildSolrInputDocument(requestDocument, solrInputDocuments);
501 }
502 }
503 else if (DocFormat.DUBLIN_CORE.isEqualTo(requestDocuments.get(0).getFormat())) {
504 WorkBibDublinDocBuilder dublinBuilder = new WorkBibDublinDocBuilder();
505 for (RequestDocument requestDocument : requestDocuments) {
506 dublinBuilder.buildSolrInputDocument(requestDocument,solrInputDocuments);
507 }
508 }
509 else if (DocFormat.DUBLIN_UNQUALIFIED.isEqualTo(requestDocuments.get(0).getFormat())) {
510 WorkBibDublinUnQualifiedDocBuilder dublinUnqBuilder
511 = new WorkBibDublinUnQualifiedDocBuilder();
512 for (RequestDocument requestDocument : requestDocuments) {
513 dublinUnqBuilder.buildSolrInputDocument(requestDocument,solrInputDocuments);
514 }
515 }
516 }
517 else if (DocType.INSTANCE.isEqualTo(requestDocuments.get(0).getType())) {
518 WorkInstanceOlemlDocBuilder oleMlDocBuilder = new WorkInstanceOlemlDocBuilder();
519 for (RequestDocument requestDocument : requestDocuments) {
520 OleInstance instance = ((InstanceCollection) requestDocument.getContent()
521 .getContentObject())
522 .getInstanceCollection().get(0);
523 for (ResourceIdentifier rid : instance.getResourceIdentifier()) {
524 List<SolrDocument> docs = getSolrDocumentBySolrId(rid.getValue());
525 for (SolrDocument solrDoc : docs) {
526 SolrInputDocument bibSolrIDoc = ClientUtils.toSolrInputDocument(solrDoc);
527 String bibId = bibSolrIDoc.getFieldValue(WorkBibCommonFields.UNIQUE_ID).toString();
528 if (bibIdToDocMap.get(bibId) == null) {
529 bibIdToDocMap.put(bibId, bibSolrIDoc);
530 }
531 bibIdToDocMap.get(bibId)
532 .addField("instanceIdentifier", instance.getInstanceIdentifier());
533 }
534 }
535 oleMlDocBuilder.buildSolrInputDocuments(requestDocument, solrInputDocuments);
536 }
537 }
538 }
539 if (DocCategory.SECURITY.isEqualTo(requestDocuments.get(0).getCategory())) {
540 if (DocType.PATRON.isEqualTo(requestDocuments.get(0).getType())) {
541 if (DocFormat.OLEML.isEqualTo(requestDocuments.get(0).getFormat())) {
542 SecurityPatronOlemlDocBuilder patronBuilder = new SecurityPatronOlemlDocBuilder();
543 for (RequestDocument requestDocument : requestDocuments) {
544 patronBuilder.buildSolrInputDocument(requestDocument, solrInputDocuments);
545 }
546 }
547 }
548 }
549 assignUUIDs(solrInputDocuments, null);
550 solrInputDocuments.addAll(bibIdToDocMap.values());
551 }
552 catch (Exception e1) {
553 result = buildFailureMsg(null, "Bulk Indexing failed. " + e1.getMessage());
554 LOG.error(result, e1);
555 }
556 timer.stop();
557 if (solrInputDocuments.isEmpty()) {
558 result = buildFailureMsg(null, "No valid documents found in input.");
559 return result;
560 }
561 int numDocs = solrInputDocuments.size();
562 LOG.info("Conversion to Solr docs- Num:" + numDocs + ": Time taken:" + timer.toString());
563 result = indexSolrDocuments(solrInputDocuments);
564 }
565 return result;
566 }
567
568 public List<SolrDocument> getSolrDocumentBySolrId(String uniqueId) {
569 QueryResponse response = null;
570 String result = null;
571 try {
572 String args = "(" + WorkBibCommonFields.UNIQUE_ID + ":" + uniqueId + ")";
573 SolrServer solr = SolrServerManager.getInstance().getSolrServer();
574 SolrQuery query = new SolrQuery();
575 query.setQuery(args);
576 response = solr.query(query);
577 }
578 catch (Exception e) {
579 result = buildFailureMsg();
580 LOG.error(result, e);
581 }
582 return response.getResults();
583 }
584
585 public List<SolrDocument> getSolrDocument(String fieldName, String fieldValue) {
586 QueryResponse response = null;
587 String result = null;
588 try {
589 String args = "(" + fieldName + ":" + fieldValue + ")";
590 SolrServer solr = SolrServerManager.getInstance().getSolrServer();
591 SolrQuery query = new SolrQuery();
592 query.setQuery(args);
593 response = solr.query(query);
594 }
595 catch (Exception e) {
596 result = buildFailureMsg();
597 LOG.error(result, e);
598 }
599 return response.getResults();
600 }
601
602
603
604
605
606
607
608
609
610
611 protected void assignUUIDs(List<SolrInputDocument> solrDocs, List<String> ids) throws Exception {
612 if ((null == solrDocs) || (solrDocs.size() == 0)) {
613 return;
614 }
615 if ((null != ids) && (ids.size() < solrDocs.size())) {
616 throw new Exception(
617 "Insufficient UUIDs(" + ids.size() + ") specified for documents(" + solrDocs.size() + ".");
618 }
619 for (int i = 0; i < solrDocs.size(); i++) {
620 SolrInputDocument solrInputDocument = solrDocs.get(i);
621 SolrInputField idField = solrInputDocument.getField("id");
622 String uuid = null;
623 if (null != ids) {
624
625 uuid = ids.get(i);
626 }
627 if (null == idField) {
628 if (null == uuid) {
629
630 uuid = UUID.randomUUID().toString();
631 uuid = ID_FIELD_PREFIX + uuid;
632 }
633 solrInputDocument.addField("id", uuid);
634 solrInputDocument.addField("uniqueId", uuid);
635 }
636 else {
637 if (null != uuid) {
638
639 solrInputDocument.setField("id", uuid);
640 solrInputDocument.setField("uniqueId", uuid);
641 }
642 else {
643
644 uuid = (String) idField.getValue();
645 if (null == uuid) {
646
647 uuid = UUID.randomUUID().toString();
648 uuid = ID_FIELD_PREFIX + uuid;
649 idField.setValue(uuid, 1.0f);
650 }
651 SolrInputField uniqueIdField = solrInputDocument.getField("uniqueId");
652 if (null == uniqueIdField) {
653 solrInputDocument.addField("uniqueId", uuid);
654 }
655 else {
656 solrInputDocument.setField("uniqueId", uuid);
657 }
658 }
659 }
660 }
661 }
662
663 protected String indexSolrDocuments(List<SolrInputDocument> solrDocs, boolean commit, boolean optimize)
664 throws Exception {
665 SolrServer solr = null;
666 if ((null == solrDocs) || (solrDocs.isEmpty())) {
667 return SUCCESS + "-0";
668 }
669 solr = SolrServerManager.getInstance().getSolrServer();
670 if (solrDocs.size() > BATCH_SIZE) {
671 int numSolrDocs = solrDocs.size();
672 for (int fromIndex = 0; fromIndex < numSolrDocs; fromIndex += BATCH_SIZE) {
673 int toIndex = fromIndex + BATCH_SIZE;
674 if (toIndex > numSolrDocs) {
675 toIndex = numSolrDocs;
676 }
677 List batchSolrDocs = solrDocs.subList(fromIndex, toIndex);
678 if ((null != batchSolrDocs) && (!batchSolrDocs.isEmpty())) {
679 LOG.info("Indexing records. fromIndex=" + fromIndex + ", toIndex=" + toIndex);
680 UpdateResponse response = solr.add(solrDocs);
681 }
682 }
683 }
684 else {
685 LOG.debug("Indexing records. size=" + solrDocs.size());
686 UpdateResponse response = solr.add(solrDocs);
687 }
688 if (commit) {
689 solr.commit();
690 }
691 if (optimize) {
692 solr.optimize();
693 }
694 return SUCCESS + "-" + solrDocs.size();
695 }
696
697 protected List<SolrInputDocument> convertToSolrDocs(String docCategory, String docType, String docFormat,
698 String docContent) throws Exception {
699 List<SolrInputDocument> solrDocs = null;
700 if (DocCategory.WORK.isEqualTo(docCategory) && DocType.BIB.isEqualTo(docType) && DocFormat.MARC
701 .isEqualTo(docFormat)) {
702 try {
703 WorkBibMarcRecordProcessor recordProcessor = new WorkBibMarcRecordProcessor();
704 solrDocs = new WorkBibMarcDocBuilder()
705 .buildSolrInputDocuments(recordProcessor.fromXML(docContent).getRecords());
706 }
707 catch (Exception e) {
708 e.printStackTrace();
709 throw new Exception("Exception while converting given XML Document: ", e);
710 }
711 }
712 else if (DocCategory.WORK.isEqualTo(docCategory) && DocType.BIB.isEqualTo(docType) && DocFormat.DUBLIN_CORE
713 .isEqualTo(docFormat)) {
714 WorkBibDublinRecordProcessor processor = new WorkBibDublinRecordProcessor();
715 WorkBibDublinRecord record = processor.fromXML(docContent);
716 solrDocs = new ArrayList<SolrInputDocument>();
717 solrDocs.add(new WorkBibDublinDocBuilder().buildSolrInputDocument(record));
718 }
719 else if (DocCategory.WORK.isEqualTo(docCategory) && DocType.BIB.isEqualTo(docType) && DocFormat
720 .DUBLIN_UNQUALIFIED.isEqualTo(docFormat)) {
721 solrDocs = new WorkBibDublinUnQualifiedDocBuilder()
722 .buildSolrInputDocuments(new WorkBibDublinUnQualifiedRecordProcessor().fromXML(docContent));
723 }
724 else {
725 throw new Exception("UnSupported Document Format: " + docCategory + ", " + docType + ", " + docFormat);
726 }
727 return solrDocs;
728 }
729
730 protected String deleteDocumentByUUID(String uuid, String category, boolean commit) {
731 String result = SUCCESS;
732 try {
733 SolrServer solr = SolrServerManager.getInstance().getSolrServer();
734 solr.deleteById(uuid);
735 if (commit) {
736 solr.commit();
737 }
738 } catch (Exception e) {
739 result = buildFailureMsg();
740 LOG.error(result, e);
741 }
742 return result;
743 }
744
745 protected String deleteDocumentByUUID(String uuid, String category) {
746 return deleteDocumentByUUID(uuid, category, true);
747 }
748
749 protected String deleteDocumentsByUUIDList(List<String> uuidList, String category, boolean commit) {
750 String result = SUCCESS;
751 try {
752 SolrServer solr = SolrServerManager.getInstance().getSolrServer();
753 solr.deleteById(uuidList);
754 if (commit) {
755 solr.commit();
756 }
757 } catch (Exception e) {
758 result = buildFailureMsg();
759 LOG.error(result, e);
760 }
761 return result;
762 }
763
764 protected String deleteDocumentsByUUIDList(List<String> uuidsList, String category)
765 throws SolrServerException, MalformedURLException {
766 List<String> deleteUuidsList = new ArrayList<String>();
767 List<String> holdingsIdentifierList = new ArrayList<String>();
768 List<String> itemIdentifierList = new ArrayList<String>();
769 SolrServer solr = SolrServerManager.getInstance().getSolrServer();
770 SolrQuery query = new SolrQuery();
771 deleteUuidsList.addAll(uuidsList);
772 for (int i = 0; i < uuidsList.size(); i++) {
773 query.setQuery("id:" + uuidsList.get(i));
774 QueryResponse response = solr.query(query);
775 LOG.debug("query-->" + query);
776 for (SolrDocument doc : response.getResults()) {
777 LOG.debug("doc" + doc.toString());
778 String docFormat = (String) doc.getFieldValue(DOC_FORMAT);
779 String docType = (String) doc.getFieldValue(DOC_TYPE);
780 if (docType.equalsIgnoreCase(BIBLIOGRAPHIC)) {
781 } else if (docType.equalsIgnoreCase(INSTANCE)) {
782 if (doc.getFieldValue(ITEM_IDENTIFIER) instanceof List) {
783 itemIdentifierList = (List<String>) doc.getFieldValue(ITEM_IDENTIFIER);
784 } else {
785 itemIdentifierList.add((String) doc.getFieldValue(ITEM_IDENTIFIER));
786 }
787 if (doc.getFieldValue(HOLDINGS_IDENTIFIER) instanceof String) {
788 holdingsIdentifierList.add((String) doc.getFieldValue(HOLDINGS_IDENTIFIER));
789 } else {
790 holdingsIdentifierList = (List<String>) doc.getFieldValue(HOLDINGS_IDENTIFIER);
791 }
792 if (holdingsIdentifierList != null && holdingsIdentifierList.size() > 0) {
793 deleteUuidsList.addAll(holdingsIdentifierList);
794 }
795 if (itemIdentifierList != null && itemIdentifierList.size() > 0) {
796 deleteUuidsList.addAll(itemIdentifierList);
797
798 }
799 }
800 }
801 }
802 return deleteDocumentsByUUIDList(deleteUuidsList, category, true);
803 }
804
805 protected String buildDeleteQueryParamsForDeleteUrl(List<String> uuidList, boolean commit) {
806 StringBuffer deleteQueryBuffer = new StringBuffer("");
807 deleteQueryBuffer.append("stream.body=");
808 deleteQueryBuffer.append("<delete>");
809 for (int i = 0; i < uuidList.size(); i++) {
810 deleteQueryBuffer.append("<query>");
811 deleteQueryBuffer.append("id:");
812 deleteQueryBuffer.append(uuidList.get(i));
813 deleteQueryBuffer.append("</query>");
814 }
815 deleteQueryBuffer.append("</delete>");
816 if (commit) {
817 deleteQueryBuffer.append("&stream.body=<commit/>");
818 }
819 return deleteQueryBuffer.toString();
820
821 }
822
823 protected String buildDeleteQuery(String uuid, String category, boolean commit) {
824 StringBuffer deleteQueryUrl = new StringBuffer("");
825 if (commit) {
826 deleteQueryUrl.append(SolrServerManager.getInstance().getSolrCoreURL());
827 deleteQueryUrl.append("/update?stream.body=<delete><query>id:" + uuid
828 + "</query></delete>&stream.body=<commit/>");
829 }
830 else {
831 deleteQueryUrl.append(SolrServerManager.getInstance().getSolrCoreURL());
832 deleteQueryUrl.append("/update?stream.body=<delete><query>id:" + uuid + "</query></delete>");
833 }
834 return deleteQueryUrl.toString();
835 }
836
837
838
839
840
841 protected void openConnection(URL inputURL) throws Exception {
842 HttpURLConnection urlConnection = (HttpURLConnection) inputURL.openConnection();
843 urlConnection.setDoOutput(true);
844 urlConnection.connect();
845 OutputStreamWriter streamWriter = new OutputStreamWriter(urlConnection.getOutputStream());
846 streamWriter.flush();
847
848 BufferedReader bufferReader = new BufferedReader(new InputStreamReader(urlConnection.getInputStream()));
849 String xmlResponse;
850 while ((xmlResponse = bufferReader.readLine()) != null) {
851 if (LOG.isDebugEnabled()) {
852 LOG.debug("XmlResponse->" + xmlResponse);
853 }
854 }
855 }
856
857 protected String getErrorID() {
858 return String.valueOf(new Date().getTime());
859 }
860
861 protected String buildFailureMsg(String id, String msg) {
862 StringBuilder sb = new StringBuilder();
863 sb.append(FAILURE).append("-ErrorID:");
864 if (null != id) {
865 sb.append(id);
866 }
867 else {
868 sb.append(getErrorID());
869 }
870 if (null != msg) {
871 sb.append("-ErrorMsg:").append(msg);
872 }
873 return sb.toString();
874 }
875
876 protected String buildFailureMsg() {
877 return FAILURE + "-ErrorID:" + getErrorID();
878 }
879
880 public QueryResponse searchBibRecord(String docCat, String docType, String docFormat, String fieldName,
881 String fieldValue, String fieldList) {
882 QueryResponse response = null;
883 String result = null;
884 try {
885 String identifier_args = "(" + fieldName + ":" + fieldValue + ")";
886 String docCategory_args = "(DocCategory" + ":" + docCat + ")";
887 String docType_args = "(DocType" + ":" + docType + ")";
888 String docFormat_args = "(DocFormat" + ":" + docFormat + ")";
889 String args = identifier_args + "AND" + docCategory_args + "AND" + docType_args + "AND" + docFormat_args;
890 SolrServer solr = new CommonsHttpSolrServer(
891 PropertyUtil.getPropertyUtil().getProperty("docSearchURL") + "bib");
892 SolrQuery query = new SolrQuery();
893 query.addField(fieldList);
894 query.setQuery(args);
895 response = solr.query(query);
896 }
897 catch (Exception e) {
898 result = buildFailureMsg();
899 LOG.error(result, e);
900 }
901 return response;
902 }
903 }