View Javadoc

1   /*
2    * Copyright 2011 The Kuali Foundation.
3    * 
4    * Licensed under the Educational Community License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    * 
8    * http://www.opensource.org/licenses/ecl2.php
9    * 
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   */
16  package org.kuali.ole.docstore.discovery.service;
17  
18  import org.apache.commons.io.FileUtils;
19  import org.apache.commons.lang.time.StopWatch;
20  import org.apache.solr.client.solrj.SolrQuery;
21  import org.apache.solr.client.solrj.SolrServer;
22  import org.apache.solr.client.solrj.SolrServerException;
23  import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer;
24  import org.apache.solr.client.solrj.response.QueryResponse;
25  import org.apache.solr.client.solrj.response.UpdateResponse;
26  import org.apache.solr.client.solrj.util.ClientUtils;
27  import org.apache.solr.common.SolrDocument;
28  import org.apache.solr.common.SolrInputDocument;
29  import org.apache.solr.common.SolrInputField;
30  import org.kuali.ole.docstore.discovery.solr.security.patron.oleml.SecurityPatronOlemlDocBuilder;
31  import org.kuali.ole.docstore.discovery.solr.work.bib.WorkBibCommonFields;
32  import org.kuali.ole.docstore.discovery.solr.work.bib.dublin.WorkBibDublinDocBuilder;
33  import org.kuali.ole.docstore.discovery.solr.work.bib.dublin.unqualified.WorkBibDublinUnQualifiedDocBuilder;
34  import org.kuali.ole.docstore.discovery.solr.work.bib.marc.WorkBibMarcDocBuilder;
35  import org.kuali.ole.docstore.discovery.solr.work.instance.oleml.WorkInstanceOlemlDocBuilder;
36  import org.kuali.ole.docstore.discovery.util.PropertyUtil;
37  import org.kuali.ole.docstore.model.enums.DocCategory;
38  import org.kuali.ole.docstore.model.enums.DocFormat;
39  import org.kuali.ole.docstore.model.enums.DocType;
40  import org.kuali.ole.docstore.model.xmlpojo.ingest.RequestDocument;
41  import org.kuali.ole.docstore.model.xmlpojo.work.bib.dublin.WorkBibDublinRecord;
42  import org.kuali.ole.docstore.model.xmlpojo.work.instance.oleml.InstanceCollection;
43  import org.kuali.ole.docstore.model.xmlpojo.work.instance.oleml.OleInstance;
44  import org.kuali.ole.docstore.model.xmlpojo.work.instance.oleml.ResourceIdentifier;
45  import org.kuali.ole.docstore.model.xstream.work.bib.dublin.WorkBibDublinRecordProcessor;
46  import org.kuali.ole.docstore.model.xstream.work.bib.dublin.unqualified.WorkBibDublinUnQualifiedRecordProcessor;
47  import org.kuali.ole.docstore.model.xstream.work.bib.marc.WorkBibMarcRecordProcessor;
48  
49  import javax.xml.stream.XMLInputFactory;
50  import javax.xml.stream.XMLStreamConstants;
51  import javax.xml.stream.XMLStreamReader;
52  import javax.xml.transform.OutputKeys;
53  import javax.xml.transform.Transformer;
54  import javax.xml.transform.TransformerFactory;
55  import javax.xml.transform.stax.StAXSource;
56  import javax.xml.transform.stream.StreamResult;
57  import java.io.*;
58  import java.net.HttpURLConnection;
59  import java.net.MalformedURLException;
60  import java.net.URL;
61  import java.util.*;
62  
63  /**
64   * This class implements the {@link IndexerService} methods.
65   */
66  public class IndexerServiceImpl
67          implements IndexerService {
    // Log4j logger used for all diagnostics emitted by this class.
    private static final org.apache.log4j.Logger LOG                   = org.apache.log4j.Logger
            .getLogger(IndexerServiceImpl.class);
    // Marker looked for in input file names by indexDocumentsFromFiles: the text preceding it
    // is used as the document id for that file.
    public static final  String                  UUID_FILE_NAME_SUFFIX = "_UUID_.xml";

    //    private static      String         docSearchUrl            = null;
    // Instance handed out by getInstance(); created on first request.
    private static      IndexerService indexerService      = null;
    // Prefix of the UUIDs generated by assignUUIDs (per that method's Javadoc).
    public static final String         ID_FIELD_PREFIX     = "id_disc_";
    // NOTE(review): not referenced in the visible indexing code, which flushes every 500 docs - confirm usage.
    public static final int            BATCH_SIZE          = 10000;
    // NOTE(review): the names below look like document-type/field keys; their use is outside
    // the visible part of this class - confirm before relying on them.
    private final       String         BIBLIOGRAPHIC       = "bibliographic";
    private final       String         DOC_TYPE            = "DocType";
    private final       String         DOC_FORMAT          = "DocFormat";
    private final       String         HOLDINGS_IDENTIFIER = "holdingsIdentifier";
    private final       String         ITEM_IDENTIFIER     = "itemIdentifier";
    private final       String         INSTANCE            = "instance";
82  
83  
84      private IndexerServiceImpl() {
85          init();
86      }
87  
88      public static IndexerService getInstance() {
89          if (null == indexerService) {
90              indexerService = new IndexerServiceImpl();
91          }
92          return indexerService;
93      }
94  
95      protected void init() {
96          LOG.debug("IndexerServiceImpl init ");
97          //        docSearchUrl = PropertyUtil.getPropertyUtil().getProperty("docSearchURL");
98          //        if ((null != docSearchUrl) && !docSearchUrl.endsWith("/")) {
99          //            docSearchUrl = docSearchUrl + "/";
100         //        }
101     }
102 
103     public String deleteDocuments(String docCategory, List<String> uuidList)
104             throws MalformedURLException, SolrServerException {
105         String result = deleteDocumentsByUUIDList(uuidList, docCategory);
106         return result;
107     }
108 
109     public String deleteDocument(String docCategory, String uuid) {
110         String result = deleteDocumentByUUID(uuid, docCategory);
111         return result;
112     }
113 
114     protected String indexSolrDocuments(List<SolrInputDocument> solrDocs) {
115         String result = null;
116         StopWatch timer = new StopWatch();
117         timer.start();
118         try {
119             result = indexSolrDocuments(solrDocs, true, false);
120             timer.stop();
121             LOG.info("Time taken for indexing Solr docs:" + timer.toString());
122         }
123         catch (Exception e) {
124             result = buildFailureMsg(null, "Indexing failed. " + e.getMessage());
125             LOG.error(result, e);
126         }
127         return result;
128     }
129 
130     @Override
131     public String indexDocumentsFromDirBySolrDoc(String docCategory, String docType, String docFormat, String dataDir) {
132         String result = null;
133         String xmlContent = "";
134         // get the files from the dir.
135         File srcDir = new File(dataDir);
136         if ((null == srcDir) || !srcDir.isDirectory()) {
137             result = buildFailureMsg(null, "Invalid data directory:" + dataDir);
138             return result;
139         }
140         FilenameFilter filter = new FilenameFilter() {
141             public boolean accept(File dir, String name) {
142                 return (!name.startsWith(".") && (name.endsWith(".xml")));
143             }
144         };
145 
146         String[] srcFileNames = srcDir.list(filter);
147         if ((null == srcFileNames) || (srcFileNames.length == 0)) {
148             result = buildFailureMsg(null, "No data files found in data dir:" + dataDir);
149             return result;
150         }
151         List<File> fileList = new ArrayList<File>(srcFileNames.length);
152         for (int i = 0; i < srcFileNames.length; i++) {
153             File srcFile = new File(dataDir + File.separator + srcFileNames[i]);
154             fileList.add(srcFile);
155         }
156         return indexDocumentsFromFiles(docCategory, docType, docFormat, fileList);
157     }
158 
159     @Override
160     public String indexDocumentsFromStringBySolrDoc(String docCategory, String docType, String docFormat, String data)
161             throws IOException {
162 
163         File file = File.createTempFile("marc.xml", ".tmp");
164         FileUtils.writeStringToFile(file, data, "UTF-8");
165         String filePath = file.getAbsolutePath();
166         return indexDocumentsFromFileBySolrDoc(docCategory, docType, docFormat,
167                                                filePath);  //To change body of implemented methods use File | Settings | File Templates.
168     }
169 
170     @Override
171     public String indexDocumentsFromFileBySolrDoc(String docCategory, String docType, String docFormat,
172                                                   String filePath) {
173         List<File> fileList = new ArrayList<File>(0);
174         fileList.add(new File(filePath));
175         return indexDocumentsFromFiles(docCategory, docType, docFormat, fileList);
176     }
177 
178 
179     /**
180      * Indexes the records (of the given docCategory, docType and docFormat) from the files in the given data directory.
181      * <p>
182      * This is a utility method to use Discovery separately from DocStore.
183      * </p>
184      *
185      * @param docCategory category of the documents expected in the input files
186      * @param docType     type of the documents expected in the input files
187      * @param docFormat   format of the documents expected in the input files
188      * @param fileList    list of files to be indexed
189      * @return SUCCESS or FAILURE
190      */
191     @Override
192     public String indexDocumentsFromFiles(String docCategory, String docType, String docFormat, List<File> fileList) {
193         // TODO: Modify this method so that if dataDir is a file, it should be indexed.
194         String result = null;
195         String xmlContent = "";
196         try {
197             StopWatch indexingTimer = new StopWatch();
198             StopWatch conversionTimer = new StopWatch();
199             StopWatch fileIOTimer = new StopWatch();
200             StopWatch totalTimer = new StopWatch();
201             totalTimer.start();
202             fileIOTimer.start();
203             fileIOTimer.suspend();
204 
205             if ((null == fileList) || (fileList.size() == 0)) {
206                 result = buildFailureMsg(null, "No  files found in data dir:" + fileList);
207                 return result;
208             }
209             int numFiles = fileList.size();
210             int numDocs = 0;
211             SolrServer solr = SolrServerManager.getInstance().getSolrServer();
212             TransformerFactory tf = new com.sun.org.apache.xalan.internal.xsltc.trax.TransformerFactoryImpl();
213             Transformer t = tf.newTransformer();
214             t.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
215             t.setOutputProperty(OutputKeys.INDENT, "yes");
216             conversionTimer.start();
217             conversionTimer.suspend();
218             indexingTimer.start();
219             indexingTimer.suspend();
220             for (int i = 0; i < fileList.size(); i++) {
221                 File srcFile = fileList.get(i);
222                 LOG.info("Processing File: " + srcFile.getAbsolutePath());
223                 String srcFileName = srcFile.getName();
224 
225                 // Get the id of the doc from the file name if Exists.
226                 String idFromFileName = null;
227                 List<String> idFromFileNameList = null;
228                 int suffixIndex = srcFileName.indexOf(UUID_FILE_NAME_SUFFIX);
229                 if (suffixIndex > 0) {
230                     idFromFileName = srcFileName.substring(0, suffixIndex);
231                     idFromFileNameList = new ArrayList<String>(1);
232                     idFromFileNameList.add(idFromFileName);
233                 }
234 
235                 int recordsProcessedInFile = 0;
236                 try {
237                     XMLInputFactory xif = XMLInputFactory.newInstance();
238                     XMLStreamReader xsr = xif.createXMLStreamReader(new FileReader(srcFile));
239                     xsr.nextTag();
240                     recordsProcessedInFile = 0;
241                     List<SolrInputDocument> solrDocsToAdd = new ArrayList<SolrInputDocument>();
242                     List<SolrInputDocument> solrDocs = null;
243                     while (xsr.hasNext()) {
244                         int eventType = xsr.next();
245                         if (eventType == XMLStreamConstants.START_ELEMENT) {
246                             if (DocFormat.MARC.isEqualTo(docFormat)) {
247                                 recordsProcessedInFile++;
248                                 LOG.debug("Processing Record(" + recordsProcessedInFile + ") of File: " + srcFileName);
249                                 fileIOTimer.resume();
250                                 StringWriter str = new StringWriter();
251                                 str.append("<collection>");
252                                 t.transform(new StAXSource(xsr), new StreamResult(str));
253                                 str.append("</collection>");
254                                 xmlContent = str.getBuffer().toString();
255                                 fileIOTimer.suspend();
256                                 conversionTimer.resume();
257                                 solrDocs = convertToSolrDocs(docCategory, docType, docFormat, xmlContent);
258                                 if ((null == solrDocs) || (solrDocs.size() == 0)) {
259                                     continue;
260                                 }
261                                 if (idFromFileName == null) {
262                                     assignUUIDs(solrDocs, null);
263                                 }
264                                 else {
265                                     assignUUIDs(solrDocs.subList(0, 1), idFromFileNameList);
266                                 }
267                                 conversionTimer.suspend();
268                                 numDocs += solrDocs.size();
269                             }
270                             else if (DocFormat.DUBLIN_CORE.isEqualTo(docFormat)) {
271                                 // TODO: May be moved out of while loop?
272                                 conversionTimer.resume();
273                                 solrDocs = convertToSolrDocs(docCategory, docType, docFormat,
274                                                              FileUtils.readFileToString(srcFile, "UTF-8"));
275                                 assignUUIDs(solrDocs, null);
276                                 conversionTimer.suspend();
277                                 solrDocsToAdd.addAll(solrDocs);
278                                 numDocs += solrDocs.size();
279                                 break;
280                             }
281                             else if (DocFormat.DUBLIN_UNQUALIFIED.isEqualTo(docFormat)) {
282                                 if (xsr.getName().getLocalPart().equalsIgnoreCase("record")) {
283                                     conversionTimer.resume();
284                                     solrDocs = new ArrayList<SolrInputDocument>();
285                                     StringWriter str = new StringWriter();
286                                     str.append("<OAI-PMH><ListRecords>");
287                                     t.transform(new StAXSource(xsr), new StreamResult(str));
288                                     str.append("</ListRecords></OAI-PMH>");
289                                     str.close();
290                                     xmlContent = str.getBuffer().toString();
291                                     solrDocs = convertToSolrDocs(docCategory, docType, docFormat, xmlContent);
292                                     str.flush();
293                                     assignUUIDs(solrDocs, null);
294                                     conversionTimer.suspend();
295                                     numDocs += solrDocs.size();
296                                 }
297                             }
298                             else {
299                                 throw new Exception("Unsupported Document Format: " + docFormat);
300                             }
301                         }
302                         else {
303                             continue;
304                         }
305 
306                         if (solrDocs != null) {
307                             solrDocsToAdd.addAll(solrDocs);
308                         }
309                         if (solrDocsToAdd.size() < 500) {
310                             // TODO: Handle the case when the size of the batch is too high. Do a check on the size.
311                             continue;
312                         }
313                         indexingTimer.resume();
314                         solr.add(solrDocsToAdd);
315                         indexingTimer.suspend();
316                         solrDocsToAdd.clear();
317                         if (recordsProcessedInFile % 10000 == 0) {
318                             totalTimer.split();
319                             LOG.info("Records processed in file " + srcFileName + ":" + recordsProcessedInFile
320                                      + "; Time elapsed:" + totalTimer.toSplitString());
321                         }
322                         if (idFromFileName != null || DocFormat.DUBLIN_CORE.isEqualTo(docFormat)) {
323                             break;
324                         }
325                     }
326                     if (solrDocsToAdd.size() > 0) {
327                         indexingTimer.resume();
328                         solr.add(solrDocsToAdd);
329                         indexingTimer.suspend();
330                         solrDocsToAdd.clear();
331                     }
332                 }
333                 catch (Exception ex) {
334                     String message = "Failure while processing file '" + srcFile.getAbsolutePath() + "' \nat Record: "
335                                      + recordsProcessedInFile + "\n" + xmlContent;
336                     ex.printStackTrace();
337                     LOG.error(message);
338                     solr.rollback();
339                     throw ex;
340                 }
341                 totalTimer.split();
342                 if (recordsProcessedInFile > 0) {
343                     // Do not log this message if a file has only one record.
344                     LOG.info("Records processed in file " + srcFileName + ":" + recordsProcessedInFile
345                              + "; Time elapsed:" + totalTimer.toSplitString());
346                 }
347             }
348             // commit after all docs are added.
349             if (numDocs > 0) {
350                 indexingTimer.resume();
351                 solr.commit();
352                 indexingTimer.suspend();
353             }
354 
355             conversionTimer.stop();
356             fileIOTimer.stop();
357             indexingTimer.stop();
358             totalTimer.stop();
359             LOG.info("Num of files processed:" + numFiles + "; Num of documents processed:" + numDocs);
360             LOG.info("Time taken for reading files:" + fileIOTimer.toString()
361                      + "; Time taken for parsing and converting to Solr Docs:" + conversionTimer.toString());
362             LOG.info(
363                     "Time taken for indexing Solr docs:" + indexingTimer.toString() + "; Total time taken:" + totalTimer
364                             .toString());
365             result = SUCCESS + "-" + numDocs;
366         }
367         catch (Exception e) {
368             result = buildFailureMsg(null, "Indexing failed. " + e.getMessage());
369             LOG.error(result, e);
370         }
371         return result;
372     }
373 
374 //    public String indexDocuments(List<RequestDocument> requestDocuments) {
375 //        for (RequestDocument requestDocument : requestDocuments) {
376 //            indexDocument(requestDocument);
377 //        }
378 //        return null;
379 //    }
380 
381     public String indexDocument(RequestDocument requestDocument) {
382         List<RequestDocument> requestDocuments = null;
383         if (requestDocument != null) {
384             requestDocuments = new ArrayList<RequestDocument>(1);
385             requestDocuments.add(requestDocument);
386         }
387         return indexDocuments(requestDocuments);
388     }
389 
390     public String indexDocuments(List<RequestDocument> requestDocuments) {
391         String result = null;
392         StopWatch timer = new StopWatch();
393         timer.start();
394         List<SolrInputDocument> solrInputDocuments = new ArrayList<SolrInputDocument>();
395         try {
396             for (RequestDocument requestDocument : requestDocuments) {
397                 if (requestDocument == null) {
398                     continue;
399                 }
400                 if (DocCategory.WORK.isEqualTo(requestDocument.getCategory())) {
401                     if (DocType.BIB.isEqualTo(requestDocument.getType())) { // Biblographic
402                         if (DocFormat.MARC.isEqualTo(requestDocument.getFormat())) {
403                             new WorkBibMarcDocBuilder().buildSolrInputDocument(requestDocument, solrInputDocuments);
404                         }
405                         else if(DocFormat.DUBLIN_CORE.isEqualTo(requestDocument.getFormat())){
406                             new WorkBibDublinDocBuilder().buildSolrInputDocument(requestDocument,solrInputDocuments);
407                         }
408                         else if (DocFormat.DUBLIN_UNQUALIFIED.isEqualTo(requestDocument.getFormat())) {
409                              new WorkBibDublinUnQualifiedDocBuilder().buildSolrInputDocument(requestDocument,solrInputDocuments);
410 
411                         }
412                         else {
413                             throw new Exception(
414                                     "Unsupported Document Format : " + requestDocument.getFormat() + " Called.");
415                         }
416                     }
417                     else if (DocType.INSTANCE.isEqualTo(requestDocument.getType())) { // Instance
418                         if (DocFormat.OLEML.isEqualTo(requestDocument.getFormat())) {
419                             new WorkInstanceOlemlDocBuilder().buildSolrInputDocument(requestDocument, solrInputDocuments);
420                         }
421                         else {
422                             throw new Exception(
423                                     "Unsupported Document Format : " + requestDocument.getFormat() + " Called.");
424                         }
425                     }
426                     else if (DocType.HOLDINGS.isEqualTo(requestDocument.getType())) { // Holdings
427                         if (DocFormat.OLEML.isEqualTo(requestDocument.getFormat())) {
428                             new WorkInstanceOlemlDocBuilder().buildSolrInputDocument(requestDocument, solrInputDocuments);
429                         }
430                         else {
431                             throw new Exception(
432                                     "Unsupported Document Format : " + requestDocument.getFormat() + " Called.");
433                         }
434                     }
435                     else if (DocType.ITEM.isEqualTo(requestDocument.getType())) { // Item
436                         if (DocFormat.OLEML.isEqualTo(requestDocument.getFormat())) {
437                             new WorkInstanceOlemlDocBuilder().buildSolrInputDocument(requestDocument, solrInputDocuments);
438                         }
439                         else {
440                             throw new Exception(
441                                     "Unsupported Document Format : " + requestDocument.getFormat() + " Called.");
442                         }
443                     }
444                     else {
445                         throw new Exception(
446                                 "Unsupported Document Type : " + requestDocument.getFormat() + " Called.");
447                     }
448                 }
449                 else if (DocCategory.SECURITY.isEqualTo(requestDocument.getCategory())) {
450                     if (DocType.PATRON.isEqualTo(requestDocument.getType())) {
451                         if (DocFormat.OLEML.isEqualTo(requestDocument.getFormat())) {
452                             new SecurityPatronOlemlDocBuilder()
453                                     .buildSolrInputDocument(requestDocument, solrInputDocuments);
454                         }
455                         else {
456                             throw new Exception(
457                                     "Unsupported Document Format : " + requestDocument.getFormat() + " Called.");
458                         }
459                     }
460                     else {
461                         throw new Exception("Unsupported Document Type : " + requestDocument.getType() + " Called.");
462                     }
463                 }
464                 else {
465                     //                        logger.error("Unsupported Document Format : " + reqDoc.getFormat() + " Called.");
466                     throw new Exception(
467                             "Unsupported Document Category : " + requestDocument.getCategory() + " Called.");
468                 }
469                 assignUUIDs(solrInputDocuments, null);
470             }
471         }
472         catch (Exception e1) {
473             result = buildFailureMsg(null, "Indexing failed. " + e1.getMessage());
474             LOG.error(result, e1);
475         }
476         timer.stop();
477         if ((null == solrInputDocuments) || (solrInputDocuments.isEmpty())) {
478             result = buildFailureMsg(null, "No valid documents found in input.");
479             return result;
480         }
481         int numDocs = solrInputDocuments.size();
482         LOG.info("Conversion to Solr docs- Num:" + numDocs + ": Time taken:" + timer.toString());
483         result = indexSolrDocuments(solrInputDocuments);
484         return result;
485     }
486 
487     public String bulkIndexDocuments(List<RequestDocument> requestDocuments) {
488         String result = "success";
489         Map<String, SolrInputDocument> bibIdToDocMap = new HashMap<String, SolrInputDocument>();
490         if (requestDocuments != null && requestDocuments.size() > 0) {
491             StopWatch timer = new StopWatch();
492             timer.start();
493             List<SolrInputDocument> solrInputDocuments = new ArrayList<SolrInputDocument>();
494             try {
495                 if (DocCategory.WORK.isEqualTo(requestDocuments.get(0).getCategory())) {
496                     if (DocType.BIB.isEqualTo(requestDocuments.get(0).getType())) {
497                         if (DocFormat.MARC.isEqualTo(requestDocuments.get(0).getFormat())) {
498                             WorkBibMarcDocBuilder marcBuilder = new WorkBibMarcDocBuilder();
499                             for (RequestDocument requestDocument : requestDocuments) {
500                                 marcBuilder.buildSolrInputDocument(requestDocument, solrInputDocuments);
501                             }
502                         }
503                         else if (DocFormat.DUBLIN_CORE.isEqualTo(requestDocuments.get(0).getFormat())) {
504                             WorkBibDublinDocBuilder dublinBuilder = new WorkBibDublinDocBuilder();
505                             for (RequestDocument requestDocument : requestDocuments) {
506                                 dublinBuilder.buildSolrInputDocument(requestDocument,solrInputDocuments);
507                             }
508                         }
509                         else if (DocFormat.DUBLIN_UNQUALIFIED.isEqualTo(requestDocuments.get(0).getFormat())) {
510                             WorkBibDublinUnQualifiedDocBuilder dublinUnqBuilder
511                                     = new WorkBibDublinUnQualifiedDocBuilder();
512                             for (RequestDocument requestDocument : requestDocuments) {
513                                 dublinUnqBuilder.buildSolrInputDocument(requestDocument,solrInputDocuments);
514                             }
515                         }
516                     }
517                     else if (DocType.INSTANCE.isEqualTo(requestDocuments.get(0).getType())) {
518                         WorkInstanceOlemlDocBuilder oleMlDocBuilder = new WorkInstanceOlemlDocBuilder();
519                         for (RequestDocument requestDocument : requestDocuments) {
520                             OleInstance instance = ((InstanceCollection) requestDocument.getContent()
521                                                                                         .getContentObject())
522                                     .getInstanceCollection().get(0);
523                             for (ResourceIdentifier rid : instance.getResourceIdentifier()) {
524                                 List<SolrDocument> docs = getSolrDocumentBySolrId(rid.getValue());
525                                 for (SolrDocument solrDoc : docs) {
526                                     SolrInputDocument bibSolrIDoc = ClientUtils.toSolrInputDocument(solrDoc);
527                                     String bibId = bibSolrIDoc.getFieldValue(WorkBibCommonFields.UNIQUE_ID).toString();
528                                     if (bibIdToDocMap.get(bibId) == null) {
529                                         bibIdToDocMap.put(bibId, bibSolrIDoc);
530                                     }
531                                     bibIdToDocMap.get(bibId)
532                                                  .addField("instanceIdentifier", instance.getInstanceIdentifier());
533                                 }
534                             }
535                             oleMlDocBuilder.buildSolrInputDocuments(requestDocument, solrInputDocuments);
536                         }
537                     }
538                 }
539                 if (DocCategory.SECURITY.isEqualTo(requestDocuments.get(0).getCategory())) {
540                     if (DocType.PATRON.isEqualTo(requestDocuments.get(0).getType())) {
541                         if (DocFormat.OLEML.isEqualTo(requestDocuments.get(0).getFormat())) {
542                             SecurityPatronOlemlDocBuilder patronBuilder = new SecurityPatronOlemlDocBuilder();
543                             for (RequestDocument requestDocument : requestDocuments) {
544                                 patronBuilder.buildSolrInputDocument(requestDocument, solrInputDocuments);
545                             }
546                         }
547                     }
548                 }
549                 assignUUIDs(solrInputDocuments, null);
550                 solrInputDocuments.addAll(bibIdToDocMap.values());
551             }
552             catch (Exception e1) {
553                 result = buildFailureMsg(null, "Bulk Indexing failed. " + e1.getMessage());
554                 LOG.error(result, e1);
555             }
556             timer.stop();
557             if (solrInputDocuments.isEmpty()) {
558                 result = buildFailureMsg(null, "No valid documents found in input.");
559                 return result;
560             }
561             int numDocs = solrInputDocuments.size();
562             LOG.info("Conversion to Solr docs- Num:" + numDocs + ": Time taken:" + timer.toString());
563             result = indexSolrDocuments(solrInputDocuments);
564         }
565         return result;
566     }
567 
568     public List<SolrDocument> getSolrDocumentBySolrId(String uniqueId) {
569         QueryResponse response = null;
570         String result = null;
571         try {
572             String args = "(" + WorkBibCommonFields.UNIQUE_ID + ":" + uniqueId + ")";
573             SolrServer solr = SolrServerManager.getInstance().getSolrServer();
574             SolrQuery query = new SolrQuery();
575             query.setQuery(args);
576             response = solr.query(query);
577         }
578         catch (Exception e) {
579             result = buildFailureMsg();
580             LOG.error(result, e);
581         }
582         return response.getResults();
583     }
584 
585     public List<SolrDocument> getSolrDocument(String fieldName, String fieldValue) {
586         QueryResponse response = null;
587         String result = null;
588         try {
589             String args = "(" + fieldName + ":" + fieldValue + ")";
590             SolrServer solr = SolrServerManager.getInstance().getSolrServer();
591             SolrQuery query = new SolrQuery();
592             query.setQuery(args);
593             response = solr.query(query);
594         }
595         catch (Exception e) {
596             result = buildFailureMsg();
597             LOG.error(result, e);
598         }
599         return response.getResults();
600     }
601 
602     /**
603      * Assigns UUIDs for each document (that does not have an "id" field) in the given list.
604      * Also makes sure "uniqueId" field is present. The UUIDs generated by this method start
605      * with ID_FIELD_PREFIX for easy identification. Optionally takes a list
606      * of UUIDs to be used to set/override the "id" field values of the documents.
607      *
608      * @param solrDocs
609      * @param ids      List of id values (optional) to be used for the given documents.
610      */
611     protected void assignUUIDs(List<SolrInputDocument> solrDocs, List<String> ids) throws Exception {
612         if ((null == solrDocs) || (solrDocs.size() == 0)) {
613             return;
614         }
615         if ((null != ids) && (ids.size() < solrDocs.size())) {
616             throw new Exception(
617                     "Insufficient UUIDs(" + ids.size() + ") specified for documents(" + solrDocs.size() + ".");
618         }
619         for (int i = 0; i < solrDocs.size(); i++) {
620             SolrInputDocument solrInputDocument = solrDocs.get(i);
621             SolrInputField idField = solrInputDocument.getField("id");
622             String uuid = null;
623             if (null != ids) {
624                 // Get the supplied UUID.
625                 uuid = ids.get(i);
626             }
627             if (null == idField) {
628                 if (null == uuid) {
629                     // Generate UUID.
630                     uuid = UUID.randomUUID().toString();
631                     uuid = ID_FIELD_PREFIX + uuid; // identifies the uuid generated by discovery module.
632                 }
633                 solrInputDocument.addField("id", uuid);
634                 solrInputDocument.addField("uniqueId", uuid);
635             }
636             else {
637                 if (null != uuid) {
638                     // Use the supplied UUID.
639                     solrInputDocument.setField("id", uuid);
640                     solrInputDocument.setField("uniqueId", uuid);
641                 }
642                 else {
643                     // Leave the existing id value and make sure uniqueId is set.
644                     uuid = (String) idField.getValue();
645                     if (null == uuid) {
646                         // Generate UUID.
647                         uuid = UUID.randomUUID().toString();
648                         uuid = ID_FIELD_PREFIX + uuid; // identifies the uuid generated by discovery module.
649                         idField.setValue(uuid, 1.0f);
650                     }
651                     SolrInputField uniqueIdField = solrInputDocument.getField("uniqueId");
652                     if (null == uniqueIdField) {
653                         solrInputDocument.addField("uniqueId", uuid);
654                     }
655                     else {
656                         solrInputDocument.setField("uniqueId", uuid);
657                     }
658                 }
659             }
660         }
661     }
662 
663     protected String indexSolrDocuments(List<SolrInputDocument> solrDocs, boolean commit, boolean optimize)
664             throws Exception {
665         SolrServer solr = null;
666         if ((null == solrDocs) || (solrDocs.isEmpty())) {
667             return SUCCESS + "-0";
668         }
669         solr = SolrServerManager.getInstance().getSolrServer();
670         if (solrDocs.size() > BATCH_SIZE) {
671             int numSolrDocs = solrDocs.size();
672             for (int fromIndex = 0; fromIndex < numSolrDocs; fromIndex += BATCH_SIZE) {
673                 int toIndex = fromIndex + BATCH_SIZE;
674                 if (toIndex > numSolrDocs) {
675                     toIndex = numSolrDocs;
676                 }
677                 List batchSolrDocs = solrDocs.subList(fromIndex, toIndex);
678                 if ((null != batchSolrDocs) && (!batchSolrDocs.isEmpty())) {
679                     LOG.info("Indexing records. fromIndex=" + fromIndex + ", toIndex=" + toIndex);
680                     UpdateResponse response = solr.add(solrDocs);
681                 }
682             }
683         }
684         else {
685             LOG.debug("Indexing records. size=" + solrDocs.size());
686             UpdateResponse response = solr.add(solrDocs);
687         }
688         if (commit) {
689             solr.commit();
690         }
691         if (optimize) {
692             solr.optimize();
693         }
694         return SUCCESS + "-" + solrDocs.size();
695     }
696 
697     protected List<SolrInputDocument> convertToSolrDocs(String docCategory, String docType, String docFormat,
698                                                         String docContent) throws Exception {
699         List<SolrInputDocument> solrDocs = null;
700         if (DocCategory.WORK.isEqualTo(docCategory) && DocType.BIB.isEqualTo(docType) && DocFormat.MARC
701                 .isEqualTo(docFormat)) {
702             try {
703                 WorkBibMarcRecordProcessor recordProcessor = new WorkBibMarcRecordProcessor();
704                 solrDocs = new WorkBibMarcDocBuilder()
705                         .buildSolrInputDocuments(recordProcessor.fromXML(docContent).getRecords());
706             }
707             catch (Exception e) {
708                 e.printStackTrace();
709                 throw new Exception("Exception while converting given XML Document: ", e);
710             }
711         }
712         else if (DocCategory.WORK.isEqualTo(docCategory) && DocType.BIB.isEqualTo(docType) && DocFormat.DUBLIN_CORE
713                 .isEqualTo(docFormat)) {
714             WorkBibDublinRecordProcessor processor = new WorkBibDublinRecordProcessor();
715             WorkBibDublinRecord record = processor.fromXML(docContent);
716             solrDocs = new ArrayList<SolrInputDocument>();
717             solrDocs.add(new WorkBibDublinDocBuilder().buildSolrInputDocument(record));
718         }
719         else if (DocCategory.WORK.isEqualTo(docCategory) && DocType.BIB.isEqualTo(docType) && DocFormat
720                 .DUBLIN_UNQUALIFIED.isEqualTo(docFormat)) {
721             solrDocs = new WorkBibDublinUnQualifiedDocBuilder()
722                     .buildSolrInputDocuments(new WorkBibDublinUnQualifiedRecordProcessor().fromXML(docContent));
723         }
724         else {
725             throw new Exception("UnSupported Document Format: " + docCategory + ", " + docType + ", " + docFormat);
726         }
727         return solrDocs;
728     }
729 
730     protected String deleteDocumentByUUID(String uuid, String category, boolean commit) {
731         String result = SUCCESS;
732         try {
733             SolrServer solr = SolrServerManager.getInstance().getSolrServer();
734             solr.deleteById(uuid);
735             if (commit) {
736                 solr.commit();
737             }
738         } catch (Exception e) {
739             result = buildFailureMsg();
740             LOG.error(result, e);
741         }
742         return result;
743     }
744 
745     protected String deleteDocumentByUUID(String uuid, String category) {
746         return deleteDocumentByUUID(uuid, category, true);
747     }
748 
749     protected String deleteDocumentsByUUIDList(List<String> uuidList, String category, boolean commit) {
750         String result = SUCCESS;
751         try {
752             SolrServer solr = SolrServerManager.getInstance().getSolrServer();
753             solr.deleteById(uuidList);
754             if (commit) {
755                 solr.commit();
756             }
757         } catch (Exception e) {
758             result = buildFailureMsg();
759             LOG.error(result, e);
760         }
761         return result;
762     }
763 
764     protected String deleteDocumentsByUUIDList(List<String> uuidsList, String category)
765             throws SolrServerException, MalformedURLException {
766         List<String> deleteUuidsList = new ArrayList<String>();
767         List<String> holdingsIdentifierList = new ArrayList<String>();
768         List<String> itemIdentifierList = new ArrayList<String>();
769         SolrServer solr = SolrServerManager.getInstance().getSolrServer();
770         SolrQuery query = new SolrQuery();
771         deleteUuidsList.addAll(uuidsList);
772         for (int i = 0; i < uuidsList.size(); i++) {
773             query.setQuery("id:" + uuidsList.get(i));
774             QueryResponse response = solr.query(query);
775             LOG.debug("query-->" + query);
776             for (SolrDocument doc : response.getResults()) {
777                 LOG.debug("doc" + doc.toString());
778                 String docFormat = (String) doc.getFieldValue(DOC_FORMAT);
779                 String docType = (String) doc.getFieldValue(DOC_TYPE);
780                 if (docType.equalsIgnoreCase(BIBLIOGRAPHIC)) {
781                 } else if (docType.equalsIgnoreCase(INSTANCE)) {
782                     if (doc.getFieldValue(ITEM_IDENTIFIER) instanceof List) {
783                         itemIdentifierList = (List<String>) doc.getFieldValue(ITEM_IDENTIFIER);
784                     } else {
785                         itemIdentifierList.add((String) doc.getFieldValue(ITEM_IDENTIFIER));
786                     }
787                     if (doc.getFieldValue(HOLDINGS_IDENTIFIER) instanceof String) {
788                         holdingsIdentifierList.add((String) doc.getFieldValue(HOLDINGS_IDENTIFIER));
789                     } else {
790                         holdingsIdentifierList = (List<String>) doc.getFieldValue(HOLDINGS_IDENTIFIER);
791                     }
792                     if (holdingsIdentifierList != null && holdingsIdentifierList.size() > 0) {
793                         deleteUuidsList.addAll(holdingsIdentifierList);
794                     }
795                     if (itemIdentifierList != null && itemIdentifierList.size() > 0) {
796                         deleteUuidsList.addAll(itemIdentifierList);
797 
798                     }
799                 }
800             }
801         }
802         return deleteDocumentsByUUIDList(deleteUuidsList, category, true);
803     }
804 
805     protected String buildDeleteQueryParamsForDeleteUrl(List<String> uuidList, boolean commit) {
806         StringBuffer deleteQueryBuffer = new StringBuffer("");
807         deleteQueryBuffer.append("stream.body=");
808         deleteQueryBuffer.append("<delete>");
809         for (int i = 0; i < uuidList.size(); i++) {
810             deleteQueryBuffer.append("<query>");
811             deleteQueryBuffer.append("id:");
812             deleteQueryBuffer.append(uuidList.get(i));
813             deleteQueryBuffer.append("</query>");
814         }
815         deleteQueryBuffer.append("</delete>");
816         if (commit) {
817             deleteQueryBuffer.append("&stream.body=<commit/>");
818         }
819         return deleteQueryBuffer.toString();
820 
821     }
822 
823     protected String buildDeleteQuery(String uuid, String category, boolean commit) {
824         StringBuffer deleteQueryUrl = new StringBuffer("");
825         if (commit) {
826             deleteQueryUrl.append(SolrServerManager.getInstance().getSolrCoreURL());
827             deleteQueryUrl.append("/update?stream.body=<delete><query>id:" + uuid
828                                   + "</query></delete>&stream.body=<commit/>");
829         }
830         else {
831             deleteQueryUrl.append(SolrServerManager.getInstance().getSolrCoreURL());
832             deleteQueryUrl.append("/update?stream.body=<delete><query>id:" + uuid + "</query></delete>");
833         }
834         return deleteQueryUrl.toString();
835     }
836 
837     /**
838      * @param inputURL
839      * @throws Exception
840      */
841     protected void openConnection(URL inputURL) throws Exception {
842         HttpURLConnection urlConnection = (HttpURLConnection) inputURL.openConnection();
843         urlConnection.setDoOutput(true);
844         urlConnection.connect();
845         OutputStreamWriter streamWriter = new OutputStreamWriter(urlConnection.getOutputStream());
846         streamWriter.flush();
847         // Get the response from inputURL
848         BufferedReader bufferReader = new BufferedReader(new InputStreamReader(urlConnection.getInputStream()));
849         String xmlResponse;
850         while ((xmlResponse = bufferReader.readLine()) != null) {
851             if (LOG.isDebugEnabled()) {
852                 LOG.debug("XmlResponse->" + xmlResponse);
853             }
854         }
855     }
856 
857     protected String getErrorID() {
858         return String.valueOf(new Date().getTime());
859     }
860 
861     protected String buildFailureMsg(String id, String msg) {
862         StringBuilder sb = new StringBuilder();
863         sb.append(FAILURE).append("-ErrorID:");
864         if (null != id) {
865             sb.append(id);
866         }
867         else {
868             sb.append(getErrorID());
869         }
870         if (null != msg) {
871             sb.append("-ErrorMsg:").append(msg);
872         }
873         return sb.toString();
874     }
875 
876     protected String buildFailureMsg() {
877         return FAILURE + "-ErrorID:" + getErrorID();
878     }
879 
880     public QueryResponse searchBibRecord(String docCat, String docType, String docFormat, String fieldName,
881                                          String fieldValue, String fieldList) {
882         QueryResponse response = null;
883         String result = null;
884         try {
885             String identifier_args = "(" + fieldName + ":" + fieldValue + ")";
886             String docCategory_args = "(DocCategory" + ":" + docCat + ")";
887             String docType_args = "(DocType" + ":" + docType + ")";
888             String docFormat_args = "(DocFormat" + ":" + docFormat + ")";
889             String args = identifier_args + "AND" + docCategory_args + "AND" + docType_args + "AND" + docFormat_args;
890             SolrServer solr = new CommonsHttpSolrServer(
891                     PropertyUtil.getPropertyUtil().getProperty("docSearchURL") + "bib");
892             SolrQuery query = new SolrQuery();
893             query.addField(fieldList);
894             query.setQuery(args);
895             response = solr.query(query);
896         }
897         catch (Exception e) {
898             result = buildFailureMsg();
899             LOG.error(result, e);
900         }
901         return response;
902     }
903 }