View Javadoc

1   package org.kuali.ole.docstore.service;
2   
3   import org.apache.commons.lang.time.StopWatch;
4   import org.apache.jackrabbit.commons.flat.*;
5   import org.kuali.ole.docstore.model.enums.DocFormat;
6   import org.kuali.ole.docstore.model.enums.DocType;
7   import org.kuali.ole.docstore.model.xmlpojo.ingest.RequestDocument;
8   import org.kuali.ole.docstore.model.xmlpojo.work.instance.oleml.Instance;
9   import org.kuali.ole.docstore.model.xmlpojo.work.instance.oleml.InstanceCollection;
10  import org.kuali.ole.documenthandler.InstanceRequestDocumentResolver;
11  import org.kuali.ole.repository.NodeHandler;
12  import org.slf4j.Logger;
13  import org.slf4j.LoggerFactory;
14  
15  import javax.jcr.Node;
16  import javax.jcr.RepositoryException;
17  import javax.jcr.Session;
18  import javax.jcr.nodetype.NodeType;
19  import java.text.DateFormat;
20  import java.text.Format;
21  import java.text.SimpleDateFormat;
22  import java.util.ArrayList;
23  import java.util.Date;
24  import java.util.List;
25  import java.util.Random;
26  
27  import static org.kuali.ole.docstore.process.ProcessParameters.*;
28  
29  
30  /**
31   * Class to Ingest Documents.
32   *
33   * @author Rajesh Chowdary K
34   * @created Feb 16, 2012
35   */
36  public class DocumentIngester {
37  
38      private static Logger      logger      = LoggerFactory.getLogger(DocumentIngester.class);
39      private        NodeHandler nodeHandler = new NodeHandler();
40      private TreeManager  treeManager;
41      private NodeSequence nodeSequence;
42      private int i = 0;
43  
44      public Node getStaticFormatNode(RequestDocument doc, Session session) throws RepositoryException {
45          Node formatNode = null;
46          Node root = session.getRootNode();
47          Node categoryNode = nodeHandler.initStaticNode(doc.getCategory(), root, session);
48          Node typeNode = nodeHandler.initStaticNode(doc.getType(), categoryNode, session);
49          formatNode = nodeHandler.initStaticNode(doc.getFormat(), typeNode, session);
50          return formatNode;
51      }
52  
53      /**
54       * Method to ingest a Bib RequestDocument.
55       *
56       * @param reqDoc
57       * @param session
58       * @param formatNode
59       * @return
60       * @throws Exception
61       */
62      protected synchronized Node ingestBibDocument(RequestDocument reqDoc, Session session, Node formatNode)
63              throws Exception {
64          Node bibFileNode = null;
65          try {
66              String file = "file";
67              if (DocFormat.MARC.isEqualTo(reqDoc.getFormat())) {
68                  file = FILE_MARC;
69              }
70              else {
71                  file = reqDoc.getFormat() + FILE;
72              }
73  
74              Node bibFormatNode = null;
75              if (formatNode == null) {
76                  bibFormatNode = getStaticFormatNode(reqDoc, session);
77              }
78              else {
79                  bibFormatNode = formatNode;
80              }
81              Node l1 = null;
82              Node l3 = null;
83              synchronized (nodeHandler) {
84                  l1 = nodeHandler.initLevelNode(NODE_LEVEL1, bibFormatNode, false, session);
85  //                Node l2 = nodeHandler.initLevelNode(NODE_LEVEL2, l1, false, session);
86  //                l3 = nodeHandler.initLevelNode(NODE_LEVEL3, l2, false, session);
87              }
88  //            bibFileNode = nodeHandler.initFileNode(reqDoc, file, l3, session);
89              bibFileNode = nodeHandler.initFileNode(reqDoc, file, l1, session);
90          }
91          catch (Exception e) {
92              logger.error("Ingest failed for RequestDocument: ", e);
93              throw e;
94          }
95          return bibFileNode;
96      }
97  
98      /**
99       * Method to ingest a Bib RequestDocument using Btree manager.
100      *
101      * @param reqDocs
102      * @param session
103      * @param formatNode
104      * @return
105      * @throws Exception
106      */
107     protected synchronized List<Node> ingestBibDocumentUsingBTreeMgr(List<RequestDocument> reqDocs, Session session,
108                                                                      Node formatNode) throws Exception {
109         List<Node> fileNodes = null;
110         fileNodes = new ArrayList<Node>();
111         try {
112             /*String file = "file";
113             if (DocFormat.MARC.isEqualTo(reqDoc.getFormat()))
114                 file = FILE_MARC;
115             else
116                 file = reqDoc.getFormat() + FILE;
117             Node bibFormatNode = null;
118             if (formatNode == null)
119                 bibFormatNode = getStaticFormatNode(reqDoc, session);
120             else
121                 bibFormatNode = formatNode;
122             Node l3 = null;
123             synchronized (nodeHandler) {
124                 Node l1 = nodeHandler.initLevelNode(NODE_LEVEL1, bibFormatNode, false, session);
125                 Node l2 = nodeHandler.initLevelNode(NODE_LEVEL2, l1, false, session);
126                 l3 = nodeHandler.initLevelNode(NODE_LEVEL3, l2, false, session);
127             } */
128             StopWatch btreeTimer = new StopWatch();
129             DateFormat dateFormat = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss:SSS");
130             Date date = new Date();
131             btreeTimer.start();
132             treeManager = new BTreeManager(formatNode, 500, 1000, Rank.<String>comparableComparator(), true);
133             // Create a new NodeSequence with that tree manager
134             nodeSequence = ItemSequence.createNodeSequence(treeManager);
135             btreeTimer.stop();
136             logger.info("Time taken for initializing btree manager sequence=" + btreeTimer.toString());
137             StopWatch btreeAddNodeTimer = new StopWatch();
138             Node node = null;
139             btreeAddNodeTimer.start();
140             Random generator = new Random(19580427);
141             Format formatter = new SimpleDateFormat("dd-MM-yyyy HH-mm-ss");
142             Date date1 = null;
143             for (RequestDocument reqDoc : reqDocs) {
144                 node = null;
145                 date1 = new Date();
146                 String dateStr = formatter.format(date1);
147                 node = nodeSequence.addNode(dateStr + "-" + generator.nextInt(), NodeType.NT_UNSTRUCTURED);
148                 nodeHandler.initFileNode(node, reqDoc, FILE_MARC, null, session);
149                 fileNodes.add(node);
150                 //i++;
151             }
152             btreeAddNodeTimer.stop();
153             logger.info("Time taken for adding " + reqDocs.size() + " nodes to btree: " + btreeAddNodeTimer.toString());
154 
155         }
156         catch (Exception e) {
157             logger.error("Ingest failed for RequestDocument: ", e);
158             throw new Exception(e);
159         }
160         return fileNodes;
161     }
162 
163 
164     /**
165      * Method to ingest a License RequestDocument.
166      *
167      * @param reqDoc
168      * @param session
169      * @param formatNode
170      * @return
171      * @throws Exception
172      */
173     protected synchronized Node ingestLicenseDocument(RequestDocument reqDoc, Session session, Node formatNode)
174             throws Exception {
175         Node licenseFileNode = null;
176         try {
177             String file = "file";
178             if (DocFormat.ONIXPL.isEqualTo(reqDoc.getFormat())) {
179                 file = FILE_ONIXPL;
180             }
181             else {
182                 file = reqDoc.getFormat() + FILE;
183             }
184             Node licenseFormatNode = null;
185             if (formatNode == null) {
186                 licenseFormatNode = getStaticFormatNode(reqDoc, session);
187             }
188             else {
189                 licenseFormatNode = formatNode;
190             }
191             synchronized (nodeHandler) {
192                 Node l1 = nodeHandler.initLevelNode(NODE_LEVEL1, licenseFormatNode, false, session);
193                 licenseFileNode = nodeHandler.initFileNode(reqDoc, file, l1, session);
194             }
195         }
196         catch (Exception e) {
197             logger.error("Ingest failed for RequestDocument: ", e);
198             throw e;
199         }
200         return licenseFileNode;
201     }
202 
203     /**
204      * Method to ingest an Instance Request Document.
205      *
206      * @param reqDoc
207      * @param session
208      * @param ingestedIds  - can even be null if ingested Ids are not required for Outside.
209      * @param linkedBibIds - Pass Linked Bib Id's only if linking is necessary & be used in making a resource Linking.
210      * @param formatNode
211      * @return
212      * @throws Exception - Throws Exception if it cannot ingest any of the instance / item / holdings documents.
213      */
214     protected synchronized Node ingestInstanceDocument(RequestDocument reqDoc, Session session,
215                                                        List<String> ingestedIds, List<String> linkedBibIds,
216                                                        Node formatNode) throws Exception {
217         Node instanceNode = null;
218         if (ingestedIds == null) {
219             ingestedIds = new ArrayList<String>();
220         }
221         try {
222             InstanceRequestDocumentResolver resolver = new InstanceRequestDocumentResolver();
223             List<RequestDocument> resolvedDocs = resolver.getParsedHoldingsNItemDocuments(reqDoc, linkedBibIds);
224             Node instFormatNode = null;
225             if (formatNode == null) {
226                 instFormatNode = getStaticFormatNode(reqDoc, session);
227             }
228             else {
229                 instFormatNode = formatNode;
230             }
231             synchronized (nodeHandler) {
232                 Node l1 = nodeHandler.initLevelNode(NODE_LEVEL1, instFormatNode, false, session);
233                 Node l2 = nodeHandler.initLevelNode(NODE_LEVEL2, l1, false, session);
234                 instanceNode = nodeHandler.initLevelNode(NODE_INSTANCE, l2, false, session);
235             }
236             Node holdingsNode = nodeHandler.initNonStaticNode(NODE_HOLDINGS, instanceNode);
237             reqDoc.setUuid(instanceNode.getIdentifier());
238             ingestedIds.add(nodeHandler.initFileNode(resolvedDocs.get(0), FILE_INSTANCE, instanceNode, session)
239                                        .getIdentifier());
240             ingestedIds.add(nodeHandler.initFileNode(resolvedDocs.get(1), FILE_HOLDINGS, holdingsNode, session)
241                                        .getIdentifier());
242             ingestedIds.add(nodeHandler.initFileNode(resolvedDocs.get(2), FILE_SOURCE_HOLDINGS, holdingsNode, session)
243                                        .getIdentifier());
244             for (int i = 3; i < resolvedDocs.size(); i++) {
245                 ingestedIds.add(nodeHandler.initFileNode(resolvedDocs.get(i), FILE_ITEM, holdingsNode, session)
246                                            .getIdentifier());
247             }
248             ((InstanceCollection) reqDoc.getContent().getContentObject()).getInstance().get(0).setInstanceIdentifier(
249                     instanceNode.getIdentifier());
250         }
251         catch (Exception e) {
252             logger.error("Ingest failed for RequestDocument: ", e);
253             throw e;
254         }
255         return instanceNode;
256     }
257 
258     protected synchronized List<String> ingestPatronRequestDocument(RequestDocument reqDoc, Session session,
259                                                                     Node formatNode) throws Exception {
260         List<String> ingestedIds = new ArrayList<String>();
261         try {
262             // Validation at Content Level.
263             validateContent(reqDoc.getFormat(), reqDoc.getContent().getContent());
264             // Ingest
265             Node patronFormatNode = null;
266             if (formatNode == null) {
267                 patronFormatNode = getStaticFormatNode(reqDoc, session);
268             }
269             else {
270                 patronFormatNode = formatNode;
271             }
272             String uuid = nodeHandler.initFileNode(reqDoc, FILE_PATRON_OLEML, patronFormatNode, session)
273                                      .getIdentifier();
274             reqDoc.setUuid(uuid);
275             ingestedIds.add(uuid);
276         }
277         catch (Exception e) {
278             logger.error("Ingest failed for RequestDocument: ", e);
279             throw e;
280         }
281         return ingestedIds;
282     }
283 
284     /**
285      * Method to ingest RequestDocuments For Bulk Ingest Opertaions.
286      *
287      * @param reqDocs
288      * @param session
289      * @return
290      * @throws Exception
291      */
292     public List<String> ingestRequestDocumentsForBulk(List<RequestDocument> reqDocs, Session session) throws Exception {
293         List<String> ingestedIds = new ArrayList<String>();
294         if (reqDocs != null && reqDocs.size() > 0) {
295             Node formatNode = getStaticFormatNode(reqDocs.get(0), session);
296             String docType = reqDocs.get(0).getType();
297             if (DocType.BIB.isEqualTo(docType)) {
298                 ingestedIds = ingestBatch(reqDocs, session, formatNode);
299             }
300             else {
301                 for (RequestDocument reqDoc : reqDocs) {
302 //                                    if (DocType.BIB.isEqualTo(reqDoc.getType())) {
303 //                                        ingestedIds.add(ingestBibDocument(reqDoc, session, formatNode).getIdentifier());
304 //                                    }
305 //                                    else
306                     if (DocType.INSTANCE.isEqualTo(reqDoc.getType())) {
307                         ingestedIds.addAll(ingestInstaceRequestDocumentForBulk(reqDoc, session, formatNode));
308                     }
309                     else if (DocType.PATRON.isEqualTo(reqDoc.getType())) {
310                         ingestedIds.addAll(ingestPatronRequestDocument(reqDoc, session, formatNode));
311                     }
312                 }
313             }
314         }
315         return ingestedIds;
316     }
317 
318     /**
319      * This method is specifically written only for testing performance for 1 Million bib records.
320      * @param reqDocs
321      * @param session
322      * @param formatNode
323      * @return
324      */
325     public List<String> ingestBatch(List<RequestDocument> reqDocs, Session session, Node formatNode) throws Exception {
326         String nodeName = reqDocs.get(0).getFormat() + FILE;
327         List<String> idList = new ArrayList<String>();
328         // Get a new level1 node and add file nodes to it.
329         Node levelNode = null;
330         synchronized (nodeHandler) {
331             levelNode = nodeHandler.initNonStaticNode(NODE_LEVEL1, formatNode);
332         }
333         for (RequestDocument requestDocument : reqDocs) {
334             Node fileNode = nodeHandler.initFileNode(requestDocument, nodeName, levelNode, session);
335             idList.add(fileNode.getIdentifier());
336         }
337         return idList;
338     }
339 
340 
341     /**
342      * Method to ingest RequestDocuments For Bulk Ingest Opertaions using Btree Manager.
343      *
344      * @param reqDocs
345      * @param session
346      * @return
347      * @throws Exception
348      */
349     public List<String> ingestRequestDocumentsForBulkUsingBTreeMgr(List<RequestDocument> reqDocs, Session session)
350             throws Exception {
351         List<String> ingestedIds = new ArrayList<String>();
352         if (reqDocs != null && reqDocs.size() > 0) {
353             Node formatNode = getStaticFormatNode(reqDocs.get(0), session);
354             List<Node> nodes = ingestBibDocumentUsingBTreeMgr(reqDocs, session, formatNode);
355             for (Node node : nodes) {
356                 ingestedIds.add(node.getIdentifier());
357             }
358         }
359         return ingestedIds;
360     }
361 
362     /**
363      * Method to ingest Bib & Linked Instance RequestDocuments for String Ingest.
364      *
365      * @param reqDoc
366      * @param session
367      * @return - Ingested List of Documents UUIDs.
368      * @throws Exception
369      */
370     public List<String> ingestBibNLinkedInstanceRequestDocuments(RequestDocument reqDoc, Session session)
371             throws Exception {
372         List<String> ingestedIds = new ArrayList<String>();
373         try {
374             Node bibNode = ingestBibDocument(reqDoc, session, null);
375             ingestedIds.add(bibNode.getIdentifier());
376             List<String> linkedBibIds = new ArrayList<String>();
377             linkedBibIds.add(bibNode.getIdentifier());
378             for (RequestDocument linkedDoc : reqDoc.getLinkedRequestDocuments()) {
379                 Node instanceNode = ingestInstanceDocument(linkedDoc, session, ingestedIds, linkedBibIds, null);
380                 instanceNode.setProperty("bibIdentifier", reqDoc.getUuid());
381                 bibNode.setProperty("instanceIdentifier", instanceNode.getIdentifier());
382             }
383         }
384         catch (Exception e) {
385             logger.error("Ingest failed for Request Document: ", e);
386             throw e;
387         }
388         return ingestedIds;
389     }
390 
391     /**
392      * Method to ingest Bib & Linked Instance RequestDocuments for String Ingest.
393      *
394      * @param reqDoc
395      * @param session
396      * @return - Ingested List of Documents UUIDs.
397      * @throws Exception
398      */
399     public Node ingestWorkLicenseOnixplRequestDocument(RequestDocument reqDoc, Session session,
400                                                        List<String> ingestedIds) throws Exception {
401         Node licenseNode;
402         if (ingestedIds == null) {
403             ingestedIds = new ArrayList<String>();
404         }
405         try {
406             licenseNode = ingestLicenseDocument(reqDoc, session, null);
407             ingestedIds.add(licenseNode.getIdentifier());
408         }
409         catch (Exception e) {
410             logger.error("Ingest failed for Request Document: ", e);
411             throw e;
412         }
413         return licenseNode;
414     }
415 
416     /**
417      * Method to ingestInstaceRequestDocument
418      *
419      * @param reqDoc
420      * @param session
421      * @param formatNode
422      * @throws Exception
423      */
424     public synchronized List<String> ingestInstaceRequestDocumentForBulk(RequestDocument reqDoc, Session session,
425                                                                          Node formatNode) throws Exception {
426         List<String> parsedDocs = new ArrayList<String>();
427         try {
428             // Validation at Content Level.
429             validateContent(reqDoc.getFormat(), reqDoc.getContent().getContent());
430             reqDoc.getContent().setContentObject(reqDoc.getContent().getContent());
431             Node instanceNode = ingestInstanceDocument(reqDoc, session, parsedDocs, null, formatNode);
432             Instance instance = ((InstanceCollection) reqDoc.getContent().getContentObject()).getInstance().get(0);
433             for (String resourceId : instance.getResourceIdentifier()) {
434                 try {
435                     Node bibNode = nodeHandler.getNodeByUUID(session, resourceId);
436                     bibNode.setProperty("instanceIdentifier", instanceNode.getIdentifier());
437                     instanceNode.setProperty("bibIdentifier", resourceId);
438                 }
439                 catch (Exception e) {
440                     logger.info("Mapping Not Successful: From Bib(" + resourceId + ") --> Instance("
441                                 + instanceNode.getIdentifier() + ")");
442                 }
443             }
444         }
445         catch (Exception e) {
446             logger.error("Ingest failed for Request Document: ", e);
447             throw new Exception("Ingest failed for Request Document: ", e);
448         }
449         return parsedDocs;
450     }
451 
452     /**
453      * Method to rollback Doc Store Ingested Data.
454      *
455      * @param session
456      * @param requestDocuments
457      */
458     public void rollbackDocStoreIngestedData(Session session, List<RequestDocument> requestDocuments) {
459         try {
460             for (RequestDocument document : requestDocuments) {
461                 try {
462                     session.getNodeByIdentifier(document.getUuid()).remove();
463                 }
464                 catch (Exception e) {
465                     //ignore this exception as uuid may not be valid.
466                 }
467                 for (RequestDocument linkedDoc : document.getLinkedRequestDocuments()) {
468                     try {
469                         session.getNodeByIdentifier(linkedDoc.getUuid()).remove();
470                     }
471                     catch (Exception e) {
472                         //ignore this exception as uuid may not be valid.
473                     }
474                 }
475             }
476             session.save();
477         }
478         catch (Exception e) {
479             logger.info(e.getMessage());
480         }
481     }
482 
483     /**
484      * Method to validate Content of a given format Request Document
485      *
486      * @param format
487      * @param content
488      */
489     private void validateContent(String format, String content) {
490     }
491 }