View Javadoc
1   package org.kuali.ole.docstore.service;
2   
3   import org.apache.commons.lang.time.StopWatch;
4   import org.apache.jackrabbit.commons.flat.*;
5   import org.kuali.ole.docstore.common.document.content.instance.Instance;
6   import org.kuali.ole.docstore.model.enums.DocFormat;
7   import org.kuali.ole.docstore.model.enums.DocType;
8   import org.kuali.ole.docstore.model.xmlpojo.ingest.RequestDocument;
9   import org.kuali.ole.docstore.common.document.content.instance.InstanceCollection;
10  import org.kuali.ole.documenthandler.InstanceRequestDocumentResolver;
11  import org.kuali.ole.repository.NodeHandler;
12  import org.slf4j.Logger;
13  import org.slf4j.LoggerFactory;
14  
15  import javax.jcr.Node;
16  import javax.jcr.RepositoryException;
17  import javax.jcr.Session;
18  import javax.jcr.nodetype.NodeType;
19  import java.text.DateFormat;
20  import java.text.Format;
21  import java.text.SimpleDateFormat;
22  import java.util.ArrayList;
23  import java.util.Date;
24  import java.util.List;
25  import java.util.Random;
26  
27  import static org.kuali.ole.docstore.process.ProcessParameters.*;
28  
29  
30  /**
31   * Class to Ingest Documents.
32   *
33   * @author Rajesh Chowdary K
34   * @created Feb 16, 2012
35   */
36  public class DocumentIngester {
37  
38      private static Logger logger = LoggerFactory.getLogger(DocumentIngester.class);
39      private NodeHandler nodeHandler = new NodeHandler();
40      private TreeManager treeManager;
41      private NodeSequence nodeSequence;
42      private int i = 0;
43  
44      public Node getStaticFormatNode(RequestDocument doc, Session session) throws RepositoryException {
45          Node formatNode = null;
46          Node root = session.getRootNode();
47          Node categoryNode = nodeHandler.initStaticNode(doc.getCategory(), root, session);
48          Node typeNode = nodeHandler.initStaticNode(doc.getType(), categoryNode, session);
49          formatNode = nodeHandler.initStaticNode(doc.getFormat(), typeNode, session);
50          return formatNode;
51      }
52  
53      /**
54       * Method to ingest a Bib RequestDocument.
55       *
56       * @param reqDoc
57       * @param session
58       * @param formatNode
59       * @return
60       * @throws Exception
61       */
62      protected synchronized Node ingestBibDocument(RequestDocument reqDoc, Session session, Node formatNode)
63              throws Exception {
64          Node bibFileNode = null;
65          try {
66              String file = "file";
67              if (DocFormat.MARC.isEqualTo(reqDoc.getFormat())) {
68                  file = FILE_MARC;
69              } else {
70                  file = reqDoc.getFormat() + FILE;
71              }
72  
73              Node bibFormatNode = null;
74              if (formatNode == null) {
75                  bibFormatNode = getStaticFormatNode(reqDoc, session);
76              } else {
77                  bibFormatNode = formatNode;
78              }
79              Node l1 = null;
80              Node l3 = null;
81              synchronized (nodeHandler) {
82                  l1 = nodeHandler.initLevelNode(NODE_LEVEL1, bibFormatNode, false, session);
83  //                Node l2 = nodeHandler.initLevelNode(NODE_LEVEL2, l1, false, session);
84  //                l3 = nodeHandler.initLevelNode(NODE_LEVEL3, l2, false, session);
85              }
86  //            bibFileNode = nodeHandler.initFileNode(reqDoc, file, l3, session);
87              bibFileNode = nodeHandler.initFileNode(reqDoc, file, l1, session);
88          } catch (Exception e) {
89              logger.error("Ingest failed for RequestDocument: ", e);
90              throw e;
91          }
92          return bibFileNode;
93      }
94  
95      /**
96       * Method to ingest a Bib RequestDocument using Btree manager.
97       *
98       * @param reqDocs
99       * @param session
100      * @param formatNode
101      * @return
102      * @throws Exception
103      */
104     protected synchronized List<Node> ingestBibDocumentUsingBTreeMgr(List<RequestDocument> reqDocs, Session session,
105                                                                      Node formatNode) throws Exception {
106         List<Node> fileNodes = null;
107         fileNodes = new ArrayList<Node>();
108         try {
109             /*String file = "file";
110             if (DocFormat.MARC.isEqualTo(reqDoc.getFormat()))
111                 file = FILE_MARC;
112             else
113                 file = reqDoc.getFormat() + FILE;
114             Node bibFormatNode = null;
115             if (formatNode == null)
116                 bibFormatNode = getStaticFormatNode(reqDoc, session);
117             else
118                 bibFormatNode = formatNode;
119             Node l3 = null;
120             synchronized (nodeHandler) {
121                 Node l1 = nodeHandler.initLevelNode(NODE_LEVEL1, bibFormatNode, false, session);
122                 Node l2 = nodeHandler.initLevelNode(NODE_LEVEL2, l1, false, session);
123                 l3 = nodeHandler.initLevelNode(NODE_LEVEL3, l2, false, session);
124             } */
125             StopWatch btreeTimer = new StopWatch();
126             DateFormat dateFormat = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss:SSS");
127             Date date = new Date();
128             btreeTimer.start();
129             treeManager = new BTreeManager(formatNode, 500, 1000, Rank.<String>comparableComparator(), true);
130             // Create a new NodeSequence with that tree manager
131             nodeSequence = ItemSequence.createNodeSequence(treeManager);
132             btreeTimer.stop();
133             logger.info("Time taken for initializing btree manager sequence=" + btreeTimer.toString());
134             StopWatch btreeAddNodeTimer = new StopWatch();
135             Node node = null;
136             btreeAddNodeTimer.start();
137             Random generator = new Random(19580427);
138             Format formatter = new SimpleDateFormat("dd-MM-yyyy HH-mm-ss");
139             Date date1 = null;
140             for (RequestDocument reqDoc : reqDocs) {
141                 node = null;
142                 date1 = new Date();
143                 String dateStr = formatter.format(date1);
144                 node = nodeSequence.addNode(dateStr + "-" + generator.nextInt(), NodeType.NT_UNSTRUCTURED);
145                 nodeHandler.initFileNode(node, reqDoc, FILE_MARC, null, session);
146                 fileNodes.add(node);
147                 //i++;
148             }
149             btreeAddNodeTimer.stop();
150             logger.info("Time taken for adding " + reqDocs.size() + " nodes to btree: " + btreeAddNodeTimer.toString());
151 
152         } catch (Exception e) {
153             logger.error("Ingest failed for RequestDocument: ", e);
154             throw new Exception(e);
155         }
156         return fileNodes;
157     }
158 
159 
160     /**
161      * Method to ingest a License RequestDocument.
162      *
163      * @param reqDoc
164      * @param session
165      * @param formatNode
166      * @return
167      * @throws Exception
168      */
169     protected synchronized Node ingestLicenseDocument(RequestDocument reqDoc, Session session, Node formatNode)
170             throws Exception {
171         Node licenseFileNode = null;
172         try {
173             String file = "file";
174             if (DocFormat.ONIXPL.isEqualTo(reqDoc.getFormat())) {
175                 file = FILE_ONIXPL;
176             } else {
177                 file = reqDoc.getFormat() + FILE;
178             }
179             Node licenseFormatNode = null;
180             if (formatNode == null) {
181                 licenseFormatNode = getStaticFormatNode(reqDoc, session);
182             } else {
183                 licenseFormatNode = formatNode;
184             }
185             synchronized (nodeHandler) {
186                 Node l1 = nodeHandler.initLevelNode(NODE_LEVEL1, licenseFormatNode, false, session);
187                 licenseFileNode = nodeHandler.initFileNode(reqDoc, file, l1, session);
188             }
189         } catch (Exception e) {
190             logger.error("Ingest failed for RequestDocument: ", e);
191             throw e;
192         }
193         return licenseFileNode;
194     }
195 
196     /**
197      * Method to ingest an Instance Request Document.
198      *
199      * @param reqDoc
200      * @param session
201      * @param ingestedIds  - can even be null if ingested Ids are not required for Outside.
202      * @param linkedBibIds - Pass Linked Bib Id's only if linking is necessary & be used in making a resource Linking.
203      * @param formatNode
204      * @return
205      * @throws Exception - Throws Exception if it cannot ingest any of the instance / item / holdings documents.
206      */
207     protected synchronized Node ingestInstanceDocument(RequestDocument reqDoc, Session session,
208                                                        List<String> ingestedIds, List<String> linkedBibIds,
209                                                        Node formatNode) throws Exception {
210         Node instanceNode = null;
211         if (ingestedIds == null) {
212             ingestedIds = new ArrayList<String>();
213         }
214         try {
215             InstanceRequestDocumentResolver resolver = new InstanceRequestDocumentResolver();
216             List<RequestDocument> resolvedDocs = resolver.getParsedHoldingsNItemDocuments(reqDoc, linkedBibIds);
217             Node instFormatNode = null;
218             if (formatNode == null) {
219                 instFormatNode = getStaticFormatNode(reqDoc, session);
220             } else {
221                 instFormatNode = formatNode;
222             }
223             synchronized (nodeHandler) {
224                 Node l1 = nodeHandler.initLevelNode(NODE_LEVEL1, instFormatNode, false, session);
225                 Node l2 = nodeHandler.initLevelNode(NODE_LEVEL2, l1, false, session);
226                 instanceNode = nodeHandler.initLevelNode(NODE_INSTANCE, l2, false, session);
227             }
228             Node holdingsNode = nodeHandler.initNonStaticNode(NODE_HOLDINGS, instanceNode);
229             reqDoc.setUuid(instanceNode.getIdentifier());
230             ingestedIds.add(nodeHandler.initFileNode(resolvedDocs.get(0), FILE_INSTANCE, instanceNode, session)
231                     .getIdentifier());
232             ingestedIds.add(nodeHandler.initFileNode(resolvedDocs.get(1), FILE_HOLDINGS, holdingsNode, session)
233                     .getIdentifier());
234             ingestedIds.add(nodeHandler.initFileNode(resolvedDocs.get(2), FILE_SOURCE_HOLDINGS, holdingsNode, session)
235                     .getIdentifier());
236             for (int i = 3; i < resolvedDocs.size(); i++) {
237                 ingestedIds.add(nodeHandler.initFileNode(resolvedDocs.get(i), FILE_ITEM, holdingsNode, session)
238                         .getIdentifier());
239             }
240             ((InstanceCollection) reqDoc.getContent().getContentObject()).getInstance().get(0).setInstanceIdentifier(
241                     instanceNode.getIdentifier());
242         } catch (Exception e) {
243             logger.error("Ingest failed for RequestDocument: ", e);
244             throw e;
245         }
246         return instanceNode;
247     }
248 
249     protected synchronized List<String> ingestPatronRequestDocument(RequestDocument reqDoc, Session session,
250                                                                     Node formatNode) throws Exception {
251         List<String> ingestedIds = new ArrayList<String>();
252         try {
253             // Validation at Content Level.
254             validateContent(reqDoc.getFormat(), reqDoc.getContent().getContent());
255             // Ingest
256             Node patronFormatNode = null;
257             if (formatNode == null) {
258                 patronFormatNode = getStaticFormatNode(reqDoc, session);
259             } else {
260                 patronFormatNode = formatNode;
261             }
262             String uuid = nodeHandler.initFileNode(reqDoc, FILE_PATRON_OLEML, patronFormatNode, session)
263                     .getIdentifier();
264             reqDoc.setUuid(uuid);
265             ingestedIds.add(uuid);
266         } catch (Exception e) {
267             logger.error("Ingest failed for RequestDocument: ", e);
268             throw e;
269         }
270         return ingestedIds;
271     }
272 
273     /**
274      * Method to ingest RequestDocuments For Bulk Ingest Opertaions.
275      *
276      * @param reqDocs
277      * @param session
278      * @return
279      * @throws Exception
280      */
281     public List<String> ingestRequestDocumentsForBulk(List<RequestDocument> reqDocs, Session session) throws Exception {
282         List<String> ingestedIds = new ArrayList<String>();
283         if (reqDocs != null && reqDocs.size() > 0) {
284             Node formatNode = getStaticFormatNode(reqDocs.get(0), session);
285             String docType = reqDocs.get(0).getType();
286             if (DocType.BIB.isEqualTo(docType)) {
287                 ingestedIds = ingestBatch(reqDocs, session, formatNode);
288             } else {
289                 for (RequestDocument reqDoc : reqDocs) {
290 //                                    if (DocType.BIB.isEqualTo(reqDoc.getType())) {
291 //                                        ingestedIds.add(ingestBibDocument(reqDoc, session, formatNode).getIdentifier());
292 //                                    }
293 //                                    else
294                     if (DocType.INSTANCE.isEqualTo(reqDoc.getType())) {
295                         ingestedIds.addAll(ingestInstaceRequestDocumentForBulk(reqDoc, session, formatNode));
296                     } else if (DocType.PATRON.isEqualTo(reqDoc.getType())) {
297                         ingestedIds.addAll(ingestPatronRequestDocument(reqDoc, session, formatNode));
298                     }
299                 }
300             }
301         }
302         return ingestedIds;
303     }
304 
305     /**
306      * This method is specifically written only for testing performance for 1 Million bib records.
307      *
308      * @param reqDocs
309      * @param session
310      * @param formatNode
311      * @return
312      */
313     public List<String> ingestBatch(List<RequestDocument> reqDocs, Session session, Node formatNode) throws Exception {
314         String nodeName = reqDocs.get(0).getFormat() + FILE;
315         List<String> idList = new ArrayList<String>();
316         // Get a new level1 node and add file nodes to it.
317         Node levelNode = null;
318         synchronized (nodeHandler) {
319             levelNode = nodeHandler.initNonStaticNode(NODE_LEVEL1, formatNode);
320         }
321         for (RequestDocument requestDocument : reqDocs) {
322             Node fileNode = nodeHandler.initFileNode(requestDocument, nodeName, levelNode, session);
323             idList.add(fileNode.getIdentifier());
324         }
325         return idList;
326     }
327 
328 
329     /**
330      * Method to ingest RequestDocuments For Bulk Ingest Opertaions using Btree Manager.
331      *
332      * @param reqDocs
333      * @param session
334      * @return
335      * @throws Exception
336      */
337     @Deprecated
338     public List<String> ingestRequestDocumentsForBulkUsingBTreeMgr(List<RequestDocument> reqDocs, Session session)
339             throws Exception {
340         List<String> ingestedIds = new ArrayList<String>();
341         if (reqDocs != null && reqDocs.size() > 0) {
342             Node formatNode = getStaticFormatNode(reqDocs.get(0), session);
343             List<Node> nodes = ingestBibDocumentUsingBTreeMgr(reqDocs, session, formatNode);
344             for (Node node : nodes) {
345                 ingestedIds.add(node.getIdentifier());
346             }
347         }
348         return ingestedIds;
349     }
350 
351     /**
352      * Method to ingest Bib & Linked Instance RequestDocuments for String Ingest.
353      *
354      * @param reqDoc
355      * @param session
356      * @return - Ingested List of Documents UUIDs.
357      * @throws Exception
358      */
359     public List<String> ingestBibNLinkedInstanceRequestDocuments(RequestDocument reqDoc, Session session)
360             throws Exception {
361         List<String> ingestedIds = new ArrayList<String>();
362         try {
363             Node bibNode = ingestBibDocument(reqDoc, session, null);
364             ingestedIds.add(bibNode.getIdentifier());
365             List<String> linkedBibIds = new ArrayList<String>();
366             linkedBibIds.add(bibNode.getIdentifier());
367             for (RequestDocument linkedDoc : reqDoc.getLinkedRequestDocuments()) {
368                 Node instanceNode = ingestInstanceDocument(linkedDoc, session, ingestedIds, linkedBibIds, null);
369                 instanceNode.setProperty("bibIdentifier", reqDoc.getUuid());
370                 bibNode.setProperty("instanceIdentifier", instanceNode.getIdentifier());
371             }
372         } catch (Exception e) {
373             logger.error("Ingest failed for Request Document: ", e);
374             throw e;
375         }
376         return ingestedIds;
377     }
378 
379     /**
380      * Method to ingest Bib & Linked Instance RequestDocuments for String Ingest.
381      *
382      * @param reqDoc
383      * @param session
384      * @return - Ingested List of Documents UUIDs.
385      * @throws Exception
386      */
387     public Node ingestWorkLicenseOnixplRequestDocument(RequestDocument reqDoc, Session session,
388                                                        List<String> ingestedIds) throws Exception {
389         Node licenseNode;
390         if (ingestedIds == null) {
391             ingestedIds = new ArrayList<String>();
392         }
393         try {
394             licenseNode = ingestLicenseDocument(reqDoc, session, null);
395             ingestedIds.add(licenseNode.getIdentifier());
396         } catch (Exception e) {
397             logger.error("Ingest failed for Request Document: ", e);
398             throw e;
399         }
400         return licenseNode;
401     }
402 
403     /**
404      * Method to ingestInstaceRequestDocument
405      *
406      * @param reqDoc
407      * @param session
408      * @param formatNode
409      * @throws Exception
410      */
411     public synchronized List<String> ingestInstaceRequestDocumentForBulk(RequestDocument reqDoc, Session session,
412                                                                          Node formatNode) throws Exception {
413         List<String> parsedDocs = new ArrayList<String>();
414         try {
415             // Validation at Content Level.
416             validateContent(reqDoc.getFormat(), reqDoc.getContent().getContent());
417             reqDoc.getContent().setContentObject(reqDoc.getContent().getContent());
418             Node instanceNode = ingestInstanceDocument(reqDoc, session, parsedDocs, null, formatNode);
419             Instance instance = ((InstanceCollection) reqDoc.getContent().getContentObject()).getInstance().get(0);
420             for (String resourceId : instance.getResourceIdentifier()) {
421                 try {
422                     Node bibNode = nodeHandler.getNodeByUUID(session, resourceId);
423                     bibNode.setProperty("instanceIdentifier", instanceNode.getIdentifier());
424                     instanceNode.setProperty("bibIdentifier", resourceId);
425                 } catch (Exception e) {
426                     logger.info("Mapping Not Successful: From Bib(" + resourceId + ") --> Instance("
427                             + instanceNode.getIdentifier() + ")");
428                 }
429             }
430         } catch (Exception e) {
431             logger.error("Ingest failed for Request Document: ", e);
432             throw new Exception("Ingest failed for Request Document: ", e);
433         }
434         return parsedDocs;
435     }
436 
437     /**
438      * Method to rollback Doc Store Ingested Data.
439      *
440      * @param session
441      * @param requestDocuments
442      */
443     public void rollbackDocStoreIngestedData(Session session, List<RequestDocument> requestDocuments) {
444         try {
445             for (RequestDocument document : requestDocuments) {
446                 try {
447                     session.getNodeByIdentifier(document.getUuid()).remove();
448                 } catch (Exception e) {
449                     logger.error(e.getMessage() , e);
450                 }
451                 for (RequestDocument linkedDoc : document.getLinkedRequestDocuments()) {
452                     try {
453                         session.getNodeByIdentifier(linkedDoc.getUuid()).remove();
454                     } catch (Exception e) {
455                         logger.error(e.getMessage() , e);
456                     }
457                 }
458             }
459             session.save();
460         } catch (Exception e) {
461             logger.info(e.getMessage() , e);
462         }
463     }
464 
465     /**
466      * Method to validate Content of a given format Request Document
467      *
468      * @param format
469      * @param content
470      */
471     private void validateContent(String format, String content) {
472     }
473 }