View Javadoc

1   package org.kuali.ole.docstore.service;
2   
3   import org.apache.commons.lang.time.StopWatch;
4   import org.kuali.ole.docstore.model.enums.DocFormat;
5   import org.kuali.ole.docstore.model.enums.DocType;
6   import org.kuali.ole.docstore.model.xmlpojo.ingest.RequestDocument;
7   import org.kuali.ole.docstore.model.xmlpojo.work.instance.oleml.InstanceCollection;
8   import org.kuali.ole.docstore.model.xmlpojo.work.instance.oleml.OleInstance;
9   import org.kuali.ole.docstore.model.xmlpojo.work.instance.oleml.ResourceIdentifier;
10  import org.kuali.ole.documenthandler.InstanceRequestDocumentResolver;
11  import org.kuali.ole.repository.NodeHandler;
12  import org.slf4j.Logger;
13  import org.slf4j.LoggerFactory;
14  
15  import javax.jcr.Node;
16  import javax.jcr.RepositoryException;
17  import javax.jcr.Session;
18  import javax.jcr.Workspace;
19  import javax.jcr.nodetype.NodeType;
20  import java.text.DateFormat;
21  import java.text.Format;
22  import java.text.SimpleDateFormat;
23  import java.util.ArrayList;
24  import java.util.Date;
25  import java.util.List;
26  import java.util.Random;
27  
28  import org.apache.jackrabbit.commons.flat.TreeManager;
29  import org.apache.jackrabbit.commons.flat.BTreeManager;
30  import org.apache.jackrabbit.commons.flat.NodeSequence;
31  import org.apache.jackrabbit.commons.flat.ItemSequence;
32  import org.apache.jackrabbit.commons.flat.Rank;
33  
34  
35  
36  
37  import static org.kuali.ole.docstore.process.ProcessParameters.*;
38  
39  
40  /**
41   * 
42   * Class to Ingest Documents.
43   * 
44   * @author Rajesh Chowdary K
45   * @created Feb 16, 2012
46   */
47  public class DocumentIngester {
48  
    /** Logger shared by all instances of this class. */
    private static Logger logger      = LoggerFactory.getLogger(DocumentIngester.class);
    /**
     * Helper through which the ingest methods create and look up repository nodes.
     * Several methods also use it as the monitor of a synchronized block while creating level nodes.
     */
    private NodeHandler   nodeHandler = new NodeHandler();
    /** Tree manager; reassigned on every call to ingestBibDocumentUsingBTreeMgr. */
    private TreeManager treeManager;
    /** Node sequence built on top of treeManager; reassigned together with it. */
    private NodeSequence nodeSequence;
    // Not read by any method of this class; its only reference (an increment in the B-tree
    // ingest loop) is commented out.
    private int i = 0;
54  
55      public Node getStaticFormatNode(RequestDocument doc, Session session) throws RepositoryException {
56          Node formatNode = null;
57          Node root = session.getRootNode();
58          Node categoryNode = nodeHandler.initStaticNode(doc.getCategory(), root, session);
59          Node typeNode = nodeHandler.initStaticNode(doc.getType(), categoryNode, session);
60          formatNode = nodeHandler.initStaticNode(doc.getFormat(), typeNode, session);
61          return formatNode;
62      }
63  
64      /**
65       * 
66       * Method to ingest a Bib RequestDocument.
67       * 
68       * @param reqDoc
69       * @param session
70       * @param formatNode
71       * @return
72       * @throws Exception
73       */
74      protected synchronized Node ingestBibDocument(RequestDocument reqDoc, Session session, Node formatNode) throws Exception {
75          Node bibFileNode = null;
76          try {
77              String file = "file";
78              if (DocFormat.MARC.isEqualTo(reqDoc.getFormat()))
79                  file = FILE_MARC;
80              else
81                  file = reqDoc.getFormat() + FILE;
82  
83              Node bibFormatNode = null;
84              if (formatNode == null)
85                  bibFormatNode = getStaticFormatNode(reqDoc, session);
86              else
87                  bibFormatNode = formatNode;
88              Node l3 = null;
89              synchronized (nodeHandler) {
90                  Node l1 = nodeHandler.initLevelNode(NODE_LEVEL1, bibFormatNode, false, session);
91                  Node l2 = nodeHandler.initLevelNode(NODE_LEVEL2, l1, false, session);
92                  l3 = nodeHandler.initLevelNode(NODE_LEVEL3, l2, false, session);
93              }
94              bibFileNode = nodeHandler.initFileNode(reqDoc, file, l3, session);
95          } catch (Exception e) {
96              logger.error("Ingest failed for RequestDocument: ", e);
97              throw e;
98          }
99          return bibFileNode;
100     }
101 
102     /**
103      *
104      * Method to ingest a Bib RequestDocument using Btree manager.
105      *
106      * @param reqDocs
107      * @param session
108      * @param formatNode
109      * @return
110      * @throws Exception
111      */
112     protected synchronized List<Node> ingestBibDocumentUsingBTreeMgr(List<RequestDocument> reqDocs, Session session, Node formatNode) throws Exception {
113         List<Node> fileNodes = null;
114         fileNodes = new ArrayList<Node>();
115         try {
116             /*String file = "file";
117             if (DocFormat.MARC.isEqualTo(reqDoc.getFormat()))
118                 file = FILE_MARC;
119             else
120                 file = reqDoc.getFormat() + FILE;
121             Node bibFormatNode = null;
122             if (formatNode == null)
123                 bibFormatNode = getStaticFormatNode(reqDoc, session);
124             else
125                 bibFormatNode = formatNode;
126             Node l3 = null;
127             synchronized (nodeHandler) {
128                 Node l1 = nodeHandler.initLevelNode(NODE_LEVEL1, bibFormatNode, false, session);
129                 Node l2 = nodeHandler.initLevelNode(NODE_LEVEL2, l1, false, session);
130                 l3 = nodeHandler.initLevelNode(NODE_LEVEL3, l2, false, session);
131             } */
132             StopWatch btreeTimer = new StopWatch();
133             DateFormat dateFormat = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss:SSS");
134             Date date = new Date();
135             btreeTimer.start();
136             treeManager = new BTreeManager(formatNode, 500, 1000, Rank.<String>comparableComparator(), true);
137             // Create a new NodeSequence with that tree manager
138             nodeSequence = ItemSequence.createNodeSequence(treeManager);
139             btreeTimer.stop();
140             logger.info("Time taken for initializing btree manager sequence=" + btreeTimer.toString());
141             StopWatch btreeAddNodeTimer = new StopWatch();
142             Node node = null;
143             btreeAddNodeTimer.start();
144             Random generator = new Random(19580427);
145             Format formatter = new SimpleDateFormat("dd-MM-yyyy HH-mm-ss");
146             Date date1 = null;
147             for (RequestDocument reqDoc : reqDocs) {
148                 node = null;
149                 date1 = new Date();
150                 String dateStr = formatter.format(date1);
151                 node = nodeSequence.addNode(dateStr +"-" + generator.nextInt(), NodeType.NT_UNSTRUCTURED);
152                 nodeHandler.initFileNode(node, reqDoc, FILE_MARC, null, session);
153                 fileNodes.add(node);
154                 //i++;
155             }
156             btreeAddNodeTimer.stop();
157             logger.info("Time taken for adding " + reqDocs.size() + " nodes to btree: " + btreeAddNodeTimer.toString());
158 
159         } catch (Exception e) {
160             logger.error("Ingest failed for RequestDocument: ", e);
161             throw new Exception(e);
162         }
163         return fileNodes;
164     }
165 
166 
167     /**
168      *
169      * Method to ingest a License RequestDocument.
170      *
171      * @param reqDoc
172      * @param session
173      * @param formatNode
174      * @return
175      * @throws Exception
176      */
177      protected synchronized Node ingestLicenseDocument(RequestDocument reqDoc, Session session, Node formatNode)
178              throws Exception {
179          Node licenseFileNode = null;
180          try {
181              String file = "file";
182              if (DocFormat.ONIXPL.isEqualTo(reqDoc.getFormat())) {
183                  file = FILE_ONIXPL;
184              }
185              else {
186                  file = reqDoc.getFormat() + FILE;
187              }
188              Node licenseFormatNode = null;
189              if (formatNode == null) {
190                  licenseFormatNode = getStaticFormatNode(reqDoc, session);
191              }
192              else {
193                  licenseFormatNode = formatNode;
194              }
195              synchronized (nodeHandler) {
196                  Node l1 = nodeHandler.initLevelNode(NODE_LEVEL1, licenseFormatNode, false, session);
197                  licenseFileNode = nodeHandler.initFileNode(reqDoc, file, l1, session);
198              }
199          }
200          catch (Exception e) {
201              logger.error("Ingest failed for RequestDocument: ", e);
202              throw e;
203          }
204          return licenseFileNode;
205      }
206 
207     /**
208      * 
209      * Method to ingest an Instance Request Document.
210      * 
211      * @param reqDoc
212      * @param session
213      * @param ingestedIds
214      *            - can even be null if ingested Ids are not required for Outside.
215      * @param linkedBibIds
216      *            - Pass Linked Bib Id's only if linking is necessary & be used in making a resource Linking.
217      * @param formatNode
218      * @return
219      * @throws Exception
220      *             - Throws Exception if it cannot ingest any of the instance / item / holdings documents.
221      */
222     protected synchronized Node ingestInstanceDocument(RequestDocument reqDoc, Session session, List<String> ingestedIds, List<String> linkedBibIds,
223             Node formatNode) throws Exception {
224         Node instanceNode = null;
225         if (ingestedIds == null)
226             ingestedIds = new ArrayList<String>();
227         try {
228             InstanceRequestDocumentResolver resolver = new InstanceRequestDocumentResolver();
229             List<RequestDocument> resolvedDocs = resolver.getParsedHoldingsNItemDocuments(reqDoc, linkedBibIds);
230             Node instFormatNode = null;
231             if (formatNode == null)
232                 instFormatNode = getStaticFormatNode(reqDoc, session);
233             else
234                 instFormatNode = formatNode;
235             synchronized (nodeHandler) {
236                 Node l1 = nodeHandler.initLevelNode(NODE_LEVEL1, instFormatNode, false, session);
237                 Node l2 = nodeHandler.initLevelNode(NODE_LEVEL2, l1, false, session);
238                 instanceNode = nodeHandler.initLevelNode(NODE_INSTANCE, l2, false, session);
239             }
240             Node holdingsNode = nodeHandler.initNonStaticNode(NODE_HOLDINGS, instanceNode);
241             reqDoc.setUuid(instanceNode.getIdentifier());
242             ingestedIds.add(nodeHandler.initFileNode(resolvedDocs.get(0), FILE_INSTANCE, instanceNode, session).getIdentifier());
243             ingestedIds.add(nodeHandler.initFileNode(resolvedDocs.get(1), FILE_HOLDINGS, holdingsNode, session).getIdentifier());
244             for (int i = 2; i < resolvedDocs.size(); i++)
245                 ingestedIds.add(nodeHandler.initFileNode(resolvedDocs.get(i), FILE_ITEM, holdingsNode, session).getIdentifier());
246             ((InstanceCollection) reqDoc.getContent().getContentObject()).getInstanceCollection().get(0)
247                     .setInstanceIdentifier(instanceNode.getIdentifier());
248         } catch (Exception e) {
249             logger.error("Ingest failed for RequestDocument: ", e);
250             throw e;
251         }
252         return instanceNode;
253     }
254 
255     protected synchronized List<String> ingestPatronRequestDocument(RequestDocument reqDoc, Session session, Node formatNode) throws Exception {
256         List<String> ingestedIds = new ArrayList<String>();
257         try {
258             // Validation at Content Level.
259             validateContent(reqDoc.getFormat(), reqDoc.getContent().getContent());
260             // Ingest
261             Node patronFormatNode = null;
262             if (formatNode == null)
263                 patronFormatNode = getStaticFormatNode(reqDoc, session);
264             else
265                 patronFormatNode = formatNode;
266             String uuid = nodeHandler.initFileNode(reqDoc, FILE_PATRON_OLEML, patronFormatNode, session).getIdentifier();
267             reqDoc.setUuid(uuid);
268             ingestedIds.add(uuid);
269         } catch (Exception e) {
270             logger.error("Ingest failed for RequestDocument: ", e);
271             throw e;
272         }
273         return ingestedIds;
274     }
275 
276     /**
277      * 
278      * Method to ingest RequestDocuments For Bulk Ingest Opertaions.
279      * 
280      * @param reqDocs
281      * @param session
282      * @return
283      * @throws Exception
284      */
285     public List<String> ingestRequestDocumentsForBulk(List<RequestDocument> reqDocs, Session session) throws Exception {
286         List<String> ingestedIds = new ArrayList<String>();
287         if (reqDocs != null && reqDocs.size() > 0) {
288             Node formatNode = getStaticFormatNode(reqDocs.get(0), session);
289             for (RequestDocument reqDoc : reqDocs) {
290                 if (DocType.BIB.isEqualTo(reqDoc.getType()))
291                     ingestedIds.add(ingestBibDocument(reqDoc, session, formatNode).getIdentifier());
292                 else if (DocType.INSTANCE.isEqualTo(reqDoc.getType()))
293                     ingestedIds.addAll(ingestInstaceRequestDocumentForBulk(reqDoc, session, formatNode));
294                 else if (DocType.PATRON.isEqualTo(reqDoc.getType()))
295                     ingestedIds.addAll(ingestPatronRequestDocument(reqDoc, session, formatNode));
296             }
297         }
298         return ingestedIds;
299     }
300 
301     /**
302      *
303      * Method to ingest RequestDocuments For Bulk Ingest Opertaions using Btree Manager.
304      *
305      * @param reqDocs
306      * @param session
307      * @return
308      * @throws Exception
309      */
310     public List<String> ingestRequestDocumentsForBulkUsingBTreeMgr(List<RequestDocument> reqDocs, Session session) throws Exception {
311         List<String> ingestedIds = new ArrayList<String>();
312         if (reqDocs != null && reqDocs.size() > 0) {
313             Node formatNode = getStaticFormatNode(reqDocs.get(0), session);
314             List<Node> nodes = ingestBibDocumentUsingBTreeMgr(reqDocs, session, formatNode);
315             for (Node node : nodes) {
316                 ingestedIds.add(node.getIdentifier());
317             }
318         }
319         return ingestedIds;
320     }
321 
322     /**
323      * 
324      * Method to ingest Bib & Linked Instance RequestDocuments for String Ingest.
325      * 
326      * @param reqDoc
327      * @param session
328      * @return - Ingested List of Documents UUIDs.
329      * @throws Exception
330      */
331     public List<String> ingestBibNLinkedInstanceRequestDocuments(RequestDocument reqDoc, Session session) throws Exception {
332         List<String> ingestedIds = new ArrayList<String>();
333         try {
334             Node bibNode = ingestBibDocument(reqDoc, session, null);
335             ingestedIds.add(bibNode.getIdentifier());
336             List<String> linkedBibIds = new ArrayList<String>();
337             linkedBibIds.add(bibNode.getIdentifier());
338             for (RequestDocument linkedDoc : reqDoc.getLinkedRequestDocuments()) {
339                 Node instanceNode = ingestInstanceDocument(linkedDoc, session, ingestedIds, linkedBibIds, null);
340                 instanceNode.setProperty("bibIdentifier", reqDoc.getUuid());
341                 bibNode.setProperty("instanceIdentifier", instanceNode.getIdentifier());
342             }
343         } catch (Exception e) {
344             logger.error("Ingest failed for Request Document: ", e);
345             throw e;
346         }
347         return ingestedIds;
348     }
349 
350     /**
351      *
352      * Method to ingest Bib & Linked Instance RequestDocuments for String Ingest.
353      *
354      * @param reqDoc
355      * @param session
356      * @return - Ingested List of Documents UUIDs.
357      * @throws Exception
358      */
359     public Node ingestWorkLicenseOnixplRequestDocument(RequestDocument reqDoc, Session session, List<String> ingestedIds) throws Exception {
360         Node licenseNode;
361          if (ingestedIds == null){
362             ingestedIds = new ArrayList<String>();
363          }
364         try {
365             licenseNode = ingestLicenseDocument(reqDoc, session, null);
366             ingestedIds.add(licenseNode.getIdentifier());
367         }
368         catch (Exception e) {
369             logger.error("Ingest failed for Request Document: ", e);
370             throw e;
371         }
372         return licenseNode;
373     }
374 
375     /**
376      * 
377      * Method to ingestInstaceRequestDocument
378      * 
379      * @param reqDoc
380      * @param session
381      * @param formatNode
382      * @throws Exception
383      */
384     public synchronized List<String> ingestInstaceRequestDocumentForBulk(RequestDocument reqDoc, Session session, Node formatNode) throws Exception {
385         List<String> parsedDocs = new ArrayList<String>();
386         try {
387             // Validation at Content Level.
388             validateContent(reqDoc.getFormat(), reqDoc.getContent().getContent());
389 
390             Node instanceNode = ingestInstanceDocument(reqDoc, session, parsedDocs, null, formatNode);
391             OleInstance instance = ((InstanceCollection) reqDoc.getContent().getContentObject()).getInstanceCollection().get(0);
392             for (ResourceIdentifier resourceId : instance.getResourceIdentifier()) {
393                 try {
394                     Node bibNode = nodeHandler.getNodeByUUID(session, resourceId.getValue());
395                     bibNode.setProperty("instanceIdentifier", instanceNode.getIdentifier());
396                     instanceNode.setProperty("bibIdentifier", resourceId.getValue());
397                 } catch (Exception e) {
398                     logger.info("Mapping Not Successful: From Bib(" + resourceId + ") --> Instance(" + instanceNode.getIdentifier() + ")");
399                 }
400             }
401         } catch (Exception e) {
402             logger.error("Ingest failed for Request Document: ", e);
403             throw new Exception("Ingest failed for Request Document: ", e);
404         }
405         return parsedDocs;
406     }
407 
408     /**
409      * 
410      * Method to rollback Doc Store Ingested Data.
411      * 
412      * @param session
413      * @param requestDocuments
414      */
415     public void rollbackDocStoreIngestedData(Session session, List<RequestDocument> requestDocuments) {
416         try {
417             for (RequestDocument document : requestDocuments) {
418                 try {
419                     session.getNodeByIdentifier(document.getUuid()).remove();
420                 }
421                 catch (Exception e) {
422                     //ignore this exception as uuid may not be valid.
423                 }
424                 for (RequestDocument linkedDoc : document.getLinkedRequestDocuments()) {
425                     try {
426                         session.getNodeByIdentifier(linkedDoc.getUuid()).remove();
427                     }
428                     catch (Exception e) {
429                         //ignore this exception as uuid may not be valid.
430                     }
431                 }
432             }
433             session.save();
434         }
435         catch (Exception e) {
436             e.printStackTrace();
437         }
438     }
439 
    /**
     * Validates the content of a request document of the given format.
     * <p>
     * The body is currently empty, so every call succeeds without inspecting either argument.
     *
     * @param format  format of the request document
     * @param content raw content of the request document
     */
    private void validateContent(String format, String content) {
        // Intentionally empty: no validation is performed.
    }
449 }