1 package org.kuali.ole.docstore.service;
2
3 import org.apache.commons.lang.time.StopWatch;
4 import org.kuali.ole.docstore.model.enums.DocFormat;
5 import org.kuali.ole.docstore.model.enums.DocType;
6 import org.kuali.ole.docstore.model.xmlpojo.ingest.RequestDocument;
7 import org.kuali.ole.docstore.model.xmlpojo.work.instance.oleml.InstanceCollection;
8 import org.kuali.ole.docstore.model.xmlpojo.work.instance.oleml.OleInstance;
9 import org.kuali.ole.docstore.model.xmlpojo.work.instance.oleml.ResourceIdentifier;
10 import org.kuali.ole.documenthandler.InstanceRequestDocumentResolver;
11 import org.kuali.ole.repository.NodeHandler;
12 import org.slf4j.Logger;
13 import org.slf4j.LoggerFactory;
14
15 import javax.jcr.Node;
16 import javax.jcr.RepositoryException;
17 import javax.jcr.Session;
18 import javax.jcr.Workspace;
19 import javax.jcr.nodetype.NodeType;
20 import java.text.DateFormat;
21 import java.text.Format;
22 import java.text.SimpleDateFormat;
23 import java.util.ArrayList;
24 import java.util.Date;
25 import java.util.List;
26 import java.util.Random;
27
28 import org.apache.jackrabbit.commons.flat.TreeManager;
29 import org.apache.jackrabbit.commons.flat.BTreeManager;
30 import org.apache.jackrabbit.commons.flat.NodeSequence;
31 import org.apache.jackrabbit.commons.flat.ItemSequence;
32 import org.apache.jackrabbit.commons.flat.Rank;
33
34
35
36
37 import static org.kuali.ole.docstore.process.ProcessParameters.*;
38
39
40
41
42
43
44
45
46
47 public class DocumentIngester {
48
49 private static Logger logger = LoggerFactory.getLogger(DocumentIngester.class);
50 private NodeHandler nodeHandler = new NodeHandler();
51 private TreeManager treeManager;
52 private NodeSequence nodeSequence;
53 private int i = 0;
54
55 public Node getStaticFormatNode(RequestDocument doc, Session session) throws RepositoryException {
56 Node formatNode = null;
57 Node root = session.getRootNode();
58 Node categoryNode = nodeHandler.initStaticNode(doc.getCategory(), root, session);
59 Node typeNode = nodeHandler.initStaticNode(doc.getType(), categoryNode, session);
60 formatNode = nodeHandler.initStaticNode(doc.getFormat(), typeNode, session);
61 return formatNode;
62 }
63
64
65
66
67
68
69
70
71
72
73
74 protected synchronized Node ingestBibDocument(RequestDocument reqDoc, Session session, Node formatNode) throws Exception {
75 Node bibFileNode = null;
76 try {
77 String file = "file";
78 if (DocFormat.MARC.isEqualTo(reqDoc.getFormat()))
79 file = FILE_MARC;
80 else
81 file = reqDoc.getFormat() + FILE;
82
83 Node bibFormatNode = null;
84 if (formatNode == null)
85 bibFormatNode = getStaticFormatNode(reqDoc, session);
86 else
87 bibFormatNode = formatNode;
88 Node l3 = null;
89 synchronized (nodeHandler) {
90 Node l1 = nodeHandler.initLevelNode(NODE_LEVEL1, bibFormatNode, false, session);
91 Node l2 = nodeHandler.initLevelNode(NODE_LEVEL2, l1, false, session);
92 l3 = nodeHandler.initLevelNode(NODE_LEVEL3, l2, false, session);
93 }
94 bibFileNode = nodeHandler.initFileNode(reqDoc, file, l3, session);
95 } catch (Exception e) {
96 logger.error("Ingest failed for RequestDocument: ", e);
97 throw e;
98 }
99 return bibFileNode;
100 }
101
102
103
104
105
106
107
108
109
110
111
112 protected synchronized List<Node> ingestBibDocumentUsingBTreeMgr(List<RequestDocument> reqDocs, Session session, Node formatNode) throws Exception {
113 List<Node> fileNodes = null;
114 fileNodes = new ArrayList<Node>();
115 try {
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132 StopWatch btreeTimer = new StopWatch();
133 DateFormat dateFormat = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss:SSS");
134 Date date = new Date();
135 btreeTimer.start();
136 treeManager = new BTreeManager(formatNode, 500, 1000, Rank.<String>comparableComparator(), true);
137
138 nodeSequence = ItemSequence.createNodeSequence(treeManager);
139 btreeTimer.stop();
140 logger.info("Time taken for initializing btree manager sequence=" + btreeTimer.toString());
141 StopWatch btreeAddNodeTimer = new StopWatch();
142 Node node = null;
143 btreeAddNodeTimer.start();
144 Random generator = new Random(19580427);
145 Format formatter = new SimpleDateFormat("dd-MM-yyyy HH-mm-ss");
146 Date date1 = null;
147 for (RequestDocument reqDoc : reqDocs) {
148 node = null;
149 date1 = new Date();
150 String dateStr = formatter.format(date1);
151 node = nodeSequence.addNode(dateStr +"-" + generator.nextInt(), NodeType.NT_UNSTRUCTURED);
152 nodeHandler.initFileNode(node, reqDoc, FILE_MARC, null, session);
153 fileNodes.add(node);
154
155 }
156 btreeAddNodeTimer.stop();
157 logger.info("Time taken for adding " + reqDocs.size() + " nodes to btree: " + btreeAddNodeTimer.toString());
158
159 } catch (Exception e) {
160 logger.error("Ingest failed for RequestDocument: ", e);
161 throw new Exception(e);
162 }
163 return fileNodes;
164 }
165
166
167
168
169
170
171
172
173
174
175
176
177 protected synchronized Node ingestLicenseDocument(RequestDocument reqDoc, Session session, Node formatNode)
178 throws Exception {
179 Node licenseFileNode = null;
180 try {
181 String file = "file";
182 if (DocFormat.ONIXPL.isEqualTo(reqDoc.getFormat())) {
183 file = FILE_ONIXPL;
184 }
185 else {
186 file = reqDoc.getFormat() + FILE;
187 }
188 Node licenseFormatNode = null;
189 if (formatNode == null) {
190 licenseFormatNode = getStaticFormatNode(reqDoc, session);
191 }
192 else {
193 licenseFormatNode = formatNode;
194 }
195 synchronized (nodeHandler) {
196 Node l1 = nodeHandler.initLevelNode(NODE_LEVEL1, licenseFormatNode, false, session);
197 licenseFileNode = nodeHandler.initFileNode(reqDoc, file, l1, session);
198 }
199 }
200 catch (Exception e) {
201 logger.error("Ingest failed for RequestDocument: ", e);
202 throw e;
203 }
204 return licenseFileNode;
205 }
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222 protected synchronized Node ingestInstanceDocument(RequestDocument reqDoc, Session session, List<String> ingestedIds, List<String> linkedBibIds,
223 Node formatNode) throws Exception {
224 Node instanceNode = null;
225 if (ingestedIds == null)
226 ingestedIds = new ArrayList<String>();
227 try {
228 InstanceRequestDocumentResolver resolver = new InstanceRequestDocumentResolver();
229 List<RequestDocument> resolvedDocs = resolver.getParsedHoldingsNItemDocuments(reqDoc, linkedBibIds);
230 Node instFormatNode = null;
231 if (formatNode == null)
232 instFormatNode = getStaticFormatNode(reqDoc, session);
233 else
234 instFormatNode = formatNode;
235 synchronized (nodeHandler) {
236 Node l1 = nodeHandler.initLevelNode(NODE_LEVEL1, instFormatNode, false, session);
237 Node l2 = nodeHandler.initLevelNode(NODE_LEVEL2, l1, false, session);
238 instanceNode = nodeHandler.initLevelNode(NODE_INSTANCE, l2, false, session);
239 }
240 Node holdingsNode = nodeHandler.initNonStaticNode(NODE_HOLDINGS, instanceNode);
241 reqDoc.setUuid(instanceNode.getIdentifier());
242 ingestedIds.add(nodeHandler.initFileNode(resolvedDocs.get(0), FILE_INSTANCE, instanceNode, session).getIdentifier());
243 ingestedIds.add(nodeHandler.initFileNode(resolvedDocs.get(1), FILE_HOLDINGS, holdingsNode, session).getIdentifier());
244 for (int i = 2; i < resolvedDocs.size(); i++)
245 ingestedIds.add(nodeHandler.initFileNode(resolvedDocs.get(i), FILE_ITEM, holdingsNode, session).getIdentifier());
246 ((InstanceCollection) reqDoc.getContent().getContentObject()).getInstanceCollection().get(0)
247 .setInstanceIdentifier(instanceNode.getIdentifier());
248 } catch (Exception e) {
249 logger.error("Ingest failed for RequestDocument: ", e);
250 throw e;
251 }
252 return instanceNode;
253 }
254
255 protected synchronized List<String> ingestPatronRequestDocument(RequestDocument reqDoc, Session session, Node formatNode) throws Exception {
256 List<String> ingestedIds = new ArrayList<String>();
257 try {
258
259 validateContent(reqDoc.getFormat(), reqDoc.getContent().getContent());
260
261 Node patronFormatNode = null;
262 if (formatNode == null)
263 patronFormatNode = getStaticFormatNode(reqDoc, session);
264 else
265 patronFormatNode = formatNode;
266 String uuid = nodeHandler.initFileNode(reqDoc, FILE_PATRON_OLEML, patronFormatNode, session).getIdentifier();
267 reqDoc.setUuid(uuid);
268 ingestedIds.add(uuid);
269 } catch (Exception e) {
270 logger.error("Ingest failed for RequestDocument: ", e);
271 throw e;
272 }
273 return ingestedIds;
274 }
275
276
277
278
279
280
281
282
283
284
285 public List<String> ingestRequestDocumentsForBulk(List<RequestDocument> reqDocs, Session session) throws Exception {
286 List<String> ingestedIds = new ArrayList<String>();
287 if (reqDocs != null && reqDocs.size() > 0) {
288 Node formatNode = getStaticFormatNode(reqDocs.get(0), session);
289 for (RequestDocument reqDoc : reqDocs) {
290 if (DocType.BIB.isEqualTo(reqDoc.getType()))
291 ingestedIds.add(ingestBibDocument(reqDoc, session, formatNode).getIdentifier());
292 else if (DocType.INSTANCE.isEqualTo(reqDoc.getType()))
293 ingestedIds.addAll(ingestInstaceRequestDocumentForBulk(reqDoc, session, formatNode));
294 else if (DocType.PATRON.isEqualTo(reqDoc.getType()))
295 ingestedIds.addAll(ingestPatronRequestDocument(reqDoc, session, formatNode));
296 }
297 }
298 return ingestedIds;
299 }
300
301
302
303
304
305
306
307
308
309
310 public List<String> ingestRequestDocumentsForBulkUsingBTreeMgr(List<RequestDocument> reqDocs, Session session) throws Exception {
311 List<String> ingestedIds = new ArrayList<String>();
312 if (reqDocs != null && reqDocs.size() > 0) {
313 Node formatNode = getStaticFormatNode(reqDocs.get(0), session);
314 List<Node> nodes = ingestBibDocumentUsingBTreeMgr(reqDocs, session, formatNode);
315 for (Node node : nodes) {
316 ingestedIds.add(node.getIdentifier());
317 }
318 }
319 return ingestedIds;
320 }
321
322
323
324
325
326
327
328
329
330
331 public List<String> ingestBibNLinkedInstanceRequestDocuments(RequestDocument reqDoc, Session session) throws Exception {
332 List<String> ingestedIds = new ArrayList<String>();
333 try {
334 Node bibNode = ingestBibDocument(reqDoc, session, null);
335 ingestedIds.add(bibNode.getIdentifier());
336 List<String> linkedBibIds = new ArrayList<String>();
337 linkedBibIds.add(bibNode.getIdentifier());
338 for (RequestDocument linkedDoc : reqDoc.getLinkedRequestDocuments()) {
339 Node instanceNode = ingestInstanceDocument(linkedDoc, session, ingestedIds, linkedBibIds, null);
340 instanceNode.setProperty("bibIdentifier", reqDoc.getUuid());
341 bibNode.setProperty("instanceIdentifier", instanceNode.getIdentifier());
342 }
343 } catch (Exception e) {
344 logger.error("Ingest failed for Request Document: ", e);
345 throw e;
346 }
347 return ingestedIds;
348 }
349
350
351
352
353
354
355
356
357
358
359 public Node ingestWorkLicenseOnixplRequestDocument(RequestDocument reqDoc, Session session, List<String> ingestedIds) throws Exception {
360 Node licenseNode;
361 if (ingestedIds == null){
362 ingestedIds = new ArrayList<String>();
363 }
364 try {
365 licenseNode = ingestLicenseDocument(reqDoc, session, null);
366 ingestedIds.add(licenseNode.getIdentifier());
367 }
368 catch (Exception e) {
369 logger.error("Ingest failed for Request Document: ", e);
370 throw e;
371 }
372 return licenseNode;
373 }
374
375
376
377
378
379
380
381
382
383
384 public synchronized List<String> ingestInstaceRequestDocumentForBulk(RequestDocument reqDoc, Session session, Node formatNode) throws Exception {
385 List<String> parsedDocs = new ArrayList<String>();
386 try {
387
388 validateContent(reqDoc.getFormat(), reqDoc.getContent().getContent());
389
390 Node instanceNode = ingestInstanceDocument(reqDoc, session, parsedDocs, null, formatNode);
391 OleInstance instance = ((InstanceCollection) reqDoc.getContent().getContentObject()).getInstanceCollection().get(0);
392 for (ResourceIdentifier resourceId : instance.getResourceIdentifier()) {
393 try {
394 Node bibNode = nodeHandler.getNodeByUUID(session, resourceId.getValue());
395 bibNode.setProperty("instanceIdentifier", instanceNode.getIdentifier());
396 instanceNode.setProperty("bibIdentifier", resourceId.getValue());
397 } catch (Exception e) {
398 logger.info("Mapping Not Successful: From Bib(" + resourceId + ") --> Instance(" + instanceNode.getIdentifier() + ")");
399 }
400 }
401 } catch (Exception e) {
402 logger.error("Ingest failed for Request Document: ", e);
403 throw new Exception("Ingest failed for Request Document: ", e);
404 }
405 return parsedDocs;
406 }
407
408
409
410
411
412
413
414
415 public void rollbackDocStoreIngestedData(Session session, List<RequestDocument> requestDocuments) {
416 try {
417 for (RequestDocument document : requestDocuments) {
418 try {
419 session.getNodeByIdentifier(document.getUuid()).remove();
420 }
421 catch (Exception e) {
422
423 }
424 for (RequestDocument linkedDoc : document.getLinkedRequestDocuments()) {
425 try {
426 session.getNodeByIdentifier(linkedDoc.getUuid()).remove();
427 }
428 catch (Exception e) {
429
430 }
431 }
432 }
433 session.save();
434 }
435 catch (Exception e) {
436 e.printStackTrace();
437 }
438 }
439
440
441
442
443
444
445
446
447 private void validateContent(String format, String content) {
448 }
449 }