1 package org.kuali.ole.docstore.service;
2
3 import org.apache.commons.lang.time.StopWatch;
4 import org.apache.jackrabbit.commons.flat.*;
5 import org.kuali.ole.docstore.model.enums.DocFormat;
6 import org.kuali.ole.docstore.model.enums.DocType;
7 import org.kuali.ole.docstore.model.xmlpojo.ingest.RequestDocument;
8 import org.kuali.ole.docstore.model.xmlpojo.work.instance.oleml.Instance;
9 import org.kuali.ole.docstore.model.xmlpojo.work.instance.oleml.InstanceCollection;
10 import org.kuali.ole.documenthandler.InstanceRequestDocumentResolver;
11 import org.kuali.ole.repository.NodeHandler;
12 import org.slf4j.Logger;
13 import org.slf4j.LoggerFactory;
14
15 import javax.jcr.Node;
16 import javax.jcr.RepositoryException;
17 import javax.jcr.Session;
18 import javax.jcr.nodetype.NodeType;
19 import java.text.DateFormat;
20 import java.text.Format;
21 import java.text.SimpleDateFormat;
22 import java.util.ArrayList;
23 import java.util.Date;
24 import java.util.List;
25 import java.util.Random;
26
27 import static org.kuali.ole.docstore.process.ProcessParameters.*;
28
29
30
31
32
33
34
35
36 public class DocumentIngester {
37
38 private static Logger logger = LoggerFactory.getLogger(DocumentIngester.class);
39 private NodeHandler nodeHandler = new NodeHandler();
40 private TreeManager treeManager;
41 private NodeSequence nodeSequence;
42 private int i = 0;
43
44 public Node getStaticFormatNode(RequestDocument doc, Session session) throws RepositoryException {
45 Node formatNode = null;
46 Node root = session.getRootNode();
47 Node categoryNode = nodeHandler.initStaticNode(doc.getCategory(), root, session);
48 Node typeNode = nodeHandler.initStaticNode(doc.getType(), categoryNode, session);
49 formatNode = nodeHandler.initStaticNode(doc.getFormat(), typeNode, session);
50 return formatNode;
51 }
52
53
54
55
56
57
58
59
60
61
62 protected synchronized Node ingestBibDocument(RequestDocument reqDoc, Session session, Node formatNode)
63 throws Exception {
64 Node bibFileNode = null;
65 try {
66 String file = "file";
67 if (DocFormat.MARC.isEqualTo(reqDoc.getFormat())) {
68 file = FILE_MARC;
69 }
70 else {
71 file = reqDoc.getFormat() + FILE;
72 }
73
74 Node bibFormatNode = null;
75 if (formatNode == null) {
76 bibFormatNode = getStaticFormatNode(reqDoc, session);
77 }
78 else {
79 bibFormatNode = formatNode;
80 }
81 Node l1 = null;
82 Node l3 = null;
83 synchronized (nodeHandler) {
84 l1 = nodeHandler.initLevelNode(NODE_LEVEL1, bibFormatNode, false, session);
85
86
87 }
88
89 bibFileNode = nodeHandler.initFileNode(reqDoc, file, l1, session);
90 }
91 catch (Exception e) {
92 logger.error("Ingest failed for RequestDocument: ", e);
93 throw e;
94 }
95 return bibFileNode;
96 }
97
98
99
100
101
102
103
104
105
106
107 protected synchronized List<Node> ingestBibDocumentUsingBTreeMgr(List<RequestDocument> reqDocs, Session session,
108 Node formatNode) throws Exception {
109 List<Node> fileNodes = null;
110 fileNodes = new ArrayList<Node>();
111 try {
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128 StopWatch btreeTimer = new StopWatch();
129 DateFormat dateFormat = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss:SSS");
130 Date date = new Date();
131 btreeTimer.start();
132 treeManager = new BTreeManager(formatNode, 500, 1000, Rank.<String>comparableComparator(), true);
133
134 nodeSequence = ItemSequence.createNodeSequence(treeManager);
135 btreeTimer.stop();
136 logger.info("Time taken for initializing btree manager sequence=" + btreeTimer.toString());
137 StopWatch btreeAddNodeTimer = new StopWatch();
138 Node node = null;
139 btreeAddNodeTimer.start();
140 Random generator = new Random(19580427);
141 Format formatter = new SimpleDateFormat("dd-MM-yyyy HH-mm-ss");
142 Date date1 = null;
143 for (RequestDocument reqDoc : reqDocs) {
144 node = null;
145 date1 = new Date();
146 String dateStr = formatter.format(date1);
147 node = nodeSequence.addNode(dateStr + "-" + generator.nextInt(), NodeType.NT_UNSTRUCTURED);
148 nodeHandler.initFileNode(node, reqDoc, FILE_MARC, null, session);
149 fileNodes.add(node);
150
151 }
152 btreeAddNodeTimer.stop();
153 logger.info("Time taken for adding " + reqDocs.size() + " nodes to btree: " + btreeAddNodeTimer.toString());
154
155 }
156 catch (Exception e) {
157 logger.error("Ingest failed for RequestDocument: ", e);
158 throw new Exception(e);
159 }
160 return fileNodes;
161 }
162
163
164
165
166
167
168
169
170
171
172
173 protected synchronized Node ingestLicenseDocument(RequestDocument reqDoc, Session session, Node formatNode)
174 throws Exception {
175 Node licenseFileNode = null;
176 try {
177 String file = "file";
178 if (DocFormat.ONIXPL.isEqualTo(reqDoc.getFormat())) {
179 file = FILE_ONIXPL;
180 }
181 else {
182 file = reqDoc.getFormat() + FILE;
183 }
184 Node licenseFormatNode = null;
185 if (formatNode == null) {
186 licenseFormatNode = getStaticFormatNode(reqDoc, session);
187 }
188 else {
189 licenseFormatNode = formatNode;
190 }
191 synchronized (nodeHandler) {
192 Node l1 = nodeHandler.initLevelNode(NODE_LEVEL1, licenseFormatNode, false, session);
193 licenseFileNode = nodeHandler.initFileNode(reqDoc, file, l1, session);
194 }
195 }
196 catch (Exception e) {
197 logger.error("Ingest failed for RequestDocument: ", e);
198 throw e;
199 }
200 return licenseFileNode;
201 }
202
203
204
205
206
207
208
209
210
211
212
213
214 protected synchronized Node ingestInstanceDocument(RequestDocument reqDoc, Session session,
215 List<String> ingestedIds, List<String> linkedBibIds,
216 Node formatNode) throws Exception {
217 Node instanceNode = null;
218 if (ingestedIds == null) {
219 ingestedIds = new ArrayList<String>();
220 }
221 try {
222 InstanceRequestDocumentResolver resolver = new InstanceRequestDocumentResolver();
223 List<RequestDocument> resolvedDocs = resolver.getParsedHoldingsNItemDocuments(reqDoc, linkedBibIds);
224 Node instFormatNode = null;
225 if (formatNode == null) {
226 instFormatNode = getStaticFormatNode(reqDoc, session);
227 }
228 else {
229 instFormatNode = formatNode;
230 }
231 synchronized (nodeHandler) {
232 Node l1 = nodeHandler.initLevelNode(NODE_LEVEL1, instFormatNode, false, session);
233 Node l2 = nodeHandler.initLevelNode(NODE_LEVEL2, l1, false, session);
234 instanceNode = nodeHandler.initLevelNode(NODE_INSTANCE, l2, false, session);
235 }
236 Node holdingsNode = nodeHandler.initNonStaticNode(NODE_HOLDINGS, instanceNode);
237 reqDoc.setUuid(instanceNode.getIdentifier());
238 ingestedIds.add(nodeHandler.initFileNode(resolvedDocs.get(0), FILE_INSTANCE, instanceNode, session)
239 .getIdentifier());
240 ingestedIds.add(nodeHandler.initFileNode(resolvedDocs.get(1), FILE_HOLDINGS, holdingsNode, session)
241 .getIdentifier());
242 ingestedIds.add(nodeHandler.initFileNode(resolvedDocs.get(2), FILE_SOURCE_HOLDINGS, holdingsNode, session)
243 .getIdentifier());
244 for (int i = 3; i < resolvedDocs.size(); i++) {
245 ingestedIds.add(nodeHandler.initFileNode(resolvedDocs.get(i), FILE_ITEM, holdingsNode, session)
246 .getIdentifier());
247 }
248 ((InstanceCollection) reqDoc.getContent().getContentObject()).getInstance().get(0).setInstanceIdentifier(
249 instanceNode.getIdentifier());
250 }
251 catch (Exception e) {
252 logger.error("Ingest failed for RequestDocument: ", e);
253 throw e;
254 }
255 return instanceNode;
256 }
257
258 protected synchronized List<String> ingestPatronRequestDocument(RequestDocument reqDoc, Session session,
259 Node formatNode) throws Exception {
260 List<String> ingestedIds = new ArrayList<String>();
261 try {
262
263 validateContent(reqDoc.getFormat(), reqDoc.getContent().getContent());
264
265 Node patronFormatNode = null;
266 if (formatNode == null) {
267 patronFormatNode = getStaticFormatNode(reqDoc, session);
268 }
269 else {
270 patronFormatNode = formatNode;
271 }
272 String uuid = nodeHandler.initFileNode(reqDoc, FILE_PATRON_OLEML, patronFormatNode, session)
273 .getIdentifier();
274 reqDoc.setUuid(uuid);
275 ingestedIds.add(uuid);
276 }
277 catch (Exception e) {
278 logger.error("Ingest failed for RequestDocument: ", e);
279 throw e;
280 }
281 return ingestedIds;
282 }
283
284
285
286
287
288
289
290
291
292 public List<String> ingestRequestDocumentsForBulk(List<RequestDocument> reqDocs, Session session) throws Exception {
293 List<String> ingestedIds = new ArrayList<String>();
294 if (reqDocs != null && reqDocs.size() > 0) {
295 Node formatNode = getStaticFormatNode(reqDocs.get(0), session);
296 String docType = reqDocs.get(0).getType();
297 if (DocType.BIB.isEqualTo(docType)) {
298 ingestedIds = ingestBatch(reqDocs, session, formatNode);
299 }
300 else {
301 for (RequestDocument reqDoc : reqDocs) {
302
303
304
305
306 if (DocType.INSTANCE.isEqualTo(reqDoc.getType())) {
307 ingestedIds.addAll(ingestInstaceRequestDocumentForBulk(reqDoc, session, formatNode));
308 }
309 else if (DocType.PATRON.isEqualTo(reqDoc.getType())) {
310 ingestedIds.addAll(ingestPatronRequestDocument(reqDoc, session, formatNode));
311 }
312 }
313 }
314 }
315 return ingestedIds;
316 }
317
318
319
320
321
322
323
324
325 public List<String> ingestBatch(List<RequestDocument> reqDocs, Session session, Node formatNode) throws Exception {
326 String nodeName = reqDocs.get(0).getFormat() + FILE;
327 List<String> idList = new ArrayList<String>();
328
329 Node levelNode = null;
330 synchronized (nodeHandler) {
331 levelNode = nodeHandler.initNonStaticNode(NODE_LEVEL1, formatNode);
332 }
333 for (RequestDocument requestDocument : reqDocs) {
334 Node fileNode = nodeHandler.initFileNode(requestDocument, nodeName, levelNode, session);
335 idList.add(fileNode.getIdentifier());
336 }
337 return idList;
338 }
339
340
341
342
343
344
345
346
347
348
349 public List<String> ingestRequestDocumentsForBulkUsingBTreeMgr(List<RequestDocument> reqDocs, Session session)
350 throws Exception {
351 List<String> ingestedIds = new ArrayList<String>();
352 if (reqDocs != null && reqDocs.size() > 0) {
353 Node formatNode = getStaticFormatNode(reqDocs.get(0), session);
354 List<Node> nodes = ingestBibDocumentUsingBTreeMgr(reqDocs, session, formatNode);
355 for (Node node : nodes) {
356 ingestedIds.add(node.getIdentifier());
357 }
358 }
359 return ingestedIds;
360 }
361
362
363
364
365
366
367
368
369
370 public List<String> ingestBibNLinkedInstanceRequestDocuments(RequestDocument reqDoc, Session session)
371 throws Exception {
372 List<String> ingestedIds = new ArrayList<String>();
373 try {
374 Node bibNode = ingestBibDocument(reqDoc, session, null);
375 ingestedIds.add(bibNode.getIdentifier());
376 List<String> linkedBibIds = new ArrayList<String>();
377 linkedBibIds.add(bibNode.getIdentifier());
378 for (RequestDocument linkedDoc : reqDoc.getLinkedRequestDocuments()) {
379 Node instanceNode = ingestInstanceDocument(linkedDoc, session, ingestedIds, linkedBibIds, null);
380 instanceNode.setProperty("bibIdentifier", reqDoc.getUuid());
381 bibNode.setProperty("instanceIdentifier", instanceNode.getIdentifier());
382 }
383 }
384 catch (Exception e) {
385 logger.error("Ingest failed for Request Document: ", e);
386 throw e;
387 }
388 return ingestedIds;
389 }
390
391
392
393
394
395
396
397
398
399 public Node ingestWorkLicenseOnixplRequestDocument(RequestDocument reqDoc, Session session,
400 List<String> ingestedIds) throws Exception {
401 Node licenseNode;
402 if (ingestedIds == null) {
403 ingestedIds = new ArrayList<String>();
404 }
405 try {
406 licenseNode = ingestLicenseDocument(reqDoc, session, null);
407 ingestedIds.add(licenseNode.getIdentifier());
408 }
409 catch (Exception e) {
410 logger.error("Ingest failed for Request Document: ", e);
411 throw e;
412 }
413 return licenseNode;
414 }
415
416
417
418
419
420
421
422
423
424 public synchronized List<String> ingestInstaceRequestDocumentForBulk(RequestDocument reqDoc, Session session,
425 Node formatNode) throws Exception {
426 List<String> parsedDocs = new ArrayList<String>();
427 try {
428
429 validateContent(reqDoc.getFormat(), reqDoc.getContent().getContent());
430 reqDoc.getContent().setContentObject(reqDoc.getContent().getContent());
431 Node instanceNode = ingestInstanceDocument(reqDoc, session, parsedDocs, null, formatNode);
432 Instance instance = ((InstanceCollection) reqDoc.getContent().getContentObject()).getInstance().get(0);
433 for (String resourceId : instance.getResourceIdentifier()) {
434 try {
435 Node bibNode = nodeHandler.getNodeByUUID(session, resourceId);
436 bibNode.setProperty("instanceIdentifier", instanceNode.getIdentifier());
437 instanceNode.setProperty("bibIdentifier", resourceId);
438 }
439 catch (Exception e) {
440 logger.info("Mapping Not Successful: From Bib(" + resourceId + ") --> Instance("
441 + instanceNode.getIdentifier() + ")");
442 }
443 }
444 }
445 catch (Exception e) {
446 logger.error("Ingest failed for Request Document: ", e);
447 throw new Exception("Ingest failed for Request Document: ", e);
448 }
449 return parsedDocs;
450 }
451
452
453
454
455
456
457
458 public void rollbackDocStoreIngestedData(Session session, List<RequestDocument> requestDocuments) {
459 try {
460 for (RequestDocument document : requestDocuments) {
461 try {
462 session.getNodeByIdentifier(document.getUuid()).remove();
463 }
464 catch (Exception e) {
465
466 }
467 for (RequestDocument linkedDoc : document.getLinkedRequestDocuments()) {
468 try {
469 session.getNodeByIdentifier(linkedDoc.getUuid()).remove();
470 }
471 catch (Exception e) {
472
473 }
474 }
475 }
476 session.save();
477 }
478 catch (Exception e) {
479 logger.info(e.getMessage());
480 }
481 }
482
483
484
485
486
487
488
489 private void validateContent(String format, String content) {
490 }
491 }