1 package org.kuali.ole.docstore.service;
2
3 import org.apache.commons.lang.time.StopWatch;
4 import org.apache.jackrabbit.commons.flat.*;
5 import org.kuali.ole.docstore.common.document.content.instance.Instance;
6 import org.kuali.ole.docstore.model.enums.DocFormat;
7 import org.kuali.ole.docstore.model.enums.DocType;
8 import org.kuali.ole.docstore.model.xmlpojo.ingest.RequestDocument;
9 import org.kuali.ole.docstore.common.document.content.instance.InstanceCollection;
10 import org.kuali.ole.documenthandler.InstanceRequestDocumentResolver;
11 import org.kuali.ole.repository.NodeHandler;
12 import org.slf4j.Logger;
13 import org.slf4j.LoggerFactory;
14
15 import javax.jcr.Node;
16 import javax.jcr.RepositoryException;
17 import javax.jcr.Session;
18 import javax.jcr.nodetype.NodeType;
19 import java.text.DateFormat;
20 import java.text.Format;
21 import java.text.SimpleDateFormat;
22 import java.util.ArrayList;
23 import java.util.Date;
24 import java.util.List;
25 import java.util.Random;
26
27 import static org.kuali.ole.docstore.process.ProcessParameters.*;
28
29
30
31
32
33
34
35
36 public class DocumentIngester {
37
38 private static Logger logger = LoggerFactory.getLogger(DocumentIngester.class);
39 private NodeHandler nodeHandler = new NodeHandler();
40 private TreeManager treeManager;
41 private NodeSequence nodeSequence;
42 private int i = 0;
43
44 public Node getStaticFormatNode(RequestDocument doc, Session session) throws RepositoryException {
45 Node formatNode = null;
46 Node root = session.getRootNode();
47 Node categoryNode = nodeHandler.initStaticNode(doc.getCategory(), root, session);
48 Node typeNode = nodeHandler.initStaticNode(doc.getType(), categoryNode, session);
49 formatNode = nodeHandler.initStaticNode(doc.getFormat(), typeNode, session);
50 return formatNode;
51 }
52
53
54
55
56
57
58
59
60
61
62 protected synchronized Node ingestBibDocument(RequestDocument reqDoc, Session session, Node formatNode)
63 throws Exception {
64 Node bibFileNode = null;
65 try {
66 String file = "file";
67 if (DocFormat.MARC.isEqualTo(reqDoc.getFormat())) {
68 file = FILE_MARC;
69 } else {
70 file = reqDoc.getFormat() + FILE;
71 }
72
73 Node bibFormatNode = null;
74 if (formatNode == null) {
75 bibFormatNode = getStaticFormatNode(reqDoc, session);
76 } else {
77 bibFormatNode = formatNode;
78 }
79 Node l1 = null;
80 Node l3 = null;
81 synchronized (nodeHandler) {
82 l1 = nodeHandler.initLevelNode(NODE_LEVEL1, bibFormatNode, false, session);
83
84
85 }
86
87 bibFileNode = nodeHandler.initFileNode(reqDoc, file, l1, session);
88 } catch (Exception e) {
89 logger.error("Ingest failed for RequestDocument: ", e);
90 throw e;
91 }
92 return bibFileNode;
93 }
94
95
96
97
98
99
100
101
102
103
104 protected synchronized List<Node> ingestBibDocumentUsingBTreeMgr(List<RequestDocument> reqDocs, Session session,
105 Node formatNode) throws Exception {
106 List<Node> fileNodes = null;
107 fileNodes = new ArrayList<Node>();
108 try {
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125 StopWatch btreeTimer = new StopWatch();
126 DateFormat dateFormat = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss:SSS");
127 Date date = new Date();
128 btreeTimer.start();
129 treeManager = new BTreeManager(formatNode, 500, 1000, Rank.<String>comparableComparator(), true);
130
131 nodeSequence = ItemSequence.createNodeSequence(treeManager);
132 btreeTimer.stop();
133 logger.info("Time taken for initializing btree manager sequence=" + btreeTimer.toString());
134 StopWatch btreeAddNodeTimer = new StopWatch();
135 Node node = null;
136 btreeAddNodeTimer.start();
137 Random generator = new Random(19580427);
138 Format formatter = new SimpleDateFormat("dd-MM-yyyy HH-mm-ss");
139 Date date1 = null;
140 for (RequestDocument reqDoc : reqDocs) {
141 node = null;
142 date1 = new Date();
143 String dateStr = formatter.format(date1);
144 node = nodeSequence.addNode(dateStr + "-" + generator.nextInt(), NodeType.NT_UNSTRUCTURED);
145 nodeHandler.initFileNode(node, reqDoc, FILE_MARC, null, session);
146 fileNodes.add(node);
147
148 }
149 btreeAddNodeTimer.stop();
150 logger.info("Time taken for adding " + reqDocs.size() + " nodes to btree: " + btreeAddNodeTimer.toString());
151
152 } catch (Exception e) {
153 logger.error("Ingest failed for RequestDocument: ", e);
154 throw new Exception(e);
155 }
156 return fileNodes;
157 }
158
159
160
161
162
163
164
165
166
167
168
169 protected synchronized Node ingestLicenseDocument(RequestDocument reqDoc, Session session, Node formatNode)
170 throws Exception {
171 Node licenseFileNode = null;
172 try {
173 String file = "file";
174 if (DocFormat.ONIXPL.isEqualTo(reqDoc.getFormat())) {
175 file = FILE_ONIXPL;
176 } else {
177 file = reqDoc.getFormat() + FILE;
178 }
179 Node licenseFormatNode = null;
180 if (formatNode == null) {
181 licenseFormatNode = getStaticFormatNode(reqDoc, session);
182 } else {
183 licenseFormatNode = formatNode;
184 }
185 synchronized (nodeHandler) {
186 Node l1 = nodeHandler.initLevelNode(NODE_LEVEL1, licenseFormatNode, false, session);
187 licenseFileNode = nodeHandler.initFileNode(reqDoc, file, l1, session);
188 }
189 } catch (Exception e) {
190 logger.error("Ingest failed for RequestDocument: ", e);
191 throw e;
192 }
193 return licenseFileNode;
194 }
195
196
197
198
199
200
201
202
203
204
205
206
207 protected synchronized Node ingestInstanceDocument(RequestDocument reqDoc, Session session,
208 List<String> ingestedIds, List<String> linkedBibIds,
209 Node formatNode) throws Exception {
210 Node instanceNode = null;
211 if (ingestedIds == null) {
212 ingestedIds = new ArrayList<String>();
213 }
214 try {
215 InstanceRequestDocumentResolver resolver = new InstanceRequestDocumentResolver();
216 List<RequestDocument> resolvedDocs = resolver.getParsedHoldingsNItemDocuments(reqDoc, linkedBibIds);
217 Node instFormatNode = null;
218 if (formatNode == null) {
219 instFormatNode = getStaticFormatNode(reqDoc, session);
220 } else {
221 instFormatNode = formatNode;
222 }
223 synchronized (nodeHandler) {
224 Node l1 = nodeHandler.initLevelNode(NODE_LEVEL1, instFormatNode, false, session);
225 Node l2 = nodeHandler.initLevelNode(NODE_LEVEL2, l1, false, session);
226 instanceNode = nodeHandler.initLevelNode(NODE_INSTANCE, l2, false, session);
227 }
228 Node holdingsNode = nodeHandler.initNonStaticNode(NODE_HOLDINGS, instanceNode);
229 reqDoc.setUuid(instanceNode.getIdentifier());
230 ingestedIds.add(nodeHandler.initFileNode(resolvedDocs.get(0), FILE_INSTANCE, instanceNode, session)
231 .getIdentifier());
232 ingestedIds.add(nodeHandler.initFileNode(resolvedDocs.get(1), FILE_HOLDINGS, holdingsNode, session)
233 .getIdentifier());
234 ingestedIds.add(nodeHandler.initFileNode(resolvedDocs.get(2), FILE_SOURCE_HOLDINGS, holdingsNode, session)
235 .getIdentifier());
236 for (int i = 3; i < resolvedDocs.size(); i++) {
237 ingestedIds.add(nodeHandler.initFileNode(resolvedDocs.get(i), FILE_ITEM, holdingsNode, session)
238 .getIdentifier());
239 }
240 ((InstanceCollection) reqDoc.getContent().getContentObject()).getInstance().get(0).setInstanceIdentifier(
241 instanceNode.getIdentifier());
242 } catch (Exception e) {
243 logger.error("Ingest failed for RequestDocument: ", e);
244 throw e;
245 }
246 return instanceNode;
247 }
248
249 protected synchronized List<String> ingestPatronRequestDocument(RequestDocument reqDoc, Session session,
250 Node formatNode) throws Exception {
251 List<String> ingestedIds = new ArrayList<String>();
252 try {
253
254 validateContent(reqDoc.getFormat(), reqDoc.getContent().getContent());
255
256 Node patronFormatNode = null;
257 if (formatNode == null) {
258 patronFormatNode = getStaticFormatNode(reqDoc, session);
259 } else {
260 patronFormatNode = formatNode;
261 }
262 String uuid = nodeHandler.initFileNode(reqDoc, FILE_PATRON_OLEML, patronFormatNode, session)
263 .getIdentifier();
264 reqDoc.setUuid(uuid);
265 ingestedIds.add(uuid);
266 } catch (Exception e) {
267 logger.error("Ingest failed for RequestDocument: ", e);
268 throw e;
269 }
270 return ingestedIds;
271 }
272
273
274
275
276
277
278
279
280
281 public List<String> ingestRequestDocumentsForBulk(List<RequestDocument> reqDocs, Session session) throws Exception {
282 List<String> ingestedIds = new ArrayList<String>();
283 if (reqDocs != null && reqDocs.size() > 0) {
284 Node formatNode = getStaticFormatNode(reqDocs.get(0), session);
285 String docType = reqDocs.get(0).getType();
286 if (DocType.BIB.isEqualTo(docType)) {
287 ingestedIds = ingestBatch(reqDocs, session, formatNode);
288 } else {
289 for (RequestDocument reqDoc : reqDocs) {
290
291
292
293
294 if (DocType.INSTANCE.isEqualTo(reqDoc.getType())) {
295 ingestedIds.addAll(ingestInstaceRequestDocumentForBulk(reqDoc, session, formatNode));
296 } else if (DocType.PATRON.isEqualTo(reqDoc.getType())) {
297 ingestedIds.addAll(ingestPatronRequestDocument(reqDoc, session, formatNode));
298 }
299 }
300 }
301 }
302 return ingestedIds;
303 }
304
305
306
307
308
309
310
311
312
313 public List<String> ingestBatch(List<RequestDocument> reqDocs, Session session, Node formatNode) throws Exception {
314 String nodeName = reqDocs.get(0).getFormat() + FILE;
315 List<String> idList = new ArrayList<String>();
316
317 Node levelNode = null;
318 synchronized (nodeHandler) {
319 levelNode = nodeHandler.initNonStaticNode(NODE_LEVEL1, formatNode);
320 }
321 for (RequestDocument requestDocument : reqDocs) {
322 Node fileNode = nodeHandler.initFileNode(requestDocument, nodeName, levelNode, session);
323 idList.add(fileNode.getIdentifier());
324 }
325 return idList;
326 }
327
328
329
330
331
332
333
334
335
336
337 @Deprecated
338 public List<String> ingestRequestDocumentsForBulkUsingBTreeMgr(List<RequestDocument> reqDocs, Session session)
339 throws Exception {
340 List<String> ingestedIds = new ArrayList<String>();
341 if (reqDocs != null && reqDocs.size() > 0) {
342 Node formatNode = getStaticFormatNode(reqDocs.get(0), session);
343 List<Node> nodes = ingestBibDocumentUsingBTreeMgr(reqDocs, session, formatNode);
344 for (Node node : nodes) {
345 ingestedIds.add(node.getIdentifier());
346 }
347 }
348 return ingestedIds;
349 }
350
351
352
353
354
355
356
357
358
359 public List<String> ingestBibNLinkedInstanceRequestDocuments(RequestDocument reqDoc, Session session)
360 throws Exception {
361 List<String> ingestedIds = new ArrayList<String>();
362 try {
363 Node bibNode = ingestBibDocument(reqDoc, session, null);
364 ingestedIds.add(bibNode.getIdentifier());
365 List<String> linkedBibIds = new ArrayList<String>();
366 linkedBibIds.add(bibNode.getIdentifier());
367 for (RequestDocument linkedDoc : reqDoc.getLinkedRequestDocuments()) {
368 Node instanceNode = ingestInstanceDocument(linkedDoc, session, ingestedIds, linkedBibIds, null);
369 instanceNode.setProperty("bibIdentifier", reqDoc.getUuid());
370 bibNode.setProperty("instanceIdentifier", instanceNode.getIdentifier());
371 }
372 } catch (Exception e) {
373 logger.error("Ingest failed for Request Document: ", e);
374 throw e;
375 }
376 return ingestedIds;
377 }
378
379
380
381
382
383
384
385
386
387 public Node ingestWorkLicenseOnixplRequestDocument(RequestDocument reqDoc, Session session,
388 List<String> ingestedIds) throws Exception {
389 Node licenseNode;
390 if (ingestedIds == null) {
391 ingestedIds = new ArrayList<String>();
392 }
393 try {
394 licenseNode = ingestLicenseDocument(reqDoc, session, null);
395 ingestedIds.add(licenseNode.getIdentifier());
396 } catch (Exception e) {
397 logger.error("Ingest failed for Request Document: ", e);
398 throw e;
399 }
400 return licenseNode;
401 }
402
403
404
405
406
407
408
409
410
411 public synchronized List<String> ingestInstaceRequestDocumentForBulk(RequestDocument reqDoc, Session session,
412 Node formatNode) throws Exception {
413 List<String> parsedDocs = new ArrayList<String>();
414 try {
415
416 validateContent(reqDoc.getFormat(), reqDoc.getContent().getContent());
417 reqDoc.getContent().setContentObject(reqDoc.getContent().getContent());
418 Node instanceNode = ingestInstanceDocument(reqDoc, session, parsedDocs, null, formatNode);
419 Instance instance = ((InstanceCollection) reqDoc.getContent().getContentObject()).getInstance().get(0);
420 for (String resourceId : instance.getResourceIdentifier()) {
421 try {
422 Node bibNode = nodeHandler.getNodeByUUID(session, resourceId);
423 bibNode.setProperty("instanceIdentifier", instanceNode.getIdentifier());
424 instanceNode.setProperty("bibIdentifier", resourceId);
425 } catch (Exception e) {
426 logger.info("Mapping Not Successful: From Bib(" + resourceId + ") --> Instance("
427 + instanceNode.getIdentifier() + ")");
428 }
429 }
430 } catch (Exception e) {
431 logger.error("Ingest failed for Request Document: ", e);
432 throw new Exception("Ingest failed for Request Document: ", e);
433 }
434 return parsedDocs;
435 }
436
437
438
439
440
441
442
443 public void rollbackDocStoreIngestedData(Session session, List<RequestDocument> requestDocuments) {
444 try {
445 for (RequestDocument document : requestDocuments) {
446 try {
447 session.getNodeByIdentifier(document.getUuid()).remove();
448 } catch (Exception e) {
449 logger.error(e.getMessage() , e);
450 }
451 for (RequestDocument linkedDoc : document.getLinkedRequestDocuments()) {
452 try {
453 session.getNodeByIdentifier(linkedDoc.getUuid()).remove();
454 } catch (Exception e) {
455 logger.error(e.getMessage() , e);
456 }
457 }
458 }
459 session.save();
460 } catch (Exception e) {
461 logger.info(e.getMessage() , e);
462 }
463 }
464
465
466
467
468
469
470
471 private void validateContent(String format, String content) {
472 }
473 }