1 package org.kuali.ole.docstore.process;
2
3 import org.apache.commons.lang.time.StopWatch;
4 import org.kuali.ole.RepositoryBrowser;
5 import org.kuali.ole.RepositoryManager;
6 import org.kuali.ole.docstore.metrics.reindex.ReIndexingBatchStatus;
7 import org.kuali.ole.docstore.metrics.reindex.ReIndexingStatus;
8 import org.kuali.ole.docstore.model.enums.DocCategory;
9 import org.kuali.ole.docstore.model.enums.DocFormat;
10 import org.kuali.ole.docstore.model.enums.DocType;
11 import org.kuali.ole.docstore.model.xmlpojo.ingest.Content;
12 import org.kuali.ole.docstore.model.xmlpojo.ingest.RequestDocument;
13 import org.kuali.ole.docstore.model.xmlpojo.work.instance.oleml.InstanceCollection;
14 import org.kuali.ole.docstore.model.xstream.work.instance.oleml.WorkInstanceOlemlRecordProcessor;
15 import org.kuali.ole.docstore.service.DocumentIngester;
16 import org.kuali.ole.docstore.service.ServiceLocator;
17 import org.kuali.ole.pojo.OleException;
18 import org.kuali.ole.repository.CheckoutManager;
19 import org.slf4j.Logger;
20 import org.slf4j.LoggerFactory;
21
22 import javax.jcr.Node;
23 import javax.jcr.NodeIterator;
24 import javax.jcr.Session;
25 import java.text.DateFormat;
26 import java.text.SimpleDateFormat;
27 import java.util.ArrayList;
28 import java.util.Date;
29 import java.util.List;
30
31
32
33
34
35
36
37 public class RebuildIndexesHandler
38 implements Runnable {
39
40 private static RebuildIndexesHandler reBuilder = null;
41 private boolean isRunning = false;
42 private boolean isStop = false;
43 private static final Logger logger = LoggerFactory.getLogger(RebuildIndexesHandler.class);
44 private String docCategory;
45 private String docType;
46 private String docFormat;
47 private CheckoutManager checkoutManager;
48
49
50
51 public synchronized void setRunning(boolean running) {
52 isRunning = running;
53 }
54
55 public synchronized void setStop(boolean stop) {
56 isStop = stop;
57 }
58
59 private RebuildIndexesHandler() {
60 checkoutManager = new CheckoutManager();
61 }
62
63 public static RebuildIndexesHandler getInstance() {
64 if (reBuilder == null) {
65 reBuilder = new RebuildIndexesHandler();
66 }
67 return reBuilder;
68 }
69
70
71
72
73
74
75 public synchronized boolean isRunning() {
76 return isRunning;
77 }
78
79 public synchronized boolean isStop() {
80 return isStop;
81 }
82
83
84
85
86 public String startProcess(String docCategory, String docType, String docFormat) throws InterruptedException {
87 String status = null;
88 if (isRunning()) {
89 status = "ReIndexing process is already running. Click 'Status' button to know the status. ";
90 }
91 else {
92 setRunning(true);
93 setStop(false);
94 status = "ReIndexing process has started. Click 'Status' button to know the status. ";
95 ReIndexingStatus reIndexingStatus = ReIndexingStatus.getInstance();
96 reIndexingStatus.reset();
97 if (docCategory == null || docCategory.equals("")) {
98 docCategory = "all";
99 }
100 if (docType == null || docType.equals("")) {
101 docType = "all";
102 }
103 if (docFormat == null || docType.equals("")) {
104 docFormat = "all";
105 }
106 this.docCategory = docCategory;
107 this.docType = docType;
108 this.docFormat = docFormat;
109 Thread reBuilderThread = new Thread(this);
110 reBuilderThread.start();
111
112 setRunning(false);
113 }
114 return status;
115 }
116
117 public String stopProcess() throws Exception {
118 String status = null;
119 if (isRunning()) {
120 status = "ReIndexing process is running. ReIndexing will stop after current batch. ";
121 setStop(true);
122 setRunning(false);
123 }
124 else {
125 status = "ReIndexing process is not running.";
126 }
127 return status;
128
129 }
130
131 public void run() {
132 DocCategoryTypeFormat docCategoryTypeFormat = new DocCategoryTypeFormat();
133 List<String> categoryList = docCategoryTypeFormat.getCategories();
134 List<String> typeList = null;
135 List<String> formatList = null;
136 for (String docCategoryCurr : categoryList) {
137 if (docCategory.equals("all") || docCategory.equals(docCategoryCurr)) {
138 typeList = docCategoryTypeFormat.getDocTypes(docCategoryCurr);
139 for (String docTypeCurr : typeList) {
140 if (docType.equals("all") || docType.equals(docTypeCurr)) {
141 formatList = docCategoryTypeFormat.getDocFormats(docCategoryCurr, docTypeCurr);
142 for (String docFormatCurr : formatList) {
143 if (docFormat.equals("all") || docFormat.equals(docFormatCurr)) {
144 if (!isStop()) {
145 ReIndexingStatus.getInstance().startDocType(docCategoryCurr, docTypeCurr, docFormatCurr);
146 reIndex(docCategoryCurr, docTypeCurr, docFormatCurr);
147 }
148 else {
149 return;
150 }
151 }
152 }
153 }
154 }
155 }
156 }
157 setRunning(false);
158 }
159
160 private void reIndex(String docCategory, String docType, String docFormat) {
161 Session session = null;
162 setRunning(true);
163 logger.info("Rebuild Indexes Run(" + docCategory + " : " + docType + " : " + docFormat + "): ");
164 try {
165 if (docCategory.equals(DocCategory.WORK.getCode())) {
166 if (docType.equals(DocType.BIB.getDescription())) {
167 if (docFormat.equals(DocFormat.MARC.getCode()) || docFormat.equals(DocFormat.DUBLIN_CORE.getCode())
168 || docFormat.equals(DocFormat.DUBLIN_UNQUALIFIED.getCode())) {
169 workBibMarcAndDublinAll(docCategory, docType, docFormat);
170 }
171 else {
172 logger.info(
173 "Rebuild Indexes Run(" + docCategory + " : " + docType + " : " + docFormat + "): FAIL");
174 }
175 }
176 else if (docType.equals(DocType.INSTANCE.getDescription())) {
177 if (docFormat.equals(DocFormat.OLEML.getCode())) {
178 workInstanceOLEML(docCategory, docType, docFormat);
179 }
180 else {
181 logger.info(
182 "Rebuild Indexes Run(" + docCategory + " : " + docType + " : " + docFormat + "): FAIL");
183 }
184 }
185 else if (docType.equals(DocType.LICENSE.getDescription())) {
186 if (docFormat.equals(DocFormat.ONIXPL.getCode()) || docFormat.equals(DocFormat.PDF.getCode())
187 || docFormat.equals(DocFormat.DOC.getCode())) {
188 workLicense(docCategory, docType, docFormat);
189 }
190 else {
191 logger.info(
192 "Rebuild Indexes Run(" + docCategory + " : " + docType + " : " + docFormat + "): FAIL");
193 }
194 }
195 }
196 }
197 catch (Exception e) {
198 logger.info(e.getMessage(), e);
199 }
200 finally {
201 try {
202 if(isStop){
203 ReIndexingStatus.getInstance().getDocTypeList().setStatus("Stopped");
204 }
205 else{
206 ReIndexingStatus.getInstance().getDocTypeList().setStatus("Done");
207 }
208 RepositoryManager.getRepositoryManager().logout(session);
209 }
210 catch (OleException e) {
211 logger.error(e.getMessage(), e);
212 }
213 }
214
215 }
216
217
218 private void indexDocs(List<RequestDocument> docs, long records, long recCount,
219 List<ReIndexingBatchStatus> batchStatusList, ReIndexingBatchStatus reIndexingBatchStatus) {
220 try {
221 StopWatch indexTimer = new StopWatch();
222 DateFormat dateFormat = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss:SSS");
223 Date startDate = new Date();
224 reIndexingBatchStatus.setBatchStartTime(dateFormat.format(startDate));
225 indexTimer.start();
226 reIndexingBatchStatus.setStatus("Indexing");
227 reIndexingBatchStatus.setBatchIndexingTime(indexTimer.toString());
228 reIndexingBatchStatus.setRecordsProcessed(records);
229 reIndexingBatchStatus.setBatchEndTime(" ");
230 batchStatusList.add(reIndexingBatchStatus);
231 ReIndexingStatus.getInstance().getDocTypeList().setReIndBatStatusList(batchStatusList);
232 ServiceLocator.getIndexerService().indexDocuments(docs);
233 indexTimer.stop();
234 Date endDate = new Date();
235 reIndexingBatchStatus.setBatchEndTime(dateFormat.format(endDate));
236 reIndexingBatchStatus.setBatchIndexingTime(indexTimer.toString());
237 reIndexingBatchStatus.setRecordsProcessed(records);
238 reIndexingBatchStatus.setStatus("Done");
239 reIndexingBatchStatus.setRecordsRemaining(recCount - records);
240 ReIndexingStatus.getInstance().getDocTypeList().setReIndBatStatusList(batchStatusList);
241 docs.clear();
242 }
243 catch (Exception e) {
244 logger.error("Rebuild Indexes Processed(" + (records - docs.size()) + "), Failed @ batch(" + docs.size()
245 + "): Cause: " + e + "\n\tContinuous", e);
246 }
247 }
248
249 private void workBibMarcAndDublinAll(String docCategory, String docType, String docFormat) {
250
251 Session session = null;
252 long totalCount = 0;
253 long nodeCount = 0;
254 List<RequestDocument> docs = new ArrayList<RequestDocument>();
255 try {
256 session = RepositoryManager.getRepositoryManager().getSession(ProcessParameters.BULK_DEFAULT_USER,
257 ProcessParameters.BULK_DEFUALT_ACTION);
258 RequestDocument rd = new RequestDocument();
259 rd.setCategory(docCategory);
260 rd.setType(docType);
261 rd.setFormat(docFormat);
262 DocumentIngester docIngester = new DocumentIngester();
263 Node nodeFormat = docIngester.getStaticFormatNode(rd, session);
264 NodeIterator nodesL1 = nodeFormat.getNodes();
265 List<ReIndexingBatchStatus> batchStatusList = new ArrayList<ReIndexingBatchStatus>();
266 StopWatch loadTimer = new StopWatch();
267 StopWatch batchTimer = new StopWatch();
268 loadTimer.start();
269 batchTimer.start();
270 while (nodesL1.hasNext()) {
271 Node nodeL1 = nodesL1.nextNode();
272 NodeIterator nodesL2 = nodeL1.getNodes();
273 while (nodesL2.hasNext()) {
274 Node nodeL2 = nodesL2.nextNode();
275 NodeIterator nodesL3 = nodeL2.getNodes();
276 while (nodesL3.hasNext()) {
277 Node nodeL3 = nodesL3.nextNode();
278 NodeIterator nodesFile = nodeL3.getNodes();
279 nodeCount = nodeCount + nodesFile.getSize();
280 while (nodesFile.hasNext()) {
281 if (docs.size() == ProcessParameters.BULK_PROCESSOR_SPLIT_SIZE) {
282 if (!isStop()) {
283 ReIndexingBatchStatus reIndexingBatchStatus = indexBeforeParams(loadTimer);
284 indexDocs(docs, totalCount, nodeCount, batchStatusList, reIndexingBatchStatus);
285 indexAfterParams(batchTimer, reIndexingBatchStatus, batchStatusList);
286 resetTimers(batchTimer, loadTimer);
287 totalCount = 0;
288 logger.info("Rebuild");
289 }
290 else {
291 return;
292 }
293 }
294 else {
295 Node fileNode = nodesFile.nextNode();
296 String content = checkoutManager.getData(fileNode);
297 RequestDocument reqDoc = (RequestDocument) rd.clone();
298 reqDoc.setId(fileNode.getIdentifier());
299 reqDoc.setUuid(fileNode.getIdentifier());
300 Content contentObj = new Content();
301 contentObj.setContent(content);
302 reqDoc.setContent(contentObj);
303 docs.add(reqDoc);
304 totalCount++;
305 }
306 }
307 }
308 }
309 }
310 if (docs.size() > 0 && !isStop()) {
311 ReIndexingBatchStatus reIndexingBatchStatus = indexBeforeParams(loadTimer);
312 indexDocs(docs, totalCount, nodeCount, batchStatusList, reIndexingBatchStatus);
313 indexAfterParams(batchTimer, reIndexingBatchStatus, batchStatusList);
314 }
315 }
316 catch (Exception e) {
317 logger.error("Rebuild Indexes Process(" + docCategory + " : " + docType + " : " + docFormat + ") Processed("
318 + (totalCount - docs.size()) + "), Failed @ batch(" + docs.size() + "): Cause: " + e, e);
319 }
320 finally {
321 try {
322 if(isStop){
323 ReIndexingStatus.getInstance().getDocTypeList().setStatus("Stopped");
324 }
325 else{
326 ReIndexingStatus.getInstance().getDocTypeList().setStatus("Done");
327 }
328 RepositoryManager.getRepositoryManager().logout(session);
329
330 }
331 catch (OleException e) {
332 logger.error(e.getMessage(), e);
333 }
334 }
335 }
336
337 private void workInstanceOLEML(String docCategory, String docType, String docFormat) {
338 Session session = null;
339 long totalCount = 0;
340 long nodeCount = 0;
341 List<RequestDocument> docs = new ArrayList<RequestDocument>();
342 WorkInstanceOlemlRecordProcessor workInstanceOlemlRecordProcessor = new WorkInstanceOlemlRecordProcessor();
343 try {
344 session = RepositoryManager.getRepositoryManager().getSession(ProcessParameters.BULK_DEFAULT_USER,
345 ProcessParameters.BULK_DEFUALT_ACTION);
346 RequestDocument rd = new RequestDocument();
347 rd.setCategory(docCategory);
348 rd.setType(docType);
349 rd.setFormat(docFormat);
350 DocumentIngester docIngester = new DocumentIngester();
351 Node nodeFormat = docIngester.getStaticFormatNode(rd, session);
352 NodeIterator nodesL1 = nodeFormat.getNodes();
353 List<ReIndexingBatchStatus> batchStatusList = new ArrayList<ReIndexingBatchStatus>();
354 StopWatch loadTimer = new StopWatch();
355 StopWatch batchTimer = new StopWatch();
356 loadTimer.start();
357 while (nodesL1.hasNext()) {
358 Node nodeL1 = nodesL1.nextNode();
359 NodeIterator nodesL2 = nodeL1.getNodes();
360 while (nodesL2.hasNext()) {
361 Node nodeL2 = nodesL2.nextNode();
362 NodeIterator nodesFile = nodeL2.getNodes();
363 nodeCount = nodesFile.getSize();
364 batchTimer.start();
365 while (nodesFile.hasNext()) {
366 if (docs.size() == ProcessParameters.BULK_PROCESSOR_SPLIT_SIZE) {
367 if (!isStop()) {
368 ReIndexingBatchStatus reIndexingBatchStatus = indexBeforeParams(loadTimer);
369 indexDocs(docs, totalCount, nodeCount, batchStatusList, reIndexingBatchStatus);
370 indexAfterParams(batchTimer, reIndexingBatchStatus, batchStatusList);
371 resetTimers(batchTimer, loadTimer);
372 totalCount = 0;
373 logger.info("Rebuild");
374 }
375 else {
376 return;
377 }
378 }
379 else {
380 Node fileNode = nodesFile.nextNode();
381 String content = checkoutManager.getXMLFORInstanceNode(fileNode);
382 RequestDocument reqDoc = (RequestDocument) rd.clone();
383 reqDoc.setId(fileNode.getIdentifier());
384 reqDoc.setUuid(fileNode.getIdentifier());
385 InstanceCollection instance = workInstanceOlemlRecordProcessor.fromXML(content);
386 Content contentObj = new Content();
387 contentObj.setContent(content);
388 contentObj.setContentObject(instance);
389 reqDoc.setContent(contentObj);
390 docs.add(reqDoc);
391 totalCount++;
392 }
393 }
394 }
395 }
396 if (docs.size() > 0 && !isStop()) {
397 ReIndexingBatchStatus reIndexingBatchStatus = indexBeforeParams(loadTimer);
398 indexDocs(docs, totalCount, nodeCount, batchStatusList, reIndexingBatchStatus);
399 indexAfterParams(batchTimer, reIndexingBatchStatus, batchStatusList);
400 }
401 }
402 catch (Exception e) {
403 logger.error("Rebuild Indexes Process(" + docCategory + " : " + docType + " : " + docFormat + ") Processed("
404 + (totalCount - docs.size()) + "), Failed @ batch(" + docs.size() + "): Cause: " + e, e);
405 }
406 finally {
407 try {
408 if(isStop){
409 ReIndexingStatus.getInstance().getDocTypeList().setStatus("Stopped");
410 }
411 else{
412 ReIndexingStatus.getInstance().getDocTypeList().setStatus("Done");
413 }
414 RepositoryManager.getRepositoryManager().logout(session);
415 }
416 catch (OleException e) {
417 logger.error(e.getMessage(), e);
418 }
419 }
420 }
421
422 private void workLicense(String docCategory, String docType, String docFormat) {
423 Session session = null;
424 long totalCount = 0;
425 long nodeCount = 0;
426 List<RequestDocument> docs = new ArrayList<RequestDocument>();
427 try {
428 session = RepositoryManager.getRepositoryManager().getSession(ProcessParameters.BULK_DEFAULT_USER,
429 ProcessParameters.BULK_DEFUALT_ACTION);
430 RequestDocument rd = new RequestDocument();
431 rd.setCategory(docCategory);
432 rd.setType(docType);
433 rd.setFormat(docFormat);
434 DocumentIngester docIngester = new DocumentIngester();
435 Node nodeFormat = docIngester.getStaticFormatNode(rd, session);
436 NodeIterator nodesL1 = nodeFormat.getNodes();
437 List<ReIndexingBatchStatus> batchStatusList = new ArrayList<ReIndexingBatchStatus>();
438 StopWatch loadTimer = new StopWatch();
439 StopWatch batchTimer = new StopWatch();
440 loadTimer.start();
441 RepositoryBrowser repositoryBrowser = new RepositoryBrowser();
442 while (nodesL1.hasNext()) {
443 Node nodeL1 = nodesL1.nextNode();
444 NodeIterator nodesFile = nodeL1.getNodes();
445 nodeCount = nodesFile.getSize();
446 batchTimer.start();
447 while (nodesFile.hasNext()) {
448 if (docs.size() == ProcessParameters.BULK_PROCESSOR_SPLIT_SIZE && !isStop()) {
449 if (!isStop()) {
450 ReIndexingBatchStatus reIndexingBatchStatus = indexBeforeParams(loadTimer);
451 indexDocs(docs, totalCount, nodeCount, batchStatusList, reIndexingBatchStatus);
452 indexAfterParams(batchTimer, reIndexingBatchStatus, batchStatusList);
453 resetTimers(batchTimer, loadTimer);
454 totalCount = 0;
455 logger.info("Rebuild");
456 }
457 else {
458 return;
459 }
460 }
461 else {
462
463 Node fileNode = nodesFile.nextNode();
464 String content = null;
465 if (docFormat.equals(DocFormat.ONIXPL.getCode())) {
466 content = checkoutManager.getData(fileNode);
467 }
468 else if (docFormat.equals(DocFormat.PDF.getCode()) || docFormat
469 .equals(DocFormat.DOC.getCode())) {
470 content = checkoutManager
471 .checkOutBinary(fileNode.getIdentifier(), ProcessParameters.BULK_DEFAULT_USER,
472 ProcessParameters.BULK_DEFUALT_ACTION, docFormat);
473 }
474 RequestDocument reqDoc = (RequestDocument) rd.clone();
475 reqDoc.setId(fileNode.getIdentifier());
476 reqDoc.setUuid(fileNode.getIdentifier());
477 Content contentObj = new Content();
478 contentObj.setContent(content);
479 reqDoc.setContent(contentObj);
480 docs.add(reqDoc);
481 totalCount++;
482 }
483 }
484 }
485 if (docs.size() > 0 && !isStop()) {
486 ReIndexingBatchStatus reIndexingBatchStatus = indexBeforeParams(loadTimer);
487 indexDocs(docs, totalCount, nodeCount, batchStatusList, reIndexingBatchStatus);
488 indexAfterParams(batchTimer, reIndexingBatchStatus, batchStatusList);
489 }
490 }
491 catch (Exception e) {
492 logger.error("Rebuild Indexes Process(" + docCategory + " : " + docType + " : " + docFormat + ") Processed("
493 + (totalCount - docs.size()) + "), Failed @ batch(" + docs.size() + "): Cause: " + e, e);
494 }
495 finally {
496 try {
497 if(isStop){
498 ReIndexingStatus.getInstance().getDocTypeList().setStatus("Stopped");
499 }
500 else{
501 ReIndexingStatus.getInstance().getDocTypeList().setStatus("Done");
502 }
503 RepositoryManager.getRepositoryManager().logout(session);
504 }
505 catch (OleException e) {
506 logger.error(e.getMessage(), e);
507 }
508 }
509 }
510
511 private void resetTimers(StopWatch batchTimer, StopWatch loadTimer) {
512 batchTimer.reset();
513 batchTimer.start();
514 loadTimer.reset();
515 loadTimer.start();
516 }
517
518 private void indexAfterParams(StopWatch batchTimer, ReIndexingBatchStatus reIndexingBatchStatus,
519 List<ReIndexingBatchStatus> batchStatusList) {
520 batchTimer.stop();
521 reIndexingBatchStatus.setBatchTotalTime(batchTimer.toString());
522 ReIndexingStatus.getInstance().getDocTypeList().setReIndBatStatusList(batchStatusList);
523 }
524
525 private ReIndexingBatchStatus indexBeforeParams(StopWatch loadTimer) {
526 loadTimer.stop();
527 ReIndexingBatchStatus reIndexingBatchStatus = new ReIndexingBatchStatus();
528 reIndexingBatchStatus.setBatchTotalTime(" ");
529 reIndexingBatchStatus.setBatchLoadTime(loadTimer.toString());
530 return reIndexingBatchStatus;
531 }
532 }