View Javadoc

1   package org.kuali.ole.docstore.process;
2   
3   import org.apache.commons.lang.time.StopWatch;
4   import org.kuali.ole.RepositoryBrowser;
5   import org.kuali.ole.RepositoryManager;
6   import org.kuali.ole.docstore.metrics.reindex.ReIndexingBatchStatus;
7   import org.kuali.ole.docstore.metrics.reindex.ReIndexingStatus;
8   import org.kuali.ole.docstore.model.enums.DocCategory;
9   import org.kuali.ole.docstore.model.enums.DocFormat;
10  import org.kuali.ole.docstore.model.enums.DocType;
11  import org.kuali.ole.docstore.model.xmlpojo.ingest.Content;
12  import org.kuali.ole.docstore.model.xmlpojo.ingest.RequestDocument;
13  import org.kuali.ole.docstore.model.xmlpojo.work.instance.oleml.InstanceCollection;
14  import org.kuali.ole.docstore.model.xstream.work.instance.oleml.WorkInstanceOlemlRecordProcessor;
15  import org.kuali.ole.docstore.service.DocumentIngester;
16  import org.kuali.ole.docstore.service.ServiceLocator;
17  import org.kuali.ole.pojo.OleException;
18  import org.kuali.ole.repository.CheckoutManager;
19  import org.slf4j.Logger;
20  import org.slf4j.LoggerFactory;
21  
22  import javax.jcr.Node;
23  import javax.jcr.NodeIterator;
24  import javax.jcr.Session;
25  import java.text.DateFormat;
26  import java.text.SimpleDateFormat;
27  import java.util.ArrayList;
28  import java.util.Date;
29  import java.util.List;
30  
31  /**
32   * Class to Rebuild Indexes.
33   *
34   * @author Rajesh Chowdary K
35   * @created May 2, 2012
36   */
37  public class RebuildIndexesHandler
38          implements Runnable {
39  
40      private static       RebuildIndexesHandler reBuilder = null;
41      private              boolean               isRunning = false;
42      private              boolean               isStop    = false;
43      private static final Logger                logger    = LoggerFactory.getLogger(RebuildIndexesHandler.class);
44      private String           docCategory;
45      private String           docType;
46      private String           docFormat;
47      private CheckoutManager  checkoutManager;
48  //    private ReIndexingStatus reIndexingStatus;
49  
50  
51      public synchronized void setRunning(boolean running) {
52          isRunning = running;
53      }
54  
55      public synchronized void setStop(boolean stop) {
56          isStop = stop;
57      }
58  
59      private RebuildIndexesHandler() {
60          checkoutManager = new CheckoutManager();
61      }
62  
63      public static RebuildIndexesHandler getInstance() {
64          if (reBuilder == null) {
65              reBuilder = new RebuildIndexesHandler();
66          }
67          return reBuilder;
68      }
69  
70      /**
71       * Method to get running status.
72       *
73       * @return
74       */
75      public synchronized boolean isRunning() {
76          return isRunning;
77      }
78  
79      public synchronized boolean isStop() {
80          return isStop;
81      }
82  
83      /**
84       * Method to startProcess
85       */
86      public String startProcess(String docCategory, String docType, String docFormat) throws InterruptedException {
87          String status = null;
88          if (isRunning()) {
89              status = "ReIndexing process is already running. Click 'Status' button to know the status. ";
90          }
91          else {
92              setRunning(true);
93              setStop(false);
94              status = "ReIndexing process has started. Click 'Status' button to know the status. ";
95              ReIndexingStatus reIndexingStatus = ReIndexingStatus.getInstance();
96              reIndexingStatus.reset();
97              if (docCategory == null || docCategory.equals("")) {
98                  docCategory = "all";
99              }
100             if (docType == null || docType.equals("")) {
101                 docType = "all";
102             }
103             if (docFormat == null || docType.equals("")) {
104                 docFormat = "all";
105             }
106             this.docCategory = docCategory;
107             this.docType = docType;
108             this.docFormat = docFormat;
109             Thread reBuilderThread = new Thread(this);
110             reBuilderThread.start();
111 //            reBuilderThread.join();
112             setRunning(false);
113         }
114         return status;
115     }
116 
117     public String stopProcess() throws Exception {
118         String status = null;
119         if (isRunning()) {
120             status = "ReIndexing process is running. ReIndexing will stop after current batch. ";
121             setStop(true);
122             setRunning(false);
123         }
124         else {
125             status = "ReIndexing process is not running.";
126         }
127         return status;
128 
129     }
130 
131     public void run() {
132         DocCategoryTypeFormat docCategoryTypeFormat = new DocCategoryTypeFormat();
133         List<String> categoryList = docCategoryTypeFormat.getCategories();
134         List<String> typeList = null;
135         List<String> formatList = null;
136         for (String docCategoryCurr : categoryList) {
137             if (docCategory.equals("all") || docCategory.equals(docCategoryCurr)) {
138                 typeList = docCategoryTypeFormat.getDocTypes(docCategoryCurr);
139                 for (String docTypeCurr : typeList) {
140                     if (docType.equals("all") || docType.equals(docTypeCurr)) {
141                         formatList = docCategoryTypeFormat.getDocFormats(docCategoryCurr, docTypeCurr);
142                         for (String docFormatCurr : formatList) {
143                             if (docFormat.equals("all") || docFormat.equals(docFormatCurr)) {
144                                 if (!isStop()) {
145                                     ReIndexingStatus.getInstance().startDocType(docCategoryCurr, docTypeCurr, docFormatCurr);
146                                     reIndex(docCategoryCurr, docTypeCurr, docFormatCurr);
147                                 }
148                                 else {
149                                     return;
150                                 }
151                             }
152                         }
153                     }
154                 }
155             }
156         }
157       setRunning(false);
158     }
159 
160     private void reIndex(String docCategory, String docType, String docFormat) {
161         Session session = null;
162         setRunning(true);
163         logger.info("Rebuild Indexes Run(" + docCategory + " : " + docType + " : " + docFormat + "): ");
164         try {
165             if (docCategory.equals(DocCategory.WORK.getCode())) {
166                 if (docType.equals(DocType.BIB.getDescription())) {
167                     if (docFormat.equals(DocFormat.MARC.getCode()) || docFormat.equals(DocFormat.DUBLIN_CORE.getCode())
168                         || docFormat.equals(DocFormat.DUBLIN_UNQUALIFIED.getCode())) {
169                         workBibMarcAndDublinAll(docCategory, docType, docFormat);
170                     }
171                     else {
172                         logger.info(
173                                 "Rebuild Indexes Run(" + docCategory + " : " + docType + " : " + docFormat + "): FAIL");
174                     }
175                 }
176                 else if (docType.equals(DocType.INSTANCE.getDescription())) {
177                     if (docFormat.equals(DocFormat.OLEML.getCode())) {
178                         workInstanceOLEML(docCategory, docType, docFormat);
179                     }
180                     else {
181                         logger.info(
182                                 "Rebuild Indexes Run(" + docCategory + " : " + docType + " : " + docFormat + "): FAIL");
183                     }
184                 }
185                 else if (docType.equals(DocType.LICENSE.getDescription())) {
186                     if (docFormat.equals(DocFormat.ONIXPL.getCode()) || docFormat.equals(DocFormat.PDF.getCode())
187                         || docFormat.equals(DocFormat.DOC.getCode())) {
188                         workLicense(docCategory, docType, docFormat);
189                     }
190                     else {
191                         logger.info(
192                                 "Rebuild Indexes Run(" + docCategory + " : " + docType + " : " + docFormat + "): FAIL");
193                     }
194                 }
195             }
196         }
197         catch (Exception e) {
198             logger.info(e.getMessage(), e);
199         }
200         finally {
201             try {
202                 if(isStop){
203                   ReIndexingStatus.getInstance().getDocTypeList().setStatus("Stopped");
204                 }
205                 else{
206                    ReIndexingStatus.getInstance().getDocTypeList().setStatus("Done");
207                 }
208                 RepositoryManager.getRepositoryManager().logout(session);
209             }
210             catch (OleException e) {
211                 logger.error(e.getMessage(), e);
212             }
213         }
214 
215     }
216 
217 
218     private void indexDocs(List<RequestDocument> docs, long records, long recCount,
219                            List<ReIndexingBatchStatus> batchStatusList, ReIndexingBatchStatus reIndexingBatchStatus) {
220         try {
221             StopWatch indexTimer = new StopWatch();
222             DateFormat dateFormat = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss:SSS");
223             Date startDate = new Date();
224             reIndexingBatchStatus.setBatchStartTime(dateFormat.format(startDate));
225             indexTimer.start();
226             reIndexingBatchStatus.setStatus("Indexing");
227             reIndexingBatchStatus.setBatchIndexingTime(indexTimer.toString());
228             reIndexingBatchStatus.setRecordsProcessed(records);
229             reIndexingBatchStatus.setBatchEndTime(" ");
230             batchStatusList.add(reIndexingBatchStatus);
231             ReIndexingStatus.getInstance().getDocTypeList().setReIndBatStatusList(batchStatusList);
232             ServiceLocator.getIndexerService().indexDocuments(docs);
233             indexTimer.stop();
234             Date endDate = new Date();
235             reIndexingBatchStatus.setBatchEndTime(dateFormat.format(endDate));
236             reIndexingBatchStatus.setBatchIndexingTime(indexTimer.toString());
237             reIndexingBatchStatus.setRecordsProcessed(records);
238             reIndexingBatchStatus.setStatus("Done");
239             reIndexingBatchStatus.setRecordsRemaining(recCount - records);
240             ReIndexingStatus.getInstance().getDocTypeList().setReIndBatStatusList(batchStatusList);
241             docs.clear();
242         }
243         catch (Exception e) {
244             logger.error("Rebuild Indexes Processed(" + (records - docs.size()) + "), Failed @ batch(" + docs.size()
245                          + "): Cause: " + e + "\n\tContinuous", e);
246         }
247     }
248 
249     private void workBibMarcAndDublinAll(String docCategory, String docType, String docFormat) {
250 
251         Session session = null;
252         long totalCount = 0;
253         long nodeCount = 0;
254         List<RequestDocument> docs = new ArrayList<RequestDocument>();
255         try {
256             session = RepositoryManager.getRepositoryManager().getSession(ProcessParameters.BULK_DEFAULT_USER,
257                                                                           ProcessParameters.BULK_DEFUALT_ACTION);
258             RequestDocument rd = new RequestDocument();
259             rd.setCategory(docCategory);
260             rd.setType(docType);
261             rd.setFormat(docFormat);
262             DocumentIngester docIngester = new DocumentIngester();
263             Node nodeFormat = docIngester.getStaticFormatNode(rd, session);
264             NodeIterator nodesL1 = nodeFormat.getNodes();
265             List<ReIndexingBatchStatus> batchStatusList = new ArrayList<ReIndexingBatchStatus>();
266             StopWatch loadTimer = new StopWatch();
267             StopWatch batchTimer = new StopWatch();
268             loadTimer.start();
269             batchTimer.start();
270             while (nodesL1.hasNext()) {
271                 Node nodeL1 = nodesL1.nextNode();
272                 NodeIterator nodesL2 = nodeL1.getNodes();
273                 while (nodesL2.hasNext()) {
274                     Node nodeL2 = nodesL2.nextNode();
275                     NodeIterator nodesL3 = nodeL2.getNodes();
276                     while (nodesL3.hasNext()) {
277                         Node nodeL3 = nodesL3.nextNode();
278                         NodeIterator nodesFile = nodeL3.getNodes();
279                         nodeCount = nodeCount + nodesFile.getSize();
280                         while (nodesFile.hasNext()) {
281                             if (docs.size() == ProcessParameters.BULK_PROCESSOR_SPLIT_SIZE) {
282                                 if (!isStop()) {
283                                     ReIndexingBatchStatus reIndexingBatchStatus = indexBeforeParams(loadTimer);
284                                     indexDocs(docs, totalCount, nodeCount, batchStatusList, reIndexingBatchStatus);
285                                     indexAfterParams(batchTimer, reIndexingBatchStatus, batchStatusList);
286                                     resetTimers(batchTimer, loadTimer);
287                                     totalCount = 0;
288                                     logger.info("Rebuild");
289                                 }
290                                 else {
291                                     return;
292                                 }
293                             }
294                             else {
295                                 Node fileNode = nodesFile.nextNode();
296                                 String content = checkoutManager.getData(fileNode);
297                                 RequestDocument reqDoc = (RequestDocument) rd.clone();
298                                 reqDoc.setId(fileNode.getIdentifier());
299                                 reqDoc.setUuid(fileNode.getIdentifier());
300                                 Content contentObj = new Content();
301                                 contentObj.setContent(content);
302                                 reqDoc.setContent(contentObj);
303                                 docs.add(reqDoc);
304                                 totalCount++;
305                             }
306                         }
307                     }
308                 }
309             }
310             if (docs.size() > 0 && !isStop()) {
311                 ReIndexingBatchStatus reIndexingBatchStatus = indexBeforeParams(loadTimer);
312                 indexDocs(docs, totalCount, nodeCount, batchStatusList, reIndexingBatchStatus);
313                 indexAfterParams(batchTimer, reIndexingBatchStatus, batchStatusList);
314             }
315         }
316         catch (Exception e) {
317             logger.error("Rebuild Indexes Process(" + docCategory + " : " + docType + " : " + docFormat + ") Processed("
318                          + (totalCount - docs.size()) + "), Failed @ batch(" + docs.size() + "): Cause: " + e, e);
319         }
320         finally {
321             try {
322                 if(isStop){
323                   ReIndexingStatus.getInstance().getDocTypeList().setStatus("Stopped");
324                 }
325                 else{
326                    ReIndexingStatus.getInstance().getDocTypeList().setStatus("Done");
327                 }
328                 RepositoryManager.getRepositoryManager().logout(session);
329 
330             }
331             catch (OleException e) {
332                 logger.error(e.getMessage(), e);
333             }
334         }
335     }
336 
337     private void workInstanceOLEML(String docCategory, String docType, String docFormat) {
338         Session session = null;
339         long totalCount = 0;
340         long nodeCount = 0;
341         List<RequestDocument> docs = new ArrayList<RequestDocument>();
342         WorkInstanceOlemlRecordProcessor workInstanceOlemlRecordProcessor = new WorkInstanceOlemlRecordProcessor();
343         try {
344             session = RepositoryManager.getRepositoryManager().getSession(ProcessParameters.BULK_DEFAULT_USER,
345                                                                           ProcessParameters.BULK_DEFUALT_ACTION);
346             RequestDocument rd = new RequestDocument();
347             rd.setCategory(docCategory);
348             rd.setType(docType);
349             rd.setFormat(docFormat);
350             DocumentIngester docIngester = new DocumentIngester();
351             Node nodeFormat = docIngester.getStaticFormatNode(rd, session);
352             NodeIterator nodesL1 = nodeFormat.getNodes();
353             List<ReIndexingBatchStatus> batchStatusList = new ArrayList<ReIndexingBatchStatus>();
354             StopWatch loadTimer = new StopWatch();
355             StopWatch batchTimer = new StopWatch();
356             loadTimer.start();
357             while (nodesL1.hasNext()) {
358                 Node nodeL1 = nodesL1.nextNode();
359                 NodeIterator nodesL2 = nodeL1.getNodes();
360                 while (nodesL2.hasNext()) {
361                     Node nodeL2 = nodesL2.nextNode();
362                     NodeIterator nodesFile = nodeL2.getNodes();
363                     nodeCount = nodesFile.getSize();
364                     batchTimer.start();
365                     while (nodesFile.hasNext()) {
366                         if (docs.size() == ProcessParameters.BULK_PROCESSOR_SPLIT_SIZE) {
367                             if (!isStop()) {
368                                 ReIndexingBatchStatus reIndexingBatchStatus = indexBeforeParams(loadTimer);
369                                 indexDocs(docs, totalCount, nodeCount, batchStatusList, reIndexingBatchStatus);
370                                 indexAfterParams(batchTimer, reIndexingBatchStatus, batchStatusList);
371                                 resetTimers(batchTimer, loadTimer);
372                                 totalCount = 0;
373                                 logger.info("Rebuild");
374                             }
375                             else {
376                                 return;
377                             }
378                         }
379                         else {
380                             Node fileNode = nodesFile.nextNode();
381                             String content = checkoutManager.getXMLFORInstanceNode(fileNode);
382                             RequestDocument reqDoc = (RequestDocument) rd.clone();
383                             reqDoc.setId(fileNode.getIdentifier());
384                             reqDoc.setUuid(fileNode.getIdentifier());
385                             InstanceCollection instance = workInstanceOlemlRecordProcessor.fromXML(content);
386                             Content contentObj = new Content();
387                             contentObj.setContent(content);
388                             contentObj.setContentObject(instance);
389                             reqDoc.setContent(contentObj);
390                             docs.add(reqDoc);
391                             totalCount++;
392                         }
393                     }
394                 }
395             }
396             if (docs.size() > 0 && !isStop()) {
397                 ReIndexingBatchStatus reIndexingBatchStatus = indexBeforeParams(loadTimer);
398                 indexDocs(docs, totalCount, nodeCount, batchStatusList, reIndexingBatchStatus);
399                 indexAfterParams(batchTimer, reIndexingBatchStatus, batchStatusList);
400             }
401         }
402         catch (Exception e) {
403             logger.error("Rebuild Indexes Process(" + docCategory + " : " + docType + " : " + docFormat + ") Processed("
404                          + (totalCount - docs.size()) + "), Failed @ batch(" + docs.size() + "): Cause: " + e, e);
405         }
406         finally {
407             try {
408                if(isStop){
409                   ReIndexingStatus.getInstance().getDocTypeList().setStatus("Stopped");
410                 }
411                 else{
412                    ReIndexingStatus.getInstance().getDocTypeList().setStatus("Done");
413                 }
414                 RepositoryManager.getRepositoryManager().logout(session);
415             }
416             catch (OleException e) {
417                 logger.error(e.getMessage(), e);
418             }
419         }
420     }
421 
422     private void workLicense(String docCategory, String docType, String docFormat) {
423         Session session = null;
424         long totalCount = 0;
425         long nodeCount = 0;
426         List<RequestDocument> docs = new ArrayList<RequestDocument>();
427         try {
428             session = RepositoryManager.getRepositoryManager().getSession(ProcessParameters.BULK_DEFAULT_USER,
429                                                                           ProcessParameters.BULK_DEFUALT_ACTION);
430             RequestDocument rd = new RequestDocument();
431             rd.setCategory(docCategory);
432             rd.setType(docType);
433             rd.setFormat(docFormat);
434             DocumentIngester docIngester = new DocumentIngester();
435             Node nodeFormat = docIngester.getStaticFormatNode(rd, session);
436             NodeIterator nodesL1 = nodeFormat.getNodes();
437             List<ReIndexingBatchStatus> batchStatusList = new ArrayList<ReIndexingBatchStatus>();
438             StopWatch loadTimer = new StopWatch();
439             StopWatch batchTimer = new StopWatch();
440             loadTimer.start();
441             RepositoryBrowser repositoryBrowser = new RepositoryBrowser();
442             while (nodesL1.hasNext()) {
443                 Node nodeL1 = nodesL1.nextNode();
444                 NodeIterator nodesFile = nodeL1.getNodes();
445                 nodeCount = nodesFile.getSize();
446                 batchTimer.start();
447                 while (nodesFile.hasNext()) {
448                     if (docs.size() == ProcessParameters.BULK_PROCESSOR_SPLIT_SIZE && !isStop()) {
449                         if (!isStop()) {
450                             ReIndexingBatchStatus reIndexingBatchStatus = indexBeforeParams(loadTimer);
451                             indexDocs(docs, totalCount, nodeCount, batchStatusList, reIndexingBatchStatus);
452                             indexAfterParams(batchTimer, reIndexingBatchStatus, batchStatusList);
453                             resetTimers(batchTimer, loadTimer);
454                             totalCount = 0;
455                             logger.info("Rebuild");
456                         }
457                         else {
458                             return;
459                         }
460                     }
461                     else {
462 
463                         Node fileNode = nodesFile.nextNode();
464                         String content = null;
465                         if (docFormat.equals(DocFormat.ONIXPL.getCode())) {
466                             content = checkoutManager.getData(fileNode);
467                         }
468                         else if (docFormat.equals(DocFormat.PDF.getCode()) || docFormat
469                                 .equals(DocFormat.DOC.getCode())) {
470                             content = checkoutManager
471                                     .checkOutBinary(fileNode.getIdentifier(), ProcessParameters.BULK_DEFAULT_USER,
472                                                     ProcessParameters.BULK_DEFUALT_ACTION, docFormat);
473                         }
474                         RequestDocument reqDoc = (RequestDocument) rd.clone();
475                         reqDoc.setId(fileNode.getIdentifier());
476                         reqDoc.setUuid(fileNode.getIdentifier());
477                         Content contentObj = new Content();
478                         contentObj.setContent(content);
479                         reqDoc.setContent(contentObj);
480                         docs.add(reqDoc);
481                         totalCount++;
482                     }
483                 }
484             }
485             if (docs.size() > 0 && !isStop()) {
486                 ReIndexingBatchStatus reIndexingBatchStatus = indexBeforeParams(loadTimer);
487                 indexDocs(docs, totalCount, nodeCount, batchStatusList, reIndexingBatchStatus);
488                 indexAfterParams(batchTimer, reIndexingBatchStatus, batchStatusList);
489             }
490         }
491         catch (Exception e) {
492             logger.error("Rebuild Indexes Process(" + docCategory + " : " + docType + " : " + docFormat + ") Processed("
493                          + (totalCount - docs.size()) + "), Failed @ batch(" + docs.size() + "): Cause: " + e, e);
494         }
495         finally {
496             try {
497                 if(isStop){
498                   ReIndexingStatus.getInstance().getDocTypeList().setStatus("Stopped");
499                 }
500                 else{
501                   ReIndexingStatus.getInstance().getDocTypeList().setStatus("Done");
502                 }
503                 RepositoryManager.getRepositoryManager().logout(session);
504             }
505             catch (OleException e) {
506                 logger.error(e.getMessage(), e);
507             }
508         }
509     }
510 
511     private void resetTimers(StopWatch batchTimer, StopWatch loadTimer) {
512         batchTimer.reset();
513         batchTimer.start();
514         loadTimer.reset();
515         loadTimer.start();
516     }
517 
518     private void indexAfterParams(StopWatch batchTimer, ReIndexingBatchStatus reIndexingBatchStatus,
519                                   List<ReIndexingBatchStatus> batchStatusList) {
520         batchTimer.stop();
521         reIndexingBatchStatus.setBatchTotalTime(batchTimer.toString());
522         ReIndexingStatus.getInstance().getDocTypeList().setReIndBatStatusList(batchStatusList);
523     }
524 
525     private ReIndexingBatchStatus indexBeforeParams(StopWatch loadTimer) {
526         loadTimer.stop();
527         ReIndexingBatchStatus reIndexingBatchStatus = new ReIndexingBatchStatus();
528         reIndexingBatchStatus.setBatchTotalTime(" ");
529         reIndexingBatchStatus.setBatchLoadTime(loadTimer.toString());
530         return reIndexingBatchStatus;
531     }
532 }