1 package org.kuali.ole.docstore.process;
2
3 import org.apache.commons.lang.time.StopWatch;
4 import org.apache.solr.client.solrj.SolrServer;
5 import org.apache.solr.client.solrj.SolrServerException;
6 import org.apache.solr.common.SolrDocument;
7 import org.apache.solr.common.SolrInputDocument;
8 import org.kuali.ole.DocumentUniqueIDPrefix;
9 import org.kuali.ole.RepositoryBrowser;
10 import org.kuali.ole.RepositoryManager;
11 import org.kuali.ole.docstore.common.document.*;
12 import org.kuali.ole.docstore.common.document.content.instance.FormerIdentifier;
13 import org.kuali.ole.docstore.common.document.content.instance.Instance;
14 import org.kuali.ole.docstore.common.document.content.instance.xstream.InstanceOlemlRecordProcessor;
15 import org.kuali.ole.docstore.common.util.BatchBibTreeDBUtil;
16 import org.kuali.ole.docstore.common.util.BibInfoStatistics;
17 import org.kuali.ole.docstore.common.util.ReindexBatchStatistics;
18 import org.kuali.ole.docstore.discovery.service.SolrServerManager;
19 import org.kuali.ole.docstore.discovery.solr.work.bib.marc.WorkBibMarcDocBuilder;
20 import org.kuali.ole.docstore.document.rdbms.RdbmsWorkBibMarcDocumentManager;
21 import org.kuali.ole.docstore.document.rdbms.RdbmsWorkEInstanceDocumentManager;
22 import org.kuali.ole.docstore.document.rdbms.RdbmsWorkInstanceDocumentManager;
23 import org.kuali.ole.docstore.engine.service.index.solr.BibMarcIndexer;
24 import org.kuali.ole.docstore.engine.service.index.solr.DocumentIndexer;
25 import org.kuali.ole.docstore.indexer.solr.IndexerService;
26 import org.kuali.ole.docstore.metrics.reindex.ReIndexingBatchStatus;
27 import org.kuali.ole.docstore.metrics.reindex.ReIndexingStatus;
28 import org.kuali.ole.docstore.model.enums.DocCategory;
29 import org.kuali.ole.docstore.model.enums.DocFormat;
30 import org.kuali.ole.docstore.model.enums.DocType;
31 import org.kuali.ole.docstore.engine.service.storage.rdbms.pojo.BibRecord;
32 import org.kuali.ole.docstore.engine.service.storage.rdbms.pojo.EInstanceRecord;
33 import org.kuali.ole.docstore.engine.service.storage.rdbms.pojo.InstanceRecord;
34 import org.kuali.ole.docstore.model.xmlpojo.ingest.AdditionalAttributes;
35 import org.kuali.ole.docstore.model.xmlpojo.ingest.Content;
36 import org.kuali.ole.docstore.model.xmlpojo.ingest.RequestDocument;
37 import org.kuali.ole.docstore.model.xmlpojo.ingest.ResponseDocument;
38 import org.kuali.ole.docstore.common.document.content.instance.InstanceCollection;
39 import org.kuali.ole.docstore.model.xstream.work.oleml.WorkEInstanceOlemlRecordProcessor;
40 import org.kuali.ole.docstore.service.BeanLocator;
41 import org.kuali.ole.docstore.service.DocumentIngester;
42 import org.kuali.ole.docstore.service.ServiceLocator;
43 import org.kuali.ole.pojo.OleException;
44 import org.kuali.ole.repository.CheckoutManager;
45 import org.kuali.ole.repository.NodeHandler;
46 import org.kuali.rice.krad.service.BusinessObjectService;
47 import org.kuali.rice.krad.service.KRADServiceLocator;
48 import org.slf4j.Logger;
49 import org.slf4j.LoggerFactory;
50 import org.kuali.rice.core.api.config.property.ConfigContext;
51
52 import javax.jcr.Binary;
53 import javax.jcr.Node;
54 import javax.jcr.NodeIterator;
55 import javax.jcr.Session;
56 import java.io.ByteArrayInputStream;
57 import java.io.IOException;
58 import java.sql.*;
59 import java.text.DateFormat;
60 import java.text.SimpleDateFormat;
61 import java.util.*;
62 import java.util.Date;
63
64
65
66
67
68
69
70 public class RebuildIndexesHandler
71 implements Runnable {
72
73 private static final Logger LOG = LoggerFactory.getLogger(RebuildIndexesHandler.class);
74 private static RebuildIndexesHandler reBuilder = null;
75 private boolean isRunning = false;
76 private boolean isStop = false;
77 private static final Logger logger = LoggerFactory.getLogger(RebuildIndexesHandler.class);
78 private String docCategory;
79 private String docType;
80 private String docFormat;
81 private BibInfoStatistics bibInfoStatistics = null;
82 private CheckoutManager checkoutManager;
83
84 private int batchSize;
85 private int startIndex;
86 private int endIndex;
87 private String updateDate;
88
89 public static String EXCEPION_FILE_NAME = "";
90 public static String STATUS_FILE_NAME = "";
91 public static String STORAGE_EXCEPTION_FILE_NAME = "";
92 public static String STORAGE_STATUS_FILE_NAME = "";
93 public static BatchBibTreeDBUtil bibTreeDBUtil = new BatchBibTreeDBUtil();
94
95 private String filePath = System.getProperty("solr.solr.home");
96 public synchronized void setRunning(boolean running) {
97 isRunning = running;
98 }
99
100 public synchronized void setStop(boolean stop) {
101 isStop = stop;
102 }
103
/**
 * Private constructor; instances are obtained through {@code getInstance()}.
 * Creates the {@link CheckoutManager} held by this handler.
 */
private RebuildIndexesHandler() {
    this.checkoutManager = new CheckoutManager();
}
107
108 public static RebuildIndexesHandler getInstance() {
109 if (reBuilder == null) {
110 reBuilder = new RebuildIndexesHandler();
111 }
112 return reBuilder;
113 }
114
115
116
117
118
119
120 public synchronized boolean isRunning() {
121 return isRunning;
122 }
123
124 public synchronized boolean isStop() {
125 return isStop;
126 }
127
128
129
130
131 public String startProcess(String docCategory, String docType, String docFormat) throws InterruptedException {
132 String status = null;
133 if (isRunning()) {
134 status = "ReIndexing process is already running. Click 'Show Status' button to know the status. ";
135 } else {
136 setRunning(true);
137 setStop(false);
138 status = "ReIndexing process has started. Click 'Show Status' button to know the status. ";
139 ReIndexingStatus reIndexingStatus = ReIndexingStatus.getInstance();
140 reIndexingStatus.reset();
141 if (docCategory == null || docCategory.equals("")) {
142 docCategory = "all";
143 }
144 if (docType == null || docType.equals("")) {
145 docType = "all";
146 }
147 if (docFormat == null || docType.equals("")) {
148 docFormat = "all";
149 }
150 this.docCategory = docCategory;
151 this.docType = docType;
152 this.docFormat = docFormat;
153 Thread reBuilderThread = new Thread(this);
154 reBuilderThread.start();
155
156 setRunning(false);
157 }
158 return status;
159 }
160 public String startProcess(String docCategory, String docType, String docFormat, int batchSize, int startIndex, int endIndex,String updateDate) throws InterruptedException {
161 String status = null;
162 if (isRunning()) {
163 status = "ReIndexing process is already running. Click 'Show Status' button to know the status. ";
164 } else {
165 setRunning(true);
166 setStop(false);
167 status = "ReIndexing process has started. Click 'Show Status' button to know the status. ";
168 ReIndexingStatus reIndexingStatus = ReIndexingStatus.getInstance();
169 reIndexingStatus.reset();
170 if (docCategory == null || docCategory.equals("")) {
171 docCategory = "all";
172 }
173 if (docType == null || docType.equals("")) {
174 docType = "all";
175 }
176 if (docFormat == null || docType.equals("")) {
177 docFormat = "all";
178 }
179 this.docCategory = docCategory;
180 this.docType = docType;
181 this.docFormat = docFormat;
182 this.batchSize = batchSize;
183 this.startIndex = startIndex;
184 this.endIndex = endIndex;
185 this.updateDate=updateDate;
186 Thread reBuilderThread = new Thread(this);
187 reBuilderThread.start();
188
189 setRunning(false);
190 }
191 return status;
192 }
193
194 public String stopProcess() throws Exception {
195 String status = null;
196 if (isRunning()) {
197 status = "ReIndexing process is running. ReIndexing will stop after current batch. ";
198 setStop(true);
199 setRunning(false);
200 } else {
201 status = "ReIndexing process is not running.";
202 }
203 return status;
204
205 }
206
207 public void run() {
208 DocCategoryTypeFormat docCategoryTypeFormat = new DocCategoryTypeFormat();
209 List<String> categoryList = docCategoryTypeFormat.getCategories();
210 List<String> typeList = null;
211 List<String> formatList = null;
212 for (String docCategoryCurr : categoryList) {
213 if (docCategory.equals("all") || docCategory.equals(docCategoryCurr)) {
214 typeList = docCategoryTypeFormat.getDocTypes(docCategoryCurr);
215 for (String docTypeCurr : typeList) {
216 if (docType.equals("all") || docType.equals(docTypeCurr)) {
217 formatList = docCategoryTypeFormat.getDocFormats(docCategoryCurr, docTypeCurr);
218 for (String docFormatCurr : formatList) {
219 if (docFormat.equals("all") || docFormat.equals(docFormatCurr)) {
220 if (!isStop()) {
221 ReIndexingStatus.getInstance()
222 .startDocType(docCategoryCurr, docTypeCurr, docFormatCurr);
223 reIndex(docCategoryCurr, docTypeCurr, docFormatCurr);
224 } else {
225 return;
226 }
227 }
228 }
229 }
230 }
231 }
232 }
233 setRunning(false);
234 }
235
236 private void reIndex(String docCategory, String docType, String docFormat) {
237 Session session = null;
238 setRunning(true);
239 logger.info("Rebuild Indexes Run(" + docCategory + " : " + docType + " : " + docFormat + "): ");
240 try {
241 if (docCategory.equals(DocCategory.WORK.getCode())) {
242 if (docType.equals(DocType.BIB.getDescription())) {
243 if (docFormat.equals(DocFormat.MARC.getCode()) || docFormat.equals(DocFormat.DUBLIN_CORE.getCode())
244 || docFormat.equals(DocFormat.DUBLIN_UNQUALIFIED.getCode())) {
245 org.springframework.util.StopWatch stopWatch = new org.springframework.util.StopWatch();
246 stopWatch.start("total time taken");
247 Date date = new Date();
248 EXCEPION_FILE_NAME = "ReindexErrors-" + date.toString() + ".txt";
249 STATUS_FILE_NAME = "ReindexBatchStatus-" + date.toString() + ".txt";
250 BatchBibTreeDBUtil.writeStatusToFile(filePath, RebuildIndexesHandler.EXCEPION_FILE_NAME, "Reindex started at:" + date);
251 BibHoldingItemReindexer bibHoldingItemReindexer = BibHoldingItemReindexer.getInstance();
252 bibHoldingItemReindexer.setTotalBatchStatistics(new ReindexBatchStatistics());
253 bibHoldingItemReindexer.index(batchSize, startIndex, endIndex,updateDate);
254 date = new Date();
255 BatchBibTreeDBUtil.writeStatusToFile(filePath, RebuildIndexesHandler.EXCEPION_FILE_NAME, "Reindex ended at:" + date);
256 stopWatch.stop();
257 logger.info(stopWatch.prettyPrint());
258
259 } else {
260 logger.info(
261 "Rebuild Indexes Run(" + docCategory + " : " + docType + " : " + docFormat + "): FAIL");
262 }
263 } else if (docType.equals(DocType.INSTANCE.getDescription())) {
264 if (docFormat.equals(DocFormat.OLEML.getCode())) {
265 workInstanceOLEML(docCategory, docType, docFormat);
266 } else {
267 logger.info(
268 "Rebuild Indexes Run(" + docCategory + " : " + docType + " : " + docFormat + "): FAIL");
269 }
270 } else if (docType.equals(DocType.LICENSE.getDescription())) {
271 if (docFormat.equals(DocFormat.ONIXPL.getCode()) || docFormat.equals(DocFormat.PDF.getCode())
272 || docFormat.equals(DocFormat.DOC.getCode())) {
273 workLicense(docCategory, docType, docFormat);
274 } else {
275 logger.info(
276 "Rebuild Indexes Run(" + docCategory + " : " + docType + " : " + docFormat + "): FAIL");
277 }
278 } else if (docType.equals(DocType.EINSTANCE.getCode())) {
279 if (docFormat.equals(DocFormat.OLEML.getCode())) {
280 workEInstanceOLEML(docCategory, docType, docFormat);
281 } else {
282 logger.info(
283 "Rebuild Indexes Run(" + docCategory + " : " + docType + " : " + docFormat + "): FAIL");
284 }
285 }
286 }
287 } catch (Exception e) {
288 logger.info(e.getMessage(), e);
289 } finally {
290 try {
291 if (isStop) {
292 ReIndexingStatus.getInstance().getDocTypeList().setStatus("Stopped");
293 } else {
294 ReIndexingStatus.getInstance().getDocTypeList().setStatus("Done");
295 }
296 RepositoryManager.getRepositoryManager().logout(session);
297 } catch (OleException e) {
298 logger.error(e.getMessage(), e);
299 }
300 }
301
302 }
303
304 private void workEInstanceOLEML(String docCategory, String docType, String docFormat) {
305 long totalCount = 0;
306 long nodeCount = 0;
307 List<RequestDocument> docs = new ArrayList<RequestDocument>();
308 WorkEInstanceOlemlRecordProcessor workEInstanceOlemlRecordProcessor = new WorkEInstanceOlemlRecordProcessor();
309 try {
310 RequestDocument rd = new RequestDocument();
311 rd.setCategory(docCategory);
312 rd.setType(docType);
313 rd.setFormat(docFormat);
314 List<ReIndexingBatchStatus> batchStatusList = new ArrayList<ReIndexingBatchStatus>();
315 BusinessObjectService businessObjectService = KRADServiceLocator.getBusinessObjectService();
316 List<EInstanceRecord> instanceRecords = (List<EInstanceRecord>) businessObjectService.findAll(EInstanceRecord.class);
317 StopWatch loadTimer = new StopWatch();
318 StopWatch batchTimer = new StopWatch();
319 loadTimer.start();
320 batchTimer.start();
321 for (int i = 0; i < instanceRecords.size(); i++) {
322 if (docs.size() == ProcessParameters.BULK_PROCESSOR_SPLIT_SIZE) {
323 if (!isStop()) {
324 ReIndexingBatchStatus reIndexingBatchStatus = indexBeforeParams(loadTimer);
325 indexDocs(docs, totalCount, nodeCount, batchStatusList, reIndexingBatchStatus);
326 indexAfterParams(batchTimer, reIndexingBatchStatus, batchStatusList);
327 resetTimers(batchTimer, loadTimer);
328 totalCount = 0;
329 logger.info("Rebuild");
330 } else {
331 return;
332 }
333 } else {
334 EInstanceRecord instanceRecord = instanceRecords.get(i);
335 String uuid = DocumentUniqueIDPrefix.getPrefixedId(instanceRecord.getUniqueIdPrefix(), instanceRecord.geteInstanceIdentifier());
336 RequestDocument requestDocument = buildRequestDocumentForCheckout(docCategory, docType, docFormat, uuid);
337 ResponseDocument responseDocument = RdbmsWorkEInstanceDocumentManager.getInstance().checkoutContent(requestDocument, businessObjectService);
338 String content = responseDocument.getContent().getContent();
339 RequestDocument requestDocumentForIndex = (RequestDocument) rd.clone();
340 requestDocumentForIndex.setAdditionalAttributes(responseDocument.getAdditionalAttributes());
341 requestDocumentForIndex.setId(uuid);
342 requestDocumentForIndex.setUuid(uuid);
343 org.kuali.ole.docstore.model.xmlpojo.work.einstance.oleml.InstanceCollection instanceCollection = workEInstanceOlemlRecordProcessor.fromXML(content);
344 content = workEInstanceOlemlRecordProcessor.toXML(instanceCollection);
345 Content contentObj = new Content();
346 contentObj.setContent(content);
347 contentObj.setContentObject(instanceCollection);
348 requestDocumentForIndex.setContent(contentObj);
349 docs.add(requestDocumentForIndex);
350 totalCount++;
351 }
352 }
353 if (docs.size() > 0 && !isStop()) {
354 ReIndexingBatchStatus reIndexingBatchStatus = indexBeforeParams(loadTimer);
355 indexDocs(docs, totalCount, nodeCount, batchStatusList, reIndexingBatchStatus);
356 indexAfterParams(batchTimer, reIndexingBatchStatus, batchStatusList);
357 }
358 } catch (Exception e) {
359 logger.error(
360 "Rebuild Indexes Process(" + docCategory + " : " + docType + " : " + docFormat + ") Processed(" + (
361 totalCount - docs.size()) + "), Failed @ batch(" + docs.size() + "): Cause: " + e, e);
362 } finally {
363 if (isStop) {
364 ReIndexingStatus.getInstance().getDocTypeList().setStatus("Stopped");
365 } else {
366 ReIndexingStatus.getInstance().getDocTypeList().setStatus("Done");
367 }
368 }
369
370 }
371
372
373 private void indexBibDocs(List<BibTree> bibTreeList, long records, long recCount,
374 List<ReIndexingBatchStatus> batchStatusList, ReIndexingBatchStatus reIndexingBatchStatus) {
375 StopWatch indexTimer = new StopWatch();
376 DateFormat dateFormat = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss:SSS");
377 try {
378 Date startDate = new Date();
379 reIndexingBatchStatus.setBatchStartTime(dateFormat.format(startDate));
380 indexTimer.start();
381 reIndexingBatchStatus.setStatus("Indexing");
382 reIndexingBatchStatus.setBatchIndexingTime(indexTimer.toString());
383 reIndexingBatchStatus.setRecordsProcessed(records);
384 reIndexingBatchStatus.setBatchEndTime(" ");
385 batchStatusList.add(reIndexingBatchStatus);
386 ReIndexingStatus.getInstance().getDocTypeList().setReIndBatStatusList(batchStatusList);
387 DocumentIndexer documentIndexer = BibMarcIndexer.getInstance();
388 BibTrees bibTrees = new BibTrees();
389 bibTrees.getBibTrees().addAll(bibTreeList);
390 documentIndexer.createTrees(bibTrees);
391
392 indexTimer.stop();
393 Date endDate = new Date();
394 reIndexingBatchStatus.setBatchEndTime(dateFormat.format(endDate));
395 reIndexingBatchStatus.setBatchIndexingTime(indexTimer.toString());
396 reIndexingBatchStatus.setRecordsProcessed(records);
397 reIndexingBatchStatus.setStatus("Done");
398 reIndexingBatchStatus.setRecordsRemaining(recCount - records);
399 ReIndexingStatus.getInstance().getDocTypeList().setReIndBatStatusList(batchStatusList);
400 bibTreeList.clear();
401 } catch (Exception e) {
402 String firstBibId = bibTreeList.get(0).getBib().getId();
403 String lastBibId = bibTreeList.get(bibTreeList.size()-1).getBib().getId();
404 logger.error(
405 "Rebuild Indexes Process(" + docCategory + " : " + docType + " : " + docFormat + ") Processed(" + (
406 records - bibTreeList.size()) + "), Failed @ bibId( First BibId: " + firstBibId + " : Last BibId : "+ lastBibId +"): Cause: " + e, e);
407 indexTimer.stop();
408 Date endDate = new Date();
409 reIndexingBatchStatus.setBatchEndTime(dateFormat.format(endDate));
410 reIndexingBatchStatus.setBatchIndexingTime(indexTimer.toString());
411 reIndexingBatchStatus.setRecordsProcessed(0L);
412 reIndexingBatchStatus.setStatus("Done");
413 reIndexingBatchStatus.setRecordsRemaining(recCount - records);
414 ReIndexingStatus.getInstance().getDocTypeList().setReIndBatStatusList(batchStatusList);
415 bibTreeList.clear();
416 }
417 }
418
419
420 private void indexDocs(List<RequestDocument> docs, long records, long recCount,
421 List<ReIndexingBatchStatus> batchStatusList, ReIndexingBatchStatus reIndexingBatchStatus) {
422 try {
423 StopWatch indexTimer = new StopWatch();
424 DateFormat dateFormat = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss:SSS");
425 Date startDate = new Date();
426 reIndexingBatchStatus.setBatchStartTime(dateFormat.format(startDate));
427 indexTimer.start();
428 reIndexingBatchStatus.setStatus("Indexing");
429 reIndexingBatchStatus.setBatchIndexingTime(indexTimer.toString());
430 reIndexingBatchStatus.setRecordsProcessed(records);
431 reIndexingBatchStatus.setBatchEndTime(" ");
432 batchStatusList.add(reIndexingBatchStatus);
433 ReIndexingStatus.getInstance().getDocTypeList().setReIndBatStatusList(batchStatusList);
434 IndexerService indexerService = BeanLocator.getDocstoreFactory().getDocumentIndexManager(docs.get(0).getCategory(), docs.get(0).getType(), docs.get(0).getFormat());
435 String result = indexerService.indexDocuments(docs, false);
436 logger.debug(result);
437 indexTimer.stop();
438 Date endDate = new Date();
439 reIndexingBatchStatus.setBatchEndTime(dateFormat.format(endDate));
440 reIndexingBatchStatus.setBatchIndexingTime(indexTimer.toString());
441 reIndexingBatchStatus.setRecordsProcessed(records);
442 reIndexingBatchStatus.setStatus("Done");
443 reIndexingBatchStatus.setRecordsRemaining(recCount - records);
444 ReIndexingStatus.getInstance().getDocTypeList().setReIndBatStatusList(batchStatusList);
445 docs.clear();
446 } catch (Exception e) {
447 logger.error("Rebuild Indexes Processed(" + (records - docs.size()) + "), Failed @ batch(" + docs.size()
448 + "): Cause: " + e + "\n\tContinuous", e);
449 }
450 }
451
452 private void workBibMarcAndDublinAll(String docCategory, String docType, String docFormat) throws SolrServerException, IOException {
453 long totalCount = 0;
454 long nodeCount = 0;
455 int start = 0;
456 String sqlQuery = null;
457 long startTime = 0;
458 long commitEndTime = 0;
459 long commitStartTime = 0;
460 int batchSize = 50000;
461 int commitSize = 50000;
462 long endIndexBatch = 0;
463 String dbVendor = ConfigContext.getCurrentContextConfig().getProperty("db.vendor");
464 List<BibTree> bibTrees = new ArrayList<BibTree>();
465
466 try {
467 String prefix = DocumentUniqueIDPrefix.getPrefix(docCategory, docType, docFormat);
468 Map prefixMap = new HashMap(0);
469 prefixMap.put("uniqueIdPrefix", prefix);
470 startTime = System.currentTimeMillis();
471
472 BusinessObjectService businessObjectService = KRADServiceLocator.getBusinessObjectService();
473 int bibCount = businessObjectService.countMatching(BibRecord.class, prefixMap);
474 Connection connection = null;
475 PreparedStatement preparedStatement = null;
476 if (bibCount > 0) {
477 connection = getConnection();
478 if (dbVendor.equalsIgnoreCase("mysql")) {
479 sqlQuery = "select * from ole_ds_bib_t b ORDER BY b.bib_id LIMIT ?,?";
480 } else {
481 sqlQuery = "select * from (select b.*,ROWNUM r from OLE_DS_BIB_T b) where r between ? and ?";
482 }
483 preparedStatement = connection.prepareStatement(sqlQuery);
484 }
485 List<ReIndexingBatchStatus> batchStatusList = new ArrayList<ReIndexingBatchStatus>();
486 StopWatch loadTimer = new StopWatch();
487 StopWatch batchTimer = new StopWatch();
488 loadTimer.start();
489 batchTimer.start();
490
491 for (int i = 0; i < bibCount; i++) {
492 if (bibTrees.size() == batchSize) {
493 if (!isStop()) {
494
495 ReIndexingBatchStatus reIndexingBatchStatus = indexBeforeParams(loadTimer);
496 indexBibDocs(bibTrees, totalCount, nodeCount, batchStatusList, reIndexingBatchStatus);
497 indexAfterParams(batchTimer, reIndexingBatchStatus, batchStatusList);
498 resetTimers(batchTimer, loadTimer);
499 totalCount = 0;
500 logger.info("Rebuild");
501 i = start;
502 if (start % commitSize == 0) {
503 commitStartTime = System.currentTimeMillis();
504 logger.info("Time elapsed since start ====>>>>>> " + (commitStartTime - startTime));
505 logger.info("Time elapsed since last commit ====>>>>>> " + (commitStartTime - commitEndTime));
506 logger.info("commit started ====>>>>>> " + commitStartTime);
507 SolrServer solr = SolrServerManager.getInstance().getSolrServer();
508 solr.commit();
509 logger.info("No..of records committed ====>>>>>> " + start);
510 commitEndTime = System.currentTimeMillis();
511 logger.info("Time Taken for commit ====>>>>>> " + (commitEndTime - commitStartTime));
512
513 }
514 } else {
515 return;
516 }
517 } else {
518 if (start < bibCount) {
519 long b2time = System.currentTimeMillis();
520 if (dbVendor.equalsIgnoreCase("mysql")) {
521 preparedStatement.setInt(1, start);
522 preparedStatement.setInt(2, batchSize);
523 } else {
524 preparedStatement.setInt(1, start + 1);
525 preparedStatement.setInt(2, start + batchSize);
526 }
527 ResultSet resultSet = preparedStatement.executeQuery();
528 logger.info("time taking for getting records from DB end======>>>>>" + (System.currentTimeMillis() - b2time));
529 while (resultSet.next()) {
530
531 BibTree bibTree = new BibTree();
532 bibTree.setCategory(docCategory);
533 bibTree.setType(docType);
534 bibTree.setFormat(docFormat);
535 Bib bib = new BibMarc();
536 bib.setCategory(docCategory);
537 bib.setType(docType);
538 bib.setFormat(docFormat);
539 bib.setCreatedBy(resultSet.getString("CREATED_BY"));
540 bib.setCreatedOn(resultSet.getString("DATE_CREATED"));
541 bib.setStaffOnly((resultSet.getString("STAFF_ONLY").equalsIgnoreCase("Y") ? Boolean.TRUE : Boolean.FALSE));
542 bib.setContent(resultSet.getString("CONTENT"));
543 bib.setUpdatedBy(resultSet.getString("UPDATED_BY"));
544 bib.setUpdatedOn(resultSet.getString("DATE_UPDATED"));
545 bib.setLastUpdated(resultSet.getString("DATE_UPDATED"));
546 bib.setStatus(resultSet.getString("STATUS"));
547 bib.setStatusUpdatedBy(resultSet.getString("STATUS_UPDATED_BY"));
548 bib.setStatusUpdatedOn(resultSet.getString("STATUS_UPDATED_DATE"));
549 String uuid = DocumentUniqueIDPrefix.getPrefixedId(resultSet.getString("UNIQUE_ID_PREFIX"), resultSet.getString(1));
550 bib.setId(uuid);
551 bib.setLocalId(uuid);
552 bibTree.setBib(bib);
553
554 start++;
555 totalCount++;
556 bibTrees.add(bibTree);
557 }
558 resultSet.close();
559 }
560 }
561
562 }
563 if (bibTrees.size() > 0 && !isStop()) {
564 ReIndexingBatchStatus reIndexingBatchStatus = indexBeforeParams(loadTimer);
565 indexBibDocs(bibTrees, totalCount, nodeCount, batchStatusList, reIndexingBatchStatus);
566 indexAfterParams(batchTimer, reIndexingBatchStatus, batchStatusList);
567 commitStartTime = System.currentTimeMillis();
568 logger.info("commit started : ----> " + commitStartTime);
569 SolrServer solr = SolrServerManager.getInstance().getSolrServer();
570 solr.commit();
571 logger.info("No..of records committed : ----> " + start);
572 commitEndTime = System.currentTimeMillis();
573 logger.info("Time Taken for commit ======>>> " + (commitEndTime - commitStartTime));
574
575 }
576 endIndexBatch = System.currentTimeMillis();
577 logger.info("Time elapsed since end ====>>>>>> " + endIndexBatch);
578 } catch (Exception e) {
579 logger.error(
580 "Rebuild Indexes Process(" + docCategory + " : " + docType + " : " + docFormat + ") Processed(" + (
581 totalCount - bibTrees.size()) + "), Failed @ batch(" + bibTrees.size() + "): Cause: " + e, e);
582 } finally {
583 if (isStop) {
584 ReIndexingStatus.getInstance().getDocTypeList().setStatus("Stopped");
585 } else {
586 ReIndexingStatus.getInstance().getDocTypeList().setStatus("Done");
587 }
588 }
589 }
590
591 private void workInstanceOLEML(String docCategory, String docType, String docFormat) {
592 long totalCount = 0;
593 long nodeCount = 0;
594 List<RequestDocument> docs = new ArrayList<RequestDocument>();
595 InstanceOlemlRecordProcessor instanceOlemlRecordProcessor = new InstanceOlemlRecordProcessor();
596 try {
597 RequestDocument rd = new RequestDocument();
598 rd.setCategory(docCategory);
599 rd.setType(docType);
600 rd.setFormat(docFormat);
601 List<ReIndexingBatchStatus> batchStatusList = new ArrayList<ReIndexingBatchStatus>();
602 BusinessObjectService businessObjectService = KRADServiceLocator.getBusinessObjectService();
603 List<InstanceRecord> instanceRecords = (List<InstanceRecord>) businessObjectService.findAll(InstanceRecord.class);
604 StopWatch loadTimer = new StopWatch();
605 StopWatch batchTimer = new StopWatch();
606 loadTimer.start();
607 batchTimer.start();
608 for (int i = 0; i < instanceRecords.size(); i++) {
609 if (docs.size() == ProcessParameters.BULK_PROCESSOR_SPLIT_SIZE) {
610 if (!isStop()) {
611 ReIndexingBatchStatus reIndexingBatchStatus = indexBeforeParams(loadTimer);
612 indexDocs(docs, totalCount, nodeCount, batchStatusList, reIndexingBatchStatus);
613 indexAfterParams(batchTimer, reIndexingBatchStatus, batchStatusList);
614 resetTimers(batchTimer, loadTimer);
615 totalCount = 0;
616 logger.info("Rebuild");
617 } else {
618 return;
619 }
620 } else {
621 InstanceRecord instanceRecord = instanceRecords.get(i);
622 String uuid = DocumentUniqueIDPrefix.getPrefixedId(instanceRecord.getUniqueIdPrefix(), instanceRecord.getInstanceId());
623 RequestDocument requestDocument = buildRequestDocumentForCheckout(docCategory, docType, docFormat, uuid);
624 ResponseDocument responseDocument = RdbmsWorkInstanceDocumentManager.getInstance().checkoutContent(requestDocument, businessObjectService);
625 String content = responseDocument.getContent().getContent();
626 RequestDocument requestDocumentForIndex = (RequestDocument) rd.clone();
627 requestDocumentForIndex.setAdditionalAttributes(responseDocument.getAdditionalAttributes());
628 requestDocumentForIndex.setId(uuid);
629 requestDocumentForIndex.setUuid(uuid);
630 InstanceCollection instanceCollection = instanceOlemlRecordProcessor.fromXML(content);
631
632
633
634
635 content = instanceOlemlRecordProcessor.toXML(instanceCollection);
636 Content contentObj = new Content();
637 contentObj.setContent(content);
638 contentObj.setContentObject(instanceCollection);
639 requestDocumentForIndex.setContent(contentObj);
640 docs.add(requestDocumentForIndex);
641 totalCount++;
642 }
643 }
644 if (docs.size() > 0 && !isStop()) {
645 ReIndexingBatchStatus reIndexingBatchStatus = indexBeforeParams(loadTimer);
646 indexDocs(docs, totalCount, nodeCount, batchStatusList, reIndexingBatchStatus);
647 indexAfterParams(batchTimer, reIndexingBatchStatus, batchStatusList);
648 }
649 } catch (Exception e) {
650 logger.error(
651 "Rebuild Indexes Process(" + docCategory + " : " + docType + " : " + docFormat + ") Processed(" + (
652 totalCount - docs.size()) + "), Failed @ batch(" + docs.size() + "): Cause: " + e, e);
653 } finally {
654 if (isStop) {
655 ReIndexingStatus.getInstance().getDocTypeList().setStatus("Stopped");
656 } else {
657 ReIndexingStatus.getInstance().getDocTypeList().setStatus("Done");
658 }
659 }
660 }
661
662 private RequestDocument buildRequestDocumentForCheckout(String docCategory, String docType, String docFormat, String uuid) {
663 RequestDocument requestDocument = new RequestDocument();
664 requestDocument.setCategory(docCategory);
665 requestDocument.setType(docType);
666 requestDocument.setFormat(docFormat);
667 requestDocument.setUuid(uuid);
668 return requestDocument;
669 }
670
671 private void linkingInstanceWithBib(InstanceCollection instanceCollection, Session session, Node fileNode) {
672 for (Instance instance : instanceCollection.getInstance()) {
673 instance.getResourceIdentifier().clear();
674 for (FormerIdentifier frids : instance.getFormerResourceIdentifier()) {
675 try {
676 if (frids != null && frids.getIdentifier() != null &&
677 frids.getIdentifier().getIdentifierValue() != null &&
678 frids.getIdentifier().getIdentifierValue().trim().length() >= 0) {
679 List<SolrDocument> solrBibDocs = ServiceLocator.getIndexerService()
680 .getSolrDocument("SystemControlNumber",
681 frids.getIdentifier()
682 .getIdentifierValue());
683 SolrInputDocument solrInputDocument = new SolrInputDocument();
684 WorkBibMarcDocBuilder marcDocBuilder = new WorkBibMarcDocBuilder();
685 List<SolrInputDocument> solrInputDocs = new ArrayList<SolrInputDocument>();
686 if (solrBibDocs != null && solrBibDocs.size() > 0) {
687 for (SolrDocument solrbibDoc : solrBibDocs) {
688 if (checkApplicability(frids.getIdentifier().getIdentifierValue(),
689 solrbibDoc.getFieldValue("SystemControlNumber"))) {
690
691 compareObjNAddValue(instance.getInstanceIdentifier(),
692 solrbibDoc.getFieldValue("instanceIdentifier"), solrbibDoc,
693 "instanceIdentifier");
694 solrInputDocument = new SolrInputDocument();
695 marcDocBuilder.buildSolrInputDocFromSolrDoc(solrbibDoc, solrInputDocument);
696 solrInputDocs.add(solrInputDocument);
697 String bibId = compareListRString(solrbibDoc.getFieldValue("id"));
698 instance.getResourceIdentifier().add(bibId);
699 modifyContentAddLinkedIdsInDocStore(instance, bibId, session, fileNode);
700 indexSolrDocs(solrInputDocs);
701 }
702 }
703 }
704 }
705 } catch (Exception e) {
706 logger.error("error message" + e.getMessage(), e);
707 }
708 }
709 }
710 }
711
712 private void modifyContentAddLinkedIdsInDocStore(Instance instance, String id, Session session, Node fileNode) {
713
714 try {
715 Node bibNode = getNodeByUUID(session, id);
716 bibNode.setProperty("instanceIdentifier", instance.getInstanceIdentifier());
717 fileNode.setProperty("bibIdentifier", id);
718
719 InstanceOlemlRecordProcessor recordProcessor = new InstanceOlemlRecordProcessor();
720 NodeIterator nodeIterator = fileNode.getNodes();
721 while (nodeIterator.hasNext()) {
722 Node instNode = nodeIterator.nextNode();
723 if (instNode.getName().equalsIgnoreCase("instanceFile")) {
724 InstanceCollection instCol = new InstanceCollection();
725 Instance inst = new Instance();
726 inst.setResourceIdentifier(instance.getResourceIdentifier());
727 inst.setFormerResourceIdentifier(instance.getFormerResourceIdentifier());
728 inst.setExtension(instance.getExtension());
729 inst.setInstanceIdentifier(instance.getInstanceIdentifier());
730 List<Instance> instanceList = new ArrayList<Instance>();
731 instanceList.add(inst);
732 instCol.setInstance(instanceList);
733
734 byte[] documentBytes = recordProcessor.toXML(instCol).getBytes();
735 Binary binary = null;
736 if (documentBytes != null && instNode != null && documentBytes.length > 0) {
737 binary = session.getValueFactory().createBinary(new ByteArrayInputStream(documentBytes));
738 instNode.getNode("jcr:content").setProperty("jcr:data", binary);
739 }
740 }
741 }
742 } catch (Exception e) {
743 logger.error("error while updating Docstore in reindexing Process" + e.getMessage(), e);
744 }
745 }
746
747 private void indexSolrDocs(List<SolrInputDocument> solrInputDocs) {
748
749 try {
750 ServiceLocator.getIndexerService().indexSolrDocuments(solrInputDocs);
751 logger.info("Linking Bib and Instance Records (" + solrInputDocs.size() + "): ");
752 solrInputDocs.clear();
753 } catch (Exception e) {
754 logger.error(
755 "Linking Bib and Instance Records (" + (solrInputDocs.size()) + "), Failed @ batch(" + solrInputDocs
756 .size() + "): Cause: " + e + "\n\tContinuous", e);
757 }
758 }
759
760
761 private boolean checkApplicability(Object value, Object fieldValue) {
762 if (fieldValue instanceof Collection) {
763 for (Object object : (Collection) fieldValue) {
764 if (object.equals(value)) {
765 return true;
766 }
767 }
768 return false;
769 } else {
770 return value.equals(fieldValue);
771 }
772 }
773
774
775 private String compareListRString(Object id) {
776 if (id != null) {
777 if (id instanceof List) {
778 List<String> idList = (List<String>) id;
779 return idList.get(0);
780 } else if (id instanceof String) {
781 String strId = (String) id;
782 return strId;
783 }
784 }
785 return null;
786 }
787
788 private void compareObjNAddValue(String id, Object idObj, SolrDocument solrDoc, String identifier) {
789 if (idObj != null) {
790 if (idObj instanceof List) {
791 List<String> instBibIdList = (List<String>) idObj;
792 if (!instBibIdList.contains(id)) {
793 solrDoc.addField(identifier, id);
794 }
795 } else if (idObj instanceof String) {
796 String instBibId = (String) idObj;
797 if (!instBibId.equalsIgnoreCase(id)) {
798 solrDoc.addField(identifier, id);
799 }
800 }
801 } else {
802 solrDoc.addField(identifier, id);
803 }
804 }
805
806 private void workLicense(String docCategory, String docType, String docFormat) {
807 Session session = null;
808 long totalCount = 0;
809 long nodeCount = 0;
810 List<RequestDocument> docs = new ArrayList<RequestDocument>();
811 try {
812 session = RepositoryManager.getRepositoryManager().getSession(ProcessParameters.BULK_DEFAULT_USER,
813 ProcessParameters.BULK_DEFUALT_ACTION);
814 RequestDocument rd = new RequestDocument();
815 rd.setCategory(docCategory);
816 rd.setType(docType);
817 rd.setFormat(docFormat);
818 DocumentIngester docIngester = new DocumentIngester();
819 Node nodeFormat = docIngester.getStaticFormatNode(rd, session);
820 NodeIterator nodesL1 = nodeFormat.getNodes();
821 List<ReIndexingBatchStatus> batchStatusList = new ArrayList<ReIndexingBatchStatus>();
822 StopWatch loadTimer = new StopWatch();
823 StopWatch batchTimer = new StopWatch();
824 loadTimer.start();
825 RepositoryBrowser repositoryBrowser = new RepositoryBrowser();
826 while (nodesL1.hasNext()) {
827 Node nodeL1 = nodesL1.nextNode();
828 NodeIterator nodesFile = nodeL1.getNodes();
829 nodeCount = nodesFile.getSize();
830 batchTimer.start();
831 while (nodesFile.hasNext()) {
832 if (docs.size() == ProcessParameters.BULK_PROCESSOR_SPLIT_SIZE && !isStop()) {
833 if (!isStop()) {
834 ReIndexingBatchStatus reIndexingBatchStatus = indexBeforeParams(loadTimer);
835 indexDocs(docs, totalCount, nodeCount, batchStatusList, reIndexingBatchStatus);
836 indexAfterParams(batchTimer, reIndexingBatchStatus, batchStatusList);
837 resetTimers(batchTimer, loadTimer);
838 totalCount = 0;
839 logger.info("Rebuild");
840 } else {
841 return;
842 }
843 } else {
844
845 Node fileNode = nodesFile.nextNode();
846 String content = null;
847 if (docFormat.equals(DocFormat.ONIXPL.getCode())) {
848 content = checkoutManager.getData(fileNode);
849 } else if (docFormat.equals(DocFormat.PDF.getCode()) || docFormat
850 .equals(DocFormat.DOC.getCode())) {
851 content = checkoutManager
852 .checkOutBinary(fileNode.getIdentifier(), ProcessParameters.BULK_DEFAULT_USER,
853 ProcessParameters.BULK_DEFUALT_ACTION, docFormat);
854 }
855 RequestDocument reqDoc = (RequestDocument) rd.clone();
856 reqDoc.setId(fileNode.getIdentifier());
857 reqDoc.setUuid(fileNode.getIdentifier());
858 Content contentObj = new Content();
859 contentObj.setContent(content);
860 reqDoc.setContent(contentObj);
861 docs.add(reqDoc);
862 totalCount++;
863 }
864 }
865 }
866 if (docs.size() > 0 && !isStop()) {
867 ReIndexingBatchStatus reIndexingBatchStatus = indexBeforeParams(loadTimer);
868 indexDocs(docs, totalCount, nodeCount, batchStatusList, reIndexingBatchStatus);
869 indexAfterParams(batchTimer, reIndexingBatchStatus, batchStatusList);
870 }
871 } catch (Exception e) {
872 logger.error(
873 "Rebuild Indexes Process(" + docCategory + " : " + docType + " : " + docFormat + ") Processed(" + (
874 totalCount - docs.size()) + "), Failed @ batch(" + docs.size() + "): Cause: " + e, e);
875 } finally {
876 try {
877 if (isStop) {
878 ReIndexingStatus.getInstance().getDocTypeList().setStatus("Stopped");
879 } else {
880 ReIndexingStatus.getInstance().getDocTypeList().setStatus("Done");
881 }
882 RepositoryManager.getRepositoryManager().logout(session);
883 } catch (OleException e) {
884 logger.error(e.getMessage(), e);
885 }
886 }
887 }
888
889 private void resetTimers(StopWatch batchTimer, StopWatch loadTimer) {
890 batchTimer.reset();
891 batchTimer.start();
892 loadTimer.reset();
893 loadTimer.start();
894 }
895
896 private void indexAfterParams(StopWatch batchTimer, ReIndexingBatchStatus reIndexingBatchStatus,
897 List<ReIndexingBatchStatus> batchStatusList) {
898 batchTimer.stop();
899 reIndexingBatchStatus.setBatchTotalTime(batchTimer.toString());
900 ReIndexingStatus.getInstance().getDocTypeList().setReIndBatStatusList(batchStatusList);
901 }
902
903 private ReIndexingBatchStatus indexBeforeParams(StopWatch loadTimer) {
904 loadTimer.stop();
905 ReIndexingBatchStatus reIndexingBatchStatus = new ReIndexingBatchStatus();
906 reIndexingBatchStatus.setBatchTotalTime(" ");
907 reIndexingBatchStatus.setBatchLoadTime(loadTimer.toString());
908 return reIndexingBatchStatus;
909 }
910
911 private Node getNodeByUUID(Session newSession, String uuid) throws OleException {
912 return new NodeHandler().getNodeByUUID(newSession, uuid);
913 }
914
915 public Connection getConnection() {
916 Connection connection = null;
917 try {
918
919
920
921 String connectionUrl = ConfigContext.getCurrentContextConfig().getProperty("datasource.url");
922 String userName = ConfigContext.getCurrentContextConfig().getProperty("datasource.username");
923 String passWord = ConfigContext.getCurrentContextConfig().getProperty("datasource.password");
924 String driverName = ConfigContext.getCurrentContextConfig().getProperty("jdbc.driver");
925 Class.forName(driverName);
926 connection = DriverManager.getConnection(connectionUrl, userName, passWord);
927 } catch (Exception e) {
928 LOG.error("Exception : ", e);
929 }
930 return connection;
931 }
932
933 public String showStatus() {
934 BibHoldingItemReindexer bibHoldingItemReindexer = BibHoldingItemReindexer.getInstance();
935 return bibHoldingItemReindexer.showStats();
936 }
937
938 public String showBibStatus() {
939 if(bibInfoStatistics == null) {
940 bibInfoStatistics = new BibInfoStatistics();
941 }
942 return bibInfoStatistics.toString();
943 }
944
945
946 public String storeBibInfo(int batchSize) throws Exception {
947
948 Date date = new Date();
949 String STORAGE_EXCEPTION_FILE_NAME = "BibInfoLoadingErrors-" + date.toString() + ".txt";
950 String STORAGE_STATUS_FILE_NAME = "BibInfoLoadingStatus" + date.toString() + ".txt";
951
952 long startTime = System.currentTimeMillis();
953 bibInfoStatistics = new BibInfoStatistics();
954 bibInfoStatistics.setStartDateTime(date);
955
956 bibTreeDBUtil.init(0, 0,null);
957
958 int batchNo = 0;
959 int count = bibTreeDBUtil.storeBibInfo(batchSize, filePath, STORAGE_EXCEPTION_FILE_NAME, bibInfoStatistics, batchNo);
960 long batchStartTime = startTime;
961 long batchEndTime = System.currentTimeMillis();
962 long totalTimeForBatch = batchEndTime - batchStartTime;
963 BatchBibTreeDBUtil.writeStatusToFile(filePath, STORAGE_STATUS_FILE_NAME, "Time taken for batch " + totalTimeForBatch);
964 while(count > 0) {
965 Date batchStartDate = new Date();
966 batchStartTime = System.currentTimeMillis();
967 bibInfoStatistics.setBatchStartDateTime(batchStartDate);
968 count = bibTreeDBUtil.storeBibInfo(batchSize, filePath, STORAGE_EXCEPTION_FILE_NAME, bibInfoStatistics, batchNo++);
969 batchEndTime = System.currentTimeMillis();
970 Date batchEndDate = new Date();
971 bibInfoStatistics.setBatchEndDateTime(batchEndDate);
972 bibInfoStatistics.setBatchTotalTime((batchEndTime - batchStartTime));
973 totalTimeForBatch = batchEndTime - batchStartTime;
974 BatchBibTreeDBUtil.writeStatusToFile(filePath, STORAGE_STATUS_FILE_NAME, "Time taken for batch " + totalTimeForBatch);
975 }
976
977 long endTime = System.currentTimeMillis();
978 Date endDate = new Date();
979 bibInfoStatistics.setEndDateTime(endDate);
980 long totalTime = endTime - startTime;
981 bibInfoStatistics.setTotalTime(totalTime);
982 BatchBibTreeDBUtil.writeStatusToFile(filePath, STORAGE_STATUS_FILE_NAME, "Total Time taken " + totalTime);
983 return bibInfoStatistics.toString();
984 }
985
986 }