1 package org.kuali.ole.docstore.process;
2
3 import org.apache.commons.lang.time.StopWatch;
4 import org.apache.solr.client.solrj.SolrServer;
5 import org.apache.solr.client.solrj.SolrServerException;
6 import org.apache.solr.common.SolrDocument;
7 import org.apache.solr.common.SolrInputDocument;
8 import org.kuali.ole.DocumentUniqueIDPrefix;
9 import org.kuali.ole.RepositoryBrowser;
10 import org.kuali.ole.RepositoryManager;
11 import org.kuali.ole.docstore.common.document.*;
12 import org.kuali.ole.docstore.common.document.content.instance.FormerIdentifier;
13 import org.kuali.ole.docstore.common.document.content.instance.Instance;
14 import org.kuali.ole.docstore.common.document.content.instance.xstream.InstanceOlemlRecordProcessor;
15 import org.kuali.ole.docstore.common.util.BatchBibTreeDBUtil;
16 import org.kuali.ole.docstore.common.util.BibInfoStatistics;
17 import org.kuali.ole.docstore.common.util.ReindexBatchStatistics;
18 import org.kuali.ole.docstore.discovery.service.SolrServerManager;
19 import org.kuali.ole.docstore.discovery.solr.work.bib.marc.WorkBibMarcDocBuilder;
20 import org.kuali.ole.docstore.document.rdbms.RdbmsWorkBibMarcDocumentManager;
21 import org.kuali.ole.docstore.document.rdbms.RdbmsWorkEInstanceDocumentManager;
22 import org.kuali.ole.docstore.document.rdbms.RdbmsWorkInstanceDocumentManager;
23 import org.kuali.ole.docstore.engine.service.index.solr.BibMarcIndexer;
24 import org.kuali.ole.docstore.engine.service.index.solr.DocumentIndexer;
25 import org.kuali.ole.docstore.indexer.solr.IndexerService;
26 import org.kuali.ole.docstore.metrics.reindex.ReIndexingBatchStatus;
27 import org.kuali.ole.docstore.metrics.reindex.ReIndexingStatus;
28 import org.kuali.ole.docstore.model.enums.DocCategory;
29 import org.kuali.ole.docstore.model.enums.DocFormat;
30 import org.kuali.ole.docstore.model.enums.DocType;
31 import org.kuali.ole.docstore.engine.service.storage.rdbms.pojo.BibRecord;
32 import org.kuali.ole.docstore.engine.service.storage.rdbms.pojo.EInstanceRecord;
33 import org.kuali.ole.docstore.engine.service.storage.rdbms.pojo.InstanceRecord;
34 import org.kuali.ole.docstore.model.xmlpojo.ingest.AdditionalAttributes;
35 import org.kuali.ole.docstore.model.xmlpojo.ingest.Content;
36 import org.kuali.ole.docstore.model.xmlpojo.ingest.RequestDocument;
37 import org.kuali.ole.docstore.model.xmlpojo.ingest.ResponseDocument;
38 import org.kuali.ole.docstore.common.document.content.instance.InstanceCollection;
39 import org.kuali.ole.docstore.model.xstream.work.oleml.WorkEInstanceOlemlRecordProcessor;
40 import org.kuali.ole.docstore.service.BeanLocator;
41 import org.kuali.ole.docstore.service.DocumentIngester;
42 import org.kuali.ole.docstore.service.ServiceLocator;
43 import org.kuali.ole.pojo.OleException;
44 import org.kuali.ole.repository.CheckoutManager;
45 import org.kuali.ole.repository.NodeHandler;
46 import org.kuali.rice.krad.service.BusinessObjectService;
47 import org.kuali.rice.krad.service.KRADServiceLocator;
48 import org.slf4j.Logger;
49 import org.slf4j.LoggerFactory;
50 import org.kuali.rice.core.api.config.property.ConfigContext;
51
52 import javax.jcr.Binary;
53 import javax.jcr.Node;
54 import javax.jcr.NodeIterator;
55 import javax.jcr.Session;
56 import java.io.ByteArrayInputStream;
57 import java.io.IOException;
58 import java.sql.*;
59 import java.text.DateFormat;
60 import java.text.SimpleDateFormat;
61 import java.util.*;
62 import java.util.Date;
63
64
65
66
67
68
69
70 public class RebuildIndexesHandler
71 implements Runnable {
72
73 private static final Logger LOG = LoggerFactory.getLogger(RebuildIndexesHandler.class);
74 private static RebuildIndexesHandler reBuilder = null;
75 private boolean isRunning = false;
76 private boolean isStop = false;
77 private static final Logger logger = LoggerFactory.getLogger(RebuildIndexesHandler.class);
78 private String docCategory;
79 private String docType;
80 private String docFormat;
81 private BibInfoStatistics bibInfoStatistics = null;
82 private CheckoutManager checkoutManager;
83
84 private int batchSize;
85 private int startIndex;
86 private int endIndex;
87 private String updateDate;
88
89 public static String EXCEPION_FILE_NAME = "";
90 public static String STATUS_FILE_NAME = "";
91 public static String STORAGE_EXCEPTION_FILE_NAME = "";
92 public static String STORAGE_STATUS_FILE_NAME = "";
93 public static BatchBibTreeDBUtil bibTreeDBUtil = new BatchBibTreeDBUtil();
94
95 private String filePath = System.getProperty("solr.solr.home");
96 public synchronized void setRunning(boolean running) {
97 isRunning = running;
98 }
99
100 public synchronized void setStop(boolean stop) {
101 isStop = stop;
102 }
103
/** Private so that instances are only obtained through {@code getInstance()}. */
private RebuildIndexesHandler() {
    this.checkoutManager = new CheckoutManager();
}
107
108 public static RebuildIndexesHandler getInstance() {
109 if (reBuilder == null) {
110 reBuilder = new RebuildIndexesHandler();
111 }
112 return reBuilder;
113 }
114
115
116
117
118
119
120 public synchronized boolean isRunning() {
121 return isRunning;
122 }
123
124 public synchronized boolean isStop() {
125 return isStop;
126 }
127
128
129
130
131 public String startProcess(String docCategory, String docType, String docFormat) throws InterruptedException {
132 String status = null;
133 if (isRunning()) {
134 status = "ReIndexing process is already running. Click 'Show Status' button to know the status. ";
135 } else {
136 setRunning(true);
137 setStop(false);
138 status = "ReIndexing process has started. Click 'Show Status' button to know the status. ";
139 ReIndexingStatus reIndexingStatus = ReIndexingStatus.getInstance();
140 reIndexingStatus.reset();
141 if (docCategory == null || docCategory.equals("")) {
142 docCategory = "all";
143 }
144 if (docType == null || docType.equals("")) {
145 docType = "all";
146 }
147 if (docFormat == null || docType.equals("")) {
148 docFormat = "all";
149 }
150 this.docCategory = docCategory;
151 this.docType = docType;
152 this.docFormat = docFormat;
153 Thread reBuilderThread = new Thread(this);
154 reBuilderThread.start();
155
156 setRunning(false);
157 }
158 return status;
159 }
160 public String startProcess(String docCategory, String docType, String docFormat, int batchSize, int startIndex, int endIndex,String updateDate) throws InterruptedException {
161 String status = null;
162 if (isRunning()) {
163 status = "ReIndexing process is already running. Click 'Show Status' button to know the status. ";
164 } else {
165 setRunning(true);
166 setStop(false);
167 status = "ReIndexing process has started. Click 'Show Status' button to know the status. ";
168 ReIndexingStatus reIndexingStatus = ReIndexingStatus.getInstance();
169 reIndexingStatus.reset();
170 if (docCategory == null || docCategory.equals("")) {
171 docCategory = "all";
172 }
173 if (docType == null || docType.equals("")) {
174 docType = "all";
175 }
176 if (docFormat == null || docType.equals("")) {
177 docFormat = "all";
178 }
179 this.docCategory = docCategory;
180 this.docType = docType;
181 this.docFormat = docFormat;
182 this.batchSize = batchSize;
183 this.startIndex = startIndex;
184 this.endIndex = endIndex;
185 this.updateDate=updateDate;
186 Thread reBuilderThread = new Thread(this);
187 reBuilderThread.start();
188
189 setRunning(false);
190 }
191 return status;
192 }
193
194 public String stopProcess() throws Exception {
195 String status = null;
196 if (isRunning()) {
197 status = "ReIndexing process is running. ReIndexing will stop after current batch. ";
198 setStop(true);
199 setRunning(false);
200 } else {
201 status = "ReIndexing process is not running.";
202 }
203 return status;
204
205 }
206
207 public void run() {
208 DocCategoryTypeFormat docCategoryTypeFormat = new DocCategoryTypeFormat();
209 List<String> categoryList = docCategoryTypeFormat.getCategories();
210 List<String> typeList = null;
211 List<String> formatList = null;
212 for (String docCategoryCurr : categoryList) {
213 if (docCategory.equals("all") || docCategory.equals(docCategoryCurr)) {
214 typeList = docCategoryTypeFormat.getDocTypes(docCategoryCurr);
215 for (String docTypeCurr : typeList) {
216 if (docType.equals("all") || docType.equals(docTypeCurr)) {
217 formatList = docCategoryTypeFormat.getDocFormats(docCategoryCurr, docTypeCurr);
218 for (String docFormatCurr : formatList) {
219 if (docFormat.equals("all") || docFormat.equals(docFormatCurr)) {
220 if (!isStop()) {
221 ReIndexingStatus.getInstance()
222 .startDocType(docCategoryCurr, docTypeCurr, docFormatCurr);
223 reIndex(docCategoryCurr, docTypeCurr, docFormatCurr);
224 } else {
225 return;
226 }
227 }
228 }
229 }
230 }
231 }
232 }
233 setRunning(false);
234 }
235
236 private void reIndex(String docCategory, String docType, String docFormat) {
237 Session session = null;
238 setRunning(true);
239 logger.info("Rebuild Indexes Run(" + docCategory + " : " + docType + " : " + docFormat + "): ");
240 try {
241 if (docCategory.equals(DocCategory.WORK.getCode())) {
242 if (docType.equals(DocType.BIB.getDescription())) {
243 if (docFormat.equals(DocFormat.MARC.getCode()) || docFormat.equals(DocFormat.DUBLIN_CORE.getCode())
244 || docFormat.equals(DocFormat.DUBLIN_UNQUALIFIED.getCode())) {
245 org.springframework.util.StopWatch stopWatch = new org.springframework.util.StopWatch();
246 stopWatch.start("total time taken");
247 Date date = new Date();
248 EXCEPION_FILE_NAME = "ReindexErrors-" + date.toString() + ".txt";
249 STATUS_FILE_NAME = "ReindexBatchStatus-" + date.toString() + ".txt";
250 BatchBibTreeDBUtil.writeStatusToFile(filePath, RebuildIndexesHandler.EXCEPION_FILE_NAME, "Reindex started at:" + date);
251 BibHoldingItemReindexer bibHoldingItemReindexer = BibHoldingItemReindexer.getInstance();
252 bibHoldingItemReindexer.setTotalBatchStatistics(new ReindexBatchStatistics());
253 bibHoldingItemReindexer.index(batchSize, startIndex, endIndex,updateDate);
254 date = new Date();
255 BatchBibTreeDBUtil.writeStatusToFile(filePath, RebuildIndexesHandler.EXCEPION_FILE_NAME, "Reindex ended at:" + date);
256 stopWatch.stop();
257 logger.info(stopWatch.prettyPrint());
258
259 } else {
260 logger.info(
261 "Rebuild Indexes Run(" + docCategory + " : " + docType + " : " + docFormat + "): FAIL");
262 }
263 } else if (docType.equals(DocType.INSTANCE.getDescription())) {
264 if (docFormat.equals(DocFormat.OLEML.getCode())) {
265 workInstanceOLEML(docCategory, docType, docFormat);
266 } else {
267 logger.info(
268 "Rebuild Indexes Run(" + docCategory + " : " + docType + " : " + docFormat + "): FAIL");
269 }
270 } else if (docType.equals(DocType.LICENSE.getDescription())) {
271 if (docFormat.equals(DocFormat.ONIXPL.getCode()) || docFormat.equals(DocFormat.PDF.getCode())
272 || docFormat.equals(DocFormat.DOC.getCode())) {
273 workLicense(docCategory, docType, docFormat);
274 } else {
275 logger.info(
276 "Rebuild Indexes Run(" + docCategory + " : " + docType + " : " + docFormat + "): FAIL");
277 }
278 } else if (docType.equals(DocType.EINSTANCE.getCode())) {
279 if (docFormat.equals(DocFormat.OLEML.getCode())) {
280 workEInstanceOLEML(docCategory, docType, docFormat);
281 } else {
282 logger.info(
283 "Rebuild Indexes Run(" + docCategory + " : " + docType + " : " + docFormat + "): FAIL");
284 }
285 }
286 }
287 } catch (Exception e) {
288 logger.info(e.getMessage(), e);
289 } finally {
290 try {
291 if (isStop) {
292 ReIndexingStatus.getInstance().getDocTypeList().setStatus("Stopped");
293 } else {
294 ReIndexingStatus.getInstance().getDocTypeList().setStatus("Done");
295 }
296 RepositoryManager.getRepositoryManager().logout(session);
297 } catch (OleException e) {
298 logger.error(e.getMessage(), e);
299 }
300 }
301
302 }
303
304 private void workEInstanceOLEML(String docCategory, String docType, String docFormat) {
305 long totalCount = 0;
306 long nodeCount = 0;
307 List<RequestDocument> docs = new ArrayList<RequestDocument>();
308 WorkEInstanceOlemlRecordProcessor workEInstanceOlemlRecordProcessor = new WorkEInstanceOlemlRecordProcessor();
309 try {
310 RequestDocument rd = new RequestDocument();
311 rd.setCategory(docCategory);
312 rd.setType(docType);
313 rd.setFormat(docFormat);
314 List<ReIndexingBatchStatus> batchStatusList = new ArrayList<ReIndexingBatchStatus>();
315 BusinessObjectService businessObjectService = KRADServiceLocator.getBusinessObjectService();
316 List<EInstanceRecord> instanceRecords = (List<EInstanceRecord>) businessObjectService.findAll(EInstanceRecord.class);
317 StopWatch loadTimer = new StopWatch();
318 StopWatch batchTimer = new StopWatch();
319 loadTimer.start();
320 batchTimer.start();
321 for (int i = 0; i < instanceRecords.size(); i++) {
322 if (docs.size() == ProcessParameters.BULK_PROCESSOR_SPLIT_SIZE) {
323 if (!isStop()) {
324 ReIndexingBatchStatus reIndexingBatchStatus = indexBeforeParams(loadTimer);
325 indexDocs(docs, totalCount, nodeCount, batchStatusList, reIndexingBatchStatus);
326 indexAfterParams(batchTimer, reIndexingBatchStatus, batchStatusList);
327 resetTimers(batchTimer, loadTimer);
328 totalCount = 0;
329 logger.info("Rebuild");
330 } else {
331 return;
332 }
333 } else {
334 EInstanceRecord instanceRecord = instanceRecords.get(i);
335 String uuid = DocumentUniqueIDPrefix.getPrefixedId(instanceRecord.getUniqueIdPrefix(), instanceRecord.geteInstanceIdentifier());
336 RequestDocument requestDocument = buildRequestDocumentForCheckout(docCategory, docType, docFormat, uuid);
337 ResponseDocument responseDocument = RdbmsWorkEInstanceDocumentManager.getInstance().checkoutContent(requestDocument, businessObjectService);
338 String content = responseDocument.getContent().getContent();
339 RequestDocument requestDocumentForIndex = (RequestDocument) rd.clone();
340 requestDocumentForIndex.setAdditionalAttributes(responseDocument.getAdditionalAttributes());
341 requestDocumentForIndex.setId(uuid);
342 requestDocumentForIndex.setUuid(uuid);
343 org.kuali.ole.docstore.model.xmlpojo.work.einstance.oleml.InstanceCollection instanceCollection = workEInstanceOlemlRecordProcessor.fromXML(content);
344 content = workEInstanceOlemlRecordProcessor.toXML(instanceCollection);
345 Content contentObj = new Content();
346 contentObj.setContent(content);
347 contentObj.setContentObject(instanceCollection);
348 requestDocumentForIndex.setContent(contentObj);
349 docs.add(requestDocumentForIndex);
350 totalCount++;
351 }
352 }
353 if (docs.size() > 0 && !isStop()) {
354 ReIndexingBatchStatus reIndexingBatchStatus = indexBeforeParams(loadTimer);
355 indexDocs(docs, totalCount, nodeCount, batchStatusList, reIndexingBatchStatus);
356 indexAfterParams(batchTimer, reIndexingBatchStatus, batchStatusList);
357 }
358 } catch (Exception e) {
359 logger.error(
360 "Rebuild Indexes Process(" + docCategory + " : " + docType + " : " + docFormat + ") Processed(" + (
361 totalCount - docs.size()) + "), Failed @ batch(" + docs.size() + "): Cause: " + e, e);
362 } finally {
363 if (isStop) {
364 ReIndexingStatus.getInstance().getDocTypeList().setStatus("Stopped");
365 } else {
366 ReIndexingStatus.getInstance().getDocTypeList().setStatus("Done");
367 }
368 }
369
370 }
371
372
373 private void indexBibDocs(List<BibTree> bibTreeList, long records, long recCount,
374 List<ReIndexingBatchStatus> batchStatusList, ReIndexingBatchStatus reIndexingBatchStatus) {
375 StopWatch indexTimer = new StopWatch();
376 DateFormat dateFormat = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss:SSS");
377 try {
378 Date startDate = new Date();
379 reIndexingBatchStatus.setBatchStartTime(dateFormat.format(startDate));
380 indexTimer.start();
381 reIndexingBatchStatus.setStatus("Indexing");
382 reIndexingBatchStatus.setBatchIndexingTime(indexTimer.toString());
383 reIndexingBatchStatus.setRecordsProcessed(records);
384 reIndexingBatchStatus.setBatchEndTime(" ");
385 batchStatusList.add(reIndexingBatchStatus);
386 ReIndexingStatus.getInstance().getDocTypeList().setReIndBatStatusList(batchStatusList);
387 DocumentIndexer documentIndexer = BibMarcIndexer.getInstance();
388 BibTrees bibTrees = new BibTrees();
389 bibTrees.getBibTrees().addAll(bibTreeList);
390 documentIndexer.createTrees(bibTrees);
391
392 indexTimer.stop();
393 Date endDate = new Date();
394 reIndexingBatchStatus.setBatchEndTime(dateFormat.format(endDate));
395 reIndexingBatchStatus.setBatchIndexingTime(indexTimer.toString());
396 reIndexingBatchStatus.setRecordsProcessed(records);
397 reIndexingBatchStatus.setStatus("Done");
398 reIndexingBatchStatus.setRecordsRemaining(recCount - records);
399 ReIndexingStatus.getInstance().getDocTypeList().setReIndBatStatusList(batchStatusList);
400 bibTreeList.clear();
401 } catch (Exception e) {
402 String firstBibId = bibTreeList.get(0).getBib().getId();
403 String lastBibId = bibTreeList.get(bibTreeList.size()-1).getBib().getId();
404 logger.error(
405 "Rebuild Indexes Process(" + docCategory + " : " + docType + " : " + docFormat + ") Processed(" + (
406 records - bibTreeList.size()) + "), Failed @ bibId( First BibId: " + firstBibId + " : Last BibId : "+ lastBibId +"): Cause: " + e, e);
407 indexTimer.stop();
408 Date endDate = new Date();
409 reIndexingBatchStatus.setBatchEndTime(dateFormat.format(endDate));
410 reIndexingBatchStatus.setBatchIndexingTime(indexTimer.toString());
411 reIndexingBatchStatus.setRecordsProcessed(0L);
412 reIndexingBatchStatus.setStatus("Done");
413 reIndexingBatchStatus.setRecordsRemaining(recCount - records);
414 ReIndexingStatus.getInstance().getDocTypeList().setReIndBatStatusList(batchStatusList);
415 bibTreeList.clear();
416 }
417 }
418
419
420 private void indexDocs(List<RequestDocument> docs, long records, long recCount,
421 List<ReIndexingBatchStatus> batchStatusList, ReIndexingBatchStatus reIndexingBatchStatus) {
422 try {
423 StopWatch indexTimer = new StopWatch();
424 DateFormat dateFormat = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss:SSS");
425 Date startDate = new Date();
426 reIndexingBatchStatus.setBatchStartTime(dateFormat.format(startDate));
427 indexTimer.start();
428 reIndexingBatchStatus.setStatus("Indexing");
429 reIndexingBatchStatus.setBatchIndexingTime(indexTimer.toString());
430 reIndexingBatchStatus.setRecordsProcessed(records);
431 reIndexingBatchStatus.setBatchEndTime(" ");
432 batchStatusList.add(reIndexingBatchStatus);
433 ReIndexingStatus.getInstance().getDocTypeList().setReIndBatStatusList(batchStatusList);
434 IndexerService indexerService = BeanLocator.getDocstoreFactory().getDocumentIndexManager(docs.get(0).getCategory(), docs.get(0).getType(), docs.get(0).getFormat());
435 String result = indexerService.indexDocuments(docs, false);
436 logger.debug(result);
437 indexTimer.stop();
438 Date endDate = new Date();
439 reIndexingBatchStatus.setBatchEndTime(dateFormat.format(endDate));
440 reIndexingBatchStatus.setBatchIndexingTime(indexTimer.toString());
441 reIndexingBatchStatus.setRecordsProcessed(records);
442 reIndexingBatchStatus.setStatus("Done");
443 reIndexingBatchStatus.setRecordsRemaining(recCount - records);
444 ReIndexingStatus.getInstance().getDocTypeList().setReIndBatStatusList(batchStatusList);
445 docs.clear();
446 } catch (Exception e) {
447 logger.error("Rebuild Indexes Processed(" + (records - docs.size()) + "), Failed @ batch(" + docs.size()
448 + "): Cause: " + e + "\n\tContinuous", e);
449 }
450 }
451
452 private void workBibMarcAndDublinAll(String docCategory, String docType, String docFormat) throws SolrServerException, IOException {
453 long totalCount = 0;
454 long nodeCount = 0;
455 int start = 0;
456 String sqlQuery = null;
457 long startTime = 0;
458 long commitEndTime = 0;
459 long commitStartTime = 0;
460 int batchSize = 50000;
461 int commitSize = 50000;
462 long endIndexBatch = 0;
463 String dbVendor = ConfigContext.getCurrentContextConfig().getProperty("db.vendor");
464 List<BibTree> bibTrees = new ArrayList<BibTree>();
465
466 try {
467 String prefix = DocumentUniqueIDPrefix.getPrefix(docCategory, docType, docFormat);
468 Map prefixMap = new HashMap(0);
469 prefixMap.put("uniqueIdPrefix", prefix);
470 startTime = System.currentTimeMillis();
471
472 BusinessObjectService businessObjectService = KRADServiceLocator.getBusinessObjectService();
473 int bibCount = businessObjectService.countMatching(BibRecord.class, prefixMap);
474 Connection connection = null;
475 PreparedStatement preparedStatement = null;
476 if (bibCount > 0) {
477 connection = getConnection();
478 if (dbVendor.equalsIgnoreCase("mysql")) {
479 sqlQuery = "select * from ole_ds_bib_t b ORDER BY b.bib_id LIMIT ?,?";
480 } else {
481 sqlQuery = "select * from (select b.*,ROWNUM r from OLE_DS_BIB_T b) where r between ? and ?";
482 }
483 preparedStatement = connection.prepareStatement(sqlQuery);
484 }
485 List<ReIndexingBatchStatus> batchStatusList = new ArrayList<ReIndexingBatchStatus>();
486 StopWatch loadTimer = new StopWatch();
487 StopWatch batchTimer = new StopWatch();
488 loadTimer.start();
489 batchTimer.start();
490
491 for (int i = 0; i < bibCount; i++) {
492 if (bibTrees.size() == batchSize) {
493 if (!isStop()) {
494
495 ReIndexingBatchStatus reIndexingBatchStatus = indexBeforeParams(loadTimer);
496 indexBibDocs(bibTrees, totalCount, nodeCount, batchStatusList, reIndexingBatchStatus);
497 indexAfterParams(batchTimer, reIndexingBatchStatus, batchStatusList);
498 resetTimers(batchTimer, loadTimer);
499 totalCount = 0;
500 logger.info("Rebuild");
501 i = start;
502 if (start % commitSize == 0) {
503 commitStartTime = System.currentTimeMillis();
504 logger.info("Time elapsed since start ====>>>>>> " + (commitStartTime - startTime));
505 logger.info("Time elapsed since last commit ====>>>>>> " + (commitStartTime - commitEndTime));
506 logger.info("commit started ====>>>>>> " + commitStartTime);
507 SolrServer solr = SolrServerManager.getInstance().getSolrServer();
508 solr.commit();
509 logger.info("No..of records committed ====>>>>>> " + start);
510 commitEndTime = System.currentTimeMillis();
511 logger.info("Time Taken for commit ====>>>>>> " + (commitEndTime - commitStartTime));
512
513 }
514 } else {
515 return;
516 }
517 } else {
518 if (start < bibCount) {
519 long b2time = System.currentTimeMillis();
520 if (dbVendor.equalsIgnoreCase("mysql")) {
521 preparedStatement.setInt(1, start);
522 preparedStatement.setInt(2, batchSize);
523 } else {
524 preparedStatement.setInt(1, start + 1);
525 preparedStatement.setInt(2, start + batchSize);
526 }
527 ResultSet resultSet = preparedStatement.executeQuery();
528 logger.info("time taking for getting records from DB end======>>>>>" + (System.currentTimeMillis() - b2time));
529 while (resultSet.next()) {
530
531 BibTree bibTree = new BibTree();
532 Bib bib = new BibMarc();
533 bib.setCategory(docCategory);
534 bib.setType(docType);
535 bib.setFormat(docFormat);
536 bib.setCreatedBy(resultSet.getString("CREATED_BY"));
537 bib.setCreatedOn(resultSet.getString("DATE_CREATED"));
538 bib.setStaffOnly((resultSet.getString("STAFF_ONLY").equalsIgnoreCase("Y") ? Boolean.TRUE : Boolean.FALSE));
539 bib.setContent(resultSet.getString("CONTENT"));
540 bib.setUpdatedBy(resultSet.getString("UPDATED_BY"));
541 bib.setUpdatedOn(resultSet.getString("DATE_UPDATED"));
542 bib.setLastUpdated(resultSet.getString("DATE_UPDATED"));
543 bib.setStatus(resultSet.getString("STATUS"));
544 bib.setStatusUpdatedBy(resultSet.getString("STATUS_UPDATED_BY"));
545 bib.setStatusUpdatedOn(resultSet.getString("STATUS_UPDATED_DATE"));
546 String uuid = DocumentUniqueIDPrefix.getPrefixedId(resultSet.getString("UNIQUE_ID_PREFIX"), resultSet.getString(1));
547 bib.setId(uuid);
548 bib.setLocalId(uuid);
549 bibTree.setBib(bib);
550
551 start++;
552 totalCount++;
553 bibTrees.add(bibTree);
554 }
555 resultSet.close();
556 }
557 }
558
559 }
560 if (bibTrees.size() > 0 && !isStop()) {
561 ReIndexingBatchStatus reIndexingBatchStatus = indexBeforeParams(loadTimer);
562 indexBibDocs(bibTrees, totalCount, nodeCount, batchStatusList, reIndexingBatchStatus);
563 indexAfterParams(batchTimer, reIndexingBatchStatus, batchStatusList);
564 commitStartTime = System.currentTimeMillis();
565 logger.info("commit started : ----> " + commitStartTime);
566 SolrServer solr = SolrServerManager.getInstance().getSolrServer();
567 solr.commit();
568 logger.info("No..of records committed : ----> " + start);
569 commitEndTime = System.currentTimeMillis();
570 logger.info("Time Taken for commit ======>>> " + (commitEndTime - commitStartTime));
571
572 }
573 endIndexBatch = System.currentTimeMillis();
574 logger.info("Time elapsed since end ====>>>>>> " + endIndexBatch);
575 } catch (Exception e) {
576 logger.error(
577 "Rebuild Indexes Process(" + docCategory + " : " + docType + " : " + docFormat + ") Processed(" + (
578 totalCount - bibTrees.size()) + "), Failed @ batch(" + bibTrees.size() + "): Cause: " + e, e);
579 } finally {
580 if (isStop) {
581 ReIndexingStatus.getInstance().getDocTypeList().setStatus("Stopped");
582 } else {
583 ReIndexingStatus.getInstance().getDocTypeList().setStatus("Done");
584 }
585 }
586 }
587
588 private void workInstanceOLEML(String docCategory, String docType, String docFormat) {
589 long totalCount = 0;
590 long nodeCount = 0;
591 List<RequestDocument> docs = new ArrayList<RequestDocument>();
592 InstanceOlemlRecordProcessor instanceOlemlRecordProcessor = new InstanceOlemlRecordProcessor();
593 try {
594 RequestDocument rd = new RequestDocument();
595 rd.setCategory(docCategory);
596 rd.setType(docType);
597 rd.setFormat(docFormat);
598 List<ReIndexingBatchStatus> batchStatusList = new ArrayList<ReIndexingBatchStatus>();
599 BusinessObjectService businessObjectService = KRADServiceLocator.getBusinessObjectService();
600 List<InstanceRecord> instanceRecords = (List<InstanceRecord>) businessObjectService.findAll(InstanceRecord.class);
601 StopWatch loadTimer = new StopWatch();
602 StopWatch batchTimer = new StopWatch();
603 loadTimer.start();
604 batchTimer.start();
605 for (int i = 0; i < instanceRecords.size(); i++) {
606 if (docs.size() == ProcessParameters.BULK_PROCESSOR_SPLIT_SIZE) {
607 if (!isStop()) {
608 ReIndexingBatchStatus reIndexingBatchStatus = indexBeforeParams(loadTimer);
609 indexDocs(docs, totalCount, nodeCount, batchStatusList, reIndexingBatchStatus);
610 indexAfterParams(batchTimer, reIndexingBatchStatus, batchStatusList);
611 resetTimers(batchTimer, loadTimer);
612 totalCount = 0;
613 logger.info("Rebuild");
614 } else {
615 return;
616 }
617 } else {
618 InstanceRecord instanceRecord = instanceRecords.get(i);
619 String uuid = DocumentUniqueIDPrefix.getPrefixedId(instanceRecord.getUniqueIdPrefix(), instanceRecord.getInstanceId());
620 RequestDocument requestDocument = buildRequestDocumentForCheckout(docCategory, docType, docFormat, uuid);
621 ResponseDocument responseDocument = RdbmsWorkInstanceDocumentManager.getInstance().checkoutContent(requestDocument, businessObjectService);
622 String content = responseDocument.getContent().getContent();
623 RequestDocument requestDocumentForIndex = (RequestDocument) rd.clone();
624 requestDocumentForIndex.setAdditionalAttributes(responseDocument.getAdditionalAttributes());
625 requestDocumentForIndex.setId(uuid);
626 requestDocumentForIndex.setUuid(uuid);
627 InstanceCollection instanceCollection = instanceOlemlRecordProcessor.fromXML(content);
628
629
630
631
632 content = instanceOlemlRecordProcessor.toXML(instanceCollection);
633 Content contentObj = new Content();
634 contentObj.setContent(content);
635 contentObj.setContentObject(instanceCollection);
636 requestDocumentForIndex.setContent(contentObj);
637 docs.add(requestDocumentForIndex);
638 totalCount++;
639 }
640 }
641 if (docs.size() > 0 && !isStop()) {
642 ReIndexingBatchStatus reIndexingBatchStatus = indexBeforeParams(loadTimer);
643 indexDocs(docs, totalCount, nodeCount, batchStatusList, reIndexingBatchStatus);
644 indexAfterParams(batchTimer, reIndexingBatchStatus, batchStatusList);
645 }
646 } catch (Exception e) {
647 logger.error(
648 "Rebuild Indexes Process(" + docCategory + " : " + docType + " : " + docFormat + ") Processed(" + (
649 totalCount - docs.size()) + "), Failed @ batch(" + docs.size() + "): Cause: " + e, e);
650 } finally {
651 if (isStop) {
652 ReIndexingStatus.getInstance().getDocTypeList().setStatus("Stopped");
653 } else {
654 ReIndexingStatus.getInstance().getDocTypeList().setStatus("Done");
655 }
656 }
657 }
658
659 private RequestDocument buildRequestDocumentForCheckout(String docCategory, String docType, String docFormat, String uuid) {
660 RequestDocument requestDocument = new RequestDocument();
661 requestDocument.setCategory(docCategory);
662 requestDocument.setType(docType);
663 requestDocument.setFormat(docFormat);
664 requestDocument.setUuid(uuid);
665 return requestDocument;
666 }
667
668 private void linkingInstanceWithBib(InstanceCollection instanceCollection, Session session, Node fileNode) {
669 for (Instance instance : instanceCollection.getInstance()) {
670 instance.getResourceIdentifier().clear();
671 for (FormerIdentifier frids : instance.getFormerResourceIdentifier()) {
672 try {
673 if (frids != null && frids.getIdentifier() != null &&
674 frids.getIdentifier().getIdentifierValue() != null &&
675 frids.getIdentifier().getIdentifierValue().trim().length() >= 0) {
676 List<SolrDocument> solrBibDocs = ServiceLocator.getIndexerService()
677 .getSolrDocument("SystemControlNumber",
678 frids.getIdentifier()
679 .getIdentifierValue());
680 SolrInputDocument solrInputDocument = new SolrInputDocument();
681 WorkBibMarcDocBuilder marcDocBuilder = new WorkBibMarcDocBuilder();
682 List<SolrInputDocument> solrInputDocs = new ArrayList<SolrInputDocument>();
683 if (solrBibDocs != null && solrBibDocs.size() > 0) {
684 for (SolrDocument solrbibDoc : solrBibDocs) {
685 if (checkApplicability(frids.getIdentifier().getIdentifierValue(),
686 solrbibDoc.getFieldValue("SystemControlNumber"))) {
687
688 compareObjNAddValue(instance.getInstanceIdentifier(),
689 solrbibDoc.getFieldValue("instanceIdentifier"), solrbibDoc,
690 "instanceIdentifier");
691 solrInputDocument = new SolrInputDocument();
692 marcDocBuilder.buildSolrInputDocFromSolrDoc(solrbibDoc, solrInputDocument);
693 solrInputDocs.add(solrInputDocument);
694 String bibId = compareListRString(solrbibDoc.getFieldValue("id"));
695 instance.getResourceIdentifier().add(bibId);
696 modifyContentAddLinkedIdsInDocStore(instance, bibId, session, fileNode);
697 indexSolrDocs(solrInputDocs);
698 }
699 }
700 }
701 }
702 } catch (Exception e) {
703 logger.error("error message" + e.getMessage(), e);
704 }
705 }
706 }
707 }
708
709 private void modifyContentAddLinkedIdsInDocStore(Instance instance, String id, Session session, Node fileNode) {
710
711 try {
712 Node bibNode = getNodeByUUID(session, id);
713 bibNode.setProperty("instanceIdentifier", instance.getInstanceIdentifier());
714 fileNode.setProperty("bibIdentifier", id);
715
716 InstanceOlemlRecordProcessor recordProcessor = new InstanceOlemlRecordProcessor();
717 NodeIterator nodeIterator = fileNode.getNodes();
718 while (nodeIterator.hasNext()) {
719 Node instNode = nodeIterator.nextNode();
720 if (instNode.getName().equalsIgnoreCase("instanceFile")) {
721 InstanceCollection instCol = new InstanceCollection();
722 Instance inst = new Instance();
723 inst.setResourceIdentifier(instance.getResourceIdentifier());
724 inst.setFormerResourceIdentifier(instance.getFormerResourceIdentifier());
725 inst.setExtension(instance.getExtension());
726 inst.setInstanceIdentifier(instance.getInstanceIdentifier());
727 List<Instance> instanceList = new ArrayList<Instance>();
728 instanceList.add(inst);
729 instCol.setInstance(instanceList);
730
731 byte[] documentBytes = recordProcessor.toXML(instCol).getBytes();
732 Binary binary = null;
733 if (documentBytes != null && instNode != null && documentBytes.length > 0) {
734 binary = session.getValueFactory().createBinary(new ByteArrayInputStream(documentBytes));
735 instNode.getNode("jcr:content").setProperty("jcr:data", binary);
736 }
737 }
738 }
739 } catch (Exception e) {
740 logger.error("error while updating Docstore in reindexing Process" + e.getMessage(), e);
741 }
742 }
743
744 private void indexSolrDocs(List<SolrInputDocument> solrInputDocs) {
745
746 try {
747 ServiceLocator.getIndexerService().indexSolrDocuments(solrInputDocs);
748 logger.info("Linking Bib and Instance Records (" + solrInputDocs.size() + "): ");
749 solrInputDocs.clear();
750 } catch (Exception e) {
751 logger.error(
752 "Linking Bib and Instance Records (" + (solrInputDocs.size()) + "), Failed @ batch(" + solrInputDocs
753 .size() + "): Cause: " + e + "\n\tContinuous", e);
754 }
755 }
756
757
758 private boolean checkApplicability(Object value, Object fieldValue) {
759 if (fieldValue instanceof Collection) {
760 for (Object object : (Collection) fieldValue) {
761 if (object.equals(value)) {
762 return true;
763 }
764 }
765 return false;
766 } else {
767 return value.equals(fieldValue);
768 }
769 }
770
771
772 private String compareListRString(Object id) {
773 if (id != null) {
774 if (id instanceof List) {
775 List<String> idList = (List<String>) id;
776 return idList.get(0);
777 } else if (id instanceof String) {
778 String strId = (String) id;
779 return strId;
780 }
781 }
782 return null;
783 }
784
785 private void compareObjNAddValue(String id, Object idObj, SolrDocument solrDoc, String identifier) {
786 if (idObj != null) {
787 if (idObj instanceof List) {
788 List<String> instBibIdList = (List<String>) idObj;
789 if (!instBibIdList.contains(id)) {
790 solrDoc.addField(identifier, id);
791 }
792 } else if (idObj instanceof String) {
793 String instBibId = (String) idObj;
794 if (!instBibId.equalsIgnoreCase(id)) {
795 solrDoc.addField(identifier, id);
796 }
797 }
798 } else {
799 solrDoc.addField(identifier, id);
800 }
801 }
802
803 private void workLicense(String docCategory, String docType, String docFormat) {
804 Session session = null;
805 long totalCount = 0;
806 long nodeCount = 0;
807 List<RequestDocument> docs = new ArrayList<RequestDocument>();
808 try {
809 session = RepositoryManager.getRepositoryManager().getSession(ProcessParameters.BULK_DEFAULT_USER,
810 ProcessParameters.BULK_DEFUALT_ACTION);
811 RequestDocument rd = new RequestDocument();
812 rd.setCategory(docCategory);
813 rd.setType(docType);
814 rd.setFormat(docFormat);
815 DocumentIngester docIngester = new DocumentIngester();
816 Node nodeFormat = docIngester.getStaticFormatNode(rd, session);
817 NodeIterator nodesL1 = nodeFormat.getNodes();
818 List<ReIndexingBatchStatus> batchStatusList = new ArrayList<ReIndexingBatchStatus>();
819 StopWatch loadTimer = new StopWatch();
820 StopWatch batchTimer = new StopWatch();
821 loadTimer.start();
822 RepositoryBrowser repositoryBrowser = new RepositoryBrowser();
823 while (nodesL1.hasNext()) {
824 Node nodeL1 = nodesL1.nextNode();
825 NodeIterator nodesFile = nodeL1.getNodes();
826 nodeCount = nodesFile.getSize();
827 batchTimer.start();
828 while (nodesFile.hasNext()) {
829 if (docs.size() == ProcessParameters.BULK_PROCESSOR_SPLIT_SIZE && !isStop()) {
830 if (!isStop()) {
831 ReIndexingBatchStatus reIndexingBatchStatus = indexBeforeParams(loadTimer);
832 indexDocs(docs, totalCount, nodeCount, batchStatusList, reIndexingBatchStatus);
833 indexAfterParams(batchTimer, reIndexingBatchStatus, batchStatusList);
834 resetTimers(batchTimer, loadTimer);
835 totalCount = 0;
836 logger.info("Rebuild");
837 } else {
838 return;
839 }
840 } else {
841
842 Node fileNode = nodesFile.nextNode();
843 String content = null;
844 if (docFormat.equals(DocFormat.ONIXPL.getCode())) {
845 content = checkoutManager.getData(fileNode);
846 } else if (docFormat.equals(DocFormat.PDF.getCode()) || docFormat
847 .equals(DocFormat.DOC.getCode())) {
848 content = checkoutManager
849 .checkOutBinary(fileNode.getIdentifier(), ProcessParameters.BULK_DEFAULT_USER,
850 ProcessParameters.BULK_DEFUALT_ACTION, docFormat);
851 }
852 RequestDocument reqDoc = (RequestDocument) rd.clone();
853 reqDoc.setId(fileNode.getIdentifier());
854 reqDoc.setUuid(fileNode.getIdentifier());
855 Content contentObj = new Content();
856 contentObj.setContent(content);
857 reqDoc.setContent(contentObj);
858 docs.add(reqDoc);
859 totalCount++;
860 }
861 }
862 }
863 if (docs.size() > 0 && !isStop()) {
864 ReIndexingBatchStatus reIndexingBatchStatus = indexBeforeParams(loadTimer);
865 indexDocs(docs, totalCount, nodeCount, batchStatusList, reIndexingBatchStatus);
866 indexAfterParams(batchTimer, reIndexingBatchStatus, batchStatusList);
867 }
868 } catch (Exception e) {
869 logger.error(
870 "Rebuild Indexes Process(" + docCategory + " : " + docType + " : " + docFormat + ") Processed(" + (
871 totalCount - docs.size()) + "), Failed @ batch(" + docs.size() + "): Cause: " + e, e);
872 } finally {
873 try {
874 if (isStop) {
875 ReIndexingStatus.getInstance().getDocTypeList().setStatus("Stopped");
876 } else {
877 ReIndexingStatus.getInstance().getDocTypeList().setStatus("Done");
878 }
879 RepositoryManager.getRepositoryManager().logout(session);
880 } catch (OleException e) {
881 logger.error(e.getMessage(), e);
882 }
883 }
884 }
885
886 private void resetTimers(StopWatch batchTimer, StopWatch loadTimer) {
887 batchTimer.reset();
888 batchTimer.start();
889 loadTimer.reset();
890 loadTimer.start();
891 }
892
893 private void indexAfterParams(StopWatch batchTimer, ReIndexingBatchStatus reIndexingBatchStatus,
894 List<ReIndexingBatchStatus> batchStatusList) {
895 batchTimer.stop();
896 reIndexingBatchStatus.setBatchTotalTime(batchTimer.toString());
897 ReIndexingStatus.getInstance().getDocTypeList().setReIndBatStatusList(batchStatusList);
898 }
899
900 private ReIndexingBatchStatus indexBeforeParams(StopWatch loadTimer) {
901 loadTimer.stop();
902 ReIndexingBatchStatus reIndexingBatchStatus = new ReIndexingBatchStatus();
903 reIndexingBatchStatus.setBatchTotalTime(" ");
904 reIndexingBatchStatus.setBatchLoadTime(loadTimer.toString());
905 return reIndexingBatchStatus;
906 }
907
908 private Node getNodeByUUID(Session newSession, String uuid) throws OleException {
909 return new NodeHandler().getNodeByUUID(newSession, uuid);
910 }
911
912 public Connection getConnection() {
913 Connection connection = null;
914 try {
915
916
917
918 String connectionUrl = ConfigContext.getCurrentContextConfig().getProperty("datasource.url");
919 String userName = ConfigContext.getCurrentContextConfig().getProperty("datasource.username");
920 String passWord = ConfigContext.getCurrentContextConfig().getProperty("datasource.password");
921 String driverName = ConfigContext.getCurrentContextConfig().getProperty("jdbc.driver");
922 Class.forName(driverName);
923 connection = DriverManager.getConnection(connectionUrl, userName, passWord);
924 } catch (Exception e) {
925 LOG.error("Exception : ", e);
926 }
927 return connection;
928 }
929
930 public String showStatus() {
931 BibHoldingItemReindexer bibHoldingItemReindexer = BibHoldingItemReindexer.getInstance();
932 return bibHoldingItemReindexer.showStats();
933 }
934
935 public String showBibStatus() {
936 if(bibInfoStatistics == null) {
937 bibInfoStatistics = new BibInfoStatistics();
938 }
939 return bibInfoStatistics.toString();
940 }
941
942
943 public String storeBibInfo(int batchSize) throws Exception {
944
945 Date date = new Date();
946 String STORAGE_EXCEPTION_FILE_NAME = "BibInfoLoadingErrors-" + date.toString() + ".txt";
947 String STORAGE_STATUS_FILE_NAME = "BibInfoLoadingStatus" + date.toString() + ".txt";
948
949 long startTime = System.currentTimeMillis();
950 bibInfoStatistics = new BibInfoStatistics();
951 bibInfoStatistics.setStartDateTime(date);
952
953 bibTreeDBUtil.init(0, 0,null);
954
955 int batchNo = 0;
956 int count = bibTreeDBUtil.storeBibInfo(batchSize, filePath, STORAGE_EXCEPTION_FILE_NAME, bibInfoStatistics, batchNo);
957 long batchStartTime = startTime;
958 long batchEndTime = System.currentTimeMillis();
959 long totalTimeForBatch = batchEndTime - batchStartTime;
960 BatchBibTreeDBUtil.writeStatusToFile(filePath, STORAGE_STATUS_FILE_NAME, "Time taken for batch " + totalTimeForBatch);
961 while(count > 0) {
962 Date batchStartDate = new Date();
963 batchStartTime = System.currentTimeMillis();
964 bibInfoStatistics.setBatchStartDateTime(batchStartDate);
965 count = bibTreeDBUtil.storeBibInfo(batchSize, filePath, STORAGE_EXCEPTION_FILE_NAME, bibInfoStatistics, batchNo++);
966 batchEndTime = System.currentTimeMillis();
967 Date batchEndDate = new Date();
968 bibInfoStatistics.setBatchEndDateTime(batchEndDate);
969 bibInfoStatistics.setBatchTotalTime((batchEndTime - batchStartTime));
970 totalTimeForBatch = batchEndTime - batchStartTime;
971 BatchBibTreeDBUtil.writeStatusToFile(filePath, STORAGE_STATUS_FILE_NAME, "Time taken for batch " + totalTimeForBatch);
972 }
973
974 long endTime = System.currentTimeMillis();
975 Date endDate = new Date();
976 bibInfoStatistics.setEndDateTime(endDate);
977 long totalTime = endTime - startTime;
978 bibInfoStatistics.setTotalTime(totalTime);
979 BatchBibTreeDBUtil.writeStatusToFile(filePath, STORAGE_STATUS_FILE_NAME, "Total Time taken " + totalTime);
980 return bibInfoStatistics.toString();
981 }
982
983 }