001package org.kuali.ole.docstore.process; 002 003import org.apache.commons.lang.time.StopWatch; 004import org.apache.solr.client.solrj.SolrServer; 005import org.apache.solr.client.solrj.SolrServerException; 006import org.apache.solr.common.SolrDocument; 007import org.apache.solr.common.SolrInputDocument; 008import org.kuali.ole.DocumentUniqueIDPrefix; 009import org.kuali.ole.RepositoryBrowser; 010import org.kuali.ole.RepositoryManager; 011import org.kuali.ole.docstore.common.document.*; 012import org.kuali.ole.docstore.common.document.content.instance.FormerIdentifier; 013import org.kuali.ole.docstore.common.document.content.instance.Instance; 014import org.kuali.ole.docstore.common.document.content.instance.xstream.InstanceOlemlRecordProcessor; 015import org.kuali.ole.docstore.common.util.BatchBibTreeDBUtil; 016import org.kuali.ole.docstore.common.util.BibInfoStatistics; 017import org.kuali.ole.docstore.common.util.ReindexBatchStatistics; 018import org.kuali.ole.docstore.discovery.service.SolrServerManager; 019import org.kuali.ole.docstore.discovery.solr.work.bib.marc.WorkBibMarcDocBuilder; 020import org.kuali.ole.docstore.document.rdbms.RdbmsWorkBibMarcDocumentManager; 021import org.kuali.ole.docstore.document.rdbms.RdbmsWorkEInstanceDocumentManager; 022import org.kuali.ole.docstore.document.rdbms.RdbmsWorkInstanceDocumentManager; 023import org.kuali.ole.docstore.engine.service.index.solr.BibMarcIndexer; 024import org.kuali.ole.docstore.engine.service.index.solr.DocumentIndexer; 025import org.kuali.ole.docstore.indexer.solr.IndexerService; 026import org.kuali.ole.docstore.metrics.reindex.ReIndexingBatchStatus; 027import org.kuali.ole.docstore.metrics.reindex.ReIndexingStatus; 028import org.kuali.ole.docstore.model.enums.DocCategory; 029import org.kuali.ole.docstore.model.enums.DocFormat; 030import org.kuali.ole.docstore.model.enums.DocType; 031import org.kuali.ole.docstore.engine.service.storage.rdbms.pojo.BibRecord; 032import org.kuali.ole.docstore.engine.service.storage.rdbms.pojo.EInstanceRecord; 033import org.kuali.ole.docstore.engine.service.storage.rdbms.pojo.InstanceRecord; 034import org.kuali.ole.docstore.model.xmlpojo.ingest.AdditionalAttributes; 035import org.kuali.ole.docstore.model.xmlpojo.ingest.Content; 036import org.kuali.ole.docstore.model.xmlpojo.ingest.RequestDocument; 037import org.kuali.ole.docstore.model.xmlpojo.ingest.ResponseDocument; 038import org.kuali.ole.docstore.common.document.content.instance.InstanceCollection; 039import org.kuali.ole.docstore.model.xstream.work.oleml.WorkEInstanceOlemlRecordProcessor; 040import org.kuali.ole.docstore.service.BeanLocator; 041import org.kuali.ole.docstore.service.DocumentIngester; 042import org.kuali.ole.docstore.service.ServiceLocator; 043import org.kuali.ole.pojo.OleException; 044import org.kuali.ole.repository.CheckoutManager; 045import org.kuali.ole.repository.NodeHandler; 046import org.kuali.rice.krad.service.BusinessObjectService; 047import org.kuali.rice.krad.service.KRADServiceLocator; 048import org.slf4j.Logger; 049import org.slf4j.LoggerFactory; 050import org.kuali.rice.core.api.config.property.ConfigContext; 051 052import javax.jcr.Binary; 053import javax.jcr.Node; 054import javax.jcr.NodeIterator; 055import javax.jcr.Session; 056import java.io.ByteArrayInputStream; 057import java.io.IOException; 058import java.sql.*; 059import java.text.DateFormat; 060import java.text.SimpleDateFormat; 061import java.util.*; 062import java.util.Date; 063 064/** 065 * Class to Rebuild Indexes. 066 * 067 * @author Rajesh Chowdary K 068 * @created May 2, 2012 069 */ 070public class RebuildIndexesHandler 071 implements Runnable { 072 073 private static final Logger LOG = LoggerFactory.getLogger(RebuildIndexesHandler.class); 074 private static RebuildIndexesHandler reBuilder = null; 075 private boolean isRunning = false; 076 private boolean isStop = false; 077 private static final Logger logger = LoggerFactory.getLogger(RebuildIndexesHandler.class); 078 private String docCategory; 079 private String docType; 080 private String docFormat; 081 private BibInfoStatistics bibInfoStatistics = null; 082 private CheckoutManager checkoutManager; 083 // private ReIndexingStatus reIndexingStatus; 084 private int batchSize; 085 private int startIndex; 086 private int endIndex; 087 private String updateDate; 088 089 public static String EXCEPION_FILE_NAME = ""; 090 public static String STATUS_FILE_NAME = ""; 091 public static String STORAGE_EXCEPTION_FILE_NAME = ""; 092 public static String STORAGE_STATUS_FILE_NAME = ""; 093 public static BatchBibTreeDBUtil bibTreeDBUtil = new BatchBibTreeDBUtil(); 094 095 private String filePath = System.getProperty("solr.solr.home"); 096 public synchronized void setRunning(boolean running) { 097 isRunning = running; 098 } 099 100 public synchronized void setStop(boolean stop) { 101 isStop = stop; 102 } 103 104 private RebuildIndexesHandler() { 105 checkoutManager = new CheckoutManager(); 106 } 107 108 public static RebuildIndexesHandler getInstance() { 109 if (reBuilder == null) { 110 reBuilder = new RebuildIndexesHandler(); 111 } 112 return reBuilder; 113 } 114 115 /** 116 * Method to get running status. 117 * 118 * @return 119 */ 120 public synchronized boolean isRunning() { 121 return isRunning; 122 } 123 124 public synchronized boolean isStop() { 125 return isStop; 126 } 127 128 /** 129 * Method to startProcess 130 */ 131 public String startProcess(String docCategory, String docType, String docFormat) throws InterruptedException { 132 String status = null; 133 if (isRunning()) { 134 status = "ReIndexing process is already running. Click 'Show Status' button to know the status. "; 135 } else { 136 setRunning(true); 137 setStop(false); 138 status = "ReIndexing process has started. Click 'Show Status' button to know the status. "; 139 ReIndexingStatus reIndexingStatus = ReIndexingStatus.getInstance(); 140 reIndexingStatus.reset(); 141 if (docCategory == null || docCategory.equals("")) { 142 docCategory = "all"; 143 } 144 if (docType == null || docType.equals("")) { 145 docType = "all"; 146 } 147 if (docFormat == null || docType.equals("")) { 148 docFormat = "all"; 149 } 150 this.docCategory = docCategory; 151 this.docType = docType; 152 this.docFormat = docFormat; 153 Thread reBuilderThread = new Thread(this); 154 reBuilderThread.start(); 155 // reBuilderThread.join(); 156 setRunning(false); 157 } 158 return status; 159 } 160 public String startProcess(String docCategory, String docType, String docFormat, int batchSize, int startIndex, int endIndex,String updateDate) throws InterruptedException { 161 String status = null; 162 if (isRunning()) { 163 status = "ReIndexing process is already running. Click 'Show Status' button to know the status. "; 164 } else { 165 setRunning(true); 166 setStop(false); 167 status = "ReIndexing process has started. Click 'Show Status' button to know the status. "; 168 ReIndexingStatus reIndexingStatus = ReIndexingStatus.getInstance(); 169 reIndexingStatus.reset(); 170 if (docCategory == null || docCategory.equals("")) { 171 docCategory = "all"; 172 } 173 if (docType == null || docType.equals("")) { 174 docType = "all"; 175 } 176 if (docFormat == null || docType.equals("")) { 177 docFormat = "all"; 178 } 179 this.docCategory = docCategory; 180 this.docType = docType; 181 this.docFormat = docFormat; 182 this.batchSize = batchSize; 183 this.startIndex = startIndex; 184 this.endIndex = endIndex; 185 this.updateDate=updateDate; 186 Thread reBuilderThread = new Thread(this); 187 reBuilderThread.start(); 188 // reBuilderThread.join(); 189 setRunning(false); 190 } 191 return status; 192 } 193 194 public String stopProcess() throws Exception { 195 String status = null; 196 if (isRunning()) { 197 status = "ReIndexing process is running. ReIndexing will stop after current batch. "; 198 setStop(true); 199 setRunning(false); 200 } else { 201 status = "ReIndexing process is not running."; 202 } 203 return status; 204 205 } 206 207 public void run() { 208 DocCategoryTypeFormat docCategoryTypeFormat = new DocCategoryTypeFormat(); 209 List<String> categoryList = docCategoryTypeFormat.getCategories(); 210 List<String> typeList = null; 211 List<String> formatList = null; 212 for (String docCategoryCurr : categoryList) { 213 if (docCategory.equals("all") || docCategory.equals(docCategoryCurr)) { 214 typeList = docCategoryTypeFormat.getDocTypes(docCategoryCurr); 215 for (String docTypeCurr : typeList) { 216 if (docType.equals("all") || docType.equals(docTypeCurr)) { 217 formatList = docCategoryTypeFormat.getDocFormats(docCategoryCurr, docTypeCurr); 218 for (String docFormatCurr : formatList) { 219 if (docFormat.equals("all") || docFormat.equals(docFormatCurr)) { 220 if (!isStop()) { 221 ReIndexingStatus.getInstance() 222 .startDocType(docCategoryCurr, docTypeCurr, docFormatCurr); 223 reIndex(docCategoryCurr, docTypeCurr, docFormatCurr); 224 } else { 225 return; 226 } 227 } 228 } 229 } 230 } 231 } 232 } 233 setRunning(false); 234 } 235 236 private void reIndex(String docCategory, String docType, String docFormat) { 237 Session session = null; 238 setRunning(true); 239 logger.info("Rebuild Indexes Run(" + docCategory + " : " + docType + " : " + docFormat + "): "); 240 try { 241 if (docCategory.equals(DocCategory.WORK.getCode())) { 242 if (docType.equals(DocType.BIB.getDescription())) { 243 if (docFormat.equals(DocFormat.MARC.getCode()) || docFormat.equals(DocFormat.DUBLIN_CORE.getCode()) 244 || docFormat.equals(DocFormat.DUBLIN_UNQUALIFIED.getCode())) { 245 org.springframework.util.StopWatch stopWatch = new org.springframework.util.StopWatch(); 246 stopWatch.start("total time taken"); 247 Date date = new Date(); 248 EXCEPION_FILE_NAME = "ReindexErrors-" + date.toString() + ".txt"; 249 STATUS_FILE_NAME = "ReindexBatchStatus-" + date.toString() + ".txt"; 250 BatchBibTreeDBUtil.writeStatusToFile(filePath, RebuildIndexesHandler.EXCEPION_FILE_NAME, "Reindex started at:" + date); 251 BibHoldingItemReindexer bibHoldingItemReindexer = BibHoldingItemReindexer.getInstance(); 252 bibHoldingItemReindexer.setTotalBatchStatistics(new ReindexBatchStatistics()); 253 bibHoldingItemReindexer.index(batchSize, startIndex, endIndex,updateDate); 254 date = new Date(); 255 BatchBibTreeDBUtil.writeStatusToFile(filePath, RebuildIndexesHandler.EXCEPION_FILE_NAME, "Reindex ended at:" + date); 256 stopWatch.stop(); 257 logger.info(stopWatch.prettyPrint()); 258// workBibMarcAndDublinAll(docCategory, docType, docFormat); 259 } else { 260 logger.info( 261 "Rebuild Indexes Run(" + docCategory + " : " + docType + " : " + docFormat + "): FAIL"); 262 } 263 } else if (docType.equals(DocType.INSTANCE.getDescription())) { 264 if (docFormat.equals(DocFormat.OLEML.getCode())) { 265 workInstanceOLEML(docCategory, docType, docFormat); 266 } else { 267 logger.info( 268 "Rebuild Indexes Run(" + docCategory + " : " + docType + " : " + docFormat + "): FAIL"); 269 } 270 } else if (docType.equals(DocType.LICENSE.getDescription())) { 271 if (docFormat.equals(DocFormat.ONIXPL.getCode()) || docFormat.equals(DocFormat.PDF.getCode()) 272 || docFormat.equals(DocFormat.DOC.getCode())) { 273 workLicense(docCategory, docType, docFormat); 274 } else { 275 logger.info( 276 "Rebuild Indexes Run(" + docCategory + " : " + docType + " : " + docFormat + "): FAIL"); 277 } 278 } else if (docType.equals(DocType.EINSTANCE.getCode())) { 279 if (docFormat.equals(DocFormat.OLEML.getCode())) { 280 workEInstanceOLEML(docCategory, docType, docFormat); 281 } else { 282 logger.info( 283 "Rebuild Indexes Run(" + docCategory + " : " + docType + " : " + docFormat + "): FAIL"); 284 } 285 } 286 } 287 } catch (Exception e) { 288 logger.info(e.getMessage(), e); 289 } finally { 290 try { 291 if (isStop) { 292 ReIndexingStatus.getInstance().getDocTypeList().setStatus("Stopped"); 293 } else { 294 ReIndexingStatus.getInstance().getDocTypeList().setStatus("Done"); 295 } 296 RepositoryManager.getRepositoryManager().logout(session); 297 } catch (OleException e) { 298 logger.error(e.getMessage(), e); 299 } 300 } 301 302 } 303 304 private void workEInstanceOLEML(String docCategory, String docType, String docFormat) { 305 long totalCount = 0; 306 long nodeCount = 0; 307 List<RequestDocument> docs = new ArrayList<RequestDocument>(); 308 WorkEInstanceOlemlRecordProcessor workEInstanceOlemlRecordProcessor = new WorkEInstanceOlemlRecordProcessor(); 309 try { 310 RequestDocument rd = new RequestDocument(); 311 rd.setCategory(docCategory); 312 rd.setType(docType); 313 rd.setFormat(docFormat); 314 List<ReIndexingBatchStatus> batchStatusList = new ArrayList<ReIndexingBatchStatus>(); 315 BusinessObjectService businessObjectService = KRADServiceLocator.getBusinessObjectService(); 316 List<EInstanceRecord> instanceRecords = (List<EInstanceRecord>) businessObjectService.findAll(EInstanceRecord.class); 317 StopWatch loadTimer = new StopWatch(); 318 StopWatch batchTimer = new StopWatch(); 319 loadTimer.start(); 320 batchTimer.start(); 321 for (int i = 0; i < instanceRecords.size(); i++) { 322 if (docs.size() == ProcessParameters.BULK_PROCESSOR_SPLIT_SIZE) { 323 if (!isStop()) { 324 ReIndexingBatchStatus reIndexingBatchStatus = indexBeforeParams(loadTimer); 325 indexDocs(docs, totalCount, nodeCount, batchStatusList, reIndexingBatchStatus); 326 indexAfterParams(batchTimer, reIndexingBatchStatus, batchStatusList); 327 resetTimers(batchTimer, loadTimer); 328 totalCount = 0; 329 logger.info("Rebuild"); 330 } else { 331 return; 332 } 333 } else { 334 EInstanceRecord instanceRecord = instanceRecords.get(i); 335 String uuid = DocumentUniqueIDPrefix.getPrefixedId(instanceRecord.getUniqueIdPrefix(), instanceRecord.geteInstanceIdentifier()); 336 RequestDocument requestDocument = buildRequestDocumentForCheckout(docCategory, docType, docFormat, uuid); 337 ResponseDocument responseDocument = RdbmsWorkEInstanceDocumentManager.getInstance().checkoutContent(requestDocument, businessObjectService); 338 String content = responseDocument.getContent().getContent(); 339 RequestDocument requestDocumentForIndex = (RequestDocument) rd.clone(); 340 requestDocumentForIndex.setAdditionalAttributes(responseDocument.getAdditionalAttributes()); 341 requestDocumentForIndex.setId(uuid); 342 requestDocumentForIndex.setUuid(uuid); 343 org.kuali.ole.docstore.model.xmlpojo.work.einstance.oleml.InstanceCollection instanceCollection = workEInstanceOlemlRecordProcessor.fromXML(content); 344 content = workEInstanceOlemlRecordProcessor.toXML(instanceCollection); 345 Content contentObj = new Content(); 346 contentObj.setContent(content); 347 contentObj.setContentObject(instanceCollection); 348 requestDocumentForIndex.setContent(contentObj); 349 docs.add(requestDocumentForIndex); 350 totalCount++; 351 } 352 } 353 if (docs.size() > 0 && !isStop()) { 354 ReIndexingBatchStatus reIndexingBatchStatus = indexBeforeParams(loadTimer); 355 indexDocs(docs, totalCount, nodeCount, batchStatusList, reIndexingBatchStatus); 356 indexAfterParams(batchTimer, reIndexingBatchStatus, batchStatusList); 357 } 358 } catch (Exception e) { 359 logger.error( 360 "Rebuild Indexes Process(" + docCategory + " : " + docType + " : " + docFormat + ") Processed(" + ( 361 totalCount - docs.size()) + "), Failed @ batch(" + docs.size() + "): Cause: " + e, e); 362 } finally { 363 if (isStop) { 364 ReIndexingStatus.getInstance().getDocTypeList().setStatus("Stopped"); 365 } else { 366 ReIndexingStatus.getInstance().getDocTypeList().setStatus("Done"); 367 } 368 } 369 370 } 371 372 373 private void indexBibDocs(List<BibTree> bibTreeList, long records, long recCount, 374 List<ReIndexingBatchStatus> batchStatusList, ReIndexingBatchStatus reIndexingBatchStatus) { 375 StopWatch indexTimer = new StopWatch(); 376 DateFormat dateFormat = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss:SSS"); 377 try { 378 Date startDate = new Date(); 379 reIndexingBatchStatus.setBatchStartTime(dateFormat.format(startDate)); 380 indexTimer.start(); 381 reIndexingBatchStatus.setStatus("Indexing"); 382 reIndexingBatchStatus.setBatchIndexingTime(indexTimer.toString()); 383 reIndexingBatchStatus.setRecordsProcessed(records); 384 reIndexingBatchStatus.setBatchEndTime(" "); 385 batchStatusList.add(reIndexingBatchStatus); 386 ReIndexingStatus.getInstance().getDocTypeList().setReIndBatStatusList(batchStatusList); 387 DocumentIndexer documentIndexer = BibMarcIndexer.getInstance(); 388 BibTrees bibTrees = new BibTrees(); 389 bibTrees.getBibTrees().addAll(bibTreeList); 390 documentIndexer.createTrees(bibTrees); 391 //logger.debug(result); 392 indexTimer.stop(); 393 Date endDate = new Date(); 394 reIndexingBatchStatus.setBatchEndTime(dateFormat.format(endDate)); 395 reIndexingBatchStatus.setBatchIndexingTime(indexTimer.toString()); 396 reIndexingBatchStatus.setRecordsProcessed(records); 397 reIndexingBatchStatus.setStatus("Done"); 398 reIndexingBatchStatus.setRecordsRemaining(recCount - records); 399 ReIndexingStatus.getInstance().getDocTypeList().setReIndBatStatusList(batchStatusList); 400 bibTreeList.clear(); 401 } catch (Exception e) { 402 String firstBibId = bibTreeList.get(0).getBib().getId(); 403 String lastBibId = bibTreeList.get(bibTreeList.size()-1).getBib().getId(); 404 logger.error( 405 "Rebuild Indexes Process(" + docCategory + " : " + docType + " : " + docFormat + ") Processed(" + ( 406 records - bibTreeList.size()) + "), Failed @ bibId( First BibId: " + firstBibId + " : Last BibId : "+ lastBibId +"): Cause: " + e, e); 407 indexTimer.stop(); 408 Date endDate = new Date(); 409 reIndexingBatchStatus.setBatchEndTime(dateFormat.format(endDate)); 410 reIndexingBatchStatus.setBatchIndexingTime(indexTimer.toString()); 411 reIndexingBatchStatus.setRecordsProcessed(0L); 412 reIndexingBatchStatus.setStatus("Done"); 413 reIndexingBatchStatus.setRecordsRemaining(recCount - records); 414 ReIndexingStatus.getInstance().getDocTypeList().setReIndBatStatusList(batchStatusList); 415 bibTreeList.clear(); 416 } 417 } 418 419 420 private void indexDocs(List<RequestDocument> docs, long records, long recCount, 421 List<ReIndexingBatchStatus> batchStatusList, ReIndexingBatchStatus reIndexingBatchStatus) { 422 try { 423 StopWatch indexTimer = new StopWatch(); 424 DateFormat dateFormat = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss:SSS"); 425 Date startDate = new Date(); 426 reIndexingBatchStatus.setBatchStartTime(dateFormat.format(startDate)); 427 indexTimer.start(); 428 reIndexingBatchStatus.setStatus("Indexing"); 429 reIndexingBatchStatus.setBatchIndexingTime(indexTimer.toString()); 430 reIndexingBatchStatus.setRecordsProcessed(records); 431 reIndexingBatchStatus.setBatchEndTime(" "); 432 batchStatusList.add(reIndexingBatchStatus); 433 ReIndexingStatus.getInstance().getDocTypeList().setReIndBatStatusList(batchStatusList); 434 IndexerService indexerService = BeanLocator.getDocstoreFactory().getDocumentIndexManager(docs.get(0).getCategory(), docs.get(0).getType(), docs.get(0).getFormat()); 435 String result = indexerService.indexDocuments(docs, false); 436 logger.debug(result); 437 indexTimer.stop(); 438 Date endDate = new Date(); 439 reIndexingBatchStatus.setBatchEndTime(dateFormat.format(endDate)); 440 reIndexingBatchStatus.setBatchIndexingTime(indexTimer.toString()); 441 reIndexingBatchStatus.setRecordsProcessed(records); 442 reIndexingBatchStatus.setStatus("Done"); 443 reIndexingBatchStatus.setRecordsRemaining(recCount - records); 444 ReIndexingStatus.getInstance().getDocTypeList().setReIndBatStatusList(batchStatusList); 445 docs.clear(); 446 } catch (Exception e) { 447 logger.error("Rebuild Indexes Processed(" + (records - docs.size()) + "), Failed @ batch(" + docs.size() 448 + "): Cause: " + e + "\n\tContinuous", e); 449 } 450 } 451 452 private void workBibMarcAndDublinAll(String docCategory, String docType, String docFormat) throws SolrServerException, IOException { 453 long totalCount = 0; 454 long nodeCount = 0; 455 int start = 0; 456 String sqlQuery = null; 457 long startTime = 0; 458 long commitEndTime = 0; 459 long commitStartTime = 0; 460 int batchSize = 50000; //ProcessParameters.BULK_PROCESSOR_SPLIT_SIZE; 461 int commitSize = 50000; 462 long endIndexBatch = 0; 463 String dbVendor = ConfigContext.getCurrentContextConfig().getProperty("db.vendor"); 464 List<BibTree> bibTrees = new ArrayList<BibTree>(); 465 //Map<String, BibTree> bibMap = new HashMap<String, BibTree>(); 466 try { 467 String prefix = DocumentUniqueIDPrefix.getPrefix(docCategory, docType, docFormat); 468 Map prefixMap = new HashMap(0); 469 prefixMap.put("uniqueIdPrefix", prefix); 470 startTime = System.currentTimeMillis(); //t1 471 472 BusinessObjectService businessObjectService = KRADServiceLocator.getBusinessObjectService(); 473 int bibCount = businessObjectService.countMatching(BibRecord.class, prefixMap); 474 Connection connection = null; 475 PreparedStatement preparedStatement = null; 476 if (bibCount > 0) { 477 connection = getConnection(); 478 if (dbVendor.equalsIgnoreCase("mysql")) { 479 sqlQuery = "select * from ole_ds_bib_t b ORDER BY b.bib_id LIMIT ?,?"; 480 } else { 481 sqlQuery = "select * from (select b.*,ROWNUM r from OLE_DS_BIB_T b) where r between ? and ?"; 482 } 483 preparedStatement = connection.prepareStatement(sqlQuery); 484 } 485 List<ReIndexingBatchStatus> batchStatusList = new ArrayList<ReIndexingBatchStatus>(); 486 StopWatch loadTimer = new StopWatch(); 487 StopWatch batchTimer = new StopWatch(); 488 loadTimer.start(); 489 batchTimer.start(); 490 491 for (int i = 0; i < bibCount; i++) { 492 if (bibTrees.size() == batchSize) { 493 if (!isStop()) { 494 495 ReIndexingBatchStatus reIndexingBatchStatus = indexBeforeParams(loadTimer); 496 indexBibDocs(bibTrees, totalCount, nodeCount, batchStatusList, reIndexingBatchStatus); 497 indexAfterParams(batchTimer, reIndexingBatchStatus, batchStatusList); 498 resetTimers(batchTimer, loadTimer); 499 totalCount = 0; 500 logger.info("Rebuild"); 501 i = start; 502 if (start % commitSize == 0) { 503 commitStartTime = System.currentTimeMillis(); //t2 504 logger.info("Time elapsed since start ====>>>>>> " + (commitStartTime - startTime)); // t2-t1 505 logger.info("Time elapsed since last commit ====>>>>>> " + (commitStartTime - commitEndTime)); //t2-t3 506 logger.info("commit started ====>>>>>> " + commitStartTime); 507 SolrServer solr = SolrServerManager.getInstance().getSolrServer(); 508 solr.commit(); 509 logger.info("No..of records committed ====>>>>>> " + start); 510 commitEndTime = System.currentTimeMillis(); //t3 511 logger.info("Time Taken for commit ====>>>>>> " + (commitEndTime - commitStartTime)); //t3-t2 512 513 } 514 } else { 515 return; 516 } 517 } else { 518 if (start < bibCount) { 519 long b2time = System.currentTimeMillis(); 520 if (dbVendor.equalsIgnoreCase("mysql")) { 521 preparedStatement.setInt(1, start); 522 preparedStatement.setInt(2, batchSize); 523 } else { 524 preparedStatement.setInt(1, start + 1); 525 preparedStatement.setInt(2, start + batchSize); 526 } 527 ResultSet resultSet = preparedStatement.executeQuery(); 528 logger.info("time taking for getting records from DB end======>>>>>" + (System.currentTimeMillis() - b2time)); 529 while (resultSet.next()) { 530 531 BibTree bibTree = new BibTree(); 532 bibTree.setCategory(docCategory); 533 bibTree.setType(docType); 534 bibTree.setFormat(docFormat); 535 Bib bib = new BibMarc(); 536 bib.setCategory(docCategory); 537 bib.setType(docType); 538 bib.setFormat(docFormat); 539 bib.setCreatedBy(resultSet.getString("CREATED_BY")); 540 bib.setCreatedOn(resultSet.getString("DATE_CREATED")); 541 bib.setStaffOnly((resultSet.getString("STAFF_ONLY").equalsIgnoreCase("Y") ? Boolean.TRUE : Boolean.FALSE)); 542 bib.setContent(resultSet.getString("CONTENT")); 543 bib.setUpdatedBy(resultSet.getString("UPDATED_BY")); 544 bib.setUpdatedOn(resultSet.getString("DATE_UPDATED")); 545 bib.setLastUpdated(resultSet.getString("DATE_UPDATED")); 546 bib.setStatus(resultSet.getString("STATUS")); 547 bib.setStatusUpdatedBy(resultSet.getString("STATUS_UPDATED_BY")); 548 bib.setStatusUpdatedOn(resultSet.getString("STATUS_UPDATED_DATE")); 549 String uuid = DocumentUniqueIDPrefix.getPrefixedId(resultSet.getString("UNIQUE_ID_PREFIX"), resultSet.getString(1)); 550 bib.setId(uuid); 551 bib.setLocalId(uuid); 552 bibTree.setBib(bib); 553 554 start++; 555 totalCount++; 556 bibTrees.add(bibTree); 557 } 558 resultSet.close(); 559 } 560 } 561 562 } 563 if (bibTrees.size() > 0 && !isStop()) { 564 ReIndexingBatchStatus reIndexingBatchStatus = indexBeforeParams(loadTimer); 565 indexBibDocs(bibTrees, totalCount, nodeCount, batchStatusList, reIndexingBatchStatus); 566 indexAfterParams(batchTimer, reIndexingBatchStatus, batchStatusList); 567 commitStartTime = System.currentTimeMillis(); 568 logger.info("commit started : ----> " + commitStartTime); 569 SolrServer solr = SolrServerManager.getInstance().getSolrServer(); 570 solr.commit(); 571 logger.info("No..of records committed : ----> " + start); 572 commitEndTime = System.currentTimeMillis(); 573 logger.info("Time Taken for commit ======>>> " + (commitEndTime - commitStartTime)); 574 575 } 576 endIndexBatch = System.currentTimeMillis(); //t1 577 logger.info("Time elapsed since end ====>>>>>> " + endIndexBatch); 578 } catch (Exception e) { 579 logger.error( 580 "Rebuild Indexes Process(" + docCategory + " : " + docType + " : " + docFormat + ") Processed(" + ( 581 totalCount - bibTrees.size()) + "), Failed @ batch(" + bibTrees.size() + "): Cause: " + e, e); 582 } finally { 583 if (isStop) { 584 ReIndexingStatus.getInstance().getDocTypeList().setStatus("Stopped"); 585 } else { 586 ReIndexingStatus.getInstance().getDocTypeList().setStatus("Done"); 587 } 588 } 589 } 590 591 private void workInstanceOLEML(String docCategory, String docType, String docFormat) { 592 long totalCount = 0; 593 long nodeCount = 0; 594 List<RequestDocument> docs = new ArrayList<RequestDocument>(); 595 InstanceOlemlRecordProcessor instanceOlemlRecordProcessor = new InstanceOlemlRecordProcessor(); 596 try { 597 RequestDocument rd = new RequestDocument(); 598 rd.setCategory(docCategory); 599 rd.setType(docType); 600 rd.setFormat(docFormat); 601 List<ReIndexingBatchStatus> batchStatusList = new ArrayList<ReIndexingBatchStatus>(); 602 BusinessObjectService businessObjectService = KRADServiceLocator.getBusinessObjectService(); 603 List<InstanceRecord> instanceRecords = (List<InstanceRecord>) businessObjectService.findAll(InstanceRecord.class); 604 StopWatch loadTimer = new StopWatch(); 605 StopWatch batchTimer = new StopWatch(); 606 loadTimer.start(); 607 batchTimer.start(); 608 for (int i = 0; i < instanceRecords.size(); i++) { 609 if (docs.size() == ProcessParameters.BULK_PROCESSOR_SPLIT_SIZE) { 610 if (!isStop()) { 611 ReIndexingBatchStatus reIndexingBatchStatus = indexBeforeParams(loadTimer); 612 indexDocs(docs, totalCount, nodeCount, batchStatusList, reIndexingBatchStatus); 613 indexAfterParams(batchTimer, reIndexingBatchStatus, batchStatusList); 614 resetTimers(batchTimer, loadTimer); 615 totalCount = 0; 616 logger.info("Rebuild"); 617 } else { 618 return; 619 } 620 } else { 621 InstanceRecord instanceRecord = instanceRecords.get(i); 622 String uuid = DocumentUniqueIDPrefix.getPrefixedId(instanceRecord.getUniqueIdPrefix(), instanceRecord.getInstanceId()); 623 RequestDocument requestDocument = buildRequestDocumentForCheckout(docCategory, docType, docFormat, uuid); 624 ResponseDocument responseDocument = RdbmsWorkInstanceDocumentManager.getInstance().checkoutContent(requestDocument, businessObjectService); 625 String content = responseDocument.getContent().getContent(); 626 RequestDocument requestDocumentForIndex = (RequestDocument) rd.clone(); 627 requestDocumentForIndex.setAdditionalAttributes(responseDocument.getAdditionalAttributes()); 628 requestDocumentForIndex.setId(uuid); 629 requestDocumentForIndex.setUuid(uuid); 630 InstanceCollection instanceCollection = instanceOlemlRecordProcessor.fromXML(content); 631// logger.debug("REBUILD_INDEXING_LINKING " + ProcessParameters.REBUILD_INDEXING_LINKING); 632// if (!ProcessParameters.REBUILD_INDEXING_LINKING) { 633// instanceCollection.getInstance().get(0).getResourceIdentifier().clear(); 634// } 635 content = instanceOlemlRecordProcessor.toXML(instanceCollection); 636 Content contentObj = new Content(); 637 contentObj.setContent(content); 638 contentObj.setContentObject(instanceCollection); 639 requestDocumentForIndex.setContent(contentObj); 640 docs.add(requestDocumentForIndex); 641 totalCount++; 642 } 643 } 644 if (docs.size() > 0 && !isStop()) { 645 ReIndexingBatchStatus reIndexingBatchStatus = indexBeforeParams(loadTimer); 646 indexDocs(docs, totalCount, nodeCount, batchStatusList, reIndexingBatchStatus); 647 indexAfterParams(batchTimer, reIndexingBatchStatus, batchStatusList); 648 } 649 } catch (Exception e) { 650 logger.error( 651 "Rebuild Indexes Process(" + docCategory + " : " + docType + " : " + docFormat + ") Processed(" + ( 652 totalCount - docs.size()) + "), Failed @ batch(" + docs.size() + "): Cause: " + e, e); 653 } finally { 654 if (isStop) { 655 ReIndexingStatus.getInstance().getDocTypeList().setStatus("Stopped"); 656 } else { 657 ReIndexingStatus.getInstance().getDocTypeList().setStatus("Done"); 658 } 659 } 660 } 661 662 private RequestDocument buildRequestDocumentForCheckout(String docCategory, String docType, String docFormat, String uuid) { 663 RequestDocument requestDocument = new RequestDocument(); 664 requestDocument.setCategory(docCategory); 665 requestDocument.setType(docType); 666 requestDocument.setFormat(docFormat); 667 requestDocument.setUuid(uuid); 668 return requestDocument; 669 } 670 671 private void linkingInstanceWithBib(InstanceCollection instanceCollection, Session session, Node fileNode) { 672 for (Instance instance : instanceCollection.getInstance()) { 673 instance.getResourceIdentifier().clear(); 674 for (FormerIdentifier frids : instance.getFormerResourceIdentifier()) { 675 try { 676 if (frids != null && frids.getIdentifier() != null && 677 frids.getIdentifier().getIdentifierValue() != null && 678 frids.getIdentifier().getIdentifierValue().trim().length() >= 0) { 679 List<SolrDocument> solrBibDocs = ServiceLocator.getIndexerService() 680 .getSolrDocument("SystemControlNumber", 681 frids.getIdentifier() 682 .getIdentifierValue()); 683 SolrInputDocument solrInputDocument = new SolrInputDocument(); 684 WorkBibMarcDocBuilder marcDocBuilder = new WorkBibMarcDocBuilder(); 685 List<SolrInputDocument> solrInputDocs = new ArrayList<SolrInputDocument>(); 686 if (solrBibDocs != null && solrBibDocs.size() > 0) { 687 for (SolrDocument solrbibDoc : solrBibDocs) { 688 if (checkApplicability(frids.getIdentifier().getIdentifierValue(), 689 solrbibDoc.getFieldValue("SystemControlNumber"))) { 690 691 compareObjNAddValue(instance.getInstanceIdentifier(), 692 solrbibDoc.getFieldValue("instanceIdentifier"), solrbibDoc, 693 "instanceIdentifier"); 694 solrInputDocument = new SolrInputDocument(); 695 marcDocBuilder.buildSolrInputDocFromSolrDoc(solrbibDoc, solrInputDocument); 696 solrInputDocs.add(solrInputDocument); 697 String bibId = compareListRString(solrbibDoc.getFieldValue("id")); 698 instance.getResourceIdentifier().add(bibId); 699 modifyContentAddLinkedIdsInDocStore(instance, bibId, session, fileNode); 700 indexSolrDocs(solrInputDocs); 701 } 702 } 703 } 704 } 705 } catch (Exception e) { 706 logger.error("error message" + e.getMessage(), e); 707 } 708 } 709 } 710 } 711 712 private void modifyContentAddLinkedIdsInDocStore(Instance instance, String id, Session session, Node fileNode) { 713 714 try { 715 Node bibNode = getNodeByUUID(session, id); 716 bibNode.setProperty("instanceIdentifier", instance.getInstanceIdentifier()); 717 fileNode.setProperty("bibIdentifier", id); 718 719 InstanceOlemlRecordProcessor recordProcessor = new InstanceOlemlRecordProcessor(); 720 NodeIterator nodeIterator = fileNode.getNodes(); 721 while (nodeIterator.hasNext()) { 722 Node instNode = nodeIterator.nextNode(); 723 if (instNode.getName().equalsIgnoreCase("instanceFile")) { 724 InstanceCollection instCol = new InstanceCollection(); 725 Instance inst = new Instance(); 726 inst.setResourceIdentifier(instance.getResourceIdentifier()); 727 inst.setFormerResourceIdentifier(instance.getFormerResourceIdentifier()); 728 inst.setExtension(instance.getExtension()); 729 inst.setInstanceIdentifier(instance.getInstanceIdentifier()); 730 List<Instance> instanceList = new ArrayList<Instance>(); 731 instanceList.add(inst); 732 instCol.setInstance(instanceList); 733 734 byte[] documentBytes = recordProcessor.toXML(instCol).getBytes(); 735 Binary binary = null; 736 if (documentBytes != null && instNode != null && documentBytes.length > 0) { 737 binary = session.getValueFactory().createBinary(new ByteArrayInputStream(documentBytes)); 738 instNode.getNode("jcr:content").setProperty("jcr:data", binary); 739 } 740 } 741 } 742 } catch (Exception e) { 743 logger.error("error while updating Docstore in reindexing Process" + e.getMessage(), e); 744 } 745 } 746 747 private void indexSolrDocs(List<SolrInputDocument> solrInputDocs) { 748 749 try { 750 ServiceLocator.getIndexerService().indexSolrDocuments(solrInputDocs); 751 logger.info("Linking Bib and Instance Records (" + solrInputDocs.size() + "): "); 752 solrInputDocs.clear(); 753 } catch (Exception e) { 754 logger.error( 755 "Linking Bib and Instance Records (" + (solrInputDocs.size()) + "), Failed @ batch(" + solrInputDocs 756 .size() + "): Cause: " + e + "\n\tContinuous", e); 757 } 758 } 759 760 761 private boolean checkApplicability(Object value, Object fieldValue) { 762 if (fieldValue instanceof Collection) { 763 for (Object object : (Collection) fieldValue) { 764 if (object.equals(value)) { 765 return true; 766 } 767 } 768 return false; 769 } else { 770 return value.equals(fieldValue); 771 } 772 } 773 774 775 private String compareListRString(Object id) { 776 if (id != null) { 777 if (id instanceof List) { 778 List<String> idList = (List<String>) id; 779 return idList.get(0); 780 } else if (id instanceof String) { 781 String strId = (String) id; 782 return strId; 783 } 784 } 785 return null; 786 } 787 788 private void compareObjNAddValue(String id, Object idObj, SolrDocument solrDoc, String identifier) { 789 if (idObj != null) { 790 if (idObj instanceof List) { 791 List<String> instBibIdList = (List<String>) idObj; 792 if (!instBibIdList.contains(id)) { 793 solrDoc.addField(identifier, id); 794 } 795 } else if (idObj instanceof String) { 796 String instBibId = (String) idObj; 797 if (!instBibId.equalsIgnoreCase(id)) { 798 solrDoc.addField(identifier, id); 799 } 800 } 801 } else { 802 solrDoc.addField(identifier, id); 803 } 804 } 805 806 private void workLicense(String docCategory, String docType, String docFormat) { 807 Session session = null; 808 long totalCount = 0; 809 long nodeCount = 0; 810 List<RequestDocument> docs = new ArrayList<RequestDocument>(); 811 try { 812 session = RepositoryManager.getRepositoryManager().getSession(ProcessParameters.BULK_DEFAULT_USER, 813 ProcessParameters.BULK_DEFUALT_ACTION); 814 RequestDocument rd = new RequestDocument(); 815 rd.setCategory(docCategory); 816 rd.setType(docType); 817 rd.setFormat(docFormat); 818 DocumentIngester docIngester = new DocumentIngester(); 819 Node nodeFormat = docIngester.getStaticFormatNode(rd, session); 820 NodeIterator nodesL1 = nodeFormat.getNodes(); 821 List<ReIndexingBatchStatus> batchStatusList = new ArrayList<ReIndexingBatchStatus>(); 822 StopWatch loadTimer = new StopWatch(); 823 StopWatch batchTimer = new StopWatch(); 824 loadTimer.start(); 825 RepositoryBrowser repositoryBrowser = new RepositoryBrowser(); 826 while (nodesL1.hasNext()) { 827 Node nodeL1 = nodesL1.nextNode(); 828 NodeIterator nodesFile = nodeL1.getNodes(); 829 nodeCount = nodesFile.getSize(); 830 batchTimer.start(); 831 while (nodesFile.hasNext()) { 832 if (docs.size() == ProcessParameters.BULK_PROCESSOR_SPLIT_SIZE && !isStop()) { 833 if (!isStop()) { 834 ReIndexingBatchStatus reIndexingBatchStatus = indexBeforeParams(loadTimer); 835 indexDocs(docs, totalCount, nodeCount, batchStatusList, reIndexingBatchStatus); 836 indexAfterParams(batchTimer, reIndexingBatchStatus, batchStatusList); 837 resetTimers(batchTimer, loadTimer); 838 totalCount = 0; 839 logger.info("Rebuild"); 840 } else { 841 return; 842 } 843 } else { 844 845 Node fileNode = nodesFile.nextNode(); 846 String content = null; 847 if (docFormat.equals(DocFormat.ONIXPL.getCode())) { 848 content = checkoutManager.getData(fileNode); 849 } else if (docFormat.equals(DocFormat.PDF.getCode()) || docFormat 850 .equals(DocFormat.DOC.getCode())) { 851 content = checkoutManager 852 .checkOutBinary(fileNode.getIdentifier(), ProcessParameters.BULK_DEFAULT_USER, 853 ProcessParameters.BULK_DEFUALT_ACTION, docFormat); 854 } 855 RequestDocument reqDoc = (RequestDocument) rd.clone(); 856 reqDoc.setId(fileNode.getIdentifier()); 857 reqDoc.setUuid(fileNode.getIdentifier()); 858 Content contentObj = new Content(); 859 contentObj.setContent(content); 860 reqDoc.setContent(contentObj); 861 docs.add(reqDoc); 862 totalCount++; 863 } 864 } 865 } 866 if (docs.size() > 0 && !isStop()) { 867 ReIndexingBatchStatus reIndexingBatchStatus = indexBeforeParams(loadTimer); 868 indexDocs(docs, totalCount, nodeCount, batchStatusList, reIndexingBatchStatus); 869 indexAfterParams(batchTimer, reIndexingBatchStatus, batchStatusList); 870 } 871 } catch (Exception e) { 872 logger.error( 873 "Rebuild Indexes Process(" + docCategory + " : " + docType + " : " + docFormat + ") Processed(" + ( 874 totalCount - docs.size()) + "), Failed @ batch(" + docs.size() + "): Cause: " + e, e); 875 } finally { 876 try { 877 if (isStop) { 878 ReIndexingStatus.getInstance().getDocTypeList().setStatus("Stopped"); 879 } else { 880 ReIndexingStatus.getInstance().getDocTypeList().setStatus("Done"); 881 } 882 RepositoryManager.getRepositoryManager().logout(session); 883 } catch (OleException e) { 884 logger.error(e.getMessage(), e); 885 } 886 } 887 } 888 889 private void resetTimers(StopWatch batchTimer, StopWatch loadTimer) { 890 batchTimer.reset(); 891 batchTimer.start(); 892 loadTimer.reset(); 893 loadTimer.start(); 894 } 895 896 private void indexAfterParams(StopWatch batchTimer, ReIndexingBatchStatus reIndexingBatchStatus, 897 List<ReIndexingBatchStatus> batchStatusList) { 898 batchTimer.stop(); 899 reIndexingBatchStatus.setBatchTotalTime(batchTimer.toString()); 900 ReIndexingStatus.getInstance().getDocTypeList().setReIndBatStatusList(batchStatusList); 901 } 902 903 private ReIndexingBatchStatus indexBeforeParams(StopWatch loadTimer) { 904 loadTimer.stop(); 905 ReIndexingBatchStatus reIndexingBatchStatus = new ReIndexingBatchStatus(); 906 reIndexingBatchStatus.setBatchTotalTime(" "); 907 reIndexingBatchStatus.setBatchLoadTime(loadTimer.toString()); 908 return reIndexingBatchStatus; 909 } 910 911 private Node getNodeByUUID(Session newSession, String uuid) throws OleException { 912 return new NodeHandler().getNodeByUUID(newSession, uuid); 913 } 914 915 public Connection getConnection() { 916 Connection connection = null; 917 try { 918 /* InputStream in =getClass().getResourceAsStream("/mysql.properties"); 919 Properties properties = new Properties(); 920 properties.load(in);*/ 921 String connectionUrl = ConfigContext.getCurrentContextConfig().getProperty("datasource.url"); 922 String userName = ConfigContext.getCurrentContextConfig().getProperty("datasource.username"); 923 String passWord = ConfigContext.getCurrentContextConfig().getProperty("datasource.password"); 924 String driverName = ConfigContext.getCurrentContextConfig().getProperty("jdbc.driver"); 925 Class.forName(driverName); 926 connection = DriverManager.getConnection(connectionUrl, userName, passWord); 927 } catch (Exception e) { 928 LOG.error("Exception : ", e); 929 } 930 return connection; 931 } 932 933 public String showStatus() { 934 BibHoldingItemReindexer bibHoldingItemReindexer = BibHoldingItemReindexer.getInstance(); 935 return bibHoldingItemReindexer.showStats(); 936 } 937 938 public String showBibStatus() { 939 if(bibInfoStatistics == null) { 940 bibInfoStatistics = new BibInfoStatistics(); 941 } 942 return bibInfoStatistics.toString(); 943 } 944 945 946 public String storeBibInfo(int batchSize) throws Exception { 947 948 Date date = new Date(); 949 String STORAGE_EXCEPTION_FILE_NAME = "BibInfoLoadingErrors-" + date.toString() + ".txt"; 950 String STORAGE_STATUS_FILE_NAME = "BibInfoLoadingStatus" + date.toString() + ".txt"; 951 952 long startTime = System.currentTimeMillis(); 953 bibInfoStatistics = new BibInfoStatistics(); 954 bibInfoStatistics.setStartDateTime(date); 955 956 bibTreeDBUtil.init(0, 0,null); 957 958 int batchNo = 0; 959 int count = bibTreeDBUtil.storeBibInfo(batchSize, filePath, STORAGE_EXCEPTION_FILE_NAME, bibInfoStatistics, batchNo); 960 long batchStartTime = startTime; 961 long batchEndTime = System.currentTimeMillis(); 962 long totalTimeForBatch = batchEndTime - batchStartTime; 963 BatchBibTreeDBUtil.writeStatusToFile(filePath, STORAGE_STATUS_FILE_NAME, "Time taken for batch " + totalTimeForBatch); 964 while(count > 0) { 965 Date batchStartDate = new Date(); 966 batchStartTime = System.currentTimeMillis(); 967 bibInfoStatistics.setBatchStartDateTime(batchStartDate); 968 count = bibTreeDBUtil.storeBibInfo(batchSize, filePath, STORAGE_EXCEPTION_FILE_NAME, bibInfoStatistics, batchNo++); 969 batchEndTime = System.currentTimeMillis(); 970 Date batchEndDate = new Date(); 971 bibInfoStatistics.setBatchEndDateTime(batchEndDate); 972 bibInfoStatistics.setBatchTotalTime((batchEndTime - batchStartTime)); 973 totalTimeForBatch = batchEndTime - batchStartTime; 974 BatchBibTreeDBUtil.writeStatusToFile(filePath, STORAGE_STATUS_FILE_NAME, "Time taken for batch " + totalTimeForBatch); 975 } 976 977 long endTime = System.currentTimeMillis(); 978 Date endDate = new Date(); 979 bibInfoStatistics.setEndDateTime(endDate); 980 long totalTime = endTime - startTime; 981 bibInfoStatistics.setTotalTime(totalTime); 982 BatchBibTreeDBUtil.writeStatusToFile(filePath, STORAGE_STATUS_FILE_NAME, "Total Time taken " + totalTime); 983 return bibInfoStatistics.toString(); 984 } 985 986}