View Javadoc
1   package org.kuali.ole.batch.impl;
2   
3   import org.apache.commons.lang.StringUtils;
4   import org.apache.commons.lang.time.StopWatch;
5   import org.apache.log4j.Logger;
6   import org.kuali.ole.OLEConstants;
7   import org.kuali.ole.batch.bo.OLEBatchProcessProfileBo;
8   import org.kuali.ole.batch.document.OLEBatchProcessDefinitionDocument;
9   import org.kuali.ole.batch.export.BatchProcessExportData;
10  import org.kuali.ole.batch.helper.EInstanceMappingHelper;
11  import org.kuali.ole.batch.helper.InstanceMappingHelper;
12  import org.kuali.ole.batch.helper.OLEBatchProcessDataHelper;
13  import org.kuali.ole.batch.marc.OLEMarcReader;
14  import org.kuali.ole.batch.marc.OLEMarcXmlReader;
15  import org.kuali.ole.docstore.common.document.Bib;
16  import org.kuali.ole.docstore.common.document.BibTree;
17  import org.kuali.ole.docstore.common.document.BibTrees;
18  import org.kuali.ole.docstore.common.document.HoldingsTree;
19  import org.kuali.ole.docstore.common.document.content.bib.marc.BibMarcRecord;
20  import org.kuali.ole.docstore.common.document.content.bib.marc.BibMarcRecords;
21  import org.kuali.ole.docstore.common.document.content.bib.marc.DataField;
22  import org.kuali.ole.docstore.common.document.content.bib.marc.xstream.BibMarcRecordProcessor;
23  import org.kuali.ole.docstore.common.util.BatchBibTreeDBUtil;
24  import org.kuali.ole.docstore.common.util.BatchExportStatistics;
25  import org.marc4j.MarcStreamWriter;
26  import org.marc4j.MarcWriter;
27  import org.marc4j.marc.Record;
28  
29  import java.io.ByteArrayInputStream;
30  import java.io.File;
31  import java.io.FileOutputStream;
32  import java.io.InputStream;
33  import java.nio.file.FileSystems;
34  import java.util.*;
35  
36  import static org.kuali.ole.OLEConstants.OLEBatchProcess.*;
37  
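/**
 * Exports a fixed number of bib records to a single output file. Records are fetched
 * and written in chunks, in either MARCXML or binary MARC (mrc) format depending on
 * the output format of the process definition.
 *
 * User: jayabharathreddy
 * Date: 5/28/14
 * Time: 12:22 PM
 */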
45  public class BatchExportFetch {
46  
47  
48      private String fileName;
49      private File filePath;
50      private StringBuilder errBuilder = new StringBuilder();
51  
52      private BatchProcessExportData batchProcessExportData;
53      private OLEBatchProcessProfileBo batchProcessProfileBo;
54      private OLEBatchProcessDefinitionDocument processDef;
55      private int errCnt = 0;
56  
57      private BatchBibTreeDBUtil bibTreesUtil = null;
58      private int recordsToBeExportedToFile;
59      private int batchSize;
60      private int remainingRecords;
61      private int recordsToBeExported;
62      private Boolean isBibOnly = false;
63      private List<BibMarcRecord> failureList = new ArrayList<>();
64      private static final Logger LOG = Logger.getLogger(BatchExportFetch.class);
65  
66      public BatchExportFetch(BatchBibTreeDBUtil bibTreesUtil, int recordsToBeExportedToFile, String fileName, BatchProcessExportData batchProcessExportData, OLEBatchProcessDefinitionDocument processDef, Boolean isBibOnly) {
67          this.fileName = fileName;
68          this.batchProcessExportData = batchProcessExportData;
69          this.batchProcessProfileBo = processDef.getBatchProcessProfileBo();
70          this.processDef = processDef;
71          this.bibTreesUtil = bibTreesUtil;
72          this.recordsToBeExportedToFile = recordsToBeExportedToFile;
73          this.isBibOnly = isBibOnly;
74      }
75  
76  
77      public Long call() throws Exception {
78          BatchExportStatistics batchExportStatistics = new BatchExportStatistics();
79          StopWatch timer = new StopWatch();
80          Boolean isFirstBatch = true;
81          Boolean isLastBatch = false;
82  
83          batchExportStatistics.setFileName(fileName);
84          int exportedRecords = 0;
85          batchSize = processDef.getChunkSize();
86          timer.start();
87  
88          while (exportedRecords < recordsToBeExportedToFile) {
89              remainingRecords = recordsToBeExportedToFile - exportedRecords;
90              recordsToBeExported = Math.min(batchSize, remainingRecords);
91  
92              if (remainingRecords <= batchSize) {
93                  isLastBatch = true;
94              }
95              if (!batchProcessExportData.job.getStatus().equalsIgnoreCase(OLEConstants.OLEBatchProcess.JOB_STATUS_COMPLETED)
96                      && !batchProcessExportData.job.getStatus().equalsIgnoreCase(OLEConstants.OLEBatchProcess.JOB_STATUS_CANCELLED)
97                      && !batchProcessExportData.job.getStatus().equalsIgnoreCase(OLEConstants.OLEBatchProcess.JOB_STATUS_PAUSED)
98                      && !batchProcessExportData.job.getStatus().equalsIgnoreCase(OLEConstants.OLEBatchProcess.JOB_STATUS_STOPPED)) {
99  
100                 BibTrees bibTrees = bibTreesUtil.fetchNextBatch(recordsToBeExported, batchExportStatistics, isBibOnly);
101 
102                 // Building Marc Records    and Creating Data Fileds
103                 List<BibMarcRecord> bibMarcRecords = buildBibMarcRecords(bibTrees.getBibTrees(), batchExportStatistics);
104 
105                 int size = bibMarcRecords.size();
106                 batchProcessExportData.setTotalRecordsExported(size + batchProcessExportData.getTotalRecordsExported());
107                 // Converting Bib Marc records as MARCXML
108                 Object[] resultMap = processBibMarcRecord(bibMarcRecords, batchExportStatistics, isFirstBatch, isLastBatch);
109 
110                 exportedRecords += recordsToBeExported;
111                 prepareForWrite(resultMap, bibMarcRecords, batchExportStatistics, isFirstBatch, isLastBatch);
112                 isFirstBatch = false;
113             } else {
114                 break;
115             }
116         }
117         timer.stop();
118         batchExportStatistics.setTotalTimeTaken(timer.getTime());
119         batchExportStatistics.printExportStatistics();
120 
121         batchProcessExportData.job.setNoOfRecordsProcessed(String.valueOf(batchProcessExportData.getTotalRecordsExported()));
122         batchProcessExportData.job.setNoOfFailureRecords(String.valueOf(Integer.valueOf(batchProcessExportData.job.getNoOfFailureRecords()) + failureList.size()));
123         batchProcessExportData.job.setNoOfSuccessRecords(String.valueOf(Integer.valueOf(batchProcessExportData.job.getNoOfRecordsProcessed()) - Integer.valueOf(batchProcessExportData.job.getNoOfFailureRecords())));
124         return Long.valueOf(batchProcessExportData.job.getNoOfRecordsProcessed());
125     }
126 
127 
128     private void prepareForWrite(Object[] resultMap, List<BibMarcRecord> bibMarcRecords, BatchExportStatistics batchExportStatistics, Boolean isFirstBatch, Boolean isLastBatch) throws Exception {
129         StopWatch timer = new StopWatch();
130         timer.start();
131         if (resultMap != null && !resultMap[0].equals("0")) {
132             batchProcessExportData.getBibDocList().clear();
133             batchProcessExportData.getBibDocList().addAll((List<String>) resultMap[1]);
134             processBatch(bibMarcRecords, batchExportStatistics, isFirstBatch, isLastBatch);
135             batchProcessExportData.updateJobProgress();
136             if (resultMap[2] != null)
137                 batchProcessExportData.getErrBuilder().append(resultMap[2].toString());
138             if (resultMap[3] != null)
139                 batchProcessExportData.setErrCnt(resultMap[3].toString());
140         }
141         timer.stop();
142         batchExportStatistics.addTimeTakenForWritingRecords(timer.getTime());
143     }
144 
145 
146     private List<BibMarcRecord> buildBibMarcRecords(List<BibTree> bibTrees, BatchExportStatistics batchExportStatistics) {
147 
148         StopWatch timer = new StopWatch();
149         timer.start();
150 
151         List<BibMarcRecord> bibMarcRecords = new ArrayList<>();
152 
153         for (BibTree bibTree : bibTrees) {
154             BibMarcRecord bibMarcRecord = null;
155             try {
156                 bibMarcRecord = buildBibMarcRecord(bibTree.getBib());
157 
158                 if (!batchProcessProfileBo.getOleBatchProcessProfileMappingOptionsList().isEmpty()
159                         && StringUtils.isNotEmpty(batchProcessProfileBo.getDataToExport()) && (batchProcessProfileBo.getDataToExport().equalsIgnoreCase(OLEBatchProcess.EXPORT_BIB_AND_INSTANCE) || batchProcessProfileBo.getDataToExport().equalsIgnoreCase(OLEBatchProcess.EXPORT_BIB_INSTANCE_AND_EINSTANCE))) {
160                     try {
161                         getInstanceDetails(bibMarcRecord, batchProcessProfileBo, errBuilder, bibTree);
162                         LOG.debug("Instance data mapping completed");
163                     } catch (Exception ex) {
164                         LOG.error("Instance data mapping Error for Bib record id::" + bibMarcRecord.getRecordId(), ex);
165                         buildError(errBuilder, ERR_BIB, bibMarcRecord.getRecordId(), "", "",
166                                 ERR_CAUSE, ex.getMessage(), TIME_STAMP, new java.util.Date().toString());
167                     }
168                 }
169                 //Marc record rename
170                 if (!batchProcessProfileBo.getOleBatchProcessProfileRenameFieldsList().isEmpty()) {
171                     try {
172                         OLEBatchProcessDataHelper.getInstance().renameMarcFieldsSubFields(batchProcessProfileBo, bibMarcRecord);
173                         LOG.debug("Rename of bib marc records completed");
174                     } catch (Exception ex) {
175                         LOG.error("Marc Record Rename error for Bib record id::" + bibMarcRecord.getRecordId(), ex);
176                         buildError(errBuilder, ERR_BIB, bibMarcRecord.getRecordId(), ERR_CAUSE, ex.getMessage(), " ::At:: ", "renameMarcFieldsSubFields", TIME_STAMP, new Date().toString());
177                         errCnt++;
178                     }
179                 }
180                 //Marc record delete
181                 if (!batchProcessProfileBo.getOleBatchProcessProfileDeleteFieldsList().isEmpty()) {
182                     try {
183                         OLEBatchProcessDataHelper.getInstance().deleteFieldsSubfields(batchProcessProfileBo, bibMarcRecord);
184                         LOG.debug("Deletion of bib marc records completed");
185                     } catch (Exception ex) {
186                         LOG.error("Marc record delete Error for Bib record id::" + bibMarcRecord.getRecordId(), ex);
187                         buildError(errBuilder, ERR_BIB, bibMarcRecord.getRecordId(), ERR_CAUSE, ex.getMessage(), " ::At:: ", "deleteFieldsSubfields", TIME_STAMP, new Date().toString());
188                         errCnt++;
189                     }
190                 }
191                 bibMarcRecords.add(bibMarcRecord);
192             } catch (Exception ex) {
193                 LOG.error("Error while Exporting bibs :: No of bibs processed while error occured :: " + bibMarcRecords.size(), ex);
194 
195                 if (!bibMarcRecords.isEmpty()) {
196                     LOG.error("Bib record where error occured: " + bibMarcRecords.get(bibMarcRecords.size() - 1).getRecordId(), ex);
197                     buildError(errBuilder, ERR_BIB, bibMarcRecords.get(bibMarcRecords.size() - 1).getRecordId(), ERR_CAUSE, ex.getMessage(), " ::At:: ", "getBibliographicRecord-P", TIME_STAMP, new Date().toString());
198                     errCnt++;
199                 }
200             }
201 
202 
203         }
204         timer.stop();
205         batchExportStatistics.addTimeTakenForBibMarcRecords(timer.getTime());
206         return bibMarcRecords;
207     }
208 
209 
210     private Object[] processBibMarcRecord(List<BibMarcRecord> bibMarcRecords, BatchExportStatistics batchExportStatistics, Boolean firstBatch, Boolean lastBatch) {
211         StopWatch timer = new StopWatch();
212         timer.start();
213         BibMarcRecordProcessor bibMarcRecordProcessor = new BibMarcRecordProcessor(errBuilder);
214         List<String> bibMarcRecordList = new ArrayList<String>();
215         try {
216             getResult(bibMarcRecordProcessor, bibMarcRecords, bibMarcRecordList, firstBatch, lastBatch);
217         } catch (Exception ex) {
218             LOG.error("Error while Exporting bibs :: No of bibs processed while error occured :: " + bibMarcRecords.size(), ex);
219             buildError(errBuilder, ERR_CAUSE, "Error while getting bib data::" + ex.getMessage(), TIME_STAMP, new Date().toString());
220         }
221         timer.stop();
222         batchExportStatistics.addTimeTakenForProcessing(timer.getTime());
223         return new Object[]{String.valueOf(bibMarcRecordProcessor.getSuccessCnt()), bibMarcRecordList, errBuilder.toString(), String.valueOf((errCnt + bibMarcRecordProcessor.getErrCnt()))};
224     }
225 
226     private void getResult(BibMarcRecordProcessor bibMarcRecordProcessor, List<BibMarcRecord> bibRecords, List<String> bibMarcRecordList, Boolean firstBatch, Boolean lastBatch) {
227         String bibMarcRecord = null;
228         if (processDef.getOutputFormat().equalsIgnoreCase(BatchProcessExportData.MARC)) {
229             bibMarcRecord = bibMarcRecordProcessor.generateXML(bibRecords);
230         } else {
231             bibMarcRecord = bibMarcRecordProcessor.generateXML(bibRecords, firstBatch, lastBatch);
232         }
233 
234         bibMarcRecordList.add(bibMarcRecord);
235     }
236 
237 
238     protected void processBatch(List<BibMarcRecord> bibMarcRecords, BatchExportStatistics batchExportStatistics, Boolean isFirstBatch, Boolean isLastBatch) throws Exception {
239         int recordsSize = bibMarcRecords.size();
240         batchProcessExportData.prepareForWrite(fileName);
241         int currSuccessRec = 0;
242         int currErrCnt = Integer.valueOf(batchProcessExportData.getErrCnt());
243         if (processDef.getOutputFormat().equalsIgnoreCase(BatchProcessExportData.MARCXML)) {
244             try {
245                 if (batchProcessExportData.getProcessedRec() > 0)
246                     batchProcessExportData.setFileName(fileName);
247                 batchProcessExportData.writeFileToLocation(fileName);
248                 currSuccessRec = recordsSize;
249             } catch (Exception e) {
250                 batchProcessExportData.job.setStatus(JOB_STATUS_STOPPED);
251                 batchProcessExportData.job.setStatusDesc("Error while writing to marcxml file::" + fileName + BatchProcessExportData.EXT_MARCXML);
252                 currSuccessRec = 0;
253                 currErrCnt += recordsSize - currSuccessRec;
254             }
255         } else if (processDef.getOutputFormat().equalsIgnoreCase(BatchProcessExportData.MARC)) {
256             List<BibMarcRecord> bibMarcRecordsProcess = new ArrayList<>();
257             bibMarcRecordsProcess.addAll(bibMarcRecords);
258             batchProcessExportData.setFileName(fileName);
259             generateMarcFromXml(bibMarcRecordsProcess, batchExportStatistics, isFirstBatch, isLastBatch);
260         }
261     }
262 
263 
264     private BibMarcRecord buildBibMarcRecord(Bib bib) throws Exception {
265         BibMarcRecord bibMarcRecord = null;
266         BibMarcRecordProcessor bibMarcRecordProcessor = new BibMarcRecordProcessor();
267         BibMarcRecords marcRecords = bibMarcRecordProcessor.fromXML(bib.getContent());
268         List<BibMarcRecord> bibMarcRecordList = marcRecords.getRecords();
269         Iterator<BibMarcRecord> bibMarcRecordListIterator = bibMarcRecordList.iterator();
270         if (bibMarcRecordListIterator.hasNext()) {
271             bibMarcRecord = bibMarcRecordListIterator.next();
272         }
273         return bibMarcRecord;
274     }
275 
276 
277     private void getInstanceDetails(BibMarcRecord bibMarcRecord, OLEBatchProcessProfileBo profile, StringBuilder errBuilder, BibTree bibTree) throws Exception {
278         List<DataField> dataFields = bibMarcRecord.getDataFields();
279 
280         try {
281             List<DataField> holdingsItemDataField = Collections.emptyList();
282 
283             if (bibTree != null && bibTree.getHoldingsTrees() != null && bibTree.getHoldingsTrees().size() > 0) {
284                 for (HoldingsTree holdingsTree : bibTree.getHoldingsTrees()) {
285                     if (holdingsTree.getHoldings() != null) {
286                         if (holdingsTree.getHoldings().getHoldingsType().equalsIgnoreCase("print")) {
287                             holdingsItemDataField = new InstanceMappingHelper().generateDataFieldForHolding(holdingsTree, profile, errBuilder);
288                         } else {
289                             holdingsItemDataField = new EInstanceMappingHelper().generateDataFieldForEHolding(holdingsTree, profile, errBuilder);
290                         }
291                         dataFields.addAll(holdingsItemDataField);
292                     }
293                 }
294             }
295         } catch (Exception ex) {
296             LOG.error("Error while getting instance details for instanceID :: " + bibMarcRecord.getRecordId(), ex);
297             errBuilder.append("-----");
298             buildError(errBuilder, ERR_INSTANCE, bibMarcRecord.getRecordId(), ERR_CAUSE, ex.getMessage(), " ::At:: ", "getInstanceDetails", TIME_STAMP, new java.util.Date().toString());
299 
300         }
301     }
302 
303 
304     private void buildError(StringBuilder errBuilder, String... errorString) {
305         for (String str : errorString) {
306             errBuilder.append(str).append(COMMA);
307         }
308         errBuilder.append(lineSeparator);
309     }
310 
311 
312     /**
313      * Writes the content read into a mrc file
314      *
315      * @param bibMarcRecords
316      * @param batchExportStatistics
317      * @param isFirstBatch
318      * @param isLastBatch
319      * @throws Exception
320      */
321     public int generateMarcFromXml(List<BibMarcRecord> bibMarcRecords, BatchExportStatistics batchExportStatistics, Boolean isFirstBatch, Boolean isLastBatch) throws Exception {
322         StopWatch timer = new StopWatch();
323         timer.start();
324         int successRec = 0;
325         File fileToWrite = new File(batchProcessExportData.getFilePath() + FileSystems.getDefault().getSeparator() + fileName + BatchProcessExportData.EXT_MARC);
326         FileOutputStream fileOutputStream = new FileOutputStream(fileToWrite, true);
327         //String bibContent = StringUtils.join(bibDocList, "");
328         if (!fileToWrite.exists()) {
329             if (fileToWrite.getParentFile().mkdirs() && fileToWrite.createNewFile()) {
330                 //do nothing
331             } else {
332                 LOG.error("Cannot create mrc file in the given file path :: " + fileToWrite.getPath());
333                 batchProcessExportData.job.setStatus(JOB_STATUS_STOPPED);
334                 throw new RuntimeException("Cannot create mrc file in the given file path :: " + fileToWrite.getPath());
335             }
336         }
337         BibMarcRecordProcessor recordProcessor = new BibMarcRecordProcessor();
338         MarcWriter writer = new MarcStreamWriter(fileOutputStream, "UTF-8");
339 
340         for (String bibContent : batchProcessExportData.getBibDocList()) {
341             InputStream input = new ByteArrayInputStream(bibContent.getBytes());
342             List<BibMarcRecord> successList = new ArrayList<>();
343             Record record = null;
344             OLEMarcReader marcXmlReader = new OLEMarcXmlReader(input);
345             try {
346                 while (marcXmlReader.hasNext()) {
347                     if (marcXmlReader.hasErrors()) {
348                         marcXmlReader.next();
349                         errBuilder.append(marcXmlReader.getError().toString()).append(lineSeparator);
350                         failureList.add(bibMarcRecords.get(successRec));
351                         marcXmlReader.clearErrors();
352                         continue;
353                     }
354                     record = marcXmlReader.next();
355                     writer.write(record);
356                     successList.add(bibMarcRecords.get(successRec));
357                     successRec++;
358                 }
359 
360             } catch (Exception ex) {
361                 BibMarcRecord failureRecord = bibMarcRecords.get(successRec);
362                 bibMarcRecords.removeAll(successList);
363 
364                 // add to error list
365                 failureList.add(failureRecord);
366                 bibMarcRecords.remove(failureRecord);
367 
368                 // Building Error file with reason
369                 String recordId = failureRecord.getRecordId();
370                 LOG.error("Error while parsing MARCXML to mrc data:: " + (recordId == null ? "NULL_RECORD" : "record id:: " + recordId), ex);
371                 batchProcessExportData.getErrBuilder().append(ERR_BIB).append(recordId == null ? "ERROR_RECORD" : recordId).append(TIME_STAMP)
372                     .append(new Date()).append(ERR_CAUSE).append(ex.getMessage()).append("::").append(ex.getCause().getMessage()).append(" ::For Record::").append(lineSeparator);
373                 batchProcessExportData.getErrBuilder().append("--------------------------------------------------------------------------------------------------").append(lineSeparator);
374                 batchProcessExportData.getErrBuilder().append(recordProcessor.generateXML(failureRecord)).append(lineSeparator);
375                 batchProcessExportData.getErrBuilder().append("--------------------------------------------------------------------------------------------------").append(lineSeparator).append(lineSeparator);
376 
377                 // Converting Bib Marc records as MARCXML
378                 Object[] resultMap = processBibMarcRecord(bibMarcRecords, batchExportStatistics, isFirstBatch, isLastBatch);
379                 prepareForWrite(resultMap, bibMarcRecords, batchExportStatistics, isFirstBatch, isLastBatch);
380             }
381         }
382         writer.close();
383         timer.stop();
384         return successRec;
385     }
386 
387 
388 }