1 package org.kuali.ole.batch.impl;
2
3 import org.apache.commons.lang.StringUtils;
4 import org.apache.commons.lang.time.StopWatch;
5 import org.apache.log4j.Logger;
6 import org.kuali.ole.OLEConstants;
7 import org.kuali.ole.batch.bo.OLEBatchProcessProfileBo;
8 import org.kuali.ole.batch.document.OLEBatchProcessDefinitionDocument;
9 import org.kuali.ole.batch.export.BatchProcessExportData;
10 import org.kuali.ole.batch.helper.EInstanceMappingHelper;
11 import org.kuali.ole.batch.helper.InstanceMappingHelper;
12 import org.kuali.ole.batch.helper.OLEBatchProcessDataHelper;
13 import org.kuali.ole.batch.marc.OLEMarcReader;
14 import org.kuali.ole.batch.marc.OLEMarcXmlReader;
15 import org.kuali.ole.docstore.common.document.Bib;
16 import org.kuali.ole.docstore.common.document.BibTree;
17 import org.kuali.ole.docstore.common.document.BibTrees;
18 import org.kuali.ole.docstore.common.document.HoldingsTree;
19 import org.kuali.ole.docstore.common.document.content.bib.marc.BibMarcRecord;
20 import org.kuali.ole.docstore.common.document.content.bib.marc.BibMarcRecords;
21 import org.kuali.ole.docstore.common.document.content.bib.marc.DataField;
22 import org.kuali.ole.docstore.common.document.content.bib.marc.xstream.BibMarcRecordProcessor;
23 import org.kuali.ole.docstore.common.util.BatchBibTreeDBUtil;
24 import org.kuali.ole.docstore.common.util.BatchExportStatistics;
25 import org.marc4j.MarcStreamWriter;
26 import org.marc4j.MarcWriter;
27 import org.marc4j.marc.Record;
28
29 import java.io.ByteArrayInputStream;
30 import java.io.File;
31 import java.io.FileOutputStream;
32 import java.io.InputStream;
33 import java.nio.file.FileSystems;
34 import java.util.*;
35
36 import static org.kuali.ole.OLEConstants.OLEBatchProcess.*;
37
38
39
40
41
42
43
44
45 public class BatchExportFetch {
46
47
48 private String fileName;
49 private File filePath;
50 private StringBuilder errBuilder = new StringBuilder();
51
52 private BatchProcessExportData batchProcessExportData;
53 private OLEBatchProcessProfileBo batchProcessProfileBo;
54 private OLEBatchProcessDefinitionDocument processDef;
55 private int errCnt = 0;
56
57 private BatchBibTreeDBUtil bibTreesUtil = null;
58 private int recordsToBeExportedToFile;
59 private int batchSize;
60 private int remainingRecords;
61 private int recordsToBeExported;
62 private Boolean isBibOnly = false;
63 private List<BibMarcRecord> failureList = new ArrayList<>();
64 private static final Logger LOG = Logger.getLogger(BatchExportFetch.class);
65
66 public BatchExportFetch(BatchBibTreeDBUtil bibTreesUtil, int recordsToBeExportedToFile, String fileName, BatchProcessExportData batchProcessExportData, OLEBatchProcessDefinitionDocument processDef, Boolean isBibOnly) {
67 this.fileName = fileName;
68 this.batchProcessExportData = batchProcessExportData;
69 this.batchProcessProfileBo = processDef.getBatchProcessProfileBo();
70 this.processDef = processDef;
71 this.bibTreesUtil = bibTreesUtil;
72 this.recordsToBeExportedToFile = recordsToBeExportedToFile;
73 this.isBibOnly = isBibOnly;
74 }
75
76
77 public Long call() throws Exception {
78 BatchExportStatistics batchExportStatistics = new BatchExportStatistics();
79 StopWatch timer = new StopWatch();
80 Boolean isFirstBatch = true;
81 Boolean isLastBatch = false;
82
83 batchExportStatistics.setFileName(fileName);
84 int exportedRecords = 0;
85 batchSize = processDef.getChunkSize();
86 timer.start();
87
88 while (exportedRecords < recordsToBeExportedToFile) {
89 remainingRecords = recordsToBeExportedToFile - exportedRecords;
90 recordsToBeExported = Math.min(batchSize, remainingRecords);
91
92 if (remainingRecords <= batchSize) {
93 isLastBatch = true;
94 }
95 if (!batchProcessExportData.job.getStatus().equalsIgnoreCase(OLEConstants.OLEBatchProcess.JOB_STATUS_COMPLETED)
96 && !batchProcessExportData.job.getStatus().equalsIgnoreCase(OLEConstants.OLEBatchProcess.JOB_STATUS_CANCELLED)
97 && !batchProcessExportData.job.getStatus().equalsIgnoreCase(OLEConstants.OLEBatchProcess.JOB_STATUS_PAUSED)
98 && !batchProcessExportData.job.getStatus().equalsIgnoreCase(OLEConstants.OLEBatchProcess.JOB_STATUS_STOPPED)) {
99
100 BibTrees bibTrees = bibTreesUtil.fetchNextBatch(recordsToBeExported, batchExportStatistics, isBibOnly);
101
102
103 List<BibMarcRecord> bibMarcRecords = buildBibMarcRecords(bibTrees.getBibTrees(), batchExportStatistics);
104
105 int size = bibMarcRecords.size();
106 batchProcessExportData.setTotalRecordsExported(size + batchProcessExportData.getTotalRecordsExported());
107
108 Object[] resultMap = processBibMarcRecord(bibMarcRecords, batchExportStatistics, isFirstBatch, isLastBatch);
109
110 exportedRecords += recordsToBeExported;
111 prepareForWrite(resultMap, bibMarcRecords, batchExportStatistics, isFirstBatch, isLastBatch);
112 isFirstBatch = false;
113 } else {
114 break;
115 }
116 }
117 timer.stop();
118 batchExportStatistics.setTotalTimeTaken(timer.getTime());
119 batchExportStatistics.printExportStatistics();
120
121 batchProcessExportData.job.setNoOfRecordsProcessed(String.valueOf(batchProcessExportData.getTotalRecordsExported()));
122 batchProcessExportData.job.setNoOfFailureRecords(String.valueOf(Integer.valueOf(batchProcessExportData.job.getNoOfFailureRecords()) + failureList.size()));
123 batchProcessExportData.job.setNoOfSuccessRecords(String.valueOf(Integer.valueOf(batchProcessExportData.job.getNoOfRecordsProcessed()) - Integer.valueOf(batchProcessExportData.job.getNoOfFailureRecords())));
124 return Long.valueOf(batchProcessExportData.job.getNoOfRecordsProcessed());
125 }
126
127
128 private void prepareForWrite(Object[] resultMap, List<BibMarcRecord> bibMarcRecords, BatchExportStatistics batchExportStatistics, Boolean isFirstBatch, Boolean isLastBatch) throws Exception {
129 StopWatch timer = new StopWatch();
130 timer.start();
131 if (resultMap != null && !resultMap[0].equals("0")) {
132 batchProcessExportData.getBibDocList().clear();
133 batchProcessExportData.getBibDocList().addAll((List<String>) resultMap[1]);
134 processBatch(bibMarcRecords, batchExportStatistics, isFirstBatch, isLastBatch);
135 batchProcessExportData.updateJobProgress();
136 if (resultMap[2] != null)
137 batchProcessExportData.getErrBuilder().append(resultMap[2].toString());
138 if (resultMap[3] != null)
139 batchProcessExportData.setErrCnt(resultMap[3].toString());
140 }
141 timer.stop();
142 batchExportStatistics.addTimeTakenForWritingRecords(timer.getTime());
143 }
144
145
146 private List<BibMarcRecord> buildBibMarcRecords(List<BibTree> bibTrees, BatchExportStatistics batchExportStatistics) {
147
148 StopWatch timer = new StopWatch();
149 timer.start();
150
151 List<BibMarcRecord> bibMarcRecords = new ArrayList<>();
152
153 for (BibTree bibTree : bibTrees) {
154 BibMarcRecord bibMarcRecord = null;
155 try {
156 bibMarcRecord = buildBibMarcRecord(bibTree.getBib());
157
158 if (!batchProcessProfileBo.getOleBatchProcessProfileMappingOptionsList().isEmpty()
159 && StringUtils.isNotEmpty(batchProcessProfileBo.getDataToExport()) && (batchProcessProfileBo.getDataToExport().equalsIgnoreCase(OLEBatchProcess.EXPORT_BIB_AND_INSTANCE) || batchProcessProfileBo.getDataToExport().equalsIgnoreCase(OLEBatchProcess.EXPORT_BIB_INSTANCE_AND_EINSTANCE))) {
160 try {
161 getInstanceDetails(bibMarcRecord, batchProcessProfileBo, errBuilder, bibTree);
162 LOG.debug("Instance data mapping completed");
163 } catch (Exception ex) {
164 LOG.error("Instance data mapping Error for Bib record id::" + bibMarcRecord.getRecordId(), ex);
165 buildError(errBuilder, ERR_BIB, bibMarcRecord.getRecordId(), "", "",
166 ERR_CAUSE, ex.getMessage(), TIME_STAMP, new java.util.Date().toString());
167 }
168 }
169
170 if (!batchProcessProfileBo.getOleBatchProcessProfileRenameFieldsList().isEmpty()) {
171 try {
172 OLEBatchProcessDataHelper.getInstance().renameMarcFieldsSubFields(batchProcessProfileBo, bibMarcRecord);
173 LOG.debug("Rename of bib marc records completed");
174 } catch (Exception ex) {
175 LOG.error("Marc Record Rename error for Bib record id::" + bibMarcRecord.getRecordId(), ex);
176 buildError(errBuilder, ERR_BIB, bibMarcRecord.getRecordId(), ERR_CAUSE, ex.getMessage(), " ::At:: ", "renameMarcFieldsSubFields", TIME_STAMP, new Date().toString());
177 errCnt++;
178 }
179 }
180
181 if (!batchProcessProfileBo.getOleBatchProcessProfileDeleteFieldsList().isEmpty()) {
182 try {
183 OLEBatchProcessDataHelper.getInstance().deleteFieldsSubfields(batchProcessProfileBo, bibMarcRecord);
184 LOG.debug("Deletion of bib marc records completed");
185 } catch (Exception ex) {
186 LOG.error("Marc record delete Error for Bib record id::" + bibMarcRecord.getRecordId(), ex);
187 buildError(errBuilder, ERR_BIB, bibMarcRecord.getRecordId(), ERR_CAUSE, ex.getMessage(), " ::At:: ", "deleteFieldsSubfields", TIME_STAMP, new Date().toString());
188 errCnt++;
189 }
190 }
191 bibMarcRecords.add(bibMarcRecord);
192 } catch (Exception ex) {
193 LOG.error("Error while Exporting bibs :: No of bibs processed while error occured :: " + bibMarcRecords.size(), ex);
194
195 if (!bibMarcRecords.isEmpty()) {
196 LOG.error("Bib record where error occured: " + bibMarcRecords.get(bibMarcRecords.size() - 1).getRecordId(), ex);
197 buildError(errBuilder, ERR_BIB, bibMarcRecords.get(bibMarcRecords.size() - 1).getRecordId(), ERR_CAUSE, ex.getMessage(), " ::At:: ", "getBibliographicRecord-P", TIME_STAMP, new Date().toString());
198 errCnt++;
199 }
200 }
201
202
203 }
204 timer.stop();
205 batchExportStatistics.addTimeTakenForBibMarcRecords(timer.getTime());
206 return bibMarcRecords;
207 }
208
209
210 private Object[] processBibMarcRecord(List<BibMarcRecord> bibMarcRecords, BatchExportStatistics batchExportStatistics, Boolean firstBatch, Boolean lastBatch) {
211 StopWatch timer = new StopWatch();
212 timer.start();
213 BibMarcRecordProcessor bibMarcRecordProcessor = new BibMarcRecordProcessor(errBuilder);
214 List<String> bibMarcRecordList = new ArrayList<String>();
215 try {
216 getResult(bibMarcRecordProcessor, bibMarcRecords, bibMarcRecordList, firstBatch, lastBatch);
217 } catch (Exception ex) {
218 LOG.error("Error while Exporting bibs :: No of bibs processed while error occured :: " + bibMarcRecords.size(), ex);
219 buildError(errBuilder, ERR_CAUSE, "Error while getting bib data::" + ex.getMessage(), TIME_STAMP, new Date().toString());
220 }
221 timer.stop();
222 batchExportStatistics.addTimeTakenForProcessing(timer.getTime());
223 return new Object[]{String.valueOf(bibMarcRecordProcessor.getSuccessCnt()), bibMarcRecordList, errBuilder.toString(), String.valueOf((errCnt + bibMarcRecordProcessor.getErrCnt()))};
224 }
225
226 private void getResult(BibMarcRecordProcessor bibMarcRecordProcessor, List<BibMarcRecord> bibRecords, List<String> bibMarcRecordList, Boolean firstBatch, Boolean lastBatch) {
227 String bibMarcRecord = null;
228 if (processDef.getOutputFormat().equalsIgnoreCase(BatchProcessExportData.MARC)) {
229 bibMarcRecord = bibMarcRecordProcessor.generateXML(bibRecords);
230 } else {
231 bibMarcRecord = bibMarcRecordProcessor.generateXML(bibRecords, firstBatch, lastBatch);
232 }
233
234 bibMarcRecordList.add(bibMarcRecord);
235 }
236
237
238 protected void processBatch(List<BibMarcRecord> bibMarcRecords, BatchExportStatistics batchExportStatistics, Boolean isFirstBatch, Boolean isLastBatch) throws Exception {
239 int recordsSize = bibMarcRecords.size();
240 batchProcessExportData.prepareForWrite(fileName);
241 int currSuccessRec = 0;
242 int currErrCnt = Integer.valueOf(batchProcessExportData.getErrCnt());
243 if (processDef.getOutputFormat().equalsIgnoreCase(BatchProcessExportData.MARCXML)) {
244 try {
245 if (batchProcessExportData.getProcessedRec() > 0)
246 batchProcessExportData.setFileName(fileName);
247 batchProcessExportData.writeFileToLocation(fileName);
248 currSuccessRec = recordsSize;
249 } catch (Exception e) {
250 batchProcessExportData.job.setStatus(JOB_STATUS_STOPPED);
251 batchProcessExportData.job.setStatusDesc("Error while writing to marcxml file::" + fileName + BatchProcessExportData.EXT_MARCXML);
252 currSuccessRec = 0;
253 currErrCnt += recordsSize - currSuccessRec;
254 }
255 } else if (processDef.getOutputFormat().equalsIgnoreCase(BatchProcessExportData.MARC)) {
256 List<BibMarcRecord> bibMarcRecordsProcess = new ArrayList<>();
257 bibMarcRecordsProcess.addAll(bibMarcRecords);
258 batchProcessExportData.setFileName(fileName);
259 generateMarcFromXml(bibMarcRecordsProcess, batchExportStatistics, isFirstBatch, isLastBatch);
260 }
261 }
262
263
264 private BibMarcRecord buildBibMarcRecord(Bib bib) throws Exception {
265 BibMarcRecord bibMarcRecord = null;
266 BibMarcRecordProcessor bibMarcRecordProcessor = new BibMarcRecordProcessor();
267 BibMarcRecords marcRecords = bibMarcRecordProcessor.fromXML(bib.getContent());
268 List<BibMarcRecord> bibMarcRecordList = marcRecords.getRecords();
269 Iterator<BibMarcRecord> bibMarcRecordListIterator = bibMarcRecordList.iterator();
270 if (bibMarcRecordListIterator.hasNext()) {
271 bibMarcRecord = bibMarcRecordListIterator.next();
272 }
273 return bibMarcRecord;
274 }
275
276
277 private void getInstanceDetails(BibMarcRecord bibMarcRecord, OLEBatchProcessProfileBo profile, StringBuilder errBuilder, BibTree bibTree) throws Exception {
278 List<DataField> dataFields = bibMarcRecord.getDataFields();
279
280 try {
281 List<DataField> holdingsItemDataField = Collections.emptyList();
282
283 if (bibTree != null && bibTree.getHoldingsTrees() != null && bibTree.getHoldingsTrees().size() > 0) {
284 for (HoldingsTree holdingsTree : bibTree.getHoldingsTrees()) {
285 if (holdingsTree.getHoldings() != null) {
286 if (holdingsTree.getHoldings().getHoldingsType().equalsIgnoreCase("print")) {
287 holdingsItemDataField = new InstanceMappingHelper().generateDataFieldForHolding(holdingsTree, profile, errBuilder);
288 } else {
289 holdingsItemDataField = new EInstanceMappingHelper().generateDataFieldForEHolding(holdingsTree, profile, errBuilder);
290 }
291 dataFields.addAll(holdingsItemDataField);
292 }
293 }
294 }
295 } catch (Exception ex) {
296 LOG.error("Error while getting instance details for instanceID :: " + bibMarcRecord.getRecordId(), ex);
297 errBuilder.append("-----");
298 buildError(errBuilder, ERR_INSTANCE, bibMarcRecord.getRecordId(), ERR_CAUSE, ex.getMessage(), " ::At:: ", "getInstanceDetails", TIME_STAMP, new java.util.Date().toString());
299
300 }
301 }
302
303
304 private void buildError(StringBuilder errBuilder, String... errorString) {
305 for (String str : errorString) {
306 errBuilder.append(str).append(COMMA);
307 }
308 errBuilder.append(lineSeparator);
309 }
310
311
312
313
314
315
316
317
318
319
320
321 public int generateMarcFromXml(List<BibMarcRecord> bibMarcRecords, BatchExportStatistics batchExportStatistics, Boolean isFirstBatch, Boolean isLastBatch) throws Exception {
322 StopWatch timer = new StopWatch();
323 timer.start();
324 int successRec = 0;
325 File fileToWrite = new File(batchProcessExportData.getFilePath() + FileSystems.getDefault().getSeparator() + fileName + BatchProcessExportData.EXT_MARC);
326 FileOutputStream fileOutputStream = new FileOutputStream(fileToWrite, true);
327
328 if (!fileToWrite.exists()) {
329 if (fileToWrite.getParentFile().mkdirs() && fileToWrite.createNewFile()) {
330
331 } else {
332 LOG.error("Cannot create mrc file in the given file path :: " + fileToWrite.getPath());
333 batchProcessExportData.job.setStatus(JOB_STATUS_STOPPED);
334 throw new RuntimeException("Cannot create mrc file in the given file path :: " + fileToWrite.getPath());
335 }
336 }
337 BibMarcRecordProcessor recordProcessor = new BibMarcRecordProcessor();
338 MarcWriter writer = new MarcStreamWriter(fileOutputStream, "UTF-8");
339
340 for (String bibContent : batchProcessExportData.getBibDocList()) {
341 InputStream input = new ByteArrayInputStream(bibContent.getBytes());
342 List<BibMarcRecord> successList = new ArrayList<>();
343 Record record = null;
344 OLEMarcReader marcXmlReader = new OLEMarcXmlReader(input);
345 try {
346 while (marcXmlReader.hasNext()) {
347 if (marcXmlReader.hasErrors()) {
348 marcXmlReader.next();
349 errBuilder.append(marcXmlReader.getError().toString()).append(lineSeparator);
350 failureList.add(bibMarcRecords.get(successRec));
351 marcXmlReader.clearErrors();
352 continue;
353 }
354 record = marcXmlReader.next();
355 writer.write(record);
356 successList.add(bibMarcRecords.get(successRec));
357 successRec++;
358 }
359
360 } catch (Exception ex) {
361 BibMarcRecord failureRecord = bibMarcRecords.get(successRec);
362 bibMarcRecords.removeAll(successList);
363
364
365 failureList.add(failureRecord);
366 bibMarcRecords.remove(failureRecord);
367
368
369 String recordId = failureRecord.getRecordId();
370 LOG.error("Error while parsing MARCXML to mrc data:: " + (recordId == null ? "NULL_RECORD" : "record id:: " + recordId), ex);
371 batchProcessExportData.getErrBuilder().append(ERR_BIB).append(recordId == null ? "ERROR_RECORD" : recordId).append(TIME_STAMP)
372 .append(new Date()).append(ERR_CAUSE).append(ex.getMessage()).append("::").append(ex.getCause().getMessage()).append(" ::For Record::").append(lineSeparator);
373 batchProcessExportData.getErrBuilder().append("--------------------------------------------------------------------------------------------------").append(lineSeparator);
374 batchProcessExportData.getErrBuilder().append(recordProcessor.generateXML(failureRecord)).append(lineSeparator);
375 batchProcessExportData.getErrBuilder().append("--------------------------------------------------------------------------------------------------").append(lineSeparator).append(lineSeparator);
376
377
378 Object[] resultMap = processBibMarcRecord(bibMarcRecords, batchExportStatistics, isFirstBatch, isLastBatch);
379 prepareForWrite(resultMap, bibMarcRecords, batchExportStatistics, isFirstBatch, isLastBatch);
380 }
381 }
382 writer.close();
383 timer.stop();
384 return successRec;
385 }
386
387
388 }