View Javadoc
1   package org.kuali.ole.batch;
2   
3   import org.apache.commons.io.FileUtils;
4   import org.apache.commons.lang.time.StopWatch;
5   import org.junit.Assert;
6   import org.junit.Ignore;
7   import org.junit.Test;
8   import org.kuali.ole.batch.bo.*;
9   import org.kuali.ole.batch.bo.OLEBatchProcessProfileDataMappingOptionsBo;
10  import org.kuali.ole.batch.ingest.BatchProcessBibImport;
11  import org.kuali.ole.batch.marc.OLEMarcReader;
12  import org.kuali.ole.batch.marc.OLEMarcXmlReader;
13  import org.kuali.ole.batch.bo.xstream.OLEBatchProcessProfileRecordProcessor;
14  import org.kuali.ole.docstore.common.document.Bib;
15  import org.kuali.ole.docstore.common.document.BibTree;
16  import org.kuali.ole.docstore.common.document.content.bib.marc.*;
17  import org.kuali.ole.docstore.common.document.content.bib.marc.xstream.BibMarcRecordProcessor;
18  import org.marc4j.marc.Record;
19  import org.marc4j.MarcStreamWriter;
20  import org.marc4j.MarcWriter;
21  
22  import java.io.*;
23  
24  
25  import java.nio.file.FileSystems;
26  import java.util.ArrayList;
27  import java.util.Iterator;
28  import java.util.List;
29  
30  /**
31   * Created with IntelliJ IDEA.
32   * User: jayabharathreddy
33   * Date: 1/27/14
34   * Time: 5:00 PM
35   * To change this template use File | Settings | File Templates.
36   */
37  public class BatchProcessImport_UT {
38      private OLEBatchProcessProfileRecordProcessor oleBatchProcessProfileRecordProcessor;
39  
40      public OLEBatchProcessProfileRecordProcessor getOLEBatchProcessProfileRecordProcessor() {
41          if (null == oleBatchProcessProfileRecordProcessor) {
42              oleBatchProcessProfileRecordProcessor = new OLEBatchProcessProfileRecordProcessor();
43          }
44          return oleBatchProcessProfileRecordProcessor;
45      }
46  
47      @Ignore
48      @Test
49      public void createBibMarc() throws Exception {
50  
51          BibMarcRecords bibMarcRecords = new BibMarcRecords();
52          for(int i=0;i<5;i++){
53          BibMarcRecord bibMarcRecord= new BibMarcRecord();
54          ControlField controlField = new ControlField();
55          controlField.setTag("008");
56          controlField.setValue("testdfsdfsdf");
57          bibMarcRecord.addControlFields(controlField);
58          DataField dataField = new DataField();
59          dataField.setInd1(" ");
60          dataField.setInd2(" ");
61          dataField.setTag("245");
62          SubField subField = new SubField();
63          subField.setCode("a");
64          subField.setValue("test");
65          List<SubField> subFields = new ArrayList<>();
66          subFields.add(subField);
67          dataField.setSubFields(subFields);
68          bibMarcRecord.setLeader("aaaaaaaaaaaaaaaaaaaaaaa");
69          bibMarcRecord.addDataFields(dataField);
70          bibMarcRecords.getRecords().add(bibMarcRecord);
71          }
72  
73  
74  
75          BatchProcessBibImport batchProcessBibImport = new BatchProcessBibImport();
76         // batchProcessBibImport.processBatch(bibMarcRecords.getRecords());
77  
78      }
79  
80  
81  
82      @Test
83      public void testSort() {
84          List<OLEBatchProcessProfileDataMappingOptionsBo> oleBatchProcessProfileDataMappingOptionsBos = new ArrayList<>();
85  
86          OLEBatchProcessProfileDataMappingOptionsBo oleBatchProcessProfileDataMappingOptionsBo = new OLEBatchProcessProfileDataMappingOptionsBo();
87  
88          oleBatchProcessProfileDataMappingOptionsBo.setDataTypeDestinationField("holdings");
89          oleBatchProcessProfileDataMappingOptionsBo.setDestinationField("callNumber");
90          oleBatchProcessProfileDataMappingOptionsBo.setPriority(1);
91  
92          OLEBatchProcessProfileDataMappingOptionsBo oleBatchProcessProfileDataMappingOptionsBo1 = new OLEBatchProcessProfileDataMappingOptionsBo();
93  
94          oleBatchProcessProfileDataMappingOptionsBo1.setDataTypeDestinationField("item");
95          oleBatchProcessProfileDataMappingOptionsBo1.setDestinationField("callNumber");
96          oleBatchProcessProfileDataMappingOptionsBo1.setPriority(1);
97  
98          OLEBatchProcessProfileDataMappingOptionsBo oleBatchProcessProfileDataMappingOptionsBo2 = new OLEBatchProcessProfileDataMappingOptionsBo();
99  
100         oleBatchProcessProfileDataMappingOptionsBo2.setDataTypeDestinationField("holdings");
101         oleBatchProcessProfileDataMappingOptionsBo2.setDestinationField("callNumber");
102         oleBatchProcessProfileDataMappingOptionsBo2.setPriority(4);
103 
104         OLEBatchProcessProfileDataMappingOptionsBo oleBatchProcessProfileDataMappingOptionsBo3 = new OLEBatchProcessProfileDataMappingOptionsBo();
105 
106         oleBatchProcessProfileDataMappingOptionsBo3.setDataTypeDestinationField("holdings");
107         oleBatchProcessProfileDataMappingOptionsBo3.setDestinationField("callNumber");
108         oleBatchProcessProfileDataMappingOptionsBo3.setPriority(3);
109 
110         OLEBatchProcessProfileDataMappingOptionsBo oleBatchProcessProfileDataMappingOptionsBo4 = new OLEBatchProcessProfileDataMappingOptionsBo();
111 
112         oleBatchProcessProfileDataMappingOptionsBo4.setDataTypeDestinationField("item");
113         oleBatchProcessProfileDataMappingOptionsBo4.setDestinationField("callNumber");
114         oleBatchProcessProfileDataMappingOptionsBo4.setPriority(2);
115 
116         OLEBatchProcessProfileDataMappingOptionsBo oleBatchProcessProfileDataMappingOptionsBo5 = new OLEBatchProcessProfileDataMappingOptionsBo();
117 
118         oleBatchProcessProfileDataMappingOptionsBo5.setDataTypeDestinationField("eholdings");
119         oleBatchProcessProfileDataMappingOptionsBo5.setDestinationField("callNumber");
120         oleBatchProcessProfileDataMappingOptionsBo5.setPriority(3);
121 
122         OLEBatchProcessProfileDataMappingOptionsBo oleBatchProcessProfileDataMappingOptionsBo6 = new OLEBatchProcessProfileDataMappingOptionsBo();
123 
124         oleBatchProcessProfileDataMappingOptionsBo6.setDataTypeDestinationField("eholdings");
125         oleBatchProcessProfileDataMappingOptionsBo6.setDestinationField("callNumber");
126         oleBatchProcessProfileDataMappingOptionsBo6.setPriority(2);
127 
128 
129         oleBatchProcessProfileDataMappingOptionsBos.add(oleBatchProcessProfileDataMappingOptionsBo);
130         oleBatchProcessProfileDataMappingOptionsBos.add(oleBatchProcessProfileDataMappingOptionsBo1);
131         oleBatchProcessProfileDataMappingOptionsBos.add(oleBatchProcessProfileDataMappingOptionsBo2);
132         oleBatchProcessProfileDataMappingOptionsBos.add(oleBatchProcessProfileDataMappingOptionsBo3);
133         oleBatchProcessProfileDataMappingOptionsBos.add(oleBatchProcessProfileDataMappingOptionsBo4);
134         oleBatchProcessProfileDataMappingOptionsBos.add(oleBatchProcessProfileDataMappingOptionsBo5);
135         oleBatchProcessProfileDataMappingOptionsBos.add(oleBatchProcessProfileDataMappingOptionsBo6);
136 
137         System.out.println(oleBatchProcessProfileDataMappingOptionsBos+"\n\n");
138         System.out.println(oleBatchProcessProfileDataMappingOptionsBos.get(0).getPriority() + " " +oleBatchProcessProfileDataMappingOptionsBos.get(0).getDataTypeDestinationField());
139         System.out.println(oleBatchProcessProfileDataMappingOptionsBos.get(1).getPriority() + " " +oleBatchProcessProfileDataMappingOptionsBos.get(1).getDataTypeDestinationField());
140         System.out.println(oleBatchProcessProfileDataMappingOptionsBos.get(2).getPriority() + " " +oleBatchProcessProfileDataMappingOptionsBos.get(2).getDataTypeDestinationField());
141         System.out.println(oleBatchProcessProfileDataMappingOptionsBos.get(3).getPriority() + " " +oleBatchProcessProfileDataMappingOptionsBos.get(3).getDataTypeDestinationField());
142         System.out.println(oleBatchProcessProfileDataMappingOptionsBos.get(4).getPriority() + " " +oleBatchProcessProfileDataMappingOptionsBos.get(4).getDataTypeDestinationField());
143         System.out.println(oleBatchProcessProfileDataMappingOptionsBos.get(5).getPriority() + " " +oleBatchProcessProfileDataMappingOptionsBos.get(5).getDataTypeDestinationField());
144         System.out.println(oleBatchProcessProfileDataMappingOptionsBos.get(6).getPriority() + " " +oleBatchProcessProfileDataMappingOptionsBos.get(6).getDataTypeDestinationField());
145 
146 
147         java.util.Collections.sort(oleBatchProcessProfileDataMappingOptionsBos);
148 
149         System.out.println(oleBatchProcessProfileDataMappingOptionsBos+"\n\n");
150         System.out.println(oleBatchProcessProfileDataMappingOptionsBos.get(0).getPriority() + " " +oleBatchProcessProfileDataMappingOptionsBos.get(0).getDataTypeDestinationField());
151         System.out.println(oleBatchProcessProfileDataMappingOptionsBos.get(1).getPriority() + " " +oleBatchProcessProfileDataMappingOptionsBos.get(1).getDataTypeDestinationField());
152         System.out.println(oleBatchProcessProfileDataMappingOptionsBos.get(2).getPriority() + " " +oleBatchProcessProfileDataMappingOptionsBos.get(2).getDataTypeDestinationField());
153         System.out.println(oleBatchProcessProfileDataMappingOptionsBos.get(3).getPriority() + " " +oleBatchProcessProfileDataMappingOptionsBos.get(3).getDataTypeDestinationField());
154         System.out.println(oleBatchProcessProfileDataMappingOptionsBos.get(4).getPriority() + " " +oleBatchProcessProfileDataMappingOptionsBos.get(4).getDataTypeDestinationField());
155         System.out.println(oleBatchProcessProfileDataMappingOptionsBos.get(5).getPriority() + " " +oleBatchProcessProfileDataMappingOptionsBos.get(5).getDataTypeDestinationField());
156         System.out.println(oleBatchProcessProfileDataMappingOptionsBos.get(6).getPriority() + " " +oleBatchProcessProfileDataMappingOptionsBos.get(6).getDataTypeDestinationField());
157 
158 
159 
160     }
161 
162     @Test
163     public void generateFileForBibImport1() {
164         String filePath = System.getProperty("user.home");
165         String fileName = "10Marc";
166         boolean writeMarc = Boolean.TRUE;
167         boolean writeMarcXml = Boolean.TRUE;
168         int numOfRecordsInFile = 10;
169 
170         generateFile(filePath, fileName, writeMarc, writeMarcXml, numOfRecordsInFile);
171     }
172 
173     @Test
174     public void generateFileForBibImport2() {
175         String filePath = System.getProperty("user.home");
176         String fileName = "100Marc";
177         boolean writeMarc = Boolean.TRUE;
178         boolean writeMarcXml = Boolean.TRUE;
179         int numOfRecordsInFile = 100;
180 
181         generateFile(filePath, fileName, writeMarc, writeMarcXml, numOfRecordsInFile);
182     }
183 
184     @Test
185     public void generateFileForBibImport3() {
186         String filePath = System.getProperty("user.home");
187         String fileName = "10KMarc";
188         boolean writeMarc = Boolean.TRUE;
189         boolean writeMarcXml = Boolean.TRUE;
190         int numOfRecordsInFile = 10000;
191 
192         generateFile(filePath, fileName, writeMarc, writeMarcXml, numOfRecordsInFile);
193     }
194 
195     @Test
196     public void generateFileForBibImport4() {
197         String filePath = System.getProperty("user.home");
198         String fileName = "100KMarc";
199         boolean writeMarc = Boolean.TRUE;
200         boolean writeMarcXml = Boolean.TRUE;
201         int numOfRecordsInFile = 100000;
202 
203         generateFile(filePath, fileName, writeMarc, writeMarcXml, numOfRecordsInFile);
204     }
205 
206     public void generateFile(String filePath, String fileName, boolean writeMarc, boolean writeMarcXml, int numOfRecordsInFile) {
207         BibTree bibTree = getBibTreeForBibImport();
208         BibMarcRecordProcessor bibMarcRecordProcessor = new BibMarcRecordProcessor();
209         List<BibMarcRecord> bibRecords = new ArrayList<BibMarcRecord>();
210         List<String> bibMarcRecordList = new ArrayList<String>();
211         for (int i = 1; i <= numOfRecordsInFile; i++) {
212             Bib bib = bibTree.getBib();
213             BibMarcRecord bibMarcRecord = getBibMarcRecord(bib.getContent(), bibMarcRecordProcessor);
214             List<DataField> dataFields = bibMarcRecord.getDataFields();
215             bibMarcRecord.getControlFields().get(0).setValue("1000" + i);
216             bibMarcRecord.getDataFields().get(0).getSubFields().get(0).setValue("Test Record" + i);
217             DataField dataField1 = new DataField();
218             SubField subField1 = new SubField();
219             subField1.setCode("a");
220             subField1.setValue("PQ 00" + i);
221             dataField1.setTag("949");
222             dataField1.getSubFields().add(subField1);
223             DataField dataField2 = new DataField();
224             SubField subField2 = new SubField();
225             subField2.setCode("i");
226             subField2.setValue(String.valueOf(i));
227             dataField2.setTag("949");
228             dataField2.getSubFields().add(subField2);
229             dataFields.add(dataField1);
230             dataFields.add(dataField2);
231             bibRecords.add(bibMarcRecord);
232         }
233         bibMarcRecordList.add(bibMarcRecordProcessor.generateXML(bibRecords));
234         if (writeMarc && writeMarcXml) {
235             generateMarcXml(fileName, filePath, bibMarcRecordList);
236             generateMarcFromXml(fileName, filePath, bibMarcRecordList);
237         } else if (writeMarc && !writeMarcXml) {
238             generateMarcFromXml(fileName, filePath, bibMarcRecordList);
239         } else if (!writeMarc && writeMarcXml) {
240             generateMarcXml(fileName, filePath, bibMarcRecordList);
241         }
242     }
243 
244     public BibTree getBibTreeForBibImport() {
245         BibTree bibTree = new BibTree();
246         return (BibTree) bibTree.deserialize(getXmlAsString("/org/kuali/ole/batch/bibTreeDocument/ImportBibTree.xml"));
247     }
248 
249     private BibMarcRecord getBibMarcRecord(String content, BibMarcRecordProcessor bibMarcRecordProcessor) {
250         BibMarcRecord bibMarcRecord = null;
251         BibMarcRecords marcRecords = bibMarcRecordProcessor.fromXML(content);
252         List<BibMarcRecord> bibMarcRecordList = marcRecords.getRecords();
253         Iterator<BibMarcRecord> bibMarcRecordListIterator = bibMarcRecordList.iterator();
254         if (bibMarcRecordListIterator.hasNext()) {
255             bibMarcRecord = bibMarcRecordListIterator.next();
256         }
257         return bibMarcRecord;
258     }
259 
260     public void generateMarcXml(String fileName, String filePath, List<String> bibMarcRecordList) {
261         File file = new File(filePath + FileSystems.getDefault().getSeparator() + fileName + ".xml");
262         try {
263             FileUtils.writeLines(file, "UTF-8", bibMarcRecordList, true);
264         } catch (IOException e) {
265             e.printStackTrace();  //To change body of catch statement use File | Settings | File Templates.
266         }
267     }
268 
269     public void generateMarcFromXml(String fileName, String filePath, List<String> bibMarcRecordList) {
270         StopWatch timer = new StopWatch();
271         timer.start();
272         File fileToWrite = new File(filePath + FileSystems.getDefault().getSeparator() + fileName + ".mrc");
273         FileOutputStream fileOutputStream = null;
274         try {
275             fileOutputStream = new FileOutputStream(fileToWrite, true);
276         } catch (FileNotFoundException e) {
277             e.printStackTrace();  //To change body of catch statement use File | Settings | File Templates.
278         }
279         MarcWriter writer = new MarcStreamWriter(fileOutputStream, "UTF-8");
280         for (String bibContent : bibMarcRecordList) {
281             InputStream input = new ByteArrayInputStream(bibContent.getBytes());
282             Record record = null;
283             try {
284                 OLEMarcReader marcXmlReader = new OLEMarcXmlReader(input);
285                 while (marcXmlReader.hasNext()) {
286                     if (marcXmlReader.hasErrors()) {
287                         marcXmlReader.next();
288                         marcXmlReader.clearErrors();
289                         continue;
290                     }
291                     record = marcXmlReader.next();
292                     writer.write(record);
293                 }
294 
295             } catch (Exception ex) {
296                 ex.printStackTrace();
297             }
298         }
299         writer.close();
300         timer.stop();
301     }
302 
303     @Test
304     public void testBatchProcessProfileProcessor1() throws Exception {
305         OLEBatchProcessProfileBo oleBatchProcessProfileBo = getOLEBatchProcessProfileBo1();
306         Assert.assertEquals("Test_Bib_Profile_1", oleBatchProcessProfileBo.getBatchProcessProfileName());
307         String profileXML = getOLEBatchProcessProfileRecordProcessor().toXml(oleBatchProcessProfileBo);
308         System.out.println(profileXML);
309         OLEBatchProcessProfileBo oleBatchProcessProfileBo1 = getOLEBatchProcessProfileRecordProcessor().fromXML(profileXML);
310         Assert.assertNotNull(oleBatchProcessProfileBo1);
311         Assert.assertEquals("Test_Bib_Profile_1", oleBatchProcessProfileBo1.getBatchProcessProfileName());
312     }
313 
314     @Test
315     public void testBatchProcessProfileProcessor2() throws Exception {
316         OLEBatchProcessProfileBo oleBatchProcessProfileBo = getOLEBatchProcessProfileBo2();
317         Assert.assertEquals("Test_Bib_Profile_2", oleBatchProcessProfileBo.getBatchProcessProfileName());
318         String profileXML = getOLEBatchProcessProfileRecordProcessor().toXml(oleBatchProcessProfileBo);
319         System.out.println(profileXML);
320         OLEBatchProcessProfileBo oleBatchProcessProfileBo1 = getOLEBatchProcessProfileRecordProcessor().fromXML(profileXML);
321         Assert.assertNotNull(oleBatchProcessProfileBo1);
322         Assert.assertEquals("Test_Bib_Profile_2", oleBatchProcessProfileBo1.getBatchProcessProfileName());
323     }
324 
325     public OLEBatchProcessProfileBo getOLEBatchProcessProfileBo1() throws Exception {
326         String profileXML = getXmlAsString("/org/kuali/ole/batch/bibImportProfiles/BibImportProfile1.xml");
327         return getOLEBatchProcessProfileRecordProcessor().fromXML(profileXML);
328     }
329 
330     public OLEBatchProcessProfileBo getOLEBatchProcessProfileBo2() {
331         String profileXML = getXmlAsString("/org/kuali/ole/batch/bibImportProfiles/BibImportProfile2.xml");
332         return getOLEBatchProcessProfileRecordProcessor().fromXML(profileXML);
333     }
334 
335     public String getXmlAsString(String filePath){
336         String input = "";
337         File file = null;
338         try {
339             file = new File(getClass().getResource(filePath).toURI());
340             input = FileUtils.readFileToString(file);
341         } catch (Exception e) {
342             e.printStackTrace();
343         }
344         return input;
345     }
346 
347     private OLEBatchProcessProfileBo getOLEBatchProcessProfileBo() {
348         OLEBatchProcessProfileBo oleBatchProcessProfileBo = new OLEBatchProcessProfileBo();
349         oleBatchProcessProfileBo.setBatchProcessProfileId("1");
350         oleBatchProcessProfileBo.setBatchProcessProfileDesc("Mock Desc");
351         oleBatchProcessProfileBo.setBatchProcessProfileName("Mock Profile");
352         oleBatchProcessProfileBo.setBatchProcessProfileType("Batch Export");
353         List<OLEBatchProcessProfileFilterCriteriaBo> filterCriteriaBoList = new ArrayList<>();
354         OLEBatchProcessProfileFilterCriteriaBo filterCriteriaBo = new OLEBatchProcessProfileFilterCriteriaBo();
355         filterCriteriaBo.setFilterId("1");
356         filterCriteriaBo.setDataType("BibMarc");
357         filterCriteriaBo.setFilterFieldName("test");
358         filterCriteriaBoList.add(filterCriteriaBo);
359         oleBatchProcessProfileBo.setOleBatchProcessProfileFilterCriteriaList(filterCriteriaBoList);
360         List<OLEBatchProcessProfileMappingOptionsBo> oleBatchProcessProfileMappingOptionsList = new ArrayList<OLEBatchProcessProfileMappingOptionsBo>();
361         OLEBatchProcessProfileMappingOptionsBo oleBatchProcessProfileMappingOptionsBo = new OLEBatchProcessProfileMappingOptionsBo();
362         List<OLEBatchProcessProfileDataMappingOptionsBo> oleBatchProcessProfileDataMappingOptionsBoList = new ArrayList<OLEBatchProcessProfileDataMappingOptionsBo>();
363         OLEBatchProcessProfileDataMappingOptionsBo oleBatchProcessProfileDataMappingOptionsBo = new OLEBatchProcessProfileDataMappingOptionsBo();
364         oleBatchProcessProfileDataMappingOptionsBo.setDataType("BibMarc");
365         oleBatchProcessProfileDataMappingOptionsBo.setSourceField("CallNumber");
366         oleBatchProcessProfileDataMappingOptionsBo.setDataTypeDestinationField("BibMarc");
367         oleBatchProcessProfileDataMappingOptionsBo.setDestinationField("245 $a");
368         oleBatchProcessProfileDataMappingOptionsBoList.add(oleBatchProcessProfileDataMappingOptionsBo);
369         oleBatchProcessProfileMappingOptionsBo.setOleBatchProcessProfileDataMappingOptionsBoList(oleBatchProcessProfileDataMappingOptionsBoList);
370         oleBatchProcessProfileMappingOptionsList.add(oleBatchProcessProfileMappingOptionsBo);
371         oleBatchProcessProfileBo.setOleBatchProcessProfileMappingOptionsList(oleBatchProcessProfileMappingOptionsList);
372         List<OLEBatchGloballyProtectedField> oleBatchGloballyProtectedFieldList = new ArrayList<OLEBatchGloballyProtectedField>();
373         OLEBatchGloballyProtectedField oleBatchGloballyProtectedField = new OLEBatchGloballyProtectedField();
374         oleBatchGloballyProtectedField.setId("1");
375         oleBatchGloballyProtectedField.setGloballyProtectedFieldId("1");
376         oleBatchGloballyProtectedField.setTag("999");
377         oleBatchGloballyProtectedFieldList.add(oleBatchGloballyProtectedField);
378         oleBatchProcessProfileBo.setOleBatchGloballyProtectedFieldList(oleBatchGloballyProtectedFieldList);
379         return oleBatchProcessProfileBo;
380     }
381 
382 }