View Javadoc
1   package org.kuali.ole.docstore.common.document;
2   
3   import org.kuali.ole.docstore.common.document.config.DocumentSearchConfig;
4   import org.kuali.ole.docstore.common.document.content.bib.marc.BibMarcRecord;
5   import org.kuali.ole.docstore.common.document.content.bib.marc.DataField;
6   import org.kuali.ole.docstore.common.document.content.bib.marc.SubField;
7   
8   import java.util.ArrayList;
9   import java.util.Collections;
10  import java.util.HashMap;
11  import java.util.Map;
12  import java.util.regex.Matcher;
13  import java.util.regex.Pattern;
14  
15  /**
16   * Created with IntelliJ IDEA.
17   * User: jayabharathreddy
18   * Date: 12/27/13
19   * Time: 4:35 PM
20   * To change this template use File | Settings | File Templates.
21   */
22  
23  
24  public class BibMarcMapper {
25  
26      private static BibMarcMapper bibMarcMapper = null;
27      private static Map<String, String> FIELDS_TO_TAGS_2_INCLUDE_MAP = new HashMap<String, String>();
28      private String publicationDateRegex = "[0-9]{4}";
29  
30      private BibMarcMapper() {
31          FIELDS_TO_TAGS_2_INCLUDE_MAP = Collections.unmodifiableMap(DocumentSearchConfig.FIELDS_TO_TAGS_2_INCLUDE_MAP);
32      }
33  
34      public static BibMarcMapper getInstance() {
35          if (bibMarcMapper == null) {
36              bibMarcMapper = new BibMarcMapper();
37          }
38          return bibMarcMapper;
39      }
40  
41      public void extractFields(BibMarcRecord bibMarc, Bib bib) {
42          for (String field : FIELDS_TO_TAGS_2_INCLUDE_MAP.keySet()) {
43              buildFields(field, bibMarc, bib);
44          }
45  
46      }
47  
48      private HashMap<String, ArrayList<String>> getTags(String tag) {
49  
50          String tags = FIELDS_TO_TAGS_2_INCLUDE_MAP.get(tag);
51          String[] tagDetailArray = tags.split(",");
52  
53          HashMap<String, ArrayList<String>> dataFieldMap = new HashMap<>();
54          for (int i = 0; i < tagDetailArray.length; i++) {
55              ArrayList<String> subFieldList = new ArrayList<>();
56              String dataField = null;
57              String subFieldSplit[] = null;
58              String[] tagSplit = tagDetailArray[i].split("-");
59              if (tagSplit.length > 0) {
60                  dataField = tagSplit[0];
61              }
62              if (tagSplit.length > 1) {
63                  subFieldSplit = tagSplit[1].split(";");
64                  for (int subFieldCount = 0; subFieldCount < subFieldSplit.length; subFieldCount++) {
65                      subFieldList.add(subFieldSplit[subFieldCount]);
66                  }
67              }
68  
69              dataFieldMap.put(dataField, subFieldList);
70  
71          }
72  
73          return dataFieldMap;
74  
75      }
76  
77  
78      public void buildFields(String field, BibMarcRecord bibMarc, Bib bib) {
79          DataField dataField;
80  
81          if (field.equalsIgnoreCase("Title_display")) {
82              HashMap<String, ArrayList<String>> titleDisplayMap = getTags("Title_display");
83              StringBuilder title = new StringBuilder();
84              for (Map.Entry<String, ArrayList<String>> titleDisplayMapEntry : titleDisplayMap.entrySet()) {
85                  String key = titleDisplayMapEntry.getKey();
86                  Object value = titleDisplayMapEntry.getValue();
87                  ArrayList<String> subFieldList = (ArrayList<String>) value;
88                  dataField = bibMarc.getDataFieldForTag(key);
89                  if (dataField != null) {
90                      for (SubField subField : dataField.getSubFields()) {
91                          for (String subFieldStr : subFieldList) {
92                              if (subField.getCode().equalsIgnoreCase(subFieldStr)) {
93                                  title.append(subField.getValue());
94                                  break;
95                              }
96                          }
97                      }
98  
99                  }
100             }
101             String titleBib=title.toString();
102             titleBib = titleBib.replaceAll("<","&lt;");
103             titleBib = titleBib.replaceAll(">","&gt;");
104             bib.setTitle(titleBib);
105         } else if (field.equalsIgnoreCase("Author_display")) {
106             HashMap<String, ArrayList<String>> authorDisplayMap = getTags("Author_display");
107             StringBuilder author = new StringBuilder();
108             for (Map.Entry<String, ArrayList<String>> authorDisplayMapEntry : authorDisplayMap.entrySet()) {
109                 String key = authorDisplayMapEntry.getKey();
110                 Object value = authorDisplayMapEntry.getValue();
111                 ArrayList<String> subFieldList = (ArrayList<String>) value;
112                 dataField = bibMarc.getDataFieldForTag(key);
113                 if (dataField != null) {
114                     for (SubField subField : dataField.getSubFields()) {
115                         for (String subFieldStr : subFieldList) {
116                             if (subField.getCode().equalsIgnoreCase(subFieldStr)) {
117                                 author.append(subField.getValue());
118                                 break;
119                             }
120                         }
121                     }
122 
123                 }
124             }
125 
126             bib.setAuthor(author.toString());
127         } else if (field.equalsIgnoreCase("Publisher_display")) {
128             // Publisher and publisher are separate we want them as single
129             HashMap<String, ArrayList<String>> publicDisplayMap = getTags("PublicationPlace_display");
130             StringBuilder publisher = new StringBuilder();
131             for (Map.Entry<String, ArrayList<String>> publicDisplayMapEntry : publicDisplayMap.entrySet()) {
132                 String key = publicDisplayMapEntry.getKey();
133                 Object value = publicDisplayMapEntry.getValue();
134                 ArrayList<String> subFieldList = (ArrayList<String>) value;
135                 dataField = bibMarc.getDataFieldForTag(key);
136                 if (dataField != null) {
137                     for (SubField subField : dataField.getSubFields()) {
138                         for (String subFieldStr : subFieldList) {
139                             if (subField.getCode().equalsIgnoreCase(subFieldStr)) {
140                                 if (publisher.length() > 0) {
141                                     publisher.append(" ");
142                                 }
143                                 publisher.append(subField.getValue());
144                                 break;
145                             }
146                         }
147                     }
148 
149                 }
150             }
151             publicDisplayMap = getTags("Publisher_display");
152             for (Map.Entry<String, ArrayList<String>> publicDisplayMapEntry : publicDisplayMap.entrySet()) {
153                 String key = publicDisplayMapEntry.getKey();
154                 Object value = publicDisplayMapEntry.getValue();
155                 ArrayList<String> subFieldList = (ArrayList<String>) value;
156                 dataField = bibMarc.getDataFieldForTag(key);
157                 if (dataField != null) {
158                     for (SubField subField : dataField.getSubFields()) {
159                         for (String subFieldStr : subFieldList) {
160                             if (subField.getCode().equalsIgnoreCase(subFieldStr)) {
161                                 if (publisher.length() > 0) {
162                                     publisher.append(" ");
163                                 }
164                                 publisher.append(subField.getValue());
165                                 break;
166                             }
167                         }
168                     }
169 
170                 }
171             }
172             bib.setPublisher(publisher.toString());
173         } else if (field.equalsIgnoreCase("PublicationDate_display")) {
174 
175             HashMap<String, ArrayList<String>> publicationDateMap = getTags("PublicationDate_display");
176             StringBuilder publisherDate = new StringBuilder();
177             for (Map.Entry<String, ArrayList<String>> publicationDateMapEntry : publicationDateMap.entrySet()) {
178                 String key = publicationDateMapEntry.getKey();
179                 Object value = publicationDateMapEntry.getValue();
180                 ArrayList<String> subFieldList = (ArrayList<String>) value;
181                 dataField = bibMarc.getDataFieldForTag(key);
182                 if (dataField != null) {
183                     for (SubField subField : dataField.getSubFields()) {
184                         for (String subFieldStr : subFieldList) {
185                             if (subField.getCode().equalsIgnoreCase(subFieldStr)) {
186                                 publisherDate.append(subField.getValue());
187                                 break;
188                             }
189                         }
190                     }
191 
192                 }
193             }
194             bib.setPublicationDate(publisherDate.toString());
195         } else if (field.equalsIgnoreCase("ISBN_display")) {
196             HashMap<String, ArrayList<String>> isbnMap = getTags("ISBN_display");
197             StringBuilder isbn = new StringBuilder();
198             for (Map.Entry<String, ArrayList<String>> isbnMapEntry : isbnMap.entrySet()) {
199                 String key = isbnMapEntry.getKey();
200                 Object value = isbnMapEntry.getValue();
201                 ArrayList<String> subFieldList = (ArrayList<String>) value;
202                 dataField = bibMarc.getDataFieldForTag(key);
203                 if (dataField != null) {
204                     for (SubField subField : dataField.getSubFields()) {
205                         for (String subFieldStr : subFieldList) {
206                             if (subField.getCode().equalsIgnoreCase(subFieldStr)) {
207                                 isbn.append(subField.getValue());
208                                 break;
209                             }
210                         }
211                     }
212 
213                 }
214             }
215             bib.setIsbn(isbn.toString());
216         } else if (field.equalsIgnoreCase("ISSN_display")) {
217             HashMap<String, ArrayList<String>> issnMap = getTags("ISSN_display");
218             StringBuilder issn = new StringBuilder();
219             for (Map.Entry<String, ArrayList<String>> issnMapEntry : issnMap.entrySet()) {
220                 String key = issnMapEntry.getKey();
221                 Object value = issnMapEntry.getValue();
222                 ArrayList<String> subFieldList = (ArrayList<String>) value;
223                 dataField = bibMarc.getDataFieldForTag(key);
224                 if (dataField != null) {
225                     for (SubField subField : dataField.getSubFields()) {
226                         for (String subFieldStr : subFieldList) {
227                             if (subField.getCode().equalsIgnoreCase(subFieldStr)) {
228                                 issn.append(subField.getValue());
229                                 break;
230                             }
231                         }
232                     }
233 
234                 }
235             }
236             bib.setIssn(issn.toString());
237         }
238 
239 
240     }
241 
242     private String extractPublicationDateWithRegex(String publicationDate) {
243         Pattern pattern = Pattern.compile(publicationDateRegex);
244         Matcher matcher = pattern.matcher(publicationDate);
245         if (matcher.find()) {
246             if (matcher.group(0).equalsIgnoreCase("0000")) {
247                 return "";
248             }
249             return matcher.group(0);
250         } else {
251             return "";
252         }
253 
254 
255     }
256 
257 }