package org.kuali.ole.docstore.common.document;

import org.kuali.ole.docstore.common.document.config.DocumentSearchConfig;
import org.kuali.ole.docstore.common.document.content.bib.marc.BibMarcRecord;
import org.kuali.ole.docstore.common.document.content.bib.marc.DataField;
import org.kuali.ole.docstore.common.document.content.bib.marc.SubField;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Copies display values (title, author, publisher, publication date, ISBN, ISSN) from a
 * {@link BibMarcRecord} onto a {@link Bib}.
 *
 * <p>Which data fields and subfields feed each display value is read from
 * {@link DocumentSearchConfig#FIELDS_TO_TAGS_2_INCLUDE_MAP}. Each map value is parsed as a
 * comma-separated list of {@code tag-code;code;...} entries.
 *
 * @author jayabharathreddy
 */
public class BibMarcMapper {

    private static final String TITLE_DISPLAY = "Title_display";
    private static final String AUTHOR_DISPLAY = "Author_display";
    private static final String PUBLISHER_DISPLAY = "Publisher_display";
    private static final String PUBLICATION_DATE_DISPLAY = "PublicationDate_display";
    private static final String ISBN_DISPLAY = "ISBN_display";
    private static final String ISSN_DISPLAY = "ISSN_display";

    /** Matches the first run of four digits; compiled once instead of on every call. */
    private static final Pattern PUBLICATION_DATE_PATTERN = Pattern.compile("[0-9]{4}");

    private static BibMarcMapper bibMarcMapper = null;

    /** Read-only view of the configured field-name to tag-specification mapping. */
    private final Map<String, String> fieldsToTags;

    private BibMarcMapper() {
        fieldsToTags = Collections.unmodifiableMap(DocumentSearchConfig.FIELDS_TO_TAGS_2_INCLUDE_MAP);
    }

    /**
     * Returns the shared mapper, creating it on first use.
     *
     * @return the lazily created {@code BibMarcMapper} instance
     */
    public static BibMarcMapper getInstance() {
        if (bibMarcMapper == null) {
            bibMarcMapper = new BibMarcMapper();
        }
        return bibMarcMapper;
    }

    /**
     * Populates {@code bib} by calling {@link #buildFields} once for every configured field name.
     *
     * @param bibMarc the MARC record to read from
     * @param bib     the bib to populate
     */
    public void extractFields(BibMarcRecord bibMarc, Bib bib) {
        for (String field : fieldsToTags.keySet()) {
            buildFields(field, bibMarc, bib);
        }
    }

    /**
     * Parses the tag specification configured under {@code tag}.
     *
     * <p>The specification is split on {@code ","} into entries; each entry is split on
     * {@code "-"} into a data field tag and an optional {@code ";"}-separated list of subfield
     * codes.
     *
     * @param tag the configuration key to look up
     * @return data field tag mapped to its subfield codes, in configuration order; empty when
     *         nothing is configured under {@code tag}
     */
    private Map<String, List<String>> getTags(String tag) {
        // LinkedHashMap keeps the configured order so concatenated values come out deterministically.
        Map<String, List<String>> dataFieldMap = new LinkedHashMap<>();

        String tags = fieldsToTags.get(tag);
        if (tags == null) {
            return dataFieldMap;
        }

        for (String tagDetail : tags.split(",")) {
            String[] tagSplit = tagDetail.split("-");
            // An entry such as "-" splits to an empty array; there is no tag to look up.
            if (tagSplit.length == 0 || tagSplit[0].isEmpty()) {
                continue;
            }
            List<String> subFieldList = new ArrayList<>();
            if (tagSplit.length > 1) {
                Collections.addAll(subFieldList, tagSplit[1].split(";"));
            }
            dataFieldMap.put(tagSplit[0], subFieldList);
        }
        return dataFieldMap;
    }

    /**
     * Sets the single {@code bib} property that corresponds to {@code field}.
     *
     * <p>Recognised names, compared case-insensitively, are {@code Title_display},
     * {@code Author_display}, {@code Publisher_display}, {@code PublicationDate_display},
     * {@code ISBN_display} and {@code ISSN_display}. Any other name, including {@code null},
     * leaves {@code bib} untouched.
     *
     * @param field   the display field name
     * @param bibMarc the MARC record to read from
     * @param bib     the bib to populate
     */
    public void buildFields(String field, BibMarcRecord bibMarc, Bib bib) {
        if (TITLE_DISPLAY.equalsIgnoreCase(field)) {
            String title = collectSubFieldValues(TITLE_DISPLAY, bibMarc, "");
            // Escape angle brackets as HTML entities. The earlier replaceAll("<", "<") and
            // replaceAll(">", ">") calls substituted each character with itself and did nothing.
            bib.setTitle(title.replace("<", "&lt;").replace(">", "&gt;"));
        } else if (AUTHOR_DISPLAY.equalsIgnoreCase(field)) {
            bib.setAuthor(collectSubFieldValues(AUTHOR_DISPLAY, bibMarc, ""));
        } else if (PUBLISHER_DISPLAY.equalsIgnoreCase(field)) {
            // Publisher is the only field whose parts are separated by a space.
            bib.setPublisher(collectSubFieldValues(PUBLISHER_DISPLAY, bibMarc, " "));
        } else if (PUBLICATION_DATE_DISPLAY.equalsIgnoreCase(field)) {
            bib.setPublicationDate(collectSubFieldValues(PUBLICATION_DATE_DISPLAY, bibMarc, ""));
        } else if (ISBN_DISPLAY.equalsIgnoreCase(field)) {
            bib.setIsbn(collectSubFieldValues(ISBN_DISPLAY, bibMarc, ""));
        } else if (ISSN_DISPLAY.equalsIgnoreCase(field)) {
            bib.setIssn(collectSubFieldValues(ISSN_DISPLAY, bibMarc, ""));
        }
    }

    /**
     * Joins the values of every subfield selected by the specification under {@code configKey}.
     *
     * <p>For each configured tag, the data field returned by
     * {@code bibMarc.getDataFieldForTag(tag)} is scanned and each subfield whose code matches a
     * configured code (ignoring case) contributes its value once.
     *
     * @param configKey the configuration key naming the tag specification
     * @param bibMarc   the MARC record to read from
     * @param separator text inserted between consecutive values; may be empty
     * @return the joined values, or an empty string when nothing matches
     */
    private String collectSubFieldValues(String configKey, BibMarcRecord bibMarc, String separator) {
        StringBuilder values = new StringBuilder();
        for (Map.Entry<String, List<String>> entry : getTags(configKey).entrySet()) {
            DataField dataField = bibMarc.getDataFieldForTag(entry.getKey());
            if (dataField == null) {
                continue;
            }
            for (SubField subField : dataField.getSubFields()) {
                if (containsIgnoreCase(entry.getValue(), subField.getCode())) {
                    if (values.length() > 0) {
                        values.append(separator);
                    }
                    values.append(subField.getValue());
                }
            }
        }
        return values.toString();
    }

    /**
     * Tells whether {@code codes} contains {@code code}, ignoring case.
     *
     * @param codes the configured subfield codes
     * @param code  the code to look for; {@code null} never matches
     * @return {@code true} when a configured code equals {@code code} ignoring case
     */
    private static boolean containsIgnoreCase(List<String> codes, String code) {
        for (String candidate : codes) {
            if (candidate.equalsIgnoreCase(code)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Returns the first four-digit run found in {@code publicationDate}.
     *
     * <p>This helper is not called from anywhere else in this class.
     *
     * @param publicationDate the raw publication date text
     * @return the first four consecutive digits, or an empty string when the input is
     *         {@code null}, contains no such run, or the run is {@code 0000}
     */
    private String extractPublicationDateWithRegex(String publicationDate) {
        if (publicationDate == null) {
            return "";
        }
        Matcher matcher = PUBLICATION_DATE_PATTERN.matcher(publicationDate);
        if (!matcher.find()) {
            return "";
        }
        String year = matcher.group(0);
        return "0000".equals(year) ? "" : year;
    }

}