1 package org.kuali.ole.docstore.common.document;
2
3 import org.kuali.ole.docstore.common.document.config.DocumentSearchConfig;
4 import org.kuali.ole.docstore.common.document.content.bib.marc.BibMarcRecord;
5 import org.kuali.ole.docstore.common.document.content.bib.marc.DataField;
6 import org.kuali.ole.docstore.common.document.content.bib.marc.SubField;
7
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
14
15
16
17
18
19
20
21
22
23
24 public class BibMarcMapper {
25
26 private static BibMarcMapper bibMarcMapper = null;
27 private static Map<String, String> FIELDS_TO_TAGS_2_INCLUDE_MAP = new HashMap<String, String>();
28 private String publicationDateRegex = "[0-9]{4}";
29
30 private BibMarcMapper() {
31 FIELDS_TO_TAGS_2_INCLUDE_MAP = Collections.unmodifiableMap(DocumentSearchConfig.FIELDS_TO_TAGS_2_INCLUDE_MAP);
32 }
33
34 public static BibMarcMapper getInstance() {
35 if (bibMarcMapper == null) {
36 bibMarcMapper = new BibMarcMapper();
37 }
38 return bibMarcMapper;
39 }
40
41 public void extractFields(BibMarcRecord bibMarc, Bib bib) {
42 for (String field : FIELDS_TO_TAGS_2_INCLUDE_MAP.keySet()) {
43 buildFields(field, bibMarc, bib);
44 }
45
46 }
47
48 private HashMap<String, ArrayList<String>> getTags(String tag) {
49
50 String tags = FIELDS_TO_TAGS_2_INCLUDE_MAP.get(tag);
51 String[] tagDetailArray = tags.split(",");
52
53 HashMap<String, ArrayList<String>> dataFieldMap = new HashMap<>();
54 for (int i = 0; i < tagDetailArray.length; i++) {
55 ArrayList<String> subFieldList = new ArrayList<>();
56 String dataField = null;
57 String subFieldSplit[] = null;
58 String[] tagSplit = tagDetailArray[i].split("-");
59 if (tagSplit.length > 0) {
60 dataField = tagSplit[0];
61 }
62 if (tagSplit.length > 1) {
63 subFieldSplit = tagSplit[1].split(";");
64 for (int subFieldCount = 0; subFieldCount < subFieldSplit.length; subFieldCount++) {
65 subFieldList.add(subFieldSplit[subFieldCount]);
66 }
67 }
68
69 dataFieldMap.put(dataField, subFieldList);
70
71 }
72
73 return dataFieldMap;
74
75 }
76
77
78 public void buildFields(String field, BibMarcRecord bibMarc, Bib bib) {
79 DataField dataField;
80
81 if (field.equalsIgnoreCase("Title_display")) {
82 HashMap<String, ArrayList<String>> titleDisplayMap = getTags("Title_display");
83 StringBuilder title = new StringBuilder();
84 for (Map.Entry<String, ArrayList<String>> titleDisplayMapEntry : titleDisplayMap.entrySet()) {
85 String key = titleDisplayMapEntry.getKey();
86 Object value = titleDisplayMapEntry.getValue();
87 ArrayList<String> subFieldList = (ArrayList<String>) value;
88 dataField = bibMarc.getDataFieldForTag(key);
89 if (dataField != null) {
90 for (SubField subField : dataField.getSubFields()) {
91 for (String subFieldStr : subFieldList) {
92 if (subField.getCode().equalsIgnoreCase(subFieldStr)) {
93 title.append(subField.getValue());
94 break;
95 }
96 }
97 }
98
99 }
100 }
101 String titleBib=title.toString();
102 titleBib = titleBib.replaceAll("<","<");
103 titleBib = titleBib.replaceAll(">",">");
104 bib.setTitle(titleBib);
105 } else if (field.equalsIgnoreCase("Author_display")) {
106 HashMap<String, ArrayList<String>> authorDisplayMap = getTags("Author_display");
107 StringBuilder author = new StringBuilder();
108 for (Map.Entry<String, ArrayList<String>> authorDisplayMapEntry : authorDisplayMap.entrySet()) {
109 String key = authorDisplayMapEntry.getKey();
110 Object value = authorDisplayMapEntry.getValue();
111 ArrayList<String> subFieldList = (ArrayList<String>) value;
112 dataField = bibMarc.getDataFieldForTag(key);
113 if (dataField != null) {
114 for (SubField subField : dataField.getSubFields()) {
115 for (String subFieldStr : subFieldList) {
116 if (subField.getCode().equalsIgnoreCase(subFieldStr)) {
117 author.append(subField.getValue());
118 break;
119 }
120 }
121 }
122
123 }
124 }
125
126 bib.setAuthor(author.toString());
127 } else if (field.equalsIgnoreCase("Publisher_display")) {
128
129 HashMap<String, ArrayList<String>> publicDisplayMap = getTags("PublicationPlace_display");
130 StringBuilder publisher = new StringBuilder();
131 for (Map.Entry<String, ArrayList<String>> publicDisplayMapEntry : publicDisplayMap.entrySet()) {
132 String key = publicDisplayMapEntry.getKey();
133 Object value = publicDisplayMapEntry.getValue();
134 ArrayList<String> subFieldList = (ArrayList<String>) value;
135 dataField = bibMarc.getDataFieldForTag(key);
136 if (dataField != null) {
137 for (SubField subField : dataField.getSubFields()) {
138 for (String subFieldStr : subFieldList) {
139 if (subField.getCode().equalsIgnoreCase(subFieldStr)) {
140 if (publisher.length() > 0) {
141 publisher.append(" ");
142 }
143 publisher.append(subField.getValue());
144 break;
145 }
146 }
147 }
148
149 }
150 }
151 publicDisplayMap = getTags("Publisher_display");
152 for (Map.Entry<String, ArrayList<String>> publicDisplayMapEntry : publicDisplayMap.entrySet()) {
153 String key = publicDisplayMapEntry.getKey();
154 Object value = publicDisplayMapEntry.getValue();
155 ArrayList<String> subFieldList = (ArrayList<String>) value;
156 dataField = bibMarc.getDataFieldForTag(key);
157 if (dataField != null) {
158 for (SubField subField : dataField.getSubFields()) {
159 for (String subFieldStr : subFieldList) {
160 if (subField.getCode().equalsIgnoreCase(subFieldStr)) {
161 if (publisher.length() > 0) {
162 publisher.append(" ");
163 }
164 publisher.append(subField.getValue());
165 break;
166 }
167 }
168 }
169
170 }
171 }
172 bib.setPublisher(publisher.toString());
173 } else if (field.equalsIgnoreCase("PublicationDate_display")) {
174
175 HashMap<String, ArrayList<String>> publicationDateMap = getTags("PublicationDate_display");
176 StringBuilder publisherDate = new StringBuilder();
177 for (Map.Entry<String, ArrayList<String>> publicationDateMapEntry : publicationDateMap.entrySet()) {
178 String key = publicationDateMapEntry.getKey();
179 Object value = publicationDateMapEntry.getValue();
180 ArrayList<String> subFieldList = (ArrayList<String>) value;
181 dataField = bibMarc.getDataFieldForTag(key);
182 if (dataField != null) {
183 for (SubField subField : dataField.getSubFields()) {
184 for (String subFieldStr : subFieldList) {
185 if (subField.getCode().equalsIgnoreCase(subFieldStr)) {
186 publisherDate.append(subField.getValue());
187 break;
188 }
189 }
190 }
191
192 }
193 }
194 bib.setPublicationDate(publisherDate.toString());
195 } else if (field.equalsIgnoreCase("ISBN_display")) {
196 HashMap<String, ArrayList<String>> isbnMap = getTags("ISBN_display");
197 StringBuilder isbn = new StringBuilder();
198 for (Map.Entry<String, ArrayList<String>> isbnMapEntry : isbnMap.entrySet()) {
199 String key = isbnMapEntry.getKey();
200 Object value = isbnMapEntry.getValue();
201 ArrayList<String> subFieldList = (ArrayList<String>) value;
202 dataField = bibMarc.getDataFieldForTag(key);
203 if (dataField != null) {
204 for (SubField subField : dataField.getSubFields()) {
205 for (String subFieldStr : subFieldList) {
206 if (subField.getCode().equalsIgnoreCase(subFieldStr)) {
207 isbn.append(subField.getValue());
208 break;
209 }
210 }
211 }
212
213 }
214 }
215 bib.setIsbn(isbn.toString());
216 } else if (field.equalsIgnoreCase("ISSN_display")) {
217 HashMap<String, ArrayList<String>> issnMap = getTags("ISSN_display");
218 StringBuilder issn = new StringBuilder();
219 for (Map.Entry<String, ArrayList<String>> issnMapEntry : issnMap.entrySet()) {
220 String key = issnMapEntry.getKey();
221 Object value = issnMapEntry.getValue();
222 ArrayList<String> subFieldList = (ArrayList<String>) value;
223 dataField = bibMarc.getDataFieldForTag(key);
224 if (dataField != null) {
225 for (SubField subField : dataField.getSubFields()) {
226 for (String subFieldStr : subFieldList) {
227 if (subField.getCode().equalsIgnoreCase(subFieldStr)) {
228 issn.append(subField.getValue());
229 break;
230 }
231 }
232 }
233
234 }
235 }
236 bib.setIssn(issn.toString());
237 }
238
239
240 }
241
242 private String extractPublicationDateWithRegex(String publicationDate) {
243 Pattern pattern = Pattern.compile(publicationDateRegex);
244 Matcher matcher = pattern.matcher(publicationDate);
245 if (matcher.find()) {
246 if (matcher.group(0).equalsIgnoreCase("0000")) {
247 return "";
248 }
249 return matcher.group(0);
250 } else {
251 return "";
252 }
253
254
255 }
256
257 }