View Javadoc
1   package org.kuali.ole.docstore.process;
2   
3   import org.apache.solr.common.SolrDocument;
4   import org.apache.solr.common.SolrInputDocument;
5   import org.kuali.ole.RepositoryManager;
6   import org.kuali.ole.docstore.common.document.content.instance.InstanceCollection;
7   import org.kuali.ole.docstore.common.document.content.instance.xstream.InstanceOlemlRecordProcessor;
8   import org.kuali.ole.docstore.discovery.solr.work.bib.marc.WorkBibMarcDocBuilder;
9   import org.kuali.ole.docstore.model.xmlpojo.ingest.RequestDocument;
10  import org.kuali.ole.docstore.common.document.content.instance.FormerIdentifier;
11  import org.kuali.ole.docstore.common.document.content.instance.Instance;
12  import org.kuali.ole.docstore.service.DocumentIngester;
13  import org.kuali.ole.docstore.service.ServiceLocator;
14  import org.kuali.ole.pojo.OleException;
15  import org.kuali.ole.repository.CheckoutManager;
16  import org.kuali.ole.repository.NodeHandler;
17  import org.slf4j.Logger;
18  import org.slf4j.LoggerFactory;
19  
20  import javax.jcr.*;
21  import java.io.ByteArrayInputStream;
22  import java.util.ArrayList;
23  import java.util.List;
24  
25  /**
26   * Created by IntelliJ IDEA.
27   * User: Pranitha
28   * Date: 6/4/12
29   * Time: 1:58 PM
30   * To change this template use File | Settings | File Templates.
31   */
32  public class LinkingInstanceNBibHandler
33          implements Runnable {
34  
35      private static LinkingInstanceNBibHandler link = null;
36      private boolean isRunning = false;
37      private static final Logger logger = LoggerFactory.getLogger(RebuildIndexesHandler.class);
38  
39      private String docCategory;
40      private String docType;
41      private String docFormat;
42      private CheckoutManager checkoutManager;
43  
44      private LinkingInstanceNBibHandler(String docCategory, String docType, String docFormat) {
45          this.docCategory = docCategory;
46          this.docType = docType;
47          this.docFormat = docFormat;
48          checkoutManager = new CheckoutManager();
49      }
50  
51      public static LinkingInstanceNBibHandler getInstance(String docCategory, String docType, String docFormat) {
52          if (link == null) {
53              link = new LinkingInstanceNBibHandler(docCategory, docType, docFormat);
54          }
55          return link;
56      }
57  
58      /**
59       * Method to get running status.
60       *
61       * @return
62       */
63      public boolean isRunning() {
64          return isRunning;
65      }
66  
67      /**
68       * Method to startProcess
69       */
70      public void startProcess() {
71          if (!isRunning) {
72              Thread rebuilderThread = new Thread(this);
73              rebuilderThread.start();
74          }
75      }
76  
77      public void run() {
78          Session session = null;
79          long totalCount = 0;
80          isRunning = true;
81          List<RequestDocument> docs = new ArrayList<RequestDocument>();
82          List<SolrInputDocument> solrInputDocs = new ArrayList<SolrInputDocument>();
83          logger.info("Rebuild Indexes Process(" + docCategory + " : " + docType + " : " + docFormat + "): START");
84          try {
85  
86              session = RepositoryManager.getRepositoryManager().getSession(ProcessParameters.BULK_DEFAULT_USER,
87                      ProcessParameters.BULK_DEFUALT_ACTION);
88              RequestDocument rd = new RequestDocument();
89              rd.setCategory(docCategory);
90              rd.setType(docType);
91              rd.setFormat(docFormat);
92              DocumentIngester docIngester = new DocumentIngester();
93              Node nodeFormat = docIngester.getStaticFormatNode(rd, session);
94              NodeIterator nodesL1 = nodeFormat.getNodes();
95              while (nodesL1.hasNext()) {
96                  Node nodeL1 = nodesL1.nextNode();
97                  NodeIterator nodesL2 = nodeL1.getNodes();
98                  while (nodesL2.hasNext()) {
99                      Node nodeL2 = nodesL2.nextNode();
100 
101                     NodeIterator nodesFile = nodeL2.getNodes();
102                     while (nodesFile.hasNext()) {
103                         if (docs.size() == ProcessParameters.BULK_PROCESSOR_SPLIT_SIZE) {
104                             indexDocs(solrInputDocs, totalCount);
105                         } else {
106                             Node fileNode = nodesFile.nextNode();
107                             String instanceCon = null;
108                             NodeIterator nodeIterator = null;
109                             Node instanceNode = null;
110                             try {
111                                 nodeIterator = fileNode.getNodes();
112                                 while (nodeIterator.hasNext()) {
113                                     instanceNode = nodeIterator.nextNode();
114                                     if (instanceNode.getName().equalsIgnoreCase("instanceFile")) {
115                                         instanceCon = checkoutManager.getData(instanceNode);
116                                     }
117                                 }
118                             } catch (RepositoryException e) {
119                                 logger.error(e.getMessage(), e);
120                             }
121                             //                            String content = checkoutManager.checkOut(fileNode.getIdentifier(), "mockUser", "checkout");
122                             InstanceOlemlRecordProcessor recProcessor = new InstanceOlemlRecordProcessor();
123                             InstanceCollection instanceCollection = recProcessor.fromXML(instanceCon);
124                             List<String> bibIdList = new ArrayList<String>();
125                             if (instanceCollection.getInstance() != null
126                                     && instanceCollection.getInstance().size() > 0) {
127                                 Instance instance = instanceCollection.getInstance().get(0);
128                                 resolveLinkingWithBib(instance, bibIdList, session, solrInputDocs);
129                                 if (instanceNode.getName().equalsIgnoreCase("instanceFile")) {
130                                     byte[] documentBytes = recProcessor.toXML(instanceCollection).getBytes();
131                                     Binary binary = null;
132                                     if (documentBytes != null && instanceNode != null) {
133                                         binary = session.getValueFactory()
134                                                 .createBinary(new ByteArrayInputStream(documentBytes));
135                                         instanceNode.getNode("jcr:content").setProperty("jcr:data", binary);
136                                     }
137                                 }
138                             }
139                             totalCount++;
140                         }
141                     }
142                     if ((totalCount % 1000) == 0) {
143                         session.save();
144                         indexDocs(solrInputDocs, totalCount);
145                     } else if ((totalCount % 1000000) == 0) {
146                         ServiceLocator.getDiscoveryAdminService().optimize();
147                     }
148                 }
149             }
150             if (solrInputDocs.size() > 0) {
151                 session.save();
152                 indexDocs(solrInputDocs, totalCount);
153             }
154         } catch (Exception e) {
155             logger.error(
156                     "Rebuild Indexes Process(" + docCategory + " : " + docType + " : " + docFormat + ") Processed(" + (
157                             totalCount - docs.size()) + "), Failed @ batch(" + docs.size() + "): Cause: " + e, e);
158         } finally {
159             try {
160                 isRunning = false;
161                 RepositoryManager.getRepositoryManager().logout(session);
162             } catch (OleException e) {
163             }
164         }
165     }
166 
167     private void indexDocs(List<SolrInputDocument> solrInputDocs, long records) {
168 
169         try {
170             ServiceLocator.getIndexerService().indexSolrDocuments(solrInputDocs);
171             logger.info(
172                     "Linking Bib and Instance Records (" + records + "), Time Taken for Batch(" + solrInputDocs.size()
173                             + "): ");
174             solrInputDocs.clear();
175         } catch (Exception e) {
176             logger.error("Linking Bib and Instance Records (" + (records - solrInputDocs.size()) + "), Failed @ batch("
177                     + solrInputDocs.size() + "): Cause: " + e + "\n\tContinuous", e);
178         }
179     }
180 
181 
182     private void resolveLinkingWithBib(Instance instance, List<String> bibIdList, Session session,
183                                        List<SolrInputDocument> solrInputDocs) {
184         instance.getResourceIdentifier().clear();
185 
186         for (FormerIdentifier frids : instance.getFormerResourceIdentifier()) {
187             try {
188                 if (frids != null && frids.getIdentifier() != null &&
189                         frids.getIdentifier().getIdentifierValue() != null &&
190                         frids.getIdentifier().getIdentifierValue().trim().length() != 0) {
191                     WorkBibMarcDocBuilder marcDocBuilder = new WorkBibMarcDocBuilder();
192                     List<SolrDocument> solrBibDocs = ServiceLocator.getIndexerService()
193                             .getSolrDocument("SystemControlNumber",
194                                     frids.getIdentifier()
195                                             .getIdentifierValue());
196                     List<SolrDocument> solrInstanceDocs = ServiceLocator.getIndexerService().getSolrDocument("id",
197                             instance.getInstanceIdentifier());
198                     SolrInputDocument solrInputDocument = new SolrInputDocument();
199                     if (solrBibDocs != null && solrBibDocs.size() > 0) {
200 
201                         for (SolrDocument solrbibDoc : solrBibDocs) {
202                             String id = compareListRString(solrbibDoc.getFieldValue("id"));
203                             instance.getResourceIdentifier().add(id);
204 
205                             logger.info("bib id " + id);
206                             compareObjNAddValue(instance.getInstanceIdentifier(),
207                                     solrbibDoc.getFieldValue("instanceIdentifier"), solrbibDoc,
208                                     "instanceIdentifier");
209 
210                             solrInputDocument = new SolrInputDocument();
211                             marcDocBuilder.buildSolrInputDocFromSolrDoc(solrbibDoc, solrInputDocument);
212                             solrInputDocs.add(solrInputDocument);
213                             Node bibNode = getNodeByUUID(session, id);
214                             bibNode.setProperty("instanceIdentifier", instance.getInstanceIdentifier());
215                             Node instanceNode = getNodeByUUID(session, instance.getInstanceIdentifier());
216                             instanceNode.setProperty("bibIdentifier", id);
217 
218                             for (SolrDocument solrInstDoc : solrInstanceDocs) {
219                                 if (id != null) {
220                                     compareObjNAddValue(id, solrInstDoc.getFieldValue("bibIdentifier"), solrInstDoc,
221                                             "bibIdentifier");
222                                 }
223                                 solrInputDocument = new SolrInputDocument();
224                                 marcDocBuilder.buildSolrInputDocFromSolrDoc(solrInstDoc, solrInputDocument);
225                                 solrInputDocs.add(solrInputDocument);
226                             }
227                             logger.info("solr input  docs " + solrInputDocs);
228                         }
229                     }
230                 }
231             } catch (Exception e) {
232                 logger.info(e.getMessage(), e);
233             }
234         }
235     }
236 
237     private String compareListRString(Object id) {
238         if (id != null) {
239             if (id instanceof List) {
240                 List<String> idList = (List<String>) id;
241                 return idList.get(0);
242             } else if (id instanceof String) {
243                 String strId = (String) id;
244                 return strId;
245             }
246         }
247         return null;
248     }
249 
250 
251     /* private void compareObjToObj(SolrDocument solrbibDoc, SolrDocument solrInstDoc) {
252 
253         if (solrbibDoc.getFieldValue("id") instanceof List) {
254             List<String> bibIds = (List<String>) solrbibDoc.getFieldValue("id");
255             compareListToObj(bibIds, solrInstDoc.getFirstValue("bibIdentifier"), solrInstDoc,"bibIdentifier" );
256         }
257         else if (solrbibDoc.getFieldValue("id") instanceof String) {
258             String bibId = solrbibDoc.getFieldValue("id").toString();
259             compareStringToObj(bibId, solrInstDoc.getFieldValue("bibIdentifier"), solrInstDoc, "bibIdentifier");
260         }
261     }*/
262 
263 
264     private void compareObjNAddValue(String id, Object idObj, SolrDocument solrDoc, String identifier) {
265         if (idObj != null) {
266             if (idObj instanceof List) {
267                 List<String> instBibIdList = (List<String>) idObj;
268                 if (!instBibIdList.contains(id)) {
269                     solrDoc.addField(identifier, id);
270                 }
271             } else if (idObj instanceof String) {
272                 String instBibId = (String) idObj;
273                 if (!instBibId.equalsIgnoreCase(id)) {
274                     solrDoc.addField(identifier, id);
275                 }
276             }
277         } else {
278             solrDoc.addField(identifier, id);
279         }
280     }
281 
282     /* private void compareListToObj(List<String> idList, Object IdObj, SolrDocument solrDoc, String identifier) {
283 
284             for (String bibId : idList) {
285                 if (IdObj != null) {
286                     if (IdObj instanceof List) {
287                         List<String> instBibIdList = (List<String>) IdObj;
288                         if (!instBibIdList.contains(bibId)) {
289                             solrDoc.addField(identifier, bibId);
290                         }
291                     }
292                     else if ((IdObj instanceof String)) {
293                         String instBibId = (String) IdObj;
294                         if (!instBibId.equalsIgnoreCase(bibId)) {
295                             solrDoc.addField(identifier, bibId);
296                         }
297                     }
298                 }
299                 else {
300                     solrDoc.addField(identifier, bibId);
301                 }
302             }
303         }
304     */
305     private Node getNodeByUUID(Session newSession, String uuid) throws OleException {
306         return new NodeHandler().getNodeByUUID(newSession, uuid);
307     }
308 }