1   
2   
3   
4   
5   
6   
7   
8   
9   
10  
11  
12  
13  
14  
15  
16  package org.kuali.ole.gl.batch;
17  
18  import java.io.BufferedReader;
19  import java.io.BufferedWriter;
20  import java.io.File;
21  import java.io.FileNotFoundException;
22  import java.io.FileReader;
23  import java.io.FileWriter;
24  import java.io.IOException;
25  import java.util.ArrayList;
26  import java.util.Collections;
27  import java.util.Comparator;
28  import java.util.UUID;
29  
30  import org.apache.commons.io.FileUtils;
31  import org.kuali.ole.sys.OLEConstants;
32  import org.kuali.ole.sys.context.SpringContext;
33  import org.kuali.rice.core.api.config.property.ConfigurationService;
34  
35  
36  
37  
38  public class BatchSortUtil {
39      private static org.apache.log4j.Logger LOG = org.apache.log4j.Logger.getLogger(BatchSortUtil.class);
40      
41      private static File tempDir;
42  
43      private static File getTempDirectory() {
44          if ( tempDir == null ) {
45              tempDir = new File( SpringContext.getBean(ConfigurationService.class).getPropertyValueAsString(OLEConstants.TEMP_DIRECTORY_KEY) );
46          }
47          return tempDir;
48          }
49  
50      static public void sortTextFileWithFields(String inputFileName, String outputFileName, @SuppressWarnings("rawtypes") Comparator comparator){
51          
52          String tempSortDirName = UUID.randomUUID().toString();
53          File tempSortDir = new File( getTempDirectory(), tempSortDirName );
54          
55          FileUtils.deleteQuietly(tempSortDir);
56          try {
57              FileUtils.forceMkdir(tempSortDir);
58          } catch (IOException ex) {
59              LOG.fatal( "Unable to create temporary sort directory", ex );
60              throw new RuntimeException( "Unable to create temporary sort directory", ex );
61          }
62  
63          int numFiles = sortToTempFiles( inputFileName, tempSortDir, comparator );
64  
65          
66          mergeFiles(tempSortDir, numFiles, outputFileName, comparator);
67  
68          
69          FileUtils.deleteQuietly(tempSortDir);
70      }
71  
72      static int linesPerFile = 10000;
73  
74      
75  
76  
77  
78  
79      private static int sortToTempFiles(String inputFileName, File tempSortDir, Comparator<String> comparator) {
80          BufferedReader inputFile;
81           try {
82               inputFile = new BufferedReader(new FileReader(inputFileName));
83           } catch ( FileNotFoundException ex ) {
84               LOG.fatal( "Unable to find input file: " + inputFileName, ex );
85               throw new RuntimeException( "Unable to find input file: " + inputFileName, ex );
86           }
87           try {
88               String line = "";
89               ArrayList<String> batchLines = new ArrayList<String>( linesPerFile );
90  
91               int numFiles = 0;
92               while ( line !=null ) {
93                   
94                   for ( int i = 0; i < linesPerFile; i++ ) {
95                       line = inputFile.readLine();
96                       if ( line != null ) {
97                           batchLines.add(line);
98                       }
99                   }
100                  
101 
102                  Collections.sort(batchLines, comparator);
103         
104                  
105                  BufferedWriter bw = new BufferedWriter(new FileWriter( new File( tempSortDir,  "chunk_" + numFiles ) ));
106                  for( int i = 0; i < batchLines.size(); i++) {
107                      bw.append(batchLines.get(i)).append('\n');
108                  }
109                  bw.close();
110                  numFiles++;
111                  batchLines.clear(); 
112              }
113              inputFile.close();
114              return numFiles;
115          } catch (Exception ex) {
116              LOG.fatal( "Exception processing sort to temp files.", ex );
117              throw new RuntimeException( ex );
118          }
119     }
120         
121     private static void mergeFiles(File tempSortDir, int numFiles, String outputFileName, Comparator<String> comparator ) {
122         try {
123             ArrayList<FileReader> mergefr = new ArrayList<FileReader>( numFiles );
124             ArrayList<BufferedReader> mergefbr = new ArrayList<BufferedReader>( numFiles );
125             
126             ArrayList<String> fileRows = new ArrayList<String>( numFiles );
127 
128             BufferedWriter bw = new BufferedWriter(new FileWriter(outputFileName));
129 
130             boolean someFileStillHasRows = false;
131 
132             
133             for ( int i = 0; i < numFiles; i++) {
134                 
135                 mergefr.add(new FileReader(new File( tempSortDir, "chunk_"+i) ) );
136                 mergefbr.add(new BufferedReader(mergefr.get(i)));
137 
138                 
139                 String line = mergefbr.get(i).readLine();
140                 if (line != null) {
141                     fileRows.add(line);
142                     someFileStillHasRows = true;
143                 } else  {
144                     fileRows.add(null);
145                 }
146             }
147             
148             while (someFileStillHasRows) {
149                 String min = null;
150                 int minIndex = 0; 
151 
152                 
153                 String line = fileRows.get(0);
154                 if (line!=null) {
155                     min = line;
156                     minIndex = 0;
157                 } else {
158                     min = null;
159                     minIndex = -1;
160         }
161 
162                 
163                 
164                 for( int i = 1; i < fileRows.size(); i++ ) {
165                     line = fileRows.get(i);
166                     if ( line != null ) {
167                         if ( min != null ) {
168                             if( comparator.compare(line, min) < 0 ) {
169                                 minIndex = i;
170                                 min = line;
171                             }
172                         } else {
173                             min = line;
174                             minIndex = i;
175                         }
176                     }
177                 }
178 
179                 if (minIndex < 0) {
180                     someFileStillHasRows=false;
181                 } else {
182                     
183                     bw.append(fileRows.get(minIndex)).append('\n');
184 
185                     
186                     line = mergefbr.get(minIndex).readLine();
187                     if (line != null) {
188                         fileRows.set(minIndex,line);
189                     } else { 
190                         fileRows.set(minIndex,null);
191                     }
192                 }
193                 
194                 for( int i = 0; i < fileRows.size(); i++) {
195                     someFileStillHasRows = false;
196                     if(fileRows.get(i)!=null)  {
197                         if (minIndex < 0) {
198                             throw new RuntimeException( "minIndex < 0 and row found in chunk file " + i + " : " + fileRows.get(i) );
199                         }
200                         someFileStillHasRows = true;
201                         break;
202                     }
203         }
204              
205                 
206                 if (!someFileStillHasRows) {
207                     
208                     for(int i=0; i<fileRows.size(); i++) {
209                         if (fileRows.get(i) == null) {
210                             line = mergefbr.get(i).readLine();
211                             if (line!=null) {
212                                 someFileStillHasRows=true;
213                                 fileRows.set(i,line);
214                             }
215                         }
216                     }
217                 }
218     }
219     
220             
221             bw.close();
222             for(BufferedReader br : mergefbr ) {
223                 br.close();
224             }
225             for(FileReader fr : mergefr ) {
226                 fr.close();
227             }
228         } catch (Exception ex) {
229             LOG.error( "Exception merging the sorted files", ex );
230             throw new RuntimeException( "Exception merging the sorted files", ex );
231         }
232    }
233     
234 }