1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16 package org.kuali.ole.gl.batch;
17
18 import java.io.BufferedReader;
19 import java.io.BufferedWriter;
20 import java.io.File;
21 import java.io.FileNotFoundException;
22 import java.io.FileReader;
23 import java.io.FileWriter;
24 import java.io.IOException;
25 import java.util.ArrayList;
26 import java.util.Collections;
27 import java.util.Comparator;
28 import java.util.UUID;
29
30 import org.apache.commons.io.FileUtils;
31 import org.kuali.ole.sys.OLEConstants;
32 import org.kuali.ole.sys.context.SpringContext;
33 import org.kuali.rice.core.api.config.property.ConfigurationService;
34
35
36
37
38 public class BatchSortUtil {
39 private static org.apache.log4j.Logger LOG = org.apache.log4j.Logger.getLogger(BatchSortUtil.class);
40
41 private static File tempDir;
42
43 private static File getTempDirectory() {
44 if ( tempDir == null ) {
45 tempDir = new File( SpringContext.getBean(ConfigurationService.class).getPropertyValueAsString(OLEConstants.TEMP_DIRECTORY_KEY) );
46 }
47 return tempDir;
48 }
49
50 static public void sortTextFileWithFields(String inputFileName, String outputFileName, @SuppressWarnings("rawtypes") Comparator comparator){
51
52 String tempSortDirName = UUID.randomUUID().toString();
53 File tempSortDir = new File( getTempDirectory(), tempSortDirName );
54
55 FileUtils.deleteQuietly(tempSortDir);
56 try {
57 FileUtils.forceMkdir(tempSortDir);
58 } catch (IOException ex) {
59 LOG.fatal( "Unable to create temporary sort directory", ex );
60 throw new RuntimeException( "Unable to create temporary sort directory", ex );
61 }
62
63 int numFiles = sortToTempFiles( inputFileName, tempSortDir, comparator );
64
65
66 mergeFiles(tempSortDir, numFiles, outputFileName, comparator);
67
68
69 FileUtils.deleteQuietly(tempSortDir);
70 }
71
72 static int linesPerFile = 10000;
73
74
75
76
77
78
79 private static int sortToTempFiles(String inputFileName, File tempSortDir, Comparator<String> comparator) {
80 BufferedReader inputFile;
81 try {
82 inputFile = new BufferedReader(new FileReader(inputFileName));
83 } catch ( FileNotFoundException ex ) {
84 LOG.fatal( "Unable to find input file: " + inputFileName, ex );
85 throw new RuntimeException( "Unable to find input file: " + inputFileName, ex );
86 }
87 try {
88 String line = "";
89 ArrayList<String> batchLines = new ArrayList<String>( linesPerFile );
90
91 int numFiles = 0;
92 while ( line !=null ) {
93
94 for ( int i = 0; i < linesPerFile; i++ ) {
95 line = inputFile.readLine();
96 if ( line != null ) {
97 batchLines.add(line);
98 }
99 }
100
101
102 Collections.sort(batchLines, comparator);
103
104
105 BufferedWriter bw = new BufferedWriter(new FileWriter( new File( tempSortDir, "chunk_" + numFiles ) ));
106 for( int i = 0; i < batchLines.size(); i++) {
107 bw.append(batchLines.get(i)).append('\n');
108 }
109 bw.close();
110 numFiles++;
111 batchLines.clear();
112 }
113 inputFile.close();
114 return numFiles;
115 } catch (Exception ex) {
116 LOG.fatal( "Exception processing sort to temp files.", ex );
117 throw new RuntimeException( ex );
118 }
119 }
120
121 private static void mergeFiles(File tempSortDir, int numFiles, String outputFileName, Comparator<String> comparator ) {
122 try {
123 ArrayList<FileReader> mergefr = new ArrayList<FileReader>( numFiles );
124 ArrayList<BufferedReader> mergefbr = new ArrayList<BufferedReader>( numFiles );
125
126 ArrayList<String> fileRows = new ArrayList<String>( numFiles );
127
128 BufferedWriter bw = new BufferedWriter(new FileWriter(outputFileName));
129
130 boolean someFileStillHasRows = false;
131
132
133 for ( int i = 0; i < numFiles; i++) {
134
135 mergefr.add(new FileReader(new File( tempSortDir, "chunk_"+i) ) );
136 mergefbr.add(new BufferedReader(mergefr.get(i)));
137
138
139 String line = mergefbr.get(i).readLine();
140 if (line != null) {
141 fileRows.add(line);
142 someFileStillHasRows = true;
143 } else {
144 fileRows.add(null);
145 }
146 }
147
148 while (someFileStillHasRows) {
149 String min = null;
150 int minIndex = 0;
151
152
153 String line = fileRows.get(0);
154 if (line!=null) {
155 min = line;
156 minIndex = 0;
157 } else {
158 min = null;
159 minIndex = -1;
160 }
161
162
163
164 for( int i = 1; i < fileRows.size(); i++ ) {
165 line = fileRows.get(i);
166 if ( line != null ) {
167 if ( min != null ) {
168 if( comparator.compare(line, min) < 0 ) {
169 minIndex = i;
170 min = line;
171 }
172 } else {
173 min = line;
174 minIndex = i;
175 }
176 }
177 }
178
179 if (minIndex < 0) {
180 someFileStillHasRows=false;
181 } else {
182
183 bw.append(fileRows.get(minIndex)).append('\n');
184
185
186 line = mergefbr.get(minIndex).readLine();
187 if (line != null) {
188 fileRows.set(minIndex,line);
189 } else {
190 fileRows.set(minIndex,null);
191 }
192 }
193
194 for( int i = 0; i < fileRows.size(); i++) {
195 someFileStillHasRows = false;
196 if(fileRows.get(i)!=null) {
197 if (minIndex < 0) {
198 throw new RuntimeException( "minIndex < 0 and row found in chunk file " + i + " : " + fileRows.get(i) );
199 }
200 someFileStillHasRows = true;
201 break;
202 }
203 }
204
205
206 if (!someFileStillHasRows) {
207
208 for(int i=0; i<fileRows.size(); i++) {
209 if (fileRows.get(i) == null) {
210 line = mergefbr.get(i).readLine();
211 if (line!=null) {
212 someFileStillHasRows=true;
213 fileRows.set(i,line);
214 }
215 }
216 }
217 }
218 }
219
220
221 bw.close();
222 for(BufferedReader br : mergefbr ) {
223 br.close();
224 }
225 for(FileReader fr : mergefr ) {
226 fr.close();
227 }
228 } catch (Exception ex) {
229 LOG.error( "Exception merging the sorted files", ex );
230 throw new RuntimeException( "Exception merging the sorted files", ex );
231 }
232 }
233
234 }