001package org.kuali.common.util.file;
002
003import java.io.File;
004import java.io.IOException;
005import java.io.OutputStream;
006import java.util.ArrayList;
007import java.util.Arrays;
008import java.util.Collections;
009import java.util.HashMap;
010import java.util.List;
011import java.util.Map;
012import java.util.Set;
013import java.util.TreeMap;
014import java.util.TreeSet;
015
016import org.apache.commons.io.FileUtils;
017import org.apache.commons.io.FilenameUtils;
018import org.apache.commons.io.IOUtils;
019import org.apache.commons.lang3.StringUtils;
020import org.junit.Ignore;
021import org.junit.Test;
022import org.kuali.common.util.FormatUtils;
023import org.kuali.common.util.LocationUtils;
024import org.kuali.common.util.SimpleScanner;
025import org.kuali.common.util.file.model.ArtifactForTesting;
026import org.kuali.common.util.file.model.DuplicateArtifact;
027import org.kuali.common.util.file.model.FileExtension;
028import org.kuali.common.util.file.model.RepoArtifacts;
029import org.kuali.common.util.file.model.RepoFile;
030import org.kuali.common.util.file.model.Repository;
031import org.kuali.common.util.log.LoggerUtils;
032
033import com.google.common.base.Optional;
034import com.google.common.collect.ImmutableList;
035
036public class ListTest {
037
        // Checksum file extensions (without the leading dot). An artifact's checksum file is looked up as path + "." + extension.
        private static final String SHA1 = "sha1";
        private static final String MD5 = "md5";
        // Root directory that repository names are resolved against when building file system locations.
        private static final String BASEDIR = "/usr/local/sonatype-work/nexus/storage";
        // Extensions consulted when deciding whether a path is a checksum file rather than an artifact.
        private static final List<String> CHECKSUM_EXTENSIONS = Arrays.asList(SHA1, MD5);
042
043        @Test
044        public void getRepoListTest() {
045                try {
046                        List<Repository> repos = getRepoList();
047                        logRepos(repos);
048                        Set<String> paths = getPaths(repos);
049                        List<FileExtension> extensions = getExtensions(paths);
050                        System.out.println("     Unique paths: " + FormatUtils.getCount(paths.size()));
051                        logFileExtensions(extensions);
052                        logWeird(paths);
053                        List<RepoArtifacts> list = analyzeRepos(repos);
054                        logRepoArtifacts(list);
055                        List<DuplicateArtifact> duplicates = getDuplicates(list);
056                        List<DuplicateArtifact> issues = getDuplicateIssues(duplicates);
057                        System.out.println(issues.size());
058                        logDuplicateArtifact(issues);
059                } catch (Exception e) {
060                        e.printStackTrace();
061                }
062        }
063
064        protected void createChecksumRequestFile(List<RepoArtifacts> list) {
065                for (RepoArtifacts element : list) {
066                        Repository repo = element.getRepository();
067                        String path = BASEDIR + "/";
068                        System.out.println(repo + path);
069                }
070        }
071
072        protected void logDuplicateArtifact(List<DuplicateArtifact> duplicates) {
073                System.out.println(duplicates.size());
074                for (DuplicateArtifact duplicate : duplicates) {
075                        String filename = FilenameUtils.getName(duplicate.getPath());
076                        System.out.print(StringUtils.rightPad(filename, 55) + " [");
077                        for (ArtifactForTesting artifact : duplicate.getArtifacts()) {
078                                System.out.print(artifact.getRepository().getName() + " ");
079                        }
080                        System.out.println("]");
081                }
082        }
083
084        protected List<DuplicateArtifact> getDuplicateIssues(List<DuplicateArtifact> duplicates) {
085                List<DuplicateArtifact> issues = new ArrayList<DuplicateArtifact>();
086                for (DuplicateArtifact duplicate : duplicates) {
087                        List<ArtifactForTesting> artifacts = duplicate.getArtifacts();
088                        long size = -1;
089                        for (ArtifactForTesting artifact : artifacts) {
090                                if (size == -1) {
091                                        size = artifact.getFile().getSize();
092                                }
093                                if (size != artifact.getFile().getSize()) {
094                                        issues.add(duplicate);
095                                }
096                        }
097                }
098                return issues;
099
100        }
101
102        protected List<DuplicateArtifact> getDuplicates(List<RepoArtifacts> list) {
103                Map<String, List<ArtifactForTesting>> all = new HashMap<String, List<ArtifactForTesting>>();
104                for (RepoArtifacts element : list) {
105                        for (ArtifactForTesting artifact : element.getArtifacts()) {
106                                String path = artifact.getFile().getPath();
107                                List<ArtifactForTesting> artifacts = all.get(path);
108                                if (artifacts == null) {
109                                        artifacts = new ArrayList<ArtifactForTesting>();
110                                }
111                                artifacts.add(artifact);
112                                all.put(path, artifacts);
113                        }
114                }
115                List<DuplicateArtifact> duplicates = new ArrayList<DuplicateArtifact>();
116                for (Map.Entry<String, List<ArtifactForTesting>> pair : all.entrySet()) {
117                        if (pair.getValue().size() > 1) {
118                                DuplicateArtifact duplicate = new DuplicateArtifact(pair.getKey(), pair.getValue());
119                                duplicates.add(duplicate);
120                        }
121                }
122                Collections.sort(duplicates);
123                Collections.reverse(duplicates);
124                return duplicates;
125        }
126
127        protected void logRepoArtifacts(List<RepoArtifacts> list) {
128                List<String> columns = Arrays.asList("repo", "present", "missing", "total", "size");
129                List<Object[]> rows = new ArrayList<Object[]>();
130                List<ArtifactForTesting> issues = new ArrayList<ArtifactForTesting>();
131                long totalSize = 0;
132                long totalCount = 0;
133                long totalMissing = 0;
134                long totalPresent = 0;
135                for (RepoArtifacts element : list) {
136                        int present = 0;
137                        int missing = 0;
138                        List<ArtifactForTesting> artifacts = element.getArtifacts();
139                        for (ArtifactForTesting artifact : artifacts) {
140                                if (artifact.getChecksum().isPresent()) {
141                                        present++;
142                                        totalPresent++;
143                                } else {
144                                        missing++;
145                                        issues.add(artifact);
146                                        totalMissing++;
147                                }
148                        }
149                        totalSize += element.getSize();
150                        totalCount += artifacts.size();
151                        String pcount = FormatUtils.getCount(present);
152                        String mcount = FormatUtils.getCount(missing);
153                        String name = element.getRepository().getName();
154                        String total = FormatUtils.getCount(artifacts.size());
155                        String size = FormatUtils.getSize(element.getSize());
156                        Object[] row = { name, pcount, mcount, total, size };
157                        rows.add(row);
158                }
159                Object[] row = { "", "", "", "", "" };
160                rows.add(row);
161                Object[] totals = { "Totals:", FormatUtils.getCount(totalPresent), FormatUtils.getCount(totalMissing), FormatUtils.getCount(totalCount), FormatUtils.getSize(totalSize) };
162                rows.add(totals);
163                LoggerUtils.logTable("repo artifacts", columns, rows);
164                List<String> missingChecksums = new ArrayList<String>();
165                for (ArtifactForTesting artifact : issues) {
166                        missingChecksums.add(BASEDIR + "/" + artifact.getRepository().getName() + artifact.getFile().getPath());
167                }
168                // String filename = "/Users/jcaddel/ws/kuali-util/src/test/resources/repos/missing-checksums.txt";
169                // write(new File(filename), missingChecksums);
170        }
171
172        protected void write(File file, List<String> lines) {
173                try {
174                        FileUtils.writeLines(file, lines);
175                } catch (IOException e) {
176                        throw new IllegalStateException(e);
177                }
178        }
179
180        protected List<RepoArtifacts> analyzeRepos(List<Repository> repos) {
181                List<RepoArtifacts> list = new ArrayList<RepoArtifacts>();
182                for (Repository repo : repos) {
183                        RepoArtifacts ra = analyzeRepo(repo);
184                        list.add(ra);
185                }
186                return list;
187        }
188
189        protected ArtifactForTesting getArtifact(Repository repo, RepoFile artifact, List<RepoFile> checksums) {
190                String path = artifact.getPath();
191                String sha1ChecksumPath = path + "." + SHA1;
192                String md5ChecksumPath = path + "." + MD5;
193                RepoFile md5CheckSum = null;
194                for (RepoFile checksum : checksums) {
195                        String checksumPath = checksum.getPath();
196                        if (sha1ChecksumPath.equals(checksumPath)) {
197                                // If we've got a SHA1 checksum, we are done
198                                return new ArtifactForTesting(repo, artifact, Optional.of(checksum));
199                        }
200                        if (md5ChecksumPath.equals(checksumPath)) {
201                                md5CheckSum = checksum;
202                        }
203                }
204                // Only use MD5 if SHA1 is not available
205                if (md5CheckSum != null) {
206                        return new ArtifactForTesting(repo, artifact, Optional.of(md5CheckSum));
207                } else {
208                        return new ArtifactForTesting(repo, artifact, Optional.<RepoFile> absent());
209                }
210        }
211
212        protected List<ArtifactForTesting> getArtifacts(Repository repo, List<RepoFile> checksums, List<RepoFile> artifacts) {
213                List<ArtifactForTesting> list = new ArrayList<ArtifactForTesting>();
214                for (RepoFile artifact : artifacts) {
215                        ArtifactForTesting a = getArtifact(repo, artifact, checksums);
216                        list.add(a);
217                }
218                return list;
219        }
220
221        protected RepoArtifacts analyzeRepo(Repository repo) {
222                List<RepoFile> checksums = getCheckSums(repo.getFiles());
223                List<RepoFile> artifacts = getArtifacts(repo.getFiles());
224                List<ArtifactForTesting> list = getArtifacts(repo, checksums, artifacts);
225                return new RepoArtifacts(repo, list);
226        }
227
228        protected List<RepoFile> getArtifacts(List<RepoFile> files) {
229                List<RepoFile> checksums = new ArrayList<RepoFile>();
230                for (RepoFile file : files) {
231                        String path = file.getPath();
232                        if (!isChecksum(path)) {
233                                checksums.add(file);
234                        }
235                }
236                return checksums;
237        }
238
239        protected List<RepoFile> getCheckSums(List<RepoFile> files) {
240                List<RepoFile> checksums = new ArrayList<RepoFile>();
241                for (RepoFile file : files) {
242                        String path = file.getPath();
243                        if (isChecksum(path)) {
244                                checksums.add(file);
245                        }
246                }
247                return checksums;
248        }
249
250        protected void logWeird(Set<String> paths) {
251                List<String> weird = new ArrayList<String>();
252                for (String path : paths) {
253                        if (isWeird(path)) {
254                                weird.add(path);
255                        }
256                }
257                Collections.sort(weird);
258                for (String s : weird) {
259                        System.out.println(s);
260                }
261        }
262
263        protected boolean ignore(String path) {
264                if (path.endsWith("archetype-catalog.xml")) {
265                        return true;
266                }
267                if (path.endsWith("maven-metadata.xml")) {
268                        return true;
269                }
270                if (path.endsWith("asc")) {
271                        return true;
272                }
273                if (path.endsWith("signature")) {
274                        return true;
275                }
276                if (path.endsWith("txt")) {
277                        return true;
278                }
279                if (path.contains("texen/1.0/orig/texen")) {
280                        return true;
281                }
282                if (path.contains("texen/1.0/try.texen")) {
283                        return true;
284                }
285                return false;
286        }
287
288        protected boolean isChecksum(String path) {
289                for (String ext : CHECKSUM_EXTENSIONS) {
290                        if (path.endsWith(ext)) {
291                                return true;
292                        }
293                }
294                return false;
295        }
296
297        protected boolean isWeird(String path) {
298                if (path.endsWith("tld")) {
299                        return true;
300                }
301                if (path.endsWith("mar")) {
302                        return true;
303                }
304                if (path.contains("texen")) {
305                        return true;
306                }
307                if (path.endsWith("signature")) {
308                        return true;
309                }
310                if (path.endsWith("txt")) {
311                        return true;
312                }
313                if (path.endsWith("asc")) {
314                        return true;
315                }
316                if (path.endsWith("gz")) {
317                        return true;
318                }
319                return false;
320        }
321
322        protected void logFileExtensions(List<FileExtension> extensions) {
323                System.out.println("Unique extensions: " + FormatUtils.getCount(extensions.size()));
324                for (FileExtension extension : extensions) {
325                        String count = FormatUtils.getCount(extension.getCount());
326                        System.out.println(StringUtils.rightPad(extension.getValue(), 16) + " - " + count);
327                }
328        }
329
330        protected List<FileExtension> getExtensions(Set<String> paths) {
331                Map<String, Integer> extensions = new TreeMap<String, Integer>();
332                for (String path : paths) {
333                        String extension = FilenameUtils.getExtension(path);
334                        Integer count = extensions.get(extension) == null ? 1 : extensions.get(extension) + 1;
335                        extensions.put(extension, count);
336                }
337                List<FileExtension> list = new ArrayList<FileExtension>();
338                for (Map.Entry<String, Integer> pair : extensions.entrySet()) {
339                        FileExtension fe = new FileExtension(pair.getKey(), pair.getValue());
340                        list.add(fe);
341                }
342                Collections.sort(list);
343                Collections.reverse(list);
344                return list;
345        }
346
347        protected Set<String> getPaths(List<Repository> repos) {
348                Set<String> paths = new TreeSet<String>();
349                for (Repository repo : repos) {
350                        List<RepoFile> files = repo.getFiles();
351                        for (RepoFile file : files) {
352                                String path = file.getPath();
353                                paths.add(path);
354                        }
355                }
356                return paths;
357        }
358
359        protected List<Repository> getRepoList() {
360                List<String> names = getRepoNames();
361                List<Repository> repos = new ArrayList<Repository>();
362                for (String name : names) {
363                        String location = "classpath:repos/" + name + ".txt";
364                        List<String> lines = LocationUtils.readLines(location);
365                        List<RepoFile> files = getRepoFiles(lines);
366                        Repository repo = new Repository(name, files);
367                        repos.add(repo);
368                }
369                Collections.sort(repos);
370                Collections.reverse(repos);
371                return repos;
372        }
373
374        protected void logRepos(List<Repository> repos) {
375                List<String> columns = ImmutableList.of("repo", "files", "size");
376                List<Object[]> rows = new ArrayList<Object[]>();
377                long totalFiles = 0;
378                long totalSize = 0;
379                for (Repository repo : repos) {
380                        String count = FormatUtils.getCount(repo.getFiles().size());
381                        String size = FormatUtils.getSize(repo.getSize());
382                        totalSize += repo.getSize();
383                        totalFiles += repo.getFiles().size();
384                        Object[] row = { repo.getName(), count, size };
385                        rows.add(row);
386                }
387                String tc = FormatUtils.getCount(totalFiles);
388                String ts = FormatUtils.getSize(totalSize);
389                rows.add(new Object[] { "", "", "" });
390                rows.add(new Object[] { "totals", tc, ts });
391                LoggerUtils.logTable("Repo Summary", columns, rows);
392        }
393
394        protected List<RepoFile> getRepoFiles(List<String> lines) {
395                List<RepoFile> files = new ArrayList<RepoFile>();
396                for (String line : lines) {
397                        String[] tokens = StringUtils.split(line, ",");
398                        String path = tokens[0];
399                        if (ignore(path)) {
400                                continue;
401                        }
402                        long size = Long.parseLong(tokens[1]);
403                        RepoFile file = new RepoFile(path, size);
404                        files.add(file);
405                }
406                return files;
407        }
408
409        @Test
410        @Ignore
411        public void generateFileListingsTest() {
412                try {
413                        List<String> repos = getRepoNames();
414                        Collections.reverse(repos);
415                        List<File> repoDirs = getRepos();
416                        // System.out.println("repos=" + repos.size());
417                        for (File repo : repoDirs) {
418                                // System.out.println("names.add(\"" + repo.getName() + "\");");
419                                repo.getPath();
420                        }
421                        int i = 0;
422                        FileUtils.forceDelete(new File("/tmp/repos"));
423                        for (String repo : repos) {
424                                File dir = new CanonicalFile(BASEDIR + repo);
425                                long start = System.currentTimeMillis();
426                                System.out.print(StringUtils.rightPad(dir.getPath(), 75));
427                                List<File> files = getRepoFiles(dir.getPath());
428                                printRepo(dir, files);
429                                String elapsed = FormatUtils.getTime(System.currentTimeMillis() - start);
430                                String count = FormatUtils.getCount(files.size());
431                                System.out.println(StringUtils.leftPad(elapsed, 10) + " - " + StringUtils.leftPad(count, 10));
432                        }
433                        System.out.println(i);
434                } catch (Exception e) {
435                        e.printStackTrace();
436                }
437        }
438
439        protected void printRepo(File repo, List<File> files) {
440                File outputFile = new File("/tmp/repos/" + repo.getName() + ".txt");
441                OutputStream out = null;
442                try {
443                        out = FileUtils.openOutputStream(outputFile);
444                        for (File file : files) {
445                                long length = file.length();
446                                String path = StringUtils.replace(file.getPath(), repo.getPath(), "");
447                                String s = path + "," + length + "\n";
448                                byte[] bytes = s.getBytes("UTF-8");
449                                out.write(bytes);
450                        }
451                        out.flush();
452                } catch (IOException e) {
453                        throw new IllegalStateException("Unexpected IO error", e);
454                } finally {
455                        IOUtils.closeQuietly(out);
456                }
457        }
458
459        protected List<File> getRepos() {
460                File dir = new File(BASEDIR);
461                List<File> repos = Arrays.asList(dir.listFiles());
462                List<File> canonical = new ArrayList<File>();
463                for (File file : repos) {
464                        canonical.add(new CanonicalFile(file));
465                }
466                Collections.sort(canonical);
467                return canonical;
468        }
469
470        protected List<File> getRepoFiles(String repoDir) {
471                File basedir = new File(repoDir);
472                List<String> includes = ImmutableList.of("**/**");
473                // List<String> excludes = ImmutableList.of("**/.index/**", "**/.meta/**", "archetype-catalog.xml.sha1", "archetype-catalog.xml", "**/maven-metadata.xml",
474                // "**/maven-metadata.xml.*");
475                List<String> excludes = ImmutableList.of("**/.index/**", "**/.meta/**", "**/.nexus/**");
476                SimpleScanner scanner = new SimpleScanner(basedir, includes, excludes);
477                List<File> files = scanner.getFiles();
478                List<File> canonical = new ArrayList<File>();
479                for (File file : files) {
480                        canonical.add(new CanonicalFile(file));
481                }
482                Collections.sort(canonical);
483                return canonical;
484        }
485
486        protected List<String> getRepoNames() {
487                List<String> names = new ArrayList<String>();
488
489                // These 8 are all an order of magnitude bigger than the other repo's
490                names.add("central");
491                names.add("kuali-snapshot");
492                names.add("kuali-builds");
493                names.add("kuali-release");
494                names.add("hosted-private");
495                names.add("atlassian");
496                names.add("eclipselink");
497                names.add("apache-snapshots");
498
499                names.add("central-m1");
500                names.add("codehaus");
501                names.add("codehaus-snapshots");
502                names.add("developer");
503                names.add("glassfish");
504                names.add("google");
505                names.add("google-reflections");
506                names.add("jasperreports-sourceforge");
507                names.add("java.net-m1");
508                names.add("java.net-m1-m2");
509                names.add("java.net-m2");
510                names.add("jboss");
511                names.add("jsdoctk");
512                names.add("kuali-legacy-releases");
513                names.add("kuali-legacy-snapshots");
514                names.add("kuali-private");
515                names.add("kuali-s3-external");
516                names.add("kuali-s3-private");
517                // names.add("m2eclipse");
518                names.add("maven-restlet");
519                names.add("ow2");
520                names.add("public");
521                names.add("releases");
522                names.add("saucelabs-repository");
523                names.add("snapshots");
524                names.add("sonatype-oss-releases");
525                names.add("spring-milestones");
526                names.add("thirdparty");
527                return names;
528        }
529
530}