package org.kuali.common.util.file;

import java.io.File;
import java.io.IOException;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;

import org.apache.commons.io.FileUtils;
import org.apache.commons.io.FilenameUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.junit.Ignore;
import org.junit.Test;
import org.kuali.common.util.FormatUtils;
import org.kuali.common.util.LocationUtils;
import org.kuali.common.util.SimpleScanner;
import org.kuali.common.util.file.model.ArtifactForTesting;
import org.kuali.common.util.file.model.DuplicateArtifact;
import org.kuali.common.util.file.model.FileExtension;
import org.kuali.common.util.file.model.RepoArtifacts;
import org.kuali.common.util.file.model.RepoFile;
import org.kuali.common.util.file.model.Repository;
import org.kuali.common.util.log.LoggerUtils;

import com.google.common.base.Optional;
import com.google.common.collect.ImmutableList;

/**
 * Ad-hoc analysis of repository file listings.
 *
 * <p>
 * {@link #generateFileListingsTest()} scans the repository directories under {@link #BASEDIR} and writes one
 * {@code path,size} listing per repository to {@code /tmp/repos}. {@link #getRepoListTest()} reads listings of the same
 * format from {@code classpath:repos/<name>.txt} and prints summaries: files per repository, file extensions, artifacts
 * with and without checksum files, and artifacts that appear in more than one repository with differing sizes.
 * </p>
 */
public class ListTest {

    private static final String SHA1 = "sha1";
    private static final String MD5 = "md5";
    private static final String BASEDIR = "/usr/local/sonatype-work/nexus/storage";
    private static final List<String> CHECKSUM_EXTENSIONS = Arrays.asList(SHA1, MD5);

    /**
     * Loads every repository listing and prints the summaries described on the class.
     *
     * <p>
     * Failures propagate so that JUnit reports them instead of the test passing with only a stack trace on stderr.
     * </p>
     */
    @Test
    public void getRepoListTest() {
        List<Repository> repos = getRepoList();
        logRepos(repos);
        Set<String> paths = getPaths(repos);
        List<FileExtension> extensions = getExtensions(paths);
        System.out.println(" Unique paths: " + FormatUtils.getCount(paths.size()));
        logFileExtensions(extensions);
        logWeird(paths);
        List<RepoArtifacts> list = analyzeRepos(repos);
        logRepoArtifacts(list);
        List<DuplicateArtifact> duplicates = getDuplicates(list);
        List<DuplicateArtifact> issues = getDuplicateIssues(duplicates);
        System.out.println(issues.size());
        logDuplicateArtifact(issues);
    }

    /**
     * Prints, for each element, its repository followed by {@code BASEDIR + "/"}.
     *
     * @param list
     *            the per-repository artifact collections to print
     */
    protected void createChecksumRequestFile(List<RepoArtifacts> list) {
        for (RepoArtifacts element : list) {
            Repository repo = element.getRepository();
            String path = BASEDIR + "/";
            System.out.println(repo + path);
        }
    }

    /**
     * Prints the number of duplicates, then one line per duplicate: the file name padded to 55 characters followed by
     * the names of the repositories that contain it.
     *
     * @param duplicates
     *            the duplicates to print
     */
    protected void logDuplicateArtifact(List<DuplicateArtifact> duplicates) {
        System.out.println(duplicates.size());
        for (DuplicateArtifact duplicate : duplicates) {
            String filename = FilenameUtils.getName(duplicate.getPath());
            System.out.print(StringUtils.rightPad(filename, 55) + " [");
            for (ArtifactForTesting artifact : duplicate.getArtifacts()) {
                System.out.print(artifact.getRepository().getName() + " ");
            }
            System.out.println("]");
        }
    }

    /**
     * Returns the duplicates whose copies do not all have the same file size.
     *
     * <p>
     * Each problematic duplicate is returned exactly once, no matter how many of its copies differ in size.
     * </p>
     *
     * @param duplicates
     *            the duplicates to examine
     * @return the subset of {@code duplicates} with mismatched sizes, in the order they were supplied
     */
    protected List<DuplicateArtifact> getDuplicateIssues(List<DuplicateArtifact> duplicates) {
        List<DuplicateArtifact> issues = new ArrayList<DuplicateArtifact>();
        for (DuplicateArtifact duplicate : duplicates) {
            if (hasSizeMismatch(duplicate.getArtifacts())) {
                issues.add(duplicate);
            }
        }
        return issues;
    }

    /**
     * Returns true if any artifact's file size differs from the size of the first artifact's file.
     */
    private boolean hasSizeMismatch(List<ArtifactForTesting> artifacts) {
        if (artifacts.isEmpty()) {
            return false;
        }
        long expected = artifacts.get(0).getFile().getSize();
        for (ArtifactForTesting artifact : artifacts) {
            if (artifact.getFile().getSize() != expected) {
                return true;
            }
        }
        return false;
    }

    /**
     * Groups all artifacts by path and returns those paths that occur more than once.
     *
     * @param list
     *            the per-repository artifact collections
     * @return the duplicates, sorted by their natural ordering and then reversed
     */
    protected List<DuplicateArtifact> getDuplicates(List<RepoArtifacts> list) {
        Map<String, List<ArtifactForTesting>> all = new HashMap<String, List<ArtifactForTesting>>();
        for (RepoArtifacts element : list) {
            for (ArtifactForTesting artifact : element.getArtifacts()) {
                String path = artifact.getFile().getPath();
                List<ArtifactForTesting> artifacts = all.get(path);
                if (artifacts == null) {
                    artifacts = new ArrayList<ArtifactForTesting>();
                    all.put(path, artifacts);
                }
                artifacts.add(artifact);
            }
        }
        List<DuplicateArtifact> duplicates = new ArrayList<DuplicateArtifact>();
        for (Map.Entry<String, List<ArtifactForTesting>> pair : all.entrySet()) {
            if (pair.getValue().size() > 1) {
                duplicates.add(new DuplicateArtifact(pair.getKey(), pair.getValue()));
            }
        }
        Collections.sort(duplicates);
        Collections.reverse(duplicates);
        return duplicates;
    }

    /**
     * Logs a table with one row per repository (artifacts with a checksum, artifacts without one, total artifacts and
     * size) followed by a blank row and a totals row.
     *
     * @param list
     *            the per-repository artifact collections
     */
    protected void logRepoArtifacts(List<RepoArtifacts> list) {
        List<String> columns = Arrays.asList("repo", "present", "missing", "total", "size");
        List<Object[]> rows = new ArrayList<Object[]>();
        List<ArtifactForTesting> issues = new ArrayList<ArtifactForTesting>();
        long totalSize = 0;
        long totalCount = 0;
        long totalMissing = 0;
        long totalPresent = 0;
        for (RepoArtifacts element : list) {
            int present = 0;
            int missing = 0;
            List<ArtifactForTesting> artifacts = element.getArtifacts();
            for (ArtifactForTesting artifact : artifacts) {
                if (artifact.getChecksum().isPresent()) {
                    present++;
                    totalPresent++;
                } else {
                    missing++;
                    issues.add(artifact);
                    totalMissing++;
                }
            }
            totalSize += element.getSize();
            totalCount += artifacts.size();
            String pcount = FormatUtils.getCount(present);
            String mcount = FormatUtils.getCount(missing);
            String name = element.getRepository().getName();
            String total = FormatUtils.getCount(artifacts.size());
            String size = FormatUtils.getSize(element.getSize());
            Object[] row = { name, pcount, mcount, total, size };
            rows.add(row);
        }
        Object[] row = { "", "", "", "", "" };
        rows.add(row);
        Object[] totals = { "Totals:", FormatUtils.getCount(totalPresent), FormatUtils.getCount(totalMissing), FormatUtils.getCount(totalCount), FormatUtils.getSize(totalSize) };
        rows.add(totals);
        LoggerUtils.logTable("repo artifacts", columns, rows);

        // Absolute paths of the artifacts lacking a checksum. Only consumed by the manual export below.
        List<String> missingChecksums = new ArrayList<String>();
        for (ArtifactForTesting artifact : issues) {
            missingChecksums.add(BASEDIR + "/" + artifact.getRepository().getName() + artifact.getFile().getPath());
        }
        // String filename = "/Users/jcaddel/ws/kuali-util/src/test/resources/repos/missing-checksums.txt";
        // write(new File(filename), missingChecksums);
    }

    /**
     * Writes the lines to the file.
     *
     * @param file
     *            the file to write
     * @param lines
     *            the lines to write
     * @throws IllegalStateException
     *             if the underlying write fails with an {@link IOException}
     */
    protected void write(File file, List<String> lines) {
        try {
            FileUtils.writeLines(file, lines);
        } catch (IOException e) {
            throw new IllegalStateException(e);
        }
    }

    /**
     * Analyzes every repository with {@link #analyzeRepo(Repository)}.
     *
     * @param repos
     *            the repositories to analyze
     * @return one result per repository, in the same order
     */
    protected List<RepoArtifacts> analyzeRepos(List<Repository> repos) {
        List<RepoArtifacts> list = new ArrayList<RepoArtifacts>();
        for (Repository repo : repos) {
            list.add(analyzeRepo(repo));
        }
        return list;
    }

    /**
     * Pairs an artifact with its checksum file, preferring SHA1 over MD5.
     *
     * @param repo
     *            the repository the artifact belongs to
     * @param artifact
     *            the artifact file
     * @param checksums
     *            the checksum files to search
     * @return the artifact with the checksum whose path is {@code <artifact path>.sha1}; failing that the one whose
     *         path is {@code <artifact path>.md5}; failing that an absent checksum
     */
    protected ArtifactForTesting getArtifact(Repository repo, RepoFile artifact, List<RepoFile> checksums) {
        String path = artifact.getPath();
        String sha1ChecksumPath = path + "." + SHA1;
        String md5ChecksumPath = path + "." + MD5;
        RepoFile md5CheckSum = null;
        for (RepoFile checksum : checksums) {
            String checksumPath = checksum.getPath();
            if (sha1ChecksumPath.equals(checksumPath)) {
                // If we've got a SHA1 checksum, we are done
                return new ArtifactForTesting(repo, artifact, Optional.of(checksum));
            }
            if (md5ChecksumPath.equals(checksumPath)) {
                md5CheckSum = checksum;
            }
        }
        // Only use MD5 if SHA1 is not available
        if (md5CheckSum != null) {
            return new ArtifactForTesting(repo, artifact, Optional.of(md5CheckSum));
        } else {
            return new ArtifactForTesting(repo, artifact, Optional.<RepoFile> absent());
        }
    }

    /**
     * Pairs every artifact file with its checksum file using {@link #getArtifact(Repository, RepoFile, List)}.
     *
     * @param repo
     *            the repository the files belong to
     * @param checksums
     *            the checksum files of the repository
     * @param artifacts
     *            the artifact files of the repository
     * @return one entry per artifact file, in the same order
     */
    protected List<ArtifactForTesting> getArtifacts(Repository repo, List<RepoFile> checksums, List<RepoFile> artifacts) {
        List<ArtifactForTesting> list = new ArrayList<ArtifactForTesting>();
        for (RepoFile artifact : artifacts) {
            list.add(getArtifact(repo, artifact, checksums));
        }
        return list;
    }

    /**
     * Splits the repository's files into checksum files and artifacts and pairs them up.
     *
     * @param repo
     *            the repository to analyze
     * @return the repository together with its artifacts
     */
    protected RepoArtifacts analyzeRepo(Repository repo) {
        List<RepoFile> checksums = getCheckSums(repo.getFiles());
        List<RepoFile> artifacts = getArtifacts(repo.getFiles());
        List<ArtifactForTesting> list = getArtifacts(repo, checksums, artifacts);
        return new RepoArtifacts(repo, list);
    }

    /**
     * Returns the files that are not checksum files according to {@link #isChecksum(String)}.
     *
     * @param files
     *            the files to filter
     * @return the non-checksum files, in the same order
     */
    protected List<RepoFile> getArtifacts(List<RepoFile> files) {
        List<RepoFile> artifacts = new ArrayList<RepoFile>();
        for (RepoFile file : files) {
            if (!isChecksum(file.getPath())) {
                artifacts.add(file);
            }
        }
        return artifacts;
    }

    /**
     * Returns the files that are checksum files according to {@link #isChecksum(String)}.
     *
     * @param files
     *            the files to filter
     * @return the checksum files, in the same order
     */
    protected List<RepoFile> getCheckSums(List<RepoFile> files) {
        List<RepoFile> checksums = new ArrayList<RepoFile>();
        for (RepoFile file : files) {
            if (isChecksum(file.getPath())) {
                checksums.add(file);
            }
        }
        return checksums;
    }

    /**
     * Prints, in sorted order, every path for which {@link #isWeird(String)} returns true.
     *
     * @param paths
     *            the paths to examine
     */
    protected void logWeird(Set<String> paths) {
        List<String> weird = new ArrayList<String>();
        for (String path : paths) {
            if (isWeird(path)) {
                weird.add(path);
            }
        }
        Collections.sort(weird);
        for (String s : weird) {
            System.out.println(s);
        }
    }

    /**
     * Returns true if the path should be left out when a listing is parsed by {@link #getRepoFiles(List)}.
     *
     * @param path
     *            the path to examine
     * @return true if the path matches one of the hard-coded suffixes or fragments
     */
    protected boolean ignore(String path) {
        if (path.endsWith("archetype-catalog.xml")) {
            return true;
        }
        if (path.endsWith("maven-metadata.xml")) {
            return true;
        }
        if (path.endsWith("asc")) {
            return true;
        }
        if (path.endsWith("signature")) {
            return true;
        }
        if (path.endsWith("txt")) {
            return true;
        }
        if (path.contains("texen/1.0/orig/texen")) {
            return true;
        }
        if (path.contains("texen/1.0/try.texen")) {
            return true;
        }
        return false;
    }

    /**
     * Returns true if the path ends with {@code .sha1} or {@code .md5}.
     *
     * <p>
     * The dot is required so that this agrees with {@link #getArtifact(Repository, RepoFile, List)}, which looks for
     * checksums at {@code <artifact path>.<extension>}. A file that merely ends in the letters {@code md5} or
     * {@code sha1} is not treated as a checksum.
     * </p>
     *
     * @param path
     *            the path to examine
     * @return true if the path has one of the checksum extensions
     */
    protected boolean isChecksum(String path) {
        for (String ext : CHECKSUM_EXTENSIONS) {
            if (path.endsWith("." + ext)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Returns true if the path matches one of the hard-coded suffixes or fragments reported by
     * {@link #logWeird(Set)}.
     *
     * @param path
     *            the path to examine
     * @return true if the path is considered unusual
     */
    protected boolean isWeird(String path) {
        if (path.endsWith("tld")) {
            return true;
        }
        if (path.endsWith("mar")) {
            return true;
        }
        if (path.contains("texen")) {
            return true;
        }
        if (path.endsWith("signature")) {
            return true;
        }
        if (path.endsWith("txt")) {
            return true;
        }
        if (path.endsWith("asc")) {
            return true;
        }
        if (path.endsWith("gz")) {
            return true;
        }
        return false;
    }

    /**
     * Prints the number of extensions followed by one line per extension with its count.
     *
     * @param extensions
     *            the extensions to print
     */
    protected void logFileExtensions(List<FileExtension> extensions) {
        System.out.println("Unique extensions: " + FormatUtils.getCount(extensions.size()));
        for (FileExtension extension : extensions) {
            String count = FormatUtils.getCount(extension.getCount());
            System.out.println(StringUtils.rightPad(extension.getValue(), 16) + " - " + count);
        }
    }

    /**
     * Counts how many paths use each file extension.
     *
     * @param paths
     *            the paths to examine
     * @return one entry per distinct extension, sorted by natural ordering and then reversed
     */
    protected List<FileExtension> getExtensions(Set<String> paths) {
        Map<String, Integer> extensions = new TreeMap<String, Integer>();
        for (String path : paths) {
            String extension = FilenameUtils.getExtension(path);
            Integer previous = extensions.get(extension);
            Integer count = previous == null ? 1 : previous + 1;
            extensions.put(extension, count);
        }
        List<FileExtension> list = new ArrayList<FileExtension>();
        for (Map.Entry<String, Integer> pair : extensions.entrySet()) {
            list.add(new FileExtension(pair.getKey(), pair.getValue()));
        }
        Collections.sort(list);
        Collections.reverse(list);
        return list;
    }

    /**
     * Collects the distinct file paths across all repositories.
     *
     * @param repos
     *            the repositories to examine
     * @return the distinct paths in sorted order
     */
    protected Set<String> getPaths(List<Repository> repos) {
        Set<String> paths = new TreeSet<String>();
        for (Repository repo : repos) {
            for (RepoFile file : repo.getFiles()) {
                paths.add(file.getPath());
            }
        }
        return paths;
    }

    /**
     * Loads the listing {@code classpath:repos/<name>.txt} for every name returned by {@link #getRepoNames()}.
     *
     * @return the repositories, sorted by natural ordering and then reversed
     */
    protected List<Repository> getRepoList() {
        List<String> names = getRepoNames();
        List<Repository> repos = new ArrayList<Repository>();
        for (String name : names) {
            String location = "classpath:repos/" + name + ".txt";
            List<String> lines = LocationUtils.readLines(location);
            List<RepoFile> files = getRepoFiles(lines);
            repos.add(new Repository(name, files));
        }
        Collections.sort(repos);
        Collections.reverse(repos);
        return repos;
    }

    /**
     * Logs a table with the file count and size of each repository followed by a blank row and a totals row.
     *
     * @param repos
     *            the repositories to summarize
     */
    protected void logRepos(List<Repository> repos) {
        List<String> columns = ImmutableList.of("repo", "files", "size");
        List<Object[]> rows = new ArrayList<Object[]>();
        long totalFiles = 0;
        long totalSize = 0;
        for (Repository repo : repos) {
            String count = FormatUtils.getCount(repo.getFiles().size());
            String size = FormatUtils.getSize(repo.getSize());
            totalSize += repo.getSize();
            totalFiles += repo.getFiles().size();
            Object[] row = { repo.getName(), count, size };
            rows.add(row);
        }
        String tc = FormatUtils.getCount(totalFiles);
        String ts = FormatUtils.getSize(totalSize);
        rows.add(new Object[] { "", "", "" });
        rows.add(new Object[] { "totals", tc, ts });
        LoggerUtils.logTable("Repo Summary", columns, rows);
    }

    /**
     * Parses listing lines of the form {@code path,size} as written by {@link #printRepo(File, List)}.
     *
     * <p>
     * Blank lines are skipped, as are paths rejected by {@link #ignore(String)}. The size is taken from the text after
     * the last comma so that a path which itself contains a comma is still parsed correctly.
     * </p>
     *
     * @param lines
     *            the lines of a listing
     * @return the parsed files, in listing order
     * @throws IllegalArgumentException
     *             if a line that is not ignored has no comma, or if its size is not a valid long (reported as a
     *             {@link NumberFormatException})
     */
    protected List<RepoFile> getRepoFiles(List<String> lines) {
        List<RepoFile> files = new ArrayList<RepoFile>();
        for (String line : lines) {
            if (StringUtils.isBlank(line)) {
                continue;
            }
            int comma = line.lastIndexOf(',');
            String path = comma < 0 ? line : line.substring(0, comma);
            if (ignore(path)) {
                continue;
            }
            if (comma < 0) {
                throw new IllegalArgumentException("Expected 'path,size' but found [" + line + "]");
            }
            long size = Long.parseLong(line.substring(comma + 1).trim());
            files.add(new RepoFile(path, size));
        }
        return files;
    }

    /**
     * Scans every named repository under {@link #BASEDIR} and writes its listing to {@code /tmp/repos/<name>.txt},
     * printing the elapsed time and file count for each repository.
     *
     * <p>
     * Any listings left over from a previous run are removed first. Failures propagate to JUnit.
     * </p>
     *
     * @throws Exception
     *             if scanning or writing fails
     */
    @Test
    @Ignore
    public void generateFileListingsTest() throws Exception {
        List<String> repos = getRepoNames();
        Collections.reverse(repos);
        // Throws if BASEDIR cannot be listed, before anything under /tmp/repos is touched
        getRepos();
        // deleteQuietly does not fail when /tmp/repos does not exist yet (e.g. on the very first run)
        FileUtils.deleteQuietly(new File("/tmp/repos"));
        for (String repo : repos) {
            File dir = new CanonicalFile(BASEDIR + "/" + repo);
            long start = System.currentTimeMillis();
            System.out.print(StringUtils.rightPad(dir.getPath(), 75));
            List<File> files = getRepoFiles(dir.getPath());
            printRepo(dir, files);
            String elapsed = FormatUtils.getTime(System.currentTimeMillis() - start);
            String count = FormatUtils.getCount(files.size());
            System.out.println(StringUtils.leftPad(elapsed, 10) + " - " + StringUtils.leftPad(count, 10));
        }
    }

    /**
     * Writes one {@code path,size} line per file to {@code /tmp/repos/<repo name>.txt}, encoded as UTF-8.
     *
     * <p>
     * The path written is the file's path with the leading repository path removed. Only the leading occurrence is
     * removed.
     * </p>
     *
     * @param repo
     *            the repository directory
     * @param files
     *            the files to list
     * @throws IllegalStateException
     *             if the listing cannot be written
     */
    protected void printRepo(File repo, List<File> files) {
        File outputFile = new File("/tmp/repos/" + repo.getName() + ".txt");
        OutputStream out = null;
        try {
            out = FileUtils.openOutputStream(outputFile);
            for (File file : files) {
                long length = file.length();
                String path = StringUtils.removeStart(file.getPath(), repo.getPath());
                String s = path + "," + length + "\n";
                byte[] bytes = s.getBytes("UTF-8");
                out.write(bytes);
            }
            out.flush();
        } catch (IOException e) {
            throw new IllegalStateException("Unexpected IO error", e);
        } finally {
            IOUtils.closeQuietly(out);
        }
    }

    /**
     * Lists the entries directly under {@link #BASEDIR}.
     *
     * @return the entries wrapped as {@code CanonicalFile} instances, in sorted order
     * @throws IllegalStateException
     *             if {@link #BASEDIR} cannot be listed, for example because it does not exist or is not a directory
     */
    protected List<File> getRepos() {
        File dir = new File(BASEDIR);
        // listFiles() returns null rather than throwing when the directory cannot be listed
        File[] children = dir.listFiles();
        if (children == null) {
            throw new IllegalStateException("Unable to list directory [" + dir + "]");
        }
        List<File> canonical = new ArrayList<File>();
        for (File file : children) {
            canonical.add(new CanonicalFile(file));
        }
        Collections.sort(canonical);
        return canonical;
    }

    /**
     * Scans a repository directory for files, excluding anything under {@code .index}, {@code .meta} and
     * {@code .nexus} directories.
     *
     * @param repoDir
     *            the path of the repository directory
     * @return the files found, wrapped as {@code CanonicalFile} instances, in sorted order
     */
    protected List<File> getRepoFiles(String repoDir) {
        File basedir = new File(repoDir);
        List<String> includes = ImmutableList.of("**/**");
        // List<String> excludes = ImmutableList.of("**/.index/**", "**/.meta/**", "archetype-catalog.xml.sha1", "archetype-catalog.xml", "**/maven-metadata.xml",
        // "**/maven-metadata.xml.*");
        List<String> excludes = ImmutableList.of("**/.index/**", "**/.meta/**", "**/.nexus/**");
        SimpleScanner scanner = new SimpleScanner(basedir, includes, excludes);
        List<File> files = scanner.getFiles();
        List<File> canonical = new ArrayList<File>();
        for (File file : files) {
            canonical.add(new CanonicalFile(file));
        }
        Collections.sort(canonical);
        return canonical;
    }

    /**
     * Returns the names of the repositories to process.
     *
     * @return a new, modifiable list (callers reverse it in place)
     */
    protected List<String> getRepoNames() {
        List<String> names = new ArrayList<String>();

        // These 8 are all an order of magnitude bigger than the other repo's
        names.add("central");
        names.add("kuali-snapshot");
        names.add("kuali-builds");
        names.add("kuali-release");
        names.add("hosted-private");
        names.add("atlassian");
        names.add("eclipselink");
        names.add("apache-snapshots");

        names.add("central-m1");
        names.add("codehaus");
        names.add("codehaus-snapshots");
        names.add("developer");
        names.add("glassfish");
        names.add("google");
        names.add("google-reflections");
        names.add("jasperreports-sourceforge");
        names.add("java.net-m1");
        names.add("java.net-m1-m2");
        names.add("java.net-m2");
        names.add("jboss");
        names.add("jsdoctk");
        names.add("kuali-legacy-releases");
        names.add("kuali-legacy-snapshots");
        names.add("kuali-private");
        names.add("kuali-s3-external");
        names.add("kuali-s3-private");
        // names.add("m2eclipse");
        names.add("maven-restlet");
        names.add("ow2");
        names.add("public");
        names.add("releases");
        names.add("saucelabs-repository");
        names.add("snapshots");
        names.add("sonatype-oss-releases");
        names.add("spring-milestones");
        names.add("thirdparty");
        return names;
    }

}