From 3d6c99eee8760863a62528c2644d62bb85d11104 Mon Sep 17 00:00:00 2001 From: FlyingWolFox <49326973+FlyingWolFox@users.noreply.github.com> Date: Sun, 12 Apr 2020 11:01:29 -0300 Subject: [PATCH] Internal repetion handling implemented --- FileInfo.java | 6 +- Main.java | 198 ++++++++++++++++++++++++-------------------------- README.md | 46 ++---------- 3 files changed, 102 insertions(+), 148 deletions(-) diff --git a/FileInfo.java b/FileInfo.java index 4dbab81..a6dfe52 100644 --- a/FileInfo.java +++ b/FileInfo.java @@ -147,8 +147,8 @@ public void setRepeated() { /** * Used to compare files to find repetions */ - public int compareTo(FileInfo rom) { - return md5.compareTo(rom.getMD5()); + public int compareTo(FileInfo file) { + return md5.compareTo(file.getMD5()); } -} \ No newline at end of file +} diff --git a/Main.java b/Main.java index 51f63dc..786133b 100644 --- a/Main.java +++ b/Main.java @@ -53,14 +53,27 @@ public Main(String[] args) { // this should always be true assert subfolders.size() == dirs - .size() : "Error: The number of child directories is not equal to the number of scanned directories"; + .size() : "Error: The number of subdirectories is not equal to the number of scanned directories"; // process internal repetions first. At this version this just logs that // repetions have been found - getRidInternalRepetions(); + if (manageInternalRepetions()) { + System.out.println("Internal repetions found:"); + for (Directory dir : dirs) { + System.out.println( + dir.getNumOfInternalRepetions() + " internal repetions in " + dir.getPath().toString()); + } + System.out.println( + "Internal are in their respective directories. Clean then first then run the tool again"); + return; + } else + System.out.println("No internal repetions found"); + System.out.println(""); // will get all files of all directories to make comparisons - for (Directory dir : dirs) { + for ( + + Directory dir : dirs) { files.addAll(dir.getFiles()); } Collections.sort(files); // sort the arraylist accordingly to MD5 hash in alphabetical order @@ -70,6 +83,7 @@ public Main(String[] args) { // moveRepeated(); deactivated, useless for now System.out.println("Operation Complete"); + } /** @@ -116,62 +130,63 @@ private void createDirectories() { e.printStackTrace(); System.exit(-1); } - - // TODO: better internal repetion handling - /* - * Path internal = child.resolve("Internal Repetions"); internals.add(internal); - * try { Files.createDirectories(internal); } catch (IOException e) { - * System.err. - * println("Error trying to create \"Internal Repetions\" directory for " + - * dirs.get(i).getPath().getFileName()); e.printStackTrace(); } - */ } } /** - * Gets rid of internal repetions of each directory, making organization better - * and comparing among folders faster. Deactived for now + * Will find internal repetions in the directories in th same style of + * manageRepetions + * + * @return if a internal repetion was found */ - private void getRidInternalRepetions() { - // TODO: getRidInternalRepetions - // this flag will change to show internal repetions on the terminal - boolean repetion = false; - for (int dir = 0; dir < dirs.size(); dir++) { - ArrayList roms = dirs.get(dir).getFiles(); - // will search for internal repetions - for (int i = 0; i < roms.size() - 1; i++) { - FileInfo rom1 = roms.get(i); - FileInfo rom2 = roms.get(i + 1); - if (rom1.compareTo(rom2) == 0) { - dirs.get(dir).increaseNumOfInternalRepetions(); - /* - * Path source = rom2.getPath(); Path target = internals.get(dir).resolve( - * rom1.getNum() + letters.get(dirs.get(dir).getNum()).toString() + "- " + - * rom2.getName()); try { Files.move(source, target); } catch (IOException e) { - * System.err.println("Failed to move " + rom2.getName() + " from " + - * dirs.get(dir).getPath().toString() + " to " + target.toString()); - * e.printStackTrace(); } - * - * roms.remove(rom2); i--; rom1.setRepeated(true); - */ - repetion = true; // if a repetion is found, set the flag - } + private boolean manageInternalRepetions() { + boolean internalRepetionFound = false; + for (Directory dir : dirs) { + ArrayList files = dir.getFiles(); + String letter = letters.get(dir.getNum()).toString(); + for (int i = 0; i < files.size() - 1; i++) { + FileInfo file1 = files.get(i); + FileInfo file2 = files.get(i + 1); + if (file1.compareTo(file2) == 0) { + internalRepetionFound = true; + Path source1 = file1.getPath(); + file1.setName(file1.getNum() + letter + "- " + file1.getName()); + Path target1 = subfolders.get(dirs.indexOf(dir)).resolve(file1.getName()); + + while (file1.compareTo(file2) == 0) { + dir.increaseNumOfInternalRepetions(); + Path source2 = file2.getPath(); + file2.setName(file1.getNum() + letter + "- " + file2.getName()); + Path target2 = subfolders.get(dirs.indexOf(dir)).resolve(file2.getName()); + try { + Files.move(source2, target2); + } catch (IOException e) { + System.out.println( + "Error moving " + source2.toString() + " to " + target2.toString() + ": " + e); + } finally { + files.remove(file2); + } + + try { + file2 = files.get(i + 1); + } catch (IndexOutOfBoundsException e) { + // if this happens, the end of the collection have been reached + break; + } + } + + try { + Files.move(source1, target1); + } catch (IOException e) { + System.out + .println("Error moving " + source1.toString() + " to " + target1.toString() + ": " + e); + } + } } } - // print internal repetion stats and the non handling warning - if (repetion) { - System.out.println("Internal repetions found:"); - for (Directory dir : dirs) { - System.out.println( - dir.getNumOfInternalRepetions() + " internal repetions in " + dir.getPath().toString()); - } - System.out.println( - "Internal repetions aren't handled in this version, so they're togheter with the other repetions"); - } else - System.out.println("No internal repetions found"); - System.out.println(""); + return internalRepetionFound; } /** @@ -184,36 +199,40 @@ private void manageRepetions() { int numOfRepetions = 0; // stores the number of repetions of all directories, every repetion found will // increase this for (int i = 0; i < files.size() - 1; i++) { - FileInfo rom1 = files.get(i); // get a first file to compare - FileInfo rom2 = files.get(i + 1); // get a second file to compare - if (rom1.compareTo(rom2) == 0) { // compares the files + FileInfo file1 = files.get(i); // get a first file to compare + FileInfo file2 = files.get(i + 1); // get a second file to compare + if (file1.compareTo(file2) == 0) { // compares the files numOfRepetions++; - rom1.getDir().increaseNumOfRepetions(); // increases the number of repetion on the folder of the file - Path source1 = rom1.getPath(); // gets directory info to move the file - rom1.setName(rom1.getNum() + letters.get(rom1.getDir().getNum()).toString() + "- " + rom1.getName()); - Path target1 = subfolders.get(rom1.getDir().getNum()).resolve(rom1.getName()); // gets the respective - // subfolder to move the - // file with the new - // name + file1.getDir().increaseNumOfRepetions(); // increases the number of repetion on the folder of the file + Path source1 = file1.getPath(); // gets directory info to move the file + file1.setName( + file1.getNum() + letters.get(file1.getDir().getNum()).toString() + "- " + file1.getName()); + Path target1 = subfolders.get(file1.getDir().getNum()).resolve(file1.getName()); // gets the respective + // subfolder to move + // the + // file with the new + // name // continue to look for repetions, this will garant that not just repetion // doubles get spotted - while (rom1.compareTo(rom2) == 0) { - rom2.getDir().increaseNumOfRepetions(); - Path source2 = rom2.getPath(); - rom2.setName(rom1.getNum() + letters.get(rom2.getDir().getNum()).toString() + "- " + rom2.getName()); - Path target2 = subfolders.get(rom2.getDir().getNum()).resolve(rom2.getName()); + while (file1.compareTo(file2) == 0) { + file2.getDir().increaseNumOfRepetions(); + Path source2 = file2.getPath(); + file2.setName( + file1.getNum() + letters.get(file2.getDir().getNum()).toString() + "- " + file2.getName()); + Path target2 = subfolders.get(file2.getDir().getNum()).resolve(file2.getName()); try { Files.move(source2, target2); } catch (IOException e) { - System.err.println("Failed to move " + rom2.getName() + " from " - + rom2.getDir().getPath().toString() + " to " + target2.toString()); + System.err.println("Failed to move " + file2.getName() + " from " + + file2.getDir().getPath().toString() + " to " + target2.toString()); e.printStackTrace(); } - files.remove(rom2); // remove the file of the collection, so comparasion can proceed without getting - // a integer iterator + files.remove(file2); // remove the file of the collection, so comparasion can proceed without + // getting + // a integer iterator try { - rom2 = files.get(i + 1); + file2 = files.get(i + 1); } catch (IndexOutOfBoundsException e) { // if this happens, the end of the collection have been reached break; @@ -224,8 +243,8 @@ private void manageRepetions() { // finally moves the file used to compare with proper naming Files.move(source1, target1); } catch (IOException e) { - System.err.println("Failed to move " + rom1.getName() + " from " - + rom1.getDir().getPath().toString() + " to " + target1.toString()); + System.err.println("Failed to move " + file1.getName() + " from " + + file1.getDir().getPath().toString() + " to " + target1.toString()); e.printStackTrace(); } // removes from the collection, this means that the iteration have to be @@ -242,39 +261,10 @@ private void manageRepetions() { } - /** - * Used to move internal repetions that weren't moved. Not used by now because - * internal repetions aren't handled seperetedly - */ - public void moveRepeated() { - for (FileInfo file : files) { - if (file.getRepeated() - && file.getFile().getAbsolutePath().equals(file.getPath().toAbsolutePath().toString())) { - Path source = file.getPath(); - Path target = internalRepetions.get(file.getDir().getNum()).resolve( - file.getNum() + letters.get(file.getDir().getNum()).toString() + "- " + file.getName()); - try { - Files.move(source, target); - } catch (IOException e) { - System.err.println("Failed to move " + file.getName() + " from " - + file.getDir().getPath().toString() + " to " + target.toString()); - e.printStackTrace(); - } - } - if (file.getRepeated() - && !file.getFile().getAbsolutePath().equals(file.getPath().toAbsolutePath().toString())) { - System.out.println(file.getName() + " from " + file.getDir().getPath().toString() - + " was among internal repetions, but it was found among repetions with other directories, " - + "so it isn't with the internal repetions of its directory, but in the " - + file.getDir().getPath().toFile().getName() + " directory in the results directory"); - } - } - } - /** * main method. The script will execute the code in the Main class constructor - * and then sleep 2 seconds (this is done so its possible to read the fail - * maessages and statistics) + * and then sleep 3,5 seconds (this is done so its possible to read the fail + * messages and statistics) * * @param args The directories to be analyzed */ @@ -285,7 +275,7 @@ public static void main(String[] args) { } new Main(args); try { - Thread.sleep(2000); + Thread.sleep(3500); } catch (InterruptedException e) { e.printStackTrace(); } diff --git a/README.md b/README.md index 9d0e8e0..efb5e8b 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ This tool uses the terminal to operate and the windows executable is wrapped by * Using the .class or .jar: - Call the Main class and pass the directories that you want to analyze as arguments. The tool, for now, ignores subdirectories. The tool will print in the terminal what files it's analizing and, after done, will print the results: - - 1. Internal repetions: Internal repetions are files that have a equal copy in the same directory that they're in. The tool will show if it found at least a repetion and, if it does, will print how many are in each. For now, internal repetions aren't handled separately and will be togheter with other repetions. + - 1. Internal repetions: Internal repetions are files that have a equal copy in the same directory that they're in. The tool will show if it found at least a repetion and, if it does, will print how many are in each and end execution. You'll have to handle them yourself and then re-run the script. - 2. Repetions: Here enters repetions among directories. The tool will print how many repetions were found on each directory. - 3. The tool will print `Operation Complete` after the execution. If any exceptions are raised while handling, the description of the error will be printed with the stack trace. @@ -32,7 +32,7 @@ The `Results` directory will have a number of subdriectories in it, the same num - 3. After the lettter comes a dash (`-`) and a blank space. This is to preserve the file name and make easy to rename the files after (that you can use my [renamer tool for this script](https://github.com/FlyingWolFox/Duplicate-Finder-Renamer/) or something like Bulk Renamer Utility) - 4. Then comes the file name and extension, preserving the original name, but I'm not sure about symbolic links -PS: Intenal repetions are put in the same folder, with the same number and letter, making it easy to manage. This will change in a future version +PS: Intenal repetions are handle first. If the tool finds, it won't look for repetions among directories. Internal repetions are put in the subfolder which they're found. Manage them first, them re-run the tool to look for repetions among directories ### How it works @@ -40,28 +40,7 @@ First the tool get all files in the folders passed by argument, creating Directo ## About the code -This tool uses MD5 hash to verify if files are equal and uses these imported classes: - -```java -java.io.File; // File handling -java.io.IOException; // IOException Handling -java.nio.file.DirectoryIteratorException; // Directory Handling exception -java.nio.file.Files; // Path hadling and used to move files -java.nio.file.Path; // File and directory handling -java.nio.file.Paths; // File and directory handling -java.util.ArrayList; // Used in file comparation -java.util.Collections; // Used in file comparation -java.util.HashMap; // Used to put letters after the number in each filename -java.io.FileInputStream; // Used to calculate MD5 hash -java.io.InputStream; // Used to calculate MD5 hash -java.security.MessageDigest; // Used to calculate MD5 hash -java.security.NoSuchAlgorithmException; // Exception in hash calculation -java.nio.file.DirectoryIteratorException; // Exception in directory handling -java.nio.file.DirectoryStream; // Used to get files in the folders -``` - -Not all of these are available in all Java versions, like Java ME or old version of Java. -PS: The hash calculations were optimized after Java 7, so they run much faster +This tool uses MD5 hash to verify if files are equal and uses `java.nio` and `java.security`. Not all of these are available in all Java versions, like Java ME or old version of Java. PS: The hash calculations were optimized after Java 7, so they run much faster ## About FastMD5 @@ -121,30 +100,15 @@ The method `calculateMD5()` in the ROM class is responsible to calculate the MD5 I choosed MD5 because is relatively colision safe when looking for repeated files and it's fast. There was other alternatives, like SHA-1, that's fast too, but MD5 was good enough. Other hashes like SHA-2 or SHA-3 weren't considered because they're really slow. Other non-security algorithms weren't considered because I didn't know they existed until yesterday :D You can put any hash you want in the code and it'll work. I'm thinking of changing `calculateMD5()` to `calculateHash()` to make the use of other algorithms easier -5. **[OLD] Why the classes have these names? They don't seen common** - -This is because this tool was called Simple ROM Manager and was made to prevent repetions when merging my romsets (I'm archivist of retrogames) so the names, mainly the ROM class, ended up with these names. The tool was repurposed to find repetions in general, since it did that already. The names were changed in v0.9.0-3, this question will be maintened here because old commits still have the old names - -6. **Everything is in the Main class in a weird way, how the code is designed?** +5. **Everything is in the Main class in a weird way, how the code is designed?** This is because this tool is more like a script, so things went to the Main classes. The code works basically in the constructors, that does almost everything. This is just to be pratic, feel free to change it (and even submmit a Pull Request!) -7. **The code is commented?** - -Yes! Basically everything is commented. They're added in v0.9.0-2 - -8. **This tool have a class documention?** - -Yes. - ## Future plans This tool isn't finished, not for me at least, so I'm planning to do: -- [ ] Implement a better internal repetion handling (in the works) +- [ ] Implement a compressed archived comparer - [ ] Better verbosity -- [x] Renaming everything to better names -- [x] Comment the code -- [x] Make the classes doc Also I'm planning to make a GUI version of this tool (when I learn how to make GUIs) to be even better! It'll be in another repository tho. I'll update here when I finish it