From b4532f183291efa0f60c3fd31caae4cc8a8a4db9 Mon Sep 17 00:00:00 2001 From: Reinhard Pointner Date: Tue, 8 Nov 2011 18:26:54 +0000 Subject: [PATCH] * improved name cleanup a little bit --- .../filebot/ui/rename/EpisodeListMatcher.java | 2 +- .../ui/rename/MatchSimilarityMetric.java | 47 ++++++++++++------- .../filebot/web/EpisodeUtilities.java | 2 +- .../sourceforge/filebot/web/LocalSearch.java | 5 +- .../ui/rename/MatchSimilarityMetricTest.java | 2 +- 5 files changed, 38 insertions(+), 20 deletions(-) diff --git a/source/net/sourceforge/filebot/ui/rename/EpisodeListMatcher.java b/source/net/sourceforge/filebot/ui/rename/EpisodeListMatcher.java index 6fe86a4f..43189b57 100644 --- a/source/net/sourceforge/filebot/ui/rename/EpisodeListMatcher.java +++ b/source/net/sourceforge/filebot/ui/rename/EpisodeListMatcher.java @@ -110,7 +110,7 @@ class EpisodeListMatcher implements AutoCompleteMatcher { private String normalizeName(String value) { // remove trailing braces, e.g. Doctor Who (2005) -> doctor who - return removeTrailingBraces(value).toLowerCase(); + return removeTrailingBrackets(value).toLowerCase(); } diff --git a/source/net/sourceforge/filebot/ui/rename/MatchSimilarityMetric.java b/source/net/sourceforge/filebot/ui/rename/MatchSimilarityMetric.java index aa77947f..f72d5a41 100644 --- a/source/net/sourceforge/filebot/ui/rename/MatchSimilarityMetric.java +++ b/source/net/sourceforge/filebot/ui/rename/MatchSimilarityMetric.java @@ -3,8 +3,6 @@ package net.sourceforge.filebot.ui.rename; import static java.lang.Math.*; -import static net.sourceforge.filebot.hash.VerificationUtilities.*; -import static net.sourceforge.filebot.web.EpisodeUtilities.*; import static net.sourceforge.tuned.FileUtilities.*; import java.io.File; @@ -104,8 +102,8 @@ public enum MatchSimilarityMetric implements SimilarityMetric { @Override public float getSimilarity(Object o1, Object o2) { - String[] f1 = fields(o1); - String[] f2 = fields(o2); + String[] f1 = normalize(fields(o1)); + String[] f2 = normalize(fields(o2)); // match all fields and average similarity float sum = 0; @@ -121,28 +119,38 @@ public enum MatchSimilarityMetric implements SimilarityMetric { } - protected String[] fields(Object object) { + protected String[] normalize(Object[] objects) { + String[] names = new String[objects.length]; + + for (int i = 0; i < objects.length; i++) { + names[i] = normalizeObject(objects[i]); + } + + return names; + } + + + protected Object[] fields(Object object) { if (object instanceof Episode) { Episode episode = (Episode) object; - return new String[] { removeTrailingBraces(episode.getSeriesName()), episode.getTitle() }; + return new Object[] { episode.getSeriesName(), episode.getTitle() }; } if (object instanceof File) { File file = (File) object; - return new String[] { getName(file.getParentFile()), getName(file) }; + return new Object[] { file.getParentFile(), file }; } if (object instanceof Movie) { Movie movie = (Movie) object; - return new String[] { movie.getName(), Integer.toString(movie.getYear()) }; + return new Object[] { movie.getName(), movie.getYear() }; } if (object instanceof AbstractFile) { - AbstractFile file = (AbstractFile) object; - return new String[] { getNameWithoutExtension(file.getName()) }; + return new Object[] { (AbstractFile) object }; } - return new String[] { object.toString() }; + return new Object[] { object }; } }), @@ -161,7 +169,7 @@ public enum MatchSimilarityMetric implements SimilarityMetric { @Override protected String normalize(Object object) { // simplify file name, if possible - return super.normalize(normalizeFile(object)); + return normalizeObject(object); } }), @@ -171,7 +179,7 @@ public enum MatchSimilarityMetric implements SimilarityMetric { @Override protected String normalize(Object object) { // simplify file name, if possible - return super.normalize(normalizeFile(object)); + return normalizeObject(object); } }); @@ -190,7 +198,7 @@ public enum MatchSimilarityMetric implements SimilarityMetric { } - protected static String normalizeFile(Object object) { + protected static String normalizeObject(Object object) { String name = object.toString(); // use name without extension @@ -200,8 +208,15 @@ public enum MatchSimilarityMetric implements SimilarityMetric { name = getNameWithoutExtension(((AbstractFile) object).getName()); } - // remove embedded checksum from name, if any - return removeEmbeddedChecksum(name); + // remove group names and checksums, any [...] or (...) + name = name.replaceAll("\\([^\\(]*\\)", ""); + name = name.replaceAll("\\[[^\\[]*\\]", ""); + + // remove/normalize special characters + name = name.replaceAll("['`´]+", ""); + name = name.replaceAll("[\\p{Punct}\\p{Space}]+", " "); + + return name.trim().toLowerCase(); } diff --git a/source/net/sourceforge/filebot/web/EpisodeUtilities.java b/source/net/sourceforge/filebot/web/EpisodeUtilities.java index b8392a21..88bfef81 100644 --- a/source/net/sourceforge/filebot/web/EpisodeUtilities.java +++ b/source/net/sourceforge/filebot/web/EpisodeUtilities.java @@ -10,7 +10,7 @@ import java.util.List; public final class EpisodeUtilities { - public static String removeTrailingBraces(String name) { + public static String removeTrailingBrackets(String name) { // remove trailing braces, e.g. Doctor Who (2005) -> Doctor Who return name.replaceAll("[(]([^)]*)[)]", "").trim(); } diff --git a/source/net/sourceforge/filebot/web/LocalSearch.java b/source/net/sourceforge/filebot/web/LocalSearch.java index 1e39700b..58c088e0 100644 --- a/source/net/sourceforge/filebot/web/LocalSearch.java +++ b/source/net/sourceforge/filebot/web/LocalSearch.java @@ -123,7 +123,10 @@ class LocalSearch { protected String normalize(String value) { // normalize separator, normalize case and trim - return value.replaceAll("[\\p{Punct}\\p{Space}]+", " ").trim().toLowerCase(); + value = value.replaceAll("['`´]+", ""); + value = value.replaceAll("[\\p{Punct}\\p{Space}]+", " "); + + return value.trim().toLowerCase(); } } diff --git a/test/net/sourceforge/filebot/ui/rename/MatchSimilarityMetricTest.java b/test/net/sourceforge/filebot/ui/rename/MatchSimilarityMetricTest.java index e2fd446b..3fb9b29b 100644 --- a/test/net/sourceforge/filebot/ui/rename/MatchSimilarityMetricTest.java +++ b/test/net/sourceforge/filebot/ui/rename/MatchSimilarityMetricTest.java @@ -26,7 +26,7 @@ public class MatchSimilarityMetricTest { @Test public void normalizeFile() { - assertEquals("abc", MatchSimilarityMetric.normalizeFile(new File("/folder/abc[EF62DF13].txt"))); + assertEquals("abc", MatchSimilarityMetric.normalizeObject(new File("/folder/abc[EF62DF13].txt"))); } }