* make sure the substring metric only matches whole word sequences, rather than simple substrings that may match half of a word, which would never make sense and could only cause issues

2014-03-24 20:32:27 +00:00 · 2014-03-24 20:32:27 +00:00 · 1ca8de3ab7
parent f61f30e862
commit 1ca8de3ab7
1 changed files with 16 additions and 1 deletions
--- a/source/net/sourceforge/filebot/similarity/SubstringMetric.java
+++ b/source/net/sourceforge/filebot/similarity/SubstringMetric.java
@ -26,7 +26,22 @@ public class SubstringMetric implements SimilarityMetric {
 		if (s2 == null || s2.isEmpty())
 			return 0;

-		return (o1c2 && s1.contains(s2)) || (o2c1 && s2.contains(s1)) ? 1 : 0;
+		return (o1c2 && matches(s1, s2) || (o2c1 && matches(s2, s1))) ? 1 : 0;
+	}
+
+	protected boolean matches(String s1, String s2) {
+		// true only if the last occurrence of s2 in s1 sits between word boundaries (other occurrences are not examined)
+		int index = s1.lastIndexOf(s2);
+		if (index < 0)
+			return false;
+
+		// a letter or digit right before the match means it starts in the middle of a word
+		if (index - 1 >= 0 && Character.isLetterOrDigit(s1.charAt(index - 1)))
+			return false;
+		// a letter or digit right after the match means it ends in the middle of a word
+		if (index + s2.length() < s1.length() && Character.isLetterOrDigit(s1.charAt(index + s2.length())))
+			return false;
+		return true;
 	}

 	protected String normalize(Object object) {