From 1ca8de3ab70befa507fac59d913a6e306bbe798d Mon Sep 17 00:00:00 2001
From: Reinhard Pointner
Date: Mon, 24 Mar 2014 20:32:27 +0000
Subject: [PATCH] * make sure substring metric only matches word sequences, rather than simple substrings which may match half of a word which would never make sense and could only cause issues

---
Review note: the two word-boundary checks in matches() were corrected (the
conditions were inverted, and the second one passed an index instead of a
character to Character.isLetterOrDigit). Indentation was reconstructed, and
the blob ids on the "index" line predate this correction.

 .../filebot/similarity/SubstringMetric.java | 26 +++++++++++++++++++++++++-
 1 file changed, 25 insertions(+), 1 deletion(-)

diff --git a/source/net/sourceforge/filebot/similarity/SubstringMetric.java b/source/net/sourceforge/filebot/similarity/SubstringMetric.java
index 1e198e3f..a9fa20a3 100644
--- a/source/net/sourceforge/filebot/similarity/SubstringMetric.java
+++ b/source/net/sourceforge/filebot/similarity/SubstringMetric.java
@@ -26,7 +26,31 @@ public class SubstringMetric implements SimilarityMetric {
 		if (s2 == null || s2.isEmpty())
 			return 0;
 
-		return (o1c2 && s1.contains(s2)) || (o2c1 && s2.contains(s1)) ? 1 : 0;
+		return (o1c2 && matches(s1, s2) || (o2c1 && matches(s2, s1))) ? 1 : 0;
+	}
+
+	/**
+	 * Check whether the last occurrence of s2 in s1 is delimited by word boundaries.
+	 *
+	 * @param s1 the string to search in
+	 * @param s2 the string to search for
+	 * @return true if the last occurrence of s2 in s1 is neither preceded nor followed by a letter or digit
+	 */
+	protected boolean matches(String s1, String s2) {
+		int index = s1.lastIndexOf(s2);
+		if (index < 0)
+			return false;
+
+		// a letter or digit right before the match means it starts in the middle of a word
+		if (index - 1 >= 0 && Character.isLetterOrDigit(s1.charAt(index - 1)))
+			return false;
+
+		// a letter or digit right after the match means it ends in the middle of a word
+		int end = index + s2.length();
+		if (end < s1.length() && Character.isLetterOrDigit(s1.charAt(end)))
+			return false;
+
+		return true;
 	}
 
 	protected String normalize(Object object) {