Cache SeriesNameMatcher objects

This commit is contained in:
Reinhard Pointner 2016-02-10 18:32:30 +00:00
parent a81fcf155a
commit bf69d750e8
8 changed files with 31 additions and 40 deletions

View File

@ -116,10 +116,9 @@ public class CmdlineOperations implements CmdlineInterface {
int sxe = 0; // SxE int sxe = 0; // SxE
int cws = 0; // common word sequence int cws = 0; // common word sequence
SeriesNameMatcher nameMatcher = new SeriesNameMatcher(locale, true);
Collection<String> cwsList = emptySet(); Collection<String> cwsList = emptySet();
if (max >= 5) { if (max >= 5) {
cwsList = nameMatcher.matchAll(mediaFiles.toArray(new File[0])); cwsList = getSeriesNameMatcher().matchAll(mediaFiles.toArray(new File[0]));
} }
for (File f : mediaFiles) { for (File f : mediaFiles) {
@ -130,7 +129,7 @@ public class CmdlineOperations implements CmdlineInterface {
// count CWS matches // count CWS matches
for (String base : cwsList) { for (String base : cwsList) {
if (base.equalsIgnoreCase(nameMatcher.matchByFirstCommonWordSequence(base, f.getName()))) { if (base.equalsIgnoreCase(getSeriesNameMatcher().matchByFirstCommonWordSequence(base, f.getName()))) {
cws++; cws++;
break; break;
} }

View File

@ -115,9 +115,10 @@ public class MediaDetection {
return releaseInfo.getLanguageSuffix(getName(file)); return releaseInfo.getLanguageSuffix(getName(file));
} }
private static final SeasonEpisodeMatcher seasonEpisodeMatcherStrict = new SmartSeasonEpisodeMatcher(true); private static final SeasonEpisodeMatcher seasonEpisodeMatcherStrict = new SmartSeasonEpisodeMatcher(SeasonEpisodeMatcher.DEFAULT_SANITY, true);
private static final SeasonEpisodeMatcher seasonEpisodeMatcherNonStrict = new SmartSeasonEpisodeMatcher(false); private static final SeasonEpisodeMatcher seasonEpisodeMatcherNonStrict = new SmartSeasonEpisodeMatcher(SeasonEpisodeMatcher.DEFAULT_SANITY, false);
private static final DateMatcher dateMatcher = new DateMatcher(Locale.getDefault(), DateMatcher.DEFAULT_SANITY); private static final DateMatcher dateMatcher = new DateMatcher(Locale.getDefault(), DateMatcher.DEFAULT_SANITY);
private static final SeriesNameMatcher seriesNameMatcher = new SeriesNameMatcher(Locale.ENGLISH, true);
public static SeasonEpisodeMatcher getSeasonEpisodeMatcher(boolean strict) { public static SeasonEpisodeMatcher getSeasonEpisodeMatcher(boolean strict) {
return strict ? seasonEpisodeMatcherStrict : seasonEpisodeMatcherNonStrict; return strict ? seasonEpisodeMatcherStrict : seasonEpisodeMatcherNonStrict;
@ -127,6 +128,10 @@ public class MediaDetection {
return dateMatcher; return dateMatcher;
} }
/**
 * Returns the shared {@link SeriesNameMatcher} held in the static
 * {@code seriesNameMatcher} field, which is created once with
 * {@code Locale.ENGLISH} and {@code strict = true}.
 *
 * NOTE(review): every caller receives the same instance; confirm that
 * SeriesNameMatcher is safe to share across threads and that callers which
 * previously built a matcher for a different locale can accept English.
 *
 * @return the shared series name matcher instance
 */
public static SeriesNameMatcher getSeriesNameMatcher() {
return seriesNameMatcher;
}
public static boolean isEpisode(String name, boolean strict) { public static boolean isEpisode(String name, boolean strict) {
return parseEpisodeNumber(name, strict) != null || parseDate(name) != null; return parseEpisodeNumber(name, strict) != null || parseDate(name) != null;
} }

View File

@ -11,10 +11,6 @@ public class SmartSeasonEpisodeMatcher extends SeasonEpisodeMatcher {
super(sanity, strict); super(sanity, strict);
} }
/**
 * Convenience constructor that delegates to the superclass constructor,
 * passing {@code DEFAULT_SANITY} together with the given strict flag.
 *
 * @param strict forwarded unchanged to the superclass constructor
 */
public SmartSeasonEpisodeMatcher(boolean strict) {
super(DEFAULT_SANITY, strict);
}
protected String clean(CharSequence name) { protected String clean(CharSequence name) {
return MediaDetection.stripFormatInfo(name); return MediaDetection.stripFormatInfo(name);
} }

View File

@ -592,8 +592,6 @@ public enum EpisodeMetrics implements SimilarityMetric {
RegionHint(new SimilarityMetric() { RegionHint(new SimilarityMetric() {
private Pattern hint = compile("[(](\\p{Alpha}+|\\p{Digit}+)[)]$"); private Pattern hint = compile("[(](\\p{Alpha}+|\\p{Digit}+)[)]$");
private SeriesNameMatcher seriesNameMatcher = new SeriesNameMatcher();
private Pattern punctuation = compile("[\\p{Punct}\\p{Space}]+"); private Pattern punctuation = compile("[\\p{Punct}\\p{Space}]+");
@Override @Override
@ -617,7 +615,7 @@ public enum EpisodeMetrics implements SimilarityMetric {
for (File f : listPathTail((File) o, 3, true)) { for (File f : listPathTail((File) o, 3, true)) {
// try to focus on series name // try to focus on series name
String n = f.getName(); String n = f.getName();
String sn = seriesNameMatcher.matchByEpisodeIdentifier(n); String sn = getSeriesNameMatcher().matchByEpisodeIdentifier(n);
String[] tokens = punctuation.split(sn != null ? sn : n); String[] tokens = punctuation.split(sn != null ? sn : n);
for (String s : tokens) { for (String s : tokens) {

View File

@ -8,6 +8,7 @@ import static net.filebot.util.StringUtilities.*;
import java.io.File; import java.io.File;
import java.text.CollationKey; import java.text.CollationKey;
import java.text.Collator;
import java.util.AbstractCollection; import java.util.AbstractCollection;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
@ -30,23 +31,21 @@ import net.filebot.util.FileUtilities;
public class SeriesNameMatcher { public class SeriesNameMatcher {
protected SeasonEpisodeMatcher seasonEpisodeMatcher; protected final SimilarityMetric metric;
protected DateMatcher dateMatcher; protected final SeasonEpisodeMatcher seasonEpisodeMatcher;
protected final DateMatcher dateMatcher;
protected NameSimilarityMetric nameSimilarityMetric; protected final CommonSequenceMatcher commonSequenceMatcher;
protected CommonSequenceMatcher commonSequenceMatcher;
/**
 * Creates a matcher with default settings by delegating to
 * {@code SeriesNameMatcher(Locale, boolean)} with {@code Locale.ENGLISH}
 * and {@code strict = true}.
 */
public SeriesNameMatcher() {
this(Locale.ENGLISH, true);
}
public SeriesNameMatcher(Locale locale, boolean strict) { public SeriesNameMatcher(Locale locale, boolean strict) {
seasonEpisodeMatcher = new SmartSeasonEpisodeMatcher(SeasonEpisodeMatcher.DEFAULT_SANITY, strict); this(new NameSimilarityMetric(), getLenientCollator(locale), new SmartSeasonEpisodeMatcher(SeasonEpisodeMatcher.DEFAULT_SANITY, strict), new DateMatcher(locale, DateMatcher.DEFAULT_SANITY));
dateMatcher = new DateMatcher(locale, DateMatcher.DEFAULT_SANITY); }
nameSimilarityMetric = new NameSimilarityMetric();
commonSequenceMatcher = new CommonSequenceMatcher(getLenientCollator(locale), 3, true) { public SeriesNameMatcher(SimilarityMetric metric, Collator collator, SeasonEpisodeMatcher seasonEpisodeMatcher, DateMatcher dateMatcher) {
this.metric = metric;
this.seasonEpisodeMatcher = seasonEpisodeMatcher;
this.dateMatcher = dateMatcher;
this.commonSequenceMatcher = new CommonSequenceMatcher(collator, 3, true) {
@Override @Override
public CollationKey[] split(String sequence) { public CollationKey[] split(String sequence) {
@ -65,7 +64,7 @@ public class SeriesNameMatcher {
for (String nameMatch : matchAll(names)) { for (String nameMatch : matchAll(names)) {
String commonMatch = commonSequenceMatcher.matchFirstCommonSequence(nameMatch, parent); String commonMatch = commonSequenceMatcher.matchFirstCommonSequence(nameMatch, parent);
float similarity = commonMatch == null ? 0 : nameSimilarityMetric.getSimilarity(commonMatch, nameMatch); float similarity = commonMatch == null ? 0 : metric.getSimilarity(commonMatch, nameMatch);
// prefer common match, but only if it's very similar to the original match // prefer common match, but only if it's very similar to the original match
seriesNames.add(similarity > 0.7 ? commonMatch : nameMatch); seriesNames.add(similarity > 0.7 ? commonMatch : nameMatch);

View File

@ -2,6 +2,7 @@ package net.filebot.ui.list;
import static java.awt.Font.*; import static java.awt.Font.*;
import static java.lang.Math.*; import static java.lang.Math.*;
import static net.filebot.media.MediaDetection.*;
import static net.filebot.ui.NotificationLogging.*; import static net.filebot.ui.NotificationLogging.*;
import java.awt.BorderLayout; import java.awt.BorderLayout;
@ -128,7 +129,7 @@ public class ListPanel extends JComponent {
} }
// try to match title from the first five names // try to match title from the first five names
Collection<String> title = new SeriesNameMatcher().matchAll((names.size() < 5 ? names : names.subList(0, 4)).toArray(new String[0])); Collection<String> title = getSeriesNameMatcher().matchAll((names.size() < 5 ? names : names.subList(0, 4)).toArray(new String[0]));
list.setTitle(title.isEmpty() ? "List" : title.iterator().next()); list.setTitle(title.isEmpty() ? "List" : title.iterator().next());

View File

@ -35,8 +35,7 @@ class MovieEditor implements TableCellEditor {
String fn = FileUtilities.getName(mapping.getVideo() != null ? mapping.getVideo() : mapping.getSubtitle()); String fn = FileUtilities.getName(mapping.getVideo() != null ? mapping.getVideo() : mapping.getSubtitle());
// check if query contain an episode identifier // check if query contain an episode identifier
SeriesNameMatcher snm = new SeriesNameMatcher(); String sn = getSeriesNameMatcher().matchByEpisodeIdentifier(fn);
String sn = snm.matchByEpisodeIdentifier(fn);
if (sn != null) { if (sn != null) {
return stripReleaseInfo(sn, true); return stripReleaseInfo(sn, true);
} }

View File

@ -1,17 +1,16 @@
package net.filebot.similarity; package net.filebot.similarity;
import static org.junit.Assert.*; import static org.junit.Assert.*;
import java.util.Locale;
import net.filebot.similarity.SeriesNameMatcher.SeriesNameCollection; import net.filebot.similarity.SeriesNameMatcher.SeriesNameCollection;
import org.junit.Test; import org.junit.Test;
public class SeriesNameMatcherTest { public class SeriesNameMatcherTest {
private static SeriesNameMatcher matcher = new SeriesNameMatcher(); private static SeriesNameMatcher matcher = new SeriesNameMatcher(Locale.ENGLISH, true);
@Test @Test
public void whitelist() { public void whitelist() {
@ -21,7 +20,6 @@ public class SeriesNameMatcherTest {
assertArrayEquals(new String[] { "Test 101" }, matcher.matchAll(names).toArray()); assertArrayEquals(new String[] { "Test 101" }, matcher.matchAll(names).toArray());
} }
@Test @Test
public void threshold() { public void threshold() {
// ignore recurring word sequences when matching episode patterns // ignore recurring word sequences when matching episode patterns
@ -30,7 +28,6 @@ public class SeriesNameMatcherTest {
assertArrayEquals(new String[] { "Test" }, matcher.matchAll(names).toArray()); assertArrayEquals(new String[] { "Test" }, matcher.matchAll(names).toArray());
} }
@Test @Test
public void matchBeforeSeasonEpisodePattern() { public void matchBeforeSeasonEpisodePattern() {
assertEquals("The Test", matcher.matchByEpisodeIdentifier("The Test - 1x01")); assertEquals("The Test", matcher.matchByEpisodeIdentifier("The Test - 1x01"));
@ -39,7 +36,6 @@ public class SeriesNameMatcherTest {
assertEquals("Mushishi", matcher.matchByEpisodeIdentifier("[niizk]_Mushishi_-_1x01_-_The_Green_Gathering")); assertEquals("Mushishi", matcher.matchByEpisodeIdentifier("[niizk]_Mushishi_-_1x01_-_The_Green_Gathering"));
} }
@Test @Test
public void normalize() { public void normalize() {
// non-letter and non-digit characters // non-letter and non-digit characters
@ -52,7 +48,6 @@ public class SeriesNameMatcherTest {
assertEquals("strawhat Luffy", matcher.normalize("(strawhat [Luffy (#Monkey)")); assertEquals("strawhat Luffy", matcher.normalize("(strawhat [Luffy (#Monkey)"));
} }
@Test @Test
public void firstCommonSequence() { public void firstCommonSequence() {
String[] seq1 = "Common Name 1 Any Title".split("\\s"); String[] seq1 = "Common Name 1 Any Title".split("\\s");
@ -66,7 +61,6 @@ public class SeriesNameMatcherTest {
assertArrayEquals(null, matcher.firstCommonSequence(seq2, seq1, 1, String.CASE_INSENSITIVE_ORDER)); assertArrayEquals(null, matcher.firstCommonSequence(seq2, seq1, 1, String.CASE_INSENSITIVE_ORDER));
} }
@Test @Test
public void firstCharacterCaseBalance() { public void firstCharacterCaseBalance() {
SeriesNameCollection n = new SeriesNameCollection(); SeriesNameCollection n = new SeriesNameCollection();