* improved matching for Name-Title patterns (without SxE marker)

This commit is contained in:
Reinhard Pointner 2011-10-28 06:28:19 +00:00
parent 919a6c0660
commit 7a83fda23b
3 changed files with 54 additions and 29 deletions

View File

@ -9,7 +9,6 @@ import static net.sourceforge.filebot.WebServices.*;
import static net.sourceforge.filebot.cli.CLILogging.*; import static net.sourceforge.filebot.cli.CLILogging.*;
import static net.sourceforge.filebot.hash.VerificationUtilities.*; import static net.sourceforge.filebot.hash.VerificationUtilities.*;
import static net.sourceforge.filebot.subtitle.SubtitleUtilities.*; import static net.sourceforge.filebot.subtitle.SubtitleUtilities.*;
import static net.sourceforge.filebot.ui.rename.MatchSimilarityMetric.*;
import static net.sourceforge.tuned.FileUtilities.*; import static net.sourceforge.tuned.FileUtilities.*;
import java.io.File; import java.io.File;
@ -51,6 +50,7 @@ import net.sourceforge.filebot.similarity.SimilarityMetric;
import net.sourceforge.filebot.subtitle.SubtitleFormat; import net.sourceforge.filebot.subtitle.SubtitleFormat;
import net.sourceforge.filebot.ui.Language; import net.sourceforge.filebot.ui.Language;
import net.sourceforge.filebot.ui.rename.HistorySpooler; import net.sourceforge.filebot.ui.rename.HistorySpooler;
import net.sourceforge.filebot.ui.rename.MatchSimilarityMetric;
import net.sourceforge.filebot.vfs.ArchiveType; import net.sourceforge.filebot.vfs.ArchiveType;
import net.sourceforge.filebot.vfs.MemoryFile; import net.sourceforge.filebot.vfs.MemoryFile;
import net.sourceforge.filebot.web.Episode; import net.sourceforge.filebot.web.Episode;
@ -181,9 +181,9 @@ public class ArgumentProcessor {
// similarity metrics for matching // similarity metrics for matching
SimilarityMetric[] sequence; SimilarityMetric[] sequence;
if (strict) { if (strict) {
sequence = new SimilarityMetric[] { StrictEpisodeIdentifier, StrictName }; // use SEI for matching and SN for excluding false positives sequence = new SimilarityMetric[] { StrictMetric.EpisodeIdentifier, StrictMetric.Title, StrictMetric.Name }; // use SEI for matching and SN for excluding false positives
} else { } else {
sequence = new SimilarityMetric[] { EpisodeIdentifier, Name, Numeric }; // same as in GUI sequence = MatchSimilarityMetric.defaultSequence(); // same as in GUI
} }
List<Match<File, Episode>> matches = new ArrayList<Match<File, Episode>>(); List<Match<File, Episode>> matches = new ArrayList<Match<File, Episode>>();
@ -670,4 +670,5 @@ public class ArgumentProcessor {
System.out.println(string); System.out.println(string);
} }
} }
} }

View File

@ -10,7 +10,7 @@ import java.util.logging.LogRecord;
import java.util.logging.Logger; import java.util.logging.Logger;
public class CLILogging extends Handler { class CLILogging extends Handler {
public static final Logger CLILogger = createCommandlineLogger("net.sourceforge.filebot.cli"); public static final Logger CLILogger = createCommandlineLogger("net.sourceforge.filebot.cli");

View File

@ -2,7 +2,9 @@
package net.sourceforge.filebot.ui.rename; package net.sourceforge.filebot.ui.rename;
import static java.lang.Math.*;
import static net.sourceforge.filebot.hash.VerificationUtilities.*; import static net.sourceforge.filebot.hash.VerificationUtilities.*;
import static net.sourceforge.tuned.FileUtilities.*;
import java.io.File; import java.io.File;
import java.util.Arrays; import java.util.Arrays;
@ -15,11 +17,11 @@ import net.sourceforge.filebot.similarity.NameSimilarityMetric;
import net.sourceforge.filebot.similarity.NumericSimilarityMetric; import net.sourceforge.filebot.similarity.NumericSimilarityMetric;
import net.sourceforge.filebot.similarity.SeasonEpisodeMetric; import net.sourceforge.filebot.similarity.SeasonEpisodeMetric;
import net.sourceforge.filebot.similarity.SimilarityMetric; import net.sourceforge.filebot.similarity.SimilarityMetric;
import net.sourceforge.filebot.similarity.SubstringMetric;
import net.sourceforge.filebot.similarity.SeasonEpisodeMatcher.SxE; import net.sourceforge.filebot.similarity.SeasonEpisodeMatcher.SxE;
import net.sourceforge.filebot.vfs.AbstractFile; import net.sourceforge.filebot.vfs.AbstractFile;
import net.sourceforge.filebot.web.Date; import net.sourceforge.filebot.web.Date;
import net.sourceforge.filebot.web.Episode; import net.sourceforge.filebot.web.Episode;
import net.sourceforge.tuned.FileUtilities;
public enum MatchSimilarityMetric implements SimilarityMetric { public enum MatchSimilarityMetric implements SimilarityMetric {
@ -55,7 +57,7 @@ public enum MatchSimilarityMetric implements SimilarityMetric {
if (sxeSimilarity >= 1) if (sxeSimilarity >= 1)
return sxeSimilarity; return sxeSimilarity;
return Math.max(sxeSimilarity, AirDate.getSimilarity(o1, o2)); return max(sxeSimilarity, AirDate.getSimilarity(o1, o2));
} }
}), }),
@ -95,6 +97,44 @@ public enum MatchSimilarityMetric implements SimilarityMetric {
} }
}), }),
// Match series title and episode title against folder structure and file name
Title(new SubstringMetric() {

	/**
	 * Compares every field of {@code o1} against every field of {@code o2} using the
	 * inherited substring similarity, averages the results and rounds the average up
	 * to the next multiple of 1/3.
	 *
	 * @param o1 first object to compare (see {@link #fields(Object)} for how it is split into fields)
	 * @param o2 second object to compare
	 * @return the averaged pairwise similarity, rounded up to a multiple of 1/3
	 */
	@Override
	public float getSimilarity(Object o1, Object o2) {
		String[] f1 = fields(o1);
		String[] f2 = fields(o2);

		// match all fields and average similarity
		float sum = 0;
		for (String s1 : f1) {
			for (String s2 : f2) {
				sum += super.getSimilarity(s1, s2);
			}
		}

		// fields() never returns an empty array, so the divisor is always >= 1
		sum /= f1.length * f2.length;

		// normalize into 3 similarity levels
		// NOTE(review): assumes the inherited metric yields values within [0, 1] -- confirm against SubstringMetric
		return (float) (ceil(sum * 3) / 3);
	}

	/**
	 * Splits an object into the strings that take part in the comparison.
	 *
	 * @param object an Episode, a File, or any other object
	 * @return series name and episode title for an Episode; parent folder name and file name
	 *         for a File (file name only if the file has no parent); otherwise the
	 *         object's {@code toString()} value
	 */
	protected String[] fields(Object object) {
		if (object instanceof Episode) {
			Episode e = (Episode) object;
			return new String[] { e.getSeriesName(), e.getTitle() };
		}

		if (object instanceof File) {
			File file = (File) object;
			File parent = file.getParentFile();

			// getParentFile() returns null when the path has no parent component
			// (e.g. a bare relative file name), so match on the file name only
			if (parent == null) {
				return new String[] { getName(file) };
			}

			return new String[] { getName(parent), getName(file) };
		}

		return new String[] { object.toString() };
	}
}),
// Match by generic name similarity // Match by generic name similarity
Name(new NameSimilarityMetric() { Name(new NameSimilarityMetric() {
@ -102,7 +142,7 @@ public enum MatchSimilarityMetric implements SimilarityMetric {
public float getSimilarity(Object o1, Object o2) { public float getSimilarity(Object o1, Object o2) {
// normalize absolute similarity to similarity rank (10 ranks in total), // normalize absolute similarity to similarity rank (10 ranks in total),
// so we are less likely to fall for false positives in this pass, and move on to the next one // so we are less likely to fall for false positives in this pass, and move on to the next one
return (float) (Math.floor(super.getSimilarity(o1, o2) * 10) / 10); return (float) (floor(super.getSimilarity(o1, o2) * 10) / 10);
} }
@ -121,23 +161,6 @@ public enum MatchSimilarityMetric implements SimilarityMetric {
// simplify file name, if possible // simplify file name, if possible
return super.normalize(normalizeFile(object)); return super.normalize(normalizeFile(object));
} }
}),
StrictEpisodeIdentifier(new SimilarityMetric() {
@Override
public float getSimilarity(Object o1, Object o2) {
// strict SxE metric, don't allow in-between values
return EpisodeIdentifier.getSimilarity(o1, o2) >= 1 ? 1 : 0;
}
}),
StrictName(new SimilarityMetric() {
@Override
public float getSimilarity(Object o1, Object o2) {
return (float) (Math.floor(Name.getSimilarity(o1, o2) * 2) / 2);
}
}); });
// inner metric // inner metric
@ -160,9 +183,9 @@ public enum MatchSimilarityMetric implements SimilarityMetric {
// use name without extension // use name without extension
if (object instanceof File) { if (object instanceof File) {
name = FileUtilities.getName((File) object); name = getName((File) object);
} else if (object instanceof AbstractFile) { } else if (object instanceof AbstractFile) {
name = FileUtilities.getNameWithoutExtension(((AbstractFile) object).getName()); name = getNameWithoutExtension(((AbstractFile) object).getName());
} }
// remove embedded checksum from name, if any // remove embedded checksum from name, if any
@ -173,9 +196,10 @@ public enum MatchSimilarityMetric implements SimilarityMetric {
public static SimilarityMetric[] defaultSequence() { public static SimilarityMetric[] defaultSequence() {
// 1. pass: match by file length (fast, but only works when matching torrents or files) // 1. pass: match by file length (fast, but only works when matching torrents or files)
// 2. pass: match by season / episode numbers // 2. pass: match by season / episode numbers
// 3. pass: match by generic name similarity (slow, but most matches will have been determined in second pass) // 3. pass: match by checking series/episode title against the file path
// 4. pass: match by generic numeric similarity // 4. pass: match by generic name similarity (slow, but most matches will have been determined in second pass)
return new SimilarityMetric[] { FileSize, EpisodeIdentifier, Name, Numeric }; // 5. pass: match by generic numeric similarity
return new SimilarityMetric[] { FileSize, EpisodeIdentifier, Title, Name, Numeric };
} }
} }