* fine-tune generic numeric sequence matching (e.g. Bones Staffel 1 Folge 5)
This commit is contained in:
parent
1e06994a59
commit
24f9b8d92a
|
@ -8,11 +8,15 @@ import static java.util.Collections.*;
|
||||||
import static net.sourceforge.filebot.Settings.*;
|
import static net.sourceforge.filebot.Settings.*;
|
||||||
import static net.sourceforge.filebot.similarity.Normalization.*;
|
import static net.sourceforge.filebot.similarity.Normalization.*;
|
||||||
import static net.sourceforge.tuned.FileUtilities.*;
|
import static net.sourceforge.tuned.FileUtilities.*;
|
||||||
|
import static net.sourceforge.tuned.StringUtilities.*;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
|
import java.util.ArrayList;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
import java.util.Scanner;
|
||||||
|
|
||||||
import net.sourceforge.filebot.similarity.SeasonEpisodeMatcher.SxE;
|
import net.sourceforge.filebot.similarity.SeasonEpisodeMatcher.SxE;
|
||||||
import net.sourceforge.filebot.vfs.FileInfo;
|
import net.sourceforge.filebot.vfs.FileInfo;
|
||||||
|
@ -187,7 +191,7 @@ public enum EpisodeMetrics implements SimilarityMetric {
|
||||||
}),
|
}),
|
||||||
|
|
||||||
// Match via common word sequence in episode name and file name
|
// Match via common word sequence in episode name and file name
|
||||||
SubstringSequence(new SequenceMatchSimilarity() {
|
NameSubstringSequence(new SequenceMatchSimilarity() {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public float getSimilarity(Object o1, Object o2) {
|
public float getSimilarity(Object o1, Object o2) {
|
||||||
|
@ -199,12 +203,19 @@ public enum EpisodeMetrics implements SimilarityMetric {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected String normalize(Object object) {
|
protected String normalize(Object object) {
|
||||||
|
if (object instanceof Episode) {
|
||||||
|
object = removeTrailingBrackets(((Episode) object).getSeriesName());
|
||||||
|
} else if (object instanceof Movie) {
|
||||||
|
object = ((Movie) object).getName();
|
||||||
|
} else if (object instanceof File) {
|
||||||
|
object = getNameWithoutExtension(getRelativePathTail((File) object, 3).getPath());
|
||||||
|
}
|
||||||
// simplify file name, if possible
|
// simplify file name, if possible
|
||||||
return normalizeObject(object);
|
return normalizeObject(object);
|
||||||
}
|
}
|
||||||
}),
|
}),
|
||||||
|
|
||||||
// Match by generic name similarity
|
// Match by generic name similarity (round rank)
|
||||||
Name(new NameSimilarityMetric() {
|
Name(new NameSimilarityMetric() {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -222,12 +233,33 @@ public enum EpisodeMetrics implements SimilarityMetric {
|
||||||
}
|
}
|
||||||
}),
|
}),
|
||||||
|
|
||||||
NumericSequence(new SequenceMatchSimilarity() {
|
// Match by generic name similarity (absolute)
|
||||||
|
AbsoluteName(new NameSimilarityMetric() {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected String normalize(Object object) {
|
protected String normalize(Object object) {
|
||||||
// simplify file name, if possible
|
// simplify file name, if possible
|
||||||
return normalizeObject(object).replaceAll("\\D+", " ").trim();
|
return normalizeObject(object);
|
||||||
|
}
|
||||||
|
}),
|
||||||
|
|
||||||
|
NumericSequence(new SequenceMatchSimilarity() {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected String normalize(Object object) {
|
||||||
|
if (object instanceof Episode) {
|
||||||
|
object = EpisodeFormat.SeasonEpisode.formatSxE((Episode) object);
|
||||||
|
} else if (object instanceof Movie) {
|
||||||
|
object = ((Movie) object).getYear();
|
||||||
|
}
|
||||||
|
|
||||||
|
// simplify file name if possible and extract numbers
|
||||||
|
List<Integer> numbers = new ArrayList<Integer>(4);
|
||||||
|
Scanner scanner = new Scanner(normalizeObject(object)).useDelimiter("\\D+");
|
||||||
|
while (scanner.hasNextInt()) {
|
||||||
|
numbers.add(scanner.nextInt());
|
||||||
|
}
|
||||||
|
return join(numbers, " ");
|
||||||
}
|
}
|
||||||
}),
|
}),
|
||||||
|
|
||||||
|
@ -411,9 +443,9 @@ public enum EpisodeMetrics implements SimilarityMetric {
|
||||||
// 7 pass: prefer episodes that were aired closer to the last modified date of the file
|
// 7 pass: prefer episodes that were aired closer to the last modified date of the file
|
||||||
// 8 pass: resolve remaining collisions via absolute string similarity
|
// 8 pass: resolve remaining collisions via absolute string similarity
|
||||||
if (includeFileMetrics) {
|
if (includeFileMetrics) {
|
||||||
return new SimilarityMetric[] { FileSize, new MetricCascade(FileName, EpisodeFunnel), EpisodeBalancer, SubstringFields, MetaAttributes, new MetricCascade(SubstringSequence, Name), Numeric, NumericSequence, Name, TimeStamp, new NameSimilarityMetric() };
|
return new SimilarityMetric[] { FileSize, new MetricCascade(FileName, EpisodeFunnel), EpisodeBalancer, SubstringFields, MetaAttributes, new MetricCascade(NameSubstringSequence, Name), Numeric, NumericSequence, Name, TimeStamp, AbsoluteName };
|
||||||
} else {
|
} else {
|
||||||
return new SimilarityMetric[] { EpisodeFunnel, EpisodeBalancer, SubstringFields, MetaAttributes, new MetricCascade(SubstringSequence, Name), Numeric, NumericSequence, Name, TimeStamp, new NameSimilarityMetric() };
|
return new SimilarityMetric[] { EpisodeFunnel, EpisodeBalancer, SubstringFields, MetaAttributes, new MetricCascade(NameSubstringSequence, Name), Numeric, NumericSequence, Name, TimeStamp, AbsoluteName };
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -148,13 +148,14 @@ public class Matcher<V, C> {
|
||||||
for (Match<V, C> possibleMatch : possibleMatches) {
|
for (Match<V, C> possibleMatch : possibleMatches) {
|
||||||
float similarity = metric.getSimilarity(possibleMatch.getValue(), possibleMatch.getCandidate());
|
float similarity = metric.getSimilarity(possibleMatch.getValue(), possibleMatch.getCandidate());
|
||||||
|
|
||||||
Set<Match<V, C>> matchSet = similarityMap.get(similarity);
|
// DEBUG
|
||||||
|
// System.out.format("%s: %.04f: %s%n", metric, similarity, possibleMatch);
|
||||||
|
|
||||||
|
Set<Match<V, C>> matchSet = similarityMap.get(similarity);
|
||||||
if (matchSet == null) {
|
if (matchSet == null) {
|
||||||
matchSet = new LinkedHashSet<Match<V, C>>();
|
matchSet = new LinkedHashSet<Match<V, C>>();
|
||||||
similarityMap.put(similarity, matchSet);
|
similarityMap.put(similarity, matchSet);
|
||||||
}
|
}
|
||||||
|
|
||||||
matchSet.add(possibleMatch);
|
matchSet.add(possibleMatch);
|
||||||
|
|
||||||
// unwind this thread if we have been interrupted
|
// unwind this thread if we have been interrupted
|
||||||
|
|
|
@ -24,6 +24,11 @@ public class SequenceMatchSimilarity implements SimilarityMetric {
|
||||||
if (match == null)
|
if (match == null)
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
|
return similarity(match, s1, s2);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
protected float similarity(String match, String s1, String s2) {
|
||||||
return (float) match.length() / min(s1.length(), s2.length());
|
return (float) match.length() / min(s1.length(), s2.length());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -47,7 +47,7 @@ public final class SubtitleUtilities {
|
||||||
Map<File, SubtitleDescriptor> subtitleByVideo = new LinkedHashMap<File, SubtitleDescriptor>();
|
Map<File, SubtitleDescriptor> subtitleByVideo = new LinkedHashMap<File, SubtitleDescriptor>();
|
||||||
|
|
||||||
// optimize for generic media <-> subtitle matching
|
// optimize for generic media <-> subtitle matching
|
||||||
SimilarityMetric[] metrics = new SimilarityMetric[] { EpisodeFunnel, EpisodeBalancer, SubstringSequence, new MetricCascade(SubstringSequence, Name), Numeric, new NameSimilarityMetric() };
|
SimilarityMetric[] metrics = new SimilarityMetric[] { EpisodeFunnel, EpisodeBalancer, NameSubstringSequence, new MetricCascade(NameSubstringSequence, Name), Numeric, new NameSimilarityMetric() };
|
||||||
|
|
||||||
// subtitle verification metric specifically excluding SxE mismatches
|
// subtitle verification metric specifically excluding SxE mismatches
|
||||||
SimilarityMetric absoluteSeasonEpisode = new SimilarityMetric() {
|
SimilarityMetric absoluteSeasonEpisode = new SimilarityMetric() {
|
||||||
|
@ -61,7 +61,7 @@ public final class SubtitleUtilities {
|
||||||
return f < 1 ? -1 : 1;
|
return f < 1 ? -1 : 1;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
SimilarityMetric sanity = new MetricCascade(absoluteSeasonEpisode, AirDate, new MetricAvg(SubstringSequence, Name));
|
SimilarityMetric sanity = new MetricCascade(absoluteSeasonEpisode, AirDate, new MetricAvg(NameSubstringSequence, Name));
|
||||||
|
|
||||||
// first match everything as best as possible, then filter possibly bad matches
|
// first match everything as best as possible, then filter possibly bad matches
|
||||||
Matcher<File, SubtitleDescriptor> matcher = new Matcher<File, SubtitleDescriptor>(files, subtitles, false, metrics);
|
Matcher<File, SubtitleDescriptor> matcher = new Matcher<File, SubtitleDescriptor>(files, subtitles, false, metrics);
|
||||||
|
|
|
@ -352,9 +352,15 @@ public final class FileUtilities {
|
||||||
|
|
||||||
|
|
||||||
public static List<File> listPath(File file) {
|
public static List<File> listPath(File file) {
|
||||||
|
return listPathTail(file, Integer.MAX_VALUE);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public static List<File> listPathTail(File file, int tailSize) {
|
||||||
LinkedList<File> nodes = new LinkedList<File>();
|
LinkedList<File> nodes = new LinkedList<File>();
|
||||||
|
|
||||||
for (File node = file; node != null && !UNC_PREFIX.equals(node.toString()); node = node.getParentFile()) {
|
File node = file;
|
||||||
|
for (int i = 0; node != null && i < tailSize && !UNC_PREFIX.equals(node.toString()); i++, node = node.getParentFile()) {
|
||||||
nodes.addFirst(node);
|
nodes.addFirst(node);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -362,6 +368,17 @@ public final class FileUtilities {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public static File getRelativePathTail(File file, int tailSize) {
|
||||||
|
File f = null;
|
||||||
|
for (File it : listPathTail(file, tailSize)) {
|
||||||
|
if (it.getParentFile() != null) {
|
||||||
|
f = new File(f, it.getName());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return f;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
public static List<File> listFiles(Iterable<File> folders, int maxDepth, boolean listHiddenFiles) {
|
public static List<File> listFiles(Iterable<File> folders, int maxDepth, boolean listHiddenFiles) {
|
||||||
List<File> files = new ArrayList<File>();
|
List<File> files = new ArrayList<File>();
|
||||||
|
|
||||||
|
|
|
@ -123,6 +123,7 @@ Hard.Subbed
|
||||||
HBO
|
HBO
|
||||||
hd
|
hd
|
||||||
HDRip
|
HDRip
|
||||||
|
Hi10P
|
||||||
Hindi
|
Hindi
|
||||||
History.Channel
|
History.Channel
|
||||||
HQ
|
HQ
|
||||||
|
|
|
@ -1,2 +1,3 @@
|
||||||
HIMYM How I Met your Mother
|
HIMYM How I Met your Mother
|
||||||
Hml8p Homeland
|
Hml8p Homeland
|
||||||
|
NCIS.LA NCIS: Los Angeles
|
|
@ -3,8 +3,8 @@ def input = []
|
||||||
def failOnError = _args.conflict == 'fail'
|
def failOnError = _args.conflict == 'fail'
|
||||||
|
|
||||||
// print input parameters
|
// print input parameters
|
||||||
_args.bindings?.each{ _log.finest("Parameter: $it.key = $it.value") }
|
_args.bindings?.each{ _log.fine("Parameter: $it.key = $it.value") }
|
||||||
args.each{ _log.finest("Argument: $it") }
|
args.each{ _log.fine("Argument: $it") }
|
||||||
args.findAll{ !it.exists() }.each{ throw new Exception("File not found: $it") }
|
args.findAll{ !it.exists() }.each{ throw new Exception("File not found: $it") }
|
||||||
|
|
||||||
// check user-defined pre-condition
|
// check user-defined pre-condition
|
||||||
|
@ -34,7 +34,7 @@ def format = [
|
||||||
tvs: tryQuietly{ seriesFormat } ?: '''TV Shows/{n}/{episode.special ? "Special" : "Season "+s}/{n} - {episode.special ? "S00E"+special.pad(2) : s00e00} - {t}{".$lang"}''',
|
tvs: tryQuietly{ seriesFormat } ?: '''TV Shows/{n}/{episode.special ? "Special" : "Season "+s}/{n} - {episode.special ? "S00E"+special.pad(2) : s00e00} - {t}{".$lang"}''',
|
||||||
anime: tryQuietly{ animeFormat } ?: '''Anime/{n}/{n} - {sxe} - {t}''',
|
anime: tryQuietly{ animeFormat } ?: '''Anime/{n}/{n} - {sxe} - {t}''',
|
||||||
mov: tryQuietly{ movieFormat } ?: '''Movies/{n} ({y})/{n} ({y}){" CD$pi"}{".$lang"}''',
|
mov: tryQuietly{ movieFormat } ?: '''Movies/{n} ({y})/{n} ({y}){" CD$pi"}{".$lang"}''',
|
||||||
music: tryQuietly{ musicFormat } ?: '''Music/{n}/{album}/{n} - {t}'''
|
music: tryQuietly{ musicFormat } ?: '''Music/{n}/{album+'/'}{pi.pad(2)+'. '}{artist} - {t}'''
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
@ -117,7 +117,7 @@ def groups = input.groupBy{ f ->
|
||||||
|
|
||||||
def tvs = detectSeriesName(f)
|
def tvs = detectSeriesName(f)
|
||||||
def mov = detectMovie(f, false)
|
def mov = detectMovie(f, false)
|
||||||
println "$f.name [series: $tvs, movie: $mov]"
|
_log.fine("$f.name [series: $tvs, movie: $mov]")
|
||||||
|
|
||||||
// DECIDE EPISODE VS MOVIE (IF NOT CLEAR)
|
// DECIDE EPISODE VS MOVIE (IF NOT CLEAR)
|
||||||
if (tvs && mov) {
|
if (tvs && mov) {
|
||||||
|
@ -129,10 +129,10 @@ def groups = input.groupBy{ f ->
|
||||||
|
|
||||||
// S00E00 | 2012.07.21 | One Piece 217 | Firefly - Serenity | [Taken 1, Taken 2, Taken 3, Taken 4, ..., Taken 10]
|
// S00E00 | 2012.07.21 | One Piece 217 | Firefly - Serenity | [Taken 1, Taken 2, Taken 3, Taken 4, ..., Taken 10]
|
||||||
if (parseEpisodeNumber(fn, true) || parseDate(fn) || (fn =~ sn && parseEpisodeNumber(fn.after(sn), false)) || fn.after(sn) =~ / - .+/ || f.dir.listFiles{ it.isVideo() && norm(it.name) =~ sn && it.name =~ /\b\d{1,3}\b/}.size() >= 10) {
|
if (parseEpisodeNumber(fn, true) || parseDate(fn) || (fn =~ sn && parseEpisodeNumber(fn.after(sn), false)) || fn.after(sn) =~ / - .+/ || f.dir.listFiles{ it.isVideo() && norm(it.name) =~ sn && it.name =~ /\b\d{1,3}\b/}.size() >= 10) {
|
||||||
println "Exclude Movie: $mov"
|
_log.fine("Exclude Movie: $mov")
|
||||||
mov = null
|
mov = null
|
||||||
} else if ((detectMovie(f, true) && [dn, fn].find{ it =~ /(19|20)\d{2}/ }) || [dn, fn].find{ it =~ mn && !(it.after(mn) =~ /\b\d{1,3}\b/) }) {
|
} else if ((detectMovie(f, true) && [dn, fn].find{ it =~ /(19|20)\d{2}/ }) || [dn, fn].find{ it =~ mn && !(it.after(mn) =~ /\b\d{1,3}\b/) }) {
|
||||||
println "Exclude Series: $tvs"
|
_log.fine("Exclude Series: $tvs")
|
||||||
tvs = null
|
tvs = null
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue