* improved matching for patterns like EN_5.1 or JPN_2.0

This commit is contained in:
Reinhard Pointner 2016-01-08 12:26:42 +00:00
parent 13150d664c
commit 80e7da3820
5 changed files with 20 additions and 20 deletions

View File

@ -1022,6 +1022,12 @@ public class MediaDetection {
return movies;
}
// pattern obtained once from releaseInfo.getVideoFormatPattern(false); stripFormatInfo removes every match of it
private static final Pattern defaultIgnoreTokens = releaseInfo.getVideoFormatPattern(false);
/**
 * Deletes every occurrence of the default video format pattern from the given text.
 *
 * @param name the character sequence to clean
 * @return the input with each match of {@code defaultIgnoreTokens} replaced by the empty string
 */
public static String stripFormatInfo(CharSequence name) {
	final java.util.regex.Matcher formatTokens = defaultIgnoreTokens.matcher(name);
	return formatTokens.replaceAll("");
}
public static String stripReleaseInfo(String name, boolean strict) {
try {
return releaseInfo.cleanRelease(singleton(name), strict).iterator().next();

View File

@ -11,7 +11,7 @@ pattern.video.s3d: 3D[^\\p{Alnum}]?(H|HALF|F|FULL)?[^\\p{Alnum}]?(SBS|TAB|OU)
pattern.subtitle.tags: forced|HI|SDH|Director.?s.Commentary
# additional release info patterns
pattern.video.format: DivX|Xvid|AVC|x264|h264|h.264|HEVC|h265|h.265|3ivx|MPG|MPEG|MPEG4|MP3|AAC|AAC2.0|AAC5.1|AAC.2.0|AAC.5.1|AC3|AC3|AC3.2.0|AC3|AC3.5.1|dd20|dd51|2ch|6ch|DTS|Multi.DTS|DTS.HD|DTS.HD.MA|TrueHD|720p|1080p|M1080|10bit|10.bit|Hi10|Hi10P|(19|20)[0-9]+(.)S[0-9]+(?!(.)?E[0-9]+)|(?<=\\d+)v[0-4]
# NOTE(review): assuming this file is read via java.util.Properties, a single backslash before '.' is silently dropped,
# so the literal dots in the trailing language/channel alternative (e.g. EN_5.1, JPN_2.0) use doubled backslashes like the other escapes here
pattern.video.format: DivX|Xvid|AVC|x264|h264|h.264|HEVC|h265|h.265|3ivx|MPG|MPEG|MPEG4|MP3|FLAC|AAC|AAC2.0|AAC5.1|AAC.2.0|AAC.5.1|AC3|AC3|AC3.2.0|AC3|AC3.5.1|dd20|dd51|2ch|6ch|DTS|Multi.DTS|DTS.HD|DTS.HD.MA|TrueHD|720p|1080p|M1080|10bit|10.bit|Hi10|Hi10P|(19|20)[0-9]+(.)S[0-9]+(?!(.)?E[0-9]+)|(?<=\\d+)v[0-4]|[\\p{Alpha}]{2,3}.(2\\.0|5\\.1)
# known release group names
url.release-groups: http://app.filebot.net/data/release-groups.txt

View File

@ -2,15 +2,11 @@ package net.filebot.media;
import java.io.File;
import java.util.List;
import java.util.regex.Pattern;
import net.filebot.similarity.SeasonEpisodeMatcher;
public class SmartSeasonEpisodeMatcher extends SeasonEpisodeMatcher {
// make sure certain patterns like x264 or 720p will never be interpreted as SxE numbers
private final Pattern ignorePattern = new ReleaseInfo().getVideoFormatPattern(false);
/**
 * Creates a matcher by passing both arguments straight through to the {@code SeasonEpisodeMatcher} constructor.
 *
 * @param sanity filter handed to the superclass constructor
 * @param strict flag handed to the superclass constructor
 */
public SmartSeasonEpisodeMatcher(SeasonEpisodeFilter sanity, boolean strict) {
super(sanity, strict);
}
@ -20,7 +16,7 @@ public class SmartSeasonEpisodeMatcher extends SeasonEpisodeMatcher {
}
protected String clean(CharSequence name) {
return ignorePattern.matcher(name).replaceAll("");
return MediaDetection.stripFormatInfo(name);
}
@Override

View File

@ -3,6 +3,7 @@ package net.filebot.similarity;
import static java.lang.Math.*;
import static java.util.Collections.*;
import static java.util.regex.Pattern.*;
import static net.filebot.media.MediaDetection.*;
import static net.filebot.similarity.Normalization.*;
import static net.filebot.util.FileUtilities.*;
import static net.filebot.util.StringUtilities.*;
@ -18,7 +19,6 @@ import java.util.LinkedHashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Scanner;
import java.util.Set;
import java.util.logging.Level;
@ -26,7 +26,6 @@ import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import net.filebot.media.MediaDetection;
import net.filebot.media.SmartSeasonEpisodeMatcher;
import net.filebot.similarity.SeasonEpisodeMatcher.SxE;
import net.filebot.vfs.FileInfo;
@ -350,7 +349,7 @@ public enum EpisodeMetrics implements SimilarityMetric {
// check direct mappings first
try {
List<String> directMapping = MediaDetection.matchSeriesByDirectMapping(singleton(file));
List<String> directMapping = matchSeriesByDirectMapping(singleton(file));
if (directMapping.size() > 0) {
return directMapping;
}
@ -371,9 +370,7 @@ public enum EpisodeMetrics implements SimilarityMetric {
// likewise strip away any potential clutter
if (names != null) {
try {
return MediaDetection.releaseInfo.cleanRelease(names, true);
} catch (NoSuchElementException e) {
// keep default value in case all tokens are stripped away
return stripReleaseInfo(names, true);
} catch (IOException e) {
Logger.getLogger(EpisodeMetrics.class.getName()).log(Level.WARNING, e.getMessage());
}
@ -405,8 +402,8 @@ public enum EpisodeMetrics implements SimilarityMetric {
String s1 = normalizeObject(o1);
String s2 = normalizeObject(o2);
s1 = MediaDetection.stripReleaseInfo(s1, false);
s2 = MediaDetection.stripReleaseInfo(s2, false);
s1 = stripReleaseInfo(s1, false);
s2 = stripReleaseInfo(s2, false);
int length = min(s1.length(), s2.length());
s1 = s1.substring(0, length);
@ -499,7 +496,7 @@ public enum EpisodeMetrics implements SimilarityMetric {
return new String[] { movie.getName(), String.valueOf(movie.getYear()) };
}
return new String[] { normalizeObject(object) };
return new String[] { stripFormatInfo(normalizeObject(object)) };
}
}),
@ -653,7 +650,7 @@ public enum EpisodeMetrics implements SimilarityMetric {
// deserialize MetaAttributes if enabled and available
if (object instanceof File) {
Object metaObject = MediaDetection.readMetaInfo((File) object);
Object metaObject = readMetaInfo((File) object);
if (metaObject != null) {
return super.getProperties(metaObject);
}
@ -690,13 +687,13 @@ public enum EpisodeMetrics implements SimilarityMetric {
return result;
}
String name = object.toString();
// use name without extension
String name;
if (object instanceof File) {
name = getName((File) object);
} else if (object instanceof FileInfo) {
name = ((FileInfo) object).getName();
} else {
name = object.toString();
}
// remove checksums, any [...] or (...)
@ -706,7 +703,7 @@ public enum EpisodeMetrics implements SimilarityMetric {
name = transliterator.transform(name);
}
// remove/normalize special characters
// remove or normalize special characters
name = normalizePunctuation(name);
// normalize to lower case

View File

@ -368,6 +368,7 @@ REACTOR
READNFO
REAL.PROPER
RecordedTV
REMASTER
REMASTERED
RENOMBRAR
REPACK