Fine-tune numeric rules

This commit is contained in:
Reinhard Pointner 2016-03-13 17:35:31 +00:00
parent b248b83dac
commit 02a958f22b
6 changed files with 63 additions and 28 deletions

View File

@ -1010,7 +1010,7 @@ public class MediaDetection {
return movies;
}
private static Pattern formatInfoPattern = releaseInfo.getVideoFormatPattern(false);
private static Pattern formatInfoPattern = releaseInfo.getVideoFormatPattern(true);
public static String stripFormatInfo(CharSequence name) {
return formatInfoPattern.matcher(name).replaceAll("");

View File

@ -11,7 +11,7 @@ pattern.video.s3d: ((H|HALF|F|FULL)[^\\p{Alnum}]{0,2})?(SBS|TAB|OU)
pattern.subtitle.tags: forced|HI|SDH|Director.?s.Commentary
# additional release info patterns
pattern.video.format: DivX|Xvid|AVC|(x|h)[.]?(264|265)|HEVC|3ivx|PGS|MP[E]?G[45]?|MP[34]|FLAC|(AAC|AC3|DD)(.?[2457][.]?[01])?|[26]ch|(Multi.)?DTS(.HD)?(.MA)?|TrueHD|[M0]?(720|1080)[pi]|[-](720|1080|2D|3D)|10.?bit|(24|30|60)FPS|Hi10[P]?|[\\p{Alpha}]{2,3}.(2[.]0|5[.]1)|(19|20)[0-9]+(.)S[0-9]+(?!(.)?E[0-9]+)|(?<=\\d+)v[0-4]
pattern.video.format: DivX|Xvid|AVC|(x|h)[.]?(264|265)|HEVC|3ivx|PGS|MP[E]?G[45]?|MP[34]|(FLAC|AAC|AC3|DD)(.?[2457][.]?[01])?|[26]ch|(Multi.)?DTS(.HD)?(.MA)?|TrueHD|[M0]?(720|1080)[pi]|(?<=[-])(720|1080|2D|3D)|10.?bit|(24|30|60)FPS|Hi10[P]?|[a-z]{2,3}.(2[.]0|5[.]1)|(19|20)[0-9]+(.)S[0-9]+(?!(.)?E[0-9]+)|(?<=\\d+)v[0-4]
# known release group names
url.release-groups: https://app.filebot.net/data/release-groups.txt.xz

View File

@ -14,7 +14,7 @@ import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.function.Predicate;
import java.util.function.Function;
import net.filebot.media.SmartSeasonEpisodeMatcher;
import net.filebot.similarity.SeasonEpisodeMatcher.SxE;
@ -126,29 +126,36 @@ public class EpisodeMatcher extends Matcher<File, Object> {
}
private boolean isMultiEpisode(Episode[] episodes) {
if (episodes.length < 2)
if (episodes.length < 2) {
return false;
}
// use getEpisode() or getSpecial() as number function
Function<Episode, Integer> number = stream(episodes).allMatch(e -> e.getSpecial() == null) ? e -> e.getEpisode() : e -> e.getSpecial();
// check episode sequence integrity
Integer seqIndex = null;
for (Episode it : episodes) {
// any illegal episode object breaks the chain
Integer num = it != null ? it.getEpisode() != null ? it.getEpisode() : it.getSpecial() : null;
if (num == null)
Integer i = number.apply(it);
if (i == null) {
return false;
}
// non-sequential next episode index breaks the chain (same episode is OK since DVD numbering allows for multiple episodes to share the same SxE numbers)
if (seqIndex != null) {
if (!(num.equals(seqIndex + 1) || num.equals(seqIndex))) {
if (!(i.equals(seqIndex + 1) || i.equals(seqIndex))) {
return false;
}
}
seqIndex = num;
seqIndex = i;
}
// check drill-down integrity
return stream(episodes).map(Episode::getSeriesName).allMatch(Predicate.isEqual(episodes[0].getSeriesName()));
return stream(episodes).skip(1).allMatch(e -> {
return episodes[0].getSeriesName().equals(e.getSeriesName());
});
}
}

View File

@ -466,6 +466,9 @@ public enum EpisodeMetrics implements SimilarityMetric {
for (String s2 : f2) {
if (s1 != null && s2 != null) {
max = max(super.getSimilarity(s1, s2), max);
if (max >= 1) {
return max;
}
}
}
}
@ -475,11 +478,10 @@ public enum EpisodeMetrics implements SimilarityMetric {
protected String[] fields(Object object) {
if (object instanceof Episode) {
Episode episode = (Episode) object;
String[] f = new String[4];
String[] f = new String[3];
f[0] = episode.getSeriesName();
f[1] = EpisodeFormat.SeasonEpisode.formatSxE(episode);
f[1] = episode.getSpecial() == null ? EpisodeFormat.SeasonEpisode.formatSxE(episode) : episode.getSpecial().toString();
f[2] = episode.getAbsolute() == null ? null : episode.getAbsolute().toString();
f[3] = episode.getSeason() == null || episode.getEpisode() == null ? null : String.format("%02d%02d", episode.getSeason(), episode.getEpisode());
return f;
}
@ -488,7 +490,7 @@ public enum EpisodeMetrics implements SimilarityMetric {
return new String[] { movie.getName(), String.valueOf(movie.getYear()) };
}
return new String[] { stripFormatInfo(normalizeObject(object)) };
return new String[] { normalizeObject(object) };
}
}),
@ -715,6 +717,9 @@ public enum EpisodeMetrics implements SimilarityMetric {
// remove checksums, any [...] or (...)
name = removeEmbeddedChecksum(name);
// remove obvious release info
name = stripFormatInfo(name);
synchronized (transliterator) {
name = transliterator.transform(name);
}

View File

@ -1,7 +1,11 @@
package net.filebot.media;
import static java.util.Collections.*;
import static org.junit.Assert.*;
import java.io.File;
import java.util.Locale;
import org.junit.Test;
public class MediaDetectionTest {
@ -11,4 +15,15 @@ public class MediaDetectionTest {
assertEquals("[2009]", MediaDetection.parseMovieYear("Avatar 2009 2100").toString());
assertEquals("[1955]", MediaDetection.parseMovieYear("1898 Sissi 1955").toString());
}
@Test
public void stripFormatInfo() throws Exception {
assertEquals("3.Idiots.PAL.DVD..", MediaDetection.stripFormatInfo("3.Idiots.PAL.DVD.DD5.1.x264"));
}
@Test
public void detectSeriesName() throws Exception {
assertEquals(null, MediaDetection.detectSeriesNames(singleton(new File("Movie/3.Idiots.PAL.DVD.DD5.1.x264")), true, false, Locale.ENGLISH));
}
}

View File

@ -9,7 +9,9 @@ import java.util.List;
import org.junit.Test;
import net.filebot.media.MediaDetection;
import net.filebot.web.Episode;
import net.filebot.web.SimpleDate;
public class EpisodeMetricsTest {
@ -24,21 +26,6 @@ public class EpisodeMetricsTest {
assertEquals(1.0 / 3, SubstringFields.getSimilarity(eY1T1, fY2T2), 0.01);
}
@Test
public void nameIgnoreEmbeddedChecksum() {
assertEquals(1, Name.getSimilarity("test", "test [EF62DF13]"), 0);
}
@Test
public void numericIgnoreEmbeddedChecksum() {
assertEquals(1, Numeric.getSimilarity("S01E02", "Season 1, Episode 2 [00A01E02]"), 0);
}
@Test
public void normalizeFile() {
assertEquals("abc", EpisodeMetrics.normalizeObject(new File("/folder/abc[EF62DF13].txt")));
}
@Test
public void matcherLevel2() throws Exception {
List<File> files = new ArrayList<File>();
@ -57,4 +44,25 @@ public class EpisodeMetricsTest {
assertEquals("Veronica Mars [1x19] Hot Dogs", m.get(1).getValue().getName());
assertEquals("Veronica Mars - 1x19 - Hot Dogs", m.get(1).getCandidate().toString());
}
@Test
public void nameIgnoreEmbeddedChecksum() {
assertEquals(1, Name.getSimilarity("test", "test [EF62DF13]"), 0);
}
@Test
public void numericIgnoreEmbeddedChecksum() {
assertEquals(1, Numeric.getSimilarity("S01E02", "Season 1, Episode 2 [00A01E02]"), 0);
}
@Test
public void numericNumbers() {
String fn = "SEED - 01 - [X 2.0]";
Episode e1 = new Episode("SEED", null, 1, "Enraged Eyes", 1, null, new SimpleDate(2004, 10, 9), null);
Episode s1 = new Episode("SEED", null, null, "EDITED", null, 1, new SimpleDate(2005, 1, 29), null);
assertEquals(0.5, Numeric.getSimilarity(fn, e1), 0.01);
assertEquals(0.5, Numeric.getSimilarity(fn, s1), 0.01);
}
}