From c738e8783c2157972a86095ae2ff55addd7de858 Mon Sep 17 00:00:00 2001 From: Reinhard Pointner Date: Thu, 30 Jun 2016 17:51:07 +0800 Subject: [PATCH] Simplify NFO parser --- source/net/filebot/media/MediaDetection.java | 44 +++++-------------- source/net/filebot/util/StringUtilities.java | 7 ++- .../net/filebot/media/MediaDetectionTest.java | 10 +++++ 3 files changed, 28 insertions(+), 33 deletions(-) diff --git a/source/net/filebot/media/MediaDetection.java b/source/net/filebot/media/MediaDetection.java index 1f197f60..769a95ef 100644 --- a/source/net/filebot/media/MediaDetection.java +++ b/source/net/filebot/media/MediaDetection.java @@ -17,7 +17,6 @@ import java.io.File; import java.io.FileFilter; import java.io.IOException; import java.io.Serializable; -import java.net.URL; import java.text.CollationKey; import java.text.Collator; import java.util.ArrayList; @@ -1207,47 +1206,28 @@ public class MediaDetection { return names; } - public static Set<Integer> grepImdbId(CharSequence text) { + public static List<Integer> grepImdbId(CharSequence text) { // scan for imdb id patterns like tt1234567 - Matcher imdbMatch = Pattern.compile("(?<!\\p{Alnum})tt(\\d{7})(?!\\p{Alnum})", Pattern.CASE_INSENSITIVE).matcher(text); - Set<Integer> collection = new LinkedHashSet<Integer>(); - - while (imdbMatch.find()) { - collection.add(Integer.parseInt(imdbMatch.group(1))); - } - - return collection; + Pattern imdbId = Pattern.compile("(?<!\\p{Alnum})tt(\\d{7})(?!\\p{Alnum})", Pattern.CASE_INSENSITIVE); + return streamMatches(text, imdbId, m -> 
m.group(1)).map(Integer::new).collect(toList()); } - public static Set<Integer> grepTheTvdbId(CharSequence text) { + public static List<Integer> grepTheTvdbId(CharSequence text) { // scan for thetvdb id patterns like http://www.thetvdb.com/?tab=series&id=78874&lid=14 - Set<Integer> collection = new LinkedHashSet<Integer>(); - for (String token : Pattern.compile("[\\s\"<>|]+").split(text)) { - try { - URL url = new URL(token); - if (url.getHost().contains("thetvdb") && url.getQuery() != null && url.getQuery().contains("tab=series")) { - Matcher m = Pattern.compile("\\Wid=(\\d+)").matcher(url.getQuery()); - while (m.find()) { - collection.add(Integer.parseInt(m.group(1))); - } - } - } catch (Exception e) { - debug.finest(e::toString); - } - } - return collection; + Pattern tvdbUrl = Pattern.compile("http[s]?://www.thetvdb.com/[?]tab=series&id=(\\d+)", Pattern.CASE_INSENSITIVE); + return streamMatches(text, tvdbUrl, m -> m.group(1)).map(Integer::new).collect(toList()); } public static Movie grepMovie(File nfo, MovieIdentificationService resolver, Locale locale) throws Exception { - String contents = new String(readFile(nfo), "UTF-8"); - int imdbid = grepImdbId(contents).iterator().next(); - return resolver.getMovieDescriptor(new Movie(imdbid), locale); + String text = readTextFile(nfo); + int imdbId = grepImdbId(text).get(0); + return resolver.getMovieDescriptor(new Movie(imdbId), locale); } public static SeriesInfo grepSeries(File nfo, Locale locale) throws Exception { - String contents = new String(readFile(nfo), "UTF-8"); - int thetvdbid = grepTheTvdbId(contents).iterator().next(); - return WebServices.TheTVDB.getSeriesInfo(thetvdbid, locale); + String text = readTextFile(nfo); + int tvdbId = grepTheTvdbId(text).get(0); + return WebServices.TheTVDB.getSeriesInfo(tvdbId, locale); } public static List getProbableMatches(String query, Collection options, boolean alias, boolean strict) { diff --git a/source/net/filebot/util/StringUtilities.java b/source/net/filebot/util/StringUtilities.java index 
afed651f..58733a0d 100644 --- a/source/net/filebot/util/StringUtilities.java +++ b/source/net/filebot/util/StringUtilities.java @@ -13,6 +13,7 @@ import java.util.Objects; import java.util.Optional; import java.util.Spliterators.AbstractSpliterator; import java.util.function.Consumer; +import java.util.function.Function; import java.util.regex.MatchResult; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -54,7 +55,11 @@ public final class StringUtilities { } public static Stream<String> streamMatches(CharSequence s, Pattern pattern) { - return stream(new MatcherSpliterator(pattern.matcher(s)), false).map(MatchResult::group); + return streamMatches(s, pattern, MatchResult::group); + } + + public static <T> Stream<T> streamMatches(CharSequence s, Pattern pattern, Function<MatchResult, T> mapper) { + return stream(new MatcherSpliterator(pattern.matcher(s)), false).map(mapper); } public static boolean find(String s, Pattern pattern) { diff --git a/test/net/filebot/media/MediaDetectionTest.java b/test/net/filebot/media/MediaDetectionTest.java index 5b6ec36f..a97a4edb 100644 --- a/test/net/filebot/media/MediaDetectionTest.java +++ b/test/net/filebot/media/MediaDetectionTest.java @@ -26,4 +26,14 @@ public class MediaDetectionTest { assertEquals("[]", MediaDetection.detectSeriesNames(singleton(new File("Movie/LOTR.2001.AVC-1080")), false, Locale.ENGLISH).toString()); } + @Test + public void grepImdbId() throws Exception { + assertEquals("[499549]", MediaDetection.grepImdbId("@see http://www.imdb.com/title/tt0499549/").toString()); + } + + @Test + public void grepTheTvdbId() throws Exception { + assertEquals("[78874]", MediaDetection.grepTheTvdbId("@see http://www.thetvdb.com/?tab=series&id=78874&lid=14").toString()); + } + }