* improved SeriesNameMatcher
* slightly modified season episode regex
* added ehcache to fatjar build
This commit is contained in:
parent
9fd13dceae
commit
f7fdc5b5db
|
@ -92,6 +92,10 @@
|
|||
<include name="**/*.class" />
|
||||
<include name="**/*.properties" />
|
||||
</zipfileset>
|
||||
|
||||
<zipfileset src="${dir.lib}/ehcache.jar">
|
||||
<include name="net/sf/ehcache/**" />
|
||||
</zipfileset>
|
||||
</jar>
|
||||
</target>
|
||||
|
||||
|
|
|
@ -2,12 +2,14 @@
|
|||
package net.sourceforge.filebot;
|
||||
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileFilter;
|
||||
import java.util.AbstractList;
|
||||
import java.util.List;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import net.sourceforge.tuned.FileUtilities;
|
||||
import net.sourceforge.tuned.FileUtilities.ExtensionFileFilter;
|
||||
|
||||
|
||||
|
@ -63,10 +65,6 @@ public final class FileBotUtilities {
|
|||
|
||||
|
||||
public static String join(Object[] values, String separator) {
|
||||
if (values == null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
StringBuilder sb = new StringBuilder();
|
||||
|
||||
for (int i = 0; i < values.length; i++) {
|
||||
|
@ -81,12 +79,12 @@ public final class FileBotUtilities {
|
|||
}
|
||||
|
||||
|
||||
public static List<String> asStringList(final List<?> list) {
|
||||
public static List<String> asFileNameList(final List<File> list) {
|
||||
return new AbstractList<String>() {
|
||||
|
||||
@Override
|
||||
public String get(int index) {
|
||||
return list.get(index).toString();
|
||||
return FileUtilities.getName(list.get(index));
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -23,7 +23,7 @@ public class SeasonEpisodeMatcher {
|
|||
patterns[1] = new SeasonEpisodePattern("(?<!\\p{Alnum})(\\d{1,2})x(\\d{1,3})(?!\\p{Digit})");
|
||||
|
||||
// match patterns like 01, 102, 1003 (enclosed in separators)
|
||||
patterns[2] = new SeasonEpisodePattern("(?<=^|[\\._ ])([0-2]?\\d?)(\\d{2})(?=[\\._ ]|$)");
|
||||
patterns[2] = new SeasonEpisodePattern("(?<=^|[\\._ ])([0-1]?\\d?)(\\d{2})(?=[\\._ ]|$)");
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -4,6 +4,7 @@ package net.sourceforge.filebot.similarity;
|
|||
|
||||
import static net.sourceforge.filebot.FileBotUtilities.join;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.AbstractCollection;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
|
@ -16,6 +17,9 @@ import java.util.List;
|
|||
import java.util.Map;
|
||||
import java.util.Scanner;
|
||||
import java.util.TreeMap;
|
||||
import java.util.Map.Entry;
|
||||
|
||||
import net.sourceforge.filebot.FileBotUtilities;
|
||||
|
||||
|
||||
public class SeriesNameMatcher {
|
||||
|
@ -26,20 +30,45 @@ public class SeriesNameMatcher {
|
|||
|
||||
|
||||
public SeriesNameMatcher(int threshold) {
|
||||
if (threshold <= 0)
|
||||
if (threshold < 0)
|
||||
throw new IllegalArgumentException("threshold must be greater than 0");
|
||||
|
||||
this.threshold = threshold;
|
||||
}
|
||||
|
||||
|
||||
public Collection<String> matchAll(List<String> names) {
|
||||
public String match(File file) {
|
||||
return match(file.getName(), file.getParent());
|
||||
}
|
||||
|
||||
|
||||
public Collection<String> matchAll(File... files) {
|
||||
SeriesNameCollection seriesNames = new SeriesNameCollection();
|
||||
|
||||
// use pattern matching with frequency threshold
|
||||
// group files by parent folder
|
||||
for (Entry<File, String[]> entry : mapNamesByFolder(files).entrySet()) {
|
||||
String parent = entry.getKey().getName();
|
||||
String[] names = entry.getValue();
|
||||
|
||||
for (String nameMatch : matchAll(names)) {
|
||||
String commonMatch = matchByFirstCommonWordSequence(nameMatch, parent);
|
||||
|
||||
// prefer common match, but use name match if there is no matching word sequence
|
||||
seriesNames.add(commonMatch != null ? commonMatch : nameMatch);
|
||||
}
|
||||
}
|
||||
|
||||
return seriesNames;
|
||||
}
|
||||
|
||||
|
||||
public Collection<String> matchAll(String... names) {
|
||||
SeriesNameCollection seriesNames = new SeriesNameCollection();
|
||||
|
||||
// 1. use pattern matching with frequency threshold
|
||||
seriesNames.addAll(flatMatchAll(names));
|
||||
|
||||
// deep match common word sequences
|
||||
// 2. match common word sequences
|
||||
seriesNames.addAll(deepMatchAll(names));
|
||||
|
||||
return seriesNames;
|
||||
|
@ -49,11 +78,11 @@ public class SeriesNameMatcher {
|
|||
/**
|
||||
* Try to match and verify all series names using known season episode patterns.
|
||||
*
|
||||
* @param names list of episode names
|
||||
* @return series names that have been matched one or multiple times depending on the size
|
||||
* of the given list
|
||||
* @param names episode names
|
||||
* @return series names that have been matched one or multiple times depending on the
|
||||
* threshold
|
||||
*/
|
||||
protected Collection<String> flatMatchAll(Iterable<String> names) {
|
||||
private Collection<String> flatMatchAll(String[] names) {
|
||||
ThresholdCollection<String> seriesNames = new ThresholdCollection<String>(threshold, String.CASE_INSENSITIVE_ORDER);
|
||||
|
||||
for (String name : names) {
|
||||
|
@ -74,9 +103,9 @@ public class SeriesNameMatcher {
|
|||
* @param names list of episode names
|
||||
* @return all common word sequences that have been found
|
||||
*/
|
||||
protected Collection<String> deepMatchAll(List<String> names) {
|
||||
// don't use common word sequence matching for less than 5 names
|
||||
if (names.size() < threshold) {
|
||||
private Collection<String> deepMatchAll(String[] names) {
|
||||
// can't use common word sequence matching for less than 2 names
|
||||
if (names.length < 2 || names.length < threshold) {
|
||||
return Collections.emptySet();
|
||||
}
|
||||
|
||||
|
@ -90,23 +119,44 @@ public class SeriesNameMatcher {
|
|||
// recursive divide and conquer
|
||||
List<String> results = new ArrayList<String>();
|
||||
|
||||
if (names.size() >= 2) {
|
||||
// split list in two and try to match common word sequence on those
|
||||
results.addAll(deepMatchAll(names.subList(0, names.size() / 2)));
|
||||
results.addAll(deepMatchAll(names.subList(names.size() / 2, names.size())));
|
||||
}
|
||||
// split list in two and try to match common word sequence on those
|
||||
results.addAll(deepMatchAll(Arrays.copyOfRange(names, 0, names.length / 2)));
|
||||
results.addAll(deepMatchAll(Arrays.copyOfRange(names, names.length / 2, names.length)));
|
||||
|
||||
return results;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Match series name using season episode pattern and then try to find a common word
|
||||
* sequence between the first match and the given parent.
|
||||
*
|
||||
* @param name episode name
|
||||
* @param parent a string that contains the series name
|
||||
* @return a likely series name
|
||||
*/
|
||||
public String match(String name, String parent) {
|
||||
String nameMatch = matchBySeasonEpisodePattern(name);
|
||||
|
||||
if (nameMatch != null) {
|
||||
String commonMatch = matchByFirstCommonWordSequence(nameMatch, parent);
|
||||
|
||||
if (commonMatch != null) {
|
||||
return commonMatch;
|
||||
}
|
||||
}
|
||||
|
||||
return nameMatch;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Try to match a series name from the given episode name using known season episode
|
||||
* patterns.
|
||||
*
|
||||
* @param name episode name
|
||||
* @return a substring of the given name that ends before the first occurrence of a season
|
||||
* episode pattern, or null
|
||||
* episode pattern, or null if there is no such pattern
|
||||
*/
|
||||
public String matchBySeasonEpisodePattern(String name) {
|
||||
int seasonEpisodePosition = seasonEpisodeMatcher.find(name);
|
||||
|
@ -126,10 +176,9 @@ public class SeriesNameMatcher {
|
|||
* @param names various episode names (5 or more for accurate results)
|
||||
* @return a word sequence all episode names have in common, or null
|
||||
*/
|
||||
public String matchByFirstCommonWordSequence(Collection<String> names) {
|
||||
if (names.size() <= 1) {
|
||||
// can't match common sequence from less than two names
|
||||
return null;
|
||||
public String matchByFirstCommonWordSequence(String... names) {
|
||||
if (names.length < 2) {
|
||||
throw new IllegalArgumentException("Can't match common sequence from less than two names");
|
||||
}
|
||||
|
||||
String[] common = null;
|
||||
|
@ -151,14 +200,19 @@ public class SeriesNameMatcher {
|
|||
}
|
||||
}
|
||||
|
||||
// join will return null, if common is null
|
||||
if (common == null)
|
||||
return null;
|
||||
|
||||
return join(common, " ");
|
||||
}
|
||||
|
||||
|
||||
protected String normalize(String name) {
|
||||
// remove group names (remove any [...])
|
||||
name = name.replaceAll("\\[[^\\]]+\\]", "");
|
||||
// normalize brackets, convert (...) to [...]
|
||||
name = name.replace('(', '[').replace(')', ']');
|
||||
|
||||
// remove group names, any [...]
|
||||
name = name.replaceAll("\\[[^\\[]+\\]", "");
|
||||
|
||||
// remove special characters
|
||||
name = name.replaceAll("[\\p{Punct}\\p{Space}]+", " ");
|
||||
|
@ -196,6 +250,33 @@ public class SeriesNameMatcher {
|
|||
}
|
||||
|
||||
|
||||
private Map<File, String[]> mapNamesByFolder(File... files) {
|
||||
Map<File, List<File>> filesByFolder = new LinkedHashMap<File, List<File>>();
|
||||
|
||||
for (File file : files) {
|
||||
File folder = file.getParentFile();
|
||||
|
||||
List<File> list = filesByFolder.get(folder);
|
||||
|
||||
if (list == null) {
|
||||
list = new ArrayList<File>();
|
||||
filesByFolder.put(folder, list);
|
||||
}
|
||||
|
||||
list.add(file);
|
||||
}
|
||||
|
||||
// convert folder->files map to folder->names map
|
||||
Map<File, String[]> namesByFolder = new LinkedHashMap<File, String[]>();
|
||||
|
||||
for (Entry<File, List<File>> entry : filesByFolder.entrySet()) {
|
||||
namesByFolder.put(entry.getKey(), FileBotUtilities.asFileNameList(entry.getValue()).toArray(new String[0]));
|
||||
}
|
||||
|
||||
return namesByFolder;
|
||||
}
|
||||
|
||||
|
||||
protected static class SeriesNameCollection extends AbstractCollection<String> {
|
||||
|
||||
private final Map<String, String> data = new LinkedHashMap<String, String>();
|
||||
|
@ -272,30 +353,30 @@ public class SeriesNameMatcher {
|
|||
|
||||
|
||||
@Override
|
||||
public boolean add(E e) {
|
||||
Collection<E> buffer = limbo.get(e);
|
||||
public boolean add(E value) {
|
||||
Collection<E> buffer = limbo.get(value);
|
||||
|
||||
if (buffer == null) {
|
||||
// initialize buffer
|
||||
buffer = new ArrayList<E>(threshold);
|
||||
limbo.put(e, buffer);
|
||||
limbo.put(value, buffer);
|
||||
}
|
||||
|
||||
if (buffer == heaven) {
|
||||
// threshold reached
|
||||
heaven.add(e);
|
||||
heaven.add(value);
|
||||
return true;
|
||||
}
|
||||
|
||||
// add element to buffer
|
||||
buffer.add(e);
|
||||
buffer.add(value);
|
||||
|
||||
// check if threshold has been reached
|
||||
if (buffer.size() >= threshold) {
|
||||
heaven.addAll(buffer);
|
||||
|
||||
// replace buffer with heaven
|
||||
limbo.put(e, heaven);
|
||||
limbo.put(value, heaven);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
|
@ -2,7 +2,6 @@
|
|||
package net.sourceforge.filebot.ui;
|
||||
|
||||
|
||||
import static net.sourceforge.filebot.FileBotUtilities.asStringList;
|
||||
import static net.sourceforge.filebot.Settings.getApplicationName;
|
||||
|
||||
import java.awt.BorderLayout;
|
||||
|
@ -64,10 +63,10 @@ public class FileBotWindow extends JFrame implements ListSelectionListener {
|
|||
|
||||
setSize(760, 615);
|
||||
|
||||
// restore the panel selection from last time,
|
||||
//TODO restore the panel selection from last time,
|
||||
// switch to EpisodeListPanel by default (e.g. first start)
|
||||
int selectedPanel = asStringList(panelSelectionList.getPanelModel()).indexOf(Settings.userRoot().get("selectedPanel"));
|
||||
panelSelectionList.setSelectedIndex(selectedPanel);
|
||||
// int selectedPanel = asStringList(panelSelectionList.getPanelModel()).indexOf(Settings.userRoot().get("selectedPanel"));
|
||||
// panelSelectionList.setSelectedIndex(selectedPanel);
|
||||
|
||||
// connect message handlers to message bus
|
||||
MessageBus.getDefault().addMessageHandler("panel", panelSelectMessageHandler);
|
||||
|
|
|
@ -9,10 +9,12 @@ import static net.sourceforge.tuned.FileUtilities.containsOnly;
|
|||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.logging.Level;
|
||||
import java.util.logging.Logger;
|
||||
|
||||
import net.sourceforge.filebot.FileBotUtilities;
|
||||
import net.sourceforge.filebot.torrent.Torrent;
|
||||
import net.sourceforge.filebot.ui.FileBotList;
|
||||
import net.sourceforge.filebot.ui.transfer.FileTransferablePolicy;
|
||||
|
@ -51,9 +53,7 @@ class FileListTransferablePolicy extends FileTransferablePolicy {
|
|||
} else if (containsOnly(files, TORRENT_FILES)) {
|
||||
loadTorrents(files);
|
||||
} else {
|
||||
for (File file : files) {
|
||||
list.getModel().add(FileUtilities.getName(file));
|
||||
}
|
||||
list.getModel().addAll(FileBotUtilities.asFileNameList(files));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -65,9 +65,7 @@ class FileListTransferablePolicy extends FileTransferablePolicy {
|
|||
}
|
||||
|
||||
for (File folder : folders) {
|
||||
for (File file : folder.listFiles()) {
|
||||
list.getModel().add(FileUtilities.getName(file));
|
||||
}
|
||||
list.getModel().addAll(FileBotUtilities.asFileNameList(Arrays.asList(folder.listFiles())));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -3,10 +3,10 @@ package net.sourceforge.filebot.ui.panel.rename;
|
|||
|
||||
|
||||
import static net.sourceforge.filebot.FileBotUtilities.SUBTITLE_FILES;
|
||||
import static net.sourceforge.filebot.FileBotUtilities.asStringList;
|
||||
import static net.sourceforge.filebot.web.Episode.formatEpisodeNumbers;
|
||||
import static net.sourceforge.tuned.FileUtilities.FILES;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
|
@ -52,9 +52,16 @@ class AutoEpisodeListMatcher extends SwingWorker<List<Match<FileEntry, Episode>>
|
|||
|
||||
|
||||
protected Collection<String> matchSeriesNames(List<FileEntry> episodes) {
|
||||
int threshold = Math.min(episodes.size(), 5);
|
||||
File[] files = new File[episodes.size()];
|
||||
|
||||
return new SeriesNameMatcher(threshold).matchAll(asStringList(episodes));
|
||||
for (int i = 0; i < files.length; i++) {
|
||||
files[i] = episodes.get(i).getFile();
|
||||
}
|
||||
|
||||
// allow matching of a small number of episodes, by setting threshold = length if length < 5
|
||||
int threshold = Math.min(files.length, 5);
|
||||
|
||||
return new SeriesNameMatcher(threshold).matchAll(files);
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -15,6 +15,12 @@ public class SeriesNameMatcherTest {
|
|||
private static SeriesNameMatcher matcher = new SeriesNameMatcher(5);
|
||||
|
||||
|
||||
@Test
|
||||
public void match() {
|
||||
assertEquals("Test Series", matcher.match("My Test Series - 1x01", "Test Series - Season 1"));
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void matchBeforeSeasonEpisodePattern() {
|
||||
assertEquals("The Test", matcher.matchBySeasonEpisodePattern("The Test - 1x01"));
|
||||
|
@ -30,7 +36,10 @@ public class SeriesNameMatcherTest {
|
|||
assertEquals("The Test", matcher.normalize("_The_Test_-_ ..."));
|
||||
|
||||
// brackets
|
||||
assertEquals("Luffy", matcher.normalize("[strawhat] Luffy [D.] [@Monkey]"));
|
||||
assertEquals("Luffy", matcher.normalize("[strawhat] Luffy [D.] [#Monkey]"));
|
||||
|
||||
// invalid brackets
|
||||
assertEquals("strawhat Luffy", matcher.normalize("(strawhat [Luffy (#Monkey)"));
|
||||
}
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue