* improved auto episode list matching
This commit is contained in:
parent
f7fdc5b5db
commit
7b61757fd7
|
@ -100,6 +100,18 @@ public final class FileBotUtilities {
|
|||
public static final FileFilter LIST_FILES = new ExtensionFileFilter("txt", "list", "");
|
||||
public static final FileFilter SUBTITLE_FILES = new ExtensionFileFilter("srt", "sub", "ssa", "ass", "smi");
|
||||
|
||||
/**
|
||||
* This filter does not look at the file extension, only at the file size. All files larger than
|
||||
* 10 MB (10 * FileUtilities.MEGA) will be accepted.
|
||||
*/
|
||||
public static final FileFilter MOVIE_FILES = new FileFilter() {
|
||||
|
||||
@Override
|
||||
public boolean accept(File file) {
|
||||
// File.length() returns 0L for a path that does not exist, so such paths are never accepted;
// for a directory the returned value is unspecified
return file.length() > 10 * FileUtilities.MEGA;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* Dummy constructor to prevent instantiation.
|
||||
|
|
|
@ -26,23 +26,13 @@ public class SeriesNameMatcher {
|
|||
|
||||
protected final SeasonEpisodeMatcher seasonEpisodeMatcher = new SeasonEpisodeMatcher();
|
||||
|
||||
protected final int threshold;
|
||||
|
||||
|
||||
public SeriesNameMatcher(int threshold) {
|
||||
if (threshold < 0)
|
||||
throw new IllegalArgumentException("threshold must be greater than 0");
|
||||
|
||||
this.threshold = threshold;
|
||||
}
|
||||
|
||||
|
||||
public String match(File file) {
|
||||
return match(file.getName(), file.getParent());
|
||||
}
|
||||
|
||||
|
||||
public Collection<String> matchAll(File... files) {
|
||||
public Collection<String> matchAll(File[] files) {
|
||||
SeriesNameCollection seriesNames = new SeriesNameCollection();
|
||||
|
||||
// group files by parent folder
|
||||
|
@ -62,14 +52,17 @@ public class SeriesNameMatcher {
|
|||
}
|
||||
|
||||
|
||||
public Collection<String> matchAll(String... names) {
|
||||
public Collection<String> matchAll(String[] names) {
|
||||
SeriesNameCollection seriesNames = new SeriesNameCollection();
|
||||
|
||||
// allow matching of a small number of episodes, by setting threshold = length if length < 5
|
||||
int threshold = Math.min(names.length, 5);
|
||||
|
||||
// 1. use pattern matching with frequency threshold
|
||||
seriesNames.addAll(flatMatchAll(names));
|
||||
seriesNames.addAll(flatMatchAll(names, threshold));
|
||||
|
||||
// 2. match common word sequences
|
||||
seriesNames.addAll(deepMatchAll(names));
|
||||
seriesNames.addAll(deepMatchAll(names, threshold));
|
||||
|
||||
return seriesNames;
|
||||
}
|
||||
|
@ -82,7 +75,7 @@ public class SeriesNameMatcher {
|
|||
* @return series names that have been matched one or multiple times depending on the
|
||||
* threshold
|
||||
*/
|
||||
private Collection<String> flatMatchAll(String[] names) {
|
||||
private Collection<String> flatMatchAll(String[] names, int threshold) {
|
||||
ThresholdCollection<String> seriesNames = new ThresholdCollection<String>(threshold, String.CASE_INSENSITIVE_ORDER);
|
||||
|
||||
for (String name : names) {
|
||||
|
@ -103,7 +96,7 @@ public class SeriesNameMatcher {
|
|||
* @param names list of episode names
|
||||
* @return all common word sequences that have been found
|
||||
*/
|
||||
private Collection<String> deepMatchAll(String[] names) {
|
||||
private Collection<String> deepMatchAll(String[] names, int threshold) {
|
||||
// can't use common word sequence matching for less than 2 names
|
||||
if (names.length < 2 || names.length < threshold) {
|
||||
return Collections.emptySet();
|
||||
|
@ -120,8 +113,8 @@ public class SeriesNameMatcher {
|
|||
List<String> results = new ArrayList<String>();
|
||||
|
||||
// split list in two and try to match common word sequence on those
|
||||
results.addAll(deepMatchAll(Arrays.copyOfRange(names, 0, names.length / 2)));
|
||||
results.addAll(deepMatchAll(Arrays.copyOfRange(names, names.length / 2, names.length)));
|
||||
results.addAll(deepMatchAll(Arrays.copyOfRange(names, 0, names.length / 2), threshold));
|
||||
results.addAll(deepMatchAll(Arrays.copyOfRange(names, names.length / 2, names.length), threshold));
|
||||
|
||||
return results;
|
||||
}
|
||||
|
@ -173,8 +166,9 @@ public class SeriesNameMatcher {
|
|||
/**
|
||||
* Try to match a series name from the first common word sequence.
|
||||
*
|
||||
* @param names various episode names (5 or more for accurate results)
|
||||
* @param names various episode names (at least two)
|
||||
* @return a word sequence all episode names have in common, or null
|
||||
* @throws IllegalArgumentException if fewer than 2 episode names are given
|
||||
*/
|
||||
public String matchByFirstCommonWordSequence(String... names) {
|
||||
if (names.length < 2) {
|
||||
|
@ -301,7 +295,7 @@ public class SeriesNameMatcher {
|
|||
int upper = 0;
|
||||
int lower = 0;
|
||||
|
||||
Scanner scanner = new Scanner(s); // Scanner has white space delimiter by default
|
||||
Scanner scanner = new Scanner(s); // Scanner uses a white space delimiter by default
|
||||
|
||||
while (scanner.hasNext()) {
|
||||
char c = scanner.next().charAt(0);
|
||||
|
@ -312,7 +306,7 @@ public class SeriesNameMatcher {
|
|||
upper++;
|
||||
}
|
||||
|
||||
// give upper case characters a slight boost
|
||||
// give upper case characters a slight boost over lower case characters
|
||||
return (lower + (upper * 1.01f)) / Math.abs(lower - upper);
|
||||
}
|
||||
|
||||
|
|
|
@ -2,16 +2,20 @@
|
|||
package net.sourceforge.filebot.ui.panel.rename;
|
||||
|
||||
|
||||
import static net.sourceforge.filebot.FileBotUtilities.MOVIE_FILES;
|
||||
import static net.sourceforge.filebot.FileBotUtilities.SUBTITLE_FILES;
|
||||
import static net.sourceforge.filebot.web.Episode.formatEpisodeNumbers;
|
||||
import static net.sourceforge.tuned.FileUtilities.FILES;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileFilter;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.Callable;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.Executors;
|
||||
|
@ -26,52 +30,42 @@ import net.sourceforge.filebot.similarity.SimilarityMetric;
|
|||
import net.sourceforge.filebot.web.Episode;
|
||||
import net.sourceforge.filebot.web.EpisodeListClient;
|
||||
import net.sourceforge.filebot.web.SearchResult;
|
||||
import net.sourceforge.tuned.FileUtilities;
|
||||
|
||||
|
||||
class AutoEpisodeListMatcher extends SwingWorker<List<Match<FileEntry, Episode>>, Void> {
|
||||
class AutoEpisodeListMatcher extends SwingWorker<List<Match<File, Episode>>, Void> {
|
||||
|
||||
private final List<FileEntry> remainingFiles = new ArrayList<FileEntry>();
|
||||
|
||||
private final List<FileEntry> files;
|
||||
private final List<File> files;
|
||||
|
||||
private final EpisodeListClient client;
|
||||
|
||||
private final Collection<SimilarityMetric> metrics;
|
||||
|
||||
|
||||
public AutoEpisodeListMatcher(EpisodeListClient client, List<FileEntry> files, Collection<SimilarityMetric> metrics) {
|
||||
public AutoEpisodeListMatcher(EpisodeListClient client, List<File> files, Collection<SimilarityMetric> metrics) {
|
||||
this.client = client;
|
||||
this.files = files;
|
||||
this.metrics = metrics;
|
||||
this.files = new LinkedList<File>(files);
|
||||
this.metrics = new ArrayList<SimilarityMetric>(metrics);
|
||||
}
|
||||
|
||||
|
||||
public Collection<FileEntry> remainingFiles() {
|
||||
return Collections.unmodifiableCollection(remainingFiles);
|
||||
public Collection<File> remainingFiles() {
|
||||
return Collections.unmodifiableCollection(files);
|
||||
}
|
||||
|
||||
|
||||
protected Collection<String> matchSeriesNames(List<FileEntry> episodes) {
|
||||
File[] files = new File[episodes.size()];
|
||||
|
||||
for (int i = 0; i < files.length; i++) {
|
||||
files[i] = episodes.get(i).getFile();
|
||||
}
|
||||
|
||||
// allow matching of a small number of episodes, by setting threshold = length if length < 5
|
||||
int threshold = Math.min(files.length, 5);
|
||||
|
||||
return new SeriesNameMatcher(threshold).matchAll(files);
|
||||
protected Collection<String> detectSeriesNames(Collection<File> files) {
|
||||
// detect series name(s) from files
|
||||
return new SeriesNameMatcher().matchAll(files.toArray(new File[files.size()]));
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
protected List<Match<FileEntry, Episode>> doInBackground() throws Exception {
|
||||
List<Callable<Collection<Episode>>> fetchTasks = new ArrayList<Callable<Collection<Episode>>>();
|
||||
protected List<Episode> fetchEpisodeList(Collection<String> seriesNames) throws Exception {
|
||||
List<Callable<Collection<Episode>>> tasks = new ArrayList<Callable<Collection<Episode>>>();
|
||||
|
||||
// match series names and create episode list fetch tasks
|
||||
for (final String seriesName : matchSeriesNames(files)) {
|
||||
fetchTasks.add(new Callable<Collection<Episode>>() {
|
||||
// detect series names and create episode list fetch tasks
|
||||
for (final String seriesName : seriesNames) {
|
||||
tasks.add(new Callable<Collection<Episode>>() {
|
||||
|
||||
@Override
|
||||
public Collection<Episode> call() throws Exception {
|
||||
|
@ -85,47 +79,78 @@ class AutoEpisodeListMatcher extends SwingWorker<List<Match<FileEntry, Episode>>
|
|||
});
|
||||
}
|
||||
|
||||
if (fetchTasks.isEmpty()) {
|
||||
if (tasks.isEmpty())
|
||||
throw new IllegalArgumentException("Failed to auto-detect series name.");
|
||||
}
|
||||
|
||||
// fetch episode lists concurrently
|
||||
List<Episode> episodeList = new ArrayList<Episode>();
|
||||
ExecutorService executor = Executors.newFixedThreadPool(fetchTasks.size());
|
||||
List<Episode> episodes = new ArrayList<Episode>();
|
||||
ExecutorService executor = Executors.newFixedThreadPool(tasks.size());
|
||||
|
||||
for (Future<Collection<Episode>> future : executor.invokeAll(fetchTasks)) {
|
||||
episodeList.addAll(future.get());
|
||||
for (Future<Collection<Episode>> future : executor.invokeAll(tasks)) {
|
||||
episodes.addAll(future.get());
|
||||
}
|
||||
|
||||
// destroy background threads
|
||||
executor.shutdown();
|
||||
|
||||
List<Match<FileEntry, Episode>> matches = new ArrayList<Match<FileEntry, Episode>>();
|
||||
return episodes;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
protected List<Match<File, Episode>> doInBackground() throws Exception {
|
||||
|
||||
for (List<FileEntry> entryList : splitByFileType(files)) {
|
||||
Matcher<FileEntry, Episode> matcher = new Matcher<FileEntry, Episode>(entryList, episodeList, metrics);
|
||||
// focus on movie and subtitle files
|
||||
List<File> mediaFiles = FileUtilities.filter(files, MOVIE_FILES, SUBTITLE_FILES);
|
||||
|
||||
// detect series name and fetch episode list
|
||||
List<Episode> episodes = fetchEpisodeList(detectSeriesNames(mediaFiles));
|
||||
|
||||
List<Match<File, Episode>> matches = new ArrayList<Match<File, Episode>>();
|
||||
|
||||
// match each group of files (movie files, subtitle files) against the episode list separately
|
||||
for (List<File> filesPerType : mapByFileType(files, MOVIE_FILES, SUBTITLE_FILES).values()) {
|
||||
Matcher<File, Episode> matcher = new Matcher<File, Episode>(filesPerType, episodes, metrics);
|
||||
matches.addAll(matcher.match());
|
||||
remainingFiles.addAll(matcher.remainingValues());
|
||||
}
|
||||
|
||||
// restore original order
|
||||
Collections.sort(matches, new Comparator<Match<File, Episode>>() {
|
||||
|
||||
@Override
|
||||
public int compare(Match<File, Episode> o1, Match<File, Episode> o2) {
|
||||
return files.indexOf(o1.getValue()) - files.indexOf(o2.getValue());
|
||||
}
|
||||
});
|
||||
|
||||
// update remaining files
|
||||
for (Match<File, Episode> match : matches) {
|
||||
files.remove(match.getValue());
|
||||
}
|
||||
|
||||
return matches;
|
||||
}
|
||||
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
protected Collection<List<FileEntry>> splitByFileType(Collection<FileEntry> files) {
|
||||
List<FileEntry> subtitles = new ArrayList<FileEntry>();
|
||||
List<FileEntry> other = new ArrayList<FileEntry>();
|
||||
/**
* Groups the given files by the first filter (in argument order) that accepts them.
* Every filter gets an entry in the result, possibly with an empty list; files that
* are accepted by none of the filters are left out.
*
* @param files files to group
* @param filters filters to try, in order, for each file
* @return a new map from each filter to the files it was the first to accept
*/
protected Map<FileFilter, List<File>> mapByFileType(Collection<File> files, FileFilter... filters) {
|
||||
// initialize map; note that a HashMap does not guarantee that entries iterate in filter order
|
||||
Map<FileFilter, List<File>> map = new HashMap<FileFilter, List<File>>(filters.length);
|
||||
|
||||
for (FileEntry file : files) {
|
||||
// check for subtitles first, then files in general
|
||||
if (SUBTITLE_FILES.accept(file.getFile())) {
|
||||
subtitles.add(file);
|
||||
} else if (FILES.accept(file.getFile())) {
|
||||
other.add(file);
|
||||
// initialize value lists, so that map.get(filter) below never returns null
|
||||
for (FileFilter filter : filters) {
|
||||
map.put(filter, new ArrayList<File>());
|
||||
}
|
||||
|
||||
for (File file : files) {
|
||||
for (FileFilter filter : filters) {
|
||||
if (filter.accept(file)) {
|
||||
// put each value into one group only: the first accepting filter wins
|
||||
map.get(filter).add(file);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return Arrays.asList(other, subtitles);
|
||||
return map;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -7,6 +7,7 @@ import static net.sourceforge.tuned.ui.LoadingOverlayPane.LOADING_PROPERTY;
|
|||
import static net.sourceforge.filebot.FileBotUtilities.*;
|
||||
import java.awt.Insets;
|
||||
import java.awt.event.ActionEvent;
|
||||
import java.io.File;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.logging.Level;
|
||||
|
@ -33,6 +34,8 @@ import net.sourceforge.filebot.web.TheTVDBClient;
|
|||
import net.sourceforge.tuned.ExceptionUtil;
|
||||
import net.sourceforge.tuned.ui.ActionPopup;
|
||||
import net.sourceforge.tuned.ui.LoadingOverlayPane;
|
||||
import ca.odell.glazedlists.FunctionList;
|
||||
import ca.odell.glazedlists.FunctionList.Function;
|
||||
import ca.odell.glazedlists.event.ListEvent;
|
||||
import ca.odell.glazedlists.event.ListEventListener;
|
||||
|
||||
|
@ -156,10 +159,22 @@ public class RenamePanel extends FileBotPanel {
|
|||
|
||||
@Override
|
||||
public void actionPerformed(ActionEvent evt) {
|
||||
if (model.files().isEmpty() || isAutoMatchInProgress())
|
||||
if (model.files().isEmpty() || isAutoMatchInProgress()) {
|
||||
return;
|
||||
}
|
||||
|
||||
AutoEpisodeListMatcher worker = new AutoEpisodeListMatcher(client, new ArrayList<FileEntry>(model.files()), matchAction.getMetrics()) {
|
||||
// clear names list
|
||||
model.names().clear();
|
||||
|
||||
List<File> files = new FunctionList<FileEntry, File>(model.files(), new Function<FileEntry, File>() {
|
||||
|
||||
@Override
|
||||
public File evaluate(FileEntry entry) {
|
||||
return entry.getFile();
|
||||
}
|
||||
});
|
||||
|
||||
AutoEpisodeListMatcher worker = new AutoEpisodeListMatcher(client, files, matchAction.getMetrics()) {
|
||||
|
||||
@Override
|
||||
protected void done() {
|
||||
|
@ -172,15 +187,15 @@ public class RenamePanel extends FileBotPanel {
|
|||
|
||||
List<StringEntry> invalidNames = new ArrayList<StringEntry>();
|
||||
|
||||
for (Match<FileEntry, Episode> match : get()) {
|
||||
for (Match<File, Episode> match : get()) {
|
||||
StringEntry name = new StringEntry(match.getCandidate());
|
||||
|
||||
if (isInvalidFileName(name.toString())) {
|
||||
invalidNames.add(name);
|
||||
}
|
||||
|
||||
names.add(new StringEntry(name));
|
||||
files.add(match.getValue());
|
||||
names.add(name);
|
||||
files.add(new FileEntry(match.getValue()));
|
||||
}
|
||||
|
||||
if (!invalidNames.isEmpty()) {
|
||||
|
@ -193,13 +208,15 @@ public class RenamePanel extends FileBotPanel {
|
|||
}
|
||||
}
|
||||
|
||||
// add remaining file entries
|
||||
for (File file : remainingFiles()) {
|
||||
files.add(new FileEntry(file));
|
||||
}
|
||||
|
||||
model.clear();
|
||||
|
||||
model.names().addAll(names);
|
||||
model.files().addAll(files);
|
||||
|
||||
// add remaining file entries again
|
||||
model.files().addAll(remainingFiles());
|
||||
} catch (Exception e) {
|
||||
Logger.getLogger("ui").log(Level.WARNING, ExceptionUtil.getRootCause(e).getMessage(), e);
|
||||
}
|
||||
|
|
|
@ -4,6 +4,8 @@ package net.sourceforge.tuned;
|
|||
|
||||
import java.io.File;
|
||||
import java.io.FileFilter;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
|
||||
public final class FileUtilities {
|
||||
|
@ -115,6 +117,22 @@ public final class FileUtilities {
|
|||
return true;
|
||||
}
|
||||
|
||||
|
||||
/**
* Returns the files that are accepted by at least one of the given filters.
* The result follows the iteration order of {@code files}, and each element is
* added at most once. If no filters are given, the result is empty.
*
* @param files files to test
* @param filters filters to try, in order, for each file
* @return a new list containing the accepted files
*/
public static List<File> filter(Iterable<File> files, FileFilter... filters) {
|
||||
List<File> accepted = new ArrayList<File>();
|
||||
|
||||
for (File file : files) {
|
||||
for (FileFilter filter : filters) {
|
||||
if (filter.accept(file)) {
|
||||
accepted.add(file);
|
||||
// one accepting filter is enough; stop so the file is not added again
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return accepted;
|
||||
}
|
||||
|
||||
public static final FileFilter FOLDERS = new FileFilter() {
|
||||
|
||||
@Override
|
||||
|
|
|
@ -12,7 +12,7 @@ import org.junit.Test;
|
|||
|
||||
public class SeriesNameMatcherTest {
|
||||
|
||||
private static SeriesNameMatcher matcher = new SeriesNameMatcher(5);
|
||||
private static SeriesNameMatcher matcher = new SeriesNameMatcher();
|
||||
|
||||
|
||||
@Test
|
||||
|
|
Loading…
Reference in New Issue