* take parent folder into account when parsing Date patterns from files

This commit is contained in:
Reinhard Pointner 2014-09-24 06:55:59 +00:00
parent e17ac10168
commit af8ce77f87
2 changed files with 57 additions and 51 deletions

View File

@ -1,98 +1,112 @@
package net.filebot.similarity; package net.filebot.similarity;
import static net.filebot.util.FileUtilities.*;
import java.io.File;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.MatchResult; import java.util.regex.MatchResult;
import java.util.regex.Matcher; import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import net.filebot.web.SimpleDate; import net.filebot.web.SimpleDate;
public class DateMatcher { public class DateMatcher {
private final DatePattern[] patterns; private final DatePattern[] patterns;
public DateMatcher() { public DateMatcher() {
patterns = new DatePattern[2]; patterns = new DatePattern[2];
// match yyyy-mm-dd patterns like 2010-10-24, 2009/6/1, etc. // match yyyy-mm-dd patterns like 2010-10-24, 2009/6/1, etc.
patterns[0] = new DatePattern("(?<!\\p{Alnum})(\\d{4})[^\\p{Alnum}](\\d{1,2})[^\\p{Alnum}](\\d{1,2})(?!\\p{Alnum})", new int[] { 1, 2, 3 }); patterns[0] = new DatePattern("(?<!\\p{Alnum})(\\d{4})[^\\p{Alnum}](\\d{1,2})[^\\p{Alnum}](\\d{1,2})(?!\\p{Alnum})", new int[] { 1, 2, 3 });
// match dd-mm-yyyy patterns like 1.1.2010, 01/06/2010, etc. // match dd-mm-yyyy patterns like 1.1.2010, 01/06/2010, etc.
patterns[1] = new DatePattern("(?<!\\p{Alnum})(\\d{1,2})[^\\p{Alnum}](\\d{1,2})[^\\p{Alnum}](\\d{4})(?!\\p{Alnum})", new int[] { 3, 2, 1 }); patterns[1] = new DatePattern("(?<!\\p{Alnum})(\\d{1,2})[^\\p{Alnum}](\\d{1,2})[^\\p{Alnum}](\\d{4})(?!\\p{Alnum})", new int[] { 3, 2, 1 });
} }
public DateMatcher(DatePattern... patterns) { public DateMatcher(DatePattern... patterns) {
this.patterns = patterns; this.patterns = patterns;
} }
public SimpleDate match(CharSequence seq) { public SimpleDate match(CharSequence seq) {
for (DatePattern pattern : patterns) { for (DatePattern pattern : patterns) {
SimpleDate match = pattern.match(seq); SimpleDate match = pattern.match(seq);
if (match != null) { if (match != null) {
return match; return match;
} }
} }
return null; return null;
} }
public int find(CharSequence seq, int fromIndex) { public int find(CharSequence seq, int fromIndex) {
for (DatePattern pattern : patterns) { for (DatePattern pattern : patterns) {
int pos = pattern.find(seq, fromIndex); int pos = pattern.find(seq, fromIndex);
if (pos >= 0) { if (pos >= 0) {
return pos; return pos;
} }
} }
return -1; return -1;
} }
public SimpleDate match(File file) {
for (String name : tokenizeTail(file)) {
for (DatePattern pattern : patterns) {
SimpleDate match = pattern.match(name);
if (match != null) {
return match;
}
}
}
return null;
}
protected List<String> tokenizeTail(File file) {
List<String> tail = new ArrayList<String>(2);
for (File f : listPathTail(file, 2, true)) {
tail.add(getName(f));
}
return tail;
}
private static class DatePattern { private static class DatePattern {
protected final Pattern pattern; protected final Pattern pattern;
protected final int[] order; protected final int[] order;
public DatePattern(String pattern, int[] order) { public DatePattern(String pattern, int[] order) {
this.pattern = Pattern.compile(pattern); this.pattern = Pattern.compile(pattern);
this.order = order; this.order = order;
} }
protected SimpleDate process(MatchResult match) { protected SimpleDate process(MatchResult match) {
return new SimpleDate(Integer.parseInt(match.group(order[0])), Integer.parseInt(match.group(order[1])), Integer.parseInt(match.group(order[2]))); return new SimpleDate(Integer.parseInt(match.group(order[0])), Integer.parseInt(match.group(order[1])), Integer.parseInt(match.group(order[2])));
} }
public SimpleDate match(CharSequence seq) { public SimpleDate match(CharSequence seq) {
Matcher matcher = pattern.matcher(seq); Matcher matcher = pattern.matcher(seq);
if (matcher.find()) { if (matcher.find()) {
return process(matcher); return process(matcher);
} }
return null; return null;
} }
public int find(CharSequence seq, int fromIndex) { public int find(CharSequence seq, int fromIndex) {
Matcher matcher = pattern.matcher(seq).region(fromIndex, seq.length()); Matcher matcher = pattern.matcher(seq).region(fromIndex, seq.length());
if (matcher.find()) { if (matcher.find()) {
return matcher.start(); return matcher.start();
} }
return -1; return -1;
} }
} }
} }

View File

@ -1,48 +1,40 @@
package net.filebot.similarity; package net.filebot.similarity;
import java.io.File; import java.io.File;
import net.filebot.web.SimpleDate; import net.filebot.web.SimpleDate;
public class DateMetric implements SimilarityMetric { public class DateMetric implements SimilarityMetric {
private final DateMatcher matcher; private final DateMatcher matcher;
public DateMetric() { public DateMetric() {
this.matcher = new DateMatcher(); this.matcher = new DateMatcher();
} }
public DateMetric(DateMatcher matcher) { public DateMetric(DateMatcher matcher) {
this.matcher = matcher; this.matcher = matcher;
} }
@Override @Override
public float getSimilarity(Object o1, Object o2) { public float getSimilarity(Object o1, Object o2) {
SimpleDate d1 = parse(o1); SimpleDate d1 = parse(o1);
if (d1 == null) if (d1 == null)
return 0; return 0;
SimpleDate d2 = parse(o2); SimpleDate d2 = parse(o2);
if (d2 == null) if (d2 == null)
return 0; return 0;
return d1.equals(d2) ? 1 : -1; return d1.equals(d2) ? 1 : -1;
} }
public SimpleDate parse(Object object) { public SimpleDate parse(Object object) {
if (object instanceof File) { if (object instanceof File) {
// parse file name return matcher.match((File) object);
object = ((File) object).getName();
} }
return matcher.match(object.toString()); return matcher.match(object.toString());
} }
} }