+ auto-detect charset instead of assuming utf-8 when loading SFV/MD5/SHA files

This commit is contained in:
Reinhard Pointner 2011-09-03 17:40:48 +00:00
parent 68c703f3cb
commit abfaf9f6c7
6 changed files with 45 additions and 31 deletions

View File

@ -4,9 +4,7 @@ package net.sourceforge.filebot.hash;
import java.io.Closeable; import java.io.Closeable;
import java.io.File; import java.io.File;
import java.io.FileInputStream;
import java.io.IOException; import java.io.IOException;
import java.io.InputStreamReader;
import java.text.ParseException; import java.text.ParseException;
import java.util.Iterator; import java.util.Iterator;
import java.util.NoSuchElementException; import java.util.NoSuchElementException;
@ -27,11 +25,6 @@ public class VerificationFileReader implements Iterator<Entry<File, String>>, Cl
private int lineNumber = 0; private int lineNumber = 0;
/**
 * Create a reader for the given verification file. The file content is always decoded as UTF-8,
 * no charset detection is attempted here.
 *
 * @param file verification file to open
 * @param format format that is handed on to the {@link Readable}-based constructor
 * @throws IOException if the file cannot be opened
 */
public VerificationFileReader(File file, VerificationFormat format) throws IOException {
this(new InputStreamReader(new FileInputStream(file), "UTF-8"), format);
}
public VerificationFileReader(Readable source, VerificationFormat format) { public VerificationFileReader(Readable source, VerificationFormat format) {
this.scanner = new Scanner(source); this.scanner = new Scanner(source);
this.format = format; this.format = format;

View File

@ -2,12 +2,19 @@
package net.sourceforge.filebot.hash; package net.sourceforge.filebot.hash;
import java.io.BufferedInputStream;
import java.io.File; import java.io.File;
import java.io.FileInputStream;
import java.io.IOException; import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.Map.Entry; import java.util.Map.Entry;
import java.util.regex.Matcher; import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import com.ibm.icu.text.CharsetDetector;
import com.ibm.icu.text.CharsetMatch;
public final class VerificationUtilities { public final class VerificationUtilities {
@ -43,6 +50,19 @@ public final class VerificationUtilities {
} }
/**
 * Open a verification file for parsing, auto-detecting the charset of its content instead of
 * assuming UTF-8. UTF-8 is only used as a hint for the detector and as fallback if no charset
 * could be detected.
 *
 * The returned reader owns the underlying file stream, callers are expected to close it.
 *
 * @param file verification file to open
 * @param type hash type that supplies the verification format used for parsing
 * @return reader over the decoded text content of the file
 * @throws IOException if the file cannot be opened or read
 */
public static VerificationFileReader createVerificationFileReader(File file, HashType type) throws IOException {
	BufferedInputStream in = new BufferedInputStream(new FileInputStream(file));
	Reader source = null;

	try {
		// detect charset and read text content
		CharsetDetector detector = new CharsetDetector();
		detector.setDeclaredEncoding("UTF-8");
		detector.setText(in);

		CharsetMatch charset = detector.detect();
		if (charset != null) {
			// NOTE(review): getReader() appears to return null if the stream can't be rewound -- treated like "no match" here
			source = charset.getReader();
		}
	} finally {
		// the detection stream is only handed on via the charset reader, in any other case
		// (no match, no reader, exception) it must be closed here or the file handle is leaked
		if (source == null) {
			in.close();
		}
	}

	if (source == null) {
		// fallback: reopen the file and assume UTF-8
		source = new InputStreamReader(new FileInputStream(file), "UTF-8");
	}

	return new VerificationFileReader(source, type.getFormat());
}
private static String getHashFromVerificationFile(File folder, File target, HashType type, int depth, int maxDepth) throws IOException { private static String getHashFromVerificationFile(File folder, File target, HashType type, int depth, int maxDepth) throws IOException {
// stop if we reached max depth or the file system root // stop if we reached max depth or the file system root
if (folder == null || depth > maxDepth) if (folder == null || depth > maxDepth)
@ -50,11 +70,11 @@ public final class VerificationUtilities {
// scan all sfv files in this folder // scan all sfv files in this folder
for (File verificationFile : folder.listFiles(type.getFilter())) { for (File verificationFile : folder.listFiles(type.getFilter())) {
VerificationFileReader scanner = new VerificationFileReader(verificationFile, type.getFormat()); VerificationFileReader parser = createVerificationFileReader(verificationFile, type);
try { try {
while (scanner.hasNext()) { while (parser.hasNext()) {
Entry<File, String> entry = scanner.next(); Entry<File, String> entry = parser.next();
// resolve relative file path // resolve relative file path
File file = new File(folder, entry.getKey().getPath()); File file = new File(folder, entry.getKey().getPath());
@ -64,7 +84,7 @@ public final class VerificationUtilities {
} }
} }
} finally { } finally {
scanner.close(); parser.close();
} }
} }

View File

@ -143,14 +143,14 @@ class NamesListTransferablePolicy extends FileTransferablePolicy {
continue; continue;
// add all file names from verification file // add all file names from verification file
VerificationFileReader scanner = new VerificationFileReader(verificationFile, type.getFormat()); VerificationFileReader parser = createVerificationFileReader(verificationFile, type);
try { try {
while (scanner.hasNext()) { while (parser.hasNext()) {
values.add(new AbstractFile(scanner.next().getKey().getName(), -1)); values.add(new AbstractFile(parser.next().getKey().getName(), -1));
} }
} finally { } finally {
scanner.close(); parser.close();
} }
} }
} }

View File

@ -69,7 +69,7 @@ class ChecksumTableExportHandler extends TextFileExportHandler {
// print header // print header
out.format("; Generated by %s %s on %tF at %<tT%n", Settings.getApplicationName(), Settings.getApplicationVersion(), new Date()); out.format("; Generated by %s %s on %tF at %<tT%n", Settings.getApplicationName(), Settings.getApplicationVersion(), new Date());
out.format(";%n"); out.format("; charset=UTF-8%n");
out.format(";%n"); out.format(";%n");
// print data // print data

View File

@ -110,14 +110,18 @@ class ChecksumTableTransferablePolicy extends BackgroundFileTransferablePolicy<C
protected void loadVerificationFile(File file, HashType type) throws IOException, InterruptedException { protected void loadVerificationFile(File file, HashType type) throws IOException, InterruptedException {
VerificationFileReader scanner = new VerificationFileReader(file, type.getFormat()); VerificationFileReader parser = createVerificationFileReader(file, type);
try { try {
// root for relative file paths in verification file // root for relative file paths in verification file
File baseFolder = file.getParentFile(); File baseFolder = file.getParentFile();
while (scanner.hasNext()) { while (parser.hasNext()) {
Entry<File, String> entry = scanner.next(); // make this possibly long-running operation interruptible
if (Thread.interrupted())
throw new InterruptedException();
Entry<File, String> entry = parser.next();
String name = normalizePath(entry.getKey()); String name = normalizePath(entry.getKey());
String hash = new String(entry.getValue()); String hash = new String(entry.getValue());
@ -126,13 +130,9 @@ class ChecksumTableTransferablePolicy extends BackgroundFileTransferablePolicy<C
ChecksumCell current = createComputationCell(name, baseFolder, type); ChecksumCell current = createComputationCell(name, baseFolder, type);
publish(correct, current); publish(correct, current);
// make this long-running operation interruptible
if (Thread.interrupted())
throw new InterruptedException();
} }
} finally { } finally {
scanner.close(); parser.close();
} }
} }
@ -260,21 +260,21 @@ class ChecksumTableTransferablePolicy extends BackgroundFileTransferablePolicy<C
* Completely read a verification file and resolve all relative file paths against a given base folder * Completely read a verification file and resolve all relative file paths against a given base folder
*/ */
private Map<File, String> importVerificationFile(File verificationFile, HashType hashType, File baseFolder) throws IOException { private Map<File, String> importVerificationFile(File verificationFile, HashType hashType, File baseFolder) throws IOException {
VerificationFileReader reader = new VerificationFileReader(verificationFile, hashType.getFormat()); VerificationFileReader parser = createVerificationFileReader(verificationFile, hashType);
Map<File, String> content = new HashMap<File, String>(); Map<File, String> result = new HashMap<File, String>();
try { try {
while (reader.hasNext()) { while (parser.hasNext()) {
Entry<File, String> entry = reader.next(); Entry<File, String> entry = parser.next();
// resolve relative path, the hash is probably a substring, so we compact it, for memory reasons // resolve relative path, the hash is probably a substring, so we compact it, for memory reasons
content.put(new File(baseFolder, entry.getKey().getPath()), new String(entry.getValue())); result.put(new File(baseFolder, entry.getKey().getPath()), new String(entry.getValue()));
} }
} finally { } finally {
reader.close(); parser.close();
} }
return content; return result;
} }
} }

View File

@ -28,6 +28,7 @@ final class SubtitleUtilities {
public static List<SubtitleElement> decode(MemoryFile file) throws IOException { public static List<SubtitleElement> decode(MemoryFile file) throws IOException {
// detect charset and read text content // detect charset and read text content
CharsetDetector detector = new CharsetDetector(); CharsetDetector detector = new CharsetDetector();
detector.setDeclaredEncoding("UTF-8");
detector.enableInputFilter(true); detector.enableInputFilter(true);
detector.setText(new ByteBufferInputStream(file.getData())); detector.setText(new ByteBufferInputStream(file.getData()));