+ auto-detect charset instead of assuming utf-8 when loading SFV/MD5/SHA files

This commit is contained in:
Reinhard Pointner 2011-09-03 17:40:48 +00:00
parent 68c703f3cb
commit abfaf9f6c7
6 changed files with 45 additions and 31 deletions

View File

@ -4,9 +4,7 @@ package net.sourceforge.filebot.hash;
import java.io.Closeable;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.text.ParseException;
import java.util.Iterator;
import java.util.NoSuchElementException;
@ -27,11 +25,6 @@ public class VerificationFileReader implements Iterator<Entry<File, String>>, Cl
private int lineNumber = 0;
public VerificationFileReader(File file, VerificationFormat format) throws IOException {
this(new InputStreamReader(new FileInputStream(file), "UTF-8"), format);
}
public VerificationFileReader(Readable source, VerificationFormat format) {
this.scanner = new Scanner(source);
this.format = format;

View File

@ -2,12 +2,19 @@
package net.sourceforge.filebot.hash;
import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.Map.Entry;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import com.ibm.icu.text.CharsetDetector;
import com.ibm.icu.text.CharsetMatch;
public final class VerificationUtilities {
@ -43,6 +50,19 @@ public final class VerificationUtilities {
}
/**
 * Open a verification file (e.g. SFV/MD5/SHA) for parsing, auto-detecting its charset
 * instead of assuming UTF-8. UTF-8 is passed to the detector as the declared encoding
 * and is also used as fallback if detection does not yield a usable reader.
 * 
 * @param file verification file to read
 * @param type hash type whose format is used to parse the file
 * @return reader over the entries of the verification file; the caller is expected to close it
 * @throws IOException if the file cannot be opened or read
 */
public static VerificationFileReader createVerificationFileReader(File file, HashType type) throws IOException {
	// the detector is given a buffered stream so that the stream supports mark/reset
	BufferedInputStream in = new BufferedInputStream(new FileInputStream(file));
	Reader source = null;
	
	try {
		// detect charset and read text content
		CharsetDetector detector = new CharsetDetector();
		detector.setDeclaredEncoding("UTF-8");
		detector.setText(in);
		
		CharsetMatch charset = detector.detect();
		if (charset != null) {
			// NOTE(review): ICU's getReader() appears to return null rather than throw when the
			// stream can't be reset or the charset is unsupported by the JVM, so null is handled below
			source = charset.getReader();
		}
	} finally {
		// if no reader was obtained (no match, null reader, or exception), nothing else
		// references the probe stream, so it must be closed here to not leak the file handle
		if (source == null) {
			in.close();
		}
	}
	
	if (source == null) {
		// detection failed, fall back to UTF-8 on a fresh stream
		source = new InputStreamReader(new FileInputStream(file), "UTF-8");
	}
	
	return new VerificationFileReader(source, type.getFormat());
}
private static String getHashFromVerificationFile(File folder, File target, HashType type, int depth, int maxDepth) throws IOException {
// stop if we reached max depth or the file system root
if (folder == null || depth > maxDepth)
@ -50,11 +70,11 @@ public final class VerificationUtilities {
// scan all sfv files in this folder
for (File verificationFile : folder.listFiles(type.getFilter())) {
VerificationFileReader scanner = new VerificationFileReader(verificationFile, type.getFormat());
VerificationFileReader parser = createVerificationFileReader(verificationFile, type);
try {
while (scanner.hasNext()) {
Entry<File, String> entry = scanner.next();
while (parser.hasNext()) {
Entry<File, String> entry = parser.next();
// resolve relative file path
File file = new File(folder, entry.getKey().getPath());
@ -64,7 +84,7 @@ public final class VerificationUtilities {
}
}
} finally {
scanner.close();
parser.close();
}
}

View File

@ -143,14 +143,14 @@ class NamesListTransferablePolicy extends FileTransferablePolicy {
continue;
// add all file names from verification file
VerificationFileReader scanner = new VerificationFileReader(verificationFile, type.getFormat());
VerificationFileReader parser = createVerificationFileReader(verificationFile, type);
try {
while (scanner.hasNext()) {
values.add(new AbstractFile(scanner.next().getKey().getName(), -1));
while (parser.hasNext()) {
values.add(new AbstractFile(parser.next().getKey().getName(), -1));
}
} finally {
scanner.close();
parser.close();
}
}
}

View File

@ -69,7 +69,7 @@ class ChecksumTableExportHandler extends TextFileExportHandler {
// print header
out.format("; Generated by %s %s on %tF at %<tT%n", Settings.getApplicationName(), Settings.getApplicationVersion(), new Date());
out.format(";%n");
out.format("; charset=UTF-8%n");
out.format(";%n");
// print data

View File

@ -110,14 +110,18 @@ class ChecksumTableTransferablePolicy extends BackgroundFileTransferablePolicy<C
protected void loadVerificationFile(File file, HashType type) throws IOException, InterruptedException {
VerificationFileReader scanner = new VerificationFileReader(file, type.getFormat());
VerificationFileReader parser = createVerificationFileReader(file, type);
try {
// root for relative file paths in verification file
File baseFolder = file.getParentFile();
while (scanner.hasNext()) {
Entry<File, String> entry = scanner.next();
while (parser.hasNext()) {
// make this possibly long-running operation interruptible
if (Thread.interrupted())
throw new InterruptedException();
Entry<File, String> entry = parser.next();
String name = normalizePath(entry.getKey());
String hash = new String(entry.getValue());
@ -126,13 +130,9 @@ class ChecksumTableTransferablePolicy extends BackgroundFileTransferablePolicy<C
ChecksumCell current = createComputationCell(name, baseFolder, type);
publish(correct, current);
// make this long-running operation interruptible
if (Thread.interrupted())
throw new InterruptedException();
}
} finally {
scanner.close();
parser.close();
}
}
@ -260,21 +260,21 @@ class ChecksumTableTransferablePolicy extends BackgroundFileTransferablePolicy<C
* Completely read a verification file and resolve all relative file paths against a given base folder
*/
private Map<File, String> importVerificationFile(File verificationFile, HashType hashType, File baseFolder) throws IOException {
VerificationFileReader reader = new VerificationFileReader(verificationFile, hashType.getFormat());
Map<File, String> content = new HashMap<File, String>();
VerificationFileReader parser = createVerificationFileReader(verificationFile, hashType);
Map<File, String> result = new HashMap<File, String>();
try {
while (reader.hasNext()) {
Entry<File, String> entry = reader.next();
while (parser.hasNext()) {
Entry<File, String> entry = parser.next();
// resolve relative path, the hash is probably a substring, so we compact it, for memory reasons
content.put(new File(baseFolder, entry.getKey().getPath()), new String(entry.getValue()));
result.put(new File(baseFolder, entry.getKey().getPath()), new String(entry.getValue()));
}
} finally {
reader.close();
parser.close();
}
return content;
return result;
}
}

View File

@ -28,6 +28,7 @@ final class SubtitleUtilities {
public static List<SubtitleElement> decode(MemoryFile file) throws IOException {
// detect charset and read text content
CharsetDetector detector = new CharsetDetector();
detector.setDeclaredEncoding("UTF-8");
detector.enableInputFilter(true);
detector.setText(new ByteBufferInputStream(file.getData()));