+ auto-detect charset instead of assuming utf-8 when loading SFV/MD5/SHA files
This commit is contained in:
parent
68c703f3cb
commit
abfaf9f6c7
|
@ -4,9 +4,7 @@ package net.sourceforge.filebot.hash;
|
|||
|
||||
import java.io.Closeable;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStreamReader;
|
||||
import java.text.ParseException;
|
||||
import java.util.Iterator;
|
||||
import java.util.NoSuchElementException;
|
||||
|
@ -27,11 +25,6 @@ public class VerificationFileReader implements Iterator<Entry<File, String>>, Cl
|
|||
private int lineNumber = 0;
|
||||
|
||||
|
||||
public VerificationFileReader(File file, VerificationFormat format) throws IOException {
|
||||
this(new InputStreamReader(new FileInputStream(file), "UTF-8"), format);
|
||||
}
|
||||
|
||||
|
||||
public VerificationFileReader(Readable source, VerificationFormat format) {
|
||||
this.scanner = new Scanner(source);
|
||||
this.format = format;
|
||||
|
|
|
@ -2,12 +2,19 @@
|
|||
package net.sourceforge.filebot.hash;
|
||||
|
||||
|
||||
import java.io.BufferedInputStream;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStreamReader;
|
||||
import java.io.Reader;
|
||||
import java.util.Map.Entry;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import com.ibm.icu.text.CharsetDetector;
|
||||
import com.ibm.icu.text.CharsetMatch;
|
||||
|
||||
|
||||
public final class VerificationUtilities {
|
||||
|
||||
|
@ -43,6 +50,19 @@ public final class VerificationUtilities {
|
|||
}
|
||||
|
||||
|
||||
public static VerificationFileReader createVerificationFileReader(File file, HashType type) throws IOException {
|
||||
// detect charset and read text content
|
||||
CharsetDetector detector = new CharsetDetector();
|
||||
detector.setDeclaredEncoding("UTF-8");
|
||||
detector.setText(new BufferedInputStream(new FileInputStream(file)));
|
||||
|
||||
CharsetMatch charset = detector.detect();
|
||||
Reader source = (charset != null) ? charset.getReader() : new InputStreamReader(new FileInputStream(file), "UTF-8");
|
||||
|
||||
return new VerificationFileReader(source, type.getFormat());
|
||||
}
|
||||
|
||||
|
||||
private static String getHashFromVerificationFile(File folder, File target, HashType type, int depth, int maxDepth) throws IOException {
|
||||
// stop if we reached max depth or the file system root
|
||||
if (folder == null || depth > maxDepth)
|
||||
|
@ -50,11 +70,11 @@ public final class VerificationUtilities {
|
|||
|
||||
// scan all sfv files in this folder
|
||||
for (File verificationFile : folder.listFiles(type.getFilter())) {
|
||||
VerificationFileReader scanner = new VerificationFileReader(verificationFile, type.getFormat());
|
||||
VerificationFileReader parser = createVerificationFileReader(verificationFile, type);
|
||||
|
||||
try {
|
||||
while (scanner.hasNext()) {
|
||||
Entry<File, String> entry = scanner.next();
|
||||
while (parser.hasNext()) {
|
||||
Entry<File, String> entry = parser.next();
|
||||
|
||||
// resolve relative file path
|
||||
File file = new File(folder, entry.getKey().getPath());
|
||||
|
@ -64,7 +84,7 @@ public final class VerificationUtilities {
|
|||
}
|
||||
}
|
||||
} finally {
|
||||
scanner.close();
|
||||
parser.close();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -143,14 +143,14 @@ class NamesListTransferablePolicy extends FileTransferablePolicy {
|
|||
continue;
|
||||
|
||||
// add all file names from verification file
|
||||
VerificationFileReader scanner = new VerificationFileReader(verificationFile, type.getFormat());
|
||||
VerificationFileReader parser = createVerificationFileReader(verificationFile, type);
|
||||
|
||||
try {
|
||||
while (scanner.hasNext()) {
|
||||
values.add(new AbstractFile(scanner.next().getKey().getName(), -1));
|
||||
while (parser.hasNext()) {
|
||||
values.add(new AbstractFile(parser.next().getKey().getName(), -1));
|
||||
}
|
||||
} finally {
|
||||
scanner.close();
|
||||
parser.close();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -69,7 +69,7 @@ class ChecksumTableExportHandler extends TextFileExportHandler {
|
|||
|
||||
// print header
|
||||
out.format("; Generated by %s %s on %tF at %<tT%n", Settings.getApplicationName(), Settings.getApplicationVersion(), new Date());
|
||||
out.format(";%n");
|
||||
out.format("; charset=UTF-8%n");
|
||||
out.format(";%n");
|
||||
|
||||
// print data
|
||||
|
|
|
@ -110,14 +110,18 @@ class ChecksumTableTransferablePolicy extends BackgroundFileTransferablePolicy<C
|
|||
|
||||
|
||||
protected void loadVerificationFile(File file, HashType type) throws IOException, InterruptedException {
|
||||
VerificationFileReader scanner = new VerificationFileReader(file, type.getFormat());
|
||||
VerificationFileReader parser = createVerificationFileReader(file, type);
|
||||
|
||||
try {
|
||||
// root for relative file paths in verification file
|
||||
File baseFolder = file.getParentFile();
|
||||
|
||||
while (scanner.hasNext()) {
|
||||
Entry<File, String> entry = scanner.next();
|
||||
while (parser.hasNext()) {
|
||||
// make this possibly long-running operation interruptible
|
||||
if (Thread.interrupted())
|
||||
throw new InterruptedException();
|
||||
|
||||
Entry<File, String> entry = parser.next();
|
||||
|
||||
String name = normalizePath(entry.getKey());
|
||||
String hash = new String(entry.getValue());
|
||||
|
@ -126,13 +130,9 @@ class ChecksumTableTransferablePolicy extends BackgroundFileTransferablePolicy<C
|
|||
ChecksumCell current = createComputationCell(name, baseFolder, type);
|
||||
|
||||
publish(correct, current);
|
||||
|
||||
// make this long-running operation interruptible
|
||||
if (Thread.interrupted())
|
||||
throw new InterruptedException();
|
||||
}
|
||||
} finally {
|
||||
scanner.close();
|
||||
parser.close();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -260,21 +260,21 @@ class ChecksumTableTransferablePolicy extends BackgroundFileTransferablePolicy<C
|
|||
* Completely read a verification file and resolve all relative file paths against a given base folder
|
||||
*/
|
||||
private Map<File, String> importVerificationFile(File verificationFile, HashType hashType, File baseFolder) throws IOException {
|
||||
VerificationFileReader reader = new VerificationFileReader(verificationFile, hashType.getFormat());
|
||||
Map<File, String> content = new HashMap<File, String>();
|
||||
VerificationFileReader parser = createVerificationFileReader(verificationFile, hashType);
|
||||
Map<File, String> result = new HashMap<File, String>();
|
||||
|
||||
try {
|
||||
while (reader.hasNext()) {
|
||||
Entry<File, String> entry = reader.next();
|
||||
while (parser.hasNext()) {
|
||||
Entry<File, String> entry = parser.next();
|
||||
|
||||
// resolve relative path, the hash is probably a substring, so we compact it, for memory reasons
|
||||
content.put(new File(baseFolder, entry.getKey().getPath()), new String(entry.getValue()));
|
||||
result.put(new File(baseFolder, entry.getKey().getPath()), new String(entry.getValue()));
|
||||
}
|
||||
} finally {
|
||||
reader.close();
|
||||
parser.close();
|
||||
}
|
||||
|
||||
return content;
|
||||
return result;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -28,6 +28,7 @@ final class SubtitleUtilities {
|
|||
public static List<SubtitleElement> decode(MemoryFile file) throws IOException {
|
||||
// detect charset and read text content
|
||||
CharsetDetector detector = new CharsetDetector();
|
||||
detector.setDeclaredEncoding("UTF-8");
|
||||
detector.enableInputFilter(true);
|
||||
|
||||
detector.setText(new ByteBufferInputStream(file.getData()));
|
||||
|
|
Loading…
Reference in New Issue