+ auto-detect charset instead of assuming utf-8 when loading SFV/MD5/SHA files
This commit is contained in:
parent
68c703f3cb
commit
abfaf9f6c7
|
@ -4,9 +4,7 @@ package net.sourceforge.filebot.hash;
|
||||||
|
|
||||||
import java.io.Closeable;
|
import java.io.Closeable;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.FileInputStream;
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStreamReader;
|
|
||||||
import java.text.ParseException;
|
import java.text.ParseException;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
import java.util.NoSuchElementException;
|
import java.util.NoSuchElementException;
|
||||||
|
@ -27,11 +25,6 @@ public class VerificationFileReader implements Iterator<Entry<File, String>>, Cl
|
||||||
private int lineNumber = 0;
|
private int lineNumber = 0;
|
||||||
|
|
||||||
|
|
||||||
public VerificationFileReader(File file, VerificationFormat format) throws IOException {
|
|
||||||
this(new InputStreamReader(new FileInputStream(file), "UTF-8"), format);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
public VerificationFileReader(Readable source, VerificationFormat format) {
|
public VerificationFileReader(Readable source, VerificationFormat format) {
|
||||||
this.scanner = new Scanner(source);
|
this.scanner = new Scanner(source);
|
||||||
this.format = format;
|
this.format = format;
|
||||||
|
|
|
@ -2,12 +2,19 @@
|
||||||
package net.sourceforge.filebot.hash;
|
package net.sourceforge.filebot.hash;
|
||||||
|
|
||||||
|
|
||||||
|
import java.io.BufferedInputStream;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
|
import java.io.FileInputStream;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.io.InputStreamReader;
|
||||||
|
import java.io.Reader;
|
||||||
import java.util.Map.Entry;
|
import java.util.Map.Entry;
|
||||||
import java.util.regex.Matcher;
|
import java.util.regex.Matcher;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
|
import com.ibm.icu.text.CharsetDetector;
|
||||||
|
import com.ibm.icu.text.CharsetMatch;
|
||||||
|
|
||||||
|
|
||||||
public final class VerificationUtilities {
|
public final class VerificationUtilities {
|
||||||
|
|
||||||
|
@ -43,6 +50,19 @@ public final class VerificationUtilities {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public static VerificationFileReader createVerificationFileReader(File file, HashType type) throws IOException {
|
||||||
|
// detect charset and read text content
|
||||||
|
CharsetDetector detector = new CharsetDetector();
|
||||||
|
detector.setDeclaredEncoding("UTF-8");
|
||||||
|
detector.setText(new BufferedInputStream(new FileInputStream(file)));
|
||||||
|
|
||||||
|
CharsetMatch charset = detector.detect();
|
||||||
|
Reader source = (charset != null) ? charset.getReader() : new InputStreamReader(new FileInputStream(file), "UTF-8");
|
||||||
|
|
||||||
|
return new VerificationFileReader(source, type.getFormat());
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
private static String getHashFromVerificationFile(File folder, File target, HashType type, int depth, int maxDepth) throws IOException {
|
private static String getHashFromVerificationFile(File folder, File target, HashType type, int depth, int maxDepth) throws IOException {
|
||||||
// stop if we reached max depth or the file system root
|
// stop if we reached max depth or the file system root
|
||||||
if (folder == null || depth > maxDepth)
|
if (folder == null || depth > maxDepth)
|
||||||
|
@ -50,11 +70,11 @@ public final class VerificationUtilities {
|
||||||
|
|
||||||
// scan all sfv files in this folder
|
// scan all sfv files in this folder
|
||||||
for (File verificationFile : folder.listFiles(type.getFilter())) {
|
for (File verificationFile : folder.listFiles(type.getFilter())) {
|
||||||
VerificationFileReader scanner = new VerificationFileReader(verificationFile, type.getFormat());
|
VerificationFileReader parser = createVerificationFileReader(verificationFile, type);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
while (scanner.hasNext()) {
|
while (parser.hasNext()) {
|
||||||
Entry<File, String> entry = scanner.next();
|
Entry<File, String> entry = parser.next();
|
||||||
|
|
||||||
// resolve relative file path
|
// resolve relative file path
|
||||||
File file = new File(folder, entry.getKey().getPath());
|
File file = new File(folder, entry.getKey().getPath());
|
||||||
|
@ -64,7 +84,7 @@ public final class VerificationUtilities {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} finally {
|
} finally {
|
||||||
scanner.close();
|
parser.close();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -143,14 +143,14 @@ class NamesListTransferablePolicy extends FileTransferablePolicy {
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
// add all file names from verification file
|
// add all file names from verification file
|
||||||
VerificationFileReader scanner = new VerificationFileReader(verificationFile, type.getFormat());
|
VerificationFileReader parser = createVerificationFileReader(verificationFile, type);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
while (scanner.hasNext()) {
|
while (parser.hasNext()) {
|
||||||
values.add(new AbstractFile(scanner.next().getKey().getName(), -1));
|
values.add(new AbstractFile(parser.next().getKey().getName(), -1));
|
||||||
}
|
}
|
||||||
} finally {
|
} finally {
|
||||||
scanner.close();
|
parser.close();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -69,7 +69,7 @@ class ChecksumTableExportHandler extends TextFileExportHandler {
|
||||||
|
|
||||||
// print header
|
// print header
|
||||||
out.format("; Generated by %s %s on %tF at %<tT%n", Settings.getApplicationName(), Settings.getApplicationVersion(), new Date());
|
out.format("; Generated by %s %s on %tF at %<tT%n", Settings.getApplicationName(), Settings.getApplicationVersion(), new Date());
|
||||||
out.format(";%n");
|
out.format("; charset=UTF-8%n");
|
||||||
out.format(";%n");
|
out.format(";%n");
|
||||||
|
|
||||||
// print data
|
// print data
|
||||||
|
|
|
@ -110,14 +110,18 @@ class ChecksumTableTransferablePolicy extends BackgroundFileTransferablePolicy<C
|
||||||
|
|
||||||
|
|
||||||
protected void loadVerificationFile(File file, HashType type) throws IOException, InterruptedException {
|
protected void loadVerificationFile(File file, HashType type) throws IOException, InterruptedException {
|
||||||
VerificationFileReader scanner = new VerificationFileReader(file, type.getFormat());
|
VerificationFileReader parser = createVerificationFileReader(file, type);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
// root for relative file paths in verification file
|
// root for relative file paths in verification file
|
||||||
File baseFolder = file.getParentFile();
|
File baseFolder = file.getParentFile();
|
||||||
|
|
||||||
while (scanner.hasNext()) {
|
while (parser.hasNext()) {
|
||||||
Entry<File, String> entry = scanner.next();
|
// make this possibly long-running operation interruptible
|
||||||
|
if (Thread.interrupted())
|
||||||
|
throw new InterruptedException();
|
||||||
|
|
||||||
|
Entry<File, String> entry = parser.next();
|
||||||
|
|
||||||
String name = normalizePath(entry.getKey());
|
String name = normalizePath(entry.getKey());
|
||||||
String hash = new String(entry.getValue());
|
String hash = new String(entry.getValue());
|
||||||
|
@ -126,13 +130,9 @@ class ChecksumTableTransferablePolicy extends BackgroundFileTransferablePolicy<C
|
||||||
ChecksumCell current = createComputationCell(name, baseFolder, type);
|
ChecksumCell current = createComputationCell(name, baseFolder, type);
|
||||||
|
|
||||||
publish(correct, current);
|
publish(correct, current);
|
||||||
|
|
||||||
// make this long-running operation interruptible
|
|
||||||
if (Thread.interrupted())
|
|
||||||
throw new InterruptedException();
|
|
||||||
}
|
}
|
||||||
} finally {
|
} finally {
|
||||||
scanner.close();
|
parser.close();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -260,21 +260,21 @@ class ChecksumTableTransferablePolicy extends BackgroundFileTransferablePolicy<C
|
||||||
* Completely read a verification file and resolve all relative file paths against a given base folder
|
* Completely read a verification file and resolve all relative file paths against a given base folder
|
||||||
*/
|
*/
|
||||||
private Map<File, String> importVerificationFile(File verificationFile, HashType hashType, File baseFolder) throws IOException {
|
private Map<File, String> importVerificationFile(File verificationFile, HashType hashType, File baseFolder) throws IOException {
|
||||||
VerificationFileReader reader = new VerificationFileReader(verificationFile, hashType.getFormat());
|
VerificationFileReader parser = createVerificationFileReader(verificationFile, hashType);
|
||||||
Map<File, String> content = new HashMap<File, String>();
|
Map<File, String> result = new HashMap<File, String>();
|
||||||
|
|
||||||
try {
|
try {
|
||||||
while (reader.hasNext()) {
|
while (parser.hasNext()) {
|
||||||
Entry<File, String> entry = reader.next();
|
Entry<File, String> entry = parser.next();
|
||||||
|
|
||||||
// resolve relative path, the hash is probably a substring, so we compact it, for memory reasons
|
// resolve relative path, the hash is probably a substring, so we compact it, for memory reasons
|
||||||
content.put(new File(baseFolder, entry.getKey().getPath()), new String(entry.getValue()));
|
result.put(new File(baseFolder, entry.getKey().getPath()), new String(entry.getValue()));
|
||||||
}
|
}
|
||||||
} finally {
|
} finally {
|
||||||
reader.close();
|
parser.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
return content;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -28,6 +28,7 @@ final class SubtitleUtilities {
|
||||||
public static List<SubtitleElement> decode(MemoryFile file) throws IOException {
|
public static List<SubtitleElement> decode(MemoryFile file) throws IOException {
|
||||||
// detect charset and read text content
|
// detect charset and read text content
|
||||||
CharsetDetector detector = new CharsetDetector();
|
CharsetDetector detector = new CharsetDetector();
|
||||||
|
detector.setDeclaredEncoding("UTF-8");
|
||||||
detector.enableInputFilter(true);
|
detector.enableInputFilter(true);
|
||||||
|
|
||||||
detector.setText(new ByteBufferInputStream(file.getData()));
|
detector.setText(new ByteBufferInputStream(file.getData()));
|
||||||
|
|
Loading…
Reference in New Issue