* auto-detect encoding when list files are dropped into rename panel

This commit is contained in:
Reinhard Pointner 2011-09-04 23:50:54 +00:00
parent abfaf9f6c7
commit 332f371636
5 changed files with 30 additions and 30 deletions

View File

@ -2,19 +2,14 @@
package net.sourceforge.filebot.hash;
import java.io.BufferedInputStream;
import static net.sourceforge.tuned.FileUtilities.*;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.Map.Entry;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import com.ibm.icu.text.CharsetDetector;
import com.ibm.icu.text.CharsetMatch;
public final class VerificationUtilities {
@ -50,19 +45,6 @@ public final class VerificationUtilities {
}
/**
 * Open a verification file as text, guessing its character encoding, and wrap
 * it in a {@link VerificationFileReader} for the given hash type.
 *
 * The encoding is guessed with ICU's CharsetDetector (hinted towards UTF-8).
 * If no charset matches, or no reader can be obtained from the match, the file
 * is read as UTF-8 instead.
 *
 * @param file verification file to read
 * @param type hash type whose format is passed to the reader
 * @return reader over the entries of the verification file; the caller is
 *         responsible for closing it
 * @throws IOException if the file cannot be opened or read
 */
public static VerificationFileReader createVerificationFileReader(File file, HashType type) throws IOException {
    // the detector needs a stream that supports mark/reset, hence the buffering
    BufferedInputStream in = new BufferedInputStream(new FileInputStream(file));
    Reader source = null;

    try {
        // detect charset and read text content
        CharsetDetector detector = new CharsetDetector();
        detector.setDeclaredEncoding("UTF-8");
        detector.setText(in);

        CharsetMatch charset = detector.detect();
        if (charset != null) {
            // NOTE(review): getReader() appears to return null when the stream cannot be
            // reset or the charset is unsupported -- confirm against the ICU version in use
            source = charset.getReader();
        }
    } finally {
        // The detection stream is only handed off when a reader was created on top of it.
        // In every other case (no match, null reader, exception) it must be closed here,
        // otherwise the file handle leaks.
        if (source == null) {
            try {
                in.close();
            } catch (IOException ignored) {
                // best effort: do not mask the original failure or block the UTF-8 fallback
            }
        }
    }

    if (source == null) {
        // assume UTF-8 by default, reading from a fresh stream
        source = new InputStreamReader(new FileInputStream(file), "UTF-8");
    }

    return new VerificationFileReader(source, type.getFormat());
}
private static String getHashFromVerificationFile(File folder, File target, HashType type, int depth, int maxDepth) throws IOException {
// stop if we reached max depth or the file system root
if (folder == null || depth > maxDepth)
@ -70,7 +52,7 @@ public final class VerificationUtilities {
// scan all sfv files in this folder
for (File verificationFile : folder.listFiles(type.getFilter())) {
VerificationFileReader parser = createVerificationFileReader(verificationFile, type);
VerificationFileReader parser = new VerificationFileReader(createTextReader(verificationFile), type.getFormat());
try {
while (parser.hasNext()) {

View File

@ -127,7 +127,7 @@ class MovieHashMatcher implements AutoCompleteMatcher {
Set<Integer> collection = new HashSet<Integer>();
for (File file : files) {
Scanner scanner = new Scanner(new FileInputStream(file));
Scanner scanner = new Scanner(new FileInputStream(file), "UTF-8");
try {
// scan for imdb id patterns like tt1234567

View File

@ -10,8 +10,6 @@ import static net.sourceforge.tuned.FileUtilities.*;
import java.awt.datatransfer.DataFlavor;
import java.awt.datatransfer.Transferable;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
@ -116,10 +114,10 @@ class NamesListTransferablePolicy extends FileTransferablePolicy {
}
protected void loadListFiles(List<File> files, List<Object> values) throws FileNotFoundException {
protected void loadListFiles(List<File> files, List<Object> values) throws IOException {
for (File file : files) {
// don't use new Scanner(File) because of BUG 6368019 (http://bugs.sun.com/view_bug.do?bug_id=6368019)
Scanner scanner = new Scanner(new FileInputStream(file), "UTF-8");
Scanner scanner = new Scanner(createTextReader(file));
while (scanner.hasNextLine()) {
String line = scanner.nextLine().trim();
@ -143,7 +141,7 @@ class NamesListTransferablePolicy extends FileTransferablePolicy {
continue;
// add all file names from verification file
VerificationFileReader parser = createVerificationFileReader(verificationFile, type);
VerificationFileReader parser = new VerificationFileReader(createTextReader(verificationFile), type.getFormat());
try {
while (parser.hasNext()) {

View File

@ -5,6 +5,7 @@ package net.sourceforge.filebot.ui.panel.sfv;
import static java.util.Collections.*;
import static net.sourceforge.filebot.hash.VerificationUtilities.*;
import static net.sourceforge.filebot.ui.NotificationLogging.*;
import static net.sourceforge.tuned.FileUtilities.*;
import java.io.File;
import java.io.IOException;
@ -110,7 +111,7 @@ class ChecksumTableTransferablePolicy extends BackgroundFileTransferablePolicy<C
protected void loadVerificationFile(File file, HashType type) throws IOException, InterruptedException {
VerificationFileReader parser = createVerificationFileReader(file, type);
VerificationFileReader parser = new VerificationFileReader(createTextReader(file), type.getFormat());
try {
// root for relative file paths in verification file
@ -260,7 +261,7 @@ class ChecksumTableTransferablePolicy extends BackgroundFileTransferablePolicy<C
* Completely read a verification file and resolve all relative file paths against a given base folder
*/
private Map<File, String> importVerificationFile(File verificationFile, HashType hashType, File baseFolder) throws IOException {
VerificationFileReader parser = createVerificationFileReader(verificationFile, hashType);
VerificationFileReader parser = new VerificationFileReader(createTextReader(verificationFile), hashType.getFormat());
Map<File, String> result = new HashMap<File, String>();
try {
@ -276,7 +277,6 @@ class ChecksumTableTransferablePolicy extends BackgroundFileTransferablePolicy<C
return result;
}
}
}

View File

@ -2,11 +2,14 @@
package net.sourceforge.tuned;
import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileFilter;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
@ -17,6 +20,9 @@ import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import com.ibm.icu.text.CharsetDetector;
import com.ibm.icu.text.CharsetMatch;
public final class FileUtilities {
@ -40,6 +46,20 @@ public final class FileUtilities {
}
/**
 * Open a file as text, guessing its character encoding.
 *
 * The encoding is guessed with ICU's CharsetDetector (hinted towards UTF-8).
 * If no charset matches, or no reader can be obtained from the match, the file
 * is read as UTF-8 instead. This method never returns null.
 *
 * @param file text file to read
 * @return reader decoding the file content; the caller is responsible for closing it
 * @throws IOException if the file cannot be opened or read
 */
public static Reader createTextReader(File file) throws IOException {
    // the detector needs a stream that supports mark/reset, hence the buffering
    InputStream in = new BufferedInputStream(new FileInputStream(file));
    Reader reader = null;

    try {
        CharsetDetector detector = new CharsetDetector();
        detector.setDeclaredEncoding("UTF-8"); // small boost for UTF-8 as default encoding
        detector.setText(in);

        CharsetMatch charset = detector.detect();
        if (charset != null) {
            // NOTE(review): getReader() appears to return null when the stream cannot be
            // reset or the charset is unsupported -- confirm against the ICU version in use
            reader = charset.getReader();
        }
    } finally {
        // The detection stream is only handed off when a reader was created on top of it.
        // In every other case (no match, null reader, exception) it must be closed here,
        // otherwise the file handle leaks.
        if (reader == null) {
            try {
                in.close();
            } catch (IOException ignored) {
                // best effort: do not mask the original failure or block the UTF-8 fallback
            }
        }
    }

    if (reader != null) {
        return reader;
    }

    // assume UTF-8 by default, reading from a fresh stream
    return new InputStreamReader(new FileInputStream(file), "UTF-8");
}
/**
* Pattern used for matching file extensions.
*