added OpenSubtitlesHasher

This commit is contained in:
Reinhard Pointner 2008-02-06 20:48:54 +00:00
parent 637ce351ff
commit 319a528542
3 changed files with 127 additions and 19 deletions

View File

@ -12,7 +12,7 @@ import javax.swing.SwingWorker;
public class ChecksumComputationTask extends SwingWorker<Long, Object> {
private static final int MAX_READ_LENGTH = 200 * 1024; // 200 KB
private static final int CHUNK_SIZE = 20 * 1024;
private File file;
@ -27,13 +27,11 @@ public class ChecksumComputationTask extends SwingWorker<Long, Object> {
CheckedInputStream cis = new CheckedInputStream(new FileInputStream(file), new CRC32());
long length = file.length();
if (length > 0) {
long done = 0;
int bufferLength = (int) Math.min(length, MAX_READ_LENGTH);
// don't allow bufferLength == 0
if (bufferLength < 1)
bufferLength = 1;
int bufferLength = (int) Math.min(length, CHUNK_SIZE);
byte[] buffer = new byte[bufferLength];
@ -45,11 +43,9 @@ public class ChecksumComputationTask extends SwingWorker<Long, Object> {
done += bytesRead;
if (length > 0) {
int progress = (int) ((done * 100) / length);

View File

@ -23,7 +23,7 @@ import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
public class HtmlUtil {
class HtmlUtil {
private static Charset getCharset(String contentType) {
if (contentType != null) {

View File

@ -0,0 +1,112 @@
package net.sourceforge.filebot.web;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.math.BigInteger;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.channels.FileChannel.MapMode;
/**
 * Hash code is based on Media Player Classic. In natural language it calculates: size + 64bit
 * checksum of the first and last 64k (even if they overlap because the file is smaller than
 * 128k).
 */
class OpenSubtitlesHasher {

    /**
     * Size of the chunks that will be hashed in bytes (64 KB)
     */
    private static final int HASH_CHUNK_SIZE = 64 * 1024;

    /**
     * Size of the checksum in bytes (64 Bit)
     */
    private static final int HASH_SIZE = 8;


    /**
     * Compute the hash of the given file.
     *
     * @param file the file to hash
     * @return the hash as a zero-padded, lower-case hex string of {@code HASH_SIZE * 2} digits
     * @throws IOException if the file cannot be opened or mapped
     */
    public static String computeHash(File file) throws IOException {
        long size = file.length();
        long chunkSizeForFile = Math.min(HASH_CHUNK_SIZE, size);

        FileInputStream in = new FileInputStream(file);

        try {
            FileChannel fileChannel = in.getChannel();

            BigInteger head = computeHashForChunk(fileChannel, 0, chunkSizeForFile);
            BigInteger tail = computeHashForChunk(fileChannel, Math.max(size - HASH_CHUNK_SIZE, 0), chunkSizeForFile);

            // size + head + tail
            BigInteger bigHash = BigInteger.valueOf(size).add(head.add(tail));

            // keep only the low 64 bits of the sum (i.e. wrap around like unsigned 64-bit arithmetic)
            byte[] hash = getTrailingBytes(bigHash.toByteArray(), HASH_SIZE);

            return format(new BigInteger(1, hash));
        } finally {
            // closing the stream also closes the channel obtained from it
            in.close();
        }
    }


    /**
     * Sum up the given region of the file as a sequence of unsigned little-endian 64-bit words.
     * A trailing word shorter than {@code HASH_SIZE} bytes is padded with zero bytes.
     *
     * @param fileChannel channel of the file to read
     * @param start position of the first byte of the region
     * @param size number of bytes in the region
     * @return the (unbounded) sum of all words in the region
     * @throws IOException if the region cannot be mapped
     */
    private static BigInteger computeHashForChunk(FileChannel fileChannel, long start, long size) throws IOException {
        // nothing to map for an empty region
        if (size <= 0)
            return BigInteger.ZERO;

        MappedByteBuffer buffer = fileChannel.map(MapMode.READ_ONLY, start, size);

        BigInteger bigHash = BigInteger.ZERO;
        byte[] bytes = new byte[HASH_SIZE];

        while (buffer.hasRemaining()) {
            int count = Math.min(HASH_SIZE, buffer.remaining());
            buffer.get(bytes, 0, count);

            // clear whatever the previous word left behind if this word is incomplete
            for (int i = count; i < HASH_SIZE; i++)
                bytes[i] = 0;

            // BigInteger expects a big-endian byte-order, so we reverse the byte array
            bigHash = bigHash.add(new BigInteger(1, reverse(bytes)));
        }

        return bigHash;
    }


    /**
     * Format the hash as hex string, left-padded with zeros to the full checksum width.
     *
     * @param hash non-negative hash value
     * @return hex string with at least {@code HASH_SIZE * 2} digits
     */
    private static String format(BigInteger hash) {
        // 1 byte -> 2 hex digits
        int minLength = HASH_SIZE * 2;

        StringBuilder sb = new StringBuilder(minLength);
        sb.append(hash.toString(16));

        while (sb.length() < minLength) {
            sb.insert(0, "0");
        }

        return sb.toString();
    }


    /**
     * copy the last n bytes to a new array
     *
     * @param src original array
     * @param n number of trailing bytes
     * @return new array
     */
    private static byte[] getTrailingBytes(byte[] src, int n) {
        int length = Math.min(src.length, n);

        byte[] dest = new byte[length];

        int offsetSrc = Math.max(src.length - n, 0);
        System.arraycopy(src, offsetSrc, dest, 0, length);

        return dest;
    }


    /**
     * Create a copy of the given array with its elements in reverse order.
     *
     * @param bytes original array (not modified)
     * @return new array holding the bytes back to front
     */
    private static byte[] reverse(byte[] bytes) {
        byte[] reverseBytes = new byte[bytes.length];

        for (int forward = 0, backward = bytes.length; forward < bytes.length; ++forward)
            reverseBytes[forward] = bytes[--backward];

        return reverseBytes;
    }

}