filebot/source/net/filebot/web/OpenSubtitlesHasher.java

85 lines
2.8 KiB
Java

package net.filebot.web;
import java.io.DataInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.LongBuffer;
import java.nio.channels.FileChannel;
import java.nio.channels.FileChannel.MapMode;
import java.nio.file.StandardOpenOption;
/**
* Hash code is based on Media Player Classic. In natural language it calculates: size + 64bit checksum of the first and last 64k (even if they overlap because the file is smaller than 128k).
*/
public final class OpenSubtitlesHasher {
/**
* Size of the chunks that will be hashed in bytes (64 KB)
*/
public static final int HASH_CHUNK_SIZE = 64 * 1024;
public static String computeHashNIO(File file) throws IOException {
long size = file.length();
long chunkSizeForFile = Math.min(HASH_CHUNK_SIZE, size);
try (FileChannel channel = FileChannel.open(file.toPath(), StandardOpenOption.READ)) {
long head = computeHashForChunk(channel.map(MapMode.READ_ONLY, 0, chunkSizeForFile));
long tail = computeHashForChunk(channel.map(MapMode.READ_ONLY, Math.max(size - HASH_CHUNK_SIZE, 0), chunkSizeForFile));
return String.format("%016x", size + head + tail);
}
}
public static String computeHash(File file) throws IOException {
FileInputStream in = new FileInputStream(file);
try {
return computeHash(in, file.length());
} finally {
in.close();
}
}
public static String computeHash(InputStream stream, long length) throws IOException {
int chunkSizeForFile = (int) Math.min(HASH_CHUNK_SIZE, length);
// buffer that will contain the head and the tail chunk, chunks will overlap if length is smaller than two chunks
byte[] chunkBytes = new byte[(int) Math.min(2 * HASH_CHUNK_SIZE, length)];
DataInputStream in = new DataInputStream(stream);
// first chunk
in.readFully(chunkBytes, 0, chunkSizeForFile);
long position = chunkSizeForFile;
long tailChunkPosition = length - chunkSizeForFile;
// seek to position of the tail chunk, or not at all if length is smaller than two chunks
while (position < tailChunkPosition && (position += in.skip(tailChunkPosition - position)) >= 0) {
}
// second chunk, or the rest of the data if length is smaller than two chunks
in.readFully(chunkBytes, chunkSizeForFile, chunkBytes.length - chunkSizeForFile);
long head = computeHashForChunk(ByteBuffer.wrap(chunkBytes, 0, chunkSizeForFile));
long tail = computeHashForChunk(ByteBuffer.wrap(chunkBytes, chunkBytes.length - chunkSizeForFile, chunkSizeForFile));
return String.format("%016x", length + head + tail);
}
private static long computeHashForChunk(ByteBuffer buffer) {
LongBuffer longBuffer = buffer.order(ByteOrder.LITTLE_ENDIAN).asLongBuffer();
long hash = 0;
while (longBuffer.hasRemaining()) {
hash += longBuffer.get();
}
return hash;
}
}