1
0
mirror of https://github.com/mitb-archive/filebot synced 2024-11-15 05:45:05 -05:00
filebot/source/net/filebot/util/UnicodeReader.java

126 lines
3.0 KiB
Java
Raw Normal View History

2015-05-17 05:39:58 -04:00
package net.filebot.util;
2015-05-20 03:35:40 -04:00
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
2015-05-17 05:39:58 -04:00
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
2015-05-17 06:18:37 -04:00
import com.ibm.icu.text.CharsetDetector;
2015-05-17 05:39:58 -04:00
public class UnicodeReader extends Reader {
private static final int BOM_SIZE = 4;
2015-05-17 06:18:37 -04:00
private final Reader reader;
2015-05-17 05:39:58 -04:00
public UnicodeReader(InputStream stream, boolean guessCharset, Charset defaultCharset) throws IOException {
if (!stream.markSupported()) {
2015-05-17 05:39:58 -04:00
throw new IllegalArgumentException("stream must support mark");
}
2015-05-17 05:39:58 -04:00
stream.mark(BOM_SIZE);
byte bom[] = new byte[BOM_SIZE];
stream.read(bom, 0, bom.length);
2015-05-17 06:18:37 -04:00
Charset bomEncoding = null;
2015-05-17 05:39:58 -04:00
int skip = 0;
if ((bom[0] == (byte) 0xEF) && (bom[1] == (byte) 0xBB) && (bom[2] == (byte) 0xBF)) {
2015-05-17 06:18:37 -04:00
bomEncoding = StandardCharsets.UTF_8;
2015-05-17 05:39:58 -04:00
skip = 3;
} else if ((bom[0] == (byte) 0xFE) && (bom[1] == (byte) 0xFF)) {
2015-05-17 06:18:37 -04:00
bomEncoding = StandardCharsets.UTF_16BE;
2015-05-17 05:39:58 -04:00
skip = 2;
} else if ((bom[0] == (byte) 0xFF) && (bom[1] == (byte) 0xFE)) {
2015-05-17 06:18:37 -04:00
bomEncoding = StandardCharsets.UTF_16LE;
2015-05-17 05:39:58 -04:00
skip = 2;
} else if ((bom[0] == (byte) 0x00) && (bom[1] == (byte) 0x00) && (bom[2] == (byte) 0xFE) && (bom[3] == (byte) 0xFF)) {
2015-05-17 06:18:37 -04:00
bomEncoding = Charset.forName("UTF-32BE");
2015-05-17 05:39:58 -04:00
skip = 4;
} else if ((bom[0] == (byte) 0xFF) && (bom[1] == (byte) 0xFE) && (bom[2] == (byte) 0x00) && (bom[3] == (byte) 0x00)) {
2015-05-17 06:18:37 -04:00
bomEncoding = Charset.forName("UTF-32LE");
2015-05-17 05:39:58 -04:00
skip = 4;
}
2015-05-17 06:18:37 -04:00
// rewind and skip BOM
2015-05-17 05:39:58 -04:00
stream.reset();
stream.skip(skip);
2015-05-17 06:18:37 -04:00
// guess character encoding if necessary
if (bomEncoding != null) {
2015-05-17 06:18:37 -04:00
// initialize reader via BOM
reader = new InputStreamReader(stream, bomEncoding);
} else if (bomEncoding == null && guessCharset) {
// auto-detect encoding
reader = new CharsetDetector().getReader(stream, defaultCharset.name());
} else {
// use default
reader = new InputStreamReader(stream, defaultCharset);
2015-05-17 06:18:37 -04:00
}
2015-05-17 05:39:58 -04:00
}
2015-07-25 18:47:19 -04:00
@Override
2015-05-17 05:39:58 -04:00
public int hashCode() {
return reader.hashCode();
}
2015-07-25 18:47:19 -04:00
@Override
2015-05-17 05:39:58 -04:00
public int read(CharBuffer target) throws IOException {
return reader.read(target);
}
2015-07-25 18:47:19 -04:00
@Override
2015-05-17 05:39:58 -04:00
public boolean equals(Object obj) {
return reader.equals(obj);
}
2015-07-25 18:47:19 -04:00
@Override
2015-05-17 05:39:58 -04:00
public int read(char[] cbuf) throws IOException {
return reader.read(cbuf);
}
2015-07-25 18:47:19 -04:00
@Override
2015-05-17 05:39:58 -04:00
public int read() throws IOException {
return reader.read();
}
2015-07-25 18:47:19 -04:00
@Override
2015-05-17 05:39:58 -04:00
public int read(char[] cbuf, int offset, int length) throws IOException {
return reader.read(cbuf, offset, length);
}
2015-07-25 18:47:19 -04:00
@Override
2015-05-17 05:39:58 -04:00
public long skip(long n) throws IOException {
return reader.skip(n);
}
2015-07-25 18:47:19 -04:00
@Override
2015-05-17 05:39:58 -04:00
public boolean ready() throws IOException {
return reader.ready();
}
2015-07-25 18:47:19 -04:00
@Override
2015-05-17 05:39:58 -04:00
public void close() throws IOException {
reader.close();
}
2015-07-25 18:47:19 -04:00
@Override
2015-05-17 05:39:58 -04:00
public boolean markSupported() {
return reader.markSupported();
}
2015-07-25 18:47:19 -04:00
@Override
2015-05-17 05:39:58 -04:00
public void mark(int readAheadLimit) throws IOException {
reader.mark(readAheadLimit);
}
2015-07-25 18:47:19 -04:00
@Override
2015-05-17 05:39:58 -04:00
public void reset() throws IOException {
reader.reset();
}
}