2015-05-17 05:39:58 -04:00
|
|
|
package net.filebot.util;
|
|
|
|
|
2015-05-20 03:35:40 -04:00
|
|
|
import java.io.IOException;
|
|
|
|
import java.io.InputStream;
|
|
|
|
import java.io.InputStreamReader;
|
|
|
|
import java.io.Reader;
|
2015-05-17 05:39:58 -04:00
|
|
|
import java.nio.CharBuffer;
|
|
|
|
import java.nio.charset.Charset;
|
|
|
|
import java.nio.charset.StandardCharsets;
|
|
|
|
|
2015-05-17 06:18:37 -04:00
|
|
|
import com.ibm.icu.text.CharsetDetector;
|
|
|
|
|
2015-05-17 05:39:58 -04:00
|
|
|
public class UnicodeReader extends Reader {
|
|
|
|
|
|
|
|
private static final int BOM_SIZE = 4;
|
|
|
|
|
2015-05-17 06:18:37 -04:00
|
|
|
private final Reader reader;
|
2015-05-17 05:39:58 -04:00
|
|
|
|
2015-12-06 14:33:12 -05:00
|
|
|
public UnicodeReader(InputStream stream, boolean guessCharset, Charset defaultCharset) throws IOException {
|
|
|
|
if (!stream.markSupported()) {
|
2015-05-17 05:39:58 -04:00
|
|
|
throw new IllegalArgumentException("stream must support mark");
|
2015-12-06 14:33:12 -05:00
|
|
|
}
|
2015-05-17 05:39:58 -04:00
|
|
|
|
|
|
|
stream.mark(BOM_SIZE);
|
|
|
|
byte bom[] = new byte[BOM_SIZE];
|
|
|
|
stream.read(bom, 0, bom.length);
|
|
|
|
|
2015-05-17 06:18:37 -04:00
|
|
|
Charset bomEncoding = null;
|
2015-05-17 05:39:58 -04:00
|
|
|
int skip = 0;
|
|
|
|
|
|
|
|
if ((bom[0] == (byte) 0xEF) && (bom[1] == (byte) 0xBB) && (bom[2] == (byte) 0xBF)) {
|
2015-05-17 06:18:37 -04:00
|
|
|
bomEncoding = StandardCharsets.UTF_8;
|
2015-05-17 05:39:58 -04:00
|
|
|
skip = 3;
|
|
|
|
} else if ((bom[0] == (byte) 0xFE) && (bom[1] == (byte) 0xFF)) {
|
2015-05-17 06:18:37 -04:00
|
|
|
bomEncoding = StandardCharsets.UTF_16BE;
|
2015-05-17 05:39:58 -04:00
|
|
|
skip = 2;
|
|
|
|
} else if ((bom[0] == (byte) 0xFF) && (bom[1] == (byte) 0xFE)) {
|
2015-05-17 06:18:37 -04:00
|
|
|
bomEncoding = StandardCharsets.UTF_16LE;
|
2015-05-17 05:39:58 -04:00
|
|
|
skip = 2;
|
|
|
|
} else if ((bom[0] == (byte) 0x00) && (bom[1] == (byte) 0x00) && (bom[2] == (byte) 0xFE) && (bom[3] == (byte) 0xFF)) {
|
2015-05-17 06:18:37 -04:00
|
|
|
bomEncoding = Charset.forName("UTF-32BE");
|
2015-05-17 05:39:58 -04:00
|
|
|
skip = 4;
|
|
|
|
} else if ((bom[0] == (byte) 0xFF) && (bom[1] == (byte) 0xFE) && (bom[2] == (byte) 0x00) && (bom[3] == (byte) 0x00)) {
|
2015-05-17 06:18:37 -04:00
|
|
|
bomEncoding = Charset.forName("UTF-32LE");
|
2015-05-17 05:39:58 -04:00
|
|
|
skip = 4;
|
|
|
|
}
|
|
|
|
|
2015-05-17 06:18:37 -04:00
|
|
|
// rewind and skip BOM
|
2015-05-17 05:39:58 -04:00
|
|
|
stream.reset();
|
|
|
|
stream.skip(skip);
|
|
|
|
|
2015-05-17 06:18:37 -04:00
|
|
|
// guess character encoding if necessary
|
2015-12-06 14:33:12 -05:00
|
|
|
if (bomEncoding != null) {
|
2015-05-17 06:18:37 -04:00
|
|
|
// initialize reader via BOM
|
|
|
|
reader = new InputStreamReader(stream, bomEncoding);
|
2015-12-06 14:33:12 -05:00
|
|
|
} else if (bomEncoding == null && guessCharset) {
|
|
|
|
// auto-detect encoding
|
|
|
|
reader = new CharsetDetector().getReader(stream, defaultCharset.name());
|
|
|
|
} else {
|
|
|
|
// use default
|
|
|
|
reader = new InputStreamReader(stream, defaultCharset);
|
2015-05-17 06:18:37 -04:00
|
|
|
}
|
2015-05-17 05:39:58 -04:00
|
|
|
}
|
|
|
|
|
2015-07-25 18:47:19 -04:00
|
|
|
@Override
|
2015-05-17 05:39:58 -04:00
|
|
|
public int hashCode() {
|
|
|
|
return reader.hashCode();
|
|
|
|
}
|
|
|
|
|
2015-07-25 18:47:19 -04:00
|
|
|
@Override
|
2015-05-17 05:39:58 -04:00
|
|
|
public int read(CharBuffer target) throws IOException {
|
|
|
|
return reader.read(target);
|
|
|
|
}
|
|
|
|
|
2015-07-25 18:47:19 -04:00
|
|
|
@Override
|
2015-05-17 05:39:58 -04:00
|
|
|
public boolean equals(Object obj) {
|
|
|
|
return reader.equals(obj);
|
|
|
|
}
|
|
|
|
|
2015-07-25 18:47:19 -04:00
|
|
|
@Override
|
2015-05-17 05:39:58 -04:00
|
|
|
public int read(char[] cbuf) throws IOException {
|
|
|
|
return reader.read(cbuf);
|
|
|
|
}
|
|
|
|
|
2015-07-25 18:47:19 -04:00
|
|
|
@Override
|
2015-05-17 05:39:58 -04:00
|
|
|
public int read() throws IOException {
|
|
|
|
return reader.read();
|
|
|
|
}
|
|
|
|
|
2015-07-25 18:47:19 -04:00
|
|
|
@Override
|
2015-05-17 05:39:58 -04:00
|
|
|
public int read(char[] cbuf, int offset, int length) throws IOException {
|
|
|
|
return reader.read(cbuf, offset, length);
|
|
|
|
}
|
|
|
|
|
2015-07-25 18:47:19 -04:00
|
|
|
@Override
|
2015-05-17 05:39:58 -04:00
|
|
|
public long skip(long n) throws IOException {
|
|
|
|
return reader.skip(n);
|
|
|
|
}
|
|
|
|
|
2015-07-25 18:47:19 -04:00
|
|
|
@Override
|
2015-05-17 05:39:58 -04:00
|
|
|
public boolean ready() throws IOException {
|
|
|
|
return reader.ready();
|
|
|
|
}
|
|
|
|
|
2015-07-25 18:47:19 -04:00
|
|
|
@Override
|
2015-05-17 05:39:58 -04:00
|
|
|
public void close() throws IOException {
|
|
|
|
reader.close();
|
|
|
|
}
|
|
|
|
|
2015-07-25 18:47:19 -04:00
|
|
|
@Override
|
2015-05-17 05:39:58 -04:00
|
|
|
public boolean markSupported() {
|
|
|
|
return reader.markSupported();
|
|
|
|
}
|
|
|
|
|
2015-07-25 18:47:19 -04:00
|
|
|
@Override
|
2015-05-17 05:39:58 -04:00
|
|
|
public void mark(int readAheadLimit) throws IOException {
|
|
|
|
reader.mark(readAheadLimit);
|
|
|
|
}
|
|
|
|
|
2015-07-25 18:47:19 -04:00
|
|
|
@Override
|
2015-05-17 05:39:58 -04:00
|
|
|
public void reset() throws IOException {
|
|
|
|
reader.reset();
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|