filebot/source/net/filebot/util/BOM.java

74 lines
1.3 KiB
Java

package net.filebot.util;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
/**
 * Byte order mark (BOM) signatures and the character sets they identify.
 *
 * <p>Each constant holds the exact byte sequence that marks the start of a stream in the
 * corresponding encoding. Use {@link #detect(byte[])} to identify the encoding from the leading
 * bytes of some content.
 */
public enum BOM {

    UTF_8((byte) 0xEF, (byte) 0xBB, (byte) 0xBF),
    UTF_16BE((byte) 0xFE, (byte) 0xFF),
    UTF_16LE((byte) 0xFF, (byte) 0xFE),
    UTF_32BE((byte) 0x00, (byte) 0x00, (byte) 0xFE, (byte) 0xFF),
    UTF_32LE((byte) 0xFF, (byte) 0xFE, (byte) 0x00, (byte) 0x00),
    GB_18030((byte) 0x84, (byte) 0x31, (byte) 0x95, (byte) 0x33);

    /** Length in bytes of the longest signature declared in this enum. */
    public static final int SIZE = 4;

    /** Signature bytes of this BOM; never modified after construction. */
    private final byte[] bom;

    BOM(byte... bom) {
        this.bom = bom;
    }

    /**
     * Returns the length of this BOM's signature.
     *
     * @return the number of signature bytes
     */
    public int size() {
        return bom.length;
    }

    /**
     * Checks whether the given data starts with this BOM's signature.
     *
     * @param bytes the leading bytes of the content to inspect; must not be {@code null}
     * @return {@code true} if {@code bytes} is at least as long as the signature and begins with
     *         it, {@code false} otherwise
     */
    public boolean matches(byte[] bytes) {
        if (bytes.length < bom.length) {
            return false;
        }

        for (int i = 0; i < bom.length; i++) {
            if (bom[i] != bytes[i]) {
                return false;
            }
        }
        return true;
    }

    /**
     * Returns the character set identified by this BOM.
     *
     * @return the matching {@link Charset}
     */
    public Charset getCharset() {
        switch (this) {
            case UTF_8:
                return StandardCharsets.UTF_8;
            case UTF_16BE:
                return StandardCharsets.UTF_16BE;
            case UTF_16LE:
                return StandardCharsets.UTF_16LE;
            case UTF_32BE:
                return Charset.forName("UTF-32BE");
            case UTF_32LE:
                return Charset.forName("UTF-32LE");
            case GB_18030:
                return Charset.forName("GB18030");
        }

        // only reachable if a constant is added without a corresponding case above
        return null;
    }

    /**
     * Detects which BOM, if any, the given data starts with.
     *
     * <p>When more than one signature matches, the longest one wins. This matters because the
     * UTF-16LE signature ({@code FF FE}) is a prefix of the UTF-32LE signature
     * ({@code FF FE 00 00}); picking the first match in declaration order would report every
     * UTF-32LE stream as UTF-16LE.
     *
     * @param bytes the leading bytes of the content to inspect; must not be {@code null}
     * @return the longest matching BOM, or {@code null} if no signature matches
     */
    public static BOM detect(byte[] bytes) {
        BOM best = null;

        for (BOM candidate : values()) {
            if (candidate.matches(bytes) && (best == null || candidate.size() > best.size())) {
                best = candidate;
            }
        }

        return best;
    }

}