/*
 * deep-c-rsc/mudclient204-headless/WordFilter.java
 *
 * 712 lines
 * 25 KiB
 * Java
 */

/**
 * Static text filter that overwrites matched spans of a message with {@code '*'}.
 * <p>
 * Four passes run over a lower-cased copy of the message (see {@link #filter(String)}):
 * a "tld" pass driven by {@link #tldList}, a word pass driven by {@link #badList}, a word pass
 * driven by {@link #hostList}, and a pass that masks four consecutive small numbers.
 * The tables are populated by {@link #loadFilters(Buffer, Buffer, Buffer, Buffer)} and must be
 * loaded before {@link #filter(String)} is called.
 */
public class WordFilter {

    /** Enables {@code System.out} tracing in {@link #applyTldFilter}. */
    static boolean DEBUGTLD;

    /** Enables {@code System.out} tracing in the word matcher and in {@link #word2hash(char[])}. */
    static boolean DEBUGWORD;

    /**
     * When set, {@link #filter(String)} restores the caller's upper-case letters on unmasked
     * positions and then runs {@link #toLowercase(char[])} over the result.
     */
    static boolean forceLowercase = true;

    /** Fragment hashes read by {@link #loadFragments(Buffer)}; binary-searched, so expected sorted ascending. */
    static int[] hashFragments;

    /** Words matched by {@link #applyBadwordFilter(char[])}. */
    static char[][] badList;

    /** Per-word (previous id, next id) exception pairs for {@link #badList}; an entry may be {@code null}. */
    static byte[][][] badCharIds;

    /** Words matched by {@link #applyHostFilter(char[])}. */
    static char[][] hostList;

    /** Per-word (previous id, next id) exception pairs for {@link #hostList}; an entry may be {@code null}. */
    static byte[][][] hostCharIds;

    /** Words matched by {@link #applyTldFilter}. */
    static char[][] tldList;

    /** Type code (1, 2 or 3 are acted upon) for each entry of {@link #tldList}. */
    static int[] tldType;

    /** Words that are written back verbatim after filtering, wherever they occur in the message. */
    static String[] ignoreList = {
        "cook", "cook's", "cooks", "seeks", "sheet"
    };

    /**
     * Loads all four filter tables.
     *
     * @param fragments source for {@link #hashFragments}
     * @param bad       source for {@link #badList} / {@link #badCharIds}
     * @param host      source for {@link #hostList} / {@link #hostCharIds}
     * @param tld       source for {@link #tldList} / {@link #tldType}
     */
    public static void loadFilters(Buffer fragments, Buffer bad, Buffer host, Buffer tld) {
        loadBad(bad);
        loadHost(host);
        loadFragments(fragments);
        loadTld(tld);
    }

    /**
     * Reads the tld table: an int count, then per entry a type byte, a length byte and that many
     * character bytes.
     *
     * @param buffer source positioned at the start of the table
     */
    public static void loadTld(Buffer buffer) {
        int wordcount = buffer.getUnsignedInt();
        tldList = new char[wordcount][];
        tldType = new int[wordcount];
        for (int idx = 0; idx < wordcount; idx++) {
            tldType[idx] = buffer.getUnsignedByte();
            char[] word = new char[buffer.getUnsignedByte()];
            for (int k = 0; k < word.length; k++) {
                word[k] = (char) buffer.getUnsignedByte();
            }
            tldList[idx] = word;
        }
    }

    /**
     * Reads the bad-word table (int count followed by entries in {@link #readBuffer} layout).
     *
     * @param buffer source positioned at the start of the table
     */
    public static void loadBad(Buffer buffer) {
        int wordcount = buffer.getUnsignedInt();
        badList = new char[wordcount][];
        badCharIds = new byte[wordcount][][];
        readBuffer(buffer, badList, badCharIds);
    }

    /**
     * Reads the host table (int count followed by entries in {@link #readBuffer} layout).
     *
     * @param buffer source positioned at the start of the table
     */
    public static void loadHost(Buffer buffer) {
        int wordcount = buffer.getUnsignedInt();
        hostList = new char[wordcount][];
        hostCharIds = new byte[wordcount][][];
        readBuffer(buffer, hostList, hostCharIds);
    }

    /**
     * Reads the fragment hash table: an int count followed by that many unsigned shorts.
     *
     * @param buffer source positioned at the start of the table
     */
    public static void loadFragments(Buffer buffer) {
        hashFragments = new int[buffer.getUnsignedInt()];
        for (int i = 0; i < hashFragments.length; i++) {
            hashFragments[i] = buffer.getUnsignedShort();
        }
    }

    /**
     * Fills {@code wordList} and {@code charIds} from {@code buffer}. Each entry is a length byte,
     * that many character bytes, an int pair count, and that many two-byte pairs.
     *
     * @param buffer   source positioned at the first entry
     * @param wordList destination; its length decides how many entries are read
     * @param charIds  destination for the pairs; slots of entries with zero pairs are left untouched
     */
    public static void readBuffer(Buffer buffer, char[][] wordList, byte[][][] charIds) {
        for (int i = 0; i < wordList.length; i++) {
            char[] currentWord = new char[buffer.getUnsignedByte()];
            for (int j = 0; j < currentWord.length; j++) {
                currentWord[j] = (char) buffer.getUnsignedByte();
            }
            wordList[i] = currentWord;

            byte[][] ids = new byte[buffer.getUnsignedInt()][2];
            for (int j = 0; j < ids.length; j++) {
                ids[j][0] = (byte) buffer.getUnsignedByte();
                ids[j][1] = (byte) buffer.getUnsignedByte();
            }
            // An empty pair list is represented by leaving the slot as it was (null after load*).
            if (ids.length > 0) {
                charIds[i] = ids;
            }
        }
    }

    /**
     * Filters a message and returns the result; masked characters become {@code '*'}.
     * <p>
     * Lower-casing is done per character so the working copy always has exactly the same length
     * as {@code input} (a locale-sensitive {@code String.toLowerCase()} may change the length and
     * depends on the default locale). The {@link #ignoreList} words are looked up in the
     * lower-cased text, so they are restored regardless of the case the caller typed them in.
     *
     * @param input message to filter
     * @return the filtered message, same length as {@code input}
     */
    public static String filter(String input) {
        char[] original = input.toCharArray();
        char[] inputChars = new char[original.length];
        for (int i = 0; i < original.length; i++) {
            inputChars[i] = Character.toLowerCase(original[i]);
        }
        // Snapshot taken before the filters start writing '*' into inputChars.
        String lowered = new String(inputChars);

        applyDotSlashFilter(inputChars);
        applyBadwordFilter(inputChars);
        applyHostFilter(inputChars);
        heywhathteufck(inputChars);

        // Write every occurrence of an ignored word back over whatever the filters left there.
        for (String ignored : ignoreList) {
            char[] ignoredChars = ignored.toCharArray();
            for (int at = lowered.indexOf(ignored); at != -1; at = lowered.indexOf(ignored, at + 1)) {
                System.arraycopy(ignoredChars, 0, inputChars, at, ignoredChars.length);
            }
        }

        if (forceLowercase) {
            stripLowercase(original, inputChars);
            toLowercase(inputChars);
        }
        return new String(inputChars);
    }

    /**
     * Copies the upper-case letters of {@code input} into the same positions of {@code output},
     * skipping positions that hold {@code '*'}. Only the common prefix of the two arrays is
     * processed.
     *
     * @param input  characters in their original case
     * @param output array to update in place
     */
    public static void stripLowercase(char[] input, char[] output) {
        int limit = Math.min(input.length, output.length);
        for (int i = 0; i < limit; i++) {
            if (output[i] != '*' && isUppercase(input[i])) {
                output[i] = input[i];
            }
        }
    }

    /**
     * Lower-cases, in place, every upper-case letter that follows a lower-case letter within the
     * same run of letters. Upper-case letters at the start of a run are kept until the first
     * lower-case letter of that run is seen.
     *
     * @param input characters to normalise in place
     */
    public static void toLowercase(char[] input) {
        boolean capsStillAllowed = true;
        for (int i = 0; i < input.length; i++) {
            char current = input[i];
            if (!isLetter(current)) {
                // A non-letter ends the run; the next run may start with capitals again.
                capsStillAllowed = true;
                continue;
            }
            if (capsStillAllowed) {
                if (isLowercase(current)) {
                    capsStillAllowed = false;
                }
            } else if (isUppercase(current)) {
                input[i] = (char) (current - 'A' + 'a');
            }
        }
    }

    /**
     * Applies every {@link #badList} entry, last to first, and does the whole sweep twice.
     *
     * @param input characters to filter in place
     */
    public static void applyBadwordFilter(char[] input) {
        // NOTE(review): the reason for the second sweep is not stated in the source; presumably it
        // catches matches that only line up after the first sweep wrote '*' - confirm before removing.
        for (int pass = 0; pass < 2; pass++) {
            for (int j = badList.length - 1; j >= 0; j--) {
                applyWordFilter(input, badList[j], badCharIds[j]);
            }
        }
    }

    /**
     * Applies every {@link #hostList} entry, last to first.
     *
     * @param input characters to filter in place
     */
    public static void applyHostFilter(char[] input) {
        for (int i = hostList.length - 1; i >= 0; i--) {
            applyWordFilter(input, hostList[i], hostCharIds[i]);
        }
    }

    /**
     * Runs the tld pass. Two scratch copies are prepared first, one with the word "dot" masked
     * and one with the word "slash" masked, and handed to {@link #applyTldFilter} together with
     * the real array for each {@link #tldList} entry.
     *
     * @param input characters to filter in place
     */
    public static void applyDotSlashFilter(char[] input) {
        char[] dotMasked = input.clone();
        char[] dot = {
            'd', 'o', 't'
        };
        applyWordFilter(dotMasked, dot, null);

        char[] slashMasked = input.clone();
        char[] slash = {
            's', 'l', 'a', 's', 'h'
        };
        applyWordFilter(slashMasked, slash, null);

        for (int i = 0; i < tldList.length; i++) {
            applyTldFilter(input, dotMasked, slashMasked, tldList[i], tldType[i]);
        }
    }

    /**
     * Masks occurrences of {@code tld} in {@code input} whose surroundings satisfy the rule for
     * {@code type}. With {@code start} = {@link #getAsteriskCount} and {@code end} =
     * {@link #getAsteriskCount2}:
     * <pre>
     * type 1: start &gt; 0 and end &gt; 0
     * type 2: (start &gt; 2 and end &gt; 0) or (start &gt; 0 and end &gt; 2)
     * type 3: start &gt; 0 and end &gt; 2
     * </pre>
     * When {@code start &gt; 2} the mask is extended backwards over the preceding run of
     * letters/digits (and, for {@code start == 4}, over the preceding {@code '*'} run of
     * {@code input1} first); {@code end &gt; 2} extends it forwards the same way using
     * {@code input2}.
     *
     * @param input  characters to filter in place
     * @param input1 copy of the message with "dot" masked
     * @param input2 copy of the message with "slash" masked
     * @param tld    word to look for
     * @param type   rule selector, see above
     */
    public static void applyTldFilter(char[] input, char[] input1, char[] input2, char[] tld, int type) {
        if (tld.length > input.length) {
            return;
        }
        for (int charIndex = 0; charIndex <= input.length - tld.length; charIndex++) {
            int inputCharCount = charIndex;
            int matched = 0;
            while (inputCharCount < input.length) {
                int consumed = 0;
                char current = input[inputCharCount];
                char next = '\0';
                if (inputCharCount + 1 < input.length) {
                    next = input[inputCharCount + 1];
                }
                // Next tld character matches: advance in both.
                if (matched < tld.length && (consumed = compareLettersNumbers(tld[matched], current, next)) > 0) {
                    inputCharCount += consumed;
                    matched++;
                    continue;
                }
                if (matched == 0) {
                    break;
                }
                // Repetition of the previously matched character is skipped.
                if ((consumed = compareLettersNumbers(tld[matched - 1], current, next)) > 0) {
                    inputCharCount += consumed;
                    continue;
                }
                // Special characters between tld characters are skipped; anything else ends the attempt.
                if (matched >= tld.length || !isSpecial(current)) {
                    break;
                }
                inputCharCount++;
            }
            if (matched >= tld.length) {
                boolean shouldMask = false;
                int startMatch = getAsteriskCount(input, input1, charIndex);
                int endMatch = getAsteriskCount2(input, input2, inputCharCount - 1);
                if (DEBUGTLD) {
                    System.out.println("Potential tld: " + new String(tld) + " at char " + charIndex + " (type=" + type + ", startmatch=" + startMatch + ", endmatch=" + endMatch + ")");
                }
                if (type == 1 && startMatch > 0 && endMatch > 0) {
                    shouldMask = true;
                }
                if (type == 2 && ((startMatch > 2 && endMatch > 0) || (startMatch > 0 && endMatch > 2))) {
                    shouldMask = true;
                }
                if (type == 3 && startMatch > 0 && endMatch > 2) {
                    shouldMask = true;
                }
                if (shouldMask) {
                    if (DEBUGTLD) {
                        System.out.println("Filtered tld: " + new String(tld) + " at char " + charIndex);
                    }
                    int maskStart = charIndex;
                    int maskEnd = inputCharCount - 1;
                    if (startMatch > 2) {
                        if (startMatch == 4) {
                            // Walk back to, and then across, the '*' run in the dot-masked copy.
                            boolean inAsterisks = false;
                            for (int k2 = maskStart - 1; k2 >= 0; k2--) {
                                if (inAsterisks) {
                                    if (input1[k2] != '*') {
                                        break;
                                    }
                                    maskStart = k2;
                                } else if (input1[k2] == '*') {
                                    maskStart = k2;
                                    inAsterisks = true;
                                }
                            }
                        }
                        // Walk back to, and then across, the preceding run of letters/digits.
                        boolean inWord = false;
                        for (int l2 = maskStart - 1; l2 >= 0; l2--) {
                            if (inWord) {
                                if (isSpecial(input[l2])) {
                                    break;
                                }
                                maskStart = l2;
                            } else if (!isSpecial(input[l2])) {
                                inWord = true;
                                maskStart = l2;
                            }
                        }
                    }
                    if (endMatch > 2) {
                        if (endMatch == 4) {
                            // Walk forward to, and then across, the '*' run in the slash-masked copy.
                            boolean inAsterisks = false;
                            for (int i3 = maskEnd + 1; i3 < input.length; i3++) {
                                if (inAsterisks) {
                                    if (input2[i3] != '*') {
                                        break;
                                    }
                                    maskEnd = i3;
                                } else if (input2[i3] == '*') {
                                    maskEnd = i3;
                                    inAsterisks = true;
                                }
                            }
                        }
                        // Walk forward to, and then across, the following run of letters/digits.
                        boolean inWord = false;
                        for (int j3 = maskEnd + 1; j3 < input.length; j3++) {
                            if (inWord) {
                                if (isSpecial(input[j3])) {
                                    break;
                                }
                                maskEnd = j3;
                            } else if (!isSpecial(input[j3])) {
                                inWord = true;
                                maskEnd = j3;
                            }
                        }
                    }
                    for (int j2 = maskStart; j2 <= maskEnd; j2++) {
                        input[j2] = '*';
                    }
                }
            }
        }
    }

    /**
     * Classifies what precedes position {@code len}.
     *
     * @param input  the message
     * @param input1 copy of the message with "dot" masked
     * @param len    index of the first character of the candidate match
     * @return 2 if {@code len} is 0; 3 if the run of special characters directly before
     *         {@code len} contains {@code ','} or {@code '.'}; 4 if the corresponding run in
     *         {@code input1} contains at least three {@code '*'}; otherwise 1 if the character
     *         before {@code len} is special, else 0
     */
    public static int getAsteriskCount(char[] input, char[] input1, int len) {
        if (len == 0) {
            return 2;
        }
        for (int j = len - 1; j >= 0; j--) {
            if (!isSpecial(input[j])) {
                break;
            }
            if (input[j] == ',' || input[j] == '.') {
                return 3;
            }
        }
        int filtered = 0;
        for (int l = len - 1; l >= 0; l--) {
            if (!isSpecial(input1[l])) {
                break;
            }
            if (input1[l] == '*') {
                filtered++;
            }
        }
        if (filtered >= 3) {
            return 4;
        }
        return isSpecial(input[len - 1]) ? 1 : 0;
    }

    /**
     * Classifies what follows position {@code len}.
     *
     * @param input  the message
     * @param input1 copy of the message with "slash" masked
     * @param len    index of the last character of the candidate match
     * @return 2 if {@code len} is the last index; 3 if the run of special characters directly
     *         after {@code len} contains {@code '\'} or {@code '/'}; 4 if the corresponding run
     *         in {@code input1} contains at least five {@code '*'}; otherwise 1 if the character
     *         after {@code len} is special, else 0
     */
    public static int getAsteriskCount2(char[] input, char[] input1, int len) {
        if (len + 1 == input.length) {
            return 2;
        }
        for (int j = len + 1; j < input.length; j++) {
            if (!isSpecial(input[j])) {
                break;
            }
            if (input[j] == '\\' || input[j] == '/') {
                return 3;
            }
        }
        int filtered = 0;
        for (int l = len + 1; l < input.length; l++) {
            if (!isSpecial(input1[l])) {
                break;
            }
            if (input1[l] == '*') {
                filtered++;
            }
        }
        if (filtered >= 5) {
            return 4;
        }
        return isSpecial(input[len + 1]) ? 1 : 0;
    }

    /**
     * Masks occurrences of {@code wordlist} in {@code input}. Matching uses
     * {@link #compareLettersSymbols}, tolerates repeated characters, and skips characters for
     * which {@link #isNotLowercase(char)} is true between word characters.
     * <p>
     * A match is then accepted or rejected as follows:
     * <ul>
     * <li>If no non-apostrophe special character was skipped, the match is rejected when
     *     {@code charIds} contains the pair (id of the character before, id of the character
     *     after) as given by {@link #getCharId(char)}.</li>
     * <li>Otherwise, unless the match is bounded on both sides by the message edge or a
     *     non-apostrophe special character, fragments of up to three characters around and
     *     inside the match are checked with {@link #containsFragmentHashes(char[])}; the match
     *     is rejected unless at least one fragment is not found there.</li>
     * </ul>
     *
     * @param input    characters to filter in place
     * @param wordlist the word to look for
     * @param charIds  exception pairs for this word, or {@code null} for none
     */
    public static void applyWordFilter(char[] input, char[] wordlist, byte[][] charIds) {
        if (wordlist.length > input.length) {
            return;
        }
        for (int charIndex = 0; charIndex <= input.length - wordlist.length; charIndex++) {
            int inputCharCount = charIndex;
            int matched = 0;
            boolean specialChar = false;
            while (inputCharCount < input.length) {
                int consumed = 0;
                char inputChar = input[inputCharCount];
                char nextChar = '\0';
                if (inputCharCount + 1 < input.length) {
                    nextChar = input[inputCharCount + 1];
                }
                // Next word character matches: advance in both.
                if (matched < wordlist.length && (consumed = compareLettersSymbols(wordlist[matched], inputChar, nextChar)) > 0) {
                    inputCharCount += consumed;
                    matched++;
                    continue;
                }
                if (matched == 0) {
                    break;
                }
                // Repetition of the previously matched character is skipped.
                if ((consumed = compareLettersSymbols(wordlist[matched - 1], inputChar, nextChar)) > 0) {
                    inputCharCount += consumed;
                    continue;
                }
                if (matched >= wordlist.length || !isNotLowercase(inputChar)) {
                    break;
                }
                // Remember that a real separator (not an apostrophe) was skipped inside the match.
                if (isSpecial(inputChar) && inputChar != '\'') {
                    specialChar = true;
                }
                inputCharCount++;
            }
            if (matched >= wordlist.length) {
                boolean filter = true;
                if (DEBUGWORD) {
                    System.out.println("Potential word: " + new String(wordlist) + " at char " + charIndex);
                }
                if (!specialChar) {
                    char prevChar = ' ';
                    if (charIndex - 1 >= 0) {
                        prevChar = input[charIndex - 1];
                    }
                    char curChar = ' ';
                    if (inputCharCount < input.length) {
                        curChar = input[inputCharCount];
                    }
                    byte prevId = getCharId(prevChar);
                    byte curId = getCharId(curChar);
                    if (charIds != null && compareCharIds(charIds, prevId, curId)) {
                        filter = false;
                    }
                } else {
                    boolean boundedBefore = false;
                    boolean boundedAfter = false;
                    if (charIndex - 1 < 0 || (isSpecial(input[charIndex - 1]) && input[charIndex - 1] != '\'')) {
                        boundedBefore = true;
                    }
                    if (inputCharCount >= input.length || (isSpecial(input[inputCharCount]) && input[inputCharCount] != '\'')) {
                        boundedAfter = true;
                    }
                    if (!boundedBefore || !boundedAfter) {
                        boolean unknownFragment = false;
                        int scan = charIndex - 2;
                        if (boundedBefore) {
                            scan = charIndex;
                        }
                        for (; !unknownFragment && scan < inputCharCount; scan++) {
                            if (scan >= 0 && (!isSpecial(input[scan]) || input[scan] == '\'')) {
                                // Collect up to three characters; unused slots stay 0.
                                char[] fragment = new char[3];
                                int taken;
                                for (taken = 0; taken < 3; taken++) {
                                    if (scan + taken >= input.length || (isSpecial(input[scan + taken]) && input[scan + taken] != '\'')) {
                                        break;
                                    }
                                    fragment[taken] = input[scan + taken];
                                }
                                boolean candidate = true;
                                if (taken == 0) {
                                    candidate = false;
                                }
                                // A short fragment only counts when it starts right after a separator.
                                if (taken < 3 && scan - 1 >= 0 && (!isSpecial(input[scan - 1]) || input[scan - 1] == '\'')) {
                                    candidate = false;
                                }
                                if (candidate && !containsFragmentHashes(fragment)) {
                                    unknownFragment = true;
                                }
                            }
                        }
                        if (!unknownFragment) {
                            filter = false;
                        }
                    }
                }
                if (filter) {
                    if (DEBUGWORD) {
                        System.out.println("Filtered word: " + new String(wordlist) + " at char " + charIndex);
                    }
                    for (int i1 = charIndex; i1 < inputCharCount; i1++) {
                        input[i1] = '*';
                    }
                }
            }
        }
    }

    /**
     * Binary-searches {@code charIdData} for the pair ({@code prevCharId}, {@code curCharId}).
     * The search orders pairs by first element, then second, so the table is expected to be
     * sorted that way.
     *
     * @param charIdData table of two-element pairs
     * @param prevCharId first element to look for
     * @param curCharId  second element to look for
     * @return {@code true} if the pair is found; {@code false} otherwise, including for a
     *         {@code null} or empty table
     */
    public static boolean compareCharIds(byte[][] charIdData, byte prevCharId, byte curCharId) {
        if (charIdData == null || charIdData.length == 0) {
            return false;
        }
        int first = 0;
        if (charIdData[first][0] == prevCharId && charIdData[first][1] == curCharId) {
            return true;
        }
        int last = charIdData.length - 1;
        if (charIdData[last][0] == prevCharId && charIdData[last][1] == curCharId) {
            return true;
        }
        while (first != last && first + 1 != last) {
            int middle = (first + last) >>> 1;
            if (charIdData[middle][0] == prevCharId && charIdData[middle][1] == curCharId) {
                return true;
            }
            if (prevCharId < charIdData[middle][0] || (prevCharId == charIdData[middle][0] && curCharId < charIdData[middle][1])) {
                last = middle;
            } else {
                first = middle;
            }
        }
        return false;
    }

    /**
     * Character comparison used by the tld pass; accepts a few digit/symbol look-alikes.
     *
     * @param filterChar  character from the filter word
     * @param currentChar current message character
     * @param nextChar    following message character, or {@code '\0'} at the end
     * @return 0 for no match, 1 if {@code currentChar} matches, 2 if {@code currentChar} and
     *         {@code nextChar} together match
     */
    public static int compareLettersNumbers(char filterChar, char currentChar, char nextChar) {
        if (filterChar == currentChar) {
            return 1;
        }
        if (filterChar == 'e' && currentChar == '3') {
            return 1;
        }
        if (filterChar == 't' && (currentChar == '7' || currentChar == '+')) {
            return 1;
        }
        if (filterChar == 'a' && (currentChar == '4' || currentChar == '@')) {
            return 1;
        }
        if (filterChar == 'o' && currentChar == '0') {
            return 1;
        }
        if (filterChar == 'i' && currentChar == '1') {
            return 1;
        }
        if (filterChar == 's' && currentChar == '5') {
            return 1;
        }
        if (filterChar == 'f' && currentChar == 'p' && nextChar == 'h') {
            return 2;
        }
        return filterChar == 'g' && currentChar == '9' ? 1 : 0;
    }

    /**
     * Character comparison used by the word pass; accepts a wider set of look-alikes than
     * {@link #compareLettersNumbers}. A filter character of {@code '*'} never matches.
     *
     * @param filterChar  character to compare against
     * @param currentChar current character
     * @param nextChar    next character, or {@code '\0'} at the end
     * @return 0 for no match, 1 if {@code currentChar} matches, 2 if {@code currentChar} and
     *         {@code nextChar} together match
     */
    public static int compareLettersSymbols(char filterChar, char currentChar, char nextChar) {
        if (filterChar == '*') {
            return 0;
        }
        if (filterChar == currentChar) {
            return 1;
        }
        switch (filterChar) {
            case 'e':
                return currentChar == '3' ? 1 : 0;
            case 't':
                return currentChar == '7' ? 1 : 0;
            case 'a':
                return currentChar == '4' || currentChar == '@' ? 1 : 0;
            case 'o':
                if (currentChar == '0' || currentChar == '*') {
                    return 1;
                }
                return currentChar == '(' && nextChar == ')' ? 2 : 0;
            case 'i':
                // The digit '1' is accepted here, matching the i/1 rule of compareLettersNumbers
                // and the l/1 rules below (the earlier code tested 'l' twice instead).
                return currentChar == 'y' || currentChar == 'l' || currentChar == 'j' || currentChar == '1'
                        || currentChar == '!' || currentChar == ':' || currentChar == ';' ? 1 : 0;
            case 's':
                return currentChar == '5' || currentChar == 'z' || currentChar == '$' ? 1 : 0;
            case 'l':
                return currentChar == '1' ? 1 : 0;
            case 'c':
                return currentChar == '(' ? 1 : 0;
            case 'u':
                return currentChar == 'v' ? 1 : 0;
            case 'f':
                return currentChar == 'p' && nextChar == 'h' ? 2 : 0;
            case 'g':
                return currentChar == '9' || currentChar == '6' ? 1 : 0;
            case 'w':
                return currentChar == 'v' && nextChar == 'v' ? 2 : 0;
            case 'b':
                return currentChar == '1' && nextChar == '3' ? 2 : 0;
            case 'x':
                return currentChar == ')' && nextChar == '(' ? 2 : 0;
            case '0':
                if (currentChar == 'o' || currentChar == 'O') {
                    return 1;
                }
                return currentChar == '(' && nextChar == ')' ? 2 : 0;
            case '1':
                return currentChar == 'l' ? 1 : 0;
            case ',':
                return currentChar == '.' ? 1 : 0;
            case '.':
                return currentChar == ',' ? 1 : 0;
            case '!':
                return currentChar == 'i' ? 1 : 0;
            default:
                break;
        }
        // Remaining letters, digits and these four symbols have no look-alikes and are not reported.
        boolean knownWithoutAlias = (filterChar >= 'a' && filterChar <= 'z')
                || (filterChar >= '0' && filterChar <= '9')
                || filterChar == '-' || filterChar == '(' || filterChar == ')' || filterChar == '\'';
        if (!knownWithoutAlias && DEBUGWORD) {
            System.out.println("Letter=" + filterChar + " not matched");
        }
        return 0;
    }

    /**
     * Returns the id for the given char, ranging from {@code 1} to {@code 38}.
     * <p>
     * <pre>
     * id       range
     * 1-26     a-z
     * 27       unknown
     * 28       apostrophe
     * 29-38    0-9
     * </pre>
     *
     * @param c character to classify
     * @return id for char {@code c}
     */
    public static byte getCharId(char c) {
        if (c >= 'a' && c <= 'z') {
            return (byte) (c - 'a' + 1);
        }
        if (c == '\'') {
            return 28;
        }
        if (c >= '0' && c <= '9') {
            return (byte) (c - '0' + 29);
        }
        return 27;
    }

    /**
     * Masks runs of four consecutive numbers. A number qualifies when it has at most 8 digits
     * and a value of at most 255. The count restarts whenever a number does not qualify or when
     * the text between two numbers contains a lower-case letter other than v, x, j, q or z.
     * When the fourth qualifying number is reached, everything from the first digit of the run
     * to the last digit of the fourth number is overwritten with {@code '*'}.
     *
     * @param input characters to filter in place
     */
    public static void heywhathteufck(char[] input) {
        int digitIndex;
        int fromIndex = 0;
        int qualifying = 0;
        int runStart = 0;
        while ((digitIndex = indexOfDigit(input, fromIndex)) != -1) {
            boolean letterBetween = false;
            for (int i = fromIndex; i >= 0 && i < digitIndex && !letterBetween; i++) {
                if (!isSpecial(input[i]) && !isNotLowercase(input[i])) {
                    letterBetween = true;
                }
            }
            if (letterBetween) {
                qualifying = 0;
            }
            if (qualifying == 0) {
                runStart = digitIndex;
            }
            fromIndex = indexOfNonDigit(input, digitIndex);

            // Over-long numbers never qualify, so their value is not computed (avoids int overflow).
            boolean shortEnough = fromIndex - digitIndex <= 8;
            int value = 0;
            if (shortEnough) {
                for (int k1 = digitIndex; k1 < fromIndex; k1++) {
                    value = value * 10 + (input[k1] - '0');
                }
            }
            if (!shortEnough || value > 255) {
                qualifying = 0;
            } else {
                qualifying++;
            }
            if (qualifying == 4) {
                for (int i = runStart; i < fromIndex; i++) {
                    input[i] = '*';
                }
                qualifying = 0;
            }
        }
    }

    /**
     * Finds the first digit at or after {@code fromIndex}.
     *
     * @param input     characters to search
     * @param fromIndex first index to examine
     * @return index of the digit, or -1 if there is none or {@code fromIndex} is out of range
     */
    public static int indexOfDigit(char[] input, int fromIndex) {
        for (int i = fromIndex; i < input.length && i >= 0; i++) {
            if (input[i] >= '0' && input[i] <= '9') {
                return i;
            }
        }
        return -1;
    }

    /**
     * Finds the first non-digit at or after {@code fromIndex}.
     *
     * @param input     characters to search
     * @param fromIndex first index to examine
     * @return index of the non-digit, or {@code input.length} if there is none or
     *         {@code fromIndex} is out of range
     */
    public static int indexOfNonDigit(char[] input, int fromIndex) {
        for (int i = fromIndex; i < input.length && i >= 0; i++) {
            if (input[i] < '0' || input[i] > '9') {
                return i;
            }
        }
        return input.length;
    }

    /**
     * @param c character to test
     * @return {@code true} if {@code c} is neither an ASCII letter nor an ASCII digit
     */
    public static boolean isSpecial(char c) {
        return !isLetter(c) && !isDigit(c);
    }

    /**
     * Despite the name this is not a pure case test: it is also true for the lower-case
     * letters v, x, j, q and z.
     *
     * @param c character to test
     * @return {@code true} if {@code c} is outside {@code a-z}, or is one of v, x, j, q, z
     */
    public static boolean isNotLowercase(char c) {
        if (c < 'a' || c > 'z') {
            return true;
        }
        return c == 'v' || c == 'x' || c == 'j' || c == 'q' || c == 'z';
    }

    /**
     * @param c character to test
     * @return {@code true} for {@code a-z} and {@code A-Z}
     */
    public static boolean isLetter(char c) {
        return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
    }

    /**
     * @param c character to test
     * @return {@code true} for {@code 0-9}
     */
    public static boolean isDigit(char c) {
        return c >= '0' && c <= '9';
    }

    /**
     * @param c character to test
     * @return {@code true} for {@code a-z}
     */
    public static boolean isLowercase(char c) {
        return c >= 'a' && c <= 'z';
    }

    /**
     * @param c character to test
     * @return {@code true} for {@code A-Z}
     */
    public static boolean isUppercase(char c) {
        return c >= 'A' && c <= 'Z';
    }

    /**
     * Tests a fragment against {@link #hashFragments}.
     *
     * @param input fragment characters; unused trailing slots may be 0
     * @return {@code true} if every character is a digit or 0, or if the fragment's
     *         {@link #word2hash(char[])} value is present in {@link #hashFragments};
     *         {@code false} otherwise, including when the table is empty or not loaded
     */
    public static boolean containsFragmentHashes(char[] input) {
        boolean onlyDigits = true;
        for (int i = 0; i < input.length; i++) {
            if (!isDigit(input[i]) && input[i] != 0) {
                onlyDigits = false;
            }
        }
        if (onlyDigits) {
            return true;
        }
        if (hashFragments == null || hashFragments.length == 0) {
            return false;
        }
        int inputHash = word2hash(input);
        int first = 0;
        int last = hashFragments.length - 1;
        if (inputHash == hashFragments[first] || inputHash == hashFragments[last]) {
            return true;
        }
        while (first != last && first + 1 != last) {
            int middle = (first + last) >>> 1;
            if (inputHash == hashFragments[middle]) {
                return true;
            }
            if (inputHash < hashFragments[middle]) {
                last = middle;
            } else {
                first = middle;
            }
        }
        return false;
    }

    /**
     * Packs a word of at most six characters into a base-38 number, processing the last
     * character first. Digit values per character: {@code a-z} are 1-26, apostrophe is 27,
     * {@code 0-9} are 28-37, and a 0 character contributes nothing. Note that these codes
     * differ from {@link #getCharId(char)} for the apostrophe and the digits.
     *
     * @param word characters to hash
     * @return the hash, or 0 if the word is longer than six characters or contains any other
     *         character
     */
    public static int word2hash(char[] word) {
        if (word.length > 6) {
            return 0;
        }
        int hash = 0;
        for (int i = 0; i < word.length; i++) {
            char c = word[word.length - i - 1];
            if (c >= 'a' && c <= 'z') {
                hash = hash * 38 + c - 97 + 1;
            } else if (c == '\'') {
                hash = hash * 38 + 27;
            } else if (c >= '0' && c <= '9') {
                hash = hash * 38 + c - 48 + 28;
            } else if (c != 0) {
                if (DEBUGWORD) {
                    System.out.println("word2hash failed on " + new String(word));
                }
                return 0;
            }
        }
        return hash;
    }
}