mirror of
https://github.com/mitb-archive/filebot
synced 2025-03-10 06:20:27 -04:00
+ String.asciiQuotes() to normalize weird quotation marks (e.g. "\u00b4\u2018\u2019\u02bb".asciiQuotes() == "''''")
This commit is contained in:
parent
5c1dac0533
commit
8a77762e34
@ -12,6 +12,7 @@ import java.util.List;
|
|||||||
import java.util.Locale;
|
import java.util.Locale;
|
||||||
import java.util.regex.Matcher;
|
import java.util.regex.Matcher;
|
||||||
|
|
||||||
|
import net.filebot.similarity.Normalization;
|
||||||
import net.filebot.util.FileUtilities;
|
import net.filebot.util.FileUtilities;
|
||||||
|
|
||||||
import com.ibm.icu.text.Transliterator;
|
import com.ibm.icu.text.Transliterator;
|
||||||
@ -254,6 +255,10 @@ public class ExpressionFormatMethods {
|
|||||||
return Transliterator.getInstance("Any-Latin;Latin-ASCII;[:Diacritic:]remove").transform(self).replaceAll("[^\\p{ASCII}]+", fallback).trim();
|
return Transliterator.getInstance("Any-Latin;Latin-ASCII;[:Diacritic:]remove").transform(self).replaceAll("[^\\p{ASCII}]+", fallback).trim();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static String asciiQuotes(String self) {
|
||||||
|
return Normalization.normalizeQuotationMarks(self);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Replace multiple replacement pairs
|
* Replace multiple replacement pairs
|
||||||
*
|
*
|
||||||
|
@ -17,6 +17,18 @@ public class Normalization {
|
|||||||
|
|
||||||
private static final Pattern checksum = compile("[\\(\\[]\\p{XDigit}{8}[\\]\\)]");
|
private static final Pattern checksum = compile("[\\(\\[]\\p{XDigit}{8}[\\]\\)]");
|
||||||
|
|
||||||
|
private static final char[] doubleQuotes = new char[] { '\"', '\u0060', '\u00b4', '\u2018', '\u2019', '\u02bb' };
|
||||||
|
private static final char[] singleQuotes = new char[] { '\'', '\u201c', '\u201d' };
|
||||||
|
|
||||||
|
public static String normalizeQuotationMarks(String name) {
|
||||||
|
for (char[] cs : new char[][] { doubleQuotes, singleQuotes }) {
|
||||||
|
for (char c : cs) {
|
||||||
|
name = name.replace(c, cs[0]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return name;
|
||||||
|
}
|
||||||
|
|
||||||
public static String normalizePunctuation(String name) {
|
public static String normalizePunctuation(String name) {
|
||||||
// remove/normalize special characters
|
// remove/normalize special characters
|
||||||
name = apostrophe.matcher(name).replaceAll("");
|
name = apostrophe.matcher(name).replaceAll("");
|
||||||
|
Loading…
x
Reference in New Issue
Block a user