mirror of
https://github.com/mitb-archive/filebot
synced 2025-01-11 13:58:16 -05:00
* ignore trailing (1) patterns for TITLE metrics
This commit is contained in:
parent
b7a9d524cb
commit
ecb9fac822
@ -118,7 +118,7 @@ public enum EpisodeMetrics implements SimilarityMetric {
|
||||
Episode e = (Episode) object;
|
||||
|
||||
// don't use title for matching if title equals series name
|
||||
String normalizedToken = normalizeObject(e.getTitle());
|
||||
String normalizedToken = normalizeObject(removeTrailingBrackets(e.getTitle()));
|
||||
if (normalizedToken.length() >= 4 && !normalizeObject(e.getSeriesName()).contains(normalizedToken)) {
|
||||
return normalizedToken;
|
||||
}
|
||||
|
@ -1,50 +1,43 @@
|
||||
|
||||
package net.sourceforge.filebot.similarity;
|
||||
|
||||
|
||||
import static java.util.regex.Pattern.*;
|
||||
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
|
||||
/**
 * Static helpers that strip or normalize punctuation, bracketed groups and
 * embedded checksums in names before they are compared.
 */
public class Normalization {

    /** One or more apostrophe-like characters; these are removed outright rather than replaced by a space. */
    private static final Pattern apostrophe = compile("['`´‘’ʻ]+");

    /** Any run of punctuation and/or whitespace characters. */
    private static final Pattern punctuation = compile("[\\p{Punct}\\p{Space}]+");

    /** Bracketed groups, one pattern per bracket type: (...), [...] and {...}. */
    private static final Pattern[] brackets = new Pattern[] { compile("\\([^\\(]*\\)"), compile("\\[[^\\[]*\\]"), compile("\\{[^\\{]*\\}") };

    /**
     * A (...) group at the very end of the input. The negative lookbehind
     * {@code (?<!^)} rejects a group that opens at the first character, so a
     * name that consists only of a parenthesized group is left intact.
     */
    private static final Pattern trailingParentheses = compile("(?<!^)[(]([^)]*)[)]$");

    /** Exactly eight hex digits enclosed in round or square brackets, e.g. [ABCD1234]. */
    private static final Pattern checksum = compile("[\\(\\[]\\p{XDigit}{8}[\\]\\)]");

    /**
     * Removes apostrophe-like characters and collapses every run of
     * punctuation/whitespace into a single space.
     *
     * @param name the text to normalize; must not be {@code null}
     * @return the normalized text with leading and trailing whitespace trimmed
     */
    public static String normalizePunctuation(String name) {
        // drop apostrophes first so that e.g. "Doctor's" becomes "Doctors" and not "Doctor s"
        name = apostrophe.matcher(name).replaceAll("");
        name = punctuation.matcher(name).replaceAll(" ");
        return name.trim();
    }

    /**
     * Replaces every (...), [...] and {...} group with a single space.
     *
     * @param name the text to process; must not be {@code null}
     * @return the text with bracketed groups replaced by spaces; the result is not trimmed
     */
    public static String normalizeBrackets(String name) {
        // remove group names and checksums, any [...], (...) or {...}
        for (Pattern it : brackets) {
            name = it.matcher(name).replaceAll(" ");
        }
        return name;
    }

    /**
     * Removes every bracketed eight-digit hex checksum from the given text.
     *
     * @param string the text to process; must not be {@code null}
     * @return the text without embedded checksums; surrounding whitespace is kept as-is
     */
    public static String removeEmbeddedChecksum(String string) {
        // match embedded checksum and surrounding brackets
        return checksum.matcher(string).replaceAll("");
    }

    /**
     * Removes a trailing parenthesized group, e.g. {@code Doctor Who (2005)}
     * becomes {@code Doctor Who}. A group that starts at the first character
     * of the input is not removed.
     *
     * @param name the text to process; must not be {@code null}
     * @return the trimmed text without the trailing (...) group
     */
    public static String removeTrailingBrackets(String name) {
        // remove trailing braces, e.g. Doctor Who (2005) -> Doctor Who
        return trailingParentheses.matcher(name).replaceAll("").trim();
    }

}
|
||||
|
@ -161,6 +161,7 @@ ANBU
|
||||
Anbu-Solar
|
||||
AnCo.2
|
||||
aNDy
|
||||
ANE
|
||||
AnFs
|
||||
ANGELiC
|
||||
Ani-Kraze
|
||||
@ -261,6 +262,7 @@ BaDTaStE
|
||||
BAF
|
||||
BAJSKORV
|
||||
Baka-Anime
|
||||
Baka-Chi
|
||||
Bakaniichan
|
||||
Bakura2
|
||||
BaLD
|
||||
@ -533,6 +535,7 @@ DaDuck
|
||||
DAFTPUNK
|
||||
Dali-Neko
|
||||
Daman
|
||||
DameDesuYo
|
||||
danger2u
|
||||
Dango
|
||||
danirl
|
||||
@ -575,6 +578,7 @@ dEr
|
||||
DerSchuft
|
||||
DESiRED
|
||||
desnsurrender
|
||||
DESS
|
||||
DETAiLS
|
||||
DeTvaVe
|
||||
DEViSE
|
||||
@ -1115,6 +1119,7 @@ iNfInItE_424
|
||||
iNFOTv
|
||||
iNGOT
|
||||
iNjECT
|
||||
iNK
|
||||
Inko
|
||||
INP
|
||||
InSaNiTy
|
||||
@ -2181,6 +2186,7 @@ ULTiMATE
|
||||
umai
|
||||
umee
|
||||
UMF
|
||||
UNDERWATER
|
||||
Underwater-Mahjong
|
||||
UNiQUE
|
||||
UNiT
|
||||
@ -2271,6 +2277,7 @@ WATERS
|
||||
WAVEY
|
||||
WBZ
|
||||
WEST
|
||||
WESTSiDE
|
||||
WHATELSE
|
||||
WHEELS
|
||||
WHiiZz
|
||||
|
Loading…
Reference in New Issue
Block a user