* ignore trailing (1) patterns for TITLE metrics

This commit is contained in:
Reinhard Pointner 2013-11-07 07:45:30 +00:00
parent b7a9d524cb
commit ecb9fac822
3 changed files with 19 additions and 19 deletions

View File

@ -118,7 +118,7 @@ public enum EpisodeMetrics implements SimilarityMetric {
Episode e = (Episode) object;
// don't use title for matching if title equals series name
String normalizedToken = normalizeObject(e.getTitle());
String normalizedToken = normalizeObject(removeTrailingBrackets(e.getTitle()));
if (normalizedToken.length() >= 4 && !normalizeObject(e.getSeriesName()).contains(normalizedToken)) {
return normalizedToken;
}

View File

@ -1,50 +1,43 @@
package net.sourceforge.filebot.similarity;
import static java.util.regex.Pattern.*;
import java.util.regex.Pattern;
/**
 * Static helpers that strip or normalize punctuation, bracketed groups and
 * embedded checksums from names before they are compared.
 */
public class Normalization {

	/** One or more apostrophe-like characters; these are dropped entirely. */
	private static final Pattern apostrophe = compile("['`´ʻ]+");

	/** A run of punctuation and/or whitespace; each run is collapsed to a single space. */
	private static final Pattern punctuation = compile("[\\p{Punct}\\p{Space}]+");

	/** Bracketed groups: (...), [...] and {...}. */
	private static final Pattern[] brackets = new Pattern[] { compile("\\([^\\(]*\\)"), compile("\\[[^\\[]*\\]"), compile("\\{[^\\{]*\\}") };

	/**
	 * A (...) group at the end of the input. The negative lookbehind rejects a
	 * group that opens at the very start of the input, so a name that consists
	 * of nothing but a parenthesised group is left intact.
	 */
	private static final Pattern trailingParentheses = compile("(?<!^)[(]([^)]*)[)]$");

	/** Exactly eight hex digits enclosed in round or square brackets. */
	private static final Pattern checksum = compile("[\\(\\[]\\p{XDigit}{8}[\\]\\)]");

	/**
	 * Removes apostrophe-like characters and collapses every run of
	 * punctuation or whitespace into a single space.
	 *
	 * @param name the text to normalize
	 * @return the normalized text with leading and trailing whitespace trimmed
	 */
	public static String normalizePunctuation(String name) {
		// remove/normalize special characters
		name = apostrophe.matcher(name).replaceAll("");
		name = punctuation.matcher(name).replaceAll(" ");
		return name.trim();
	}

	/**
	 * Replaces every (...), [...] and {...} group with a single space,
	 * e.g. to drop group names and checksums.
	 *
	 * @param name the text to process
	 * @return the text with each bracketed group replaced by a space (not trimmed)
	 */
	public static String normalizeBrackets(String name) {
		// remove group names and checksums, any [...] or (...)
		for (Pattern it : brackets) {
			name = it.matcher(name).replaceAll(" ");
		}
		return name;
	}

	/**
	 * Deletes every bracketed eight-hex-digit checksum from the given text.
	 *
	 * @param string the text to process
	 * @return the text without embedded checksums
	 */
	public static String removeEmbeddedChecksum(String string) {
		// match embedded checksum and surrounding brackets
		return checksum.matcher(string).replaceAll("");
	}

	/**
	 * Removes a trailing (...) group, e.g. Doctor Who (2005) -> Doctor Who.
	 * A group that starts at the beginning of the input is not removed.
	 *
	 * @param name the text to process
	 * @return the text without the trailing group, trimmed
	 */
	public static String removeTrailingBrackets(String name) {
		// remove trailing braces, e.g. Doctor Who (2005) -> Doctor Who
		return trailingParentheses.matcher(name).replaceAll("").trim();
	}
}

View File

@ -161,6 +161,7 @@ ANBU
Anbu-Solar
AnCo.2
aNDy
ANE
AnFs
ANGELiC
Ani-Kraze
@ -261,6 +262,7 @@ BaDTaStE
BAF
BAJSKORV
Baka-Anime
Baka-Chi
Bakaniichan
Bakura2
BaLD
@ -533,6 +535,7 @@ DaDuck
DAFTPUNK
Dali-Neko
Daman
DameDesuYo
danger2u
Dango
danirl
@ -575,6 +578,7 @@ dEr
DerSchuft
DESiRED
desnsurrender
DESS
DETAiLS
DeTvaVe
DEViSE
@ -1115,6 +1119,7 @@ iNfInItE_424
iNFOTv
iNGOT
iNjECT
iNK
Inko
INP
InSaNiTy
@ -2181,6 +2186,7 @@ ULTiMATE
umai
umee
UMF
UNDERWATER
Underwater-Mahjong
UNiQUE
UNiT
@ -2271,6 +2277,7 @@ WATERS
WAVEY
WBZ
WEST
WESTSiDE
WHATELSE
WHEELS
WHiiZz