1
0
mirror of https://github.com/mitb-archive/filebot synced 2025-01-11 13:58:16 -05:00

* ignore trailing (1) patterns for TITLE metrics

This commit is contained in:
Reinhard Pointner 2013-11-07 07:45:30 +00:00
parent b7a9d524cb
commit ecb9fac822
3 changed files with 19 additions and 19 deletions

View File

@ -118,7 +118,7 @@ public enum EpisodeMetrics implements SimilarityMetric {
Episode e = (Episode) object; Episode e = (Episode) object;
// don't use title for matching if title equals series name // don't use title for matching if title equals series name
String normalizedToken = normalizeObject(e.getTitle()); String normalizedToken = normalizeObject(removeTrailingBrackets(e.getTitle()));
if (normalizedToken.length() >= 4 && !normalizeObject(e.getSeriesName()).contains(normalizedToken)) { if (normalizedToken.length() >= 4 && !normalizeObject(e.getSeriesName()).contains(normalizedToken)) {
return normalizedToken; return normalizedToken;
} }

View File

@ -1,50 +1,43 @@
package net.sourceforge.filebot.similarity; package net.sourceforge.filebot.similarity;
import static java.util.regex.Pattern.*; import static java.util.regex.Pattern.*;
import java.util.regex.Pattern; import java.util.regex.Pattern;
public class Normalization { public class Normalization {
private static final Pattern apostrophe = compile("['`´ʻ]+"); private static final Pattern apostrophe = compile("['`´ʻ]+");
private static final Pattern punctuation = compile("[\\p{Punct}\\p{Space}]+"); private static final Pattern punctuation = compile("[\\p{Punct}\\p{Space}]+");
private static final Pattern[] brackets = new Pattern[] { compile("\\([^\\(]*\\)"), compile("\\[[^\\[]*\\]"), compile("\\{[^\\{]*\\}") }; private static final Pattern[] brackets = new Pattern[] { compile("\\([^\\(]*\\)"), compile("\\[[^\\[]*\\]"), compile("\\{[^\\{]*\\}") };
private static final Pattern trailingParentheses = compile("[(]([^)]*)[)]$"); private static final Pattern trailingParentheses = compile("(?<!^)[(]([^)]*)[)]$");
private static final Pattern checksum = compile("[\\(\\[]\\p{XDigit}{8}[\\]\\)]"); private static final Pattern checksum = compile("[\\(\\[]\\p{XDigit}{8}[\\]\\)]");
public static String normalizePunctuation(String name) { public static String normalizePunctuation(String name) {
// remove/normalize special characters // remove/normalize special characters
name = apostrophe.matcher(name).replaceAll(""); name = apostrophe.matcher(name).replaceAll("");
name = punctuation.matcher(name).replaceAll(" "); name = punctuation.matcher(name).replaceAll(" ");
return name.trim(); return name.trim();
} }
public static String normalizeBrackets(String name) { public static String normalizeBrackets(String name) {
// remove group names and checksums, any [...] or (...) // remove group names and checksums, any [...] or (...)
for (Pattern it : brackets) { for (Pattern it : brackets) {
name = it.matcher(name).replaceAll(" "); name = it.matcher(name).replaceAll(" ");
} }
return name; return name;
} }
public static String removeEmbeddedChecksum(String string) { public static String removeEmbeddedChecksum(String string) {
// match embedded checksum and surrounding brackets // match embedded checksum and surrounding brackets
return checksum.matcher(string).replaceAll(""); return checksum.matcher(string).replaceAll("");
} }
public static String removeTrailingBrackets(String name) { public static String removeTrailingBrackets(String name) {
// remove trailing braces, e.g. Doctor Who (2005) -> Doctor Who // remove trailing braces, e.g. Doctor Who (2005) -> Doctor Who
return trailingParentheses.matcher(name).replaceAll("").trim(); return trailingParentheses.matcher(name).replaceAll("").trim();
} }
} }

View File

@ -161,6 +161,7 @@ ANBU
Anbu-Solar Anbu-Solar
AnCo.2 AnCo.2
aNDy aNDy
ANE
AnFs AnFs
ANGELiC ANGELiC
Ani-Kraze Ani-Kraze
@ -261,6 +262,7 @@ BaDTaStE
BAF BAF
BAJSKORV BAJSKORV
Baka-Anime Baka-Anime
Baka-Chi
Bakaniichan Bakaniichan
Bakura2 Bakura2
BaLD BaLD
@ -533,6 +535,7 @@ DaDuck
DAFTPUNK DAFTPUNK
Dali-Neko Dali-Neko
Daman Daman
DameDesuYo
danger2u danger2u
Dango Dango
danirl danirl
@ -575,6 +578,7 @@ dEr
DerSchuft DerSchuft
DESiRED DESiRED
desnsurrender desnsurrender
DESS
DETAiLS DETAiLS
DeTvaVe DeTvaVe
DEViSE DEViSE
@ -1115,6 +1119,7 @@ iNfInItE_424
iNFOTv iNFOTv
iNGOT iNGOT
iNjECT iNjECT
iNK
Inko Inko
INP INP
InSaNiTy InSaNiTy
@ -2181,6 +2186,7 @@ ULTiMATE
umai umai
umee umee
UMF UMF
UNDERWATER
Underwater-Mahjong Underwater-Mahjong
UNiQUE UNiQUE
UNiT UNiT
@ -2271,6 +2277,7 @@ WATERS
WAVEY WAVEY
WBZ WBZ
WEST WEST
WESTSiDE
WHATELSE WHATELSE
WHEELS WHEELS
WHiiZz WHiiZz