mirror of
https://github.com/mitb-archive/filebot
synced 2025-01-11 13:58:16 -05:00
* ignore trailing (1) patterns for TITLE metrics
This commit is contained in:
parent
b7a9d524cb
commit
ecb9fac822
@ -118,7 +118,7 @@ public enum EpisodeMetrics implements SimilarityMetric {
|
|||||||
Episode e = (Episode) object;
|
Episode e = (Episode) object;
|
||||||
|
|
||||||
// don't use title for matching if title equals series name
|
// don't use title for matching if title equals series name
|
||||||
String normalizedToken = normalizeObject(e.getTitle());
|
String normalizedToken = normalizeObject(removeTrailingBrackets(e.getTitle()));
|
||||||
if (normalizedToken.length() >= 4 && !normalizeObject(e.getSeriesName()).contains(normalizedToken)) {
|
if (normalizedToken.length() >= 4 && !normalizeObject(e.getSeriesName()).contains(normalizedToken)) {
|
||||||
return normalizedToken;
|
return normalizedToken;
|
||||||
}
|
}
|
||||||
|
@ -1,50 +1,43 @@
|
|||||||
|
|
||||||
package net.sourceforge.filebot.similarity;
|
package net.sourceforge.filebot.similarity;
|
||||||
|
|
||||||
|
|
||||||
import static java.util.regex.Pattern.*;
|
import static java.util.regex.Pattern.*;
|
||||||
|
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
|
|
||||||
public class Normalization {
|
public class Normalization {
|
||||||
|
|
||||||
private static final Pattern apostrophe = compile("['`´‘’ʻ]+");
|
private static final Pattern apostrophe = compile("['`´‘’ʻ]+");
|
||||||
private static final Pattern punctuation = compile("[\\p{Punct}\\p{Space}]+");
|
private static final Pattern punctuation = compile("[\\p{Punct}\\p{Space}]+");
|
||||||
|
|
||||||
private static final Pattern[] brackets = new Pattern[] { compile("\\([^\\(]*\\)"), compile("\\[[^\\[]*\\]"), compile("\\{[^\\{]*\\}") };
|
private static final Pattern[] brackets = new Pattern[] { compile("\\([^\\(]*\\)"), compile("\\[[^\\[]*\\]"), compile("\\{[^\\{]*\\}") };
|
||||||
private static final Pattern trailingParentheses = compile("[(]([^)]*)[)]$");
|
private static final Pattern trailingParentheses = compile("(?<!^)[(]([^)]*)[)]$");
|
||||||
|
|
||||||
private static final Pattern checksum = compile("[\\(\\[]\\p{XDigit}{8}[\\]\\)]");
|
private static final Pattern checksum = compile("[\\(\\[]\\p{XDigit}{8}[\\]\\)]");
|
||||||
|
|
||||||
|
|
||||||
public static String normalizePunctuation(String name) {
|
public static String normalizePunctuation(String name) {
|
||||||
// remove/normalize special characters
|
// remove/normalize special characters
|
||||||
name = apostrophe.matcher(name).replaceAll("");
|
name = apostrophe.matcher(name).replaceAll("");
|
||||||
name = punctuation.matcher(name).replaceAll(" ");
|
name = punctuation.matcher(name).replaceAll(" ");
|
||||||
return name.trim();
|
return name.trim();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public static String normalizeBrackets(String name) {
|
public static String normalizeBrackets(String name) {
|
||||||
// remove group names and checksums, any [...] or (...)
|
// remove group names and checksums, any [...] or (...)
|
||||||
for (Pattern it : brackets) {
|
for (Pattern it : brackets) {
|
||||||
name = it.matcher(name).replaceAll(" ");
|
name = it.matcher(name).replaceAll(" ");
|
||||||
}
|
}
|
||||||
|
|
||||||
return name;
|
return name;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public static String removeEmbeddedChecksum(String string) {
|
public static String removeEmbeddedChecksum(String string) {
|
||||||
// match embedded checksum and surrounding brackets
|
// match embedded checksum and surrounding brackets
|
||||||
return checksum.matcher(string).replaceAll("");
|
return checksum.matcher(string).replaceAll("");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public static String removeTrailingBrackets(String name) {
|
public static String removeTrailingBrackets(String name) {
|
||||||
// remove trailing braces, e.g. Doctor Who (2005) -> Doctor Who
|
// remove trailing braces, e.g. Doctor Who (2005) -> Doctor Who
|
||||||
return trailingParentheses.matcher(name).replaceAll("").trim();
|
return trailingParentheses.matcher(name).replaceAll("").trim();
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -161,6 +161,7 @@ ANBU
|
|||||||
Anbu-Solar
|
Anbu-Solar
|
||||||
AnCo.2
|
AnCo.2
|
||||||
aNDy
|
aNDy
|
||||||
|
ANE
|
||||||
AnFs
|
AnFs
|
||||||
ANGELiC
|
ANGELiC
|
||||||
Ani-Kraze
|
Ani-Kraze
|
||||||
@ -261,6 +262,7 @@ BaDTaStE
|
|||||||
BAF
|
BAF
|
||||||
BAJSKORV
|
BAJSKORV
|
||||||
Baka-Anime
|
Baka-Anime
|
||||||
|
Baka-Chi
|
||||||
Bakaniichan
|
Bakaniichan
|
||||||
Bakura2
|
Bakura2
|
||||||
BaLD
|
BaLD
|
||||||
@ -533,6 +535,7 @@ DaDuck
|
|||||||
DAFTPUNK
|
DAFTPUNK
|
||||||
Dali-Neko
|
Dali-Neko
|
||||||
Daman
|
Daman
|
||||||
|
DameDesuYo
|
||||||
danger2u
|
danger2u
|
||||||
Dango
|
Dango
|
||||||
danirl
|
danirl
|
||||||
@ -575,6 +578,7 @@ dEr
|
|||||||
DerSchuft
|
DerSchuft
|
||||||
DESiRED
|
DESiRED
|
||||||
desnsurrender
|
desnsurrender
|
||||||
|
DESS
|
||||||
DETAiLS
|
DETAiLS
|
||||||
DeTvaVe
|
DeTvaVe
|
||||||
DEViSE
|
DEViSE
|
||||||
@ -1115,6 +1119,7 @@ iNfInItE_424
|
|||||||
iNFOTv
|
iNFOTv
|
||||||
iNGOT
|
iNGOT
|
||||||
iNjECT
|
iNjECT
|
||||||
|
iNK
|
||||||
Inko
|
Inko
|
||||||
INP
|
INP
|
||||||
InSaNiTy
|
InSaNiTy
|
||||||
@ -2181,6 +2186,7 @@ ULTiMATE
|
|||||||
umai
|
umai
|
||||||
umee
|
umee
|
||||||
UMF
|
UMF
|
||||||
|
UNDERWATER
|
||||||
Underwater-Mahjong
|
Underwater-Mahjong
|
||||||
UNiQUE
|
UNiQUE
|
||||||
UNiT
|
UNiT
|
||||||
@ -2271,6 +2277,7 @@ WATERS
|
|||||||
WAVEY
|
WAVEY
|
||||||
WBZ
|
WBZ
|
||||||
WEST
|
WEST
|
||||||
|
WESTSiDE
|
||||||
WHATELSE
|
WHATELSE
|
||||||
WHEELS
|
WHEELS
|
||||||
WHiiZz
|
WHiiZz
|
||||||
|
Loading…
Reference in New Issue
Block a user