Take care of obvious ^RG- or -RG$ patterns

This commit is contained in:
Reinhard Pointner 2016-03-12 15:19:27 +00:00
parent c5f8dc4356
commit 1c95bfd16a
1 changed files with 8 additions and 4 deletions

View File

@ -24,7 +24,6 @@ import java.text.Normalizer.Form;
import java.time.Duration; import java.time.Duration;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collection; import java.util.Collection;
import java.util.Comparator;
import java.util.HashSet; import java.util.HashSet;
import java.util.LinkedHashMap; import java.util.LinkedHashMap;
import java.util.List; import java.util.List;
@ -166,6 +165,7 @@ public class ReleaseInfo {
Set<String> languages = getLanguageMap(Locale.ENGLISH, Locale.getDefault()).keySet(); Set<String> languages = getLanguageMap(Locale.ENGLISH, Locale.getDefault()).keySet();
Pattern clutterBracket = getClutterBracketPattern(strict); Pattern clutterBracket = getClutterBracketPattern(strict);
Pattern releaseGroup = getReleaseGroupPattern(strict); Pattern releaseGroup = getReleaseGroupPattern(strict);
Pattern releaseGroupTrim = getReleaseGroupTrimPattern();
Pattern languageSuffix = getLanguageSuffixPattern(languages, strict); Pattern languageSuffix = getLanguageSuffixPattern(languages, strict);
Pattern languageTag = getLanguageTagPattern(languages); Pattern languageTag = getLanguageTagPattern(languages);
Pattern videoSource = getVideoSourcePattern(); Pattern videoSource = getVideoSourcePattern();
@ -176,7 +176,7 @@ public class ReleaseInfo {
Pattern queryBlacklist = getBlacklistPattern(); Pattern queryBlacklist = getBlacklistPattern();
stopwords[b] = new Pattern[] { languageTag, videoSource, videoTags, videoFormat, resolution, stereoscopic3d, languageSuffix }; stopwords[b] = new Pattern[] { languageTag, videoSource, videoTags, videoFormat, resolution, stereoscopic3d, languageSuffix };
blacklist[b] = new Pattern[] { queryBlacklist, languageTag, clutterBracket, releaseGroup, videoSource, videoTags, videoFormat, resolution, stereoscopic3d, languageSuffix }; blacklist[b] = new Pattern[] { releaseGroupTrim, queryBlacklist, languageTag, clutterBracket, releaseGroup, videoSource, videoTags, videoFormat, resolution, stereoscopic3d, languageSuffix };
} }
return items.stream().map(it -> { return items.stream().map(it -> {
@ -307,6 +307,11 @@ public class ReleaseInfo {
return compile("(?<!\\p{Alnum})" + or(releaseGroup.get()) + "(?!\\p{Alnum}|[^\\p{Alnum}](19|20)\\d{2})", strict ? 0 : CASE_INSENSITIVE | UNICODE_CHARACTER_CLASS); return compile("(?<!\\p{Alnum})" + or(releaseGroup.get()) + "(?!\\p{Alnum}|[^\\p{Alnum}](19|20)\\d{2})", strict ? 0 : CASE_INSENSITIVE | UNICODE_CHARACTER_CLASS);
} }
/**
 * Builds the pattern used to trim an obvious release group tag from the start or end of a name.
 *
 * <p>The pattern has two alternatives, both built around the same release group alternation
 * (presumably the known release group names supplied by {@code releaseGroup.get()} and joined
 * by {@code or(...)} - confirm against those helpers):
 * <ul>
 * <li>leading tag: the name is preceded by {@code [}, {@code (} or the start of the input, and
 * followed by {@code ]}, {@code )} or {@code -} (e.g. {@code ^RG-});</li>
 * <li>trailing tag: the name is preceded by {@code [}, {@code (} or {@code -}, and followed by
 * {@code ]}, {@code )} or the end of the input (e.g. {@code -RG$}).</li>
 * </ul>
 *
 * <p>Only lookarounds are used for the separators, so a match covers the group name itself and
 * never consumes the surrounding brackets or dash.
 *
 * @return a case-insensitive pattern compiled with {@code UNICODE_CHARACTER_CLASS}
 * @throws Exception if the release group data cannot be obtained
 */
public Pattern getReleaseGroupTrimPattern() throws Exception {
	// NOTE: the trailing alternative must use a POSITIVE lookbehind. A negative lookbehind here would
	// reject "-RG" at the end of the input and instead accept a group name glued to arbitrary text
	// (e.g. the tail of an ordinary word), which is the opposite of the intended "-RG$" match.
	return compile("(?<=\\[|\\(|^)" + or(releaseGroup.get()) + "(?=\\]|\\)|\\-)|(?<=\\[|\\(|\\-)" + or(releaseGroup.get()) + "(?=\\]|\\)|$)", CASE_INSENSITIVE | UNICODE_CHARACTER_CLASS);
}
public Pattern getBlacklistPattern() throws Exception { public Pattern getBlacklistPattern() throws Exception {
// pattern matching any release group name enclosed in separators // pattern matching any release group name enclosed in separators
return compile("(?<!\\p{Alnum})" + or(queryBlacklist.get()) + "(?!\\p{Alnum})", CASE_INSENSITIVE | UNICODE_CHARACTER_CLASS); return compile("(?<!\\p{Alnum})" + or(queryBlacklist.get()) + "(?!\\p{Alnum})", CASE_INSENSITIVE | UNICODE_CHARACTER_CLASS);
@ -519,8 +524,7 @@ public class ReleaseInfo {
collator.setDecomposition(Collator.FULL_DECOMPOSITION); collator.setDecomposition(Collator.FULL_DECOMPOSITION);
collator.setStrength(Collator.PRIMARY); collator.setStrength(Collator.PRIMARY);
Comparator<? super String> order = collator; Map<String, Locale> languageMap = new TreeMap<String, Locale>(collator);
Map<String, Locale> languageMap = new TreeMap<String, Locale>(order);
for (String code : Locale.getISOLanguages()) { for (String code : Locale.getISOLanguages()) {
Locale locale = new Locale(code); // force ISO3 language as default toString() value Locale locale = new Locale(code); // force ISO3 language as default toString() value