mirror of
https://github.com/mitb-archive/filebot
synced 2024-12-23 16:28:51 -05:00
* fine-tune anime matching
This commit is contained in:
parent
2232576c1d
commit
3a1eada102
@ -228,7 +228,7 @@ if (thetvdb_txt.size() < 30000) { throw new Exception('TheTVDB index sanity fail
|
||||
|
||||
|
||||
// BUILD anidb index
|
||||
def anidb = new net.sourceforge.filebot.web.AnidbClient(null, 0).getAnimeTitles()
|
||||
def anidb = new net.sourceforge.filebot.web.AnidbClient('filebot', 4).getAnimeTitles()
|
||||
|
||||
def anidb_index = anidb.findResults{
|
||||
def row = []
|
||||
@ -243,4 +243,4 @@ pack(anidb_out, anidb_txt)
|
||||
println "AniDB Index: " + anidb_txt.size()
|
||||
|
||||
// sanity check
|
||||
if (anidb_txt.size() < 5000) { throw new Exception('AniDB index sanity failed') }
|
||||
if (anidb_txt.size() < 8000) { throw new Exception('AniDB index sanity failed') }
|
||||
|
@ -106,7 +106,7 @@ public class CmdlineOperations implements CmdlineInterface {
|
||||
int sxe = 0; // SxE
|
||||
int cws = 0; // common word sequence
|
||||
|
||||
SeriesNameMatcher nameMatcher = new SeriesNameMatcher(locale);
|
||||
SeriesNameMatcher nameMatcher = new SeriesNameMatcher(locale, true);
|
||||
Collection<String> cwsList = emptySet();
|
||||
if (max >= 5) {
|
||||
cwsList = nameMatcher.matchAll(mediaFiles.toArray(new File[0]));
|
||||
|
@ -360,7 +360,7 @@ public class MediaDetection {
|
||||
Collection<String> matches = new LinkedHashSet<String>();
|
||||
|
||||
// check CWS matches
|
||||
SeriesNameMatcher snm = new SeriesNameMatcher(locale);
|
||||
SeriesNameMatcher snm = new SeriesNameMatcher(locale, true);
|
||||
matches.addAll(snm.matchAll(files.toArray(new File[files.size()])));
|
||||
|
||||
// check for known pattern matches
|
||||
|
@ -300,64 +300,72 @@ public enum EpisodeMetrics implements SimilarityMetric {
|
||||
SeriesName(new NameSimilarityMetric() {
|
||||
|
||||
private ReleaseInfo releaseInfo = new ReleaseInfo();
|
||||
private SeriesNameMatcher seriesNameMatcher = new SeriesNameMatcher();
|
||||
private SeriesNameMatcher seriesNameMatcher = new SeriesNameMatcher(Locale.ROOT, false);
|
||||
|
||||
@Override
|
||||
public float getSimilarity(Object o1, Object o2) {
|
||||
float lowerBound = super.getSimilarity(normalize(o1, true), normalize(o2, true));
|
||||
float upperBound = super.getSimilarity(normalize(o1, false), normalize(o2, false));
|
||||
String[] f1 = getNormalizedEffectiveIdentifiers(o1);
|
||||
String[] f2 = getNormalizedEffectiveIdentifiers(o2);
|
||||
|
||||
return (float) (floor(max(lowerBound, upperBound) * 4) / 4);
|
||||
};
|
||||
// match all fields and average similarity
|
||||
float max = 0;
|
||||
for (String s1 : f1) {
|
||||
for (String s2 : f2) {
|
||||
max = max(super.getSimilarity(s1, s2), max);
|
||||
}
|
||||
}
|
||||
|
||||
// normalize absolute similarity to similarity rank (4 ranks in total),
|
||||
// so we are less likely to fall for false positives in this pass, and move on to the next one
|
||||
return (float) (floor(max * 4) / 4);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String normalize(Object object) {
|
||||
return object.toString();
|
||||
};
|
||||
}
|
||||
|
||||
protected String[] getNormalizedEffectiveIdentifiers(Object object) {
|
||||
List<?> identifiers = getEffectiveIdentifiers(object);
|
||||
String[] names = new String[identifiers.size()];
|
||||
|
||||
for (int i = 0; i < names.length; i++) {
|
||||
names[i] = normalizeObject(identifiers.get(i));
|
||||
}
|
||||
return names;
|
||||
}
|
||||
|
||||
protected List<?> getEffectiveIdentifiers(Object object) {
|
||||
List<String> names = null;
|
||||
|
||||
protected String normalize(Object object, boolean strict) {
|
||||
if (object instanceof Episode) {
|
||||
if (strict) {
|
||||
object = ((Episode) object).getSeriesName(); // focus on series name
|
||||
} else {
|
||||
object = removeTrailingBrackets(((Episode) object).getSeriesName()); // focus on series name (without US/UK 1967/2005 differentiation)
|
||||
}
|
||||
names = ((Episode) object).getSeries().getEffectiveNames();
|
||||
} else if (object instanceof File) {
|
||||
object = ((File) object).getName(); // try to narrow down on series name
|
||||
|
||||
try {
|
||||
object = resolveSeriesDirectMapping((String) object);
|
||||
} catch (IOException e) {
|
||||
Logger.getLogger(EpisodeMetrics.class.getName()).log(Level.WARNING, e.getMessage());
|
||||
}
|
||||
|
||||
String snm = seriesNameMatcher.matchByEpisodeIdentifier((String) object);
|
||||
if (snm != null) {
|
||||
object = snm;
|
||||
names = new ArrayList<String>(3);
|
||||
for (File f : listPathTail((File) object, 3, true)) {
|
||||
String fn = getName(f);
|
||||
String sn = seriesNameMatcher.matchByEpisodeIdentifier(fn);
|
||||
if (sn != null) {
|
||||
names.add(sn);
|
||||
} else {
|
||||
names.add(fn);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// equally strip away any potential clutter
|
||||
try {
|
||||
object = releaseInfo.cleanRelease(singleton(object.toString()), strict).iterator().next();
|
||||
} catch (NoSuchElementException e) {
|
||||
// keep default value in case all tokens are stripped away
|
||||
} catch (IOException e) {
|
||||
Logger.getLogger(EpisodeMetrics.class.getName()).log(Level.WARNING, e.getMessage());
|
||||
if (names != null) {
|
||||
try {
|
||||
return releaseInfo.cleanRelease(names, false);
|
||||
} catch (NoSuchElementException e) {
|
||||
// keep default value in case all tokens are stripped away
|
||||
} catch (IOException e) {
|
||||
Logger.getLogger(EpisodeMetrics.class.getName()).log(Level.WARNING, e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
// simplify file name, if possible
|
||||
return normalizeObject(object);
|
||||
}
|
||||
|
||||
protected String resolveSeriesDirectMapping(String input) throws IOException {
|
||||
for (Pattern it : releaseInfo.getSeriesDirectMappings().keySet()) {
|
||||
Matcher m = it.matcher(input);
|
||||
if (m.find()) {
|
||||
return m.replaceAll(releaseInfo.getSeriesDirectMappings().get(it));
|
||||
}
|
||||
}
|
||||
return input;
|
||||
return emptyList();
|
||||
}
|
||||
}),
|
||||
|
||||
|
@ -1,7 +1,5 @@
|
||||
|
||||
package net.sourceforge.filebot.similarity;
|
||||
|
||||
|
||||
import static java.util.Collections.*;
|
||||
import static java.util.regex.Pattern.*;
|
||||
import static net.sourceforge.filebot.similarity.CommonSequenceMatcher.*;
|
||||
@ -29,63 +27,62 @@ import java.util.regex.Pattern;
|
||||
import net.sourceforge.filebot.similarity.SeasonEpisodeMatcher.SxE;
|
||||
import net.sourceforge.tuned.FileUtilities;
|
||||
|
||||
|
||||
public class SeriesNameMatcher {
|
||||
|
||||
protected SeasonEpisodeMatcher seasonEpisodeMatcher = new SeasonEpisodeMatcher(SeasonEpisodeMatcher.DEFAULT_SANITY, true);
|
||||
protected DateMatcher dateMatcher = new DateMatcher();
|
||||
|
||||
protected NameSimilarityMetric nameSimilarityMetric = new NameSimilarityMetric();
|
||||
|
||||
|
||||
protected SeasonEpisodeMatcher seasonEpisodeMatcher;
|
||||
protected DateMatcher dateMatcher;
|
||||
|
||||
protected NameSimilarityMetric nameSimilarityMetric;
|
||||
|
||||
protected CommonSequenceMatcher commonSequenceMatcher;
|
||||
|
||||
|
||||
|
||||
public SeriesNameMatcher() {
|
||||
this(Locale.ROOT);
|
||||
this(Locale.ROOT, true);
|
||||
}
|
||||
|
||||
|
||||
public SeriesNameMatcher(Locale locale) {
|
||||
|
||||
public SeriesNameMatcher(Locale locale, boolean strict) {
|
||||
seasonEpisodeMatcher = new SeasonEpisodeMatcher(SeasonEpisodeMatcher.DEFAULT_SANITY, strict);
|
||||
dateMatcher = new DateMatcher();
|
||||
nameSimilarityMetric = new NameSimilarityMetric();
|
||||
|
||||
commonSequenceMatcher = new CommonSequenceMatcher(getLenientCollator(locale), 3, true) {
|
||||
|
||||
|
||||
@Override
|
||||
protected CollationKey[] split(String sequence) {
|
||||
return super.split(normalize(sequence));
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
|
||||
public Collection<String> matchAll(File[] files) {
|
||||
SeriesNameCollection seriesNames = new SeriesNameCollection();
|
||||
|
||||
|
||||
// group files by parent folder
|
||||
for (Entry<File, String[]> entry : mapNamesByFolder(files).entrySet()) {
|
||||
String parent = entry.getKey().getName();
|
||||
String[] names = entry.getValue();
|
||||
|
||||
|
||||
for (String nameMatch : matchAll(names)) {
|
||||
String commonMatch = commonSequenceMatcher.matchFirstCommonSequence(nameMatch, parent);
|
||||
float similarity = commonMatch == null ? 0 : nameSimilarityMetric.getSimilarity(commonMatch, nameMatch);
|
||||
|
||||
|
||||
// prefer common match, but only if it's very similar to the original match
|
||||
seriesNames.add(similarity > 0.7 ? commonMatch : nameMatch);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
return seriesNames;
|
||||
}
|
||||
|
||||
|
||||
|
||||
public Collection<String> matchAll(String[] names) {
|
||||
SeriesNameCollection seriesNames = new SeriesNameCollection();
|
||||
|
||||
|
||||
// allow matching of a small number of episodes, by setting threshold = length if length < 5
|
||||
int threshold = Math.min(names.length, 5);
|
||||
|
||||
|
||||
// match common word sequences (likely series names)
|
||||
SeriesNameCollection whitelist = new SeriesNameCollection();
|
||||
|
||||
|
||||
// focus chars before the SxE / Date pattern when matching by common word sequence
|
||||
String[] focus = Arrays.copyOf(names, names.length);
|
||||
for (int i = 0; i < focus.length; i++) {
|
||||
@ -100,41 +97,40 @@ public class SeriesNameMatcher {
|
||||
}
|
||||
}
|
||||
whitelist.addAll(deepMatchAll(focus, threshold));
|
||||
|
||||
|
||||
// 1. use pattern matching
|
||||
seriesNames.addAll(flatMatchAll(names, compile(join(whitelist, "|"), CASE_INSENSITIVE | UNICODE_CASE), threshold, false));
|
||||
|
||||
|
||||
// 2. use common word sequences
|
||||
seriesNames.addAll(whitelist);
|
||||
|
||||
|
||||
return seriesNames;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Try to match and verify all series names using known season episode patterns.
|
||||
*
|
||||
* @param names episode names
|
||||
* @return series names that have been matched one or multiple times depending on the
|
||||
* threshold
|
||||
* @param names
|
||||
* episode names
|
||||
* @return series names that have been matched one or multiple times depending on the threshold
|
||||
*/
|
||||
private Collection<String> flatMatchAll(String[] names, Pattern prefixPattern, int threshold, boolean strict) {
|
||||
@SuppressWarnings("unchecked")
|
||||
Comparator<String> wordComparator = (Comparator) commonSequenceMatcher.getCollator();
|
||||
ThresholdCollection<String> thresholdCollection = new ThresholdCollection<String>(threshold, wordComparator);
|
||||
|
||||
|
||||
for (String name : names) {
|
||||
// use normalized name
|
||||
name = normalize(name);
|
||||
|
||||
|
||||
Matcher prefix = prefixPattern.matcher(name);
|
||||
int prefixEnd = prefix.find() ? prefix.end() : 0;
|
||||
|
||||
|
||||
int sxePosition = seasonEpisodeMatcher.find(name, prefixEnd);
|
||||
if (sxePosition > 0) {
|
||||
String hit = name.substring(0, sxePosition).trim();
|
||||
List<SxE> sxe = seasonEpisodeMatcher.match(name.substring(sxePosition));
|
||||
|
||||
|
||||
if (!strict && sxe.size() == 1 && sxe.get(0).season >= 0) {
|
||||
// bypass threshold if hit is likely to be genuine
|
||||
thresholdCollection.addDirect(hit);
|
||||
@ -149,17 +145,17 @@ public class SeriesNameMatcher {
|
||||
thresholdCollection.addDirect(name.substring(0, datePosition).trim());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
return thresholdCollection;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Try to match all common word sequences in the given list.
|
||||
*
|
||||
* @param names list of episode names
|
||||
* @param names
|
||||
* list of episode names
|
||||
* @return all common word sequences that have been found
|
||||
*/
|
||||
private Collection<String> deepMatchAll(String[] names, int threshold) {
|
||||
@ -167,32 +163,30 @@ public class SeriesNameMatcher {
|
||||
if (names.length < 2 || names.length < threshold) {
|
||||
return emptySet();
|
||||
}
|
||||
|
||||
|
||||
String common = commonSequenceMatcher.matchFirstCommonSequence(names);
|
||||
|
||||
|
||||
if (common != null) {
|
||||
// common word sequence found
|
||||
return singleton(common);
|
||||
}
|
||||
|
||||
|
||||
// recursive divide and conquer
|
||||
List<String> results = new ArrayList<String>();
|
||||
|
||||
|
||||
// split list in two and try to match common word sequence on those
|
||||
results.addAll(deepMatchAll(Arrays.copyOfRange(names, 0, names.length / 2), threshold));
|
||||
results.addAll(deepMatchAll(Arrays.copyOfRange(names, names.length / 2, names.length), threshold));
|
||||
|
||||
|
||||
return results;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Try to match a series name from the given episode name using known season episode
|
||||
* patterns.
|
||||
* Try to match a series name from the given episode name using known season episode patterns.
|
||||
*
|
||||
* @param name episode name
|
||||
* @return a substring of the given name that ends before the first occurrence of a season
|
||||
* episode pattern, or null if there is no such pattern
|
||||
* @param name
|
||||
* episode name
|
||||
* @return a substring of the given name that ends before the first occurrence of a season episode pattern, or null if there is no such pattern
|
||||
*/
|
||||
public String matchByEpisodeIdentifier(String name) {
|
||||
int seasonEpisodePosition = seasonEpisodeMatcher.find(name, 0);
|
||||
@ -200,245 +194,229 @@ public class SeriesNameMatcher {
|
||||
// series name ends at the first season episode pattern
|
||||
return normalize(name.substring(0, seasonEpisodePosition));
|
||||
}
|
||||
|
||||
|
||||
int datePosition = dateMatcher.find(name, 0);
|
||||
if (datePosition > 0) {
|
||||
// series name ends at the first date pattern
|
||||
return normalize(name.substring(0, datePosition));
|
||||
}
|
||||
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Try to match a series name from the first common word sequence.
|
||||
*
|
||||
* @param names various episode names (at least two)
|
||||
* @param names
|
||||
* various episode names (at least two)
|
||||
* @return a word sequence all episode names have in common, or null
|
||||
* @throws IllegalArgumentException if less than 2 episode names are given
|
||||
* @throws IllegalArgumentException
|
||||
* if less than 2 episode names are given
|
||||
*/
|
||||
public String matchByFirstCommonWordSequence(String... names) {
|
||||
if (names.length < 2) {
|
||||
throw new IllegalArgumentException("Can't match common sequence from less than two names");
|
||||
}
|
||||
|
||||
|
||||
return commonSequenceMatcher.matchFirstCommonSequence(names);
|
||||
}
|
||||
|
||||
|
||||
|
||||
protected String normalize(String name) {
|
||||
// remove group names and checksums, any [...] or (...)
|
||||
name = normalizeBrackets(name);
|
||||
|
||||
|
||||
// remove/normalize special characters
|
||||
name = normalizePunctuation(name);
|
||||
|
||||
|
||||
return name;
|
||||
}
|
||||
|
||||
|
||||
|
||||
protected <T> T[] firstCommonSequence(T[] seq1, T[] seq2, int maxStartIndex, Comparator<T> equalsComparator) {
|
||||
for (int i = 0; i < seq1.length && i <= maxStartIndex; i++) {
|
||||
for (int j = 0; j < seq2.length && j <= maxStartIndex; j++) {
|
||||
// common sequence length
|
||||
int len = 0;
|
||||
|
||||
|
||||
// iterate over common sequence
|
||||
while ((i + len < seq1.length) && (j + len < seq2.length) && (equalsComparator.compare(seq1[i + len], seq2[j + len]) == 0)) {
|
||||
len++;
|
||||
}
|
||||
|
||||
|
||||
// check if a common sequence was found
|
||||
if (len > 0) {
|
||||
if (i == 0 && len == seq1.length)
|
||||
return seq1;
|
||||
|
||||
|
||||
return Arrays.copyOfRange(seq1, i, i + len);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// no intersection at all
|
||||
return null;
|
||||
}
|
||||
|
||||
|
||||
|
||||
private Map<File, String[]> mapNamesByFolder(File... files) {
|
||||
Map<File, List<File>> filesByFolder = new LinkedHashMap<File, List<File>>();
|
||||
|
||||
|
||||
for (File file : files) {
|
||||
File folder = file.getParentFile();
|
||||
|
||||
|
||||
List<File> list = filesByFolder.get(folder);
|
||||
|
||||
|
||||
if (list == null) {
|
||||
list = new ArrayList<File>();
|
||||
filesByFolder.put(folder, list);
|
||||
}
|
||||
|
||||
|
||||
list.add(file);
|
||||
}
|
||||
|
||||
|
||||
// convert folder->files map to folder->names map
|
||||
Map<File, String[]> namesByFolder = new LinkedHashMap<File, String[]>();
|
||||
|
||||
|
||||
for (Entry<File, List<File>> entry : filesByFolder.entrySet()) {
|
||||
namesByFolder.put(entry.getKey(), names(entry.getValue()));
|
||||
}
|
||||
|
||||
|
||||
return namesByFolder;
|
||||
}
|
||||
|
||||
|
||||
|
||||
protected String[] names(Collection<File> files) {
|
||||
String[] names = new String[files.size()];
|
||||
|
||||
|
||||
int i = 0;
|
||||
|
||||
|
||||
// fill array
|
||||
for (File file : files) {
|
||||
names[i++] = FileUtilities.getName(file);
|
||||
}
|
||||
|
||||
|
||||
return names;
|
||||
}
|
||||
|
||||
|
||||
|
||||
protected static class SeriesNameCollection extends AbstractCollection<String> {
|
||||
|
||||
|
||||
private final Map<String, String> data = new LinkedHashMap<String, String>();
|
||||
|
||||
|
||||
|
||||
@Override
|
||||
public boolean add(String value) {
|
||||
value = value.trim();
|
||||
|
||||
|
||||
// require series name to have at least two characters
|
||||
if (value.length() < 2) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
String current = data.get(key(value));
|
||||
|
||||
|
||||
// prefer strings with similar upper/lower case ratio (e.g. prefer Roswell over roswell)
|
||||
if (current == null || firstCharacterCaseBalance(current) < firstCharacterCaseBalance(value)) {
|
||||
data.put(key(value), value);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
|
||||
protected String key(Object value) {
|
||||
return value.toString().toLowerCase();
|
||||
}
|
||||
|
||||
|
||||
|
||||
protected float firstCharacterCaseBalance(String s) {
|
||||
int upper = 0;
|
||||
int lower = 0;
|
||||
|
||||
|
||||
Scanner scanner = new Scanner(s); // Scanner uses a white space delimiter by default
|
||||
|
||||
|
||||
while (scanner.hasNext()) {
|
||||
char c = scanner.next().charAt(0);
|
||||
|
||||
|
||||
if (Character.isLowerCase(c))
|
||||
lower++;
|
||||
else if (Character.isUpperCase(c))
|
||||
upper++;
|
||||
}
|
||||
|
||||
|
||||
// give upper case characters a slight boost over lower case characters
|
||||
return (lower + (upper * 1.01f)) / Math.abs(lower - upper);
|
||||
}
|
||||
|
||||
|
||||
|
||||
@Override
|
||||
public boolean contains(Object value) {
|
||||
return data.containsKey(key(value));
|
||||
}
|
||||
|
||||
|
||||
|
||||
@Override
|
||||
public Iterator<String> iterator() {
|
||||
return data.values().iterator();
|
||||
}
|
||||
|
||||
|
||||
|
||||
@Override
|
||||
public int size() {
|
||||
return data.size();
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
protected static class ThresholdCollection<E> extends AbstractCollection<E> {
|
||||
|
||||
|
||||
private final Collection<E> heaven;
|
||||
private final Map<E, Collection<E>> limbo;
|
||||
|
||||
|
||||
private final int threshold;
|
||||
|
||||
|
||||
|
||||
public ThresholdCollection(int threshold, Comparator<E> equalityComparator) {
|
||||
this.heaven = new ArrayList<E>();
|
||||
this.limbo = new TreeMap<E, Collection<E>>(equalityComparator);
|
||||
this.threshold = threshold;
|
||||
}
|
||||
|
||||
|
||||
|
||||
@Override
|
||||
public boolean add(E value) {
|
||||
Collection<E> buffer = limbo.get(value);
|
||||
|
||||
|
||||
if (buffer == null) {
|
||||
// initialize buffer
|
||||
buffer = new ArrayList<E>(threshold);
|
||||
limbo.put(value, buffer);
|
||||
}
|
||||
|
||||
|
||||
if (buffer == heaven) {
|
||||
// threshold reached
|
||||
heaven.add(value);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
// add element to buffer
|
||||
buffer.add(value);
|
||||
|
||||
|
||||
// check if threshold has been reached
|
||||
if (buffer.size() >= threshold) {
|
||||
heaven.addAll(buffer);
|
||||
|
||||
|
||||
// replace buffer with heaven
|
||||
limbo.put(value, heaven);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
return false;
|
||||
};
|
||||
|
||||
|
||||
|
||||
public boolean addDirect(E element) {
|
||||
return heaven.add(element);
|
||||
}
|
||||
|
||||
|
||||
|
||||
@Override
|
||||
public Iterator<E> iterator() {
|
||||
return heaven.iterator();
|
||||
}
|
||||
|
||||
|
||||
|
||||
@Override
|
||||
public int size() {
|
||||
return heaven.size();
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
@ -8,6 +8,8 @@ import java.net.URI;
|
||||
import java.net.URISyntaxException;
|
||||
import java.net.URL;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
@ -27,6 +29,7 @@ import javax.swing.Icon;
|
||||
import net.sourceforge.filebot.Cache;
|
||||
import net.sourceforge.filebot.ResourceManager;
|
||||
|
||||
import org.jsoup.Jsoup;
|
||||
import org.w3c.dom.Document;
|
||||
import org.w3c.dom.Node;
|
||||
|
||||
@ -165,13 +168,15 @@ public class AnidbClient extends AbstractEpisodeListProvider {
|
||||
// type: 1=primary title (one per anime), 2=synonyms (multiple per anime), 3=shorttitles (multiple per anime), 4=official title (one per language)
|
||||
Pattern pattern = Pattern.compile("^(?!#)(\\d+)[|](\\d)[|]([\\w-]+)[|](.+)$");
|
||||
|
||||
Map<Integer, String> primaryTitleMap = new HashMap<Integer, String>();
|
||||
Map<Integer, Map<String, String>> officialTitleMap = new HashMap<Integer, Map<String, String>>();
|
||||
Map<Integer, Map<String, String>> synonymsTitleMap = new HashMap<Integer, Map<String, String>>();
|
||||
List<String> languageOrder = new ArrayList<String>();
|
||||
languageOrder.add("x-jat");
|
||||
languageOrder.add("en");
|
||||
languageOrder.add("ja");
|
||||
|
||||
// fetch data
|
||||
Scanner scanner = new Scanner(new GZIPInputStream(url.openStream()), "UTF-8");
|
||||
Map<Integer, List<Object[]>> entriesByAnime = new HashMap<Integer, List<Object[]>>(65536);
|
||||
|
||||
Scanner scanner = new Scanner(new GZIPInputStream(url.openStream()), "UTF-8");
|
||||
try {
|
||||
while (scanner.hasNextLine()) {
|
||||
Matcher matcher = pattern.matcher(scanner.nextLine());
|
||||
@ -182,17 +187,17 @@ public class AnidbClient extends AbstractEpisodeListProvider {
|
||||
String language = matcher.group(3);
|
||||
String title = matcher.group(4);
|
||||
|
||||
if (type.equals("1")) {
|
||||
primaryTitleMap.put(aid, title);
|
||||
} else if (type.equals("2") || type.equals("4")) {
|
||||
Map<Integer, Map<String, String>> titleMap = (type.equals("4") ? officialTitleMap : synonymsTitleMap);
|
||||
Map<String, String> languageTitleMap = titleMap.get(aid);
|
||||
if (languageTitleMap == null) {
|
||||
languageTitleMap = new HashMap<String, String>();
|
||||
titleMap.put(aid, languageTitleMap);
|
||||
if (aid > 0 && title.length() > 0 && languageOrder.contains(language)) {
|
||||
List<Object[]> names = entriesByAnime.get(aid);
|
||||
if (names == null) {
|
||||
names = new ArrayList<Object[]>();
|
||||
entriesByAnime.put(aid, names);
|
||||
}
|
||||
|
||||
languageTitleMap.put(language, title);
|
||||
// resolve HTML entities
|
||||
title = Jsoup.parse(title).text();
|
||||
|
||||
names.add(new Object[] { Integer.parseInt(type), languageOrder.indexOf(language), title });
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -201,23 +206,36 @@ public class AnidbClient extends AbstractEpisodeListProvider {
|
||||
}
|
||||
|
||||
// build up a list of all possible AniDB search results
|
||||
anime = new ArrayList<AnidbSearchResult>(primaryTitleMap.size());
|
||||
anime = new ArrayList<AnidbSearchResult>(entriesByAnime.size());
|
||||
|
||||
for (Entry<Integer, String> entry : primaryTitleMap.entrySet()) {
|
||||
Map<String, String> localizedTitles = new HashMap<String, String>();
|
||||
if (synonymsTitleMap.containsKey(entry.getKey())) {
|
||||
localizedTitles.putAll(synonymsTitleMap.get(entry.getKey())); // use synonym as fallback
|
||||
}
|
||||
if (officialTitleMap.containsKey(entry.getKey())) {
|
||||
localizedTitles.putAll(officialTitleMap.get(entry.getKey())); // primarily use official title if available
|
||||
for (Entry<Integer, List<Object[]>> entry : entriesByAnime.entrySet()) {
|
||||
int aid = entry.getKey();
|
||||
List<Object[]> triples = entry.getValue();
|
||||
|
||||
Collections.sort(triples, new Comparator<Object[]>() {
|
||||
|
||||
@SuppressWarnings({ "unchecked", "rawtypes" })
|
||||
@Override
|
||||
public int compare(Object[] a, Object[] b) {
|
||||
for (int i = 0; i < a.length; i++) {
|
||||
if (!a[i].equals(b[i]))
|
||||
return ((Comparable) a[i]).compareTo(b[i]);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
});
|
||||
|
||||
List<String> names = new ArrayList<String>(triples.size());
|
||||
for (Object[] it : triples) {
|
||||
names.add((String) it[2]);
|
||||
}
|
||||
|
||||
String englishTitle = localizedTitles.get("en"); // ONLY SUPPORT ENGLISH LOCALIZATION
|
||||
anime.add(new AnidbSearchResult(entry.getKey(), entry.getValue(), englishTitle == null || englishTitle.isEmpty() ? new String[] {} : new String[] { englishTitle }));
|
||||
String primaryTitle = names.get(0);
|
||||
String[] aliasNames = names.subList(1, names.size()).toArray(new String[0]);
|
||||
anime.add(new AnidbSearchResult(aid, primaryTitle, aliasNames));
|
||||
}
|
||||
|
||||
// populate cache
|
||||
return cache.putSearchResult(null, Locale.ROOT, anime);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -8,8 +8,8 @@ public class AnidbSearchResult extends SearchResult {
|
||||
// used by serializer
|
||||
}
|
||||
|
||||
public AnidbSearchResult(int aid, String primaryTitle, String[] localizedTitles) {
|
||||
super(primaryTitle, localizedTitles);
|
||||
public AnidbSearchResult(int aid, String primaryTitle, String[] aliasNames) {
|
||||
super(primaryTitle, aliasNames);
|
||||
this.aid = aid;
|
||||
}
|
||||
|
||||
|
@ -1,7 +1,5 @@
|
||||
|
||||
package net.sourceforge.filebot.web;
|
||||
|
||||
|
||||
import static org.junit.Assert.*;
|
||||
|
||||
import java.util.List;
|
||||
@ -13,72 +11,72 @@ import org.junit.AfterClass;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
||||
|
||||
public class AnidbClientTest {
|
||||
|
||||
|
||||
/**
|
||||
* 74 episodes
|
||||
*/
|
||||
private static AnidbSearchResult monsterSearchResult;
|
||||
|
||||
|
||||
/**
|
||||
* 45 episodes
|
||||
*/
|
||||
private static AnidbSearchResult twelvekingdomsSearchResult;
|
||||
|
||||
|
||||
/**
|
||||
* 38 episodes, lots of special characters
|
||||
*/
|
||||
private static AnidbSearchResult princessTutuSearchResult;
|
||||
|
||||
|
||||
|
||||
@BeforeClass
|
||||
public static void setUpBeforeClass() throws Exception {
|
||||
monsterSearchResult = new AnidbSearchResult(1539, "Monster", null);
|
||||
twelvekingdomsSearchResult = new AnidbSearchResult(26, "Juuni Kokuki", null);
|
||||
princessTutuSearchResult = new AnidbSearchResult(516, "Princess Tutu", null);
|
||||
}
|
||||
|
||||
|
||||
private AnidbClient anidb = new AnidbClient("filebot", 4);
|
||||
|
||||
|
||||
|
||||
@Test
|
||||
public void getAnimeTitles() throws Exception {
|
||||
List<AnidbSearchResult> animeTitles = anidb.getAnimeTitles();
|
||||
assertTrue(animeTitles.size() > 8000);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void search() throws Exception {
|
||||
List<SearchResult> results = anidb.search("one piece");
|
||||
|
||||
|
||||
AnidbSearchResult result = (AnidbSearchResult) results.get(0);
|
||||
assertEquals("One Piece", result.getName());
|
||||
assertEquals(69, result.getAnimeId());
|
||||
}
|
||||
|
||||
|
||||
|
||||
@Test
|
||||
public void searchNoMatch() throws Exception {
|
||||
List<SearchResult> results = anidb.search("i will not find anything for this query string");
|
||||
|
||||
|
||||
assertTrue(results.isEmpty());
|
||||
}
|
||||
|
||||
|
||||
|
||||
@Test
|
||||
public void searchTitleAlias() throws Exception {
|
||||
// Seikai no Senki (main title), Banner of the Stars (official English title)
|
||||
assertEquals("Seikai no Senki", anidb.search("banner of the stars").get(0).getName());
|
||||
assertEquals("Seikai no Senki", anidb.search("seikai no senki").get(0).getName());
|
||||
|
||||
|
||||
// no matching title
|
||||
assertEquals("Naruto", anidb.search("naruto").get(0).getName());
|
||||
}
|
||||
|
||||
|
||||
|
||||
@Test
|
||||
public void getEpisodeListAll() throws Exception {
|
||||
List<Episode> list = anidb.getEpisodeList(monsterSearchResult);
|
||||
|
||||
|
||||
assertEquals(74, list.size());
|
||||
|
||||
|
||||
Episode first = list.get(0);
|
||||
|
||||
|
||||
assertEquals("Monster", first.getSeriesName());
|
||||
assertEquals("2004-04-07", first.getSeriesStartDate().toString());
|
||||
assertEquals("Herr Dr. Tenma", first.getTitle());
|
||||
@ -87,16 +85,15 @@ public class AnidbClientTest {
|
||||
assertEquals(null, first.getSeason());
|
||||
assertEquals("2004-04-07", first.getAirdate().toString());
|
||||
}
|
||||
|
||||
|
||||
|
||||
@Test
|
||||
public void getEpisodeListAllShortLink() throws Exception {
|
||||
List<Episode> list = anidb.getEpisodeList(twelvekingdomsSearchResult);
|
||||
|
||||
|
||||
assertEquals(45, list.size());
|
||||
|
||||
|
||||
Episode first = list.get(0);
|
||||
|
||||
|
||||
assertEquals("The Twelve Kingdoms", first.getSeriesName());
|
||||
assertEquals("2002-04-09", first.getSeriesStartDate().toString());
|
||||
assertEquals("Shadow of the Moon, The Sea of Shadow - Chapter 1", first.getTitle());
|
||||
@ -105,18 +102,16 @@ public class AnidbClientTest {
|
||||
assertEquals(null, first.getSeason());
|
||||
assertEquals("2002-04-09", first.getAirdate().toString());
|
||||
}
|
||||
|
||||
|
||||
|
||||
@Test
|
||||
public void getEpisodeListEncoding() throws Exception {
|
||||
assertEquals("Raven Princess - An der schönen blauen Donau", anidb.getEpisodeList(princessTutuSearchResult).get(6).getTitle());
|
||||
}
|
||||
|
||||
|
||||
|
||||
@Test
|
||||
public void getEpisodeListI18N() throws Exception {
|
||||
List<Episode> list = anidb.getEpisodeList(monsterSearchResult, SortOrder.Airdate, Locale.JAPANESE);
|
||||
|
||||
|
||||
Episode last = list.get(73);
|
||||
assertEquals("モンスター", last.getSeriesName());
|
||||
assertEquals("2004-04-07", last.getSeriesStartDate().toString());
|
||||
@ -126,24 +121,21 @@ public class AnidbClientTest {
|
||||
assertEquals(null, last.getSeason());
|
||||
assertEquals("2005-09-28", last.getAirdate().toString());
|
||||
}
|
||||
|
||||
|
||||
|
||||
@Test
|
||||
public void getEpisodeListTrimRecap() throws Exception {
|
||||
assertEquals("Sea God of the East, Azure Sea of the West - Transition Chapter", anidb.getEpisodeList(twelvekingdomsSearchResult).get(44).getTitle());
|
||||
}
|
||||
|
||||
|
||||
|
||||
@Test
|
||||
public void getEpisodeListLink() throws Exception {
|
||||
assertEquals("http://anidb.net/a1539", anidb.getEpisodeListLink(monsterSearchResult).toURL().toString());
|
||||
}
|
||||
|
||||
|
||||
|
||||
@BeforeClass
|
||||
@AfterClass
|
||||
public static void clearCache() {
|
||||
CacheManager.getInstance().clearAll();
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user