mirror of
https://github.com/mitb-archive/filebot
synced 2024-12-24 16:58:51 -05:00
* fine-tune anime matching
This commit is contained in:
parent
2232576c1d
commit
3a1eada102
@ -228,7 +228,7 @@ if (thetvdb_txt.size() < 30000) { throw new Exception('TheTVDB index sanity fail
|
|||||||
|
|
||||||
|
|
||||||
// BUILD anidb index
|
// BUILD anidb index
|
||||||
def anidb = new net.sourceforge.filebot.web.AnidbClient(null, 0).getAnimeTitles()
|
def anidb = new net.sourceforge.filebot.web.AnidbClient('filebot', 4).getAnimeTitles()
|
||||||
|
|
||||||
def anidb_index = anidb.findResults{
|
def anidb_index = anidb.findResults{
|
||||||
def row = []
|
def row = []
|
||||||
@ -243,4 +243,4 @@ pack(anidb_out, anidb_txt)
|
|||||||
println "AniDB Index: " + anidb_txt.size()
|
println "AniDB Index: " + anidb_txt.size()
|
||||||
|
|
||||||
// sanity check
|
// sanity check
|
||||||
if (anidb_txt.size() < 5000) { throw new Exception('AniDB index sanity failed') }
|
if (anidb_txt.size() < 8000) { throw new Exception('AniDB index sanity failed') }
|
||||||
|
@ -106,7 +106,7 @@ public class CmdlineOperations implements CmdlineInterface {
|
|||||||
int sxe = 0; // SxE
|
int sxe = 0; // SxE
|
||||||
int cws = 0; // common word sequence
|
int cws = 0; // common word sequence
|
||||||
|
|
||||||
SeriesNameMatcher nameMatcher = new SeriesNameMatcher(locale);
|
SeriesNameMatcher nameMatcher = new SeriesNameMatcher(locale, true);
|
||||||
Collection<String> cwsList = emptySet();
|
Collection<String> cwsList = emptySet();
|
||||||
if (max >= 5) {
|
if (max >= 5) {
|
||||||
cwsList = nameMatcher.matchAll(mediaFiles.toArray(new File[0]));
|
cwsList = nameMatcher.matchAll(mediaFiles.toArray(new File[0]));
|
||||||
|
@ -360,7 +360,7 @@ public class MediaDetection {
|
|||||||
Collection<String> matches = new LinkedHashSet<String>();
|
Collection<String> matches = new LinkedHashSet<String>();
|
||||||
|
|
||||||
// check CWS matches
|
// check CWS matches
|
||||||
SeriesNameMatcher snm = new SeriesNameMatcher(locale);
|
SeriesNameMatcher snm = new SeriesNameMatcher(locale, true);
|
||||||
matches.addAll(snm.matchAll(files.toArray(new File[files.size()])));
|
matches.addAll(snm.matchAll(files.toArray(new File[files.size()])));
|
||||||
|
|
||||||
// check for known pattern matches
|
// check for known pattern matches
|
||||||
|
@ -300,64 +300,72 @@ public enum EpisodeMetrics implements SimilarityMetric {
|
|||||||
SeriesName(new NameSimilarityMetric() {
|
SeriesName(new NameSimilarityMetric() {
|
||||||
|
|
||||||
private ReleaseInfo releaseInfo = new ReleaseInfo();
|
private ReleaseInfo releaseInfo = new ReleaseInfo();
|
||||||
private SeriesNameMatcher seriesNameMatcher = new SeriesNameMatcher();
|
private SeriesNameMatcher seriesNameMatcher = new SeriesNameMatcher(Locale.ROOT, false);
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public float getSimilarity(Object o1, Object o2) {
|
public float getSimilarity(Object o1, Object o2) {
|
||||||
float lowerBound = super.getSimilarity(normalize(o1, true), normalize(o2, true));
|
String[] f1 = getNormalizedEffectiveIdentifiers(o1);
|
||||||
float upperBound = super.getSimilarity(normalize(o1, false), normalize(o2, false));
|
String[] f2 = getNormalizedEffectiveIdentifiers(o2);
|
||||||
|
|
||||||
return (float) (floor(max(lowerBound, upperBound) * 4) / 4);
|
// match all fields against each other and keep the highest pairwise similarity
|
||||||
};
|
float max = 0;
|
||||||
|
for (String s1 : f1) {
|
||||||
|
for (String s2 : f2) {
|
||||||
|
max = max(super.getSimilarity(s1, s2), max);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// normalize absolute similarity to similarity rank (4 ranks in total),
|
||||||
|
// so we are less likely to fall for false positives in this pass, and move on to the next one
|
||||||
|
return (float) (floor(max * 4) / 4);
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected String normalize(Object object) {
|
protected String normalize(Object object) {
|
||||||
return object.toString();
|
return object.toString();
|
||||||
};
|
}
|
||||||
|
|
||||||
|
protected String[] getNormalizedEffectiveIdentifiers(Object object) {
|
||||||
|
List<?> identifiers = getEffectiveIdentifiers(object);
|
||||||
|
String[] names = new String[identifiers.size()];
|
||||||
|
|
||||||
|
for (int i = 0; i < names.length; i++) {
|
||||||
|
names[i] = normalizeObject(identifiers.get(i));
|
||||||
|
}
|
||||||
|
return names;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected List<?> getEffectiveIdentifiers(Object object) {
|
||||||
|
List<String> names = null;
|
||||||
|
|
||||||
protected String normalize(Object object, boolean strict) {
|
|
||||||
if (object instanceof Episode) {
|
if (object instanceof Episode) {
|
||||||
if (strict) {
|
names = ((Episode) object).getSeries().getEffectiveNames();
|
||||||
object = ((Episode) object).getSeriesName(); // focus on series name
|
|
||||||
} else {
|
|
||||||
object = removeTrailingBrackets(((Episode) object).getSeriesName()); // focus on series name (without US/UK 1967/2005 differentiation)
|
|
||||||
}
|
|
||||||
} else if (object instanceof File) {
|
} else if (object instanceof File) {
|
||||||
object = ((File) object).getName(); // try to narrow down on series name
|
names = new ArrayList<String>(3);
|
||||||
|
for (File f : listPathTail((File) object, 3, true)) {
|
||||||
try {
|
String fn = getName(f);
|
||||||
object = resolveSeriesDirectMapping((String) object);
|
String sn = seriesNameMatcher.matchByEpisodeIdentifier(fn);
|
||||||
} catch (IOException e) {
|
if (sn != null) {
|
||||||
Logger.getLogger(EpisodeMetrics.class.getName()).log(Level.WARNING, e.getMessage());
|
names.add(sn);
|
||||||
|
} else {
|
||||||
|
names.add(fn);
|
||||||
}
|
}
|
||||||
|
|
||||||
String snm = seriesNameMatcher.matchByEpisodeIdentifier((String) object);
|
|
||||||
if (snm != null) {
|
|
||||||
object = snm;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// equally strip away any potential clutter
|
// equally strip away any potential clutter
|
||||||
|
if (names != null) {
|
||||||
try {
|
try {
|
||||||
object = releaseInfo.cleanRelease(singleton(object.toString()), strict).iterator().next();
|
return releaseInfo.cleanRelease(names, false);
|
||||||
} catch (NoSuchElementException e) {
|
} catch (NoSuchElementException e) {
|
||||||
// keep default value in case all tokens are stripped away
|
// keep default value in case all tokens are stripped away
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
Logger.getLogger(EpisodeMetrics.class.getName()).log(Level.WARNING, e.getMessage());
|
Logger.getLogger(EpisodeMetrics.class.getName()).log(Level.WARNING, e.getMessage());
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// simplify file name, if possible
|
// no usable names (unsupported object type, or clean-up failed above): fall back to no identifiers
|
||||||
return normalizeObject(object);
|
return emptyList();
|
||||||
}
|
|
||||||
|
|
||||||
protected String resolveSeriesDirectMapping(String input) throws IOException {
|
|
||||||
for (Pattern it : releaseInfo.getSeriesDirectMappings().keySet()) {
|
|
||||||
Matcher m = it.matcher(input);
|
|
||||||
if (m.find()) {
|
|
||||||
return m.replaceAll(releaseInfo.getSeriesDirectMappings().get(it));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return input;
|
|
||||||
}
|
}
|
||||||
}),
|
}),
|
||||||
|
|
||||||
|
@ -1,7 +1,5 @@
|
|||||||
|
|
||||||
package net.sourceforge.filebot.similarity;
|
package net.sourceforge.filebot.similarity;
|
||||||
|
|
||||||
|
|
||||||
import static java.util.Collections.*;
|
import static java.util.Collections.*;
|
||||||
import static java.util.regex.Pattern.*;
|
import static java.util.regex.Pattern.*;
|
||||||
import static net.sourceforge.filebot.similarity.CommonSequenceMatcher.*;
|
import static net.sourceforge.filebot.similarity.CommonSequenceMatcher.*;
|
||||||
@ -29,23 +27,24 @@ import java.util.regex.Pattern;
|
|||||||
import net.sourceforge.filebot.similarity.SeasonEpisodeMatcher.SxE;
|
import net.sourceforge.filebot.similarity.SeasonEpisodeMatcher.SxE;
|
||||||
import net.sourceforge.tuned.FileUtilities;
|
import net.sourceforge.tuned.FileUtilities;
|
||||||
|
|
||||||
|
|
||||||
public class SeriesNameMatcher {
|
public class SeriesNameMatcher {
|
||||||
|
|
||||||
protected SeasonEpisodeMatcher seasonEpisodeMatcher = new SeasonEpisodeMatcher(SeasonEpisodeMatcher.DEFAULT_SANITY, true);
|
protected SeasonEpisodeMatcher seasonEpisodeMatcher;
|
||||||
protected DateMatcher dateMatcher = new DateMatcher();
|
protected DateMatcher dateMatcher;
|
||||||
|
|
||||||
protected NameSimilarityMetric nameSimilarityMetric = new NameSimilarityMetric();
|
protected NameSimilarityMetric nameSimilarityMetric;
|
||||||
|
|
||||||
protected CommonSequenceMatcher commonSequenceMatcher;
|
protected CommonSequenceMatcher commonSequenceMatcher;
|
||||||
|
|
||||||
|
|
||||||
public SeriesNameMatcher() {
|
public SeriesNameMatcher() {
|
||||||
this(Locale.ROOT);
|
this(Locale.ROOT, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public SeriesNameMatcher(Locale locale, boolean strict) {
|
||||||
|
seasonEpisodeMatcher = new SeasonEpisodeMatcher(SeasonEpisodeMatcher.DEFAULT_SANITY, strict);
|
||||||
|
dateMatcher = new DateMatcher();
|
||||||
|
nameSimilarityMetric = new NameSimilarityMetric();
|
||||||
|
|
||||||
public SeriesNameMatcher(Locale locale) {
|
|
||||||
commonSequenceMatcher = new CommonSequenceMatcher(getLenientCollator(locale), 3, true) {
|
commonSequenceMatcher = new CommonSequenceMatcher(getLenientCollator(locale), 3, true) {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -55,7 +54,6 @@ public class SeriesNameMatcher {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public Collection<String> matchAll(File[] files) {
|
public Collection<String> matchAll(File[] files) {
|
||||||
SeriesNameCollection seriesNames = new SeriesNameCollection();
|
SeriesNameCollection seriesNames = new SeriesNameCollection();
|
||||||
|
|
||||||
@ -76,7 +74,6 @@ public class SeriesNameMatcher {
|
|||||||
return seriesNames;
|
return seriesNames;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public Collection<String> matchAll(String[] names) {
|
public Collection<String> matchAll(String[] names) {
|
||||||
SeriesNameCollection seriesNames = new SeriesNameCollection();
|
SeriesNameCollection seriesNames = new SeriesNameCollection();
|
||||||
|
|
||||||
@ -110,13 +107,12 @@ public class SeriesNameMatcher {
|
|||||||
return seriesNames;
|
return seriesNames;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Try to match and verify all series names using known season episode patterns.
|
* Try to match and verify all series names using known season episode patterns.
|
||||||
*
|
*
|
||||||
* @param names episode names
|
* @param names
|
||||||
* @return series names that have been matched one or multiple times depending on the
|
* episode names
|
||||||
* threshold
|
* @return series names that have been matched one or multiple times depending on the threshold
|
||||||
*/
|
*/
|
||||||
private Collection<String> flatMatchAll(String[] names, Pattern prefixPattern, int threshold, boolean strict) {
|
private Collection<String> flatMatchAll(String[] names, Pattern prefixPattern, int threshold, boolean strict) {
|
||||||
@SuppressWarnings("unchecked")
|
@SuppressWarnings("unchecked")
|
||||||
@ -155,11 +151,11 @@ public class SeriesNameMatcher {
|
|||||||
return thresholdCollection;
|
return thresholdCollection;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Try to match all common word sequences in the given list.
|
* Try to match all common word sequences in the given list.
|
||||||
*
|
*
|
||||||
* @param names list of episode names
|
* @param names
|
||||||
|
* list of episode names
|
||||||
* @return all common word sequences that have been found
|
* @return all common word sequences that have been found
|
||||||
*/
|
*/
|
||||||
private Collection<String> deepMatchAll(String[] names, int threshold) {
|
private Collection<String> deepMatchAll(String[] names, int threshold) {
|
||||||
@ -185,14 +181,12 @@ public class SeriesNameMatcher {
|
|||||||
return results;
|
return results;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Try to match a series name from the given episode name using known season episode
|
* Try to match a series name from the given episode name using known season episode patterns.
|
||||||
* patterns.
|
|
||||||
*
|
*
|
||||||
* @param name episode name
|
* @param name
|
||||||
* @return a substring of the given name that ends before the first occurrence of a season
|
* episode name
|
||||||
* episode pattern, or null if there is no such pattern
|
* @return a substring of the given name that ends before the first occurrence of a season episode pattern, or null if there is no such pattern
|
||||||
*/
|
*/
|
||||||
public String matchByEpisodeIdentifier(String name) {
|
public String matchByEpisodeIdentifier(String name) {
|
||||||
int seasonEpisodePosition = seasonEpisodeMatcher.find(name, 0);
|
int seasonEpisodePosition = seasonEpisodeMatcher.find(name, 0);
|
||||||
@ -210,13 +204,14 @@ public class SeriesNameMatcher {
|
|||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Try to match a series name from the first common word sequence.
|
* Try to match a series name from the first common word sequence.
|
||||||
*
|
*
|
||||||
* @param names various episode names (at least two)
|
* @param names
|
||||||
|
* various episode names (at least two)
|
||||||
* @return a word sequence all episode names have in common, or null
|
* @return a word sequence all episode names have in common, or null
|
||||||
* @throws IllegalArgumentException if less than 2 episode names are given
|
* @throws IllegalArgumentException
|
||||||
|
* if less than 2 episode names are given
|
||||||
*/
|
*/
|
||||||
public String matchByFirstCommonWordSequence(String... names) {
|
public String matchByFirstCommonWordSequence(String... names) {
|
||||||
if (names.length < 2) {
|
if (names.length < 2) {
|
||||||
@ -226,7 +221,6 @@ public class SeriesNameMatcher {
|
|||||||
return commonSequenceMatcher.matchFirstCommonSequence(names);
|
return commonSequenceMatcher.matchFirstCommonSequence(names);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
protected String normalize(String name) {
|
protected String normalize(String name) {
|
||||||
// remove group names and checksums, any [...] or (...)
|
// remove group names and checksums, any [...] or (...)
|
||||||
name = normalizeBrackets(name);
|
name = normalizeBrackets(name);
|
||||||
@ -237,7 +231,6 @@ public class SeriesNameMatcher {
|
|||||||
return name;
|
return name;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
protected <T> T[] firstCommonSequence(T[] seq1, T[] seq2, int maxStartIndex, Comparator<T> equalsComparator) {
|
protected <T> T[] firstCommonSequence(T[] seq1, T[] seq2, int maxStartIndex, Comparator<T> equalsComparator) {
|
||||||
for (int i = 0; i < seq1.length && i <= maxStartIndex; i++) {
|
for (int i = 0; i < seq1.length && i <= maxStartIndex; i++) {
|
||||||
for (int j = 0; j < seq2.length && j <= maxStartIndex; j++) {
|
for (int j = 0; j < seq2.length && j <= maxStartIndex; j++) {
|
||||||
@ -263,7 +256,6 @@ public class SeriesNameMatcher {
|
|||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private Map<File, String[]> mapNamesByFolder(File... files) {
|
private Map<File, String[]> mapNamesByFolder(File... files) {
|
||||||
Map<File, List<File>> filesByFolder = new LinkedHashMap<File, List<File>>();
|
Map<File, List<File>> filesByFolder = new LinkedHashMap<File, List<File>>();
|
||||||
|
|
||||||
@ -290,7 +282,6 @@ public class SeriesNameMatcher {
|
|||||||
return namesByFolder;
|
return namesByFolder;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
protected String[] names(Collection<File> files) {
|
protected String[] names(Collection<File> files) {
|
||||||
String[] names = new String[files.size()];
|
String[] names = new String[files.size()];
|
||||||
|
|
||||||
@ -304,12 +295,10 @@ public class SeriesNameMatcher {
|
|||||||
return names;
|
return names;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
protected static class SeriesNameCollection extends AbstractCollection<String> {
|
protected static class SeriesNameCollection extends AbstractCollection<String> {
|
||||||
|
|
||||||
private final Map<String, String> data = new LinkedHashMap<String, String>();
|
private final Map<String, String> data = new LinkedHashMap<String, String>();
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean add(String value) {
|
public boolean add(String value) {
|
||||||
value = value.trim();
|
value = value.trim();
|
||||||
@ -330,12 +319,10 @@ public class SeriesNameMatcher {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
protected String key(Object value) {
|
protected String key(Object value) {
|
||||||
return value.toString().toLowerCase();
|
return value.toString().toLowerCase();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
protected float firstCharacterCaseBalance(String s) {
|
protected float firstCharacterCaseBalance(String s) {
|
||||||
int upper = 0;
|
int upper = 0;
|
||||||
int lower = 0;
|
int lower = 0;
|
||||||
@ -355,19 +342,16 @@ public class SeriesNameMatcher {
|
|||||||
return (lower + (upper * 1.01f)) / Math.abs(lower - upper);
|
return (lower + (upper * 1.01f)) / Math.abs(lower - upper);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean contains(Object value) {
|
public boolean contains(Object value) {
|
||||||
return data.containsKey(key(value));
|
return data.containsKey(key(value));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Iterator<String> iterator() {
|
public Iterator<String> iterator() {
|
||||||
return data.values().iterator();
|
return data.values().iterator();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int size() {
|
public int size() {
|
||||||
return data.size();
|
return data.size();
|
||||||
@ -375,7 +359,6 @@ public class SeriesNameMatcher {
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
protected static class ThresholdCollection<E> extends AbstractCollection<E> {
|
protected static class ThresholdCollection<E> extends AbstractCollection<E> {
|
||||||
|
|
||||||
private final Collection<E> heaven;
|
private final Collection<E> heaven;
|
||||||
@ -383,14 +366,12 @@ public class SeriesNameMatcher {
|
|||||||
|
|
||||||
private final int threshold;
|
private final int threshold;
|
||||||
|
|
||||||
|
|
||||||
public ThresholdCollection(int threshold, Comparator<E> equalityComparator) {
|
public ThresholdCollection(int threshold, Comparator<E> equalityComparator) {
|
||||||
this.heaven = new ArrayList<E>();
|
this.heaven = new ArrayList<E>();
|
||||||
this.limbo = new TreeMap<E, Collection<E>>(equalityComparator);
|
this.limbo = new TreeMap<E, Collection<E>>(equalityComparator);
|
||||||
this.threshold = threshold;
|
this.threshold = threshold;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean add(E value) {
|
public boolean add(E value) {
|
||||||
Collection<E> buffer = limbo.get(value);
|
Collection<E> buffer = limbo.get(value);
|
||||||
@ -422,18 +403,15 @@ public class SeriesNameMatcher {
|
|||||||
return false;
|
return false;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
public boolean addDirect(E element) {
|
public boolean addDirect(E element) {
|
||||||
return heaven.add(element);
|
return heaven.add(element);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Iterator<E> iterator() {
|
public Iterator<E> iterator() {
|
||||||
return heaven.iterator();
|
return heaven.iterator();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int size() {
|
public int size() {
|
||||||
return heaven.size();
|
return heaven.size();
|
||||||
|
@ -8,6 +8,8 @@ import java.net.URI;
|
|||||||
import java.net.URISyntaxException;
|
import java.net.URISyntaxException;
|
||||||
import java.net.URL;
|
import java.net.URL;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.Comparator;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Locale;
|
import java.util.Locale;
|
||||||
@ -27,6 +29,7 @@ import javax.swing.Icon;
|
|||||||
import net.sourceforge.filebot.Cache;
|
import net.sourceforge.filebot.Cache;
|
||||||
import net.sourceforge.filebot.ResourceManager;
|
import net.sourceforge.filebot.ResourceManager;
|
||||||
|
|
||||||
|
import org.jsoup.Jsoup;
|
||||||
import org.w3c.dom.Document;
|
import org.w3c.dom.Document;
|
||||||
import org.w3c.dom.Node;
|
import org.w3c.dom.Node;
|
||||||
|
|
||||||
@ -165,13 +168,15 @@ public class AnidbClient extends AbstractEpisodeListProvider {
|
|||||||
// type: 1=primary title (one per anime), 2=synonyms (multiple per anime), 3=shorttitles (multiple per anime), 4=official title (one per language)
|
// type: 1=primary title (one per anime), 2=synonyms (multiple per anime), 3=shorttitles (multiple per anime), 4=official title (one per language)
|
||||||
Pattern pattern = Pattern.compile("^(?!#)(\\d+)[|](\\d)[|]([\\w-]+)[|](.+)$");
|
Pattern pattern = Pattern.compile("^(?!#)(\\d+)[|](\\d)[|]([\\w-]+)[|](.+)$");
|
||||||
|
|
||||||
Map<Integer, String> primaryTitleMap = new HashMap<Integer, String>();
|
List<String> languageOrder = new ArrayList<String>();
|
||||||
Map<Integer, Map<String, String>> officialTitleMap = new HashMap<Integer, Map<String, String>>();
|
languageOrder.add("x-jat");
|
||||||
Map<Integer, Map<String, String>> synonymsTitleMap = new HashMap<Integer, Map<String, String>>();
|
languageOrder.add("en");
|
||||||
|
languageOrder.add("ja");
|
||||||
|
|
||||||
// fetch data
|
// fetch data
|
||||||
Scanner scanner = new Scanner(new GZIPInputStream(url.openStream()), "UTF-8");
|
Map<Integer, List<Object[]>> entriesByAnime = new HashMap<Integer, List<Object[]>>(65536);
|
||||||
|
|
||||||
|
Scanner scanner = new Scanner(new GZIPInputStream(url.openStream()), "UTF-8");
|
||||||
try {
|
try {
|
||||||
while (scanner.hasNextLine()) {
|
while (scanner.hasNextLine()) {
|
||||||
Matcher matcher = pattern.matcher(scanner.nextLine());
|
Matcher matcher = pattern.matcher(scanner.nextLine());
|
||||||
@ -182,17 +187,17 @@ public class AnidbClient extends AbstractEpisodeListProvider {
|
|||||||
String language = matcher.group(3);
|
String language = matcher.group(3);
|
||||||
String title = matcher.group(4);
|
String title = matcher.group(4);
|
||||||
|
|
||||||
if (type.equals("1")) {
|
if (aid > 0 && title.length() > 0 && languageOrder.contains(language)) {
|
||||||
primaryTitleMap.put(aid, title);
|
List<Object[]> names = entriesByAnime.get(aid);
|
||||||
} else if (type.equals("2") || type.equals("4")) {
|
if (names == null) {
|
||||||
Map<Integer, Map<String, String>> titleMap = (type.equals("4") ? officialTitleMap : synonymsTitleMap);
|
names = new ArrayList<Object[]>();
|
||||||
Map<String, String> languageTitleMap = titleMap.get(aid);
|
entriesByAnime.put(aid, names);
|
||||||
if (languageTitleMap == null) {
|
|
||||||
languageTitleMap = new HashMap<String, String>();
|
|
||||||
titleMap.put(aid, languageTitleMap);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
languageTitleMap.put(language, title);
|
// resolve HTML entities
|
||||||
|
title = Jsoup.parse(title).text();
|
||||||
|
|
||||||
|
names.add(new Object[] { Integer.parseInt(type), languageOrder.indexOf(language), title });
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -201,23 +206,36 @@ public class AnidbClient extends AbstractEpisodeListProvider {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// build up a list of all possible AniDB search results
|
// build up a list of all possible AniDB search results
|
||||||
anime = new ArrayList<AnidbSearchResult>(primaryTitleMap.size());
|
anime = new ArrayList<AnidbSearchResult>(entriesByAnime.size());
|
||||||
|
|
||||||
for (Entry<Integer, String> entry : primaryTitleMap.entrySet()) {
|
for (Entry<Integer, List<Object[]>> entry : entriesByAnime.entrySet()) {
|
||||||
Map<String, String> localizedTitles = new HashMap<String, String>();
|
int aid = entry.getKey();
|
||||||
if (synonymsTitleMap.containsKey(entry.getKey())) {
|
List<Object[]> triples = entry.getValue();
|
||||||
localizedTitles.putAll(synonymsTitleMap.get(entry.getKey())); // use synonym as fallback
|
|
||||||
|
Collections.sort(triples, new Comparator<Object[]>() {
|
||||||
|
|
||||||
|
@SuppressWarnings({ "unchecked", "rawtypes" })
|
||||||
|
@Override
|
||||||
|
public int compare(Object[] a, Object[] b) {
|
||||||
|
for (int i = 0; i < a.length; i++) {
|
||||||
|
if (!a[i].equals(b[i]))
|
||||||
|
return ((Comparable) a[i]).compareTo(b[i]);
|
||||||
}
|
}
|
||||||
if (officialTitleMap.containsKey(entry.getKey())) {
|
return 0;
|
||||||
localizedTitles.putAll(officialTitleMap.get(entry.getKey())); // primarily use official title if available
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
List<String> names = new ArrayList<String>(triples.size());
|
||||||
|
for (Object[] it : triples) {
|
||||||
|
names.add((String) it[2]);
|
||||||
}
|
}
|
||||||
|
|
||||||
String englishTitle = localizedTitles.get("en"); // ONLY SUPPORT ENGLISH LOCALIZATION
|
String primaryTitle = names.get(0);
|
||||||
anime.add(new AnidbSearchResult(entry.getKey(), entry.getValue(), englishTitle == null || englishTitle.isEmpty() ? new String[] {} : new String[] { englishTitle }));
|
String[] aliasNames = names.subList(1, names.size()).toArray(new String[0]);
|
||||||
|
anime.add(new AnidbSearchResult(aid, primaryTitle, aliasNames));
|
||||||
}
|
}
|
||||||
|
|
||||||
// populate cache
|
// populate cache
|
||||||
return cache.putSearchResult(null, Locale.ROOT, anime);
|
return cache.putSearchResult(null, Locale.ROOT, anime);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -8,8 +8,8 @@ public class AnidbSearchResult extends SearchResult {
|
|||||||
// used by serializer
|
// used by serializer
|
||||||
}
|
}
|
||||||
|
|
||||||
public AnidbSearchResult(int aid, String primaryTitle, String[] localizedTitles) {
|
public AnidbSearchResult(int aid, String primaryTitle, String[] aliasNames) {
|
||||||
super(primaryTitle, localizedTitles);
|
super(primaryTitle, aliasNames);
|
||||||
this.aid = aid;
|
this.aid = aid;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1,7 +1,5 @@
|
|||||||
|
|
||||||
package net.sourceforge.filebot.web;
|
package net.sourceforge.filebot.web;
|
||||||
|
|
||||||
|
|
||||||
import static org.junit.Assert.*;
|
import static org.junit.Assert.*;
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
@ -13,7 +11,6 @@ import org.junit.AfterClass;
|
|||||||
import org.junit.BeforeClass;
|
import org.junit.BeforeClass;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
|
|
||||||
public class AnidbClientTest {
|
public class AnidbClientTest {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -31,7 +28,6 @@ public class AnidbClientTest {
|
|||||||
*/
|
*/
|
||||||
private static AnidbSearchResult princessTutuSearchResult;
|
private static AnidbSearchResult princessTutuSearchResult;
|
||||||
|
|
||||||
|
|
||||||
@BeforeClass
|
@BeforeClass
|
||||||
public static void setUpBeforeClass() throws Exception {
|
public static void setUpBeforeClass() throws Exception {
|
||||||
monsterSearchResult = new AnidbSearchResult(1539, "Monster", null);
|
monsterSearchResult = new AnidbSearchResult(1539, "Monster", null);
|
||||||
@ -41,6 +37,11 @@ public class AnidbClientTest {
|
|||||||
|
|
||||||
private AnidbClient anidb = new AnidbClient("filebot", 4);
|
private AnidbClient anidb = new AnidbClient("filebot", 4);
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void getAnimeTitles() throws Exception {
|
||||||
|
List<AnidbSearchResult> animeTitles = anidb.getAnimeTitles();
|
||||||
|
assertTrue(animeTitles.size() > 8000);
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void search() throws Exception {
|
public void search() throws Exception {
|
||||||
@ -51,7 +52,6 @@ public class AnidbClientTest {
|
|||||||
assertEquals(69, result.getAnimeId());
|
assertEquals(69, result.getAnimeId());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void searchNoMatch() throws Exception {
|
public void searchNoMatch() throws Exception {
|
||||||
List<SearchResult> results = anidb.search("i will not find anything for this query string");
|
List<SearchResult> results = anidb.search("i will not find anything for this query string");
|
||||||
@ -59,7 +59,6 @@ public class AnidbClientTest {
|
|||||||
assertTrue(results.isEmpty());
|
assertTrue(results.isEmpty());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void searchTitleAlias() throws Exception {
|
public void searchTitleAlias() throws Exception {
|
||||||
// Seikai no Senki (main title), Banner of the Stars (official English title)
|
// Seikai no Senki (main title), Banner of the Stars (official English title)
|
||||||
@ -70,7 +69,6 @@ public class AnidbClientTest {
|
|||||||
assertEquals("Naruto", anidb.search("naruto").get(0).getName());
|
assertEquals("Naruto", anidb.search("naruto").get(0).getName());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void getEpisodeListAll() throws Exception {
|
public void getEpisodeListAll() throws Exception {
|
||||||
List<Episode> list = anidb.getEpisodeList(monsterSearchResult);
|
List<Episode> list = anidb.getEpisodeList(monsterSearchResult);
|
||||||
@ -88,7 +86,6 @@ public class AnidbClientTest {
|
|||||||
assertEquals("2004-04-07", first.getAirdate().toString());
|
assertEquals("2004-04-07", first.getAirdate().toString());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void getEpisodeListAllShortLink() throws Exception {
|
public void getEpisodeListAllShortLink() throws Exception {
|
||||||
List<Episode> list = anidb.getEpisodeList(twelvekingdomsSearchResult);
|
List<Episode> list = anidb.getEpisodeList(twelvekingdomsSearchResult);
|
||||||
@ -106,13 +103,11 @@ public class AnidbClientTest {
|
|||||||
assertEquals("2002-04-09", first.getAirdate().toString());
|
assertEquals("2002-04-09", first.getAirdate().toString());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void getEpisodeListEncoding() throws Exception {
|
public void getEpisodeListEncoding() throws Exception {
|
||||||
assertEquals("Raven Princess - An der schönen blauen Donau", anidb.getEpisodeList(princessTutuSearchResult).get(6).getTitle());
|
assertEquals("Raven Princess - An der schönen blauen Donau", anidb.getEpisodeList(princessTutuSearchResult).get(6).getTitle());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void getEpisodeListI18N() throws Exception {
|
public void getEpisodeListI18N() throws Exception {
|
||||||
List<Episode> list = anidb.getEpisodeList(monsterSearchResult, SortOrder.Airdate, Locale.JAPANESE);
|
List<Episode> list = anidb.getEpisodeList(monsterSearchResult, SortOrder.Airdate, Locale.JAPANESE);
|
||||||
@ -127,19 +122,16 @@ public class AnidbClientTest {
|
|||||||
assertEquals("2005-09-28", last.getAirdate().toString());
|
assertEquals("2005-09-28", last.getAirdate().toString());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void getEpisodeListTrimRecap() throws Exception {
|
public void getEpisodeListTrimRecap() throws Exception {
|
||||||
assertEquals("Sea God of the East, Azure Sea of the West - Transition Chapter", anidb.getEpisodeList(twelvekingdomsSearchResult).get(44).getTitle());
|
assertEquals("Sea God of the East, Azure Sea of the West - Transition Chapter", anidb.getEpisodeList(twelvekingdomsSearchResult).get(44).getTitle());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void getEpisodeListLink() throws Exception {
|
public void getEpisodeListLink() throws Exception {
|
||||||
assertEquals("http://anidb.net/a1539", anidb.getEpisodeListLink(monsterSearchResult).toURL().toString());
|
assertEquals("http://anidb.net/a1539", anidb.getEpisodeListLink(monsterSearchResult).toURL().toString());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@BeforeClass
|
@BeforeClass
|
||||||
@AfterClass
|
@AfterClass
|
||||||
public static void clearCache() {
|
public static void clearCache() {
|
||||||
|
Loading…
Reference in New Issue
Block a user