From 3a1eada10265fc02651165c02295d522a9805fda Mon Sep 17 00:00:00 2001
From: Reinhard Pointner <rednoah@users.noreply.github.com>
Date: Fri, 27 Dec 2013 22:49:56 +0000
Subject: [PATCH] * fine-tune anime matching

---
 BuildData.groovy                              |   4 +-
 .../filebot/cli/CmdlineOperations.java        |   2 +-
 .../filebot/media/MediaDetection.java         |   2 +-
 .../filebot/similarity/EpisodeMetrics.java    |  88 ++++---
 .../filebot/similarity/SeriesNameMatcher.java | 244 ++++++++----------
 .../sourceforge/filebot/web/AnidbClient.java  |  66 +++--
 .../filebot/web/AnidbSearchResult.java        |   4 +-
 .../filebot/web/AnidbClientTest.java          |  72 +++---
 8 files changed, 239 insertions(+), 243 deletions(-)
diff --git a/BuildData.groovy b/BuildData.groovy
index dd28397e..04ff6ee4 100644
--- a/BuildData.groovy
+++ b/BuildData.groovy
@@ -228,7 +228,7 @@ if (thetvdb_txt.size() < 30000) { throw new Exception('TheTVDB index sanity fail
 
 
 // BUILD anidb index
-def anidb = new net.sourceforge.filebot.web.AnidbClient(null, 0).getAnimeTitles()
+def anidb = new net.sourceforge.filebot.web.AnidbClient('filebot', 4).getAnimeTitles()
 
 def anidb_index = anidb.findResults{
 	def row = []
@@ -243,4 +243,4 @@ pack(anidb_out, anidb_txt)
 println "AniDB Index: " + anidb_txt.size()
 
 // sanity check
-if (anidb_txt.size() < 5000) { throw new Exception('AniDB index sanity failed') }
+if (anidb_txt.size() < 8000) { throw new Exception('AniDB index sanity failed') }
diff --git a/source/net/sourceforge/filebot/cli/CmdlineOperations.java b/source/net/sourceforge/filebot/cli/CmdlineOperations.java
index 9ef7e37b..09693fbc 100644
--- a/source/net/sourceforge/filebot/cli/CmdlineOperations.java
+++ b/source/net/sourceforge/filebot/cli/CmdlineOperations.java
@@ -106,7 +106,7 @@ public class CmdlineOperations implements CmdlineInterface {
 		int sxe = 0; // SxE
 		int cws = 0; // common word sequence
 
-		SeriesNameMatcher nameMatcher = new SeriesNameMatcher(locale);
+		SeriesNameMatcher nameMatcher = new SeriesNameMatcher(locale, true);
 		Collection<String> cwsList = emptySet();
 		if (max >= 5) {
 			cwsList = nameMatcher.matchAll(mediaFiles.toArray(new File[0]));
diff --git a/source/net/sourceforge/filebot/media/MediaDetection.java b/source/net/sourceforge/filebot/media/MediaDetection.java
index 23ae7a1b..cd59b9d6 100644
--- a/source/net/sourceforge/filebot/media/MediaDetection.java
+++ b/source/net/sourceforge/filebot/media/MediaDetection.java
@@ -360,7 +360,7 @@ public class MediaDetection {
 		Collection<String> matches = new LinkedHashSet<String>();
 
 		// check CWS matches
-		SeriesNameMatcher snm = new SeriesNameMatcher(locale);
+		SeriesNameMatcher snm = new SeriesNameMatcher(locale, true);
 		matches.addAll(snm.matchAll(files.toArray(new File[files.size()])));
 
 		// check for known pattern matches
diff --git a/source/net/sourceforge/filebot/similarity/EpisodeMetrics.java b/source/net/sourceforge/filebot/similarity/EpisodeMetrics.java
index 57eb684e..e74b25e3 100644
--- a/source/net/sourceforge/filebot/similarity/EpisodeMetrics.java
+++ b/source/net/sourceforge/filebot/similarity/EpisodeMetrics.java
@@ -300,64 +300,72 @@ public enum EpisodeMetrics implements SimilarityMetric {
 	SeriesName(new NameSimilarityMetric() {
 
 		private ReleaseInfo releaseInfo = new ReleaseInfo();
-		private SeriesNameMatcher seriesNameMatcher = new SeriesNameMatcher();
+		private SeriesNameMatcher seriesNameMatcher = new SeriesNameMatcher(Locale.ROOT, false);
 
 		@Override
 		public float getSimilarity(Object o1, Object o2) {
-			float lowerBound = super.getSimilarity(normalize(o1, true), normalize(o2, true));
-			float upperBound = super.getSimilarity(normalize(o1, false), normalize(o2, false));
+			String[] f1 = getNormalizedEffectiveIdentifiers(o1);
+			String[] f2 = getNormalizedEffectiveIdentifiers(o2);
 
-			return (float) (floor(max(lowerBound, upperBound) * 4) / 4);
-		};
+			// match every identifier pair and keep the best (maximum) similarity
+			float max = 0;
+			for (String s1 : f1) {
+				for (String s2 : f2) {
+					max = max(super.getSimilarity(s1, s2), max);
+				}
+			}
+
+			// normalize absolute similarity to similarity rank (4 ranks in total),
+			// so we are less likely to fall for false positives in this pass, and move on to the next one
+			return (float) (floor(max * 4) / 4);
+		}
 
 		@Override
 		protected String normalize(Object object) {
 			return object.toString();
-		};
+		}
+
+		protected String[] getNormalizedEffectiveIdentifiers(Object object) {
+			List<?> identifiers = getEffectiveIdentifiers(object);
+			String[] names = new String[identifiers.size()];
+
+			for (int i = 0; i < names.length; i++) {
+				names[i] = normalizeObject(identifiers.get(i));
+			}
+			return names;
+		}
+
+		protected List<?> getEffectiveIdentifiers(Object object) {
+			List<String> names = null;
 
-		protected String normalize(Object object, boolean strict) {
 			if (object instanceof Episode) {
-				if (strict) {
-					object = ((Episode) object).getSeriesName(); // focus on series name
-				} else {
-					object = removeTrailingBrackets(((Episode) object).getSeriesName()); // focus on series name (without US/UK 1967/2005 differentiation)
-				}
+				names = ((Episode) object).getSeries().getEffectiveNames();
 			} else if (object instanceof File) {
-				object = ((File) object).getName(); // try to narrow down on series name
-
-				try {
-					object = resolveSeriesDirectMapping((String) object);
-				} catch (IOException e) {
-					Logger.getLogger(EpisodeMetrics.class.getName()).log(Level.WARNING, e.getMessage());
-				}
-
-				String snm = seriesNameMatcher.matchByEpisodeIdentifier((String) object);
-				if (snm != null) {
-					object = snm;
+				names = new ArrayList<String>(3);
+				for (File f : listPathTail((File) object, 3, true)) {
+					String fn = getName(f);
+					String sn = seriesNameMatcher.matchByEpisodeIdentifier(fn);
+					if (sn != null) {
+						names.add(sn);
+					} else {
+						names.add(fn);
+					}
 				}
 			}
 
 			// equally strip away strip potential any clutter
-			try {
-				object = releaseInfo.cleanRelease(singleton(object.toString()), strict).iterator().next();
-			} catch (NoSuchElementException e) {
-				// keep default value in case all tokens are stripped away
-			} catch (IOException e) {
-				Logger.getLogger(EpisodeMetrics.class.getName()).log(Level.WARNING, e.getMessage());
+			if (names != null) {
+				try {
+					return releaseInfo.cleanRelease(names, false);
+				} catch (NoSuchElementException e) {
+					// fall through to the empty-list result below in case all tokens are stripped away
+				} catch (IOException e) {
+					Logger.getLogger(EpisodeMetrics.class.getName()).log(Level.WARNING, e.getMessage());
+				}
 			}
 
 			// simplify file name, if possible
-			return normalizeObject(object);
-		}
-
-		protected String resolveSeriesDirectMapping(String input) throws IOException {
-			for (Pattern it : releaseInfo.getSeriesDirectMappings().keySet()) {
-				Matcher m = it.matcher(input);
-				if (m.find()) {
-					return m.replaceAll(releaseInfo.getSeriesDirectMappings().get(it));
-				}
-			}
-			return input;
+			return emptyList();
 		}
 	}),
 
diff --git a/source/net/sourceforge/filebot/similarity/SeriesNameMatcher.java b/source/net/sourceforge/filebot/similarity/SeriesNameMatcher.java
index 5114833c..d0b14a3a 100644
--- a/source/net/sourceforge/filebot/similarity/SeriesNameMatcher.java
+++ b/source/net/sourceforge/filebot/similarity/SeriesNameMatcher.java
@@ -1,7 +1,5 @@
-
 package net.sourceforge.filebot.similarity;
 
-
 import static java.util.Collections.*;
 import static java.util.regex.Pattern.*;
 import static net.sourceforge.filebot.similarity.CommonSequenceMatcher.*;
@@ -29,63 +27,62 @@ import java.util.regex.Pattern;
 import net.sourceforge.filebot.similarity.SeasonEpisodeMatcher.SxE;
 import net.sourceforge.tuned.FileUtilities;
 
-
 public class SeriesNameMatcher {
-	
-	protected SeasonEpisodeMatcher seasonEpisodeMatcher = new SeasonEpisodeMatcher(SeasonEpisodeMatcher.DEFAULT_SANITY, true);
-	protected DateMatcher dateMatcher = new DateMatcher();
-	
-	protected NameSimilarityMetric nameSimilarityMetric = new NameSimilarityMetric();
-	
+
+	protected SeasonEpisodeMatcher seasonEpisodeMatcher;
+	protected DateMatcher dateMatcher;
+
+	protected NameSimilarityMetric nameSimilarityMetric;
+
 	protected CommonSequenceMatcher commonSequenceMatcher;
-	
-	
+
 	public SeriesNameMatcher() {
-		this(Locale.ROOT);
+		this(Locale.ROOT, true);
 	}
-	
-	
-	public SeriesNameMatcher(Locale locale) {
+
+	public SeriesNameMatcher(Locale locale, boolean strict) {
+		seasonEpisodeMatcher = new SeasonEpisodeMatcher(SeasonEpisodeMatcher.DEFAULT_SANITY, strict);
+		dateMatcher = new DateMatcher();
+		nameSimilarityMetric = new NameSimilarityMetric();
+
 		commonSequenceMatcher = new CommonSequenceMatcher(getLenientCollator(locale), 3, true) {
-			
+
 			@Override
 			protected CollationKey[] split(String sequence) {
 				return super.split(normalize(sequence));
 			}
 		};
 	}
-	
-	
+
 	public Collection<String> matchAll(File[] files) {
 		SeriesNameCollection seriesNames = new SeriesNameCollection();
-		
+
 		// group files by parent folder
 		for (Entry<File, String[]> entry : mapNamesByFolder(files).entrySet()) {
 			String parent = entry.getKey().getName();
 			String[] names = entry.getValue();
-			
+
 			for (String nameMatch : matchAll(names)) {
 				String commonMatch = commonSequenceMatcher.matchFirstCommonSequence(nameMatch, parent);
 				float similarity = commonMatch == null ? 0 : nameSimilarityMetric.getSimilarity(commonMatch, nameMatch);
-				
+
 				// prefer common match, but only if it's very similar to the original match
 				seriesNames.add(similarity > 0.7 ? commonMatch : nameMatch);
 			}
 		}
-		
+
 		return seriesNames;
 	}
-	
-	
+
 	public Collection<String> matchAll(String[] names) {
 		SeriesNameCollection seriesNames = new SeriesNameCollection();
-		
+
 		// allow matching of a small number of episodes, by setting threshold = length if length < 5
 		int threshold = Math.min(names.length, 5);
-		
+
 		// match common word sequences (likely series names)
 		SeriesNameCollection whitelist = new SeriesNameCollection();
-		
+
 		// focus chars before the SxE / Date pattern when matching by common word sequence
 		String[] focus = Arrays.copyOf(names, names.length);
 		for (int i = 0; i < focus.length; i++) {
@@ -100,41 +97,40 @@ public class SeriesNameMatcher {
 			}
 		}
 		whitelist.addAll(deepMatchAll(focus, threshold));
-		
+
 		// 1. use pattern matching
 		seriesNames.addAll(flatMatchAll(names, compile(join(whitelist, "|"), CASE_INSENSITIVE | UNICODE_CASE), threshold, false));
-		
+
 		// 2. use common word sequences
 		seriesNames.addAll(whitelist);
-		
+
 		return seriesNames;
 	}
-	
-	
+
 	/**
 	 * Try to match and verify all series names using known season episode patterns.
 	 * 
-	 * @param names episode names
-	 * @return series names that have been matched one or multiple times depending on the
-	 *         threshold
+	 * @param names
+	 *            episode names
+	 * @return series names that have been matched one or multiple times depending on the threshold
 	 */
 	private Collection<String> flatMatchAll(String[] names, Pattern prefixPattern, int threshold, boolean strict) {
 		@SuppressWarnings("unchecked")
 		Comparator<String> wordComparator = (Comparator) commonSequenceMatcher.getCollator();
 		ThresholdCollection<String> thresholdCollection = new ThresholdCollection<String>(threshold, wordComparator);
-		
+
 		for (String name : names) {
 			// use normalized name
 			name = normalize(name);
-			
+
 			Matcher prefix = prefixPattern.matcher(name);
 			int prefixEnd = prefix.find() ? prefix.end() : 0;
-			
+
 			int sxePosition = seasonEpisodeMatcher.find(name, prefixEnd);
 			if (sxePosition > 0) {
 				String hit = name.substring(0, sxePosition).trim();
 				List<SxE> sxe = seasonEpisodeMatcher.match(name.substring(sxePosition));
-				
+
 				if (!strict && sxe.size() == 1 && sxe.get(0).season >= 0) {
 					// bypass threshold if hit is likely to be genuine
 					thresholdCollection.addDirect(hit);
@@ -149,17 +145,17 @@ public class SeriesNameMatcher {
 					thresholdCollection.addDirect(name.substring(0, datePosition).trim());
 				}
 			}
-			
+
 		}
-		
+
 		return thresholdCollection;
 	}
-	
-	
+
 	/**
 	 * Try to match all common word sequences in the given list.
 	 * 
-	 * @param names list of episode names
+	 * @param names
+	 *            list of episode names
 	 * @return all common word sequences that have been found
 	 */
 	private Collection<String> deepMatchAll(String[] names, int threshold) {
@@ -167,32 +163,30 @@ public class SeriesNameMatcher {
 		if (names.length < 2 || names.length < threshold) {
 			return emptySet();
 		}
-		
+
 		String common = commonSequenceMatcher.matchFirstCommonSequence(names);
-		
+
 		if (common != null) {
 			// common word sequence found
 			return singleton(common);
 		}
-		
+
 		// recursive divide and conquer
 		List<String> results = new ArrayList<String>();
-		
+
 		// split list in two and try to match common word sequence on those
 		results.addAll(deepMatchAll(Arrays.copyOfRange(names, 0, names.length / 2), threshold));
 		results.addAll(deepMatchAll(Arrays.copyOfRange(names, names.length / 2, names.length), threshold));
-		
+
 		return results;
 	}
-	
-	
+
 	/**
-	 * Try to match a series name from the given episode name using known season episode
-	 * patterns.
+	 * Try to match a series name from the given episode name using known season episode patterns.
 	 * 
-	 * @param name episode name
-	 * @return a substring of the given name that ends before the first occurrence of a season
-	 *         episode pattern, or null if there is no such pattern
+	 * @param name
+	 *            episode name
+	 * @return a substring of the given name that ends before the first occurrence of a season episode pattern, or null if there is no such pattern
 	 */
 	public String matchByEpisodeIdentifier(String name) {
 		int seasonEpisodePosition = seasonEpisodeMatcher.find(name, 0);
@@ -200,245 +194,229 @@ public class SeriesNameMatcher {
 			// series name ends at the first season episode pattern
 			return normalize(name.substring(0, seasonEpisodePosition));
 		}
-		
+
 		int datePosition = dateMatcher.find(name, 0);
 		if (datePosition > 0) {
 			// series name ends at the first season episode pattern
 			return normalize(name.substring(0, datePosition));
 		}
-		
+
 		return null;
 	}
-	
-	
+
 	/**
 	 * Try to match a series name from the first common word sequence.
 	 * 
-	 * @param names various episode names (at least two)
+	 * @param names
+	 *            various episode names (at least two)
 	 * @return a word sequence all episode names have in common, or null
-	 * @throws IllegalArgumentException if less than 2 episode names are given
+	 * @throws IllegalArgumentException
+	 *             if less than 2 episode names are given
 	 */
 	public String matchByFirstCommonWordSequence(String... names) {
 		if (names.length < 2) {
 			throw new IllegalArgumentException("Can't match common sequence from less than two names");
 		}
-		
+
 		return commonSequenceMatcher.matchFirstCommonSequence(names);
 	}
-	
-	
+
 	protected String normalize(String name) {
 		// remove group names and checksums, any [...] or (...)
 		name = normalizeBrackets(name);
-		
+
 		// remove/normalize special characters
 		name = normalizePunctuation(name);
-		
+
 		return name;
 	}
-	
-	
+
 	protected <T> T[] firstCommonSequence(T[] seq1, T[] seq2, int maxStartIndex, Comparator<T> equalsComparator) {
 		for (int i = 0; i < seq1.length && i <= maxStartIndex; i++) {
 			for (int j = 0; j < seq2.length && j <= maxStartIndex; j++) {
 				// common sequence length
 				int len = 0;
-				
+
 				// iterate over common sequence
 				while ((i + len < seq1.length) && (j + len < seq2.length) && (equalsComparator.compare(seq1[i + len], seq2[j + len]) == 0)) {
 					len++;
 				}
-				
+
 				// check if a common sequence was found
 				if (len > 0) {
 					if (i == 0 && len == seq1.length)
 						return seq1;
-					
+
 					return Arrays.copyOfRange(seq1, i, i + len);
 				}
 			}
 		}
-		
+
 		// no intersection at all
 		return null;
 	}
-	
-	
+
 	private Map<File, String[]> mapNamesByFolder(File... files) {
 		Map<File, List<File>> filesByFolder = new LinkedHashMap<File, List<File>>();
-		
+
 		for (File file : files) {
 			File folder = file.getParentFile();
-			
+
 			List<File> list = filesByFolder.get(folder);
-			
+
 			if (list == null) {
 				list = new ArrayList<File>();
 				filesByFolder.put(folder, list);
 			}
-			
+
 			list.add(file);
 		}
-		
+
 		// convert folder->files map to folder->names map
 		Map<File, String[]> namesByFolder = new LinkedHashMap<File, String[]>();
-		
+
 		for (Entry<File, List<File>> entry : filesByFolder.entrySet()) {
 			namesByFolder.put(entry.getKey(), names(entry.getValue()));
 		}
-		
+
 		return namesByFolder;
 	}
-	
-	
+
 	protected String[] names(Collection<File> files) {
 		String[] names = new String[files.size()];
-		
+
 		int i = 0;
-		
+
 		// fill array
 		for (File file : files) {
 			names[i++] = FileUtilities.getName(file);
 		}
-		
+
 		return names;
 	}
-	
-	
+
 	protected static class SeriesNameCollection extends AbstractCollection<String> {
-		
+
 		private final Map<String, String> data = new LinkedHashMap<String, String>();
-		
-		
+
 		@Override
 		public boolean add(String value) {
 			value = value.trim();
-			
+
 			// require series name to have at least two characters
 			if (value.length() < 2) {
 				return false;
 			}
-			
+
 			String current = data.get(key(value));
-			
+
 			// prefer strings with similar upper/lower case ratio (e.g. prefer Roswell over roswell)
 			if (current == null || firstCharacterCaseBalance(current) < firstCharacterCaseBalance(value)) {
 				data.put(key(value), value);
 				return true;
 			}
-			
+
 			return false;
 		}
-		
-		
+
 		protected String key(Object value) {
 			return value.toString().toLowerCase();
 		}
-		
-		
+
 		protected float firstCharacterCaseBalance(String s) {
 			int upper = 0;
 			int lower = 0;
-			
+
 			Scanner scanner = new Scanner(s); // Scanner uses a white space delimiter by default
-			
+
 			while (scanner.hasNext()) {
 				char c = scanner.next().charAt(0);
-				
+
 				if (Character.isLowerCase(c))
 					lower++;
 				else if (Character.isUpperCase(c))
 					upper++;
 			}
-			
+
 			// give upper case characters a slight boost over lower case characters
 			return (lower + (upper * 1.01f)) / Math.abs(lower - upper);
 		}
-		
-		
+
 		@Override
 		public boolean contains(Object value) {
 			return data.containsKey(key(value));
 		}
-		
-		
+
 		@Override
 		public Iterator<String> iterator() {
 			return data.values().iterator();
 		}
-		
-		
+
 		@Override
 		public int size() {
 			return data.size();
 		}
-		
+
 	}
-	
-	
+
 	protected static class ThresholdCollection<E> extends AbstractCollection<E> {
-		
+
 		private final Collection<E> heaven;
 		private final Map<E, Collection<E>> limbo;
-		
+
 		private final int threshold;
-		
-		
+
 		public ThresholdCollection(int threshold, Comparator<E> equalityComparator) {
 			this.heaven = new ArrayList<E>();
 			this.limbo = new TreeMap<E, Collection<E>>(equalityComparator);
 			this.threshold = threshold;
 		}
-		
-		
+
 		@Override
 		public boolean add(E value) {
 			Collection<E> buffer = limbo.get(value);
-			
+
 			if (buffer == null) {
 				// initialize buffer
 				buffer = new ArrayList<E>(threshold);
 				limbo.put(value, buffer);
 			}
-			
+
 			if (buffer == heaven) {
 				// threshold reached
 				heaven.add(value);
 				return true;
 			}
-			
+
 			// add element to buffer
 			buffer.add(value);
-			
+
 			// check if threshold has been reached
 			if (buffer.size() >= threshold) {
 				heaven.addAll(buffer);
-				
+
 				// replace buffer with heaven
 				limbo.put(value, heaven);
 				return true;
 			}
-			
+
 			return false;
 		};
-		
-		
+
 		public boolean addDirect(E element) {
 			return heaven.add(element);
 		}
-		
-		
+
 		@Override
 		public Iterator<E> iterator() {
 			return heaven.iterator();
 		}
-		
-		
+
 		@Override
 		public int size() {
 			return heaven.size();
 		}
-		
+
 	}
-	
+
 }
diff --git a/source/net/sourceforge/filebot/web/AnidbClient.java b/source/net/sourceforge/filebot/web/AnidbClient.java
index 65e3527e..022ecedc 100644
--- a/source/net/sourceforge/filebot/web/AnidbClient.java
+++ b/source/net/sourceforge/filebot/web/AnidbClient.java
@@ -8,6 +8,8 @@ import java.net.URI;
 import java.net.URISyntaxException;
 import java.net.URL;
 import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Locale;
@@ -27,6 +29,7 @@ import javax.swing.Icon;
 import net.sourceforge.filebot.Cache;
 import net.sourceforge.filebot.ResourceManager;
 
+import org.jsoup.Jsoup;
 import org.w3c.dom.Document;
 import org.w3c.dom.Node;
 
@@ -165,13 +168,15 @@ public class AnidbClient extends AbstractEpisodeListProvider {
 		// type: 1=primary title (one per anime), 2=synonyms (multiple per anime), 3=shorttitles (multiple per anime), 4=official title (one per language)
 		Pattern pattern = Pattern.compile("^(?!#)(\\d+)[|](\\d)[|]([\\w-]+)[|](.+)$");
 
-		Map<Integer, String> primaryTitleMap = new HashMap<Integer, String>();
-		Map<Integer, Map<String, String>> officialTitleMap = new HashMap<Integer, Map<String, String>>();
-		Map<Integer, Map<String, String>> synonymsTitleMap = new HashMap<Integer, Map<String, String>>();
+		List<String> languageOrder = new ArrayList<String>();
+		languageOrder.add("x-jat");
+		languageOrder.add("en");
+		languageOrder.add("ja");
 
 		// fetch data
-		Scanner scanner = new Scanner(new GZIPInputStream(url.openStream()), "UTF-8");
+		Map<Integer, List<Object[]>> entriesByAnime = new HashMap<Integer, List<Object[]>>(65536);
 
+		Scanner scanner = new Scanner(new GZIPInputStream(url.openStream()), "UTF-8");
 		try {
 			while (scanner.hasNextLine()) {
 				Matcher matcher = pattern.matcher(scanner.nextLine());
@@ -182,17 +187,17 @@ public class AnidbClient extends AbstractEpisodeListProvider {
 					String language = matcher.group(3);
 					String title = matcher.group(4);
 
-					if (type.equals("1")) {
-						primaryTitleMap.put(aid, title);
-					} else if (type.equals("2") || type.equals("4")) {
-						Map<Integer, Map<String, String>> titleMap = (type.equals("4") ? officialTitleMap : synonymsTitleMap);
-						Map<String, String> languageTitleMap = titleMap.get(aid);
-						if (languageTitleMap == null) {
-							languageTitleMap = new HashMap<String, String>();
-							titleMap.put(aid, languageTitleMap);
+					if (aid > 0 && title.length() > 0 && languageOrder.contains(language)) {
+						List<Object[]> names = entriesByAnime.get(aid);
+						if (names == null) {
+							names = new ArrayList<Object[]>();
+							entriesByAnime.put(aid, names);
 						}
 
-						languageTitleMap.put(language, title);
+						// resolve HTML entities
+						title = Jsoup.parse(title).text();
+
+						names.add(new Object[] { Integer.parseInt(type), languageOrder.indexOf(language), title });
 					}
 				}
 			}
@@ -201,23 +206,36 @@ public class AnidbClient extends AbstractEpisodeListProvider {
 		}
 
 		// build up a list of all possible AniDB search results
-		anime = new ArrayList<AnidbSearchResult>(primaryTitleMap.size());
+		anime = new ArrayList<AnidbSearchResult>(entriesByAnime.size());
 
-		for (Entry<Integer, String> entry : primaryTitleMap.entrySet()) {
-			Map<String, String> localizedTitles = new HashMap<String, String>();
-			if (synonymsTitleMap.containsKey(entry.getKey())) {
-				localizedTitles.putAll(synonymsTitleMap.get(entry.getKey())); // use synonym as fallback
-			}
-			if (officialTitleMap.containsKey(entry.getKey())) {
-				localizedTitles.putAll(officialTitleMap.get(entry.getKey())); // primarily use official title if available
+		for (Entry<Integer, List<Object[]>> entry : entriesByAnime.entrySet()) {
+			int aid = entry.getKey();
+			List<Object[]> triples = entry.getValue();
+
+			Collections.sort(triples, new Comparator<Object[]>() {
+
+				@SuppressWarnings({ "unchecked", "rawtypes" })
+				@Override
+				public int compare(Object[] a, Object[] b) {
+					for (int i = 0; i < a.length; i++) {
+						if (!a[i].equals(b[i]))
+							return ((Comparable) a[i]).compareTo(b[i]);
+					}
+					return 0;
+				}
+			});
+
+			List<String> names = new ArrayList<String>(triples.size());
+			for (Object[] it : triples) {
+				names.add((String) it[2]);
 			}
 
-			String englishTitle = localizedTitles.get("en"); // ONLY SUPPORT ENGLISH LOCALIZATION
-			anime.add(new AnidbSearchResult(entry.getKey(), entry.getValue(), englishTitle == null || englishTitle.isEmpty() ? new String[] {} : new String[] { englishTitle }));
+			String primaryTitle = names.get(0);
+			String[] aliasNames = names.subList(1, names.size()).toArray(new String[0]);
+			anime.add(new AnidbSearchResult(aid, primaryTitle, aliasNames));
 		}
 
 		// populate cache
 		return cache.putSearchResult(null, Locale.ROOT, anime);
 	}
-
 }
diff --git a/source/net/sourceforge/filebot/web/AnidbSearchResult.java b/source/net/sourceforge/filebot/web/AnidbSearchResult.java
index 63bfea52..c52c004d 100644
--- a/source/net/sourceforge/filebot/web/AnidbSearchResult.java
+++ b/source/net/sourceforge/filebot/web/AnidbSearchResult.java
@@ -8,8 +8,8 @@ public class AnidbSearchResult extends SearchResult {
 		// used by serializer
 	}
 
-	public AnidbSearchResult(int aid, String primaryTitle, String[] localizedTitles) {
-		super(primaryTitle, localizedTitles);
+	public AnidbSearchResult(int aid, String primaryTitle, String[] aliasNames) {
+		super(primaryTitle, aliasNames);
 		this.aid = aid;
 	}
 
diff --git a/test/net/sourceforge/filebot/web/AnidbClientTest.java b/test/net/sourceforge/filebot/web/AnidbClientTest.java
index 05e142ee..df43bf19 100644
--- a/test/net/sourceforge/filebot/web/AnidbClientTest.java
+++ b/test/net/sourceforge/filebot/web/AnidbClientTest.java
@@ -1,7 +1,5 @@
-
 package net.sourceforge.filebot.web;
 
-
 import static org.junit.Assert.*;
 
 import java.util.List;
@@ -13,72 +11,72 @@ import org.junit.AfterClass;
 import org.junit.BeforeClass;
 import org.junit.Test;
 
-
 public class AnidbClientTest {
-	
+
 	/**
 	 * 74 episodes
 	 */
 	private static AnidbSearchResult monsterSearchResult;
-	
+
 	/**
 	 * 45 episodes
 	 */
 	private static AnidbSearchResult twelvekingdomsSearchResult;
-	
+
 	/**
 	 * 38 episodes, lots of special characters
 	 */
 	private static AnidbSearchResult princessTutuSearchResult;
-	
-	
+
 	@BeforeClass
 	public static void setUpBeforeClass() throws Exception {
 		monsterSearchResult = new AnidbSearchResult(1539, "Monster", null);
 		twelvekingdomsSearchResult = new AnidbSearchResult(26, "Juuni Kokuki", null);
 		princessTutuSearchResult = new AnidbSearchResult(516, "Princess Tutu", null);
 	}
-	
+
 	private AnidbClient anidb = new AnidbClient("filebot", 4);
-	
-	
+
+	@Test
+	public void getAnimeTitles() throws Exception {
+		List<AnidbSearchResult> animeTitles = anidb.getAnimeTitles();
+		assertTrue(animeTitles.size() > 8000);
+	}
+
 	@Test
 	public void search() throws Exception {
 		List<SearchResult> results = anidb.search("one piece");
-		
+
 		AnidbSearchResult result = (AnidbSearchResult) results.get(0);
 		assertEquals("One Piece", result.getName());
 		assertEquals(69, result.getAnimeId());
 	}
-	
-	
+
 	@Test
 	public void searchNoMatch() throws Exception {
 		List<SearchResult> results = anidb.search("i will not find anything for this query string");
-		
+
 		assertTrue(results.isEmpty());
 	}
-	
-	
+
 	@Test
 	public void searchTitleAlias() throws Exception {
 		// Seikai no Senki (main title), Banner of the Stars (official English title)
 		assertEquals("Seikai no Senki", anidb.search("banner of the stars").get(0).getName());
 		assertEquals("Seikai no Senki", anidb.search("seikai no senki").get(0).getName());
-		
+
 		// no matching title
 		assertEquals("Naruto", anidb.search("naruto").get(0).getName());
 	}
-	
-	
+
 	@Test
 	public void getEpisodeListAll() throws Exception {
 		List<Episode> list = anidb.getEpisodeList(monsterSearchResult);
-		
+
 		assertEquals(74, list.size());
-		
+
 		Episode first = list.get(0);
-		
+
 		assertEquals("Monster", first.getSeriesName());
 		assertEquals("2004-04-07", first.getSeriesStartDate().toString());
 		assertEquals("Herr Dr. Tenma", first.getTitle());
@@ -87,16 +85,15 @@ public class AnidbClientTest {
 		assertEquals(null, first.getSeason());
 		assertEquals("2004-04-07", first.getAirdate().toString());
 	}
-	
-	
+
 	@Test
 	public void getEpisodeListAllShortLink() throws Exception {
 		List<Episode> list = anidb.getEpisodeList(twelvekingdomsSearchResult);
-		
+
 		assertEquals(45, list.size());
-		
+
 		Episode first = list.get(0);
-		
+
 		assertEquals("The Twelve Kingdoms", first.getSeriesName());
 		assertEquals("2002-04-09", first.getSeriesStartDate().toString());
 		assertEquals("Shadow of the Moon, The Sea of Shadow - Chapter 1", first.getTitle());
@@ -105,18 +102,16 @@ public class AnidbClientTest {
 		assertEquals(null, first.getSeason());
 		assertEquals("2002-04-09", first.getAirdate().toString());
 	}
-	
-	
+
 	@Test
 	public void getEpisodeListEncoding() throws Exception {
 		assertEquals("Raven Princess - An der schönen blauen Donau", anidb.getEpisodeList(princessTutuSearchResult).get(6).getTitle());
 	}
-	
-	
+
 	@Test
 	public void getEpisodeListI18N() throws Exception {
 		List<Episode> list = anidb.getEpisodeList(monsterSearchResult, SortOrder.Airdate, Locale.JAPANESE);
-		
+
 		Episode last = list.get(73);
 		assertEquals("モンスター", last.getSeriesName());
 		assertEquals("2004-04-07", last.getSeriesStartDate().toString());
@@ -126,24 +121,21 @@ public class AnidbClientTest {
 		assertEquals(null, last.getSeason());
 		assertEquals("2005-09-28", last.getAirdate().toString());
 	}
-	
-	
+
 	@Test
 	public void getEpisodeListTrimRecap() throws Exception {
 		assertEquals("Sea God of the East, Azure Sea of the West - Transition Chapter", anidb.getEpisodeList(twelvekingdomsSearchResult).get(44).getTitle());
 	}
-	
-	
+
 	@Test
 	public void getEpisodeListLink() throws Exception {
 		assertEquals("http://anidb.net/a1539", anidb.getEpisodeListLink(monsterSearchResult).toURL().toString());
 	}
-	
-	
+
 	@BeforeClass
 	@AfterClass
 	public static void clearCache() {
 		CacheManager.getInstance().clearAll();
 	}
-	
+
 }