From cdc4a94daa122f150429b61878840fd0838631b7 Mon Sep 17 00:00:00 2001 From: Reinhard Pointner Date: Sun, 15 Dec 2013 18:35:41 +0000 Subject: [PATCH] * fix the "law.and.order.svu" special-fuckin-corner-case --- BuildData.groovy | 6 +- .../filebot/similarity/EpisodeMetrics.java | 65 +++++++++++++++---- 2 files changed, 57 insertions(+), 14 deletions(-) diff --git a/BuildData.groovy b/BuildData.groovy index 98d21d9f..e53790b0 100644 --- a/BuildData.groovy +++ b/BuildData.groovy @@ -199,6 +199,7 @@ def addSeriesAlias = { from, to -> // additional custom mappings addSeriesAlias('Law & Order: Special Victims Unit', 'Law and Order SVU') +addSeriesAlias('Law & Order: Special Victims Unit', 'Law & Order SVU') addSeriesAlias('Battlestar Galactica (2003)', 'BSG') addSeriesAlias('CSI: Crime Scene Investigation', 'CSI') addSeriesAlias('M*A*S*H', 'MASH') @@ -208,10 +209,11 @@ addSeriesAlias('World Series of Poker', 'WSOP') thetvdb_index = thetvdb_index.findResults{ [it[0] as Integer, it[1].replaceAll(/\s+/, ' ').trim()] }.findAll{ !(it[1] =~ /(?i:duplicate)/ || it[1] =~ /\d{6,}/ || it[1].startsWith('*') || it[1].endsWith('*') || it[1].length() < 2) } -thetvdb_index = thetvdb_index.sort({a,b -> a[0] <=> b[0]} as Comparator) +thetvdb_index = thetvdb_index.sort({ a, b -> a[0] <=> b[0] } as Comparator) // join and sort -def thetvdb_txt = thetvdb_index.groupBy{ it[0] }.findResults{ k, v -> ([k.pad(6)] + v*.getAt(1).unique{it.toLowerCase()}).join('\t') } +def thetvdb_txt = thetvdb_index.groupBy{ it[0] }.findResults{ k, v -> ([k.pad(6)] + v*.getAt(1).unique{ it.toLowerCase() }).join('\t') } + pack(thetvdb_out, thetvdb_txt) println "TheTVDB Index: " + thetvdb_txt.size() diff --git a/source/net/sourceforge/filebot/similarity/EpisodeMetrics.java b/source/net/sourceforge/filebot/similarity/EpisodeMetrics.java index 6d5b276e..57eb684e 100644 --- a/source/net/sourceforge/filebot/similarity/EpisodeMetrics.java +++ b/source/net/sourceforge/filebot/similarity/EpisodeMetrics.java @@ -236,22 +236,46 @@ public enum EpisodeMetrics implements SimilarityMetric { @Override public float getSimilarity(Object o1, Object o2) { + String[] f1 = getNormalizedEffectiveIdentifiers(o1); + String[] f2 = getNormalizedEffectiveIdentifiers(o2); + + // match all fields and average similarity + float max = 0; + for (String s1 : f1) { + for (String s2 : f2) { + max = max(super.getSimilarity(s1, s2), max); + } + } + // normalize absolute similarity to similarity rank (4 ranks in total), // so we are less likely to fall for false positives in this pass, and move on to the next one - return (float) (floor(super.getSimilarity(o1, o2) * 4) / 4); + return (float) (floor(max * 4) / 4); } @Override protected String normalize(Object object) { - if (object instanceof Episode) { - object = removeTrailingBrackets(((Episode) object).getSeriesName()); - } else if (object instanceof Movie) { - object = ((Movie) object).getName(); - } else if (object instanceof File) { - object = getNameWithoutExtension(getRelativePathTail((File) object, 3).getPath()); + return object.toString(); + } + + protected String[] getNormalizedEffectiveIdentifiers(Object object) { + List identifiers = getEffectiveIdentifiers(object); + String[] names = new String[identifiers.size()]; + + for (int i = 0; i < names.length; i++) { + names[i] = normalizeObject(identifiers.get(i)); } - // simplify file name, if possible - return normalizeObject(object); + return names; + } + + protected List getEffectiveIdentifiers(Object object) { + if (object instanceof Episode) { + return ((Episode) object).getSeries().getEffectiveNames(); + } else if (object instanceof Movie) { + return ((Movie) object).getEffectiveNames(); + } else if (object instanceof File) { + return listPathTail((File) object, 3, true); + } + return singletonList(object); } }), @@ -300,9 +324,16 @@ public enum EpisodeMetrics implements SimilarityMetric { } } else if (object instanceof File) { object = ((File) object).getName(); // try to narrow down on series name - String sn = seriesNameMatcher.matchByEpisodeIdentifier(object.toString()); - if (sn != null) { - object = sn; + + try { + object = resolveSeriesDirectMapping((String) object); + } catch (IOException e) { + Logger.getLogger(EpisodeMetrics.class.getName()).log(Level.WARNING, e.getMessage()); + } + + String snm = seriesNameMatcher.matchByEpisodeIdentifier((String) object); + if (snm != null) { + object = snm; } } @@ -318,6 +349,16 @@ public enum EpisodeMetrics implements SimilarityMetric { // simplify file name, if possible return normalizeObject(object); } + + protected String resolveSeriesDirectMapping(String input) throws IOException { + for (Pattern it : releaseInfo.getSeriesDirectMappings().keySet()) { + Matcher m = it.matcher(input); + if (m.find()) { + return m.replaceAll(releaseInfo.getSeriesDirectMappings().get(it)); + } + } + return input; + } }), // Match by generic name similarity (absolute)