Cache SeriesNameMatcher objects

This commit is contained in:
Reinhard Pointner 2016-02-10 18:32:30 +00:00
parent a81fcf155a
commit bf69d750e8
8 changed files with 31 additions and 40 deletions

View File

@ -116,10 +116,9 @@ public class CmdlineOperations implements CmdlineInterface {
int sxe = 0; // SxE
int cws = 0; // common word sequence
SeriesNameMatcher nameMatcher = new SeriesNameMatcher(locale, true);
Collection<String> cwsList = emptySet();
if (max >= 5) {
cwsList = nameMatcher.matchAll(mediaFiles.toArray(new File[0]));
cwsList = getSeriesNameMatcher().matchAll(mediaFiles.toArray(new File[0]));
}
for (File f : mediaFiles) {
@ -130,7 +129,7 @@ public class CmdlineOperations implements CmdlineInterface {
// count CWS matches
for (String base : cwsList) {
if (base.equalsIgnoreCase(nameMatcher.matchByFirstCommonWordSequence(base, f.getName()))) {
if (base.equalsIgnoreCase(getSeriesNameMatcher().matchByFirstCommonWordSequence(base, f.getName()))) {
cws++;
break;
}

View File

@ -115,9 +115,10 @@ public class MediaDetection {
return releaseInfo.getLanguageSuffix(getName(file));
}
private static final SeasonEpisodeMatcher seasonEpisodeMatcherStrict = new SmartSeasonEpisodeMatcher(true);
private static final SeasonEpisodeMatcher seasonEpisodeMatcherNonStrict = new SmartSeasonEpisodeMatcher(false);
private static final SeasonEpisodeMatcher seasonEpisodeMatcherStrict = new SmartSeasonEpisodeMatcher(SeasonEpisodeMatcher.DEFAULT_SANITY, true);
private static final SeasonEpisodeMatcher seasonEpisodeMatcherNonStrict = new SmartSeasonEpisodeMatcher(SeasonEpisodeMatcher.DEFAULT_SANITY, false);
private static final DateMatcher dateMatcher = new DateMatcher(Locale.getDefault(), DateMatcher.DEFAULT_SANITY);
private static final SeriesNameMatcher seriesNameMatcher = new SeriesNameMatcher(Locale.ENGLISH, true);
public static SeasonEpisodeMatcher getSeasonEpisodeMatcher(boolean strict) {
return strict ? seasonEpisodeMatcherStrict : seasonEpisodeMatcherNonStrict;
@ -127,6 +128,10 @@ public class MediaDetection {
return dateMatcher;
}
/**
 * Returns the shared {@link SeriesNameMatcher} held in the static
 * {@code seriesNameMatcher} field of this class.
 *
 * The same instance is handed to every caller; that field is initialized
 * once with {@code Locale.ENGLISH} and {@code strict = true}, so the
 * returned matcher does not adapt to any caller-specific locale.
 *
 * NOTE(review): the instance is shared by all callers — confirm that
 * SeriesNameMatcher is safe to use from multiple threads.
 *
 * @return the shared series name matcher
 */
public static SeriesNameMatcher getSeriesNameMatcher() {
return seriesNameMatcher;
}
/**
 * Tells whether the given name carries an episode marker.
 *
 * A name qualifies when {@code parseEpisodeNumber(name, strict)} yields a
 * non-null result, or, failing that, when {@code parseDate(name)} yields a
 * non-null result.
 *
 * @param name
 *            the name to inspect
 * @param strict
 *            forwarded unchanged to {@code parseEpisodeNumber}
 * @return {@code true} if either lookup produced a result, {@code false} otherwise
 */
public static boolean isEpisode(String name, boolean strict) {
	// an episode number match settles the question; the date lookup is skipped in that case
	if (parseEpisodeNumber(name, strict) != null) {
		return true;
	}

	// no episode number found, so the answer depends solely on the date lookup
	return parseDate(name) != null;
}

View File

@ -11,10 +11,6 @@ public class SmartSeasonEpisodeMatcher extends SeasonEpisodeMatcher {
super(sanity, strict);
}
public SmartSeasonEpisodeMatcher(boolean strict) {
super(DEFAULT_SANITY, strict);
}
/**
 * Produces the cleaned form of the given name by delegating to
 * {@code MediaDetection.stripFormatInfo}.
 *
 * NOTE(review): presumably a hook that the SeasonEpisodeMatcher superclass
 * calls before matching — confirm against the parent class.
 *
 * @param name
 *            the raw name
 * @return whatever {@code MediaDetection.stripFormatInfo(name)} returns
 */
protected String clean(CharSequence name) {
// all of the work is done by the MediaDetection helper
return MediaDetection.stripFormatInfo(name);
}

View File

@ -592,8 +592,6 @@ public enum EpisodeMetrics implements SimilarityMetric {
RegionHint(new SimilarityMetric() {
private Pattern hint = compile("[(](\\p{Alpha}+|\\p{Digit}+)[)]$");
private SeriesNameMatcher seriesNameMatcher = new SeriesNameMatcher();
private Pattern punctuation = compile("[\\p{Punct}\\p{Space}]+");
@Override
@ -617,7 +615,7 @@ public enum EpisodeMetrics implements SimilarityMetric {
for (File f : listPathTail((File) o, 3, true)) {
// try to focus on series name
String n = f.getName();
String sn = seriesNameMatcher.matchByEpisodeIdentifier(n);
String sn = getSeriesNameMatcher().matchByEpisodeIdentifier(n);
String[] tokens = punctuation.split(sn != null ? sn : n);
for (String s : tokens) {

View File

@ -8,6 +8,7 @@ import static net.filebot.util.StringUtilities.*;
import java.io.File;
import java.text.CollationKey;
import java.text.Collator;
import java.util.AbstractCollection;
import java.util.ArrayList;
import java.util.Arrays;
@ -30,23 +31,21 @@ import net.filebot.util.FileUtilities;
public class SeriesNameMatcher {
protected SeasonEpisodeMatcher seasonEpisodeMatcher;
protected DateMatcher dateMatcher;
protected NameSimilarityMetric nameSimilarityMetric;
protected CommonSequenceMatcher commonSequenceMatcher;
public SeriesNameMatcher() {
this(Locale.ENGLISH, true);
}
protected final SimilarityMetric metric;
protected final SeasonEpisodeMatcher seasonEpisodeMatcher;
protected final DateMatcher dateMatcher;
protected final CommonSequenceMatcher commonSequenceMatcher;
public SeriesNameMatcher(Locale locale, boolean strict) {
seasonEpisodeMatcher = new SmartSeasonEpisodeMatcher(SeasonEpisodeMatcher.DEFAULT_SANITY, strict);
dateMatcher = new DateMatcher(locale, DateMatcher.DEFAULT_SANITY);
nameSimilarityMetric = new NameSimilarityMetric();
this(new NameSimilarityMetric(), getLenientCollator(locale), new SmartSeasonEpisodeMatcher(SeasonEpisodeMatcher.DEFAULT_SANITY, strict), new DateMatcher(locale, DateMatcher.DEFAULT_SANITY));
}
commonSequenceMatcher = new CommonSequenceMatcher(getLenientCollator(locale), 3, true) {
public SeriesNameMatcher(SimilarityMetric metric, Collator collator, SeasonEpisodeMatcher seasonEpisodeMatcher, DateMatcher dateMatcher) {
this.metric = metric;
this.seasonEpisodeMatcher = seasonEpisodeMatcher;
this.dateMatcher = dateMatcher;
this.commonSequenceMatcher = new CommonSequenceMatcher(collator, 3, true) {
@Override
public CollationKey[] split(String sequence) {
@ -65,7 +64,7 @@ public class SeriesNameMatcher {
for (String nameMatch : matchAll(names)) {
String commonMatch = commonSequenceMatcher.matchFirstCommonSequence(nameMatch, parent);
float similarity = commonMatch == null ? 0 : nameSimilarityMetric.getSimilarity(commonMatch, nameMatch);
float similarity = commonMatch == null ? 0 : metric.getSimilarity(commonMatch, nameMatch);
// prefer common match, but only if it's very similar to the original match
seriesNames.add(similarity > 0.7 ? commonMatch : nameMatch);

View File

@ -2,6 +2,7 @@ package net.filebot.ui.list;
import static java.awt.Font.*;
import static java.lang.Math.*;
import static net.filebot.media.MediaDetection.*;
import static net.filebot.ui.NotificationLogging.*;
import java.awt.BorderLayout;
@ -128,7 +129,7 @@ public class ListPanel extends JComponent {
}
// try to match title from the leading names (at most four are used, since subList(0, 4) excludes index 4)
Collection<String> title = new SeriesNameMatcher().matchAll((names.size() < 5 ? names : names.subList(0, 4)).toArray(new String[0]));
Collection<String> title = getSeriesNameMatcher().matchAll((names.size() < 5 ? names : names.subList(0, 4)).toArray(new String[0]));
list.setTitle(title.isEmpty() ? "List" : title.iterator().next());

View File

@ -35,8 +35,7 @@ class MovieEditor implements TableCellEditor {
String fn = FileUtilities.getName(mapping.getVideo() != null ? mapping.getVideo() : mapping.getSubtitle());
// check if query contain an episode identifier
SeriesNameMatcher snm = new SeriesNameMatcher();
String sn = snm.matchByEpisodeIdentifier(fn);
String sn = getSeriesNameMatcher().matchByEpisodeIdentifier(fn);
if (sn != null) {
return stripReleaseInfo(sn, true);
}

View File

@ -1,17 +1,16 @@
package net.filebot.similarity;
import static org.junit.Assert.*;
import java.util.Locale;
import net.filebot.similarity.SeriesNameMatcher.SeriesNameCollection;
import org.junit.Test;
public class SeriesNameMatcherTest {
private static SeriesNameMatcher matcher = new SeriesNameMatcher();
private static SeriesNameMatcher matcher = new SeriesNameMatcher(Locale.ENGLISH, true);
@Test
public void whitelist() {
@ -21,7 +20,6 @@ public class SeriesNameMatcherTest {
assertArrayEquals(new String[] { "Test 101" }, matcher.matchAll(names).toArray());
}
@Test
public void threshold() {
// ignore recurring word sequences when matching episode patterns
@ -30,7 +28,6 @@ public class SeriesNameMatcherTest {
assertArrayEquals(new String[] { "Test" }, matcher.matchAll(names).toArray());
}
@Test
public void matchBeforeSeasonEpisodePattern() {
assertEquals("The Test", matcher.matchByEpisodeIdentifier("The Test - 1x01"));
@ -39,7 +36,6 @@ public class SeriesNameMatcherTest {
assertEquals("Mushishi", matcher.matchByEpisodeIdentifier("[niizk]_Mushishi_-_1x01_-_The_Green_Gathering"));
}
@Test
public void normalize() {
// non-letter and non-digit characters
@ -52,7 +48,6 @@ public class SeriesNameMatcherTest {
assertEquals("strawhat Luffy", matcher.normalize("(strawhat [Luffy (#Monkey)"));
}
@Test
public void firstCommonSequence() {
String[] seq1 = "Common Name 1 Any Title".split("\\s");
@ -66,7 +61,6 @@ public class SeriesNameMatcherTest {
assertArrayEquals(null, matcher.firstCommonSequence(seq2, seq1, 1, String.CASE_INSENSITIVE_ORDER));
}
@Test
public void firstCharacterCaseBalance() {
SeriesNameCollection n = new SeriesNameCollection();