* improved auto episode list matching

This commit is contained in:
Reinhard Pointner 2009-02-03 20:36:57 +00:00
parent f7fdc5b5db
commit 7b61757fd7
6 changed files with 146 additions and 80 deletions

View File

@ -100,6 +100,18 @@ public final class FileBotUtilities {
public static final FileFilter LIST_FILES = new ExtensionFileFilter("txt", "list", ""); public static final FileFilter LIST_FILES = new ExtensionFileFilter("txt", "list", "");
public static final FileFilter SUBTITLE_FILES = new ExtensionFileFilter("srt", "sub", "ssa", "ass", "smi"); public static final FileFilter SUBTITLE_FILES = new ExtensionFileFilter("srt", "sub", "ssa", "ass", "smi");
/**
 * Size-based filter for movie files. The file extension is not inspected at all;
 * a file is accepted only if its length is strictly greater than 10 MB
 * (ten times {@code FileUtilities.MEGA}).
 */
public static final FileFilter MOVIE_FILES = new FileFilter() {
	
	@Override
	public boolean accept(File candidate) {
		// strict comparison: a file of exactly the minimum size is rejected
		return 10 * FileUtilities.MEGA < candidate.length();
	}
};
/** /**
* Dummy constructor to prevent instantiation. * Dummy constructor to prevent instantiation.

View File

@ -26,23 +26,13 @@ public class SeriesNameMatcher {
protected final SeasonEpisodeMatcher seasonEpisodeMatcher = new SeasonEpisodeMatcher(); protected final SeasonEpisodeMatcher seasonEpisodeMatcher = new SeasonEpisodeMatcher();
protected final int threshold;
public SeriesNameMatcher(int threshold) {
if (threshold < 0)
throw new IllegalArgumentException("threshold must be greater than 0");
this.threshold = threshold;
}
public String match(File file) { public String match(File file) {
return match(file.getName(), file.getParent()); return match(file.getName(), file.getParent());
} }
public Collection<String> matchAll(File... files) { public Collection<String> matchAll(File[] files) {
SeriesNameCollection seriesNames = new SeriesNameCollection(); SeriesNameCollection seriesNames = new SeriesNameCollection();
// group files by parent folder // group files by parent folder
@ -62,14 +52,17 @@ public class SeriesNameMatcher {
} }
public Collection<String> matchAll(String... names) { public Collection<String> matchAll(String[] names) {
SeriesNameCollection seriesNames = new SeriesNameCollection(); SeriesNameCollection seriesNames = new SeriesNameCollection();
// allow matching of a small number of episodes, by setting threshold = length if length < 5
int threshold = Math.min(names.length, 5);
// 1. use pattern matching with frequency threshold // 1. use pattern matching with frequency threshold
seriesNames.addAll(flatMatchAll(names)); seriesNames.addAll(flatMatchAll(names, threshold));
// 2. match common word sequences // 2. match common word sequences
seriesNames.addAll(deepMatchAll(names)); seriesNames.addAll(deepMatchAll(names, threshold));
return seriesNames; return seriesNames;
} }
@ -82,7 +75,7 @@ public class SeriesNameMatcher {
* @return series names that have been matched one or multiple times depending on the * @return series names that have been matched one or multiple times depending on the
* threshold * threshold
*/ */
private Collection<String> flatMatchAll(String[] names) { private Collection<String> flatMatchAll(String[] names, int threshold) {
ThresholdCollection<String> seriesNames = new ThresholdCollection<String>(threshold, String.CASE_INSENSITIVE_ORDER); ThresholdCollection<String> seriesNames = new ThresholdCollection<String>(threshold, String.CASE_INSENSITIVE_ORDER);
for (String name : names) { for (String name : names) {
@ -103,7 +96,7 @@ public class SeriesNameMatcher {
* @param names list of episode names * @param names list of episode names
* @return all common word sequences that have been found * @return all common word sequences that have been found
*/ */
private Collection<String> deepMatchAll(String[] names) { private Collection<String> deepMatchAll(String[] names, int threshold) {
// can't use common word sequence matching for less than 2 names // can't use common word sequence matching for less than 2 names
if (names.length < 2 || names.length < threshold) { if (names.length < 2 || names.length < threshold) {
return Collections.emptySet(); return Collections.emptySet();
@ -120,8 +113,8 @@ public class SeriesNameMatcher {
List<String> results = new ArrayList<String>(); List<String> results = new ArrayList<String>();
// split list in two and try to match common word sequence on those // split list in two and try to match common word sequence on those
results.addAll(deepMatchAll(Arrays.copyOfRange(names, 0, names.length / 2))); results.addAll(deepMatchAll(Arrays.copyOfRange(names, 0, names.length / 2), threshold));
results.addAll(deepMatchAll(Arrays.copyOfRange(names, names.length / 2, names.length))); results.addAll(deepMatchAll(Arrays.copyOfRange(names, names.length / 2, names.length), threshold));
return results; return results;
} }
@ -173,8 +166,9 @@ public class SeriesNameMatcher {
/** /**
* Try to match a series name from the first common word sequence. * Try to match a series name from the first common word sequence.
* *
* @param names various episode names (5 or more for accurate results) * @param names various episode names (at least two)
* @return a word sequence all episode names have in common, or null * @return a word sequence all episode names have in common, or null
* @throws IllegalArgumentException if less than 2 episode names are given
*/ */
public String matchByFirstCommonWordSequence(String... names) { public String matchByFirstCommonWordSequence(String... names) {
if (names.length < 2) { if (names.length < 2) {
@ -301,7 +295,7 @@ public class SeriesNameMatcher {
int upper = 0; int upper = 0;
int lower = 0; int lower = 0;
Scanner scanner = new Scanner(s); // Scanner has white space delimiter by default Scanner scanner = new Scanner(s); // Scanner uses a white space delimiter by default
while (scanner.hasNext()) { while (scanner.hasNext()) {
char c = scanner.next().charAt(0); char c = scanner.next().charAt(0);
@ -312,7 +306,7 @@ public class SeriesNameMatcher {
upper++; upper++;
} }
// give upper case characters a slight boost // give upper case characters a slight boost over lower case characters
return (lower + (upper * 1.01f)) / Math.abs(lower - upper); return (lower + (upper * 1.01f)) / Math.abs(lower - upper);
} }

View File

@ -2,16 +2,20 @@
package net.sourceforge.filebot.ui.panel.rename; package net.sourceforge.filebot.ui.panel.rename;
import static net.sourceforge.filebot.FileBotUtilities.MOVIE_FILES;
import static net.sourceforge.filebot.FileBotUtilities.SUBTITLE_FILES; import static net.sourceforge.filebot.FileBotUtilities.SUBTITLE_FILES;
import static net.sourceforge.filebot.web.Episode.formatEpisodeNumbers; import static net.sourceforge.filebot.web.Episode.formatEpisodeNumbers;
import static net.sourceforge.tuned.FileUtilities.FILES;
import java.io.File; import java.io.File;
import java.io.FileFilter;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection; import java.util.Collection;
import java.util.Collections; import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List; import java.util.List;
import java.util.Map;
import java.util.concurrent.Callable; import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService; import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors; import java.util.concurrent.Executors;
@ -26,52 +30,42 @@ import net.sourceforge.filebot.similarity.SimilarityMetric;
import net.sourceforge.filebot.web.Episode; import net.sourceforge.filebot.web.Episode;
import net.sourceforge.filebot.web.EpisodeListClient; import net.sourceforge.filebot.web.EpisodeListClient;
import net.sourceforge.filebot.web.SearchResult; import net.sourceforge.filebot.web.SearchResult;
import net.sourceforge.tuned.FileUtilities;
class AutoEpisodeListMatcher extends SwingWorker<List<Match<FileEntry, Episode>>, Void> { class AutoEpisodeListMatcher extends SwingWorker<List<Match<File, Episode>>, Void> {
private final List<FileEntry> remainingFiles = new ArrayList<FileEntry>(); private final List<File> files;
private final List<FileEntry> files;
private final EpisodeListClient client; private final EpisodeListClient client;
private final Collection<SimilarityMetric> metrics; private final Collection<SimilarityMetric> metrics;
public AutoEpisodeListMatcher(EpisodeListClient client, List<FileEntry> files, Collection<SimilarityMetric> metrics) { public AutoEpisodeListMatcher(EpisodeListClient client, List<File> files, Collection<SimilarityMetric> metrics) {
this.client = client; this.client = client;
this.files = files; this.files = new LinkedList<File>(files);
this.metrics = metrics; this.metrics = new ArrayList<SimilarityMetric>(metrics);
} }
public Collection<FileEntry> remainingFiles() { public Collection<File> remainingFiles() {
return Collections.unmodifiableCollection(remainingFiles); return Collections.unmodifiableCollection(files);
} }
protected Collection<String> matchSeriesNames(List<FileEntry> episodes) { protected Collection<String> detectSeriesNames(Collection<File> files) {
File[] files = new File[episodes.size()]; // detect series name(s) from files
return new SeriesNameMatcher().matchAll(files.toArray(new File[files.size()]));
for (int i = 0; i < files.length; i++) {
files[i] = episodes.get(i).getFile();
}
// allow matching of a small number of episodes, by setting threshold = length if length < 5
int threshold = Math.min(files.length, 5);
return new SeriesNameMatcher(threshold).matchAll(files);
} }
@Override protected List<Episode> fetchEpisodeList(Collection<String> seriesNames) throws Exception {
protected List<Match<FileEntry, Episode>> doInBackground() throws Exception { List<Callable<Collection<Episode>>> tasks = new ArrayList<Callable<Collection<Episode>>>();
List<Callable<Collection<Episode>>> fetchTasks = new ArrayList<Callable<Collection<Episode>>>();
// match series names and create episode list fetch tasks // detect series names and create episode list fetch tasks
for (final String seriesName : matchSeriesNames(files)) { for (final String seriesName : seriesNames) {
fetchTasks.add(new Callable<Collection<Episode>>() { tasks.add(new Callable<Collection<Episode>>() {
@Override @Override
public Collection<Episode> call() throws Exception { public Collection<Episode> call() throws Exception {
@ -85,47 +79,78 @@ class AutoEpisodeListMatcher extends SwingWorker<List<Match<FileEntry, Episode>>
}); });
} }
if (fetchTasks.isEmpty()) { if (tasks.isEmpty())
throw new IllegalArgumentException("Failed to auto-detect series name."); throw new IllegalArgumentException("Failed to auto-detect series name.");
}
// fetch episode lists concurrently // fetch episode lists concurrently
List<Episode> episodeList = new ArrayList<Episode>(); List<Episode> episodes = new ArrayList<Episode>();
ExecutorService executor = Executors.newFixedThreadPool(fetchTasks.size()); ExecutorService executor = Executors.newFixedThreadPool(tasks.size());
for (Future<Collection<Episode>> future : executor.invokeAll(fetchTasks)) { for (Future<Collection<Episode>> future : executor.invokeAll(tasks)) {
episodeList.addAll(future.get()); episodes.addAll(future.get());
} }
// destroy background threads
executor.shutdown(); executor.shutdown();
List<Match<FileEntry, Episode>> matches = new ArrayList<Match<FileEntry, Episode>>(); return episodes;
}
@Override
protected List<Match<File, Episode>> doInBackground() throws Exception {
for (List<FileEntry> entryList : splitByFileType(files)) { // focus on movie and subtitle files
Matcher<FileEntry, Episode> matcher = new Matcher<FileEntry, Episode>(entryList, episodeList, metrics); List<File> mediaFiles = FileUtilities.filter(files, MOVIE_FILES, SUBTITLE_FILES);
// detect series name and fetch episode list
List<Episode> episodes = fetchEpisodeList(detectSeriesNames(mediaFiles));
List<Match<File, Episode>> matches = new ArrayList<Match<File, Episode>>();
// group by subtitles first and then by files in general
for (List<File> filesPerType : mapByFileType(files, MOVIE_FILES, SUBTITLE_FILES).values()) {
Matcher<File, Episode> matcher = new Matcher<File, Episode>(filesPerType, episodes, metrics);
matches.addAll(matcher.match()); matches.addAll(matcher.match());
remainingFiles.addAll(matcher.remainingValues()); }
// restore original order
Collections.sort(matches, new Comparator<Match<File, Episode>>() {
@Override
public int compare(Match<File, Episode> o1, Match<File, Episode> o2) {
return files.indexOf(o1.getValue()) - files.indexOf(o2.getValue());
}
});
// update remaining files
for (Match<File, Episode> match : matches) {
files.remove(match.getValue());
} }
return matches; return matches;
} }
@SuppressWarnings("unchecked") protected Map<FileFilter, List<File>> mapByFileType(Collection<File> files, FileFilter... filters) {
protected Collection<List<FileEntry>> splitByFileType(Collection<FileEntry> files) { // initialize map, keep filter order
List<FileEntry> subtitles = new ArrayList<FileEntry>(); Map<FileFilter, List<File>> map = new HashMap<FileFilter, List<File>>(filters.length);
List<FileEntry> other = new ArrayList<FileEntry>();
for (FileEntry file : files) { // initialize value lists
// check for for subtitles first, then files in general for (FileFilter filter : filters) {
if (SUBTITLE_FILES.accept(file.getFile())) { map.put(filter, new ArrayList<File>());
subtitles.add(file); }
} else if (FILES.accept(file.getFile())) {
other.add(file); for (File file : files) {
for (FileFilter filter : filters) {
if (filter.accept(file)) {
// put each value into one group only
map.get(filter).add(file);
break;
}
} }
} }
return Arrays.asList(other, subtitles); return map;
} }
} }

View File

@ -7,6 +7,7 @@ import static net.sourceforge.tuned.ui.LoadingOverlayPane.LOADING_PROPERTY;
import static net.sourceforge.filebot.FileBotUtilities.*; import static net.sourceforge.filebot.FileBotUtilities.*;
import java.awt.Insets; import java.awt.Insets;
import java.awt.event.ActionEvent; import java.awt.event.ActionEvent;
import java.io.File;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.logging.Level; import java.util.logging.Level;
@ -33,6 +34,8 @@ import net.sourceforge.filebot.web.TheTVDBClient;
import net.sourceforge.tuned.ExceptionUtil; import net.sourceforge.tuned.ExceptionUtil;
import net.sourceforge.tuned.ui.ActionPopup; import net.sourceforge.tuned.ui.ActionPopup;
import net.sourceforge.tuned.ui.LoadingOverlayPane; import net.sourceforge.tuned.ui.LoadingOverlayPane;
import ca.odell.glazedlists.FunctionList;
import ca.odell.glazedlists.FunctionList.Function;
import ca.odell.glazedlists.event.ListEvent; import ca.odell.glazedlists.event.ListEvent;
import ca.odell.glazedlists.event.ListEventListener; import ca.odell.glazedlists.event.ListEventListener;
@ -156,10 +159,22 @@ public class RenamePanel extends FileBotPanel {
@Override @Override
public void actionPerformed(ActionEvent evt) { public void actionPerformed(ActionEvent evt) {
if (model.files().isEmpty() || isAutoMatchInProgress()) if (model.files().isEmpty() || isAutoMatchInProgress()) {
return; return;
}
AutoEpisodeListMatcher worker = new AutoEpisodeListMatcher(client, new ArrayList<FileEntry>(model.files()), matchAction.getMetrics()) { // clear names list
model.names().clear();
List<File> files = new FunctionList<FileEntry, File>(model.files(), new Function<FileEntry, File>() {
@Override
public File evaluate(FileEntry entry) {
return entry.getFile();
}
});
AutoEpisodeListMatcher worker = new AutoEpisodeListMatcher(client, files, matchAction.getMetrics()) {
@Override @Override
protected void done() { protected void done() {
@ -172,15 +187,15 @@ public class RenamePanel extends FileBotPanel {
List<StringEntry> invalidNames = new ArrayList<StringEntry>(); List<StringEntry> invalidNames = new ArrayList<StringEntry>();
for (Match<FileEntry, Episode> match : get()) { for (Match<File, Episode> match : get()) {
StringEntry name = new StringEntry(match.getCandidate()); StringEntry name = new StringEntry(match.getCandidate());
if (isInvalidFileName(name.toString())) { if (isInvalidFileName(name.toString())) {
invalidNames.add(name); invalidNames.add(name);
} }
names.add(new StringEntry(name)); names.add(name);
files.add(match.getValue()); files.add(new FileEntry(match.getValue()));
} }
if (!invalidNames.isEmpty()) { if (!invalidNames.isEmpty()) {
@ -193,13 +208,15 @@ public class RenamePanel extends FileBotPanel {
} }
} }
// add remaining file entries
for (File file : remainingFiles()) {
files.add(new FileEntry(file));
}
model.clear(); model.clear();
model.names().addAll(names); model.names().addAll(names);
model.files().addAll(files); model.files().addAll(files);
// add remaining file entries again
model.files().addAll(remainingFiles());
} catch (Exception e) { } catch (Exception e) {
Logger.getLogger("ui").log(Level.WARNING, ExceptionUtil.getRootCause(e).getMessage(), e); Logger.getLogger("ui").log(Level.WARNING, ExceptionUtil.getRootCause(e).getMessage(), e);
} }

View File

@ -4,6 +4,8 @@ package net.sourceforge.tuned;
import java.io.File; import java.io.File;
import java.io.FileFilter; import java.io.FileFilter;
import java.util.ArrayList;
import java.util.List;
public final class FileUtilities { public final class FileUtilities {
@ -115,6 +117,22 @@ public final class FileUtilities {
return true; return true;
} }
/**
 * Select all files that are accepted by at least one of the given filters.
 * The iteration order of the input is preserved and each file is added at most
 * once per occurrence, even if several filters accept it.
 * 
 * @param files the files to check
 * @param filters the filters to apply, tried in the given order
 * @return a new list holding the accepted files; empty if no filter is given or
 *         no file is accepted
 */
public static List<File> filter(Iterable<File> files, FileFilter... filters) {
	List<File> result = new ArrayList<File>();
	
	for (File candidate : files) {
		// stop asking filters as soon as one of them accepts the file
		boolean keep = false;
		
		for (int i = 0; i < filters.length && !keep; i++) {
			keep = filters[i].accept(candidate);
		}
		
		if (keep) {
			result.add(candidate);
		}
	}
	
	return result;
}
public static final FileFilter FOLDERS = new FileFilter() { public static final FileFilter FOLDERS = new FileFilter() {
@Override @Override

View File

@ -12,7 +12,7 @@ import org.junit.Test;
public class SeriesNameMatcherTest { public class SeriesNameMatcherTest {
private static SeriesNameMatcher matcher = new SeriesNameMatcher(5); private static SeriesNameMatcher matcher = new SeriesNameMatcher();
@Test @Test