* improved auto episode list matching

This commit is contained in:
Reinhard Pointner 2009-02-03 20:36:57 +00:00
parent f7fdc5b5db
commit 7b61757fd7
6 changed files with 146 additions and 80 deletions

View File

@ -100,6 +100,18 @@ public final class FileBotUtilities {
public static final FileFilter LIST_FILES = new ExtensionFileFilter("txt", "list", "");
public static final FileFilter SUBTITLE_FILES = new ExtensionFileFilter("srt", "sub", "ssa", "ass", "smi");
/**
 * Size-based filter for movie files. The extension of a file is never looked at;
 * a file is accepted if and only if its length is greater than 10 MB.
 */
public static final FileFilter MOVIE_FILES = new FileFilter() {
	
	@Override
	public boolean accept(File candidate) {
		// strictly greater than the limit, a file of exactly 10 MB is rejected
		return 10 * FileUtilities.MEGA < candidate.length();
	}
};
/**
* Dummy constructor to prevent instantiation.

View File

@ -26,23 +26,13 @@ public class SeriesNameMatcher {
protected final SeasonEpisodeMatcher seasonEpisodeMatcher = new SeasonEpisodeMatcher();
protected final int threshold;
public SeriesNameMatcher(int threshold) {
if (threshold < 0)
throw new IllegalArgumentException("threshold must be greater than 0");
this.threshold = threshold;
}
/**
 * Convenience overload that matches a file by its name and the path of its parent.
 *
 * @param file file to examine
 * @return whatever {@code match(String, String)} yields for the name of the file and
 *         its parent path
 */
public String match(File file) {
	String name = file.getName();
	String parent = file.getParent();
	
	return match(name, parent);
}
public Collection<String> matchAll(File... files) {
public Collection<String> matchAll(File[] files) {
SeriesNameCollection seriesNames = new SeriesNameCollection();
// group files by parent folder
@ -62,14 +52,17 @@ public class SeriesNameMatcher {
}
public Collection<String> matchAll(String... names) {
public Collection<String> matchAll(String[] names) {
SeriesNameCollection seriesNames = new SeriesNameCollection();
// allow matching of a small number of episodes, by setting threshold = length if length < 5
int threshold = Math.min(names.length, 5);
// 1. use pattern matching with frequency threshold
seriesNames.addAll(flatMatchAll(names));
seriesNames.addAll(flatMatchAll(names, threshold));
// 2. match common word sequences
seriesNames.addAll(deepMatchAll(names));
seriesNames.addAll(deepMatchAll(names, threshold));
return seriesNames;
}
@ -82,7 +75,7 @@ public class SeriesNameMatcher {
* @return series names that have been matched one or multiple times depending on the
* threshold
*/
private Collection<String> flatMatchAll(String[] names) {
private Collection<String> flatMatchAll(String[] names, int threshold) {
ThresholdCollection<String> seriesNames = new ThresholdCollection<String>(threshold, String.CASE_INSENSITIVE_ORDER);
for (String name : names) {
@ -103,7 +96,7 @@ public class SeriesNameMatcher {
* @param names list of episode names
* @return all common word sequences that have been found
*/
private Collection<String> deepMatchAll(String[] names) {
private Collection<String> deepMatchAll(String[] names, int threshold) {
// can't use common word sequence matching for less than 2 names
if (names.length < 2 || names.length < threshold) {
return Collections.emptySet();
@ -120,8 +113,8 @@ public class SeriesNameMatcher {
List<String> results = new ArrayList<String>();
// split list in two and try to match common word sequence on those
results.addAll(deepMatchAll(Arrays.copyOfRange(names, 0, names.length / 2)));
results.addAll(deepMatchAll(Arrays.copyOfRange(names, names.length / 2, names.length)));
results.addAll(deepMatchAll(Arrays.copyOfRange(names, 0, names.length / 2), threshold));
results.addAll(deepMatchAll(Arrays.copyOfRange(names, names.length / 2, names.length), threshold));
return results;
}
@ -173,8 +166,9 @@ public class SeriesNameMatcher {
/**
* Try to match a series name from the first common word sequence.
*
* @param names various episode names (5 or more for accurate results)
* @param names various episode names (at least two)
* @return a word sequence all episode names have in common, or null
* @throws IllegalArgumentException if less than 2 episode names are given
*/
public String matchByFirstCommonWordSequence(String... names) {
if (names.length < 2) {
@ -301,7 +295,7 @@ public class SeriesNameMatcher {
int upper = 0;
int lower = 0;
Scanner scanner = new Scanner(s); // Scanner has white space delimiter by default
Scanner scanner = new Scanner(s); // Scanner uses a white space delimiter by default
while (scanner.hasNext()) {
char c = scanner.next().charAt(0);
@ -312,7 +306,7 @@ public class SeriesNameMatcher {
upper++;
}
// give upper case characters a slight boost
// give upper case characters a slight boost over lower case characters
return (lower + (upper * 1.01f)) / Math.abs(lower - upper);
}

View File

@ -2,16 +2,20 @@
package net.sourceforge.filebot.ui.panel.rename;
import static net.sourceforge.filebot.FileBotUtilities.MOVIE_FILES;
import static net.sourceforge.filebot.FileBotUtilities.SUBTITLE_FILES;
import static net.sourceforge.filebot.web.Episode.formatEpisodeNumbers;
import static net.sourceforge.tuned.FileUtilities.FILES;
import java.io.File;
import java.io.FileFilter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
@ -26,52 +30,42 @@ import net.sourceforge.filebot.similarity.SimilarityMetric;
import net.sourceforge.filebot.web.Episode;
import net.sourceforge.filebot.web.EpisodeListClient;
import net.sourceforge.filebot.web.SearchResult;
import net.sourceforge.tuned.FileUtilities;
class AutoEpisodeListMatcher extends SwingWorker<List<Match<FileEntry, Episode>>, Void> {
class AutoEpisodeListMatcher extends SwingWorker<List<Match<File, Episode>>, Void> {
private final List<FileEntry> remainingFiles = new ArrayList<FileEntry>();
private final List<FileEntry> files;
private final List<File> files;
private final EpisodeListClient client;
private final Collection<SimilarityMetric> metrics;
public AutoEpisodeListMatcher(EpisodeListClient client, List<FileEntry> files, Collection<SimilarityMetric> metrics) {
public AutoEpisodeListMatcher(EpisodeListClient client, List<File> files, Collection<SimilarityMetric> metrics) {
this.client = client;
this.files = files;
this.metrics = metrics;
this.files = new LinkedList<File>(files);
this.metrics = new ArrayList<SimilarityMetric>(metrics);
}
public Collection<FileEntry> remainingFiles() {
return Collections.unmodifiableCollection(remainingFiles);
public Collection<File> remainingFiles() {
return Collections.unmodifiableCollection(files);
}
protected Collection<String> matchSeriesNames(List<FileEntry> episodes) {
File[] files = new File[episodes.size()];
for (int i = 0; i < files.length; i++) {
files[i] = episodes.get(i).getFile();
}
// allow matching of a small number of episodes, by setting threshold = length if length < 5
int threshold = Math.min(files.length, 5);
return new SeriesNameMatcher(threshold).matchAll(files);
protected Collection<String> detectSeriesNames(Collection<File> files) {
// detect series name(s) from files
return new SeriesNameMatcher().matchAll(files.toArray(new File[files.size()]));
}
@Override
protected List<Match<FileEntry, Episode>> doInBackground() throws Exception {
List<Callable<Collection<Episode>>> fetchTasks = new ArrayList<Callable<Collection<Episode>>>();
protected List<Episode> fetchEpisodeList(Collection<String> seriesNames) throws Exception {
List<Callable<Collection<Episode>>> tasks = new ArrayList<Callable<Collection<Episode>>>();
// match series names and create episode list fetch tasks
for (final String seriesName : matchSeriesNames(files)) {
fetchTasks.add(new Callable<Collection<Episode>>() {
// detect series names and create episode list fetch tasks
for (final String seriesName : seriesNames) {
tasks.add(new Callable<Collection<Episode>>() {
@Override
public Collection<Episode> call() throws Exception {
@ -85,47 +79,78 @@ class AutoEpisodeListMatcher extends SwingWorker<List<Match<FileEntry, Episode>>
});
}
if (fetchTasks.isEmpty()) {
if (tasks.isEmpty())
throw new IllegalArgumentException("Failed to auto-detect series name.");
}
// fetch episode lists concurrently
List<Episode> episodeList = new ArrayList<Episode>();
ExecutorService executor = Executors.newFixedThreadPool(fetchTasks.size());
List<Episode> episodes = new ArrayList<Episode>();
ExecutorService executor = Executors.newFixedThreadPool(tasks.size());
for (Future<Collection<Episode>> future : executor.invokeAll(fetchTasks)) {
episodeList.addAll(future.get());
for (Future<Collection<Episode>> future : executor.invokeAll(tasks)) {
episodes.addAll(future.get());
}
// destroy background threads
executor.shutdown();
List<Match<FileEntry, Episode>> matches = new ArrayList<Match<FileEntry, Episode>>();
return episodes;
}
/**
 * Detect the series name(s) from the movie and subtitle files, fetch the episode
 * list(s) for them and match the files against the episodes, one file type at a time.
 * Every matched file is removed from {@code files}, so that afterwards
 * {@code remainingFiles()} exposes the unmatched files only.
 *
 * @return matches between files and episodes, sorted by the position of the file in
 *         the original file list
 * @throws Exception if detecting the series name or fetching the episode list fails
 */
@Override
protected List<Match<File, Episode>> doInBackground() throws Exception {
for (List<FileEntry> entryList : splitByFileType(files)) {
Matcher<FileEntry, Episode> matcher = new Matcher<FileEntry, Episode>(entryList, episodeList, metrics);
// focus on movie and subtitle files
List<File> mediaFiles = FileUtilities.filter(files, MOVIE_FILES, SUBTITLE_FILES);
// detect series name and fetch episode list
List<Episode> episodes = fetchEpisodeList(detectSeriesNames(mediaFiles));
List<Match<File, Episode>> matches = new ArrayList<Match<File, Episode>>();
// match each file type on its own: files are grouped into movie files and subtitle
// files (a file accepted by both filters is grouped as movie file), and each group
// is matched against the complete episode list
for (List<File> filesPerType : mapByFileType(files, MOVIE_FILES, SUBTITLE_FILES).values()) {
Matcher<File, Episode> matcher = new Matcher<File, Episode>(filesPerType, episodes, metrics);
matches.addAll(matcher.match());
remainingFiles.addAll(matcher.remainingValues());
}
// restore original order, i.e. sort matches by the index of their file in the file list
Collections.sort(matches, new Comparator<Match<File, Episode>>() {
@Override
public int compare(Match<File, Episode> o1, Match<File, Episode> o2) {
return files.indexOf(o1.getValue()) - files.indexOf(o2.getValue());
}
});
// update remaining files: drop every file that has been matched
for (Match<File, Episode> match : matches) {
files.remove(match.getValue());
}
return matches;
}
@SuppressWarnings("unchecked")
protected Collection<List<FileEntry>> splitByFileType(Collection<FileEntry> files) {
List<FileEntry> subtitles = new ArrayList<FileEntry>();
List<FileEntry> other = new ArrayList<FileEntry>();
/**
 * Group the given files by the first filter that accepts them.
 *
 * @param files files to be grouped
 * @param filters filters in order of priority; each filter becomes a key of the map
 * @return map with one (possibly empty) list of files per filter; files that are not
 *         accepted by any filter are not part of the map
 */
protected Map<FileFilter, List<File>> mapByFileType(Collection<File> files, FileFilter... filters) {
// initialize map
// NOTE(review): the intent is to keep filter order, but HashMap makes no guarantee
// about iteration order, so values() need not follow the order of the filters;
// a LinkedHashMap would be required for that -- confirm and fix
Map<FileFilter, List<File>> map = new HashMap<FileFilter, List<File>>(filters.length);
for (FileEntry file : files) {
// check for subtitles first, then files in general
if (SUBTITLE_FILES.accept(file.getFile())) {
subtitles.add(file);
} else if (FILES.accept(file.getFile())) {
other.add(file);
// initialize value lists, so that every filter is mapped to a list, even if it accepts no file
for (FileFilter filter : filters) {
map.put(filter, new ArrayList<File>());
}
// assign each file to the first filter (in argument order) that accepts it
for (File file : files) {
for (FileFilter filter : filters) {
if (filter.accept(file)) {
// put each value into one group only
map.get(filter).add(file);
break;
}
}
}
return Arrays.asList(other, subtitles);
return map;
}
}

View File

@ -7,6 +7,7 @@ import static net.sourceforge.tuned.ui.LoadingOverlayPane.LOADING_PROPERTY;
import static net.sourceforge.filebot.FileBotUtilities.*;
import java.awt.Insets;
import java.awt.event.ActionEvent;
import java.io.File;
import java.util.ArrayList;
import java.util.List;
import java.util.logging.Level;
@ -33,6 +34,8 @@ import net.sourceforge.filebot.web.TheTVDBClient;
import net.sourceforge.tuned.ExceptionUtil;
import net.sourceforge.tuned.ui.ActionPopup;
import net.sourceforge.tuned.ui.LoadingOverlayPane;
import ca.odell.glazedlists.FunctionList;
import ca.odell.glazedlists.FunctionList.Function;
import ca.odell.glazedlists.event.ListEvent;
import ca.odell.glazedlists.event.ListEventListener;
@ -156,10 +159,22 @@ public class RenamePanel extends FileBotPanel {
@Override
public void actionPerformed(ActionEvent evt) {
if (model.files().isEmpty() || isAutoMatchInProgress())
if (model.files().isEmpty() || isAutoMatchInProgress()) {
return;
}
AutoEpisodeListMatcher worker = new AutoEpisodeListMatcher(client, new ArrayList<FileEntry>(model.files()), matchAction.getMetrics()) {
// clear names list
model.names().clear();
List<File> files = new FunctionList<FileEntry, File>(model.files(), new Function<FileEntry, File>() {
@Override
public File evaluate(FileEntry entry) {
return entry.getFile();
}
});
AutoEpisodeListMatcher worker = new AutoEpisodeListMatcher(client, files, matchAction.getMetrics()) {
@Override
protected void done() {
@ -172,15 +187,15 @@ public class RenamePanel extends FileBotPanel {
List<StringEntry> invalidNames = new ArrayList<StringEntry>();
for (Match<FileEntry, Episode> match : get()) {
for (Match<File, Episode> match : get()) {
StringEntry name = new StringEntry(match.getCandidate());
if (isInvalidFileName(name.toString())) {
invalidNames.add(name);
}
names.add(new StringEntry(name));
files.add(match.getValue());
names.add(name);
files.add(new FileEntry(match.getValue()));
}
if (!invalidNames.isEmpty()) {
@ -193,13 +208,15 @@ public class RenamePanel extends FileBotPanel {
}
}
// add remaining file entries
for (File file : remainingFiles()) {
files.add(new FileEntry(file));
}
model.clear();
model.names().addAll(names);
model.files().addAll(files);
// add remaining file entries again
model.files().addAll(remainingFiles());
} catch (Exception e) {
Logger.getLogger("ui").log(Level.WARNING, ExceptionUtil.getRootCause(e).getMessage(), e);
}

View File

@ -4,6 +4,8 @@ package net.sourceforge.tuned;
import java.io.File;
import java.io.FileFilter;
import java.util.ArrayList;
import java.util.List;
public final class FileUtilities {
@ -115,6 +117,22 @@ public final class FileUtilities {
return true;
}
/**
 * Collect all files that are accepted by at least one of the given filters. The
 * iteration order of the given files is retained, and a file is added only once per
 * occurrence, even if more than one filter accepts it.
 *
 * @param files files to be checked; elements may be of any subtype of {@link File}
 * @param filters filters to apply; checking a file stops at the first filter that
 *            accepts it
 * @return new modifiable list of all accepted files; empty if no files or no filters
 *         are given
 */
public static List<File> filter(Iterable<? extends File> files, FileFilter... filters) {
	List<File> accepted = new ArrayList<File>();
	
	for (File file : files) {
		for (FileFilter filter : filters) {
			if (filter.accept(file)) {
				accepted.add(file);
				
				// one accepting filter is enough, don't add the same file twice
				break;
			}
		}
	}
	
	return accepted;
}
public static final FileFilter FOLDERS = new FileFilter() {
@Override

View File

@ -12,7 +12,7 @@ import org.junit.Test;
public class SeriesNameMatcherTest {
private static SeriesNameMatcher matcher = new SeriesNameMatcher(5);
private static SeriesNameMatcher matcher = new SeriesNameMatcher();
@Test