From 7b61757fd719f477a505ea44342fa53f621f7114 Mon Sep 17 00:00:00 2001 From: Reinhard Pointner Date: Tue, 3 Feb 2009 20:36:57 +0000 Subject: [PATCH] * improved auto episode list matching --- .../sourceforge/filebot/FileBotUtilities.java | 12 ++ .../filebot/similarity/SeriesNameMatcher.java | 36 +++-- .../panel/rename/AutoEpisodeListMatcher.java | 125 +++++++++++------- .../filebot/ui/panel/rename/RenamePanel.java | 33 +++-- .../net/sourceforge/tuned/FileUtilities.java | 18 +++ .../similarity/SeriesNameMatcherTest.java | 2 +- 6 files changed, 146 insertions(+), 80 deletions(-) diff --git a/source/net/sourceforge/filebot/FileBotUtilities.java b/source/net/sourceforge/filebot/FileBotUtilities.java index 18d92dfa..125d3902 100644 --- a/source/net/sourceforge/filebot/FileBotUtilities.java +++ b/source/net/sourceforge/filebot/FileBotUtilities.java @@ -100,6 +100,18 @@ public final class FileBotUtilities { public static final FileFilter LIST_FILES = new ExtensionFileFilter("txt", "list", ""); public static final FileFilter SUBTITLE_FILES = new ExtensionFileFilter("srt", "sub", "ssa", "ass", "smi"); + /** + * This filter does not filter by extension, but file size. All files larger than 10 MB + * will be accepted. + */ + public static final FileFilter MOVIE_FILES = new FileFilter() { + + @Override + public boolean accept(File file) { + return file.length() > 10 * FileUtilities.MEGA; + } + }; + /** * Dummy constructor to prevent instantiation. diff --git a/source/net/sourceforge/filebot/similarity/SeriesNameMatcher.java b/source/net/sourceforge/filebot/similarity/SeriesNameMatcher.java index 6306f0fe..683f2fa9 100644 --- a/source/net/sourceforge/filebot/similarity/SeriesNameMatcher.java +++ b/source/net/sourceforge/filebot/similarity/SeriesNameMatcher.java @@ -26,23 +26,13 @@ public class SeriesNameMatcher { protected final SeasonEpisodeMatcher seasonEpisodeMatcher = new SeasonEpisodeMatcher(); - protected final int threshold; - - public SeriesNameMatcher(int threshold) { - if (threshold < 0) - throw new IllegalArgumentException("threshold must be greater than 0"); - - this.threshold = threshold; - } - - public String match(File file) { return match(file.getName(), file.getParent()); } - public Collection matchAll(File... files) { + public Collection matchAll(File[] files) { SeriesNameCollection seriesNames = new SeriesNameCollection(); // group files by parent folder @@ -62,14 +52,17 @@ public class SeriesNameMatcher { } - public Collection matchAll(String... names) { + public Collection matchAll(String[] names) { SeriesNameCollection seriesNames = new SeriesNameCollection(); + // allow matching of a small number of episodes, by setting threshold = length if length < 5 + int threshold = Math.min(names.length, 5); + // 1. use pattern matching with frequency threshold - seriesNames.addAll(flatMatchAll(names)); + seriesNames.addAll(flatMatchAll(names, threshold)); // 2. match common word sequences - seriesNames.addAll(deepMatchAll(names)); + seriesNames.addAll(deepMatchAll(names, threshold)); return seriesNames; } @@ -82,7 +75,7 @@ public class SeriesNameMatcher { * @return series names that have been matched one or multiple times depending on the * threshold */ - private Collection flatMatchAll(String[] names) { + private Collection flatMatchAll(String[] names, int threshold) { ThresholdCollection seriesNames = new ThresholdCollection(threshold, String.CASE_INSENSITIVE_ORDER); for (String name : names) { @@ -103,7 +96,7 @@ public class SeriesNameMatcher { * @param names list of episode names * @return all common word sequences that have been found */ - private Collection deepMatchAll(String[] names) { + private Collection deepMatchAll(String[] names, int threshold) { // can't use common word sequence matching for less than 2 names if (names.length < 2 || names.length < threshold) { return Collections.emptySet(); @@ -120,8 +113,8 @@ public class SeriesNameMatcher { List results = new ArrayList(); // split list in two and try to match common word sequence on those - results.addAll(deepMatchAll(Arrays.copyOfRange(names, 0, names.length / 2))); - results.addAll(deepMatchAll(Arrays.copyOfRange(names, names.length / 2, names.length))); + results.addAll(deepMatchAll(Arrays.copyOfRange(names, 0, names.length / 2), threshold)); + results.addAll(deepMatchAll(Arrays.copyOfRange(names, names.length / 2, names.length), threshold)); return results; } @@ -173,8 +166,9 @@ public class SeriesNameMatcher { /** * Try to match a series name from the first common word sequence. * - * @param names various episode names (5 or more for accurate results) + * @param names various episode names (at least two) * @return a word sequence all episode names have in common, or null + * @throws IllegalArgumentException if less than 2 episode names are given */ public String matchByFirstCommonWordSequence(String... names) { if (names.length < 2) { @@ -301,7 +295,7 @@ public class SeriesNameMatcher { int upper = 0; int lower = 0; - Scanner scanner = new Scanner(s); // Scanner has white space delimiter by default + Scanner scanner = new Scanner(s); // Scanner uses a white space delimiter by default while (scanner.hasNext()) { char c = scanner.next().charAt(0); @@ -312,7 +306,7 @@ public class SeriesNameMatcher { upper++; } - // give upper case characters a slight boost + // give upper case characters a slight boost over lower case characters return (lower + (upper * 1.01f)) / Math.abs(lower - upper); } diff --git a/source/net/sourceforge/filebot/ui/panel/rename/AutoEpisodeListMatcher.java b/source/net/sourceforge/filebot/ui/panel/rename/AutoEpisodeListMatcher.java index 64e72b2b..89c83792 100644 --- a/source/net/sourceforge/filebot/ui/panel/rename/AutoEpisodeListMatcher.java +++ b/source/net/sourceforge/filebot/ui/panel/rename/AutoEpisodeListMatcher.java @@ -2,16 +2,20 @@ package net.sourceforge.filebot.ui.panel.rename; +import static net.sourceforge.filebot.FileBotUtilities.MOVIE_FILES; import static net.sourceforge.filebot.FileBotUtilities.SUBTITLE_FILES; import static net.sourceforge.filebot.web.Episode.formatEpisodeNumbers; -import static net.sourceforge.tuned.FileUtilities.FILES; import java.io.File; +import java.io.FileFilter; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collection; import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; +import java.util.LinkedList; import java.util.List; +import java.util.Map; import java.util.concurrent.Callable; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; @@ -26,52 +30,42 @@ import net.sourceforge.filebot.similarity.SimilarityMetric; import net.sourceforge.filebot.web.Episode; import net.sourceforge.filebot.web.EpisodeListClient; import net.sourceforge.filebot.web.SearchResult; +import net.sourceforge.tuned.FileUtilities; -class AutoEpisodeListMatcher extends SwingWorker>, Void> { +class AutoEpisodeListMatcher extends SwingWorker>, Void> { - private final List remainingFiles = new ArrayList(); - - private final List files; + private final List files; private final EpisodeListClient client; private final Collection metrics; - public AutoEpisodeListMatcher(EpisodeListClient client, List files, Collection metrics) { + public AutoEpisodeListMatcher(EpisodeListClient client, List files, Collection metrics) { this.client = client; - this.files = files; - this.metrics = metrics; + this.files = new LinkedList(files); + this.metrics = new ArrayList(metrics); } - public Collection remainingFiles() { - return Collections.unmodifiableCollection(remainingFiles); + public Collection remainingFiles() { + return Collections.unmodifiableCollection(files); } - protected Collection matchSeriesNames(List episodes) { - File[] files = new File[episodes.size()]; - - for (int i = 0; i < files.length; i++) { - files[i] = episodes.get(i).getFile(); - } - - // allow matching of a small number of episodes, by setting threshold = length if length < 5 - int threshold = Math.min(files.length, 5); - - return new SeriesNameMatcher(threshold).matchAll(files); + protected Collection detectSeriesNames(Collection files) { + // detect series name(s) from files + return new SeriesNameMatcher().matchAll(files.toArray(new File[files.size()])); } - @Override - protected List> doInBackground() throws Exception { - List>> fetchTasks = new ArrayList>>(); + protected List fetchEpisodeList(Collection seriesNames) throws Exception { + List>> tasks = new ArrayList>>(); - // match series names and create episode list fetch tasks - for (final String seriesName : matchSeriesNames(files)) { - fetchTasks.add(new Callable>() { + // detect series names and create episode list fetch tasks + for (final String seriesName : seriesNames) { + tasks.add(new Callable>() { @Override public Collection call() throws Exception { @@ -85,47 +79,78 @@ class AutoEpisodeListMatcher extends SwingWorker> }); } - if (fetchTasks.isEmpty()) { + if (tasks.isEmpty()) throw new IllegalArgumentException("Failed to auto-detect series name."); - } // fetch episode lists concurrently - List episodeList = new ArrayList(); - ExecutorService executor = Executors.newFixedThreadPool(fetchTasks.size()); + List episodes = new ArrayList(); + ExecutorService executor = Executors.newFixedThreadPool(tasks.size()); - for (Future> future : executor.invokeAll(fetchTasks)) { - episodeList.addAll(future.get()); + for (Future> future : executor.invokeAll(tasks)) { + episodes.addAll(future.get()); } + // destroy background threads executor.shutdown(); - List> matches = new ArrayList>(); + return episodes; + } + + + @Override + protected List> doInBackground() throws Exception { - for (List entryList : splitByFileType(files)) { - Matcher matcher = new Matcher(entryList, episodeList, metrics); + // focus on movie and subtitle files + List mediaFiles = FileUtilities.filter(files, MOVIE_FILES, SUBTITLE_FILES); + + // detect series name and fetch episode list + List episodes = fetchEpisodeList(detectSeriesNames(mediaFiles)); + + List> matches = new ArrayList>(); + + // group by subtitles first and then by files in general + for (List filesPerType : mapByFileType(files, MOVIE_FILES, SUBTITLE_FILES).values()) { + Matcher matcher = new Matcher(filesPerType, episodes, metrics); matches.addAll(matcher.match()); - remainingFiles.addAll(matcher.remainingValues()); + } + + // restore original order + Collections.sort(matches, new Comparator>() { + + @Override + public int compare(Match o1, Match o2) { + return files.indexOf(o1.getValue()) - files.indexOf(o2.getValue()); + } + }); + + // update remaining files + for (Match match : matches) { + files.remove(match.getValue()); } return matches; } - @SuppressWarnings("unchecked") - protected Collection> splitByFileType(Collection files) { - List subtitles = new ArrayList(); - List other = new ArrayList(); + protected Map> mapByFileType(Collection files, FileFilter... filters) { + // initialize map, keep filter order + Map> map = new HashMap>(filters.length); - for (FileEntry file : files) { - // check for for subtitles first, then files in general - if (SUBTITLE_FILES.accept(file.getFile())) { - subtitles.add(file); - } else if (FILES.accept(file.getFile())) { - other.add(file); + // initialize value lists + for (FileFilter filter : filters) { + map.put(filter, new ArrayList()); + } + + for (File file : files) { + for (FileFilter filter : filters) { + if (filter.accept(file)) { + // put each value into one group only + map.get(filter).add(file); + break; + } } } - return Arrays.asList(other, subtitles); + return map; } - } diff --git a/source/net/sourceforge/filebot/ui/panel/rename/RenamePanel.java b/source/net/sourceforge/filebot/ui/panel/rename/RenamePanel.java index a5cd2ebb..332f0944 100644 --- a/source/net/sourceforge/filebot/ui/panel/rename/RenamePanel.java +++ b/source/net/sourceforge/filebot/ui/panel/rename/RenamePanel.java @@ -7,6 +7,7 @@ import static net.sourceforge.tuned.ui.LoadingOverlayPane.LOADING_PROPERTY; import static net.sourceforge.filebot.FileBotUtilities.*; import java.awt.Insets; import java.awt.event.ActionEvent; +import java.io.File; import java.util.ArrayList; import java.util.List; import java.util.logging.Level; @@ -33,6 +34,8 @@ import net.sourceforge.filebot.web.TheTVDBClient; import net.sourceforge.tuned.ExceptionUtil; import net.sourceforge.tuned.ui.ActionPopup; import net.sourceforge.tuned.ui.LoadingOverlayPane; +import ca.odell.glazedlists.FunctionList; +import ca.odell.glazedlists.FunctionList.Function; import ca.odell.glazedlists.event.ListEvent; import ca.odell.glazedlists.event.ListEventListener; @@ -156,10 +159,22 @@ public class RenamePanel extends FileBotPanel { @Override public void actionPerformed(ActionEvent evt) { - if (model.files().isEmpty() || isAutoMatchInProgress()) + if (model.files().isEmpty() || isAutoMatchInProgress()) { return; + } - AutoEpisodeListMatcher worker = new AutoEpisodeListMatcher(client, new ArrayList(model.files()), matchAction.getMetrics()) { + // clear names list + model.names().clear(); + + List files = new FunctionList(model.files(), new Function() { + + @Override + public File evaluate(FileEntry entry) { + return entry.getFile(); + } + }); + + AutoEpisodeListMatcher worker = new AutoEpisodeListMatcher(client, files, matchAction.getMetrics()) { @Override protected void done() { @@ -172,15 +187,15 @@ public class RenamePanel extends FileBotPanel { List invalidNames = new ArrayList(); - for (Match match : get()) { + for (Match match : get()) { StringEntry name = new StringEntry(match.getCandidate()); if (isInvalidFileName(name.toString())) { invalidNames.add(name); } - names.add(new StringEntry(name)); - files.add(match.getValue()); + names.add(name); + files.add(new FileEntry(match.getValue())); } if (!invalidNames.isEmpty()) { @@ -193,13 +208,15 @@ public class RenamePanel extends FileBotPanel { } } + // add remaining file entries + for (File file : remainingFiles()) { + files.add(new FileEntry(file)); + } + model.clear(); model.names().addAll(names); model.files().addAll(files); - - // add remaining file entries again - model.files().addAll(remainingFiles()); } catch (Exception e) { Logger.getLogger("ui").log(Level.WARNING, ExceptionUtil.getRootCause(e).getMessage(), e); } diff --git a/source/net/sourceforge/tuned/FileUtilities.java b/source/net/sourceforge/tuned/FileUtilities.java index d97077f2..7edb9cdb 100644 --- a/source/net/sourceforge/tuned/FileUtilities.java +++ b/source/net/sourceforge/tuned/FileUtilities.java @@ -4,6 +4,8 @@ package net.sourceforge.tuned; import java.io.File; import java.io.FileFilter; +import java.util.ArrayList; +import java.util.List; public final class FileUtilities { @@ -115,6 +117,22 @@ public final class FileUtilities { return true; } + + public static List filter(Iterable files, FileFilter... filters) { + List accepted = new ArrayList(); + + for (File file : files) { + for (FileFilter filter : filters) { + if (filter.accept(file)) { + accepted.add(file); + break; + } + } + } + + return accepted; + } + public static final FileFilter FOLDERS = new FileFilter() { @Override diff --git a/test/net/sourceforge/filebot/similarity/SeriesNameMatcherTest.java b/test/net/sourceforge/filebot/similarity/SeriesNameMatcherTest.java index 087e4f88..812b2936 100644 --- a/test/net/sourceforge/filebot/similarity/SeriesNameMatcherTest.java +++ b/test/net/sourceforge/filebot/similarity/SeriesNameMatcherTest.java @@ -12,7 +12,7 @@ import org.junit.Test; public class SeriesNameMatcherTest { - private static SeriesNameMatcher matcher = new SeriesNameMatcher(5); + private static SeriesNameMatcher matcher = new SeriesNameMatcher(); @Test