mirror of
https://github.com/mitb-archive/filebot
synced 2024-12-23 16:28:51 -05:00
* fine-tune new match set auto-detection and use in cmdline as well
* added final string similarity pass to matching cascade
This commit is contained in:
parent
3af542f195
commit
d7d37104c4
@ -129,22 +129,37 @@ public class CmdlineOperations implements CmdlineInterface {
|
||||
CLILogger.config(format("Rename episodes using [%s]", db.getName()));
|
||||
List<File> mediaFiles = filter(files, VIDEO_FILES, SUBTITLE_FILES);
|
||||
|
||||
// auto-detect series name if not given
|
||||
Collection<String> seriesNames = (query == null) ? detectQuery(mediaFiles, strict) : singleton(query);
|
||||
|
||||
// fetch episode data
|
||||
Set<Episode> episodes = fetchEpisodeSet(db, seriesNames, locale, strict);
|
||||
|
||||
if (episodes.isEmpty()) {
|
||||
throw new Exception("Failed to fetch episode data");
|
||||
}
|
||||
|
||||
// similarity metrics for matching
|
||||
SimilarityMetric[] sequence = strict ? StrictEpisodeMetrics.defaultSequence(false) : EpisodeMetrics.defaultSequence(false);
|
||||
|
||||
List<Match<File, Episode>> matches = new ArrayList<Match<File, Episode>>();
|
||||
matches.addAll(matchEpisodes(filter(mediaFiles, VIDEO_FILES), episodes, sequence));
|
||||
matches.addAll(matchEpisodes(filter(mediaFiles, SUBTITLE_FILES), episodes, sequence));
|
||||
|
||||
// auto-determine optimal batch sets
|
||||
for (Entry<Set<File>, Set<String>> sameSeriesGroup : mapSeriesNamesByFiles(mediaFiles).entrySet()) {
|
||||
List<List<File>> batchSets = new ArrayList<List<File>>();
|
||||
|
||||
if (sameSeriesGroup.getValue() != null && sameSeriesGroup.getValue().size() > 0) {
|
||||
// handle series name batch set all at once
|
||||
batchSets.add(new ArrayList<File>(sameSeriesGroup.getKey()));
|
||||
} else {
|
||||
// these files don't seem to belong to any series -> handle folder per folder
|
||||
batchSets.addAll(mapByFolder(sameSeriesGroup.getKey()).values());
|
||||
}
|
||||
|
||||
for (List<File> batch : batchSets) {
|
||||
// auto-detect series name if not given
|
||||
Collection<String> seriesNames = (query == null) ? detectQuery(batch, strict) : singleton(query);
|
||||
|
||||
// fetch episode data
|
||||
Set<Episode> episodes = fetchEpisodeSet(db, seriesNames, locale, strict);
|
||||
|
||||
if (episodes.size() > 0) {
|
||||
matches.addAll(matchEpisodes(filter(mediaFiles, VIDEO_FILES), episodes, sequence));
|
||||
matches.addAll(matchEpisodes(filter(mediaFiles, SUBTITLE_FILES), episodes, sequence));
|
||||
} else {
|
||||
CLILogger.warning("Failed to fetch episode data: " + mapByFolder(batch).keySet());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (matches.isEmpty()) {
|
||||
throw new Exception("Unable to match files to episode data");
|
||||
|
@ -35,7 +35,7 @@ import net.sourceforge.filebot.web.TheTVDBClient.TheTVDBSearchResult;
|
||||
|
||||
public class MediaDetection {
|
||||
|
||||
public static Map<Set<File>, Set<String>> mapFoldersBySeriesNames(Collection<File> files) throws Exception {
|
||||
public static Map<Set<File>, Set<String>> mapSeriesNamesByFiles(Collection<File> files) throws Exception {
|
||||
SortedMap<File, List<File>> filesByFolder = mapByFolder(filter(files, VIDEO_FILES, SUBTITLE_FILES));
|
||||
|
||||
// map series names by folder
|
||||
@ -64,7 +64,7 @@ public class MediaDetection {
|
||||
}
|
||||
|
||||
// join both sets
|
||||
Map<Set<File>, Set<String>> matchSets = new HashMap<Set<File>, Set<String>>();
|
||||
Map<Set<File>, Set<String>> batchSets = new HashMap<Set<File>, Set<String>>();
|
||||
|
||||
while (seriesNamesByFolder.size() > 0) {
|
||||
Set<String> combinedNameSet = new TreeSet<String>(String.CASE_INSENSITIVE_ORDER);
|
||||
@ -90,13 +90,22 @@ public class MediaDetection {
|
||||
for (File folder : combinedFolderSet) {
|
||||
combinedFileSet.addAll(filesByFolder.get(folder));
|
||||
}
|
||||
matchSets.put(combinedFileSet, combinedNameSet);
|
||||
batchSets.put(combinedFileSet, combinedNameSet);
|
||||
|
||||
// set folders as accounted for
|
||||
seriesNamesByFolder.keySet().removeAll(combinedFolderSet);
|
||||
}
|
||||
|
||||
return matchSets;
|
||||
// handle files that have not been matched to a batch set yet
|
||||
Set<File> remainingFiles = new HashSet<File>(files);
|
||||
for (Set<File> batch : batchSets.keySet()) {
|
||||
remainingFiles.removeAll(batch);
|
||||
}
|
||||
if (remainingFiles.size() > 0) {
|
||||
batchSets.put(remainingFiles, null);
|
||||
}
|
||||
|
||||
return batchSets;
|
||||
}
|
||||
|
||||
|
||||
|
@ -295,15 +295,16 @@ public enum EpisodeMetrics implements SimilarityMetric {
|
||||
|
||||
|
||||
/**
 * Builds the ordered array of {@link SimilarityMetric} passes used for matching.
 *
 * <p>With {@code includeFileMetrics} set, the array starts with {@code FileSize} followed by a
 * {@code MetricCascade} of {@code FileName} and {@code EpisodeFunnel}; otherwise it starts
 * directly with {@code EpisodeFunnel}. Both variants then continue with {@code EpisodeBalancer},
 * {@code SubstringFields}, {@code Name} and {@code Numeric}.
 *
 * <p>NOTE(review): this listing is a rendered diff without +/- markers, so the body shows two
 * pass-description comment lists and two {@code return} statements per branch. Presumably the
 * 7-pass list and the returns ending in {@code new NameSimilarityMetric()} are the post-change
 * version (the commit message mentions an added final string similarity pass) -- confirm
 * against the repository before relying on either variant.
 *
 * @param includeFileMetrics whether the file-based metrics ({@code FileSize}, {@code FileName})
 *        are placed at the front of the sequence
 * @return a newly created array of metrics, in the order listed in the array initializer
 */
public static SimilarityMetric[] defaultSequence(boolean includeFileMetrics) {
|
||||
// 1. pass: match by file length (fast, but only works when matching torrents or files)
|
||||
// 2. pass: match by season / episode numbers
|
||||
// 3. pass: match by checking series / episode title against the file path
|
||||
// 4. pass: match by generic name similarity (slow, but most matches will have been determined in second pass)
|
||||
// 5. pass: match by generic numeric similarity
|
||||
// 1 pass: divide by file length (only works for matching torrent entries or files)
|
||||
// 2-3 pass: divide by title or season / episode numbers
|
||||
// 4 pass: divide by folder / file name and show name / episode title
|
||||
// 5 pass: divide by name (rounded into n levels)
|
||||
// 6 pass: divide by generic numeric similarity
|
||||
// 7 pass: resolve remaining collisions via absolute string similarity
|
||||
if (includeFileMetrics) {
|
||||
return new SimilarityMetric[] { FileSize, new MetricCascade(FileName, EpisodeFunnel), EpisodeBalancer, SubstringFields, Name, Numeric };
|
||||
return new SimilarityMetric[] { FileSize, new MetricCascade(FileName, EpisodeFunnel), EpisodeBalancer, SubstringFields, Name, Numeric, new NameSimilarityMetric() };
|
||||
} else {
|
||||
return new SimilarityMetric[] { EpisodeFunnel, EpisodeBalancer, SubstringFields, Name, Numeric };
|
||||
return new SimilarityMetric[] { EpisodeFunnel, EpisodeBalancer, SubstringFields, Name, Numeric, new NameSimilarityMetric() };
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -20,6 +20,7 @@ import java.util.LinkedHashSet;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map.Entry;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.Callable;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
@ -173,14 +174,26 @@ class EpisodeListMatcher implements AutoCompleteMatcher {
|
||||
List<Callable<List<Match<File, ?>>>> taskPerFolder = new ArrayList<Callable<List<Match<File, ?>>>>();
|
||||
|
||||
// detect series names and create episode list fetch tasks
|
||||
for (final Set<File> folder : mapFoldersBySeriesNames(mediaFiles).keySet()) {
|
||||
taskPerFolder.add(new Callable<List<Match<File, ?>>>() {
|
||||
|
||||
@Override
|
||||
public List<Match<File, ?>> call() throws Exception {
|
||||
return matchEpisodeSet(new ArrayList<File>(folder), locale, autodetection, parent);
|
||||
}
|
||||
});
|
||||
for (Entry<Set<File>, Set<String>> sameSeriesGroup : mapSeriesNamesByFiles(mediaFiles).entrySet()) {
|
||||
List<List<File>> batchSets = new ArrayList<List<File>>();
|
||||
|
||||
if (sameSeriesGroup.getValue() != null && sameSeriesGroup.getValue().size() > 0) {
|
||||
// handle series name batch set all at once
|
||||
batchSets.add(new ArrayList<File>(sameSeriesGroup.getKey()));
|
||||
} else {
|
||||
// these files don't seem to belong to any series -> handle folder per folder
|
||||
batchSets.addAll(mapByFolder(sameSeriesGroup.getKey()).values());
|
||||
}
|
||||
|
||||
for (final List<File> batchSet : batchSets) {
|
||||
taskPerFolder.add(new Callable<List<Match<File, ?>>>() {
|
||||
|
||||
@Override
|
||||
public List<Match<File, ?>> call() throws Exception {
|
||||
return matchEpisodeSet(batchSet, locale, autodetection, parent);
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// match folder per folder in parallel
|
||||
|
Loading…
Reference in New Issue
Block a user