/*
 * Mirror of https://github.com/mitb-archive/filebot (synced 2024-08-13 17:03:45 -04:00).
 * Original listing: 573 lines, 17 KiB.
 *
 * Note: this file contains Unicode characters that might be confused with other characters.
 * This is intentional (e.g. typographic quote characters used inside regular expressions).
 */

package net.filebot.format;
import static java.util.Arrays.*;
import static java.util.regex.Pattern.*;
import static java.util.stream.Collectors.*;
import static net.filebot.MediaTypes.*;
import static net.filebot.WebServices.*;
import static net.filebot.format.ExpressionFormatFunctions.*;
import static net.filebot.media.MediaDetection.*;
import static net.filebot.util.RegularExpressions.*;
import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.attribute.BasicFileAttributeView;
import java.nio.file.attribute.BasicFileAttributes;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Objects;
import java.util.TreeMap;
import java.util.function.Function;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.IntStream;
import java.util.stream.Stream;
import org.codehaus.groovy.runtime.DefaultGroovyMethods;
import org.codehaus.groovy.runtime.typehandling.DefaultTypeTransformation;
import com.ibm.icu.text.Transliterator;
import groovy.lang.Closure;
import net.filebot.Language;
import net.filebot.similarity.Normalization;
import net.filebot.util.FileUtilities;
import net.filebot.web.Episode;
import net.filebot.web.EpisodeInfo;
import net.filebot.web.Person;
import net.filebot.web.SeriesInfo;
import net.filebot.web.SimpleDate;
public class ExpressionFormatMethods {
* Convenience methods for String.toLowerCase() and String.toUpperCase()
public static String lower(String self) {
return self.toLowerCase();
public static String upper(String self) {
return self.toUpperCase();
* Pad strings or numbers with given characters ('0' by default).
* e.g. "1" -> "01"
public static String pad(String self, int length, String padding) {
while (self.length() < length) {
self = padding + self;
return self;
public static String pad(String self, int length) {
return pad(self, length, "0");
public static String pad(Number self, int length) {
return pad(self.toString(), length, "0");
* Return a substring matching the given pattern or break.
public static String match(String self, String pattern) throws Exception {
return match(self, pattern, -1);
public static String match(String self, String pattern, int matchGroup) throws Exception {
Matcher matcher = compile(pattern, CASE_INSENSITIVE | UNICODE_CHARACTER_CLASS | MULTILINE).matcher(self);
if (matcher.find()) {
return firstCapturingGroup(matcher, matchGroup);
} else {
throw new Exception("Pattern not found: " + self);
* Return a list of all matching patterns or break.
public static List<String> matchAll(String self, String pattern) throws Exception {
return matchAll(self, pattern, -1);
public static List<String> matchAll(String self, String pattern, int matchGroup) throws Exception {
List<String> matches = new ArrayList<String>();
Matcher matcher = compile(pattern, CASE_INSENSITIVE | UNICODE_CHARACTER_CLASS | MULTILINE).matcher(self);
while (matcher.find()) {
matches.add(firstCapturingGroup(matcher, matchGroup));
if (matches.isEmpty()) {
throw new Exception("Pattern not found: " + self);
return matches;
public static String firstCapturingGroup(Matcher self, int matchGroup) throws Exception {
int g = matchGroup < 0 ? self.groupCount() > 0 ? 1 : 0 : matchGroup;
// return the entire match
if (g == 0) {
return self.group();
// otherwise find first non-empty capturing group
return IntStream.rangeClosed(g, self.groupCount()).mapToObj(self::group).filter(Objects::nonNull).map(String::trim).filter(s -> s.length() > 0).findFirst().orElseThrow(() -> {
return new Exception(String.format("Capturing group %d not found", g));
public static String replaceAll(String self, String pattern) {
return self.replaceAll(pattern, "");
public static String removeAll(String self, String pattern) {
return compile(pattern, CASE_INSENSITIVE | UNICODE_CHARACTER_CLASS | MULTILINE).matcher(self).replaceAll("").trim();
public static String removeIllegalCharacters(String self) {
return FileUtilities.validateFileName(Normalization.normalizeQuotationMarks(self));
* Replace space characters with a given characters.
* e.g. "Doctor Who" -> "Doctor_Who"
public static String space(String self, String replacement) {
return Normalization.normalizeSpace(self, replacement);
* Replace colon to make the name more Windows friendly.
* e.g. "Sissi: The Young Empress" -> "Sissi - The Young Empress"
public static String colon(String self, String colon) {
return COLON.matcher(self).replaceAll(colon);
* Replace colon to make the name more Windows friendly.
* e.g. "12:00 A.M.-1:00 A.M." -> "12.00 A.M.-1.00 A.M."
public static String colon(String self, String ratio, String colon) {
return COLON.matcher(RATIO.matcher(self).replaceAll(ratio)).replaceAll(colon);
* Replace slash and backslash to make sure the result is not a file path.
* e.g. "V_MPEG4/ISO/AVC" -> "V_MPEG4.ISO.AVC"
public static String slash(String self, String replacement) {
return SLASH.matcher(self).replaceAll(replacement);
* Upper-case all initials.
* e.g. "The Day a new Demon was born" -> "The Day A New Demon Was Born"
public static String upperInitial(String self) {
return replaceHeadTail(self, String::toUpperCase, String::toString);
* Lower-case all letters that are not initials.
* e.g. "Gundam SEED" -> "Gundam Seed"
public static String lowerTrail(String self) {
return replaceHeadTail(self, String::toString, String::toLowerCase);
public static String replaceHeadTail(String self, Function<String, String> head, Function<String, String> tail) {
Matcher matcher = compile("\\b(['`´]|\\p{Alnum})(\\p{Alnum}*)\\b", UNICODE_CHARACTER_CLASS).matcher(self);
StringBuffer buffer = new StringBuffer();
while (matcher.find()) {
matcher.appendReplacement(buffer, head.apply(matcher.group(1)) + tail.apply(matcher.group(2)));
return matcher.appendTail(buffer).toString();
public static String sortName(String self) {
return sortName(self, "$2");
public static String sortName(String self, String replacement) {
return compile("^(The|A|An)\\s(.+)", CASE_INSENSITIVE | UNICODE_CHARACTER_CLASS).matcher(self).replaceFirst(replacement).trim();
public static String sortInitial(String self) {
// use primary initial, ignore The XY, A XY, etc
char c = ascii(sortName(self)).charAt(0);
if (Character.isDigit(c)) {
return "0-9";
} else if (Character.isLetter(c)) {
return String.valueOf(c).toUpperCase();
} else {
return null;
* Get acronym, i.e. first letter of each word.
* e.g. "Deep Space 9" -> "DS9"
public static String acronym(String self) {
return compile("\\s|\\B\\p{Alnum}+", UNICODE_CHARACTER_CLASS).matcher(space(sortName(self), " ")).replaceAll("");
public static String truncate(String self, int limit) {
if (limit >= self.length())
return self;
return self.substring(0, limit);
public static String truncate(String self, int hardLimit, String nonWordPattern) {
if (hardLimit >= self.length())
return self;
int softLimit = 0;
Matcher matcher = compile(nonWordPattern, CASE_INSENSITIVE | UNICODE_CHARACTER_CLASS).matcher(self);
while (matcher.find()) {
if (matcher.start() > hardLimit) {
softLimit = matcher.start();
return truncate(self, softLimit);
* Return substring before the given pattern.
public static String before(String self, String pattern) {
Matcher matcher = compile(pattern, CASE_INSENSITIVE | UNICODE_CHARACTER_CLASS).matcher(self);
// pattern was found, return leading substring, else return original value
return matcher.find() ? self.substring(0, matcher.start()).trim() : self;
* Return substring after the given pattern.
public static String after(String self, String pattern) {
Matcher matcher = compile(pattern, CASE_INSENSITIVE | UNICODE_CHARACTER_CLASS).matcher(self);
// pattern was found, return trailing substring, else return original value
return matcher.find() ? self.substring(matcher.end(), self.length()).trim() : self;
* Find a matcher that matches the given pattern (case-insensitive)
public static boolean findMatch(String self, String pattern) {
if (pattern == null || pattern.isEmpty())
return false;
return compile(pattern, CASE_INSENSITIVE | UNICODE_CHARACTER_CLASS).matcher(self).find();
* Find a matcher that matches the given pattern (case-insensitive) but matches only if the pattern is enclosed in word-boundaries
public static boolean findWordMatch(String self, String pattern) {
if (pattern == null || pattern.isEmpty())
return false;
return findMatch(self, "\\b(" + pattern + ")\\b");
* Replace trailing parenthesis including any leading whitespace.
* e.g. "The IT Crowd (UK)" -> "The IT Crowd"
public static String replaceTrailingBrackets(String self) {
return replaceTrailingBrackets(self, "");
public static String replaceTrailingBrackets(String self, String replacement) {
return compile("\\s*[(]([^)]*)[)]$", UNICODE_CHARACTER_CLASS).matcher(self).replaceAll(replacement);
* Replace 'part identifier'.
* e.g. "Today Is the Day: Part 1" -> "Today Is the Day, Part 1" or "Today Is the Day (1)" -> "Today Is the Day, Part 1"
public static String replacePart(String self) {
return replacePart(self, "");
public static String replacePart(String self, String replacement) {
// handle '(n)', '(Part n)' and ': Part n' like syntax
String[] patterns = new String[] { "\\s*[(](\\w+)[)]$", "\\W+Part (\\w+)\\W*$" };
for (String pattern : patterns) {
Matcher matcher = compile(pattern, CASE_INSENSITIVE | UNICODE_CHARACTER_CLASS).matcher(self);
if (matcher.find()) {
return matcher.replaceAll(replacement).trim();
// no pattern matches, nothing to replace
return self;
* Replace numbers 1..12 with Roman numerals
* e.g. "Star Wars: Episode 4" -> "Star Wars: Episode IV"
public static String roman(String self) {
TreeMap<Integer, String> numerals = new TreeMap<Integer, String>();
numerals.put(10, "X");
numerals.put(9, "IX");
numerals.put(5, "V");
numerals.put(4, "IV");
numerals.put(1, "I");
StringBuffer s = new StringBuffer();
Matcher m = compile("\\b\\d+\\b").matcher(self);
while (m.find()) {
int n = Integer.parseInt(m.group());
m.appendReplacement(s, n >= 1 && n <= 12 ? roman(n, numerals) : m.group());
return m.appendTail(s).toString();
public static String roman(Integer n, TreeMap<Integer, String> numerals) {
int l = numerals.floorKey(n);
if (n == l) {
return numerals.get(n);
return numerals.get(l) + roman(n - l, numerals);
* Apply ICU transliteration
* @see http://userguide.icu-project.org/transforms/general
public static String transliterate(String self, String transformIdentifier) {
return Transliterator.getInstance(transformIdentifier).transform(self);
* Convert Unicode to ASCII as best as possible. Works with most alphabets/scripts used in the world.
* e.g. "Österreich" -> "Osterreich" "カタカナ" -> "katakana"
public static String ascii(String self) {
return ascii(self, " ");
public static String ascii(String self, String fallback) {
return Transliterator.getInstance("Any-Latin;Latin-ASCII;[:Diacritic:]remove").transform(asciiQuotes(self)).replaceAll("\\P{ASCII}+", fallback).trim();
public static String asciiQuotes(String self) {
return Normalization.normalizeQuotationMarks(self);
* Replace multiple replacement pairs
* e.g. replace(ä:'ae', ö:'oe', ü:'ue')
public static String replace(String self, Map<?, ?> replace) {
// the first two parameters are required, the rest of the parameter sequence is optional
for (Entry<?, ?> it : replace.entrySet()) {
if (it.getKey() instanceof Pattern) {
self = ((Pattern) it.getKey()).matcher(self).replaceAll(it.getValue().toString());
} else {
self = self.replace(it.getKey().toString(), it.getValue().toString());
return self;
public static String joining(Collection<?> self, String delimiter) throws Exception {
String[] list = self.stream().filter(Objects::nonNull).map(Objects::toString).filter(s -> !s.isEmpty()).toArray(String[]::new);
if (list.length > 0) {
return String.join(delimiter, list);
throw new Exception("Collection did not yield any values: " + self);
public static String joiningDistinct(Collection<?> self, String delimiter, Closure<?>... mapper) throws Exception {
Stream<?> stream = self.stream().filter(Objects::nonNull);
// apply custom mappers if any
if (mapper.length > 0) {
stream = stream.flatMap(v -> stream(mapper).map(m -> m.call(v)).filter(Objects::nonNull));
// sort unique
String[] list = stream.map(Objects::toString).filter(s -> !s.isEmpty()).distinct().sorted().toArray(String[]::new);
if (list.length > 0) {
return String.join(delimiter, list);
throw new Exception("Collection did not yield any values: " + self);
* Unwind if an object does not satisfy the given predicate
* e.g. (0..9)*.check{it < 10}.sum()
public static Object check(Object self, Closure<?> c) throws Exception {
if (DefaultTypeTransformation.castToBoolean(c.call(self))) {
return self;
throw new Exception("Object failed check: " + self);
/*
 * File utilities
 */
public static File derive(File self, Object tag, Object... tagN) {
// e.g. plex.derive{" by $director"}{" [$vc, $ac]"}
String name = FileUtilities.getName(self);
String extension = self.getName().substring(name.length());
// e.g. Avatar (2009).eng.srt => Avatar (2009) 1080p.eng.srt
if (SUBTITLE_FILES.accept(self)) {
Matcher nameMatcher = releaseInfo.getSubtitleLanguageTagPattern().matcher(name);
if (nameMatcher.find()) {
extension = name.substring(nameMatcher.start() - 1) + extension;
name = name.substring(0, nameMatcher.start() - 1);
return new File(self.getParentFile(), concat(name, slash(concat(tag, null, tagN), ""), extension));
public static File getRoot(File self) {
return FileUtilities.listPath(self).get(0);
public static File getTail(File self) {
return FileUtilities.getRelativePathTail(self, FileUtilities.listPath(self).size() - 1);
public static List<File> listPath(File self) {
return FileUtilities.listPath(self);
public static List<File> listPath(File self, int tailSize) {
return FileUtilities.listPath(FileUtilities.getRelativePathTail(self, tailSize));
public static File getRelativePathTail(File self, int tailSize) {
return FileUtilities.getRelativePathTail(self, tailSize);
public static long getDiskSpace(File self) {
List<File> list = FileUtilities.listPath(self);
for (int i = list.size() - 1; i >= 0; i--) {
if (list.get(i).exists()) {
long usableSpace = list.get(i).getUsableSpace();
if (usableSpace > 0) {
return usableSpace;
return 0;
public static long getCreationDate(File self) {
try {
BasicFileAttributes attr = Files.getFileAttributeView(self.toPath(), BasicFileAttributeView.class).readAttributes();
long creationDate = attr.creationTime().toMillis();
if (creationDate > 0) {
return creationDate;
return attr.lastModifiedTime().toMillis();
} catch (IOException e) {
throw new RuntimeException(e);
public static File toFile(String self) {
if (self == null || self.isEmpty()) {
return null;
return new File(self);
public static File toFile(String self, String parent) {
if (self == null || self.isEmpty()) {
return null;
File file = new File(self);
if (file.isAbsolute()) {
return file;
return new File(parent, self);
public static Locale toLocale(String self) {
return Locale.forLanguageTag(self);
public static String plus(String self, Closure<?> other) {
return concat(self, other);
public static String plus(Closure<?> self, Object other) {
return concat(self, other);
public static String plus(Language self, Object other) {
return concat(self, other);
public static String plus(SimpleDate self, Object other) {
return concat(self, other);
public static List<?> bounds(Iterable<?> self) {
return Stream.of(DefaultGroovyMethods.min(self), DefaultGroovyMethods.max(self)).filter(Objects::nonNull).distinct().collect(toList());
/*
 * Episode utilities (EXPERIMENTAL)
 */
public static EpisodeInfo getInfo(Episode self) throws Exception {
if (TheTVDB.getIdentifier().equals(self.getSeriesInfo().getDatabase())) {
return TheTVDBv2.getEpisodeInfo(self.getId(), Locale.ENGLISH);
return null;
public static List<String> getActors(SeriesInfo self) throws Exception {
if (TheTVDB.getIdentifier().equals(self.getDatabase())) {
return TheTVDBv2.getActors(self.getId(), Locale.ENGLISH).stream().map(Person::getName).collect(toList());
return null;