Added regex matching with fuzzy matching support.

Shows now display Indexer absolute numbering.
Improved speed of parsing search results.
Fixed episode naming issues.
echel0n 2014-05-31 03:35:11 -07:00
parent 2c37523ab7
commit d7396896b5
26 changed files with 44507 additions and 143 deletions
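For context, a minimal sketch of the fuzzy matching this commit enables via the bundled regex module (the pattern, input string, and error budget below are illustrative, not taken from the commit):

import regex

# {e<=2} permits up to two errors (insertions, deletions or substitutions);
# BESTMATCH looks for the best fuzzy match rather than the first one found.
m = regex.search(r"(?:Show Name){e<=2}", "Show.Nme.S01E02",
                 regex.BESTMATCH | regex.IGNORECASE)
if m:
    print(m.group(0))      # the fuzzily matched substring
    print(m.fuzzy_counts)  # (substitutions, insertions, deletions)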

View File

@@ -302,7 +302,7 @@
<h2>#if int($epResult["season"]) == 0 then "Specials" else "Season "+str($epResult["season"])#</h2>
</td>
</tr>
<tr id="season-$epResult["season"]-cols"><th width="1%"><input type="checkbox" class="seasonCheck" id="$epResult["season"]" /></th><th>NFO</th><th>TBN</th><th>Episode</th>#if $scene then "<th>Scene #</th>" else ""# #if $scene_anime then "<th>Scene Absolute #</th>" else ""#<th>Name</th><th class="nowrap">Airdate</th><th>Filename</th>#if $sickbeard.USE_SUBTITLES and $show.subtitles then "<th>Subtitles</th>" else ""#<th>Status</th><th>Search</th></tr>
<tr id="season-$epResult["season"]-cols"><th width="1%"><input type="checkbox" class="seasonCheck" id="$epResult["season"]" /></th><th>NFO</th><th>TBN</th><th>Episode</th>#if $show.is_anime then "<th>Absolute</th>" else ""# #if $scene then "<th>Scene #</th>" else ""# #if $scene_anime then "<th>Scene Absolute</th>" else ""#<th>Name</th><th class="nowrap">Airdate</th><th>Filename</th>#if $sickbeard.USE_SUBTITLES and $show.subtitles then "<th>Subtitles</th>" else ""#<th>Status</th><th>Search</th></tr>
#set $curSeason = int($epResult["season"])
#end if
@@ -317,6 +317,9 @@
<td align="center"><img src="$sbRoot/images/#if $epResult["hasnfo"] == 1 then "nfo.gif\" alt=\"Y" else "nfo-no.gif\" alt=\"N"#" width="23" height="11" /></td>
<td align="center"><img src="$sbRoot/images/#if $epResult["hastbn"] == 1 then "tbn.gif\" alt=\"Y" else "tbn-no.gif\" alt=\"N"#" width="23" height="11" /></td>
<td align="center">$epResult["episode"]</td>
#if $show.is_anime:
<td align="center">$epResult["absolute_number"]</td>
#end if
#if $scene:
<td align="center">

View File

@@ -0,0 +1,78 @@
#!/usr/bin/env python
# encoding: utf-8
"""
StringMatcher.py
ported from python-Levenshtein
[https://github.com/miohtama/python-Levenshtein]
"""
from Levenshtein import *
from warnings import warn
class StringMatcher:
"""A SequenceMatcher-like class built on the top of Levenshtein"""
def _reset_cache(self):
self._ratio = self._distance = None
self._opcodes = self._editops = self._matching_blocks = None
def __init__(self, isjunk=None, seq1='', seq2=''):
if isjunk:
warn("isjunk not NOT implemented, it will be ignored")
self._str1, self._str2 = seq1, seq2
self._reset_cache()
def set_seqs(self, seq1, seq2):
self._str1, self._str2 = seq1, seq2
self._reset_cache()
def set_seq1(self, seq1):
self._str1 = seq1
self._reset_cache()
def set_seq2(self, seq2):
self._str2 = seq2
self._reset_cache()
def get_opcodes(self):
if not self._opcodes:
if self._editops:
self._opcodes = opcodes(self._editops, self._str1, self._str2)
else:
self._opcodes = opcodes(self._str1, self._str2)
return self._opcodes
def get_editops(self):
if not self._editops:
if self._opcodes:
self._editops = editops(self._opcodes, self._str1, self._str2)
else:
self._editops = editops(self._str1, self._str2)
return self._editops
def get_matching_blocks(self):
if not self._matching_blocks:
self._matching_blocks = matching_blocks(self.get_opcodes(),
self._str1, self._str2)
return self._matching_blocks
def ratio(self):
if not self._ratio:
self._ratio = ratio(self._str1, self._str2)
return self._ratio
def quick_ratio(self):
# This is usually quick enough :o)
if not self._ratio:
self._ratio = ratio(self._str1, self._str2)
return self._ratio
def real_quick_ratio(self):
len1, len2 = len(self._str1), len(self._str2)
return 2.0 * min(len1, len2) / (len1 + len2)
def distance(self):
if not self._distance:
self._distance = distance(self._str1, self._str2)
return self._distance
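A brief usage sketch for the class above (assumes the python-Levenshtein extension is importable; the strings are illustrative):

from StringMatcher import StringMatcher

m = StringMatcher(seq1="show name 2014", seq2="show.name.2014")
print(m.ratio())     # similarity in [0, 1], computed by Levenshtein.ratio
print(m.distance())  # Levenshtein edit distance between the two strings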

View File

263
lib/fuzzywuzzy/fuzz.py Normal file
View File

@@ -0,0 +1,263 @@
#!/usr/bin/env python
# encoding: utf-8
"""
fuzz.py
Copyright (c) 2011 Adam Cohen
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
"""
from __future__ import unicode_literals
try:
from StringMatcher import StringMatcher as SequenceMatcher
except ImportError:
from difflib import SequenceMatcher
from . import utils
###########################
# Basic Scoring Functions #
###########################
def ratio(s1, s2):
if s1 is None:
raise TypeError("s1 is None")
if s2 is None:
raise TypeError("s2 is None")
s1, s2 = utils.make_type_consistent(s1, s2)
if len(s1) == 0 or len(s2) == 0:
return 0
m = SequenceMatcher(None, s1, s2)
return utils.intr(100 * m.ratio())
# todo: skip duplicate indexes for a little more speed
def partial_ratio(s1, s2):
if s1 is None:
raise TypeError("s1 is None")
if s2 is None:
raise TypeError("s2 is None")
s1, s2 = utils.make_type_consistent(s1, s2)
if len(s1) == 0 or len(s2) == 0:
return 0
if len(s1) <= len(s2):
shorter = s1
longer = s2
else:
shorter = s2
longer = s1
m = SequenceMatcher(None, shorter, longer)
blocks = m.get_matching_blocks()
# each block represents a sequence of matching characters in a string
# of the form (idx_1, idx_2, len)
# the best partial match will block-align with at least one of those blocks
# e.g. shorter = "abcd", longer = "XXXbcdeEEE"
# block = (1,3,3)
# best score === ratio("abcd", "Xbcd")
scores = []
for block in blocks:
long_start = block[1] - block[0] if (block[1] - block[0]) > 0 else 0
long_end = long_start + len(shorter)
long_substr = longer[long_start:long_end]
m2 = SequenceMatcher(None, shorter, long_substr)
r = m2.ratio()
if r > .995:
return 100
else:
scores.append(r)
return int(100 * max(scores))
##############################
# Advanced Scoring Functions #
##############################
# Sorted Token
# find all alphanumeric tokens in the string
# sort those tokens and take ratio of resulting joined strings
# controls for unordered string elements
def _token_sort(s1, s2, partial=True, force_ascii=True):
if s1 is None:
raise TypeError("s1 is None")
if s2 is None:
raise TypeError("s2 is None")
# pull tokens
tokens1 = utils.full_process(s1, force_ascii=force_ascii).split()
tokens2 = utils.full_process(s2, force_ascii=force_ascii).split()
# sort tokens and join
sorted1 = " ".join(sorted(tokens1))
sorted2 = " ".join(sorted(tokens2))
sorted1 = sorted1.strip()
sorted2 = sorted2.strip()
if partial:
return partial_ratio(sorted1, sorted2)
else:
return ratio(sorted1, sorted2)
def token_sort_ratio(s1, s2, force_ascii=True):
return _token_sort(s1, s2, partial=False, force_ascii=force_ascii)
def partial_token_sort_ratio(s1, s2, force_ascii=True):
return _token_sort(s1, s2, partial=True, force_ascii=force_ascii)
# Token Set
# find all alphanumeric tokens in each string...treat them as a set
# construct two strings of the form
# <sorted_intersection><sorted_remainder>
# take ratios of those two strings
# controls for unordered partial matches
def _token_set(s1, s2, partial=True, force_ascii=True):
if s1 is None:
raise TypeError("s1 is None")
if s2 is None:
raise TypeError("s2 is None")
p1 = utils.full_process(s1, force_ascii=force_ascii)
p2 = utils.full_process(s2, force_ascii=force_ascii)
if not utils.validate_string(p1):
return 0
if not utils.validate_string(p2):
return 0
# pull tokens
tokens1 = set(utils.full_process(p1).split())
tokens2 = set(utils.full_process(p2).split())
intersection = tokens1.intersection(tokens2)
diff1to2 = tokens1.difference(tokens2)
diff2to1 = tokens2.difference(tokens1)
sorted_sect = " ".join(sorted(intersection))
sorted_1to2 = " ".join(sorted(diff1to2))
sorted_2to1 = " ".join(sorted(diff2to1))
combined_1to2 = sorted_sect + " " + sorted_1to2
combined_2to1 = sorted_sect + " " + sorted_2to1
# strip
sorted_sect = sorted_sect.strip()
combined_1to2 = combined_1to2.strip()
combined_2to1 = combined_2to1.strip()
pairwise = [
ratio(sorted_sect, combined_1to2),
ratio(sorted_sect, combined_2to1),
ratio(combined_1to2, combined_2to1)
]
return max(pairwise)
def token_set_ratio(s1, s2, force_ascii=True):
return _token_set(s1, s2, partial=False, force_ascii=force_ascii)
def partial_token_set_ratio(s1, s2, force_ascii=True):
return _token_set(s1, s2, partial=True, force_ascii=force_ascii)
# TODO: numerics
###################
# Combination API #
###################
# q is for quick
def QRatio(s1, s2, force_ascii=True):
p1 = utils.full_process(s1, force_ascii=force_ascii)
p2 = utils.full_process(s2, force_ascii=force_ascii)
if not utils.validate_string(p1):
return 0
if not utils.validate_string(p2):
return 0
return ratio(p1, p2)
def UQRatio(s1, s2):
return QRatio(s1, s2, force_ascii=False)
# w is for weighted
def WRatio(s1, s2, force_ascii=True):
p1 = utils.full_process(s1, force_ascii=force_ascii)
p2 = utils.full_process(s2, force_ascii=force_ascii)
if not utils.validate_string(p1):
return 0
if not utils.validate_string(p2):
return 0
# should we look at partials?
try_partial = True
unbase_scale = .95
partial_scale = .90
base = ratio(p1, p2)
len_ratio = float(max(len(p1), len(p2))) / min(len(p1), len(p2))
# if strings are similar length, don't use partials
if len_ratio < 1.5:
try_partial = False
# if one string is much much shorter than the other
if len_ratio > 8:
partial_scale = .6
if try_partial:
partial = partial_ratio(p1, p2) * partial_scale
ptsor = partial_token_sort_ratio(p1, p2, force_ascii=force_ascii) \
* unbase_scale * partial_scale
ptser = partial_token_set_ratio(p1, p2, force_ascii=force_ascii) \
* unbase_scale * partial_scale
return int(max(base, partial, ptsor, ptser))
else:
tsor = token_sort_ratio(p1, p2, force_ascii=force_ascii) * unbase_scale
tser = token_set_ratio(p1, p2, force_ascii=force_ascii) * unbase_scale
return int(max(base, tsor, tser))
def UWRatio(s1, s2):
return WRatio(s1, s2, force_ascii=False)
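A short usage sketch of the scorers above, on the classic fuzzywuzzy example strings (exact scores depend on the matcher backend):

from fuzzywuzzy import fuzz

a = "new york mets vs atlanta braves"
b = "atlanta braves vs new york mets"
print(fuzz.ratio(a, b))             # order-sensitive, penalizes the swap
print(fuzz.token_sort_ratio(a, b))  # 100: same tokens once sorted
print(fuzz.token_set_ratio(a, b))   # 100: identical token sets
print(fuzz.WRatio(a, b))            # weighted combination of the above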

119
lib/fuzzywuzzy/process.py Normal file
View File

@@ -0,0 +1,119 @@
#!/usr/bin/env python
# encoding: utf-8
"""
process.py
Copyright (c) 2011 Adam Cohen
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
"""
import itertools
from . import fuzz
from . import utils
def extract(query, choices, processor=None, scorer=None, limit=5):
"""Find best matches in a list of choices, return a list of tuples
containing the match and it's score.
Arguments:
query -- an object representing the thing we want to find
choices -- a list of objects we are attempting to extract
values from
scorer -- f(OBJ, QUERY) --> INT. We will return the objects
with the highest score. By default we use
fuzz.WRatio(), and both OBJ and QUERY should be
strings.
processor -- f(OBJ_A) --> OBJ_B, where the output is an input
to scorer. For example, "processor = lambda x:
x[0]" would return the first element in a
collection x (of, say, strings); this would then
be used in the scoring. By default, we use
utils.full_process().
"""
if choices is None or len(choices) == 0:
return []
# default, turn whatever the choice is into a workable string
if processor is None:
processor = lambda x: utils.full_process(x)
# default: wratio
if scorer is None:
scorer = fuzz.WRatio
sl = list()
for choice in choices:
processed = processor(choice)
score = scorer(query, processed)
item = (choice, score)
sl.append(item)
sl.sort(key=lambda i: i[1], reverse=True)
return sl[:limit]
def extractBests(query, choices, processor=None, scorer=None, score_cutoff=0, limit=5):
"""Find best matches above a score in a list of choices, return a
list of tuples containing the match and it's score.
Convenience method which returns the choices with best scores, see
extract() for full arguments list
Optional parameter: score_cutoff.
If a choice has a score less than or equal to score_cutoff
it will not be included in the result list
"""
best_list = extract(query, choices, processor, scorer, limit)
if len(best_list) > 0:
return list(itertools.takewhile(lambda x: x[1] > score_cutoff, best_list))
else:
return []
def extractOne(query, choices, processor=None, scorer=None, score_cutoff=0):
"""Find the best match above a score in a list of choices, return a
tuple containing the match and it's score if it's above the treshold
or None.
Convenience method which returns the single best choice, see
extract() for full arguments list
Optional parameter: score_cutoff.
If the best choice has a score less than or equal to
score_cutoff we will return None (intuition: not a good enough
match)
"""
best_list = extract(query, choices, processor, scorer, limit=1)
if len(best_list) > 0:
best = best_list[0]
if best[1] > score_cutoff:
return best
else:
return None
else:
return None
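A usage sketch for extract() and extractOne() (the choices are illustrative):

from fuzzywuzzy import process

choices = ["Show Name", "Other Show", "Show Name 2014"]
print(process.extract("show name", choices, limit=2))             # [(choice, score), ...]
print(process.extractOne("show name", choices, score_cutoff=80))  # best (choice, score) or None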

View File

@@ -0,0 +1,41 @@
from __future__ import unicode_literals
import re
class StringProcessor(object):
"""
This class defines methods to process strings in the most
efficient way. Ideally all the methods below use unicode strings
for both input and output.
"""
@classmethod
def replace_non_letters_non_numbers_with_whitespace(cls, a_string):
"""
This function replaces each character that is neither a letter
nor a number with a single white space.
"""
regex = re.compile(r"(?ui)\W")
return regex.sub(" ", a_string)
@classmethod
def strip(cls, a_string):
"""
This function strips leading and trailing white space.
"""
return a_string.strip()
@classmethod
def to_lower_case(cls, a_string):
"""
This function returns the lower-cased version of the string given.
"""
return a_string.lower()
@classmethod
def to_upper_case(cls, a_string):
"""
This function returns the upper-cased version of the string given.
"""
return a_string.upper()
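For example, normalizing an illustrative release title with the helpers above:

from fuzzywuzzy.string_processing import StringProcessor

s = "Show.Name-S01E02!"
s = StringProcessor.replace_non_letters_non_numbers_with_whitespace(s)
print(StringProcessor.strip(StringProcessor.to_lower_case(s)))  # -> "show name s01e02"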

76
lib/fuzzywuzzy/utils.py Normal file
View File

@@ -0,0 +1,76 @@
from __future__ import unicode_literals
import sys
from fuzzywuzzy.string_processing import StringProcessor
PY3 = sys.version_info[0] == 3
def validate_string(s):
try:
if len(s) > 0:
return True
else:
return False
except TypeError:
return False
bad_chars = str('') # ascii dammit!
for i in range(128, 256):
bad_chars += chr(i)
if PY3:
translation_table = dict((ord(c), None) for c in bad_chars)
def asciionly(s):
if PY3:
return s.translate(translation_table)
else:
return s.translate(None, bad_chars)
def asciidammit(s):
if type(s) is str:
return asciionly(s)
elif type(s) is unicode:
return asciionly(s.encode('ascii', 'ignore'))
else:
return asciidammit(unicode(s))
def make_type_consistent(s1, s2):
if isinstance(s1, str) and isinstance(s2, str):
return s1, s2
elif isinstance(s1, unicode) and isinstance(s2, unicode):
return s1, s2
else:
return unicode(s1), unicode(s2)
def full_process(s, force_ascii=False):
"""Process string by
-- removing all but letters and numbers
-- trimming whitespace
-- forcing to lower case
if force_ascii == True, force convert to ascii"""
if s is None:
return ""
if force_ascii:
s = asciidammit(s)
# Keep only letters and numbers (see Unicode docs).
string_out = StringProcessor.replace_non_letters_non_numbers_with_whitespace(s)
# Force into lowercase.
string_out = StringProcessor.to_lower_case(string_out)
# Remove leading and trailing whitespaces.
string_out = StringProcessor.strip(string_out)
return string_out
def intr(n):
'''Returns a correctly rounded integer'''
return int(round(n))
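A quick sketch of full_process() on an illustrative title (punctuation becomes whitespace, case is folded, edges are stripped):

from fuzzywuzzy import utils

print(utils.full_process("Show.Name.S01E02.HDTV!"))  # -> "show name s01e02 hdtv"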

View File

@@ -0,0 +1 @@

Binary file not shown.

View File

@@ -0,0 +1 @@

Binary file not shown.

View File

@@ -0,0 +1 @@

Binary file not shown.

1
lib/regex/__init__.py Normal file
View File

@@ -0,0 +1 @@

22557
lib/regex/_regex.c Normal file

File diff suppressed because it is too large.

228
lib/regex/_regex.h Normal file
View File

@@ -0,0 +1,228 @@
/*
* Secret Labs' Regular Expression Engine
*
* regular expression matching engine
*
* Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
*
* NOTE: This file is generated by regex.py. If you need
* to change anything in here, edit regex.py and run it.
*
* 2010-01-16 mrab Re-written
*/
/* Supports Unicode version 6.3.0. */
#define RE_MAGIC 20100116
#include "_regex_unicode.h"
/* Operators. */
#define RE_OP_FAILURE 0
#define RE_OP_SUCCESS 1
#define RE_OP_ANY 2
#define RE_OP_ANY_ALL 3
#define RE_OP_ANY_ALL_REV 4
#define RE_OP_ANY_REV 5
#define RE_OP_ANY_U 6
#define RE_OP_ANY_U_REV 7
#define RE_OP_ATOMIC 8
#define RE_OP_BOUNDARY 9
#define RE_OP_BRANCH 10
#define RE_OP_CALL_REF 11
#define RE_OP_CHARACTER 12
#define RE_OP_CHARACTER_IGN 13
#define RE_OP_CHARACTER_IGN_REV 14
#define RE_OP_CHARACTER_REV 15
#define RE_OP_DEFAULT_BOUNDARY 16
#define RE_OP_DEFAULT_END_OF_WORD 17
#define RE_OP_DEFAULT_START_OF_WORD 18
#define RE_OP_END 19
#define RE_OP_END_OF_LINE 20
#define RE_OP_END_OF_LINE_U 21
#define RE_OP_END_OF_STRING 22
#define RE_OP_END_OF_STRING_LINE 23
#define RE_OP_END_OF_STRING_LINE_U 24
#define RE_OP_END_OF_WORD 25
#define RE_OP_FUZZY 26
#define RE_OP_GRAPHEME_BOUNDARY 27
#define RE_OP_GREEDY_REPEAT 28
#define RE_OP_GROUP 29
#define RE_OP_GROUP_CALL 30
#define RE_OP_GROUP_EXISTS 31
#define RE_OP_LAZY_REPEAT 32
#define RE_OP_LOOKAROUND 33
#define RE_OP_NEXT 34
#define RE_OP_PROPERTY 35
#define RE_OP_PROPERTY_IGN 36
#define RE_OP_PROPERTY_IGN_REV 37
#define RE_OP_PROPERTY_REV 38
#define RE_OP_RANGE 39
#define RE_OP_RANGE_IGN 40
#define RE_OP_RANGE_IGN_REV 41
#define RE_OP_RANGE_REV 42
#define RE_OP_REF_GROUP 43
#define RE_OP_REF_GROUP_FLD 44
#define RE_OP_REF_GROUP_FLD_REV 45
#define RE_OP_REF_GROUP_IGN 46
#define RE_OP_REF_GROUP_IGN_REV 47
#define RE_OP_REF_GROUP_REV 48
#define RE_OP_SEARCH_ANCHOR 49
#define RE_OP_SET_DIFF 50
#define RE_OP_SET_DIFF_IGN 51
#define RE_OP_SET_DIFF_IGN_REV 52
#define RE_OP_SET_DIFF_REV 53
#define RE_OP_SET_INTER 54
#define RE_OP_SET_INTER_IGN 55
#define RE_OP_SET_INTER_IGN_REV 56
#define RE_OP_SET_INTER_REV 57
#define RE_OP_SET_SYM_DIFF 58
#define RE_OP_SET_SYM_DIFF_IGN 59
#define RE_OP_SET_SYM_DIFF_IGN_REV 60
#define RE_OP_SET_SYM_DIFF_REV 61
#define RE_OP_SET_UNION 62
#define RE_OP_SET_UNION_IGN 63
#define RE_OP_SET_UNION_IGN_REV 64
#define RE_OP_SET_UNION_REV 65
#define RE_OP_START_OF_LINE 66
#define RE_OP_START_OF_LINE_U 67
#define RE_OP_START_OF_STRING 68
#define RE_OP_START_OF_WORD 69
#define RE_OP_STRING 70
#define RE_OP_STRING_FLD 71
#define RE_OP_STRING_FLD_REV 72
#define RE_OP_STRING_IGN 73
#define RE_OP_STRING_IGN_REV 74
#define RE_OP_STRING_REV 75
#define RE_OP_STRING_SET 76
#define RE_OP_STRING_SET_FLD 77
#define RE_OP_STRING_SET_FLD_REV 78
#define RE_OP_STRING_SET_IGN 79
#define RE_OP_STRING_SET_IGN_REV 80
#define RE_OP_STRING_SET_REV 81
#define RE_OP_BODY_END 82
#define RE_OP_BODY_START 83
#define RE_OP_END_FUZZY 84
#define RE_OP_END_GREEDY_REPEAT 85
#define RE_OP_END_GROUP 86
#define RE_OP_END_LAZY_REPEAT 87
#define RE_OP_GREEDY_REPEAT_ONE 88
#define RE_OP_GROUP_RETURN 89
#define RE_OP_LAZY_REPEAT_ONE 90
#define RE_OP_MATCH_BODY 91
#define RE_OP_MATCH_TAIL 92
#define RE_OP_START_GROUP 93
char* re_op_text[] = {
"RE_OP_FAILURE",
"RE_OP_SUCCESS",
"RE_OP_ANY",
"RE_OP_ANY_ALL",
"RE_OP_ANY_ALL_REV",
"RE_OP_ANY_REV",
"RE_OP_ANY_U",
"RE_OP_ANY_U_REV",
"RE_OP_ATOMIC",
"RE_OP_BOUNDARY",
"RE_OP_BRANCH",
"RE_OP_CALL_REF",
"RE_OP_CHARACTER",
"RE_OP_CHARACTER_IGN",
"RE_OP_CHARACTER_IGN_REV",
"RE_OP_CHARACTER_REV",
"RE_OP_DEFAULT_BOUNDARY",
"RE_OP_DEFAULT_END_OF_WORD",
"RE_OP_DEFAULT_START_OF_WORD",
"RE_OP_END",
"RE_OP_END_OF_LINE",
"RE_OP_END_OF_LINE_U",
"RE_OP_END_OF_STRING",
"RE_OP_END_OF_STRING_LINE",
"RE_OP_END_OF_STRING_LINE_U",
"RE_OP_END_OF_WORD",
"RE_OP_FUZZY",
"RE_OP_GRAPHEME_BOUNDARY",
"RE_OP_GREEDY_REPEAT",
"RE_OP_GROUP",
"RE_OP_GROUP_CALL",
"RE_OP_GROUP_EXISTS",
"RE_OP_LAZY_REPEAT",
"RE_OP_LOOKAROUND",
"RE_OP_NEXT",
"RE_OP_PROPERTY",
"RE_OP_PROPERTY_IGN",
"RE_OP_PROPERTY_IGN_REV",
"RE_OP_PROPERTY_REV",
"RE_OP_RANGE",
"RE_OP_RANGE_IGN",
"RE_OP_RANGE_IGN_REV",
"RE_OP_RANGE_REV",
"RE_OP_REF_GROUP",
"RE_OP_REF_GROUP_FLD",
"RE_OP_REF_GROUP_FLD_REV",
"RE_OP_REF_GROUP_IGN",
"RE_OP_REF_GROUP_IGN_REV",
"RE_OP_REF_GROUP_REV",
"RE_OP_SEARCH_ANCHOR",
"RE_OP_SET_DIFF",
"RE_OP_SET_DIFF_IGN",
"RE_OP_SET_DIFF_IGN_REV",
"RE_OP_SET_DIFF_REV",
"RE_OP_SET_INTER",
"RE_OP_SET_INTER_IGN",
"RE_OP_SET_INTER_IGN_REV",
"RE_OP_SET_INTER_REV",
"RE_OP_SET_SYM_DIFF",
"RE_OP_SET_SYM_DIFF_IGN",
"RE_OP_SET_SYM_DIFF_IGN_REV",
"RE_OP_SET_SYM_DIFF_REV",
"RE_OP_SET_UNION",
"RE_OP_SET_UNION_IGN",
"RE_OP_SET_UNION_IGN_REV",
"RE_OP_SET_UNION_REV",
"RE_OP_START_OF_LINE",
"RE_OP_START_OF_LINE_U",
"RE_OP_START_OF_STRING",
"RE_OP_START_OF_WORD",
"RE_OP_STRING",
"RE_OP_STRING_FLD",
"RE_OP_STRING_FLD_REV",
"RE_OP_STRING_IGN",
"RE_OP_STRING_IGN_REV",
"RE_OP_STRING_REV",
"RE_OP_STRING_SET",
"RE_OP_STRING_SET_FLD",
"RE_OP_STRING_SET_FLD_REV",
"RE_OP_STRING_SET_IGN",
"RE_OP_STRING_SET_IGN_REV",
"RE_OP_STRING_SET_REV",
"RE_OP_BODY_END",
"RE_OP_BODY_START",
"RE_OP_END_FUZZY",
"RE_OP_END_GREEDY_REPEAT",
"RE_OP_END_GROUP",
"RE_OP_END_LAZY_REPEAT",
"RE_OP_GREEDY_REPEAT_ONE",
"RE_OP_GROUP_RETURN",
"RE_OP_LAZY_REPEAT_ONE",
"RE_OP_MATCH_BODY",
"RE_OP_MATCH_TAIL",
"RE_OP_START_GROUP",
};
#define RE_FLAG_ASCII 0x80
#define RE_FLAG_BESTMATCH 0x1000
#define RE_FLAG_DEBUG 0x200
#define RE_FLAG_DOTALL 0x10
#define RE_FLAG_ENHANCEMATCH 0x8000
#define RE_FLAG_FULLCASE 0x4000
#define RE_FLAG_IGNORECASE 0x2
#define RE_FLAG_LOCALE 0x4
#define RE_FLAG_MULTILINE 0x8
#define RE_FLAG_REVERSE 0x400
#define RE_FLAG_TEMPLATE 0x1
#define RE_FLAG_UNICODE 0x20
#define RE_FLAG_VERBOSE 0x40
#define RE_FLAG_VERSION0 0x2000
#define RE_FLAG_VERSION1 0x100
#define RE_FLAG_WORD 0x800

4086
lib/regex/_regex_core.py Normal file

File diff suppressed because it is too large.

12748
lib/regex/_regex_unicode.c Normal file

File diff suppressed because it is too large.

218
lib/regex/_regex_unicode.h Normal file
View File

@@ -0,0 +1,218 @@
typedef unsigned char RE_UINT8;
typedef signed char RE_INT8;
typedef unsigned short RE_UINT16;
typedef signed short RE_INT16;
typedef unsigned int RE_UINT32;
typedef signed int RE_INT32;
typedef unsigned char BOOL;
enum {FALSE, TRUE};
#define RE_ASCII_MAX 0x7F
#define RE_LOCALE_MAX 0xFF
#define RE_UNICODE_MAX 0x10FFFF
#define RE_MAX_CASES 4
#define RE_MAX_FOLDED 3
typedef struct RE_Property {
RE_UINT16 name;
RE_UINT8 id;
RE_UINT8 value_set;
} RE_Property;
typedef struct RE_PropertyValue {
RE_UINT16 name;
RE_UINT8 value_set;
RE_UINT8 id;
} RE_PropertyValue;
typedef RE_UINT32 (*RE_GetPropertyFunc)(RE_UINT32 ch);
#define RE_PROP_GC 0x0
#define RE_PROP_CASED 0xA
#define RE_PROP_UPPERCASE 0x9
#define RE_PROP_LOWERCASE 0x8
#define RE_PROP_C 30
#define RE_PROP_L 31
#define RE_PROP_M 32
#define RE_PROP_N 33
#define RE_PROP_P 34
#define RE_PROP_S 35
#define RE_PROP_Z 36
#define RE_PROP_ASSIGNED 38
#define RE_PROP_CASEDLETTER 37
#define RE_PROP_CN 0
#define RE_PROP_LU 1
#define RE_PROP_LL 2
#define RE_PROP_LT 3
#define RE_PROP_LM 4
#define RE_PROP_LO 5
#define RE_PROP_MN 6
#define RE_PROP_ME 7
#define RE_PROP_MC 8
#define RE_PROP_ND 9
#define RE_PROP_NL 10
#define RE_PROP_NO 11
#define RE_PROP_ZS 12
#define RE_PROP_ZL 13
#define RE_PROP_ZP 14
#define RE_PROP_CC 15
#define RE_PROP_CF 16
#define RE_PROP_CO 17
#define RE_PROP_CS 18
#define RE_PROP_PD 19
#define RE_PROP_PS 20
#define RE_PROP_PE 21
#define RE_PROP_PC 22
#define RE_PROP_PO 23
#define RE_PROP_SM 24
#define RE_PROP_SC 25
#define RE_PROP_SK 26
#define RE_PROP_SO 27
#define RE_PROP_PI 28
#define RE_PROP_PF 29
#define RE_PROP_C_MASK 0x00078001
#define RE_PROP_L_MASK 0x0000003E
#define RE_PROP_M_MASK 0x000001C0
#define RE_PROP_N_MASK 0x00000E00
#define RE_PROP_P_MASK 0x30F80000
#define RE_PROP_S_MASK 0x0F000000
#define RE_PROP_Z_MASK 0x00007000
#define RE_PROP_ALNUM 0x460001
#define RE_PROP_ALPHA 0x070001
#define RE_PROP_ANY 0x470001
#define RE_PROP_ASCII 0x010001
#define RE_PROP_BLANK 0x480001
#define RE_PROP_CNTRL 0x00000F
#define RE_PROP_DIGIT 0x000009
#define RE_PROP_GRAPH 0x490001
#define RE_PROP_LOWER 0x080001
#define RE_PROP_PRINT 0x4A0001
#define RE_PROP_SPACE 0x190001
#define RE_PROP_UPPER 0x090001
#define RE_PROP_WORD 0x4B0001
#define RE_PROP_XDIGIT 0x4C0001
#define RE_BREAK_OTHER 0
#define RE_BREAK_DOUBLEQUOTE 1
#define RE_BREAK_SINGLEQUOTE 2
#define RE_BREAK_HEBREWLETTER 3
#define RE_BREAK_CR 4
#define RE_BREAK_LF 5
#define RE_BREAK_NEWLINE 6
#define RE_BREAK_EXTEND 7
#define RE_BREAK_REGIONALINDICATOR 8
#define RE_BREAK_FORMAT 9
#define RE_BREAK_KATAKANA 10
#define RE_BREAK_ALETTER 11
#define RE_BREAK_MIDLETTER 12
#define RE_BREAK_MIDNUM 13
#define RE_BREAK_MIDNUMLET 14
#define RE_BREAK_NUMERIC 15
#define RE_BREAK_EXTENDNUMLET 16
#define RE_GBREAK_OTHER 0
#define RE_GBREAK_CR 1
#define RE_GBREAK_LF 2
#define RE_GBREAK_CONTROL 3
#define RE_GBREAK_EXTEND 4
#define RE_GBREAK_REGIONALINDICATOR 5
#define RE_GBREAK_SPACINGMARK 6
#define RE_GBREAK_L 7
#define RE_GBREAK_V 8
#define RE_GBREAK_T 9
#define RE_GBREAK_LV 10
#define RE_GBREAK_LVT 11
#define RE_GBREAK_PREPEND 12
extern char* re_strings[1160];
extern RE_Property re_properties[143];
extern RE_PropertyValue re_property_values[1251];
extern RE_UINT16 re_expand_on_folding[104];
extern RE_GetPropertyFunc re_get_property[77];
RE_UINT32 re_get_general_category(RE_UINT32 ch);
RE_UINT32 re_get_block(RE_UINT32 ch);
RE_UINT32 re_get_script(RE_UINT32 ch);
RE_UINT32 re_get_word_break(RE_UINT32 ch);
RE_UINT32 re_get_grapheme_cluster_break(RE_UINT32 ch);
RE_UINT32 re_get_sentence_break(RE_UINT32 ch);
RE_UINT32 re_get_math(RE_UINT32 ch);
RE_UINT32 re_get_alphabetic(RE_UINT32 ch);
RE_UINT32 re_get_lowercase(RE_UINT32 ch);
RE_UINT32 re_get_uppercase(RE_UINT32 ch);
RE_UINT32 re_get_cased(RE_UINT32 ch);
RE_UINT32 re_get_case_ignorable(RE_UINT32 ch);
RE_UINT32 re_get_changes_when_lowercased(RE_UINT32 ch);
RE_UINT32 re_get_changes_when_uppercased(RE_UINT32 ch);
RE_UINT32 re_get_changes_when_titlecased(RE_UINT32 ch);
RE_UINT32 re_get_changes_when_casefolded(RE_UINT32 ch);
RE_UINT32 re_get_changes_when_casemapped(RE_UINT32 ch);
RE_UINT32 re_get_id_start(RE_UINT32 ch);
RE_UINT32 re_get_id_continue(RE_UINT32 ch);
RE_UINT32 re_get_xid_start(RE_UINT32 ch);
RE_UINT32 re_get_xid_continue(RE_UINT32 ch);
RE_UINT32 re_get_default_ignorable_code_point(RE_UINT32 ch);
RE_UINT32 re_get_grapheme_extend(RE_UINT32 ch);
RE_UINT32 re_get_grapheme_base(RE_UINT32 ch);
RE_UINT32 re_get_grapheme_link(RE_UINT32 ch);
RE_UINT32 re_get_white_space(RE_UINT32 ch);
RE_UINT32 re_get_bidi_control(RE_UINT32 ch);
RE_UINT32 re_get_join_control(RE_UINT32 ch);
RE_UINT32 re_get_dash(RE_UINT32 ch);
RE_UINT32 re_get_hyphen(RE_UINT32 ch);
RE_UINT32 re_get_quotation_mark(RE_UINT32 ch);
RE_UINT32 re_get_terminal_punctuation(RE_UINT32 ch);
RE_UINT32 re_get_other_math(RE_UINT32 ch);
RE_UINT32 re_get_hex_digit(RE_UINT32 ch);
RE_UINT32 re_get_ascii_hex_digit(RE_UINT32 ch);
RE_UINT32 re_get_other_alphabetic(RE_UINT32 ch);
RE_UINT32 re_get_ideographic(RE_UINT32 ch);
RE_UINT32 re_get_diacritic(RE_UINT32 ch);
RE_UINT32 re_get_extender(RE_UINT32 ch);
RE_UINT32 re_get_other_lowercase(RE_UINT32 ch);
RE_UINT32 re_get_other_uppercase(RE_UINT32 ch);
RE_UINT32 re_get_noncharacter_code_point(RE_UINT32 ch);
RE_UINT32 re_get_other_grapheme_extend(RE_UINT32 ch);
RE_UINT32 re_get_ids_binary_operator(RE_UINT32 ch);
RE_UINT32 re_get_ids_trinary_operator(RE_UINT32 ch);
RE_UINT32 re_get_radical(RE_UINT32 ch);
RE_UINT32 re_get_unified_ideograph(RE_UINT32 ch);
RE_UINT32 re_get_other_default_ignorable_code_point(RE_UINT32 ch);
RE_UINT32 re_get_deprecated(RE_UINT32 ch);
RE_UINT32 re_get_soft_dotted(RE_UINT32 ch);
RE_UINT32 re_get_logical_order_exception(RE_UINT32 ch);
RE_UINT32 re_get_other_id_start(RE_UINT32 ch);
RE_UINT32 re_get_other_id_continue(RE_UINT32 ch);
RE_UINT32 re_get_sterm(RE_UINT32 ch);
RE_UINT32 re_get_variation_selector(RE_UINT32 ch);
RE_UINT32 re_get_pattern_white_space(RE_UINT32 ch);
RE_UINT32 re_get_pattern_syntax(RE_UINT32 ch);
RE_UINT32 re_get_hangul_syllable_type(RE_UINT32 ch);
RE_UINT32 re_get_bidi_class(RE_UINT32 ch);
RE_UINT32 re_get_canonical_combining_class(RE_UINT32 ch);
RE_UINT32 re_get_decomposition_type(RE_UINT32 ch);
RE_UINT32 re_get_east_asian_width(RE_UINT32 ch);
RE_UINT32 re_get_joining_group(RE_UINT32 ch);
RE_UINT32 re_get_joining_type(RE_UINT32 ch);
RE_UINT32 re_get_line_break(RE_UINT32 ch);
RE_UINT32 re_get_numeric_type(RE_UINT32 ch);
RE_UINT32 re_get_numeric_value(RE_UINT32 ch);
RE_UINT32 re_get_bidi_mirrored(RE_UINT32 ch);
RE_UINT32 re_get_indic_matra_category(RE_UINT32 ch);
RE_UINT32 re_get_indic_syllabic_category(RE_UINT32 ch);
RE_UINT32 re_get_alphanumeric(RE_UINT32 ch);
RE_UINT32 re_get_any(RE_UINT32 ch);
RE_UINT32 re_get_blank(RE_UINT32 ch);
RE_UINT32 re_get_graph(RE_UINT32 ch);
RE_UINT32 re_get_print(RE_UINT32 ch);
RE_UINT32 re_get_word(RE_UINT32 ch);
RE_UINT32 re_get_xdigit(RE_UINT32 ch);
int re_get_all_cases(RE_UINT32 ch, RE_UINT32* codepoints);
RE_UINT32 re_get_simple_case_folding(RE_UINT32 ch);
int re_get_full_case_folding(RE_UINT32 ch, RE_UINT32* codepoints);

684
lib/regex/regex.py Normal file
View File

@@ -0,0 +1,684 @@
#
# Secret Labs' Regular Expression Engine
#
# Copyright (c) 1998-2001 by Secret Labs AB. All rights reserved.
#
# This version of the SRE library can be redistributed under CNRI's
# Python 1.6 license. For any other use, please contact Secret Labs
# AB (info@pythonware.com).
#
# Portions of this engine have been developed in cooperation with
# CNRI. Hewlett-Packard provided funding for 1.6 integration and
# other compatibility work.
#
# 2010-01-16 mrab Python front-end re-written and extended
r"""Support for regular expressions (RE).
This module provides regular expression matching operations similar to those
found in Perl. It supports both 8-bit and Unicode strings; both the pattern and
the strings being processed can contain null bytes and characters outside the
US ASCII range.
Regular expressions can contain both special and ordinary characters. Most
ordinary characters, like "A", "a", or "0", are the simplest regular
expressions; they simply match themselves. You can concatenate ordinary
characters, so last matches the string 'last'.
There are a few differences between the old (legacy) behaviour and the new
(enhanced) behaviour, which are indicated by VERSION0 or VERSION1.
The special characters are:
"." Matches any character except a newline.
"^" Matches the start of the string.
"$" Matches the end of the string or just before the
newline at the end of the string.
"*" Matches 0 or more (greedy) repetitions of the preceding
RE. Greedy means that it will match as many repetitions
as possible.
"+" Matches 1 or more (greedy) repetitions of the preceding
RE.
"?" Matches 0 or 1 (greedy) of the preceding RE.
*?,+?,?? Non-greedy versions of the previous three special
characters.
*+,++,?+ Possessive versions of the previous three special
characters.
{m,n} Matches from m to n repetitions of the preceding RE.
{m,n}? Non-greedy version of the above.
{m,n}+ Possessive version of the above.
{...} Fuzzy matching constraints.
"\\" Either escapes special characters or signals a special
sequence.
[...] Indicates a set of characters. A "^" as the first
character indicates a complementing set.
"|" A|B, creates an RE that will match either A or B.
(...) Matches the RE inside the parentheses. The contents are
captured and can be retrieved or matched later in the
string.
(?flags-flags) VERSION1: Sets/clears the flags for the remainder of
the group or pattern; VERSION0: Sets the flags for the
entire pattern.
(?:...) Non-capturing version of regular parentheses.
(?>...) Atomic non-capturing version of regular parentheses.
(?flags-flags:...) Non-capturing version of regular parentheses with local
flags.
(?P<name>...) The substring matched by the group is accessible by
name.
(?<name>...) The substring matched by the group is accessible by
name.
(?P=name) Matches the text matched earlier by the group named
name.
(?#...) A comment; ignored.
(?=...) Matches if ... matches next, but doesn't consume the
string.
(?!...) Matches if ... doesn't match next.
(?<=...) Matches if preceded by ....
(?<!...) Matches if not preceded by ....
(?(id)yes|no) Matches yes pattern if group id matched, the (optional)
no pattern otherwise.
(?|...|...) (?|A|B), creates an RE that will match either A or B,
but reuses capture group numbers across the
alternatives.
The fuzzy matching constraints are: "i" to permit insertions, "d" to permit
deletions, "s" to permit substitutions, "e" to permit any of these. Limits are
optional with "<=" and "<". If any type of error is provided then any type not
provided is not permitted.
A cost equation may be provided.
Examples:
(?:fuzzy){i<=2}
(?:fuzzy){i<=1,s<=2,d<=1,1i+1s+1d<3}
VERSION1: Set operators are supported, and a set can include nested sets. The
set operators, in order of increasing precedence, are:
|| Set union ("x||y" means "x or y").
~~ (double tilde) Symmetric set difference ("x~~y" means "x or y, but not
both").
&& Set intersection ("x&&y" means "x and y").
-- (double dash) Set difference ("x--y" means "x but not y").
Implicit union, i.e., simple juxtaposition like in [ab], has the highest
precedence.
VERSION0 and VERSION1:
The special sequences consist of "\\" and a character from the list below. If
the ordinary character is not on the list, then the resulting RE will match the
second character.
\number Matches the contents of the group of the same number if
number is no more than 2 digits, otherwise the character
with the 3-digit octal code.
\a Matches the bell character.
\A Matches only at the start of the string.
\b Matches the empty string, but only at the start or end of a
word.
\B Matches the empty string, but not at the start or end of a
word.
\d Matches any decimal digit; equivalent to the set [0-9] when
matching a bytestring or a Unicode string with the ASCII
flag, or the whole range of Unicode digits when matching a
Unicode string.
\D Matches any non-digit character; equivalent to [^\d].
\f Matches the formfeed character.
\g<name> Matches the text matched by the group named name.
\G Matches the empty string, but only at the position where
the search started.
\L<name> Named list. The list is provided as a keyword argument.
\m Matches the empty string, but only at the start of a word.
\M Matches the empty string, but only at the end of a word.
\n Matches the newline character.
\N{name} Matches the named character.
\p{name=value} Matches the character if its property has the specified
value.
\P{name=value} Matches the character if its property hasn't the specified
value.
\r Matches the carriage-return character.
\s Matches any whitespace character; equivalent to
[ \t\n\r\f\v].
\S Matches any non-whitespace character; equivalent to [^\s].
\t Matches the tab character.
\uXXXX Matches the Unicode codepoint with 4-digit hex code XXXX.
\UXXXXXXXX Matches the Unicode codepoint with 8-digit hex code
XXXXXXXX.
\v Matches the vertical tab character.
\w Matches any alphanumeric character; equivalent to
[a-zA-Z0-9_] when matching a bytestring or a Unicode string
with the ASCII flag, or the whole range of Unicode
alphanumeric characters (letters plus digits plus
underscore) when matching a Unicode string. With LOCALE, it
will match the set [0-9_] plus characters defined as
letters for the current locale.
\W Matches the complement of \w; equivalent to [^\w].
\xXX Matches the character with 2-digit hex code XX.
\X Matches a grapheme.
\Z Matches only at the end of the string.
\\ Matches a literal backslash.
This module exports the following functions:
match Match a regular expression pattern at the beginning of a string.
fullmatch Match a regular expression pattern against all of a string.
search Search a string for the presence of a pattern.
sub Substitute occurrences of a pattern found in a string using a
template string.
subf Substitute occurrences of a pattern found in a string using a
format string.
subn Same as sub, but also return the number of substitutions made.
subfn Same as subf, but also return the number of substitutions made.
split Split a string by the occurrences of a pattern. VERSION1: will
split at zero-width match; VERSION0: won't split at zero-width
match.
splititer Return an iterator yielding the parts of a split string.
findall Find all occurrences of a pattern in a string.
finditer Return an iterator yielding a match object for each match.
compile Compile a pattern into a Pattern object.
purge Clear the regular expression cache.
escape Backslash all non-alphanumerics or special characters in a
string.
Most of the functions support a concurrent parameter: if True, the GIL will be
released during matching, allowing other Python threads to run concurrently. If
the string changes during matching, the behaviour is undefined. This parameter
is not needed when working on the builtin (immutable) string classes.
Some of the functions in this module take flags as optional parameters. Most of
these flags can also be set within an RE:
A a ASCII Make \w, \W, \b, \B, \d, and \D match the
corresponding ASCII character categories. Default
when matching a bytestring.
B b BESTMATCH Find the best fuzzy match (default is first).
D DEBUG Print the parsed pattern.
F f FULLCASE Use full case-folding when performing
case-insensitive matching in Unicode.
I i IGNORECASE Perform case-insensitive matching.
L L LOCALE Make \w, \W, \b, \B, \d, and \D dependent on the
current locale. (One byte per character only.)
M m MULTILINE "^" matches the beginning of lines (after a newline)
as well as the string. "$" matches the end of lines
(before a newline) as well as the end of the string.
E e ENHANCEMATCH Attempt to improve the fit after finding the first
fuzzy match.
R r REVERSE Searches backwards.
S s DOTALL "." matches any character at all, including the
newline.
U u UNICODE Make \w, \W, \b, \B, \d, and \D dependent on the
Unicode locale. Default when matching a Unicode
string.
V0 V0 VERSION0 Turn on the old legacy behaviour.
V1 V1 VERSION1 Turn on the new enhanced behaviour. This flag
includes the FULLCASE flag.
W w WORD Make \b and \B work with default Unicode word breaks
and make ".", "^" and "$" work with Unicode line
breaks.
X x VERBOSE Ignore whitespace and comments for nicer looking REs.
This module also defines an exception 'error'.
"""
# Public symbols.
__all__ = ["compile", "escape", "findall", "finditer", "fullmatch", "match",
"purge", "search", "split", "splititer", "sub", "subf", "subfn", "subn",
"template", "Scanner", "A", "ASCII", "B", "BESTMATCH", "D", "DEBUG", "E",
"ENHANCEMATCH", "S", "DOTALL", "F", "FULLCASE", "I", "IGNORECASE", "L",
"LOCALE", "M", "MULTILINE", "R", "REVERSE", "T", "TEMPLATE", "U", "UNICODE",
"V0", "VERSION0", "V1", "VERSION1", "X", "VERBOSE", "W", "WORD", "error",
"Regex"]
__version__ = "2.4.45"
# --------------------------------------------------------------------
# Public interface.
def match(pattern, string, flags=0, pos=None, endpos=None, partial=False,
concurrent=None, **kwargs):
"""Try to apply the pattern at the start of the string, returning a match
object, or None if no match was found."""
return _compile(pattern, flags, kwargs).match(string, pos, endpos,
concurrent, partial)
def fullmatch(pattern, string, flags=0, pos=None, endpos=None, partial=False,
concurrent=None, **kwargs):
"""Try to apply the pattern against all of the string, returning a match
object, or None if no match was found."""
return _compile(pattern, flags, kwargs).fullmatch(string, pos, endpos,
concurrent, partial)
def search(pattern, string, flags=0, pos=None, endpos=None, partial=False,
concurrent=None, **kwargs):
"""Search through string looking for a match to the pattern, returning a
match object, or None if no match was found."""
return _compile(pattern, flags, kwargs).search(string, pos, endpos,
concurrent, partial)
def sub(pattern, repl, string, count=0, flags=0, pos=None, endpos=None,
concurrent=None, **kwargs):
"""Return the string obtained by replacing the leftmost (or rightmost with a
reverse pattern) non-overlapping occurrences of the pattern in string by the
replacement repl. repl can be either a string or a callable; if a string,
backslash escapes in it are processed; if a callable, it's passed the match
object and must return a replacement string to be used."""
return _compile(pattern, flags, kwargs).sub(repl, string, count, pos,
endpos, concurrent)
def subf(pattern, format, string, count=0, flags=0, pos=None, endpos=None,
concurrent=None, **kwargs):
"""Return the string obtained by replacing the leftmost (or rightmost with a
reverse pattern) non-overlapping occurrences of the pattern in string by the
replacement format. format can be either a string or a callable; if a string,
it's treated as a format string; if a callable, it's passed the match object
and must return a replacement string to be used."""
return _compile(pattern, flags, kwargs).subf(format, string, count, pos,
endpos, concurrent)
def subn(pattern, repl, string, count=0, flags=0, pos=None, endpos=None,
concurrent=None, **kwargs):
"""Return a 2-tuple containing (new_string, number). new_string is the string
obtained by replacing the leftmost (or rightmost with a reverse pattern)
non-overlapping occurrences of the pattern in the source string by the
replacement repl. number is the number of substitutions that were made. repl
can be either a string or a callable; if a string, backslash escapes in it
are processed; if a callable, it's passed the match object and must return a
replacement string to be used."""
return _compile(pattern, flags, kwargs).subn(repl, string, count, pos,
endpos, concurrent)
def subfn(pattern, format, string, count=0, flags=0, pos=None, endpos=None,
concurrent=None, **kwargs):
"""Return a 2-tuple containing (new_string, number). new_string is the string
obtained by replacing the leftmost (or rightmost with a reverse pattern)
non-overlapping occurrences of the pattern in the source string by the
replacement format. number is the number of substitutions that were made. format
can be either a string or a callable; if a string, it's treated as a format
string; if a callable, it's passed the match object and must return a
replacement string to be used."""
return _compile(pattern, flags, kwargs).subfn(format, string, count, pos,
endpos, concurrent)
def split(pattern, string, maxsplit=0, flags=0, concurrent=None, **kwargs):
"""Split the source string by the occurrences of the pattern, returning a
list containing the resulting substrings. If capturing parentheses are used
in pattern, then the text of all groups in the pattern are also returned as
part of the resulting list. If maxsplit is nonzero, at most maxsplit splits
occur, and the remainder of the string is returned as the final element of
the list."""
return _compile(pattern, flags, kwargs).split(string, maxsplit, concurrent)
def splititer(pattern, string, maxsplit=0, flags=0, concurrent=None, **kwargs):
"Return an iterator yielding the parts of a split string."
return _compile(pattern, flags, kwargs).splititer(string, maxsplit,
concurrent)
def findall(pattern, string, flags=0, pos=None, endpos=None, overlapped=False,
concurrent=None, **kwargs):
"""Return a list of all matches in the string. The matches may be overlapped
if overlapped is True. If one or more groups are present in the pattern,
return a list of groups; this will be a list of tuples if the pattern has
more than one group. Empty matches are included in the result."""
return _compile(pattern, flags, kwargs).findall(string, pos, endpos,
overlapped, concurrent)
def finditer(pattern, string, flags=0, pos=None, endpos=None, overlapped=False,
partial=False, concurrent=None, **kwargs):
"""Return an iterator over all matches in the string. The matches may be
overlapped if overlapped is True. For each match, the iterator returns a
match object. Empty matches are included in the result."""
return _compile(pattern, flags, kwargs).finditer(string, pos, endpos,
overlapped, concurrent, partial)
def compile(pattern, flags=0, **kwargs):
"Compile a regular expression pattern, returning a pattern object."
return _compile(pattern, flags, kwargs)
def purge():
"Clear the regular expression cache"
_cache.clear()
def template(pattern, flags=0):
"Compile a template pattern, returning a pattern object."
return _compile(pattern, flags | TEMPLATE)
def escape(pattern, special_only=False):
"Escape all non-alphanumeric characters or special characters in pattern."
if isinstance(pattern, unicode):
s = []
if special_only:
for c in pattern:
if c in _METACHARS:
s.append(u"\\")
s.append(c)
elif c == u"\x00":
s.append(u"\\000")
else:
s.append(c)
else:
for c in pattern:
if c in _ALNUM:
s.append(c)
elif c == u"\x00":
s.append(u"\\000")
else:
s.append(u"\\")
s.append(c)
return u"".join(s)
else:
s = []
if special_only:
for c in pattern:
if c in _METACHARS:
s.append("\\")
s.append(c)
elif c == "\x00":
s.append("\\000")
else:
s.append(c)
else:
for c in pattern:
if c in _ALNUM:
s.append(c)
elif c == "\x00":
s.append("\\000")
else:
s.append("\\")
s.append(c)
return "".join(s)
# --------------------------------------------------------------------
# Internals.
import _regex_core
import sys
if sys.version_info < (2, 6):
from Python25 import _regex
elif sys.version_info < (2, 7):
from Python26 import _regex
else:
from Python27 import _regex
from threading import RLock as _RLock
from _regex_core import *
from _regex_core import (_ALL_VERSIONS, _ALL_ENCODINGS, _FirstSetError,
_UnscopedFlagSet, _check_group_features, _compile_firstset,
_compile_replacement, _flatten_code, _fold_case, _get_required_string,
_parse_pattern, _shrink_cache)
from _regex_core import (ALNUM as _ALNUM, Info as _Info, OP as _OP, Source as
_Source, Fuzzy as _Fuzzy)
# Version 0 is the old behaviour, compatible with the original 're' module.
# Version 1 is the new behaviour, which differs slightly.
DEFAULT_VERSION = VERSION0
_METACHARS = frozenset("()[]{}?*+|^$\\.")
_regex_core.DEFAULT_VERSION = DEFAULT_VERSION
# Caches for the patterns and replacements.
_cache = {}
_cache_lock = _RLock()
_named_args = {}
_replacement_cache = {}
# Maximum size of the cache.
_MAXCACHE = 500
_MAXREPCACHE = 500
def _compile(pattern, flags=0, kwargs={}):
"Compiles a regular expression to a PatternObject."
try:
# Do we know what keyword arguments are needed?
args_key = pattern, type(pattern), flags
args_needed = _named_args[args_key]
# Are we being provided with its required keyword arguments?
args_supplied = set()
if args_needed:
for k, v in args_needed:
try:
args_supplied.add((k, frozenset(kwargs[k])))
except KeyError:
raise error("missing named list")
args_supplied = frozenset(args_supplied)
# Have we already seen this regular expression and named list?
pattern_key = (pattern, type(pattern), flags, args_supplied,
DEFAULT_VERSION)
return _cache[pattern_key]
except KeyError:
# It's a new pattern, or new named list for a known pattern.
pass
# Guess the encoding from the class of the pattern string.
if isinstance(pattern, unicode):
guess_encoding = UNICODE
elif isinstance(pattern, str):
guess_encoding = ASCII
elif isinstance(pattern, _pattern_type):
if flags:
raise ValueError("can't process flags argument with a compiled pattern")
return pattern
else:
raise TypeError("first argument must be a string or compiled pattern")
# Set the default version in the core code in case it has been changed.
_regex_core.DEFAULT_VERSION = DEFAULT_VERSION
caught_exception = None
global_flags = flags
while True:
try:
source = _Source(pattern)
info = _Info(global_flags, source.char_type, kwargs)
info.guess_encoding = guess_encoding
source.ignore_space = bool(info.flags & VERBOSE)
parsed = _parse_pattern(source, info)
break
except _UnscopedFlagSet:
# Remember the global flags for the next attempt.
global_flags = info.global_flags
except error, e:
caught_exception = e
if caught_exception:
raise error(str(caught_exception))
if not source.at_end():
raise error("trailing characters in pattern at position %d" % source.pos)
# Check the global flags for conflicts.
version = (info.flags & _ALL_VERSIONS) or DEFAULT_VERSION
if version not in (0, VERSION0, VERSION1):
raise ValueError("VERSION0 and VERSION1 flags are mutually incompatible")
if (info.flags & _ALL_ENCODINGS) not in (0, ASCII, LOCALE, UNICODE):
raise ValueError("ASCII, LOCALE and UNICODE flags are mutually incompatible")
if not (info.flags & _ALL_ENCODINGS):
if isinstance(pattern, unicode):
info.flags |= UNICODE
else:
info.flags |= ASCII
reverse = bool(info.flags & REVERSE)
fuzzy = isinstance(parsed, _Fuzzy)
# Should we print the parsed pattern?
if flags & DEBUG:
parsed.dump(indent=0, reverse=reverse)
# Fix the group references.
parsed.fix_groups(reverse, False)
# Optimise the parsed pattern.
parsed = parsed.optimise(info)
parsed = parsed.pack_characters(info)
# Get the required string.
req_offset, req_chars, req_flags = _get_required_string(parsed, info.flags)
# Build the named lists.
named_lists = {}
named_list_indexes = [None] * len(info.named_lists_used)
args_needed = set()
for key, index in info.named_lists_used.items():
name, case_flags = key
values = frozenset(kwargs[name])
if case_flags:
items = frozenset(_fold_case(info, v) for v in values)
else:
items = values
named_lists[name] = values
named_list_indexes[index] = items
args_needed.add((name, values))
# Check the features of the groups.
_check_group_features(info, parsed)
# Compile the parsed pattern. The result is a list of tuples.
code = parsed.compile(reverse)
# Is there a group call to the pattern as a whole?
key = (0, reverse, fuzzy)
ref = info.call_refs.get(key)
if ref is not None:
code = [(_OP.CALL_REF, ref)] + code + [(_OP.END, )]
# Add the final 'success' opcode.
code += [(_OP.SUCCESS, )]
# Compile the additional copies of the groups that we need.
for group, rev, fuz in info.additional_groups:
code += group.compile(rev, fuz)
# Flatten the code into a list of ints.
code = _flatten_code(code)
if not parsed.has_simple_start():
# Get the first set, if possible.
try:
fs_code = _compile_firstset(info, parsed.get_firstset(reverse))
fs_code = _flatten_code(fs_code)
code = fs_code + code
except _FirstSetError:
pass
# The named capture groups.
index_group = dict((v, n) for n, v in info.group_index.items())
# Create the PatternObject.
#
# Local flags like IGNORECASE affect the code generation, but aren't needed
# by the PatternObject itself. Conversely, global flags like LOCALE _don't_
# affect the code generation but _are_ needed by the PatternObject.
compiled_pattern = _regex.compile(pattern, info.flags | version, code,
info.group_index, index_group, named_lists, named_list_indexes,
req_offset, req_chars, req_flags, info.group_count)
# Do we need to reduce the size of the cache?
if len(_cache) >= _MAXCACHE:
_cache_lock.acquire()
try:
_shrink_cache(_cache, _named_args, _MAXCACHE)
finally:
_cache_lock.release()
args_needed = frozenset(args_needed)
# Store this regular expression and named list.
pattern_key = (pattern, type(pattern), flags, args_needed, DEFAULT_VERSION)
_cache[pattern_key] = compiled_pattern
# Store what keyword arguments are needed.
_named_args[args_key] = args_needed
return compiled_pattern
def _compile_replacement_helper(pattern, template):
"Compiles a replacement template."
# This function is called by the _regex module.
# Have we seen this before?
key = pattern.pattern, pattern.flags, template
compiled = _replacement_cache.get(key)
if compiled is not None:
return compiled
if len(_replacement_cache) >= _MAXREPCACHE:
_replacement_cache.clear()
is_unicode = isinstance(template, unicode)
source = _Source(template)
if is_unicode:
def make_string(char_codes):
return u"".join(unichr(c) for c in char_codes)
else:
def make_string(char_codes):
return "".join(chr(c) for c in char_codes)
compiled = []
literal = []
while True:
ch = source.get()
if not ch:
break
if ch == "\\":
# '_compile_replacement' will return either an int group reference
# or a string literal. It returns items (plural) in order to handle
# a 2-character literal (an invalid escape sequence).
is_group, items = _compile_replacement(source, pattern, is_unicode)
if is_group:
# It's a group, so first flush the literal.
if literal:
compiled.append(make_string(literal))
literal = []
compiled.extend(items)
else:
literal.extend(items)
else:
literal.append(ord(ch))
# Flush the literal.
if literal:
compiled.append(make_string(literal))
_replacement_cache[key] = compiled
return compiled
# We define _pattern_type here after all the support objects have been defined.
_pattern_type = type(_compile("", 0, {}))
# We'll define an alias for the 'compile' function so that the repr of a
# pattern object is eval-able.
Regex = compile
# Register myself for pickling.
import copy_reg as _copy_reg
def _pickle(p):
return _compile, (p.pattern, p.flags)
_copy_reg.pickle(_pattern_type, _pickle, _compile)
if not hasattr(str, "format"):
# Strings don't have the .format method (below Python 2.6).
while True:
_start = __doc__.find(" subf")
if _start < 0:
break
_end = __doc__.find("\n", _start) + 1
while __doc__.startswith(" ", _end):
_end = __doc__.find("\n", _end) + 1
__doc__ = __doc__[ : _start] + __doc__[_end : ]
__all__ = [_name for _name in __all__ if not _name.startswith("subf")]
del _start, _end
del subf, subfn
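As a quick illustration of the named-list feature documented above (\L<name>), with illustrative values:

import regex

# Named lists are supplied as keyword arguments at match time.
m = regex.search(r"\L<shows>", "now watching Show Name",
                 shows=["Show Name", "Other Show"])
print(m.group(0) if m else None)  # -> "Show Name"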

3230
lib/regex/test_regex.py Normal file

File diff suppressed because it is too large.

View File

@@ -16,35 +16,33 @@
# You should have received a copy of the GNU General Public License
# along with SickRage. If not, see <http://www.gnu.org/licenses/>.
import re
import datetime
import os.path
import re
import threading
import regexes
import time
import sickbeard
from sickbeard import logger, helpers, scene_numbering, db
from sickbeard.exceptions import EpisodeNotFoundByAbsoluteNumberException
from sickbeard import logger, helpers, scene_numbering
from regex import regex
from dateutil import parser
nameparser_lock = threading.Lock()
class NameParser(object):
ALL_REGEX = 0
NORMAL_REGEX = 1
SPORTS_REGEX = 2
ANIME_REGEX = 3
def __init__(self, file_name=True, show=None, useIndexers=False):
def __init__(self, file_name=True, showObj=None, epObj=None, useIndexers=False, convert=False):
regexMode = self.ALL_REGEX
if show and show.is_anime:
if showObj and showObj.is_anime:
regexMode = self.ANIME_REGEX
elif show and show.is_sports:
elif showObj and showObj.is_sports:
regexMode = self.SPORTS_REGEX
elif show and not show.is_anime and not show.is_sports:
elif showObj and not showObj.is_anime and not showObj.is_sports:
regexMode = self.NORMAL_REGEX
self.file_name = file_name
@@ -53,7 +51,9 @@ class NameParser(object):
self._compile_regexes(self.regexMode)
self.showList = sickbeard.showList
self.useIndexers = useIndexers
self.show = show
self.showObj = showObj
self.epObj = epObj
self.convert = convert
def clean_series_name(self, series_name):
"""Cleans up series name by removing any . and _
@@ -85,7 +85,7 @@ class NameParser(object):
uncompiled_regex = [regexes.anime_regexes, regexes.sports_regexs, regexes.normal_regexes]
elif regexMode == self.NORMAL_REGEX:
logger.log(u"Using NORMAL regexs", logger.DEBUG)
logger.log(u"Using NORMAL reqgexs", logger.DEBUG)
uncompiled_regex = [regexes.normal_regexes]
elif regexMode == self.SPORTS_REGEX:
@@ -101,125 +101,120 @@ class NameParser(object):
uncompiled_regex = [regexes.normal_regexes]
for regexItem in uncompiled_regex:
for regex_type, regex in regexItem.items():
try:
self.compiled_regexes[regex_type]
except:
self.compiled_regexes[regex_type] = {}
for (cur_pattern_name, cur_pattern) in regex:
for regex_type, regex_pattern in regexItem.items():
for (cur_pattern_name, cur_pattern) in regex_pattern:
try:
cur_regex = re.compile(cur_pattern, re.VERBOSE | re.IGNORECASE)
except re.error, errormsg:
cur_regex = regex.compile(cur_pattern, regex.V1 | regex.VERBOSE | regex.IGNORECASE | regex.BESTMATCH)
except regex.error, errormsg:
logger.log(u"WARNING: Invalid episode_pattern, %s. %s" % (errormsg, cur_pattern))
else:
self.compiled_regexes[regex_type].update({cur_pattern_name: cur_regex})
self.compiled_regexes[(regex_type,cur_pattern_name)] = cur_regex
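# A hedged sketch of the fuzzy-matching flags now in play (this pattern
# is illustrative, not one of SickRage's):
#
#   import regex
#   # {e<=1} tolerates up to one edit (substitution, insertion or
#   # deletion) within the group; BESTMATCH makes the engine return the
#   # candidate with the fewest edits, not the first acceptable one.
#   pat = regex.compile(r"(?:Show[. ]Name){e<=1}",
#                       regex.IGNORECASE | regex.BESTMATCH)
#   m = pat.match("Shw.Name.S01E02")   # still matches, one edit away
#   m.fuzzy_counts                     # (subs, inserts, dels), e.g. (0, 0, 1)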
def _parse_string(self, name):
if not name:
return
for cur_regex_type, cur_regexes in self.compiled_regexes.items():
for cur_regex_name, cur_regex in cur_regexes.items():
match = cur_regex.match(name)
result = ParseResult(name)
for (cur_regex_type, cur_regex_name), cur_regex in self.compiled_regexes.items():
match = cur_regex.fullmatch(name)
if not match:
continue
result.which_regex = [cur_regex_name]
named_groups = match.groupdict().keys()
if 'series_name' in named_groups:
result.series_name = match.group('series_name')
if result.series_name:
result.series_name = self.clean_series_name(result.series_name)
else:
    continue
if 'season_num' in named_groups:
tmp_season = int(match.group('season_num'))
if cur_regex_name == 'bare' and tmp_season in (19, 20):
continue
result.season_number = tmp_season
result = ParseResult(name)
result.which_regex = [cur_regex_name]
if 'ep_num' in named_groups:
ep_num = self._convert_number(match.group('ep_num'))
if 'extra_ep_num' in named_groups and match.group('extra_ep_num'):
result.episode_numbers = range(ep_num, self._convert_number(match.group('extra_ep_num')) + 1)
else:
result.episode_numbers = [ep_num]
named_groups = match.groupdict().keys()
if 'ep_ab_num' in named_groups:
ep_ab_num = self._convert_number(match.group('ep_ab_num'))
if 'extra_ab_ep_num' in named_groups and match.group('extra_ab_ep_num'):
result.ab_episode_numbers = range(ep_ab_num,
self._convert_number(match.group('extra_ab_ep_num')) + 1)
else:
result.ab_episode_numbers = [ep_ab_num]
if 'series_name' in named_groups:
result.series_name = match.group('series_name')
if result.series_name:
result.series_name = self.clean_series_name(result.series_name)
if 'sports_event_id' in named_groups:
sports_event_id = match.group('sports_event_id')
if sports_event_id:
result.sports_event_id = int(match.group('sports_event_id'))
cur_show = helpers.get_show_by_name(result.series_name, useIndexer=self.useIndexers)
if not cur_show:
continue
# if we have a show object to compare against then do so else return the result anyways
if self.show:
if self.show.indexerid != cur_show.indexerid:
logger.log(
u"I expected an episode of the show " + self.show.name + " but the parser thinks its the show " + cur_show.name + ". I will continue thinking its " + self.show.name,
logger.WARNING)
continue
result.show = cur_show
if 'season_num' in named_groups:
tmp_season = int(match.group('season_num'))
if cur_regex_name == 'bare' and tmp_season in (19, 20):
continue
result.season_number = tmp_season
if 'ep_num' in named_groups:
ep_num = self._convert_number(match.group('ep_num'))
if 'extra_ep_num' in named_groups and match.group('extra_ep_num'):
result.episode_numbers = range(ep_num, self._convert_number(match.group('extra_ep_num')) + 1)
else:
result.episode_numbers = [ep_num]
if 'ep_ab_num' in named_groups:
ep_ab_num = self._convert_number(match.group('ep_ab_num'))
if 'extra_ab_ep_num' in named_groups and match.group('extra_ab_ep_num'):
result.ab_episode_numbers = range(ep_ab_num,
self._convert_number(match.group('extra_ab_ep_num')) + 1)
else:
result.ab_episode_numbers = [ep_ab_num]
if 'sports_event_id' in named_groups:
sports_event_id = match.group('sports_event_id')
if sports_event_id:
result.sports_event_id = int(match.group('sports_event_id'))
if 'sports_event_name' in named_groups:
result.sports_event_name = match.group('sports_event_name')
if result.sports_event_name:
result.sports_event_name = self.clean_series_name(result.sports_event_name)
if 'sports_event_date' in named_groups:
sports_event_date = match.group('sports_event_date')
if sports_event_date:
try:
result.sports_event_date = parser.parse(sports_event_date, fuzzy=True).date()
except:
continue
if 'air_year' in named_groups and 'air_month' in named_groups and 'air_day' in named_groups:
year = int(match.group('air_year'))
month = int(match.group('air_month'))
day = int(match.group('air_day'))
if 'sports_event_name' in named_groups:
result.sports_event_name = match.group('sports_event_name')
if result.sports_event_name:
result.sports_event_name = self.clean_series_name(result.sports_event_name)
if 'sports_event_date' in named_groups:
sports_event_date = match.group('sports_event_date')
if sports_event_date:
try:
dtStr = '%s-%s-%s' % (year, month, day)
result.air_date = datetime.datetime.strptime(dtStr, "%Y-%m-%d").date()
result.sports_event_date = parser.parse(sports_event_date, fuzzy=True).date()
except:
continue
if 'extra_info' in named_groups:
tmp_extra_info = match.group('extra_info')
if 'air_year' in named_groups and 'air_month' in named_groups and 'air_day' in named_groups:
year = int(match.group('air_year'))
month = int(match.group('air_month'))
day = int(match.group('air_day'))
# Show.S04.Special or Show.S05.Part.2.Extras is almost certainly not every episode in the season
if tmp_extra_info and cur_regex_name == 'season_only' and re.search(
r'([. _-]|^)(special|extra)s?\w*([. _-]|$)', tmp_extra_info, re.I):
continue
result.extra_info = tmp_extra_info
try:
dtStr = '%s-%s-%s' % (year, month, day)
result.air_date = datetime.datetime.strptime(dtStr, "%Y-%m-%d").date()
except:
continue
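# (Illustration: the fuzzy=True used for sports_event_date above lets
#  dateutil skip tokens that are not part of a date, e.g.
#  parser.parse('Show aired on May 24, 2014 in HD', fuzzy=True).date()
#  returns datetime.date(2014, 5, 24).)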
if 'release_group' in named_groups:
result.release_group = match.group('release_group')
if 'extra_info' in named_groups:
tmp_extra_info = match.group('extra_info')
if result.show and result.show.is_anime and cur_regex_type in ['anime', 'normal']:
return result
elif result.show and result.show.is_sports and cur_regex_type == 'sports':
return result
elif cur_regex_type == 'normal':
return result
# Show.S04.Special or Show.S05.Part.2.Extras is almost certainly not every episode in the season
if tmp_extra_info and cur_regex_name == 'season_only' and regex.search(
r'([. _-]|^)(special|extra)s?\w*([. _-]|$)', tmp_extra_info, regex.I):
continue
result.extra_info = tmp_extra_info
return None
if 'release_group' in named_groups:
result.release_group = match.group('release_group')
cur_show = helpers.get_show_by_name(result.series_name, useIndexer=self.useIndexers)
if cur_show:
if self.showObj:
if self.showObj.indexerid != cur_show.indexerid:
logger.log(
u"I expected an episode of the show " + self.showObj.name + " but the parser thinks its the show " + cur_show.name + ". I will continue thinking its " + self.showObj.name,
logger.WARNING)
return
result.show = cur_show
if not result.show:
continue
# Match found!
break
if self.convert:
result = result.convert()
return result
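# Note: match() above became fullmatch(), which the regex module
# provides even on Python 2; it only succeeds when the whole name
# matches, not just a prefix:
#
#   pat = regex.compile(r"\w+\.S\d{2}E\d{2}")
#   bool(pat.match("Show.S01E02.720p"))      # True, a matching prefix is enough
#   bool(pat.fullmatch("Show.S01E02.720p"))  # False, the entire string must match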
def _combine_results(self, first, second, attr):
# if the first doesn't exist then return the second or nothing
@ -291,7 +286,7 @@ class NameParser(object):
# break it into parts if there are any (dirname, file name, extension)
dir_name, file_name = os.path.split(name)
ext_match = re.match('(.*)\.\w{3,4}$', file_name)
ext_match = regex.match('(.*)\.\w{3,4}$', file_name)
if ext_match and self.file_name:
base_file_name = ext_match.group(1)
else:
@ -364,7 +359,8 @@ class ParseResult(object):
release_group=None,
air_date=None,
ab_episode_numbers=None,
show=None
show=None,
score=None
):
self.original_name = original_name
@ -392,6 +388,7 @@ class ParseResult(object):
self.which_regex = None
self.show = show
self.score = score
def __eq__(self, other):
if not other:
@ -419,6 +416,8 @@ class ParseResult(object):
return False
if self.show != other.show:
return False
if self.score != other.score:
return False
return True
@ -479,7 +478,7 @@ class ParseResult(object):
new_episode_numbers.append(e)
new_season_numbers.append(s)
# need to do a quick sanity check here. It's possible that we now have episodes
# from more than one season (by tvdb numbering), and this is just too much
# for sickbeard, so we'd need to flag it.
new_season_numbers = list(set(new_season_numbers)) # remove duplicates
@ -546,17 +545,7 @@ class NameParserCache(object):
logger.log("Using cached parse result for: " + name, logger.DEBUG)
return self._previous_parsed[name]
name_parser_cache = NameParserCache()
class InvalidNameException(Exception):
"The given name is not valid"
class MultipleSceneShowResults(Exception):
pass
class MultipleSceneEpisodeResults(Exception):
pass
"The given name is not valid"

View File

@ -56,6 +56,31 @@ class TVShow():
self.anime = 0
self.scene = 0
def _is_anime(self):
    return self.anime > 0
is_anime = property(_is_anime)
def _is_sports(self):
    return self.sports > 0
is_sports = property(_is_sports)
def _is_scene(self):
    return self.scene > 0
is_scene = property(_is_scene)
class TVEpisode(tv.TVEpisode):
def __init__(self, season, episode, absolute_number, name):
self.relatedEps = []
@ -139,9 +164,7 @@ def check_valid_sports_naming(pattern=None):
return valid
def validate_name(pattern, multi=None, file_only=False, abd=False, sports=False):
ep = _generate_sample_ep(multi, abd, sports)
parser = NameParser(True)
ep = generate_sample_ep(multi, abd, sports)
new_name = ep.formatted_filename(pattern, multi) + '.ext'
new_path = ep.formatted_dir(pattern, multi)
@ -154,9 +177,11 @@ def validate_name(pattern, multi=None, file_only=False, abd=False, sports=False)
logger.log(u"Trying to parse " + new_name, logger.DEBUG)
parser = NameParser(True)
try:
result = parser.parse(new_name)
except InvalidNameException, e :
except Exception, e:
logger.log(u"Unable to parse " + new_name + ", not valid", logger.DEBUG)
return False
@ -177,7 +202,7 @@ def validate_name(pattern, multi=None, file_only=False, abd=False, sports=False)
return True
def _generate_sample_ep(multi=None, abd=False, sports=False, anime=False):
def generate_sample_ep(multi=None, abd=False, sports=False, anime=False):
# make a fake episode object
ep = TVEpisode(2, 3, 3, "Ep Name")
@ -215,6 +240,6 @@ def _generate_sample_ep(multi=None, abd=False, sports=False, anime=False):
def test_name(pattern, multi=None, abd=False, sports=False, anime=False):
ep = _generate_sample_ep(multi, abd, sports, anime)
ep = generate_sample_ep(multi, abd, sports, anime)
return {'name': ep.formatted_filename(pattern, multi), 'dir': ep.formatted_dir(pattern, multi)}
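A hedged sketch of driving the renamed helper (assuming the fake show is called 'Show Name'; the sample episode above is season 2, episode 3, "Ep Name"):

print(test_name('%SN - %Sx%0E - %EN'))
# e.g. {'name': 'Show Name - 2x03 - Ep Name', 'dir': ...}; the 'dir' half
# depends on whether the pattern contains a path separator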

View File

@ -119,7 +119,7 @@ class ProperFinder():
try:
myParser = NameParser(False)
parse_result = myParser.parse(curProper.name).convert()
parse_result = myParser.parse(curProper.name)
except InvalidNameException:
logger.log(u"Unable to parse the filename " + curProper.name + " into a valid episode", logger.DEBUG)
continue
@ -138,7 +138,7 @@ class ProperFinder():
showObj = parse_result.show
logger.log(
u"Successful match! Result " + parse_result.series_name + " matched to show " + showObj.name,
u"Successful match! Result " + parse_result.original_name + " matched to show " + showObj.name,
logger.DEBUG)
# set the indexerid in the db to the show's indexerid

View File

@ -278,8 +278,8 @@ class GenericProvider:
# parse the file name
try:
myParser = NameParser(False, show=show, useIndexers=manualSearch)
parse_result = myParser.parse(title).convert()
myParser = NameParser(False, showObj=show, epObj=ep_obj, convert=True)
parse_result = myParser.parse(title)
except InvalidNameException:
logger.log(u"Unable to parse the filename " + title + " into a valid episode", logger.WARNING)
continue
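The provider change above folds the old parse-then-convert two-step into the parser itself; a sketch of the before/after call shapes (names as in the diff):

# before: conversion to scene numbering was a separate second step
#   parse_result = NameParser(False, show=show).parse(title).convert()
# after: the parser converts internally when built with convert=True
parse_result = NameParser(False, showObj=show, epObj=ep_obj, convert=True).parse(title)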

View File

@ -1895,6 +1895,26 @@ class TVEpisode(object):
else:
return ek.ek(os.path.join, self.show.location, self.location)
def createStrings(self, pattern=None):
patterns = [
'%S.N.S%SE%0E',
'%S.N.S%0SE%E',
'%S.N.S%SE%E',
'%S.N.S%0SE%0E',
'%SN S%SE%0E',
'%SN S%0SE%E',
'%SN S%SE%E',
'%SN S%0SE%0E'
]
strings = []
if not pattern:
for p in patterns:
strings += [self._format_pattern(p)]
return strings
return self._format_pattern(pattern)
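# A hedged sketch: for season 4, episode 5 of a show called 'Show Name',
# createStrings() with no pattern would yield one candidate per pattern:
#   ['Show.Name.S4E05', 'Show.Name.S04E5', 'Show.Name.S4E5', 'Show.Name.S04E05',
#    'Show Name S4E05', 'Show Name S04E5', 'Show Name S4E5', 'Show Name S04E05']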
def prettyName(self):
"""
Returns the name of this episode in a "pretty" human-readable format. Used for logging
@ -1903,16 +1923,12 @@ class TVEpisode(object):
Returns: A string representing the episode's name and season/ep numbers
"""
if self.show.is_anime and not self.show.is_scene:
return self._format_pattern('%SN - %A - %EN')
elif self.show.is_anime and self.show.is_scene:
return self._format_pattern('%SN - %XA - %EN')
elif self.show.is_scene:
return self._format_pattern('%SN - %XSx%0XE - %EN')
if self.show.anime and not self.show.scene:
return self._format_pattern('%SN - %AB - %EN')
elif self.show.air_by_date:
return self._format_pattern('%SN - %AD - %EN')
else:
return self._format_pattern('%SN - %Sx%0E - %EN')
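# The anime branch now uses %AB (renamed from %A); per the substitution
# table below, '%(#)03d' % {'#': n} pads the absolute number to three
# digits: 7 -> '007', 142 -> '142', so an anime episode pretty-prints
# as e.g. 'Show Name - 007 - Ep Name'.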
def _ep_name(self):
"""
@ -1980,9 +1996,8 @@ class TVEpisode(object):
if not name:
return ''
np = NameParser(name)
try:
np = NameParser(name)
parse_result = np.parse(name)
except InvalidNameException, e:
logger.log(u"Unable to get parse release_group: " + ex(e), logger.DEBUG)
@ -2017,7 +2032,7 @@ class TVEpisode(object):
'%0XS': '%02d' % self.scene_season,
'%XE': str(self.scene_episode),
'%0XE': '%02d' % self.scene_episode,
'%A': '%(#)03d' % {'#': self.absolute_number},
'%AB': '%(#)03d' % {'#': self.absolute_number},
'%XA': '%(#)03d' % {'#': self.scene_absolute_number},
'%RN': release_name(self.release_name),
'%RG': release_group(self.release_name),