mirror of
https://github.com/moparisthebest/SickRage
synced 2024-12-12 11:02:21 -05:00
Added in regex matching with fuzzy matching.
Shows now display Indexer absolute numbering. Improved speed of parsing search results. Fixed episode naming issues.
This commit is contained in:
parent
2c37523ab7
commit
d7396896b5
@ -302,7 +302,7 @@
|
|||||||
<h2>#if int($epResult["season"]) == 0 then "Specials" else "Season "+str($epResult["season"])#</h2>
|
<h2>#if int($epResult["season"]) == 0 then "Specials" else "Season "+str($epResult["season"])#</h2>
|
||||||
</td>
|
</td>
|
||||||
</tr>
|
</tr>
|
||||||
<tr id="season-$epResult["season"]-cols"><th width="1%"><input type="checkbox" class="seasonCheck" id="$epResult["season"]" /></th><th>NFO</th><th>TBN</th><th>Episode</th>#if $scene then "<th>Scene #</th>" else ""# #if $scene_anime then "<th>Scene Absolute #</th>" else ""#<th>Name</th><th class="nowrap">Airdate</th><th>Filename</th>#if $sickbeard.USE_SUBTITLES and $show.subtitles then "<th>Subtitles</th>" else ""#<th>Status</th><th>Search</th></tr>
|
<tr id="season-$epResult["season"]-cols"><th width="1%"><input type="checkbox" class="seasonCheck" id="$epResult["season"]" /></th><th>NFO</th><th>TBN</th><th>Episode</th>#if $show.is_anime then "<th>Absolute</th>" else ""# #if $scene then "<th>Scene #</th>" else ""# #if $scene_anime then "<th>Scene Absolute</th>" else ""#<th>Name</th><th class="nowrap">Airdate</th><th>Filename</th>#if $sickbeard.USE_SUBTITLES and $show.subtitles then "<th>Subtitles</th>" else ""#<th>Status</th><th>Search</th></tr>
|
||||||
#set $curSeason = int($epResult["season"])
|
#set $curSeason = int($epResult["season"])
|
||||||
#end if
|
#end if
|
||||||
|
|
||||||
@ -317,6 +317,9 @@
|
|||||||
<td align="center"><img src="$sbRoot/images/#if $epResult["hasnfo"] == 1 then "nfo.gif\" alt=\"Y" else "nfo-no.gif\" alt=\"N"#" width="23" height="11" /></td>
|
<td align="center"><img src="$sbRoot/images/#if $epResult["hasnfo"] == 1 then "nfo.gif\" alt=\"Y" else "nfo-no.gif\" alt=\"N"#" width="23" height="11" /></td>
|
||||||
<td align="center"><img src="$sbRoot/images/#if $epResult["hastbn"] == 1 then "tbn.gif\" alt=\"Y" else "tbn-no.gif\" alt=\"N"#" width="23" height="11" /></td>
|
<td align="center"><img src="$sbRoot/images/#if $epResult["hastbn"] == 1 then "tbn.gif\" alt=\"Y" else "tbn-no.gif\" alt=\"N"#" width="23" height="11" /></td>
|
||||||
<td align="center">$epResult["episode"]</td>
|
<td align="center">$epResult["episode"]</td>
|
||||||
|
#if $show.is_anime:
|
||||||
|
<td align="center">$epResult["absolute_number"]</td>
|
||||||
|
#end if
|
||||||
|
|
||||||
#if $scene:
|
#if $scene:
|
||||||
<td align="center">
|
<td align="center">
|
||||||
|
78
lib/fuzzywuzzy/StringMatcher.py
Normal file
78
lib/fuzzywuzzy/StringMatcher.py
Normal file
@ -0,0 +1,78 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# encoding: utf-8
|
||||||
|
"""
|
||||||
|
StringMatcher.py
|
||||||
|
|
||||||
|
ported from python-Levenshtein
|
||||||
|
[https://github.com/miohtama/python-Levenshtein]
|
||||||
|
"""
|
||||||
|
|
||||||
|
from Levenshtein import *
|
||||||
|
from warnings import warn
|
||||||
|
|
||||||
|
class StringMatcher:
    """A SequenceMatcher-like class built on the top of Levenshtein.

    The two sequences are stored on the instance and every derived value
    (ratio, distance, opcodes, editops, matching blocks) is cached until
    one of the sequences is replaced. The heavy lifting is delegated to
    the module-level ``ratio``, ``distance``, ``opcodes``, ``editops`` and
    ``matching_blocks`` callables that this file star-imports.
    """

    def _reset_cache(self):
        """Forget every cached result; ``None`` marks "not computed yet"."""
        self._ratio = self._distance = None
        self._opcodes = self._editops = self._matching_blocks = None

    def __init__(self, isjunk=None, seq1='', seq2=''):
        """Store the two sequences to compare.

        ``isjunk`` is accepted only for signature compatibility with
        ``difflib.SequenceMatcher``; a truthy value triggers a warning and
        is otherwise ignored.
        """
        if isjunk:
            warn("isjunk is NOT implemented, it will be ignored")
        self._str1, self._str2 = seq1, seq2
        self._reset_cache()

    def set_seqs(self, seq1, seq2):
        """Replace both sequences and drop all cached results."""
        self._str1, self._str2 = seq1, seq2
        self._reset_cache()

    def set_seq1(self, seq1):
        """Replace the first sequence and drop all cached results."""
        self._str1 = seq1
        self._reset_cache()

    def set_seq2(self, seq2):
        """Replace the second sequence and drop all cached results."""
        self._str2 = seq2
        self._reset_cache()

    def get_opcodes(self):
        """Return the (cached) opcodes turning sequence 1 into sequence 2."""
        # Compare against None: an empty result is still a valid cache hit.
        if self._opcodes is None:
            if self._editops:
                # Reuse the edit operations that were already computed.
                self._opcodes = opcodes(self._editops, self._str1, self._str2)
            else:
                self._opcodes = opcodes(self._str1, self._str2)
        return self._opcodes

    def get_editops(self):
        """Return the (cached) edit operations between the two sequences."""
        if self._editops is None:
            if self._opcodes:
                # Reuse the opcodes that were already computed.
                self._editops = editops(self._opcodes, self._str1, self._str2)
            else:
                self._editops = editops(self._str1, self._str2)
        return self._editops

    def get_matching_blocks(self):
        """Return the (cached) matching blocks derived from the opcodes."""
        if self._matching_blocks is None:
            self._matching_blocks = matching_blocks(self.get_opcodes(),
                                                    self._str1, self._str2)
        return self._matching_blocks

    def ratio(self):
        """Return the (cached) similarity ratio of the two sequences."""
        # ``is None`` keeps a legitimate 0.0 ratio cached instead of
        # recomputing it on every call.
        if self._ratio is None:
            self._ratio = ratio(self._str1, self._str2)
        return self._ratio

    def quick_ratio(self):
        """Return the same value as ``ratio()``; no cheaper bound is used."""
        # This is usually quick enough :o)
        if self._ratio is None:
            self._ratio = ratio(self._str1, self._str2)
        return self._ratio

    def real_quick_ratio(self):
        """Return a length-only upper bound on the similarity ratio.

        Two empty sequences yield 1.0 (as ``difflib.SequenceMatcher`` does)
        rather than dividing by zero.
        """
        len1, len2 = len(self._str1), len(self._str2)
        total = len1 + len2
        if not total:
            return 1.0
        return 2.0 * min(len1, len2) / total

    def distance(self):
        """Return the (cached) edit distance between the two sequences."""
        # ``is None`` keeps a legitimate distance of 0 cached.
        if self._distance is None:
            self._distance = distance(self._str1, self._str2)
        return self._distance
|
0
lib/fuzzywuzzy/__init__.py
Normal file
0
lib/fuzzywuzzy/__init__.py
Normal file
263
lib/fuzzywuzzy/fuzz.py
Normal file
263
lib/fuzzywuzzy/fuzz.py
Normal file
@ -0,0 +1,263 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# encoding: utf-8
|
||||||
|
"""
|
||||||
|
fuzz.py
|
||||||
|
|
||||||
|
Copyright (c) 2011 Adam Cohen
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining
|
||||||
|
a copy of this software and associated documentation files (the
|
||||||
|
"Software"), to deal in the Software without restriction, including
|
||||||
|
without limitation the rights to use, copy, modify, merge, publish,
|
||||||
|
distribute, sublicense, and/or sell copies of the Software, and to
|
||||||
|
permit persons to whom the Software is furnished to do so, subject to
|
||||||
|
the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be
|
||||||
|
included in all copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||||
|
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
||||||
|
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
||||||
|
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
||||||
|
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
"""
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
try:
|
||||||
|
from StringMatcher import StringMatcher as SequenceMatcher
|
||||||
|
except:
|
||||||
|
from difflib import SequenceMatcher
|
||||||
|
|
||||||
|
from . import utils
|
||||||
|
|
||||||
|
|
||||||
|
###########################
|
||||||
|
# Basic Scoring Functions #
|
||||||
|
###########################
|
||||||
|
|
||||||
|
|
||||||
|
def ratio(s1, s2):
    """Return the similarity of ``s1`` and ``s2`` as an integer score.

    Raises ``TypeError`` when either argument is ``None``. Both values are
    first passed through ``utils.make_type_consistent``; if either result
    is empty the score is 0. Otherwise the ``SequenceMatcher`` ratio is
    scaled by 100 and rounded with ``utils.intr``.
    """
    if s1 is None:
        raise TypeError("s1 is None")
    if s2 is None:
        raise TypeError("s2 is None")

    s1, s2 = utils.make_type_consistent(s1, s2)
    if not (len(s1) and len(s2)):
        return 0

    matcher = SequenceMatcher(None, s1, s2)
    return utils.intr(100 * matcher.ratio())
|
||||||
|
|
||||||
|
|
||||||
|
# todo: skip duplicate indexes for a little more speed
|
||||||
|
def partial_ratio(s1, s2):
    """Return the best score of the shorter string against the longer one.

    The shorter string is compared with same-length windows of the longer
    string, one window per matching block, and the best window ratio is
    returned as an integer score. Raises ``TypeError`` when either
    argument is ``None``; returns 0 when either string is empty.
    """
    if s1 is None:
        raise TypeError("s1 is None")
    if s2 is None:
        raise TypeError("s2 is None")
    s1, s2 = utils.make_type_consistent(s1, s2)
    if len(s1) == 0 or len(s2) == 0:
        return 0

    if len(s1) <= len(s2):
        shorter, longer = s1, s2
    else:
        shorter, longer = s2, s1

    m = SequenceMatcher(None, shorter, longer)
    blocks = m.get_matching_blocks()

    # each block represents a sequence of matching characters in a string
    # of the form (idx_1, idx_2, len)
    # the best partial match will block align with at least one of those blocks
    #   e.g. shorter = "abcd", longer = XXXbcdeEEE
    #   block = (1,3,3)
    #   best score === ratio("abcd", "Xbcd")
    scores = []
    for block in blocks:
        # Align the window so the block lines up; never start before index 0.
        long_start = max(block[1] - block[0], 0)
        long_end = long_start + len(shorter)
        long_substr = longer[long_start:long_end]

        r = SequenceMatcher(None, shorter, long_substr).ratio()
        if r > .995:
            # Good enough to round to a perfect score; stop early.
            return 100
        scores.append(r)

    # Round like ratio() does instead of truncating, so that e.g. 0.896
    # scores 90 rather than 89 (consistent with the .995 -> 100 shortcut).
    return utils.intr(100 * max(scores))
|
||||||
|
|
||||||
|
|
||||||
|
##############################
|
||||||
|
# Advanced Scoring Functions #
|
||||||
|
##############################
|
||||||
|
|
||||||
|
# Sorted Token
|
||||||
|
# find all alphanumeric tokens in the string
|
||||||
|
# sort those tokens and take ratio of resulting joined strings
|
||||||
|
# controls for unordered string elements
|
||||||
|
def _token_sort(s1, s2, partial=True, force_ascii=True):
    """Score two strings after sorting their tokens.

    Each input is run through ``utils.full_process``, split on whitespace,
    sorted and re-joined with single spaces. The two normalised strings
    are scored with ``partial_ratio`` when ``partial`` is true, otherwise
    with ``ratio``. Raises ``TypeError`` when either argument is ``None``.
    """
    if s1 is None:
        raise TypeError("s1 is None")
    if s2 is None:
        raise TypeError("s2 is None")

    def _sorted_tokens(raw):
        # Pull the tokens, sort them, and glue them back together.
        words = utils.full_process(raw, force_ascii=force_ascii).split()
        return " ".join(sorted(words)).strip()

    sorted1 = _sorted_tokens(s1)
    sorted2 = _sorted_tokens(s2)

    scorer = partial_ratio if partial else ratio
    return scorer(sorted1, sorted2)
|
||||||
|
|
||||||
|
|
||||||
|
def token_sort_ratio(s1, s2, force_ascii=True):
    """Return the token-sorted score of ``s1`` and ``s2`` using ``ratio``."""
    use_partial = False
    return _token_sort(s1, s2, partial=use_partial, force_ascii=force_ascii)
|
||||||
|
|
||||||
|
|
||||||
|
def partial_token_sort_ratio(s1, s2, force_ascii=True):
    """Return the token-sorted score of ``s1`` and ``s2`` using ``partial_ratio``."""
    use_partial = True
    return _token_sort(s1, s2, partial=use_partial, force_ascii=force_ascii)
|
||||||
|
|
||||||
|
|
||||||
|
# Token Set
|
||||||
|
# find all alphanumeric tokens in each string...treat them as a set
|
||||||
|
# construct two strings of the form
|
||||||
|
# <sorted_intersection><sorted_remainder>
|
||||||
|
# take ratios of those two strings
|
||||||
|
# controls for unordered partial matches
|
||||||
|
def _token_set(s1, s2, partial=True, force_ascii=True):
    """Score two strings by comparing their token sets.

    Both inputs are run through ``utils.full_process`` and split into sets
    of tokens. Three strings are built: the sorted intersection, and the
    intersection followed by each side's sorted remainder. The best
    pairwise score among them is returned.

    ``partial`` selects the pairwise scorer: ``partial_ratio`` when true,
    ``ratio`` otherwise. Raises ``TypeError`` when either argument is
    ``None``; returns 0 when either processed string is empty.
    """
    if s1 is None:
        raise TypeError("s1 is None")
    if s2 is None:
        raise TypeError("s2 is None")

    p1 = utils.full_process(s1, force_ascii=force_ascii)
    p2 = utils.full_process(s2, force_ascii=force_ascii)

    if not utils.validate_string(p1):
        return 0
    if not utils.validate_string(p2):
        return 0

    # pull tokens (p1/p2 are already processed, so a plain split suffices)
    tokens1 = set(p1.split())
    tokens2 = set(p2.split())

    intersection = tokens1.intersection(tokens2)
    diff1to2 = tokens1.difference(tokens2)
    diff2to1 = tokens2.difference(tokens1)

    sorted_sect = " ".join(sorted(intersection))
    sorted_1to2 = " ".join(sorted(diff1to2))
    sorted_2to1 = " ".join(sorted(diff2to1))

    # strip: either half may be empty, leaving a stray separator
    combined_1to2 = (sorted_sect + " " + sorted_1to2).strip()
    combined_2to1 = (sorted_sect + " " + sorted_2to1).strip()
    sorted_sect = sorted_sect.strip()

    # Honour the ``partial`` flag; previously it was silently ignored and
    # the plain ratio was always used.
    ratio_func = partial_ratio if partial else ratio

    pairwise = [
        ratio_func(sorted_sect, combined_1to2),
        ratio_func(sorted_sect, combined_2to1),
        ratio_func(combined_1to2, combined_2to1),
    ]
    return max(pairwise)
|
||||||
|
|
||||||
|
|
||||||
|
def token_set_ratio(s1, s2, force_ascii=True):
    """Return the token-set score of ``s1`` and ``s2`` with ``partial=False``."""
    use_partial = False
    return _token_set(s1, s2, partial=use_partial, force_ascii=force_ascii)
|
||||||
|
|
||||||
|
|
||||||
|
def partial_token_set_ratio(s1, s2, force_ascii=True):
    """Return the token-set score of ``s1`` and ``s2`` with ``partial=True``."""
    use_partial = True
    return _token_set(s1, s2, partial=use_partial, force_ascii=force_ascii)
|
||||||
|
|
||||||
|
|
||||||
|
# TODO: numerics
|
||||||
|
|
||||||
|
###################
|
||||||
|
# Combination API #
|
||||||
|
###################
|
||||||
|
|
||||||
|
# q is for quick
|
||||||
|
def QRatio(s1, s2, force_ascii=True):
    """Quick ratio: process both strings, then score them with ``ratio``.

    Returns 0 when either processed string fails ``utils.validate_string``.
    """
    p1 = utils.full_process(s1, force_ascii=force_ascii)
    p2 = utils.full_process(s2, force_ascii=force_ascii)

    both_usable = utils.validate_string(p1) and utils.validate_string(p2)
    if not both_usable:
        return 0

    return ratio(p1, p2)
|
||||||
|
|
||||||
|
|
||||||
|
def UQRatio(s1, s2):
    """Return ``QRatio`` of the two strings without forcing ASCII."""
    keep_unicode = False
    return QRatio(s1, s2, force_ascii=keep_unicode)
|
||||||
|
|
||||||
|
|
||||||
|
# w is for weighted
|
||||||
|
def WRatio(s1, s2, force_ascii=True):
    """Weighted ratio: combine several scorers, scaled by length disparity.

    Both strings are processed with ``utils.full_process``; 0 is returned
    if either result fails ``utils.validate_string``. The plain ``ratio``
    is always a candidate. When the length ratio is below 1.5 the token
    sort/set ratios (scaled by .95) are the other candidates; otherwise
    the partial variants are used, scaled by .90 (or .6 when the length
    ratio exceeds 8). The best candidate is truncated to an int.
    """
    p1 = utils.full_process(s1, force_ascii=force_ascii)
    p2 = utils.full_process(s2, force_ascii=force_ascii)

    if not (utils.validate_string(p1) and utils.validate_string(p2)):
        return 0

    unbase_scale = .95

    base = ratio(p1, p2)
    size1, size2 = len(p1), len(p2)
    len_ratio = float(max(size1, size2)) / min(size1, size2)

    # if strings are similar length, don't use partials
    if len_ratio < 1.5:
        tsor = token_sort_ratio(p1, p2, force_ascii=force_ascii) * unbase_scale
        tser = token_set_ratio(p1, p2, force_ascii=force_ascii) * unbase_scale
        return int(max(base, tsor, tser))

    # if one string is much much shorter than the other, trust partials less
    partial_scale = .6 if len_ratio > 8 else .90

    partial = partial_ratio(p1, p2) * partial_scale
    ptsor = partial_token_sort_ratio(p1, p2, force_ascii=force_ascii) \
        * unbase_scale * partial_scale
    ptser = partial_token_set_ratio(p1, p2, force_ascii=force_ascii) \
        * unbase_scale * partial_scale

    return int(max(base, partial, ptsor, ptser))
|
||||||
|
|
||||||
|
|
||||||
|
def UWRatio(s1, s2):
    """Return ``WRatio`` of the two strings without forcing ASCII."""
    keep_unicode = False
    return WRatio(s1, s2, force_ascii=keep_unicode)
|
119
lib/fuzzywuzzy/process.py
Normal file
119
lib/fuzzywuzzy/process.py
Normal file
@ -0,0 +1,119 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# encoding: utf-8
|
||||||
|
"""
|
||||||
|
process.py
|
||||||
|
|
||||||
|
Copyright (c) 2011 Adam Cohen
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining
|
||||||
|
a copy of this software and associated documentation files (the
|
||||||
|
"Software"), to deal in the Software without restriction, including
|
||||||
|
without limitation the rights to use, copy, modify, merge, publish,
|
||||||
|
distribute, sublicense, and/or sell copies of the Software, and to
|
||||||
|
permit persons to whom the Software is furnished to do so, subject to
|
||||||
|
the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be
|
||||||
|
included in all copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||||
|
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
||||||
|
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
||||||
|
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
||||||
|
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
"""
|
||||||
|
import itertools
|
||||||
|
|
||||||
|
from . import fuzz
|
||||||
|
from . import utils
|
||||||
|
|
||||||
|
|
||||||
|
def extract(query, choices, processor=None, scorer=None, limit=5):
    """Find best matches in a list of choices, return a list of tuples
    containing the match and its score.

    Arguments:
        query       -- an object representing the thing we want to find
        choices     -- a list of objects we are attempting to extract
                       values from
        scorer      -- f(QUERY, OBJ) --> INT. Called as
                       ``scorer(query, processed_choice)``. By default
                       fuzz.WRatio is used.
        processor   -- f(OBJ_A) --> OBJ_B, where the output is an input
                       to scorer. For example, "processor = lambda x:
                       x[0]" would pass the first element of each choice
                       to the scorer. By default utils.full_process is
                       used.
        limit       -- maximum number of (choice, score) pairs returned.

    Returns a list of ``(choice, score)`` tuples holding the original,
    unprocessed choices, sorted by descending score. Choices with equal
    scores keep their input order. ``None`` or empty ``choices`` yields
    an empty list.
    """
    if choices is None or len(choices) == 0:
        return []

    # default, turn whatever the choice is into a workable string
    if processor is None:
        processor = utils.full_process

    # default: wratio
    if scorer is None:
        scorer = fuzz.WRatio

    scored = [(choice, scorer(query, processor(choice))) for choice in choices]

    # list.sort is stable, so ties stay in their original relative order.
    scored.sort(key=lambda pair: pair[1], reverse=True)
    return scored[:limit]
|
||||||
|
|
||||||
|
|
||||||
|
def extractBests(query, choices, processor=None, scorer=None, score_cutoff=0, limit=5):
    """Find best matches above a score in a list of choices, return a
    list of tuples containing the match and its score.

    Convenience wrapper around extract(); see that function for the full
    argument list.

    Optional parameter: score_cutoff.
    Results are taken from the top of the ranked list until the first one
    whose score is not strictly greater than score_cutoff.
    """
    ranked = extract(query, choices, processor, scorer, limit)

    kept = []
    for candidate in ranked:
        # The list is sorted by descending score, so stop at the first miss.
        if not candidate[1] > score_cutoff:
            break
        kept.append(candidate)
    return kept
|
||||||
|
|
||||||
|
|
||||||
|
def extractOne(query, choices, processor=None, scorer=None, score_cutoff=0):
    """Find the best match above a score in a list of choices, return a
    tuple containing the match and its score if it is above the
    threshold, or None.

    Convenience wrapper around extract(); see that function for the full
    argument list.

    Optional parameter: score_cutoff.
    If the best choice has a score of less than or equal to score_cutoff
    (or there are no choices at all) None is returned.
    """
    top = extract(query, choices, processor, scorer, limit=1)
    if len(top) == 0:
        return None

    best = top[0]
    return best if best[1] > score_cutoff else None
|
41
lib/fuzzywuzzy/string_processing.py
Normal file
41
lib/fuzzywuzzy/string_processing.py
Normal file
@ -0,0 +1,41 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
import re
|
||||||
|
|
||||||
|
|
||||||
|
class StringProcessor(object):
    """
    This class defines methods to process strings in the most
    efficient way. Ideally all the methods below use unicode strings
    for both input and output.
    """

    # Compiled once at class creation instead of on every call.
    _non_word_regex = re.compile(r"(?ui)\W")

    @classmethod
    def replace_non_letters_non_numbers_with_whitespace(cls, a_string):
        """
        Replace every character matched by ``\\W`` with a single space.

        Each such character is replaced individually, so a run of N of
        them becomes N spaces. The underscore counts as a word character
        and is left untouched.
        """
        return cls._non_word_regex.sub(" ", a_string)

    @classmethod
    def strip(cls, a_string):
        """
        This function strips leading and trailing white space.
        """
        return a_string.strip()

    @classmethod
    def to_lower_case(cls, a_string):
        """
        This function returns the lower-cased version of the string given.
        """
        return a_string.lower()

    @classmethod
    def to_upper_case(cls, a_string):
        """
        This function returns the upper-cased version of the string given.
        """
        return a_string.upper()
|
76
lib/fuzzywuzzy/utils.py
Normal file
76
lib/fuzzywuzzy/utils.py
Normal file
@ -0,0 +1,76 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
import sys
|
||||||
|
|
||||||
|
from fuzzywuzzy.string_processing import StringProcessor
|
||||||
|
|
||||||
|
|
||||||
|
PY3 = sys.version_info[0] == 3
|
||||||
|
|
||||||
|
|
||||||
|
def validate_string(s):
    """Return True if ``s`` has a length greater than zero, else False.

    Objects that do not support ``len()`` (for example ``None`` or a
    number) are reported as invalid rather than raising.
    """
    try:
        return len(s) > 0
    except (TypeError, ValueError, OverflowError):
        # Only the errors len() itself raises are treated as "invalid";
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        return False
|
||||||
|
|
||||||
|
# ascii dammit! -- every character with an ordinal from 128 to 255.
# str('') keeps the native str type even though this module uses
# unicode_literals.
bad_chars = str('').join(chr(code) for code in range(128, 256))
if PY3:
    # Mapping form used by asciionly() on Python 3: a None value deletes
    # the character.
    translation_table = dict((ord(ch), None) for ch in bad_chars)
|
||||||
|
|
||||||
|
|
||||||
|
def asciionly(s):
    """Return ``s`` with every character listed in ``bad_chars`` removed."""
    if not PY3:
        # Python 2 byte strings: translate(table, deletechars).
        return s.translate(None, bad_chars)
    # Python 3: translate() takes the ordinal -> None mapping.
    return s.translate(translation_table)
|
||||||
|
|
||||||
|
|
||||||
|
def asciidammit(s):
    """Coerce ``s`` to a native string with non-ASCII characters removed.

    ``str`` input goes straight through ``asciionly``. Python 2 ``unicode``
    input is encoded to ASCII (dropping what cannot be encoded) first.
    Anything else is converted to text and processed again.
    """
    # ``unicode`` only exists on Python 2; fall back to ``str`` so the
    # function no longer raises NameError on Python 3.
    try:
        text_type = unicode
    except NameError:
        text_type = str

    if type(s) is str:
        return asciionly(s)
    elif text_type is str and isinstance(s, bytes):
        # Python 3 bytes: decode instead of taking the "b'...'" repr.
        return asciionly(s.decode('ascii', 'ignore'))
    elif type(s) is text_type:
        return asciionly(s.encode('ascii', 'ignore'))
    else:
        return asciidammit(text_type(s))
|
||||||
|
|
||||||
|
|
||||||
|
def make_type_consistent(s1, s2):
    """Return ``(s1, s2)`` such that both values share one string type.

    If both are already ``str``, or both are already the text type, they
    are returned unchanged; otherwise both are converted to the text type
    (``unicode`` on Python 2, ``str`` on Python 3).
    """
    # ``unicode`` only exists on Python 2; fall back to ``str`` so mixed
    # input no longer raises NameError on Python 3.
    try:
        text_type = unicode
    except NameError:
        text_type = str

    if isinstance(s1, str) and isinstance(s2, str):
        return s1, s2

    elif isinstance(s1, text_type) and isinstance(s2, text_type):
        return s1, s2

    else:
        return text_type(s1), text_type(s2)
|
||||||
|
|
||||||
|
|
||||||
|
def full_process(s, force_ascii=False):
    """Normalise a string for comparison.

    Steps, in order:
        -- return "" straight away when ``s`` is None
        -- if ``force_ascii`` is true, convert with ``asciidammit``
        -- replace non letter/number characters with whitespace
        -- force to lower case
        -- trim leading and trailing whitespace
    """
    if s is None:
        return ""

    text = asciidammit(s) if force_ascii else s

    # Keep only Letters and Numbers (see Unicode docs).
    spaced = StringProcessor.replace_non_letters_non_numbers_with_whitespace(text)
    # Lower-case first, then drop the surrounding whitespace.
    lowered = StringProcessor.to_lower_case(spaced)
    return StringProcessor.strip(lowered)
|
||||||
|
|
||||||
|
|
||||||
|
def intr(n):
    """Return ``n`` rounded with the builtin ``round`` and cast to ``int``."""
    rounded = round(n)
    return int(rounded)
|
1
lib/regex/Python25/__init__.py
Normal file
1
lib/regex/Python25/__init__.py
Normal file
@ -0,0 +1 @@
|
|||||||
|
|
BIN
lib/regex/Python25/_regex.pyd
Normal file
BIN
lib/regex/Python25/_regex.pyd
Normal file
Binary file not shown.
1
lib/regex/Python26/__init__.py
Normal file
1
lib/regex/Python26/__init__.py
Normal file
@ -0,0 +1 @@
|
|||||||
|
|
BIN
lib/regex/Python26/_regex.pyd
Normal file
BIN
lib/regex/Python26/_regex.pyd
Normal file
Binary file not shown.
1
lib/regex/Python27/__init__.py
Normal file
1
lib/regex/Python27/__init__.py
Normal file
@ -0,0 +1 @@
|
|||||||
|
|
BIN
lib/regex/Python27/_regex.pyd
Normal file
BIN
lib/regex/Python27/_regex.pyd
Normal file
Binary file not shown.
1
lib/regex/__init__.py
Normal file
1
lib/regex/__init__.py
Normal file
@ -0,0 +1 @@
|
|||||||
|
|
22557
lib/regex/_regex.c
Normal file
22557
lib/regex/_regex.c
Normal file
File diff suppressed because it is too large
Load Diff
228
lib/regex/_regex.h
Normal file
228
lib/regex/_regex.h
Normal file
@ -0,0 +1,228 @@
|
|||||||
|
/*
|
||||||
|
* Secret Labs' Regular Expression Engine
|
||||||
|
*
|
||||||
|
* regular expression matching engine
|
||||||
|
*
|
||||||
|
* Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
|
||||||
|
*
|
||||||
|
* NOTE: This file is generated by regex.py. If you need
|
||||||
|
* to change anything in here, edit regex.py and run it.
|
||||||
|
*
|
||||||
|
* 2010-01-16 mrab Re-written
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* Supports Unicode version 6.3.0. */
|
||||||
|
|
||||||
|
#define RE_MAGIC 20100116
|
||||||
|
|
||||||
|
#include "_regex_unicode.h"
|
||||||
|
|
||||||
|
/* Operators. */
|
||||||
|
#define RE_OP_FAILURE 0
|
||||||
|
#define RE_OP_SUCCESS 1
|
||||||
|
#define RE_OP_ANY 2
|
||||||
|
#define RE_OP_ANY_ALL 3
|
||||||
|
#define RE_OP_ANY_ALL_REV 4
|
||||||
|
#define RE_OP_ANY_REV 5
|
||||||
|
#define RE_OP_ANY_U 6
|
||||||
|
#define RE_OP_ANY_U_REV 7
|
||||||
|
#define RE_OP_ATOMIC 8
|
||||||
|
#define RE_OP_BOUNDARY 9
|
||||||
|
#define RE_OP_BRANCH 10
|
||||||
|
#define RE_OP_CALL_REF 11
|
||||||
|
#define RE_OP_CHARACTER 12
|
||||||
|
#define RE_OP_CHARACTER_IGN 13
|
||||||
|
#define RE_OP_CHARACTER_IGN_REV 14
|
||||||
|
#define RE_OP_CHARACTER_REV 15
|
||||||
|
#define RE_OP_DEFAULT_BOUNDARY 16
|
||||||
|
#define RE_OP_DEFAULT_END_OF_WORD 17
|
||||||
|
#define RE_OP_DEFAULT_START_OF_WORD 18
|
||||||
|
#define RE_OP_END 19
|
||||||
|
#define RE_OP_END_OF_LINE 20
|
||||||
|
#define RE_OP_END_OF_LINE_U 21
|
||||||
|
#define RE_OP_END_OF_STRING 22
|
||||||
|
#define RE_OP_END_OF_STRING_LINE 23
|
||||||
|
#define RE_OP_END_OF_STRING_LINE_U 24
|
||||||
|
#define RE_OP_END_OF_WORD 25
|
||||||
|
#define RE_OP_FUZZY 26
|
||||||
|
#define RE_OP_GRAPHEME_BOUNDARY 27
|
||||||
|
#define RE_OP_GREEDY_REPEAT 28
|
||||||
|
#define RE_OP_GROUP 29
|
||||||
|
#define RE_OP_GROUP_CALL 30
|
||||||
|
#define RE_OP_GROUP_EXISTS 31
|
||||||
|
#define RE_OP_LAZY_REPEAT 32
|
||||||
|
#define RE_OP_LOOKAROUND 33
|
||||||
|
#define RE_OP_NEXT 34
|
||||||
|
#define RE_OP_PROPERTY 35
|
||||||
|
#define RE_OP_PROPERTY_IGN 36
|
||||||
|
#define RE_OP_PROPERTY_IGN_REV 37
|
||||||
|
#define RE_OP_PROPERTY_REV 38
|
||||||
|
#define RE_OP_RANGE 39
|
||||||
|
#define RE_OP_RANGE_IGN 40
|
||||||
|
#define RE_OP_RANGE_IGN_REV 41
|
||||||
|
#define RE_OP_RANGE_REV 42
|
||||||
|
#define RE_OP_REF_GROUP 43
|
||||||
|
#define RE_OP_REF_GROUP_FLD 44
|
||||||
|
#define RE_OP_REF_GROUP_FLD_REV 45
|
||||||
|
#define RE_OP_REF_GROUP_IGN 46
|
||||||
|
#define RE_OP_REF_GROUP_IGN_REV 47
|
||||||
|
#define RE_OP_REF_GROUP_REV 48
|
||||||
|
#define RE_OP_SEARCH_ANCHOR 49
|
||||||
|
#define RE_OP_SET_DIFF 50
|
||||||
|
#define RE_OP_SET_DIFF_IGN 51
|
||||||
|
#define RE_OP_SET_DIFF_IGN_REV 52
|
||||||
|
#define RE_OP_SET_DIFF_REV 53
|
||||||
|
#define RE_OP_SET_INTER 54
|
||||||
|
#define RE_OP_SET_INTER_IGN 55
|
||||||
|
#define RE_OP_SET_INTER_IGN_REV 56
|
||||||
|
#define RE_OP_SET_INTER_REV 57
|
||||||
|
#define RE_OP_SET_SYM_DIFF 58
|
||||||
|
#define RE_OP_SET_SYM_DIFF_IGN 59
|
||||||
|
#define RE_OP_SET_SYM_DIFF_IGN_REV 60
|
||||||
|
#define RE_OP_SET_SYM_DIFF_REV 61
|
||||||
|
#define RE_OP_SET_UNION 62
|
||||||
|
#define RE_OP_SET_UNION_IGN 63
|
||||||
|
#define RE_OP_SET_UNION_IGN_REV 64
|
||||||
|
#define RE_OP_SET_UNION_REV 65
|
||||||
|
#define RE_OP_START_OF_LINE 66
|
||||||
|
#define RE_OP_START_OF_LINE_U 67
|
||||||
|
#define RE_OP_START_OF_STRING 68
|
||||||
|
#define RE_OP_START_OF_WORD 69
|
||||||
|
#define RE_OP_STRING 70
|
||||||
|
#define RE_OP_STRING_FLD 71
|
||||||
|
#define RE_OP_STRING_FLD_REV 72
|
||||||
|
#define RE_OP_STRING_IGN 73
|
||||||
|
#define RE_OP_STRING_IGN_REV 74
|
||||||
|
#define RE_OP_STRING_REV 75
|
||||||
|
#define RE_OP_STRING_SET 76
|
||||||
|
#define RE_OP_STRING_SET_FLD 77
|
||||||
|
#define RE_OP_STRING_SET_FLD_REV 78
|
||||||
|
#define RE_OP_STRING_SET_IGN 79
|
||||||
|
#define RE_OP_STRING_SET_IGN_REV 80
|
||||||
|
#define RE_OP_STRING_SET_REV 81
|
||||||
|
#define RE_OP_BODY_END 82
|
||||||
|
#define RE_OP_BODY_START 83
|
||||||
|
#define RE_OP_END_FUZZY 84
|
||||||
|
#define RE_OP_END_GREEDY_REPEAT 85
|
||||||
|
#define RE_OP_END_GROUP 86
|
||||||
|
#define RE_OP_END_LAZY_REPEAT 87
|
||||||
|
#define RE_OP_GREEDY_REPEAT_ONE 88
|
||||||
|
#define RE_OP_GROUP_RETURN 89
|
||||||
|
#define RE_OP_LAZY_REPEAT_ONE 90
|
||||||
|
#define RE_OP_MATCH_BODY 91
|
||||||
|
#define RE_OP_MATCH_TAIL 92
|
||||||
|
#define RE_OP_START_GROUP 93
|
||||||
|
|
||||||
|
char* re_op_text[] = {
|
||||||
|
"RE_OP_FAILURE",
|
||||||
|
"RE_OP_SUCCESS",
|
||||||
|
"RE_OP_ANY",
|
||||||
|
"RE_OP_ANY_ALL",
|
||||||
|
"RE_OP_ANY_ALL_REV",
|
||||||
|
"RE_OP_ANY_REV",
|
||||||
|
"RE_OP_ANY_U",
|
||||||
|
"RE_OP_ANY_U_REV",
|
||||||
|
"RE_OP_ATOMIC",
|
||||||
|
"RE_OP_BOUNDARY",
|
||||||
|
"RE_OP_BRANCH",
|
||||||
|
"RE_OP_CALL_REF",
|
||||||
|
"RE_OP_CHARACTER",
|
||||||
|
"RE_OP_CHARACTER_IGN",
|
||||||
|
"RE_OP_CHARACTER_IGN_REV",
|
||||||
|
"RE_OP_CHARACTER_REV",
|
||||||
|
"RE_OP_DEFAULT_BOUNDARY",
|
||||||
|
"RE_OP_DEFAULT_END_OF_WORD",
|
||||||
|
"RE_OP_DEFAULT_START_OF_WORD",
|
||||||
|
"RE_OP_END",
|
||||||
|
"RE_OP_END_OF_LINE",
|
||||||
|
"RE_OP_END_OF_LINE_U",
|
||||||
|
"RE_OP_END_OF_STRING",
|
||||||
|
"RE_OP_END_OF_STRING_LINE",
|
||||||
|
"RE_OP_END_OF_STRING_LINE_U",
|
||||||
|
"RE_OP_END_OF_WORD",
|
||||||
|
"RE_OP_FUZZY",
|
||||||
|
"RE_OP_GRAPHEME_BOUNDARY",
|
||||||
|
"RE_OP_GREEDY_REPEAT",
|
||||||
|
"RE_OP_GROUP",
|
||||||
|
"RE_OP_GROUP_CALL",
|
||||||
|
"RE_OP_GROUP_EXISTS",
|
||||||
|
"RE_OP_LAZY_REPEAT",
|
||||||
|
"RE_OP_LOOKAROUND",
|
||||||
|
"RE_OP_NEXT",
|
||||||
|
"RE_OP_PROPERTY",
|
||||||
|
"RE_OP_PROPERTY_IGN",
|
||||||
|
"RE_OP_PROPERTY_IGN_REV",
|
||||||
|
"RE_OP_PROPERTY_REV",
|
||||||
|
"RE_OP_RANGE",
|
||||||
|
"RE_OP_RANGE_IGN",
|
||||||
|
"RE_OP_RANGE_IGN_REV",
|
||||||
|
"RE_OP_RANGE_REV",
|
||||||
|
"RE_OP_REF_GROUP",
|
||||||
|
"RE_OP_REF_GROUP_FLD",
|
||||||
|
"RE_OP_REF_GROUP_FLD_REV",
|
||||||
|
"RE_OP_REF_GROUP_IGN",
|
||||||
|
"RE_OP_REF_GROUP_IGN_REV",
|
||||||
|
"RE_OP_REF_GROUP_REV",
|
||||||
|
"RE_OP_SEARCH_ANCHOR",
|
||||||
|
"RE_OP_SET_DIFF",
|
||||||
|
"RE_OP_SET_DIFF_IGN",
|
||||||
|
"RE_OP_SET_DIFF_IGN_REV",
|
||||||
|
"RE_OP_SET_DIFF_REV",
|
||||||
|
"RE_OP_SET_INTER",
|
||||||
|
"RE_OP_SET_INTER_IGN",
|
||||||
|
"RE_OP_SET_INTER_IGN_REV",
|
||||||
|
"RE_OP_SET_INTER_REV",
|
||||||
|
"RE_OP_SET_SYM_DIFF",
|
||||||
|
"RE_OP_SET_SYM_DIFF_IGN",
|
||||||
|
"RE_OP_SET_SYM_DIFF_IGN_REV",
|
||||||
|
"RE_OP_SET_SYM_DIFF_REV",
|
||||||
|
"RE_OP_SET_UNION",
|
||||||
|
"RE_OP_SET_UNION_IGN",
|
||||||
|
"RE_OP_SET_UNION_IGN_REV",
|
||||||
|
"RE_OP_SET_UNION_REV",
|
||||||
|
"RE_OP_START_OF_LINE",
|
||||||
|
"RE_OP_START_OF_LINE_U",
|
||||||
|
"RE_OP_START_OF_STRING",
|
||||||
|
"RE_OP_START_OF_WORD",
|
||||||
|
"RE_OP_STRING",
|
||||||
|
"RE_OP_STRING_FLD",
|
||||||
|
"RE_OP_STRING_FLD_REV",
|
||||||
|
"RE_OP_STRING_IGN",
|
||||||
|
"RE_OP_STRING_IGN_REV",
|
||||||
|
"RE_OP_STRING_REV",
|
||||||
|
"RE_OP_STRING_SET",
|
||||||
|
"RE_OP_STRING_SET_FLD",
|
||||||
|
"RE_OP_STRING_SET_FLD_REV",
|
||||||
|
"RE_OP_STRING_SET_IGN",
|
||||||
|
"RE_OP_STRING_SET_IGN_REV",
|
||||||
|
"RE_OP_STRING_SET_REV",
|
||||||
|
"RE_OP_BODY_END",
|
||||||
|
"RE_OP_BODY_START",
|
||||||
|
"RE_OP_END_FUZZY",
|
||||||
|
"RE_OP_END_GREEDY_REPEAT",
|
||||||
|
"RE_OP_END_GROUP",
|
||||||
|
"RE_OP_END_LAZY_REPEAT",
|
||||||
|
"RE_OP_GREEDY_REPEAT_ONE",
|
||||||
|
"RE_OP_GROUP_RETURN",
|
||||||
|
"RE_OP_LAZY_REPEAT_ONE",
|
||||||
|
"RE_OP_MATCH_BODY",
|
||||||
|
"RE_OP_MATCH_TAIL",
|
||||||
|
"RE_OP_START_GROUP",
|
||||||
|
};
|
||||||
|
|
||||||
|
#define RE_FLAG_ASCII 0x80
|
||||||
|
#define RE_FLAG_BESTMATCH 0x1000
|
||||||
|
#define RE_FLAG_DEBUG 0x200
|
||||||
|
#define RE_FLAG_DOTALL 0x10
|
||||||
|
#define RE_FLAG_ENHANCEMATCH 0x8000
|
||||||
|
#define RE_FLAG_FULLCASE 0x4000
|
||||||
|
#define RE_FLAG_IGNORECASE 0x2
|
||||||
|
#define RE_FLAG_LOCALE 0x4
|
||||||
|
#define RE_FLAG_MULTILINE 0x8
|
||||||
|
#define RE_FLAG_REVERSE 0x400
|
||||||
|
#define RE_FLAG_TEMPLATE 0x1
|
||||||
|
#define RE_FLAG_UNICODE 0x20
|
||||||
|
#define RE_FLAG_VERBOSE 0x40
|
||||||
|
#define RE_FLAG_VERSION0 0x2000
|
||||||
|
#define RE_FLAG_VERSION1 0x100
|
||||||
|
#define RE_FLAG_WORD 0x800
|
4086
lib/regex/_regex_core.py
Normal file
4086
lib/regex/_regex_core.py
Normal file
File diff suppressed because it is too large
Load Diff
12748
lib/regex/_regex_unicode.c
Normal file
12748
lib/regex/_regex_unicode.c
Normal file
File diff suppressed because it is too large
Load Diff
218
lib/regex/_regex_unicode.h
Normal file
218
lib/regex/_regex_unicode.h
Normal file
@ -0,0 +1,218 @@
|
|||||||
|
typedef unsigned char RE_UINT8;
|
||||||
|
typedef signed char RE_INT8;
|
||||||
|
typedef unsigned short RE_UINT16;
|
||||||
|
typedef signed short RE_INT16;
|
||||||
|
typedef unsigned int RE_UINT32;
|
||||||
|
typedef signed int RE_INT32;
|
||||||
|
|
||||||
|
typedef unsigned char BOOL;
|
||||||
|
enum {FALSE, TRUE};
|
||||||
|
|
||||||
|
#define RE_ASCII_MAX 0x7F
|
||||||
|
#define RE_LOCALE_MAX 0xFF
|
||||||
|
#define RE_UNICODE_MAX 0x10FFFF
|
||||||
|
|
||||||
|
#define RE_MAX_CASES 4
|
||||||
|
#define RE_MAX_FOLDED 3
|
||||||
|
|
||||||
|
typedef struct RE_Property {
|
||||||
|
RE_UINT16 name;
|
||||||
|
RE_UINT8 id;
|
||||||
|
RE_UINT8 value_set;
|
||||||
|
} RE_Property;
|
||||||
|
|
||||||
|
typedef struct RE_PropertyValue {
|
||||||
|
RE_UINT16 name;
|
||||||
|
RE_UINT8 value_set;
|
||||||
|
RE_UINT8 id;
|
||||||
|
} RE_PropertyValue;
|
||||||
|
|
||||||
|
typedef RE_UINT32 (*RE_GetPropertyFunc)(RE_UINT32 ch);
|
||||||
|
|
||||||
|
#define RE_PROP_GC 0x0
|
||||||
|
#define RE_PROP_CASED 0xA
|
||||||
|
#define RE_PROP_UPPERCASE 0x9
|
||||||
|
#define RE_PROP_LOWERCASE 0x8
|
||||||
|
|
||||||
|
#define RE_PROP_C 30
|
||||||
|
#define RE_PROP_L 31
|
||||||
|
#define RE_PROP_M 32
|
||||||
|
#define RE_PROP_N 33
|
||||||
|
#define RE_PROP_P 34
|
||||||
|
#define RE_PROP_S 35
|
||||||
|
#define RE_PROP_Z 36
|
||||||
|
#define RE_PROP_ASSIGNED 38
|
||||||
|
#define RE_PROP_CASEDLETTER 37
|
||||||
|
|
||||||
|
#define RE_PROP_CN 0
|
||||||
|
#define RE_PROP_LU 1
|
||||||
|
#define RE_PROP_LL 2
|
||||||
|
#define RE_PROP_LT 3
|
||||||
|
#define RE_PROP_LM 4
|
||||||
|
#define RE_PROP_LO 5
|
||||||
|
#define RE_PROP_MN 6
|
||||||
|
#define RE_PROP_ME 7
|
||||||
|
#define RE_PROP_MC 8
|
||||||
|
#define RE_PROP_ND 9
|
||||||
|
#define RE_PROP_NL 10
|
||||||
|
#define RE_PROP_NO 11
|
||||||
|
#define RE_PROP_ZS 12
|
||||||
|
#define RE_PROP_ZL 13
|
||||||
|
#define RE_PROP_ZP 14
|
||||||
|
#define RE_PROP_CC 15
|
||||||
|
#define RE_PROP_CF 16
|
||||||
|
#define RE_PROP_CO 17
|
||||||
|
#define RE_PROP_CS 18
|
||||||
|
#define RE_PROP_PD 19
|
||||||
|
#define RE_PROP_PS 20
|
||||||
|
#define RE_PROP_PE 21
|
||||||
|
#define RE_PROP_PC 22
|
||||||
|
#define RE_PROP_PO 23
|
||||||
|
#define RE_PROP_SM 24
|
||||||
|
#define RE_PROP_SC 25
|
||||||
|
#define RE_PROP_SK 26
|
||||||
|
#define RE_PROP_SO 27
|
||||||
|
#define RE_PROP_PI 28
|
||||||
|
#define RE_PROP_PF 29
|
||||||
|
|
||||||
|
#define RE_PROP_C_MASK 0x00078001
|
||||||
|
#define RE_PROP_L_MASK 0x0000003E
|
||||||
|
#define RE_PROP_M_MASK 0x000001C0
|
||||||
|
#define RE_PROP_N_MASK 0x00000E00
|
||||||
|
#define RE_PROP_P_MASK 0x30F80000
|
||||||
|
#define RE_PROP_S_MASK 0x0F000000
|
||||||
|
#define RE_PROP_Z_MASK 0x00007000
|
||||||
|
|
||||||
|
#define RE_PROP_ALNUM 0x460001
|
||||||
|
#define RE_PROP_ALPHA 0x070001
|
||||||
|
#define RE_PROP_ANY 0x470001
|
||||||
|
#define RE_PROP_ASCII 0x010001
|
||||||
|
#define RE_PROP_BLANK 0x480001
|
||||||
|
#define RE_PROP_CNTRL 0x00000F
|
||||||
|
#define RE_PROP_DIGIT 0x000009
|
||||||
|
#define RE_PROP_GRAPH 0x490001
|
||||||
|
#define RE_PROP_LOWER 0x080001
|
||||||
|
#define RE_PROP_PRINT 0x4A0001
|
||||||
|
#define RE_PROP_SPACE 0x190001
|
||||||
|
#define RE_PROP_UPPER 0x090001
|
||||||
|
#define RE_PROP_WORD 0x4B0001
|
||||||
|
#define RE_PROP_XDIGIT 0x4C0001
|
||||||
|
|
||||||
|
#define RE_BREAK_OTHER 0
|
||||||
|
#define RE_BREAK_DOUBLEQUOTE 1
|
||||||
|
#define RE_BREAK_SINGLEQUOTE 2
|
||||||
|
#define RE_BREAK_HEBREWLETTER 3
|
||||||
|
#define RE_BREAK_CR 4
|
||||||
|
#define RE_BREAK_LF 5
|
||||||
|
#define RE_BREAK_NEWLINE 6
|
||||||
|
#define RE_BREAK_EXTEND 7
|
||||||
|
#define RE_BREAK_REGIONALINDICATOR 8
|
||||||
|
#define RE_BREAK_FORMAT 9
|
||||||
|
#define RE_BREAK_KATAKANA 10
|
||||||
|
#define RE_BREAK_ALETTER 11
|
||||||
|
#define RE_BREAK_MIDLETTER 12
|
||||||
|
#define RE_BREAK_MIDNUM 13
|
||||||
|
#define RE_BREAK_MIDNUMLET 14
|
||||||
|
#define RE_BREAK_NUMERIC 15
|
||||||
|
#define RE_BREAK_EXTENDNUMLET 16
|
||||||
|
|
||||||
|
#define RE_GBREAK_OTHER 0
|
||||||
|
#define RE_GBREAK_CR 1
|
||||||
|
#define RE_GBREAK_LF 2
|
||||||
|
#define RE_GBREAK_CONTROL 3
|
||||||
|
#define RE_GBREAK_EXTEND 4
|
||||||
|
#define RE_GBREAK_REGIONALINDICATOR 5
|
||||||
|
#define RE_GBREAK_SPACINGMARK 6
|
||||||
|
#define RE_GBREAK_L 7
|
||||||
|
#define RE_GBREAK_V 8
|
||||||
|
#define RE_GBREAK_T 9
|
||||||
|
#define RE_GBREAK_LV 10
|
||||||
|
#define RE_GBREAK_LVT 11
|
||||||
|
#define RE_GBREAK_PREPEND 12
|
||||||
|
|
||||||
|
extern char* re_strings[1160];
|
||||||
|
extern RE_Property re_properties[143];
|
||||||
|
extern RE_PropertyValue re_property_values[1251];
|
||||||
|
extern RE_UINT16 re_expand_on_folding[104];
|
||||||
|
extern RE_GetPropertyFunc re_get_property[77];
|
||||||
|
|
||||||
|
RE_UINT32 re_get_general_category(RE_UINT32 ch);
|
||||||
|
RE_UINT32 re_get_block(RE_UINT32 ch);
|
||||||
|
RE_UINT32 re_get_script(RE_UINT32 ch);
|
||||||
|
RE_UINT32 re_get_word_break(RE_UINT32 ch);
|
||||||
|
RE_UINT32 re_get_grapheme_cluster_break(RE_UINT32 ch);
|
||||||
|
RE_UINT32 re_get_sentence_break(RE_UINT32 ch);
|
||||||
|
RE_UINT32 re_get_math(RE_UINT32 ch);
|
||||||
|
RE_UINT32 re_get_alphabetic(RE_UINT32 ch);
|
||||||
|
RE_UINT32 re_get_lowercase(RE_UINT32 ch);
|
||||||
|
RE_UINT32 re_get_uppercase(RE_UINT32 ch);
|
||||||
|
RE_UINT32 re_get_cased(RE_UINT32 ch);
|
||||||
|
RE_UINT32 re_get_case_ignorable(RE_UINT32 ch);
|
||||||
|
RE_UINT32 re_get_changes_when_lowercased(RE_UINT32 ch);
|
||||||
|
RE_UINT32 re_get_changes_when_uppercased(RE_UINT32 ch);
|
||||||
|
RE_UINT32 re_get_changes_when_titlecased(RE_UINT32 ch);
|
||||||
|
RE_UINT32 re_get_changes_when_casefolded(RE_UINT32 ch);
|
||||||
|
RE_UINT32 re_get_changes_when_casemapped(RE_UINT32 ch);
|
||||||
|
RE_UINT32 re_get_id_start(RE_UINT32 ch);
|
||||||
|
RE_UINT32 re_get_id_continue(RE_UINT32 ch);
|
||||||
|
RE_UINT32 re_get_xid_start(RE_UINT32 ch);
|
||||||
|
RE_UINT32 re_get_xid_continue(RE_UINT32 ch);
|
||||||
|
RE_UINT32 re_get_default_ignorable_code_point(RE_UINT32 ch);
|
||||||
|
RE_UINT32 re_get_grapheme_extend(RE_UINT32 ch);
|
||||||
|
RE_UINT32 re_get_grapheme_base(RE_UINT32 ch);
|
||||||
|
RE_UINT32 re_get_grapheme_link(RE_UINT32 ch);
|
||||||
|
RE_UINT32 re_get_white_space(RE_UINT32 ch);
|
||||||
|
RE_UINT32 re_get_bidi_control(RE_UINT32 ch);
|
||||||
|
RE_UINT32 re_get_join_control(RE_UINT32 ch);
|
||||||
|
RE_UINT32 re_get_dash(RE_UINT32 ch);
|
||||||
|
RE_UINT32 re_get_hyphen(RE_UINT32 ch);
|
||||||
|
RE_UINT32 re_get_quotation_mark(RE_UINT32 ch);
|
||||||
|
RE_UINT32 re_get_terminal_punctuation(RE_UINT32 ch);
|
||||||
|
RE_UINT32 re_get_other_math(RE_UINT32 ch);
|
||||||
|
RE_UINT32 re_get_hex_digit(RE_UINT32 ch);
|
||||||
|
RE_UINT32 re_get_ascii_hex_digit(RE_UINT32 ch);
|
||||||
|
RE_UINT32 re_get_other_alphabetic(RE_UINT32 ch);
|
||||||
|
RE_UINT32 re_get_ideographic(RE_UINT32 ch);
|
||||||
|
RE_UINT32 re_get_diacritic(RE_UINT32 ch);
|
||||||
|
RE_UINT32 re_get_extender(RE_UINT32 ch);
|
||||||
|
RE_UINT32 re_get_other_lowercase(RE_UINT32 ch);
|
||||||
|
RE_UINT32 re_get_other_uppercase(RE_UINT32 ch);
|
||||||
|
RE_UINT32 re_get_noncharacter_code_point(RE_UINT32 ch);
|
||||||
|
RE_UINT32 re_get_other_grapheme_extend(RE_UINT32 ch);
|
||||||
|
RE_UINT32 re_get_ids_binary_operator(RE_UINT32 ch);
|
||||||
|
RE_UINT32 re_get_ids_trinary_operator(RE_UINT32 ch);
|
||||||
|
RE_UINT32 re_get_radical(RE_UINT32 ch);
|
||||||
|
RE_UINT32 re_get_unified_ideograph(RE_UINT32 ch);
|
||||||
|
RE_UINT32 re_get_other_default_ignorable_code_point(RE_UINT32 ch);
|
||||||
|
RE_UINT32 re_get_deprecated(RE_UINT32 ch);
|
||||||
|
RE_UINT32 re_get_soft_dotted(RE_UINT32 ch);
|
||||||
|
RE_UINT32 re_get_logical_order_exception(RE_UINT32 ch);
|
||||||
|
RE_UINT32 re_get_other_id_start(RE_UINT32 ch);
|
||||||
|
RE_UINT32 re_get_other_id_continue(RE_UINT32 ch);
|
||||||
|
RE_UINT32 re_get_sterm(RE_UINT32 ch);
|
||||||
|
RE_UINT32 re_get_variation_selector(RE_UINT32 ch);
|
||||||
|
RE_UINT32 re_get_pattern_white_space(RE_UINT32 ch);
|
||||||
|
RE_UINT32 re_get_pattern_syntax(RE_UINT32 ch);
|
||||||
|
RE_UINT32 re_get_hangul_syllable_type(RE_UINT32 ch);
|
||||||
|
RE_UINT32 re_get_bidi_class(RE_UINT32 ch);
|
||||||
|
RE_UINT32 re_get_canonical_combining_class(RE_UINT32 ch);
|
||||||
|
RE_UINT32 re_get_decomposition_type(RE_UINT32 ch);
|
||||||
|
RE_UINT32 re_get_east_asian_width(RE_UINT32 ch);
|
||||||
|
RE_UINT32 re_get_joining_group(RE_UINT32 ch);
|
||||||
|
RE_UINT32 re_get_joining_type(RE_UINT32 ch);
|
||||||
|
RE_UINT32 re_get_line_break(RE_UINT32 ch);
|
||||||
|
RE_UINT32 re_get_numeric_type(RE_UINT32 ch);
|
||||||
|
RE_UINT32 re_get_numeric_value(RE_UINT32 ch);
|
||||||
|
RE_UINT32 re_get_bidi_mirrored(RE_UINT32 ch);
|
||||||
|
RE_UINT32 re_get_indic_matra_category(RE_UINT32 ch);
|
||||||
|
RE_UINT32 re_get_indic_syllabic_category(RE_UINT32 ch);
|
||||||
|
RE_UINT32 re_get_alphanumeric(RE_UINT32 ch);
|
||||||
|
RE_UINT32 re_get_any(RE_UINT32 ch);
|
||||||
|
RE_UINT32 re_get_blank(RE_UINT32 ch);
|
||||||
|
RE_UINT32 re_get_graph(RE_UINT32 ch);
|
||||||
|
RE_UINT32 re_get_print(RE_UINT32 ch);
|
||||||
|
RE_UINT32 re_get_word(RE_UINT32 ch);
|
||||||
|
RE_UINT32 re_get_xdigit(RE_UINT32 ch);
|
||||||
|
int re_get_all_cases(RE_UINT32 ch, RE_UINT32* codepoints);
|
||||||
|
RE_UINT32 re_get_simple_case_folding(RE_UINT32 ch);
|
||||||
|
int re_get_full_case_folding(RE_UINT32 ch, RE_UINT32* codepoints);
|
684
lib/regex/regex.py
Normal file
684
lib/regex/regex.py
Normal file
@ -0,0 +1,684 @@
|
|||||||
|
#
|
||||||
|
# Secret Labs' Regular Expression Engine
|
||||||
|
#
|
||||||
|
# Copyright (c) 1998-2001 by Secret Labs AB. All rights reserved.
|
||||||
|
#
|
||||||
|
# This version of the SRE library can be redistributed under CNRI's
|
||||||
|
# Python 1.6 license. For any other use, please contact Secret Labs
|
||||||
|
# AB (info@pythonware.com).
|
||||||
|
#
|
||||||
|
# Portions of this engine have been developed in cooperation with
|
||||||
|
# CNRI. Hewlett-Packard provided funding for 1.6 integration and
|
||||||
|
# other compatibility work.
|
||||||
|
#
|
||||||
|
# 2010-01-16 mrab Python front-end re-written and extended
|
||||||
|
|
||||||
|
r"""Support for regular expressions (RE).
|
||||||
|
|
||||||
|
This module provides regular expression matching operations similar to those
|
||||||
|
found in Perl. It supports both 8-bit and Unicode strings; both the pattern and
|
||||||
|
the strings being processed can contain null bytes and characters outside the
|
||||||
|
US ASCII range.
|
||||||
|
|
||||||
|
Regular expressions can contain both special and ordinary characters. Most
|
||||||
|
ordinary characters, like "A", "a", or "0", are the simplest regular
|
||||||
|
expressions; they simply match themselves. You can concatenate ordinary
|
||||||
|
characters, so last matches the string 'last'.
|
||||||
|
|
||||||
|
There are a few differences between the old (legacy) behaviour and the new
|
||||||
|
(enhanced) behaviour, which are indicated by VERSION0 or VERSION1.
|
||||||
|
|
||||||
|
The special characters are:
|
||||||
|
"." Matches any character except a newline.
|
||||||
|
"^" Matches the start of the string.
|
||||||
|
"$" Matches the end of the string or just before the
|
||||||
|
newline at the end of the string.
|
||||||
|
"*" Matches 0 or more (greedy) repetitions of the preceding
|
||||||
|
RE. Greedy means that it will match as many repetitions
|
||||||
|
as possible.
|
||||||
|
"+" Matches 1 or more (greedy) repetitions of the preceding
|
||||||
|
RE.
|
||||||
|
"?" Matches 0 or 1 (greedy) of the preceding RE.
|
||||||
|
*?,+?,?? Non-greedy versions of the previous three special
|
||||||
|
characters.
|
||||||
|
*+,++,?+ Possessive versions of the previous three special
|
||||||
|
characters.
|
||||||
|
{m,n} Matches from m to n repetitions of the preceding RE.
|
||||||
|
{m,n}? Non-greedy version of the above.
|
||||||
|
{m,n}+ Possessive version of the above.
|
||||||
|
{...} Fuzzy matching constraints.
|
||||||
|
"\\" Either escapes special characters or signals a special
|
||||||
|
sequence.
|
||||||
|
[...] Indicates a set of characters. A "^" as the first
|
||||||
|
character indicates a complementing set.
|
||||||
|
"|" A|B, creates an RE that will match either A or B.
|
||||||
|
(...) Matches the RE inside the parentheses. The contents are
|
||||||
|
captured and can be retrieved or matched later in the
|
||||||
|
string.
|
||||||
|
(?flags-flags) VERSION1: Sets/clears the flags for the remainder of
|
||||||
|
the group or pattern; VERSION0: Sets the flags for the
|
||||||
|
entire pattern.
|
||||||
|
(?:...) Non-capturing version of regular parentheses.
|
||||||
|
(?>...) Atomic non-capturing version of regular parentheses.
|
||||||
|
(?flags-flags:...) Non-capturing version of regular parentheses with local
|
||||||
|
flags.
|
||||||
|
(?P<name>...) The substring matched by the group is accessible by
|
||||||
|
name.
|
||||||
|
(?<name>...) The substring matched by the group is accessible by
|
||||||
|
name.
|
||||||
|
(?P=name) Matches the text matched earlier by the group named
|
||||||
|
name.
|
||||||
|
(?#...) A comment; ignored.
|
||||||
|
(?=...) Matches if ... matches next, but doesn't consume the
|
||||||
|
string.
|
||||||
|
(?!...) Matches if ... doesn't match next.
|
||||||
|
(?<=...) Matches if preceded by ....
|
||||||
|
(?<!...) Matches if not preceded by ....
|
||||||
|
(?(id)yes|no) Matches yes pattern if group id matched, the (optional)
|
||||||
|
no pattern otherwise.
|
||||||
|
(?|...|...) (?|A|B), creates an RE that will match either A or B,
|
||||||
|
but reuses capture group numbers across the
|
||||||
|
alternatives.
|
||||||
|
|
||||||
|
The fuzzy matching constraints are: "i" to permit insertions, "d" to permit
|
||||||
|
deletions, "s" to permit substitutions, "e" to permit any of these. Limits are
|
||||||
|
optional with "<=" and "<". If any type of error is provided then any type not
|
||||||
|
provided is not permitted.
|
||||||
|
|
||||||
|
A cost equation may be provided.
|
||||||
|
|
||||||
|
Examples:
|
||||||
|
(?:fuzzy){i<=2}
|
||||||
|
(?:fuzzy){i<=1,s<=2,d<=1,1i+1s+1d<3}
|
||||||
|
|
||||||
|
VERSION1: Set operators are supported, and a set can include nested sets. The
|
||||||
|
set operators, in order of increasing precedence, are:
|
||||||
|
|| Set union ("x||y" means "x or y").
|
||||||
|
~~ (double tilde) Symmetric set difference ("x~~y" means "x or y, but not
|
||||||
|
both").
|
||||||
|
&& Set intersection ("x&&y" means "x and y").
|
||||||
|
-- (double dash) Set difference ("x--y" means "x but not y").
|
||||||
|
|
||||||
|
Implicit union, i.e., simple juxtaposition like in [ab], has the highest
|
||||||
|
precedence.
|
||||||
|
|
||||||
|
VERSION0 and VERSION1:
|
||||||
|
The special sequences consist of "\\" and a character from the list below. If
|
||||||
|
the ordinary character is not on the list, then the resulting RE will match the
|
||||||
|
second character.
|
||||||
|
\number Matches the contents of the group of the same number if
|
||||||
|
number is no more than 2 digits, otherwise the character
|
||||||
|
with the 3-digit octal code.
|
||||||
|
\a Matches the bell character.
|
||||||
|
\A Matches only at the start of the string.
|
||||||
|
\b Matches the empty string, but only at the start or end of a
|
||||||
|
word.
|
||||||
|
\B Matches the empty string, but not at the start or end of a
|
||||||
|
word.
|
||||||
|
\d Matches any decimal digit; equivalent to the set [0-9] when
|
||||||
|
matching a bytestring or a Unicode string with the ASCII
|
||||||
|
flag, or the whole range of Unicode digits when matching a
|
||||||
|
Unicode string.
|
||||||
|
\D Matches any non-digit character; equivalent to [^\d].
|
||||||
|
\f Matches the formfeed character.
|
||||||
|
\g<name> Matches the text matched by the group named name.
|
||||||
|
\G Matches the empty string, but only at the position where
|
||||||
|
the search started.
|
||||||
|
\L<name> Named list. The list is provided as a keyword argument.
|
||||||
|
\m Matches the empty string, but only at the start of a word.
|
||||||
|
\M Matches the empty string, but only at the end of a word.
|
||||||
|
\n Matches the newline character.
|
||||||
|
\N{name} Matches the named character.
|
||||||
|
\p{name=value} Matches the character if its property has the specified
|
||||||
|
value.
|
||||||
|
\P{name=value} Matches the character if its property hasn't the specified
|
||||||
|
value.
|
||||||
|
\r Matches the carriage-return character.
|
||||||
|
\s Matches any whitespace character; equivalent to
|
||||||
|
[ \t\n\r\f\v].
|
||||||
|
\S Matches any non-whitespace character; equivalent to [^\s].
|
||||||
|
\t Matches the tab character.
|
||||||
|
\uXXXX Matches the Unicode codepoint with 4-digit hex code XXXX.
|
||||||
|
\UXXXXXXXX Matches the Unicode codepoint with 8-digit hex code
|
||||||
|
XXXXXXXX.
|
||||||
|
\v Matches the vertical tab character.
|
||||||
|
\w Matches any alphanumeric character; equivalent to
|
||||||
|
[a-zA-Z0-9_] when matching a bytestring or a Unicode string
|
||||||
|
with the ASCII flag, or the whole range of Unicode
|
||||||
|
alphanumeric characters (letters plus digits plus
|
||||||
|
underscore) when matching a Unicode string. With LOCALE, it
|
||||||
|
will match the set [0-9_] plus characters defined as
|
||||||
|
letters for the current locale.
|
||||||
|
\W Matches the complement of \w; equivalent to [^\w].
|
||||||
|
\xXX Matches the character with 2-digit hex code XX.
|
||||||
|
\X Matches a grapheme.
|
||||||
|
\Z Matches only at the end of the string.
|
||||||
|
\\ Matches a literal backslash.
|
||||||
|
|
||||||
|
This module exports the following functions:
|
||||||
|
match Match a regular expression pattern at the beginning of a string.
|
||||||
|
fullmatch Match a regular expression pattern against all of a string.
|
||||||
|
search Search a string for the presence of a pattern.
|
||||||
|
sub Substitute occurrences of a pattern found in a string using a
|
||||||
|
template string.
|
||||||
|
subf Substitute occurrences of a pattern found in a string using a
|
||||||
|
format string.
|
||||||
|
subn Same as sub, but also return the number of substitutions made.
|
||||||
|
subfn Same as subf, but also return the number of substitutions made.
|
||||||
|
split Split a string by the occurrences of a pattern. VERSION1: will
|
||||||
|
split at zero-width match; VERSION0: won't split at zero-width
|
||||||
|
match.
|
||||||
|
splititer Return an iterator yielding the parts of a split string.
|
||||||
|
findall Find all occurrences of a pattern in a string.
|
||||||
|
finditer Return an iterator yielding a match object for each match.
|
||||||
|
compile Compile a pattern into a Pattern object.
|
||||||
|
purge Clear the regular expression cache.
|
||||||
|
escape Backslash all non-alphanumerics or special characters in a
|
||||||
|
string.
|
||||||
|
|
||||||
|
Most of the functions support a concurrent parameter: if True, the GIL will be
|
||||||
|
released during matching, allowing other Python threads to run concurrently. If
|
||||||
|
the string changes during matching, the behaviour is undefined. This parameter
|
||||||
|
is not needed when working on the builtin (immutable) string classes.
|
||||||
|
|
||||||
|
Some of the functions in this module take flags as optional parameters. Most of
|
||||||
|
these flags can also be set within an RE:
|
||||||
|
A a ASCII Make \w, \W, \b, \B, \d, and \D match the
|
||||||
|
corresponding ASCII character categories. Default
|
||||||
|
when matching a bytestring.
|
||||||
|
B b BESTMATCH Find the best fuzzy match (default is first).
|
||||||
|
D DEBUG Print the parsed pattern.
|
||||||
|
F f FULLCASE Use full case-folding when performing
|
||||||
|
case-insensitive matching in Unicode.
|
||||||
|
I i IGNORECASE Perform case-insensitive matching.
|
||||||
|
L L LOCALE Make \w, \W, \b, \B, \d, and \D dependent on the
|
||||||
|
current locale. (One byte per character only.)
|
||||||
|
M m MULTILINE "^" matches the beginning of lines (after a newline)
|
||||||
|
as well as the string. "$" matches the end of lines
|
||||||
|
(before a newline) as well as the end of the string.
|
||||||
|
E e ENHANCEMATCH Attempt to improve the fit after finding the first
|
||||||
|
fuzzy match.
|
||||||
|
R r REVERSE Searches backwards.
|
||||||
|
S s DOTALL "." matches any character at all, including the
|
||||||
|
newline.
|
||||||
|
U u UNICODE Make \w, \W, \b, \B, \d, and \D dependent on the
|
||||||
|
Unicode locale. Default when matching a Unicode
|
||||||
|
string.
|
||||||
|
V0 V0 VERSION0 Turn on the old legacy behaviour.
|
||||||
|
V1 V1 VERSION1 Turn on the new enhanced behaviour. This flag
|
||||||
|
includes the FULLCASE flag.
|
||||||
|
W w WORD Make \b and \B work with default Unicode word breaks
|
||||||
|
and make ".", "^" and "$" work with Unicode line
|
||||||
|
breaks.
|
||||||
|
X x VERBOSE Ignore whitespace and comments for nicer looking REs.
|
||||||
|
|
||||||
|
This module also defines an exception 'error'.
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Public symbols.
|
||||||
|
__all__ = ["compile", "escape", "findall", "finditer", "fullmatch", "match",
|
||||||
|
"purge", "search", "split", "splititer", "sub", "subf", "subfn", "subn",
|
||||||
|
"template", "Scanner", "A", "ASCII", "B", "BESTMATCH", "D", "DEBUG", "E",
|
||||||
|
"ENHANCEMATCH", "S", "DOTALL", "F", "FULLCASE", "I", "IGNORECASE", "L",
|
||||||
|
"LOCALE", "M", "MULTILINE", "R", "REVERSE", "T", "TEMPLATE", "U", "UNICODE",
|
||||||
|
"V0", "VERSION0", "V1", "VERSION1", "X", "VERBOSE", "W", "WORD", "error",
|
||||||
|
"Regex"]
|
||||||
|
|
||||||
|
__version__ = "2.4.45"
|
||||||
|
|
||||||
|
# --------------------------------------------------------------------
|
||||||
|
# Public interface.
|
||||||
|
|
||||||
|
def match(pattern, string, flags=0, pos=None, endpos=None, partial=False,
        concurrent=None, **kwargs):
    """Try to apply the pattern at the start of the string.

    Any extra keyword arguments are handed to the pattern compiler (they
    supply the named lists referenced by the pattern).

    Returns a match object, or None if no match was found.
    """
    compiled = _compile(pattern, flags, kwargs)
    return compiled.match(string, pos, endpos, concurrent, partial)
|
||||||
|
|
||||||
|
def fullmatch(pattern, string, flags=0, pos=None, endpos=None, partial=False,
        concurrent=None, **kwargs):
    """Try to apply the pattern against all of the string.

    Any extra keyword arguments are handed to the pattern compiler (they
    supply the named lists referenced by the pattern).

    Returns a match object, or None if no match was found.
    """
    compiled = _compile(pattern, flags, kwargs)
    return compiled.fullmatch(string, pos, endpos, concurrent, partial)
|
||||||
|
|
||||||
|
def search(pattern, string, flags=0, pos=None, endpos=None, partial=False,
        concurrent=None, **kwargs):
    """Search through the string looking for a match to the pattern.

    Any extra keyword arguments are handed to the pattern compiler (they
    supply the named lists referenced by the pattern).

    Returns a match object, or None if no match was found.
    """
    compiled = _compile(pattern, flags, kwargs)
    return compiled.search(string, pos, endpos, concurrent, partial)
|
||||||
|
|
||||||
|
def sub(pattern, repl, string, count=0, flags=0, pos=None, endpos=None,
        concurrent=None, **kwargs):
    """Replace non-overlapping occurrences of the pattern in string by repl.

    Occurrences are taken leftmost first (or rightmost first with a reverse
    pattern). repl can be either a string or a callable: if a string,
    backslash escapes in it are processed; if a callable, it's passed the
    match object and must return the replacement string to be used.

    Returns the resulting string.
    """
    compiled = _compile(pattern, flags, kwargs)
    return compiled.sub(repl, string, count, pos, endpos, concurrent)
|
||||||
|
|
||||||
|
def subf(pattern, format, string, count=0, flags=0, pos=None, endpos=None,
        concurrent=None, **kwargs):
    """Replace non-overlapping occurrences of the pattern in string by format.

    Occurrences are taken leftmost first (or rightmost first with a reverse
    pattern). format can be either a string or a callable: if a string, it's
    treated as a format string; if a callable, it's passed the match object
    and must return the replacement string to be used.

    Returns the resulting string.
    """
    compiled = _compile(pattern, flags, kwargs)
    return compiled.subf(format, string, count, pos, endpos, concurrent)
|
||||||
|
|
||||||
|
def subn(pattern, repl, string, count=0, flags=0, pos=None, endpos=None,
        concurrent=None, **kwargs):
    """Like sub, but also report how many substitutions were made.

    Returns a 2-tuple (new_string, number). new_string is the string obtained
    by replacing the leftmost (or rightmost with a reverse pattern)
    non-overlapping occurrences of the pattern in the source string by repl;
    number is the number of substitutions that were made.

    repl can be either a string or a callable: if a string, backslash escapes
    in it are processed; if a callable, it's passed the match object and must
    return the replacement string to be used.
    """
    compiled = _compile(pattern, flags, kwargs)
    return compiled.subn(repl, string, count, pos, endpos, concurrent)
|
||||||
|
|
||||||
|
def subfn(pattern, format, string, count=0, flags=0, pos=None, endpos=None,
        concurrent=None, **kwargs):
    """Like subf, but also report how many substitutions were made.

    Returns a 2-tuple (new_string, number). new_string is the string obtained
    by replacing the leftmost (or rightmost with a reverse pattern)
    non-overlapping occurrences of the pattern in the source string by
    format; number is the number of substitutions that were made.

    format can be either a string or a callable: if a string, it's treated as
    a format string; if a callable, it's passed the match object and must
    return the replacement string to be used.
    """
    compiled = _compile(pattern, flags, kwargs)
    return compiled.subfn(format, string, count, pos, endpos, concurrent)
|
||||||
|
|
||||||
|
def split(pattern, string, maxsplit=0, flags=0, concurrent=None, **kwargs):
    """Split the source string by the occurrences of the pattern.

    Returns a list containing the resulting substrings. If capturing
    parentheses are used in pattern, then the text of all groups in the
    pattern are also returned as part of the resulting list. If maxsplit is
    nonzero, at most maxsplit splits occur, and the remainder of the string
    is returned as the final element of the list.
    """
    compiled = _compile(pattern, flags, kwargs)
    return compiled.split(string, maxsplit, concurrent)
|
||||||
|
|
||||||
|
def splititer(pattern, string, maxsplit=0, flags=0, concurrent=None, **kwargs):
    """Return an iterator yielding the parts of a split string."""
    compiled = _compile(pattern, flags, kwargs)
    return compiled.splititer(string, maxsplit, concurrent)
|
||||||
|
|
||||||
|
def findall(pattern, string, flags=0, pos=None, endpos=None, overlapped=False,
        concurrent=None, **kwargs):
    """Return a list of all matches in the string.

    The matches may be overlapped if overlapped is True. If one or more
    groups are present in the pattern, a list of groups is returned; this
    will be a list of tuples if the pattern has more than one group. Empty
    matches are included in the result.
    """
    compiled = _compile(pattern, flags, kwargs)
    return compiled.findall(string, pos, endpos, overlapped, concurrent)
|
||||||
|
|
||||||
|
def finditer(pattern, string, flags=0, pos=None, endpos=None, overlapped=False,
        partial=False, concurrent=None, **kwargs):
    """Return an iterator over all matches in the string.

    The matches may be overlapped if overlapped is True. For each match, the
    iterator returns a match object. Empty matches are included in the
    result.
    """
    compiled = _compile(pattern, flags, kwargs)
    return compiled.finditer(string, pos, endpos, overlapped, concurrent,
        partial)
|
||||||
|
|
||||||
|
def compile(pattern, flags=0, **kwargs):
    """Compile a regular expression pattern, returning a pattern object.

    Extra keyword arguments are passed through to the internal compiler as a
    single dict.
    """
    pattern_object = _compile(pattern, flags, kwargs)
    return pattern_object
|
||||||
|
|
||||||
|
def purge():
    """Clear the regular expression caches.

    Empties the compiled-pattern cache and also the two module-level caches
    declared next to it: the named-argument lookup table and the
    compiled-replacement cache. Previously only the pattern cache was
    cleared, so the other two kept their entries (and the memory they hold)
    after a purge.
    """
    _cache.clear()
    # _compile treats a missing _named_args entry as "new pattern", so
    # dropping these entries only forces a recompile on next use.
    _named_args.clear()
    _replacement_cache.clear()
|
||||||
|
|
||||||
|
def template(pattern, flags=0):
    """Compile a template pattern, returning a pattern object.

    The TEMPLATE flag is OR-ed into the supplied flags before compiling.
    """
    template_flags = flags | TEMPLATE
    return _compile(pattern, template_flags)
|
||||||
|
|
||||||
|
def escape(pattern, special_only=False):
    """Escape all non-alphanumeric characters or special characters in pattern.

    With special_only false (the default), every character that is not in
    _ALNUM is preceded by a backslash. With special_only true, only the
    characters in _METACHARS are preceded by a backslash. In both modes a
    NUL character is written as the escape sequence \\000.

    The result has the same string type as the pattern: a unicode pattern
    gives a unicode result, anything else is joined as a bytestring.
    """
    # Choose literals of the matching string type once, instead of keeping two
    # copies of the loops.
    if isinstance(pattern, unicode):
        backslash, nul, escaped_nul, joiner = u"\\", u"\x00", u"\\000", u""
    else:
        backslash, nul, escaped_nul, joiner = "\\", "\x00", "\\000", ""

    pieces = []
    if special_only:
        for ch in pattern:
            if ch in _METACHARS:
                pieces.extend((backslash, ch))
            elif ch == nul:
                pieces.append(escaped_nul)
            else:
                pieces.append(ch)
    else:
        for ch in pattern:
            if ch in _ALNUM:
                pieces.append(ch)
            elif ch == nul:
                pieces.append(escaped_nul)
            else:
                pieces.extend((backslash, ch))

    return joiner.join(pieces)
|
||||||
|
|
||||||
|
# --------------------------------------------------------------------
|
||||||
|
# Internals.
|
||||||
|
|
||||||
|
import _regex_core
|
||||||
|
import sys
|
||||||
|
if sys.version_info < (2, 6):
|
||||||
|
from Python25 import _regex
|
||||||
|
elif sys.version_info < (2, 7):
|
||||||
|
from Python26 import _regex
|
||||||
|
else:
|
||||||
|
from Python27 import _regex
|
||||||
|
from threading import RLock as _RLock
|
||||||
|
from _regex_core import *
|
||||||
|
from _regex_core import (_ALL_VERSIONS, _ALL_ENCODINGS, _FirstSetError,
|
||||||
|
_UnscopedFlagSet, _check_group_features, _compile_firstset,
|
||||||
|
_compile_replacement, _flatten_code, _fold_case, _get_required_string,
|
||||||
|
_parse_pattern, _shrink_cache)
|
||||||
|
from _regex_core import (ALNUM as _ALNUM, Info as _Info, OP as _OP, Source as
|
||||||
|
_Source, Fuzzy as _Fuzzy)
|
||||||
|
|
||||||
|
# Version 0 is the old behaviour, compatible with the original 're' module.
|
||||||
|
# Version 1 is the new behaviour, which differs slightly.
|
||||||
|
|
||||||
|
DEFAULT_VERSION = VERSION0
|
||||||
|
|
||||||
|
_METACHARS = frozenset("()[]{}?*+|^$\\.")
|
||||||
|
|
||||||
|
_regex_core.DEFAULT_VERSION = DEFAULT_VERSION
|
||||||
|
|
||||||
|
# Caches for the patterns and replacements.
|
||||||
|
_cache = {}
|
||||||
|
_cache_lock = _RLock()
|
||||||
|
_named_args = {}
|
||||||
|
_replacement_cache = {}
|
||||||
|
|
||||||
|
# Maximum size of the cache.
|
||||||
|
_MAXCACHE = 500
|
||||||
|
_MAXREPCACHE = 500
|
||||||
|
|
||||||
|
def _compile(pattern, flags=0, kwargs={}):
    """Compile a regular expression to a PatternObject, using the cache.

    `pattern` is a unicode or str pattern, or an already compiled pattern,
    which is returned unchanged provided `flags` is zero. `kwargs` maps the
    names of the named lists used by the pattern to their values; this
    function itself only reads it.

    Raises `error` for a pattern that fails to parse, has trailing
    characters, or (on the cached path) lacks a required named list;
    ValueError for conflicting version or encoding flags, or for flags given
    with a compiled pattern; TypeError for any other first argument.
    """
    # Fast path: a pattern seen before with the same named lists.
    try:
        # Do we know what keyword arguments are needed?
        args_key = pattern, type(pattern), flags
        args_needed = _named_args[args_key]

        # Are we being provided with its required keyword arguments?
        supplied = set()
        for list_name, _ in args_needed:
            try:
                supplied.add((list_name, frozenset(kwargs[list_name])))
            except KeyError:
                raise error("missing named list")

        # Have we already seen this regular expression and named list?
        return _cache[pattern, type(pattern), flags, frozenset(supplied),
          DEFAULT_VERSION]
    except KeyError:
        # It's a new pattern, or new named list for a known pattern.
        pass

    # Guess the encoding from the class of the pattern string. The string
    # checks have to come first: _pattern_type is itself created from the
    # result of a call to this function with a str pattern.
    if isinstance(pattern, unicode):
        guess_encoding = UNICODE
    elif isinstance(pattern, str):
        guess_encoding = ASCII
    elif isinstance(pattern, _pattern_type):
        if flags:
            raise ValueError("can't process flags argument with a compiled pattern")

        return pattern
    else:
        raise TypeError("first argument must be a string or compiled pattern")

    # Set the default version in the core code in case it has been changed.
    _regex_core.DEFAULT_VERSION = DEFAULT_VERSION

    caught_exception = None
    global_flags = flags

    # Parse the pattern; if an unscoped flag turns up part-way through, start
    # again with that flag turned on globally.
    while True:
        try:
            source = _Source(pattern)
            info = _Info(global_flags, source.char_type, kwargs)
            info.guess_encoding = guess_encoding
            source.ignore_space = bool(info.flags & VERBOSE)
            parsed = _parse_pattern(source, info)
            break
        except _UnscopedFlagSet:
            # Remember the global flags for the next attempt.
            global_flags = info.global_flags
        except error:
            # Fetched via sys.exc_info() so that the handler needs no
            # version-specific exception-binding syntax.
            caught_exception = sys.exc_info()[1]

        # Re-raise outside the handler as a fresh error with the same text.
        if caught_exception:
            raise error(str(caught_exception))

    if not source.at_end():
        raise error("trailing characters in pattern at position %d" % source.pos)

    # Check the global flags for conflicts.
    version = (info.flags & _ALL_VERSIONS) or DEFAULT_VERSION
    if version not in (0, VERSION0, VERSION1):
        raise ValueError("VERSION0 and VERSION1 flags are mutually incompatible")

    encoding_flags = info.flags & _ALL_ENCODINGS
    if encoding_flags not in (0, ASCII, LOCALE, UNICODE):
        raise ValueError("ASCII, LOCALE and UNICODE flags are mutually incompatible")

    # No explicit encoding flag: fall back on the type of the pattern.
    if not encoding_flags:
        info.flags |= UNICODE if isinstance(pattern, unicode) else ASCII

    reverse = bool(info.flags & REVERSE)
    fuzzy = isinstance(parsed, _Fuzzy)

    # Should we print the parsed pattern?
    if flags & DEBUG:
        parsed.dump(indent=0, reverse=reverse)

    # Fix the group references.
    parsed.fix_groups(reverse, False)

    # Optimise the parsed pattern.
    parsed = parsed.optimise(info).pack_characters(info)

    # Get the required string.
    req_offset, req_chars, req_flags = _get_required_string(parsed, info.flags)

    # Build the named lists.
    named_lists = {}
    named_list_indexes = [None] * len(info.named_lists_used)
    args_needed = set()
    for (name, case_flags), index in info.named_lists_used.items():
        values = frozenset(kwargs[name])
        items = values
        if case_flags:
            # Case-insensitive use of the list: match against folded items.
            items = frozenset(_fold_case(info, v) for v in values)

        named_lists[name] = values
        named_list_indexes[index] = items
        args_needed.add((name, values))

    # Check the features of the groups.
    _check_group_features(info, parsed)

    # Compile the parsed pattern. The result is a list of tuples.
    code = parsed.compile(reverse)

    # Is there a group call to the pattern as a whole?
    ref = info.call_refs.get((0, reverse, fuzzy))
    if ref is not None:
        code = [(_OP.CALL_REF, ref)] + code + [(_OP.END, )]

    # Add the final 'success' opcode.
    code += [(_OP.SUCCESS, )]

    # Compile the additional copies of the groups that we need.
    for group, rev, fuz in info.additional_groups:
        code += group.compile(rev, fuz)

    # Flatten the code into a list of ints.
    code = _flatten_code(code)

    if not parsed.has_simple_start():
        # Get the first set, if possible.
        try:
            fs_code = _compile_firstset(info, parsed.get_firstset(reverse))
            code = _flatten_code(fs_code) + code
        except _FirstSetError:
            pass

    # The named capture groups.
    index_group = dict((v, n) for n, v in info.group_index.items())

    # Create the PatternObject.
    #
    # Local flags like IGNORECASE affect the code generation, but aren't needed
    # by the PatternObject itself. Conversely, global flags like LOCALE _don't_
    # affect the code generation but _are_ needed by the PatternObject.
    compiled_pattern = _regex.compile(pattern, info.flags | version, code,
      info.group_index, index_group, named_lists, named_list_indexes,
      req_offset, req_chars, req_flags, info.group_count)

    # Do we need to reduce the size of the cache?
    if len(_cache) >= _MAXCACHE:
        _cache_lock.acquire()
        try:
            _shrink_cache(_cache, _named_args, _MAXCACHE)
        finally:
            _cache_lock.release()

    args_needed = frozenset(args_needed)

    # Store this regular expression and named list.
    _cache[pattern, type(pattern), flags, args_needed,
      DEFAULT_VERSION] = compiled_pattern

    # Store what keyword arguments are needed.
    _named_args[args_key] = args_needed

    return compiled_pattern
|
||||||
|
|
||||||
|
def _compile_replacement_helper(pattern, template):
    """Compile a replacement template for `pattern`, using the cache.

    Returns a list whose items are literal strings (of the same string type
    as `template`) and whatever `_compile_replacement` yields for group
    references, in template order.
    """
    # This function is called by the _regex module.

    # Have we seen this before?
    cache_key = pattern.pattern, pattern.flags, template
    cached = _replacement_cache.get(cache_key)
    if cached is not None:
        return cached

    # A full cache is simply emptied.
    if len(_replacement_cache) >= _MAXREPCACHE:
        _replacement_cache.clear()

    is_unicode = isinstance(template, unicode)
    source = _Source(template)

    # Literals are collected as character codes and turned back into a string
    # of the template's own type when flushed.
    if is_unicode:
        to_char, empty = unichr, u""
    else:
        to_char, empty = chr, ""

    def make_string(char_codes):
        return empty.join(to_char(c) for c in char_codes)

    compiled = []
    literal = []
    ch = source.get()
    while ch:
        if ch != "\\":
            literal.append(ord(ch))
        else:
            # '_compile_replacement' reports whether it parsed a group
            # reference. Otherwise its items belong to the pending literal;
            # there can be more than one item because an invalid escape
            # sequence is kept as a 2-character literal.
            is_group, items = _compile_replacement(source, pattern, is_unicode)
            if not is_group:
                literal.extend(items)
            else:
                # It's a group, so first flush the literal.
                if literal:
                    compiled.append(make_string(literal))
                    literal = []

                compiled.extend(items)

        ch = source.get()

    # Flush the literal.
    if literal:
        compiled.append(make_string(literal))

    _replacement_cache[cache_key] = compiled

    return compiled
|
||||||
|
|
||||||
|
# We define _pattern_type here after all the support objects have been defined.
|
||||||
|
_pattern_type = type(_compile("", 0, {}))
|
||||||
|
|
||||||
|
# We'll define an alias for the 'compile' function so that the repr of a
|
||||||
|
# pattern object is eval-able.
|
||||||
|
Regex = compile
|
||||||
|
|
||||||
|
# Register myself for pickling.
|
||||||
|
import copy_reg as _copy_reg
|
||||||
|
|
||||||
|
def _pickle(p):
    """Reduce a compiled pattern for pickling.

    Returns the callable and argument tuple that rebuild `p`: `_compile`
    applied to the pattern's source string and flags.
    """
    rebuild_args = (p.pattern, p.flags)
    return _compile, rebuild_args
|
||||||
|
|
||||||
|
_copy_reg.pickle(_pattern_type, _pickle, _compile)
|
||||||
|
|
||||||
|
if not hasattr(str, "format"):
|
||||||
|
# Strings don't have the .format method (below Python 2.6).
|
||||||
|
while True:
|
||||||
|
_start = __doc__.find(" subf")
|
||||||
|
if _start < 0:
|
||||||
|
break
|
||||||
|
|
||||||
|
_end = __doc__.find("\n", _start) + 1
|
||||||
|
while __doc__.startswith(" ", _end):
|
||||||
|
_end = __doc__.find("\n", _end) + 1
|
||||||
|
|
||||||
|
__doc__ = __doc__[ : _start] + __doc__[_end : ]
|
||||||
|
|
||||||
|
__all__ = [_name for _name in __all__ if not _name.startswith("subf")]
|
||||||
|
|
||||||
|
del _start, _end
|
||||||
|
|
||||||
|
del subf, subfn
|
3230
lib/regex/test_regex.py
Normal file
3230
lib/regex/test_regex.py
Normal file
File diff suppressed because it is too large
Load Diff
@ -16,35 +16,33 @@
|
|||||||
# You should have received a copy of the GNU General Public License
|
# You should have received a copy of the GNU General Public License
|
||||||
# along with SickRage. If not, see <http://www.gnu.org/licenses/>.
|
# along with SickRage. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
import re
|
||||||
import datetime
|
import datetime
|
||||||
import os.path
|
import os.path
|
||||||
import re
|
|
||||||
import threading
|
import threading
|
||||||
import regexes
|
import regexes
|
||||||
import time
|
|
||||||
import sickbeard
|
import sickbeard
|
||||||
|
|
||||||
from sickbeard import logger, helpers, scene_numbering, db
|
from sickbeard import logger, helpers, scene_numbering
|
||||||
from sickbeard.exceptions import EpisodeNotFoundByAbsoluteNumberException
|
from regex import regex
|
||||||
from dateutil import parser
|
from dateutil import parser
|
||||||
|
|
||||||
nameparser_lock = threading.Lock()
|
nameparser_lock = threading.Lock()
|
||||||
|
|
||||||
|
|
||||||
class NameParser(object):
|
class NameParser(object):
|
||||||
ALL_REGEX = 0
|
ALL_REGEX = 0
|
||||||
NORMAL_REGEX = 1
|
NORMAL_REGEX = 1
|
||||||
SPORTS_REGEX = 2
|
SPORTS_REGEX = 2
|
||||||
ANIME_REGEX = 3
|
ANIME_REGEX = 3
|
||||||
|
|
||||||
def __init__(self, file_name=True, show=None, useIndexers=False):
|
def __init__(self, file_name=True, showObj=None, epObj=None, useIndexers=False, convert=False):
|
||||||
|
|
||||||
regexMode = self.ALL_REGEX
|
regexMode = self.ALL_REGEX
|
||||||
if show and show.is_anime:
|
if showObj and showObj.is_anime:
|
||||||
regexMode = self.ANIME_REGEX
|
regexMode = self.ANIME_REGEX
|
||||||
elif show and show.is_sports:
|
elif showObj and showObj.is_sports:
|
||||||
regexMode = self.SPORTS_REGEX
|
regexMode = self.SPORTS_REGEX
|
||||||
elif show and not show.is_anime and not show.is_sports:
|
elif showObj and not showObj.is_anime and not showObj.is_sports:
|
||||||
regexMode = self.NORMAL_REGEX
|
regexMode = self.NORMAL_REGEX
|
||||||
|
|
||||||
self.file_name = file_name
|
self.file_name = file_name
|
||||||
@ -53,7 +51,9 @@ class NameParser(object):
|
|||||||
self._compile_regexes(self.regexMode)
|
self._compile_regexes(self.regexMode)
|
||||||
self.showList = sickbeard.showList
|
self.showList = sickbeard.showList
|
||||||
self.useIndexers = useIndexers
|
self.useIndexers = useIndexers
|
||||||
self.show = show
|
self.showObj = showObj
|
||||||
|
self.epObj = epObj
|
||||||
|
self.convert = convert
|
||||||
|
|
||||||
def clean_series_name(self, series_name):
|
def clean_series_name(self, series_name):
|
||||||
"""Cleans up series name by removing any . and _
|
"""Cleans up series name by removing any . and _
|
||||||
@ -85,7 +85,7 @@ class NameParser(object):
|
|||||||
uncompiled_regex = [regexes.anime_regexes, regexes.sports_regexs, regexes.normal_regexes]
|
uncompiled_regex = [regexes.anime_regexes, regexes.sports_regexs, regexes.normal_regexes]
|
||||||
|
|
||||||
elif regexMode == self.NORMAL_REGEX:
|
elif regexMode == self.NORMAL_REGEX:
|
||||||
logger.log(u"Using NORMAL regexs", logger.DEBUG)
|
logger.log(u"Using NORMAL reqgexs", logger.DEBUG)
|
||||||
uncompiled_regex = [regexes.normal_regexes]
|
uncompiled_regex = [regexes.normal_regexes]
|
||||||
|
|
||||||
elif regexMode == self.SPORTS_REGEX:
|
elif regexMode == self.SPORTS_REGEX:
|
||||||
@ -101,32 +101,26 @@ class NameParser(object):
|
|||||||
uncompiled_regex = [regexes.normal_regexes]
|
uncompiled_regex = [regexes.normal_regexes]
|
||||||
|
|
||||||
for regexItem in uncompiled_regex:
|
for regexItem in uncompiled_regex:
|
||||||
for regex_type, regex in regexItem.items():
|
for regex_type, regex_pattern in regexItem.items():
|
||||||
|
for (cur_pattern_name, cur_pattern) in regex_pattern:
|
||||||
try:
|
try:
|
||||||
self.compiled_regexes[regex_type]
|
cur_regex = regex.compile(cur_pattern, regex.V1 | regex.VERBOSE | regex.IGNORECASE | regex.BESTMATCH)
|
||||||
except:
|
except regex.error, errormsg:
|
||||||
self.compiled_regexes[regex_type] = {}
|
|
||||||
|
|
||||||
for (cur_pattern_name, cur_pattern) in regex:
|
|
||||||
try:
|
|
||||||
cur_regex = re.compile(cur_pattern, re.VERBOSE | re.IGNORECASE)
|
|
||||||
except re.error, errormsg:
|
|
||||||
logger.log(u"WARNING: Invalid episode_pattern, %s. %s" % (errormsg, cur_pattern))
|
logger.log(u"WARNING: Invalid episode_pattern, %s. %s" % (errormsg, cur_pattern))
|
||||||
else:
|
else:
|
||||||
self.compiled_regexes[regex_type].update({cur_pattern_name: cur_regex})
|
self.compiled_regexes[(regex_type,cur_pattern_name)] = cur_regex
|
||||||
|
|
||||||
def _parse_string(self, name):
|
def _parse_string(self, name):
|
||||||
if not name:
|
if not name:
|
||||||
return
|
return
|
||||||
|
|
||||||
for cur_regex_type, cur_regexes in self.compiled_regexes.items():
|
result = ParseResult(name)
|
||||||
for cur_regex_name, cur_regex in cur_regexes.items():
|
for (cur_regex_type, cur_regex_name), cur_regex in self.compiled_regexes.items():
|
||||||
match = cur_regex.match(name)
|
match = cur_regex.fullmatch(name)
|
||||||
|
|
||||||
if not match:
|
if not match:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
result = ParseResult(name)
|
|
||||||
result.which_regex = [cur_regex_name]
|
result.which_regex = [cur_regex_name]
|
||||||
|
|
||||||
named_groups = match.groupdict().keys()
|
named_groups = match.groupdict().keys()
|
||||||
@ -135,20 +129,7 @@ class NameParser(object):
|
|||||||
result.series_name = match.group('series_name')
|
result.series_name = match.group('series_name')
|
||||||
if result.series_name:
|
if result.series_name:
|
||||||
result.series_name = self.clean_series_name(result.series_name)
|
result.series_name = self.clean_series_name(result.series_name)
|
||||||
|
else:continue
|
||||||
cur_show = helpers.get_show_by_name(result.series_name, useIndexer=self.useIndexers)
|
|
||||||
if not cur_show:
|
|
||||||
continue
|
|
||||||
|
|
||||||
# if we have a show object to compare against then do so else return the result anyways
|
|
||||||
if self.show:
|
|
||||||
if self.show.indexerid != cur_show.indexerid:
|
|
||||||
logger.log(
|
|
||||||
u"I expected an episode of the show " + self.show.name + " but the parser thinks its the show " + cur_show.name + ". I will continue thinking its " + self.show.name,
|
|
||||||
logger.WARNING)
|
|
||||||
continue
|
|
||||||
|
|
||||||
result.show = cur_show
|
|
||||||
|
|
||||||
if 'season_num' in named_groups:
|
if 'season_num' in named_groups:
|
||||||
tmp_season = int(match.group('season_num'))
|
tmp_season = int(match.group('season_num'))
|
||||||
@ -204,22 +185,36 @@ class NameParser(object):
|
|||||||
tmp_extra_info = match.group('extra_info')
|
tmp_extra_info = match.group('extra_info')
|
||||||
|
|
||||||
# Show.S04.Special or Show.S05.Part.2.Extras is almost certainly not every episode in the season
|
# Show.S04.Special or Show.S05.Part.2.Extras is almost certainly not every episode in the season
|
||||||
if tmp_extra_info and cur_regex_name == 'season_only' and re.search(
|
if tmp_extra_info and cur_regex_name == 'season_only' and regex.search(
|
||||||
r'([. _-]|^)(special|extra)s?\w*([. _-]|$)', tmp_extra_info, re.I):
|
r'([. _-]|^)(special|extra)s?\w*([. _-]|$)', tmp_extra_info, regex.I):
|
||||||
continue
|
continue
|
||||||
result.extra_info = tmp_extra_info
|
result.extra_info = tmp_extra_info
|
||||||
|
|
||||||
if 'release_group' in named_groups:
|
if 'release_group' in named_groups:
|
||||||
result.release_group = match.group('release_group')
|
result.release_group = match.group('release_group')
|
||||||
|
|
||||||
if result.show and result.show.is_anime and cur_regex_type in ['anime', 'normal']:
|
cur_show = helpers.get_show_by_name(result.series_name, useIndexer=self.useIndexers)
|
||||||
return result
|
if cur_show:
|
||||||
elif result.show and result.show.is_sports and cur_regex_type == 'sports':
|
if self.showObj:
|
||||||
return result
|
if self.showObj.indexerid != cur_show.indexerid:
|
||||||
elif cur_regex_type == 'normal':
|
logger.log(
|
||||||
return result
|
u"I expected an episode of the show " + self.showObj.name + " but the parser thinks its the show " + cur_show.name + ". I will continue thinking its " + self.showObj.name,
|
||||||
|
logger.WARNING)
|
||||||
|
return
|
||||||
|
|
||||||
return None
|
result.show = cur_show
|
||||||
|
|
||||||
|
if not result.show:
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Match found!
|
||||||
|
break
|
||||||
|
|
||||||
|
|
||||||
|
if self.convert:
|
||||||
|
result = result.convert()
|
||||||
|
|
||||||
|
return result
|
||||||
|
|
||||||
def _combine_results(self, first, second, attr):
|
def _combine_results(self, first, second, attr):
|
||||||
# if the first doesn't exist then return the second or nothing
|
# if the first doesn't exist then return the second or nothing
|
||||||
@ -291,7 +286,7 @@ class NameParser(object):
|
|||||||
|
|
||||||
# break it into parts if there are any (dirname, file name, extension)
|
# break it into parts if there are any (dirname, file name, extension)
|
||||||
dir_name, file_name = os.path.split(name)
|
dir_name, file_name = os.path.split(name)
|
||||||
ext_match = re.match('(.*)\.\w{3,4}$', file_name)
|
ext_match = regex.match('(.*)\.\w{3,4}$', file_name)
|
||||||
if ext_match and self.file_name:
|
if ext_match and self.file_name:
|
||||||
base_file_name = ext_match.group(1)
|
base_file_name = ext_match.group(1)
|
||||||
else:
|
else:
|
||||||
@ -364,7 +359,8 @@ class ParseResult(object):
|
|||||||
release_group=None,
|
release_group=None,
|
||||||
air_date=None,
|
air_date=None,
|
||||||
ab_episode_numbers=None,
|
ab_episode_numbers=None,
|
||||||
show=None
|
show=None,
|
||||||
|
score=None
|
||||||
):
|
):
|
||||||
|
|
||||||
self.original_name = original_name
|
self.original_name = original_name
|
||||||
@ -392,6 +388,7 @@ class ParseResult(object):
|
|||||||
|
|
||||||
self.which_regex = None
|
self.which_regex = None
|
||||||
self.show = show
|
self.show = show
|
||||||
|
self.score = score
|
||||||
|
|
||||||
def __eq__(self, other):
|
def __eq__(self, other):
|
||||||
if not other:
|
if not other:
|
||||||
@ -419,6 +416,8 @@ class ParseResult(object):
|
|||||||
return False
|
return False
|
||||||
if self.show != other.show:
|
if self.show != other.show:
|
||||||
return False
|
return False
|
||||||
|
if self.score != other.score:
|
||||||
|
return False
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
@ -479,7 +478,7 @@ class ParseResult(object):
|
|||||||
new_episode_numbers.append(e)
|
new_episode_numbers.append(e)
|
||||||
new_season_numbers.append(s)
|
new_season_numbers.append(s)
|
||||||
|
|
||||||
# need to do a quick sanity check here. It's possible that we now have episodes
|
# need to do a quick sanity check here. It's possible that we now have episodes
|
||||||
# from more than one season (by tvdb numbering), and this is just too much
|
# from more than one season (by tvdb numbering), and this is just too much
|
||||||
# for sickbeard, so we'd need to flag it.
|
# for sickbeard, so we'd need to flag it.
|
||||||
new_season_numbers = list(set(new_season_numbers)) # remove duplicates
|
new_season_numbers = list(set(new_season_numbers)) # remove duplicates
|
||||||
@ -546,17 +545,7 @@ class NameParserCache(object):
|
|||||||
logger.log("Using cached parse result for: " + name, logger.DEBUG)
|
logger.log("Using cached parse result for: " + name, logger.DEBUG)
|
||||||
return self._previous_parsed[name]
|
return self._previous_parsed[name]
|
||||||
|
|
||||||
|
|
||||||
name_parser_cache = NameParserCache()
|
name_parser_cache = NameParserCache()
|
||||||
|
|
||||||
|
|
||||||
class InvalidNameException(Exception):
|
class InvalidNameException(Exception):
|
||||||
"The given name is not valid"
|
"The given name is not valid"
|
||||||
|
|
||||||
|
|
||||||
class MultipleSceneShowResults(Exception):
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
class MultipleSceneEpisodeResults(Exception):
|
|
||||||
pass
|
|
||||||
|
@ -56,6 +56,31 @@ class TVShow():
|
|||||||
self.anime = 0
|
self.anime = 0
|
||||||
self.scene = 0
|
self.scene = 0
|
||||||
|
|
||||||
|
def _is_anime(self):
    """Return True when the show's `anime` value is greater than zero."""
    return bool(self.anime > 0)

# Read-only attribute form of the check above.
is_anime = property(_is_anime)
|
||||||
|
|
||||||
|
def _is_sports(self):
    """Return True when the show's `sports` value is greater than zero."""
    return bool(self.sports > 0)

# Read-only attribute form of the check above.
is_sports = property(_is_sports)
|
||||||
|
|
||||||
|
def _is_scene(self):
    """Return True when the show's `scene` value is greater than zero."""
    return bool(self.scene > 0)

# Read-only attribute form of the check above.
is_scene = property(_is_scene)
|
||||||
|
|
||||||
|
|
||||||
class TVEpisode(tv.TVEpisode):
|
class TVEpisode(tv.TVEpisode):
|
||||||
def __init__(self, season, episode, absolute_number, name):
|
def __init__(self, season, episode, absolute_number, name):
|
||||||
self.relatedEps = []
|
self.relatedEps = []
|
||||||
@ -139,9 +164,7 @@ def check_valid_sports_naming(pattern=None):
|
|||||||
return valid
|
return valid
|
||||||
|
|
||||||
def validate_name(pattern, multi=None, file_only=False, abd=False, sports=False):
|
def validate_name(pattern, multi=None, file_only=False, abd=False, sports=False):
|
||||||
ep = _generate_sample_ep(multi, abd, sports)
|
ep = generate_sample_ep(multi, abd, sports)
|
||||||
|
|
||||||
parser = NameParser(True)
|
|
||||||
|
|
||||||
new_name = ep.formatted_filename(pattern, multi) + '.ext'
|
new_name = ep.formatted_filename(pattern, multi) + '.ext'
|
||||||
new_path = ep.formatted_dir(pattern, multi)
|
new_path = ep.formatted_dir(pattern, multi)
|
||||||
@ -154,9 +177,11 @@ def validate_name(pattern, multi=None, file_only=False, abd=False, sports=False)
|
|||||||
|
|
||||||
logger.log(u"Trying to parse " + new_name, logger.DEBUG)
|
logger.log(u"Trying to parse " + new_name, logger.DEBUG)
|
||||||
|
|
||||||
|
parser = NameParser(True)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
result = parser.parse(new_name)
|
result = parser.parse(new_name)
|
||||||
except InvalidNameException, e :
|
except Exception, e:
|
||||||
logger.log(u"Unable to parse " + new_name + ", not valid", logger.DEBUG)
|
logger.log(u"Unable to parse " + new_name + ", not valid", logger.DEBUG)
|
||||||
return False
|
return False
|
||||||
|
|
||||||
@ -177,7 +202,7 @@ def validate_name(pattern, multi=None, file_only=False, abd=False, sports=False)
|
|||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
def _generate_sample_ep(multi=None, abd=False, sports=False, anime=False):
|
def generate_sample_ep(multi=None, abd=False, sports=False, anime=False):
|
||||||
# make a fake episode object
|
# make a fake episode object
|
||||||
ep = TVEpisode(2, 3, 3, "Ep Name")
|
ep = TVEpisode(2, 3, 3, "Ep Name")
|
||||||
|
|
||||||
@ -215,6 +240,6 @@ def _generate_sample_ep(multi=None, abd=False, sports=False, anime=False):
|
|||||||
|
|
||||||
|
|
||||||
def test_name(pattern, multi=None, abd=False, sports=False, anime=False):
|
def test_name(pattern, multi=None, abd=False, sports=False, anime=False):
|
||||||
ep = _generate_sample_ep(multi, abd, sports, anime)
|
ep = generate_sample_ep(multi, abd, sports, anime)
|
||||||
|
|
||||||
return {'name': ep.formatted_filename(pattern, multi), 'dir': ep.formatted_dir(pattern, multi)}
|
return {'name': ep.formatted_filename(pattern, multi), 'dir': ep.formatted_dir(pattern, multi)}
|
@ -119,7 +119,7 @@ class ProperFinder():
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
myParser = NameParser(False)
|
myParser = NameParser(False)
|
||||||
parse_result = myParser.parse(curProper.name).convert()
|
parse_result = myParser.parse(curProper.name)
|
||||||
except InvalidNameException:
|
except InvalidNameException:
|
||||||
logger.log(u"Unable to parse the filename " + curProper.name + " into a valid episode", logger.DEBUG)
|
logger.log(u"Unable to parse the filename " + curProper.name + " into a valid episode", logger.DEBUG)
|
||||||
continue
|
continue
|
||||||
@ -138,7 +138,7 @@ class ProperFinder():
|
|||||||
|
|
||||||
showObj = parse_result.show
|
showObj = parse_result.show
|
||||||
logger.log(
|
logger.log(
|
||||||
u"Successful match! Result " + parse_result.series_name + " matched to show " + showObj.name,
|
u"Successful match! Result " + parse_result.original_name + " matched to show " + showObj.name,
|
||||||
logger.DEBUG)
|
logger.DEBUG)
|
||||||
|
|
||||||
# set the indexerid in the db to the show's indexerid
|
# set the indexerid in the db to the show's indexerid
|
||||||
|
@ -278,8 +278,8 @@ class GenericProvider:
|
|||||||
|
|
||||||
# parse the file name
|
# parse the file name
|
||||||
try:
|
try:
|
||||||
myParser = NameParser(False, show=show, useIndexers=manualSearch)
|
myParser = NameParser(False, showObj=show, epObj=ep_obj, convert=True)
|
||||||
parse_result = myParser.parse(title).convert()
|
parse_result = myParser.parse(title)
|
||||||
except InvalidNameException:
|
except InvalidNameException:
|
||||||
logger.log(u"Unable to parse the filename " + title + " into a valid episode", logger.WARNING)
|
logger.log(u"Unable to parse the filename " + title + " into a valid episode", logger.WARNING)
|
||||||
continue
|
continue
|
||||||
|
@ -1895,6 +1895,26 @@ class TVEpisode(object):
|
|||||||
else:
|
else:
|
||||||
return ek.ek(os.path.join, self.show.location, self.location)
|
return ek.ek(os.path.join, self.show.location, self.location)
|
||||||
|
|
||||||
|
def createStrings(self, pattern=None):
    """Format this episode with `pattern`, or with a set of stock patterns.

    With a non-empty `pattern` the single string produced by
    `self._format_pattern(pattern)` is returned. Otherwise a list is
    returned holding the result of `self._format_pattern` for each built-in
    pattern, in the order listed below.
    """
    if pattern:
        return self._format_pattern(pattern)

    default_patterns = (
        '%S.N.S%SE%0E',
        '%S.N.S%0SE%E',
        '%S.N.S%SE%E',
        '%S.N.S%0SE%0E',
        '%SN S%SE%0E',
        '%SN S%0SE%E',
        '%SN S%SE%E',
        '%SN S%0SE%0E',
    )
    return [self._format_pattern(p) for p in default_patterns]
|
||||||
|
|
||||||
def prettyName(self):
|
def prettyName(self):
|
||||||
"""
|
"""
|
||||||
Returns the name of this episode in a "pretty" human-readable format. Used for logging
|
Returns the name of this episode in a "pretty" human-readable format. Used for logging
|
||||||
@ -1903,15 +1923,11 @@ class TVEpisode(object):
|
|||||||
Returns: A string representing the episode's name and season/ep numbers
|
Returns: A string representing the episode's name and season/ep numbers
|
||||||
"""
|
"""
|
||||||
|
|
||||||
if self.show.is_anime and not self.show.is_scene:
|
if self.show.anime and not self.show.scene:
|
||||||
return self._format_pattern('%SN - %A - %EN')
|
return self._format_pattern('%SN - %AB - %EN')
|
||||||
elif self.show.is_anime and self.show.is_scene:
|
|
||||||
return self._format_pattern('%SN - %XA - %EN')
|
|
||||||
elif self.show.is_scene:
|
|
||||||
return self._format_pattern('%SN - %XSx%0XE - %EN')
|
|
||||||
elif self.show.air_by_date:
|
elif self.show.air_by_date:
|
||||||
return self._format_pattern('%SN - %AD - %EN')
|
return self._format_pattern('%SN - %AD - %EN')
|
||||||
else:
|
|
||||||
return self._format_pattern('%SN - %Sx%0E - %EN')
|
return self._format_pattern('%SN - %Sx%0E - %EN')
|
||||||
|
|
||||||
def _ep_name(self):
|
def _ep_name(self):
|
||||||
@ -1980,9 +1996,8 @@ class TVEpisode(object):
|
|||||||
if not name:
|
if not name:
|
||||||
return ''
|
return ''
|
||||||
|
|
||||||
np = NameParser(name)
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
np = NameParser(name)
|
||||||
parse_result = np.parse(name)
|
parse_result = np.parse(name)
|
||||||
except InvalidNameException, e:
|
except InvalidNameException, e:
|
||||||
logger.log(u"Unable to get parse release_group: " + ex(e), logger.DEBUG)
|
logger.log(u"Unable to get parse release_group: " + ex(e), logger.DEBUG)
|
||||||
@ -2017,7 +2032,7 @@ class TVEpisode(object):
|
|||||||
'%0XS': '%02d' % self.scene_season,
|
'%0XS': '%02d' % self.scene_season,
|
||||||
'%XE': str(self.scene_episode),
|
'%XE': str(self.scene_episode),
|
||||||
'%0XE': '%02d' % self.scene_episode,
|
'%0XE': '%02d' % self.scene_episode,
|
||||||
'%A': '%(#)03d' % {'#': self.absolute_number},
|
'%AB': '%(#)03d' % {'#': self.absolute_number},
|
||||||
'%XA': '%(#)03d' % {'#': self.scene_absolute_number},
|
'%XA': '%(#)03d' % {'#': self.scene_absolute_number},
|
||||||
'%RN': release_name(self.release_name),
|
'%RN': release_name(self.release_name),
|
||||||
'%RG': release_group(self.release_name),
|
'%RG': release_group(self.release_name),
|
||||||
|
Loading…
Reference in New Issue
Block a user