diff --git a/gui/slick/interfaces/default/displayShow.tmpl b/gui/slick/interfaces/default/displayShow.tmpl index 74316b3f..5d592dee 100644 --- a/gui/slick/interfaces/default/displayShow.tmpl +++ b/gui/slick/interfaces/default/displayShow.tmpl @@ -302,7 +302,7 @@

#if int($epResult["season"]) == 0 then "Specials" else "Season "+str($epResult["season"])#

- NFOTBNEpisode#if $scene then "Scene #" else ""# #if $scene_anime then "Scene Absolute #" else ""#NameAirdateFilename#if $sickbeard.USE_SUBTITLES and $show.subtitles then "Subtitles" else ""#StatusSearch + NFOTBNEpisode#if $show.is_anime then "Absolute" else ""# #if $scene then "Scene #" else ""# #if $scene_anime then "Scene Absolute" else ""#NameAirdateFilename#if $sickbeard.USE_SUBTITLES and $show.subtitles then "Subtitles" else ""#StatusSearch #set $curSeason = int($epResult["season"]) #end if @@ -317,6 +317,9 @@ \"Y" \"Y" $epResult["episode"] + #if $show.is_anime: + $epResult["absolute_number"] + #end if #if $scene: diff --git a/lib/fuzzywuzzy/StringMatcher.py b/lib/fuzzywuzzy/StringMatcher.py new file mode 100644 index 00000000..9dccfe7e --- /dev/null +++ b/lib/fuzzywuzzy/StringMatcher.py @@ -0,0 +1,78 @@ +#!/usr/bin/env python +# encoding: utf-8 +""" +StringMatcher.py + +ported from python-Levenshtein +[https://github.com/miohtama/python-Levenshtein] +""" + +from Levenshtein import * +from warnings import warn + +class StringMatcher: + """A SequenceMatcher-like class built on the top of Levenshtein""" + + def _reset_cache(self): + self._ratio = self._distance = None + self._opcodes = self._editops = self._matching_blocks = None + + def __init__(self, isjunk=None, seq1='', seq2=''): + if isjunk: + warn("isjunk not NOT implemented, it will be ignored") + self._str1, self._str2 = seq1, seq2 + self._reset_cache() + + def set_seqs(self, seq1, seq2): + self._str1, self._str2 = seq1, seq2 + self._reset_cache() + + def set_seq1(self, seq1): + self._str1 = seq1 + self._reset_cache() + + def set_seq2(self, seq2): + self._str2 = seq2 + self._reset_cache() + + def get_opcodes(self): + if not self._opcodes: + if self._editops: + self._opcodes = opcodes(self._editops, self._str1, self._str2) + else: + self._opcodes = opcodes(self._str1, self._str2) + return self._opcodes + + def get_editops(self): + if not self._editops: + if self._opcodes: + self._editops = 
editops(self._opcodes, self._str1, self._str2) + else: + self._editops = editops(self._str1, self._str2) + return self._editops + + def get_matching_blocks(self): + if not self._matching_blocks: + self._matching_blocks = matching_blocks(self.get_opcodes(), + self._str1, self._str2) + return self._matching_blocks + + def ratio(self): + if not self._ratio: + self._ratio = ratio(self._str1, self._str2) + return self._ratio + + def quick_ratio(self): + # This is usually quick enough :o) + if not self._ratio: + self._ratio = ratio(self._str1, self._str2) + return self._ratio + + def real_quick_ratio(self): + len1, len2 = len(self._str1), len(self._str2) + return 2.0 * min(len1, len2) / (len1 + len2) + + def distance(self): + if not self._distance: + self._distance = distance(self._str1, self._str2) + return self._distance \ No newline at end of file diff --git a/lib/fuzzywuzzy/__init__.py b/lib/fuzzywuzzy/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/lib/fuzzywuzzy/fuzz.py b/lib/fuzzywuzzy/fuzz.py new file mode 100644 index 00000000..26274b9a --- /dev/null +++ b/lib/fuzzywuzzy/fuzz.py @@ -0,0 +1,263 @@ +#!/usr/bin/env python +# encoding: utf-8 +""" +fuzz.py + +Copyright (c) 2011 Adam Cohen + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +""" +from __future__ import unicode_literals + +try: + from StringMatcher import StringMatcher as SequenceMatcher +except: + from difflib import SequenceMatcher + +from . import utils + + +########################### +# Basic Scoring Functions # +########################### + + +def ratio(s1, s2): + + if s1 is None: + raise TypeError("s1 is None") + if s2 is None: + raise TypeError("s2 is None") + s1, s2 = utils.make_type_consistent(s1, s2) + if len(s1) == 0 or len(s2) == 0: + return 0 + + m = SequenceMatcher(None, s1, s2) + return utils.intr(100 * m.ratio()) + + +# todo: skip duplicate indexes for a little more speed +def partial_ratio(s1, s2): + + if s1 is None: + raise TypeError("s1 is None") + if s2 is None: + raise TypeError("s2 is None") + s1, s2 = utils.make_type_consistent(s1, s2) + if len(s1) == 0 or len(s2) == 0: + return 0 + + if len(s1) <= len(s2): + shorter = s1 + longer = s2 + else: + shorter = s2 + longer = s1 + + m = SequenceMatcher(None, shorter, longer) + blocks = m.get_matching_blocks() + + # each block represents a sequence of matching characters in a string + # of the form (idx_1, idx_2, len) + # the best partial match will block align with at least one of those blocks + # e.g. 
shorter = "abcd", longer = XXXbcdeEEE + # block = (1,3,3) + # best score === ratio("abcd", "Xbcd") + scores = [] + for block in blocks: + long_start = block[1] - block[0] if (block[1] - block[0]) > 0 else 0 + long_end = long_start + len(shorter) + long_substr = longer[long_start:long_end] + + m2 = SequenceMatcher(None, shorter, long_substr) + r = m2.ratio() + if r > .995: + return 100 + else: + scores.append(r) + + return int(100 * max(scores)) + + +############################## +# Advanced Scoring Functions # +############################## + +# Sorted Token +# find all alphanumeric tokens in the string +# sort those tokens and take ratio of resulting joined strings +# controls for unordered string elements +def _token_sort(s1, s2, partial=True, force_ascii=True): + + if s1 is None: + raise TypeError("s1 is None") + if s2 is None: + raise TypeError("s2 is None") + + # pull tokens + tokens1 = utils.full_process(s1, force_ascii=force_ascii).split() + tokens2 = utils.full_process(s2, force_ascii=force_ascii).split() + + # sort tokens and join + sorted1 = " ".join(sorted(tokens1)) + sorted2 = " ".join(sorted(tokens2)) + + sorted1 = sorted1.strip() + sorted2 = sorted2.strip() + + if partial: + return partial_ratio(sorted1, sorted2) + else: + return ratio(sorted1, sorted2) + + +def token_sort_ratio(s1, s2, force_ascii=True): + return _token_sort(s1, s2, partial=False, force_ascii=force_ascii) + + +def partial_token_sort_ratio(s1, s2, force_ascii=True): + return _token_sort(s1, s2, partial=True, force_ascii=force_ascii) + + +# Token Set +# find all alphanumeric tokens in each string...treat them as a set +# construct two strings of the form +# +# take ratios of those two strings +# controls for unordered partial matches +def _token_set(s1, s2, partial=True, force_ascii=True): + + if s1 is None: + raise TypeError("s1 is None") + if s2 is None: + raise TypeError("s2 is None") + + p1 = utils.full_process(s1, force_ascii=force_ascii) + p2 = utils.full_process(s2, 
force_ascii=force_ascii) + + if not utils.validate_string(p1): + return 0 + if not utils.validate_string(p2): + return 0 + + # pull tokens + tokens1 = set(utils.full_process(p1).split()) + tokens2 = set(utils.full_process(p2).split()) + + intersection = tokens1.intersection(tokens2) + diff1to2 = tokens1.difference(tokens2) + diff2to1 = tokens2.difference(tokens1) + + sorted_sect = " ".join(sorted(intersection)) + sorted_1to2 = " ".join(sorted(diff1to2)) + sorted_2to1 = " ".join(sorted(diff2to1)) + + combined_1to2 = sorted_sect + " " + sorted_1to2 + combined_2to1 = sorted_sect + " " + sorted_2to1 + + # strip + sorted_sect = sorted_sect.strip() + combined_1to2 = combined_1to2.strip() + combined_2to1 = combined_2to1.strip() + + pairwise = [ + ratio(sorted_sect, combined_1to2), + ratio(sorted_sect, combined_2to1), + ratio(combined_1to2, combined_2to1) + ] + return max(pairwise) + + +def token_set_ratio(s1, s2, force_ascii=True): + return _token_set(s1, s2, partial=False, force_ascii=force_ascii) + + +def partial_token_set_ratio(s1, s2, force_ascii=True): + return _token_set(s1, s2, partial=True, force_ascii=force_ascii) + + +# TODO: numerics + +################### +# Combination API # +################### + +# q is for quick +def QRatio(s1, s2, force_ascii=True): + + p1 = utils.full_process(s1, force_ascii=force_ascii) + p2 = utils.full_process(s2, force_ascii=force_ascii) + + if not utils.validate_string(p1): + return 0 + if not utils.validate_string(p2): + return 0 + + return ratio(p1, p2) + + +def UQRatio(s1, s2): + return QRatio(s1, s2, force_ascii=False) + + +# w is for weighted +def WRatio(s1, s2, force_ascii=True): + + p1 = utils.full_process(s1, force_ascii=force_ascii) + p2 = utils.full_process(s2, force_ascii=force_ascii) + + if not utils.validate_string(p1): + return 0 + if not utils.validate_string(p2): + return 0 + + # should we look at partials? 
+ try_partial = True + unbase_scale = .95 + partial_scale = .90 + + base = ratio(p1, p2) + len_ratio = float(max(len(p1), len(p2))) / min(len(p1), len(p2)) + + # if strings are similar length, don't use partials + if len_ratio < 1.5: + try_partial = False + + # if one string is much much shorter than the other + if len_ratio > 8: + partial_scale = .6 + + if try_partial: + partial = partial_ratio(p1, p2) * partial_scale + ptsor = partial_token_sort_ratio(p1, p2, force_ascii=force_ascii) \ + * unbase_scale * partial_scale + ptser = partial_token_set_ratio(p1, p2, force_ascii=force_ascii) \ + * unbase_scale * partial_scale + + return int(max(base, partial, ptsor, ptser)) + else: + tsor = token_sort_ratio(p1, p2, force_ascii=force_ascii) * unbase_scale + tser = token_set_ratio(p1, p2, force_ascii=force_ascii) * unbase_scale + + return int(max(base, tsor, tser)) + + +def UWRatio(s1, s2): + return WRatio(s1, s2, force_ascii=False) diff --git a/lib/fuzzywuzzy/process.py b/lib/fuzzywuzzy/process.py new file mode 100644 index 00000000..7571664e --- /dev/null +++ b/lib/fuzzywuzzy/process.py @@ -0,0 +1,119 @@ +#!/usr/bin/env python +# encoding: utf-8 +""" +process.py + +Copyright (c) 2011 Adam Cohen + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +""" +import itertools + +from . import fuzz +from . import utils + + +def extract(query, choices, processor=None, scorer=None, limit=5): + """Find best matches in a list of choices, return a list of tuples + containing the match and its score. + + Arguments: + query -- an object representing the thing we want to find + choices -- a list of objects we are attempting to extract + values from + scorer -- f(QUERY, OBJ) --> INT. We will return the objects + with the highest score. By default, we use + fuzz.WRatio() and both OBJ and QUERY should be + strings + processor -- f(OBJ_A) --> OBJ_B, where the output is an input + to scorer. For example, "processor = lambda x: + x[0]" would return the first element in a + collection x (of, say, strings); this would then + be used in the scoring. By default, we + use utils.full_process() + + """ + if choices is None or len(choices) == 0: + return [] + + # default, turn whatever the choice is into a workable string + if processor is None: + processor = lambda x: utils.full_process(x) + + # default: wratio + if scorer is None: + scorer = fuzz.WRatio + + sl = list() + + for choice in choices: + processed = processor(choice) + score = scorer(query, processed) + tuple = (choice, score) + sl.append(tuple) + + sl.sort(key=lambda i: i[1], reverse=True) + return sl[:limit] + + +def extractBests(query, choices, processor=None, scorer=None, score_cutoff=0, limit=5): + """Find best matches above a score in a list of choices, return a + list of tuples containing the match and its score. + + Convenience method which returns the choices with best scores, see + extract() for full arguments list + + Optional parameter: score_cutoff.
+ If the choice has a score of less than or equal to score_cutoff + it will not be included in the result list + + """ + + best_list = extract(query, choices, processor, scorer, limit) + if len(best_list) > 0: + return list(itertools.takewhile(lambda x: x[1] > score_cutoff, best_list)) + else: + return [] + + +def extractOne(query, choices, processor=None, scorer=None, score_cutoff=0): + """Find the best match above a score in a list of choices, return a + tuple containing the match and its score if it's above the threshold + or None. + + Convenience method which returns the single best choice, see + extract() for full arguments list + + Optional parameter: score_cutoff. + If the best choice has a score of less than or equal to + score_cutoff we will return None (intuition: not a good enough + match) + + """ + + best_list = extract(query, choices, processor, scorer, limit=1) + if len(best_list) > 0: + best = best_list[0] + if best[1] > score_cutoff: + return best + else: + return None + else: + return None diff --git a/lib/fuzzywuzzy/string_processing.py b/lib/fuzzywuzzy/string_processing.py new file mode 100644 index 00000000..7c706d98 --- /dev/null +++ b/lib/fuzzywuzzy/string_processing.py @@ -0,0 +1,41 @@ +from __future__ import unicode_literals +import re + + +class StringProcessor(object): + """ + This class defines methods to process strings in the most + efficient way. Ideally all the methods below use unicode strings + for both input and output. + """ + + @classmethod + def replace_non_letters_non_numbers_with_whitespace(cls, a_string): + """ + This function replaces each non-word character (anything other + than letters, digits and underscore) with a white space. + """ + regex = re.compile(r"(?ui)\W") + return regex.sub(" ", a_string) + + @classmethod + def strip(cls, a_string): + """ + This function strips leading and trailing white space.
+ """ + + return a_string.strip() + + @classmethod + def to_lower_case(cls, a_string): + """ + This function returns the lower-cased version of the string given. + """ + return a_string.lower() + + @classmethod + def to_upper_case(cls, a_string): + """ + This function returns the upper-cased version of the string given. + """ + return a_string.upper() diff --git a/lib/fuzzywuzzy/utils.py b/lib/fuzzywuzzy/utils.py new file mode 100644 index 00000000..2d3ae3e4 --- /dev/null +++ b/lib/fuzzywuzzy/utils.py @@ -0,0 +1,76 @@ +from __future__ import unicode_literals +import sys + +from fuzzywuzzy.string_processing import StringProcessor + + +PY3 = sys.version_info[0] == 3 + + +def validate_string(s): + try: + if len(s) > 0: + return True + else: + return False + except: + return False + +bad_chars = str('') # ascii dammit! +for i in range(128, 256): + bad_chars += chr(i) +if PY3: + translation_table = dict((ord(c), None) for c in bad_chars) + + +def asciionly(s): + if PY3: + return s.translate(translation_table) + else: + return s.translate(None, bad_chars) + + +def asciidammit(s): + if type(s) is str: + return asciionly(s) + elif type(s) is unicode: + return asciionly(s.encode('ascii', 'ignore')) + else: + return asciidammit(unicode(s)) + + +def make_type_consistent(s1, s2): + if isinstance(s1, str) and isinstance(s2, str): + return s1, s2 + + elif isinstance(s1, unicode) and isinstance(s2, unicode): + return s1, s2 + + else: + return unicode(s1), unicode(s2) + + +def full_process(s, force_ascii=False): + """Process string by + -- removing all but letters and numbers + -- trim whitespace + -- force to lower case + if force_ascii == True, force convert to ascii""" + + if s is None: + return "" + + if force_ascii: + s = asciidammit(s) + # Keep only Letters and Numbers (see Unicode docs). + string_out = StringProcessor.replace_non_letters_non_numbers_with_whitespace(s) + # Force into lowercase.
+ string_out = StringProcessor.to_lower_case(string_out) + # Remove leading and trailing whitespaces. + string_out = StringProcessor.strip(string_out) + return string_out + + +def intr(n): + '''Returns a correctly rounded integer''' + return int(round(n)) diff --git a/lib/regex/Python25/__init__.py b/lib/regex/Python25/__init__.py new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/lib/regex/Python25/__init__.py @@ -0,0 +1 @@ + diff --git a/lib/regex/Python25/_regex.pyd b/lib/regex/Python25/_regex.pyd new file mode 100644 index 00000000..ad8e1c76 Binary files /dev/null and b/lib/regex/Python25/_regex.pyd differ diff --git a/lib/regex/Python26/__init__.py b/lib/regex/Python26/__init__.py new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/lib/regex/Python26/__init__.py @@ -0,0 +1 @@ + diff --git a/lib/regex/Python26/_regex.pyd b/lib/regex/Python26/_regex.pyd new file mode 100644 index 00000000..0616ec7d Binary files /dev/null and b/lib/regex/Python26/_regex.pyd differ diff --git a/lib/regex/Python27/__init__.py b/lib/regex/Python27/__init__.py new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/lib/regex/Python27/__init__.py @@ -0,0 +1 @@ + diff --git a/lib/regex/Python27/_regex.pyd b/lib/regex/Python27/_regex.pyd new file mode 100644 index 00000000..f41bb0b9 Binary files /dev/null and b/lib/regex/Python27/_regex.pyd differ diff --git a/lib/regex/__init__.py b/lib/regex/__init__.py new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/lib/regex/__init__.py @@ -0,0 +1 @@ + diff --git a/lib/regex/_regex.c b/lib/regex/_regex.c new file mode 100644 index 00000000..a40d2091 --- /dev/null +++ b/lib/regex/_regex.c @@ -0,0 +1,22557 @@ +/* Secret Labs' Regular Expression Engine + * + * regular expression matching engine + * + * partial history: + * 1999-10-24 fl created (based on existing template matcher code) + * 2000-03-06 fl first alpha, sort of + * 2000-08-01 fl fixes for 1.6b1 + * 2000-08-07 fl use PyOS_CheckStack() 
if available + * 2000-09-20 fl added expand method + * 2001-03-20 fl lots of fixes for 2.1b2 + * 2001-04-15 fl export copyright as Python attribute, not global + * 2001-04-28 fl added __copy__ methods (work in progress) + * 2001-05-14 fl fixes for 1.5.2 compatibility + * 2001-07-01 fl added BIGCHARSET support (from Martin von Loewis) + * 2001-10-18 fl fixed group reset issue (from Matthew Mueller) + * 2001-10-20 fl added split primitive; reenable unicode for 1.6/2.0/2.1 + * 2001-10-21 fl added sub/subn primitive + * 2001-10-24 fl added finditer primitive (for 2.2 only) + * 2001-12-07 fl fixed memory leak in sub/subn (Guido van Rossum) + * 2002-11-09 fl fixed empty sub/subn return type + * 2003-04-18 mvl fully support 4-byte codes + * 2003-10-17 gn implemented non recursive scheme + * 2009-07-26 mrab completely re-designed matcher code + * 2011-11-18 mrab added support for PEP 393 strings + * + * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved. + * + * This version of the SRE library can be redistributed under CNRI's + * Python 1.6 license. For any other use, please contact Secret Labs + * AB (info@pythonware.com). + * + * Portions of this engine have been developed in cooperation with + * CNRI. Hewlett-Packard provided funding for 1.6 integration and + * other compatibility work. + */ + +/* #define VERBOSE */ + +#if defined(VERBOSE) +#define TRACE(X) printf X; +#else +#define TRACE(X) +#endif + +#include "Python.h" +#include "structmember.h" /* offsetof */ +#include +#include "_regex.h" +#include "pyport.h" +#include "pythread.h" + +#if PY_VERSION_HEX < 0x02060000 +#if SIZEOF_SIZE_T == SIZEOF_LONG_LONG +#define T_PYSSIZET T_LONGLONG +#elif SIZEOF_SIZE_T == SIZEOF_LONG +#define T_PYSSIZET T_LONG +#else +#error size_t is the same size as neither LONG nor LONGLONG +#endif + +#endif +typedef unsigned char Py_UCS1; +typedef unsigned short Py_UCS2; + +typedef RE_UINT32 RE_CODE; + +/* Properties in the General Category. 
*/ +#define RE_PROP_GC_CN ((RE_PROP_GC << 16) | RE_PROP_CN) +#define RE_PROP_GC_LU ((RE_PROP_GC << 16) | RE_PROP_LU) +#define RE_PROP_GC_LL ((RE_PROP_GC << 16) | RE_PROP_LL) +#define RE_PROP_GC_LT ((RE_PROP_GC << 16) | RE_PROP_LT) +#define RE_PROP_GC_P ((RE_PROP_GC << 16) | RE_PROP_P) + +/* Unlimited repeat count. */ +#define RE_UNLIMITED (~(RE_CODE)0) + +/* The status of a node. */ +typedef unsigned short RE_STATUS_T; + +/* Whether to match concurrently, i.e. release the GIL while matching. */ +#define RE_CONC_NO 0 +#define RE_CONC_YES 1 +#define RE_CONC_DEFAULT 2 + +/* the side that could truncate in a partial match. + * + * The values RE_PARTIAL_LEFT and RE_PARTIAL_RIGHT are also used as array + * indexes, so they need to be 0 and 1. + */ +#define RE_PARTIAL_NONE -1 +#define RE_PARTIAL_LEFT 0 +#define RE_PARTIAL_RIGHT 1 + +/* Flags for the kind of 'sub' call: 'sub', 'subn', 'subf', 'subfn'. */ +#define RE_SUB 0x0 +#define RE_SUBN 0x1 +#if PY_VERSION_HEX >= 0x02060000 +#define RE_SUBF 0x2 +#endif + +/* The name of this module, minus the leading underscore. */ +#define RE_MODULE "regex" + +/* Error codes. */ +#define RE_ERROR_SUCCESS 1 /* Successful match. */ +#define RE_ERROR_FAILURE 0 /* Unsuccessful match. */ +#define RE_ERROR_ILLEGAL -1 /* Illegal code. */ +#define RE_ERROR_INTERNAL -2 /* Internal error. */ +#define RE_ERROR_CONCURRENT -3 /* "concurrent" invalid. */ +#define RE_ERROR_MEMORY -4 /* Out of memory. */ +#define RE_ERROR_INTERRUPTED -5 /* Signal handler raised exception. */ +#define RE_ERROR_REPLACEMENT -6 /* Invalid replacement string. */ +#define RE_ERROR_INVALID_GROUP_REF -7 /* Invalid group reference. */ +#define RE_ERROR_GROUP_INDEX_TYPE -8 /* Group index type error. */ +#define RE_ERROR_NO_SUCH_GROUP -9 /* No such group. */ +#define RE_ERROR_INDEX -10 /* String index. */ +#define RE_ERROR_BACKTRACKING -11 /* Too much backtracking. */ +#define RE_ERROR_NOT_STRING -12 /* Not a string. */ +#define RE_ERROR_NOT_UNICODE -13 /* Not a Unicode string. 
*/ +#define RE_ERROR_PARTIAL -15 /* Partial match. */ + +/* The number of backtrack entries per allocated block. */ +#define RE_BACKTRACK_BLOCK_SIZE 64 + +/* The maximum number of backtrack entries to allocate. */ +#define RE_MAX_BACKTRACK_ALLOC (1024 * 1024) + +/* The initial maximum capacity of the guard block. */ +#define RE_INIT_GUARDS_BLOCK_SIZE 16 + +/* The initial maximum capacity of the node list. */ +#define RE_INIT_NODE_LIST_SIZE 16 + +/* The size increment for various allocation lists. */ +#define RE_LIST_SIZE_INC 16 + +/* The initial maximum capacity of the capture groups. */ +#define RE_INIT_CAPTURE_SIZE 16 + +/* Node bitflags. */ +#define RE_POSITIVE_OP 0x1 +#define RE_ZEROWIDTH_OP 0x2 +#define RE_FUZZY_OP 0x4 +#define RE_REVERSE_OP 0x8 +#define RE_REQUIRED_OP 0x10 + +/* Guards against further matching can occur at the start of the body and the + * tail of a repeat containing a repeat. + */ +#define RE_STATUS_BODY 0x1 +#define RE_STATUS_TAIL 0x2 + +/* Whether a guard is added depends on whether there's a repeat in the body of + * the repeat or a group reference in the body or tail of the repeat. + */ +#define RE_STATUS_NEITHER 0x0 +#define RE_STATUS_REPEAT 0x4 +#define RE_STATUS_LIMITED 0x8 +#define RE_STATUS_REF 0x10 +#define RE_STATUS_VISITED_AG 0x20 +#define RE_STATUS_VISITED_REP 0x40 + +/* Whether a string node has been initialised for fast searching. */ +#define RE_STATUS_FAST_INIT 0x80 + +/* Whether a node is being used. (Additional nodes may be created while the + * pattern is being built.) + */ +#define RE_STATUS_USED 0x100 + +/* Whether a node is a string node. */ +#define RE_STATUS_STRING 0x200 + +/* Whether a repeat node is within another repeat. */ +#define RE_STATUS_INNER 0x400 + +/* Various flags stored in a node status member.
*/ +#define RE_STATUS_SHIFT 11 + +#define RE_STATUS_FUZZY (RE_FUZZY_OP << RE_STATUS_SHIFT) +#define RE_STATUS_REVERSE (RE_REVERSE_OP << RE_STATUS_SHIFT) +#define RE_STATUS_REQUIRED (RE_REQUIRED_OP << RE_STATUS_SHIFT) + +/* The different error types for fuzzy matching. */ +#define RE_FUZZY_SUB 0 +#define RE_FUZZY_INS 1 +#define RE_FUZZY_DEL 2 +#define RE_FUZZY_ERR 3 +#define RE_FUZZY_COUNT 3 + +/* The various values in a FUZZY node. */ +#define RE_FUZZY_VAL_MAX_SUB 1 +#define RE_FUZZY_VAL_MAX_INS 2 +#define RE_FUZZY_VAL_MAX_DEL 3 +#define RE_FUZZY_VAL_MAX_ERR 4 +#define RE_FUZZY_VAL_SUB_COST 5 +#define RE_FUZZY_VAL_INS_COST 6 +#define RE_FUZZY_VAL_DEL_COST 7 +#define RE_FUZZY_VAL_MAX_COST 8 + +#define RE_FUZZY_VAL_MAX_BASE 1 +#define RE_FUZZY_VAL_COST_BASE 5 + +/* The various values in an END_FUZZY node. */ +#define RE_FUZZY_VAL_MIN_SUB 1 +#define RE_FUZZY_VAL_MIN_INS 2 +#define RE_FUZZY_VAL_MIN_DEL 3 +#define RE_FUZZY_VAL_MIN_ERR 4 + +/* The flags which will be set for full Unicode case folding. */ +#define RE_FULL_CASE_FOLDING (RE_FLAG_UNICODE | RE_FLAG_FULLCASE | RE_FLAG_IGNORECASE) + +/* The shortest string prefix for which we'll use a fast string search. */ +#define RE_MIN_FAST_LENGTH 5 + +static char copyright[] = + " RE 2.3.0 Copyright (c) 1997-2002 by Secret Labs AB "; + +/* The exception to raise on error. */ +static PyObject* error_exception; + +/* The dictionary of Unicode properties. */ +static PyObject* property_dict; + +typedef struct RE_State* RE_StatePtr; + +/* Handlers for ASCII, locale and Unicode. 
*/ +typedef struct RE_EncodingTable { + BOOL (*has_property)(RE_CODE property, Py_UCS4 ch); + BOOL (*at_boundary)(RE_StatePtr state, Py_ssize_t text_pos); + BOOL (*at_word_start)(RE_StatePtr state, Py_ssize_t text_pos); + BOOL (*at_word_end)(RE_StatePtr state, Py_ssize_t text_pos); + BOOL (*at_default_boundary)(RE_StatePtr state, Py_ssize_t text_pos); + BOOL (*at_default_word_start)(RE_StatePtr state, Py_ssize_t text_pos); + BOOL (*at_default_word_end)(RE_StatePtr state, Py_ssize_t text_pos); + BOOL (*at_grapheme_boundary)(RE_StatePtr state, Py_ssize_t text_pos); + BOOL (*is_line_sep)(Py_UCS4 ch); + BOOL (*at_line_start)(RE_StatePtr state, Py_ssize_t text_pos); + BOOL (*at_line_end)(RE_StatePtr state, Py_ssize_t text_pos); + BOOL (*possible_turkic)(Py_UCS4 ch); + int (*all_cases)(Py_UCS4 ch, Py_UCS4* codepoints); + Py_UCS4 (*simple_case_fold)(Py_UCS4 ch); + int (*full_case_fold)(Py_UCS4 ch, Py_UCS4* folded); + int (*all_turkic_i)(Py_UCS4 ch, Py_UCS4* cases); +} RE_EncodingTable; + +/* Position within the regex and text. */ +typedef struct RE_Position { + struct RE_Node* node; + Py_ssize_t text_pos; +} RE_Position; + +/* Info about fuzzy matching. */ +typedef struct RE_FuzzyInfo { + struct RE_Node* node; + size_t counts[RE_FUZZY_COUNT + 1]; /* Add 1 for total errors. */ + size_t total_cost; +} RE_FuzzyInfo; + +/* Storage for backtrack data. 
*/ +typedef struct RE_BacktrackData { + union { + struct { + size_t capture_change; + BOOL too_few_errors; + } atomic; + struct { + RE_Position position; + } branch; + struct { + RE_FuzzyInfo fuzzy_info; + Py_ssize_t text_pos; + RE_CODE index; + } fuzzy; + struct { + RE_Position position; + size_t count; + struct RE_Node* fuzzy_node; + BOOL too_few_errors; + } fuzzy_insert; + struct { + RE_Position position; + RE_INT8 fuzzy_type; + RE_INT8 step; + } fuzzy_item; + struct { + RE_Position position; + Py_ssize_t string_pos; + RE_INT8 fuzzy_type; + RE_INT8 folded_pos; + RE_INT8 folded_len; + RE_INT8 gfolded_pos; + RE_INT8 gfolded_len; + RE_INT8 step; + } fuzzy_string; + struct { + Py_ssize_t text_pos; + Py_ssize_t current_capture; + RE_CODE private_index; + RE_CODE public_index; + BOOL capture; + } group; + struct { + struct RE_Node* node; + size_t capture_change; + } group_call; + struct { + size_t capture_change; + BOOL too_few_errors; + } lookaround; + struct { + RE_Position position; + Py_ssize_t text_pos; + size_t count; + Py_ssize_t start; + size_t capture_change; + RE_CODE index; + } repeat; + }; + RE_UINT8 op; +} RE_BacktrackData; + +/* Storage for backtrack data is allocated in blocks for speed. */ +typedef struct RE_BacktrackBlock { + RE_BacktrackData items[RE_BACKTRACK_BLOCK_SIZE]; + struct RE_BacktrackBlock* previous; + struct RE_BacktrackBlock* next; + size_t capacity; + size_t count; +} RE_BacktrackBlock; + +/* Storage for saved groups. */ +typedef struct RE_SavedGroups { + struct RE_SavedGroups* previous; + struct RE_SavedGroups* next; + struct RE_GroupSpan* spans; + size_t* counts; +} RE_SavedGroups; + +/* Storage for info around a recursive by 'basic'match'. */ +typedef struct RE_Info { + RE_BacktrackBlock* current_backtrack_block; + size_t backtrack_count; + RE_SavedGroups* current_saved_groups; + struct RE_GroupCallFrame* current_group_call_frame; + BOOL must_advance; +} RE_Info; + +/* Storage for the next node. 
*/ +typedef struct RE_NextNode { + struct RE_Node* node; + struct RE_Node* test; + struct RE_Node* match_next; + Py_ssize_t match_step; +} RE_NextNode; + +/* A pattern node. */ +typedef struct RE_Node { + RE_NextNode next_1; + union { + struct { + RE_NextNode next_2; + } nonstring; + struct { + /* Used only if (node->status & RE_STATUS_STRING) is true. */ + Py_ssize_t* bad_character_offset; + Py_ssize_t* good_suffix_offset; + } string; + }; + Py_ssize_t step; + size_t value_count; + RE_CODE* values; + RE_STATUS_T status; + RE_UINT8 op; + BOOL match; +} RE_Node; + +/* Info about a group's span. */ +typedef struct RE_GroupSpan { + Py_ssize_t start; + Py_ssize_t end; +} RE_GroupSpan; + +/* Span of a guard (inclusive range). */ +typedef struct RE_GuardSpan { + Py_ssize_t low; + Py_ssize_t high; + BOOL protect; +} RE_GuardSpan; + +/* Spans guarded against further matching. */ +typedef struct RE_GuardList { + size_t capacity; + size_t count; + RE_GuardSpan* spans; + Py_ssize_t last_text_pos; + size_t last_low; +} RE_GuardList; + +/* Info about a group in a context. */ +typedef struct RE_GroupData { + RE_GroupSpan span; + size_t capture_count; + size_t capture_capacity; + Py_ssize_t current_capture; + RE_GroupSpan* captures; +} RE_GroupData; + +/* Info about a repeat. */ +typedef struct RE_RepeatData { + RE_GuardList body_guard_list; + RE_GuardList tail_guard_list; + size_t count; + Py_ssize_t start; + size_t capture_change; +} RE_RepeatData; + +/* Storage for saved repeats. */ +typedef struct RE_SavedRepeats { + struct RE_SavedRepeats* previous; + struct RE_SavedRepeats* next; + RE_RepeatData* repeats; +} RE_SavedRepeats; + +/* Guards for fuzzy sections. */ +typedef struct RE_FuzzyGuards { + RE_GuardList body_guard_list; + RE_GuardList tail_guard_list; +} RE_FuzzyGuards; + +/* Info about a capture group. */ +typedef struct RE_GroupInfo { + Py_ssize_t end_index; + RE_Node* node; + BOOL referenced; + BOOL has_name; +} RE_GroupInfo; + +/* Info about a call_ref. 
*/
+typedef struct RE_CallRefInfo {
+    RE_Node* node;
+    BOOL defined;
+    BOOL used;
+} RE_CallRefInfo;
+
+/* Info about a repeat. */
+typedef struct RE_RepeatInfo {
+    RE_STATUS_T status;
+} RE_RepeatInfo;
+
+/* Stack frame for a group call (frames are doubly linked). */
+typedef struct RE_GroupCallFrame {
+    struct RE_GroupCallFrame* previous;
+    struct RE_GroupCallFrame* next;
+    RE_Node* node;
+    RE_GroupData* groups;
+    RE_RepeatData* repeats;
+} RE_GroupCallFrame;
+
+/* Info about a string argument. */
+typedef struct RE_StringInfo {
+#if PY_VERSION_HEX >= 0x02060000
+    Py_buffer view; /* View of the string if it's a buffer object. */
+#endif
+    void* characters; /* Pointer to the characters of the string. */
+    Py_ssize_t length; /* Length of the string. */
+    Py_ssize_t charsize; /* Size of the characters in the string. */
+    BOOL is_unicode; /* Whether the string is Unicode. */
+    BOOL should_release; /* Whether the buffer should be released. */
+} RE_StringInfo;
+
+/* Info about where the next match was found, starting from a certain search
+ * position. This is used when a pattern starts with a BRANCH.
+ */
+#define MAX_SEARCH_POSITIONS 7
+
+/* Info about a search position. */
+typedef struct {
+    Py_ssize_t start_pos;
+    Py_ssize_t match_pos;
+} RE_SearchPosition;
+
+/* The state object used during matching. */
+typedef struct RE_State {
+    struct PatternObject* pattern; /* Parent PatternObject. */
+    /* Info about the string being matched. */
+    PyObject* string;
+#if PY_VERSION_HEX >= 0x02060000
+    Py_buffer view; /* View of the string if it's a buffer object. */
+#endif
+    Py_ssize_t charsize;
+    void* text;
+    Py_ssize_t text_length;
+    /* The slice of the string being searched. */
+    Py_ssize_t slice_start;
+    Py_ssize_t slice_end;
+    /* Info about the capture groups. */
+    RE_GroupData* groups;
+    Py_ssize_t lastindex;
+    Py_ssize_t lastgroup;
+    /* Info about the repeats. */
+    RE_RepeatData* repeats;
+    Py_ssize_t search_anchor; /* Where the last match finished. */
+    Py_ssize_t match_pos; /* The start position of the match. */
+    Py_ssize_t text_pos; /* The current position of the match. */
+    Py_ssize_t final_newline; /* The index of newline at end of string, or -1. */
+    Py_ssize_t final_line_sep; /* The index of line separator at end of string, or -1. */
+    /* Storage for backtrack info. */
+    RE_BacktrackBlock backtrack_block;
+    RE_BacktrackBlock* current_backtrack_block;
+    Py_ssize_t backtrack_allocated;
+    RE_BacktrackData* backtrack;
+    /* Storage for saved capture groups. */
+    RE_SavedGroups* first_saved_groups;
+    RE_SavedGroups* current_saved_groups;
+    /* Storage for saved repeats. */
+    RE_SavedRepeats* first_saved_repeats;
+    RE_SavedRepeats* current_saved_repeats;
+    Py_ssize_t min_width; /* The minimum width of the string to match (assuming it's not a fuzzy pattern). */
+    RE_EncodingTable* encoding; /* The 'encoding' of the string being searched. */
+    /* Accessors for 'text'. The bytes1_/bytes2_/bytes4_ helpers defined later
+     * in this file have these signatures.
+     */
+    Py_UCS4 (*char_at)(void* text, Py_ssize_t pos);
+    void (*set_char_at)(void* text, Py_ssize_t pos, Py_UCS4 ch);
+    void* (*point_to)(void* text, Py_ssize_t pos);
+    PyThread_type_lock lock; /* A lock for accessing the state across threads. */
+    RE_FuzzyInfo fuzzy_info; /* Info about fuzzy matching. */
+    size_t total_fuzzy_counts[RE_FUZZY_COUNT]; /* Totals for fuzzy matching. */
+    RE_FuzzyGuards* fuzzy_guards; /* The guards for a fuzzy match. */
+    size_t total_errors; /* The total number of errors of a fuzzy match. */
+    size_t total_cost; /* The total cost of a fuzzy match. */
+    size_t max_cost; /* The maximum permitted fuzzy cost. */
+    /* The group call stack. */
+    RE_GroupCallFrame* first_group_call_frame;
+    RE_GroupCallFrame* current_group_call_frame;
+    RE_GuardList* group_call_guard_list;
+    RE_SearchPosition search_positions[MAX_SEARCH_POSITIONS]; /* Where the search matches next. */
+    size_t capture_change; /* Incremented every time a capture group changes. */
+    Py_ssize_t req_pos; /* The position where the required string matched. */
+    Py_ssize_t req_end; /* The end position where the required string matched. */
+    int partial_side; /* The side that could truncate in a partial match. */
+    RE_UINT16 iterations; /* The number of iterations the matching engine has performed since checking for KeyboardInterrupt. */
+    BOOL is_unicode; /* Whether the string to be matched is Unicode. */
+    BOOL should_release; /* Whether the buffer should be released. */
+    BOOL overlapped; /* Whether the matches can be overlapped. */
+    BOOL reverse; /* Whether it's a reverse pattern. */
+    BOOL visible_captures; /* Whether the 'captures' method will be visible. */
+    BOOL version_0; /* Whether to perform version_0 behaviour (same as re module). */
+    BOOL must_advance; /* Whether the end of the match must advance past its start. */
+    BOOL is_multithreaded; /* Whether to release the GIL while matching. */
+    BOOL too_few_errors; /* Whether there were too few fuzzy errors. */
+    BOOL match_all; /* Whether to match all of the string ('fullmatch'). */
+} RE_State;
+
+/* Storage for the regex state and thread state.
+ *
+ * Scanner objects can sometimes be shared across threads, which means that
+ * their RE_State structs are also shared. This isn't safe when the GIL is
+ * released, so in such instances we have a lock (mutex) in the RE_State struct
+ * to protect it during matching. We also need a thread-safe place to store the
+ * thread state when releasing the GIL.
+ */
+typedef struct RE_SafeState {
+    RE_State* re_state;
+    PyThreadState* thread_state;
+} RE_SafeState;
+
+/* The PatternObject created from a regular expression. */
+typedef struct PatternObject {
+    PyObject_HEAD
+    PyObject* pattern; /* Pattern source (or None). */
+    Py_ssize_t flags; /* Flags used when compiling pattern source. */
+    PyObject* weakreflist; /* List of weak references. */
+    /* Nodes into which the regular expression is compiled. */
+    RE_Node* start_node;
+    RE_Node* start_test;
+    size_t true_group_count; /* The true number of capture groups. */
+    size_t public_group_count; /* The number of public capture groups. */
+    size_t repeat_count; /* The number of repeats. */
+    Py_ssize_t group_end_index; /* The number of group closures. */
+    PyObject* groupindex;
+    PyObject* indexgroup;
+    PyObject* named_lists;
+    size_t named_lists_count;
+    PyObject** partial_named_lists[2];
+    PyObject* named_list_indexes;
+    /* Storage for the pattern nodes. */
+    size_t node_capacity;
+    size_t node_count;
+    RE_Node** node_list;
+    /* Info about the capture groups. */
+    size_t group_info_capacity;
+    RE_GroupInfo* group_info;
+    /* Info about the call_refs. */
+    size_t call_ref_info_capacity;
+    size_t call_ref_info_count;
+    RE_CallRefInfo* call_ref_info;
+    Py_ssize_t pattern_call_ref;
+    /* Info about the repeats. */
+    size_t repeat_info_capacity;
+    RE_RepeatInfo* repeat_info;
+    Py_ssize_t min_width; /* The minimum width of the string to match (assuming it isn't a fuzzy pattern). */
+    RE_EncodingTable* encoding; /* Encoding handlers. */
+    RE_GroupData* groups_storage;
+    RE_RepeatData* repeats_storage;
+    size_t fuzzy_count; /* The number of fuzzy sections. */
+    Py_ssize_t req_offset; /* The offset to the required string. */
+    RE_Node* req_string; /* The required string. */
+    BOOL is_fuzzy; /* Whether it's a fuzzy pattern. */
+    BOOL do_search_start; /* Whether to do an initial search. */
+    BOOL recursive; /* Whether the entire pattern is recursive. */
+} PatternObject;
+
+/* The MatchObject created when a match is found. */
+typedef struct MatchObject {
+    PyObject_HEAD
+    PyObject* string; /* Link to the target string or NULL if detached. */
+    PyObject* substring; /* Link to (a substring of) the target string. */
+    Py_ssize_t substring_offset; /* Offset into the target string. */
+    PatternObject* pattern; /* Link to the regex (pattern) object. */
+    Py_ssize_t pos; /* Start of current slice. */
+    Py_ssize_t endpos; /* End of current slice. */
+    Py_ssize_t match_start; /* Start of matched slice. */
+    Py_ssize_t match_end; /* End of matched slice. */
+    Py_ssize_t lastindex; /* Last group seen by the engine (-1 if none). */
+    Py_ssize_t lastgroup; /* Last named group seen by the engine (-1 if none). */
+    size_t group_count; /* The number of groups. */
+    RE_GroupData* groups; /* The capture groups. */
+    PyObject* regs;
+    size_t fuzzy_counts[RE_FUZZY_COUNT];
+    BOOL partial; /* Whether it's a partial match. */
+} MatchObject;
+
+/* The ScannerObject. It embeds its RE_State by value. */
+typedef struct ScannerObject {
+    PyObject_HEAD
+    PatternObject* pattern;
+    RE_State state;
+    int status;
+} ScannerObject;
+
+/* The SplitterObject. It embeds its RE_State by value. */
+typedef struct SplitterObject {
+    PyObject_HEAD
+    PatternObject* pattern;
+    RE_State state;
+    Py_ssize_t maxsplit;
+    Py_ssize_t last_pos;
+    Py_ssize_t split_count;
+    Py_ssize_t index;
+    int status;
+} SplitterObject;
+
+/* Info used when compiling a pattern to nodes. */
+typedef struct RE_CompileArgs {
+    RE_CODE* code; /* The start of the compiled pattern. */
+    RE_CODE* end_code; /* The end of the compiled pattern. */
+    PatternObject* pattern; /* The pattern object. */
+    Py_ssize_t min_width; /* The minimum width of the string to match (assuming it isn't a fuzzy pattern). */
+    RE_Node* start; /* The start node. */
+    RE_Node* end; /* The end node. */
+    size_t repeat_depth; /* The nesting depth of the repeat. */
+    BOOL forward; /* Whether it's a forward (not reverse) pattern. */
+    BOOL visible_captures; /* Whether all of the captures will be visible. */
+    BOOL has_captures; /* Whether the pattern has capture groups. */
+    BOOL is_fuzzy; /* Whether the pattern (or some part of it) is fuzzy. */
+    BOOL within_fuzzy; /* Whether the subpattern is within a fuzzy section. */
+} RE_CompileArgs;
+
+/* The string slices which will be concatenated to make the result string of
+ * the 'sub' method.
+ *
+ * This allows us to avoid creating a list of slices if there are fewer than 2
+ * of them.
Empty strings aren't recorded, so if 'list' and 'item' are both
+ * NULL then the result is an empty string.
+ */
+typedef struct JoinInfo {
+    PyObject* list; /* The list of slices if there are more than 2 of them. */
+    PyObject* item; /* The slice if there is only 1 of them. */
+    BOOL reversed; /* Whether the slices have been found in reverse order. */
+    BOOL is_unicode; /* Whether the string is Unicode. */
+} JoinInfo;
+
+/* Info about fuzzy matching. */
+typedef struct {
+    RE_Node* new_node;
+    Py_ssize_t new_text_pos;
+    Py_ssize_t limit;
+    Py_ssize_t new_string_pos;
+    int step;
+    int new_folded_pos;
+    int folded_len;
+    int new_gfolded_pos;
+    int new_group_pos;
+    int fuzzy_type;
+    BOOL permit_insertion;
+} RE_FuzzyData;
+
+/* Function types for getting info from a MatchObject. */
+typedef PyObject* (*RE_GetByIndexFunc)(MatchObject* self, Py_ssize_t index);
+
+/* Absolute value of a 'Py_ssize_t'. */
+Py_LOCAL_INLINE(Py_ssize_t) abs_ssize_t(Py_ssize_t x) {
+    if (x < 0)
+        return -x;
+
+    return x;
+}
+
+/* Smaller of 2 'Py_ssize_t' values. */
+Py_LOCAL_INLINE(Py_ssize_t) min_ssize_t(Py_ssize_t x, Py_ssize_t y) {
+    if (y < x)
+        return y;
+
+    return x;
+}
+
+/* Larger of 2 'Py_ssize_t' values. */
+Py_LOCAL_INLINE(Py_ssize_t) max_ssize_t(Py_ssize_t x, Py_ssize_t y) {
+    if (y > x)
+        return y;
+
+    return x;
+}
+
+/* Smaller of 2 'size_t' values. */
+Py_LOCAL_INLINE(size_t) min_size_t(size_t x, size_t y) {
+    if (y < x)
+        return y;
+
+    return x;
+}
+
+/* Larger of 2 'size_t' values. */
+Py_LOCAL_INLINE(size_t) max_size_t(size_t x, size_t y) {
+    if (y > x)
+        return y;
+
+    return x;
+}
+
+/* 'Larger' of 2 RE_STATUS_T values. */
+Py_LOCAL_INLINE(RE_STATUS_T) max_status_2(RE_STATUS_T x, RE_STATUS_T y) {
+    if (y > x)
+        return y;
+
+    return x;
+}
+
+/* Returns the 'maximum' of 3 RE_STATUS_T values.
*/
+Py_LOCAL_INLINE(RE_STATUS_T) max_status_3(RE_STATUS_T x, RE_STATUS_T y,
+  RE_STATUS_T z) {
+    RE_STATUS_T rest;
+
+    rest = max_status_2(y, z);
+
+    return max_status_2(x, rest);
+}
+
+/* 'Largest' of 4 RE_STATUS_T values, found by comparing them in pairs. */
+Py_LOCAL_INLINE(RE_STATUS_T) max_status_4(RE_STATUS_T w, RE_STATUS_T x,
+  RE_STATUS_T y, RE_STATUS_T z) {
+    RE_STATUS_T first_pair;
+    RE_STATUS_T second_pair;
+
+    first_pair = max_status_2(w, x);
+    second_pair = max_status_2(y, z);
+
+    return max_status_2(first_pair, second_pair);
+}
+
+/* Reads the character at 'pos' from text stored 1 byte per character. */
+static Py_UCS4 bytes1_char_at(void* text, Py_ssize_t pos) {
+    Py_UCS1* chars;
+
+    chars = (Py_UCS1*)text;
+
+    return chars[pos];
+}
+
+/* Writes 'ch' at 'pos' in text stored 1 byte per character. */
+static void bytes1_set_char_at(void* text, Py_ssize_t pos, Py_UCS4 ch) {
+    Py_UCS1* chars;
+
+    chars = (Py_UCS1*)text;
+    chars[pos] = (Py_UCS1)ch;
+}
+
+/* Address of position 'pos' in text stored 1 byte per character. */
+static void* bytes1_point_to(void* text, Py_ssize_t pos) {
+    Py_UCS1* chars;
+
+    chars = (Py_UCS1*)text;
+
+    return &chars[pos];
+}
+
+/* Reads the character at 'pos' from text stored 2 bytes per character. */
+static Py_UCS4 bytes2_char_at(void* text, Py_ssize_t pos) {
+    Py_UCS2* chars;
+
+    chars = (Py_UCS2*)text;
+
+    return chars[pos];
+}
+
+/* Writes 'ch' at 'pos' in text stored 2 bytes per character. */
+static void bytes2_set_char_at(void* text, Py_ssize_t pos, Py_UCS4 ch) {
+    Py_UCS2* chars;
+
+    chars = (Py_UCS2*)text;
+    chars[pos] = (Py_UCS2)ch;
+}
+
+/* Address of position 'pos' in text stored 2 bytes per character. */
+static void* bytes2_point_to(void* text, Py_ssize_t pos) {
+    Py_UCS2* chars;
+
+    chars = (Py_UCS2*)text;
+
+    return &chars[pos];
+}
+
+/* Reads the character at 'pos' from text stored 4 bytes per character. */
+static Py_UCS4 bytes4_char_at(void* text, Py_ssize_t pos) {
+    Py_UCS4* chars;
+
+    chars = (Py_UCS4*)text;
+
+    return chars[pos];
+}
+
+/* Writes 'ch' at 'pos' in text stored 4 bytes per character. */
+static void bytes4_set_char_at(void* text, Py_ssize_t pos, Py_UCS4 ch) {
+    Py_UCS4* chars;
+
+    chars = (Py_UCS4*)text;
+    chars[pos] = (Py_UCS4)ch;
+}
+
+/* Gets a pointer to a position assuming 4 bytes per character.
*/
+static void* bytes4_point_to(void* text, Py_ssize_t pos) {
+    Py_UCS4* chars;
+
+    chars = (Py_UCS4*)text;
+
+    return &chars[pos];
+}
+
+/* Boundary test that accepts every position; both arguments are ignored. */
+static BOOL at_boundary_always(RE_State* state, Py_ssize_t text_pos) {
+    return TRUE;
+}
+
+/* Maps a BOOL onto RE_ERROR_SUCCESS (true) or RE_ERROR_FAILURE (false). */
+Py_LOCAL_INLINE(int) bool_as_status(BOOL value) {
+    if (value)
+        return RE_ERROR_SUCCESS;
+
+    return RE_ERROR_FAILURE;
+}
+
+/* ASCII-specific. */
+
+Py_LOCAL_INLINE(BOOL) unicode_has_property(RE_CODE property, Py_UCS4 ch);
+
+/* Tests a character against a property, ASCII only.
+ *
+ * Characters within the ASCII range are handed to the Unicode test. Anything
+ * above RE_ASCII_MAX matches only when the low 16 bits of 'property' (the
+ * wanted value) are 0.
+ */
+Py_LOCAL_INLINE(BOOL) ascii_has_property(RE_CODE property, Py_UCS4 ch) {
+    RE_UINT32 wanted;
+
+    if (ch <= RE_ASCII_MAX)
+        return unicode_has_property(property, ch);
+
+    /* Outside the ASCII range. */
+    wanted = property & 0xFFFF;
+
+    return wanted == 0;
+}
+
+/* Non-inline form of 'ascii_has_property', usable through a pointer. */
+static BOOL ascii_has_property_wrapper(RE_CODE property, Py_UCS4 ch) {
+    return ascii_has_property(property, ch);
+}
+
+/* True if the character just before 'text_pos' exists and is a word
+ * character.
+ */
+Py_LOCAL_INLINE(BOOL) ascii_word_left(RE_State* state, Py_ssize_t text_pos) {
+    if (text_pos <= 0)
+        return FALSE;
+
+    return ascii_has_property(RE_PROP_WORD, state->char_at(state->text,
+      text_pos - 1)) != 0;
+}
+
+/* True if the character at 'text_pos' exists and is a word character. */
+Py_LOCAL_INLINE(BOOL) ascii_word_right(RE_State* state, Py_ssize_t text_pos) {
+    if (text_pos >= state->text_length)
+        return FALSE;
+
+    return ascii_has_property(RE_PROP_WORD, state->char_at(state->text,
+      text_pos)) != 0;
+}
+
+/* A word boundary is where exactly one side has a word character. */
+static BOOL ascii_at_boundary(RE_State* state, Py_ssize_t text_pos) {
+    BOOL before;
+    BOOL after;
+
+    before = ascii_word_left(state, text_pos);
+    after = ascii_word_right(state, text_pos);
+
+    return before != after;
+}
+
+/* Checks whether a position is at the start of a word.
*/
+static BOOL ascii_at_word_start(RE_State* state, Py_ssize_t text_pos) {
+    BOOL before;
+    BOOL after;
+
+    before = ascii_word_left(state, text_pos);
+    after = ascii_word_right(state, text_pos);
+
+    if (before)
+        return FALSE;
+
+    return after != 0;
+}
+
+/* A word ends where a word character is followed by a non-word character
+ * (or by the end of the text).
+ */
+static BOOL ascii_at_word_end(RE_State* state, Py_ssize_t text_pos) {
+    BOOL before;
+    BOOL after;
+
+    before = ascii_word_left(state, text_pos);
+    after = ascii_word_right(state, text_pos);
+
+    if (!before)
+        return FALSE;
+
+    return !after;
+}
+
+/* Line separators are the codepoints 0x0A to 0x0D inclusive. */
+static BOOL ascii_is_line_sep(Py_UCS4 ch) {
+    return ch >= 0x0A && ch <= 0x0D;
+}
+
+/* A line starts at the start of the text or just after a line separator,
+ * except between the CR and the LF of a CRLF pair.
+ */
+static BOOL ascii_at_line_start(RE_State* state, Py_ssize_t text_pos) {
+    Py_UCS4 prev;
+
+    if (text_pos <= 0)
+        return TRUE;
+
+    prev = state->char_at(state->text, text_pos - 1);
+
+    if (prev != 0x0D)
+        return ascii_is_line_sep(prev);
+
+    /* After a CR: it's a line start unless an LF follows. */
+    if (text_pos >= state->text_length)
+        return TRUE;
+
+    return state->char_at(state->text, text_pos) != 0x0A;
+}
+
+/* A line ends at the end of the text or just before a line separator, except
+ * between the CR and the LF of a CRLF pair.
+ */
+static BOOL ascii_at_line_end(RE_State* state, Py_ssize_t text_pos) {
+    Py_UCS4 next;
+
+    if (text_pos >= state->text_length)
+        return TRUE;
+
+    next = state->char_at(state->text, text_pos);
+
+    if (next != 0x0A)
+        return ascii_is_line_sep(next);
+
+    /* Before an LF: it's a line end unless a CR precedes. */
+    if (text_pos <= 0)
+        return TRUE;
+
+    return state->char_at(state->text, text_pos - 1) != 0x0D;
+}
+
+/* ASCII has no Turkic variants of I/i, so this is always FALSE. */
+static BOOL ascii_possible_turkic(Py_UCS4 ch) {
+    return FALSE;
+}
+
+/* Stores 'ch' and, for an ASCII letter, its other case in 'codepoints'.
+ * Returns how many codepoints were stored (1 or 2).
+ */
+static int ascii_all_cases(Py_UCS4 ch, Py_UCS4* codepoints) {
+    BOOL is_letter;
+
+    codepoints[0] = ch;
+
+    is_letter = ('A' <= ch && ch <= 'Z') || ('a' <= ch && ch <= 'z');
+    if (!is_letter)
+        return 1;
+
+    /* Bit 0x20 is the only difference between the cases of a letter. */
+    codepoints[1] = ch ^ 0x20;
+
+    return 2;
+}
+
+/* Simple case-folding: 'A'..'Z' become lowercase, all else is unchanged. */
+static Py_UCS4 ascii_simple_case_fold(Py_UCS4 ch) {
+    return ('A' <= ch && ch <= 'Z') ? (ch ^ 0x20) : ch;
+}
+
+/* Full case-folding: always yields exactly 1 codepoint in 'folded'. */
+static int ascii_full_case_fold(Py_UCS4 ch, Py_UCS4* folded) {
+    folded[0] = ('A' <= ch && ch <= 'Z') ? (ch ^ 0x20) : ch;
+
+    return 1;
+}
+
+/* Stores 'ch' followed by whichever of 'I' and 'i' it isn't, and returns the
+ * number of codepoints stored.
+ */
+static int ascii_all_turkic_i(Py_UCS4 ch, Py_UCS4* cases) {
+    int count;
+
+    cases[0] = ch;
+    count = 1;
+
+    if (ch != 'I') {
+        cases[count] = 'I';
+        ++count;
+    }
+
+    if (ch != 'i') {
+        cases[count] = 'i';
+        ++count;
+    }
+
+    return count;
+}
+
+/* The handlers for ASCII characters. The order of the entries must match the
+ * member order of RE_EncodingTable.
+ */
+static RE_EncodingTable ascii_encoding = {
+    ascii_has_property_wrapper,
+    ascii_at_boundary,
+    ascii_at_word_start,
+    ascii_at_word_end,
+    ascii_at_boundary, /* No special "default word boundary" for ASCII. */
+    ascii_at_word_start, /* No special "default start of word" for ASCII. */
+    ascii_at_word_end, /* No special "default end of a word" for ASCII. */
+    at_boundary_always, /* No special "grapheme boundary" for ASCII. */
+    ascii_is_line_sep,
+    ascii_at_line_start,
+    ascii_at_line_end,
+    ascii_possible_turkic,
+    ascii_all_cases,
+    ascii_simple_case_fold,
+    ascii_full_case_fold,
+    ascii_all_turkic_i,
+};
+
+/* Locale-specific. */
+
+/* Tests a character against a property using the C locale functions.
+ *
+ * The high 16 bits of 'property' select the property and the low 16 bits are
+ * the wanted value. A character above RE_LOCALE_MAX matches only a wanted
+ * value of 0.
+ */
+Py_LOCAL_INLINE(BOOL) locale_has_property(RE_CODE property, Py_UCS4 ch) {
+    RE_UINT32 wanted;
+    RE_UINT32 actual;
+
+    wanted = property & 0xFFFF;
+
+    /* Outside the locale range. */
+    if (ch > RE_LOCALE_MAX)
+        return wanted == 0;
+
+    switch (property >> 16) {
+    case RE_PROP_ALNUM >> 16:
+        actual = isalnum((int)ch) ? 1 : 0;
+        break;
+    case RE_PROP_ALPHA >> 16:
+        actual = isalpha((int)ch) ? 1 : 0;
+        break;
+    case RE_PROP_ANY >> 16:
+        actual = 1;
+        break;
+    case RE_PROP_ASCII >> 16:
+        actual = ch <= RE_ASCII_MAX;
+        break;
+    case RE_PROP_BLANK >> 16:
+        actual = ch == '\t' || ch == ' ';
+        break;
+    case RE_PROP_GC:
+        /* General category: a test that succeeds yields the wanted value
+         * itself and one that fails yields 0xFFFF.
+         */
+        switch (property) {
+        case RE_PROP_ASSIGNED:
+            actual = ch <= RE_LOCALE_MAX;
+            break;
+        case RE_PROP_CASEDLETTER:
+            actual = isalpha((int)ch) ? wanted : 0xFFFF;
+            break;
+        case RE_PROP_CNTRL:
+            actual = iscntrl((int)ch) ? wanted : 0xFFFF;
+            break;
+        case RE_PROP_DIGIT:
+            actual = isdigit((int)ch) ? wanted : 0xFFFF;
+            break;
+        case RE_PROP_GC_CN:
+            actual = ch > RE_LOCALE_MAX;
+            break;
+        case RE_PROP_GC_LL:
+            actual = islower((int)ch) ? wanted : 0xFFFF;
+            break;
+        case RE_PROP_GC_LU:
+            actual = isupper((int)ch) ? wanted : 0xFFFF;
+            break;
+        case RE_PROP_GC_P:
+            actual = ispunct((int)ch) ? wanted : 0xFFFF;
+            break;
+        default:
+            actual = 0xFFFF;
+            break;
+        }
+        break;
+    case RE_PROP_GRAPH >> 16:
+        actual = isgraph((int)ch) ? 1 : 0;
+        break;
+    case RE_PROP_LOWER >> 16:
+        actual = islower((int)ch) ? 1 : 0;
+        break;
+    case RE_PROP_PRINT >> 16:
+        actual = isprint((int)ch) ? 1 : 0;
+        break;
+    case RE_PROP_SPACE >> 16:
+        actual = isspace((int)ch) ? 1 : 0;
+        break;
+    case RE_PROP_UPPER >> 16:
+        actual = isupper((int)ch) ? 1 : 0;
+        break;
+    case RE_PROP_WORD >> 16:
+        actual = ch == '_' || isalnum((int)ch) != 0;
+        break;
+    case RE_PROP_XDIGIT >> 16:
+        actual = re_get_hex_digit(ch) != 0;
+        break;
+    default:
+        actual = 0;
+        break;
+    }
+
+    return actual == wanted;
+}
+
+/* Non-inline form of 'locale_has_property', usable through a pointer. */
+static BOOL locale_has_property_wrapper(RE_CODE property, Py_UCS4 ch) {
+    return locale_has_property(property, ch);
+}
+
+/* Checks whether there's a word character to the left.
*/
+Py_LOCAL_INLINE(BOOL) locale_word_left(RE_State* state, Py_ssize_t text_pos) {
+    return text_pos > 0 && locale_has_property(RE_PROP_WORD,
+      state->char_at(state->text, text_pos - 1));
+}
+
+/* Checks whether there's a word character to the right. */
+Py_LOCAL_INLINE(BOOL) locale_word_right(RE_State* state, Py_ssize_t text_pos) {
+    return text_pos < state->text_length && locale_has_property(RE_PROP_WORD,
+      state->char_at(state->text, text_pos));
+}
+
+/* Checks whether a position is on a word boundary, i.e. exactly one of its
+ * two sides has a word character.
+ */
+static BOOL locale_at_boundary(RE_State* state, Py_ssize_t text_pos) {
+    BOOL left;
+    BOOL right;
+
+    left = locale_word_left(state, text_pos);
+    right = locale_word_right(state, text_pos);
+
+    return left != right;
+}
+
+/* Checks whether a position is at the start of a word: no word character to
+ * the left and a word character to the right.
+ */
+static BOOL locale_at_word_start(RE_State* state, Py_ssize_t text_pos) {
+    BOOL left;
+    BOOL right;
+
+    left = locale_word_left(state, text_pos);
+    right = locale_word_right(state, text_pos);
+
+    return !left && right;
+}
+
+/* Checks whether a position is at the end of a word: a word character to the
+ * left and no word character to the right.
+ */
+static BOOL locale_at_word_end(RE_State* state, Py_ssize_t text_pos) {
+    BOOL left;
+    BOOL right;
+
+    left = locale_word_left(state, text_pos);
+    right = locale_word_right(state, text_pos);
+
+    return left && !right;
+}
+
+/* Checks whether a character could be Turkic (variants of I/i).
+ *
+ * The <ctype.h> functions are only defined for arguments representable as
+ * 'unsigned char' (or EOF), but 'ch' can be any codepoint, so characters
+ * above RE_LOCALE_MAX are rejected before calling them. This is the same
+ * guard that 'locale_simple_case_fold' uses. (Assumes RE_LOCALE_MAX is no
+ * larger than UCHAR_MAX - confirm against its definition.)
+ */
+static BOOL locale_possible_turkic(Py_UCS4 ch) {
+    if (ch > RE_LOCALE_MAX)
+        /* Outside the locale range, so it has no locale case mapping. */
+        return FALSE;
+
+    return toupper((int)ch) == 'I' || tolower((int)ch) == 'i';
+}
+
+/* Gets all the cases of a character.
+ *
+ * 'ch' itself is always stored first, followed by its uppercase and lowercase
+ * forms when they differ from it. Returns the number of codepoints stored (at
+ * most 3). A character above RE_LOCALE_MAX has no locale case mapping, and
+ * passing it to toupper/tolower would be undefined behaviour, so only 'ch' is
+ * returned for it.
+ */
+static int locale_all_cases(Py_UCS4 ch, Py_UCS4* codepoints) {
+    int count;
+    Py_UCS4 other;
+
+    count = 0;
+
+    codepoints[count++] = ch;
+
+    if (ch > RE_LOCALE_MAX)
+        /* Outside the locale range. */
+        return count;
+
+    other = (Py_UCS4)toupper((int)ch);
+    if (other != ch)
+        codepoints[count++] = other;
+
+    other = (Py_UCS4)tolower((int)ch);
+    if (other != ch)
+        codepoints[count++] = other;
+
+    return count;
+}
+
+/* Returns a character with its case folded.
*/
+static Py_UCS4 locale_simple_case_fold(Py_UCS4 ch) {
+    /* Characters outside the locale range are left as they are. */
+    if (ch > RE_LOCALE_MAX)
+        return ch;
+
+    return (Py_UCS4)tolower((int)ch);
+}
+
+/* Full case-folding for the locale: always yields exactly 1 codepoint, which
+ * is the lowercase form for a character within the locale range.
+ */
+static int locale_full_case_fold(Py_UCS4 ch, Py_UCS4* folded) {
+    folded[0] = ch <= RE_LOCALE_MAX ? (Py_UCS4)tolower((int)ch) : ch;
+
+    return 1;
+}
+
+/* Collects the case variants of Turkic 'I', with the given character first.
+ *
+ * After 'ch' come 'I' and 'i' (when different from 'ch'), then the locale's
+ * uppercase of 'i' and lowercase of 'I' when those add something new.
+ * Returns the number of codepoints stored.
+ */
+static int locale_all_turkic_i(Py_UCS4 ch, Py_UCS4* cases) {
+    int count;
+    Py_UCS4 upper_i;
+    Py_UCS4 lower_i;
+
+    cases[0] = ch;
+    count = 1;
+
+    if (ch != 'I') {
+        cases[count] = 'I';
+        ++count;
+    }
+
+    if (ch != 'i') {
+        cases[count] = 'i';
+        ++count;
+    }
+
+    /* Uppercase 'i' will be either dotted (Turkic) or dotless (non-Turkic). */
+    upper_i = (Py_UCS4)toupper('i');
+    if (!(upper_i == ch || upper_i == 'I')) {
+        cases[count] = upper_i;
+        ++count;
+    }
+
+    /* Lowercase 'I' will be either dotless (Turkic) or dotted (non-Turkic). */
+    lower_i = (Py_UCS4)tolower('I');
+    if (!(lower_i == ch || lower_i == 'i')) {
+        cases[count] = lower_i;
+        ++count;
+    }
+
+    return count;
+}
+
+/* The handlers for locale characters. The order of the entries must match the
+ * member order of RE_EncodingTable.
+ */
+static RE_EncodingTable locale_encoding = {
+    locale_has_property_wrapper,
+    locale_at_boundary,
+    locale_at_word_start,
+    locale_at_word_end,
+    locale_at_boundary, /* No special "default word boundary" for locale. */
+    locale_at_word_start, /* No special "default start of a word" for locale. */
+    locale_at_word_end, /* No special "default end of a word" for locale. */
+    at_boundary_always, /* No special "grapheme boundary" for locale. */
+    ascii_is_line_sep, /* Assume locale line separators are same as ASCII. */
+    ascii_at_line_start, /* Assume locale line separators are same as ASCII. */
+    ascii_at_line_end, /* Assume locale line separators are same as ASCII. */
+    locale_possible_turkic,
+    locale_all_cases,
+    locale_simple_case_fold,
+    locale_full_case_fold,
+    locale_all_turkic_i,
+};
+
+/* Unicode-specific. */
+
+/* Checks whether a Unicode character has a property.
*/
+Py_LOCAL_INLINE(BOOL) unicode_has_property(RE_CODE property, Py_UCS4 ch) {
+    RE_UINT32 prop_id;
+    RE_UINT32 wanted;
+    RE_UINT32 actual;
+    RE_UINT32 mask;
+
+    /* The high 16 bits select the lookup function, the low 16 bits are the
+     * value that the character should have.
+     */
+    prop_id = property >> 16;
+    if (prop_id >= sizeof(re_get_property) / sizeof(re_get_property[0]))
+        return FALSE;
+
+    wanted = property & 0xFFFF;
+    actual = re_get_property[prop_id](ch);
+
+    if (actual == wanted)
+        return TRUE;
+
+    /* Only the general category has values that stand for a group of other
+     * values.
+     */
+    if (prop_id != RE_PROP_GC)
+        return FALSE;
+
+    switch (wanted) {
+    case RE_PROP_ASSIGNED:
+        return actual != RE_PROP_CN;
+    case RE_PROP_CASEDLETTER:
+        return actual == RE_PROP_LU || actual == RE_PROP_LL || actual ==
+          RE_PROP_LT;
+    case RE_PROP_C:
+        mask = RE_PROP_C_MASK;
+        break;
+    case RE_PROP_L:
+        mask = RE_PROP_L_MASK;
+        break;
+    case RE_PROP_M:
+        mask = RE_PROP_M_MASK;
+        break;
+    case RE_PROP_N:
+        mask = RE_PROP_N_MASK;
+        break;
+    case RE_PROP_P:
+        mask = RE_PROP_P_MASK;
+        break;
+    case RE_PROP_S:
+        mask = RE_PROP_S_MASK;
+        break;
+    case RE_PROP_Z:
+        mask = RE_PROP_Z_MASK;
+        break;
+    default:
+        return FALSE;
+    }
+
+    /* The group matches if the bit for the actual category is in its mask. */
+    return (mask & (1 << actual)) != 0;
+}
+
+/* Non-inline form of 'unicode_has_property', usable through a pointer. */
+static BOOL unicode_has_property_wrapper(RE_CODE property, Py_UCS4 ch) {
+    return unicode_has_property(property, ch);
+}
+
+/* True if the character just before 'text_pos' exists and is a word
+ * character.
+ */
+Py_LOCAL_INLINE(BOOL) unicode_word_left(RE_State* state, Py_ssize_t text_pos) {
+    if (text_pos <= 0)
+        return FALSE;
+
+    return unicode_has_property(RE_PROP_WORD, state->char_at(state->text,
+      text_pos - 1)) != 0;
+}
+
+/* True if the character at 'text_pos' exists and is a word character. */
+Py_LOCAL_INLINE(BOOL) unicode_word_right(RE_State* state, Py_ssize_t text_pos)
+  {
+    if (text_pos >= state->text_length)
+        return FALSE;
+
+    return unicode_has_property(RE_PROP_WORD, state->char_at(state->text,
+      text_pos)) != 0;
+}
+
+/* Checks whether a position is on a word boundary.
*/
+static BOOL unicode_at_boundary(RE_State* state, Py_ssize_t text_pos) {
+    BOOL left;
+    BOOL right;
+
+    left = unicode_word_left(state, text_pos);
+    right = unicode_word_right(state, text_pos);
+
+    return left != right;
+}
+
+/* Checks whether a position is at the start of a word. */
+static BOOL unicode_at_word_start(RE_State* state, Py_ssize_t text_pos) {
+    BOOL left;
+    BOOL right;
+
+    left = unicode_word_left(state, text_pos);
+    right = unicode_word_right(state, text_pos);
+
+    return !left && right;
+}
+
+/* Checks whether a position is at the end of a word. */
+static BOOL unicode_at_word_end(RE_State* state, Py_ssize_t text_pos) {
+    BOOL left;
+    BOOL right;
+
+    left = unicode_word_left(state, text_pos);
+    right = unicode_word_right(state, text_pos);
+
+    return left && !right;
+}
+
+/* Checks whether a character is a Unicode vowel.
+ *
+ * Only a limited number are treated as vowels: a, e, i, o, u and the listed
+ * accented forms in the range 0xE0..0xFB, compared after lowercasing.
+ */
+Py_LOCAL_INLINE(BOOL) is_unicode_vowel(Py_UCS4 ch) {
+    switch (Py_UNICODE_TOLOWER((Py_UNICODE)ch)) {
+    case 'a': case 0xE0: case 0xE1: case 0xE2:
+    case 'e': case 0xE8: case 0xE9: case 0xEA:
+    case 'i': case 0xEC: case 0xED: case 0xEE:
+    case 'o': case 0xF2: case 0xF3: case 0xF4:
+    case 'u': case 0xF9: case 0xFA: case 0xFB:
+        return TRUE;
+    default:
+        return FALSE;
+    }
+}
+
+/* Checks whether a position is on a default word boundary.
+ *
+ * The rules are defined here:
+ * http://www.unicode.org/reports/tr29/#Default_Word_Boundaries
+ *
+ * The properties examined are those of the 2 significant characters on each
+ * side of the position (m2, m1 | p0, p1); Extend and Format characters are
+ * skipped when looking for them.
+ */
+static BOOL unicode_at_default_boundary(RE_State* state, Py_ssize_t text_pos) {
+    Py_UCS4 (*char_at)(void* text, Py_ssize_t pos);
+    int prop;
+    int prop_m1;
+    Py_ssize_t pos_m1;
+    Py_ssize_t pos_m2;
+    int prop_m2;
+    Py_ssize_t pos_p0;
+    int prop_p0;
+    Py_ssize_t pos_p1;
+    int prop_p1;
+
+    /* Break at the start and end of the text. */
+    if (text_pos <= 0)
+        return TRUE;
+
+    if (text_pos >= state->text_length)
+        return TRUE;
+
+    char_at = state->char_at;
+
+    prop = (int)re_get_word_break(char_at(state->text, text_pos));
+    prop_m1 = (int)re_get_word_break(char_at(state->text, text_pos - 1));
+
+    /* Don't break within CRLF. */
+    if (prop_m1 == RE_BREAK_CR && prop == RE_BREAK_LF)
+        return FALSE;
+
+    /* Otherwise break before and after Newlines (including CR and LF). */
+    if (prop_m1 == RE_BREAK_NEWLINE || prop_m1 == RE_BREAK_CR || prop_m1 ==
+      RE_BREAK_LF || prop == RE_BREAK_NEWLINE || prop == RE_BREAK_CR || prop ==
+      RE_BREAK_LF)
+        return TRUE;
+
+    /* Get the property of the previous character. */
+    pos_m1 = text_pos - 1;
+    prop_m1 = RE_BREAK_OTHER;
+    while (pos_m1 >= 0) {
+        prop_m1 = (int)re_get_word_break(char_at(state->text, pos_m1));
+        if (prop_m1 != RE_BREAK_EXTEND && prop_m1 != RE_BREAK_FORMAT)
+            break;
+
+        --pos_m1;
+    }
+
+    /* Get the property of the preceding character. */
+    pos_m2 = pos_m1 - 1;
+    prop_m2 = RE_BREAK_OTHER;
+    while (pos_m2 >= 0) {
+        prop_m2 = (int)re_get_word_break(char_at(state->text, pos_m2));
+        if (prop_m2 != RE_BREAK_EXTEND && prop_m2 != RE_BREAK_FORMAT)
+            break;
+
+        --pos_m2;
+    }
+
+    /* Get the property of the next character. */
+    pos_p0 = text_pos;
+    prop_p0 = prop;
+    while (pos_p0 < state->text_length) {
+        prop_p0 = (int)re_get_word_break(char_at(state->text, pos_p0));
+        if (prop_p0 != RE_BREAK_EXTEND && prop_p0 != RE_BREAK_FORMAT)
+            break;
+
+        ++pos_p0;
+    }
+
+    /* Get the property of the following character. */
+    pos_p1 = pos_p0 + 1;
+    prop_p1 = RE_BREAK_OTHER;
+    while (pos_p1 < state->text_length) {
+        prop_p1 = (int)re_get_word_break(char_at(state->text, pos_p1));
+        if (prop_p1 != RE_BREAK_EXTEND && prop_p1 != RE_BREAK_FORMAT)
+            break;
+
+        ++pos_p1;
+    }
+
+    /* Don't break between most letters. */
+    if ((prop_m1 == RE_BREAK_ALETTER || prop_m1 == RE_BREAK_HEBREWLETTER) &&
+      (prop_p0 == RE_BREAK_ALETTER || prop_p0 == RE_BREAK_HEBREWLETTER))
+        return FALSE;
+
+    /* Break between apostrophe and vowels (French, Italian). */
+    if (pos_m1 >= 0 && char_at(state->text, pos_m1) == '\'' &&
+      is_unicode_vowel(char_at(state->text, text_pos)))
+        return TRUE;
+
+    /* Don't break letters across certain punctuation. */
+    if ((prop_m1 == RE_BREAK_ALETTER || prop_m1 == RE_BREAK_HEBREWLETTER) &&
+      (prop_p0 == RE_BREAK_MIDLETTER || prop_p0 == RE_BREAK_MIDNUMLET ||
+      prop_p0 == RE_BREAK_SINGLEQUOTE) && (prop_p1 == RE_BREAK_ALETTER ||
+      prop_p1 == RE_BREAK_HEBREWLETTER))
+        return FALSE;
+    if ((prop_m2 == RE_BREAK_ALETTER || prop_m2 == RE_BREAK_HEBREWLETTER) &&
+      (prop_m1 == RE_BREAK_MIDLETTER || prop_m1 == RE_BREAK_MIDNUMLET ||
+      prop_m1 == RE_BREAK_SINGLEQUOTE) && (prop_p0 == RE_BREAK_ALETTER ||
+      prop_p0 == RE_BREAK_HEBREWLETTER))
+        return FALSE;
+    if (prop_m1 == RE_BREAK_HEBREWLETTER && prop_p0 == RE_BREAK_SINGLEQUOTE)
+        return FALSE;
+    if (prop_m1 == RE_BREAK_HEBREWLETTER && prop_p0 == RE_BREAK_DOUBLEQUOTE &&
+      prop_p1 == RE_BREAK_HEBREWLETTER)
+        return FALSE;
+    if (prop_m2 == RE_BREAK_HEBREWLETTER && prop_m1 == RE_BREAK_DOUBLEQUOTE &&
+      prop_p0 == RE_BREAK_HEBREWLETTER)
+        return FALSE;
+
+    /* Don't break within sequences of digits, or digits adjacent to letters
+     * ("3a", or "A3").
+     */
+    if (prop_m1 == RE_BREAK_NUMERIC && prop_p0 == RE_BREAK_NUMERIC)
+        return FALSE;
+    if ((prop_m1 == RE_BREAK_ALETTER || prop_m1 == RE_BREAK_HEBREWLETTER) &&
+      prop_p0 == RE_BREAK_NUMERIC)
+        return FALSE;
+    if (prop_m1 == RE_BREAK_NUMERIC && (prop_p0 == RE_BREAK_ALETTER || prop_p0
+      == RE_BREAK_HEBREWLETTER))
+        return FALSE;
+
+    /* Don't break within sequences, such as "3.2" or "3,456.789". */
+    if (prop_m2 == RE_BREAK_NUMERIC && (prop_m1 == RE_BREAK_MIDNUM || prop_m1
+      == RE_BREAK_MIDNUMLET || prop_m1 == RE_BREAK_SINGLEQUOTE) && prop_p0 ==
+      RE_BREAK_NUMERIC)
+        return FALSE;
+    if (prop_m1 == RE_BREAK_NUMERIC && (prop_p0 == RE_BREAK_MIDNUM || prop_p0
+      == RE_BREAK_MIDNUMLET || prop_p0 == RE_BREAK_SINGLEQUOTE) && prop_p1 ==
+      RE_BREAK_NUMERIC)
+        return FALSE;
+
+    /* Don't break between Katakana. */
+    if (prop_m1 == RE_BREAK_KATAKANA && prop_p0 == RE_BREAK_KATAKANA)
+        return FALSE;
+
+    /* Don't break from extenders. */
+    if ((prop_m1 == RE_BREAK_ALETTER || prop_m1 == RE_BREAK_HEBREWLETTER ||
+      prop_m1 == RE_BREAK_NUMERIC || prop_m1 == RE_BREAK_KATAKANA || prop_m1 ==
+      RE_BREAK_EXTENDNUMLET) && prop_p0 == RE_BREAK_EXTENDNUMLET)
+        return FALSE;
+    if (prop_m1 == RE_BREAK_EXTENDNUMLET && (prop_p0 == RE_BREAK_ALETTER ||
+      prop_p0 == RE_BREAK_HEBREWLETTER || prop_p0 == RE_BREAK_NUMERIC ||
+      prop_p0 == RE_BREAK_KATAKANA))
+        return FALSE;
+
+    /* Don't break between regional indicator symbols. */
+    if (prop_m1 == RE_BREAK_REGIONALINDICATOR && prop_p0 ==
+      RE_BREAK_REGIONALINDICATOR)
+        return FALSE;
+
+    /* Otherwise, break everywhere (including around ideographs). */
+    return TRUE;
+}
+
+/* Checks whether a position is at the start/end of a word.
+ *
+ * 'at_start' selects the test: TRUE for the start of a word, FALSE for the
+ * end. Wherever the default word boundary rules permit a break, the result
+ * is whether the character before the position is a non-word/word character
+ * and the one after it is a word/non-word character, as 'at_start' requires.
+ */
+Py_LOCAL_INLINE(BOOL) unicode_at_default_word_start_or_end(RE_State* state,
+  Py_ssize_t text_pos, BOOL at_start) {
+    Py_UCS4 (*char_at)(void* text, Py_ssize_t pos);
+    BOOL before;
+    BOOL after;
+    Py_UCS4 char_0;
+    Py_UCS4 char_m1;
+    int prop;
+    int prop_m1;
+    Py_ssize_t pos_m1;
+    Py_UCS4 char_p1;
+    Py_ssize_t pos_p1;
+    int prop_p1;
+    Py_ssize_t pos_m2;
+    int prop_m2;
+
+    char_at = state->char_at;
+
+    /* At the start or end of the text. */
+    if (text_pos <= 0 || text_pos >= state->text_length) {
+        before = unicode_word_left(state, text_pos);
+        after = unicode_word_right(state, text_pos);
+
+        return before != at_start && after == at_start;
+    }
+
+    char_0 = char_at(state->text, text_pos);
+    char_m1 = char_at(state->text, text_pos - 1);
+    prop = (int)re_get_word_break(char_0);
+    prop_m1 = (int)re_get_word_break(char_m1);
+
+    /* No break within CRLF. */
+    if (prop_m1 == RE_BREAK_CR && prop == RE_BREAK_LF)
+        return FALSE;
+
+    /* Break before and after Newlines (including CR and LF). */
+    if (prop_m1 == RE_BREAK_NEWLINE || prop_m1 == RE_BREAK_CR || prop_m1 ==
+      RE_BREAK_LF || prop == RE_BREAK_NEWLINE || prop == RE_BREAK_CR || prop ==
+      RE_BREAK_LF) {
+        before = unicode_has_property(RE_PROP_WORD, char_m1);
+        after = unicode_has_property(RE_PROP_WORD, char_0);
+
+        return before != at_start && after == at_start;
+    }
+
+    /* No break just before Format or Extend characters. */
+    if (prop == RE_BREAK_EXTEND || prop == RE_BREAK_FORMAT)
+        return FALSE;
+
+    /* Get the property of the previous character. */
+    pos_m1 = text_pos - 1;
+    prop_m1 = RE_BREAK_OTHER;
+    while (pos_m1 >= 0) {
+        char_m1 = char_at(state->text, pos_m1);
+        prop_m1 = (int)re_get_word_break(char_m1);
+        if (prop_m1 != RE_BREAK_EXTEND && prop_m1 != RE_BREAK_FORMAT)
+            break;
+
+        --pos_m1;
+    }
+
+    /* No break between most letters. */
+    if (prop_m1 == RE_BREAK_ALETTER && prop == RE_BREAK_ALETTER)
+        return FALSE;
+
+    /* Break between apostrophe and vowels (French, Italian). */
+    if (pos_m1 >= 0 && char_m1 == '\'' && is_unicode_vowel(char_0))
+        return TRUE;
+
+    /* Get the property of the following character. */
+    pos_p1 = text_pos + 1;
+    prop_p1 = RE_BREAK_OTHER;
+    while (pos_p1 < state->text_length) {
+        char_p1 = char_at(state->text, pos_p1);
+        prop_p1 = (int)re_get_word_break(char_p1);
+        if (prop_p1 != RE_BREAK_EXTEND && prop_p1 != RE_BREAK_FORMAT)
+            break;
+
+        ++pos_p1;
+    }
+
+    /* No break letters across certain punctuation. */
+    if (prop_m1 == RE_BREAK_ALETTER && (prop == RE_BREAK_MIDLETTER || prop ==
+      RE_BREAK_MIDNUMLET) && prop_p1 == RE_BREAK_ALETTER)
+        return FALSE;
+
+    /* Get the property of the preceding character, skipping Extend and
+     * Format characters. The skip test must look at prop_m2: testing prop_m1
+     * here (as the code originally did) stops on the first Format character
+     * instead of skipping it.
+     */
+    pos_m2 = pos_m1 - 1;
+    prop_m2 = RE_BREAK_OTHER;
+    while (pos_m2 >= 0) {
+        prop_m2 = (int)re_get_word_break(char_at(state->text, pos_m2));
+        if (prop_m2 != RE_BREAK_EXTEND && prop_m2 != RE_BREAK_FORMAT)
+            break;
+
+        --pos_m2;
+    }
+
+    if (prop_m2 == RE_BREAK_ALETTER && (prop_m1 == RE_BREAK_MIDLETTER ||
+      prop_m1 == RE_BREAK_MIDNUMLET) && prop == RE_BREAK_ALETTER)
+        return FALSE;
+
+    /* No break within sequences of digits, or digits adjacent to letters
+     * ("3a", or "A3").
+     */
+    if ((prop_m1 == RE_BREAK_NUMERIC || prop_m1 == RE_BREAK_ALETTER) && prop ==
+      RE_BREAK_NUMERIC)
+        return FALSE;
+
+    if (prop_m1 == RE_BREAK_NUMERIC && prop == RE_BREAK_ALETTER)
+        return FALSE;
+
+    /* No break within sequences, such as "3.2" or "3,456.789". */
+    if (prop_m2 == RE_BREAK_NUMERIC && (prop_m1 == RE_BREAK_MIDNUM || prop_m1
+      == RE_BREAK_MIDNUMLET) && prop == RE_BREAK_NUMERIC)
+        return FALSE;
+
+    if (prop_m1 == RE_BREAK_NUMERIC && (prop == RE_BREAK_MIDNUM || prop ==
+      RE_BREAK_MIDNUMLET) && prop_p1 == RE_BREAK_NUMERIC)
+        return FALSE;
+
+    /* No break between Katakana. */
+    if (prop_m1 == RE_BREAK_KATAKANA && prop == RE_BREAK_KATAKANA)
+        return FALSE;
+
+    /* No break from extenders. */
+    if ((prop_m1 == RE_BREAK_ALETTER || prop_m1 == RE_BREAK_NUMERIC || prop_m1
+      == RE_BREAK_KATAKANA || prop_m1 == RE_BREAK_EXTENDNUMLET) && prop ==
+      RE_BREAK_EXTENDNUMLET)
+        return FALSE;
+
+    if (prop_m1 == RE_BREAK_EXTENDNUMLET && (prop == RE_BREAK_ALETTER || prop
+      == RE_BREAK_NUMERIC || prop == RE_BREAK_KATAKANA))
+        return FALSE;
+
+    /* Otherwise, break everywhere (including around ideographs). */
+    before = unicode_has_property(RE_PROP_WORD, char_m1);
+    after = unicode_has_property(RE_PROP_WORD, char_0);
+
+    return before != at_start && after == at_start;
+}
+
+/* Checks whether a position is at the start of a word.
*/ +static BOOL unicode_at_default_word_start(RE_State* state, Py_ssize_t text_pos) + { + return unicode_at_default_word_start_or_end(state, text_pos, TRUE); +} + +/* Checks whether a position is at the end of a word. */ +static BOOL unicode_at_default_word_end(RE_State* state, Py_ssize_t text_pos) { + return unicode_at_default_word_start_or_end(state, text_pos, FALSE); +} + +/* Checks whether a position is on a grapheme boundary. + * + * The rules are defined here: + * http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries + */ +static BOOL unicode_at_grapheme_boundary(RE_State* state, Py_ssize_t text_pos) + { + Py_UCS4 (*char_at)(void* text, Py_ssize_t pos); + int prop; + int prop_m1; + + /* Break at the start and end of the text. */ + if (text_pos <= 0) + return TRUE; + + if (text_pos >= state->text_length) + return TRUE; + + char_at = state->char_at; + + prop = (int)re_get_grapheme_cluster_break(char_at(state->text, text_pos)); + prop_m1 = (int)re_get_grapheme_cluster_break(char_at(state->text, text_pos + - 1)); + + /* Don't break within CRLF. */ + if (prop_m1 == RE_GBREAK_CR && prop == RE_GBREAK_LF) + return FALSE; + + /* Otherwise break before and after controls (including CR and LF). */ + if (prop_m1 == RE_GBREAK_CONTROL || prop_m1 == RE_GBREAK_CR || prop_m1 == + RE_GBREAK_LF || prop == RE_GBREAK_CONTROL || prop == RE_GBREAK_CR || prop + == RE_GBREAK_LF) + return TRUE; + + /* Don't break Hangul syllable sequences. */ + if (prop_m1 == RE_GBREAK_L && (prop == RE_GBREAK_L || prop == RE_GBREAK_V + || prop == RE_GBREAK_LV || prop == RE_GBREAK_LVT)) + return FALSE; + if ((prop_m1 == RE_GBREAK_LV || prop_m1 == RE_GBREAK_V) && (prop == + RE_GBREAK_V || prop == RE_GBREAK_T)) + return FALSE; + if ((prop_m1 == RE_GBREAK_LVT || prop_m1 == RE_GBREAK_T) && (prop == + RE_GBREAK_T)) + return FALSE; + + /* Don't break between regional indicator symbols. 
*/ + if (prop_m1 == RE_GBREAK_REGIONALINDICATOR && prop == + RE_GBREAK_REGIONALINDICATOR) + return FALSE; + + /* Don't break just before Extend characters. */ + if (prop == RE_GBREAK_EXTEND) + return FALSE; + + /* Don't break before SpacingMarks, or after Prepend characters. */ + if (prop == RE_GBREAK_SPACINGMARK) + return FALSE; + + if (prop_m1 == RE_GBREAK_PREPEND) + return FALSE; + + /* Otherwise, break everywhere. */ + return TRUE; +} + +/* Checks whether a character is a line separator. */ +static BOOL unicode_is_line_sep(Py_UCS4 ch) { + return (0x0A <= ch && ch <= 0x0D) || ch == 0x85 || ch == 0x2028 || ch == + 0x2029; +} + +/* Checks whether a position is at the start of a line. */ +static BOOL unicode_at_line_start(RE_State* state, Py_ssize_t text_pos) { + Py_UCS4 ch; + + if (text_pos <= 0) + return TRUE; + + ch = state->char_at(state->text, text_pos - 1); + + if (ch == 0x0D) { + if (text_pos >= state->text_length) + return TRUE; + + /* No line break inside CRLF. */ + return state->char_at(state->text, text_pos) != 0x0A; + } + + return (0x0A <= ch && ch <= 0x0D) || ch == 0x85 || ch == 0x2028 || ch == + 0x2029; +} + +/* Checks whether a position is at the end of a line. */ +static BOOL unicode_at_line_end(RE_State* state, Py_ssize_t text_pos) { + Py_UCS4 ch; + + if (text_pos >= state->text_length) + return TRUE; + + ch = state->char_at(state->text, text_pos); + + if (ch == 0x0A) { + if (text_pos <= 0) + return TRUE; + + /* No line break inside CRLF. */ + return state->char_at(state->text, text_pos - 1) != 0x0D; + } + + return (0x0A <= ch && ch <= 0x0D) || ch == 0x85 || ch == 0x2028 || ch == + 0x2029; +} + +/* Checks whether a character could be Turkic (variants of I/i). */ +static BOOL unicode_possible_turkic(Py_UCS4 ch) { + return ch == 'I' || ch == 'i' || ch == 0x0130 || ch == 0x0131; +} + +/* Gets all the cases of a character. 
*/ +static int unicode_all_cases(Py_UCS4 ch, Py_UCS4* codepoints) { + return re_get_all_cases(ch, codepoints); +} + +/* Returns a character with its case folded, unless it could be Turkic + * (variants of I/i). + */ +static Py_UCS4 unicode_simple_case_fold(Py_UCS4 ch) { + /* Is it a possible Turkic character? If so, pass it through unchanged. */ + if (ch == 'I' || ch == 'i' || ch == 0x0130 || ch == 0x0131) + return ch; + + return (Py_UCS4)re_get_simple_case_folding(ch); +} + +/* Returns a character with its case folded, unless it could be Turkic + * (variants of I/i). + */ +static int unicode_full_case_fold(Py_UCS4 ch, Py_UCS4* folded) { + /* Is it a possible Turkic character? If so, pass it through unchanged. */ + if (ch == 'I' || ch == 'i' || ch == 0x0130 || ch == 0x0131) { + folded[0] = ch; + return 1; + } + + return re_get_full_case_folding(ch, folded); +} + +/* Gets all the case variants of Turkic 'I'. */ +static int unicode_all_turkic_i(Py_UCS4 ch, Py_UCS4* cases) { + int count; + + count = 0; + + cases[count++] = ch; + + if (ch != 'I') + cases[count++] = 'I'; + + if (ch != 'i') + cases[count++] = 'i'; + + if (ch != 0x130) + cases[count++] = 0x130; + + if (ch != 0x131) + cases[count++] = 0x131; + + return count; + +} + +/* The handlers for Unicode characters. */ +static RE_EncodingTable unicode_encoding = { + unicode_has_property_wrapper, + unicode_at_boundary, + unicode_at_word_start, + unicode_at_word_end, + unicode_at_default_boundary, + unicode_at_default_word_start, + unicode_at_default_word_end, + unicode_at_grapheme_boundary, + unicode_is_line_sep, + unicode_at_line_start, + unicode_at_line_end, + unicode_possible_turkic, + unicode_all_cases, + unicode_simple_case_fold, + unicode_full_case_fold, + unicode_all_turkic_i, +}; + +Py_LOCAL_INLINE(PyObject*) get_object(char* module_name, char* object_name); + +/* Sets the error message. 
*/
+/* 'status' is one of the RE_ERROR_* codes handled below; any other value is
+ * reported as an internal error. 'object' is only used by the codes that
+ * report a type name, and may be NULL.
+ */
+Py_LOCAL_INLINE(void) set_error(int status, PyObject* object) {
+    TRACE(("<>\n"))
+
+    /* Look up the module's own exception class the first time it's needed. */
+    if (!error_exception)
+        error_exception = get_object("_" RE_MODULE "_core", "error");
+
+    switch (status) {
+    case RE_ERROR_BACKTRACKING:
+        PyErr_SetString(error_exception, "too much backtracking");
+        break;
+    case RE_ERROR_CONCURRENT:
+        PyErr_SetString(PyExc_ValueError, "concurrent not int or None");
+        break;
+    case RE_ERROR_GROUP_INDEX_TYPE:
+        if (object)
+            PyErr_Format(PyExc_TypeError,
+              "group indices must be integers or strings, not %.200s",
+              object->ob_type->tp_name);
+        else
+            PyErr_Format(PyExc_TypeError,
+              "group indices must be integers or strings");
+        break;
+    case RE_ERROR_ILLEGAL:
+        PyErr_SetString(PyExc_RuntimeError, "invalid RE code");
+        break;
+    case RE_ERROR_INDEX:
+        PyErr_SetString(PyExc_TypeError, "string indices must be integers");
+        break;
+    case RE_ERROR_INTERRUPTED:
+        /* An exception has already been raised, so let it fly. */
+        break;
+    case RE_ERROR_INVALID_GROUP_REF:
+        PyErr_SetString(error_exception, "invalid group reference");
+        break;
+    case RE_ERROR_MEMORY:
+        PyErr_NoMemory();
+        break;
+    case RE_ERROR_NOT_STRING:
+        /* Don't dereference a NULL object just to build the message. */
+        if (object)
+            PyErr_Format(PyExc_TypeError,
+              "expected string instance, %.200s found",
+              object->ob_type->tp_name);
+        else
+            PyErr_SetString(PyExc_TypeError, "expected string instance");
+        break;
+    case RE_ERROR_NOT_UNICODE:
+        /* Don't dereference a NULL object just to build the message. */
+        if (object)
+            PyErr_Format(PyExc_TypeError,
+              "expected unicode instance, %.200s found",
+              object->ob_type->tp_name);
+        else
+            PyErr_SetString(PyExc_TypeError, "expected unicode instance");
+        break;
+    case RE_ERROR_NO_SUCH_GROUP:
+        PyErr_SetString(PyExc_IndexError, "no such group");
+        break;
+    case RE_ERROR_REPLACEMENT:
+        PyErr_SetString(error_exception, "invalid replacement");
+        break;
+    default:
+        /* Other error codes indicate compiler/engine bugs. */
+        PyErr_SetString(PyExc_RuntimeError,
+          "internal error in regular expression engine");
+        break;
+    }
+}
+
+/* Allocates memory.
+ *
+ * Sets the Python error handler and returns NULL if the allocation fails.
+ */ +Py_LOCAL_INLINE(void*) re_alloc(size_t size) { + void* new_ptr; + + new_ptr = PyMem_Malloc(size); + if (!new_ptr) + set_error(RE_ERROR_MEMORY, NULL); + + return new_ptr; +} + +/* Reallocates memory. + * + * Sets the Python error handler and returns NULL if the reallocation fails. + */ +Py_LOCAL_INLINE(void*) re_realloc(void* ptr, size_t size) { + void* new_ptr; + + new_ptr = PyMem_Realloc(ptr, size); + if (!new_ptr) + set_error(RE_ERROR_MEMORY, NULL); + + return new_ptr; +} + +/* Deallocates memory. */ +Py_LOCAL_INLINE(void) re_dealloc(void* ptr) { + PyMem_Free(ptr); +} + +/* Releases the GIL if multithreading is enabled. */ +Py_LOCAL_INLINE(void) release_GIL(RE_SafeState* safe_state) { + if (safe_state->re_state->is_multithreaded) + safe_state->thread_state = PyEval_SaveThread(); +} + +/* Acquires the GIL if multithreading is enabled. */ +Py_LOCAL_INLINE(void) acquire_GIL(RE_SafeState* safe_state) { + if (safe_state->re_state->is_multithreaded) + PyEval_RestoreThread(safe_state->thread_state); +} + +/* Allocates memory, holding the GIL during the allocation. + * + * Sets the Python error handler and returns NULL if the allocation fails. + */ +Py_LOCAL_INLINE(void*) safe_alloc(RE_SafeState* safe_state, size_t size) { + void* new_ptr; + + acquire_GIL(safe_state); + + new_ptr = re_alloc(size); + + release_GIL(safe_state); + + return new_ptr; +} + +/* Reallocates memory, holding the GIL during the reallocation. + * + * Sets the Python error handler and returns NULL if the reallocation fails. + */ +Py_LOCAL_INLINE(void*) safe_realloc(RE_SafeState* safe_state, void* ptr, size_t + size) { + void* new_ptr; + + acquire_GIL(safe_state); + + new_ptr = re_realloc(ptr, size); + + release_GIL(safe_state); + + return new_ptr; +} + +/* Deallocates memory, holding the GIL during the deallocation. 
*/ +Py_LOCAL_INLINE(void) safe_dealloc(RE_SafeState* safe_state, void* ptr) { + acquire_GIL(safe_state); + + re_dealloc(ptr); + + release_GIL(safe_state); +} + +/* Checks for KeyboardInterrupt, holding the GIL during the check. */ +Py_LOCAL_INLINE(BOOL) safe_check_signals(RE_SafeState* safe_state) { + BOOL result; + + acquire_GIL(safe_state); + + result = (BOOL)PyErr_CheckSignals(); + + release_GIL(safe_state); + + return result; +} + +/* Checks whether a character is in a range. */ +Py_LOCAL_INLINE(BOOL) in_range(RE_EncodingTable* encoding, Py_UCS4 lower, + Py_UCS4 upper, Py_UCS4 ch) { + return lower <= ch && ch <= upper; +} + +/* Checks whether a character is in a range, ignoring case. */ +Py_LOCAL_INLINE(BOOL) in_range_ign(RE_EncodingTable* encoding, Py_UCS4 lower, + Py_UCS4 upper, Py_UCS4 ch) { + Py_UCS4 cases[RE_MAX_CASES]; + int count; + int i; + + count = encoding->all_cases(ch, cases); + + for (i = 0; i < count; i++) { + if (in_range(encoding, lower, upper, cases[i])) + return TRUE; + } + + return FALSE; +} + +/* Checks whether 2 characters are the same. */ +Py_LOCAL_INLINE(BOOL) same_char(RE_EncodingTable* encoding, Py_UCS4 ch1, + Py_UCS4 ch2) { + return ch1 == ch2; +} + +/* Wrapper for calling 'same_char' via a pointer. */ +static BOOL same_char_wrapper(RE_EncodingTable* encoding, Py_UCS4 ch1, Py_UCS4 + ch2) { + return same_char(encoding, ch1, ch2); +} + +/* Checks whether 2 characters are the same, ignoring case. */ +Py_LOCAL_INLINE(BOOL) same_char_ign(RE_EncodingTable* encoding, Py_UCS4 ch1, + Py_UCS4 ch2) { + Py_UCS4 cases[RE_MAX_CASES]; + int count; + int i; + + if (ch1 == ch2) + return TRUE; + + count = encoding->all_cases(ch1, cases); + + for (i = 1; i < count; i++) { + if (cases[i] == ch2) + return TRUE; + } + + return FALSE; +} + +/* Wrapper for calling 'same_char' via a pointer. 
*/ +static BOOL same_char_ign_wrapper(RE_EncodingTable* encoding, Py_UCS4 ch1, + Py_UCS4 ch2) { + return same_char_ign(encoding, ch1, ch2); +} + +/* Checks whether a character is anything except a newline. */ +Py_LOCAL_INLINE(BOOL) matches_ANY(RE_EncodingTable* encoding, RE_Node* node, + Py_UCS4 ch) { + return ch != '\n'; +} + +/* Checks whether a character is anything except a line separator. */ +Py_LOCAL_INLINE(BOOL) matches_ANY_U(RE_EncodingTable* encoding, RE_Node* node, + Py_UCS4 ch) { + return !encoding->is_line_sep(ch); +} + +/* Checks whether 2 characters are the same. */ +Py_LOCAL_INLINE(BOOL) matches_CHARACTER(RE_EncodingTable* encoding, RE_Node* + node, Py_UCS4 ch) { + return same_char(encoding, node->values[0], ch); +} + +/* Checks whether 2 characters are the same, ignoring case. */ +Py_LOCAL_INLINE(BOOL) matches_CHARACTER_IGN(RE_EncodingTable* encoding, + RE_Node* node, Py_UCS4 ch) { + return same_char_ign(encoding, node->values[0], ch); +} + +/* Checks whether a character has a property. */ +Py_LOCAL_INLINE(BOOL) matches_PROPERTY(RE_EncodingTable* encoding, RE_Node* + node, Py_UCS4 ch) { + return encoding->has_property(node->values[0], ch); +} + +/* Checks whether a character has a property, ignoring case. */ +Py_LOCAL_INLINE(BOOL) matches_PROPERTY_IGN(RE_EncodingTable* encoding, RE_Node* + node, Py_UCS4 ch) { + RE_UINT32 property; + RE_UINT32 prop; + + property = node->values[0]; + prop = property >> 16; + + /* We need to do special handling of case-sensitive properties according to + * the 'encoding'. + */ + if (encoding == &unicode_encoding) { + /* We are working with Unicode. 
*/ + if (property == RE_PROP_GC_LU || property == RE_PROP_GC_LL || property + == RE_PROP_GC_LT) { + RE_UINT32 value; + + value = re_get_general_category(ch); + + return value == RE_PROP_LU || value == RE_PROP_LL || value == + RE_PROP_LT; + } else if (prop == RE_PROP_UPPERCASE || prop == RE_PROP_LOWERCASE) + return (BOOL)re_get_cased(ch); + + /* The property is case-insensitive. */ + return unicode_has_property(property, ch); + } else if (encoding == &ascii_encoding) { + /* We are working with ASCII. */ + if (property == RE_PROP_GC_LU || property == RE_PROP_GC_LL || property + == RE_PROP_GC_LT) { + RE_UINT32 value; + + value = re_get_general_category(ch); + + return value == RE_PROP_LU || value == RE_PROP_LL || value == + RE_PROP_LT; + } else if (prop == RE_PROP_UPPERCASE || prop == RE_PROP_LOWERCASE) + return (BOOL)re_get_cased(ch); + + /* The property is case-insensitive. */ + return ascii_has_property(property, ch); + } else { + /* We are working with Locale. */ + if (property == RE_PROP_GC_LU || property == RE_PROP_GC_LL || property + == RE_PROP_GC_LT) + return ch <= RE_LOCALE_MAX && (isupper((int)ch) || + islower((int)ch)) != 0; + else if (prop == RE_PROP_UPPERCASE || prop == RE_PROP_LOWERCASE) + return ch <= RE_LOCALE_MAX && (isupper((int)ch) || + islower((int)ch)) != 0; + + /* The property is case-insensitive. */ + return locale_has_property(property, ch); + } +} + +/* Checks whether a character is in a range. */ +Py_LOCAL_INLINE(BOOL) matches_RANGE(RE_EncodingTable* encoding, RE_Node* node, + Py_UCS4 ch) { + return in_range(encoding, node->values[0], node->values[1], ch); +} + +/* Checks whether a character is in a range, ignoring case. 
*/ +Py_LOCAL_INLINE(BOOL) matches_RANGE_IGN(RE_EncodingTable* encoding, RE_Node* + node, Py_UCS4 ch) { + return in_range_ign(encoding, node->values[0], node->values[1], ch); +} + +Py_LOCAL_INLINE(BOOL) in_set_diff(RE_EncodingTable* encoding, RE_Node* node, + Py_UCS4 ch); +Py_LOCAL_INLINE(BOOL) in_set_inter(RE_EncodingTable* encoding, RE_Node* node, + Py_UCS4 ch); +Py_LOCAL_INLINE(BOOL) in_set_sym_diff(RE_EncodingTable* encoding, RE_Node* + node, Py_UCS4 ch); +Py_LOCAL_INLINE(BOOL) in_set_union(RE_EncodingTable* encoding, RE_Node* node, + Py_UCS4 ch); + +/* Checks whether a character matches a set member. */ +Py_LOCAL_INLINE(BOOL) matches_member(RE_EncodingTable* encoding, RE_Node* + member, Py_UCS4 ch) { + switch (member->op) { + case RE_OP_CHARACTER: + /* values are: char_code */ + TRACE(("%s %d %d\n", re_op_text[member->op], member->match, + member->values[0])) + return ch == member->values[0]; + case RE_OP_PROPERTY: + /* values are: property */ + TRACE(("%s %d %d\n", re_op_text[member->op], member->match, + member->values[0])) + return encoding->has_property(member->values[0], ch); + case RE_OP_RANGE: + /* values are: lower, upper */ + TRACE(("%s %d %d %d\n", re_op_text[member->op], member->match, + member->values[0], member->values[1])) + return in_range(encoding, member->values[0], member->values[1], ch); + case RE_OP_SET_DIFF: + TRACE(("%s\n", re_op_text[member->op])) + return in_set_diff(encoding, member, ch); + case RE_OP_SET_INTER: + TRACE(("%s\n", re_op_text[member->op])) + return in_set_inter(encoding, member, ch); + case RE_OP_SET_SYM_DIFF: + TRACE(("%s\n", re_op_text[member->op])) + return in_set_sym_diff(encoding, member, ch); + case RE_OP_SET_UNION: + TRACE(("%s\n", re_op_text[member->op])) + return in_set_union(encoding, member, ch); + case RE_OP_STRING: + { + /* values are: char_code, char_code, ... 
*/ + size_t i; + TRACE(("%s %d %d\n", re_op_text[member->op], member->match, + member->value_count)) + + for (i = 0; i < member->value_count; i++) { + if (ch == member->values[i]) + return TRUE; + } + return FALSE; + } + default: + return FALSE; + } +} + +/* Checks whether a character matches a set member, ignoring case. */ +Py_LOCAL_INLINE(BOOL) matches_member_ign(RE_EncodingTable* encoding, RE_Node* + member, int case_count, Py_UCS4* cases) { + int i; + + for (i = 0; i < case_count; i++) { + switch (member->op) { + case RE_OP_CHARACTER: + /* values are: char_code */ + TRACE(("%s %d %d\n", re_op_text[member->op], member->match, + member->values[0])) + if (cases[i] == member->values[0]) + return TRUE; + break; + case RE_OP_PROPERTY: + /* values are: property */ + TRACE(("%s %d %d\n", re_op_text[member->op], member->match, + member->values[0])) + if (encoding->has_property(member->values[0], cases[i])) + return TRUE; + break; + case RE_OP_RANGE: + /* values are: lower, upper */ + TRACE(("%s %d %d %d\n", re_op_text[member->op], member->match, + member->values[0], member->values[1])) + if (in_range(encoding, member->values[0], member->values[1], + cases[i])) + return TRUE; + break; + case RE_OP_SET_DIFF: + TRACE(("%s\n", re_op_text[member->op])) + if (in_set_diff(encoding, member, cases[i])) + return TRUE; + break; + case RE_OP_SET_INTER: + TRACE(("%s\n", re_op_text[member->op])) + if (in_set_inter(encoding, member, cases[i])) + return TRUE; + break; + case RE_OP_SET_SYM_DIFF: + TRACE(("%s\n", re_op_text[member->op])) + if (in_set_sym_diff(encoding, member, cases[i])) + return TRUE; + break; + case RE_OP_SET_UNION: + TRACE(("%s\n", re_op_text[member->op])) + if (in_set_union(encoding, member, cases[i])) + return TRUE; + break; + case RE_OP_STRING: + { + size_t j; + TRACE(("%s %d %d\n", re_op_text[member->op], member->match, + member->value_count)) + + for (j = 0; j < member->value_count; j++) { + if (cases[i] == member->values[j]) + return TRUE; + } + break; + } + 
default: + return TRUE; + } + } + + return FALSE; +} + +/* Checks whether a character is in a set difference. */ +Py_LOCAL_INLINE(BOOL) in_set_diff(RE_EncodingTable* encoding, RE_Node* node, + Py_UCS4 ch) { + RE_Node* member; + + member = node->nonstring.next_2.node; + + if (matches_member(encoding, member, ch) != member->match) + return FALSE; + + member = member->next_1.node; + + while (member) { + if (matches_member(encoding, member, ch) == member->match) + return FALSE; + + member = member->next_1.node; + } + + return TRUE; +} + +/* Checks whether a character is in a set difference, ignoring case. */ +Py_LOCAL_INLINE(BOOL) in_set_diff_ign(RE_EncodingTable* encoding, RE_Node* + node, int case_count, Py_UCS4* cases) { + RE_Node* member; + + member = node->nonstring.next_2.node; + + if (matches_member_ign(encoding, member, case_count, cases) != + member->match) + return FALSE; + + member = member->next_1.node; + + while (member) { + if (matches_member_ign(encoding, member, case_count, cases) == + member->match) + return FALSE; + + member = member->next_1.node; + } + + return TRUE; +} + +/* Checks whether a character is in a set intersection. */ +Py_LOCAL_INLINE(BOOL) in_set_inter(RE_EncodingTable* encoding, RE_Node* node, + Py_UCS4 ch) { + RE_Node* member; + + member = node->nonstring.next_2.node; + + while (member) { + if (matches_member(encoding, member, ch) != member->match) + return FALSE; + + member = member->next_1.node; + } + + return TRUE; +} + +/* Checks whether a character is in a set intersection, ignoring case. */ +Py_LOCAL_INLINE(BOOL) in_set_inter_ign(RE_EncodingTable* encoding, RE_Node* + node, int case_count, Py_UCS4* cases) { + RE_Node* member; + + member = node->nonstring.next_2.node; + + while (member) { + if (matches_member_ign(encoding, member, case_count, cases) != + member->match) + return FALSE; + + member = member->next_1.node; + } + + return TRUE; +} + +/* Checks whether a character is in a set symmetric difference. 
*/ +Py_LOCAL_INLINE(BOOL) in_set_sym_diff(RE_EncodingTable* encoding, RE_Node* + node, Py_UCS4 ch) { + RE_Node* member; + BOOL result; + + member = node->nonstring.next_2.node; + + result = FALSE; + + while (member) { + if (matches_member(encoding, member, ch) == member->match) + result = !result; + + member = member->next_1.node; + } + + return result; +} + +/* Checks whether a character is in a set symmetric difference, ignoring case. + */ +Py_LOCAL_INLINE(BOOL) in_set_sym_diff_ign(RE_EncodingTable* encoding, RE_Node* + node, int case_count, Py_UCS4* cases) { + RE_Node* member; + BOOL result; + + member = node->nonstring.next_2.node; + + result = FALSE; + + while (member) { + if (matches_member_ign(encoding, member, case_count, cases) == + member->match) + result = !result; + + member = member->next_1.node; + } + + return result; +} + +/* Checks whether a character is in a set union. */ +Py_LOCAL_INLINE(BOOL) in_set_union(RE_EncodingTable* encoding, RE_Node* node, + Py_UCS4 ch) { + RE_Node* member; + + member = node->nonstring.next_2.node; + + while (member) { + if (matches_member(encoding, member, ch) == member->match) + return TRUE; + + member = member->next_1.node; + } + + return FALSE; +} + +/* Checks whether a character is in a set union, ignoring case. */ +Py_LOCAL_INLINE(BOOL) in_set_union_ign(RE_EncodingTable* encoding, RE_Node* + node, int case_count, Py_UCS4* cases) { + RE_Node* member; + + member = node->nonstring.next_2.node; + + while (member) { + if (matches_member_ign(encoding, member, case_count, cases) == + member->match) + return TRUE; + + member = member->next_1.node; + } + + return FALSE; +} + +/* Checks whether a character is in a set. 
*/ +Py_LOCAL_INLINE(BOOL) matches_SET(RE_EncodingTable* encoding, RE_Node* node, + Py_UCS4 ch) { + switch (node->op) { + case RE_OP_SET_DIFF: + case RE_OP_SET_DIFF_REV: + return in_set_diff(encoding, node, ch); + case RE_OP_SET_INTER: + case RE_OP_SET_INTER_REV: + return in_set_inter(encoding, node, ch); + case RE_OP_SET_SYM_DIFF: + case RE_OP_SET_SYM_DIFF_REV: + return in_set_sym_diff(encoding, node, ch); + case RE_OP_SET_UNION: + case RE_OP_SET_UNION_REV: + return in_set_union(encoding, node, ch); + } + + return FALSE; +} + +/* Checks whether a character is in a set, ignoring case. */ +Py_LOCAL_INLINE(BOOL) matches_SET_IGN(RE_EncodingTable* encoding, RE_Node* + node, Py_UCS4 ch) { + Py_UCS4 cases[RE_MAX_CASES]; + int case_count; + + case_count = encoding->all_cases(ch, cases); + + switch (node->op) { + case RE_OP_SET_DIFF_IGN: + case RE_OP_SET_DIFF_IGN_REV: + return in_set_diff_ign(encoding, node, case_count, cases); + case RE_OP_SET_INTER_IGN: + case RE_OP_SET_INTER_IGN_REV: + return in_set_inter_ign(encoding, node, case_count, cases); + case RE_OP_SET_SYM_DIFF_IGN: + case RE_OP_SET_SYM_DIFF_IGN_REV: + return in_set_sym_diff_ign(encoding, node, case_count, cases); + case RE_OP_SET_UNION_IGN: + case RE_OP_SET_UNION_IGN_REV: + return in_set_union_ign(encoding, node, case_count, cases); + } + + return FALSE; +} + +/* Resets a guard list. */ +Py_LOCAL_INLINE(void) reset_guard_list(RE_GuardList* guard_list) { + guard_list->count = 0; + guard_list->last_text_pos = -1; +} + +/* Initialises the state for a match. */ +Py_LOCAL_INLINE(void) init_match(RE_State* state) { + size_t i; + + /* Reset the backtrack. */ + state->current_backtrack_block = &state->backtrack_block; + state->current_backtrack_block->count = 0; + state->current_saved_groups = state->first_saved_groups; + state->backtrack = NULL; + state->search_anchor = state->text_pos; + state->match_pos = state->text_pos; + + /* Reset the guards for the repeats. 
*/ + for (i = 0; i < state->pattern->repeat_count; i++) { + reset_guard_list(&state->repeats[i].body_guard_list); + reset_guard_list(&state->repeats[i].tail_guard_list); + } + + /* Reset the guards for the fuzzy sections. */ + for (i = 0; i < state->pattern->fuzzy_count; i++) { + reset_guard_list(&state->fuzzy_guards[i].body_guard_list); + reset_guard_list(&state->fuzzy_guards[i].tail_guard_list); + } + + for (i = 0; i < state->pattern->true_group_count; i++) { + RE_GroupData* group; + + group = &state->groups[i]; + group->span.start = -1; + group->span.end = -1; + group->capture_count = 0; + group->current_capture = -1; + } + + /* Reset the guards for the group calls. */ + for (i = 0; i < state->pattern->call_ref_info_count; i++) + reset_guard_list(&state->group_call_guard_list[i]); + + /* Clear the counts and cost for matching. */ + memset(state->fuzzy_info.counts, 0, sizeof(state->fuzzy_info.counts)); + state->fuzzy_info.total_cost = 0; + memset(state->total_fuzzy_counts, 0, sizeof(state->total_fuzzy_counts)); + state->total_errors = 0; + state->total_cost = 0; + state->too_few_errors = FALSE; + state->capture_change = 0; + state->iterations = 0; +} + +/* Adds a new backtrack entry. */ +Py_LOCAL_INLINE(BOOL) add_backtrack(RE_SafeState* safe_state, RE_UINT8 op) { + RE_State* state; + RE_BacktrackBlock* current; + + state = safe_state->re_state; + + current = state->current_backtrack_block; + if (current->count >= current->capacity) { + if (!current->next) { + RE_BacktrackBlock* next; + + /* Is there too much backtracking? 
*/ + if (state->backtrack_allocated >= RE_MAX_BACKTRACK_ALLOC) + return FALSE; + + next = (RE_BacktrackBlock*)safe_alloc(safe_state, + sizeof(RE_BacktrackBlock)); + if (!next) + return FALSE; + + next->previous = current; + next->next = NULL; + next->capacity = RE_BACKTRACK_BLOCK_SIZE; + current->next = next; + + state->backtrack_allocated += RE_BACKTRACK_BLOCK_SIZE; + } + + current = current->next; + current->count = 0; + state->current_backtrack_block = current; + } + + state->backtrack = ¤t->items[current->count++]; + state->backtrack->op = op; + + return TRUE; +} + +/* Gets the last backtrack entry. + * + * It'll never be called when there are _no_ entries. + */ +Py_LOCAL_INLINE(RE_BacktrackData*) last_backtrack(RE_State* state) { + RE_BacktrackBlock* current; + + current = state->current_backtrack_block; + state->backtrack = ¤t->items[current->count - 1]; + + return state->backtrack; +} + +/* Discards the last backtrack entry. + * + * It'll never be called to discard the _only_ entry. + */ +Py_LOCAL_INLINE(void) discard_backtrack(RE_State* state) { + RE_BacktrackBlock* current; + + current = state->current_backtrack_block; + --current->count; + if (current->count == 0 && current->previous) + state->current_backtrack_block = current->previous; +} + +/* Copies a repeat guard list. */ +Py_LOCAL_INLINE(BOOL) copy_guard_data(RE_SafeState* safe_state, RE_GuardList* + dst, RE_GuardList* src) { + if (dst->capacity < src->count) { + RE_GuardSpan* new_spans; + + if (!safe_state) + return FALSE; + + dst->capacity = src->count; + new_spans = (RE_GuardSpan*)safe_realloc(safe_state, dst->spans, + dst->capacity * sizeof(RE_GuardSpan)); + if (!new_spans) + return FALSE; + + dst->spans = new_spans; + } + + dst->count = src->count; + memmove(dst->spans, src->spans, dst->count * sizeof(RE_GuardSpan)); + + dst->last_text_pos = -1; + + return TRUE; +} + +/* Copies a repeat. 
*/ +Py_LOCAL_INLINE(BOOL) copy_repeat_data(RE_SafeState* safe_state, RE_RepeatData* + dst, RE_RepeatData* src) { + if (!copy_guard_data(safe_state, &dst->body_guard_list, + &src->body_guard_list) || !copy_guard_data(safe_state, + &dst->tail_guard_list, &src->tail_guard_list)) { + safe_dealloc(safe_state, dst->body_guard_list.spans); + safe_dealloc(safe_state, dst->tail_guard_list.spans); + + return FALSE; + } + + dst->count = src->count; + dst->start = src->start; + dst->capture_change = src->capture_change; + + return TRUE; +} + +/* Pushes a return node onto the group call stack. */ +Py_LOCAL_INLINE(BOOL) push_group_return(RE_SafeState* safe_state, RE_Node* + return_node) { + RE_State* state; + PatternObject* pattern; + RE_GroupCallFrame* frame; + + state = safe_state->re_state; + pattern = state->pattern; + + if (state->current_group_call_frame && + state->current_group_call_frame->next) + /* Advance to the next allocated frame. */ + frame = state->current_group_call_frame->next; + else if (!state->current_group_call_frame && state->first_group_call_frame) + /* Advance to the first allocated frame. */ + frame = state->first_group_call_frame; + else { + /* Create a new frame. 
*/ + frame = (RE_GroupCallFrame*)safe_alloc(safe_state, + sizeof(RE_GroupCallFrame)); + if (!frame) + return FALSE; + + frame->groups = (RE_GroupData*)safe_alloc(safe_state, + pattern->true_group_count * sizeof(RE_GroupData)); + frame->repeats = (RE_RepeatData*)safe_alloc(safe_state, + pattern->repeat_count * sizeof(RE_RepeatData)); + if (!frame->groups || !frame->repeats) { + safe_dealloc(safe_state, frame->groups); + safe_dealloc(safe_state, frame->repeats); + safe_dealloc(safe_state, frame); + + return FALSE; + } + + memset(frame->groups, 0, pattern->true_group_count * + sizeof(RE_GroupData)); + memset(frame->repeats, 0, pattern->repeat_count * + sizeof(RE_RepeatData)); + + frame->previous = state->current_group_call_frame; + frame->next = NULL; + + if (frame->previous) + frame->previous->next = frame; + else + state->first_group_call_frame = frame; + } + + frame->node = return_node; + + /* Push the groups and guards. */ + if (return_node) { + size_t g; + size_t r; + + for (g = 0; g < pattern->true_group_count; g++) { + frame->groups[g].span = state->groups[g].span; + frame->groups[g].current_capture = + state->groups[g].current_capture; + } + + for (r = 0; r < pattern->repeat_count; r++) { + if (!copy_repeat_data(safe_state, &frame->repeats[r], + &state->repeats[r])) + return FALSE; + } + } + + state->current_group_call_frame = frame; + + return TRUE; +} + +/* Pops a return node from the group call stack. */ +Py_LOCAL_INLINE(RE_Node*) pop_group_return(RE_State* state) { + RE_GroupCallFrame* frame; + + frame = state->current_group_call_frame; + + /* Pop the groups and repeats. 
*/ + if (frame->node) { + PatternObject* pattern; + size_t g; + size_t r; + + pattern = state->pattern; + + for (g = 0; g < pattern->true_group_count; g++) { + state->groups[g].span = frame->groups[g].span; + state->groups[g].current_capture = + frame->groups[g].current_capture; + } + + for (r = 0; r < pattern->repeat_count; r++) + copy_repeat_data(NULL, &state->repeats[r], &frame->repeats[r]); + } + + /* Withdraw to previous frame. */ + state->current_group_call_frame = frame->previous; + + return frame->node; +} + +/* Returns the return node from the top of the group call stack. */ +Py_LOCAL_INLINE(RE_Node*) top_group_return(RE_State* state) { + RE_GroupCallFrame* frame; + + frame = state->current_group_call_frame; + + return frame->node; +} + +/* Checks whether a node matches only 1 character. */ +Py_LOCAL_INLINE(BOOL) node_matches_one_character(RE_Node* node) { + switch (node->op) { + case RE_OP_ANY: + case RE_OP_ANY_ALL: + case RE_OP_ANY_ALL_REV: + case RE_OP_ANY_REV: + case RE_OP_ANY_U: + case RE_OP_ANY_U_REV: + case RE_OP_CHARACTER: + case RE_OP_CHARACTER_IGN: + case RE_OP_CHARACTER_IGN_REV: + case RE_OP_CHARACTER_REV: + case RE_OP_PROPERTY: + case RE_OP_PROPERTY_IGN: + case RE_OP_PROPERTY_IGN_REV: + case RE_OP_PROPERTY_REV: + case RE_OP_RANGE: + case RE_OP_RANGE_IGN: + case RE_OP_RANGE_IGN_REV: + case RE_OP_RANGE_REV: + case RE_OP_SET_DIFF: + case RE_OP_SET_DIFF_IGN: + case RE_OP_SET_DIFF_IGN_REV: + case RE_OP_SET_DIFF_REV: + case RE_OP_SET_INTER: + case RE_OP_SET_INTER_IGN: + case RE_OP_SET_INTER_IGN_REV: + case RE_OP_SET_INTER_REV: + case RE_OP_SET_SYM_DIFF: + case RE_OP_SET_SYM_DIFF_IGN: + case RE_OP_SET_SYM_DIFF_IGN_REV: + case RE_OP_SET_SYM_DIFF_REV: + case RE_OP_SET_UNION: + case RE_OP_SET_UNION_IGN: + case RE_OP_SET_UNION_IGN_REV: + case RE_OP_SET_UNION_REV: + return TRUE; + default: + return FALSE; + } +} + +/* Checks whether the node is a firstset. 
*/ +Py_LOCAL_INLINE(BOOL) is_firstset(RE_Node* node) { + if (node->step != 0) + return FALSE; + + return node_matches_one_character(node); +} + +/* Locates the start node for testing ahead. */ +Py_LOCAL_INLINE(RE_Node*) locate_test_start(RE_Node* node) { + for (;;) { + switch (node->op) { + case RE_OP_BOUNDARY: + switch (node->next_1.node->op) { + case RE_OP_STRING: + case RE_OP_STRING_FLD: + case RE_OP_STRING_FLD_REV: + case RE_OP_STRING_IGN: + case RE_OP_STRING_IGN_REV: + case RE_OP_STRING_REV: + return node->next_1.node; + default: + return node; + } + case RE_OP_CALL_REF: + case RE_OP_END_GROUP: + case RE_OP_START_GROUP: + node = node->next_1.node; + break; + case RE_OP_GREEDY_REPEAT: + case RE_OP_LAZY_REPEAT: + if (node->values[1] == 0) + return node; + node = node->next_1.node; + break; + case RE_OP_GREEDY_REPEAT_ONE: + case RE_OP_LAZY_REPEAT_ONE: + if (node->values[1] == 0) + return node; + return node->nonstring.next_2.node; + case RE_OP_LOOKAROUND: + node = node->next_1.node; + break; + default: + if (is_firstset(node)) { + switch (node->next_1.node->op) { + case RE_OP_END_OF_STRING: + case RE_OP_START_OF_STRING: + return node->next_1.node; + } + } + + return node; + } + } +} + +/* Checks whether a character matches any of a set of case characters. */ +Py_LOCAL_INLINE(BOOL) any_case(Py_UCS4 ch, int case_count, Py_UCS4* cases) { + int i; + + for (i = 0; i < case_count; i++) { + if (ch == cases[i]) + return TRUE; + } + + return FALSE; +} + +/* Matches many ANYs, up to a limit. 
*/
/* match_many_ANY: advances text_pos while it is below 'limit' and
 * matches_ANY() for the character at text_pos equals 'match'.
 *
 * text_pos and limit index state->text in characters of state->charsize
 * (1, 2 or 4) bytes; any other charsize leaves text_pos unchanged.
 * Returns the position at which the scan stopped.
 */
Py_LOCAL_INLINE(Py_ssize_t) match_many_ANY(RE_State* state, RE_Node* node,
  Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) {
    RE_EncodingTable* encoding;

    encoding = state->encoding;

    switch (state->charsize) {
    case 1: {
        Py_UCS1* chars = (Py_UCS1*)state->text;

        while (text_pos < limit && matches_ANY(encoding, node,
          chars[text_pos]) == match)
            ++text_pos;
        break;
    }
    case 2: {
        Py_UCS2* chars = (Py_UCS2*)state->text;

        while (text_pos < limit && matches_ANY(encoding, node,
          chars[text_pos]) == match)
            ++text_pos;
        break;
    }
    case 4: {
        Py_UCS4* chars = (Py_UCS4*)state->text;

        while (text_pos < limit && matches_ANY(encoding, node,
          chars[text_pos]) == match)
            ++text_pos;
        break;
    }
    }

    return text_pos;
}

/* Matches many ANYs, up to a limit, backwards. 
*/
/* match_many_ANY_REV: steps text_pos back while it is above 'limit' and
 * matches_ANY() for the character just before text_pos equals 'match'.
 *
 * text_pos and limit index state->text in characters of state->charsize
 * (1, 2 or 4) bytes; any other charsize leaves text_pos unchanged.
 * Returns the position at which the scan stopped.
 */
Py_LOCAL_INLINE(Py_ssize_t) match_many_ANY_REV(RE_State* state, RE_Node* node,
  Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) {
    RE_EncodingTable* encoding;

    encoding = state->encoding;

    switch (state->charsize) {
    case 1: {
        Py_UCS1* chars = (Py_UCS1*)state->text;

        while (text_pos > limit && matches_ANY(encoding, node,
          chars[text_pos - 1]) == match)
            --text_pos;
        break;
    }
    case 2: {
        Py_UCS2* chars = (Py_UCS2*)state->text;

        while (text_pos > limit && matches_ANY(encoding, node,
          chars[text_pos - 1]) == match)
            --text_pos;
        break;
    }
    case 4: {
        Py_UCS4* chars = (Py_UCS4*)state->text;

        while (text_pos > limit && matches_ANY(encoding, node,
          chars[text_pos - 1]) == match)
            --text_pos;
        break;
    }
    }

    return text_pos;
}

/* Matches many ANY_Us, up to a limit. 
*/
/* match_many_ANY_U: advances text_pos while it is below 'limit' and
 * matches_ANY_U() for the character at text_pos equals 'match'.
 *
 * text_pos and limit index state->text in characters of state->charsize
 * (1, 2 or 4) bytes; any other charsize leaves text_pos unchanged.
 * Returns the position at which the scan stopped.
 */
Py_LOCAL_INLINE(Py_ssize_t) match_many_ANY_U(RE_State* state, RE_Node* node,
  Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) {
    RE_EncodingTable* encoding;

    encoding = state->encoding;

    switch (state->charsize) {
    case 1: {
        Py_UCS1* chars = (Py_UCS1*)state->text;

        while (text_pos < limit && matches_ANY_U(encoding, node,
          chars[text_pos]) == match)
            ++text_pos;
        break;
    }
    case 2: {
        Py_UCS2* chars = (Py_UCS2*)state->text;

        while (text_pos < limit && matches_ANY_U(encoding, node,
          chars[text_pos]) == match)
            ++text_pos;
        break;
    }
    case 4: {
        Py_UCS4* chars = (Py_UCS4*)state->text;

        while (text_pos < limit && matches_ANY_U(encoding, node,
          chars[text_pos]) == match)
            ++text_pos;
        break;
    }
    }

    return text_pos;
}

/* Matches many ANY_Us, up to a limit, backwards. 
*/
/* match_many_ANY_U_REV: steps text_pos back while it is above 'limit' and
 * matches_ANY_U() for the character just before text_pos equals 'match'.
 *
 * text_pos and limit index state->text in characters of state->charsize
 * (1, 2 or 4) bytes; any other charsize leaves text_pos unchanged.
 * Returns the position at which the scan stopped.
 */
Py_LOCAL_INLINE(Py_ssize_t) match_many_ANY_U_REV(RE_State* state, RE_Node*
  node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) {
    RE_EncodingTable* encoding;

    encoding = state->encoding;

    switch (state->charsize) {
    case 1: {
        Py_UCS1* chars = (Py_UCS1*)state->text;

        while (text_pos > limit && matches_ANY_U(encoding, node,
          chars[text_pos - 1]) == match)
            --text_pos;
        break;
    }
    case 2: {
        Py_UCS2* chars = (Py_UCS2*)state->text;

        while (text_pos > limit && matches_ANY_U(encoding, node,
          chars[text_pos - 1]) == match)
            --text_pos;
        break;
    }
    case 4: {
        Py_UCS4* chars = (Py_UCS4*)state->text;

        while (text_pos > limit && matches_ANY_U(encoding, node,
          chars[text_pos - 1]) == match)
            --text_pos;
        break;
    }
    }

    return text_pos;
}

/* Matches many CHARACTERs, up to a limit. 
*/
/* match_many_CHARACTER: advances text_pos while it is below 'limit' and the
 * test "character at text_pos == node->values[0]" gives the wanted result.
 *
 * The wanted result is (node->match == match), so the node's own match flag
 * and the caller's 'match' argument are folded into one sense.
 * text_pos and limit index state->text in characters of state->charsize
 * (1, 2 or 4) bytes; any other charsize leaves text_pos unchanged.
 * Returns the position at which the scan stopped.
 */
Py_LOCAL_INLINE(Py_ssize_t) match_many_CHARACTER(RE_State* state, RE_Node*
  node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) {
    Py_UCS4 target;
    BOOL wanted;

    wanted = node->match == match;
    target = node->values[0];

    switch (state->charsize) {
    case 1: {
        Py_UCS1* chars = (Py_UCS1*)state->text;

        while (text_pos < limit && (chars[text_pos] == target) == wanted)
            ++text_pos;
        break;
    }
    case 2: {
        Py_UCS2* chars = (Py_UCS2*)state->text;

        while (text_pos < limit && (chars[text_pos] == target) == wanted)
            ++text_pos;
        break;
    }
    case 4: {
        Py_UCS4* chars = (Py_UCS4*)state->text;

        while (text_pos < limit && (chars[text_pos] == target) == wanted)
            ++text_pos;
        break;
    }
    }

    return text_pos;
}

/* Matches many CHARACTERs, up to a limit, ignoring case. 
*/
/* match_many_CHARACTER_IGN: advances text_pos while it is below 'limit' and
 * the test "character at text_pos is one of the case variants of
 * node->values[0]" gives the wanted result.
 *
 * The case variants come from state->encoding->all_cases(); the wanted
 * result is (node->match == match).
 * text_pos and limit index state->text in characters of state->charsize
 * (1, 2 or 4) bytes; any other charsize leaves text_pos unchanged.
 * Returns the position at which the scan stopped.
 */
Py_LOCAL_INLINE(Py_ssize_t) match_many_CHARACTER_IGN(RE_State* state, RE_Node*
  node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) {
    Py_UCS4 variants[RE_MAX_CASES];
    int variant_count;
    BOOL wanted;

    wanted = node->match == match;
    variant_count = state->encoding->all_cases(node->values[0], variants);

    switch (state->charsize) {
    case 1: {
        Py_UCS1* chars = (Py_UCS1*)state->text;

        while (text_pos < limit && any_case(chars[text_pos], variant_count,
          variants) == wanted)
            ++text_pos;
        break;
    }
    case 2: {
        Py_UCS2* chars = (Py_UCS2*)state->text;

        while (text_pos < limit && any_case(chars[text_pos], variant_count,
          variants) == wanted)
            ++text_pos;
        break;
    }
    case 4: {
        Py_UCS4* chars = (Py_UCS4*)state->text;

        while (text_pos < limit && any_case(chars[text_pos], variant_count,
          variants) == wanted)
            ++text_pos;
        break;
    }
    }

    return text_pos;
}

/* Matches many CHARACTERs, up to a limit, backwards, ignoring case. 
*/
/* match_many_CHARACTER_IGN_REV: steps text_pos back while it is above
 * 'limit' and the test "character just before text_pos is one of the case
 * variants of node->values[0]" gives the wanted result.
 *
 * The case variants come from state->encoding->all_cases(); the wanted
 * result is (node->match == match).
 * text_pos and limit index state->text in characters of state->charsize
 * (1, 2 or 4) bytes; any other charsize leaves text_pos unchanged.
 * Returns the position at which the scan stopped.
 */
Py_LOCAL_INLINE(Py_ssize_t) match_many_CHARACTER_IGN_REV(RE_State* state,
  RE_Node* node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) {
    Py_UCS4 variants[RE_MAX_CASES];
    int variant_count;
    BOOL wanted;

    wanted = node->match == match;
    variant_count = state->encoding->all_cases(node->values[0], variants);

    switch (state->charsize) {
    case 1: {
        Py_UCS1* chars = (Py_UCS1*)state->text;

        while (text_pos > limit && any_case(chars[text_pos - 1],
          variant_count, variants) == wanted)
            --text_pos;
        break;
    }
    case 2: {
        Py_UCS2* chars = (Py_UCS2*)state->text;

        while (text_pos > limit && any_case(chars[text_pos - 1],
          variant_count, variants) == wanted)
            --text_pos;
        break;
    }
    case 4: {
        Py_UCS4* chars = (Py_UCS4*)state->text;

        while (text_pos > limit && any_case(chars[text_pos - 1],
          variant_count, variants) == wanted)
            --text_pos;
        break;
    }
    }

    return text_pos;
}

/* Matches many CHARACTERs, up to a limit, backwards. 
*/
/* match_many_CHARACTER_REV: steps text_pos back while it is above 'limit'
 * and the test "character just before text_pos == node->values[0]" gives the
 * wanted result.
 *
 * The wanted result is (node->match == match).
 * text_pos and limit index state->text in characters of state->charsize
 * (1, 2 or 4) bytes; any other charsize leaves text_pos unchanged.
 * Returns the position at which the scan stopped.
 */
Py_LOCAL_INLINE(Py_ssize_t) match_many_CHARACTER_REV(RE_State* state, RE_Node*
  node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) {
    Py_UCS4 target;
    BOOL wanted;

    wanted = node->match == match;
    target = node->values[0];

    switch (state->charsize) {
    case 1: {
        Py_UCS1* chars = (Py_UCS1*)state->text;

        while (text_pos > limit && (chars[text_pos - 1] == target) == wanted)
            --text_pos;
        break;
    }
    case 2: {
        Py_UCS2* chars = (Py_UCS2*)state->text;

        while (text_pos > limit && (chars[text_pos - 1] == target) == wanted)
            --text_pos;
        break;
    }
    case 4: {
        Py_UCS4* chars = (Py_UCS4*)state->text;

        while (text_pos > limit && (chars[text_pos - 1] == target) == wanted)
            --text_pos;
        break;
    }
    }

    return text_pos;
}

/* Matches many PROPERTYs, up to a limit. 
*/
/* match_many_PROPERTY: advances text_pos while it is below 'limit' and
 * matches_PROPERTY() for the character at text_pos gives the wanted result.
 *
 * The wanted result is (node->match == match).
 * text_pos and limit index state->text in characters of state->charsize
 * (1, 2 or 4) bytes; any other charsize leaves text_pos unchanged.
 * Returns the position at which the scan stopped.
 */
Py_LOCAL_INLINE(Py_ssize_t) match_many_PROPERTY(RE_State* state, RE_Node* node,
  Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) {
    RE_EncodingTable* encoding;
    BOOL wanted;

    wanted = node->match == match;
    encoding = state->encoding;

    switch (state->charsize) {
    case 1: {
        Py_UCS1* chars = (Py_UCS1*)state->text;

        while (text_pos < limit && matches_PROPERTY(encoding, node,
          chars[text_pos]) == wanted)
            ++text_pos;
        break;
    }
    case 2: {
        Py_UCS2* chars = (Py_UCS2*)state->text;

        while (text_pos < limit && matches_PROPERTY(encoding, node,
          chars[text_pos]) == wanted)
            ++text_pos;
        break;
    }
    case 4: {
        Py_UCS4* chars = (Py_UCS4*)state->text;

        while (text_pos < limit && matches_PROPERTY(encoding, node,
          chars[text_pos]) == wanted)
            ++text_pos;
        break;
    }
    }

    return text_pos;
}

/* Matches many PROPERTYs, up to a limit, ignoring case. 
*/
/* match_many_PROPERTY_IGN: advances text_pos while it is below 'limit' and
 * matches_PROPERTY_IGN() for the character at text_pos gives the wanted
 * result.
 *
 * The wanted result is (node->match == match).
 * text_pos and limit index state->text in characters of state->charsize
 * (1, 2 or 4) bytes; any other charsize leaves text_pos unchanged.
 * Returns the position at which the scan stopped.
 */
Py_LOCAL_INLINE(Py_ssize_t) match_many_PROPERTY_IGN(RE_State* state, RE_Node*
  node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) {
    RE_EncodingTable* encoding;
    BOOL wanted;

    wanted = node->match == match;
    encoding = state->encoding;

    switch (state->charsize) {
    case 1: {
        Py_UCS1* chars = (Py_UCS1*)state->text;

        while (text_pos < limit && matches_PROPERTY_IGN(encoding, node,
          chars[text_pos]) == wanted)
            ++text_pos;
        break;
    }
    case 2: {
        Py_UCS2* chars = (Py_UCS2*)state->text;

        while (text_pos < limit && matches_PROPERTY_IGN(encoding, node,
          chars[text_pos]) == wanted)
            ++text_pos;
        break;
    }
    case 4: {
        Py_UCS4* chars = (Py_UCS4*)state->text;

        while (text_pos < limit && matches_PROPERTY_IGN(encoding, node,
          chars[text_pos]) == wanted)
            ++text_pos;
        break;
    }
    }

    return text_pos;
}

/* Matches many PROPERTYs, up to a limit, backwards, ignoring case. 
*/
/* match_many_PROPERTY_IGN_REV: steps text_pos back while it is above 'limit'
 * and matches_PROPERTY_IGN() for the character just before text_pos gives
 * the wanted result.
 *
 * The wanted result is (node->match == match).
 * text_pos and limit index state->text in characters of state->charsize
 * (1, 2 or 4) bytes; any other charsize leaves text_pos unchanged.
 * Returns the position at which the scan stopped.
 */
Py_LOCAL_INLINE(Py_ssize_t) match_many_PROPERTY_IGN_REV(RE_State* state,
  RE_Node* node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) {
    RE_EncodingTable* encoding;
    BOOL wanted;

    wanted = node->match == match;
    encoding = state->encoding;

    switch (state->charsize) {
    case 1: {
        Py_UCS1* chars = (Py_UCS1*)state->text;

        while (text_pos > limit && matches_PROPERTY_IGN(encoding, node,
          chars[text_pos - 1]) == wanted)
            --text_pos;
        break;
    }
    case 2: {
        Py_UCS2* chars = (Py_UCS2*)state->text;

        while (text_pos > limit && matches_PROPERTY_IGN(encoding, node,
          chars[text_pos - 1]) == wanted)
            --text_pos;
        break;
    }
    case 4: {
        Py_UCS4* chars = (Py_UCS4*)state->text;

        while (text_pos > limit && matches_PROPERTY_IGN(encoding, node,
          chars[text_pos - 1]) == wanted)
            --text_pos;
        break;
    }
    }

    return text_pos;
}

/* Matches many PROPERTYs, up to a limit, backwards. 
*/
/* match_many_PROPERTY_REV: steps text_pos back while it is above 'limit' and
 * matches_PROPERTY() for the character just before text_pos gives the wanted
 * result.
 *
 * The wanted result is (node->match == match).
 * text_pos and limit index state->text in characters of state->charsize
 * (1, 2 or 4) bytes; any other charsize leaves text_pos unchanged.
 * Returns the position at which the scan stopped.
 */
Py_LOCAL_INLINE(Py_ssize_t) match_many_PROPERTY_REV(RE_State* state, RE_Node*
  node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) {
    RE_EncodingTable* encoding;
    BOOL wanted;

    wanted = node->match == match;
    encoding = state->encoding;

    switch (state->charsize) {
    case 1: {
        Py_UCS1* chars = (Py_UCS1*)state->text;

        while (text_pos > limit && matches_PROPERTY(encoding, node,
          chars[text_pos - 1]) == wanted)
            --text_pos;
        break;
    }
    case 2: {
        Py_UCS2* chars = (Py_UCS2*)state->text;

        while (text_pos > limit && matches_PROPERTY(encoding, node,
          chars[text_pos - 1]) == wanted)
            --text_pos;
        break;
    }
    case 4: {
        Py_UCS4* chars = (Py_UCS4*)state->text;

        while (text_pos > limit && matches_PROPERTY(encoding, node,
          chars[text_pos - 1]) == wanted)
            --text_pos;
        break;
    }
    }

    return text_pos;
}

/* Matches many RANGEs, up to a limit. 
*/
/* match_many_RANGE: advances text_pos while it is below 'limit' and
 * matches_RANGE() for the character at text_pos gives the wanted result.
 *
 * The wanted result is (node->match == match).
 * text_pos and limit index state->text in characters of state->charsize
 * (1, 2 or 4) bytes; any other charsize leaves text_pos unchanged.
 * Returns the position at which the scan stopped.
 */
Py_LOCAL_INLINE(Py_ssize_t) match_many_RANGE(RE_State* state, RE_Node* node,
  Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) {
    RE_EncodingTable* encoding;
    BOOL wanted;

    wanted = node->match == match;
    encoding = state->encoding;

    switch (state->charsize) {
    case 1: {
        Py_UCS1* chars = (Py_UCS1*)state->text;

        while (text_pos < limit && matches_RANGE(encoding, node,
          chars[text_pos]) == wanted)
            ++text_pos;
        break;
    }
    case 2: {
        Py_UCS2* chars = (Py_UCS2*)state->text;

        while (text_pos < limit && matches_RANGE(encoding, node,
          chars[text_pos]) == wanted)
            ++text_pos;
        break;
    }
    case 4: {
        Py_UCS4* chars = (Py_UCS4*)state->text;

        while (text_pos < limit && matches_RANGE(encoding, node,
          chars[text_pos]) == wanted)
            ++text_pos;
        break;
    }
    }

    return text_pos;
}

/* Matches many RANGEs, up to a limit, ignoring case. 
*/
/* match_many_RANGE_IGN: advances text_pos while it is below 'limit' and
 * matches_RANGE_IGN() for the character at text_pos gives the wanted result.
 *
 * The wanted result is (node->match == match).
 * text_pos and limit index state->text in characters of state->charsize
 * (1, 2 or 4) bytes; any other charsize leaves text_pos unchanged.
 * Returns the position at which the scan stopped.
 */
Py_LOCAL_INLINE(Py_ssize_t) match_many_RANGE_IGN(RE_State* state, RE_Node*
  node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) {
    RE_EncodingTable* encoding;
    BOOL wanted;

    wanted = node->match == match;
    encoding = state->encoding;

    switch (state->charsize) {
    case 1: {
        Py_UCS1* chars = (Py_UCS1*)state->text;

        while (text_pos < limit && matches_RANGE_IGN(encoding, node,
          chars[text_pos]) == wanted)
            ++text_pos;
        break;
    }
    case 2: {
        Py_UCS2* chars = (Py_UCS2*)state->text;

        while (text_pos < limit && matches_RANGE_IGN(encoding, node,
          chars[text_pos]) == wanted)
            ++text_pos;
        break;
    }
    case 4: {
        Py_UCS4* chars = (Py_UCS4*)state->text;

        while (text_pos < limit && matches_RANGE_IGN(encoding, node,
          chars[text_pos]) == wanted)
            ++text_pos;
        break;
    }
    }

    return text_pos;
}

/* Matches many RANGEs, up to a limit, backwards, ignoring case. 
*/
/* match_many_RANGE_IGN_REV: steps text_pos back while it is above 'limit'
 * and matches_RANGE_IGN() for the character just before text_pos gives the
 * wanted result.
 *
 * The wanted result is (node->match == match).
 * text_pos and limit index state->text in characters of state->charsize
 * (1, 2 or 4) bytes; any other charsize leaves text_pos unchanged.
 * Returns the position at which the scan stopped.
 */
Py_LOCAL_INLINE(Py_ssize_t) match_many_RANGE_IGN_REV(RE_State* state, RE_Node*
  node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) {
    RE_EncodingTable* encoding;
    BOOL wanted;

    wanted = node->match == match;
    encoding = state->encoding;

    switch (state->charsize) {
    case 1: {
        Py_UCS1* chars = (Py_UCS1*)state->text;

        while (text_pos > limit && matches_RANGE_IGN(encoding, node,
          chars[text_pos - 1]) == wanted)
            --text_pos;
        break;
    }
    case 2: {
        Py_UCS2* chars = (Py_UCS2*)state->text;

        while (text_pos > limit && matches_RANGE_IGN(encoding, node,
          chars[text_pos - 1]) == wanted)
            --text_pos;
        break;
    }
    case 4: {
        Py_UCS4* chars = (Py_UCS4*)state->text;

        while (text_pos > limit && matches_RANGE_IGN(encoding, node,
          chars[text_pos - 1]) == wanted)
            --text_pos;
        break;
    }
    }

    return text_pos;
}

/* Matches many RANGEs, up to a limit, backwards. 
*/
/* match_many_RANGE_REV: steps text_pos back while it is above 'limit' and
 * matches_RANGE() for the character just before text_pos gives the wanted
 * result.
 *
 * The wanted result is (node->match == match).
 * text_pos and limit index state->text in characters of state->charsize
 * (1, 2 or 4) bytes; any other charsize leaves text_pos unchanged.
 * Returns the position at which the scan stopped.
 */
Py_LOCAL_INLINE(Py_ssize_t) match_many_RANGE_REV(RE_State* state, RE_Node*
  node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) {
    RE_EncodingTable* encoding;
    BOOL wanted;

    wanted = node->match == match;
    encoding = state->encoding;

    switch (state->charsize) {
    case 1: {
        Py_UCS1* chars = (Py_UCS1*)state->text;

        while (text_pos > limit && matches_RANGE(encoding, node,
          chars[text_pos - 1]) == wanted)
            --text_pos;
        break;
    }
    case 2: {
        Py_UCS2* chars = (Py_UCS2*)state->text;

        while (text_pos > limit && matches_RANGE(encoding, node,
          chars[text_pos - 1]) == wanted)
            --text_pos;
        break;
    }
    case 4: {
        Py_UCS4* chars = (Py_UCS4*)state->text;

        while (text_pos > limit && matches_RANGE(encoding, node,
          chars[text_pos - 1]) == wanted)
            --text_pos;
        break;
    }
    }

    return text_pos;
}

/* Matches many SETs, up to a limit. 
*/
/* match_many_SET: advances text_pos while it is below 'limit' and
 * matches_SET() for the character at text_pos gives the wanted result.
 *
 * The wanted result is (node->match == match).
 * text_pos and limit index state->text in characters of state->charsize
 * (1, 2 or 4) bytes; any other charsize leaves text_pos unchanged.
 * Returns the position at which the scan stopped.
 */
Py_LOCAL_INLINE(Py_ssize_t) match_many_SET(RE_State* state, RE_Node* node,
  Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) {
    RE_EncodingTable* encoding;
    BOOL wanted;

    wanted = node->match == match;
    encoding = state->encoding;

    switch (state->charsize) {
    case 1: {
        Py_UCS1* chars = (Py_UCS1*)state->text;

        while (text_pos < limit && matches_SET(encoding, node,
          chars[text_pos]) == wanted)
            ++text_pos;
        break;
    }
    case 2: {
        Py_UCS2* chars = (Py_UCS2*)state->text;

        while (text_pos < limit && matches_SET(encoding, node,
          chars[text_pos]) == wanted)
            ++text_pos;
        break;
    }
    case 4: {
        Py_UCS4* chars = (Py_UCS4*)state->text;

        while (text_pos < limit && matches_SET(encoding, node,
          chars[text_pos]) == wanted)
            ++text_pos;
        break;
    }
    }

    return text_pos;
}

/* Matches many SETs, up to a limit, ignoring case. 
*/
/* match_many_SET_IGN: advances text_pos while it is below 'limit' and
 * matches_SET_IGN() for the character at text_pos gives the wanted result.
 *
 * The wanted result is (node->match == match).
 * text_pos and limit index state->text in characters of state->charsize
 * (1, 2 or 4) bytes; any other charsize leaves text_pos unchanged.
 * Returns the position at which the scan stopped.
 */
Py_LOCAL_INLINE(Py_ssize_t) match_many_SET_IGN(RE_State* state, RE_Node* node,
  Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) {
    RE_EncodingTable* encoding;
    BOOL wanted;

    wanted = node->match == match;
    encoding = state->encoding;

    switch (state->charsize) {
    case 1: {
        Py_UCS1* chars = (Py_UCS1*)state->text;

        while (text_pos < limit && matches_SET_IGN(encoding, node,
          chars[text_pos]) == wanted)
            ++text_pos;
        break;
    }
    case 2: {
        Py_UCS2* chars = (Py_UCS2*)state->text;

        while (text_pos < limit && matches_SET_IGN(encoding, node,
          chars[text_pos]) == wanted)
            ++text_pos;
        break;
    }
    case 4: {
        Py_UCS4* chars = (Py_UCS4*)state->text;

        while (text_pos < limit && matches_SET_IGN(encoding, node,
          chars[text_pos]) == wanted)
            ++text_pos;
        break;
    }
    }

    return text_pos;
}

/* Matches many SETs, up to a limit, backwards, ignoring case. 
*/
/* match_many_SET_IGN_REV: steps text_pos back while it is above 'limit' and
 * matches_SET_IGN() for the character just before text_pos gives the wanted
 * result.
 *
 * The wanted result is (node->match == match).
 * text_pos and limit index state->text in characters of state->charsize
 * (1, 2 or 4) bytes; any other charsize leaves text_pos unchanged.
 * Returns the position at which the scan stopped.
 */
Py_LOCAL_INLINE(Py_ssize_t) match_many_SET_IGN_REV(RE_State* state, RE_Node*
  node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) {
    RE_EncodingTable* encoding;
    BOOL wanted;

    wanted = node->match == match;
    encoding = state->encoding;

    switch (state->charsize) {
    case 1: {
        Py_UCS1* chars = (Py_UCS1*)state->text;

        while (text_pos > limit && matches_SET_IGN(encoding, node,
          chars[text_pos - 1]) == wanted)
            --text_pos;
        break;
    }
    case 2: {
        Py_UCS2* chars = (Py_UCS2*)state->text;

        while (text_pos > limit && matches_SET_IGN(encoding, node,
          chars[text_pos - 1]) == wanted)
            --text_pos;
        break;
    }
    case 4: {
        Py_UCS4* chars = (Py_UCS4*)state->text;

        while (text_pos > limit && matches_SET_IGN(encoding, node,
          chars[text_pos - 1]) == wanted)
            --text_pos;
        break;
    }
    }

    return text_pos;
}

/* Matches many SETs, up to a limit, backwards. 
*/
/* match_many_SET_REV: steps text_pos back while it is above 'limit' and
 * matches_SET() for the character just before text_pos gives the wanted
 * result.
 *
 * The wanted result is (node->match == match).
 * text_pos and limit index state->text in characters of state->charsize
 * (1, 2 or 4) bytes; any other charsize leaves text_pos unchanged.
 * Returns the position at which the scan stopped.
 */
Py_LOCAL_INLINE(Py_ssize_t) match_many_SET_REV(RE_State* state, RE_Node* node,
  Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) {
    RE_EncodingTable* encoding;
    BOOL wanted;

    wanted = node->match == match;
    encoding = state->encoding;

    switch (state->charsize) {
    case 1: {
        Py_UCS1* chars = (Py_UCS1*)state->text;

        while (text_pos > limit && matches_SET(encoding, node,
          chars[text_pos - 1]) == wanted)
            --text_pos;
        break;
    }
    case 2: {
        Py_UCS2* chars = (Py_UCS2*)state->text;

        while (text_pos > limit && matches_SET(encoding, node,
          chars[text_pos - 1]) == wanted)
            --text_pos;
        break;
    }
    case 4: {
        Py_UCS4* chars = (Py_UCS4*)state->text;

        while (text_pos > limit && matches_SET(encoding, node,
          chars[text_pos - 1]) == wanted)
            --text_pos;
        break;
    }
    }

    return text_pos;
}

/* Counts a repeated character pattern. 
*/ /* count_one() notes: *is_partial is cleared first and 0 is returned at once
+ * when max_count < 1. For the forward opcodes the candidate count is capped by
+ * both max_count and the distance to state->slice_end, and for the _REV
+ * opcodes by the distance back to state->slice_start; apart from the ANY_ALL
+ * opcodes, which take the capped count as-is, the matching match_many_*()
+ * helper then decides how many of those positions really match. *is_partial
+ * is set when the count reaches the end of the text (forwards) or position 0
+ * (backwards) with fewer than max_count repeats and state->partial_side is
+ * the corresponding side. An opcode not listed in the switch yields 0.
+ */
+Py_LOCAL_INLINE(size_t) count_one(RE_State* state, RE_Node* node, Py_ssize_t
+  text_pos, size_t max_count, BOOL* is_partial) {
+    size_t count;
+
+    *is_partial = FALSE;
+
+    if (max_count < 1)
+        return 0;
+
+    switch (node->op) {
+    case RE_OP_ANY:
+        count = min_size_t((size_t)(state->slice_end - text_pos), max_count);
+
+        count = (size_t)(match_many_ANY(state, node, text_pos, text_pos +
+          (Py_ssize_t)count, TRUE) - text_pos);
+
+        *is_partial = count == (size_t)(state->text_length - text_pos) && count
+          < max_count && state->partial_side == RE_PARTIAL_RIGHT;
+
+        return count;
+    case RE_OP_ANY_ALL:
+        count = min_size_t((size_t)(state->slice_end - text_pos), max_count);
+
+        *is_partial = count == (size_t)(state->text_length - text_pos) && count
+          < max_count && state->partial_side == RE_PARTIAL_RIGHT;
+
+        return count;
+    case RE_OP_ANY_ALL_REV:
+        count = min_size_t((size_t)(text_pos - state->slice_start), max_count);
+
+        *is_partial = count == (size_t)(text_pos) && count < max_count &&
+          state->partial_side == RE_PARTIAL_LEFT;
+
+        return count;
+    case RE_OP_ANY_REV:
+        count = min_size_t((size_t)(text_pos - state->slice_start), max_count);
+
+        count = (size_t)(text_pos - match_many_ANY_REV(state, node, text_pos,
+          text_pos - (Py_ssize_t)count, TRUE));
+
+        *is_partial = count == (size_t)(text_pos) && count < max_count &&
+          state->partial_side == RE_PARTIAL_LEFT;
+
+        return count;
+    case RE_OP_ANY_U:
+        count = min_size_t((size_t)(state->slice_end - text_pos), max_count);
+
+        count = (size_t)(match_many_ANY_U(state, node, text_pos, text_pos +
+          (Py_ssize_t)count, TRUE) - text_pos);
+
+        *is_partial = count == (size_t)(state->text_length - text_pos) && count
+          < max_count && state->partial_side == RE_PARTIAL_RIGHT;
+
+        return count;
+    case RE_OP_ANY_U_REV:
+        count = min_size_t((size_t)(text_pos - state->slice_start), max_count);
+
+        count = (size_t)(text_pos - match_many_ANY_U_REV(state, node, text_pos,
+          text_pos - (Py_ssize_t)count, TRUE));
+
+        *is_partial = count == (size_t)(text_pos) && count < max_count &&
+          state->partial_side == RE_PARTIAL_LEFT;
+
+        return count;
+    case RE_OP_CHARACTER:
+        count = min_size_t((size_t)(state->slice_end - text_pos), max_count);
+
+        count = (size_t)(match_many_CHARACTER(state, node, text_pos, text_pos +
+          (Py_ssize_t)count, TRUE) - text_pos);
+
+        *is_partial = count == (size_t)(state->text_length - text_pos) && count
+          < max_count && state->partial_side == RE_PARTIAL_RIGHT;
+
+        return count;
+    case RE_OP_CHARACTER_IGN:
+        count = min_size_t((size_t)(state->slice_end - text_pos), max_count);
+
+        count = (size_t)(match_many_CHARACTER_IGN(state, node, text_pos,
+          text_pos + (Py_ssize_t)count, TRUE) - text_pos);
+
+        *is_partial = count == (size_t)(state->text_length - text_pos) && count
+          < max_count && state->partial_side == RE_PARTIAL_RIGHT;
+
+        return count;
+    case RE_OP_CHARACTER_IGN_REV:
+        count = min_size_t((size_t)(text_pos - state->slice_start), max_count);
+
+        count = (size_t)(text_pos - match_many_CHARACTER_IGN_REV(state, node,
+          text_pos, text_pos - (Py_ssize_t)count, TRUE));
+
+        *is_partial = count == (size_t)(text_pos) && count < max_count &&
+          state->partial_side == RE_PARTIAL_LEFT;
+
+        return count;
+    case RE_OP_CHARACTER_REV:
+        count = min_size_t((size_t)(text_pos - state->slice_start), max_count);
+
+        count = (size_t)(text_pos - match_many_CHARACTER_REV(state, node,
+          text_pos, text_pos - (Py_ssize_t)count, TRUE));
+
+        *is_partial = count == (size_t)(text_pos) && count < max_count &&
+          state->partial_side == RE_PARTIAL_LEFT;
+
+        return count;
+    case RE_OP_PROPERTY:
+        count = min_size_t((size_t)(state->slice_end - text_pos), max_count);
+
+        count = (size_t)(match_many_PROPERTY(state, node, text_pos, text_pos +
+          (Py_ssize_t)count, TRUE) - text_pos);
+
+        *is_partial = count == (size_t)(state->text_length - text_pos) && count
+          < max_count && state->partial_side == RE_PARTIAL_RIGHT;
+
+        return count;
+    case RE_OP_PROPERTY_IGN:
+        count = min_size_t((size_t)(state->slice_end - text_pos), max_count);
+
+        count = (size_t)(match_many_PROPERTY_IGN(state, node, text_pos,
+          text_pos + (Py_ssize_t)count, TRUE) - text_pos);
+
+        *is_partial = count == (size_t)(state->text_length - text_pos) && count
+          < max_count && state->partial_side == RE_PARTIAL_RIGHT;
+
+        return count;
+    case RE_OP_PROPERTY_IGN_REV:
+        count = min_size_t((size_t)(text_pos - state->slice_start), max_count);
+
+        count = (size_t)(text_pos - match_many_PROPERTY_IGN_REV(state, node,
+          text_pos, text_pos - (Py_ssize_t)count, TRUE));
+
+        *is_partial = count == (size_t)(text_pos) && count < max_count &&
+          state->partial_side == RE_PARTIAL_LEFT;
+
+        return count;
+    case RE_OP_PROPERTY_REV:
+        count = min_size_t((size_t)(text_pos - state->slice_start), max_count);
+
+        count = (size_t)(text_pos - match_many_PROPERTY_REV(state, node,
+          text_pos, text_pos - (Py_ssize_t)count, TRUE));
+
+        *is_partial = count == (size_t)(text_pos) && count < max_count &&
+          state->partial_side == RE_PARTIAL_LEFT;
+
+        return count;
+    case RE_OP_RANGE:
+        count = min_size_t((size_t)(state->slice_end - text_pos), max_count);
+
+        count = (size_t)(match_many_RANGE(state, node, text_pos, text_pos +
+          (Py_ssize_t)count, TRUE) - text_pos);
+
+        *is_partial = count == (size_t)(state->text_length - text_pos) && count
+          < max_count && state->partial_side == RE_PARTIAL_RIGHT;
+
+        return count;
+    case RE_OP_RANGE_IGN:
+        count = min_size_t((size_t)(state->slice_end - text_pos), max_count);
+
+        count = (size_t)(match_many_RANGE_IGN(state, node, text_pos, text_pos +
+          (Py_ssize_t)count, TRUE) - text_pos);
+
+        *is_partial = count == (size_t)(state->text_length - text_pos) && count
+          < max_count && state->partial_side == RE_PARTIAL_RIGHT;
+
+        return count;
+    case RE_OP_RANGE_IGN_REV:
+        count = min_size_t((size_t)(text_pos - state->slice_start), max_count);
+
+        count = (size_t)(text_pos - match_many_RANGE_IGN_REV(state, node,
+          text_pos, text_pos - (Py_ssize_t)count, TRUE));
+
+        *is_partial = count == (size_t)(text_pos) && count < max_count &&
+          state->partial_side == RE_PARTIAL_LEFT;
+
+        return count;
+    case RE_OP_RANGE_REV:
+        count = min_size_t((size_t)(text_pos - state->slice_start), max_count);
+
+        count = (size_t)(text_pos - match_many_RANGE_REV(state, node, text_pos,
+          text_pos - (Py_ssize_t)count, TRUE));
+
+        *is_partial = count == (size_t)(text_pos) && count < max_count &&
+          state->partial_side == RE_PARTIAL_LEFT;
+
+        return count;
+    case RE_OP_SET_DIFF:
+    case RE_OP_SET_INTER:
+    case RE_OP_SET_SYM_DIFF:
+    case RE_OP_SET_UNION:
+        count = min_size_t((size_t)(state->slice_end - text_pos), max_count);
+
+        count = (size_t)(match_many_SET(state, node, text_pos, text_pos +
+          (Py_ssize_t)count, TRUE) - text_pos);
+
+        *is_partial = count == (size_t)(state->text_length - text_pos) && count
+          < max_count && state->partial_side == RE_PARTIAL_RIGHT;
+
+        return count;
+    case RE_OP_SET_DIFF_IGN:
+    case RE_OP_SET_INTER_IGN:
+    case RE_OP_SET_SYM_DIFF_IGN:
+    case RE_OP_SET_UNION_IGN:
+        count = min_size_t((size_t)(state->slice_end - text_pos), max_count);
+
+        count = (size_t)(match_many_SET_IGN(state, node, text_pos, text_pos +
+          (Py_ssize_t)count, TRUE) - text_pos);
+
+        *is_partial = count == (size_t)(state->text_length - text_pos) && count
+          < max_count && state->partial_side == RE_PARTIAL_RIGHT;
+
+        return count;
+    case RE_OP_SET_DIFF_IGN_REV:
+    case RE_OP_SET_INTER_IGN_REV:
+    case RE_OP_SET_SYM_DIFF_IGN_REV:
+    case RE_OP_SET_UNION_IGN_REV:
+        count = min_size_t((size_t)(text_pos - state->slice_start), max_count);
+
+        count = (size_t)(text_pos - match_many_SET_IGN_REV(state, node,
+          text_pos, text_pos - (Py_ssize_t)count, TRUE));
+
+        *is_partial = count == (size_t)(text_pos) && count < max_count &&
+          state->partial_side == RE_PARTIAL_LEFT;
+
+        return count;
+    case RE_OP_SET_DIFF_REV:
+    case RE_OP_SET_INTER_REV:
+    case RE_OP_SET_SYM_DIFF_REV:
+    case RE_OP_SET_UNION_REV:
+        count = min_size_t((size_t)(text_pos - state->slice_start), max_count);
+
+        count = (size_t)(text_pos - match_many_SET_REV(state, node, text_pos,
+          text_pos - (Py_ssize_t)count, TRUE));
+
+        *is_partial = count == (size_t)(text_pos) && count < max_count &&
+          state->partial_side == RE_PARTIAL_LEFT;
+
+        return count;
+    }
+
+    return 0;
+}
+
+/* Performs a simple string search.
+ *
+ * Scans forwards from text_pos. A candidate starts where a character equals
+ * values[0] exactly; the remaining characters are compared with same_char().
+ * Returns the start position of the first occurrence whose characters all lie
+ * before limit. If a candidate runs into limit before the whole string has
+ * been compared, or no candidate is found at all, then *is_partial is set and
+ * the position reached is returned when state->partial_side is
+ * RE_PARTIAL_RIGHT; otherwise -1 is returned.
+ */
+Py_LOCAL_INLINE(Py_ssize_t) simple_string_search(RE_State* state, RE_Node*
+  node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL* is_partial) {
+    Py_ssize_t length;
+    RE_CODE* values;
+    RE_EncodingTable* encoding;
+    Py_UCS4 check_char;
+
+    length = (Py_ssize_t)node->value_count;
+    values = node->values;
+    encoding = state->encoding;
+    check_char = values[0];
+
+    *is_partial = FALSE;
+
+    switch (state->charsize) {
+    case 1:
+    {
+        Py_UCS1* text = (Py_UCS1*)state->text;
+        Py_UCS1* text_ptr = text + text_pos;
+        Py_UCS1* limit_ptr = text + limit;
+
+        while (text_ptr < limit_ptr) {
+            if (text_ptr[0] == check_char) {
+                Py_ssize_t s_pos;
+
+                s_pos = 1;
+
+                for (;;) {
+                    if (s_pos >= length)
+                        /* End of search string. */
+                        return text_ptr - text;
+
+                    if (text_ptr + s_pos >= limit_ptr) {
+                        /* Off the end of the text. */
+                        if (state->partial_side == RE_PARTIAL_RIGHT) {
+                            /* Partial match. */
+                            *is_partial = TRUE;
+                            return text_ptr - text;
+                        }
+
+                        return -1;
+
+                    }
+
+                    if (!same_char(encoding, text_ptr[s_pos], values[s_pos]))
+                        break;
+
+                    ++s_pos;
+                }
+            }
+
+            ++text_ptr;
+        }
+        text_pos = text_ptr - text;
+        break;
+    }
+    case 2:
+    {
+        Py_UCS2* text = (Py_UCS2*)state->text;
+        Py_UCS2* text_ptr = text + text_pos;
+        Py_UCS2* limit_ptr = text + limit;
+
+        while (text_ptr < limit_ptr) {
+            if (text_ptr[0] == check_char) {
+                Py_ssize_t s_pos;
+
+                s_pos = 1;
+
+                for (;;) {
+                    if (s_pos >= length)
+                        /* End of search string. */
+                        return text_ptr - text;
+
+                    if (text_ptr + s_pos >= limit_ptr) {
+                        /* Off the end of the text. */
+                        if (state->partial_side == RE_PARTIAL_RIGHT) {
+                            /* Partial match. */
+                            *is_partial = TRUE;
+                            return text_ptr - text;
+                        }
+
+                        return -1;
+
+                    }
+
+                    if (!same_char(encoding, text_ptr[s_pos], values[s_pos]))
+                        break;
+
+                    ++s_pos;
+                }
+            }
+
+            ++text_ptr;
+        }
+        text_pos = text_ptr - text;
+        break;
+    }
+    case 4:
+    {
+        Py_UCS4* text = (Py_UCS4*)state->text;
+        Py_UCS4* text_ptr = text + text_pos;
+        Py_UCS4* limit_ptr = text + limit;
+
+        while (text_ptr < limit_ptr) {
+            if (text_ptr[0] == check_char) {
+                Py_ssize_t s_pos;
+
+                s_pos = 1;
+
+                for (;;) {
+                    if (s_pos >= length)
+                        /* End of search string. */
+                        return text_ptr - text;
+
+                    if (text_ptr + s_pos >= limit_ptr) {
+                        /* Off the end of the text. */
+                        if (state->partial_side == RE_PARTIAL_RIGHT) {
+                            /* Partial match. */
+                            *is_partial = TRUE;
+                            return text_ptr - text;
+                        }
+
+                        return -1;
+
+                    }
+
+                    if (!same_char(encoding, text_ptr[s_pos], values[s_pos]))
+                        break;
+
+                    ++s_pos;
+                }
+            }
+
+            ++text_ptr;
+        }
+        text_pos = text_ptr - text;
+        break;
+    }
+    }
+
+    /* Off the end of the text. */
+    if (state->partial_side == RE_PARTIAL_RIGHT) {
+        /* Partial match. */
+        *is_partial = TRUE;
+        return text_pos;
+    }
+
+    return -1;
+}
+
+/* Performs a simple string search, ignoring case.
+ *
+ * Same scan as simple_string_search(), except that a candidate starts where a
+ * character is one of the case variants of values[0] (as listed by
+ * encoding->all_cases()) and the remaining characters are compared with
+ * same_char_ign(). Return value and *is_partial follow the same rules.
+ */
+Py_LOCAL_INLINE(Py_ssize_t) simple_string_search_ign(RE_State* state, RE_Node*
+  node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL* is_partial) {
+    Py_ssize_t length;
+    RE_CODE* values;
+    RE_EncodingTable* encoding;
+    Py_UCS4 cases[RE_MAX_CASES];
+    int case_count;
+
+    length = (Py_ssize_t)node->value_count;
+    values = node->values;
+    encoding = state->encoding;
+    case_count = encoding->all_cases(values[0], cases);
+
+    *is_partial = FALSE;
+
+    switch (state->charsize) {
+    case 1:
+    {
+        Py_UCS1* text = (Py_UCS1*)state->text;
+        Py_UCS1* text_ptr = text + text_pos;
+        Py_UCS1* limit_ptr = text + limit;
+
+        while (text_ptr < limit_ptr) {
+            if (any_case(text_ptr[0], case_count, cases)) {
+                Py_ssize_t s_pos;
+
+                s_pos = 1;
+
+                for (;;) {
+                    if (s_pos >= length)
+                        /* End of search string. */
+                        return text_ptr - text;
+
+                    if (text_ptr + s_pos >= limit_ptr) {
+                        /* Off the end of the text. */
+                        if (state->partial_side == RE_PARTIAL_RIGHT) {
+                            /* Partial match. */
+                            *is_partial = TRUE;
+                            return text_ptr - text;
+                        }
+
+                        return -1;
+
+                    }
+
+                    if (!same_char_ign(encoding, text_ptr[s_pos],
+                      values[s_pos]))
+                        break;
+
+                    ++s_pos;
+                }
+            }
+
+            ++text_ptr;
+        }
+        text_pos = text_ptr - text;
+        break;
+    }
+    case 2:
+    {
+        Py_UCS2* text = (Py_UCS2*)state->text;
+        Py_UCS2* text_ptr = text + text_pos;
+        Py_UCS2* limit_ptr = text + limit;
+
+        while (text_ptr < limit_ptr) {
+            if (any_case(text_ptr[0], case_count, cases)) {
+                Py_ssize_t s_pos;
+
+                s_pos = 1;
+
+                for (;;) {
+                    if (s_pos >= length)
+                        /* End of search string. */
+                        return text_ptr - text;
+
+                    if (text_ptr + s_pos >= limit_ptr) {
+                        /* Off the end of the text. */
+                        if (state->partial_side == RE_PARTIAL_RIGHT) {
+                            /* Partial match. */
+                            *is_partial = TRUE;
+                            return text_ptr - text;
+                        }
+
+                        return -1;
+
+                    }
+
+                    if (!same_char_ign(encoding, text_ptr[s_pos],
+                      values[s_pos]))
+                        break;
+
+                    ++s_pos;
+                }
+            }
+
+            ++text_ptr;
+        }
+        text_pos = text_ptr - text;
+        break;
+    }
+    case 4:
+    {
+        Py_UCS4* text = (Py_UCS4*)state->text;
+        Py_UCS4* text_ptr = text + text_pos;
+        Py_UCS4* limit_ptr = text + limit;
+
+        while (text_ptr < limit_ptr) {
+            if (any_case(text_ptr[0], case_count, cases)) {
+                Py_ssize_t s_pos;
+
+                s_pos = 1;
+
+                for (;;) {
+                    if (s_pos >= length)
+                        /* End of search string. */
+                        return text_ptr - text;
+
+                    if (text_ptr + s_pos >= limit_ptr) {
+                        /* Off the end of the text. */
+                        if (state->partial_side == RE_PARTIAL_RIGHT) {
+                            /* Partial match. */
+                            *is_partial = TRUE;
+                            return text_ptr - text;
+                        }
+
+                        return -1;
+
+                    }
+
+                    if (!same_char_ign(encoding, text_ptr[s_pos],
+                      values[s_pos]))
+                        break;
+
+                    ++s_pos;
+                }
+            }
+
+            ++text_ptr;
+        }
+        text_pos = text_ptr - text;
+        break;
+    }
+    }
+
+    /* Off the end of the text. */
+    if (state->partial_side == RE_PARTIAL_RIGHT) {
+        /* Partial match. */
+        *is_partial = TRUE;
+        return text_pos;
+    }
+
+    return -1;
+}
+
+/* Performs a simple string search, backwards, ignoring case.
+ *
+ * Scans backwards from text_pos down to limit. A candidate ends where the
+ * preceding character is one of the case variants of the last value; the
+ * other characters are compared right-to-left with same_char_ign(). The
+ * value returned for a match is the position just after its last character
+ * (the end of the occurrence). When the text runs out, *is_partial is set and
+ * the position reached is returned if state->partial_side is
+ * RE_PARTIAL_LEFT; otherwise -1 is returned.
+ */
+Py_LOCAL_INLINE(Py_ssize_t) simple_string_search_ign_rev(RE_State* state,
+  RE_Node* node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL* is_partial) {
+    Py_ssize_t length;
+    RE_CODE* values;
+    RE_EncodingTable* encoding;
+    Py_UCS4 cases[RE_MAX_CASES];
+    int case_count;
+
+    length = (Py_ssize_t)node->value_count;
+    values = node->values;
+    encoding = state->encoding;
+    case_count = encoding->all_cases(values[length - 1], cases);
+
+    *is_partial = FALSE;
+
+    switch (state->charsize) {
+    case 1:
+    {
+        Py_UCS1* text = (Py_UCS1*)state->text;
+        Py_UCS1* text_ptr = text + text_pos;
+        Py_UCS1* limit_ptr = text + limit;
+
+        while (text_ptr > limit_ptr) {
+            if (any_case(text_ptr[-1], case_count, cases)) {
+                Py_ssize_t s_pos;
+
+                s_pos = 1;
+
+                for (;;) {
+                    if (s_pos >= length)
+                        /* End of search string. */
+                        return text_ptr - text;
+
+                    if (text_ptr - s_pos <= limit_ptr) {
+                        /* Off the end of the text. */
+                        if (state->partial_side == RE_PARTIAL_LEFT) {
+                            /* Partial match. */
+                            *is_partial = TRUE;
+                            return text_ptr - text;
+                        }
+
+                        return -1;
+
+                    }
+
+                    if (!same_char_ign(encoding, text_ptr[- s_pos - 1],
+                      values[length - s_pos - 1]))
+                        break;
+
+                    ++s_pos;
+                }
+            }
+
+            --text_ptr;
+        }
+        text_pos = text_ptr - text;
+        break;
+    }
+    case 2:
+    {
+        Py_UCS2* text = (Py_UCS2*)state->text;
+        Py_UCS2* text_ptr = text + text_pos;
+        Py_UCS2* limit_ptr = text + limit;
+
+        while (text_ptr > limit_ptr) {
+            if (any_case(text_ptr[-1], case_count, cases)) {
+                Py_ssize_t s_pos;
+
+                s_pos = 1;
+
+                for (;;) {
+                    if (s_pos >= length)
+                        /* End of search string. */
+                        return text_ptr - text;
+
+                    if (text_ptr - s_pos <= limit_ptr) {
+                        /* Off the end of the text. */
+                        if (state->partial_side == RE_PARTIAL_LEFT) {
+                            /* Partial match. */
+                            *is_partial = TRUE;
+                            return text_ptr - text;
+                        }
+
+                        return -1;
+
+                    }
+
+                    if (!same_char_ign(encoding, text_ptr[- s_pos - 1],
+                      values[length - s_pos - 1]))
+                        break;
+
+                    ++s_pos;
+                }
+            }
+
+            --text_ptr;
+        }
+        text_pos = text_ptr - text;
+        break;
+    }
+    case 4:
+    {
+        Py_UCS4* text = (Py_UCS4*)state->text;
+        Py_UCS4* text_ptr = text + text_pos;
+        Py_UCS4* limit_ptr = text + limit;
+
+        while (text_ptr > limit_ptr) {
+            if (any_case(text_ptr[-1], case_count, cases)) {
+                Py_ssize_t s_pos;
+
+                s_pos = 1;
+
+                for (;;) {
+                    if (s_pos >= length)
+                        /* End of search string. */
+                        return text_ptr - text;
+
+                    if (text_ptr - s_pos <= limit_ptr) {
+                        /* Off the end of the text. */
+                        if (state->partial_side == RE_PARTIAL_LEFT) {
+                            /* Partial match. */
+                            *is_partial = TRUE;
+                            return text_ptr - text;
+                        }
+
+                        return -1;
+
+                    }
+
+                    if (!same_char_ign(encoding, text_ptr[- s_pos - 1],
+                      values[length - s_pos - 1]))
+                        break;
+
+                    ++s_pos;
+                }
+            }
+
+            --text_ptr;
+        }
+        text_pos = text_ptr - text;
+        break;
+    }
+    }
+
+    /* Off the end of the text. */
+    if (state->partial_side == RE_PARTIAL_LEFT) {
+        /* Partial match. */
+        *is_partial = TRUE;
+        return text_pos;
+    }
+
+    return -1;
+}
+
+/* Performs a simple string search, backwards.
+ *
+ * Case-sensitive counterpart of simple_string_search_ign_rev(): a candidate
+ * ends where the preceding character equals the last value exactly, and the
+ * other characters are compared right-to-left with same_char(). Returns the
+ * end position of the occurrence, or follows the RE_PARTIAL_LEFT / -1 rules
+ * when the text runs out.
+ */
+Py_LOCAL_INLINE(Py_ssize_t) simple_string_search_rev(RE_State* state, RE_Node*
+  node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL* is_partial) {
+    Py_ssize_t length;
+    RE_CODE* values;
+    RE_EncodingTable* encoding;
+    Py_UCS4 check_char;
+
+    length = (Py_ssize_t)node->value_count;
+    values = node->values;
+    encoding = state->encoding;
+    check_char = values[length - 1];
+
+    *is_partial = FALSE;
+
+    switch (state->charsize) {
+    case 1:
+    {
+        Py_UCS1* text = (Py_UCS1*)state->text;
+        Py_UCS1* text_ptr = text + text_pos;
+        Py_UCS1* limit_ptr = text + limit;
+
+        while (text_ptr > limit_ptr) {
+            if (text_ptr[-1] == check_char) {
+                Py_ssize_t s_pos;
+
+                s_pos = 1;
+
+                for (;;) {
+                    if (s_pos >= length)
+                        /* End of search string. */
+                        return text_ptr - text;
+
+                    if (text_ptr - s_pos <= limit_ptr) {
+                        /* Off the end of the text. */
+                        if (state->partial_side == RE_PARTIAL_LEFT) {
+                            /* Partial match. */
+                            *is_partial = TRUE;
+                            return text_ptr - text;
+                        }
+
+                        return -1;
+
+                    }
+
+                    if (!same_char(encoding, text_ptr[- s_pos - 1],
+                      values[length - s_pos - 1]))
+                        break;
+
+                    ++s_pos;
+                }
+            }
+
+            --text_ptr;
+        }
+        text_pos = text_ptr - text;
+        break;
+    }
+    case 2:
+    {
+        Py_UCS2* text = (Py_UCS2*)state->text;
+        Py_UCS2* text_ptr = text + text_pos;
+        Py_UCS2* limit_ptr = text + limit;
+
+        while (text_ptr > limit_ptr) {
+            if (text_ptr[-1] == check_char) {
+                Py_ssize_t s_pos;
+
+                s_pos = 1;
+
+                for (;;) {
+                    if (s_pos >= length)
+                        /* End of search string. */
+                        return text_ptr - text;
+
+                    if (text_ptr - s_pos <= limit_ptr) {
+                        /* Off the end of the text. */
+                        if (state->partial_side == RE_PARTIAL_LEFT) {
+                            /* Partial match. */
+                            *is_partial = TRUE;
+                            return text_ptr - text;
+                        }
+
+                        return -1;
+
+                    }
+
+                    if (!same_char(encoding, text_ptr[- s_pos - 1],
+                      values[length - s_pos - 1]))
+                        break;
+
+                    ++s_pos;
+                }
+            }
+
+            --text_ptr;
+        }
+        text_pos = text_ptr - text;
+        break;
+    }
+    case 4:
+    {
+        Py_UCS4* text = (Py_UCS4*)state->text;
+        Py_UCS4* text_ptr = text + text_pos;
+        Py_UCS4* limit_ptr = text + limit;
+
+        while (text_ptr > limit_ptr) {
+            if (text_ptr[-1] == check_char) {
+                Py_ssize_t s_pos;
+
+                s_pos = 1;
+
+                for (;;) {
+                    if (s_pos >= length)
+                        /* End of search string. */
+                        return text_ptr - text;
+
+                    if (text_ptr - s_pos <= limit_ptr) {
+                        /* Off the end of the text. */
+                        if (state->partial_side == RE_PARTIAL_LEFT) {
+                            /* Partial match. */
+                            *is_partial = TRUE;
+                            return text_ptr - text;
+                        }
+
+                        return -1;
+
+                    }
+
+                    if (!same_char(encoding, text_ptr[- s_pos - 1],
+                      values[length - s_pos - 1]))
+                        break;
+
+                    ++s_pos;
+                }
+            }
+
+            --text_ptr;
+        }
+        text_pos = text_ptr - text;
+        break;
+    }
+    }
+
+    /* Off the end of the text. */
+    if (state->partial_side == RE_PARTIAL_LEFT) {
+        /* Partial match. */
+        *is_partial = TRUE;
+        return text_pos;
+    }
+
+    return -1;
+}
+
+/* Performs a Boyer-Moore fast string search.
+ *
+ * Uses the tables stored in node->string. At each alignment the character
+ * under the last value is tested first and the rest are compared
+ * right-to-left with same_char(). After a mismatch the alignment moves by
+ * good_suffix_offset[pos]; when the last character itself differs it moves by
+ * bad_character_offset[ch & 0xFF] (only the low 8 bits of the character index
+ * that table). Returns the start position of the occurrence, or -1 if no
+ * complete occurrence starts in the range; partial matches are not reported
+ * by this function.
+ */
+Py_LOCAL_INLINE(Py_ssize_t) fast_string_search(RE_State* state, RE_Node* node,
+  Py_ssize_t text_pos, Py_ssize_t limit) {
+    RE_EncodingTable* encoding;
+    void* text;
+    Py_ssize_t length;
+    RE_CODE* values;
+    Py_ssize_t* bad_character_offset;
+    Py_ssize_t* good_suffix_offset;
+    Py_ssize_t last_pos;
+    Py_UCS4 check_char;
+
+    encoding = state->encoding;
+    text = state->text;
+    length = (Py_ssize_t)node->value_count;
+    values = node->values;
+    good_suffix_offset = node->string.good_suffix_offset;
+    bad_character_offset = node->string.bad_character_offset;
+    last_pos = length - 1;
+    check_char = values[last_pos];
+    limit -= length; /* Last start position at which the whole string fits. */
+
+    switch (state->charsize) {
+    case 1:
+    {
+        Py_UCS1* text_ptr;
+        Py_UCS1* limit_ptr;
+
+        text_ptr = (Py_UCS1*)text + text_pos;
+        limit_ptr = (Py_UCS1*)text + limit;
+
+        while (text_ptr <= limit_ptr) {
+            Py_UCS4 ch;
+
+            ch = text_ptr[last_pos];
+            if (ch == check_char) {
+                Py_ssize_t pos;
+
+                pos = last_pos - 1;
+                while (pos >= 0 && same_char(encoding,
+                  text_ptr[pos], values[pos]))
+                    --pos;
+
+                if (pos < 0)
+                    return text_ptr - (Py_UCS1*)text;
+
+                text_ptr += good_suffix_offset[pos];
+            } else
+                text_ptr += bad_character_offset[ch & 0xFF];
+        }
+        break;
+    }
+    case 2:
+    {
+        Py_UCS2* text_ptr;
+        Py_UCS2* limit_ptr;
+
+        text_ptr = (Py_UCS2*)text + text_pos;
+        limit_ptr = (Py_UCS2*)text + limit;
+
+        while (text_ptr <= limit_ptr) {
+            Py_UCS4 ch;
+
+            ch = text_ptr[last_pos];
+            if (ch == check_char) {
+                Py_ssize_t pos;
+
+                pos = last_pos - 1;
+                while (pos >= 0 && same_char(encoding,
+                  text_ptr[pos], values[pos]))
+                    --pos;
+
+                if (pos < 0)
+                    return text_ptr - (Py_UCS2*)text;
+
+                text_ptr += good_suffix_offset[pos];
+            } else
+                text_ptr += bad_character_offset[ch & 0xFF];
+        }
+        break;
+    }
+    case 4:
+    {
+        Py_UCS4* text_ptr;
+        Py_UCS4* limit_ptr;
+
+        text_ptr = (Py_UCS4*)text + text_pos;
+        limit_ptr = (Py_UCS4*)text + limit;
+
+        while (text_ptr <= limit_ptr) {
+            Py_UCS4 ch;
+
+            ch = text_ptr[last_pos];
+            if (ch == check_char) {
+                Py_ssize_t pos;
+
+                pos = last_pos - 1;
+                while (pos >= 0 && same_char(encoding,
+                  text_ptr[pos], values[pos]))
+                    --pos;
+
+                if (pos < 0)
+                    return text_ptr - (Py_UCS4*)text;
+
+                text_ptr += good_suffix_offset[pos];
+            } else
+                text_ptr += bad_character_offset[ch & 0xFF];
+        }
+        break;
+    }
+    }
+
+    return -1;
+}
+
+/* Performs a Boyer-Moore fast string search, ignoring case.
+ *
+ * Same scan as fast_string_search(), except that the character under the last
+ * value is tested against every case variant of that value (any_case()) and
+ * the remaining characters are compared with same_char_ign(). Returns the
+ * start position of the occurrence, or -1.
+ */
+Py_LOCAL_INLINE(Py_ssize_t) fast_string_search_ign(RE_State* state, RE_Node*
+  node, Py_ssize_t text_pos, Py_ssize_t limit) {
+    RE_EncodingTable* encoding;
+    void* text;
+    Py_ssize_t length;
+    RE_CODE* values;
+    Py_ssize_t* bad_character_offset;
+    Py_ssize_t* good_suffix_offset;
+    Py_ssize_t last_pos;
+    Py_UCS4 cases[RE_MAX_CASES];
+    int case_count;
+
+    encoding = state->encoding;
+    text = state->text;
+    length = (Py_ssize_t)node->value_count;
+    values = node->values;
+    good_suffix_offset = node->string.good_suffix_offset;
+    bad_character_offset = node->string.bad_character_offset;
+    last_pos = length - 1;
+    case_count = encoding->all_cases(values[last_pos], cases);
+    limit -= length; /* Last start position at which the whole string fits. */
+
+    switch (state->charsize) {
+    case 1:
+    {
+        Py_UCS1* text_ptr;
+        Py_UCS1* limit_ptr;
+
+        text_ptr = (Py_UCS1*)text + text_pos;
+        limit_ptr = (Py_UCS1*)text + limit;
+
+        while (text_ptr <= limit_ptr) {
+            Py_UCS4 ch;
+
+            ch = text_ptr[last_pos];
+            if (any_case(ch, case_count, cases)) {
+                Py_ssize_t pos;
+
+                pos = last_pos - 1;
+                while (pos >= 0 && same_char_ign(encoding,
+                  text_ptr[pos], values[pos]))
+                    --pos;
+
+                if (pos < 0)
+                    return text_ptr - (Py_UCS1*)text;
+
+                text_ptr += good_suffix_offset[pos];
+            } else
+                text_ptr += bad_character_offset[ch & 0xFF];
+        }
+        break;
+    }
+    case 2:
+    {
+        Py_UCS2* text_ptr;
+        Py_UCS2* limit_ptr;
+
+        text_ptr = (Py_UCS2*)text + text_pos;
+        limit_ptr = (Py_UCS2*)text + limit;
+
+        while (text_ptr <= limit_ptr) {
+            Py_UCS4 ch;
+
+            ch = text_ptr[last_pos];
+            if (any_case(ch, case_count, cases)) {
+                Py_ssize_t pos;
+
+                pos = last_pos - 1;
+                while (pos >= 0 && same_char_ign(encoding,
+                  text_ptr[pos], values[pos]))
+                    --pos;
+
+                if (pos < 0)
+                    return text_ptr - (Py_UCS2*)text;
+
+                text_ptr += good_suffix_offset[pos];
+            } else
+                text_ptr += bad_character_offset[ch & 0xFF];
+        }
+        break;
+    }
+    case 4:
+    {
+        Py_UCS4* text_ptr;
+        Py_UCS4* limit_ptr;
+
+        text_ptr = (Py_UCS4*)text + text_pos;
+        limit_ptr = (Py_UCS4*)text + limit;
+
+        while (text_ptr <= limit_ptr) {
+            Py_UCS4 ch;
+
+            ch = text_ptr[last_pos];
+            if (any_case(ch, case_count, cases)) {
+                Py_ssize_t pos;
+
+                pos = last_pos - 1;
+                while (pos >= 0 && same_char_ign(encoding,
+                  text_ptr[pos], values[pos]))
+                    --pos;
+
+                if (pos < 0)
+                    return text_ptr - (Py_UCS4*)text;
+
+                text_ptr += good_suffix_offset[pos];
+            } else
+                text_ptr += bad_character_offset[ch & 0xFF];
+        }
+        break;
+    }
+    }
+
+    return -1;
+}
+
+/* Performs a Boyer-Moore fast string search, backwards, ignoring case.
+ *
+ * text_pos is the position at which an occurrence may end; it is first moved
+ * back by the string length so that it indexes a candidate start. The first
+ * value is tested against its case variants and the remaining characters are
+ * compared left-to-right with same_char_ign(). The loop runs while the
+ * candidate start is at or after limit. Returns the end position of the
+ * occurrence (candidate start + length), or -1.
+ * NOTE(review): the loop only terminates if the table entries move the
+ * pointer backwards, i.e. the tables are presumably those produced by
+ * build_fast_tables_rev() - confirm against the caller.
+ */
+Py_LOCAL_INLINE(Py_ssize_t) fast_string_search_ign_rev(RE_State* state,
+  RE_Node* node, Py_ssize_t text_pos, Py_ssize_t limit) {
+    RE_EncodingTable* encoding;
+    void* text;
+    Py_ssize_t length;
+    RE_CODE* values;
+    Py_ssize_t* bad_character_offset;
+    Py_ssize_t* good_suffix_offset;
+    Py_UCS4 cases[RE_MAX_CASES];
+    int case_count;
+
+    encoding = state->encoding;
+    text = state->text;
+    length = (Py_ssize_t)node->value_count;
+    values = node->values;
+    good_suffix_offset = node->string.good_suffix_offset;
+    bad_character_offset = node->string.bad_character_offset;
+    case_count = encoding->all_cases(values[0], cases);
+    text_pos -= length; /* Start of a string that would end at text_pos. */
+
+    switch (state->charsize) {
+    case 1:
+    {
+        Py_UCS1* text_ptr;
+        Py_UCS1* limit_ptr;
+
+        text_ptr = (Py_UCS1*)text + text_pos;
+        limit_ptr = (Py_UCS1*)text + limit;
+
+        while (text_ptr >= limit_ptr) {
+            Py_UCS4 ch;
+
+            ch = text_ptr[0];
+            if (any_case(ch, case_count, cases)) {
+                Py_ssize_t pos;
+
+                pos = 1;
+                while (pos < length && same_char_ign(encoding,
+                  text_ptr[pos], values[pos]))
+                    ++pos;
+
+                if (pos >= length)
+                    return text_ptr - (Py_UCS1*)text + length;
+
+                text_ptr += good_suffix_offset[pos];
+            } else
+                text_ptr += bad_character_offset[ch & 0xFF];
+        }
+        break;
+    }
+    case 2:
+    {
+        Py_UCS2* text_ptr;
+        Py_UCS2* limit_ptr;
+
+        text_ptr = (Py_UCS2*)text + text_pos;
+        limit_ptr = (Py_UCS2*)text + limit;
+
+        while (text_ptr >= limit_ptr) {
+            Py_UCS4 ch;
+
+            ch = text_ptr[0];
+            if (any_case(ch, case_count, cases)) {
+                Py_ssize_t pos;
+
+                pos = 1;
+                while (pos < length && same_char_ign(encoding,
+                  text_ptr[pos], values[pos]))
+                    ++pos;
+
+                if (pos >= length)
+                    return text_ptr - (Py_UCS2*)text + length;
+
+                text_ptr += good_suffix_offset[pos];
+            } else
+                text_ptr += bad_character_offset[ch & 0xFF];
+        }
+        break;
+    }
+    case 4:
+    {
+        Py_UCS4* text_ptr;
+        Py_UCS4* limit_ptr;
+
+        text_ptr = (Py_UCS4*)text + text_pos;
+        limit_ptr = (Py_UCS4*)text + limit;
+
+        while (text_ptr >= limit_ptr) {
+            Py_UCS4 ch;
+
+            ch = text_ptr[0];
+            if (any_case(ch, case_count, cases)) {
+                Py_ssize_t pos;
+
+                pos = 1;
+                while (pos < length && same_char_ign(encoding,
+                  text_ptr[pos], values[pos]))
+                    ++pos;
+
+                if (pos >= length)
+                    return text_ptr - (Py_UCS4*)text + length;
+
+                text_ptr += good_suffix_offset[pos];
+            } else
+                text_ptr += bad_character_offset[ch & 0xFF];
+        }
+        break;
+    }
+    }
+
+    return -1;
+}
+
+/* Performs a Boyer-Moore fast string search, backwards. 
*/ /* fast_string_search_rev() notes: text_pos is the position at which an
+ * occurrence may end; it is first moved back by the string length so that it
+ * indexes a candidate start. The first value is compared exactly and the
+ * remaining characters left-to-right with same_char(). The loop runs while
+ * the candidate start is at or after limit. Returns the end position of the
+ * occurrence (candidate start + length), or -1.
+ * NOTE(review): the loop only terminates if the table entries move the
+ * pointer backwards, i.e. the tables are presumably those produced by the
+ * backwards table builder - confirm against the caller.
+ */
+Py_LOCAL_INLINE(Py_ssize_t) fast_string_search_rev(RE_State* state, RE_Node*
+  node, Py_ssize_t text_pos, Py_ssize_t limit) {
+    RE_EncodingTable* encoding;
+    void* text;
+    Py_ssize_t length;
+    RE_CODE* values;
+    Py_ssize_t* bad_character_offset;
+    Py_ssize_t* good_suffix_offset;
+    Py_UCS4 check_char;
+
+    encoding = state->encoding;
+    text = state->text;
+    length = (Py_ssize_t)node->value_count;
+    values = node->values;
+    good_suffix_offset = node->string.good_suffix_offset;
+    bad_character_offset = node->string.bad_character_offset;
+    check_char = values[0];
+    text_pos -= length; /* Start of a string that would end at text_pos. */
+
+    switch (state->charsize) {
+    case 1:
+    {
+        Py_UCS1* text_ptr;
+        Py_UCS1* limit_ptr;
+
+        text_ptr = (Py_UCS1*)text + text_pos;
+        limit_ptr = (Py_UCS1*)text + limit;
+
+        while (text_ptr >= limit_ptr) {
+            Py_UCS4 ch;
+
+            ch = text_ptr[0];
+            if (ch == check_char) {
+                Py_ssize_t pos;
+
+                pos = 1;
+                while (pos < length && same_char(encoding,
+                  text_ptr[pos], values[pos]))
+                    ++pos;
+
+                if (pos >= length)
+                    return text_ptr - (Py_UCS1*)text + length;
+
+                text_ptr += good_suffix_offset[pos];
+            } else
+                text_ptr += bad_character_offset[ch & 0xFF];
+        }
+        break;
+    }
+    case 2:
+    {
+        Py_UCS2* text_ptr;
+        Py_UCS2* limit_ptr;
+
+        text_ptr = (Py_UCS2*)text + text_pos;
+        limit_ptr = (Py_UCS2*)text + limit;
+
+        while (text_ptr >= limit_ptr) {
+            Py_UCS4 ch;
+
+            ch = text_ptr[0];
+            if (ch == check_char) {
+                Py_ssize_t pos;
+
+                pos = 1;
+                while (pos < length && same_char(encoding,
+                  text_ptr[pos], values[pos]))
+                    ++pos;
+
+                if (pos >= length)
+                    return text_ptr - (Py_UCS2*)text + length;
+
+                text_ptr += good_suffix_offset[pos];
+            } else
+                text_ptr += bad_character_offset[ch & 0xFF];
+        }
+        break;
+    }
+    case 4:
+    {
+        Py_UCS4* text_ptr;
+        Py_UCS4* limit_ptr;
+
+        text_ptr = (Py_UCS4*)text + text_pos;
+        limit_ptr = (Py_UCS4*)text + limit;
+
+        while (text_ptr >= limit_ptr) {
+            Py_UCS4 ch;
+
+            ch = text_ptr[0];
+            if (ch == check_char) {
+                Py_ssize_t pos;
+
+                pos = 1;
+                while (pos < length && same_char(encoding,
+                  text_ptr[pos], values[pos]))
+                    ++pos;
+
+                if (pos >= length)
+                    return text_ptr - (Py_UCS4*)text + length;
+
+                text_ptr += good_suffix_offset[pos];
+            } else
+                text_ptr += bad_character_offset[ch & 0xFF];
+        }
+        break;
+    }
+    }
+
+    return -1;
+}
+
+/* Builds the tables for a Boyer-Moore fast string search.
+ *
+ * Strings shorter than RE_MIN_FAST_LENGTH get no tables and TRUE is returned
+ * straight away. Otherwise a 256-entry bad-character table and a
+ * length-entry good-suffix table are allocated with re_alloc(); if either
+ * allocation fails both are released and FALSE is returned. On success the
+ * tables are stored in node->string and TRUE is returned.
+ *
+ * Bad-character entries default to the string length; for every value except
+ * the last, the entry indexed by the low 8 bits of the value (or of each of
+ * its case variants when ignore is set) becomes the distance from that value
+ * to the last position, later positions overwriting earlier ones.
+ */
+Py_LOCAL_INLINE(BOOL) build_fast_tables(RE_EncodingTable* encoding, RE_Node*
+  node, BOOL ignore) {
+    Py_ssize_t length;
+    RE_CODE* values;
+    Py_ssize_t* bad;
+    Py_ssize_t* good;
+    Py_UCS4 ch;
+    Py_ssize_t last_pos;
+    Py_ssize_t pos;
+    BOOL (*is_same_char)(RE_EncodingTable* encoding, Py_UCS4 ch1, Py_UCS4 ch2);
+    Py_ssize_t suffix_len;
+    BOOL saved_start;
+    Py_ssize_t s;
+    Py_ssize_t i;
+    Py_ssize_t s_start;
+    Py_UCS4 codepoints[RE_MAX_CASES];
+
+    length = (Py_ssize_t)node->value_count;
+
+    if (length < RE_MIN_FAST_LENGTH)
+        return TRUE;
+
+    values = node->values;
+    bad = (Py_ssize_t*)re_alloc(256 * sizeof(bad[0]));
+    good = (Py_ssize_t*)re_alloc((size_t)length * sizeof(good[0]));
+
+    if (!bad || !good) {
+        re_dealloc(bad);
+        re_dealloc(good);
+
+        return FALSE;
+    }
+
+    for (ch = 0; ch < 0x100; ch++)
+        bad[ch] = length;
+
+    last_pos = length - 1;
+
+    for (pos = 0; pos < last_pos; pos++) {
+        Py_ssize_t offset;
+
+        offset = last_pos - pos;
+        ch = values[pos];
+        if (ignore) {
+            int count;
+            int i; /* Shadows the outer i for the duration of this block. */
+
+            count = encoding->all_cases(ch, codepoints);
+
+            for (i = 0; i < count; i++)
+                bad[codepoints[i] & 0xFF] = offset;
+        } else
+            bad[ch & 0xFF] = offset;
+    }
+
+    is_same_char = ignore ? same_char_ign_wrapper : same_char_wrapper;
+
+    suffix_len = 2;
+    pos = length - suffix_len;
+    saved_start = FALSE;
+    s = pos - 1;
+    i = suffix_len - 1;
+    s_start = s;
+
+    while (pos >= 0) {
+        /* Look for another occurrence of the suffix. */
+        while (i > 0) {
+            /* Have we dropped off the end of the string? */
+            if (s + i < 0)
+                break;
+
+            if (is_same_char(encoding, values[s + i], values[pos + i]))
+                /* It still matches. */
+                --i;
+            else {
+                /* Start again further along. */
+                --s;
+                i = suffix_len - 1;
+            }
+        }
+
+        if (s >= 0 && is_same_char(encoding, values[s], values[pos])) {
+            /* We haven't dropped off the end of the string, and the suffix has
+             * matched this far, so this is a good starting point for the next
+             * iteration.
+             */
+            --s;
+            if (!saved_start) {
+                s_start = s;
+                saved_start = TRUE;
+            }
+        } else {
+            /* Calculate the suffix offset. */
+            good[pos] = pos - s;
+
+            /* Extend the suffix and start searching for _this_ one. */
+            --pos;
+            ++suffix_len;
+
+            /* Where's a good place to start searching? */
+            if (saved_start) {
+                s = s_start;
+                saved_start = FALSE;
+            } else
+                --s;
+
+            /* Can we short-circuit the searching? */
+            if (s < 0)
+                break;
+        }
+
+        i = suffix_len - 1;
+    }
+
+    /* Fill-in any remaining entries. */
+    while (pos >= 0) {
+        good[pos] = pos - s;
+        --pos;
+        --s;
+    }
+
+    node->string.bad_character_offset = bad;
+    node->string.good_suffix_offset = good;
+
+    return TRUE;
+}
+
+/* Builds the tables for a Boyer-Moore fast string search, backwards. 
*/ +Py_LOCAL_INLINE(BOOL) build_fast_tables_rev(RE_EncodingTable* encoding, + RE_Node* node, BOOL ignore) { + Py_ssize_t length; + RE_CODE* values; + Py_ssize_t* bad; + Py_ssize_t* good; + Py_UCS4 ch; + Py_ssize_t last_pos; + Py_ssize_t pos; + BOOL (*is_same_char)(RE_EncodingTable* encoding, Py_UCS4 ch1, Py_UCS4 ch2); + Py_ssize_t suffix_len; + BOOL saved_start; + Py_ssize_t s; + Py_ssize_t i; + Py_ssize_t s_start; + Py_UCS4 codepoints[RE_MAX_CASES]; + + length = (Py_ssize_t)node->value_count; + + if (length < RE_MIN_FAST_LENGTH) + return TRUE; + + values = node->values; + bad = (Py_ssize_t*)re_alloc(256 * sizeof(bad[0])); + good = (Py_ssize_t*)re_alloc((size_t)length * sizeof(good[0])); + + if (!bad || !good) { + re_dealloc(bad); + re_dealloc(good); + + return FALSE; + } + + for (ch = 0; ch < 0x100; ch++) + bad[ch] = -length; + + last_pos = length - 1; + + for (pos = last_pos; pos > 0; pos--) { + Py_ssize_t offset; + + offset = -pos; + ch = values[pos]; + if (ignore) { + int count; + int i; + + count = encoding->all_cases(ch, codepoints); + + for (i = 0; i < count; i++) + bad[codepoints[i] & 0xFF] = offset; + } else + bad[ch & 0xFF] = offset; + } + + is_same_char = ignore ? same_char_ign_wrapper : same_char_wrapper; + + suffix_len = 2; + pos = suffix_len - 1; + saved_start = FALSE; + s = pos + 1; + i = suffix_len - 1; + s_start = s; + + while (pos < length) { + /* Look for another occurrence of the suffix. */ + while (i > 0) { + /* Have we dropped off the end of the string? */ + if (s - i >= length) + break; + + if (is_same_char(encoding, values[s - i], values[pos - i])) + /* It still matches. */ + --i; + else { + /* Start again further along. */ + ++s; + i = suffix_len - 1; + } + } + + if (s < length && is_same_char(encoding, values[s], values[pos])) { + /* We haven't dropped off the end of the string, and the suffix has + * matched this far, so this is a good starting point for the next + * iteration. 
+ */ + ++s; + if (!saved_start) { + s_start = s; + saved_start = TRUE; + } + } else { + /* Calculate the suffix offset. */ + good[pos] = pos - s; + + /* Extend the suffix and start searching for _this_ one. */ + ++pos; + ++suffix_len; + + /* Where's a good place to start searching? */ + if (saved_start) { + s = s_start; + saved_start = FALSE; + } else + ++s; + + /* Can we short-circuit the searching? */ + if (s >= length) + break; + } + + i = suffix_len - 1; + } + + /* Fill-in any remaining entries. */ + while (pos < length) { + good[pos] = pos - s; + ++pos; + ++s; + } + + node->string.bad_character_offset = bad; + node->string.good_suffix_offset = good; + + return TRUE; +} + +/* Performs a string search. */ +Py_LOCAL_INLINE(Py_ssize_t) string_search(RE_SafeState* safe_state, RE_Node* + node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL* is_partial) { + RE_State* state; + Py_ssize_t found_pos; + + state = safe_state->re_state; + + *is_partial = FALSE; + + /* Has the node been initialised for fast searching, if necessary? */ + if (!(node->status & RE_STATUS_FAST_INIT)) { + /* Ideally the pattern should immutable and shareable across threads. + * Internally, however, it isn't. For safety we need to hold the GIL. + */ + acquire_GIL(safe_state); + + /* Double-check because of multithreading. */ + if (!(node->status & RE_STATUS_FAST_INIT)) { + build_fast_tables(state->encoding, node, FALSE); + node->status |= RE_STATUS_FAST_INIT; + } + + release_GIL(safe_state); + } + + if (node->string.bad_character_offset) { + /* Start with a fast search. This will find the string if it's complete + * (i.e. not truncated). + */ + found_pos = fast_string_search(state, node, text_pos, limit); + if (found_pos < 0 && state->partial_side == RE_PARTIAL_RIGHT) + /* We didn't find the string, but it could've been truncated, so + * try again, starting close to the end. 
+ */ + found_pos = simple_string_search(state, node, limit - + (Py_ssize_t)(node->value_count - 1), limit, is_partial); + } else + found_pos = simple_string_search(state, node, text_pos, limit, + is_partial); + + return found_pos; +} + +/* Performs a string search, ignoring case. */ +Py_LOCAL_INLINE(Py_ssize_t) string_search_fld(RE_SafeState* safe_state, + RE_Node* node, Py_ssize_t text_pos, Py_ssize_t limit, Py_ssize_t* new_pos, + BOOL* is_partial) { + RE_State* state; + RE_EncodingTable* encoding; + int (*full_case_fold)(Py_UCS4 ch, Py_UCS4* folded); + Py_UCS4 (*char_at)(void* text, Py_ssize_t pos); + void* text; + RE_CODE* values; + Py_ssize_t start_pos; + int f_pos; + int folded_len; + Py_ssize_t length; + Py_ssize_t s_pos; + Py_UCS4 folded[RE_MAX_FOLDED]; + + state = safe_state->re_state; + encoding = state->encoding; + full_case_fold = encoding->full_case_fold; + char_at = state->char_at; + text = state->text; + + values = node->values; + start_pos = text_pos; + f_pos = 0; + folded_len = 0; + length = (Py_ssize_t)node->value_count; + s_pos = 0; + + *is_partial = FALSE; + + while (s_pos < length || f_pos < folded_len) { + if (f_pos >= folded_len) { + /* Fetch and casefold another character. */ + if (text_pos >= limit) { + if (text_pos >= state->text_length && state->partial_side == + RE_PARTIAL_RIGHT) { + *is_partial = TRUE; + return start_pos; + } + + return -1; + } + + folded_len = full_case_fold(char_at(text, text_pos), folded); + f_pos = 0; + } + + if (same_char_ign(encoding, values[s_pos], folded[f_pos])) { + ++s_pos; + ++f_pos; + + if (f_pos >= folded_len) + ++text_pos; + } else { + ++start_pos; + text_pos = start_pos; + f_pos = 0; + folded_len = 0; + s_pos = 0; + } + } + + /* We found the string. */ + if (new_pos) + *new_pos = text_pos; + + return start_pos; +} + +/* Performs a string search, backwards, ignoring case. 
*/ +Py_LOCAL_INLINE(Py_ssize_t) string_search_fld_rev(RE_SafeState* safe_state, + RE_Node* node, Py_ssize_t text_pos, Py_ssize_t limit, Py_ssize_t* new_pos, + BOOL* is_partial) { + RE_State* state; + RE_EncodingTable* encoding; + int (*full_case_fold)(Py_UCS4 ch, Py_UCS4* folded); + Py_UCS4 (*char_at)(void* text, Py_ssize_t pos); + void* text; + RE_CODE* values; + Py_ssize_t start_pos; + int f_pos; + int folded_len; + Py_ssize_t length; + Py_ssize_t s_pos; + Py_UCS4 folded[RE_MAX_FOLDED]; + + state = safe_state->re_state; + encoding = state->encoding; + full_case_fold = encoding->full_case_fold; + char_at = state->char_at; + text = state->text; + + values = node->values; + start_pos = text_pos; + f_pos = 0; + folded_len = 0; + length = (Py_ssize_t)node->value_count; + s_pos = 0; + + *is_partial = FALSE; + + while (s_pos < length || f_pos < folded_len) { + if (f_pos >= folded_len) { + /* Fetch and casefold another character. */ + if (text_pos <= limit) { + if (text_pos <= 0 && state->partial_side == RE_PARTIAL_LEFT) { + *is_partial = TRUE; + return start_pos; + } + + return -1; + } + + folded_len = full_case_fold(char_at(text, text_pos - 1), folded); + f_pos = 0; + } + + if (same_char_ign(encoding, values[length - s_pos - 1], + folded[folded_len - f_pos - 1])) { + ++s_pos; + ++f_pos; + + if (f_pos >= folded_len) + --text_pos; + } else { + --start_pos; + text_pos = start_pos; + f_pos = 0; + folded_len = 0; + s_pos = 0; + } + } + + /* We found the string. */ + if (new_pos) + *new_pos = text_pos; + + return start_pos; +} + +/* Performs a string search, ignoring case. */ +Py_LOCAL_INLINE(Py_ssize_t) string_search_ign(RE_SafeState* safe_state, + RE_Node* node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL* is_partial) { + RE_State* state; + Py_ssize_t found_pos; + + state = safe_state->re_state; + + *is_partial = FALSE; + + /* Has the node been initialised for fast searching, if necessary? 
*/ + if (!(node->status & RE_STATUS_FAST_INIT)) { + /* Ideally the pattern should immutable and shareable across threads. + * Internally, however, it isn't. For safety we need to hold the GIL. + */ + acquire_GIL(safe_state); + + /* Double-check because of multithreading. */ + if (!(node->status & RE_STATUS_FAST_INIT)) { + build_fast_tables(state->encoding, node, TRUE); + node->status |= RE_STATUS_FAST_INIT; + } + + release_GIL(safe_state); + } + + if (node->string.bad_character_offset) { + /* Start with a fast search. This will find the string if it's complete + * (i.e. not truncated). + */ + found_pos = fast_string_search_ign(state, node, text_pos, limit); + if (found_pos < 0 && state->partial_side == RE_PARTIAL_RIGHT) + /* We didn't find the string, but it could've been truncated, so + * try again, starting close to the end. + */ + found_pos = simple_string_search_ign(state, node, limit - + (Py_ssize_t)(node->value_count - 1), limit, is_partial); + } else + found_pos = simple_string_search_ign(state, node, text_pos, limit, + is_partial); + + return found_pos; +} + +/* Performs a string search, backwards, ignoring case. */ +Py_LOCAL_INLINE(Py_ssize_t) string_search_ign_rev(RE_SafeState* safe_state, + RE_Node* node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL* is_partial) { + RE_State* state; + Py_ssize_t found_pos; + + state = safe_state->re_state; + + *is_partial = FALSE; + + /* Has the node been initialised for fast searching, if necessary? */ + if (!(node->status & RE_STATUS_FAST_INIT)) { + /* Ideally the pattern should immutable and shareable across threads. + * Internally, however, it isn't. For safety we need to hold the GIL. + */ + acquire_GIL(safe_state); + + /* Double-check because of multithreading. */ + if (!(node->status & RE_STATUS_FAST_INIT)) { + build_fast_tables_rev(state->encoding, node, TRUE); + node->status |= RE_STATUS_FAST_INIT; + } + + release_GIL(safe_state); + } + + if (node->string.bad_character_offset) { + /* Start with a fast search. 
This will find the string if it's complete + * (i.e. not truncated). + */ + found_pos = fast_string_search_ign_rev(state, node, text_pos, limit); + if (found_pos < 0 && state->partial_side == RE_PARTIAL_LEFT) + /* We didn't find the string, but it could've been truncated, so + * try again, starting close to the end. + */ + found_pos = simple_string_search_ign_rev(state, node, limit + + (Py_ssize_t)(node->value_count - 1), limit, is_partial); + } else + found_pos = simple_string_search_ign_rev(state, node, text_pos, limit, + is_partial); + + return found_pos; +} + +/* Performs a string search, backwards. */ +Py_LOCAL_INLINE(Py_ssize_t) string_search_rev(RE_SafeState* safe_state, + RE_Node* node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL* is_partial) { + RE_State* state; + Py_ssize_t found_pos; + + state = safe_state->re_state; + + *is_partial = FALSE; + + /* Has the node been initialised for fast searching, if necessary? */ + if (!(node->status & RE_STATUS_FAST_INIT)) { + /* Ideally the pattern should immutable and shareable across threads. + * Internally, however, it isn't. For safety we need to hold the GIL. + */ + acquire_GIL(safe_state); + + /* Double-check because of multithreading. */ + if (!(node->status & RE_STATUS_FAST_INIT)) { + build_fast_tables_rev(state->encoding, node, FALSE); + node->status |= RE_STATUS_FAST_INIT; + } + + release_GIL(safe_state); + } + + if (node->string.bad_character_offset) { + /* Start with a fast search. This will find the string if it's complete + * (i.e. not truncated). + */ + found_pos = fast_string_search_rev(state, node, text_pos, limit); + if (found_pos < 0 && state->partial_side == RE_PARTIAL_LEFT) + /* We didn't find the string, but it could've been truncated, so + * try again, starting close to the end. 
+ */ + found_pos = simple_string_search_rev(state, node, limit + + (Py_ssize_t)(node->value_count - 1), limit, is_partial); + } else + found_pos = simple_string_search_rev(state, node, text_pos, limit, + is_partial); + + return found_pos; +} + +/* Returns how many characters there could be before full case-folding. */ +Py_LOCAL_INLINE(Py_ssize_t) possible_unfolded_length(Py_ssize_t length) { + if (length == 0) + return 0; + + if (length < RE_MAX_FOLDED) + return 1; + + return length / RE_MAX_FOLDED; +} + +/* Checks whether there's any character except a newline at a position. */ +Py_LOCAL_INLINE(int) try_match_ANY(RE_State* state, RE_Node* node, Py_ssize_t + text_pos) { + if (text_pos >= state->text_length) { + if (state->partial_side == RE_PARTIAL_RIGHT) + return RE_ERROR_PARTIAL; + + return RE_ERROR_FAILURE; + } + + return bool_as_status(text_pos < state->slice_end && + matches_ANY(state->encoding, node, state->char_at(state->text, + text_pos))); +} + +/* Checks whether there's any character at all at a position. */ +Py_LOCAL_INLINE(int) try_match_ANY_ALL(RE_State* state, RE_Node* node, + Py_ssize_t text_pos) { + if (text_pos >= state->text_length) { + if (state->partial_side == RE_PARTIAL_RIGHT) + return RE_ERROR_PARTIAL; + + return RE_ERROR_FAILURE; + } + + return bool_as_status(text_pos < state->slice_end); +} + +/* Checks whether there's any character at all at a position, backwards. */ +Py_LOCAL_INLINE(int) try_match_ANY_ALL_REV(RE_State* state, RE_Node* node, + Py_ssize_t text_pos) { + if (text_pos <= 0) { + if (state->partial_side == RE_PARTIAL_LEFT) + return RE_ERROR_PARTIAL; + + return RE_ERROR_FAILURE; + } + + return bool_as_status(text_pos > state->slice_start); +} + +/* Checks whether there's any character except a newline at a position, + * backwards. 
+ */
+Py_LOCAL_INLINE(int) try_match_ANY_REV(RE_State* state, RE_Node* node,
+  Py_ssize_t text_pos) { /* Tests the character at text_pos - 1. */
+    if (text_pos <= 0) {
+        if (state->partial_side == RE_PARTIAL_LEFT)
+            return RE_ERROR_PARTIAL;
+
+        return RE_ERROR_FAILURE;
+    }
+
+    return bool_as_status(text_pos > state->slice_start &&
+      matches_ANY(state->encoding, node, state->char_at(state->text, text_pos -
+      1)));
+}
+
+/* Checks whether there's any character except a line separator at a position.
+ *
+ * At or past the end of the text this returns RE_ERROR_PARTIAL if partial
+ * matching on the right is enabled, otherwise RE_ERROR_FAILURE. Elsewhere the
+ * character at text_pos is tested with matches_ANY_U(), provided text_pos is
+ * before the end of the slice, and the result is converted to a status by
+ * bool_as_status().
+ */
+Py_LOCAL_INLINE(int) try_match_ANY_U(RE_State* state, RE_Node* node, Py_ssize_t
+  text_pos) {
+    if (text_pos >= state->text_length) {
+        if (state->partial_side == RE_PARTIAL_RIGHT)
+            return RE_ERROR_PARTIAL;
+
+        return RE_ERROR_FAILURE;
+    }
+
+    return bool_as_status(text_pos < state->slice_end &&
+      matches_ANY_U(state->encoding, node, state->char_at(state->text,
+      text_pos)));
+}
+
+/* Checks whether there's any character except a line separator at a position,
+ * backwards.
+ *
+ * At or before the start of the text this returns RE_ERROR_PARTIAL if partial
+ * matching on the left is enabled, otherwise RE_ERROR_FAILURE. Elsewhere the
+ * character at text_pos - 1 is tested, provided text_pos is after the start of
+ * the slice.
+ */
+Py_LOCAL_INLINE(int) try_match_ANY_U_REV(RE_State* state, RE_Node* node,
+  Py_ssize_t text_pos) {
+    if (text_pos <= 0) {
+        if (state->partial_side == RE_PARTIAL_LEFT)
+            return RE_ERROR_PARTIAL;
+
+        return RE_ERROR_FAILURE;
+    }
+
+    return bool_as_status(text_pos > state->slice_start &&
+      matches_ANY_U(state->encoding, node, state->char_at(state->text, text_pos
+      - 1)));
+}
+
+/* Checks whether a position is on a word boundary.
+ *
+ * The encoding's at_boundary() result is compared with node->match, so the
+ * same function serves both the positive and the negated test. The outcome is
+ * converted to a status by bool_as_status().
+ */
+Py_LOCAL_INLINE(int) try_match_BOUNDARY(RE_State* state, RE_Node* node,
+  Py_ssize_t text_pos) {
+    return bool_as_status(state->encoding->at_boundary(state, text_pos) ==
+      node->match);
+}
+
+/* Checks whether there's a character at a position.
*/
+Py_LOCAL_INLINE(int) try_match_CHARACTER(RE_State* state, RE_Node* node,
+  Py_ssize_t text_pos) { /* Tests the character at text_pos. */
+    if (text_pos >= state->text_length) {
+        if (state->partial_side == RE_PARTIAL_RIGHT)
+            return RE_ERROR_PARTIAL;
+
+        return RE_ERROR_FAILURE;
+    }
+
+    return bool_as_status(text_pos < state->slice_end &&
+      matches_CHARACTER(state->encoding, node, state->char_at(state->text,
+      text_pos)) == node->match);
+}
+
+/* Checks whether there's a character at a position, ignoring case.
+ *
+ * At or past the end of the text this returns RE_ERROR_PARTIAL if partial
+ * matching on the right is enabled, otherwise RE_ERROR_FAILURE. Elsewhere the
+ * result of matches_CHARACTER_IGN() for the character at text_pos is compared
+ * with node->match and converted to a status by bool_as_status().
+ */
+Py_LOCAL_INLINE(int) try_match_CHARACTER_IGN(RE_State* state, RE_Node* node,
+  Py_ssize_t text_pos) {
+    if (text_pos >= state->text_length) {
+        if (state->partial_side == RE_PARTIAL_RIGHT)
+            return RE_ERROR_PARTIAL;
+
+        return RE_ERROR_FAILURE;
+    }
+
+    return bool_as_status(text_pos < state->slice_end &&
+      matches_CHARACTER_IGN(state->encoding, node, state->char_at(state->text,
+      text_pos)) == node->match);
+}
+
+/* Checks whether there's a character at a position, ignoring case, backwards.
+ *
+ * At or before the start of the text this returns RE_ERROR_PARTIAL if partial
+ * matching on the left is enabled, otherwise RE_ERROR_FAILURE. Elsewhere the
+ * character at text_pos - 1 is the one tested.
+ */
+Py_LOCAL_INLINE(int) try_match_CHARACTER_IGN_REV(RE_State* state, RE_Node*
+  node, Py_ssize_t text_pos) {
+    if (text_pos <= 0) {
+        if (state->partial_side == RE_PARTIAL_LEFT)
+            return RE_ERROR_PARTIAL;
+
+        return RE_ERROR_FAILURE;
+    }
+
+    return bool_as_status(text_pos > state->slice_start &&
+      matches_CHARACTER_IGN(state->encoding, node, state->char_at(state->text,
+      text_pos - 1)) == node->match);
+}
+
+/* Checks whether there's a character at a position, backwards.
+ *
+ * Case-sensitive counterpart of the backwards case-insensitive test: the
+ * character at text_pos - 1 is tested with matches_CHARACTER().
+ */
+Py_LOCAL_INLINE(int) try_match_CHARACTER_REV(RE_State* state, RE_Node* node,
+  Py_ssize_t text_pos) {
+    if (text_pos <= 0) {
+        if (state->partial_side == RE_PARTIAL_LEFT)
+            return RE_ERROR_PARTIAL;
+
+        return RE_ERROR_FAILURE;
+    }
+
+    return bool_as_status(text_pos > state->slice_start &&
+      matches_CHARACTER(state->encoding, node, state->char_at(state->text,
+      text_pos - 1)) == node->match);
+}
+
+/* Checks whether a position is on a default word boundary.
*/
+Py_LOCAL_INLINE(int) try_match_DEFAULT_BOUNDARY(RE_State* state, RE_Node* node,
+  Py_ssize_t text_pos) { /* Compares at_default_boundary() with node->match. */
+    return bool_as_status(state->encoding->at_default_boundary(state, text_pos)
+      == node->match);
+}
+
+/* Checks whether a position is at the default end of a word.
+ *
+ * Delegates to the encoding's at_default_word_end() and converts the result
+ * to a status with bool_as_status(). 'node' is not used.
+ */
+Py_LOCAL_INLINE(int) try_match_DEFAULT_END_OF_WORD(RE_State* state, RE_Node*
+  node, Py_ssize_t text_pos) {
+    return bool_as_status(state->encoding->at_default_word_end(state,
+      text_pos));
+}
+
+/* Checks whether a position is at the default start of a word.
+ *
+ * Delegates to the encoding's at_default_word_start() and converts the result
+ * to a status with bool_as_status(). 'node' is not used.
+ */
+Py_LOCAL_INLINE(int) try_match_DEFAULT_START_OF_WORD(RE_State* state, RE_Node*
+  node, Py_ssize_t text_pos) {
+    return bool_as_status(state->encoding->at_default_word_start(state,
+      text_pos));
+}
+
+/* Checks whether a position is at the end of a line.
+ *
+ * True at or past the end of the slice, or when the character at text_pos is
+ * '\n'. Only '\n' is recognised here; 'node' is not used.
+ */
+Py_LOCAL_INLINE(int) try_match_END_OF_LINE(RE_State* state, RE_Node* node,
+  Py_ssize_t text_pos) {
+    return bool_as_status(text_pos >= state->slice_end ||
+      state->char_at(state->text, text_pos) == '\n');
+}
+
+/* Checks whether a position is at the end of a line, as decided by the
+ * encoding's at_line_end() rather than by a test for '\n' alone.
+ */
+Py_LOCAL_INLINE(int) try_match_END_OF_LINE_U(RE_State* state, RE_Node* node,
+  Py_ssize_t text_pos) {
+    return bool_as_status(state->encoding->at_line_end(state, text_pos));
+}
+
+/* Checks whether a position is at the end of the string, i.e. at or past
+ * state->text_length.
+ */
+Py_LOCAL_INLINE(int) try_match_END_OF_STRING(RE_State* state, RE_Node* node,
+  Py_ssize_t text_pos) {
+    return bool_as_status(text_pos >= state->text_length);
+}
+
+/* Checks whether a position is at the end of a line or the string.
+ *
+ * True at or past state->text_length, or when text_pos equals
+ * state->final_newline.
+ */
+Py_LOCAL_INLINE(int) try_match_END_OF_STRING_LINE(RE_State* state, RE_Node*
+  node, Py_ssize_t text_pos) {
+    return bool_as_status(text_pos >= state->text_length || text_pos ==
+      state->final_newline);
+}
+
+/* Checks whether a position is at the end of the string.
*/
+Py_LOCAL_INLINE(int) try_match_END_OF_STRING_LINE_U(RE_State* state, RE_Node*
+  node, Py_ssize_t text_pos) { /* Also true at state->final_line_sep. */
+    return bool_as_status(text_pos >= state->text_length || text_pos ==
+      state->final_line_sep);
+}
+
+/* Checks whether a position is at the end of a word.
+ *
+ * Delegates to the encoding's at_word_end() and converts the result to a
+ * status with bool_as_status(). 'node' is not used.
+ */
+Py_LOCAL_INLINE(int) try_match_END_OF_WORD(RE_State* state, RE_Node* node,
+  Py_ssize_t text_pos) {
+    return bool_as_status(state->encoding->at_word_end(state, text_pos));
+}
+
+/* Checks whether a position is on a grapheme boundary.
+ *
+ * Delegates to the encoding's at_grapheme_boundary() and converts the result
+ * to a status with bool_as_status(). 'node' is not used.
+ */
+Py_LOCAL_INLINE(int) try_match_GRAPHEME_BOUNDARY(RE_State* state, RE_Node*
+  node, Py_ssize_t text_pos) {
+    return bool_as_status(state->encoding->at_grapheme_boundary(state,
+      text_pos));
+}
+
+/* Checks whether there's a character with a certain property at a position.
+ *
+ * At or past the end of the text this returns RE_ERROR_PARTIAL if partial
+ * matching on the right is enabled, otherwise RE_ERROR_FAILURE. Elsewhere the
+ * result of matches_PROPERTY() for the character at text_pos is compared with
+ * node->match and converted to a status by bool_as_status().
+ */
+Py_LOCAL_INLINE(int) try_match_PROPERTY(RE_State* state, RE_Node* node,
+  Py_ssize_t text_pos) {
+    if (text_pos >= state->text_length) {
+        if (state->partial_side == RE_PARTIAL_RIGHT)
+            return RE_ERROR_PARTIAL;
+
+        return RE_ERROR_FAILURE;
+    }
+
+    return bool_as_status(text_pos < state->slice_end &&
+      matches_PROPERTY(state->encoding, node, state->char_at(state->text,
+      text_pos)) == node->match);
+}
+
+/* Checks whether there's a character with a certain property at a position,
+ * ignoring case.
+ *
+ * Same checks as the case-sensitive test, but the character is tested with
+ * matches_PROPERTY_IGN().
+ */
+Py_LOCAL_INLINE(int) try_match_PROPERTY_IGN(RE_State* state, RE_Node* node,
+  Py_ssize_t text_pos) {
+    if (text_pos >= state->text_length) {
+        if (state->partial_side == RE_PARTIAL_RIGHT)
+            return RE_ERROR_PARTIAL;
+
+        return RE_ERROR_FAILURE;
+    }
+
+    return bool_as_status(text_pos < state->slice_end &&
+      matches_PROPERTY_IGN(state->encoding, node, state->char_at(state->text,
+      text_pos)) == node->match);
+}
+
+/* Checks whether there's a character with a certain property at a position,
+ * ignoring case, backwards.
+ */
+Py_LOCAL_INLINE(int) try_match_PROPERTY_IGN_REV(RE_State* state, RE_Node* node,
+  Py_ssize_t text_pos) { /* Tests the character at text_pos - 1. */
+    if (text_pos <= 0) {
+        if (state->partial_side == RE_PARTIAL_LEFT)
+            return RE_ERROR_PARTIAL;
+
+        return RE_ERROR_FAILURE;
+    }
+
+    return bool_as_status(text_pos > state->slice_start &&
+      matches_PROPERTY_IGN(state->encoding, node, state->char_at(state->text,
+      text_pos - 1)) == node->match);
+}
+
+/* Checks whether there's a character with a certain property at a position,
+ * backwards.
+ *
+ * At or before the start of the text this returns RE_ERROR_PARTIAL if partial
+ * matching on the left is enabled, otherwise RE_ERROR_FAILURE. Elsewhere the
+ * result of matches_PROPERTY() for the character at text_pos - 1 is compared
+ * with node->match and converted to a status by bool_as_status().
+ */
+Py_LOCAL_INLINE(int) try_match_PROPERTY_REV(RE_State* state, RE_Node* node,
+  Py_ssize_t text_pos) {
+    if (text_pos <= 0) {
+        if (state->partial_side == RE_PARTIAL_LEFT)
+            return RE_ERROR_PARTIAL;
+
+        return RE_ERROR_FAILURE;
+    }
+
+    return bool_as_status(text_pos > state->slice_start &&
+      matches_PROPERTY(state->encoding, node, state->char_at(state->text,
+      text_pos - 1)) == node->match);
+}
+
+/* Checks whether there's a character in a certain range at a position.
+ *
+ * At or past the end of the text this returns RE_ERROR_PARTIAL if partial
+ * matching on the right is enabled, otherwise RE_ERROR_FAILURE. Elsewhere the
+ * result of matches_RANGE() for the character at text_pos is compared with
+ * node->match and converted to a status by bool_as_status().
+ */
+Py_LOCAL_INLINE(int) try_match_RANGE(RE_State* state, RE_Node* node, Py_ssize_t
+  text_pos) {
+    if (text_pos >= state->text_length) {
+        if (state->partial_side == RE_PARTIAL_RIGHT)
+            return RE_ERROR_PARTIAL;
+
+        return RE_ERROR_FAILURE;
+    }
+
+    return bool_as_status(text_pos < state->slice_end &&
+      matches_RANGE(state->encoding, node, state->char_at(state->text,
+      text_pos)) == node->match);
+}
+
+/* Checks whether there's a character in a certain range at a position,
+ * ignoring case.
+ *
+ * Same checks as the case-sensitive test, but the character is tested with
+ * matches_RANGE_IGN().
+ */
+Py_LOCAL_INLINE(int) try_match_RANGE_IGN(RE_State* state, RE_Node* node,
+  Py_ssize_t text_pos) {
+    if (text_pos >= state->text_length) {
+        if (state->partial_side == RE_PARTIAL_RIGHT)
+            return RE_ERROR_PARTIAL;
+
+        return RE_ERROR_FAILURE;
+    }
+
+    return bool_as_status(text_pos < state->slice_end &&
+      matches_RANGE_IGN(state->encoding, node, state->char_at(state->text,
+      text_pos)) == node->match);
+}
+
+/* Checks whether there's a character in a certain range at a position,
+ * ignoring case, backwards.
+ */
+Py_LOCAL_INLINE(int) try_match_RANGE_IGN_REV(RE_State* state, RE_Node* node,
+  Py_ssize_t text_pos) { /* Tests the character at text_pos - 1. */
+    if (text_pos <= 0) {
+        if (state->partial_side == RE_PARTIAL_LEFT)
+            return RE_ERROR_PARTIAL;
+
+        return RE_ERROR_FAILURE;
+    }
+
+    return bool_as_status(text_pos > state->slice_start &&
+      matches_RANGE_IGN(state->encoding, node, state->char_at(state->text,
+      text_pos - 1)) == node->match);
+}
+
+/* Checks whether there's a character in a certain range at a position,
+ * backwards.
+ *
+ * At or before the start of the text this returns RE_ERROR_PARTIAL if partial
+ * matching on the left is enabled, otherwise RE_ERROR_FAILURE. Elsewhere the
+ * result of matches_RANGE() for the character at text_pos - 1 is compared
+ * with node->match and converted to a status by bool_as_status().
+ */
+Py_LOCAL_INLINE(int) try_match_RANGE_REV(RE_State* state, RE_Node* node,
+  Py_ssize_t text_pos) {
+    if (text_pos <= 0) {
+        if (state->partial_side == RE_PARTIAL_LEFT)
+            return RE_ERROR_PARTIAL;
+
+        return RE_ERROR_FAILURE;
+    }
+
+    return bool_as_status(text_pos > state->slice_start &&
+      matches_RANGE(state->encoding, node, state->char_at(state->text, text_pos
+      - 1)) == node->match);
+}
+
+/* Checks whether a position is at the search anchor, i.e. whether text_pos
+ * equals state->search_anchor. 'node' is not used.
+ */
+Py_LOCAL_INLINE(int) try_match_SEARCH_ANCHOR(RE_State* state, RE_Node* node,
+  Py_ssize_t text_pos) {
+    return bool_as_status(text_pos == state->search_anchor);
+}
+
+/* Checks whether there's a character in a certain set at a position.
+ *
+ * At or past the end of the text this returns RE_ERROR_PARTIAL if partial
+ * matching on the right is enabled, otherwise RE_ERROR_FAILURE. Elsewhere the
+ * result of matches_SET() for the character at text_pos is compared with
+ * node->match and converted to a status by bool_as_status().
+ */
+Py_LOCAL_INLINE(int) try_match_SET(RE_State* state, RE_Node* node, Py_ssize_t
+  text_pos) {
+    if (text_pos >= state->text_length) {
+        if (state->partial_side == RE_PARTIAL_RIGHT)
+            return RE_ERROR_PARTIAL;
+
+        return RE_ERROR_FAILURE;
+    }
+
+    return bool_as_status(text_pos < state->slice_end &&
+      matches_SET(state->encoding, node, state->char_at(state->text, text_pos))
+      == node->match);
+}
+
+/* Checks whether there's a character in a certain set at a position, ignoring
+ * case.
+ */
+Py_LOCAL_INLINE(int) try_match_SET_IGN(RE_State* state, RE_Node* node,
+  Py_ssize_t text_pos) { /* Tests the character at text_pos. */
+    if (text_pos >= state->text_length) {
+        if (state->partial_side == RE_PARTIAL_RIGHT)
+            return RE_ERROR_PARTIAL;
+
+        return RE_ERROR_FAILURE;
+    }
+
+    return bool_as_status(text_pos < state->slice_end &&
+      matches_SET_IGN(state->encoding, node, state->char_at(state->text,
+      text_pos)) == node->match);
+}
+
+/* Checks whether there's a character in a certain set at a position, ignoring
+ * case, backwards.
+ *
+ * At or before the start of the text this returns RE_ERROR_PARTIAL if partial
+ * matching on the left is enabled, otherwise RE_ERROR_FAILURE. Elsewhere the
+ * result of matches_SET_IGN() for the character at text_pos - 1 is compared
+ * with node->match and converted to a status by bool_as_status().
+ */
+Py_LOCAL_INLINE(int) try_match_SET_IGN_REV(RE_State* state, RE_Node* node,
+  Py_ssize_t text_pos) {
+    if (text_pos <= 0) {
+        if (state->partial_side == RE_PARTIAL_LEFT)
+            return RE_ERROR_PARTIAL;
+
+        return RE_ERROR_FAILURE;
+    }
+
+    return bool_as_status(text_pos > state->slice_start &&
+      matches_SET_IGN(state->encoding, node, state->char_at(state->text,
+      text_pos - 1)) == node->match);
+}
+
+/* Checks whether there's a character in a certain set at a position,
+ * backwards.
+ *
+ * Case-sensitive counterpart of the backwards case-insensitive test: the
+ * character at text_pos - 1 is tested with matches_SET().
+ */
+Py_LOCAL_INLINE(int) try_match_SET_REV(RE_State* state, RE_Node* node,
+  Py_ssize_t text_pos) {
+    if (text_pos <= 0) {
+        if (state->partial_side == RE_PARTIAL_LEFT)
+            return RE_ERROR_PARTIAL;
+
+        return RE_ERROR_FAILURE;
+    }
+
+    return bool_as_status(text_pos > state->slice_start &&
+      matches_SET(state->encoding, node, state->char_at(state->text, text_pos -
+      1)) == node->match);
+}
+
+/* Checks whether a position is at the start of a line.
+ *
+ * True when text_pos is at or before position 0, or when the character at
+ * text_pos - 1 is '\n'. Only '\n' is recognised here; 'node' is not used.
+ */
+Py_LOCAL_INLINE(int) try_match_START_OF_LINE(RE_State* state, RE_Node* node,
+  Py_ssize_t text_pos) {
+    return bool_as_status(text_pos <= 0 || state->char_at(state->text, text_pos
+      - 1) == '\n');
+}
+
+/* Checks whether a position is at the start of a line, as decided by the
+ * encoding's at_line_start() rather than by a test for '\n' alone.
+ */
+Py_LOCAL_INLINE(int) try_match_START_OF_LINE_U(RE_State* state, RE_Node* node,
+  Py_ssize_t text_pos) {
+    return bool_as_status(state->encoding->at_line_start(state, text_pos));
+}
+
+/* Checks whether a position is at the start of the string.
*/
+Py_LOCAL_INLINE(int) try_match_START_OF_STRING(RE_State* state, RE_Node* node,
+  Py_ssize_t text_pos) { /* True when text_pos is at or before position 0. */
+    return bool_as_status(text_pos <= 0);
+}
+
+/* Checks whether a position is at the start of a word.
+ *
+ * Delegates to the encoding's at_word_start() and converts the result to a
+ * status with bool_as_status(). 'node' is not used.
+ */
+Py_LOCAL_INLINE(int) try_match_START_OF_WORD(RE_State* state, RE_Node* node,
+  Py_ssize_t text_pos) {
+    return bool_as_status(state->encoding->at_word_start(state, text_pos));
+}
+
+/* Checks whether there's a certain string at a position.
+ *
+ * Compares the node's values with the text, one character at a time, starting
+ * at text_pos. If the end of the slice is reached before the whole string has
+ * been compared, returns RE_ERROR_PARTIAL when partial matching on the right
+ * is enabled and RE_ERROR_FAILURE otherwise. A mismatch returns
+ * RE_ERROR_FAILURE.
+ *
+ * On success, fills in next_position from next->match_next and text_pos +
+ * next->match_step, and returns RE_ERROR_SUCCESS.
+ */
+Py_LOCAL_INLINE(int) try_match_STRING(RE_State* state, RE_NextNode* next,
+  RE_Node* node, Py_ssize_t text_pos, RE_Position* next_position) {
+    Py_ssize_t length;
+    Py_UCS4 (*char_at)(void* text, Py_ssize_t pos);
+    RE_EncodingTable* encoding;
+    RE_CODE* values;
+    Py_ssize_t s_pos;
+
+    length = (Py_ssize_t)node->value_count;
+    char_at = state->char_at;
+    encoding = state->encoding;
+    values = node->values;
+
+    for (s_pos = 0; s_pos < length; s_pos++) {
+        if (text_pos + s_pos >= state->slice_end) {
+            if (state->partial_side == RE_PARTIAL_RIGHT)
+                return RE_ERROR_PARTIAL;
+
+            return RE_ERROR_FAILURE;
+        }
+
+        if (!same_char(encoding, char_at(state->text, text_pos + s_pos),
+          values[s_pos]))
+            return RE_ERROR_FAILURE;
+    }
+
+    next_position->node = next->match_next;
+    next_position->text_pos = text_pos + next->match_step;
+
+    return RE_ERROR_SUCCESS;
+}
+
+/* Checks whether there's a certain string at a position, ignoring case.
*/
+Py_LOCAL_INLINE(int) try_match_STRING_FLD(RE_State* state, RE_NextNode* next,
+  RE_Node* node, Py_ssize_t text_pos, RE_Position* next_position) {
+    Py_ssize_t length;
+    Py_UCS4 (*char_at)(void* text, Py_ssize_t pos);
+    RE_EncodingTable* encoding;
+    int (*full_case_fold)(Py_UCS4 ch, Py_UCS4* folded);
+    Py_ssize_t s_pos;
+    RE_CODE* values;
+    int folded_len;
+    int f_pos;
+    Py_ssize_t start_pos;
+    Py_UCS4 folded[RE_MAX_FOLDED];
+
+    length = (Py_ssize_t)node->value_count;
+    char_at = state->char_at;
+    encoding = state->encoding;
+    full_case_fold = encoding->full_case_fold;
+
+    s_pos = 0;
+    values = node->values;
+    folded_len = 0;
+    f_pos = 0;
+    start_pos = text_pos;
+
+    while (s_pos < length) {
+        if (f_pos >= folded_len) {
+            /* Fetch and casefold another character. If the end of the slice
+             * has been reached there's nothing left to fetch, so the result
+             * is a partial match or a failure, depending on partial_side.
+             */
+            if (text_pos >= state->slice_end) {
+                if (state->partial_side == RE_PARTIAL_RIGHT)
+                    return RE_ERROR_PARTIAL;
+
+                return RE_ERROR_FAILURE;
+            }
+
+            folded_len = full_case_fold(char_at(state->text, text_pos),
+              folded);
+            f_pos = 0;
+        }
+
+        if (!same_char_ign(encoding, folded[f_pos], values[s_pos]))
+            return RE_ERROR_FAILURE;
+
+        ++s_pos;
+        ++f_pos;
+
+        if (f_pos >= folded_len) /* all of this folded character was used */
+            ++text_pos;
+    }
+
+    if (f_pos < folded_len) /* string ended part-way through a folded char */
+        return RE_ERROR_FAILURE;
+
+    next_position->node = next->match_next;
+    if (next->match_step == 0) /* a step of 0 leaves the position unchanged */
+        next_position->text_pos = start_pos;
+    else
+        next_position->text_pos = text_pos;
+
+    return RE_ERROR_SUCCESS;
+}
+
+/* Checks whether there's a certain string at a position, ignoring case,
+ * backwards.
+ */
+Py_LOCAL_INLINE(int) try_match_STRING_FLD_REV(RE_State* state, RE_NextNode*
+  next, RE_Node* node, Py_ssize_t text_pos, RE_Position* next_position) {
+    Py_ssize_t length;
+    Py_UCS4 (*char_at)(void* text, Py_ssize_t pos);
+    RE_EncodingTable* encoding;
+    int (*full_case_fold)(Py_UCS4 ch, Py_UCS4* folded);
+    Py_ssize_t s_pos;
+    RE_CODE* values;
+    int folded_len;
+    int f_pos;
+    Py_ssize_t start_pos;
+    Py_UCS4 folded[RE_MAX_FOLDED];
+
+    length = (Py_ssize_t)node->value_count;
+    char_at = state->char_at;
+    encoding = state->encoding;
+    full_case_fold = encoding->full_case_fold;
+
+    s_pos = 0;
+    values = node->values;
+    folded_len = 0;
+    f_pos = 0;
+    start_pos = text_pos;
+
+    while (s_pos < length) {
+        if (f_pos >= folded_len) {
+            /* Fetch and casefold another character. If the start of the slice
+             * has been reached there's nothing left to fetch, so the result
+             * is a partial match or a failure, depending on partial_side.
+             */
+            if (text_pos <= state->slice_start) {
+                if (state->partial_side == RE_PARTIAL_LEFT)
+                    return RE_ERROR_PARTIAL;
+
+                return RE_ERROR_FAILURE;
+            }
+
+            folded_len = full_case_fold(char_at(state->text, text_pos - 1),
+              folded);
+            f_pos = 0;
+        }
+
+        /* Both the folded characters and the string are read from their last
+         * element towards their first.
+         */
+        if (!same_char_ign(encoding, folded[folded_len - f_pos - 1],
+          values[length - s_pos - 1]))
+            return RE_ERROR_FAILURE;
+
+        ++s_pos;
+        ++f_pos;
+
+        if (f_pos >= folded_len) /* all of this folded character was used */
+            --text_pos;
+    }
+
+    if (f_pos < folded_len) /* string ended part-way through a folded char */
+        return RE_ERROR_FAILURE;
+
+    next_position->node = next->match_next;
+    if (next->match_step == 0) /* a step of 0 leaves the position unchanged */
+        next_position->text_pos = start_pos;
+    else
+        next_position->text_pos = text_pos;
+
+    return RE_ERROR_SUCCESS;
+}
+
+/* Checks whether there's a certain string at a position, ignoring case.
*/
+Py_LOCAL_INLINE(int) try_match_STRING_IGN(RE_State* state, RE_NextNode* next,
+  RE_Node* node, Py_ssize_t text_pos, RE_Position* next_position) {
+    Py_ssize_t length;
+    Py_UCS4 (*char_at)(void* text, Py_ssize_t pos);
+    RE_EncodingTable* encoding;
+    RE_CODE* values;
+    Py_ssize_t s_pos;
+
+    length = (Py_ssize_t)node->value_count;
+    char_at = state->char_at;
+    encoding = state->encoding;
+    values = node->values;
+
+    for (s_pos = 0; s_pos < length; s_pos++) {
+        if (text_pos + s_pos >= state->slice_end) { /* ran out of text */
+            if (state->partial_side == RE_PARTIAL_RIGHT)
+                return RE_ERROR_PARTIAL;
+
+            return RE_ERROR_FAILURE;
+        }
+
+        if (!same_char_ign(encoding, char_at(state->text, text_pos + s_pos),
+          values[s_pos]))
+            return RE_ERROR_FAILURE;
+    }
+
+    next_position->node = next->match_next;
+    next_position->text_pos = text_pos + next->match_step;
+
+    return RE_ERROR_SUCCESS;
+}
+
+/* Checks whether there's a certain string at a position, ignoring case,
+ * backwards.
+ *
+ * Compares the node's values, last element first, with the text characters
+ * at text_pos - 1, text_pos - 2 and so on, using same_char_ign(). If the start
+ * of the slice is reached before the whole string has been compared, returns
+ * RE_ERROR_PARTIAL when partial matching on the left is enabled and
+ * RE_ERROR_FAILURE otherwise. A mismatch returns RE_ERROR_FAILURE.
+ *
+ * On success, fills in next_position from next->match_next and text_pos +
+ * next->match_step, and returns RE_ERROR_SUCCESS.
+ */
+Py_LOCAL_INLINE(int) try_match_STRING_IGN_REV(RE_State* state, RE_NextNode*
+  next, RE_Node* node, Py_ssize_t text_pos, RE_Position* next_position) {
+    Py_ssize_t length;
+    Py_UCS4 (*char_at)(void* text, Py_ssize_t pos);
+    RE_EncodingTable* encoding;
+    RE_CODE* values;
+    Py_ssize_t s_pos;
+
+    length = (Py_ssize_t)node->value_count;
+    char_at = state->char_at;
+    encoding = state->encoding;
+    values = node->values;
+
+    for (s_pos = 0; s_pos < length; s_pos++) {
+        if (text_pos - s_pos <= state->slice_start) {
+            if (state->partial_side == RE_PARTIAL_LEFT)
+                return RE_ERROR_PARTIAL;
+
+            return RE_ERROR_FAILURE;
+        }
+
+        if (!same_char_ign(encoding, char_at(state->text, text_pos - s_pos -
+          1), values[length - s_pos - 1]))
+            return RE_ERROR_FAILURE;
+    }
+
+    next_position->node = next->match_next;
+    next_position->text_pos = text_pos + next->match_step;
+
+    return RE_ERROR_SUCCESS;
+}
+
+/* Checks whether there's a certain string at a position, backwards.
*/
+Py_LOCAL_INLINE(int) try_match_STRING_REV(RE_State* state, RE_NextNode* next,
+  RE_Node* node, Py_ssize_t text_pos, RE_Position* next_position) {
+    Py_ssize_t length;
+    Py_UCS4 (*char_at)(void* text, Py_ssize_t pos);
+    RE_EncodingTable* encoding;
+    RE_CODE* values;
+    Py_ssize_t s_pos;
+
+    length = (Py_ssize_t)node->value_count;
+    char_at = state->char_at;
+    encoding = state->encoding;
+    values = node->values;
+
+    for (s_pos = 0; s_pos < length; s_pos++) {
+        if (text_pos - s_pos <= state->slice_start) { /* ran out of text */
+            if (state->partial_side == RE_PARTIAL_LEFT)
+                return RE_ERROR_PARTIAL;
+
+            return RE_ERROR_FAILURE;
+        }
+
+        if (!same_char(encoding, char_at(state->text, text_pos - s_pos - 1),
+          values[length - s_pos - 1])) /* string is read last element first */
+            return RE_ERROR_FAILURE;
+    }
+
+    next_position->node = next->match_next;
+    next_position->text_pos = text_pos + next->match_step;
+
+    return RE_ERROR_SUCCESS;
+}
+
+/* Tries a match at the current text position.
+ *
+ * Returns the next node and text position if the match succeeds.
+ */
+Py_LOCAL_INLINE(int) try_match(RE_State* state, RE_NextNode* next, Py_ssize_t
+  text_pos, RE_Position* next_position) {
+    RE_Node* test;
+    int status;
+
+    test = next->test;
+
+    if (test->status & RE_STATUS_FUZZY) { /* fuzzy nodes aren't tested here */
+        next_position->node = next->node;
+        next_position->text_pos = text_pos;
+        return RE_ERROR_SUCCESS;
+    }
+
+    switch (test->op) { /* dispatch on the opcode of the test node */
+    case RE_OP_ANY:
+        status = try_match_ANY(state, test, text_pos);
+        break;
+    case RE_OP_ANY_ALL:
+        status = try_match_ANY_ALL(state, test, text_pos);
+        break;
+    case RE_OP_ANY_ALL_REV:
+        status = try_match_ANY_ALL_REV(state, test, text_pos);
+        break;
+    case RE_OP_ANY_REV:
+        status = try_match_ANY_REV(state, test, text_pos);
+        break;
+    case RE_OP_ANY_U:
+        status = try_match_ANY_U(state, test, text_pos);
+        break;
+    case RE_OP_ANY_U_REV:
+        status = try_match_ANY_U_REV(state, test, text_pos);
+        break;
+    case RE_OP_BOUNDARY:
+        status = try_match_BOUNDARY(state, test, text_pos);
+        break;
+    case RE_OP_BRANCH: /* try the first branch, then the second on failure */
+        status = try_match(state, &test->next_1, text_pos, next_position);
+        if (status == RE_ERROR_FAILURE)
+            status = try_match(state, &test->nonstring.next_2, text_pos,
+              next_position);
+        break;
+    case RE_OP_CHARACTER:
+        status = try_match_CHARACTER(state, test, text_pos);
+        break;
+    case RE_OP_CHARACTER_IGN:
+        status = try_match_CHARACTER_IGN(state, test, text_pos);
+        break;
+    case RE_OP_CHARACTER_IGN_REV:
+        status = try_match_CHARACTER_IGN_REV(state, test, text_pos);
+        break;
+    case RE_OP_CHARACTER_REV:
+        status = try_match_CHARACTER_REV(state, test, text_pos);
+        break;
+    case RE_OP_DEFAULT_BOUNDARY:
+        status = try_match_DEFAULT_BOUNDARY(state, test, text_pos);
+        break;
+    case RE_OP_DEFAULT_END_OF_WORD:
+        status = try_match_DEFAULT_END_OF_WORD(state, test, text_pos);
+        break;
+    case RE_OP_DEFAULT_START_OF_WORD:
+        status = try_match_DEFAULT_START_OF_WORD(state, test, text_pos);
+        break;
+    case RE_OP_END_OF_LINE:
+        status = try_match_END_OF_LINE(state, test, text_pos);
+        break;
+    case RE_OP_END_OF_LINE_U:
+        status = try_match_END_OF_LINE_U(state, test, text_pos);
+        break;
+    case RE_OP_END_OF_STRING:
+        status = try_match_END_OF_STRING(state, test, text_pos);
+        break;
+    case RE_OP_END_OF_STRING_LINE:
+        status = try_match_END_OF_STRING_LINE(state, test, text_pos);
+        break;
+    case RE_OP_END_OF_STRING_LINE_U:
+        status = try_match_END_OF_STRING_LINE_U(state, test, text_pos);
+        break;
+    case RE_OP_END_OF_WORD:
+        status = try_match_END_OF_WORD(state, test, text_pos);
+        break;
+    case RE_OP_GRAPHEME_BOUNDARY:
+        status = try_match_GRAPHEME_BOUNDARY(state, test, text_pos);
+        break;
+    case RE_OP_PROPERTY:
+        status = try_match_PROPERTY(state, test, text_pos);
+        break;
+    case RE_OP_PROPERTY_IGN:
+        status = try_match_PROPERTY_IGN(state, test, text_pos);
+        break;
+    case RE_OP_PROPERTY_IGN_REV:
+        status = try_match_PROPERTY_IGN_REV(state, test, text_pos);
+        break;
+    case RE_OP_PROPERTY_REV:
+        status = try_match_PROPERTY_REV(state, test, text_pos);
+        break;
+    case RE_OP_RANGE:
+        status = try_match_RANGE(state, test, text_pos);
+        break;
+    case RE_OP_RANGE_IGN:
+        status = try_match_RANGE_IGN(state, test, text_pos);
+        break;
+    case RE_OP_RANGE_IGN_REV:
+        status = try_match_RANGE_IGN_REV(state, test, text_pos);
+        break;
+    case RE_OP_RANGE_REV:
+        status = try_match_RANGE_REV(state, test, text_pos);
+        break;
+    case RE_OP_SEARCH_ANCHOR:
+        status = try_match_SEARCH_ANCHOR(state, test, text_pos);
+        break;
+    case RE_OP_SET_DIFF: /* all four set ops share one test function */
+    case RE_OP_SET_INTER:
+    case RE_OP_SET_SYM_DIFF:
+    case RE_OP_SET_UNION:
+        status = try_match_SET(state, test, text_pos);
+        break;
+    case RE_OP_SET_DIFF_IGN:
+    case RE_OP_SET_INTER_IGN:
+    case RE_OP_SET_SYM_DIFF_IGN:
+    case RE_OP_SET_UNION_IGN:
+        status = try_match_SET_IGN(state, test, text_pos);
+        break;
+    case RE_OP_SET_DIFF_IGN_REV:
+    case RE_OP_SET_INTER_IGN_REV:
+    case RE_OP_SET_SYM_DIFF_IGN_REV:
+    case RE_OP_SET_UNION_IGN_REV:
+        status = try_match_SET_IGN_REV(state, test, text_pos);
+        break;
+    case RE_OP_SET_DIFF_REV:
+    case RE_OP_SET_INTER_REV:
+    case RE_OP_SET_SYM_DIFF_REV:
+    case RE_OP_SET_UNION_REV:
+        status = try_match_SET_REV(state, test, text_pos);
+        break;
+    case RE_OP_START_OF_LINE:
+        status = try_match_START_OF_LINE(state, test, text_pos);
+        break;
+    case RE_OP_START_OF_LINE_U:
+        status = try_match_START_OF_LINE_U(state, test, text_pos);
+        break;
+    case RE_OP_START_OF_STRING:
+        status = try_match_START_OF_STRING(state, test, text_pos);
+        break;
+    case RE_OP_START_OF_WORD:
+        status = try_match_START_OF_WORD(state, test, text_pos);
+        break;
+    case RE_OP_STRING: /* string ops fill in next_position themselves */
+        return try_match_STRING(state, next, test, text_pos, next_position);
+    case RE_OP_STRING_FLD:
+        return try_match_STRING_FLD(state, next, test, text_pos,
+          next_position);
+    case RE_OP_STRING_FLD_REV:
+        return try_match_STRING_FLD_REV(state, next, test, text_pos,
+          next_position);
+    case RE_OP_STRING_IGN:
+        return try_match_STRING_IGN(state, next, test, text_pos,
+          next_position);
+    case RE_OP_STRING_IGN_REV:
+        return try_match_STRING_IGN_REV(state, next, test, text_pos,
+          next_position);
+    case RE_OP_STRING_REV:
+        return try_match_STRING_REV(state, next, test, text_pos,
+          next_position);
+    default: /* any other op: succeed at next->node without moving */
+        next_position->node = next->node;
+        next_position->text_pos = text_pos;
+        return RE_ERROR_SUCCESS;
+    }
+
+    if (status != RE_ERROR_SUCCESS)
+        return status; /* not a success: pass the status up unchanged */
+
+    next_position->node = next->match_next;
+    next_position->text_pos = text_pos + next->match_step;
+
+    return RE_ERROR_SUCCESS;
+}
+
+/* Searches for a word boundary.
+ *
+ * Starting at text_pos and moving forwards one position at a time, returns
+ * the first position at which the encoding's at_boundary() result equals
+ * node->match. Returns -1 if the end of the slice is reached without finding
+ * one. *is_partial is always set to FALSE.
+ */
+Py_LOCAL_INLINE(Py_ssize_t) search_start_BOUNDARY(RE_State* state, RE_Node*
+  node, Py_ssize_t text_pos, BOOL* is_partial) {
+    BOOL (*at_boundary)(RE_State* state, Py_ssize_t text_pos);
+
+    at_boundary = state->encoding->at_boundary;
+
+    *is_partial = FALSE;
+
+    for (;;) {
+        if (at_boundary(state, text_pos) == node->match)
+            return text_pos;
+
+        if (text_pos >= state->slice_end) /* tested after the position check */
+            return -1;
+
+        ++text_pos;
+    }
+}
+
+/* Searches for a word boundary, backwards.
*/ +Py_LOCAL_INLINE(Py_ssize_t) search_start_BOUNDARY_rev(RE_State* state, RE_Node* + node, Py_ssize_t text_pos, BOOL* is_partial) { + BOOL (*at_boundary)(RE_State* state, Py_ssize_t text_pos); + + at_boundary = state->encoding->at_boundary; + + *is_partial = FALSE; + + for (;;) { + if (at_boundary(state, text_pos) == node->match) + return text_pos; + + if (text_pos <= state->slice_start) + return -1; + + --text_pos; + } +} + +/* Searches for a default word boundary. */ +Py_LOCAL_INLINE(Py_ssize_t) search_start_DEFAULT_BOUNDARY(RE_State* state, + RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { + BOOL (*at_default_boundary)(RE_State* state, Py_ssize_t text_pos); + + at_default_boundary = state->encoding->at_default_boundary; + + *is_partial = FALSE; + + for (;;) { + if (at_default_boundary(state, text_pos) == node->match) + return text_pos; + + if (text_pos >= state->slice_end) + return -1; + + ++text_pos; + } +} + +/* Searches for a default word boundary, backwards. */ +Py_LOCAL_INLINE(Py_ssize_t) search_start_DEFAULT_BOUNDARY_rev(RE_State* state, + RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { + BOOL (*at_default_boundary)(RE_State* state, Py_ssize_t text_pos); + + at_default_boundary = state->encoding->at_default_boundary; + + *is_partial = FALSE; + + for (;;) { + if (at_default_boundary(state, text_pos) == node->match) + return text_pos; + + if (text_pos <= state->slice_start) + return -1; + + --text_pos; + } +} + +/* Searches for the default end of a word. 
*/ +Py_LOCAL_INLINE(Py_ssize_t) search_start_DEFAULT_END_OF_WORD(RE_State* state, + RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { + BOOL (*at_default_word_end)(RE_State* state, Py_ssize_t text_pos); + + at_default_word_end = state->encoding->at_default_word_end; + + *is_partial = FALSE; + + for (;;) { + if (at_default_word_end(state, text_pos) == node->match) + return text_pos; + + if (text_pos >= state->slice_end) + return -1; + + ++text_pos; + } +} + +/* Searches for the default end of a word, backwards. */ +Py_LOCAL_INLINE(Py_ssize_t) search_start_DEFAULT_END_OF_WORD_rev(RE_State* + state, RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { + BOOL (*at_default_word_end)(RE_State* state, Py_ssize_t text_pos); + + at_default_word_end = state->encoding->at_default_word_end; + + *is_partial = FALSE; + + for (;;) { + if (at_default_word_end(state, text_pos) == node->match) + return text_pos; + + if (text_pos <= state->slice_start) + return -1; + + --text_pos; + } +} + +/* Searches for the default start of a word. */ +Py_LOCAL_INLINE(Py_ssize_t) search_start_DEFAULT_START_OF_WORD(RE_State* state, + RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { + BOOL (*at_default_word_start)(RE_State* state, Py_ssize_t text_pos); + + at_default_word_start = state->encoding->at_default_word_start; + + *is_partial = FALSE; + + for (;;) { + if (at_default_word_start(state, text_pos) == node->match) + return text_pos; + + if (text_pos >= state->slice_end) + return -1; + + ++text_pos; + } +} + +/* Searches for the default start of a word, backwards. 
*/ +Py_LOCAL_INLINE(Py_ssize_t) search_start_DEFAULT_START_OF_WORD_rev(RE_State* + state, RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { + BOOL (*at_default_word_start)(RE_State* state, Py_ssize_t text_pos); + + at_default_word_start = state->encoding->at_default_word_start; + + *is_partial = FALSE; + + for (;;) { + if (at_default_word_start(state, text_pos) == node->match) + return text_pos; + + if (text_pos <= state->slice_start) + return -1; + + --text_pos; + } +} + +/* Searches for the end of line. */ +Py_LOCAL_INLINE(Py_ssize_t) search_start_END_OF_LINE(RE_State* state, RE_Node* + node, Py_ssize_t text_pos, BOOL* is_partial) { + *is_partial = FALSE; + + for (;;) { + if (text_pos >= state->text_length || state->char_at(state->text, + text_pos) == '\n') + return text_pos; + + if (text_pos >= state->slice_end) + return -1; + + ++text_pos; + } +} + +/* Searches for the end of line, backwards. */ +Py_LOCAL_INLINE(Py_ssize_t) search_start_END_OF_LINE_rev(RE_State* state, + RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { + *is_partial = FALSE; + + for (;;) { + if (text_pos >= state->text_length || state->char_at(state->text, + text_pos) == '\n') + return text_pos; + + if (text_pos <= state->slice_start) + return -1; + + --text_pos; + } +} + +/* Searches for the end of the string. */ +Py_LOCAL_INLINE(Py_ssize_t) search_start_END_OF_STRING(RE_State* state, + RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { + *is_partial = FALSE; + + if (state->slice_end >= state->text_length) + return state->text_length; + + return -1; +} + +/* Searches for the end of the string, backwards. */ +Py_LOCAL_INLINE(Py_ssize_t) search_start_END_OF_STRING_rev(RE_State* state, + RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { + *is_partial = FALSE; + + if (text_pos >= state->text_length) + return text_pos; + + return -1; +} + +/* Searches for the end of the string or line. 
*/ +Py_LOCAL_INLINE(Py_ssize_t) search_start_END_OF_STRING_LINE(RE_State* state, + RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { + *is_partial = FALSE; + + if (text_pos <= state->final_newline) + text_pos = state->final_newline; + else if (text_pos <= state->text_length) + text_pos = state->text_length; + + if (text_pos > state->slice_end) + return -1; + + if (text_pos >= state->text_length) + return text_pos; + + return text_pos; +} + +/* Searches for the end of the string or line, backwards. */ +Py_LOCAL_INLINE(Py_ssize_t) search_start_END_OF_STRING_LINE_rev(RE_State* + state, RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { + *is_partial = FALSE; + + if (text_pos >= state->text_length) + text_pos = state->text_length; + else if (text_pos >= state->final_newline) + text_pos = state->final_newline; + else + return -1; + + if (text_pos < state->slice_start) + return -1; + + if (text_pos <= 0) + return text_pos; + + return text_pos; +} + +/* Searches for the end of a word. */ +Py_LOCAL_INLINE(Py_ssize_t) search_start_END_OF_WORD(RE_State* state, RE_Node* + node, Py_ssize_t text_pos, BOOL* is_partial) { + BOOL (*at_word_end)(RE_State* state, Py_ssize_t text_pos); + + at_word_end = state->encoding->at_word_end; + + *is_partial = FALSE; + + for (;;) { + if (at_word_end(state, text_pos) == node->match) + return text_pos; + + if (text_pos >= state->slice_end) + return -1; + + ++text_pos; + } +} + +/* Searches for the end of a word, backwards. */ +Py_LOCAL_INLINE(Py_ssize_t) search_start_END_OF_WORD_rev(RE_State* state, + RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { + BOOL (*at_word_end)(RE_State* state, Py_ssize_t text_pos); + + at_word_end = state->encoding->at_word_end; + + *is_partial = FALSE; + + for (;;) { + if (at_word_end(state, text_pos) == node->match) + return text_pos; + + if (text_pos <= state->slice_start) + return -1; + + --text_pos; + } +} + +/* Searches for a grapheme boundary. 
*/ +Py_LOCAL_INLINE(Py_ssize_t) search_start_GRAPHEME_BOUNDARY(RE_State* state, + RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { + BOOL (*at_grapheme_boundary)(RE_State* state, Py_ssize_t text_pos); + + at_grapheme_boundary = state->encoding->at_grapheme_boundary; + + *is_partial = FALSE; + + for (;;) { + if (at_grapheme_boundary(state, text_pos) == node->match) + return text_pos; + + if (text_pos >= state->slice_end) + return -1; + + ++text_pos; + } +} + +/* Searches for a grapheme boundary, backwards. */ +Py_LOCAL_INLINE(Py_ssize_t) search_start_GRAPHEME_BOUNDARY_rev(RE_State* state, + RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { + BOOL (*at_grapheme_boundary)(RE_State* state, Py_ssize_t text_pos); + + at_grapheme_boundary = state->encoding->at_grapheme_boundary; + + *is_partial = FALSE; + + for (;;) { + if (at_grapheme_boundary(state, text_pos) == node->match) + return text_pos; + + if (text_pos <= state->slice_start) + return -1; + + --text_pos; + } +} + +/* Searches for the start of line. */ +Py_LOCAL_INLINE(Py_ssize_t) search_start_START_OF_LINE(RE_State* state, + RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { + *is_partial = FALSE; + + for (;;) { + if (text_pos <= 0 || state->char_at(state->text, text_pos - 1) == '\n') + return text_pos; + + if (text_pos >= state->slice_end) + return -1; + + ++text_pos; + } +} + +/* Searches for the start of line, backwards. */ +Py_LOCAL_INLINE(Py_ssize_t) search_start_START_OF_LINE_rev(RE_State* state, + RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { + *is_partial = FALSE; + + for (;;) { + if (text_pos <= 0 || state->char_at(state->text, text_pos - 1) == '\n') + return text_pos; + + if (text_pos <= state->slice_start) + return -1; + + --text_pos; + } +} + +/* Searches for the start of the string. 
*/ +Py_LOCAL_INLINE(Py_ssize_t) search_start_START_OF_STRING(RE_State* state, + RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { + *is_partial = FALSE; + + if (text_pos <= 0) + return text_pos; + + return -1; +} + +/* Searches for the start of the string, backwards. */ +Py_LOCAL_INLINE(Py_ssize_t) search_start_START_OF_STRING_rev(RE_State* state, + RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { + *is_partial = FALSE; + + if (state->slice_start <= 0) + return 0; + + return -1; +} + +/* Searches for the start of a word. */ +Py_LOCAL_INLINE(Py_ssize_t) search_start_START_OF_WORD(RE_State* state, + RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { + BOOL (*at_word_start)(RE_State* state, Py_ssize_t text_pos); + + at_word_start = state->encoding->at_word_start; + + *is_partial = FALSE; + + for (;;) { + if (at_word_start(state, text_pos) == node->match) + return text_pos; + + if (text_pos >= state->slice_end) + return -1; + + ++text_pos; + } +} + +/* Searches for the start of a word, backwards. */ +Py_LOCAL_INLINE(Py_ssize_t) search_start_START_OF_WORD_rev(RE_State* state, + RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { + BOOL (*at_word_start)(RE_State* state, Py_ssize_t text_pos); + + at_word_start = state->encoding->at_word_start; + + *is_partial = FALSE; + + for (;;) { + if (at_word_start(state, text_pos) == node->match) + return text_pos; + + if (text_pos <= state->slice_start) + return -1; + + --text_pos; + } +} + +/* Searches for a string. */ +Py_LOCAL_INLINE(Py_ssize_t) search_start_STRING(RE_SafeState* safe_state, + RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { + RE_State* state; + + state = safe_state->re_state; + + *is_partial = FALSE; + + if ((node->status & RE_STATUS_REQUIRED) && text_pos == state->req_pos) + return text_pos; + + return string_search(safe_state, node, text_pos, state->slice_end, + is_partial); +} + +/* Searches for a string, ignoring case. 
*/ +Py_LOCAL_INLINE(Py_ssize_t) search_start_STRING_FLD(RE_SafeState* safe_state, + RE_Node* node, Py_ssize_t text_pos, Py_ssize_t* new_pos, BOOL* is_partial) { + RE_State* state; + + state = safe_state->re_state; + + *is_partial = FALSE; + + if ((node->status & RE_STATUS_REQUIRED) && text_pos == state->req_pos) { + *new_pos = state->req_end; + return text_pos; + } + + return string_search_fld(safe_state, node, text_pos, state->slice_end, + new_pos, is_partial); +} + +/* Searches for a string, ignoring case, backwards. */ +Py_LOCAL_INLINE(Py_ssize_t) search_start_STRING_FLD_REV(RE_SafeState* + safe_state, RE_Node* node, Py_ssize_t text_pos, Py_ssize_t* new_pos, BOOL* + is_partial) { + RE_State* state; + + state = safe_state->re_state; + + *is_partial = FALSE; + + if ((node->status & RE_STATUS_REQUIRED) && text_pos == state->req_pos) { + *new_pos = state->req_end; + return text_pos; + } + + return string_search_fld_rev(safe_state, node, text_pos, + state->slice_start, new_pos, is_partial); +} + +/* Searches for a string, ignoring case. */ +Py_LOCAL_INLINE(Py_ssize_t) search_start_STRING_IGN(RE_SafeState* safe_state, + RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { + RE_State* state; + + state = safe_state->re_state; + + *is_partial = FALSE; + + if ((node->status & RE_STATUS_REQUIRED) && text_pos == state->req_pos) + return text_pos; + + return string_search_ign(safe_state, node, text_pos, state->slice_end, + is_partial); +} + +/* Searches for a string, ignoring case, backwards. */ +Py_LOCAL_INLINE(Py_ssize_t) search_start_STRING_IGN_REV(RE_SafeState* + safe_state, RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { + RE_State* state; + + state = safe_state->re_state; + + *is_partial = FALSE; + + if ((node->status & RE_STATUS_REQUIRED) && text_pos == state->req_pos) + return text_pos; + + return string_search_ign_rev(safe_state, node, text_pos, + state->slice_start, is_partial); +} + +/* Searches for a string, backwards. 
*/ +Py_LOCAL_INLINE(Py_ssize_t) search_start_STRING_REV(RE_SafeState* safe_state, + RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { + RE_State* state; + + state = safe_state->re_state; + + *is_partial = FALSE; + + if ((node->status & RE_STATUS_REQUIRED) && text_pos == state->req_pos) + return text_pos; + + return string_search_rev(safe_state, node, text_pos, state->slice_start, + is_partial); +} + +/* Searches for the start of a match. */ +Py_LOCAL_INLINE(int) search_start(RE_SafeState* safe_state, RE_NextNode* next, + RE_Position* new_position, int search_index) { + RE_State* state; + Py_ssize_t text_pos; + RE_Node* test; + RE_Node* node; + Py_ssize_t start_pos; + RE_SearchPosition* info; + + state = safe_state->re_state; + + start_pos = state->text_pos; + TRACE(("<> at %d\n", start_pos)) + + test = next->test; + node = next->node; + + if (state->reverse) { + if (start_pos < state->slice_start) { + if (state->partial_side == RE_PARTIAL_LEFT) { + new_position->text_pos = state->slice_start; + return RE_ERROR_PARTIAL; + } + + return RE_ERROR_FAILURE; + } + } else { + if (start_pos > state->slice_end) { + if (state->partial_side == RE_PARTIAL_RIGHT) { + new_position->text_pos = state->slice_end; + return RE_ERROR_PARTIAL; + } + } + } + + if (test->status & RE_STATUS_FUZZY) { + /* Don't call 'search_start' again. 
*/ + state->pattern->do_search_start = FALSE; + + state->match_pos = start_pos; + new_position->node = node; + new_position->text_pos = start_pos; + + return RE_ERROR_SUCCESS; + } + +again: + if (!state->pattern->is_fuzzy && state->partial_side == RE_PARTIAL_NONE) { + if (state->reverse) { + if (start_pos - state->min_width < state->slice_start) + return RE_ERROR_FAILURE; + } else { + if (start_pos + state->min_width > state->slice_end) + return RE_ERROR_FAILURE; + } + } + + if (search_index < MAX_SEARCH_POSITIONS) { + info = &state->search_positions[search_index]; + if (state->reverse) { + if (info->start_pos >= 0 && info->start_pos >= start_pos && + start_pos >= info->match_pos) { + state->match_pos = info->match_pos; + + new_position->text_pos = state->match_pos; + new_position->node = node; + + return RE_ERROR_SUCCESS; + } + } else { + if (info->start_pos >= 0 && info->start_pos <= start_pos && + start_pos <= info->match_pos) { + state->match_pos = info->match_pos; + + new_position->text_pos = state->match_pos; + new_position->node = node; + + return RE_ERROR_SUCCESS; + } + } + } else + info = NULL; + + switch (test->op) { + case RE_OP_ANY: + start_pos = match_many_ANY(state, test, start_pos, state->slice_end, + FALSE); + + if (start_pos >= state->text_length) { + if (state->partial_side == RE_PARTIAL_RIGHT) { + new_position->text_pos = start_pos; + return RE_ERROR_PARTIAL; + } + } + + if (start_pos >= state->slice_end) + return RE_ERROR_FAILURE; + break; + case RE_OP_ANY_ALL: + break; + case RE_OP_ANY_ALL_REV: + break; + case RE_OP_ANY_REV: + start_pos = match_many_ANY_REV(state, test, start_pos, + state->slice_start, FALSE); + + if (start_pos <= 0) { + if (state->partial_side == RE_PARTIAL_LEFT) { + new_position->text_pos = start_pos; + return RE_ERROR_PARTIAL; + } + } + + if (start_pos <= state->slice_start) + return RE_ERROR_FAILURE; + break; + case RE_OP_ANY_U: + start_pos = match_many_ANY_U(state, test, start_pos, state->slice_end, + FALSE); + + if 
(start_pos >= state->text_length) { + if (state->partial_side == RE_PARTIAL_RIGHT) { + new_position->text_pos = start_pos; + return RE_ERROR_PARTIAL; + } + } + + if (start_pos >= state->slice_end) + return RE_ERROR_FAILURE; + break; + case RE_OP_ANY_U_REV: + start_pos = match_many_ANY_U_REV(state, test, start_pos, + state->slice_start, FALSE); + + if (start_pos <= 0) { + if (state->partial_side == RE_PARTIAL_LEFT) { + new_position->text_pos = start_pos; + return RE_ERROR_PARTIAL; + } + } + + if (start_pos <= state->slice_start) + return RE_ERROR_FAILURE; + break; + case RE_OP_BOUNDARY: + { + BOOL is_partial; + + if (state->reverse) + start_pos = search_start_BOUNDARY_rev(state, test, start_pos, + &is_partial); + else + start_pos = search_start_BOUNDARY(state, test, start_pos, + &is_partial); + + if (start_pos < 0) + return RE_ERROR_FAILURE; + + if (is_partial) { + new_position->text_pos = start_pos; + return RE_ERROR_PARTIAL; + } + break; + } + case RE_OP_CHARACTER: + start_pos = match_many_CHARACTER(state, test, start_pos, + state->slice_end, FALSE); + + if (start_pos >= state->text_length) { + if (state->partial_side == RE_PARTIAL_RIGHT) { + new_position->text_pos = start_pos; + return RE_ERROR_PARTIAL; + } + } + + if (start_pos >= state->slice_end) + return RE_ERROR_FAILURE; + break; + case RE_OP_CHARACTER_IGN: + start_pos = match_many_CHARACTER_IGN(state, test, start_pos, + state->slice_end, FALSE); + + if (start_pos >= state->text_length) { + if (state->partial_side == RE_PARTIAL_RIGHT) { + new_position->text_pos = start_pos; + return RE_ERROR_PARTIAL; + } + } + + if (start_pos >= state->slice_end) + return RE_ERROR_FAILURE; + break; + case RE_OP_CHARACTER_IGN_REV: + start_pos = match_many_CHARACTER_IGN_REV(state, test, start_pos, + state->slice_start, FALSE); + + if (start_pos <= 0) { + if (state->partial_side == RE_PARTIAL_LEFT) { + new_position->text_pos = start_pos; + return RE_ERROR_PARTIAL; + } + } + + if (start_pos <= state->slice_start) + return 
RE_ERROR_FAILURE; + break; + case RE_OP_CHARACTER_REV: + start_pos = match_many_CHARACTER_REV(state, test, start_pos, + state->slice_start, FALSE); + + if (start_pos <= 0) { + if (state->partial_side == RE_PARTIAL_LEFT) { + new_position->text_pos = start_pos; + return RE_ERROR_PARTIAL; + } + } + + if (start_pos <= state->slice_start) + return RE_ERROR_FAILURE; + break; + case RE_OP_DEFAULT_BOUNDARY: + { + BOOL is_partial; + + if (state->reverse) + start_pos = search_start_DEFAULT_BOUNDARY_rev(state, test, + start_pos, &is_partial); + else + start_pos = search_start_DEFAULT_BOUNDARY(state, test, start_pos, + &is_partial); + + if (start_pos < 0) + return RE_ERROR_FAILURE; + + if (is_partial) { + new_position->text_pos = start_pos; + return RE_ERROR_PARTIAL; + } + break; + } + case RE_OP_DEFAULT_END_OF_WORD: + { + BOOL is_partial; + + if (state->reverse) + start_pos = search_start_DEFAULT_END_OF_WORD_rev(state, test, + start_pos, &is_partial); + else + start_pos = search_start_DEFAULT_END_OF_WORD(state, test, + start_pos, &is_partial); + + if (start_pos < 0) + return RE_ERROR_FAILURE; + + if (is_partial) { + new_position->text_pos = start_pos; + return RE_ERROR_PARTIAL; + } + break; + } + case RE_OP_DEFAULT_START_OF_WORD: + { + BOOL is_partial; + + if (state->reverse) + start_pos = search_start_DEFAULT_START_OF_WORD_rev(state, test, + start_pos, &is_partial); + else + start_pos = search_start_DEFAULT_START_OF_WORD(state, test, + start_pos, &is_partial); + + if (start_pos < 0) + return RE_ERROR_FAILURE; + + if (is_partial) { + new_position->text_pos = start_pos; + return RE_ERROR_PARTIAL; + } + break; + } + case RE_OP_END_OF_LINE: + { + BOOL is_partial; + + if (state->reverse) + start_pos = search_start_END_OF_LINE_rev(state, test, start_pos, + &is_partial); + else + start_pos = search_start_END_OF_LINE(state, test, start_pos, + &is_partial); + + if (start_pos < 0) + return RE_ERROR_FAILURE; + + if (is_partial) { + new_position->text_pos = start_pos; + return 
RE_ERROR_PARTIAL; + } + break; + } + case RE_OP_END_OF_STRING: + { + BOOL is_partial; + + if (state->reverse) + start_pos = search_start_END_OF_STRING_rev(state, test, start_pos, + &is_partial); + else + start_pos = search_start_END_OF_STRING(state, test, start_pos, + &is_partial); + + if (start_pos < 0) + return RE_ERROR_FAILURE; + + if (is_partial) { + new_position->text_pos = start_pos; + return RE_ERROR_PARTIAL; + } + break; + } + case RE_OP_END_OF_STRING_LINE: + { + BOOL is_partial; + + if (state->reverse) + start_pos = search_start_END_OF_STRING_LINE_rev(state, test, + start_pos, &is_partial); + else + start_pos = search_start_END_OF_STRING_LINE(state, test, start_pos, + &is_partial); + + if (start_pos < 0) + return RE_ERROR_FAILURE; + + if (is_partial) { + new_position->text_pos = start_pos; + return RE_ERROR_PARTIAL; + } + break; + } + case RE_OP_END_OF_WORD: + { + BOOL is_partial; + + if (state->reverse) + start_pos = search_start_END_OF_WORD_rev(state, test, start_pos, + &is_partial); + else + start_pos = search_start_END_OF_WORD(state, test, start_pos, + &is_partial); + + if (start_pos < 0) + return RE_ERROR_FAILURE; + + if (is_partial) { + new_position->text_pos = start_pos; + return RE_ERROR_PARTIAL; + } + break; + } + case RE_OP_GRAPHEME_BOUNDARY: + { + BOOL is_partial; + + if (state->reverse) + start_pos = search_start_GRAPHEME_BOUNDARY_rev(state, test, + start_pos, &is_partial); + else + start_pos = search_start_GRAPHEME_BOUNDARY(state, test, start_pos, + &is_partial); + + if (start_pos < 0) + return RE_ERROR_FAILURE; + + if (is_partial) { + new_position->text_pos = start_pos; + return RE_ERROR_PARTIAL; + } + break; + } + case RE_OP_PROPERTY: + start_pos = match_many_PROPERTY(state, test, start_pos, + state->slice_end, FALSE); + + if (start_pos >= state->text_length) { + if (state->partial_side == RE_PARTIAL_RIGHT) { + new_position->text_pos = start_pos; + return RE_ERROR_PARTIAL; + } + } + + if (start_pos >= state->slice_end) + return 
RE_ERROR_FAILURE; + break; + case RE_OP_PROPERTY_IGN: + start_pos = match_many_PROPERTY_IGN(state, test, start_pos, + state->slice_end, FALSE); + + if (start_pos >= state->text_length) { + if (state->partial_side == RE_PARTIAL_RIGHT) { + new_position->text_pos = start_pos; + return RE_ERROR_PARTIAL; + } + } + + if (start_pos >= state->slice_end) + return RE_ERROR_FAILURE; + break; + case RE_OP_PROPERTY_IGN_REV: + start_pos = match_many_PROPERTY_IGN_REV(state, test, start_pos, + state->slice_start, FALSE); + + if (start_pos <= 0) { + if (state->partial_side == RE_PARTIAL_LEFT) { + new_position->text_pos = start_pos; + return RE_ERROR_PARTIAL; + } + } + + if (start_pos <= state->slice_start) + return RE_ERROR_FAILURE; + break; + case RE_OP_PROPERTY_REV: + start_pos = match_many_PROPERTY_REV(state, test, start_pos, + state->slice_start, FALSE); + + if (start_pos <= 0) { + if (state->partial_side == RE_PARTIAL_LEFT) { + new_position->text_pos = start_pos; + return RE_ERROR_PARTIAL; + } + } + + if (start_pos <= state->slice_start) + return RE_ERROR_FAILURE; + break; + case RE_OP_RANGE: + start_pos = match_many_RANGE(state, test, start_pos, state->slice_end, + FALSE); + + if (start_pos >= state->text_length) { + if (state->partial_side == RE_PARTIAL_RIGHT) { + new_position->text_pos = start_pos; + return RE_ERROR_PARTIAL; + } + } + + if (start_pos >= state->slice_end) + return RE_ERROR_FAILURE; + break; + case RE_OP_RANGE_IGN: + start_pos = match_many_RANGE_IGN(state, test, start_pos, + state->slice_end, FALSE); + + if (start_pos >= state->text_length) { + if (state->partial_side == RE_PARTIAL_RIGHT) { + new_position->text_pos = start_pos; + return RE_ERROR_PARTIAL; + } + } + + if (start_pos >= state->slice_end) + return RE_ERROR_FAILURE; + break; + case RE_OP_RANGE_IGN_REV: + start_pos = match_many_RANGE_IGN_REV(state, test, start_pos, + state->slice_start, FALSE); + + if (start_pos <= 0) { + if (state->partial_side == RE_PARTIAL_LEFT) { + new_position->text_pos = 
start_pos; + return RE_ERROR_PARTIAL; + } + } + + if (start_pos <= state->slice_start) + return RE_ERROR_FAILURE; + break; + case RE_OP_RANGE_REV: + start_pos = match_many_RANGE_REV(state, test, start_pos, + state->slice_start, FALSE); + + if (start_pos <= 0) { + if (state->partial_side == RE_PARTIAL_LEFT) { + new_position->text_pos = start_pos; + return RE_ERROR_PARTIAL; + } + } + + if (start_pos <= state->slice_start) + return RE_ERROR_FAILURE; + break; + case RE_OP_SEARCH_ANCHOR: + if (state->reverse) { + if (start_pos < state->search_anchor) + return RE_ERROR_FAILURE; + } else { + if (start_pos > state->search_anchor) + return RE_ERROR_FAILURE; + } + + start_pos = state->search_anchor; + break; + case RE_OP_SET_DIFF: + case RE_OP_SET_INTER: + case RE_OP_SET_SYM_DIFF: + case RE_OP_SET_UNION: + start_pos = match_many_SET(state, test, start_pos, state->slice_end, + FALSE); + + if (start_pos >= state->text_length) { + if (state->partial_side == RE_PARTIAL_RIGHT) { + new_position->text_pos = start_pos; + return RE_ERROR_PARTIAL; + } + } + + if (start_pos >= state->slice_end) + return FALSE; + break; + case RE_OP_SET_DIFF_IGN: + case RE_OP_SET_INTER_IGN: + case RE_OP_SET_SYM_DIFF_IGN: + case RE_OP_SET_UNION_IGN: + start_pos = match_many_SET_IGN(state, test, start_pos, + state->slice_end, FALSE); + + if (start_pos >= state->text_length) { + if (state->partial_side == RE_PARTIAL_RIGHT) { + new_position->text_pos = start_pos; + return RE_ERROR_PARTIAL; + } + } + + if (start_pos >= state->slice_end) + return FALSE; + break; + case RE_OP_SET_DIFF_IGN_REV: + case RE_OP_SET_INTER_IGN_REV: + case RE_OP_SET_SYM_DIFF_IGN_REV: + case RE_OP_SET_UNION_IGN_REV: + start_pos = match_many_SET_IGN_REV(state, test, start_pos, + state->slice_start, FALSE); + + if (start_pos <= 0) { + if (state->partial_side == RE_PARTIAL_LEFT) { + new_position->text_pos = start_pos; + return RE_ERROR_PARTIAL; + } + } + + if (start_pos <= state->slice_start) + return FALSE; + break; + case 
RE_OP_SET_DIFF_REV: + case RE_OP_SET_INTER_REV: + case RE_OP_SET_SYM_DIFF_REV: + case RE_OP_SET_UNION_REV: + start_pos = match_many_SET_REV(state, test, start_pos, + state->slice_start, FALSE); + + if (start_pos <= 0) { + if (state->partial_side == RE_PARTIAL_LEFT) { + new_position->text_pos = start_pos; + return RE_ERROR_PARTIAL; + } + } + + if (start_pos <= state->slice_start) + return FALSE; + break; + case RE_OP_START_OF_LINE: + { + BOOL is_partial; + + if (state->reverse) + start_pos = search_start_START_OF_LINE_rev(state, test, start_pos, + &is_partial); + else + start_pos = search_start_START_OF_LINE(state, test, start_pos, + &is_partial); + + if (start_pos < 0) + return RE_ERROR_FAILURE; + + if (is_partial) { + new_position->text_pos = start_pos; + return RE_ERROR_PARTIAL; + } + break; + } + case RE_OP_START_OF_STRING: + { + BOOL is_partial; + + if (state->reverse) + start_pos = search_start_START_OF_STRING_rev(state, test, + start_pos, &is_partial); + else + start_pos = search_start_START_OF_STRING(state, test, start_pos, + &is_partial); + + if (start_pos < 0) + return RE_ERROR_FAILURE; + + if (is_partial) { + new_position->text_pos = start_pos; + return RE_ERROR_PARTIAL; + } + break; + } + case RE_OP_START_OF_WORD: + { + BOOL is_partial; + + if (state->reverse) + start_pos = search_start_START_OF_WORD_rev(state, test, start_pos, + &is_partial); + else + start_pos = search_start_START_OF_WORD(state, test, start_pos, + &is_partial); + + if (start_pos < 0) + return RE_ERROR_FAILURE; + + if (is_partial) { + new_position->text_pos = start_pos; + return RE_ERROR_PARTIAL; + } + break; + } + case RE_OP_STRING: + { + BOOL is_partial; + + start_pos = search_start_STRING(safe_state, test, start_pos, + &is_partial); + if (start_pos < 0) + return RE_ERROR_FAILURE; + + if (is_partial) { + new_position->text_pos = start_pos; + return RE_ERROR_PARTIAL; + } + break; + } + case RE_OP_STRING_FLD: + { + Py_ssize_t new_pos; + BOOL is_partial; + + start_pos = 
search_start_STRING_FLD(safe_state, test, start_pos, + &new_pos, &is_partial); + if (start_pos < 0) + return RE_ERROR_FAILURE; + + if (is_partial) { + new_position->text_pos = start_pos; + return RE_ERROR_PARTIAL; + } + + /* Can we look further ahead? */ + if (test == node) { + if (test->next_1.node) { + int status; + + status = try_match(state, &test->next_1, new_pos, + new_position); + if (status < 0) + return status; + + if (status == RE_ERROR_FAILURE) { + ++start_pos; + + if (start_pos >= state->slice_end) { + if (state->partial_side == RE_PARTIAL_RIGHT) { + new_position->text_pos = state->slice_start; + return RE_ERROR_PARTIAL; + } + + return RE_ERROR_FAILURE; + } + + goto again; + } + } + + /* It's a possible match. */ + state->match_pos = start_pos; + + if (info) { + info->start_pos = state->text_pos; + info->match_pos = state->match_pos; + } + + return RE_ERROR_SUCCESS; + } + break; + } + case RE_OP_STRING_FLD_REV: + { + Py_ssize_t new_pos; + BOOL is_partial; + + start_pos = search_start_STRING_FLD_REV(safe_state, test, start_pos, + &new_pos, &is_partial); + if (start_pos < 0) + return RE_ERROR_FAILURE; + + if (is_partial) { + new_position->text_pos = start_pos; + return RE_ERROR_PARTIAL; + } + + /* Can we look further ahead? */ + if (test == node) { + if (test->next_1.node) { + int status; + + status = try_match(state, &test->next_1, new_pos, + new_position); + if (status < 0) + return status; + + if (status == RE_ERROR_FAILURE) { + --start_pos; + + if (start_pos <= state->slice_start) { + if (state->partial_side == RE_PARTIAL_LEFT) { + new_position->text_pos = state->slice_start; + return RE_ERROR_PARTIAL; + } + + return RE_ERROR_FAILURE; + } + + goto again; + } + } + + /* It's a possible match. 
*/ + state->match_pos = start_pos; + + if (info) { + info->start_pos = state->text_pos; + info->match_pos = state->match_pos; + } + + return RE_ERROR_SUCCESS; + } + break; + } + case RE_OP_STRING_IGN: + { + BOOL is_partial; + + start_pos = search_start_STRING_IGN(safe_state, test, start_pos, + &is_partial); + if (start_pos < 0) + return RE_ERROR_FAILURE; + + if (is_partial) { + new_position->text_pos = start_pos; + return RE_ERROR_PARTIAL; + } + break; + } + case RE_OP_STRING_IGN_REV: + { + BOOL is_partial; + + start_pos = search_start_STRING_IGN_REV(safe_state, test, start_pos, + &is_partial); + if (start_pos < 0) + return RE_ERROR_FAILURE; + + if (is_partial) { + new_position->text_pos = start_pos; + return RE_ERROR_PARTIAL; + } + break; + } + case RE_OP_STRING_REV: + { + BOOL is_partial; + + start_pos = search_start_STRING_REV(safe_state, test, start_pos, + &is_partial); + if (start_pos < 0) + return RE_ERROR_FAILURE; + + if (is_partial) { + new_position->text_pos = start_pos; + return RE_ERROR_PARTIAL; + } + break; + } + default: + /* Don't call 'search_start' again. */ + state->pattern->do_search_start = FALSE; + + state->match_pos = start_pos; + new_position->node = node; + new_position->text_pos = start_pos; + return RE_ERROR_SUCCESS; + } + + /* Can we look further ahead? 
*/ + if (test == node) { + text_pos = start_pos + test->step; + + if (test->next_1.node) { + int status; + + status = try_match(state, &test->next_1, text_pos, new_position); + if (status < 0) + return status; + + if (status == RE_ERROR_FAILURE) { + if (state->reverse) { + --start_pos; + + if (start_pos < state->slice_start) { + if (state->partial_side == RE_PARTIAL_LEFT) { + new_position->text_pos = state->slice_start; + return RE_ERROR_PARTIAL; + } + + return RE_ERROR_FAILURE; + } + } else { + ++start_pos; + + if (start_pos > state->slice_end) { + if (state->partial_side == RE_PARTIAL_RIGHT) { + new_position->text_pos = state->slice_end; + return RE_ERROR_PARTIAL; + } + + return RE_ERROR_FAILURE; + } + } + + goto again; + } + } + } else { + new_position->node = node; + new_position->text_pos = start_pos; + } + + /* It's a possible match. */ + state->match_pos = start_pos; + + if (info) { + info->start_pos = state->text_pos; + info->match_pos = state->match_pos; + } + + return RE_ERROR_SUCCESS; +} + +/* Saves a capture group. */ +Py_LOCAL_INLINE(BOOL) save_capture(RE_SafeState* safe_state, size_t + private_index, size_t public_index) { + RE_State* state; + RE_GroupData* private_group; + RE_GroupData* public_group; + + state = safe_state->re_state; + + /* Capture group indexes are 1-based (excluding group 0, which is the + * entire matched string). + */ + private_group = &state->groups[private_index - 1]; + public_group = &state->groups[public_index - 1]; + + /* Will the repeated captures ever be visible? 
*/ + if (!state->visible_captures) { + public_group->captures[0] = private_group->span; + public_group->capture_count = 1; + + return TRUE; + } + + if (public_group->capture_count >= public_group->capture_capacity) { + size_t new_capacity; + RE_GroupSpan* new_captures; + + new_capacity = public_group->capture_capacity * 2; + new_capacity = max_size_t(new_capacity, RE_INIT_CAPTURE_SIZE); + new_captures = (RE_GroupSpan*)safe_realloc(safe_state, + public_group->captures, new_capacity * sizeof(RE_GroupSpan)); + if (!new_captures) + return FALSE; + + public_group->captures = new_captures; + public_group->capture_capacity = new_capacity; + } + + public_group->captures[public_group->capture_count++] = + private_group->span; + + return TRUE; +} + +/* Unsaves a capture group. */ +Py_LOCAL_INLINE(void) unsave_capture(RE_State* state, size_t private_index, + size_t public_index) { + /* Capture group indexes are 1-based (excluding group 0, which is the + * entire matched string). + */ + if (state->groups[public_index - 1].capture_count > 0) + --state->groups[public_index - 1].capture_count; +} + +/* Pushes the groups for backtracking. 
*/ +Py_LOCAL_INLINE(BOOL) push_groups(RE_SafeState* safe_state) { + RE_State* state; + size_t group_count; + RE_SavedGroups* current; + size_t g; + + state = safe_state->re_state; + + group_count = state->pattern->true_group_count; + if (group_count == 0) + return TRUE; + + current = state->current_saved_groups; + + if (current && current->next) + current = current->next; + else if (!current && state->first_saved_groups) + current = state->first_saved_groups; + else { + RE_SavedGroups* new_block; + + new_block = (RE_SavedGroups*)safe_alloc(safe_state, + sizeof(RE_SavedGroups)); + if (!new_block) + return FALSE; + + new_block->spans = (RE_GroupSpan*)safe_alloc(safe_state, group_count * + sizeof(RE_GroupSpan)); + new_block->counts = (size_t*)safe_alloc(safe_state, group_count * + sizeof(Py_ssize_t)); + if (!new_block->spans || !new_block->counts) { + safe_dealloc(safe_state, new_block->spans); + safe_dealloc(safe_state, new_block->counts); + safe_dealloc(safe_state, new_block); + return FALSE; + } + + new_block->previous = current; + new_block->next = NULL; + + if (new_block->previous) + new_block->previous->next = new_block; + else + state->first_saved_groups = new_block; + + current = new_block; + } + + for (g = 0; g < group_count; g++) { + current->spans[g] = state->groups[g].span; + current->counts[g] = state->groups[g].capture_count; + } + + state->current_saved_groups = current; + + return TRUE; +} + +/* Pops the groups for backtracking. */ +Py_LOCAL_INLINE(void) pop_groups(RE_State* state) { + size_t group_count; + RE_SavedGroups* current; + size_t g; + + group_count = state->pattern->true_group_count; + if (group_count == 0) + return; + + current = state->current_saved_groups; + + for (g = 0; g < group_count; g++) { + state->groups[g].span = current->spans[g]; + state->groups[g].capture_count = current->counts[g]; + } + + state->current_saved_groups = current->previous; +} + +/* Drops the groups for backtracking. 
*/ +Py_LOCAL_INLINE(void) drop_groups(RE_State* state) { + if (state->pattern->true_group_count != 0) + state->current_saved_groups = state->current_saved_groups->previous; +} + +/* Pushes the repeats for backtracking. */ +Py_LOCAL_INLINE(BOOL) push_repeats(RE_SafeState* safe_state) { + RE_State* state; + PatternObject* pattern; + size_t repeat_count; + RE_SavedRepeats* current; + size_t r; + + state = safe_state->re_state; + pattern = state->pattern; + + repeat_count = pattern->repeat_count; + if (repeat_count == 0) + return TRUE; + + current = state->current_saved_repeats; + + if (current && current->next) + current = current->next; + else if (!current && state->first_saved_repeats) + current = state->first_saved_repeats; + else { + RE_SavedRepeats* new_block; + + new_block = (RE_SavedRepeats*)safe_alloc(safe_state, + sizeof(RE_SavedRepeats)); + if (!new_block) + return FALSE; + + memset(new_block, 0, sizeof(RE_SavedRepeats)); + + new_block->repeats = (RE_RepeatData*)safe_alloc(safe_state, + repeat_count * sizeof(RE_RepeatData)); + if (!new_block->repeats) { + safe_dealloc(safe_state, new_block); + return FALSE; + } + + memset(new_block->repeats, 0, repeat_count * sizeof(RE_RepeatData)); + + new_block->previous = current; + new_block->next = NULL; + + if (new_block->previous) + new_block->previous->next = new_block; + else + state->first_saved_repeats = new_block; + + current = new_block; + } + + for (r = 0; r < repeat_count; r++) { + if (!copy_repeat_data(safe_state, ¤t->repeats[r], + &state->repeats[r])) + return FALSE; + } + + state->current_saved_repeats = current; + + return TRUE; +} + +/* Pops the repeats for backtracking. 
*/ +Py_LOCAL_INLINE(void) pop_repeats(RE_State* state) { + PatternObject* pattern; + size_t repeat_count; + RE_SavedRepeats* current; + size_t r; + + pattern = state->pattern; + + repeat_count = pattern->repeat_count; + if (repeat_count == 0) + return; + + current = state->current_saved_repeats; + + for (r = 0; r < repeat_count; r++) + copy_repeat_data(NULL, &state->repeats[r], ¤t->repeats[r]); + + state->current_saved_repeats = current->previous; +} + +/* Saves state info before a recusive call by 'basic_match'. */ +Py_LOCAL_INLINE(void) save_info(RE_State* state, RE_Info* info) { + info->backtrack_count = state->current_backtrack_block->count; + info->current_backtrack_block = state->current_backtrack_block; + info->current_saved_groups = state->current_saved_groups; + info->must_advance = state->must_advance; + info->current_group_call_frame = state->current_group_call_frame; +} + +/* Restores state info after a recusive call by 'basic_match'. */ +Py_LOCAL_INLINE(void) restore_info(RE_State* state, RE_Info* info) { + state->current_group_call_frame = info->current_group_call_frame; + state->must_advance = info->must_advance; + state->current_saved_groups = info->current_saved_groups; + state->current_backtrack_block = info->current_backtrack_block; + state->current_backtrack_block->count = info->backtrack_count; +} + +/* Inserts a new span in a guard list. 
*/ /* insert_guard_span makes room for one span at 'index': if the array is
+ * full it's doubled (or given RE_INIT_GUARDS_BLOCK_SIZE entries when the
+ * capacity is 0), the spans from 'index' upwards are moved up by one and the
+ * count is incremented. The new slot isn't initialised here. Returns FALSE if
+ * the reallocation fails.
+ */
+Py_LOCAL_INLINE(BOOL) insert_guard_span(RE_SafeState* safe_state, RE_GuardList*
+  guard_list, size_t index) {
+    size_t n;
+
+    if (guard_list->count >= guard_list->capacity) {
+        size_t new_capacity;
+        RE_GuardSpan* new_spans;
+
+        new_capacity = guard_list->capacity * 2;
+        if (new_capacity == 0)
+            new_capacity = RE_INIT_GUARDS_BLOCK_SIZE;
+        new_spans = (RE_GuardSpan*)safe_realloc(safe_state, guard_list->spans,
+          new_capacity * sizeof(RE_GuardSpan));
+        if (!new_spans)
+            return FALSE;
+
+        guard_list->capacity = new_capacity;
+        guard_list->spans = new_spans;
+    }
+
+    n = guard_list->count - index; /* The number of spans to move up. */
+    if (n > 0)
+        memmove(guard_list->spans + index + 1, guard_list->spans + index, n *
+          sizeof(RE_GuardSpan));
+    ++guard_list->count;
+
+    return TRUE;
+}
+
+/* Deletes a span in a guard list.
+ *
+ * The spans above 'index' are moved down by one and the count is decremented.
+ * The capacity is left unchanged.
+ */
+Py_LOCAL_INLINE(void) delete_guard_span(RE_GuardList* guard_list, size_t index)
+  {
+    size_t n;
+
+    n = guard_list->count - index - 1; /* The number of spans to move down. */
+    if (n > 0)
+        memmove(guard_list->spans + index, guard_list->spans + index + 1, n *
+          sizeof(RE_GuardSpan));
+    --guard_list->count;
+}
+
+/* Checks whether a position is guarded against further matching.
+ *
+ * Does a binary search of the spans for one which contains 'text_pos' and
+ * returns that span's 'protect' flag. If no span contains it, the position and
+ * the index at which the search ended are remembered in the guard list (the
+ * 'guard' function reuses them) and FALSE is returned.
+ */
+Py_LOCAL_INLINE(BOOL) is_guarded(RE_GuardList* guard_list, Py_ssize_t text_pos)
+  {
+    size_t low;
+    size_t high;
+
+    /* Is this position in the guard list? */
+    low = 0;
+    high = guard_list->count;
+    while (low < high) {
+        size_t mid;
+        RE_GuardSpan* span;
+
+        mid = (low + high) / 2;
+        span = &guard_list->spans[mid];
+        if (text_pos < span->low)
+            high = mid;
+        else if (text_pos > span->high)
+            low = mid + 1;
+        else
+            return span->protect;
+    }
+
+    guard_list->last_text_pos = text_pos;
+    guard_list->last_low = low;
+
+    return FALSE;
+}
+
+/* Guards a position against further matching.
+ *
+ * If 'text_pos' is already inside a span, nothing changes. Otherwise it's
+ * merged into a neighbouring span which has the same 'protect' value (joining
+ * two spans into one if it fills the gap between them), or else a new
+ * single-position span is inserted.
+ *
+ * Returns FALSE only if inserting a new span fails.
+ */
+Py_LOCAL_INLINE(BOOL) guard(RE_SafeState* safe_state, RE_GuardList* guard_list,
+  Py_ssize_t text_pos, BOOL protect) {
+    size_t low;
+    size_t high;
+
+    /* Where should the new position be added? If it's the position which
+     * is_guarded last failed to find, the insertion index is already known.
+     */
+    if (text_pos == guard_list->last_text_pos)
+        low = guard_list->last_low;
+    else {
+        low = 0;
+        high = guard_list->count;
+        while (low < high) {
+            size_t mid;
+            RE_GuardSpan* span;
+
+            mid = (low + high) / 2;
+            span = &guard_list->spans[mid];
+            if (text_pos < span->low)
+                high = mid;
+            else if (text_pos > span->high)
+                low = mid + 1;
+            else
+                return TRUE;
+        }
+    }
+
+    /* Add the position to the guard list. */
+    if (low > 0 && guard_list->spans[low - 1].high + 1 == text_pos &&
+      guard_list->spans[low - 1].protect == protect) {
+        /* The new position is just above this span. */
+        if (low < guard_list->count && guard_list->spans[low].low - 1 ==
+          text_pos && guard_list->spans[low].protect == protect) {
+            /* The new position joins 2 spans */
+            guard_list->spans[low - 1].high = guard_list->spans[low].high;
+            delete_guard_span(guard_list, low);
+        } else
+            /* Extend the span. */
+            guard_list->spans[low - 1].high = text_pos;
+    } else if (low < guard_list->count && guard_list->spans[low].low - 1 ==
+      text_pos && guard_list->spans[low].protect == protect)
+        /* The new position is just below this span. */
+        /* Extend the span. */
+        guard_list->spans[low].low = text_pos;
+    else {
+        /* Insert a new span. */
+        if (!insert_guard_span(safe_state, guard_list, low))
+            return FALSE;
+        guard_list->spans[low].low = text_pos;
+        guard_list->spans[low].high = text_pos;
+        guard_list->spans[low].protect = protect;
+    }
+
+    guard_list->last_text_pos = -1; /* The remembered index is now stale. */
+
+    return TRUE;
+}
+
+/* Guards a position against further matching for a repeat.
+ *
+ * Does nothing (and returns TRUE) if the repeat's status doesn't have the
+ * 'guard_type' bit set. Otherwise the position is added to the repeat's body
+ * guard list if 'guard_type' includes RE_STATUS_BODY, else to its tail guard
+ * list. Returns the result of 'guard'.
+ */
+Py_LOCAL_INLINE(BOOL) guard_repeat(RE_SafeState* safe_state, size_t index,
+  Py_ssize_t text_pos, RE_STATUS_T guard_type, BOOL protect) {
+    RE_State* state;
+    RE_GuardList* guard_list;
+
+    state = safe_state->re_state;
+
+    /* Is a guard active here? */
+    if (!(state->pattern->repeat_info[index].status & guard_type))
+        return TRUE;
+
+    /* Which guard list? */
+    if (guard_type & RE_STATUS_BODY)
+        guard_list = &state->repeats[index].body_guard_list;
+    else
+        guard_list = &state->repeats[index].tail_guard_list;
+
+    return guard(safe_state, guard_list, text_pos, protect);
+}
+
+/* Checks whether a position is guarded against further matching for a repeat.
+ *
+ * Returns FALSE if the repeat's status doesn't have the 'guard_type' bit set,
+ * otherwise the result of is_guarded on the chosen guard list.
+ *
+ * NOTE(review): the list is chosen with '== RE_STATUS_BODY' here but with
+ * '& RE_STATUS_BODY' in guard_repeat; presumably callers only ever pass a
+ * single flag, so the two agree - confirm.
+ */
+Py_LOCAL_INLINE(BOOL) is_repeat_guarded(RE_SafeState* safe_state, size_t index,
+  Py_ssize_t text_pos, RE_STATUS_T guard_type) {
+    RE_State* state;
+    RE_GuardList* guard_list;
+
+    state = safe_state->re_state;
+
+    /* Is a guard active here? */
+    if (!(state->pattern->repeat_info[index].status & guard_type))
+        return FALSE;
+
+    /* Which guard list? */
+    if (guard_type == RE_STATUS_BODY)
+        guard_list = &state->repeats[index].body_guard_list;
+    else
+        guard_list = &state->repeats[index].tail_guard_list;
+
+    return is_guarded(guard_list, text_pos);
+}
+
+/* Resets the guards inside atomic subpatterns and lookarounds.
+ *
+ * If 'values' isn't NULL, values[0] is a count and values[1..count] are
+ * indexes: an index below the pattern's repeat count selects a repeat, any
+ * other index selects a fuzzy guard after the repeat count is subtracted from
+ * it. If 'values' is NULL, the guard lists of every repeat and every fuzzy
+ * guard are reset.
+ */
+Py_LOCAL_INLINE(void) reset_guards(RE_State* state, RE_CODE* values) {
+    PatternObject* pattern;
+    size_t repeat_count;
+
+    pattern = state->pattern;
+    repeat_count = pattern->repeat_count;
+
+    if (values) {
+        size_t i;
+
+        for (i = 1; i <= values[0]; i++) {
+            size_t index;
+
+            index = values[i];
+
+            if (index < repeat_count) {
+                reset_guard_list(&state->repeats[index].body_guard_list);
+                reset_guard_list(&state->repeats[index].tail_guard_list);
+            } else {
+                index -= repeat_count;
+
+                reset_guard_list(&state->fuzzy_guards[index].body_guard_list);
+                reset_guard_list(&state->fuzzy_guards[index].tail_guard_list);
+            }
+        }
+    } else {
+        size_t index;
+        size_t fuzzy_count;
+
+        for (index = 0; index < repeat_count; index++) {
+            reset_guard_list(&state->repeats[index].body_guard_list);
+            reset_guard_list(&state->repeats[index].tail_guard_list);
+        }
+
+        fuzzy_count = pattern->fuzzy_count;
+
+        for (index = 0; index < fuzzy_count; index++) {
+            reset_guard_list(&state->fuzzy_guards[index].body_guard_list);
+            reset_guard_list(&state->fuzzy_guards[index].tail_guard_list);
+        }
+    }
+}
+
+/* Builds a Unicode string.
+ *
+ * 'buffer_charsize' isn't used; the buffer is passed straight to
+ * PyUnicode_FromUnicode. Returns a new reference, or NULL on failure.
+ */
+Py_LOCAL_INLINE(PyObject*) build_unicode_value(void* buffer, Py_ssize_t len,
+  Py_ssize_t buffer_charsize) {
+    return PyUnicode_FromUnicode(buffer, len);
+}
+
+/* Builds a bytestring. Returns NULL if any member is too wide.
+ *
+ * A buffer with a character size of 1 is used directly. Any other buffer is
+ * read as Py_UCS2 items, which are narrowed into a temporary byte buffer; an
+ * item above 0xFF makes the function return NULL without setting an exception
+ * itself. NULL is also returned if the temporary buffer can't be allocated.
+ */
+Py_LOCAL_INLINE(PyObject*) build_bytes_value(void* buffer, Py_ssize_t len,
+  Py_ssize_t buffer_charsize)
+{
+    Py_UCS1* byte_buffer;
+    Py_ssize_t i;
+    PyObject* result;
+
+    if (buffer_charsize == 1)
+        return Py_BuildValue("s#", buffer, len);
+
+    byte_buffer = re_alloc((size_t)len);
+    if (!byte_buffer)
+        return NULL;
+
+    for (i = 0; i < len; i++) {
+        Py_UCS2 c = ((Py_UCS2*)buffer)[i];
+        if (c > 0xFF)
+            goto too_wide;
+
+        byte_buffer[i] = (Py_UCS1)c;
+    }
+
+    result = Py_BuildValue("s#", byte_buffer, len);
+
+    re_dealloc(byte_buffer);
+
+    return result;
+
+too_wide:
+    re_dealloc(byte_buffer);
+
+    return NULL;
+}
+
+/* Looks for a string in a string set.
+ *
+ * The candidate is the text from 'first' up to, but excluding, 'last'. It's
+ * built as a Unicode string or a bytestring according to state->is_unicode.
+ * Returns the result of PySet_Contains, or RE_ERROR_INTERNAL if the candidate
+ * couldn't be built.
+ */
+Py_LOCAL_INLINE(int) string_set_contains(RE_State* state, PyObject* string_set,
+  Py_ssize_t first, Py_ssize_t last) {
+    PyObject* string;
+    int status;
+
+    if (state->is_unicode)
+        string = build_unicode_value(state->point_to(state->text, first), last
+          - first, state->charsize);
+    else
+        string = build_bytes_value(state->point_to(state->text, first), last -
+          first, state->charsize);
+    if (!string)
+        return RE_ERROR_INTERNAL;
+
+    status = PySet_Contains(string_set, string);
+    Py_DECREF(string);
+
+    return status;
+}
+
+/* Looks for a string in a string set, ignoring case. 
*/ /* string_set_contains_ign scans 'buffer' from 'index' up to 'len' for a
+ * character which the encoding reports as a possible Turkic 'I'. If it finds
+ * one, each alternative from all_turkic_i is written into the buffer in turn
+ * and the function recurses on the rest of the buffer; the first non-zero
+ * result (a hit or an error) is returned. The buffer isn't restored
+ * afterwards. If there's no such character, the candidate is built from the
+ * buffer and looked up with PySet_Contains.
+ *
+ * Returns the result of PySet_Contains, 0 if no alternative matched, or
+ * RE_ERROR_MEMORY if the candidate couldn't be built.
+ *
+ * NOTE(review): the candidate is always built from the start of 'buffer' with
+ * 'len' items, whereas the callers visible in this file pass (first, last) as
+ * (index, len); for a reverse match 'first' can be greater than 0, so the
+ * candidate looks as though it would include items below 'first' - confirm.
+ */
+Py_LOCAL_INLINE(int) string_set_contains_ign(RE_State* state, PyObject*
+  string_set, void* buffer, Py_ssize_t index, Py_ssize_t len, Py_ssize_t
+  buffer_charsize) {
+    Py_UCS4 (*char_at)(void* text, Py_ssize_t pos);
+    void (*set_char_at)(void* text, Py_ssize_t pos, Py_UCS4 ch);
+    RE_EncodingTable* encoding;
+    BOOL (*possible_turkic)(Py_UCS4 ch);
+    Py_UCS4 codepoints[4];
+
+    switch (buffer_charsize) { /* Pick the accessors for the buffer's width. */
+    case 1:
+        char_at = bytes1_char_at;
+        set_char_at = bytes1_set_char_at;
+        break;
+    case 2:
+        char_at = bytes2_char_at;
+        set_char_at = bytes2_set_char_at;
+        break;
+    case 4:
+        char_at = bytes4_char_at;
+        set_char_at = bytes4_set_char_at;
+        break;
+    default: /* Any other width is treated as a width of 1. */
+        char_at = bytes1_char_at;
+        set_char_at = bytes1_set_char_at;
+        break;
+    }
+
+    encoding = state->encoding;
+    possible_turkic = encoding->possible_turkic;
+
+    /* Look for a possible Turkic 'I'. */
+    while (index < len && !possible_turkic(char_at(buffer, index)))
+        ++index;
+
+    if (index < len) {
+        /* Possible Turkic 'I'. */
+        int count;
+        int i;
+
+        /* Try all the alternatives to the 'I'. */
+        count = encoding->all_turkic_i(char_at(buffer, index), codepoints);
+
+        for (i = 0; i < count; i++) {
+            int status;
+
+            set_char_at(buffer, index, codepoints[i]);
+
+            /* Recurse for the remainder of the string. */
+            status = string_set_contains_ign(state, string_set, buffer, index +
+              1, len, buffer_charsize);
+            if (status != 0) /* Either found or an error. */
+                return status;
+        }
+
+        return 0;
+    } else {
+        /* No Turkic 'I'. */
+        PyObject* string;
+        int status;
+
+        if (state->is_unicode)
+            string = build_unicode_value(buffer, len, buffer_charsize);
+        else
+            string = build_bytes_value(buffer, len, buffer_charsize);
+        if (!string)
+            return RE_ERROR_MEMORY;
+
+        status = PySet_Contains(string_set, string);
+        Py_DECREF(string);
+
+        return status;
+    }
+}
+
+/* Creates a partial string set for truncation at the left or right side. 
*/
+/* make_partial_string_set builds, once per named list and per side, the set
+ * of truncated forms of the list's members: each member is shortened one item
+ * at a time (from the left for RE_PARTIAL_LEFT, else from the right) down to a
+ * length of 1 and every shortened form is added. The result is cached in
+ * pattern->partial_named_lists[partial_side][node->values[0]].
+ *
+ * Returns 1 if the set exists or was built, RE_ERROR_INTERNAL otherwise.
+ */
+Py_LOCAL_INLINE(int) make_partial_string_set(RE_State* state, RE_Node* node) {
+    PatternObject* pattern;
+    int partial_side;
+    PyObject* string_set;
+    PyObject* partial_set;
+    PyObject* iter = NULL;
+    PyObject* item = NULL;
+    PyObject* slice = NULL;
+
+    pattern = state->pattern;
+    partial_side = state->partial_side;
+    if (partial_side != RE_PARTIAL_LEFT && partial_side != RE_PARTIAL_RIGHT)
+        return RE_ERROR_INTERNAL;
+
+    /* Fetch the full string set. PyList_GET_ITEM borrows a reference. */
+    string_set = PyList_GET_ITEM(pattern->named_list_indexes, node->values[0]);
+    if (!string_set)
+        return RE_ERROR_INTERNAL;
+
+    /* Gets the list of partial string sets. */
+    if (!pattern->partial_named_lists[partial_side]) {
+        size_t size;
+
+        size = pattern->named_lists_count * sizeof(PyObject*);
+        pattern->partial_named_lists[partial_side] = re_alloc(size);
+        if (!pattern->partial_named_lists[partial_side])
+            return RE_ERROR_INTERNAL;
+
+        memset(pattern->partial_named_lists[partial_side], 0, size);
+    }
+
+    /* Get the partial string set. */
+    partial_set = pattern->partial_named_lists[partial_side][node->values[0]];
+    if (partial_set)
+        return 1;
+
+    /* Build the partial string set. */
+    partial_set = PySet_New(NULL);
+    if (!partial_set)
+        return RE_ERROR_INTERNAL;
+
+    iter = PyObject_GetIter(string_set);
+    if (!iter)
+        goto error;
+
+    item = PyIter_Next(iter);
+
+    while (item) {
+        Py_ssize_t len;
+        Py_ssize_t first;
+        Py_ssize_t last;
+
+        len = PySequence_Length(item);
+        if (len == -1)
+            goto error;
+
+        first = 0;
+        last = len;
+
+        while (last - first > 1) {
+            int status;
+
+            /* Shorten the entry. */
+            if (partial_side == RE_PARTIAL_LEFT)
+                ++first;
+            else
+                --last;
+
+            slice = PySequence_GetSlice(item, first, last);
+            if (!slice)
+                goto error;
+
+            /* The set holds its own reference to the slice. */
+            status = PySet_Add(partial_set, slice);
+            Py_DECREF(slice);
+            if (status < 0)
+                goto error;
+        }
+
+        Py_DECREF(item);
+        item = PyIter_Next(iter);
+    }
+
+    /* PyIter_Next returns NULL both at the end and on an error. */
+    if (PyErr_Occurred())
+        goto error;
+
+    Py_DECREF(iter);
+
+    pattern->partial_named_lists[partial_side][node->values[0]] = partial_set;
+
+    return 1;
+
+error:
+    Py_XDECREF(item);
+    Py_XDECREF(iter);
+    Py_DECREF(partial_set);
+
+    return RE_ERROR_INTERNAL;
+}
+
+/* Tries to match a string at the current position with a member of a string
+ * set, forwards or backwards.
+ *
+ * The longest candidate is tried first, then successively shorter ones down to
+ * the set's minimum length. On a match state->text_pos is moved past it.
+ *
+ * Returns 1 for a match, 0 for no match, RE_ERROR_PARTIAL for a permitted
+ * partial match, or another negative status if a lookup fails. The GIL is held
+ * for the duration of the lookups.
+ */
+Py_LOCAL_INLINE(int) string_set_match_fwdrev(RE_SafeState* safe_state, RE_Node*
+  node, BOOL reverse) {
+    RE_State* state;
+    Py_ssize_t min_len;
+    Py_ssize_t max_len;
+    Py_ssize_t text_available;
+    Py_ssize_t slice_available;
+    int partial_side;
+    Py_ssize_t len;
+    Py_ssize_t first;
+    Py_ssize_t last;
+    int status;
+    PyObject* string_set;
+
+    state = safe_state->re_state;
+
+    min_len = (Py_ssize_t)node->values[1];
+    max_len = (Py_ssize_t)node->values[2];
+
+    acquire_GIL(safe_state);
+
+    if (reverse) {
+        text_available = state->text_pos;
+        slice_available = state->text_pos - state->slice_start;
+        partial_side = RE_PARTIAL_LEFT;
+    } else {
+        text_available = state->text_length - state->text_pos;
+        slice_available = state->slice_end - state->text_pos;
+        partial_side = RE_PARTIAL_RIGHT;
+    }
+
+    /* Get as many characters as we need for the longest possible match. */
+    len = min_ssize_t(max_len, slice_available);
+
+    if (reverse) {
+        first = state->text_pos - len;
+        last = state->text_pos;
+    } else {
+        first = state->text_pos;
+        last = state->text_pos + len;
+    }
+
+    /* If we didn't get all of the characters we need, is a partial match
+     * allowed?
+     */
+    if (len < max_len && len == text_available && state->partial_side ==
+      partial_side) {
+        if (len == 0) {
+            /* An empty string is always a possible partial match. */
+            status = RE_ERROR_PARTIAL;
+            goto finished;
+        }
+
+        /* Make a set of the possible partial matches. */
+        status = make_partial_string_set(state, node);
+        if (status < 0)
+            goto finished;
+
+        /* Fetch the partial string set. */
+        string_set =
+          state->pattern->partial_named_lists[partial_side][node->values[0]];
+
+        /* Is the text we have a partial match? */
+        status = string_set_contains(state, string_set, first, last);
+        if (status < 0)
+            goto finished;
+
+        if (status == 1) {
+            /* Advance past the match. */
+            if (reverse)
+                state->text_pos -= len;
+            else
+                state->text_pos += len;
+
+            status = RE_ERROR_PARTIAL;
+            goto finished;
+        }
+    }
+
+    /* Fetch the string set. PyList_GET_ITEM borrows a reference. */
+    string_set = PyList_GET_ITEM(state->pattern->named_list_indexes,
+      node->values[0]);
+    if (!string_set) {
+        status = RE_ERROR_INTERNAL;
+        goto finished;
+    }
+
+    /* We've already looked for a partial match (if allowed), but what about a
+     * complete match?
+     */
+    while (len >= min_len) {
+        status = string_set_contains(state, string_set, first, last);
+
+        /* A failed lookup must be reported, not treated as "not found". */
+        if (status < 0)
+            goto finished;
+
+        if (status == 1) {
+            /* Advance past the match. */
+            if (reverse)
+                state->text_pos -= len;
+            else
+                state->text_pos += len;
+
+            status = 1;
+            goto finished;
+        }
+
+        /* Look for a shorter match. */
+        --len;
+        if (reverse)
+            ++first;
+        else
+            --last;
+    }
+
+    /* No match. */
+    status = 0;
+
+finished:
+    release_GIL(safe_state);
+
+    return status;
+}
+
+/* Tries to match a string at the current position with a member of a string
+ * set, ignoring case, forwards or backwards.
+ */
+/* string_set_match_fld_fwdrev uses the encoding's full case-folding, so one
+ * character of text can become several folded codepoints. 'end_of_fold'
+ * records the folded lengths which coincide with the end of a whole text
+ * character; only those lengths are tried as complete matches, and 'consumed'
+ * counts the text characters they correspond to.
+ *
+ * Returns 1 for a match, 0 for no match, RE_ERROR_PARTIAL for a permitted
+ * partial match, or another negative status on failure. On a match
+ * state->text_pos is moved past the text consumed.
+ */
+Py_LOCAL_INLINE(int) string_set_match_fld_fwdrev(RE_SafeState* safe_state,
+  RE_Node* node, BOOL reverse) {
+    RE_State* state;
+    int (*full_case_fold)(Py_UCS4 ch, Py_UCS4* folded);
+    Py_UCS4 (*char_at)(void* text, Py_ssize_t pos);
+    Py_ssize_t folded_charsize;
+    void (*set_char_at)(void* text, Py_ssize_t pos, Py_UCS4 ch);
+    Py_ssize_t min_len;
+    Py_ssize_t max_len;
+    Py_ssize_t buf_len;
+    void* folded;
+    int status;
+    BOOL* end_of_fold = NULL;
+    Py_ssize_t text_available;
+    Py_ssize_t slice_available;
+    Py_ssize_t t_pos;
+    Py_ssize_t f_pos;
+    int step;
+    int partial_side;
+    Py_ssize_t len;
+    Py_ssize_t consumed;
+    Py_UCS4 codepoints[RE_MAX_FOLDED];
+    PyObject* string_set;
+    Py_ssize_t first;
+    Py_ssize_t last;
+
+    state = safe_state->re_state;
+    full_case_fold = state->encoding->full_case_fold;
+    char_at = state->char_at;
+
+    /* The folded string will have the same width as the original string. */
+    folded_charsize = state->charsize;
+
+    switch (folded_charsize) {
+    case 1:
+        set_char_at = bytes1_set_char_at;
+        break;
+    case 2:
+        set_char_at = bytes2_set_char_at;
+        break;
+    case 4:
+        set_char_at = bytes4_set_char_at;
+        break;
+    default:
+        return RE_ERROR_INTERNAL;
+    }
+
+    min_len = (Py_ssize_t)node->values[1];
+    max_len = (Py_ssize_t)node->values[2];
+
+    acquire_GIL(safe_state);
+
+    /* Allocate a buffer for the folded string. The last character folded can
+     * take the length past max_len by up to RE_MAX_FOLDED - 1.
+     */
+    buf_len = max_len + RE_MAX_FOLDED;
+    folded = re_alloc((size_t)(buf_len * folded_charsize));
+    if (!folded) {
+        status = RE_ERROR_MEMORY;
+        goto finished;
+    }
+
+    end_of_fold = re_alloc((size_t)buf_len * sizeof(BOOL));
+    if (!end_of_fold) {
+        status = RE_ERROR_MEMORY;
+        goto finished;
+    }
+
+    memset(end_of_fold, 0, (size_t)buf_len * sizeof(BOOL));
+
+    if (reverse) {
+        /* The buffer is filled from its end downwards. */
+        text_available = state->text_pos;
+        slice_available = state->text_pos - state->slice_start;
+        t_pos = state->text_pos - 1;
+        f_pos = buf_len;
+        step = -1;
+        partial_side = RE_PARTIAL_LEFT;
+    } else {
+        text_available = state->text_length - state->text_pos;
+        slice_available = state->slice_end - state->text_pos;
+        t_pos = state->text_pos;
+        f_pos = 0;
+        step = 1;
+        partial_side = RE_PARTIAL_RIGHT;
+    }
+
+    /* We can stop getting characters as soon as the case-folded string is long
+     * enough (each codepoint from the text can expand to more than one folded
+     * codepoint).
+     */
+    len = 0;
+    end_of_fold[len] = TRUE;
+
+    consumed = 0;
+    while (len < max_len && consumed < slice_available) {
+        int count;
+        int j;
+
+        count = full_case_fold(char_at(state->text, t_pos), codepoints);
+
+        if (reverse)
+            f_pos -= count;
+
+        for (j = 0; j < count; j++)
+            set_char_at(folded, f_pos + j, codepoints[j]);
+
+        if (!reverse)
+            f_pos += count;
+
+        len += count;
+        end_of_fold[len] = TRUE;
+        ++consumed;
+        t_pos += step;
+    }
+
+    if (reverse) {
+        first = f_pos;
+        last = buf_len;
+    } else {
+        first = 0;
+        last = f_pos;
+    }
+
+    /* If we didn't get all of the characters we need, is a partial match
+     * allowed?
+     */
+    if (len < max_len && len == text_available && state->partial_side ==
+      partial_side) {
+        if (len == 0) {
+            /* An empty string is always a possible partial match. */
+            status = RE_ERROR_PARTIAL;
+            goto finished;
+        }
+
+        /* Make a set of the possible partial matches. */
+        status = make_partial_string_set(state, node);
+        if (status < 0)
+            goto finished;
+
+        /* Fetch the partial string set. */
+        string_set =
+          state->pattern->partial_named_lists[partial_side][node->values[0]];
+
+        /* Is the text we have a partial match? */
+        status = string_set_contains_ign(state, string_set, folded, first,
+          last, folded_charsize);
+        if (status < 0)
+            goto finished;
+
+        if (status == 1) {
+            /* Advance past the match. */
+            if (reverse)
+                state->text_pos -= consumed;
+            else
+                state->text_pos += consumed;
+
+            status = RE_ERROR_PARTIAL;
+            goto finished;
+        }
+    }
+
+    /* Fetch the string set. PyList_GET_ITEM borrows a reference. */
+    string_set = PyList_GET_ITEM(state->pattern->named_list_indexes,
+      node->values[0]);
+    if (!string_set) {
+        status = RE_ERROR_INTERNAL;
+        goto finished;
+    }
+
+    /* We've already looked for a partial match (if allowed), but what about a
+     * complete match?
+     */
+    while (len >= min_len) {
+        if (end_of_fold[len]) {
+            status = string_set_contains_ign(state, string_set, folded, first,
+              last, folded_charsize);
+
+            /* A failed lookup must be reported, not treated as "not found". */
+            if (status < 0)
+                goto finished;
+
+            if (status == 1) {
+                /* Advance past the match. */
+                if (reverse)
+                    state->text_pos -= consumed;
+                else
+                    state->text_pos += consumed;
+
+                status = 1;
+                goto finished;
+            }
+
+            /* The next candidate ends one text character earlier. */
+            --consumed;
+        }
+
+        /* Look for a shorter match. */
+        --len;
+        if (reverse)
+            ++first;
+        else
+            --last;
+    }
+
+    /* No match. */
+    status = 0;
+
+finished:
+    re_dealloc(end_of_fold);
+    re_dealloc(folded);
+
+    release_GIL(safe_state);
+
+    return status;
+}
+
+/* Tries to match a string at the current position with a member of a string
+ * set, ignoring case, forwards or backwards.
+ */ +Py_LOCAL_INLINE(int) string_set_match_ign_fwdrev(RE_SafeState* safe_state, + RE_Node* node, BOOL reverse) { + RE_State* state; + Py_UCS4 (*simple_case_fold)(Py_UCS4 ch); + Py_UCS4 (*char_at)(void* text, Py_ssize_t pos); + Py_ssize_t folded_charsize; + void (*set_char_at)(void* text, Py_ssize_t pos, Py_UCS4 ch); + Py_ssize_t min_len; + Py_ssize_t max_len; + void* folded; + int status; + Py_ssize_t text_available; + Py_ssize_t slice_available; + Py_ssize_t t_pos; + Py_ssize_t f_pos; + int step; + int partial_side; + Py_ssize_t len; + Py_ssize_t i; + Py_ssize_t first; + Py_ssize_t last; + PyObject* string_set; + + state = safe_state->re_state; + simple_case_fold = state->encoding->simple_case_fold; + char_at = state->char_at; + + /* The folded string will have the same width as the original string. */ + folded_charsize = state->charsize; + + switch (folded_charsize) { + case 1: + set_char_at = bytes1_set_char_at; + break; + case 2: + set_char_at = bytes2_set_char_at; + break; + case 4: + set_char_at = bytes4_set_char_at; + break; + default: + return RE_ERROR_INTERNAL; + } + + min_len = (Py_ssize_t)node->values[1]; + max_len = (Py_ssize_t)node->values[2]; + + acquire_GIL(safe_state); + + /* Allocate a buffer for the folded string. */ + folded = re_alloc((size_t)(max_len * folded_charsize)); + if (!folded) { + status = RE_ERROR_MEMORY; + goto finished; + } + + if (reverse) { + text_available = state->text_pos; + slice_available = state->text_pos - state->slice_start; + t_pos = state->text_pos - 1; + f_pos = max_len - 1; + step = -1; + partial_side = RE_PARTIAL_LEFT; + } else { + text_available = state->text_length - state->text_pos; + slice_available = state->slice_end - state->text_pos; + t_pos = state->text_pos; + f_pos = 0; + step = 1; + partial_side = RE_PARTIAL_RIGHT; + } + + /* Get as many characters as we need for the longest possible match. 
*/ + len = min_ssize_t(max_len, slice_available); + + for (i = 0; i < len; i ++) { + Py_UCS4 ch; + + ch = simple_case_fold(char_at(state->text, t_pos)); + set_char_at(folded, f_pos, ch); + t_pos += step; + f_pos += step; + } + + if (reverse) { + first = f_pos; + last = max_len; + } else { + first = 0; + last = f_pos; + } + + /* If we didn't get all of the characters we need, is a partial match + * allowed? + */ + if (len < max_len && len == text_available && state->partial_side == + partial_side) { + if (len == 0) { + /* An empty string is always a possible partial match. */ + status = RE_ERROR_PARTIAL; + goto finished; + } + + /* Make a set of the possible partial matches. */ + status = make_partial_string_set(state, node); + if (status < 0) + goto finished; + + /* Fetch the partial string set. */ + string_set = + state->pattern->partial_named_lists[partial_side][node->values[0]]; + + /* Is the text we have a partial match? */ + status = string_set_contains_ign(state, string_set, folded, first, + last, folded_charsize); + if (status < 0) + goto finished; + + if (status == 1) { + /* Advance past the match. */ + if (reverse) + state->text_pos -= len; + else + state->text_pos += len; + + status = RE_ERROR_PARTIAL; + goto finished; + } + } + + /* Fetch the string set. PyList_GET_ITEM borrows a reference. */ + string_set = PyList_GET_ITEM(state->pattern->named_list_indexes, + node->values[0]); + if (!string_set) { + status = RE_ERROR_INTERNAL; + goto finished; + } + + /* We've already looked for a partial match (if allowed), but what about a + * complete match? + */ + while (len >= min_len) { + status = string_set_contains_ign(state, string_set, folded, first, + last, folded_charsize); + + if (status == 1) { + /* Advance past the match. */ + if (reverse) + state->text_pos -= len; + else + state->text_pos += len; + + status = 1; + goto finished; + } + + /* Look for a shorter match. */ + --len; + if (reverse) + ++first; + else + --last; + } + + /* No match. 
*/ + status = 0; + +finished: + re_dealloc(folded); + + release_GIL(safe_state); + + return status; +} + +/* Checks whether any additional fuzzy error is permitted. + * + * The limits are read from the values of the fuzzy node recorded in + * state->fuzzy_info. Returns true only while fuzzy_info->total_cost is within + * the node's maximum cost, the total error count is below the node's maximum + * error count, and state->total_cost is within state->max_cost. + */ +Py_LOCAL_INLINE(BOOL) any_error_permitted(RE_State* state) { + RE_FuzzyInfo* fuzzy_info; + RE_CODE* values; + + fuzzy_info = &state->fuzzy_info; + values = fuzzy_info->node->values; + + return fuzzy_info->total_cost <= values[RE_FUZZY_VAL_MAX_COST] && + fuzzy_info->counts[RE_FUZZY_ERR] < values[RE_FUZZY_VAL_MAX_ERR] && + state->total_cost <= state->max_cost; +} + +/* Checks whether this additional fuzzy error is permitted. + * + * fuzzy_type selects the kind of error. The cost of that kind is added to + * both fuzzy_info->total_cost and state->total_cost before they are compared + * with their maxima, and the count for that kind must be below the maximum + * for that kind. + */ +Py_LOCAL_INLINE(BOOL) this_error_permitted(RE_State* state, int fuzzy_type) { + RE_FuzzyInfo* fuzzy_info; + RE_CODE* values; + + fuzzy_info = &state->fuzzy_info; + values = fuzzy_info->node->values; + + return fuzzy_info->total_cost + values[RE_FUZZY_VAL_COST_BASE + fuzzy_type] + <= values[RE_FUZZY_VAL_MAX_COST] && fuzzy_info->counts[fuzzy_type] < + values[RE_FUZZY_VAL_MAX_BASE + fuzzy_type] && state->total_cost + + values[RE_FUZZY_VAL_COST_BASE + fuzzy_type] <= state->max_cost; +} + +/* Checks whether we've reached the end of the text during a fuzzy partial + * match. + * + * Returns RE_ERROR_PARTIAL if text_pos lies beyond the end of the text on the + * side named by state->partial_side (below 0 for RE_PARTIAL_LEFT, above + * state->text_length for RE_PARTIAL_RIGHT), otherwise RE_ERROR_FAILURE. + */ +Py_LOCAL_INLINE(int) check_fuzzy_partial(RE_State* state, Py_ssize_t text_pos) + { + switch (state->partial_side) { + case RE_PARTIAL_LEFT: + if (text_pos < 0) + return RE_ERROR_PARTIAL; + break; + case RE_PARTIAL_RIGHT: + if (text_pos > state->text_length) + return RE_ERROR_PARTIAL; + break; + } + + return RE_ERROR_FAILURE; +} + +/* Checks a fuzzy match of an item. + * + * Applies the kind of error in data->fuzzy_type if this_error_permitted + * allows it. A deletion advances only the pattern (the string position if + * is_string, otherwise the node), an insertion advances only the text + * position and is refused when data->permit_insertion is false, and a + * substitution advances both. A new text position outside slice_start to + * slice_end is not accepted; check_fuzzy_partial decides the result instead. + */ +Py_LOCAL_INLINE(int) next_fuzzy_match_item(RE_State* state, RE_FuzzyData* data, + BOOL is_string, int step) { + Py_ssize_t new_pos; + + if (this_error_permitted(state, data->fuzzy_type)) { + switch (data->fuzzy_type) { + case RE_FUZZY_DEL: + /* Could a character at text_pos have been deleted? 
*/ + if (is_string) + data->new_string_pos += step; + else + data->new_node = data->new_node->next_1.node; + return RE_ERROR_SUCCESS; + case RE_FUZZY_INS: + /* Could the character at text_pos have been inserted? */ + if (!data->permit_insertion) + return RE_ERROR_FAILURE; + + new_pos = data->new_text_pos + step; + if (state->slice_start <= new_pos && new_pos <= state->slice_end) { + data->new_text_pos = new_pos; + return RE_ERROR_SUCCESS; + } + + return check_fuzzy_partial(state, new_pos); + case RE_FUZZY_SUB: + /* Could the character at text_pos have been substituted? */ + new_pos = data->new_text_pos + step; + if (state->slice_start <= new_pos && new_pos <= state->slice_end) { + data->new_text_pos = new_pos; + if (is_string) + data->new_string_pos += step; + else + data->new_node = data->new_node->next_1.node; + return RE_ERROR_SUCCESS; + } + + return check_fuzzy_partial(state, new_pos); + } + } + + return RE_ERROR_FAILURE; +} + +/* Tries a fuzzy match of an item of width 0 or 1. + * + * The kinds of error are tried in order from 0. If no error is permitted or + * none can be applied, *node is set to NULL and RE_ERROR_SUCCESS is returned. + * Otherwise a backtrack entry holding the original position, the kind of + * error and step is added, the error counts and costs are increased, and + * *text_pos and *node are updated. A negative status from + * next_fuzzy_match_item is returned unchanged, and RE_ERROR_FAILURE is + * returned if the backtrack entry can't be added. + */ +Py_LOCAL_INLINE(int) fuzzy_match_item(RE_SafeState* safe_state, BOOL search, + Py_ssize_t* text_pos, RE_Node** node, int step) { + RE_State* state; + RE_FuzzyData data; + RE_FuzzyInfo* fuzzy_info; + RE_CODE* values; + RE_BacktrackData* bt_data; + + state = safe_state->re_state; + + if (!any_error_permitted(state)) { + *node = NULL; + return RE_ERROR_SUCCESS; + } + + data.new_text_pos = *text_pos; + data.new_node = *node; + + fuzzy_info = &state->fuzzy_info; + values = fuzzy_info->node->values; + + /* A step of 0 takes data.step and data.limit from the direction of the + * node; step itself is still what is passed to next_fuzzy_match_item. + */ + if (step == 0) { + if (data.new_node->status & RE_STATUS_REVERSE) { + data.step = -1; + data.limit = state->slice_start; + } else { + data.step = 1; + data.limit = state->slice_end; + } + } else + data.step = step; + + /* Permit insertion except initially when searching (it's better just to + * start searching one character later). 
+ */ + data.permit_insertion = !search || data.new_text_pos != + state->search_anchor; + + for (data.fuzzy_type = 0; data.fuzzy_type < RE_FUZZY_COUNT; + data.fuzzy_type++) { + int status; + + status = next_fuzzy_match_item(state, &data, FALSE, step); + if (status < 0) + return status; + + if (status == RE_ERROR_SUCCESS) + goto found; + } + + /* No kind of error could be applied here. */ + *node = NULL; + return RE_ERROR_SUCCESS; + +found: + /* Save the original position so that the other kinds can be retried. */ + if (!add_backtrack(safe_state, (*node)->op)) + return RE_ERROR_FAILURE; + bt_data = state->backtrack; + bt_data->fuzzy_item.position.text_pos = *text_pos; + bt_data->fuzzy_item.position.node = *node; + bt_data->fuzzy_item.fuzzy_type = (RE_INT8)data.fuzzy_type; + bt_data->fuzzy_item.step = (RE_INT8)step; + + /* Charge the error to both the fuzzy node's totals and the overall ones. */ + ++fuzzy_info->counts[data.fuzzy_type]; + ++fuzzy_info->counts[RE_FUZZY_ERR]; + fuzzy_info->total_cost += values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type]; + ++state->total_errors; + state->total_cost += values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type]; + + *text_pos = data.new_text_pos; + *node = data.new_node; + + return RE_ERROR_SUCCESS; +} + +/* Retries a fuzzy match of a item of width 0 or 1. 
*/ +Py_LOCAL_INLINE(int) retry_fuzzy_match_item(RE_SafeState* safe_state, BOOL + search, Py_ssize_t* text_pos, RE_Node** node, BOOL advance) { + /* Restores the position and kind of error saved in the top backtrack + * entry, undoes that error and then tries the remaining kinds of error in + * order. On success the entry is updated in place and *text_pos and *node + * are set; if no other kind can be applied the entry is discarded and + * *node is set to NULL. A negative status from next_fuzzy_match_item is + * returned unchanged. + */ + RE_State* state; + RE_FuzzyData data; + RE_FuzzyInfo* fuzzy_info; + RE_CODE* values; + RE_BacktrackData* bt_data; + int step; + + state = safe_state->re_state; + fuzzy_info = &state->fuzzy_info; + values = fuzzy_info->node->values; + + bt_data = state->backtrack; + data.new_text_pos = bt_data->fuzzy_item.position.text_pos; + data.new_node = bt_data->fuzzy_item.position.node; + data.fuzzy_type = bt_data->fuzzy_item.fuzzy_type; + data.step = bt_data->fuzzy_item.step; + + /* Undo the previous error; a saved kind below 0 has nothing to undo. */ + if (data.fuzzy_type >= 0) { + --fuzzy_info->counts[data.fuzzy_type]; + --fuzzy_info->counts[RE_FUZZY_ERR]; + fuzzy_info->total_cost -= values[RE_FUZZY_VAL_COST_BASE + + data.fuzzy_type]; + --state->total_errors; + state->total_cost -= values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type]; + } + + /* Permit insertion except initially when searching (it's better just to + * start searching one character later). + */ + data.permit_insertion = !search || data.new_text_pos != + state->search_anchor; + + /* The saved step is used only if the caller asked to advance. */ + step = advance ? data.step : 0; + + for (++data.fuzzy_type; data.fuzzy_type < RE_FUZZY_COUNT; + data.fuzzy_type++) { + int status; + + status = next_fuzzy_match_item(state, &data, FALSE, step); + if (status < 0) + return status; + + if (status == RE_ERROR_SUCCESS) + goto found; + } + + /* Every kind of error has now been tried at this position. */ + discard_backtrack(state); + *node = NULL; + return RE_ERROR_SUCCESS; + +found: + bt_data->fuzzy_item.fuzzy_type = (RE_INT8)data.fuzzy_type; + + ++fuzzy_info->counts[data.fuzzy_type]; + ++fuzzy_info->counts[RE_FUZZY_ERR]; + fuzzy_info->total_cost += values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type]; + ++state->total_errors; + state->total_cost += values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type]; + + *text_pos = data.new_text_pos; + *node = data.new_node; + + return RE_ERROR_SUCCESS; +} + +/* Tries a fuzzy insertion. 
*/ +Py_LOCAL_INLINE(int) fuzzy_insert(RE_SafeState* safe_state, Py_ssize_t + text_pos, RE_Node* node) { + /* Adds a backtrack entry with an insertion count of 0 that saves the + * position, the node and the current too_few_errors flag, then sets + * too_few_errors if any error count is below the minimum given in the + * values of the node. Returns RE_ERROR_FAILURE if the backtrack entry + * can't be added, otherwise RE_ERROR_SUCCESS. + */ + RE_State* state; + RE_BacktrackData* bt_data; + RE_FuzzyInfo* fuzzy_info; + RE_CODE* values; + + state = safe_state->re_state; + + /* No insertion or deletion. */ + if (!add_backtrack(safe_state, node->op)) + return RE_ERROR_FAILURE; + bt_data = state->backtrack; + bt_data->fuzzy_insert.position.text_pos = text_pos; + bt_data->fuzzy_insert.position.node = node; + bt_data->fuzzy_insert.count = 0; + bt_data->fuzzy_insert.too_few_errors = state->too_few_errors; + bt_data->fuzzy_insert.fuzzy_node = node; /* END_FUZZY node. */ + + /* Check whether there are too few errors. */ + fuzzy_info = &state->fuzzy_info; + + /* The node in this case is the END_FUZZY node. */ + values = node->values; + + /* RE_ERROR_SUCCESS serves as the true value of the flag here. */ + if (fuzzy_info->counts[RE_FUZZY_DEL] < values[RE_FUZZY_VAL_MIN_DEL] || + fuzzy_info->counts[RE_FUZZY_INS] < values[RE_FUZZY_VAL_MIN_INS] || + fuzzy_info->counts[RE_FUZZY_SUB] < values[RE_FUZZY_VAL_MIN_SUB] || + fuzzy_info->counts[RE_FUZZY_ERR] < values[RE_FUZZY_VAL_MIN_ERR]) + state->too_few_errors = RE_ERROR_SUCCESS; + + return RE_ERROR_SUCCESS; +} + +/* Retries a fuzzy insertion. + * + * Each retry counts one more insertion in the top backtrack entry. If + * another insertion isn't permitted, or the saved text position equals the + * slice limit in the direction of the saved node, all of the insertions + * counted in the entry are undone, too_few_errors is restored, the entry is + * discarded and *node is set to NULL. Otherwise the counts and costs are + * increased, too_few_errors is re-evaluated, and *text_pos is set to the + * saved position plus step times the insertion count. + */ +Py_LOCAL_INLINE(int) retry_fuzzy_insert(RE_SafeState* safe_state, Py_ssize_t* + text_pos, RE_Node** node) { + RE_State* state; + RE_FuzzyInfo* fuzzy_info; + RE_BacktrackData* bt_data; + Py_ssize_t new_text_pos; + RE_Node* new_node; + int step; + Py_ssize_t limit; + RE_Node* fuzzy_node; + RE_CODE* values; + + state = safe_state->re_state; + fuzzy_info = &state->fuzzy_info; + values = fuzzy_info->node->values; + + bt_data = state->backtrack; + new_text_pos = bt_data->fuzzy_insert.position.text_pos; + new_node = bt_data->fuzzy_insert.position.node; + + if (new_node->status & RE_STATUS_REVERSE) { + step = -1; + limit = state->slice_start; + } else { + step = 1; + limit = state->slice_end; + } + + /* Could the character at text_pos have been inserted? 
*/ + if (!this_error_permitted(state, RE_FUZZY_INS) || new_text_pos == limit) { + size_t count; + + /* Undo every insertion recorded in this backtrack entry. */ + count = bt_data->fuzzy_insert.count; + + fuzzy_info->counts[RE_FUZZY_INS] -= count; + fuzzy_info->counts[RE_FUZZY_ERR] -= count; + fuzzy_info->total_cost -= values[RE_FUZZY_VAL_INS_COST] * count; + state->total_errors -= count; + state->total_cost -= values[RE_FUZZY_VAL_INS_COST] * count; + state->too_few_errors = bt_data->fuzzy_insert.too_few_errors; + + discard_backtrack(state); + *node = NULL; + return RE_ERROR_SUCCESS; + } + + ++bt_data->fuzzy_insert.count; + + ++fuzzy_info->counts[RE_FUZZY_INS]; + ++fuzzy_info->counts[RE_FUZZY_ERR]; + fuzzy_info->total_cost += values[RE_FUZZY_VAL_INS_COST]; + ++state->total_errors; + state->total_cost += values[RE_FUZZY_VAL_INS_COST]; + + /* Check whether there are too few errors. */ + state->too_few_errors = bt_data->fuzzy_insert.too_few_errors; + fuzzy_node = bt_data->fuzzy_insert.fuzzy_node; /* END_FUZZY node. */ + values = fuzzy_node->values; + if (fuzzy_info->counts[RE_FUZZY_DEL] < values[RE_FUZZY_VAL_MIN_DEL] || + fuzzy_info->counts[RE_FUZZY_INS] < values[RE_FUZZY_VAL_MIN_INS] || + fuzzy_info->counts[RE_FUZZY_SUB] < values[RE_FUZZY_VAL_MIN_SUB] || + fuzzy_info->counts[RE_FUZZY_ERR] < values[RE_FUZZY_VAL_MIN_ERR]) + state->too_few_errors = RE_ERROR_SUCCESS; + + *text_pos = new_text_pos + step * (Py_ssize_t)bt_data->fuzzy_insert.count; + *node = new_node; + + return RE_ERROR_SUCCESS; +} + +/* Tries a fuzzy match of a string. 
*/ +Py_LOCAL_INLINE(int) fuzzy_match_string(RE_SafeState* safe_state, BOOL search, + Py_ssize_t* text_pos, RE_Node* node, Py_ssize_t* string_pos, BOOL* matched, + int step) { + /* The kinds of error are tried in order from 0 at the given text and + * string positions. If no error is permitted or none can be applied, + * *matched is set to FALSE. Otherwise a backtrack entry holding the + * original positions, the kind of error and step is added, the error + * counts and costs are increased, *text_pos and *string_pos are updated + * and *matched is set to TRUE. A negative status from + * next_fuzzy_match_item is returned unchanged, and RE_ERROR_FAILURE is + * returned if the backtrack entry can't be added. + */ + RE_State* state; + RE_FuzzyData data; + RE_FuzzyInfo* fuzzy_info; + RE_CODE* values; + RE_BacktrackData* bt_data; + + state = safe_state->re_state; + + if (!any_error_permitted(state)) { + *matched = FALSE; + return RE_ERROR_SUCCESS; + } + + data.new_text_pos = *text_pos; + data.new_string_pos = *string_pos; + data.step = step; + + fuzzy_info = &state->fuzzy_info; + values = fuzzy_info->node->values; + + /* Permit insertion except initially when searching (it's better just to + * start searching one character later). + */ + data.permit_insertion = !search || data.new_text_pos != + state->search_anchor; + + for (data.fuzzy_type = 0; data.fuzzy_type < RE_FUZZY_COUNT; + data.fuzzy_type++) { + int status; + + status = next_fuzzy_match_item(state, &data, TRUE, data.step); + if (status < 0) + return status; + + if (status == RE_ERROR_SUCCESS) + goto found; + } + + /* No kind of error could be applied here. */ + *matched = FALSE; + return RE_ERROR_SUCCESS; + +found: + /* Save the original positions so that the other kinds can be retried. */ + if (!add_backtrack(safe_state, node->op)) + return RE_ERROR_FAILURE; + bt_data = state->backtrack; + bt_data->fuzzy_string.position.text_pos = *text_pos; + bt_data->fuzzy_string.position.node = node; + bt_data->fuzzy_string.string_pos = *string_pos; + bt_data->fuzzy_string.fuzzy_type = (RE_INT8)data.fuzzy_type; + bt_data->fuzzy_string.step = (RE_INT8)step; + + ++fuzzy_info->counts[data.fuzzy_type]; + ++fuzzy_info->counts[RE_FUZZY_ERR]; + fuzzy_info->total_cost += values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type]; + ++state->total_errors; + state->total_cost += values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type]; + + *text_pos = data.new_text_pos; + *string_pos = data.new_string_pos; + *matched = TRUE; + + return RE_ERROR_SUCCESS; +} + +/* Retries a fuzzy match of a string. 
*/ +Py_LOCAL_INLINE(int) retry_fuzzy_match_string(RE_SafeState* safe_state, BOOL + search, Py_ssize_t* text_pos, RE_Node** node, Py_ssize_t* string_pos, BOOL* + matched) { + /* Restores the positions, node and kind of error saved in the top + * backtrack entry, undoes that error and then tries the remaining kinds + * of error in order. On success the entry is updated in place and + * *matched is set to TRUE; if no other kind can be applied the entry is + * discarded and *matched is set to FALSE. + */ + RE_State* state; + RE_FuzzyData data; + RE_FuzzyInfo* fuzzy_info; + RE_CODE* values; + RE_BacktrackData* bt_data; + RE_Node* new_node; + + state = safe_state->re_state; + fuzzy_info = &state->fuzzy_info; + values = fuzzy_info->node->values; + + bt_data = state->backtrack; + data.new_text_pos = bt_data->fuzzy_string.position.text_pos; + new_node = bt_data->fuzzy_string.position.node; + data.new_string_pos = bt_data->fuzzy_string.string_pos; + data.fuzzy_type = bt_data->fuzzy_string.fuzzy_type; + data.step = bt_data->fuzzy_string.step; + + /* Undo the error that was applied by the previous attempt. */ + --fuzzy_info->counts[data.fuzzy_type]; + --fuzzy_info->counts[RE_FUZZY_ERR]; + fuzzy_info->total_cost -= values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type]; + --state->total_errors; + state->total_cost -= values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type]; + + /* Permit insertion except initially when searching (it's better just to + * start searching one character later). 
+ */ + data.permit_insertion = !search || data.new_text_pos != + state->search_anchor; + + for (++data.fuzzy_type; data.fuzzy_type < RE_FUZZY_COUNT; + data.fuzzy_type++) { + int status; + + status = next_fuzzy_match_item(state, &data, TRUE, data.step); + if (status < 0) + return status; + + if (status == RE_ERROR_SUCCESS) + goto found; + } + + /* Every kind of error has now been tried at this position. */ + discard_backtrack(state); + *matched = FALSE; + return RE_ERROR_SUCCESS; + +found: + bt_data->fuzzy_string.fuzzy_type = (RE_INT8)data.fuzzy_type; + + ++fuzzy_info->counts[data.fuzzy_type]; + ++fuzzy_info->counts[RE_FUZZY_ERR]; + fuzzy_info->total_cost += values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type]; + ++state->total_errors; + state->total_cost += values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type]; + + *text_pos = data.new_text_pos; + *node = new_node; + *string_pos = data.new_string_pos; + *matched = TRUE; + + return RE_ERROR_SUCCESS; +} + +/* Checks a fuzzy match of a string, ignoring case. + * + * Applies the kind of error in data->fuzzy_type if this_error_permitted + * allows it. A deletion advances only the string position, an insertion + * advances only the folded position and is refused when + * data->permit_insertion is false, and a substitution advances both. A new + * folded position outside 0 to data->folded_len is not accepted; + * check_fuzzy_partial decides the result instead. + */ +Py_LOCAL_INLINE(int) next_fuzzy_match_string_fld(RE_State* state, RE_FuzzyData* + data) { + int new_pos; + + if (this_error_permitted(state, data->fuzzy_type)) { + switch (data->fuzzy_type) { + case RE_FUZZY_DEL: + /* Could a character at text_pos have been deleted? */ + data->new_string_pos += data->step; + return RE_ERROR_SUCCESS; + case RE_FUZZY_INS: + /* Could the character at text_pos have been inserted? */ + if (!data->permit_insertion) + return RE_ERROR_FAILURE; + + new_pos = data->new_folded_pos + data->step; + if (0 <= new_pos && new_pos <= data->folded_len) { + data->new_folded_pos = new_pos; + return RE_ERROR_SUCCESS; + } + + return check_fuzzy_partial(state, new_pos); + case RE_FUZZY_SUB: + /* Could the character at text_pos have been substituted? 
*/ + new_pos = data->new_folded_pos + data->step; + if (0 <= new_pos && new_pos <= data->folded_len) { + data->new_folded_pos = new_pos; + data->new_string_pos += data->step; + return RE_ERROR_SUCCESS; + } + + return check_fuzzy_partial(state, new_pos); + } + } + + return RE_ERROR_FAILURE; +} + +/* Tries a fuzzy match of a string, ignoring case. + * + * The kinds of error are tried in order from 0. If no error is permitted or + * none can be applied, *matched is set to FALSE. Otherwise a backtrack entry + * holding the original positions, folded_len, the kind of error and step is + * added, the error counts and costs are increased, *string_pos and + * *folded_pos are updated and *matched is set to TRUE. *text_pos is written + * back with the value it had on entry. + */ +Py_LOCAL_INLINE(int) fuzzy_match_string_fld(RE_SafeState* safe_state, BOOL + search, Py_ssize_t* text_pos, RE_Node* node, Py_ssize_t* string_pos, int* + folded_pos, int folded_len, BOOL* matched, int step) { + RE_State* state; + RE_FuzzyData data; + RE_FuzzyInfo* fuzzy_info; + RE_CODE* values; + Py_ssize_t new_text_pos; + RE_BacktrackData* bt_data; + + state = safe_state->re_state; + + if (!any_error_permitted(state)) { + *matched = FALSE; + return RE_ERROR_SUCCESS; + } + + new_text_pos = *text_pos; + data.new_string_pos = *string_pos; + data.new_folded_pos = *folded_pos; + data.folded_len = folded_len; + data.step = step; + + fuzzy_info = &state->fuzzy_info; + values = fuzzy_info->node->values; + + /* Permit insertion except initially when searching (it's better just to + * start searching one character later). 
+ */ + data.permit_insertion = !search || new_text_pos != state->search_anchor; + /* Insertion is also permitted whenever the folded position is not 0 (for a + * positive step) or not folded_len (otherwise). RE_ERROR_SUCCESS serves as + * the true value here. + */ + if (step > 0) { + if (data.new_folded_pos != 0) + data.permit_insertion = RE_ERROR_SUCCESS; + } else { + if (data.new_folded_pos != folded_len) + data.permit_insertion = RE_ERROR_SUCCESS; + } + + for (data.fuzzy_type = 0; data.fuzzy_type < RE_FUZZY_COUNT; + data.fuzzy_type++) { + int status; + + status = next_fuzzy_match_string_fld(state, &data); + if (status < 0) + return status; + + if (status == RE_ERROR_SUCCESS) + goto found; + } + + *matched = FALSE; + return RE_ERROR_SUCCESS; + +found: + if (!add_backtrack(safe_state, node->op)) + return RE_ERROR_FAILURE; + bt_data = state->backtrack; + bt_data->fuzzy_string.position.text_pos = *text_pos; + bt_data->fuzzy_string.position.node = node; + bt_data->fuzzy_string.string_pos = *string_pos; + bt_data->fuzzy_string.folded_pos = (RE_INT8)(*folded_pos); + bt_data->fuzzy_string.folded_len = (RE_INT8)folded_len; + bt_data->fuzzy_string.fuzzy_type = (RE_INT8)data.fuzzy_type; + bt_data->fuzzy_string.step = (RE_INT8)step; + + ++fuzzy_info->counts[data.fuzzy_type]; + ++fuzzy_info->counts[RE_FUZZY_ERR]; + fuzzy_info->total_cost += values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type]; + ++state->total_errors; + state->total_cost += values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type]; + + *text_pos = new_text_pos; + *string_pos = data.new_string_pos; + *folded_pos = data.new_folded_pos; + *matched = TRUE; + + return RE_ERROR_SUCCESS; +} + +/* Retries a fuzzy match of a string, ignoring case. 
*/ +Py_LOCAL_INLINE(int) retry_fuzzy_match_string_fld(RE_SafeState* safe_state, + BOOL search, Py_ssize_t* text_pos, RE_Node** node, Py_ssize_t* string_pos, + int* folded_pos, BOOL* matched) { + /* Restores the positions, node, folded_len and kind of error saved in the + * top backtrack entry, undoes that error and then tries the remaining + * kinds of error in order. On success the entry is updated in place and + * *matched is set to TRUE; if no other kind can be applied the entry is + * discarded and *matched is set to FALSE. + */ + RE_State* state; + RE_FuzzyData data; + RE_FuzzyInfo* fuzzy_info; + RE_CODE* values; + RE_BacktrackData* bt_data; + Py_ssize_t new_text_pos; + RE_Node* new_node; + + state = safe_state->re_state; + fuzzy_info = &state->fuzzy_info; + values = fuzzy_info->node->values; + + bt_data = state->backtrack; + new_text_pos = bt_data->fuzzy_string.position.text_pos; + new_node = bt_data->fuzzy_string.position.node; + data.new_string_pos = bt_data->fuzzy_string.string_pos; + data.new_folded_pos = bt_data->fuzzy_string.folded_pos; + data.folded_len = bt_data->fuzzy_string.folded_len; + data.fuzzy_type = bt_data->fuzzy_string.fuzzy_type; + data.step = bt_data->fuzzy_string.step; + + /* Undo the error that was applied by the previous attempt. */ + --fuzzy_info->counts[data.fuzzy_type]; + --fuzzy_info->counts[RE_FUZZY_ERR]; + fuzzy_info->total_cost -= values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type]; + --state->total_errors; + state->total_cost -= values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type]; + + /* Permit insertion except initially when searching (it's better just to + * start searching one character later). 
+ */ + data.permit_insertion = !search || new_text_pos != state->search_anchor; + /* Insertion is also permitted whenever the folded position is not 0 (for a + * positive step) or not the saved folded_len (otherwise). RE_ERROR_SUCCESS + * serves as the true value here. + */ + if (data.step > 0) { + if (data.new_folded_pos != 0) + data.permit_insertion = RE_ERROR_SUCCESS; + } else { + if (data.new_folded_pos != bt_data->fuzzy_string.folded_len) + data.permit_insertion = RE_ERROR_SUCCESS; + } + + for (++data.fuzzy_type; data.fuzzy_type < RE_FUZZY_COUNT; + data.fuzzy_type++) { + int status; + + status = next_fuzzy_match_string_fld(state, &data); + if (status < 0) + return status; + + if (status == RE_ERROR_SUCCESS) + goto found; + } + + discard_backtrack(state); + *matched = FALSE; + return RE_ERROR_SUCCESS; + +found: + bt_data->fuzzy_string.fuzzy_type = (RE_INT8)data.fuzzy_type; + + ++fuzzy_info->counts[data.fuzzy_type]; + ++fuzzy_info->counts[RE_FUZZY_ERR]; + fuzzy_info->total_cost += values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type]; + ++state->total_errors; + state->total_cost += values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type]; + + *text_pos = new_text_pos; + *node = new_node; + *string_pos = data.new_string_pos; + *folded_pos = data.new_folded_pos; + *matched = TRUE; + + return RE_ERROR_SUCCESS; +} + +/* Checks a fuzzy match of a group reference, ignoring case. + * + * Applies the kind of error in data->fuzzy_type if this_error_permitted + * allows it. A deletion advances only new_gfolded_pos, an insertion advances + * only new_folded_pos and is refused when data->permit_insertion is false, + * and a substitution advances both. A new folded position outside 0 to + * data->folded_len is not accepted; check_fuzzy_partial decides the result + * instead. + */ +Py_LOCAL_INLINE(int) next_fuzzy_match_group_fld(RE_State* state, RE_FuzzyData* + data) { + int new_pos; + + if (this_error_permitted(state, data->fuzzy_type)) { + switch (data->fuzzy_type) { + case RE_FUZZY_DEL: + /* Could a character at text_pos have been deleted? */ + data->new_gfolded_pos += data->step; + return RE_ERROR_SUCCESS; + case RE_FUZZY_INS: + /* Could the character at text_pos have been inserted? */ + if (!data->permit_insertion) + return RE_ERROR_FAILURE; + + new_pos = data->new_folded_pos + data->step; + if (0 <= new_pos && new_pos <= data->folded_len) { + data->new_folded_pos = new_pos; + return RE_ERROR_SUCCESS; + } + + return check_fuzzy_partial(state, new_pos); + case RE_FUZZY_SUB: + /* Could the character at text_pos have been substituted? 
*/ + new_pos = data->new_folded_pos + data->step; + if (0 <= new_pos && new_pos <= data->folded_len) { + data->new_folded_pos = new_pos; + data->new_gfolded_pos += data->step; + return RE_ERROR_SUCCESS; + } + + return check_fuzzy_partial(state, new_pos); + } + } + + return RE_ERROR_FAILURE; +} + +/* Tries a fuzzy match of a group reference, ignoring case. + * + * The kinds of error are tried in order from 0. If no error is permitted or + * none can be applied, *matched is set to FALSE. Otherwise a backtrack entry + * holding the original positions, both folded lengths, the kind of error and + * step is added, the error counts and costs are increased, *folded_pos and + * *gfolded_pos are updated and *matched is set to TRUE. *text_pos and + * *group_pos are written back with the values they had on entry. + */ +Py_LOCAL_INLINE(int) fuzzy_match_group_fld(RE_SafeState* safe_state, BOOL + search, Py_ssize_t* text_pos, RE_Node* node, int* folded_pos, int folded_len, + Py_ssize_t* group_pos, int* gfolded_pos, int gfolded_len, BOOL* matched, int + step) { + RE_State* state; + RE_FuzzyData data; + RE_FuzzyInfo* fuzzy_info; + RE_CODE* values; + Py_ssize_t new_text_pos; + Py_ssize_t new_group_pos; + RE_BacktrackData* bt_data; + + state = safe_state->re_state; + + if (!any_error_permitted(state)) { + *matched = FALSE; + return RE_ERROR_SUCCESS; + } + + new_text_pos = *text_pos; + data.new_folded_pos = *folded_pos; + data.folded_len = folded_len; + new_group_pos = *group_pos; + data.new_gfolded_pos = *gfolded_pos; + data.step = step; + + fuzzy_info = &state->fuzzy_info; + values = fuzzy_info->node->values; + + /* Permit insertion except initially when searching (it's better just to + * start searching one character later). 
+ */ + data.permit_insertion = !search || new_text_pos != state->search_anchor; + /* Insertion is also permitted whenever the folded position is not 0 (for a + * positive step) or not folded_len (otherwise). RE_ERROR_SUCCESS serves as + * the true value here. + */ + if (data.step > 0) { + if (data.new_folded_pos != 0) + data.permit_insertion = RE_ERROR_SUCCESS; + } else { + if (data.new_folded_pos != folded_len) + data.permit_insertion = RE_ERROR_SUCCESS; + } + + for (data.fuzzy_type = 0; data.fuzzy_type < RE_FUZZY_COUNT; + data.fuzzy_type++) { + int status; + + status = next_fuzzy_match_group_fld(state, &data); + if (status < 0) + return status; + + if (status == RE_ERROR_SUCCESS) + goto found; + } + + *matched = FALSE; + return RE_ERROR_SUCCESS; + +found: + if (!add_backtrack(safe_state, node->op)) + return RE_ERROR_FAILURE; + bt_data = state->backtrack; + bt_data->fuzzy_string.position.text_pos = *text_pos; + bt_data->fuzzy_string.position.node = node; + bt_data->fuzzy_string.string_pos = *group_pos; + bt_data->fuzzy_string.folded_pos = (RE_INT8)(*folded_pos); + bt_data->fuzzy_string.folded_len = (RE_INT8)folded_len; + bt_data->fuzzy_string.gfolded_pos = (RE_INT8)(*gfolded_pos); + bt_data->fuzzy_string.gfolded_len = (RE_INT8)gfolded_len; + bt_data->fuzzy_string.fuzzy_type = (RE_INT8)data.fuzzy_type; + bt_data->fuzzy_string.step = (RE_INT8)step; + + ++fuzzy_info->counts[data.fuzzy_type]; + ++fuzzy_info->counts[RE_FUZZY_ERR]; + fuzzy_info->total_cost += values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type]; + ++state->total_errors; + state->total_cost += values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type]; + + *text_pos = new_text_pos; + *group_pos = new_group_pos; + *folded_pos = data.new_folded_pos; + *gfolded_pos = data.new_gfolded_pos; + *matched = TRUE; + + return RE_ERROR_SUCCESS; +} + +/* Retries a fuzzy match of a group reference, ignoring case. 
*/ +Py_LOCAL_INLINE(int) retry_fuzzy_match_group_fld(RE_SafeState* safe_state, BOOL + search, Py_ssize_t* text_pos, RE_Node** node, int* folded_pos, Py_ssize_t* + group_pos, int* gfolded_pos, BOOL* matched) { + /* Restores the positions, node, folded_len and kind of error saved in the + * top backtrack entry, undoes that error and then tries the remaining + * kinds of error in order. On success the entry is updated in place and + * *matched is set to TRUE; if no other kind can be applied the entry is + * discarded and *matched is set to FALSE. + */ + RE_State* state; + RE_FuzzyData data; + RE_FuzzyInfo* fuzzy_info; + RE_CODE* values; + RE_BacktrackData* bt_data; + Py_ssize_t new_text_pos; + Py_ssize_t new_group_pos; + RE_Node* new_node; + + state = safe_state->re_state; + fuzzy_info = &state->fuzzy_info; + values = fuzzy_info->node->values; + + bt_data = state->backtrack; + new_text_pos = bt_data->fuzzy_string.position.text_pos; + new_node = bt_data->fuzzy_string.position.node; + new_group_pos = bt_data->fuzzy_string.string_pos; + data.new_folded_pos = bt_data->fuzzy_string.folded_pos; + data.folded_len = bt_data->fuzzy_string.folded_len; + data.new_gfolded_pos = bt_data->fuzzy_string.gfolded_pos; + data.fuzzy_type = bt_data->fuzzy_string.fuzzy_type; + data.step = bt_data->fuzzy_string.step; + + /* Undo the error that was applied by the previous attempt. */ + --fuzzy_info->counts[data.fuzzy_type]; + --fuzzy_info->counts[RE_FUZZY_ERR]; + fuzzy_info->total_cost -= values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type]; + --state->total_errors; + state->total_cost -= values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type]; + + /* Permit insertion except initially when searching (it's better just to + * start searching one character later). 
+ */ + /* NOTE(review): unlike fuzzy_match_group_fld, this test doesn't look at the + * sign of data.step; it only compares the folded position with folded_len. + * Confirm that this is intended for a positive step. + */ + data.permit_insertion = !search || new_text_pos != state->search_anchor || + data.new_folded_pos != bt_data->fuzzy_string.folded_len; + + for (++data.fuzzy_type; data.fuzzy_type < RE_FUZZY_COUNT; + data.fuzzy_type++) { + int status; + + status = next_fuzzy_match_group_fld(state, &data); + if (status < 0) + return status; + + if (status == RE_ERROR_SUCCESS) + goto found; + } + + discard_backtrack(state); + *matched = FALSE; + return RE_ERROR_SUCCESS; + +found: + bt_data->fuzzy_string.fuzzy_type = (RE_INT8)data.fuzzy_type; + + ++fuzzy_info->counts[data.fuzzy_type]; + ++fuzzy_info->counts[RE_FUZZY_ERR]; + fuzzy_info->total_cost += values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type]; + ++state->total_errors; + state->total_cost += values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type]; + + *text_pos = new_text_pos; + *node = new_node; + *group_pos = new_group_pos; + *folded_pos = data.new_folded_pos; + *gfolded_pos = data.new_gfolded_pos; + *matched = TRUE; + + return RE_ERROR_SUCCESS; +} + +/* Locates the required string, if there's one. + * + * Returns the position from which matching should start. That is + * state->text_pos if the pattern has no required string, -1 if the required + * string can't be found, and the found position if the search reports a + * partial match. Otherwise state->req_pos and state->req_end record where the + * required string matched and, if pattern->req_offset isn't negative, the + * found position is moved by that offset (subtracted for the forward + * opcodes, added for the reverse ones) and returned provided that it hasn't + * passed state->text_pos. In every remaining case state->text_pos is + * returned. + */ +Py_LOCAL_INLINE(Py_ssize_t) locate_required_string(RE_SafeState* safe_state) { + RE_State* state; + PatternObject* pattern; + Py_ssize_t found_pos; + Py_ssize_t end_pos; + + state = safe_state->re_state; + pattern = state->pattern; + + /* We haven't matched the required string yet. */ + state->req_pos = -1; + + if (!pattern->req_string) + /* There isn't a required string, so start matching from the current + * position. + */ + return state->text_pos; + + /* Search for the required string and calculate where to start matching. */ + switch (pattern->req_string->op) { + case RE_OP_STRING: + { + BOOL is_partial; + + found_pos = string_search(safe_state, pattern->req_string, + state->text_pos, state->slice_end, &is_partial); + if (found_pos < 0) + /* The required string wasn't found. */ + return -1; + + if (is_partial) + /* We found a partial match, so start matching from there. 
*/ + return found_pos; + + /* Record where the required string matched. */ + state->req_pos = found_pos; + state->req_end = found_pos + + (Py_ssize_t)pattern->req_string->value_count; + + if (pattern->req_offset >= 0) { + /* Step back from the required string to where we should start + * matching. + */ + found_pos -= pattern->req_offset; + if (found_pos >= state->text_pos) + return found_pos; + } + break; + } + case RE_OP_STRING_FLD: + { + BOOL is_partial; + + found_pos = string_search_fld(safe_state, pattern->req_string, + state->text_pos, state->slice_end, &end_pos, &is_partial); + if (found_pos < 0) + /* The required string wasn't found. */ + return -1; + + if (is_partial) + /* We found a partial match, so start matching from there. */ + return found_pos; + + /* Record where the required string matched. */ + state->req_pos = found_pos; + state->req_end = end_pos; + + if (pattern->req_offset >= 0) { + /* Step back from the required string to where we should start + * matching. + */ + found_pos -= pattern->req_offset; + if (found_pos >= state->text_pos) + return found_pos; + } + break; + } + case RE_OP_STRING_FLD_REV: + { + BOOL is_partial; + + found_pos = string_search_fld_rev(safe_state, pattern->req_string, + state->text_pos, state->slice_start, &end_pos, &is_partial); + if (found_pos < 0) + /* The required string wasn't found. */ + return -1; + + if (is_partial) + /* We found a partial match, so start matching from there. */ + return found_pos; + + /* Record where the required string matched. */ + state->req_pos = found_pos; + state->req_end = end_pos; + + if (pattern->req_offset >= 0) { + /* Step back from the required string to where we should start + * matching. 
+ */ + found_pos += pattern->req_offset; + if (found_pos <= state->text_pos) + return found_pos; + } + break; + } + case RE_OP_STRING_IGN: + { + BOOL is_partial; + + found_pos = string_search_ign(safe_state, pattern->req_string, + state->text_pos, state->slice_end, &is_partial); + if (found_pos < 0) + /* The required string wasn't found. */ + return -1; + + if (is_partial) + /* We found a partial match, so start matching from there. */ + return found_pos; + + /* Record where the required string matched. */ + state->req_pos = found_pos; + state->req_end = found_pos + + (Py_ssize_t)pattern->req_string->value_count; + + if (pattern->req_offset >= 0) { + /* Step back from the required string to where we should start + * matching. + */ + found_pos -= pattern->req_offset; + if (found_pos >= state->text_pos) + return found_pos; + } + break; + } + case RE_OP_STRING_IGN_REV: + { + BOOL is_partial; + + found_pos = string_search_ign_rev(safe_state, pattern->req_string, + state->text_pos, state->slice_start, &is_partial); + if (found_pos < 0) + /* The required string wasn't found. */ + return -1; + + if (is_partial) + /* We found a partial match, so start matching from there. */ + return found_pos; + + /* Record where the required string matched. */ + state->req_pos = found_pos; + state->req_end = found_pos - + (Py_ssize_t)pattern->req_string->value_count; + + if (pattern->req_offset >= 0) { + /* Step back from the required string to where we should start + * matching. + */ + found_pos += pattern->req_offset; + if (found_pos <= state->text_pos) + return found_pos; + } + break; + } + case RE_OP_STRING_REV: + { + BOOL is_partial; + + found_pos = string_search_rev(safe_state, pattern->req_string, + state->text_pos, state->slice_start, &is_partial); + if (found_pos < 0) + /* The required string wasn't found. */ + return -1; + + if (is_partial) + /* We found a partial match, so start matching from there. */ + return found_pos; + + /* Record where the required string matched. 
*/ + state->req_pos = found_pos; + state->req_end = found_pos - + (Py_ssize_t)pattern->req_string->value_count; + + if (pattern->req_offset >= 0) { + /* Step back from the required string to where we should start + * matching. + */ + found_pos += pattern->req_offset; + if (found_pos <= state->text_pos) + return found_pos; + } + break; + } + } + + /* Start matching from the current position. */ + return state->text_pos; +} + +/* Tries to match a character pattern. + * + * Dispatches on node->op to the corresponding try_match_* function for + * text_pos and returns its result. The set opcodes of each variant share one + * function. Any opcode that isn't listed gives FALSE. + */ +Py_LOCAL_INLINE(int) match_one(RE_State* state, RE_Node* node, Py_ssize_t + text_pos) { + switch (node->op) { + case RE_OP_ANY: + return try_match_ANY(state, node, text_pos); + case RE_OP_ANY_ALL: + return try_match_ANY_ALL(state, node, text_pos); + case RE_OP_ANY_ALL_REV: + return try_match_ANY_ALL_REV(state, node, text_pos); + case RE_OP_ANY_REV: + return try_match_ANY_REV(state, node, text_pos); + case RE_OP_ANY_U: + return try_match_ANY_U(state, node, text_pos); + case RE_OP_ANY_U_REV: + return try_match_ANY_U_REV(state, node, text_pos); + case RE_OP_CHARACTER: + return try_match_CHARACTER(state, node, text_pos); + case RE_OP_CHARACTER_IGN: + return try_match_CHARACTER_IGN(state, node, text_pos); + case RE_OP_CHARACTER_IGN_REV: + return try_match_CHARACTER_IGN_REV(state, node, text_pos); + case RE_OP_CHARACTER_REV: + return try_match_CHARACTER_REV(state, node, text_pos); + case RE_OP_PROPERTY: + return try_match_PROPERTY(state, node, text_pos); + case RE_OP_PROPERTY_IGN: + return try_match_PROPERTY_IGN(state, node, text_pos); + case RE_OP_PROPERTY_IGN_REV: + return try_match_PROPERTY_IGN_REV(state, node, text_pos); + case RE_OP_PROPERTY_REV: + return try_match_PROPERTY_REV(state, node, text_pos); + case RE_OP_RANGE: + return try_match_RANGE(state, node, text_pos); + case RE_OP_RANGE_IGN: + return try_match_RANGE_IGN(state, node, text_pos); + case RE_OP_RANGE_IGN_REV: + return try_match_RANGE_IGN_REV(state, node, text_pos); + case RE_OP_RANGE_REV: + return try_match_RANGE_REV(state, node, text_pos); 
+ case RE_OP_SET_DIFF: + case RE_OP_SET_INTER: + case RE_OP_SET_SYM_DIFF: + case RE_OP_SET_UNION: + return try_match_SET(state, node, text_pos); + case RE_OP_SET_DIFF_IGN: + case RE_OP_SET_INTER_IGN: + case RE_OP_SET_SYM_DIFF_IGN: + case RE_OP_SET_UNION_IGN: + return try_match_SET_IGN(state, node, text_pos); + case RE_OP_SET_DIFF_IGN_REV: + case RE_OP_SET_INTER_IGN_REV: + case RE_OP_SET_SYM_DIFF_IGN_REV: + case RE_OP_SET_UNION_IGN_REV: + return try_match_SET_IGN_REV(state, node, text_pos); + case RE_OP_SET_DIFF_REV: + case RE_OP_SET_INTER_REV: + case RE_OP_SET_SYM_DIFF_REV: + case RE_OP_SET_UNION_REV: + return try_match_SET_REV(state, node, text_pos); + } + + return FALSE; +} + +/* Performs a depth-first match or search from the context. */ +Py_LOCAL_INLINE(int) basic_match(RE_SafeState* safe_state, RE_Node* start_node, + BOOL search, BOOL recursive_call) { + RE_State* state; + RE_EncodingTable* encoding; + PatternObject* pattern; + RE_NextNode start_pair; + Py_UCS4 (*char_at)(void* text, Py_ssize_t pos); + Py_ssize_t pattern_step; /* The overall step of the pattern (forwards or backwards). */ + Py_ssize_t string_pos; + BOOL do_search_start; + Py_ssize_t found_pos; + int folded_pos; + int gfolded_pos; + RE_Node* node; + int status; + TRACE(("<>\n")) + + state = safe_state->re_state; + encoding = state->encoding; + pattern = state->pattern; + + /* Look beyond any initial group node. */ + start_pair.node = start_node; + if (recursive_call) + start_pair.test = locate_test_start(start_node); + else + start_pair.test = pattern->start_test; + + /* Is the pattern anchored to the start or end of the string? */ + switch (start_pair.test->op) { + case RE_OP_END_OF_STRING: + if (state->reverse) { + /* Searching backwards. */ + if (state->text_pos != state->text_length) + return RE_ERROR_FAILURE; + + /* Don't bother to search further because it's anchored. */ + search = FALSE; + } + break; + case RE_OP_START_OF_STRING: + if (!state->reverse) { + /* Searching forwards. 
*/ + if (state->text_pos != 0) + return RE_ERROR_FAILURE; + + /* Don't bother to search further because it's anchored. */ + search = FALSE; + } + break; + } + + char_at = state->char_at; + pattern_step = state->reverse ? -1 : 1; + string_pos = -1; + do_search_start = pattern->do_search_start; + + /* Add a backtrack entry for failure. */ + if (!add_backtrack(safe_state, RE_OP_FAILURE)) + return RE_ERROR_BACKTRACKING; + +start_match: + /* If we're searching, advance along the string until there could be a + * match. + */ + if (pattern->pattern_call_ref >= 0) { + RE_GuardList* guard_list; + + guard_list = &state->group_call_guard_list[pattern->pattern_call_ref]; + guard_list->count = 0; + guard_list->last_text_pos = -1; + } + + /* Locate the required string, if there's one, unless this is a recursive + * call of 'basic_match'. + */ + if (!pattern->req_string || recursive_call) + found_pos = state->text_pos; + else { + found_pos = locate_required_string(safe_state); + if (found_pos < 0) + return RE_ERROR_FAILURE; + } + + if (search) { + state->text_pos = found_pos; + + if (do_search_start) { + RE_Position new_position; + +next_match_1: + /* 'search_start' will clear 'do_search_start' if it can't perform + * a fast search for the next possible match. This enables us to + * avoid the overhead of the call subsequently. + */ + status = search_start(safe_state, &start_pair, &new_position, 0); + if (status != RE_ERROR_SUCCESS) + return status; + + node = new_position.node; + state->text_pos = new_position.text_pos; + + if (node->op == RE_OP_SUCCESS) { + /* Must the match advance past its start? */ + if (state->text_pos != state->search_anchor || + !state->must_advance) + return RE_ERROR_SUCCESS; + + state->text_pos = state->match_pos + pattern_step; + goto next_match_1; + } + + /* 'do_search_start' may have been cleared. 
*/ + do_search_start = pattern->do_search_start; + } else { + /* Avoiding 'search_start', which we've found can't perform a fast + * search for the next possible match. + */ + node = start_node; + +next_match_2: + if (state->reverse) { + if (state->text_pos < state->slice_start) { + if (state->partial_side == RE_PARTIAL_LEFT) + return RE_ERROR_PARTIAL; + + return RE_ERROR_FAILURE; + } + } else { + if (state->text_pos > state->slice_end) { + if (state-> partial_side == RE_PARTIAL_RIGHT) + return RE_ERROR_PARTIAL; + + return RE_ERROR_FAILURE; + } + } + + state->match_pos = state->text_pos; + + if (node->op == RE_OP_SUCCESS) { + /* Must the match advance past its start? */ + if (state->text_pos != state->search_anchor || + !state->must_advance) { + BOOL success; + + if (state->match_all && !recursive_call) { + /* We want to match all of the slice. */ + if (state->reverse) + success = state->text_pos == state->slice_start; + else + success = state->text_pos == state->slice_end; + } else + success = TRUE; + + if (success) + return RE_ERROR_SUCCESS; + } + + state->text_pos = state->match_pos + pattern_step; + goto next_match_2; + } + } + } else { + /* The start position is anchored to the current position. */ + if (found_pos != state->text_pos) + return RE_ERROR_FAILURE; + + node = start_node; + } + +advance: + /* The main matching loop. */ + for (;;) { + TRACE(("%d|", state->text_pos)) + + /* Should we abort the matching? */ + ++state->iterations; + + if (state->iterations == 0 && safe_check_signals(safe_state)) + return RE_ERROR_INTERRUPTED; + + switch (node->op) { + case RE_OP_ANY: /* Any character except a newline. 
*/ + TRACE(("%s\n", re_op_text[node->op])) + + status = try_match_ANY(state, node, state->text_pos); + if (status < 0) + return status; + + if (status == RE_ERROR_SUCCESS) { + ++state->text_pos; + node = node->next_1.node; + } else if (node->status & RE_STATUS_FUZZY) { + status = fuzzy_match_item(safe_state, search, &state->text_pos, + &node, 1); + if (status < 0) + return status; + + if (!node) + goto backtrack; + } else + goto backtrack; + break; + case RE_OP_ANY_ALL: /* Any character at all. */ + TRACE(("%s\n", re_op_text[node->op])) + + status = try_match_ANY_ALL(state, node, state->text_pos); + if (status < 0) + return status; + + if (status == RE_ERROR_SUCCESS) { + ++state->text_pos; + node = node->next_1.node; + } else if (node->status & RE_STATUS_FUZZY) { + status = fuzzy_match_item(safe_state, search, &state->text_pos, + &node, 1); + if (status < 0) + return status; + + if (!node) + goto backtrack; + } else + goto backtrack; + break; + case RE_OP_ANY_ALL_REV: /* Any character at all, backwards. */ + TRACE(("%s\n", re_op_text[node->op])) + + status = try_match_ANY_ALL_REV(state, node, state->text_pos); + if (status < 0) + return status; + + if (status == RE_ERROR_SUCCESS) { + --state->text_pos; + node = node->next_1.node; + } else if (node->status & RE_STATUS_FUZZY) { + status = fuzzy_match_item(safe_state, search, &state->text_pos, + &node, -1); + if (status < 0) + return status; + + if (!node) + goto backtrack; + } else + goto backtrack; + break; + case RE_OP_ANY_REV: /* Any character except a newline, backwards. 
*/ + TRACE(("%s\n", re_op_text[node->op])) + + status = try_match_ANY_REV(state, node, state->text_pos); + if (status < 0) + return status; + + if (status == RE_ERROR_SUCCESS) { + --state->text_pos; + node = node->next_1.node; + } else if (node->status & RE_STATUS_FUZZY) { + status = fuzzy_match_item(safe_state, search, &state->text_pos, + &node, -1); + if (status < 0) + return status; + + if (!node) + goto backtrack; + } else + goto backtrack; + break; + case RE_OP_ANY_U: /* Any character except a line separator. */ + TRACE(("%s\n", re_op_text[node->op])) + + status = try_match_ANY_U(state, node, state->text_pos); + if (status < 0) + return status; + + if (status == RE_ERROR_SUCCESS) { + ++state->text_pos; + node = node->next_1.node; + } else if (node->status & RE_STATUS_FUZZY) { + status = fuzzy_match_item(safe_state, search, &state->text_pos, + &node, 1); + if (status < 0) + return status; + + if (!node) + goto backtrack; + } else + goto backtrack; + break; + case RE_OP_ANY_U_REV: /* Any character except a line separator, backwards. */ + TRACE(("%s\n", re_op_text[node->op])) + + status = try_match_ANY_U_REV(state, node, state->text_pos); + if (status < 0) + return status; + + if (status == RE_ERROR_SUCCESS) { + --state->text_pos; + node = node->next_1.node; + } else if (node->status & RE_STATUS_FUZZY) { + status = fuzzy_match_item(safe_state, search, &state->text_pos, + &node, -1); + if (status < 0) + return status; + + if (!node) + goto backtrack; + } else + goto backtrack; + break; + case RE_OP_ATOMIC: /* Atomic subpattern. */ + { + RE_Info info; + int status; + TRACE(("%s\n", re_op_text[node->op])) + + if (!add_backtrack(safe_state, RE_OP_ATOMIC)) + return RE_ERROR_BACKTRACKING; + state->backtrack->atomic.too_few_errors = state->too_few_errors; + state->backtrack->atomic.capture_change = state->capture_change; + + /* Save the groups. 
*/ + if (!push_groups(safe_state)) + return RE_ERROR_MEMORY; + + save_info(state, &info); + + state->must_advance = FALSE; + + status = basic_match(safe_state, node->nonstring.next_2.node, + FALSE, TRUE); + if (status < 0) + return status; + + reset_guards(state, node->values); + + restore_info(state, &info); + + if (status != RE_ERROR_SUCCESS) + goto backtrack; + + node = node->next_1.node; + break; + } + case RE_OP_BOUNDARY: /* On a word boundary. */ + TRACE(("%s %d\n", re_op_text[node->op], node->match)) + + status = try_match_BOUNDARY(state, node, state->text_pos); + if (status < 0) + return status; + + if (status == RE_ERROR_SUCCESS) + node = node->next_1.node; + else if (node->status & RE_STATUS_FUZZY) { + status = fuzzy_match_item(safe_state, search, &state->text_pos, + &node, 0); + if (status < 0) + return status; + + if (!node) + goto backtrack; + } else + goto backtrack; + break; + case RE_OP_BRANCH: /* 2-way branch. */ + { + RE_Position next_position; + TRACE(("%s\n", re_op_text[node->op])) + + status = try_match(state, &node->next_1, state->text_pos, + &next_position); + if (status < 0) + return status; + + if (status == RE_ERROR_SUCCESS) { + if (!add_backtrack(safe_state, RE_OP_BRANCH)) + return RE_ERROR_BACKTRACKING; + state->backtrack->branch.position.node = + node->nonstring.next_2.node; + state->backtrack->branch.position.text_pos = state->text_pos; + + node = next_position.node; + state->text_pos = next_position.text_pos; + } else + node = node->nonstring.next_2.node; + break; + } + case RE_OP_CALL_REF: /* A group call reference. */ + { + TRACE(("%s %d\n", re_op_text[node->op], node->values[0])) + + if (!push_group_return(safe_state, NULL)) + return RE_ERROR_MEMORY; + + if (!add_backtrack(safe_state, RE_OP_CALL_REF)) + return RE_ERROR_BACKTRACKING; + + node = node->next_1.node; + break; + } + case RE_OP_CHARACTER: /* A character. 
*/ + TRACE(("%s %d %d\n", re_op_text[node->op], node->match, + node->values[0])) + + if (state->text_pos >= state->text_length && state->partial_side == + RE_PARTIAL_RIGHT) + return RE_ERROR_PARTIAL; + + if (state->text_pos < state->slice_end && + matches_CHARACTER(encoding, node, char_at(state->text, + state->text_pos)) == node->match) { + state->text_pos += node->step; + node = node->next_1.node; + } else if (node->status & RE_STATUS_FUZZY) { + status = fuzzy_match_item(safe_state, search, &state->text_pos, + &node, 1); + if (status < 0) + return RE_ERROR_PARTIAL; + + if (!node) + goto backtrack; + } else + goto backtrack; + break; + case RE_OP_CHARACTER_IGN: /* A character, ignoring case. */ + TRACE(("%s %d %d\n", re_op_text[node->op], node->match, + node->values[0])) + + if (state->text_pos >= state->text_length && state->partial_side == + RE_PARTIAL_RIGHT) + return RE_ERROR_PARTIAL; + + if (state->text_pos < state->slice_end && + matches_CHARACTER_IGN(encoding, node, char_at(state->text, + state->text_pos)) == node->match) { + state->text_pos += node->step; + node = node->next_1.node; + } else if (node->status & RE_STATUS_FUZZY) { + status = fuzzy_match_item(safe_state, search, &state->text_pos, + &node, 1); + if (status < 0) + return RE_ERROR_PARTIAL; + + if (!node) + goto backtrack; + } else + goto backtrack; + break; + case RE_OP_CHARACTER_IGN_REV: /* A character, backwards, ignoring case. 
*/ + TRACE(("%s %d %d\n", re_op_text[node->op], node->match, + node->values[0])) + + if (state->text_pos <= 0 && state->partial_side == RE_PARTIAL_LEFT) + return RE_ERROR_PARTIAL; + + if (state->text_pos > state->slice_start && + matches_CHARACTER_IGN(encoding, node, char_at(state->text, + state->text_pos - 1)) == node->match) { + state->text_pos += node->step; + node = node->next_1.node; + } else if (node->status & RE_STATUS_FUZZY) { + status = fuzzy_match_item(safe_state, search, &state->text_pos, + &node, -1); + if (status < 0) + return RE_ERROR_PARTIAL; + + if (!node) + goto backtrack; + } else + goto backtrack; + break; + case RE_OP_CHARACTER_REV: /* A character, backwards. */ + TRACE(("%s %d %d\n", re_op_text[node->op], node->match, + node->values[0])) + + if (state->text_pos <= 0 && state->partial_side == RE_PARTIAL_LEFT) + return RE_ERROR_PARTIAL; + + if (state->text_pos > state->slice_start && + matches_CHARACTER(encoding, node, char_at(state->text, + state->text_pos - 1)) == node->match) { + state->text_pos += node->step; + node = node->next_1.node; + } else if (node->status & RE_STATUS_FUZZY) { + status = fuzzy_match_item(safe_state, search, &state->text_pos, + &node, -1); + if (status < 0) + return RE_ERROR_PARTIAL; + + if (!node) + goto backtrack; + } else + goto backtrack; + break; + case RE_OP_DEFAULT_BOUNDARY: /* On a default word boundary. */ + TRACE(("%s %d\n", re_op_text[node->op], node->match)) + + status = try_match_DEFAULT_BOUNDARY(state, node, state->text_pos); + if (status < 0) + return status; + + if (status == RE_ERROR_SUCCESS) + node = node->next_1.node; + else if (node->status & RE_STATUS_FUZZY) { + status = fuzzy_match_item(safe_state, search, &state->text_pos, + &node, 0); + if (status < 0) + return status; + + if (!node) + goto backtrack; + } else + goto backtrack; + break; + case RE_OP_DEFAULT_END_OF_WORD: /* At the default end of a word. 
*/ + TRACE(("%s\n", re_op_text[node->op])) + + status = try_match_DEFAULT_END_OF_WORD(state, node, + state->text_pos); + if (status < 0) + return status; + + if (status == RE_ERROR_SUCCESS) + node = node->next_1.node; + else if (node->status & RE_STATUS_FUZZY) { + status = fuzzy_match_item(safe_state, search, &state->text_pos, + &node, 0); + if (status < 0) + return status; + + if (!node) + goto backtrack; + } else + goto backtrack; + break; + case RE_OP_DEFAULT_START_OF_WORD: /* At the default start of a word. */ + TRACE(("%s\n", re_op_text[node->op])) + + status = try_match_DEFAULT_START_OF_WORD(state, node, + state->text_pos); + if (status < 0) + return status; + + if (status == RE_ERROR_SUCCESS) + node = node->next_1.node; + else if (node->status & RE_STATUS_FUZZY) { + status = fuzzy_match_item(safe_state, search, &state->text_pos, + &node, 0); + if (status < 0) + return status; + + if (!node) + goto backtrack; + } else + goto backtrack; + break; + case RE_OP_END_FUZZY: /* End of fuzzy matching. */ + TRACE(("%s\n", re_op_text[node->op])) + + if (!fuzzy_insert(safe_state, state->text_pos, node)) + return RE_ERROR_BACKTRACKING; + + /* If there were too few errors, in the fuzzy section, try again. + */ + if (state->too_few_errors) { + state->too_few_errors = FALSE; + goto backtrack; + } + + state->total_fuzzy_counts[RE_FUZZY_SUB] += + state->fuzzy_info.counts[RE_FUZZY_SUB]; + state->total_fuzzy_counts[RE_FUZZY_INS] += + state->fuzzy_info.counts[RE_FUZZY_INS]; + state->total_fuzzy_counts[RE_FUZZY_DEL] += + state->fuzzy_info.counts[RE_FUZZY_DEL]; + + node = node->next_1.node; + break; + case RE_OP_END_GREEDY_REPEAT: /* End of a greedy repeat. */ + { + RE_CODE index; + RE_RepeatData* rp_data; + BOOL changed; + BOOL try_body; + int body_status; + RE_Position next_body_position; + BOOL try_tail; + int tail_status; + RE_Position next_tail_position; + RE_BacktrackData* bt_data; + TRACE(("%s %d\n", re_op_text[node->op], node->values[0])) + + /* Repeat indexes are 0-based. 
*/ + index = node->values[0]; + rp_data = &state->repeats[index]; + + /* The body has matched successfully at this position. */ + if (!guard_repeat(safe_state, index, rp_data->start, + RE_STATUS_BODY, FALSE)) + return RE_ERROR_MEMORY; + + ++rp_data->count; + + /* Have we advanced through the text or has a capture group change? + */ + changed = rp_data->capture_change != state->capture_change || + state->text_pos != rp_data->start; + + /* The counts are of type size_t, so the format needs to specify + * that. + */ + TRACE(("min is %" PY_FORMAT_SIZE_T "u, max is %" PY_FORMAT_SIZE_T + "u, count is %" PY_FORMAT_SIZE_T "u\n", node->values[1], + node->values[2], rp_data->count)) + + /* Could the body or tail match? */ + try_body = changed && (rp_data->count < node->values[2] || + ~node->values[2] == 0) && !is_repeat_guarded(safe_state, index, + state->text_pos, RE_STATUS_BODY); + if (try_body) { + body_status = try_match(state, &node->next_1, state->text_pos, + &next_body_position); + + if (body_status == RE_ERROR_FAILURE) + try_body = FALSE; + } else + body_status = RE_ERROR_FAILURE; + + try_tail = (!changed || rp_data->count >= node->values[1]) && + !is_repeat_guarded(safe_state, index, state->text_pos, + RE_STATUS_TAIL); + if(try_tail) { + tail_status = try_match(state, &node->nonstring.next_2, + state->text_pos, &next_tail_position); + + if (tail_status == RE_ERROR_FAILURE) + try_tail = FALSE; + } else + tail_status = RE_ERROR_FAILURE; + + if (!try_body && !try_tail) { + /* Neither the body nor the tail could match. */ + --rp_data->count; + goto backtrack; + } + + if (body_status < 0 || (body_status == 0 && tail_status < 0)) + return RE_ERROR_PARTIAL; + + /* Record info in case we backtrack into the body. 
*/ + if (!add_backtrack(safe_state, RE_OP_BODY_END)) + return RE_ERROR_BACKTRACKING; + bt_data = state->backtrack; + bt_data->repeat.index = index; + bt_data->repeat.count = rp_data->count - 1; + bt_data->repeat.start = rp_data->start; + bt_data->repeat.capture_change = rp_data->capture_change; + + if (try_body) { + /* Both the body and the tail could match. */ + if (try_tail) { + /* The body takes precedence. If the body fails to match + * then we want to try the tail before backtracking + * further. + */ + + /* Record backtracking info for matching the tail. */ + if (!add_backtrack(safe_state, RE_OP_MATCH_TAIL)) + return RE_ERROR_BACKTRACKING; + bt_data = state->backtrack; + bt_data->repeat.position = next_tail_position; + bt_data->repeat.index = index; + bt_data->repeat.count = rp_data->count; + bt_data->repeat.start = rp_data->start; + bt_data->repeat.capture_change = rp_data->capture_change; + bt_data->repeat.text_pos = state->text_pos; + } + + /* Record backtracking info in case the body fails to match. */ + if (!add_backtrack(safe_state, RE_OP_BODY_START)) + return RE_ERROR_BACKTRACKING; + bt_data = state->backtrack; + bt_data->repeat.index = index; + bt_data->repeat.text_pos = state->text_pos; + + rp_data->capture_change = state->capture_change; + rp_data->start = state->text_pos; + + /* Advance into the body. */ + node = next_body_position.node; + state->text_pos = next_body_position.text_pos; + } else { + /* Only the tail could match. */ + + /* Advance into the tail. */ + node = next_tail_position.node; + state->text_pos = next_tail_position.text_pos; + } + break; + } + case RE_OP_END_GROUP: /* End of a capture group. */ + { + RE_CODE private_index; + RE_CODE public_index; + RE_GroupData* group; + RE_BacktrackData* bt_data; + TRACE(("%s %d\n", re_op_text[node->op], node->values[1])) + + /* Capture group indexes are 1-based (excluding group 0, which is + * the entire matched string). 
+ */ + private_index = node->values[0]; + public_index = node->values[1]; + group = &state->groups[private_index - 1]; + + if (!add_backtrack(safe_state, RE_OP_END_GROUP)) + return RE_ERROR_BACKTRACKING; + bt_data = state->backtrack; + bt_data->group.private_index = private_index; + bt_data->group.public_index = public_index; + bt_data->group.text_pos = group->span.end; + bt_data->group.capture = (BOOL)node->values[2]; + bt_data->group.current_capture = group->current_capture; + + if (pattern->group_info[private_index - 1].referenced && + group->span.end != state->text_pos) + ++state->capture_change; + group->span.end = state->text_pos; + + /* Save the capture? */ + if (node->values[2]) { + group->current_capture = (Py_ssize_t)group->capture_count; + if (!save_capture(safe_state, private_index, public_index)) + return RE_ERROR_MEMORY; + } + + node = node->next_1.node; + break; + } + case RE_OP_END_LAZY_REPEAT: /* End of a lazy repeat. */ + { + RE_CODE index; + RE_RepeatData* rp_data; + BOOL changed; + BOOL try_body; + int body_status; + RE_Position next_body_position; + BOOL try_tail; + int tail_status; + RE_Position next_tail_position; + RE_BacktrackData* bt_data; + TRACE(("%s %d\n", re_op_text[node->op], node->values[0])) + + /* Repeat indexes are 0-based. */ + index = node->values[0]; + rp_data = &state->repeats[index]; + + /* The body has matched successfully at this position. */ + if (!guard_repeat(safe_state, index, rp_data->start, + RE_STATUS_BODY, FALSE)) + return RE_ERROR_MEMORY; + + ++rp_data->count; + + /* Have we advanced through the text or has a capture group change? + */ + changed = rp_data->capture_change != state->capture_change || + state->text_pos != rp_data->start; + + /* The counts are of type size_t, so the format needs to specify + * that. + */ + TRACE(("min is %" PY_FORMAT_SIZE_T "u, max is %" PY_FORMAT_SIZE_T + "u, count is %" PY_FORMAT_SIZE_T "u\n", node->values[1], + node->values[2], rp_data->count)) + + /* Could the body or tail match? 
*/ + try_body = changed && (rp_data->count < node->values[2] || + ~node->values[2] == 0) && !is_repeat_guarded(safe_state, index, + state->text_pos, RE_STATUS_BODY); + if (try_body) { + body_status = try_match(state, &node->next_1, state->text_pos, + &next_body_position); + + if (body_status == RE_ERROR_FAILURE) + try_body = FALSE; + } else + body_status = RE_ERROR_FAILURE; + + try_tail = (!changed || rp_data->count >= node->values[1]); + if (try_tail) { + tail_status = try_match(state, &node->nonstring.next_2, + state->text_pos, &next_tail_position); + + if (tail_status == RE_ERROR_FAILURE) + try_tail = FALSE; + } else + tail_status = RE_ERROR_FAILURE; + + if (!try_body && !try_tail) { + /* Neither the body nor the tail could match. */ + --rp_data->count; + goto backtrack; + } + + if (body_status < 0 || (body_status == 0 && tail_status < 0)) + return RE_ERROR_PARTIAL; + + /* Record info in case we backtrack into the body. */ + if (!add_backtrack(safe_state, RE_OP_BODY_END)) + return RE_ERROR_BACKTRACKING; + bt_data = state->backtrack; + bt_data->repeat.index = index; + bt_data->repeat.count = rp_data->count - 1; + bt_data->repeat.start = rp_data->start; + bt_data->repeat.capture_change = rp_data->capture_change; + + if (try_body) { + /* Both the body and the tail could match. */ + if (try_tail) { + /* The tail takes precedence. If the tail fails to match + * then we want to try the body before backtracking + * further. + */ + + /* Record backtracking info for matching the body. */ + if (!add_backtrack(safe_state, RE_OP_MATCH_BODY)) + return RE_ERROR_BACKTRACKING; + bt_data = state->backtrack; + bt_data->repeat.position = next_body_position; + bt_data->repeat.index = index; + bt_data->repeat.count = rp_data->count; + bt_data->repeat.start = rp_data->start; + bt_data->repeat.capture_change = rp_data->capture_change; + bt_data->repeat.text_pos = state->text_pos; + + /* Advance into the tail. 
*/ + node = next_tail_position.node; + state->text_pos = next_tail_position.text_pos; + } else { + /* Only the body could match. */ + + /* Record backtracking info in case the body fails to + * match. + */ + if (!add_backtrack(safe_state, RE_OP_BODY_START)) + return RE_ERROR_BACKTRACKING; + bt_data = state->backtrack; + bt_data->repeat.index = index; + bt_data->repeat.text_pos = state->text_pos; + + rp_data->capture_change = state->capture_change; + rp_data->start = state->text_pos; + + /* Advance into the body. */ + node = next_body_position.node; + state->text_pos = next_body_position.text_pos; + } + } else { + /* Only the tail could match. */ + + /* Advance into the tail. */ + node = next_tail_position.node; + state->text_pos = next_tail_position.text_pos; + } + break; + } + case RE_OP_END_OF_LINE: /* At the end of a line. */ + TRACE(("%s\n", re_op_text[node->op])) + + status = try_match_END_OF_LINE(state, node, state->text_pos); + if (status < 0) + return status; + + if (status == RE_ERROR_SUCCESS) + node = node->next_1.node; + else if (node->status & RE_STATUS_FUZZY) { + status = fuzzy_match_item(safe_state, search, &state->text_pos, + &node, 0); + if (status < 0) + return status; + + if (!node) + goto backtrack; + } else + goto backtrack; + break; + case RE_OP_END_OF_LINE_U: /* At the end of a line. */ + TRACE(("%s\n", re_op_text[node->op])) + + status = try_match_END_OF_LINE_U(state, node, state->text_pos); + if (status < 0) + return status; + + if (status == RE_ERROR_SUCCESS) + node = node->next_1.node; + else if (node->status & RE_STATUS_FUZZY) { + status = fuzzy_match_item(safe_state, search, &state->text_pos, + &node, 0); + if (status < 0) + return status; + + if (!node) + goto backtrack; + } else + goto backtrack; + break; + case RE_OP_END_OF_STRING: /* At the end of the string. 
*/ + TRACE(("%s\n", re_op_text[node->op])) + + status = try_match_END_OF_STRING(state, node, state->text_pos); + if (status < 0) + return status; + + if (status == RE_ERROR_SUCCESS) + node = node->next_1.node; + else if (node->status & RE_STATUS_FUZZY) { + status = fuzzy_match_item(safe_state, search, &state->text_pos, + &node, 0); + if (status < 0) + return status; + + if (!node) + goto backtrack; + } else + goto backtrack; + break; + case RE_OP_END_OF_STRING_LINE: /* At end of string or final newline. */ + TRACE(("%s\n", re_op_text[node->op])) + + status = try_match_END_OF_STRING_LINE(state, node, + state->text_pos); + if (status < 0) + return status; + + if (status == RE_ERROR_SUCCESS) + node = node->next_1.node; + else if (node->status & RE_STATUS_FUZZY) { + status = fuzzy_match_item(safe_state, search, &state->text_pos, + &node, 0); + if (status < 0) + return status; + + if (!node) + goto backtrack; + } else + goto backtrack; + break; + case RE_OP_END_OF_STRING_LINE_U: /* At end of string or final newline. */ + TRACE(("%s\n", re_op_text[node->op])) + + status = try_match_END_OF_STRING_LINE_U(state, node, + state->text_pos); + if (status < 0) + return status; + + if (status == RE_ERROR_SUCCESS) + node = node->next_1.node; + else if (node->status & RE_STATUS_FUZZY) { + status = fuzzy_match_item(safe_state, search, &state->text_pos, + &node, 0); + if (status < 0) + return status; + + if (!node) + goto backtrack; + } else + goto backtrack; + break; + case RE_OP_END_OF_WORD: /* At the end of a word. */ + TRACE(("%s\n", re_op_text[node->op])) + + status = try_match_END_OF_WORD(state, node, state->text_pos); + if (status < 0) + return status; + + if (status == RE_ERROR_SUCCESS) + node = node->next_1.node; + else if (node->status & RE_STATUS_FUZZY) { + status = fuzzy_match_item(safe_state, search, &state->text_pos, + &node, 0); + if (status < 0) + return status; + + if (!node) + goto backtrack; + } else + goto backtrack; + break; + case RE_OP_FUZZY: /* Fuzzy matching. 
*/ + { + RE_FuzzyInfo* fuzzy_info; + RE_BacktrackData* bt_data; + TRACE(("%s\n", re_op_text[node->op])) + + fuzzy_info = &state->fuzzy_info; + + /* Save the current fuzzy info. */ + if (!add_backtrack(safe_state, RE_OP_FUZZY)) + return RE_ERROR_BACKTRACKING; + bt_data = state->backtrack; + memmove(&bt_data->fuzzy.fuzzy_info, fuzzy_info, + sizeof(RE_FuzzyInfo)); + bt_data->fuzzy.index = node->values[0]; + bt_data->fuzzy.text_pos = state->text_pos; + + /* Initialise the new fuzzy info. */ + memset(fuzzy_info->counts, 0, 4 * sizeof(fuzzy_info->counts[0])); + fuzzy_info->total_cost = 0; + fuzzy_info->node = node; + + node = node->next_1.node; + break; + } + case RE_OP_GRAPHEME_BOUNDARY: /* On a grapheme boundary. */ + TRACE(("%s\n", re_op_text[node->op])) + + status = try_match_GRAPHEME_BOUNDARY(state, node, state->text_pos); + if (status < 0) + return status; + + if (status == RE_ERROR_SUCCESS) + node = node->next_1.node; + else if (node->status & RE_STATUS_FUZZY) { + status = fuzzy_match_item(safe_state, search, &state->text_pos, + &node, 0); + if (status < 0) + return status; + + if (!node) + goto backtrack; + } else + goto backtrack; + break; + case RE_OP_GREEDY_REPEAT: /* Greedy repeat. */ + { + RE_CODE index; + RE_RepeatData* rp_data; + RE_BacktrackData* bt_data; + BOOL try_body; + int body_status; + RE_Position next_body_position; + BOOL try_tail; + int tail_status; + RE_Position next_tail_position; + TRACE(("%s %d\n", re_op_text[node->op], node->values[0])) + + /* Repeat indexes are 0-based. */ + index = node->values[0]; + rp_data = &state->repeats[index]; + + /* We might need to backtrack into the head, so save the current + * repeat. 
+ */ + if (!add_backtrack(safe_state, RE_OP_GREEDY_REPEAT)) + return RE_ERROR_BACKTRACKING; + bt_data = state->backtrack; + bt_data->repeat.index = index; + bt_data->repeat.count = rp_data->count; + bt_data->repeat.start = rp_data->start; + bt_data->repeat.capture_change = rp_data->capture_change; + bt_data->repeat.text_pos = state->text_pos; + + /* Initialise the new repeat. */ + rp_data->count = 0; + rp_data->start = state->text_pos; + rp_data->capture_change = state->capture_change; + + /* Could the body or tail match? */ + try_body = node->values[2] > 0 && !is_repeat_guarded(safe_state, + index, state->text_pos, RE_STATUS_BODY); + if (try_body) { + body_status = try_match(state, &node->next_1, state->text_pos, + &next_body_position); + + if (body_status == RE_ERROR_FAILURE) + try_body = FALSE; + } else + body_status = RE_ERROR_FAILURE; + + try_tail = node->values[1] == 0; + if (try_tail) { + tail_status = try_match(state, &node->nonstring.next_2, + state->text_pos, &next_tail_position); + + if (tail_status == RE_ERROR_FAILURE) + try_tail = FALSE; + } else + tail_status = RE_ERROR_FAILURE; + if (!try_body && !try_tail) + /* Neither the body nor the tail could match. */ + goto backtrack; + + if (body_status < 0 || (body_status == 0 && tail_status < 0)) + return RE_ERROR_PARTIAL; + + if (try_body) { + if (try_tail) { + /* Both the body and the tail could match, but the body + * takes precedence. If the body fails to match then we + * want to try the tail before backtracking further. + */ + + /* Record backtracking info for matching the tail. */ + if (!add_backtrack(safe_state, RE_OP_MATCH_TAIL)) + return RE_ERROR_BACKTRACKING; + bt_data = state->backtrack; + bt_data->repeat.position = next_tail_position; + bt_data->repeat.index = index; + bt_data->repeat.count = rp_data->count; + bt_data->repeat.start = rp_data->start; + bt_data->repeat.capture_change = rp_data->capture_change; + bt_data->repeat.text_pos = state->text_pos; + } + + /* Advance into the body. 
*/ + node = next_body_position.node; + state->text_pos = next_body_position.text_pos; + } else { + /* Only the tail could match. */ + + /* Advance into the tail. */ + node = next_tail_position.node; + state->text_pos = next_tail_position.text_pos; + } + break; + } + case RE_OP_GREEDY_REPEAT_ONE: /* Greedy repeat for one character. */ + { + RE_CODE index; + RE_RepeatData* rp_data; + size_t count; + BOOL is_partial; + BOOL match; + RE_BacktrackData* bt_data; + TRACE(("%s %d\n", re_op_text[node->op], node->values[0])) + + /* Repeat indexes are 0-based. */ + index = node->values[0]; + rp_data = &state->repeats[index]; + + if (is_repeat_guarded(safe_state, index, state->text_pos, + RE_STATUS_BODY)) + goto backtrack; + + /* Count how many times the character repeats, up to the maximum. + */ + count = count_one(state, node->nonstring.next_2.node, + state->text_pos, node->values[2], &is_partial); + if (is_partial) { + state->text_pos += (Py_ssize_t)count * node->step; + return RE_ERROR_PARTIAL; + } + + /* Unmatch until it's not guarded. */ + match = FALSE; + for (;;) { + if (count < node->values[1]) + /* The number of repeats is below the minimum. */ + break; + + if (!is_repeat_guarded(safe_state, index, state->text_pos + + (Py_ssize_t)count * node->step, RE_STATUS_TAIL)) { + /* It's not guarded at this position. */ + match = TRUE; + break; + } + + if (count == 0) + break; + + --count; + } + + if (!match) { + /* The repeat has failed to match at this position. */ + if (!guard_repeat(safe_state, index, state->text_pos, + RE_STATUS_BODY, TRUE)) + return RE_ERROR_MEMORY; + goto backtrack; + } + + /* Record the backtracking info. 
*/ + if (!add_backtrack(safe_state, RE_OP_GREEDY_REPEAT_ONE)) + return RE_ERROR_BACKTRACKING; + bt_data = state->backtrack; + bt_data->repeat.position.node = node; + bt_data->repeat.index = index; + bt_data->repeat.text_pos = rp_data->start; + bt_data->repeat.count = rp_data->count; + + rp_data->start = state->text_pos; + rp_data->count = count; + + /* Advance into the tail. */ + state->text_pos += (Py_ssize_t)count * node->step; + node = node->next_1.node; + break; + } + case RE_OP_GROUP_CALL: /* Group call. */ + { + size_t index; + size_t g; + size_t r; + TRACE(("%s %d\n", re_op_text[node->op], node->values[0])) + + index = node->values[0]; + + /* Save the capture groups and repeat guards. */ + if (!push_group_return(safe_state, node->next_1.node)) + return RE_ERROR_MEMORY; + + /* Clear the capture groups for the group call. They'll be restored + * on return. + */ + for (g = 0; g < state->pattern->true_group_count; g++) { + RE_GroupData* group; + + group = &state->groups[g]; + group->span.start = -1; + group->span.end = -1; + group->current_capture = -1; + } + + /* Clear the repeat guards for the group call. They'll be restored + * on return. + */ + for (r = 0; r < state->pattern->repeat_count; r++) { + RE_RepeatData* repeat; + + repeat = &state->repeats[r]; + repeat->body_guard_list.count = 0; + repeat->body_guard_list.last_text_pos = -1; + repeat->tail_guard_list.count = 0; + repeat->tail_guard_list.last_text_pos = -1; + } + + /* Call a group, skipping its CALL_REF node. */ + node = pattern->call_ref_info[index].node->next_1.node; + + if (!add_backtrack(safe_state, RE_OP_GROUP_CALL)) + return RE_ERROR_BACKTRACKING; + break; + } + case RE_OP_GROUP_EXISTS: /* Capture group exists. */ + { + RE_GroupData* group; + TRACE(("%s %d\n", re_op_text[node->op], node->values[0])) + + /* Capture group indexes are 1-based (excluding group 0, which is + * the entire matched string). + * + * Check whether the captured text, if any, exists at this position + * in the string. 
+ */ + group = &state->groups[node->values[0] - 1]; + if (group->current_capture >= 0) + node = node->next_1.node; + else + node = node->nonstring.next_2.node; + break; + } + case RE_OP_GROUP_RETURN: /* Group return. */ + { + RE_Node* return_node; + RE_BacktrackData* bt_data; + TRACE(("%s\n", re_op_text[node->op])) + + return_node = top_group_return(state); + + if (!add_backtrack(safe_state, RE_OP_GROUP_RETURN)) + return RE_ERROR_BACKTRACKING; + bt_data = state->backtrack; + bt_data->group_call.node = return_node; + bt_data->group_call.capture_change = state->capture_change; + + if (return_node) { + /* The group was called. */ + node = return_node; + + /* Save the groups. */ + if (!push_groups(safe_state)) + return RE_ERROR_MEMORY; + + /* Save the repeats. */ + if (!push_repeats(safe_state)) + return RE_ERROR_MEMORY; + } else + /* The group was not called. */ + node = node->next_1.node; + + pop_group_return(state); + break; + } + case RE_OP_LAZY_REPEAT: /* Lazy repeat. */ + { + RE_CODE index; + RE_RepeatData* rp_data; + RE_BacktrackData* bt_data; + BOOL try_body; + int body_status; + RE_Position next_body_position; + BOOL try_tail; + int tail_status; + RE_Position next_tail_position; + TRACE(("%s %d\n", re_op_text[node->op], node->values[0])) + + /* Repeat indexes are 0-based. */ + index = node->values[0]; + rp_data = &state->repeats[index]; + + /* We might need to backtrack into the head, so save the current + * repeat. + */ + if (!add_backtrack(safe_state, RE_OP_LAZY_REPEAT)) + return RE_ERROR_BACKTRACKING; + bt_data = state->backtrack; + bt_data->repeat.index = index; + bt_data->repeat.count = rp_data->count; + bt_data->repeat.start = rp_data->start; + bt_data->repeat.capture_change = rp_data->capture_change; + bt_data->repeat.text_pos = state->text_pos; + + /* Initialise the new repeat. */ + rp_data->count = 0; + rp_data->start = state->text_pos; + rp_data->capture_change = state->capture_change; + + /* Could the body or tail match? 
*/ + try_body = node->values[2] > 0 && !is_repeat_guarded(safe_state, + index, state->text_pos, RE_STATUS_BODY); + if (try_body) { + body_status = try_match(state, &node->next_1, state->text_pos, + &next_body_position); + + if (body_status == RE_ERROR_FAILURE) + try_body = FALSE; + } else + body_status = RE_ERROR_FAILURE; + + try_tail = node->values[1] == 0; + if(try_tail) { + tail_status = try_match(state, &node->nonstring.next_2, + state->text_pos, &next_tail_position); + + if (tail_status == RE_ERROR_FAILURE) + try_tail = FALSE; + } else + tail_status = RE_ERROR_FAILURE; + + if (!try_body && !try_tail) + /* Neither the body nor the tail could match. */ + goto backtrack; + + if (body_status < 0 || (body_status == 0 && tail_status < 0)) + return RE_ERROR_PARTIAL; + + if (try_body) { + if (try_tail) { + /* Both the body and the tail could match, but the tail + * takes precedence. If the tail fails to match then we + * want to try the body before backtracking further. + */ + + /* Record backtracking info for matching the tail. */ + if (!add_backtrack(safe_state, RE_OP_MATCH_BODY)) + return RE_ERROR_BACKTRACKING; + bt_data = state->backtrack; + bt_data->repeat.position = next_body_position; + bt_data->repeat.index = index; + bt_data->repeat.count = rp_data->count; + bt_data->repeat.start = rp_data->start; + bt_data->repeat.capture_change = rp_data->capture_change; + bt_data->repeat.text_pos = state->text_pos; + + /* Advance into the tail. */ + node = next_tail_position.node; + state->text_pos = next_tail_position.text_pos; + } else { + /* Advance into the body. */ + node = next_body_position.node; + state->text_pos = next_body_position.text_pos; + } + } else { + /* Only the tail could match. */ + + /* Advance into the tail. */ + node = next_tail_position.node; + state->text_pos = next_tail_position.text_pos; + } + break; + } + case RE_OP_LAZY_REPEAT_ONE: /* Lazy repeat for one character. 
*/ + { + RE_CODE index; + RE_RepeatData* rp_data; + size_t count; + BOOL is_partial; + TRACE(("%s %d\n", re_op_text[node->op], node->values[0])) + + /* Repeat indexes are 0-based. */ + index = node->values[0]; + rp_data = &state->repeats[index]; + + if (is_repeat_guarded(safe_state, index, state->text_pos, + RE_STATUS_BODY)) + goto backtrack; + + /* Count how many times the character repeats, up to the minimum. + */ + count = count_one(state, node->nonstring.next_2.node, + state->text_pos, node->values[1], &is_partial); + if (is_partial) { + state->text_pos += (Py_ssize_t)count * node->step; + return RE_ERROR_PARTIAL; + } + + /* Have we matched at least the minimum? */ + if (count < node->values[1]) { + /* The repeat has failed to match at this position. */ + if (!guard_repeat(safe_state, index, state->text_pos, + RE_STATUS_BODY, TRUE)) + return RE_ERROR_MEMORY; + goto backtrack; + } + + if (count < node->values[2]) { + /* The match is shorter than the maximum, so we might need to + * backtrack the repeat to consume more. + */ + RE_BacktrackData* bt_data; + + /* Get the offset to the repeat values in the context. */ + rp_data = &state->repeats[index]; + if (!add_backtrack(safe_state, RE_OP_LAZY_REPEAT_ONE)) + return RE_ERROR_BACKTRACKING; + bt_data = state->backtrack; + bt_data->repeat.position.node = node; + bt_data->repeat.index = index; + bt_data->repeat.text_pos = rp_data->start; + bt_data->repeat.count = rp_data->count; + + rp_data->start = state->text_pos; + rp_data->count = count; + } + + /* Advance into the tail. */ + state->text_pos += (Py_ssize_t)count * node->step; + node = node->next_1.node; + break; + } + case RE_OP_LOOKAROUND: /* Lookaround. */ + { + RE_Info info; + size_t capture_change; + Py_ssize_t saved_slice_start; + Py_ssize_t saved_slice_end; + Py_ssize_t saved_text_pos; + BOOL too_few_errors; + int status; + TRACE(("%s %d\n", re_op_text[node->op], node->match)) + + /* Save the groups. 
*/ + if (!push_groups(safe_state)) + return RE_ERROR_MEMORY; + + capture_change = state->capture_change; + + /* Save the other info. */ + save_info(state, &info); + + saved_slice_start = state->slice_start; + saved_slice_end = state->slice_end; + saved_text_pos = state->text_pos; + state->slice_start = 0; + state->slice_end = state->text_length; + state->must_advance = FALSE; + + too_few_errors = state->too_few_errors; + + status = basic_match(safe_state, node->nonstring.next_2.node, + FALSE, TRUE); + if (status < 0) + return status; + + reset_guards(state, node->values + 1); + + state->text_pos = saved_text_pos; + state->slice_end = saved_slice_end; + state->slice_start = saved_slice_start; + + /* Restore the other info. */ + restore_info(state, &info); + + if (node->match) { + /* It's a positive lookaround. */ + if (status == RE_ERROR_SUCCESS) { + /* It succeeded, so the groups and certain flags may have + * changed. + */ + if (!add_backtrack(safe_state, RE_OP_LOOKAROUND)) + return RE_ERROR_BACKTRACKING; + + /* We'll restore the groups and flags on backtracking. */ + state->backtrack->lookaround.too_few_errors = + too_few_errors; + state->backtrack->lookaround.capture_change = + capture_change; + } else { + /* It failed, so the groups and certain flags haven't + * changed. + */ + drop_groups(state); + goto backtrack; + } + } else { + /* It's a negative lookaround. */ + if (status == RE_ERROR_SUCCESS) { + /* It succeeded, so the groups and certain flags may have + * changed. We need to restore them. + */ + pop_groups(state); + state->too_few_errors = too_few_errors; + state->capture_change = capture_change; + goto backtrack; + } else + /* It failed, so the groups and certain flags haven't + * changed. + */ + drop_groups(state); + } + + node = node->next_1.node; + break; + } + case RE_OP_PROPERTY: /* A property. 
*/ + TRACE(("%s %d %d\n", re_op_text[node->op], node->match, + node->values[0])) + + if (state->text_pos >= state->text_length && state->partial_side == + RE_PARTIAL_RIGHT) + return RE_ERROR_PARTIAL; + + if (state->text_pos < state->slice_end && + matches_PROPERTY(encoding, node, char_at(state->text, + state->text_pos)) == node->match) { + state->text_pos += node->step; + node = node->next_1.node; + } else if (node->status & RE_STATUS_FUZZY) { + status = fuzzy_match_item(safe_state, search, &state->text_pos, + &node, 1); + if (status < 0) + return RE_ERROR_PARTIAL; + + if (!node) + goto backtrack; + } else + goto backtrack; + break; + case RE_OP_PROPERTY_IGN: /* A property, ignoring case. */ + TRACE(("%s %d %d\n", re_op_text[node->op], node->match, + node->values[0])) + + if (state->text_pos >= state->text_length && state->partial_side == + RE_PARTIAL_RIGHT) + return RE_ERROR_PARTIAL; + + if (state->text_pos < state->slice_end && + matches_PROPERTY_IGN(encoding, node, char_at(state->text, + state->text_pos)) == node->match) { + state->text_pos += node->step; + node = node->next_1.node; + } else if (node->status & RE_STATUS_FUZZY) { + status = fuzzy_match_item(safe_state, search, &state->text_pos, + &node, 1); + if (status < 0) + return RE_ERROR_PARTIAL; + + if (!node) + goto backtrack; + } else + goto backtrack; + break; + case RE_OP_PROPERTY_IGN_REV: /* A property, backwards, ignoring case. 
*/ + TRACE(("%s %d %d\n", re_op_text[node->op], node->match, + node->values[0])) + + if (state->text_pos <= 0 && state->partial_side == RE_PARTIAL_LEFT) + return RE_ERROR_PARTIAL; + + if (state->text_pos > state->slice_start && + matches_PROPERTY_IGN(encoding, node, char_at(state->text, + state->text_pos - 1)) == node->match) { + state->text_pos += node->step; + node = node->next_1.node; + } else if (node->status & RE_STATUS_FUZZY) { + status = fuzzy_match_item(safe_state, search, &state->text_pos, + &node, -1); + if (status < 0) + return RE_ERROR_PARTIAL; + + if (!node) + goto backtrack; + } else + goto backtrack; + break; + case RE_OP_PROPERTY_REV: /* A property, backwards. */ + TRACE(("%s %d %d\n", re_op_text[node->op], node->match, + node->values[0])) + + if (state->text_pos <= 0 && state->partial_side == RE_PARTIAL_LEFT) + return RE_ERROR_PARTIAL; + + if (state->text_pos > state->slice_start && + matches_PROPERTY(encoding, node, char_at(state->text, + state->text_pos - 1)) == node->match) { + state->text_pos += node->step; + node = node->next_1.node; + } else if (node->status & RE_STATUS_FUZZY) { + status = fuzzy_match_item(safe_state, search, &state->text_pos, + &node, -1); + if (status < 0) + return RE_ERROR_PARTIAL; + + if (!node) + goto backtrack; + } else + goto backtrack; + break; + case RE_OP_RANGE: /* A range. 
*/ + TRACE(("%s %d %d %d\n", re_op_text[node->op], node->match, + node->values[0], node->values[1])) + + if (state->text_pos >= state->text_length && state->partial_side == + RE_PARTIAL_RIGHT) + return RE_ERROR_PARTIAL; + + if (state->text_pos < state->slice_end && matches_RANGE(encoding, + node, char_at(state->text, state->text_pos)) == node->match) { + state->text_pos += node->step; + node = node->next_1.node; + } else if (node->status & RE_STATUS_FUZZY) { + status = fuzzy_match_item(safe_state, search, &state->text_pos, + &node, 1); + if (status < 0) + return RE_ERROR_PARTIAL; + + if (!node) + goto backtrack; + } else + goto backtrack; + break; + case RE_OP_RANGE_IGN: /* A range, ignoring case. */ + TRACE(("%s %d %d %d\n", re_op_text[node->op], node->match, + node->values[0], node->values[1])) + + if (state->text_pos >= state->text_length && state->partial_side == + RE_PARTIAL_RIGHT) + return RE_ERROR_PARTIAL; + + if (state->text_pos < state->slice_end && + matches_RANGE_IGN(encoding, node, char_at(state->text, + state->text_pos)) == node->match) { + state->text_pos += node->step; + node = node->next_1.node; + } else if (node->status & RE_STATUS_FUZZY) { + status = fuzzy_match_item(safe_state, search, &state->text_pos, + &node, 1); + if (status < 0) + return RE_ERROR_PARTIAL; + + if (!node) + goto backtrack; + } else + goto backtrack; + break; + case RE_OP_RANGE_IGN_REV: /* A range, backwards, ignoring case. 
*/ + TRACE(("%s %d %d %d\n", re_op_text[node->op], node->match, + node->values[0], node->values[1])) + + if (state->text_pos <= 0 && state->partial_side == RE_PARTIAL_LEFT) + return RE_ERROR_PARTIAL; + + if (state->text_pos > state->slice_start && + matches_RANGE_IGN(encoding, node, char_at(state->text, + state->text_pos - 1)) == node->match) { + state->text_pos += node->step; + node = node->next_1.node; + } else if (node->status & RE_STATUS_FUZZY) { + status = fuzzy_match_item(safe_state, search, &state->text_pos, + &node, -1); + if (status < 0) + return RE_ERROR_PARTIAL; + + if (!node) + goto backtrack; + } else + goto backtrack; + break; + case RE_OP_RANGE_REV: /* A range, backwards. */ + TRACE(("%s %d %d %d\n", re_op_text[node->op], node->match, + node->values[0], node->values[1])) + + if (state->text_pos <= 0 && state->partial_side == RE_PARTIAL_LEFT) + return RE_ERROR_PARTIAL; + + if (state->text_pos > state->slice_start && matches_RANGE(encoding, + node, char_at(state->text, state->text_pos - 1)) == node->match) + { + state->text_pos += node->step; + node = node->next_1.node; + } else if (node->status & RE_STATUS_FUZZY) { + status = fuzzy_match_item(safe_state, search, &state->text_pos, + &node, -1); + if (status < 0) + return RE_ERROR_PARTIAL; + + if (!node) + goto backtrack; + } else + goto backtrack; + break; + case RE_OP_REF_GROUP: /* Reference to a capture group. */ + { + RE_GroupData* group; + RE_GroupSpan* span; + TRACE(("%s %d\n", re_op_text[node->op], node->values[0])) + + /* Capture group indexes are 1-based (excluding group 0, which is + * the entire matched string). + * + * Check whether the captured text, if any, exists at this position + * in the string. + */ + + /* Did the group capture anything? */ + group = &state->groups[node->values[0] - 1]; + if (group->current_capture < 0) + goto backtrack; + + span = &group->captures[group->current_capture]; + + if (string_pos < 0) + string_pos = span->start; + + /* Try comparing. 
*/ + while (string_pos < span->end) { + if (state->text_pos >= state->text_length && + state->partial_side == RE_PARTIAL_RIGHT) + return RE_ERROR_PARTIAL; + + if (state->text_pos < state->slice_end && same_char(encoding, + char_at(state->text, state->text_pos), char_at(state->text, + string_pos))) { + ++string_pos; + ++state->text_pos; + } else if (node->status & RE_STATUS_FUZZY) { + BOOL matched; + + status = fuzzy_match_string(safe_state, search, + &state->text_pos, node, &string_pos, &matched, 1); + if (status < 0) + return RE_ERROR_PARTIAL; + + if (!matched) { + string_pos = -1; + goto backtrack; + } + } else { + string_pos = -1; + goto backtrack; + } + } + + string_pos = -1; + + /* Successful match. */ + node = node->next_1.node; + break; + } + case RE_OP_REF_GROUP_FLD: /* Reference to a capture group, ignoring case. */ + { + RE_GroupData* group; + RE_GroupSpan* span; + int (*full_case_fold)(Py_UCS4 ch, Py_UCS4* folded); + int folded_len; + int gfolded_len; + Py_UCS4 folded[RE_MAX_FOLDED]; + Py_UCS4 gfolded[RE_MAX_FOLDED]; + TRACE(("%s %d\n", re_op_text[node->op], node->values[0])) + + /* Capture group indexes are 1-based (excluding group 0, which is + * the entire matched string). + * + * Check whether the captured text, if any, exists at this position + * in the string. + */ + + /* Did the group capture anything? */ + group = &state->groups[node->values[0] - 1]; + if (group->current_capture < 0) + goto backtrack; + + span = &group->captures[group->current_capture]; + + full_case_fold = encoding->full_case_fold; + + if (string_pos < 0) { + string_pos = span->start; + folded_pos = 0; + folded_len = 0; + gfolded_pos = 0; + gfolded_len = 0; + } else { + folded_len = full_case_fold(char_at(state->text, + state->text_pos), folded); + gfolded_len = full_case_fold(char_at(state->text, string_pos), + gfolded); + } + + /* Try comparing. */ + while (string_pos < span->end) { + /* Case-fold at current position in text. 
*/ + if (folded_pos >= folded_len) { + if (state->text_pos >= state->text_length && + state->partial_side == RE_PARTIAL_RIGHT) + return RE_ERROR_PARTIAL; + + if (state->text_pos < state->slice_end) + folded_len = full_case_fold(char_at(state->text, + state->text_pos), folded); + else + folded_len = 0; + + folded_pos = 0; + } + + /* Case-fold at current position in group. */ + if (gfolded_pos >= gfolded_len) { + gfolded_len = full_case_fold(char_at(state->text, + string_pos), gfolded); + gfolded_pos = 0; + } + + if (folded_pos < folded_len && folded[folded_pos] == + gfolded[gfolded_pos]) { + ++folded_pos; + ++gfolded_pos; + } else if (node->status & RE_STATUS_FUZZY) { + BOOL matched; + + status = fuzzy_match_group_fld(safe_state, search, + &state->text_pos, node, &folded_pos, folded_len, + &string_pos, &gfolded_pos, gfolded_len, &matched, 1); + if (status < 0) + return RE_ERROR_PARTIAL; + + if (!matched) { + string_pos = -1; + goto backtrack; + } + } else { + string_pos = -1; + goto backtrack; + } + + if (folded_pos >= folded_len && folded_len > 0) + ++state->text_pos; + + if (gfolded_pos >= gfolded_len) + ++string_pos; + } + + string_pos = -1; + + if (folded_pos < folded_len || gfolded_pos < gfolded_len) + goto backtrack; + + /* Successful match. */ + node = node->next_1.node; + break; + } + case RE_OP_REF_GROUP_FLD_REV: /* Reference to a capture group, ignoring case. */ + { + RE_GroupData* group; + RE_GroupSpan* span; + int (*full_case_fold)(Py_UCS4 ch, Py_UCS4* folded); + int folded_len; + int gfolded_len; + Py_UCS4 folded[RE_MAX_FOLDED]; + Py_UCS4 gfolded[RE_MAX_FOLDED]; + TRACE(("%s %d\n", re_op_text[node->op], node->values[0])) + + /* Capture group indexes are 1-based (excluding group 0, which is + * the entire matched string). + * + * Check whether the captured text, if any, exists at this position + * in the string. + */ + + /* Did the group capture anything? 
*/ + group = &state->groups[node->values[0] - 1]; + if (group->current_capture < 0) + goto backtrack; + + span = &group->captures[group->current_capture]; + + full_case_fold = encoding->full_case_fold; + + if (string_pos < 0) { + string_pos = span->end; + folded_pos = 0; + folded_len = 0; + gfolded_pos = 0; + gfolded_len = 0; + } else { + folded_len = full_case_fold(char_at(state->text, + state->text_pos - 1), folded); + gfolded_len = full_case_fold(char_at(state->text, string_pos - + 1), gfolded); + } + + /* Try comparing. */ + while (string_pos > span->start) { + /* Case-fold at current position in text. */ + if (folded_pos <= 0) { + if (state->text_pos <= 0 && state->partial_side == + RE_PARTIAL_LEFT) + return RE_ERROR_PARTIAL; + + if (state->text_pos > state->slice_start) + folded_len = full_case_fold(char_at(state->text, + state->text_pos - 1), folded); + else + folded_len = 0; + + folded_pos = folded_len; + } + + /* Case-fold at current position in group. */ + if (gfolded_pos <= 0) { + gfolded_len = full_case_fold(char_at(state->text, + string_pos - 1), gfolded); + gfolded_pos = gfolded_len; + } + + if (folded_pos > 0 && folded[folded_pos - 1] == + gfolded[gfolded_pos - 1]) { + --folded_pos; + --gfolded_pos; + } else if (node->status & RE_STATUS_FUZZY) { + BOOL matched; + + status = fuzzy_match_group_fld(safe_state, search, + &state->text_pos, node, &folded_pos, folded_len, + &string_pos, &gfolded_pos, gfolded_len, &matched, -1); + if (status < 0) + return RE_ERROR_PARTIAL; + + if (!matched) { + string_pos = -1; + goto backtrack; + } + } else { + string_pos = -1; + goto backtrack; + } + + if (folded_pos <= 0 && folded_len > 0) + --state->text_pos; + + if (gfolded_pos <= 0) + --string_pos; + } + + string_pos = -1; + + if (folded_pos > 0 || gfolded_pos > 0) + goto backtrack; + + /* Successful match. */ + node = node->next_1.node; + break; + } + case RE_OP_REF_GROUP_IGN: /* Reference to a capture group, ignoring case. 
*/ + { + RE_GroupData* group; + RE_GroupSpan* span; + TRACE(("%s %d\n", re_op_text[node->op], node->values[0])) + + /* Capture group indexes are 1-based (excluding group 0, which is + * the entire matched string). + * + * Check whether the captured text, if any, exists at this position + * in the string. + */ + + /* Did the group capture anything? */ + group = &state->groups[node->values[0] - 1]; + if (group->current_capture < 0) + goto backtrack; + + span = &group->captures[group->current_capture]; + + if (string_pos < 0) + string_pos = span->start; + + /* Try comparing. */ + while (string_pos < span->end) { + if (state->text_pos >= state->text_length && + state->partial_side == RE_PARTIAL_RIGHT) + return RE_ERROR_PARTIAL; + + if (state->text_pos < state->slice_end && + same_char_ign(encoding, char_at(state->text, + state->text_pos), char_at(state->text, string_pos))) { + ++string_pos; + ++state->text_pos; + } else if (node->status & RE_STATUS_FUZZY) { + BOOL matched; + + status = fuzzy_match_string(safe_state, search, + &state->text_pos, node, &string_pos, &matched, 1); + if (status < 0) + return RE_ERROR_PARTIAL; + + if (!matched) { + string_pos = -1; + goto backtrack; + } + } else { + string_pos = -1; + goto backtrack; + } + } + + string_pos = -1; + + /* Successful match. */ + node = node->next_1.node; + break; + } + case RE_OP_REF_GROUP_IGN_REV: /* Reference to a capture group, ignoring case. */ + { + RE_GroupData* group; + RE_GroupSpan* span; + TRACE(("%s %d\n", re_op_text[node->op], node->values[0])) + + /* Capture group indexes are 1-based (excluding group 0, which is + * the entire matched string). + * + * Check whether the captured text, if any, exists at this position + * in the string. + */ + + /* Did the group capture anything? */ + group = &state->groups[node->values[0] - 1]; + if (group->current_capture < 0) + goto backtrack; + + span = &group->captures[group->current_capture]; + + if (string_pos < 0) + string_pos = span->end; + + /* Try comparing. 
*/ + while (string_pos > span->start) { + if (state->text_pos <= 0 && state->partial_side == + RE_PARTIAL_LEFT) + return RE_ERROR_PARTIAL; + + if (state->text_pos > state->slice_start && + same_char_ign(encoding, char_at(state->text, state->text_pos + - 1), char_at(state->text, string_pos - 1))) { + --string_pos; + --state->text_pos; + } else if (node->status & RE_STATUS_FUZZY) { + BOOL matched; + + status = fuzzy_match_string(safe_state, search, + &state->text_pos, node, &string_pos, &matched, -1); + if (status < 0) + return RE_ERROR_PARTIAL; + + if (!matched) { + string_pos = -1; + goto backtrack; + } + } else { + string_pos = -1; + goto backtrack; + } + } + + string_pos = -1; + + /* Successful match. */ + node = node->next_1.node; + break; + } + case RE_OP_REF_GROUP_REV: /* Reference to a capture group. */ + { + RE_GroupData* group; + RE_GroupSpan* span; + TRACE(("%s %d\n", re_op_text[node->op], node->values[0])) + + /* Capture group indexes are 1-based (excluding group 0, which is + * the entire matched string). + * + * Check whether the captured text, if any, exists at this position + * in the string. + */ + + /* Did the group capture anything? */ + group = &state->groups[node->values[0] - 1]; + if (group->current_capture < 0) + goto backtrack; + + span = &group->captures[group->current_capture]; + + if (string_pos < 0) + string_pos = span->end; + + /* Try comparing. 
*/ + while (string_pos > span->start) { + if (state->text_pos <= 0 && state->partial_side == + RE_PARTIAL_LEFT) + return RE_ERROR_PARTIAL; + + if (state->text_pos > state->slice_start && same_char(encoding, + char_at(state->text, state->text_pos - 1), + char_at(state->text, string_pos - 1))) { + --string_pos; + --state->text_pos; + } else if (node->status & RE_STATUS_FUZZY) { + BOOL matched; + + status = fuzzy_match_string(safe_state, search, + &state->text_pos, node, &string_pos, &matched, -1); + if (status < 0) + return RE_ERROR_PARTIAL; + + if (!matched) { + string_pos = -1; + goto backtrack; + } + } else { + string_pos = -1; + goto backtrack; + } + } + + string_pos = -1; + + /* Successful match. */ + node = node->next_1.node; + break; + } + case RE_OP_SEARCH_ANCHOR: /* At the start of the search. */ + TRACE(("%s %d\n", re_op_text[node->op], node->values[0])) + + if (state->text_pos == state->search_anchor) + node = node->next_1.node; + else if (node->status & RE_STATUS_FUZZY) { + status = fuzzy_match_item(safe_state, search, &state->text_pos, + &node, 0); + if (status < 0) + return status; + + if (!node) + goto backtrack; + } else + goto backtrack; + break; + case RE_OP_SET_DIFF: /* Character set. */ + case RE_OP_SET_INTER: + case RE_OP_SET_SYM_DIFF: + case RE_OP_SET_UNION: + TRACE(("%s %d\n", re_op_text[node->op], node->match)) + + if (state->text_pos >= state->text_length && state->partial_side == + RE_PARTIAL_RIGHT) + return RE_ERROR_PARTIAL; + + if (state->text_pos < state->slice_end && matches_SET(encoding, + node, char_at(state->text, state->text_pos)) == node->match) { + state->text_pos += node->step; + node = node->next_1.node; + } else if (node->status & RE_STATUS_FUZZY) { + status = fuzzy_match_item(safe_state, search, &state->text_pos, + &node, 1); + if (status < 0) + return RE_ERROR_PARTIAL; + + if (!node) + goto backtrack; + } else + goto backtrack; + break; + case RE_OP_SET_DIFF_IGN: /* Character set, ignoring case. 
*/ + case RE_OP_SET_INTER_IGN: + case RE_OP_SET_SYM_DIFF_IGN: + case RE_OP_SET_UNION_IGN: + TRACE(("%s %d\n", re_op_text[node->op], node->match)) + + if (state->text_pos >= state->text_length && state->partial_side == + RE_PARTIAL_RIGHT) + return RE_ERROR_PARTIAL; + + if (state->text_pos < state->slice_end && matches_SET_IGN(encoding, + node, char_at(state->text, state->text_pos)) == node->match) { + state->text_pos += node->step; + node = node->next_1.node; + } else if (node->status & RE_STATUS_FUZZY) { + status = fuzzy_match_item(safe_state, search, &state->text_pos, + &node, 1); + if (status < 0) + return RE_ERROR_PARTIAL; + + if (!node) + goto backtrack; + } else + goto backtrack; + break; + case RE_OP_SET_DIFF_IGN_REV: /* Character set, ignoring case. */ + case RE_OP_SET_INTER_IGN_REV: + case RE_OP_SET_SYM_DIFF_IGN_REV: + case RE_OP_SET_UNION_IGN_REV: + TRACE(("%s %d\n", re_op_text[node->op], node->match)) + + if (state->text_pos <= 0 && state->partial_side == RE_PARTIAL_LEFT) + return RE_ERROR_PARTIAL; + + if (state->text_pos > state->slice_start && + matches_SET_IGN(encoding, node, char_at(state->text, + state->text_pos - 1)) == node->match) { + state->text_pos += node->step; + node = node->next_1.node; + } else if (node->status & RE_STATUS_FUZZY) { + status = fuzzy_match_item(safe_state, search, &state->text_pos, + &node, -1); + if (status < 0) + return RE_ERROR_PARTIAL; + + if (!node) + goto backtrack; + } else + goto backtrack; + break; + case RE_OP_SET_DIFF_REV: /* Character set. 
*/ + case RE_OP_SET_INTER_REV: + case RE_OP_SET_SYM_DIFF_REV: + case RE_OP_SET_UNION_REV: + TRACE(("%s %d\n", re_op_text[node->op], node->match)) + + if (state->text_pos <= 0 && state->partial_side == RE_PARTIAL_LEFT) + return RE_ERROR_PARTIAL; + + if (state->text_pos > state->slice_start && matches_SET(encoding, + node, char_at(state->text, state->text_pos - 1)) == node->match) + { + state->text_pos += node->step; + node = node->next_1.node; + } else if (node->status & RE_STATUS_FUZZY) { + status = fuzzy_match_item(safe_state, search, &state->text_pos, + &node, -1); + if (status < 0) + return RE_ERROR_PARTIAL; + + if (!node) + goto backtrack; + } else + goto backtrack; + break; + case RE_OP_START_GROUP: /* Start of a capture group. */ + { + RE_CODE private_index; + RE_CODE public_index; + RE_GroupData* group; + RE_BacktrackData* bt_data; + TRACE(("%s %d\n", re_op_text[node->op], node->values[1])) + + /* Capture group indexes are 1-based (excluding group 0, which is + * the entire matched string). + */ + private_index = node->values[0]; + public_index = node->values[1]; + group = &state->groups[private_index - 1]; + + if (!add_backtrack(safe_state, RE_OP_START_GROUP)) + return RE_ERROR_BACKTRACKING; + bt_data = state->backtrack; + bt_data->group.private_index = private_index; + bt_data->group.public_index = public_index; + bt_data->group.text_pos = group->span.start; + bt_data->group.capture = (BOOL)node->values[2]; + bt_data->group.current_capture = group->current_capture; + + if (pattern->group_info[private_index - 1].referenced && + group->span.start != state->text_pos) + ++state->capture_change; + group->span.start = state->text_pos; + + /* Save the capture? */ + if (node->values[2]) { + group->current_capture = (Py_ssize_t)group->capture_count; + if (!save_capture(safe_state, private_index, public_index)) + return RE_ERROR_MEMORY; + } + + node = node->next_1.node; + break; + } + case RE_OP_START_OF_LINE: /* At the start of a line. 
*/ + TRACE(("%s\n", re_op_text[node->op])) + + status = try_match_START_OF_LINE(state, node, state->text_pos); + if (status < 0) + return status; + + if (status == RE_ERROR_SUCCESS) + node = node->next_1.node; + else if (node->status & RE_STATUS_FUZZY) { + status = fuzzy_match_item(safe_state, search, &state->text_pos, + &node, 0); + if (status < 0) + return status; + + if (!node) + goto backtrack; + } else + goto backtrack; + break; + case RE_OP_START_OF_LINE_U: /* At the start of a line. */ + TRACE(("%s\n", re_op_text[node->op])) + + status = try_match_START_OF_LINE_U(state, node, state->text_pos); + if (status < 0) + return status; + + if (status == RE_ERROR_SUCCESS) + node = node->next_1.node; + else if (node->status & RE_STATUS_FUZZY) { + status = fuzzy_match_item(safe_state, search, &state->text_pos, + &node, 0); + if (status < 0) + return status; + + if (!node) + goto backtrack; + } else + goto backtrack; + break; + case RE_OP_START_OF_STRING: /* At the start of the string. */ + TRACE(("%s\n", re_op_text[node->op])) + + status = try_match_START_OF_STRING(state, node, state->text_pos); + if (status < 0) + return status; + + if (status == RE_ERROR_SUCCESS) + node = node->next_1.node; + else if (node->status & RE_STATUS_FUZZY) { + status = fuzzy_match_item(safe_state, search, &state->text_pos, + &node, 0); + if (status < 0) + return status; + + if (!node) + goto backtrack; + } else + goto backtrack; + break; + case RE_OP_START_OF_WORD: /* At the start of a word. */ + TRACE(("%s\n", re_op_text[node->op])) + + status = try_match_START_OF_WORD(state, node, state->text_pos); + if (status < 0) + return status; + + if (status == RE_ERROR_SUCCESS) + node = node->next_1.node; + else if (node->status & RE_STATUS_FUZZY) { + status = fuzzy_match_item(safe_state, search, &state->text_pos, + &node, 0); + if (status < 0) + return status; + + if (!node) + goto backtrack; + } else + goto backtrack; + break; + case RE_OP_STRING: /* A string. 
*/ + { + Py_ssize_t length; + RE_CODE* values; + TRACE(("%s %d\n", re_op_text[node->op], node->value_count)) + + if ((node->status & RE_STATUS_REQUIRED) && state->text_pos == + state->req_pos && string_pos < 0) + state->text_pos = state->req_end; + else { + length = (Py_ssize_t)node->value_count; + + if (string_pos < 0) + string_pos = 0; + + values = node->values; + + /* Try comparing. */ + while (string_pos < length) { + if (state->text_pos >= state->text_length && + state->partial_side == RE_PARTIAL_RIGHT) + return RE_ERROR_PARTIAL; + + if (state->text_pos < state->slice_end && + same_char(encoding, char_at(state->text, + state->text_pos), values[string_pos])) { + ++string_pos; + ++state->text_pos; + } else if (node->status & RE_STATUS_FUZZY) { + BOOL matched; + + status = fuzzy_match_string(safe_state, search, + &state->text_pos, node, &string_pos, &matched, 1); + if (status < 0) + return RE_ERROR_PARTIAL; + + if (!matched) { + string_pos = -1; + goto backtrack; + } + } else { + string_pos = -1; + goto backtrack; + } + } + } + + string_pos = -1; + + /* Successful match. */ + node = node->next_1.node; + break; + } + case RE_OP_STRING_FLD: /* A string, ignoring case. 
*/ + { + Py_ssize_t length; + int (*full_case_fold)(Py_UCS4 ch, Py_UCS4* folded); + RE_CODE* values; + int folded_len; + Py_UCS4 folded[RE_MAX_FOLDED]; + TRACE(("%s %d\n", re_op_text[node->op], node->value_count)) + + if ((node->status & RE_STATUS_REQUIRED) && state->text_pos == + state->req_pos && string_pos < 0) + state->text_pos = state->req_end; + else { + length = (Py_ssize_t)node->value_count; + + full_case_fold = encoding->full_case_fold; + + if (string_pos < 0) { + string_pos = 0; + folded_pos = 0; + folded_len = 0; + } else { + folded_len = full_case_fold(char_at(state->text, + state->text_pos), folded); + if (folded_pos >= folded_len) { + if (state->text_pos >= state->slice_end) + goto backtrack; + + ++state->text_pos; + folded_pos = 0; + folded_len = 0; + } + } + + values = node->values; + + /* Try comparing. */ + while (string_pos < length) { + if (folded_pos >= folded_len) { + if (state->text_pos >= state->text_length && + state->partial_side == RE_PARTIAL_RIGHT) + return RE_ERROR_PARTIAL; + + folded_len = full_case_fold(char_at(state->text, + state->text_pos), folded); + folded_pos = 0; + } + + if (same_char_ign(encoding, folded[folded_pos], + values[string_pos])) { + ++string_pos; + ++folded_pos; + + if (folded_pos >= folded_len) + ++state->text_pos; + } else if (node->status & RE_STATUS_FUZZY) { + BOOL matched; + + status = fuzzy_match_string_fld(safe_state, search, + &state->text_pos, node, &string_pos, &folded_pos, + folded_len, &matched, 1); + if (status < 0) + return RE_ERROR_PARTIAL; + + if (!matched) { + string_pos = -1; + goto backtrack; + } + + if (folded_pos >= folded_len) + ++state->text_pos; + } else { + string_pos = -1; + goto backtrack; + } + } + + if (node->status & RE_STATUS_FUZZY) { + while (folded_pos < folded_len) { + BOOL matched; + + if (!fuzzy_match_string_fld(safe_state, search, + &state->text_pos, node, &string_pos, &folded_pos, + folded_len, &matched, 1)) + return RE_ERROR_BACKTRACKING; + + if (!matched) { + string_pos = -1; 
+ goto backtrack; + } + + if (folded_pos >= folded_len) + ++state->text_pos; + } + } + + string_pos = -1; + + if (folded_pos < folded_len) + goto backtrack; + } + + /* Successful match. */ + node = node->next_1.node; + break; + } + case RE_OP_STRING_FLD_REV: /* A string, ignoring case. */ + { + Py_ssize_t length; + int (*full_case_fold)(Py_UCS4 ch, Py_UCS4* folded); + RE_CODE* values; + int folded_len; + Py_UCS4 folded[RE_MAX_FOLDED]; + TRACE(("%s %d\n", re_op_text[node->op], node->value_count)) + + if ((node->status & RE_STATUS_REQUIRED) && state->text_pos == + state->req_pos && string_pos < 0) + state->text_pos = state->req_end; + else { + length = (Py_ssize_t)node->value_count; + + full_case_fold = encoding->full_case_fold; + + if (string_pos < 0) { + string_pos = length; + folded_pos = 0; + folded_len = 0; + } else { + folded_len = full_case_fold(char_at(state->text, + state->text_pos - 1), folded); + if (folded_pos <= 0) { + if (state->text_pos <= state->slice_start) + goto backtrack; + + --state->text_pos; + folded_pos = 0; + folded_len = 0; + } + } + + values = node->values; + + /* Try comparing. 
*/ + while (string_pos > 0) { + if (folded_pos <= 0) { + if (state->text_pos <= 0 && state->partial_side == + RE_PARTIAL_LEFT) + return RE_ERROR_PARTIAL; + + folded_len = full_case_fold(char_at(state->text, + state->text_pos - 1), folded); + folded_pos = folded_len; + } + + if (same_char_ign(encoding, folded[folded_pos - 1], + values[string_pos - 1])) { + --string_pos; + --folded_pos; + + if (folded_pos <= 0) + --state->text_pos; + } else if (node->status & RE_STATUS_FUZZY) { + BOOL matched; + + status = fuzzy_match_string_fld(safe_state, search, + &state->text_pos, node, &string_pos, &folded_pos, + folded_len, &matched, -1); + if (status < 0) + return RE_ERROR_PARTIAL; + + if (!matched) { + string_pos = -1; + goto backtrack; + } + + if (folded_pos <= 0) + --state->text_pos; + } else { + string_pos = -1; + goto backtrack; + } + } + + if (node->status & RE_STATUS_FUZZY) { + while (folded_pos > 0) { + BOOL matched; + + if (!fuzzy_match_string_fld(safe_state, search, + &state->text_pos, node, &string_pos, &folded_pos, + folded_len, &matched, -1)) + return RE_ERROR_BACKTRACKING; + + if (!matched) { + string_pos = -1; + goto backtrack; + } + + if (folded_pos <= 0) + --state->text_pos; + } + } + + string_pos = -1; + + if (folded_pos > 0) + goto backtrack; + } + + /* Successful match. */ + node = node->next_1.node; + break; + } + case RE_OP_STRING_IGN: /* A string, ignoring case. */ + { + Py_ssize_t length; + RE_CODE* values; + TRACE(("%s %d\n", re_op_text[node->op], node->value_count)) + + if ((node->status & RE_STATUS_REQUIRED) && state->text_pos == + state->req_pos && string_pos < 0) + state->text_pos = state->req_end; + else { + length = (Py_ssize_t)node->value_count; + + if (string_pos < 0) + string_pos = 0; + + values = node->values; + + /* Try comparing. 
*/ + while (string_pos < length) { + if (state->text_pos >= state->text_length && + state->partial_side == RE_PARTIAL_RIGHT) + return RE_ERROR_PARTIAL; + + if (state->text_pos < state->slice_end && + same_char_ign(encoding, char_at(state->text, + state->text_pos), values[string_pos])) { + ++string_pos; + ++state->text_pos; + } else if (node->status & RE_STATUS_FUZZY) { + BOOL matched; + + status = fuzzy_match_string(safe_state, search, + &state->text_pos, node, &string_pos, &matched, 1); + if (status < 0) + return RE_ERROR_PARTIAL; + + if (!matched) { + string_pos = -1; + goto backtrack; + } + } else { + string_pos = -1; + goto backtrack; + } + } + } + + string_pos = -1; + + /* Successful match. */ + node = node->next_1.node; + break; + } + case RE_OP_STRING_IGN_REV: /* A string, ignoring case. */ + { + Py_ssize_t length; + RE_CODE* values; + TRACE(("%s %d\n", re_op_text[node->op], node->value_count)) + + if ((node->status & RE_STATUS_REQUIRED) && state->text_pos == + state->req_pos && string_pos < 0) + state->text_pos = state->req_end; + else { + length = (Py_ssize_t)node->value_count; + + if (string_pos < 0) + string_pos = length; + + values = node->values; + + /* Try comparing. */ + while (string_pos > 0) { + if (state->text_pos <= 0 && state->partial_side == + RE_PARTIAL_LEFT) + return RE_ERROR_PARTIAL; + + if (state->text_pos > state->slice_start && + same_char_ign(encoding, char_at(state->text, + state->text_pos - 1), values[string_pos - 1])) { + --string_pos; + --state->text_pos; + } else if (node->status & RE_STATUS_FUZZY) { + BOOL matched; + + status = fuzzy_match_string(safe_state, search, + &state->text_pos, node, &string_pos, &matched, -1); + if (status < 0) + return RE_ERROR_PARTIAL; + + if (!matched) { + string_pos = -1; + goto backtrack; + } + } else { + string_pos = -1; + goto backtrack; + } + } + } + + string_pos = -1; + + /* Successful match. */ + node = node->next_1.node; + break; + } + case RE_OP_STRING_REV: /* A string. 
*/ + { + Py_ssize_t length; + RE_CODE* values; + TRACE(("%s %d\n", re_op_text[node->op], node->value_count)) + + if ((node->status & RE_STATUS_REQUIRED) && state->text_pos == + state->req_pos && string_pos < 0) + state->text_pos = state->req_end; + else { + length = (Py_ssize_t)node->value_count; + + if (string_pos < 0) + string_pos = length; + + values = node->values; + + /* Try comparing. */ + while (string_pos > 0) { + if (state->text_pos <= 0 && state->partial_side == + RE_PARTIAL_LEFT) + return RE_ERROR_PARTIAL; + + if (state->text_pos > state->slice_start && + same_char(encoding, char_at(state->text, state->text_pos + - 1), values[string_pos - 1])) { + --string_pos; + --state->text_pos; + } else if (node->status & RE_STATUS_FUZZY) { + BOOL matched; + + status = fuzzy_match_string(safe_state, search, + &state->text_pos, node, &string_pos, &matched, -1); + if (status < 0) + return RE_ERROR_PARTIAL; + + if (!matched) { + string_pos = -1; + goto backtrack; + } + } else { + string_pos = -1; + goto backtrack; + } + } + } + + string_pos = -1; + + /* Successful match. */ + node = node->next_1.node; + break; + } + case RE_OP_STRING_SET: /* Member of a string set. */ + { + int status; + TRACE(("%s\n", re_op_text[node->op])) + + status = string_set_match_fwdrev(safe_state, node, FALSE); + if (status < 0) + return status; + if (status == 0) + goto backtrack; + node = node->next_1.node; + break; + } + case RE_OP_STRING_SET_FLD: /* Member of a string set, ignoring case. */ + { + int status; + TRACE(("%s\n", re_op_text[node->op])) + + status = string_set_match_fld_fwdrev(safe_state, node, FALSE); + if (status < 0) + return status; + if (status == 0) + goto backtrack; + node = node->next_1.node; + break; + } + case RE_OP_STRING_SET_FLD_REV: /* Member of a string set, ignoring case. 
*/ + { + int status; + TRACE(("%s\n", re_op_text[node->op])) + + status = string_set_match_fld_fwdrev(safe_state, node, TRUE); + if (status < 0) + return status; + if (status == 0) + goto backtrack; + node = node->next_1.node; + break; + } + case RE_OP_STRING_SET_IGN: /* Member of a string set, ignoring case. */ + { + int status; + TRACE(("%s\n", re_op_text[node->op])) + + status = string_set_match_ign_fwdrev(safe_state, node, FALSE); + if (status < 0) + return status; + if (status == 0) + goto backtrack; + node = node->next_1.node; + break; + } + case RE_OP_STRING_SET_IGN_REV: /* Member of a string set, ignoring case. */ + { + int status; + TRACE(("%s\n", re_op_text[node->op])) + + status = string_set_match_ign_fwdrev(safe_state, node, TRUE); + if (status < 0) + return status; + if (status == 0) + goto backtrack; + node = node->next_1.node; + break; + } + case RE_OP_STRING_SET_REV: /* Member of a string set. */ + { + int status; + TRACE(("%s\n", re_op_text[node->op])) + + status = string_set_match_fwdrev(safe_state, node, TRUE); + if (status < 0) + return status; + if (status == 0) + goto backtrack; + node = node->next_1.node; + break; + } + case RE_OP_SUCCESS: /* Success. */ + /* Must the match advance past its start? */ + TRACE(("%s\n", re_op_text[node->op])) + + if (state->text_pos == state->search_anchor && state->must_advance) + goto backtrack; + + if (state->match_all && !recursive_call) { + /* We want to match all of the slice. */ + if (state->reverse) { + if (state->text_pos != state->slice_start) + goto backtrack; + } else { + if (state->text_pos != state->slice_end) + goto backtrack; + } + } + + return RE_ERROR_SUCCESS; + default: /* Illegal opcode! */ + TRACE(("UNKNOWN OP %d\n", node->op)) + return RE_ERROR_ILLEGAL; + } + } + +backtrack: + for (;;) { + RE_BacktrackData* bt_data; + TRACE(("BACKTRACK ")) + + /* Should we abort the matching? 
*/ + ++state->iterations; + + if (state->iterations == 0 && safe_check_signals(safe_state)) + return RE_ERROR_INTERRUPTED; + + bt_data = last_backtrack(state); + + switch (bt_data->op) { + case RE_OP_ANY: /* Any character except a newline. */ + case RE_OP_ANY_ALL: /* Any character at all. */ + case RE_OP_ANY_ALL_REV: /* Any character at all, backwards. */ + case RE_OP_ANY_REV: /* Any character except a newline, backwards. */ + case RE_OP_ANY_U: /* Any character except a line separator. */ + case RE_OP_ANY_U_REV: /* Any character except a line separator, backwards. */ + case RE_OP_CHARACTER: /* A character. */ + case RE_OP_CHARACTER_IGN: /* A character, ignoring case. */ + case RE_OP_CHARACTER_IGN_REV: /* A character, ignoring case, backwards. */ + case RE_OP_CHARACTER_REV: /* A character, backwards. */ + case RE_OP_PROPERTY: /* A property. */ + case RE_OP_PROPERTY_IGN: /* A property, ignoring case. */ + case RE_OP_PROPERTY_IGN_REV: /* A property, ignoring case, backwards. */ + case RE_OP_PROPERTY_REV: /* A property, backwards. */ + case RE_OP_RANGE: /* A range. */ + case RE_OP_RANGE_IGN: /* A range, ignoring case. */ + case RE_OP_RANGE_IGN_REV: /* A range, ignoring case, backwards. */ + case RE_OP_RANGE_REV: /* A range, backwards. */ + case RE_OP_SET_DIFF: /* Set difference. */ + case RE_OP_SET_DIFF_IGN: /* Set difference, ignoring case. */ + case RE_OP_SET_DIFF_IGN_REV: /* Set difference, ignoring case, backwards. */ + case RE_OP_SET_DIFF_REV: /* Set difference, backwards. */ + case RE_OP_SET_INTER: /* Set intersection. */ + case RE_OP_SET_INTER_IGN: /* Set intersection, ignoring case. */ + case RE_OP_SET_INTER_IGN_REV: /* Set intersection, ignoring case, backwards. */ + case RE_OP_SET_INTER_REV: /* Set intersection, backwards. */ + case RE_OP_SET_SYM_DIFF: /* Set symmetric difference. */ + case RE_OP_SET_SYM_DIFF_IGN: /* Set symmetric difference, ignoring case. */ + case RE_OP_SET_SYM_DIFF_IGN_REV: /* Set symmetric difference, ignoring case, backwards. 
*/ + case RE_OP_SET_SYM_DIFF_REV: /* Set symmetric difference, backwards. */ + case RE_OP_SET_UNION: /* Set union. */ + case RE_OP_SET_UNION_IGN: /* Set union, ignoring case. */ + case RE_OP_SET_UNION_IGN_REV: /* Set union, ignoring case, backwards. */ + case RE_OP_SET_UNION_REV: /* Set union, backwards. */ + TRACE(("%s\n", re_op_text[bt_data->op])) + + status = retry_fuzzy_match_item(safe_state, search, + &state->text_pos, &node, TRUE); + if (status < 0) + return RE_ERROR_PARTIAL; + + if (node) + goto advance; + break; + case RE_OP_ATOMIC: /* Atomic subpattern. */ + TRACE(("%s\n", re_op_text[bt_data->op])) + + /* Restore the groups and certain flags and then backtrack. */ + pop_groups(state); + state->too_few_errors = bt_data->atomic.too_few_errors; + state->capture_change = bt_data->atomic.capture_change; + discard_backtrack(state); + break; + case RE_OP_BODY_END: + { + RE_RepeatData* rp_data; + TRACE(("%s %d\n", re_op_text[bt_data->op], bt_data->repeat.index)) + + /* We're backtracking into the body. */ + rp_data = &state->repeats[bt_data->repeat.index]; + + /* Restore the repeat info. */ + rp_data->count = bt_data->repeat.count; + rp_data->start = bt_data->repeat.start; + rp_data->capture_change = bt_data->repeat.capture_change; + + discard_backtrack(state); + break; + } + case RE_OP_BODY_START: + { + TRACE(("%s %d\n", re_op_text[bt_data->op], bt_data->repeat.index)) + + /* The body may have failed to match at this position. */ + if (!guard_repeat(safe_state, bt_data->repeat.index, + bt_data->repeat.text_pos, RE_STATUS_BODY, TRUE)) + return RE_ERROR_MEMORY; + + discard_backtrack(state); + break; + } + case RE_OP_BOUNDARY: /* On a word boundary. */ + case RE_OP_DEFAULT_BOUNDARY: /* On a default word boundary. */ + case RE_OP_DEFAULT_END_OF_WORD: /* At a default end of a word. */ + case RE_OP_DEFAULT_START_OF_WORD: /* At a default start of a word. */ + case RE_OP_END_OF_LINE: /* At the end of a line. */ + case RE_OP_END_OF_LINE_U: /* At the end of a line. 
*/ + case RE_OP_END_OF_STRING: /* At the end of the string. */ + case RE_OP_END_OF_STRING_LINE: /* At end of string or final newline. */ + case RE_OP_END_OF_STRING_LINE_U: /* At end of string or final newline. */ + case RE_OP_END_OF_WORD: /* At end of a word. */ + case RE_OP_GRAPHEME_BOUNDARY: /* On a grapheme boundary. */ + case RE_OP_SEARCH_ANCHOR: /* At the start of the search. */ + case RE_OP_START_OF_LINE: /* At the start of a line. */ + case RE_OP_START_OF_LINE_U: /* At the start of a line. */ + case RE_OP_START_OF_STRING: /* At the start of the string. */ + case RE_OP_START_OF_WORD: /* At start of a word. */ + TRACE(("%s\n", re_op_text[bt_data->op])) + + status = retry_fuzzy_match_item(safe_state, search, + &state->text_pos, &node, FALSE); + if (status < 0) + return RE_ERROR_PARTIAL; + + if (node) + goto advance; + break; + case RE_OP_BRANCH: /* 2-way branch. */ + TRACE(("%s\n", re_op_text[bt_data->op])) + + node = bt_data->branch.position.node; + state->text_pos = bt_data->branch.position.text_pos; + discard_backtrack(state); + goto advance; + case RE_OP_CALL_REF: /* A group call ref. */ + TRACE(("%s\n", re_op_text[bt_data->op])) + + pop_group_return(state); + discard_backtrack(state); + break; + case RE_OP_END_FUZZY: /* End of fuzzy matching. */ + TRACE(("%s\n", re_op_text[bt_data->op])) + + state->total_fuzzy_counts[RE_FUZZY_SUB] -= + state->fuzzy_info.counts[RE_FUZZY_SUB]; + state->total_fuzzy_counts[RE_FUZZY_INS] -= + state->fuzzy_info.counts[RE_FUZZY_INS]; + state->total_fuzzy_counts[RE_FUZZY_DEL] -= + state->fuzzy_info.counts[RE_FUZZY_DEL]; + + /* We need to retry the fuzzy match. */ + status = retry_fuzzy_insert(safe_state, &state->text_pos, &node); + if (status < 0) + return RE_ERROR_PARTIAL; + + /* If there were too few errors, in the fuzzy section, try again. 
+ */ + if (state->too_few_errors) { + state->too_few_errors = FALSE; + goto backtrack; + } + + if (node) { + state->total_fuzzy_counts[RE_FUZZY_SUB] += + state->fuzzy_info.counts[RE_FUZZY_SUB]; + state->total_fuzzy_counts[RE_FUZZY_INS] += + state->fuzzy_info.counts[RE_FUZZY_INS]; + state->total_fuzzy_counts[RE_FUZZY_DEL] += + state->fuzzy_info.counts[RE_FUZZY_DEL]; + + node = node->next_1.node; + goto advance; + } + break; + case RE_OP_END_GROUP: /* End of a capture group. */ + { + RE_CODE private_index; + RE_GroupData* group; + TRACE(("%s %d\n", re_op_text[bt_data->op], + bt_data->group.public_index)) + + private_index = bt_data->group.private_index; + group = &state->groups[private_index - 1]; + + /* Unsave the capture? */ + if (bt_data->group.capture) + unsave_capture(state, bt_data->group.private_index, + bt_data->group.public_index); + + if (pattern->group_info[private_index - 1].referenced && + group->span.end != bt_data->group.text_pos) + --state->capture_change; + group->span.end = bt_data->group.text_pos; + group->current_capture = bt_data->group.current_capture; + + discard_backtrack(state); + break; + } + case RE_OP_FAILURE: + { + Py_ssize_t end_pos; + TRACE(("%s\n", re_op_text[bt_data->op])) + + /* Do we have to advance? */ + if (!search) + return RE_ERROR_FAILURE; + + /* Can we advance? */ + state->text_pos = state->match_pos; + end_pos = state->reverse ? state->slice_start : state->slice_end; + if (state->text_pos == end_pos) + return RE_ERROR_FAILURE; + + /* Skip over any repeated leading characters. */ + switch (start_node->op) { + case RE_OP_GREEDY_REPEAT_ONE: + case RE_OP_LAZY_REPEAT_ONE: + { + size_t count; + BOOL is_partial; + + /* How many characters did the repeat actually match? */ + count = count_one(state, start_node->nonstring.next_2.node, + state->text_pos, start_node->values[2], &is_partial); + + /* If it's fewer than the maximum then skip over those + * characters. 
+ */ + if (count < start_node->values[2]) + state->text_pos += (Py_ssize_t)count * pattern_step; + break; + } + } + + /* Advance and try to match again. */ + state->text_pos += pattern_step; + + goto start_match; + } + case RE_OP_FUZZY: /* Fuzzy matching. */ + { + RE_FuzzyInfo* fuzzy_info; + TRACE(("%s\n", re_op_text[bt_data->op])) + + /* Restore the previous fuzzy info. */ + fuzzy_info = &state->fuzzy_info; + memmove(fuzzy_info, &bt_data->fuzzy.fuzzy_info, + sizeof(RE_FuzzyInfo)); + + discard_backtrack(state); + break; + } + case RE_OP_GREEDY_REPEAT: /* Greedy repeat. */ + case RE_OP_LAZY_REPEAT: /* Lazy repeat. */ + { + RE_RepeatData* rp_data; + TRACE(("%s\n", re_op_text[bt_data->op])) + + /* The repeat failed to match. */ + rp_data = &state->repeats[bt_data->repeat.index]; + + /* The body may have failed to match at this position. */ + if (!guard_repeat(safe_state, bt_data->repeat.index, + bt_data->repeat.text_pos, RE_STATUS_BODY, TRUE)) + return RE_ERROR_MEMORY; + + /* Restore the previous repeat. */ + rp_data->count = bt_data->repeat.count; + rp_data->start = bt_data->repeat.start; + rp_data->capture_change = bt_data->repeat.capture_change; + + discard_backtrack(state); + break; + } + case RE_OP_GREEDY_REPEAT_ONE: /* Greedy repeat for one character. */ + { + RE_RepeatData* rp_data; + size_t count; + Py_ssize_t step; + Py_ssize_t pos; + Py_ssize_t limit; + RE_Node* test; + BOOL match; + BOOL m; + size_t index; + TRACE(("%s\n", re_op_text[bt_data->op])) + + node = bt_data->repeat.position.node; + + rp_data = &state->repeats[bt_data->repeat.index]; + + /* Unmatch one character at a time until the tail could match or we + * have reached the minimum. + */ + state->text_pos = rp_data->start; + + count = rp_data->count; + step = node->step; + pos = state->text_pos + (Py_ssize_t)count * step; + limit = state->text_pos + (Py_ssize_t)node->values[1] * step; + + /* The tail failed to match at this position. 
*/ + if (!guard_repeat(safe_state, bt_data->repeat.index, pos, + RE_STATUS_TAIL, TRUE)) + return RE_ERROR_MEMORY; + + if (count == node->values[1]) { + /* We've backtracked the repeat as far as we can. */ + rp_data->start = bt_data->repeat.text_pos; + rp_data->count = bt_data->repeat.count; + discard_backtrack(state); + break; + } + + test = node->next_1.test; + + m = test->match; + index = node->values[0]; + + match = FALSE; + + if (test->status & RE_STATUS_FUZZY) { + for (;;) { + RE_Position next_position; + + pos -= step; + + if (try_match(state, &node->next_1, pos, &next_position) && + !is_repeat_guarded(safe_state, index, pos, + RE_STATUS_TAIL)) { + match = TRUE; + break; + } + + if (pos == limit) + break; + } + } else { + /* A repeated single-character match is often followed by a + * literal, so checking specially for it can be a good + * optimisation when working with long strings. + */ + switch (test->op) { + case RE_OP_CHARACTER: + { + Py_UCS4 ch; + + ch = test->values[0]; + + for (;;) { + --pos; + + if (same_char(encoding, char_at(state->text, pos), ch) + == m && !is_repeat_guarded(safe_state, index, pos, + RE_STATUS_TAIL)) { + match = TRUE; + break; + } + + if (pos == limit) + break; + + } + break; + } + case RE_OP_CHARACTER_IGN: + { + Py_UCS4 ch; + + ch = test->values[0]; + + for (;;) { + --pos; + + if (same_char_ign(encoding, char_at(state->text, pos), + ch) == m && !is_repeat_guarded(safe_state, index, + pos, RE_STATUS_TAIL)) { + match = TRUE; + break; + } + + if (pos == limit) + break; + + } + break; + } + case RE_OP_CHARACTER_IGN_REV: + { + Py_UCS4 ch; + + ch = test->values[0]; + + for (;;) { + ++pos; + + if (same_char_ign(encoding, char_at(state->text, pos - + 1), ch) == m && !is_repeat_guarded(safe_state, index, + pos, RE_STATUS_TAIL)) { + match = TRUE; + break; + } + + if (pos == limit) + break; + + } + break; + } + case RE_OP_CHARACTER_REV: + { + Py_UCS4 ch; + + ch = test->values[0]; + + for (;;) { + ++pos; + + if (same_char(encoding, 
char_at(state->text, pos - 1), + ch) == m && !is_repeat_guarded(safe_state, index, + pos, RE_STATUS_TAIL)) { + match = TRUE; + break; + } + + if (pos == limit) + break; + + } + break; + } + case RE_OP_STRING: + { + Py_ssize_t length; + + length = (Py_ssize_t)test->value_count; + + /* The tail is a string. We don't want to go off the end of + * the slice. + */ + pos = min_ssize_t(pos, state->slice_end - length); + + for (;;) { + Py_ssize_t found; + BOOL is_partial; + + if (pos < limit) + break; + + found = string_search_rev(safe_state, test, pos + + length, limit, &is_partial); + if (is_partial) + return RE_ERROR_PARTIAL; + + if (found < 0) + break; + + pos = found - length; + + if (!is_repeat_guarded(safe_state, index, pos, + RE_STATUS_TAIL)) { + match = TRUE; + break; + } + + --pos; + } + break; + } + case RE_OP_STRING_FLD: + { + int (*full_case_fold)(Py_UCS4 ch, Py_UCS4* folded); + Py_ssize_t folded_length; + size_t i; + Py_UCS4 folded[RE_MAX_FOLDED]; + + full_case_fold = encoding->full_case_fold; + + folded_length = 0; + for (i = 0; i < test->value_count; i++) + folded_length += full_case_fold(test->values[i], + folded); + + /* The tail is a string. We don't want to go off the end of + * the slice. 
+ */ + pos = min_ssize_t(pos, state->slice_end - folded_length); + + for (;;) { + Py_ssize_t found; + Py_ssize_t new_pos; + BOOL is_partial; + + if (pos < limit) + break; + + found = string_search_fld_rev(safe_state, test, pos + + folded_length, limit, &new_pos, &is_partial); + if (is_partial) + return RE_ERROR_PARTIAL; + + if (found < 0) + break; + + pos = found - folded_length; + + if (!is_repeat_guarded(safe_state, index, pos, + RE_STATUS_TAIL)) { + match = TRUE; + break; + } + + --pos; + } + break; + } + case RE_OP_STRING_FLD_REV: + { + int (*full_case_fold)(Py_UCS4 ch, Py_UCS4* folded); + Py_ssize_t folded_length; + size_t i; + Py_UCS4 folded[RE_MAX_FOLDED]; + + full_case_fold = encoding->full_case_fold; + + folded_length = 0; + for (i = 0; i < test->value_count; i++) + folded_length += full_case_fold(test->values[i], + folded); + + /* The tail is a string. We don't want to go off the end of + * the slice. + */ + pos = max_ssize_t(pos, state->slice_start + folded_length); + + for (;;) { + Py_ssize_t found; + Py_ssize_t new_pos; + BOOL is_partial; + + if (pos > limit) + break; + + found = string_search_fld(safe_state, test, pos - + folded_length, limit, &new_pos, &is_partial); + if (is_partial) + return RE_ERROR_PARTIAL; + + if (found < 0) + break; + + pos = found + folded_length; + + if (!is_repeat_guarded(safe_state, index, pos, + RE_STATUS_TAIL)) { + match = TRUE; + break; + } + + ++pos; + } + break; + } + case RE_OP_STRING_IGN: + { + Py_ssize_t length; + + length = (Py_ssize_t)test->value_count; + + /* The tail is a string. We don't want to go off the end of + * the slice. 
+ */ + pos = min_ssize_t(pos, state->slice_end - length); + + for (;;) { + Py_ssize_t found; + BOOL is_partial; + + if (pos < limit) + break; + + found = string_search_ign_rev(safe_state, test, pos + + length, limit, &is_partial); + if (is_partial) + return RE_ERROR_PARTIAL; + + if (found < 0) + break; + + pos = found - length; + + if (!is_repeat_guarded(safe_state, index, pos, + RE_STATUS_TAIL)) { + match = TRUE; + break; + } + + --pos; + } + break; + } + case RE_OP_STRING_IGN_REV: + { + Py_ssize_t length; + + length = (Py_ssize_t)test->value_count; + + /* The tail is a string. We don't want to go off the end of + * the slice. + */ + pos = max_ssize_t(pos, state->slice_start + length); + + for (;;) { + Py_ssize_t found; + BOOL is_partial; + + if (pos > limit) + break; + + found = string_search_ign(safe_state, test, pos - + length, limit, &is_partial); + if (is_partial) + return RE_ERROR_PARTIAL; + + if (found < 0) + break; + + pos = found + length; + + if (!is_repeat_guarded(safe_state, index, pos, + RE_STATUS_TAIL)) { + match = TRUE; + break; + } + + ++pos; + } + break; + } + case RE_OP_STRING_REV: + { + Py_ssize_t length; + + length = (Py_ssize_t)test->value_count; + + /* The tail is a string. We don't want to go off the end of + * the slice. 
+ */ + pos = max_ssize_t(pos, state->slice_start + length); + + for (;;) { + Py_ssize_t found; + BOOL is_partial; + + if (pos > limit) + break; + + found = string_search(safe_state, test, pos - length, + limit, &is_partial); + if (is_partial) + return RE_ERROR_PARTIAL; + + if (found < 0) + break; + + pos = found + length; + + if (!is_repeat_guarded(safe_state, index, pos, + RE_STATUS_TAIL)) { + match = TRUE; + break; + } + + ++pos; + } + break; + } + default: + for (;;) { + RE_Position next_position; + + pos -= step; + + status = try_match(state, &node->next_1, pos, + &next_position); + if (status < 0) + return status; + + if (status == RE_ERROR_SUCCESS && + !is_repeat_guarded(safe_state, index, pos, + RE_STATUS_TAIL)) { + match = TRUE; + break; + } + + if (pos == limit) + break; + } + break; + } + } + + if (match) { + count = (size_t)abs_ssize_t(pos - state->text_pos); + + /* The tail could match. */ + if (count > node->values[1]) + /* The match is longer than the minimum, so we might need + * to backtrack the repeat again to consume less. + */ + rp_data->count = count; + else { + /* We've reached or passed the minimum, so we won't need to + * backtrack the repeat again. + */ + rp_data->start = bt_data->repeat.text_pos; + rp_data->count = bt_data->repeat.count; + discard_backtrack(state); + + /* Have we passed the minimum? */ + if (count < node->values[1]) + goto backtrack; + } + + node = node->next_1.node; + state->text_pos = pos; + goto advance; + } else { + /* We've backtracked the repeat as far as we can. */ + rp_data->start = bt_data->repeat.text_pos; + rp_data->count = bt_data->repeat.count; + discard_backtrack(state); + } + break; + } + case RE_OP_GROUP_CALL: /* Group call. */ + TRACE(("%s\n", re_op_text[bt_data->op])) + + pop_group_return(state); + discard_backtrack(state); + break; + case RE_OP_GROUP_RETURN: /* Group return. 
*/ + { + RE_Node* return_node; + TRACE(("%s\n", re_op_text[bt_data->op])) + + return_node = bt_data->group_call.node; + + push_group_return(safe_state, return_node); + + if (return_node) { + /* Restore the groups. */ + pop_groups(state); + state->capture_change = bt_data->group_call.capture_change; + + /* Restore the repeats. */ + pop_repeats(state); + } + + discard_backtrack(state); + break; + } + case RE_OP_LAZY_REPEAT_ONE: /* Lazy repeat for one character. */ + { + RE_RepeatData* rp_data; + size_t count; + Py_ssize_t step; + Py_ssize_t pos; + Py_ssize_t available; + size_t max_count; + Py_ssize_t limit; + RE_Node* repeated; + RE_Node* test; + BOOL match; + BOOL m; + size_t index; + TRACE(("%s\n", re_op_text[bt_data->op])) + + node = bt_data->repeat.position.node; + + rp_data = &state->repeats[bt_data->repeat.index]; + + /* Match one character at a time until the tail could match or we + * have reached the maximum. + */ + state->text_pos = rp_data->start; + count = rp_data->count; + + step = node->step; + pos = state->text_pos + (Py_ssize_t)count * step; + available = step > 0 ? 
state->slice_end - state->text_pos : + state->text_pos - state->slice_start; + max_count = min_size_t((size_t)available, node->values[2]); + limit = state->text_pos + (Py_ssize_t)max_count * step; + + repeated = node->nonstring.next_2.node; + + test = node->next_1.test; + + m = test->match; + index = node->values[0]; + + match = FALSE; + + if (test->status & RE_STATUS_FUZZY) { + for (;;) { + RE_Position next_position; + + status = match_one(state, repeated, pos); + if (status < 0) + return status; + + if (status == RE_ERROR_FAILURE) + break; + + pos += step; + + status = try_match(state, &node->next_1, pos, + &next_position); + if (status < 0) + return status; + + if (status == RE_ERROR_SUCCESS && + !is_repeat_guarded(safe_state, index, pos, + RE_STATUS_TAIL)) { + match = TRUE; + break; + } + + if (pos == limit) + break; + } + } else { + /* A repeated single-character match is often followed by a + * literal, so checking specially for it can be a good + * optimisation when working with long strings. + */ + switch (test->op) { + case RE_OP_CHARACTER: + { + Py_UCS4 ch; + + ch = test->values[0]; + + /* The tail is a character. We don't want to go off the end + * of the slice. + */ + limit = min_ssize_t(limit, state->slice_end - 1); + + for (;;) { + if (pos >= state->text_length && state->partial_side == + RE_PARTIAL_RIGHT) + return RE_ERROR_PARTIAL; + + if (pos >= limit) + break; + + status = match_one(state, repeated, pos); + if (status < 0) + return status; + + if (status == RE_ERROR_FAILURE) + break; + + ++pos; + + if (same_char(encoding, char_at(state->text, pos), ch) + == m && !is_repeat_guarded(safe_state, index, pos, + RE_STATUS_TAIL)) { + match = TRUE; + break; + } + } + break; + } + case RE_OP_CHARACTER_IGN: + { + Py_UCS4 ch; + + ch = test->values[0]; + + /* The tail is a character. We don't want to go off the end + * of the slice. 
+ */ + limit = min_ssize_t(limit, state->slice_end - 1); + + for (;;) { + if (pos >= state->text_length && state->partial_side == + RE_PARTIAL_RIGHT) + return RE_ERROR_PARTIAL; + + if (pos >= limit) + break; + + status = match_one(state, repeated, pos); + if (status < 0) + return status; + + if (status == RE_ERROR_FAILURE) + break; + + ++pos; + + if (same_char_ign(encoding, char_at(state->text, pos), + ch) == m && !is_repeat_guarded(safe_state, index, + pos, RE_STATUS_TAIL)) { + match = TRUE; + break; + } + } + break; + } + case RE_OP_CHARACTER_IGN_REV: + { + Py_UCS4 ch; + + ch = test->values[0]; + + /* The tail is a character. We don't want to go off the end + * of the slice. + */ + limit = max_ssize_t(limit, state->slice_start + 1); + + for (;;) { + if (pos <= 0 && state->partial_side == RE_PARTIAL_LEFT) + return RE_ERROR_PARTIAL; + + if (pos <= limit) + break; + + status = match_one(state, repeated, pos); + if (status < 0) + return status; + + if (status == RE_ERROR_FAILURE) + break; + + --pos; + + if (same_char_ign(encoding, char_at(state->text, pos - + 1), ch) == m && !is_repeat_guarded(safe_state, index, + pos, RE_STATUS_TAIL)) { + match = TRUE; + break; + } + } + break; + } + case RE_OP_CHARACTER_REV: + { + Py_UCS4 ch; + + ch = test->values[0]; + + /* The tail is a character. We don't want to go off the end + * of the slice. + */ + limit = max_ssize_t(limit, state->slice_start + 1); + + for (;;) { + if (pos <= 0 && state->partial_side == RE_PARTIAL_LEFT) + return RE_ERROR_PARTIAL; + + if (pos <= limit) + break; + + status = match_one(state, repeated, pos); + if (status < 0) + return status; + + if (status == RE_ERROR_FAILURE) + break; + + --pos; + + if (same_char(encoding, char_at(state->text, pos - 1), + ch) == m && !is_repeat_guarded(safe_state, index, + pos, RE_STATUS_TAIL)) { + match = TRUE; + break; + } + } + break; + } + case RE_OP_STRING: + { + Py_ssize_t length; + + length = (Py_ssize_t)test->value_count; + + /* The tail is a string. 
We don't want to go off the end of + * the slice. + */ + limit = min_ssize_t(limit, state->slice_end - length); + + for (;;) { + Py_ssize_t found; + BOOL is_partial; + + if (pos >= state->text_length && state->partial_side == + RE_PARTIAL_RIGHT) + return RE_ERROR_PARTIAL; + + if (pos >= limit) + break; + + /* Look for the tail string. */ + found = string_search(safe_state, test, pos + 1, limit + + length, &is_partial); + if (is_partial) + return RE_ERROR_PARTIAL; + + if (found < 0) + break; + + if (repeated->op == RE_OP_ANY_ALL) + /* Anything can precede the tail. */ + pos = found; + else { + /* Check that what precedes the tail will match. */ + while (pos != found) { + status = match_one(state, repeated, pos); + if (status < 0) + return status; + + if (status == RE_ERROR_FAILURE) + break; + + ++pos; + } + + if (pos != found) + /* Something preceding the tail didn't match. + */ + break; + } + + if (!is_repeat_guarded(safe_state, index, pos, + RE_STATUS_TAIL)) { + match = TRUE; + break; + } + } + break; + } + case RE_OP_STRING_FLD: + { + /* The tail is a string. We don't want to go off the end of + * the slice. + */ + limit = min_ssize_t(limit, state->slice_end); + + for (;;) { + Py_ssize_t found; + Py_ssize_t new_pos; + BOOL is_partial; + + if (pos >= state->text_length && state->partial_side == + RE_PARTIAL_RIGHT) + return RE_ERROR_PARTIAL; + + if (pos >= limit) + break; + + /* Look for the tail string. */ + found = string_search_fld(safe_state, test, pos + 1, + limit, &new_pos, &is_partial); + if (is_partial) + return RE_ERROR_PARTIAL; + + if (found < 0) + break; + + if (repeated->op == RE_OP_ANY_ALL) + /* Anything can precede the tail. */ + pos = found; + else { + /* Check that what precedes the tail will match. */ + while (pos != found) { + status = match_one(state, repeated, pos); + if (status < 0) + return status; + + if (status == RE_ERROR_FAILURE) + break; + + ++pos; + } + + if (pos != found) + /* Something preceding the tail didn't match. 
+ */ + break; + } + + if (!is_repeat_guarded(safe_state, index, pos, + RE_STATUS_TAIL)) { + match = TRUE; + break; + } + } + break; + } + case RE_OP_STRING_FLD_REV: + { + /* The tail is a string. We don't want to go off the end of + * the slice. + */ + limit = max_ssize_t(limit, state->slice_start); + + for (;;) { + Py_ssize_t found; + Py_ssize_t new_pos; + BOOL is_partial; + + if (pos <= 0 && state->partial_side == RE_PARTIAL_LEFT) + return RE_ERROR_PARTIAL; + + if (pos <= limit) + break; + + /* Look for the tail string. */ + found = string_search_fld_rev(safe_state, test, pos - + 1, limit, &new_pos, &is_partial); + if (is_partial) + return RE_ERROR_PARTIAL; + + if (found < 0) + break; + + if (repeated->op == RE_OP_ANY_ALL) + /* Anything can precede the tail. */ + pos = found; + else { + /* Check that what precedes the tail will match. */ + while (pos != found) { + status = match_one(state, repeated, pos); + if (status < 0) + return status; + + if (status == RE_ERROR_FAILURE) + break; + + --pos; + } + + if (pos != found) + /* Something preceding the tail didn't match. + */ + break; + } + + if (!is_repeat_guarded(safe_state, index, pos, + RE_STATUS_TAIL)) { + match = TRUE; + break; + } + } + break; + } + case RE_OP_STRING_IGN: + { + Py_ssize_t length; + + length = (Py_ssize_t)test->value_count; + + /* The tail is a string. We don't want to go off the end of + * the slice. + */ + limit = min_ssize_t(limit, state->slice_end - length); + + for (;;) { + Py_ssize_t found; + BOOL is_partial; + + if (pos >= state->text_length && state->partial_side == + RE_PARTIAL_RIGHT) + return RE_ERROR_PARTIAL; + + if (pos >= limit) + break; + + /* Look for the tail string. */ + found = string_search_ign(safe_state, test, pos + 1, + limit + length, &is_partial); + if (is_partial) + return RE_ERROR_PARTIAL; + + if (found < 0) + break; + + if (repeated->op == RE_OP_ANY_ALL) + /* Anything can precede the tail. */ + pos = found; + else { + /* Check that what precedes the tail will match. 
*/ + while (pos != found) { + status = match_one(state, repeated, pos); + if (status < 0) + return status; + + if (status == RE_ERROR_FAILURE) + break; + + ++pos; + } + + if (pos != found) + /* Something preceding the tail didn't match. + */ + break; + } + + if (!is_repeat_guarded(safe_state, index, pos, + RE_STATUS_TAIL)) { + match = TRUE; + break; + } + } + break; + } + case RE_OP_STRING_IGN_REV: + { + Py_ssize_t length; + + length = (Py_ssize_t)test->value_count; + + /* The tail is a string. We don't want to go off the end of + * the slice. + */ + limit = max_ssize_t(limit, state->slice_start + length); + + for (;;) { + Py_ssize_t found; + BOOL is_partial; + + if (pos <= 0 && state->partial_side == RE_PARTIAL_LEFT) + return RE_ERROR_PARTIAL; + + if (pos <= limit) + break; + + /* Look for the tail string. */ + found = string_search_ign_rev(safe_state, test, pos - + 1, limit - length, &is_partial); + if (is_partial) + return RE_ERROR_PARTIAL; + + if (found < 0) + break; + + if (repeated->op == RE_OP_ANY_ALL) + /* Anything can precede the tail. */ + pos = found; + else { + /* Check that what precedes the tail will match. */ + while (pos != found) { + status = match_one(state, repeated, pos); + if (status < 0) + return status; + + if (status == RE_ERROR_FAILURE) + break; + + --pos; + } + + if (pos != found) + /* Something preceding the tail didn't match. + */ + break; + } + + if (!is_repeat_guarded(safe_state, index, pos, + RE_STATUS_TAIL)) { + match = TRUE; + break; + } + } + break; + } + case RE_OP_STRING_REV: + { + Py_ssize_t length; + + length = (Py_ssize_t)test->value_count; + + /* The tail is a string. We don't want to go off the end of + * the slice. + */ + limit = max_ssize_t(limit, state->slice_start + length); + + for (;;) { + Py_ssize_t found; + BOOL is_partial; + + if (pos <= 0 && state->partial_side == RE_PARTIAL_LEFT) + return RE_ERROR_PARTIAL; + + if (pos <= limit) + break; + + /* Look for the tail string. 
*/ + found = string_search_rev(safe_state, test, pos - 1, + limit - length, &is_partial); + if (is_partial) + return RE_ERROR_PARTIAL; + + if (found < 0) + break; + + if (repeated->op == RE_OP_ANY_ALL) + /* Anything can precede the tail. */ + pos = found; + else { + /* Check that what precedes the tail will match. */ + while (pos != found) { + status = match_one(state, repeated, pos); + if (status < 0) + return status; + + if (status == RE_ERROR_FAILURE) + break; + + --pos; + } + + if (pos != found) + /* Something preceding the tail didn't match. + */ + break; + } + + if (!is_repeat_guarded(safe_state, index, pos, + RE_STATUS_TAIL)) { + match = TRUE; + break; + } + } + break; + } + default: + for (;;) { + RE_Position next_position; + + status = match_one(state, repeated, pos); + if (status < 0) + return status; + + if (status == RE_ERROR_FAILURE) + break; + + pos += step; + + status = try_match(state, &node->next_1, pos, + &next_position); + if (status < 0) + return RE_ERROR_PARTIAL; + + if (status == RE_ERROR_SUCCESS && + !is_repeat_guarded(safe_state, index, pos, + RE_STATUS_TAIL)) { + match = TRUE; + break; + } + + if (pos == limit) + break; + } + break; + } + } + + if (match) { + /* The tail could match. */ + count = (size_t)abs_ssize_t(pos - state->text_pos); + state->text_pos = pos; + + if (count < max_count) { + /* The match is shorter than the maximum, so we might need + * to backtrack the repeat again to consume more. + */ + rp_data->count = count; + } else { + /* We've reached or passed the maximum, so we won't need to + * backtrack the repeat again. + */ + rp_data->start = bt_data->repeat.text_pos; + rp_data->count = bt_data->repeat.count; + discard_backtrack(state); + + /* Have we passed the maximum? */ + if (count > max_count) + goto backtrack; + } + + node = node->next_1.node; + goto advance; + } else { + /* The tail couldn't match. 
*/ + rp_data->start = bt_data->repeat.text_pos; + rp_data->count = bt_data->repeat.count; + discard_backtrack(state); + } + break; + } + case RE_OP_LOOKAROUND: /* Lookaround. */ + TRACE(("%s\n", re_op_text[bt_data->op])) + + /* Restore the groups and certain flags and then backtrack. */ + pop_groups(state); + state->too_few_errors = bt_data->lookaround.too_few_errors; + state->capture_change = bt_data->lookaround.capture_change; + discard_backtrack(state); + break; + case RE_OP_MATCH_BODY: + { + RE_RepeatData* rp_data; + TRACE(("%s %d\n", re_op_text[bt_data->op], bt_data->repeat.index)) + + /* We want to match the body. */ + rp_data = &state->repeats[bt_data->repeat.index]; + + /* Restore the repeat info. */ + rp_data->count = bt_data->repeat.count; + rp_data->start = bt_data->repeat.start; + rp_data->capture_change = bt_data->repeat.capture_change; + + /* Record backtracking info in case the body fails to match. */ + bt_data->op = RE_OP_BODY_START; + + /* Advance into the body. */ + node = bt_data->repeat.position.node; + state->text_pos = bt_data->repeat.position.text_pos; + goto advance; + } + case RE_OP_MATCH_TAIL: + { + RE_RepeatData* rp_data; + TRACE(("%s %d\n", re_op_text[bt_data->op], bt_data->repeat.index)) + + /* We want to match the tail. */ + rp_data = &state->repeats[bt_data->repeat.index]; + + /* Restore the repeat info. */ + rp_data->count = bt_data->repeat.count; + rp_data->start = bt_data->repeat.start; + rp_data->capture_change = bt_data->repeat.capture_change; + + /* Advance into the tail. */ + node = bt_data->repeat.position.node; + state->text_pos = bt_data->repeat.position.text_pos; + + discard_backtrack(state); + goto advance; + } + case RE_OP_REF_GROUP: /* Reference to a capture group. */ + case RE_OP_REF_GROUP_IGN: /* Reference to a capture group, ignoring case. */ + case RE_OP_REF_GROUP_IGN_REV: /* Reference to a capture group, backwards, ignoring case. */ + case RE_OP_REF_GROUP_REV: /* Reference to a capture group, backwards. 
*/ + case RE_OP_STRING: /* A string. */ + case RE_OP_STRING_IGN: /* A string, ignoring case. */ + case RE_OP_STRING_IGN_REV: /* A string, backwards, ignoring case. */ + case RE_OP_STRING_REV: /* A string, backwards. */ + { + BOOL matched; + TRACE(("%s\n", re_op_text[bt_data->op])) + + status = retry_fuzzy_match_string(safe_state, search, + &state->text_pos, &node, &string_pos, &matched); + if (status < 0) + return RE_ERROR_PARTIAL; + + + if (matched) + goto advance; + + string_pos = -1; + break; + } + case RE_OP_REF_GROUP_FLD: /* Reference to a capture group, ignoring case. */ + case RE_OP_REF_GROUP_FLD_REV: /* Reference to a capture group, backwards, ignoring case. */ + { + BOOL matched; + TRACE(("%s\n", re_op_text[bt_data->op])) + + status = retry_fuzzy_match_group_fld(safe_state, search, + &state->text_pos, &node, &folded_pos, &string_pos, &gfolded_pos, + &matched); + if (status < 0) + return RE_ERROR_PARTIAL; + + + if (matched) + goto advance; + + string_pos = -1; + break; + } + case RE_OP_START_GROUP: /* Start of a capture group. */ + { + RE_CODE private_index; + RE_GroupData* group; + TRACE(("%s %d\n", re_op_text[bt_data->op], + bt_data->group.public_index)) + + private_index = bt_data->group.private_index; + group = &state->groups[private_index - 1]; + + /* Unsave the capture? */ + if (bt_data->group.capture) + unsave_capture(state, bt_data->group.private_index, + bt_data->group.public_index); + + if (pattern->group_info[private_index - 1].referenced && + group->span.start != bt_data->group.text_pos) + --state->capture_change; + group->span.start = bt_data->group.text_pos; + group->current_capture = bt_data->group.current_capture; + + discard_backtrack(state); + break; + } + case RE_OP_STRING_FLD: /* A string, ignoring case. */ + case RE_OP_STRING_FLD_REV: /* A string, backwards, ignoring case. 
*/ + { + BOOL matched; + TRACE(("%s\n", re_op_text[bt_data->op])) + + status = retry_fuzzy_match_string_fld(safe_state, search, + &state->text_pos, &node, &string_pos, &folded_pos, &matched); + if (status < 0) + return RE_ERROR_PARTIAL; + + if (matched) + goto advance; + + string_pos = -1; + break; + } + default: + TRACE(("UNKNOWN OP %d\n", bt_data->op)) + return RE_ERROR_ILLEGAL; + } + } +} + +/* Saves group data for fuzzy matching. */ +Py_LOCAL_INLINE(RE_GroupData*) save_groups(RE_SafeState* safe_state, + RE_GroupData* saved_groups) { + RE_State* state; + PatternObject* pattern; + size_t g; + + /* Re-acquire the GIL. */ + acquire_GIL(safe_state); + + state = safe_state->re_state; + pattern = state->pattern; + + if (!saved_groups) { + saved_groups = (RE_GroupData*)re_alloc(pattern->true_group_count * + sizeof(RE_GroupData)); + if (!saved_groups) + goto error; + + memset(saved_groups, 0, pattern->true_group_count * + sizeof(RE_GroupData)); + } + + for (g = 0; g < pattern->true_group_count; g++) { + RE_GroupData* orig; + RE_GroupData* copy; + + orig = &state->groups[g]; + copy = &saved_groups[g]; + + copy->span = orig->span; + + if (orig->capture_count > copy->capture_capacity) { + RE_GroupSpan* cap_copy; + + cap_copy = (RE_GroupSpan*)re_realloc(copy->captures, + orig->capture_count * sizeof(RE_GroupSpan)); + if (!cap_copy) + goto error; + + copy->capture_capacity = orig->capture_count; + copy->captures = cap_copy; + } + + copy->capture_count = orig->capture_count; + Py_MEMCPY(copy->captures, orig->captures, orig->capture_count * + sizeof(RE_GroupSpan)); + } + + /* Release the GIL. */ + release_GIL(safe_state); + + return saved_groups; + +error: + if (saved_groups) { + for (g = 0; g < pattern->true_group_count; g++) + re_dealloc(saved_groups[g].captures); + + re_dealloc(saved_groups); + } + + /* Release the GIL. */ + release_GIL(safe_state); + + return NULL; +} + +/* Restores group data for fuzzy matching. 
*/ +Py_LOCAL_INLINE(void) restore_groups(RE_SafeState* safe_state, RE_GroupData* + saved_groups) { + RE_State* state; + PatternObject* pattern; + size_t g; + + /* Re-acquire the GIL. */ + acquire_GIL(safe_state); + + state = safe_state->re_state; + pattern = state->pattern; + + for (g = 0; g < pattern->true_group_count; g++) + re_dealloc(state->groups[g].captures); + + Py_MEMCPY(state->groups, saved_groups, pattern->true_group_count * + sizeof(RE_GroupData)); + + re_dealloc(saved_groups); + + /* Release the GIL. */ + release_GIL(safe_state); +} + +/* Discards group data for fuzzy matching. */ +Py_LOCAL_INLINE(void) discard_groups(RE_SafeState* safe_state, RE_GroupData* + saved_groups) { + RE_State* state; + PatternObject* pattern; + size_t g; + + /* Re-acquire the GIL. */ + acquire_GIL(safe_state); + + state = safe_state->re_state; + pattern = state->pattern; + + for (g = 0; g < pattern->true_group_count; g++) + re_dealloc(saved_groups[g].captures); + + re_dealloc(saved_groups); + + /* Release the GIL. */ + release_GIL(safe_state); +} + +/* Saves the fuzzy info. */ +Py_LOCAL_INLINE(void) save_fuzzy_counts(RE_State* state, size_t* fuzzy_counts) + { + Py_MEMCPY(fuzzy_counts, state->total_fuzzy_counts, + sizeof(state->total_fuzzy_counts)); +} + +/* Restores the fuzzy info. */ +Py_LOCAL_INLINE(void) restore_fuzzy_counts(RE_State* state, size_t* + fuzzy_counts) { + Py_MEMCPY(state->total_fuzzy_counts, fuzzy_counts, + sizeof(state->total_fuzzy_counts)); +} + +/* Performs a match or search from the current text position. + * + * The state can sometimes be shared across threads. In such instances there's + * a lock (mutex) on it. The lock is held for the duration of matching. 
+ */ +Py_LOCAL_INLINE(int) do_match(RE_SafeState* safe_state, BOOL search) { + RE_State* state; + PatternObject* pattern; + Py_ssize_t available; + BOOL get_best; + BOOL enhance_match; + BOOL must_advance; + RE_GroupData* best_groups; + Py_ssize_t best_match_pos; + Py_ssize_t best_text_pos = 0; /* Initialise to stop compiler warning. */ + int status; + Py_ssize_t slice_start; + Py_ssize_t slice_end; + size_t best_fuzzy_counts[RE_FUZZY_COUNT]; + TRACE(("<>\n")) + + state = safe_state->re_state; + pattern = state->pattern; + + /* Release the GIL. */ + release_GIL(safe_state); + + /* Is there enough to search? */ + if (state->reverse) { + if (state->text_pos < state->slice_start) { + acquire_GIL(safe_state); + return FALSE; + } + + available = state->text_pos - state->slice_start; + } else { + if (state->text_pos > state->slice_end) { + acquire_GIL(safe_state); + return FALSE; + } + + available = state->slice_end - state->text_pos; + } + + get_best = (pattern->flags & RE_FLAG_BESTMATCH) != 0; + enhance_match = (pattern->flags & RE_FLAG_ENHANCEMATCH) != 0 && !get_best; + + /* The maximum permitted cost. */ + state->max_cost = pattern->is_fuzzy ? PY_SSIZE_T_MAX : 0; + + best_groups = NULL; + + best_match_pos = state->text_pos; + must_advance = state->must_advance; + + slice_start = state->slice_start; + slice_end = state->slice_end; + + for (;;) { + /* If there's a better match, it won't start earlier in the string than + * the current best match, so there's no need to start earlier than + * that match. + */ + state->text_pos = best_match_pos; + state->must_advance = must_advance; + + /* Initialise the state. */ + init_match(state); + + status = RE_ERROR_SUCCESS; + if (state->max_cost == 0 && state->partial_side == RE_PARTIAL_NONE) { + /* An exact match, and partial matches not permitted. 
*/ + if (available < state->min_width || (available == 0 && + state->must_advance)) + status = RE_ERROR_FAILURE; + } + + if (status == RE_ERROR_SUCCESS) + status = basic_match(safe_state, pattern->start_node, search, + FALSE); + + /* Has an error occurred, or is it a partial match? */ + if (status < 0) + break; + + if (status == RE_ERROR_FAILURE || (status == RE_ERROR_SUCCESS && + state->total_cost == 0)) + break; + + if (!get_best && !enhance_match) + break; + + save_fuzzy_counts(state, best_fuzzy_counts); + + if (!get_best && state->text_pos == state->match_pos) + /* We want the first match. The match is already zero-width, so the + * cost can't get any lower (because the fit can't get any better). + */ + break; + + if (best_groups) { + BOOL same; + size_t g; + + /* Did we get the same match as the best so far? */ + same = state->match_pos == best_match_pos && state->text_pos == + best_text_pos; + for (g = 0; same && g < pattern->public_group_count; g++) { + same = state->groups[g].span.start == best_groups[g].span.start + && state->groups[g].span.end == best_groups[g].span.end; + } + + if (same) + break; + } + + /* Save the best result so far. */ + best_groups = save_groups(safe_state, best_groups); + if (!best_groups) { + status = RE_ERROR_MEMORY; + break; + } + + best_match_pos = state->match_pos; + best_text_pos = state->text_pos; + + if (state->max_cost == 0) + break; + + /* Reduce the maximum permitted cost and try again. */ + state->max_cost = state->total_cost - 1; + + if (enhance_match) { + if (state->reverse) { + state->slice_start = state->text_pos; + state->slice_end = state->match_pos; + } else { + state->slice_start = state->match_pos; + state->slice_end = state->text_pos; + } + } + } + + state->slice_start = slice_start; + state->slice_end = slice_end; + + if (best_groups) { + if (status == RE_ERROR_SUCCESS && state->total_cost == 0) + /* We have a perfect match, so the previous best match. 
*/ + discard_groups(safe_state, best_groups); + else { + /* Restore the previous best match. */ + status = RE_ERROR_SUCCESS; + + state->match_pos = best_match_pos; + state->text_pos = best_text_pos; + + restore_groups(safe_state, best_groups); + restore_fuzzy_counts(state, best_fuzzy_counts); + } + } + + if (status == RE_ERROR_SUCCESS || status == RE_ERROR_PARTIAL) { + Py_ssize_t max_end_index; + RE_GroupInfo* group_info; + size_t g; + + /* Store the results. */ + state->lastindex = -1; + state->lastgroup = -1; + max_end_index = -1; + + /* Store the capture groups. */ + group_info = pattern->group_info; + + for (g = 0; g < pattern->public_group_count; g++) { + RE_GroupSpan* span; + + span = &state->groups[g].span; + /* The string positions are of type Py_ssize_t, so the format needs + * to specify that. + */ + TRACE(("group %d from %" PY_FORMAT_SIZE_T "d to %" PY_FORMAT_SIZE_T + "d\n", g + 1, span->start, span->end)) + + if (span->start >= 0 && span->end >= 0 && group_info[g].end_index > + max_end_index) { + max_end_index = group_info[g].end_index; + state->lastindex = (Py_ssize_t)g + 1; + if (group_info[g].has_name) + state->lastgroup = (Py_ssize_t)g + 1; + } + } + } + + /* Re-acquire the GIL. */ + acquire_GIL(safe_state); + + if (status < 0 && status != RE_ERROR_PARTIAL && !PyErr_Occurred()) + set_error(status, NULL); + + return status; +} + +/* Gets a string from a Python object. + * + * If the function returns true and str_info->should_release is true then it's + * the responsibility of the caller to release the buffer when it's no longer + * needed. + */ +Py_LOCAL_INLINE(BOOL) get_string(PyObject* string, RE_StringInfo* str_info) { + /* Given a Python object, return a data pointer, a length (in characters), + * and a character size. Return FALSE if the object is not a string (or not + * compatible). + */ + PyBufferProcs* buffer; + Py_ssize_t bytes; + Py_ssize_t size; + + /* Unicode objects do not support the buffer API. So, get the data directly + * instead. 
+ */ + if (PyUnicode_Check(string)) { + /* Unicode strings doesn't always support the buffer interface. */ + str_info->characters = (void*)PyUnicode_AS_DATA(string); + str_info->length = PyUnicode_GET_SIZE(string); + str_info->charsize = sizeof(Py_UNICODE); + str_info->is_unicode = TRUE; + str_info->should_release = FALSE; + return TRUE; + } + + /* Get pointer to string buffer. */ +#if PY_VERSION_HEX >= 0x02060000 + buffer = Py_TYPE(string)->tp_as_buffer; + str_info->view.len = -1; +#else + buffer = string->ob_type->tp_as_buffer; +#endif + + if (!buffer) { + PyErr_SetString(PyExc_TypeError, "expected string or buffer"); + return FALSE; + } + +#if PY_VERSION_HEX >= 0x02060000 + if (buffer->bf_getbuffer && (*buffer->bf_getbuffer)(string, + &str_info->view, PyBUF_SIMPLE) >= 0) + /* It's a new-style buffer. */ + str_info->should_release = TRUE; + else +#endif + if (buffer->bf_getreadbuffer && buffer->bf_getsegcount && + buffer->bf_getsegcount(string, NULL) == 1) + /* It's an old-style buffer. */ + str_info->should_release = FALSE; + else { + PyErr_SetString(PyExc_TypeError, "expected string or buffer"); + return FALSE; + } + + /* Determine buffer size. */ +#if PY_VERSION_HEX >= 0x02060000 + if (str_info->should_release) { + /* It's a new-style buffer. */ + bytes = str_info->view.len; + str_info->characters = str_info->view.buf; + + if (str_info->characters == NULL) { + PyBuffer_Release(&str_info->view); + PyErr_SetString(PyExc_ValueError, "buffer is NULL"); + return FALSE; + } + } else +#endif + /* It's an old-style buffer. */ + bytes = buffer->bf_getreadbuffer(string, 0, &str_info->characters); + + if (bytes < 0) { +#if PY_VERSION_HEX >= 0x02060000 + if (str_info->should_release) + PyBuffer_Release(&str_info->view); +#endif + PyErr_SetString(PyExc_TypeError, "buffer has negative size"); + return FALSE; + } + + /* Determine character size. 
*/ + size = PyObject_Size(string); + + if (PyString_Check(string) || bytes == size) + str_info->charsize = 1; + else { +#if PY_VERSION_HEX >= 0x02060000 + if (str_info->should_release) + PyBuffer_Release(&str_info->view); +#endif + PyErr_SetString(PyExc_TypeError, "buffer size mismatch"); + return FALSE; + } + + str_info->length = size; + str_info->is_unicode = FALSE; + + return TRUE; +} + +/* Deallocates the groups storage. */ +Py_LOCAL_INLINE(void) dealloc_groups(RE_GroupData* groups, size_t group_count) + { + size_t g; + + if (!groups) + return; + + for (g = 0; g < group_count; g++) + re_dealloc(groups[g].captures); + + re_dealloc(groups); +} + +/* Initialises a state object. */ +Py_LOCAL_INLINE(BOOL) state_init_2(RE_State* state, PatternObject* pattern, + PyObject* string, RE_StringInfo* str_info, Py_ssize_t start, Py_ssize_t end, + BOOL overlapped, int concurrent, BOOL partial, BOOL use_lock, BOOL + visible_captures, BOOL match_all) { + Py_ssize_t final_pos; + int i; + + state->groups = NULL; + state->repeats = NULL; + state->visible_captures = visible_captures; + state->match_all = match_all; + state->backtrack_block.previous = NULL; + state->backtrack_block.next = NULL; + state->backtrack_block.capacity = RE_BACKTRACK_BLOCK_SIZE; + state->backtrack_allocated = RE_BACKTRACK_BLOCK_SIZE; + state->first_saved_groups = NULL; + state->current_saved_groups = NULL; + state->first_saved_repeats = NULL; + state->current_saved_repeats = NULL; + state->lock = NULL; + state->fuzzy_guards = NULL; + state->first_group_call_frame = NULL; + state->current_group_call_frame = NULL; + state->group_call_guard_list = NULL; + state->req_pos = -1; + + /* The call guards used by recursive patterns. 
*/ + if (pattern->call_ref_info_count > 0) { + state->group_call_guard_list = + (RE_GuardList*)re_alloc(pattern->call_ref_info_count * + sizeof(RE_GuardList)); + if (!state->group_call_guard_list) + goto error; + memset(state->group_call_guard_list, 0, pattern->call_ref_info_count * + sizeof(RE_GuardList)); + } + + /* The capture groups. */ + if (pattern->true_group_count) { + size_t g; + + if (pattern->groups_storage) { + state->groups = pattern->groups_storage; + pattern->groups_storage = NULL; + } else { + state->groups = (RE_GroupData*)re_alloc(pattern->true_group_count * + sizeof(RE_GroupData)); + if (!state->groups) + goto error; + memset(state->groups, 0, pattern->true_group_count * + sizeof(RE_GroupData)); + + for (g = 0; g < pattern->true_group_count; g++) { + RE_GroupSpan* captures; + + captures = (RE_GroupSpan*)re_alloc(sizeof(RE_GroupSpan)); + if (!captures) { + size_t i; + + for (i = 0; i < g; i++) + re_dealloc(state->groups[i].captures); + + goto error; + } + + state->groups[g].captures = captures; + state->groups[g].capture_capacity = 1; + } + } + } + + /* Adjust boundaries. */ + if (start < 0) + start += str_info->length; + if (start < 0) + start = 0; + else if (start > str_info->length) + start = str_info->length; + + if (end < 0) + end += str_info->length; + if (end < 0) + end = 0; + else if (end > str_info->length) + end = str_info->length; + + state->overlapped = overlapped; + state->min_width = pattern->min_width; + + /* Initialise the getters and setters for the character size. */ + state->charsize = str_info->charsize; + state->is_unicode = str_info->is_unicode; + +#if PY_VERSION_HEX >= 0x02060000 + /* Are we using a buffer object? If so, we need to copy the info. 
*/ + state->should_release = str_info->should_release; + if (state->should_release) + state->view = str_info->view; + +#endif + switch (state->charsize) { + case 1: + state->char_at = bytes1_char_at; + state->set_char_at = bytes1_set_char_at; + state->point_to = bytes1_point_to; + break; + case 2: + state->char_at = bytes2_char_at; + state->set_char_at = bytes2_set_char_at; + state->point_to = bytes2_point_to; + break; + case 4: + state->char_at = bytes4_char_at; + state->set_char_at = bytes4_set_char_at; + state->point_to = bytes4_point_to; + break; + default: + goto error; + } + + state->encoding = pattern->encoding; + + /* The state object contains a reference to the string and also a pointer + * to its contents. + * + * The documentation says that the end of the slice behaves like the end of + * the string. + */ + state->text = str_info->characters; + state->text_length = end; + + state->reverse = (pattern->flags & RE_FLAG_REVERSE) != 0; + if (partial) + state->partial_side = state->reverse ? RE_PARTIAL_LEFT : + RE_PARTIAL_RIGHT; + else + state->partial_side = RE_PARTIAL_NONE; + + state->slice_start = start; + state->slice_end = state->text_length; + state->text_pos = state->reverse ? state->slice_end : state->slice_start; + + /* Point to the final newline and line separator if it's at the end of the + * string, otherwise just -1. + */ + state->final_newline = -1; + state->final_line_sep = -1; + final_pos = state->text_length - 1; + if (final_pos >= 0) { + Py_UCS4 ch; + + ch = state->char_at(state->text, final_pos); + if (ch == 0x0A) { + /* The string ends with LF. */ + state->final_newline = final_pos; + state->final_line_sep = final_pos; + + /* Does the string end with CR/LF? */ + --final_pos; + if (final_pos >= 0 && state->char_at(state->text, final_pos) == + 0x0D) + state->final_line_sep = final_pos; + } else { + /* The string doesn't end with LF, but it could be another kind of + * line separator. 
+ */ + if (state->encoding->is_line_sep(ch)) + state->final_line_sep = final_pos; + } + } + + /* If the 'new' behaviour is enabled then split correctly on zero-width + * matches. + */ + state->version_0 = (pattern->flags & RE_FLAG_VERSION1) == 0; + state->must_advance = FALSE; + + state->pattern = pattern; + state->string = string; + + if (pattern->repeat_count) { + if (pattern->repeats_storage) { + state->repeats = pattern->repeats_storage; + pattern->repeats_storage = NULL; + } else { + state->repeats = (RE_RepeatData*)re_alloc(pattern->repeat_count * + sizeof(RE_RepeatData)); + if (!state->repeats) + goto error; + memset(state->repeats, 0, pattern->repeat_count * + sizeof(RE_RepeatData)); + } + } + + if (pattern->fuzzy_count) { + state->fuzzy_guards = (RE_FuzzyGuards*)re_alloc(pattern->fuzzy_count * + sizeof(RE_FuzzyGuards)); + if (!state->fuzzy_guards) + goto error; + memset(state->fuzzy_guards, 0, pattern->fuzzy_count * + sizeof(RE_FuzzyGuards)); + } + + Py_INCREF(state->pattern); + Py_INCREF(state->string); + + /* Multithreading is allowed during matching when explicitly enabled or on + * immutable strings. + */ + switch (concurrent) { + case RE_CONC_NO: + state->is_multithreaded = FALSE; + break; + case RE_CONC_YES: + state->is_multithreaded = TRUE; + break; + default: + state->is_multithreaded = PyUnicode_Check(string) || + PyString_Check(string); + break; + } + + /* A state struct can sometimes be shared across threads. In such + * instances, if multithreading is enabled we need to protect the state + * with a lock (mutex) during matching. 
+ */ + if (state->is_multithreaded && use_lock) + state->lock = PyThread_allocate_lock(); + + for (i = 0; i < MAX_SEARCH_POSITIONS; i++) + state->search_positions[i].start_pos = -1; + + return TRUE; + +error: + re_dealloc(state->group_call_guard_list); + re_dealloc(state->repeats); + dealloc_groups(state->groups, pattern->true_group_count); + re_dealloc(state->fuzzy_guards); + state->repeats = NULL; + state->groups = NULL; + state->fuzzy_guards = NULL; + return FALSE; +} + +#if PY_VERSION_HEX >= 0x02060000 +/* Releases the string's buffer, if necessary. */ +Py_LOCAL_INLINE(void) release_buffer(RE_StringInfo* str_info) { + if (str_info->should_release) + PyBuffer_Release(&str_info->view); +} + +#endif +/* Initialises a state object. */ +Py_LOCAL_INLINE(BOOL) state_init(RE_State* state, PatternObject* pattern, + PyObject* string, Py_ssize_t start, Py_ssize_t end, BOOL overlapped, int + concurrent, BOOL partial, BOOL use_lock, BOOL visible_captures, BOOL + match_all) { + RE_StringInfo str_info; + + /* Get the string to search or match. */ + if (!get_string(string, &str_info)) + return FALSE; + + /* If we fail to initialise the state then we need to release the buffer if + * the string is a buffer object. + */ + if (!state_init_2(state, pattern, string, &str_info, start, end, + overlapped, concurrent, partial, use_lock, visible_captures, match_all)) + { +#if PY_VERSION_HEX >= 0x02060000 + release_buffer(&str_info); + +#endif + return FALSE; + } + + /* The state has been initialised successfully, so now the state has the + * responsibility of releasing the buffer if the string is a buffer object. + */ + return TRUE; +} + +/* Deallocates repeat data. 
*/ +Py_LOCAL_INLINE(void) dealloc_repeats(RE_RepeatData* repeats, size_t + repeat_count) { + size_t i; + + if (!repeats) + return; + + for (i = 0; i < repeat_count; i++) { + re_dealloc(repeats[i].body_guard_list.spans); + re_dealloc(repeats[i].tail_guard_list.spans); + } + + re_dealloc(repeats); +} + +/* Deallocates fuzzy guards. */ +Py_LOCAL_INLINE(void) dealloc_fuzzy_guards(RE_FuzzyGuards* guards, size_t + fuzzy_count) { + size_t i; + + if (!guards) + return; + + for (i = 0; i < fuzzy_count; i++) { + re_dealloc(guards[i].body_guard_list.spans); + re_dealloc(guards[i].tail_guard_list.spans); + } + + re_dealloc(guards); +} + +/* Finalises a state object, discarding its contents. */ +Py_LOCAL_INLINE(void) state_fini(RE_State* state) { + RE_BacktrackBlock* current; + PatternObject* pattern; + RE_SavedGroups* saved_groups; + RE_SavedRepeats* saved_repeats; + RE_GroupCallFrame* frame; + size_t i; + + /* Discard the lock (mutex) if there's one. */ + if (state->lock) + PyThread_free_lock(state->lock); + + /* Deallocate the backtrack blocks. 
*/ + current = state->backtrack_block.next; + while (current) { + RE_BacktrackBlock* next; + + next = current->next; + re_dealloc(current); + state->backtrack_allocated -= RE_BACKTRACK_BLOCK_SIZE; + current = next; + } + + pattern = state->pattern; + + saved_groups = state->first_saved_groups; + while (saved_groups) { + RE_SavedGroups* next; + + next = saved_groups->next; + re_dealloc(saved_groups->spans); + re_dealloc(saved_groups->counts); + re_dealloc(saved_groups); + saved_groups = next; + } + + saved_repeats = state->first_saved_repeats; + while (saved_repeats) { + RE_SavedRepeats* next; + + next = saved_repeats->next; + + dealloc_repeats(saved_repeats->repeats, pattern->repeat_count); + + re_dealloc(saved_repeats); + saved_repeats = next; + } + + if (pattern->groups_storage) + dealloc_groups(state->groups, pattern->true_group_count); + else + pattern->groups_storage = state->groups; + + if (pattern->repeats_storage) + dealloc_repeats(state->repeats, pattern->repeat_count); + else + pattern->repeats_storage = state->repeats; + + frame = state->first_group_call_frame; + while (frame) { + RE_GroupCallFrame* next; + + next = frame->next; + + dealloc_groups(frame->groups, pattern->true_group_count); + dealloc_repeats(frame->repeats, pattern->repeat_count); + + re_dealloc(frame); + frame = next; + } + + for (i = 0; i < pattern->call_ref_info_count; i++) + re_dealloc(state->group_call_guard_list[i].spans); + + if (state->group_call_guard_list) + re_dealloc(state->group_call_guard_list); + + if (state->fuzzy_guards) + dealloc_fuzzy_guards(state->fuzzy_guards, pattern->fuzzy_count); + + Py_DECREF(state->pattern); + Py_DECREF(state->string); +#if PY_VERSION_HEX >= 0x02060000 + + if (state->should_release) + PyBuffer_Release(&state->view); +#endif +} + +/* Converts a string index to an integer. + * + * If the index is None then the default will be returned. 
+ */
+/* On failure the RE_ERROR_INDEX error is set and 0 is returned, so callers
+ * must use PyErr_Occurred() to tell an error from a genuine index of 0.
+ */
+Py_LOCAL_INLINE(Py_ssize_t) as_string_index(PyObject* obj, Py_ssize_t def) {
+    Py_ssize_t value;
+
+    if (obj == Py_None)
+        return def;
+
+    /* -1 is only an error if an exception is also pending. */
+    value = PyInt_AsSsize_t(obj);
+    if (value != -1 || !PyErr_Occurred())
+        return value;
+
+    PyErr_Clear();
+
+    /* Retry as a long before giving up. */
+    value = PyLong_AsLong(obj);
+    if (value != -1 || !PyErr_Occurred())
+        return value;
+
+    set_error(RE_ERROR_INDEX, NULL);
+    return 0;
+}
+
+/* Deallocates a MatchObject.
+ *
+ * Drops the references held by the match (string, substring, pattern and the
+ * cached regs), frees the groups array and then the object itself.
+ */
+static void match_dealloc(PyObject* self_) {
+    MatchObject* self;
+
+    self = (MatchObject*)self_;
+
+    /* string, substring and regs may be NULL, hence Py_XDECREF. */
+    Py_XDECREF(self->string);
+    Py_XDECREF(self->substring);
+    Py_DECREF(self->pattern);
+    if (self->groups)
+        re_dealloc(self->groups);
+    Py_XDECREF(self->regs);
+    PyObject_DEL(self);
+}
+
+/* Restricts a value to a range.
+ *
+ * Returns 'lower' if value < lower, 'upper' if value > upper, otherwise the
+ * value itself.
+ */
+Py_LOCAL_INLINE(Py_ssize_t) limited_range(Py_ssize_t value, Py_ssize_t lower,
+  Py_ssize_t upper) {
+    if (value < lower)
+        return lower;
+
+    if (value > upper)
+        return upper;
+
+    return value;
+}
+
+/* Gets a slice from a Unicode string.
+ *
+ * 'start' and 'end' are each clamped to [0, length] before slicing. Returns a
+ * new reference.
+ */
+Py_LOCAL_INLINE(PyObject*) unicode_slice(PyObject* string, Py_ssize_t start,
+  Py_ssize_t end) {
+    Py_ssize_t length;
+    Py_UNICODE* buffer;
+
+    length = PyUnicode_GET_SIZE(string);
+    start = limited_range(start, 0, length);
+    end = limited_range(end, 0, length);
+
+    buffer = PyUnicode_AsUnicode(string);
+
+    /* NOTE(review): assumes start <= end after clamping -- confirm callers. */
+    return PyUnicode_FromUnicode(buffer + start, end - start);
+}
+
+/* Gets a slice from a bytestring.
+ *
+ * 'start' and 'end' are each clamped to [0, length] before slicing. Returns a
+ * new reference.
+ */
+Py_LOCAL_INLINE(PyObject*) bytes_slice(PyObject* string, Py_ssize_t start,
+  Py_ssize_t end) {
+    Py_ssize_t length;
+    char* buffer;
+
+    length = PyString_GET_SIZE(string);
+    start = limited_range(start, 0, length);
+    end = limited_range(end, 0, length);
+
+    buffer = PyString_AsString(string);
+
+    /* NOTE(review): assumes start <= end after clamping -- confirm callers. */
+    return PyString_FromStringAndSize(buffer + start, end - start);
+}
+
+/* Gets a slice from a string, returning either a Unicode string or a
+ * bytestring.
+ */
+/* Any other object is sliced through the generic sequence protocol. */
+Py_LOCAL_INLINE(PyObject*) get_slice(PyObject* string, Py_ssize_t start,
+  Py_ssize_t end) {
+    if (PyUnicode_Check(string))
+        return unicode_slice(string, start, end);
+
+    if (PyString_Check(string))
+        return bytes_slice(string, start, end);
+
+    return PySequence_GetSlice(string, start, end);
+}
+
+/* Gets a MatchObject's group by integer index.
+ *
+ * Index 0 is the whole match; 1..group_count are the capture groups. Returns
+ * a new reference to the matched text, or a new reference to 'def' if the
+ * group has no span. Sets RE_ERROR_NO_SUCH_GROUP and returns NULL if the
+ * index is out of range.
+ */
+static PyObject* match_get_group_by_index(MatchObject* self, Py_ssize_t index,
+  PyObject* def) {
+    RE_GroupSpan* span;
+
+    if (index < 0 || (size_t)index > self->group_count) {
+        /* Raise error if we were given a bad group number. */
+        set_error(RE_ERROR_NO_SUCH_GROUP, NULL);
+        return NULL;
+    }
+
+    /* Positions are stored relative to the original string, so they are
+     * rebased by substring_offset before slicing the stored substring.
+     */
+    if (index == 0)
+        return get_slice(self->substring, self->match_start -
+          self->substring_offset, self->match_end - self->substring_offset);
+
+    /* Capture group indexes are 1-based (excluding group 0, which is the
+     * entire matched string).
+     */
+    span = &self->groups[index - 1].span;
+
+    if (span->start < 0 || span->end < 0) {
+        /* Return default value if the string or group is undefined. */
+        Py_INCREF(def);
+        return def;
+    }
+
+    return get_slice(self->substring, span->start - self->substring_offset,
+      span->end - self->substring_offset);
+}
+
+/* Gets a MatchObject's start by integer index.
+ *
+ * Returns the start position as a new Python integer, or NULL with
+ * RE_ERROR_NO_SUCH_GROUP set if the index is out of range.
+ */
+static PyObject* match_get_start_by_index(MatchObject* self, Py_ssize_t index)
+  {
+    RE_GroupSpan* span;
+
+    if (index < 0 || (size_t)index > self->group_count) {
+        /* Raise error if we were given a bad group number. */
+        set_error(RE_ERROR_NO_SUCH_GROUP, NULL);
+        return NULL;
+    }
+
+    if (index == 0)
+        return Py_BuildValue("n", self->match_start);
+
+    /* Capture group indexes are 1-based (excluding group 0, which is the
+     * entire matched string).
+     */
+    span = &self->groups[index - 1].span;
+    return Py_BuildValue("n", span->start);
+}
+
+/* Gets a MatchObject's starts by integer index.
*/
+/* Returns a new list holding the start position of every capture of the
+ * group (a one-item list with the match start for group 0), or NULL on
+ * error.
+ */
+static PyObject* match_get_starts_by_index(MatchObject* self, Py_ssize_t index)
+  {
+    RE_GroupData* group;
+    PyObject* result;
+    PyObject* item;
+    size_t i;
+
+    if (index < 0 || (size_t)index > self->group_count) {
+        /* Raise error if we were given a bad group number. */
+        set_error(RE_ERROR_NO_SUCH_GROUP, NULL);
+        return NULL;
+    }
+
+    if (index == 0) {
+        result = PyList_New(1);
+        if (!result)
+            return NULL;
+
+        item = Py_BuildValue("n", self->match_start);
+        if (!item)
+            goto error;
+
+        /* PyList_SET_ITEM steals the reference to the item. */
+        PyList_SET_ITEM(result, 0, item);
+
+        return result;
+    }
+
+    /* Capture group indexes are 1-based (excluding group 0, which is the
+     * entire matched string).
+     */
+    group = &self->groups[index - 1];
+
+    result = PyList_New((Py_ssize_t)group->capture_count);
+    if (!result)
+        return NULL;
+
+    for (i = 0; i < group->capture_count; i++) {
+        item = Py_BuildValue("n", group->captures[i].start);
+        if (!item)
+            goto error;
+
+        /* PyList_SET_ITEM steals the reference to the item. */
+        PyList_SET_ITEM(result, i, item);
+    }
+
+    return result;
+
+error:
+    /* Releasing the list also releases the items already stored in it. */
+    Py_DECREF(result);
+    return NULL;
+}
+
+/* Gets a MatchObject's end by integer index.
+ *
+ * Returns the end position as a new Python integer, or NULL with
+ * RE_ERROR_NO_SUCH_GROUP set if the index is out of range.
+ */
+static PyObject* match_get_end_by_index(MatchObject* self, Py_ssize_t index) {
+    RE_GroupSpan* span;
+
+    if (index < 0 || (size_t)index > self->group_count) {
+        /* Raise error if we were given a bad group number. */
+        set_error(RE_ERROR_NO_SUCH_GROUP, NULL);
+        return NULL;
+    }
+
+    if (index == 0)
+        return Py_BuildValue("n", self->match_end);
+
+    /* Capture group indexes are 1-based (excluding group 0, which is the
+     * entire matched string).
+     */
+    span = &self->groups[index - 1].span;
+    return Py_BuildValue("n", span->end);
+}
+
+/* Gets a MatchObject's ends by integer index.
*/
+/* Returns a new list holding the end position of every capture of the group
+ * (a one-item list with the match end for group 0), or NULL on error.
+ */
+static PyObject* match_get_ends_by_index(MatchObject* self, Py_ssize_t index) {
+    RE_GroupData* group;
+    PyObject* result;
+    PyObject* item;
+    size_t i;
+
+    if (index < 0 || (size_t)index > self->group_count) {
+        /* Raise error if we were given a bad group number. */
+        set_error(RE_ERROR_NO_SUCH_GROUP, NULL);
+        return NULL;
+    }
+
+    if (index == 0) {
+        result = PyList_New(1);
+        if (!result)
+            return NULL;
+
+        item = Py_BuildValue("n", self->match_end);
+        if (!item)
+            goto error;
+
+        /* PyList_SET_ITEM steals the reference to the item. */
+        PyList_SET_ITEM(result, 0, item);
+
+        return result;
+    }
+
+    /* Capture group indexes are 1-based (excluding group 0, which is the
+     * entire matched string).
+     */
+    group = &self->groups[index - 1];
+
+    result = PyList_New((Py_ssize_t)group->capture_count);
+    if (!result)
+        return NULL;
+
+    for (i = 0; i < group->capture_count; i++) {
+        item = Py_BuildValue("n", group->captures[i].end);
+        if (!item)
+            goto error;
+
+        /* PyList_SET_ITEM steals the reference to the item. */
+        PyList_SET_ITEM(result, i, item);
+    }
+
+    return result;
+
+error:
+    /* Releasing the list also releases the items already stored in it. */
+    Py_DECREF(result);
+    return NULL;
+}
+
+/* Gets a MatchObject's span by integer index.
+ *
+ * Returns a new (start, end) tuple, or NULL with RE_ERROR_NO_SUCH_GROUP set
+ * if the index is out of range.
+ */
+static PyObject* match_get_span_by_index(MatchObject* self, Py_ssize_t index) {
+    RE_GroupSpan* span;
+
+    if (index < 0 || (size_t)index > self->group_count) {
+        /* Raise error if we were given a bad group number. */
+        set_error(RE_ERROR_NO_SUCH_GROUP, NULL);
+        return NULL;
+    }
+
+    if (index == 0)
+        return Py_BuildValue("nn", self->match_start, self->match_end);
+
+    /* Capture group indexes are 1-based (excluding group 0, which is the
+     * entire matched string).
+     */
+    span = &self->groups[index - 1].span;
+    return Py_BuildValue("nn", span->start, span->end);
+}
+
+/* Gets a MatchObject's spans by integer index.
*/
+/* Returns a new list holding the (start, end) tuple of every capture of the
+ * group (a one-item list with the match span for group 0), or NULL on error.
+ */
+static PyObject* match_get_spans_by_index(MatchObject* self, Py_ssize_t index)
+  {
+    RE_GroupData* group;
+    PyObject* result;
+    PyObject* item;
+    size_t i;
+
+    if (index < 0 || (size_t)index > self->group_count) {
+        /* Raise error if we were given a bad group number. */
+        set_error(RE_ERROR_NO_SUCH_GROUP, NULL);
+        return NULL;
+    }
+
+    if (index == 0) {
+        result = PyList_New(1);
+        if (!result)
+            return NULL;
+
+        item = Py_BuildValue("nn", self->match_start, self->match_end);
+        if (!item)
+            goto error;
+
+        /* PyList_SET_ITEM steals the reference to the item. */
+        PyList_SET_ITEM(result, 0, item);
+
+        return result;
+    }
+
+    /* Capture group indexes are 1-based (excluding group 0, which is the
+     * entire matched string).
+     */
+    group = &self->groups[index - 1];
+
+    result = PyList_New((Py_ssize_t)group->capture_count);
+    if (!result)
+        return NULL;
+
+    for (i = 0; i < group->capture_count; i++) {
+        item = Py_BuildValue("nn", group->captures[i].start,
+          group->captures[i].end);
+        if (!item)
+            goto error;
+
+        /* PyList_SET_ITEM steals the reference to the item. */
+        PyList_SET_ITEM(result, i, item);
+    }
+
+    return result;
+
+error:
+    /* Releasing the list also releases the items already stored in it. */
+    Py_DECREF(result);
+    return NULL;
+}
+
+/* Gets a MatchObject's captures by integer index.
+ *
+ * Returns a new list holding the text of every capture of the group (a
+ * one-item list with the whole match for group 0), or NULL on error.
+ */
+static PyObject* match_get_captures_by_index(MatchObject* self, Py_ssize_t
+  index) {
+    RE_GroupData* group;
+    PyObject* result;
+    PyObject* slice;
+    size_t i;
+
+    if (index < 0 || (size_t)index > self->group_count) {
+        /* Raise error if we were given a bad group number. */
+        set_error(RE_ERROR_NO_SUCH_GROUP, NULL);
+        return NULL;
+    }
+
+    if (index == 0) {
+        result = PyList_New(1);
+        if (!result)
+            return NULL;
+
+        slice = get_slice(self->substring, self->match_start -
+          self->substring_offset, self->match_end - self->substring_offset);
+        if (!slice)
+            goto error;
+
+        /* PyList_SET_ITEM steals the reference to the slice. */
+        PyList_SET_ITEM(result, 0, slice);
+
+        return result;
+    }
+
+    /* Capture group indexes are 1-based (excluding group 0, which is the
+     * entire matched string).
+     */
+    group = &self->groups[index - 1];
+
+    result = PyList_New((Py_ssize_t)group->capture_count);
+    if (!result)
+        return NULL;
+
+    for (i = 0; i < group->capture_count; i++) {
+        slice = get_slice(self->substring, group->captures[i].start -
+          self->substring_offset, group->captures[i].end -
+          self->substring_offset);
+        if (!slice)
+            goto error;
+
+        /* PyList_SET_ITEM steals the reference to the slice. */
+        PyList_SET_ITEM(result, i, slice);
+    }
+
+    return result;
+
+error:
+    /* Releasing the list also releases the items already stored in it. */
+    Py_DECREF(result);
+    return NULL;
+}
+
+/* Converts a group index to an integer.
+ *
+ * Returns -1 with RE_ERROR_INDEX set if the object is not an integer; since
+ * -1 can also be a genuine value, callers check PyErr_Occurred() as well.
+ */
+Py_LOCAL_INLINE(Py_ssize_t) as_group_index(PyObject* obj) {
+    Py_ssize_t value;
+
+    value = PyInt_AsSsize_t(obj);
+    if (value != -1 || !PyErr_Occurred())
+        return value;
+
+    PyErr_Clear();
+
+    /* Retry as a long before giving up. */
+    value = PyLong_AsLong(obj);
+    if (value != -1 || !PyErr_Occurred())
+        return value;
+
+    set_error(RE_ERROR_INDEX, NULL);
+    return -1;
+}
+
+/* Gets a MatchObject's group index.
+ *
+ * The supplied index can be an integer or a string (group name) object.
+ *
+ * Returns the group number, or -1 (with no exception left pending by the
+ * lookup itself) if the index does not identify a group. If 'allow_neg' is
+ * true then a negative integer counts back from the last group.
+ */
+Py_LOCAL_INLINE(Py_ssize_t) match_get_group_index(MatchObject* self, PyObject*
+  index, BOOL allow_neg) {
+    Py_ssize_t group;
+
+    /* Is the index an integer? */
+    group = as_group_index(index);
+    if (group != -1 || !PyErr_Occurred()) {
+        Py_ssize_t min_group = 0;
+
+        /* Adjust negative indices where valid and allowed. */
+        if (group < 0 && allow_neg) {
+            group += (Py_ssize_t)self->group_count + 1;
+            /* A negative index may not wrap round to group 0. */
+            min_group = 1;
+        }
+
+        if (min_group <= group && (size_t)group <= self->group_count)
+            return group;
+
+        return -1;
+    }
+
+    /* The index might be a group name. */
+    if (self->pattern->groupindex) {
+        /* Look up the name. */
+        PyErr_Clear();
+
+        index = PyObject_GetItem(self->pattern->groupindex, index);
+        if (index) {
+            /* Check that we have an integer. */
+            group = as_group_index(index);
+            Py_DECREF(index);
+            if (group != -1 || !PyErr_Occurred())
+                return group;
+        }
+    }
+
+    /* Unknown name: discard the lookup error and report "no group". */
+    PyErr_Clear();
+    return -1;
+}
+
+/* Gets a MatchObject's group by object index.
+ *
+ * Sets RE_ERROR_GROUP_INDEX_TYPE and returns NULL if the index is neither an
+ * integer nor a string. An unresolved index reaches
+ * match_get_group_by_index as -1, which reports the bad group there.
+ */
+Py_LOCAL_INLINE(PyObject*) match_get_group(MatchObject* self, PyObject* index,
+  PyObject* def, BOOL allow_neg) {
+    /* Check that the index is an integer or a string. */
+    if (PyInt_Check(index) || PyLong_Check(index) || PyUnicode_Check(index) ||
+      PyString_Check(index))
+        return match_get_group_by_index(self, match_get_group_index(self,
+          index, allow_neg), def);
+
+    set_error(RE_ERROR_GROUP_INDEX_TYPE, index);
+    return NULL;
+}
+
+/* Gets info from a MatchObject by object index.
+ *
+ * Resolves the index (negative indices not allowed) and passes the resulting
+ * group number to 'get_by_index'.
+ */
+Py_LOCAL_INLINE(PyObject*) get_by_arg(MatchObject* self, PyObject* index,
+  RE_GetByIndexFunc get_by_index) {
+    /* Check that the index is an integer or a string. */
+    if (PyInt_Check(index) || PyLong_Check(index) || PyUnicode_Check(index) ||
+      PyString_Check(index))
+        return get_by_index(self, match_get_group_index(self, index, FALSE));
+
+    set_error(RE_ERROR_GROUP_INDEX_TYPE, index);
+    return NULL;
+}
+
+/* MatchObject's 'group' method.
+ *
+ * No arguments gives the whole match, one argument gives that group, and
+ * several arguments give a tuple with one item per argument.
+ */
+static PyObject* match_group(MatchObject* self, PyObject* args) {
+    Py_ssize_t size;
+    PyObject* result;
+    Py_ssize_t i;
+
+    size = PyTuple_GET_SIZE(args);
+
+    switch (size) {
+    case 0:
+        /* group() */
+        result = match_get_group_by_index(self, 0, Py_None);
+        break;
+    case 1:
+        /* group(x). PyTuple_GET_ITEM borrows the reference. */
+        result = match_get_group(self, PyTuple_GET_ITEM(args, 0), Py_None,
+          FALSE);
+        break;
+    default:
+        /* group(x, y, z, ...) */
+        /* Fetch multiple items. */
+        result = PyTuple_New(size);
+        if (!result)
+            return NULL;
+
+        for (i = 0; i < size; i++) {
+            PyObject* item;
+
+            /* PyTuple_GET_ITEM borrows the reference. */
+            item = match_get_group(self, PyTuple_GET_ITEM(args, i), Py_None,
+              FALSE);
+            if (!item) {
+                Py_DECREF(result);
+                return NULL;
+            }
+
+            /* PyTuple_SET_ITEM steals the reference to the item. */
+            PyTuple_SET_ITEM(result, i, item);
+        }
+        break;
+    }
+
+    return result;
+}
+
+/* Generic method for getting info from a MatchObject.
+ *
+ * Applies 'get_by_index' to group 0 (no arguments), to one group (one
+ * argument) or to each argument in turn, collecting the results in a tuple.
+ */
+Py_LOCAL_INLINE(PyObject*) get_from_match(MatchObject* self, PyObject* args,
+  RE_GetByIndexFunc get_by_index) {
+    Py_ssize_t size;
+    PyObject* result;
+    Py_ssize_t i;
+
+    size = PyTuple_GET_SIZE(args);
+
+    switch (size) {
+    case 0:
+        /* get() */
+        result = get_by_index(self, 0);
+        break;
+    case 1:
+        /* get(x). PyTuple_GET_ITEM borrows the reference. */
+        result = get_by_arg(self, PyTuple_GET_ITEM(args, 0), get_by_index);
+        break;
+    default:
+        /* get(x, y, z, ...) */
+        /* Fetch multiple items. */
+        result = PyTuple_New(size);
+        if (!result)
+            return NULL;
+
+        for (i = 0; i < size; i++) {
+            PyObject* item;
+
+            /* PyTuple_GET_ITEM borrows the reference. */
+            item = get_by_arg(self, PyTuple_GET_ITEM(args, i), get_by_index);
+            if (!item) {
+                Py_DECREF(result);
+                return NULL;
+            }
+
+            /* PyTuple_SET_ITEM steals the reference to the item. */
+            PyTuple_SET_ITEM(result, i, item);
+        }
+        break;
+    }
+
+    return result;
+}
+
+/* MatchObject's 'start' method. */
+static PyObject* match_start(MatchObject* self, PyObject* args) {
+    return get_from_match(self, args, match_get_start_by_index);
+}
+
+/* MatchObject's 'starts' method. */
+static PyObject* match_starts(MatchObject* self, PyObject* args) {
+    return get_from_match(self, args, match_get_starts_by_index);
+}
+
+/* MatchObject's 'end' method. */
+static PyObject* match_end(MatchObject* self, PyObject* args) {
+    return get_from_match(self, args, match_get_end_by_index);
+}
+
+/* MatchObject's 'ends' method. */
+static PyObject* match_ends(MatchObject* self, PyObject* args) {
+    return get_from_match(self, args, match_get_ends_by_index);
+}
+
+/* MatchObject's 'span' method. */
+static PyObject* match_span(MatchObject* self, PyObject* args) {
+    return get_from_match(self, args, match_get_span_by_index);
+}
+
+/* MatchObject's 'spans' method.
*/
+static PyObject* match_spans(MatchObject* self, PyObject* args) {
+    return get_from_match(self, args, match_get_spans_by_index);
+}
+
+/* MatchObject's 'captures' method. */
+static PyObject* match_captures(MatchObject* self, PyObject* args) {
+    return get_from_match(self, args, match_get_captures_by_index);
+}
+
+/* MatchObject's 'groups' method.
+ *
+ * Returns a new tuple with one item per capture group (group 0 is not
+ * included); groups with no span yield the optional 'default' argument.
+ */
+static PyObject* match_groups(MatchObject* self, PyObject* args, PyObject*
+  kwargs) {
+    PyObject* result;
+    size_t g;
+
+    PyObject* def = Py_None;
+    static char* kwlist[] = { "default", NULL };
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|O:groups", kwlist, &def))
+        return NULL;
+
+    result = PyTuple_New((Py_ssize_t)self->group_count);
+    if (!result)
+        return NULL;
+
+    /* Group 0 is the entire matched portion of the string, so tuple slot g
+     * holds capture group g + 1.
+     */
+    for (g = 0; g < self->group_count; g++) {
+        PyObject* item;
+
+        item = match_get_group_by_index(self, (Py_ssize_t)g + 1, def);
+        if (!item)
+            goto error;
+
+        /* PyTuple_SET_ITEM steals the reference to the item. */
+        PyTuple_SET_ITEM(result, g, item);
+    }
+
+    return result;
+
+error:
+    Py_DECREF(result);
+    return NULL;
+}
+
+/* MatchObject's 'groupdict' method.
+ *
+ * Returns a new dict mapping each key of the pattern's groupindex to the
+ * text of that group, or to the optional 'default' argument if the group has
+ * no span. The dict is empty if the pattern has no groupindex.
+ */
+static PyObject* match_groupdict(MatchObject* self, PyObject* args, PyObject*
+  kwargs) {
+    PyObject* result;
+    PyObject* keys;
+    Py_ssize_t g;
+
+    PyObject* def = Py_None;
+    static char* kwlist[] = { "default", NULL };
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|O:groupdict", kwlist,
+      &def))
+        return NULL;
+
+    /* Returns NULL on allocation failure, or the empty dict if there are no
+     * named groups.
+     */
+    result = PyDict_New();
+    if (!result || !self->pattern->groupindex)
+        return result;
+
+    keys = PyMapping_Keys(self->pattern->groupindex);
+    if (!keys)
+        goto failed;
+
+    for (g = 0; g < PyList_GET_SIZE(keys); g++) {
+        PyObject* key;
+        PyObject* value;
+        int status;
+
+        /* PyList_GET_ITEM borrows a reference. */
+        key = PyList_GET_ITEM(keys, g);
+        if (!key)
+            goto failed;
+
+        value = match_get_group(self, key, def, FALSE);
+        if (!value)
+            goto failed;
+
+        /* PyDict_SetItem takes its own reference, so ours is released. */
+        status = PyDict_SetItem(result, key, value);
+        Py_DECREF(value);
+        if (status < 0)
+            goto failed;
+    }
+
+    Py_DECREF(keys);
+
+    return result;
+
+failed:
+    Py_XDECREF(keys);
+    Py_DECREF(result);
+    return NULL;
+}
+
+/* MatchObject's 'capturesdict' method.
+ *
+ * Returns a new dict mapping each key of the pattern's groupindex to the
+ * list of captures of that group. The dict is empty if the pattern has no
+ * groupindex.
+ */
+static PyObject* match_capturesdict(MatchObject* self) {
+    PyObject* result;
+    PyObject* keys;
+    Py_ssize_t g;
+
+    result = PyDict_New();
+    if (!result || !self->pattern->groupindex)
+        return result;
+
+    keys = PyMapping_Keys(self->pattern->groupindex);
+    if (!keys)
+        goto failed;
+
+    for (g = 0; g < PyList_GET_SIZE(keys); g++) {
+        PyObject* key;
+        Py_ssize_t group;
+        PyObject* captures;
+        int status;
+
+        /* PyList_GET_ITEM borrows a reference. */
+        key = PyList_GET_ITEM(keys, g);
+        if (!key)
+            goto failed;
+
+        /* NOTE(review): match_get_group_index clears the pending error
+         * before returning -1, so this path appears to return NULL without
+         * an exception set -- confirm.
+         */
+        group = match_get_group_index(self, key, FALSE);
+        if (group < 0)
+            goto failed;
+
+        captures = match_get_captures_by_index(self, group);
+        if (!captures)
+            goto failed;
+
+        /* PyDict_SetItem takes its own reference, so ours is released. */
+        status = PyDict_SetItem(result, key, captures);
+        Py_DECREF(captures);
+        if (status < 0)
+            goto failed;
+    }
+
+    Py_DECREF(keys);
+
+    return result;
+
+failed:
+    Py_XDECREF(keys);
+    Py_DECREF(result);
+    return NULL;
+}
+
+/* Gets a Python object by name from a named module.
+ *
+ * Imports the module and returns a new reference to its attribute
+ * 'object_name', or NULL if the import or the attribute lookup fails.
+ */
+Py_LOCAL_INLINE(PyObject*) get_object(char* module_name, char* object_name) {
+    PyObject* module;
+    PyObject* object;
+
+    module = PyImport_ImportModule(module_name);
+    if (!module)
+        return NULL;
+
+    object = PyObject_GetAttrString(module, object_name);
+    Py_DECREF(module);
+
+    return object;
+}
+
+/* Calls a function in a module.
*/ +Py_LOCAL_INLINE(PyObject*) call(char* module_name, char* function_name, + PyObject* args) { + PyObject* function; + PyObject* result; + + if (!args) + return NULL; + + function = get_object(module_name, function_name); + if (!function) + return NULL; + + result = PyObject_CallObject(function, args); + Py_DECREF(function); + Py_DECREF(args); + + return result; +} + +/* Gets a replacement item from the replacement list. + * + * The replacement item could be a string literal or a group. + */ +Py_LOCAL_INLINE(PyObject*) get_match_replacement(MatchObject* self, PyObject* + item, size_t group_count) { + Py_ssize_t index; + + if (PyUnicode_Check(item) || PyString_Check(item)) { + /* It's a literal, which can be added directly to the list. */ + Py_INCREF(item); + return item; + } + + /* Is it a group reference? */ + index = as_group_index(item); + if (index == -1 && PyErr_Occurred()) { + /* Not a group either! */ + set_error(RE_ERROR_REPLACEMENT, NULL); + return NULL; + } + + if (index == 0) { + /* The entire matched portion of the string. */ + return get_slice(self->substring, self->match_start - + self->substring_offset, self->match_end - self->substring_offset); + } else if (index >= 1 && (size_t)index <= group_count) { + /* A group. If it didn't match then return None instead. */ + RE_GroupData* group; + + group = &self->groups[index - 1]; + + if (group->capture_count > 0) + return get_slice(self->substring, group->span.start - + self->substring_offset, group->span.end - + self->substring_offset); + else { + Py_INCREF(Py_None); + return Py_None; + } + } else { + /* No such group. */ + set_error(RE_ERROR_NO_SUCH_GROUP, NULL); + return NULL; + } +} + +/* Initialises the join list. */ +Py_LOCAL_INLINE(void) init_join_list(JoinInfo* join_info, BOOL reversed, BOOL + is_unicode) { + join_info->list = NULL; + join_info->item = NULL; + join_info->reversed = reversed; + join_info->is_unicode = is_unicode; +} + +/* Adds an item to the join list. 
*/
+/* The caller keeps its own reference to 'item'. The first item is held on
+ * its own; a list is only created once a second item arrives. Returns 0 on
+ * success or a negative error code with the error set.
+ */
+Py_LOCAL_INLINE(int) add_to_join_list(JoinInfo* join_info, PyObject* item) {
+    PyObject* new_item;
+    int status;
+
+    /* Obtain an owned reference of the required string type. */
+    if (join_info->is_unicode) {
+        if (PyUnicode_Check(item)) {
+            new_item = item;
+            Py_INCREF(new_item);
+        } else {
+            new_item = PyUnicode_FromObject(item);
+            if (!new_item) {
+                set_error(RE_ERROR_NOT_UNICODE, item);
+                return RE_ERROR_NOT_UNICODE;
+            }
+        }
+    } else {
+        if (PyString_Check(item)) {
+            new_item = item;
+            Py_INCREF(new_item);
+        } else {
+            /* NOTE(review): this bytestring branch converts with
+             * PyUnicode_FromObject, the same call as the Unicode branch --
+             * confirm that this is intended.
+             */
+            new_item = PyUnicode_FromObject(item);
+            if (!new_item) {
+                set_error(RE_ERROR_NOT_STRING, item);
+                return RE_ERROR_NOT_STRING;
+            }
+        }
+    }
+
+    /* If the list already exists then just add the item to it. */
+    if (join_info->list) {
+        status = PyList_Append(join_info->list, new_item);
+        if (status < 0)
+            goto error;
+
+        /* PyList_Append took its own reference. */
+        Py_DECREF(new_item);
+        return status;
+    }
+
+    /* If we already have an item then we now have 2(!) and we need to put them
+     * into a list.
+     */
+    if (join_info->item) {
+        join_info->list = PyList_New(2);
+        if (!join_info->list) {
+            status = RE_ERROR_MEMORY;
+            goto error;
+        }
+
+        /* PyList_SET_ITEM steals the reference, so ownership of the pending
+         * item moves into the list.
+         */
+        PyList_SET_ITEM(join_info->list, 0, join_info->item);
+        join_info->item = NULL;
+
+        /* PyList_SET_ITEM steals the reference to the new item. */
+        PyList_SET_ITEM(join_info->list, 1, new_item);
+        return 0;
+    }
+
+    /* This is the first item. */
+    join_info->item = new_item;
+
+    return 0;
+
+error:
+    Py_DECREF(new_item);
+    set_error(status, NULL);
+    return status;
+}
+
+/* Clears the join list.
+ *
+ * Releases the list and the pending single item, whichever are present. The
+ * pointers themselves are not reset.
+ */
+Py_LOCAL_INLINE(void) clear_join_list(JoinInfo* join_info) {
+    Py_XDECREF(join_info->list);
+    Py_XDECREF(join_info->item);
+}
+
+/* Joins together a list of strings for pattern_subx.
+ *
+ * Returns a new reference to the joined string: the join of the list, the
+ * single pending item, or an empty string if nothing was added. Returns NULL
+ * on error.
+ */
+Py_LOCAL_INLINE(PyObject*) join_list_info(JoinInfo* join_info) {
+    /* If the list already exists then just do the join. */
+    if (join_info->list) {
+        PyObject* joiner;
+        PyObject* result;
+
+        if (join_info->reversed)
+            /* The list needs to be reversed before being joined.
+             * NOTE(review): the result of PyList_Reverse is not checked.
+             */
+            PyList_Reverse(join_info->list);
+
+        if (join_info->is_unicode) {
+            /* Concatenate the Unicode strings. */
+            joiner = PyUnicode_FromUnicode(NULL, 0);
+            if (!joiner) {
+                clear_join_list(join_info);
+                return NULL;
+            }
+
+            result = PyUnicode_Join(joiner, join_info->list);
+        } else {
+            joiner = PyString_FromString("");
+            if (!joiner) {
+                clear_join_list(join_info);
+                return NULL;
+            }
+
+            /* Concatenate the bytestrings. */
+            result = _PyString_Join(joiner, join_info->list);
+        }
+
+        Py_DECREF(joiner);
+        clear_join_list(join_info);
+
+        return result;
+    }
+
+    /* If we have only 1 item, so we'll just return it. The reference held
+     * by the join info is handed over to the caller.
+     */
+    if (join_info->item)
+        return join_info->item;
+
+    /* There are no items, so return an empty string. */
+    if (join_info->is_unicode)
+        return PyUnicode_FromUnicode(NULL, 0);
+    else
+        return PyString_FromString("");
+}
+
+/* Checks whether a string replacement is a literal.
+ *
+ * To keep it simple we'll say that a literal is a string which can be used
+ * as-is.
+ *
+ * Returns its length if it is a literal, otherwise -1.
+ *
+ * A string counts as a literal when none of its characters equals
+ * 'special_char'. -1 is also returned if the string can't be read or has an
+ * unsupported character size.
+ */
+Py_LOCAL_INLINE(Py_ssize_t) check_replacement_string(PyObject* str_replacement,
+  unsigned char special_char) {
+    RE_StringInfo str_info;
+    Py_UCS4 (*char_at)(void* text, Py_ssize_t pos);
+    Py_ssize_t pos;
+
+    if (!get_string(str_replacement, &str_info))
+        return -1;
+
+    /* Pick the character reader which matches the character width. */
+    switch (str_info.charsize) {
+    case 1:
+        char_at = bytes1_char_at;
+        break;
+    case 2:
+        char_at = bytes2_char_at;
+        break;
+    case 4:
+        char_at = bytes4_char_at;
+        break;
+    default:
+#if PY_VERSION_HEX >= 0x02060000
+        release_buffer(&str_info);
+#endif
+        return -1;
+    }
+
+    for (pos = 0; pos < str_info.length; pos++) {
+        if (char_at(str_info.characters, pos) == special_char) {
+#if PY_VERSION_HEX >= 0x02060000
+            release_buffer(&str_info);
+
+#endif
+            return -1;
+        }
+    }
+
+#if PY_VERSION_HEX >= 0x02060000
+    release_buffer(&str_info);
+
+#endif
+    return str_info.length;
+}
+
+/* MatchObject's 'expand' method.
*/ +static PyObject* match_expand(MatchObject* self, PyObject* str_template) { + Py_ssize_t literal_length; + PyObject* replacement; + JoinInfo join_info; + Py_ssize_t size; + Py_ssize_t i; + + /* Is the template just a literal? */ + literal_length = check_replacement_string(str_template, '\\'); + if (literal_length >= 0) { + /* It's a literal. */ + Py_INCREF(str_template); + return str_template; + } + + /* Hand the template to the template compiler. */ + replacement = call(RE_MODULE, "_compile_replacement_helper", + PyTuple_Pack(2, self->pattern, str_template)); + if (!replacement) + return NULL; + + init_join_list(&join_info, FALSE, PyUnicode_Check(self->string)); + + /* Add each part of the template to the list. */ + size = PyList_GET_SIZE(replacement); + for (i = 0; i < size; i++) { + PyObject* item; + PyObject* str_item; + + /* PyList_GET_ITEM borrows a reference. */ + item = PyList_GET_ITEM(replacement, i); + str_item = get_match_replacement(self, item, self->group_count); + if (!str_item) + goto error; + + /* Add to the list. */ + if (str_item == Py_None) + Py_DECREF(str_item); + else { + int status; + + status = add_to_join_list(&join_info, str_item); + Py_DECREF(str_item); + if (status < 0) + goto error; + } + } + + Py_DECREF(replacement); + + /* Convert the list to a single string (also cleans up join_info). */ + return join_list_info(&join_info); + +error: + clear_join_list(&join_info); + Py_DECREF(replacement); + return NULL; +} + +#if PY_VERSION_HEX >= 0x02060000 +/* Gets a MatchObject's group dictionary. */ +Py_LOCAL_INLINE(PyObject*) match_get_group_dict(MatchObject* self) { + PyObject* result; + PyObject* keys; + Py_ssize_t g; + + result = PyDict_New(); + if (!result || !self->pattern->groupindex) + return result; + + keys = PyMapping_Keys(self->pattern->groupindex); + if (!keys) + goto failed; + + for (g = 0; g < PyList_GET_SIZE(keys); g++) { + int status; + PyObject* key; + PyObject* value; + + /* PyList_GET_ITEM borrows a reference. 
*/ + key = PyList_GET_ITEM(keys, g); + if (!key) + goto failed; + + value = match_get_group(self, key, Py_None, FALSE); + if (!value) + goto failed; + + status = PyDict_SetItem(result, key, value); + Py_DECREF(value); + if (status < 0) + goto failed; + } + + Py_DECREF(keys); + + return result; + +failed: + Py_XDECREF(keys); + Py_DECREF(result); + return NULL; +} + +/* MatchObject's 'expandf' method. */ +static PyObject* match_expandf(MatchObject* self, PyObject* str_template) { + PyObject* format_func; + PyObject* args = NULL; + size_t g; + PyObject* kwargs = NULL; + PyObject* result; + + format_func = PyObject_GetAttrString(str_template, "format"); + if (!format_func) + return NULL; + + args = PyTuple_New((Py_ssize_t)self->group_count + 1); + if (!args) + goto error; + + for (g = 0; g < self->group_count + 1; g++) + /* PyTuple_SetItem borrows the reference. */ + PyTuple_SetItem(args, (Py_ssize_t)g, match_get_group_by_index(self, + (Py_ssize_t)g, Py_None)); + + kwargs = match_get_group_dict(self); + if (!kwargs) + goto error; + + result = PyObject_Call(format_func, args, kwargs); + Py_DECREF(kwargs); + Py_DECREF(args); + Py_DECREF(format_func); + + return result; + +error: + Py_XDECREF(args); + Py_DECREF(format_func); + return NULL; +} + +#endif +Py_LOCAL_INLINE(PyObject*) make_match_copy(MatchObject* self); + +/* MatchObject's '__copy__' method. */ +static PyObject* match_copy(MatchObject* self, PyObject *unused) { + return make_match_copy(self); +} + +/* MatchObject's '__deepcopy__' method. */ +static PyObject* match_deepcopy(MatchObject* self, PyObject* memo) { + return make_match_copy(self); +} + +/* MatchObject's 'regs' attribute. */ +static PyObject* match_regs(MatchObject* self) { + PyObject* regs; + PyObject* item; + size_t g; + + regs = PyTuple_New((Py_ssize_t)self->group_count + 1); + if (!regs) + return NULL; + + item = Py_BuildValue("nn", self->match_start, self->match_end); + if (!item) + goto error; + + /* PyTuple_SET_ITEM borrows the reference. 
*/ + PyTuple_SET_ITEM(regs, 0, item); + + for (g = 0; g < self->group_count; g++) { + RE_GroupSpan* span; + + span = &self->groups[g].span; + item = Py_BuildValue("nn", span->start, span->end); + if (!item) + goto error; + + /* PyTuple_SET_ITEM borrows the reference. */ + PyTuple_SET_ITEM(regs, g + 1, item); + } + + Py_INCREF(regs); + self->regs = regs; + + return regs; + +error: + Py_DECREF(regs); + return NULL; +} + +/* MatchObject's slice method. */ +Py_LOCAL_INLINE(PyObject*) match_get_group_slice(MatchObject* self, PyObject* + slice) { + Py_ssize_t start; + Py_ssize_t end; + Py_ssize_t step; + Py_ssize_t slice_length; + + if (PySlice_GetIndicesEx((PySliceObject*)slice, + (Py_ssize_t)self->group_count + 1, &start, &end, &step, &slice_length) < + 0) + return NULL; + + if (slice_length <= 0) + return PyTuple_New(0); + else { + PyObject* result; + Py_ssize_t cur; + Py_ssize_t i; + + result = PyTuple_New(slice_length); + if (!result) + return NULL; + + cur = start; + for (i = 0; i < slice_length; i++) { + /* PyTuple_SetItem borrows the reference. */ + PyTuple_SetItem(result, i, match_get_group_by_index(self, cur, + Py_None)); + cur += step; + } + + return result; + } +} + +/* MatchObject's length method. */ +Py_LOCAL_INLINE(Py_ssize_t) match_length(MatchObject* self) { + return (Py_ssize_t)self->group_count + 1; +} + +/* MatchObject's '__getitem__' method. */ +static PyObject* match_getitem(MatchObject* self, PyObject* item) { + if (PySlice_Check(item)) + return match_get_group_slice(self, item); + + return match_get_group(self, item, Py_None, TRUE); +} + +/* Determines the portion of the target string which is covered by the group + * captures. 
+ */ +Py_LOCAL_INLINE(void) determine_target_substring(MatchObject* match, + Py_ssize_t* slice_start, Py_ssize_t* slice_end) { + Py_ssize_t start; + Py_ssize_t end; + size_t g; + + start = match->pos; + end = match->endpos; + + for (g = 0; g < match->group_count; g++) { + RE_GroupSpan* span; + size_t c; + + span = &match->groups[g].span; + if (span->start >= 0 && span->start < start) + start = span->start; + if (span->end >= 0 && span->end > end) + end = span->end; + + for (c = 0; c < match->groups[g].capture_count; c++) { + RE_GroupSpan* span; + + span = match->groups[g].captures; + if (span->start >= 0 && span->start < start) + start = span->start; + if (span->end >= 0 && span->end > end) + end = span->end; + } + } + + *slice_start = start; + *slice_end = end; +} + +/* MatchObject's 'detach_string' method. */ +static PyObject* match_detach_string(MatchObject* self, PyObject* unused) { + if (self->string) { + Py_ssize_t start; + Py_ssize_t end; + PyObject* substring; + + determine_target_substring(self, &start, &end); + + substring = get_slice(self->string, start, end); + if (substring) { + Py_XDECREF(self->substring); + self->substring = substring; + self->substring_offset = start; + + Py_DECREF(self->string); + self->string = NULL; + } + } + + Py_INCREF(Py_None); + return Py_None; +} + +/* The documentation of a MatchObject. */ +PyDoc_STRVAR(match_group_doc, + "group([group1, ...]) --> string or tuple of strings.\n\ + Return one or more subgroups of the match. If there is a single argument,\n\ + the result is a single string, or None if the group did not contribute to\n\ + the match; if there are multiple arguments, the result is a tuple with one\n\ + item per argument; if there are no arguments, the whole match is returned.\n\ + Group 0 is the whole match."); + +PyDoc_STRVAR(match_start_doc, + "start([group1, ...]) --> int or tuple of ints.\n\ + Return the index of the start of one or more subgroups of the match. 
If\n\ + there is a single argument, the result is an index, or -1 if the group did\n\ + not contribute to the match; if there are multiple arguments, the result is\n\ + a tuple with one item per argument; if there are no arguments, the index of\n\ + the start of the whole match is returned. Group 0 is the whole match."); + +PyDoc_STRVAR(match_end_doc, + "end([group1, ...]) --> int or tuple of ints.\n\ + Return the index of the end of one or more subgroups of the match. If there\n\ + is a single argument, the result is an index, or -1 if the group did not\n\ + contribute to the match; if there are multiple arguments, the result is a\n\ + tuple with one item per argument; if there are no arguments, the index of\n\ + the end of the whole match is returned. Group 0 is the whole match."); + +PyDoc_STRVAR(match_span_doc, + "span([group1, ...]) --> 2-tuple of int or tuple of 2-tuple of ints.\n\ + Return the span (a 2-tuple of the indices of the start and end) of one or\n\ + more subgroups of the match. If there is a single argument, the result is a\n\ + span, or (-1, -1) if the group did not contribute to the match; if there are\n\ + multiple arguments, the result is a tuple with one item per argument; if\n\ + there are no arguments, the span of the whole match is returned. Group 0 is\n\ + the whole match."); + +PyDoc_STRVAR(match_groups_doc, + "groups(default=None) --> tuple of strings.\n\ + Return a tuple containing all the subgroups of the match. The argument is\n\ + the default for groups that did not participate in the match."); + +PyDoc_STRVAR(match_groupdict_doc, + "groupdict(default=None) --> dict.\n\ + Return a dictionary containing all the named subgroups of the match, keyed\n\ + by the subgroup name. 
The argument is the value to be given for groups that\n\ + did not participate in the match."); + +PyDoc_STRVAR(match_capturesdict_doc, + "capturesdict() --> dict.\n\ + Return a dictionary containing the captures of all the named subgroups of the\n\ + match, keyed by the subgroup name."); + +PyDoc_STRVAR(match_expand_doc, + "expand(template) --> string.\n\ + Return the string obtained by doing backslash substitution on the template,\n\ + as done by the sub() method."); + +#if PY_VERSION_HEX >= 0x02060000 +PyDoc_STRVAR(match_expandf_doc, + "expandf(format) --> string.\n\ + Return the string obtained by using the format, as done by the subf()\n\ + method."); + +#endif +PyDoc_STRVAR(match_captures_doc, + "captures([group1, ...]) --> list of strings or tuple of list of strings.\n\ + Return the captures of one or more subgroups of the match. If there is a\n\ + single argument, the result is a list of strings; if there are multiple\n\ + arguments, the result is a tuple of lists with one item per argument; if\n\ + there are no arguments, the captures of the whole match is returned. Group\n\ + 0 is the whole match."); + +PyDoc_STRVAR(match_starts_doc, + "starts([group1, ...]) --> list of ints or tuple of list of ints.\n\ + Return the indices of the starts of the captures of one or more subgroups of\n\ + the match. If there is a single argument, the result is a list of indices;\n\ + if there are multiple arguments, the result is a tuple of lists with one\n\ + item per argument; if there are no arguments, the indices of the starts of\n\ + the captures of the whole match is returned. Group 0 is the whole match."); + +PyDoc_STRVAR(match_ends_doc, + "ends([group1, ...]) --> list of ints or tuple of list of ints.\n\ + Return the indices of the ends of the captures of one or more subgroups of\n\ + the match. 
If there is a single argument, the result is a list of indices;\n\ + if there are multiple arguments, the result is a tuple of lists with one\n\ + item per argument; if there are no arguments, the indices of the ends of the\n\ + captures of the whole match is returned. Group 0 is the whole match."); + +PyDoc_STRVAR(match_spans_doc, + "spans([group1, ...]) --> list of 2-tuple of ints or tuple of list of 2-tuple of ints.\n\ + Return the spans (a 2-tuple of the indices of the start and end) of the\n\ + captures of one or more subgroups of the match. If there is a single\n\ + argument, the result is a list of spans; if there are multiple arguments,\n\ + the result is a tuple of lists with one item per argument; if there are no\n\ + arguments, the spans of the captures of the whole match is returned. Group\n\ + 0 is the whole match."); + +PyDoc_STRVAR(match_detach_string_doc, + "detach_string()\n\ + Detaches the target string from the match object. The 'string' attribute\n\ + will become None."); + +/* MatchObject's methods. 
*/
+/* Each entry binds a Python-level method name to its C implementation, its
+ * calling convention flags and (where one exists) its docstring. The table is
+ * terminated by a NULL entry.
+ */
+static PyMethodDef match_methods[] = {
+    {"group", (PyCFunction)match_group, METH_VARARGS, match_group_doc},
+    {"start", (PyCFunction)match_start, METH_VARARGS, match_start_doc},
+    {"end", (PyCFunction)match_end, METH_VARARGS, match_end_doc},
+    {"span", (PyCFunction)match_span, METH_VARARGS, match_span_doc},
+    {"groups", (PyCFunction)match_groups, METH_VARARGS|METH_KEYWORDS,
+      match_groups_doc},
+    {"groupdict", (PyCFunction)match_groupdict, METH_VARARGS|METH_KEYWORDS,
+      match_groupdict_doc},
+    {"capturesdict", (PyCFunction)match_capturesdict, METH_NOARGS,
+      match_capturesdict_doc},
+    {"expand", (PyCFunction)match_expand, METH_O, match_expand_doc},
+    /* 'expandf' is only compiled in for Python 2.6 and later. */
+#if PY_VERSION_HEX >= 0x02060000
+    {"expandf", (PyCFunction)match_expandf, METH_O, match_expandf_doc},
+#endif
+    {"captures", (PyCFunction)match_captures, METH_VARARGS,
+      match_captures_doc},
+    {"starts", (PyCFunction)match_starts, METH_VARARGS, match_starts_doc},
+    {"ends", (PyCFunction)match_ends, METH_VARARGS, match_ends_doc},
+    {"spans", (PyCFunction)match_spans, METH_VARARGS, match_spans_doc},
+    {"detach_string", (PyCFunction)match_detach_string, METH_NOARGS,
+      match_detach_string_doc},
+    {"__copy__", (PyCFunction)match_copy, METH_NOARGS},
+    {"__deepcopy__", (PyCFunction)match_deepcopy, METH_O},
+    /* NOTE(review): METH_COEXIST presumably lets this entry take precedence
+     * over the wrapper generated for the mapping slot, which points at the
+     * same function - confirm against the type's initialisation.
+     */
+    {"__getitem__", (PyCFunction)match_getitem, METH_O|METH_COEXIST},
+    {NULL, NULL}
+};
+
+PyDoc_STRVAR(match_doc, "Match object");
+
+/* MatchObject's 'lastindex' attribute.
+ *
+ * Returns the stored index as an int, or None if it's negative.
+ */
+static PyObject* match_lastindex(PyObject* self_) {
+    MatchObject* self;
+
+    self = (MatchObject*)self_;
+
+    if (self->lastindex >= 0)
+        return Py_BuildValue("n", self->lastindex);
+
+    Py_INCREF(Py_None);
+    return Py_None;
+}
+
+/* MatchObject's 'lastgroup' attribute. 
*/ +static PyObject* match_lastgroup(PyObject* self_) { + MatchObject* self; + + self = (MatchObject*)self_; + + if (self->pattern->indexgroup && self->lastgroup >= 0) { + PyObject* index; + PyObject* result; + + index = Py_BuildValue("n", self->lastgroup); + + /* PyDict_GetItem returns borrows a reference. */ + result = PyDict_GetItem(self->pattern->indexgroup, index); + Py_DECREF(index); + if (result) { + Py_INCREF(result); + return result; + } + PyErr_Clear(); + } + + Py_INCREF(Py_None); + return Py_None; +} + +/* MatchObject's 'string' attribute. */ +static PyObject* match_string(PyObject* self_) { + MatchObject* self; + + self = (MatchObject*)self_; + + if (self->string) { + Py_INCREF(self->string); + return self->string; + } else { + Py_INCREF(Py_None); + return Py_None; + } +} +#if PY_VERSION_HEX < 0x02060000 + +/* MatchObject's 'partial' attribute. */ +static PyObject* match_partial(PyObject* self_) { + MatchObject* self; + PyObject* result; + + self = (MatchObject*)self_; + + result = self->partial ? Py_True : Py_False; + Py_INCREF(result); + + return result; +} +#endif + +/* MatchObject's 'fuzzy_counts' attribute. 
*/
+/* Returns a 3-tuple of the match's fuzzy counts, in the order substitutions,
+ * insertions, deletions.
+ */
+static PyObject* match_fuzzy_counts(PyObject* self_) {
+    MatchObject* self;
+
+    self = (MatchObject*)self_;
+
+    return Py_BuildValue("nnn", self->fuzzy_counts[RE_FUZZY_SUB],
+      self->fuzzy_counts[RE_FUZZY_INS], self->fuzzy_counts[RE_FUZZY_DEL]);
+}
+
+/* Attributes computed by getter functions. None of them has a setter. */
+static PyGetSetDef match_getset[] = {
+    {"lastindex", (getter)match_lastindex, (setter)NULL,
+      "The group number of the last matched capturing group, or None."},
+    {"lastgroup", (getter)match_lastgroup, (setter)NULL,
+      "The name of the last matched capturing group, or None."},
+    {"regs", (getter)match_regs, (setter)NULL,
+      "A tuple of the spans of the capturing groups."},
+    {"string", (getter)match_string, (setter)NULL,
+      "The string that was searched, or None if it has been detached."},
+    /* Before Python 2.6 'partial' is exposed through a getter; from 2.6
+     * onwards it is a T_BOOL member in match_members instead.
+     */
+#if PY_VERSION_HEX < 0x02060000
+    {"partial", (getter)match_partial, (setter)NULL,
+      "Whether it's a partial match."},
+#endif
+    {"fuzzy_counts", (getter)match_fuzzy_counts, (setter)NULL,
+      "A tuple of the number of substitutions, insertions and deletions."},
+    {NULL} /* Sentinel */
+};
+
+/* Read-only attributes read directly from fields of the MatchObject. */
+static PyMemberDef match_members[] = {
+    {"re", T_OBJECT, offsetof(MatchObject, pattern), READONLY,
+      "The regex object that produced this match object."},
+    {"pos", T_PYSSIZET, offsetof(MatchObject, pos), READONLY,
+      "The position at which the regex engine starting searching."},
+    {"endpos", T_PYSSIZET, offsetof(MatchObject, endpos), READONLY,
+      "The final position beyond which the regex engine won't search."},
+#if PY_VERSION_HEX >= 0x02060000
+    {"partial", T_BOOL, offsetof(MatchObject, partial), READONLY,
+      "Whether it's a partial match."},
+#endif
+    {NULL} /* Sentinel */
+};
+
+/* Mapping protocol: length and subscripting only, no item assignment. */
+static PyMappingMethods match_as_mapping = {
+    (lenfunc)match_length, /* mp_length */
+    (binaryfunc)match_getitem, /* mp_subscript */
+    0, /* mp_ass_subscript */
+};
+
+/* Only the header, name and basic size are set statically here.
+ * NOTE(review): the remaining slots are presumably filled in elsewhere
+ * before the type is used - confirm against the module initialisation.
+ */
+static PyTypeObject Match_Type = {
+    PyObject_HEAD_INIT(NULL)
+    0,
+    "_" RE_MODULE "." "Match",
+    sizeof(MatchObject)
+};
+
+/* Copies the groups. 
*/
+/* The copy is a single allocation: 'group_count' RE_GroupData records
+ * followed by the capture spans of all the groups. Each copied group's
+ * 'captures' pointer refers into that trailing span area.
+ *
+ * Returns NULL if the allocation fails.
+ */
+Py_LOCAL_INLINE(RE_GroupData*) copy_groups(RE_GroupData* groups, size_t
+  group_count) {
+    size_t span_count;
+    size_t g;
+    RE_GroupData* groups_copy;
+    RE_GroupSpan* spans_copy;
+    size_t offset;
+
+    /* Calculate the total size of the group info. */
+    span_count = 0;
+    for (g = 0; g < group_count; g++)
+        span_count += groups[g].capture_count;
+
+    /* Allocate the storage for the group info in a single block. */
+    groups_copy = (RE_GroupData*)re_alloc(group_count * sizeof(RE_GroupData) +
+      span_count * sizeof(RE_GroupSpan));
+    if (!groups_copy)
+        return NULL;
+
+    /* The storage for the spans comes after the other group info. */
+    spans_copy = (RE_GroupSpan*)&groups_copy[group_count];
+
+    /* There's no need to initialise the spans info. */
+    memset(groups_copy, 0, group_count * sizeof(RE_GroupData));
+
+    offset = 0;
+    for (g = 0; g < group_count; g++) {
+        RE_GroupData* orig;
+        RE_GroupData* copy;
+
+        orig = &groups[g];
+        copy = &groups_copy[g];
+        copy->span = orig->span;
+
+        /* This group's captures start where the previous group's ended. */
+        copy->captures = &spans_copy[offset];
+        offset += orig->capture_count;
+
+        if (orig->capture_count > 0) {
+            Py_MEMCPY(copy->captures, orig->captures, orig->capture_count *
+              sizeof(RE_GroupSpan));
+            copy->capture_capacity = orig->capture_count;
+            copy->capture_count = orig->capture_count;
+        }
+    }
+
+    return groups_copy;
+}
+
+/* Makes a copy of a MatchObject.
+ *
+ * If the target string has been detached the original object is returned
+ * with a new reference. Otherwise a new MatchObject is created with the same
+ * field values and its own copy of the groups.
+ *
+ * Returns NULL if an allocation fails.
+ */
+Py_LOCAL_INLINE(PyObject*) make_match_copy(MatchObject* self) {
+    MatchObject* match;
+
+    if (!self->string) {
+        /* The target string has been detached, so the MatchObject is now
+         * immutable.
+         */
+        Py_INCREF(self);
+        return (PyObject*)self;
+    }
+
+    /* Create a MatchObject. */
+    match = PyObject_NEW(MatchObject, &Match_Type);
+    if (!match)
+        return NULL;
+
+    /* Copy only the fields that follow the object header. Copying the header
+     * too would replace the new object's reference count with that of the
+     * original, so the copy could never be deallocated.
+     */
+    Py_MEMCPY((char*)match + sizeof(PyObject), (char*)self + sizeof(PyObject),
+      sizeof(MatchObject) - sizeof(PyObject));
+
+    /* The copy doesn't own a reference to the original's 'regs', so start it
+     * off as NULL, the value a freshly-created MatchObject is given.
+     * NOTE(review): assumes 'regs' is rebuilt on demand when NULL - confirm
+     * against match_regs.
+     */
+    match->regs = NULL;
+
+    Py_INCREF(match->string);
+    Py_INCREF(match->substring);
+    Py_INCREF(match->pattern);
+
+    /* Copy the groups to the MatchObject. 
*/
+    if (self->group_count > 0) {
+        match->groups = copy_groups(self->groups, self->group_count);
+        if (!match->groups) {
+            Py_DECREF(match);
+            return NULL;
+        }
+    }
+
+    return (PyObject*)match;
+}
+
+/* Creates a new MatchObject.
+ *
+ * The result depends on 'status':
+ *   > 0 or RE_ERROR_PARTIAL   a new MatchObject built from 'state'
+ *   0                         None (no match)
+ *   anything else             NULL, after reporting the error via set_error
+ *
+ * NULL is also returned if the MatchObject or its groups can't be allocated.
+ */
+Py_LOCAL_INLINE(PyObject*) pattern_new_match(PatternObject* pattern, RE_State*
+  state, int status) {
+    /* Create MatchObject (from state object). */
+    if (status > 0 || status == RE_ERROR_PARTIAL) {
+        MatchObject* match;
+
+        /* Create a MatchObject. */
+        match = PyObject_NEW(MatchObject, &Match_Type);
+        if (!match)
+            return NULL;
+
+        /* Initially the substring is the whole target string, at offset 0. */
+        match->string = state->string;
+        match->substring = state->string;
+        match->substring_offset = 0;
+        match->pattern = pattern;
+        match->regs = NULL;
+        match->fuzzy_counts[RE_FUZZY_SUB] =
+          state->total_fuzzy_counts[RE_FUZZY_SUB];
+        match->fuzzy_counts[RE_FUZZY_INS] =
+          state->total_fuzzy_counts[RE_FUZZY_INS];
+        match->fuzzy_counts[RE_FUZZY_DEL] =
+          state->total_fuzzy_counts[RE_FUZZY_DEL];
+        match->partial = status == RE_ERROR_PARTIAL;
+        /* The match holds its own reference to each of these. */
+        Py_INCREF(match->string);
+        Py_INCREF(match->substring);
+        Py_INCREF(match->pattern);
+
+        /* Copy the groups to the MatchObject. */
+        if (pattern->public_group_count > 0) {
+            match->groups = copy_groups(state->groups,
+              pattern->public_group_count);
+            if (!match->groups) {
+                Py_DECREF(match);
+                return NULL;
+            }
+        } else
+            match->groups = NULL;
+
+        match->group_count = pattern->public_group_count;
+
+        match->pos = state->slice_start;
+        match->endpos = state->slice_end;
+
+        /* When matching in reverse the text position is the start of the
+         * match and the match position is its end.
+         */
+        if (state->reverse) {
+            match->match_start = state->text_pos;
+            match->match_end = state->match_pos;
+        } else {
+            match->match_start = state->match_pos;
+            match->match_end = state->text_pos;
+        }
+
+        match->lastindex = state->lastindex;
+        match->lastgroup = state->lastgroup;
+
+        return (PyObject*)match;
+    } else if (status == 0) {
+        /* No match. */
+        Py_INCREF(Py_None);
+        return Py_None;
+    } else {
+        /* Internal error. 
*/
+        set_error(status, NULL);
+        return NULL;
+    }
+}
+
+/* Gets the text of a capture group from a state.
+ *
+ * 'index' is 1-based. If 'string' is None, the index is out of range or the
+ * group has no captures, the result is an empty slice of 'string' when
+ * 'empty' is true and None otherwise.
+ */
+Py_LOCAL_INLINE(PyObject*) state_get_group(RE_State* state, Py_ssize_t index,
+  PyObject* string, BOOL empty) {
+    RE_GroupData* group;
+    Py_ssize_t start;
+    Py_ssize_t end;
+
+    /* Only the address is computed here; it isn't dereferenced until the
+     * index has been range-checked in the condition below.
+     */
+    group = &state->groups[index - 1];
+
+    if (string != Py_None && index >= 1 && (size_t)index <=
+      state->pattern->public_group_count && group->capture_count > 0) {
+        start = group->span.start;
+        end = group->span.end;
+    } else {
+        if (empty)
+            /* Want an empty string. */
+            start = end = 0;
+        else {
+            Py_INCREF(Py_None);
+            return Py_None;
+        }
+    }
+
+    return get_slice(string, start, end);
+}
+
+/* Acquires the lock (mutex) on the state if there's one.
+ *
+ * It also increments the owner's refcount just to ensure that it won't be
+ * destroyed by another thread.
+ */
+Py_LOCAL_INLINE(void) acquire_state_lock(PyObject* owner, RE_SafeState*
+  safe_state) {
+    RE_State* state;
+
+    state = safe_state->re_state;
+
+    if (state->lock) {
+        /* In order to avoid deadlock we need to release the GIL while trying
+         * to acquire the lock.
+         */
+        Py_INCREF(owner);
+        /* Try without waiting first; only if that fails is the GIL released
+         * around a waiting acquire.
+         */
+        if (!PyThread_acquire_lock(state->lock, 0)) {
+            release_GIL(safe_state);
+            PyThread_acquire_lock(state->lock, 1);
+            acquire_GIL(safe_state);
+        }
+    }
+}
+
+/* Releases the lock (mutex) on the state if there's one.
+ *
+ * It also decrements the owner's refcount, which was incremented when the lock
+ * was acquired.
+ */
+Py_LOCAL_INLINE(void) release_state_lock(PyObject* owner, RE_SafeState*
+  safe_state) {
+    RE_State* state;
+
+    state = safe_state->re_state;
+
+    if (state->lock) {
+        PyThread_release_lock(state->lock);
+        Py_DECREF(owner);
+    }
+}
+
+/* Implements the functionality of ScanObject's search and match methods. 
*/
+/* 'search' is passed through to do_match and also controls whether the
+ * overlapped stepping below applies.
+ *
+ * Returns what pattern_new_match produces for the attempt, None once an
+ * earlier attempt has failed or ended in a partial match, or NULL on error.
+ * The state lock is held for the duration of the attempt.
+ */
+Py_LOCAL_INLINE(PyObject*) scanner_search_or_match(ScannerObject* self, BOOL
+  search) {
+    RE_State* state;
+    RE_SafeState safe_state;
+    PyObject* match;
+
+    state = &self->state;
+
+    /* Initialise the "safe state" structure. */
+    safe_state.re_state = state;
+    safe_state.thread_state = NULL;
+
+    /* Acquire the state lock in case we're sharing the scanner object across
+     * threads.
+     */
+    acquire_state_lock((PyObject*)self, &safe_state);
+
+    /* The status left by the previous attempt decides whether to try again. */
+    if (self->status == RE_ERROR_FAILURE || self->status == RE_ERROR_PARTIAL) {
+        /* No or partial match. */
+        release_state_lock((PyObject*)self, &safe_state);
+        Py_INCREF(Py_None);
+        return Py_None;
+    } else if (self->status < 0) {
+        /* Internal error. */
+        release_state_lock((PyObject*)self, &safe_state);
+        set_error(self->status, NULL);
+        return NULL;
+    }
+
+    /* Look for another match. */
+    self->status = do_match(&safe_state, search);
+    if (self->status >= 0 || self->status == RE_ERROR_PARTIAL) {
+        /* Create the match object. */
+        match = pattern_new_match(self->pattern, state, self->status);
+
+        if (search && state->overlapped) {
+            /* Advance one character. */
+            Py_ssize_t step;
+
+            /* The next attempt starts one position past the start of this
+             * match, in the direction of the search.
+             */
+            step = state->reverse ? -1 : 1;
+            state->text_pos = state->match_pos + step;
+            state->must_advance = FALSE;
+        } else
+            /* Continue from where we left off, but don't allow 2 contiguous
+             * zero-width matches.
+             */
+            state->must_advance = state->text_pos == state->match_pos;
+    } else
+        /* Internal error. */
+        match = NULL;
+
+    /* Release the state lock. */
+    release_state_lock((PyObject*)self, &safe_state);
+
+    return match;
+}
+
+/* ScannerObject's 'match' method. */
+static PyObject* scanner_match(ScannerObject* self, PyObject* unused) {
+    return scanner_search_or_match(self, FALSE);
+}
+
+/* ScannerObject's 'search' method. */
+static PyObject* scanner_search(ScannerObject* self, PyObject *unused) {
+    return scanner_search_or_match(self, TRUE);
+}
+
+/* ScannerObject's 'next' method. 
*/
+/* Performs a search and raises StopIteration when it yields None. */
+static PyObject* scanner_next(PyObject* self) {
+    PyObject* match;
+
+    match = scanner_search((ScannerObject*)self, NULL);
+
+    if (match == Py_None) {
+        /* No match. */
+        /* Give back the reference to None before raising. */
+        Py_DECREF(Py_None);
+        PyErr_SetNone(PyExc_StopIteration);
+        return NULL;
+    }
+
+    return match;
+}
+
+/* Returns an iterator for a ScannerObject.
+ *
+ * The iterator is actually the ScannerObject itself.
+ */
+static PyObject* scanner_iter(PyObject* self) {
+    Py_INCREF(self);
+    return self;
+}
+
+/* Gets the next result from a scanner iterator.
+ *
+ * Unlike scanner_next, exhaustion is signalled by returning NULL without
+ * setting StopIteration.
+ */
+static PyObject* scanner_iternext(PyObject* self) {
+    PyObject* match;
+
+    match = scanner_search((ScannerObject*)self, NULL);
+
+    if (match == Py_None) {
+        /* No match. */
+        Py_DECREF(match);
+        return NULL;
+    }
+
+    return match;
+}
+
+/* Makes a copy of a ScannerObject.
+ *
+ * It actually doesn't make a copy, just returns the original object.
+ */
+Py_LOCAL_INLINE(PyObject*) make_scanner_copy(ScannerObject* self) {
+    Py_INCREF(self);
+    return (PyObject*)self;
+}
+
+/* ScannerObject's '__copy__' method. */
+static PyObject* scanner_copy(ScannerObject* self, PyObject *unused) {
+    return make_scanner_copy(self);
+}
+
+/* ScannerObject's '__deepcopy__' method.
+ *
+ * 'memo' is not used.
+ */
+static PyObject* scanner_deepcopy(ScannerObject* self, PyObject* memo) {
+    return make_scanner_copy(self);
+}
+
+/* The documentation of a ScannerObject. */
+PyDoc_STRVAR(scanner_match_doc,
+    "match() --> MatchObject or None.\n\
+    Match at the current position in the string.");
+
+PyDoc_STRVAR(scanner_search_doc,
+    "search() --> MatchObject or None.\n\
+    Search from the current position in the string.");
+
+/* ScannerObject's methods. 
*/
+static PyMethodDef scanner_methods[] = {
+    {"next", (PyCFunction)scanner_next, METH_NOARGS},
+    {"match", (PyCFunction)scanner_match, METH_NOARGS, scanner_match_doc},
+    {"search", (PyCFunction)scanner_search, METH_NOARGS, scanner_search_doc},
+    {"__copy__", (PyCFunction)scanner_copy, METH_NOARGS},
+    {"__deepcopy__", (PyCFunction)scanner_deepcopy, METH_O},
+    {NULL, NULL}
+};
+
+PyDoc_STRVAR(scanner_doc, "Scanner object");
+
+/* Deallocates a ScannerObject.
+ *
+ * Finalises the embedded state, releases the reference to the pattern and
+ * then frees the object itself.
+ */
+static void scanner_dealloc(PyObject* self_) {
+    ScannerObject* self;
+
+    self = (ScannerObject*)self_;
+
+    state_fini(&self->state);
+    Py_DECREF(self->pattern);
+    PyObject_DEL(self);
+}
+
+static PyMemberDef scanner_members[] = {
+    {"pattern", T_OBJECT, offsetof(ScannerObject, pattern), READONLY,
+      "The regex object that produced this scanner object."},
+    {NULL} /* Sentinel */
+};
+
+/* Only the header, name and basic size are set statically here.
+ * NOTE(review): the remaining slots are presumably filled in elsewhere
+ * before the type is used - confirm against the module initialisation.
+ */
+static PyTypeObject Scanner_Type = {
+    PyObject_HEAD_INIT(NULL)
+    0,
+    "_" RE_MODULE "." "Scanner",
+    sizeof(ScannerObject)
+};
+
+/* Decodes a 'concurrent' argument.
+ *
+ * None gives RE_CONC_DEFAULT; otherwise the argument is converted to an
+ * integer and gives RE_CONC_YES if non-zero, RE_CONC_NO if zero. If the
+ * conversion fails, RE_ERROR_CONCURRENT is reported and -1 is returned.
+ */
+Py_LOCAL_INLINE(int) decode_concurrent(PyObject* concurrent) {
+    Py_ssize_t value;
+
+    if (concurrent == Py_None)
+        return RE_CONC_DEFAULT;
+
+    value = PyLong_AsLong(concurrent);
+    if (value == -1 && PyErr_Occurred()) {
+        set_error(RE_ERROR_CONCURRENT, NULL);
+        return -1;
+    }
+
+    return value ? RE_CONC_YES : RE_CONC_NO;
+}
+
+/* Decodes a 'partial' argument.
+ *
+ * False and True map directly. Any other object is converted to an integer
+ * and tested against zero; if the conversion fails the error is discarded
+ * and the argument is treated as true.
+ */
+Py_LOCAL_INLINE(BOOL) decode_partial(PyObject* partial) {
+    Py_ssize_t value;
+
+    if (partial == Py_False)
+        return FALSE;
+
+    if (partial == Py_True)
+        return TRUE;
+
+    value = PyLong_AsLong(partial);
+    if (value == -1 && PyErr_Occurred()) {
+        PyErr_Clear();
+        return TRUE;
+    }
+
+    return value != 0;
+}
+
+/* Creates a new ScannerObject. */
+static PyObject* pattern_scanner(PatternObject* pattern, PyObject* args,
+  PyObject* kwargs) {
+    /* Create search state object. 
*/ + ScannerObject* self; + Py_ssize_t start; + Py_ssize_t end; + int conc; + BOOL part; + + PyObject* string; + PyObject* pos = Py_None; + PyObject* endpos = Py_None; + Py_ssize_t overlapped = FALSE; + PyObject* concurrent = Py_None; + PyObject* partial = Py_False; + static char* kwlist[] = { "string", "pos", "endpos", "overlapped", + "concurrent", "partial", NULL }; + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|OOnOO:scanner", kwlist, + &string, &pos, &endpos, &overlapped, &concurrent, &partial)) + return NULL; + + start = as_string_index(pos, 0); + if (start == -1 && PyErr_Occurred()) + return NULL; + + end = as_string_index(endpos, PY_SSIZE_T_MAX); + if (end == -1 && PyErr_Occurred()) + return NULL; + + conc = decode_concurrent(concurrent); + if (conc < 0) + return NULL; + + part = decode_partial(partial); + + /* Create a scanner object. */ + self = PyObject_NEW(ScannerObject, &Scanner_Type); + if (!self) + return NULL; + + self->pattern = pattern; + Py_INCREF(self->pattern); + + /* The MatchObject, and therefore repeated captures, will be visible. */ + if (!state_init(&self->state, pattern, string, start, end, overlapped != 0, + conc, part, TRUE, TRUE, FALSE)) { + PyObject_DEL(self); + return NULL; + } + + self->status = RE_ERROR_SUCCESS; + + return (PyObject*) self; +} + +/* Performs the split for the SplitterObject. */ +Py_LOCAL_INLINE(PyObject*) next_split_part(SplitterObject* self) { + RE_State* state; + RE_SafeState safe_state; + PyObject* result = NULL; /* Initialise to stop compiler warning. */ + + state = &self->state; + + /* Initialise the "safe state" structure. */ + safe_state.re_state = state; + safe_state.thread_state = NULL; + + /* Acquire the state lock in case we're sharing the splitter object across + * threads. + */ + acquire_state_lock((PyObject*)self, &safe_state); + + if (self->status == RE_ERROR_FAILURE || self->status == RE_ERROR_PARTIAL) { + /* Finished. 
*/
+        release_state_lock((PyObject*)self, &safe_state);
+        /* Py_False is returned as an in-band "finished" marker. */
+        result = Py_False;
+        Py_INCREF(result);
+        return result;
+    } else if (self->status < 0) {
+        /* Internal error. */
+        release_state_lock((PyObject*)self, &safe_state);
+        set_error(self->status, NULL);
+        return NULL;
+    }
+
+    /* An index of 0 means that the next item is a segment of the string;
+     * any other index is the number of the group to return.
+     */
+    if (self->index == 0) {
+        if (self->split_count < self->maxsplit) {
+            Py_ssize_t step;
+            Py_ssize_t end_pos;
+
+            /* The direction of travel and the position at which it ends. */
+            if (state->reverse) {
+                step = -1;
+                end_pos = state->slice_start;
+            } else {
+                step = 1;
+                end_pos = state->slice_end;
+            }
+
+retry:
+            self->status = do_match(&safe_state, TRUE);
+            if (self->status < 0)
+                goto error;
+
+            if (self->status == RE_ERROR_SUCCESS) {
+                if (state->version_0) {
+                    /* Version 0 behaviour is to advance one character if the
+                     * split was zero-width. Unfortunately, this can give an
+                     * incorrect result. GvR wants this behaviour to be
+                     * retained so as not to break any existing software which
+                     * might rely on it.
+                     */
+                    if (state->text_pos == state->match_pos) {
+                        if (self->last_pos == end_pos)
+                            goto no_match;
+
+                        /* Advance one character. */
+                        state->text_pos += step;
+                        state->must_advance = FALSE;
+                        goto retry;
+                    }
+                }
+
+                ++self->split_count;
+
+                /* Get segment before this match. */
+                if (state->reverse)
+                    result = get_slice(state->string, state->match_pos,
+                      self->last_pos);
+                else
+                    result = get_slice(state->string, self->last_pos,
+                      state->match_pos);
+                if (!result)
+                    goto error;
+
+                /* The next segment starts where this match finished. */
+                self->last_pos = state->text_pos;
+
+                /* Version 0 behaviour is to advance one character if the match
+                 * was zero-width. Unfortunately, this can give an incorrect
+                 * result. GvR wants this behaviour to be retained so as not to
+                 * break any existing software which might rely on it.
+                 */
+                if (state->version_0) {
+                    if (state->text_pos == state->match_pos)
+                        /* Advance one character. */
+                        state->text_pos += step;
+
+                    state->must_advance = FALSE;
+                } else
+                    /* Continue from where we left off, but don't allow a
+                     * contiguous zero-width match. 
+                     */
+                    state->must_advance = TRUE;
+            }
+        } else
+            /* The maximum number of splits has been made. */
+            goto no_match;
+
+        if (self->status == RE_ERROR_FAILURE || self->status ==
+          RE_ERROR_PARTIAL) {
+no_match:
+            /* Get segment following last match (even if empty). */
+            if (state->reverse)
+                result = get_slice(state->string, 0, self->last_pos);
+            else
+                result = get_slice(state->string, self->last_pos,
+                  state->text_length);
+            if (!result)
+                goto error;
+        }
+    } else {
+        /* Add group. */
+        result = state_get_group(state, self->index, state->string, FALSE);
+        if (!result)
+            goto error;
+    }
+
+    /* Step on to the next group, wrapping back to 0 (a string segment) after
+     * the last public group.
+     */
+    ++self->index;
+    if ((size_t)self->index > state->pattern->public_group_count)
+        self->index = 0;
+
+    /* Release the state lock. */
+    release_state_lock((PyObject*)self, &safe_state);
+
+    return result;
+
+error:
+    /* Release the state lock. */
+    release_state_lock((PyObject*)self, &safe_state);
+
+    return NULL;
+}
+
+/* SplitterObject's 'split' method.
+ *
+ * Returns the next part, or None when next_split_part reports that it has
+ * finished.
+ */
+static PyObject* splitter_split(SplitterObject* self, PyObject *unused) {
+    PyObject* result;
+
+    result = next_split_part(self);
+
+    if (result == Py_False) {
+        /* The sentinel. */
+        Py_DECREF(Py_False);
+        Py_INCREF(Py_None);
+        return Py_None;
+    }
+
+    return result;
+}
+
+/* SplitterObject's 'next' method.
+ *
+ * Raises StopIteration when next_split_part reports that it has finished.
+ */
+static PyObject* splitter_next(PyObject* self) {
+    PyObject* result;
+
+    result = next_split_part((SplitterObject*)self);
+
+    if (result == Py_False) {
+        /* No match. */
+        Py_DECREF(Py_False);
+        PyErr_SetNone(PyExc_StopIteration);
+        return NULL;
+    }
+
+    return result;
+}
+
+/* Returns an iterator for a SplitterObject.
+ *
+ * The iterator is actually the SplitterObject itself.
+ */
+static PyObject* splitter_iter(PyObject* self) {
+    Py_INCREF(self);
+    return self;
+}
+
+/* Gets the next result from a splitter iterator. */
+static PyObject* splitter_iternext(PyObject* self) {
+    PyObject* result;
+
+    result = next_split_part((SplitterObject*)self);
+
+    if (result == Py_False) {
+        /* No match. 
*/
+        Py_DECREF(result);
+        return NULL;
+    }
+
+    return result;
+}
+
+/* Makes a copy of a SplitterObject.
+ *
+ * It actually doesn't make a copy, just returns the original object.
+ */
+Py_LOCAL_INLINE(PyObject*) make_splitter_copy(SplitterObject* self) {
+    Py_INCREF(self);
+    return (PyObject*)self;
+}
+
+/* SplitterObject's '__copy__' method. */
+static PyObject* splitter_copy(SplitterObject* self, PyObject *unused) {
+    return make_splitter_copy(self);
+}
+
+/* SplitterObject's '__deepcopy__' method.
+ *
+ * 'memo' is not used.
+ */
+static PyObject* splitter_deepcopy(SplitterObject* self, PyObject* memo) {
+    return make_splitter_copy(self);
+}
+
+/* The documentation of a SplitterObject. */
+PyDoc_STRVAR(splitter_split_doc,
+    "split() --> string or None.\n\
+    Return the next part of the split string.");
+
+/* SplitterObject's methods. */
+static PyMethodDef splitter_methods[] = {
+    {"next", (PyCFunction)splitter_next, METH_NOARGS},
+    {"split", (PyCFunction)splitter_split, METH_NOARGS, splitter_split_doc},
+    {"__copy__", (PyCFunction)splitter_copy, METH_NOARGS},
+    {"__deepcopy__", (PyCFunction)splitter_deepcopy, METH_O},
+    {NULL, NULL}
+};
+
+PyDoc_STRVAR(splitter_doc, "Splitter object");
+
+/* Deallocates a SplitterObject.
+ *
+ * Finalises the embedded state, releases the reference to the pattern and
+ * then frees the object itself.
+ */
+static void splitter_dealloc(PyObject* self_) {
+    SplitterObject* self;
+
+    self = (SplitterObject*)self_;
+
+    state_fini(&self->state);
+    Py_DECREF(self->pattern);
+    PyObject_DEL(self);
+}
+
+static PyMemberDef splitter_members[] = {
+    {"pattern", T_OBJECT, offsetof(SplitterObject, pattern), READONLY,
+      "The regex object that produced this splitter object."},
+    {NULL} /* Sentinel */
+};
+
+/* Only the header, name and basic size are set statically here.
+ * NOTE(review): the remaining slots are presumably filled in elsewhere
+ * before the type is used - confirm against the module initialisation.
+ */
+static PyTypeObject Splitter_Type = {
+    PyObject_HEAD_INIT(NULL)
+    0,
+    "_" RE_MODULE "." "Splitter",
+    sizeof(SplitterObject)
+};
+
+/* Creates a new SplitterObject. */
+Py_LOCAL_INLINE(PyObject*) pattern_splitter(PatternObject* pattern, PyObject*
+  args, PyObject* kwargs) {
+    /* Create split state object. 
*/ + int conc; + SplitterObject* self; + RE_State* state; + + PyObject* string; + Py_ssize_t maxsplit = 0; + PyObject* concurrent = Py_None; + static char* kwlist[] = { "string", "maxsplit", "concurrent", NULL }; + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|nO:splitter", kwlist, + &string, &maxsplit, &concurrent)) + return NULL; + + conc = decode_concurrent(concurrent); + if (conc < 0) + return NULL; + + /* Create a splitter object. */ + self = PyObject_NEW(SplitterObject, &Splitter_Type); + if (!self) + return NULL; + + self->pattern = pattern; + Py_INCREF(self->pattern); + + if (maxsplit == 0) + maxsplit = PY_SSIZE_T_MAX; + + state = &self->state; + + /* The MatchObject, and therefore repeated captures, will not be visible. + */ + if (!state_init(state, pattern, string, 0, PY_SSIZE_T_MAX, FALSE, conc, + FALSE, TRUE, FALSE, FALSE)) { + PyObject_DEL(self); + return NULL; + } + + self->maxsplit = maxsplit; + self->last_pos = state->reverse ? state->text_length : 0; + self->split_count = 0; + self->index = 0; + self->status = 1; + + return (PyObject*) self; +} + +/* Implements the functionality of PatternObject's search and match methods. */ +Py_LOCAL_INLINE(PyObject*) pattern_search_or_match(PatternObject* self, + PyObject* args, PyObject* kwargs, char* args_desc, BOOL search, BOOL + match_all) { + Py_ssize_t start; + Py_ssize_t end; + int conc; + BOOL part; + RE_State state; + RE_SafeState safe_state; + int status; + PyObject* match; + + PyObject* string; + PyObject* pos = Py_None; + PyObject* endpos = Py_None; + PyObject* concurrent = Py_None; + PyObject* partial = Py_False; + static char* kwlist[] = { "string", "pos", "endpos", "concurrent", + "partial", NULL }; + /* When working with a short string, such as a line from a file, the + * relative cost of PyArg_ParseTupleAndKeywords can be significant, and + * it's worth not using it when there are only positional arguments. 
+     */
+    Py_ssize_t arg_count;
+    /* An 'arg_count' of -1 selects the general parser below. */
+    if (args && !kwargs && PyTuple_CheckExact(args))
+        arg_count = PyTuple_GET_SIZE(args);
+    else
+        arg_count = -1;
+
+    if (1 <= arg_count && arg_count <= 5) {
+        /* PyTuple_GET_ITEM borrows the reference. */
+        /* Arguments that aren't supplied keep their defaults. */
+        string = PyTuple_GET_ITEM(args, 0);
+        if (arg_count >= 2)
+            pos = PyTuple_GET_ITEM(args, 1);
+        if (arg_count >= 3)
+            endpos = PyTuple_GET_ITEM(args, 2);
+        if (arg_count >= 4)
+            concurrent = PyTuple_GET_ITEM(args, 3);
+        if (arg_count >= 5)
+            partial = PyTuple_GET_ITEM(args, 4);
+    } else if (!PyArg_ParseTupleAndKeywords(args, kwargs, args_desc, kwlist,
+      &string, &pos, &endpos, &concurrent, &partial))
+        return NULL;
+
+    start = as_string_index(pos, 0);
+    if (start == -1 && PyErr_Occurred())
+        return NULL;
+
+    end = as_string_index(endpos, PY_SSIZE_T_MAX);
+    if (end == -1 && PyErr_Occurred())
+        return NULL;
+
+    conc = decode_concurrent(concurrent);
+    if (conc < 0)
+        return NULL;
+
+    part = decode_partial(partial);
+
+    /* The MatchObject, and therefore repeated captures, will be visible. */
+    if (!state_init(&state, self, string, start, end, FALSE, conc, part, FALSE,
+      TRUE, match_all))
+        return NULL;
+
+    /* Initialise the "safe state" structure. */
+    safe_state.re_state = &state;
+    safe_state.thread_state = NULL;
+
+    status = do_match(&safe_state, search);
+
+    if (status >= 0 || status == RE_ERROR_PARTIAL)
+        /* Create the match object. */
+        match = pattern_new_match(self, &state, status);
+    else
+        match = NULL;
+
+    /* The state is local to this call, so it's finalised on every path once
+     * it has been initialised.
+     */
+    state_fini(&state);
+
+    return match;
+}
+
+/* PatternObject's 'match' method. */
+static PyObject* pattern_match(PatternObject* self, PyObject* args, PyObject*
+  kwargs) {
+    return pattern_search_or_match(self, args, kwargs, "O|OOOO:match", FALSE,
+      FALSE);
+}
+
+/* PatternObject's 'fullmatch' method. 
*/
+static PyObject* pattern_fullmatch(PatternObject* self, PyObject* args,
+  PyObject* kwargs) {
+    return pattern_search_or_match(self, args, kwargs, "O|OOOO:fullmatch",
+      FALSE, TRUE);
+}
+
+/* PatternObject's 'search' method. */
+static PyObject* pattern_search(PatternObject* self, PyObject* args, PyObject*
+  kwargs) {
+    return pattern_search_or_match(self, args, kwargs, "O|OOOO:search", TRUE,
+      FALSE);
+}
+
+/* Gets the limits of the matching.
+ *
+ * Converts 'pos' and 'endpos' to indexes, adds 'length' to negative values
+ * and then clamps each to the range 0..length. The results are stored in
+ * *start and *end.
+ *
+ * Returns FALSE if either conversion raised an exception, otherwise TRUE.
+ */
+Py_LOCAL_INLINE(BOOL) get_limits(PyObject* pos, PyObject* endpos, Py_ssize_t
+  length, Py_ssize_t* start, Py_ssize_t* end) {
+    Py_ssize_t s;
+    Py_ssize_t e;
+
+    s = as_string_index(pos, 0);
+    if (s == -1 && PyErr_Occurred())
+        return FALSE;
+
+    e = as_string_index(endpos, PY_SSIZE_T_MAX);
+    if (e == -1 && PyErr_Occurred())
+        return FALSE;
+
+    /* Adjust boundaries. */
+    if (s < 0)
+        s += length;
+    if (s < 0)
+        s = 0;
+    else if (s > length)
+        s = length;
+
+    if (e < 0)
+        e += length;
+    if (e < 0)
+        e = 0;
+    else if (e > length)
+        e = length;
+
+    *start = s;
+    *end = e;
+
+    return TRUE;
+}
+
+/* Gets a replacement item from the replacement list.
+ *
+ * The replacement item could be a string literal or a group.
+ *
+ * It can return None to represent an empty string.
+ */
+Py_LOCAL_INLINE(PyObject*) get_sub_replacement(PyObject* item, PyObject*
+  string, RE_State* state, size_t group_count) {
+    Py_ssize_t index;
+
+    if (PyUnicode_CheckExact(item) || PyString_CheckExact(item)) {
+        /* It's a literal, which can be added directly to the list. */
+        Py_INCREF(item);
+        return item;
+    }
+
+    /* Is it a group reference? */
+    index = as_group_index(item);
+    if (index == -1 && PyErr_Occurred()) {
+        /* Not a group either! */
+        set_error(RE_ERROR_REPLACEMENT, NULL);
+        return NULL;
+    }
+
+    if (index == 0) {
+        /* The entire matched portion of the string. */
+        if (state->match_pos == state->text_pos) {
+            /* Return None for "". 
*/ + Py_INCREF(Py_None); + return Py_None; + } + + if (state->reverse) + return get_slice(string, state->text_pos, state->match_pos); + else + return get_slice(string, state->match_pos, state->text_pos); + } else if (1 <= index && (size_t)index <= group_count) { + /* A group. */ + RE_GroupData* group; + + group = &state->groups[index - 1]; + + if (group->capture_count == 0 && group->span.start != group->span.end) + { + /* The group didn't match or is "", so return None for "". */ + Py_INCREF(Py_None); + return Py_None; + } + + return get_slice(string, group->span.start, group->span.end); + } else { + /* No such group. */ + set_error(RE_ERROR_INVALID_GROUP_REF, NULL); + return NULL; + } +} + +/* PatternObject's 'subx' method. */ +Py_LOCAL_INLINE(PyObject*) pattern_subx(PatternObject* self, PyObject* + str_template, PyObject* string, Py_ssize_t maxsub, int sub_type, PyObject* + pos, PyObject* endpos, int concurrent) { + RE_StringInfo str_info; + Py_ssize_t start; + Py_ssize_t end; + BOOL is_callable = FALSE; + BOOL is_literal = FALSE; + BOOL is_template = FALSE; + PyObject* replacement = NULL; +#if PY_VERSION_HEX >= 0x02060000 + BOOL is_format = FALSE; +#endif + RE_State state; + RE_SafeState safe_state; + JoinInfo join_info; + Py_ssize_t sub_count; + Py_ssize_t last_pos; + PyObject* item; + Py_ssize_t end_pos; + Py_ssize_t step; + + /* Get the string. */ + if (!get_string(string, &str_info)) + return NULL; + + /* Get the limits of the search. */ + if (!get_limits(pos, endpos, str_info.length, &start, &end)) { +#if PY_VERSION_HEX >= 0x02060000 + release_buffer(&str_info); + +#endif + return NULL; + } + + /* If the pattern is too long for the string, then take a shortcut, unless + * it's a fuzzy pattern. 
+ */ + if (!self->is_fuzzy && self->min_width > end - start) { + PyObject* result; + + Py_INCREF(string); + + if (sub_type & RE_SUBN) + result = Py_BuildValue("Nn", string, 0); + else + result = string; + +#if PY_VERSION_HEX >= 0x02060000 + release_buffer(&str_info); + +#endif + return result; + } + + if (maxsub == 0) + maxsub = PY_SSIZE_T_MAX; + + /* sub/subn takes either a function or a string template. */ + if (PyCallable_Check(str_template)) { + /* It's callable. */ + is_callable = TRUE; + + replacement = str_template; + Py_INCREF(replacement); +#if PY_VERSION_HEX >= 0x02060000 + } else if (sub_type & RE_SUBF) { + /* Is it a literal format? + * + * To keep it simple we'll say that a literal is a string which can be + * used as-is, so no placeholders. + */ + Py_ssize_t literal_length; + + literal_length = check_replacement_string(str_template, '{'); + if (literal_length > 0) { + /* It's a literal. */ + is_literal = TRUE; + + replacement = str_template; + Py_INCREF(replacement); + } else if (literal_length < 0) { + /* It isn't a literal, so get the 'format' method. */ + is_format = TRUE; + + replacement = PyObject_GetAttrString(str_template, "format"); + if (!replacement) { + release_buffer(&str_info); + return NULL; + } + } +#endif + } else { + /* Is it a literal template? + * + * To keep it simple we'll say that a literal is a string which can be + * used as-is, so no backslashes. + */ + Py_ssize_t literal_length; + + literal_length = check_replacement_string(str_template, '\\'); + if (literal_length > 0) { + /* It's a literal. */ + is_literal = TRUE; + + replacement = str_template; + Py_INCREF(replacement); + } else if (literal_length < 0 ) { + /* It isn't a literal, so hand it over to the template compiler. 
*/ + is_template = TRUE; + + replacement = call(RE_MODULE, "_compile_replacement_helper", + PyTuple_Pack(2, self, str_template)); + if (!replacement) { +#if PY_VERSION_HEX >= 0x02060000 + release_buffer(&str_info); + +#endif + return NULL; + } + } + } + + /* The MatchObject, and therefore repeated captures, will be visible only + * if the replacement is callable. + */ + if (!state_init_2(&state, self, string, &str_info, start, end, FALSE, + concurrent, FALSE, FALSE, is_callable, FALSE)) { +#if PY_VERSION_HEX >= 0x02060000 + release_buffer(&str_info); + +#endif + Py_XDECREF(replacement); + return NULL; + } + + /* Initialise the "safe state" structure. */ + safe_state.re_state = &state; + safe_state.thread_state = NULL; + + init_join_list(&join_info, state.reverse, PyUnicode_Check(string)); + + sub_count = 0; + last_pos = state.reverse ? state.text_length : 0; + step = state.reverse ? -1 : 1; + while (sub_count < maxsub) { + int status; + + status = do_match(&safe_state, TRUE); + if (status < 0) + goto error; + + if (status == 0) + break; + + /* Append the segment before this match. */ + if (state.match_pos != last_pos) { + if (state.reverse) + item = get_slice(string, state.match_pos, last_pos); + else + item = get_slice(string, last_pos, state.match_pos); + if (!item) + goto error; + + /* Add to the list. */ + status = add_to_join_list(&join_info, item); + Py_DECREF(item); + if (status < 0) + goto error; + } + + /* Add this match. */ + if (is_literal) { + /* The replacement is a literal string. */ + status = add_to_join_list(&join_info, replacement); + if (status < 0) + goto error; +#if PY_VERSION_HEX >= 0x02060000 + } else if (is_format) { + /* The replacement is a format string. */ + MatchObject* match; + PyObject* args; + size_t g; + PyObject* kwargs; + + /* We need to create the arguments for the 'format' method. We'll + * start by creating a MatchObject. 
+ */ + match = (MatchObject*)pattern_new_match(self, &state, 1); + if (!match) + goto error; + + /* The args are a tuple of the capture group matches. */ + args = PyTuple_New((Py_ssize_t)state.pattern->public_group_count + + 1); + if (!args) { + Py_DECREF(match); + goto error; + } + + for (g = 0; g < state.pattern->public_group_count + 1; g++) + /* PyTuple_SetItem borrows the reference. */ + PyTuple_SetItem(args, (Py_ssize_t)g, + match_get_group_by_index(match, (Py_ssize_t)g, Py_None)); + + /* The kwargs are a dict of the named capture group matches. */ + kwargs = match_get_group_dict(match); + if (!kwargs) { + Py_DECREF(args); + Py_DECREF(match); + goto error; + } + + /* Call the 'format' method. */ + item = PyObject_Call(replacement, args, kwargs); + Py_DECREF(kwargs); + Py_DECREF(args); + Py_DECREF(match); + if (!item) + goto error; + + /* Add the result to the list. */ + status = add_to_join_list(&join_info, item); + Py_DECREF(item); + if (status < 0) + goto error; +#endif + } else if (is_template) { + /* The replacement is a list template. */ + Py_ssize_t size; + Py_ssize_t i; + + /* Add each part of the template to the list. */ + size = PyList_GET_SIZE(replacement); + for (i = 0; i < size; i++) { + PyObject* item; + PyObject* str_item; + + /* PyList_GET_ITEM borrows a reference. */ + item = PyList_GET_ITEM(replacement, i); + str_item = get_sub_replacement(item, string, &state, + self->public_group_count); + if (!str_item) + goto error; + + /* Add the result to the list. */ + if (str_item == Py_None) + /* None for "". */ + Py_DECREF(str_item); + else { + status = add_to_join_list(&join_info, str_item); + Py_DECREF(str_item); + if (status < 0) + goto error; + } + } + } else if (is_callable) { + /* Pass a MatchObject to the replacement function. */ + PyObject* match; + PyObject* args; + + /* We need to create a MatchObject to pass to the replacement + * function. 
+ */ + match = pattern_new_match(self, &state, 1); + if (!match) + goto error; + + /* The args for the replacement function. */ + args = PyTuple_Pack(1, match); + if (!args) { + Py_DECREF(match); + goto error; + } + + /* Call the replacement function. */ + item = PyObject_CallObject(replacement, args); + Py_DECREF(args); + Py_DECREF(match); + if (!item) + goto error; + + /* Add the result to the list. */ + status = add_to_join_list(&join_info, item); + Py_DECREF(item); + if (status < 0) + goto error; + } + + ++sub_count; + + last_pos = state.text_pos; + + if (state.version_0) { + /* Always advance after a zero-width match. */ + if (state.match_pos == state.text_pos) { + state.text_pos += step; + state.must_advance = FALSE; + } else + state.must_advance = TRUE; + } else + /* Continue from where we left off, but don't allow a contiguous + * zero-width match. + */ + state.must_advance = state.match_pos == state.text_pos; + } + + /* Get the segment following the last match. We use 'length' instead of + * 'text_length' because the latter is truncated to 'slice_end', a + * documented idiosyncracy of the 're' module. + */ + end_pos = state.reverse ? 0 : str_info.length; + if (last_pos != end_pos) { + int status; + + /* The segment is part of the original string. */ + if (state.reverse) + item = get_slice(string, 0, last_pos); + else + item = get_slice(string, last_pos, str_info.length); + if (!item) + goto error; + + status = add_to_join_list(&join_info, item); + Py_DECREF(item); + if (status < 0) + goto error; + } + + Py_XDECREF(replacement); + + /* Convert the list to a single string (also cleans up join_info). */ + item = join_list_info(&join_info); + + state_fini(&state); + + if (!item) + return NULL; + + if (sub_type & RE_SUBN) + return Py_BuildValue("Nn", item, sub_count); + + return item; + +error: + clear_join_list(&join_info); + state_fini(&state); + Py_XDECREF(replacement); + return NULL; +} + +/* PatternObject's 'sub' method. 
*/ +static PyObject* pattern_sub(PatternObject* self, PyObject* args, PyObject* + kwargs) { + int conc; + + PyObject* replacement; + PyObject* string; + Py_ssize_t count = 0; + PyObject* pos = Py_None; + PyObject* endpos = Py_None; + PyObject* concurrent = Py_None; + static char* kwlist[] = { "repl", "string", "count", "pos", "endpos", + "concurrent", NULL }; + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|nOOO:sub", kwlist, + &replacement, &string, &count, &pos, &endpos, &concurrent)) + return NULL; + + conc = decode_concurrent(concurrent); + if (conc < 0) + return NULL; + + return pattern_subx(self, replacement, string, count, RE_SUB, pos, endpos, + conc); +} + +#if PY_VERSION_HEX >= 0x02060000 +/* PatternObject's 'subf' method. */ +static PyObject* pattern_subf(PatternObject* self, PyObject* args, PyObject* + kwargs) { + int conc; + + PyObject* format; + PyObject* string; + Py_ssize_t count = 0; + PyObject* pos = Py_None; + PyObject* endpos = Py_None; + PyObject* concurrent = Py_None; + static char* kwlist[] = { "format", "string", "count", "pos", "endpos", + "concurrent", NULL }; + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|nOOO:sub", kwlist, + &format, &string, &count, &pos, &endpos, &concurrent)) + return NULL; + + conc = decode_concurrent(concurrent); + if (conc < 0) + return NULL; + + return pattern_subx(self, format, string, count, RE_SUBF, pos, endpos, + conc); +} + +#endif +/* PatternObject's 'subn' method. 
*/ +static PyObject* pattern_subn(PatternObject* self, PyObject* args, PyObject* + kwargs) { + int conc; + + PyObject* replacement; + PyObject* string; + Py_ssize_t count = 0; + PyObject* pos = Py_None; + PyObject* endpos = Py_None; + PyObject* concurrent = Py_None; + static char* kwlist[] = { "repl", "string", "count", "pos", "endpos", + "concurrent", NULL }; + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|nOOO:subn", kwlist, + &replacement, &string, &count, &pos, &endpos, &concurrent)) + return NULL; + + conc = decode_concurrent(concurrent); + if (conc < 0) + return NULL; + + return pattern_subx(self, replacement, string, count, RE_SUBN, pos, endpos, + conc); +} + +#if PY_VERSION_HEX >= 0x02060000 +/* PatternObject's 'subfn' method. */ +static PyObject* pattern_subfn(PatternObject* self, PyObject* args, PyObject* + kwargs) { + int conc; + + PyObject* format; + PyObject* string; + Py_ssize_t count = 0; + PyObject* pos = Py_None; + PyObject* endpos = Py_None; + PyObject* concurrent = Py_None; + static char* kwlist[] = { "format", "string", "count", "pos", "endpos", + "concurrent", NULL }; + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|nOOO:subn", kwlist, + &format, &string, &count, &pos, &endpos, &concurrent)) + return NULL; + + conc = decode_concurrent(concurrent); + if (conc < 0) + return NULL; + + return pattern_subx(self, format, string, count, RE_SUBF | RE_SUBN, pos, + endpos, conc); +} + +#endif +/* PatternObject's 'split' method. 
*/
+static PyObject* pattern_split(PatternObject* self, PyObject* args, PyObject*
+  kwargs) { /* Accepts (string, maxsplit=0, concurrent=None). Returns a new
+   * list made of the segments between matches, each followed by the values of
+   * the pattern's public groups for the match that ended it, plus the segment
+   * after the last match. Returns NULL on failure.
+   */
+    int conc;
+
+    RE_State state;
+    RE_SafeState safe_state;
+    PyObject* list;
+    PyObject* item;
+    int status;
+    Py_ssize_t split_count;
+    size_t g;
+    Py_ssize_t start_pos;
+    Py_ssize_t end_pos;
+    Py_ssize_t step;
+    Py_ssize_t last_pos;
+
+    PyObject* string;
+    Py_ssize_t maxsplit = 0;
+    PyObject* concurrent = Py_None;
+    static char* kwlist[] = { "string", "maxsplit", "concurrent", NULL };
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|nO:split", kwlist,
+      &string, &maxsplit, &concurrent))
+        return NULL;
+
+    if (maxsplit == 0) /* A maxsplit of 0 means "no limit". */
+        maxsplit = PY_SSIZE_T_MAX;
+
+    conc = decode_concurrent(concurrent);
+    if (conc < 0)
+        return NULL;
+
+    /* The MatchObject, and therefore repeated captures, will not be visible.
+     */
+    if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX, FALSE, conc,
+      FALSE, FALSE, FALSE, FALSE))
+        return NULL;
+
+    /* Initialise the "safe state" structure. */
+    safe_state.re_state = &state;
+    safe_state.thread_state = NULL;
+
+    list = PyList_New(0);
+    if (!list) {
+        state_fini(&state);
+        return NULL;
+    }
+
+    split_count = 0;
+    if (state.reverse) { /* A reverse pattern walks from the end to 0. */
+        start_pos = state.text_length;
+        end_pos = 0;
+        step = -1;
+    } else {
+        start_pos = 0;
+        end_pos = state.text_length;
+        step = 1;
+    }
+
+    last_pos = start_pos;
+    while (split_count < maxsplit) {
+        status = do_match(&safe_state, TRUE);
+        if (status < 0)
+            goto error;
+
+        if (status == 0)
+            /* No more matches. */
+            break;
+
+        if (state.version_0) {
+            /* Version 0 behaviour is to advance one character if the split was
+             * zero-width. Unfortunately, this can give an incorrect result.
+             * GvR wants this behaviour to be retained so as not to break any
+             * existing software which might rely on it.
+             */
+            if (state.text_pos == state.match_pos) {
+                if (last_pos == end_pos)
+                    break;
+
+                /* Advance one character and retry without recording a split.
+                 */
+                state.text_pos += step;
+                state.must_advance = FALSE;
+                continue;
+            }
+        }
+
+        /* Get segment before this match. */
+        if (state.reverse)
+            item = get_slice(string, state.match_pos, last_pos);
+        else
+            item = get_slice(string, last_pos, state.match_pos);
+        if (!item)
+            goto error;
+
+        status = PyList_Append(list, item);
+        Py_DECREF(item);
+        if (status < 0)
+            goto error;
+
+        /* Add groups (if any). */
+        for (g = 1; g <= self->public_group_count; g++) {
+            item = state_get_group(&state, (Py_ssize_t)g, string, FALSE);
+            if (!item)
+                goto error;
+
+            status = PyList_Append(list, item);
+            Py_DECREF(item);
+            if (status < 0)
+                goto error;
+        }
+
+        ++split_count;
+        last_pos = state.text_pos;
+
+        /* Version 0 behaviour is to advance one character if the match was
+         * zero-width. Unfortunately, this can give an incorrect result. GvR
+         * wants this behaviour to be retained so as not to break any existing
+         * software which might rely on it.
+         */
+        if (state.version_0) {
+            if (state.text_pos == state.match_pos)
+                /* Advance one character. */
+                state.text_pos += step;
+
+            state.must_advance = FALSE;
+        } else
+            /* Continue from where we left off, but don't allow a contiguous
+             * zero-width match.
+             */
+            state.must_advance = TRUE;
+    }
+
+    /* Get segment following last match (even if empty). */
+    if (state.reverse)
+        item = get_slice(string, 0, last_pos);
+    else
+        item = get_slice(string, last_pos, state.text_length);
+    if (!item)
+        goto error;
+
+    status = PyList_Append(list, item);
+    Py_DECREF(item);
+    if (status < 0)
+        goto error;
+
+    state_fini(&state);
+
+    return list;
+
+error:
+    Py_DECREF(list);
+    state_fini(&state);
+    return NULL;
+}
+
+/* PatternObject's 'splititer' method. Simply forwards to pattern_splitter. */
+static PyObject* pattern_splititer(PatternObject* pattern, PyObject* args,
+  PyObject* kwargs) {
+    return pattern_splitter(pattern, args, kwargs);
+}
+
+/* PatternObject's 'findall' method. 
*/
+static PyObject* pattern_findall(PatternObject* self, PyObject* args, PyObject*
+  kwargs) { /* Accepts (string, pos=None, endpos=None, overlapped=False,
+   * concurrent=None). Returns a new list with one entry per match: the matched
+   * slice when the pattern has no public groups, the group's value when it has
+   * exactly one, otherwise a tuple of all the groups. Returns NULL on failure.
+   */
+    Py_ssize_t start;
+    Py_ssize_t end;
+    RE_State state;
+    int conc;
+    RE_SafeState safe_state;
+    PyObject* list;
+    Py_ssize_t step;
+    int status;
+    size_t g;
+    Py_ssize_t b;
+    Py_ssize_t e;
+
+    PyObject* string;
+    PyObject* pos = Py_None;
+    PyObject* endpos = Py_None;
+    Py_ssize_t overlapped = FALSE;
+    PyObject* concurrent = Py_None;
+    static char* kwlist[] = { "string", "pos", "endpos", "overlapped",
+      "concurrent", NULL };
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|OOnO:findall", kwlist,
+      &string, &pos, &endpos, &overlapped, &concurrent))
+        return NULL;
+
+    start = as_string_index(pos, 0);
+    if (start == -1 && PyErr_Occurred())
+        return NULL;
+
+    end = as_string_index(endpos, PY_SSIZE_T_MAX);
+    if (end == -1 && PyErr_Occurred())
+        return NULL;
+
+    conc = decode_concurrent(concurrent);
+    if (conc < 0)
+        return NULL;
+
+    /* The MatchObject, and therefore repeated captures, will not be visible.
+     */
+    if (!state_init(&state, self, string, start, end, overlapped != 0, conc,
+      FALSE, FALSE, FALSE, FALSE))
+        return NULL;
+
+    /* Initialise the "safe state" structure. */
+    safe_state.re_state = &state;
+    safe_state.thread_state = NULL;
+
+    list = PyList_New(0);
+    if (!list) {
+        state_fini(&state);
+        return NULL;
+    }
+
+    step = state.reverse ? -1 : 1;
+    while (state.slice_start <= state.text_pos && state.text_pos <=
+      state.slice_end) {
+        PyObject* item;
+
+        status = do_match(&safe_state, TRUE);
+        if (status < 0)
+            goto error;
+
+        if (status == 0)
+            break;
+
+        /* Don't bother to build a MatchObject; the shape of the entry depends
+         * only on the number of public groups.
+         */
+        switch (self->public_group_count) {
+        case 0:
+            if (state.reverse) {
+                b = state.text_pos;
+                e = state.match_pos;
+            } else {
+                b = state.match_pos;
+                e = state.text_pos;
+            }
+            item = get_slice(string, b, e);
+            if (!item)
+                goto error;
+            break;
+        case 1:
+            item = state_get_group(&state, 1, string, TRUE);
+            if (!item)
+                goto error;
+            break;
+        default:
+            item = PyTuple_New((Py_ssize_t)self->public_group_count);
+            if (!item)
+                goto error;
+
+            for (g = 0; g < self->public_group_count; g++) {
+                PyObject* o;
+
+                o = state_get_group(&state, (Py_ssize_t)g + 1, string, TRUE);
+                if (!o) {
+                    Py_DECREF(item);
+                    goto error;
+                }
+
+                /* PyTuple_SET_ITEM steals the reference to 'o'. */
+                PyTuple_SET_ITEM(item, g, o);
+            }
+            break;
+        }
+
+        status = PyList_Append(list, item);
+        Py_DECREF(item);
+        if (status < 0)
+            goto error;
+
+        if (state.overlapped) {
+            /* Advance one character from where this match started. */
+            state.text_pos = state.match_pos + step;
+            state.must_advance = FALSE;
+        } else
+            /* Continue from where we left off, but don't allow 2 contiguous
+             * zero-width matches.
+             */
+            state.must_advance = state.text_pos == state.match_pos;
+    }
+
+    state_fini(&state);
+
+    return list;
+
+error:
+    Py_DECREF(list);
+    state_fini(&state);
+    return NULL;
+}
+
+/* PatternObject's 'finditer' method. Simply forwards to pattern_scanner. */
+static PyObject* pattern_finditer(PatternObject* pattern, PyObject* args,
+  PyObject* kwargs) {
+    return pattern_scanner(pattern, args, kwargs);
+}
+
+/* Makes a copy of a PatternObject.
+ *
+ * No new object is built: the same pattern is returned with its reference
+ * count incremented.
+ */
+Py_LOCAL_INLINE(PyObject*) make_pattern_copy(PatternObject* self) {
+    Py_INCREF(self);
+    return (PyObject*)self;
+}
+
+/* PatternObject's '__copy__' method. */
+static PyObject* pattern_copy(PatternObject* self, PyObject *unused) {
+    return make_pattern_copy(self);
+}
+
+/* PatternObject's '__deepcopy__' method. The 'memo' argument is not used. */
+static PyObject* pattern_deepcopy(PatternObject* self, PyObject* memo) {
+    return make_pattern_copy(self);
+}
+
+/* The documentation of a PatternObject. 
*/ +PyDoc_STRVAR(pattern_match_doc, + "match(string, pos=None, endpos=None, concurrent=None) --> MatchObject or None.\n\ + Match zero or more characters at the beginning of the string."); + +PyDoc_STRVAR(pattern_fullmatch_doc, + "fullmatch(string, pos=None, endpos=None, concurrent=None) --> MatchObject or None.\n\ + Match zero or more characters against all of the string."); + +PyDoc_STRVAR(pattern_search_doc, + "search(string, pos=None, endpos=None, concurrent=None) --> MatchObject or None.\n\ + Search through string looking for a match, and return a corresponding\n\ + match object instance. Return None if no match is found."); + +PyDoc_STRVAR(pattern_sub_doc, + "sub(repl, string, count=0, flags=0, pos=None, endpos=None, concurrent=None) --> newstring\n\ + Return the string obtained by replacing the leftmost (or rightmost with a\n\ + reverse pattern) non-overlapping occurrences of pattern in string by the\n\ + replacement repl."); + +#if PY_VERSION_HEX >= 0x02060000 +PyDoc_STRVAR(pattern_subf_doc, + "subf(format, string, count=0, flags=0, pos=None, endpos=None, concurrent=None) --> newstring\n\ + Return the string obtained by replacing the leftmost (or rightmost with a\n\ + reverse pattern) non-overlapping occurrences of pattern in string by the\n\ + replacement format."); + +#endif +PyDoc_STRVAR(pattern_subn_doc, + "subn(repl, string, count=0, flags=0, pos=None, endpos=None, concurrent=None) --> (newstring, number of subs)\n\ + Return the tuple (new_string, number_of_subs_made) found by replacing the\n\ + leftmost (or rightmost with a reverse pattern) non-overlapping occurrences\n\ + of pattern with the replacement repl."); + +#if PY_VERSION_HEX >= 0x02060000 +PyDoc_STRVAR(pattern_subfn_doc, + "subfn(format, string, count=0, flags=0, pos=None, endpos=None, concurrent=None) --> (newstring, number of subs)\n\ + Return the tuple (new_string, number_of_subs_made) found by replacing the\n\ + leftmost (or rightmost with a reverse pattern) non-overlapping occurrences\n\ 
+ of pattern with the replacement format."); + +#endif +PyDoc_STRVAR(pattern_split_doc, + "split(string, string, maxsplit=0, concurrent=None) --> list.\n\ + Split string by the occurrences of pattern."); + +PyDoc_STRVAR(pattern_splititer_doc, + "splititer(string, maxsplit=0, concurrent=None) --> iterator.\n\ + Return an iterator yielding the parts of a split string."); + +PyDoc_STRVAR(pattern_findall_doc, + "findall(string, pos=None, endpos=None, overlapped=False, concurrent=None) --> list.\n\ + Return a list of all matches of pattern in string. The matches may be\n\ + overlapped if overlapped is True."); + +PyDoc_STRVAR(pattern_finditer_doc, + "finditer(string, pos=None, endpos=None, overlapped=False, concurrent=None) --> iterator.\n\ + Return an iterator over all matches for the RE pattern in string. The\n\ + matches may be overlapped if overlapped is True. For each match, the\n\ + iterator returns a MatchObject."); + +PyDoc_STRVAR(pattern_scanner_doc, + "scanner(string, pos=None, endpos=None, overlapped=False, concurrent=None) --> scanner.\n\ + Return an scanner for the RE pattern in string. The matches may be overlapped\n\ + if overlapped is True."); + +/* The methods of a PatternObject. 
*/
+static PyMethodDef pattern_methods[] = {
+    {"match", (PyCFunction)pattern_match, METH_VARARGS|METH_KEYWORDS,
+      pattern_match_doc},
+    {"fullmatch", (PyCFunction)pattern_fullmatch, METH_VARARGS|METH_KEYWORDS,
+      pattern_fullmatch_doc},
+    {"search", (PyCFunction)pattern_search, METH_VARARGS|METH_KEYWORDS,
+      pattern_search_doc},
+    {"sub", (PyCFunction)pattern_sub, METH_VARARGS|METH_KEYWORDS,
+      pattern_sub_doc},
+#if PY_VERSION_HEX >= 0x02060000
+    {"subf", (PyCFunction)pattern_subf, METH_VARARGS|METH_KEYWORDS,
+      pattern_subf_doc},
+#endif
+    {"subn", (PyCFunction)pattern_subn, METH_VARARGS|METH_KEYWORDS,
+      pattern_subn_doc},
+#if PY_VERSION_HEX >= 0x02060000
+    {"subfn", (PyCFunction)pattern_subfn, METH_VARARGS|METH_KEYWORDS,
+      pattern_subfn_doc},
+#endif
+    {"split", (PyCFunction)pattern_split, METH_VARARGS|METH_KEYWORDS,
+      pattern_split_doc},
+    {"splititer", (PyCFunction)pattern_splititer, METH_VARARGS|METH_KEYWORDS,
+      pattern_splititer_doc},
+    {"findall", (PyCFunction)pattern_findall, METH_VARARGS|METH_KEYWORDS,
+      pattern_findall_doc},
+    {"finditer", (PyCFunction)pattern_finditer, METH_VARARGS|METH_KEYWORDS,
+      pattern_finditer_doc},
+    {"scanner", (PyCFunction)pattern_scanner, METH_VARARGS|METH_KEYWORDS,
+      pattern_scanner_doc},
+    {"__copy__", (PyCFunction)pattern_copy, METH_NOARGS},
+    {"__deepcopy__", (PyCFunction)pattern_deepcopy, METH_O},
+    {NULL, NULL} /* Sentinel */
+};
+
+PyDoc_STRVAR(pattern_doc, "Compiled regex object");
+
+/* Deallocates a PatternObject.
+ *
+ * Frees the node list and per-pattern info arrays, drops the references the
+ * pattern holds and finally releases the object itself.
+ */
+static void pattern_dealloc(PyObject* self_) {
+    PatternObject* self;
+    int partial_side;
+    size_t i;
+
+    self = (PatternObject*)self_;
+
+    /* Discard the nodes, including the extra offset tables that string nodes
+     * carry.
+     */
+    for (i = 0; i < self->node_count; i++) {
+        RE_Node* node;
+
+        node = self->node_list[i];
+        re_dealloc(node->values);
+        if (node->status & RE_STATUS_STRING) {
+            re_dealloc(node->string.bad_character_offset);
+            re_dealloc(node->string.good_suffix_offset);
+        }
+        re_dealloc(node);
+    }
+    re_dealloc(self->node_list);
+
+    /* Discard the group info. */
+    re_dealloc(self->group_info);
+
+    /* Discard the call_ref info. */
+    re_dealloc(self->call_ref_info);
+
+    /* Discard the repeat info. */
+    re_dealloc(self->repeat_info);
+
+    dealloc_groups(self->groups_storage, self->true_group_count);
+
+    dealloc_repeats(self->repeats_storage, self->repeat_count);
+
+    if (self->weakreflist)
+        PyObject_ClearWeakRefs((PyObject*)self);
+    Py_XDECREF(self->pattern);
+    Py_XDECREF(self->groupindex);
+    Py_XDECREF(self->indexgroup);
+
+    for (partial_side = 0; partial_side < 2; partial_side++) {
+        if (self->partial_named_lists[partial_side]) {
+            for (i = 0; i < self->named_lists_count; i++)
+                Py_XDECREF(self->partial_named_lists[partial_side][i]);
+
+            re_dealloc(self->partial_named_lists[partial_side]);
+        }
+    }
+
+    Py_DECREF(self->named_lists);
+    Py_DECREF(self->named_list_indexes);
+    PyObject_DEL(self);
+}
+
+/* Info about the various flags that can be passed in: the name used when
+ * printing the flag and its bit value.
+ */
+typedef struct RE_FlagName {
+    char* name;
+    int value;
+} RE_FlagName;
+
+/* We won't bother about the A flag in Python 2. */
+static RE_FlagName flag_names[] = {
+    {"B", RE_FLAG_BESTMATCH},
+    {"D", RE_FLAG_DEBUG},
+    {"S", RE_FLAG_DOTALL},
+    {"F", RE_FLAG_FULLCASE},
+    {"I", RE_FLAG_IGNORECASE},
+    {"L", RE_FLAG_LOCALE},
+    {"M", RE_FLAG_MULTILINE},
+    {"R", RE_FLAG_REVERSE},
+    {"T", RE_FLAG_TEMPLATE},
+    {"U", RE_FLAG_UNICODE},
+    {"X", RE_FLAG_VERBOSE},
+    {"V0", RE_FLAG_VERSION0},
+    {"V1", RE_FLAG_VERSION1},
+    {"W", RE_FLAG_WORD},
+};
+
+/* Appends a string to a list.
+ *
+ * Returns FALSE if the string object couldn't be created or appended.
+ */
+Py_LOCAL_INLINE(BOOL) append_string(PyObject* list, char* string) {
+    PyObject* item;
+    int status;
+
+    item = Py_BuildValue("s", string);
+    if (!item)
+        return FALSE;
+
+    status = PyList_Append(list, item);
+    Py_DECREF(item);
+    if (status < 0)
+        return FALSE;
+
+    return TRUE;
+}
+
+/* Appends a (decimal) integer to a list. 
*/ +Py_LOCAL_INLINE(BOOL) append_integer(PyObject* list, Py_ssize_t value) { + PyObject* int_obj; + PyObject* repr_obj; + int status; + + int_obj = Py_BuildValue("n", value); + if (!int_obj) + return FALSE; + + repr_obj = PyObject_Repr(int_obj); + Py_DECREF(int_obj); + if (!repr_obj) + return FALSE; + + status = PyList_Append(list, repr_obj); + Py_DECREF(repr_obj); + if (status < 0) + return FALSE; + + return TRUE; +} + +/* MatchObject's '__repr__' method. */ +static PyObject* match_repr(PyObject* self_) { + MatchObject* self; + PyObject* list; + PyObject* matched_substring; + PyObject* matched_repr; + int status; + PyObject* separator; + PyObject* result; + + self = (MatchObject*)self_; + + list = PyList_New(0); + if (!list) + return NULL; + + if (!append_string(list, "match_start)) + goto error; + + if (! append_string(list, ", ")) + goto error; + + if (!append_integer(list, self->match_end)) + goto error; + + if (!append_string(list, "), match=")) + goto error; + + matched_substring = get_slice(self->substring, self->match_start - + self->substring_offset, self->match_end - self->substring_offset); + if (!matched_substring) + goto error; + + matched_repr = PyObject_Repr(matched_substring); + Py_DECREF(matched_substring); + if (!matched_repr) + goto error; + + status = PyList_Append(list, matched_repr); + Py_DECREF(matched_repr); + if (status < 0) + goto error; + + if (self->fuzzy_counts[RE_FUZZY_SUB] != 0 || + self->fuzzy_counts[RE_FUZZY_INS] != 0 || self->fuzzy_counts[RE_FUZZY_DEL] + != 0) { + if (! append_string(list, ", fuzzy_counts=(")) + goto error; + + if (!append_integer(list, + (Py_ssize_t)self->fuzzy_counts[RE_FUZZY_SUB])) + goto error; + + if (! append_string(list, ", ")) + goto error; + + if (!append_integer(list, + (Py_ssize_t)self->fuzzy_counts[RE_FUZZY_INS])) + goto error; + + if (! append_string(list, ", ")) + goto error; + if (!append_integer(list, + (Py_ssize_t)self->fuzzy_counts[RE_FUZZY_DEL])) + goto error; + + if (! 
append_string(list, ")")) + goto error; + } + + if (self->partial) { + if (!append_string(list, ", partial=True")) + goto error; + } + + if (! append_string(list, ">")) + goto error; + + separator = Py_BuildValue("s", ""); + if (!separator) + goto error; + + result = PyUnicode_Join(separator, list); + Py_DECREF(separator); + Py_DECREF(list); + + return result; + +error: + Py_DECREF(list); + return NULL; +} + +/* PatternObject's '__repr__' method. */ +static PyObject* pattern_repr(PyObject* self_) { + PatternObject* self; + PyObject* list; + PyObject* item; + int status; + int flag_count; + unsigned int i; + Py_ssize_t pos; + PyObject *key; + PyObject *value; + PyObject* separator; + PyObject* result; + + self = (PatternObject*)self_; + + list = PyList_New(0); + if (!list) + return NULL; + + if (!append_string(list, "regex.Regex(")) + goto error; + + item = PyObject_Repr(self->pattern); + if (!item) + goto error; + + status = PyList_Append(list, item); + Py_DECREF(item); + if (status < 0) + goto error; + + flag_count = 0; + for (i = 0; i < sizeof(flag_names) / sizeof(flag_names[0]); i++) { + if (self->flags & flag_names[i].value) { + if (flag_count == 0) { + if (!append_string(list, ", flags=")) + goto error; + } else { + if (!append_string(list, " | ")) + goto error; + } + + if (!append_string(list, "regex.")) + goto error; + + if (!append_string(list, flag_names[i].name)) + goto error; + + ++flag_count; + } + } + + pos = 0; + /* PyDict_Next borrows references. 
*/ + while (PyDict_Next(self->named_lists, &pos, &key, &value)) { + if (!append_string(list, ", ")) + goto error; + + status = PyList_Append(list, key); + if (status < 0) + goto error; + + if (!append_string(list, "=")) + goto error; + + item = PyObject_Repr(value); + if (!item) + goto error; + + status = PyList_Append(list, item); + Py_DECREF(item); + if (status < 0) + goto error; + } + + if (!append_string(list, ")")) + goto error; + + separator = Py_BuildValue("s", ""); + if (!separator) + goto error; + + result = PyUnicode_Join(separator, list); + Py_DECREF(separator); + Py_DECREF(list); + + return result; + +error: + Py_DECREF(list); + return NULL; +} + +/* PatternObject's 'groupindex' method. */ +static PyObject* pattern_groupindex(PyObject* self_) { + PatternObject* self; + + self = (PatternObject*)self_; + + return PyDict_Copy(self->groupindex); +} + +static PyGetSetDef pattern_getset[] = { + {"groupindex", (getter)pattern_groupindex, (setter)NULL, + "A dictionary mapping group names to group numbers."}, + {NULL} /* Sentinel */ +}; + +static PyMemberDef pattern_members[] = { + {"pattern", T_OBJECT, offsetof(PatternObject, pattern), READONLY, + "The pattern string from which the regex object was compiled."}, + {"flags", T_PYSSIZET, offsetof(PatternObject, flags), READONLY, + "The regex matching flags."}, + {"groups", T_PYSSIZET, offsetof(PatternObject, public_group_count), + READONLY, "The number of capturing groups in the pattern."}, + {"named_lists", T_OBJECT, offsetof(PatternObject, named_lists), READONLY, + "The named lists used by the regex."}, + {NULL} /* Sentinel */ +}; + +static PyTypeObject Pattern_Type = { + PyObject_HEAD_INIT(NULL) + 0, + "_" RE_MODULE "." "Pattern", + sizeof(PatternObject) +}; + +/* Building the nodes is made simpler by allowing branches to have a single + * exit. These need to be removed. 
+ */
+Py_LOCAL_INLINE(void) skip_one_way_branches(PatternObject* pattern) {
+    BOOL changed;
+
+    /* Keep sweeping over all the nodes until a full sweep rewires nothing. On
+     * each sweep, any destination that is a branch with no second exit is
+     * replaced by that branch's own first destination.
+     */
+    changed = TRUE;
+    while (changed) {
+        size_t n;
+
+        changed = FALSE;
+
+        for (n = 0; n < pattern->node_count; n++) {
+            RE_Node* current;
+            RE_Node** exits[2];
+            int k;
+
+            current = pattern->node_list[n];
+
+            /* The first and the second destination, in that order. */
+            exits[0] = &current->next_1.node;
+            exits[1] = &current->nonstring.next_2.node;
+
+            for (k = 0; k < 2; k++) {
+                RE_Node* target;
+
+                target = *exits[k];
+                if (!target)
+                    continue;
+
+                if (target->op != RE_OP_BRANCH)
+                    continue;
+
+                if (target->nonstring.next_2.node)
+                    continue;
+
+                *exits[k] = target->next_1.node;
+                changed = TRUE;
+            }
+        }
+    }
+
+    /* The start node itself may be the head of a chain of 1-way branches which
+     * are about to be removed, so step past all of them.
+     */
+    for (;;) {
+        RE_Node* first;
+
+        first = pattern->start_node;
+        if (first->op != RE_OP_BRANCH || first->nonstring.next_2.node)
+            break;
+
+        pattern->start_node = first->next_1.node;
+    }
+}
+
+/* Adds guards to repeats which are followed by a reference to a group.
+ *
+ * Returns whether a guard was added for a node at or after the given node.
+ */ +Py_LOCAL_INLINE(RE_STATUS_T) add_repeat_guards(PatternObject* pattern, RE_Node* + node) { + RE_STATUS_T result; + + result = RE_STATUS_NEITHER; + + for (;;) { + if (node->status & RE_STATUS_VISITED_AG) + return node->status & (RE_STATUS_REPEAT | RE_STATUS_REF); + + switch (node->op) { + case RE_OP_ATOMIC: + case RE_OP_LOOKAROUND: + { + RE_STATUS_T body_result; + RE_STATUS_T tail_result; + RE_STATUS_T status; + + body_result = add_repeat_guards(pattern, + node->nonstring.next_2.node); + tail_result = add_repeat_guards(pattern, node->next_1.node); + status = max_status_3(result, body_result, tail_result); + node->status = RE_STATUS_VISITED_AG | status; + return status; + } + case RE_OP_BRANCH: + { + RE_STATUS_T branch_1_result; + RE_STATUS_T branch_2_result; + RE_STATUS_T status; + + branch_1_result = add_repeat_guards(pattern, node->next_1.node); + branch_2_result = add_repeat_guards(pattern, + node->nonstring.next_2.node); + status = max_status_3(result, branch_1_result, branch_2_result); + node->status = RE_STATUS_VISITED_AG | status; + return status; + } + case RE_OP_END_GREEDY_REPEAT: + case RE_OP_END_LAZY_REPEAT: + node->status |= RE_STATUS_VISITED_AG; + return result; + case RE_OP_GREEDY_REPEAT: + case RE_OP_LAZY_REPEAT: + { + BOOL limited; + RE_STATUS_T body_result; + RE_STATUS_T tail_result; + RE_RepeatInfo* repeat_info; + RE_STATUS_T status; + + limited = ~node->values[2] != 0; + if (limited) + body_result = RE_STATUS_LIMITED; + else + body_result = add_repeat_guards(pattern, node->next_1.node); + tail_result = add_repeat_guards(pattern, + node->nonstring.next_2.node); + + repeat_info = &pattern->repeat_info[node->values[0]]; + if (body_result != RE_STATUS_REF) + repeat_info->status |= RE_STATUS_BODY; + if (tail_result != RE_STATUS_REF) + repeat_info->status |= RE_STATUS_TAIL; + if (limited) + result = max_status_2(result, RE_STATUS_LIMITED); + else + result = max_status_2(result, RE_STATUS_REPEAT); + status = max_status_3(result, body_result, 
tail_result); + node->status |= RE_STATUS_VISITED_AG | status; + return status; + } + case RE_OP_GREEDY_REPEAT_ONE: + case RE_OP_LAZY_REPEAT_ONE: + { + BOOL limited; + RE_STATUS_T tail_result; + RE_RepeatInfo* repeat_info; + RE_STATUS_T status; + + limited = ~node->values[2] != 0; + tail_result = add_repeat_guards(pattern, node->next_1.node); + + repeat_info = &pattern->repeat_info[node->values[0]]; + repeat_info->status |= RE_STATUS_BODY; + if (tail_result != RE_STATUS_REF) + repeat_info->status |= RE_STATUS_TAIL; + if (limited) + result = max_status_2(result, RE_STATUS_LIMITED); + else + result = max_status_2(result, RE_STATUS_REPEAT); + status = max_status_3(result, RE_STATUS_REPEAT, tail_result); + node->status = RE_STATUS_VISITED_AG | status; + return status; + } + case RE_OP_GROUP_EXISTS: + { + RE_STATUS_T branch_1_result; + RE_STATUS_T branch_2_result; + RE_STATUS_T status; + + branch_1_result = add_repeat_guards(pattern, node->next_1.node); + branch_2_result = add_repeat_guards(pattern, + node->nonstring.next_2.node); + status = max_status_4(result, branch_1_result, branch_2_result, + RE_STATUS_REF); + node->status = RE_STATUS_VISITED_AG | status; + return status; + } + case RE_OP_GROUP_CALL: + case RE_OP_REF_GROUP: + case RE_OP_REF_GROUP_FLD: + case RE_OP_REF_GROUP_FLD_REV: + case RE_OP_REF_GROUP_IGN: + case RE_OP_REF_GROUP_IGN_REV: + case RE_OP_REF_GROUP_REV: + result = RE_STATUS_REF; + node = node->next_1.node; + break; + case RE_OP_SUCCESS: + node->status = RE_STATUS_VISITED_AG | result; + return result; + default: + node = node->next_1.node; + break; + } + } +} + +/* Adds an index to a node's values unless it's already present. + * + * 'offset' is the offset of the index count within the values. 
+ */ +Py_LOCAL_INLINE(BOOL) add_index(RE_Node* node, size_t offset, size_t index) { + size_t index_count; + size_t first_index; + size_t i; + RE_CODE* new_values; + + if (!node) + return TRUE; + + index_count = node->values[offset]; + first_index = offset + 1; + + /* Is the index already present? */ + for (i = 0; i < index_count; i++) { + if (node->values[first_index + i] == index) + return TRUE; + } + + /* Allocate more space for the new index. */ + new_values = re_realloc(node->values, (node->value_count + 1) * + sizeof(RE_CODE)); + if (!new_values) + return FALSE; + + ++node->value_count; + node->values = new_values; + + node->values[first_index + node->values[offset]++] = (RE_CODE)index; + + return TRUE; +} + +/* Records the index of every repeat and fuzzy section within atomic + * subpatterns and lookarounds. + */ +Py_LOCAL_INLINE(BOOL) record_subpattern_repeats_and_fuzzy_sections(RE_Node* + parent_node, size_t offset, size_t repeat_count, RE_Node* node) { + while (node) { + if (node->status & RE_STATUS_VISITED_REP) + return TRUE; + + node->status |= RE_STATUS_VISITED_REP; + + switch (node->op) { + case RE_OP_ATOMIC: + if (!record_subpattern_repeats_and_fuzzy_sections(node, 0, + repeat_count, node->nonstring.next_2.node)) + return FALSE; + node = node->next_1.node; + break; + case RE_OP_BRANCH: + if (!record_subpattern_repeats_and_fuzzy_sections(parent_node, + offset, repeat_count, node->next_1.node)) + return FALSE; + node = node->nonstring.next_2.node; + break; + case RE_OP_END_FUZZY: + node = node->next_1.node; + break; + case RE_OP_END_GREEDY_REPEAT: + case RE_OP_END_LAZY_REPEAT: + return TRUE; + case RE_OP_FUZZY: + /* Record the fuzzy index. */ + if (!add_index(parent_node, offset, repeat_count + + node->values[0])) + return FALSE; + node = node->next_1.node; + break; + case RE_OP_GREEDY_REPEAT: + case RE_OP_LAZY_REPEAT: + /* Record the repeat index. 
*/ + if (!add_index(parent_node, offset, node->values[0])) + return FALSE; + if (!record_subpattern_repeats_and_fuzzy_sections(parent_node, + offset, repeat_count, node->next_1.node)) + return FALSE; + node = node->nonstring.next_2.node; + break; + case RE_OP_GREEDY_REPEAT_ONE: + case RE_OP_LAZY_REPEAT_ONE: + /* Record the repeat index. */ + if (!add_index(parent_node, offset, node->values[0])) + return FALSE; + node = node->next_1.node; + break; + case RE_OP_GROUP_EXISTS: + if (!record_subpattern_repeats_and_fuzzy_sections(parent_node, + offset, repeat_count, node->next_1.node)) + return FALSE; + node = node->nonstring.next_2.node; + break; + case RE_OP_LOOKAROUND: + if (!record_subpattern_repeats_and_fuzzy_sections(node, 1, + repeat_count, node->nonstring.next_2.node)) + return FALSE; + node = node->next_1.node; + break; + default: + node = node->next_1.node; + break; + } + } + + return TRUE; +} + +/* Marks nodes which are being used as used. */ +Py_LOCAL_INLINE(void) use_nodes(RE_Node* node) { + while (node && !(node->status & RE_STATUS_USED)) { + node->status |= RE_STATUS_USED; + if (!(node->status & RE_STATUS_STRING)) { + if (node->nonstring.next_2.node) + use_nodes(node->nonstring.next_2.node); + } + node = node->next_1.node; + } +} + +/* Discards any unused nodes. + * + * Optimising the nodes might result in some nodes no longer being used. + */ +Py_LOCAL_INLINE(void) discard_unused_nodes(PatternObject* pattern) { + size_t new_count; + size_t i; + + /* Mark the nodes which are being used. 
*/ + use_nodes(pattern->start_node); + + for (i = 0; i < pattern->call_ref_info_capacity; i++) + use_nodes(pattern->call_ref_info[i].node); + + new_count = 0; + for (i = 0; i < pattern->node_count; i++) { + RE_Node* node; + + node = pattern->node_list[i]; + if (node->status & RE_STATUS_USED) + pattern->node_list[new_count++] = node; + else { + re_dealloc(node->values); + if (node->status & RE_STATUS_STRING) { + re_dealloc(node->string.bad_character_offset); + re_dealloc(node->string.good_suffix_offset); + } + re_dealloc(node); + } + } + + pattern->node_count = new_count; +} + +/* Marks all the group which are named. Returns FALSE if there's an error. */ +Py_LOCAL_INLINE(BOOL) mark_named_groups(PatternObject* pattern) { + size_t i; + + for (i = 0; i < pattern->public_group_count; i++) { + RE_GroupInfo* group_info; + PyObject* index; + int status; + + group_info = &pattern->group_info[i]; + index = Py_BuildValue("n", i + 1); + if (!index) + return FALSE; + + status = PyDict_Contains(pattern->indexgroup, index); + Py_DECREF(index); + if (status < 0) + return FALSE; + + group_info->has_name = status == 1; + } + + return TRUE; +} + +/* Gets the test node. + * + * The test node lets the matcher look ahead in the pattern, allowing it to + * avoid the cost of housekeeping, only to find that what follows doesn't match + * anyway. 
+ */ +Py_LOCAL_INLINE(void) set_test_node(RE_NextNode* next) { + RE_Node* node = next->node; + RE_Node* test; + + next->test = node; + next->match_next = node; + next->match_step = 0; + + if (!node) + return; + + test = node; + while (test->op == RE_OP_END_GROUP || test->op == RE_OP_START_GROUP) + test = test->next_1.node; + + next->test = test; + + if (test != node) + return; + + switch (test->op) { + case RE_OP_ANY: + case RE_OP_ANY_ALL: + case RE_OP_ANY_ALL_REV: + case RE_OP_ANY_REV: + case RE_OP_ANY_U: + case RE_OP_ANY_U_REV: + case RE_OP_BOUNDARY: + case RE_OP_CHARACTER: + case RE_OP_CHARACTER_IGN: + case RE_OP_CHARACTER_IGN_REV: + case RE_OP_CHARACTER_REV: + case RE_OP_DEFAULT_BOUNDARY: + case RE_OP_DEFAULT_END_OF_WORD: + case RE_OP_DEFAULT_START_OF_WORD: + case RE_OP_END_OF_LINE: + case RE_OP_END_OF_LINE_U: + case RE_OP_END_OF_STRING: + case RE_OP_END_OF_STRING_LINE: + case RE_OP_END_OF_STRING_LINE_U: + case RE_OP_END_OF_WORD: + case RE_OP_GRAPHEME_BOUNDARY: + case RE_OP_PROPERTY: + case RE_OP_PROPERTY_IGN: + case RE_OP_PROPERTY_IGN_REV: + case RE_OP_PROPERTY_REV: + case RE_OP_RANGE: + case RE_OP_RANGE_IGN: + case RE_OP_RANGE_IGN_REV: + case RE_OP_RANGE_REV: + case RE_OP_SEARCH_ANCHOR: + case RE_OP_SET_DIFF: + case RE_OP_SET_DIFF_IGN: + case RE_OP_SET_DIFF_IGN_REV: + case RE_OP_SET_DIFF_REV: + case RE_OP_SET_INTER: + case RE_OP_SET_INTER_IGN: + case RE_OP_SET_INTER_IGN_REV: + case RE_OP_SET_INTER_REV: + case RE_OP_SET_SYM_DIFF: + case RE_OP_SET_SYM_DIFF_IGN: + case RE_OP_SET_SYM_DIFF_IGN_REV: + case RE_OP_SET_SYM_DIFF_REV: + case RE_OP_SET_UNION: + case RE_OP_SET_UNION_IGN: + case RE_OP_SET_UNION_IGN_REV: + case RE_OP_SET_UNION_REV: + case RE_OP_START_OF_LINE: + case RE_OP_START_OF_LINE_U: + case RE_OP_START_OF_STRING: + case RE_OP_START_OF_WORD: + case RE_OP_STRING: + case RE_OP_STRING_FLD: + case RE_OP_STRING_FLD_REV: + case RE_OP_STRING_IGN: + case RE_OP_STRING_IGN_REV: + case RE_OP_STRING_REV: + next->match_next = test->next_1.node; + next->match_step = 
test->step; + break; + case RE_OP_GREEDY_REPEAT_ONE: + case RE_OP_LAZY_REPEAT_ONE: + if (test->values[1] > 0) + next->test = test; + break; + } +} + +/* Sets the test nodes. */ +Py_LOCAL_INLINE(void) set_test_nodes(PatternObject* pattern) { + RE_Node** node_list; + size_t i; + + node_list = pattern->node_list; + for (i = 0; i < pattern->node_count; i++) { + RE_Node* node; + + node = node_list[i]; + set_test_node(&node->next_1); + if (!(node->status & RE_STATUS_STRING)) + set_test_node(&node->nonstring.next_2); + } +} + +/* Optimises the pattern. */ +Py_LOCAL_INLINE(BOOL) optimise_pattern(PatternObject* pattern) { + size_t i; + + /* Building the nodes is made simpler by allowing branches to have a single + * exit. These need to be removed. + */ + skip_one_way_branches(pattern); + + /* Add position guards for repeat bodies containing a reference to a group + * or repeat tails followed at some point by a reference to a group. + */ + add_repeat_guards(pattern, pattern->start_node); + + /* Record the index of repeats and fuzzy sections within the body of atomic + * and lookaround nodes. + */ + if (!record_subpattern_repeats_and_fuzzy_sections(NULL, 0, + pattern->repeat_count, pattern->start_node)) + return FALSE; + + for (i = 0; i < pattern->call_ref_info_count; i++) { + RE_Node* node; + + node = pattern->call_ref_info[i].node; + if (!record_subpattern_repeats_and_fuzzy_sections(NULL, 0, + pattern->repeat_count, node)) + return FALSE; + } + + /* Discard any unused nodes. */ + discard_unused_nodes(pattern); + + /* Set the test nodes. */ + set_test_nodes(pattern); + + /* Mark all the group that are named. */ + if (!mark_named_groups(pattern)) + return FALSE; + + return TRUE; +} + +/* Creates a new pattern node. 
*/ +Py_LOCAL_INLINE(RE_Node*) create_node(PatternObject* pattern, RE_UINT8 op, + RE_CODE flags, Py_ssize_t step, size_t value_count) { + RE_Node* node; + + node = (RE_Node*)re_alloc(sizeof(*node)); + if (!node) + return NULL; + memset(node, 0, sizeof(RE_Node)); + + node->value_count = value_count; + if (node->value_count > 0) { + node->values = (RE_CODE*)re_alloc(node->value_count * sizeof(RE_CODE)); + if (!node->values) + goto error; + } else + node->values = NULL; + + node->op = op; + node->match = (flags & RE_POSITIVE_OP) != 0; + node->status = (RE_STATUS_T)(flags << RE_STATUS_SHIFT); + node->step = step; + + /* Ensure that there's enough storage to record the new node. */ + if (pattern->node_count >= pattern->node_capacity) { + RE_Node** new_node_list; + + pattern->node_capacity *= 2; + if (pattern->node_capacity == 0) + pattern->node_capacity = RE_INIT_NODE_LIST_SIZE; + new_node_list = (RE_Node**)re_realloc(pattern->node_list, + pattern->node_capacity * sizeof(RE_Node*)); + if (!new_node_list) + goto error; + pattern->node_list = new_node_list; + } + + /* Record the new node. */ + pattern->node_list[pattern->node_count++] = node; + + return node; + +error: + re_dealloc(node->values); + re_dealloc(node); + return NULL; +} + +/* Adds a node as a next node for another node. */ +Py_LOCAL_INLINE(void) add_node(RE_Node* node_1, RE_Node* node_2) { + if (!node_1->next_1.node) + node_1->next_1.node = node_2; + else + node_1->nonstring.next_2.node = node_2; +} + +/* Ensures that the entry for a group's details actually exists. */ +Py_LOCAL_INLINE(BOOL) ensure_group(PatternObject* pattern, size_t group) { + size_t old_capacity; + size_t new_capacity; + RE_GroupInfo* new_group_info; + + if (group <= pattern->true_group_count) + /* We already have an entry for the group. */ + return TRUE; + + /* Increase the storage capacity to include the new entry if it's + * insufficient. 
+ */ + old_capacity = pattern->group_info_capacity; + new_capacity = pattern->group_info_capacity; + while (group > new_capacity) + new_capacity += RE_LIST_SIZE_INC; + + if (new_capacity > old_capacity) { + new_group_info = (RE_GroupInfo*)re_realloc(pattern->group_info, + new_capacity * sizeof(RE_GroupInfo)); + if (!new_group_info) + return FALSE; + memset(new_group_info + old_capacity, 0, (new_capacity - old_capacity) + * sizeof(RE_GroupInfo)); + + pattern->group_info = new_group_info; + pattern->group_info_capacity = new_capacity; + } + + pattern->true_group_count = group; + + return TRUE; +} + +/* Records that there's a reference to a group. */ +Py_LOCAL_INLINE(BOOL) record_ref_group(PatternObject* pattern, size_t group) { + if (!ensure_group(pattern, group)) + return FALSE; + + pattern->group_info[group - 1].referenced = TRUE; + + return TRUE; +} + +/* Records that there's a new group. */ +Py_LOCAL_INLINE(BOOL) record_group(PatternObject* pattern, size_t group, + RE_Node* node) { + if (!ensure_group(pattern, group)) + return FALSE; + + if (group >= 1) { + RE_GroupInfo* info; + + info = &pattern->group_info[group - 1]; + info->end_index = (Py_ssize_t)pattern->true_group_count; + info->node = node; + } + + return TRUE; +} + +/* Records that a group has closed. */ +Py_LOCAL_INLINE(void) record_group_end(PatternObject* pattern, size_t group) { + if (group >= 1) + pattern->group_info[group - 1].end_index = ++pattern->group_end_index; +} + +/* Ensures that the entry for a call_ref's details actually exists. */ +Py_LOCAL_INLINE(BOOL) ensure_call_ref(PatternObject* pattern, size_t call_ref) + { + size_t old_capacity; + size_t new_capacity; + RE_CallRefInfo* new_call_ref_info; + + if (call_ref < pattern->call_ref_info_count) + /* We already have an entry for the call_ref. */ + return TRUE; + + /* Increase the storage capacity to include the new entry if it's + * insufficient. 
+ */ + old_capacity = pattern->call_ref_info_capacity; + new_capacity = pattern->call_ref_info_capacity; + while (call_ref >= new_capacity) + new_capacity += RE_LIST_SIZE_INC; + + if (new_capacity > old_capacity) { + new_call_ref_info = (RE_CallRefInfo*)re_realloc(pattern->call_ref_info, + new_capacity * sizeof(RE_CallRefInfo)); + if (!new_call_ref_info) + return FALSE; + memset(new_call_ref_info + old_capacity, 0, (new_capacity - + old_capacity) * sizeof(RE_CallRefInfo)); + + pattern->call_ref_info = new_call_ref_info; + pattern->call_ref_info_capacity = new_capacity; + } + + pattern->call_ref_info_count = 1 + call_ref; + + return TRUE; +} + +/* Records that a call_ref is defined. */ +Py_LOCAL_INLINE(BOOL) record_call_ref_defined(PatternObject* pattern, size_t + call_ref, RE_Node* node) { + if (!ensure_call_ref(pattern, call_ref)) + return FALSE; + + pattern->call_ref_info[call_ref].defined = TRUE; + pattern->call_ref_info[call_ref].node = node; + + return TRUE; +} + +/* Records that a call_ref is used. */ +Py_LOCAL_INLINE(BOOL) record_call_ref_used(PatternObject* pattern, size_t + call_ref) { + if (!ensure_call_ref(pattern, call_ref)) + return FALSE; + + pattern->call_ref_info[call_ref].used = TRUE; + + return TRUE; +} + +/* Checks whether a node matches one and only one character. */ +Py_LOCAL_INLINE(BOOL) sequence_matches_one(RE_Node* node) { + while (node->op == RE_OP_BRANCH && !node->nonstring.next_2.node) + node = node->next_1.node; + + if (node->next_1.node || (node->status & RE_STATUS_FUZZY)) + return FALSE; + + return node_matches_one_character(node); +} + +/* Records a repeat. */ +Py_LOCAL_INLINE(BOOL) record_repeat(PatternObject* pattern, size_t index, + size_t repeat_depth) { + size_t old_capacity; + size_t new_capacity; + + /* Increase the storage capacity to include the new entry if it's + * insufficient. 
+ */ + old_capacity = pattern->repeat_info_capacity; + new_capacity = pattern->repeat_info_capacity; + while (index >= new_capacity) + new_capacity += RE_LIST_SIZE_INC; + + if (new_capacity > old_capacity) { + RE_RepeatInfo* new_repeat_info; + + new_repeat_info = (RE_RepeatInfo*)re_realloc(pattern->repeat_info, + new_capacity * sizeof(RE_RepeatInfo)); + if (!new_repeat_info) + return FALSE; + memset(new_repeat_info + old_capacity, 0, (new_capacity - old_capacity) + * sizeof(RE_RepeatInfo)); + + pattern->repeat_info = new_repeat_info; + pattern->repeat_info_capacity = new_capacity; + } + + if (index >= pattern->repeat_count) + pattern->repeat_count = index + 1; + + if (repeat_depth > 0) + pattern->repeat_info[index].status |= RE_STATUS_INNER; + + return TRUE; +} + +Py_LOCAL_INLINE(Py_ssize_t) get_step(RE_CODE op) { + switch (op) { + case RE_OP_ANY: + case RE_OP_ANY_ALL: + case RE_OP_ANY_U: + case RE_OP_CHARACTER: + case RE_OP_CHARACTER_IGN: + case RE_OP_PROPERTY: + case RE_OP_PROPERTY_IGN: + case RE_OP_RANGE: + case RE_OP_RANGE_IGN: + case RE_OP_SET_DIFF: + case RE_OP_SET_DIFF_IGN: + case RE_OP_SET_INTER: + case RE_OP_SET_INTER_IGN: + case RE_OP_SET_SYM_DIFF: + case RE_OP_SET_SYM_DIFF_IGN: + case RE_OP_SET_UNION: + case RE_OP_SET_UNION_IGN: + case RE_OP_STRING: + case RE_OP_STRING_FLD: + case RE_OP_STRING_IGN: + return 1; + case RE_OP_ANY_ALL_REV: + case RE_OP_ANY_REV: + case RE_OP_ANY_U_REV: + case RE_OP_CHARACTER_IGN_REV: + case RE_OP_CHARACTER_REV: + case RE_OP_PROPERTY_IGN_REV: + case RE_OP_PROPERTY_REV: + case RE_OP_RANGE_IGN_REV: + case RE_OP_RANGE_REV: + case RE_OP_SET_DIFF_IGN_REV: + case RE_OP_SET_DIFF_REV: + case RE_OP_SET_INTER_IGN_REV: + case RE_OP_SET_INTER_REV: + case RE_OP_SET_SYM_DIFF_IGN_REV: + case RE_OP_SET_SYM_DIFF_REV: + case RE_OP_SET_UNION_IGN_REV: + case RE_OP_SET_UNION_REV: + case RE_OP_STRING_FLD_REV: + case RE_OP_STRING_IGN_REV: + case RE_OP_STRING_REV: + return -1; + } + + return 0; +} + +Py_LOCAL_INLINE(int) 
build_sequence(RE_CompileArgs* args); + +/* Builds an ANY node. */ +Py_LOCAL_INLINE(int) build_ANY(RE_CompileArgs* args) { + RE_UINT8 op; + RE_CODE flags; + Py_ssize_t step; + RE_Node* node; + + /* codes: opcode, flags. */ + if (args->code + 1 > args->end_code) + return RE_ERROR_ILLEGAL; + + op = (RE_UINT8)args->code[0]; + flags = args->code[1]; + + step = get_step(op); + + /* Create the node. */ + node = create_node(args->pattern, op, flags, step, 0); + if (!node) + return RE_ERROR_MEMORY; + + args->code += 2; + + /* Append the node. */ + add_node(args->end, node); + args->end = node; + + ++args->min_width; + + return RE_ERROR_SUCCESS; +} + +/* Builds a FUZZY node. */ +Py_LOCAL_INLINE(int) build_FUZZY(RE_CompileArgs* args) { + RE_CODE flags; + RE_Node* start_node; + RE_Node* end_node; + RE_CODE index; + RE_CompileArgs subargs; + int status; + + /* codes: opcode, flags, constraints, sequence, end. */ + if (args->code + 13 > args->end_code) + return RE_ERROR_ILLEGAL; + + flags = args->code[1]; + + /* Create nodes for the start and end of the fuzzy sequence. */ + start_node = create_node(args->pattern, RE_OP_FUZZY, flags, 0, 9); + end_node = create_node(args->pattern, RE_OP_END_FUZZY, flags, 0, 5); + if (!start_node || !end_node) + return RE_ERROR_MEMORY; + + index = (RE_CODE)args->pattern->fuzzy_count++; + start_node->values[0] = index; + end_node->values[0] = index; + + /* The constraints consist of 4 pairs of limits and the cost equation. */ + end_node->values[RE_FUZZY_VAL_MIN_DEL] = args->code[2]; /* Deletion minimum. */ + end_node->values[RE_FUZZY_VAL_MIN_INS] = args->code[4]; /* Insertion minimum. */ + end_node->values[RE_FUZZY_VAL_MIN_SUB] = args->code[6]; /* Substitution minimum. */ + end_node->values[RE_FUZZY_VAL_MIN_ERR] = args->code[8]; /* Error minimum. */ + + start_node->values[RE_FUZZY_VAL_MAX_DEL] = args->code[3]; /* Deletion maximum. */ + start_node->values[RE_FUZZY_VAL_MAX_INS] = args->code[5]; /* Insertion maximum. 
*/ + start_node->values[RE_FUZZY_VAL_MAX_SUB] = args->code[7]; /* Substitution maximum. */ + start_node->values[RE_FUZZY_VAL_MAX_ERR] = args->code[9]; /* Error maximum. */ + + start_node->values[RE_FUZZY_VAL_DEL_COST] = args->code[10]; /* Deletion cost. */ + start_node->values[RE_FUZZY_VAL_INS_COST] = args->code[11]; /* Insertion cost. */ + start_node->values[RE_FUZZY_VAL_SUB_COST] = args->code[12]; /* Substitution cost. */ + start_node->values[RE_FUZZY_VAL_MAX_COST] = args->code[13]; /* Total cost. */ + + args->code += 14; + + subargs = *args; + subargs.has_captures = FALSE; + subargs.is_fuzzy = TRUE; + subargs.within_fuzzy = TRUE; + + /* Compile the sequence and check that we've reached the end of the + * subpattern. + */ + status = build_sequence(&subargs); + if (status != RE_ERROR_SUCCESS) + return status; + + if (subargs.code[0] != RE_OP_END) + return RE_ERROR_ILLEGAL; + + args->code = subargs.code; + args->min_width = subargs.min_width; + args->has_captures |= subargs.has_captures; + + ++args->code; + + /* Append the fuzzy sequence. */ + add_node(args->end, start_node); + add_node(start_node, subargs.start); + add_node(subargs.end, end_node); + args->end = end_node; + + args->is_fuzzy = TRUE; + + return RE_ERROR_SUCCESS; +} + +/* Builds an ATOMIC node. */ +Py_LOCAL_INLINE(int) build_ATOMIC(RE_CompileArgs* args) { + RE_Node* atomic_node; + RE_CompileArgs subargs; + RE_Node* success_node; + int status; + + /* codes: opcode, sequence, end. */ + if (args->code + 1 > args->end_code) + return RE_ERROR_ILLEGAL; + + atomic_node = create_node(args->pattern, RE_OP_ATOMIC, 0, 0, 1); + if (!atomic_node) + return RE_ERROR_MEMORY; + + /* The number of repeat indexes. */ + atomic_node->values[0] = 0; + + ++args->code; + + subargs = *args; + subargs.has_captures = FALSE; + subargs.is_fuzzy = FALSE; + + /* Compile the sequence and check that we've reached the end of the + * subpattern. 
+ */ + status = build_sequence(&subargs); + if (status != RE_ERROR_SUCCESS) + return status; + + if (subargs.code[0] != RE_OP_END) + return RE_ERROR_ILLEGAL; + + /* Create the success node to terminate the subpattern. */ + success_node = create_node(subargs.pattern, RE_OP_SUCCESS, 0, 0, 0); + if (!success_node) + return RE_ERROR_MEMORY; + + /* Append the SUCCESS node. */ + add_node(subargs.end, success_node); + + /* Insert the subpattern. */ + atomic_node->nonstring.next_2.node = subargs.start; + + args->code = subargs.code; + args->min_width = subargs.min_width; + args->has_captures |= subargs.has_captures; + args->is_fuzzy |= subargs.is_fuzzy; + + ++args->code; + + /* Append the node. */ + add_node(args->end, atomic_node); + args->end = atomic_node; + + return RE_ERROR_SUCCESS; +} + +/* Builds a BOUNDARY node. */ +Py_LOCAL_INLINE(int) build_BOUNDARY(RE_CompileArgs* args) { + RE_UINT8 op; + RE_CODE flags; + RE_Node* node; + + /* codes: opcode, flags. */ + if (args->code + 1 > args->end_code) + return RE_ERROR_ILLEGAL; + + op = (RE_UINT8)args->code[0]; + flags = args->code[1]; + + args->code += 2; + + /* Create the node. */ + node = create_node(args->pattern, op, flags, 0, 0); + if (!node) + return RE_ERROR_MEMORY; + + /* Append the node. */ + add_node(args->end, node); + args->end = node; + + return RE_ERROR_SUCCESS; +} + +/* Builds a BRANCH node. */ +Py_LOCAL_INLINE(int) build_BRANCH(RE_CompileArgs* args) { + RE_Node* branch_node; + RE_Node* join_node; + Py_ssize_t smallest_min_width; + RE_CompileArgs subargs; + int status; + + /* codes: opcode, branch, next, branch, end. */ + if (args->code + 2 > args->end_code) + return RE_ERROR_ILLEGAL; + + /* Create nodes for the start and end of the branch sequence. */ + branch_node = create_node(args->pattern, RE_OP_BRANCH, 0, 0, 0); + join_node = create_node(args->pattern, RE_OP_BRANCH, 0, 0, 0); + if (!branch_node || !join_node) + return RE_ERROR_MEMORY; + + /* Append the node. 
*/ + add_node(args->end, branch_node); + args->end = join_node; + + smallest_min_width = PY_SSIZE_T_MAX; + + subargs = *args; + + /* A branch in the regular expression is compiled into a series of 2-way + * branches. + */ + do { + RE_Node* next_branch_node; + + /* Skip over the 'BRANCH' or 'NEXT' opcode. */ + ++subargs.code; + + /* Compile the sequence until the next 'BRANCH' or 'NEXT' opcode. */ + subargs.min_width = 0; + subargs.has_captures = FALSE; + subargs.is_fuzzy = FALSE; + status = build_sequence(&subargs); + if (status != RE_ERROR_SUCCESS) + return status; + + smallest_min_width = min_ssize_t(smallest_min_width, + subargs.min_width); + + args->has_captures |= subargs.has_captures; + args->is_fuzzy |= subargs.is_fuzzy; + + /* Append the sequence. */ + add_node(branch_node, subargs.start); + add_node(subargs.end, join_node); + + /* Create a start node for the next sequence and append it. */ + next_branch_node = create_node(subargs.pattern, RE_OP_BRANCH, 0, 0, 0); + if (!next_branch_node) + return RE_ERROR_MEMORY; + + add_node(branch_node, next_branch_node); + branch_node = next_branch_node; + } while (subargs.code < subargs.end_code && subargs.code[0] == RE_OP_NEXT); + + /* We should have reached the end of the branch. */ + if (subargs.code[0] != RE_OP_END) + return RE_ERROR_ILLEGAL; + + args->code = subargs.code; + + ++args->code; + args->min_width += smallest_min_width; + + return RE_ERROR_SUCCESS; +} + +/* Builds a CALL_REF node. */ +Py_LOCAL_INLINE(int) build_CALL_REF(RE_CompileArgs* args) { + RE_CODE call_ref; + RE_Node* start_node; + RE_Node* end_node; + RE_CompileArgs subargs; + int status; + + /* codes: opcode, call_ref. */ + if (args->code + 1 > args->end_code) + return RE_ERROR_ILLEGAL; + + call_ref = args->code[1]; + + args->code += 2; + + /* Create nodes for the start and end of the subpattern. 
*/ + start_node = create_node(args->pattern, RE_OP_CALL_REF, 0, 0, 1); + end_node = create_node(args->pattern, RE_OP_GROUP_RETURN, 0, 0, 0); + if (!start_node || !end_node) + return RE_ERROR_MEMORY; + + start_node->values[0] = call_ref; + + /* Compile the sequence and check that we've reached the end of the + * subpattern. + */ + subargs = *args; + subargs.has_captures = FALSE; + subargs.is_fuzzy = FALSE; + status = build_sequence(&subargs); + if (status != RE_ERROR_SUCCESS) + return status; + + if (subargs.code[0] != RE_OP_END) + return RE_ERROR_ILLEGAL; + + args->code = subargs.code; + args->min_width = subargs.min_width; + args->has_captures |= subargs.has_captures; + args->is_fuzzy |= subargs.is_fuzzy; + + ++args->code; + + /* Record that we defined a call_ref. */ + if (!record_call_ref_defined(args->pattern, call_ref, start_node)) + return RE_ERROR_MEMORY; + + /* Append the node. */ + add_node(args->end, start_node); + add_node(start_node, subargs.start); + add_node(subargs.end, end_node); + args->end = end_node; + + return RE_ERROR_SUCCESS; +} + +/* Builds a CHARACTER or PROPERTY node. */ +Py_LOCAL_INLINE(int) build_CHARACTER_or_PROPERTY(RE_CompileArgs* args) { + RE_UINT8 op; + RE_CODE flags; + Py_ssize_t step; + RE_Node* node; + + /* codes: opcode, flags, value. */ + if (args->code + 2 > args->end_code) + return RE_ERROR_ILLEGAL; + + op = (RE_UINT8)args->code[0]; + flags = args->code[1]; + + step = get_step(op); + + if (flags & RE_ZEROWIDTH_OP) + step = 0; + + /* Create the node. */ + node = create_node(args->pattern, op, flags, step, 1); + if (!node) + return RE_ERROR_MEMORY; + + node->values[0] = args->code[2]; + + args->code += 3; + + /* Append the node. */ + add_node(args->end, node); + args->end = node; + + if (step != 0) + ++args->min_width; + + return RE_ERROR_SUCCESS; +} + +/* Builds a GROUP node. 
*/ +Py_LOCAL_INLINE(int) build_GROUP(RE_CompileArgs* args) { + RE_CODE private_group; + RE_CODE public_group; + RE_Node* start_node; + RE_Node* end_node; + RE_CompileArgs subargs; + int status; + + /* codes: opcode, private_group, public_group. */ + if (args->code + 2 > args->end_code) + return RE_ERROR_ILLEGAL; + + private_group = args->code[1]; + public_group = args->code[2]; + + args->code += 3; + + /* Create nodes for the start and end of the capture group. */ + start_node = create_node(args->pattern, args->forward ? RE_OP_START_GROUP : + RE_OP_END_GROUP, 0, 0, 3); + end_node = create_node(args->pattern, args->forward ? RE_OP_END_GROUP : + RE_OP_START_GROUP, 0, 0, 3); + if (!start_node || !end_node) + return RE_ERROR_MEMORY; + + start_node->values[0] = private_group; + end_node->values[0] = private_group; + start_node->values[1] = public_group; + end_node->values[1] = public_group; + + /* Signal that the capture should be saved when it's complete. */ + start_node->values[2] = 0; + end_node->values[2] = 1; + + /* Record that we have a new capture group. */ + if (!record_group(args->pattern, private_group, start_node)) + return RE_ERROR_MEMORY; + + /* Compile the sequence and check that we've reached the end of the capture + * group. + */ + subargs = *args; + subargs.has_captures = FALSE; + subargs.is_fuzzy = FALSE; + status = build_sequence(&subargs); + if (status != RE_ERROR_SUCCESS) + return status; + + if (subargs.code[0] != RE_OP_END) + return RE_ERROR_ILLEGAL; + + args->code = subargs.code; + args->min_width = subargs.min_width; + if (subargs.has_captures || subargs.visible_captures) + args->has_captures = TRUE; + args->is_fuzzy |= subargs.is_fuzzy; + + ++args->code; + + /* Record that the capture group has closed. */ + record_group_end(args->pattern, private_group); + + /* Append the capture group. 
*/ + add_node(args->end, start_node); + add_node(start_node, subargs.start); + add_node(subargs.end, end_node); + args->end = end_node; + + return RE_ERROR_SUCCESS; +} + +/* Builds a GROUP_CALL node. */ +Py_LOCAL_INLINE(int) build_GROUP_CALL(RE_CompileArgs* args) { + RE_CODE call_ref; + RE_Node* node; + + /* codes: opcode, call_ref. */ + if (args->code + 1 > args->end_code) + return RE_ERROR_ILLEGAL; + + call_ref = args->code[1]; + + /* Create the node. */ + node = create_node(args->pattern, RE_OP_GROUP_CALL, 0, 0, 1); + if (!node) + return RE_ERROR_MEMORY; + + node->values[0] = call_ref; + + args->code += 2; + + /* Record that we used a call_ref. */ + if (!record_call_ref_used(args->pattern, call_ref)) + return RE_ERROR_MEMORY; + + /* Append the node. */ + add_node(args->end, node); + args->end = node; + + return RE_ERROR_SUCCESS; +} + +/* Builds a GROUP_EXISTS node. */ +Py_LOCAL_INLINE(int) build_GROUP_EXISTS(RE_CompileArgs* args) { + RE_CODE group; + RE_Node* start_node; + RE_Node* end_node; + RE_CompileArgs subargs; + Py_ssize_t min_width; + int status; + + /* codes: opcode, sequence, next, sequence, end. */ + if (args->code + 2 > args->end_code) + return RE_ERROR_ILLEGAL; + + group = args->code[1]; + + args->code += 2; + + /* Create nodes for the start and end of the structure. */ + start_node = create_node(args->pattern, RE_OP_GROUP_EXISTS, 0, 0, 1); + end_node = create_node(args->pattern, RE_OP_BRANCH, 0, 0, 0); + if (!start_node || !end_node) + return RE_ERROR_MEMORY; + + start_node->values[0] = group; + + subargs = *args; + subargs.min_width = 0; + subargs.has_captures = FALSE; + subargs.is_fuzzy = FALSE; + status = build_sequence(&subargs); + if (status != RE_ERROR_SUCCESS) + return status; + + args->code = subargs.code; + args->has_captures |= subargs.has_captures; + args->is_fuzzy |= subargs.is_fuzzy; + + min_width = subargs.min_width; + + /* Append the start node. 
*/ + add_node(args->end, start_node); + add_node(start_node, subargs.start); + add_node(subargs.end, end_node); + + if (args->code[0] == RE_OP_NEXT) { + ++args->code; + + subargs.code = args->code; + subargs.min_width = 0; + subargs.has_captures = FALSE; + subargs.is_fuzzy = FALSE; + status = build_sequence(&subargs); + if (status != RE_ERROR_SUCCESS) + return status; + + args->code = subargs.code; + args->has_captures |= subargs.has_captures; + args->is_fuzzy |= subargs.is_fuzzy; + + min_width = min_ssize_t(min_width, subargs.min_width); + + add_node(start_node, subargs.start); + add_node(subargs.end, end_node); + } else { + add_node(start_node, end_node); + + min_width = 0; + } + + args->min_width += min_width; + + if (args->code[0] != RE_OP_END) + return RE_ERROR_ILLEGAL; + + ++args->code; + + args->end = end_node; + + return RE_ERROR_SUCCESS; +} + +/* Builds a LOOKAROUND node. */ +Py_LOCAL_INLINE(int) build_LOOKAROUND(RE_CompileArgs* args) { + RE_CODE flags; + BOOL forward; + RE_Node* lookaround_node; + RE_Node* success_node; + RE_CompileArgs subargs; + int status; + + /* codes: opcode, flags, forward, sequence, end. */ + if (args->code + 3 > args->end_code) + return RE_ERROR_ILLEGAL; + + flags = args->code[1]; + forward = (BOOL)args->code[2]; + + /* Create a node for the lookaround. */ + lookaround_node = create_node(args->pattern, RE_OP_LOOKAROUND, flags, 0, + 2); + if (!lookaround_node) + return RE_ERROR_MEMORY; + + /* The number of repeat indexes. */ + lookaround_node->values[1] = 0; + + args->code += 3; + + /* Compile the sequence and check that we've reached the end of the + * subpattern. 
+ */ + subargs = *args; + subargs.forward = forward; + subargs.has_captures = FALSE; + subargs.is_fuzzy = FALSE; + status = build_sequence(&subargs); + if (status != RE_ERROR_SUCCESS) + return status; + + lookaround_node->values[0] = subargs.has_captures; + + if (subargs.code[0] != RE_OP_END) + return RE_ERROR_ILLEGAL; + + args->code = subargs.code; + args->has_captures |= subargs.has_captures; + args->is_fuzzy |= subargs.is_fuzzy; + ++args->code; + + /* Create the 'SUCCESS' node and append it to the subpattern. */ + success_node = create_node(args->pattern, RE_OP_SUCCESS, 0, 0, 0); + if (!success_node) + return RE_ERROR_MEMORY; + + /* Append the SUCCESS node. */ + add_node(subargs.end, success_node); + + /* Insert the subpattern into the node. */ + lookaround_node->nonstring.next_2.node = subargs.start; + + /* Append the lookaround. */ + add_node(args->end, lookaround_node); + args->end = lookaround_node; + + return RE_ERROR_SUCCESS; +} + +/* Builds a RANGE node. */ +Py_LOCAL_INLINE(int) build_RANGE(RE_CompileArgs* args) { + RE_UINT8 op; + RE_CODE flags; + Py_ssize_t step; + RE_Node* node; + + /* codes: opcode, flags, lower, upper. */ + if (args->code + 3 > args->end_code) + return RE_ERROR_ILLEGAL; + + op = (RE_UINT8)args->code[0]; + flags = args->code[1]; + + step = get_step(op); + + if (flags & RE_ZEROWIDTH_OP) + step = 0; + + /* Create the node. */ + node = create_node(args->pattern, op, flags, step, 2); + if (!node) + return RE_ERROR_MEMORY; + + node->values[0] = args->code[2]; + node->values[1] = args->code[3]; + + args->code += 4; + + /* Append the node. */ + add_node(args->end, node); + args->end = node; + + if (step != 0) + ++args->min_width; + + return RE_ERROR_SUCCESS; +} + +/* Builds a REF_GROUP node. */ +Py_LOCAL_INLINE(int) build_REF_GROUP(RE_CompileArgs* args) { + RE_CODE flags; + RE_CODE group; + RE_Node* node; + + /* codes: opcode, flags, group. 
*/ + if (args->code + 2 > args->end_code) + return RE_ERROR_ILLEGAL; + + flags = args->code[1]; + group = args->code[2]; + node = create_node(args->pattern, (RE_UINT8)args->code[0], flags, 0, 1); + if (!node) + return RE_ERROR_MEMORY; + + node->values[0] = group; + + args->code += 3; + + /* Record that we have a reference to a group. */ + if (!record_ref_group(args->pattern, group)) + return RE_ERROR_MEMORY; + + /* Append the reference. */ + add_node(args->end, node); + args->end = node; + + return RE_ERROR_SUCCESS; +} + +/* Builds a REPEAT node. */ +Py_LOCAL_INLINE(int) build_REPEAT(RE_CompileArgs* args) { + BOOL greedy; + RE_CODE min_count; + RE_CODE max_count; + int status; + + /* codes: opcode, min_count, max_count, sequence, end. */ + if (args->code + 3 > args->end_code) + return RE_ERROR_ILLEGAL; + + /* This includes special cases such as optional items, which we'll check + * for and treat specially. They don't need repeat counts, which helps us + * avoid unnecessary work when matching. + */ + greedy = args->code[0] == RE_OP_GREEDY_REPEAT; + min_count = args->code[1]; + max_count = args->code[2]; + if (args->code[1] > args->code[2]) + return RE_ERROR_ILLEGAL; + + args->code += 3; + + if (min_count == 0 && max_count == 1) { + /* Optional sequence. */ + RE_Node* branch_node; + RE_Node* join_node; + RE_CompileArgs subargs; + + /* Create the start and end nodes. */ + branch_node = create_node(args->pattern, RE_OP_BRANCH, 0, 0, 0); + join_node = create_node(args->pattern, RE_OP_BRANCH, 0, 0, 0); + if (!branch_node || !join_node) + return RE_ERROR_MEMORY; + + /* Compile the sequence and check that we've reached the end of it. 
*/ + subargs = *args; + subargs.has_captures = FALSE; + subargs.is_fuzzy = FALSE; + status = build_sequence(&subargs); + if (status != RE_ERROR_SUCCESS) + return status; + + if (subargs.code[0] != RE_OP_END) + return RE_ERROR_ILLEGAL; + + args->code = subargs.code; + args->has_captures |= subargs.has_captures; + args->is_fuzzy |= subargs.is_fuzzy; + + ++args->code; + + if (greedy) { + /* It's a greedy option. */ + add_node(branch_node, subargs.start); + add_node(branch_node, join_node); + } else { + /* It's a lazy option. */ + add_node(branch_node, join_node); + add_node(branch_node, subargs.start); + } + add_node(subargs.end, join_node); + + /* Append the optional sequence. */ + add_node(args->end, branch_node); + args->end = join_node; + } else if (min_count == 1 && max_count == 1) { + /* Singly-repeated sequence. */ + RE_CompileArgs subargs; + + subargs = *args; + subargs.has_captures = FALSE; + subargs.is_fuzzy = FALSE; + status = build_sequence(&subargs); + if (status != RE_ERROR_SUCCESS) + return status; + + if (subargs.code[0] != RE_OP_END) + return RE_ERROR_ILLEGAL; + + args->code = subargs.code; + args->min_width = subargs.min_width; + args->has_captures |= subargs.has_captures; + args->is_fuzzy |= subargs.is_fuzzy; + + ++args->code; + + /* Append the sequence. */ + add_node(args->end, subargs.start); + args->end = subargs.end; + } else { + size_t index; + RE_Node* repeat_node; + RE_CompileArgs subargs; + + index = args->pattern->repeat_count; + + /* Create the nodes for the repeat. */ + repeat_node = create_node(args->pattern, greedy ? RE_OP_GREEDY_REPEAT : + RE_OP_LAZY_REPEAT, 0, args->forward ? 
1 : -1, 4); + if (!repeat_node || !record_repeat(args->pattern, index, + args->repeat_depth)) + return RE_ERROR_MEMORY; + + repeat_node->values[0] = (RE_CODE)index; + repeat_node->values[1] = min_count; + repeat_node->values[2] = max_count; + repeat_node->values[3] = args->forward; + + if (args->within_fuzzy) + args->pattern->repeat_info[index].status |= RE_STATUS_BODY; + + /* Compile the 'body' and check that we've reached the end of it. */ + subargs = *args; + subargs.min_width = 0; + subargs.visible_captures = TRUE; + subargs.has_captures = FALSE; + subargs.is_fuzzy = FALSE; + ++subargs.repeat_depth; + status = build_sequence(&subargs); + if (status != RE_ERROR_SUCCESS) + return status; + + if (subargs.code[0] != RE_OP_END) + return RE_ERROR_ILLEGAL; + + args->code = subargs.code; + args->min_width += (Py_ssize_t)min_count * subargs.min_width; + args->has_captures |= subargs.has_captures; + args->is_fuzzy |= subargs.is_fuzzy; + + ++args->code; + + /* Is it a repeat of something which will match a single character? + * + * If it's in a fuzzy section then it won't be optimised as a + * single-character repeat. + */ + if (sequence_matches_one(subargs.start)) { + repeat_node->op = greedy ? RE_OP_GREEDY_REPEAT_ONE : + RE_OP_LAZY_REPEAT_ONE; + + /* Append the new sequence. */ + add_node(args->end, repeat_node); + repeat_node->nonstring.next_2.node = subargs.start; + args->end = repeat_node; + } else { + RE_Node* end_repeat_node; + RE_Node* end_node; + + end_repeat_node = create_node(args->pattern, greedy ? + RE_OP_END_GREEDY_REPEAT : RE_OP_END_LAZY_REPEAT, 0, args->forward + ? 
1 : -1, 4); + if (!end_repeat_node) + return RE_ERROR_MEMORY; + + end_repeat_node->values[0] = repeat_node->values[0]; + end_repeat_node->values[1] = repeat_node->values[1]; + end_repeat_node->values[2] = repeat_node->values[2]; + end_repeat_node->values[3] = args->forward; + + end_node = create_node(args->pattern, RE_OP_BRANCH, 0, 0, 0); + if (!end_node) + return RE_ERROR_MEMORY; + + /* Append the new sequence. */ + add_node(args->end, repeat_node); + add_node(repeat_node, subargs.start); + add_node(repeat_node, end_node); + add_node(subargs.end, end_repeat_node); + add_node(end_repeat_node, subargs.start); + add_node(end_repeat_node, end_node); + args->end = end_node; + } + } + + return RE_ERROR_SUCCESS; +} + +/* Builds a STRING node. */ +Py_LOCAL_INLINE(int) build_STRING(RE_CompileArgs* args, BOOL is_charset) { + RE_CODE flags; + RE_CODE length; + RE_UINT8 op; + Py_ssize_t step; + RE_Node* node; + size_t i; + + /* codes: opcode, flags, length, characters. */ + flags = args->code[1]; + length = args->code[2]; + if (args->code + 3 + length > args->end_code) + return RE_ERROR_ILLEGAL; + + op = (RE_UINT8)args->code[0]; + + step = get_step(op); + + /* Create the node. */ + node = create_node(args->pattern, op, flags, step * (Py_ssize_t)length, + length); + if (!node) + return RE_ERROR_MEMORY; + if (!is_charset) + node->status |= RE_STATUS_STRING; + + for (i = 0; i < length; i++) + node->values[i] = args->code[3 + i]; + + args->code += 3 + length; + + /* Append the node. */ + add_node(args->end, node); + args->end = node; + + /* Because of full case-folding, one character in the text could match + * multiple characters in the pattern. + */ + if (op == RE_OP_STRING_FLD || op == RE_OP_STRING_FLD_REV) + args->min_width += possible_unfolded_length((Py_ssize_t)length); + else + args->min_width += (Py_ssize_t)length; + + return RE_ERROR_SUCCESS; +} + +/* Builds a SET node. 
*/ +Py_LOCAL_INLINE(int) build_SET(RE_CompileArgs* args) { + RE_UINT8 op; + RE_CODE flags; + Py_ssize_t step; + RE_Node* node; + Py_ssize_t saved_min_width; + int status; + + /* codes: opcode, flags, members. */ + op = (RE_UINT8)args->code[0]; + flags = args->code[1]; + + step = get_step(op); + + if (flags & RE_ZEROWIDTH_OP) + step = 0; + + node = create_node(args->pattern, op, flags, step, 0); + if (!node) + return RE_ERROR_MEMORY; + + args->code += 2; + + /* Append the node. */ + add_node(args->end, node); + args->end = node; + + saved_min_width = args->min_width; + + /* Compile the character set. */ + do { + switch (args->code[0]) { + case RE_OP_CHARACTER: + case RE_OP_PROPERTY: + status = build_CHARACTER_or_PROPERTY(args); + if (status != RE_ERROR_SUCCESS) + return status; + break; + case RE_OP_RANGE: + status = build_RANGE(args); + if (status != RE_ERROR_SUCCESS) + return status; + break; + case RE_OP_SET_DIFF: + case RE_OP_SET_INTER: + case RE_OP_SET_SYM_DIFF: + case RE_OP_SET_UNION: + status = build_SET(args); + if (status != RE_ERROR_SUCCESS) + return status; + break; + case RE_OP_STRING: + /* A set of characters. */ + if (!build_STRING(args, TRUE)) + return FALSE; + break; + default: + /* Illegal opcode for a character set. */ + return RE_ERROR_ILLEGAL; + } + } while (args->code < args->end_code && args->code[0] != RE_OP_END); + + /* Check that we've reached the end correctly. (The last opcode should be + * 'END'.) + */ + if (args->code >= args->end_code || args->code[0] != RE_OP_END) + return RE_ERROR_ILLEGAL; + + ++args->code; + + /* At this point the set's members are in the main sequence. They need to + * be moved out-of-line. + */ + node->nonstring.next_2.node = node->next_1.node; + node->next_1.node = NULL; + args->end = node; + + args->min_width = saved_min_width; + + if (step != 0) + ++args->min_width; + + return RE_ERROR_SUCCESS; +} + +/* Builds a STRING_SET node. 
*/ +Py_LOCAL_INLINE(int) build_STRING_SET(RE_CompileArgs* args) { + RE_CODE index; + RE_CODE min_len; + RE_CODE max_len; + RE_Node* node; + + /* codes: opcode, index, min_len, max_len. */ + if (args->code + 3 > args->end_code) + return RE_ERROR_ILLEGAL; + + index = args->code[1]; + min_len = args->code[2]; + max_len = args->code[3]; + node = create_node(args->pattern, (RE_UINT8)args->code[0], 0, 0, 3); + if (!node) + return RE_ERROR_MEMORY; + + node->values[0] = index; + node->values[1] = min_len; + node->values[2] = max_len; + + args->code += 4; + + /* Append the reference. */ + add_node(args->end, node); + args->end = node; + + return RE_ERROR_SUCCESS; +} + +/* Builds a SUCCESS node . */ +Py_LOCAL_INLINE(int) build_SUCCESS(RE_CompileArgs* args) { + RE_Node* node; + /* code: opcode. */ + + /* Create the node. */ + node = create_node(args->pattern, RE_OP_SUCCESS, 0, 0, 0); + if (!node) + return RE_ERROR_MEMORY; + + ++args->code; + + /* Append the node. */ + add_node(args->end, node); + args->end = node; + + return RE_ERROR_SUCCESS; +} + +/* Builds a zero-width node. */ +Py_LOCAL_INLINE(int) build_zerowidth(RE_CompileArgs* args) { + RE_CODE flags; + RE_Node* node; + + /* codes: opcode, flags. */ + if (args->code + 1 > args->end_code) + return RE_ERROR_ILLEGAL; + + flags = args->code[1]; + + /* Create the node. */ + node = create_node(args->pattern, (RE_UINT8)args->code[0], flags, 0, 0); + if (!node) + return RE_ERROR_MEMORY; + + args->code += 2; + + /* Append the node. */ + add_node(args->end, node); + args->end = node; + + return RE_ERROR_SUCCESS; +} + +/* Builds a sequence of nodes from regular expression code. */ +Py_LOCAL_INLINE(int) build_sequence(RE_CompileArgs* args) { + int status; + + /* Guarantee that there's something to attach to. */ + args->start = create_node(args->pattern, RE_OP_BRANCH, 0, 0, 0); + args->end = args->start; + + /* The sequence should end with an opcode we don't understand. If it + * doesn't then the code is illegal. 
+ */ + while (args->code < args->end_code) { + /* The following code groups opcodes by format, not function. */ + switch (args->code[0]) { + case RE_OP_ANY: + case RE_OP_ANY_ALL: + case RE_OP_ANY_ALL_REV: + case RE_OP_ANY_REV: + case RE_OP_ANY_U: + case RE_OP_ANY_U_REV: + /* A simple opcode with no trailing codewords and width of 1. */ + status = build_ANY(args); + if (status != RE_ERROR_SUCCESS) + return status; + break; + case RE_OP_ATOMIC: + /* An atomic sequence. */ + status = build_ATOMIC(args); + if (status != RE_ERROR_SUCCESS) + return status; + break; + case RE_OP_BOUNDARY: + case RE_OP_DEFAULT_BOUNDARY: + case RE_OP_DEFAULT_END_OF_WORD: + case RE_OP_DEFAULT_START_OF_WORD: + case RE_OP_END_OF_WORD: + case RE_OP_GRAPHEME_BOUNDARY: + case RE_OP_START_OF_WORD: + /* A word or grapheme boundary. */ + status = build_BOUNDARY(args); + if (status != RE_ERROR_SUCCESS) + return status; + break; + case RE_OP_BRANCH: + /* A 2-way branch. */ + status = build_BRANCH(args); + if (status != RE_ERROR_SUCCESS) + return status; + break; + case RE_OP_CALL_REF: + /* A group call ref. */ + status = build_CALL_REF(args); + if (status != RE_ERROR_SUCCESS) + return status; + break; + case RE_OP_CHARACTER: + case RE_OP_CHARACTER_IGN: + case RE_OP_CHARACTER_IGN_REV: + case RE_OP_CHARACTER_REV: + case RE_OP_PROPERTY: + case RE_OP_PROPERTY_IGN: + case RE_OP_PROPERTY_IGN_REV: + case RE_OP_PROPERTY_REV: + /* A character literal or a property. */ + status = build_CHARACTER_or_PROPERTY(args); + if (status != RE_ERROR_SUCCESS) + return status; + break; + case RE_OP_END_OF_LINE: + case RE_OP_END_OF_LINE_U: + case RE_OP_END_OF_STRING: + case RE_OP_END_OF_STRING_LINE: + case RE_OP_END_OF_STRING_LINE_U: + case RE_OP_SEARCH_ANCHOR: + case RE_OP_START_OF_LINE: + case RE_OP_START_OF_LINE_U: + case RE_OP_START_OF_STRING: + /* A simple opcode with no trailing codewords and width of 0. 
*/ + status = build_zerowidth(args); + if (status != RE_ERROR_SUCCESS) + return status; + break; + case RE_OP_FUZZY: + /* A fuzzy sequence. */ + status = build_FUZZY(args); + if (status != RE_ERROR_SUCCESS) + return status; + break; + case RE_OP_GREEDY_REPEAT: + case RE_OP_LAZY_REPEAT: + /* A repeated sequence. */ + status = build_REPEAT(args); + if (status != RE_ERROR_SUCCESS) + return status; + break; + case RE_OP_GROUP: + /* A capture group. */ + status = build_GROUP(args); + if (status != RE_ERROR_SUCCESS) + return status; + break; + case RE_OP_GROUP_CALL: + /* A group call. */ + status = build_GROUP_CALL(args); + if (status != RE_ERROR_SUCCESS) + return status; + break; + case RE_OP_GROUP_EXISTS: + /* A conditional sequence. */ + status = build_GROUP_EXISTS(args); + if (status != RE_ERROR_SUCCESS) + return status; + break; + case RE_OP_LOOKAROUND: + /* A lookaround. */ + status = build_LOOKAROUND(args); + if (status != RE_ERROR_SUCCESS) + return status; + break; + case RE_OP_RANGE: + case RE_OP_RANGE_IGN: + case RE_OP_RANGE_IGN_REV: + case RE_OP_RANGE_REV: + /* A range. */ + status = build_RANGE(args); + if (status != RE_ERROR_SUCCESS) + return status; + break; + case RE_OP_REF_GROUP: + case RE_OP_REF_GROUP_FLD: + case RE_OP_REF_GROUP_FLD_REV: + case RE_OP_REF_GROUP_IGN: + case RE_OP_REF_GROUP_IGN_REV: + case RE_OP_REF_GROUP_REV: + /* A reference to a group. */ + status = build_REF_GROUP(args); + if (status != RE_ERROR_SUCCESS) + return status; + break; + case RE_OP_SET_DIFF: + case RE_OP_SET_DIFF_IGN: + case RE_OP_SET_DIFF_IGN_REV: + case RE_OP_SET_DIFF_REV: + case RE_OP_SET_INTER: + case RE_OP_SET_INTER_IGN: + case RE_OP_SET_INTER_IGN_REV: + case RE_OP_SET_INTER_REV: + case RE_OP_SET_SYM_DIFF: + case RE_OP_SET_SYM_DIFF_IGN: + case RE_OP_SET_SYM_DIFF_IGN_REV: + case RE_OP_SET_SYM_DIFF_REV: + case RE_OP_SET_UNION: + case RE_OP_SET_UNION_IGN: + case RE_OP_SET_UNION_IGN_REV: + case RE_OP_SET_UNION_REV: + /* A set. 
*/ + status = build_SET(args); + if (status != RE_ERROR_SUCCESS) + return status; + break; + case RE_OP_STRING: + case RE_OP_STRING_FLD: + case RE_OP_STRING_FLD_REV: + case RE_OP_STRING_IGN: + case RE_OP_STRING_IGN_REV: + case RE_OP_STRING_REV: + /* A string literal. */ + if (!build_STRING(args, FALSE)) + return FALSE; + break; + case RE_OP_STRING_SET: + case RE_OP_STRING_SET_FLD: + case RE_OP_STRING_SET_FLD_REV: + case RE_OP_STRING_SET_IGN: + case RE_OP_STRING_SET_IGN_REV: + case RE_OP_STRING_SET_REV: + /* A reference to a list. */ + status = build_STRING_SET(args); + if (status != RE_ERROR_SUCCESS) + return status; + break; + case RE_OP_SUCCESS: + /* Success. */ + status = build_SUCCESS(args); + if (status != RE_ERROR_SUCCESS) + return status; + break; + default: + /* We've found an opcode which we don't recognise. We'll leave it + * for the caller. + */ + return RE_ERROR_SUCCESS; + } + } + + /* If we're here then we should be at the end of the code, otherwise we + * have an error. + */ + return args->code == args->end_code; +} + +/* Compiles the regular expression code to 'nodes'. + * + * Various details about the regular expression are discovered during + * compilation and stored in the PatternObject. + */ +Py_LOCAL_INLINE(BOOL) compile_to_nodes(RE_CODE* code, RE_CODE* end_code, + PatternObject* pattern) { + RE_CompileArgs args; + int status; + + /* Compile a regex sequence and then check that we've reached the end + * correctly. (The last opcode should be 'SUCCESS'.) + * + * If successful, 'start' and 'end' will point to the start and end nodes + * of the compiled sequence. 
+ */ + args.code = code; + args.end_code = end_code; + args.pattern = pattern; + args.forward = (pattern->flags & RE_FLAG_REVERSE) == 0; + args.min_width = 0; + args.visible_captures = FALSE; + args.has_captures = FALSE; + args.repeat_depth = 0; + args.is_fuzzy = FALSE; + args.within_fuzzy = FALSE; + status = build_sequence(&args); + if (status == RE_ERROR_ILLEGAL) + set_error(RE_ERROR_ILLEGAL, NULL); + + if (status != RE_ERROR_SUCCESS) + return FALSE; + + pattern->min_width = args.min_width; + pattern->is_fuzzy = args.is_fuzzy; + pattern->do_search_start = TRUE; + pattern->start_node = args.start; + + /* Optimise the pattern. */ + if (!optimise_pattern(pattern)) + return FALSE; + + pattern->start_test = locate_test_start(pattern->start_node); + + /* Get the call_ref for the entire pattern, if any. */ + if (pattern->start_node->op == RE_OP_CALL_REF) + pattern->pattern_call_ref = (Py_ssize_t)pattern->start_node->values[0]; + else + pattern->pattern_call_ref = -1; + + return TRUE; +} + +/* Gets the required characters for a regex. + * + * In the event of an error, it just pretends that there are no required + * characters. + */ +Py_LOCAL_INLINE(void) get_required_chars(PyObject* required_chars, RE_CODE** + req_chars, size_t* req_length) { + Py_ssize_t len; + RE_CODE* chars; + Py_ssize_t i; + + *req_chars = NULL; + *req_length = 0; + + len = PyTuple_GET_SIZE(required_chars); + if (len < 1 || PyErr_Occurred()) { + PyErr_Clear(); + return; + } + + chars = (RE_CODE*)re_alloc((size_t)len * sizeof(RE_CODE)); + if (!chars) + goto error; + + for (i = 0; i < len; i++) { + PyObject* o; + size_t value; + + /* PyTuple_SET_ITEM borrows the reference. 
*/ + o = PyTuple_GET_ITEM(required_chars, i); + + value = PyLong_AsUnsignedLong(o); + if ((Py_ssize_t)value == -1 && PyErr_Occurred()) + goto error; + + chars[i] = (RE_CODE)value; + if (chars[i] != value) + goto error; + } + + *req_chars = chars; + *req_length = (size_t)len; + + return; + +error: + PyErr_Clear(); + re_dealloc(chars); +} + +/* Makes a STRING node. */ +Py_LOCAL_INLINE(RE_Node*) make_STRING_node(PatternObject* pattern, RE_UINT8 op, + size_t length, RE_CODE* chars) { + Py_ssize_t step; + RE_Node* node; + size_t i; + + step = get_step(op); + + /* Create the node. */ + node = create_node(pattern, op, 0, step * (Py_ssize_t)length, length); + if (!node) + return NULL; + + node->status |= RE_STATUS_STRING; + + for (i = 0; i < length; i++) + node->values[i] = chars[i]; + + return node; +} + +/* Compiles regular expression code to a PatternObject. + * + * The regular expression code is provided as a list and is then compiled to + * 'nodes'. Various details about the regular expression are discovered during + * compilation and stored in the PatternObject. + */ +static PyObject* re_compile(PyObject* self_, PyObject* args) { + PyObject* pattern; + Py_ssize_t flags = 0; + PyObject* code_list; + PyObject* groupindex; + PyObject* indexgroup; + PyObject* named_lists; + PyObject* named_list_indexes; + Py_ssize_t req_offset; + PyObject* required_chars; + size_t req_length; + RE_CODE* req_chars; + Py_ssize_t req_flags; + size_t public_group_count; + Py_ssize_t code_len; + RE_CODE* code; + Py_ssize_t i; + PatternObject* self; + BOOL ascii; + BOOL locale; + BOOL unicode; + BOOL ok; + + if (!PyArg_ParseTuple(args, "OnOOOOOnOnn:re_compile", &pattern, &flags, + &code_list, &groupindex, &indexgroup, &named_lists, &named_list_indexes, + &req_offset, &required_chars, &req_flags, &public_group_count)) + return NULL; + + /* Read the regex code. 
*/ + code_len = PyList_GET_SIZE(code_list); + code = (RE_CODE*)re_alloc((size_t)code_len * sizeof(RE_CODE)); + if (!code) + return NULL; + + for (i = 0; i < code_len; i++) { + PyObject* o; + size_t value; + + /* PyList_GET_ITEM borrows a reference. */ + o = PyList_GET_ITEM(code_list, i); + + value = PyLong_AsUnsignedLong(o); + if ((Py_ssize_t)value == -1 && PyErr_Occurred()) + goto error; + + code[i] = (RE_CODE)value; + if (code[i] != value) + goto error; + } + + /* Get the required characters. */ + get_required_chars(required_chars, &req_chars, &req_length); + + /* Create the PatternObject. */ + self = PyObject_NEW(PatternObject, &Pattern_Type); + if (!self) { + set_error(RE_ERROR_MEMORY, NULL); + re_dealloc(req_chars); + re_dealloc(code); + return NULL; + } + + /* Initialise the PatternObject. */ + self->pattern = pattern; + self->flags = flags; + self->weakreflist = NULL; + self->start_node = NULL; + self->repeat_count = 0; + self->true_group_count = 0; + self->public_group_count = public_group_count; + self->group_end_index = 0; + self->groupindex = groupindex; + self->indexgroup = indexgroup; + self->named_lists = named_lists; + self->named_lists_count = (size_t)PyDict_Size(named_lists); + self->partial_named_lists[0] = NULL; + self->partial_named_lists[1] = NULL; + self->named_list_indexes = named_list_indexes; + self->node_capacity = 0; + self->node_count = 0; + self->node_list = NULL; + self->group_info_capacity = 0; + self->group_info = NULL; + self->call_ref_info_capacity = 0; + self->call_ref_info_count = 0; + self->call_ref_info = NULL; + self->repeat_info_capacity = 0; + self->repeat_info = NULL; + self->groups_storage = NULL; + self->repeats_storage = NULL; + self->fuzzy_count = 0; + self->recursive = FALSE; + self->req_offset = req_offset; + self->req_string = NULL; + Py_INCREF(self->pattern); + Py_INCREF(self->groupindex); + Py_INCREF(self->indexgroup); + Py_INCREF(self->named_lists); + Py_INCREF(self->named_list_indexes); + + /* Initialise the 
character encoding. */ + unicode = (flags & RE_FLAG_UNICODE) != 0; + locale = (flags & RE_FLAG_LOCALE) != 0; + ascii = (flags & RE_FLAG_ASCII) != 0; + if (!unicode && !locale && !ascii) { + if (PyString_Check(self->pattern)) + ascii = RE_FLAG_ASCII; + else + unicode = RE_FLAG_UNICODE; + } + if (unicode) + self->encoding = &unicode_encoding; + else if (locale) + self->encoding = &locale_encoding; + else if (ascii) + self->encoding = &ascii_encoding; + + /* Compile the regular expression code to nodes. */ + ok = compile_to_nodes(code, code + code_len, self); + + /* We no longer need the regular expression code. */ + re_dealloc(code); + + if (!ok) { + Py_DECREF(self); + re_dealloc(req_chars); + return NULL; + } + + /* Make a node for the required string, if there's one. */ + if (req_chars) { + /* Remove the FULLCASE flag if it's not a Unicode pattern. */ + if (!(self->flags & RE_FLAG_UNICODE)) + req_flags &= ~RE_FLAG_FULLCASE; + + if (self->flags & RE_FLAG_REVERSE) { + switch (req_flags) { + case 0: + self->req_string = make_STRING_node(self, RE_OP_STRING_REV, + req_length, req_chars); + break; + case RE_FLAG_IGNORECASE | RE_FLAG_FULLCASE: + self->req_string = make_STRING_node(self, RE_OP_STRING_FLD_REV, + req_length, req_chars); + break; + case RE_FLAG_IGNORECASE: + self->req_string = make_STRING_node(self, RE_OP_STRING_IGN_REV, + req_length, req_chars); + break; + } + } else { + switch (req_flags) { + case 0: + self->req_string = make_STRING_node(self, RE_OP_STRING, + req_length, req_chars); + break; + case RE_FLAG_IGNORECASE | RE_FLAG_FULLCASE: + self->req_string = make_STRING_node(self, RE_OP_STRING_FLD, + req_length, req_chars); + break; + case RE_FLAG_IGNORECASE: + self->req_string = make_STRING_node(self, RE_OP_STRING_IGN, + req_length, req_chars); + break; + } + } + + re_dealloc(req_chars); + } + + return (PyObject*)self; + +error: + re_dealloc(code); + set_error(RE_ERROR_ILLEGAL, NULL); + return NULL; +} + +/* Gets the size of the codewords. 
*/ +static PyObject* get_code_size(PyObject* self, PyObject* unused) { + return Py_BuildValue("n", sizeof(RE_CODE)); +} + +/* Gets the property dict. */ +static PyObject* get_properties(PyObject* self_, PyObject* args) { + Py_INCREF(property_dict); + + return property_dict; +} + +/* Folds the case of a string. */ +static PyObject* fold_case(PyObject* self_, PyObject* args) { + RE_StringInfo str_info; + Py_UCS4 (*char_at)(void* text, Py_ssize_t pos); + Py_ssize_t folded_charsize; + void (*set_char_at)(void* text, Py_ssize_t pos, Py_UCS4 ch); + RE_EncodingTable* encoding; + Py_ssize_t buf_size; + void* folded; + Py_ssize_t folded_len; + PyObject* result; + + Py_ssize_t flags; + PyObject* string; + if (!PyArg_ParseTuple(args, "nO:fold_case", &flags, &string)) + return NULL; + + if (!(flags & RE_FLAG_IGNORECASE)) { + Py_INCREF(string); + return string; + } + + /* Get the string. */ + if (!get_string(string, &str_info)) + return NULL; + + /* Get the function for reading from the original string. */ + switch (str_info.charsize) { + case 1: + char_at = bytes1_char_at; + break; + case 2: + char_at = bytes2_char_at; + break; + case 4: + char_at = bytes4_char_at; + break; + default: +#if PY_VERSION_HEX >= 0x02060000 + release_buffer(&str_info); + +#endif + return NULL; + } + + /* What's the encoding? */ + if (flags & RE_FLAG_UNICODE) + encoding = &unicode_encoding; + else if (flags & RE_FLAG_LOCALE) + encoding = &locale_encoding; + else if (flags & RE_FLAG_ASCII) + encoding = &ascii_encoding; + else + encoding = &unicode_encoding; + + /* The folded string will have the same width as the original string. */ + folded_charsize = str_info.charsize; + + /* Get the function for writing to the folded string. 
*/ + switch (folded_charsize) { + case 1: + set_char_at = bytes1_set_char_at; + break; + case 2: + set_char_at = bytes2_set_char_at; + break; + case 4: + set_char_at = bytes4_set_char_at; + break; + default: +#if PY_VERSION_HEX >= 0x02060000 + release_buffer(&str_info); + +#endif + return NULL; + } + + /* Allocate a buffer for the folded string. */ + if (flags & RE_FLAG_FULLCASE) + /* When using full case-folding with Unicode, some single codepoints + * are mapped to multiple codepoints. + */ + buf_size = str_info.length * RE_MAX_FOLDED; + else + buf_size = str_info.length; + + folded = re_alloc((size_t)(buf_size * folded_charsize)); + if (!folded) { +#if PY_VERSION_HEX >= 0x02060000 + release_buffer(&str_info); + +#endif + return NULL; + } + + /* Fold the case of the string. */ + folded_len = 0; + + if (flags & RE_FLAG_FULLCASE) { + /* Full case-folding. */ + int (*full_case_fold)(Py_UCS4 ch, Py_UCS4* folded); + Py_ssize_t i; + Py_UCS4 codepoints[RE_MAX_FOLDED]; + + full_case_fold = encoding->full_case_fold; + + for (i = 0; i < str_info.length; i++) { + int count; + int j; + + count = full_case_fold(char_at(str_info.characters, i), + codepoints); + for (j = 0; j < count; j++) + set_char_at(folded, folded_len + j, codepoints[j]); + + folded_len += count; + } + } else { + /* Simple case-folding. */ + Py_UCS4 (*simple_case_fold)(Py_UCS4 ch); + Py_ssize_t i; + + simple_case_fold = encoding->simple_case_fold; + + for (i = 0; i < str_info.length; i++) { + Py_UCS4 ch; + + ch = simple_case_fold(char_at(str_info.characters, i)); + set_char_at(folded, i, ch); + } + + folded_len = str_info.length; + } + + /* Build the result string. */ + if (str_info.is_unicode) + result = build_unicode_value(folded, folded_len, folded_charsize); + else + result = build_bytes_value(folded, folded_len, folded_charsize); + + re_dealloc(folded); + +#if PY_VERSION_HEX >= 0x02060000 + /* Release the original string's buffer. 
*/ + release_buffer(&str_info); + +#endif + return result; +} + +/* Returns a tuple of the Unicode characters that expand on full case-folding. + */ +static PyObject* get_expand_on_folding(PyObject* self, PyObject* unused) { + int count; + int i; + PyObject* result; + + /* How many characters are there? */ + count = sizeof(re_expand_on_folding) / sizeof(re_expand_on_folding[0]); + + /* Put all the characters in a tuple. */ + result = PyTuple_New(count); + if (!result) + return NULL; + + for (i = 0; i < count; i++) { + Py_UNICODE codepoint; + PyObject* item; + + codepoint = re_expand_on_folding[i]; + + item = build_unicode_value(&codepoint, 1, sizeof(codepoint)); + if (!item) + goto error; + + /* PyTuple_SetItem borrows the reference. */ + PyTuple_SetItem(result, i, item); + } + + return result; + +error: + Py_DECREF(result); + return NULL; +} + +/* Returns whether a character has a given value for a Unicode property. */ +static PyObject* has_property_value(PyObject* self_, PyObject* args) { + BOOL v; + + Py_ssize_t property_value; + Py_ssize_t character; + if (!PyArg_ParseTuple(args, "nn:has_property_value", &property_value, + &character)) + return NULL; + + v = unicode_has_property((RE_CODE)property_value, (Py_UCS4)character) ? 1 : + 0; + + return Py_BuildValue("n", v); +} + +/* Returns a list of all the simple cases of a character. + * + * If full case-folding is turned on and the character also expands on full + * case-folding, a None is appended to the list. + */ +static PyObject* get_all_cases(PyObject* self_, PyObject* args) { + RE_EncodingTable* encoding; + int count; + Py_UCS4 cases[RE_MAX_CASES]; + Py_UCS4 folded[RE_MAX_FOLDED]; + PyObject* result; + int i; + + Py_ssize_t flags; + Py_ssize_t character; + if (!PyArg_ParseTuple(args, "nn:get_all_cases", &flags, &character)) + return NULL; + + /* What's the encoding? 
*/ + if (flags & RE_FLAG_UNICODE) + encoding = &unicode_encoding; + else if (flags & RE_FLAG_LOCALE) + encoding = &locale_encoding; + else if (flags & RE_FLAG_ASCII) + encoding = &ascii_encoding; + else + encoding = &ascii_encoding; + + /* Get all the simple cases. */ + count = encoding->all_cases((Py_UCS4)character, cases); + + result = PyList_New(count); + if (!result) + return NULL; + + for (i = 0; i < count; i++) { + PyObject* item; + + item = Py_BuildValue("n", cases[i]); + if (!item) + goto error; + + /* PyList_SetItem borrows the reference. */ + PyList_SetItem(result, i, item); + } + + /* If the character also expands on full case-folding, append a None. */ + if ((flags & RE_FULL_CASE_FOLDING) == RE_FULL_CASE_FOLDING) { + count = encoding->full_case_fold((Py_UCS4)character, folded); + if (count > 1) + PyList_Append(result, Py_None); + } + + return result; + +error: + Py_DECREF(result); + return NULL; +} + +/* The table of the module's functions. */ +static PyMethodDef _functions[] = { + {"compile", (PyCFunction)re_compile, METH_VARARGS}, + {"get_code_size", (PyCFunction)get_code_size, METH_NOARGS}, + {"get_properties", (PyCFunction)get_properties, METH_VARARGS}, + {"fold_case", (PyCFunction)fold_case, METH_VARARGS}, + {"get_expand_on_folding", (PyCFunction)get_expand_on_folding, METH_NOARGS}, + {"has_property_value", (PyCFunction)has_property_value, METH_VARARGS}, + {"get_all_cases", (PyCFunction)get_all_cases, METH_VARARGS}, + {NULL, NULL} +}; + +/* Initialises the property dictionary. */ +Py_LOCAL_INLINE(BOOL) init_property_dict(void) { + size_t value_set_count; + size_t i; + PyObject** value_dicts; + + property_dict = NULL; + + /* How many value sets are there? 
*/ + value_set_count = 0; + + for (i = 0; i < sizeof(re_property_values) / sizeof(re_property_values[0]); + i++) { + RE_PropertyValue* value; + + value = &re_property_values[i]; + if (value->value_set >= value_set_count) + value_set_count = (size_t)value->value_set + 1; + } + + /* Quick references for the value sets. */ + value_dicts = (PyObject**)re_alloc(value_set_count * + sizeof(value_dicts[0])); + if (!value_dicts) + return FALSE; + + memset(value_dicts, 0, value_set_count * sizeof(value_dicts[0])); + + /* Build the property values dictionaries. */ + for (i = 0; i < sizeof(re_property_values) / sizeof(re_property_values[0]); + i++) { + RE_PropertyValue* value; + PyObject* v; + int status; + + value = &re_property_values[i]; + if (!value_dicts[value->value_set]) { + value_dicts[value->value_set] = PyDict_New(); + if (!value_dicts[value->value_set]) + goto error; + } + + v = Py_BuildValue("i", value->id); + if (!v) + goto error; + + status = PyDict_SetItemString(value_dicts[value->value_set], + re_strings[value->name], v); + Py_DECREF(v); + if (status < 0) + goto error; + } + + /* Build the property dictionary. */ + property_dict = PyDict_New(); + if (!property_dict) + goto error; + + for (i = 0; i < sizeof(re_properties) / sizeof(re_properties[0]); i++) { + RE_Property* property; + PyObject* v; + int status; + + property = &re_properties[i]; + v = Py_BuildValue("iO", property->id, + value_dicts[property->value_set]); + if (!v) + goto error; + + status = PyDict_SetItemString(property_dict, + re_strings[property->name], v); + Py_DECREF(v); + if (status < 0) + goto error; + } + + /* DECREF the value sets. Any unused ones will be deallocated. */ + for (i = 0; i < value_set_count; i++) + Py_XDECREF(value_dicts[i]); + + re_dealloc(value_dicts); + + return TRUE; + +error: + Py_XDECREF(property_dict); + + /* DECREF the value sets. 
*/ + for (i = 0; i < value_set_count; i++) + Py_XDECREF(value_dicts[i]); + + re_dealloc(value_dicts); + + return FALSE; +} + +/* Initialises the module. */ +PyMODINIT_FUNC init_regex(void) { + PyObject* m; + PyObject* d; + PyObject* x; + +#if defined(VERBOSE) + /* Unbuffered in case it crashes! */ + setvbuf(stdout, NULL, _IONBF, 0); + +#endif + /* Initialise Pattern_Type. */ + Pattern_Type.tp_dealloc = pattern_dealloc; + Pattern_Type.tp_repr = pattern_repr; + Pattern_Type.tp_flags = Py_TPFLAGS_HAVE_WEAKREFS; + Pattern_Type.tp_doc = pattern_doc; + Pattern_Type.tp_weaklistoffset = offsetof(PatternObject, weakreflist); + Pattern_Type.tp_methods = pattern_methods; + Pattern_Type.tp_members = pattern_members; + Pattern_Type.tp_getset = pattern_getset; + + /* Initialise Match_Type. */ + Match_Type.tp_dealloc = match_dealloc; + Match_Type.tp_repr = match_repr; + Match_Type.tp_as_mapping = &match_as_mapping; + Match_Type.tp_flags = Py_TPFLAGS_DEFAULT; + Match_Type.tp_doc = match_doc; + Match_Type.tp_methods = match_methods; + Match_Type.tp_members = match_members; + Match_Type.tp_getset = match_getset; + + /* Initialise Scanner_Type. */ + Scanner_Type.tp_dealloc = scanner_dealloc; + Scanner_Type.tp_flags = Py_TPFLAGS_DEFAULT; + Scanner_Type.tp_doc = scanner_doc; + Scanner_Type.tp_iter = scanner_iter; + Scanner_Type.tp_iternext = scanner_iternext; + Scanner_Type.tp_methods = scanner_methods; + Scanner_Type.tp_members = scanner_members; + + /* Initialise Splitter_Type. 
*/ + Splitter_Type.tp_dealloc = splitter_dealloc; + Splitter_Type.tp_flags = Py_TPFLAGS_DEFAULT; + Splitter_Type.tp_doc = splitter_doc; + Splitter_Type.tp_iter = splitter_iter; + Splitter_Type.tp_iternext = splitter_iternext; + Splitter_Type.tp_methods = splitter_methods; + Splitter_Type.tp_members = splitter_members; + + /* Initialize object types */ + if (PyType_Ready(&Pattern_Type) < 0) + return; + if (PyType_Ready(&Match_Type) < 0) + return; + if (PyType_Ready(&Scanner_Type) < 0) + return; + if (PyType_Ready(&Splitter_Type) < 0) + return; + + error_exception = NULL; + + m = Py_InitModule("_" RE_MODULE, _functions); + if (!m) + return; + + d = PyModule_GetDict(m); + + x = PyInt_FromLong(RE_MAGIC); + if (x) { + PyDict_SetItemString(d, "MAGIC", x); + Py_DECREF(x); + } + + x = PyInt_FromLong(sizeof(RE_CODE)); + if (x) { + PyDict_SetItemString(d, "CODE_SIZE", x); + Py_DECREF(x); + } + + x = PyString_FromString(copyright); + if (x) { + PyDict_SetItemString(d, "copyright", x); + Py_DECREF(x); + } + + /* Initialise the property dictionary. */ + if (!init_property_dict()) + return; +} + +/* vim:ts=4:sw=4:et */ diff --git a/lib/regex/_regex.h b/lib/regex/_regex.h new file mode 100644 index 00000000..33dc1540 --- /dev/null +++ b/lib/regex/_regex.h @@ -0,0 +1,228 @@ +/* + * Secret Labs' Regular Expression Engine + * + * regular expression matching engine + * + * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved. + * + * NOTE: This file is generated by regex.py. If you need + * to change anything in here, edit regex.py and run it. + * + * 2010-01-16 mrab Re-written + */ + +/* Supports Unicode version 6.3.0. */ + +#define RE_MAGIC 20100116 + +#include "_regex_unicode.h" + +/* Operators. 
*/ +#define RE_OP_FAILURE 0 +#define RE_OP_SUCCESS 1 +#define RE_OP_ANY 2 +#define RE_OP_ANY_ALL 3 +#define RE_OP_ANY_ALL_REV 4 +#define RE_OP_ANY_REV 5 +#define RE_OP_ANY_U 6 +#define RE_OP_ANY_U_REV 7 +#define RE_OP_ATOMIC 8 +#define RE_OP_BOUNDARY 9 +#define RE_OP_BRANCH 10 +#define RE_OP_CALL_REF 11 +#define RE_OP_CHARACTER 12 +#define RE_OP_CHARACTER_IGN 13 +#define RE_OP_CHARACTER_IGN_REV 14 +#define RE_OP_CHARACTER_REV 15 +#define RE_OP_DEFAULT_BOUNDARY 16 +#define RE_OP_DEFAULT_END_OF_WORD 17 +#define RE_OP_DEFAULT_START_OF_WORD 18 +#define RE_OP_END 19 +#define RE_OP_END_OF_LINE 20 +#define RE_OP_END_OF_LINE_U 21 +#define RE_OP_END_OF_STRING 22 +#define RE_OP_END_OF_STRING_LINE 23 +#define RE_OP_END_OF_STRING_LINE_U 24 +#define RE_OP_END_OF_WORD 25 +#define RE_OP_FUZZY 26 +#define RE_OP_GRAPHEME_BOUNDARY 27 +#define RE_OP_GREEDY_REPEAT 28 +#define RE_OP_GROUP 29 +#define RE_OP_GROUP_CALL 30 +#define RE_OP_GROUP_EXISTS 31 +#define RE_OP_LAZY_REPEAT 32 +#define RE_OP_LOOKAROUND 33 +#define RE_OP_NEXT 34 +#define RE_OP_PROPERTY 35 +#define RE_OP_PROPERTY_IGN 36 +#define RE_OP_PROPERTY_IGN_REV 37 +#define RE_OP_PROPERTY_REV 38 +#define RE_OP_RANGE 39 +#define RE_OP_RANGE_IGN 40 +#define RE_OP_RANGE_IGN_REV 41 +#define RE_OP_RANGE_REV 42 +#define RE_OP_REF_GROUP 43 +#define RE_OP_REF_GROUP_FLD 44 +#define RE_OP_REF_GROUP_FLD_REV 45 +#define RE_OP_REF_GROUP_IGN 46 +#define RE_OP_REF_GROUP_IGN_REV 47 +#define RE_OP_REF_GROUP_REV 48 +#define RE_OP_SEARCH_ANCHOR 49 +#define RE_OP_SET_DIFF 50 +#define RE_OP_SET_DIFF_IGN 51 +#define RE_OP_SET_DIFF_IGN_REV 52 +#define RE_OP_SET_DIFF_REV 53 +#define RE_OP_SET_INTER 54 +#define RE_OP_SET_INTER_IGN 55 +#define RE_OP_SET_INTER_IGN_REV 56 +#define RE_OP_SET_INTER_REV 57 +#define RE_OP_SET_SYM_DIFF 58 +#define RE_OP_SET_SYM_DIFF_IGN 59 +#define RE_OP_SET_SYM_DIFF_IGN_REV 60 +#define RE_OP_SET_SYM_DIFF_REV 61 +#define RE_OP_SET_UNION 62 +#define RE_OP_SET_UNION_IGN 63 +#define RE_OP_SET_UNION_IGN_REV 64 +#define 
RE_OP_SET_UNION_REV 65 +#define RE_OP_START_OF_LINE 66 +#define RE_OP_START_OF_LINE_U 67 +#define RE_OP_START_OF_STRING 68 +#define RE_OP_START_OF_WORD 69 +#define RE_OP_STRING 70 +#define RE_OP_STRING_FLD 71 +#define RE_OP_STRING_FLD_REV 72 +#define RE_OP_STRING_IGN 73 +#define RE_OP_STRING_IGN_REV 74 +#define RE_OP_STRING_REV 75 +#define RE_OP_STRING_SET 76 +#define RE_OP_STRING_SET_FLD 77 +#define RE_OP_STRING_SET_FLD_REV 78 +#define RE_OP_STRING_SET_IGN 79 +#define RE_OP_STRING_SET_IGN_REV 80 +#define RE_OP_STRING_SET_REV 81 +#define RE_OP_BODY_END 82 +#define RE_OP_BODY_START 83 +#define RE_OP_END_FUZZY 84 +#define RE_OP_END_GREEDY_REPEAT 85 +#define RE_OP_END_GROUP 86 +#define RE_OP_END_LAZY_REPEAT 87 +#define RE_OP_GREEDY_REPEAT_ONE 88 +#define RE_OP_GROUP_RETURN 89 +#define RE_OP_LAZY_REPEAT_ONE 90 +#define RE_OP_MATCH_BODY 91 +#define RE_OP_MATCH_TAIL 92 +#define RE_OP_START_GROUP 93 + +char* re_op_text[] = { + "RE_OP_FAILURE", + "RE_OP_SUCCESS", + "RE_OP_ANY", + "RE_OP_ANY_ALL", + "RE_OP_ANY_ALL_REV", + "RE_OP_ANY_REV", + "RE_OP_ANY_U", + "RE_OP_ANY_U_REV", + "RE_OP_ATOMIC", + "RE_OP_BOUNDARY", + "RE_OP_BRANCH", + "RE_OP_CALL_REF", + "RE_OP_CHARACTER", + "RE_OP_CHARACTER_IGN", + "RE_OP_CHARACTER_IGN_REV", + "RE_OP_CHARACTER_REV", + "RE_OP_DEFAULT_BOUNDARY", + "RE_OP_DEFAULT_END_OF_WORD", + "RE_OP_DEFAULT_START_OF_WORD", + "RE_OP_END", + "RE_OP_END_OF_LINE", + "RE_OP_END_OF_LINE_U", + "RE_OP_END_OF_STRING", + "RE_OP_END_OF_STRING_LINE", + "RE_OP_END_OF_STRING_LINE_U", + "RE_OP_END_OF_WORD", + "RE_OP_FUZZY", + "RE_OP_GRAPHEME_BOUNDARY", + "RE_OP_GREEDY_REPEAT", + "RE_OP_GROUP", + "RE_OP_GROUP_CALL", + "RE_OP_GROUP_EXISTS", + "RE_OP_LAZY_REPEAT", + "RE_OP_LOOKAROUND", + "RE_OP_NEXT", + "RE_OP_PROPERTY", + "RE_OP_PROPERTY_IGN", + "RE_OP_PROPERTY_IGN_REV", + "RE_OP_PROPERTY_REV", + "RE_OP_RANGE", + "RE_OP_RANGE_IGN", + "RE_OP_RANGE_IGN_REV", + "RE_OP_RANGE_REV", + "RE_OP_REF_GROUP", + "RE_OP_REF_GROUP_FLD", + "RE_OP_REF_GROUP_FLD_REV", + "RE_OP_REF_GROUP_IGN", 
+ "RE_OP_REF_GROUP_IGN_REV", + "RE_OP_REF_GROUP_REV", + "RE_OP_SEARCH_ANCHOR", + "RE_OP_SET_DIFF", + "RE_OP_SET_DIFF_IGN", + "RE_OP_SET_DIFF_IGN_REV", + "RE_OP_SET_DIFF_REV", + "RE_OP_SET_INTER", + "RE_OP_SET_INTER_IGN", + "RE_OP_SET_INTER_IGN_REV", + "RE_OP_SET_INTER_REV", + "RE_OP_SET_SYM_DIFF", + "RE_OP_SET_SYM_DIFF_IGN", + "RE_OP_SET_SYM_DIFF_IGN_REV", + "RE_OP_SET_SYM_DIFF_REV", + "RE_OP_SET_UNION", + "RE_OP_SET_UNION_IGN", + "RE_OP_SET_UNION_IGN_REV", + "RE_OP_SET_UNION_REV", + "RE_OP_START_OF_LINE", + "RE_OP_START_OF_LINE_U", + "RE_OP_START_OF_STRING", + "RE_OP_START_OF_WORD", + "RE_OP_STRING", + "RE_OP_STRING_FLD", + "RE_OP_STRING_FLD_REV", + "RE_OP_STRING_IGN", + "RE_OP_STRING_IGN_REV", + "RE_OP_STRING_REV", + "RE_OP_STRING_SET", + "RE_OP_STRING_SET_FLD", + "RE_OP_STRING_SET_FLD_REV", + "RE_OP_STRING_SET_IGN", + "RE_OP_STRING_SET_IGN_REV", + "RE_OP_STRING_SET_REV", + "RE_OP_BODY_END", + "RE_OP_BODY_START", + "RE_OP_END_FUZZY", + "RE_OP_END_GREEDY_REPEAT", + "RE_OP_END_GROUP", + "RE_OP_END_LAZY_REPEAT", + "RE_OP_GREEDY_REPEAT_ONE", + "RE_OP_GROUP_RETURN", + "RE_OP_LAZY_REPEAT_ONE", + "RE_OP_MATCH_BODY", + "RE_OP_MATCH_TAIL", + "RE_OP_START_GROUP", +}; + +#define RE_FLAG_ASCII 0x80 +#define RE_FLAG_BESTMATCH 0x1000 +#define RE_FLAG_DEBUG 0x200 +#define RE_FLAG_DOTALL 0x10 +#define RE_FLAG_ENHANCEMATCH 0x8000 +#define RE_FLAG_FULLCASE 0x4000 +#define RE_FLAG_IGNORECASE 0x2 +#define RE_FLAG_LOCALE 0x4 +#define RE_FLAG_MULTILINE 0x8 +#define RE_FLAG_REVERSE 0x400 +#define RE_FLAG_TEMPLATE 0x1 +#define RE_FLAG_UNICODE 0x20 +#define RE_FLAG_VERBOSE 0x40 +#define RE_FLAG_VERSION0 0x2000 +#define RE_FLAG_VERSION1 0x100 +#define RE_FLAG_WORD 0x800 diff --git a/lib/regex/_regex_core.py b/lib/regex/_regex_core.py new file mode 100644 index 00000000..5adbb524 --- /dev/null +++ b/lib/regex/_regex_core.py @@ -0,0 +1,4086 @@ +# +# Secret Labs' Regular Expression Engine core module +# +# Copyright (c) 1998-2001 by Secret Labs AB. All rights reserved. 
# The regex exception.
class error(Exception):
    """The exception raised for regex errors.

    The message is passed on to Exception; 'set_error' is stored on the
    instance unchanged and defaults to False.
    """

    def __init__(self, message, set_error=False):
        super(error, self).__init__(message)
        self.set_error = set_error
+U = UNICODE = 0x20 # Assume Unicode locale. +V0 = VERSION0 = 0x2000 # Old legacy behaviour. +V1 = VERSION1 = 0x100 # New enhanced behaviour. +W = WORD = 0x800 # Default Unicode word breaks. +X = VERBOSE = 0x40 # Ignore whitespace and comments. +T = TEMPLATE = 0x1 # Template (present because re module has it). + +DEFAULT_VERSION = VERSION1 + +_ALL_VERSIONS = VERSION0 | VERSION1 +_ALL_ENCODINGS = ASCII | LOCALE | UNICODE + +# The default flags for the various versions. +DEFAULT_FLAGS = {VERSION0: 0, VERSION1: FULLCASE} + +# The mask for the flags. +GLOBAL_FLAGS = (_ALL_ENCODINGS | _ALL_VERSIONS | BESTMATCH | DEBUG | + ENHANCEMATCH | REVERSE) +SCOPED_FLAGS = FULLCASE | IGNORECASE | MULTILINE | DOTALL | WORD | VERBOSE + +ALPHA = frozenset(string.ascii_letters) +DIGITS = frozenset(string.digits) +ALNUM = ALPHA | DIGITS +OCT_DIGITS = frozenset(string.octdigits) +HEX_DIGITS = frozenset(string.hexdigits) +SPECIAL_CHARS = frozenset("()|?*+{^$.[\\#") | frozenset([""]) +NAMED_CHAR_PART = ALNUM | frozenset(" -") +PROPERTY_NAME_PART = ALNUM | frozenset(" &_-.") +SET_OPS = ("||", "~~", "&&", "--") + +# The width of the code words inside the regex engine. +BYTES_PER_CODE = _regex.get_code_size() +BITS_PER_CODE = BYTES_PER_CODE * 8 + +# The repeat count which represents infinity. +UNLIMITED = (1 << BITS_PER_CODE) - 1 + +# The regular expression flags. +REGEX_FLAGS = {"a": ASCII, "b": BESTMATCH, "e": ENHANCEMATCH, "f": FULLCASE, + "i": IGNORECASE, "L": LOCALE, "m": MULTILINE, "r": REVERSE, "s": DOTALL, "u": + UNICODE, "V0": VERSION0, "V1": VERSION1, "w": WORD, "x": VERBOSE} + +# The case flags. +CASE_FLAGS = FULLCASE | IGNORECASE +NOCASE = 0 +FULLIGNORECASE = FULLCASE | IGNORECASE + +FULL_CASE_FOLDING = UNICODE | FULLIGNORECASE + +# The number of digits in hexadecimal escapes. +HEX_ESCAPES = {"x": 2, "u": 4, "U": 8} + +# A singleton which indicates a comment within a pattern. +COMMENT = object() +FLAGS = object() + +# The names of the opcodes. 
+OPCODES = """ +FAILURE +SUCCESS +ANY +ANY_ALL +ANY_ALL_REV +ANY_REV +ANY_U +ANY_U_REV +ATOMIC +BOUNDARY +BRANCH +CALL_REF +CHARACTER +CHARACTER_IGN +CHARACTER_IGN_REV +CHARACTER_REV +DEFAULT_BOUNDARY +DEFAULT_END_OF_WORD +DEFAULT_START_OF_WORD +END +END_OF_LINE +END_OF_LINE_U +END_OF_STRING +END_OF_STRING_LINE +END_OF_STRING_LINE_U +END_OF_WORD +FUZZY +GRAPHEME_BOUNDARY +GREEDY_REPEAT +GROUP +GROUP_CALL +GROUP_EXISTS +LAZY_REPEAT +LOOKAROUND +NEXT +PROPERTY +PROPERTY_IGN +PROPERTY_IGN_REV +PROPERTY_REV +RANGE +RANGE_IGN +RANGE_IGN_REV +RANGE_REV +REF_GROUP +REF_GROUP_FLD +REF_GROUP_FLD_REV +REF_GROUP_IGN +REF_GROUP_IGN_REV +REF_GROUP_REV +SEARCH_ANCHOR +SET_DIFF +SET_DIFF_IGN +SET_DIFF_IGN_REV +SET_DIFF_REV +SET_INTER +SET_INTER_IGN +SET_INTER_IGN_REV +SET_INTER_REV +SET_SYM_DIFF +SET_SYM_DIFF_IGN +SET_SYM_DIFF_IGN_REV +SET_SYM_DIFF_REV +SET_UNION +SET_UNION_IGN +SET_UNION_IGN_REV +SET_UNION_REV +START_OF_LINE +START_OF_LINE_U +START_OF_STRING +START_OF_WORD +STRING +STRING_FLD +STRING_FLD_REV +STRING_IGN +STRING_IGN_REV +STRING_REV +STRING_SET +STRING_SET_FLD +STRING_SET_FLD_REV +STRING_SET_IGN +STRING_SET_IGN_REV +STRING_SET_REV +""" + +# Define the opcodes in a namespace. +class Namespace(object): + pass + +OP = Namespace() +for i, op in enumerate(OPCODES.split()): + setattr(OP, op, i) + +def _shrink_cache(cache_dict, args_dict, max_length, divisor=5): + """Make room in the given cache. + + Args: + cache_dict: The cache dictionary to modify. + args_dict: The dictionary of named list args used by patterns. + max_length: Maximum # of entries in cache_dict before it is shrunk. + divisor: Cache will shrink to max_length - 1/divisor*max_length items. + """ + # Toss out a fraction of the entries at random to make room for new ones. + # A random algorithm was chosen as opposed to simply cache_dict.popitem() + # as popitem could penalize the same regular expression repeatedly based + # on its internal hash value. 
Being random should spread the cache miss + # love around. + cache_keys = tuple(cache_dict.keys()) + overage = len(cache_keys) - max_length + if overage < 0: + # Cache is already within limits. Normally this should not happen + # but it could due to multithreading. + return + + number_to_toss = max_length // divisor + overage + + # The import is done here to avoid a circular dependency. + import random + if not hasattr(random, 'sample'): + # Do nothing while resolving the circular dependency: + # re->random->warnings->tokenize->string->re + return + + for doomed_key in random.sample(cache_keys, number_to_toss): + try: + del cache_dict[doomed_key] + except KeyError: + # Ignore problems if the cache changed from another thread. + pass + + # Rebuild the arguments dictionary. + args_dict.clear() + for pattern, pattern_type, flags, args, default_version in cache_dict: + args_dict[pattern, pattern_type, flags, default_version] = args + +def _fold_case(info, string): + "Folds the case of a string." + flags = info.flags + if (flags & _ALL_ENCODINGS) == 0: + flags |= info.guess_encoding + + return _regex.fold_case(flags, string) + +def is_cased(info, char): + "Checks whether a character is cased." + return len(_regex.get_all_cases(info.flags, char)) > 1 + +def _compile_firstset(info, fs): + "Compiles the firstset for the pattern." + if not fs or None in fs: + return [] + + # If we ignore the case, for simplicity we won't build a firstset. + members = set() + for i in fs: + if i.case_flags: + if isinstance(i, Character): + if is_cased(info, i.value): + return [] + elif isinstance(i, SetBase): + return [] + + members.add(i.with_flags(case_flags=NOCASE)) + + # Build the firstset. + fs = SetUnion(info, list(members), zerowidth=True) + fs = fs.optimise(info, in_set=True) + + # Compile the firstset. + return fs.compile(bool(info.flags & REVERSE)) + +def _flatten_code(code): + "Flattens the code from a list of tuples." 
def make_property(info, prop, in_set):
    """Makes a property.

    Inside a set the property is returned as it is; otherwise a copy carrying
    the case flags currently in force in 'info' is returned.
    """
    if not in_set:
        return prop.with_flags(case_flags=info.flags & CASE_FLAGS)

    return prop
def apply_quantifier(source, info, counts, characters, case_flags, ch,
  saved_pos, applied, sequence):
    """Applies a quantifier to the preceding item and appends the result.

    'counts' is the (min_count, max_count) pair. The quantified item is the
    last pending literal character if there is one, otherwise the last item
    of 'sequence'. Raises error if there is nothing to repeat.
    """
    if not characters:
        # The quantifier applies to the last item in the sequence.
        if applied or not sequence:
            raise error("nothing to repeat at position %d" % saved_pos)

        element = sequence.pop()
    else:
        # The quantifier applies to the last character only; the characters
        # before it stay an ordinary literal.
        append_literal(characters[ : -1], case_flags, sequence)
        element = Character(characters[-1], case_flags=case_flags)

    min_count, max_count = counts

    # Look at the character after the quantifier for a suffix.
    suffix_pos = source.pos
    suffix = source.get()
    if suffix == "?":
        # Lazy repeat.
        repeated = LazyRepeat
    elif suffix == "+":
        # Possessive repeat.
        repeated = PossessiveRepeat
    else:
        # No suffix, so it's a greedy repeat; put the character back.
        source.pos = suffix_pos
        repeated = GreedyRepeat

    # The quantifier is ignored for a zero-width item and for a repeat count
    # fixed at exactly 1.
    if not element.is_empty() and (min_count != 1 or max_count != 1):
        element = repeated(element, min_count, max_count)

    sequence.append(element)
def parse_quantifier(source, info, ch):
    """Parses a quantifier.

    Returns the (min_count, max_count) pair for 'ch', or None if 'ch' does not
    start a quantifier.
    """
    counts = _QUANTIFIERS.get(ch)
    if not counts and ch == "{":
        # Looks like a limited repeated element, eg. 'a{2,3}'.
        counts = parse_limited_quantifier(source)

    return counts or None
def parse_fuzzy(source, ch):
    """Parses a fuzzy setting, if present.

    Returns the dict of constraints, or None if 'ch' is not "{" or the text
    does not parse as fuzzy items (the source position is then restored).
    Raises error if the items are not followed by "}".
    """
    if ch == "{":
        start_pos = source.pos
        settings = {}
        try:
            # One or more items separated by commas.
            while True:
                parse_fuzzy_item(source, settings)
                if not source.match(","):
                    break
        except ParseError:
            # Not a fuzzy setting after all.
            source.pos = start_pos
        else:
            if source.match("}"):
                return settings

            raise error("expected } at position %d" % source.pos)

    return None
def parse_cost_equation(source, constraints):
    """Parses a cost equation, eg. '2i+2d+1s<=4'.

    On success constraints["cost"] is set to a dict holding the coefficient
    of each term and the limit under the key "max".

    Raises error if there is already a cost equation, if the comparator or
    the limit is missing, or if the limit is negative once an exclusive
    comparator has been allowed for.
    """
    if "cost" in constraints:
        raise error("more than one cost equation at position %d" % source.pos)

    cost = {}

    # One or more terms joined by "+".
    parse_cost_term(source, cost)
    while source.match("+"):
        parse_cost_term(source, cost)

    max_inc = parse_fuzzy_compare(source)
    if max_inc is None:
        raise error("missing fuzzy cost limit at position %d" % source.pos)

    # The count can be empty, and int("") would raise a bare ValueError, so
    # report a missing limit as a regex error instead.
    limit_pos = source.pos
    digits = parse_count(source)
    if not digits:
        raise error("bad fuzzy cost limit at position %d" % limit_pos)

    max_cost = int(digits)

    # An exclusive limit ("<") is stored as the equivalent inclusive one.
    if not max_inc:
        max_cost -= 1

    if max_cost < 0:
        raise error("bad fuzzy cost limit at position %d" % source.pos)

    cost["max"] = max_cost

    constraints["cost"] = cost
+ element = parse_set(source, info) + return characters, case_flags, element + elif ch == "^": + # The start of a line or the string. + if info.flags & MULTILINE: + if info.flags & WORD: + element = StartOfLineU() + else: + element = StartOfLine() + else: + element = StartOfString() + + return characters, case_flags, element + elif ch == "$": + # The end of a line or the string. + if info.flags & MULTILINE: + if info.flags & WORD: + element = EndOfLineU() + else: + element = EndOfLine() + else: + if info.flags & WORD: + element = EndOfStringLineU() + else: + element = EndOfStringLine() + + return characters, case_flags, element + elif ch in "?*+{": + # Looks like a quantifier. + return characters, case_flags, (ch, saved_pos) + else: + # A literal. + characters.append(ord(ch)) + else: + # A literal. + characters.append(ord(ch)) + +def parse_paren(source, info): + """Parses a parenthesised subpattern or a flag. Returns FLAGS if it's an + inline flag. + """ + saved_pos = source.pos + ch = source.get() + if ch == "?": + # (?... + saved_pos_2 = source.pos + ch = source.get() + if ch == "<": + # (?<... + saved_pos_3 = source.pos + ch = source.get() + if ch in ("=", "!"): + # (?<=... or (?") + saved_flags = info.flags + try: + subpattern = _parse_pattern(source, info) + source.expect(")") + finally: + info.flags = saved_flags + source.ignore_space = bool(info.flags & VERBOSE) + + info.close_group() + return Group(info, group, subpattern) + if ch in ("=", "!"): + # (?=... or (?!...: lookahead. + return parse_lookaround(source, info, False, ch == "=") + if ch == "P": + # (?P...: a Python extension. + return parse_extension(source, info) + if ch == "#": + # (?#...: a comment. + return parse_comment(source) + if ch == "(": + # (?(...: a conditional subpattern. + return parse_conditional(source, info) + if ch == ">": + # (?>...: an atomic subpattern. + return parse_atomic(source, info) + if ch == "|": + # (?|...: a common/reset groups branch. 
+ return parse_common(source, info) + if ch == "R" or "0" <= ch <= "9": + # (?R...: probably a call to a group. + return parse_call_group(source, info, ch, saved_pos_2) + if ch == "&": + # (?&...: a call to a named group. + return parse_call_named_group(source, info, saved_pos_2) + + # (?...: probably a flags subpattern. + source.pos = saved_pos_2 + return parse_flags_subpattern(source, info) + + # (...: an unnamed capture group. + source.pos = saved_pos + group = info.open_group() + saved_flags = info.flags + try: + subpattern = _parse_pattern(source, info) + source.expect(")") + finally: + info.flags = saved_flags + source.ignore_space = bool(info.flags & VERBOSE) + + info.close_group() + + return Group(info, group, subpattern) + +def parse_extension(source, info): + "Parses a Python extension." + saved_pos = source.pos + ch = source.get() + if ch == "<": + # (?P<...: a named capture group. + name = parse_name(source) + group = info.open_group(name) + source.expect(">") + saved_flags = info.flags + try: + subpattern = _parse_pattern(source, info) + source.expect(")") + finally: + info.flags = saved_flags + source.ignore_space = bool(info.flags & VERBOSE) + + info.close_group() + + return Group(info, group, subpattern) + if ch == "=": + # (?P=...: a named group reference. + name = parse_name(source) + source.expect(")") + if info.is_open_group(name): + raise error("can't refer to an open group at position %d" % saved_pos) + + return make_ref_group(info, name, saved_pos) + if ch == ">" or ch == "&": + # (?P>...: a call to a group. + return parse_call_named_group(source, info, saved_pos) + + source.pos = saved_pos + raise error("unknown extension at position %d" % saved_pos) + +def parse_comment(source): + "Parses a comment." + source.skip_while(set(")"), include=False) + source.expect(")") + + return COMMENT + +def parse_lookaround(source, info, behind, positive): + "Parses a lookaround." 
+ saved_flags = info.flags + try: + subpattern = _parse_pattern(source, info) + source.expect(")") + finally: + info.flags = saved_flags + source.ignore_space = bool(info.flags & VERBOSE) + + return LookAround(behind, positive, subpattern) + +def parse_conditional(source, info): + "Parses a conditional subpattern." + saved_flags = info.flags + saved_pos = source.pos + try: + group = parse_name(source, True) + source.expect(")") + yes_branch = parse_sequence(source, info) + if source.match("|"): + no_branch = parse_sequence(source, info) + else: + no_branch = Sequence() + + source.expect(")") + finally: + info.flags = saved_flags + source.ignore_space = bool(info.flags & VERBOSE) + + if yes_branch.is_empty() and no_branch.is_empty(): + return Sequence() + + return Conditional(info, group, yes_branch, no_branch, saved_pos) + +def parse_atomic(source, info): + "Parses an atomic subpattern." + saved_flags = info.flags + try: + subpattern = _parse_pattern(source, info) + source.expect(")") + finally: + info.flags = saved_flags + source.ignore_space = bool(info.flags & VERBOSE) + + return Atomic(subpattern) + +def parse_common(source, info): + "Parses a common groups branch." + # Capture group numbers in different branches can reuse the group numbers. + initial_group_count = info.group_count + branches = [parse_sequence(source, info)] + final_group_count = info.group_count + while source.match("|"): + info.group_count = initial_group_count + branches.append(parse_sequence(source, info)) + final_group_count = max(final_group_count, info.group_count) + + info.group_count = final_group_count + source.expect(")") + + if len(branches) == 1: + return branches[0] + return Branch(branches) + +def parse_call_group(source, info, ch, pos): + "Parses a call to a group." 
+ if ch == "R": + group = "0" + else: + group = ch + source.get_while(DIGITS) + + source.expect(")") + + return CallGroup(info, group, pos) + +def parse_call_named_group(source, info, pos): + "Parses a call to a named group." + group = parse_name(source) + source.expect(")") + + return CallGroup(info, group, pos) + +def parse_flag_set(source): + "Parses a set of inline flags." + flags = 0 + + try: + while True: + saved_pos = source.pos + ch = source.get() + if ch == "V": + ch += source.get() + flags |= REGEX_FLAGS[ch] + except KeyError: + source.pos = saved_pos + + return flags + +def parse_flags(source, info): + "Parses flags being turned on/off." + flags_on = parse_flag_set(source) + if source.match("-"): + flags_off = parse_flag_set(source) + if not flags_off: + raise error("bad inline flags: no flags after '-' at position %d" % source.pos) + else: + flags_off = 0 + + return flags_on, flags_off + +def parse_subpattern(source, info, flags_on, flags_off): + "Parses a subpattern with scoped flags." + saved_flags = info.flags + info.flags = (info.flags | flags_on) & ~flags_off + source.ignore_space = bool(info.flags & VERBOSE) + try: + subpattern = _parse_pattern(source, info) + source.expect(")") + finally: + info.flags = saved_flags + source.ignore_space = bool(info.flags & VERBOSE) + + return subpattern + +def parse_flags_subpattern(source, info): + """Parses a flags subpattern. It could be inline flags or a subpattern + possibly with local flags. If it's a subpattern, then that's returned; + if it's a inline flags, then FLAGS is returned. + """ + flags_on, flags_off = parse_flags(source, info) + + if flags_off & GLOBAL_FLAGS: + raise error("bad inline flags: can't turn off global flag at position %d" % source.pos) + + if flags_on & flags_off: + raise error("bad inline flags: flag turned on and off at position %d" % source.pos) + + # Handle flags which are global in all regex behaviours. 
+ new_global_flags = (flags_on & ~info.global_flags) & GLOBAL_FLAGS + if new_global_flags: + info.global_flags |= new_global_flags + + # A global has been turned on, so reparse the pattern. + raise _UnscopedFlagSet(info.global_flags) + + # Ensure that from now on we have only scoped flags. + flags_on &= ~GLOBAL_FLAGS + + if source.match(":"): + return parse_subpattern(source, info, flags_on, flags_off) + + if source.match(")"): + parse_positional_flags(source, info, flags_on, flags_off) + return FLAGS + + raise error("unknown extension at position %d" % source.pos) + +def parse_positional_flags(source, info, flags_on, flags_off): + "Parses positional flags." + version = (info.flags & _ALL_VERSIONS) or DEFAULT_VERSION + if version == VERSION0: + # Positional flags are global and can only be turned on. + if flags_off: + raise error("bad inline flags: can't turn flags off at position %d" % source.pos) + + new_global_flags = flags_on & ~info.global_flags + if new_global_flags: + info.global_flags |= new_global_flags + + # A global has been turned on, so reparse the pattern. + raise _UnscopedFlagSet(info.global_flags) + else: + info.flags = (info.flags | flags_on) & ~flags_off + + source.ignore_space = bool(info.flags & VERBOSE) + +def parse_name(source, allow_numeric=False): + "Parses a name." + name = source.get_while(set(")>"), include=False) + + if not name: + raise error("bad group name at position %d" % source.pos) + + if name.isdigit(): + if not allow_numeric: + raise error("bad group name at position %d" % source.pos) + else: + if not is_identifier(name): + raise error("bad group name at position %d" % source.pos) + + return name + +def is_identifier(name): + if not name: + return False + + if name[0] not in ALPHA and name[0] != "_": + return False + + name = name.replace("_", "") + + return not name or all(c in ALNUM for c in name) + +def is_octal(string): + "Checks whether a string is octal." 
+ return all(ch in OCT_DIGITS for ch in string) + +def is_decimal(string): + "Checks whether a string is decimal." + return all(ch in DIGITS for ch in string) + +def is_hexadecimal(string): + "Checks whether a string is hexadecimal." + return all(ch in HEX_DIGITS for ch in string) + +def parse_escape(source, info, in_set): + "Parses an escape sequence." + saved_ignore = source.ignore_space + source.ignore_space = False + ch = source.get() + source.ignore_space = saved_ignore + if not ch: + # A backslash at the end of the pattern. + raise error("bad escape at position %d" % source.pos) + if ch in HEX_ESCAPES: + # A hexadecimal escape sequence. + return parse_hex_escape(source, info, HEX_ESCAPES[ch], in_set) + elif ch == "g" and not in_set: + # A group reference. + saved_pos = source.pos + try: + return parse_group_ref(source, info) + except error: + # Invalid as a group reference, so assume it's a literal. + source.pos = saved_pos + + return make_character(info, ord(ch), in_set) + elif ch == "G" and not in_set: + # A search anchor. + return SearchAnchor() + elif ch == "L" and not in_set: + # A string set. + return parse_string_set(source, info) + elif ch == "N": + # A named codepoint. + return parse_named_char(source, info, in_set) + elif ch in "pP": + # A Unicode property, positive or negative. + return parse_property(source, info, ch == "p", in_set) + elif ch == "X" and not in_set: + # A grapheme cluster. + return Grapheme() + elif ch in ALPHA: + # An alphabetic escape sequence. + # Positional escapes aren't allowed inside a character set. + if not in_set: + if info.flags & WORD: + value = WORD_POSITION_ESCAPES.get(ch) + else: + value = POSITION_ESCAPES.get(ch) + + if value: + return value + + value = CHARSET_ESCAPES.get(ch) + if value: + return value + + value = CHARACTER_ESCAPES.get(ch) + if value: + return Character(ord(value)) + + return make_character(info, ord(ch), in_set) + elif ch in DIGITS: + # A numeric escape sequence. 
+ return parse_numeric_escape(source, info, ch, in_set) + else: + # A literal. + return make_character(info, ord(ch), in_set) + +def parse_numeric_escape(source, info, ch, in_set): + "Parses a numeric escape sequence." + if in_set or ch == "0": + # Octal escape sequence, max 3 digits. + return parse_octal_escape(source, info, [ch], in_set) + + # At least 1 digit, so either octal escape or group. + digits = ch + saved_pos = source.pos + ch = source.get() + if ch in DIGITS: + # At least 2 digits, so either octal escape or group. + digits += ch + saved_pos = source.pos + ch = source.get() + if is_octal(digits) and ch in OCT_DIGITS: + # 3 octal digits, so octal escape sequence. + encoding = info.flags & _ALL_ENCODINGS + if encoding == ASCII or encoding == LOCALE: + octal_mask = 0xFF + else: + octal_mask = 0x1FF + + value = int(digits + ch, 8) & octal_mask + return make_character(info, value) + + # Group reference. + source.pos = saved_pos + if info.is_open_group(digits): + raise error("can't refer to an open group at position %d" % source.pos) + + return make_ref_group(info, digits, source.pos) + +def parse_octal_escape(source, info, digits, in_set): + "Parses an octal escape sequence." + saved_pos = source.pos + ch = source.get() + while len(digits) < 3 and ch in OCT_DIGITS: + digits.append(ch) + saved_pos = source.pos + ch = source.get() + + source.pos = saved_pos + try: + value = int("".join(digits), 8) + return make_character(info, value, in_set) + except ValueError: + raise error("bad octal escape at position %d" % source.pos) + +def parse_hex_escape(source, info, expected_len, in_set): + "Parses a hex escape sequence." + digits = [] + for i in range(expected_len): + ch = source.get() + if ch not in HEX_DIGITS: + raise error("bad hex escape at position %d" % source.pos) + digits.append(ch) + + value = int("".join(digits), 16) + return make_character(info, value, in_set) + +def parse_group_ref(source, info): + "Parses a group reference." 
+ source.expect("<") + saved_pos = source.pos + name = parse_name(source, True) + source.expect(">") + if info.is_open_group(name): + raise error("can't refer to an open group at position %d" % source.pos) + + return make_ref_group(info, name, saved_pos) + +def parse_string_set(source, info): + "Parses a string set reference." + source.expect("<") + name = parse_name(source, True) + source.expect(">") + if name is None or name not in info.kwargs: + raise error("undefined named list at position %d" % source.pos) + + return make_string_set(info, name) + +def parse_named_char(source, info, in_set): + "Parses a named character." + saved_pos = source.pos + if source.match("{"): + name = source.get_while(NAMED_CHAR_PART) + if source.match("}"): + try: + value = unicodedata.lookup(name) + return make_character(info, ord(value), in_set) + except KeyError: + raise error("undefined character name at position %d" % source.pos) + + source.pos = saved_pos + return make_character(info, ord("N"), in_set) + +def parse_property(source, info, positive, in_set): + "Parses a Unicode property." + saved_pos = source.pos + ch = source.get() + if ch == "{": + negate = source.match("^") + prop_name, name = parse_property_name(source) + if source.match("}"): + # It's correctly delimited. + prop = lookup_property(prop_name, name, positive != negate, source_pos=source.pos) + return make_property(info, prop, in_set) + elif ch and ch in "CLMNPSZ": + # An abbreviated property, eg \pL. + prop = lookup_property(None, ch, positive) + return make_property(info, prop, in_set, source_pos=source.pos) + + # Not a property, so treat as a literal "p" or "P". + source.pos = saved_pos + ch = "p" if positive else "P" + return make_character(info, ord(ch), in_set) + +def parse_property_name(source): + "Parses a property name, which may be qualified." 
+ name = source.get_while(PROPERTY_NAME_PART) + saved_pos = source.pos + + ch = source.get() + if ch and ch in ":=": + prop_name = name + name = source.get_while(ALNUM | set(" &_-./")).strip() + + if name: + # Name after the ":" or "=", so it's a qualified name. + saved_pos = source.pos + else: + # No name after the ":" or "=", so assume it's an unqualified name. + prop_name, name = None, prop_name + else: + prop_name = None + + source.pos = saved_pos + return prop_name, name + +def parse_set(source, info): + "Parses a character set." + version = (info.flags & _ALL_VERSIONS) or DEFAULT_VERSION + + saved_ignore = source.ignore_space + source.ignore_space = False + # Negative set? + negate = source.match("^") + try: + if version == VERSION0: + item = parse_set_imp_union(source, info) + else: + item = parse_set_union(source, info) + + if not source.match("]"): + raise error("missing ] at position %d" % source.pos) + finally: + source.ignore_space = saved_ignore + + if negate: + item = item.with_flags(positive=not item.positive) + + item = item.with_flags(case_flags=info.flags & CASE_FLAGS) + + return item + +def parse_set_union(source, info): + "Parses a set union ([x||y])." + items = [parse_set_symm_diff(source, info)] + while source.match("||"): + items.append(parse_set_symm_diff(source, info)) + + if len(items) == 1: + return items[0] + return SetUnion(info, items) + +def parse_set_symm_diff(source, info): + "Parses a set symmetric difference ([x~~y])." + items = [parse_set_inter(source, info)] + while source.match("~~"): + items.append(parse_set_inter(source, info)) + + if len(items) == 1: + return items[0] + return SetSymDiff(info, items) + +def parse_set_inter(source, info): + "Parses a set intersection ([x&&y])." 
+ items = [parse_set_diff(source, info)] + while source.match("&&"): + items.append(parse_set_diff(source, info)) + + if len(items) == 1: + return items[0] + return SetInter(info, items) + +def parse_set_diff(source, info): + "Parses a set difference ([x--y])." + items = [parse_set_imp_union(source, info)] + while source.match("--"): + items.append(parse_set_imp_union(source, info)) + + if len(items) == 1: + return items[0] + return SetDiff(info, items) + +def parse_set_imp_union(source, info): + "Parses a set implicit union ([xy])." + version = (info.flags & _ALL_VERSIONS) or DEFAULT_VERSION + + items = [parse_set_member(source, info)] + while True: + saved_pos = source.pos + if source.match("]"): + # End of the set. + source.pos = saved_pos + break + + if version == VERSION1 and any(source.match(op) for op in SET_OPS): + # The new behaviour has set operators. + source.pos = saved_pos + break + + items.append(parse_set_member(source, info)) + + if len(items) == 1: + return items[0] + return SetUnion(info, items) + +def parse_set_member(source, info): + "Parses a member in a character set." + # Parse a set item. + start = parse_set_item(source, info) + if (not isinstance(start, Character) or not start.positive or not + source.match("-")): + # It's not the start of a range. + return start + + # It looks like the start of a range of characters. + saved_pos = source.pos + if source.match("]"): + # We've reached the end of the set, so return both the character and + # hyphen. + source.pos = saved_pos + return SetUnion(info, [start, Character(ord("-"))]) + + # Parse a set item. + end = parse_set_item(source, info) + if not isinstance(end, Character) or not end.positive: + # It's not a range, so return the character, hyphen and property. + return SetUnion(info, [start, Character(ord("-")), end]) + + # It _is_ a range. 
+ if start.value > end.value: + raise error("bad character range at position %d" % source.pos) + + if start.value == end.value: + return start + + return Range(start.value, end.value) + +def parse_set_item(source, info): + "Parses an item in a character set." + version = (info.flags & _ALL_VERSIONS) or DEFAULT_VERSION + + if source.match("\\"): + # An escape sequence in a set. + return parse_escape(source, info, True) + + saved_pos = source.pos + if source.match("[:"): + # Looks like a POSIX character class. + try: + return parse_posix_class(source, info) + except ParseError: + # Not a POSIX character class. + source.pos = saved_pos + + if version == VERSION1 and source.match("["): + # It's the start of a nested set. + + # Negative set? + negate = source.match("^") + item = parse_set_union(source, info) + + if not source.match("]"): + raise error("missing ] at position %d" % source.pos) + + if negate: + item = item.with_flags(positive=not item.positive) + + return item + + ch = source.get() + if not ch: + raise error("bad set at position %d" % source.pos, True) + + return Character(ord(ch)) + +def parse_posix_class(source, info): + "Parses a POSIX character class." + negate = source.match("^") + prop_name, name = parse_property_name(source) + if not source.match(":]"): + raise ParseError() + + return lookup_property(prop_name, name, positive=not negate, source_pos=source.pos) + +def float_to_rational(flt): + "Converts a float to a rational pair." + int_part = int(flt) + error = flt - int_part + if abs(error) < 0.0001: + return int_part, 1 + + den, num = float_to_rational(1.0 / error) + + return int_part * den + num, den + +def numeric_to_rational(numeric): + "Converts a numeric string to a rational string, if possible." 
+ if numeric[0] == "-": + sign, numeric = numeric[0], numeric[1 : ] + else: + sign = "" + + parts = numeric.split("/") + if len(parts) == 2: + num, den = float_to_rational(float(parts[0]) / float(parts[1])) + elif len(parts) == 1: + num, den = float_to_rational(float(parts[0])) + else: + raise ValueError() + + result = "%s%s/%s" % (sign, num, den) + if result.endswith("/1"): + return result[ : -2] + + return result + +def standardise_name(name): + "Standardises a property or value name." + try: + return numeric_to_rational("".join(name)) + except (ValueError, ZeroDivisionError): + return "".join(ch for ch in name if ch not in "_- ").upper() + +def lookup_property(property, value, positive, source_pos=None): + "Looks up a property." + # Normalise the names (which may still be lists). + property = standardise_name(property) if property else None + value = standardise_name(value) + + if (property, value) == ("GENERALCATEGORY", "ASSIGNED"): + property, value, positive = "GENERALCATEGORY", "UNASSIGNED", not positive + + if property: + # Both the property and the value are provided. + prop = PROPERTIES.get(property) + if not prop: + raise error("unknown property at position %d" % source_pos) + + prop_id, value_dict = prop + val_id = value_dict.get(value) + if val_id is None: + raise error("unknown property value at position %d" % source_pos) + + if "YES" in value_dict and val_id == 0: + positive, val_id = not positive, 1 + + return Property((prop_id << 16) | val_id, positive) + + # Only the value is provided. + # It might be the name of a GC, script or block value. + for property in ("GC", "SCRIPT", "BLOCK"): + prop_id, value_dict = PROPERTIES.get(property) + val_id = value_dict.get(value) + if val_id is not None: + return Property((prop_id << 16) | val_id, positive) + + # It might be the name of a binary property. 
+ prop = PROPERTIES.get(value) + if prop: + prop_id, value_dict = prop + + if "YES" in value_dict: + return Property((prop_id << 16) | 1, positive) + + # It might be the name of a binary property starting with a prefix. + if value.startswith("IS"): + prop = PROPERTIES.get(value[2 : ]) + if prop: + prop_id, value_dict = prop + if "YES" in value_dict: + return Property((prop_id << 16) | 1, positive) + + # It might be the name of a script or block starting with a prefix. + for prefix, property in (("IS", "SCRIPT"), ("IN", "BLOCK")): + if value.startswith(prefix): + prop_id, value_dict = PROPERTIES.get(property) + val_id = value_dict.get(value[2 : ]) + if val_id is not None: + return Property((prop_id << 16) | val_id, positive) + + # Unknown property. + raise error("unknown property at position %d" % source_pos) + +def _compile_replacement(source, pattern, is_unicode): + "Compiles a replacement template escape sequence." + ch = source.get() + if ch in ALPHA: + # An alphabetic escape sequence. + value = CHARACTER_ESCAPES.get(ch) + if value: + return False, [ord(value)] + + if ch in HEX_ESCAPES and (ch == "x" or is_unicode): + # A hexadecimal escape sequence. + return False, [parse_repl_hex_escape(source, HEX_ESCAPES[ch])] + + if ch == "g": + # A group preference. + return True, [compile_repl_group(source, pattern)] + + if ch == "N" and is_unicode: + # A named character. + value = parse_repl_named_char(source) + if value is not None: + return False, [value] + + return False, [ord("\\"), ord(ch)] + + if isinstance(source.sep, str): + octal_mask = 0xFF + else: + octal_mask = 0x1FF + + if ch == "0": + # An octal escape sequence. + digits = ch + while len(digits) < 3: + saved_pos = source.pos + ch = source.get() + if ch not in OCT_DIGITS: + source.pos = saved_pos + break + digits += ch + + return False, [int(digits, 8) & octal_mask] + + if ch in DIGITS: + # Either an octal escape sequence (3 digits) or a group reference (max + # 2 digits). 
+ digits = ch + saved_pos = source.pos + ch = source.get() + if ch in DIGITS: + digits += ch + saved_pos = source.pos + ch = source.get() + if ch and is_octal(digits + ch): + # An octal escape sequence. + return False, [int(digits + ch, 8) & octal_mask] + + # A group reference. + source.pos = saved_pos + return True, [int(digits)] + + if ch == "\\": + # An escaped backslash is a backslash. + return False, [ord("\\")] + + if not ch: + # A trailing backslash. + raise error("bad escape at position %d" % source.pos) + + # An escaped non-backslash is a backslash followed by the literal. + return False, [ord("\\"), ord(ch)] + +def parse_repl_hex_escape(source, expected_len): + "Parses a hex escape sequence in a replacement string." + digits = [] + for i in range(expected_len): + ch = source.get() + if ch not in HEX_DIGITS: + raise error("bad hex escape at position %d" % source.pos) + digits.append(ch) + + return int("".join(digits), 16) + +def parse_repl_named_char(source): + "Parses a named character in a replacement string." + saved_pos = source.pos + if source.match("{"): + name = source.get_while(ALPHA | set(" ")) + + if source.match("}"): + try: + value = unicodedata.lookup(name) + return ord(value) + except KeyError: + raise error("undefined character name at position %d" % source.pos) + + source.pos = saved_pos + return None + +def compile_repl_group(source, pattern): + "Compiles a replacement template group reference." + source.expect("<") + name = parse_name(source, True) + + source.expect(">") + if name.isdigit(): + index = int(name) + if not 0 <= index <= pattern.groups: + raise error("invalid group at position %d" % source.pos) + + return index + + try: + return pattern.groupindex[name] + except KeyError: + raise IndexError("unknown group") + +# The regular expression is parsed into a syntax tree. The different types of +# node are defined below. 
# Unit of indentation; the _dump methods print INDENT * indent as a prefix.
# NOTE(review): this chunk was whitespace-collapsed, so confirm the width of
# the literal against the pristine file.
INDENT = " "

# Bit flags. The _compile methods OR these into an opcode's flags value (for
# example POSITIVE_OP, FUZZY_OP and REVERSE_OP in the zero-width nodes).
POSITIVE_OP = 1 << 0
ZEROWIDTH_OP = 1 << 1
FUZZY_OP = 1 << 2
REVERSE_OP = 1 << 3
REQUIRED_OP = 1 << 4

# Text for a node's 'positive' setting, as printed by the _dump methods.
POS_TEXT = {True: "MATCH", False: "NON-MATCH"}

# Text keyed by case-flag value (presumably for dump output, like POS_TEXT;
# confirm against the users of CASE_TEXT).
CASE_TEXT = {
    NOCASE: "",
    IGNORECASE: " SIMPLE_IGNORE_CASE",
    FULLCASE: "",
    FULLIGNORECASE: " FULL_IGNORE_CASE",
}

def make_sequence(items):
    "Returns the sole item of a 1-item list, otherwise a Sequence of the items."
    return items[0] if len(items) == 1 else Sequence(items)

# Common base class for all nodes.
class RegexBase(object):
    """Supplies the default behaviour of a node.

    The defaults below are deliberately minimal; subclasses override what they
    need. This class does not itself define 'positive', 'case_flags',
    'zerowidth', 'rebuild', 'max_width', '_compile' or '_dump', although some
    of its methods use them, so a subclass must provide whichever of those it
    relies on.
    """

    def __init__(self):
        # The key drives __hash__ and __eq__; by default it's just the class.
        self._key = self.__class__

    def with_flags(self, positive=None, case_flags=None, zerowidth=None):
        """Returns a node with the given settings.

        An argument which is None keeps the node's current setting. 'positive'
        and 'zerowidth' are coerced to bool and 'case_flags' is masked with
        CASE_FLAGS. If nothing would change then this node itself is returned,
        otherwise the result of self.rebuild(positive, case_flags, zerowidth).
        """
        positive = self.positive if positive is None else bool(positive)
        case_flags = (self.case_flags if case_flags is None else
          case_flags & CASE_FLAGS)
        zerowidth = self.zerowidth if zerowidth is None else bool(zerowidth)

        unchanged = (positive == self.positive and
          case_flags == self.case_flags and zerowidth == self.zerowidth)
        if unchanged:
            return self

        return self.rebuild(positive, case_flags, zerowidth)

    def fix_groups(self, reverse, fuzzy):
        "Default: does nothing."
        pass

    def optimise(self, info):
        "Default: returns this node unchanged."
        return self

    def pack_characters(self, info):
        "Default: returns this node unchanged."
        return self

    def remove_captures(self):
        "Default: returns this node unchanged."
        return self

    def is_atomic(self):
        "Default: True."
        return True

    def can_be_affix(self):
        "Default: True."
        return True

    def contains_group(self):
        "Default: False."
        return False

    def get_firstset(self, reverse):
        "Default: raises _FirstSetError."
        raise _FirstSetError()

    def has_simple_start(self):
        "Default: False."
        return False

    def compile(self, reverse=False, fuzzy=False):
        "Returns the result of the node's own _compile(reverse, fuzzy)."
        code = self._compile(reverse, fuzzy)
        return code

    def dump(self, indent, reverse):
        "Delegates to the node's own _dump(indent, reverse)."
        self._dump(indent, reverse)

    def is_empty(self):
        "Default: False."
        return False

    def __hash__(self):
        # Hash on the same key that __eq__ compares.
        return hash(self._key)

    def __eq__(self, other):
        # Equal only if exactly the same class and the keys match.
        if type(self) is not type(other):
            return False
        return self._key == other._key

    def __ne__(self, other):
        return not self.__eq__(other)

    def get_required_string(self, reverse):
        "Default: returns the pair (self.max_width(), None)."
        width = self.max_width()
        return width, None

# Base class for zero-width nodes.
+class ZeroWidthBase(RegexBase): + def __init__(self, positive=True): + RegexBase.__init__(self) + self.positive = bool(positive) + + self._key = self.__class__, self.positive + + def get_firstset(self, reverse): + return set([None]) + + def _compile(self, reverse, fuzzy): + flags = 0 + if self.positive: + flags |= POSITIVE_OP + if fuzzy: + flags |= FUZZY_OP + if reverse: + flags |= REVERSE_OP + return [(self._opcode, flags)] + + def _dump(self, indent, reverse): + print "%s%s %s" % (INDENT * indent, self._op_name, + POS_TEXT[self.positive]) + + def max_width(self): + return 0 + +class Any(RegexBase): + _opcode = {False: OP.ANY, True: OP.ANY_REV} + _op_name = "ANY" + + def has_simple_start(self): + return True + + def _compile(self, reverse, fuzzy): + flags = 0 + if fuzzy: + flags |= FUZZY_OP + return [(self._opcode[reverse], flags)] + + def _dump(self, indent, reverse): + print "%s%s" % (INDENT * indent, self._op_name) + + def max_width(self): + return 1 + +class AnyAll(Any): + _opcode = {False: OP.ANY_ALL, True: OP.ANY_ALL_REV} + _op_name = "ANY_ALL" + +class AnyU(Any): + _opcode = {False: OP.ANY_U, True: OP.ANY_U_REV} + _op_name = "ANY_U" + +class Atomic(RegexBase): + def __init__(self, subpattern): + RegexBase.__init__(self) + self.subpattern = subpattern + + def fix_groups(self, reverse, fuzzy): + self.subpattern.fix_groups(reverse, fuzzy) + + def optimise(self, info): + self.subpattern = self.subpattern.optimise(info) + + if self.subpattern.is_empty(): + return self.subpattern + return self + + def pack_characters(self, info): + self.subpattern = self.subpattern.pack_characters(info) + return self + + def remove_captures(self): + self.subpattern = self.subpattern.remove_captures() + return self + + def can_be_affix(self): + return self.subpattern.can_be_affix() + + def contains_group(self): + return self.subpattern.contains_group() + + def get_firstset(self, reverse): + return self.subpattern.get_firstset(reverse) + + def has_simple_start(self): + return 
self.subpattern.has_simple_start() + + def _compile(self, reverse, fuzzy): + return ([(OP.ATOMIC, )] + self.subpattern.compile(reverse, fuzzy) + + [(OP.END, )]) + + def _dump(self, indent, reverse): + print "%sATOMIC" % (INDENT * indent) + self.subpattern.dump(indent + 1, reverse) + + def is_empty(self): + return self.subpattern.is_empty() + + def __eq__(self, other): + return (type(self) is type(other) and self.subpattern == + other.subpattern) + + def max_width(self): + return self.subpattern.max_width() + + def get_required_string(self, reverse): + return self.subpattern.get_required_string(reverse) + +class Boundary(ZeroWidthBase): + _opcode = OP.BOUNDARY + _op_name = "BOUNDARY" + +class Branch(RegexBase): + def __init__(self, branches): + RegexBase.__init__(self) + self.branches = branches + + def fix_groups(self, reverse, fuzzy): + for b in self.branches: + b.fix_groups(reverse, fuzzy) + + def optimise(self, info): + # Flatten branches within branches. + branches = Branch._flatten_branches(info, self.branches) + + # Move any common prefix or suffix out of the branches. + prefix, branches = Branch._split_common_prefix(info, branches) + suffix, branches = Branch._split_common_suffix(info, branches) + + # Merge branches starting with the same character. (If a character + # prefix doesn't match in one branch, it won't match in any of the + # others starting with that same character.) + branches = Branch._merge_common_prefixes(info, branches) + + # Try to reduce adjacent single-character branches to sets. + branches = Branch._reduce_to_set(info, branches) + + if len(branches) > 1: + sequence = prefix + [Branch(branches)] + suffix + else: + sequence = prefix + branches + suffix + + return make_sequence(sequence) + + def optimise(self, info): + # Flatten branches within branches. + branches = Branch._flatten_branches(info, self.branches) + + # Try to reduce adjacent single-character branches to sets. 
+ branches = Branch._reduce_to_set(info, branches) + + if len(branches) > 1: + sequence = [Branch(branches)] + else: + sequence = branches + + return make_sequence(sequence) + + def pack_characters(self, info): + self.branches = [b.pack_characters(info) for b in self.branches] + return self + + def remove_captures(self): + self.branches = [b.remove_captures() for b in self.branches] + return self + + def is_atomic(self): + return all(b.is_atomic() for b in self.branches) + + def can_be_affix(self): + return all(b.can_be_affix() for b in self.branches) + + def contains_group(self): + return any(b.contains_group() for b in self.branches) + + def get_firstset(self, reverse): + fs = set() + for b in self.branches: + fs |= b.get_firstset(reverse) + + return fs or set([None]) + + def _compile(self, reverse, fuzzy): + code = [(OP.BRANCH, )] + for b in self.branches: + code.extend(b.compile(reverse, fuzzy)) + code.append((OP.NEXT, )) + + code[-1] = (OP.END, ) + + return code + + def _dump(self, indent, reverse): + print "%sBRANCH" % (INDENT * indent) + self.branches[0].dump(indent + 1, reverse) + for b in self.branches[1 : ]: + print "%sOR" % (INDENT * indent) + b.dump(indent + 1, reverse) + + @staticmethod + def _flatten_branches(info, branches): + # Flatten the branches so that there aren't branches of branches. + new_branches = [] + for b in branches: + b = b.optimise(info) + if isinstance(b, Branch): + new_branches.extend(b.branches) + else: + new_branches.append(b) + + return new_branches + + @staticmethod + def _split_common_prefix(info, branches): + # Common leading items can be moved out of the branches. + # Get the items in the branches. + alternatives = [] + for b in branches: + if isinstance(b, Sequence): + alternatives.append(b.items) + else: + alternatives.append([b]) + + # What is the maximum possible length of the prefix? + max_count = min(len(a) for a in alternatives) + + # What is the longest common prefix? 
+ prefix = alternatives[0] + pos = 0 + end_pos = max_count + while pos < end_pos and prefix[pos].can_be_affix() and all(a[pos] == + prefix[pos] for a in alternatives): + pos += 1 + count = pos + + if info.flags & UNICODE: + # We need to check that we're not splitting a sequence of + # characters which could form part of full case-folding. + count = pos + while count > 0 and not all(Branch._can_split(a, count) for a in + alternatives): + count -= 1 + + # No common prefix is possible. + if count == 0: + return [], branches + + # Rebuild the branches. + new_branches = [] + for a in alternatives: + new_branches.append(make_sequence(a[count : ])) + + return prefix[ : count], new_branches + + @staticmethod + def _split_common_suffix(info, branches): + # Common trailing items can be moved out of the branches. + # Get the items in the branches. + alternatives = [] + for b in branches: + if isinstance(b, Sequence): + alternatives.append(b.items) + else: + alternatives.append([b]) + + # What is the maximum possible length of the suffix? + max_count = min(len(a) for a in alternatives) + + # What is the longest common suffix? + suffix = alternatives[0] + pos = -1 + end_pos = -1 - max_count + while pos > end_pos and suffix[pos].can_be_affix() and all(a[pos] == + suffix[pos] for a in alternatives): + pos -= 1 + count = -1 - pos + + if info.flags & UNICODE: + # We need to check that we're not splitting a sequence of + # characters which could form part of full case-folding. + while count > 0 and not all(Branch._can_split_rev(a, count) for a + in alternatives): + count -= 1 + + # No common suffix is possible. + if count == 0: + return [], branches + + # Rebuild the branches. + new_branches = [] + for a in alternatives: + new_branches.append(make_sequence(a[ : -count])) + + return suffix[-count : ], new_branches + + @staticmethod + def _can_split(items, count): + # Check the characters either side of the proposed split. 
+ if not Branch._is_full_case(items, count - 1): + return True + + if not Branch._is_full_case(items, count): + return True + + # Check whether a 1-1 split would be OK. + if Branch._is_folded(items[count - 1 : count + 1]): + return False + + # Check whether a 1-2 split would be OK. + if (Branch._is_full_case(items, count + 2) and + Branch._is_folded(items[count - 1 : count + 2])): + return False + + # Check whether a 2-1 split would be OK. + if (Branch._is_full_case(items, count - 2) and + Branch._is_folded(items[count - 2 : count + 1])): + return False + + return True + + @staticmethod + def _can_split_rev(items, count): + end = len(items) + + # Check the characters either side of the proposed split. + if not Branch._is_full_case(items, end - count): + return True + + if not Branch._is_full_case(items, end - count - 1): + return True + + # Check whether a 1-1 split would be OK. + if Branch._is_folded(items[end - count - 1 : end - count + 1]): + return False + + # Check whether a 1-2 split would be OK. + if (Branch._is_full_case(items, end - count + 2) and + Branch._is_folded(items[end - count - 1 : end - count + 2])): + return False + + # Check whether a 2-1 split would be OK. + if (Branch._is_full_case(items, end - count - 2) and + Branch._is_folded(items[end - count - 2 : end - count + 1])): + return False + + return True + + @staticmethod + def _merge_common_prefixes(info, branches): + # Branches with the same case-sensitive character prefix can be grouped + # together if they are separated only by other branches with a + # character prefix. + prefixed = defaultdict(list) + order = {} + new_branches = [] + for b in branches: + if Branch._is_simple_character(b): + # Branch starts with a simple character. + prefixed[b.value].append([b]) + order.setdefault(b.value, len(order)) + elif (isinstance(b, Sequence) and b.items and + Branch._is_simple_character(b.items[0])): + # Branch starts with a simple character. 
+ prefixed[b.items[0].value].append(b.items) + order.setdefault(b.items[0].value, len(order)) + else: + Branch._flush_char_prefix(info, prefixed, order, new_branches) + + new_branches.append(b) + + Branch._flush_char_prefix(info, prefixed, order, new_branches) + + return new_branches + + @staticmethod + def _is_simple_character(c): + return isinstance(c, Character) and c.positive and not c.case_flags + + @staticmethod + def _reduce_to_set(info, branches): + # Can the branches be reduced to a set? + new_branches = [] + items = set() + case_flags = NOCASE + for b in branches: + if isinstance(b, (Character, Property, SetBase)): + # Branch starts with a single character. + if b.case_flags != case_flags: + # Different case sensitivity, so flush. + Branch._flush_set_members(info, items, case_flags, + new_branches) + + case_flags = b.case_flags + + items.add(b.with_flags(case_flags=NOCASE)) + else: + Branch._flush_set_members(info, items, case_flags, + new_branches) + + new_branches.append(b) + + Branch._flush_set_members(info, items, case_flags, new_branches) + + return new_branches + + @staticmethod + def _flush_char_prefix(info, prefixed, order, new_branches): + # Flush the prefixed branches. + if not prefixed: + return + + for value, branches in sorted(prefixed.items(), key=lambda pair: + order[pair[0]]): + if len(branches) == 1: + new_branches.append(make_sequence(branches[0])) + else: + subbranches = [] + optional = False + for b in branches: + if len(b) > 1: + subbranches.append(make_sequence(b[1 : ])) + elif not optional: + subbranches.append(Sequence()) + optional = True + + sequence = Sequence([Character(value), Branch(subbranches)]) + new_branches.append(sequence.optimise(info)) + + prefixed.clear() + order.clear() + + @staticmethod + def _flush_set_members(info, items, case_flags, new_branches): + # Flush the set members. 
+ if not items: + return + + if len(items) == 1: + item = list(items)[0] + else: + item = SetUnion(info, list(items)).optimise(info) + + new_branches.append(item.with_flags(case_flags=case_flags)) + + items.clear() + + @staticmethod + def _is_full_case(items, i): + if not 0 <= i < len(items): + return False + + item = items[i] + return (isinstance(item, Character) and item.positive and + (item.case_flags & FULLIGNORECASE) == FULLIGNORECASE) + + @staticmethod + def _is_folded(items): + if len(items) < 2: + return False + + for i in items: + if (not isinstance(i, Character) or not i.positive or not + i.case_flags): + return False + + folded = u"".join(unichr(i.value) for i in items) + folded = _regex.fold_case(FULL_CASE_FOLDING, folded) + + # Get the characters which expand to multiple codepoints on folding. + expanding_chars = _regex.get_expand_on_folding() + + for c in expanding_chars: + if folded == _regex.fold_case(FULL_CASE_FOLDING, c): + return True + + return False + + def is_empty(self): + return all(b.is_empty() for b in self.branches) + + def __eq__(self, other): + return type(self) is type(other) and self.branches == other.branches + + def max_width(self): + return max(b.max_width() for b in self.branches) + +class CallGroup(RegexBase): + def __init__(self, info, group, position): + RegexBase.__init__(self) + self.info = info + self.group = group + self.position = position + + self._key = self.__class__, self.group + + def fix_groups(self, reverse, fuzzy): + try: + self.group = int(self.group) + except ValueError: + try: + self.group = self.info.group_index[self.group] + except KeyError: + raise error("unknown group at position %d" % self.position) + + if not 0 <= self.group <= self.info.group_count: + raise error("unknown group at position %d" % self.position) + + if self.group > 0 and self.info.open_group_count[self.group] > 1: + raise error("ambiguous group reference at position %d" % self.position) + + self.info.group_calls.append((self, reverse, 
fuzzy)) + + self._key = self.__class__, self.group + + def remove_captures(self): + raise error("group reference not allowed at position %d" % self.position) + + def _compile(self, reverse, fuzzy): + return [(OP.GROUP_CALL, self.call_ref)] + + def _dump(self, indent, reverse): + print "%sGROUP_CALL %s" % (INDENT * indent, self.group) + + def __eq__(self, other): + return type(self) is type(other) and self.group == other.group + + def max_width(self): + return UNLIMITED + +class Character(RegexBase): + _opcode = {(NOCASE, False): OP.CHARACTER, (IGNORECASE, False): + OP.CHARACTER_IGN, (FULLCASE, False): OP.CHARACTER, (FULLIGNORECASE, + False): OP.CHARACTER_IGN, (NOCASE, True): OP.CHARACTER_REV, (IGNORECASE, + True): OP.CHARACTER_IGN_REV, (FULLCASE, True): OP.CHARACTER_REV, + (FULLIGNORECASE, True): OP.CHARACTER_IGN_REV} + + def __init__(self, value, positive=True, case_flags=NOCASE, + zerowidth=False): + RegexBase.__init__(self) + self.value = value + self.positive = bool(positive) + self.case_flags = case_flags + self.zerowidth = bool(zerowidth) + + if (self.positive and (self.case_flags & FULLIGNORECASE) == + FULLIGNORECASE): + self.folded = _regex.fold_case(FULL_CASE_FOLDING, unichr(self.value)) + else: + self.folded = unichr(self.value) + + self._key = (self.__class__, self.value, self.positive, + self.case_flags, self.zerowidth) + + def rebuild(self, positive, case_flags, zerowidth): + return Character(self.value, positive, case_flags, zerowidth) + + def optimise(self, info, in_set=False): + return self + + def get_firstset(self, reverse): + return set([self]) + + def has_simple_start(self): + return True + + def _compile(self, reverse, fuzzy): + flags = 0 + if self.positive: + flags |= POSITIVE_OP + if self.zerowidth: + flags |= ZEROWIDTH_OP + if fuzzy: + flags |= FUZZY_OP + + code = PrecompiledCode([self._opcode[self.case_flags, reverse], flags, + self.value]) + + if len(self.folded) > 1: + # The character expands on full case-folding. 
+ code = Branch([code, String([ord(c) for c in self.folded], + case_flags=self.case_flags)]) + + return code.compile(reverse, fuzzy) + + def _dump(self, indent, reverse): + display = repr(unichr(self.value)).lstrip("bu") + print "%sCHARACTER %s %s%s" % (INDENT * indent, + POS_TEXT[self.positive], display, CASE_TEXT[self.case_flags]) + + def matches(self, ch): + return (ch == self.value) == self.positive + + def max_width(self): + return len(self.folded) + + def get_required_string(self, reverse): + if not self.positive: + return 1, None + + self.folded_characters = tuple(ord(c) for c in self.folded) + + return 0, self + +class Conditional(RegexBase): + def __init__(self, info, group, yes_item, no_item, position): + RegexBase.__init__(self) + self.info = info + self.group = group + self.yes_item = yes_item + self.no_item = no_item + self.position = position + + def fix_groups(self, reverse, fuzzy): + try: + self.group = int(self.group) + except ValueError: + try: + self.group = self.info.group_index[self.group] + except KeyError: + raise error("unknown group at position %d" % self.position) + + if not 1 <= self.group <= self.info.group_count: + raise error("unknown group at position %d" % self.position) + + self.yes_item.fix_groups(reverse, fuzzy) + self.no_item.fix_groups(reverse, fuzzy) + + def optimise(self, info): + yes_item = self.yes_item.optimise(info) + no_item = self.no_item.optimise(info) + + return Conditional(info, self.group, yes_item, no_item, self.position) + + def pack_characters(self, info): + self.yes_item = self.yes_item.pack_characters(info) + self.no_item = self.no_item.pack_characters(info) + return self + + def remove_captures(self): + self.yes_item = self.yes_item.remove_captures() + self.no_item = self.no_item.remove_captures() + + def is_atomic(self): + return self.yes_item.is_atomic() and self.no_item.is_atomic() + + def can_be_affix(self): + return self.yes_item.can_be_affix() and self.no_item.can_be_affix() + + def contains_group(self): 
+ return self.yes_item.contains_group() or self.no_item.contains_group() + + def get_firstset(self, reverse): + return (self.yes_item.get_firstset(reverse) | + self.no_item.get_firstset(reverse)) + + def _compile(self, reverse, fuzzy): + code = [(OP.GROUP_EXISTS, self.group)] + code.extend(self.yes_item.compile(reverse, fuzzy)) + add_code = self.no_item.compile(reverse, fuzzy) + if add_code: + code.append((OP.NEXT, )) + code.extend(add_code) + + code.append((OP.END, )) + + return code + + def _dump(self, indent, reverse): + print "%sGROUP_EXISTS %s" % (INDENT * indent, self.group) + self.yes_item.dump(indent + 1, reverse) + if self.no_item: + print "%sOR" % (INDENT * indent) + self.no_item.dump(indent + 1, reverse) + + def is_empty(self): + return self.yes_item.is_empty() and self.no_item.is_empty() + + def __eq__(self, other): + return type(self) is type(other) and (self.group, self.yes_item, + self.no_item) == (other.group, other.yes_item, other.no_item) + + def max_width(self): + return max(self.yes_item.max_width(), self.no_item.max_width()) + +class DefaultBoundary(ZeroWidthBase): + _opcode = OP.DEFAULT_BOUNDARY + _op_name = "DEFAULT_BOUNDARY" + +class DefaultEndOfWord(ZeroWidthBase): + _opcode = OP.DEFAULT_END_OF_WORD + _op_name = "DEFAULT_END_OF_WORD" + +class DefaultStartOfWord(ZeroWidthBase): + _opcode = OP.DEFAULT_START_OF_WORD + _op_name = "DEFAULT_START_OF_WORD" + +class EndOfLine(ZeroWidthBase): + _opcode = OP.END_OF_LINE + _op_name = "END_OF_LINE" + +class EndOfLineU(EndOfLine): + _opcode = OP.END_OF_LINE_U + _op_name = "END_OF_LINE_U" + +class EndOfString(ZeroWidthBase): + _opcode = OP.END_OF_STRING + _op_name = "END_OF_STRING" + +class EndOfStringLine(ZeroWidthBase): + _opcode = OP.END_OF_STRING_LINE + _op_name = "END_OF_STRING_LINE" + +class EndOfStringLineU(EndOfStringLine): + _opcode = OP.END_OF_STRING_LINE_U + _op_name = "END_OF_STRING_LINE_U" + +class EndOfWord(ZeroWidthBase): + _opcode = OP.END_OF_WORD + _op_name = "END_OF_WORD" + +class 
Fuzzy(RegexBase): + def __init__(self, subpattern, constraints=None): + RegexBase.__init__(self) + if constraints is None: + constraints = {} + self.subpattern = subpattern + self.constraints = constraints + + # If an error type is mentioned in the cost equation, then its maximum + # defaults to unlimited. + if "cost" in constraints: + for e in "dis": + if e in constraints["cost"]: + constraints.setdefault(e, (0, None)) + + # If any error type is mentioned, then all the error maxima default to + # 0, otherwise they default to unlimited. + if set(constraints) & set("dis"): + for e in "dis": + constraints.setdefault(e, (0, 0)) + else: + for e in "dis": + constraints.setdefault(e, (0, None)) + + # The maximum of the generic error type defaults to unlimited. + constraints.setdefault("e", (0, None)) + + # The cost equation defaults to equal costs. Also, the cost of any + # error type not mentioned in the cost equation defaults to 0. + if "cost" in constraints: + for e in "dis": + constraints["cost"].setdefault(e, 0) + else: + constraints["cost"] = {"d": 1, "i": 1, "s": 1, "max": + constraints["e"][1]} + + def fix_groups(self, reverse, fuzzy): + self.subpattern.fix_groups(reverse, True) + + def pack_characters(self, info): + self.subpattern = self.subpattern.pack_characters(info) + return self + + def remove_captures(self): + self.subpattern = self.subpattern.remove_captures() + return self + + def is_atomic(self): + return self.subpattern.is_atomic() + + def contains_group(self): + return self.subpattern.contains_group() + + def _compile(self, reverse, fuzzy): + # The individual limits. + arguments = [] + for e in "dise": + v = self.constraints[e] + arguments.append(v[0]) + arguments.append(UNLIMITED if v[1] is None else v[1]) + + # The coeffs of the cost equation. + for e in "dis": + arguments.append(self.constraints["cost"][e]) + + # The maximum of the cost equation. 
+ v = self.constraints["cost"]["max"] + arguments.append(UNLIMITED if v is None else v) + + flags = 0 + if reverse: + flags |= REVERSE_OP + + return ([(OP.FUZZY, flags) + tuple(arguments)] + + self.subpattern.compile(reverse, True) + [(OP.END,)]) + + def _dump(self, indent, reverse): + constraints = self._constraints_to_string() + if constraints: + constraints = " " + constraints + print "%sFUZZY%s" % (INDENT * indent, constraints) + self.subpattern.dump(indent + 1, reverse) + + def is_empty(self): + return self.subpattern.is_empty() + + def __eq__(self, other): + return (type(self) is type(other) and self.subpattern == + other.subpattern) + + def max_width(self): + return UNLIMITED + + def _constraints_to_string(self): + constraints = [] + + for name in "ids": + min, max = self.constraints[name] + if max == 0: + continue + + con = "" + + if min > 0: + con = "%s<=" % min + + con += name + + if max is not None: + con += "<=%s" % max + + constraints.append(con) + + cost = [] + for name in "ids": + coeff = self.constraints["cost"][name] + if coeff > 0: + cost.append("%s%s" % (coeff, name)) + + limit = self.constraints["cost"]["max"] + if limit is not None and limit > 0: + cost = "%s<=%s" % ("+".join(cost), limit) + constraints.append(cost) + + return ",".join(constraints) + +class Grapheme(RegexBase): + def _compile(self, reverse, fuzzy): + # Match at least 1 character until a grapheme boundary is reached. Note + # that this is the same whether matching forwards or backwards. 
+ character_matcher = LazyRepeat(AnyAll(), 1, None).compile(reverse, + fuzzy) + boundary_matcher = [(OP.GRAPHEME_BOUNDARY, 1)] + + return character_matcher + boundary_matcher + + def _dump(self, indent, reverse): + print "%sGRAPHEME" % (INDENT * indent) + + def max_width(self): + return UNLIMITED + +class GreedyRepeat(RegexBase): + _opcode = OP.GREEDY_REPEAT + _op_name = "GREEDY_REPEAT" + + def __init__(self, subpattern, min_count, max_count): + RegexBase.__init__(self) + self.subpattern = subpattern + self.min_count = min_count + self.max_count = max_count + + def fix_groups(self, reverse, fuzzy): + self.subpattern.fix_groups(reverse, fuzzy) + + def optimise(self, info): + subpattern = self.subpattern.optimise(info) + + return type(self)(subpattern, self.min_count, self.max_count) + + def pack_characters(self, info): + self.subpattern = self.subpattern.pack_characters(info) + return self + + def remove_captures(self): + self.subpattern = self.subpattern.remove_captures() + return self + + def is_atomic(self): + return self.min_count == self.max_count and self.subpattern.is_atomic() + + def contains_group(self): + return self.subpattern.contains_group() + + def get_firstset(self, reverse): + fs = self.subpattern.get_firstset(reverse) + if self.min_count == 0: + fs.add(None) + + return fs + + def _compile(self, reverse, fuzzy): + repeat = [self._opcode, self.min_count] + if self.max_count is None: + repeat.append(UNLIMITED) + else: + repeat.append(self.max_count) + + subpattern = self.subpattern.compile(reverse, fuzzy) + if not subpattern: + return [] + + return ([tuple(repeat)] + subpattern + [(OP.END, )]) + + def _dump(self, indent, reverse): + if self.max_count is None: + limit = "INF" + else: + limit = self.max_count + print "%s%s %s %s" % (INDENT * indent, self._op_name, self.min_count, + limit) + + self.subpattern.dump(indent + 1, reverse) + + def is_empty(self): + return self.subpattern.is_empty() + + def __eq__(self, other): + return type(self) is 
type(other) and (self.subpattern, self.min_count, + self.max_count) == (other.subpattern, other.min_count, + other.max_count) + + def max_width(self): + if self.max_count is None: + return UNLIMITED + + return self.subpattern.max_width() * self.max_count + + def get_required_string(self, reverse): + max_count = UNLIMITED if self.max_count is None else self.max_count + if self.min_count == 0: + w = self.subpattern.max_width() * max_count + return min(w, UNLIMITED), None + + ofs, req = self.subpattern.get_required_string(reverse) + if req: + return ofs, req + + w = self.subpattern.max_width() * max_count + return min(w, UNLIMITED), None + +class Group(RegexBase): + def __init__(self, info, group, subpattern): + RegexBase.__init__(self) + self.info = info + self.group = group + self.subpattern = subpattern + + self.call_ref = None + + def fix_groups(self, reverse, fuzzy): + self.info.defined_groups[self.group] = (self, reverse, fuzzy) + self.subpattern.fix_groups(reverse, fuzzy) + + def optimise(self, info): + subpattern = self.subpattern.optimise(info) + + return Group(self.info, self.group, subpattern) + + def pack_characters(self, info): + self.subpattern = self.subpattern.pack_characters(info) + return self + + def remove_captures(self): + return self.subpattern.remove_captures() + + def is_atomic(self): + return self.subpattern.is_atomic() + + def can_be_affix(self): + return False + + def contains_group(self): + return True + + def get_firstset(self, reverse): + return self.subpattern.get_firstset(reverse) + + def has_simple_start(self): + return self.subpattern.has_simple_start() + + def _compile(self, reverse, fuzzy): + code = [] + + key = self.group, reverse, fuzzy + ref = self.info.call_refs.get(key) + if ref is not None: + code += [(OP.CALL_REF, ref)] + + public_group = private_group = self.group + if private_group < 0: + public_group = self.info.private_groups[private_group] + private_group = self.info.group_count - private_group + + code += ([(OP.GROUP, 
private_group, public_group)] + + self.subpattern.compile(reverse, fuzzy) + [(OP.END, )]) + + if ref is not None: + code += [(OP.END, )] + + return code + + def _dump(self, indent, reverse): + group = self.group + if group < 0: + group = private_groups[group] + print "%sGROUP %s" % (INDENT * indent, group) + self.subpattern.dump(indent + 1, reverse) + + def __eq__(self, other): + return (type(self) is type(other) and (self.group, self.subpattern) == + (other.group, other.subpattern)) + + def max_width(self): + return self.subpattern.max_width() + + def get_required_string(self, reverse): + return self.subpattern.get_required_string(reverse) + +class LazyRepeat(GreedyRepeat): + _opcode = OP.LAZY_REPEAT + _op_name = "LAZY_REPEAT" + +class LookAround(RegexBase): + _dir_text = {False: "AHEAD", True: "BEHIND"} + + def __new__(cls, behind, positive, subpattern): + if positive and subpattern.is_empty(): + return subpattern + + return RegexBase.__new__(cls) + + def __init__(self, behind, positive, subpattern): + RegexBase.__init__(self) + self.behind = bool(behind) + self.positive = bool(positive) + self.subpattern = subpattern + + def fix_groups(self, reverse, fuzzy): + self.subpattern.fix_groups(self.behind, fuzzy) + + def optimise(self, info): + subpattern = self.subpattern.optimise(info) + + return LookAround(self.behind, self.positive, subpattern) + + def pack_characters(self, info): + self.subpattern = self.subpattern.pack_characters(info) + return self + + def remove_captures(self): + return self.subpattern.remove_captures() + + def is_atomic(self): + return self.subpattern.is_atomic() + + def can_be_affix(self): + return self.subpattern.can_be_affix() + + def contains_group(self): + return self.subpattern.contains_group() + + def _compile(self, reverse, fuzzy): + return ([(OP.LOOKAROUND, int(self.positive), int(not self.behind))] + + self.subpattern.compile(self.behind) + [(OP.END, )]) + + def _dump(self, indent, reverse): + print "%sLOOK%s %s" % (INDENT * indent, 
self._dir_text[self.behind], + POS_TEXT[self.positive]) + self.subpattern.dump(indent + 1, self.behind) + + def is_empty(self): + return self.subpattern.is_empty() + + def __eq__(self, other): + return type(self) is type(other) and (self.behind, self.positive, + self.subpattern) == (other.behind, other.positive, other.subpattern) + + def max_width(self): + return 0 + +class PrecompiledCode(RegexBase): + def __init__(self, code): + self.code = code + + def _compile(self, reverse, fuzzy): + return [tuple(self.code)] + +class Property(RegexBase): + _opcode = {(NOCASE, False): OP.PROPERTY, (IGNORECASE, False): + OP.PROPERTY_IGN, (FULLCASE, False): OP.PROPERTY, (FULLIGNORECASE, False): + OP.PROPERTY_IGN, (NOCASE, True): OP.PROPERTY_REV, (IGNORECASE, True): + OP.PROPERTY_IGN_REV, (FULLCASE, True): OP.PROPERTY_REV, (FULLIGNORECASE, + True): OP.PROPERTY_IGN_REV} + + def __init__(self, value, positive=True, case_flags=NOCASE, + zerowidth=False): + RegexBase.__init__(self) + self.value = value + self.positive = bool(positive) + self.case_flags = case_flags + self.zerowidth = bool(zerowidth) + + self._key = (self.__class__, self.value, self.positive, + self.case_flags, self.zerowidth) + + def rebuild(self, positive, case_flags, zerowidth): + return Property(self.value, positive, case_flags, zerowidth) + + def optimise(self, info, in_set=False): + return self + + def get_firstset(self, reverse): + return set([self]) + + def has_simple_start(self): + return True + + def _compile(self, reverse, fuzzy): + flags = 0 + if self.positive: + flags |= POSITIVE_OP + if self.zerowidth: + flags |= ZEROWIDTH_OP + if fuzzy: + flags |= FUZZY_OP + return [(self._opcode[self.case_flags, reverse], flags, self.value)] + + def _dump(self, indent, reverse): + prop = PROPERTY_NAMES[self.value >> 16] + name, value = prop[0], prop[1][self.value & 0xFFFF] + print "%sPROPERTY %s %s:%s%s" % (INDENT * indent, + POS_TEXT[self.positive], name, value, CASE_TEXT[self.case_flags]) + + def matches(self, ch): + 
return _regex.has_property_value(self.value, ch) == self.positive + + def max_width(self): + return 1 + +class Range(RegexBase): + _opcode = {(NOCASE, False): OP.RANGE, (IGNORECASE, False): OP.RANGE_IGN, + (FULLCASE, False): OP.RANGE, (FULLIGNORECASE, False): OP.RANGE_IGN, + (NOCASE, True): OP.RANGE_REV, (IGNORECASE, True): OP.RANGE_IGN_REV, + (FULLCASE, True): OP.RANGE_REV, (FULLIGNORECASE, True): OP.RANGE_IGN_REV} + _op_name = "RANGE" + + def __init__(self, lower, upper, positive=True, case_flags=NOCASE, + zerowidth=False): + RegexBase.__init__(self) + self.lower = lower + self.upper = upper + self.positive = bool(positive) + self.case_flags = case_flags + self.zerowidth = bool(zerowidth) + + self._key = (self.__class__, self.lower, self.upper, self.positive, + self.case_flags, self.zerowidth) + + def rebuild(self, positive, case_flags, zerowidth): + return Range(self.lower, self.upper, positive, case_flags, zerowidth) + + def optimise(self, info, in_set=False): + # Is the range case-sensitive? + if not self.positive or not (self.case_flags & IGNORECASE) or in_set: + return self + + # Is full case-folding possible? + if (not (info.flags & UNICODE) or (self.case_flags & FULLIGNORECASE) != + FULLIGNORECASE): + return self + + # Get the characters which expand to multiple codepoints on folding. + expanding_chars = _regex.get_expand_on_folding() + + # Get the folded characters in the range. + items = [] + for ch in expanding_chars: + if self.lower <= ord(ch) <= self.upper: + folded = _regex.fold_case(FULL_CASE_FOLDING, ch) + items.append(String([ord(c) for c in folded], + case_flags=self.case_flags)) + + if not items: + # We can fall back to simple case-folding. + return self + + if len(items) < self.upper - self.lower + 1: + # Not all the characters are covered by the full case-folding. 
+ items.insert(0, self) + + return Branch(items) + + def _compile(self, reverse, fuzzy): + flags = 0 + if self.positive: + flags |= POSITIVE_OP + if self.zerowidth: + flags |= ZEROWIDTH_OP + if fuzzy: + flags |= FUZZY_OP + return [(self._opcode[self.case_flags, reverse], flags, self.lower, + self.upper)] + + def _dump(self, indent, reverse): + display_lower = repr(unichr(self.lower)).lstrip("bu") + display_upper = repr(unichr(self.upper)).lstrip("bu") + print "%sRANGE %s %s %s%s" % (INDENT * indent, POS_TEXT[self.positive], + display_lower, display_upper, CASE_TEXT[self.case_flags]) + + def matches(self, ch): + return (self.lower <= ch <= self.upper) == self.positive + + def max_width(self): + return 1 + +class RefGroup(RegexBase): + _opcode = {(NOCASE, False): OP.REF_GROUP, (IGNORECASE, False): + OP.REF_GROUP_IGN, (FULLCASE, False): OP.REF_GROUP, (FULLIGNORECASE, + False): OP.REF_GROUP_FLD, (NOCASE, True): OP.REF_GROUP_REV, (IGNORECASE, + True): OP.REF_GROUP_IGN_REV, (FULLCASE, True): OP.REF_GROUP_REV, + (FULLIGNORECASE, True): OP.REF_GROUP_FLD_REV} + + def __init__(self, info, group, position, case_flags=NOCASE): + RegexBase.__init__(self) + self.info = info + self.group = group + self.position = position + self.case_flags = case_flags + + self._key = self.__class__, self.group, self.case_flags + + def fix_groups(self, reverse, fuzzy): + try: + self.group = int(self.group) + except ValueError: + try: + self.group = self.info.group_index[self.group] + except KeyError: + raise error("unknown group at position %d" % self.position) + + if not 1 <= self.group <= self.info.group_count: + raise error("unknown group at position %d" % self.position) + + self._key = self.__class__, self.group, self.case_flags + + def remove_captures(self): + raise error("group reference not allowed at position %d" % self.position) + + def _compile(self, reverse, fuzzy): + flags = 0 + if fuzzy: + flags |= FUZZY_OP + return [(self._opcode[self.case_flags, reverse], flags, self.group)] + + 
def _dump(self, indent, reverse): + print "%sREF_GROUP %s%s" % (INDENT * indent, self.group, + CASE_TEXT[self.case_flags]) + + def max_width(self): + return UNLIMITED + +class SearchAnchor(ZeroWidthBase): + _opcode = OP.SEARCH_ANCHOR + _op_name = "SEARCH_ANCHOR" + +class Sequence(RegexBase): + def __init__(self, items=None): + RegexBase.__init__(self) + if items is None: + items = [] + + self.items = items + + def fix_groups(self, reverse, fuzzy): + for s in self.items: + s.fix_groups(reverse, fuzzy) + + def optimise(self, info): + # Flatten the sequences. + items = [] + for s in self.items: + s = s.optimise(info) + if isinstance(s, Sequence): + items.extend(s.items) + else: + items.append(s) + + return make_sequence(items) + + def pack_characters(self, info): + "Packs sequences of characters into strings." + items = [] + characters = [] + case_flags = NOCASE + for s in self.items: + if type(s) is Character and s.positive: + if s.case_flags != case_flags: + # Different case sensitivity, so flush, unless neither the + # previous nor the new character are cased. + if s.case_flags or is_cased(info, s.value): + Sequence._flush_characters(info, characters, + case_flags, items) + + case_flags = s.case_flags + + characters.append(s.value) + elif type(s) is String or type(s) is Literal: + if s.case_flags != case_flags: + # Different case sensitivity, so flush, unless the neither + # the previous nor the new string are cased. 
+ if s.case_flags or any(is_cased(info, c) for c in + characters): + Sequence._flush_characters(info, characters, + case_flags, items) + + case_flags = s.case_flags + + characters.extend(s.characters) + else: + Sequence._flush_characters(info, characters, case_flags, items) + + items.append(s.pack_characters(info)) + + Sequence._flush_characters(info, characters, case_flags, items) + + return make_sequence(items) + + def remove_captures(self): + self.items = [s.remove_captures() for s in self.items] + return self + + def is_atomic(self): + return all(s.is_atomic() for s in self.items) + + def can_be_affix(self): + return False + + def contains_group(self): + return any(s.contains_group() for s in self.items) + + def get_firstset(self, reverse): + fs = set() + items = self.items + if reverse: + items.reverse() + for s in items: + fs |= s.get_firstset(reverse) + if None not in fs: + return fs + fs.discard(None) + + return fs | set([None]) + + def has_simple_start(self): + return self.items and self.items[0].has_simple_start() + + def _compile(self, reverse, fuzzy): + seq = self.items + if reverse: + seq = seq[::-1] + + code = [] + for s in seq: + code.extend(s.compile(reverse, fuzzy)) + + return code + + def _dump(self, indent, reverse): + for s in self.items: + s.dump(indent, reverse) + + @staticmethod + def _flush_characters(info, characters, case_flags, items): + if not characters: + return + + # Disregard case_flags if all of the characters are case-less. 
+ if case_flags & IGNORECASE: + if not any(is_cased(info, c) for c in characters): + case_flags = NOCASE + + if len(characters) == 1: + items.append(Character(characters[0], case_flags=case_flags)) + else: + items.append(String(characters, case_flags=case_flags)) + + characters[:] = [] + + def is_empty(self): + return all(i.is_empty() for i in self.items) + + def __eq__(self, other): + return type(self) is type(other) and self.items == other.items + + def max_width(self): + return sum(s.max_width() for s in self.items) + + def get_required_string(self, reverse): + seq = self.items + if reverse: + seq = seq[::-1] + + offset = 0 + + for s in seq: + ofs, req = s.get_required_string(reverse) + offset += ofs + if req: + return offset, req + + return offset, None + +class SetBase(RegexBase): + def __init__(self, info, items, positive=True, case_flags=NOCASE, + zerowidth=False): + RegexBase.__init__(self) + self.info = info + self.items = tuple(items) + self.positive = bool(positive) + self.case_flags = case_flags + self.zerowidth = bool(zerowidth) + + self.char_width = 1 + + self._key = (self.__class__, self.items, self.positive, + self.case_flags, self.zerowidth) + + def rebuild(self, positive, case_flags, zerowidth): + return type(self)(self.info, self.items, positive, case_flags, + zerowidth).optimise(self.info) + + def get_firstset(self, reverse): + return set([self]) + + def has_simple_start(self): + return True + + def _compile(self, reverse, fuzzy): + flags = 0 + if self.positive: + flags |= POSITIVE_OP + if self.zerowidth: + flags |= ZEROWIDTH_OP + if fuzzy: + flags |= FUZZY_OP + code = [(self._opcode[self.case_flags, reverse], flags)] + for m in self.items: + code.extend(m.compile()) + + code.append((OP.END, )) + + return code + + def _dump(self, indent, reverse): + print "%s%s %s%s" % (INDENT * indent, self._op_name, + POS_TEXT[self.positive], CASE_TEXT[self.case_flags]) + for i in self.items: + i.dump(indent + 1) + + def _handle_case_folding(self, info, 
in_set): + # Is the set case-sensitive? + if not self.positive or not (self.case_flags & IGNORECASE) or in_set: + return self + + # Is full case-folding possible? + if (not (self.info.flags & UNICODE) or (self.case_flags & + FULLIGNORECASE) != + FULLIGNORECASE): + return self + + # Get the characters which expand to multiple codepoints on folding. + expanding_chars = _regex.get_expand_on_folding() + + # Get the folded characters in the set. + items = [] + seen = set() + for ch in expanding_chars: + if self.matches(ord(ch)): + folded = _regex.fold_case(FULL_CASE_FOLDING, ch) + if folded not in seen: + items.append(String([ord(c) for c in folded], + case_flags=self.case_flags)) + seen.add(folded) + + if not items: + # We can fall back to simple case-folding. + return self + + return Branch([self] + items) + + def max_width(self): + # Is the set case-sensitive? + if not self.positive or not (self.case_flags & IGNORECASE): + return 1 + + # Is full case-folding possible? + if (not (self.info.flags & UNICODE) or (self.case_flags & + FULLIGNORECASE) != FULLIGNORECASE): + return 1 + + # Get the characters which expand to multiple codepoints on folding. + expanding_chars = _regex.get_expand_on_folding() + + # Get the folded characters in the set. 
+ seen = set() + for ch in expanding_chars: + if self.matches(ord(ch)): + folded = _regex.fold_case(FULL_CASE_FOLDING, ch) + seen.add(folded) + + if not seen: + return 1 + + return max(len(folded) for folded in seen) + +class SetDiff(SetBase): + _opcode = {(NOCASE, False): OP.SET_DIFF, (IGNORECASE, False): + OP.SET_DIFF_IGN, (FULLCASE, False): OP.SET_DIFF, (FULLIGNORECASE, False): + OP.SET_DIFF_IGN, (NOCASE, True): OP.SET_DIFF_REV, (IGNORECASE, True): + OP.SET_DIFF_IGN_REV, (FULLCASE, True): OP.SET_DIFF_REV, (FULLIGNORECASE, + True): OP.SET_DIFF_IGN_REV} + _op_name = "SET_DIFF" + + def optimise(self, info, in_set=False): + items = self.items + if len(items) > 2: + items = [items[0], SetUnion(info, items[1 : ])] + + if len(items) == 1: + return items[0].with_flags(case_flags=self.case_flags, + zerowidth=self.zerowidth).optimise(info, in_set) + + self.items = tuple(m.optimise(info, in_set=True) for m in items) + + return self._handle_case_folding(info, in_set) + + def matches(self, ch): + m = self.items[0].matches(ch) and not self.items[1].matches(ch) + return m == self.positive + +class SetInter(SetBase): + _opcode = {(NOCASE, False): OP.SET_INTER, (IGNORECASE, False): + OP.SET_INTER_IGN, (FULLCASE, False): OP.SET_INTER, (FULLIGNORECASE, + False): OP.SET_INTER_IGN, (NOCASE, True): OP.SET_INTER_REV, (IGNORECASE, + True): OP.SET_INTER_IGN_REV, (FULLCASE, True): OP.SET_INTER_REV, + (FULLIGNORECASE, True): OP.SET_INTER_IGN_REV} + _op_name = "SET_INTER" + + def optimise(self, info, in_set=False): + items = [] + for m in self.items: + m = m.optimise(info, in_set=True) + if isinstance(m, SetInter) and m.positive: + # Intersection in intersection. 
+ items.extend(m.items) + else: + items.append(m) + + if len(items) == 1: + return items[0].with_flags(case_flags=self.case_flags, + zerowidth=self.zerowidth).optimise(info, in_set) + + self.items = tuple(items) + + return self._handle_case_folding(info, in_set) + + def matches(self, ch): + m = all(i.matches(ch) for i in self.items) + return m == self.positive + +class SetSymDiff(SetBase): + _opcode = {(NOCASE, False): OP.SET_SYM_DIFF, (IGNORECASE, False): + OP.SET_SYM_DIFF_IGN, (FULLCASE, False): OP.SET_SYM_DIFF, (FULLIGNORECASE, + False): OP.SET_SYM_DIFF_IGN, (NOCASE, True): OP.SET_SYM_DIFF_REV, + (IGNORECASE, True): OP.SET_SYM_DIFF_IGN_REV, (FULLCASE, True): + OP.SET_SYM_DIFF_REV, (FULLIGNORECASE, True): OP.SET_SYM_DIFF_IGN_REV} + _op_name = "SET_SYM_DIFF" + + def optimise(self, info, in_set=False): + items = [] + for m in self.items: + m = m.optimise(info, in_set=True) + if isinstance(m, SetSymDiff) and m.positive: + # Symmetric difference in symmetric difference. + items.extend(m.items) + else: + items.append(m) + + if len(items) == 1: + return items[0].with_flags(case_flags=self.case_flags, + zerowidth=self.zerowidth).optimise(info, in_set) + + self.items = tuple(items) + + return self._handle_case_folding(info, in_set) + + def matches(self, ch): + m = False + for i in self.items: + m = m != i.matches(ch) + + return m == self.positive + +class SetUnion(SetBase): + _opcode = {(NOCASE, False): OP.SET_UNION, (IGNORECASE, False): + OP.SET_UNION_IGN, (FULLCASE, False): OP.SET_UNION, (FULLIGNORECASE, + False): OP.SET_UNION_IGN, (NOCASE, True): OP.SET_UNION_REV, (IGNORECASE, + True): OP.SET_UNION_IGN_REV, (FULLCASE, True): OP.SET_UNION_REV, + (FULLIGNORECASE, True): OP.SET_UNION_IGN_REV} + _op_name = "SET_UNION" + + def optimise(self, info, in_set=False): + items = [] + for m in self.items: + m = m.optimise(info, in_set=True) + if isinstance(m, SetUnion) and m.positive: + # Union in union. 
+ items.extend(m.items) + else: + items.append(m) + + if len(items) == 1: + i = items[0] + return i.with_flags(positive=i.positive == self.positive, + case_flags=self.case_flags, + zerowidth=self.zerowidth).optimise(info, in_set) + + self.items = tuple(items) + + return self._handle_case_folding(info, in_set) + + def _compile(self, reverse, fuzzy): + flags = 0 + if self.positive: + flags |= POSITIVE_OP + if self.zerowidth: + flags |= ZEROWIDTH_OP + if fuzzy: + flags |= FUZZY_OP + + characters, others = defaultdict(list), [] + for m in self.items: + if isinstance(m, Character): + characters[m.positive].append(m.value) + else: + others.append(m) + + code = [(self._opcode[self.case_flags, reverse], flags)] + + for positive, values in characters.items(): + flags = 0 + if positive: + flags |= POSITIVE_OP + if len(values) == 1: + code.append((OP.CHARACTER, flags, values[0])) + else: + code.append((OP.STRING, flags, len(values)) + tuple(values)) + + for m in others: + code.extend(m.compile()) + + code.append((OP.END, )) + + return code + + def matches(self, ch): + m = any(i.matches(ch) for i in self.items) + return m == self.positive + +class StartOfLine(ZeroWidthBase): + _opcode = OP.START_OF_LINE + _op_name = "START_OF_LINE" + +class StartOfLineU(StartOfLine): + _opcode = OP.START_OF_LINE_U + _op_name = "START_OF_LINE_U" + +class StartOfString(ZeroWidthBase): + _opcode = OP.START_OF_STRING + _op_name = "START_OF_STRING" + +class StartOfWord(ZeroWidthBase): + _opcode = OP.START_OF_WORD + _op_name = "START_OF_WORD" + +class String(RegexBase): + _opcode = {(NOCASE, False): OP.STRING, (IGNORECASE, False): OP.STRING_IGN, + (FULLCASE, False): OP.STRING, (FULLIGNORECASE, False): OP.STRING_FLD, + (NOCASE, True): OP.STRING_REV, (IGNORECASE, True): OP.STRING_IGN_REV, + (FULLCASE, True): OP.STRING_REV, (FULLIGNORECASE, True): + OP.STRING_FLD_REV} + + def __init__(self, characters, case_flags=NOCASE): + self.characters = tuple(characters) + self.case_flags = case_flags + + if 
(self.case_flags & FULLIGNORECASE) == FULLIGNORECASE: + folded_characters = [] + for char in self.characters: + folded = _regex.fold_case(FULL_CASE_FOLDING, unichr(char)) + folded_characters.extend(ord(c) for c in folded) + else: + folded_characters = self.characters + + self.folded_characters = tuple(folded_characters) + self.required = False + + self._key = self.__class__, self.characters, self.case_flags + + def get_firstset(self, reverse): + if reverse: + pos = -1 + else: + pos = 0 + return set([Character(self.characters[pos], + case_flags=self.case_flags)]) + + def has_simple_start(self): + return True + + def _compile(self, reverse, fuzzy): + flags = 0 + if fuzzy: + flags |= FUZZY_OP + if self.required: + flags |= REQUIRED_OP + return [(self._opcode[self.case_flags, reverse], flags, + len(self.folded_characters)) + self.folded_characters] + + def _dump(self, indent, reverse): + display = repr("".join(unichr(c) for c in self.characters)).lstrip("bu") + print "%sSTRING %s%s" % (INDENT * indent, display, + CASE_TEXT[self.case_flags]) + + def max_width(self): + return len(self.folded_characters) + + def get_required_string(self, reverse): + return 0, self + +class Literal(String): + def _dump(self, indent, reverse): + for c in self.characters: + display = ascii("".join(chr(c))).lstrip("bu") + print("{}CHARACTER MATCH {}{}".format(INDENT * indent, + display, CASE_TEXT[self.case_flags])) + +class StringSet(RegexBase): + _opcode = {(NOCASE, False): OP.STRING_SET, (IGNORECASE, False): + OP.STRING_SET_IGN, (FULLCASE, False): OP.STRING_SET, (FULLIGNORECASE, + False): OP.STRING_SET_FLD, (NOCASE, True): OP.STRING_SET_REV, + (IGNORECASE, True): OP.STRING_SET_IGN_REV, (FULLCASE, True): + OP.STRING_SET_REV, (FULLIGNORECASE, True): OP.STRING_SET_FLD_REV} + + def __init__(self, info, name, case_flags=NOCASE): + self.info = info + self.name = name + self.case_flags = case_flags + + self._key = self.__class__, self.name, self.case_flags + + self.set_key = (name, 
self.case_flags) + if self.set_key not in info.named_lists_used: + info.named_lists_used[self.set_key] = len(info.named_lists_used) + + def _compile(self, reverse, fuzzy): + index = self.info.named_lists_used[self.set_key] + items = self.info.kwargs[self.name] + + case_flags = self.case_flags + + if not items: + return [] + + encoding = self.info.flags & _ALL_ENCODINGS + fold_flags = encoding | case_flags + + if fuzzy: + choices = [self._folded(fold_flags, i) for i in items] + + # Sort from longest to shortest. + choices.sort(key=lambda s: (-len(s), s)) + + branches = [] + for string in choices: + branches.append(Sequence([Character(c, case_flags=case_flags) + for c in string])) + + if len(branches) > 1: + branch = Branch(branches) + else: + branch = branches[0] + branch = branch.optimise(self.info).pack_characters(self.info) + + return branch.compile(reverse, fuzzy) + else: + min_len = min(len(i) for i in items) + max_len = max(len(self._folded(fold_flags, i)) for i in items) + return [(self._opcode[case_flags, reverse], index, min_len, + max_len)] + + def _dump(self, indent, reverse): + print "%sSTRING_SET %s%s" % (INDENT * indent, self.name, + CASE_TEXT[self.case_flags]) + + def _folded(self, fold_flags, item): + if isinstance(item, unicode): + return [ord(c) for c in _regex.fold_case(fold_flags, item)] + else: + return [ord(c) for c in item] + + def _flatten(self, s): + # Flattens the branches. 
+ if isinstance(s, Branch): + for b in s.branches: + self._flatten(b) + elif isinstance(s, Sequence) and s.items: + seq = s.items + + while isinstance(seq[-1], Sequence): + seq[-1 : ] = seq[-1].items + + n = 0 + while n < len(seq) and isinstance(seq[n], Character): + n += 1 + + if n > 1: + seq[ : n] = [String([c.value for c in seq[ : n]], + case_flags=self.case_flags)] + + self._flatten(seq[-1]) + + def max_width(self): + if not self.info.kwargs[self.name]: + return 0 + + if self.case_flags & IGNORECASE: + fold_flags = (self.info.flags & _ALL_ENCODINGS) | self.case_flags + return max(len(_regex.fold_case(fold_flags, i)) for i in + self.info.kwargs[self.name]) + else: + return max(len(i) for i in self.info.kwargs[self.name]) + +class Source(object): + "Scanner for the regular expression source string." + def __init__(self, string): + if isinstance(string, unicode): + self.string = string + self.char_type = unichr + else: + self.string = string + self.char_type = chr + + self.pos = 0 + self.ignore_space = False + self.sep = string[ : 0] + + def get(self): + string = self.string + pos = self.pos + + try: + if self.ignore_space: + while True: + if string[pos].isspace(): + # Skip over the whitespace. + pos += 1 + elif string[pos] == "#": + # Skip over the comment to the end of the line. + pos = string.index("\n", pos) + else: + break + + ch = string[pos] + self.pos = pos + 1 + return ch + except IndexError: + # We've reached the end of the string. + self.pos = pos + return string[ : 0] + except ValueError: + # The comment extended to the end of the string. + self.pos = len(string) + return string[ : 0] + + def get_many(self, count=1): + string = self.string + pos = self.pos + + try: + if self.ignore_space: + substring = [] + + while len(substring) < count: + while True: + if string[pos].isspace(): + # Skip over the whitespace. + pos += 1 + elif string[pos] == "#": + # Skip over the comment to the end of the line. 
+ pos = string.index("\n", pos) + else: + break + + substring.append(string[pos]) + pos += 1 + + substring = "".join(substring) + else: + substring = string[pos : pos + count] + pos += len(substring) + + self.pos = pos + return substring + except IndexError: + # We've reached the end of the string. + self.pos = len(string) + return "".join(substring) + except ValueError: + # The comment extended to the end of the string. + self.pos = len(string) + return "".join(substring) + + def get_while(self, test_set, include=True): + string = self.string + pos = self.pos + + if self.ignore_space: + try: + substring = [] + + while True: + if string[pos].isspace(): + # Skip over the whitespace. + pos += 1 + elif string[pos] == "#": + # Skip over the comment to the end of the line. + pos = string.index("\n", pos) + elif (string[pos] in test_set) == include: + substring.append(string[pos]) + pos += 1 + else: + break + + self.pos = pos + except IndexError: + # We've reached the end of the string. + self.pos = len(string) + except ValueError: + # The comment extended to the end of the string. + self.pos = len(string) + + return "".join(substring) + else: + try: + while (string[pos] in test_set) == include: + pos += 1 + + substring = string[self.pos : pos] + + self.pos = pos + + return substring + except IndexError: + # We've reached the end of the string. + substring = string[self.pos : pos] + + self.pos = pos + + return substring + + def skip_while(self, test_set, include=True): + string = self.string + pos = self.pos + + try: + if self.ignore_space: + while True: + if string[pos].isspace(): + # Skip over the whitespace. + pos += 1 + elif string[pos] == "#": + # Skip over the comment to the end of the line. + pos = string.index("\n", pos) + elif (string[pos] in test_set) == include: + pos += 1 + else: + break + else: + while (string[pos] in test_set) == include: + pos += 1 + + self.pos = pos + except IndexError: + # We've reached the end of the string. 
+ self.pos = len(string) + except ValueError: + # The comment extended to the end of the string. + self.pos = len(string) + + def match(self, substring): + string = self.string + pos = self.pos + + if self.ignore_space: + try: + for c in substring: + while True: + if string[pos].isspace(): + # Skip over the whitespace. + pos += 1 + elif string[pos] == "#": + # Skip over the comment to the end of the line. + pos = string.index("\n", pos) + else: + break + + if string[pos] != c: + return False + + pos += 1 + + self.pos = pos + + return True + except IndexError: + # We've reached the end of the string. + return False + except ValueError: + # The comment extended to the end of the string. + return False + else: + if not string.startswith(substring, pos): + return False + + self.pos = pos + len(substring) + + return True + + def expect(self, substring): + if not self.match(substring): + raise error("missing %s at position %d" % (substring, self.pos)) + + def at_end(self): + string = self.string + pos = self.pos + + try: + if self.ignore_space: + while True: + if string[pos].isspace(): + pos += 1 + elif string[pos] == "#": + pos = string.index("\n", pos) + else: + break + + return pos >= len(string) + except IndexError: + # We've reached the end of the string. + return True + except ValueError: + # The comment extended to the end of the string. + return True + +class Info(object): + "Info about the regular expression." 
+ + def __init__(self, flags=0, char_type=None, kwargs={}): + flags |= DEFAULT_FLAGS[(flags & _ALL_VERSIONS) or DEFAULT_VERSION] + self.flags = flags + self.global_flags = flags + + self.kwargs = kwargs + + self.group_count = 0 + self.group_index = {} + self.group_name = {} + self.char_type = char_type + self.named_lists_used = {} + self.open_groups = [] + self.open_group_count = {} + self.defined_groups = {} + self.group_calls = [] + self.private_groups = {} + + def open_group(self, name=None): + group = self.group_index.get(name) + if group is None: + while True: + self.group_count += 1 + if name is None or self.group_count not in self.group_name: + break + + group = self.group_count + if name: + self.group_index[name] = group + self.group_name[group] = name + + if group in self.open_groups: + # We have a nested named group. We'll assign it a private group + # number, initially negative until we can assign a proper + # (positive) number. + group_alias = -(len(self.private_groups) + 1) + self.private_groups[group_alias] = group + group = group_alias + + self.open_groups.append(group) + self.open_group_count[group] = self.open_group_count.get(group, 0) + 1 + + return group + + def close_group(self): + self.open_groups.pop() + + def is_open_group(self, name): + # In version 1, a group reference can refer to an open group. We'll + # just pretend the group isn't open. + version = (self.flags & _ALL_VERSIONS) or DEFAULT_VERSION + if version == VERSION1: + return False + + if name.isdigit(): + group = int(name) + else: + group = self.group_index.get(name) + + return group in self.open_groups + +def _check_group_features(info, parsed): + """Checks whether the reverse and fuzzy features of the group calls match + the groups which they call. + """ + call_refs = {} + additional_groups = [] + for call, reverse, fuzzy in info.group_calls: + # Look up the reference of this group call. 
+ key = (call.group, reverse, fuzzy) + ref = call_refs.get(key) + if ref is None: + # This group doesn't have a reference yet, so look up its features. + if call.group == 0: + # Calling the pattern as a whole. + rev = bool(info.flags & REVERSE) + fuz = isinstance(parsed, Fuzzy) + if (rev, fuz) != (reverse, fuzzy): + # The pattern as a whole doesn't have the features we want, + # so we'll need to make a copy of it with the desired + # features. + additional_groups.append((parsed, reverse, fuzzy)) + else: + # Calling a capture group. + def_info = info.defined_groups[call.group] + group = def_info[0] + if def_info[1 : ] != (reverse, fuzzy): + # The group doesn't have the features we want, so we'll + # need to make a copy of it with the desired features. + additional_groups.append((group, reverse, fuzzy)) + + ref = len(call_refs) + call_refs[key] = ref + + call.call_ref = ref + + info.call_refs = call_refs + info.additional_groups = additional_groups + +def _get_required_string(parsed, flags): + "Gets the required string and related info of a parsed pattern." + + req_offset, required = parsed.get_required_string(bool(flags & REVERSE)) + if required: + required.required = True + if req_offset >= UNLIMITED: + req_offset = -1 + + req_flags = required.case_flags + if not (flags & UNICODE): + req_flags &= ~UNICODE + + req_chars = required.folded_characters + else: + req_offset = 0 + req_chars = () + req_flags = 0 + + return req_offset, req_chars, req_flags + +class Scanner: + def __init__(self, lexicon, flags=0): + self.lexicon = lexicon + + # Combine phrases into a compound pattern. + patterns = [] + for phrase, action in lexicon: + # Parse the regular expression. + source = Source(phrase) + info = Info(flags, source.char_type) + source.ignore_space = bool(info.flags & VERBOSE) + parsed = _parse_pattern(source, info) + if not source.at_end(): + raise error("trailing characters at position %d" % source.pos) + + # We want to forbid capture groups within each phrase. 
+ patterns.append(parsed.remove_captures()) + + # Combine all the subpatterns into one pattern. + info = Info(flags) + patterns = [Group(info, g + 1, p) for g, p in enumerate(patterns)] + parsed = Branch(patterns) + + # Optimise the compound pattern. + parsed = parsed.optimise(info) + parsed = parsed.pack_characters(info) + + # Get the required string. + req_offset, req_chars, req_flags = _get_required_string(parsed, + info.flags) + + # Check the features of the groups. + _check_group_features(info, parsed) + + # Complain if there are any group calls. They are not supported by the + # Scanner class. + if info.call_refs: + raise error("recursive regex not supported by Scanner") + + reverse = bool(info.flags & REVERSE) + + # Compile the compound pattern. The result is a list of tuples. + code = parsed.compile(reverse) + [(OP.SUCCESS, )] + + # Flatten the code into a list of ints. + code = _flatten_code(code) + + if not parsed.has_simple_start(): + # Get the first set, if possible. + try: + fs_code = _compile_firstset(info, parsed.get_firstset(reverse)) + fs_code = _flatten_code(fs_code) + code = fs_code + code + except _FirstSetError: + pass + + # Check the global flags for conflicts. + version = (info.flags & _ALL_VERSIONS) or DEFAULT_VERSION + if version not in (0, VERSION0, VERSION1): + raise ValueError("VERSION0 and VERSION1 flags are mutually incompatible") + + # Create the PatternObject. + # + # Local flags like IGNORECASE affect the code generation, but aren't + # needed by the PatternObject itself. Conversely, global flags like + # LOCALE _don't_ affect the code generation but _are_ needed by the + # PatternObject. 
+ self.scanner = _regex.compile(None, (flags & GLOBAL_FLAGS) | version, + code, {}, {}, {}, [], req_offset, req_chars, req_flags, + len(patterns)) + + def scan(self, string): + result = [] + append = result.append + match = self.scanner.scanner(string).match + i = 0 + while True: + m = match() + if not m: + break + j = m.end() + if i == j: + break + action = self.lexicon[m.lastindex - 1][1] + if hasattr(action, '__call__'): + self.match = m + action = action(self, m.group()) + if action is not None: + append(action) + i = j + + return result, string[i : ] + +# Get the known properties dict. +PROPERTIES = _regex.get_properties() + +# Build the inverse of the properties dict. +PROPERTY_NAMES = {} +for prop_name, (prop_id, values) in PROPERTIES.items(): + name, prop_values = PROPERTY_NAMES.get(prop_id, ("", {})) + name = max(name, prop_name, key=len) + PROPERTY_NAMES[prop_id] = name, prop_values + + for val_name, val_id in values.items(): + prop_values[val_id] = max(prop_values.get(val_id, ""), val_name, + key=len) + +# Character escape sequences. +CHARACTER_ESCAPES = { + "a": "\a", + "b": "\b", + "f": "\f", + "n": "\n", + "r": "\r", + "t": "\t", + "v": "\v", +} + +# Predefined character set escape sequences. +CHARSET_ESCAPES = { + "d": lookup_property(None, "Digit", True), + "D": lookup_property(None, "Digit", False), + "s": lookup_property(None, "Space", True), + "S": lookup_property(None, "Space", False), + "w": lookup_property(None, "Word", True), + "W": lookup_property(None, "Word", False), +} + +# Positional escape sequences. +POSITION_ESCAPES = { + "A": StartOfString(), + "b": Boundary(), + "B": Boundary(False), + "m": StartOfWord(), + "M": EndOfWord(), + "Z": EndOfString(), +} + +# Positional escape sequences when WORD flag set. 
+WORD_POSITION_ESCAPES = dict(POSITION_ESCAPES) +WORD_POSITION_ESCAPES.update({ + "b": DefaultBoundary(), + "B": DefaultBoundary(False), + "m": DefaultStartOfWord(), + "M": DefaultEndOfWord(), +}) diff --git a/lib/regex/_regex_unicode.c b/lib/regex/_regex_unicode.c new file mode 100644 index 00000000..663a6bed --- /dev/null +++ b/lib/regex/_regex_unicode.c @@ -0,0 +1,12748 @@ +/* For Unicode version 6.3.0 */ + +#include "_regex_unicode.h" + +#define RE_BLANK_MASK ((1 << RE_PROP_ZL) | (1 << RE_PROP_ZP)) +#define RE_GRAPH_MASK ((1 << RE_PROP_CC) | (1 << RE_PROP_CS) | (1 << RE_PROP_CN)) +#define RE_WORD_MASK (RE_PROP_M_MASK | (1 << RE_PROP_ND) | (1 << RE_PROP_PC)) + +typedef struct RE_AllCases { + RE_INT32 diffs[RE_MAX_CASES - 1]; +} RE_AllCases; + +typedef struct RE_FullCaseFolding { + RE_INT32 diff; + RE_UINT16 codepoints[RE_MAX_FOLDED - 1]; +} RE_FullCaseFolding; + +/* strings. */ + +char* re_strings[] = { + "-1/2", + "0", + "1", + "1/10", + "1/16", + "1/2", + "1/3", + "1/4", + "1/5", + "1/6", + "1/7", + "1/8", + "1/9", + "10", + "100", + "1000", + "10000", + "100000", + "100000000", + "1000000000000", + "103", + "107", + "11", + "11/2", + "118", + "12", + "122", + "129", + "13", + "13/2", + "130", + "132", + "133", + "14", + "15", + "15/2", + "16", + "17", + "17/2", + "18", + "19", + "2", + "2/3", + "2/5", + "20", + "200", + "2000", + "20000", + "202", + "21", + "214", + "216", + "216000", + "218", + "22", + "220", + "222", + "224", + "226", + "228", + "23", + "230", + "232", + "233", + "234", + "24", + "240", + "25", + "26", + "27", + "28", + "29", + "3", + "3/16", + "3/2", + "3/4", + "3/5", + "3/8", + "30", + "300", + "3000", + "30000", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "4", + "4/5", + "40", + "400", + "4000", + "40000", + "41", + "42", + "43", + "432000", + "44", + "45", + "46", + "47", + "48", + "49", + "5", + "5/2", + "5/6", + "5/8", + "50", + "500", + "5000", + "50000", + "6", + "60", + "600", + "6000", + "60000", + 
"7", + "7/2", + "7/8", + "70", + "700", + "7000", + "70000", + "8", + "80", + "800", + "8000", + "80000", + "84", + "9", + "9/2", + "90", + "900", + "9000", + "90000", + "91", + "A", + "ABOVE", + "ABOVELEFT", + "ABOVERIGHT", + "AEGEANNUMBERS", + "AHEX", + "AI", + "AIN", + "AL", + "ALAPH", + "ALCHEMICAL", + "ALCHEMICALSYMBOLS", + "ALEF", + "ALETTER", + "ALNUM", + "ALPHA", + "ALPHABETIC", + "ALPHABETICPF", + "ALPHABETICPRESENTATIONFORMS", + "ALPHANUMERIC", + "AMBIGUOUS", + "AN", + "ANCIENTGREEKMUSIC", + "ANCIENTGREEKMUSICALNOTATION", + "ANCIENTGREEKNUMBERS", + "ANCIENTSYMBOLS", + "ANY", + "AR", + "ARAB", + "ARABIC", + "ARABICEXTA", + "ARABICEXTENDEDA", + "ARABICLETTER", + "ARABICMATH", + "ARABICMATHEMATICALALPHABETICSYMBOLS", + "ARABICNUMBER", + "ARABICPFA", + "ARABICPFB", + "ARABICPRESENTATIONFORMSA", + "ARABICPRESENTATIONFORMSB", + "ARABICSUP", + "ARABICSUPPLEMENT", + "ARMENIAN", + "ARMI", + "ARMN", + "ARROWS", + "ASCII", + "ASCIIHEXDIGIT", + "ASSIGNED", + "AT", + "ATA", + "ATAR", + "ATB", + "ATBL", + "ATERM", + "ATTACHEDABOVE", + "ATTACHEDABOVERIGHT", + "ATTACHEDBELOW", + "ATTACHEDBELOWLEFT", + "AVAGRAHA", + "AVESTAN", + "AVST", + "B", + "B2", + "BA", + "BALI", + "BALINESE", + "BAMU", + "BAMUM", + "BAMUMSUP", + "BAMUMSUPPLEMENT", + "BASICLATIN", + "BATAK", + "BATK", + "BB", + "BC", + "BEH", + "BELOW", + "BELOWLEFT", + "BELOWRIGHT", + "BENG", + "BENGALI", + "BETH", + "BIDIC", + "BIDICLASS", + "BIDICONTROL", + "BIDIM", + "BIDIMIRRORED", + "BINDU", + "BK", + "BL", + "BLANK", + "BLK", + "BLOCK", + "BLOCKELEMENTS", + "BN", + "BOPO", + "BOPOMOFO", + "BOPOMOFOEXT", + "BOPOMOFOEXTENDED", + "BOTTOM", + "BOTTOMANDRIGHT", + "BOUNDARYNEUTRAL", + "BOXDRAWING", + "BR", + "BRAH", + "BRAHMI", + "BRAI", + "BRAILLE", + "BRAILLEPATTERNS", + "BREAKAFTER", + "BREAKBEFORE", + "BREAKBOTH", + "BREAKSYMBOLS", + "BUGI", + "BUGINESE", + "BUHD", + "BUHID", + "BURUSHASKIYEHBARREE", + "BYZANTINEMUSIC", + "BYZANTINEMUSICALSYMBOLS", + "C", + "C&", + "CAKM", + "CAN", + "CANADIANABORIGINAL", + 
"CANADIANSYLLABICS", + "CANONICAL", + "CANONICALCOMBININGCLASS", + "CANS", + "CARI", + "CARIAN", + "CARRIAGERETURN", + "CASED", + "CASEDLETTER", + "CASEIGNORABLE", + "CB", + "CC", + "CCC", + "CCC10", + "CCC103", + "CCC107", + "CCC11", + "CCC118", + "CCC12", + "CCC122", + "CCC129", + "CCC13", + "CCC130", + "CCC132", + "CCC133", + "CCC14", + "CCC15", + "CCC16", + "CCC17", + "CCC18", + "CCC19", + "CCC20", + "CCC21", + "CCC22", + "CCC23", + "CCC24", + "CCC25", + "CCC26", + "CCC27", + "CCC28", + "CCC29", + "CCC30", + "CCC31", + "CCC32", + "CCC33", + "CCC34", + "CCC35", + "CCC36", + "CCC84", + "CCC91", + "CF", + "CHAKMA", + "CHAM", + "CHANGESWHENCASEFOLDED", + "CHANGESWHENCASEMAPPED", + "CHANGESWHENLOWERCASED", + "CHANGESWHENTITLECASED", + "CHANGESWHENUPPERCASED", + "CHER", + "CHEROKEE", + "CI", + "CIRCLE", + "CJ", + "CJK", + "CJKCOMPAT", + "CJKCOMPATFORMS", + "CJKCOMPATIBILITY", + "CJKCOMPATIBILITYFORMS", + "CJKCOMPATIBILITYIDEOGRAPHS", + "CJKCOMPATIBILITYIDEOGRAPHSSUPPLEMENT", + "CJKCOMPATIDEOGRAPHS", + "CJKCOMPATIDEOGRAPHSSUP", + "CJKEXTA", + "CJKEXTB", + "CJKEXTC", + "CJKEXTD", + "CJKRADICALSSUP", + "CJKRADICALSSUPPLEMENT", + "CJKSTROKES", + "CJKSYMBOLS", + "CJKSYMBOLSANDPUNCTUATION", + "CJKUNIFIEDIDEOGRAPHS", + "CJKUNIFIEDIDEOGRAPHSEXTENSIONA", + "CJKUNIFIEDIDEOGRAPHSEXTENSIONB", + "CJKUNIFIEDIDEOGRAPHSEXTENSIONC", + "CJKUNIFIEDIDEOGRAPHSEXTENSIOND", + "CL", + "CLOSE", + "CLOSEPARENTHESIS", + "CLOSEPUNCTUATION", + "CM", + "CN", + "CNTRL", + "CO", + "COM", + "COMBININGDIACRITICALMARKS", + "COMBININGDIACRITICALMARKSFORSYMBOLS", + "COMBININGDIACRITICALMARKSSUPPLEMENT", + "COMBININGHALFMARKS", + "COMBININGMARK", + "COMBININGMARKSFORSYMBOLS", + "COMMON", + "COMMONINDICNUMBERFORMS", + "COMMONSEPARATOR", + "COMPAT", + "COMPATJAMO", + "COMPLEXCONTEXT", + "CONDITIONALJAPANESESTARTER", + "CONNECTORPUNCTUATION", + "CONSONANT", + "CONSONANTDEAD", + "CONSONANTFINAL", + "CONSONANTHEADLETTER", + "CONSONANTMEDIAL", + "CONSONANTPLACEHOLDER", + "CONSONANTREPHA", + 
"CONSONANTSUBJOINED", + "CONTINGENTBREAK", + "CONTROL", + "CONTROLPICTURES", + "COPT", + "COPTIC", + "COUNTINGROD", + "COUNTINGRODNUMERALS", + "CP", + "CPRT", + "CR", + "CS", + "CUNEIFORM", + "CUNEIFORMNUMBERS", + "CUNEIFORMNUMBERSANDPUNCTUATION", + "CURRENCYSYMBOL", + "CURRENCYSYMBOLS", + "CWCF", + "CWCM", + "CWL", + "CWT", + "CWU", + "CYPRIOT", + "CYPRIOTSYLLABARY", + "CYRILLIC", + "CYRILLICEXTA", + "CYRILLICEXTB", + "CYRILLICEXTENDEDA", + "CYRILLICEXTENDEDB", + "CYRILLICSUP", + "CYRILLICSUPPLEMENT", + "CYRILLICSUPPLEMENTARY", + "CYRL", + "D", + "DA", + "DAL", + "DALATHRISH", + "DASH", + "DASHPUNCTUATION", + "DB", + "DE", + "DECIMAL", + "DECIMALNUMBER", + "DECOMPOSITIONTYPE", + "DEFAULTIGNORABLECODEPOINT", + "DEP", + "DEPRECATED", + "DESERET", + "DEVA", + "DEVANAGARI", + "DEVANAGARIEXT", + "DEVANAGARIEXTENDED", + "DI", + "DIA", + "DIACRITIC", + "DIACRITICALS", + "DIACRITICALSFORSYMBOLS", + "DIACRITICALSSUP", + "DIGIT", + "DINGBATS", + "DOMINO", + "DOMINOTILES", + "DOUBLEABOVE", + "DOUBLEBELOW", + "DOUBLEQUOTE", + "DQ", + "DSRT", + "DT", + "DUALJOINING", + "E", + "EA", + "EASTASIANWIDTH", + "EGYP", + "EGYPTIANHIEROGLYPHS", + "EMOTICONS", + "EN", + "ENC", + "ENCLOSEDALPHANUM", + "ENCLOSEDALPHANUMERICS", + "ENCLOSEDALPHANUMERICSUPPLEMENT", + "ENCLOSEDALPHANUMSUP", + "ENCLOSEDCJK", + "ENCLOSEDCJKLETTERSANDMONTHS", + "ENCLOSEDIDEOGRAPHICSUP", + "ENCLOSEDIDEOGRAPHICSUPPLEMENT", + "ENCLOSINGMARK", + "ES", + "ET", + "ETHI", + "ETHIOPIC", + "ETHIOPICEXT", + "ETHIOPICEXTA", + "ETHIOPICEXTENDED", + "ETHIOPICEXTENDEDA", + "ETHIOPICSUP", + "ETHIOPICSUPPLEMENT", + "EUROPEANNUMBER", + "EUROPEANSEPARATOR", + "EUROPEANTERMINATOR", + "EX", + "EXCLAMATION", + "EXT", + "EXTEND", + "EXTENDER", + "EXTENDNUMLET", + "F", + "FALSE", + "FARSIYEH", + "FE", + "FEH", + "FIN", + "FINAL", + "FINALPUNCTUATION", + "FINALSEMKATH", + "FIRSTSTRONGISOLATE", + "FO", + "FONT", + "FORMAT", + "FRA", + "FRACTION", + "FSI", + "FULLWIDTH", + "GAF", + "GAMAL", + "GC", + "GCB", + "GENERALCATEGORY", + 
"GENERALPUNCTUATION", + "GEOMETRICSHAPES", + "GEOR", + "GEORGIAN", + "GEORGIANSUP", + "GEORGIANSUPPLEMENT", + "GL", + "GLAG", + "GLAGOLITIC", + "GLUE", + "GOTH", + "GOTHIC", + "GRAPH", + "GRAPHEMEBASE", + "GRAPHEMECLUSTERBREAK", + "GRAPHEMEEXTEND", + "GRAPHEMELINK", + "GRBASE", + "GREEK", + "GREEKANDCOPTIC", + "GREEKEXT", + "GREEKEXTENDED", + "GREK", + "GREXT", + "GRLINK", + "GUJARATI", + "GUJR", + "GURMUKHI", + "GURU", + "H", + "H2", + "H3", + "HAH", + "HALFANDFULLFORMS", + "HALFMARKS", + "HALFWIDTH", + "HALFWIDTHANDFULLWIDTHFORMS", + "HAMZAONHEHGOAL", + "HAN", + "HANG", + "HANGUL", + "HANGULCOMPATIBILITYJAMO", + "HANGULJAMO", + "HANGULJAMOEXTENDEDA", + "HANGULJAMOEXTENDEDB", + "HANGULSYLLABLES", + "HANGULSYLLABLETYPE", + "HANI", + "HANO", + "HANUNOO", + "HE", + "HEBR", + "HEBREW", + "HEBREWLETTER", + "HEH", + "HEHGOAL", + "HETH", + "HEX", + "HEXDIGIT", + "HIGHPRIVATEUSESURROGATES", + "HIGHPUSURROGATES", + "HIGHSURROGATES", + "HIRA", + "HIRAGANA", + "HL", + "HRKT", + "HST", + "HY", + "HYPHEN", + "ID", + "IDC", + "IDCONTINUE", + "IDEO", + "IDEOGRAPHIC", + "IDEOGRAPHICDESCRIPTIONCHARACTERS", + "IDS", + "IDSB", + "IDSBINARYOPERATOR", + "IDST", + "IDSTART", + "IDSTRINARYOPERATOR", + "IMPERIALARAMAIC", + "IN", + "INDICMATRACATEGORY", + "INDICNUMBERFORMS", + "INDICSYLLABICCATEGORY", + "INFIXNUMERIC", + "INHERITED", + "INIT", + "INITIAL", + "INITIALPUNCTUATION", + "INMC", + "INSC", + "INSCRIPTIONALPAHLAVI", + "INSCRIPTIONALPARTHIAN", + "INSEPARABLE", + "INSEPERABLE", + "INVISIBLE", + "IOTASUBSCRIPT", + "IPAEXT", + "IPAEXTENSIONS", + "IS", + "ISO", + "ISOLATED", + "ITAL", + "JAMO", + "JAMOEXTA", + "JAMOEXTB", + "JAVA", + "JAVANESE", + "JG", + "JL", + "JOINC", + "JOINCAUSING", + "JOINCONTROL", + "JOININGGROUP", + "JOININGTYPE", + "JT", + "JV", + "KA", + "KAF", + "KAITHI", + "KALI", + "KANA", + "KANASUP", + "KANASUPPLEMENT", + "KANAVOICING", + "KANBUN", + "KANGXI", + "KANGXIRADICALS", + "KANNADA", + "KAPH", + "KATAKANA", + "KATAKANAEXT", + "KATAKANAORHIRAGANA", + 
"KATAKANAPHONETICEXTENSIONS", + "KAYAHLI", + "KHAPH", + "KHAR", + "KHAROSHTHI", + "KHMER", + "KHMERSYMBOLS", + "KHMR", + "KNDA", + "KNOTTEDHEH", + "KTHI", + "KV", + "L", + "L&", + "LAM", + "LAMADH", + "LANA", + "LAO", + "LAOO", + "LATIN", + "LATIN1", + "LATIN1SUP", + "LATIN1SUPPLEMENT", + "LATINEXTA", + "LATINEXTADDITIONAL", + "LATINEXTB", + "LATINEXTC", + "LATINEXTD", + "LATINEXTENDEDA", + "LATINEXTENDEDADDITIONAL", + "LATINEXTENDEDB", + "LATINEXTENDEDC", + "LATINEXTENDEDD", + "LATN", + "LB", + "LC", + "LE", + "LEADINGJAMO", + "LEFT", + "LEFTANDRIGHT", + "LEFTJOINING", + "LEFTTORIGHT", + "LEFTTORIGHTEMBEDDING", + "LEFTTORIGHTISOLATE", + "LEFTTORIGHTOVERRIDE", + "LEPC", + "LEPCHA", + "LETTER", + "LETTERLIKESYMBOLS", + "LETTERNUMBER", + "LF", + "LIMB", + "LIMBU", + "LINB", + "LINEARB", + "LINEARBIDEOGRAMS", + "LINEARBSYLLABARY", + "LINEBREAK", + "LINEFEED", + "LINESEPARATOR", + "LISU", + "LL", + "LM", + "LO", + "LOE", + "LOGICALORDEREXCEPTION", + "LOWER", + "LOWERCASE", + "LOWERCASELETTER", + "LOWSURROGATES", + "LRE", + "LRI", + "LRO", + "LT", + "LU", + "LV", + "LVSYLLABLE", + "LVT", + "LVTSYLLABLE", + "LYCI", + "LYCIAN", + "LYDI", + "LYDIAN", + "M", + "M&", + "MAHJONG", + "MAHJONGTILES", + "MALAYALAM", + "MAND", + "MANDAIC", + "MANDATORYBREAK", + "MARK", + "MATH", + "MATHALPHANUM", + "MATHEMATICALALPHANUMERICSYMBOLS", + "MATHEMATICALOPERATORS", + "MATHOPERATORS", + "MATHSYMBOL", + "MB", + "MC", + "ME", + "MED", + "MEDIAL", + "MEEM", + "MEETEIMAYEK", + "MEETEIMAYEKEXT", + "MEETEIMAYEKEXTENSIONS", + "MERC", + "MERO", + "MEROITICCURSIVE", + "MEROITICHIEROGLYPHS", + "MIAO", + "MIDLETTER", + "MIDNUM", + "MIDNUMLET", + "MIM", + "MISCARROWS", + "MISCELLANEOUSMATHEMATICALSYMBOLSA", + "MISCELLANEOUSMATHEMATICALSYMBOLSB", + "MISCELLANEOUSSYMBOLS", + "MISCELLANEOUSSYMBOLSANDARROWS", + "MISCELLANEOUSSYMBOLSANDPICTOGRAPHS", + "MISCELLANEOUSTECHNICAL", + "MISCMATHSYMBOLSA", + "MISCMATHSYMBOLSB", + "MISCPICTOGRAPHS", + "MISCSYMBOLS", + "MISCTECHNICAL", + "ML", + "MLYM", + "MN", + 
"MODIFIERLETTER", + "MODIFIERLETTERS", + "MODIFIERSYMBOL", + "MODIFIERTONELETTERS", + "MODIFYINGLETTER", + "MONG", + "MONGOLIAN", + "MTEI", + "MUSIC", + "MUSICALSYMBOLS", + "MYANMAR", + "MYANMAREXTA", + "MYANMAREXTENDEDA", + "MYMR", + "N", + "N&", + "NA", + "NAN", + "NAR", + "NARROW", + "NB", + "NCHAR", + "ND", + "NEUTRAL", + "NEWLINE", + "NEWTAILUE", + "NEXTLINE", + "NK", + "NKO", + "NKOO", + "NL", + "NO", + "NOBLOCK", + "NOBREAK", + "NOJOININGGROUP", + "NONCHARACTERCODEPOINT", + "NONE", + "NONJOINING", + "NONSPACINGMARK", + "NONSTARTER", + "NOON", + "NOTAPPLICABLE", + "NOTREORDERED", + "NR", + "NS", + "NSM", + "NT", + "NU", + "NUKTA", + "NUMBER", + "NUMBERFORMS", + "NUMERIC", + "NUMERICTYPE", + "NUMERICVALUE", + "NUN", + "NV", + "NYA", + "OALPHA", + "OCR", + "ODI", + "OGAM", + "OGHAM", + "OGREXT", + "OIDC", + "OIDS", + "OLCHIKI", + "OLCK", + "OLDITALIC", + "OLDPERSIAN", + "OLDSOUTHARABIAN", + "OLDTURKIC", + "OLETTER", + "OLOWER", + "OMATH", + "ON", + "OP", + "OPENPUNCTUATION", + "OPTICALCHARACTERRECOGNITION", + "ORIYA", + "ORKH", + "ORYA", + "OSMA", + "OSMANYA", + "OTHER", + "OTHERALPHABETIC", + "OTHERDEFAULTIGNORABLECODEPOINT", + "OTHERGRAPHEMEEXTEND", + "OTHERIDCONTINUE", + "OTHERIDSTART", + "OTHERLETTER", + "OTHERLOWERCASE", + "OTHERMATH", + "OTHERNEUTRAL", + "OTHERNUMBER", + "OTHERPUNCTUATION", + "OTHERSYMBOL", + "OTHERUPPERCASE", + "OUPPER", + "OV", + "OVERLAY", + "OVERSTRUCK", + "P", + "P&", + "PARAGRAPHSEPARATOR", + "PATSYN", + "PATTERNSYNTAX", + "PATTERNWHITESPACE", + "PATWS", + "PC", + "PD", + "PDF", + "PDI", + "PE", + "PF", + "PHAG", + "PHAGSPA", + "PHAISTOS", + "PHAISTOSDISC", + "PHLI", + "PHNX", + "PHOENICIAN", + "PHONETICEXT", + "PHONETICEXTENSIONS", + "PHONETICEXTENSIONSSUPPLEMENT", + "PHONETICEXTSUP", + "PI", + "PLAYINGCARDS", + "PLRD", + "PO", + "POPDIRECTIONALFORMAT", + "POPDIRECTIONALISOLATE", + "POSTFIXNUMERIC", + "PP", + "PR", + "PREFIXNUMERIC", + "PREPEND", + "PRINT", + "PRIVATEUSE", + "PRIVATEUSEAREA", + "PRTI", + "PS", + "PUA", + "PUNCT", + 
"PUNCTUATION", + "QAAC", + "QAAI", + "QAF", + "QAPH", + "QMARK", + "QU", + "QUOTATION", + "QUOTATIONMARK", + "R", + "RADICAL", + "REGIONALINDICATOR", + "REGISTERSHIFTER", + "REH", + "REJANG", + "REVERSEDPE", + "RI", + "RIGHT", + "RIGHTJOINING", + "RIGHTTOLEFT", + "RIGHTTOLEFTEMBEDDING", + "RIGHTTOLEFTISOLATE", + "RIGHTTOLEFTOVERRIDE", + "RJNG", + "RLE", + "RLI", + "RLO", + "ROHINGYAYEH", + "RUMI", + "RUMINUMERALSYMBOLS", + "RUNIC", + "RUNR", + "S", + "S&", + "SA", + "SAD", + "SADHE", + "SAMARITAN", + "SAMR", + "SARB", + "SAUR", + "SAURASHTRA", + "SB", + "SC", + "SCONTINUE", + "SCRIPT", + "SD", + "SE", + "SEEN", + "SEGMENTSEPARATOR", + "SEMKATH", + "SENTENCEBREAK", + "SEP", + "SEPARATOR", + "SG", + "SHARADA", + "SHAVIAN", + "SHAW", + "SHIN", + "SHRD", + "SINGLEQUOTE", + "SINH", + "SINHALA", + "SK", + "SM", + "SMALL", + "SMALLFORMS", + "SMALLFORMVARIANTS", + "SML", + "SO", + "SOFTDOTTED", + "SORA", + "SORASOMPENG", + "SP", + "SPACE", + "SPACESEPARATOR", + "SPACINGMARK", + "SPACINGMODIFIERLETTERS", + "SPECIALS", + "SQ", + "SQR", + "SQUARE", + "ST", + "STERM", + "SUB", + "SUND", + "SUNDANESE", + "SUNDANESESUP", + "SUNDANESESUPPLEMENT", + "SUP", + "SUPARROWSA", + "SUPARROWSB", + "SUPER", + "SUPERANDSUB", + "SUPERSCRIPTSANDSUBSCRIPTS", + "SUPMATHOPERATORS", + "SUPPLEMENTALARROWSA", + "SUPPLEMENTALARROWSB", + "SUPPLEMENTALMATHEMATICALOPERATORS", + "SUPPLEMENTALPUNCTUATION", + "SUPPLEMENTARYPRIVATEUSEAREAA", + "SUPPLEMENTARYPRIVATEUSEAREAB", + "SUPPUAA", + "SUPPUAB", + "SUPPUNCTUATION", + "SURROGATE", + "SWASHKAF", + "SY", + "SYLO", + "SYLOTINAGRI", + "SYMBOL", + "SYRC", + "SYRIAC", + "SYRIACWAW", + "T", + "TAGALOG", + "TAGB", + "TAGBANWA", + "TAGS", + "TAH", + "TAILE", + "TAITHAM", + "TAIVIET", + "TAIXUANJING", + "TAIXUANJINGSYMBOLS", + "TAKR", + "TAKRI", + "TALE", + "TALU", + "TAMIL", + "TAML", + "TAVT", + "TAW", + "TEHMARBUTA", + "TEHMARBUTAGOAL", + "TELU", + "TELUGU", + "TERM", + "TERMINALPUNCTUATION", + "TETH", + "TFNG", + "TGLG", + "THAA", + "THAANA", + "THAI", + 
"TIBETAN", + "TIBT", + "TIFINAGH", + "TITLECASELETTER", + "TONELETTER", + "TONEMARK", + "TOP", + "TOPANDBOTTOM", + "TOPANDBOTTOMANDRIGHT", + "TOPANDLEFT", + "TOPANDLEFTANDRIGHT", + "TOPANDRIGHT", + "TRAILINGJAMO", + "TRANSPARENT", + "TRANSPORTANDMAP", + "TRANSPORTANDMAPSYMBOLS", + "TRUE", + "U", + "UCAS", + "UCASEXT", + "UGAR", + "UGARITIC", + "UIDEO", + "UNASSIGNED", + "UNIFIEDCANADIANABORIGINALSYLLABICS", + "UNIFIEDCANADIANABORIGINALSYLLABICSEXTENDED", + "UNIFIEDIDEOGRAPH", + "UNKNOWN", + "UP", + "UPPER", + "UPPERCASE", + "UPPERCASELETTER", + "V", + "VAI", + "VAII", + "VARIATIONSELECTOR", + "VARIATIONSELECTORS", + "VARIATIONSELECTORSSUPPLEMENT", + "VEDICEXT", + "VEDICEXTENSIONS", + "VERT", + "VERTICAL", + "VERTICALFORMS", + "VIRAMA", + "VISARGA", + "VISUALORDERLEFT", + "VOWEL", + "VOWELDEPENDENT", + "VOWELINDEPENDENT", + "VOWELJAMO", + "VR", + "VS", + "VSSUP", + "W", + "WAW", + "WB", + "WHITESPACE", + "WIDE", + "WJ", + "WORD", + "WORDBREAK", + "WORDJOINER", + "WS", + "WSPACE", + "XDIGIT", + "XIDC", + "XIDCONTINUE", + "XIDS", + "XIDSTART", + "XPEO", + "XSUX", + "XX", + "Y", + "YEH", + "YEHBARREE", + "YEHWITHTAIL", + "YES", + "YI", + "YIII", + "YIJING", + "YIJINGHEXAGRAMSYMBOLS", + "YIRADICALS", + "YISYLLABLES", + "YUDH", + "YUDHHE", + "Z", + "Z&", + "ZAIN", + "ZHAIN", + "ZINH", + "ZL", + "ZP", + "ZS", + "ZW", + "ZWSPACE", + "ZYYY", + "ZZZZ", +}; + +/* strings: 10595 bytes. */ + +/* properties. 
*/ + +RE_Property re_properties[] = { + { 508, 0, 0}, + { 506, 0, 0}, + { 233, 1, 1}, + { 232, 1, 1}, + { 963, 2, 2}, + { 961, 2, 2}, + {1123, 3, 3}, + {1118, 3, 3}, + { 523, 4, 4}, + { 507, 4, 4}, + { 969, 5, 5}, + { 960, 5, 5}, + { 736, 6, 6}, + { 156, 7, 6}, + { 155, 7, 6}, + { 711, 8, 6}, + { 710, 8, 6}, + {1093, 9, 6}, + {1092, 9, 6}, + { 273, 10, 6}, + { 275, 11, 6}, + { 326, 11, 6}, + { 321, 12, 6}, + { 401, 12, 6}, + { 323, 13, 6}, + { 403, 13, 6}, + { 322, 14, 6}, + { 402, 14, 6}, + { 319, 15, 6}, + { 399, 15, 6}, + { 320, 16, 6}, + { 400, 16, 6}, + { 588, 17, 6}, + { 584, 17, 6}, + { 580, 18, 6}, + { 579, 18, 6}, + {1131, 19, 6}, + {1130, 19, 6}, + {1129, 20, 6}, + {1128, 20, 6}, + { 426, 21, 6}, + { 434, 21, 6}, + { 524, 22, 6}, + { 532, 22, 6}, + { 522, 23, 6}, + { 526, 23, 6}, + { 525, 24, 6}, + { 533, 24, 6}, + {1119, 25, 6}, + {1126, 25, 6}, + { 992, 25, 6}, + { 225, 26, 6}, + { 223, 26, 6}, + { 623, 27, 6}, + { 621, 27, 6}, + { 419, 28, 6}, + { 577, 29, 6}, + { 926, 30, 6}, + { 923, 30, 6}, + {1056, 31, 6}, + {1055, 31, 6}, + { 866, 32, 6}, + { 848, 32, 6}, + { 567, 33, 6}, + { 566, 33, 6}, + { 187, 34, 6}, + { 145, 34, 6}, + { 859, 35, 6}, + { 832, 35, 6}, + { 582, 36, 6}, + { 581, 36, 6}, + { 436, 37, 6}, + { 435, 37, 6}, + { 485, 38, 6}, + { 483, 38, 6}, + { 865, 39, 6}, + { 847, 39, 6}, + { 871, 40, 6}, + { 872, 40, 6}, + { 810, 41, 6}, + { 796, 41, 6}, + { 861, 42, 6}, + { 837, 42, 6}, + { 586, 43, 6}, + { 585, 43, 6}, + { 589, 44, 6}, + { 587, 44, 6}, + { 928, 45, 6}, + {1089, 46, 6}, + {1085, 46, 6}, + { 860, 47, 6}, + { 834, 47, 6}, + { 428, 48, 6}, + { 427, 48, 6}, + { 988, 49, 6}, + { 964, 49, 6}, + { 709, 50, 6}, + { 708, 50, 6}, + { 863, 51, 6}, + { 839, 51, 6}, + { 862, 52, 6}, + { 838, 52, 6}, + {1001, 53, 6}, + {1098, 54, 6}, + {1114, 54, 6}, + { 881, 55, 6}, + { 882, 55, 6}, + { 880, 56, 6}, + { 879, 56, 6}, + { 555, 57, 7}, + { 575, 57, 7}, + { 224, 58, 8}, + { 215, 58, 8}, + { 268, 59, 9}, + { 278, 59, 9}, + { 425, 60, 10}, + { 
449, 60, 10}, + { 453, 61, 11}, + { 452, 61, 11}, + { 624, 62, 12}, + { 619, 62, 12}, + { 625, 63, 13}, + { 626, 63, 13}, + { 701, 64, 14}, + { 678, 64, 14}, + { 827, 65, 15}, + { 821, 65, 15}, + { 828, 66, 16}, + { 830, 66, 16}, + { 227, 67, 6}, + { 226, 67, 6}, + { 592, 68, 17}, + { 600, 68, 17}, + { 594, 69, 18}, + { 601, 69, 18}, + { 159, 70, 6}, + { 154, 70, 6}, + { 166, 71, 6}, + { 231, 72, 6}, + { 521, 73, 6}, + { 911, 74, 6}, + {1122, 75, 6}, + {1127, 76, 6}, +}; + +/* properties: 572 bytes. */ + +/* property values. */ + +RE_PropertyValue re_property_values[] = { + {1086, 0, 0}, + { 357, 0, 0}, + {1094, 0, 1}, + { 718, 0, 1}, + { 712, 0, 2}, + { 705, 0, 2}, + {1066, 0, 3}, + { 717, 0, 3}, + { 775, 0, 4}, + { 706, 0, 4}, + { 864, 0, 5}, + { 707, 0, 5}, + { 813, 0, 6}, + { 774, 0, 6}, + { 467, 0, 7}, + { 744, 0, 7}, + { 994, 0, 8}, + { 743, 0, 8}, + { 424, 0, 9}, + { 797, 0, 9}, + { 440, 0, 9}, + { 693, 0, 10}, + { 805, 0, 10}, + { 868, 0, 11}, + { 806, 0, 11}, + { 993, 0, 12}, + {1155, 0, 12}, + { 703, 0, 13}, + {1153, 0, 13}, + { 878, 0, 14}, + {1154, 0, 14}, + { 384, 0, 15}, + { 277, 0, 15}, + { 358, 0, 15}, + { 499, 0, 16}, + { 316, 0, 16}, + { 912, 0, 17}, + { 359, 0, 17}, + {1023, 0, 18}, + { 393, 0, 18}, + { 420, 0, 19}, + { 884, 0, 19}, + { 851, 0, 20}, + { 915, 0, 20}, + { 355, 0, 21}, + { 887, 0, 21}, + { 374, 0, 22}, + { 883, 0, 22}, + { 869, 0, 23}, + { 903, 0, 23}, + { 741, 0, 24}, + { 982, 0, 24}, + { 397, 0, 25}, + { 961, 0, 25}, + { 777, 0, 26}, + { 981, 0, 26}, + { 870, 0, 27}, + { 987, 0, 27}, + { 599, 0, 28}, + { 900, 0, 28}, + { 494, 0, 29}, + { 888, 0, 29}, + { 858, 0, 30}, + { 261, 0, 30}, + { 262, 0, 30}, + { 691, 0, 31}, + { 656, 0, 31}, + { 657, 0, 31}, + { 735, 0, 32}, + { 727, 0, 32}, + { 365, 0, 32}, + { 728, 0, 32}, + { 824, 0, 33}, + { 789, 0, 33}, + { 790, 0, 33}, + { 918, 0, 34}, + { 876, 0, 34}, + { 917, 0, 34}, + { 877, 0, 34}, + {1028, 0, 35}, + { 950, 0, 35}, + { 951, 0, 35}, + { 971, 0, 36}, + {1148, 0, 36}, + {1149, 0, 
36}, + { 274, 0, 37}, + { 679, 0, 37}, + { 188, 0, 38}, + { 807, 1, 0}, + { 795, 1, 0}, + { 211, 1, 1}, + { 186, 1, 1}, + { 666, 1, 2}, + { 665, 1, 2}, + { 664, 1, 2}, + { 672, 1, 3}, + { 667, 1, 3}, + { 674, 1, 4}, + { 669, 1, 4}, + { 609, 1, 5}, + { 608, 1, 5}, + { 995, 1, 6}, + { 776, 1, 6}, + { 361, 1, 7}, + { 437, 1, 7}, + { 528, 1, 8}, + { 527, 1, 8}, + { 406, 1, 9}, + { 412, 1, 10}, + { 411, 1, 10}, + { 413, 1, 10}, + { 182, 1, 11}, + { 561, 1, 12}, + { 169, 1, 13}, + {1030, 1, 14}, + { 181, 1, 15}, + { 180, 1, 15}, + {1061, 1, 16}, + { 803, 1, 17}, + { 955, 1, 18}, + { 733, 1, 19}, + { 171, 1, 20}, + { 170, 1, 20}, + { 431, 1, 21}, + { 221, 1, 22}, + { 536, 1, 23}, + { 534, 1, 24}, + { 853, 1, 25}, + {1047, 1, 26}, + {1054, 1, 27}, + { 639, 1, 28}, + { 731, 1, 29}, + { 980, 1, 30}, + {1062, 1, 31}, + { 661, 1, 32}, + {1063, 1, 33}, + { 785, 1, 34}, + { 512, 1, 35}, + { 551, 1, 36}, + { 614, 1, 36}, + { 471, 1, 37}, + { 477, 1, 38}, + { 476, 1, 38}, + { 325, 1, 39}, + {1087, 1, 40}, + {1081, 1, 40}, + { 266, 1, 40}, + { 836, 1, 41}, + { 948, 1, 42}, + {1033, 1, 43}, + { 558, 1, 44}, + { 257, 1, 45}, + {1035, 1, 46}, + { 649, 1, 47}, + { 781, 1, 48}, + {1088, 1, 49}, + {1082, 1, 49}, + { 696, 1, 50}, + {1038, 1, 51}, + { 800, 1, 52}, + { 650, 1, 53}, + { 255, 1, 54}, + {1039, 1, 55}, + { 206, 1, 56}, + {1004, 1, 57}, + { 212, 1, 58}, + { 690, 1, 59}, + { 840, 1, 60}, + {1006, 1, 61}, + {1005, 1, 61}, + {1102, 1, 62}, + {1101, 1, 62}, + { 897, 1, 63}, + { 896, 1, 63}, + { 898, 1, 64}, + { 899, 1, 64}, + { 363, 1, 65}, + { 439, 1, 65}, + { 673, 1, 66}, + { 668, 1, 66}, + { 530, 1, 67}, + { 529, 1, 67}, + { 509, 1, 68}, + { 918, 1, 68}, + {1012, 1, 69}, + {1011, 1, 69}, + { 398, 1, 70}, + { 362, 1, 71}, + { 438, 1, 71}, + { 366, 1, 71}, + { 692, 1, 72}, + { 825, 1, 73}, + { 185, 1, 74}, + { 739, 1, 75}, + { 740, 1, 75}, + { 766, 1, 76}, + { 771, 1, 76}, + { 385, 1, 77}, + { 852, 1, 78}, + { 833, 1, 78}, + { 460, 1, 79}, + { 459, 1, 79}, + { 243, 1, 80}, + { 234, 
1, 81}, + { 510, 1, 82}, + { 763, 1, 83}, + { 770, 1, 83}, + { 441, 1, 84}, + { 761, 1, 85}, + { 767, 1, 85}, + {1014, 1, 86}, + {1008, 1, 86}, + { 249, 1, 87}, + { 248, 1, 87}, + {1015, 1, 88}, + {1009, 1, 88}, + { 762, 1, 89}, + { 768, 1, 89}, + {1016, 1, 90}, + {1013, 1, 90}, + { 764, 1, 91}, + { 760, 1, 91}, + { 517, 1, 92}, + { 675, 1, 93}, + { 670, 1, 93}, + { 387, 1, 94}, + { 514, 1, 95}, + { 513, 1, 95}, + {1065, 1, 96}, + { 474, 1, 97}, + { 472, 1, 97}, + { 409, 1, 98}, + { 407, 1, 98}, + {1017, 1, 99}, + {1022, 1, 99}, + { 343, 1, 100}, + { 342, 1, 100}, + { 638, 1, 101}, + { 637, 1, 101}, + { 583, 1, 102}, + { 579, 1, 102}, + { 346, 1, 103}, + { 345, 1, 103}, + { 572, 1, 104}, + { 641, 1, 105}, + { 237, 1, 106}, + { 550, 1, 107}, + { 371, 1, 107}, + { 636, 1, 108}, + { 239, 1, 109}, + { 238, 1, 109}, + { 344, 1, 110}, + { 644, 1, 111}, + { 642, 1, 111}, + { 464, 1, 112}, + { 463, 1, 112}, + { 332, 1, 113}, + { 330, 1, 113}, + { 348, 1, 114}, + { 338, 1, 114}, + {1143, 1, 115}, + {1142, 1, 115}, + { 347, 1, 116}, + { 329, 1, 116}, + {1145, 1, 117}, + {1144, 1, 118}, + { 704, 1, 119}, + {1096, 1, 120}, + { 410, 1, 121}, + { 408, 1, 121}, + { 208, 1, 122}, + { 778, 1, 123}, + { 676, 1, 124}, + { 671, 1, 124}, + {1027, 1, 125}, + { 368, 1, 126}, + { 593, 1, 126}, + { 890, 1, 127}, + { 959, 1, 128}, + { 433, 1, 129}, + { 432, 1, 129}, + { 645, 1, 130}, + { 932, 1, 131}, + { 552, 1, 132}, + { 615, 1, 132}, + { 618, 1, 133}, + { 318, 1, 134}, + { 787, 1, 135}, + { 786, 1, 135}, + {1040, 1, 136}, + { 750, 1, 137}, + { 749, 1, 137}, + { 475, 1, 138}, + { 473, 1, 138}, + { 748, 1, 139}, + { 554, 1, 140}, + { 549, 1, 140}, + { 553, 1, 141}, + { 616, 1, 141}, + { 570, 1, 142}, + { 568, 1, 143}, + { 569, 1, 143}, + { 713, 1, 144}, + { 913, 1, 145}, + { 916, 1, 145}, + { 912, 1, 145}, + { 334, 1, 146}, + { 336, 1, 146}, + { 158, 1, 147}, + { 157, 1, 147}, + { 178, 1, 148}, + { 176, 1, 148}, + {1099, 1, 149}, + {1114, 1, 149}, + {1105, 1, 150}, + { 364, 1, 151}, + { 
543, 1, 151}, + { 333, 1, 152}, + { 331, 1, 152}, + { 985, 1, 153}, + { 984, 1, 153}, + { 179, 1, 154}, + { 177, 1, 154}, + { 545, 1, 155}, + { 542, 1, 155}, + { 996, 1, 156}, + { 700, 1, 157}, + { 699, 1, 158}, + { 144, 1, 159}, + { 164, 1, 160}, + { 165, 1, 161}, + { 892, 1, 162}, + { 891, 1, 162}, + { 724, 1, 163}, + { 271, 1, 164}, + { 842, 1, 165}, + { 520, 1, 166}, + {1084, 1, 167}, + { 843, 1, 168}, + { 429, 1, 169}, + { 974, 1, 170}, + { 857, 1, 171}, + { 405, 1, 172}, + { 590, 1, 173}, + { 895, 1, 174}, + { 726, 1, 175}, + { 754, 1, 176}, + { 753, 1, 177}, + { 648, 1, 178}, + { 844, 1, 179}, + { 200, 1, 180}, + { 603, 1, 181}, + { 602, 1, 182}, + { 845, 1, 183}, + { 947, 1, 184}, + { 946, 1, 184}, + { 246, 1, 185}, + { 630, 1, 186}, + { 990, 1, 187}, + { 317, 1, 188}, + { 973, 1, 189}, + {1044, 1, 190}, + { 394, 1, 191}, + { 396, 1, 192}, + { 395, 1, 192}, + { 455, 1, 193}, + { 210, 1, 194}, + { 209, 1, 194}, + { 755, 1, 195}, + { 634, 1, 196}, + { 633, 1, 196}, + { 260, 1, 197}, + { 259, 1, 197}, + { 784, 1, 198}, + { 783, 1, 198}, + { 163, 1, 199}, + { 162, 1, 199}, + {1042, 1, 200}, + {1041, 1, 200}, + { 389, 1, 201}, + { 388, 1, 201}, + { 738, 1, 202}, + { 737, 1, 202}, + { 174, 1, 203}, + { 173, 1, 203}, + { 730, 1, 204}, + { 729, 1, 204}, + { 443, 1, 205}, + { 442, 1, 205}, + { 901, 1, 206}, + { 461, 1, 207}, + { 462, 1, 207}, + { 466, 1, 208}, + { 465, 1, 208}, + { 765, 1, 209}, + { 769, 1, 209}, + { 456, 1, 210}, + {1078, 1, 211}, + {1077, 1, 211}, + { 151, 1, 212}, + { 150, 1, 212}, + { 349, 1, 213}, + { 339, 1, 213}, + { 350, 1, 214}, + { 340, 1, 214}, + { 351, 1, 215}, + { 341, 1, 215}, + { 335, 1, 216}, + { 337, 1, 216}, + {1036, 1, 217}, + {1100, 1, 218}, + {1115, 1, 218}, + {1018, 1, 219}, + {1020, 1, 219}, + {1019, 1, 220}, + {1021, 1, 220}, + {1090, 2, 0}, + {1159, 2, 0}, + { 367, 2, 1}, + {1158, 2, 1}, + { 663, 2, 2}, + { 677, 2, 2}, + { 527, 2, 3}, + { 531, 2, 3}, + { 406, 2, 4}, + { 414, 2, 4}, + { 182, 2, 5}, + { 184, 2, 5}, + { 561, 2, 
6}, + { 560, 2, 6}, + { 169, 2, 7}, + { 168, 2, 7}, + {1030, 2, 8}, + {1029, 2, 8}, + {1061, 2, 9}, + {1060, 2, 9}, + { 431, 2, 10}, + { 430, 2, 10}, + { 221, 2, 11}, + { 220, 2, 11}, + { 536, 2, 12}, + { 537, 2, 12}, + { 534, 2, 13}, + { 535, 2, 13}, + { 853, 2, 14}, + { 855, 2, 14}, + {1047, 2, 15}, + {1048, 2, 15}, + {1054, 2, 16}, + {1053, 2, 16}, + { 639, 2, 17}, + { 652, 2, 17}, + { 731, 2, 18}, + { 773, 2, 18}, + { 980, 2, 19}, + { 979, 2, 19}, + {1062, 2, 20}, + { 661, 2, 21}, + { 662, 2, 21}, + {1063, 2, 22}, + {1064, 2, 22}, + { 785, 2, 23}, + { 788, 2, 23}, + { 512, 2, 24}, + { 511, 2, 24}, + { 549, 2, 25}, + { 548, 2, 25}, + { 471, 2, 26}, + { 470, 2, 26}, + { 325, 2, 27}, + { 324, 2, 27}, + { 265, 2, 28}, + { 269, 2, 28}, + { 836, 2, 29}, + { 835, 2, 29}, + { 948, 2, 30}, + { 949, 2, 30}, + { 649, 2, 31}, + { 651, 2, 31}, + { 781, 2, 32}, + { 780, 2, 32}, + { 572, 2, 33}, + { 571, 2, 33}, + { 641, 2, 34}, + { 632, 2, 34}, + { 237, 2, 35}, + { 236, 2, 35}, + { 547, 2, 36}, + { 556, 2, 36}, + {1140, 2, 37}, + {1141, 2, 37}, + { 842, 2, 38}, + { 613, 2, 38}, + { 520, 2, 39}, + { 519, 2, 39}, + { 429, 2, 40}, + { 448, 2, 40}, + { 596, 2, 41}, + {1152, 2, 41}, + { 920, 2, 41}, + {1033, 2, 42}, + {1059, 2, 42}, + { 558, 2, 43}, + { 557, 2, 43}, + { 257, 2, 44}, + { 256, 2, 44}, + {1035, 2, 45}, + {1034, 2, 45}, + { 696, 2, 46}, + { 695, 2, 46}, + {1038, 2, 47}, + {1045, 2, 47}, + { 698, 2, 48}, + { 697, 2, 48}, + {1084, 2, 49}, + {1083, 2, 49}, + { 974, 2, 50}, + { 975, 2, 50}, + { 857, 2, 51}, + { 856, 2, 51}, + { 404, 2, 52}, + { 391, 2, 52}, + { 248, 2, 53}, + { 247, 2, 53}, + { 255, 2, 54}, + { 254, 2, 54}, + { 387, 2, 55}, + { 386, 2, 55}, + { 919, 2, 55}, + { 800, 2, 56}, + {1046, 2, 56}, + { 517, 2, 57}, + { 516, 2, 57}, + {1065, 2, 58}, + {1058, 2, 58}, + {1027, 2, 59}, + {1026, 2, 59}, + { 843, 2, 60}, + {1132, 2, 60}, + { 648, 2, 61}, + { 647, 2, 61}, + { 206, 2, 62}, + { 205, 2, 62}, + { 394, 2, 63}, + {1133, 2, 63}, + { 895, 2, 64}, + { 894, 2, 
64}, + { 890, 2, 65}, + { 889, 2, 65}, + { 803, 2, 66}, + { 804, 2, 66}, + {1004, 2, 67}, + {1003, 2, 67}, + { 690, 2, 68}, + { 689, 2, 68}, + { 840, 2, 69}, + { 841, 2, 69}, + {1096, 2, 70}, + {1097, 2, 70}, + { 959, 2, 71}, + { 958, 2, 71}, + { 645, 2, 72}, + { 631, 2, 72}, + { 932, 2, 73}, + { 941, 2, 73}, + { 724, 2, 74}, + { 723, 2, 74}, + { 271, 2, 75}, + { 270, 2, 75}, + { 726, 2, 76}, + { 725, 2, 76}, + { 318, 2, 77}, + {1039, 2, 78}, + { 660, 2, 78}, + {1040, 2, 79}, + {1049, 2, 79}, + { 200, 2, 80}, + { 201, 2, 80}, + { 455, 2, 81}, + { 454, 2, 81}, + { 955, 2, 82}, + { 956, 2, 82}, + { 704, 2, 83}, + { 208, 2, 84}, + { 207, 2, 84}, + { 618, 2, 85}, + { 617, 2, 85}, + { 748, 2, 86}, + { 782, 2, 86}, + { 590, 2, 87}, + { 183, 2, 87}, + { 844, 2, 88}, + { 957, 2, 88}, + { 603, 2, 89}, + { 914, 2, 89}, + { 602, 2, 90}, + { 893, 2, 90}, + { 845, 2, 91}, + { 854, 2, 91}, + { 630, 2, 92}, + { 654, 2, 92}, + { 212, 2, 93}, + { 213, 2, 93}, + { 246, 2, 94}, + { 245, 2, 94}, + { 733, 2, 95}, + { 732, 2, 95}, + { 317, 2, 96}, + { 263, 2, 96}, + { 753, 2, 97}, + { 751, 2, 97}, + { 754, 2, 98}, + { 752, 2, 98}, + { 755, 2, 99}, + { 902, 2, 99}, + { 973, 2, 100}, + { 977, 2, 100}, + { 990, 2, 101}, + { 989, 2, 101}, + {1044, 2, 102}, + {1043, 2, 102}, + { 643, 2, 103}, + { 574, 2, 103}, + { 858, 3, 0}, + {1134, 3, 0}, + { 446, 3, 1}, + { 447, 3, 1}, + { 978, 3, 2}, + { 997, 3, 2}, + { 562, 3, 3}, + { 573, 3, 3}, + { 392, 3, 4}, + { 694, 3, 5}, + { 799, 3, 6}, + { 805, 3, 6}, + { 484, 3, 7}, + { 929, 3, 8}, + { 934, 3, 8}, + { 499, 3, 9}, + { 497, 3, 9}, + { 641, 3, 10}, + { 628, 3, 10}, + { 153, 3, 11}, + { 680, 3, 11}, + { 756, 3, 12}, + { 772, 3, 12}, + { 757, 3, 13}, + { 774, 3, 13}, + { 758, 3, 14}, + { 742, 3, 14}, + { 826, 3, 15}, + { 822, 3, 15}, + { 486, 3, 16}, + { 481, 3, 16}, + { 858, 4, 0}, + {1134, 4, 0}, + { 392, 4, 1}, + { 694, 4, 2}, + { 384, 4, 3}, + { 357, 4, 3}, + { 484, 4, 4}, + { 481, 4, 4}, + { 929, 4, 5}, + { 934, 4, 5}, + { 994, 4, 6}, + { 982, 
4, 6}, + { 656, 4, 7}, + {1095, 4, 8}, + {1032, 4, 9}, + { 719, 4, 10}, + { 721, 4, 11}, + { 910, 4, 12}, + { 907, 4, 12}, + { 858, 5, 0}, + {1134, 5, 0}, + { 392, 5, 1}, + { 694, 5, 2}, + { 484, 5, 3}, + { 481, 5, 3}, + { 970, 5, 4}, + { 965, 5, 4}, + { 499, 5, 5}, + { 497, 5, 5}, + { 991, 5, 6}, + { 710, 5, 7}, + { 707, 5, 7}, + {1092, 5, 8}, + {1091, 5, 8}, + { 846, 5, 9}, + { 680, 5, 9}, + { 826, 5, 10}, + { 822, 5, 10}, + { 194, 5, 11}, + { 189, 5, 11}, + {1001, 5, 12}, + {1000, 5, 12}, + { 353, 5, 13}, + { 352, 5, 13}, + { 962, 5, 14}, + { 961, 5, 14}, + { 806, 6, 0}, + { 789, 6, 0}, + { 487, 6, 0}, + { 488, 6, 0}, + {1139, 6, 1}, + {1135, 6, 1}, + {1032, 6, 1}, + {1079, 6, 1}, + { 816, 7, 0}, + { 791, 7, 0}, + { 681, 7, 1}, + { 656, 7, 1}, + {1112, 7, 2}, + {1095, 7, 2}, + {1075, 7, 3}, + {1032, 7, 3}, + { 720, 7, 4}, + { 719, 7, 4}, + { 722, 7, 5}, + { 721, 7, 5}, + { 685, 8, 0}, + { 656, 8, 0}, + { 937, 8, 1}, + { 927, 8, 1}, + { 478, 8, 2}, + { 457, 8, 2}, + { 479, 8, 3}, + { 468, 8, 3}, + { 480, 8, 4}, + { 469, 8, 4}, + { 175, 8, 5}, + { 161, 8, 5}, + { 369, 8, 6}, + { 393, 8, 6}, + { 878, 8, 7}, + { 202, 8, 7}, + { 967, 8, 8}, + { 950, 8, 8}, + {1119, 8, 9}, + {1125, 8, 9}, + { 867, 8, 10}, + { 849, 8, 10}, + { 242, 8, 11}, + { 235, 8, 11}, + { 813, 8, 12}, + { 820, 8, 12}, + { 172, 8, 13}, + { 148, 8, 13}, + { 688, 8, 14}, + { 716, 8, 14}, + { 940, 8, 15}, + { 944, 8, 15}, + { 686, 8, 16}, + { 714, 8, 16}, + { 938, 8, 17}, + { 942, 8, 17}, + { 904, 8, 18}, + { 885, 8, 18}, + { 687, 8, 19}, + { 715, 8, 19}, + { 939, 8, 20}, + { 943, 8, 20}, + { 496, 8, 21}, + { 502, 8, 21}, + { 905, 8, 22}, + { 886, 8, 22}, + { 817, 9, 0}, + { 1, 9, 0}, + { 818, 9, 0}, + { 874, 9, 1}, + { 2, 9, 1}, + { 873, 9, 1}, + { 823, 9, 2}, + { 120, 9, 2}, + { 802, 9, 2}, + { 635, 9, 3}, + { 127, 9, 3}, + { 655, 9, 3}, + {1106, 9, 4}, + { 133, 9, 4}, + {1113, 9, 4}, + { 279, 9, 5}, + { 13, 9, 5}, + { 282, 9, 6}, + { 22, 9, 6}, + { 284, 9, 7}, + { 25, 9, 7}, + { 287, 9, 8}, + { 28, 
9, 8}, + { 291, 9, 9}, + { 33, 9, 9}, + { 292, 9, 10}, + { 34, 9, 10}, + { 293, 9, 11}, + { 36, 9, 11}, + { 294, 9, 12}, + { 37, 9, 12}, + { 295, 9, 13}, + { 39, 9, 13}, + { 296, 9, 14}, + { 40, 9, 14}, + { 297, 9, 15}, + { 44, 9, 15}, + { 298, 9, 16}, + { 49, 9, 16}, + { 299, 9, 17}, + { 54, 9, 17}, + { 300, 9, 18}, + { 60, 9, 18}, + { 301, 9, 19}, + { 65, 9, 19}, + { 302, 9, 20}, + { 67, 9, 20}, + { 303, 9, 21}, + { 68, 9, 21}, + { 304, 9, 22}, + { 69, 9, 22}, + { 305, 9, 23}, + { 70, 9, 23}, + { 306, 9, 24}, + { 71, 9, 24}, + { 307, 9, 25}, + { 78, 9, 25}, + { 308, 9, 26}, + { 82, 9, 26}, + { 309, 9, 27}, + { 83, 9, 27}, + { 310, 9, 28}, + { 84, 9, 28}, + { 311, 9, 29}, + { 85, 9, 29}, + { 312, 9, 30}, + { 86, 9, 30}, + { 313, 9, 31}, + { 87, 9, 31}, + { 314, 9, 32}, + { 132, 9, 32}, + { 315, 9, 33}, + { 139, 9, 33}, + { 280, 9, 34}, + { 20, 9, 34}, + { 281, 9, 35}, + { 21, 9, 35}, + { 283, 9, 36}, + { 24, 9, 36}, + { 285, 9, 37}, + { 26, 9, 37}, + { 286, 9, 38}, + { 27, 9, 38}, + { 288, 9, 39}, + { 30, 9, 39}, + { 289, 9, 40}, + { 31, 9, 40}, + { 197, 9, 41}, + { 48, 9, 41}, + { 192, 9, 41}, + { 195, 9, 42}, + { 50, 9, 42}, + { 190, 9, 42}, + { 196, 9, 43}, + { 51, 9, 43}, + { 191, 9, 43}, + { 218, 9, 44}, + { 53, 9, 44}, + { 230, 9, 44}, + { 217, 9, 45}, + { 55, 9, 45}, + { 202, 9, 45}, + { 219, 9, 46}, + { 56, 9, 46}, + { 244, 9, 46}, + { 682, 9, 47}, + { 57, 9, 47}, + { 656, 9, 47}, + { 935, 9, 48}, + { 58, 9, 48}, + { 927, 9, 48}, + { 142, 9, 49}, + { 59, 9, 49}, + { 148, 9, 49}, + { 141, 9, 50}, + { 61, 9, 50}, + { 140, 9, 50}, + { 143, 9, 51}, + { 62, 9, 51}, + { 167, 9, 51}, + { 445, 9, 52}, + { 63, 9, 52}, + { 421, 9, 52}, + { 444, 9, 53}, + { 64, 9, 53}, + { 416, 9, 53}, + { 607, 9, 54}, + { 66, 9, 54}, + { 610, 9, 54}, + { 290, 9, 55}, + { 32, 9, 55}, + { 198, 9, 56}, + { 45, 9, 56}, + { 193, 9, 56}, + { 811, 10, 0}, + { 267, 10, 1}, + { 264, 10, 1}, + { 370, 10, 2}, + { 360, 10, 2}, + { 498, 10, 3}, + { 808, 10, 4}, + { 795, 10, 4}, + { 598, 10, 5}, 
+ { 597, 10, 5}, + { 746, 10, 6}, + { 745, 10, 6}, + { 493, 10, 7}, + { 492, 10, 7}, + { 612, 10, 8}, + { 611, 10, 8}, + { 327, 10, 9}, + { 458, 10, 9}, + {1010, 10, 10}, + {1007, 10, 10}, + {1002, 10, 11}, + {1104, 10, 12}, + {1103, 10, 12}, + {1120, 10, 13}, + { 794, 10, 14}, + { 793, 10, 14}, + { 983, 10, 15}, + { 986, 10, 15}, + { 999, 10, 16}, + { 998, 10, 16}, + { 501, 10, 17}, + { 500, 10, 17}, + { 798, 11, 0}, + { 789, 11, 0}, + { 160, 11, 1}, + { 140, 11, 1}, + { 544, 11, 2}, + { 538, 11, 2}, + {1120, 11, 3}, + {1116, 11, 3}, + { 503, 11, 4}, + { 487, 11, 4}, + { 794, 11, 5}, + { 791, 11, 5}, + { 809, 12, 0}, + { 147, 12, 1}, + { 149, 12, 2}, + { 152, 12, 3}, + { 216, 12, 4}, + { 222, 12, 5}, + { 417, 12, 6}, + { 418, 12, 7}, + { 451, 12, 8}, + { 491, 12, 9}, + { 495, 12, 10}, + { 504, 12, 11}, + { 505, 12, 12}, + { 541, 12, 13}, + { 546, 12, 14}, + {1052, 12, 14}, + { 559, 12, 15}, + { 563, 12, 16}, + { 564, 12, 17}, + { 565, 12, 18}, + { 629, 12, 19}, + { 640, 12, 20}, + { 653, 12, 21}, + { 658, 12, 22}, + { 659, 12, 23}, + { 747, 12, 24}, + { 759, 12, 25}, + { 815, 12, 26}, + { 829, 12, 27}, + { 887, 12, 28}, + { 921, 12, 29}, + { 922, 12, 30}, + { 931, 12, 31}, + { 933, 12, 32}, + { 953, 12, 33}, + { 954, 12, 34}, + { 966, 12, 35}, + { 968, 12, 36}, + { 976, 12, 37}, + {1024, 12, 38}, + {1037, 12, 39}, + {1050, 12, 40}, + {1051, 12, 41}, + {1057, 12, 42}, + {1117, 12, 43}, + {1031, 12, 44}, + {1136, 12, 45}, + {1137, 12, 46}, + {1138, 12, 47}, + {1146, 12, 48}, + {1147, 12, 49}, + {1150, 12, 50}, + {1151, 12, 51}, + { 646, 12, 52}, + { 490, 12, 53}, + { 258, 12, 54}, + { 489, 12, 55}, + { 831, 12, 56}, + { 945, 12, 57}, + { 812, 13, 0}, + {1080, 13, 0}, + { 622, 13, 1}, + { 261, 13, 1}, + { 450, 13, 2}, + { 415, 13, 2}, + { 936, 13, 3}, + { 927, 13, 3}, + { 684, 13, 4}, + { 656, 13, 4}, + {1076, 13, 5}, + {1032, 13, 5}, + {1090, 14, 0}, + {1134, 14, 0}, + { 851, 14, 1}, + { 850, 14, 1}, + { 355, 14, 2}, + { 352, 14, 2}, + { 925, 14, 3}, + { 924, 14, 
3}, + { 518, 14, 4}, + { 515, 14, 4}, + { 814, 14, 5}, + { 819, 14, 5}, + { 482, 14, 6}, + { 481, 14, 6}, + { 253, 14, 7}, + {1025, 14, 7}, + { 595, 14, 8}, + { 610, 14, 8}, + { 909, 14, 9}, + { 908, 14, 9}, + { 906, 14, 10}, + { 903, 14, 10}, + { 826, 14, 11}, + { 822, 14, 11}, + { 156, 14, 12}, + { 148, 14, 12}, + { 582, 14, 13}, + { 578, 14, 13}, + { 604, 14, 14}, + { 591, 14, 14}, + { 605, 14, 14}, + { 577, 14, 15}, + { 576, 14, 15}, + { 365, 14, 16}, + { 356, 14, 16}, + { 251, 14, 17}, + { 214, 14, 17}, + { 250, 14, 18}, + { 204, 14, 18}, + { 992, 14, 19}, + { 991, 14, 19}, + { 734, 14, 20}, + { 229, 14, 20}, + { 272, 14, 21}, + { 392, 14, 21}, + { 702, 14, 22}, + { 694, 14, 22}, + { 383, 14, 23}, + { 276, 14, 23}, + { 372, 14, 24}, + { 952, 14, 24}, + { 160, 14, 25}, + { 146, 14, 25}, + { 252, 14, 26}, + { 203, 14, 26}, + {1023, 14, 27}, + { 972, 14, 27}, + {1157, 14, 28}, + {1156, 14, 28}, + { 801, 14, 29}, + { 805, 14, 29}, + {1124, 14, 30}, + {1121, 14, 30}, + { 620, 14, 31}, + { 627, 14, 32}, + { 626, 14, 33}, + { 539, 14, 34}, + { 540, 14, 35}, + { 354, 14, 36}, + { 390, 14, 36}, + { 562, 14, 37}, + { 573, 14, 37}, + { 373, 14, 38}, + { 328, 14, 38}, + { 929, 14, 39}, + { 934, 14, 39}, + { 811, 15, 0}, + { 826, 15, 1}, + { 822, 15, 1}, + { 440, 15, 2}, + { 434, 15, 2}, + { 423, 15, 3}, + { 422, 15, 3}, + { 792, 16, 0}, + { 0, 16, 1}, + { 1, 16, 2}, + { 4, 16, 3}, + { 3, 16, 4}, + { 12, 16, 5}, + { 11, 16, 6}, + { 10, 16, 7}, + { 9, 16, 8}, + { 73, 16, 9}, + { 8, 16, 10}, + { 7, 16, 11}, + { 6, 16, 12}, + { 77, 16, 13}, + { 43, 16, 14}, + { 5, 16, 15}, + { 76, 16, 16}, + { 110, 16, 17}, + { 42, 16, 18}, + { 75, 16, 19}, + { 92, 16, 20}, + { 109, 16, 21}, + { 122, 16, 22}, + { 2, 16, 23}, + { 74, 16, 24}, + { 41, 16, 25}, + { 108, 16, 26}, + { 72, 16, 27}, + { 121, 16, 28}, + { 91, 16, 29}, + { 134, 16, 30}, + { 107, 16, 31}, + { 23, 16, 32}, + { 115, 16, 33}, + { 29, 16, 34}, + { 120, 16, 35}, + { 35, 16, 36}, + { 127, 16, 37}, + { 38, 16, 38}, + { 133, 
16, 39}, + { 13, 16, 40}, + { 22, 16, 41}, + { 25, 16, 42}, + { 28, 16, 43}, + { 33, 16, 44}, + { 34, 16, 45}, + { 36, 16, 46}, + { 37, 16, 47}, + { 39, 16, 48}, + { 40, 16, 49}, + { 44, 16, 50}, + { 49, 16, 51}, + { 54, 16, 52}, + { 60, 16, 53}, + { 65, 16, 54}, + { 67, 16, 55}, + { 68, 16, 56}, + { 69, 16, 57}, + { 70, 16, 58}, + { 71, 16, 59}, + { 78, 16, 60}, + { 82, 16, 61}, + { 83, 16, 62}, + { 84, 16, 63}, + { 85, 16, 64}, + { 86, 16, 65}, + { 87, 16, 66}, + { 88, 16, 67}, + { 89, 16, 68}, + { 90, 16, 69}, + { 93, 16, 70}, + { 97, 16, 71}, + { 98, 16, 72}, + { 99, 16, 73}, + { 101, 16, 74}, + { 102, 16, 75}, + { 103, 16, 76}, + { 104, 16, 77}, + { 105, 16, 78}, + { 106, 16, 79}, + { 111, 16, 80}, + { 116, 16, 81}, + { 123, 16, 82}, + { 128, 16, 83}, + { 135, 16, 84}, + { 14, 16, 85}, + { 45, 16, 86}, + { 79, 16, 87}, + { 94, 16, 88}, + { 112, 16, 89}, + { 117, 16, 90}, + { 124, 16, 91}, + { 129, 16, 92}, + { 136, 16, 93}, + { 15, 16, 94}, + { 46, 16, 95}, + { 80, 16, 96}, + { 95, 16, 97}, + { 113, 16, 98}, + { 118, 16, 99}, + { 125, 16, 100}, + { 130, 16, 101}, + { 137, 16, 102}, + { 16, 16, 103}, + { 47, 16, 104}, + { 81, 16, 105}, + { 96, 16, 106}, + { 114, 16, 107}, + { 119, 16, 108}, + { 126, 16, 109}, + { 131, 16, 110}, + { 138, 16, 111}, + { 17, 16, 112}, + { 52, 16, 113}, + { 100, 16, 114}, + { 18, 16, 115}, + { 19, 16, 116}, + { 791, 17, 0}, + { 935, 17, 1}, + { 682, 17, 2}, + {1108, 17, 3}, + { 683, 17, 4}, + {1069, 17, 5}, + { 240, 17, 6}, + {1070, 17, 7}, + {1074, 17, 8}, + {1072, 17, 9}, + {1073, 17, 10}, + { 241, 17, 11}, + {1071, 17, 12}, + { 875, 17, 13}, + { 606, 17, 14}, + { 858, 18, 0}, + { 228, 18, 1}, + {1107, 18, 2}, + { 199, 18, 3}, + { 823, 18, 4}, + {1106, 18, 5}, + {1111, 18, 6}, + {1110, 18, 7}, + {1109, 18, 8}, + { 380, 18, 9}, + { 375, 18, 10}, + { 376, 18, 11}, + { 381, 18, 12}, + { 382, 18, 13}, + { 379, 18, 14}, + { 377, 18, 15}, + { 378, 18, 16}, + { 779, 18, 17}, + {1067, 18, 18}, + {1068, 18, 19}, + { 930, 18, 20}, +}; + +/* 
property values: 5004 bytes. */ + +/* Codepoints which expand on full case-folding. */ + +RE_UINT16 re_expand_on_folding[] = { + 223, 304, 329, 496, 912, 944, 1415, 7830, + 7831, 7832, 7833, 7834, 7838, 8016, 8018, 8020, + 8022, 8064, 8065, 8066, 8067, 8068, 8069, 8070, + 8071, 8072, 8073, 8074, 8075, 8076, 8077, 8078, + 8079, 8080, 8081, 8082, 8083, 8084, 8085, 8086, + 8087, 8088, 8089, 8090, 8091, 8092, 8093, 8094, + 8095, 8096, 8097, 8098, 8099, 8100, 8101, 8102, + 8103, 8104, 8105, 8106, 8107, 8108, 8109, 8110, + 8111, 8114, 8115, 8116, 8118, 8119, 8124, 8130, + 8131, 8132, 8134, 8135, 8140, 8146, 8147, 8150, + 8151, 8162, 8163, 8164, 8166, 8167, 8178, 8179, + 8180, 8182, 8183, 8188, 64256, 64257, 64258, 64259, + 64260, 64261, 64262, 64275, 64276, 64277, 64278, 64279, +}; + +/* expand_on_folding: 208 bytes. */ + +/* General_Category. */ + +static RE_UINT8 re_general_category_stage_1[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 9, 10, 11, 7, 7, 7, 7, 12, 13, 14, 14, 14, 15, + 16, 17, 18, 19, 20, 19, 21, 19, 19, 19, 19, 19, 19, 22, 19, 19, + 19, 19, 19, 19, 19, 19, 23, 19, 19, 19, 24, 19, 19, 25, 26, 19, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 27, 7, 28, 29, 19, 19, 19, 19, 19, 19, 19, 30, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 31, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 32, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 32, +}; + +static RE_UINT8 re_general_category_stage_2[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 34, 35, 36, 37, 38, 39, 34, 34, 34, 40, 41, 42, 43, + 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, + 60, 61, 62, 63, 64, 64, 65, 66, 67, 68, 69, 70, 71, 69, 72, 73, + 69, 69, 64, 74, 64, 64, 75, 76, 77, 78, 79, 80, 81, 82, 69, 83, + 84, 85, 86, 87, 88, 89, 69, 69, 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 90, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 91, + 92, 34, 34, 34, 34, 34, 34, 34, 34, 93, 34, 34, 94, 95, 96, 97, + 98, 99, 100, 101, 102, 103, 104, 105, 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 
34, 34, 34, 34, 106, + 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, + 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, + 108, 108, 34, 34, 109, 110, 111, 112, 34, 34, 113, 114, 115, 116, 117, 118, + 119, 120, 121, 122, 76, 123, 124, 125, 126, 127, 76, 76, 76, 76, 76, 76, + 128, 76, 129, 130, 131, 76, 132, 76, 133, 76, 76, 76, 134, 76, 76, 76, + 135, 136, 137, 138, 76, 76, 76, 76, 76, 76, 76, 76, 76, 139, 76, 76, + 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, + 34, 34, 34, 34, 34, 34, 140, 76, 141, 76, 76, 76, 76, 76, 76, 76, + 34, 34, 34, 34, 34, 34, 34, 34, 142, 76, 76, 76, 76, 76, 76, 76, + 34, 34, 34, 34, 143, 76, 76, 76, 76, 76, 76, 76, 76, 76, 144, 145, + 146, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, + 69, 147, 148, 149, 150, 76, 151, 76, 152, 153, 154, 155, 156, 157, 158, 159, + 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 160, 161, 76, 76, + 162, 163, 164, 165, 166, 76, 167, 168, 169, 170, 171, 172, 173, 174, 175, 76, + 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 176, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 177, 34, + 178, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, + 34, 34, 34, 34, 178, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, + 179, 76, 180, 181, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, + 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 182, +}; + +static RE_UINT16 re_general_category_stage_3[] = { + 0, 0, 1, 2, 3, 4, 5, 6, 0, 0, 7, 8, 9, 10, 11, 12, + 13, 13, 13, 14, 15, 13, 13, 16, 17, 18, 19, 20, 21, 22, 13, 23, + 13, 13, 13, 24, 25, 11, 11, 11, 11, 26, 11, 27, 28, 29, 30, 31, + 32, 32, 32, 32, 32, 32, 32, 33, 34, 35, 36, 11, 37, 38, 13, 39, + 9, 9, 9, 11, 11, 11, 13, 13, 40, 13, 13, 13, 41, 13, 13, 13, + 13, 13, 42, 43, 9, 44, 45, 11, 46, 47, 32, 48, 49, 50, 51, 52, + 53, 54, 50, 50, 55, 32, 56, 57, 50, 50, 50, 50, 50, 58, 59, 60, + 61, 62, 50, 32, 63, 50, 50, 
50, 50, 50, 64, 65, 66, 50, 67, 68, + 50, 69, 70, 71, 50, 72, 73, 73, 73, 73, 74, 73, 73, 73, 75, 76, + 77, 50, 50, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, + 91, 84, 85, 92, 93, 94, 95, 96, 97, 98, 85, 99, 100, 101, 89, 102, + 83, 84, 85, 103, 104, 105, 89, 106, 107, 108, 109, 110, 111, 112, 95, 113, + 114, 115, 85, 116, 117, 118, 89, 119, 120, 115, 85, 121, 122, 123, 89, 124, + 120, 115, 50, 125, 126, 127, 89, 128, 129, 130, 50, 131, 132, 133, 73, 134, + 135, 50, 50, 136, 137, 138, 73, 73, 139, 140, 141, 142, 143, 144, 73, 73, + 145, 146, 147, 148, 149, 50, 150, 151, 152, 153, 32, 154, 155, 156, 73, 73, + 50, 50, 157, 158, 159, 160, 161, 162, 163, 164, 9, 9, 165, 50, 50, 166, + 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 167, 168, 50, 50, + 167, 50, 50, 169, 170, 171, 50, 50, 50, 170, 50, 50, 50, 172, 173, 174, + 50, 175, 50, 50, 50, 50, 50, 176, 177, 50, 50, 50, 50, 50, 50, 50, + 50, 50, 50, 50, 50, 50, 178, 50, 179, 180, 50, 50, 50, 50, 181, 182, + 183, 184, 50, 185, 50, 186, 183, 187, 50, 50, 50, 188, 189, 190, 191, 192, + 193, 191, 50, 50, 194, 50, 50, 195, 50, 50, 196, 50, 50, 50, 50, 197, + 50, 150, 198, 199, 200, 50, 201, 176, 50, 50, 202, 203, 204, 205, 206, 206, + 50, 207, 50, 50, 50, 208, 209, 210, 191, 191, 211, 73, 73, 73, 73, 73, + 212, 50, 50, 213, 214, 159, 215, 216, 217, 50, 218, 66, 50, 50, 219, 220, + 50, 50, 221, 222, 223, 66, 50, 224, 73, 73, 73, 73, 225, 226, 227, 228, + 11, 11, 229, 27, 27, 27, 230, 231, 11, 232, 27, 27, 32, 32, 233, 234, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 235, 13, 13, 13, 13, 13, 13, + 236, 237, 236, 236, 237, 238, 236, 239, 240, 240, 240, 241, 242, 243, 244, 245, + 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 73, 258, 259, 260, + 261, 262, 263, 264, 265, 266, 267, 267, 268, 269, 270, 206, 271, 272, 206, 273, + 274, 274, 274, 274, 274, 274, 274, 274, 275, 206, 276, 206, 206, 206, 206, 277, + 206, 278, 274, 279, 206, 280, 281, 282, 206, 206, 283, 73, 284, 73, 266, 266, + 266, 285, 206, 206, 
206, 206, 286, 266, 206, 206, 206, 206, 206, 206, 206, 206, + 206, 206, 206, 287, 288, 206, 206, 289, 206, 206, 206, 206, 206, 206, 290, 206, + 291, 206, 206, 206, 206, 206, 292, 293, 266, 294, 206, 206, 295, 274, 296, 274, + 297, 298, 274, 274, 274, 299, 274, 300, 206, 206, 206, 274, 301, 175, 73, 73, + 73, 73, 73, 73, 73, 73, 73, 73, 9, 9, 302, 11, 11, 303, 304, 305, + 13, 13, 13, 13, 13, 13, 306, 307, 11, 11, 308, 50, 50, 50, 309, 310, + 50, 311, 312, 312, 312, 312, 32, 32, 313, 314, 315, 316, 73, 73, 73, 73, + 206, 317, 206, 206, 206, 206, 206, 282, 206, 206, 206, 206, 206, 318, 73, 319, + 320, 321, 322, 323, 135, 50, 50, 50, 50, 324, 177, 50, 50, 50, 50, 325, + 326, 50, 201, 135, 50, 50, 50, 50, 327, 328, 50, 51, 206, 206, 282, 50, + 206, 329, 330, 206, 331, 332, 206, 206, 330, 206, 206, 332, 206, 206, 206, 329, + 50, 50, 50, 197, 206, 206, 206, 206, 50, 50, 50, 50, 150, 73, 73, 73, + 50, 333, 50, 50, 50, 50, 50, 50, 150, 206, 206, 206, 283, 50, 50, 224, + 334, 50, 335, 73, 13, 13, 336, 337, 13, 338, 50, 50, 50, 50, 339, 340, + 31, 341, 342, 343, 13, 13, 13, 344, 345, 346, 347, 73, 73, 73, 73, 348, + 349, 50, 350, 351, 50, 50, 50, 352, 353, 50, 50, 354, 355, 191, 32, 356, + 66, 50, 357, 50, 358, 359, 50, 150, 77, 50, 50, 360, 361, 362, 73, 73, + 50, 50, 363, 364, 365, 366, 50, 367, 50, 50, 50, 368, 369, 370, 371, 372, + 373, 374, 312, 73, 73, 73, 73, 73, 73, 73, 73, 73, 50, 50, 375, 191, + 50, 50, 376, 50, 377, 50, 50, 202, 378, 378, 378, 378, 378, 378, 378, 378, + 379, 379, 379, 379, 379, 379, 379, 379, 50, 50, 50, 50, 50, 50, 201, 50, + 50, 50, 50, 50, 50, 380, 73, 73, 381, 382, 383, 384, 385, 50, 50, 50, + 50, 50, 50, 386, 387, 388, 50, 50, 50, 50, 50, 389, 73, 50, 50, 50, + 50, 390, 50, 50, 195, 73, 73, 391, 32, 392, 233, 393, 394, 395, 396, 397, + 50, 50, 50, 50, 50, 50, 50, 398, 399, 2, 3, 4, 5, 400, 401, 402, + 50, 403, 50, 327, 404, 405, 406, 407, 408, 50, 171, 409, 201, 201, 73, 73, + 50, 50, 50, 50, 50, 50, 50, 51, 410, 266, 266, 411, 267, 267, 267, 
412, + 413, 319, 73, 73, 73, 206, 206, 414, 50, 150, 50, 50, 50, 101, 73, 73, + 50, 327, 415, 50, 416, 73, 73, 73, 50, 417, 50, 50, 418, 419, 73, 73, + 9, 9, 420, 11, 11, 50, 50, 50, 50, 201, 191, 73, 73, 73, 73, 73, + 421, 50, 50, 422, 50, 423, 73, 73, 50, 424, 50, 425, 73, 73, 73, 73, + 50, 50, 50, 426, 73, 73, 73, 73, 427, 428, 50, 429, 430, 431, 50, 432, + 50, 50, 50, 433, 50, 434, 50, 435, 50, 50, 50, 50, 436, 73, 73, 73, + 73, 73, 73, 73, 73, 73, 266, 437, 438, 50, 50, 439, 440, 441, 442, 73, + 217, 50, 50, 443, 444, 50, 436, 191, 445, 50, 446, 447, 448, 73, 73, 73, + 217, 50, 50, 449, 450, 191, 73, 73, 50, 50, 451, 452, 191, 73, 73, 73, + 50, 50, 50, 50, 50, 50, 327, 73, 267, 267, 267, 267, 267, 267, 453, 448, + 50, 50, 327, 73, 73, 73, 73, 73, 50, 50, 50, 436, 73, 73, 73, 73, + 50, 50, 50, 50, 176, 454, 203, 455, 456, 457, 73, 73, 73, 73, 73, 73, + 458, 73, 73, 73, 73, 73, 73, 73, 206, 206, 206, 206, 206, 206, 206, 318, + 206, 206, 459, 206, 206, 206, 460, 461, 462, 206, 463, 206, 206, 464, 73, 73, + 206, 206, 206, 206, 465, 73, 73, 73, 206, 206, 206, 206, 206, 283, 266, 466, + 9, 467, 11, 468, 469, 470, 236, 9, 471, 472, 473, 474, 475, 9, 467, 11, + 476, 477, 11, 478, 479, 480, 481, 9, 482, 11, 9, 467, 11, 468, 469, 11, + 236, 9, 471, 481, 9, 482, 11, 9, 467, 11, 483, 9, 484, 485, 486, 487, + 11, 488, 9, 489, 490, 491, 492, 11, 493, 9, 494, 11, 495, 496, 496, 496, + 497, 50, 498, 499, 500, 501, 502, 503, 504, 202, 505, 202, 73, 73, 73, 506, + 206, 206, 319, 206, 206, 206, 206, 206, 206, 282, 329, 507, 291, 291, 73, 73, + 508, 206, 329, 206, 206, 206, 319, 206, 206, 284, 73, 73, 73, 73, 509, 206, + 510, 206, 206, 284, 511, 512, 73, 73, 206, 206, 513, 514, 206, 206, 206, 515, + 206, 282, 206, 206, 516, 73, 206, 513, 206, 206, 206, 329, 517, 206, 206, 206, + 206, 206, 206, 206, 206, 206, 206, 518, 206, 206, 206, 464, 282, 206, 519, 73, + 73, 73, 73, 73, 73, 73, 73, 520, 206, 206, 206, 206, 521, 73, 73, 73, + 206, 206, 206, 206, 318, 73, 73, 73, 206, 206, 206, 
206, 206, 206, 206, 282, + 50, 50, 50, 50, 50, 311, 73, 73, 50, 50, 50, 176, 50, 50, 50, 50, + 50, 201, 73, 73, 73, 73, 73, 73, 522, 73, 523, 523, 523, 523, 523, 523, + 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 73, + 379, 379, 379, 379, 379, 379, 379, 524, +}; + +static RE_UINT8 re_general_category_stage_4[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 2, 4, 5, 6, 2, + 7, 7, 7, 7, 7, 2, 8, 9, 10, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 12, 13, 14, 15, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 17, 18, 19, 1, 20, 20, 21, 22, 23, 24, 25, + 26, 27, 15, 2, 28, 29, 27, 30, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 31, 11, 11, 11, 32, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 33, 16, 16, 16, 16, 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 34, 34, 34, 34, 34, 34, 34, 34, 16, 32, 32, 32, + 32, 32, 32, 32, 11, 34, 34, 16, 34, 32, 32, 11, 34, 11, 16, 11, + 11, 34, 32, 11, 32, 16, 11, 34, 32, 32, 32, 11, 34, 16, 32, 11, + 34, 11, 34, 34, 32, 35, 32, 16, 36, 36, 37, 34, 38, 37, 34, 34, + 34, 34, 34, 34, 34, 34, 16, 32, 34, 38, 32, 11, 32, 32, 32, 32, + 32, 32, 16, 16, 16, 11, 34, 32, 34, 34, 11, 32, 32, 32, 32, 32, + 16, 16, 39, 16, 16, 16, 16, 16, 40, 40, 40, 40, 40, 40, 40, 40, + 40, 41, 41, 40, 40, 40, 40, 40, 40, 41, 41, 41, 41, 41, 41, 41, + 40, 40, 42, 41, 41, 41, 42, 42, 41, 41, 41, 41, 41, 41, 41, 41, + 43, 43, 43, 43, 43, 43, 43, 43, 32, 32, 42, 32, 44, 45, 16, 46, + 44, 44, 41, 47, 11, 48, 48, 11, 34, 11, 11, 11, 11, 11, 11, 11, + 11, 49, 11, 11, 11, 11, 16, 16, 16, 16, 16, 16, 16, 16, 16, 34, + 16, 11, 32, 16, 32, 32, 32, 32, 16, 16, 32, 50, 34, 32, 34, 11, + 32, 51, 43, 43, 52, 32, 32, 32, 11, 34, 34, 34, 34, 34, 34, 16, + 32, 32, 32, 32, 44, 44, 44, 44, 49, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 48, 53, 2, 2, 2, 54, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 55, 56, 44, 57, 58, 43, 43, 43, 43, 43, 43, 43, + 43, 43, 43, 43, 43, 43, 43, 59, 60, 61, 43, 60, 44, 44, 44, 44, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 
36, 36, 62, 44, 44, + 36, 63, 46, 44, 44, 44, 44, 44, 64, 64, 65, 8, 9, 66, 2, 67, + 43, 43, 43, 43, 43, 61, 65, 2, 68, 36, 36, 36, 36, 69, 43, 43, + 7, 7, 7, 7, 7, 2, 2, 36, 70, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 71, 43, 43, 43, 72, 51, 43, 43, 73, 74, 75, 43, 43, 36, + 7, 7, 7, 7, 7, 36, 76, 77, 2, 2, 2, 2, 2, 2, 2, 78, + 69, 36, 36, 36, 36, 36, 36, 36, 43, 43, 43, 43, 43, 79, 80, 36, + 36, 36, 36, 43, 43, 43, 43, 43, 70, 44, 44, 44, 44, 44, 44, 44, + 7, 7, 7, 7, 7, 36, 36, 36, 36, 36, 36, 36, 36, 69, 43, 43, + 43, 43, 40, 21, 2, 81, 44, 44, 36, 36, 36, 43, 43, 74, 43, 43, + 43, 43, 74, 43, 74, 43, 43, 44, 2, 2, 2, 2, 2, 2, 2, 46, + 36, 36, 36, 36, 69, 43, 44, 46, 44, 44, 44, 44, 44, 44, 44, 44, + 62, 36, 36, 36, 36, 36, 62, 44, 44, 44, 43, 43, 43, 43, 43, 43, + 43, 43, 43, 43, 43, 43, 43, 79, 43, 82, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 82, 70, 83, 84, 43, 43, 43, 82, 83, 84, 83, + 69, 43, 43, 43, 36, 36, 36, 36, 36, 43, 2, 7, 7, 7, 7, 7, + 85, 36, 36, 36, 80, 36, 36, 36, 58, 83, 80, 36, 36, 36, 62, 80, + 62, 80, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 62, 36, 36, 36, + 62, 62, 44, 36, 36, 44, 70, 83, 84, 43, 79, 86, 87, 86, 84, 62, + 44, 44, 44, 86, 44, 44, 36, 80, 36, 43, 44, 7, 7, 7, 7, 7, + 36, 20, 27, 27, 27, 88, 44, 44, 58, 82, 80, 36, 36, 62, 44, 80, + 62, 36, 80, 62, 36, 44, 79, 83, 84, 79, 44, 58, 79, 58, 43, 44, + 58, 44, 44, 44, 80, 36, 62, 62, 44, 44, 44, 7, 7, 7, 7, 7, + 43, 36, 69, 44, 44, 44, 44, 44, 58, 82, 80, 36, 36, 36, 36, 80, + 36, 80, 36, 36, 36, 36, 36, 36, 62, 36, 80, 36, 36, 44, 70, 83, + 84, 43, 43, 58, 82, 86, 84, 44, 62, 44, 44, 44, 44, 44, 44, 44, + 66, 44, 44, 44, 44, 44, 44, 44, 62, 36, 80, 36, 36, 44, 70, 84, + 84, 43, 79, 86, 87, 86, 84, 44, 44, 44, 44, 82, 44, 44, 36, 80, + 77, 27, 27, 27, 44, 44, 44, 44, 44, 70, 80, 36, 36, 62, 44, 36, + 62, 36, 36, 44, 80, 62, 62, 36, 44, 80, 62, 44, 36, 62, 44, 36, + 36, 36, 36, 36, 36, 44, 44, 83, 82, 87, 44, 83, 87, 83, 84, 44, + 62, 44, 44, 86, 44, 44, 44, 44, 27, 89, 67, 67, 
88, 90, 44, 44, + 86, 83, 80, 36, 36, 36, 62, 36, 62, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 80, 36, 36, 44, 80, 43, 82, 83, 87, 43, 79, 43, 43, 44, + 44, 44, 58, 79, 36, 44, 44, 44, 44, 44, 44, 44, 27, 27, 27, 89, + 44, 83, 80, 36, 36, 36, 62, 36, 36, 36, 80, 36, 36, 44, 70, 84, + 83, 83, 87, 82, 87, 83, 43, 44, 44, 44, 86, 87, 44, 44, 44, 62, + 80, 62, 44, 44, 44, 44, 44, 44, 36, 36, 36, 36, 36, 62, 80, 83, + 84, 43, 79, 83, 87, 83, 84, 62, 44, 44, 44, 86, 44, 44, 44, 44, + 27, 27, 27, 44, 91, 36, 36, 36, 44, 83, 80, 36, 36, 36, 36, 36, + 36, 36, 36, 62, 44, 36, 36, 36, 36, 80, 36, 36, 36, 36, 80, 44, + 36, 36, 36, 62, 44, 79, 44, 86, 83, 43, 79, 79, 83, 83, 83, 83, + 44, 83, 46, 44, 44, 44, 44, 44, 80, 36, 36, 36, 36, 36, 36, 36, + 69, 36, 43, 43, 43, 79, 44, 57, 36, 36, 36, 74, 43, 43, 43, 61, + 7, 7, 7, 7, 7, 2, 44, 44, 80, 62, 62, 80, 62, 62, 80, 44, + 44, 44, 36, 36, 80, 36, 36, 36, 80, 36, 80, 80, 44, 36, 80, 36, + 69, 36, 43, 43, 43, 58, 70, 44, 36, 36, 62, 81, 43, 43, 43, 44, + 7, 7, 7, 7, 7, 44, 36, 36, 76, 67, 2, 2, 2, 2, 2, 2, + 2, 92, 92, 67, 43, 67, 67, 67, 7, 7, 7, 7, 7, 27, 27, 27, + 27, 27, 51, 51, 51, 4, 4, 83, 36, 36, 36, 36, 80, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 62, 44, 58, 43, 43, 43, 43, 43, 43, 82, + 43, 43, 61, 43, 36, 36, 69, 43, 43, 43, 43, 43, 58, 43, 43, 43, + 43, 43, 43, 43, 43, 43, 79, 67, 67, 67, 67, 75, 67, 67, 90, 67, + 2, 2, 92, 67, 21, 46, 44, 44, 36, 36, 36, 36, 36, 93, 84, 43, + 82, 43, 43, 43, 84, 82, 84, 70, 7, 7, 7, 7, 7, 2, 2, 2, + 36, 36, 36, 83, 43, 36, 36, 43, 70, 83, 94, 93, 83, 83, 83, 36, + 69, 43, 70, 36, 36, 36, 36, 36, 36, 82, 84, 82, 83, 83, 84, 93, + 7, 7, 7, 7, 7, 83, 84, 67, 11, 11, 11, 49, 44, 44, 49, 44, + 36, 36, 36, 36, 36, 63, 68, 36, 36, 36, 36, 36, 62, 36, 36, 44, + 36, 36, 36, 62, 62, 36, 36, 44, 62, 36, 36, 44, 36, 36, 36, 62, + 62, 36, 36, 44, 36, 36, 36, 36, 36, 36, 36, 62, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 62, 58, 43, 2, 2, 2, 2, 95, 27, 27, 27, + 27, 27, 27, 27, 27, 27, 96, 44, 67, 67, 67, 
67, 67, 44, 44, 44, + 36, 36, 62, 44, 44, 44, 44, 44, 97, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 63, 71, 98, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 99, 100, 44, 36, 36, 36, 36, 36, 63, 2, 101, + 102, 44, 44, 44, 44, 44, 44, 44, 36, 36, 36, 36, 36, 36, 62, 36, + 36, 43, 79, 44, 44, 44, 44, 44, 36, 43, 61, 46, 44, 44, 44, 44, + 36, 43, 44, 44, 44, 44, 44, 44, 62, 43, 44, 44, 44, 44, 44, 44, + 36, 36, 43, 84, 43, 43, 43, 83, 83, 83, 83, 82, 84, 43, 43, 43, + 43, 43, 2, 85, 2, 66, 69, 44, 7, 7, 7, 7, 7, 44, 44, 44, + 27, 27, 27, 27, 27, 44, 44, 44, 2, 2, 2, 103, 2, 60, 43, 65, + 36, 104, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 44, 44, 44, 44, + 36, 36, 36, 36, 69, 62, 44, 44, 36, 36, 36, 44, 44, 44, 44, 44, + 43, 82, 83, 84, 82, 83, 44, 44, 83, 82, 83, 83, 84, 43, 44, 44, + 90, 44, 2, 7, 7, 7, 7, 7, 36, 36, 36, 36, 36, 36, 36, 44, + 36, 36, 36, 36, 36, 36, 44, 44, 83, 83, 83, 83, 83, 83, 83, 83, + 94, 36, 36, 36, 83, 44, 44, 44, 7, 7, 7, 7, 7, 96, 44, 67, + 67, 67, 67, 67, 67, 67, 67, 67, 36, 36, 36, 69, 82, 84, 44, 2, + 36, 36, 93, 82, 43, 43, 43, 79, 82, 82, 84, 43, 43, 43, 82, 83, + 83, 84, 43, 43, 43, 43, 79, 58, 2, 2, 2, 85, 2, 2, 2, 44, + 43, 43, 94, 36, 36, 36, 36, 36, 36, 36, 82, 43, 43, 82, 82, 83, + 83, 82, 94, 36, 36, 36, 44, 44, 92, 67, 67, 67, 67, 51, 43, 43, + 43, 43, 67, 67, 67, 67, 90, 44, 43, 94, 36, 36, 36, 36, 36, 36, + 93, 43, 43, 83, 43, 84, 83, 36, 36, 36, 36, 82, 43, 83, 84, 84, + 43, 83, 44, 44, 44, 44, 2, 2, 36, 36, 83, 83, 83, 83, 43, 43, + 43, 43, 83, 43, 44, 55, 2, 2, 7, 7, 7, 7, 7, 44, 80, 36, + 36, 36, 36, 36, 40, 40, 40, 2, 2, 2, 2, 2, 44, 44, 44, 44, + 43, 61, 43, 43, 43, 43, 43, 43, 82, 43, 43, 43, 70, 36, 69, 36, + 36, 83, 70, 62, 44, 44, 44, 44, 16, 16, 16, 16, 16, 16, 40, 40, + 40, 40, 40, 40, 40, 45, 16, 16, 16, 16, 16, 16, 45, 16, 16, 16, + 16, 16, 16, 16, 16, 105, 40, 40, 43, 43, 43, 79, 44, 44, 44, 44, + 44, 44, 44, 44, 44, 44, 43, 43, 32, 32, 32, 16, 16, 16, 16, 32, + 16, 16, 16, 16, 11, 11, 11, 11, 16, 
16, 16, 44, 11, 11, 11, 44, + 16, 16, 16, 16, 49, 49, 49, 49, 16, 16, 16, 16, 16, 16, 16, 44, + 16, 16, 16, 16, 106, 106, 106, 106, 16, 16, 107, 16, 11, 11, 108, 109, + 41, 16, 107, 16, 11, 11, 108, 41, 16, 16, 44, 16, 11, 11, 110, 41, + 16, 16, 16, 16, 11, 11, 111, 41, 44, 16, 107, 16, 11, 11, 108, 112, + 113, 113, 113, 113, 113, 114, 64, 64, 115, 115, 115, 2, 116, 117, 116, 117, + 2, 2, 2, 2, 118, 64, 64, 119, 2, 2, 2, 2, 120, 121, 2, 122, + 123, 2, 124, 125, 2, 2, 2, 2, 2, 9, 123, 2, 2, 2, 2, 126, + 64, 64, 65, 64, 64, 64, 64, 64, 127, 44, 27, 27, 27, 8, 124, 128, + 27, 27, 27, 27, 27, 8, 124, 100, 40, 40, 40, 40, 40, 40, 81, 44, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 129, 44, 44, + 43, 43, 43, 43, 43, 43, 130, 52, 131, 52, 131, 43, 43, 43, 43, 43, + 79, 44, 44, 44, 44, 44, 44, 44, 67, 132, 67, 133, 67, 34, 11, 16, + 11, 32, 133, 67, 50, 11, 11, 67, 67, 67, 132, 132, 132, 11, 11, 134, + 11, 11, 35, 36, 39, 67, 16, 11, 8, 8, 50, 16, 16, 26, 67, 135, + 27, 27, 27, 27, 27, 27, 27, 27, 101, 101, 101, 101, 101, 101, 101, 101, + 101, 136, 137, 101, 138, 44, 44, 44, 8, 8, 139, 67, 67, 8, 67, 67, + 139, 26, 67, 139, 67, 67, 67, 139, 67, 67, 67, 67, 67, 67, 67, 8, + 67, 139, 139, 67, 67, 67, 67, 67, 67, 67, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 67, 67, 67, 67, 4, 4, 67, 67, + 8, 67, 67, 67, 140, 141, 67, 67, 67, 67, 67, 67, 67, 67, 139, 67, + 67, 67, 67, 67, 67, 26, 8, 8, 8, 8, 67, 67, 67, 67, 67, 67, + 67, 67, 67, 67, 67, 67, 8, 8, 8, 67, 67, 67, 67, 67, 67, 67, + 67, 67, 44, 44, 44, 44, 44, 44, 67, 67, 67, 90, 44, 44, 44, 44, + 67, 67, 67, 67, 67, 90, 44, 44, 27, 27, 27, 27, 27, 27, 67, 67, + 67, 67, 67, 67, 67, 27, 27, 27, 67, 67, 67, 26, 67, 67, 67, 67, + 26, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 8, 8, 8, 8, + 67, 67, 67, 67, 67, 67, 67, 26, 91, 67, 67, 67, 67, 67, 67, 67, + 67, 67, 67, 67, 4, 4, 4, 4, 4, 4, 4, 27, 27, 27, 27, 27, + 27, 27, 67, 67, 67, 67, 67, 67, 8, 8, 124, 142, 8, 8, 8, 8, + 8, 8, 8, 4, 4, 4, 4, 4, 8, 124, 143, 143, 143, 
143, 143, 143, + 143, 143, 143, 143, 142, 8, 8, 8, 8, 8, 8, 8, 4, 4, 8, 8, + 8, 8, 8, 8, 8, 8, 4, 8, 8, 8, 139, 26, 8, 8, 144, 44, + 11, 11, 11, 11, 11, 11, 11, 48, 16, 16, 16, 16, 16, 16, 16, 107, + 32, 11, 32, 34, 34, 34, 34, 11, 32, 32, 34, 16, 16, 16, 40, 11, + 32, 32, 135, 67, 67, 133, 34, 145, 43, 32, 44, 44, 55, 2, 95, 2, + 16, 16, 16, 54, 44, 44, 54, 44, 36, 36, 36, 36, 44, 44, 44, 53, + 46, 44, 44, 44, 44, 44, 44, 58, 36, 36, 36, 62, 44, 44, 44, 44, + 36, 36, 36, 62, 36, 36, 36, 62, 2, 116, 116, 2, 120, 121, 116, 2, + 2, 2, 2, 6, 2, 103, 116, 2, 116, 4, 4, 4, 4, 2, 2, 85, + 2, 2, 2, 2, 2, 115, 44, 44, 67, 67, 67, 67, 67, 91, 67, 67, + 67, 67, 67, 44, 44, 44, 44, 44, 67, 67, 67, 67, 67, 67, 44, 44, + 1, 2, 146, 147, 4, 4, 4, 4, 4, 67, 4, 4, 4, 4, 148, 149, + 150, 101, 101, 101, 101, 43, 43, 83, 151, 40, 40, 67, 101, 152, 63, 67, + 36, 36, 36, 62, 58, 153, 154, 68, 36, 36, 36, 36, 36, 63, 40, 68, + 44, 44, 80, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 62, + 67, 27, 27, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 90, + 27, 27, 27, 27, 27, 67, 67, 67, 67, 67, 67, 67, 27, 27, 27, 27, + 155, 27, 27, 27, 27, 27, 27, 27, 36, 36, 104, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 156, 2, 7, 7, 7, 7, 7, 36, 44, 44, + 32, 32, 32, 32, 32, 32, 32, 69, 52, 157, 43, 43, 43, 43, 43, 85, + 32, 32, 32, 32, 44, 44, 44, 58, 36, 36, 36, 101, 101, 101, 101, 101, + 43, 2, 2, 2, 44, 44, 44, 44, 41, 41, 41, 154, 40, 40, 40, 40, + 41, 32, 32, 32, 32, 32, 32, 32, 16, 32, 32, 32, 32, 32, 32, 32, + 45, 16, 16, 16, 34, 34, 34, 32, 32, 32, 32, 32, 42, 158, 34, 107, + 32, 32, 44, 44, 44, 44, 44, 44, 32, 32, 32, 32, 32, 48, 44, 44, + 44, 44, 44, 44, 40, 35, 36, 36, 36, 70, 36, 70, 36, 69, 36, 36, + 36, 93, 84, 82, 67, 67, 44, 44, 27, 27, 27, 67, 159, 44, 44, 44, + 36, 36, 2, 2, 44, 44, 44, 44, 83, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 83, 83, 83, 83, 83, 83, 83, 83, 79, 44, 44, 44, 44, 2, + 43, 36, 36, 36, 2, 71, 44, 44, 36, 36, 36, 43, 43, 43, 43, 2, + 36, 36, 36, 69, 43, 43, 
43, 43, 43, 83, 44, 44, 44, 44, 44, 55, + 36, 69, 83, 43, 43, 83, 82, 83, 160, 2, 2, 2, 2, 2, 2, 53, + 7, 7, 7, 7, 7, 44, 44, 2, 36, 36, 36, 36, 69, 43, 43, 82, + 84, 82, 84, 79, 44, 44, 44, 44, 36, 69, 36, 36, 36, 36, 82, 44, + 7, 7, 7, 7, 7, 44, 2, 2, 68, 36, 36, 76, 67, 93, 44, 44, + 70, 43, 70, 69, 70, 36, 36, 43, 69, 62, 44, 44, 44, 44, 44, 44, + 44, 44, 44, 44, 44, 80, 104, 2, 36, 36, 36, 36, 36, 93, 43, 83, + 2, 104, 161, 79, 44, 44, 44, 44, 80, 36, 36, 62, 80, 36, 36, 62, + 80, 36, 36, 62, 44, 44, 44, 44, 36, 93, 84, 83, 82, 160, 84, 44, + 36, 36, 44, 44, 44, 44, 44, 44, 36, 36, 36, 62, 44, 80, 36, 36, + 162, 162, 162, 162, 162, 162, 162, 162, 163, 163, 163, 163, 163, 163, 163, 163, + 36, 36, 36, 36, 36, 44, 44, 44, 16, 16, 16, 107, 44, 44, 44, 44, + 44, 54, 16, 16, 44, 44, 80, 70, 36, 36, 36, 36, 164, 36, 36, 36, + 36, 36, 36, 62, 36, 36, 62, 62, 36, 80, 62, 36, 36, 36, 36, 36, + 36, 41, 41, 41, 41, 41, 41, 41, 41, 44, 44, 44, 44, 44, 44, 44, + 44, 80, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 4, + 44, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 159, 44, + 2, 2, 2, 165, 125, 44, 44, 44, 6, 166, 167, 143, 143, 143, 143, 143, + 143, 143, 125, 165, 125, 2, 122, 168, 2, 46, 2, 2, 148, 143, 143, 125, + 2, 169, 8, 144, 66, 2, 44, 44, 36, 36, 62, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 62, 78, 55, 2, 3, 2, 4, 5, 6, 2, + 16, 16, 16, 16, 16, 17, 18, 124, 125, 4, 2, 36, 36, 36, 36, 36, + 68, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 40, + 44, 36, 36, 36, 44, 36, 36, 36, 44, 36, 36, 36, 44, 36, 62, 44, + 20, 170, 88, 129, 26, 8, 139, 90, 44, 44, 44, 44, 78, 64, 67, 44, + 36, 36, 36, 36, 36, 36, 80, 36, 36, 36, 36, 36, 36, 62, 36, 80, + 2, 46, 44, 171, 27, 27, 27, 27, 27, 27, 44, 91, 67, 67, 67, 67, + 101, 101, 138, 27, 89, 67, 67, 67, 67, 67, 67, 67, 67, 96, 44, 44, + 67, 67, 67, 67, 67, 67, 51, 44, 27, 27, 44, 44, 44, 44, 44, 44, + 147, 36, 36, 36, 36, 102, 44, 44, 36, 36, 36, 36, 36, 36, 36, 55, + 36, 36, 44, 44, 36, 36, 36, 
36, 172, 101, 101, 44, 44, 44, 44, 44, + 11, 11, 11, 11, 16, 16, 16, 16, 36, 36, 36, 44, 62, 36, 36, 36, + 36, 36, 36, 80, 62, 44, 62, 80, 36, 36, 36, 55, 27, 27, 27, 27, + 36, 36, 36, 27, 27, 27, 44, 55, 36, 36, 36, 36, 36, 44, 44, 55, + 36, 36, 36, 36, 44, 44, 44, 36, 69, 43, 58, 79, 44, 44, 43, 43, + 36, 36, 80, 36, 80, 36, 36, 36, 36, 36, 44, 44, 43, 79, 44, 58, + 27, 27, 27, 27, 44, 44, 44, 44, 2, 2, 2, 2, 46, 44, 44, 44, + 36, 36, 36, 36, 36, 36, 173, 30, 36, 36, 36, 44, 55, 2, 2, 2, + 36, 36, 36, 44, 27, 27, 27, 27, 36, 62, 44, 44, 27, 27, 27, 27, + 36, 36, 36, 36, 62, 44, 44, 44, 27, 27, 27, 27, 27, 27, 27, 96, + 84, 94, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 43, 43, 43, 43, + 43, 43, 43, 61, 2, 2, 2, 44, 44, 27, 27, 27, 27, 27, 27, 27, + 27, 27, 27, 7, 7, 7, 7, 7, 83, 84, 43, 82, 84, 61, 174, 2, + 2, 44, 44, 44, 44, 44, 44, 44, 43, 70, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 69, 43, 43, 84, 43, 43, 43, 79, 7, 7, 7, 7, 7, + 2, 2, 44, 44, 44, 44, 44, 44, 36, 93, 83, 43, 43, 43, 43, 82, + 94, 36, 63, 2, 46, 44, 44, 44, 36, 36, 36, 36, 36, 69, 84, 83, + 43, 43, 43, 84, 44, 44, 44, 44, 101, 102, 44, 44, 44, 44, 44, 44, + 93, 83, 83, 83, 83, 83, 83, 83, 83, 83, 83, 83, 83, 83, 83, 87, + 44, 44, 44, 44, 44, 44, 44, 58, 43, 73, 40, 40, 40, 40, 40, 40, + 36, 44, 44, 44, 44, 44, 44, 44, 67, 67, 67, 90, 91, 67, 67, 67, + 67, 67, 175, 84, 43, 67, 175, 83, 83, 176, 64, 64, 64, 177, 43, 43, + 43, 75, 51, 43, 43, 43, 67, 67, 67, 67, 67, 67, 67, 43, 43, 67, + 67, 67, 67, 67, 67, 67, 67, 44, 67, 43, 75, 44, 44, 44, 44, 44, + 27, 44, 44, 44, 44, 44, 44, 44, 11, 11, 11, 11, 11, 16, 16, 16, + 16, 16, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 16, + 16, 16, 107, 16, 16, 16, 16, 16, 11, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 48, 11, 44, 48, 49, 48, 49, 11, 48, 11, + 11, 11, 11, 16, 16, 54, 54, 16, 16, 16, 54, 16, 16, 16, 16, 16, + 16, 16, 11, 49, 11, 48, 49, 11, 11, 11, 48, 11, 11, 11, 48, 16, + 16, 16, 16, 16, 11, 49, 11, 48, 11, 11, 48, 48, 44, 11, 11, 
11, + 48, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 11, 11, + 11, 11, 11, 16, 16, 16, 16, 16, 16, 16, 16, 44, 11, 11, 11, 11, + 31, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 33, 16, 16, + 16, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 31, 16, 16, + 16, 16, 33, 16, 16, 16, 11, 11, 11, 11, 31, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 33, 16, 16, 16, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 31, 16, 16, 16, 16, 33, 16, 16, 16, + 11, 11, 11, 11, 31, 16, 16, 16, 16, 33, 16, 16, 16, 32, 44, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 36, 36, 80, 36, 36, 36, 36, 36, + 80, 62, 62, 80, 80, 36, 36, 36, 36, 62, 36, 36, 80, 80, 44, 44, + 44, 62, 44, 80, 80, 80, 80, 36, 80, 62, 62, 80, 80, 80, 80, 80, + 80, 62, 62, 80, 36, 62, 36, 36, 36, 62, 36, 36, 80, 36, 62, 62, + 36, 36, 36, 36, 36, 80, 36, 36, 80, 36, 80, 36, 36, 80, 36, 36, + 8, 44, 44, 44, 44, 44, 44, 44, 91, 67, 67, 67, 67, 67, 67, 90, + 27, 27, 27, 27, 27, 96, 44, 44, 44, 44, 44, 67, 67, 67, 67, 67, + 67, 90, 44, 44, 44, 44, 44, 44, 67, 67, 67, 67, 90, 44, 44, 44, + 67, 44, 44, 44, 44, 44, 44, 44, 90, 44, 44, 44, 44, 44, 44, 44, + 67, 67, 67, 91, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 90, 44, + 67, 67, 90, 67, 67, 90, 44, 44, 90, 67, 67, 67, 67, 67, 67, 67, + 67, 67, 67, 67, 91, 67, 90, 44, 67, 67, 67, 67, 44, 44, 44, 44, + 44, 44, 44, 44, 44, 91, 67, 67, 90, 44, 91, 67, 67, 67, 67, 67, + 78, 44, 44, 44, 44, 44, 44, 44, 64, 64, 64, 64, 64, 64, 64, 64, + 163, 163, 163, 163, 163, 163, 163, 44, +}; + +static RE_UINT8 re_general_category_stage_5[] = { + 15, 15, 12, 23, 23, 23, 25, 23, 20, 21, 23, 24, 23, 19, 9, 9, + 24, 24, 24, 23, 23, 1, 1, 1, 1, 20, 23, 21, 26, 22, 26, 2, + 2, 2, 2, 20, 24, 21, 24, 15, 25, 25, 27, 23, 26, 27, 5, 28, + 24, 16, 27, 26, 27, 24, 11, 11, 26, 11, 5, 29, 11, 23, 1, 24, + 1, 2, 2, 24, 2, 1, 2, 5, 5, 5, 1, 3, 3, 2, 5, 2, + 4, 4, 26, 26, 4, 26, 6, 6, 0, 0, 4, 2, 23, 0, 1, 23, + 1, 0, 0, 1, 24, 1, 27, 6, 7, 7, 0, 4, 0, 2, 0, 23, + 19, 0, 0, 25, 0, 6, 19, 6, 23, 6, 6, 23, 5, 
0, 5, 23, + 16, 16, 16, 0, 23, 25, 27, 27, 4, 5, 5, 6, 6, 5, 23, 5, + 6, 16, 6, 4, 4, 6, 6, 27, 5, 27, 27, 5, 0, 16, 6, 0, + 0, 5, 4, 0, 6, 8, 8, 8, 8, 6, 23, 4, 0, 8, 8, 0, + 27, 25, 11, 27, 27, 0, 0, 27, 23, 27, 5, 8, 8, 5, 23, 11, + 11, 0, 19, 5, 12, 5, 5, 20, 21, 0, 10, 10, 10, 0, 19, 23, + 5, 4, 2, 4, 3, 3, 2, 0, 3, 26, 2, 26, 0, 26, 1, 26, + 26, 0, 12, 12, 12, 16, 19, 19, 28, 29, 20, 28, 13, 14, 16, 12, + 23, 28, 29, 23, 23, 22, 22, 23, 24, 20, 21, 23, 23, 12, 11, 4, + 21, 4, 25, 0, 6, 7, 7, 6, 1, 27, 27, 1, 27, 2, 2, 27, + 10, 1, 2, 10, 10, 11, 24, 27, 27, 20, 21, 27, 21, 24, 21, 20, + 24, 0, 2, 6, 27, 4, 5, 10, 19, 20, 21, 21, 27, 10, 19, 4, + 10, 4, 6, 26, 26, 4, 27, 11, 4, 23, 7, 23, 26, 1, 25, 27, + 8, 23, 4, 8, 18, 18, 17, 17, 5, 24, 23, 20, 19, 22, 22, 20, + 22, 22, 24, 19, 24, 26, 0, 11, 23, 10, 5, 11, 23, 16, 27, 8, + 8, 16, 16, 6, +}; + +/* General_Category: 8556 bytes. */ + +RE_UINT32 re_get_general_category(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 11; + code = ch ^ (f << 11); + pos = (RE_UINT32)re_general_category_stage_1[f] << 4; + f = code >> 7; + code ^= f << 7; + pos = (RE_UINT32)re_general_category_stage_2[pos + f] << 3; + f = code >> 4; + code ^= f << 4; + pos = (RE_UINT32)re_general_category_stage_3[pos + f] << 3; + f = code >> 1; + code ^= f << 1; + pos = (RE_UINT32)re_general_category_stage_4[pos + f] << 1; + value = re_general_category_stage_5[pos + code]; + + return value; +} + +/* Block. 
*/ + +static RE_UINT8 re_block_stage_1[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 10, 11, 12, 12, 12, 12, 13, 14, 15, 15, 15, 16, + 17, 18, 19, 20, 21, 20, 22, 20, 20, 20, 20, 20, 20, 23, 20, 20, + 20, 20, 20, 20, 20, 20, 24, 20, 20, 20, 25, 20, 20, 26, 27, 20, + 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 29, 30, 31, 32, 20, 20, 20, 20, 20, 20, 20, 33, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 34, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 
20, 20, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, +}; + +static RE_UINT8 re_block_stage_2[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 9, 10, 11, 11, 12, 13, + 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 28, + 29, 30, 31, 31, 32, 32, 32, 33, 34, 34, 34, 34, 34, 35, 36, 37, + 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 50, 51, 51, + 52, 53, 54, 55, 56, 56, 57, 57, 58, 59, 60, 61, 62, 62, 63, 64, + 65, 65, 66, 67, 68, 68, 69, 69, 70, 71, 72, 73, 74, 75, 76, 77, + 78, 79, 80, 81, 82, 82, 83, 83, 84, 84, 84, 84, 84, 84, 84, 84, + 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, + 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 85, 86, 86, 86, 86, + 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, + 87, 87, 87, 87, 87, 87, 87, 87, 87, 88, 89, 89, 90, 91, 92, 93, + 94, 95, 96, 97, 98, 99, 100, 101, 102, 102, 102, 102, 102, 102, 102, 102, + 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, + 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 103, + 104, 104, 104, 104, 104, 104, 104, 105, 106, 106, 106, 106, 106, 106, 106, 106, + 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, + 107, 107, 108, 108, 108, 108, 109, 110, 110, 110, 110, 110, 111, 112, 113, 114, + 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 119, 119, 119, 119, 119, 119, + 125, 119, 126, 127, 128, 119, 129, 119, 130, 119, 119, 119, 131, 119, 119, 119, + 132, 133, 134, 135, 119, 119, 119, 119, 119, 119, 119, 119, 119, 136, 119, 119, + 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, + 137, 137, 137, 137, 137, 137, 137, 137, 138, 119, 119, 119, 119, 119, 119, 119, + 139, 139, 139, 139, 139, 139, 139, 139, 140, 119, 119, 119, 119, 
119, 119, 119, + 141, 141, 141, 141, 142, 119, 119, 119, 119, 119, 119, 119, 119, 119, 143, 144, + 145, 145, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, + 146, 146, 147, 147, 148, 119, 149, 119, 150, 150, 150, 150, 150, 150, 150, 150, + 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 151, 151, 119, 119, + 152, 153, 154, 154, 155, 155, 156, 156, 156, 156, 156, 156, 157, 158, 159, 119, + 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, + 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 161, 162, 162, + 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, + 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 163, 164, + 165, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, + 166, 166, 166, 166, 167, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, + 168, 119, 169, 170, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, + 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, + 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, +}; + +static RE_UINT8 re_block_stage_3[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, + 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 10, 10, 10, 10, 10, + 10, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, + 13, 13, 13, 13, 13, 14, 14, 14, 15, 15, 15, 15, 16, 16, 16, 16, + 17, 17, 17, 17, 18, 18, 19, 19, 19, 19, 20, 20, 20, 20, 20, 20, + 21, 21, 21, 21, 21, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, + 23, 23, 23, 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, + 25, 25, 25, 25, 25, 25, 25, 25, 26, 26, 26, 26, 26, 26, 26, 26, + 27, 27, 27, 27, 27, 27, 27, 27, 28, 28, 28, 28, 28, 28, 28, 28, + 29, 29, 29, 29, 29, 29, 29, 29, 30, 30, 30, 30, 30, 30, 30, 30, + 31, 
31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, + 33, 33, 33, 33, 33, 33, 33, 33, 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 35, 35, 35, 35, 35, 35, 36, 36, 36, 36, 36, 36, 36, 36, + 37, 37, 37, 37, 37, 37, 37, 37, 38, 38, 39, 39, 39, 39, 39, 39, + 40, 40, 40, 40, 40, 40, 40, 40, 41, 41, 42, 42, 42, 42, 42, 42, + 43, 43, 44, 44, 45, 45, 46, 46, 47, 47, 47, 47, 47, 47, 47, 47, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 49, 49, 49, 49, 49, + 50, 50, 50, 50, 50, 51, 51, 51, 52, 52, 52, 52, 52, 52, 53, 53, + 54, 54, 55, 55, 55, 55, 55, 55, 55, 55, 55, 19, 19, 19, 19, 19, + 56, 56, 56, 56, 56, 56, 56, 56, 57, 57, 57, 57, 58, 58, 58, 58, + 59, 59, 59, 59, 59, 60, 60, 60, 19, 19, 19, 19, 61, 62, 62, 62, + 63, 63, 63, 63, 63, 63, 63, 63, 64, 64, 64, 64, 65, 65, 65, 65, + 66, 66, 66, 66, 66, 66, 66, 66, 67, 67, 67, 67, 67, 67, 67, 67, + 68, 68, 68, 68, 68, 68, 68, 69, 69, 69, 70, 70, 70, 71, 71, 71, + 72, 72, 72, 72, 72, 73, 73, 73, 73, 74, 74, 74, 74, 74, 74, 74, + 75, 75, 75, 75, 75, 75, 75, 75, 76, 76, 76, 76, 76, 76, 76, 76, + 77, 77, 77, 77, 78, 78, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, + 80, 80, 80, 80, 80, 80, 80, 80, 81, 81, 82, 82, 82, 82, 82, 82, + 83, 83, 83, 83, 83, 83, 83, 83, 84, 84, 84, 84, 84, 84, 84, 84, + 84, 84, 84, 84, 85, 85, 85, 86, 87, 87, 87, 87, 87, 87, 87, 87, + 88, 88, 88, 88, 88, 88, 88, 88, 89, 89, 89, 89, 89, 89, 89, 89, + 90, 90, 90, 90, 90, 90, 90, 90, 91, 91, 91, 91, 91, 91, 91, 91, + 92, 92, 92, 92, 92, 92, 93, 93, 94, 94, 94, 94, 94, 94, 94, 94, + 95, 95, 95, 96, 96, 96, 96, 96, 97, 97, 97, 97, 97, 97, 98, 98, + 99, 99, 99, 99, 99, 99, 99, 99, 100, 100, 100, 100, 100, 100, 100, 100, + 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 19, 102, + 103, 103, 103, 103, 104, 104, 104, 104, 104, 104, 105, 105, 105, 105, 105, 105, + 106, 106, 106, 107, 107, 107, 107, 107, 107, 108, 109, 109, 110, 110, 110, 111, + 112, 112, 112, 112, 112, 112, 112, 112, 113, 113, 113, 113, 113, 113, 113, 113, + 114, 114, 114, 
114, 114, 114, 114, 114, 114, 114, 114, 114, 115, 115, 115, 115, + 116, 116, 116, 116, 116, 116, 116, 116, 117, 117, 117, 117, 117, 117, 117, 117, + 117, 118, 118, 118, 118, 119, 119, 119, 120, 120, 120, 120, 120, 120, 120, 120, + 120, 120, 120, 120, 121, 121, 121, 121, 121, 121, 122, 122, 122, 122, 122, 122, + 123, 123, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, + 125, 125, 125, 126, 127, 127, 127, 127, 128, 128, 128, 128, 128, 128, 129, 129, + 130, 130, 130, 131, 131, 131, 132, 132, 133, 133, 133, 133, 133, 133, 19, 19, + 134, 134, 134, 134, 134, 134, 135, 135, 136, 136, 136, 136, 136, 136, 137, 137, + 138, 138, 138, 19, 19, 19, 19, 19, 19, 19, 19, 19, 139, 139, 139, 139, + 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 141, 141, 141, 141, 141, + 142, 142, 142, 142, 142, 142, 142, 142, 143, 143, 143, 143, 143, 143, 143, 143, + 144, 144, 144, 144, 144, 144, 144, 144, 145, 145, 145, 145, 145, 145, 145, 145, + 146, 146, 146, 146, 146, 146, 146, 146, 147, 147, 147, 147, 147, 148, 148, 148, + 148, 148, 148, 148, 148, 148, 148, 148, 149, 150, 151, 152, 152, 153, 153, 154, + 154, 154, 154, 154, 154, 154, 154, 154, 155, 155, 155, 155, 155, 155, 155, 155, + 155, 155, 155, 155, 155, 155, 155, 156, 157, 157, 157, 157, 157, 157, 157, 157, + 158, 158, 158, 158, 158, 158, 158, 158, 159, 159, 159, 159, 160, 160, 160, 160, + 160, 161, 161, 161, 161, 162, 162, 162, 19, 19, 19, 19, 19, 19, 19, 19, + 163, 163, 164, 164, 164, 164, 19, 19, 165, 165, 165, 166, 166, 19, 19, 19, + 167, 167, 168, 168, 168, 168, 19, 19, 169, 169, 169, 169, 169, 170, 170, 170, + 171, 171, 171, 19, 19, 19, 19, 19, 172, 172, 172, 172, 173, 173, 19, 19, + 174, 174, 175, 175, 19, 19, 19, 19, 176, 176, 177, 177, 177, 177, 177, 177, + 178, 178, 178, 178, 178, 178, 179, 179, 180, 180, 180, 180, 181, 181, 182, 182, + 183, 183, 183, 183, 183, 19, 19, 19, 19, 19, 19, 19, 19, 19, 184, 184, + 185, 185, 185, 185, 185, 185, 185, 185, 186, 186, 186, 186, 186, 187, 187, 187, + 188, 188, 
188, 188, 188, 19, 19, 19, 189, 189, 189, 189, 189, 189, 19, 19, + 190, 190, 190, 190, 190, 19, 19, 19, 191, 191, 191, 191, 191, 191, 191, 191, + 192, 192, 192, 192, 192, 192, 192, 192, 193, 193, 193, 193, 193, 193, 193, 193, + 193, 193, 193, 19, 19, 19, 19, 19, 194, 194, 194, 194, 194, 194, 194, 194, + 194, 194, 194, 194, 19, 19, 19, 19, 195, 195, 195, 195, 195, 195, 195, 195, + 195, 195, 19, 19, 19, 19, 19, 19, 196, 196, 196, 196, 196, 196, 196, 196, + 197, 197, 197, 197, 197, 197, 197, 197, 198, 198, 198, 198, 198, 198, 198, 198, + 199, 199, 199, 199, 199, 19, 19, 19, 200, 200, 200, 200, 200, 200, 201, 201, + 202, 202, 202, 202, 202, 202, 202, 202, 203, 203, 203, 203, 203, 203, 203, 203, + 204, 204, 204, 205, 205, 205, 205, 205, 205, 205, 206, 206, 206, 206, 206, 206, + 207, 207, 207, 207, 207, 207, 207, 207, 208, 208, 208, 208, 208, 208, 208, 208, + 209, 209, 209, 209, 209, 209, 209, 209, 210, 210, 210, 210, 210, 19, 19, 19, + 211, 211, 211, 211, 211, 211, 211, 211, 212, 212, 212, 212, 212, 212, 212, 212, + 213, 213, 213, 213, 213, 213, 213, 213, 213, 213, 213, 213, 213, 213, 19, 19, + 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 215, 215, 215, 215, + 215, 215, 215, 215, 215, 215, 215, 215, 215, 215, 19, 19, 19, 19, 19, 19, + 216, 216, 216, 216, 216, 216, 216, 216, 216, 216, 19, 19, 19, 19, 19, 19, + 217, 217, 217, 217, 217, 217, 217, 217, 218, 218, 218, 218, 218, 218, 218, 218, + 218, 218, 218, 218, 218, 218, 218, 19, 219, 219, 219, 219, 219, 219, 219, 219, + 220, 220, 220, 220, 220, 220, 220, 220, +}; + +static RE_UINT8 re_block_stage_4[] = { + 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, + 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, + 8, 8, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 11, 11, 11, 11, + 12, 12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15, + 16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18, 19, 19, 19, 19, + 20, 20, 20, 20, 21, 21, 21, 21, 22, 22, 22, 22, 23, 23, 23, 23, + 24, 24, 24, 24, 25, 25, 25, 25, 26, 26, 26, 26, 27, 
27, 27, 27, + 28, 28, 28, 28, 29, 29, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31, + 32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 34, 35, 35, 35, 35, + 36, 36, 36, 36, 37, 37, 37, 37, 38, 38, 38, 38, 39, 39, 39, 39, + 40, 40, 40, 40, 41, 41, 41, 41, 42, 42, 42, 42, 43, 43, 43, 43, + 44, 44, 44, 44, 45, 45, 45, 45, 46, 46, 46, 46, 47, 47, 47, 47, + 48, 48, 48, 48, 49, 49, 49, 49, 50, 50, 50, 50, 51, 51, 51, 51, + 52, 52, 52, 52, 53, 53, 53, 53, 54, 54, 54, 54, 55, 55, 55, 55, + 56, 56, 56, 56, 57, 57, 57, 57, 58, 58, 58, 58, 59, 59, 59, 59, + 60, 60, 60, 60, 61, 61, 61, 61, 62, 62, 62, 62, 63, 63, 63, 63, + 64, 64, 64, 64, 65, 65, 65, 65, 66, 66, 66, 66, 67, 67, 67, 67, + 68, 68, 68, 68, 69, 69, 69, 69, 70, 70, 70, 70, 71, 71, 71, 71, + 72, 72, 72, 72, 73, 73, 73, 73, 74, 74, 74, 74, 75, 75, 75, 75, + 76, 76, 76, 76, 77, 77, 77, 77, 78, 78, 78, 78, 79, 79, 79, 79, + 80, 80, 80, 80, 81, 81, 81, 81, 82, 82, 82, 82, 83, 83, 83, 83, + 84, 84, 84, 84, 85, 85, 85, 85, 86, 86, 86, 86, 87, 87, 87, 87, + 88, 88, 88, 88, 89, 89, 89, 89, 90, 90, 90, 90, 91, 91, 91, 91, + 92, 92, 92, 92, 93, 93, 93, 93, 94, 94, 94, 94, 95, 95, 95, 95, + 96, 96, 96, 96, 97, 97, 97, 97, 98, 98, 98, 98, 99, 99, 99, 99, + 100, 100, 100, 100, 101, 101, 101, 101, 102, 102, 102, 102, 103, 103, 103, 103, + 104, 104, 104, 104, 105, 105, 105, 105, 106, 106, 106, 106, 107, 107, 107, 107, + 108, 108, 108, 108, 109, 109, 109, 109, 110, 110, 110, 110, 111, 111, 111, 111, + 112, 112, 112, 112, 113, 113, 113, 113, 114, 114, 114, 114, 115, 115, 115, 115, + 116, 116, 116, 116, 117, 117, 117, 117, 118, 118, 118, 118, 119, 119, 119, 119, + 120, 120, 120, 120, 121, 121, 121, 121, 122, 122, 122, 122, 123, 123, 123, 123, + 124, 124, 124, 124, 125, 125, 125, 125, 126, 126, 126, 126, 127, 127, 127, 127, + 128, 128, 128, 128, 129, 129, 129, 129, 130, 130, 130, 130, 131, 131, 131, 131, + 132, 132, 132, 132, 133, 133, 133, 133, 134, 134, 134, 134, 135, 135, 135, 135, + 136, 136, 136, 136, 137, 137, 137, 137, 138, 138, 138, 138, 
139, 139, 139, 139, + 140, 140, 140, 140, 141, 141, 141, 141, 142, 142, 142, 142, 143, 143, 143, 143, + 144, 144, 144, 144, 145, 145, 145, 145, 146, 146, 146, 146, 147, 147, 147, 147, + 148, 148, 148, 148, 149, 149, 149, 149, 150, 150, 150, 150, 151, 151, 151, 151, + 152, 152, 152, 152, 153, 153, 153, 153, 154, 154, 154, 154, 155, 155, 155, 155, + 156, 156, 156, 156, 157, 157, 157, 157, 158, 158, 158, 158, 159, 159, 159, 159, + 160, 160, 160, 160, 161, 161, 161, 161, 162, 162, 162, 162, 163, 163, 163, 163, + 164, 164, 164, 164, 165, 165, 165, 165, 166, 166, 166, 166, 167, 167, 167, 167, + 168, 168, 168, 168, 169, 169, 169, 169, 170, 170, 170, 170, 171, 171, 171, 171, + 172, 172, 172, 172, 173, 173, 173, 173, 174, 174, 174, 174, 175, 175, 175, 175, + 176, 176, 176, 176, 177, 177, 177, 177, 178, 178, 178, 178, 179, 179, 179, 179, + 180, 180, 180, 180, 181, 181, 181, 181, 182, 182, 182, 182, 183, 183, 183, 183, + 184, 184, 184, 184, 185, 185, 185, 185, 186, 186, 186, 186, 187, 187, 187, 187, + 188, 188, 188, 188, 189, 189, 189, 189, 190, 190, 190, 190, 191, 191, 191, 191, + 192, 192, 192, 192, 193, 193, 193, 193, 194, 194, 194, 194, 195, 195, 195, 195, + 196, 196, 196, 196, 197, 197, 197, 197, 198, 198, 198, 198, 199, 199, 199, 199, + 200, 200, 200, 200, 201, 201, 201, 201, 202, 202, 202, 202, 203, 203, 203, 203, + 204, 204, 204, 204, 205, 205, 205, 205, 206, 206, 206, 206, 207, 207, 207, 207, + 208, 208, 208, 208, 209, 209, 209, 209, 210, 210, 210, 210, 211, 211, 211, 211, + 212, 212, 212, 212, 213, 213, 213, 213, 214, 214, 214, 214, 215, 215, 215, 215, + 216, 216, 216, 216, 217, 217, 217, 217, 218, 218, 218, 218, 219, 219, 219, 219, + 220, 220, 220, 220, +}; + +static RE_UINT8 re_block_stage_5[] = { + 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, + 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8, + 9, 9, 9, 9, 10, 10, 10, 10, 11, 11, 11, 11, 12, 12, 12, 12, + 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15, 16, 16, 16, 16, + 17, 17, 17, 17, 18, 18, 18, 18, 19, 19, 
19, 19, 0, 0, 0, 0, + 20, 20, 20, 20, 21, 21, 21, 21, 22, 22, 22, 22, 23, 23, 23, 23, + 24, 24, 24, 24, 25, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27, + 28, 28, 28, 28, 29, 29, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31, + 32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 34, 35, 35, 35, 35, + 36, 36, 36, 36, 37, 37, 37, 37, 38, 38, 38, 38, 39, 39, 39, 39, + 40, 40, 40, 40, 41, 41, 41, 41, 42, 42, 42, 42, 43, 43, 43, 43, + 44, 44, 44, 44, 45, 45, 45, 45, 46, 46, 46, 46, 47, 47, 47, 47, + 48, 48, 48, 48, 49, 49, 49, 49, 50, 50, 50, 50, 51, 51, 51, 51, + 52, 52, 52, 52, 53, 53, 53, 53, 54, 54, 54, 54, 55, 55, 55, 55, + 56, 56, 56, 56, 57, 57, 57, 57, 58, 58, 58, 58, 59, 59, 59, 59, + 60, 60, 60, 60, 61, 61, 61, 61, 62, 62, 62, 62, 63, 63, 63, 63, + 64, 64, 64, 64, 65, 65, 65, 65, 66, 66, 66, 66, 67, 67, 67, 67, + 68, 68, 68, 68, 69, 69, 69, 69, 70, 70, 70, 70, 71, 71, 71, 71, + 72, 72, 72, 72, 73, 73, 73, 73, 74, 74, 74, 74, 75, 75, 75, 75, + 76, 76, 76, 76, 77, 77, 77, 77, 78, 78, 78, 78, 79, 79, 79, 79, + 80, 80, 80, 80, 81, 81, 81, 81, 82, 82, 82, 82, 83, 83, 83, 83, + 84, 84, 84, 84, 85, 85, 85, 85, 86, 86, 86, 86, 87, 87, 87, 87, + 88, 88, 88, 88, 89, 89, 89, 89, 90, 90, 90, 90, 91, 91, 91, 91, + 92, 92, 92, 92, 93, 93, 93, 93, 94, 94, 94, 94, 95, 95, 95, 95, + 96, 96, 96, 96, 97, 97, 97, 97, 98, 98, 98, 98, 99, 99, 99, 99, + 100, 100, 100, 100, 101, 101, 101, 101, 102, 102, 102, 102, 103, 103, 103, 103, + 104, 104, 104, 104, 105, 105, 105, 105, 106, 106, 106, 106, 107, 107, 107, 107, + 108, 108, 108, 108, 109, 109, 109, 109, 110, 110, 110, 110, 111, 111, 111, 111, + 112, 112, 112, 112, 113, 113, 113, 113, 114, 114, 114, 114, 115, 115, 115, 115, + 116, 116, 116, 116, 117, 117, 117, 117, 118, 118, 118, 118, 119, 119, 119, 119, + 120, 120, 120, 120, 121, 121, 121, 121, 122, 122, 122, 122, 123, 123, 123, 123, + 124, 124, 124, 124, 125, 125, 125, 125, 126, 126, 126, 126, 127, 127, 127, 127, + 128, 128, 128, 128, 129, 129, 129, 129, 130, 130, 130, 130, 131, 131, 131, 131, + 
132, 132, 132, 132, 133, 133, 133, 133, 134, 134, 134, 134, 135, 135, 135, 135, + 136, 136, 136, 136, 137, 137, 137, 137, 138, 138, 138, 138, 139, 139, 139, 139, + 140, 140, 140, 140, 141, 141, 141, 141, 142, 142, 142, 142, 143, 143, 143, 143, + 144, 144, 144, 144, 145, 145, 145, 145, 146, 146, 146, 146, 147, 147, 147, 147, + 148, 148, 148, 148, 149, 149, 149, 149, 150, 150, 150, 150, 151, 151, 151, 151, + 152, 152, 152, 152, 153, 153, 153, 153, 154, 154, 154, 154, 155, 155, 155, 155, + 156, 156, 156, 156, 157, 157, 157, 157, 158, 158, 158, 158, 159, 159, 159, 159, + 160, 160, 160, 160, 161, 161, 161, 161, 162, 162, 162, 162, 163, 163, 163, 163, + 164, 164, 164, 164, 165, 165, 165, 165, 166, 166, 166, 166, 167, 167, 167, 167, + 168, 168, 168, 168, 169, 169, 169, 169, 170, 170, 170, 170, 171, 171, 171, 171, + 172, 172, 172, 172, 173, 173, 173, 173, 174, 174, 174, 174, 175, 175, 175, 175, + 176, 176, 176, 176, 177, 177, 177, 177, 178, 178, 178, 178, 179, 179, 179, 179, + 180, 180, 180, 180, 181, 181, 181, 181, 182, 182, 182, 182, 183, 183, 183, 183, + 184, 184, 184, 184, 185, 185, 185, 185, 186, 186, 186, 186, 187, 187, 187, 187, + 188, 188, 188, 188, 189, 189, 189, 189, 190, 190, 190, 190, 191, 191, 191, 191, + 192, 192, 192, 192, 193, 193, 193, 193, 194, 194, 194, 194, 195, 195, 195, 195, + 196, 196, 196, 196, 197, 197, 197, 197, 198, 198, 198, 198, 199, 199, 199, 199, + 200, 200, 200, 200, 201, 201, 201, 201, 202, 202, 202, 202, 203, 203, 203, 203, + 204, 204, 204, 204, 205, 205, 205, 205, 206, 206, 206, 206, 207, 207, 207, 207, + 208, 208, 208, 208, 209, 209, 209, 209, 210, 210, 210, 210, 211, 211, 211, 211, + 212, 212, 212, 212, 213, 213, 213, 213, 214, 214, 214, 214, 215, 215, 215, 215, + 216, 216, 216, 216, 217, 217, 217, 217, 218, 218, 218, 218, 219, 219, 219, 219, + 220, 220, 220, 220, +}; + +/* Block: 4288 bytes. 
*/ + +RE_UINT32 re_get_block(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 11; + code = ch ^ (f << 11); + pos = (RE_UINT32)re_block_stage_1[f] << 4; + f = code >> 7; + code ^= f << 7; + pos = (RE_UINT32)re_block_stage_2[pos + f] << 3; + f = code >> 4; + code ^= f << 4; + pos = (RE_UINT32)re_block_stage_3[pos + f] << 2; + f = code >> 2; + code ^= f << 2; + pos = (RE_UINT32)re_block_stage_4[pos + f] << 2; + value = re_block_stage_5[pos + code]; + + return value; +} + +/* Script. */ + +static RE_UINT8 re_script_stage_1[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 9, 10, 11, 12, 12, 12, 12, 13, 14, 14, 14, 14, 15, + 16, 17, 18, 14, 19, 14, 20, 14, 14, 14, 14, 14, 14, 21, 14, 14, + 14, 14, 14, 14, 14, 14, 22, 14, 14, 14, 23, 14, 14, 24, 25, 14, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 26, 7, 27, 28, 14, 14, 14, 14, 14, 14, 14, 29, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 30, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, +}; + +static RE_UINT8 re_script_stage_2[] = { + 0, 1, 2, 2, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, + 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, + 30, 31, 32, 32, 33, 34, 35, 36, 37, 37, 37, 37, 37, 38, 39, 40, + 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 2, 2, 53, 54, + 55, 56, 57, 58, 59, 59, 59, 60, 61, 59, 59, 59, 59, 59, 62, 59, + 63, 63, 59, 59, 59, 59, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, + 74, 75, 76, 77, 78, 79, 80, 59, 72, 72, 72, 72, 72, 72, 72, 72, + 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, + 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 81, 72, 72, 72, 72, + 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 82, + 83, 83, 83, 83, 83, 83, 83, 83, 83, 84, 85, 85, 86, 87, 88, 89, + 90, 91, 92, 93, 94, 95, 96, 97, 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 98, + 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, + 65, 65, 72, 72, 99, 100, 101, 102, 103, 103, 104, 105, 106, 107, 108, 109, + 110, 111, 112, 113, 65, 114, 115, 116, 117, 118, 65, 65, 65, 65, 65, 65, + 119, 65, 120, 121, 122, 65, 123, 65, 
124, 65, 65, 65, 125, 65, 65, 65, + 126, 127, 128, 129, 65, 65, 65, 65, 65, 65, 65, 65, 65, 130, 65, 65, + 131, 131, 131, 131, 131, 131, 132, 65, 133, 65, 65, 65, 65, 65, 65, 65, + 134, 134, 134, 134, 134, 134, 134, 134, 135, 65, 65, 65, 65, 65, 65, 65, + 136, 136, 136, 136, 137, 65, 65, 65, 65, 65, 65, 65, 65, 65, 138, 139, + 140, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, + 59, 141, 142, 143, 144, 65, 145, 65, 146, 147, 148, 59, 59, 149, 59, 150, + 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 151, 152, 65, 65, + 153, 154, 155, 156, 157, 65, 158, 159, 160, 161, 162, 163, 164, 165, 60, 65, + 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 166, 72, 72, + 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 167, 72, + 168, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, + 72, 72, 72, 72, 168, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, + 169, 65, 170, 171, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, +}; + +static RE_UINT16 re_script_stage_3[] = { + 0, 0, 0, 0, 1, 2, 1, 2, 0, 0, 3, 3, 4, 5, 4, 5, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 6, 0, 0, 7, 0, + 8, 8, 8, 8, 8, 8, 8, 9, 10, 11, 12, 11, 11, 11, 13, 11, + 14, 14, 14, 14, 14, 14, 14, 14, 15, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 16, 17, 18, 19, 17, 18, 20, 21, 22, 22, 23, 22, 24, 25, + 26, 27, 28, 28, 29, 30, 31, 32, 28, 28, 28, 28, 28, 33, 28, 28, + 34, 35, 35, 35, 36, 28, 28, 28, 37, 37, 37, 38, 39, 39, 39, 40, + 41, 41, 42, 43, 44, 45, 46, 46, 46, 46, 47, 46, 46, 46, 48, 49, + 50, 50, 50, 50, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, + 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, + 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, + 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, + 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 46, 124, + 125, 126, 126, 127, 126, 128, 46, 46, 129, 130, 131, 132, 133, 134, 46, 46, + 135, 135, 135, 135, 136, 135, 137, 138, 135, 136, 135, 139, 139, 140, 46, 
46, + 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 142, 142, 143, 142, 142, 144, + 145, 145, 145, 145, 145, 145, 145, 145, 146, 146, 146, 146, 147, 148, 146, 146, + 147, 146, 146, 149, 150, 151, 146, 146, 146, 150, 146, 146, 146, 152, 146, 153, + 146, 154, 155, 155, 155, 155, 155, 156, 157, 157, 157, 157, 157, 157, 157, 157, + 158, 159, 160, 160, 160, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, + 171, 171, 171, 171, 171, 172, 173, 173, 174, 175, 176, 176, 176, 176, 176, 177, + 176, 176, 178, 157, 157, 157, 157, 179, 180, 181, 182, 182, 183, 184, 185, 186, + 187, 187, 188, 187, 189, 190, 171, 171, 191, 192, 193, 193, 193, 194, 193, 195, + 196, 196, 197, 46, 46, 46, 46, 46, 198, 198, 198, 198, 199, 198, 198, 200, + 201, 201, 201, 201, 202, 202, 202, 203, 204, 204, 204, 205, 206, 207, 207, 207, + 46, 46, 46, 46, 208, 209, 210, 211, 4, 4, 212, 4, 4, 213, 214, 215, + 4, 4, 4, 216, 8, 8, 217, 218, 11, 219, 11, 11, 219, 220, 11, 221, + 11, 11, 11, 222, 222, 223, 11, 224, 225, 0, 0, 0, 0, 0, 226, 227, + 228, 229, 0, 230, 46, 8, 8, 231, 0, 0, 232, 233, 234, 0, 4, 4, + 235, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 236, 0, 0, 237, 46, 230, 46, 0, 0, + 238, 0, 0, 0, 0, 0, 0, 0, 239, 239, 239, 239, 239, 239, 239, 239, + 0, 0, 0, 0, 240, 241, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, + 242, 242, 243, 242, 242, 243, 4, 4, 244, 244, 244, 244, 244, 244, 244, 245, + 142, 142, 143, 246, 246, 246, 247, 248, 146, 249, 250, 250, 250, 250, 14, 14, + 0, 0, 0, 251, 46, 46, 46, 46, 252, 253, 252, 252, 252, 252, 252, 254, + 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 255, 46, 251, + 256, 0, 257, 258, 259, 260, 260, 260, 260, 261, 262, 263, 263, 263, 263, 264, + 265, 266, 267, 268, 145, 145, 145, 145, 269, 0, 266, 270, 0, 0, 236, 263, + 145, 269, 0, 0, 0, 0, 145, 271, 0, 0, 0, 0, 0, 263, 263, 272, + 263, 263, 263, 263, 263, 273, 0, 0, 252, 252, 252, 255, 0, 0, 0, 0, + 252, 252, 252, 252, 274, 46, 46, 46, 275, 275, 275, 275, 275, 
275, 275, 275, + 276, 275, 275, 275, 277, 278, 278, 278, 279, 279, 279, 279, 279, 279, 279, 279, + 279, 279, 280, 46, 14, 14, 14, 14, 14, 281, 282, 282, 282, 282, 282, 283, + 0, 0, 284, 4, 4, 4, 4, 4, 285, 286, 287, 46, 46, 46, 46, 288, + 289, 289, 290, 241, 291, 291, 291, 292, 293, 293, 293, 293, 294, 295, 50, 296, + 297, 297, 297, 298, 298, 299, 145, 300, 301, 301, 301, 301, 302, 303, 46, 46, + 304, 304, 304, 305, 306, 307, 141, 308, 309, 309, 309, 309, 310, 311, 312, 313, + 314, 315, 250, 46, 46, 46, 46, 46, 46, 46, 46, 46, 312, 312, 316, 317, + 145, 145, 318, 145, 319, 145, 145, 320, 252, 252, 252, 252, 252, 252, 321, 252, + 252, 252, 252, 252, 252, 322, 46, 46, 323, 324, 22, 325, 326, 28, 28, 28, + 28, 28, 28, 28, 327, 328, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 329, 46, 28, 28, 28, 28, 330, 28, 28, 331, 46, 46, 332, + 8, 241, 217, 0, 0, 333, 334, 335, 28, 28, 28, 28, 28, 28, 28, 336, + 238, 0, 1, 2, 1, 2, 337, 262, 263, 338, 145, 269, 339, 340, 341, 342, + 343, 344, 345, 346, 347, 347, 46, 46, 344, 344, 344, 344, 344, 344, 344, 348, + 349, 0, 0, 350, 11, 11, 11, 11, 351, 251, 46, 46, 46, 0, 0, 352, + 353, 354, 355, 355, 355, 356, 46, 46, 357, 358, 359, 360, 361, 46, 46, 46, + 362, 363, 364, 364, 365, 366, 46, 46, 367, 367, 367, 367, 367, 368, 368, 368, + 369, 370, 371, 46, 46, 46, 46, 46, 372, 373, 373, 374, 375, 376, 46, 46, + 377, 378, 379, 380, 46, 46, 46, 46, 381, 381, 382, 383, 46, 46, 46, 46, + 384, 385, 386, 387, 388, 389, 390, 390, 391, 391, 391, 392, 393, 394, 395, 396, + 397, 397, 397, 397, 398, 46, 46, 46, 46, 46, 46, 46, 46, 46, 28, 49, + 399, 399, 399, 399, 400, 401, 399, 46, 402, 402, 402, 402, 403, 404, 405, 406, + 407, 407, 407, 408, 409, 46, 46, 46, 410, 410, 410, 410, 411, 412, 46, 46, + 413, 413, 413, 414, 415, 46, 46, 46, 416, 416, 416, 416, 416, 416, 416, 416, + 416, 416, 416, 416, 416, 416, 417, 46, 416, 416, 416, 416, 416, 416, 418, 419, + 420, 420, 420, 420, 420, 420, 420, 420, 420, 420, 421, 46, 46, 46, 46, 46, + 282, 
282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 422, 46, 46, 46, 46, + 423, 423, 423, 423, 424, 423, 423, 425, 426, 423, 46, 46, 46, 46, 46, 46, + 427, 46, 46, 46, 46, 46, 46, 46, 0, 0, 0, 0, 0, 0, 0, 428, + 0, 0, 429, 0, 0, 0, 430, 431, 432, 0, 433, 0, 0, 434, 46, 46, + 11, 11, 11, 11, 435, 46, 46, 46, 0, 0, 0, 0, 0, 237, 0, 436, + 0, 0, 0, 0, 0, 226, 0, 0, 0, 437, 438, 439, 440, 0, 0, 0, + 441, 442, 0, 443, 444, 445, 0, 0, 0, 0, 446, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 447, 0, 0, 0, 448, 28, 449, 450, 451, 452, 453, 454, + 455, 456, 457, 456, 46, 46, 46, 327, 0, 0, 251, 0, 0, 0, 0, 0, + 0, 236, 228, 458, 238, 238, 46, 46, 230, 0, 228, 0, 0, 0, 251, 0, + 0, 230, 46, 46, 46, 46, 459, 0, 460, 0, 0, 230, 461, 436, 46, 46, + 0, 0, 462, 463, 0, 0, 0, 240, 0, 236, 0, 0, 464, 46, 0, 462, + 0, 0, 0, 228, 445, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 465, + 0, 0, 0, 434, 236, 0, 466, 46, 46, 46, 46, 46, 46, 46, 46, 467, + 0, 0, 0, 0, 468, 46, 46, 46, 0, 0, 0, 0, 428, 46, 46, 46, + 252, 252, 252, 252, 252, 469, 46, 46, 252, 252, 252, 470, 252, 252, 252, 252, + 252, 321, 46, 46, 46, 46, 46, 46, 471, 46, 0, 0, 0, 0, 0, 0, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 46, +}; + +static RE_UINT8 re_script_stage_4[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 3, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 2, + 2, 2, 2, 2, 3, 0, 0, 0, 2, 2, 3, 0, 0, 4, 0, 0, + 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 7, 6, 8, 6, 6, 9, + 8, 8, 10, 10, 6, 11, 11, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 12, 6, 6, 6, 6, 6, 6, 6, 13, 13, 13, 13, 13, 13, 13, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15, 16, 14, 14, 14, 14, + 14, 14, 14, 14, 8, 8, 8, 8, 17, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 19, 17, 18, 18, 18, + 18, 18, 18, 18, 20, 19, 8, 17, 21, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 8, 8, 8, 8, + 22, 22, 22, 22, 22, 23, 8, 8, 22, 22, 23, 8, 8, 8, 8, 8, + 24, 24, 25, 24, 24, 24, 26, 24, 24, 24, 24, 
24, 24, 27, 25, 27, + 24, 24, 24, 24, 24, 24, 24, 24, 26, 24, 24, 24, 24, 28, 5, 5, + 5, 5, 5, 24, 24, 24, 24, 24, 0, 0, 0, 0, 0, 24, 24, 24, + 29, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 27, 24, + 30, 30, 30, 30, 30, 30, 30, 31, 30, 30, 30, 30, 30, 30, 30, 30, + 30, 30, 30, 30, 30, 32, 31, 30, 33, 33, 33, 33, 33, 33, 33, 33, + 33, 8, 8, 8, 8, 8, 8, 8, 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 35, 8, 8, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 8, 36, 36, 36, 36, 36, 36, 36, 37, + 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 8, 39, + 8, 8, 8, 8, 8, 8, 8, 8, 25, 24, 24, 24, 24, 24, 25, 8, + 8, 8, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 25, + 40, 40, 40, 40, 40, 40, 40, 40, 41, 42, 40, 40, 40, 40, 40, 40, + 40, 40, 0, 40, 40, 40, 40, 40, 40, 40, 40, 40, 43, 40, 40, 40, + 44, 45, 44, 45, 45, 45, 46, 44, 46, 44, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 46, 45, 45, 45, 46, 46, 8, 45, 45, 8, 45, 45, + 45, 45, 46, 44, 46, 44, 45, 46, 8, 8, 8, 44, 8, 8, 45, 44, + 45, 45, 8, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 8, 8, + 47, 48, 47, 48, 48, 49, 8, 47, 49, 47, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 49, 48, 48, 48, 49, 48, 47, 49, 48, 8, 49, 48, + 48, 49, 8, 47, 49, 47, 48, 8, 47, 8, 8, 8, 47, 48, 49, 49, + 8, 8, 8, 48, 48, 48, 48, 48, 48, 48, 48, 8, 8, 8, 8, 8, + 50, 51, 50, 51, 51, 51, 51, 50, 51, 50, 51, 51, 51, 51, 51, 51, + 51, 51, 51, 51, 52, 51, 51, 51, 52, 51, 50, 51, 51, 8, 51, 51, + 51, 51, 51, 50, 51, 50, 51, 8, 52, 8, 8, 8, 8, 8, 8, 8, + 51, 51, 8, 51, 51, 51, 51, 51, 51, 8, 8, 8, 8, 8, 8, 8, + 53, 54, 53, 54, 54, 54, 55, 53, 55, 53, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 55, 54, 54, 54, 55, 54, 53, 54, 54, 8, 54, 54, + 54, 54, 55, 53, 55, 53, 54, 8, 8, 8, 8, 54, 8, 8, 54, 53, + 54, 54, 8, 54, 54, 54, 54, 54, 54, 54, 54, 54, 8, 8, 8, 8, + 8, 56, 57, 56, 56, 58, 8, 56, 58, 56, 56, 8, 57, 58, 58, 56, + 8, 57, 58, 8, 56, 58, 8, 56, 56, 56, 56, 56, 56, 8, 8, 56, + 56, 58, 8, 56, 58, 56, 56, 
8, 58, 8, 8, 57, 8, 8, 8, 8, + 8, 8, 8, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 58, 8, 8, + 59, 60, 59, 60, 60, 60, 61, 60, 61, 60, 60, 60, 60, 60, 60, 60, + 60, 60, 60, 60, 61, 60, 60, 60, 60, 60, 59, 60, 60, 8, 59, 60, + 60, 60, 61, 60, 61, 60, 60, 8, 8, 8, 59, 61, 60, 8, 8, 8, + 60, 60, 8, 60, 60, 60, 60, 60, 8, 8, 8, 8, 60, 60, 60, 60, + 8, 62, 63, 62, 62, 62, 64, 62, 64, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 64, 62, 62, 62, 62, 62, 63, 62, 62, 8, 62, 62, + 62, 62, 64, 62, 64, 62, 62, 8, 8, 8, 63, 64, 8, 8, 8, 64, + 62, 62, 8, 62, 62, 62, 62, 62, 63, 64, 8, 8, 8, 8, 8, 8, + 8, 65, 66, 65, 65, 65, 67, 65, 67, 65, 65, 65, 65, 65, 65, 65, + 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 67, 66, 65, + 65, 65, 67, 65, 67, 65, 65, 67, 8, 8, 8, 66, 8, 8, 8, 8, + 65, 65, 8, 65, 65, 65, 65, 65, 65, 65, 65, 8, 66, 65, 65, 65, + 8, 68, 69, 68, 68, 68, 68, 68, 68, 68, 68, 70, 8, 68, 68, 68, + 68, 68, 68, 68, 68, 68, 68, 68, 68, 69, 68, 68, 68, 68, 69, 8, + 68, 68, 68, 70, 8, 70, 8, 69, 68, 68, 70, 70, 68, 68, 68, 68, + 8, 68, 70, 8, 8, 8, 8, 8, 71, 72, 72, 72, 72, 72, 72, 72, + 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 73, 8, 20, + 72, 72, 72, 72, 72, 72, 8, 8, 74, 75, 75, 74, 75, 75, 74, 8, + 8, 8, 76, 76, 74, 76, 76, 76, 74, 76, 74, 74, 8, 76, 74, 76, + 76, 76, 76, 76, 76, 74, 76, 8, 76, 76, 75, 75, 76, 76, 76, 8, + 76, 76, 76, 76, 76, 8, 76, 76, 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 78, 77, 77, 77, 77, 77, 77, 77, 77, 77, 79, 8, + 78, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 79, 77, + 77, 77, 80, 0, 81, 79, 8, 8, 82, 82, 82, 82, 82, 82, 82, 82, + 83, 83, 83, 83, 83, 83, 83, 83, 83, 83, 83, 84, 8, 8, 84, 8, + 83, 83, 83, 83, 83, 85, 83, 83, 86, 86, 86, 86, 86, 86, 86, 86, + 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 88, 87, 87, 8, + 87, 87, 87, 88, 88, 87, 87, 8, 88, 87, 87, 8, 87, 87, 87, 88, + 88, 87, 87, 8, 87, 87, 87, 87, 87, 87, 87, 88, 87, 87, 87, 87, + 87, 87, 87, 87, 87, 88, 89, 87, 87, 87, 87, 87, 87, 87, 88, 8, 
+ 87, 87, 87, 87, 87, 8, 8, 8, 90, 90, 90, 90, 90, 90, 90, 90, + 90, 90, 91, 8, 8, 8, 8, 8, 92, 92, 92, 92, 92, 92, 92, 92, + 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 94, 8, + 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 96, 0, 95, + 97, 8, 8, 8, 8, 8, 8, 8, 98, 98, 98, 98, 98, 98, 99, 98, + 98, 98, 99, 8, 8, 8, 8, 8, 100, 100, 100, 100, 100, 100, 100, 100, + 100, 100, 101, 9, 8, 8, 8, 8, 102, 102, 102, 102, 102, 102, 102, 102, + 102, 102, 8, 8, 8, 8, 8, 8, 103, 103, 103, 103, 103, 103, 104, 103, + 104, 103, 8, 8, 8, 8, 8, 8, 105, 105, 105, 105, 105, 105, 105, 105, + 105, 105, 105, 105, 105, 105, 105, 8, 105, 105, 105, 105, 105, 8, 8, 8, + 106, 0, 107, 106, 106, 106, 106, 108, 106, 106, 106, 106, 106, 8, 8, 8, + 106, 106, 106, 106, 106, 106, 106, 106, 106, 106, 106, 106, 8, 8, 8, 8, + 106, 106, 106, 106, 106, 108, 8, 8, 92, 92, 92, 8, 8, 8, 8, 8, + 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 110, 8, + 109, 109, 109, 109, 109, 109, 8, 8, 110, 8, 109, 109, 109, 109, 109, 109, + 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 8, + 111, 111, 112, 8, 8, 8, 8, 8, 113, 113, 113, 113, 113, 113, 113, 113, + 113, 113, 113, 113, 113, 113, 8, 8, 113, 113, 113, 113, 113, 8, 8, 8, + 113, 113, 113, 113, 113, 114, 8, 113, 115, 115, 115, 115, 115, 115, 115, 115, + 115, 115, 115, 115, 115, 115, 8, 115, 116, 116, 116, 116, 116, 116, 116, 116, + 116, 116, 116, 116, 116, 116, 116, 117, 116, 116, 116, 116, 116, 116, 117, 118, + 116, 116, 116, 116, 116, 8, 8, 8, 116, 116, 116, 116, 116, 116, 116, 8, + 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 8, 8, + 119, 119, 119, 119, 119, 119, 120, 8, 121, 121, 121, 121, 121, 121, 121, 121, + 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 8, 8, 8, 8, 122, 122, + 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 8, 124, 123, 123, + 123, 123, 123, 123, 123, 8, 124, 123, 125, 125, 125, 125, 125, 125, 125, 125, + 121, 121, 121, 121, 8, 8, 8, 
8, 5, 126, 5, 5, 5, 5, 5, 5, + 126, 5, 5, 5, 126, 0, 127, 0, 0, 0, 126, 9, 8, 8, 8, 8, + 2, 2, 2, 6, 6, 128, 2, 2, 2, 2, 2, 2, 2, 2, 129, 6, + 6, 2, 2, 6, 6, 130, 2, 2, 2, 2, 2, 2, 131, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 129, 5, 5, 5, 132, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 5, 5, 6, 6, 6, 8, 6, 6, 6, 8, + 6, 6, 6, 6, 12, 12, 12, 12, 6, 6, 6, 6, 6, 6, 6, 8, + 6, 6, 11, 6, 6, 6, 6, 6, 6, 6, 8, 6, 6, 6, 12, 6, + 8, 6, 11, 6, 6, 6, 6, 11, 0, 0, 0, 0, 0, 0, 5, 0, + 0, 0, 9, 0, 0, 0, 0, 0, 1, 8, 0, 0, 0, 0, 0, 1, + 0, 0, 0, 0, 0, 0, 0, 9, 2, 2, 2, 2, 2, 2, 133, 8, + 0, 0, 0, 0, 0, 9, 8, 8, 132, 8, 8, 8, 8, 8, 8, 8, + 0, 0, 0, 10, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 3, 2, 2, 2, 2, 3, 8, 8, 8, + 0, 0, 8, 8, 8, 8, 8, 8, 0, 0, 0, 9, 8, 8, 8, 8, + 20, 0, 0, 0, 0, 0, 0, 0, 134, 134, 134, 134, 134, 134, 134, 134, + 0, 0, 0, 0, 0, 0, 9, 8, 0, 0, 0, 0, 0, 8, 8, 8, + 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 136, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 8, 8, 137, 13, 13, 13, + 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 8, 8, 8, 139, + 140, 8, 8, 8, 8, 8, 8, 139, 87, 87, 87, 88, 8, 8, 8, 8, + 87, 87, 87, 88, 87, 87, 87, 88, 0, 0, 0, 0, 0, 0, 8, 8, + 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 142, 141, 141, + 141, 141, 8, 8, 8, 8, 8, 8, 141, 141, 141, 8, 8, 8, 8, 8, + 0, 0, 143, 143, 0, 0, 0, 0, 143, 141, 141, 141, 141, 5, 5, 86, + 0, 0, 0, 0, 141, 141, 0, 0, 144, 145, 145, 145, 145, 145, 145, 145, + 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 146, 147, 126, 148, 145, + 149, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, + 150, 150, 150, 150, 150, 151, 149, 150, 8, 8, 152, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, + 153, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 154, + 4, 4, 4, 4, 4, 155, 8, 8, 86, 86, 86, 86, 86, 86, 86, 156, + 150, 150, 150, 150, 150, 150, 150, 157, 150, 150, 150, 150, 0, 0, 0, 0, + 141, 141, 141, 
141, 141, 141, 158, 8, 159, 159, 159, 159, 159, 159, 159, 159, + 159, 159, 159, 159, 159, 159, 160, 8, 159, 159, 159, 160, 8, 8, 8, 8, + 161, 161, 161, 161, 161, 161, 161, 161, 162, 162, 162, 162, 162, 162, 162, 162, + 162, 162, 162, 162, 162, 162, 8, 8, 14, 14, 14, 14, 8, 8, 8, 163, + 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 8, 8, 8, 8, + 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 1, 2, 133, + 2, 2, 8, 8, 8, 8, 8, 8, 2, 2, 2, 2, 2, 133, 8, 8, + 8, 8, 8, 8, 2, 2, 2, 2, 165, 165, 165, 165, 165, 165, 165, 165, + 165, 165, 165, 165, 165, 165, 8, 8, 166, 166, 166, 166, 166, 166, 166, 166, + 166, 166, 166, 166, 8, 8, 8, 8, 167, 167, 167, 167, 167, 167, 167, 167, + 167, 167, 168, 8, 8, 8, 8, 167, 167, 167, 167, 167, 167, 8, 8, 8, + 40, 40, 40, 40, 40, 40, 8, 8, 169, 169, 169, 169, 169, 169, 169, 169, + 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 8, 8, 8, 8, 8, 171, + 86, 86, 86, 86, 86, 86, 154, 8, 172, 172, 172, 172, 172, 172, 172, 172, + 172, 172, 172, 172, 172, 172, 172, 20, 172, 172, 172, 172, 172, 8, 8, 172, + 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 174, 8, 8, 8, 8, + 173, 173, 173, 173, 173, 173, 173, 8, 173, 173, 173, 173, 173, 8, 173, 173, + 82, 82, 82, 82, 82, 82, 8, 8, 175, 175, 175, 175, 175, 175, 175, 175, + 175, 176, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 177, 175, 175, + 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 179, 8, 8, 8, 8, + 89, 87, 87, 88, 89, 87, 87, 88, 89, 87, 87, 88, 8, 8, 8, 8, + 178, 178, 178, 178, 178, 178, 178, 8, 178, 178, 178, 178, 178, 8, 8, 8, + 86, 86, 8, 8, 8, 8, 8, 8, 86, 86, 86, 154, 8, 153, 86, 86, + 86, 86, 86, 86, 86, 86, 8, 8, 141, 141, 141, 141, 141, 141, 141, 8, + 141, 141, 141, 141, 141, 8, 8, 8, 2, 2, 2, 133, 8, 8, 8, 8, + 8, 17, 18, 18, 8, 8, 21, 22, 22, 22, 22, 23, 22, 22, 23, 23, + 22, 21, 23, 22, 22, 22, 22, 22, 24, 8, 8, 8, 8, 8, 8, 8, + 8, 180, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, + 8, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 8, 8, 8, 8, + 24, 24, 24, 24, 
24, 24, 27, 8, 0, 9, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 9, 0, 0, 8, 8, 24, 24, 25, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 25, 20, 0, 0, 0, 150, 150, 150, 150, 150, + 150, 150, 150, 150, 150, 150, 150, 0, 8, 86, 86, 86, 8, 86, 86, 86, + 8, 86, 86, 86, 8, 86, 154, 8, 0, 0, 0, 9, 0, 0, 0, 9, + 8, 8, 8, 8, 20, 0, 0, 8, 181, 181, 181, 181, 181, 181, 182, 181, + 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 183, 181, 181, 181, 181, + 181, 181, 181, 181, 181, 183, 181, 182, 181, 181, 181, 181, 181, 181, 181, 8, + 181, 181, 181, 181, 181, 183, 8, 8, 0, 9, 8, 20, 0, 0, 0, 0, + 0, 0, 8, 20, 0, 0, 0, 0, 6, 6, 6, 6, 6, 11, 8, 8, + 0, 0, 0, 0, 0, 0, 127, 8, 184, 184, 184, 184, 184, 184, 184, 184, + 184, 184, 184, 184, 184, 184, 185, 8, 186, 186, 186, 186, 186, 186, 186, 186, + 187, 8, 8, 8, 8, 8, 8, 8, 188, 188, 188, 188, 188, 188, 188, 188, + 188, 188, 188, 188, 188, 188, 188, 189, 188, 188, 8, 8, 8, 8, 8, 8, + 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 191, 8, 8, + 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 193, + 194, 194, 194, 194, 194, 194, 194, 194, 194, 194, 8, 8, 194, 194, 194, 194, + 194, 194, 194, 8, 8, 8, 8, 8, 195, 195, 195, 195, 195, 195, 195, 195, + 196, 196, 196, 196, 196, 196, 196, 196, 197, 197, 197, 197, 197, 197, 197, 197, + 197, 197, 197, 197, 197, 197, 197, 8, 197, 197, 197, 197, 197, 8, 8, 8, + 198, 198, 198, 8, 199, 198, 198, 198, 198, 198, 198, 198, 198, 198, 198, 198, + 198, 198, 198, 200, 199, 8, 199, 200, 201, 201, 201, 201, 201, 201, 201, 201, + 201, 201, 201, 202, 201, 201, 201, 201, 203, 203, 203, 203, 203, 203, 203, 203, + 203, 203, 203, 203, 203, 203, 8, 204, 205, 205, 205, 205, 205, 205, 205, 205, + 205, 205, 205, 205, 205, 8, 8, 206, 207, 207, 207, 207, 207, 207, 207, 207, + 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, 8, 8, 8, 208, + 209, 209, 210, 211, 8, 8, 209, 209, 209, 209, 210, 209, 210, 209, 209, 209, + 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 8, 8, 
209, 211, 8, 210, + 209, 209, 209, 209, 8, 8, 8, 8, 209, 209, 209, 209, 211, 8, 8, 8, + 212, 212, 212, 212, 212, 212, 212, 212, 213, 213, 213, 213, 213, 213, 213, 213, + 213, 213, 213, 8, 214, 213, 213, 213, 215, 215, 215, 215, 215, 215, 215, 215, + 215, 215, 215, 8, 215, 215, 215, 215, 216, 216, 216, 216, 216, 216, 216, 216, + 216, 217, 8, 8, 216, 216, 216, 216, 218, 218, 218, 218, 218, 218, 218, 218, + 218, 218, 218, 218, 219, 8, 8, 8, 220, 220, 220, 220, 220, 220, 220, 220, + 220, 220, 220, 220, 220, 220, 220, 8, 8, 220, 220, 220, 220, 220, 220, 220, + 221, 221, 221, 221, 221, 221, 221, 221, 221, 8, 8, 8, 8, 8, 8, 8, + 222, 222, 222, 222, 222, 222, 222, 222, 222, 222, 222, 222, 223, 8, 8, 8, + 222, 222, 222, 222, 222, 8, 8, 8, 224, 224, 224, 224, 224, 224, 224, 224, + 224, 224, 225, 224, 224, 224, 224, 224, 224, 224, 8, 8, 8, 8, 8, 8, + 226, 226, 226, 226, 226, 226, 226, 226, 226, 226, 226, 226, 227, 8, 8, 8, + 226, 226, 226, 226, 226, 8, 8, 8, 228, 228, 228, 228, 228, 228, 228, 228, + 228, 228, 228, 228, 8, 8, 8, 8, 228, 228, 228, 228, 228, 8, 8, 8, + 229, 229, 229, 229, 229, 229, 229, 229, 229, 229, 229, 229, 229, 229, 229, 230, + 229, 230, 8, 8, 8, 8, 8, 8, 229, 229, 8, 8, 8, 8, 8, 8, + 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 232, + 164, 164, 164, 164, 233, 8, 8, 8, 234, 234, 234, 234, 234, 234, 234, 234, + 234, 234, 235, 8, 8, 8, 8, 8, 234, 234, 234, 234, 234, 234, 234, 235, + 8, 8, 8, 8, 8, 8, 8, 236, 237, 8, 8, 8, 8, 8, 8, 8, + 0, 0, 0, 8, 8, 8, 8, 8, 0, 0, 0, 9, 20, 0, 0, 0, + 0, 0, 0, 127, 5, 0, 0, 0, 0, 0, 0, 0, 0, 127, 5, 5, + 5, 126, 127, 5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 5, 5, 0, + 0, 0, 0, 0, 0, 0, 0, 8, 6, 6, 6, 8, 8, 8, 8, 8, + 0, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, 0, 9, 0, + 8, 9, 20, 9, 20, 0, 9, 0, 0, 0, 0, 0, 0, 20, 20, 0, + 0, 0, 20, 0, 0, 0, 0, 0, 0, 0, 0, 20, 0, 9, 20, 0, + 0, 0, 9, 0, 0, 0, 9, 0, 0, 0, 0, 0, 0, 20, 0, 9, + 0, 0, 9, 9, 8, 0, 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 8, 0, + 24, 24, 180, 24, 24, 24, 24, 24, 180, 25, 25, 180, 180, 24, 24, 24, + 24, 25, 24, 24, 180, 180, 8, 8, 8, 25, 8, 180, 180, 180, 180, 24, + 180, 25, 25, 180, 180, 180, 180, 180, 180, 25, 25, 180, 24, 25, 24, 24, + 24, 25, 24, 24, 180, 24, 25, 25, 24, 24, 24, 24, 24, 180, 24, 24, + 24, 24, 24, 24, 24, 24, 8, 8, 180, 24, 180, 24, 24, 180, 24, 24, + 20, 0, 0, 0, 0, 0, 0, 9, 8, 8, 8, 0, 0, 0, 0, 0, + 238, 9, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 9, 8, 8, 8, + 9, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 20, 0, 0, 0, 0, + 0, 0, 9, 0, 0, 9, 8, 8, 0, 0, 0, 0, 20, 0, 9, 8, + 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8, 8, 20, 0, 0, + 9, 8, 20, 0, 0, 0, 0, 0, 141, 141, 141, 158, 8, 8, 8, 8, + 141, 141, 158, 8, 8, 8, 8, 8, 20, 8, 8, 8, 8, 8, 8, 8, +}; + +static RE_UINT8 re_script_stage_5[] = { + 1, 1, 1, 2, 2, 2, 2, 1, 35, 35, 41, 41, 3, 3, 1, 3, + 0, 0, 1, 0, 3, 1, 3, 0, 0, 3, 55, 55, 4, 4, 4, 41, + 41, 4, 0, 5, 5, 5, 5, 0, 0, 1, 0, 6, 6, 6, 6, 0, + 7, 7, 7, 0, 1, 7, 7, 1, 7, 41, 41, 7, 8, 8, 0, 8, + 8, 0, 9, 9, 66, 66, 66, 0, 82, 82, 82, 0, 95, 95, 95, 0, + 10, 10, 10, 41, 41, 10, 0, 10, 0, 11, 11, 11, 11, 0, 0, 12, + 12, 12, 12, 0, 0, 13, 13, 13, 13, 0, 0, 14, 14, 14, 14, 0, + 15, 15, 0, 15, 15, 0, 0, 16, 16, 16, 16, 0, 17, 17, 0, 17, + 17, 0, 18, 18, 0, 18, 18, 0, 19, 19, 0, 19, 19, 0, 0, 20, + 20, 20, 20, 0, 0, 21, 21, 0, 21, 21, 22, 22, 0, 22, 22, 0, + 22, 1, 1, 22, 23, 23, 24, 24, 0, 24, 24, 1, 25, 25, 26, 26, + 26, 0, 0, 26, 27, 27, 27, 0, 28, 28, 29, 29, 29, 0, 30, 30, + 30, 1, 30, 0, 42, 42, 42, 0, 43, 43, 43, 1, 44, 44, 45, 45, + 45, 0, 31, 31, 32, 32, 32, 1, 32, 0, 46, 46, 46, 0, 47, 47, + 47, 0, 56, 56, 56, 0, 54, 54, 78, 78, 78, 0, 0, 78, 62, 62, + 62, 0, 67, 67, 93, 93, 68, 68, 0, 68, 69, 69, 41, 1, 1, 41, + 3, 4, 2, 3, 3, 2, 4, 2, 41, 0, 2, 0, 53, 53, 57, 57, + 57, 0, 0, 55, 58, 58, 0, 58, 58, 0, 36, 36, 0, 36, 1, 36, + 0, 33, 33, 33, 33, 0, 0, 41, 1, 33, 1, 34, 34, 34, 34, 1, + 0, 35, 0, 25, 25, 0, 35, 0, 25, 1, 34, 0, 36, 0, 37, 37, + 37, 0, 83, 83, 70, 70, 0, 4, 84, 84, 
59, 59, 65, 65, 71, 71, + 71, 0, 72, 72, 73, 73, 0, 73, 85, 85, 77, 77, 77, 0, 79, 79, + 79, 0, 0, 79, 86, 86, 86, 0, 0, 7, 48, 48, 0, 48, 48, 0, + 74, 74, 74, 0, 75, 75, 75, 0, 38, 38, 38, 0, 39, 39, 39, 0, + 49, 49, 0, 49, 60, 60, 40, 40, 50, 50, 51, 51, 52, 52, 52, 0, + 0, 52, 87, 87, 0, 87, 64, 64, 0, 64, 76, 76, 0, 76, 98, 98, + 97, 97, 61, 61, 0, 61, 61, 0, 88, 88, 80, 80, 0, 80, 89, 89, + 90, 90, 90, 0, 91, 91, 91, 0, 94, 94, 92, 92, 101, 101, 101, 0, + 96, 96, 96, 0, 100, 100, 100, 0, 102, 102, 63, 63, 63, 0, 81, 81, + 81, 0, 84, 0, 99, 99, 99, 0, 0, 99, 34, 33, 33, 1, +}; + +/* Script: 8046 bytes. */ + +RE_UINT32 re_get_script(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 11; + code = ch ^ (f << 11); + pos = (RE_UINT32)re_script_stage_1[f] << 4; + f = code >> 7; + code ^= f << 7; + pos = (RE_UINT32)re_script_stage_2[pos + f] << 3; + f = code >> 4; + code ^= f << 4; + pos = (RE_UINT32)re_script_stage_3[pos + f] << 3; + f = code >> 1; + code ^= f << 1; + pos = (RE_UINT32)re_script_stage_4[pos + f] << 1; + value = re_script_stage_5[pos + code]; + + return value; +} + +/* Word_Break. 
*/ + +static RE_UINT8 re_word_break_stage_1[] = { + 0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 5, 6, 6, 7, 4, 8, + 9, 10, 11, 12, 4, 4, 13, 4, 4, 4, 4, 14, 4, 15, 16, 17, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 18, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, +}; + +static RE_UINT8 re_word_break_stage_2[] = { + 0, 1, 2, 2, 2, 3, 4, 5, 2, 6, 7, 8, 9, 10, 11, 12, + 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, + 29, 30, 2, 2, 31, 32, 33, 34, 35, 2, 2, 2, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 2, 50, 2, 2, 51, 52, + 53, 54, 55, 56, 57, 57, 57, 57, 57, 58, 57, 57, 57, 57, 57, 57, + 57, 57, 57, 57, 57, 57, 57, 57, 59, 60, 61, 62, 63, 57, 57, 57, + 64, 65, 66, 67, 57, 68, 69, 57, 57, 57, 57, 57, 57, 57, 57, 57, + 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, + 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, + 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 70, 2, 2, 71, 72, 73, 74, + 75, 76, 77, 78, 79, 80, 81, 82, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 83, + 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, + 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, + 
57, 57, 57, 57, 57, 57, 84, 85, 2, 2, 86, 87, 88, 89, 90, 91, + 92, 93, 94, 95, 57, 96, 97, 98, 2, 99, 57, 57, 57, 57, 57, 57, + 100, 57, 101, 102, 103, 57, 104, 57, 105, 57, 57, 57, 57, 57, 57, 57, + 106, 107, 108, 109, 57, 57, 57, 57, 57, 57, 57, 57, 57, 110, 57, 57, + 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, + 2, 2, 2, 2, 2, 2, 111, 57, 112, 57, 57, 57, 57, 57, 57, 57, + 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, + 2, 2, 2, 2, 2, 2, 2, 2, 113, 57, 57, 57, 57, 57, 57, 57, + 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, + 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, + 2, 2, 2, 2, 114, 57, 57, 57, 57, 57, 57, 57, 57, 57, 115, 116, + 117, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, + 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, + 57, 57, 118, 119, 120, 57, 57, 57, 121, 122, 123, 2, 2, 124, 125, 126, + 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, + 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, + 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 127, 128, 57, 57, + 57, 57, 57, 129, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, + 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, + 130, 57, 131, 132, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, + 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, +}; + +static RE_UINT8 re_word_break_stage_3[] = { + 0, 1, 0, 0, 2, 3, 4, 5, 6, 7, 7, 8, 6, 7, 7, 9, + 10, 0, 0, 0, 0, 11, 12, 13, 7, 7, 14, 7, 7, 7, 14, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 15, 7, 16, 0, 17, 18, 0, 0, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 20, 21, + 22, 23, 7, 7, 24, 7, 7, 7, 7, 7, 7, 7, 7, 7, 25, 7, + 26, 27, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 0, 6, 7, 7, 7, 14, 28, 6, 7, 7, 7, + 7, 29, 30, 19, 19, 19, 19, 31, 32, 0, 33, 33, 33, 34, 35, 0, + 36, 37, 19, 38, 7, 7, 7, 7, 7, 39, 19, 19, 4, 40, 41, 
7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 42, 43, 44, 45, 4, 46, + 0, 47, 48, 7, 7, 7, 19, 19, 19, 49, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 50, 19, 51, 0, 4, 52, 7, 7, 7, 39, 53, 54, + 7, 7, 50, 55, 56, 57, 0, 0, 7, 7, 7, 58, 0, 0, 0, 0, + 0, 0, 0, 0, 59, 17, 0, 0, 0, 0, 0, 0, 60, 19, 19, 61, + 62, 7, 7, 7, 7, 7, 7, 63, 19, 19, 64, 7, 65, 4, 6, 6, + 66, 67, 68, 7, 7, 59, 69, 70, 71, 72, 73, 74, 65, 4, 75, 0, + 66, 76, 68, 7, 7, 59, 77, 78, 79, 80, 81, 82, 83, 4, 84, 0, + 66, 25, 24, 7, 7, 59, 85, 70, 31, 86, 87, 0, 65, 4, 0, 0, + 66, 67, 68, 7, 7, 59, 85, 70, 71, 80, 88, 74, 65, 4, 28, 0, + 89, 90, 91, 92, 93, 90, 7, 94, 95, 96, 97, 0, 83, 4, 0, 0, + 66, 20, 59, 7, 7, 59, 98, 99, 100, 96, 101, 75, 65, 4, 0, 0, + 102, 20, 59, 7, 7, 59, 98, 70, 100, 96, 101, 103, 65, 4, 104, 0, + 102, 20, 59, 7, 7, 7, 7, 105, 100, 106, 73, 0, 65, 4, 0, 107, + 102, 7, 14, 107, 7, 7, 24, 108, 14, 109, 110, 19, 0, 0, 111, 0, + 0, 0, 0, 0, 0, 0, 112, 113, 73, 61, 4, 114, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 112, 115, 0, 116, 4, 114, 0, 0, 0, 0, + 87, 0, 0, 117, 4, 114, 118, 119, 7, 6, 7, 7, 7, 17, 30, 19, + 100, 120, 19, 30, 19, 19, 19, 121, 122, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 123, 19, 61, 4, 114, 88, 124, 125, 116, 126, 0, + 127, 31, 4, 128, 7, 7, 7, 7, 25, 129, 7, 7, 7, 7, 7, 130, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 91, 14, 91, 7, 7, 7, 7, + 7, 91, 7, 7, 7, 7, 91, 14, 91, 7, 14, 7, 7, 7, 7, 7, + 7, 7, 91, 7, 7, 7, 7, 7, 7, 7, 7, 131, 0, 0, 0, 0, + 7, 7, 0, 0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 17, 0, + 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 67, 7, 7, + 6, 7, 7, 9, 7, 7, 7, 7, 7, 7, 7, 7, 7, 90, 87, 0, + 7, 20, 132, 0, 7, 7, 132, 0, 7, 7, 133, 0, 7, 20, 134, 0, + 0, 0, 0, 0, 0, 0, 60, 19, 19, 19, 135, 136, 4, 114, 0, 0, + 0, 137, 4, 114, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, + 7, 7, 7, 7, 7, 138, 7, 7, 7, 7, 7, 7, 7, 7, 139, 0, + 7, 7, 7, 17, 19, 135, 19, 135, 83, 4, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 19, 19, 140, 117, 4, 114, 0, 0, 0, 0, + 7, 7, 141, 135, 
0, 0, 0, 0, 0, 0, 142, 61, 19, 19, 19, 71, + 4, 114, 4, 114, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 143, 7, 7, 7, 7, 7, 144, 19, 143, 145, 4, 114, 0, 123, 135, 0, + 146, 7, 7, 7, 64, 147, 4, 52, 7, 7, 7, 7, 50, 19, 135, 0, + 7, 7, 7, 7, 144, 19, 19, 0, 4, 148, 4, 52, 7, 7, 7, 139, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 149, 19, 19, 150, 151, 0, + 7, 7, 7, 7, 7, 7, 7, 7, 19, 19, 19, 19, 61, 0, 0, 60, + 7, 7, 139, 139, 7, 7, 7, 7, 139, 139, 7, 152, 7, 7, 7, 139, + 7, 7, 7, 7, 7, 7, 20, 153, 154, 17, 155, 145, 7, 17, 154, 17, + 0, 156, 0, 157, 158, 159, 0, 160, 161, 0, 162, 0, 163, 164, 28, 165, + 0, 0, 7, 17, 0, 0, 0, 0, 0, 0, 19, 19, 19, 19, 140, 0, + 166, 107, 108, 167, 18, 168, 7, 169, 170, 171, 0, 0, 7, 7, 7, 7, + 7, 87, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 172, 7, 7, 7, 7, 7, 7, 75, 0, 0, + 7, 7, 7, 7, 7, 14, 7, 7, 7, 7, 7, 14, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 17, 173, 174, 0, + 7, 7, 7, 7, 25, 129, 7, 7, 7, 7, 7, 7, 7, 165, 0, 73, + 7, 7, 14, 0, 14, 14, 14, 14, 14, 14, 14, 14, 19, 19, 19, 19, + 0, 0, 0, 0, 0, 165, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 129, 0, 0, 0, 0, 127, 175, 93, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 176, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 178, + 170, 7, 7, 7, 7, 139, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 14, 0, 0, 7, 7, 7, 9, 0, 0, 0, 0, 0, 0, 177, 177, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 177, 177, 177, 177, 177, 179, + 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 0, 0, 0, 0, 0, + 7, 17, 0, 0, 0, 0, 0, 0, 0, 0, 7, 7, 7, 7, 7, 139, + 7, 17, 7, 7, 4, 180, 0, 0, 7, 7, 7, 7, 7, 141, 149, 181, + 7, 7, 7, 73, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 117, 0, + 0, 0, 165, 7, 107, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 182, 145, 0, 7, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, + 183, 184, 7, 7, 39, 0, 0, 0, 7, 7, 7, 7, 7, 7, 145, 0, + 27, 7, 7, 7, 7, 7, 144, 19, 121, 0, 4, 114, 19, 19, 27, 185, + 4, 52, 7, 7, 50, 116, 7, 7, 141, 19, 135, 0, 7, 7, 7, 17, + 62, 7, 7, 7, 7, 7, 
39, 19, 140, 165, 4, 114, 0, 0, 0, 0, + 7, 7, 7, 7, 7, 64, 61, 0, 184, 186, 4, 114, 0, 0, 0, 187, + 0, 0, 0, 0, 0, 0, 125, 188, 81, 0, 0, 0, 7, 39, 189, 0, + 190, 190, 190, 0, 14, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 7, 7, 7, 7, 39, 191, 4, 114, + 7, 7, 7, 7, 145, 0, 7, 7, 14, 192, 7, 7, 7, 7, 7, 145, + 14, 0, 192, 193, 33, 194, 195, 196, 197, 33, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 75, 0, 0, 0, 192, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 139, 0, 0, 7, 7, 7, 7, 7, 7, + 7, 7, 107, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, 7, 145, + 19, 19, 198, 0, 61, 0, 199, 0, 0, 200, 201, 0, 0, 0, 20, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 202, + 203, 3, 0, 204, 6, 7, 7, 8, 6, 7, 7, 9, 205, 177, 177, 177, + 177, 177, 177, 206, 7, 7, 7, 14, 107, 107, 107, 207, 0, 0, 0, 208, + 7, 98, 7, 7, 14, 7, 7, 209, 7, 139, 7, 139, 0, 0, 0, 0, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 9, + 0, 0, 0, 0, 0, 0, 0, 0, 7, 7, 7, 7, 7, 7, 17, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 136, + 7, 7, 7, 17, 7, 7, 7, 7, 7, 7, 87, 0, 0, 0, 0, 0, + 7, 7, 7, 14, 0, 0, 7, 7, 7, 9, 0, 0, 0, 0, 0, 0, + 7, 7, 7, 139, 7, 7, 7, 7, 145, 7, 167, 0, 0, 0, 0, 0, + 7, 7, 7, 139, 4, 114, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 139, 59, 7, 7, 7, 7, 25, 210, 7, 7, 139, 0, 0, 0, 0, 0, + 7, 7, 139, 0, 7, 7, 7, 75, 0, 0, 0, 0, 0, 0, 0, 0, + 7, 7, 7, 7, 7, 7, 7, 172, 0, 0, 0, 0, 0, 0, 0, 0, + 211, 60, 98, 6, 7, 7, 145, 79, 0, 0, 0, 0, 7, 7, 7, 17, + 7, 7, 7, 7, 7, 7, 139, 0, 7, 7, 139, 0, 7, 7, 9, 0, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 87, 0, 0, 0, 0, 0, 0, + 146, 7, 7, 7, 7, 7, 7, 19, 61, 0, 0, 0, 83, 4, 0, 0, + 146, 7, 7, 7, 7, 7, 19, 212, 0, 0, 7, 7, 7, 87, 4, 114, + 146, 7, 7, 7, 141, 19, 213, 4, 0, 0, 0, 0, 0, 0, 0, 0, + 146, 7, 7, 7, 7, 7, 39, 19, 214, 0, 4, 114, 0, 0, 0, 0, + 7, 7, 7, 7, 7, 39, 19, 0, 4, 114, 0, 0, 0, 0, 0, 0, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 14, 0, 0, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 9, 0, 0, 0, + 7, 7, 7, 7, 7, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 7, 7, 7, 7, 7, 7, 
7, 87, 0, 0, 0, 0, 0, 0, 0, 0, + 7, 7, 7, 7, 7, 7, 7, 7, 17, 0, 64, 19, 19, 19, 19, 61, + 0, 73, 146, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 215, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 142, 216, 217, 218, + 219, 135, 0, 0, 0, 220, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 221, 0, 0, 0, 0, 0, 0, 0, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 20, 7, 7, 7, 7, 7, + 7, 7, 7, 20, 222, 223, 7, 224, 98, 7, 7, 7, 7, 7, 7, 7, + 25, 225, 20, 20, 7, 7, 7, 226, 153, 107, 59, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 139, 7, 7, 7, 59, 7, 7, 130, 7, 7, 7, 130, + 7, 7, 20, 7, 7, 7, 20, 7, 7, 14, 7, 7, 7, 14, 7, 7, + 7, 59, 7, 7, 7, 59, 7, 7, 130, 227, 4, 4, 4, 4, 4, 4, + 98, 7, 7, 7, 228, 6, 130, 229, 166, 230, 228, 152, 228, 130, 130, 82, + 7, 24, 7, 145, 231, 24, 7, 145, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 232, 233, 233, 233, + 234, 0, 0, 0, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 0, 0, +}; + +static RE_UINT8 re_word_break_stage_4[] = { + 0, 0, 1, 2, 3, 4, 0, 5, 6, 6, 7, 0, 8, 9, 9, 9, + 10, 11, 10, 0, 0, 12, 13, 14, 0, 15, 13, 0, 9, 10, 16, 17, + 16, 18, 9, 19, 0, 20, 21, 21, 9, 22, 17, 23, 0, 24, 10, 22, + 25, 9, 9, 25, 26, 21, 27, 9, 28, 0, 29, 0, 30, 21, 21, 31, + 32, 31, 33, 33, 34, 0, 35, 36, 37, 38, 0, 39, 40, 38, 41, 21, + 42, 43, 44, 9, 9, 45, 21, 46, 21, 47, 48, 27, 49, 50, 0, 51, + 52, 9, 40, 8, 9, 53, 54, 0, 49, 9, 21, 16, 55, 0, 56, 21, + 21, 57, 57, 58, 57, 0, 22, 9, 0, 21, 21, 40, 21, 9, 53, 59, + 57, 21, 53, 60, 30, 8, 9, 50, 50, 9, 20, 17, 16, 59, 21, 61, + 61, 62, 0, 63, 0, 25, 16, 0, 10, 64, 22, 65, 16, 48, 40, 63, + 61, 58, 66, 0, 8, 20, 0, 60, 27, 67, 22, 8, 31, 58, 19, 0, + 0, 68, 69, 8, 10, 17, 22, 16, 65, 22, 64, 19, 16, 68, 40, 68, + 48, 58, 19, 63, 9, 8, 16, 45, 21, 48, 0, 32, 68, 8, 0, 13, + 65, 0, 10, 45, 48, 62, 17, 9, 9, 28, 70, 63, 21, 71, 68, 0, 
+ 66, 21, 40, 0, 72, 0, 31, 73, 21, 58, 58, 0, 0, 74, 66, 68, + 9, 57, 21, 73, 0, 70, 63, 21, 58, 68, 48, 61, 30, 73, 68, 21, + 75, 58, 0, 28, 10, 9, 10, 30, 53, 73, 53, 0, 76, 0, 21, 0, + 0, 66, 63, 77, 78, 0, 9, 16, 73, 0, 9, 41, 0, 30, 21, 44, + 9, 21, 9, 0, 79, 9, 21, 27, 72, 8, 40, 21, 44, 52, 53, 80, + 81, 81, 9, 20, 17, 22, 9, 17, 0, 82, 83, 0, 0, 84, 85, 86, + 0, 11, 87, 88, 0, 87, 37, 89, 37, 37, 0, 64, 13, 64, 8, 16, + 22, 25, 16, 9, 0, 8, 16, 13, 0, 17, 64, 41, 27, 0, 90, 91, + 92, 93, 94, 94, 95, 94, 94, 95, 49, 0, 21, 96, 50, 10, 97, 97, + 41, 9, 64, 0, 9, 58, 63, 0, 73, 68, 17, 98, 8, 10, 40, 58, + 64, 9, 0, 99, 100, 33, 33, 34, 33, 101, 102, 100, 103, 88, 11, 87, + 0, 104, 5, 105, 9, 106, 0, 107, 108, 0, 0, 109, 94, 110, 17, 19, + 111, 0, 10, 25, 19, 50, 57, 32, 40, 14, 21, 112, 44, 19, 93, 0, + 58, 30, 113, 37, 114, 21, 40, 30, 68, 58, 68, 73, 13, 65, 8, 22, + 25, 8, 10, 8, 25, 10, 9, 60, 65, 50, 81, 0, 81, 8, 8, 8, + 0, 115, 116, 116, 14, 0, +}; + +static RE_UINT8 re_word_break_stage_5[] = { + 0, 0, 0, 0, 0, 0, 5, 6, 6, 4, 0, 0, 0, 0, 1, 0, + 0, 0, 0, 2, 13, 0, 14, 0, 15, 15, 15, 15, 15, 15, 12, 13, + 0, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 0, 0, 0, 0, 16, + 0, 6, 0, 0, 0, 0, 11, 0, 0, 9, 0, 0, 0, 11, 0, 12, + 11, 11, 0, 0, 0, 0, 11, 11, 0, 0, 0, 12, 11, 0, 0, 0, + 11, 0, 11, 0, 7, 7, 7, 7, 11, 0, 11, 11, 11, 11, 13, 0, + 0, 0, 11, 12, 11, 11, 0, 11, 11, 11, 0, 7, 7, 7, 11, 11, + 0, 11, 0, 0, 0, 13, 0, 0, 0, 7, 7, 7, 7, 7, 0, 7, + 0, 7, 7, 0, 3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 11, + 12, 0, 0, 0, 9, 9, 9, 9, 9, 0, 0, 0, 13, 13, 0, 0, + 7, 7, 7, 0, 11, 11, 11, 7, 15, 15, 0, 15, 13, 0, 11, 11, + 7, 11, 11, 11, 0, 11, 7, 7, 7, 9, 0, 7, 7, 11, 11, 7, + 7, 0, 7, 7, 15, 15, 11, 11, 11, 0, 0, 11, 0, 0, 0, 9, + 11, 7, 11, 11, 11, 11, 7, 7, 7, 11, 0, 0, 13, 0, 11, 0, + 7, 7, 11, 7, 11, 7, 7, 7, 7, 7, 0, 0, 7, 11, 7, 7, + 0, 0, 15, 15, 7, 0, 0, 7, 7, 7, 11, 0, 0, 0, 0, 7, + 0, 0, 0, 11, 0, 11, 11, 0, 0, 7, 0, 0, 11, 7, 0, 0, + 0, 0, 7, 7, 0, 0, 7, 11, 0, 0, 
7, 0, 7, 0, 7, 0, + 15, 15, 0, 0, 7, 0, 0, 0, 0, 7, 0, 7, 15, 15, 7, 7, + 11, 0, 7, 7, 7, 7, 9, 0, 11, 7, 11, 0, 7, 7, 7, 11, + 7, 11, 11, 0, 0, 11, 0, 11, 7, 7, 9, 9, 14, 14, 0, 0, + 14, 0, 0, 12, 6, 6, 9, 9, 9, 9, 9, 0, 16, 0, 0, 0, + 13, 0, 0, 0, 9, 0, 9, 9, 0, 10, 10, 10, 10, 10, 0, 0, + 0, 7, 7, 10, 10, 0, 0, 0, 10, 10, 10, 10, 10, 10, 10, 0, + 7, 7, 0, 11, 11, 11, 7, 11, 11, 7, 7, 0, 0, 3, 7, 3, + 3, 0, 3, 3, 3, 0, 3, 0, 3, 3, 0, 3, 13, 0, 0, 12, + 0, 16, 16, 16, 13, 12, 0, 0, 11, 0, 0, 9, 0, 0, 0, 14, + 0, 0, 12, 13, 0, 0, 10, 10, 10, 10, 7, 7, 0, 9, 9, 9, + 7, 0, 15, 15, 7, 7, 7, 9, 9, 9, 9, 7, 0, 0, 8, 8, + 8, 8, 8, 8, +}; + +/* Word_Break: 3946 bytes. */ + +RE_UINT32 re_get_word_break(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 12; + code = ch ^ (f << 12); + pos = (RE_UINT32)re_word_break_stage_1[f] << 5; + f = code >> 7; + code ^= f << 7; + pos = (RE_UINT32)re_word_break_stage_2[pos + f] << 4; + f = code >> 3; + code ^= f << 3; + pos = (RE_UINT32)re_word_break_stage_3[pos + f] << 1; + f = code >> 2; + code ^= f << 2; + pos = (RE_UINT32)re_word_break_stage_4[pos + f] << 2; + value = re_word_break_stage_5[pos + code]; + + return value; +} + +/* Grapheme_Cluster_Break. 
*/ + +static RE_UINT8 re_grapheme_cluster_break_stage_1[] = { + 0, 1, 2, 2, 2, 3, 4, 5, 6, 2, 2, 7, 2, 2, 8, 9, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 10, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, +}; + +static RE_UINT8 re_grapheme_cluster_break_stage_2[] = { + 0, 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 1, 17, 1, 1, 1, 18, 19, 20, 21, 22, 23, 24, 1, 1, + 25, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 26, 27, 1, 1, + 28, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 29, 1, 30, 31, 32, 33, 34, 35, 36, 37, + 38, 39, 40, 34, 35, 36, 37, 38, 39, 40, 34, 35, 36, 37, 38, 39, + 40, 34, 35, 36, 37, 38, 39, 40, 34, 35, 36, 37, 38, 39, 40, 34, + 35, 36, 37, 38, 39, 40, 34, 41, 42, 42, 42, 42, 42, 42, 42, 42, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 43, 1, 1, 44, 45, + 1, 46, 1, 1, 1, 1, 1, 1, 1, 1, 47, 1, 1, 1, 1, 1, + 48, 49, 1, 1, 1, 1, 50, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 51, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 52, 53, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 54, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 42, 55, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +}; + +static RE_UINT8 re_grapheme_cluster_break_stage_3[] = { + 0, 1, 2, 2, 2, 2, 2, 3, 1, 1, 4, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 5, 5, 5, 5, 5, 5, 5, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 6, 
2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 7, 5, 8, 9, 2, 2, 2, + 10, 11, 2, 2, 12, 5, 2, 13, 2, 2, 2, 2, 2, 14, 15, 2, + 3, 16, 2, 5, 17, 2, 2, 2, 2, 2, 18, 13, 2, 2, 12, 19, + 2, 20, 21, 2, 2, 22, 2, 2, 2, 2, 2, 2, 2, 2, 23, 24, + 25, 2, 2, 26, 27, 28, 29, 2, 30, 2, 2, 31, 32, 33, 29, 2, + 34, 2, 2, 35, 36, 16, 2, 37, 34, 2, 2, 35, 38, 2, 29, 2, + 30, 2, 2, 39, 32, 40, 29, 2, 41, 2, 2, 42, 43, 33, 2, 2, + 44, 2, 2, 45, 46, 47, 29, 2, 48, 2, 2, 49, 50, 47, 29, 2, + 48, 2, 2, 42, 51, 33, 29, 2, 48, 2, 2, 2, 52, 53, 2, 48, + 2, 2, 2, 54, 55, 2, 2, 2, 2, 2, 2, 56, 57, 2, 2, 2, + 2, 58, 2, 59, 2, 2, 2, 60, 61, 62, 5, 63, 64, 2, 2, 2, + 2, 2, 65, 66, 2, 67, 13, 68, 69, 70, 2, 2, 2, 2, 2, 2, + 71, 71, 71, 71, 71, 71, 72, 72, 72, 72, 73, 74, 74, 74, 74, 74, + 2, 2, 2, 2, 2, 65, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 75, 2, 75, 2, 29, 2, 29, 2, 2, 2, 76, 77, 78, 2, 2, + 79, 2, 2, 2, 2, 2, 2, 2, 2, 2, 80, 2, 2, 2, 2, 2, + 2, 2, 81, 82, 2, 2, 2, 2, 2, 2, 2, 83, 2, 2, 2, 2, + 2, 84, 2, 2, 2, 85, 86, 87, 2, 2, 2, 2, 2, 2, 2, 2, + 88, 2, 2, 89, 90, 2, 12, 19, 91, 2, 92, 2, 2, 2, 93, 94, + 2, 2, 95, 96, 2, 2, 2, 2, 2, 2, 2, 2, 2, 97, 98, 99, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 5, 5, 100, 101, + 102, 2, 103, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 5, 5, 13, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 104, 105, + 2, 2, 2, 2, 2, 2, 2, 104, 2, 2, 2, 2, 2, 2, 5, 5, + 2, 2, 106, 2, 2, 2, 2, 2, 2, 107, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 104, 108, 2, 104, 2, 2, 2, 2, 2, 105, + 109, 2, 110, 2, 2, 2, 2, 2, 111, 2, 2, 112, 113, 2, 5, 105, + 2, 2, 114, 2, 115, 94, 71, 116, 25, 2, 2, 117, 118, 2, 2, 2, + 2, 2, 119, 120, 121, 2, 2, 2, 2, 2, 2, 122, 16, 2, 123, 124, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 125, 2, + 126, 127, 128, 129, 128, 130, 128, 126, 127, 128, 129, 128, 130, 128, 126, 127, + 128, 129, 128, 130, 128, 126, 127, 128, 129, 128, 130, 128, 126, 127, 128, 129, + 128, 130, 128, 126, 127, 128, 129, 128, 130, 128, 126, 127, 128, 129, 128, 130, + 128, 126, 127, 128, 129, 128, 130, 
128, 126, 127, 128, 129, 128, 130, 128, 126, + 127, 128, 129, 128, 130, 128, 126, 127, 128, 129, 128, 130, 128, 126, 127, 128, + 129, 128, 130, 128, 126, 127, 128, 129, 128, 130, 128, 126, 127, 128, 129, 128, + 130, 128, 126, 127, 128, 129, 128, 130, 128, 126, 127, 128, 129, 128, 130, 128, + 128, 129, 128, 130, 128, 126, 127, 128, 129, 128, 131, 72, 132, 74, 74, 133, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 134, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 5, 2, 100, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 45, 2, 2, 2, 2, 2, 135, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 70, + 136, 2, 2, 137, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 138, 2, 2, 139, 100, 2, 2, 2, 91, 2, 2, 140, 2, 2, 2, 2, + 141, 2, 142, 143, 2, 2, 2, 2, 91, 2, 2, 144, 118, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 145, 146, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 147, 148, 149, 104, 141, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 150, 151, 152, 2, 153, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 75, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 154, 155, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, +}; + +static RE_UINT8 re_grapheme_cluster_break_stage_4[] = { + 0, 0, 1, 2, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 4, + 3, 3, 3, 5, 6, 6, 6, 6, 7, 6, 8, 3, 9, 6, 6, 6, + 6, 6, 6, 10, 11, 10, 3, 3, 0, 12, 3, 3, 6, 6, 13, 12, + 3, 3, 7, 6, 14, 3, 3, 3, 3, 15, 6, 16, 6, 17, 18, 8, + 19, 3, 3, 3, 6, 6, 13, 3, 3, 15, 6, 6, 6, 3, 3, 3, + 3, 15, 10, 6, 6, 9, 9, 8, 3, 3, 9, 3, 3, 6, 6, 6, + 6, 6, 6, 13, 20, 3, 3, 3, 3, 3, 21, 22, 23, 6, 24, 25, + 9, 6, 3, 3, 15, 3, 3, 3, 26, 3, 3, 3, 3, 3, 3, 27, + 23, 28, 29, 30, 3, 7, 3, 3, 31, 3, 3, 3, 3, 3, 3, 22, + 32, 7, 17, 8, 8, 19, 3, 3, 23, 10, 33, 30, 3, 3, 3, 18, + 3, 15, 3, 3, 34, 3, 3, 3, 3, 3, 3, 21, 35, 36, 37, 30, + 38, 3, 3, 3, 3, 3, 3, 15, 24, 39, 18, 8, 3, 11, 3, 3, + 36, 3, 3, 3, 3, 3, 3, 40, 41, 42, 37, 8, 23, 22, 37, 30, + 3, 3, 34, 7, 43, 44, 45, 46, 47, 6, 13, 3, 3, 7, 6, 13, + 47, 6, 10, 14, 3, 3, 6, 8, 3, 3, 8, 
3, 3, 48, 19, 36, + 9, 6, 6, 20, 6, 18, 3, 9, 6, 6, 9, 6, 6, 6, 6, 14, + 3, 34, 3, 3, 3, 3, 3, 9, 49, 6, 31, 32, 3, 36, 8, 15, + 9, 14, 3, 3, 34, 32, 3, 19, 3, 3, 3, 19, 50, 50, 50, 50, + 51, 51, 51, 51, 51, 51, 52, 52, 52, 52, 52, 52, 15, 14, 3, 3, + 3, 53, 6, 54, 45, 41, 23, 6, 6, 3, 3, 19, 3, 3, 7, 55, + 3, 3, 19, 3, 20, 46, 24, 3, 41, 45, 23, 3, 3, 38, 56, 3, + 3, 7, 57, 3, 3, 58, 6, 13, 44, 9, 6, 24, 46, 6, 6, 17, + 6, 59, 3, 3, 3, 49, 20, 24, 41, 59, 3, 3, 60, 3, 3, 3, + 61, 54, 53, 62, 3, 21, 54, 63, 54, 3, 3, 3, 3, 45, 45, 6, + 6, 43, 3, 3, 13, 6, 6, 6, 49, 6, 14, 19, 36, 14, 3, 3, + 6, 13, 3, 3, 3, 3, 3, 6, 3, 3, 4, 64, 3, 3, 0, 65, + 3, 3, 3, 7, 8, 3, 3, 3, 3, 3, 15, 6, 3, 3, 11, 3, + 13, 6, 6, 8, 34, 34, 7, 3, 66, 67, 3, 3, 62, 3, 3, 3, + 3, 45, 45, 45, 45, 14, 3, 3, 3, 15, 6, 8, 3, 7, 6, 6, + 50, 50, 50, 68, 7, 43, 54, 24, 59, 3, 3, 3, 3, 3, 9, 20, + 67, 32, 3, 3, 7, 3, 3, 69, 18, 17, 14, 15, 3, 3, 66, 54, + 3, 70, 3, 3, 66, 25, 35, 30, 71, 72, 72, 72, 72, 72, 72, 71, + 72, 72, 72, 72, 72, 72, 71, 72, 72, 71, 72, 72, 72, 3, 3, 3, + 51, 73, 74, 52, 52, 52, 52, 3, 3, 3, 3, 34, 0, 0, 0, 3, + 9, 11, 3, 6, 3, 3, 13, 7, 75, 3, 3, 3, 3, 3, 6, 6, + 46, 20, 32, 5, 13, 3, 3, 3, 3, 7, 6, 23, 6, 14, 3, 3, + 66, 43, 6, 20, 3, 3, 7, 25, 6, 53, 3, 3, 38, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 76, 3, 77, 8, 61, 78, 0, 79, 6, + 13, 9, 6, 3, 3, 3, 15, 8, 3, 80, 81, 81, 81, 81, 81, 81, +}; + +static RE_UINT8 re_grapheme_cluster_break_stage_5[] = { + 3, 3, 3, 3, 3, 3, 2, 3, 3, 1, 3, 3, 0, 0, 0, 0, + 0, 0, 0, 3, 0, 3, 0, 0, 4, 4, 4, 4, 0, 0, 0, 4, + 4, 4, 0, 0, 0, 4, 4, 4, 4, 4, 0, 4, 0, 4, 4, 0, + 3, 0, 0, 0, 4, 4, 4, 0, 4, 0, 0, 0, 0, 0, 4, 4, + 4, 3, 0, 4, 4, 0, 0, 4, 4, 0, 4, 4, 0, 4, 0, 0, + 4, 4, 4, 6, 0, 0, 4, 6, 4, 0, 6, 6, 6, 4, 4, 4, + 4, 6, 6, 6, 6, 4, 6, 6, 0, 4, 6, 6, 4, 0, 4, 6, + 4, 0, 0, 6, 6, 0, 0, 6, 6, 4, 0, 0, 0, 4, 4, 6, + 6, 4, 4, 0, 4, 6, 0, 6, 0, 0, 4, 0, 4, 6, 6, 0, + 0, 0, 6, 6, 6, 0, 6, 6, 0, 6, 6, 6, 6, 0, 4, 4, + 4, 0, 6, 4, 6, 6, 4, 6, 6, 0, 
4, 6, 6, 6, 4, 4, + 4, 0, 4, 0, 6, 6, 6, 6, 6, 6, 6, 4, 0, 4, 0, 6, + 0, 4, 0, 4, 4, 6, 4, 4, 7, 7, 7, 7, 8, 8, 8, 8, + 9, 9, 9, 9, 4, 4, 6, 4, 4, 4, 6, 6, 4, 4, 3, 0, + 0, 0, 6, 0, 4, 6, 6, 4, 0, 6, 4, 6, 6, 0, 0, 0, + 4, 4, 6, 0, 0, 6, 4, 4, 6, 6, 0, 0, 6, 4, 6, 4, + 4, 4, 3, 3, 3, 3, 3, 0, 0, 0, 0, 6, 6, 4, 4, 6, + 7, 0, 0, 0, 4, 6, 0, 0, 0, 6, 4, 0, 10, 11, 11, 11, + 11, 11, 11, 11, 8, 8, 8, 0, 0, 0, 0, 9, 6, 4, 6, 0, + 6, 6, 6, 0, 0, 4, 6, 4, 4, 4, 4, 3, 3, 3, 3, 4, + 0, 0, 5, 5, 5, 5, 5, 5, +}; + +/* Grapheme_Cluster_Break: 2336 bytes. */ + +RE_UINT32 re_get_grapheme_cluster_break(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 13; + code = ch ^ (f << 13); + pos = (RE_UINT32)re_grapheme_cluster_break_stage_1[f] << 5; + f = code >> 8; + code ^= f << 8; + pos = (RE_UINT32)re_grapheme_cluster_break_stage_2[pos + f] << 4; + f = code >> 4; + code ^= f << 4; + pos = (RE_UINT32)re_grapheme_cluster_break_stage_3[pos + f] << 2; + f = code >> 2; + code ^= f << 2; + pos = (RE_UINT32)re_grapheme_cluster_break_stage_4[pos + f] << 2; + value = re_grapheme_cluster_break_stage_5[pos + code]; + + return value; +} + +/* Sentence_Break. 
*/ + +static RE_UINT8 re_sentence_break_stage_1[] = { + 0, 1, 2, 3, 4, 5, 5, 5, 5, 6, 7, 5, 5, 8, 9, 10, + 11, 12, 13, 14, 9, 9, 15, 9, 9, 9, 9, 16, 9, 17, 18, 9, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 19, 20, 9, 9, 9, 21, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 22, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, +}; + +static RE_UINT8 re_sentence_break_stage_2[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 17, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, + 31, 32, 33, 34, 35, 33, 33, 36, 33, 37, 33, 33, 38, 39, 40, 33, + 41, 42, 33, 33, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 43, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 44, + 17, 17, 17, 17, 45, 17, 46, 47, 48, 49, 50, 51, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 52, 33, 33, 33, 33, 33, 33, 33, 33, + 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, + 33, 33, 33, 33, 33, 33, 33, 33, 33, 17, 53, 54, 17, 55, 56, 57, + 58, 59, 60, 61, 62, 33, 33, 33, 63, 64, 65, 66, 67, 33, 33, 33, + 68, 69, 33, 33, 33, 33, 70, 33, 33, 33, 33, 33, 33, 33, 33, 33, + 17, 17, 17, 71, 72, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, + 17, 17, 17, 17, 73, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, + 33, 33, 33, 33, 33, 33, 33, 33, 17, 17, 74, 
33, 33, 33, 33, 75, + 76, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, + 33, 77, 78, 33, 79, 80, 81, 82, 33, 33, 33, 33, 33, 33, 33, 33, + 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 83, 33, + 17, 17, 17, 17, 17, 17, 84, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 85, 86, 33, 33, 33, 33, 33, 33, 33, + 33, 33, 33, 33, 33, 33, 33, 33, 17, 17, 86, 33, 33, 33, 33, 33, + 87, 88, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, +}; + +static RE_UINT16 re_sentence_break_stage_3[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 8, 16, 17, 18, 19, 20, 21, 22, 23, 23, 23, 24, 25, 26, 27, 28, + 29, 30, 18, 8, 31, 8, 32, 8, 8, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 41, 41, 44, 45, 46, 47, 48, 41, 41, 49, 50, 51, + 52, 53, 54, 55, 55, 56, 55, 57, 58, 59, 60, 61, 62, 63, 64, 65, + 66, 67, 68, 69, 70, 71, 72, 73, 62, 71, 74, 75, 76, 77, 78, 79, + 80, 81, 82, 73, 83, 84, 85, 86, 83, 87, 88, 89, 90, 91, 92, 93, + 94, 95, 96, 55, 97, 98, 99, 55, 100, 101, 102, 103, 104, 105, 106, 55, + 41, 107, 108, 109, 110, 29, 111, 112, 41, 41, 41, 41, 41, 41, 41, 41, + 41, 41, 113, 41, 114, 115, 116, 41, 117, 41, 118, 119, 120, 41, 41, 121, + 94, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 122, 123, 41, 41, 124, + 125, 126, 127, 128, 41, 129, 130, 131, 132, 41, 41, 133, 41, 134, 41, 135, + 136, 137, 138, 139, 41, 140, 141, 55, 142, 41, 143, 144, 145, 146, 55, 55, + 147, 129, 148, 149, 150, 151, 41, 152, 41, 153, 154, 155, 55, 55, 156, 157, + 18, 18, 18, 18, 18, 18, 23, 158, 8, 8, 8, 8, 159, 8, 8, 8, + 160, 161, 162, 163, 161, 164, 165, 166, 167, 168, 169, 170, 171, 55, 172, 173, + 174, 175, 176, 30, 177, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 178, 179, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 180, 30, 181, + 55, 55, 182, 183, 55, 55, 184, 185, 55, 55, 55, 55, 186, 55, 187, 188, + 29, 189, 190, 191, 8, 8, 8, 192, 18, 193, 41, 194, 195, 196, 196, 23, + 197, 198, 55, 55, 55, 55, 55, 55, 199, 200, 94, 41, 201, 94, 41, 
112, + 202, 203, 41, 41, 204, 205, 55, 206, 41, 41, 41, 41, 41, 135, 55, 55, + 41, 41, 41, 41, 41, 41, 207, 55, 41, 41, 41, 41, 207, 55, 206, 208, + 209, 210, 8, 211, 212, 41, 41, 213, 214, 215, 8, 216, 217, 218, 55, 219, + 220, 221, 41, 222, 223, 129, 224, 225, 50, 226, 227, 136, 58, 228, 229, 55, + 41, 230, 231, 232, 41, 233, 234, 235, 236, 237, 55, 55, 55, 55, 41, 238, + 41, 41, 41, 41, 41, 239, 240, 241, 41, 41, 41, 242, 41, 41, 243, 55, + 244, 245, 246, 41, 41, 247, 248, 41, 41, 249, 206, 41, 250, 41, 251, 252, + 253, 254, 255, 256, 41, 41, 41, 257, 258, 2, 259, 260, 261, 262, 263, 264, + 265, 266, 267, 55, 41, 41, 41, 205, 55, 55, 41, 121, 55, 55, 55, 268, + 55, 55, 55, 55, 136, 41, 269, 55, 262, 206, 270, 55, 271, 41, 272, 55, + 29, 273, 274, 41, 271, 131, 55, 55, 275, 276, 135, 55, 55, 55, 55, 55, + 135, 243, 55, 55, 41, 277, 55, 55, 278, 279, 280, 136, 55, 55, 55, 55, + 41, 135, 135, 281, 55, 55, 55, 55, 41, 41, 282, 55, 55, 55, 55, 55, + 150, 283, 284, 79, 150, 285, 286, 287, 150, 288, 289, 55, 150, 228, 290, 55, + 55, 55, 55, 55, 41, 291, 131, 55, 41, 41, 41, 204, 55, 55, 55, 55, + 41, 41, 41, 292, 55, 55, 55, 55, 41, 204, 55, 55, 55, 55, 55, 55, + 41, 293, 55, 55, 55, 55, 55, 55, 41, 41, 294, 295, 296, 55, 55, 55, + 297, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 298, 299, 300, 55, 55, + 55, 55, 301, 55, 55, 55, 55, 55, 302, 303, 304, 305, 306, 307, 308, 309, + 310, 311, 312, 313, 314, 302, 303, 315, 305, 316, 317, 318, 309, 319, 320, 321, + 322, 323, 324, 189, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 55, 55, + 41, 41, 41, 41, 41, 41, 195, 55, 41, 121, 41, 41, 41, 41, 41, 41, + 271, 55, 55, 55, 55, 55, 55, 55, 335, 336, 336, 336, 55, 55, 55, 55, + 23, 23, 23, 23, 23, 23, 23, 337, +}; + +static RE_UINT8 re_sentence_break_stage_4[] = { + 0, 0, 1, 2, 0, 0, 0, 0, 3, 4, 5, 6, 7, 7, 8, 9, + 10, 11, 11, 11, 11, 11, 12, 13, 14, 15, 15, 15, 15, 15, 16, 13, + 0, 17, 0, 0, 0, 0, 0, 0, 18, 0, 19, 20, 0, 21, 19, 0, + 11, 11, 11, 11, 11, 22, 11, 23, 15, 15, 15, 
15, 15, 24, 15, 15, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 26, 26, + 26, 26, 27, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 28, 29, + 30, 31, 32, 33, 28, 31, 34, 28, 25, 31, 29, 31, 32, 26, 35, 34, + 36, 28, 31, 26, 26, 26, 26, 27, 25, 25, 25, 25, 30, 31, 25, 25, + 25, 25, 25, 25, 25, 15, 33, 30, 26, 23, 25, 25, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 37, 15, 15, + 15, 15, 15, 15, 15, 15, 38, 36, 39, 40, 36, 36, 41, 0, 0, 0, + 15, 42, 0, 43, 0, 0, 0, 0, 44, 44, 44, 44, 44, 44, 44, 44, + 44, 44, 44, 44, 25, 45, 46, 39, 0, 47, 22, 48, 32, 11, 11, 11, + 49, 11, 11, 15, 15, 15, 15, 15, 15, 15, 15, 50, 33, 34, 25, 25, + 25, 25, 25, 25, 15, 51, 30, 32, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 15, 15, 15, 15, 52, 44, 53, 25, 25, 25, 25, 25, + 28, 26, 26, 29, 25, 25, 25, 25, 25, 25, 0, 0, 10, 11, 11, 11, + 11, 11, 11, 11, 11, 22, 54, 55, 14, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 56, 0, 57, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 58, + 59, 58, 0, 0, 36, 36, 36, 36, 36, 36, 60, 0, 36, 0, 0, 0, + 61, 62, 0, 63, 44, 44, 64, 65, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 66, 44, 44, 44, 44, 44, 7, 7, 67, 68, 69, 36, 36, 36, + 36, 36, 36, 36, 36, 70, 44, 71, 44, 72, 73, 74, 7, 7, 75, 76, + 77, 0, 0, 78, 79, 36, 36, 36, 36, 36, 36, 36, 44, 44, 44, 44, + 44, 44, 64, 80, 36, 36, 36, 36, 36, 81, 44, 44, 82, 0, 0, 0, + 7, 7, 75, 36, 36, 36, 36, 36, 36, 36, 66, 44, 44, 41, 83, 0, + 36, 36, 36, 36, 36, 81, 84, 44, 44, 85, 85, 86, 0, 0, 0, 0, + 36, 36, 36, 36, 36, 36, 85, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 87, 36, 36, 88, 0, 0, 0, 0, 0, 44, 44, 44, 44, 44, 44, 64, + 44, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 81, 89, + 44, 44, 44, 44, 85, 44, 36, 36, 81, 90, 7, 7, 80, 36, 80, 36, + 57, 80, 36, 76, 76, 36, 36, 36, 36, 36, 87, 36, 43, 40, 41, 89, + 44, 91, 91, 92, 0, 93, 0, 94, 81, 95, 7, 7, 41, 0, 0, 0, + 57, 80, 60, 96, 76, 36, 36, 36, 36, 36, 87, 36, 87, 97, 41, 73, + 64, 93, 91, 86, 98, 0, 80, 43, 0, 95, 7, 7, 74, 99, 0, 
0, + 57, 80, 36, 94, 94, 36, 36, 36, 36, 36, 87, 36, 87, 80, 41, 89, + 44, 58, 58, 86, 88, 0, 0, 0, 81, 95, 7, 7, 0, 0, 0, 0, + 44, 91, 91, 86, 0, 100, 0, 94, 81, 95, 7, 7, 54, 0, 0, 0, + 101, 80, 60, 40, 87, 41, 97, 87, 96, 88, 60, 40, 36, 36, 41, 100, + 64, 100, 73, 86, 88, 93, 0, 0, 0, 95, 7, 7, 0, 0, 0, 0, + 57, 80, 36, 87, 87, 36, 36, 36, 36, 36, 87, 36, 36, 80, 41, 102, + 44, 73, 73, 86, 0, 59, 41, 0, 100, 80, 36, 87, 87, 36, 36, 36, + 36, 36, 87, 36, 36, 80, 41, 89, 44, 73, 73, 86, 0, 59, 0, 103, + 81, 95, 7, 7, 97, 0, 0, 0, 36, 36, 36, 36, 36, 36, 60, 102, + 44, 73, 73, 92, 0, 93, 0, 0, 81, 95, 7, 7, 0, 0, 40, 36, + 100, 80, 36, 36, 36, 60, 40, 36, 36, 36, 36, 36, 94, 36, 36, 54, + 36, 60, 104, 93, 44, 105, 44, 44, 0, 0, 0, 0, 100, 0, 0, 0, + 80, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 79, 44, 64, 0, + 36, 66, 44, 64, 7, 7, 106, 0, 97, 76, 43, 54, 0, 36, 80, 36, + 80, 107, 40, 80, 79, 44, 58, 82, 36, 43, 44, 86, 7, 7, 106, 36, + 88, 0, 0, 0, 0, 0, 86, 0, 7, 7, 106, 0, 0, 108, 109, 110, + 36, 36, 80, 36, 36, 36, 36, 36, 36, 36, 36, 88, 57, 44, 44, 44, + 44, 73, 36, 85, 44, 44, 57, 44, 44, 44, 44, 44, 44, 44, 44, 111, + 0, 104, 0, 0, 0, 0, 0, 0, 36, 36, 66, 44, 44, 44, 44, 112, + 7, 7, 113, 0, 36, 81, 74, 81, 89, 72, 44, 74, 85, 69, 36, 36, + 81, 44, 44, 84, 7, 7, 114, 86, 11, 49, 0, 115, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 60, 36, 36, 36, 87, 41, 36, 60, 87, 41, + 36, 36, 87, 41, 36, 36, 36, 36, 36, 36, 36, 36, 87, 41, 36, 60, + 87, 41, 36, 36, 36, 60, 36, 36, 36, 36, 36, 36, 87, 41, 36, 36, + 36, 36, 36, 36, 36, 36, 60, 57, 116, 9, 117, 0, 0, 0, 0, 0, + 36, 36, 36, 36, 0, 0, 0, 0, 36, 36, 36, 36, 36, 88, 0, 0, + 36, 36, 36, 118, 36, 36, 36, 36, 119, 36, 36, 36, 36, 36, 120, 121, + 36, 36, 60, 40, 88, 0, 0, 0, 36, 36, 36, 87, 81, 111, 0, 0, + 36, 36, 36, 36, 81, 122, 0, 0, 36, 36, 36, 36, 81, 0, 0, 0, + 36, 36, 36, 87, 123, 0, 0, 0, 36, 36, 36, 36, 36, 44, 44, 44, + 44, 44, 44, 44, 44, 96, 0, 99, 7, 7, 106, 0, 0, 0, 0, 0, + 124, 0, 125, 126, 7, 7, 
106, 0, 36, 36, 36, 36, 36, 36, 0, 0, + 36, 36, 127, 0, 36, 36, 36, 36, 36, 36, 36, 36, 36, 41, 0, 0, + 36, 36, 36, 36, 36, 36, 36, 88, 44, 44, 44, 0, 44, 44, 44, 0, + 0, 90, 7, 7, 36, 36, 36, 36, 36, 36, 36, 41, 36, 88, 0, 0, + 36, 36, 36, 0, 44, 44, 44, 44, 69, 36, 86, 0, 7, 7, 106, 0, + 36, 36, 36, 36, 36, 66, 44, 0, 36, 36, 36, 36, 36, 85, 44, 64, + 44, 44, 44, 44, 44, 44, 44, 91, 7, 7, 106, 0, 7, 7, 106, 0, + 0, 96, 128, 0, 0, 0, 0, 0, 44, 69, 36, 36, 36, 36, 36, 36, + 44, 69, 36, 0, 7, 7, 113, 129, 0, 0, 93, 44, 44, 0, 0, 0, + 112, 36, 36, 36, 36, 36, 36, 36, 85, 44, 44, 74, 7, 7, 75, 36, + 36, 81, 44, 44, 44, 0, 0, 0, 36, 44, 44, 44, 44, 44, 9, 117, + 7, 7, 106, 80, 7, 7, 75, 36, 36, 36, 36, 36, 36, 36, 36, 130, + 0, 0, 0, 0, 64, 44, 44, 44, 44, 44, 69, 79, 81, 131, 0, 0, + 44, 64, 0, 0, 0, 0, 0, 44, 25, 25, 25, 25, 25, 34, 15, 27, + 15, 15, 11, 11, 15, 39, 11, 132, 15, 15, 11, 11, 15, 15, 11, 11, + 15, 39, 11, 132, 15, 15, 133, 133, 15, 15, 11, 11, 15, 15, 15, 39, + 15, 15, 11, 11, 15, 134, 11, 135, 46, 134, 11, 136, 15, 46, 11, 0, + 15, 15, 11, 136, 46, 134, 11, 136, 137, 137, 138, 139, 140, 141, 142, 142, + 0, 143, 144, 145, 0, 0, 146, 147, 0, 148, 147, 0, 0, 0, 0, 149, + 61, 150, 61, 61, 21, 0, 0, 151, 0, 0, 0, 146, 15, 15, 15, 42, + 0, 0, 0, 0, 44, 44, 44, 44, 44, 44, 44, 44, 111, 0, 0, 0, + 47, 152, 153, 154, 23, 115, 10, 132, 0, 155, 48, 156, 11, 38, 157, 33, + 0, 158, 39, 159, 0, 0, 0, 0, 160, 38, 88, 0, 0, 0, 0, 0, + 0, 0, 142, 0, 0, 0, 0, 0, 0, 0, 146, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 161, 11, 11, 15, 15, 39, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 4, 162, 0, 0, 142, 142, 142, 5, 0, 0, + 0, 146, 0, 0, 0, 0, 0, 0, 0, 163, 142, 142, 0, 0, 0, 0, + 4, 142, 142, 142, 142, 142, 121, 0, 0, 0, 0, 0, 0, 0, 142, 0, + 0, 0, 0, 0, 0, 0, 0, 5, 11, 11, 11, 22, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 24, 31, 164, 26, 32, 25, 29, 15, 33, + 25, 42, 152, 165, 53, 0, 0, 0, 15, 166, 0, 21, 36, 36, 36, 36, + 36, 36, 0, 96, 0, 0, 0, 93, 36, 36, 36, 36, 36, 60, 0, 0, + 
36, 60, 36, 60, 36, 60, 36, 60, 142, 142, 142, 5, 0, 0, 0, 5, + 142, 142, 5, 167, 0, 0, 0, 0, 168, 80, 142, 142, 5, 142, 142, 169, + 80, 36, 81, 44, 80, 41, 36, 88, 36, 36, 36, 36, 36, 60, 59, 80, + 0, 80, 36, 36, 36, 36, 36, 36, 36, 36, 36, 41, 80, 36, 36, 36, + 36, 36, 36, 60, 0, 0, 0, 0, 36, 36, 36, 36, 36, 36, 60, 0, + 0, 0, 0, 0, 36, 36, 36, 36, 36, 36, 36, 88, 0, 0, 0, 0, + 36, 36, 36, 36, 36, 36, 36, 170, 36, 36, 36, 171, 36, 36, 36, 36, + 7, 7, 75, 0, 0, 0, 0, 0, 25, 25, 25, 172, 64, 44, 44, 173, + 25, 25, 25, 25, 25, 25, 0, 93, 36, 36, 36, 36, 174, 9, 0, 0, + 0, 0, 0, 0, 0, 96, 36, 36, 175, 25, 25, 25, 27, 25, 25, 25, + 25, 25, 25, 25, 15, 15, 26, 30, 25, 25, 176, 177, 25, 0, 0, 0, + 25, 25, 178, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 179, 36, + 180, 180, 66, 36, 36, 36, 36, 36, 66, 44, 0, 0, 0, 0, 0, 0, + 36, 36, 36, 36, 36, 129, 0, 0, 74, 36, 36, 36, 36, 36, 36, 36, + 44, 111, 0, 129, 7, 7, 106, 0, 44, 44, 44, 44, 74, 36, 96, 0, + 36, 81, 44, 174, 36, 36, 36, 36, 36, 66, 44, 44, 44, 0, 0, 0, + 36, 36, 36, 36, 66, 44, 44, 44, 111, 0, 147, 96, 7, 7, 106, 0, + 36, 36, 85, 44, 44, 64, 0, 0, 66, 36, 36, 86, 7, 7, 106, 181, + 36, 36, 36, 36, 36, 60, 182, 0, 36, 36, 36, 36, 89, 72, 69, 81, + 127, 0, 0, 0, 0, 0, 96, 41, 36, 36, 66, 44, 183, 184, 0, 0, + 80, 60, 80, 60, 80, 60, 0, 0, 36, 60, 36, 60, 0, 0, 0, 0, + 66, 44, 185, 86, 7, 7, 106, 0, 36, 0, 0, 0, 36, 36, 36, 36, + 36, 60, 96, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 0, + 36, 36, 36, 41, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 41, 0, + 15, 24, 0, 0, 186, 15, 0, 187, 36, 36, 87, 36, 36, 60, 36, 43, + 94, 87, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 41, 0, 0, 0, + 0, 0, 0, 0, 96, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 188, + 36, 36, 36, 36, 40, 36, 36, 36, 36, 36, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 36, 36, 36, 0, 44, 44, 44, 44, 189, 4, 121, 0, + 44, 64, 0, 0, 190, 169, 142, 142, 142, 191, 121, 0, 6, 192, 193, 162, + 140, 0, 0, 0, 36, 87, 36, 36, 36, 36, 36, 36, 36, 36, 36, 194, + 56, 0, 5, 6, 0, 0, 195, 9, 
14, 15, 15, 15, 15, 15, 16, 196, + 197, 198, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 81, + 36, 36, 36, 36, 36, 36, 36, 60, 40, 36, 40, 36, 40, 36, 40, 88, + 0, 0, 0, 0, 0, 0, 199, 0, 36, 36, 36, 80, 36, 36, 36, 36, + 36, 60, 36, 36, 36, 36, 60, 94, 36, 36, 36, 41, 36, 36, 36, 41, + 0, 0, 0, 0, 0, 0, 0, 98, 36, 36, 36, 36, 88, 0, 0, 0, + 36, 36, 60, 0, 0, 0, 0, 0, 36, 36, 36, 36, 36, 36, 36, 41, + 36, 0, 36, 36, 80, 41, 0, 0, 11, 11, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 36, 36, 36, 36, 36, 41, 87, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 94, 88, 76, 36, 36, 36, 36, 36, 36, 0, 40, + 85, 59, 0, 44, 36, 80, 80, 36, 36, 36, 36, 36, 36, 0, 64, 93, + 0, 0, 0, 0, 0, 129, 0, 0, 36, 36, 36, 36, 60, 0, 0, 0, + 36, 36, 88, 0, 0, 0, 0, 0, 36, 36, 36, 36, 36, 36, 44, 44, + 44, 185, 117, 0, 0, 0, 0, 0, 36, 36, 36, 36, 44, 44, 64, 200, + 147, 0, 0, 0, 36, 36, 36, 36, 36, 36, 88, 0, 7, 7, 106, 0, + 36, 66, 44, 44, 44, 201, 7, 7, 181, 0, 0, 0, 0, 0, 0, 0, + 69, 202, 0, 0, 7, 7, 106, 0, 36, 36, 66, 44, 44, 44, 0, 0, + 60, 0, 0, 0, 0, 0, 0, 0, 36, 36, 36, 36, 36, 36, 88, 0, + 36, 88, 0, 0, 85, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 64, + 0, 0, 0, 93, 112, 36, 36, 36, 41, 0, 0, 0, 0, 0, 0, 0, + 0, 57, 86, 57, 203, 61, 204, 44, 64, 57, 44, 0, 0, 0, 0, 0, + 0, 0, 100, 86, 0, 0, 0, 0, 100, 111, 0, 0, 0, 0, 0, 0, + 11, 11, 11, 11, 11, 11, 154, 15, 15, 15, 15, 15, 15, 11, 11, 11, + 11, 11, 11, 154, 15, 134, 15, 15, 15, 15, 11, 11, 11, 11, 11, 11, + 154, 15, 15, 15, 15, 15, 15, 48, 47, 205, 10, 48, 11, 154, 166, 14, + 15, 14, 15, 15, 11, 11, 11, 11, 11, 11, 154, 15, 15, 15, 15, 15, + 15, 49, 22, 10, 11, 48, 11, 206, 15, 15, 15, 15, 15, 15, 49, 22, + 11, 155, 161, 11, 206, 15, 15, 15, 15, 15, 15, 11, 11, 11, 11, 11, + 11, 154, 15, 15, 15, 15, 15, 15, 11, 11, 11, 154, 15, 15, 15, 15, + 154, 15, 15, 15, 15, 15, 15, 11, 11, 11, 11, 11, 11, 154, 15, 15, + 15, 15, 15, 15, 11, 11, 11, 11, 15, 39, 11, 11, 11, 11, 11, 11, + 206, 15, 15, 15, 15, 15, 24, 15, 33, 11, 11, 11, 11, 11, 
22, 15, + 15, 15, 15, 15, 15, 134, 15, 11, 11, 11, 11, 11, 11, 206, 15, 15, + 15, 15, 15, 24, 15, 33, 11, 11, 15, 15, 134, 15, 11, 11, 11, 11, + 11, 11, 206, 15, 15, 15, 15, 15, 24, 15, 27, 95, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 36, 80, 36, 36, 36, 36, 36, 36, + 97, 76, 80, 36, 60, 36, 107, 0, 103, 96, 107, 80, 97, 76, 107, 107, + 97, 76, 60, 36, 60, 36, 80, 43, 36, 36, 94, 36, 36, 36, 36, 0, + 80, 80, 94, 36, 36, 36, 36, 0, 20, 0, 0, 0, 0, 0, 0, 0, + 61, 61, 61, 61, 61, 61, 61, 61, 44, 44, 44, 44, 0, 0, 0, 0, +}; + +static RE_UINT8 re_sentence_break_stage_5[] = { + 0, 0, 0, 0, 0, 6, 2, 6, 6, 1, 0, 0, 6, 12, 13, 0, + 0, 0, 0, 13, 13, 13, 0, 0, 14, 14, 11, 0, 10, 10, 10, 10, + 10, 10, 14, 0, 0, 0, 0, 12, 0, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 13, 0, 13, 0, 0, 0, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 13, 0, 4, 0, 0, 6, 0, 0, 0, 0, 0, 7, 13, + 0, 5, 0, 0, 0, 7, 0, 0, 8, 8, 8, 0, 8, 8, 8, 7, + 7, 7, 7, 0, 8, 7, 8, 7, 7, 8, 7, 8, 7, 7, 8, 7, + 8, 8, 7, 8, 7, 8, 7, 7, 7, 8, 8, 7, 8, 7, 8, 8, + 7, 8, 8, 8, 7, 7, 8, 8, 8, 7, 7, 7, 8, 7, 7, 9, + 9, 9, 9, 9, 9, 7, 7, 7, 7, 9, 9, 9, 7, 7, 0, 0, + 0, 0, 9, 9, 9, 9, 0, 0, 7, 0, 0, 0, 9, 0, 9, 0, + 3, 3, 3, 3, 9, 0, 8, 7, 0, 0, 7, 7, 0, 0, 8, 0, + 8, 0, 8, 8, 8, 8, 0, 8, 7, 7, 7, 8, 8, 7, 0, 8, + 8, 7, 0, 3, 3, 3, 8, 7, 0, 9, 0, 0, 12, 14, 12, 0, + 0, 12, 0, 0, 0, 3, 3, 3, 3, 3, 0, 3, 0, 3, 3, 0, + 9, 9, 9, 0, 5, 5, 5, 5, 5, 0, 0, 0, 14, 14, 0, 0, + 3, 3, 3, 0, 5, 0, 0, 12, 9, 9, 9, 3, 10, 10, 0, 10, + 10, 0, 9, 9, 3, 9, 9, 9, 12, 9, 3, 3, 3, 5, 0, 3, + 3, 9, 9, 3, 3, 0, 3, 3, 3, 3, 9, 9, 10, 10, 9, 9, + 9, 0, 0, 9, 12, 12, 12, 0, 0, 0, 0, 5, 9, 3, 9, 9, + 0, 9, 9, 9, 9, 9, 3, 3, 3, 9, 0, 0, 14, 12, 9, 0, + 3, 3, 9, 3, 9, 3, 3, 3, 3, 3, 0, 0, 9, 0, 9, 9, + 9, 0, 0, 0, 3, 9, 3, 3, 12, 12, 10, 10, 3, 0, 0, 3, + 3, 3, 9, 0, 0, 0, 0, 3, 9, 9, 0, 9, 0, 0, 10, 10, + 0, 0, 0, 9, 0, 9, 9, 0, 0, 3, 0, 0, 9, 3, 0, 0, + 0, 0, 3, 3, 0, 0, 3, 9, 0, 9, 3, 3, 0, 0, 9, 0, + 0, 0, 3, 0, 3, 0, 3, 0, 10, 10, 0, 0, 0, 9, 0, 9, + 0, 3, 0, 3, 0, 3, 13, 13, 
13, 13, 3, 3, 3, 0, 0, 0, + 3, 3, 3, 9, 10, 10, 12, 12, 10, 10, 3, 3, 0, 8, 0, 0, + 0, 0, 12, 0, 12, 0, 0, 0, 9, 0, 12, 9, 6, 9, 9, 9, + 9, 9, 9, 13, 13, 0, 0, 0, 3, 12, 12, 0, 9, 0, 3, 3, + 0, 0, 14, 12, 14, 12, 0, 3, 3, 3, 5, 0, 9, 3, 9, 0, + 12, 12, 12, 12, 0, 0, 12, 12, 9, 9, 12, 12, 3, 9, 9, 0, + 8, 8, 0, 0, 0, 8, 0, 8, 7, 0, 7, 7, 8, 0, 7, 0, + 8, 0, 0, 0, 6, 6, 6, 6, 6, 6, 6, 5, 3, 3, 5, 5, + 0, 0, 0, 14, 14, 0, 0, 0, 13, 13, 13, 13, 11, 0, 0, 0, + 4, 4, 5, 5, 5, 5, 5, 6, 0, 13, 13, 0, 12, 12, 0, 0, + 0, 13, 13, 12, 0, 0, 0, 6, 5, 0, 5, 5, 0, 13, 13, 7, + 0, 0, 0, 8, 0, 0, 7, 8, 8, 8, 7, 7, 8, 0, 8, 0, + 8, 8, 0, 7, 9, 7, 0, 0, 0, 8, 7, 7, 0, 0, 7, 0, + 9, 9, 9, 8, 0, 0, 8, 8, 13, 13, 13, 0, 0, 0, 13, 13, + 8, 7, 7, 8, 7, 8, 7, 3, 7, 7, 0, 7, 0, 0, 12, 9, + 6, 14, 12, 0, 0, 13, 13, 13, 9, 9, 0, 12, 9, 0, 12, 12, + 8, 7, 9, 3, 3, 3, 0, 9, 3, 3, 0, 12, 0, 0, 8, 7, + 9, 0, 0, 8, 7, 8, 7, 0, 8, 7, 8, 0, 7, 7, 7, 9, + 9, 9, 3, 9, 0, 12, 12, 12, 0, 0, 9, 3, 12, 12, 9, 9, + 9, 3, 3, 0, 3, 3, 3, 12, 0, 0, 0, 7, 0, 9, 3, 9, + 9, 9, 13, 13, 14, 14, 0, 14, 0, 14, 14, 0, 13, 0, 0, 13, + 0, 14, 12, 12, 14, 13, 13, 13, 9, 0, 0, 5, 0, 0, 14, 0, + 0, 13, 0, 13, 13, 12, 13, 13, 14, 0, 9, 9, 0, 5, 5, 5, + 0, 5, 12, 12, 3, 0, 10, 10, 9, 12, 12, 0, 3, 3, 3, 5, + 5, 5, 5, 3, 0, 8, 8, 0, 8, 0, 7, 7, +}; + +/* Sentence_Break: 5596 bytes. */ + +RE_UINT32 re_get_sentence_break(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 12; + code = ch ^ (f << 12); + pos = (RE_UINT32)re_sentence_break_stage_1[f] << 4; + f = code >> 8; + code ^= f << 8; + pos = (RE_UINT32)re_sentence_break_stage_2[pos + f] << 3; + f = code >> 5; + code ^= f << 5; + pos = (RE_UINT32)re_sentence_break_stage_3[pos + f] << 3; + f = code >> 2; + code ^= f << 2; + pos = (RE_UINT32)re_sentence_break_stage_4[pos + f] << 2; + value = re_sentence_break_stage_5[pos + code]; + + return value; +} + +/* Math. 
*/ + +static RE_UINT8 re_math_stage_1[] = { + 0, 1, 2, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, +}; + +static RE_UINT8 re_math_stage_2[] = { + 0, 1, 1, 1, 2, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5, 1, 1, 6, 1, 1, +}; + +static RE_UINT8 re_math_stage_3[] = { + 0, 1, 1, 2, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 4, 5, 6, 7, 1, 8, 9, 10, 1, 6, 6, 11, 1, 1, 1, 1, + 1, 1, 1, 12, 1, 1, 13, 14, 1, 1, 1, 1, 15, 16, 17, 18, + 1, 1, 1, 1, 1, 1, 19, 1, +}; + +static RE_UINT8 re_math_stage_4[] = { + 0, 1, 2, 3, 0, 4, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, + 9, 10, 11, 12, 13, 0, 14, 15, 16, 17, 18, 0, 19, 20, 21, 22, + 23, 23, 23, 23, 23, 23, 23, 23, 24, 25, 0, 26, 27, 28, 29, 30, + 0, 0, 0, 0, 0, 31, 32, 33, 34, 0, 35, 36, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 23, 23, 0, 19, 37, 0, 0, 0, 0, 0, + 0, 38, 0, 0, 0, 0, 0, 0, 0, 0, 0, 39, 0, 0, 0, 0, + 1, 3, 3, 0, 0, 0, 0, 40, 23, 23, 41, 23, 42, 43, 44, 23, + 45, 46, 47, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 48, 23, 23, + 23, 23, 23, 23, 23, 23, 49, 23, 44, 50, 51, 52, 53, 54, 0, 55, +}; + +static RE_UINT8 re_math_stage_5[] = { + 0, 0, 0, 0, 0, 8, 0, 112, 0, 0, 0, 64, 0, 0, 0, 80, + 0, 16, 2, 0, 0, 0, 128, 0, 0, 0, 39, 0, 0, 0, 115, 0, + 192, 1, 0, 0, 0, 0, 64, 0, 0, 0, 28, 0, 17, 0, 4, 0, + 30, 0, 0, 124, 0, 124, 0, 0, 0, 0, 255, 31, 98, 248, 0, 0, + 132, 252, 47, 63, 16, 179, 251, 241, 255, 11, 0, 0, 0, 0, 255, 255, + 255, 126, 195, 240, 255, 255, 255, 47, 48, 0, 240, 255, 255, 255, 255, 255, + 0, 15, 0, 0, 3, 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 248, + 255, 255, 191, 0, 0, 0, 1, 240, 7, 0, 0, 0, 3, 192, 255, 240, + 195, 140, 15, 0, 148, 31, 0, 255, 96, 0, 0, 0, 5, 0, 0, 0, + 15, 224, 0, 0, 159, 31, 0, 0, 0, 2, 0, 0, 126, 1, 0, 0, + 4, 30, 0, 0, 255, 255, 223, 255, 255, 255, 255, 223, 100, 222, 255, 235, + 239, 255, 
255, 255, 191, 231, 223, 223, 255, 255, 255, 123, 95, 252, 253, 255, + 63, 255, 255, 255, 255, 207, 255, 255, 150, 254, 247, 10, 132, 234, 150, 170, + 150, 247, 247, 94, 255, 251, 255, 15, 238, 251, 255, 15, 0, 0, 3, 0, +}; + +/* Math: 538 bytes. */ + +RE_UINT32 re_get_math(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 15; + code = ch ^ (f << 15); + pos = (RE_UINT32)re_math_stage_1[f] << 4; + f = code >> 11; + code ^= f << 11; + pos = (RE_UINT32)re_math_stage_2[pos + f] << 3; + f = code >> 8; + code ^= f << 8; + pos = (RE_UINT32)re_math_stage_3[pos + f] << 3; + f = code >> 5; + code ^= f << 5; + pos = (RE_UINT32)re_math_stage_4[pos + f] << 5; + pos += code; + value = (re_math_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* Alphabetic. */ + +static RE_UINT8 re_alphabetic_stage_1[] = { + 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, +}; + +static RE_UINT8 re_alphabetic_stage_2[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 9, 10, 11, 7, 7, 7, 7, 12, 13, 13, 13, 13, 14, + 15, 16, 17, 13, 18, 13, 19, 13, 13, 13, 13, 13, 13, 20, 13, 13, + 13, 13, 13, 13, 13, 13, 21, 13, 13, 13, 22, 13, 13, 23, 13, 13, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 24, 7, 25, 26, 13, 13, 13, 13, 13, 13, 13, 27, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, +}; + +static RE_UINT8 re_alphabetic_stage_3[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 1, 17, 18, 19, 1, 20, 21, 22, 23, 24, 25, 26, 27, 1, 28, + 29, 30, 31, 31, 32, 31, 31, 31, 31, 31, 31, 31, 33, 34, 35, 31, + 36, 37, 31, 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 38, 1, 1, 1, 1, 1, 1, 1, 1, 1, 39, + 1, 1, 1, 1, 40, 1, 41, 42, 43, 44, 45, 46, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 47, 31, 31, 31, 31, 31, 31, 31, 31, + 31, 1, 48, 49, 1, 50, 51, 52, 53, 54, 55, 56, 57, 31, 31, 31, + 58, 59, 60, 61, 
62, 31, 31, 31, 63, 64, 31, 31, 31, 31, 65, 31, + 1, 1, 1, 66, 67, 31, 31, 31, 1, 1, 1, 1, 68, 31, 31, 31, + 1, 1, 69, 31, 31, 31, 31, 70, 71, 31, 31, 31, 31, 31, 31, 31, + 31, 31, 31, 31, 72, 73, 74, 75, 31, 31, 31, 31, 31, 31, 76, 31, + 1, 1, 1, 1, 1, 1, 77, 1, 1, 1, 1, 1, 1, 1, 1, 78, + 79, 31, 31, 31, 31, 31, 31, 31, 1, 1, 79, 31, 31, 31, 31, 31, +}; + +static RE_UINT8 re_alphabetic_stage_4[] = { + 0, 0, 1, 1, 0, 2, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 5, 6, 0, 0, 7, 8, 9, 10, 4, 11, + 4, 4, 4, 4, 12, 4, 4, 4, 4, 13, 14, 15, 16, 17, 18, 19, + 20, 4, 21, 22, 4, 4, 23, 24, 25, 4, 26, 4, 4, 27, 28, 29, + 30, 31, 32, 0, 0, 33, 0, 34, 4, 35, 36, 37, 38, 39, 40, 41, + 42, 43, 44, 45, 46, 47, 48, 49, 38, 47, 50, 51, 52, 53, 54, 0, + 55, 56, 57, 49, 58, 56, 59, 60, 58, 61, 62, 63, 64, 65, 66, 67, + 15, 68, 69, 0, 70, 71, 72, 0, 73, 0, 74, 75, 76, 77, 0, 0, + 4, 78, 25, 79, 80, 4, 81, 82, 4, 4, 83, 4, 84, 85, 86, 4, + 87, 4, 88, 0, 89, 4, 4, 90, 15, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 91, 1, 4, 4, 92, 93, 94, 94, 95, 4, 96, 97, 0, + 0, 4, 4, 98, 4, 99, 4, 100, 77, 101, 25, 102, 4, 103, 104, 0, + 105, 4, 106, 107, 0, 108, 0, 0, 4, 109, 110, 0, 4, 111, 4, 112, + 4, 100, 113, 114, 0, 0, 0, 115, 4, 4, 4, 4, 4, 4, 0, 0, + 116, 4, 117, 114, 4, 118, 119, 120, 0, 0, 0, 121, 122, 0, 0, 0, + 123, 124, 125, 4, 126, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 127, 4, 104, 4, 128, 106, 4, 4, 4, 4, 129, + 4, 81, 4, 130, 131, 132, 132, 4, 0, 133, 0, 0, 0, 0, 0, 0, + 134, 135, 15, 4, 136, 15, 4, 82, 137, 138, 4, 4, 139, 68, 0, 25, + 4, 4, 4, 4, 4, 100, 0, 0, 4, 4, 4, 4, 4, 4, 31, 0, + 4, 4, 4, 4, 31, 0, 25, 114, 140, 141, 4, 142, 143, 4, 4, 89, + 144, 145, 4, 4, 146, 147, 0, 148, 149, 16, 4, 94, 4, 4, 49, 150, + 28, 99, 151, 77, 4, 152, 133, 0, 4, 131, 153, 154, 4, 106, 155, 156, + 157, 158, 0, 0, 0, 0, 4, 147, 4, 4, 4, 4, 4, 159, 160, 105, + 4, 4, 4, 161, 4, 4, 162, 0, 163, 164, 165, 4, 4, 27, 166, 4, + 4, 114, 25, 4, 167, 4, 16, 168, 0, 0, 0, 169, 4, 4, 4, 77, + 0, 1, 
1, 170, 4, 106, 171, 0, 172, 173, 174, 0, 4, 4, 4, 68, + 0, 0, 4, 90, 0, 0, 0, 0, 0, 0, 0, 0, 77, 4, 175, 0, + 106, 25, 147, 0, 114, 4, 176, 0, 4, 4, 4, 4, 114, 0, 0, 0, + 177, 178, 100, 0, 0, 0, 0, 0, 100, 162, 0, 0, 4, 179, 0, 0, + 180, 94, 0, 77, 0, 0, 0, 0, 4, 100, 100, 151, 0, 0, 0, 0, + 4, 4, 126, 0, 0, 0, 0, 0, 4, 4, 181, 0, 145, 32, 25, 126, + 4, 151, 0, 0, 4, 4, 182, 0, 0, 0, 0, 0, 4, 100, 0, 0, + 4, 4, 4, 139, 0, 0, 0, 0, 4, 4, 4, 183, 0, 0, 0, 0, + 4, 139, 0, 0, 0, 0, 0, 0, 4, 32, 0, 0, 0, 0, 0, 0, + 4, 4, 184, 106, 166, 0, 0, 0, 185, 0, 0, 0, 0, 0, 0, 0, + 4, 4, 186, 4, 187, 188, 189, 4, 190, 191, 192, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 193, 194, 82, 186, 186, 128, 128, 195, 195, 196, 0, + 189, 197, 198, 199, 200, 201, 0, 0, 4, 4, 4, 4, 4, 4, 131, 0, + 4, 90, 4, 4, 4, 4, 4, 4, 114, 0, 0, 0, 0, 0, 0, 0, +}; + +static RE_UINT8 re_alphabetic_stage_5[] = { + 0, 0, 0, 0, 254, 255, 255, 7, 0, 4, 32, 4, 255, 255, 127, 255, + 255, 255, 255, 255, 195, 255, 3, 0, 31, 80, 0, 0, 32, 0, 0, 0, + 0, 0, 223, 60, 64, 215, 255, 255, 251, 255, 255, 255, 255, 255, 191, 255, + 3, 252, 255, 255, 255, 0, 254, 255, 255, 255, 127, 2, 254, 255, 255, 255, + 255, 0, 0, 0, 0, 0, 255, 191, 182, 0, 255, 255, 255, 7, 7, 0, + 0, 0, 255, 7, 255, 255, 255, 254, 0, 192, 255, 255, 255, 255, 239, 31, + 254, 225, 0, 156, 0, 0, 255, 255, 0, 224, 255, 255, 255, 255, 3, 0, + 0, 252, 255, 255, 255, 7, 48, 4, 255, 255, 255, 252, 255, 31, 0, 0, + 255, 255, 255, 1, 253, 31, 0, 0, 240, 3, 255, 127, 255, 255, 255, 239, + 255, 223, 225, 255, 15, 0, 254, 254, 238, 159, 249, 255, 255, 253, 197, 227, + 159, 89, 128, 176, 15, 0, 3, 0, 238, 135, 249, 255, 255, 253, 109, 195, + 135, 25, 2, 94, 0, 0, 63, 0, 238, 191, 251, 255, 255, 253, 237, 227, + 191, 27, 1, 0, 15, 0, 0, 0, 159, 25, 192, 176, 15, 0, 2, 0, + 236, 199, 61, 214, 24, 199, 255, 195, 199, 29, 129, 0, 238, 223, 253, 255, + 255, 253, 239, 227, 223, 29, 96, 3, 236, 223, 253, 255, 223, 29, 96, 64, + 15, 0, 6, 0, 255, 255, 255, 231, 223, 93, 128, 0, 15, 
0, 0, 252, + 236, 255, 127, 252, 255, 255, 251, 47, 127, 128, 95, 255, 0, 0, 12, 0, + 255, 255, 255, 7, 127, 32, 0, 0, 150, 37, 240, 254, 174, 236, 255, 59, + 95, 32, 0, 240, 1, 0, 0, 0, 255, 254, 255, 255, 255, 31, 254, 255, + 3, 255, 255, 254, 255, 255, 255, 31, 255, 255, 127, 249, 231, 193, 255, 255, + 127, 64, 0, 48, 191, 32, 255, 255, 255, 255, 255, 247, 255, 61, 127, 61, + 255, 61, 255, 255, 255, 255, 61, 127, 61, 255, 127, 255, 255, 255, 61, 255, + 255, 255, 255, 135, 255, 255, 0, 0, 255, 255, 31, 0, 255, 159, 255, 255, + 255, 199, 1, 0, 255, 223, 15, 0, 255, 255, 15, 0, 255, 223, 13, 0, + 255, 255, 207, 255, 255, 1, 128, 16, 255, 255, 255, 0, 255, 7, 255, 255, + 255, 255, 63, 0, 255, 15, 255, 1, 255, 63, 31, 0, 255, 15, 255, 255, + 255, 3, 0, 0, 255, 255, 255, 15, 255, 255, 255, 127, 254, 255, 31, 0, + 128, 0, 0, 0, 255, 255, 239, 255, 239, 15, 0, 0, 255, 243, 0, 252, + 191, 255, 3, 0, 0, 224, 0, 252, 255, 255, 255, 63, 0, 222, 111, 0, + 255, 255, 63, 63, 63, 63, 255, 170, 255, 255, 223, 95, 220, 31, 207, 15, + 255, 31, 220, 31, 0, 0, 2, 128, 0, 0, 255, 31, 132, 252, 47, 62, + 80, 189, 255, 243, 224, 67, 0, 0, 255, 1, 0, 0, 0, 0, 192, 255, + 255, 127, 255, 255, 31, 120, 12, 0, 255, 128, 0, 0, 255, 255, 127, 0, + 127, 127, 127, 127, 0, 128, 0, 0, 224, 0, 0, 0, 254, 3, 62, 31, + 255, 255, 127, 224, 224, 255, 255, 255, 255, 63, 254, 255, 255, 127, 0, 0, + 255, 31, 255, 255, 0, 12, 0, 0, 255, 127, 240, 143, 255, 255, 255, 128, + 0, 0, 128, 255, 252, 255, 255, 255, 255, 121, 15, 0, 255, 7, 0, 0, + 0, 0, 0, 255, 187, 247, 255, 255, 0, 0, 252, 8, 255, 255, 7, 0, + 255, 255, 247, 255, 255, 63, 0, 0, 255, 255, 127, 4, 5, 0, 0, 56, + 255, 255, 60, 0, 126, 126, 126, 0, 127, 127, 0, 0, 15, 0, 255, 255, + 127, 248, 255, 255, 255, 63, 255, 255, 255, 255, 255, 3, 127, 0, 248, 224, + 255, 253, 127, 95, 219, 255, 255, 255, 0, 0, 248, 255, 255, 255, 252, 255, + 0, 0, 255, 15, 0, 0, 223, 255, 192, 255, 255, 255, 252, 252, 252, 28, + 255, 239, 255, 255, 127, 255, 255, 183, 
255, 63, 255, 63, 255, 255, 1, 0, + 15, 255, 62, 0, 63, 253, 255, 255, 255, 255, 191, 145, 255, 255, 255, 192, + 111, 240, 239, 254, 63, 0, 0, 0, 30, 0, 0, 0, 7, 0, 0, 0, + 31, 0, 255, 255, 3, 0, 0, 0, 255, 255, 223, 255, 255, 255, 255, 223, + 100, 222, 255, 235, 239, 255, 255, 255, 191, 231, 223, 223, 255, 255, 255, 123, + 95, 252, 253, 255, 63, 255, 255, 255, 253, 255, 255, 247, 255, 253, 255, 255, + 247, 15, 0, 0, 150, 254, 247, 10, 132, 234, 150, 170, 150, 247, 247, 94, + 255, 251, 255, 15, 238, 251, 255, 15, +}; + +/* Alphabetic: 1817 bytes. */ + +RE_UINT32 re_get_alphabetic(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 16; + code = ch ^ (f << 16); + pos = (RE_UINT32)re_alphabetic_stage_1[f] << 5; + f = code >> 11; + code ^= f << 11; + pos = (RE_UINT32)re_alphabetic_stage_2[pos + f] << 3; + f = code >> 8; + code ^= f << 8; + pos = (RE_UINT32)re_alphabetic_stage_3[pos + f] << 3; + f = code >> 5; + code ^= f << 5; + pos = (RE_UINT32)re_alphabetic_stage_4[pos + f] << 5; + pos += code; + value = (re_alphabetic_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* Lowercase. 
*/ + +static RE_UINT8 re_lowercase_stage_1[] = { + 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, +}; + +static RE_UINT8 re_lowercase_stage_2[] = { + 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, 3, 3, 5, + 6, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, +}; + +static RE_UINT8 re_lowercase_stage_3[] = { + 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 5, + 6, 3, 7, 3, 3, 3, 8, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 9, 3, 3, 3, 3, 3, 3, 3, 3, 3, 10, 3, 11, + 3, 3, 12, 3, 3, 3, 3, 3, 3, 3, 13, 14, 3, 3, 3, 3, +}; + +static RE_UINT8 re_lowercase_stage_4[] = { + 0, 0, 0, 1, 0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, + 5, 13, 14, 15, 16, 17, 18, 19, 0, 0, 20, 21, 22, 23, 24, 25, + 0, 26, 15, 5, 27, 5, 28, 5, 5, 29, 0, 30, 31, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 15, 15, 15, 15, 15, 15, 0, 0, + 5, 5, 5, 5, 32, 5, 5, 5, 33, 34, 35, 36, 34, 37, 38, 39, + 0, 0, 0, 40, 41, 0, 0, 0, 42, 43, 44, 26, 45, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 26, 46, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 26, 47, 48, 5, 5, 5, 49, 15, 50, 0, 0, 0, 0, 0, 0, + 0, 0, 5, 51, 52, 0, 0, 0, 0, 53, 5, 54, 55, 56, 0, 57, + 0, 0, 0, 0, 0, 0, 0, 0, 58, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, + 0, 59, 60, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 61, 62, 63, 31, 64, 65, 66, 67, 68, 69, 70, 71, 72, 61, 62, 73, + 31, 64, 74, 60, 67, 75, 76, 77, 78, 74, 79, 26, 80, 67, 81, 0, +}; + +static RE_UINT8 re_lowercase_stage_5[] = { + 0, 0, 0, 0, 254, 255, 255, 7, 0, 4, 32, 4, 0, 0, 0, 128, + 255, 255, 127, 255, 170, 170, 170, 170, 170, 170, 170, 85, 85, 171, 170, 170, + 170, 170, 170, 212, 41, 49, 36, 78, 42, 45, 81, 230, 64, 82, 85, 181, + 170, 170, 41, 170, 170, 170, 250, 147, 133, 170, 255, 255, 255, 255, 255, 255, + 255, 255, 239, 255, 255, 255, 255, 1, 3, 0, 0, 0, 31, 0, 0, 0, + 32, 0, 0, 0, 0, 0, 138, 60, 0, 0, 1, 0, 0, 240, 255, 255, + 255, 127, 227, 170, 170, 170, 47, 25, 0, 0, 255, 255, 2, 168, 170, 170, + 84, 
213, 170, 170, 170, 0, 0, 0, 254, 255, 255, 255, 255, 0, 0, 0, + 170, 170, 234, 191, 255, 0, 63, 0, 255, 0, 255, 0, 63, 0, 255, 0, + 255, 0, 255, 63, 255, 0, 223, 64, 220, 0, 207, 0, 255, 0, 220, 0, + 0, 0, 2, 128, 0, 0, 255, 31, 0, 196, 8, 0, 0, 128, 16, 50, + 192, 67, 0, 0, 16, 0, 0, 0, 255, 3, 0, 0, 255, 255, 255, 127, + 98, 21, 218, 63, 26, 80, 8, 0, 191, 32, 0, 0, 170, 42, 0, 0, + 170, 170, 170, 0, 168, 170, 171, 170, 170, 170, 255, 149, 170, 80, 10, 0, + 170, 2, 0, 0, 0, 0, 0, 7, 127, 0, 248, 0, 0, 255, 255, 255, + 255, 255, 0, 0, 0, 0, 0, 252, 255, 255, 15, 0, 0, 192, 223, 255, + 252, 255, 255, 15, 0, 0, 192, 235, 239, 255, 0, 0, 0, 252, 255, 255, + 15, 0, 0, 192, 255, 255, 255, 0, 0, 0, 252, 255, 255, 15, 0, 0, + 192, 255, 255, 255, 0, 192, 255, 255, 0, 0, 192, 255, 63, 0, 0, 0, + 252, 255, 255, 247, 3, 0, 0, 240, 255, 255, 223, 15, 255, 127, 63, 0, + 255, 253, 0, 0, 247, 11, 0, 0, +}; + +/* Lowercase: 697 bytes. */ + +RE_UINT32 re_get_lowercase(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 16; + code = ch ^ (f << 16); + pos = (RE_UINT32)re_lowercase_stage_1[f] << 4; + f = code >> 12; + code ^= f << 12; + pos = (RE_UINT32)re_lowercase_stage_2[pos + f] << 3; + f = code >> 9; + code ^= f << 9; + pos = (RE_UINT32)re_lowercase_stage_3[pos + f] << 4; + f = code >> 5; + code ^= f << 5; + pos = (RE_UINT32)re_lowercase_stage_4[pos + f] << 5; + pos += code; + value = (re_lowercase_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* Uppercase. 
*/ + +static RE_UINT8 re_uppercase_stage_1[] = { + 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, +}; + +static RE_UINT8 re_uppercase_stage_2[] = { + 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, 3, 3, 5, + 6, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, +}; + +static RE_UINT8 re_uppercase_stage_3[] = { + 0, 1, 2, 3, 3, 3, 3, 3, 4, 3, 3, 3, 3, 3, 3, 5, + 6, 3, 7, 3, 3, 3, 8, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 9, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 10, + 3, 3, 11, 3, 3, 3, 3, 3, 3, 3, 12, 13, 3, 3, 3, 3, +}; + +static RE_UINT8 re_uppercase_stage_4[] = { + 0, 0, 1, 0, 0, 0, 2, 0, 3, 4, 5, 6, 7, 8, 9, 10, + 3, 11, 12, 0, 0, 0, 0, 0, 0, 0, 0, 13, 14, 15, 16, 17, + 18, 19, 0, 3, 20, 3, 21, 3, 3, 22, 23, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 18, 24, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 3, 3, 3, 3, 25, 3, 3, 3, 26, 27, 28, 29, 0, 30, 31, 32, + 0, 0, 0, 0, 0, 0, 0, 0, 33, 34, 35, 19, 36, 0, 0, 0, + 0, 0, 0, 0, 0, 37, 19, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 18, 38, 0, 39, 3, 3, 3, 40, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 3, 41, 42, 0, 0, 0, 0, 43, 3, 44, 45, 46, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, + 18, 47, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 48, 49, 50, + 51, 61, 62, 54, 55, 51, 63, 64, 65, 66, 37, 38, 54, 67, 68, 0, +}; + +static RE_UINT8 re_uppercase_stage_5[] = { + 0, 0, 0, 0, 254, 255, 255, 7, 255, 255, 127, 127, 85, 85, 85, 85, + 85, 85, 85, 170, 170, 84, 85, 85, 85, 85, 85, 43, 214, 206, 219, 177, + 213, 210, 174, 17, 144, 164, 170, 74, 85, 85, 210, 85, 85, 85, 5, 108, + 122, 85, 0, 0, 0, 0, 69, 0, 64, 215, 254, 255, 251, 15, 0, 0, + 0, 128, 28, 85, 85, 85, 144, 230, 255, 255, 255, 255, 255, 255, 0, 0, + 1, 84, 85, 85, 171, 42, 85, 85, 85, 0, 254, 255, 255, 255, 127, 0, + 191, 32, 0, 0, 85, 85, 21, 64, 0, 255, 0, 63, 0, 255, 0, 255, + 0, 63, 0, 170, 0, 255, 0, 0, 0, 0, 0, 15, 0, 15, 0, 15, + 0, 31, 0, 15, 132, 56, 
39, 62, 80, 61, 15, 192, 32, 0, 0, 0, + 8, 0, 0, 0, 0, 0, 192, 255, 255, 127, 0, 0, 157, 234, 37, 192, + 5, 40, 4, 0, 85, 21, 0, 0, 85, 85, 85, 0, 84, 85, 84, 85, + 85, 85, 0, 106, 85, 40, 5, 0, 85, 5, 0, 0, 255, 0, 0, 0, + 255, 255, 255, 3, 0, 0, 240, 255, 255, 63, 0, 0, 0, 255, 255, 255, + 3, 0, 0, 208, 100, 222, 63, 0, 0, 0, 255, 255, 255, 3, 0, 0, + 176, 231, 223, 31, 0, 0, 0, 123, 95, 252, 1, 0, 0, 240, 255, 255, + 63, 0, 0, 0, 3, 0, 0, 240, 255, 255, 63, 0, 1, 0, 0, 0, + 252, 255, 255, 7, 0, 0, 0, 240, 255, 255, 31, 0, 255, 1, 0, 0, + 0, 4, 0, 0, +}; + +/* Uppercase: 629 bytes. */ + +RE_UINT32 re_get_uppercase(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 16; + code = ch ^ (f << 16); + pos = (RE_UINT32)re_uppercase_stage_1[f] << 4; + f = code >> 12; + code ^= f << 12; + pos = (RE_UINT32)re_uppercase_stage_2[pos + f] << 3; + f = code >> 9; + code ^= f << 9; + pos = (RE_UINT32)re_uppercase_stage_3[pos + f] << 4; + f = code >> 5; + code ^= f << 5; + pos = (RE_UINT32)re_uppercase_stage_4[pos + f] << 5; + pos += code; + value = (re_uppercase_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* Cased. 
*/ + +static RE_UINT8 re_cased_stage_1[] = { + 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, +}; + +static RE_UINT8 re_cased_stage_2[] = { + 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, 3, 3, 5, + 6, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, +}; + +static RE_UINT8 re_cased_stage_3[] = { + 0, 1, 2, 3, 3, 3, 3, 3, 4, 3, 3, 3, 3, 3, 5, 6, + 7, 3, 8, 3, 3, 3, 9, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 10, 3, 3, 3, 3, 3, 3, 3, 3, 3, 11, 3, 12, + 3, 3, 13, 3, 3, 3, 3, 3, 3, 3, 14, 15, 3, 3, 3, 3, +}; + +static RE_UINT8 re_cased_stage_4[] = { + 0, 0, 1, 1, 0, 2, 3, 3, 4, 4, 4, 4, 4, 5, 6, 4, + 4, 4, 4, 4, 7, 8, 9, 10, 0, 0, 11, 12, 13, 14, 4, 15, + 4, 4, 4, 4, 16, 4, 4, 4, 4, 17, 18, 19, 20, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 4, 21, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 0, 0, + 4, 4, 4, 4, 4, 4, 4, 4, 22, 4, 23, 24, 4, 25, 26, 27, + 0, 0, 0, 28, 29, 0, 0, 0, 30, 31, 32, 4, 33, 0, 0, 0, + 0, 0, 0, 0, 0, 34, 4, 35, 0, 0, 0, 0, 0, 0, 0, 0, + 4, 36, 37, 4, 4, 4, 4, 38, 4, 21, 0, 0, 0, 0, 0, 0, + 0, 0, 4, 39, 40, 0, 0, 0, 0, 41, 4, 4, 42, 43, 0, 44, + 0, 0, 0, 0, 0, 0, 0, 0, 45, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, + 4, 4, 46, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 4, 4, 47, 4, 48, 49, 50, 4, 51, 52, 53, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 54, 55, 5, 47, 47, 36, 36, 56, 56, 57, 0, +}; + +static RE_UINT8 re_cased_stage_5[] = { + 0, 0, 0, 0, 254, 255, 255, 7, 0, 4, 32, 4, 255, 255, 127, 255, + 255, 255, 255, 255, 255, 255, 255, 247, 240, 255, 255, 255, 255, 255, 239, 255, + 255, 255, 255, 1, 3, 0, 0, 0, 31, 0, 0, 0, 32, 0, 0, 0, + 0, 0, 207, 60, 64, 215, 255, 255, 251, 255, 255, 255, 255, 255, 191, 255, + 3, 252, 255, 255, 255, 0, 254, 255, 255, 255, 127, 0, 254, 255, 255, 255, + 255, 0, 0, 0, 191, 32, 0, 0, 255, 255, 63, 63, 63, 63, 255, 170, + 255, 255, 255, 63, 255, 255, 223, 95, 220, 31, 207, 15, 255, 31, 220, 31, + 0, 
0, 2, 128, 0, 0, 255, 31, 132, 252, 47, 62, 80, 189, 31, 242, + 224, 67, 0, 0, 24, 0, 0, 0, 0, 0, 192, 255, 255, 3, 0, 0, + 255, 127, 255, 255, 255, 255, 255, 127, 31, 120, 12, 0, 255, 63, 0, 0, + 255, 255, 255, 0, 252, 255, 255, 255, 255, 120, 15, 0, 255, 7, 0, 0, + 0, 0, 0, 7, 127, 0, 248, 0, 255, 255, 0, 0, 255, 255, 223, 255, + 255, 255, 255, 223, 100, 222, 255, 235, 239, 255, 255, 255, 191, 231, 223, 223, + 255, 255, 255, 123, 95, 252, 253, 255, 63, 255, 255, 255, 253, 255, 255, 247, + 255, 253, 255, 255, 247, 15, 0, 0, +}; + +/* Cased: 617 bytes. */ + +RE_UINT32 re_get_cased(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 16; + code = ch ^ (f << 16); + pos = (RE_UINT32)re_cased_stage_1[f] << 4; + f = code >> 12; + code ^= f << 12; + pos = (RE_UINT32)re_cased_stage_2[pos + f] << 3; + f = code >> 9; + code ^= f << 9; + pos = (RE_UINT32)re_cased_stage_3[pos + f] << 4; + f = code >> 5; + code ^= f << 5; + pos = (RE_UINT32)re_cased_stage_4[pos + f] << 5; + pos += code; + value = (re_cased_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* Case_Ignorable. 
*/ + +static RE_UINT8 re_case_ignorable_stage_1[] = { + 0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 4, 4, 4, + 4, 4, +}; + +static RE_UINT8 re_case_ignorable_stage_2[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 8, 9, 7, 7, 7, 7, 7, 7, 7, 7, 7, 10, + 11, 12, 13, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 14, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 15, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 16, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, +}; + +static RE_UINT8 re_case_ignorable_stage_3[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 1, 1, 17, 1, 1, 1, 18, 19, 20, 21, 22, 23, 24, 1, 25, + 26, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 27, 28, 29, 1, + 30, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 31, 1, 1, 1, 32, 1, 33, 34, 35, 36, 37, 38, 1, 1, 1, 1, + 1, 1, 1, 39, 1, 1, 40, 41, 1, 42, 1, 1, 1, 1, 1, 1, + 1, 1, 43, 1, 1, 1, 1, 1, 44, 45, 1, 1, 1, 1, 46, 1, + 1, 1, 1, 1, 1, 1, 1, 47, 1, 48, 49, 1, 1, 1, 1, 1, + 50, 51, 1, 1, 1, 1, 1, 1, +}; + +static RE_UINT8 re_case_ignorable_stage_4[] = { + 0, 1, 2, 3, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 5, 6, 6, 6, 6, 6, 7, 8, 0, 0, 0, + 0, 0, 0, 0, 9, 0, 0, 0, 0, 0, 10, 0, 11, 12, 13, 14, + 15, 0, 16, 17, 0, 0, 18, 19, 20, 5, 21, 0, 0, 22, 0, 23, + 24, 25, 26, 0, 0, 0, 0, 27, 28, 29, 30, 31, 32, 33, 34, 35, + 36, 33, 37, 38, 36, 33, 39, 35, 32, 40, 41, 35, 42, 0, 43, 0, + 0, 44, 45, 35, 0, 40, 46, 35, 0, 0, 34, 35, 0, 0, 47, 0, + 0, 48, 49, 0, 0, 50, 51, 0, 52, 53, 0, 54, 55, 56, 57, 0, + 0, 58, 59, 60, 61, 0, 0, 33, 0, 0, 62, 0, 0, 0, 0, 0, + 63, 63, 64, 64, 0, 65, 66, 0, 67, 0, 68, 0, 0, 69, 0, 0, + 0, 70, 0, 0, 0, 0, 0, 0, 71, 0, 72, 73, 0, 74, 0, 0, + 75, 76, 42, 77, 78, 79, 0, 80, 0, 81, 0, 82, 0, 0, 83, 84, + 0, 85, 6, 86, 87, 6, 6, 88, 0, 0, 0, 0, 0, 89, 90, 91, + 92, 93, 0, 94, 95, 0, 5, 96, 0, 0, 0, 97, 0, 0, 0, 98, + 0, 0, 0, 99, 0, 0, 0, 6, 0, 100, 0, 0, 0, 0, 0, 0, + 101, 102, 0, 0, 103, 0, 0, 104, 105, 
0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 82, 106, 0, 0, 107, 108, 0, 0, 109, + 6, 78, 0, 17, 110, 0, 0, 52, 111, 112, 0, 0, 0, 0, 113, 114, + 0, 115, 116, 0, 28, 117, 100, 0, 0, 118, 119, 17, 0, 120, 121, 122, + 0, 0, 0, 0, 0, 0, 0, 123, 2, 0, 0, 0, 0, 124, 78, 0, + 125, 126, 127, 0, 0, 0, 0, 108, 1, 2, 3, 17, 44, 0, 0, 128, + 0, 0, 0, 0, 0, 0, 0, 129, 130, 131, 0, 0, 0, 0, 0, 0, + 32, 132, 126, 0, 78, 133, 0, 0, 28, 134, 0, 0, 78, 135, 0, 0, + 0, 0, 0, 0, 0, 136, 0, 0, 0, 0, 0, 0, 137, 0, 0, 0, + 0, 0, 0, 138, 139, 140, 0, 0, 0, 0, 141, 0, 0, 0, 0, 0, + 32, 6, 6, 6, 0, 0, 0, 0, 6, 6, 6, 6, 6, 6, 6, 142, +}; + +static RE_UINT8 re_case_ignorable_stage_5[] = { + 0, 0, 0, 0, 128, 64, 0, 4, 0, 0, 0, 64, 1, 0, 0, 0, + 0, 161, 144, 1, 0, 0, 255, 255, 255, 255, 255, 255, 255, 255, 48, 4, + 176, 0, 0, 0, 248, 3, 0, 0, 0, 0, 0, 2, 0, 0, 254, 255, + 255, 255, 255, 191, 182, 0, 0, 0, 0, 0, 16, 0, 31, 0, 255, 23, + 1, 248, 255, 255, 0, 0, 1, 0, 0, 0, 192, 191, 255, 61, 0, 0, + 0, 128, 2, 0, 255, 7, 0, 0, 192, 255, 1, 0, 0, 248, 63, 4, + 0, 0, 192, 255, 255, 63, 0, 0, 0, 0, 0, 14, 240, 255, 255, 127, + 7, 0, 0, 0, 0, 0, 0, 20, 254, 33, 254, 0, 12, 0, 2, 0, + 2, 0, 0, 0, 0, 0, 0, 16, 30, 32, 0, 0, 12, 0, 0, 0, + 6, 0, 0, 0, 134, 57, 2, 0, 0, 0, 35, 0, 190, 33, 0, 0, + 0, 0, 0, 144, 30, 32, 64, 0, 4, 0, 0, 0, 1, 32, 0, 0, + 0, 0, 0, 192, 193, 61, 96, 0, 64, 48, 0, 0, 0, 4, 92, 0, + 0, 0, 242, 7, 192, 127, 0, 0, 0, 0, 242, 27, 64, 63, 0, 0, + 0, 0, 0, 3, 0, 0, 160, 2, 0, 0, 254, 127, 223, 224, 255, 254, + 255, 255, 255, 31, 64, 0, 0, 0, 0, 224, 253, 102, 0, 0, 0, 195, + 1, 0, 30, 0, 100, 32, 0, 32, 0, 0, 0, 224, 0, 0, 28, 0, + 0, 0, 12, 0, 0, 0, 176, 63, 64, 254, 143, 32, 0, 120, 0, 0, + 8, 0, 0, 0, 0, 2, 0, 0, 135, 1, 4, 14, 0, 0, 128, 9, + 0, 0, 64, 127, 229, 31, 248, 159, 128, 0, 0, 0, 15, 0, 0, 0, + 0, 0, 208, 23, 0, 248, 15, 0, 3, 0, 0, 0, 60, 11, 0, 0, + 64, 163, 3, 0, 0, 240, 207, 0, 0, 0, 0, 63, 0, 0, 247, 255, + 253, 33, 16, 0, 0, 240, 255, 255, 255, 7, 0, 1, 0, 0, 0, 
248, + 127, 0, 0, 240, 0, 0, 0, 160, 3, 224, 0, 224, 0, 224, 0, 96, + 0, 248, 0, 3, 144, 124, 0, 0, 223, 255, 2, 128, 0, 0, 255, 31, + 255, 255, 1, 0, 0, 0, 0, 48, 0, 128, 3, 0, 0, 128, 0, 128, + 0, 128, 0, 0, 32, 0, 0, 0, 0, 60, 62, 8, 0, 0, 0, 126, + 0, 0, 0, 112, 0, 0, 32, 0, 0, 16, 0, 0, 0, 128, 247, 191, + 0, 0, 0, 128, 0, 0, 3, 0, 0, 7, 0, 0, 68, 8, 0, 0, + 96, 0, 0, 0, 16, 0, 0, 0, 255, 255, 3, 0, 192, 63, 0, 0, + 128, 255, 3, 0, 0, 0, 200, 19, 0, 126, 102, 0, 8, 16, 0, 0, + 0, 0, 157, 193, 2, 0, 0, 32, 0, 48, 88, 0, 32, 33, 0, 0, + 0, 0, 252, 255, 255, 255, 8, 0, 127, 0, 0, 0, 0, 0, 36, 0, + 8, 0, 0, 14, 0, 0, 0, 32, 110, 240, 0, 0, 0, 0, 0, 135, + 0, 0, 0, 255, 0, 0, 120, 38, 128, 239, 31, 0, 0, 0, 192, 127, + 0, 40, 191, 0, 0, 128, 255, 255, 128, 3, 248, 255, 231, 15, 0, 0, + 0, 60, 0, 0, 28, 0, 0, 0, 255, 255, 0, 0, +}; + +/* Case_Ignorable: 1254 bytes. */ + +RE_UINT32 re_get_case_ignorable(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 15; + code = ch ^ (f << 15); + pos = (RE_UINT32)re_case_ignorable_stage_1[f] << 4; + f = code >> 11; + code ^= f << 11; + pos = (RE_UINT32)re_case_ignorable_stage_2[pos + f] << 3; + f = code >> 8; + code ^= f << 8; + pos = (RE_UINT32)re_case_ignorable_stage_3[pos + f] << 3; + f = code >> 5; + code ^= f << 5; + pos = (RE_UINT32)re_case_ignorable_stage_4[pos + f] << 5; + pos += code; + value = (re_case_ignorable_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* Changes_When_Lowercased. 
*/ + +static RE_UINT8 re_changes_when_lowercased_stage_1[] = { + 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, +}; + +static RE_UINT8 re_changes_when_lowercased_stage_2[] = { + 0, 1, 2, 3, 4, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 6, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 7, + 8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +}; + +static RE_UINT8 re_changes_when_lowercased_stage_3[] = { + 0, 1, 2, 3, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 8, 9, + 6, 10, 6, 6, 11, 6, 6, 6, 6, 6, 6, 6, 12, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 13, 14, 6, 6, 6, 6, 6, 6, 6, 15, + 6, 6, 6, 6, 16, 6, 6, 6, +}; + +static RE_UINT8 re_changes_when_lowercased_stage_4[] = { + 0, 0, 1, 0, 0, 0, 2, 0, 3, 4, 5, 6, 7, 8, 9, 10, + 3, 11, 12, 0, 0, 0, 0, 0, 0, 0, 0, 13, 14, 15, 16, 17, + 18, 19, 0, 3, 20, 3, 21, 3, 3, 22, 23, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 18, 24, 0, + 3, 3, 3, 3, 25, 3, 3, 3, 26, 27, 28, 29, 27, 30, 31, 32, + 0, 33, 0, 19, 34, 0, 0, 0, 0, 0, 0, 0, 0, 35, 19, 0, + 18, 36, 0, 37, 3, 3, 3, 38, 0, 0, 3, 39, 40, 0, 0, 0, + 0, 41, 3, 42, 43, 44, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, + 18, 45, 0, 0, 0, 0, 0, 0, +}; + +static RE_UINT8 re_changes_when_lowercased_stage_5[] = { + 0, 0, 0, 0, 254, 255, 255, 7, 255, 255, 127, 127, 85, 85, 85, 85, + 85, 85, 85, 170, 170, 84, 85, 85, 85, 85, 85, 43, 214, 206, 219, 177, + 213, 210, 174, 17, 176, 173, 170, 74, 85, 85, 214, 85, 85, 85, 5, 108, + 122, 85, 0, 0, 0, 0, 69, 0, 64, 215, 254, 255, 251, 15, 0, 0, + 0, 128, 0, 85, 85, 85, 144, 230, 255, 255, 255, 255, 255, 255, 0, 0, + 1, 84, 85, 85, 171, 42, 85, 85, 85, 0, 254, 255, 255, 255, 127, 0, + 191, 32, 0, 0, 85, 85, 21, 64, 0, 255, 0, 63, 0, 255, 0, 255, + 0, 63, 0, 170, 0, 255, 0, 0, 0, 255, 0, 31, 0, 31, 0, 15, + 0, 31, 0, 31, 64, 12, 4, 0, 8, 0, 0, 0, 0, 0, 192, 255, + 255, 127, 0, 0, 157, 234, 37, 192, 5, 40, 4, 0, 85, 21, 0, 0, + 85, 
85, 85, 0, 84, 85, 84, 85, 85, 85, 0, 106, 85, 40, 5, 0, + 85, 5, 0, 0, 255, 0, 0, 0, +}; + +/* Changes_When_Lowercased: 490 bytes. */ + +RE_UINT32 re_get_changes_when_lowercased(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 15; + code = ch ^ (f << 15); + pos = (RE_UINT32)re_changes_when_lowercased_stage_1[f] << 4; + f = code >> 11; + code ^= f << 11; + pos = (RE_UINT32)re_changes_when_lowercased_stage_2[pos + f] << 3; + f = code >> 8; + code ^= f << 8; + pos = (RE_UINT32)re_changes_when_lowercased_stage_3[pos + f] << 3; + f = code >> 5; + code ^= f << 5; + pos = (RE_UINT32)re_changes_when_lowercased_stage_4[pos + f] << 5; + pos += code; + value = (re_changes_when_lowercased_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* Changes_When_Uppercased. */ + +static RE_UINT8 re_changes_when_uppercased_stage_1[] = { + 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, +}; + +static RE_UINT8 re_changes_when_uppercased_stage_2[] = { + 0, 1, 1, 2, 3, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6, + 7, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +}; + +static RE_UINT8 re_changes_when_uppercased_stage_3[] = { + 0, 1, 2, 3, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 7, 8, 9, 6, 10, 6, 6, 11, 6, 6, 6, + 6, 6, 6, 6, 12, 13, 6, 6, 6, 6, 6, 6, 6, 6, 14, 15, + 6, 6, 6, 16, 6, 6, 6, 17, 6, 6, 6, 6, 18, 6, 6, 6, +}; + +static RE_UINT8 re_changes_when_uppercased_stage_4[] = { + 0, 0, 0, 1, 0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, + 5, 13, 14, 15, 16, 0, 0, 0, 0, 0, 17, 18, 19, 20, 21, 22, + 0, 23, 24, 5, 25, 5, 26, 5, 5, 27, 0, 28, 29, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 30, 0, 0, 0, 0, + 5, 5, 5, 5, 31, 5, 5, 5, 32, 33, 34, 35, 24, 36, 37, 38, + 0, 0, 39, 23, 40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 23, 41, + 0, 23, 42, 43, 5, 5, 5, 44, 24, 45, 0, 0, 0, 0, 0, 0, + 0, 
0, 5, 46, 47, 0, 0, 0, 0, 48, 5, 49, 50, 51, 0, 0, + 52, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, + 0, 53, 54, 0, 0, 0, 0, 0, +}; + +static RE_UINT8 re_changes_when_uppercased_stage_5[] = { + 0, 0, 0, 0, 254, 255, 255, 7, 0, 0, 32, 0, 0, 0, 0, 128, + 255, 255, 127, 255, 170, 170, 170, 170, 170, 170, 170, 84, 85, 171, 170, 170, + 170, 170, 170, 212, 41, 17, 36, 70, 42, 33, 81, 162, 96, 91, 85, 181, + 170, 170, 45, 170, 168, 170, 10, 144, 133, 170, 223, 10, 105, 139, 38, 32, + 9, 31, 4, 0, 32, 0, 0, 0, 0, 0, 138, 56, 0, 0, 1, 0, + 0, 240, 255, 255, 255, 127, 227, 170, 170, 170, 39, 9, 0, 0, 255, 255, + 255, 255, 255, 255, 2, 168, 170, 170, 84, 213, 170, 170, 170, 0, 0, 0, + 254, 255, 255, 255, 255, 0, 0, 0, 0, 0, 0, 34, 170, 170, 234, 15, + 255, 0, 63, 0, 255, 0, 255, 0, 63, 0, 255, 0, 255, 0, 255, 63, + 255, 255, 223, 80, 220, 16, 207, 0, 255, 0, 220, 16, 0, 64, 0, 0, + 16, 0, 0, 0, 255, 3, 0, 0, 255, 255, 255, 127, 98, 21, 72, 0, + 10, 80, 8, 0, 191, 32, 0, 0, 170, 42, 0, 0, 170, 170, 170, 0, + 168, 170, 168, 170, 170, 170, 0, 148, 170, 16, 10, 0, 170, 2, 0, 0, + 127, 0, 248, 0, 0, 255, 255, 255, 255, 255, 0, 0, +}; + +/* Changes_When_Uppercased: 534 bytes. */ + +RE_UINT32 re_get_changes_when_uppercased(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 15; + code = ch ^ (f << 15); + pos = (RE_UINT32)re_changes_when_uppercased_stage_1[f] << 4; + f = code >> 11; + code ^= f << 11; + pos = (RE_UINT32)re_changes_when_uppercased_stage_2[pos + f] << 3; + f = code >> 8; + code ^= f << 8; + pos = (RE_UINT32)re_changes_when_uppercased_stage_3[pos + f] << 3; + f = code >> 5; + code ^= f << 5; + pos = (RE_UINT32)re_changes_when_uppercased_stage_4[pos + f] << 5; + pos += code; + value = (re_changes_when_uppercased_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* Changes_When_Titlecased. 
*/ + +static RE_UINT8 re_changes_when_titlecased_stage_1[] = { + 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, +}; + +static RE_UINT8 re_changes_when_titlecased_stage_2[] = { + 0, 1, 1, 2, 3, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6, + 7, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +}; + +static RE_UINT8 re_changes_when_titlecased_stage_3[] = { + 0, 1, 2, 3, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 7, 8, 9, 6, 10, 6, 6, 11, 6, 6, 6, + 6, 6, 6, 6, 12, 13, 6, 6, 6, 6, 6, 6, 6, 6, 14, 15, + 6, 6, 6, 16, 6, 6, 6, 17, 6, 6, 6, 6, 18, 6, 6, 6, +}; + +static RE_UINT8 re_changes_when_titlecased_stage_4[] = { + 0, 0, 0, 1, 0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, + 5, 13, 14, 15, 16, 0, 0, 0, 0, 0, 17, 18, 19, 20, 21, 22, + 0, 23, 24, 5, 25, 5, 26, 5, 5, 27, 0, 28, 29, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 30, 0, 0, 0, 0, + 5, 5, 5, 5, 31, 5, 5, 5, 32, 33, 34, 35, 33, 36, 37, 38, + 0, 0, 39, 23, 40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 23, 41, + 0, 23, 42, 43, 5, 5, 5, 44, 24, 45, 0, 0, 0, 0, 0, 0, + 0, 0, 5, 46, 47, 0, 0, 0, 0, 48, 5, 49, 50, 51, 0, 0, + 52, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, + 0, 53, 54, 0, 0, 0, 0, 0, +}; + +static RE_UINT8 re_changes_when_titlecased_stage_5[] = { + 0, 0, 0, 0, 254, 255, 255, 7, 0, 0, 32, 0, 0, 0, 0, 128, + 255, 255, 127, 255, 170, 170, 170, 170, 170, 170, 170, 84, 85, 171, 170, 170, + 170, 170, 170, 212, 41, 17, 36, 70, 42, 33, 81, 162, 208, 86, 85, 181, + 170, 170, 43, 170, 168, 170, 10, 144, 133, 170, 223, 10, 105, 139, 38, 32, + 9, 31, 4, 0, 32, 0, 0, 0, 0, 0, 138, 56, 0, 0, 1, 0, + 0, 240, 255, 255, 255, 127, 227, 170, 170, 170, 39, 9, 0, 0, 255, 255, + 255, 255, 255, 255, 2, 168, 170, 170, 84, 213, 170, 170, 170, 0, 0, 0, + 254, 255, 255, 255, 255, 0, 0, 0, 0, 0, 0, 34, 170, 170, 234, 15, + 255, 0, 63, 0, 255, 0, 255, 0, 63, 0, 255, 0, 255, 0, 255, 63, + 255, 0, 223, 
64, 220, 0, 207, 0, 255, 0, 220, 0, 0, 64, 0, 0, + 16, 0, 0, 0, 255, 3, 0, 0, 255, 255, 255, 127, 98, 21, 72, 0, + 10, 80, 8, 0, 191, 32, 0, 0, 170, 42, 0, 0, 170, 170, 170, 0, + 168, 170, 168, 170, 170, 170, 0, 148, 170, 16, 10, 0, 170, 2, 0, 0, + 127, 0, 248, 0, 0, 255, 255, 255, 255, 255, 0, 0, +}; + +/* Changes_When_Titlecased: 534 bytes. */ + +RE_UINT32 re_get_changes_when_titlecased(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 15; + code = ch ^ (f << 15); + pos = (RE_UINT32)re_changes_when_titlecased_stage_1[f] << 4; + f = code >> 11; + code ^= f << 11; + pos = (RE_UINT32)re_changes_when_titlecased_stage_2[pos + f] << 3; + f = code >> 8; + code ^= f << 8; + pos = (RE_UINT32)re_changes_when_titlecased_stage_3[pos + f] << 3; + f = code >> 5; + code ^= f << 5; + pos = (RE_UINT32)re_changes_when_titlecased_stage_4[pos + f] << 5; + pos += code; + value = (re_changes_when_titlecased_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* Changes_When_Casefolded. 
*/ + +static RE_UINT8 re_changes_when_casefolded_stage_1[] = { + 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, +}; + +static RE_UINT8 re_changes_when_casefolded_stage_2[] = { + 0, 1, 2, 3, 4, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 6, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 7, + 8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +}; + +static RE_UINT8 re_changes_when_casefolded_stage_3[] = { + 0, 1, 2, 3, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 8, 9, + 6, 10, 6, 6, 11, 6, 6, 6, 6, 6, 6, 6, 12, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 13, 14, 6, 6, 6, 15, 6, 6, 6, 16, + 6, 6, 6, 6, 17, 6, 6, 6, +}; + +static RE_UINT8 re_changes_when_casefolded_stage_4[] = { + 0, 0, 1, 0, 0, 2, 3, 0, 4, 5, 6, 7, 8, 9, 10, 11, + 4, 12, 13, 0, 0, 0, 0, 0, 0, 0, 14, 15, 16, 17, 18, 19, + 20, 21, 0, 4, 22, 4, 23, 4, 4, 24, 25, 0, 26, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 20, 27, 0, + 4, 4, 4, 4, 28, 4, 4, 4, 29, 30, 31, 32, 20, 33, 34, 35, + 0, 36, 0, 21, 37, 0, 0, 0, 0, 0, 0, 0, 0, 38, 21, 0, + 20, 39, 0, 40, 4, 4, 4, 41, 0, 0, 4, 42, 43, 0, 0, 0, + 0, 44, 4, 45, 46, 47, 0, 0, 48, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 0, 0, 0, 0, 0, 0, 20, 49, 0, 0, 0, 0, 0, 0, +}; + +static RE_UINT8 re_changes_when_casefolded_stage_5[] = { + 0, 0, 0, 0, 254, 255, 255, 7, 0, 0, 32, 0, 255, 255, 127, 255, + 85, 85, 85, 85, 85, 85, 85, 170, 170, 86, 85, 85, 85, 85, 85, 171, + 214, 206, 219, 177, 213, 210, 174, 17, 176, 173, 170, 74, 85, 85, 214, 85, + 85, 85, 5, 108, 122, 85, 0, 0, 32, 0, 0, 0, 0, 0, 69, 0, + 64, 215, 254, 255, 251, 15, 0, 0, 4, 128, 99, 85, 85, 85, 179, 230, + 255, 255, 255, 255, 255, 255, 0, 0, 1, 84, 85, 85, 171, 42, 85, 85, + 85, 0, 254, 255, 255, 255, 127, 0, 128, 0, 0, 0, 191, 32, 0, 0, + 85, 85, 21, 76, 0, 255, 0, 63, 0, 255, 0, 255, 0, 63, 0, 170, + 0, 255, 0, 0, 255, 255, 156, 31, 156, 31, 0, 15, 0, 31, 156, 31, + 64, 12, 4, 0, 8, 0, 0, 0, 0, 0, 
192, 255, 255, 127, 0, 0, + 157, 234, 37, 192, 5, 40, 4, 0, 85, 21, 0, 0, 85, 85, 85, 0, + 84, 85, 84, 85, 85, 85, 0, 106, 85, 40, 5, 0, 85, 5, 0, 0, + 127, 0, 248, 0, 255, 0, 0, 0, +}; + +/* Changes_When_Casefolded: 514 bytes. */ + +RE_UINT32 re_get_changes_when_casefolded(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 15; + code = ch ^ (f << 15); + pos = (RE_UINT32)re_changes_when_casefolded_stage_1[f] << 4; + f = code >> 11; + code ^= f << 11; + pos = (RE_UINT32)re_changes_when_casefolded_stage_2[pos + f] << 3; + f = code >> 8; + code ^= f << 8; + pos = (RE_UINT32)re_changes_when_casefolded_stage_3[pos + f] << 3; + f = code >> 5; + code ^= f << 5; + pos = (RE_UINT32)re_changes_when_casefolded_stage_4[pos + f] << 5; + pos += code; + value = (re_changes_when_casefolded_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* Changes_When_Casemapped. */ + +static RE_UINT8 re_changes_when_casemapped_stage_1[] = { + 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, +}; + +static RE_UINT8 re_changes_when_casemapped_stage_2[] = { + 0, 1, 2, 3, 4, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 6, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 7, + 8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +}; + +static RE_UINT8 re_changes_when_casemapped_stage_3[] = { + 0, 1, 2, 3, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 8, 9, 10, + 6, 11, 6, 6, 12, 6, 6, 6, 6, 6, 6, 6, 13, 14, 6, 6, + 6, 6, 6, 6, 6, 6, 15, 16, 6, 6, 6, 17, 6, 6, 6, 18, + 6, 6, 6, 6, 19, 6, 6, 6, +}; + +static RE_UINT8 re_changes_when_casemapped_stage_4[] = { + 0, 0, 1, 1, 0, 2, 3, 3, 4, 5, 4, 4, 6, 7, 8, 4, + 4, 9, 10, 11, 12, 0, 0, 0, 0, 0, 13, 14, 15, 16, 17, 18, + 4, 4, 4, 4, 19, 4, 4, 4, 4, 20, 21, 22, 23, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 24, 0, + 0, 0, 0, 25, 0, 0, 0, 0, 4, 4, 4, 4, 26, 4, 4, 4, + 27, 
4, 28, 29, 4, 30, 31, 32, 0, 33, 34, 4, 35, 0, 0, 0, + 0, 0, 0, 0, 0, 36, 4, 37, 4, 38, 39, 40, 4, 4, 4, 41, + 4, 24, 0, 0, 0, 0, 0, 0, 0, 0, 4, 42, 43, 0, 0, 0, + 0, 44, 4, 45, 46, 47, 0, 0, 48, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 1, 0, 0, 0, 0, 0, 4, 4, 49, 0, 0, 0, 0, 0, +}; + +static RE_UINT8 re_changes_when_casemapped_stage_5[] = { + 0, 0, 0, 0, 254, 255, 255, 7, 0, 0, 32, 0, 255, 255, 127, 255, + 255, 255, 255, 255, 255, 255, 255, 254, 255, 223, 255, 247, 255, 243, 255, 179, + 240, 255, 255, 255, 253, 255, 15, 252, 255, 255, 223, 10, 105, 139, 38, 32, + 9, 31, 4, 0, 32, 0, 0, 0, 0, 0, 207, 56, 64, 215, 255, 255, + 251, 255, 255, 255, 255, 255, 227, 255, 255, 255, 183, 239, 3, 252, 255, 255, + 255, 0, 254, 255, 255, 255, 127, 0, 254, 255, 255, 255, 255, 0, 0, 0, + 191, 32, 0, 0, 0, 0, 0, 34, 255, 255, 255, 79, 255, 255, 63, 63, + 63, 63, 255, 170, 255, 255, 255, 63, 255, 255, 223, 95, 220, 31, 207, 15, + 255, 31, 220, 31, 64, 12, 4, 0, 0, 64, 0, 0, 24, 0, 0, 0, + 0, 0, 192, 255, 255, 3, 0, 0, 255, 127, 255, 255, 255, 255, 255, 127, + 255, 255, 109, 192, 15, 120, 12, 0, 255, 63, 0, 0, 255, 255, 255, 0, + 252, 255, 252, 255, 255, 255, 0, 254, 255, 56, 15, 0, 255, 7, 0, 0, + 127, 0, 248, 0, 255, 255, 0, 0, +}; + +/* Changes_When_Casemapped: 530 bytes. */ + +RE_UINT32 re_get_changes_when_casemapped(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 15; + code = ch ^ (f << 15); + pos = (RE_UINT32)re_changes_when_casemapped_stage_1[f] << 4; + f = code >> 11; + code ^= f << 11; + pos = (RE_UINT32)re_changes_when_casemapped_stage_2[pos + f] << 3; + f = code >> 8; + code ^= f << 8; + pos = (RE_UINT32)re_changes_when_casemapped_stage_3[pos + f] << 3; + f = code >> 5; + code ^= f << 5; + pos = (RE_UINT32)re_changes_when_casemapped_stage_4[pos + f] << 5; + pos += code; + value = (re_changes_when_casemapped_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* ID_Start. 
*/ + +static RE_UINT8 re_id_start_stage_1[] = { + 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, +}; + +static RE_UINT8 re_id_start_stage_2[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 9, 10, 11, 7, 7, 7, 7, 12, 13, 13, 13, 13, 14, + 15, 16, 17, 13, 18, 13, 19, 13, 13, 13, 13, 13, 13, 20, 13, 13, + 13, 13, 13, 13, 13, 13, 21, 13, 13, 13, 22, 13, 13, 23, 13, 13, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 24, 7, 25, 26, 13, 13, 13, 13, 13, 13, 13, 27, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, +}; + +static RE_UINT8 re_id_start_stage_3[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 1, 17, 18, 19, 1, 20, 21, 22, 23, 24, 25, 26, 27, 1, 28, + 29, 30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 31, 31, + 34, 35, 31, 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 36, 1, 1, 1, 1, 1, 1, 1, 1, 1, 37, + 1, 1, 1, 1, 38, 1, 39, 40, 41, 42, 43, 44, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 45, 31, 31, 31, 31, 31, 31, 31, 31, + 31, 1, 46, 47, 1, 48, 49, 50, 51, 52, 53, 54, 55, 31, 31, 31, + 56, 57, 58, 59, 60, 31, 31, 31, 61, 62, 31, 31, 31, 31, 63, 31, + 1, 1, 1, 64, 65, 31, 31, 31, 1, 1, 1, 1, 66, 31, 31, 31, + 1, 1, 67, 31, 31, 31, 31, 68, 69, 31, 31, 31, 31, 31, 31, 31, + 31, 31, 31, 31, 70, 71, 72, 73, 31, 31, 31, 31, 31, 31, 74, 31, + 1, 1, 1, 1, 1, 1, 75, 1, 1, 1, 1, 1, 1, 1, 1, 76, + 77, 31, 31, 31, 31, 31, 31, 31, 1, 1, 77, 31, 31, 31, 31, 31, +}; + +static RE_UINT8 re_id_start_stage_4[] = { + 0, 0, 1, 1, 0, 2, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 5, 6, 0, 0, 0, 7, 8, 9, 4, 10, + 4, 4, 4, 4, 11, 4, 4, 4, 4, 12, 13, 14, 15, 0, 16, 17, + 0, 4, 18, 19, 4, 4, 20, 21, 22, 23, 24, 4, 4, 25, 26, 27, + 28, 29, 30, 0, 0, 31, 0, 0, 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, 36, 45, 48, 49, 50, 51, 46, 0, + 52, 53, 54, 47, 52, 53, 55, 56, 52, 57, 58, 59, 60, 61, 62, 0, + 14, 63, 62, 
0, 64, 65, 66, 0, 67, 0, 68, 69, 70, 0, 0, 0, + 4, 71, 72, 73, 74, 4, 75, 76, 4, 4, 77, 4, 78, 79, 80, 4, + 81, 4, 82, 0, 23, 4, 4, 83, 14, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 84, 1, 4, 4, 85, 86, 87, 87, 88, 4, 89, 90, 0, + 0, 4, 4, 91, 4, 92, 4, 93, 94, 0, 16, 95, 4, 96, 97, 0, + 98, 4, 83, 0, 0, 99, 0, 0, 100, 89, 101, 0, 102, 103, 4, 104, + 4, 105, 106, 107, 0, 0, 0, 108, 4, 4, 4, 4, 4, 4, 0, 0, + 109, 4, 110, 107, 4, 111, 112, 113, 0, 0, 0, 114, 115, 0, 0, 0, + 116, 117, 118, 4, 119, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 4, 120, 121, 4, 4, 4, 4, 122, 4, 75, 4, 123, 98, 124, 124, 0, + 125, 126, 14, 4, 127, 14, 4, 76, 100, 128, 4, 4, 129, 82, 0, 16, + 4, 4, 4, 4, 4, 93, 0, 0, 4, 4, 4, 4, 4, 4, 69, 0, + 4, 4, 4, 4, 69, 0, 16, 107, 130, 131, 4, 132, 91, 4, 4, 23, + 133, 134, 4, 4, 135, 18, 0, 136, 137, 138, 4, 89, 134, 89, 0, 139, + 26, 140, 62, 94, 32, 141, 142, 0, 4, 119, 143, 144, 4, 145, 146, 147, + 148, 149, 0, 0, 0, 0, 4, 138, 4, 4, 4, 4, 4, 150, 151, 152, + 4, 4, 4, 153, 4, 4, 154, 0, 155, 156, 157, 4, 4, 87, 158, 4, + 4, 107, 16, 4, 159, 4, 15, 160, 0, 0, 0, 161, 4, 4, 4, 94, + 0, 1, 1, 162, 4, 121, 163, 0, 164, 165, 166, 0, 4, 4, 4, 82, + 0, 0, 4, 83, 0, 0, 0, 0, 0, 0, 0, 0, 94, 4, 167, 0, + 121, 16, 18, 0, 107, 4, 168, 0, 4, 4, 4, 4, 107, 0, 0, 0, + 169, 170, 93, 0, 0, 0, 0, 0, 93, 154, 0, 0, 4, 171, 0, 0, + 172, 89, 0, 94, 0, 0, 0, 0, 4, 93, 93, 141, 0, 0, 0, 0, + 4, 4, 119, 0, 0, 0, 0, 0, 102, 91, 0, 0, 102, 23, 16, 119, + 102, 62, 0, 0, 102, 141, 173, 0, 0, 0, 0, 0, 4, 18, 0, 0, + 4, 4, 4, 129, 0, 0, 0, 0, 4, 4, 4, 138, 0, 0, 0, 0, + 4, 129, 0, 0, 0, 0, 0, 0, 4, 30, 0, 0, 0, 0, 0, 0, + 4, 4, 174, 0, 158, 0, 0, 0, 47, 0, 0, 0, 0, 0, 0, 0, + 4, 4, 175, 4, 176, 177, 178, 4, 179, 180, 181, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 182, 183, 76, 175, 175, 120, 120, 184, 184, 143, 0, + 178, 185, 186, 187, 188, 189, 0, 0, 4, 4, 4, 4, 4, 4, 98, 0, + 4, 83, 4, 4, 4, 4, 4, 4, 107, 0, 0, 0, 0, 0, 0, 0, +}; + +static RE_UINT8 re_id_start_stage_5[] = { + 0, 0, 0, 0, 254, 255, 255, 
7, 0, 4, 32, 4, 255, 255, 127, 255, + 255, 255, 255, 255, 195, 255, 3, 0, 31, 80, 0, 0, 0, 0, 223, 60, + 64, 215, 255, 255, 251, 255, 255, 255, 255, 255, 191, 255, 3, 252, 255, 255, + 255, 0, 254, 255, 255, 255, 127, 2, 254, 255, 255, 255, 255, 0, 0, 0, + 0, 0, 255, 255, 255, 7, 7, 0, 255, 7, 0, 0, 0, 192, 254, 255, + 255, 255, 47, 0, 96, 192, 0, 156, 0, 0, 253, 255, 255, 255, 0, 0, + 0, 224, 255, 255, 63, 0, 2, 0, 0, 252, 255, 255, 255, 7, 48, 4, + 255, 255, 63, 4, 16, 1, 0, 0, 255, 255, 255, 1, 253, 31, 0, 0, + 240, 255, 255, 255, 255, 255, 255, 35, 0, 0, 1, 255, 3, 0, 254, 254, + 224, 159, 249, 255, 255, 253, 197, 35, 0, 64, 0, 176, 3, 0, 3, 0, + 224, 135, 249, 255, 255, 253, 109, 3, 0, 0, 0, 94, 0, 0, 28, 0, + 224, 191, 251, 255, 255, 253, 237, 35, 0, 0, 1, 0, 3, 0, 0, 0, + 0, 0, 0, 176, 3, 0, 2, 0, 232, 199, 61, 214, 24, 199, 255, 3, + 224, 223, 253, 255, 255, 253, 239, 35, 0, 0, 0, 3, 0, 0, 0, 64, + 3, 0, 6, 0, 255, 255, 255, 39, 0, 64, 0, 0, 3, 0, 0, 252, + 224, 255, 127, 252, 255, 255, 251, 47, 127, 0, 0, 0, 255, 255, 13, 0, + 150, 37, 240, 254, 174, 236, 13, 32, 95, 0, 0, 240, 1, 0, 0, 0, + 255, 254, 255, 255, 255, 31, 0, 0, 0, 31, 0, 0, 255, 7, 0, 128, + 0, 0, 63, 60, 98, 192, 225, 255, 3, 64, 0, 0, 191, 32, 255, 255, + 255, 255, 255, 247, 255, 61, 127, 61, 255, 61, 255, 255, 255, 255, 61, 127, + 61, 255, 127, 255, 255, 255, 61, 255, 255, 255, 255, 7, 255, 255, 31, 0, + 255, 159, 255, 255, 255, 199, 1, 0, 255, 223, 3, 0, 255, 255, 3, 0, + 255, 223, 1, 0, 255, 255, 15, 0, 0, 0, 128, 16, 255, 255, 255, 0, + 255, 5, 255, 255, 255, 255, 63, 0, 255, 255, 255, 31, 255, 63, 31, 0, + 255, 15, 0, 0, 254, 0, 0, 0, 255, 255, 127, 0, 128, 0, 0, 0, + 224, 255, 255, 255, 224, 15, 0, 0, 248, 255, 255, 255, 1, 192, 0, 252, + 63, 0, 0, 0, 15, 0, 0, 0, 0, 224, 0, 252, 255, 255, 255, 63, + 0, 222, 99, 0, 255, 255, 63, 63, 63, 63, 255, 170, 255, 255, 223, 95, + 220, 31, 207, 15, 255, 31, 220, 31, 0, 0, 2, 128, 0, 0, 255, 31, + 132, 252, 47, 63, 80, 253, 255, 243, 224, 67, 0, 
0, 255, 1, 0, 0, + 255, 127, 255, 255, 255, 255, 255, 127, 31, 120, 12, 0, 255, 128, 0, 0, + 127, 127, 127, 127, 224, 0, 0, 0, 254, 3, 62, 31, 255, 255, 127, 248, + 255, 63, 254, 255, 255, 127, 0, 0, 255, 31, 255, 255, 0, 12, 0, 0, + 255, 127, 0, 128, 0, 0, 128, 255, 252, 255, 255, 255, 255, 121, 15, 0, + 0, 0, 0, 255, 187, 247, 255, 255, 7, 0, 0, 0, 0, 0, 252, 8, + 63, 0, 255, 255, 255, 255, 7, 0, 0, 128, 0, 0, 247, 15, 0, 0, + 255, 255, 127, 4, 255, 255, 98, 62, 5, 0, 0, 56, 255, 7, 28, 0, + 126, 126, 126, 0, 127, 127, 0, 0, 15, 0, 255, 255, 127, 248, 255, 255, + 255, 255, 255, 15, 255, 63, 255, 255, 255, 255, 255, 3, 127, 0, 248, 160, + 255, 253, 127, 95, 219, 255, 255, 255, 0, 0, 248, 255, 255, 255, 252, 255, + 0, 0, 255, 15, 0, 0, 223, 255, 192, 255, 255, 255, 252, 252, 252, 28, + 255, 239, 255, 255, 127, 255, 255, 183, 255, 63, 255, 63, 255, 255, 1, 0, + 15, 255, 62, 0, 63, 253, 255, 255, 255, 255, 191, 145, 255, 255, 255, 192, + 1, 0, 239, 254, 30, 0, 0, 0, 31, 0, 1, 0, 255, 255, 223, 255, + 255, 255, 255, 223, 100, 222, 255, 235, 239, 255, 255, 255, 191, 231, 223, 223, + 255, 255, 255, 123, 95, 252, 253, 255, 63, 255, 255, 255, 253, 255, 255, 247, + 255, 253, 255, 255, 150, 254, 247, 10, 132, 234, 150, 170, 150, 247, 247, 94, + 255, 251, 255, 15, 238, 251, 255, 15, +}; + +/* ID_Start: 1753 bytes. */ + +RE_UINT32 re_get_id_start(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 16; + code = ch ^ (f << 16); + pos = (RE_UINT32)re_id_start_stage_1[f] << 5; + f = code >> 11; + code ^= f << 11; + pos = (RE_UINT32)re_id_start_stage_2[pos + f] << 3; + f = code >> 8; + code ^= f << 8; + pos = (RE_UINT32)re_id_start_stage_3[pos + f] << 3; + f = code >> 5; + code ^= f << 5; + pos = (RE_UINT32)re_id_start_stage_4[pos + f] << 5; + pos += code; + value = (re_id_start_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* ID_Continue. 
*/ + +static RE_UINT8 re_id_continue_stage_1[] = { + 0, 1, 2, 3, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 6, 6, 6, + 6, 6, +}; + +static RE_UINT8 re_id_continue_stage_2[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 9, 10, 11, 7, 7, 7, 7, 12, 13, 13, 13, 13, 14, + 15, 16, 17, 13, 18, 13, 19, 13, 13, 13, 13, 13, 13, 20, 13, 13, + 13, 13, 13, 13, 13, 13, 21, 13, 13, 13, 22, 13, 13, 23, 13, 13, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 24, 7, 25, 26, 13, 13, 13, 13, 13, 13, 13, 27, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 28, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, +}; + +static RE_UINT8 re_id_continue_stage_3[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 1, 17, 18, 19, 1, 20, 21, 22, 23, 24, 25, 26, 27, 1, 28, + 29, 30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 31, 31, + 34, 35, 31, 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 36, 1, 1, 1, 1, 1, 1, 1, 1, 1, 37, + 1, 1, 1, 1, 38, 1, 39, 40, 41, 42, 43, 44, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 45, 31, 31, 31, 31, 31, 31, 31, 31, + 31, 1, 46, 47, 1, 48, 49, 50, 51, 52, 53, 54, 55, 31, 31, 31, + 56, 57, 58, 59, 60, 31, 31, 31, 61, 62, 31, 31, 31, 31, 63, 31, + 1, 1, 1, 64, 65, 31, 31, 31, 1, 1, 1, 1, 66, 31, 31, 31, + 1, 1, 67, 31, 31, 31, 31, 68, 69, 31, 31, 31, 31, 31, 31, 31, + 31, 70, 71, 31, 72, 73, 74, 75, 31, 31, 31, 31, 31, 31, 76, 31, + 1, 1, 1, 1, 1, 1, 77, 1, 1, 1, 1, 1, 1, 1, 1, 78, + 79, 31, 31, 31, 31, 31, 31, 31, 1, 1, 79, 31, 31, 31, 31, 31, + 31, 80, 31, 31, 31, 31, 31, 31, +}; + +static RE_UINT8 re_id_continue_stage_4[] = { + 0, 1, 2, 3, 0, 4, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 7, 8, 6, 6, 6, 9, 10, 11, 6, 12, + 6, 6, 6, 6, 13, 6, 6, 6, 6, 14, 15, 16, 14, 17, 18, 19, + 20, 6, 6, 21, 6, 6, 22, 23, 24, 6, 25, 6, 6, 26, 6, 27, + 6, 28, 29, 0, 0, 30, 0, 31, 6, 6, 6, 32, 33, 34, 35, 36, + 37, 38, 39, 40, 41, 42, 43, 44, 33, 42, 45, 
46, 47, 48, 49, 50, + 51, 52, 53, 44, 54, 55, 56, 57, 54, 58, 59, 60, 61, 62, 63, 64, + 16, 65, 66, 0, 67, 68, 69, 0, 70, 71, 72, 73, 74, 75, 76, 0, + 6, 6, 77, 6, 78, 6, 79, 80, 6, 6, 81, 6, 82, 83, 84, 6, + 85, 6, 58, 86, 87, 6, 6, 88, 16, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 89, 3, 6, 6, 90, 91, 88, 92, 93, 6, 6, 94, 95, + 96, 6, 6, 97, 6, 98, 6, 99, 75, 100, 101, 102, 6, 103, 104, 0, + 29, 6, 105, 106, 107, 108, 0, 0, 6, 6, 109, 110, 6, 6, 6, 92, + 6, 97, 111, 78, 0, 0, 112, 113, 6, 6, 6, 6, 6, 6, 6, 114, + 115, 6, 116, 78, 6, 117, 118, 119, 0, 120, 121, 122, 123, 0, 123, 124, + 125, 126, 127, 6, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 6, 129, 105, 6, 6, 6, 6, 130, 6, 79, 6, 131, 113, 132, 132, 6, + 133, 134, 16, 6, 135, 16, 6, 80, 136, 137, 6, 6, 138, 65, 0, 24, + 6, 6, 6, 6, 6, 99, 0, 0, 6, 6, 6, 6, 6, 6, 139, 0, + 6, 6, 6, 6, 139, 0, 24, 78, 140, 141, 6, 142, 143, 6, 6, 26, + 144, 145, 6, 6, 146, 147, 0, 148, 6, 149, 6, 92, 6, 6, 150, 151, + 6, 152, 92, 75, 6, 6, 153, 0, 6, 113, 154, 155, 6, 6, 156, 157, + 158, 159, 0, 0, 0, 0, 6, 160, 6, 6, 6, 6, 6, 161, 162, 29, + 6, 6, 6, 152, 6, 6, 163, 0, 164, 165, 166, 6, 6, 26, 167, 6, + 6, 78, 24, 6, 168, 6, 149, 169, 87, 170, 171, 172, 6, 6, 6, 75, + 1, 2, 3, 101, 6, 105, 173, 0, 174, 175, 176, 0, 6, 6, 6, 65, + 0, 0, 6, 88, 0, 0, 0, 177, 0, 0, 0, 0, 75, 6, 178, 0, + 105, 24, 147, 0, 78, 6, 179, 0, 6, 6, 6, 6, 78, 95, 0, 0, + 180, 181, 99, 0, 0, 0, 0, 0, 99, 163, 0, 0, 6, 182, 0, 0, + 183, 184, 0, 75, 0, 0, 0, 0, 6, 99, 99, 185, 0, 0, 0, 0, + 6, 6, 128, 0, 0, 0, 0, 0, 6, 6, 186, 50, 6, 65, 24, 187, + 6, 188, 0, 0, 6, 6, 150, 0, 0, 0, 0, 0, 6, 97, 95, 0, + 6, 6, 6, 138, 0, 0, 0, 0, 6, 6, 6, 189, 0, 0, 0, 0, + 6, 138, 0, 0, 0, 0, 0, 0, 6, 190, 0, 0, 0, 0, 0, 0, + 6, 6, 191, 105, 192, 0, 0, 0, 193, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 194, 195, 196, 0, 0, 0, 0, 197, 0, 0, 0, 0, 0, + 6, 6, 188, 6, 198, 199, 200, 6, 201, 202, 203, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 204, 205, 80, 188, 188, 129, 129, 206, 206, 207, 6, + 200, 208, 209, 
210, 211, 212, 0, 0, 6, 6, 6, 6, 6, 6, 113, 0, + 6, 88, 6, 6, 6, 6, 6, 6, 78, 0, 0, 0, 0, 0, 0, 0, + 6, 6, 6, 6, 6, 6, 6, 87, +}; + +static RE_UINT8 re_id_continue_stage_5[] = { + 0, 0, 0, 0, 0, 0, 255, 3, 254, 255, 255, 135, 254, 255, 255, 7, + 0, 4, 160, 4, 255, 255, 127, 255, 255, 255, 255, 255, 195, 255, 3, 0, + 31, 80, 0, 0, 255, 255, 223, 60, 192, 215, 255, 255, 251, 255, 255, 255, + 255, 255, 191, 255, 251, 252, 255, 255, 255, 0, 254, 255, 255, 255, 127, 2, + 254, 255, 255, 255, 255, 255, 255, 191, 182, 0, 255, 255, 255, 7, 7, 0, + 0, 0, 255, 7, 255, 195, 255, 255, 255, 255, 239, 159, 255, 253, 255, 159, + 0, 0, 255, 255, 255, 231, 255, 255, 255, 255, 3, 0, 255, 255, 63, 4, + 255, 63, 0, 0, 255, 255, 255, 15, 253, 31, 0, 0, 240, 255, 255, 127, + 207, 255, 254, 254, 238, 159, 249, 255, 255, 253, 197, 243, 159, 121, 128, 176, + 207, 255, 3, 0, 238, 135, 249, 255, 255, 253, 109, 211, 135, 57, 2, 94, + 192, 255, 63, 0, 238, 191, 251, 255, 255, 253, 237, 243, 191, 59, 1, 0, + 207, 255, 0, 0, 159, 57, 192, 176, 207, 255, 2, 0, 236, 199, 61, 214, + 24, 199, 255, 195, 199, 61, 129, 0, 192, 255, 0, 0, 238, 223, 253, 255, + 255, 253, 239, 227, 223, 61, 96, 3, 236, 223, 253, 255, 255, 253, 239, 243, + 223, 61, 96, 64, 207, 255, 6, 0, 255, 255, 255, 231, 223, 125, 128, 0, + 207, 255, 0, 252, 236, 255, 127, 252, 255, 255, 251, 47, 127, 132, 95, 255, + 0, 0, 12, 0, 255, 255, 255, 7, 255, 127, 255, 3, 150, 37, 240, 254, + 174, 236, 255, 59, 95, 63, 255, 243, 1, 0, 0, 3, 255, 3, 160, 194, + 255, 254, 255, 255, 255, 31, 254, 255, 223, 255, 255, 254, 255, 255, 255, 31, + 64, 0, 0, 0, 255, 3, 255, 255, 255, 255, 255, 63, 191, 32, 255, 255, + 255, 255, 255, 247, 255, 61, 127, 61, 255, 61, 255, 255, 255, 255, 61, 127, + 61, 255, 127, 255, 255, 255, 61, 255, 0, 254, 3, 0, 255, 255, 0, 0, + 255, 255, 31, 0, 255, 159, 255, 255, 255, 199, 1, 0, 255, 223, 31, 0, + 255, 255, 15, 0, 255, 223, 13, 0, 255, 255, 143, 48, 255, 3, 0, 0, + 0, 56, 255, 3, 255, 255, 255, 0, 255, 7, 255, 255, 
255, 255, 63, 0, + 255, 15, 255, 15, 192, 255, 255, 255, 255, 63, 31, 0, 255, 15, 255, 255, + 255, 3, 255, 7, 255, 255, 255, 127, 255, 255, 255, 159, 255, 3, 255, 3, + 128, 0, 0, 0, 255, 15, 255, 3, 0, 248, 15, 0, 255, 227, 255, 255, + 0, 0, 247, 255, 255, 255, 127, 0, 127, 0, 0, 240, 255, 255, 63, 63, + 63, 63, 255, 170, 255, 255, 223, 95, 220, 31, 207, 15, 255, 31, 220, 31, + 0, 0, 0, 128, 1, 0, 16, 0, 0, 0, 2, 128, 0, 0, 255, 31, + 226, 255, 1, 0, 132, 252, 47, 63, 80, 253, 255, 243, 224, 67, 0, 0, + 255, 1, 0, 0, 255, 127, 255, 255, 31, 248, 15, 0, 255, 128, 0, 128, + 127, 127, 127, 127, 224, 0, 0, 0, 254, 255, 62, 31, 255, 255, 127, 254, + 224, 255, 255, 255, 255, 63, 254, 255, 255, 127, 0, 0, 255, 31, 0, 0, + 255, 31, 255, 255, 255, 15, 0, 0, 255, 255, 240, 191, 255, 255, 255, 128, + 0, 0, 128, 255, 252, 255, 255, 255, 255, 121, 15, 0, 255, 7, 0, 0, + 0, 0, 0, 255, 255, 0, 0, 0, 31, 0, 255, 3, 255, 255, 255, 8, + 255, 63, 255, 255, 1, 128, 255, 3, 255, 63, 255, 3, 255, 255, 127, 12, + 7, 0, 0, 56, 255, 255, 124, 0, 126, 126, 126, 0, 127, 127, 0, 0, + 255, 55, 255, 3, 15, 0, 255, 255, 127, 248, 255, 255, 255, 255, 255, 3, + 127, 0, 248, 224, 255, 253, 127, 95, 219, 255, 255, 255, 0, 0, 248, 255, + 255, 255, 252, 255, 0, 0, 255, 15, 127, 0, 24, 0, 0, 224, 0, 0, + 0, 0, 223, 255, 252, 252, 252, 28, 255, 239, 255, 255, 127, 255, 255, 183, + 255, 63, 255, 63, 0, 0, 0, 32, 255, 255, 1, 0, 15, 255, 62, 0, + 63, 253, 255, 255, 255, 255, 191, 145, 255, 255, 255, 192, 111, 240, 239, 254, + 255, 255, 15, 135, 255, 255, 7, 0, 127, 0, 0, 0, 255, 1, 255, 3, + 255, 255, 223, 255, 7, 0, 0, 0, 255, 255, 255, 1, 31, 0, 255, 255, + 0, 128, 255, 255, 3, 0, 0, 0, 224, 227, 7, 248, 231, 15, 0, 0, + 0, 60, 0, 0, 28, 0, 0, 0, 255, 255, 255, 223, 100, 222, 255, 235, + 239, 255, 255, 255, 191, 231, 223, 223, 255, 255, 255, 123, 95, 252, 253, 255, + 63, 255, 255, 255, 253, 255, 255, 247, 255, 253, 255, 255, 247, 207, 255, 255, + 150, 254, 247, 10, 132, 234, 150, 170, 150, 247, 247, 94, 
255, 251, 255, 15, + 238, 251, 255, 15, +}; + +/* ID_Continue: 1894 bytes. */ + +RE_UINT32 re_get_id_continue(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 15; + code = ch ^ (f << 15); + pos = (RE_UINT32)re_id_continue_stage_1[f] << 4; + f = code >> 11; + code ^= f << 11; + pos = (RE_UINT32)re_id_continue_stage_2[pos + f] << 3; + f = code >> 8; + code ^= f << 8; + pos = (RE_UINT32)re_id_continue_stage_3[pos + f] << 3; + f = code >> 5; + code ^= f << 5; + pos = (RE_UINT32)re_id_continue_stage_4[pos + f] << 5; + pos += code; + value = (re_id_continue_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* XID_Start. */ + +static RE_UINT8 re_xid_start_stage_1[] = { + 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, +}; + +static RE_UINT8 re_xid_start_stage_2[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 9, 10, 11, 7, 7, 7, 7, 12, 13, 13, 13, 13, 14, + 15, 16, 17, 13, 18, 13, 19, 13, 13, 13, 13, 13, 13, 20, 13, 13, + 13, 13, 13, 13, 13, 13, 21, 13, 13, 13, 22, 13, 13, 23, 13, 13, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 24, 7, 25, 26, 13, 13, 13, 13, 13, 13, 13, 27, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, +}; + +static RE_UINT8 re_xid_start_stage_3[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 1, 17, 18, 19, 1, 20, 21, 22, 23, 24, 25, 26, 27, 1, 28, + 29, 30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 31, 31, + 34, 35, 31, 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 36, 1, 1, 1, 1, 1, 1, 1, 1, 1, 37, + 1, 1, 1, 1, 38, 1, 39, 40, 41, 42, 43, 44, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 45, 31, 31, 31, 31, 31, 31, 31, 31, + 31, 1, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 31, 31, 31, + 57, 58, 59, 60, 61, 31, 31, 31, 62, 63, 31, 31, 31, 31, 64, 31, + 1, 1, 1, 65, 66, 31, 31, 31, 1, 1, 1, 1, 67, 31, 31, 31, + 1, 1, 68, 31, 31, 31, 31, 
69, 70, 31, 31, 31, 31, 31, 31, 31, + 31, 31, 31, 31, 71, 72, 73, 74, 31, 31, 31, 31, 31, 31, 75, 31, + 1, 1, 1, 1, 1, 1, 76, 1, 1, 1, 1, 1, 1, 1, 1, 77, + 78, 31, 31, 31, 31, 31, 31, 31, 1, 1, 78, 31, 31, 31, 31, 31, +}; + +static RE_UINT8 re_xid_start_stage_4[] = { + 0, 0, 1, 1, 0, 2, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 5, 6, 0, 0, 0, 7, 8, 9, 4, 10, + 4, 4, 4, 4, 11, 4, 4, 4, 4, 12, 13, 14, 15, 0, 16, 17, + 0, 4, 18, 19, 4, 4, 20, 21, 22, 23, 24, 4, 4, 25, 26, 27, + 28, 29, 30, 0, 0, 31, 0, 0, 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, 36, 45, 48, 49, 50, 51, 46, 0, + 52, 53, 54, 47, 52, 53, 55, 56, 52, 57, 58, 59, 60, 61, 62, 0, + 14, 63, 62, 0, 64, 65, 66, 0, 67, 0, 68, 69, 70, 0, 0, 0, + 4, 71, 72, 73, 74, 4, 75, 76, 4, 4, 77, 4, 78, 79, 80, 4, + 81, 4, 82, 0, 23, 4, 4, 83, 14, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 84, 1, 4, 4, 85, 86, 87, 87, 88, 4, 89, 90, 0, + 0, 4, 4, 91, 4, 92, 4, 93, 94, 0, 16, 95, 4, 96, 97, 0, + 98, 4, 83, 0, 0, 99, 0, 0, 100, 89, 101, 0, 102, 103, 4, 104, + 4, 105, 106, 107, 0, 0, 0, 108, 4, 4, 4, 4, 4, 4, 0, 0, + 109, 4, 110, 107, 4, 111, 112, 113, 0, 0, 0, 114, 115, 0, 0, 0, + 116, 117, 118, 4, 119, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 4, 120, 121, 4, 4, 4, 4, 122, 4, 75, 4, 123, 98, 124, 124, 0, + 125, 126, 14, 4, 127, 14, 4, 76, 100, 128, 4, 4, 129, 82, 0, 16, + 4, 4, 4, 4, 4, 93, 0, 0, 4, 4, 4, 4, 4, 4, 69, 0, + 4, 4, 4, 4, 69, 0, 16, 107, 130, 131, 4, 132, 91, 4, 4, 23, + 133, 134, 4, 4, 135, 18, 0, 136, 137, 138, 4, 89, 134, 89, 0, 139, + 26, 140, 62, 94, 32, 141, 142, 0, 4, 119, 143, 144, 4, 145, 146, 147, + 148, 149, 0, 0, 0, 0, 4, 138, 4, 4, 4, 4, 4, 150, 151, 152, + 4, 4, 4, 153, 4, 4, 154, 0, 155, 156, 157, 4, 4, 87, 158, 4, + 4, 4, 107, 32, 4, 4, 4, 4, 4, 107, 16, 4, 159, 4, 15, 160, + 0, 0, 0, 161, 4, 4, 4, 94, 0, 1, 1, 162, 107, 121, 163, 0, + 164, 165, 166, 0, 4, 4, 4, 82, 0, 0, 4, 83, 0, 0, 0, 0, + 0, 0, 0, 0, 94, 4, 167, 0, 121, 16, 18, 0, 107, 4, 168, 0, + 4, 4, 4, 4, 107, 0, 0, 0, 169, 
170, 93, 0, 0, 0, 0, 0, + 93, 154, 0, 0, 4, 171, 0, 0, 172, 89, 0, 94, 0, 0, 0, 0, + 4, 93, 93, 141, 0, 0, 0, 0, 4, 4, 119, 0, 0, 0, 0, 0, + 102, 91, 0, 0, 102, 23, 16, 119, 102, 62, 0, 0, 102, 141, 173, 0, + 0, 0, 0, 0, 4, 18, 0, 0, 4, 4, 4, 129, 0, 0, 0, 0, + 4, 4, 4, 138, 0, 0, 0, 0, 4, 129, 0, 0, 0, 0, 0, 0, + 4, 30, 0, 0, 0, 0, 0, 0, 4, 4, 174, 0, 158, 0, 0, 0, + 47, 0, 0, 0, 0, 0, 0, 0, 4, 4, 175, 4, 176, 177, 178, 4, + 179, 180, 181, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 182, 183, 76, + 175, 175, 120, 120, 184, 184, 143, 0, 178, 185, 186, 187, 188, 189, 0, 0, + 4, 4, 4, 4, 4, 4, 98, 0, 4, 83, 4, 4, 4, 4, 4, 4, + 107, 0, 0, 0, 0, 0, 0, 0, +}; + +static RE_UINT8 re_xid_start_stage_5[] = { + 0, 0, 0, 0, 254, 255, 255, 7, 0, 4, 32, 4, 255, 255, 127, 255, + 255, 255, 255, 255, 195, 255, 3, 0, 31, 80, 0, 0, 0, 0, 223, 56, + 64, 215, 255, 255, 251, 255, 255, 255, 255, 255, 191, 255, 3, 252, 255, 255, + 255, 0, 254, 255, 255, 255, 127, 2, 254, 255, 255, 255, 255, 0, 0, 0, + 0, 0, 255, 255, 255, 7, 7, 0, 255, 7, 0, 0, 0, 192, 254, 255, + 255, 255, 47, 0, 96, 192, 0, 156, 0, 0, 253, 255, 255, 255, 0, 0, + 0, 224, 255, 255, 63, 0, 2, 0, 0, 252, 255, 255, 255, 7, 48, 4, + 255, 255, 63, 4, 16, 1, 0, 0, 255, 255, 255, 1, 253, 31, 0, 0, + 240, 255, 255, 255, 255, 255, 255, 35, 0, 0, 1, 255, 3, 0, 254, 254, + 224, 159, 249, 255, 255, 253, 197, 35, 0, 64, 0, 176, 3, 0, 3, 0, + 224, 135, 249, 255, 255, 253, 109, 3, 0, 0, 0, 94, 0, 0, 28, 0, + 224, 191, 251, 255, 255, 253, 237, 35, 0, 0, 1, 0, 3, 0, 0, 0, + 0, 0, 0, 176, 3, 0, 2, 0, 232, 199, 61, 214, 24, 199, 255, 3, + 224, 223, 253, 255, 255, 253, 239, 35, 0, 0, 0, 3, 0, 0, 0, 64, + 3, 0, 6, 0, 255, 255, 255, 39, 0, 64, 0, 0, 3, 0, 0, 252, + 224, 255, 127, 252, 255, 255, 251, 47, 127, 0, 0, 0, 255, 255, 5, 0, + 150, 37, 240, 254, 174, 236, 5, 32, 95, 0, 0, 240, 1, 0, 0, 0, + 255, 254, 255, 255, 255, 31, 0, 0, 0, 31, 0, 0, 255, 7, 0, 128, + 0, 0, 63, 60, 98, 192, 225, 255, 3, 64, 0, 0, 191, 32, 255, 255, + 255, 255, 255, 247, 255, 
61, 127, 61, 255, 61, 255, 255, 255, 255, 61, 127, + 61, 255, 127, 255, 255, 255, 61, 255, 255, 255, 255, 7, 255, 255, 31, 0, + 255, 159, 255, 255, 255, 199, 1, 0, 255, 223, 3, 0, 255, 255, 3, 0, + 255, 223, 1, 0, 255, 255, 15, 0, 0, 0, 128, 16, 255, 255, 255, 0, + 255, 5, 255, 255, 255, 255, 63, 0, 255, 255, 255, 31, 255, 63, 31, 0, + 255, 15, 0, 0, 254, 0, 0, 0, 255, 255, 127, 0, 128, 0, 0, 0, + 224, 255, 255, 255, 224, 15, 0, 0, 248, 255, 255, 255, 1, 192, 0, 252, + 63, 0, 0, 0, 15, 0, 0, 0, 0, 224, 0, 252, 255, 255, 255, 63, + 0, 222, 99, 0, 255, 255, 63, 63, 63, 63, 255, 170, 255, 255, 223, 95, + 220, 31, 207, 15, 255, 31, 220, 31, 0, 0, 2, 128, 0, 0, 255, 31, + 132, 252, 47, 63, 80, 253, 255, 243, 224, 67, 0, 0, 255, 1, 0, 0, + 255, 127, 255, 255, 255, 255, 255, 127, 31, 120, 12, 0, 255, 128, 0, 0, + 127, 127, 127, 127, 224, 0, 0, 0, 254, 3, 62, 31, 255, 255, 127, 224, + 255, 63, 254, 255, 255, 127, 0, 0, 255, 31, 255, 255, 0, 12, 0, 0, + 255, 127, 0, 128, 0, 0, 128, 255, 252, 255, 255, 255, 255, 121, 15, 0, + 0, 0, 0, 255, 187, 247, 255, 255, 7, 0, 0, 0, 0, 0, 252, 8, + 63, 0, 255, 255, 255, 255, 7, 0, 0, 128, 0, 0, 247, 15, 0, 0, + 255, 255, 127, 4, 255, 255, 98, 62, 5, 0, 0, 56, 255, 7, 28, 0, + 126, 126, 126, 0, 127, 127, 0, 0, 15, 0, 255, 255, 127, 248, 255, 255, + 255, 255, 255, 15, 255, 63, 255, 255, 255, 255, 255, 3, 127, 0, 248, 160, + 255, 253, 127, 95, 219, 255, 255, 255, 0, 0, 248, 255, 255, 255, 252, 255, + 0, 0, 255, 3, 0, 0, 138, 170, 192, 255, 255, 255, 252, 252, 252, 28, + 255, 239, 255, 255, 127, 255, 255, 183, 255, 63, 255, 63, 255, 255, 1, 0, + 15, 255, 62, 0, 63, 253, 255, 255, 255, 255, 191, 145, 255, 255, 255, 192, + 1, 0, 239, 254, 30, 0, 0, 0, 31, 0, 1, 0, 255, 255, 223, 255, + 255, 255, 255, 223, 100, 222, 255, 235, 239, 255, 255, 255, 191, 231, 223, 223, + 255, 255, 255, 123, 95, 252, 253, 255, 63, 255, 255, 255, 253, 255, 255, 247, + 255, 253, 255, 255, 150, 254, 247, 10, 132, 234, 150, 170, 150, 247, 247, 94, + 255, 251, 255, 15, 
238, 251, 255, 15, +}; + +/* XID_Start: 1761 bytes. */ + +RE_UINT32 re_get_xid_start(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 16; + code = ch ^ (f << 16); + pos = (RE_UINT32)re_xid_start_stage_1[f] << 5; + f = code >> 11; + code ^= f << 11; + pos = (RE_UINT32)re_xid_start_stage_2[pos + f] << 3; + f = code >> 8; + code ^= f << 8; + pos = (RE_UINT32)re_xid_start_stage_3[pos + f] << 3; + f = code >> 5; + code ^= f << 5; + pos = (RE_UINT32)re_xid_start_stage_4[pos + f] << 5; + pos += code; + value = (re_xid_start_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* XID_Continue. */ + +static RE_UINT8 re_xid_continue_stage_1[] = { + 0, 1, 2, 3, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 6, 6, 6, + 6, 6, +}; + +static RE_UINT8 re_xid_continue_stage_2[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 9, 10, 11, 7, 7, 7, 7, 12, 13, 13, 13, 13, 14, + 15, 16, 17, 13, 18, 13, 19, 13, 13, 13, 13, 13, 13, 20, 13, 13, + 13, 13, 13, 13, 13, 13, 21, 13, 13, 13, 22, 13, 13, 23, 13, 13, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 24, 7, 25, 26, 13, 13, 13, 13, 13, 13, 13, 27, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 28, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, +}; + +static RE_UINT8 re_xid_continue_stage_3[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 1, 17, 18, 19, 1, 20, 21, 22, 23, 24, 25, 26, 27, 1, 28, + 29, 30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 31, 31, + 34, 35, 31, 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 36, 1, 1, 1, 1, 1, 1, 1, 1, 1, 37, + 1, 1, 1, 1, 38, 1, 39, 40, 41, 42, 43, 44, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 45, 31, 31, 31, 31, 31, 31, 31, 31, + 31, 1, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 31, 31, 31, + 57, 58, 59, 60, 61, 31, 31, 31, 62, 63, 31, 31, 31, 31, 64, 31, + 1, 1, 1, 65, 66, 31, 31, 31, 1, 1, 1, 1, 67, 31, 31, 
31, + 1, 1, 68, 31, 31, 31, 31, 69, 70, 31, 31, 31, 31, 31, 31, 31, + 31, 71, 72, 31, 73, 74, 75, 76, 31, 31, 31, 31, 31, 31, 77, 31, + 1, 1, 1, 1, 1, 1, 78, 1, 1, 1, 1, 1, 1, 1, 1, 79, + 80, 31, 31, 31, 31, 31, 31, 31, 1, 1, 80, 31, 31, 31, 31, 31, + 31, 81, 31, 31, 31, 31, 31, 31, +}; + +static RE_UINT8 re_xid_continue_stage_4[] = { + 0, 1, 2, 3, 0, 4, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 7, 8, 6, 6, 6, 9, 10, 11, 6, 12, + 6, 6, 6, 6, 13, 6, 6, 6, 6, 14, 15, 16, 14, 17, 18, 19, + 20, 6, 6, 21, 6, 6, 22, 23, 24, 6, 25, 6, 6, 26, 6, 27, + 6, 28, 29, 0, 0, 30, 0, 31, 6, 6, 6, 32, 33, 34, 35, 36, + 37, 38, 39, 40, 41, 42, 43, 44, 33, 42, 45, 46, 47, 48, 49, 50, + 51, 52, 53, 44, 54, 55, 56, 57, 54, 58, 59, 60, 61, 62, 63, 64, + 16, 65, 66, 0, 67, 68, 69, 0, 70, 71, 72, 73, 74, 75, 76, 0, + 6, 6, 77, 6, 78, 6, 79, 80, 6, 6, 81, 6, 82, 83, 84, 6, + 85, 6, 58, 86, 87, 6, 6, 88, 16, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 89, 3, 6, 6, 90, 91, 88, 92, 93, 6, 6, 94, 95, + 96, 6, 6, 97, 6, 98, 6, 99, 75, 100, 101, 102, 6, 103, 104, 0, + 29, 6, 105, 106, 107, 108, 0, 0, 6, 6, 109, 110, 6, 6, 6, 92, + 6, 97, 111, 78, 0, 0, 112, 113, 6, 6, 6, 6, 6, 6, 6, 114, + 115, 6, 116, 78, 6, 117, 118, 119, 0, 120, 121, 122, 123, 0, 123, 124, + 125, 126, 127, 6, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 6, 129, 105, 6, 6, 6, 6, 130, 6, 79, 6, 131, 113, 132, 132, 6, + 133, 134, 16, 6, 135, 16, 6, 80, 136, 137, 6, 6, 138, 65, 0, 24, + 6, 6, 6, 6, 6, 99, 0, 0, 6, 6, 6, 6, 6, 6, 139, 0, + 6, 6, 6, 6, 139, 0, 24, 78, 140, 141, 6, 142, 143, 6, 6, 26, + 144, 145, 6, 6, 146, 147, 0, 148, 6, 149, 6, 92, 6, 6, 150, 151, + 6, 152, 92, 75, 6, 6, 153, 0, 6, 113, 154, 155, 6, 6, 156, 157, + 158, 159, 0, 0, 0, 0, 6, 160, 6, 6, 6, 6, 6, 161, 162, 29, + 6, 6, 6, 152, 6, 6, 163, 0, 164, 165, 166, 6, 6, 26, 167, 6, + 6, 6, 78, 168, 6, 6, 6, 6, 6, 78, 24, 6, 169, 6, 149, 1, + 87, 170, 171, 172, 6, 6, 6, 75, 1, 2, 3, 101, 6, 105, 173, 0, + 174, 175, 176, 0, 6, 6, 6, 65, 0, 0, 6, 88, 0, 0, 0, 177, + 0, 0, 
0, 0, 75, 6, 178, 0, 105, 24, 147, 0, 78, 6, 179, 0, + 6, 6, 6, 6, 78, 95, 0, 0, 180, 181, 99, 0, 0, 0, 0, 0, + 99, 163, 0, 0, 6, 182, 0, 0, 183, 184, 0, 75, 0, 0, 0, 0, + 6, 99, 99, 185, 0, 0, 0, 0, 6, 6, 128, 0, 0, 0, 0, 0, + 6, 6, 186, 50, 6, 65, 24, 187, 6, 188, 0, 0, 6, 6, 150, 0, + 0, 0, 0, 0, 6, 97, 95, 0, 6, 6, 6, 138, 0, 0, 0, 0, + 6, 6, 6, 189, 0, 0, 0, 0, 6, 138, 0, 0, 0, 0, 0, 0, + 6, 190, 0, 0, 0, 0, 0, 0, 6, 6, 191, 105, 192, 0, 0, 0, + 193, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 194, 195, 196, 0, 0, + 0, 0, 197, 0, 0, 0, 0, 0, 6, 6, 188, 6, 198, 199, 200, 6, + 201, 202, 203, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 204, 205, 80, + 188, 188, 129, 129, 206, 206, 207, 6, 200, 208, 209, 210, 211, 212, 0, 0, + 6, 6, 6, 6, 6, 6, 113, 0, 6, 88, 6, 6, 6, 6, 6, 6, + 78, 0, 0, 0, 0, 0, 0, 0, 6, 6, 6, 6, 6, 6, 6, 87, +}; + +static RE_UINT8 re_xid_continue_stage_5[] = { + 0, 0, 0, 0, 0, 0, 255, 3, 254, 255, 255, 135, 254, 255, 255, 7, + 0, 4, 160, 4, 255, 255, 127, 255, 255, 255, 255, 255, 195, 255, 3, 0, + 31, 80, 0, 0, 255, 255, 223, 56, 192, 215, 255, 255, 251, 255, 255, 255, + 255, 255, 191, 255, 251, 252, 255, 255, 255, 0, 254, 255, 255, 255, 127, 2, + 254, 255, 255, 255, 255, 255, 255, 191, 182, 0, 255, 255, 255, 7, 7, 0, + 0, 0, 255, 7, 255, 195, 255, 255, 255, 255, 239, 159, 255, 253, 255, 159, + 0, 0, 255, 255, 255, 231, 255, 255, 255, 255, 3, 0, 255, 255, 63, 4, + 255, 63, 0, 0, 255, 255, 255, 15, 253, 31, 0, 0, 240, 255, 255, 127, + 207, 255, 254, 254, 238, 159, 249, 255, 255, 253, 197, 243, 159, 121, 128, 176, + 207, 255, 3, 0, 238, 135, 249, 255, 255, 253, 109, 211, 135, 57, 2, 94, + 192, 255, 63, 0, 238, 191, 251, 255, 255, 253, 237, 243, 191, 59, 1, 0, + 207, 255, 0, 0, 159, 57, 192, 176, 207, 255, 2, 0, 236, 199, 61, 214, + 24, 199, 255, 195, 199, 61, 129, 0, 192, 255, 0, 0, 238, 223, 253, 255, + 255, 253, 239, 227, 223, 61, 96, 3, 236, 223, 253, 255, 255, 253, 239, 243, + 223, 61, 96, 64, 207, 255, 6, 0, 255, 255, 255, 231, 223, 125, 128, 0, + 207, 255, 0, 252, 
236, 255, 127, 252, 255, 255, 251, 47, 127, 132, 95, 255, + 0, 0, 12, 0, 255, 255, 255, 7, 255, 127, 255, 3, 150, 37, 240, 254, + 174, 236, 255, 59, 95, 63, 255, 243, 1, 0, 0, 3, 255, 3, 160, 194, + 255, 254, 255, 255, 255, 31, 254, 255, 223, 255, 255, 254, 255, 255, 255, 31, + 64, 0, 0, 0, 255, 3, 255, 255, 255, 255, 255, 63, 191, 32, 255, 255, + 255, 255, 255, 247, 255, 61, 127, 61, 255, 61, 255, 255, 255, 255, 61, 127, + 61, 255, 127, 255, 255, 255, 61, 255, 0, 254, 3, 0, 255, 255, 0, 0, + 255, 255, 31, 0, 255, 159, 255, 255, 255, 199, 1, 0, 255, 223, 31, 0, + 255, 255, 15, 0, 255, 223, 13, 0, 255, 255, 143, 48, 255, 3, 0, 0, + 0, 56, 255, 3, 255, 255, 255, 0, 255, 7, 255, 255, 255, 255, 63, 0, + 255, 15, 255, 15, 192, 255, 255, 255, 255, 63, 31, 0, 255, 15, 255, 255, + 255, 3, 255, 7, 255, 255, 255, 127, 255, 255, 255, 159, 255, 3, 255, 3, + 128, 0, 0, 0, 255, 15, 255, 3, 0, 248, 15, 0, 255, 227, 255, 255, + 0, 0, 247, 255, 255, 255, 127, 0, 127, 0, 0, 240, 255, 255, 63, 63, + 63, 63, 255, 170, 255, 255, 223, 95, 220, 31, 207, 15, 255, 31, 220, 31, + 0, 0, 0, 128, 1, 0, 16, 0, 0, 0, 2, 128, 0, 0, 255, 31, + 226, 255, 1, 0, 132, 252, 47, 63, 80, 253, 255, 243, 224, 67, 0, 0, + 255, 1, 0, 0, 255, 127, 255, 255, 31, 248, 15, 0, 255, 128, 0, 128, + 127, 127, 127, 127, 224, 0, 0, 0, 254, 255, 62, 31, 255, 255, 127, 230, + 224, 255, 255, 255, 255, 63, 254, 255, 255, 127, 0, 0, 255, 31, 0, 0, + 255, 31, 255, 255, 255, 15, 0, 0, 255, 255, 240, 191, 255, 255, 255, 128, + 0, 0, 128, 255, 252, 255, 255, 255, 255, 121, 15, 0, 255, 7, 0, 0, + 0, 0, 0, 255, 255, 0, 0, 0, 31, 0, 255, 3, 255, 255, 255, 8, + 255, 63, 255, 255, 1, 128, 255, 3, 255, 63, 255, 3, 255, 255, 127, 12, + 7, 0, 0, 56, 255, 255, 124, 0, 126, 126, 126, 0, 127, 127, 0, 0, + 255, 55, 255, 3, 15, 0, 255, 255, 127, 248, 255, 255, 255, 255, 255, 3, + 127, 0, 248, 224, 255, 253, 127, 95, 219, 255, 255, 255, 0, 0, 248, 255, + 240, 255, 255, 255, 255, 255, 252, 255, 127, 0, 24, 0, 0, 224, 0, 0, + 0, 0, 138, 170, 
252, 252, 252, 28, 255, 239, 255, 255, 127, 255, 255, 183, + 255, 63, 255, 63, 0, 0, 0, 32, 255, 255, 1, 0, 15, 255, 62, 0, + 63, 253, 255, 255, 255, 255, 191, 145, 255, 255, 255, 192, 111, 240, 239, 254, + 255, 255, 15, 135, 255, 255, 7, 0, 127, 0, 0, 0, 255, 1, 255, 3, + 255, 255, 223, 255, 7, 0, 0, 0, 255, 255, 255, 1, 31, 0, 255, 255, + 0, 128, 255, 255, 3, 0, 0, 0, 224, 227, 7, 248, 231, 15, 0, 0, + 0, 60, 0, 0, 28, 0, 0, 0, 255, 255, 255, 223, 100, 222, 255, 235, + 239, 255, 255, 255, 191, 231, 223, 223, 255, 255, 255, 123, 95, 252, 253, 255, + 63, 255, 255, 255, 253, 255, 255, 247, 255, 253, 255, 255, 247, 207, 255, 255, + 150, 254, 247, 10, 132, 234, 150, 170, 150, 247, 247, 94, 255, 251, 255, 15, + 238, 251, 255, 15, +}; + +/* XID_Continue: 1902 bytes. */ + +RE_UINT32 re_get_xid_continue(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 15; + code = ch ^ (f << 15); + pos = (RE_UINT32)re_xid_continue_stage_1[f] << 4; + f = code >> 11; + code ^= f << 11; + pos = (RE_UINT32)re_xid_continue_stage_2[pos + f] << 3; + f = code >> 8; + code ^= f << 8; + pos = (RE_UINT32)re_xid_continue_stage_3[pos + f] << 3; + f = code >> 5; + code ^= f << 5; + pos = (RE_UINT32)re_xid_continue_stage_4[pos + f] << 5; + pos += code; + value = (re_xid_continue_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* Default_Ignorable_Code_Point. 
*/ + +static RE_UINT8 re_default_ignorable_code_point_stage_1[] = { + 0, 1, 1, 2, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 4, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, +}; + +static RE_UINT8 re_default_ignorable_code_point_stage_2[] = { + 0, 1, 2, 3, 4, 1, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 6, 1, 1, 7, 1, 1, 1, 1, 1, + 8, 8, 1, 1, 1, 1, 1, 1, +}; + +static RE_UINT8 re_default_ignorable_code_point_stage_3[] = { + 0, 1, 1, 2, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 4, 1, 1, 1, 1, 1, 5, 6, 1, 1, 1, 1, 1, 1, 1, + 7, 1, 1, 1, 1, 1, 1, 1, 1, 8, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 9, 10, 1, 11, 1, 1, 1, 1, 1, 1, + 12, 12, 12, 12, 12, 12, 12, 12, +}; + +static RE_UINT8 re_default_ignorable_code_point_stage_4[] = { + 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 2, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 4, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, + 7, 0, 0, 0, 0, 0, 0, 0, 8, 9, 0, 10, 0, 0, 0, 0, + 0, 0, 0, 11, 0, 0, 0, 0, 10, 0, 0, 0, 0, 0, 0, 4, + 0, 0, 0, 0, 0, 5, 0, 12, 0, 0, 0, 13, 0, 0, 0, 0, + 14, 14, 14, 14, 14, 14, 14, 14, +}; + +static RE_UINT8 re_default_ignorable_code_point_stage_5[] = { + 0, 0, 0, 0, 0, 32, 0, 0, 0, 128, 0, 0, 0, 0, 0, 16, + 0, 0, 0, 128, 1, 0, 0, 0, 0, 0, 48, 0, 0, 120, 0, 0, + 0, 248, 0, 0, 0, 124, 0, 0, 255, 255, 0, 0, 16, 0, 0, 0, + 0, 0, 255, 1, 0, 0, 248, 7, 255, 255, 255, 255, +}; + +/* Default_Ignorable_Code_Point: 344 bytes. 
*/ + +RE_UINT32 re_get_default_ignorable_code_point(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 14; + code = ch ^ (f << 14); + pos = (RE_UINT32)re_default_ignorable_code_point_stage_1[f] << 3; + f = code >> 11; + code ^= f << 11; + pos = (RE_UINT32)re_default_ignorable_code_point_stage_2[pos + f] << 3; + f = code >> 8; + code ^= f << 8; + pos = (RE_UINT32)re_default_ignorable_code_point_stage_3[pos + f] << 3; + f = code >> 5; + code ^= f << 5; + pos = (RE_UINT32)re_default_ignorable_code_point_stage_4[pos + f] << 5; + pos += code; + value = (re_default_ignorable_code_point_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* Grapheme_Extend. */ + +static RE_UINT8 re_grapheme_extend_stage_1[] = { + 0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 4, 4, 4, + 4, 4, +}; + +static RE_UINT8 re_grapheme_extend_stage_2[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 8, 9, 7, 7, 7, 7, 7, 7, 7, 7, 7, 10, + 11, 12, 13, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 14, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 15, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 16, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, +}; + +static RE_UINT8 re_grapheme_extend_stage_3[] = { + 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, + 14, 0, 0, 15, 0, 0, 0, 16, 17, 18, 19, 20, 21, 22, 0, 0, + 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24, 25, 0, 0, + 26, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 27, 0, 28, 29, 30, 31, 0, 0, 0, 0, + 0, 0, 0, 32, 0, 0, 33, 34, 0, 35, 0, 0, 0, 0, 0, 0, + 0, 0, 36, 0, 0, 0, 0, 0, 37, 38, 0, 0, 0, 0, 39, 0, + 0, 0, 0, 0, 0, 0, 0, 40, 0, 41, 42, 0, 0, 0, 0, 0, + 0, 43, 0, 0, 0, 0, 0, 0, +}; + +static RE_UINT8 re_grapheme_extend_stage_4[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 0, 0, 0, 0, + 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 4, 5, 6, 0, + 7, 0, 8, 9, 0, 0, 10, 11, 12, 13, 14, 0, 0, 15, 0, 16, + 17, 18, 19, 0, 0, 0, 
0, 20, 21, 22, 23, 24, 25, 26, 27, 24, + 28, 29, 30, 31, 28, 29, 32, 24, 25, 33, 34, 24, 35, 36, 37, 0, + 0, 38, 39, 24, 0, 40, 41, 24, 0, 36, 27, 24, 0, 0, 42, 0, + 0, 43, 44, 0, 0, 45, 46, 0, 47, 48, 0, 49, 50, 51, 52, 0, + 0, 53, 54, 55, 56, 0, 0, 0, 0, 0, 57, 0, 0, 0, 0, 0, + 58, 58, 59, 59, 0, 60, 61, 0, 62, 0, 0, 0, 0, 63, 0, 0, + 0, 64, 0, 0, 0, 0, 0, 0, 65, 0, 66, 67, 0, 0, 0, 0, + 68, 69, 35, 16, 70, 71, 0, 72, 0, 73, 0, 0, 0, 0, 74, 75, + 0, 0, 0, 0, 0, 0, 1, 76, 77, 0, 0, 0, 0, 0, 13, 78, + 0, 0, 0, 0, 0, 0, 0, 79, 0, 0, 0, 80, 0, 0, 0, 1, + 0, 81, 0, 0, 82, 0, 0, 0, 0, 0, 0, 83, 80, 0, 0, 84, + 85, 86, 0, 0, 0, 0, 87, 88, 0, 89, 90, 0, 21, 91, 0, 0, + 0, 92, 93, 0, 0, 94, 25, 95, 0, 0, 0, 0, 0, 0, 0, 96, + 36, 0, 0, 0, 0, 0, 0, 0, 2, 97, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 38, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 98, + 99, 100, 0, 0, 0, 0, 0, 0, 25, 101, 97, 0, 70, 102, 0, 0, + 21, 103, 0, 0, 70, 104, 0, 0, 0, 0, 0, 0, 0, 105, 0, 0, + 0, 0, 0, 0, 106, 0, 0, 0, 0, 0, 0, 107, 108, 109, 0, 0, + 0, 0, 110, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 2, +}; + +static RE_UINT8 re_grapheme_extend_stage_5[] = { + 0, 0, 0, 0, 255, 255, 255, 255, 255, 255, 0, 0, 248, 3, 0, 0, + 0, 0, 254, 255, 255, 255, 255, 191, 182, 0, 0, 0, 0, 0, 255, 7, + 0, 248, 255, 255, 0, 0, 1, 0, 0, 0, 192, 159, 159, 61, 0, 0, + 0, 0, 2, 0, 0, 0, 255, 255, 255, 7, 0, 0, 192, 255, 1, 0, + 0, 248, 15, 0, 0, 0, 192, 251, 239, 62, 0, 0, 0, 0, 0, 14, + 240, 255, 255, 127, 7, 0, 0, 0, 0, 0, 0, 20, 254, 33, 254, 0, + 12, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 80, 30, 32, 128, 0, + 6, 0, 0, 0, 0, 0, 0, 16, 134, 57, 2, 0, 0, 0, 35, 0, + 190, 33, 0, 0, 0, 0, 0, 208, 30, 32, 192, 0, 4, 0, 0, 0, + 0, 0, 0, 64, 1, 32, 128, 0, 0, 0, 0, 192, 193, 61, 96, 0, + 0, 0, 0, 144, 68, 48, 96, 0, 0, 132, 92, 128, 0, 0, 242, 7, + 128, 127, 0, 0, 0, 0, 242, 27, 0, 63, 0, 0, 0, 0, 0, 3, + 0, 0, 160, 2, 0, 0, 254, 127, 223, 224, 255, 254, 255, 255, 255, 31, + 64, 0, 0, 0, 0, 224, 253, 102, 0, 0, 0, 195, 1, 0, 30, 0, + 100, 32, 0, 32, 0, 0, 0, 
224, 0, 0, 28, 0, 0, 0, 12, 0, + 0, 0, 176, 63, 64, 254, 15, 32, 0, 56, 0, 0, 0, 2, 0, 0, + 135, 1, 4, 14, 0, 0, 128, 9, 0, 0, 64, 127, 229, 31, 248, 159, + 15, 0, 0, 0, 0, 0, 208, 23, 3, 0, 0, 0, 60, 11, 0, 0, + 64, 163, 3, 0, 0, 240, 207, 0, 0, 0, 247, 255, 253, 33, 16, 0, + 127, 0, 0, 240, 0, 48, 0, 0, 255, 255, 1, 0, 0, 128, 3, 0, + 0, 0, 0, 128, 0, 252, 0, 0, 0, 0, 0, 6, 0, 128, 247, 63, + 0, 0, 3, 0, 68, 8, 0, 0, 96, 0, 0, 0, 16, 0, 0, 0, + 255, 255, 3, 0, 192, 63, 0, 0, 128, 255, 3, 0, 0, 0, 200, 19, + 0, 126, 102, 0, 8, 16, 0, 0, 0, 0, 157, 193, 0, 48, 64, 0, + 32, 33, 0, 0, 127, 0, 0, 0, 0, 0, 0, 32, 110, 240, 0, 0, + 0, 0, 0, 135, 0, 0, 0, 255, 0, 0, 120, 6, 128, 239, 31, 0, + 0, 0, 192, 127, 0, 40, 191, 0, 0, 128, 7, 0, 160, 195, 7, 248, + 231, 15, 0, 0, 0, 60, 0, 0, 28, 0, 0, 0, +}; + +/* Grapheme_Extend: 1062 bytes. */ + +RE_UINT32 re_get_grapheme_extend(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 15; + code = ch ^ (f << 15); + pos = (RE_UINT32)re_grapheme_extend_stage_1[f] << 4; + f = code >> 11; + code ^= f << 11; + pos = (RE_UINT32)re_grapheme_extend_stage_2[pos + f] << 3; + f = code >> 8; + code ^= f << 8; + pos = (RE_UINT32)re_grapheme_extend_stage_3[pos + f] << 3; + f = code >> 5; + code ^= f << 5; + pos = (RE_UINT32)re_grapheme_extend_stage_4[pos + f] << 5; + pos += code; + value = (re_grapheme_extend_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* Grapheme_Base. 
*/ + +static RE_UINT8 re_grapheme_base_stage_1[] = { + 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, +}; + +static RE_UINT8 re_grapheme_base_stage_2[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 9, 10, 11, 7, 7, 7, 7, 12, 13, 13, 13, 13, 14, + 15, 16, 17, 13, 18, 13, 19, 13, 13, 13, 13, 13, 13, 20, 13, 13, + 13, 13, 13, 13, 13, 13, 21, 13, 13, 13, 22, 13, 13, 23, 24, 13, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 25, 7, 26, 27, 13, 13, 13, 13, 13, 13, 13, 28, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, +}; + +static RE_UINT8 re_grapheme_base_stage_3[] = { + 0, 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 1, 16, 17, 1, 1, 18, 19, 20, 21, 22, 23, 24, 25, 1, 26, + 27, 28, 1, 29, 30, 1, 1, 31, 1, 1, 1, 32, 33, 34, 35, 36, + 37, 38, 39, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 41, + 1, 1, 1, 1, 42, 1, 43, 44, 45, 46, 47, 48, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 49, 50, 50, 50, 50, 50, 50, 50, 50, + 50, 1, 51, 52, 1, 53, 54, 55, 56, 57, 58, 59, 60, 50, 50, 50, + 61, 62, 63, 64, 65, 50, 66, 50, 67, 68, 50, 50, 50, 50, 69, 50, + 1, 1, 1, 70, 71, 50, 50, 50, 1, 1, 1, 1, 72, 50, 50, 50, + 1, 1, 73, 50, 50, 50, 50, 74, 75, 50, 50, 50, 50, 50, 50, 50, + 76, 77, 78, 79, 80, 81, 82, 83, 50, 50, 50, 50, 50, 50, 84, 50, + 85, 86, 87, 88, 89, 90, 91, 92, 1, 1, 1, 1, 1, 1, 93, 1, + 1, 1, 1, 1, 1, 1, 1, 94, 95, 50, 50, 50, 50, 50, 50, 50, + 1, 1, 95, 50, 50, 50, 50, 50, +}; + +static RE_UINT8 re_grapheme_base_stage_4[] = { + 0, 1, 1, 2, 0, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 0, 0, 0, 4, 5, 6, 1, 1, 1, 1, 1, 1, 7, 1, 1, 1, + 1, 8, 9, 10, 11, 12, 13, 14, 15, 1, 16, 17, 1, 1, 18, 19, + 20, 21, 22, 1, 1, 23, 1, 24, 25, 26, 27, 0, 0, 28, 0, 0, + 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, + 33, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, + 56, 60, 61, 62, 63, 64, 
65, 66, 10, 67, 68, 0, 69, 70, 71, 0, + 72, 73, 74, 75, 76, 77, 78, 0, 1, 79, 80, 81, 82, 1, 83, 1, + 1, 1, 84, 1, 85, 86, 87, 1, 88, 1, 89, 90, 91, 1, 1, 92, + 1, 1, 1, 1, 90, 1, 1, 93, 94, 95, 96, 97, 1, 98, 99, 100, + 101, 1, 1, 102, 1, 103, 1, 104, 90, 105, 106, 107, 1, 108, 109, 1, + 110, 1, 111, 112, 100, 113, 0, 0, 114, 115, 116, 117, 118, 119, 1, 120, + 1, 121, 122, 1, 0, 0, 123, 124, 1, 1, 1, 1, 1, 1, 0, 0, + 125, 1, 126, 127, 1, 128, 129, 130, 131, 132, 1, 133, 134, 89, 0, 0, + 1, 1, 1, 1, 135, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 136, + 1, 137, 16, 1, 1, 1, 1, 1, 10, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 138, 0, 0, 0, 0, 0, 1, 139, 2, 1, 1, 1, 1, 140, + 1, 83, 1, 141, 142, 143, 143, 0, 1, 144, 0, 0, 145, 1, 1, 136, + 1, 1, 1, 1, 1, 1, 104, 146, 1, 135, 10, 1, 147, 1, 1, 1, + 148, 149, 1, 1, 139, 89, 1, 150, 2, 1, 1, 1, 1, 1, 1, 2, + 1, 1, 1, 1, 1, 104, 1, 1, 1, 1, 1, 1, 1, 1, 151, 0, + 1, 1, 1, 1, 152, 1, 153, 1, 1, 154, 1, 155, 102, 1, 1, 156, + 1, 1, 1, 1, 157, 16, 0, 158, 159, 160, 1, 102, 1, 1, 161, 162, + 1, 163, 164, 90, 29, 165, 166, 0, 1, 167, 168, 144, 1, 169, 170, 171, + 172, 173, 0, 0, 0, 0, 1, 174, 1, 1, 1, 1, 1, 150, 175, 144, + 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 176, 1, 1, 91, 0, + 177, 178, 179, 1, 1, 1, 180, 1, 1, 1, 181, 1, 182, 1, 183, 184, + 185, 181, 186, 187, 1, 1, 1, 90, 10, 1, 1, 1, 127, 2, 188, 189, + 190, 191, 192, 0, 1, 1, 1, 89, 193, 194, 1, 1, 195, 0, 181, 90, + 0, 0, 0, 0, 90, 1, 93, 0, 2, 150, 16, 0, 196, 1, 197, 0, + 1, 1, 1, 1, 127, 198, 0, 0, 199, 200, 201, 0, 0, 0, 0, 0, + 202, 203, 0, 0, 1, 204, 0, 0, 205, 136, 206, 1, 0, 0, 0, 0, + 1, 207, 208, 209, 0, 0, 0, 0, 1, 1, 210, 0, 0, 0, 0, 0, + 0, 0, 0, 2, 0, 0, 0, 0, 211, 102, 212, 21, 118, 213, 214, 215, + 29, 216, 217, 0, 118, 218, 215, 0, 0, 0, 0, 0, 1, 219, 198, 0, + 1, 1, 1, 220, 0, 0, 0, 0, 1, 1, 1, 221, 0, 0, 0, 0, + 1, 220, 0, 0, 0, 0, 0, 0, 1, 222, 0, 0, 0, 0, 0, 0, + 1, 1, 223, 2, 224, 0, 0, 0, 225, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 104, 1, 226, 1, 227, 228, 229, 127, 0, 
+ 1, 1, 230, 0, 0, 0, 0, 0, 1, 1, 142, 96, 0, 0, 0, 0, + 1, 1, 128, 1, 231, 232, 233, 1, 234, 235, 236, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 237, 1, 1, 1, 1, 1, 1, 1, 1, 238, 1, + 233, 239, 240, 241, 242, 243, 0, 244, 1, 108, 1, 1, 136, 245, 246, 0, + 131, 139, 1, 108, 89, 0, 0, 247, 248, 89, 249, 0, 0, 0, 0, 0, + 1, 250, 1, 90, 136, 1, 251, 93, 1, 2, 211, 1, 1, 1, 1, 252, + 1, 127, 150, 183, 0, 0, 0, 253, 1, 1, 254, 0, 1, 1, 255, 0, + 1, 1, 1, 136, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 142, 0, + 1, 92, 1, 1, 1, 1, 1, 1, 127, 0, 0, 0, 0, 0, 0, 0, +}; + +static RE_UINT8 re_grapheme_base_stage_5[] = { + 0, 0, 0, 0, 255, 255, 255, 255, 255, 255, 255, 127, 255, 223, 255, 255, + 0, 0, 255, 124, 240, 215, 255, 255, 251, 255, 255, 255, 7, 252, 255, 255, + 255, 0, 254, 255, 255, 255, 127, 254, 254, 255, 255, 255, 255, 134, 0, 0, + 0, 0, 0, 64, 73, 0, 255, 255, 255, 7, 31, 0, 192, 255, 0, 200, + 255, 7, 0, 0, 255, 255, 254, 255, 255, 255, 63, 64, 96, 194, 255, 255, + 255, 63, 253, 255, 255, 255, 0, 0, 0, 224, 255, 255, 63, 0, 2, 0, + 255, 7, 240, 7, 255, 255, 63, 4, 16, 1, 255, 127, 255, 255, 255, 65, + 253, 31, 0, 0, 248, 255, 255, 255, 255, 255, 255, 235, 1, 222, 1, 255, + 243, 255, 255, 254, 236, 159, 249, 255, 255, 253, 197, 163, 129, 89, 0, 176, + 195, 255, 255, 15, 232, 135, 249, 255, 255, 253, 109, 195, 1, 0, 0, 94, + 192, 255, 28, 0, 232, 191, 251, 255, 255, 253, 237, 227, 1, 26, 1, 0, + 195, 255, 3, 0, 255, 253, 237, 35, 129, 25, 0, 176, 195, 255, 255, 0, + 232, 199, 61, 214, 24, 199, 255, 131, 198, 29, 1, 0, 192, 255, 255, 7, + 238, 223, 253, 255, 255, 253, 239, 35, 30, 0, 0, 3, 195, 255, 0, 255, + 236, 223, 253, 255, 255, 253, 239, 99, 155, 13, 0, 64, 195, 255, 6, 0, + 255, 255, 255, 167, 193, 93, 0, 0, 195, 255, 63, 254, 236, 255, 127, 252, + 255, 255, 251, 47, 127, 0, 3, 127, 0, 0, 28, 0, 255, 255, 13, 128, + 127, 128, 255, 15, 150, 37, 240, 254, 174, 236, 13, 32, 95, 0, 255, 243, + 255, 255, 255, 252, 255, 255, 95, 253, 255, 254, 255, 255, 255, 31, 0, 128, + 32, 31, 0, 0, 
0, 0, 0, 192, 191, 223, 255, 7, 255, 31, 2, 153, + 255, 255, 255, 60, 254, 255, 225, 255, 155, 223, 255, 223, 191, 32, 255, 255, + 255, 61, 127, 61, 255, 61, 255, 255, 255, 255, 61, 127, 61, 255, 127, 255, + 255, 255, 61, 255, 255, 255, 255, 7, 255, 255, 255, 31, 255, 255, 255, 3, + 255, 255, 31, 0, 255, 255, 1, 0, 255, 223, 3, 0, 255, 255, 99, 0, + 255, 255, 3, 0, 255, 223, 1, 0, 255, 255, 79, 192, 191, 1, 240, 31, + 255, 3, 255, 3, 255, 7, 255, 3, 255, 255, 255, 0, 255, 5, 255, 255, + 255, 255, 63, 0, 120, 14, 251, 1, 241, 255, 255, 255, 255, 63, 31, 0, + 255, 15, 255, 255, 255, 3, 255, 199, 255, 255, 127, 198, 255, 255, 191, 0, + 26, 224, 7, 0, 255, 63, 0, 0, 240, 255, 255, 255, 255, 255, 47, 232, + 251, 15, 255, 255, 255, 7, 240, 31, 252, 255, 255, 255, 195, 244, 255, 255, + 191, 92, 12, 240, 255, 15, 48, 248, 255, 227, 255, 255, 255, 0, 8, 0, + 2, 222, 111, 0, 255, 255, 63, 63, 63, 63, 255, 170, 255, 255, 255, 63, + 255, 255, 223, 255, 223, 255, 207, 239, 255, 255, 220, 127, 255, 7, 255, 255, + 255, 128, 255, 255, 0, 0, 243, 255, 255, 127, 255, 31, 255, 3, 255, 255, + 255, 255, 15, 0, 127, 0, 0, 0, 255, 31, 255, 3, 255, 127, 255, 255, + 255, 127, 12, 254, 255, 128, 1, 0, 255, 255, 127, 0, 127, 127, 127, 127, + 255, 255, 255, 15, 255, 255, 255, 251, 0, 0, 255, 15, 255, 255, 127, 248, + 224, 255, 255, 255, 255, 63, 254, 255, 15, 0, 255, 255, 255, 31, 0, 0, + 255, 31, 255, 255, 127, 0, 255, 255, 255, 15, 0, 0, 255, 127, 8, 192, + 255, 255, 252, 0, 255, 127, 15, 0, 0, 0, 0, 255, 187, 247, 255, 255, + 159, 15, 255, 3, 15, 192, 255, 3, 0, 0, 252, 15, 63, 192, 255, 255, + 127, 0, 12, 128, 255, 255, 55, 236, 255, 191, 255, 195, 255, 129, 25, 0, + 247, 47, 255, 243, 255, 255, 98, 62, 5, 0, 0, 248, 255, 207, 63, 0, + 126, 126, 126, 0, 127, 127, 0, 0, 223, 30, 255, 3, 127, 248, 255, 255, + 255, 63, 255, 255, 127, 0, 248, 160, 255, 255, 127, 95, 219, 255, 255, 255, + 3, 0, 248, 255, 0, 0, 255, 255, 255, 255, 252, 255, 255, 0, 0, 0, + 0, 0, 255, 63, 0, 0, 255, 3, 255, 255, 
247, 255, 127, 15, 223, 255, + 252, 252, 252, 28, 127, 127, 0, 48, 255, 239, 255, 255, 127, 255, 255, 183, + 255, 63, 255, 63, 135, 255, 255, 255, 255, 255, 143, 255, 255, 7, 255, 15, + 255, 255, 255, 191, 15, 255, 63, 0, 255, 3, 0, 0, 63, 253, 255, 255, + 255, 255, 191, 145, 255, 255, 191, 255, 255, 255, 255, 143, 255, 255, 255, 131, + 255, 255, 255, 192, 1, 0, 239, 254, 255, 0, 255, 1, 255, 255, 63, 254, + 255, 255, 63, 255, 255, 255, 7, 255, 255, 1, 0, 0, 253, 255, 255, 255, + 128, 63, 252, 255, 255, 255, 135, 217, 3, 0, 255, 255, 255, 1, 255, 3, + 127, 16, 192, 255, 15, 0, 0, 0, 255, 255, 63, 128, 255, 215, 64, 0, + 255, 127, 0, 0, 7, 0, 15, 0, 255, 255, 255, 1, 31, 0, 255, 255, + 0, 0, 248, 255, 3, 0, 0, 0, 127, 254, 255, 255, 95, 60, 0, 0, + 24, 240, 255, 255, 255, 195, 255, 255, 35, 0, 0, 0, 255, 255, 255, 223, + 100, 222, 255, 235, 239, 255, 255, 255, 191, 231, 223, 223, 255, 255, 255, 123, + 95, 252, 253, 255, 63, 255, 255, 255, 255, 207, 255, 255, 150, 254, 247, 10, + 132, 234, 150, 170, 150, 247, 247, 94, 255, 251, 255, 15, 238, 251, 255, 15, + 0, 0, 3, 0, 255, 127, 254, 127, 254, 255, 254, 255, 192, 255, 255, 255, + 7, 0, 255, 255, 255, 1, 3, 0, 1, 0, 191, 255, 223, 7, 0, 0, + 255, 255, 255, 30, 0, 0, 0, 248, 225, 255, 0, 0, 63, 0, 0, 0, +}; + +/* Grapheme_Base: 2169 bytes. */ + +RE_UINT32 re_get_grapheme_base(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 16; + code = ch ^ (f << 16); + pos = (RE_UINT32)re_grapheme_base_stage_1[f] << 5; + f = code >> 11; + code ^= f << 11; + pos = (RE_UINT32)re_grapheme_base_stage_2[pos + f] << 3; + f = code >> 8; + code ^= f << 8; + pos = (RE_UINT32)re_grapheme_base_stage_3[pos + f] << 3; + f = code >> 5; + code ^= f << 5; + pos = (RE_UINT32)re_grapheme_base_stage_4[pos + f] << 5; + pos += code; + value = (re_grapheme_base_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* Grapheme_Link. 
*/ + +static RE_UINT8 re_grapheme_link_stage_1[] = { + 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, +}; + +static RE_UINT8 re_grapheme_link_stage_2[] = { + 0, 1, 2, 3, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +static RE_UINT8 re_grapheme_link_stage_3[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 3, 4, + 5, 0, 0, 0, 0, 0, 0, 6, 0, 0, 7, 8, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 9, 0, 0, 10, 11, 12, 13, 0, 0, 0, 0, + 0, 0, 14, 0, 0, 0, 0, 0, 15, 16, 0, 0, 0, 0, 17, 0, +}; + +static RE_UINT8 re_grapheme_link_stage_4[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, + 0, 0, 1, 0, 0, 0, 2, 0, 0, 3, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 4, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, + 6, 6, 0, 0, 0, 0, 7, 0, 0, 0, 0, 8, 0, 0, 0, 0, + 0, 0, 4, 0, 0, 9, 0, 10, 0, 0, 0, 11, 0, 0, 0, 0, + 12, 0, 0, 0, 0, 0, 4, 0, 0, 0, 13, 0, 0, 0, 8, 0, + 0, 0, 0, 0, 0, 0, 0, 14, 0, 0, 0, 0, 0, 0, 0, 1, + 0, 11, 0, 0, 0, 0, 0, 0, 0, 0, 12, 0, 0, 15, 0, 0, + 0, 16, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 14, 0, 0, +}; + +static RE_UINT8 re_grapheme_link_stage_5[] = { + 0, 0, 0, 0, 0, 32, 0, 0, 0, 4, 0, 0, 0, 0, 0, 4, + 16, 0, 0, 0, 0, 0, 0, 6, 0, 0, 16, 0, 0, 0, 4, 0, + 1, 0, 0, 0, 0, 12, 0, 0, 0, 0, 12, 0, 0, 0, 0, 128, + 64, 0, 0, 0, 0, 0, 8, 0, 0, 0, 64, 0, 0, 0, 0, 2, + 0, 0, 24, 0, +}; + +/* Grapheme_Link: 374 bytes. 
*/ + +RE_UINT32 re_get_grapheme_link(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 15; + code = ch ^ (f << 15); + pos = (RE_UINT32)re_grapheme_link_stage_1[f] << 4; + f = code >> 11; + code ^= f << 11; + pos = (RE_UINT32)re_grapheme_link_stage_2[pos + f] << 3; + f = code >> 8; + code ^= f << 8; + pos = (RE_UINT32)re_grapheme_link_stage_3[pos + f] << 3; + f = code >> 5; + code ^= f << 5; + pos = (RE_UINT32)re_grapheme_link_stage_4[pos + f] << 5; + pos += code; + value = (re_grapheme_link_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* White_Space. */ + +static RE_UINT8 re_white_space_stage_1[] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, +}; + +static RE_UINT8 re_white_space_stage_2[] = { + 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, +}; + +static RE_UINT8 re_white_space_stage_3[] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, + 3, 1, 1, 1, 1, 1, 1, 1, 4, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +}; + +static RE_UINT8 re_white_space_stage_4[] = { + 0, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 3, 1, 1, 1, 1, 1, 4, 5, 1, 1, 1, 1, 1, 1, + 3, 1, 1, 1, 1, 1, 1, 1, +}; + +static RE_UINT8 re_white_space_stage_5[] = { + 0, 62, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 32, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, + 255, 7, 0, 0, 0, 131, 0, 0, 0, 0, 0, 128, 0, 0, 0, 0, +}; + +/* White_Space: 169 bytes. 
*/ + +RE_UINT32 re_get_white_space(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 16; + code = ch ^ (f << 16); + pos = (RE_UINT32)re_white_space_stage_1[f] << 3; + f = code >> 13; + code ^= f << 13; + pos = (RE_UINT32)re_white_space_stage_2[pos + f] << 4; + f = code >> 9; + code ^= f << 9; + pos = (RE_UINT32)re_white_space_stage_3[pos + f] << 3; + f = code >> 6; + code ^= f << 6; + pos = (RE_UINT32)re_white_space_stage_4[pos + f] << 6; + pos += code; + value = (re_white_space_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* Bidi_Control. */ + +static RE_UINT8 re_bidi_control_stage_1[] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, +}; + +static RE_UINT8 re_bidi_control_stage_2[] = { + 0, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +}; + +static RE_UINT8 re_bidi_control_stage_3[] = { + 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 2, 0, 0, 0, 0, 0, 0, 0, +}; + +static RE_UINT8 re_bidi_control_stage_4[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, + 2, 3, 0, 0, 0, 0, 0, 0, +}; + +static RE_UINT8 re_bidi_control_stage_5[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 0, + 0, 192, 0, 0, 0, 124, 0, 0, 0, 0, 0, 0, 192, 3, 0, 0, +}; + +/* Bidi_Control: 129 bytes. */ + +RE_UINT32 re_get_bidi_control(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 16; + code = ch ^ (f << 16); + pos = (RE_UINT32)re_bidi_control_stage_1[f] << 4; + f = code >> 12; + code ^= f << 12; + pos = (RE_UINT32)re_bidi_control_stage_2[pos + f] << 3; + f = code >> 9; + code ^= f << 9; + pos = (RE_UINT32)re_bidi_control_stage_3[pos + f] << 3; + f = code >> 6; + code ^= f << 6; + pos = (RE_UINT32)re_bidi_control_stage_4[pos + f] << 6; + pos += code; + value = (re_bidi_control_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* Join_Control. 
*/ + +static RE_UINT8 re_join_control_stage_1[] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, +}; + +static RE_UINT8 re_join_control_stage_2[] = { + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +static RE_UINT8 re_join_control_stage_3[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, +}; + +static RE_UINT8 re_join_control_stage_4[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, +}; + +static RE_UINT8 re_join_control_stage_5[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, 0, 0, 0, 0, 0, +}; + +/* Join_Control: 97 bytes. */ + +RE_UINT32 re_get_join_control(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 16; + code = ch ^ (f << 16); + pos = (RE_UINT32)re_join_control_stage_1[f] << 4; + f = code >> 12; + code ^= f << 12; + pos = (RE_UINT32)re_join_control_stage_2[pos + f] << 3; + f = code >> 9; + code ^= f << 9; + pos = (RE_UINT32)re_join_control_stage_3[pos + f] << 3; + f = code >> 6; + code ^= f << 6; + pos = (RE_UINT32)re_join_control_stage_4[pos + f] << 6; + pos += code; + value = (re_join_control_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* Dash. 
*/ + +static RE_UINT8 re_dash_stage_1[] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, +}; + +static RE_UINT8 re_dash_stage_2[] = { + 0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, +}; + +static RE_UINT8 re_dash_stage_3[] = { + 0, 1, 2, 1, 1, 1, 1, 1, 1, 1, 3, 1, 4, 1, 1, 1, + 5, 6, 1, 1, 1, 1, 1, 7, 8, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, +}; + +static RE_UINT8 re_dash_stage_4[] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 2, 1, 3, 1, 1, 1, 1, 1, 1, 1, + 4, 1, 1, 1, 1, 1, 1, 1, 5, 6, 7, 1, 1, 1, 1, 1, + 8, 1, 1, 1, 1, 1, 1, 1, 9, 1, 1, 1, 1, 1, 1, 1, + 10, 1, 11, 1, 1, 1, 1, 1, 12, 13, 1, 1, 14, 1, 1, 1, +}; + +static RE_UINT8 re_dash_stage_5[] = { + 0, 0, 0, 0, 0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 4, 0, 0, 0, 0, 0, 64, 1, 0, 0, 0, 0, 0, 0, 0, + 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 63, 0, 0, 0, 0, 0, + 0, 0, 8, 0, 0, 0, 0, 8, 0, 8, 0, 0, 0, 0, 0, 0, + 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 128, 4, 0, 0, 0, 12, + 0, 0, 0, 16, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 1, 8, 0, 0, 0, + 0, 32, 0, 0, 0, 0, 0, 0, +}; + +/* Dash: 297 bytes. */ + +RE_UINT32 re_get_dash(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 16; + code = ch ^ (f << 16); + pos = (RE_UINT32)re_dash_stage_1[f] << 4; + f = code >> 12; + code ^= f << 12; + pos = (RE_UINT32)re_dash_stage_2[pos + f] << 3; + f = code >> 9; + code ^= f << 9; + pos = (RE_UINT32)re_dash_stage_3[pos + f] << 3; + f = code >> 6; + code ^= f << 6; + pos = (RE_UINT32)re_dash_stage_4[pos + f] << 6; + pos += code; + value = (re_dash_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* Hyphen. 
*/ + +static RE_UINT8 re_hyphen_stage_1[] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, +}; + +static RE_UINT8 re_hyphen_stage_2[] = { + 0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, +}; + +static RE_UINT8 re_hyphen_stage_3[] = { + 0, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, + 4, 1, 1, 1, 1, 1, 1, 5, 6, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 7, +}; + +static RE_UINT8 re_hyphen_stage_4[] = { + 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 2, 1, 3, 1, 1, 1, 1, 1, 1, 1, + 4, 1, 1, 1, 1, 1, 1, 1, 5, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 6, 1, 1, 1, 1, 1, 7, 1, 1, 8, 9, 1, 1, +}; + +static RE_UINT8 re_hyphen_stage_5[] = { + 0, 0, 0, 0, 0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 4, 0, 0, 0, 0, 0, 0, 64, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 128, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 8, 0, 0, 0, + 0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, 0, 0, 0, +}; + +/* Hyphen: 241 bytes. */ + +RE_UINT32 re_get_hyphen(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 16; + code = ch ^ (f << 16); + pos = (RE_UINT32)re_hyphen_stage_1[f] << 4; + f = code >> 12; + code ^= f << 12; + pos = (RE_UINT32)re_hyphen_stage_2[pos + f] << 3; + f = code >> 9; + code ^= f << 9; + pos = (RE_UINT32)re_hyphen_stage_3[pos + f] << 3; + f = code >> 6; + code ^= f << 6; + pos = (RE_UINT32)re_hyphen_stage_4[pos + f] << 6; + pos += code; + value = (re_hyphen_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* Quotation_Mark. 
*/ + +static RE_UINT8 re_quotation_mark_stage_1[] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, +}; + +static RE_UINT8 re_quotation_mark_stage_2[] = { + 0, 1, 2, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +}; + +static RE_UINT8 re_quotation_mark_stage_3[] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 4, +}; + +static RE_UINT8 re_quotation_mark_stage_4[] = { + 0, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 3, 1, 1, 1, 1, 1, 1, 1, 4, 1, 1, 1, 1, 1, 1, 1, + 1, 5, 1, 1, 6, 7, 1, 1, +}; + +static RE_UINT8 re_quotation_mark_stage_5[] = { + 0, 0, 0, 0, 132, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 8, 0, 8, 0, 0, 0, 255, 0, 0, 0, 6, + 0, 240, 0, 224, 0, 0, 0, 0, 30, 0, 0, 0, 0, 0, 0, 0, + 132, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, 0, 0, 0, +}; + +/* Quotation_Mark: 193 bytes. */ + +RE_UINT32 re_get_quotation_mark(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 16; + code = ch ^ (f << 16); + pos = (RE_UINT32)re_quotation_mark_stage_1[f] << 4; + f = code >> 12; + code ^= f << 12; + pos = (RE_UINT32)re_quotation_mark_stage_2[pos + f] << 3; + f = code >> 9; + code ^= f << 9; + pos = (RE_UINT32)re_quotation_mark_stage_3[pos + f] << 3; + f = code >> 6; + code ^= f << 6; + pos = (RE_UINT32)re_quotation_mark_stage_4[pos + f] << 6; + pos += code; + value = (re_quotation_mark_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* Terminal_Punctuation. 
*/ + +static RE_UINT8 re_terminal_punctuation_stage_1[] = { + 0, 1, 2, 3, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, +}; + +static RE_UINT8 re_terminal_punctuation_stage_2[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 9, 10, 11, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 12, 13, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 14, + 15, 9, 16, 9, 17, 9, 9, 9, 9, 18, 9, 9, 9, 9, 9, 9, +}; + +static RE_UINT8 re_terminal_punctuation_stage_3[] = { + 0, 1, 1, 1, 1, 1, 2, 3, 1, 1, 1, 4, 5, 6, 7, 8, + 9, 1, 10, 1, 1, 1, 1, 1, 1, 1, 1, 1, 11, 1, 12, 1, + 13, 1, 1, 1, 1, 1, 14, 1, 1, 1, 1, 1, 15, 16, 1, 17, + 18, 1, 19, 1, 1, 20, 21, 1, 22, 1, 1, 1, 1, 1, 1, 1, + 23, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 24, 1, 1, 1, 25, 1, 1, 1, 1, 1, 1, 1, + 1, 26, 1, 1, 27, 28, 1, 1, 29, 30, 31, 32, 33, 34, 1, 35, + 1, 1, 1, 1, 36, 1, 37, 1, 1, 1, 1, 1, 1, 1, 1, 38, + 39, 1, 40, 1, 1, 1, 41, 1, 42, 43, 44, 45, 1, 1, 1, 1, + 46, 1, 1, 1, 1, 1, 1, 1, +}; + +static RE_UINT8 re_terminal_punctuation_stage_4[] = { + 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 3, 0, 0, 0, + 4, 0, 5, 0, 6, 0, 0, 0, 0, 0, 7, 0, 8, 0, 0, 0, + 0, 0, 0, 9, 0, 10, 2, 0, 0, 0, 0, 11, 0, 0, 12, 0, + 13, 0, 0, 0, 0, 0, 14, 0, 0, 0, 0, 15, 0, 0, 0, 16, + 0, 0, 0, 17, 0, 0, 18, 0, 19, 0, 0, 0, 0, 0, 11, 0, + 0, 20, 0, 0, 0, 0, 21, 0, 0, 22, 0, 23, 0, 24, 25, 0, + 0, 26, 0, 0, 27, 0, 0, 0, 0, 0, 0, 23, 28, 0, 0, 0, + 0, 0, 0, 29, 0, 0, 0, 30, 0, 0, 31, 0, 0, 32, 0, 0, + 0, 0, 25, 0, 0, 0, 33, 0, 0, 0, 34, 35, 0, 0, 0, 36, + 0, 0, 37, 0, 1, 0, 0, 38, 34, 0, 39, 0, 0, 0, 40, 0, + 34, 0, 0, 0, 0, 41, 0, 0, 0, 0, 42, 0, 0, 23, 43, 0, + 0, 0, 44, 0, 0, 0, 45, 0, 0, 0, 0, 46, +}; + +static RE_UINT8 re_terminal_punctuation_stage_5[] = { + 0, 0, 0, 0, 2, 80, 0, 140, 0, 0, 0, 64, 128, 0, 0, 0, + 0, 2, 0, 0, 8, 0, 0, 
0, 0, 16, 0, 136, 0, 0, 16, 0, + 255, 23, 0, 0, 0, 0, 0, 3, 0, 0, 255, 127, 48, 0, 0, 0, + 0, 0, 0, 12, 0, 225, 7, 0, 0, 12, 0, 0, 254, 1, 0, 0, + 0, 96, 0, 0, 0, 56, 0, 0, 0, 0, 112, 4, 60, 3, 0, 0, + 0, 15, 0, 0, 0, 0, 0, 236, 0, 0, 0, 248, 0, 0, 0, 192, + 0, 0, 0, 48, 128, 3, 0, 0, 0, 64, 0, 0, 6, 0, 0, 0, + 0, 224, 0, 0, 0, 0, 248, 0, 0, 0, 192, 0, 0, 192, 0, 0, + 0, 128, 0, 0, 0, 0, 0, 224, 0, 0, 0, 128, 0, 0, 3, 0, + 0, 8, 0, 0, 0, 0, 247, 0, 18, 0, 0, 0, 0, 0, 1, 0, + 0, 0, 128, 0, 0, 0, 0, 252, 128, 63, 0, 0, 3, 0, 0, 0, + 14, 0, 0, 0, 96, 0, 0, 0, 0, 0, 15, 0, +}; + +/* Terminal_Punctuation: 676 bytes. */ + +RE_UINT32 re_get_terminal_punctuation(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 14; + code = ch ^ (f << 14); + pos = (RE_UINT32)re_terminal_punctuation_stage_1[f] << 4; + f = code >> 10; + code ^= f << 10; + pos = (RE_UINT32)re_terminal_punctuation_stage_2[pos + f] << 3; + f = code >> 7; + code ^= f << 7; + pos = (RE_UINT32)re_terminal_punctuation_stage_3[pos + f] << 2; + f = code >> 5; + code ^= f << 5; + pos = (RE_UINT32)re_terminal_punctuation_stage_4[pos + f] << 5; + pos += code; + value = (re_terminal_punctuation_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* Other_Math. 
*/ + +static RE_UINT8 re_other_math_stage_1[] = { + 0, 1, 2, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, +}; + +static RE_UINT8 re_other_math_stage_2[] = { + 0, 1, 1, 1, 2, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5, 1, 1, 6, 1, 1, +}; + +static RE_UINT8 re_other_math_stage_3[] = { + 0, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 3, 4, 1, 5, 1, 6, 7, 8, 1, 9, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 10, 11, 1, 1, 1, 1, 12, 13, 14, 15, + 1, 1, 1, 1, 1, 1, 16, 1, +}; + +static RE_UINT8 re_other_math_stage_4[] = { + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 2, 3, 4, 5, 6, 7, 8, 0, 9, 10, + 11, 12, 13, 0, 14, 15, 16, 17, 18, 0, 0, 0, 0, 19, 20, 21, + 0, 0, 0, 0, 0, 22, 23, 24, 25, 0, 26, 27, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 25, 28, 0, 0, 0, 0, 29, 0, 30, 31, + 0, 0, 0, 32, 0, 0, 0, 0, 0, 33, 0, 0, 0, 0, 0, 0, + 34, 34, 35, 34, 36, 37, 38, 34, 39, 40, 41, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 42, 43, 44, 35, 35, 45, 45, 46, 46, 47, 34, + 38, 48, 49, 50, 51, 52, 0, 0, +}; + +static RE_UINT8 re_other_math_stage_5[] = { + 0, 0, 0, 0, 0, 0, 0, 64, 0, 0, 39, 0, 0, 0, 51, 0, + 0, 0, 64, 0, 0, 0, 28, 0, 1, 0, 0, 0, 30, 0, 0, 96, + 0, 96, 0, 0, 0, 0, 255, 31, 98, 248, 0, 0, 132, 252, 47, 62, + 16, 179, 251, 241, 224, 3, 0, 0, 0, 0, 224, 243, 182, 62, 195, 240, + 255, 63, 235, 47, 48, 0, 0, 0, 0, 15, 0, 0, 0, 0, 176, 0, + 0, 0, 1, 0, 4, 0, 0, 0, 3, 192, 127, 240, 193, 140, 15, 0, + 148, 31, 0, 0, 96, 0, 0, 0, 5, 0, 0, 0, 15, 96, 0, 0, + 192, 255, 0, 0, 248, 255, 255, 1, 0, 0, 0, 15, 0, 0, 0, 48, + 10, 1, 0, 0, 0, 0, 0, 80, 255, 255, 255, 255, 255, 255, 223, 255, + 255, 255, 255, 223, 100, 222, 255, 235, 239, 255, 255, 255, 191, 231, 223, 223, + 255, 255, 255, 123, 95, 252, 253, 255, 63, 255, 255, 255, 253, 255, 255, 247, + 255, 255, 255, 247, 255, 127, 255, 255, 255, 253, 255, 
255, 247, 207, 255, 255, + 150, 254, 247, 10, 132, 234, 150, 170, 150, 247, 247, 94, 255, 251, 255, 15, + 238, 251, 255, 15, +}; + +/* Other_Math: 502 bytes. */ + +RE_UINT32 re_get_other_math(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 15; + code = ch ^ (f << 15); + pos = (RE_UINT32)re_other_math_stage_1[f] << 4; + f = code >> 11; + code ^= f << 11; + pos = (RE_UINT32)re_other_math_stage_2[pos + f] << 3; + f = code >> 8; + code ^= f << 8; + pos = (RE_UINT32)re_other_math_stage_3[pos + f] << 3; + f = code >> 5; + code ^= f << 5; + pos = (RE_UINT32)re_other_math_stage_4[pos + f] << 5; + pos += code; + value = (re_other_math_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* Hex_Digit. */ + +static RE_UINT8 re_hex_digit_stage_1[] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, +}; + +static RE_UINT8 re_hex_digit_stage_2[] = { + 0, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, +}; + +static RE_UINT8 re_hex_digit_stage_3[] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 2, +}; + +static RE_UINT8 re_hex_digit_stage_4[] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 2, 1, +}; + +static RE_UINT8 re_hex_digit_stage_5[] = { + 0, 0, 0, 0, 0, 0, 255, 3, 126, 0, 0, 0, 126, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 255, 3, 126, 0, 0, 0, 126, 0, 0, 0, 0, 0, 0, 0, +}; + +/* Hex_Digit: 129 bytes. 
*/ + +RE_UINT32 re_get_hex_digit(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 16; + code = ch ^ (f << 16); + pos = (RE_UINT32)re_hex_digit_stage_1[f] << 3; + f = code >> 13; + code ^= f << 13; + pos = (RE_UINT32)re_hex_digit_stage_2[pos + f] << 3; + f = code >> 10; + code ^= f << 10; + pos = (RE_UINT32)re_hex_digit_stage_3[pos + f] << 3; + f = code >> 7; + code ^= f << 7; + pos = (RE_UINT32)re_hex_digit_stage_4[pos + f] << 7; + pos += code; + value = (re_hex_digit_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* ASCII_Hex_Digit. */ + +static RE_UINT8 re_ascii_hex_digit_stage_1[] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, +}; + +static RE_UINT8 re_ascii_hex_digit_stage_2[] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +}; + +static RE_UINT8 re_ascii_hex_digit_stage_3[] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +}; + +static RE_UINT8 re_ascii_hex_digit_stage_4[] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +}; + +static RE_UINT8 re_ascii_hex_digit_stage_5[] = { + 0, 0, 0, 0, 0, 0, 255, 3, 126, 0, 0, 0, 126, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +/* ASCII_Hex_Digit: 97 bytes. */ + +RE_UINT32 re_get_ascii_hex_digit(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 16; + code = ch ^ (f << 16); + pos = (RE_UINT32)re_ascii_hex_digit_stage_1[f] << 3; + f = code >> 13; + code ^= f << 13; + pos = (RE_UINT32)re_ascii_hex_digit_stage_2[pos + f] << 3; + f = code >> 10; + code ^= f << 10; + pos = (RE_UINT32)re_ascii_hex_digit_stage_3[pos + f] << 3; + f = code >> 7; + code ^= f << 7; + pos = (RE_UINT32)re_ascii_hex_digit_stage_4[pos + f] << 7; + pos += code; + value = (re_ascii_hex_digit_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* Other_Alphabetic. 
*/ + +static RE_UINT8 re_other_alphabetic_stage_1[] = { + 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, +}; + +static RE_UINT8 re_other_alphabetic_stage_2[] = { + 0, 1, 2, 3, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 7, 8, 6, 6, 6, 6, 6, 6, 6, 6, 6, 9, + 6, 10, 11, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 12, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, +}; + +static RE_UINT8 re_other_alphabetic_stage_3[] = { + 0, 0, 0, 1, 0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, + 13, 0, 0, 14, 0, 0, 0, 15, 16, 17, 18, 19, 20, 0, 0, 0, + 0, 0, 0, 0, 21, 0, 0, 0, 0, 0, 0, 0, 0, 22, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 23, 0, + 24, 25, 26, 27, 0, 0, 0, 0, 0, 0, 0, 28, 0, 0, 0, 0, + 0, 0, 29, 0, 0, 0, 0, 0, 30, 31, 0, 0, 0, 0, 32, 0, + 0, 0, 0, 0, 0, 0, 0, 33, +}; + +static RE_UINT8 re_other_alphabetic_stage_4[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 2, 3, 0, 4, 0, 5, 6, 0, 0, 7, 8, + 9, 10, 0, 0, 0, 11, 0, 0, 12, 13, 0, 0, 0, 0, 0, 14, + 15, 16, 17, 18, 19, 20, 21, 18, 19, 20, 22, 23, 19, 20, 24, 18, + 19, 20, 25, 18, 26, 20, 27, 0, 19, 20, 28, 18, 18, 20, 28, 18, + 18, 20, 29, 18, 18, 0, 30, 31, 0, 32, 33, 0, 0, 34, 33, 0, + 0, 0, 0, 35, 36, 37, 0, 0, 0, 38, 39, 40, 41, 0, 0, 0, + 0, 0, 42, 0, 0, 0, 0, 0, 31, 31, 31, 31, 0, 43, 44, 0, + 0, 0, 0, 0, 0, 45, 0, 0, 0, 46, 0, 0, 0, 10, 47, 0, + 48, 0, 49, 50, 0, 0, 0, 0, 51, 52, 15, 0, 53, 54, 0, 55, + 0, 56, 0, 0, 0, 0, 0, 31, 0, 0, 0, 0, 0, 43, 57, 58, + 0, 0, 0, 0, 0, 0, 0, 57, 0, 0, 0, 59, 42, 0, 0, 0, + 0, 60, 0, 0, 61, 62, 15, 0, 0, 63, 64, 0, 15, 62, 0, 0, + 0, 65, 66, 0, 0, 67, 0, 68, 0, 0, 0, 0, 0, 0, 0, 69, + 70, 0, 0, 0, 0, 0, 0, 0, 71, 0, 0, 0, 0, 0, 0, 0, + 53, 72, 73, 0, 26, 74, 0, 0, 53, 64, 0, 0, 53, 75, 0, 0, + 0, 0, 0, 0, 0, 76, 0, 0, 0, 0, 35, 77, 0, 0, 0, 0, +}; + +static RE_UINT8 re_other_alphabetic_stage_5[] = { + 0, 0, 0, 0, 32, 0, 0, 0, 0, 0, 255, 191, 182, 0, 0, 0, + 0, 0, 255, 7, 0, 248, 255, 254, 0, 
0, 1, 0, 0, 0, 192, 31, + 158, 33, 0, 0, 0, 0, 2, 0, 0, 0, 255, 255, 192, 255, 1, 0, + 0, 0, 192, 248, 239, 30, 0, 0, 240, 3, 255, 127, 15, 0, 0, 0, + 0, 0, 0, 204, 255, 223, 224, 0, 12, 0, 0, 0, 14, 0, 0, 0, + 0, 0, 0, 192, 159, 25, 128, 0, 135, 25, 2, 0, 0, 0, 35, 0, + 191, 27, 0, 0, 159, 25, 192, 0, 4, 0, 0, 0, 199, 29, 128, 0, + 223, 29, 96, 0, 223, 29, 128, 0, 0, 128, 95, 255, 0, 0, 12, 0, + 0, 0, 242, 7, 0, 32, 0, 0, 0, 0, 242, 27, 0, 0, 254, 255, + 3, 224, 255, 254, 255, 255, 255, 31, 0, 248, 127, 121, 0, 0, 192, 195, + 133, 1, 30, 0, 124, 0, 0, 48, 0, 0, 0, 128, 0, 0, 192, 255, + 255, 1, 0, 0, 0, 2, 0, 0, 255, 15, 255, 1, 1, 3, 0, 0, + 0, 0, 128, 15, 0, 0, 224, 127, 254, 255, 31, 0, 31, 0, 0, 0, + 0, 0, 224, 255, 7, 0, 0, 0, 254, 51, 0, 0, 128, 255, 3, 0, + 240, 255, 63, 0, 255, 255, 255, 255, 255, 3, 0, 0, 0, 0, 240, 15, + 248, 0, 0, 0, 3, 0, 0, 0, 0, 0, 240, 255, 192, 7, 0, 0, + 128, 255, 7, 0, 0, 254, 127, 0, 8, 48, 0, 0, 0, 0, 157, 65, + 0, 248, 32, 0, 248, 7, 0, 0, 0, 0, 0, 64, 110, 240, 0, 0, + 0, 0, 0, 255, 63, 0, 0, 0, 0, 0, 255, 1, 0, 0, 248, 255, + 0, 248, 63, 0, 255, 255, 255, 127, +}; + +/* Other_Alphabetic: 786 bytes. */ + +RE_UINT32 re_get_other_alphabetic(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 15; + code = ch ^ (f << 15); + pos = (RE_UINT32)re_other_alphabetic_stage_1[f] << 4; + f = code >> 11; + code ^= f << 11; + pos = (RE_UINT32)re_other_alphabetic_stage_2[pos + f] << 3; + f = code >> 8; + code ^= f << 8; + pos = (RE_UINT32)re_other_alphabetic_stage_3[pos + f] << 3; + f = code >> 5; + code ^= f << 5; + pos = (RE_UINT32)re_other_alphabetic_stage_4[pos + f] << 5; + pos += code; + value = (re_other_alphabetic_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* Ideographic. 
*/ + +static RE_UINT8 re_ideographic_stage_1[] = { + 0, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, +}; + +static RE_UINT8 re_ideographic_stage_2[] = { + 0, 0, 0, 1, 2, 3, 3, 3, 3, 4, 0, 0, 0, 0, 0, 5, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 6, 7, 0, 0, 0, 8, +}; + +static RE_UINT8 re_ideographic_stage_3[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 4, 0, 0, 0, 0, 5, 6, 0, 0, + 2, 2, 2, 7, 2, 2, 2, 2, 2, 2, 2, 8, 9, 0, 0, 0, + 0, 0, 0, 0, 2, 9, 0, 0, +}; + +static RE_UINT8 re_ideographic_stage_4[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 0, + 2, 2, 2, 2, 2, 2, 2, 4, 0, 0, 0, 0, 2, 2, 2, 2, + 2, 5, 2, 6, 0, 0, 0, 0, 2, 2, 2, 7, 2, 2, 2, 2, + 2, 2, 2, 2, 8, 2, 2, 2, 9, 0, 0, 0, 0, 0, 0, 0, +}; + +static RE_UINT8 re_ideographic_stage_5[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 192, 0, 0, 0, 254, 3, 0, 7, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 63, 0, + 255, 31, 0, 0, 0, 0, 0, 0, 255, 255, 255, 255, 255, 63, 255, 255, + 255, 255, 255, 3, 0, 0, 0, 0, 255, 255, 127, 0, 0, 0, 0, 0, + 255, 255, 255, 255, 255, 255, 31, 0, 255, 255, 255, 63, 0, 0, 0, 0, +}; + +/* Ideographic: 297 bytes. */ + +RE_UINT32 re_get_ideographic(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 16; + code = ch ^ (f << 16); + pos = (RE_UINT32)re_ideographic_stage_1[f] << 4; + f = code >> 12; + code ^= f << 12; + pos = (RE_UINT32)re_ideographic_stage_2[pos + f] << 3; + f = code >> 9; + code ^= f << 9; + pos = (RE_UINT32)re_ideographic_stage_3[pos + f] << 3; + f = code >> 6; + code ^= f << 6; + pos = (RE_UINT32)re_ideographic_stage_4[pos + f] << 6; + pos += code; + value = (re_ideographic_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* Diacritic. 
*/ + +static RE_UINT8 re_diacritic_stage_1[] = { + 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, +}; + +static RE_UINT8 re_diacritic_stage_2[] = { + 0, 1, 2, 3, 4, 5, 6, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 7, 8, 4, 4, 4, 4, 4, 4, 4, 4, 4, 9, + 4, 4, 10, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 11, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 12, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, +}; + +static RE_UINT8 re_diacritic_stage_3[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 1, 1, 1, 1, 1, 1, 17, 1, 18, 19, 20, 21, 22, 1, 23, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 24, 1, 25, 1, + 26, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 27, 28, + 29, 30, 31, 32, 1, 1, 1, 1, 1, 1, 1, 33, 1, 1, 34, 35, + 36, 37, 1, 1, 1, 1, 38, 1, 1, 1, 1, 1, 1, 1, 1, 39, + 1, 40, 1, 1, 1, 1, 1, 1, +}; + +static RE_UINT8 re_diacritic_stage_4[] = { + 0, 0, 1, 2, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 4, 5, 5, 5, 5, 6, 7, 8, 0, 0, 0, + 0, 0, 0, 0, 9, 0, 0, 0, 0, 0, 10, 0, 11, 12, 13, 0, + 0, 0, 14, 0, 0, 0, 15, 16, 0, 4, 17, 0, 0, 18, 0, 19, + 20, 0, 0, 0, 0, 0, 0, 21, 0, 22, 23, 24, 0, 22, 25, 0, + 0, 22, 25, 0, 0, 22, 25, 0, 0, 22, 25, 0, 0, 0, 25, 0, + 0, 0, 25, 0, 0, 22, 25, 0, 0, 0, 25, 0, 0, 0, 26, 0, + 0, 0, 27, 0, 0, 0, 28, 0, 20, 29, 0, 0, 30, 0, 31, 0, + 0, 32, 0, 0, 33, 0, 0, 0, 0, 0, 0, 0, 0, 0, 34, 0, + 0, 35, 0, 0, 0, 0, 0, 0, 0, 0, 0, 36, 0, 0, 0, 0, + 0, 37, 38, 39, 0, 40, 0, 0, 0, 41, 0, 42, 0, 0, 4, 43, + 0, 44, 5, 17, 0, 0, 45, 46, 0, 0, 0, 0, 0, 47, 48, 49, + 0, 0, 0, 0, 0, 0, 0, 50, 0, 51, 0, 0, 0, 0, 0, 0, + 0, 52, 0, 0, 53, 0, 0, 22, 0, 0, 0, 54, 0, 0, 0, 55, + 56, 57, 0, 0, 58, 0, 0, 20, 0, 0, 0, 0, 0, 0, 38, 59, + 0, 60, 61, 0, 0, 61, 2, 0, 0, 0, 0, 62, 0, 15, 63, 64, + 0, 0, 0, 0, 0, 0, 0, 65, 1, 0, 0, 0, 0, 0, 0, 0, + 0, 66, 0, 0, 0, 0, 0, 0, 0, 1, 2, 67, 68, 0, 0, 69, + 0, 0, 0, 0, 0, 70, 0, 0, 0, 71, 0, 0, 0, 0, 2, 0, + 0, 0, 0, 0, 0, 41, 0, 0, 0, 0, 0, 0, 72, 0, 0, 0, + 
0, 0, 0, 73, 74, 75, 0, 0, +}; + +static RE_UINT8 re_diacritic_stage_5[] = { + 0, 0, 0, 0, 0, 0, 0, 64, 1, 0, 0, 0, 0, 129, 144, 1, + 0, 0, 255, 255, 255, 255, 255, 255, 255, 127, 255, 224, 7, 0, 48, 4, + 48, 0, 0, 0, 248, 0, 0, 0, 0, 0, 0, 2, 0, 0, 254, 255, + 251, 255, 255, 191, 22, 0, 0, 0, 0, 248, 135, 1, 0, 0, 0, 128, + 97, 28, 0, 0, 255, 7, 0, 0, 192, 255, 1, 0, 0, 248, 63, 0, + 0, 0, 0, 3, 240, 255, 255, 127, 0, 0, 0, 16, 0, 32, 30, 0, + 0, 0, 2, 0, 0, 32, 0, 0, 0, 4, 0, 0, 128, 95, 0, 0, + 0, 31, 0, 0, 0, 0, 160, 194, 220, 0, 0, 0, 64, 0, 0, 0, + 0, 0, 128, 6, 128, 191, 0, 12, 0, 254, 15, 32, 0, 0, 0, 14, + 0, 0, 224, 159, 0, 0, 16, 0, 16, 0, 0, 0, 0, 248, 15, 0, + 0, 12, 0, 0, 0, 0, 192, 0, 0, 0, 0, 63, 255, 33, 16, 0, + 0, 240, 255, 255, 240, 255, 0, 0, 0, 0, 0, 224, 0, 0, 0, 160, + 3, 224, 0, 224, 0, 224, 0, 96, 0, 128, 3, 0, 0, 128, 0, 0, + 0, 252, 0, 0, 0, 0, 0, 30, 0, 128, 0, 176, 0, 0, 3, 0, + 0, 0, 128, 255, 3, 0, 0, 0, 0, 1, 0, 0, 255, 255, 3, 0, + 0, 120, 0, 0, 0, 0, 8, 0, 0, 0, 0, 8, 7, 0, 0, 0, + 0, 0, 64, 0, 0, 48, 0, 0, 127, 0, 0, 0, 0, 0, 1, 0, + 0, 0, 0, 192, 8, 0, 0, 0, 0, 0, 0, 6, 0, 0, 24, 0, + 0, 128, 255, 255, 128, 227, 7, 248, 231, 15, 0, 0, 0, 60, 0, 0, +}; + +/* Diacritic: 849 bytes. */ + +RE_UINT32 re_get_diacritic(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 16; + code = ch ^ (f << 16); + pos = (RE_UINT32)re_diacritic_stage_1[f] << 5; + f = code >> 11; + code ^= f << 11; + pos = (RE_UINT32)re_diacritic_stage_2[pos + f] << 3; + f = code >> 8; + code ^= f << 8; + pos = (RE_UINT32)re_diacritic_stage_3[pos + f] << 3; + f = code >> 5; + code ^= f << 5; + pos = (RE_UINT32)re_diacritic_stage_4[pos + f] << 5; + pos += code; + value = (re_diacritic_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* Extender. 
*/ + +static RE_UINT8 re_extender_stage_1[] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, +}; + +static RE_UINT8 re_extender_stage_2[] = { + 0, 1, 2, 3, 2, 2, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 5, 6, 2, 2, 2, 2, 2, 2, 2, 2, 2, 7, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, +}; + +static RE_UINT8 re_extender_stage_3[] = { + 0, 1, 2, 1, 1, 1, 3, 4, 1, 1, 1, 1, 1, 1, 5, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 6, 1, 7, 1, 8, 1, 1, 1, + 9, 1, 1, 1, 1, 1, 1, 1, 10, 1, 1, 1, 1, 1, 11, 1, + 1, 12, 13, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 14, +}; + +static RE_UINT8 re_extender_stage_4[] = { + 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 5, 0, 0, 0, 5, 0, + 6, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, + 0, 9, 0, 10, 0, 0, 0, 0, 11, 12, 0, 0, 13, 0, 0, 14, + 15, 0, 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 17, 0, 0, 0, 0, 18, 0, 0, 19, 20, + 0, 0, 0, 18, 0, 0, 0, 0, +}; + +static RE_UINT8 re_extender_stage_5[] = { + 0, 0, 0, 0, 0, 0, 128, 0, 0, 0, 3, 0, 1, 0, 0, 0, + 0, 0, 0, 4, 64, 0, 0, 0, 0, 4, 0, 0, 8, 0, 0, 0, + 128, 0, 0, 0, 0, 0, 64, 0, 0, 0, 0, 8, 32, 0, 0, 0, + 0, 0, 62, 0, 0, 0, 0, 96, 0, 0, 0, 112, 0, 0, 32, 0, + 0, 16, 0, 0, 0, 128, 0, 0, 0, 0, 1, 0, 0, 0, 0, 32, + 0, 0, 24, 0, +}; + +/* Extender: 349 bytes. 
*/ + +RE_UINT32 re_get_extender(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 16; + code = ch ^ (f << 16); + pos = (RE_UINT32)re_extender_stage_1[f] << 5; + f = code >> 11; + code ^= f << 11; + pos = (RE_UINT32)re_extender_stage_2[pos + f] << 3; + f = code >> 8; + code ^= f << 8; + pos = (RE_UINT32)re_extender_stage_3[pos + f] << 3; + f = code >> 5; + code ^= f << 5; + pos = (RE_UINT32)re_extender_stage_4[pos + f] << 5; + pos += code; + value = (re_extender_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* Other_Lowercase. */ + +static RE_UINT8 re_other_lowercase_stage_1[] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, +}; + +static RE_UINT8 re_other_lowercase_stage_2[] = { + 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, +}; + +static RE_UINT8 re_other_lowercase_stage_3[] = { + 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, + 4, 2, 5, 2, 2, 2, 6, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 7, 2, 2, 2, 2, +}; + +static RE_UINT8 re_other_lowercase_stage_4[] = { + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 2, 3, 0, 4, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 6, 7, 0, + 0, 8, 9, 0, 0, 10, 0, 0, 0, 0, 0, 11, 0, 0, 0, 0, + 0, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 13, 0, 14, +}; + +static RE_UINT8 re_other_lowercase_stage_5[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 4, + 0, 0, 0, 0, 0, 0, 255, 1, 3, 0, 0, 0, 31, 0, 0, 0, + 32, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 240, 255, 255, + 255, 255, 255, 255, 255, 7, 0, 1, 0, 0, 0, 248, 255, 255, 255, 255, + 0, 0, 0, 0, 0, 0, 2, 128, 0, 0, 255, 31, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 255, 255, 0, 0, 255, 255, 255, 3, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 48, 0, 0, 0, 0, 0, 0, 1, 0, + 0, 0, 0, 0, 0, 0, 0, 3, +}; + +/* Other_Lowercase: 273 bytes. 
*/ + +RE_UINT32 re_get_other_lowercase(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 16; + code = ch ^ (f << 16); + pos = (RE_UINT32)re_other_lowercase_stage_1[f] << 4; + f = code >> 12; + code ^= f << 12; + pos = (RE_UINT32)re_other_lowercase_stage_2[pos + f] << 3; + f = code >> 9; + code ^= f << 9; + pos = (RE_UINT32)re_other_lowercase_stage_3[pos + f] << 3; + f = code >> 6; + code ^= f << 6; + pos = (RE_UINT32)re_other_lowercase_stage_4[pos + f] << 6; + pos += code; + value = (re_other_lowercase_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* Other_Uppercase. */ + +static RE_UINT8 re_other_uppercase_stage_1[] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, +}; + +static RE_UINT8 re_other_uppercase_stage_2[] = { + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +static RE_UINT8 re_other_uppercase_stage_3[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +static RE_UINT8 re_other_uppercase_stage_4[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 2, 1, 0, +}; + +static RE_UINT8 re_other_uppercase_stage_5[] = { + 0, 0, 0, 0, 255, 255, 0, 0, 0, 0, 192, 255, +}; + +/* Other_Uppercase: 117 bytes. */ + +RE_UINT32 re_get_other_uppercase(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 16; + code = ch ^ (f << 16); + pos = (RE_UINT32)re_other_uppercase_stage_1[f] << 4; + f = code >> 12; + code ^= f << 12; + pos = (RE_UINT32)re_other_uppercase_stage_2[pos + f] << 4; + f = code >> 8; + code ^= f << 8; + pos = (RE_UINT32)re_other_uppercase_stage_3[pos + f] << 3; + f = code >> 5; + code ^= f << 5; + pos = (RE_UINT32)re_other_uppercase_stage_4[pos + f] << 5; + pos += code; + value = (re_other_uppercase_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* Noncharacter_Code_Point. 
*/ + +static RE_UINT8 re_noncharacter_code_point_stage_1[] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, +}; + +static RE_UINT8 re_noncharacter_code_point_stage_2[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, +}; + +static RE_UINT8 re_noncharacter_code_point_stage_3[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, + 0, 0, 0, 0, 0, 0, 0, 2, +}; + +static RE_UINT8 re_noncharacter_code_point_stage_4[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, + 0, 0, 0, 0, 0, 0, 0, 2, +}; + +static RE_UINT8 re_noncharacter_code_point_stage_5[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255, 255, 255, 255, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 192, +}; + +/* Noncharacter_Code_Point: 121 bytes. */ + +RE_UINT32 re_get_noncharacter_code_point(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 16; + code = ch ^ (f << 16); + pos = (RE_UINT32)re_noncharacter_code_point_stage_1[f] << 4; + f = code >> 12; + code ^= f << 12; + pos = (RE_UINT32)re_noncharacter_code_point_stage_2[pos + f] << 3; + f = code >> 9; + code ^= f << 9; + pos = (RE_UINT32)re_noncharacter_code_point_stage_3[pos + f] << 3; + f = code >> 6; + code ^= f << 6; + pos = (RE_UINT32)re_noncharacter_code_point_stage_4[pos + f] << 6; + pos += code; + value = (re_noncharacter_code_point_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* Other_Grapheme_Extend. 
*/ + +static RE_UINT8 re_other_grapheme_extend_stage_1[] = { + 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, +}; + +static RE_UINT8 re_other_grapheme_extend_stage_2[] = { + 0, 1, 2, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +}; + +static RE_UINT8 re_other_grapheme_extend_stage_3[] = { + 0, 0, 0, 0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 4, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 6, 7, 0, 0, 0, 0, 0, 0, 0, +}; + +static RE_UINT8 re_other_grapheme_extend_stage_4[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, + 0, 0, 0, 0, 1, 2, 1, 2, 0, 0, 0, 3, 1, 2, 0, 4, + 5, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 8, 0, 0, +}; + +static RE_UINT8 re_other_grapheme_extend_stage_5[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, + 0, 0, 128, 0, 0, 0, 0, 0, 4, 0, 96, 0, 0, 0, 0, 0, + 0, 128, 0, 128, 0, 0, 0, 0, 0, 48, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 192, 0, 0, 0, 0, 0, 192, 0, 0, 0, 0, + 0, 0, 0, 0, 32, 192, 7, 0, +}; + +/* Other_Grapheme_Extend: 249 bytes. */ + +RE_UINT32 re_get_other_grapheme_extend(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 16; + code = ch ^ (f << 16); + pos = (RE_UINT32)re_other_grapheme_extend_stage_1[f] << 4; + f = code >> 12; + code ^= f << 12; + pos = (RE_UINT32)re_other_grapheme_extend_stage_2[pos + f] << 3; + f = code >> 9; + code ^= f << 9; + pos = (RE_UINT32)re_other_grapheme_extend_stage_3[pos + f] << 3; + f = code >> 6; + code ^= f << 6; + pos = (RE_UINT32)re_other_grapheme_extend_stage_4[pos + f] << 6; + pos += code; + value = (re_other_grapheme_extend_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* IDS_Binary_Operator. 
*/ + +static RE_UINT8 re_ids_binary_operator_stage_1[] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, +}; + +static RE_UINT8 re_ids_binary_operator_stage_2[] = { + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +static RE_UINT8 re_ids_binary_operator_stage_3[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, +}; + +static RE_UINT8 re_ids_binary_operator_stage_4[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, +}; + +static RE_UINT8 re_ids_binary_operator_stage_5[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 243, 15, +}; + +/* IDS_Binary_Operator: 97 bytes. */ + +RE_UINT32 re_get_ids_binary_operator(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 16; + code = ch ^ (f << 16); + pos = (RE_UINT32)re_ids_binary_operator_stage_1[f] << 4; + f = code >> 12; + code ^= f << 12; + pos = (RE_UINT32)re_ids_binary_operator_stage_2[pos + f] << 3; + f = code >> 9; + code ^= f << 9; + pos = (RE_UINT32)re_ids_binary_operator_stage_3[pos + f] << 3; + f = code >> 6; + code ^= f << 6; + pos = (RE_UINT32)re_ids_binary_operator_stage_4[pos + f] << 6; + pos += code; + value = (re_ids_binary_operator_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* IDS_Trinary_Operator. */ + +static RE_UINT8 re_ids_trinary_operator_stage_1[] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, +}; + +static RE_UINT8 re_ids_trinary_operator_stage_2[] = { + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +static RE_UINT8 re_ids_trinary_operator_stage_3[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, +}; + +static RE_UINT8 re_ids_trinary_operator_stage_4[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, +}; + +static RE_UINT8 re_ids_trinary_operator_stage_5[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, 0, +}; + +/* IDS_Trinary_Operator: 97 bytes. 
*/ + +RE_UINT32 re_get_ids_trinary_operator(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 16; + code = ch ^ (f << 16); + pos = (RE_UINT32)re_ids_trinary_operator_stage_1[f] << 4; + f = code >> 12; + code ^= f << 12; + pos = (RE_UINT32)re_ids_trinary_operator_stage_2[pos + f] << 3; + f = code >> 9; + code ^= f << 9; + pos = (RE_UINT32)re_ids_trinary_operator_stage_3[pos + f] << 3; + f = code >> 6; + code ^= f << 6; + pos = (RE_UINT32)re_ids_trinary_operator_stage_4[pos + f] << 6; + pos += code; + value = (re_ids_trinary_operator_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* Radical. */ + +static RE_UINT8 re_radical_stage_1[] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, +}; + +static RE_UINT8 re_radical_stage_2[] = { + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +static RE_UINT8 re_radical_stage_3[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, +}; + +static RE_UINT8 re_radical_stage_4[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 2, 2, 3, 2, 2, 2, 2, 2, 2, 4, 0, +}; + +static RE_UINT8 re_radical_stage_5[] = { + 0, 0, 0, 0, 255, 255, 255, 251, 255, 255, 255, 255, 255, 255, 15, 0, + 255, 255, 63, 0, +}; + +/* Radical: 117 bytes. */ + +RE_UINT32 re_get_radical(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 16; + code = ch ^ (f << 16); + pos = (RE_UINT32)re_radical_stage_1[f] << 4; + f = code >> 12; + code ^= f << 12; + pos = (RE_UINT32)re_radical_stage_2[pos + f] << 3; + f = code >> 9; + code ^= f << 9; + pos = (RE_UINT32)re_radical_stage_3[pos + f] << 4; + f = code >> 5; + code ^= f << 5; + pos = (RE_UINT32)re_radical_stage_4[pos + f] << 5; + pos += code; + value = (re_radical_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* Unified_Ideograph. 
*/ + +static RE_UINT8 re_unified_ideograph_stage_1[] = { + 0, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, +}; + +static RE_UINT8 re_unified_ideograph_stage_2[] = { + 0, 0, 0, 1, 2, 3, 3, 3, 3, 4, 0, 0, 0, 0, 0, 5, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 6, 7, 0, 0, 0, 0, +}; + +static RE_UINT8 re_unified_ideograph_stage_3[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 3, 0, 0, 0, 0, 0, 4, 0, 0, + 1, 1, 1, 5, 1, 1, 1, 1, 1, 1, 1, 6, 7, 0, 0, 0, +}; + +static RE_UINT8 re_unified_ideograph_stage_4[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 2, 0, 1, 1, 1, 1, 1, 1, 1, 3, + 4, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 5, 1, 1, 1, 1, + 1, 1, 1, 1, 6, 1, 1, 1, 7, 0, 0, 0, 0, 0, 0, 0, +}; + +static RE_UINT8 re_unified_ideograph_stage_5[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 63, 0, 255, 31, 0, 0, 0, 0, 0, 0, + 0, 192, 26, 128, 154, 3, 0, 0, 255, 255, 127, 0, 0, 0, 0, 0, + 255, 255, 255, 255, 255, 255, 31, 0, 255, 255, 255, 63, 0, 0, 0, 0, +}; + +/* Unified_Ideograph: 257 bytes. */ + +RE_UINT32 re_get_unified_ideograph(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 16; + code = ch ^ (f << 16); + pos = (RE_UINT32)re_unified_ideograph_stage_1[f] << 4; + f = code >> 12; + code ^= f << 12; + pos = (RE_UINT32)re_unified_ideograph_stage_2[pos + f] << 3; + f = code >> 9; + code ^= f << 9; + pos = (RE_UINT32)re_unified_ideograph_stage_3[pos + f] << 3; + f = code >> 6; + code ^= f << 6; + pos = (RE_UINT32)re_unified_ideograph_stage_4[pos + f] << 6; + pos += code; + value = (re_unified_ideograph_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* Other_Default_Ignorable_Code_Point. 
*/ + +static RE_UINT8 re_other_default_ignorable_code_point_stage_1[] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, + 1, +}; + +static RE_UINT8 re_other_default_ignorable_code_point_stage_2[] = { + 0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 6, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, +}; + +static RE_UINT8 re_other_default_ignorable_code_point_stage_3[] = { + 0, 1, 0, 0, 0, 0, 0, 0, 2, 0, 0, 3, 0, 0, 0, 0, + 4, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, + 7, 8, 8, 8, 8, 8, 8, 8, +}; + +static RE_UINT8 re_other_default_ignorable_code_point_stage_4[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, + 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, + 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, + 0, 0, 0, 0, 0, 0, 6, 7, 8, 0, 9, 9, 0, 0, 0, 10, + 9, 9, 9, 9, 9, 9, 9, 9, +}; + +static RE_UINT8 re_other_default_ignorable_code_point_stage_5[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, + 0, 0, 0, 0, 32, 0, 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, + 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255, 1, + 253, 255, 255, 255, 0, 0, 0, 0, 255, 255, 255, 255, 255, 255, 255, 255, + 0, 0, 0, 0, 0, 0, 255, 255, +}; + +/* Other_Default_Ignorable_Code_Point: 281 bytes. 
*/ + +RE_UINT32 re_get_other_default_ignorable_code_point(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 16; + code = ch ^ (f << 16); + pos = (RE_UINT32)re_other_default_ignorable_code_point_stage_1[f] << 4; + f = code >> 12; + code ^= f << 12; + pos = (RE_UINT32)re_other_default_ignorable_code_point_stage_2[pos + f] << 3; + f = code >> 9; + code ^= f << 9; + pos = (RE_UINT32)re_other_default_ignorable_code_point_stage_3[pos + f] << 3; + f = code >> 6; + code ^= f << 6; + pos = (RE_UINT32)re_other_default_ignorable_code_point_stage_4[pos + f] << 6; + pos += code; + value = (re_other_default_ignorable_code_point_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* Deprecated. */ + +static RE_UINT8 re_deprecated_stage_1[] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, + 1, 1, +}; + +static RE_UINT8 re_deprecated_stage_2[] = { + 0, 1, 2, 3, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 5, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, +}; + +static RE_UINT8 re_deprecated_stage_3[] = { + 0, 1, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 3, + 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, + 5, 0, 0, 6, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, +}; + +static RE_UINT8 re_deprecated_stage_4[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, + 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, + 0, 6, 0, 0, 0, 0, 0, 0, 7, 8, 8, 8, 0, 0, 0, 0, +}; + +static RE_UINT8 re_deprecated_stage_5[] = { + 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 8, 0, 0, 0, 128, 2, + 24, 0, 0, 0, 0, 252, 0, 0, 0, 6, 0, 0, 2, 0, 0, 0, + 255, 255, 255, 255, +}; + +/* Deprecated: 230 bytes. 
*/ + +RE_UINT32 re_get_deprecated(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 15; + code = ch ^ (f << 15); + pos = (RE_UINT32)re_deprecated_stage_1[f] << 4; + f = code >> 11; + code ^= f << 11; + pos = (RE_UINT32)re_deprecated_stage_2[pos + f] << 3; + f = code >> 8; + code ^= f << 8; + pos = (RE_UINT32)re_deprecated_stage_3[pos + f] << 3; + f = code >> 5; + code ^= f << 5; + pos = (RE_UINT32)re_deprecated_stage_4[pos + f] << 5; + pos += code; + value = (re_deprecated_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* Soft_Dotted. */ + +static RE_UINT8 re_soft_dotted_stage_1[] = { + 0, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, +}; + +static RE_UINT8 re_soft_dotted_stage_2[] = { + 0, 1, 1, 2, 3, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5, 1, 1, 1, 1, 1, +}; + +static RE_UINT8 re_soft_dotted_stage_3[] = { + 0, 1, 2, 3, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 6, 7, 5, 8, 9, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 10, 5, 5, 5, 5, 5, 5, 5, 11, 12, 13, 5, +}; + +static RE_UINT8 re_soft_dotted_stage_4[] = { + 0, 0, 0, 1, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, + 0, 0, 3, 4, 5, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, + 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 9, 10, 11, 0, 0, 0, 12, 0, 0, 0, 0, 13, 0, + 0, 0, 0, 14, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0, 0, + 0, 0, 0, 16, 0, 0, 0, 0, 0, 17, 18, 0, 19, 20, 0, 21, + 0, 22, 23, 0, 24, 0, 17, 18, 0, 19, 20, 0, 21, 0, 0, 0, +}; + +static RE_UINT8 re_soft_dotted_stage_5[] = { + 0, 0, 0, 0, 0, 6, 0, 0, 0, 128, 0, 0, 0, 2, 0, 0, + 0, 1, 0, 0, 0, 0, 0, 32, 0, 0, 4, 0, 0, 0, 8, 0, + 0, 0, 64, 1, 4, 0, 0, 0, 0, 0, 64, 0, 16, 1, 0, 0, + 0, 32, 0, 0, 0, 8, 0, 0, 0, 0, 2, 0, 0, 3, 0, 0, + 0, 0, 0, 16, 12, 0, 0, 0, 0, 0, 192, 0, 0, 12, 0, 0, + 0, 0, 0, 192, 0, 0, 12, 0, 192, 0, 0, 0, 0, 0, 0, 12, + 0, 192, 0, 0, +}; + +/* 
Soft_Dotted: 342 bytes. */ + +RE_UINT32 re_get_soft_dotted(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 15; + code = ch ^ (f << 15); + pos = (RE_UINT32)re_soft_dotted_stage_1[f] << 4; + f = code >> 11; + code ^= f << 11; + pos = (RE_UINT32)re_soft_dotted_stage_2[pos + f] << 3; + f = code >> 8; + code ^= f << 8; + pos = (RE_UINT32)re_soft_dotted_stage_3[pos + f] << 3; + f = code >> 5; + code ^= f << 5; + pos = (RE_UINT32)re_soft_dotted_stage_4[pos + f] << 5; + pos += code; + value = (re_soft_dotted_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* Logical_Order_Exception. */ + +static RE_UINT8 re_logical_order_exception_stage_1[] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, +}; + +static RE_UINT8 re_logical_order_exception_stage_2[] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +}; + +static RE_UINT8 re_logical_order_exception_stage_3[] = { + 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 2, 0, 0, +}; + +static RE_UINT8 re_logical_order_exception_stage_4[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, + 0, 0, 2, 0, 0, 0, 0, 0, +}; + +static RE_UINT8 re_logical_order_exception_stage_5[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 31, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 96, 26, +}; + +/* Logical_Order_Exception: 121 bytes. 
*/ + +RE_UINT32 re_get_logical_order_exception(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 16; + code = ch ^ (f << 16); + pos = (RE_UINT32)re_logical_order_exception_stage_1[f] << 4; + f = code >> 12; + code ^= f << 12; + pos = (RE_UINT32)re_logical_order_exception_stage_2[pos + f] << 3; + f = code >> 9; + code ^= f << 9; + pos = (RE_UINT32)re_logical_order_exception_stage_3[pos + f] << 3; + f = code >> 6; + code ^= f << 6; + pos = (RE_UINT32)re_logical_order_exception_stage_4[pos + f] << 6; + pos += code; + value = (re_logical_order_exception_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* Other_ID_Start. */ + +static RE_UINT8 re_other_id_start_stage_1[] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, +}; + +static RE_UINT8 re_other_id_start_stage_2[] = { + 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +static RE_UINT8 re_other_id_start_stage_3[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, +}; + +static RE_UINT8 re_other_id_start_stage_4[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, + 0, 0, 2, 0, 0, 0, 0, 0, +}; + +static RE_UINT8 re_other_id_start_stage_5[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 64, 0, 0, + 0, 0, 0, 24, 0, 0, 0, 0, +}; + +/* Other_ID_Start: 113 bytes. */ + +RE_UINT32 re_get_other_id_start(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 16; + code = ch ^ (f << 16); + pos = (RE_UINT32)re_other_id_start_stage_1[f] << 3; + f = code >> 13; + code ^= f << 13; + pos = (RE_UINT32)re_other_id_start_stage_2[pos + f] << 4; + f = code >> 9; + code ^= f << 9; + pos = (RE_UINT32)re_other_id_start_stage_3[pos + f] << 3; + f = code >> 6; + code ^= f << 6; + pos = (RE_UINT32)re_other_id_start_stage_4[pos + f] << 6; + pos += code; + value = (re_other_id_start_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* Other_ID_Continue. 
*/ + +static RE_UINT8 re_other_id_continue_stage_1[] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, +}; + +static RE_UINT8 re_other_id_continue_stage_2[] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +}; + +static RE_UINT8 re_other_id_continue_stage_3[] = { + 0, 1, 2, 2, 2, 2, 2, 2, 2, 3, 2, 2, 4, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, +}; + +static RE_UINT8 re_other_id_continue_stage_4[] = { + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 4, +}; + +static RE_UINT8 re_other_id_continue_stage_5[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 0, + 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 254, 3, 0, + 0, 0, 0, 4, 0, 0, 0, 0, +}; + +/* Other_ID_Continue: 145 bytes. */ + +RE_UINT32 re_get_other_id_continue(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 16; + code = ch ^ (f << 16); + pos = (RE_UINT32)re_other_id_continue_stage_1[f] << 3; + f = code >> 13; + code ^= f << 13; + pos = (RE_UINT32)re_other_id_continue_stage_2[pos + f] << 4; + f = code >> 9; + code ^= f << 9; + pos = (RE_UINT32)re_other_id_continue_stage_3[pos + f] << 3; + f = code >> 6; + code ^= f << 6; + pos = (RE_UINT32)re_other_id_continue_stage_4[pos + f] << 6; + pos += code; + value = (re_other_id_continue_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* STerm. 
*/ + +static RE_UINT8 re_sterm_stage_1[] = { + 0, 1, 2, 3, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, +}; + +static RE_UINT8 re_sterm_stage_2[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 3, 3, 9, 10, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 11, 12, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 13, + 3, 3, 14, 3, 15, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, +}; + +static RE_UINT8 re_sterm_stage_3[] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, 4, 5, 6, 7, + 1, 1, 8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 9, 1, 1, 1, 1, 1, 10, 1, 1, 1, 1, 1, 11, 1, 12, 1, + 13, 1, 14, 1, 1, 15, 16, 1, 17, 1, 1, 1, 1, 1, 1, 1, + 18, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 19, 1, 1, 1, + 20, 1, 1, 1, 1, 1, 1, 1, 1, 21, 1, 1, 22, 23, 1, 1, + 24, 25, 26, 27, 28, 29, 1, 30, 1, 1, 1, 1, 31, 1, 32, 1, + 1, 1, 1, 1, 33, 1, 1, 1, 34, 35, 36, 37, 1, 1, 1, 1, +}; + +static RE_UINT8 re_sterm_stage_4[] = { + 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 3, 0, 0, 0, + 4, 0, 0, 0, 0, 0, 5, 0, 6, 0, 0, 0, 0, 0, 0, 7, + 0, 0, 0, 8, 0, 0, 9, 0, 0, 0, 0, 10, 0, 0, 0, 11, + 0, 12, 0, 0, 13, 0, 0, 0, 0, 0, 8, 0, 0, 14, 0, 0, + 0, 0, 15, 0, 0, 16, 0, 17, 0, 18, 19, 0, 0, 11, 0, 0, + 20, 0, 0, 0, 0, 0, 0, 4, 21, 0, 0, 0, 0, 0, 0, 22, + 0, 0, 0, 23, 0, 0, 21, 0, 0, 24, 0, 0, 0, 0, 25, 0, + 0, 0, 26, 0, 0, 0, 0, 27, 0, 0, 0, 28, 0, 0, 29, 0, + 1, 0, 0, 30, 0, 0, 23, 0, 0, 0, 31, 0, 0, 17, 32, 0, + 0, 0, 33, 0, 0, 0, 34, 0, +}; + +static RE_UINT8 re_sterm_stage_5[] = { + 0, 0, 0, 0, 2, 64, 0, 128, 0, 0, 0, 80, 0, 2, 0, 0, + 0, 0, 0, 128, 0, 0, 16, 0, 7, 0, 0, 0, 0, 0, 0, 2, + 48, 0, 0, 0, 0, 12, 0, 0, 132, 1, 0, 0, 0, 64, 0, 0, + 0, 0, 96, 0, 8, 2, 0, 0, 0, 15, 0, 0, 0, 0, 0, 204, + 0, 0, 0, 24, 0, 0, 0, 192, 0, 0, 0, 48, 128, 3, 0, 0, + 4, 0, 0, 0, 0, 192, 0, 0, 0, 0, 136, 0, 0, 0, 192, 0, + 0, 128, 0, 0, 0, 3, 0, 0, 0, 0, 0, 
224, 0, 0, 3, 0, + 0, 8, 0, 0, 0, 0, 196, 0, 2, 0, 0, 0, 128, 1, 0, 0, + 3, 0, 0, 0, 14, 0, 0, 0, 96, 0, 0, 0, +}; + +/* STerm: 568 bytes. */ + +RE_UINT32 re_get_sterm(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 14; + code = ch ^ (f << 14); + pos = (RE_UINT32)re_sterm_stage_1[f] << 4; + f = code >> 10; + code ^= f << 10; + pos = (RE_UINT32)re_sterm_stage_2[pos + f] << 3; + f = code >> 7; + code ^= f << 7; + pos = (RE_UINT32)re_sterm_stage_3[pos + f] << 2; + f = code >> 5; + code ^= f << 5; + pos = (RE_UINT32)re_sterm_stage_4[pos + f] << 5; + pos += code; + value = (re_sterm_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* Variation_Selector. */ + +static RE_UINT8 re_variation_selector_stage_1[] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, + 1, +}; + +static RE_UINT8 re_variation_selector_stage_2[] = { + 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +static RE_UINT8 re_variation_selector_stage_3[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, +}; + +static RE_UINT8 re_variation_selector_stage_4[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, + 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 4, +}; + +static RE_UINT8 re_variation_selector_stage_5[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 56, 0, 0, 0, 0, 0, 0, + 255, 255, 0, 0, 0, 0, 0, 0, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 0, 0, +}; + +/* Variation_Selector: 169 bytes. 
*/ + +RE_UINT32 re_get_variation_selector(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 16; + code = ch ^ (f << 16); + pos = (RE_UINT32)re_variation_selector_stage_1[f] << 4; + f = code >> 12; + code ^= f << 12; + pos = (RE_UINT32)re_variation_selector_stage_2[pos + f] << 3; + f = code >> 9; + code ^= f << 9; + pos = (RE_UINT32)re_variation_selector_stage_3[pos + f] << 3; + f = code >> 6; + code ^= f << 6; + pos = (RE_UINT32)re_variation_selector_stage_4[pos + f] << 6; + pos += code; + value = (re_variation_selector_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* Pattern_White_Space. */ + +static RE_UINT8 re_pattern_white_space_stage_1[] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, +}; + +static RE_UINT8 re_pattern_white_space_stage_2[] = { + 0, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +}; + +static RE_UINT8 re_pattern_white_space_stage_3[] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 1, 1, 1, 1, 1, 1, 1, +}; + +static RE_UINT8 re_pattern_white_space_stage_4[] = { + 0, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 3, 1, 1, 1, 1, 1, 1, 1, +}; + +static RE_UINT8 re_pattern_white_space_stage_5[] = { + 0, 62, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 32, 0, 0, 0, 0, 0, 0, 0, 0, 192, 0, 0, 0, 3, 0, 0, +}; + +/* Pattern_White_Space: 129 bytes. 
*/ + +RE_UINT32 re_get_pattern_white_space(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 16; + code = ch ^ (f << 16); + pos = (RE_UINT32)re_pattern_white_space_stage_1[f] << 4; + f = code >> 12; + code ^= f << 12; + pos = (RE_UINT32)re_pattern_white_space_stage_2[pos + f] << 3; + f = code >> 9; + code ^= f << 9; + pos = (RE_UINT32)re_pattern_white_space_stage_3[pos + f] << 3; + f = code >> 6; + code ^= f << 6; + pos = (RE_UINT32)re_pattern_white_space_stage_4[pos + f] << 6; + pos += code; + value = (re_pattern_white_space_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* Pattern_Syntax. */ + +static RE_UINT8 re_pattern_syntax_stage_1[] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, +}; + +static RE_UINT8 re_pattern_syntax_stage_2[] = { + 0, 1, 1, 1, 2, 3, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +}; + +static RE_UINT8 re_pattern_syntax_stage_3[] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 3, 4, 4, 5, 4, 4, 6, 4, 4, 4, 4, 1, 1, 7, 1, + 8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 10, 1, +}; + +static RE_UINT8 re_pattern_syntax_stage_4[] = { + 0, 1, 2, 2, 0, 3, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, + 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, + 8, 8, 8, 9, 10, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, + 11, 12, 0, 0, 0, 0, 0, 0, 0, 13, 0, 0, 0, 0, 0, 0, + 0, 0, 14, 0, 0, 0, 0, 0, +}; + +static RE_UINT8 re_pattern_syntax_stage_5[] = { + 0, 0, 0, 0, 254, 255, 0, 252, 1, 0, 0, 120, 254, 90, 67, 136, + 0, 0, 128, 0, 0, 0, 255, 255, 255, 0, 255, 127, 254, 255, 239, 127, + 255, 255, 255, 255, 255, 255, 63, 0, 0, 0, 240, 255, 14, 255, 255, 255, + 1, 0, 1, 0, 0, 0, 0, 192, 96, 0, 0, 0, +}; + +/* Pattern_Syntax: 277 bytes. 
*/ + +RE_UINT32 re_get_pattern_syntax(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 16; + code = ch ^ (f << 16); + pos = (RE_UINT32)re_pattern_syntax_stage_1[f] << 5; + f = code >> 11; + code ^= f << 11; + pos = (RE_UINT32)re_pattern_syntax_stage_2[pos + f] << 3; + f = code >> 8; + code ^= f << 8; + pos = (RE_UINT32)re_pattern_syntax_stage_3[pos + f] << 3; + f = code >> 5; + code ^= f << 5; + pos = (RE_UINT32)re_pattern_syntax_stage_4[pos + f] << 5; + pos += code; + value = (re_pattern_syntax_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* Hangul_Syllable_Type. */ + +static RE_UINT8 re_hangul_syllable_type_stage_1[] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, +}; + +static RE_UINT8 re_hangul_syllable_type_stage_2[] = { + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +static RE_UINT8 re_hangul_syllable_type_stage_3[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 3, 0, 0, 0, 0, 0, 4, 5, 6, 7, 8, 9, 10, 4, + 5, 6, 7, 8, 9, 10, 4, 5, 6, 7, 8, 9, 10, 4, 5, 6, + 7, 8, 9, 10, 4, 5, 6, 7, 8, 9, 10, 4, 5, 6, 7, 8, + 9, 10, 4, 5, 6, 7, 8, 9, 10, 4, 5, 6, 7, 8, 9, 10, + 4, 5, 6, 7, 8, 9, 10, 4, 5, 6, 7, 8, 9, 10, 4, 5, + 6, 7, 8, 9, 10, 4, 5, 6, 7, 8, 9, 10, 4, 5, 6, 11, +}; + +static RE_UINT8 re_hangul_syllable_type_stage_4[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 4, + 5, 6, 6, 7, 6, 6, 6, 5, 6, 6, 7, 6, 6, 6, 5, 6, + 6, 7, 6, 6, 6, 5, 6, 6, 7, 6, 6, 6, 5, 6, 6, 7, + 6, 6, 6, 5, 6, 6, 7, 6, 6, 6, 5, 6, 6, 7, 6, 6, + 6, 5, 6, 6, 7, 6, 6, 6, 5, 6, 6, 7, 6, 6, 6, 5, + 6, 6, 7, 6, 6, 6, 5, 6, 6, 7, 6, 6, 6, 5, 6, 
6, + 7, 6, 6, 6, 5, 6, 6, 7, 6, 6, 6, 5, 6, 6, 7, 6, + 6, 6, 5, 6, 6, 7, 6, 6, 6, 5, 6, 6, 7, 6, 6, 6, + 6, 5, 6, 6, 8, 0, 2, 2, 9, 10, 3, 3, 3, 3, 3, 11, +}; + +static RE_UINT8 re_hangul_syllable_type_stage_5[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, + 1, 1, 1, 1, 1, 0, 0, 0, 4, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 5, 5, 5, + 5, 5, 5, 5, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 0, + 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, +}; + +/* Hangul_Syllable_Type: 497 bytes. */ + +RE_UINT32 re_get_hangul_syllable_type(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 16; + code = ch ^ (f << 16); + pos = (RE_UINT32)re_hangul_syllable_type_stage_1[f] << 5; + f = code >> 11; + code ^= f << 11; + pos = (RE_UINT32)re_hangul_syllable_type_stage_2[pos + f] << 4; + f = code >> 7; + code ^= f << 7; + pos = (RE_UINT32)re_hangul_syllable_type_stage_3[pos + f] << 4; + f = code >> 3; + code ^= f << 3; + pos = (RE_UINT32)re_hangul_syllable_type_stage_4[pos + f] << 3; + value = re_hangul_syllable_type_stage_5[pos + code]; + + return value; +} + +/* Bidi_Class. 
*/ + +static RE_UINT8 re_bidi_class_stage_1[] = { + 0, 1, 2, 3, 4, 5, 5, 5, 5, 5, 6, 5, 5, 5, 5, 7, + 8, 9, 5, 5, 5, 5, 10, 5, 5, 5, 5, 5, 5, 11, 12, 13, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 14, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 14, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 14, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 14, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 14, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 14, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 14, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 14, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 14, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 14, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 14, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 14, + 15, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 14, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 14, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 14, +}; + +static RE_UINT8 re_bidi_class_stage_2[] = { + 0, 1, 2, 2, 2, 3, 4, 5, 2, 6, 2, 7, 8, 9, 10, 11, + 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, + 28, 29, 2, 2, 2, 2, 30, 31, 32, 2, 2, 2, 2, 33, 34, 35, + 36, 37, 38, 39, 40, 2, 41, 42, 43, 44, 2, 45, 2, 2, 2, 46, + 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 52, 52, 52, 57, 58, 52, + 2, 2, 52, 52, 52, 52, 59, 2, 2, 60, 61, 62, 63, 64, 52, 65, + 66, 67, 2, 68, 69, 70, 71, 72, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 73, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 74, 2, 2, 75, 76, 77, 78, + 79, 80, 81, 82, 83, 84, 2, 85, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 86, 87, 87, 87, 88, 89, 90, 91, 92, 93, + 2, 2, 94, 95, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 96, 96, 97, 96, 98, 96, 99, 96, 96, 96, 96, 96, 100, 96, 96, 96, + 101, 102, 
103, 104, 2, 2, 2, 2, 2, 2, 2, 2, 2, 105, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 106, + 2, 2, 107, 108, 109, 2, 110, 2, 2, 2, 2, 2, 2, 111, 112, 113, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 87, 114, 96, 96, + 115, 116, 117, 2, 2, 2, 118, 119, 120, 121, 122, 123, 124, 125, 126, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 127, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 127, + 128, 128, 129, 130, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, +}; + +static RE_UINT8 re_bidi_class_stage_3[] = { + 0, 1, 2, 3, 4, 5, 4, 6, 7, 8, 9, 10, 11, 12, 11, 12, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 13, 14, 14, 15, 16, + 17, 17, 17, 17, 17, 17, 17, 18, 19, 11, 11, 11, 11, 11, 11, 20, + 21, 11, 11, 11, 11, 11, 11, 11, 22, 23, 17, 24, 25, 26, 26, 26, + 27, 28, 29, 29, 30, 17, 31, 32, 29, 29, 29, 29, 29, 33, 34, 35, + 29, 36, 29, 17, 28, 29, 29, 29, 29, 29, 37, 32, 26, 26, 38, 39, + 26, 40, 41, 26, 26, 42, 26, 26, 26, 26, 29, 29, 29, 29, 43, 44, + 45, 11, 11, 46, 47, 48, 49, 11, 50, 11, 11, 51, 52, 11, 49, 53, + 54, 11, 11, 51, 55, 50, 11, 56, 54, 11, 11, 51, 57, 11, 49, 58, + 50, 11, 11, 59, 52, 60, 49, 11, 61, 11, 11, 11, 62, 11, 11, 63, + 11, 11, 11, 64, 65, 66, 49, 67, 11, 11, 11, 51, 68, 11, 49, 11, + 11, 11, 11, 11, 52, 11, 49, 11, 11, 11, 11, 11, 69, 70, 11, 11, + 11, 11, 11, 71, 72, 11, 11, 11, 11, 11, 11, 73, 74, 11, 11, 11, + 11, 75, 11, 76, 11, 11, 11, 77, 78, 79, 17, 80, 60, 11, 11, 11, + 11, 11, 81, 82, 11, 83, 84, 85, 86, 87, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 81, 11, 11, 11, 88, 11, 11, 11, 11, 11, 11, + 4, 11, 11, 11, 11, 11, 11, 11, 89, 90, 11, 11, 11, 11, 11, 11, + 11, 91, 11, 
91, 11, 49, 11, 49, 11, 11, 11, 92, 93, 94, 11, 88, + 95, 11, 11, 11, 11, 11, 11, 11, 11, 11, 96, 11, 11, 11, 11, 11, + 11, 11, 97, 98, 99, 11, 11, 11, 11, 11, 11, 11, 11, 100, 16, 16, + 11, 101, 11, 11, 11, 102, 103, 104, 105, 11, 11, 106, 61, 11, 107, 105, + 108, 11, 109, 11, 11, 11, 110, 108, 11, 11, 111, 112, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 113, 114, 115, 11, 11, 11, 11, 17, 17, 116, 111, + 11, 11, 11, 117, 118, 119, 119, 120, 121, 16, 122, 123, 124, 125, 126, 127, + 128, 11, 129, 129, 129, 17, 17, 84, 130, 131, 132, 133, 134, 16, 11, 11, + 135, 16, 16, 16, 16, 16, 16, 16, 16, 136, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 137, 11, 11, 11, 5, + 16, 138, 16, 16, 16, 16, 16, 139, 16, 16, 140, 11, 141, 11, 16, 16, + 142, 143, 11, 11, 11, 11, 144, 16, 16, 16, 145, 16, 16, 16, 16, 16, + 146, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 147, 88, 11, 11, + 11, 11, 11, 11, 11, 11, 148, 149, 11, 11, 11, 11, 11, 11, 11, 150, + 11, 11, 11, 11, 11, 11, 17, 17, 16, 16, 16, 151, 11, 11, 11, 11, + 16, 152, 16, 16, 16, 16, 16, 139, 16, 16, 16, 16, 16, 137, 11, 151, + 153, 16, 154, 155, 11, 11, 11, 11, 11, 156, 4, 11, 11, 11, 11, 157, + 11, 11, 11, 11, 16, 16, 139, 11, 11, 120, 11, 11, 11, 16, 11, 158, + 11, 11, 11, 146, 159, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 160, + 11, 11, 11, 11, 11, 100, 11, 161, 11, 11, 11, 11, 16, 16, 16, 16, + 11, 16, 16, 16, 140, 11, 11, 11, 119, 11, 11, 11, 11, 11, 150, 162, + 11, 150, 11, 11, 11, 11, 11, 108, 16, 16, 163, 11, 11, 11, 11, 11, + 164, 11, 11, 11, 11, 11, 11, 11, 165, 11, 166, 167, 11, 11, 11, 168, + 11, 11, 11, 11, 115, 11, 17, 108, 11, 11, 169, 11, 170, 108, 11, 11, + 45, 11, 11, 171, 11, 11, 11, 11, 11, 11, 172, 173, 174, 11, 11, 11, + 11, 11, 11, 175, 50, 11, 68, 60, 11, 11, 11, 11, 11, 11, 176, 11, + 11, 177, 178, 26, 26, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, + 29, 29, 29, 179, 29, 29, 29, 29, 29, 29, 29, 29, 29, 8, 8, 180, + 17, 88, 116, 16, 16, 181, 182, 29, 29, 29, 29, 29, 29, 29, 29, 
183, + 184, 3, 4, 5, 4, 5, 137, 11, 11, 11, 11, 11, 11, 11, 185, 186, + 187, 11, 11, 11, 16, 16, 16, 16, 141, 151, 11, 11, 11, 11, 11, 87, + 26, 26, 26, 26, 26, 26, 26, 26, 26, 188, 26, 26, 26, 26, 26, 26, + 189, 26, 26, 190, 26, 26, 26, 26, 26, 26, 26, 191, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 192, 193, 50, 11, 11, 194, 116, 14, 137, 11, + 108, 11, 11, 195, 11, 11, 11, 11, 45, 11, 196, 197, 11, 11, 11, 11, + 108, 11, 11, 198, 11, 11, 11, 11, 11, 11, 199, 200, 11, 11, 11, 11, + 150, 45, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 201, 202, + 203, 11, 204, 11, 11, 11, 11, 11, 16, 16, 16, 16, 205, 11, 11, 11, + 16, 16, 16, 16, 16, 140, 11, 11, 11, 11, 11, 11, 11, 157, 11, 11, + 11, 206, 11, 11, 161, 11, 11, 11, 135, 11, 11, 11, 207, 208, 208, 208, + 29, 29, 29, 29, 29, 29, 29, 209, 16, 16, 151, 16, 16, 16, 16, 16, + 16, 139, 210, 211, 146, 146, 11, 11, 212, 11, 11, 11, 11, 11, 133, 11, + 16, 16, 4, 213, 16, 16, 16, 147, 16, 139, 16, 16, 214, 11, 16, 4, + 16, 16, 16, 210, 215, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 216, + 16, 16, 16, 217, 139, 16, 218, 11, 11, 11, 11, 11, 11, 11, 11, 5, + 16, 16, 16, 16, 219, 11, 11, 11, 16, 16, 16, 16, 137, 11, 11, 11, + 16, 16, 16, 16, 16, 16, 16, 139, 11, 11, 11, 11, 11, 11, 11, 220, + 8, 8, 8, 8, 8, 8, 8, 8, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 8, +}; + +static RE_UINT8 re_bidi_class_stage_4[] = { + 0, 0, 1, 2, 0, 0, 0, 3, 4, 5, 6, 7, 8, 8, 9, 10, + 11, 12, 12, 12, 12, 12, 13, 10, 12, 12, 13, 14, 0, 15, 0, 0, + 0, 0, 0, 0, 16, 5, 17, 18, 19, 20, 21, 10, 12, 12, 12, 12, + 12, 13, 12, 12, 12, 12, 22, 12, 23, 10, 10, 10, 12, 24, 10, 17, + 10, 10, 10, 10, 25, 25, 25, 25, 12, 26, 12, 27, 12, 17, 12, 12, + 12, 27, 12, 12, 28, 25, 29, 12, 12, 12, 27, 30, 31, 25, 25, 25, + 25, 25, 25, 32, 33, 32, 34, 34, 34, 34, 34, 34, 35, 36, 37, 38, + 25, 25, 39, 40, 40, 40, 40, 40, 40, 40, 41, 25, 35, 35, 42, 43, + 44, 40, 40, 40, 40, 45, 25, 46, 25, 47, 48, 49, 8, 8, 50, 40, + 51, 40, 40, 40, 40, 45, 25, 25, 34, 34, 
52, 25, 25, 53, 54, 34, + 34, 55, 32, 25, 25, 31, 31, 56, 34, 34, 31, 34, 40, 25, 25, 25, + 25, 25, 25, 39, 57, 12, 12, 12, 12, 12, 58, 59, 60, 25, 59, 61, + 60, 25, 12, 12, 62, 12, 12, 12, 61, 12, 12, 12, 12, 12, 12, 59, + 60, 59, 12, 61, 63, 12, 30, 12, 64, 12, 12, 12, 64, 28, 65, 29, + 29, 61, 12, 12, 60, 66, 59, 61, 67, 12, 12, 12, 12, 12, 12, 65, + 12, 58, 12, 12, 58, 12, 12, 12, 59, 12, 12, 61, 13, 10, 68, 12, + 12, 12, 12, 62, 59, 62, 69, 29, 12, 64, 12, 12, 12, 12, 10, 70, + 12, 12, 12, 29, 12, 12, 58, 12, 62, 71, 12, 12, 61, 25, 57, 30, + 12, 28, 25, 57, 61, 25, 66, 59, 12, 12, 25, 29, 12, 12, 29, 12, + 12, 72, 73, 26, 60, 25, 25, 57, 25, 69, 12, 60, 25, 25, 60, 25, + 25, 25, 25, 59, 12, 12, 12, 60, 69, 25, 64, 64, 12, 12, 29, 62, + 59, 12, 12, 12, 60, 59, 12, 12, 58, 64, 12, 61, 12, 12, 12, 61, + 10, 10, 26, 12, 74, 12, 12, 12, 12, 12, 13, 11, 62, 59, 12, 12, + 12, 66, 25, 29, 12, 58, 60, 25, 25, 12, 30, 61, 10, 10, 75, 76, + 12, 12, 61, 12, 57, 28, 59, 12, 58, 12, 60, 12, 11, 26, 12, 12, + 12, 12, 12, 23, 12, 28, 65, 12, 12, 58, 25, 57, 71, 60, 25, 59, + 28, 25, 25, 65, 25, 12, 12, 12, 12, 69, 57, 59, 12, 12, 28, 25, + 29, 12, 12, 12, 62, 29, 66, 12, 12, 58, 29, 72, 12, 12, 12, 25, + 25, 62, 12, 12, 57, 25, 25, 25, 69, 25, 59, 61, 12, 59, 12, 12, + 25, 57, 12, 12, 12, 12, 12, 77, 26, 12, 12, 24, 12, 12, 12, 24, + 12, 12, 12, 22, 78, 78, 79, 80, 10, 10, 81, 82, 83, 84, 10, 10, + 10, 85, 10, 10, 10, 10, 10, 86, 0, 87, 88, 0, 89, 8, 90, 70, + 8, 8, 90, 70, 83, 83, 83, 83, 17, 70, 26, 12, 12, 20, 11, 23, + 10, 77, 91, 92, 12, 12, 23, 12, 10, 11, 23, 26, 12, 12, 91, 12, + 93, 10, 10, 10, 10, 26, 12, 12, 10, 20, 10, 10, 10, 12, 12, 12, + 10, 70, 12, 12, 10, 10, 70, 12, 10, 10, 8, 8, 8, 8, 8, 12, + 12, 12, 23, 10, 10, 10, 10, 24, 24, 10, 10, 10, 10, 10, 10, 11, + 12, 24, 70, 28, 29, 12, 24, 10, 12, 12, 12, 28, 10, 10, 10, 12, + 10, 10, 17, 10, 94, 11, 10, 10, 11, 12, 62, 29, 11, 23, 12, 24, + 12, 12, 95, 11, 12, 12, 13, 12, 12, 12, 12, 70, 12, 12, 12, 10, + 12, 
13, 70, 12, 12, 12, 12, 13, 96, 25, 25, 97, 26, 12, 12, 12, + 12, 12, 11, 12, 58, 58, 28, 12, 12, 64, 10, 12, 12, 12, 98, 12, + 12, 10, 12, 12, 12, 62, 25, 29, 12, 28, 25, 25, 28, 62, 29, 59, + 12, 12, 60, 57, 64, 64, 12, 12, 28, 12, 12, 59, 69, 65, 59, 62, + 12, 61, 59, 61, 12, 12, 12, 99, 34, 34, 100, 34, 40, 40, 40, 101, + 40, 40, 40, 102, 103, 104, 10, 105, 106, 70, 107, 12, 40, 40, 40, 108, + 109, 5, 6, 7, 5, 110, 10, 70, 0, 0, 111, 112, 91, 12, 12, 12, + 34, 34, 34, 113, 31, 33, 34, 25, 34, 34, 114, 52, 34, 34, 115, 10, + 35, 35, 35, 35, 35, 35, 35, 116, 12, 12, 25, 25, 28, 57, 64, 12, + 12, 28, 25, 60, 25, 59, 12, 12, 12, 62, 25, 57, 12, 12, 28, 61, + 25, 66, 12, 12, 12, 28, 29, 12, 117, 0, 118, 25, 57, 60, 25, 12, + 12, 12, 62, 29, 119, 120, 12, 12, 12, 91, 12, 12, 13, 12, 12, 121, + 8, 8, 8, 8, 122, 40, 40, 40, 10, 10, 10, 70, 24, 10, 10, 70, + 8, 8, 123, 12, 10, 17, 10, 10, 10, 20, 70, 12, 20, 10, 10, 10, + 10, 10, 24, 11, 10, 10, 10, 26, 10, 10, 12, 12, 11, 24, 10, 10, + 12, 12, 12, 124, +}; + +static RE_UINT8 re_bidi_class_stage_5[] = { + 11, 11, 11, 11, 11, 8, 7, 8, 9, 7, 11, 11, 7, 7, 7, 8, + 9, 10, 10, 4, 4, 4, 10, 10, 10, 10, 10, 3, 6, 3, 6, 6, + 2, 2, 2, 2, 2, 2, 6, 10, 10, 10, 10, 10, 10, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 10, 10, 10, 10, 11, 11, 7, 11, 11, + 6, 10, 4, 4, 10, 10, 0, 10, 10, 11, 10, 10, 4, 4, 2, 2, + 10, 0, 10, 10, 10, 2, 0, 10, 0, 10, 10, 0, 0, 0, 10, 10, + 0, 10, 10, 10, 12, 12, 12, 12, 10, 10, 0, 0, 0, 0, 10, 0, + 0, 0, 0, 12, 12, 12, 0, 0, 0, 0, 0, 4, 1, 12, 12, 12, + 12, 12, 1, 12, 1, 12, 12, 1, 1, 1, 1, 1, 5, 5, 5, 5, + 5, 13, 10, 10, 13, 4, 4, 13, 6, 13, 10, 10, 12, 12, 12, 13, + 13, 13, 13, 13, 13, 13, 13, 12, 5, 5, 4, 5, 5, 13, 13, 13, + 12, 13, 13, 13, 13, 13, 12, 12, 12, 5, 10, 12, 12, 13, 13, 12, + 12, 10, 12, 12, 12, 12, 13, 13, 2, 2, 13, 13, 13, 12, 13, 13, + 1, 1, 1, 12, 1, 1, 10, 10, 10, 10, 1, 1, 1, 1, 12, 12, + 12, 12, 1, 1, 12, 12, 12, 0, 0, 0, 12, 0, 12, 0, 0, 0, + 0, 12, 12, 12, 0, 12, 0, 0, 0, 0, 12, 12, 0, 0, 4, 
4, + 0, 12, 12, 0, 12, 0, 0, 12, 12, 12, 0, 12, 0, 4, 0, 0, + 10, 4, 10, 0, 12, 0, 12, 12, 10, 10, 10, 0, 12, 0, 12, 0, + 0, 12, 0, 12, 0, 12, 10, 10, 9, 0, 0, 0, 10, 10, 10, 12, + 12, 12, 11, 0, 0, 10, 0, 10, 9, 9, 9, 9, 9, 9, 9, 11, + 11, 11, 0, 1, 9, 7, 16, 17, 18, 14, 15, 6, 4, 4, 4, 4, + 4, 10, 10, 10, 6, 10, 10, 10, 10, 10, 10, 9, 11, 11, 19, 20, + 21, 22, 11, 11, 2, 0, 0, 0, 2, 2, 3, 3, 0, 10, 0, 0, + 0, 0, 4, 0, 10, 10, 3, 4, 9, 10, 10, 10, 0, 12, 12, 10, + 12, 12, 12, 10, 12, 12, 10, 10, 4, 4, 0, 0, 0, 1, 12, 1, + 1, 3, 1, 1, 13, 13, 10, 10, 13, 10, 13, 13, 6, 10, 6, 0, + 10, 6, 10, 10, 10, 10, 10, 4, 10, 10, 3, 3, 10, 4, 4, 10, + 13, 13, 13, 11, 0, 10, 10, 4, 10, 4, 4, 0, 11, 10, 10, 10, + 10, 10, 11, 11, 1, 1, 1, 10, 12, 12, 12, 1, 1, 10, 10, 10, + 5, 5, 5, 1, 0, 0, 0, 11, 11, 11, 11, 12, 10, 10, 12, 12, + 12, 10, 0, 0, 0, 0, 2, 2, 10, 10, 13, 13, 2, 2, 2, 0, + 0, 0, 11, 11, +}; + +/* Bidi_Class: 3216 bytes. */ + +RE_UINT32 re_get_bidi_class(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 12; + code = ch ^ (f << 12); + pos = (RE_UINT32)re_bidi_class_stage_1[f] << 5; + f = code >> 7; + code ^= f << 7; + pos = (RE_UINT32)re_bidi_class_stage_2[pos + f] << 3; + f = code >> 4; + code ^= f << 4; + pos = (RE_UINT32)re_bidi_class_stage_3[pos + f] << 2; + f = code >> 2; + code ^= f << 2; + pos = (RE_UINT32)re_bidi_class_stage_4[pos + f] << 2; + value = re_bidi_class_stage_5[pos + code]; + + return value; +} + +/* Canonical_Combining_Class. 
*/ + +static RE_UINT8 re_canonical_combining_class_stage_1[] = { + 0, 1, 2, 2, 2, 3, 2, 4, 5, 2, 2, 2, 2, 2, 6, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, +}; + +static RE_UINT8 re_canonical_combining_class_stage_2[] = { + 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, + 14, 0, 0, 15, 0, 0, 0, 16, 17, 18, 19, 20, 21, 22, 0, 0, + 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24, 25, 0, 0, + 26, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 27, 0, 28, 29, 30, 31, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, 0, 0, 33, 0, + 0, 34, 0, 0, 0, 0, 0, 0, 0, 0, 35, 0, 0, 0, 0, 0, + 36, 37, 0, 0, 0, 0, 38, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 39, 40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +static RE_UINT8 re_canonical_combining_class_stage_3[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 0, 0, 0, 0, + 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 6, 7, 8, 0, + 9, 0, 10, 11, 0, 0, 12, 13, 14, 15, 16, 0, 0, 0, 0, 17, + 18, 19, 20, 0, 0, 0, 0, 21, 0, 22, 23, 0, 0, 22, 24, 0, + 0, 22, 24, 0, 0, 22, 24, 0, 0, 22, 24, 0, 0, 0, 24, 0, + 0, 0, 25, 0, 0, 22, 24, 0, 0, 0, 24, 0, 0, 0, 26, 0, + 0, 27, 28, 0, 0, 29, 30, 0, 31, 32, 0, 33, 34, 0, 35, 0, + 0, 36, 0, 0, 37, 0, 0, 0, 0, 0, 38, 0, 0, 0, 0, 0, + 39, 39, 0, 0, 0, 0, 40, 0, 0, 0, 0, 0, 0, 41, 0, 0, + 0, 42, 0, 0, 0, 0, 0, 0, 43, 0, 0, 44, 0, 0, 0, 0, + 0, 45, 46, 47, 0, 48, 0, 49, 0, 50, 0, 0, 0, 0, 51, 52, + 0, 0, 0, 0, 0, 0, 53, 54, 0, 0, 0, 0, 
0, 0, 55, 56, + 0, 0, 0, 0, 0, 0, 0, 57, 0, 0, 0, 58, 0, 0, 0, 59, + 0, 60, 0, 0, 61, 0, 0, 0, 0, 0, 0, 62, 63, 0, 0, 64, + 65, 0, 0, 0, 0, 0, 46, 66, 0, 67, 68, 0, 0, 69, 70, 0, + 0, 0, 0, 0, 0, 71, 72, 73, 0, 0, 0, 0, 0, 0, 0, 24, + 74, 0, 0, 0, 0, 0, 0, 0, 0, 75, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 76, 77, 78, 0, 0, 0, 0, 0, 0, + 0, 0, 65, 0, 0, 79, 0, 0, 80, 81, 0, 0, 0, 0, 70, 0, + 0, 0, 0, 0, 0, 82, 0, 0, 0, 0, 0, 83, 84, 85, 0, 0, + 0, 0, 86, 0, 0, 0, 0, 0, +}; + +static RE_UINT8 re_canonical_combining_class_stage_4[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 3, 4, + 5, 6, 7, 4, 4, 8, 9, 10, 1, 11, 12, 13, 14, 15, 16, 17, + 18, 1, 1, 1, 0, 0, 0, 0, 19, 1, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 20, 21, 22, 1, 23, 4, 21, 24, 25, 26, 27, 28, + 29, 30, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 31, 0, + 0, 0, 32, 33, 34, 35, 1, 36, 0, 0, 0, 0, 37, 0, 0, 0, + 0, 0, 0, 0, 0, 38, 1, 39, 14, 39, 40, 41, 0, 0, 0, 0, + 0, 0, 0, 0, 42, 0, 0, 0, 0, 0, 0, 0, 43, 36, 44, 45, + 21, 45, 46, 0, 0, 0, 0, 0, 0, 0, 19, 1, 21, 0, 0, 0, + 0, 0, 0, 0, 0, 38, 47, 1, 1, 48, 48, 49, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 50, 0, 0, 21, 43, 51, 52, 21, 35, 53, + 0, 0, 0, 0, 0, 0, 0, 54, 0, 0, 0, 55, 56, 57, 0, 0, + 0, 0, 0, 55, 0, 0, 0, 0, 0, 0, 0, 55, 0, 58, 0, 0, + 0, 0, 59, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 60, 0, + 0, 0, 61, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 62, 0, + 0, 0, 63, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 0, + 0, 0, 0, 0, 0, 65, 66, 0, 0, 0, 0, 0, 67, 68, 69, 70, + 71, 72, 0, 0, 0, 0, 0, 0, 0, 73, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 74, 75, 0, 0, 0, 0, 76, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 48, 0, 0, 0, 0, 0, 77, 0, 0, + 0, 0, 0, 0, 59, 0, 0, 78, 0, 0, 79, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 80, 0, 0, 0, 0, 0, 0, 19, 81, 0, + 77, 0, 0, 0, 0, 48, 1, 82, 0, 0, 0, 0, 0, 54, 0, 0, + 0, 77, 0, 0, 0, 0, 0, 0, 0, 0, 19, 10, 1, 0, 0, 0, + 0, 0, 83, 0, 0, 0, 0, 0, 0, 84, 0, 0, 83, 0, 0, 0, + 0, 0, 0, 0, 0, 74, 0, 0, 0, 0, 0, 0, 53, 9, 12, 4, + 85, 8, 86, 76, 0, 57, 0, 0, 21, 1, 21, 87, 88, 1, 1, 
1, + 1, 53, 0, 0, 0, 0, 0, 89, 0, 0, 0, 0, 90, 1, 91, 57, + 78, 92, 93, 4, 57, 0, 0, 0, 0, 0, 0, 19, 49, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 94, 1, 1, 1, 1, 1, 1, 1, 1, + 0, 0, 95, 96, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 97, 0, + 0, 0, 0, 19, 0, 1, 1, 49, 0, 0, 0, 0, 0, 0, 0, 19, + 0, 0, 0, 0, 49, 0, 0, 0, 0, 59, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 49, 0, 0, 0, 0, 0, 98, 64, 0, 0, 0, 0, + 0, 0, 0, 0, 94, 0, 0, 0, 0, 0, 0, 0, 74, 0, 0, 0, + 77, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 99, 100, 57, 38, + 78, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 59, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 101, 1, 53, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 76, 0, 0, 0, 102, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 103, 94, 0, 0, 0, 0, 0, 0, 104, 0, + 53, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 94, 77, 0, 0, + 0, 0, 0, 0, 0, 105, 0, 0, 0, 106, 107, 108, 109, 0, 98, 4, + 110, 48, 23, 0, 0, 0, 0, 0, 0, 0, 38, 49, 0, 0, 0, 0, + 38, 57, 0, 0, 0, 0, 0, 0, +}; + +static RE_UINT8 re_canonical_combining_class_stage_5[] = { + 0, 0, 0, 0, 50, 50, 50, 50, 50, 51, 45, 45, 45, 45, 51, 43, + 45, 45, 45, 45, 45, 41, 41, 45, 45, 45, 45, 41, 41, 45, 45, 45, + 1, 1, 1, 1, 1, 45, 45, 45, 45, 50, 50, 50, 50, 54, 50, 45, + 45, 45, 50, 50, 50, 45, 45, 0, 50, 50, 50, 45, 45, 45, 45, 50, + 51, 45, 45, 50, 52, 53, 53, 52, 53, 53, 52, 50, 0, 0, 0, 50, + 0, 45, 50, 50, 50, 50, 45, 50, 50, 50, 46, 45, 50, 50, 45, 45, + 50, 46, 49, 50, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 14, 15, + 16, 17, 0, 18, 0, 19, 20, 0, 50, 45, 0, 13, 25, 26, 27, 0, + 0, 0, 0, 22, 23, 24, 25, 26, 27, 28, 29, 50, 50, 45, 45, 50, + 45, 50, 50, 45, 30, 0, 0, 0, 0, 0, 50, 50, 50, 0, 0, 50, + 50, 0, 45, 50, 50, 45, 0, 0, 0, 31, 0, 0, 50, 45, 50, 50, + 45, 45, 50, 45, 45, 50, 45, 50, 45, 50, 50, 0, 50, 50, 0, 50, + 0, 50, 50, 50, 50, 50, 0, 0, 0, 45, 45, 45, 50, 45, 45, 45, + 22, 23, 24, 50, 50, 50, 50, 0, 2, 0, 0, 0, 0, 4, 0, 0, + 0, 50, 45, 50, 50, 0, 0, 0, 0, 32, 33, 0, 0, 0, 4, 0, + 34, 34, 4, 0, 35, 35, 35, 35, 36, 36, 0, 0, 37, 37, 37, 37, + 45, 45, 0, 0, 0, 45, 0, 45, 0, 43, 0, 0, 0, 38, 
39, 0, + 40, 0, 0, 0, 0, 0, 39, 39, 39, 39, 0, 0, 39, 0, 50, 50, + 4, 0, 50, 50, 0, 0, 45, 0, 0, 0, 0, 2, 0, 4, 4, 0, + 0, 45, 0, 0, 4, 0, 0, 0, 0, 50, 0, 0, 0, 49, 0, 0, + 0, 46, 50, 45, 45, 0, 0, 0, 50, 0, 0, 45, 0, 0, 4, 4, + 0, 0, 2, 0, 50, 0, 1, 1, 1, 0, 0, 0, 50, 53, 42, 45, + 41, 50, 50, 50, 52, 45, 50, 45, 50, 50, 1, 1, 1, 1, 1, 50, + 0, 1, 1, 50, 45, 50, 1, 1, 0, 0, 0, 4, 0, 0, 44, 49, + 51, 46, 47, 47, 0, 3, 3, 0, 0, 0, 0, 45, 50, 0, 50, 50, + 45, 0, 0, 50, 0, 0, 21, 0, 0, 45, 0, 50, 50, 1, 45, 0, + 0, 4, 2, 0, 0, 0, 4, 2, 0, 43, 43, 1, 1, 1, 0, 0, + 0, 48, 43, 43, 43, 43, 43, 0, 45, 45, 45, 0, +}; + +/* Canonical_Combining_Class: 1828 bytes. */ + +RE_UINT32 re_get_canonical_combining_class(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 13; + code = ch ^ (f << 13); + pos = (RE_UINT32)re_canonical_combining_class_stage_1[f] << 5; + f = code >> 8; + code ^= f << 8; + pos = (RE_UINT32)re_canonical_combining_class_stage_2[pos + f] << 3; + f = code >> 5; + code ^= f << 5; + pos = (RE_UINT32)re_canonical_combining_class_stage_3[pos + f] << 3; + f = code >> 2; + code ^= f << 2; + pos = (RE_UINT32)re_canonical_combining_class_stage_4[pos + f] << 2; + value = re_canonical_combining_class_stage_5[pos + code]; + + return value; +} + +/* Decomposition_Type. 
*/ + +static RE_UINT8 re_decomposition_type_stage_1[] = { + 0, 1, 2, 2, 2, 3, 4, 5, 6, 2, 2, 2, 2, 2, 7, 8, + 2, 2, 2, 2, 2, 2, 2, 9, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, +}; + +static RE_UINT8 re_decomposition_type_stage_2[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 16, 7, 17, 18, 19, + 20, 21, 22, 23, 24, 7, 7, 7, 7, 7, 25, 7, 26, 27, 28, 29, + 30, 31, 32, 33, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 34, 7, 7, 7, 7, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 36, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 35, 37, 38, 39, 40, 41, 42, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 43, 44, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 45, 7, 7, 46, 47, 48, 49, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 50, 7, + 7, 51, 52, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 35, 35, 53, 7, 7, 7, 7, 7, +}; + +static RE_UINT8 re_decomposition_type_stage_3[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 3, 5, + 6, 7, 8, 9, 10, 11, 8, 12, 0, 0, 13, 14, 15, 16, 17, 18, + 6, 19, 20, 21, 0, 0, 0, 0, 0, 0, 0, 22, 0, 23, 24, 0, + 0, 0, 0, 0, 25, 0, 0, 26, 27, 14, 28, 14, 29, 30, 0, 31, + 32, 33, 0, 33, 0, 32, 0, 34, 0, 0, 0, 0, 35, 36, 37, 38, + 0, 0, 0, 0, 0, 0, 0, 0, 
39, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 40, 0, 0, 0, 0, 41, 0, 0, 0, 0, 42, 43, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 33, 44, 0, 45, 0, 0, 0, 0, 0, 0, 46, 47, 0, 0, + 0, 0, 0, 48, 0, 49, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 50, 51, 0, 0, 0, 52, 0, 0, 53, 0, 0, 0, + 0, 0, 0, 0, 54, 0, 0, 0, 0, 0, 0, 0, 55, 0, 0, 0, + 0, 0, 0, 0, 53, 0, 0, 0, 0, 0, 0, 0, 0, 56, 0, 0, + 0, 0, 0, 57, 0, 0, 0, 0, 0, 0, 0, 57, 0, 58, 0, 0, + 59, 0, 0, 0, 60, 61, 33, 62, 63, 60, 61, 33, 0, 0, 0, 0, + 0, 0, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 65, + 66, 67, 0, 68, 69, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 70, 71, 72, 73, 74, 75, 0, 76, 73, 73, 0, 0, 0, 0, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 77, 6, 6, 6, 6, 6, 78, + 6, 79, 6, 6, 79, 80, 6, 81, 6, 6, 6, 82, 83, 84, 6, 85, + 86, 87, 88, 89, 90, 91, 0, 92, 93, 94, 95, 0, 0, 0, 0, 0, + 96, 97, 98, 99, 100, 101, 102, 102, 103, 104, 105, 0, 106, 0, 0, 0, + 107, 0, 108, 109, 110, 0, 111, 112, 112, 0, 113, 0, 0, 0, 114, 0, + 0, 0, 115, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 116, 117, 102, 102, 102, 118, 116, 116, 119, 0, + 120, 0, 0, 0, 0, 0, 0, 121, 0, 0, 0, 0, 0, 122, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 124, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 125, 0, 0, 0, 0, 0, 57, + 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 126, 0, 0, + 127, 0, 0, 128, 129, 130, 131, 132, 0, 133, 129, 130, 131, 132, 0, 134, + 0, 0, 0, 135, 102, 102, 102, 102, 136, 137, 0, 0, 0, 0, 0, 0, + 102, 136, 102, 102, 138, 139, 116, 140, 116, 116, 116, 116, 141, 116, 116, 140, + 142, 142, 142, 142, 142, 143, 102, 144, 142, 142, 142, 142, 142, 142, 102, 145, + 0, 0, 0, 0, 0, 0, 0, 146, 0, 0, 0, 0, 0, 0, 0, 147, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 21, 0, 0, 0, 0, 0, + 81, 148, 149, 6, 6, 6, 81, 6, 6, 6, 6, 6, 6, 78, 0, 0, + 150, 151, 152, 153, 154, 155, 156, 156, 157, 156, 158, 159, 0, 160, 161, 162, + 163, 163, 
163, 163, 163, 163, 164, 165, 165, 166, 167, 167, 167, 168, 169, 170, + 163, 171, 172, 173, 0, 174, 175, 176, 177, 178, 165, 179, 180, 0, 0, 181, + 0, 182, 0, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 192, 193, 194, + 195, 196, 196, 196, 196, 196, 197, 198, 198, 198, 198, 199, 200, 201, 202, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 203, 204, 0, 0, 0, 0, 0, + 0, 0, 205, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 205, 206, 0, 0, 0, 0, 207, 14, 0, 0, 0, + 208, 208, 208, 208, 208, 209, 208, 208, 208, 210, 211, 212, 213, 208, 208, 208, + 214, 215, 208, 216, 217, 218, 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, + 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, 219, 208, 208, 208, 208, 208, + 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, 220, 208, 208, 208, + 213, 208, 221, 222, 223, 224, 225, 226, 227, 228, 229, 228, 0, 0, 0, 0, + 230, 102, 231, 142, 142, 0, 232, 0, 0, 233, 0, 0, 0, 0, 0, 0, + 234, 142, 142, 235, 236, 237, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 6, 81, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +static RE_UINT8 re_decomposition_type_stage_4[] = { + 0, 0, 0, 0, 1, 0, 2, 3, 4, 5, 6, 7, 8, 9, 8, 8, + 10, 11, 10, 12, 10, 11, 10, 9, 8, 8, 8, 8, 13, 8, 8, 8, + 8, 12, 8, 8, 14, 8, 10, 15, 16, 8, 17, 8, 12, 8, 8, 8, + 8, 8, 8, 15, 12, 0, 0, 18, 19, 0, 0, 0, 0, 20, 20, 21, + 8, 8, 8, 22, 8, 13, 8, 8, 23, 12, 8, 8, 8, 8, 8, 13, + 0, 13, 8, 8, 8, 0, 0, 0, 24, 24, 25, 0, 0, 0, 20, 5, + 24, 25, 0, 0, 9, 19, 0, 0, 0, 19, 26, 27, 0, 21, 11, 22, + 0, 0, 13, 8, 0, 0, 13, 11, 28, 29, 0, 0, 30, 5, 31, 0, + 9, 18, 0, 11, 0, 0, 32, 0, 0, 13, 0, 0, 33, 0, 0, 0, + 8, 13, 13, 8, 13, 8, 13, 8, 8, 12, 12, 0, 0, 3, 0, 0, + 13, 11, 0, 0, 0, 34, 35, 0, 36, 0, 0, 0, 18, 0, 0, 0, + 32, 19, 0, 0, 0, 0, 8, 8, 0, 0, 18, 19, 0, 0, 0, 9, + 18, 27, 0, 0, 0, 0, 10, 27, 0, 0, 37, 19, 0, 0, 0, 12, + 0, 19, 0, 0, 0, 0, 13, 19, 0, 0, 19, 0, 19, 18, 22, 0, + 0, 0, 27, 11, 3, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 1, + 18, 0, 0, 32, 27, 18, 0, 19, 18, 38, 17, 0, 32, 0, 0, 0, 
+ 0, 27, 0, 0, 0, 0, 0, 25, 0, 27, 36, 36, 27, 0, 0, 0, + 0, 0, 18, 32, 9, 0, 0, 0, 0, 0, 0, 39, 24, 24, 39, 24, + 24, 24, 24, 40, 24, 24, 24, 24, 41, 42, 43, 0, 0, 0, 25, 0, + 0, 0, 44, 24, 8, 8, 45, 0, 8, 8, 12, 0, 8, 12, 8, 12, + 8, 8, 46, 46, 8, 8, 8, 12, 8, 22, 8, 47, 21, 22, 8, 8, + 8, 13, 8, 10, 13, 22, 8, 48, 49, 50, 30, 0, 51, 3, 0, 0, + 0, 30, 0, 52, 3, 53, 0, 54, 0, 3, 5, 0, 0, 3, 0, 3, + 55, 24, 24, 24, 42, 42, 42, 43, 42, 42, 42, 56, 0, 0, 35, 0, + 57, 34, 58, 59, 59, 60, 61, 62, 63, 64, 65, 66, 66, 67, 68, 59, + 69, 61, 62, 0, 70, 70, 70, 70, 20, 20, 20, 20, 0, 0, 71, 0, + 0, 0, 13, 0, 0, 0, 0, 27, 0, 0, 0, 10, 0, 19, 32, 19, + 0, 36, 0, 72, 35, 0, 0, 0, 32, 37, 32, 0, 36, 0, 0, 10, + 12, 12, 12, 0, 0, 0, 0, 8, 8, 0, 13, 12, 0, 0, 33, 0, + 73, 73, 73, 73, 73, 20, 20, 20, 20, 74, 73, 73, 73, 73, 75, 0, + 0, 0, 0, 35, 0, 30, 0, 0, 0, 0, 0, 19, 0, 0, 0, 76, + 0, 0, 0, 44, 0, 0, 0, 3, 20, 5, 0, 0, 77, 0, 0, 0, + 0, 26, 30, 0, 0, 0, 0, 36, 36, 36, 36, 36, 36, 46, 32, 0, + 9, 22, 33, 12, 0, 19, 3, 78, 0, 37, 11, 79, 34, 20, 20, 20, + 20, 20, 20, 30, 4, 24, 24, 24, 20, 73, 0, 0, 80, 73, 73, 73, + 73, 73, 73, 75, 20, 20, 20, 81, 81, 81, 81, 81, 81, 81, 20, 20, + 82, 81, 81, 81, 20, 20, 20, 83, 25, 0, 0, 0, 0, 0, 55, 0, + 36, 10, 8, 11, 36, 33, 13, 8, 20, 30, 0, 0, 3, 20, 0, 46, + 59, 59, 84, 8, 8, 11, 8, 36, 9, 22, 8, 15, 85, 86, 86, 86, + 86, 86, 86, 86, 86, 85, 85, 85, 87, 85, 86, 86, 88, 0, 0, 0, + 89, 90, 91, 92, 85, 87, 86, 85, 85, 85, 93, 87, 94, 94, 94, 94, + 94, 95, 95, 95, 95, 95, 95, 95, 95, 96, 97, 97, 97, 97, 97, 97, + 97, 97, 97, 98, 99, 99, 99, 99, 99, 100, 94, 94, 101, 95, 95, 95, + 95, 95, 95, 102, 97, 99, 99, 103, 104, 97, 105, 106, 107, 105, 108, 105, + 104, 96, 95, 105, 96, 109, 110, 97, 111, 106, 112, 105, 95, 106, 113, 95, + 96, 106, 0, 0, 94, 94, 94, 114, 115, 115, 116, 0, 115, 115, 115, 115, + 115, 117, 118, 20, 119, 120, 120, 120, 120, 119, 120, 0, 121, 122, 123, 123, + 124, 91, 125, 126, 90, 125, 127, 127, 127, 127, 126, 91, 125, 127, 
127, 127, + 127, 127, 127, 127, 127, 127, 127, 126, 125, 126, 91, 128, 129, 130, 130, 130, + 130, 130, 130, 130, 131, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 133, + 134, 132, 134, 132, 134, 132, 134, 135, 130, 136, 132, 133, 0, 0, 27, 19, + 0, 0, 18, 0, 0, 0, 0, 13, 8, 19, 0, 0, 0, 0, 18, 8, + 59, 59, 59, 59, 59, 137, 59, 59, 59, 59, 59, 137, 138, 139, 61, 137, + 59, 59, 66, 61, 59, 61, 59, 59, 59, 66, 140, 61, 59, 137, 59, 137, + 59, 59, 66, 140, 59, 141, 142, 59, 137, 59, 59, 59, 59, 62, 59, 59, + 59, 59, 59, 142, 139, 143, 61, 59, 140, 59, 144, 0, 138, 145, 144, 61, + 139, 143, 144, 144, 139, 143, 140, 59, 140, 59, 61, 141, 59, 59, 66, 59, + 59, 59, 59, 0, 61, 61, 66, 59, 20, 20, 30, 0, 20, 20, 146, 75, + 0, 0, 4, 0, 147, 0, 0, 0, 148, 0, 0, 0, 81, 81, 148, 0, + 20, 20, 35, 0, 149, 0, 0, 0, +}; + +static RE_UINT8 re_decomposition_type_stage_5[] = { + 0, 0, 0, 0, 4, 0, 0, 0, 2, 0, 10, 0, 0, 0, 0, 2, + 0, 0, 10, 10, 2, 2, 0, 0, 2, 10, 10, 0, 17, 17, 17, 0, + 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, + 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 2, 2, 1, 1, 1, 2, + 2, 0, 0, 1, 1, 2, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, + 2, 2, 2, 2, 2, 1, 1, 1, 1, 0, 1, 1, 1, 2, 2, 2, + 10, 10, 10, 10, 10, 0, 0, 0, 0, 0, 2, 0, 0, 0, 1, 0, + 2, 2, 2, 1, 1, 2, 2, 0, 2, 2, 2, 0, 0, 2, 0, 0, + 0, 1, 0, 0, 0, 1, 1, 0, 0, 2, 2, 2, 2, 0, 0, 0, + 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 2, 10, 10, 10, 0, + 10, 10, 0, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 0, + 0, 0, 0, 10, 1, 1, 2, 1, 0, 1, 0, 1, 1, 2, 1, 2, + 1, 1, 2, 0, 1, 1, 2, 2, 2, 2, 2, 4, 0, 4, 0, 0, + 0, 0, 0, 4, 2, 0, 2, 2, 2, 0, 2, 0, 10, 10, 0, 0, + 11, 0, 0, 0, 2, 2, 3, 2, 0, 2, 3, 3, 3, 3, 3, 3, + 0, 3, 2, 0, 0, 3, 3, 3, 3, 3, 0, 0, 10, 2, 10, 0, + 3, 0, 1, 0, 3, 0, 1, 1, 3, 3, 0, 3, 3, 2, 2, 2, + 2, 3, 0, 2, 3, 0, 0, 0, 17, 17, 17, 17, 0, 17, 0, 0, + 2, 2, 0, 2, 9, 9, 9, 9, 2, 2, 9, 9, 9, 9, 9, 0, + 11, 10, 0, 0, 13, 0, 0, 0, 2, 0, 1, 12, 0, 0, 1, 12, + 16, 9, 9, 9, 16, 16, 16, 16, 2, 16, 16, 16, 2, 2, 2, 16, + 3, 3, 1, 1, 8, 7, 
8, 7, 5, 6, 8, 7, 8, 7, 5, 6, + 8, 7, 0, 0, 0, 0, 0, 8, 7, 5, 6, 8, 7, 8, 7, 8, + 7, 8, 8, 7, 5, 8, 7, 5, 8, 8, 8, 8, 7, 7, 7, 7, + 7, 7, 7, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, + 6, 8, 8, 8, 8, 7, 7, 7, 7, 5, 5, 5, 7, 8, 0, 0, + 5, 7, 5, 5, 7, 5, 7, 7, 5, 5, 7, 7, 5, 5, 7, 5, + 5, 7, 7, 5, 7, 7, 5, 7, 5, 5, 5, 7, 0, 0, 5, 5, + 5, 7, 7, 7, 5, 7, 5, 7, 8, 0, 0, 0, 12, 12, 12, 12, + 12, 12, 0, 0, 12, 0, 0, 12, 12, 2, 2, 2, 15, 15, 15, 0, + 15, 15, 15, 15, 8, 6, 8, 0, 8, 0, 8, 6, 8, 6, 8, 6, + 8, 8, 7, 8, 7, 8, 7, 5, 6, 8, 7, 8, 6, 8, 7, 5, + 7, 0, 0, 0, 0, 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 0, 0, 0, 14, 14, 14, 0, 0, 0, + 13, 13, 13, 0, 3, 0, 3, 3, 0, 0, 3, 0, 0, 3, 3, 0, + 3, 3, 3, 0, 3, 0, 3, 0, 0, 0, 3, 3, 3, 0, 0, 3, + 0, 3, 0, 3, 0, 0, 0, 3, 2, 2, 2, 9, 16, 0, 0, 0, + 16, 16, 16, 0, 9, 9, 0, 0, +}; + +/* Decomposition_Type: 2872 bytes. */ + +RE_UINT32 re_get_decomposition_type(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 13; + code = ch ^ (f << 13); + pos = (RE_UINT32)re_decomposition_type_stage_1[f] << 5; + f = code >> 8; + code ^= f << 8; + pos = (RE_UINT32)re_decomposition_type_stage_2[pos + f] << 4; + f = code >> 4; + code ^= f << 4; + pos = (RE_UINT32)re_decomposition_type_stage_3[pos + f] << 2; + f = code >> 2; + code ^= f << 2; + pos = (RE_UINT32)re_decomposition_type_stage_4[pos + f] << 2; + value = re_decomposition_type_stage_5[pos + code]; + + return value; +} + +/* East_Asian_Width. 
*/ + +static RE_UINT8 re_east_asian_width_stage_1[] = { + 0, 1, 2, 3, 4, 5, 5, 5, 5, 5, 6, 5, 5, 7, 8, 9, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 11, 10, 10, 10, 12, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 13, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 13, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 14, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 15, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 15, +}; + +static RE_UINT8 re_east_asian_width_stage_2[] = { + 0, 1, 2, 3, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 6, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 7, 8, 9, 10, 11, 12, 13, 14, 5, 15, 5, 16, 5, 5, 17, 18, + 19, 20, 21, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 23, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 24, 5, 5, 5, 5, 25, 5, 5, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 26, 5, 5, 5, 5, 5, 5, 5, 5, + 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, + 27, 27, 27, 27, 27, 27, 27, 27, 27, 22, 22, 5, 5, 5, 28, 29, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 30, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 31, 32, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 33, + 5, 34, 5, 5, 5, 
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 35, +}; + +static RE_UINT8 re_east_asian_width_stage_3[] = { + 0, 0, 1, 1, 1, 1, 1, 2, 0, 0, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 11, 0, 0, 0, 0, 0, 15, 16, 0, 0, + 0, 0, 0, 0, 0, 9, 9, 0, 0, 0, 0, 0, 17, 18, 0, 0, + 19, 19, 19, 19, 19, 19, 19, 0, 0, 20, 21, 20, 21, 0, 0, 0, + 9, 19, 19, 19, 19, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 22, 22, 22, 22, 22, 22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 23, 24, 25, 0, 0, 0, 26, 27, 0, 28, 0, 0, 0, 0, 0, + 29, 30, 31, 0, 0, 32, 33, 34, 35, 34, 0, 36, 0, 37, 38, 0, + 39, 40, 41, 42, 43, 44, 45, 0, 46, 47, 48, 49, 0, 0, 0, 0, + 0, 44, 50, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 19, 19, 19, 19, 19, 19, 19, 19, 51, 19, + 19, 19, 19, 19, 33, 19, 19, 52, 19, 53, 21, 54, 55, 56, 57, 0, + 58, 59, 0, 0, 60, 0, 61, 0, 0, 62, 0, 62, 63, 19, 64, 19, + 0, 0, 0, 65, 0, 38, 0, 66, 0, 0, 0, 0, 0, 0, 67, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 68, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 69, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 22, 70, 22, 22, 22, 22, 22, 71, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 72, 0, 73, + 74, 22, 22, 75, 76, 22, 22, 22, 22, 77, 22, 22, 22, 22, 22, 22, + 78, 22, 79, 76, 22, 22, 22, 22, 75, 22, 22, 80, 22, 22, 71, 22, + 22, 75, 22, 22, 81, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 75, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 0, 0, 0, 0, + 22, 22, 22, 22, 22, 22, 22, 22, 82, 22, 22, 22, 83, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 22, 82, 0, 0, 0, 0, 0, 0, 0, 0, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 71, 0, 0, 0, 0, 0, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 84, 0, 22, 22, 85, 86, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 87, 88, 88, 88, 88, 88, 89, 90, 90, 90, 90, 91, 92, 93, 94, 65, + 95, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 96, 19, 97, 19, 19, 19, 
34, 19, 19, 96, 0, 0, 0, 0, 0, 0, + 98, 22, 22, 80, 99, 95, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 79, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 0, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 97, +}; + +static RE_UINT8 re_east_asian_width_stage_4[] = { + 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 2, 3, 4, 5, 6, + 7, 8, 9, 7, 0, 10, 0, 0, 11, 12, 11, 13, 14, 10, 9, 14, + 8, 12, 9, 5, 15, 0, 0, 0, 16, 0, 12, 0, 0, 13, 12, 0, + 17, 0, 11, 12, 9, 11, 7, 15, 13, 0, 0, 0, 0, 0, 0, 10, + 5, 5, 5, 11, 0, 18, 17, 15, 11, 0, 7, 16, 7, 7, 7, 7, + 17, 7, 7, 7, 19, 7, 14, 0, 20, 20, 20, 20, 18, 9, 14, 14, + 9, 7, 0, 0, 8, 15, 12, 10, 0, 11, 0, 12, 17, 11, 0, 0, + 0, 0, 21, 11, 12, 15, 15, 0, 12, 10, 0, 0, 22, 10, 12, 0, + 12, 11, 12, 9, 7, 7, 7, 0, 7, 7, 14, 0, 0, 0, 15, 0, + 0, 0, 14, 0, 10, 11, 0, 0, 0, 12, 0, 0, 8, 12, 18, 12, + 15, 15, 10, 17, 18, 16, 7, 5, 0, 7, 0, 14, 0, 0, 11, 11, + 10, 0, 0, 0, 14, 7, 13, 13, 13, 13, 0, 0, 0, 15, 15, 0, + 0, 15, 0, 0, 0, 0, 0, 12, 0, 0, 23, 0, 7, 7, 19, 7, + 7, 0, 0, 0, 13, 14, 0, 0, 13, 13, 0, 14, 14, 13, 18, 13, + 14, 0, 0, 0, 13, 14, 0, 12, 0, 22, 15, 13, 0, 14, 0, 5, + 5, 0, 0, 0, 19, 19, 9, 19, 0, 0, 0, 13, 0, 7, 7, 19, + 19, 0, 7, 7, 0, 0, 0, 15, 0, 13, 7, 7, 0, 24, 1, 25, + 0, 26, 0, 0, 0, 17, 14, 0, 20, 20, 27, 20, 20, 0, 0, 0, + 20, 28, 0, 0, 20, 20, 20, 0, 29, 20, 20, 20, 20, 20, 20, 30, + 31, 20, 20, 20, 20, 30, 31, 20, 0, 31, 20, 20, 20, 20, 20, 28, + 20, 20, 30, 0, 20, 20, 7, 7, 20, 20, 20, 32, 20, 30, 0, 0, + 20, 20, 28, 0, 30, 20, 20, 20, 20, 30, 20, 0, 33, 34, 34, 34, + 34, 34, 34, 34, 35, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 37, + 38, 36, 38, 36, 38, 36, 38, 39, 34, 40, 36, 37, 28, 0, 0, 0, + 7, 7, 9, 0, 7, 7, 7, 14, 30, 0, 0, 0, 20, 20, 32, 0, +}; + +static RE_UINT8 re_east_asian_width_stage_5[] = { + 0, 0, 0, 0, 5, 5, 5, 5, 5, 5, 5, 0, 0, 1, 5, 5, + 1, 5, 5, 1, 1, 0, 1, 0, 5, 1, 1, 5, 1, 1, 1, 1, + 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 
1, 0, 0, 0, + 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, + 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, + 3, 3, 3, 3, 0, 2, 0, 0, 0, 1, 1, 0, 0, 3, 3, 0, + 0, 0, 5, 5, 5, 5, 0, 0, 0, 5, 5, 0, 3, 3, 0, 3, + 3, 3, 0, 0, 4, 3, 3, 3, 3, 3, 3, 0, 0, 3, 3, 3, + 3, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 2, 2, 2, 0, 0, 0, + 4, 4, 4, 0, +}; + +/* East_Asian_Width: 1668 bytes. */ + +RE_UINT32 re_get_east_asian_width(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 12; + code = ch ^ (f << 12); + pos = (RE_UINT32)re_east_asian_width_stage_1[f] << 4; + f = code >> 8; + code ^= f << 8; + pos = (RE_UINT32)re_east_asian_width_stage_2[pos + f] << 4; + f = code >> 4; + code ^= f << 4; + pos = (RE_UINT32)re_east_asian_width_stage_3[pos + f] << 2; + f = code >> 2; + code ^= f << 2; + pos = (RE_UINT32)re_east_asian_width_stage_4[pos + f] << 2; + value = re_east_asian_width_stage_5[pos + code]; + + return value; +} + +/* Joining_Group. 
*/ + +static RE_UINT8 re_joining_group_stage_1[] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, +}; + +static RE_UINT8 re_joining_group_stage_2[] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +}; + +static RE_UINT8 re_joining_group_stage_3[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 0, + 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +static RE_UINT8 re_joining_group_stage_4[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 0, 0, 0, 7, 8, 9, + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 0, 0, 21, 0, 22, + 0, 0, 23, 24, 25, 26, 0, 0, 0, 27, 28, 29, 30, 31, 32, 33, + 0, 0, 0, 0, 34, 35, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +static RE_UINT8 re_joining_group_stage_5[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 45, 0, 3, 3, 43, 3, 45, 3, + 4, 41, 4, 4, 13, 13, 13, 6, 6, 31, 31, 35, 35, 33, 33, 39, + 39, 1, 1, 11, 11, 55, 55, 55, 0, 9, 29, 19, 22, 24, 26, 16, + 43, 45, 45, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 29, + 0, 3, 3, 3, 0, 3, 43, 43, 45, 4, 4, 4, 4, 4, 4, 4, + 4, 13, 13, 13, 13, 13, 13, 13, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 31, 31, 31, 31, 31, 31, 31, 31, 31, 35, 35, 35, 33, 33, 39, + 1, 9, 9, 9, 9, 9, 9, 29, 29, 11, 38, 11, 19, 19, 19, 11, + 11, 11, 11, 11, 11, 22, 22, 22, 22, 26, 26, 26, 26, 56, 21, 13, + 41, 17, 17, 14, 43, 43, 43, 43, 43, 43, 43, 43, 55, 47, 55, 43, + 45, 45, 46, 46, 0, 41, 0, 0, 0, 0, 0, 0, 0, 0, 6, 31, + 0, 0, 35, 33, 1, 0, 0, 21, 2, 0, 5, 12, 12, 7, 7, 15, + 44, 50, 18, 42, 42, 48, 49, 20, 23, 25, 27, 36, 10, 8, 28, 32, + 34, 30, 7, 37, 40, 5, 12, 7, 0, 0, 0, 0, 0, 51, 52, 53, + 4, 4, 4, 4, 4, 4, 4, 13, 13, 6, 6, 31, 35, 1, 1, 1, + 9, 9, 11, 11, 11, 24, 24, 26, 26, 26, 22, 31, 31, 35, 13, 13, + 35, 31, 13, 3, 3, 55, 55, 45, 43, 43, 54, 54, 13, 35, 35, 19, + 4, 0, 13, 39, 9, 29, 22, 24, 45, 45, 31, 43, 57, 0, 0, 0, +}; + +/* Joining_Group: 
481 bytes. */ + +RE_UINT32 re_get_joining_group(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 16; + code = ch ^ (f << 16); + pos = (RE_UINT32)re_joining_group_stage_1[f] << 4; + f = code >> 12; + code ^= f << 12; + pos = (RE_UINT32)re_joining_group_stage_2[pos + f] << 5; + f = code >> 7; + code ^= f << 7; + pos = (RE_UINT32)re_joining_group_stage_3[pos + f] << 4; + f = code >> 3; + code ^= f << 3; + pos = (RE_UINT32)re_joining_group_stage_4[pos + f] << 3; + value = re_joining_group_stage_5[pos + code]; + + return value; +} + +/* Joining_Type. */ + +static RE_UINT8 re_joining_type_stage_1[] = { + 0, 1, 2, 2, 2, 3, 2, 4, 5, 2, 2, 6, 2, 2, 7, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 8, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, +}; + +static RE_UINT8 re_joining_type_stage_2[] = { + 0, 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 1, 1, 16, 1, 1, 1, 17, 18, 19, 20, 21, 22, 23, 1, 1, + 24, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 25, 26, 1, 1, + 27, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 28, 1, 29, 30, 31, 32, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 33, 1, 1, 34, 35, + 1, 36, 1, 1, 1, 1, 1, 1, 1, 1, 37, 1, 1, 1, 1, 1, + 38, 39, 1, 1, 1, 1, 40, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 41, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 42, 43, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 44, 45, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +}; + +static RE_UINT8 re_joining_type_stage_3[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 2, 5, 6, 0, 0, 0, + 0, 7, 8, 9, 10, 2, 11, 12, 13, 14, 15, 15, 16, 17, 18, 19, + 20, 21, 22, 2, 23, 24, 25, 26, 0, 0, 27, 28, 29, 15, 30, 31, + 0, 32, 33, 0, 34, 35, 0, 0, 0, 0, 36, 0, 0, 0, 37, 38, + 39, 0, 0, 40, 41, 42, 43, 0, 44, 0, 0, 45, 46, 0, 43, 0, + 47, 0, 0, 45, 48, 44, 0, 49, 47, 0, 0, 45, 50, 0, 43, 0, + 44, 0, 0, 51, 46, 52, 43, 0, 53, 0, 0, 0, 54, 0, 0, 0, + 0, 0, 0, 55, 56, 57, 43, 0, 0, 0, 0, 51, 58, 0, 43, 0, + 0, 0, 0, 0, 46, 0, 43, 0, 0, 0, 0, 0, 59, 60, 0, 0, + 0, 0, 0, 61, 62, 0, 0, 0, 0, 0, 0, 63, 64, 0, 0, 0, + 0, 65, 0, 66, 0, 0, 0, 67, 68, 69, 2, 70, 52, 0, 0, 0, + 0, 0, 71, 72, 0, 73, 28, 74, 75, 1, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 71, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 76, 0, 76, 0, 43, 0, 43, 0, 0, 0, 77, 78, 79, 0, 0, + 80, 0, 15, 15, 15, 15, 15, 81, 82, 15, 83, 0, 0, 0, 0, 0, + 0, 0, 84, 85, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 86, 0, 0, 0, 87, 88, 89, 0, 0, 0, 0, 0, 0, 0, 0, + 90, 0, 0, 91, 53, 0, 92, 90, 93, 0, 94, 0, 0, 0, 95, 93, + 0, 0, 96, 97, 0, 0, 0, 0, 0, 0, 0, 0, 0, 98, 99, 100, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 101, 96, + 102, 0, 103, 0, 0, 0, 104, 0, 0, 0, 0, 0, 0, 2, 2, 28, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 20, 93, + 0, 0, 0, 0, 0, 0, 0, 20, 0, 0, 0, 0, 0, 0, 2, 2, + 0, 0, 105, 0, 0, 0, 0, 0, 0, 106, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 20, 107, 0, 20, 0, 0, 0, 0, 0, 93, + 108, 0, 57, 0, 15, 15, 15, 109, 0, 0, 0, 0, 100, 0, 2, 93, + 0, 0, 110, 0, 111, 93, 0, 0, 39, 0, 0, 112, 0, 0, 0, 0, + 0, 0, 113, 114, 115, 0, 0, 0, 0, 0, 0, 116, 44, 0, 117, 52, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 118, 0, + 0, 119, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, + 2, 0, 101, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 20, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 120, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, + 121, 0, 0, 122, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 44, 0, 0, 123, 101, 0, 0, 0, 93, 0, 0, 124, 0, 0, 0, 0, + 39, 0, 125, 126, 0, 0, 0, 0, 93, 0, 0, 127, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 129, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 20, 39, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 130, 131, 132, 0, 105, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 76, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 44, 0, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, +}; + +static RE_UINT8 re_joining_type_stage_4[] = { + 0, 0, 0, 0, 0, 0, 0, 1, 2, 2, 2, 2, 3, 2, 4, 0, + 5, 2, 2, 2, 2, 2, 2, 6, 7, 6, 0, 0, 2, 2, 8, 9, + 10, 11, 12, 13, 14, 15, 15, 15, 16, 15, 17, 2, 0, 0, 0, 18, + 19, 20, 15, 15, 15, 15, 21, 21, 21, 21, 22, 15, 15, 15, 15, 15, + 23, 21, 21, 24, 25, 26, 2, 27, 2, 27, 28, 29, 0, 0, 18, 30, + 0, 0, 0, 3, 31, 32, 22, 33, 15, 15, 34, 23, 2, 2, 8, 35, + 15, 15, 32, 15, 15, 15, 13, 36, 24, 36, 22, 15, 0, 37, 2, 2, + 9, 0, 0, 0, 0, 0, 18, 15, 15, 15, 38, 2, 2, 0, 39, 0, + 0, 37, 6, 2, 2, 5, 5, 4, 36, 33, 12, 13, 15, 40, 5, 0, + 41, 15, 25, 42, 0, 2, 2, 2, 2, 2, 2, 8, 8, 0, 0, 0, + 0, 0, 43, 9, 5, 2, 9, 1, 5, 2, 0, 0, 37, 0, 0, 0, + 1, 0, 0, 0, 0, 0, 0, 9, 5, 9, 0, 1, 7, 0, 0, 0, + 7, 3, 27, 4, 4, 1, 0, 0, 5, 6, 9, 1, 0, 0, 0, 27, + 0, 43, 0, 0, 43, 0, 0, 0, 9, 0, 0, 1, 0, 0, 0, 37, + 9, 37, 28, 4, 0, 7, 0, 0, 0, 43, 0, 4, 0, 0, 43, 0, + 37, 44, 0, 0, 1, 2, 8, 0, 0, 3, 2, 8, 1, 2, 6, 9, + 0, 0, 2, 4, 0, 0, 4, 0, 0, 45, 1, 0, 5, 2, 2, 8, + 2, 28, 0, 5, 2, 2, 5, 2, 2, 2, 2, 9, 0, 0, 0, 5, + 28, 2, 7, 7, 0, 0, 4, 37, 5, 9, 0, 0, 43, 7, 0, 1, + 37, 9, 0, 0, 0, 6, 2, 4, 0, 43, 5, 2, 2, 0, 0, 1, + 0, 46, 47, 4, 15, 15, 0, 0, 0, 46, 15, 15, 15, 15, 48, 0, + 8, 3, 9, 0, 43, 0, 5, 0, 0, 3, 27, 0, 0, 43, 2, 8, + 44, 5, 2, 9, 3, 2, 2, 27, 2, 0, 0, 0, 0, 28, 8, 9, + 0, 0, 3, 2, 4, 0, 0, 0, 37, 4, 
6, 0, 0, 43, 4, 45, + 0, 0, 0, 2, 2, 37, 0, 0, 8, 2, 2, 2, 28, 2, 9, 1, + 0, 9, 0, 0, 2, 8, 0, 0, 0, 0, 3, 49, 0, 0, 37, 8, + 2, 9, 37, 2, 0, 0, 37, 4, 0, 0, 7, 0, 8, 2, 2, 4, + 43, 43, 3, 0, 50, 0, 0, 0, 0, 37, 2, 4, 0, 3, 2, 2, + 3, 37, 4, 9, 0, 0, 5, 8, 7, 7, 0, 0, 3, 0, 0, 9, + 28, 27, 9, 37, 0, 0, 0, 4, 0, 1, 9, 1, 0, 0, 0, 43, + 0, 0, 5, 0, 5, 7, 0, 2, 0, 0, 8, 3, 0, 0, 2, 2, + 3, 8, 7, 1, 0, 3, 2, 5, 2, 9, 0, 0, 0, 37, 2, 8, + 0, 0, 3, 1, 2, 6, 0, 0, 0, 3, 4, 0, 3, 2, 2, 2, + 8, 5, 2, 0, +}; + +static RE_UINT8 re_joining_type_stage_5[] = { + 0, 0, 0, 0, 0, 5, 0, 0, 5, 5, 5, 5, 0, 0, 0, 5, + 5, 5, 0, 0, 0, 5, 5, 5, 5, 5, 0, 5, 0, 5, 5, 0, + 5, 5, 5, 0, 5, 0, 0, 0, 2, 0, 3, 3, 3, 3, 2, 3, + 2, 3, 2, 2, 2, 2, 2, 3, 3, 3, 3, 2, 2, 2, 2, 2, + 1, 2, 2, 2, 3, 2, 2, 5, 0, 0, 2, 2, 5, 3, 3, 3, + 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 2, 2, 3, + 2, 3, 2, 3, 2, 2, 3, 3, 0, 3, 5, 5, 5, 0, 0, 5, + 5, 0, 5, 5, 5, 5, 3, 3, 2, 0, 0, 2, 3, 5, 2, 2, + 2, 3, 3, 3, 2, 2, 3, 2, 3, 2, 3, 2, 0, 3, 2, 2, + 3, 2, 2, 2, 0, 0, 5, 5, 2, 2, 2, 5, 0, 0, 1, 0, + 3, 2, 0, 0, 2, 0, 2, 2, 3, 0, 0, 0, 0, 0, 5, 0, + 5, 0, 5, 0, 0, 5, 0, 5, 0, 0, 0, 2, 0, 0, 1, 5, + 2, 5, 2, 0, 0, 1, 5, 5, 2, 2, 4, 0, +}; + +/* Joining_Type: 1896 bytes. */ + +RE_UINT32 re_get_joining_type(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 13; + code = ch ^ (f << 13); + pos = (RE_UINT32)re_joining_type_stage_1[f] << 5; + f = code >> 8; + code ^= f << 8; + pos = (RE_UINT32)re_joining_type_stage_2[pos + f] << 4; + f = code >> 4; + code ^= f << 4; + pos = (RE_UINT32)re_joining_type_stage_3[pos + f] << 2; + f = code >> 2; + code ^= f << 2; + pos = (RE_UINT32)re_joining_type_stage_4[pos + f] << 2; + value = re_joining_type_stage_5[pos + code]; + + return value; +} + +/* Line_Break. 
*/ + +static RE_UINT8 re_line_break_stage_1[] = { + 0, 1, 2, 3, 4, 5, 5, 5, 5, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15, 10, 10, 16, 10, 10, 10, 10, 17, 10, 18, 19, 20, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 21, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 21, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 22, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, +}; + +static RE_UINT8 re_line_break_stage_2[] = { + 0, 1, 2, 2, 2, 3, 4, 5, 2, 6, 7, 8, 9, 10, 11, 12, + 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, + 29, 30, 31, 32, 33, 34, 35, 36, 37, 2, 2, 2, 2, 38, 39, 40, + 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 2, 51, 2, 2, 52, 53, + 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, + 2, 2, 2, 70, 2, 2, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, + 81, 82, 83, 84, 85, 86, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, + 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, + 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, + 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 87, 79, 79, 79, 79, + 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, + 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, + 88, 79, 79, 79, 79, 79, 79, 79, 79, 89, 2, 2, 90, 
91, 2, 92, + 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 101, + 102, 103, 104, 105, 106, 107, 101, 102, 103, 104, 105, 106, 107, 101, 102, 103, + 104, 105, 106, 107, 101, 102, 103, 104, 105, 106, 107, 101, 102, 103, 104, 105, + 106, 107, 101, 102, 103, 104, 105, 106, 107, 101, 102, 103, 104, 105, 106, 107, + 101, 102, 103, 104, 105, 106, 107, 101, 102, 103, 104, 105, 106, 107, 101, 102, + 103, 104, 105, 106, 107, 101, 102, 103, 104, 105, 106, 107, 101, 102, 103, 108, + 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, + 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, + 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, + 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, + 72, 72, 79, 79, 79, 79, 110, 111, 2, 2, 112, 113, 114, 115, 116, 117, + 118, 119, 120, 121, 72, 122, 123, 124, 2, 125, 72, 72, 72, 72, 72, 72, + 126, 72, 127, 128, 129, 72, 130, 72, 131, 72, 72, 72, 132, 72, 72, 72, + 133, 134, 135, 136, 72, 72, 72, 72, 72, 72, 72, 72, 72, 137, 72, 72, + 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, + 2, 2, 2, 2, 2, 2, 138, 72, 139, 72, 72, 72, 72, 72, 72, 72, + 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, + 2, 2, 2, 2, 140, 141, 142, 2, 143, 72, 72, 72, 72, 72, 72, 72, + 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, + 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, + 2, 2, 2, 2, 144, 72, 72, 72, 72, 72, 72, 72, 72, 72, 145, 146, + 147, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, + 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, + 2, 148, 149, 150, 151, 72, 152, 72, 153, 154, 155, 2, 2, 156, 2, 157, + 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, + 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, + 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 158, 159, 72, 72, + 160, 161, 162, 163, 164, 72, 165, 166, 167, 168, 169, 170, 171, 172, 
173, 72, + 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, + 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, + 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 174, + 175, 72, 176, 177, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, + 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, +}; + +static RE_UINT16 re_line_break_stage_3[] = { + 0, 1, 2, 3, 4, 5, 4, 6, 7, 1, 8, 9, 4, 10, 4, 10, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 11, 12, 4, 4, + 1, 1, 1, 1, 13, 14, 15, 16, 17, 4, 18, 4, 4, 4, 4, 4, + 19, 4, 4, 4, 4, 4, 4, 4, 4, 4, 20, 21, 4, 22, 21, 4, + 23, 24, 1, 25, 26, 27, 28, 29, 30, 31, 4, 4, 32, 1, 33, 34, + 4, 4, 4, 4, 4, 35, 36, 37, 38, 39, 4, 1, 40, 4, 4, 4, + 4, 4, 41, 42, 37, 4, 32, 43, 4, 44, 45, 46, 4, 47, 48, 48, + 48, 48, 49, 48, 48, 48, 50, 51, 52, 4, 4, 53, 1, 54, 55, 56, + 57, 58, 59, 60, 61, 62, 63, 64, 65, 58, 59, 66, 67, 68, 69, 70, + 71, 18, 59, 72, 73, 74, 63, 75, 57, 58, 59, 72, 76, 77, 63, 20, + 78, 79, 80, 81, 82, 83, 69, 84, 85, 86, 59, 87, 88, 89, 63, 90, + 91, 86, 59, 92, 88, 93, 63, 94, 91, 86, 4, 95, 96, 97, 63, 98, + 99, 100, 4, 101, 102, 103, 48, 104, 105, 106, 106, 107, 108, 109, 48, 48, + 110, 111, 112, 113, 114, 115, 48, 48, 116, 117, 37, 118, 56, 4, 119, 120, + 121, 122, 1, 123, 124, 125, 48, 48, 106, 106, 106, 106, 126, 106, 106, 106, + 106, 127, 4, 4, 128, 4, 4, 4, 129, 129, 129, 129, 129, 129, 130, 130, + 130, 130, 131, 132, 132, 132, 132, 132, 4, 4, 4, 4, 133, 134, 4, 4, + 133, 4, 4, 135, 136, 137, 4, 4, 4, 136, 4, 4, 4, 138, 139, 119, + 4, 140, 4, 4, 4, 4, 4, 141, 142, 4, 4, 4, 4, 4, 4, 4, + 142, 143, 4, 4, 4, 4, 144, 74, 145, 146, 4, 147, 4, 148, 145, 149, + 106, 106, 106, 106, 106, 150, 151, 140, 152, 151, 4, 4, 4, 4, 4, 20, + 4, 4, 153, 4, 4, 4, 4, 154, 4, 119, 155, 155, 156, 106, 157, 158, + 106, 106, 159, 106, 160, 161, 4, 4, 4, 162, 106, 106, 106, 163, 106, 164, + 151, 151, 157, 48, 48, 48, 48, 48, 165, 4, 4, 166, 167, 168, 169, 170, + 171, 4, 172, 37, 4, 4, 41, 173, 
4, 4, 166, 174, 175, 37, 4, 176, + 48, 48, 48, 48, 20, 177, 178, 179, 4, 4, 4, 4, 1, 1, 180, 181, + 4, 182, 4, 4, 182, 183, 4, 184, 4, 4, 4, 185, 185, 186, 4, 187, + 188, 189, 190, 191, 192, 193, 194, 195, 196, 119, 197, 198, 199, 1, 1, 200, + 201, 202, 203, 4, 4, 204, 205, 206, 207, 206, 4, 4, 4, 208, 4, 4, + 209, 210, 211, 212, 213, 214, 215, 4, 216, 217, 218, 219, 4, 4, 4, 4, + 4, 220, 221, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 222, + 4, 4, 223, 48, 224, 48, 225, 225, 225, 225, 225, 225, 225, 225, 225, 226, + 225, 225, 225, 225, 205, 225, 225, 227, 225, 228, 229, 230, 231, 232, 233, 4, + 234, 235, 4, 236, 237, 4, 238, 239, 4, 240, 4, 241, 242, 243, 244, 245, + 246, 4, 4, 4, 4, 247, 248, 249, 225, 250, 4, 4, 251, 4, 252, 4, + 253, 254, 4, 4, 4, 255, 4, 256, 4, 4, 4, 4, 119, 257, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 4, 4, 46, 4, 4, 46, 4, 4, + 4, 4, 4, 4, 4, 4, 258, 259, 4, 4, 128, 4, 4, 4, 260, 261, + 4, 223, 262, 262, 262, 262, 1, 1, 263, 264, 265, 266, 48, 48, 48, 48, + 267, 268, 267, 267, 267, 267, 267, 222, 267, 267, 267, 267, 267, 267, 267, 267, + 267, 267, 267, 267, 267, 269, 48, 270, 271, 272, 273, 274, 275, 267, 276, 267, + 277, 278, 279, 267, 276, 267, 277, 280, 281, 267, 282, 283, 267, 267, 267, 267, + 284, 267, 267, 285, 267, 267, 222, 286, 267, 284, 267, 267, 287, 267, 267, 267, + 267, 267, 267, 267, 267, 267, 267, 284, 267, 267, 267, 267, 4, 4, 4, 4, + 267, 288, 267, 267, 267, 267, 267, 267, 289, 267, 267, 267, 290, 4, 4, 176, + 291, 4, 292, 48, 4, 4, 258, 293, 4, 294, 4, 4, 4, 4, 4, 295, + 46, 296, 224, 48, 48, 48, 48, 90, 297, 4, 298, 299, 4, 4, 4, 300, + 301, 4, 4, 166, 302, 151, 1, 303, 37, 4, 304, 4, 305, 306, 129, 307, + 52, 4, 4, 308, 309, 310, 48, 48, 4, 4, 311, 180, 312, 313, 106, 159, + 106, 106, 106, 106, 314, 315, 32, 316, 317, 318, 262, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 4, 4, 319, 151, 320, 321, 322, 323, 322, 324, 322, 320, + 321, 322, 323, 322, 324, 322, 320, 321, 322, 323, 322, 324, 322, 320, 321, 322, + 323, 322, 324, 322, 
320, 321, 322, 323, 322, 324, 322, 320, 321, 322, 323, 322, + 324, 322, 320, 321, 322, 323, 322, 324, 322, 320, 321, 322, 323, 322, 324, 322, + 323, 322, 325, 130, 326, 132, 132, 327, 328, 328, 328, 328, 328, 328, 328, 328, + 223, 329, 330, 331, 332, 4, 4, 4, 4, 4, 4, 4, 333, 334, 4, 4, + 4, 4, 4, 335, 48, 4, 4, 4, 4, 336, 4, 4, 20, 48, 48, 337, + 1, 338, 180, 339, 340, 341, 342, 185, 4, 4, 4, 4, 4, 4, 4, 343, + 344, 345, 267, 346, 267, 347, 348, 349, 4, 350, 4, 46, 351, 352, 353, 354, + 355, 4, 137, 356, 184, 184, 48, 48, 4, 4, 4, 4, 4, 4, 4, 224, + 357, 4, 4, 358, 4, 4, 4, 4, 224, 359, 48, 48, 48, 4, 4, 360, + 4, 119, 4, 4, 4, 74, 48, 48, 4, 46, 296, 4, 224, 48, 48, 48, + 4, 361, 4, 4, 362, 363, 48, 48, 4, 184, 151, 48, 48, 48, 48, 48, + 364, 4, 4, 365, 4, 366, 48, 48, 4, 367, 4, 368, 48, 48, 48, 48, + 4, 4, 4, 369, 48, 48, 48, 48, 370, 371, 4, 372, 20, 373, 4, 4, + 4, 4, 4, 374, 4, 375, 4, 376, 4, 4, 4, 4, 377, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 4, 46, 171, 4, 4, 378, 379, 336, 380, 48, + 171, 4, 4, 381, 382, 4, 377, 151, 171, 4, 305, 383, 384, 48, 48, 48, + 171, 4, 4, 308, 385, 151, 48, 48, 4, 4, 32, 386, 151, 48, 48, 48, + 4, 4, 4, 4, 4, 4, 46, 48, 4, 4, 4, 4, 4, 4, 387, 384, + 4, 4, 4, 4, 4, 388, 4, 4, 389, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 390, 4, 4, 46, 48, 48, 48, 48, 48, + 4, 4, 4, 377, 48, 48, 48, 48, 4, 4, 4, 4, 141, 391, 1, 51, + 392, 171, 48, 48, 48, 48, 48, 48, 393, 48, 48, 48, 48, 48, 48, 48, + 4, 4, 4, 4, 4, 4, 4, 154, 4, 4, 22, 4, 4, 4, 394, 1, + 395, 4, 396, 4, 4, 184, 48, 48, 4, 4, 4, 4, 397, 48, 48, 48, + 4, 4, 4, 4, 4, 223, 4, 333, 4, 4, 4, 4, 4, 185, 4, 4, + 4, 145, 398, 399, 400, 4, 4, 4, 401, 402, 4, 403, 404, 86, 4, 4, + 4, 4, 375, 4, 4, 4, 4, 4, 4, 4, 4, 4, 405, 406, 406, 406, + 400, 4, 407, 408, 409, 410, 411, 412, 413, 359, 414, 359, 48, 48, 48, 333, + 267, 267, 270, 267, 267, 267, 267, 267, 267, 222, 284, 415, 283, 283, 48, 48, + 416, 225, 417, 225, 225, 225, 418, 225, 225, 416, 48, 48, 48, 48, 419, 420, + 421, 267, 267, 
285, 422, 393, 48, 48, 267, 267, 423, 424, 267, 267, 267, 289, + 267, 222, 267, 425, 426, 48, 267, 423, 267, 267, 267, 284, 427, 267, 267, 267, + 267, 267, 428, 429, 267, 267, 267, 430, 431, 432, 433, 434, 296, 267, 435, 48, + 48, 48, 48, 48, 48, 48, 48, 436, 267, 267, 267, 267, 437, 48, 48, 48, + 267, 267, 267, 267, 269, 48, 48, 48, 4, 4, 4, 4, 4, 4, 4, 296, + 267, 267, 267, 267, 267, 267, 267, 282, 438, 48, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 48, +}; + +static RE_UINT8 re_line_break_stage_4[] = { + 0, 0, 0, 0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 4, 5, 6, 7, 8, 9, 10, 11, 12, 12, 12, 12, 12, 13, 14, 15, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 16, 17, 14, + 14, 14, 14, 14, 14, 16, 18, 19, 0, 0, 20, 0, 0, 0, 0, 0, + 21, 22, 23, 24, 25, 26, 27, 14, 22, 28, 29, 28, 28, 26, 28, 30, + 14, 14, 14, 24, 14, 14, 14, 14, 14, 14, 14, 24, 31, 28, 31, 14, + 25, 14, 14, 14, 28, 28, 24, 32, 0, 0, 0, 0, 0, 0, 0, 33, + 0, 0, 0, 0, 0, 0, 34, 34, 34, 35, 0, 0, 0, 0, 0, 0, + 14, 14, 14, 14, 36, 14, 14, 37, 36, 36, 14, 14, 14, 38, 38, 14, + 14, 39, 14, 14, 14, 14, 14, 14, 14, 19, 0, 0, 0, 14, 14, 14, + 14, 14, 14, 14, 36, 36, 36, 36, 39, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 38, 39, 14, 14, 14, 14, 14, 14, 14, 40, 41, 36, 42, + 43, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 44, + 19, 45, 0, 46, 36, 36, 36, 36, 47, 47, 47, 47, 47, 47, 47, 47, + 47, 47, 47, 47, 47, 48, 36, 36, 47, 49, 38, 36, 36, 36, 36, 36, + 14, 14, 38, 14, 50, 51, 13, 14, 0, 0, 0, 0, 0, 52, 53, 54, + 14, 14, 14, 14, 14, 19, 0, 0, 12, 12, 12, 12, 12, 55, 56, 14, + 45, 14, 14, 14, 14, 14, 14, 14, 14, 14, 57, 0, 0, 0, 45, 19, + 0, 0, 45, 19, 45, 0, 0, 14, 12, 12, 12, 12, 12, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 39, 19, 14, 14, 14, 14, 14, 14, 14, + 0, 0, 0, 0, 0, 53, 39, 14, 14, 14, 14, 0, 0, 0, 0, 0, + 45, 36, 36, 36, 36, 36, 36, 36, 0, 0, 14, 14, 58, 38, 36, 36, + 14, 14, 14, 0, 0, 19, 0, 0, 0, 0, 19, 0, 19, 0, 0, 36, + 14, 14, 14, 14, 14, 14, 14, 38, 14, 14, 14, 14, 
19, 0, 36, 38, + 36, 36, 36, 36, 36, 36, 36, 36, 38, 14, 14, 14, 14, 14, 38, 36, + 36, 36, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 53, + 0, 0, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 0, 45, 0, + 19, 0, 0, 0, 14, 14, 14, 14, 14, 0, 59, 12, 12, 12, 12, 12, + 14, 14, 14, 14, 39, 14, 14, 14, 43, 0, 39, 14, 14, 14, 38, 39, + 38, 39, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 38, 14, 14, 14, + 38, 38, 36, 14, 14, 36, 45, 0, 0, 0, 53, 43, 53, 43, 0, 38, + 36, 36, 36, 43, 36, 36, 14, 39, 14, 0, 36, 12, 12, 12, 12, 12, + 14, 51, 14, 14, 50, 9, 36, 36, 43, 0, 39, 14, 14, 38, 36, 39, + 38, 14, 39, 38, 14, 36, 53, 0, 0, 53, 36, 43, 53, 43, 0, 36, + 43, 36, 36, 36, 39, 14, 38, 38, 36, 36, 36, 12, 12, 12, 12, 12, + 0, 14, 19, 36, 36, 36, 36, 36, 43, 0, 39, 14, 14, 14, 14, 39, + 38, 14, 39, 14, 14, 36, 45, 0, 0, 0, 0, 43, 0, 43, 0, 36, + 38, 36, 36, 36, 36, 36, 36, 36, 9, 36, 36, 36, 36, 36, 36, 36, + 0, 0, 53, 43, 53, 43, 0, 36, 36, 36, 36, 0, 36, 36, 14, 39, + 36, 45, 39, 14, 14, 38, 36, 14, 38, 14, 14, 36, 39, 38, 38, 14, + 36, 39, 38, 36, 14, 38, 36, 14, 14, 14, 14, 14, 14, 36, 36, 0, + 0, 53, 36, 0, 53, 0, 0, 36, 38, 36, 36, 43, 36, 36, 36, 36, + 14, 14, 14, 14, 9, 38, 36, 36, 43, 0, 39, 14, 14, 14, 38, 14, + 38, 14, 14, 14, 14, 14, 14, 14, 14, 14, 39, 14, 14, 36, 39, 0, + 0, 0, 53, 0, 53, 0, 0, 36, 36, 36, 43, 53, 14, 36, 36, 36, + 36, 36, 36, 36, 14, 14, 14, 14, 36, 0, 39, 14, 14, 14, 38, 14, + 14, 14, 39, 14, 14, 36, 45, 0, 36, 36, 43, 53, 36, 36, 36, 38, + 39, 38, 36, 36, 36, 36, 36, 36, 14, 14, 14, 14, 14, 38, 39, 0, + 0, 0, 53, 0, 53, 0, 0, 38, 36, 36, 36, 43, 36, 36, 36, 36, + 14, 14, 14, 36, 60, 14, 14, 14, 36, 0, 39, 14, 14, 14, 14, 14, + 14, 14, 14, 38, 36, 14, 14, 14, 14, 39, 14, 14, 14, 14, 39, 36, + 14, 14, 14, 38, 36, 53, 36, 43, 0, 0, 53, 53, 0, 0, 0, 0, + 36, 0, 38, 36, 36, 36, 36, 36, 61, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 63, 36, 42, + 62, 62, 62, 62, 62, 62, 62, 64, 12, 12, 12, 12, 12, 59, 36, 36, + 61, 63, 
63, 61, 63, 63, 61, 36, 36, 36, 62, 62, 61, 62, 62, 62, + 61, 62, 61, 61, 36, 62, 61, 62, 62, 62, 62, 62, 62, 61, 62, 36, + 62, 62, 63, 63, 62, 62, 62, 36, 12, 12, 12, 12, 12, 36, 62, 62, + 32, 65, 29, 65, 66, 67, 68, 54, 54, 69, 57, 14, 0, 14, 14, 14, + 14, 14, 44, 19, 19, 70, 70, 0, 14, 14, 14, 14, 14, 14, 38, 36, + 43, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 14, 14, 19, 0, + 0, 0, 0, 0, 43, 0, 0, 0, 0, 0, 0, 0, 0, 0, 53, 59, + 14, 14, 14, 45, 14, 14, 38, 14, 65, 71, 14, 14, 72, 73, 36, 36, + 12, 12, 12, 12, 12, 59, 14, 14, 12, 12, 12, 12, 12, 62, 62, 62, + 14, 14, 14, 39, 36, 36, 39, 36, 74, 74, 74, 74, 74, 74, 74, 74, + 75, 75, 75, 75, 75, 75, 75, 75, 75, 75, 75, 75, 76, 76, 76, 76, + 76, 76, 76, 76, 76, 76, 76, 76, 14, 14, 14, 14, 38, 14, 14, 36, + 14, 14, 14, 38, 38, 14, 14, 36, 38, 14, 14, 36, 14, 14, 14, 38, + 38, 14, 14, 36, 14, 14, 14, 14, 14, 14, 14, 38, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 38, 43, 0, 27, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 36, 36, 36, 14, 14, 38, 36, 36, 36, 36, 36, + 77, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 16, 78, 36, + 14, 14, 14, 14, 14, 27, 59, 14, 14, 14, 14, 14, 14, 14, 38, 14, + 14, 0, 53, 36, 36, 36, 36, 36, 14, 0, 1, 41, 36, 36, 36, 36, + 14, 0, 36, 36, 36, 36, 36, 36, 38, 0, 36, 36, 36, 36, 36, 36, + 62, 62, 59, 79, 77, 80, 62, 36, 12, 12, 12, 12, 12, 36, 36, 36, + 14, 54, 59, 29, 54, 19, 0, 73, 14, 14, 14, 14, 19, 38, 36, 36, + 14, 14, 14, 36, 36, 36, 36, 36, 0, 0, 0, 0, 0, 0, 36, 36, + 38, 36, 54, 12, 12, 12, 12, 12, 62, 62, 62, 62, 62, 62, 62, 36, + 62, 62, 63, 36, 36, 36, 36, 36, 62, 62, 62, 62, 62, 62, 36, 36, + 62, 62, 62, 62, 62, 36, 36, 36, 12, 12, 12, 12, 12, 63, 36, 62, + 14, 14, 14, 19, 0, 0, 36, 14, 62, 62, 62, 62, 62, 62, 62, 63, + 62, 62, 62, 62, 62, 62, 63, 43, 0, 0, 45, 14, 14, 14, 14, 14, + 14, 14, 0, 0, 0, 0, 0, 0, 0, 0, 45, 14, 14, 14, 36, 36, + 12, 12, 12, 12, 12, 59, 27, 59, 77, 14, 14, 14, 14, 19, 0, 0, + 0, 0, 14, 14, 14, 14, 38, 36, 0, 45, 14, 14, 14, 14, 14, 14, + 19, 0, 0, 0, 0, 0, 
0, 14, 0, 0, 36, 36, 36, 36, 14, 14, + 0, 0, 0, 0, 36, 81, 59, 59, 12, 12, 12, 12, 12, 36, 39, 14, + 14, 14, 14, 14, 14, 14, 14, 59, 0, 45, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 45, 14, 19, 14, 14, 0, 45, 38, 36, 36, 36, 36, + 0, 0, 0, 53, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 0, 0, + 14, 14, 14, 36, 14, 14, 14, 36, 14, 14, 14, 14, 39, 39, 39, 39, + 14, 14, 14, 14, 14, 14, 14, 36, 14, 14, 38, 14, 14, 14, 14, 14, + 14, 14, 36, 14, 14, 14, 39, 14, 36, 14, 38, 14, 14, 14, 32, 38, + 59, 59, 59, 82, 59, 83, 0, 0, 82, 59, 84, 25, 85, 86, 85, 86, + 28, 14, 87, 88, 89, 0, 0, 33, 51, 51, 51, 51, 7, 90, 91, 14, + 14, 14, 92, 93, 91, 14, 14, 14, 14, 14, 14, 77, 59, 59, 27, 59, + 94, 14, 38, 0, 0, 0, 0, 0, 14, 36, 25, 14, 14, 14, 16, 95, + 24, 28, 25, 14, 14, 14, 16, 78, 23, 23, 23, 6, 23, 23, 23, 23, + 23, 23, 23, 22, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 53, 36, 36, 36, 36, 36, 36, 36, 14, 50, 24, 14, 50, 14, 14, 14, + 14, 24, 14, 96, 14, 14, 14, 14, 24, 25, 14, 14, 14, 24, 14, 14, + 14, 14, 28, 14, 14, 24, 14, 25, 28, 28, 28, 28, 28, 28, 14, 14, + 28, 28, 28, 28, 28, 14, 14, 14, 14, 14, 14, 14, 24, 36, 36, 36, + 14, 25, 25, 14, 14, 14, 14, 14, 25, 28, 14, 24, 25, 24, 14, 24, + 24, 23, 24, 14, 14, 25, 24, 28, 25, 24, 24, 24, 28, 28, 25, 25, + 14, 14, 28, 28, 14, 14, 28, 14, 14, 14, 14, 14, 25, 14, 25, 14, + 14, 25, 14, 14, 14, 14, 14, 14, 28, 14, 28, 28, 14, 28, 14, 28, + 14, 28, 14, 28, 14, 14, 14, 14, 14, 14, 24, 14, 24, 14, 14, 14, + 14, 14, 24, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 24, + 14, 25, 14, 14, 14, 97, 14, 14, 14, 14, 14, 14, 16, 98, 14, 14, + 97, 97, 36, 36, 36, 36, 36, 36, 14, 14, 14, 38, 36, 36, 36, 36, + 14, 14, 14, 14, 14, 38, 36, 36, 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 28, 28, 25, 28, 28, 25, 14, 14, 14, 14, 14, + 14, 28, 28, 14, 14, 14, 14, 14, 28, 24, 28, 28, 28, 14, 14, 14, + 14, 28, 14, 28, 14, 14, 28, 14, 28, 14, 14, 28, 25, 24, 14, 28, + 28, 14, 14, 14, 14, 14, 14, 14, 14, 28, 28, 14, 14, 14, 14, 24, + 97, 97, 24, 
25, 24, 14, 14, 28, 14, 14, 97, 28, 99, 97, 97, 97, + 14, 14, 14, 14, 100, 97, 14, 14, 25, 25, 14, 14, 14, 14, 14, 14, + 28, 24, 28, 24, 101, 25, 28, 24, 14, 14, 14, 14, 14, 14, 14, 100, + 14, 14, 14, 14, 14, 14, 14, 28, 14, 14, 14, 14, 14, 14, 100, 97, + 97, 97, 97, 97, 101, 28, 102, 100, 97, 102, 101, 28, 97, 28, 101, 102, + 97, 24, 14, 14, 28, 101, 28, 28, 102, 97, 97, 102, 97, 101, 102, 97, + 103, 97, 99, 14, 97, 97, 97, 14, 14, 14, 14, 24, 14, 7, 85, 5, + 14, 54, 14, 14, 70, 70, 70, 70, 70, 70, 70, 28, 28, 28, 28, 28, + 28, 28, 14, 14, 14, 14, 14, 14, 14, 14, 16, 98, 14, 14, 14, 14, + 14, 14, 14, 70, 70, 70, 70, 70, 14, 16, 104, 104, 104, 104, 104, 104, + 104, 104, 104, 104, 98, 14, 14, 14, 14, 14, 14, 14, 70, 70, 14, 14, + 14, 14, 14, 14, 14, 14, 70, 14, 14, 14, 24, 28, 28, 36, 36, 36, + 14, 14, 14, 14, 14, 14, 14, 19, 0, 14, 36, 36, 105, 59, 77, 106, + 14, 14, 14, 14, 36, 36, 36, 39, 41, 36, 36, 36, 36, 36, 36, 43, + 14, 14, 14, 38, 14, 14, 14, 38, 85, 85, 85, 85, 85, 85, 85, 59, + 59, 59, 59, 27, 107, 14, 85, 14, 85, 70, 70, 70, 70, 59, 59, 57, + 59, 27, 77, 14, 14, 108, 36, 36, 97, 97, 97, 97, 97, 97, 97, 97, + 97, 97, 97, 97, 97, 103, 97, 97, 97, 97, 97, 36, 36, 36, 36, 36, + 97, 97, 97, 97, 97, 97, 36, 36, 18, 109, 110, 97, 70, 70, 70, 70, + 70, 97, 70, 70, 70, 70, 111, 112, 97, 97, 97, 97, 97, 0, 0, 0, + 97, 97, 113, 97, 97, 110, 114, 97, 115, 116, 116, 116, 116, 97, 97, 97, + 97, 116, 97, 97, 97, 97, 97, 97, 97, 116, 116, 116, 97, 97, 97, 117, + 97, 97, 116, 118, 43, 119, 91, 114, 120, 116, 116, 116, 116, 97, 97, 97, + 97, 97, 116, 117, 97, 110, 121, 114, 36, 36, 103, 97, 97, 97, 97, 97, + 97, 97, 97, 97, 97, 97, 97, 36, 103, 97, 97, 97, 97, 97, 97, 97, + 97, 97, 97, 97, 97, 97, 97, 122, 97, 97, 97, 97, 97, 122, 36, 36, + 123, 123, 123, 123, 123, 123, 123, 123, 97, 97, 97, 97, 28, 28, 28, 28, + 97, 97, 110, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 122, 36, + 97, 97, 97, 122, 36, 36, 36, 36, 14, 14, 14, 14, 14, 14, 27, 106, + 12, 12, 12, 12, 12, 14, 
36, 36, 0, 45, 0, 0, 0, 0, 0, 14, + 14, 14, 14, 14, 36, 36, 36, 43, 0, 27, 59, 59, 36, 36, 36, 36, + 14, 14, 36, 36, 36, 36, 36, 36, 14, 45, 14, 45, 14, 19, 14, 14, + 14, 19, 0, 0, 14, 14, 36, 36, 14, 14, 14, 14, 124, 36, 36, 36, + 14, 14, 65, 54, 36, 36, 36, 36, 0, 14, 14, 14, 14, 14, 14, 14, + 0, 0, 53, 36, 36, 36, 36, 59, 0, 14, 14, 14, 14, 14, 36, 36, + 14, 14, 14, 0, 0, 0, 0, 59, 14, 14, 14, 19, 0, 0, 0, 0, + 0, 0, 36, 36, 36, 36, 36, 39, 74, 74, 74, 74, 74, 74, 125, 36, + 14, 19, 0, 0, 0, 0, 0, 0, 45, 14, 14, 27, 59, 14, 14, 39, + 12, 12, 12, 12, 12, 36, 36, 14, 14, 14, 14, 14, 19, 0, 0, 0, + 14, 19, 14, 14, 14, 14, 0, 36, 12, 12, 12, 12, 12, 36, 27, 59, + 62, 63, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 61, 62, 62, + 59, 14, 19, 53, 36, 36, 36, 36, 39, 14, 14, 38, 39, 14, 14, 38, + 39, 14, 14, 38, 36, 36, 36, 36, 14, 19, 0, 0, 0, 1, 0, 36, + 126, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 126, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 126, 127, 127, 127, + 127, 127, 126, 127, 127, 127, 127, 127, 127, 127, 36, 36, 36, 36, 36, 36, + 75, 75, 75, 128, 36, 129, 76, 76, 76, 76, 76, 76, 76, 76, 36, 36, + 130, 130, 130, 130, 130, 130, 130, 130, 36, 39, 14, 14, 36, 36, 131, 132, + 47, 47, 47, 47, 49, 47, 47, 47, 47, 47, 47, 48, 47, 47, 48, 48, + 47, 131, 48, 47, 47, 47, 47, 47, 14, 36, 36, 36, 36, 36, 36, 36, + 36, 39, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 70, + 36, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 124, 36, + 133, 134, 58, 135, 136, 36, 36, 36, 97, 97, 137, 104, 104, 104, 104, 104, + 104, 104, 109, 137, 109, 97, 97, 97, 109, 78, 91, 54, 137, 104, 104, 109, + 97, 97, 97, 122, 138, 139, 36, 36, 14, 14, 14, 14, 14, 14, 38, 140, + 105, 97, 6, 97, 70, 97, 109, 109, 97, 97, 97, 97, 97, 91, 97, 141, + 97, 97, 97, 97, 97, 137, 142, 97, 97, 97, 97, 97, 97, 137, 142, 137, + 112, 70, 93, 143, 123, 123, 123, 123, 144, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 91, 36, 14, 14, 14, 
36, 14, 14, 14, + 36, 14, 14, 14, 36, 14, 38, 36, 22, 97, 138, 145, 14, 14, 14, 38, + 36, 36, 36, 36, 43, 0, 146, 36, 14, 14, 14, 14, 14, 14, 39, 14, + 14, 14, 14, 14, 14, 38, 14, 39, 59, 41, 36, 39, 14, 14, 14, 14, + 14, 14, 36, 39, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 36, 36, + 14, 14, 14, 14, 14, 14, 19, 36, 14, 14, 14, 14, 14, 14, 14, 81, + 14, 14, 36, 36, 14, 14, 14, 14, 77, 14, 14, 36, 36, 36, 36, 36, + 14, 14, 14, 36, 38, 14, 14, 14, 14, 14, 14, 39, 38, 36, 38, 39, + 14, 14, 14, 81, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 36, 81, + 14, 14, 14, 14, 14, 36, 36, 39, 14, 14, 14, 14, 36, 36, 36, 14, + 19, 0, 43, 53, 36, 36, 0, 0, 14, 14, 39, 14, 39, 14, 14, 14, + 14, 14, 36, 36, 0, 53, 36, 43, 59, 59, 59, 59, 38, 36, 36, 36, + 14, 14, 14, 36, 81, 59, 59, 59, 14, 14, 14, 36, 14, 14, 14, 14, + 14, 38, 36, 36, 14, 14, 14, 14, 14, 14, 14, 14, 38, 36, 36, 36, + 14, 14, 14, 14, 0, 0, 0, 0, 0, 0, 0, 1, 77, 14, 14, 36, + 14, 14, 14, 12, 12, 12, 12, 12, 0, 0, 0, 0, 0, 45, 14, 59, + 59, 36, 36, 36, 36, 36, 36, 36, 0, 0, 53, 12, 12, 12, 12, 12, + 59, 59, 36, 36, 36, 36, 36, 36, 45, 14, 27, 77, 41, 36, 36, 36, + 0, 0, 0, 0, 36, 36, 36, 36, 14, 38, 36, 36, 36, 36, 36, 36, + 14, 14, 14, 14, 147, 70, 112, 14, 14, 98, 14, 70, 70, 14, 14, 14, + 14, 14, 14, 14, 16, 112, 14, 14, 19, 0, 0, 0, 0, 0, 0, 0, + 36, 36, 36, 36, 36, 36, 36, 43, 97, 36, 36, 36, 36, 36, 36, 36, + 14, 14, 19, 0, 0, 14, 19, 0, 0, 45, 19, 0, 0, 0, 14, 14, + 14, 14, 14, 14, 14, 0, 0, 14, 14, 0, 45, 36, 36, 36, 36, 36, + 36, 38, 39, 38, 39, 14, 38, 14, 14, 14, 14, 14, 14, 39, 39, 14, + 14, 14, 39, 14, 14, 14, 14, 14, 14, 14, 14, 39, 14, 38, 39, 14, + 14, 14, 38, 14, 14, 14, 38, 14, 14, 14, 14, 14, 14, 39, 14, 38, + 14, 14, 38, 38, 36, 14, 14, 14, 14, 14, 14, 14, 14, 14, 36, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 39, 38, 38, 39, 39, 14, 14, 14, + 14, 38, 14, 14, 39, 39, 36, 36, 36, 38, 36, 39, 39, 39, 39, 14, + 39, 38, 38, 39, 39, 39, 39, 39, 39, 38, 38, 39, 14, 38, 14, 14, + 14, 38, 14, 14, 39, 14, 38, 38, 14, 
14, 14, 14, 14, 39, 14, 14, + 39, 14, 39, 14, 14, 39, 14, 14, 103, 97, 97, 97, 97, 97, 97, 122, + 28, 28, 28, 28, 28, 148, 36, 36, 28, 28, 28, 28, 28, 28, 28, 38, + 28, 28, 28, 28, 28, 14, 36, 36, 36, 36, 36, 149, 149, 149, 149, 149, + 149, 149, 149, 149, 149, 149, 149, 149, 97, 122, 36, 36, 36, 36, 36, 36, + 97, 97, 97, 97, 122, 36, 36, 36, 122, 36, 36, 36, 36, 36, 36, 36, + 97, 97, 97, 103, 97, 97, 97, 97, 97, 97, 99, 100, 97, 97, 100, 97, + 97, 97, 122, 97, 97, 122, 36, 36, 122, 97, 97, 97, 97, 97, 97, 97, + 100, 100, 100, 97, 97, 97, 97, 99, 99, 100, 97, 97, 97, 97, 97, 97, + 97, 97, 97, 97, 103, 97, 122, 36, 14, 14, 14, 100, 97, 97, 97, 97, + 97, 97, 97, 99, 14, 14, 14, 14, 14, 14, 100, 97, 97, 97, 97, 97, + 97, 14, 14, 14, 14, 14, 14, 36, 97, 97, 97, 97, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 103, 97, 97, 122, 36, 103, 97, 97, 97, 97, 97, + 43, 36, 36, 36, 36, 36, 36, 36, +}; + +static RE_UINT8 re_line_break_stage_5[] = { + 16, 16, 16, 18, 22, 20, 20, 21, 19, 6, 3, 12, 9, 10, 12, 3, + 1, 36, 12, 9, 8, 15, 8, 7, 11, 11, 8, 8, 12, 12, 12, 6, + 12, 1, 9, 36, 18, 2, 12, 16, 16, 29, 4, 1, 10, 9, 9, 9, + 12, 25, 25, 12, 25, 3, 12, 18, 25, 25, 17, 12, 25, 1, 17, 25, + 12, 17, 16, 4, 4, 4, 4, 16, 0, 0, 8, 0, 12, 0, 0, 12, + 0, 8, 18, 0, 0, 9, 0, 16, 18, 16, 16, 12, 6, 16, 37, 37, + 37, 0, 37, 12, 12, 10, 10, 10, 16, 6, 16, 0, 6, 6, 10, 11, + 11, 12, 6, 12, 8, 6, 18, 18, 0, 10, 0, 24, 24, 24, 24, 0, + 24, 12, 17, 17, 4, 17, 17, 18, 4, 6, 4, 12, 1, 2, 18, 17, + 12, 4, 4, 0, 31, 31, 32, 32, 33, 33, 18, 12, 2, 0, 5, 24, + 18, 9, 0, 18, 18, 4, 18, 28, 26, 25, 3, 3, 1, 3, 14, 14, + 14, 18, 20, 20, 3, 25, 5, 5, 8, 1, 2, 5, 30, 12, 2, 25, + 9, 12, 13, 13, 2, 12, 13, 12, 12, 13, 13, 25, 25, 13, 0, 13, + 2, 1, 0, 6, 6, 18, 1, 18, 26, 26, 2, 13, 13, 5, 5, 1, + 2, 2, 13, 16, 5, 13, 0, 38, 13, 38, 38, 13, 38, 0, 16, 5, + 5, 38, 38, 5, 13, 0, 38, 38, 10, 12, 31, 0, 34, 35, 35, 35, + 32, 0, 0, 33, 27, 27, 0, 37, 16, 37, 8, 2, 2, 8, 6, 1, + 2, 14, 13, 1, 13, 9, 10, 13, 0, 30, 13, 6, 
13, 2, 12, 38, + 38, 12, 9, 0, 23, 25, 1, 1, 25, 0, 39, 39, +}; + +/* Line_Break: 7668 bytes. */ + +RE_UINT32 re_get_line_break(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 12; + code = ch ^ (f << 12); + pos = (RE_UINT32)re_line_break_stage_1[f] << 5; + f = code >> 7; + code ^= f << 7; + pos = (RE_UINT32)re_line_break_stage_2[pos + f] << 3; + f = code >> 4; + code ^= f << 4; + pos = (RE_UINT32)re_line_break_stage_3[pos + f] << 3; + f = code >> 1; + code ^= f << 1; + pos = (RE_UINT32)re_line_break_stage_4[pos + f] << 1; + value = re_line_break_stage_5[pos + code]; + + return value; +} + +/* Numeric_Type. */ + +static RE_UINT8 re_numeric_type_stage_1[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 11, 11, 11, 12, + 13, 14, 15, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 16, 11, 17, + 18, 11, 19, 20, 11, 11, 21, 11, 11, 11, 11, 11, 11, 11, 11, 22, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, +}; + +static RE_UINT8 re_numeric_type_stage_2[] = { + 0, 1, 1, 1, 1, 1, 2, 3, 1, 4, 5, 6, 7, 8, 9, 10, + 11, 1, 1, 12, 1, 1, 13, 14, 15, 16, 17, 18, 19, 1, 1, 1, + 20, 
21, 1, 1, 22, 1, 1, 23, 1, 1, 1, 1, 24, 1, 1, 1, + 25, 26, 27, 1, 28, 1, 1, 1, 29, 1, 1, 30, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 31, 32, + 1, 33, 1, 34, 1, 1, 35, 1, 36, 1, 1, 1, 1, 1, 37, 38, + 1, 1, 39, 40, 1, 1, 1, 41, 1, 1, 1, 1, 1, 1, 1, 42, + 1, 1, 1, 43, 1, 1, 44, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 45, 1, 1, 1, 46, 1, 1, 1, 1, 1, 1, 1, 47, 48, 1, 1, + 1, 1, 1, 1, 1, 1, 49, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 50, 1, 51, 52, 53, 54, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 55, 1, 1, 1, 1, 1, 15, + 1, 56, 1, 57, 58, 1, 1, 1, 59, 60, 61, 62, 1, 1, 63, 1, + 64, 65, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 66, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 67, 1, 1, 1, 68, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 69, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 70, 71, 1, 1, 1, 1, 1, 1, 1, 72, 73, 74, 1, 1, 1, 1, + 1, 1, 1, 75, 1, 1, 1, 1, 1, 76, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 77, 1, 1, 1, 1, + 1, 1, 78, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 75, 1, 1, 1, 1, 1, 1, 1, +}; + +static RE_UINT8 re_numeric_type_stage_3[] = { + 0, 1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 3, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 3, 0, + 0, 0, 0, 4, 0, 0, 0, 5, 0, 0, 0, 4, 0, 0, 0, 4, + 0, 0, 0, 6, 0, 0, 0, 7, 0, 0, 0, 8, 0, 0, 0, 4, + 0, 0, 0, 9, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, + 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 1, 0, 0, 0, + 0, 0, 0, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, + 0, 0, 0, 0, 0, 0, 0, 13, 1, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 4, 0, 0, 0, 14, 0, 0, 0, 0, 0, 15, 0, 0, 0, + 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 15, 0, 0, 0, 0, 0, + 0, 0, 0, 16, 17, 0, 0, 0, 0, 0, 18, 19, 20, 0, 0, 0, + 0, 0, 0, 21, 22, 0, 0, 23, 0, 0, 0, 24, 25, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 26, 27, 28, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 29, 0, 0, 0, 0, 30, 31, 0, 30, 32, 0, 0, + 33, 0, 0, 0, 34, 0, 0, 0, 0, 35, 0, 0, 0, 0, 0, 0, + 0, 0, 36, 0, 0, 0, 0, 0, 37, 0, 26, 0, 38, 39, 40, 41, + 36, 
0, 0, 42, 0, 0, 0, 0, 43, 0, 44, 45, 0, 0, 0, 0, + 0, 0, 46, 0, 0, 0, 47, 0, 0, 0, 0, 0, 0, 0, 48, 0, + 0, 0, 0, 0, 0, 0, 0, 49, 0, 0, 0, 50, 0, 0, 0, 51, + 52, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 53, + 0, 0, 54, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 55, 0, + 44, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 56, 0, 0, 0, + 0, 0, 0, 53, 0, 0, 0, 0, 0, 0, 0, 0, 44, 0, 0, 0, + 0, 54, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 57, 0, 0, + 0, 42, 0, 0, 0, 0, 0, 0, 0, 58, 59, 60, 0, 0, 0, 56, + 0, 3, 0, 0, 0, 0, 0, 61, 0, 62, 0, 0, 0, 0, 1, 0, + 3, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 63, 0, 55, 64, 26, + 65, 66, 19, 67, 35, 0, 0, 0, 0, 68, 69, 0, 0, 0, 70, 0, + 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 71, 0, 0, 0, 0, 0, + 72, 0, 0, 0, 0, 0, 0, 0, 0, 0, 73, 74, 0, 0, 0, 0, + 0, 0, 71, 71, 0, 0, 0, 0, 0, 0, 0, 75, 0, 0, 0, 0, + 0, 0, 76, 77, 0, 0, 0, 1, 0, 78, 0, 0, 0, 0, 1, 0, + 19, 19, 19, 79, 0, 0, 0, 0, 0, 0, 0, 80, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 81, 82, 83, 0, 0, 0, 0, 0, 0, 0, + 58, 0, 0, 43, 0, 0, 0, 84, 0, 58, 0, 0, 0, 0, 0, 0, + 0, 35, 0, 0, 85, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 86, + 87, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 0, 0, 0, + 0, 0, 0, 0, 60, 0, 0, 0, 48, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 36, 0, 0, 0, 0, +}; + +static RE_UINT8 re_numeric_type_stage_4[] = { + 0, 0, 0, 0, 0, 0, 1, 2, 0, 0, 3, 4, 1, 2, 0, 0, + 5, 1, 0, 0, 5, 1, 6, 7, 5, 1, 8, 0, 5, 1, 9, 0, + 5, 1, 0, 10, 5, 1, 11, 0, 1, 12, 13, 0, 0, 14, 15, 16, + 0, 17, 18, 0, 1, 2, 19, 7, 0, 0, 1, 20, 1, 2, 1, 2, + 0, 0, 21, 22, 23, 22, 0, 0, 0, 0, 19, 19, 19, 19, 19, 19, + 24, 7, 0, 0, 23, 25, 26, 27, 19, 23, 25, 13, 0, 28, 29, 30, + 0, 0, 31, 32, 23, 33, 34, 0, 0, 0, 0, 35, 36, 0, 0, 0, + 37, 7, 0, 9, 0, 0, 38, 0, 19, 7, 0, 0, 0, 19, 37, 19, + 0, 0, 37, 19, 35, 0, 0, 0, 39, 0, 0, 0, 0, 40, 0, 0, + 0, 35, 0, 0, 41, 42, 0, 0, 0, 43, 44, 0, 0, 0, 0, 36, + 18, 0, 0, 36, 0, 18, 0, 0, 0, 0, 18, 0, 43, 0, 0, 0, + 45, 0, 0, 0, 0, 46, 0, 0, 47, 43, 0, 0, 48, 0, 0, 0, + 0, 0, 0, 39, 0, 0, 42, 42, 0, 0, 0, 
40, 0, 0, 0, 17, + 0, 49, 18, 0, 0, 0, 0, 45, 0, 43, 0, 0, 0, 0, 40, 0, + 0, 0, 45, 0, 0, 45, 39, 0, 42, 0, 0, 0, 45, 43, 0, 0, + 0, 0, 0, 18, 17, 19, 0, 0, 0, 0, 11, 0, 0, 39, 39, 18, + 0, 0, 50, 0, 36, 19, 19, 19, 19, 19, 13, 0, 19, 19, 19, 18, + 13, 0, 0, 0, 42, 40, 0, 0, 0, 0, 51, 0, 0, 0, 0, 19, + 0, 0, 17, 13, 52, 0, 0, 0, 0, 0, 0, 53, 23, 25, 19, 10, + 0, 0, 54, 55, 56, 1, 0, 0, 0, 0, 5, 1, 9, 0, 0, 0, + 19, 19, 7, 0, 0, 5, 1, 1, 1, 1, 1, 1, 23, 57, 0, 0, + 40, 0, 0, 0, 39, 43, 0, 43, 0, 40, 0, 35, 0, 0, 0, 42, +}; + +static RE_UINT8 re_numeric_type_stage_5[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, + 0, 2, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 3, 3, + 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, + 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, + 3, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, + 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, + 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, + 3, 3, 2, 0, 0, 0, 0, 0, 2, 0, 0, 0, 2, 2, 2, 2, + 2, 2, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, + 1, 1, 1, 0, 0, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, + 0, 0, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 1, 2, 0, 0, 0, 0, 0, 0, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 1, 2, 1, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, + 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, + 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, + 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, + 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, + 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, + 2, 2, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, + 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 3, 
3, 2, 2, 2, 0, 0, 0, 0, 0, +}; + +/* Numeric_Type: 2088 bytes. */ + +RE_UINT32 re_get_numeric_type(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 12; + code = ch ^ (f << 12); + pos = (RE_UINT32)re_numeric_type_stage_1[f] << 4; + f = code >> 8; + code ^= f << 8; + pos = (RE_UINT32)re_numeric_type_stage_2[pos + f] << 3; + f = code >> 5; + code ^= f << 5; + pos = (RE_UINT32)re_numeric_type_stage_3[pos + f] << 2; + f = code >> 3; + code ^= f << 3; + pos = (RE_UINT32)re_numeric_type_stage_4[pos + f] << 3; + value = re_numeric_type_stage_5[pos + code]; + + return value; +} + +/* Numeric_Value. */ + +static RE_UINT8 re_numeric_value_stage_1[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 11, 11, 11, 12, + 13, 14, 15, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 16, 11, 17, + 18, 11, 19, 20, 11, 11, 21, 11, 11, 11, 11, 11, 11, 11, 11, 22, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, +}; + +static RE_UINT8 re_numeric_value_stage_2[] = { + 0, 1, 1, 1, 1, 1, 2, 3, 1, 4, 5, 6, 7, 8, 9, 10, + 11, 1, 1, 12, 1, 1, 13, 14, 15, 16, 17, 18, 19, 1, 1, 1, + 20, 21, 1, 1, 22, 1, 
1, 23, 1, 1, 1, 1, 24, 1, 1, 1, + 25, 26, 27, 1, 28, 1, 1, 1, 29, 1, 1, 30, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 31, 32, + 1, 33, 1, 34, 1, 1, 35, 1, 36, 1, 1, 1, 1, 1, 37, 38, + 1, 1, 39, 40, 1, 1, 1, 41, 1, 1, 1, 1, 1, 1, 1, 42, + 1, 1, 1, 43, 1, 1, 44, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 45, 1, 1, 1, 46, 1, 1, 1, 1, 1, 1, 1, 47, 48, 1, 1, + 1, 1, 1, 1, 1, 1, 49, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 50, 1, 51, 52, 53, 54, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 55, 1, 1, 1, 1, 1, 15, + 1, 56, 1, 57, 58, 1, 1, 1, 59, 60, 61, 62, 1, 1, 63, 1, + 64, 65, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 66, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 67, 1, 1, 1, 68, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 69, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 70, 71, 1, 1, 1, 1, 1, 1, 1, 72, 73, 74, 1, 1, 1, 1, + 1, 1, 1, 75, 1, 1, 1, 1, 1, 76, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 77, 1, 1, 1, 1, + 1, 1, 78, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 79, 1, 1, 1, 1, 1, 1, 1, +}; + +static RE_UINT8 re_numeric_value_stage_3[] = { + 0, 1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 3, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 3, 0, + 0, 0, 0, 4, 0, 0, 0, 5, 0, 0, 0, 4, 0, 0, 0, 4, + 0, 0, 0, 6, 0, 0, 0, 7, 0, 0, 0, 8, 0, 0, 0, 4, + 0, 0, 0, 9, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, + 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 1, 0, 0, 0, + 0, 0, 0, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, + 0, 0, 0, 0, 0, 0, 0, 13, 1, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 4, 0, 0, 0, 14, 0, 0, 0, 0, 0, 13, 0, 0, 0, + 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 13, 0, 0, 0, 0, 0, + 0, 0, 0, 15, 3, 0, 0, 0, 0, 0, 16, 17, 18, 0, 0, 0, + 0, 0, 0, 19, 20, 0, 0, 21, 0, 0, 0, 22, 23, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 24, 25, 26, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 27, 0, 0, 0, 0, 28, 29, 0, 28, 30, 0, 0, + 31, 0, 0, 0, 32, 0, 0, 0, 0, 33, 0, 0, 0, 0, 0, 0, + 0, 0, 34, 0, 0, 0, 0, 0, 35, 0, 36, 0, 37, 38, 39, 40, + 41, 0, 0, 42, 0, 0, 
0, 0, 43, 0, 44, 45, 0, 0, 0, 0, + 0, 0, 46, 0, 0, 0, 47, 0, 0, 0, 0, 0, 0, 0, 48, 0, + 0, 0, 0, 0, 0, 0, 0, 49, 0, 0, 0, 50, 0, 0, 0, 51, + 52, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 53, + 0, 0, 54, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 55, 0, + 56, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 57, 0, 0, 0, + 0, 0, 0, 58, 0, 0, 0, 0, 0, 0, 0, 0, 59, 0, 0, 0, + 0, 60, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 61, 0, 0, + 0, 62, 0, 0, 0, 0, 0, 0, 0, 63, 64, 65, 0, 0, 0, 66, + 0, 3, 0, 0, 0, 0, 0, 67, 0, 68, 0, 0, 0, 0, 1, 0, + 3, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 69, 0, 70, 71, 72, + 73, 74, 75, 76, 77, 0, 0, 0, 0, 78, 79, 0, 0, 0, 80, 0, + 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 81, 0, 0, 0, 0, 0, + 82, 0, 0, 0, 0, 0, 0, 0, 0, 0, 83, 84, 0, 0, 0, 0, + 0, 0, 85, 85, 0, 0, 0, 0, 0, 0, 0, 86, 0, 0, 0, 0, + 0, 0, 87, 88, 0, 0, 0, 1, 0, 89, 0, 0, 0, 0, 1, 0, + 90, 91, 92, 93, 0, 0, 0, 0, 0, 0, 0, 94, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 95, 96, 97, 0, 0, 0, 0, 0, 0, 0, + 98, 0, 0, 99, 0, 0, 0, 100, 0, 101, 0, 0, 0, 0, 0, 0, + 0, 102, 0, 0, 103, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 104, + 105, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 62, 0, 0, 0, + 0, 0, 0, 0, 106, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 108, 0, 0, 0, 0, 0, 0, 0, 0, 109, 0, 0, 0, +}; + +static RE_UINT8 re_numeric_value_stage_4[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 0, + 0, 0, 0, 0, 4, 0, 5, 6, 1, 2, 3, 0, 0, 0, 0, 0, + 0, 7, 8, 9, 0, 0, 0, 0, 0, 7, 8, 9, 0, 10, 11, 0, + 0, 7, 8, 9, 12, 13, 0, 0, 0, 7, 8, 9, 14, 0, 0, 0, + 0, 7, 8, 9, 0, 0, 1, 15, 0, 7, 8, 9, 16, 17, 0, 0, + 1, 2, 18, 19, 20, 0, 0, 0, 0, 0, 21, 2, 22, 23, 24, 25, + 0, 0, 0, 26, 27, 0, 0, 0, 1, 2, 3, 0, 1, 2, 3, 0, + 0, 0, 0, 0, 1, 2, 28, 0, 0, 0, 0, 0, 29, 2, 3, 0, + 0, 0, 0, 0, 30, 31, 32, 33, 34, 35, 36, 37, 34, 35, 36, 37, + 38, 39, 40, 0, 0, 0, 0, 0, 34, 35, 36, 41, 42, 34, 35, 36, + 41, 42, 34, 35, 36, 41, 42, 0, 0, 0, 43, 44, 45, 46, 2, 47, + 0, 0, 0, 0, 0, 48, 49, 50, 34, 35, 51, 49, 50, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 
52, 0, 53, 0, 0, 0, 0, 0, 0, + 21, 2, 3, 0, 0, 0, 54, 0, 0, 0, 0, 0, 48, 55, 0, 0, + 34, 35, 56, 0, 0, 0, 0, 0, 0, 0, 57, 58, 59, 60, 61, 62, + 0, 0, 0, 0, 63, 64, 65, 66, 0, 67, 0, 0, 0, 0, 0, 0, + 68, 0, 0, 0, 0, 0, 0, 0, 0, 0, 69, 0, 0, 0, 0, 0, + 0, 0, 0, 70, 0, 0, 0, 0, 71, 72, 73, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 74, 0, 0, 0, 75, 0, 76, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 77, 78, 0, 0, 0, 0, 0, 0, 79, + 0, 0, 80, 0, 0, 0, 0, 0, 0, 0, 0, 67, 0, 0, 0, 0, + 0, 0, 0, 0, 81, 0, 0, 0, 0, 82, 0, 0, 0, 0, 0, 0, + 0, 83, 0, 0, 0, 0, 0, 0, 0, 0, 84, 85, 0, 0, 0, 0, + 86, 87, 0, 88, 0, 0, 0, 0, 89, 80, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 90, 0, 0, 0, 0, 0, 5, 0, 5, 0, + 0, 0, 0, 0, 0, 0, 91, 0, 0, 0, 0, 0, 0, 0, 0, 92, + 0, 0, 0, 15, 75, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 93, + 0, 0, 0, 94, 0, 0, 0, 0, 0, 0, 0, 0, 95, 0, 0, 0, + 0, 95, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 96, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 97, 0, 98, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 25, 0, 0, 0, 0, 0, 0, 0, 99, 68, 0, 0, 0, + 0, 0, 0, 0, 75, 0, 0, 0, 100, 0, 0, 0, 0, 0, 0, 0, + 0, 101, 0, 81, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 102, 0, + 0, 0, 0, 0, 0, 103, 0, 0, 0, 48, 49, 104, 0, 0, 0, 0, + 0, 0, 0, 0, 105, 106, 0, 0, 0, 0, 107, 0, 108, 0, 75, 0, + 0, 0, 0, 0, 103, 0, 0, 0, 0, 0, 0, 0, 109, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 110, 0, 111, 8, 9, 57, 58, 112, 113, + 114, 115, 116, 117, 118, 0, 0, 0, 119, 120, 121, 122, 123, 124, 125, 126, + 127, 128, 129, 130, 122, 131, 132, 0, 0, 0, 103, 0, 0, 0, 0, 0, + 133, 0, 0, 0, 0, 0, 0, 0, 134, 0, 135, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 136, 137, 0, 0, 0, 0, 0, 0, 0, 0, 138, 139, + 0, 0, 0, 0, 0, 140, 141, 0, 34, 142, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 143, 0, 0, 0, 0, 0, 0, 34, 142, + 34, 35, 144, 145, 146, 147, 148, 149, 0, 0, 0, 0, 48, 49, 50, 150, + 151, 152, 8, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 8, 9, + 8, 9, 49, 153, 35, 154, 2, 155, 156, 157, 9, 158, 159, 158, 160, 161, + 162, 163, 164, 165, 166, 167, 168, 169, 170, 0, 0, 0, 0, 0, 0, 0, + 34, 35, 144, 145, 171, 0, 0, 0, 0, 0, 0, 7, 
8, 9, 1, 2, + 172, 8, 9, 1, 2, 172, 8, 9, 173, 49, 174, 0, 0, 0, 0, 0, + 70, 0, 0, 0, 0, 0, 0, 0, 0, 175, 0, 0, 0, 0, 0, 0, + 98, 0, 0, 0, 0, 0, 0, 0, 67, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 91, 0, 0, 0, 0, 0, 176, 0, 0, 88, 0, 0, 0, 88, + 0, 0, 101, 0, 0, 0, 0, 73, 0, 0, 0, 0, 0, 0, 73, 0, + 0, 0, 0, 0, 0, 0, 80, 0, 0, 0, 0, 0, 0, 0, 107, 0, + 0, 0, 0, 177, 0, 0, 0, 0, 0, 0, 0, 0, 178, 0, 0, 0, +}; + +static RE_UINT8 re_numeric_value_stage_5[] = { + 0, 0, 0, 0, 2, 23, 25, 27, 29, 31, 33, 35, 37, 39, 0, 0, + 0, 0, 25, 27, 0, 23, 0, 0, 11, 15, 19, 0, 0, 0, 2, 23, + 25, 27, 29, 31, 33, 35, 37, 39, 3, 6, 9, 11, 19, 46, 0, 0, + 0, 0, 11, 15, 19, 3, 6, 9, 40, 85, 94, 0, 23, 25, 27, 0, + 40, 85, 94, 11, 15, 19, 0, 0, 37, 39, 15, 24, 26, 28, 30, 32, + 34, 36, 38, 1, 0, 23, 25, 27, 37, 39, 40, 50, 60, 70, 80, 81, + 82, 83, 84, 85, 103, 0, 0, 0, 0, 0, 47, 48, 49, 0, 0, 0, + 37, 39, 23, 0, 2, 0, 0, 0, 7, 5, 4, 12, 18, 10, 14, 16, + 20, 8, 21, 6, 13, 17, 22, 23, 23, 25, 27, 29, 31, 33, 35, 37, + 39, 40, 41, 42, 80, 85, 89, 94, 94, 98, 103, 0, 0, 33, 80, 107, + 112, 2, 0, 0, 43, 44, 45, 46, 47, 48, 49, 50, 0, 0, 2, 41, + 42, 43, 44, 45, 46, 47, 48, 49, 50, 23, 25, 27, 37, 39, 40, 2, + 0, 0, 23, 25, 27, 29, 31, 33, 35, 37, 39, 40, 39, 40, 23, 25, + 0, 15, 0, 0, 0, 0, 0, 2, 40, 50, 60, 0, 27, 29, 0, 0, + 39, 40, 0, 0, 40, 50, 60, 70, 80, 81, 82, 83, 0, 51, 52, 53, + 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 0, 66, 67, 68, + 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 0, 31, 0, 0, + 0, 0, 0, 25, 0, 0, 31, 0, 0, 35, 0, 0, 23, 0, 0, 35, + 0, 0, 0, 103, 0, 27, 0, 0, 0, 39, 0, 0, 25, 0, 0, 0, + 31, 0, 29, 0, 0, 0, 0, 115, 40, 0, 0, 0, 0, 0, 0, 94, + 27, 0, 0, 0, 85, 0, 0, 0, 115, 0, 0, 0, 0, 0, 116, 0, + 0, 25, 0, 37, 0, 33, 0, 0, 0, 40, 0, 94, 50, 60, 0, 0, + 70, 0, 0, 0, 0, 27, 27, 27, 0, 0, 0, 29, 0, 0, 23, 0, + 0, 0, 39, 50, 0, 0, 40, 0, 37, 0, 0, 0, 0, 0, 35, 0, + 0, 0, 39, 0, 0, 0, 85, 0, 0, 0, 29, 0, 0, 0, 25, 0, + 0, 94, 0, 0, 0, 0, 33, 0, 33, 0, 0, 0, 0, 0, 2, 0, + 35, 
37, 39, 2, 11, 15, 19, 3, 6, 9, 0, 0, 0, 0, 0, 27, + 0, 0, 0, 40, 0, 33, 0, 33, 0, 40, 0, 0, 0, 0, 0, 23, + 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, + 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 11, 15, 23, 31, + 80, 89, 98, 107, 31, 40, 80, 85, 89, 94, 98, 31, 40, 80, 85, 89, + 94, 103, 107, 40, 23, 23, 23, 25, 25, 25, 25, 31, 40, 40, 40, 40, + 40, 60, 80, 80, 80, 80, 85, 87, 89, 89, 89, 89, 80, 15, 15, 18, + 19, 0, 0, 0, 23, 31, 40, 80, 0, 84, 0, 0, 0, 0, 93, 0, + 0, 23, 25, 40, 50, 85, 0, 0, 23, 25, 27, 40, 50, 85, 94, 103, + 0, 0, 23, 40, 50, 85, 25, 27, 40, 50, 85, 94, 0, 23, 80, 0, + 39, 40, 50, 60, 70, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, + 91, 92, 93, 15, 11, 12, 18, 0, 50, 60, 70, 80, 81, 82, 83, 84, + 85, 94, 2, 23, 35, 37, 39, 29, 39, 23, 25, 27, 37, 39, 23, 25, + 27, 29, 31, 25, 27, 27, 29, 31, 23, 25, 27, 27, 29, 31, 113, 114, + 29, 31, 27, 27, 29, 29, 29, 29, 33, 35, 35, 35, 37, 37, 39, 39, + 39, 39, 25, 27, 29, 31, 33, 23, 25, 27, 29, 29, 31, 31, 25, 27, + 23, 25, 12, 18, 21, 12, 18, 6, 11, 8, 11, 0, 83, 84, 0, 0, + 37, 39, 2, 23, 2, 2, 23, 25, 35, 37, 39, 0, 29, 0, 0, 0, + 0, 0, 0, 60, 0, 29, 0, 0, 39, 0, 0, 0, +}; + +/* Numeric_Value: 2876 bytes. */ + +RE_UINT32 re_get_numeric_value(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 12; + code = ch ^ (f << 12); + pos = (RE_UINT32)re_numeric_value_stage_1[f] << 4; + f = code >> 8; + code ^= f << 8; + pos = (RE_UINT32)re_numeric_value_stage_2[pos + f] << 3; + f = code >> 5; + code ^= f << 5; + pos = (RE_UINT32)re_numeric_value_stage_3[pos + f] << 3; + f = code >> 2; + code ^= f << 2; + pos = (RE_UINT32)re_numeric_value_stage_4[pos + f] << 2; + value = re_numeric_value_stage_5[pos + code]; + + return value; +} + +/* Bidi_Mirrored. 
*/ + +static RE_UINT8 re_bidi_mirrored_stage_1[] = { + 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, +}; + +static RE_UINT8 re_bidi_mirrored_stage_2[] = { + 0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 6, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, +}; + +static RE_UINT8 re_bidi_mirrored_stage_3[] = { + 0, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 3, 1, 1, 1, 1, + 4, 5, 1, 6, 7, 8, 1, 9, 10, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 11, + 1, 1, 1, 12, 1, 1, 1, 1, +}; + +static RE_UINT8 re_bidi_mirrored_stage_4[] = { + 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 4, 3, 3, 3, 3, 3, 5, 3, 3, 3, 3, 3, + 6, 7, 8, 3, 3, 9, 3, 3, 10, 11, 12, 13, 14, 3, 3, 3, + 3, 3, 3, 3, 3, 15, 3, 16, 3, 3, 3, 3, 3, 3, 17, 18, + 19, 20, 21, 22, 3, 3, 3, 3, 23, 3, 3, 3, 3, 3, 3, 3, + 24, 3, 3, 3, 3, 3, 3, 3, 3, 25, 3, 3, 26, 27, 3, 3, + 3, 3, 3, 28, 29, 30, 31, 32, +}; + +static RE_UINT8 re_bidi_mirrored_stage_5[] = { + 0, 0, 0, 0, 0, 3, 0, 80, 0, 0, 0, 40, 0, 0, 0, 40, + 0, 0, 0, 0, 0, 8, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 60, 0, 0, 0, 24, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 6, 96, 0, 0, 0, 0, 0, 0, 96, + 0, 96, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, + 30, 63, 98, 188, 87, 248, 15, 250, 255, 31, 60, 128, 245, 207, 255, 255, + 255, 159, 7, 1, 204, 255, 255, 193, 0, 62, 195, 255, 255, 63, 255, 255, + 0, 15, 0, 0, 3, 6, 0, 0, 0, 0, 0, 0, 0, 255, 63, 0, + 121, 59, 120, 112, 252, 255, 0, 0, 248, 255, 255, 249, 255, 255, 0, 1, + 63, 194, 55, 31, 58, 3, 240, 51, 0, 252, 255, 223, 83, 122, 48, 112, + 0, 0, 128, 1, 48, 188, 25, 254, 255, 255, 255, 255, 207, 191, 255, 255, + 255, 255, 127, 80, 124, 112, 136, 47, 60, 54, 0, 48, 255, 3, 0, 0, + 0, 255, 243, 15, 0, 0, 0, 0, 0, 0, 0, 126, 48, 0, 0, 0, + 0, 3, 0, 80, 0, 0, 0, 40, 0, 0, 0, 168, 13, 0, 0, 0, + 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 32, 0, 0, 0, 0, 0, + 0, 128, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, + 8, 0, 0, 0, 0, 0, 0, 0, +}; + 
+/* Bidi_Mirrored: 489 bytes. */ + +RE_UINT32 re_get_bidi_mirrored(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 16; + code = ch ^ (f << 16); + pos = (RE_UINT32)re_bidi_mirrored_stage_1[f] << 4; + f = code >> 12; + code ^= f << 12; + pos = (RE_UINT32)re_bidi_mirrored_stage_2[pos + f] << 3; + f = code >> 9; + code ^= f << 9; + pos = (RE_UINT32)re_bidi_mirrored_stage_3[pos + f] << 3; + f = code >> 6; + code ^= f << 6; + pos = (RE_UINT32)re_bidi_mirrored_stage_4[pos + f] << 6; + pos += code; + value = (re_bidi_mirrored_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* Indic_Matra_Category. */ + +static RE_UINT8 re_indic_matra_category_stage_1[] = { + 0, 1, 1, 1, 1, 2, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, +}; + +static RE_UINT8 re_indic_matra_category_stage_2[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, + 8, 0, 0, 0, 0, 0, 0, 9, 0, 10, 11, 12, 13, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 14, 15, 16, 17, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 18, 0, 0, 0, 0, 0, + 19, 20, 0, 0, 0, 0, 21, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +static RE_UINT8 re_indic_matra_category_stage_3[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 1, 2, 3, 4, 0, 0, 0, 0, 5, 6, 7, 4, 0, + 0, 0, 0, 5, 8, 0, 0, 0, 0, 0, 0, 5, 9, 0, 4, 0, + 0, 0, 0, 10, 11, 12, 4, 0, 0, 0, 0, 13, 14, 7, 0, 0, + 0, 0, 0, 15, 16, 17, 4, 0, 0, 0, 0, 10, 18, 19, 4, 0, + 0, 0, 0, 13, 20, 7, 4, 0, 0, 0, 0, 0, 21, 22, 0, 
23, + 0, 0, 0, 24, 25, 0, 0, 0, 0, 0, 0, 26, 27, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 28, 29, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 30, 31, 0, 32, 33, 34, 35, 36, 0, 0, 0, 0, 0, 0, + 0, 37, 0, 37, 0, 38, 0, 38, 0, 0, 0, 39, 40, 41, 0, 0, + 0, 0, 42, 0, 0, 0, 0, 0, 0, 0, 0, 43, 44, 0, 0, 0, + 0, 45, 0, 0, 0, 0, 46, 47, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 48, 49, 0, 0, 0, 0, 0, 50, 0, 0, 0, 0, 23, + 0, 0, 51, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 52, 0, 53, 0, 0, 0, 0, 0, 0, 0, 0, 54, 55, 0, 0, 0, + 0, 0, 0, 0, 56, 57, 0, 0, 0, 0, 0, 58, 59, 0, 0, 0, + 0, 0, 60, 61, 0, 0, 0, 0, 0, 0, 0, 62, 0, 0, 63, 64, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 65, 0, + 66, 0, 0, 67, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 68, 69, 0, 0, 0, 0, 0, 0, 70, 0, 0, 0, 0, + 0, 0, 71, 72, 0, 0, 0, 0, 0, 0, 0, 73, 44, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 74, 69, 0, 0, 0, 0, +}; + +static RE_UINT8 re_indic_matra_category_stage_4[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 2, + 3, 4, 5, 6, 1, 7, 3, 8, 0, 0, 9, 4, 0, 0, 0, 0, + 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, + 3, 4, 10, 11, 12, 13, 14, 0, 0, 0, 0, 15, 0, 0, 0, 0, + 3, 10, 0, 9, 16, 9, 17, 0, 3, 4, 5, 9, 18, 15, 3, 0, + 0, 0, 0, 0, 0, 0, 0, 19, 3, 4, 10, 11, 20, 13, 21, 0, + 0, 0, 0, 18, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, + 17, 10, 0, 22, 12, 23, 24, 0, 0, 0, 0, 0, 0, 0, 0, 6, + 1, 7, 25, 6, 26, 6, 6, 0, 0, 0, 9, 10, 0, 0, 0, 0, + 27, 7, 25, 18, 28, 29, 6, 0, 0, 0, 15, 25, 0, 0, 0, 0, + 7, 3, 10, 22, 12, 23, 24, 0, 0, 0, 0, 0, 0, 16, 0, 15, + 7, 6, 10, 10, 2, 30, 23, 31, 0, 7, 0, 0, 0, 0, 0, 0, + 19, 7, 6, 6, 4, 10, 0, 0, 32, 32, 33, 9, 0, 0, 0, 16, + 19, 7, 6, 6, 4, 9, 0, 0, 32, 32, 34, 0, 0, 0, 0, 0, + 35, 36, 4, 37, 37, 6, 6, 0, 36, 0, 10, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 15, 19, 17, 38, 6, 6, 0, 39, 16, 0, 0, + 0, 0, 0, 7, 4, 0, 0, 0, 0, 25, 0, 15, 25, 0, 0, 0, + 9, 6, 16, 0, 0, 0, 0, 0, 0, 15, 40, 16, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 19, 0, 0, 17, 10, 0, 0, 0, 0, 0, + 0, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 19, 6, 17, 4, 
41, + 42, 22, 23, 0, 25, 0, 0, 0, 9, 43, 0, 0, 0, 0, 0, 0, + 6, 44, 45, 46, 16, 0, 0, 0, 7, 7, 2, 22, 7, 8, 7, 7, + 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 38, 2, 0, 0, + 47, 1, 19, 6, 17, 5, 44, 22, 22, 40, 16, 0, 0, 0, 0, 0, + 0, 0, 15, 6, 4, 48, 49, 22, 23, 18, 25, 0, 0, 0, 0, 0, + 0, 0, 17, 8, 6, 25, 0, 0, 0, 0, 0, 2, 50, 7, 10, 0, + 0, 0, 0, 16, 0, 0, 0, 0, 0, 15, 3, 1, 0, 0, 0, 0, + 0, 0, 15, 7, 7, 7, 7, 7, 7, 7, 10, 0, 0, 0, 0, 0, + 0, 0, 0, 35, 4, 17, 4, 10, 0, 15, 0, 0, 0, 0, 0, 0, + 0, 0, 7, 6, 4, 22, 16, 0, 51, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 9, 6, 17, 52, 40, 10, 0, 0, 0, 0, 0, 0, + 1, 6, 53, 54, 55, 56, 33, 16, 0, 0, 0, 0, 0, 11, 5, 8, + 0, 0, 0, 43, 0, 0, 0, 0, 0, 15, 19, 7, 44, 25, 35, 0, + 57, 4, 9, 58, 0, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 39, + 0, 0, 0, 0, 6, 6, 4, 4, 4, 6, 6, 16, 0, 0, 0, 0, + 2, 3, 5, 1, 3, 0, 0, 0, 0, 0, 0, 9, 6, 4, 40, 37, + 17, 59, 16, 0, 0, 0, 0, 0, 0, 15, 8, 4, 4, 4, 6, 18, + 0, 0, 0, 0, 0, 0, 9, 8, +}; + +static RE_UINT8 re_indic_matra_category_stage_5[] = { + 0, 0, 5, 1, 1, 2, 1, 6, 6, 6, 6, 5, 5, 5, 1, 1, + 2, 1, 0, 5, 6, 0, 0, 2, 2, 0, 0, 4, 4, 6, 0, 1, + 5, 0, 5, 6, 5, 8, 1, 5, 9, 0, 10, 6, 2, 2, 4, 4, + 4, 5, 1, 0, 7, 0, 8, 1, 8, 0, 8, 8, 9, 2, 4, 1, + 3, 3, 3, 1, 3, 0, 0, 6, 5, 7, 7, 7, 6, 2, 0, 14, + 2, 5, 9, 10, 4, 2, 14, 0, 6, 1, 1, 8, 8, 5, 14, 1, + 6, 11, 7, 12, 2, 9, 11, 0, 5, 2, 6, 3, 3, 5, 5, 3, + 1, 3, 0, 13, 13, 0, 6, 14, +}; + +/* Indic_Matra_Category: 1336 bytes. 
*/ + +RE_UINT32 re_get_indic_matra_category(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 13; + code = ch ^ (f << 13); + pos = (RE_UINT32)re_indic_matra_category_stage_1[f] << 5; + f = code >> 8; + code ^= f << 8; + pos = (RE_UINT32)re_indic_matra_category_stage_2[pos + f] << 4; + f = code >> 4; + code ^= f << 4; + pos = (RE_UINT32)re_indic_matra_category_stage_3[pos + f] << 3; + f = code >> 1; + code ^= f << 1; + pos = (RE_UINT32)re_indic_matra_category_stage_4[pos + f] << 1; + value = re_indic_matra_category_stage_5[pos + code]; + + return value; +} + +/* Indic_Syllabic_Category. */ + +static RE_UINT8 re_indic_syllabic_category_stage_1[] = { + 0, 1, 2, 2, 2, 3, 2, 2, 4, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, +}; + +static RE_UINT8 re_indic_syllabic_category_stage_2[] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, 4, 5, 6, 7, 8, + 9, 1, 1, 1, 1, 1, 1, 10, 1, 11, 12, 13, 14, 1, 1, 1, + 1, 1, 1, 1, 1, 15, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 16, 17, 18, 19, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 20, 1, 1, 1, 1, 1, + 21, 22, 1, 1, 1, 1, 23, 1, 1, 1, 1, 1, 1, 1, 1, 1, +}; + +static RE_UINT8 re_indic_syllabic_category_stage_3[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 8, 16, + 17, 11, 12, 18, 19, 20, 0, 21, 22, 23, 12, 24, 25, 0, 8, 0, + 10, 
11, 12, 24, 26, 27, 8, 28, 29, 30, 31, 32, 33, 34, 0, 0, + 35, 36, 12, 37, 38, 39, 8, 0, 40, 36, 12, 41, 38, 42, 8, 0, + 40, 36, 4, 43, 44, 34, 8, 45, 46, 47, 4, 48, 49, 50, 0, 51, + 52, 4, 53, 54, 55, 0, 0, 0, 56, 57, 58, 59, 60, 61, 0, 0, + 0, 0, 0, 0, 62, 4, 63, 64, 65, 66, 67, 68, 0, 0, 0, 0, + 4, 4, 69, 70, 0, 71, 72, 73, 74, 75, 0, 0, 0, 0, 0, 0, + 76, 77, 78, 77, 78, 79, 76, 80, 4, 4, 81, 82, 83, 84, 0, 0, + 85, 63, 86, 87, 0, 4, 88, 89, 4, 4, 90, 91, 92, 0, 0, 0, + 4, 93, 4, 4, 94, 95, 96, 97, 0, 0, 0, 0, 0, 0, 0, 0, + 98, 78, 4, 99, 100, 0, 0, 0, 101, 4, 102, 103, 4, 4, 104, 105, + 4, 4, 106, 107, 108, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 109, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, + 111, 4, 112, 0, 4, 113, 114, 115, 116, 117, 4, 118, 119, 0, 0, 0, + 120, 4, 121, 4, 122, 123, 0, 0, 124, 4, 4, 125, 126, 0, 0, 0, + 127, 4, 128, 129, 130, 0, 4, 131, 4, 4, 4, 132, 133, 0, 134, 135, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 136, 137, 138, 0, + 139, 140, 4, 141, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 142, 78, 4, 143, 144, 0, 0, 0, 145, 4, 4, 146, 0, 0, 0, 0, + 147, 4, 148, 149, 0, 0, 0, 0, 150, 151, 4, 152, 153, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 154, 4, 155, 156, 0, 0, 0, 0, +}; + +static RE_UINT8 re_indic_syllabic_category_stage_4[] = { + 0, 0, 0, 0, 1, 0, 0, 0, 2, 3, 3, 3, 3, 4, 5, 5, + 5, 5, 5, 5, 5, 5, 6, 7, 8, 8, 8, 9, 0, 10, 5, 5, + 11, 0, 0, 0, 12, 3, 13, 5, 14, 15, 3, 16, 16, 4, 5, 5, + 5, 5, 17, 5, 18, 19, 20, 7, 8, 21, 21, 22, 0, 23, 0, 24, + 20, 0, 0, 0, 14, 15, 25, 26, 17, 27, 20, 28, 29, 23, 21, 30, + 0, 0, 13, 18, 31, 32, 0, 0, 14, 15, 3, 33, 33, 4, 5, 5, + 17, 13, 20, 7, 8, 34, 34, 30, 8, 21, 21, 30, 0, 35, 0, 24, + 36, 0, 0, 0, 37, 15, 25, 12, 38, 39, 27, 17, 40, 41, 42, 19, + 5, 5, 20, 35, 29, 35, 43, 30, 0, 23, 0, 0, 14, 15, 3, 38, + 38, 4, 5, 5, 5, 13, 20, 44, 8, 43, 43, 30, 0, 45, 20, 0, + 46, 15, 3, 38, 5, 13, 20, 7, 0, 45, 0, 47, 5, 5, 42, 44, + 8, 43, 43, 48, 0, 0, 49, 50, 46, 15, 3, 3, 3, 25, 19, 5, + 24, 5, 5, 36, 5, 42, 51, 23, 8, 52, 
8, 8, 35, 0, 0, 0, + 13, 5, 5, 5, 5, 5, 5, 42, 8, 8, 53, 0, 8, 34, 54, 55, + 27, 56, 18, 36, 0, 5, 13, 5, 13, 57, 19, 27, 8, 8, 34, 58, + 8, 59, 54, 60, 0, 0, 0, 20, 5, 5, 13, 5, 5, 5, 5, 41, + 10, 8, 8, 61, 62, 63, 64, 65, 66, 66, 67, 66, 66, 66, 66, 66, + 66, 66, 66, 68, 69, 3, 70, 8, 8, 71, 72, 73, 74, 11, 75, 76, + 77, 78, 79, 80, 81, 82, 5, 5, 83, 84, 54, 85, 0, 0, 86, 87, + 88, 5, 5, 17, 6, 89, 0, 0, 88, 5, 5, 5, 6, 0, 0, 0, + 90, 0, 0, 0, 91, 3, 3, 3, 3, 35, 8, 8, 8, 61, 92, 93, + 94, 0, 0, 95, 96, 5, 5, 5, 8, 8, 97, 0, 98, 99, 100, 0, + 101, 102, 102, 103, 104, 105, 0, 0, 5, 5, 5, 0, 8, 8, 8, 8, + 106, 99, 107, 0, 5, 108, 8, 0, 5, 5, 5, 69, 88, 109, 99, 110, + 111, 8, 8, 8, 8, 79, 107, 0, 112, 113, 3, 3, 5, 114, 8, 8, + 8, 115, 5, 0, 116, 3, 117, 5, 118, 8, 119, 120, 0, 0, 121, 122, + 5, 123, 8, 8, 124, 0, 0, 0, 5, 125, 8, 106, 99, 126, 0, 0, + 0, 0, 0, 13, 127, 0, 0, 0, 0, 0, 0, 1, 33, 128, 129, 5, + 108, 8, 0, 0, 5, 5, 5, 130, 131, 132, 133, 5, 134, 0, 0, 0, + 135, 3, 3, 3, 117, 5, 5, 5, 5, 136, 8, 8, 8, 89, 0, 0, + 0, 0, 19, 5, 130, 102, 137, 107, 5, 108, 8, 138, 139, 0, 0, 0, + 140, 3, 4, 88, 141, 8, 8, 142, 89, 0, 0, 0, 3, 117, 5, 5, + 5, 5, 81, 8, 143, 144, 0, 0, 99, 99, 99, 145, 13, 0, 146, 0, + 8, 8, 8, 84, 147, 0, 0, 0, 117, 5, 108, 8, 0, 148, 0, 0, + 5, 5, 5, 74, 149, 5, 150, 99, 151, 8, 29, 152, 81, 45, 0, 153, + 5, 13, 13, 5, 5, 0, 0, 154, 155, 15, 3, 3, 5, 5, 8, 8, + 8, 53, 0, 0, 156, 3, 3, 4, 8, 8, 157, 0, 156, 88, 5, 5, + 5, 108, 8, 8, 158, 89, 0, 0, 156, 3, 3, 3, 4, 5, 5, 5, + 108, 8, 8, 8, 63, 0, 0, 0, 3, 3, 117, 5, 5, 5, 129, 159, + 8, 160, 0, 0, +}; + +static RE_UINT8 re_indic_syllabic_category_stage_5[] = { + 0, 0, 0, 0, 9, 0, 0, 0, 1, 1, 1, 2, 6, 6, 6, 6, + 6, 10, 10, 10, 10, 10, 10, 10, 10, 10, 7, 7, 4, 3, 7, 7, + 7, 7, 7, 7, 7, 5, 7, 7, 0, 7, 7, 7, 6, 6, 7, 7, + 0, 0, 6, 6, 0, 10, 10, 10, 0, 1, 1, 2, 0, 6, 6, 6, + 6, 0, 0, 6, 10, 0, 10, 10, 10, 0, 10, 0, 0, 0, 10, 10, + 10, 10, 0, 0, 7, 0, 0, 7, 7, 5, 11, 0, 0, 0, 0, 7, + 10, 10, 0, 
10, 6, 6, 6, 0, 0, 0, 0, 6, 0, 10, 10, 0, + 4, 0, 7, 7, 7, 7, 7, 0, 7, 5, 0, 0, 1, 0, 9, 9, + 0, 14, 0, 0, 6, 6, 0, 6, 7, 7, 0, 7, 0, 0, 7, 7, + 0, 10, 0, 0, 0, 0, 1, 17, 6, 0, 6, 6, 6, 10, 0, 0, + 0, 0, 0, 10, 10, 0, 0, 0, 10, 10, 10, 0, 7, 0, 7, 7, + 0, 3, 7, 7, 0, 7, 7, 0, 0, 0, 1, 2, 0, 0, 10, 0, + 7, 5, 12, 0, 0, 0, 11, 11, 11, 11, 11, 11, 0, 0, 5, 0, + 7, 0, 7, 0, 7, 7, 5, 0, 19, 19, 19, 19, 0, 1, 5, 0, + 10, 0, 0, 10, 0, 10, 0, 10, 14, 14, 0, 0, 7, 0, 0, 0, + 0, 1, 0, 0, 7, 7, 1, 2, 7, 7, 1, 1, 5, 3, 0, 0, + 16, 16, 16, 16, 16, 13, 13, 13, 13, 13, 13, 13, 0, 13, 13, 13, + 13, 0, 0, 0, 10, 6, 6, 6, 6, 6, 6, 7, 7, 7, 1, 19, + 2, 5, 5, 14, 14, 14, 14, 10, 10, 10, 6, 6, 7, 7, 10, 10, + 10, 10, 14, 14, 14, 10, 7, 19, 19, 10, 10, 7, 7, 19, 19, 19, + 19, 19, 10, 10, 10, 7, 7, 7, 7, 10, 10, 10, 10, 10, 14, 7, + 7, 7, 7, 19, 19, 19, 10, 19, 0, 0, 19, 19, 7, 7, 0, 0, + 6, 6, 6, 10, 5, 0, 0, 0, 10, 0, 7, 7, 10, 10, 10, 6, + 7, 20, 20, 0, 12, 0, 0, 0, 0, 5, 5, 0, 3, 0, 0, 0, + 9, 10, 10, 10, 7, 13, 13, 13, 15, 15, 1, 15, 15, 15, 15, 15, + 15, 0, 0, 0, 10, 10, 10, 8, 8, 8, 8, 8, 8, 8, 0, 0, + 18, 18, 18, 18, 18, 0, 0, 0, 7, 15, 15, 15, 19, 19, 0, 0, + 10, 10, 10, 7, 10, 14, 14, 15, 15, 15, 15, 0, 5, 7, 7, 7, + 1, 1, 1, 12, 2, 6, 6, 6, 4, 7, 7, 7, 5, 10, 10, 10, + 1, 12, 2, 6, 6, 6, 10, 10, 10, 13, 13, 13, 7, 7, 5, 5, + 13, 13, 10, 10, 0, 0, 3, 10, 10, 10, 15, 15, 6, 6, 4, 7, + 15, 15, 5, 5, 13, 13, 7, 7, 1, 1, 0, 4, 0, 0, 2, 2, + 6, 6, 5, 10, 10, 10, 10, 1, 10, 10, 8, 8, 8, 8, 10, 10, + 10, 10, 8, 13, 13, 10, 10, 10, 10, 13, 10, 1, 1, 2, 6, 6, + 15, 7, 7, 7, 8, 8, 8, 19, 7, 7, 7, 15, 15, 15, 15, 5, + 1, 1, 12, 2, 10, 10, 10, 4, 7, 13, 14, 14, 7, 7, 7, 14, + 14, 14, 14, 0, 15, 15, 0, 0, 0, 0, 10, 19, 18, 19, 18, 0, + 0, 2, 5, 0, 10, 6, 10, 10, 10, 10, 10, 15, 15, 15, 15, 7, + 19, 5, 0, 0, 7, 0, 1, 2, 0, 0, 0, 5, 1, 1, 2, 0, + 1, 1, 2, 6, 7, 5, 4, 0, 7, 7, 7, 5, 2, 7, 7, 7, + 7, 7, 5, 4, +}; + +/* Indic_Syllabic_Category: 1952 bytes. 
*/ + +RE_UINT32 re_get_indic_syllabic_category(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 13; + code = ch ^ (f << 13); + pos = (RE_UINT32)re_indic_syllabic_category_stage_1[f] << 5; + f = code >> 8; + code ^= f << 8; + pos = (RE_UINT32)re_indic_syllabic_category_stage_2[pos + f] << 4; + f = code >> 4; + code ^= f << 4; + pos = (RE_UINT32)re_indic_syllabic_category_stage_3[pos + f] << 2; + f = code >> 2; + code ^= f << 2; + pos = (RE_UINT32)re_indic_syllabic_category_stage_4[pos + f] << 2; + value = re_indic_syllabic_category_stage_5[pos + code]; + + return value; +} + +/* Alphanumeric. */ + +static RE_UINT8 re_alphanumeric_stage_1[] = { + 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, +}; + +static RE_UINT8 re_alphanumeric_stage_2[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 9, 10, 11, 7, 7, 7, 7, 12, 13, 13, 13, 13, 14, + 15, 16, 17, 13, 18, 13, 19, 13, 13, 13, 13, 13, 13, 20, 13, 13, + 13, 13, 13, 13, 13, 13, 21, 13, 13, 13, 22, 13, 13, 23, 13, 13, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 24, 7, 25, 26, 13, 13, 13, 13, 13, 13, 13, 27, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, +}; + +static RE_UINT8 re_alphanumeric_stage_3[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 1, 17, 18, 19, 1, 20, 21, 22, 23, 24, 25, 26, 27, 1, 28, + 29, 30, 31, 31, 32, 31, 31, 31, 31, 31, 31, 31, 33, 34, 35, 31, + 36, 37, 31, 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 38, 1, 1, 1, 1, 1, 1, 1, 1, 1, 39, + 1, 1, 1, 1, 40, 1, 41, 42, 43, 44, 45, 46, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 47, 31, 31, 31, 31, 31, 31, 31, 31, + 31, 1, 48, 49, 1, 50, 51, 52, 53, 54, 55, 56, 57, 31, 31, 31, + 58, 59, 60, 61, 62, 31, 31, 31, 63, 64, 31, 31, 31, 31, 65, 31, + 1, 1, 1, 66, 67, 31, 31, 31, 1, 1, 1, 1, 68, 31, 31, 31, + 1, 1, 69, 31, 31, 31, 31, 70, 71, 31, 31, 31, 31, 31, 
31, 31, + 31, 31, 31, 31, 72, 73, 74, 75, 31, 31, 31, 31, 31, 31, 76, 31, + 1, 1, 1, 1, 1, 1, 77, 1, 1, 1, 1, 1, 1, 1, 1, 78, + 79, 31, 31, 31, 31, 31, 31, 31, 1, 1, 79, 31, 31, 31, 31, 31, +}; + +static RE_UINT8 re_alphanumeric_stage_4[] = { + 0, 1, 2, 2, 0, 3, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 6, 7, 0, 0, 8, 9, 10, 11, 5, 12, + 5, 5, 5, 5, 13, 5, 5, 5, 5, 14, 15, 16, 17, 18, 19, 20, + 21, 5, 22, 23, 5, 5, 24, 25, 26, 5, 27, 5, 5, 28, 5, 29, + 30, 31, 32, 0, 0, 33, 0, 34, 5, 35, 36, 37, 38, 39, 40, 41, + 42, 43, 44, 45, 46, 47, 48, 49, 38, 47, 50, 51, 52, 53, 54, 55, + 56, 57, 58, 49, 59, 57, 60, 61, 59, 62, 63, 64, 65, 66, 67, 68, + 16, 69, 70, 0, 71, 72, 73, 0, 74, 75, 76, 77, 78, 79, 0, 0, + 5, 80, 81, 82, 83, 5, 84, 85, 5, 5, 86, 5, 87, 88, 89, 5, + 90, 5, 91, 0, 92, 5, 5, 93, 16, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 94, 2, 5, 5, 95, 96, 97, 97, 98, 5, 99, 100, 75, + 1, 5, 5, 101, 5, 102, 5, 103, 79, 104, 105, 106, 5, 107, 108, 0, + 109, 5, 110, 111, 108, 112, 0, 0, 5, 113, 114, 0, 5, 115, 5, 116, + 5, 103, 117, 118, 0, 0, 0, 119, 5, 5, 5, 5, 5, 5, 0, 0, + 120, 5, 121, 118, 5, 122, 123, 124, 0, 0, 0, 125, 126, 0, 0, 0, + 127, 128, 129, 5, 130, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 131, 5, 75, 5, 132, 110, 5, 5, 5, 5, 133, + 5, 84, 5, 134, 135, 136, 136, 5, 0, 137, 0, 0, 0, 0, 0, 0, + 138, 139, 16, 5, 140, 16, 5, 85, 141, 142, 5, 5, 143, 69, 0, 26, + 5, 5, 5, 5, 5, 103, 0, 0, 5, 5, 5, 5, 5, 5, 31, 0, + 5, 5, 5, 5, 31, 0, 26, 118, 144, 145, 5, 146, 147, 5, 5, 92, + 148, 149, 5, 5, 150, 151, 0, 152, 153, 17, 5, 97, 5, 5, 154, 155, + 5, 102, 156, 79, 5, 157, 158, 0, 5, 135, 159, 160, 5, 110, 161, 162, + 163, 164, 0, 0, 0, 0, 5, 165, 5, 5, 5, 5, 5, 166, 167, 109, + 5, 5, 5, 168, 5, 5, 169, 0, 170, 171, 172, 5, 5, 28, 173, 5, + 5, 118, 26, 5, 174, 5, 17, 175, 0, 0, 0, 176, 5, 5, 5, 79, + 1, 2, 2, 105, 5, 110, 177, 0, 178, 179, 180, 0, 5, 5, 5, 69, + 0, 0, 5, 93, 0, 0, 0, 0, 0, 0, 0, 0, 79, 5, 181, 0, + 110, 26, 151, 0, 118, 5, 182, 0, 5, 5, 5, 
5, 118, 75, 0, 0, + 183, 184, 103, 0, 0, 0, 0, 0, 103, 169, 0, 0, 5, 185, 0, 0, + 186, 97, 0, 79, 0, 0, 0, 0, 5, 103, 103, 156, 0, 0, 0, 0, + 5, 5, 130, 0, 0, 0, 0, 0, 5, 5, 187, 55, 149, 32, 26, 188, + 5, 189, 0, 0, 5, 5, 190, 0, 0, 0, 0, 0, 5, 103, 75, 0, + 5, 5, 5, 143, 0, 0, 0, 0, 5, 5, 5, 191, 0, 0, 0, 0, + 5, 143, 0, 0, 0, 0, 0, 0, 5, 32, 0, 0, 0, 0, 0, 0, + 5, 5, 192, 110, 173, 0, 0, 0, 193, 0, 0, 0, 0, 0, 0, 0, + 5, 5, 194, 5, 195, 196, 197, 5, 198, 199, 200, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 201, 202, 85, 194, 194, 132, 132, 203, 203, 204, 5, + 197, 205, 206, 207, 208, 209, 0, 0, 5, 5, 5, 5, 5, 5, 135, 0, + 5, 93, 5, 5, 5, 5, 5, 5, 118, 0, 0, 0, 0, 0, 0, 0, +}; + +static RE_UINT8 re_alphanumeric_stage_5[] = { + 0, 0, 0, 0, 0, 0, 255, 3, 254, 255, 255, 7, 0, 4, 32, 4, + 255, 255, 127, 255, 255, 255, 255, 255, 195, 255, 3, 0, 31, 80, 0, 0, + 32, 0, 0, 0, 0, 0, 223, 60, 64, 215, 255, 255, 251, 255, 255, 255, + 255, 255, 191, 255, 3, 252, 255, 255, 255, 0, 254, 255, 255, 255, 127, 2, + 254, 255, 255, 255, 255, 0, 0, 0, 0, 0, 255, 191, 182, 0, 255, 255, + 255, 7, 7, 0, 0, 0, 255, 7, 255, 255, 255, 254, 255, 195, 255, 255, + 255, 255, 239, 31, 254, 225, 255, 159, 0, 0, 255, 255, 0, 224, 255, 255, + 255, 255, 3, 0, 255, 7, 48, 4, 255, 255, 255, 252, 255, 31, 0, 0, + 255, 255, 255, 1, 253, 31, 0, 0, 240, 3, 255, 127, 255, 255, 255, 239, + 255, 223, 225, 255, 207, 255, 254, 254, 238, 159, 249, 255, 255, 253, 197, 227, + 159, 89, 128, 176, 207, 255, 3, 0, 238, 135, 249, 255, 255, 253, 109, 195, + 135, 25, 2, 94, 192, 255, 63, 0, 238, 191, 251, 255, 255, 253, 237, 227, + 191, 27, 1, 0, 207, 255, 0, 0, 159, 25, 192, 176, 207, 255, 2, 0, + 236, 199, 61, 214, 24, 199, 255, 195, 199, 29, 129, 0, 192, 255, 0, 0, + 238, 223, 253, 255, 255, 253, 239, 227, 223, 29, 96, 3, 236, 223, 253, 255, + 223, 29, 96, 64, 207, 255, 6, 0, 255, 255, 255, 231, 223, 93, 128, 0, + 207, 255, 0, 252, 236, 255, 127, 252, 255, 255, 251, 47, 127, 128, 95, 255, + 0, 0, 12, 0, 255, 255, 255, 7, 127, 
32, 255, 3, 150, 37, 240, 254, + 174, 236, 255, 59, 95, 32, 255, 243, 1, 0, 0, 0, 255, 3, 0, 0, + 255, 254, 255, 255, 255, 31, 254, 255, 3, 255, 255, 254, 255, 255, 255, 31, + 255, 255, 127, 249, 255, 3, 255, 255, 231, 193, 255, 255, 127, 64, 255, 51, + 191, 32, 255, 255, 255, 255, 255, 247, 255, 61, 127, 61, 255, 61, 255, 255, + 255, 255, 61, 127, 61, 255, 127, 255, 255, 255, 61, 255, 255, 255, 255, 135, + 255, 255, 0, 0, 255, 255, 31, 0, 255, 159, 255, 255, 255, 199, 1, 0, + 255, 223, 15, 0, 255, 255, 15, 0, 255, 223, 13, 0, 255, 255, 207, 255, + 255, 1, 128, 16, 255, 255, 255, 0, 255, 7, 255, 255, 255, 255, 63, 0, + 255, 15, 255, 1, 192, 255, 255, 255, 255, 63, 31, 0, 255, 15, 255, 255, + 255, 3, 255, 3, 255, 255, 255, 15, 255, 255, 255, 127, 254, 255, 31, 0, + 128, 0, 0, 0, 255, 255, 239, 255, 239, 15, 255, 3, 255, 243, 255, 255, + 191, 255, 3, 0, 255, 227, 255, 255, 255, 255, 255, 63, 0, 222, 111, 0, + 255, 255, 63, 63, 63, 63, 255, 170, 255, 255, 223, 95, 220, 31, 207, 15, + 255, 31, 220, 31, 0, 0, 2, 128, 0, 0, 255, 31, 132, 252, 47, 62, + 80, 189, 255, 243, 224, 67, 0, 0, 255, 1, 0, 0, 0, 0, 192, 255, + 255, 127, 255, 255, 31, 120, 12, 0, 255, 128, 0, 0, 255, 255, 127, 0, + 127, 127, 127, 127, 0, 128, 0, 0, 224, 0, 0, 0, 254, 3, 62, 31, + 255, 255, 127, 224, 224, 255, 255, 255, 255, 63, 254, 255, 255, 127, 0, 0, + 255, 31, 255, 255, 255, 15, 0, 0, 255, 127, 240, 143, 255, 255, 255, 128, + 0, 0, 128, 255, 252, 255, 255, 255, 255, 121, 15, 0, 255, 7, 0, 0, + 0, 0, 0, 255, 187, 247, 255, 255, 15, 0, 255, 3, 0, 0, 252, 8, + 255, 255, 7, 0, 255, 255, 247, 255, 0, 128, 255, 3, 255, 63, 255, 3, + 255, 255, 127, 4, 5, 0, 0, 56, 255, 255, 60, 0, 126, 126, 126, 0, + 127, 127, 0, 0, 255, 7, 255, 3, 15, 0, 255, 255, 127, 248, 255, 255, + 255, 63, 255, 255, 255, 255, 255, 3, 127, 0, 248, 224, 255, 253, 127, 95, + 219, 255, 255, 255, 0, 0, 248, 255, 255, 255, 252, 255, 0, 0, 255, 15, + 0, 0, 223, 255, 252, 252, 252, 28, 255, 239, 255, 255, 127, 255, 255, 183, + 255, 63, 
255, 63, 255, 255, 1, 0, 15, 255, 62, 0, 63, 253, 255, 255, + 255, 255, 191, 145, 255, 255, 255, 192, 111, 240, 239, 254, 63, 0, 0, 0, + 255, 1, 255, 3, 255, 255, 199, 255, 30, 0, 255, 3, 7, 0, 0, 0, + 31, 0, 255, 255, 3, 0, 0, 0, 255, 255, 223, 255, 255, 255, 255, 223, + 100, 222, 255, 235, 239, 255, 255, 255, 191, 231, 223, 223, 255, 255, 255, 123, + 95, 252, 253, 255, 63, 255, 255, 255, 253, 255, 255, 247, 255, 253, 255, 255, + 247, 207, 255, 255, 150, 254, 247, 10, 132, 234, 150, 170, 150, 247, 247, 94, + 255, 251, 255, 15, 238, 251, 255, 15, +}; + +/* Alphanumeric: 1849 bytes. */ + +RE_UINT32 re_get_alphanumeric(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 16; + code = ch ^ (f << 16); + pos = (RE_UINT32)re_alphanumeric_stage_1[f] << 5; + f = code >> 11; + code ^= f << 11; + pos = (RE_UINT32)re_alphanumeric_stage_2[pos + f] << 3; + f = code >> 8; + code ^= f << 8; + pos = (RE_UINT32)re_alphanumeric_stage_3[pos + f] << 3; + f = code >> 5; + code ^= f << 5; + pos = (RE_UINT32)re_alphanumeric_stage_4[pos + f] << 5; + pos += code; + value = (re_alphanumeric_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* Any. */ + +RE_UINT32 re_get_any(RE_UINT32 ch) { + return 1; +} + +/* Blank. 
*/ + +static RE_UINT8 re_blank_stage_1[] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, +}; + +static RE_UINT8 re_blank_stage_2[] = { + 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, +}; + +static RE_UINT8 re_blank_stage_3[] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, + 3, 1, 1, 1, 1, 1, 1, 1, 4, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +}; + +static RE_UINT8 re_blank_stage_4[] = { + 0, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 3, 1, 1, 1, 1, 1, 4, 5, 1, 1, 1, 1, 1, 1, + 3, 1, 1, 1, 1, 1, 1, 1, +}; + +static RE_UINT8 re_blank_stage_5[] = { + 0, 2, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, + 255, 7, 0, 0, 0, 128, 0, 0, 0, 0, 0, 128, 0, 0, 0, 0, +}; + +/* Blank: 169 bytes. */ + +RE_UINT32 re_get_blank(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 16; + code = ch ^ (f << 16); + pos = (RE_UINT32)re_blank_stage_1[f] << 3; + f = code >> 13; + code ^= f << 13; + pos = (RE_UINT32)re_blank_stage_2[pos + f] << 4; + f = code >> 9; + code ^= f << 9; + pos = (RE_UINT32)re_blank_stage_3[pos + f] << 3; + f = code >> 6; + code ^= f << 6; + pos = (RE_UINT32)re_blank_stage_4[pos + f] << 6; + pos += code; + value = (re_blank_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* Graph. 
*/ + +static RE_UINT8 re_graph_stage_1[] = { + 0, 1, 2, 3, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 6, 4, 8, + 4, 8, +}; + +static RE_UINT8 re_graph_stage_2[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 9, 10, 11, 7, 7, 7, 7, 12, 13, 7, 7, 7, 14, + 15, 16, 17, 13, 18, 13, 19, 13, 13, 13, 13, 13, 13, 20, 13, 13, + 13, 13, 13, 13, 13, 13, 21, 13, 13, 13, 22, 13, 13, 23, 24, 13, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 25, 7, 26, 27, 13, 13, 13, 13, 13, 13, 13, 28, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 29, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 30, +}; + +static RE_UINT8 re_graph_stage_3[] = { + 0, 1, 1, 2, 1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, + 14, 1, 15, 16, 1, 1, 17, 18, 19, 20, 21, 22, 23, 24, 1, 25, + 26, 27, 1, 28, 29, 1, 1, 30, 1, 1, 1, 31, 32, 33, 34, 35, + 36, 37, 38, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 39, 1, 1, 1, 1, 1, 1, 1, 1, 1, 40, + 1, 1, 1, 1, 41, 1, 42, 43, 44, 45, 46, 47, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 48, 49, 49, 49, 49, 49, 49, 49, 49, + 1, 1, 50, 51, 1, 52, 53, 54, 55, 56, 57, 58, 59, 49, 49, 49, + 60, 61, 62, 63, 64, 49, 65, 49, 66, 67, 49, 49, 49, 49, 68, 49, + 1, 1, 1, 69, 70, 49, 49, 49, 1, 1, 1, 1, 71, 49, 49, 49, + 1, 1, 72, 49, 49, 49, 49, 73, 74, 49, 49, 49, 49, 49, 49, 49, + 75, 76, 77, 78, 79, 80, 81, 82, 49, 49, 49, 49, 49, 49, 83, 49, + 84, 85, 86, 87, 88, 89, 90, 91, 1, 1, 1, 1, 1, 1, 92, 1, + 1, 1, 1, 1, 1, 1, 1, 93, 94, 49, 49, 49, 49, 49, 49, 49, + 1, 1, 94, 49, 49, 49, 49, 49, 95, 96, 49, 49, 49, 49, 49, 49, + 1, 1, 1, 1, 1, 1, 1, 97, +}; + +static RE_UINT8 re_graph_stage_4[] = { + 0, 1, 2, 3, 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 4, 5, 6, 2, 2, 2, 7, 8, 1, 9, 2, 10, 11, + 12, 2, 2, 2, 2, 2, 2, 2, 13, 2, 14, 2, 2, 15, 2, 16, + 2, 17, 18, 0, 0, 19, 0, 20, 2, 2, 2, 21, 22, 23, 24, 25, + 26, 27, 28, 29, 30, 31, 32, 33, 22, 
31, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, 44, 48, 49, 50, 51, 52, 53, 54, + 1, 55, 56, 0, 57, 58, 59, 0, 2, 2, 60, 61, 21, 62, 63, 0, + 2, 2, 2, 2, 2, 2, 64, 2, 2, 2, 65, 2, 66, 67, 68, 2, + 69, 2, 48, 70, 71, 2, 2, 72, 2, 2, 2, 2, 73, 2, 2, 74, + 75, 76, 77, 78, 2, 2, 79, 80, 81, 2, 2, 82, 2, 83, 2, 84, + 70, 85, 86, 87, 2, 88, 89, 2, 90, 2, 3, 91, 80, 92, 0, 0, + 2, 2, 88, 70, 2, 2, 2, 93, 2, 94, 95, 2, 0, 0, 10, 76, + 2, 2, 2, 2, 2, 2, 2, 96, 97, 2, 98, 79, 2, 99, 100, 101, + 102, 103, 3, 104, 105, 16, 106, 74, 2, 2, 2, 2, 107, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 77, 2, 108, 109, 2, 2, 2, 2, 2, + 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 110, 0, 0, 0, 0, 0, + 2, 111, 3, 2, 2, 2, 2, 112, 2, 64, 2, 113, 76, 114, 114, 2, + 2, 56, 0, 0, 115, 2, 2, 77, 2, 2, 2, 2, 2, 2, 84, 116, + 1, 2, 1, 2, 8, 2, 2, 2, 117, 118, 2, 2, 111, 16, 2, 119, + 3, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 2, 2, 84, 2, 2, + 2, 2, 2, 2, 2, 2, 120, 0, 2, 2, 2, 2, 121, 2, 122, 2, + 2, 123, 2, 2, 124, 2, 2, 82, 2, 2, 2, 2, 125, 109, 0, 126, + 2, 127, 2, 82, 2, 2, 128, 56, 2, 2, 129, 70, 2, 2, 130, 0, + 2, 76, 131, 56, 2, 2, 132, 76, 133, 134, 0, 0, 0, 0, 2, 135, + 2, 2, 2, 2, 2, 119, 136, 56, 0, 0, 0, 0, 0, 0, 0, 0, + 2, 2, 2, 137, 2, 2, 71, 0, 138, 139, 140, 2, 2, 2, 141, 2, + 2, 2, 106, 2, 142, 2, 143, 144, 71, 122, 145, 146, 2, 2, 2, 91, + 1, 2, 2, 2, 2, 3, 147, 148, 149, 150, 151, 0, 2, 2, 2, 16, + 152, 153, 2, 2, 154, 0, 106, 79, 0, 0, 0, 0, 70, 2, 74, 0, + 3, 119, 109, 0, 155, 2, 156, 0, 2, 2, 2, 2, 79, 157, 0, 0, + 158, 159, 160, 0, 0, 0, 0, 0, 161, 162, 0, 0, 2, 163, 0, 0, + 164, 165, 166, 2, 0, 0, 0, 0, 2, 167, 168, 169, 0, 0, 0, 0, + 2, 2, 170, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, + 2, 2, 171, 172, 2, 2, 173, 174, 2, 99, 175, 0, 2, 2, 174, 0, + 0, 0, 0, 0, 2, 82, 157, 0, 2, 2, 2, 176, 0, 0, 0, 0, + 2, 2, 2, 177, 0, 0, 0, 0, 2, 176, 0, 0, 0, 0, 0, 0, + 2, 178, 0, 0, 0, 0, 0, 0, 2, 2, 179, 3, 180, 0, 0, 0, + 181, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 84, + 2, 182, 2, 2, 2, 2, 79, 0, 2, 2, 
183, 0, 0, 0, 0, 0, + 2, 2, 76, 15, 0, 0, 0, 0, 2, 2, 99, 2, 62, 184, 185, 2, + 186, 187, 188, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 189, 2, 2, + 2, 2, 2, 2, 2, 2, 190, 2, 185, 191, 192, 193, 194, 195, 0, 196, + 2, 88, 2, 2, 77, 197, 198, 0, 83, 111, 2, 88, 16, 0, 0, 199, + 200, 16, 201, 0, 0, 0, 0, 0, 2, 202, 2, 70, 77, 2, 203, 74, + 2, 3, 204, 2, 2, 2, 2, 205, 2, 79, 119, 143, 0, 0, 0, 206, + 2, 2, 207, 0, 2, 2, 183, 0, 2, 2, 2, 77, 0, 0, 0, 0, + 2, 2, 2, 2, 2, 2, 76, 0, 2, 72, 2, 2, 2, 2, 2, 2, + 79, 0, 0, 0, 0, 0, 0, 0, 208, 2, 2, 2, 0, 0, 0, 0, + 2, 2, 2, 2, 2, 2, 2, 172, 2, 2, 2, 2, 2, 2, 2, 79, +}; + +static RE_UINT8 re_graph_stage_5[] = { + 0, 0, 0, 0, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 127, + 255, 255, 255, 124, 240, 215, 255, 255, 251, 255, 255, 255, 255, 0, 254, 255, + 255, 255, 127, 254, 255, 134, 254, 255, 255, 0, 255, 255, 255, 7, 31, 0, + 223, 255, 255, 223, 255, 191, 255, 255, 255, 231, 255, 255, 255, 255, 3, 0, + 255, 255, 255, 7, 255, 63, 255, 127, 255, 255, 255, 79, 253, 31, 0, 0, + 240, 255, 255, 127, 255, 255, 255, 254, 238, 159, 249, 255, 255, 253, 197, 243, + 159, 121, 128, 176, 207, 255, 255, 15, 238, 135, 249, 255, 255, 253, 109, 211, + 135, 57, 2, 94, 192, 255, 63, 0, 238, 191, 251, 255, 255, 253, 237, 243, + 191, 59, 1, 0, 207, 255, 3, 0, 159, 57, 192, 176, 207, 255, 255, 0, + 236, 199, 61, 214, 24, 199, 255, 195, 199, 61, 129, 0, 192, 255, 255, 7, + 238, 223, 253, 255, 255, 253, 239, 227, 223, 61, 96, 3, 207, 255, 0, 255, + 236, 223, 253, 255, 255, 253, 239, 243, 223, 61, 96, 64, 207, 255, 6, 0, + 255, 255, 255, 231, 223, 125, 128, 0, 207, 255, 63, 254, 236, 255, 127, 252, + 255, 255, 251, 47, 127, 132, 95, 255, 0, 0, 28, 0, 255, 255, 255, 135, + 255, 255, 255, 15, 150, 37, 240, 254, 174, 236, 255, 59, 95, 63, 255, 243, + 255, 254, 255, 255, 255, 31, 254, 255, 255, 255, 255, 223, 255, 223, 255, 7, + 191, 32, 255, 255, 255, 61, 127, 61, 255, 61, 255, 255, 255, 255, 61, 127, + 61, 255, 127, 255, 255, 255, 61, 255, 255, 255, 
255, 31, 255, 255, 255, 3, + 255, 255, 31, 0, 254, 255, 255, 31, 255, 255, 1, 0, 255, 223, 31, 0, + 255, 255, 127, 0, 255, 255, 15, 0, 255, 223, 13, 0, 255, 255, 255, 63, + 255, 3, 255, 3, 255, 127, 255, 3, 255, 255, 255, 0, 255, 7, 255, 255, + 255, 255, 63, 0, 255, 15, 255, 15, 241, 255, 255, 255, 255, 63, 31, 0, + 255, 15, 255, 255, 255, 3, 255, 199, 255, 255, 255, 207, 255, 255, 255, 159, + 255, 63, 0, 0, 255, 255, 15, 240, 255, 255, 255, 248, 255, 227, 255, 255, + 127, 0, 0, 240, 255, 255, 63, 63, 63, 63, 255, 170, 255, 255, 223, 255, + 223, 255, 207, 239, 255, 255, 220, 127, 0, 248, 255, 255, 255, 124, 255, 255, + 223, 255, 243, 255, 255, 127, 255, 31, 0, 0, 255, 255, 255, 3, 255, 255, + 127, 0, 0, 0, 255, 7, 0, 0, 255, 31, 255, 3, 255, 127, 255, 255, + 255, 255, 15, 254, 255, 128, 1, 128, 127, 127, 127, 127, 255, 255, 255, 251, + 0, 0, 255, 15, 224, 255, 255, 255, 255, 63, 254, 255, 15, 0, 255, 255, + 255, 31, 0, 0, 255, 31, 255, 255, 127, 0, 255, 255, 255, 15, 0, 0, + 255, 255, 255, 128, 255, 127, 15, 0, 0, 0, 0, 255, 255, 15, 255, 3, + 31, 192, 255, 3, 255, 255, 15, 128, 255, 191, 255, 195, 255, 63, 255, 243, + 7, 0, 0, 248, 126, 126, 126, 0, 127, 127, 0, 0, 255, 63, 255, 3, + 127, 248, 255, 255, 255, 63, 255, 255, 127, 0, 248, 224, 255, 255, 127, 95, + 219, 255, 255, 255, 3, 0, 248, 255, 255, 255, 252, 255, 255, 0, 0, 0, + 0, 0, 255, 63, 255, 255, 247, 255, 127, 15, 223, 255, 252, 252, 252, 28, + 127, 127, 0, 62, 255, 239, 255, 255, 127, 255, 255, 183, 255, 63, 255, 63, + 135, 255, 255, 255, 255, 255, 143, 255, 255, 7, 255, 15, 255, 255, 255, 191, + 15, 255, 63, 0, 255, 3, 0, 0, 63, 253, 255, 255, 255, 255, 191, 145, + 255, 255, 191, 255, 255, 255, 255, 143, 255, 255, 255, 131, 255, 255, 255, 192, + 111, 240, 239, 254, 255, 255, 15, 135, 255, 0, 255, 1, 255, 255, 63, 254, + 255, 255, 63, 255, 255, 255, 7, 255, 255, 1, 0, 0, 255, 63, 252, 255, + 255, 255, 0, 0, 3, 0, 255, 255, 255, 1, 255, 3, 15, 0, 0, 0, + 255, 127, 0, 0, 7, 0, 15, 0, 255, 255, 255, 1, 31, 
0, 255, 255, + 0, 128, 255, 255, 3, 0, 0, 0, 127, 254, 255, 255, 63, 0, 0, 0, + 100, 222, 255, 235, 239, 255, 255, 255, 191, 231, 223, 223, 255, 255, 255, 123, + 95, 252, 253, 255, 63, 255, 255, 255, 255, 207, 255, 255, 150, 254, 247, 10, + 132, 234, 150, 170, 150, 247, 247, 94, 255, 251, 255, 15, 238, 251, 255, 15, + 0, 0, 3, 0, 255, 127, 254, 127, 254, 255, 254, 255, 192, 255, 255, 255, + 7, 0, 255, 255, 255, 1, 3, 0, 1, 0, 191, 255, 223, 7, 0, 0, + 253, 255, 255, 255, 255, 255, 255, 30, 0, 0, 0, 248, 225, 255, 0, 0, + 2, 0, 0, 0, +}; + +/* Graph: 2046 bytes. */ + +RE_UINT32 re_get_graph(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 15; + code = ch ^ (f << 15); + pos = (RE_UINT32)re_graph_stage_1[f] << 4; + f = code >> 11; + code ^= f << 11; + pos = (RE_UINT32)re_graph_stage_2[pos + f] << 3; + f = code >> 8; + code ^= f << 8; + pos = (RE_UINT32)re_graph_stage_3[pos + f] << 3; + f = code >> 5; + code ^= f << 5; + pos = (RE_UINT32)re_graph_stage_4[pos + f] << 5; + pos += code; + value = (re_graph_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* Print. 
*/ + +static RE_UINT8 re_print_stage_1[] = { + 0, 1, 2, 3, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 6, 4, 8, + 4, 8, +}; + +static RE_UINT8 re_print_stage_2[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 9, 10, 11, 7, 7, 7, 7, 12, 13, 7, 7, 7, 14, + 15, 16, 17, 13, 18, 13, 19, 13, 13, 13, 13, 13, 13, 20, 13, 13, + 13, 13, 13, 13, 13, 13, 21, 13, 13, 13, 22, 13, 13, 23, 24, 13, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 25, 7, 26, 27, 13, 13, 13, 13, 13, 13, 13, 28, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 29, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 30, +}; + +static RE_UINT8 re_print_stage_3[] = { + 0, 1, 1, 2, 1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, + 14, 1, 15, 16, 1, 1, 17, 18, 19, 20, 21, 22, 23, 24, 1, 25, + 26, 27, 1, 28, 29, 1, 1, 30, 1, 1, 1, 31, 32, 33, 34, 35, + 36, 37, 38, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 39, 1, 1, 1, 1, 1, 1, 1, 1, 1, 40, + 1, 1, 1, 1, 41, 1, 42, 43, 44, 45, 46, 47, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 48, 49, 49, 49, 49, 49, 49, 49, 49, + 1, 1, 50, 51, 1, 52, 53, 54, 55, 56, 57, 58, 59, 49, 49, 49, + 60, 61, 62, 63, 64, 49, 65, 49, 66, 67, 49, 49, 49, 49, 68, 49, + 1, 1, 1, 69, 70, 49, 49, 49, 1, 1, 1, 1, 71, 49, 49, 49, + 1, 1, 72, 49, 49, 49, 49, 73, 74, 49, 49, 49, 49, 49, 49, 49, + 75, 76, 77, 78, 79, 80, 81, 82, 49, 49, 49, 49, 49, 49, 83, 49, + 84, 85, 86, 87, 88, 89, 90, 91, 1, 1, 1, 1, 1, 1, 92, 1, + 1, 1, 1, 1, 1, 1, 1, 93, 94, 49, 49, 49, 49, 49, 49, 49, + 1, 1, 94, 49, 49, 49, 49, 49, 95, 96, 49, 49, 49, 49, 49, 49, + 1, 1, 1, 1, 1, 1, 1, 97, +}; + +static RE_UINT8 re_print_stage_4[] = { + 0, 1, 1, 2, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 3, 4, 5, 1, 1, 1, 6, 7, 8, 9, 1, 10, 11, + 12, 1, 1, 1, 1, 1, 1, 1, 13, 1, 14, 1, 1, 15, 1, 16, + 1, 17, 18, 0, 0, 19, 0, 20, 1, 1, 1, 21, 22, 23, 24, 25, + 26, 27, 28, 29, 30, 31, 32, 33, 22, 
31, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, 44, 48, 49, 50, 51, 52, 53, 54, + 8, 55, 56, 0, 57, 58, 59, 0, 1, 1, 60, 61, 21, 62, 63, 0, + 1, 1, 1, 1, 1, 1, 64, 1, 1, 1, 65, 1, 66, 67, 68, 1, + 69, 1, 48, 70, 71, 1, 1, 72, 1, 1, 1, 1, 70, 1, 1, 73, + 74, 75, 76, 77, 1, 1, 78, 79, 80, 1, 1, 81, 1, 82, 1, 83, + 70, 84, 85, 86, 1, 87, 88, 1, 89, 1, 2, 90, 79, 91, 0, 0, + 1, 1, 87, 70, 1, 1, 1, 92, 1, 93, 94, 1, 0, 0, 10, 75, + 1, 1, 1, 1, 1, 1, 1, 95, 96, 1, 97, 78, 1, 98, 99, 100, + 1, 101, 1, 102, 103, 16, 104, 73, 1, 1, 1, 1, 105, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 76, 1, 106, 107, 1, 1, 1, 1, 1, + 8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 108, 0, 0, 0, 0, 0, + 1, 109, 2, 1, 1, 1, 1, 110, 1, 64, 1, 111, 75, 112, 112, 1, + 1, 56, 0, 0, 113, 1, 1, 76, 1, 1, 1, 1, 1, 1, 83, 114, + 1, 1, 8, 1, 7, 1, 1, 1, 115, 116, 1, 1, 109, 16, 1, 117, + 2, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 83, 1, 1, + 1, 1, 1, 1, 1, 1, 118, 0, 1, 1, 1, 1, 119, 1, 120, 1, + 1, 121, 1, 1, 122, 1, 1, 81, 1, 1, 1, 1, 123, 107, 0, 124, + 1, 125, 1, 81, 1, 1, 126, 56, 1, 1, 127, 70, 1, 1, 128, 0, + 1, 75, 129, 56, 1, 1, 130, 75, 131, 132, 0, 0, 0, 0, 1, 133, + 1, 1, 1, 1, 1, 117, 134, 56, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 135, 1, 1, 71, 0, 136, 137, 138, 1, 1, 1, 139, 1, + 1, 1, 104, 1, 140, 1, 141, 142, 71, 120, 143, 144, 1, 1, 1, 90, + 8, 1, 1, 1, 1, 2, 145, 146, 147, 148, 149, 0, 1, 1, 1, 16, + 150, 151, 1, 1, 152, 0, 104, 78, 0, 0, 0, 0, 70, 1, 73, 0, + 2, 117, 107, 0, 153, 1, 154, 0, 1, 1, 1, 1, 78, 155, 0, 0, + 156, 157, 158, 0, 0, 0, 0, 0, 159, 160, 0, 0, 1, 161, 0, 0, + 162, 163, 164, 1, 0, 0, 0, 0, 1, 165, 166, 167, 0, 0, 0, 0, + 1, 1, 168, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, + 1, 1, 169, 170, 1, 1, 171, 172, 1, 98, 173, 0, 1, 1, 172, 0, + 0, 0, 0, 0, 1, 81, 155, 0, 1, 1, 1, 174, 0, 0, 0, 0, + 1, 1, 1, 175, 0, 0, 0, 0, 1, 174, 0, 0, 0, 0, 0, 0, + 1, 176, 0, 0, 0, 0, 0, 0, 1, 1, 177, 2, 178, 0, 0, 0, + 179, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 83, + 1, 180, 1, 1, 1, 1, 78, 0, 1, 1, 
181, 0, 0, 0, 0, 0, + 1, 1, 75, 15, 0, 0, 0, 0, 1, 1, 98, 1, 62, 182, 183, 1, + 184, 185, 186, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 187, 1, 1, + 1, 1, 1, 1, 1, 1, 188, 1, 183, 189, 190, 191, 192, 193, 0, 194, + 1, 87, 1, 1, 76, 195, 196, 0, 82, 109, 1, 87, 16, 0, 0, 197, + 198, 16, 199, 0, 0, 0, 0, 0, 1, 200, 1, 70, 76, 1, 201, 73, + 1, 2, 202, 1, 1, 1, 1, 203, 1, 78, 117, 141, 0, 0, 0, 204, + 1, 1, 205, 0, 1, 1, 181, 0, 1, 1, 1, 76, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 75, 0, 1, 72, 1, 1, 1, 1, 1, 1, + 78, 0, 0, 0, 0, 0, 0, 0, 206, 1, 1, 1, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 170, 1, 1, 1, 1, 1, 1, 1, 78, +}; + +static RE_UINT8 re_print_stage_5[] = { + 0, 0, 0, 0, 255, 255, 255, 255, 255, 255, 255, 127, 255, 255, 255, 124, + 240, 215, 255, 255, 251, 255, 255, 255, 255, 0, 254, 255, 255, 255, 127, 254, + 254, 255, 255, 255, 255, 134, 254, 255, 255, 0, 255, 255, 255, 7, 31, 0, + 223, 255, 255, 223, 255, 191, 255, 255, 255, 231, 255, 255, 255, 255, 3, 0, + 255, 255, 255, 7, 255, 63, 255, 127, 255, 255, 255, 79, 253, 31, 0, 0, + 240, 255, 255, 127, 255, 255, 255, 254, 238, 159, 249, 255, 255, 253, 197, 243, + 159, 121, 128, 176, 207, 255, 255, 15, 238, 135, 249, 255, 255, 253, 109, 211, + 135, 57, 2, 94, 192, 255, 63, 0, 238, 191, 251, 255, 255, 253, 237, 243, + 191, 59, 1, 0, 207, 255, 3, 0, 159, 57, 192, 176, 207, 255, 255, 0, + 236, 199, 61, 214, 24, 199, 255, 195, 199, 61, 129, 0, 192, 255, 255, 7, + 238, 223, 253, 255, 255, 253, 239, 227, 223, 61, 96, 3, 207, 255, 0, 255, + 236, 223, 253, 255, 255, 253, 239, 243, 223, 61, 96, 64, 207, 255, 6, 0, + 255, 255, 255, 231, 223, 125, 128, 0, 207, 255, 63, 254, 236, 255, 127, 252, + 255, 255, 251, 47, 127, 132, 95, 255, 0, 0, 28, 0, 255, 255, 255, 135, + 255, 255, 255, 15, 150, 37, 240, 254, 174, 236, 255, 59, 95, 63, 255, 243, + 255, 254, 255, 255, 255, 31, 254, 255, 255, 255, 255, 223, 255, 223, 255, 7, + 191, 32, 255, 255, 255, 61, 127, 61, 255, 61, 255, 255, 255, 255, 61, 127, + 61, 255, 127, 255, 255, 255, 61, 255, 255, 255, 
255, 31, 255, 255, 255, 3, + 255, 255, 31, 0, 255, 255, 1, 0, 255, 223, 31, 0, 255, 255, 127, 0, + 255, 255, 15, 0, 255, 223, 13, 0, 255, 255, 255, 63, 255, 3, 255, 3, + 255, 127, 255, 3, 255, 255, 255, 0, 255, 7, 255, 255, 255, 255, 63, 0, + 255, 15, 255, 15, 241, 255, 255, 255, 255, 63, 31, 0, 255, 15, 255, 255, + 255, 3, 255, 199, 255, 255, 255, 207, 255, 255, 255, 159, 255, 63, 0, 0, + 255, 255, 15, 240, 255, 255, 255, 248, 255, 227, 255, 255, 127, 0, 0, 240, + 255, 255, 63, 63, 63, 63, 255, 170, 255, 255, 223, 255, 223, 255, 207, 239, + 255, 255, 220, 127, 255, 252, 255, 255, 223, 255, 243, 255, 255, 127, 255, 31, + 0, 0, 255, 255, 255, 3, 255, 255, 127, 0, 0, 0, 255, 7, 0, 0, + 255, 31, 255, 3, 255, 127, 255, 255, 255, 255, 15, 254, 255, 128, 1, 128, + 127, 127, 127, 127, 255, 255, 255, 251, 0, 0, 255, 15, 224, 255, 255, 255, + 255, 63, 254, 255, 15, 0, 255, 255, 255, 31, 0, 0, 255, 31, 255, 255, + 127, 0, 255, 255, 255, 15, 0, 0, 255, 255, 255, 128, 255, 127, 15, 0, + 0, 0, 0, 255, 255, 15, 255, 3, 31, 192, 255, 3, 255, 255, 15, 128, + 255, 191, 255, 195, 255, 63, 255, 243, 7, 0, 0, 248, 126, 126, 126, 0, + 127, 127, 0, 0, 255, 63, 255, 3, 127, 248, 255, 255, 255, 63, 255, 255, + 127, 0, 248, 224, 255, 255, 127, 95, 219, 255, 255, 255, 3, 0, 248, 255, + 255, 255, 252, 255, 255, 0, 0, 0, 0, 0, 255, 63, 255, 255, 247, 255, + 127, 15, 223, 255, 252, 252, 252, 28, 127, 127, 0, 62, 255, 239, 255, 255, + 127, 255, 255, 183, 255, 63, 255, 63, 135, 255, 255, 255, 255, 255, 143, 255, + 255, 7, 255, 15, 255, 255, 255, 191, 15, 255, 63, 0, 255, 3, 0, 0, + 63, 253, 255, 255, 255, 255, 191, 145, 255, 255, 191, 255, 255, 255, 255, 143, + 255, 255, 255, 131, 255, 255, 255, 192, 111, 240, 239, 254, 255, 255, 15, 135, + 255, 0, 255, 1, 255, 255, 63, 254, 255, 255, 63, 255, 255, 255, 7, 255, + 255, 1, 0, 0, 255, 63, 252, 255, 255, 255, 0, 0, 3, 0, 255, 255, + 255, 1, 255, 3, 15, 0, 0, 0, 255, 127, 0, 0, 7, 0, 15, 0, + 255, 255, 255, 1, 31, 0, 255, 255, 0, 128, 255, 255, 3, 0, 
0, 0, + 127, 254, 255, 255, 63, 0, 0, 0, 100, 222, 255, 235, 239, 255, 255, 255, + 191, 231, 223, 223, 255, 255, 255, 123, 95, 252, 253, 255, 63, 255, 255, 255, + 255, 207, 255, 255, 150, 254, 247, 10, 132, 234, 150, 170, 150, 247, 247, 94, + 255, 251, 255, 15, 238, 251, 255, 15, 0, 0, 3, 0, 255, 127, 254, 127, + 254, 255, 254, 255, 192, 255, 255, 255, 7, 0, 255, 255, 255, 1, 3, 0, + 1, 0, 191, 255, 223, 7, 0, 0, 253, 255, 255, 255, 255, 255, 255, 30, + 0, 0, 0, 248, 225, 255, 0, 0, 2, 0, 0, 0, +}; + +/* Print: 2038 bytes. */ + +RE_UINT32 re_get_print(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 15; + code = ch ^ (f << 15); + pos = (RE_UINT32)re_print_stage_1[f] << 4; + f = code >> 11; + code ^= f << 11; + pos = (RE_UINT32)re_print_stage_2[pos + f] << 3; + f = code >> 8; + code ^= f << 8; + pos = (RE_UINT32)re_print_stage_3[pos + f] << 3; + f = code >> 5; + code ^= f << 5; + pos = (RE_UINT32)re_print_stage_4[pos + f] << 5; + pos += code; + value = (re_print_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* Word. 
*/ + +static RE_UINT8 re_word_stage_1[] = { + 0, 1, 2, 3, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 6, 6, 6, + 6, 6, +}; + +static RE_UINT8 re_word_stage_2[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 9, 10, 11, 7, 7, 7, 7, 12, 13, 13, 13, 13, 14, + 15, 16, 17, 13, 18, 13, 19, 13, 13, 13, 13, 13, 13, 20, 13, 13, + 13, 13, 13, 13, 13, 13, 21, 13, 13, 13, 22, 13, 13, 23, 13, 13, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 24, 7, 25, 26, 13, 13, 13, 13, 13, 13, 13, 27, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 28, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, +}; + +static RE_UINT8 re_word_stage_3[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 1, 17, 18, 19, 1, 20, 21, 22, 23, 24, 25, 26, 27, 1, 28, + 29, 30, 31, 31, 32, 31, 31, 31, 31, 31, 31, 31, 33, 34, 35, 31, + 36, 37, 31, 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 38, 1, 1, 1, 1, 1, 1, 1, 1, 1, 39, + 1, 1, 1, 1, 40, 1, 41, 42, 43, 44, 45, 46, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 47, 31, 31, 31, 31, 31, 31, 31, 31, + 31, 1, 48, 49, 1, 50, 51, 52, 53, 54, 55, 56, 57, 31, 31, 31, + 58, 59, 60, 61, 62, 31, 31, 31, 63, 64, 31, 31, 31, 31, 65, 31, + 1, 1, 1, 66, 67, 31, 31, 31, 1, 1, 1, 1, 68, 31, 31, 31, + 1, 1, 69, 31, 31, 31, 31, 70, 71, 31, 31, 31, 31, 31, 31, 31, + 31, 72, 73, 31, 74, 75, 76, 77, 31, 31, 31, 31, 31, 31, 78, 31, + 1, 1, 1, 1, 1, 1, 79, 1, 1, 1, 1, 1, 1, 1, 1, 80, + 81, 31, 31, 31, 31, 31, 31, 31, 1, 1, 81, 31, 31, 31, 31, 31, + 31, 82, 31, 31, 31, 31, 31, 31, +}; + +static RE_UINT8 re_word_stage_4[] = { + 0, 1, 2, 3, 0, 4, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 7, 8, 6, 6, 6, 9, 10, 11, 6, 12, + 6, 6, 6, 6, 11, 6, 6, 6, 6, 13, 14, 15, 13, 16, 17, 18, + 19, 6, 6, 20, 6, 6, 21, 22, 23, 6, 24, 6, 6, 25, 6, 26, + 6, 27, 28, 0, 0, 29, 0, 30, 6, 6, 6, 31, 32, 33, 34, 35, + 36, 37, 38, 39, 40, 41, 42, 43, 32, 41, 44, 45, 46, 47, 48, 49, + 50, 51, 
52, 43, 53, 54, 55, 56, 53, 57, 58, 59, 60, 61, 62, 63, + 15, 64, 65, 0, 66, 67, 68, 0, 69, 70, 71, 72, 73, 74, 75, 0, + 6, 6, 76, 6, 77, 6, 78, 79, 6, 6, 80, 6, 81, 82, 83, 6, + 84, 6, 57, 0, 85, 6, 6, 86, 15, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 87, 3, 6, 6, 88, 89, 86, 90, 91, 6, 6, 92, 93, + 94, 6, 6, 95, 6, 96, 6, 97, 74, 98, 99, 100, 6, 101, 102, 0, + 28, 6, 103, 104, 102, 105, 0, 0, 6, 6, 106, 107, 6, 6, 6, 90, + 6, 95, 108, 77, 0, 0, 109, 110, 6, 6, 6, 6, 6, 6, 6, 111, + 112, 6, 113, 77, 6, 114, 115, 116, 117, 118, 119, 120, 121, 0, 23, 122, + 123, 124, 125, 6, 126, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 127, 6, 93, 6, 128, 103, 6, 6, 6, 6, 129, + 6, 78, 6, 130, 110, 131, 131, 6, 0, 132, 0, 0, 0, 0, 0, 0, + 133, 134, 15, 6, 135, 15, 6, 79, 136, 137, 6, 6, 138, 64, 0, 23, + 6, 6, 6, 6, 6, 97, 0, 0, 6, 6, 6, 6, 6, 6, 139, 0, + 6, 6, 6, 6, 139, 0, 23, 77, 140, 141, 6, 142, 143, 6, 6, 25, + 144, 145, 6, 6, 146, 147, 0, 148, 6, 149, 6, 90, 6, 6, 150, 151, + 6, 152, 90, 74, 6, 6, 153, 0, 6, 110, 154, 155, 6, 6, 156, 157, + 158, 159, 0, 0, 0, 0, 6, 160, 6, 6, 6, 6, 6, 161, 162, 28, + 6, 6, 6, 152, 6, 6, 163, 0, 164, 165, 166, 6, 6, 25, 167, 6, + 6, 77, 23, 6, 168, 6, 149, 169, 85, 170, 171, 172, 6, 6, 6, 74, + 1, 2, 3, 99, 6, 103, 173, 0, 174, 175, 176, 0, 6, 6, 6, 64, + 0, 0, 6, 86, 0, 0, 0, 177, 0, 0, 0, 0, 74, 6, 122, 0, + 103, 23, 147, 0, 77, 6, 178, 0, 6, 6, 6, 6, 77, 93, 0, 0, + 179, 180, 97, 0, 0, 0, 0, 0, 97, 163, 0, 0, 6, 181, 0, 0, + 182, 183, 0, 74, 0, 0, 0, 0, 6, 97, 97, 184, 0, 0, 0, 0, + 6, 6, 126, 0, 0, 0, 0, 0, 6, 6, 185, 49, 6, 64, 23, 186, + 6, 187, 0, 0, 6, 6, 150, 0, 0, 0, 0, 0, 6, 95, 93, 0, + 6, 6, 6, 138, 0, 0, 0, 0, 6, 6, 6, 188, 0, 0, 0, 0, + 6, 138, 0, 0, 0, 0, 0, 0, 6, 189, 0, 0, 0, 0, 0, 0, + 6, 6, 190, 103, 191, 0, 0, 0, 192, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 193, 194, 195, 0, 0, 0, 0, 196, 0, 0, 0, 0, 0, + 6, 6, 187, 6, 197, 198, 199, 6, 200, 201, 202, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 203, 204, 79, 187, 187, 128, 128, 205, 205, 
206, 6, + 199, 207, 208, 209, 210, 211, 0, 0, 6, 6, 6, 6, 6, 6, 110, 0, + 6, 86, 6, 6, 6, 6, 6, 6, 77, 0, 0, 0, 0, 0, 0, 0, + 6, 6, 6, 6, 6, 6, 6, 85, +}; + +static RE_UINT8 re_word_stage_5[] = { + 0, 0, 0, 0, 0, 0, 255, 3, 254, 255, 255, 135, 254, 255, 255, 7, + 0, 4, 32, 4, 255, 255, 127, 255, 255, 255, 255, 255, 195, 255, 3, 0, + 31, 80, 0, 0, 255, 255, 223, 60, 64, 215, 255, 255, 251, 255, 255, 255, + 255, 255, 191, 255, 255, 0, 254, 255, 255, 255, 127, 2, 254, 255, 255, 255, + 255, 255, 255, 191, 182, 0, 255, 255, 255, 7, 7, 0, 0, 0, 255, 7, + 255, 195, 255, 255, 255, 255, 239, 159, 255, 253, 255, 159, 0, 0, 255, 255, + 255, 231, 255, 255, 255, 255, 3, 0, 255, 255, 63, 4, 255, 63, 0, 0, + 255, 255, 255, 15, 253, 31, 0, 0, 240, 255, 255, 127, 207, 255, 254, 254, + 238, 159, 249, 255, 255, 253, 197, 243, 159, 121, 128, 176, 207, 255, 3, 0, + 238, 135, 249, 255, 255, 253, 109, 211, 135, 57, 2, 94, 192, 255, 63, 0, + 238, 191, 251, 255, 255, 253, 237, 243, 191, 59, 1, 0, 207, 255, 0, 0, + 159, 57, 192, 176, 207, 255, 2, 0, 236, 199, 61, 214, 24, 199, 255, 195, + 199, 61, 129, 0, 192, 255, 0, 0, 238, 223, 253, 255, 255, 253, 239, 227, + 223, 61, 96, 3, 236, 223, 253, 255, 255, 253, 239, 243, 223, 61, 96, 64, + 207, 255, 6, 0, 255, 255, 255, 231, 223, 125, 128, 0, 207, 255, 0, 252, + 236, 255, 127, 252, 255, 255, 251, 47, 127, 132, 95, 255, 0, 0, 12, 0, + 255, 255, 255, 7, 255, 127, 255, 3, 150, 37, 240, 254, 174, 236, 255, 59, + 95, 63, 255, 243, 1, 0, 0, 3, 255, 3, 160, 194, 255, 254, 255, 255, + 255, 31, 254, 255, 223, 255, 255, 254, 255, 255, 255, 31, 64, 0, 0, 0, + 255, 3, 255, 255, 255, 255, 255, 63, 191, 32, 255, 255, 255, 255, 255, 247, + 255, 61, 127, 61, 255, 61, 255, 255, 255, 255, 61, 127, 61, 255, 127, 255, + 255, 255, 61, 255, 255, 255, 0, 0, 255, 255, 31, 0, 255, 159, 255, 255, + 255, 199, 1, 0, 255, 223, 31, 0, 255, 255, 15, 0, 255, 223, 13, 0, + 255, 255, 143, 48, 255, 3, 0, 0, 0, 56, 255, 3, 255, 255, 255, 0, + 255, 7, 255, 255, 255, 255, 63, 0, 
255, 15, 255, 15, 192, 255, 255, 255, + 255, 63, 31, 0, 255, 15, 255, 255, 255, 3, 255, 3, 255, 255, 255, 127, + 255, 255, 255, 159, 128, 0, 0, 0, 255, 15, 255, 3, 0, 248, 15, 0, + 255, 227, 255, 255, 0, 0, 247, 255, 255, 255, 127, 0, 127, 0, 0, 240, + 255, 255, 63, 63, 63, 63, 255, 170, 255, 255, 223, 95, 220, 31, 207, 15, + 255, 31, 220, 31, 0, 48, 0, 0, 0, 0, 0, 128, 1, 0, 16, 0, + 0, 0, 2, 128, 0, 0, 255, 31, 255, 255, 1, 0, 132, 252, 47, 62, + 80, 189, 255, 243, 224, 67, 0, 0, 255, 1, 0, 0, 0, 0, 192, 255, + 255, 127, 255, 255, 31, 248, 15, 0, 255, 128, 0, 128, 127, 127, 127, 127, + 0, 128, 0, 0, 224, 0, 0, 0, 254, 255, 62, 31, 255, 255, 127, 230, + 224, 255, 255, 255, 255, 63, 254, 255, 255, 127, 0, 0, 255, 31, 0, 0, + 255, 31, 255, 255, 255, 15, 0, 0, 255, 255, 247, 191, 255, 255, 255, 128, + 0, 0, 128, 255, 252, 255, 255, 255, 255, 121, 15, 0, 255, 7, 0, 0, + 0, 0, 0, 255, 255, 0, 0, 0, 31, 0, 255, 3, 255, 255, 255, 8, + 255, 63, 255, 255, 1, 128, 255, 3, 255, 63, 255, 3, 255, 255, 127, 12, + 7, 0, 0, 56, 255, 255, 124, 0, 126, 126, 126, 0, 127, 127, 0, 0, + 255, 55, 255, 3, 15, 0, 255, 255, 127, 248, 255, 255, 255, 255, 255, 3, + 127, 0, 248, 224, 255, 253, 127, 95, 219, 255, 255, 255, 0, 0, 248, 255, + 255, 255, 252, 255, 0, 0, 255, 15, 127, 0, 24, 0, 0, 224, 0, 0, + 0, 0, 223, 255, 252, 252, 252, 28, 255, 239, 255, 255, 127, 255, 255, 183, + 255, 63, 255, 63, 0, 0, 0, 32, 15, 255, 62, 0, 63, 253, 255, 255, + 255, 255, 191, 145, 255, 255, 255, 192, 111, 240, 239, 254, 255, 255, 15, 135, + 255, 255, 7, 0, 127, 0, 0, 0, 255, 1, 255, 3, 255, 255, 223, 255, + 7, 0, 0, 0, 255, 255, 255, 1, 31, 0, 255, 255, 0, 128, 255, 255, + 3, 0, 0, 0, 224, 227, 7, 248, 231, 15, 0, 0, 0, 60, 0, 0, + 28, 0, 0, 0, 255, 255, 255, 223, 100, 222, 255, 235, 239, 255, 255, 255, + 191, 231, 223, 223, 255, 255, 255, 123, 95, 252, 253, 255, 63, 255, 255, 255, + 253, 255, 255, 247, 255, 253, 255, 255, 247, 207, 255, 255, 150, 254, 247, 10, + 132, 234, 150, 170, 150, 247, 247, 94, 255, 
251, 255, 15, 238, 251, 255, 15, +}; + +/* Word: 1906 bytes. */ + +RE_UINT32 re_get_word(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 15; + code = ch ^ (f << 15); + pos = (RE_UINT32)re_word_stage_1[f] << 4; + f = code >> 11; + code ^= f << 11; + pos = (RE_UINT32)re_word_stage_2[pos + f] << 3; + f = code >> 8; + code ^= f << 8; + pos = (RE_UINT32)re_word_stage_3[pos + f] << 3; + f = code >> 5; + code ^= f << 5; + pos = (RE_UINT32)re_word_stage_4[pos + f] << 5; + pos += code; + value = (re_word_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* XDigit. */ + +static RE_UINT8 re_xdigit_stage_1[] = { + 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, +}; + +static RE_UINT8 re_xdigit_stage_2[] = { + 0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 5, 6, 4, 4, 4, 4, 4, 4, 4, 4, 4, 7, + 8, 4, 9, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 10, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, +}; + +static RE_UINT8 re_xdigit_stage_3[] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, 1, 4, + 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 6, 6, 7, 1, + 4, 8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, + 8, 1, 9, 6, 1, 10, 6, 11, 12, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 7, 1, 1, 1, + 1, 6, 13, 6, 6, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 14, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 7, 1, 1, 1, 1, 1, 1, + 5, 3, 15, 6, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 16, +}; + +static RE_UINT8 re_xdigit_stage_4[] = { + 0, 1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 1, + 0, 0, 3, 0, 0, 0, 0, 4, 0, 0, 1, 0, 0, 3, 0, 0, + 1, 0, 0, 0, 0, 0, 4, 0, 5, 0, 0, 0, 0, 1, 0, 0, + 0, 0, 5, 0, 3, 0, 0, 0, 1, 2, 2, 0, 0, 6, 0, 0, + 0, 0, 7, 8, +}; + +static RE_UINT8 re_xdigit_stage_5[] = { + 0, 0, 0, 
0, 0, 0, 255, 3, 126, 0, 0, 0, 255, 3, 0, 0, + 192, 255, 0, 0, 255, 3, 255, 3, 0, 0, 192, 255, 0, 192, 255, 255, + 255, 255, 255, 255, +}; + +/* XDigit: 393 bytes. */ + +RE_UINT32 re_get_xdigit(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + + f = ch >> 16; + code = ch ^ (f << 16); + pos = (RE_UINT32)re_xdigit_stage_1[f] << 5; + f = code >> 11; + code ^= f << 11; + pos = (RE_UINT32)re_xdigit_stage_2[pos + f] << 4; + f = code >> 7; + code ^= f << 7; + pos = (RE_UINT32)re_xdigit_stage_3[pos + f] << 2; + f = code >> 5; + code ^= f << 5; + pos = (RE_UINT32)re_xdigit_stage_4[pos + f] << 5; + pos += code; + value = (re_xdigit_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; + + return value; +} + +/* All_Cases. */ + +static RE_UINT8 re_all_cases_stage_1[] = { + 0, 1, 2, 2, 2, 3, 2, 4, 5, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, +}; + +static RE_UINT8 re_all_cases_stage_2[] = { + 0, 1, 2, 3, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 8, 9, 10, + 6, 11, 6, 6, 12, 6, 6, 6, 6, 6, 6, 6, 13, 14, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 15, 16, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 17, 6, 6, 6, 18, + 6, 6, 6, 6, 19, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, +}; + +static RE_UINT8 re_all_cases_stage_3[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, + 0, 0, 0, 0, 0, 
0, 9, 0, 10, 11, 12, 13, 14, 15, 16, 17, + 18, 18, 18, 18, 18, 18, 19, 20, 21, 22, 18, 18, 18, 18, 18, 23, + 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 21, 34, 18, 18, 35, 18, + 18, 18, 18, 18, 36, 18, 37, 38, 39, 18, 40, 41, 42, 43, 44, 45, + 46, 47, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 49, 0, 0, 0, 0, 0, 50, 51, + 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 18, 18, 18, 63, 64, + 65, 65, 11, 11, 11, 11, 15, 15, 15, 15, 66, 66, 18, 18, 18, 18, + 67, 68, 18, 18, 18, 18, 18, 18, 69, 70, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 0, 71, 72, 72, 72, 73, 0, 74, 75, 75, 75, + 76, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 77, 77, 77, 77, 78, 79, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 80, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 81, 18, 18, 18, + 18, 18, 82, 83, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 84, 85, 86, 87, 84, 85, 84, 85, 86, 87, 88, 89, 84, 85, 90, 91, + 84, 85, 84, 85, 84, 85, 92, 93, 94, 95, 96, 97, 98, 99, 94, 100, + 0, 0, 0, 0, 101, 102, 103, 0, 0, 104, 0, 0, 105, 105, 106, 106, + 107, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 108, 109, 109, 109, 110, 110, 110, 111, 0, 0, + 72, 72, 72, 72, 72, 73, 75, 75, 75, 75, 75, 76, 112, 113, 114, 115, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 37, 116, 117, 0, + 118, 118, 118, 118, 119, 120, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 18, 18, 18, 18, 18, 82, 0, 0, + 18, 18, 18, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 68, 18, 68, 18, 18, 18, 18, 18, 18, 18, 0, 121, + 18, 122, 37, 0, 18, 123, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 124, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 11, 11, 4, 5, 15, 15, 8, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 125, 125, 125, 125, 125, 126, 126, 126, 126, 126, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +static RE_UINT8 re_all_cases_stage_4[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, + 1, 2, 1, 3, 1, 1, 1, 1, 1, 1, 1, 4, 1, 1, 1, 1, + 1, 1, 1, 0, 0, 0, 0, 0, 0, 5, 5, 5, 5, 5, 5, 5, + 5, 6, 5, 7, 5, 5, 5, 5, 5, 5, 5, 8, 5, 5, 5, 5, + 5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, + 1, 1, 1, 1, 1, 10, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 11, + 5, 5, 5, 5, 5, 12, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 0, 5, 5, 5, 5, 5, 5, 5, 13, + 14, 15, 14, 15, 14, 15, 14, 15, 16, 17, 14, 15, 14, 15, 14, 15, + 0, 14, 15, 14, 15, 14, 15, 14, 15, 14, 15, 14, 15, 14, 15, 14, + 15, 0, 14, 15, 14, 15, 14, 15, 18, 14, 15, 14, 15, 14, 15, 19, + 20, 21, 14, 15, 14, 15, 22, 14, 15, 23, 23, 14, 15, 0, 24, 25, + 26, 14, 15, 23, 27, 28, 29, 30, 14, 15, 31, 0, 29, 32, 33, 34, + 14, 15, 14, 15, 14, 15, 35, 14, 15, 35, 0, 0, 14, 15, 35, 14, + 15, 36, 36, 14, 15, 14, 15, 37, 14, 15, 0, 0, 14, 15, 0, 38, + 0, 0, 0, 0, 39, 40, 41, 39, 40, 41, 39, 40, 41, 14, 15, 14, + 15, 14, 15, 14, 15, 42, 14, 15, 0, 39, 40, 41, 14, 15, 43, 44, + 45, 0, 14, 15, 14, 15, 14, 15, 14, 15, 14, 15, 0, 0, 0, 0, + 0, 0, 46, 14, 15, 47, 48, 49, 49, 14, 15, 50, 51, 52, 14, 15, + 53, 54, 55, 56, 57, 0, 58, 58, 0, 59, 0, 60, 0, 0, 0, 0, + 58, 0, 0, 61, 0, 62, 63, 0, 64, 65, 0, 66, 0, 0, 0, 65, + 0, 67, 68, 0, 0, 69, 0, 0, 0, 0, 0, 0, 0, 70, 0, 0, + 71, 0, 0, 71, 0, 0, 0, 0, 71, 72, 73, 73, 74, 0, 0, 0, + 0, 0, 75, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 76, 0, 0, + 14, 15, 14, 15, 0, 0, 14, 15, 0, 0, 0, 33, 33, 33, 0, 0, + 0, 0, 0, 0, 0, 0, 77, 0, 78, 78, 78, 0, 79, 0, 80, 80, + 81, 1, 82, 1, 1, 83, 1, 1, 84, 85, 86, 1, 87, 1, 1, 1, + 88, 89, 0, 90, 1, 1, 91, 1, 1, 92, 1, 1, 93, 94, 94, 94, + 95, 5, 
96, 5, 5, 97, 5, 5, 98, 99, 100, 5, 101, 5, 5, 5, + 102, 103, 104, 105, 5, 5, 106, 5, 5, 107, 5, 5, 108, 109, 109, 110, + 111, 112, 0, 0, 0, 113, 114, 115, 116, 117, 118, 0, 119, 120, 0, 14, + 15, 121, 14, 15, 0, 45, 45, 45, 122, 122, 122, 122, 122, 122, 122, 122, + 123, 123, 123, 123, 123, 123, 123, 123, 14, 15, 0, 0, 0, 0, 0, 0, + 0, 0, 14, 15, 14, 15, 14, 15, 124, 14, 15, 14, 15, 14, 15, 14, + 15, 14, 15, 14, 15, 14, 15, 125, 0, 126, 126, 126, 126, 126, 126, 126, + 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 0, + 0, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 0, 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 0, 128, 0, 0, 0, 0, 0, 128, 0, 0, + 0, 129, 0, 0, 0, 130, 0, 0, 131, 132, 14, 15, 14, 15, 14, 15, + 14, 15, 14, 15, 14, 15, 0, 0, 0, 0, 0, 133, 0, 0, 134, 0, + 110, 110, 110, 110, 110, 110, 110, 110, 115, 115, 115, 115, 115, 115, 115, 115, + 110, 110, 110, 110, 110, 110, 0, 0, 115, 115, 115, 115, 115, 115, 0, 0, + 0, 110, 0, 110, 0, 110, 0, 110, 0, 115, 0, 115, 0, 115, 0, 115, + 135, 135, 136, 136, 136, 136, 137, 137, 138, 138, 139, 139, 140, 140, 0, 0, + 110, 110, 0, 141, 0, 0, 0, 0, 115, 115, 142, 142, 143, 0, 144, 0, + 0, 0, 0, 141, 0, 0, 0, 0, 145, 145, 145, 145, 143, 0, 0, 0, + 110, 110, 0, 146, 0, 0, 0, 0, 115, 115, 147, 147, 0, 0, 0, 0, + 110, 110, 0, 148, 0, 118, 0, 0, 115, 115, 149, 149, 121, 0, 0, 0, + 150, 150, 151, 151, 143, 0, 0, 0, 0, 0, 0, 0, 0, 0, 152, 0, + 0, 0, 153, 154, 0, 0, 0, 0, 0, 0, 155, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 156, 0, 157, 157, 157, 157, 157, 157, 157, 157, + 158, 158, 158, 158, 158, 158, 158, 158, 0, 0, 0, 14, 15, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, + 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 0, 0, 0, 0, 0, 0, + 14, 15, 161, 162, 163, 164, 165, 14, 15, 14, 15, 14, 15, 166, 167, 168, + 169, 0, 14, 15, 0, 14, 15, 0, 0, 0, 0, 0, 0, 0, 170, 170, + 0, 0, 
0, 14, 15, 14, 15, 0, 0, 0, 14, 15, 0, 0, 0, 0, + 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 0, 171, + 0, 0, 0, 0, 0, 171, 0, 0, 0, 14, 15, 14, 15, 172, 14, 15, + 0, 0, 0, 14, 15, 173, 0, 0, 14, 15, 174, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 14, 15, 0, 175, 175, 175, 175, 175, 175, 175, 175, + 176, 176, 176, 176, 176, 176, 176, 176, +}; + +/* All_Cases: 1984 bytes. */ + +static RE_AllCases re_all_cases_table[] = { + {{ 0, 0, 0}}, + {{ 32, 0, 0}}, + {{ 32, 232, 0}}, + {{ 32, 8415, 0}}, + {{ 32, 300, 0}}, + {{ -32, 0, 0}}, + {{ -32, 199, 0}}, + {{ -32, 8383, 0}}, + {{ -32, 268, 0}}, + {{ 743, 775, 0}}, + {{ 32, 8294, 0}}, + {{ 7615, 0, 0}}, + {{ -32, 8262, 0}}, + {{ 121, 0, 0}}, + {{ 1, 0, 0}}, + {{ -1, 0, 0}}, + {{ -199, 0, 0}}, + {{ -232, 0, 0}}, + {{ -121, 0, 0}}, + {{ -300, -268, 0}}, + {{ 195, 0, 0}}, + {{ 210, 0, 0}}, + {{ 206, 0, 0}}, + {{ 205, 0, 0}}, + {{ 79, 0, 0}}, + {{ 202, 0, 0}}, + {{ 203, 0, 0}}, + {{ 207, 0, 0}}, + {{ 97, 0, 0}}, + {{ 211, 0, 0}}, + {{ 209, 0, 0}}, + {{ 163, 0, 0}}, + {{ 213, 0, 0}}, + {{ 130, 0, 0}}, + {{ 214, 0, 0}}, + {{ 218, 0, 0}}, + {{ 217, 0, 0}}, + {{ 219, 0, 0}}, + {{ 56, 0, 0}}, + {{ 1, 2, 0}}, + {{ -1, 1, 0}}, + {{ -2, -1, 0}}, + {{ -79, 0, 0}}, + {{ -97, 0, 0}}, + {{ -56, 0, 0}}, + {{ -130, 0, 0}}, + {{ 10795, 0, 0}}, + {{ -163, 0, 0}}, + {{ 10792, 0, 0}}, + {{ 10815, 0, 0}}, + {{ -195, 0, 0}}, + {{ 69, 0, 0}}, + {{ 71, 0, 0}}, + {{ 10783, 0, 0}}, + {{ 10780, 0, 0}}, + {{ 10782, 0, 0}}, + {{ -210, 0, 0}}, + {{ -206, 0, 0}}, + {{ -205, 0, 0}}, + {{ -202, 0, 0}}, + {{ -203, 0, 0}}, + {{ -207, 0, 0}}, + {{ 42280, 0, 0}}, + {{ 42308, 0, 0}}, + {{ -209, 0, 0}}, + {{ -211, 0, 0}}, + {{ 10743, 0, 0}}, + {{ 10749, 0, 0}}, + {{ -213, 0, 0}}, + {{ -214, 0, 0}}, + {{ 10727, 0, 0}}, + {{ -218, 0, 0}}, + {{ -69, 0, 0}}, + {{ -217, 0, 0}}, + {{ -71, 0, 0}}, + {{ -219, 0, 0}}, + {{ 84, 116, 7289}}, + {{ 38, 0, 0}}, + {{ 37, 0, 0}}, + {{ 64, 0, 0}}, + {{ 63, 0, 0}}, + {{ 7235, 0, 0}}, + {{ 32, 62, 0}}, + {{ 32, 96, 
0}}, + {{ 32, 57, 92}}, + {{ -84, 32, 7205}}, + {{ 32, 86, 0}}, + {{ -743, 32, 0}}, + {{ 32, 54, 0}}, + {{ 32, 80, 0}}, + {{ 31, 32, 0}}, + {{ 32, 47, 0}}, + {{ 32, 7549, 0}}, + {{ -38, 0, 0}}, + {{ -37, 0, 0}}, + {{ 7219, 0, 0}}, + {{ -32, 30, 0}}, + {{ -32, 64, 0}}, + {{ -32, 25, 60}}, + {{ -116, -32, 7173}}, + {{ -32, 54, 0}}, + {{ -775, -32, 0}}, + {{ -32, 22, 0}}, + {{ -32, 48, 0}}, + {{ -31, 1, 0}}, + {{ -32, -1, 0}}, + {{ -32, 15, 0}}, + {{ -32, 7517, 0}}, + {{ -64, 0, 0}}, + {{ -63, 0, 0}}, + {{ 8, 0, 0}}, + {{ -62, -30, 0}}, + {{ -57, -25, 35}}, + {{ -47, -15, 0}}, + {{ -54, -22, 0}}, + {{ -8, 0, 0}}, + {{ -86, -54, 0}}, + {{ -80, -48, 0}}, + {{ 7, 0, 0}}, + {{ -92, -60, -35}}, + {{ -96, -64, 0}}, + {{ -7, 0, 0}}, + {{ 80, 0, 0}}, + {{ -80, 0, 0}}, + {{ 15, 0, 0}}, + {{ -15, 0, 0}}, + {{ 48, 0, 0}}, + {{ -48, 0, 0}}, + {{ 7264, 0, 0}}, + {{ 35332, 0, 0}}, + {{ 3814, 0, 0}}, + {{ 1, 59, 0}}, + {{ -1, 58, 0}}, + {{ -59, -58, 0}}, + {{ -7615, 0, 0}}, + {{ 74, 0, 0}}, + {{ 86, 0, 0}}, + {{ 100, 0, 0}}, + {{ 128, 0, 0}}, + {{ 112, 0, 0}}, + {{ 126, 0, 0}}, + {{ 9, 0, 0}}, + {{ -74, 0, 0}}, + {{ -9, 0, 0}}, + {{ -7289, -7205, -7173}}, + {{ -86, 0, 0}}, + {{ -7235, 0, 0}}, + {{ -100, 0, 0}}, + {{ -7219, 0, 0}}, + {{ -112, 0, 0}}, + {{ -128, 0, 0}}, + {{ -126, 0, 0}}, + {{ -7549, -7517, 0}}, + {{ -8415, -8383, 0}}, + {{ -8294, -8262, 0}}, + {{ 28, 0, 0}}, + {{ -28, 0, 0}}, + {{ 16, 0, 0}}, + {{ -16, 0, 0}}, + {{ 26, 0, 0}}, + {{ -26, 0, 0}}, + {{-10743, 0, 0}}, + {{ -3814, 0, 0}}, + {{-10727, 0, 0}}, + {{-10795, 0, 0}}, + {{-10792, 0, 0}}, + {{-10780, 0, 0}}, + {{-10749, 0, 0}}, + {{-10783, 0, 0}}, + {{-10782, 0, 0}}, + {{-10815, 0, 0}}, + {{ -7264, 0, 0}}, + {{-35332, 0, 0}}, + {{-42280, 0, 0}}, + {{-42308, 0, 0}}, + {{ 40, 0, 0}}, + {{ -40, 0, 0}}, +}; + +/* All_Cases: 2124 bytes. 
*/ + +int re_get_all_cases(RE_UINT32 ch, RE_UINT32* codepoints) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + RE_AllCases* all_cases; + int count; + + f = ch >> 13; + code = ch ^ (f << 13); + pos = (RE_UINT32)re_all_cases_stage_1[f] << 5; + f = code >> 8; + code ^= f << 8; + pos = (RE_UINT32)re_all_cases_stage_2[pos + f] << 5; + f = code >> 3; + code ^= f << 3; + pos = (RE_UINT32)re_all_cases_stage_3[pos + f] << 3; + value = re_all_cases_stage_4[pos + code]; + + all_cases = &re_all_cases_table[value]; + + codepoints[0] = ch; + count = 1; + + while (count < RE_MAX_CASES && all_cases->diffs[count - 1] != 0) { + codepoints[count] = (RE_UINT32)((RE_INT32)ch + all_cases->diffs[count - + 1]); + ++count; + } + + return count; +} + +/* Simple_Case_Folding. */ + +static RE_UINT8 re_simple_case_folding_stage_1[] = { + 0, 1, 2, 2, 2, 3, 2, 4, 5, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, +}; + +static RE_UINT8 re_simple_case_folding_stage_2[] = { + 0, 1, 2, 3, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 8, 9, + 6, 10, 6, 6, 11, 6, 6, 6, 6, 6, 6, 6, 12, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 13, 14, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 15, + 6, 6, 6, 6, 16, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, +}; + +static RE_UINT8 re_simple_case_folding_stage_3[] = { + 
0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 2, 3, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 4, 0, 2, 2, 5, 5, 0, 0, 0, 0, + 6, 6, 6, 6, 6, 6, 7, 8, 8, 7, 6, 6, 6, 6, 6, 9, + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 8, 20, 6, 6, 21, 6, + 6, 6, 6, 6, 22, 6, 23, 24, 25, 6, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 26, 0, 0, 0, 0, 0, 27, 0, + 28, 29, 1, 2, 30, 31, 0, 0, 32, 33, 34, 6, 6, 6, 35, 36, + 37, 37, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 6, 6, 6, 6, + 38, 7, 6, 6, 6, 6, 6, 6, 39, 40, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 0, 41, 42, 42, 42, 43, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 44, 44, 44, 44, 45, 46, 0, 0, 0, 0, 0, 0, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 47, 48, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 0, 49, 0, 50, 0, 49, 0, 49, 0, 50, 0, 51, 0, 49, 0, 0, + 0, 49, 0, 49, 0, 49, 0, 52, 0, 53, 0, 54, 0, 55, 0, 56, + 0, 0, 0, 0, 57, 58, 59, 0, 0, 0, 0, 0, 60, 60, 0, 0, + 61, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 62, 63, 63, 63, 0, 0, 0, 0, 0, 0, + 42, 42, 42, 42, 42, 43, 0, 0, 0, 0, 0, 0, 64, 65, 66, 67, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 23, 68, 32, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 6, 6, 6, 6, 6, 47, 0, 0, + 6, 6, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 7, 6, 7, 6, 6, 6, 6, 6, 6, 6, 0, 69, + 6, 70, 23, 0, 6, 71, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 2, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 72, 72, 72, 72, 72, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +static RE_UINT8 re_simple_case_folding_stage_4[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 2, 0, 0, 1, 1, 1, 1, 1, 1, 1, 
0, + 3, 0, 3, 0, 3, 0, 3, 0, 0, 0, 3, 0, 3, 0, 3, 0, + 0, 3, 0, 3, 0, 3, 0, 3, 4, 3, 0, 3, 0, 3, 0, 5, + 0, 6, 3, 0, 3, 0, 7, 3, 0, 8, 8, 3, 0, 0, 9, 10, + 11, 3, 0, 8, 12, 0, 13, 14, 3, 0, 0, 0, 13, 15, 0, 16, + 3, 0, 3, 0, 3, 0, 17, 3, 0, 17, 0, 0, 3, 0, 17, 3, + 0, 18, 18, 3, 0, 3, 0, 19, 3, 0, 0, 0, 3, 0, 0, 0, + 0, 0, 0, 0, 20, 3, 0, 20, 3, 0, 20, 3, 0, 3, 0, 3, + 0, 3, 0, 3, 0, 0, 3, 0, 0, 20, 3, 0, 3, 0, 21, 22, + 23, 0, 3, 0, 3, 0, 3, 0, 3, 0, 3, 0, 0, 0, 0, 0, + 0, 0, 24, 3, 0, 25, 26, 0, 0, 3, 0, 27, 28, 29, 3, 0, + 0, 0, 0, 0, 0, 30, 0, 0, 3, 0, 3, 0, 0, 0, 3, 0, + 0, 0, 0, 0, 0, 0, 31, 0, 32, 32, 32, 0, 33, 0, 34, 34, + 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, + 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 35, + 36, 37, 0, 0, 0, 38, 39, 0, 40, 41, 0, 0, 42, 43, 0, 3, + 0, 44, 3, 0, 0, 23, 23, 23, 45, 45, 45, 45, 45, 45, 45, 45, + 3, 0, 0, 0, 0, 0, 0, 0, 46, 3, 0, 3, 0, 3, 0, 3, + 0, 3, 0, 3, 0, 3, 0, 0, 0, 47, 47, 47, 47, 47, 47, 47, + 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 0, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 0, 48, + 0, 0, 0, 0, 0, 48, 0, 0, 3, 0, 3, 0, 3, 0, 0, 0, + 0, 0, 0, 49, 0, 0, 50, 0, 51, 51, 51, 51, 51, 51, 51, 51, + 51, 51, 51, 51, 51, 51, 0, 0, 0, 51, 0, 51, 0, 51, 0, 51, + 51, 51, 52, 52, 53, 0, 54, 0, 55, 55, 55, 55, 53, 0, 0, 0, + 51, 51, 56, 56, 0, 0, 0, 0, 51, 51, 57, 57, 44, 0, 0, 0, + 58, 58, 59, 59, 53, 0, 0, 0, 0, 0, 0, 0, 0, 0, 60, 0, + 0, 0, 61, 62, 0, 0, 0, 0, 0, 0, 63, 0, 0, 0, 0, 0, + 64, 64, 64, 64, 64, 64, 64, 64, 0, 0, 0, 3, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, + 3, 0, 66, 67, 68, 0, 0, 3, 0, 3, 0, 3, 0, 69, 70, 71, + 72, 0, 3, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 73, 73, + 0, 0, 0, 3, 0, 3, 0, 0, 0, 3, 0, 3, 0, 74, 3, 0, + 0, 0, 0, 3, 0, 75, 0, 0, 3, 0, 76, 0, 0, 0, 0, 0, + 77, 77, 77, 77, 77, 77, 77, 77, +}; + +/* Simple_Case_Folding: 1456 bytes. 
*/ + +static RE_INT32 re_simple_case_folding_table[] = { + 0, + 32, + 775, + 1, + -121, + -268, + 210, + 206, + 205, + 79, + 202, + 203, + 207, + 211, + 209, + 213, + 214, + 218, + 217, + 219, + 2, + -97, + -56, + -130, + 10795, + -163, + 10792, + -195, + 69, + 71, + 116, + 38, + 37, + 64, + 63, + 8, + -30, + -25, + -15, + -22, + -54, + -48, + -60, + -64, + -7, + 80, + 15, + 48, + 7264, + -58, + -7615, + -8, + -74, + -9, + -7173, + -86, + -100, + -112, + -128, + -126, + -7517, + -8383, + -8262, + 28, + 16, + 26, + -10743, + -3814, + -10727, + -10780, + -10749, + -10783, + -10782, + -10815, + -35332, + -42280, + -42308, + 40, +}; + +/* Simple_Case_Folding: 312 bytes. */ + +RE_UINT32 re_get_simple_case_folding(RE_UINT32 ch) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + RE_INT32 diff; + + f = ch >> 13; + code = ch ^ (f << 13); + pos = (RE_UINT32)re_simple_case_folding_stage_1[f] << 5; + f = code >> 8; + code ^= f << 8; + pos = (RE_UINT32)re_simple_case_folding_stage_2[pos + f] << 5; + f = code >> 3; + code ^= f << 3; + pos = (RE_UINT32)re_simple_case_folding_stage_3[pos + f] << 3; + value = re_simple_case_folding_stage_4[pos + code]; + + diff = re_simple_case_folding_table[value]; + + return (RE_UINT32)((RE_INT32)ch + diff); +} + +/* Full_Case_Folding. 
*/ + +static RE_UINT8 re_full_case_folding_stage_1[] = { + 0, 1, 2, 2, 2, 3, 2, 4, 5, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, +}; + +static RE_UINT8 re_full_case_folding_stage_2[] = { + 0, 1, 2, 3, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 8, 9, + 6, 10, 6, 6, 11, 6, 6, 6, 6, 6, 6, 6, 12, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 13, 14, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 15, 6, 6, 6, 16, + 6, 6, 6, 6, 17, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, +}; + +static RE_UINT8 re_full_case_folding_stage_3[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 2, 3, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 4, 0, 2, 2, 5, 6, 0, 0, 0, 0, + 7, 7, 7, 7, 7, 7, 8, 9, 9, 10, 7, 7, 7, 7, 7, 11, + 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 9, 22, 7, 7, 23, 7, + 7, 7, 7, 7, 24, 7, 25, 26, 27, 7, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 28, 0, 0, 0, 0, 0, 29, 0, + 30, 31, 32, 2, 33, 34, 35, 0, 36, 37, 38, 7, 7, 7, 39, 40, + 41, 41, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 7, 7, 7, 7, + 42, 43, 7, 7, 7, 7, 7, 7, 44, 45, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 0, 46, 47, 47, 47, 48, 0, 0, 0, 0, 0, + 49, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
+ 0, 0, 0, 0, 50, 50, 50, 50, 51, 52, 0, 0, 0, 0, 0, 0, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 53, 54, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 0, 55, 0, 56, 0, 55, 0, 55, 0, 56, 57, 58, 0, 55, 0, 0, + 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, + 0, 0, 0, 0, 75, 76, 77, 0, 0, 0, 0, 0, 78, 78, 0, 0, + 79, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 80, 81, 81, 81, 0, 0, 0, 0, 0, 0, + 47, 47, 47, 47, 47, 48, 0, 0, 0, 0, 0, 0, 82, 83, 84, 85, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 25, 86, 36, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 7, 7, 7, 7, 7, 87, 0, 0, + 7, 7, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 43, 7, 43, 7, 7, 7, 7, 7, 7, 7, 0, 88, + 7, 89, 25, 0, 7, 90, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 91, 0, 92, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 2, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 93, 93, 93, 93, 93, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +static RE_UINT8 re_full_case_folding_stage_4[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 2, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, + 1, 1, 1, 1, 1, 1, 1, 3, 4, 0, 4, 0, 4, 0, 4, 0, + 5, 0, 4, 0, 4, 0, 4, 0, 0, 4, 0, 4, 0, 4, 0, 4, + 0, 6, 4, 0, 4, 0, 4, 0, 7, 4, 0, 4, 0, 4, 0, 8, + 0, 9, 4, 0, 4, 0, 10, 4, 0, 11, 11, 4, 0, 0, 12, 13, + 14, 4, 0, 11, 15, 0, 16, 17, 4, 0, 0, 0, 16, 18, 0, 19, + 4, 0, 4, 0, 4, 0, 20, 4, 0, 20, 0, 0, 4, 0, 20, 4, + 0, 21, 21, 4, 0, 4, 0, 22, 4, 0, 0, 0, 4, 0, 0, 0, + 0, 0, 0, 0, 23, 4, 0, 23, 4, 0, 23, 4, 0, 4, 0, 4, + 0, 4, 0, 4, 0, 0, 4, 0, 24, 23, 4, 0, 4, 0, 25, 26, + 27, 0, 4, 0, 4, 0, 4, 0, 4, 0, 4, 0, 0, 0, 0, 0, + 0, 0, 28, 4, 0, 29, 30, 0, 0, 4, 0, 31, 32, 33, 4, 0, + 0, 0, 0, 0, 0, 34, 0, 0, 4, 0, 4, 0, 0, 0, 4, 0, + 0, 0, 0, 0, 0, 0, 35, 0, 36, 36, 36, 
0, 37, 0, 38, 38, + 39, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 0, 0, 0, 0, 40, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 41, + 42, 43, 0, 0, 0, 44, 45, 0, 46, 47, 0, 0, 48, 49, 0, 4, + 0, 50, 4, 0, 0, 27, 27, 27, 51, 51, 51, 51, 51, 51, 51, 51, + 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 4, 0, 4, 0, + 52, 4, 0, 4, 0, 4, 0, 4, 0, 4, 0, 4, 0, 4, 0, 0, + 0, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, + 53, 53, 53, 53, 53, 53, 53, 0, 0, 0, 0, 0, 0, 0, 0, 54, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 0, 55, + 0, 0, 0, 0, 0, 55, 0, 0, 4, 0, 4, 0, 4, 0, 56, 57, + 58, 59, 60, 61, 0, 0, 62, 0, 63, 63, 63, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 0, 0, 64, 0, 65, 0, 66, 0, 67, 0, + 0, 63, 0, 63, 0, 63, 0, 63, 68, 68, 68, 68, 68, 68, 68, 68, + 69, 69, 69, 69, 69, 69, 69, 69, 70, 70, 70, 70, 70, 70, 70, 70, + 71, 71, 71, 71, 71, 71, 71, 71, 72, 72, 72, 72, 72, 72, 72, 72, + 73, 73, 73, 73, 73, 73, 73, 73, 0, 0, 74, 75, 76, 0, 77, 78, + 63, 63, 79, 79, 80, 0, 81, 0, 0, 0, 82, 83, 84, 0, 85, 86, + 87, 87, 87, 87, 88, 0, 0, 0, 0, 0, 89, 90, 0, 0, 91, 92, + 63, 63, 93, 93, 0, 0, 0, 0, 0, 0, 94, 95, 96, 0, 97, 98, + 63, 63, 99, 99, 50, 0, 0, 0, 0, 0, 100, 101, 102, 0, 103, 104, + 105, 105, 106, 106, 107, 0, 0, 0, 0, 0, 0, 0, 0, 0, 108, 0, + 0, 0, 109, 110, 0, 0, 0, 0, 0, 0, 111, 0, 0, 0, 0, 0, + 112, 112, 112, 112, 112, 112, 112, 112, 0, 0, 0, 4, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, + 4, 0, 114, 115, 116, 0, 0, 4, 0, 4, 0, 4, 0, 117, 118, 119, + 120, 0, 4, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 121, 121, + 0, 0, 0, 4, 0, 4, 0, 0, 4, 0, 4, 0, 4, 0, 0, 0, + 0, 4, 0, 4, 0, 122, 4, 0, 0, 0, 0, 4, 0, 123, 0, 0, + 4, 0, 124, 0, 0, 0, 0, 0, 125, 126, 127, 128, 129, 130, 131, 0, + 0, 0, 0, 132, 133, 134, 135, 136, 137, 137, 137, 137, 137, 137, 137, 137, +}; + +/* Full_Case_Folding: 1656 bytes. 
*/ + +static RE_FullCaseFolding re_full_case_folding_table[] = { + { 0, { 0, 0}}, + { 32, { 0, 0}}, + { 775, { 0, 0}}, + { -108, { 115, 0}}, + { 1, { 0, 0}}, + { -199, { 775, 0}}, + { 371, { 110, 0}}, + { -121, { 0, 0}}, + { -268, { 0, 0}}, + { 210, { 0, 0}}, + { 206, { 0, 0}}, + { 205, { 0, 0}}, + { 79, { 0, 0}}, + { 202, { 0, 0}}, + { 203, { 0, 0}}, + { 207, { 0, 0}}, + { 211, { 0, 0}}, + { 209, { 0, 0}}, + { 213, { 0, 0}}, + { 214, { 0, 0}}, + { 218, { 0, 0}}, + { 217, { 0, 0}}, + { 219, { 0, 0}}, + { 2, { 0, 0}}, + { -390, { 780, 0}}, + { -97, { 0, 0}}, + { -56, { 0, 0}}, + { -130, { 0, 0}}, + { 10795, { 0, 0}}, + { -163, { 0, 0}}, + { 10792, { 0, 0}}, + { -195, { 0, 0}}, + { 69, { 0, 0}}, + { 71, { 0, 0}}, + { 116, { 0, 0}}, + { 38, { 0, 0}}, + { 37, { 0, 0}}, + { 64, { 0, 0}}, + { 63, { 0, 0}}, + { 41, { 776, 769}}, + { 21, { 776, 769}}, + { 8, { 0, 0}}, + { -30, { 0, 0}}, + { -25, { 0, 0}}, + { -15, { 0, 0}}, + { -22, { 0, 0}}, + { -54, { 0, 0}}, + { -48, { 0, 0}}, + { -60, { 0, 0}}, + { -64, { 0, 0}}, + { -7, { 0, 0}}, + { 80, { 0, 0}}, + { 15, { 0, 0}}, + { 48, { 0, 0}}, + { -34, {1410, 0}}, + { 7264, { 0, 0}}, + { -7726, { 817, 0}}, + { -7715, { 776, 0}}, + { -7713, { 778, 0}}, + { -7712, { 778, 0}}, + { -7737, { 702, 0}}, + { -58, { 0, 0}}, + { -7723, { 115, 0}}, + { -8, { 0, 0}}, + { -7051, { 787, 0}}, + { -7053, { 787, 768}}, + { -7055, { 787, 769}}, + { -7057, { 787, 834}}, + { -128, { 953, 0}}, + { -136, { 953, 0}}, + { -112, { 953, 0}}, + { -120, { 953, 0}}, + { -64, { 953, 0}}, + { -72, { 953, 0}}, + { -66, { 953, 0}}, + { -7170, { 953, 0}}, + { -7176, { 953, 0}}, + { -7173, { 834, 0}}, + { -7174, { 834, 953}}, + { -74, { 0, 0}}, + { -7179, { 953, 0}}, + { -7173, { 0, 0}}, + { -78, { 953, 0}}, + { -7180, { 953, 0}}, + { -7190, { 953, 0}}, + { -7183, { 834, 0}}, + { -7184, { 834, 953}}, + { -86, { 0, 0}}, + { -7189, { 953, 0}}, + { -7193, { 776, 768}}, + { -7194, { 776, 769}}, + { -7197, { 834, 0}}, + { -7198, { 776, 834}}, + { -100, { 0, 0}}, + { 
-7197, { 776, 768}}, + { -7198, { 776, 769}}, + { -7203, { 787, 0}}, + { -7201, { 834, 0}}, + { -7202, { 776, 834}}, + { -112, { 0, 0}}, + { -118, { 953, 0}}, + { -7210, { 953, 0}}, + { -7206, { 953, 0}}, + { -7213, { 834, 0}}, + { -7214, { 834, 953}}, + { -128, { 0, 0}}, + { -126, { 0, 0}}, + { -7219, { 953, 0}}, + { -7517, { 0, 0}}, + { -8383, { 0, 0}}, + { -8262, { 0, 0}}, + { 28, { 0, 0}}, + { 16, { 0, 0}}, + { 26, { 0, 0}}, + {-10743, { 0, 0}}, + { -3814, { 0, 0}}, + {-10727, { 0, 0}}, + {-10780, { 0, 0}}, + {-10749, { 0, 0}}, + {-10783, { 0, 0}}, + {-10782, { 0, 0}}, + {-10815, { 0, 0}}, + {-35332, { 0, 0}}, + {-42280, { 0, 0}}, + {-42308, { 0, 0}}, + {-64154, { 102, 0}}, + {-64155, { 105, 0}}, + {-64156, { 108, 0}}, + {-64157, { 102, 105}}, + {-64158, { 102, 108}}, + {-64146, { 116, 0}}, + {-64147, { 116, 0}}, + {-62879, {1398, 0}}, + {-62880, {1381, 0}}, + {-62881, {1387, 0}}, + {-62872, {1398, 0}}, + {-62883, {1389, 0}}, + { 40, { 0, 0}}, +}; + +/* Full_Case_Folding: 1104 bytes. */ + +int re_get_full_case_folding(RE_UINT32 ch, RE_UINT32* codepoints) { + RE_UINT32 code; + RE_UINT32 f; + RE_UINT32 pos; + RE_UINT32 value; + RE_FullCaseFolding* case_folding; + int count; + + f = ch >> 13; + code = ch ^ (f << 13); + pos = (RE_UINT32)re_full_case_folding_stage_1[f] << 5; + f = code >> 8; + code ^= f << 8; + pos = (RE_UINT32)re_full_case_folding_stage_2[pos + f] << 5; + f = code >> 3; + code ^= f << 3; + pos = (RE_UINT32)re_full_case_folding_stage_3[pos + f] << 3; + value = re_full_case_folding_stage_4[pos + code]; + + case_folding = &re_full_case_folding_table[value]; + + codepoints[0] = (RE_UINT32)((RE_INT32)ch + case_folding->diff); + count = 1; + + while (count < RE_MAX_FOLDED && case_folding->codepoints[count - 1] != 0) { + codepoints[count] = case_folding->codepoints[count - 1]; + ++count; + } + + return count; +} + +/* Property function table. 
*/ + +RE_GetPropertyFunc re_get_property[] = { + re_get_general_category, + re_get_block, + re_get_script, + re_get_word_break, + re_get_grapheme_cluster_break, + re_get_sentence_break, + re_get_math, + re_get_alphabetic, + re_get_lowercase, + re_get_uppercase, + re_get_cased, + re_get_case_ignorable, + re_get_changes_when_lowercased, + re_get_changes_when_uppercased, + re_get_changes_when_titlecased, + re_get_changes_when_casefolded, + re_get_changes_when_casemapped, + re_get_id_start, + re_get_id_continue, + re_get_xid_start, + re_get_xid_continue, + re_get_default_ignorable_code_point, + re_get_grapheme_extend, + re_get_grapheme_base, + re_get_grapheme_link, + re_get_white_space, + re_get_bidi_control, + re_get_join_control, + re_get_dash, + re_get_hyphen, + re_get_quotation_mark, + re_get_terminal_punctuation, + re_get_other_math, + re_get_hex_digit, + re_get_ascii_hex_digit, + re_get_other_alphabetic, + re_get_ideographic, + re_get_diacritic, + re_get_extender, + re_get_other_lowercase, + re_get_other_uppercase, + re_get_noncharacter_code_point, + re_get_other_grapheme_extend, + re_get_ids_binary_operator, + re_get_ids_trinary_operator, + re_get_radical, + re_get_unified_ideograph, + re_get_other_default_ignorable_code_point, + re_get_deprecated, + re_get_soft_dotted, + re_get_logical_order_exception, + re_get_other_id_start, + re_get_other_id_continue, + re_get_sterm, + re_get_variation_selector, + re_get_pattern_white_space, + re_get_pattern_syntax, + re_get_hangul_syllable_type, + re_get_bidi_class, + re_get_canonical_combining_class, + re_get_decomposition_type, + re_get_east_asian_width, + re_get_joining_group, + re_get_joining_type, + re_get_line_break, + re_get_numeric_type, + re_get_numeric_value, + re_get_bidi_mirrored, + re_get_indic_matra_category, + re_get_indic_syllabic_category, + re_get_alphanumeric, + re_get_any, + re_get_blank, + re_get_graph, + re_get_print, + re_get_word, + re_get_xdigit, +}; diff --git a/lib/regex/_regex_unicode.h 
b/lib/regex/_regex_unicode.h new file mode 100644 index 00000000..fa8114be --- /dev/null +++ b/lib/regex/_regex_unicode.h @@ -0,0 +1,218 @@ +typedef unsigned char RE_UINT8; +typedef signed char RE_INT8; +typedef unsigned short RE_UINT16; +typedef signed short RE_INT16; +typedef unsigned int RE_UINT32; +typedef signed int RE_INT32; + +typedef unsigned char BOOL; +enum {FALSE, TRUE}; + +#define RE_ASCII_MAX 0x7F +#define RE_LOCALE_MAX 0xFF +#define RE_UNICODE_MAX 0x10FFFF + +#define RE_MAX_CASES 4 +#define RE_MAX_FOLDED 3 + +typedef struct RE_Property { + RE_UINT16 name; + RE_UINT8 id; + RE_UINT8 value_set; +} RE_Property; + +typedef struct RE_PropertyValue { + RE_UINT16 name; + RE_UINT8 value_set; + RE_UINT8 id; +} RE_PropertyValue; + +typedef RE_UINT32 (*RE_GetPropertyFunc)(RE_UINT32 ch); + +#define RE_PROP_GC 0x0 +#define RE_PROP_CASED 0xA +#define RE_PROP_UPPERCASE 0x9 +#define RE_PROP_LOWERCASE 0x8 + +#define RE_PROP_C 30 +#define RE_PROP_L 31 +#define RE_PROP_M 32 +#define RE_PROP_N 33 +#define RE_PROP_P 34 +#define RE_PROP_S 35 +#define RE_PROP_Z 36 +#define RE_PROP_ASSIGNED 38 +#define RE_PROP_CASEDLETTER 37 + +#define RE_PROP_CN 0 +#define RE_PROP_LU 1 +#define RE_PROP_LL 2 +#define RE_PROP_LT 3 +#define RE_PROP_LM 4 +#define RE_PROP_LO 5 +#define RE_PROP_MN 6 +#define RE_PROP_ME 7 +#define RE_PROP_MC 8 +#define RE_PROP_ND 9 +#define RE_PROP_NL 10 +#define RE_PROP_NO 11 +#define RE_PROP_ZS 12 +#define RE_PROP_ZL 13 +#define RE_PROP_ZP 14 +#define RE_PROP_CC 15 +#define RE_PROP_CF 16 +#define RE_PROP_CO 17 +#define RE_PROP_CS 18 +#define RE_PROP_PD 19 +#define RE_PROP_PS 20 +#define RE_PROP_PE 21 +#define RE_PROP_PC 22 +#define RE_PROP_PO 23 +#define RE_PROP_SM 24 +#define RE_PROP_SC 25 +#define RE_PROP_SK 26 +#define RE_PROP_SO 27 +#define RE_PROP_PI 28 +#define RE_PROP_PF 29 + +#define RE_PROP_C_MASK 0x00078001 +#define RE_PROP_L_MASK 0x0000003E +#define RE_PROP_M_MASK 0x000001C0 +#define RE_PROP_N_MASK 0x00000E00 +#define RE_PROP_P_MASK 0x30F80000 +#define 
RE_PROP_S_MASK 0x0F000000 +#define RE_PROP_Z_MASK 0x00007000 + +#define RE_PROP_ALNUM 0x460001 +#define RE_PROP_ALPHA 0x070001 +#define RE_PROP_ANY 0x470001 +#define RE_PROP_ASCII 0x010001 +#define RE_PROP_BLANK 0x480001 +#define RE_PROP_CNTRL 0x00000F +#define RE_PROP_DIGIT 0x000009 +#define RE_PROP_GRAPH 0x490001 +#define RE_PROP_LOWER 0x080001 +#define RE_PROP_PRINT 0x4A0001 +#define RE_PROP_SPACE 0x190001 +#define RE_PROP_UPPER 0x090001 +#define RE_PROP_WORD 0x4B0001 +#define RE_PROP_XDIGIT 0x4C0001 + +#define RE_BREAK_OTHER 0 +#define RE_BREAK_DOUBLEQUOTE 1 +#define RE_BREAK_SINGLEQUOTE 2 +#define RE_BREAK_HEBREWLETTER 3 +#define RE_BREAK_CR 4 +#define RE_BREAK_LF 5 +#define RE_BREAK_NEWLINE 6 +#define RE_BREAK_EXTEND 7 +#define RE_BREAK_REGIONALINDICATOR 8 +#define RE_BREAK_FORMAT 9 +#define RE_BREAK_KATAKANA 10 +#define RE_BREAK_ALETTER 11 +#define RE_BREAK_MIDLETTER 12 +#define RE_BREAK_MIDNUM 13 +#define RE_BREAK_MIDNUMLET 14 +#define RE_BREAK_NUMERIC 15 +#define RE_BREAK_EXTENDNUMLET 16 + +#define RE_GBREAK_OTHER 0 +#define RE_GBREAK_CR 1 +#define RE_GBREAK_LF 2 +#define RE_GBREAK_CONTROL 3 +#define RE_GBREAK_EXTEND 4 +#define RE_GBREAK_REGIONALINDICATOR 5 +#define RE_GBREAK_SPACINGMARK 6 +#define RE_GBREAK_L 7 +#define RE_GBREAK_V 8 +#define RE_GBREAK_T 9 +#define RE_GBREAK_LV 10 +#define RE_GBREAK_LVT 11 +#define RE_GBREAK_PREPEND 12 + +extern char* re_strings[1160]; +extern RE_Property re_properties[143]; +extern RE_PropertyValue re_property_values[1251]; +extern RE_UINT16 re_expand_on_folding[104]; +extern RE_GetPropertyFunc re_get_property[77]; + +RE_UINT32 re_get_general_category(RE_UINT32 ch); +RE_UINT32 re_get_block(RE_UINT32 ch); +RE_UINT32 re_get_script(RE_UINT32 ch); +RE_UINT32 re_get_word_break(RE_UINT32 ch); +RE_UINT32 re_get_grapheme_cluster_break(RE_UINT32 ch); +RE_UINT32 re_get_sentence_break(RE_UINT32 ch); +RE_UINT32 re_get_math(RE_UINT32 ch); +RE_UINT32 re_get_alphabetic(RE_UINT32 ch); +RE_UINT32 re_get_lowercase(RE_UINT32 ch); 
+RE_UINT32 re_get_uppercase(RE_UINT32 ch); +RE_UINT32 re_get_cased(RE_UINT32 ch); +RE_UINT32 re_get_case_ignorable(RE_UINT32 ch); +RE_UINT32 re_get_changes_when_lowercased(RE_UINT32 ch); +RE_UINT32 re_get_changes_when_uppercased(RE_UINT32 ch); +RE_UINT32 re_get_changes_when_titlecased(RE_UINT32 ch); +RE_UINT32 re_get_changes_when_casefolded(RE_UINT32 ch); +RE_UINT32 re_get_changes_when_casemapped(RE_UINT32 ch); +RE_UINT32 re_get_id_start(RE_UINT32 ch); +RE_UINT32 re_get_id_continue(RE_UINT32 ch); +RE_UINT32 re_get_xid_start(RE_UINT32 ch); +RE_UINT32 re_get_xid_continue(RE_UINT32 ch); +RE_UINT32 re_get_default_ignorable_code_point(RE_UINT32 ch); +RE_UINT32 re_get_grapheme_extend(RE_UINT32 ch); +RE_UINT32 re_get_grapheme_base(RE_UINT32 ch); +RE_UINT32 re_get_grapheme_link(RE_UINT32 ch); +RE_UINT32 re_get_white_space(RE_UINT32 ch); +RE_UINT32 re_get_bidi_control(RE_UINT32 ch); +RE_UINT32 re_get_join_control(RE_UINT32 ch); +RE_UINT32 re_get_dash(RE_UINT32 ch); +RE_UINT32 re_get_hyphen(RE_UINT32 ch); +RE_UINT32 re_get_quotation_mark(RE_UINT32 ch); +RE_UINT32 re_get_terminal_punctuation(RE_UINT32 ch); +RE_UINT32 re_get_other_math(RE_UINT32 ch); +RE_UINT32 re_get_hex_digit(RE_UINT32 ch); +RE_UINT32 re_get_ascii_hex_digit(RE_UINT32 ch); +RE_UINT32 re_get_other_alphabetic(RE_UINT32 ch); +RE_UINT32 re_get_ideographic(RE_UINT32 ch); +RE_UINT32 re_get_diacritic(RE_UINT32 ch); +RE_UINT32 re_get_extender(RE_UINT32 ch); +RE_UINT32 re_get_other_lowercase(RE_UINT32 ch); +RE_UINT32 re_get_other_uppercase(RE_UINT32 ch); +RE_UINT32 re_get_noncharacter_code_point(RE_UINT32 ch); +RE_UINT32 re_get_other_grapheme_extend(RE_UINT32 ch); +RE_UINT32 re_get_ids_binary_operator(RE_UINT32 ch); +RE_UINT32 re_get_ids_trinary_operator(RE_UINT32 ch); +RE_UINT32 re_get_radical(RE_UINT32 ch); +RE_UINT32 re_get_unified_ideograph(RE_UINT32 ch); +RE_UINT32 re_get_other_default_ignorable_code_point(RE_UINT32 ch); +RE_UINT32 re_get_deprecated(RE_UINT32 ch); +RE_UINT32 re_get_soft_dotted(RE_UINT32 ch); 
+RE_UINT32 re_get_logical_order_exception(RE_UINT32 ch); +RE_UINT32 re_get_other_id_start(RE_UINT32 ch); +RE_UINT32 re_get_other_id_continue(RE_UINT32 ch); +RE_UINT32 re_get_sterm(RE_UINT32 ch); +RE_UINT32 re_get_variation_selector(RE_UINT32 ch); +RE_UINT32 re_get_pattern_white_space(RE_UINT32 ch); +RE_UINT32 re_get_pattern_syntax(RE_UINT32 ch); +RE_UINT32 re_get_hangul_syllable_type(RE_UINT32 ch); +RE_UINT32 re_get_bidi_class(RE_UINT32 ch); +RE_UINT32 re_get_canonical_combining_class(RE_UINT32 ch); +RE_UINT32 re_get_decomposition_type(RE_UINT32 ch); +RE_UINT32 re_get_east_asian_width(RE_UINT32 ch); +RE_UINT32 re_get_joining_group(RE_UINT32 ch); +RE_UINT32 re_get_joining_type(RE_UINT32 ch); +RE_UINT32 re_get_line_break(RE_UINT32 ch); +RE_UINT32 re_get_numeric_type(RE_UINT32 ch); +RE_UINT32 re_get_numeric_value(RE_UINT32 ch); +RE_UINT32 re_get_bidi_mirrored(RE_UINT32 ch); +RE_UINT32 re_get_indic_matra_category(RE_UINT32 ch); +RE_UINT32 re_get_indic_syllabic_category(RE_UINT32 ch); +RE_UINT32 re_get_alphanumeric(RE_UINT32 ch); +RE_UINT32 re_get_any(RE_UINT32 ch); +RE_UINT32 re_get_blank(RE_UINT32 ch); +RE_UINT32 re_get_graph(RE_UINT32 ch); +RE_UINT32 re_get_print(RE_UINT32 ch); +RE_UINT32 re_get_word(RE_UINT32 ch); +RE_UINT32 re_get_xdigit(RE_UINT32 ch); +int re_get_all_cases(RE_UINT32 ch, RE_UINT32* codepoints); +RE_UINT32 re_get_simple_case_folding(RE_UINT32 ch); +int re_get_full_case_folding(RE_UINT32 ch, RE_UINT32* codepoints); diff --git a/lib/regex/regex.py b/lib/regex/regex.py new file mode 100644 index 00000000..e5e40d1f --- /dev/null +++ b/lib/regex/regex.py @@ -0,0 +1,684 @@ +# +# Secret Labs' Regular Expression Engine +# +# Copyright (c) 1998-2001 by Secret Labs AB. All rights reserved. +# +# This version of the SRE library can be redistributed under CNRI's +# Python 1.6 license. For any other use, please contact Secret Labs +# AB (info@pythonware.com). +# +# Portions of this engine have been developed in cooperation with +# CNRI. 
Hewlett-Packard provided funding for 1.6 integration and +# other compatibility work. +# +# 2010-01-16 mrab Python front-end re-written and extended + +r"""Support for regular expressions (RE). + +This module provides regular expression matching operations similar to those +found in Perl. It supports both 8-bit and Unicode strings; both the pattern and +the strings being processed can contain null bytes and characters outside the +US ASCII range. + +Regular expressions can contain both special and ordinary characters. Most +ordinary characters, like "A", "a", or "0", are the simplest regular +expressions; they simply match themselves. You can concatenate ordinary +characters, so last matches the string 'last'. + +There are a few differences between the old (legacy) behaviour and the new +(enhanced) behaviour, which are indicated by VERSION0 or VERSION1. + +The special characters are: + "." Matches any character except a newline. + "^" Matches the start of the string. + "$" Matches the end of the string or just before the + newline at the end of the string. + "*" Matches 0 or more (greedy) repetitions of the preceding + RE. Greedy means that it will match as many repetitions + as possible. + "+" Matches 1 or more (greedy) repetitions of the preceding + RE. + "?" Matches 0 or 1 (greedy) of the preceding RE. + *?,+?,?? Non-greedy versions of the previous three special + characters. + *+,++,?+ Possessive versions of the previous three special + characters. + {m,n} Matches from m to n repetitions of the preceding RE. + {m,n}? Non-greedy version of the above. + {m,n}+ Possessive version of the above. + {...} Fuzzy matching constraints. + "\\" Either escapes special characters or signals a special + sequence. + [...] Indicates a set of characters. A "^" as the first + character indicates a complementing set. + "|" A|B, creates an RE that will match either A or B. + (...) Matches the RE inside the parentheses. 
The contents are + captured and can be retrieved or matched later in the + string. + (?flags-flags) VERSION1: Sets/clears the flags for the remainder of + the group or pattern; VERSION0: Sets the flags for the + entire pattern. + (?:...) Non-capturing version of regular parentheses. + (?>...) Atomic non-capturing version of regular parentheses. + (?flags-flags:...) Non-capturing version of regular parentheses with local + flags. + (?P...) The substring matched by the group is accessible by + name. + (?...) The substring matched by the group is accessible by + name. + (?P=name) Matches the text matched earlier by the group named + name. + (?#...) A comment; ignored. + (?=...) Matches if ... matches next, but doesn't consume the + string. + (?!...) Matches if ... doesn't match next. + (?<=...) Matches if preceded by .... + (? Matches the text matched by the group named name. + \G Matches the empty string, but only at the position where + the search started. + \L Named list. The list is provided as a keyword argument. + \m Matches the empty string, but only at the start of a word. + \M Matches the empty string, but only at the end of a word. + \n Matches the newline character. + \N{name} Matches the named character. + \p{name=value} Matches the character if its property has the specified + value. + \P{name=value} Matches the character if its property hasn't the specified + value. + \r Matches the carriage-return character. + \s Matches any whitespace character; equivalent to + [ \t\n\r\f\v]. + \S Matches any non-whitespace character; equivalent to [^\s]. + \t Matches the tab character. + \uXXXX Matches the Unicode codepoint with 4-digit hex code XXXX. + \UXXXXXXXX Matches the Unicode codepoint with 8-digit hex code + XXXXXXXX. + \v Matches the vertical tab character. 
+ \w Matches any alphanumeric character; equivalent to + [a-zA-Z0-9_] when matching a bytestring or a Unicode string + with the ASCII flag, or the whole range of Unicode + alphanumeric characters (letters plus digits plus + underscore) when matching a Unicode string. With LOCALE, it + will match the set [0-9_] plus characters defined as + letters for the current locale. + \W Matches the complement of \w; equivalent to [^\w]. + \xXX Matches the character with 2-digit hex code XX. + \X Matches a grapheme. + \Z Matches only at the end of the string. + \\ Matches a literal backslash. + +This module exports the following functions: + match Match a regular expression pattern at the beginning of a string. + fullmatch Match a regular expression pattern against all of a string. + search Search a string for the presence of a pattern. + sub Substitute occurrences of a pattern found in a string using a + template string. + subf Substitute occurrences of a pattern found in a string using a + format string. + subn Same as sub, but also return the number of substitutions made. + subfn Same as subf, but also return the number of substitutions made. + split Split a string by the occurrences of a pattern. VERSION1: will + split at zero-width match; VERSION0: won't split at zero-width + match. + splititer Return an iterator yielding the parts of a split string. + findall Find all occurrences of a pattern in a string. + finditer Return an iterator yielding a match object for each match. + compile Compile a pattern into a Pattern object. + purge Clear the regular expression cache. + escape Backslash all non-alphanumerics or special characters in a + string. + +Most of the functions support a concurrent parameter: if True, the GIL will be +released during matching, allowing other Python threads to run concurrently. If +the string changes during matching, the behaviour is undefined. This parameter +is not needed when working on the builtin (immutable) string classes. 
+ +Some of the functions in this module take flags as optional parameters. Most of +these flags can also be set within an RE: + A a ASCII Make \w, \W, \b, \B, \d, and \D match the + corresponding ASCII character categories. Default + when matching a bytestring. + B b BESTMATCH Find the best fuzzy match (default is first). + D DEBUG Print the parsed pattern. + F f FULLCASE Use full case-folding when performing + case-insensitive matching in Unicode. + I i IGNORECASE Perform case-insensitive matching. + L L LOCALE Make \w, \W, \b, \B, \d, and \D dependent on the + current locale. (One byte per character only.) + M m MULTILINE "^" matches the beginning of lines (after a newline) + as well as the string. "$" matches the end of lines + (before a newline) as well as the end of the string. + E e ENHANCEMATCH Attempt to improve the fit after finding the first + fuzzy match. + R r REVERSE Searches backwards. + S s DOTALL "." matches any character at all, including the + newline. + U u UNICODE Make \w, \W, \b, \B, \d, and \D dependent on the + Unicode locale. Default when matching a Unicode + string. + V0 V0 VERSION0 Turn on the old legacy behaviour. + V1 V1 VERSION1 Turn on the new enhanced behaviour. This flag + includes the FULLCASE flag. + W w WORD Make \b and \B work with default Unicode word breaks + and make ".", "^" and "$" work with Unicode line + breaks. + X x VERBOSE Ignore whitespace and comments for nicer looking REs. + +This module also defines an exception 'error'. + +""" + +# Public symbols. 
+__all__ = ["compile", "escape", "findall", "finditer", "fullmatch", "match", + "purge", "search", "split", "splititer", "sub", "subf", "subfn", "subn", + "template", "Scanner", "A", "ASCII", "B", "BESTMATCH", "D", "DEBUG", "E", + "ENHANCEMATCH", "S", "DOTALL", "F", "FULLCASE", "I", "IGNORECASE", "L", + "LOCALE", "M", "MULTILINE", "R", "REVERSE", "T", "TEMPLATE", "U", "UNICODE", + "V0", "VERSION0", "V1", "VERSION1", "X", "VERBOSE", "W", "WORD", "error", + "Regex"] + +__version__ = "2.4.45" + +# -------------------------------------------------------------------- +# Public interface. + +def match(pattern, string, flags=0, pos=None, endpos=None, partial=False, + concurrent=None, **kwargs): + """Try to apply the pattern at the start of the string, returning a match + object, or None if no match was found.""" + return _compile(pattern, flags, kwargs).match(string, pos, endpos, + concurrent, partial) + +def fullmatch(pattern, string, flags=0, pos=None, endpos=None, partial=False, + concurrent=None, **kwargs): + """Try to apply the pattern against all of the string, returning a match + object, or None if no match was found.""" + return _compile(pattern, flags, kwargs).fullmatch(string, pos, endpos, + concurrent, partial) + +def search(pattern, string, flags=0, pos=None, endpos=None, partial=False, + concurrent=None, **kwargs): + """Search through string looking for a match to the pattern, returning a + match object, or None if no match was found.""" + return _compile(pattern, flags, kwargs).search(string, pos, endpos, + concurrent, partial) + +def sub(pattern, repl, string, count=0, flags=0, pos=None, endpos=None, + concurrent=None, **kwargs): + """Return the string obtained by replacing the leftmost (or rightmost with a + reverse pattern) non-overlapping occurrences of the pattern in string by the + replacement repl. 
repl can be either a string or a callable; if a string, + backslash escapes in it are processed; if a callable, it's passed the match + object and must return a replacement string to be used.""" + return _compile(pattern, flags, kwargs).sub(repl, string, count, pos, + endpos, concurrent) + +def subf(pattern, format, string, count=0, flags=0, pos=None, endpos=None, + concurrent=None, **kwargs): + """Return the string obtained by replacing the leftmost (or rightmost with a + reverse pattern) non-overlapping occurrences of the pattern in string by the + replacement format. format can be either a string or a callable; if a string, + it's treated as a format string; if a callable, it's passed the match object + and must return a replacement string to be used.""" + return _compile(pattern, flags, kwargs).subf(format, string, count, pos, + endpos, concurrent) + +def subn(pattern, repl, string, count=0, flags=0, pos=None, endpos=None, + concurrent=None, **kwargs): + """Return a 2-tuple containing (new_string, number). new_string is the string + obtained by replacing the leftmost (or rightmost with a reverse pattern) + non-overlapping occurrences of the pattern in the source string by the + replacement repl. number is the number of substitutions that were made. repl + can be either a string or a callable; if a string, backslash escapes in it + are processed; if a callable, it's passed the match object and must return a + replacement string to be used.""" + return _compile(pattern, flags, kwargs).subn(repl, string, count, pos, + endpos, concurrent) + +def subfn(pattern, format, string, count=0, flags=0, pos=None, endpos=None, + concurrent=None, **kwargs): + """Return a 2-tuple containing (new_string, number). new_string is the string + obtained by replacing the leftmost (or rightmost with a reverse pattern) + non-overlapping occurrences of the pattern in the source string by the + replacement format. number is the number of substitutions that were made. 
format + can be either a string or a callable; if a string, it's treated as a format + string; if a callable, it's passed the match object and must return a + replacement string to be used.""" + return _compile(pattern, flags, kwargs).subfn(format, string, count, pos, + endpos, concurrent) + +def split(pattern, string, maxsplit=0, flags=0, concurrent=None, **kwargs): + """Split the source string by the occurrences of the pattern, returning a + list containing the resulting substrings. If capturing parentheses are used + in pattern, then the text of all groups in the pattern are also returned as + part of the resulting list. If maxsplit is nonzero, at most maxsplit splits + occur, and the remainder of the string is returned as the final element of + the list.""" + return _compile(pattern, flags, kwargs).split(string, maxsplit, concurrent) + +def splititer(pattern, string, maxsplit=0, flags=0, concurrent=None, **kwargs): + "Return an iterator yielding the parts of a split string." + return _compile(pattern, flags, kwargs).splititer(string, maxsplit, + concurrent) + +def findall(pattern, string, flags=0, pos=None, endpos=None, overlapped=False, + concurrent=None, **kwargs): + """Return a list of all matches in the string. The matches may be overlapped + if overlapped is True. If one or more groups are present in the pattern, + return a list of groups; this will be a list of tuples if the pattern has + more than one group. Empty matches are included in the result.""" + return _compile(pattern, flags, kwargs).findall(string, pos, endpos, + overlapped, concurrent) + +def finditer(pattern, string, flags=0, pos=None, endpos=None, overlapped=False, + partial=False, concurrent=None, **kwargs): + """Return an iterator over all matches in the string. The matches may be + overlapped if overlapped is True. For each match, the iterator returns a + match object. 
Empty matches are included in the result.""" + return _compile(pattern, flags, kwargs).finditer(string, pos, endpos, + overlapped, concurrent, partial) + +def compile(pattern, flags=0, **kwargs): + "Compile a regular expression pattern, returning a pattern object." + return _compile(pattern, flags, kwargs) + +def purge(): + "Clear the regular expression cache" + _cache.clear() + +def template(pattern, flags=0): + "Compile a template pattern, returning a pattern object." + return _compile(pattern, flags | TEMPLATE) + +def escape(pattern, special_only=False): + "Escape all non-alphanumeric characters or special characters in pattern." + if isinstance(pattern, unicode): + s = [] + if special_only: + for c in pattern: + if c in _METACHARS: + s.append(u"\\") + s.append(c) + elif c == u"\x00": + s.append(u"\\000") + else: + s.append(c) + else: + for c in pattern: + if c in _ALNUM: + s.append(c) + elif c == u"\x00": + s.append(u"\\000") + else: + s.append(u"\\") + s.append(c) + + return u"".join(s) + else: + s = [] + if special_only: + for c in pattern: + if c in _METACHARS: + s.append("\\") + s.append(c) + elif c == "\x00": + s.append("\\000") + else: + s.append(c) + else: + for c in pattern: + if c in _ALNUM: + s.append(c) + elif c == "\x00": + s.append("\\000") + else: + s.append("\\") + s.append(c) + + return "".join(s) + +# -------------------------------------------------------------------- +# Internals. 
+ +import _regex_core +import sys +if sys.version_info < (2, 6): + from Python25 import _regex +elif sys.version_info < (2, 7): + from Python26 import _regex +else: + from Python27 import _regex +from threading import RLock as _RLock +from _regex_core import * +from _regex_core import (_ALL_VERSIONS, _ALL_ENCODINGS, _FirstSetError, + _UnscopedFlagSet, _check_group_features, _compile_firstset, + _compile_replacement, _flatten_code, _fold_case, _get_required_string, + _parse_pattern, _shrink_cache) +from _regex_core import (ALNUM as _ALNUM, Info as _Info, OP as _OP, Source as + _Source, Fuzzy as _Fuzzy) + +# Version 0 is the old behaviour, compatible with the original 're' module. +# Version 1 is the new behaviour, which differs slightly. + +DEFAULT_VERSION = VERSION0 + +_METACHARS = frozenset("()[]{}?*+|^$\\.") + +_regex_core.DEFAULT_VERSION = DEFAULT_VERSION + +# Caches for the patterns and replacements. +_cache = {} +_cache_lock = _RLock() +_named_args = {} +_replacement_cache = {} + +# Maximum size of the cache. +_MAXCACHE = 500 +_MAXREPCACHE = 500 + +def _compile(pattern, flags=0, kwargs={}): + "Compiles a regular expression to a PatternObject." + try: + # Do we know what keyword arguments are needed? + args_key = pattern, type(pattern), flags + args_needed = _named_args[args_key] + + # Are we being provided with its required keyword arguments? + args_supplied = set() + if args_needed: + for k, v in args_needed: + try: + args_supplied.add((k, frozenset(kwargs[k]))) + except KeyError: + raise error("missing named list") + + args_supplied = frozenset(args_supplied) + + # Have we already seen this regular expression and named list? + pattern_key = (pattern, type(pattern), flags, args_supplied, + DEFAULT_VERSION) + return _cache[pattern_key] + except KeyError: + # It's a new pattern, or new named list for a known pattern. + pass + + # Guess the encoding from the class of the pattern string. 
+ if isinstance(pattern, unicode): + guess_encoding = UNICODE + elif isinstance(pattern, str): + guess_encoding = ASCII + elif isinstance(pattern, _pattern_type): + if flags: + raise ValueError("can't process flags argument with a compiled pattern") + + return pattern + else: + raise TypeError("first argument must be a string or compiled pattern") + + # Set the default version in the core code in case it has been changed. + _regex_core.DEFAULT_VERSION = DEFAULT_VERSION + + caught_exception = None + global_flags = flags + + while True: + try: + source = _Source(pattern) + info = _Info(global_flags, source.char_type, kwargs) + info.guess_encoding = guess_encoding + source.ignore_space = bool(info.flags & VERBOSE) + parsed = _parse_pattern(source, info) + break + except _UnscopedFlagSet: + # Remember the global flags for the next attempt. + global_flags = info.global_flags + except error, e: + caught_exception = e + + if caught_exception: + raise error(str(caught_exception)) + + if not source.at_end(): + raise error("trailing characters in pattern at position %d" % source.pos) + + # Check the global flags for conflicts. + version = (info.flags & _ALL_VERSIONS) or DEFAULT_VERSION + if version not in (0, VERSION0, VERSION1): + raise ValueError("VERSION0 and VERSION1 flags are mutually incompatible") + + if (info.flags & _ALL_ENCODINGS) not in (0, ASCII, LOCALE, UNICODE): + raise ValueError("ASCII, LOCALE and UNICODE flags are mutually incompatible") + + if not (info.flags & _ALL_ENCODINGS): + if isinstance(pattern, unicode): + info.flags |= UNICODE + else: + info.flags |= ASCII + + reverse = bool(info.flags & REVERSE) + fuzzy = isinstance(parsed, _Fuzzy) + + # Should we print the parsed pattern? + if flags & DEBUG: + parsed.dump(indent=0, reverse=reverse) + + # Fix the group references. + parsed.fix_groups(reverse, False) + + # Optimise the parsed pattern. + parsed = parsed.optimise(info) + parsed = parsed.pack_characters(info) + + # Get the required string. 
+ req_offset, req_chars, req_flags = _get_required_string(parsed, info.flags) + + # Build the named lists. + named_lists = {} + named_list_indexes = [None] * len(info.named_lists_used) + args_needed = set() + for key, index in info.named_lists_used.items(): + name, case_flags = key + values = frozenset(kwargs[name]) + if case_flags: + items = frozenset(_fold_case(info, v) for v in values) + else: + items = values + named_lists[name] = values + named_list_indexes[index] = items + args_needed.add((name, values)) + + # Check the features of the groups. + _check_group_features(info, parsed) + + # Compile the parsed pattern. The result is a list of tuples. + code = parsed.compile(reverse) + + # Is there a group call to the pattern as a whole? + key = (0, reverse, fuzzy) + ref = info.call_refs.get(key) + if ref is not None: + code = [(_OP.CALL_REF, ref)] + code + [(_OP.END, )] + + # Add the final 'success' opcode. + code += [(_OP.SUCCESS, )] + + # Compile the additional copies of the groups that we need. + for group, rev, fuz in info.additional_groups: + code += group.compile(rev, fuz) + + # Flatten the code into a list of ints. + code = _flatten_code(code) + + if not parsed.has_simple_start(): + # Get the first set, if possible. + try: + fs_code = _compile_firstset(info, parsed.get_firstset(reverse)) + fs_code = _flatten_code(fs_code) + code = fs_code + code + except _FirstSetError: + pass + + # The named capture groups. + index_group = dict((v, n) for n, v in info.group_index.items()) + + # Create the PatternObject. + # + # Local flags like IGNORECASE affect the code generation, but aren't needed + # by the PatternObject itself. Conversely, global flags like LOCALE _don't_ + # affect the code generation but _are_ needed by the PatternObject. 
+ compiled_pattern = _regex.compile(pattern, info.flags | version, code, + info.group_index, index_group, named_lists, named_list_indexes, + req_offset, req_chars, req_flags, info.group_count) + + # Do we need to reduce the size of the cache? + if len(_cache) >= _MAXCACHE: + _cache_lock.acquire() + try: + _shrink_cache(_cache, _named_args, _MAXCACHE) + finally: + _cache_lock.release() + + args_needed = frozenset(args_needed) + + # Store this regular expression and named list. + pattern_key = (pattern, type(pattern), flags, args_needed, DEFAULT_VERSION) + _cache[pattern_key] = compiled_pattern + + # Store what keyword arguments are needed. + _named_args[args_key] = args_needed + + return compiled_pattern + +def _compile_replacement_helper(pattern, template): + "Compiles a replacement template." + # This function is called by the _regex module. + + # Have we seen this before? + key = pattern.pattern, pattern.flags, template + compiled = _replacement_cache.get(key) + if compiled is not None: + return compiled + + if len(_replacement_cache) >= _MAXREPCACHE: + _replacement_cache.clear() + + is_unicode = isinstance(template, unicode) + source = _Source(template) + if is_unicode: + def make_string(char_codes): + return u"".join(unichr(c) for c in char_codes) + else: + def make_string(char_codes): + return "".join(chr(c) for c in char_codes) + + compiled = [] + literal = [] + while True: + ch = source.get() + if not ch: + break + if ch == "\\": + # '_compile_replacement' will return either an int group reference + # or a string literal. It returns items (plural) in order to handle + # a 2-character literal (an invalid escape sequence). + is_group, items = _compile_replacement(source, pattern, is_unicode) + if is_group: + # It's a group, so first flush the literal. + if literal: + compiled.append(make_string(literal)) + literal = [] + compiled.extend(items) + else: + literal.extend(items) + else: + literal.append(ord(ch)) + + # Flush the literal. 
+ if literal: + compiled.append(make_string(literal)) + + _replacement_cache[key] = compiled + + return compiled + +# We define _pattern_type here after all the support objects have been defined. +_pattern_type = type(_compile("", 0, {})) + +# We'll define an alias for the 'compile' function so that the repr of a +# pattern object is eval-able. +Regex = compile + +# Register myself for pickling. +import copy_reg as _copy_reg + +def _pickle(p): + return _compile, (p.pattern, p.flags) + +_copy_reg.pickle(_pattern_type, _pickle, _compile) + +if not hasattr(str, "format"): + # Strings don't have the .format method (below Python 2.6). + while True: + _start = __doc__.find(" subf") + if _start < 0: + break + + _end = __doc__.find("\n", _start) + 1 + while __doc__.startswith(" ", _end): + _end = __doc__.find("\n", _end) + 1 + + __doc__ = __doc__[ : _start] + __doc__[_end : ] + + __all__ = [_name for _name in __all__ if not _name.startswith("subf")] + + del _start, _end + + del subf, subfn diff --git a/lib/regex/test_regex.py b/lib/regex/test_regex.py new file mode 100644 index 00000000..55b14c4d --- /dev/null +++ b/lib/regex/test_regex.py @@ -0,0 +1,3230 @@ +from __future__ import with_statement +import regex +import string +from weakref import proxy +import unittest +import copy +from test.test_support import run_unittest +import re + +# _AssertRaisesContext is defined here because the class doesn't exist before +# Python 2.7. 
+class _AssertRaisesContext(object): + """A context manager used to implement TestCase.assertRaises* methods.""" + + def __init__(self, expected, test_case, expected_regexp=None): + self.expected = expected + self.failureException = test_case.failureException + self.expected_regexp = expected_regexp + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_value, tb): + if exc_type is None: + try: + exc_name = self.expected.__name__ + except AttributeError: + exc_name = str(self.expected) + raise self.failureException( + "{0} not raised".format(exc_name)) + if not issubclass(exc_type, self.expected): + # let unexpected exceptions pass through + return False + self.exception = exc_value # store for later retrieval + if self.expected_regexp is None: + return True + + expected_regexp = self.expected_regexp + if isinstance(expected_regexp, basestring): + expected_regexp = re.compile(expected_regexp) + if not expected_regexp.search(str(exc_value)): + raise self.failureException('"%s" does not match "%s"' % + (expected_regexp.pattern, str(exc_value))) + return True + +class RegexTests(unittest.TestCase): + PATTERN_CLASS = "" + FLAGS_WITH_COMPILED_PAT = "can't process flags argument with a compiled pattern" + INVALID_GROUP_REF = "invalid group reference" + MISSING_GT = "missing >" + BAD_GROUP_NAME = "bad group name" + MISSING_LT = "missing <" + UNKNOWN_GROUP_I = "unknown group" + UNKNOWN_GROUP = "unknown group" + BAD_ESCAPE = "bad escape" + BAD_OCTAL_ESCAPE = "bad octal escape" + BAD_SET = "bad set" + STR_PAT_ON_BYTES = "can't use a string pattern on a bytes-like object" + BYTES_PAT_ON_STR = "can't use a bytes pattern on a string-like object" + STR_PAT_BYTES_TEMPL = "expected str instance, bytes found" + BYTES_PAT_STR_TEMPL = "expected bytes instance, str found" + BYTES_PAT_UNI_FLAG = "can't use UNICODE flag with a bytes pattern" + MIXED_FLAGS = "ASCII, LOCALE and UNICODE flags are mutually incompatible" + MISSING_RPAREN = "missing \\)" # Need to escape 
parenthesis for unittest. + TRAILING_CHARS = "trailing characters in pattern" + BAD_CHAR_RANGE = "bad character range" + NOTHING_TO_REPEAT = "nothing to repeat" + OPEN_GROUP = "can't refer to an open group" + DUPLICATE_GROUP = "duplicate group" + CANT_TURN_OFF = "bad inline flags: can't turn flags off" + UNDEF_CHAR_NAME = "undefined character name" + + # assertRaisesRegex is defined here because the method isn't in the + # superclass before Python 2.7. + def assertRaisesRegex(self, expected_exception, expected_regexp, + callable_obj=None, *args, **kwargs): + """Asserts that the message in a raised exception matches a regexp. + + Args: + expected_exception: Exception class expected to be raised. + expected_regexp: Regexp (re pattern object or string) expected + to be found in error message. + callable_obj: Function to be called. + args: Extra args. + kwargs: Extra kwargs. + """ + context = _AssertRaisesContext(expected_exception, self, expected_regexp) + if callable_obj is None: + return context + with context: + callable_obj(*args, **kwargs) + + def assertTypedEqual(self, actual, expect, msg=None): + self.assertEqual(actual, expect, msg) + + def recurse(actual, expect): + if isinstance(expect, (tuple, list)): + for x, y in zip(actual, expect): + recurse(x, y) + else: + self.assertIs(type(actual), type(expect), msg) + + recurse(actual, expect) + + def test_weakref(self): + s = 'QabbbcR' + x = regex.compile('ab+c') + y = proxy(x) + if x.findall('QabbbcR') != y.findall('QabbbcR'): + self.fail() + + def test_search_star_plus(self): + self.assertEqual(regex.search('a*', 'xxx').span(0), (0, 0)) + self.assertEqual(regex.search('x*', 'axx').span(), (0, 0)) + self.assertEqual(regex.search('x+', 'axx').span(0), (1, 3)) + self.assertEqual(regex.search('x+', 'axx').span(), (1, 3)) + self.assertEqual(regex.search('x', 'aaa'), None) + self.assertEqual(regex.match('a*', 'xxx').span(0), (0, 0)) + self.assertEqual(regex.match('a*', 'xxx').span(), (0, 0)) + 
self.assertEqual(regex.match('x*', 'xxxa').span(0), (0, 3)) + self.assertEqual(regex.match('x*', 'xxxa').span(), (0, 3)) + self.assertEqual(regex.match('a+', 'xxx'), None) + + def bump_num(self, matchobj): + int_value = int(matchobj[0]) + return str(int_value + 1) + + def test_basic_regex_sub(self): + self.assertEqual(regex.sub("(?i)b+", "x", "bbbb BBBB"), 'x x') + self.assertEqual(regex.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'), + '9.3 -3 24x100y') + self.assertEqual(regex.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3), + '9.3 -3 23x99y') + + self.assertEqual(regex.sub('.', lambda m: r"\n", 'x'), "\\n") + self.assertEqual(regex.sub('.', r"\n", 'x'), "\n") + + self.assertEqual(regex.sub('(?Px)', r'\g\g', 'xx'), 'xxxx') + self.assertEqual(regex.sub('(?Px)', r'\g\g<1>', 'xx'), 'xxxx') + self.assertEqual(regex.sub('(?Px)', r'\g\g', 'xx'), + 'xxxx') + self.assertEqual(regex.sub('(?Px)', r'\g<1>\g<1>', 'xx'), 'xxxx') + + self.assertEqual(regex.sub('a', r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D', + 'a'), "\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D") + self.assertEqual(regex.sub('a', '\t\n\v\r\f\a', 'a'), "\t\n\v\r\f\a") + self.assertEqual(regex.sub('a', '\t\n\v\r\f\a', 'a'), chr(9) + chr(10) + + chr(11) + chr(13) + chr(12) + chr(7)) + + self.assertEqual(regex.sub(r'^\s*', 'X', 'test'), 'Xtest') + + self.assertEqual(regex.sub(ur"x", ur"\x0A", u"x"), u"\n") + self.assertEqual(regex.sub(ur"x", ur"\u000A", u"x"), u"\n") + self.assertEqual(regex.sub(ur"x", ur"\U0000000A", u"x"), u"\n") + self.assertEqual(regex.sub(ur"x", ur"\N{LATIN CAPITAL LETTER A}", + u"x"), u"A") + + self.assertEqual(regex.sub(r"x", r"\x0A", "x"), "\n") + self.assertEqual(regex.sub(r"x", r"\u000A", "x"), "\\u000A") + self.assertEqual(regex.sub(r"x", r"\U0000000A", "x"), "\\U0000000A") + self.assertEqual(regex.sub(r"x", r"\N{LATIN CAPITAL LETTER A}", "x"), + "\\N{LATIN CAPITAL LETTER A}") + + def test_bug_449964(self): + # Fails for group followed by other escape. 
+ self.assertEqual(regex.sub(r'(?Px)', r'\g<1>\g<1>\b', 'xx'), + "xx\bxx\b") + + def test_bug_449000(self): + # Test for sub() on escaped characters. + self.assertEqual(regex.sub(r'\r\n', r'\n', 'abc\r\ndef\r\n'), + "abc\ndef\n") + self.assertEqual(regex.sub('\r\n', r'\n', 'abc\r\ndef\r\n'), + "abc\ndef\n") + self.assertEqual(regex.sub(r'\r\n', '\n', 'abc\r\ndef\r\n'), + "abc\ndef\n") + self.assertEqual(regex.sub('\r\n', '\n', 'abc\r\ndef\r\n'), + "abc\ndef\n") + + def test_bug_1140(self): + # regex.sub(x, y, u'') should return u'', not '', and + # regex.sub(x, y, '') should return '', not u''. + # Also: + # regex.sub(x, y, unicode(x)) should return unicode(y), and + # regex.sub(x, y, str(x)) should return + # str(y) if isinstance(y, str) else unicode(y). + for x in 'x', u'x': + for y in 'y', u'y': + z = regex.sub(x, y, u'') + self.assertEqual((type(z), z), (unicode, u'')) + z = regex.sub(x, y, '') + self.assertEqual((type(z), z), (str, '')) + z = regex.sub(x, y, unicode(x)) + self.assertEqual((type(z), z), (unicode, unicode(y))) + z = regex.sub(x, y, str(x)) + self.assertEqual((type(z), z), (type(y), y)) + + def test_bug_1661(self): + # Verify that flags do not get silently ignored with compiled patterns + pattern = regex.compile('.') + self.assertRaisesRegex(ValueError, self.FLAGS_WITH_COMPILED_PAT, + lambda: regex.match(pattern, 'A', regex.I)) + self.assertRaisesRegex(ValueError, self.FLAGS_WITH_COMPILED_PAT, + lambda: regex.search(pattern, 'A', regex.I)) + self.assertRaisesRegex(ValueError, self.FLAGS_WITH_COMPILED_PAT, + lambda: regex.findall(pattern, 'A', regex.I)) + self.assertRaisesRegex(ValueError, self.FLAGS_WITH_COMPILED_PAT, + lambda: regex.compile(pattern, regex.I)) + + def test_bug_3629(self): + # A regex that triggered a bug in the sre-code validator + self.assertEqual(repr(type(regex.compile("(?P)(?(quote))"))), + self.PATTERN_CLASS) + + def test_sub_template_numeric_escape(self): + # Bug 776311 and friends. 
+ self.assertEqual(regex.sub('x', r'\0', 'x'), "\0") + self.assertEqual(regex.sub('x', r'\000', 'x'), "\000") + self.assertEqual(regex.sub('x', r'\001', 'x'), "\001") + self.assertEqual(regex.sub('x', r'\008', 'x'), "\0" + "8") + self.assertEqual(regex.sub('x', r'\009', 'x'), "\0" + "9") + self.assertEqual(regex.sub('x', r'\111', 'x'), "\111") + self.assertEqual(regex.sub('x', r'\117', 'x'), "\117") + + self.assertEqual(regex.sub('x', r'\1111', 'x'), "\1111") + self.assertEqual(regex.sub('x', r'\1111', 'x'), "\111" + "1") + + self.assertEqual(regex.sub('x', r'\00', 'x'), '\x00') + self.assertEqual(regex.sub('x', r'\07', 'x'), '\x07') + self.assertEqual(regex.sub('x', r'\08', 'x'), "\0" + "8") + self.assertEqual(regex.sub('x', r'\09', 'x'), "\0" + "9") + self.assertEqual(regex.sub('x', r'\0a', 'x'), "\0" + "a") + + self.assertEqual(regex.sub(u'x', ur'\400', u'x'), u"\u0100") + self.assertEqual(regex.sub(u'x', ur'\777', u'x'), u"\u01FF") + self.assertEqual(regex.sub('x', r'\400', 'x'), "\x00") + self.assertEqual(regex.sub('x', r'\777', 'x'), "\xFF") + + self.assertRaisesRegex(regex.error, self.INVALID_GROUP_REF, lambda: + regex.sub('x', r'\1', 'x')) + self.assertRaisesRegex(regex.error, self.INVALID_GROUP_REF, lambda: + regex.sub('x', r'\8', 'x')) + self.assertRaisesRegex(regex.error, self.INVALID_GROUP_REF, lambda: + regex.sub('x', r'\9', 'x')) + self.assertRaisesRegex(regex.error, self.INVALID_GROUP_REF, lambda: + regex.sub('x', r'\11', 'x')) + self.assertRaisesRegex(regex.error, self.INVALID_GROUP_REF, lambda: + regex.sub('x', r'\18', 'x')) + self.assertRaisesRegex(regex.error, self.INVALID_GROUP_REF, lambda: + regex.sub('x', r'\1a', 'x')) + self.assertRaisesRegex(regex.error, self.INVALID_GROUP_REF, lambda: + regex.sub('x', r'\90', 'x')) + self.assertRaisesRegex(regex.error, self.INVALID_GROUP_REF, lambda: + regex.sub('x', r'\99', 'x')) + self.assertRaisesRegex(regex.error, self.INVALID_GROUP_REF, lambda: + regex.sub('x', r'\118', 'x')) # r'\11' + '8' + 
self.assertRaisesRegex(regex.error, self.INVALID_GROUP_REF, lambda: + regex.sub('x', r'\11a', 'x')) + self.assertRaisesRegex(regex.error, self.INVALID_GROUP_REF, lambda: + regex.sub('x', r'\181', 'x')) # r'\18' + '1' + self.assertRaisesRegex(regex.error, self.INVALID_GROUP_REF, lambda: + regex.sub('x', r'\800', 'x')) # r'\80' + '0' + + # In Python 2.3 (etc), these loop endlessly in sre_parser.py. + self.assertEqual(regex.sub('(((((((((((x)))))))))))', r'\11', 'x'), + 'x') + self.assertEqual(regex.sub('((((((((((y))))))))))(.)', r'\118', 'xyz'), + 'xz8') + self.assertEqual(regex.sub('((((((((((y))))))))))(.)', r'\11a', 'xyz'), + 'xza') + + def test_qualified_re_sub(self): + self.assertEqual(regex.sub('a', 'b', 'aaaaa'), 'bbbbb') + self.assertEqual(regex.sub('a', 'b', 'aaaaa', 1), 'baaaa') + + def test_bug_114660(self): + self.assertEqual(regex.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello there'), + 'hello there') + + def test_bug_462270(self): + # Test for empty sub() behaviour, see SF bug #462270 + self.assertEqual(regex.sub('(?V0)x*', '-', 'abxd'), '-a-b-d-') + self.assertEqual(regex.sub('(?V1)x*', '-', 'abxd'), '-a-b--d-') + self.assertEqual(regex.sub('x+', '-', 'abxd'), 'ab-d') + + def test_bug_14462(self): + # chr(255) is not a valid identifier in Python 2. 
+ group_name = u'\xFF' + self.assertRaisesRegex(regex.error, self.BAD_GROUP_NAME, lambda: + regex.search(ur'(?P<' + group_name + '>a)', u'a')) + + def test_symbolic_refs(self): + self.assertRaisesRegex(regex.error, self.MISSING_GT, lambda: + regex.sub('(?Px)', r'\gx)', r'\g<', 'xx')) + self.assertRaisesRegex(regex.error, self.MISSING_LT, lambda: + regex.sub('(?Px)', r'\g', 'xx')) + self.assertRaisesRegex(regex.error, self.BAD_GROUP_NAME, lambda: + regex.sub('(?Px)', r'\g', 'xx')) + self.assertRaisesRegex(regex.error, self.BAD_GROUP_NAME, lambda: + regex.sub('(?Px)', r'\g<1a1>', 'xx')) + self.assertRaisesRegex(IndexError, self.UNKNOWN_GROUP_I, lambda: + regex.sub('(?Px)', r'\g', 'xx')) + + # The new behaviour of unmatched but valid groups is to treat them like + # empty matches in the replacement template, like in Perl. + self.assertEqual(regex.sub('(?Px)|(?Py)', r'\g', 'xx'), '') + self.assertEqual(regex.sub('(?Px)|(?Py)', r'\2', 'xx'), '') + + # The old behaviour was to raise it as an IndexError. 
+ self.assertRaisesRegex(regex.error, self.BAD_GROUP_NAME, lambda: + regex.sub('(?Px)', r'\g<-1>', 'xx')) + + def test_re_subn(self): + self.assertEqual(regex.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2)) + self.assertEqual(regex.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1)) + self.assertEqual(regex.subn("b+", "x", "xyz"), ('xyz', 0)) + self.assertEqual(regex.subn("b*", "x", "xyz"), ('xxxyxzx', 4)) + self.assertEqual(regex.subn("b*", "x", "xyz", 2), ('xxxyz', 2)) + + def test_re_split(self): + self.assertEqual(regex.split(":", ":a:b::c"), ['', 'a', 'b', '', 'c']) + self.assertEqual(regex.split(":*", ":a:b::c"), ['', 'a', 'b', 'c']) + self.assertEqual(regex.split("(:*)", ":a:b::c"), ['', ':', 'a', ':', + 'b', '::', 'c']) + self.assertEqual(regex.split("(?::*)", ":a:b::c"), ['', 'a', 'b', 'c']) + self.assertEqual(regex.split("(:)*", ":a:b::c"), ['', ':', 'a', ':', + 'b', ':', 'c']) + self.assertEqual(regex.split("([b:]+)", ":a:b::c"), ['', ':', 'a', + ':b::', 'c']) + self.assertEqual(regex.split("(b)|(:+)", ":a:b::c"), ['', None, ':', + 'a', None, ':', '', 'b', None, '', None, '::', 'c']) + self.assertEqual(regex.split("(?:b)|(?::+)", ":a:b::c"), ['', 'a', '', + '', 'c']) + + self.assertEqual(regex.split("x", "xaxbxc"), ['', 'a', 'b', 'c']) + self.assertEqual([m for m in regex.splititer("x", "xaxbxc")], ['', 'a', + 'b', 'c']) + + self.assertEqual(regex.split("(?r)x", "xaxbxc"), ['c', 'b', 'a', '']) + self.assertEqual([m for m in regex.splititer("(?r)x", "xaxbxc")], ['c', + 'b', 'a', '']) + + self.assertEqual(regex.split("(x)|(y)", "xaxbxc"), ['', 'x', None, 'a', + 'x', None, 'b', 'x', None, 'c']) + self.assertEqual([m for m in regex.splititer("(x)|(y)", "xaxbxc")], + ['', 'x', None, 'a', 'x', None, 'b', 'x', None, 'c']) + + self.assertEqual(regex.split("(?r)(x)|(y)", "xaxbxc"), ['c', 'x', None, + 'b', 'x', None, 'a', 'x', None, '']) + self.assertEqual([m for m in regex.splititer("(?r)(x)|(y)", "xaxbxc")], + ['c', 'x', None, 'b', 'x', None, 'a', 'x', None, '']) + + 
self.assertEqual(regex.split(r"(?V1)\b", "a b c"), ['', 'a', ' ', 'b', + ' ', 'c', '']) + self.assertEqual(regex.split(r"(?V1)\m", "a b c"), ['', 'a ', 'b ', + 'c']) + self.assertEqual(regex.split(r"(?V1)\M", "a b c"), ['a', ' b', ' c', + '']) + + def test_qualified_re_split(self): + self.assertEqual(regex.split(":", ":a:b::c", 2), ['', 'a', 'b::c']) + self.assertEqual(regex.split(':', 'a:b:c:d', 2), ['a', 'b', 'c:d']) + self.assertEqual(regex.split("(:)", ":a:b::c", 2), ['', ':', 'a', ':', + 'b::c']) + self.assertEqual(regex.split("(:*)", ":a:b::c", 2), ['', ':', 'a', ':', + 'b::c']) + + def test_re_findall(self): + self.assertEqual(regex.findall(":+", "abc"), []) + self.assertEqual(regex.findall(":+", "a:b::c:::d"), [':', '::', ':::']) + self.assertEqual(regex.findall("(:+)", "a:b::c:::d"), [':', '::', + ':::']) + self.assertEqual(regex.findall("(:)(:*)", "a:b::c:::d"), [(':', ''), + (':', ':'), (':', '::')]) + + self.assertEqual(regex.findall(r"\((?P.{0,5}?TEST)\)", + "(MY TEST)"), ["MY TEST"]) + self.assertEqual(regex.findall(r"\((?P.{0,3}?TEST)\)", + "(MY TEST)"), ["MY TEST"]) + self.assertEqual(regex.findall(r"\((?P.{0,3}?T)\)", "(MY T)"), + ["MY T"]) + + self.assertEqual(regex.findall(r"[^a]{2}[A-Z]", "\n S"), [' S']) + self.assertEqual(regex.findall(r"[^a]{2,3}[A-Z]", "\n S"), ['\n S']) + self.assertEqual(regex.findall(r"[^a]{2,3}[A-Z]", "\n S"), [' S']) + + self.assertEqual(regex.findall(r"X(Y[^Y]+?){1,2}( |Q)+DEF", + "XYABCYPPQ\nQ DEF"), [('YPPQ\n', ' ')]) + + self.assertEqual(regex.findall(r"(\nTest(\n+.+?){0,2}?)?\n+End", + "\nTest\nxyz\nxyz\nEnd"), [('\nTest\nxyz\nxyz', '\nxyz')]) + + def test_bug_117612(self): + self.assertEqual(regex.findall(r"(a|(b))", "aba"), [('a', ''), ('b', + 'b'), ('a', '')]) + + def test_re_match(self): + self.assertEqual(regex.match('a', 'a')[:], ('a',)) + self.assertEqual(regex.match('(a)', 'a')[:], ('a', 'a')) + self.assertEqual(regex.match(r'(a)', 'a')[0], 'a') + self.assertEqual(regex.match(r'(a)', 'a')[1], 'a') + 
self.assertEqual(regex.match(r'(a)', 'a').group(1, 1), ('a', 'a')) + + pat = regex.compile('((a)|(b))(c)?') + self.assertEqual(pat.match('a')[:], ('a', 'a', 'a', None, None)) + self.assertEqual(pat.match('b')[:], ('b', 'b', None, 'b', None)) + self.assertEqual(pat.match('ac')[:], ('ac', 'a', 'a', None, 'c')) + self.assertEqual(pat.match('bc')[:], ('bc', 'b', None, 'b', 'c')) + self.assertEqual(pat.match('bc')[:], ('bc', 'b', None, 'b', 'c')) + + # A single group. + m = regex.match('(a)', 'a') + self.assertEqual(m.group(), 'a') + self.assertEqual(m.group(0), 'a') + self.assertEqual(m.group(1), 'a') + self.assertEqual(m.group(1, 1), ('a', 'a')) + + pat = regex.compile('(?:(?Pa)|(?Pb))(?Pc)?') + self.assertEqual(pat.match('a').group(1, 2, 3), ('a', None, None)) + self.assertEqual(pat.match('b').group('a1', 'b2', 'c3'), (None, 'b', + None)) + self.assertEqual(pat.match('ac').group(1, 'b2', 3), ('a', None, 'c')) + + def test_re_groupref_exists(self): + self.assertEqual(regex.match(r'^(\()?([^()]+)(?(1)\))$', '(a)')[:], + ('(a)', '(', 'a')) + self.assertEqual(regex.match(r'^(\()?([^()]+)(?(1)\))$', 'a')[:], ('a', + None, 'a')) + self.assertEqual(regex.match(r'^(\()?([^()]+)(?(1)\))$', 'a)'), None) + self.assertEqual(regex.match(r'^(\()?([^()]+)(?(1)\))$', '(a'), None) + self.assertEqual(regex.match('^(?:(a)|c)((?(1)b|d))$', 'ab')[:], ('ab', + 'a', 'b')) + self.assertEqual(regex.match('^(?:(a)|c)((?(1)b|d))$', 'cd')[:], ('cd', + None, 'd')) + self.assertEqual(regex.match('^(?:(a)|c)((?(1)|d))$', 'cd')[:], ('cd', + None, 'd')) + self.assertEqual(regex.match('^(?:(a)|c)((?(1)|d))$', 'a')[:], ('a', + 'a', '')) + + # Tests for bug #1177831: exercise groups other than the first group. 
+ p = regex.compile('(?Pa)(?Pb)?((?(g2)c|d))') + self.assertEqual(p.match('abc')[:], ('abc', 'a', 'b', 'c')) + self.assertEqual(p.match('ad')[:], ('ad', 'a', None, 'd')) + self.assertEqual(p.match('abd'), None) + self.assertEqual(p.match('ac'), None) + + def test_re_groupref(self): + self.assertEqual(regex.match(r'^(\|)?([^()]+)\1$', '|a|')[:], ('|a|', + '|', 'a')) + self.assertEqual(regex.match(r'^(\|)?([^()]+)\1?$', 'a')[:], ('a', + None, 'a')) + self.assertEqual(regex.match(r'^(\|)?([^()]+)\1$', 'a|'), None) + self.assertEqual(regex.match(r'^(\|)?([^()]+)\1$', '|a'), None) + self.assertEqual(regex.match(r'^(?:(a)|c)(\1)$', 'aa')[:], ('aa', 'a', + 'a')) + self.assertEqual(regex.match(r'^(?:(a)|c)(\1)?$', 'c')[:], ('c', None, + None)) + + self.assertEqual(regex.findall("(?i)(.{1,40}?),(.{1,40}?)(?:;)+(.{1,80}).{1,40}?\\3(\ |;)+(.{1,80}?)\\1", + "TEST, BEST; LEST ; Lest 123 Test, Best"), [('TEST', ' BEST', + ' LEST', ' ', '123 ')]) + + def test_groupdict(self): + self.assertEqual(regex.match('(?Pfirst) (?Psecond)', + 'first second').groupdict(), {'first': 'first', 'second': 'second'}) + + def test_expand(self): + self.assertEqual(regex.match("(?Pfirst) (?Psecond)", + "first second").expand(r"\2 \1 \g \g"), + 'second first second first') + + def test_repeat_minmax(self): + self.assertEqual(regex.match(r"^(\w){1}$", "abc"), None) + self.assertEqual(regex.match(r"^(\w){1}?$", "abc"), None) + self.assertEqual(regex.match(r"^(\w){1,2}$", "abc"), None) + self.assertEqual(regex.match(r"^(\w){1,2}?$", "abc"), None) + + self.assertEqual(regex.match(r"^(\w){3}$", "abc")[1], 'c') + self.assertEqual(regex.match(r"^(\w){1,3}$", "abc")[1], 'c') + self.assertEqual(regex.match(r"^(\w){1,4}$", "abc")[1], 'c') + self.assertEqual(regex.match(r"^(\w){3,4}?$", "abc")[1], 'c') + self.assertEqual(regex.match(r"^(\w){3}?$", "abc")[1], 'c') + self.assertEqual(regex.match(r"^(\w){1,3}?$", "abc")[1], 'c') + self.assertEqual(regex.match(r"^(\w){1,4}?$", "abc")[1], 'c') + 
self.assertEqual(regex.match(r"^(\w){3,4}?$", "abc")[1], 'c') + + self.assertEqual(regex.match("^x{1}$", "xxx"), None) + self.assertEqual(regex.match("^x{1}?$", "xxx"), None) + self.assertEqual(regex.match("^x{1,2}$", "xxx"), None) + self.assertEqual(regex.match("^x{1,2}?$", "xxx"), None) + + self.assertEqual(regex.match("^x{1}", "xxx")[0], 'x') + self.assertEqual(regex.match("^x{1}?", "xxx")[0], 'x') + self.assertEqual(regex.match("^x{0,1}", "xxx")[0], 'x') + self.assertEqual(regex.match("^x{0,1}?", "xxx")[0], '') + + self.assertEqual(bool(regex.match("^x{3}$", "xxx")), True) + self.assertEqual(bool(regex.match("^x{1,3}$", "xxx")), True) + self.assertEqual(bool(regex.match("^x{1,4}$", "xxx")), True) + self.assertEqual(bool(regex.match("^x{3,4}?$", "xxx")), True) + self.assertEqual(bool(regex.match("^x{3}?$", "xxx")), True) + self.assertEqual(bool(regex.match("^x{1,3}?$", "xxx")), True) + self.assertEqual(bool(regex.match("^x{1,4}?$", "xxx")), True) + self.assertEqual(bool(regex.match("^x{3,4}?$", "xxx")), True) + + self.assertEqual(regex.match("^x{}$", "xxx"), None) + self.assertEqual(bool(regex.match("^x{}$", "x{}")), True) + + def test_getattr(self): + self.assertEqual(regex.compile("(?i)(a)(b)").pattern, '(?i)(a)(b)') + self.assertEqual(regex.compile("(?i)(a)(b)").flags, regex.A | regex.I | + regex.DEFAULT_VERSION) + self.assertEqual(regex.compile(u"(?i)(a)(b)").flags, regex.I | regex.U + | regex.DEFAULT_VERSION) + self.assertEqual(regex.compile("(?i)(a)(b)").groups, 2) + self.assertEqual(regex.compile("(?i)(a)(b)").groupindex, {}) + + self.assertEqual(regex.compile("(?i)(?Pa)(?Pb)").groupindex, + {'first': 1, 'other': 2}) + + self.assertEqual(regex.match("(a)", "a").pos, 0) + self.assertEqual(regex.match("(a)", "a").endpos, 1) + + self.assertEqual(regex.search("b(c)", "abcdef").pos, 0) + self.assertEqual(regex.search("b(c)", "abcdef").endpos, 6) + self.assertEqual(regex.search("b(c)", "abcdef").span(), (1, 3)) + self.assertEqual(regex.search("b(c)", 
"abcdef").span(1), (2, 3)) + + self.assertEqual(regex.match("(a)", "a").string, 'a') + self.assertEqual(regex.match("(a)", "a").regs, ((0, 1), (0, 1))) + self.assertEqual(repr(type(regex.match("(a)", "a").re)), + self.PATTERN_CLASS) + + # Issue 14260. + p = regex.compile(r'abc(?Pdef)') + p.groupindex["n"] = 0 + self.assertEqual(p.groupindex["n"], 1) + + def test_special_escapes(self): + self.assertEqual(regex.search(r"\b(b.)\b", "abcd abc bcd bx")[1], 'bx') + self.assertEqual(regex.search(r"\B(b.)\B", "abc bcd bc abxd")[1], 'bx') + self.assertEqual(regex.search(r"\b(b.)\b", "abcd abc bcd bx", + regex.LOCALE)[1], 'bx') + self.assertEqual(regex.search(r"\B(b.)\B", "abc bcd bc abxd", + regex.LOCALE)[1], 'bx') + self.assertEqual(regex.search(ur"\b(b.)\b", u"abcd abc bcd bx", + regex.UNICODE)[1], u'bx') + self.assertEqual(regex.search(ur"\B(b.)\B", u"abc bcd bc abxd", + regex.UNICODE)[1], u'bx') + + self.assertEqual(regex.search(r"^abc$", "\nabc\n", regex.M)[0], 'abc') + self.assertEqual(regex.search(r"^\Aabc\Z$", "abc", regex.M)[0], 'abc') + self.assertEqual(regex.search(r"^\Aabc\Z$", "\nabc\n", regex.M), None) + + self.assertEqual(regex.search(ur"\b(b.)\b", u"abcd abc bcd bx")[1], + u'bx') + self.assertEqual(regex.search(ur"\B(b.)\B", u"abc bcd bc abxd")[1], + u'bx') + self.assertEqual(regex.search(ur"^abc$", u"\nabc\n", regex.M)[0], + u'abc') + self.assertEqual(regex.search(ur"^\Aabc\Z$", u"abc", regex.M)[0], + u'abc') + self.assertEqual(regex.search(ur"^\Aabc\Z$", u"\nabc\n", regex.M), + None) + + self.assertEqual(regex.search(r"\d\D\w\W\s\S", "1aa! a")[0], '1aa! a') + self.assertEqual(regex.search(r"\d\D\w\W\s\S", "1aa! a", + regex.LOCALE)[0], '1aa! a') + self.assertEqual(regex.search(ur"\d\D\w\W\s\S", u"1aa! a", + regex.UNICODE)[0], u'1aa! 
a') + + def test_bigcharset(self): + self.assertEqual(regex.match(ur"(?u)([\u2222\u2223])", u"\u2222")[1], + u'\u2222') + self.assertEqual(regex.match(ur"(?u)([\u2222\u2223])", u"\u2222", + regex.UNICODE)[1], u'\u2222') + self.assertEqual(u"".join(regex.findall(u".", + u"e\xe8\xe9\xea\xeb\u0113\u011b\u0117", flags=regex.UNICODE)), + u'e\xe8\xe9\xea\xeb\u0113\u011b\u0117') + self.assertEqual(u"".join(regex.findall(ur"[e\xe8\xe9\xea\xeb\u0113\u011b\u0117]", + u"e\xe8\xe9\xea\xeb\u0113\u011b\u0117", flags=regex.UNICODE)), + u'e\xe8\xe9\xea\xeb\u0113\u011b\u0117') + self.assertEqual(u"".join(regex.findall(ur"e|\xe8|\xe9|\xea|\xeb|\u0113|\u011b|\u0117", + u"e\xe8\xe9\xea\xeb\u0113\u011b\u0117", flags=regex.UNICODE)), + u'e\xe8\xe9\xea\xeb\u0113\u011b\u0117') + + def test_anyall(self): + self.assertEqual(regex.match("a.b", "a\nb", regex.DOTALL)[0], "a\nb") + self.assertEqual(regex.match("a.*b", "a\n\nb", regex.DOTALL)[0], + "a\n\nb") + + def test_non_consuming(self): + self.assertEqual(regex.match(r"(a(?=\s[^a]))", "a b")[1], 'a') + self.assertEqual(regex.match(r"(a(?=\s[^a]*))", "a b")[1], 'a') + self.assertEqual(regex.match(r"(a(?=\s[abc]))", "a b")[1], 'a') + self.assertEqual(regex.match(r"(a(?=\s[abc]*))", "a bc")[1], 'a') + self.assertEqual(regex.match(r"(a)(?=\s\1)", "a a")[1], 'a') + self.assertEqual(regex.match(r"(a)(?=\s\1*)", "a aa")[1], 'a') + self.assertEqual(regex.match(r"(a)(?=\s(abc|a))", "a a")[1], 'a') + + self.assertEqual(regex.match(r"(a(?!\s[^a]))", "a a")[1], 'a') + self.assertEqual(regex.match(r"(a(?!\s[abc]))", "a d")[1], 'a') + self.assertEqual(regex.match(r"(a)(?!\s\1)", "a b")[1], 'a') + self.assertEqual(regex.match(r"(a)(?!\s(abc|a))", "a b")[1], 'a') + + def test_ignore_case(self): + self.assertEqual(regex.match("abc", "ABC", regex.I)[0], 'ABC') + self.assertEqual(regex.match(u"abc", u"ABC", regex.I)[0], u'ABC') + + self.assertEqual(regex.match(r"(a\s[^a]*)", "a bb", regex.I)[1], + 'a bb') + self.assertEqual(regex.match(r"(a\s[abc])", "a b", 
regex.I)[1], 'a b') + self.assertEqual(regex.match(r"(a\s[abc]*)", "a bb", regex.I)[1], + 'a bb') + self.assertEqual(regex.match(r"((a)\s\2)", "a a", regex.I)[1], 'a a') + self.assertEqual(regex.match(r"((a)\s\2*)", "a aa", regex.I)[1], + 'a aa') + self.assertEqual(regex.match(r"((a)\s(abc|a))", "a a", regex.I)[1], + 'a a') + self.assertEqual(regex.match(r"((a)\s(abc|a)*)", "a aa", regex.I)[1], + 'a aa') + + # Issue 3511. + self.assertEqual(regex.match(r"[Z-a]", "_").span(), (0, 1)) + self.assertEqual(regex.match(r"(?i)[Z-a]", "_").span(), (0, 1)) + + self.assertEqual(bool(regex.match(ur"(?iu)nao", u"nAo")), True) + self.assertEqual(bool(regex.match(ur"(?iu)n\xE3o", u"n\xC3o")), True) + self.assertEqual(bool(regex.match(ur"(?iu)n\xE3o", u"N\xC3O")), True) + self.assertEqual(bool(regex.match(ur"(?iu)s", u"\u017F")), True) + + def test_case_folding(self): + self.assertEqual(regex.search(ur"(?fiu)ss", u"SS").span(), (0, 2)) + self.assertEqual(regex.search(ur"(?fiu)SS", u"ss").span(), (0, 2)) + self.assertEqual(regex.search(ur"(?fiu)SS", + u"\N{LATIN SMALL LETTER SHARP S}").span(), (0, 1)) + self.assertEqual(regex.search(ur"(?fi)\N{LATIN SMALL LETTER SHARP S}", + u"SS").span(), (0, 2)) + + self.assertEqual(regex.search(ur"(?fiu)\N{LATIN SMALL LIGATURE ST}", + u"ST").span(), (0, 2)) + self.assertEqual(regex.search(ur"(?fiu)ST", + u"\N{LATIN SMALL LIGATURE ST}").span(), (0, 1)) + self.assertEqual(regex.search(ur"(?fiu)ST", + u"\N{LATIN SMALL LIGATURE LONG S T}").span(), (0, 1)) + + self.assertEqual(regex.search(ur"(?fiu)SST", + u"\N{LATIN SMALL LETTER SHARP S}t").span(), (0, 2)) + self.assertEqual(regex.search(ur"(?fiu)SST", + u"s\N{LATIN SMALL LIGATURE LONG S T}").span(), (0, 2)) + self.assertEqual(regex.search(ur"(?fiu)SST", + u"s\N{LATIN SMALL LIGATURE ST}").span(), (0, 2)) + self.assertEqual(regex.search(ur"(?fiu)\N{LATIN SMALL LIGATURE ST}", + u"SST").span(), (1, 3)) + self.assertEqual(regex.search(ur"(?fiu)SST", + u"s\N{LATIN SMALL LIGATURE ST}").span(), (0, 2)) + 
+ self.assertEqual(regex.search(ur"(?fiu)FFI", + u"\N{LATIN SMALL LIGATURE FFI}").span(), (0, 1)) + self.assertEqual(regex.search(ur"(?fiu)FFI", + u"\N{LATIN SMALL LIGATURE FF}i").span(), (0, 2)) + self.assertEqual(regex.search(ur"(?fiu)FFI", + u"f\N{LATIN SMALL LIGATURE FI}").span(), (0, 2)) + self.assertEqual(regex.search(ur"(?fiu)\N{LATIN SMALL LIGATURE FFI}", + u"FFI").span(), (0, 3)) + self.assertEqual(regex.search(ur"(?fiu)\N{LATIN SMALL LIGATURE FF}i", + u"FFI").span(), (0, 3)) + self.assertEqual(regex.search(ur"(?fiu)f\N{LATIN SMALL LIGATURE FI}", + u"FFI").span(), (0, 3)) + + sigma = u"\u03A3\u03C3\u03C2" + for ch1 in sigma: + for ch2 in sigma: + if not regex.match(ur"(?fiu)" + ch1, ch2): + self.fail() + + self.assertEqual(bool(regex.search(ur"(?iuV1)ff", u"\uFB00\uFB01")), + True) + self.assertEqual(bool(regex.search(ur"(?iuV1)ff", u"\uFB01\uFB00")), + True) + self.assertEqual(bool(regex.search(ur"(?iuV1)fi", u"\uFB00\uFB01")), + True) + self.assertEqual(bool(regex.search(ur"(?iuV1)fi", u"\uFB01\uFB00")), + True) + self.assertEqual(bool(regex.search(ur"(?iuV1)fffi", u"\uFB00\uFB01")), + True) + self.assertEqual(bool(regex.search(ur"(?iuV1)f\uFB03", + u"\uFB00\uFB01")), True) + self.assertEqual(bool(regex.search(ur"(?iuV1)ff", u"\uFB00\uFB01")), + True) + self.assertEqual(bool(regex.search(ur"(?iuV1)fi", u"\uFB00\uFB01")), + True) + self.assertEqual(bool(regex.search(ur"(?iuV1)fffi", u"\uFB00\uFB01")), + True) + self.assertEqual(bool(regex.search(ur"(?iuV1)f\uFB03", + u"\uFB00\uFB01")), True) + self.assertEqual(bool(regex.search(ur"(?iuV1)f\uFB01", u"\uFB00i")), + True) + self.assertEqual(bool(regex.search(ur"(?iuV1)f\uFB01", u"\uFB00i")), + True) + + self.assertEqual(regex.findall(ur"(?iuV0)\m(?:word){e<=3}\M(?ne", u"affine", + options=[u"\N{LATIN SMALL LIGATURE FFI}"]).span(), (0, 6)) + self.assertEqual(regex.search(ur"(?fi)a\Lne", + u"a\N{LATIN SMALL LIGATURE FFI}ne", options=[u"ffi"]).span(), (0, 4)) + + def test_category(self): + 
self.assertEqual(regex.match(r"(\s)", " ")[1], ' ') + + def test_not_literal(self): + self.assertEqual(regex.search(r"\s([^a])", " b")[1], 'b') + self.assertEqual(regex.search(r"\s([^a]*)", " bb")[1], 'bb') + + def test_search_coverage(self): + self.assertEqual(regex.search(r"\s(b)", " b")[1], 'b') + self.assertEqual(regex.search(r"a\s", "a ")[0], 'a ') + + def test_re_escape(self): + p = "" + self.assertEqual(regex.escape(p), p) + for i in range(0, 256): + p += chr(i) + self.assertEqual(bool(regex.match(regex.escape(chr(i)), chr(i))), + True) + self.assertEqual(regex.match(regex.escape(chr(i)), chr(i)).span(), + (0, 1)) + + pat = regex.compile(regex.escape(p)) + self.assertEqual(pat.match(p).span(), (0, 256)) + + def test_constants(self): + if regex.I != regex.IGNORECASE: + self.fail() + if regex.L != regex.LOCALE: + self.fail() + if regex.M != regex.MULTILINE: + self.fail() + if regex.S != regex.DOTALL: + self.fail() + if regex.X != regex.VERBOSE: + self.fail() + + def test_flags(self): + for flag in [regex.I, regex.M, regex.X, regex.S, regex.L]: + self.assertEqual(repr(type(regex.compile('^pattern$', flag))), + self.PATTERN_CLASS) + + def test_sre_character_literals(self): + for i in [0, 8, 16, 32, 64, 127, 128, 255]: + self.assertEqual(bool(regex.match(r"\%03o" % i, chr(i))), True) + self.assertEqual(bool(regex.match(r"\%03o0" % i, chr(i) + "0")), + True) + self.assertEqual(bool(regex.match(r"\%03o8" % i, chr(i) + "8")), + True) + self.assertEqual(bool(regex.match(r"\x%02x" % i, chr(i))), True) + self.assertEqual(bool(regex.match(r"\x%02x0" % i, chr(i) + "0")), + True) + self.assertEqual(bool(regex.match(r"\x%02xz" % i, chr(i) + "z")), + True) + + self.assertRaisesRegex(regex.error, self.UNKNOWN_GROUP, lambda: + regex.match(r"\911", "")) + + def test_sre_character_class_literals(self): + for i in [0, 8, 16, 32, 64, 127, 128, 255]: + self.assertEqual(bool(regex.match(r"[\%03o]" % i, chr(i))), True) + self.assertEqual(bool(regex.match(r"[\%03o0]" % i, chr(i))), 
True) + self.assertEqual(bool(regex.match(r"[\%03o8]" % i, chr(i))), True) + self.assertEqual(bool(regex.match(r"[\x%02x]" % i, chr(i))), True) + self.assertEqual(bool(regex.match(r"[\x%02x0]" % i, chr(i))), True) + self.assertEqual(bool(regex.match(r"[\x%02xz]" % i, chr(i))), True) + + self.assertRaisesRegex(regex.error, self.BAD_OCTAL_ESCAPE, lambda: + regex.match(r"[\911]", "")) + + def test_bug_113254(self): + self.assertEqual(regex.match(r'(a)|(b)', 'b').start(1), -1) + self.assertEqual(regex.match(r'(a)|(b)', 'b').end(1), -1) + self.assertEqual(regex.match(r'(a)|(b)', 'b').span(1), (-1, -1)) + + def test_bug_527371(self): + # Bug described in patches 527371/672491. + self.assertEqual(regex.match(r'(a)?a','a').lastindex, None) + self.assertEqual(regex.match(r'(a)(b)?b','ab').lastindex, 1) + self.assertEqual(regex.match(r'(?Pa)(?Pb)?b','ab').lastgroup, + 'a') + self.assertEqual(regex.match("(?Pa(b))", "ab").lastgroup, 'a') + self.assertEqual(regex.match("((a))", "a").lastindex, 1) + + def test_bug_545855(self): + # Bug 545855 -- This pattern failed to cause a compile error as it + # should, instead provoking a TypeError. + self.assertRaisesRegex(regex.error, self.BAD_SET, lambda: + regex.compile('foo[a-')) + + def test_bug_418626(self): + # Bugs 418626 at al. -- Testing Greg Chapman's addition of op code + # SRE_OP_MIN_REPEAT_ONE for eliminating recursion on simple uses of + # pattern '*?' on a long string. + self.assertEqual(regex.match('.*?c', 10000 * 'ab' + 'cd').end(0), + 20001) + self.assertEqual(regex.match('.*?cd', 5000 * 'ab' + 'c' + 5000 * 'ab' + + 'cde').end(0), 20003) + self.assertEqual(regex.match('.*?cd', 20000 * 'abc' + 'de').end(0), + 60001) + # Non-simple '*?' still used to hit the recursion limit, before the + # non-recursive scheme was implemented. 
+ self.assertEqual(regex.search('(a|b)*?c', 10000 * 'ab' + 'cd').end(0), + 20001) + + def test_bug_612074(self): + pat = u"[" + regex.escape(u"\u2039") + u"]" + self.assertEqual(regex.compile(pat) and 1, 1) + + def test_stack_overflow(self): + # Nasty cases that used to overflow the straightforward recursive + # implementation of repeated groups. + self.assertEqual(regex.match('(x)*', 50000 * 'x')[1], 'x') + self.assertEqual(regex.match('(x)*y', 50000 * 'x' + 'y')[1], 'x') + self.assertEqual(regex.match('(x)*?y', 50000 * 'x' + 'y')[1], 'x') + + def test_scanner(self): + def s_ident(scanner, token): return token + def s_operator(scanner, token): return "op%s" % token + def s_float(scanner, token): return float(token) + def s_int(scanner, token): return int(token) + + scanner = regex.Scanner([(r"[a-zA-Z_]\w*", s_ident), (r"\d+\.\d*", + s_float), (r"\d+", s_int), (r"=|\+|-|\*|/", s_operator), (r"\s+", + None), ]) + + self.assertEqual(repr(type(scanner.scanner.scanner("").pattern)), + self.PATTERN_CLASS) + + self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"), (['sum', + 'op=', 3, 'op*', 'foo', 'op+', 312.5, 'op+', 'bar'], '')) + + def test_bug_448951(self): + # Bug 448951 (similar to 429357, but with single char match). + # (Also test greedy matches.) + for op in '', '?', '*': + self.assertEqual(regex.match(r'((.%s):)?z' % op, 'z')[:], ('z', + None, None)) + self.assertEqual(regex.match(r'((.%s):)?z' % op, 'a:z')[:], ('a:z', + 'a:', 'a')) + + def test_bug_725106(self): + # Capturing groups in alternatives in repeats. 
+ self.assertEqual(regex.match('^((a)|b)*', 'abc')[:], ('ab', 'b', 'a')) + self.assertEqual(regex.match('^(([ab])|c)*', 'abc')[:], ('abc', 'c', + 'b')) + self.assertEqual(regex.match('^((d)|[ab])*', 'abc')[:], ('ab', 'b', + None)) + self.assertEqual(regex.match('^((a)c|[ab])*', 'abc')[:], ('ab', 'b', + None)) + self.assertEqual(regex.match('^((a)|b)*?c', 'abc')[:], ('abc', 'b', + 'a')) + self.assertEqual(regex.match('^(([ab])|c)*?d', 'abcd')[:], ('abcd', + 'c', 'b')) + self.assertEqual(regex.match('^((d)|[ab])*?c', 'abc')[:], ('abc', 'b', + None)) + self.assertEqual(regex.match('^((a)c|[ab])*?c', 'abc')[:], ('abc', 'b', + None)) + + def test_bug_725149(self): + # Mark_stack_base restoring before restoring marks. + self.assertEqual(regex.match('(a)(?:(?=(b)*)c)*', 'abb')[:], ('a', 'a', + None)) + self.assertEqual(regex.match('(a)((?!(b)*))*', 'abb')[:], ('a', 'a', + None, None)) + + def test_bug_764548(self): + # Bug 764548, regex.compile() barfs on str/unicode subclasses. + class my_unicode(str): pass + pat = regex.compile(my_unicode("abc")) + self.assertEqual(pat.match("xyz"), None) + + def test_finditer(self): + it = regex.finditer(r":+", "a:b::c:::d") + self.assertEqual([item[0] for item in it], [':', '::', ':::']) + + def test_bug_926075(self): + if regex.compile('bug_926075') is regex.compile(u'bug_926075'): + self.fail() + + def test_bug_931848(self): + pattern = u"[\u002E\u3002\uFF0E\uFF61]" + self.assertEqual(regex.compile(pattern).split("a.b.c"), ['a', 'b', + 'c']) + + def test_bug_581080(self): + it = regex.finditer(r"\s", "a b") + self.assertEqual(it.next().span(), (1, 2)) + self.assertRaises(StopIteration, lambda: it.next()) + + scanner = regex.compile(r"\s").scanner("a b") + self.assertEqual(scanner.search().span(), (1, 2)) + self.assertEqual(scanner.search(), None) + + def test_bug_817234(self): + it = regex.finditer(r".*", "asdf") + self.assertEqual(it.next().span(), (0, 4)) + self.assertEqual(it.next().span(), (4, 4)) + 
self.assertRaises(StopIteration, lambda: it.next()) + + def test_empty_array(self): + # SF bug 1647541. + import array + for typecode in 'cbBuhHiIlLfd': + a = array.array(typecode) + self.assertEqual(regex.compile("bla").match(a), None) + self.assertEqual(regex.compile("").match(a)[1 : ], ()) + + def test_inline_flags(self): + # Bug #1700. + upper_char = unichr(0x1ea0) # Latin Capital Letter A with Dot Below + lower_char = unichr(0x1ea1) # Latin Small Letter A with Dot Below + + p = regex.compile(upper_char, regex.I | regex.U) + self.assertEqual(bool(p.match(lower_char)), True) + + p = regex.compile(lower_char, regex.I | regex.U) + self.assertEqual(bool(p.match(upper_char)), True) + + p = regex.compile('(?i)' + upper_char, regex.U) + self.assertEqual(bool(p.match(lower_char)), True) + + p = regex.compile('(?i)' + lower_char, regex.U) + self.assertEqual(bool(p.match(upper_char)), True) + + p = regex.compile('(?iu)' + upper_char) + self.assertEqual(bool(p.match(lower_char)), True) + + p = regex.compile('(?iu)' + lower_char) + self.assertEqual(bool(p.match(upper_char)), True) + + self.assertEqual(bool(regex.match(r"(?i)a", "A")), True) + self.assertEqual(bool(regex.match(r"a(?i)", "A")), True) + self.assertEqual(bool(regex.match(r"(?iV1)a", "A")), True) + self.assertEqual(regex.match(r"a(?iV1)", "A"), None) + + def test_dollar_matches_twice(self): + # $ matches the end of string, and just before the terminating \n. + pattern = regex.compile('$') + self.assertEqual(pattern.sub('#', 'a\nb\n'), 'a\nb#\n#') + self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a\nb\nc#') + self.assertEqual(pattern.sub('#', '\n'), '#\n#') + + pattern = regex.compile('$', regex.MULTILINE) + self.assertEqual(pattern.sub('#', 'a\nb\n' ), 'a#\nb#\n#') + self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a#\nb#\nc#') + self.assertEqual(pattern.sub('#', '\n'), '#\n#') + + def test_ascii_and_unicode_flag(self): + # Unicode patterns. 
+ for flags in (0, regex.UNICODE): + pat = regex.compile(u'\xc0', flags | regex.IGNORECASE) + self.assertEqual(bool(pat.match(u'\xe0')), True) + pat = regex.compile(u'\w', flags) + self.assertEqual(bool(pat.match(u'\xe0')), True) + + pat = regex.compile(u'\xc0', regex.ASCII | regex.IGNORECASE) + self.assertEqual(pat.match(u'\xe0'), None) + pat = regex.compile(u'(?a)\xc0', regex.IGNORECASE) + self.assertEqual(pat.match(u'\xe0'), None) + pat = regex.compile(u'\w', regex.ASCII) + self.assertEqual(pat.match(u'\xe0'), None) + pat = regex.compile(u'(?a)\w') + self.assertEqual(pat.match(u'\xe0'), None) + + # String patterns. + for flags in (0, regex.ASCII): + pat = regex.compile('\xc0', flags | regex.IGNORECASE) + self.assertEqual(pat.match('\xe0'), None) + pat = regex.compile('\w') + self.assertEqual(pat.match('\xe0'), None) + self.assertRaisesRegex(ValueError, self.MIXED_FLAGS, lambda: + regex.compile('(?au)\w')) + + def test_subscripting_match(self): + m = regex.match(r'(?\w)', 'xy') + if not m: + self.fail("Failed: expected match but returned None") + elif not m or m[0] != m.group(0) or m[1] != m.group(1): + self.fail("Failed") + if not m: + self.fail("Failed: expected match but returned None") + elif m[:] != ('x', 'x'): + self.fail("Failed: expected \"('x', 'x')\" but got %s instead" % + repr(m[:])) + + def test_new_named_groups(self): + m0 = regex.match(r'(?P\w)', 'x') + m1 = regex.match(r'(?\w)', 'x') + if not (m0 and m1 and m0[:] == m1[:]): + self.fail("Failed") + + def test_properties(self): + self.assertEqual(regex.match('(?i)\xC0', '\xE0'), None) + self.assertEqual(regex.match(r'(?i)\xC0', '\xE0'), None) + self.assertEqual(regex.match(r'\w', '\xE0'), None) + self.assertEqual(bool(regex.match(ur'(?u)\w', u'\xE0')), True) + + # Dropped the following test. It's not possible to determine what the + # correct result should be in the general case. 
+# self.assertEqual(bool(regex.match(r'(?L)\w', '\xE0')), +# '\xE0'.isalnum()) + + self.assertEqual(bool(regex.match(r'(?L)\d', '0')), True) + self.assertEqual(bool(regex.match(r'(?L)\s', ' ')), True) + self.assertEqual(bool(regex.match(r'(?L)\w', 'a')), True) + self.assertEqual(regex.match(r'(?L)\d', '?'), None) + self.assertEqual(regex.match(r'(?L)\s', '?'), None) + self.assertEqual(regex.match(r'(?L)\w', '?'), None) + + self.assertEqual(regex.match(r'(?L)\D', '0'), None) + self.assertEqual(regex.match(r'(?L)\S', ' '), None) + self.assertEqual(regex.match(r'(?L)\W', 'a'), None) + self.assertEqual(bool(regex.match(r'(?L)\D', '?')), True) + self.assertEqual(bool(regex.match(r'(?L)\S', '?')), True) + self.assertEqual(bool(regex.match(r'(?L)\W', '?')), True) + + self.assertEqual(bool(regex.match(ur'(?u)\p{Cyrillic}', + u'\N{CYRILLIC CAPITAL LETTER A}')), True) + self.assertEqual(bool(regex.match(ur'(?u)\p{IsCyrillic}', + u'\N{CYRILLIC CAPITAL LETTER A}')), True) + self.assertEqual(bool(regex.match(ur'(?u)\p{Script=Cyrillic}', + u'\N{CYRILLIC CAPITAL LETTER A}')), True) + self.assertEqual(bool(regex.match(ur'(?u)\p{InCyrillic}', + u'\N{CYRILLIC CAPITAL LETTER A}')), True) + self.assertEqual(bool(regex.match(ur'(?u)\p{Block=Cyrillic}', + u'\N{CYRILLIC CAPITAL LETTER A}')), True) + self.assertEqual(bool(regex.match(ur'(?u)[[:Cyrillic:]]', + u'\N{CYRILLIC CAPITAL LETTER A}')), True) + self.assertEqual(bool(regex.match(ur'(?u)[[:IsCyrillic:]]', + u'\N{CYRILLIC CAPITAL LETTER A}')), True) + self.assertEqual(bool(regex.match(ur'(?u)[[:Script=Cyrillic:]]', + u'\N{CYRILLIC CAPITAL LETTER A}')), True) + self.assertEqual(bool(regex.match(ur'(?u)[[:InCyrillic:]]', + u'\N{CYRILLIC CAPITAL LETTER A}')), True) + self.assertEqual(bool(regex.match(ur'(?u)[[:Block=Cyrillic:]]', + u'\N{CYRILLIC CAPITAL LETTER A}')), True) + + self.assertEqual(bool(regex.match(ur'(?u)\P{Cyrillic}', + u'\N{LATIN CAPITAL LETTER A}')), True) + self.assertEqual(bool(regex.match(ur'(?u)\P{IsCyrillic}', + 
u'\N{LATIN CAPITAL LETTER A}')), True) + self.assertEqual(bool(regex.match(ur'(?u)\P{Script=Cyrillic}', + u'\N{LATIN CAPITAL LETTER A}')), True) + self.assertEqual(bool(regex.match(ur'(?u)\P{InCyrillic}', + u'\N{LATIN CAPITAL LETTER A}')), True) + self.assertEqual(bool(regex.match(ur'(?u)\P{Block=Cyrillic}', + u'\N{LATIN CAPITAL LETTER A}')), True) + self.assertEqual(bool(regex.match(ur'(?u)\p{^Cyrillic}', + u'\N{LATIN CAPITAL LETTER A}')), True) + self.assertEqual(bool(regex.match(ur'(?u)\p{^IsCyrillic}', + u'\N{LATIN CAPITAL LETTER A}')), True) + self.assertEqual(bool(regex.match(ur'(?u)\p{^Script=Cyrillic}', + u'\N{LATIN CAPITAL LETTER A}')), True) + self.assertEqual(bool(regex.match(ur'(?u)\p{^InCyrillic}', + u'\N{LATIN CAPITAL LETTER A}')), True) + self.assertEqual(bool(regex.match(ur'(?u)\p{^Block=Cyrillic}', + u'\N{LATIN CAPITAL LETTER A}')), True) + self.assertEqual(bool(regex.match(ur'(?u)[[:^Cyrillic:]]', + u'\N{LATIN CAPITAL LETTER A}')), True) + self.assertEqual(bool(regex.match(ur'(?u)[[:^IsCyrillic:]]', + u'\N{LATIN CAPITAL LETTER A}')), True) + self.assertEqual(bool(regex.match(ur'(?u)[[:^Script=Cyrillic:]]', + u'\N{LATIN CAPITAL LETTER A}')), True) + self.assertEqual(bool(regex.match(ur'(?u)[[:^InCyrillic:]]', + u'\N{LATIN CAPITAL LETTER A}')), True) + self.assertEqual(bool(regex.match(ur'(?u)[[:^Block=Cyrillic:]]', + u'\N{LATIN CAPITAL LETTER A}')), True) + + self.assertEqual(bool(regex.match(ur'(?u)\d', u'0')), True) + self.assertEqual(bool(regex.match(ur'(?u)\s', u' ')), True) + self.assertEqual(bool(regex.match(ur'(?u)\w', u'A')), True) + self.assertEqual(regex.match(ur"(?u)\d", u"?"), None) + self.assertEqual(regex.match(ur"(?u)\s", u"?"), None) + self.assertEqual(regex.match(ur"(?u)\w", u"?"), None) + self.assertEqual(regex.match(ur"(?u)\D", u"0"), None) + self.assertEqual(regex.match(ur"(?u)\S", u" "), None) + self.assertEqual(regex.match(ur"(?u)\W", u"A"), None) + self.assertEqual(bool(regex.match(ur'(?u)\D', u'?')), True) + 
self.assertEqual(bool(regex.match(ur'(?u)\S', u'?')), True) + self.assertEqual(bool(regex.match(ur'(?u)\W', u'?')), True) + + self.assertEqual(bool(regex.match(ur'(?u)\p{L}', u'A')), True) + self.assertEqual(bool(regex.match(ur'(?u)\p{L}', u'a')), True) + self.assertEqual(bool(regex.match(ur'(?u)\p{Lu}', u'A')), True) + self.assertEqual(bool(regex.match(ur'(?u)\p{Ll}', u'a')), True) + + self.assertEqual(bool(regex.match(ur'(?u)(?i)a', u'a')), True) + self.assertEqual(bool(regex.match(ur'(?u)(?i)a', u'A')), True) + + self.assertEqual(bool(regex.match(ur'(?u)\w', u'0')), True) + self.assertEqual(bool(regex.match(ur'(?u)\w', u'a')), True) + self.assertEqual(bool(regex.match(ur'(?u)\w', u'_')), True) + + self.assertEqual(regex.match(ur"(?u)\X", u"\xE0").span(), (0, 1)) + self.assertEqual(regex.match(ur"(?u)\X", u"a\u0300").span(), (0, 2)) + self.assertEqual(regex.findall(ur"(?u)\X", + u"a\xE0a\u0300e\xE9e\u0301"), [u'a', u'\xe0', u'a\u0300', u'e', + u'\xe9', u'e\u0301']) + self.assertEqual(regex.findall(ur"(?u)\X{3}", + u"a\xE0a\u0300e\xE9e\u0301"), [u'a\xe0a\u0300', u'e\xe9e\u0301']) + self.assertEqual(regex.findall(ur"(?u)\X", u"\r\r\n\u0301A\u0301"), + [u'\r', u'\r\n', u'\u0301', u'A\u0301']) + + self.assertEqual(bool(regex.match(ur'(?u)\p{Ll}', u'a')), True) + + chars_u = u"-09AZaz_\u0393\u03b3" + chars_b = "-09AZaz_" + word_set = set("Ll Lm Lo Lt Lu Mc Me Mn Nd Nl No Pc".split()) + + tests = [ + (ur"(?u)\w", chars_u, u"09AZaz_\u0393\u03b3"), + (ur"(?u)[[:word:]]", chars_u, u"09AZaz_\u0393\u03b3"), + (ur"(?u)\W", chars_u, u"-"), + (ur"(?u)[[:^word:]]", chars_u, u"-"), + (ur"(?u)\d", chars_u, u"09"), + (ur"(?u)[[:digit:]]", chars_u, u"09"), + (ur"(?u)\D", chars_u, u"-AZaz_\u0393\u03b3"), + (ur"(?u)[[:^digit:]]", chars_u, u"-AZaz_\u0393\u03b3"), + (ur"(?u)[[:alpha:]]", chars_u, u"AZaz\u0393\u03b3"), + (ur"(?u)[[:^alpha:]]", chars_u, u"-09_"), + (ur"(?u)[[:alnum:]]", chars_u, u"09AZaz\u0393\u03b3"), + (ur"(?u)[[:^alnum:]]", chars_u, u"-_"), + (ur"(?u)[[:xdigit:]]", 
chars_u, u"09Aa"), + (ur"(?u)[[:^xdigit:]]", chars_u, u"-Zz_\u0393\u03b3"), + (ur"(?u)\p{InBasicLatin}", u"a\xE1", u"a"), + (ur"(?u)\P{InBasicLatin}", u"a\xE1", u"\xE1"), + (ur"(?iu)\p{InBasicLatin}", u"a\xE1", u"a"), + (ur"(?iu)\P{InBasicLatin}", u"a\xE1", u"\xE1"), + + (r"(?L)\w", chars_b, "09AZaz_"), + (r"(?L)[[:word:]]", chars_b, "09AZaz_"), + (r"(?L)\W", chars_b, "-"), + (r"(?L)[[:^word:]]", chars_b, "-"), + (r"(?L)\d", chars_b, "09"), + (r"(?L)[[:digit:]]", chars_b, "09"), + (r"(?L)\D", chars_b, "-AZaz_"), + (r"(?L)[[:^digit:]]", chars_b, "-AZaz_"), + (r"(?L)[[:alpha:]]", chars_b, "AZaz"), + (r"(?L)[[:^alpha:]]", chars_b, "-09_"), + (r"(?L)[[:alnum:]]", chars_b, "09AZaz"), + (r"(?L)[[:^alnum:]]", chars_b, "-_"), + (r"(?L)[[:xdigit:]]", chars_b, "09Aa"), + (r"(?L)[[:^xdigit:]]", chars_b, "-Zz_"), + + (r"\w", chars_b, "09AZaz_"), + (r"[[:word:]]", chars_b, "09AZaz_"), + (r"\W", chars_b, "-"), + (r"[[:^word:]]", chars_b, "-"), + (r"\d", chars_b, "09"), + (r"[[:digit:]]", chars_b, "09"), + (r"\D", chars_b, "-AZaz_"), + (r"[[:^digit:]]", chars_b, "-AZaz_"), + (r"[[:alpha:]]", chars_b, "AZaz"), + (r"[[:^alpha:]]", chars_b, "-09_"), + (r"[[:alnum:]]", chars_b, "09AZaz"), + (r"[[:^alnum:]]", chars_b, "-_"), + (r"[[:xdigit:]]", chars_b, "09Aa"), + (r"[[:^xdigit:]]", chars_b, "-Zz_"), + ] + for pattern, chars, expected in tests: + try: + if chars[ : 0].join(regex.findall(pattern, chars)) != expected: + self.fail("Failed: %s" % pattern) + except Exception, e: + self.fail("Failed: %s raised %s" % (pattern, repr(e))) + + self.assertEqual(bool(regex.match(ur"(?u)\p{NumericValue=0}", u"0")), + True) + self.assertEqual(bool(regex.match(ur"(?u)\p{NumericValue=1/2}", + u"\N{VULGAR FRACTION ONE HALF}")), True) + self.assertEqual(bool(regex.match(ur"(?u)\p{NumericValue=0.5}", + u"\N{VULGAR FRACTION ONE HALF}")), True) + + def test_word_class(self): + self.assertEqual(regex.findall(ur"(?u)\w+", + u" \u0939\u093f\u0928\u094d\u0926\u0940,"), + 
[u'\u0939\u093f\u0928\u094d\u0926\u0940']) + self.assertEqual(regex.findall(ur"(?u)\W+", + u" \u0939\u093f\u0928\u094d\u0926\u0940,"), [u' ', u',']) + self.assertEqual(regex.split(ur"(?uV1)\b", + u" \u0939\u093f\u0928\u094d\u0926\u0940,"), [u' ', + u'\u0939\u093f\u0928\u094d\u0926\u0940', u',']) + self.assertEqual(regex.split(ur"(?uV1)\B", + u" \u0939\u093f\u0928\u094d\u0926\u0940,"), [u'', u' \u0939', + u'\u093f', u'\u0928', u'\u094d', u'\u0926', u'\u0940,', u'']) + + def test_search_anchor(self): + self.assertEqual(regex.findall(r"\G\w{2}", "abcd ef"), ['ab', 'cd']) + + def test_search_reverse(self): + self.assertEqual(regex.findall(r"(?r).", "abc"), ['c', 'b', 'a']) + self.assertEqual(regex.findall(r"(?r).", "abc", overlapped=True), ['c', + 'b', 'a']) + self.assertEqual(regex.findall(r"(?r)..", "abcde"), ['de', 'bc']) + self.assertEqual(regex.findall(r"(?r)..", "abcde", overlapped=True), + ['de', 'cd', 'bc', 'ab']) + self.assertEqual(regex.findall(r"(?r)(.)(-)(.)", "a-b-c", + overlapped=True), [("b", "-", "c"), ("a", "-", "b")]) + + self.assertEqual([m[0] for m in regex.finditer(r"(?r).", "abc")], ['c', + 'b', 'a']) + self.assertEqual([m[0] for m in regex.finditer(r"(?r)..", "abcde", + overlapped=True)], ['de', 'cd', 'bc', 'ab']) + self.assertEqual([m[0] for m in regex.finditer(r"(?r).", "abc")], ['c', + 'b', 'a']) + self.assertEqual([m[0] for m in regex.finditer(r"(?r)..", "abcde", + overlapped=True)], ['de', 'cd', 'bc', 'ab']) + + self.assertEqual(regex.findall(r"^|\w+", "foo bar"), ['', 'foo', + 'bar']) + self.assertEqual(regex.findall(r"(?V1)^|\w+", "foo bar"), ['', 'foo', + 'bar']) + self.assertEqual(regex.findall(r"(?r)^|\w+", "foo bar"), ['bar', 'foo', + '']) + self.assertEqual(regex.findall(r"(?rV1)^|\w+", "foo bar"), ['bar', + 'foo', '']) + + self.assertEqual([m[0] for m in regex.finditer(r"^|\w+", "foo bar")], + ['', 'foo', 'bar']) + self.assertEqual([m[0] for m in regex.finditer(r"(?V1)^|\w+", + "foo bar")], ['', 'foo', 'bar']) + 
self.assertEqual([m[0] for m in regex.finditer(r"(?r)^|\w+", + "foo bar")], ['bar', 'foo', '']) + self.assertEqual([m[0] for m in regex.finditer(r"(?rV1)^|\w+", + "foo bar")], ['bar', 'foo', '']) + + self.assertEqual(regex.findall(r"\G\w{2}", "abcd ef"), ['ab', 'cd']) + self.assertEqual(regex.findall(r".{2}(?<=\G.*)", "abcd"), ['ab', 'cd']) + self.assertEqual(regex.findall(r"(?r)\G\w{2}", "abcd ef"), []) + self.assertEqual(regex.findall(r"(?r)\w{2}\G", "abcd ef"), ['ef']) + + self.assertEqual(regex.findall(r"q*", "qqwe"), ['qq', '', '', '']) + self.assertEqual(regex.findall(r"(?V1)q*", "qqwe"), ['qq', '', '', '']) + self.assertEqual(regex.findall(r"(?r)q*", "qqwe"), ['', '', 'qq', '']) + self.assertEqual(regex.findall(r"(?rV1)q*", "qqwe"), ['', '', 'qq', + '']) + + self.assertEqual(regex.findall(".", "abcd", pos=1, endpos=3), ['b', + 'c']) + self.assertEqual(regex.findall(".", "abcd", pos=1, endpos=-1), ['b', + 'c']) + self.assertEqual([m[0] for m in regex.finditer(".", "abcd", pos=1, + endpos=3)], ['b', 'c']) + self.assertEqual([m[0] for m in regex.finditer(".", "abcd", pos=1, + endpos=-1)], ['b', 'c']) + + self.assertEqual([m[0] for m in regex.finditer("(?r).", "abcd", pos=1, + endpos=3)], ['c', 'b']) + self.assertEqual([m[0] for m in regex.finditer("(?r).", "abcd", pos=1, + endpos=-1)], ['c', 'b']) + self.assertEqual(regex.findall("(?r).", "abcd", pos=1, endpos=3), ['c', + 'b']) + self.assertEqual(regex.findall("(?r).", "abcd", pos=1, endpos=-1), + ['c', 'b']) + + self.assertEqual(regex.findall(r"[ab]", "aB", regex.I), ['a', 'B']) + self.assertEqual(regex.findall(r"(?r)[ab]", "aB", regex.I), ['B', 'a']) + + self.assertEqual(regex.findall(r"(?r).{2}", "abc"), ['bc']) + self.assertEqual(regex.findall(r"(?r).{2}", "abc", overlapped=True), + ['bc', 'ab']) + self.assertEqual(regex.findall(r"(\w+) (\w+)", + "first second third fourth fifth"), [('first', 'second'), ('third', + 'fourth')]) + self.assertEqual(regex.findall(r"(?r)(\w+) (\w+)", + "first second third fourth 
fifth"), [('fourth', 'fifth'), ('second', + 'third')]) + + self.assertEqual([m[0] for m in regex.finditer(r"(?r).{2}", "abc")], + ['bc']) + self.assertEqual([m[0] for m in regex.finditer(r"(?r).{2}", "abc", + overlapped=True)], ['bc', 'ab']) + self.assertEqual([m[0] for m in regex.finditer(r"(\w+) (\w+)", + "first second third fourth fifth")], ['first second', + 'third fourth']) + self.assertEqual([m[0] for m in regex.finditer(r"(?r)(\w+) (\w+)", + "first second third fourth fifth")], ['fourth fifth', + 'second third']) + + self.assertEqual(regex.search("abcdef", "abcdef").span(), (0, 6)) + self.assertEqual(regex.search("(?r)abcdef", "abcdef").span(), (0, 6)) + self.assertEqual(regex.search("(?i)abcdef", "ABCDEF").span(), (0, 6)) + self.assertEqual(regex.search("(?ir)abcdef", "ABCDEF").span(), (0, 6)) + + self.assertEqual(regex.sub(r"(.)", r"\1", "abc"), 'abc') + self.assertEqual(regex.sub(r"(?r)(.)", r"\1", "abc"), 'abc') + + def test_atomic(self): + # Issue 433030. + self.assertEqual(regex.search(r"(?>a*)a", "aa"), None) + + def test_possessive(self): + # Single-character non-possessive. + self.assertEqual(regex.search(r"a?a", "a").span(), (0, 1)) + self.assertEqual(regex.search(r"a*a", "aaa").span(), (0, 3)) + self.assertEqual(regex.search(r"a+a", "aaa").span(), (0, 3)) + self.assertEqual(regex.search(r"a{1,3}a", "aaa").span(), (0, 3)) + + # Multiple-character non-possessive. + self.assertEqual(regex.search(r"(?:ab)?ab", "ab").span(), (0, 2)) + self.assertEqual(regex.search(r"(?:ab)*ab", "ababab").span(), (0, 6)) + self.assertEqual(regex.search(r"(?:ab)+ab", "ababab").span(), (0, 6)) + self.assertEqual(regex.search(r"(?:ab){1,3}ab", "ababab").span(), (0, + 6)) + + # Single-character possessive. + self.assertEqual(regex.search(r"a?+a", "a"), None) + self.assertEqual(regex.search(r"a*+a", "aaa"), None) + self.assertEqual(regex.search(r"a++a", "aaa"), None) + self.assertEqual(regex.search(r"a{1,3}+a", "aaa"), None) + + # Multiple-character possessive. 
+ self.assertEqual(regex.search(r"(?:ab)?+ab", "ab"), None) + self.assertEqual(regex.search(r"(?:ab)*+ab", "ababab"), None) + self.assertEqual(regex.search(r"(?:ab)++ab", "ababab"), None) + self.assertEqual(regex.search(r"(?:ab){1,3}+ab", "ababab"), None) + + def test_zerowidth(self): + # Issue 3262. + self.assertEqual(regex.split(r"\b", "a b"), ['a b']) + self.assertEqual(regex.split(r"(?V1)\b", "a b"), ['', 'a', ' ', 'b', + '']) + + # Issue 1647489. + self.assertEqual(regex.findall(r"^|\w+", "foo bar"), ['', 'foo', + 'bar']) + self.assertEqual([m[0] for m in regex.finditer(r"^|\w+", "foo bar")], + ['', 'foo', 'bar']) + self.assertEqual(regex.findall(r"(?r)^|\w+", "foo bar"), ['bar', 'foo', + '']) + self.assertEqual([m[0] for m in regex.finditer(r"(?r)^|\w+", + "foo bar")], ['bar', 'foo', '']) + self.assertEqual(regex.findall(r"(?V1)^|\w+", "foo bar"), ['', 'foo', + 'bar']) + self.assertEqual([m[0] for m in regex.finditer(r"(?V1)^|\w+", + "foo bar")], ['', 'foo', 'bar']) + self.assertEqual(regex.findall(r"(?rV1)^|\w+", "foo bar"), ['bar', + 'foo', '']) + self.assertEqual([m[0] for m in regex.finditer(r"(?rV1)^|\w+", + "foo bar")], ['bar', 'foo', '']) + + self.assertEqual(regex.split("", "xaxbxc"), ['xaxbxc']) + self.assertEqual([m for m in regex.splititer("", "xaxbxc")], + ['xaxbxc']) + + self.assertEqual(regex.split("(?r)", "xaxbxc"), ['xaxbxc']) + self.assertEqual([m for m in regex.splititer("(?r)", "xaxbxc")], + ['xaxbxc']) + + self.assertEqual(regex.split("(?V1)", "xaxbxc"), ['', 'x', 'a', 'x', + 'b', 'x', 'c', '']) + self.assertEqual([m for m in regex.splititer("(?V1)", "xaxbxc")], ['', + 'x', 'a', 'x', 'b', 'x', 'c', '']) + + self.assertEqual(regex.split("(?rV1)", "xaxbxc"), ['', 'c', 'x', 'b', + 'x', 'a', 'x', '']) + self.assertEqual([m for m in regex.splititer("(?rV1)", "xaxbxc")], ['', + 'c', 'x', 'b', 'x', 'a', 'x', '']) + + def test_scoped_and_inline_flags(self): + # Issues 433028, 433024, 433027. 
+ self.assertEqual(regex.search(r"(?i)Ab", "ab").span(), (0, 2)) + self.assertEqual(regex.search(r"(?i:A)b", "ab").span(), (0, 2)) + self.assertEqual(regex.search(r"A(?i)b", "ab").span(), (0, 2)) + self.assertEqual(regex.search(r"A(?iV1)b", "ab"), None) + + self.assertRaisesRegex(regex.error, self.CANT_TURN_OFF, lambda: + regex.search(r"(?V0-i)Ab", "ab", flags=regex.I)) + + self.assertEqual(regex.search(r"(?V0)Ab", "ab"), None) + self.assertEqual(regex.search(r"(?V1)Ab", "ab"), None) + self.assertEqual(regex.search(r"(?V1-i)Ab", "ab", flags=regex.I), None) + self.assertEqual(regex.search(r"(?-i:A)b", "ab", flags=regex.I), None) + self.assertEqual(regex.search(r"A(?V1-i)b", "ab", + flags=regex.I).span(), (0, 2)) + + def test_repeated_repeats(self): + # Issue 2537. + self.assertEqual(regex.search(r"(?:a+)+", "aaa").span(), (0, 3)) + self.assertEqual(regex.search(r"(?:(?:ab)+c)+", "abcabc").span(), (0, + 6)) + + def test_lookbehind(self): + self.assertEqual(regex.search(r"123(?<=a\d+)", "a123").span(), (1, 4)) + self.assertEqual(regex.search(r"123(?<=a\d+)", "b123"), None) + self.assertEqual(regex.search(r"123(?[ \t]+\r*$)|(?P(?<=[^\n])\Z)') + self.assertEqual(pat.subn(lambda m: '<' + m.lastgroup + '>', + 'foobar '), ('foobar', 1)) + self.assertEqual([m.group() for m in pat.finditer('foobar ')], [' ', + '']) + pat = regex.compile(r'(?mV1)(?P[ \t]+\r*$)|(?P(?<=[^\n])\Z)') + self.assertEqual(pat.subn(lambda m: '<' + m.lastgroup + '>', + 'foobar '), ('foobar', 2)) + self.assertEqual([m.group() for m in pat.finditer('foobar ')], [' ', + '']) + + def test_overlapped(self): + self.assertEqual(regex.findall(r"..", "abcde"), ['ab', 'cd']) + self.assertEqual(regex.findall(r"..", "abcde", overlapped=True), ['ab', + 'bc', 'cd', 'de']) + self.assertEqual(regex.findall(r"(?r)..", "abcde"), ['de', 'bc']) + self.assertEqual(regex.findall(r"(?r)..", "abcde", overlapped=True), + ['de', 'cd', 'bc', 'ab']) + self.assertEqual(regex.findall(r"(.)(-)(.)", "a-b-c", overlapped=True), + 
[("a", "-", "b"), ("b", "-", "c")]) + + self.assertEqual([m[0] for m in regex.finditer(r"..", "abcde")], ['ab', + 'cd']) + self.assertEqual([m[0] for m in regex.finditer(r"..", "abcde", + overlapped=True)], ['ab', 'bc', 'cd', 'de']) + self.assertEqual([m[0] for m in regex.finditer(r"(?r)..", "abcde")], + ['de', 'bc']) + self.assertEqual([m[0] for m in regex.finditer(r"(?r)..", "abcde", + overlapped=True)], ['de', 'cd', 'bc', 'ab']) + + self.assertEqual([m.groups() for m in regex.finditer(r"(.)(-)(.)", + "a-b-c", overlapped=True)], [("a", "-", "b"), ("b", "-", "c")]) + self.assertEqual([m.groups() for m in regex.finditer(r"(?r)(.)(-)(.)", + "a-b-c", overlapped=True)], [("b", "-", "c"), ("a", "-", "b")]) + + def test_splititer(self): + self.assertEqual(regex.split(r",", "a,b,,c,"), ['a', 'b', '', 'c', '']) + self.assertEqual([m for m in regex.splititer(r",", "a,b,,c,")], ['a', + 'b', '', 'c', '']) + + def test_grapheme(self): + self.assertEqual(regex.match(ur"(?u)\X", u"\xE0").span(), (0, 1)) + self.assertEqual(regex.match(ur"(?u)\X", u"a\u0300").span(), (0, 2)) + + self.assertEqual(regex.findall(ur"(?u)\X", + u"a\xE0a\u0300e\xE9e\u0301"), [u'a', u'\xe0', u'a\u0300', u'e', + u'\xe9', u'e\u0301']) + self.assertEqual(regex.findall(ur"(?u)\X{3}", + u"a\xE0a\u0300e\xE9e\u0301"), [u'a\xe0a\u0300', u'e\xe9e\u0301']) + self.assertEqual(regex.findall(ur"(?u)\X", u"\r\r\n\u0301A\u0301"), + [u'\r', u'\r\n', u'\u0301', u'A\u0301']) + + def test_word_boundary(self): + text = u'The quick ("brown") fox can\'t jump 32.3 feet, right?' 
+ self.assertEqual(regex.split(ur'(?V1)\b', text), [u'', u'The', u' ', + u'quick', u' ("', u'brown', u'") ', u'fox', u' ', u'can', u"'", u't', + u' ', u'jump', u' ', u'32', u'.', u'3', u' ', u'feet', u', ', + u'right', u'?']) + self.assertEqual(regex.split(ur'(?V1w)\b', text), [u'', u'The', u' ', + u'quick', u' ', u'(', u'"', u'brown', u'"', u')', u' ', u'fox', u' ', + u"can't", u' ', u'jump', u' ', u'32.3', u' ', u'feet', u',', u' ', + u'right', u'?', u'']) + + text = u"The fox" + self.assertEqual(regex.split(ur'(?V1)\b', text), [u'', u'The', u' ', + u'fox', u'']) + self.assertEqual(regex.split(ur'(?V1w)\b', text), [u'', u'The', u' ', + u' ', u'fox', u'']) + + text = u"can't aujourd'hui l'objectif" + self.assertEqual(regex.split(ur'(?V1)\b', text), [u'', u'can', u"'", + u't', u' ', u'aujourd', u"'", u'hui', u' ', u'l', u"'", u'objectif', + u'']) + self.assertEqual(regex.split(ur'(?V1w)\b', text), [u'', u"can't", u' ', + u"aujourd'hui", u' ', u"l'", u'objectif', u'']) + + def test_line_boundary(self): + self.assertEqual(regex.findall(r".+", "Line 1\nLine 2\n"), ["Line 1", + "Line 2"]) + self.assertEqual(regex.findall(r".+", "Line 1\rLine 2\r"), + ["Line 1\rLine 2\r"]) + self.assertEqual(regex.findall(r".+", "Line 1\r\nLine 2\r\n"), + ["Line 1\r", "Line 2\r"]) + self.assertEqual(regex.findall(r"(?w).+", "Line 1\nLine 2\n"), + ["Line 1", "Line 2"]) + self.assertEqual(regex.findall(r"(?w).+", "Line 1\rLine 2\r"), + ["Line 1", "Line 2"]) + self.assertEqual(regex.findall(r"(?w).+", "Line 1\r\nLine 2\r\n"), + ["Line 1", "Line 2"]) + + self.assertEqual(regex.search(r"^abc", "abc").start(), 0) + self.assertEqual(regex.search(r"^abc", "\nabc"), None) + self.assertEqual(regex.search(r"^abc", "\rabc"), None) + self.assertEqual(regex.search(r"(?w)^abc", "abc").start(), 0) + self.assertEqual(regex.search(r"(?w)^abc", "\nabc"), None) + self.assertEqual(regex.search(r"(?w)^abc", "\rabc"), None) + + self.assertEqual(regex.search(r"abc$", "abc").start(), 0) + 
self.assertEqual(regex.search(r"abc$", "abc\n").start(), 0) + self.assertEqual(regex.search(r"abc$", "abc\r"), None) + self.assertEqual(regex.search(r"(?w)abc$", "abc").start(), 0) + self.assertEqual(regex.search(r"(?w)abc$", "abc\n").start(), 0) + self.assertEqual(regex.search(r"(?w)abc$", "abc\r").start(), 0) + + self.assertEqual(regex.search(r"(?m)^abc", "abc").start(), 0) + self.assertEqual(regex.search(r"(?m)^abc", "\nabc").start(), 1) + self.assertEqual(regex.search(r"(?m)^abc", "\rabc"), None) + self.assertEqual(regex.search(r"(?mw)^abc", "abc").start(), 0) + self.assertEqual(regex.search(r"(?mw)^abc", "\nabc").start(), 1) + self.assertEqual(regex.search(r"(?mw)^abc", "\rabc").start(), 1) + + self.assertEqual(regex.search(r"(?m)abc$", "abc").start(), 0) + self.assertEqual(regex.search(r"(?m)abc$", "abc\n").start(), 0) + self.assertEqual(regex.search(r"(?m)abc$", "abc\r"), None) + self.assertEqual(regex.search(r"(?mw)abc$", "abc").start(), 0) + self.assertEqual(regex.search(r"(?mw)abc$", "abc\n").start(), 0) + self.assertEqual(regex.search(r"(?mw)abc$", "abc\r").start(), 0) + + def test_branch_reset(self): + self.assertEqual(regex.match(r"(?:(a)|(b))(c)", "ac").groups(), ('a', + None, 'c')) + self.assertEqual(regex.match(r"(?:(a)|(b))(c)", "bc").groups(), (None, + 'b', 'c')) + self.assertEqual(regex.match(r"(?:(?a)|(?b))(?c)", + "ac").groups(), ('a', None, 'c')) + self.assertEqual(regex.match(r"(?:(?a)|(?b))(?c)", + "bc").groups(), (None, 'b', 'c')) + + self.assertEqual(regex.match(r"(?a)(?:(?b)|(?c))(?d)", + "abd").groups(), ('a', 'b', None, 'd')) + self.assertEqual(regex.match(r"(?a)(?:(?b)|(?c))(?d)", + "acd").groups(), ('a', None, 'c', 'd')) + self.assertEqual(regex.match(r"(a)(?:(b)|(c))(d)", "abd").groups(), + ('a', 'b', None, 'd')) + + self.assertEqual(regex.match(r"(a)(?:(b)|(c))(d)", "acd").groups(), + ('a', None, 'c', 'd')) + self.assertEqual(regex.match(r"(a)(?|(b)|(b))(d)", "abd").groups(), + ('a', 'b', 'd')) + 
self.assertEqual(regex.match(r"(?|(?a)|(?b))(c)", "ac").groups(), + ('a', None, 'c')) + self.assertEqual(regex.match(r"(?|(?a)|(?b))(c)", "bc").groups(), + (None, 'b', 'c')) + self.assertEqual(regex.match(r"(?|(?a)|(?b))(c)", "ac").groups(), + ('a', 'c')) + + self.assertEqual(regex.match(r"(?|(?a)|(?b))(c)", "bc").groups(), + ('b', 'c')) + + self.assertEqual(regex.match(r"(?|(?a)(?b)|(?c)(?d))(e)", + "abe").groups(), ('a', 'b', 'e')) + self.assertEqual(regex.match(r"(?|(?a)(?b)|(?c)(?d))(e)", + "cde").groups(), ('d', 'c', 'e')) + self.assertEqual(regex.match(r"(?|(?a)(?b)|(?c)(d))(e)", + "abe").groups(), ('a', 'b', 'e')) + self.assertEqual(regex.match(r"(?|(?a)(?b)|(?c)(d))(e)", + "cde").groups(), ('d', 'c', 'e')) + self.assertEqual(regex.match(r"(?|(?a)(?b)|(c)(d))(e)", + "abe").groups(), ('a', 'b', 'e')) + self.assertEqual(regex.match(r"(?|(?a)(?b)|(c)(d))(e)", + "cde").groups(), ('c', 'd', 'e')) + + # Hg issue 87. + self.assertEqual(regex.match(r"(?|(?a)(?b)|(c)(?d))(e)", + "abe").groups(), ("a", "b", "e")) + self.assertEqual(regex.match(r"(?|(?a)(?b)|(c)(?d))(e)", + "abe").capturesdict(), {"a": ["a"], "b": ["b"]}) + self.assertEqual(regex.match(r"(?|(?a)(?b)|(c)(?d))(e)", + "cde").groups(), ("d", None, "e")) + self.assertEqual(regex.match(r"(?|(?a)(?b)|(c)(?d))(e)", + "cde").capturesdict(), {"a": ["c", "d"], "b": []}) + + def test_set(self): + self.assertEqual(regex.match(r"[a]", "a").span(), (0, 1)) + self.assertEqual(regex.match(r"(?i)[a]", "A").span(), (0, 1)) + self.assertEqual(regex.match(r"[a-b]", r"a").span(), (0, 1)) + self.assertEqual(regex.match(r"(?i)[a-b]", r"A").span(), (0, 1)) + + self.assertEqual(regex.sub(r"(?V0)([][])", r"-", "a[b]c"), "a-b-c") + + self.assertEqual(regex.findall(ur"[\p{Alpha}]", u"a0"), [u"a"]) + self.assertEqual(regex.findall(ur"(?i)[\p{Alpha}]", u"A0"), [u"A"]) + + self.assertEqual(regex.findall(ur"[a\p{Alpha}]", u"ab0"), [u"a", u"b"]) + self.assertEqual(regex.findall(ur"[a\P{Alpha}]", u"ab0"), [u"a", u"0"]) + 
self.assertEqual(regex.findall(ur"(?i)[a\p{Alpha}]", u"ab0"), [u"a", + u"b"]) + self.assertEqual(regex.findall(ur"(?i)[a\P{Alpha}]", u"ab0"), [u"a", + u"0"]) + + self.assertEqual(regex.findall(ur"[a-b\p{Alpha}]", u"abC0"), [u"a", + u"b", u"C"]) + self.assertEqual(regex.findall(ur"(?i)[a-b\p{Alpha}]", u"AbC0"), [u"A", + u"b", u"C"]) + + self.assertEqual(regex.findall(ur"[\p{Alpha}]", u"a0"), [u"a"]) + self.assertEqual(regex.findall(ur"[\P{Alpha}]", u"a0"), [u"0"]) + self.assertEqual(regex.findall(ur"[^\p{Alpha}]", u"a0"), [u"0"]) + self.assertEqual(regex.findall(ur"[^\P{Alpha}]", u"a0"), [u"a"]) + + self.assertEqual("".join(regex.findall(r"[^\d-h]", "a^b12c-h")), + 'a^bc') + self.assertEqual("".join(regex.findall(r"[^\dh]", "a^b12c-h")), + 'a^bc-') + self.assertEqual("".join(regex.findall(r"[^h\s\db]", "a^b 12c-h")), + 'a^c-') + self.assertEqual("".join(regex.findall(r"[^b\w]", "a b")), ' ') + self.assertEqual("".join(regex.findall(r"[^b\S]", "a b")), ' ') + self.assertEqual("".join(regex.findall(r"[^8\d]", "a 1b2")), 'a b') + + all_chars = u"".join(unichr(c) for c in range(0x100)) + self.assertEqual(len(regex.findall(ur"(?u)\p{ASCII}", all_chars)), 128) + self.assertEqual(len(regex.findall(ur"(?u)\p{Letter}", all_chars)), + 117) + self.assertEqual(len(regex.findall(ur"(?u)\p{Digit}", all_chars)), 10) + + # Set operators + self.assertEqual(len(regex.findall(ur"(?uV1)[\p{ASCII}&&\p{Letter}]", + all_chars)), 52) + self.assertEqual(len(regex.findall(ur"(?uV1)[\p{ASCII}&&\p{Alnum}&&\p{Letter}]", + all_chars)), 52) + self.assertEqual(len(regex.findall(ur"(?uV1)[\p{ASCII}&&\p{Alnum}&&\p{Digit}]", + all_chars)), 10) + self.assertEqual(len(regex.findall(ur"(?uV1)[\p{ASCII}&&\p{Cc}]", + all_chars)), 33) + self.assertEqual(len(regex.findall(ur"(?uV1)[\p{ASCII}&&\p{Graph}]", + all_chars)), 94) + self.assertEqual(len(regex.findall(ur"(?uV1)[\p{ASCII}--\p{Cc}]", + all_chars)), 95) + self.assertEqual(len(regex.findall(ur"(?u)[\p{Letter}\p{Digit}]", + all_chars)), 127) + 
self.assertEqual(len(regex.findall(ur"(?uV1)[\p{Letter}||\p{Digit}]", + all_chars)), 127) + self.assertEqual(len(regex.findall(ur"(?u)\p{HexDigit}", all_chars)), + 22) + self.assertEqual(len(regex.findall(ur"(?uV1)[\p{HexDigit}~~\p{Digit}]", + all_chars)), 12) + self.assertEqual(len(regex.findall(ur"(?uV1)[\p{Digit}~~\p{HexDigit}]", + all_chars)), 12) + + self.assertEqual(repr(type(regex.compile(r"(?V0)([][-])"))), + self.PATTERN_CLASS) + self.assertEqual(regex.findall(r"(?V1)[[a-z]--[aei]]", "abc"), ["b", + "c"]) + self.assertEqual(regex.findall(r"(?iV1)[[a-z]--[aei]]", "abc"), ["b", + "c"]) + self.assertEqual(regex.findall("(?V1)[\w--a]","abc"), ["b", "c"]) + self.assertEqual(regex.findall("(?iV1)[\w--a]","abc"), ["b", "c"]) + + def test_various(self): + tests = [ + # Test ?P< and ?P= extensions. + ('(?Pa)', '', '', regex.error, self.BAD_GROUP_NAME), # Begins with a digit. + ('(?Pa)', '', '', regex.error, self.BAD_GROUP_NAME), # Begins with an illegal char. + ('(?Pa)', '', '', regex.error, self.BAD_GROUP_NAME), # Begins with an illegal char. + + # Same tests, for the ?P= form. + ('(?Pa)(?P=foo_123', 'aa', '', regex.error, + self.MISSING_RPAREN), + ('(?Pa)(?P=1)', 'aa', '', regex.error, + self.BAD_GROUP_NAME), + ('(?Pa)(?P=!)', 'aa', '', regex.error, + self.BAD_GROUP_NAME), + ('(?Pa)(?P=foo_124)', 'aa', '', regex.error, + self.UNKNOWN_GROUP), # Backref to undefined group. + + ('(?Pa)', 'a', '1', repr('a')), + ('(?Pa)(?P=foo_123)', 'aa', '1', repr('a')), + + # Mal-formed \g in pattern treated as literal for compatibility. + (r'(?a)\ga)\g<1>', 'aa', '1', repr('a')), + (r'(?a)\g', 'aa', '', repr(None)), + (r'(?a)\g', 'aa', '', regex.error, + self.UNKNOWN_GROUP), # Backref to undefined group. + + ('(?a)', 'a', '1', repr('a')), + (r'(?a)\g', 'aa', '1', repr('a')), + + # Test octal escapes. + ('\\1', 'a', '', regex.error, self.UNKNOWN_GROUP), # Backreference. + ('[\\1]', '\1', '0', "'\\x01'"), # Character. 
+ ('\\09', chr(0) + '9', '0', repr(chr(0) + '9')), + ('\\141', 'a', '0', repr('a')), + ('(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)(l)\\119', 'abcdefghijklk9', + '0,11', repr(('abcdefghijklk9', 'k'))), + + # Test \0 is handled everywhere. + (r'\0', '\0', '0', repr('\0')), + (r'[\0a]', '\0', '0', repr('\0')), + (r'[a\0]', '\0', '0', repr('\0')), + (r'[^a\0]', '\0', '', repr(None)), + + # Test various letter escapes. + (r'\a[\b]\f\n\r\t\v', '\a\b\f\n\r\t\v', '0', + repr('\a\b\f\n\r\t\v')), + (r'[\a][\b][\f][\n][\r][\t][\v]', '\a\b\f\n\r\t\v', '0', + repr('\a\b\f\n\r\t\v')), + (r'\c\e\g\h\i\j\k\o\p\q\y\z', 'ceghijkopqyz', '0', + repr('ceghijkopqyz')), + (r'\xff', '\377', '0', repr(chr(255))), + + # New \x semantics. + (r'\x00ffffffffffffff', '\377', '', repr(None)), + (r'\x00f', '\017', '', repr(None)), + (r'\x00fe', '\376', '', repr(None)), + + (r'\x00ff', '\377', '', repr(None)), + (r'\t\n\v\r\f\a\g', '\t\n\v\r\f\ag', '0', repr('\t\n\v\r\f\ag')), + ('\t\n\v\r\f\a\g', '\t\n\v\r\f\ag', '0', repr('\t\n\v\r\f\ag')), + (r'\t\n\v\r\f\a', '\t\n\v\r\f\a', '0', repr(chr(9) + chr(10) + + chr(11) + chr(13) + chr(12) + chr(7))), + (r'[\t][\n][\v][\r][\f][\b]', '\t\n\v\r\f\b', '0', + repr('\t\n\v\r\f\b')), + + (r"^\w+=(\\[\000-\277]|[^\n\\])*", + "SRC=eval.c g.c blah blah blah \\\\\n\tapes.c", '0', + repr("SRC=eval.c g.c blah blah blah \\\\")), + + # Test that . only matches \n in DOTALL mode. + ('a.b', 'acb', '0', repr('acb')), + ('a.b', 'a\nb', '', repr(None)), + ('a.*b', 'acc\nccb', '', repr(None)), + ('a.{4,5}b', 'acc\nccb', '', repr(None)), + ('a.b', 'a\rb', '0', repr('a\rb')), + # The new behaviour is that the inline flag affects only what follows. 
+ ('a.b(?s)', 'a\nb', '0', repr('a\nb')), + ('a.b(?sV1)', 'a\nb', '', repr(None)), + ('(?s)a.b', 'a\nb', '0', repr('a\nb')), + ('a.*(?s)b', 'acc\nccb', '0', repr('acc\nccb')), + ('a.*(?sV1)b', 'acc\nccb', '', repr(None)), + ('(?s)a.*b', 'acc\nccb', '0', repr('acc\nccb')), + ('(?s)a.{4,5}b', 'acc\nccb', '0', repr('acc\nccb')), + + (')', '', '', regex.error, self.TRAILING_CHARS), # Unmatched right bracket. + ('', '', '0', "''"), # Empty pattern. + ('abc', 'abc', '0', repr('abc')), + ('abc', 'xbc', '', repr(None)), + ('abc', 'axc', '', repr(None)), + ('abc', 'abx', '', repr(None)), + ('abc', 'xabcy', '0', repr('abc')), + ('abc', 'ababc', '0', repr('abc')), + ('ab*c', 'abc', '0', repr('abc')), + ('ab*bc', 'abc', '0', repr('abc')), + + ('ab*bc', 'abbc', '0', repr('abbc')), + ('ab*bc', 'abbbbc', '0', repr('abbbbc')), + ('ab+bc', 'abbc', '0', repr('abbc')), + ('ab+bc', 'abc', '', repr(None)), + ('ab+bc', 'abq', '', repr(None)), + ('ab+bc', 'abbbbc', '0', repr('abbbbc')), + ('ab?bc', 'abbc', '0', repr('abbc')), + ('ab?bc', 'abc', '0', repr('abc')), + ('ab?bc', 'abbbbc', '', repr(None)), + ('ab?c', 'abc', '0', repr('abc')), + + ('^abc$', 'abc', '0', repr('abc')), + ('^abc$', 'abcc', '', repr(None)), + ('^abc', 'abcc', '0', repr('abc')), + ('^abc$', 'aabc', '', repr(None)), + ('abc$', 'aabc', '0', repr('abc')), + ('^', 'abc', '0', repr('')), + ('$', 'abc', '0', repr('')), + ('a.c', 'abc', '0', repr('abc')), + ('a.c', 'axc', '0', repr('axc')), + ('a.*c', 'axyzc', '0', repr('axyzc')), + + ('a.*c', 'axyzd', '', repr(None)), + ('a[bc]d', 'abc', '', repr(None)), + ('a[bc]d', 'abd', '0', repr('abd')), + ('a[b-d]e', 'abd', '', repr(None)), + ('a[b-d]e', 'ace', '0', repr('ace')), + ('a[b-d]', 'aac', '0', repr('ac')), + ('a[-b]', 'a-', '0', repr('a-')), + ('a[\\-b]', 'a-', '0', repr('a-')), + ('a[b-]', 'a-', '0', repr('a-')), + ('a[]b', '-', '', regex.error, self.BAD_SET), + + ('a[', '-', '', regex.error, self.BAD_SET), + ('a\\', '-', '', regex.error, self.BAD_ESCAPE), + ('abc)', 
'-', '', regex.error, self.TRAILING_CHARS), + ('(abc', '-', '', regex.error, self.MISSING_RPAREN), + ('a]', 'a]', '0', repr('a]')), + ('a[]]b', 'a]b', '0', repr('a]b')), + ('a[]]b', 'a]b', '0', repr('a]b')), + ('a[^bc]d', 'aed', '0', repr('aed')), + ('a[^bc]d', 'abd', '', repr(None)), + ('a[^-b]c', 'adc', '0', repr('adc')), + + ('a[^-b]c', 'a-c', '', repr(None)), + ('a[^]b]c', 'a]c', '', repr(None)), + ('a[^]b]c', 'adc', '0', repr('adc')), + ('\\ba\\b', 'a-', '0', repr('a')), + ('\\ba\\b', '-a', '0', repr('a')), + ('\\ba\\b', '-a-', '0', repr('a')), + ('\\by\\b', 'xy', '', repr(None)), + ('\\by\\b', 'yz', '', repr(None)), + ('\\by\\b', 'xyz', '', repr(None)), + ('x\\b', 'xyz', '', repr(None)), + + ('x\\B', 'xyz', '0', repr('x')), + ('\\Bz', 'xyz', '0', repr('z')), + ('z\\B', 'xyz', '', repr(None)), + ('\\Bx', 'xyz', '', repr(None)), + ('\\Ba\\B', 'a-', '', repr(None)), + ('\\Ba\\B', '-a', '', repr(None)), + ('\\Ba\\B', '-a-', '', repr(None)), + ('\\By\\B', 'xy', '', repr(None)), + ('\\By\\B', 'yz', '', repr(None)), + ('\\By\\b', 'xy', '0', repr('y')), + + ('\\by\\B', 'yz', '0', repr('y')), + ('\\By\\B', 'xyz', '0', repr('y')), + ('ab|cd', 'abc', '0', repr('ab')), + ('ab|cd', 'abcd', '0', repr('ab')), + ('()ef', 'def', '0,1', repr(('ef', ''))), + ('$b', 'b', '', repr(None)), + ('a\\(b', 'a(b', '', repr(('a(b',))), + ('a\\(*b', 'ab', '0', repr('ab')), + ('a\\(*b', 'a((b', '0', repr('a((b')), + ('a\\\\b', 'a\\b', '0', repr('a\\b')), + + ('((a))', 'abc', '0,1,2', repr(('a', 'a', 'a'))), + ('(a)b(c)', 'abc', '0,1,2', repr(('abc', 'a', 'c'))), + ('a+b+c', 'aabbabc', '0', repr('abc')), + ('(a+|b)*', 'ab', '0,1', repr(('ab', 'b'))), + ('(a+|b)+', 'ab', '0,1', repr(('ab', 'b'))), + ('(a+|b)?', 'ab', '0,1', repr(('a', 'a'))), + (')(', '-', '', regex.error, self.TRAILING_CHARS), + ('[^ab]*', 'cde', '0', repr('cde')), + ('abc', '', '', repr(None)), + ('a*', '', '0', repr('')), + + ('a|b|c|d|e', 'e', '0', repr('e')), + ('(a|b|c|d|e)f', 'ef', '0,1', repr(('ef', 'e'))), + 
('abcd*efg', 'abcdefg', '0', repr('abcdefg')), + ('ab*', 'xabyabbbz', '0', repr('ab')), + ('ab*', 'xayabbbz', '0', repr('a')), + ('(ab|cd)e', 'abcde', '0,1', repr(('cde', 'cd'))), + ('[abhgefdc]ij', 'hij', '0', repr('hij')), + ('^(ab|cd)e', 'abcde', '', repr(None)), + ('(abc|)ef', 'abcdef', '0,1', repr(('ef', ''))), + ('(a|b)c*d', 'abcd', '0,1', repr(('bcd', 'b'))), + + ('(ab|ab*)bc', 'abc', '0,1', repr(('abc', 'a'))), + ('a([bc]*)c*', 'abc', '0,1', repr(('abc', 'bc'))), + ('a([bc]*)(c*d)', 'abcd', '0,1,2', repr(('abcd', 'bc', 'd'))), + ('a([bc]+)(c*d)', 'abcd', '0,1,2', repr(('abcd', 'bc', 'd'))), + ('a([bc]*)(c+d)', 'abcd', '0,1,2', repr(('abcd', 'b', 'cd'))), + ('a[bcd]*dcdcde', 'adcdcde', '0', repr('adcdcde')), + ('a[bcd]+dcdcde', 'adcdcde', '', repr(None)), + ('(ab|a)b*c', 'abc', '0,1', repr(('abc', 'ab'))), + ('((a)(b)c)(d)', 'abcd', '1,2,3,4', repr(('abc', 'a', 'b', 'd'))), + ('[a-zA-Z_][a-zA-Z0-9_]*', 'alpha', '0', repr('alpha')), + + ('^a(bc+|b[eh])g|.h$', 'abh', '0,1', repr(('bh', None))), + ('(bc+d$|ef*g.|h?i(j|k))', 'effgz', '0,1,2', repr(('effgz', + 'effgz', None))), + ('(bc+d$|ef*g.|h?i(j|k))', 'ij', '0,1,2', repr(('ij', 'ij', + 'j'))), + ('(bc+d$|ef*g.|h?i(j|k))', 'effg', '', repr(None)), + ('(bc+d$|ef*g.|h?i(j|k))', 'bcdd', '', repr(None)), + ('(bc+d$|ef*g.|h?i(j|k))', 'reffgz', '0,1,2', repr(('effgz', + 'effgz', None))), + ('(((((((((a)))))))))', 'a', '0', repr('a')), + ('multiple words of text', 'uh-uh', '', repr(None)), + ('multiple words', 'multiple words, yeah', '0', + repr('multiple words')), + ('(.*)c(.*)', 'abcde', '0,1,2', repr(('abcde', 'ab', 'de'))), + + ('\\((.*), (.*)\\)', '(a, b)', '2,1', repr(('b', 'a'))), + ('[k]', 'ab', '', repr(None)), + ('a[-]?c', 'ac', '0', repr('ac')), + ('(abc)\\1', 'abcabc', '1', repr('abc')), + ('([a-c]*)\\1', 'abcabc', '1', repr('abc')), + ('^(.+)?B', 'AB', '1', repr('A')), + ('(a+).\\1$', 'aaaaa', '0,1', repr(('aaaaa', 'aa'))), + ('^(a+).\\1$', 'aaaa', '', repr(None)), + ('(abc)\\1', 'abcabc', '0,1', 
repr(('abcabc', 'abc'))), + ('([a-c]+)\\1', 'abcabc', '0,1', repr(('abcabc', 'abc'))), + + ('(a)\\1', 'aa', '0,1', repr(('aa', 'a'))), + ('(a+)\\1', 'aa', '0,1', repr(('aa', 'a'))), + ('(a+)+\\1', 'aa', '0,1', repr(('aa', 'a'))), + ('(a).+\\1', 'aba', '0,1', repr(('aba', 'a'))), + ('(a)ba*\\1', 'aba', '0,1', repr(('aba', 'a'))), + ('(aa|a)a\\1$', 'aaa', '0,1', repr(('aaa', 'a'))), + ('(a|aa)a\\1$', 'aaa', '0,1', repr(('aaa', 'a'))), + ('(a+)a\\1$', 'aaa', '0,1', repr(('aaa', 'a'))), + ('([abc]*)\\1', 'abcabc', '0,1', repr(('abcabc', 'abc'))), + ('(a)(b)c|ab', 'ab', '0,1,2', repr(('ab', None, None))), + + ('(a)+x', 'aaax', '0,1', repr(('aaax', 'a'))), + ('([ac])+x', 'aacx', '0,1', repr(('aacx', 'c'))), + ('([^/]*/)*sub1/', 'd:msgs/tdir/sub1/trial/away.cpp', '0,1', + repr(('d:msgs/tdir/sub1/', 'tdir/'))), + ('([^.]*)\\.([^:]*):[T ]+(.*)', 'track1.title:TBlah blah blah', + '0,1,2,3', repr(('track1.title:TBlah blah blah', 'track1', + 'title', 'Blah blah blah'))), + ('([^N]*N)+', 'abNNxyzN', '0,1', repr(('abNNxyzN', 'xyzN'))), + ('([^N]*N)+', 'abNNxyz', '0,1', repr(('abNN', 'N'))), + ('([abc]*)x', 'abcx', '0,1', repr(('abcx', 'abc'))), + ('([abc]*)x', 'abc', '', repr(None)), + ('([xyz]*)x', 'abcx', '0,1', repr(('x', ''))), + ('(a)+b|aac', 'aac', '0,1', repr(('aac', None))), + + # Test symbolic groups. + ('(?Paaa)a', 'aaaa', '', regex.error, self.BAD_GROUP_NAME), + ('(?Paaa)a', 'aaaa', '0,id', repr(('aaaa', 'aaa'))), + ('(?Paa)(?P=id)', 'aaaa', '0,id', repr(('aaaa', 'aa'))), + ('(?Paa)(?P=xd)', 'aaaa', '', regex.error, self.UNKNOWN_GROUP), + + # Character properties. 
+ (ur"\g", u"g", '0', repr(u'g')), + (ur"\g<1>", u"g", '', regex.error, self.UNKNOWN_GROUP), + (ur"(.)\g<1>", u"gg", '0', repr(u'gg')), + (ur"(.)\g<1>", u"gg", '', repr((u'gg', u'g'))), + (ur"\N", u"N", '0', repr(u'N')), + (ur"\N{LATIN SMALL LETTER A}", u"a", '0', repr(u'a')), + (ur"\p", u"p", '0', repr(u'p')), + (ur"\p{Ll}", u"a", '0', repr(u'a')), + (ur"\P", u"P", '0', repr(u'P')), + (ur"\P{Lu}", u"p", '0', repr(u'p')), + + # All tests from Perl. + ('abc', 'abc', '0', repr('abc')), + ('abc', 'xbc', '', repr(None)), + ('abc', 'axc', '', repr(None)), + ('abc', 'abx', '', repr(None)), + ('abc', 'xabcy', '0', repr('abc')), + ('abc', 'ababc', '0', repr('abc')), + + ('ab*c', 'abc', '0', repr('abc')), + ('ab*bc', 'abc', '0', repr('abc')), + ('ab*bc', 'abbc', '0', repr('abbc')), + ('ab*bc', 'abbbbc', '0', repr('abbbbc')), + ('ab{0,}bc', 'abbbbc', '0', repr('abbbbc')), + ('ab+bc', 'abbc', '0', repr('abbc')), + ('ab+bc', 'abc', '', repr(None)), + ('ab+bc', 'abq', '', repr(None)), + ('ab{1,}bc', 'abq', '', repr(None)), + ('ab+bc', 'abbbbc', '0', repr('abbbbc')), + + ('ab{1,}bc', 'abbbbc', '0', repr('abbbbc')), + ('ab{1,3}bc', 'abbbbc', '0', repr('abbbbc')), + ('ab{3,4}bc', 'abbbbc', '0', repr('abbbbc')), + ('ab{4,5}bc', 'abbbbc', '', repr(None)), + ('ab?bc', 'abbc', '0', repr('abbc')), + ('ab?bc', 'abc', '0', repr('abc')), + ('ab{0,1}bc', 'abc', '0', repr('abc')), + ('ab?bc', 'abbbbc', '', repr(None)), + ('ab?c', 'abc', '0', repr('abc')), + ('ab{0,1}c', 'abc', '0', repr('abc')), + + ('^abc$', 'abc', '0', repr('abc')), + ('^abc$', 'abcc', '', repr(None)), + ('^abc', 'abcc', '0', repr('abc')), + ('^abc$', 'aabc', '', repr(None)), + ('abc$', 'aabc', '0', repr('abc')), + ('^', 'abc', '0', repr('')), + ('$', 'abc', '0', repr('')), + ('a.c', 'abc', '0', repr('abc')), + ('a.c', 'axc', '0', repr('axc')), + ('a.*c', 'axyzc', '0', repr('axyzc')), + + ('a.*c', 'axyzd', '', repr(None)), + ('a[bc]d', 'abc', '', repr(None)), + ('a[bc]d', 'abd', '0', repr('abd')), + ('a[b-d]e', 'abd', '', 
repr(None)), + ('a[b-d]e', 'ace', '0', repr('ace')), + ('a[b-d]', 'aac', '0', repr('ac')), + ('a[-b]', 'a-', '0', repr('a-')), + ('a[b-]', 'a-', '0', repr('a-')), + ('a[b-a]', '-', '', regex.error, self.BAD_CHAR_RANGE), + ('a[]b', '-', '', regex.error, self.BAD_SET), + + ('a[', '-', '', regex.error, self.BAD_SET), + ('a]', 'a]', '0', repr('a]')), + ('a[]]b', 'a]b', '0', repr('a]b')), + ('a[^bc]d', 'aed', '0', repr('aed')), + ('a[^bc]d', 'abd', '', repr(None)), + ('a[^-b]c', 'adc', '0', repr('adc')), + ('a[^-b]c', 'a-c', '', repr(None)), + ('a[^]b]c', 'a]c', '', repr(None)), + ('a[^]b]c', 'adc', '0', repr('adc')), + ('ab|cd', 'abc', '0', repr('ab')), + + ('ab|cd', 'abcd', '0', repr('ab')), + ('()ef', 'def', '0,1', repr(('ef', ''))), + ('*a', '-', '', regex.error, self.NOTHING_TO_REPEAT), + ('(*)b', '-', '', regex.error, self.NOTHING_TO_REPEAT), + ('$b', 'b', '', repr(None)), + ('a\\', '-', '', regex.error, self.BAD_ESCAPE), + ('a\\(b', 'a(b', '', repr(('a(b',))), + ('a\\(*b', 'ab', '0', repr('ab')), + ('a\\(*b', 'a((b', '0', repr('a((b')), + ('a\\\\b', 'a\\b', '0', repr('a\\b')), + + ('abc)', '-', '', regex.error, self.TRAILING_CHARS), + ('(abc', '-', '', regex.error, self.MISSING_RPAREN), + ('((a))', 'abc', '0,1,2', repr(('a', 'a', 'a'))), + ('(a)b(c)', 'abc', '0,1,2', repr(('abc', 'a', 'c'))), + ('a+b+c', 'aabbabc', '0', repr('abc')), + ('a{1,}b{1,}c', 'aabbabc', '0', repr('abc')), + ('a**', '-', '', regex.error, self.NOTHING_TO_REPEAT), + ('a.+?c', 'abcabc', '0', repr('abc')), + ('(a+|b)*', 'ab', '0,1', repr(('ab', 'b'))), + ('(a+|b){0,}', 'ab', '0,1', repr(('ab', 'b'))), + + ('(a+|b)+', 'ab', '0,1', repr(('ab', 'b'))), + ('(a+|b){1,}', 'ab', '0,1', repr(('ab', 'b'))), + ('(a+|b)?', 'ab', '0,1', repr(('a', 'a'))), + ('(a+|b){0,1}', 'ab', '0,1', repr(('a', 'a'))), + (')(', '-', '', regex.error, self.TRAILING_CHARS), + ('[^ab]*', 'cde', '0', repr('cde')), + ('abc', '', '', repr(None)), + ('a*', '', '0', repr('')), + ('([abc])*d', 'abbbcd', '0,1', repr(('abbbcd', 
'c'))), + ('([abc])*bcd', 'abcd', '0,1', repr(('abcd', 'a'))), + + ('a|b|c|d|e', 'e', '0', repr('e')), + ('(a|b|c|d|e)f', 'ef', '0,1', repr(('ef', 'e'))), + ('abcd*efg', 'abcdefg', '0', repr('abcdefg')), + ('ab*', 'xabyabbbz', '0', repr('ab')), + ('ab*', 'xayabbbz', '0', repr('a')), + ('(ab|cd)e', 'abcde', '0,1', repr(('cde', 'cd'))), + ('[abhgefdc]ij', 'hij', '0', repr('hij')), + ('^(ab|cd)e', 'abcde', '', repr(None)), + ('(abc|)ef', 'abcdef', '0,1', repr(('ef', ''))), + ('(a|b)c*d', 'abcd', '0,1', repr(('bcd', 'b'))), + + ('(ab|ab*)bc', 'abc', '0,1', repr(('abc', 'a'))), + ('a([bc]*)c*', 'abc', '0,1', repr(('abc', 'bc'))), + ('a([bc]*)(c*d)', 'abcd', '0,1,2', repr(('abcd', 'bc', 'd'))), + ('a([bc]+)(c*d)', 'abcd', '0,1,2', repr(('abcd', 'bc', 'd'))), + ('a([bc]*)(c+d)', 'abcd', '0,1,2', repr(('abcd', 'b', 'cd'))), + ('a[bcd]*dcdcde', 'adcdcde', '0', repr('adcdcde')), + ('a[bcd]+dcdcde', 'adcdcde', '', repr(None)), + ('(ab|a)b*c', 'abc', '0,1', repr(('abc', 'ab'))), + ('((a)(b)c)(d)', 'abcd', '1,2,3,4', repr(('abc', 'a', 'b', 'd'))), + ('[a-zA-Z_][a-zA-Z0-9_]*', 'alpha', '0', repr('alpha')), + + ('^a(bc+|b[eh])g|.h$', 'abh', '0,1', repr(('bh', None))), + ('(bc+d$|ef*g.|h?i(j|k))', 'effgz', '0,1,2', repr(('effgz', + 'effgz', None))), + ('(bc+d$|ef*g.|h?i(j|k))', 'ij', '0,1,2', repr(('ij', 'ij', + 'j'))), + ('(bc+d$|ef*g.|h?i(j|k))', 'effg', '', repr(None)), + ('(bc+d$|ef*g.|h?i(j|k))', 'bcdd', '', repr(None)), + ('(bc+d$|ef*g.|h?i(j|k))', 'reffgz', '0,1,2', repr(('effgz', + 'effgz', None))), + ('((((((((((a))))))))))', 'a', '10', repr('a')), + ('((((((((((a))))))))))\\10', 'aa', '0', repr('aa')), + + # Python does not have the same rules for \\41 so this is a syntax error + # ('((((((((((a))))))))))\\41', 'aa', '', repr(None)), + # ('((((((((((a))))))))))\\41', 'a!', '0', repr('a!')), + ('((((((((((a))))))))))\\41', '', '', regex.error, + self.UNKNOWN_GROUP), + ('(?i)((((((((((a))))))))))\\41', '', '', regex.error, + self.UNKNOWN_GROUP), + + ('(((((((((a)))))))))', 
'a', '0', repr('a')), + ('multiple words of text', 'uh-uh', '', repr(None)), + ('multiple words', 'multiple words, yeah', '0', + repr('multiple words')), + ('(.*)c(.*)', 'abcde', '0,1,2', repr(('abcde', 'ab', 'de'))), + ('\\((.*), (.*)\\)', '(a, b)', '2,1', repr(('b', 'a'))), + ('[k]', 'ab', '', repr(None)), + ('a[-]?c', 'ac', '0', repr('ac')), + ('(abc)\\1', 'abcabc', '1', repr('abc')), + ('([a-c]*)\\1', 'abcabc', '1', repr('abc')), + ('(?i)abc', 'ABC', '0', repr('ABC')), + + ('(?i)abc', 'XBC', '', repr(None)), + ('(?i)abc', 'AXC', '', repr(None)), + ('(?i)abc', 'ABX', '', repr(None)), + ('(?i)abc', 'XABCY', '0', repr('ABC')), + ('(?i)abc', 'ABABC', '0', repr('ABC')), + ('(?i)ab*c', 'ABC', '0', repr('ABC')), + ('(?i)ab*bc', 'ABC', '0', repr('ABC')), + ('(?i)ab*bc', 'ABBC', '0', repr('ABBC')), + ('(?i)ab*?bc', 'ABBBBC', '0', repr('ABBBBC')), + ('(?i)ab{0,}?bc', 'ABBBBC', '0', repr('ABBBBC')), + + ('(?i)ab+?bc', 'ABBC', '0', repr('ABBC')), + ('(?i)ab+bc', 'ABC', '', repr(None)), + ('(?i)ab+bc', 'ABQ', '', repr(None)), + ('(?i)ab{1,}bc', 'ABQ', '', repr(None)), + ('(?i)ab+bc', 'ABBBBC', '0', repr('ABBBBC')), + ('(?i)ab{1,}?bc', 'ABBBBC', '0', repr('ABBBBC')), + ('(?i)ab{1,3}?bc', 'ABBBBC', '0', repr('ABBBBC')), + ('(?i)ab{3,4}?bc', 'ABBBBC', '0', repr('ABBBBC')), + ('(?i)ab{4,5}?bc', 'ABBBBC', '', repr(None)), + ('(?i)ab??bc', 'ABBC', '0', repr('ABBC')), + + ('(?i)ab??bc', 'ABC', '0', repr('ABC')), + ('(?i)ab{0,1}?bc', 'ABC', '0', repr('ABC')), + ('(?i)ab??bc', 'ABBBBC', '', repr(None)), + ('(?i)ab??c', 'ABC', '0', repr('ABC')), + ('(?i)ab{0,1}?c', 'ABC', '0', repr('ABC')), + ('(?i)^abc$', 'ABC', '0', repr('ABC')), + ('(?i)^abc$', 'ABCC', '', repr(None)), + ('(?i)^abc', 'ABCC', '0', repr('ABC')), + ('(?i)^abc$', 'AABC', '', repr(None)), + ('(?i)abc$', 'AABC', '0', repr('ABC')), + + ('(?i)^', 'ABC', '0', repr('')), + ('(?i)$', 'ABC', '0', repr('')), + ('(?i)a.c', 'ABC', '0', repr('ABC')), + ('(?i)a.c', 'AXC', '0', repr('AXC')), + ('(?i)a.*?c', 'AXYZC', '0', 
repr('AXYZC')), + ('(?i)a.*c', 'AXYZD', '', repr(None)), + ('(?i)a[bc]d', 'ABC', '', repr(None)), + ('(?i)a[bc]d', 'ABD', '0', repr('ABD')), + ('(?i)a[b-d]e', 'ABD', '', repr(None)), + ('(?i)a[b-d]e', 'ACE', '0', repr('ACE')), + + ('(?i)a[b-d]', 'AAC', '0', repr('AC')), + ('(?i)a[-b]', 'A-', '0', repr('A-')), + ('(?i)a[b-]', 'A-', '0', repr('A-')), + ('(?i)a[b-a]', '-', '', regex.error, self.BAD_CHAR_RANGE), + ('(?i)a[]b', '-', '', regex.error, self.BAD_SET), + ('(?i)a[', '-', '', regex.error, self.BAD_SET), + ('(?i)a]', 'A]', '0', repr('A]')), + ('(?i)a[]]b', 'A]B', '0', repr('A]B')), + ('(?i)a[^bc]d', 'AED', '0', repr('AED')), + ('(?i)a[^bc]d', 'ABD', '', repr(None)), + + ('(?i)a[^-b]c', 'ADC', '0', repr('ADC')), + ('(?i)a[^-b]c', 'A-C', '', repr(None)), + ('(?i)a[^]b]c', 'A]C', '', repr(None)), + ('(?i)a[^]b]c', 'ADC', '0', repr('ADC')), + ('(?i)ab|cd', 'ABC', '0', repr('AB')), + ('(?i)ab|cd', 'ABCD', '0', repr('AB')), + ('(?i)()ef', 'DEF', '0,1', repr(('EF', ''))), + ('(?i)*a', '-', '', regex.error, self.NOTHING_TO_REPEAT), + ('(?i)(*)b', '-', '', regex.error, self.NOTHING_TO_REPEAT), + ('(?i)$b', 'B', '', repr(None)), + + ('(?i)a\\', '-', '', regex.error, self.BAD_ESCAPE), + ('(?i)a\\(b', 'A(B', '', repr(('A(B',))), + ('(?i)a\\(*b', 'AB', '0', repr('AB')), + ('(?i)a\\(*b', 'A((B', '0', repr('A((B')), + ('(?i)a\\\\b', 'A\\B', '0', repr('A\\B')), + ('(?i)abc)', '-', '', regex.error, self.TRAILING_CHARS), + ('(?i)(abc', '-', '', regex.error, self.MISSING_RPAREN), + ('(?i)((a))', 'ABC', '0,1,2', repr(('A', 'A', 'A'))), + ('(?i)(a)b(c)', 'ABC', '0,1,2', repr(('ABC', 'A', 'C'))), + ('(?i)a+b+c', 'AABBABC', '0', repr('ABC')), + + ('(?i)a{1,}b{1,}c', 'AABBABC', '0', repr('ABC')), + ('(?i)a**', '-', '', regex.error, self.NOTHING_TO_REPEAT), + ('(?i)a.+?c', 'ABCABC', '0', repr('ABC')), + ('(?i)a.*?c', 'ABCABC', '0', repr('ABC')), + ('(?i)a.{0,5}?c', 'ABCABC', '0', repr('ABC')), + ('(?i)(a+|b)*', 'AB', '0,1', repr(('AB', 'B'))), + ('(?i)(a+|b){0,}', 'AB', '0,1', 
repr(('AB', 'B'))), + ('(?i)(a+|b)+', 'AB', '0,1', repr(('AB', 'B'))), + ('(?i)(a+|b){1,}', 'AB', '0,1', repr(('AB', 'B'))), + ('(?i)(a+|b)?', 'AB', '0,1', repr(('A', 'A'))), + + ('(?i)(a+|b){0,1}', 'AB', '0,1', repr(('A', 'A'))), + ('(?i)(a+|b){0,1}?', 'AB', '0,1', repr(('', None))), + ('(?i))(', '-', '', regex.error, self.TRAILING_CHARS), + ('(?i)[^ab]*', 'CDE', '0', repr('CDE')), + ('(?i)abc', '', '', repr(None)), + ('(?i)a*', '', '0', repr('')), + ('(?i)([abc])*d', 'ABBBCD', '0,1', repr(('ABBBCD', 'C'))), + ('(?i)([abc])*bcd', 'ABCD', '0,1', repr(('ABCD', 'A'))), + ('(?i)a|b|c|d|e', 'E', '0', repr('E')), + ('(?i)(a|b|c|d|e)f', 'EF', '0,1', repr(('EF', 'E'))), + + ('(?i)abcd*efg', 'ABCDEFG', '0', repr('ABCDEFG')), + ('(?i)ab*', 'XABYABBBZ', '0', repr('AB')), + ('(?i)ab*', 'XAYABBBZ', '0', repr('A')), + ('(?i)(ab|cd)e', 'ABCDE', '0,1', repr(('CDE', 'CD'))), + ('(?i)[abhgefdc]ij', 'HIJ', '0', repr('HIJ')), + ('(?i)^(ab|cd)e', 'ABCDE', '', repr(None)), + ('(?i)(abc|)ef', 'ABCDEF', '0,1', repr(('EF', ''))), + ('(?i)(a|b)c*d', 'ABCD', '0,1', repr(('BCD', 'B'))), + ('(?i)(ab|ab*)bc', 'ABC', '0,1', repr(('ABC', 'A'))), + ('(?i)a([bc]*)c*', 'ABC', '0,1', repr(('ABC', 'BC'))), + + ('(?i)a([bc]*)(c*d)', 'ABCD', '0,1,2', repr(('ABCD', 'BC', 'D'))), + ('(?i)a([bc]+)(c*d)', 'ABCD', '0,1,2', repr(('ABCD', 'BC', 'D'))), + ('(?i)a([bc]*)(c+d)', 'ABCD', '0,1,2', repr(('ABCD', 'B', 'CD'))), + ('(?i)a[bcd]*dcdcde', 'ADCDCDE', '0', repr('ADCDCDE')), + ('(?i)a[bcd]+dcdcde', 'ADCDCDE', '', repr(None)), + ('(?i)(ab|a)b*c', 'ABC', '0,1', repr(('ABC', 'AB'))), + ('(?i)((a)(b)c)(d)', 'ABCD', '1,2,3,4', repr(('ABC', 'A', 'B', + 'D'))), + ('(?i)[a-zA-Z_][a-zA-Z0-9_]*', 'ALPHA', '0', repr('ALPHA')), + ('(?i)^a(bc+|b[eh])g|.h$', 'ABH', '0,1', repr(('BH', None))), + ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'EFFGZ', '0,1,2', repr(('EFFGZ', + 'EFFGZ', None))), + + ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'IJ', '0,1,2', repr(('IJ', 'IJ', + 'J'))), + ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'EFFG', '', repr(None)), + 
('(?i)(bc+d$|ef*g.|h?i(j|k))', 'BCDD', '', repr(None)), + ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'REFFGZ', '0,1,2', repr(('EFFGZ', + 'EFFGZ', None))), + ('(?i)((((((((((a))))))))))', 'A', '10', repr('A')), + ('(?i)((((((((((a))))))))))\\10', 'AA', '0', repr('AA')), + #('(?i)((((((((((a))))))))))\\41', 'AA', '', repr(None)), + #('(?i)((((((((((a))))))))))\\41', 'A!', '0', repr('A!')), + ('(?i)(((((((((a)))))))))', 'A', '0', repr('A')), + ('(?i)(?:(?:(?:(?:(?:(?:(?:(?:(?:(a))))))))))', 'A', '1', + repr('A')), + ('(?i)(?:(?:(?:(?:(?:(?:(?:(?:(?:(a|b|c))))))))))', 'C', '1', + repr('C')), + ('(?i)multiple words of text', 'UH-UH', '', repr(None)), + + ('(?i)multiple words', 'MULTIPLE WORDS, YEAH', '0', + repr('MULTIPLE WORDS')), + ('(?i)(.*)c(.*)', 'ABCDE', '0,1,2', repr(('ABCDE', 'AB', 'DE'))), + ('(?i)\\((.*), (.*)\\)', '(A, B)', '2,1', repr(('B', 'A'))), + ('(?i)[k]', 'AB', '', repr(None)), + # ('(?i)abcd', 'ABCD', SUCCEED, 'found+"-"+\\found+"-"+\\\\found', repr(ABCD-$&-\\ABCD)), + # ('(?i)a(bc)d', 'ABCD', SUCCEED, 'g1+"-"+\\g1+"-"+\\\\g1', repr(BC-$1-\\BC)), + ('(?i)a[-]?c', 'AC', '0', repr('AC')), + ('(?i)(abc)\\1', 'ABCABC', '1', repr('ABC')), + ('(?i)([a-c]*)\\1', 'ABCABC', '1', repr('ABC')), + ('a(?!b).', 'abad', '0', repr('ad')), + ('a(?=d).', 'abad', '0', repr('ad')), + ('a(?=c|d).', 'abad', '0', repr('ad')), + + ('a(?:b|c|d)(.)', 'ace', '1', repr('e')), + ('a(?:b|c|d)*(.)', 'ace', '1', repr('e')), + ('a(?:b|c|d)+?(.)', 'ace', '1', repr('e')), + ('a(?:b|(c|e){1,2}?|d)+?(.)', 'ace', '1,2', repr(('c', 'e'))), + + # Lookbehind: split by : but not if it is escaped by -. + ('(?]*?b', 'a>b', '', repr(None)), + # Bug 490573: minimizing repeat problem. + (r'^a*?$', 'foo', '', repr(None)), + # Bug 470582: nested groups problem. + (r'^((a)c)?(ab)$', 'ab', '1,2,3', repr((None, None, 'ab'))), + # Another minimizing repeat problem (capturing groups in assertions). 
+ ('^([ab]*?)(?=(b)?)c', 'abc', '1,2', repr(('ab', None))), + ('^([ab]*?)(?!(b))c', 'abc', '1,2', repr(('ab', None))), + ('^([ab]*?)(?(.){0,2})d", "abcd").captures(1), + ['b', 'c']) + self.assertEqual(regex.search(r"(.)+", "a").captures(1), ['a']) + + def test_guards(self): + m = regex.search(r"(X.*?Y\s*){3}(X\s*)+AB:", + "XY\nX Y\nX Y\nXY\nXX AB:") + self.assertEqual(m.span(0, 1, 2), ((3, 21), (12, 15), (16, 18))) + + m = regex.search(r"(X.*?Y\s*){3,}(X\s*)+AB:", + "XY\nX Y\nX Y\nXY\nXX AB:") + self.assertEqual(m.span(0, 1, 2), ((0, 21), (12, 15), (16, 18))) + + m = regex.search(r'\d{4}(\s*\w)?\W*((?!\d)\w){2}', "9999XX") + self.assertEqual(m.span(0, 1, 2), ((0, 6), (-1, -1), (5, 6))) + + m = regex.search(r'A\s*?.*?(\n+.*?\s*?){0,2}\(X', 'A\n1\nS\n1 (X') + self.assertEqual(m.span(0, 1), ((0, 10), (5, 8))) + + m = regex.search('Derde\s*:', 'aaaaaa:\nDerde:') + self.assertEqual(m.span(), (8, 14)) + m = regex.search('Derde\s*:', 'aaaaa:\nDerde:') + self.assertEqual(m.span(), (7, 13)) + + def test_turkic(self): + # Turkish has dotted and dotless I/i. 
+ pairs = u"I=i;I=\u0131;i=\u0130" + + all_chars = set() + matching = set() + for pair in pairs.split(";"): + ch1, ch2 = pair.split("=") + all_chars.update((ch1, ch2)) + matching.add((ch1, ch1)) + matching.add((ch1, ch2)) + matching.add((ch2, ch1)) + matching.add((ch2, ch2)) + + for ch1 in all_chars: + for ch2 in all_chars: + m = regex.match(ur"(?iu)\A" + ch1 + ur"\Z", ch2) + if m: + if (ch1, ch2) not in matching: + self.fail("%s matching %s" % (repr(ch1), repr(ch2))) + else: + if (ch1, ch2) in matching: + self.fail("%s not matching %s" % (repr(ch1), + repr(ch2))) + + def test_named_lists(self): + options = [u"one", u"two", u"three"] + self.assertEqual(regex.match(ur"333\L444", u"333one444", + bar=options).group(), u"333one444") + self.assertEqual(regex.match(ur"(?i)333\L444", u"333TWO444", + bar=options).group(), u"333TWO444") + self.assertEqual(regex.match(ur"333\L444", u"333four444", + bar=options), None) + + options = ["one", "two", "three"] + self.assertEqual(regex.match(r"333\L444", "333one444", + bar=options).group(), "333one444") + self.assertEqual(regex.match(r"(?i)333\L444", "333TWO444", + bar=options).group(), "333TWO444") + self.assertEqual(regex.match(r"333\L444", "333four444", + bar=options), None) + + self.assertEqual(repr(type(regex.compile(r"3\L4\L+5", + bar=["one", "two", "three"]))), self.PATTERN_CLASS) + + self.assertEqual(regex.findall(r"^\L", "solid QWERT", + options=set(['good', 'brilliant', '+s\\ol[i}d'])), []) + self.assertEqual(regex.findall(r"^\L", "+solid QWERT", + options=set(['good', 'brilliant', '+solid'])), ['+solid']) + + options = [u"STRASSE"] + self.assertEqual(regex.match(ur"(?fiu)\L", + u"stra\N{LATIN SMALL LETTER SHARP S}e", words=options).span(), (0, + 6)) + + options = [u"STRASSE", u"stress"] + self.assertEqual(regex.match(ur"(?fiu)\L", + u"stra\N{LATIN SMALL LETTER SHARP S}e", words=options).span(), (0, + 6)) + + options = [u"stra\N{LATIN SMALL LETTER SHARP S}e"] + self.assertEqual(regex.match(ur"(?fiu)\L", u"STRASSE", + 
words=options).span(), (0, 7)) + + options = ["kit"] + self.assertEqual(regex.search(ur"(?iu)\L", u"SKITS", + words=options).span(), (1, 4)) + self.assertEqual(regex.search(ur"(?iu)\L", + u"SK\N{LATIN CAPITAL LETTER I WITH DOT ABOVE}TS", + words=options).span(), (1, 4)) + + self.assertEqual(regex.search(ur"(?fiu)\b(\w+) +\1\b", + u" stra\N{LATIN SMALL LETTER SHARP S}e STRASSE ").span(), (1, 15)) + self.assertEqual(regex.search(ur"(?fiu)\b(\w+) +\1\b", + u" STRASSE stra\N{LATIN SMALL LETTER SHARP S}e ").span(), (1, 15)) + + self.assertEqual(regex.search(r"^\L$", "", options=[]).span(), + (0, 0)) + + def test_fuzzy(self): + # Some tests borrowed from TRE library tests. + self.assertEqual(repr(type(regex.compile('(fou){s,e<=1}'))), + self.PATTERN_CLASS) + self.assertEqual(repr(type(regex.compile('(fuu){s}'))), + self.PATTERN_CLASS) + self.assertEqual(repr(type(regex.compile('(fuu){s,e}'))), + self.PATTERN_CLASS) + self.assertEqual(repr(type(regex.compile('(anaconda){1i+1d<1,s<=1}'))), + self.PATTERN_CLASS) + self.assertEqual(repr(type(regex.compile('(anaconda){1i+1d<1,s<=1,e<=10}'))), + self.PATTERN_CLASS) + self.assertEqual(repr(type(regex.compile('(anaconda){s<=1,e<=1,1i+1d<1}'))), + self.PATTERN_CLASS) + + text = 'molasses anaconda foo bar baz smith anderson ' + self.assertEqual(regex.search('(znacnda){s<=1,e<=3,1i+1d<1}', text), + None) + self.assertEqual(regex.search('(znacnda){s<=1,e<=3,1i+1d<2}', + text).span(0, 1), ((9, 17), (9, 17))) + self.assertEqual(regex.search('(ananda){1i+1d<2}', text), None) + self.assertEqual(regex.search(r"(?:\bznacnda){e<=2}", text)[0], + "anaconda") + self.assertEqual(regex.search(r"(?:\bnacnda){e<=2}", text)[0], + "anaconda") + + text = 'anaconda foo bar baz smith anderson' + self.assertEqual(regex.search('(fuu){i<=3,d<=3,e<=5}', text).span(0, + 1), ((0, 0), (0, 0))) + self.assertEqual(regex.search('(?b)(fuu){i<=3,d<=3,e<=5}', + text).span(0, 1), ((9, 10), (9, 10))) + self.assertEqual(regex.search('(fuu){i<=2,d<=2,e<=5}', 
text).span(0, + 1), ((7, 10), (7, 10))) + self.assertEqual(regex.search('(?e)(fuu){i<=2,d<=2,e<=5}', + text).span(0, 1), ((9, 10), (9, 10))) + self.assertEqual(regex.search('(fuu){i<=3,d<=3,e}', text).span(0, 1), + ((0, 0), (0, 0))) + self.assertEqual(regex.search('(?b)(fuu){i<=3,d<=3,e}', text).span(0, + 1), ((9, 10), (9, 10))) + + self.assertEqual(repr(type(regex.compile('(approximate){s<=3,1i+1d<3}'))), + self.PATTERN_CLASS) + + # No cost limit. + self.assertEqual(regex.search('(foobar){e}', + 'xirefoabralfobarxie').span(0, 1), ((0, 6), (0, 6))) + self.assertEqual(regex.search('(?e)(foobar){e}', + 'xirefoabralfobarxie').span(0, 1), ((0, 3), (0, 3))) + self.assertEqual(regex.search('(?b)(foobar){e}', + 'xirefoabralfobarxie').span(0, 1), ((11, 16), (11, 16))) + + # At most two errors. + self.assertEqual(regex.search('(foobar){e<=2}', + 'xirefoabrzlfd').span(0, 1), ((4, 9), (4, 9))) + self.assertEqual(regex.search('(foobar){e<=2}', 'xirefoabzlfd'), None) + + # At most two inserts or substitutions and max two errors total. + self.assertEqual(regex.search('(foobar){i<=2,s<=2,e<=2}', + 'oobargoobaploowap').span(0, 1), ((5, 11), (5, 11))) + + # Find best whole word match for "foobar". + self.assertEqual(regex.search('\\b(foobar){e}\\b', 'zfoobarz').span(0, + 1), ((0, 8), (0, 8))) + self.assertEqual(regex.search('\\b(foobar){e}\\b', + 'boing zfoobarz goobar woop').span(0, 1), ((0, 6), (0, 6))) + self.assertEqual(regex.search('(?b)\\b(foobar){e}\\b', + 'boing zfoobarz goobar woop').span(0, 1), ((15, 21), (15, 21))) + + # Match whole string, allow only 1 error. 
+ self.assertEqual(regex.search('^(foobar){e<=1}$', 'foobar').span(0, 1), + ((0, 6), (0, 6))) + self.assertEqual(regex.search('^(foobar){e<=1}$', 'xfoobar').span(0, + 1), ((0, 7), (0, 7))) + self.assertEqual(regex.search('^(foobar){e<=1}$', 'foobarx').span(0, + 1), ((0, 7), (0, 7))) + self.assertEqual(regex.search('^(foobar){e<=1}$', 'fooxbar').span(0, + 1), ((0, 7), (0, 7))) + self.assertEqual(regex.search('^(foobar){e<=1}$', 'foxbar').span(0, 1), + ((0, 6), (0, 6))) + self.assertEqual(regex.search('^(foobar){e<=1}$', 'xoobar').span(0, 1), + ((0, 6), (0, 6))) + self.assertEqual(regex.search('^(foobar){e<=1}$', 'foobax').span(0, 1), + ((0, 6), (0, 6))) + self.assertEqual(regex.search('^(foobar){e<=1}$', 'oobar').span(0, 1), + ((0, 5), (0, 5))) + self.assertEqual(regex.search('^(foobar){e<=1}$', 'fobar').span(0, 1), + ((0, 5), (0, 5))) + self.assertEqual(regex.search('^(foobar){e<=1}$', 'fooba').span(0, 1), + ((0, 5), (0, 5))) + self.assertEqual(regex.search('^(foobar){e<=1}$', 'xfoobarx'), None) + self.assertEqual(regex.search('^(foobar){e<=1}$', 'foobarxx'), None) + self.assertEqual(regex.search('^(foobar){e<=1}$', 'xxfoobar'), None) + self.assertEqual(regex.search('^(foobar){e<=1}$', 'xfoxbar'), None) + self.assertEqual(regex.search('^(foobar){e<=1}$', 'foxbarx'), None) + + # At most one insert, two deletes, and three substitutions. + # Additionally, deletes cost two and substitutes one, and total + # cost must be less than 4. + self.assertEqual(regex.search('(foobar){i<=1,d<=2,s<=3,2d+1s<4}', + '3oifaowefbaoraofuiebofasebfaobfaorfeoaro').span(0, 1), ((6, 13), (6, + 13))) + self.assertEqual(regex.search('(?b)(foobar){i<=1,d<=2,s<=3,2d+1s<4}', + '3oifaowefbaoraofuiebofasebfaobfaorfeoaro').span(0, 1), ((26, 33), + (26, 33))) + + # Partially fuzzy matches. 
+ self.assertEqual(regex.search('foo(bar){e<=1}zap', 'foobarzap').span(0, + 1), ((0, 9), (3, 6))) + self.assertEqual(regex.search('foo(bar){e<=1}zap', 'fobarzap'), None) + self.assertEqual(regex.search('foo(bar){e<=1}zap', 'foobrzap').span(0, + 1), ((0, 8), (3, 5))) + + text = ('www.cnn.com 64.236.16.20\nwww.slashdot.org 66.35.250.150\n' + 'For useful information, use www.slashdot.org\nthis is demo data!\n') + self.assertEqual(regex.search(r'(?s)^.*(dot.org){e}.*$', text).span(0, + 1), ((0, 120), (120, 120))) + self.assertEqual(regex.search(r'(?es)^.*(dot.org){e}.*$', text).span(0, + 1), ((0, 120), (93, 100))) + self.assertEqual(regex.search(r'^.*(dot.org){e}.*$', text).span(0, 1), + ((0, 119), (24, 101))) + + # Behaviour is unexpected, but arguably not wrong. It first finds the + # best match, then the best in what follows, etc. + self.assertEqual(regex.findall(r"\b\L{e<=1}\b", + " book cot dog desk ", words="cat dog".split()), ["cot", "dog"]) + self.assertEqual(regex.findall(r"\b\L{e<=1}\b", + " book dog cot desk ", words="cat dog".split()), [" dog", "cot"]) + self.assertEqual(regex.findall(r"(?e)\b\L{e<=1}\b", + " book dog cot desk ", words="cat dog".split()), ["dog", "cot"]) + self.assertEqual(regex.findall(r"(?r)\b\L{e<=1}\b", + " book cot dog desk ", words="cat dog".split()), ["dog ", "cot"]) + self.assertEqual(regex.findall(r"(?er)\b\L{e<=1}\b", + " book cot dog desk ", words="cat dog".split()), ["dog", "cot"]) + self.assertEqual(regex.findall(r"(?r)\b\L{e<=1}\b", + " book dog cot desk ", words="cat dog".split()), ["cot", "dog"]) + self.assertEqual(regex.findall(ur"\b\L{e<=1}\b", + u" book cot dog desk ", words=u"cat dog".split()), [u"cot", u"dog"]) + self.assertEqual(regex.findall(ur"\b\L{e<=1}\b", + u" book dog cot desk ", words=u"cat dog".split()), [u" dog", u"cot"]) + self.assertEqual(regex.findall(ur"(?e)\b\L{e<=1}\b", + u" book dog cot desk ", words=u"cat dog".split()), [u"dog", u"cot"]) + self.assertEqual(regex.findall(ur"(?r)\b\L{e<=1}\b", + u" book 
cot dog desk ", words=u"cat dog".split()), [u"dog ", u"cot"]) + self.assertEqual(regex.findall(ur"(?er)\b\L{e<=1}\b", + u" book cot dog desk ", words=u"cat dog".split()), [u"dog", u"cot"]) + self.assertEqual(regex.findall(ur"(?r)\b\L{e<=1}\b", + u" book dog cot desk ", words=u"cat dog".split()), [u"cot", u"dog"]) + + self.assertEqual(regex.search(r"(\w+) (\1{e<=1})", "foo fou").groups(), + ("foo", "fou")) + self.assertEqual(regex.search(r"(?r)(\2{e<=1}) (\w+)", + "foo fou").groups(), ("foo", "fou")) + self.assertEqual(regex.search(ur"(\w+) (\1{e<=1})", + u"foo fou").groups(), (u"foo", u"fou")) + + self.assertEqual(regex.findall(r"(?:(?:QR)+){e}","abcde"), ["abcde", + ""]) + self.assertEqual(regex.findall(r"(?:Q+){e}","abc"), ["abc", ""]) + + # Hg issue 41. + self.assertEqual(regex.match(r"(?:service detection){0[^()]+)|(?R))*\)", "(ab(cd)ef)")[ + : ], ("(ab(cd)ef)", "ef")) + self.assertEqual(regex.search(r"\(((?>[^()]+)|(?R))*\)", + "(ab(cd)ef)").captures(1), ["ab", "cd", "(cd)", "ef"]) + + self.assertEqual(regex.search(r"(?r)\(((?R)|(?>[^()]+))*\)", + "(ab(cd)ef)")[ : ], ("(ab(cd)ef)", "ab")) + self.assertEqual(regex.search(r"(?r)\(((?R)|(?>[^()]+))*\)", + "(ab(cd)ef)").captures(1), ["ef", "cd", "(cd)", "ab"]) + + self.assertEqual(regex.search(r"\(([^()]+|(?R))*\)", + "some text (a(b(c)d)e) more text")[ : ], ("(a(b(c)d)e)", "e")) + + self.assertEqual(regex.search(r"(?r)\(((?R)|[^()]+)*\)", + "some text (a(b(c)d)e) more text")[ : ], ("(a(b(c)d)e)", "a")) + + self.assertEqual(regex.search(r"(foo(\(((?:(?>[^()]+)|(?2))*)\)))", + "foo(bar(baz)+baz(bop))")[ : ], ("foo(bar(baz)+baz(bop))", + "foo(bar(baz)+baz(bop))", "(bar(baz)+baz(bop))", + "bar(baz)+baz(bop)")) + + self.assertEqual(regex.search(r"(?r)(foo(\(((?:(?2)|(?>[^()]+))*)\)))", + "foo(bar(baz)+baz(bop))")[ : ], ("foo(bar(baz)+baz(bop))", + "foo(bar(baz)+baz(bop))", "(bar(baz)+baz(bop))", + "bar(baz)+baz(bop)")) + + rgx = 
regex.compile(r"""^\s*(<\s*([a-zA-Z:]+)(?:\s*[a-zA-Z:]*\s*=\s*(?:'[^']*'|"[^"]*"))*\s*(/\s*)?>(?:[^<>]*|(?1))*(?(3)|<\s*/\s*\2\s*>))\s*$""") + self.assertEqual(bool(rgx.search('')), True) + self.assertEqual(bool(rgx.search('')), False) + self.assertEqual(bool(rgx.search('')), True) + self.assertEqual(bool(rgx.search('')), False) + self.assertEqual(bool(rgx.search('')), False) + + self.assertEqual(bool(rgx.search('')), False) + self.assertEqual(bool(rgx.search('')), True) + self.assertEqual(bool(rgx.search('< fooo / >')), True) + # The next regex should and does match. Perl 5.14 agrees. + #self.assertEqual(bool(rgx.search('foo')), False) + self.assertEqual(bool(rgx.search('foo')), False) + + self.assertEqual(bool(rgx.search('foo')), True) + self.assertEqual(bool(rgx.search('foo')), True) + self.assertEqual(bool(rgx.search('')), True) + + def test_copy(self): + # PatternObjects are immutable, therefore there's no need to clone them. + r = regex.compile("a") + self.assert_(copy.copy(r) is r) + self.assert_(copy.deepcopy(r) is r) + + # MatchObjects are normally mutable because the target string can be + # detached. However, after the target string has been detached, a + # MatchObject becomes immutable, so there's no need to clone it. + m = r.match("a") + self.assert_(copy.copy(m) is not m) + self.assert_(copy.deepcopy(m) is not m) + + self.assert_(m.string is not None) + m2 = copy.copy(m) + m2.detach_string() + self.assert_(m.string is not None) + self.assert_(m2.string is None) + + # The following behaviour matches that of the re module. + it = regex.finditer(".", "ab") + it2 = copy.copy(it) + self.assertEqual(it.next().group(), "a") + self.assertEqual(it2.next().group(), "b") + + # The following behaviour matches that of the re module. + it = regex.finditer(".", "ab") + it2 = copy.deepcopy(it) + self.assertEqual(it.next().group(), "a") + self.assertEqual(it2.next().group(), "b") + + # The following behaviour is designed to match that of copying 'finditer'. 
+ it = regex.splititer(" ", "a b") + it2 = copy.copy(it) + self.assertEqual(it.next(), "a") + self.assertEqual(it2.next(), "b") + + # The following behaviour is designed to match that of copying 'finditer'. + it = regex.splititer(" ", "a b") + it2 = copy.deepcopy(it) + self.assertEqual(it.next(), "a") + self.assertEqual(it2.next(), "b") + + def test_format(self): + self.assertEqual(regex.subf(r"(\w+) (\w+)", "{0} => {2} {1}", + "foo bar"), "foo bar => bar foo") + self.assertEqual(regex.subf(r"(?\w+) (?\w+)", + "{word2} {word1}", "foo bar"), "bar foo") + + self.assertEqual(regex.subfn(r"(\w+) (\w+)", "{0} => {2} {1}", + "foo bar"), ("foo bar => bar foo", 1)) + self.assertEqual(regex.subfn(r"(?\w+) (?\w+)", + "{word2} {word1}", "foo bar"), ("bar foo", 1)) + + self.assertEqual(regex.match(r"(\w+) (\w+)", + "foo bar").expandf("{0} => {2} {1}"), "foo bar => bar foo") + + def test_fullmatch(self): + self.assertEqual(bool(regex.fullmatch(r"abc", "abc")), True) + self.assertEqual(bool(regex.fullmatch(r"abc", "abcx")), False) + self.assertEqual(bool(regex.fullmatch(r"abc", "abcx", endpos=3)), True) + + self.assertEqual(bool(regex.fullmatch(r"abc", "xabc", pos=1)), True) + self.assertEqual(bool(regex.fullmatch(r"abc", "xabcy", pos=1)), False) + self.assertEqual(bool(regex.fullmatch(r"abc", "xabcy", pos=1, + endpos=4)), True) + + self.assertEqual(bool(regex.fullmatch(r"(?r)abc", "abc")), True) + self.assertEqual(bool(regex.fullmatch(r"(?r)abc", "abcx")), False) + self.assertEqual(bool(regex.fullmatch(r"(?r)abc", "abcx", endpos=3)), + True) + + self.assertEqual(bool(regex.fullmatch(r"(?r)abc", "xabc", pos=1)), + True) + self.assertEqual(bool(regex.fullmatch(r"(?r)abc", "xabcy", pos=1)), + False) + self.assertEqual(bool(regex.fullmatch(r"(?r)abc", "xabcy", pos=1, + endpos=4)), True) + + def test_hg_bugs(self): + # Hg issue 28. + self.assertEqual(bool(regex.compile("(?>b)", flags=regex.V1)), True) + + # Hg issue 29. 
+ self.assertEqual(bool(regex.compile("^((?>\w+)|(?>\s+))*$", + flags=regex.V1)), True) + + # Hg issue 31. + self.assertEqual(regex.findall(r"\((?:(?>[^()]+)|(?R))*\)", + "a(bcd(e)f)g(h)"), ['(bcd(e)f)', '(h)']) + self.assertEqual(regex.findall(r"\((?:(?:[^()]+)|(?R))*\)", + "a(bcd(e)f)g(h)"), ['(bcd(e)f)', '(h)']) + self.assertEqual(regex.findall(r"\((?:(?>[^()]+)|(?R))*\)", + "a(b(cd)e)f)g)h"), ['(b(cd)e)']) + self.assertEqual(regex.findall(r"\((?:(?>[^()]+)|(?R))*\)", + "a(bc(d(e)f)gh"), ['(d(e)f)']) + self.assertEqual(regex.findall(r"(?r)\((?:(?>[^()]+)|(?R))*\)", + "a(bc(d(e)f)gh"), ['(d(e)f)']) + self.assertEqual([m.group() for m in + regex.finditer(r"\((?:[^()]*+|(?0))*\)", "a(b(c(de)fg)h")], + ['(c(de)fg)']) + + # Hg issue 32. + self.assertEqual(regex.search("a(bc)d", "abcd", regex.I | + regex.V1).group(0), "abcd") + + # Hg issue 33. + self.assertEqual(regex.search("([\da-f:]+)$", "E", regex.I | + regex.V1).group(0), "E") + self.assertEqual(regex.search("([\da-f:]+)$", "e", regex.I | + regex.V1).group(0), "e") + + # Hg issue 34. + self.assertEqual(regex.search("^(?=ab(de))(abd)(e)", "abde").groups(), + ('de', 'abd', 'e')) + + # Hg issue 35. + self.assertEqual(bool(regex.match(r"\ ", " ", flags=regex.X)), True) + + # Hg issue 36. + self.assertEqual(regex.search(r"^(a|)\1{2}b", "b").group(0, 1), ('b', + '')) + + # Hg issue 37. + self.assertEqual(regex.search("^(a){0,0}", "abc").group(0, 1), ('', + None)) + + # Hg issue 38. + self.assertEqual(regex.search("(?>.*/)b", "a/b").group(0), "a/b") + + # Hg issue 39. + self.assertEqual(regex.search(r"(?V0)((?i)blah)\s+\1", + "blah BLAH").group(0, 1), ("blah BLAH", "blah")) + self.assertEqual(regex.search(r"(?V1)((?i)blah)\s+\1", "blah BLAH"), + None) + + # Hg issue 40. + self.assertEqual(regex.search(r"(\()?[^()]+(?(1)\)|)", + "(abcd").group(0), "abcd") + + # Hg issue 42. 
+ self.assertEqual(regex.search("(a*)*", "a").span(1), (1, 1)) + self.assertEqual(regex.search("(a*)*", "aa").span(1), (2, 2)) + self.assertEqual(regex.search("(a*)*", "aaa").span(1), (3, 3)) + + # Hg issue 43. + self.assertEqual(regex.search("a(?#xxx)*", "aaa").group(), "aaa") + + # Hg issue 44. + self.assertEqual(regex.search("(?=abc){3}abc", "abcabcabc").span(), (0, + 3)) + + # Hg issue 45. + self.assertEqual(regex.search("^(?:a(?:(?:))+)+", "a").span(), (0, 1)) + self.assertEqual(regex.search("^(?:a(?:(?:))+)+", "aa").span(), (0, 2)) + + # Hg issue 46. + self.assertEqual(regex.search("a(?x: b c )d", "abcd").group(0), "abcd") + + # Hg issue 47. + self.assertEqual(regex.search("a#comment\n*", "aaa", + flags=regex.X).group(0), "aaa") + + # Hg issue 48. + self.assertEqual(regex.search(r"(?V1)(a(?(1)\1)){1}", + "aaaaaaaaaa").span(0, 1), ((0, 1), (0, 1))) + self.assertEqual(regex.search(r"(?V1)(a(?(1)\1)){2}", + "aaaaaaaaaa").span(0, 1), ((0, 3), (1, 3))) + self.assertEqual(regex.search(r"(?V1)(a(?(1)\1)){3}", + "aaaaaaaaaa").span(0, 1), ((0, 6), (3, 6))) + self.assertEqual(regex.search(r"(?V1)(a(?(1)\1)){4}", + "aaaaaaaaaa").span(0, 1), ((0, 10), (6, 10))) + + # Hg issue 49. + self.assertEqual(regex.search("(?V1)(a)(?<=b(?1))", "baz").group(0), + "a") + + # Hg issue 50. 
+ self.assertEqual(regex.findall(ur'(?fi)\L', + u'POST, Post, post, po\u017Ft, po\uFB06, and po\uFB05', + keywords=['post','pos']), [u'POST', u'Post', u'post', u'po\u017Ft', + u'po\uFB06', u'po\uFB05']) + self.assertEqual(regex.findall(ur'(?fi)pos|post', + u'POST, Post, post, po\u017Ft, po\uFB06, and po\uFB05'), [u'POS', + u'Pos', u'pos', u'po\u017F', u'po\uFB06', u'po\uFB05']) + self.assertEqual(regex.findall(ur'(?fi)post|pos', + u'POST, Post, post, po\u017Ft, po\uFB06, and po\uFB05'), [u'POST', + u'Post', u'post', u'po\u017Ft', u'po\uFB06', u'po\uFB05']) + self.assertEqual(regex.findall(ur'(?fi)post|another', + u'POST, Post, post, po\u017Ft, po\uFB06, and po\uFB05'), [u'POST', + u'Post', u'post', u'po\u017Ft', u'po\uFB06', u'po\uFB05']) + + # Hg issue 51. + self.assertEqual(regex.search("(?V1)((a)(?1)|(?2))", "a").group(0, 1, + 2), ('a', 'a', None)) + + # Hg issue 52. + self.assertEqual(regex.search(r"(?V1)(\1xx|){6}", "xx").span(0, 1), + ((0, 2), (2, 2))) + + # Hg issue 53. + self.assertEqual(regex.search("(a|)+", "a").group(0, 1), ("a", "")) + + # Hg issue 54. + self.assertEqual(regex.search(r"(a|)*\d", "a" * 80), None) + + # Hg issue 55. + self.assertEqual(regex.search("^(?:a?b?)*$", "ac"), None) + + # Hg issue 58. + self.assertRaisesRegex(regex.error, self.UNDEF_CHAR_NAME, lambda: + regex.compile("\\N{1}")) + + # Hg issue 59. + self.assertEqual(regex.search("\\Z", "a\na\n").span(0), (4, 4)) + + # Hg issue 60. + self.assertEqual(regex.search("(q1|.)*(q2|.)*(x(a|bc)*y){2,}", + "xayxay").group(0), "xayxay") + + # Hg issue 61. + self.assertEqual(regex.search("(?i)[^a]", "A"), None) + + # Hg issue 63. + self.assertEqual(regex.search(u"(?iu)[[:ascii:]]", u"\N{KELVIN SIGN}"), + None) + + # Hg issue 66. + self.assertEqual(regex.search("((a|b(?1)c){3,5})", "baaaaca").group(0, + 1, 2), ('aaaa', 'aaaa', 'a')) + + # Hg issue 71. 
+ self.assertEqual(regex.findall(r"(?<=:\S+ )\w+", ":9 abc :10 def"), + ['abc', 'def']) + self.assertEqual(regex.findall(r"(?<=:\S* )\w+", ":9 abc :10 def"), + ['abc', 'def']) + self.assertEqual(regex.findall(r"(?<=:\S+? )\w+", ":9 abc :10 def"), + ['abc', 'def']) + self.assertEqual(regex.findall(r"(?<=:\S*? )\w+", ":9 abc :10 def"), + ['abc', 'def']) + + # Hg issue 73. + self.assertEqual(regex.search(r"(?:fe)?male", "female").group(), + "female") + self.assertEqual([m.group() for m in + regex.finditer(r"(fe)?male: h(?(1)(er)|(is)) (\w+)", + "female: her dog; male: his cat. asdsasda")], ['female: her dog', + 'male: his cat']) + + # Hg issue 78. + self.assertEqual(regex.search(r'(?\((?:[^()]++|(?&rec))*\))', + 'aaa(((1+0)+1)+1)bbb').captures('rec'), ['(1+0)', '((1+0)+1)', + '(((1+0)+1)+1)']) + + # Hg issue 80. + self.assertRaisesRegex(regex.error, self.BAD_ESCAPE, lambda: + regex.sub('x', '\\', 'x'), ) + + # Hg issue 82. + fz = "(CAGCCTCCCATTTCAGAATATACATCC){1a(?b))', "ab").spans("x"), [(1, + 2), (0, 2)]) + + # Hg issue 91. + # Check that the replacement cache works. + self.assertEqual(regex.sub(r'(-)', lambda m: m.expand(r'x'), 'a-b-c'), + 'axbxc') + + # Hg issue 94. + rx = regex.compile(r'\bt(est){i<2}', flags=regex.V1) + self.assertEqual(rx.search("Some text"), None) + self.assertEqual(rx.findall("Some text"), []) + + # Hg issue 95. + self.assertRaisesRegex(regex.error, + '^nothing to repeat at position 3$', lambda: regex.compile(r'.???')) + + # Hg issue 97. + self.assertEquals(regex.escape(u'foo!?'), u'foo\\!\\?') + self.assertEquals(regex.escape(u'foo!?', special_only=True), + u'foo!\\?') + + self.assertEquals(regex.escape('foo!?'), 'foo\\!\\?') + self.assertEquals(regex.escape('foo!?', special_only=True), 'foo!\\?') + + # Hg issue 100. 
+ self.assertEquals(regex.search('^([^z]*(?:WWWi|W))?$', + 'WWWi').groups(), ('WWWi', )) + self.assertEquals(regex.search('^([^z]*(?:WWWi|w))?$', + 'WWWi').groups(), ('WWWi', )) + self.assertEquals(regex.search('^([^z]*?(?:WWWi|W))?$', + 'WWWi').groups(), ('WWWi', )) + + # Hg issue 101. + pat = regex.compile(r'xxx', flags=regex.FULLCASE | regex.UNICODE) + self.assertEquals([x.group() for x in pat.finditer('yxxx')], ['xxx']) + self.assertEquals(pat.findall('yxxx'), ['xxx']) + + raw = 'yxxx' + self.assertEquals([x.group() for x in pat.finditer(raw)], ['xxx']) + self.assertEquals(pat.findall(raw), ['xxx']) + + pat = regex.compile(r'xxx', flags=regex.FULLCASE | regex.IGNORECASE | + regex.UNICODE) + self.assertEquals([x.group() for x in pat.finditer('yxxx')], ['xxx']) + self.assertEquals(pat.findall('yxxx'), ['xxx']) + + raw = 'yxxx' + self.assertEquals([x.group() for x in pat.finditer(raw)], ['xxx']) + self.assertEquals(pat.findall(raw), ['xxx']) + + # Hg issue 106. + self.assertEquals(regex.sub('(?V0).*', 'x', 'test'), 'x') + self.assertEquals(regex.sub('(?V1).*', 'x', 'test'), 'xx') + + self.assertEquals(regex.sub('(?V0).*?', '|', 'test'), '|t|e|s|t|') + self.assertEquals(regex.sub('(?V1).*?', '|', 'test'), '|||||||||') + + # Hg issue 112. + self.assertEquals(regex.sub(r'^(@)\n(?!.*?@)(.*)', + r'\1\n==========\n\2', '@\n', flags=regex.DOTALL), '@\n==========\n') + + # Hg issue 109. 
+ self.assertEquals(regex.match(r'(?:cats|cat){e<=1}', + 'caz').fuzzy_counts, (1, 0, 0)) + self.assertEquals(regex.match(r'(?e)(?:cats|cat){e<=1}', + 'caz').fuzzy_counts, (1, 0, 0)) + self.assertEquals(regex.match(r'(?b)(?:cats|cat){e<=1}', + 'caz').fuzzy_counts, (1, 0, 0)) + + self.assertEquals(regex.match(r'(?:cat){e<=1}', 'caz').fuzzy_counts, + (1, 0, 0)) + self.assertEquals(regex.match(r'(?e)(?:cat){e<=1}', + 'caz').fuzzy_counts, (1, 0, 0)) + self.assertEquals(regex.match(r'(?b)(?:cat){e<=1}', + 'caz').fuzzy_counts, (1, 0, 0)) + + self.assertEquals(regex.match(r'(?:cats){e<=2}', 'c ats').fuzzy_counts, + (1, 1, 0)) + self.assertEquals(regex.match(r'(?e)(?:cats){e<=2}', + 'c ats').fuzzy_counts, (0, 1, 0)) + self.assertEquals(regex.match(r'(?b)(?:cats){e<=2}', + 'c ats').fuzzy_counts, (0, 1, 0)) + + self.assertEquals(regex.match(r'(?:cats){e<=2}', + 'c a ts').fuzzy_counts, (0, 2, 0)) + self.assertEquals(regex.match(r'(?e)(?:cats){e<=2}', + 'c a ts').fuzzy_counts, (0, 2, 0)) + self.assertEquals(regex.match(r'(?b)(?:cats){e<=2}', + 'c a ts').fuzzy_counts, (0, 2, 0)) + + self.assertEquals(regex.match(r'(?:cats){e<=1}', + 'c ats').fuzzy_counts, (0, 1, 0)) + self.assertEquals(regex.match(r'(?e)(?:cats){e<=1}', + 'c ats').fuzzy_counts, (0, 1, 0)) + self.assertEquals(regex.match(r'(?b)(?:cats){e<=1}', + 'c ats').fuzzy_counts, (0, 1, 0)) + +if not hasattr(str, "format"): + # Strings don't have the .format method (below Python 2.6). + del RegexTests.test_format + +def test_main(): + run_unittest(RegexTests) + +if __name__ == "__main__": + test_main() diff --git a/sickbeard/name_parser/parser.py b/sickbeard/name_parser/parser.py index cd36951a..bfa0cf8c 100644 --- a/sickbeard/name_parser/parser.py +++ b/sickbeard/name_parser/parser.py @@ -16,35 +16,33 @@ # You should have received a copy of the GNU General Public License # along with SickRage. If not, see . 
+import re import datetime import os.path -import re import threading import regexes -import time import sickbeard -from sickbeard import logger, helpers, scene_numbering, db -from sickbeard.exceptions import EpisodeNotFoundByAbsoluteNumberException +from sickbeard import logger, helpers, scene_numbering +from regex import regex from dateutil import parser nameparser_lock = threading.Lock() - class NameParser(object): ALL_REGEX = 0 NORMAL_REGEX = 1 SPORTS_REGEX = 2 ANIME_REGEX = 3 - def __init__(self, file_name=True, show=None, useIndexers=False): + def __init__(self, file_name=True, showObj=None, epObj=None, useIndexers=False, convert=False): regexMode = self.ALL_REGEX - if show and show.is_anime: + if showObj and showObj.is_anime: regexMode = self.ANIME_REGEX - elif show and show.is_sports: + elif showObj and showObj.is_sports: regexMode = self.SPORTS_REGEX - elif show and not show.is_anime and not show.is_sports: + elif showObj and not showObj.is_anime and not showObj.is_sports: regexMode = self.NORMAL_REGEX self.file_name = file_name @@ -53,7 +51,9 @@ class NameParser(object): self._compile_regexes(self.regexMode) self.showList = sickbeard.showList self.useIndexers = useIndexers - self.show = show + self.showObj = showObj + self.epObj = epObj + self.convert = convert def clean_series_name(self, series_name): """Cleans up series name by removing any . 
and _ @@ -85,7 +85,7 @@ class NameParser(object): uncompiled_regex = [regexes.anime_regexes, regexes.sports_regexs, regexes.normal_regexes] elif regexMode == self.NORMAL_REGEX: - logger.log(u"Using NORMAL regexs", logger.DEBUG) + logger.log(u"Using NORMAL reqgexs", logger.DEBUG) uncompiled_regex = [regexes.normal_regexes] elif regexMode == self.SPORTS_REGEX: @@ -101,125 +101,120 @@ class NameParser(object): uncompiled_regex = [regexes.normal_regexes] for regexItem in uncompiled_regex: - for regex_type, regex in regexItem.items(): - try: - self.compiled_regexes[regex_type] - except: - self.compiled_regexes[regex_type] = {} - - for (cur_pattern_name, cur_pattern) in regex: + for regex_type, regex_pattern in regexItem.items(): + for (cur_pattern_name, cur_pattern) in regex_pattern: try: - cur_regex = re.compile(cur_pattern, re.VERBOSE | re.IGNORECASE) - except re.error, errormsg: + cur_regex = regex.compile(cur_pattern, regex.V1 | regex.VERBOSE | regex.IGNORECASE | regex.BESTMATCH) + except regex.error, errormsg: logger.log(u"WARNING: Invalid episode_pattern, %s. 
%s" % (errormsg, cur_pattern)) else: - self.compiled_regexes[regex_type].update({cur_pattern_name: cur_regex}) + self.compiled_regexes[(regex_type,cur_pattern_name)] = cur_regex def _parse_string(self, name): if not name: return - for cur_regex_type, cur_regexes in self.compiled_regexes.items(): - for cur_regex_name, cur_regex in cur_regexes.items(): - match = cur_regex.match(name) + result = ParseResult(name) + for (cur_regex_type, cur_regex_name), cur_regex in self.compiled_regexes.items(): + match = cur_regex.fullmatch(name) - if not match: + if not match: + continue + + result.which_regex = [cur_regex_name] + + named_groups = match.groupdict().keys() + + if 'series_name' in named_groups: + result.series_name = match.group('series_name') + if result.series_name: + result.series_name = self.clean_series_name(result.series_name) + else:continue + + if 'season_num' in named_groups: + tmp_season = int(match.group('season_num')) + if cur_regex_name == 'bare' and tmp_season in (19, 20): continue + result.season_number = tmp_season - result = ParseResult(name) - result.which_regex = [cur_regex_name] + if 'ep_num' in named_groups: + ep_num = self._convert_number(match.group('ep_num')) + if 'extra_ep_num' in named_groups and match.group('extra_ep_num'): + result.episode_numbers = range(ep_num, self._convert_number(match.group('extra_ep_num')) + 1) + else: + result.episode_numbers = [ep_num] - named_groups = match.groupdict().keys() + if 'ep_ab_num' in named_groups: + ep_ab_num = self._convert_number(match.group('ep_ab_num')) + if 'extra_ab_ep_num' in named_groups and match.group('extra_ab_ep_num'): + result.ab_episode_numbers = range(ep_ab_num, + self._convert_number(match.group('extra_ab_ep_num')) + 1) + else: + result.ab_episode_numbers = [ep_ab_num] - if 'series_name' in named_groups: - result.series_name = match.group('series_name') - if result.series_name: - result.series_name = self.clean_series_name(result.series_name) + if 'sports_event_id' in named_groups: + 
sports_event_id = match.group('sports_event_id') + if sports_event_id: + result.sports_event_id = int(match.group('sports_event_id')) - cur_show = helpers.get_show_by_name(result.series_name, useIndexer=self.useIndexers) - if not cur_show: - continue - - # if we have a show object to compare against then do so else return the result anyways - if self.show: - if self.show.indexerid != cur_show.indexerid: - logger.log( - u"I expected an episode of the show " + self.show.name + " but the parser thinks its the show " + cur_show.name + ". I will continue thinking its " + self.show.name, - logger.WARNING) - continue - - result.show = cur_show - - if 'season_num' in named_groups: - tmp_season = int(match.group('season_num')) - if cur_regex_name == 'bare' and tmp_season in (19, 20): - continue - result.season_number = tmp_season - - if 'ep_num' in named_groups: - ep_num = self._convert_number(match.group('ep_num')) - if 'extra_ep_num' in named_groups and match.group('extra_ep_num'): - result.episode_numbers = range(ep_num, self._convert_number(match.group('extra_ep_num')) + 1) - else: - result.episode_numbers = [ep_num] - - if 'ep_ab_num' in named_groups: - ep_ab_num = self._convert_number(match.group('ep_ab_num')) - if 'extra_ab_ep_num' in named_groups and match.group('extra_ab_ep_num'): - result.ab_episode_numbers = range(ep_ab_num, - self._convert_number(match.group('extra_ab_ep_num')) + 1) - else: - result.ab_episode_numbers = [ep_ab_num] - - if 'sports_event_id' in named_groups: - sports_event_id = match.group('sports_event_id') - if sports_event_id: - result.sports_event_id = int(match.group('sports_event_id')) - - if 'sports_event_name' in named_groups: - result.sports_event_name = match.group('sports_event_name') - if result.sports_event_name: - result.sports_event_name = self.clean_series_name(result.sports_event_name) - - if 'sports_event_date' in named_groups: - sports_event_date = match.group('sports_event_date') - if sports_event_date: - try: - 
result.sports_event_date = parser.parse(sports_event_date, fuzzy=True).date() - except: - continue - - if 'air_year' in named_groups and 'air_month' in named_groups and 'air_day' in named_groups: - year = int(match.group('air_year')) - month = int(match.group('air_month')) - day = int(match.group('air_day')) + if 'sports_event_name' in named_groups: + result.sports_event_name = match.group('sports_event_name') + if result.sports_event_name: + result.sports_event_name = self.clean_series_name(result.sports_event_name) + if 'sports_event_date' in named_groups: + sports_event_date = match.group('sports_event_date') + if sports_event_date: try: - dtStr = '%s-%s-%s' % (year, month, day) - result.air_date = datetime.datetime.strptime(dtStr, "%Y-%m-%d").date() + result.sports_event_date = parser.parse(sports_event_date, fuzzy=True).date() except: continue - if 'extra_info' in named_groups: - tmp_extra_info = match.group('extra_info') + if 'air_year' in named_groups and 'air_month' in named_groups and 'air_day' in named_groups: + year = int(match.group('air_year')) + month = int(match.group('air_month')) + day = int(match.group('air_day')) - # Show.S04.Special or Show.S05.Part.2.Extras is almost certainly not every episode in the season - if tmp_extra_info and cur_regex_name == 'season_only' and re.search( - r'([. _-]|^)(special|extra)s?\w*([. 
_-]|$)', tmp_extra_info, re.I): - continue - result.extra_info = tmp_extra_info + try: + dtStr = '%s-%s-%s' % (year, month, day) + result.air_date = datetime.datetime.strptime(dtStr, "%Y-%m-%d").date() + except: + continue - if 'release_group' in named_groups: - result.release_group = match.group('release_group') + if 'extra_info' in named_groups: + tmp_extra_info = match.group('extra_info') - if result.show and result.show.is_anime and cur_regex_type in ['anime', 'normal']: - return result - elif result.show and result.show.is_sports and cur_regex_type == 'sports': - return result - elif cur_regex_type == 'normal': - return result + # Show.S04.Special or Show.S05.Part.2.Extras is almost certainly not every episode in the season + if tmp_extra_info and cur_regex_name == 'season_only' and regex.search( + r'([. _-]|^)(special|extra)s?\w*([. _-]|$)', tmp_extra_info, regex.I): + continue + result.extra_info = tmp_extra_info - return None + if 'release_group' in named_groups: + result.release_group = match.group('release_group') + + cur_show = helpers.get_show_by_name(result.series_name, useIndexer=self.useIndexers) + if cur_show: + if self.showObj: + if self.showObj.indexerid != cur_show.indexerid: + logger.log( + u"I expected an episode of the show " + self.showObj.name + " but the parser thinks its the show " + cur_show.name + ". I will continue thinking its " + self.showObj.name, + logger.WARNING) + return + + result.show = cur_show + + if not result.show: + continue + + # Match found! 
+ break + + + if self.convert: + result = result.convert() + + return result def _combine_results(self, first, second, attr): # if the first doesn't exist then return the second or nothing @@ -291,7 +286,7 @@ class NameParser(object): # break it into parts if there are any (dirname, file name, extension) dir_name, file_name = os.path.split(name) - ext_match = re.match('(.*)\.\w{3,4}$', file_name) + ext_match = regex.match('(.*)\.\w{3,4}$', file_name) if ext_match and self.file_name: base_file_name = ext_match.group(1) else: @@ -364,7 +359,8 @@ class ParseResult(object): release_group=None, air_date=None, ab_episode_numbers=None, - show=None + show=None, + score=None ): self.original_name = original_name @@ -392,6 +388,7 @@ class ParseResult(object): self.which_regex = None self.show = show + self.score = score def __eq__(self, other): if not other: @@ -419,6 +416,8 @@ class ParseResult(object): return False if self.show != other.show: return False + if self.score != other.score: + return False return True @@ -479,7 +478,7 @@ class ParseResult(object): new_episode_numbers.append(e) new_season_numbers.append(s) - # need to do a quick sanity check here. It's possible that we now have episodes + # need to do a quick sanity check here. It's possible that we now have episodes # from more than one season (by tvdb numbering), and this is just too much # for sickbeard, so we'd need to flag it. 
new_season_numbers = list(set(new_season_numbers)) # remove duplicates @@ -546,17 +545,7 @@ class NameParserCache(object): logger.log("Using cached parse result for: " + name, logger.DEBUG) return self._previous_parsed[name] - name_parser_cache = NameParserCache() - class InvalidNameException(Exception): - "The given name is not valid" - - -class MultipleSceneShowResults(Exception): - pass - - -class MultipleSceneEpisodeResults(Exception): - pass + "The given name is not valid" \ No newline at end of file diff --git a/sickbeard/naming.py b/sickbeard/naming.py index 1b0deceb..59692da9 100644 --- a/sickbeard/naming.py +++ b/sickbeard/naming.py @@ -56,6 +56,31 @@ class TVShow(): self.anime = 0 self.scene = 0 + def _is_anime(self): + if (self.anime > 0): + return True + else: + return False + + is_anime = property(_is_anime) + + def _is_sports(self): + if (self.sports > 0): + return True + else: + return False + + is_sports = property(_is_sports) + + def _is_scene(self): + if (self.scene > 0): + return True + else: + return False + + is_scene = property(_is_scene) + + class TVEpisode(tv.TVEpisode): def __init__(self, season, episode, absolute_number, name): self.relatedEps = [] @@ -139,9 +164,7 @@ def check_valid_sports_naming(pattern=None): return valid def validate_name(pattern, multi=None, file_only=False, abd=False, sports=False): - ep = _generate_sample_ep(multi, abd, sports) - - parser = NameParser(True) + ep = generate_sample_ep(multi, abd, sports) new_name = ep.formatted_filename(pattern, multi) + '.ext' new_path = ep.formatted_dir(pattern, multi) @@ -154,9 +177,11 @@ def validate_name(pattern, multi=None, file_only=False, abd=False, sports=False) logger.log(u"Trying to parse " + new_name, logger.DEBUG) + parser = NameParser(True) + try: result = parser.parse(new_name) - except InvalidNameException, e : + except Exception, e: logger.log(u"Unable to parse " + new_name + ", not valid", logger.DEBUG) return False @@ -177,7 +202,7 @@ def validate_name(pattern, 
multi=None, file_only=False, abd=False, sports=False) return True -def _generate_sample_ep(multi=None, abd=False, sports=False, anime=False): +def generate_sample_ep(multi=None, abd=False, sports=False, anime=False): # make a fake episode object ep = TVEpisode(2, 3, 3, "Ep Name") @@ -215,6 +240,6 @@ def _generate_sample_ep(multi=None, abd=False, sports=False, anime=False): def test_name(pattern, multi=None, abd=False, sports=False, anime=False): - ep = _generate_sample_ep(multi, abd, sports, anime) + ep = generate_sample_ep(multi, abd, sports, anime) return {'name': ep.formatted_filename(pattern, multi), 'dir': ep.formatted_dir(pattern, multi)} \ No newline at end of file diff --git a/sickbeard/properFinder.py b/sickbeard/properFinder.py index cbdcf32d..208975e3 100644 --- a/sickbeard/properFinder.py +++ b/sickbeard/properFinder.py @@ -119,7 +119,7 @@ class ProperFinder(): try: myParser = NameParser(False) - parse_result = myParser.parse(curProper.name).convert() + parse_result = myParser.parse(curProper.name) except InvalidNameException: logger.log(u"Unable to parse the filename " + curProper.name + " into a valid episode", logger.DEBUG) continue @@ -138,7 +138,7 @@ class ProperFinder(): showObj = parse_result.show logger.log( - u"Successful match! Result " + parse_result.series_name + " matched to show " + showObj.name, + u"Successful match! 
Result " + parse_result.original_name + " matched to show " + showObj.name, logger.DEBUG) # set the indexerid in the db to the show's indexerid diff --git a/sickbeard/providers/generic.py b/sickbeard/providers/generic.py index ab80bafd..24d5c15a 100644 --- a/sickbeard/providers/generic.py +++ b/sickbeard/providers/generic.py @@ -278,8 +278,8 @@ class GenericProvider: # parse the file name try: - myParser = NameParser(False, show=show, useIndexers=manualSearch) - parse_result = myParser.parse(title).convert() + myParser = NameParser(False, showObj=show, epObj=ep_obj, convert=True) + parse_result = myParser.parse(title) except InvalidNameException: logger.log(u"Unable to parse the filename " + title + " into a valid episode", logger.WARNING) continue diff --git a/sickbeard/tv.py b/sickbeard/tv.py index eeb9ed00..c9890efd 100644 --- a/sickbeard/tv.py +++ b/sickbeard/tv.py @@ -1895,6 +1895,26 @@ class TVEpisode(object): else: return ek.ek(os.path.join, self.show.location, self.location) + def createStrings(self, pattern=None): + patterns = [ + '%S.N.S%SE%0E', + '%S.N.S%0SE%E', + '%S.N.S%SE%E', + '%S.N.S%0SE%0E', + '%SN S%SE%0E', + '%SN S%0SE%E', + '%SN S%SE%E', + '%SN S%0SE%0E' + + ] + + strings = [] + if not pattern: + for p in patterns: + strings += [self._format_pattern(p)] + return strings + return self._format_pattern(pattern) + def prettyName(self): """ Returns the name of this episode in a "pretty" human-readable format. 
Used for logging @@ -1903,16 +1923,12 @@ class TVEpisode(object): Returns: A string representing the episode's name and season/ep numbers """ - if self.show.is_anime and not self.show.is_scene: - return self._format_pattern('%SN - %A - %EN') - elif self.show.is_anime and self.show.is_scene: - return self._format_pattern('%SN - %XA - %EN') - elif self.show.is_scene: - return self._format_pattern('%SN - %XSx%0XE - %EN') + if self.show.anime and not self.show.scene: + return self._format_pattern('%SN - %AB - %EN') elif self.show.air_by_date: return self._format_pattern('%SN - %AD - %EN') - else: - return self._format_pattern('%SN - %Sx%0E - %EN') + + return self._format_pattern('%SN - %Sx%0E - %EN') def _ep_name(self): """ @@ -1980,9 +1996,8 @@ class TVEpisode(object): if not name: return '' - np = NameParser(name) - try: + np = NameParser(name) parse_result = np.parse(name) except InvalidNameException, e: logger.log(u"Unable to get parse release_group: " + ex(e), logger.DEBUG) @@ -2017,7 +2032,7 @@ class TVEpisode(object): '%0XS': '%02d' % self.scene_season, '%XE': str(self.scene_episode), '%0XE': '%02d' % self.scene_episode, - '%A': '%(#)03d' % {'#': self.absolute_number}, + '%AB': '%(#)03d' % {'#': self.absolute_number}, '%XA': '%(#)03d' % {'#': self.scene_absolute_number}, '%RN': release_name(self.release_name), '%RG': release_group(self.release_name),