diff --git a/lib/regex/Python25/__init__.py b/lib/regex/Python25/__init__.py deleted file mode 100644 index 8b137891..00000000 --- a/lib/regex/Python25/__init__.py +++ /dev/null @@ -1 +0,0 @@ - diff --git a/lib/regex/Python25/_regex.pyd b/lib/regex/Python25/_regex.pyd deleted file mode 100644 index ad8e1c76..00000000 Binary files a/lib/regex/Python25/_regex.pyd and /dev/null differ diff --git a/lib/regex/Python26/__init__.py b/lib/regex/Python26/__init__.py deleted file mode 100644 index 8b137891..00000000 --- a/lib/regex/Python26/__init__.py +++ /dev/null @@ -1 +0,0 @@ - diff --git a/lib/regex/Python26/_regex.pyd b/lib/regex/Python26/_regex.pyd deleted file mode 100644 index 0616ec7d..00000000 Binary files a/lib/regex/Python26/_regex.pyd and /dev/null differ diff --git a/lib/regex/Python26/_regex.so b/lib/regex/Python26/_regex.so deleted file mode 100644 index fb66edca..00000000 Binary files a/lib/regex/Python26/_regex.so and /dev/null differ diff --git a/lib/regex/Python27/__init__.py b/lib/regex/Python27/__init__.py deleted file mode 100644 index 8b137891..00000000 --- a/lib/regex/Python27/__init__.py +++ /dev/null @@ -1 +0,0 @@ - diff --git a/lib/regex/Python27/_regex.pyd b/lib/regex/Python27/_regex.pyd deleted file mode 100644 index f41bb0b9..00000000 Binary files a/lib/regex/Python27/_regex.pyd and /dev/null differ diff --git a/lib/regex/Python27/_regex.so b/lib/regex/Python27/_regex.so deleted file mode 100644 index 760556b7..00000000 Binary files a/lib/regex/Python27/_regex.so and /dev/null differ diff --git a/lib/regex/__init__.py b/lib/regex/__init__.py deleted file mode 100644 index 8b137891..00000000 --- a/lib/regex/__init__.py +++ /dev/null @@ -1 +0,0 @@ - diff --git a/lib/regex/_regex.c b/lib/regex/_regex.c deleted file mode 100644 index a40d2091..00000000 --- a/lib/regex/_regex.c +++ /dev/null @@ -1,22557 +0,0 @@ -/* Secret Labs' Regular Expression Engine - * - * regular expression matching engine - * - * partial history: - * 1999-10-24 fl created 
(based on existing template matcher code) - * 2000-03-06 fl first alpha, sort of - * 2000-08-01 fl fixes for 1.6b1 - * 2000-08-07 fl use PyOS_CheckStack() if available - * 2000-09-20 fl added expand method - * 2001-03-20 fl lots of fixes for 2.1b2 - * 2001-04-15 fl export copyright as Python attribute, not global - * 2001-04-28 fl added __copy__ methods (work in progress) - * 2001-05-14 fl fixes for 1.5.2 compatibility - * 2001-07-01 fl added BIGCHARSET support (from Martin von Loewis) - * 2001-10-18 fl fixed group reset issue (from Matthew Mueller) - * 2001-10-20 fl added split primitive; reenable unicode for 1.6/2.0/2.1 - * 2001-10-21 fl added sub/subn primitive - * 2001-10-24 fl added finditer primitive (for 2.2 only) - * 2001-12-07 fl fixed memory leak in sub/subn (Guido van Rossum) - * 2002-11-09 fl fixed empty sub/subn return type - * 2003-04-18 mvl fully support 4-byte codes - * 2003-10-17 gn implemented non recursive scheme - * 2009-07-26 mrab completely re-designed matcher code - * 2011-11-18 mrab added support for PEP 393 strings - * - * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved. - * - * This version of the SRE library can be redistributed under CNRI's - * Python 1.6 license. For any other use, please contact Secret Labs - * AB (info@pythonware.com). - * - * Portions of this engine have been developed in cooperation with - * CNRI. Hewlett-Packard provided funding for 1.6 integration and - * other compatibility work. 
- */ - -/* #define VERBOSE */ - -#if defined(VERBOSE) -#define TRACE(X) printf X; -#else -#define TRACE(X) -#endif - -#include "Python.h" -#include "structmember.h" /* offsetof */ -#include -#include "_regex.h" -#include "pyport.h" -#include "pythread.h" - -#if PY_VERSION_HEX < 0x02060000 -#if SIZEOF_SIZE_T == SIZEOF_LONG_LONG -#define T_PYSSIZET T_LONGLONG -#elif SIZEOF_SIZE_T == SIZEOF_LONG -#define T_PYSSIZET T_LONG -#else -#error size_t is the same size as neither LONG nor LONGLONG -#endif - -#endif -typedef unsigned char Py_UCS1; -typedef unsigned short Py_UCS2; - -typedef RE_UINT32 RE_CODE; - -/* Properties in the General Category. */ -#define RE_PROP_GC_CN ((RE_PROP_GC << 16) | RE_PROP_CN) -#define RE_PROP_GC_LU ((RE_PROP_GC << 16) | RE_PROP_LU) -#define RE_PROP_GC_LL ((RE_PROP_GC << 16) | RE_PROP_LL) -#define RE_PROP_GC_LT ((RE_PROP_GC << 16) | RE_PROP_LT) -#define RE_PROP_GC_P ((RE_PROP_GC << 16) | RE_PROP_P) - -/* Unlimited repeat count. */ -#define RE_UNLIMITED (~(RE_CODE)0) - -/* The status of a node. */ -typedef unsigned short RE_STATUS_T; - -/* Whether to match concurrently, i.e. release the GIL while matching. */ -#define RE_CONC_NO 0 -#define RE_CONC_YES 1 -#define RE_CONC_DEFAULT 2 - -/* the side that could truncate in a partial match. - * - * The values RE_PARTIAL_LEFT and RE_PARTIAL_RIGHT are also used as array - * indexes, so they need to be 0 and 1. - */ -#define RE_PARTIAL_NONE -1 -#define RE_PARTIAL_LEFT 0 -#define RE_PARTIAL_RIGHT 1 - -/* Flags for the kind of 'sub' call: 'sub', 'subn', 'subf', 'subfn'. */ -#define RE_SUB 0x0 -#define RE_SUBN 0x1 -#if PY_VERSION_HEX >= 0x02060000 -#define RE_SUBF 0x2 -#endif - -/* The name of this module, minus the leading underscore. */ -#define RE_MODULE "regex" - -/* Error codes. */ -#define RE_ERROR_SUCCESS 1 /* Successful match. */ -#define RE_ERROR_FAILURE 0 /* Unsuccessful match. */ -#define RE_ERROR_ILLEGAL -1 /* Illegal code. */ -#define RE_ERROR_INTERNAL -2 /* Internal error. 
*/ -#define RE_ERROR_CONCURRENT -3 /* "concurrent" invalid. */ -#define RE_ERROR_MEMORY -4 /* Out of memory. */ -#define RE_ERROR_INTERRUPTED -5 /* Signal handler raised exception. */ -#define RE_ERROR_REPLACEMENT -6 /* Invalid replacement string. */ -#define RE_ERROR_INVALID_GROUP_REF -7 /* Invalid group reference. */ -#define RE_ERROR_GROUP_INDEX_TYPE -8 /* Group index type error. */ -#define RE_ERROR_NO_SUCH_GROUP -9 /* No such group. */ -#define RE_ERROR_INDEX -10 /* String index. */ -#define RE_ERROR_BACKTRACKING -11 /* Too much backtracking. */ -#define RE_ERROR_NOT_STRING -12 /* Not a string. */ -#define RE_ERROR_NOT_UNICODE -13 /* Not a Unicode string. */ -#define RE_ERROR_PARTIAL -15 /* Partial match. */ - -/* The number of backtrack entries per allocated block. */ -#define RE_BACKTRACK_BLOCK_SIZE 64 - -/* The maximum number of backtrack entries to allocate. */ -#define RE_MAX_BACKTRACK_ALLOC (1024 * 1024) - -/* The initial maximum capacity of the guard block. */ -#define RE_INIT_GUARDS_BLOCK_SIZE 16 - -/* The initial maximum capacity of the node list. */ -#define RE_INIT_NODE_LIST_SIZE 16 - -/* The size increment for various allocation lists. */ -#define RE_LIST_SIZE_INC 16 - -/* The initial maximum capacity of the capture groups. */ -#define RE_INIT_CAPTURE_SIZE 16 - -/* Node bitflags. */ -#define RE_POSITIVE_OP 0x1 -#define RE_ZEROWIDTH_OP 0x2 -#define RE_FUZZY_OP 0x4 -#define RE_REVERSE_OP 0x8 -#define RE_REQUIRED_OP 0x10 - -/* Guards against further matching can occur at the start of the body and the - * tail of a repeat containing a repeat. - */ -#define RE_STATUS_BODY 0x1 -#define RE_STATUS_TAIL 0x2 - -/* Whether a guard is added depends on whether there's a repeat in the body of - * the repeat or a group reference in the body or tail of the repeat. 
- */ -#define RE_STATUS_NEITHER 0x0 -#define RE_STATUS_REPEAT 0x4 -#define RE_STATUS_LIMITED 0x8 -#define RE_STATUS_REF 0x10 -#define RE_STATUS_VISITED_AG 0x20 -#define RE_STATUS_VISITED_REP 0x40 - -/* Whether a string node has been initialised for fast searching. */ -#define RE_STATUS_FAST_INIT 0x80 - -/* Whether a node us being used. (Additional nodes may be created while the - * pattern is being built. - */ -#define RE_STATUS_USED 0x100 - -/* Whether a node is a string node. */ -#define RE_STATUS_STRING 0x200 - -/* Whether a repeat node is within another repeat. */ -#define RE_STATUS_INNER 0x400 - -/* Various flags stored in a node status member. */ -#define RE_STATUS_SHIFT 11 - -#define RE_STATUS_FUZZY (RE_FUZZY_OP << RE_STATUS_SHIFT) -#define RE_STATUS_REVERSE (RE_REVERSE_OP << RE_STATUS_SHIFT) -#define RE_STATUS_REQUIRED (RE_REQUIRED_OP << RE_STATUS_SHIFT) - -/* The different error types for fuzzy matching. */ -#define RE_FUZZY_SUB 0 -#define RE_FUZZY_INS 1 -#define RE_FUZZY_DEL 2 -#define RE_FUZZY_ERR 3 -#define RE_FUZZY_COUNT 3 - -/* The various values in a FUZZY node. */ -#define RE_FUZZY_VAL_MAX_SUB 1 -#define RE_FUZZY_VAL_MAX_INS 2 -#define RE_FUZZY_VAL_MAX_DEL 3 -#define RE_FUZZY_VAL_MAX_ERR 4 -#define RE_FUZZY_VAL_SUB_COST 5 -#define RE_FUZZY_VAL_INS_COST 6 -#define RE_FUZZY_VAL_DEL_COST 7 -#define RE_FUZZY_VAL_MAX_COST 8 - -#define RE_FUZZY_VAL_MAX_BASE 1 -#define RE_FUZZY_VAL_COST_BASE 5 - -/* The various values in an END_FUZZY node. */ -#define RE_FUZZY_VAL_MIN_SUB 1 -#define RE_FUZZY_VAL_MIN_INS 2 -#define RE_FUZZY_VAL_MIN_DEL 3 -#define RE_FUZZY_VAL_MIN_ERR 4 - -/* The flags which will be set for full Unicode case folding. */ -#define RE_FULL_CASE_FOLDING (RE_FLAG_UNICODE | RE_FLAG_FULLCASE | RE_FLAG_IGNORECASE) - -/* The shortest string prefix for which we'll use a fast string search. */ -#define RE_MIN_FAST_LENGTH 5 - -static char copyright[] = - " RE 2.3.0 Copyright (c) 1997-2002 by Secret Labs AB "; - -/* The exception to raise on error. 
*/ -static PyObject* error_exception; - -/* The dictionary of Unicode properties. */ -static PyObject* property_dict; - -typedef struct RE_State* RE_StatePtr; - -/* Handlers for ASCII, locale and Unicode. */ -typedef struct RE_EncodingTable { - BOOL (*has_property)(RE_CODE property, Py_UCS4 ch); - BOOL (*at_boundary)(RE_StatePtr state, Py_ssize_t text_pos); - BOOL (*at_word_start)(RE_StatePtr state, Py_ssize_t text_pos); - BOOL (*at_word_end)(RE_StatePtr state, Py_ssize_t text_pos); - BOOL (*at_default_boundary)(RE_StatePtr state, Py_ssize_t text_pos); - BOOL (*at_default_word_start)(RE_StatePtr state, Py_ssize_t text_pos); - BOOL (*at_default_word_end)(RE_StatePtr state, Py_ssize_t text_pos); - BOOL (*at_grapheme_boundary)(RE_StatePtr state, Py_ssize_t text_pos); - BOOL (*is_line_sep)(Py_UCS4 ch); - BOOL (*at_line_start)(RE_StatePtr state, Py_ssize_t text_pos); - BOOL (*at_line_end)(RE_StatePtr state, Py_ssize_t text_pos); - BOOL (*possible_turkic)(Py_UCS4 ch); - int (*all_cases)(Py_UCS4 ch, Py_UCS4* codepoints); - Py_UCS4 (*simple_case_fold)(Py_UCS4 ch); - int (*full_case_fold)(Py_UCS4 ch, Py_UCS4* folded); - int (*all_turkic_i)(Py_UCS4 ch, Py_UCS4* cases); -} RE_EncodingTable; - -/* Position within the regex and text. */ -typedef struct RE_Position { - struct RE_Node* node; - Py_ssize_t text_pos; -} RE_Position; - -/* Info about fuzzy matching. */ -typedef struct RE_FuzzyInfo { - struct RE_Node* node; - size_t counts[RE_FUZZY_COUNT + 1]; /* Add 1 for total errors. */ - size_t total_cost; -} RE_FuzzyInfo; - -/* Storage for backtrack data. 
*/ -typedef struct RE_BacktrackData { - union { - struct { - size_t capture_change; - BOOL too_few_errors; - } atomic; - struct { - RE_Position position; - } branch; - struct { - RE_FuzzyInfo fuzzy_info; - Py_ssize_t text_pos; - RE_CODE index; - } fuzzy; - struct { - RE_Position position; - size_t count; - struct RE_Node* fuzzy_node; - BOOL too_few_errors; - } fuzzy_insert; - struct { - RE_Position position; - RE_INT8 fuzzy_type; - RE_INT8 step; - } fuzzy_item; - struct { - RE_Position position; - Py_ssize_t string_pos; - RE_INT8 fuzzy_type; - RE_INT8 folded_pos; - RE_INT8 folded_len; - RE_INT8 gfolded_pos; - RE_INT8 gfolded_len; - RE_INT8 step; - } fuzzy_string; - struct { - Py_ssize_t text_pos; - Py_ssize_t current_capture; - RE_CODE private_index; - RE_CODE public_index; - BOOL capture; - } group; - struct { - struct RE_Node* node; - size_t capture_change; - } group_call; - struct { - size_t capture_change; - BOOL too_few_errors; - } lookaround; - struct { - RE_Position position; - Py_ssize_t text_pos; - size_t count; - Py_ssize_t start; - size_t capture_change; - RE_CODE index; - } repeat; - }; - RE_UINT8 op; -} RE_BacktrackData; - -/* Storage for backtrack data is allocated in blocks for speed. */ -typedef struct RE_BacktrackBlock { - RE_BacktrackData items[RE_BACKTRACK_BLOCK_SIZE]; - struct RE_BacktrackBlock* previous; - struct RE_BacktrackBlock* next; - size_t capacity; - size_t count; -} RE_BacktrackBlock; - -/* Storage for saved groups. */ -typedef struct RE_SavedGroups { - struct RE_SavedGroups* previous; - struct RE_SavedGroups* next; - struct RE_GroupSpan* spans; - size_t* counts; -} RE_SavedGroups; - -/* Storage for info around a recursive by 'basic'match'. */ -typedef struct RE_Info { - RE_BacktrackBlock* current_backtrack_block; - size_t backtrack_count; - RE_SavedGroups* current_saved_groups; - struct RE_GroupCallFrame* current_group_call_frame; - BOOL must_advance; -} RE_Info; - -/* Storage for the next node. 
*/ -typedef struct RE_NextNode { - struct RE_Node* node; - struct RE_Node* test; - struct RE_Node* match_next; - Py_ssize_t match_step; -} RE_NextNode; - -/* A pattern node. */ -typedef struct RE_Node { - RE_NextNode next_1; - union { - struct { - RE_NextNode next_2; - } nonstring; - struct { - /* Used only if (node->status & RE_STATUS_STRING) is true. */ - Py_ssize_t* bad_character_offset; - Py_ssize_t* good_suffix_offset; - } string; - }; - Py_ssize_t step; - size_t value_count; - RE_CODE* values; - RE_STATUS_T status; - RE_UINT8 op; - BOOL match; -} RE_Node; - -/* Info about a group's span. */ -typedef struct RE_GroupSpan { - Py_ssize_t start; - Py_ssize_t end; -} RE_GroupSpan; - -/* Span of a guard (inclusive range). */ -typedef struct RE_GuardSpan { - Py_ssize_t low; - Py_ssize_t high; - BOOL protect; -} RE_GuardSpan; - -/* Spans guarded against further matching. */ -typedef struct RE_GuardList { - size_t capacity; - size_t count; - RE_GuardSpan* spans; - Py_ssize_t last_text_pos; - size_t last_low; -} RE_GuardList; - -/* Info about a group in a context. */ -typedef struct RE_GroupData { - RE_GroupSpan span; - size_t capture_count; - size_t capture_capacity; - Py_ssize_t current_capture; - RE_GroupSpan* captures; -} RE_GroupData; - -/* Info about a repeat. */ -typedef struct RE_RepeatData { - RE_GuardList body_guard_list; - RE_GuardList tail_guard_list; - size_t count; - Py_ssize_t start; - size_t capture_change; -} RE_RepeatData; - -/* Storage for saved repeats. */ -typedef struct RE_SavedRepeats { - struct RE_SavedRepeats* previous; - struct RE_SavedRepeats* next; - RE_RepeatData* repeats; -} RE_SavedRepeats; - -/* Guards for fuzzy sections. */ -typedef struct RE_FuzzyGuards { - RE_GuardList body_guard_list; - RE_GuardList tail_guard_list; -} RE_FuzzyGuards; - -/* Info about a capture group. */ -typedef struct RE_GroupInfo { - Py_ssize_t end_index; - RE_Node* node; - BOOL referenced; - BOOL has_name; -} RE_GroupInfo; - -/* Info about a call_ref. 
*/ -typedef struct RE_CallRefInfo { - RE_Node* node; - BOOL defined; - BOOL used; -} RE_CallRefInfo; - -/* Info about a repeat. */ -typedef struct RE_RepeatInfo { - RE_STATUS_T status; -} RE_RepeatInfo; - -/* Stack frame for a group call. */ -typedef struct RE_GroupCallFrame { - struct RE_GroupCallFrame* previous; - struct RE_GroupCallFrame* next; - RE_Node* node; - RE_GroupData* groups; - RE_RepeatData* repeats; -} RE_GroupCallFrame; - -/* Info about a string argument. */ -typedef struct RE_StringInfo { -#if PY_VERSION_HEX >= 0x02060000 - Py_buffer view; /* View of the string if it's a buffer object. */ -#endif - void* characters; /* Pointer to the characters of the string. */ - Py_ssize_t length; /* Length of the string. */ - Py_ssize_t charsize; /* Size of the characters in the string. */ - BOOL is_unicode; /* Whether the string is Unicode. */ - BOOL should_release; /* Whether the buffer should be released. */ -} RE_StringInfo; - -/* Info about where the next match was found, starting from a certain search - * position. This is used when a pattern starts with a BRANCH. - */ -#define MAX_SEARCH_POSITIONS 7 - -/* Info about a search position. */ -typedef struct { - Py_ssize_t start_pos; - Py_ssize_t match_pos; -} RE_SearchPosition; - -/* The state object used during matching. */ -typedef struct RE_State { - struct PatternObject* pattern; /* Parent PatternObject. */ - /* Info about the string being matched. */ - PyObject* string; -#if PY_VERSION_HEX >= 0x02060000 - Py_buffer view; /* View of the string if it's a buffer object. */ -#endif - Py_ssize_t charsize; - void* text; - Py_ssize_t text_length; - /* The slice of the string being searched. */ - Py_ssize_t slice_start; - Py_ssize_t slice_end; - /* Info about the capture groups. */ - RE_GroupData* groups; - Py_ssize_t lastindex; - Py_ssize_t lastgroup; - /* Info about the repeats. */ - RE_RepeatData* repeats; - Py_ssize_t search_anchor; /* Where the last match finished. 
*/ - Py_ssize_t match_pos; /* The start position of the match. */ - Py_ssize_t text_pos; /* The current position of the match. */ - Py_ssize_t final_newline; /* The index of newline at end of string, or -1. */ - Py_ssize_t final_line_sep; /* The index of line separator at end of string, or -1. */ - /* Storage for backtrack info. */ - RE_BacktrackBlock backtrack_block; - RE_BacktrackBlock* current_backtrack_block; - Py_ssize_t backtrack_allocated; - RE_BacktrackData* backtrack; - /* Storage for saved capture groups. */ - RE_SavedGroups* first_saved_groups; - RE_SavedGroups* current_saved_groups; - RE_SavedRepeats* first_saved_repeats; - RE_SavedRepeats* current_saved_repeats; - Py_ssize_t min_width; /* The minimum width of the string to match (assuming it's not a fuzzy pattern). */ - RE_EncodingTable* encoding; /* The 'encoding' of the string being searched. */ - Py_UCS4 (*char_at)(void* text, Py_ssize_t pos); - void (*set_char_at)(void* text, Py_ssize_t pos, Py_UCS4 ch); - void* (*point_to)(void* text, Py_ssize_t pos); - PyThread_type_lock lock; /* A lock for accessing the state across threads. */ - RE_FuzzyInfo fuzzy_info; /* Info about fuzzy matching. */ - size_t total_fuzzy_counts[RE_FUZZY_COUNT]; /* Totals for fuzzy matching. */ - RE_FuzzyGuards* fuzzy_guards; /* The guards for a fuzzy match. */ - size_t total_errors; /* The total number of errors of a fuzzy match. */ - size_t total_cost; /* The total cost of a fuzzy match. */ - size_t max_cost; /* The maximum permitted fuzzy cost. */ - /* The group call stack. */ - RE_GroupCallFrame* first_group_call_frame; - RE_GroupCallFrame* current_group_call_frame; - RE_GuardList* group_call_guard_list; - RE_SearchPosition search_positions[MAX_SEARCH_POSITIONS]; /* Where the search matches next. */ - size_t capture_change; /* Incremented every time a captive group changes. */ - Py_ssize_t req_pos; /* The position where the required string matched. 
*/ - Py_ssize_t req_end; /* The end position where the required string matched. */ - int partial_side; /* The side that could truncate in a partial match. */ - RE_UINT16 iterations; /* The number of iterations the matching engine has performed since checking for KeyboardInterrupt. */ - BOOL is_unicode; /* Whether the string to be matched is Unicode. */ - BOOL should_release; /* Whether the buffer should be released. */ - BOOL overlapped; /* Whether the matches can be overlapped. */ - BOOL reverse; /* Whether it's a reverse pattern. */ - BOOL visible_captures; /* Whether the 'captures' method will be visible. */ - BOOL version_0; /* Whether to perform version_0 behaviour (same as re module). */ - BOOL must_advance; /* Whether the end of the match must advance past its start. */ - BOOL is_multithreaded; /* Whether to release the GIL while matching. */ - BOOL too_few_errors; /* Whether there were too few fuzzy errors. */ - BOOL match_all; /* Whether to match all of the string ('fullmatch'). */ -} RE_State; - -/* Storage for the regex state and thread state. - * - * Scanner objects can sometimes be shared across threads, which means that - * their RE_State structs are also shared. This isn't safe when the GIL is - * released, so in such instances we have a lock (mutex) in the RE_State struct - * to protect it during matching. We also need a thread-safe place to store the - * thread state when releasing the GIL. - */ -typedef struct RE_SafeState { - RE_State* re_state; - PyThreadState* thread_state; -} RE_SafeState; - -/* The PatternObject created from a regular expression. */ -typedef struct PatternObject { - PyObject_HEAD - PyObject* pattern; /* Pattern source (or None). */ - Py_ssize_t flags; /* Flags used when compiling pattern source. */ - PyObject* weakreflist; /* List of weak references */ - /* Nodes into which the regular expression is compiled. */ - RE_Node* start_node; - RE_Node* start_test; - size_t true_group_count; /* The true number of capture groups. 
*/ - size_t public_group_count; /* The number of public capture groups. */ - size_t repeat_count; /* The number of repeats. */ - Py_ssize_t group_end_index; /* The number of group closures. */ - PyObject* groupindex; - PyObject* indexgroup; - PyObject* named_lists; - size_t named_lists_count; - PyObject** partial_named_lists[2]; - PyObject* named_list_indexes; - /* Storage for the pattern nodes. */ - size_t node_capacity; - size_t node_count; - RE_Node** node_list; - /* Info about the capture groups. */ - size_t group_info_capacity; - RE_GroupInfo* group_info; - /* Info about the call_refs. */ - size_t call_ref_info_capacity; - size_t call_ref_info_count; - RE_CallRefInfo* call_ref_info; - Py_ssize_t pattern_call_ref; - /* Info about the repeats. */ - size_t repeat_info_capacity; - RE_RepeatInfo* repeat_info; - Py_ssize_t min_width; /* The minimum width of the string to match (assuming it isn't a fuzzy pattern). */ - RE_EncodingTable* encoding; /* Encoding handlers. */ - RE_GroupData* groups_storage; - RE_RepeatData* repeats_storage; - size_t fuzzy_count; /* The number of fuzzy sections. */ - Py_ssize_t req_offset; /* The offset to the required string. */ - RE_Node* req_string; /* The required string. */ - BOOL is_fuzzy; /* Whether it's a fuzzy pattern. */ - BOOL do_search_start; /* Whether to do an initial search. */ - BOOL recursive; /* Whether the entire pattern is recursive. */ -} PatternObject; - -/* The MatchObject created when a match is found. */ -typedef struct MatchObject { - PyObject_HEAD - PyObject* string; /* Link to the target string or NULL if detached. */ - PyObject* substring; /* Link to (a substring of) the target string. */ - Py_ssize_t substring_offset; /* Offset into the target string. */ - PatternObject* pattern; /* Link to the regex (pattern) object. */ - Py_ssize_t pos; /* Start of current slice. */ - Py_ssize_t endpos; /* End of current slice. */ - Py_ssize_t match_start; /* Start of matched slice. 
*/ - Py_ssize_t match_end; /* End of matched slice. */ - Py_ssize_t lastindex; /* Last group seen by the engine (-1 if none). */ - Py_ssize_t lastgroup; /* Last named group seen by the engine (-1 if none). */ - size_t group_count; /* The number of groups. */ - RE_GroupData* groups; /* The capture groups. */ - PyObject* regs; - size_t fuzzy_counts[RE_FUZZY_COUNT]; - BOOL partial; /* Whether it's a partial match. */ -} MatchObject; - -/* The ScannerObject. */ -typedef struct ScannerObject { - PyObject_HEAD - PatternObject* pattern; - RE_State state; - int status; -} ScannerObject; - -/* The SplitterObject. */ -typedef struct SplitterObject { - PyObject_HEAD - PatternObject* pattern; - RE_State state; - Py_ssize_t maxsplit; - Py_ssize_t last_pos; - Py_ssize_t split_count; - Py_ssize_t index; - int status; -} SplitterObject; - -/* Info used when compiling a pattern to nodes. */ -typedef struct RE_CompileArgs { - RE_CODE* code; /* The start of the compiled pattern. */ - RE_CODE* end_code; /* The end of the compiled pattern. */ - PatternObject* pattern; /* The pattern object. */ - Py_ssize_t min_width; /* The minimum width of the string to match (assuming it isn't a fuzzy pattern). */ - RE_Node* start; /* The start node. */ - RE_Node* end; /* The end node. */ - size_t repeat_depth; /* The nesting depth of the repeat. */ - BOOL forward; /* Whether it's a forward (not reverse) pattern. */ - BOOL visible_captures; /* Whether all of the captures will be visible. */ - BOOL has_captures; /* Whether the pattern has capture groups. */ - BOOL is_fuzzy; /* Whether the pattern (or some part of it) is fuzzy. */ - BOOL within_fuzzy; /* Whether the subpattern is within a fuzzy section. */ -} RE_CompileArgs; - -/* The string slices which will be concatenated to make the result string of - * the 'sub' method. - * - * This allows us to avoid creating a list of slices if there of fewer than 2 - * of them. 
Empty strings aren't recorded, so if 'list' and 'item' are both - * NULL then the result is an empty string. - */ -typedef struct JoinInfo { - PyObject* list; /* The list of slices if there are more than 2 of them. */ - PyObject* item; /* The slice if there is only 1 of them. */ - BOOL reversed; /* Whether the slices have been found in reverse order. */ - BOOL is_unicode; /* Whether the string is Unicode. */ -} JoinInfo; - -/* Info about fuzzy matching. */ -typedef struct { - RE_Node* new_node; - Py_ssize_t new_text_pos; - Py_ssize_t limit; - Py_ssize_t new_string_pos; - int step; - int new_folded_pos; - int folded_len; - int new_gfolded_pos; - int new_group_pos; - int fuzzy_type; - BOOL permit_insertion; -} RE_FuzzyData; - -/* Function types for getting info from a MatchObject. */ -typedef PyObject* (*RE_GetByIndexFunc)(MatchObject* self, Py_ssize_t index); - -/* Returns the magnitude of a 'Py_ssize_t' value. */ -Py_LOCAL_INLINE(Py_ssize_t) abs_ssize_t(Py_ssize_t x) { - return x >= 0 ? x : -x; -} - -/* Returns the minimum of 2 'Py_ssize_t' values. */ -Py_LOCAL_INLINE(Py_ssize_t) min_ssize_t(Py_ssize_t x, Py_ssize_t y) { - return x <= y ? x : y; -} - -/* Returns the maximum of 2 'Py_ssize_t' values. */ -Py_LOCAL_INLINE(Py_ssize_t) max_ssize_t(Py_ssize_t x, Py_ssize_t y) { - return x >= y ? x : y; -} - -/* Returns the minimum of 2 'size_t' values. */ -Py_LOCAL_INLINE(size_t) min_size_t(size_t x, size_t y) { - return x <= y ? x : y; -} - -/* Returns the maximum of 2 'size_t' values. */ -Py_LOCAL_INLINE(size_t) max_size_t(size_t x, size_t y) { - return x >= y ? x : y; -} - -/* Returns the 'maximum' of 2 RE_STATUS_T values. */ -Py_LOCAL_INLINE(RE_STATUS_T) max_status_2(RE_STATUS_T x, RE_STATUS_T y) { - return x >= y ? x : y; -} - -/* Returns the 'maximum' of 3 RE_STATUS_T values. 
*/ -Py_LOCAL_INLINE(RE_STATUS_T) max_status_3(RE_STATUS_T x, RE_STATUS_T y, - RE_STATUS_T z) { - return max_status_2(x, max_status_2(y, z)); -} - -/* Returns the 'maximum' of 4 RE_STATUS_T values. */ -Py_LOCAL_INLINE(RE_STATUS_T) max_status_4(RE_STATUS_T w, RE_STATUS_T x, - RE_STATUS_T y, RE_STATUS_T z) { - return max_status_2(max_status_2(w, x), max_status_2(y, z)); -} - -/* Gets a character at a position assuming 1 byte per character. */ -static Py_UCS4 bytes1_char_at(void* text, Py_ssize_t pos) { - return *((Py_UCS1*)text + pos); -} - -/* Sets a character at a position assuming 1 byte per character. */ -static void bytes1_set_char_at(void* text, Py_ssize_t pos, Py_UCS4 ch) { - *((Py_UCS1*)text + pos) = (Py_UCS1)ch; -} - -/* Gets a pointer to a position assuming 1 byte per character. */ -static void* bytes1_point_to(void* text, Py_ssize_t pos) { - return (Py_UCS1*)text + pos; -} - -/* Gets a character at a position assuming 2 bytes per character. */ -static Py_UCS4 bytes2_char_at(void* text, Py_ssize_t pos) { - return *((Py_UCS2*)text + pos); -} - -/* Sets a character at a position assuming 2 bytes per character. */ -static void bytes2_set_char_at(void* text, Py_ssize_t pos, Py_UCS4 ch) { - *((Py_UCS2*)text + pos) = (Py_UCS2)ch; -} - -/* Gets a pointer to a position assuming 2 bytes per character. */ -static void* bytes2_point_to(void* text, Py_ssize_t pos) { - return (Py_UCS2*)text + pos; -} - -/* Gets a character at a position assuming 4 bytes per character. */ -static Py_UCS4 bytes4_char_at(void* text, Py_ssize_t pos) { - return *((Py_UCS4*)text + pos); -} - -/* Sets a character at a position assuming 4 bytes per character. */ -static void bytes4_set_char_at(void* text, Py_ssize_t pos, Py_UCS4 ch) { - *((Py_UCS4*)text + pos) = (Py_UCS4)ch; -} - -/* Gets a pointer to a position assuming 4 bytes per character. 
*/ -static void* bytes4_point_to(void* text, Py_ssize_t pos) { - return (Py_UCS4*)text + pos; -} - -/* Default for whether a position is on a word boundary. */ -static BOOL at_boundary_always(RE_State* state, Py_ssize_t text_pos) { - return TRUE; -} - -/* Converts a BOOL to success/failure. */ -Py_LOCAL_INLINE(int) bool_as_status(BOOL value) { - return value ? RE_ERROR_SUCCESS : RE_ERROR_FAILURE; -} - -/* ASCII-specific. */ - -Py_LOCAL_INLINE(BOOL) unicode_has_property(RE_CODE property, Py_UCS4 ch); - -/* Checks whether a character has a property. */ -Py_LOCAL_INLINE(BOOL) ascii_has_property(RE_CODE property, Py_UCS4 ch) { - if (ch > RE_ASCII_MAX) { - /* Outside the ASCII range. */ - RE_UINT32 value; - - value = property & 0xFFFF; - - return value == 0; - } - - return unicode_has_property(property, ch); -} - -/* Wrapper for calling 'ascii_has_property' via a pointer. */ -static BOOL ascii_has_property_wrapper(RE_CODE property, Py_UCS4 ch) { - return ascii_has_property(property, ch); -} - -/* Checks whether there's a word character to the left. */ -Py_LOCAL_INLINE(BOOL) ascii_word_left(RE_State* state, Py_ssize_t text_pos) { - return text_pos > 0 && ascii_has_property(RE_PROP_WORD, - state->char_at(state->text, text_pos - 1)); -} - -/* Checks whether there's a word character to the right. */ -Py_LOCAL_INLINE(BOOL) ascii_word_right(RE_State* state, Py_ssize_t text_pos) { - return text_pos < state->text_length && ascii_has_property(RE_PROP_WORD, - state->char_at(state->text, text_pos)); -} - -/* Checks whether a position is on a word boundary. */ -static BOOL ascii_at_boundary(RE_State* state, Py_ssize_t text_pos) { - BOOL left; - BOOL right; - - left = ascii_word_left(state, text_pos); - right = ascii_word_right(state, text_pos); - - return left != right; -} - -/* Checks whether a position is at the start of a word. 
*/ -static BOOL ascii_at_word_start(RE_State* state, Py_ssize_t text_pos) { - BOOL left; - BOOL right; - - left = ascii_word_left(state, text_pos); - right = ascii_word_right(state, text_pos); - - return !left && right; -} - -/* Checks whether a position is at the end of a word. */ -static BOOL ascii_at_word_end(RE_State* state, Py_ssize_t text_pos) { - BOOL left; - BOOL right; - - left = ascii_word_left(state, text_pos); - right = ascii_word_right(state, text_pos); - - return left && !right; -} - -/* Checks whether a character is a line separator. */ -static BOOL ascii_is_line_sep(Py_UCS4 ch) { - return 0x0A <= ch && ch <= 0x0D; -} - -/* Checks whether a position is at the start of a line. */ -static BOOL ascii_at_line_start(RE_State* state, Py_ssize_t text_pos) { - Py_UCS4 ch; - - if (text_pos <= 0) - return TRUE; - - ch = state->char_at(state->text, text_pos - 1); - - if (ch == 0x0D) { - if (text_pos >= state->text_length) - return TRUE; - - /* No line break inside CRLF. */ - return state->char_at(state->text, text_pos) != 0x0A; - } - - return 0x0A <= ch && ch <= 0x0D; -} - -/* Checks whether a position is at the end of a line. */ -static BOOL ascii_at_line_end(RE_State* state, Py_ssize_t text_pos) { - Py_UCS4 ch; - - if (text_pos >= state->text_length) - return TRUE; - - ch = state->char_at(state->text, text_pos); - - if (ch == 0x0A) { - if (text_pos <= 0) - return TRUE; - - /* No line break inside CRLF. */ - return state->char_at(state->text, text_pos - 1) != 0x0D; - } - - return 0x0A <= ch && ch <= 0x0D; -} - -/* Checks whether a character could be Turkic (variants of I/i). For ASCII, it - * won't be. - */ -static BOOL ascii_possible_turkic(Py_UCS4 ch) { - return FALSE; -} - -/* Gets all the cases of a character. */ -static int ascii_all_cases(Py_UCS4 ch, Py_UCS4* codepoints) { - int count; - - count = 0; - - codepoints[count++] = ch; - - if (('A' <= ch && ch <= 'Z') || ('a' <= ch && ch <= 'z')) - /* It's a letter, so add the other case. 
*/ - codepoints[count++] = ch ^ 0x20; - - return count; -} - -/* Returns a character with its case folded. */ -static Py_UCS4 ascii_simple_case_fold(Py_UCS4 ch) { - if ('A' <= ch && ch <= 'Z') - /* Uppercase folds to lowercase. */ - return ch ^ 0x20; - - return ch; -} - -/* Returns a character with its case folded. */ -static int ascii_full_case_fold(Py_UCS4 ch, Py_UCS4* folded) { - if ('A' <= ch && ch <= 'Z') - /* Uppercase folds to lowercase. */ - folded[0] = ch ^ 0x20; - else - folded[0] = ch; - - return 1; -} - -/* Gets all the case variants of Turkic 'I'. The given character will be listed - * first. - */ -static int ascii_all_turkic_i(Py_UCS4 ch, Py_UCS4* cases) { - int count; - - count = 0; - - cases[count++] = ch; - - if (ch != 'I') - cases[count++] = 'I'; - - if (ch != 'i') - cases[count++] = 'i'; - - return count; -} - -/* The handlers for ASCII characters. */ -static RE_EncodingTable ascii_encoding = { - ascii_has_property_wrapper, - ascii_at_boundary, - ascii_at_word_start, - ascii_at_word_end, - ascii_at_boundary, /* No special "default word boundary" for ASCII. */ - ascii_at_word_start, /* No special "default start of word" for ASCII. */ - ascii_at_word_end, /* No special "default end of a word" for ASCII. */ - at_boundary_always, /* No special "grapheme boundary" for ASCII. */ - ascii_is_line_sep, - ascii_at_line_start, - ascii_at_line_end, - ascii_possible_turkic, - ascii_all_cases, - ascii_simple_case_fold, - ascii_full_case_fold, - ascii_all_turkic_i, -}; - -/* Locale-specific. */ - -/* Checks whether a character has a property. */ -Py_LOCAL_INLINE(BOOL) locale_has_property(RE_CODE property, Py_UCS4 ch) { - RE_UINT32 value; - RE_UINT32 v; - - value = property & 0xFFFF; - - if (ch > RE_LOCALE_MAX) - /* Outside the locale range. 
*/ - return value == 0; - - switch (property >> 16) { - case RE_PROP_ALNUM >> 16: - v = isalnum((int)ch) != 0; - break; - case RE_PROP_ALPHA >> 16: - v = isalpha((int)ch) != 0; - break; - case RE_PROP_ANY >> 16: - v = 1; - break; - case RE_PROP_ASCII >> 16: - v = ch <= RE_ASCII_MAX; - break; - case RE_PROP_BLANK >> 16: - v = ch == '\t' || ch == ' '; - break; - case RE_PROP_GC: - switch (property) { - case RE_PROP_ASSIGNED: - v = ch <= RE_LOCALE_MAX; - break; - case RE_PROP_CASEDLETTER: - v = isalpha((int)ch) ? value : 0xFFFF; - break; - case RE_PROP_CNTRL: - v = iscntrl((int)ch) ? value : 0xFFFF; - break; - case RE_PROP_DIGIT: - v = isdigit((int)ch) ? value : 0xFFFF; - break; - case RE_PROP_GC_CN: - v = ch > RE_LOCALE_MAX; - break; - case RE_PROP_GC_LL: - v = islower((int)ch) ? value : 0xFFFF; - break; - case RE_PROP_GC_LU: - v = isupper((int)ch) ? value : 0xFFFF; - break; - case RE_PROP_GC_P: - v = ispunct((int)ch) ? value : 0xFFFF; - break; - default: - v = 0xFFFF; - break; - } - break; - case RE_PROP_GRAPH >> 16: - v = isgraph((int)ch) != 0; - break; - case RE_PROP_LOWER >> 16: - v = islower((int)ch) != 0; - break; - case RE_PROP_PRINT >> 16: - v = isprint((int)ch) != 0; - break; - case RE_PROP_SPACE >> 16: - v = isspace((int)ch) != 0; - break; - case RE_PROP_UPPER >> 16: - v = isupper((int)ch) != 0; - break; - case RE_PROP_WORD >> 16: - v = ch == '_' || isalnum((int)ch) != 0; - break; - case RE_PROP_XDIGIT >> 16: - v = re_get_hex_digit(ch) != 0; - break; - default: - v = 0; - break; - } - - return v == value; -} - -/* Wrapper for calling 'locale_has_property' via a pointer. */ -static BOOL locale_has_property_wrapper(RE_CODE property, Py_UCS4 ch) { - return locale_has_property(property, ch); -} - -/* Checks whether there's a word character to the left. 
*/ -Py_LOCAL_INLINE(BOOL) locale_word_left(RE_State* state, Py_ssize_t text_pos) { - return text_pos > 0 && locale_has_property(RE_PROP_WORD, - state->char_at(state->text, text_pos - 1)); -} - -/* Checks whether there's a word character to the right. */ -Py_LOCAL_INLINE(BOOL) locale_word_right(RE_State* state, Py_ssize_t text_pos) { - return text_pos < state->text_length && locale_has_property(RE_PROP_WORD, - state->char_at(state->text, text_pos)); -} - -/* Checks whether a position is on a word boundary. */ -static BOOL locale_at_boundary(RE_State* state, Py_ssize_t text_pos) { - BOOL left; - BOOL right; - - left = locale_word_left(state, text_pos); - right = locale_word_right(state, text_pos); - - return left != right; -} - -/* Checks whether a position is at the start of a word. */ -static BOOL locale_at_word_start(RE_State* state, Py_ssize_t text_pos) { - BOOL left; - BOOL right; - - left = locale_word_left(state, text_pos); - right = locale_word_right(state, text_pos); - - return !left && right; -} - -/* Checks whether a position is at the end of a word. */ -static BOOL locale_at_word_end(RE_State* state, Py_ssize_t text_pos) { - BOOL left; - BOOL right; - - left = locale_word_left(state, text_pos); - right = locale_word_right(state, text_pos); - - return left && !right; -} - -/* Checks whether a character could be Turkic (variants of I/i). */ -static BOOL locale_possible_turkic(Py_UCS4 ch) { - return toupper((int)ch) == 'I' || tolower((int)ch) == 'i'; -} - -/* Gets all the cases of a character. */ -static int locale_all_cases(Py_UCS4 ch, Py_UCS4* codepoints) { - int count; - Py_UCS4 other; - - count = 0; - - codepoints[count++] = ch; - - other = (Py_UCS4)toupper((int)ch); - if (other != ch) - codepoints[count++] = other; - - other = (Py_UCS4)tolower((int)ch); - if (other != ch) - codepoints[count++] = other; - - return count; -} - -/* Returns a character with its case folded. 
*/ -static Py_UCS4 locale_simple_case_fold(Py_UCS4 ch) { - if (ch <= RE_LOCALE_MAX) - return (Py_UCS4)tolower((int)ch); - - return ch; -} - -/* Returns a character with its case folded. */ -static int locale_full_case_fold(Py_UCS4 ch, Py_UCS4* folded) { - if (ch <= RE_LOCALE_MAX) - folded[0] = (Py_UCS4)tolower((int)ch); - else - folded[0] = ch; - - return 1; -} - -/* Gets all the case variants of Turkic 'I'. The given character will be listed - * first. - */ -static int locale_all_turkic_i(Py_UCS4 ch, Py_UCS4* cases) { - int count; - Py_UCS4 other; - - count = 0; - - cases[count++] = ch; - - if (ch != 'I') - cases[count++] = 'I'; - - if (ch != 'i') - cases[count++] = 'i'; - - /* Uppercase 'i' will be either dotted (Turkic) or dotless (non-Turkic). */ - other = (Py_UCS4)toupper('i'); - if (other != ch && other != 'I') - cases[count++] = other; - - /* Lowercase 'I' will be either dotless (Turkic) or dotted (non-Turkic). */ - other = (Py_UCS4)tolower('I'); - if (other != ch && other != 'i') - cases[count++] = other; - - return count; -} - -/* The handlers for locale characters. */ -static RE_EncodingTable locale_encoding = { - locale_has_property_wrapper, - locale_at_boundary, - locale_at_word_start, - locale_at_word_end, - locale_at_boundary, /* No special "default word boundary" for locale. */ - locale_at_word_start, /* No special "default start of a word" for locale. */ - locale_at_word_end, /* No special "default end of a word" for locale. */ - at_boundary_always, /* No special "grapheme boundary" for locale. */ - ascii_is_line_sep, /* Assume locale line separators are same as ASCII. */ - ascii_at_line_start, /* Assume locale line separators are same as ASCII. */ - ascii_at_line_end, /* Assume locale line separators are same as ASCII. */ - locale_possible_turkic, - locale_all_cases, - locale_simple_case_fold, - locale_full_case_fold, - locale_all_turkic_i, -}; - -/* Unicode-specific. */ - -/* Checks whether a Unicode character has a property. 
*/ -Py_LOCAL_INLINE(BOOL) unicode_has_property(RE_CODE property, Py_UCS4 ch) { - RE_UINT32 prop; - RE_UINT32 value; - RE_UINT32 v; - - prop = property >> 16; - if (prop >= sizeof(re_get_property) / sizeof(re_get_property[0])) - return FALSE; - - value = property & 0xFFFF; - v = re_get_property[prop](ch); - - if (v == value) - return TRUE; - - if (prop == RE_PROP_GC) { - switch (value) { - case RE_PROP_ASSIGNED: - return v != RE_PROP_CN; - case RE_PROP_C: - return (RE_PROP_C_MASK & (1 << v)) != 0; - case RE_PROP_CASEDLETTER: - return v == RE_PROP_LU || v == RE_PROP_LL || v == RE_PROP_LT; - case RE_PROP_L: - return (RE_PROP_L_MASK & (1 << v)) != 0; - case RE_PROP_M: - return (RE_PROP_M_MASK & (1 << v)) != 0; - case RE_PROP_N: - return (RE_PROP_N_MASK & (1 << v)) != 0; - case RE_PROP_P: - return (RE_PROP_P_MASK & (1 << v)) != 0; - case RE_PROP_S: - return (RE_PROP_S_MASK & (1 << v)) != 0; - case RE_PROP_Z: - return (RE_PROP_Z_MASK & (1 << v)) != 0; - } - } - - return FALSE; -} - -/* Wrapper for calling 'unicode_has_property' via a pointer. */ -static BOOL unicode_has_property_wrapper(RE_CODE property, Py_UCS4 ch) { - return unicode_has_property(property, ch); -} - -/* Checks whether there's a word character to the left. */ -Py_LOCAL_INLINE(BOOL) unicode_word_left(RE_State* state, Py_ssize_t text_pos) { - return text_pos > 0 && unicode_has_property(RE_PROP_WORD, - state->char_at(state->text, text_pos - 1)); -} - -/* Checks whether there's a word character to the right. */ -Py_LOCAL_INLINE(BOOL) unicode_word_right(RE_State* state, Py_ssize_t text_pos) - { - return text_pos < state->text_length && unicode_has_property(RE_PROP_WORD, - state->char_at(state->text, text_pos)); -} - -/* Checks whether a position is on a word boundary. 
*/ -static BOOL unicode_at_boundary(RE_State* state, Py_ssize_t text_pos) { - BOOL left; - BOOL right; - - left = unicode_word_left(state, text_pos); - right = unicode_word_right(state, text_pos); - - return left != right; -} - -/* Checks whether a position is at the start of a word. */ -static BOOL unicode_at_word_start(RE_State* state, Py_ssize_t text_pos) { - BOOL left; - BOOL right; - - left = unicode_word_left(state, text_pos); - right = unicode_word_right(state, text_pos); - - return !left && right; -} - -/* Checks whether a position is at the end of a word. */ -static BOOL unicode_at_word_end(RE_State* state, Py_ssize_t text_pos) { - BOOL left; - BOOL right; - - left = unicode_word_left(state, text_pos); - right = unicode_word_right(state, text_pos); - - return left && !right; -} - -/* Checks whether a character is a Unicode vowel. - * - * Only a limited number are treated as vowels. - */ -Py_LOCAL_INLINE(BOOL) is_unicode_vowel(Py_UCS4 ch) { - switch (Py_UNICODE_TOLOWER((Py_UNICODE)ch)) { - case 'a': case 0xE0: case 0xE1: case 0xE2: - case 'e': case 0xE8: case 0xE9: case 0xEA: - case 'i': case 0xEC: case 0xED: case 0xEE: - case 'o': case 0xF2: case 0xF3: case 0xF4: - case 'u': case 0xF9: case 0xFA: case 0xFB: - return TRUE; - default: - return FALSE; - } -} - -/* Checks whether a position is on a default word boundary. - * - * The rules are defined here: - * http://www.unicode.org/reports/tr29/#Default_Word_Boundaries - */ -static BOOL unicode_at_default_boundary(RE_State* state, Py_ssize_t text_pos) { - Py_UCS4 (*char_at)(void* text, Py_ssize_t pos); - int prop; - int prop_m1; - Py_ssize_t pos_m1; - Py_ssize_t pos_m2; - int prop_m2; - Py_ssize_t pos_p0; - int prop_p0; - Py_ssize_t pos_p1; - int prop_p1; - - /* Break at the start and end of the text. 
*/ - if (text_pos <= 0) - return TRUE; - - if (text_pos >= state->text_length) - return TRUE; - - char_at = state->char_at; - - prop = (int)re_get_word_break(char_at(state->text, text_pos)); - prop_m1 = (int)re_get_word_break(char_at(state->text, text_pos - 1)); - - /* Don't break within CRLF. */ - if (prop_m1 == RE_BREAK_CR && prop == RE_BREAK_LF) - return FALSE; - - /* Otherwise break before and after Newlines (including CR and LF). */ - if (prop_m1 == RE_BREAK_NEWLINE || prop_m1 == RE_BREAK_CR || prop_m1 == - RE_BREAK_LF || prop == RE_BREAK_NEWLINE || prop == RE_BREAK_CR || prop == - RE_BREAK_LF) - return TRUE; - - /* Get the property of the previous character. */ - pos_m1 = text_pos - 1; - prop_m1 = RE_BREAK_OTHER; - while (pos_m1 >= 0) { - prop_m1 = (int)re_get_word_break(char_at(state->text, pos_m1)); - if (prop_m1 != RE_BREAK_EXTEND && prop_m1 != RE_BREAK_FORMAT) - break; - - --pos_m1; - } - - /* Get the property of the preceding character. */ - pos_m2 = pos_m1 - 1; - prop_m2 = RE_BREAK_OTHER; - while (pos_m2 >= 0) { - prop_m2 = (int)re_get_word_break(char_at(state->text, pos_m2)); - if (prop_m2 != RE_BREAK_EXTEND && prop_m2 != RE_BREAK_FORMAT) - break; - - --pos_m2; - } - - /* Get the property of the next character. */ - pos_p0 = text_pos; - prop_p0 = prop; - while (pos_p0 < state->text_length) { - prop_p0 = (int)re_get_word_break(char_at(state->text, pos_p0)); - if (prop_p0 != RE_BREAK_EXTEND && prop_p0 != RE_BREAK_FORMAT) - break; - - ++pos_p0; - } - - /* Get the property of the following character. */ - pos_p1 = pos_p0 + 1; - prop_p1 = RE_BREAK_OTHER; - while (pos_p1 < state->text_length) { - prop_p1 = (int)re_get_word_break(char_at(state->text, pos_p1)); - if (prop_p1 != RE_BREAK_EXTEND && prop_p1 != RE_BREAK_FORMAT) - break; - - ++pos_p1; - } - - /* Don't break between most letters. 
*/ - if ((prop_m1 == RE_BREAK_ALETTER || prop_m1 == RE_BREAK_HEBREWLETTER) && - (prop_p0 == RE_BREAK_ALETTER || prop_p0 == RE_BREAK_HEBREWLETTER)) - return FALSE; - - /* Break between apostrophe and vowels (French, Italian). */ - if (pos_m1 >= 0 && char_at(state->text, pos_m1) == '\'' && - is_unicode_vowel(char_at(state->text, text_pos))) - return TRUE; - - /* Don't break letters across certain punctuation. */ - if ((prop_m1 == RE_BREAK_ALETTER || prop_m1 == RE_BREAK_HEBREWLETTER) && - (prop_p0 == RE_BREAK_MIDLETTER || prop_p0 == RE_BREAK_MIDNUMLET || - prop_p0 == RE_BREAK_SINGLEQUOTE) && (prop_p1 == RE_BREAK_ALETTER || - prop_p1 == RE_BREAK_HEBREWLETTER)) - return FALSE; - if ((prop_m2 == RE_BREAK_ALETTER || prop_m2 == RE_BREAK_HEBREWLETTER) && - (prop_m1 == RE_BREAK_MIDLETTER || prop_m1 == RE_BREAK_MIDNUMLET || - prop_m1 == RE_BREAK_SINGLEQUOTE) && (prop_p0 == RE_BREAK_ALETTER || - prop_p0 == RE_BREAK_HEBREWLETTER)) - return FALSE; - if (prop_m1 == RE_BREAK_HEBREWLETTER && prop_p0 == RE_BREAK_SINGLEQUOTE) - return FALSE; - if (prop_m1 == RE_BREAK_HEBREWLETTER && prop_p0 == RE_BREAK_DOUBLEQUOTE && - prop_p1 == RE_BREAK_HEBREWLETTER) - return FALSE; - if (prop_m2 == RE_BREAK_HEBREWLETTER && prop_m1 == RE_BREAK_DOUBLEQUOTE && - prop_p0 == RE_BREAK_HEBREWLETTER) - return FALSE; - - /* Don't break within sequences of digits, or digits adjacent to letters - * ("3a", or "A3"). - */ - if (prop_m1 == RE_BREAK_NUMERIC && prop_p0 == RE_BREAK_NUMERIC) - return FALSE; - if ((prop_m1 == RE_BREAK_ALETTER || prop_m1 == RE_BREAK_HEBREWLETTER) && - prop_p0 == RE_BREAK_NUMERIC) - return FALSE; - if (prop_m1 == RE_BREAK_NUMERIC && (prop_p0 == RE_BREAK_ALETTER || prop_p0 - == RE_BREAK_HEBREWLETTER)) - return FALSE; - - /* Don't break within sequences, such as "3.2" or "3,456.789". 
*/ - if (prop_m2 == RE_BREAK_NUMERIC && (prop_m1 == RE_BREAK_MIDNUM || prop_m1 - == RE_BREAK_MIDNUMLET || prop_m1 == RE_BREAK_SINGLEQUOTE) && prop_p0 == - RE_BREAK_NUMERIC) - return FALSE; - if (prop_m1 == RE_BREAK_NUMERIC && (prop_p0 == RE_BREAK_MIDNUM || prop_p0 - == RE_BREAK_MIDNUMLET || prop_p0 == RE_BREAK_SINGLEQUOTE) && prop_p1 == - RE_BREAK_NUMERIC) - return FALSE; - - /* Don't break between Katakana. */ - if (prop_m1 == RE_BREAK_KATAKANA && prop_p0 == RE_BREAK_KATAKANA) - return FALSE; - - /* Don't break from extenders. */ - if ((prop_m1 == RE_BREAK_ALETTER || prop_m1 == RE_BREAK_HEBREWLETTER || - prop_m1 == RE_BREAK_NUMERIC || prop_m1 == RE_BREAK_KATAKANA || prop_m1 == - RE_BREAK_EXTENDNUMLET) && prop_p0 == RE_BREAK_EXTENDNUMLET) - return FALSE; - if (prop_m1 == RE_BREAK_EXTENDNUMLET && (prop_p0 == RE_BREAK_ALETTER || - prop_p0 == RE_BREAK_HEBREWLETTER || prop_p0 == RE_BREAK_NUMERIC || - prop_p0 == RE_BREAK_KATAKANA)) - return FALSE; - - /* Don't break between regional indicator symbols. */ - if (prop_m1 == RE_BREAK_REGIONALINDICATOR && prop_p0 == - RE_BREAK_REGIONALINDICATOR) - return FALSE; - - /* Otherwise, break everywhere (including around ideographs). */ - return TRUE; -} - -/* Checks whether a position is at the start/end of a word. */ -Py_LOCAL_INLINE(BOOL) unicode_at_default_word_start_or_end(RE_State* state, - Py_ssize_t text_pos, BOOL at_start) { - Py_UCS4 (*char_at)(void* text, Py_ssize_t pos); - BOOL before; - BOOL after; - Py_UCS4 char_0; - Py_UCS4 char_m1; - int prop; - int prop_m1; - Py_ssize_t pos_m1; - Py_UCS4 char_p1; - Py_ssize_t pos_p1; - int prop_p1; - Py_ssize_t pos_m2; - Py_UCS4 char_m2; - int prop_m2; - - char_at = state->char_at; - - /* At the start or end of the text. 
*/ - if (text_pos <= 0 || text_pos >= state->text_length) { - before = unicode_word_left(state, text_pos); - after = unicode_word_right(state, text_pos); - - return before != at_start && after == at_start; - } - - char_0 = char_at(state->text, text_pos); - char_m1 = char_at(state->text, text_pos - 1); - prop = (int)re_get_word_break(char_0); - prop_m1 = (int)re_get_word_break(char_m1); - - /* No break within CRLF. */ - if (prop_m1 == RE_BREAK_CR && prop == RE_BREAK_LF) - return FALSE; - - /* Break before and after Newlines (including CR and LF). */ - if (prop_m1 == RE_BREAK_NEWLINE || prop_m1 == RE_BREAK_CR || prop_m1 == - RE_BREAK_LF || prop == RE_BREAK_NEWLINE || prop == RE_BREAK_CR || prop == - RE_BREAK_LF) { - before = unicode_has_property(RE_PROP_WORD, char_m1); - after = unicode_has_property(RE_PROP_WORD, char_0); - - return before != at_start && after == at_start; - } - - /* No break just before Format or Extend characters. */ - if (prop == RE_BREAK_EXTEND || prop == RE_BREAK_FORMAT) - return FALSE; - - /* Get the property of the previous character. */ - pos_m1 = text_pos - 1; - prop_m1 = RE_BREAK_OTHER; - while (pos_m1 >= 0) { - char_m1 = char_at(state->text, pos_m1); - prop_m1 = (int)re_get_word_break(char_m1); - if (prop_m1 != RE_BREAK_EXTEND && prop_m1 != RE_BREAK_FORMAT) - break; - - --pos_m1; - } - - /* No break between most letters. */ - if (prop_m1 == RE_BREAK_ALETTER && prop == RE_BREAK_ALETTER) - return FALSE; - - if (pos_m1 >= 0 && char_m1 == '\'' && is_unicode_vowel(char_0)) - return TRUE; - - pos_p1 = text_pos + 1; - prop_p1 = RE_BREAK_OTHER; - while (pos_p1 < state->text_length) { - char_p1 = char_at(state->text, pos_p1); - prop_p1 = (int)re_get_word_break(char_p1); - if (prop_p1 != RE_BREAK_EXTEND && prop_p1 != RE_BREAK_FORMAT) - break; - - ++pos_p1; - } - - /* No break letters across certain punctuation. 
*/ - if (prop_m1 == RE_BREAK_ALETTER && (prop == RE_BREAK_MIDLETTER || prop == - RE_BREAK_MIDNUMLET) && prop_p1 == RE_BREAK_ALETTER) - return FALSE; - - pos_m2 = pos_m1 - 1; - prop_m2 = RE_BREAK_OTHER; - while (pos_m2 >= 0) { - char_m2 = char_at(state->text, pos_m2); - prop_m2 = (int)re_get_word_break(char_m2); - if (prop_m2 != RE_BREAK_EXTEND && prop_m1 != RE_BREAK_FORMAT) - break; - - --pos_m2; - } - - if (prop_m2 == RE_BREAK_ALETTER && (prop_m1 == RE_BREAK_MIDLETTER || - prop_m1 == RE_BREAK_MIDNUMLET) && prop == RE_BREAK_ALETTER) - return FALSE; - - /* No break within sequences of digits, or digits adjacent to letters - * ("3a", or "A3"). - */ - if ((prop_m1 == RE_BREAK_NUMERIC || prop_m1 == RE_BREAK_ALETTER) && prop == - RE_BREAK_NUMERIC) - return FALSE; - - if (prop_m1 == RE_BREAK_NUMERIC && prop == RE_BREAK_ALETTER) - return FALSE; - - /* No break within sequences, such as "3.2" or "3,456.789". */ - if (prop_m2 == RE_BREAK_NUMERIC && (prop_m1 == RE_BREAK_MIDNUM || prop_m1 - == RE_BREAK_MIDNUMLET) && prop == RE_BREAK_NUMERIC) - return FALSE; - - if (prop_m1 == RE_BREAK_NUMERIC && (prop == RE_BREAK_MIDNUM || prop == - RE_BREAK_MIDNUMLET) && prop_p1 == RE_BREAK_NUMERIC) - return FALSE; - - /* No break between Katakana. */ - if (prop_m1 == RE_BREAK_KATAKANA && prop == RE_BREAK_KATAKANA) - return FALSE; - - /* No break from extenders. */ - if ((prop_m1 == RE_BREAK_ALETTER || prop_m1 == RE_BREAK_NUMERIC || prop_m1 - == RE_BREAK_KATAKANA || prop_m1 == RE_BREAK_EXTENDNUMLET) && prop == - RE_BREAK_EXTENDNUMLET) - return FALSE; - - if (prop_m1 == RE_BREAK_EXTENDNUMLET && (prop == RE_BREAK_ALETTER || prop - == RE_BREAK_NUMERIC || prop == RE_BREAK_KATAKANA)) - return FALSE; - - /* Otherwise, break everywhere (including around ideographs). */ - before = unicode_has_property(RE_PROP_WORD, char_m1); - after = unicode_has_property(RE_PROP_WORD, char_0); - - return before != at_start && after == at_start; -} - -/* Checks whether a position is at the start of a word. 
*/ -static BOOL unicode_at_default_word_start(RE_State* state, Py_ssize_t text_pos) - { - return unicode_at_default_word_start_or_end(state, text_pos, TRUE); -} - -/* Checks whether a position is at the end of a word. */ -static BOOL unicode_at_default_word_end(RE_State* state, Py_ssize_t text_pos) { - return unicode_at_default_word_start_or_end(state, text_pos, FALSE); -} - -/* Checks whether a position is on a grapheme boundary. - * - * The rules are defined here: - * http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries - */ -static BOOL unicode_at_grapheme_boundary(RE_State* state, Py_ssize_t text_pos) - { - Py_UCS4 (*char_at)(void* text, Py_ssize_t pos); - int prop; - int prop_m1; - - /* Break at the start and end of the text. */ - if (text_pos <= 0) - return TRUE; - - if (text_pos >= state->text_length) - return TRUE; - - char_at = state->char_at; - - prop = (int)re_get_grapheme_cluster_break(char_at(state->text, text_pos)); - prop_m1 = (int)re_get_grapheme_cluster_break(char_at(state->text, text_pos - - 1)); - - /* Don't break within CRLF. */ - if (prop_m1 == RE_GBREAK_CR && prop == RE_GBREAK_LF) - return FALSE; - - /* Otherwise break before and after controls (including CR and LF). */ - if (prop_m1 == RE_GBREAK_CONTROL || prop_m1 == RE_GBREAK_CR || prop_m1 == - RE_GBREAK_LF || prop == RE_GBREAK_CONTROL || prop == RE_GBREAK_CR || prop - == RE_GBREAK_LF) - return TRUE; - - /* Don't break Hangul syllable sequences. */ - if (prop_m1 == RE_GBREAK_L && (prop == RE_GBREAK_L || prop == RE_GBREAK_V - || prop == RE_GBREAK_LV || prop == RE_GBREAK_LVT)) - return FALSE; - if ((prop_m1 == RE_GBREAK_LV || prop_m1 == RE_GBREAK_V) && (prop == - RE_GBREAK_V || prop == RE_GBREAK_T)) - return FALSE; - if ((prop_m1 == RE_GBREAK_LVT || prop_m1 == RE_GBREAK_T) && (prop == - RE_GBREAK_T)) - return FALSE; - - /* Don't break between regional indicator symbols. 
*/ - if (prop_m1 == RE_GBREAK_REGIONALINDICATOR && prop == - RE_GBREAK_REGIONALINDICATOR) - return FALSE; - - /* Don't break just before Extend characters. */ - if (prop == RE_GBREAK_EXTEND) - return FALSE; - - /* Don't break before SpacingMarks, or after Prepend characters. */ - if (prop == RE_GBREAK_SPACINGMARK) - return FALSE; - - if (prop_m1 == RE_GBREAK_PREPEND) - return FALSE; - - /* Otherwise, break everywhere. */ - return TRUE; -} - -/* Checks whether a character is a line separator. */ -static BOOL unicode_is_line_sep(Py_UCS4 ch) { - return (0x0A <= ch && ch <= 0x0D) || ch == 0x85 || ch == 0x2028 || ch == - 0x2029; -} - -/* Checks whether a position is at the start of a line. */ -static BOOL unicode_at_line_start(RE_State* state, Py_ssize_t text_pos) { - Py_UCS4 ch; - - if (text_pos <= 0) - return TRUE; - - ch = state->char_at(state->text, text_pos - 1); - - if (ch == 0x0D) { - if (text_pos >= state->text_length) - return TRUE; - - /* No line break inside CRLF. */ - return state->char_at(state->text, text_pos) != 0x0A; - } - - return (0x0A <= ch && ch <= 0x0D) || ch == 0x85 || ch == 0x2028 || ch == - 0x2029; -} - -/* Checks whether a position is at the end of a line. */ -static BOOL unicode_at_line_end(RE_State* state, Py_ssize_t text_pos) { - Py_UCS4 ch; - - if (text_pos >= state->text_length) - return TRUE; - - ch = state->char_at(state->text, text_pos); - - if (ch == 0x0A) { - if (text_pos <= 0) - return TRUE; - - /* No line break inside CRLF. */ - return state->char_at(state->text, text_pos - 1) != 0x0D; - } - - return (0x0A <= ch && ch <= 0x0D) || ch == 0x85 || ch == 0x2028 || ch == - 0x2029; -} - -/* Checks whether a character could be Turkic (variants of I/i). */ -static BOOL unicode_possible_turkic(Py_UCS4 ch) { - return ch == 'I' || ch == 'i' || ch == 0x0130 || ch == 0x0131; -} - -/* Gets all the cases of a character. 
*/ -static int unicode_all_cases(Py_UCS4 ch, Py_UCS4* codepoints) { - return re_get_all_cases(ch, codepoints); -} - -/* Returns a character with its case folded, unless it could be Turkic - * (variants of I/i). - */ -static Py_UCS4 unicode_simple_case_fold(Py_UCS4 ch) { - /* Is it a possible Turkic character? If so, pass it through unchanged. */ - if (ch == 'I' || ch == 'i' || ch == 0x0130 || ch == 0x0131) - return ch; - - return (Py_UCS4)re_get_simple_case_folding(ch); -} - -/* Returns a character with its case folded, unless it could be Turkic - * (variants of I/i). - */ -static int unicode_full_case_fold(Py_UCS4 ch, Py_UCS4* folded) { - /* Is it a possible Turkic character? If so, pass it through unchanged. */ - if (ch == 'I' || ch == 'i' || ch == 0x0130 || ch == 0x0131) { - folded[0] = ch; - return 1; - } - - return re_get_full_case_folding(ch, folded); -} - -/* Gets all the case variants of Turkic 'I'. */ -static int unicode_all_turkic_i(Py_UCS4 ch, Py_UCS4* cases) { - int count; - - count = 0; - - cases[count++] = ch; - - if (ch != 'I') - cases[count++] = 'I'; - - if (ch != 'i') - cases[count++] = 'i'; - - if (ch != 0x130) - cases[count++] = 0x130; - - if (ch != 0x131) - cases[count++] = 0x131; - - return count; - -} - -/* The handlers for Unicode characters. */ -static RE_EncodingTable unicode_encoding = { - unicode_has_property_wrapper, - unicode_at_boundary, - unicode_at_word_start, - unicode_at_word_end, - unicode_at_default_boundary, - unicode_at_default_word_start, - unicode_at_default_word_end, - unicode_at_grapheme_boundary, - unicode_is_line_sep, - unicode_at_line_start, - unicode_at_line_end, - unicode_possible_turkic, - unicode_all_cases, - unicode_simple_case_fold, - unicode_full_case_fold, - unicode_all_turkic_i, -}; - -Py_LOCAL_INLINE(PyObject*) get_object(char* module_name, char* object_name); - -/* Sets the error message. 
*/ -Py_LOCAL_INLINE(void) set_error(int status, PyObject* object) { - TRACE(("<>\n")) - - if (!error_exception) - error_exception = get_object("_" RE_MODULE "_core", "error"); - - switch (status) { - case RE_ERROR_BACKTRACKING: - PyErr_SetString(error_exception, "too much backtracking"); - break; - case RE_ERROR_CONCURRENT: - PyErr_SetString(PyExc_ValueError, "concurrent not int or None"); - break; - case RE_ERROR_GROUP_INDEX_TYPE: - if (object) - PyErr_Format(PyExc_TypeError, - "group indices must be integers or strings, not %.200s", - object->ob_type->tp_name); - else - PyErr_Format(PyExc_TypeError, - "group indices must be integers or strings"); - break; - case RE_ERROR_ILLEGAL: - PyErr_SetString(PyExc_RuntimeError, "invalid RE code"); - break; - case RE_ERROR_INDEX: - PyErr_SetString(PyExc_TypeError, "string indices must be integers"); - break; - case RE_ERROR_INTERRUPTED: - /* An exception has already been raised, so let it fly. */ - break; - case RE_ERROR_INVALID_GROUP_REF: - PyErr_SetString(error_exception, "invalid group reference"); - break; - case RE_ERROR_MEMORY: - PyErr_NoMemory(); - break; - case RE_ERROR_NOT_STRING: - PyErr_Format(PyExc_TypeError, "expected string instance, %.200s found", - object->ob_type->tp_name); - break; - case RE_ERROR_NOT_UNICODE: - PyErr_Format(PyExc_TypeError, - "expected unicode instance, %.200s found", object->ob_type->tp_name); - break; - case RE_ERROR_NO_SUCH_GROUP: - PyErr_SetString(PyExc_IndexError, "no such group"); - break; - case RE_ERROR_REPLACEMENT: - PyErr_SetString(error_exception, "invalid replacement"); - break; - default: - /* Other error codes indicate compiler/engine bugs. */ - PyErr_SetString(PyExc_RuntimeError, - "internal error in regular expression engine"); - break; - } -} - -/* Allocates memory. - * - * Sets the Python error handler and returns NULL if the allocation fails. 
- */ -Py_LOCAL_INLINE(void*) re_alloc(size_t size) { - void* new_ptr; - - new_ptr = PyMem_Malloc(size); - if (!new_ptr) - set_error(RE_ERROR_MEMORY, NULL); - - return new_ptr; -} - -/* Reallocates memory. - * - * Sets the Python error handler and returns NULL if the reallocation fails. - */ -Py_LOCAL_INLINE(void*) re_realloc(void* ptr, size_t size) { - void* new_ptr; - - new_ptr = PyMem_Realloc(ptr, size); - if (!new_ptr) - set_error(RE_ERROR_MEMORY, NULL); - - return new_ptr; -} - -/* Deallocates memory. */ -Py_LOCAL_INLINE(void) re_dealloc(void* ptr) { - PyMem_Free(ptr); -} - -/* Releases the GIL if multithreading is enabled. */ -Py_LOCAL_INLINE(void) release_GIL(RE_SafeState* safe_state) { - if (safe_state->re_state->is_multithreaded) - safe_state->thread_state = PyEval_SaveThread(); -} - -/* Acquires the GIL if multithreading is enabled. */ -Py_LOCAL_INLINE(void) acquire_GIL(RE_SafeState* safe_state) { - if (safe_state->re_state->is_multithreaded) - PyEval_RestoreThread(safe_state->thread_state); -} - -/* Allocates memory, holding the GIL during the allocation. - * - * Sets the Python error handler and returns NULL if the allocation fails. - */ -Py_LOCAL_INLINE(void*) safe_alloc(RE_SafeState* safe_state, size_t size) { - void* new_ptr; - - acquire_GIL(safe_state); - - new_ptr = re_alloc(size); - - release_GIL(safe_state); - - return new_ptr; -} - -/* Reallocates memory, holding the GIL during the reallocation. - * - * Sets the Python error handler and returns NULL if the reallocation fails. - */ -Py_LOCAL_INLINE(void*) safe_realloc(RE_SafeState* safe_state, void* ptr, size_t - size) { - void* new_ptr; - - acquire_GIL(safe_state); - - new_ptr = re_realloc(ptr, size); - - release_GIL(safe_state); - - return new_ptr; -} - -/* Deallocates memory, holding the GIL during the deallocation. 
*/ -Py_LOCAL_INLINE(void) safe_dealloc(RE_SafeState* safe_state, void* ptr) { - acquire_GIL(safe_state); - - re_dealloc(ptr); - - release_GIL(safe_state); -} - -/* Checks for KeyboardInterrupt, holding the GIL during the check. */ -Py_LOCAL_INLINE(BOOL) safe_check_signals(RE_SafeState* safe_state) { - BOOL result; - - acquire_GIL(safe_state); - - result = (BOOL)PyErr_CheckSignals(); - - release_GIL(safe_state); - - return result; -} - -/* Checks whether a character is in a range. */ -Py_LOCAL_INLINE(BOOL) in_range(RE_EncodingTable* encoding, Py_UCS4 lower, - Py_UCS4 upper, Py_UCS4 ch) { - return lower <= ch && ch <= upper; -} - -/* Checks whether a character is in a range, ignoring case. */ -Py_LOCAL_INLINE(BOOL) in_range_ign(RE_EncodingTable* encoding, Py_UCS4 lower, - Py_UCS4 upper, Py_UCS4 ch) { - Py_UCS4 cases[RE_MAX_CASES]; - int count; - int i; - - count = encoding->all_cases(ch, cases); - - for (i = 0; i < count; i++) { - if (in_range(encoding, lower, upper, cases[i])) - return TRUE; - } - - return FALSE; -} - -/* Checks whether 2 characters are the same. */ -Py_LOCAL_INLINE(BOOL) same_char(RE_EncodingTable* encoding, Py_UCS4 ch1, - Py_UCS4 ch2) { - return ch1 == ch2; -} - -/* Wrapper for calling 'same_char' via a pointer. */ -static BOOL same_char_wrapper(RE_EncodingTable* encoding, Py_UCS4 ch1, Py_UCS4 - ch2) { - return same_char(encoding, ch1, ch2); -} - -/* Checks whether 2 characters are the same, ignoring case. */ -Py_LOCAL_INLINE(BOOL) same_char_ign(RE_EncodingTable* encoding, Py_UCS4 ch1, - Py_UCS4 ch2) { - Py_UCS4 cases[RE_MAX_CASES]; - int count; - int i; - - if (ch1 == ch2) - return TRUE; - - count = encoding->all_cases(ch1, cases); - - for (i = 1; i < count; i++) { - if (cases[i] == ch2) - return TRUE; - } - - return FALSE; -} - -/* Wrapper for calling 'same_char' via a pointer. 
*/ -static BOOL same_char_ign_wrapper(RE_EncodingTable* encoding, Py_UCS4 ch1, - Py_UCS4 ch2) { - return same_char_ign(encoding, ch1, ch2); -} - -/* Checks whether a character is anything except a newline. */ -Py_LOCAL_INLINE(BOOL) matches_ANY(RE_EncodingTable* encoding, RE_Node* node, - Py_UCS4 ch) { - return ch != '\n'; -} - -/* Checks whether a character is anything except a line separator. */ -Py_LOCAL_INLINE(BOOL) matches_ANY_U(RE_EncodingTable* encoding, RE_Node* node, - Py_UCS4 ch) { - return !encoding->is_line_sep(ch); -} - -/* Checks whether 2 characters are the same. */ -Py_LOCAL_INLINE(BOOL) matches_CHARACTER(RE_EncodingTable* encoding, RE_Node* - node, Py_UCS4 ch) { - return same_char(encoding, node->values[0], ch); -} - -/* Checks whether 2 characters are the same, ignoring case. */ -Py_LOCAL_INLINE(BOOL) matches_CHARACTER_IGN(RE_EncodingTable* encoding, - RE_Node* node, Py_UCS4 ch) { - return same_char_ign(encoding, node->values[0], ch); -} - -/* Checks whether a character has a property. */ -Py_LOCAL_INLINE(BOOL) matches_PROPERTY(RE_EncodingTable* encoding, RE_Node* - node, Py_UCS4 ch) { - return encoding->has_property(node->values[0], ch); -} - -/* Checks whether a character has a property, ignoring case. */ -Py_LOCAL_INLINE(BOOL) matches_PROPERTY_IGN(RE_EncodingTable* encoding, RE_Node* - node, Py_UCS4 ch) { - RE_UINT32 property; - RE_UINT32 prop; - - property = node->values[0]; - prop = property >> 16; - - /* We need to do special handling of case-sensitive properties according to - * the 'encoding'. - */ - if (encoding == &unicode_encoding) { - /* We are working with Unicode. 
*/ - if (property == RE_PROP_GC_LU || property == RE_PROP_GC_LL || property - == RE_PROP_GC_LT) { - RE_UINT32 value; - - value = re_get_general_category(ch); - - return value == RE_PROP_LU || value == RE_PROP_LL || value == - RE_PROP_LT; - } else if (prop == RE_PROP_UPPERCASE || prop == RE_PROP_LOWERCASE) - return (BOOL)re_get_cased(ch); - - /* The property is case-insensitive. */ - return unicode_has_property(property, ch); - } else if (encoding == &ascii_encoding) { - /* We are working with ASCII. */ - if (property == RE_PROP_GC_LU || property == RE_PROP_GC_LL || property - == RE_PROP_GC_LT) { - RE_UINT32 value; - - value = re_get_general_category(ch); - - return value == RE_PROP_LU || value == RE_PROP_LL || value == - RE_PROP_LT; - } else if (prop == RE_PROP_UPPERCASE || prop == RE_PROP_LOWERCASE) - return (BOOL)re_get_cased(ch); - - /* The property is case-insensitive. */ - return ascii_has_property(property, ch); - } else { - /* We are working with Locale. */ - if (property == RE_PROP_GC_LU || property == RE_PROP_GC_LL || property - == RE_PROP_GC_LT) - return ch <= RE_LOCALE_MAX && (isupper((int)ch) || - islower((int)ch)) != 0; - else if (prop == RE_PROP_UPPERCASE || prop == RE_PROP_LOWERCASE) - return ch <= RE_LOCALE_MAX && (isupper((int)ch) || - islower((int)ch)) != 0; - - /* The property is case-insensitive. */ - return locale_has_property(property, ch); - } -} - -/* Checks whether a character is in a range. */ -Py_LOCAL_INLINE(BOOL) matches_RANGE(RE_EncodingTable* encoding, RE_Node* node, - Py_UCS4 ch) { - return in_range(encoding, node->values[0], node->values[1], ch); -} - -/* Checks whether a character is in a range, ignoring case. 
*/ -Py_LOCAL_INLINE(BOOL) matches_RANGE_IGN(RE_EncodingTable* encoding, RE_Node* - node, Py_UCS4 ch) { - return in_range_ign(encoding, node->values[0], node->values[1], ch); -} - -Py_LOCAL_INLINE(BOOL) in_set_diff(RE_EncodingTable* encoding, RE_Node* node, - Py_UCS4 ch); -Py_LOCAL_INLINE(BOOL) in_set_inter(RE_EncodingTable* encoding, RE_Node* node, - Py_UCS4 ch); -Py_LOCAL_INLINE(BOOL) in_set_sym_diff(RE_EncodingTable* encoding, RE_Node* - node, Py_UCS4 ch); -Py_LOCAL_INLINE(BOOL) in_set_union(RE_EncodingTable* encoding, RE_Node* node, - Py_UCS4 ch); - -/* Checks whether a character matches a set member. */ -Py_LOCAL_INLINE(BOOL) matches_member(RE_EncodingTable* encoding, RE_Node* - member, Py_UCS4 ch) { - switch (member->op) { - case RE_OP_CHARACTER: - /* values are: char_code */ - TRACE(("%s %d %d\n", re_op_text[member->op], member->match, - member->values[0])) - return ch == member->values[0]; - case RE_OP_PROPERTY: - /* values are: property */ - TRACE(("%s %d %d\n", re_op_text[member->op], member->match, - member->values[0])) - return encoding->has_property(member->values[0], ch); - case RE_OP_RANGE: - /* values are: lower, upper */ - TRACE(("%s %d %d %d\n", re_op_text[member->op], member->match, - member->values[0], member->values[1])) - return in_range(encoding, member->values[0], member->values[1], ch); - case RE_OP_SET_DIFF: - TRACE(("%s\n", re_op_text[member->op])) - return in_set_diff(encoding, member, ch); - case RE_OP_SET_INTER: - TRACE(("%s\n", re_op_text[member->op])) - return in_set_inter(encoding, member, ch); - case RE_OP_SET_SYM_DIFF: - TRACE(("%s\n", re_op_text[member->op])) - return in_set_sym_diff(encoding, member, ch); - case RE_OP_SET_UNION: - TRACE(("%s\n", re_op_text[member->op])) - return in_set_union(encoding, member, ch); - case RE_OP_STRING: - { - /* values are: char_code, char_code, ... 
*/ - size_t i; - TRACE(("%s %d %d\n", re_op_text[member->op], member->match, - member->value_count)) - - for (i = 0; i < member->value_count; i++) { - if (ch == member->values[i]) - return TRUE; - } - return FALSE; - } - default: - return FALSE; - } -} - -/* Checks whether a character matches a set member, ignoring case. */ -Py_LOCAL_INLINE(BOOL) matches_member_ign(RE_EncodingTable* encoding, RE_Node* - member, int case_count, Py_UCS4* cases) { - int i; - - for (i = 0; i < case_count; i++) { - switch (member->op) { - case RE_OP_CHARACTER: - /* values are: char_code */ - TRACE(("%s %d %d\n", re_op_text[member->op], member->match, - member->values[0])) - if (cases[i] == member->values[0]) - return TRUE; - break; - case RE_OP_PROPERTY: - /* values are: property */ - TRACE(("%s %d %d\n", re_op_text[member->op], member->match, - member->values[0])) - if (encoding->has_property(member->values[0], cases[i])) - return TRUE; - break; - case RE_OP_RANGE: - /* values are: lower, upper */ - TRACE(("%s %d %d %d\n", re_op_text[member->op], member->match, - member->values[0], member->values[1])) - if (in_range(encoding, member->values[0], member->values[1], - cases[i])) - return TRUE; - break; - case RE_OP_SET_DIFF: - TRACE(("%s\n", re_op_text[member->op])) - if (in_set_diff(encoding, member, cases[i])) - return TRUE; - break; - case RE_OP_SET_INTER: - TRACE(("%s\n", re_op_text[member->op])) - if (in_set_inter(encoding, member, cases[i])) - return TRUE; - break; - case RE_OP_SET_SYM_DIFF: - TRACE(("%s\n", re_op_text[member->op])) - if (in_set_sym_diff(encoding, member, cases[i])) - return TRUE; - break; - case RE_OP_SET_UNION: - TRACE(("%s\n", re_op_text[member->op])) - if (in_set_union(encoding, member, cases[i])) - return TRUE; - break; - case RE_OP_STRING: - { - size_t j; - TRACE(("%s %d %d\n", re_op_text[member->op], member->match, - member->value_count)) - - for (j = 0; j < member->value_count; j++) { - if (cases[i] == member->values[j]) - return TRUE; - } - break; - } - 
default: - return TRUE; - } - } - - return FALSE; -} - -/* Checks whether a character is in a set difference. */ -Py_LOCAL_INLINE(BOOL) in_set_diff(RE_EncodingTable* encoding, RE_Node* node, - Py_UCS4 ch) { - RE_Node* member; - - member = node->nonstring.next_2.node; - - if (matches_member(encoding, member, ch) != member->match) - return FALSE; - - member = member->next_1.node; - - while (member) { - if (matches_member(encoding, member, ch) == member->match) - return FALSE; - - member = member->next_1.node; - } - - return TRUE; -} - -/* Checks whether a character is in a set difference, ignoring case. */ -Py_LOCAL_INLINE(BOOL) in_set_diff_ign(RE_EncodingTable* encoding, RE_Node* - node, int case_count, Py_UCS4* cases) { - RE_Node* member; - - member = node->nonstring.next_2.node; - - if (matches_member_ign(encoding, member, case_count, cases) != - member->match) - return FALSE; - - member = member->next_1.node; - - while (member) { - if (matches_member_ign(encoding, member, case_count, cases) == - member->match) - return FALSE; - - member = member->next_1.node; - } - - return TRUE; -} - -/* Checks whether a character is in a set intersection. */ -Py_LOCAL_INLINE(BOOL) in_set_inter(RE_EncodingTable* encoding, RE_Node* node, - Py_UCS4 ch) { - RE_Node* member; - - member = node->nonstring.next_2.node; - - while (member) { - if (matches_member(encoding, member, ch) != member->match) - return FALSE; - - member = member->next_1.node; - } - - return TRUE; -} - -/* Checks whether a character is in a set intersection, ignoring case. */ -Py_LOCAL_INLINE(BOOL) in_set_inter_ign(RE_EncodingTable* encoding, RE_Node* - node, int case_count, Py_UCS4* cases) { - RE_Node* member; - - member = node->nonstring.next_2.node; - - while (member) { - if (matches_member_ign(encoding, member, case_count, cases) != - member->match) - return FALSE; - - member = member->next_1.node; - } - - return TRUE; -} - -/* Checks whether a character is in a set symmetric difference. 
*/ -Py_LOCAL_INLINE(BOOL) in_set_sym_diff(RE_EncodingTable* encoding, RE_Node* - node, Py_UCS4 ch) { - RE_Node* member; - BOOL result; - - member = node->nonstring.next_2.node; - - result = FALSE; - - while (member) { - if (matches_member(encoding, member, ch) == member->match) - result = !result; - - member = member->next_1.node; - } - - return result; -} - -/* Checks whether a character is in a set symmetric difference, ignoring case. - */ -Py_LOCAL_INLINE(BOOL) in_set_sym_diff_ign(RE_EncodingTable* encoding, RE_Node* - node, int case_count, Py_UCS4* cases) { - RE_Node* member; - BOOL result; - - member = node->nonstring.next_2.node; - - result = FALSE; - - while (member) { - if (matches_member_ign(encoding, member, case_count, cases) == - member->match) - result = !result; - - member = member->next_1.node; - } - - return result; -} - -/* Checks whether a character is in a set union. */ -Py_LOCAL_INLINE(BOOL) in_set_union(RE_EncodingTable* encoding, RE_Node* node, - Py_UCS4 ch) { - RE_Node* member; - - member = node->nonstring.next_2.node; - - while (member) { - if (matches_member(encoding, member, ch) == member->match) - return TRUE; - - member = member->next_1.node; - } - - return FALSE; -} - -/* Checks whether a character is in a set union, ignoring case. */ -Py_LOCAL_INLINE(BOOL) in_set_union_ign(RE_EncodingTable* encoding, RE_Node* - node, int case_count, Py_UCS4* cases) { - RE_Node* member; - - member = node->nonstring.next_2.node; - - while (member) { - if (matches_member_ign(encoding, member, case_count, cases) == - member->match) - return TRUE; - - member = member->next_1.node; - } - - return FALSE; -} - -/* Checks whether a character is in a set. 
*/ -Py_LOCAL_INLINE(BOOL) matches_SET(RE_EncodingTable* encoding, RE_Node* node, - Py_UCS4 ch) { - switch (node->op) { - case RE_OP_SET_DIFF: - case RE_OP_SET_DIFF_REV: - return in_set_diff(encoding, node, ch); - case RE_OP_SET_INTER: - case RE_OP_SET_INTER_REV: - return in_set_inter(encoding, node, ch); - case RE_OP_SET_SYM_DIFF: - case RE_OP_SET_SYM_DIFF_REV: - return in_set_sym_diff(encoding, node, ch); - case RE_OP_SET_UNION: - case RE_OP_SET_UNION_REV: - return in_set_union(encoding, node, ch); - } - - return FALSE; -} - -/* Checks whether a character is in a set, ignoring case. */ -Py_LOCAL_INLINE(BOOL) matches_SET_IGN(RE_EncodingTable* encoding, RE_Node* - node, Py_UCS4 ch) { - Py_UCS4 cases[RE_MAX_CASES]; - int case_count; - - case_count = encoding->all_cases(ch, cases); - - switch (node->op) { - case RE_OP_SET_DIFF_IGN: - case RE_OP_SET_DIFF_IGN_REV: - return in_set_diff_ign(encoding, node, case_count, cases); - case RE_OP_SET_INTER_IGN: - case RE_OP_SET_INTER_IGN_REV: - return in_set_inter_ign(encoding, node, case_count, cases); - case RE_OP_SET_SYM_DIFF_IGN: - case RE_OP_SET_SYM_DIFF_IGN_REV: - return in_set_sym_diff_ign(encoding, node, case_count, cases); - case RE_OP_SET_UNION_IGN: - case RE_OP_SET_UNION_IGN_REV: - return in_set_union_ign(encoding, node, case_count, cases); - } - - return FALSE; -} - -/* Resets a guard list. */ -Py_LOCAL_INLINE(void) reset_guard_list(RE_GuardList* guard_list) { - guard_list->count = 0; - guard_list->last_text_pos = -1; -} - -/* Initialises the state for a match. */ -Py_LOCAL_INLINE(void) init_match(RE_State* state) { - size_t i; - - /* Reset the backtrack. */ - state->current_backtrack_block = &state->backtrack_block; - state->current_backtrack_block->count = 0; - state->current_saved_groups = state->first_saved_groups; - state->backtrack = NULL; - state->search_anchor = state->text_pos; - state->match_pos = state->text_pos; - - /* Reset the guards for the repeats. 
*/ - for (i = 0; i < state->pattern->repeat_count; i++) { - reset_guard_list(&state->repeats[i].body_guard_list); - reset_guard_list(&state->repeats[i].tail_guard_list); - } - - /* Reset the guards for the fuzzy sections. */ - for (i = 0; i < state->pattern->fuzzy_count; i++) { - reset_guard_list(&state->fuzzy_guards[i].body_guard_list); - reset_guard_list(&state->fuzzy_guards[i].tail_guard_list); - } - - for (i = 0; i < state->pattern->true_group_count; i++) { - RE_GroupData* group; - - group = &state->groups[i]; - group->span.start = -1; - group->span.end = -1; - group->capture_count = 0; - group->current_capture = -1; - } - - /* Reset the guards for the group calls. */ - for (i = 0; i < state->pattern->call_ref_info_count; i++) - reset_guard_list(&state->group_call_guard_list[i]); - - /* Clear the counts and cost for matching. */ - memset(state->fuzzy_info.counts, 0, sizeof(state->fuzzy_info.counts)); - state->fuzzy_info.total_cost = 0; - memset(state->total_fuzzy_counts, 0, sizeof(state->total_fuzzy_counts)); - state->total_errors = 0; - state->total_cost = 0; - state->too_few_errors = FALSE; - state->capture_change = 0; - state->iterations = 0; -} - -/* Adds a new backtrack entry. */ -Py_LOCAL_INLINE(BOOL) add_backtrack(RE_SafeState* safe_state, RE_UINT8 op) { - RE_State* state; - RE_BacktrackBlock* current; - - state = safe_state->re_state; - - current = state->current_backtrack_block; - if (current->count >= current->capacity) { - if (!current->next) { - RE_BacktrackBlock* next; - - /* Is there too much backtracking? 
*/ - if (state->backtrack_allocated >= RE_MAX_BACKTRACK_ALLOC) - return FALSE; - - next = (RE_BacktrackBlock*)safe_alloc(safe_state, - sizeof(RE_BacktrackBlock)); - if (!next) - return FALSE; - - next->previous = current; - next->next = NULL; - next->capacity = RE_BACKTRACK_BLOCK_SIZE; - current->next = next; - - state->backtrack_allocated += RE_BACKTRACK_BLOCK_SIZE; - } - - current = current->next; - current->count = 0; - state->current_backtrack_block = current; - } - - state->backtrack = &current->items[current->count++]; - state->backtrack->op = op; - - return TRUE; -} - -/* Gets the last backtrack entry. - * - * It'll never be called when there are _no_ entries. - */ -Py_LOCAL_INLINE(RE_BacktrackData*) last_backtrack(RE_State* state) { - RE_BacktrackBlock* current; - - current = state->current_backtrack_block; - state->backtrack = &current->items[current->count - 1]; - - return state->backtrack; -} - -/* Discards the last backtrack entry. - * - * It'll never be called to discard the _only_ entry. - */ -Py_LOCAL_INLINE(void) discard_backtrack(RE_State* state) { - RE_BacktrackBlock* current; - - current = state->current_backtrack_block; - --current->count; - if (current->count == 0 && current->previous) - state->current_backtrack_block = current->previous; -} - -/* Copies a repeat guard list. */ -Py_LOCAL_INLINE(BOOL) copy_guard_data(RE_SafeState* safe_state, RE_GuardList* - dst, RE_GuardList* src) { - if (dst->capacity < src->count) { - RE_GuardSpan* new_spans; - - if (!safe_state) - return FALSE; - - dst->capacity = src->count; - new_spans = (RE_GuardSpan*)safe_realloc(safe_state, dst->spans, - dst->capacity * sizeof(RE_GuardSpan)); - if (!new_spans) - return FALSE; - - dst->spans = new_spans; - } - - dst->count = src->count; - memmove(dst->spans, src->spans, dst->count * sizeof(RE_GuardSpan)); - - dst->last_text_pos = -1; - - return TRUE; -} - -/* Copies a repeat. 
*/ -Py_LOCAL_INLINE(BOOL) copy_repeat_data(RE_SafeState* safe_state, RE_RepeatData* - dst, RE_RepeatData* src) { - if (!copy_guard_data(safe_state, &dst->body_guard_list, - &src->body_guard_list) || !copy_guard_data(safe_state, - &dst->tail_guard_list, &src->tail_guard_list)) { - safe_dealloc(safe_state, dst->body_guard_list.spans); - safe_dealloc(safe_state, dst->tail_guard_list.spans); - - return FALSE; - } - - dst->count = src->count; - dst->start = src->start; - dst->capture_change = src->capture_change; - - return TRUE; -} - -/* Pushes a return node onto the group call stack. */ -Py_LOCAL_INLINE(BOOL) push_group_return(RE_SafeState* safe_state, RE_Node* - return_node) { - RE_State* state; - PatternObject* pattern; - RE_GroupCallFrame* frame; - - state = safe_state->re_state; - pattern = state->pattern; - - if (state->current_group_call_frame && - state->current_group_call_frame->next) - /* Advance to the next allocated frame. */ - frame = state->current_group_call_frame->next; - else if (!state->current_group_call_frame && state->first_group_call_frame) - /* Advance to the first allocated frame. */ - frame = state->first_group_call_frame; - else { - /* Create a new frame. 
*/ - frame = (RE_GroupCallFrame*)safe_alloc(safe_state, - sizeof(RE_GroupCallFrame)); - if (!frame) - return FALSE; - - frame->groups = (RE_GroupData*)safe_alloc(safe_state, - pattern->true_group_count * sizeof(RE_GroupData)); - frame->repeats = (RE_RepeatData*)safe_alloc(safe_state, - pattern->repeat_count * sizeof(RE_RepeatData)); - if (!frame->groups || !frame->repeats) { - safe_dealloc(safe_state, frame->groups); - safe_dealloc(safe_state, frame->repeats); - safe_dealloc(safe_state, frame); - - return FALSE; - } - - memset(frame->groups, 0, pattern->true_group_count * - sizeof(RE_GroupData)); - memset(frame->repeats, 0, pattern->repeat_count * - sizeof(RE_RepeatData)); - - frame->previous = state->current_group_call_frame; - frame->next = NULL; - - if (frame->previous) - frame->previous->next = frame; - else - state->first_group_call_frame = frame; - } - - frame->node = return_node; - - /* Push the groups and guards. */ - if (return_node) { - size_t g; - size_t r; - - for (g = 0; g < pattern->true_group_count; g++) { - frame->groups[g].span = state->groups[g].span; - frame->groups[g].current_capture = - state->groups[g].current_capture; - } - - for (r = 0; r < pattern->repeat_count; r++) { - if (!copy_repeat_data(safe_state, &frame->repeats[r], - &state->repeats[r])) - return FALSE; - } - } - - state->current_group_call_frame = frame; - - return TRUE; -} - -/* Pops a return node from the group call stack. */ -Py_LOCAL_INLINE(RE_Node*) pop_group_return(RE_State* state) { - RE_GroupCallFrame* frame; - - frame = state->current_group_call_frame; - - /* Pop the groups and repeats. 
*/ - if (frame->node) { - PatternObject* pattern; - size_t g; - size_t r; - - pattern = state->pattern; - - for (g = 0; g < pattern->true_group_count; g++) { - state->groups[g].span = frame->groups[g].span; - state->groups[g].current_capture = - frame->groups[g].current_capture; - } - - for (r = 0; r < pattern->repeat_count; r++) - copy_repeat_data(NULL, &state->repeats[r], &frame->repeats[r]); - } - - /* Withdraw to previous frame. */ - state->current_group_call_frame = frame->previous; - - return frame->node; -} - -/* Returns the return node from the top of the group call stack. */ -Py_LOCAL_INLINE(RE_Node*) top_group_return(RE_State* state) { - RE_GroupCallFrame* frame; - - frame = state->current_group_call_frame; - - return frame->node; -} - -/* Checks whether a node matches only 1 character. */ -Py_LOCAL_INLINE(BOOL) node_matches_one_character(RE_Node* node) { - switch (node->op) { - case RE_OP_ANY: - case RE_OP_ANY_ALL: - case RE_OP_ANY_ALL_REV: - case RE_OP_ANY_REV: - case RE_OP_ANY_U: - case RE_OP_ANY_U_REV: - case RE_OP_CHARACTER: - case RE_OP_CHARACTER_IGN: - case RE_OP_CHARACTER_IGN_REV: - case RE_OP_CHARACTER_REV: - case RE_OP_PROPERTY: - case RE_OP_PROPERTY_IGN: - case RE_OP_PROPERTY_IGN_REV: - case RE_OP_PROPERTY_REV: - case RE_OP_RANGE: - case RE_OP_RANGE_IGN: - case RE_OP_RANGE_IGN_REV: - case RE_OP_RANGE_REV: - case RE_OP_SET_DIFF: - case RE_OP_SET_DIFF_IGN: - case RE_OP_SET_DIFF_IGN_REV: - case RE_OP_SET_DIFF_REV: - case RE_OP_SET_INTER: - case RE_OP_SET_INTER_IGN: - case RE_OP_SET_INTER_IGN_REV: - case RE_OP_SET_INTER_REV: - case RE_OP_SET_SYM_DIFF: - case RE_OP_SET_SYM_DIFF_IGN: - case RE_OP_SET_SYM_DIFF_IGN_REV: - case RE_OP_SET_SYM_DIFF_REV: - case RE_OP_SET_UNION: - case RE_OP_SET_UNION_IGN: - case RE_OP_SET_UNION_IGN_REV: - case RE_OP_SET_UNION_REV: - return TRUE; - default: - return FALSE; - } -} - -/* Checks whether the node is a firstset. 
*/ -Py_LOCAL_INLINE(BOOL) is_firstset(RE_Node* node) { - if (node->step != 0) - return FALSE; - - return node_matches_one_character(node); -} - -/* Locates the start node for testing ahead. */ -Py_LOCAL_INLINE(RE_Node*) locate_test_start(RE_Node* node) { - for (;;) { - switch (node->op) { - case RE_OP_BOUNDARY: - switch (node->next_1.node->op) { - case RE_OP_STRING: - case RE_OP_STRING_FLD: - case RE_OP_STRING_FLD_REV: - case RE_OP_STRING_IGN: - case RE_OP_STRING_IGN_REV: - case RE_OP_STRING_REV: - return node->next_1.node; - default: - return node; - } - case RE_OP_CALL_REF: - case RE_OP_END_GROUP: - case RE_OP_START_GROUP: - node = node->next_1.node; - break; - case RE_OP_GREEDY_REPEAT: - case RE_OP_LAZY_REPEAT: - if (node->values[1] == 0) - return node; - node = node->next_1.node; - break; - case RE_OP_GREEDY_REPEAT_ONE: - case RE_OP_LAZY_REPEAT_ONE: - if (node->values[1] == 0) - return node; - return node->nonstring.next_2.node; - case RE_OP_LOOKAROUND: - node = node->next_1.node; - break; - default: - if (is_firstset(node)) { - switch (node->next_1.node->op) { - case RE_OP_END_OF_STRING: - case RE_OP_START_OF_STRING: - return node->next_1.node; - } - } - - return node; - } - } -} - -/* Checks whether a character matches any of a set of case characters. */ -Py_LOCAL_INLINE(BOOL) any_case(Py_UCS4 ch, int case_count, Py_UCS4* cases) { - int i; - - for (i = 0; i < case_count; i++) { - if (ch == cases[i]) - return TRUE; - } - - return FALSE; -} - -/* Matches many ANYs, up to a limit. 
*/ -Py_LOCAL_INLINE(Py_ssize_t) match_many_ANY(RE_State* state, RE_Node* node, - Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) { - void* text; - RE_EncodingTable* encoding; - - text = state->text; - encoding = state->encoding; - - switch (state->charsize) { - case 1: - { - Py_UCS1* text_ptr; - Py_UCS1* limit_ptr; - - text_ptr = (Py_UCS1*)text + text_pos; - limit_ptr = (Py_UCS1*)text + limit; - - while (text_ptr < limit_ptr && matches_ANY(encoding, node, text_ptr[0]) - == match) - ++text_ptr; - - text_pos = text_ptr - (Py_UCS1*)text; - break; - } - case 2: - { - Py_UCS2* text_ptr; - Py_UCS2* limit_ptr; - - text_ptr = (Py_UCS2*)text + text_pos; - limit_ptr = (Py_UCS2*)text + limit; - - while (text_ptr < limit_ptr && matches_ANY(encoding, node, text_ptr[0]) - == match) - ++text_ptr; - - text_pos = text_ptr - (Py_UCS2*)text; - break; - } - case 4: - { - Py_UCS4* text_ptr; - Py_UCS4* limit_ptr; - - text_ptr = (Py_UCS4*)text + text_pos; - limit_ptr = (Py_UCS4*)text + limit; - - while (text_ptr < limit_ptr && matches_ANY(encoding, node, text_ptr[0]) - == match) - ++text_ptr; - - text_pos = text_ptr - (Py_UCS4*)text; - break; - } - } - - return text_pos; -} - -/* Matches many ANYs, up to a limit, backwards. 
*/ -Py_LOCAL_INLINE(Py_ssize_t) match_many_ANY_REV(RE_State* state, RE_Node* node, - Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) { - void* text; - RE_EncodingTable* encoding; - - text = state->text; - encoding = state->encoding; - - switch (state->charsize) { - case 1: - { - Py_UCS1* text_ptr; - Py_UCS1* limit_ptr; - - text_ptr = (Py_UCS1*)text + text_pos; - limit_ptr = (Py_UCS1*)text + limit; - - while (text_ptr > limit_ptr && matches_ANY(encoding, node, - text_ptr[-1]) == match) - --text_ptr; - - text_pos = text_ptr - (Py_UCS1*)text; - break; - } - case 2: - { - Py_UCS2* text_ptr; - Py_UCS2* limit_ptr; - - text_ptr = (Py_UCS2*)text + text_pos; - limit_ptr = (Py_UCS2*)text + limit; - - while (text_ptr > limit_ptr && matches_ANY(encoding, node, - text_ptr[-1]) == match) - --text_ptr; - - text_pos = text_ptr - (Py_UCS2*)text; - break; - } - case 4: - { - Py_UCS4* text_ptr; - Py_UCS4* limit_ptr; - - text_ptr = (Py_UCS4*)text + text_pos; - limit_ptr = (Py_UCS4*)text + limit; - - while (text_ptr > limit_ptr && matches_ANY(encoding, node, - text_ptr[-1]) == match) - --text_ptr; - - text_pos = text_ptr - (Py_UCS4*)text; - break; - } - } - - return text_pos; -} - -/* Matches many ANY_Us, up to a limit. 
*/ -Py_LOCAL_INLINE(Py_ssize_t) match_many_ANY_U(RE_State* state, RE_Node* node, - Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) { - void* text; - RE_EncodingTable* encoding; - - text = state->text; - encoding = state->encoding; - - switch (state->charsize) { - case 1: - { - Py_UCS1* text_ptr; - Py_UCS1* limit_ptr; - - text_ptr = (Py_UCS1*)text + text_pos; - limit_ptr = (Py_UCS1*)text + limit; - - while (text_ptr < limit_ptr && matches_ANY_U(encoding, node, - text_ptr[0]) == match) - ++text_ptr; - - text_pos = text_ptr - (Py_UCS1*)text; - break; - } - case 2: - { - Py_UCS2* text_ptr; - Py_UCS2* limit_ptr; - - text_ptr = (Py_UCS2*)text + text_pos; - limit_ptr = (Py_UCS2*)text + limit; - - while (text_ptr < limit_ptr && matches_ANY_U(encoding, node, - text_ptr[0]) == match) - ++text_ptr; - - text_pos = text_ptr - (Py_UCS2*)text; - break; - } - case 4: - { - Py_UCS4* text_ptr; - Py_UCS4* limit_ptr; - - text_ptr = (Py_UCS4*)text + text_pos; - limit_ptr = (Py_UCS4*)text + limit; - - while (text_ptr < limit_ptr && matches_ANY_U(encoding, node, - text_ptr[0]) == match) - ++text_ptr; - - text_pos = text_ptr - (Py_UCS4*)text; - break; - } - } - - return text_pos; -} - -/* Matches many ANY_Us, up to a limit, backwards. 
*/ -Py_LOCAL_INLINE(Py_ssize_t) match_many_ANY_U_REV(RE_State* state, RE_Node* - node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) { - void* text; - RE_EncodingTable* encoding; - - text = state->text; - encoding = state->encoding; - - switch (state->charsize) { - case 1: - { - Py_UCS1* text_ptr; - Py_UCS1* limit_ptr; - - text_ptr = (Py_UCS1*)text + text_pos; - limit_ptr = (Py_UCS1*)text + limit; - - while (text_ptr > limit_ptr && matches_ANY_U(encoding, node, - text_ptr[-1]) == match) - --text_ptr; - - text_pos = text_ptr - (Py_UCS1*)text; - break; - } - case 2: - { - Py_UCS2* text_ptr; - Py_UCS2* limit_ptr; - - text_ptr = (Py_UCS2*)text + text_pos; - limit_ptr = (Py_UCS2*)text + limit; - - while (text_ptr > limit_ptr && matches_ANY_U(encoding, node, - text_ptr[-1]) == match) - --text_ptr; - - text_pos = text_ptr - (Py_UCS2*)text; - break; - } - case 4: - { - Py_UCS4* text_ptr; - Py_UCS4* limit_ptr; - - text_ptr = (Py_UCS4*)text + text_pos; - limit_ptr = (Py_UCS4*)text + limit; - - while (text_ptr > limit_ptr && matches_ANY_U(encoding, node, - text_ptr[-1]) == match) - --text_ptr; - - text_pos = text_ptr - (Py_UCS4*)text; - break; - } - } - - return text_pos; -} - -/* Matches many CHARACTERs, up to a limit. 
*/ -Py_LOCAL_INLINE(Py_ssize_t) match_many_CHARACTER(RE_State* state, RE_Node* - node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) { - void* text; - Py_UCS4 ch; - - text = state->text; - match = node->match == match; - ch = node->values[0]; - - switch (state->charsize) { - case 1: - { - Py_UCS1* text_ptr; - Py_UCS1* limit_ptr; - - text_ptr = (Py_UCS1*)text + text_pos; - limit_ptr = (Py_UCS1*)text + limit; - - while (text_ptr < limit_ptr && (text_ptr[0] == ch) == match) - ++text_ptr; - - text_pos = text_ptr - (Py_UCS1*)text; - break; - } - case 2: - { - Py_UCS2* text_ptr; - Py_UCS2* limit_ptr; - - text_ptr = (Py_UCS2*)text + text_pos; - limit_ptr = (Py_UCS2*)text + limit; - - while (text_ptr < limit_ptr && (text_ptr[0] == ch) == match) - ++text_ptr; - - text_pos = text_ptr - (Py_UCS2*)text; - break; - } - case 4: - { - Py_UCS4* text_ptr; - Py_UCS4* limit_ptr; - - text_ptr = (Py_UCS4*)text + text_pos; - limit_ptr = (Py_UCS4*)text + limit; - - while (text_ptr < limit_ptr && (text_ptr[0] == ch) == match) - ++text_ptr; - - text_pos = text_ptr - (Py_UCS4*)text; - break; - } - } - - return text_pos; -} - -/* Matches many CHARACTERs, up to a limit, ignoring case. 
*/ -Py_LOCAL_INLINE(Py_ssize_t) match_many_CHARACTER_IGN(RE_State* state, RE_Node* - node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) { - void* text; - Py_UCS4 cases[RE_MAX_CASES]; - int case_count; - - text = state->text; - match = node->match == match; - case_count = state->encoding->all_cases(node->values[0], cases); - - switch (state->charsize) { - case 1: - { - Py_UCS1* text_ptr; - Py_UCS1* limit_ptr; - - text_ptr = (Py_UCS1*)text + text_pos; - limit_ptr = (Py_UCS1*)text + limit; - - while (text_ptr < limit_ptr && any_case(text_ptr[0], case_count, cases) - == match) - ++text_ptr; - - text_pos = text_ptr - (Py_UCS1*)text; - break; - } - case 2: - { - Py_UCS2* text_ptr; - Py_UCS2* limit_ptr; - - text_ptr = (Py_UCS2*)text + text_pos; - limit_ptr = (Py_UCS2*)text + limit; - - while (text_ptr < limit_ptr && any_case(text_ptr[0], case_count, cases) - == match) - ++text_ptr; - - text_pos = text_ptr - (Py_UCS2*)text; - break; - } - case 4: - { - Py_UCS4* text_ptr; - Py_UCS4* limit_ptr; - - text_ptr = (Py_UCS4*)text + text_pos; - limit_ptr = (Py_UCS4*)text + limit; - - while (text_ptr < limit_ptr && any_case(text_ptr[0], case_count, cases) - == match) - ++text_ptr; - - text_pos = text_ptr - (Py_UCS4*)text; - break; - } - } - - return text_pos; -} - -/* Matches many CHARACTERs, up to a limit, backwards, ignoring case. 
*/ -Py_LOCAL_INLINE(Py_ssize_t) match_many_CHARACTER_IGN_REV(RE_State* state, - RE_Node* node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) { - void* text; - Py_UCS4 cases[RE_MAX_CASES]; - int case_count; - - text = state->text; - match = node->match == match; - case_count = state->encoding->all_cases(node->values[0], cases); - - switch (state->charsize) { - case 1: - { - Py_UCS1* text_ptr; - Py_UCS1* limit_ptr; - - text_ptr = (Py_UCS1*)text + text_pos; - limit_ptr = (Py_UCS1*)text + limit; - - while (text_ptr > limit_ptr && any_case(text_ptr[-1], case_count, - cases) == match) - --text_ptr; - - text_pos = text_ptr - (Py_UCS1*)text; - break; - } - case 2: - { - Py_UCS2* text_ptr; - Py_UCS2* limit_ptr; - - text_ptr = (Py_UCS2*)text + text_pos; - limit_ptr = (Py_UCS2*)text + limit; - - while (text_ptr > limit_ptr && any_case(text_ptr[-1], case_count, - cases) == match) - --text_ptr; - - text_pos = text_ptr - (Py_UCS2*)text; - break; - } - case 4: - { - Py_UCS4* text_ptr; - Py_UCS4* limit_ptr; - - text_ptr = (Py_UCS4*)text + text_pos; - limit_ptr = (Py_UCS4*)text + limit; - - while (text_ptr > limit_ptr && any_case(text_ptr[-1], case_count, - cases) == match) - --text_ptr; - - text_pos = text_ptr - (Py_UCS4*)text; - break; - } - } - - return text_pos; -} - -/* Matches many CHARACTERs, up to a limit, backwards. 
*/ -Py_LOCAL_INLINE(Py_ssize_t) match_many_CHARACTER_REV(RE_State* state, RE_Node* - node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) { - void* text; - Py_UCS4 ch; - - text = state->text; - match = node->match == match; - ch = node->values[0]; - - switch (state->charsize) { - case 1: - { - Py_UCS1* text_ptr; - Py_UCS1* limit_ptr; - - text_ptr = (Py_UCS1*)text + text_pos; - limit_ptr = (Py_UCS1*)text + limit; - - while (text_ptr > limit_ptr && (text_ptr[-1] == ch) == match) - --text_ptr; - - text_pos = text_ptr - (Py_UCS1*)text; - break; - } - case 2: - { - Py_UCS2* text_ptr; - Py_UCS2* limit_ptr; - - text_ptr = (Py_UCS2*)text + text_pos; - limit_ptr = (Py_UCS2*)text + limit; - - while (text_ptr > limit_ptr && (text_ptr[-1] == ch) == match) - --text_ptr; - - text_pos = text_ptr - (Py_UCS2*)text; - break; - } - case 4: - { - Py_UCS4* text_ptr; - Py_UCS4* limit_ptr; - - text_ptr = (Py_UCS4*)text + text_pos; - limit_ptr = (Py_UCS4*)text + limit; - - while (text_ptr > limit_ptr && (text_ptr[-1] == ch) == match) - --text_ptr; - - text_pos = text_ptr - (Py_UCS4*)text; - break; - } - } - - return text_pos; -} - -/* Matches many PROPERTYs, up to a limit. 
*/ -Py_LOCAL_INLINE(Py_ssize_t) match_many_PROPERTY(RE_State* state, RE_Node* node, - Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) { - void* text; - RE_EncodingTable* encoding; - - text = state->text; - match = node->match == match; - encoding = state->encoding; - - switch (state->charsize) { - case 1: - { - Py_UCS1* text_ptr; - Py_UCS1* limit_ptr; - - text_ptr = (Py_UCS1*)text + text_pos; - limit_ptr = (Py_UCS1*)text + limit; - - while (text_ptr < limit_ptr && matches_PROPERTY(encoding, node, - text_ptr[0]) == match) - ++text_ptr; - - text_pos = text_ptr - (Py_UCS1*)text; - break; - } - case 2: - { - Py_UCS2* text_ptr; - Py_UCS2* limit_ptr; - - text_ptr = (Py_UCS2*)text + text_pos; - limit_ptr = (Py_UCS2*)text + limit; - - while (text_ptr < limit_ptr && matches_PROPERTY(encoding, node, - text_ptr[0]) == match) - ++text_ptr; - - text_pos = text_ptr - (Py_UCS2*)text; - break; - } - case 4: - { - Py_UCS4* text_ptr; - Py_UCS4* limit_ptr; - - text_ptr = (Py_UCS4*)text + text_pos; - limit_ptr = (Py_UCS4*)text + limit; - - while (text_ptr < limit_ptr && matches_PROPERTY(encoding, node, - text_ptr[0]) == match) - ++text_ptr; - - text_pos = text_ptr - (Py_UCS4*)text; - break; - } - } - - return text_pos; -} - -/* Matches many PROPERTYs, up to a limit, ignoring case. 
*/ -Py_LOCAL_INLINE(Py_ssize_t) match_many_PROPERTY_IGN(RE_State* state, RE_Node* - node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) { - void* text; - RE_EncodingTable* encoding; - - text = state->text; - match = node->match == match; - encoding = state->encoding; - - switch (state->charsize) { - case 1: - { - Py_UCS1* text_ptr; - Py_UCS1* limit_ptr; - - text_ptr = (Py_UCS1*)text + text_pos; - limit_ptr = (Py_UCS1*)text + limit; - - while (text_ptr < limit_ptr && matches_PROPERTY_IGN(encoding, node, - text_ptr[0]) == match) - ++text_ptr; - - text_pos = text_ptr - (Py_UCS1*)text; - break; - } - case 2: - { - Py_UCS2* text_ptr; - Py_UCS2* limit_ptr; - - text_ptr = (Py_UCS2*)text + text_pos; - limit_ptr = (Py_UCS2*)text + limit; - - while (text_ptr < limit_ptr && matches_PROPERTY_IGN(encoding, node, - text_ptr[0]) == match) - ++text_ptr; - - text_pos = text_ptr - (Py_UCS2*)text; - break; - } - case 4: - { - Py_UCS4* text_ptr; - Py_UCS4* limit_ptr; - - text_ptr = (Py_UCS4*)text + text_pos; - limit_ptr = (Py_UCS4*)text + limit; - - while (text_ptr < limit_ptr && matches_PROPERTY_IGN(encoding, node, - text_ptr[0]) == match) - ++text_ptr; - - text_pos = text_ptr - (Py_UCS4*)text; - break; - } - } - - return text_pos; -} - -/* Matches many PROPERTYs, up to a limit, backwards, ignoring case. 
*/ -Py_LOCAL_INLINE(Py_ssize_t) match_many_PROPERTY_IGN_REV(RE_State* state, - RE_Node* node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) { - void* text; - RE_EncodingTable* encoding; - - text = state->text; - match = node->match == match; - encoding = state->encoding; - - switch (state->charsize) { - case 1: - { - Py_UCS1* text_ptr; - Py_UCS1* limit_ptr; - - text_ptr = (Py_UCS1*)text + text_pos; - limit_ptr = (Py_UCS1*)text + limit; - - while (text_ptr > limit_ptr && matches_PROPERTY_IGN(encoding, node, - text_ptr[-1]) == match) - --text_ptr; - - text_pos = text_ptr - (Py_UCS1*)text; - break; - } - case 2: - { - Py_UCS2* text_ptr; - Py_UCS2* limit_ptr; - - text_ptr = (Py_UCS2*)text + text_pos; - limit_ptr = (Py_UCS2*)text + limit; - - while (text_ptr > limit_ptr && matches_PROPERTY_IGN(encoding, node, - text_ptr[-1]) == match) - --text_ptr; - - text_pos = text_ptr - (Py_UCS2*)text; - break; - } - case 4: - { - Py_UCS4* text_ptr; - Py_UCS4* limit_ptr; - - text_ptr = (Py_UCS4*)text + text_pos; - limit_ptr = (Py_UCS4*)text + limit; - - while (text_ptr > limit_ptr && matches_PROPERTY_IGN(encoding, node, - text_ptr[-1]) == match) - --text_ptr; - - text_pos = text_ptr - (Py_UCS4*)text; - break; - } - } - - return text_pos; -} - -/* Matches many PROPERTYs, up to a limit, backwards. 
*/
/* Backward scan over a run of characters tested with matches_PROPERTY().
 *
 * state    - supplies the text buffer, its charsize and the encoding table.
 * node     - the node passed to the tester; its match flag is combined with
 *            the match argument.
 * text_pos - index just after the first character to examine.
 * limit    - lowest index the scan may retreat to.
 * match    - polarity requested by the caller.
 *
 * The character examined at each step is the one immediately before the
 * current position. Returns the updated position: the scan stops at the
 * first character whose test result differs from the required polarity, or
 * once the position reaches limit. If state->charsize is not 1, 2 or 4 the
 * position is returned unchanged.
 */
Py_LOCAL_INLINE(Py_ssize_t) match_many_PROPERTY_REV(RE_State* state, RE_Node*
  node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) {
    RE_EncodingTable* enc = state->encoding;
    /* Required tester result: TRUE when the node's flag and the caller's
     * flag agree.
     */
    BOOL wanted = node->match == match;

    switch (state->charsize) {
    case 1:
    {
        Py_UCS1* chars = (Py_UCS1*)state->text;

        while (text_pos > limit && matches_PROPERTY(enc, node,
          chars[text_pos - 1]) == wanted)
            --text_pos;
        break;
    }
    case 2:
    {
        Py_UCS2* chars = (Py_UCS2*)state->text;

        while (text_pos > limit && matches_PROPERTY(enc, node,
          chars[text_pos - 1]) == wanted)
            --text_pos;
        break;
    }
    case 4:
    {
        Py_UCS4* chars = (Py_UCS4*)state->text;

        while (text_pos > limit && matches_PROPERTY(enc, node,
          chars[text_pos - 1]) == wanted)
            --text_pos;
        break;
    }
    }

    return text_pos;
}

/* Matches many RANGEs, up to a limit.
*/
/* Forward scan over a run of characters tested with matches_RANGE().
 *
 * state    - supplies the text buffer, its charsize and the encoding table.
 * node     - the node passed to the tester; its match flag is combined with
 *            the match argument.
 * text_pos - index at which the scan starts.
 * limit    - index at which the scan must stop (exclusive).
 * match    - polarity requested by the caller.
 *
 * Returns the updated position: the scan stops at the first character whose
 * test result differs from the required polarity, or once the position
 * reaches limit. If state->charsize is not 1, 2 or 4 the position is
 * returned unchanged.
 */
Py_LOCAL_INLINE(Py_ssize_t) match_many_RANGE(RE_State* state, RE_Node* node,
  Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) {
    RE_EncodingTable* enc = state->encoding;
    /* Required tester result: TRUE when the node's flag and the caller's
     * flag agree.
     */
    BOOL wanted = node->match == match;

    switch (state->charsize) {
    case 1:
    {
        Py_UCS1* chars = (Py_UCS1*)state->text;

        while (text_pos < limit && matches_RANGE(enc, node, chars[text_pos])
          == wanted)
            ++text_pos;
        break;
    }
    case 2:
    {
        Py_UCS2* chars = (Py_UCS2*)state->text;

        while (text_pos < limit && matches_RANGE(enc, node, chars[text_pos])
          == wanted)
            ++text_pos;
        break;
    }
    case 4:
    {
        Py_UCS4* chars = (Py_UCS4*)state->text;

        while (text_pos < limit && matches_RANGE(enc, node, chars[text_pos])
          == wanted)
            ++text_pos;
        break;
    }
    }

    return text_pos;
}

/* Matches many RANGEs, up to a limit, ignoring case.
*/
/* Forward scan over a run of characters tested with matches_RANGE_IGN().
 *
 * state    - supplies the text buffer, its charsize and the encoding table.
 * node     - the node passed to the tester; its match flag is combined with
 *            the match argument.
 * text_pos - index at which the scan starts.
 * limit    - index at which the scan must stop (exclusive).
 * match    - polarity requested by the caller.
 *
 * Returns the updated position: the scan stops at the first character whose
 * test result differs from the required polarity, or once the position
 * reaches limit. If state->charsize is not 1, 2 or 4 the position is
 * returned unchanged.
 */
Py_LOCAL_INLINE(Py_ssize_t) match_many_RANGE_IGN(RE_State* state, RE_Node*
  node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) {
    RE_EncodingTable* enc = state->encoding;
    /* Required tester result: TRUE when the node's flag and the caller's
     * flag agree.
     */
    BOOL wanted = node->match == match;

    switch (state->charsize) {
    case 1:
    {
        Py_UCS1* chars = (Py_UCS1*)state->text;

        while (text_pos < limit && matches_RANGE_IGN(enc, node,
          chars[text_pos]) == wanted)
            ++text_pos;
        break;
    }
    case 2:
    {
        Py_UCS2* chars = (Py_UCS2*)state->text;

        while (text_pos < limit && matches_RANGE_IGN(enc, node,
          chars[text_pos]) == wanted)
            ++text_pos;
        break;
    }
    case 4:
    {
        Py_UCS4* chars = (Py_UCS4*)state->text;

        while (text_pos < limit && matches_RANGE_IGN(enc, node,
          chars[text_pos]) == wanted)
            ++text_pos;
        break;
    }
    }

    return text_pos;
}

/* Matches many RANGEs, up to a limit, backwards, ignoring case.
*/
/* Backward scan over a run of characters tested with matches_RANGE_IGN().
 *
 * state    - supplies the text buffer, its charsize and the encoding table.
 * node     - the node passed to the tester; its match flag is combined with
 *            the match argument.
 * text_pos - index just after the first character to examine.
 * limit    - lowest index the scan may retreat to.
 * match    - polarity requested by the caller.
 *
 * The character examined at each step is the one immediately before the
 * current position. Returns the updated position: the scan stops at the
 * first character whose test result differs from the required polarity, or
 * once the position reaches limit. If state->charsize is not 1, 2 or 4 the
 * position is returned unchanged.
 */
Py_LOCAL_INLINE(Py_ssize_t) match_many_RANGE_IGN_REV(RE_State* state, RE_Node*
  node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) {
    RE_EncodingTable* enc = state->encoding;
    /* Required tester result: TRUE when the node's flag and the caller's
     * flag agree.
     */
    BOOL wanted = node->match == match;

    switch (state->charsize) {
    case 1:
    {
        Py_UCS1* chars = (Py_UCS1*)state->text;

        while (text_pos > limit && matches_RANGE_IGN(enc, node,
          chars[text_pos - 1]) == wanted)
            --text_pos;
        break;
    }
    case 2:
    {
        Py_UCS2* chars = (Py_UCS2*)state->text;

        while (text_pos > limit && matches_RANGE_IGN(enc, node,
          chars[text_pos - 1]) == wanted)
            --text_pos;
        break;
    }
    case 4:
    {
        Py_UCS4* chars = (Py_UCS4*)state->text;

        while (text_pos > limit && matches_RANGE_IGN(enc, node,
          chars[text_pos - 1]) == wanted)
            --text_pos;
        break;
    }
    }

    return text_pos;
}

/* Matches many RANGEs, up to a limit, backwards.
*/
/* Backward scan over a run of characters tested with matches_RANGE().
 *
 * state    - supplies the text buffer, its charsize and the encoding table.
 * node     - the node passed to the tester; its match flag is combined with
 *            the match argument.
 * text_pos - index just after the first character to examine.
 * limit    - lowest index the scan may retreat to.
 * match    - polarity requested by the caller.
 *
 * The character examined at each step is the one immediately before the
 * current position. Returns the updated position: the scan stops at the
 * first character whose test result differs from the required polarity, or
 * once the position reaches limit. If state->charsize is not 1, 2 or 4 the
 * position is returned unchanged.
 */
Py_LOCAL_INLINE(Py_ssize_t) match_many_RANGE_REV(RE_State* state, RE_Node*
  node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) {
    RE_EncodingTable* enc = state->encoding;
    /* Required tester result: TRUE when the node's flag and the caller's
     * flag agree.
     */
    BOOL wanted = node->match == match;

    switch (state->charsize) {
    case 1:
    {
        Py_UCS1* chars = (Py_UCS1*)state->text;

        while (text_pos > limit && matches_RANGE(enc, node,
          chars[text_pos - 1]) == wanted)
            --text_pos;
        break;
    }
    case 2:
    {
        Py_UCS2* chars = (Py_UCS2*)state->text;

        while (text_pos > limit && matches_RANGE(enc, node,
          chars[text_pos - 1]) == wanted)
            --text_pos;
        break;
    }
    case 4:
    {
        Py_UCS4* chars = (Py_UCS4*)state->text;

        while (text_pos > limit && matches_RANGE(enc, node,
          chars[text_pos - 1]) == wanted)
            --text_pos;
        break;
    }
    }

    return text_pos;
}

/* Matches many SETs, up to a limit.
*/
/* Forward scan over a run of characters tested with matches_SET().
 *
 * state    - supplies the text buffer, its charsize and the encoding table.
 * node     - the node passed to the tester; its match flag is combined with
 *            the match argument.
 * text_pos - index at which the scan starts.
 * limit    - index at which the scan must stop (exclusive).
 * match    - polarity requested by the caller.
 *
 * Returns the updated position: the scan stops at the first character whose
 * test result differs from the required polarity, or once the position
 * reaches limit. If state->charsize is not 1, 2 or 4 the position is
 * returned unchanged.
 */
Py_LOCAL_INLINE(Py_ssize_t) match_many_SET(RE_State* state, RE_Node* node,
  Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) {
    RE_EncodingTable* enc = state->encoding;
    /* Required tester result: TRUE when the node's flag and the caller's
     * flag agree.
     */
    BOOL wanted = node->match == match;

    switch (state->charsize) {
    case 1:
    {
        Py_UCS1* chars = (Py_UCS1*)state->text;

        while (text_pos < limit && matches_SET(enc, node, chars[text_pos]) ==
          wanted)
            ++text_pos;
        break;
    }
    case 2:
    {
        Py_UCS2* chars = (Py_UCS2*)state->text;

        while (text_pos < limit && matches_SET(enc, node, chars[text_pos]) ==
          wanted)
            ++text_pos;
        break;
    }
    case 4:
    {
        Py_UCS4* chars = (Py_UCS4*)state->text;

        while (text_pos < limit && matches_SET(enc, node, chars[text_pos]) ==
          wanted)
            ++text_pos;
        break;
    }
    }

    return text_pos;
}

/* Matches many SETs, up to a limit, ignoring case.
*/
/* Forward scan over a run of characters tested with matches_SET_IGN().
 *
 * state    - supplies the text buffer, its charsize and the encoding table.
 * node     - the node passed to the tester; its match flag is combined with
 *            the match argument.
 * text_pos - index at which the scan starts.
 * limit    - index at which the scan must stop (exclusive).
 * match    - polarity requested by the caller.
 *
 * Returns the updated position: the scan stops at the first character whose
 * test result differs from the required polarity, or once the position
 * reaches limit. If state->charsize is not 1, 2 or 4 the position is
 * returned unchanged.
 */
Py_LOCAL_INLINE(Py_ssize_t) match_many_SET_IGN(RE_State* state, RE_Node* node,
  Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) {
    RE_EncodingTable* enc = state->encoding;
    /* Required tester result: TRUE when the node's flag and the caller's
     * flag agree.
     */
    BOOL wanted = node->match == match;

    switch (state->charsize) {
    case 1:
    {
        Py_UCS1* chars = (Py_UCS1*)state->text;

        while (text_pos < limit && matches_SET_IGN(enc, node,
          chars[text_pos]) == wanted)
            ++text_pos;
        break;
    }
    case 2:
    {
        Py_UCS2* chars = (Py_UCS2*)state->text;

        while (text_pos < limit && matches_SET_IGN(enc, node,
          chars[text_pos]) == wanted)
            ++text_pos;
        break;
    }
    case 4:
    {
        Py_UCS4* chars = (Py_UCS4*)state->text;

        while (text_pos < limit && matches_SET_IGN(enc, node,
          chars[text_pos]) == wanted)
            ++text_pos;
        break;
    }
    }

    return text_pos;
}

/* Matches many SETs, up to a limit, backwards, ignoring case.
*/
/* Backward scan over a run of characters tested with matches_SET_IGN().
 *
 * state    - supplies the text buffer, its charsize and the encoding table.
 * node     - the node passed to the tester; its match flag is combined with
 *            the match argument.
 * text_pos - index just after the first character to examine.
 * limit    - lowest index the scan may retreat to.
 * match    - polarity requested by the caller.
 *
 * The character examined at each step is the one immediately before the
 * current position. Returns the updated position: the scan stops at the
 * first character whose test result differs from the required polarity, or
 * once the position reaches limit. If state->charsize is not 1, 2 or 4 the
 * position is returned unchanged.
 */
Py_LOCAL_INLINE(Py_ssize_t) match_many_SET_IGN_REV(RE_State* state, RE_Node*
  node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) {
    RE_EncodingTable* enc = state->encoding;
    /* Required tester result: TRUE when the node's flag and the caller's
     * flag agree.
     */
    BOOL wanted = node->match == match;

    switch (state->charsize) {
    case 1:
    {
        Py_UCS1* chars = (Py_UCS1*)state->text;

        while (text_pos > limit && matches_SET_IGN(enc, node,
          chars[text_pos - 1]) == wanted)
            --text_pos;
        break;
    }
    case 2:
    {
        Py_UCS2* chars = (Py_UCS2*)state->text;

        while (text_pos > limit && matches_SET_IGN(enc, node,
          chars[text_pos - 1]) == wanted)
            --text_pos;
        break;
    }
    case 4:
    {
        Py_UCS4* chars = (Py_UCS4*)state->text;

        while (text_pos > limit && matches_SET_IGN(enc, node,
          chars[text_pos - 1]) == wanted)
            --text_pos;
        break;
    }
    }

    return text_pos;
}

/* Matches many SETs, up to a limit, backwards.
*/
/* Backward scan over a run of characters tested with matches_SET().
 *
 * state    - supplies the text buffer, its charsize and the encoding table.
 * node     - the node passed to the tester; its match flag is combined with
 *            the match argument.
 * text_pos - index just after the first character to examine.
 * limit    - lowest index the scan may retreat to.
 * match    - polarity requested by the caller.
 *
 * The character examined at each step is the one immediately before the
 * current position. Returns the updated position: the scan stops at the
 * first character whose test result differs from the required polarity, or
 * once the position reaches limit. If state->charsize is not 1, 2 or 4 the
 * position is returned unchanged.
 */
Py_LOCAL_INLINE(Py_ssize_t) match_many_SET_REV(RE_State* state, RE_Node* node,
  Py_ssize_t text_pos, Py_ssize_t limit, BOOL match) {
    RE_EncodingTable* enc = state->encoding;
    /* Required tester result: TRUE when the node's flag and the caller's
     * flag agree.
     */
    BOOL wanted = node->match == match;

    switch (state->charsize) {
    case 1:
    {
        Py_UCS1* chars = (Py_UCS1*)state->text;

        while (text_pos > limit && matches_SET(enc, node,
          chars[text_pos - 1]) == wanted)
            --text_pos;
        break;
    }
    case 2:
    {
        Py_UCS2* chars = (Py_UCS2*)state->text;

        while (text_pos > limit && matches_SET(enc, node,
          chars[text_pos - 1]) == wanted)
            --text_pos;
        break;
    }
    case 4:
    {
        Py_UCS4* chars = (Py_UCS4*)state->text;

        while (text_pos > limit && matches_SET(enc, node,
          chars[text_pos - 1]) == wanted)
            --text_pos;
        break;
    }
    }

    return text_pos;
}

/* Counts a repeated character pattern.
*/ -Py_LOCAL_INLINE(size_t) count_one(RE_State* state, RE_Node* node, Py_ssize_t - text_pos, size_t max_count, BOOL* is_partial) { - size_t count; - - *is_partial = FALSE; - - if (max_count < 1) - return 0; - - switch (node->op) { - case RE_OP_ANY: - count = min_size_t((size_t)(state->slice_end - text_pos), max_count); - - count = (size_t)(match_many_ANY(state, node, text_pos, text_pos + - (Py_ssize_t)count, TRUE) - text_pos); - - *is_partial = count == (size_t)(state->text_length - text_pos) && count - < max_count && state->partial_side == RE_PARTIAL_RIGHT; - - return count; - case RE_OP_ANY_ALL: - count = min_size_t((size_t)(state->slice_end - text_pos), max_count); - - *is_partial = count == (size_t)(state->text_length - text_pos) && count - < max_count && state->partial_side == RE_PARTIAL_RIGHT; - - return count; - case RE_OP_ANY_ALL_REV: - count = min_size_t((size_t)(text_pos - state->slice_start), max_count); - - *is_partial = count == (size_t)(text_pos) && count < max_count && - state->partial_side == RE_PARTIAL_LEFT; - - return count; - case RE_OP_ANY_REV: - count = min_size_t((size_t)(text_pos - state->slice_start), max_count); - - count = (size_t)(text_pos - match_many_ANY_REV(state, node, text_pos, - text_pos - (Py_ssize_t)count, TRUE)); - - *is_partial = count == (size_t)(text_pos) && count < max_count && - state->partial_side == RE_PARTIAL_LEFT; - - return count; - case RE_OP_ANY_U: - count = min_size_t((size_t)(state->slice_end - text_pos), max_count); - - count = (size_t)(match_many_ANY_U(state, node, text_pos, text_pos + - (Py_ssize_t)count, TRUE) - text_pos); - - *is_partial = count == (size_t)(state->text_length - text_pos) && count - < max_count && state->partial_side == RE_PARTIAL_RIGHT; - - return count; - case RE_OP_ANY_U_REV: - count = min_size_t((size_t)(text_pos - state->slice_start), max_count); - - count = (size_t)(text_pos - match_many_ANY_U_REV(state, node, text_pos, - text_pos - (Py_ssize_t)count, TRUE)); - - *is_partial = count == 
(size_t)(text_pos) && count < max_count && - state->partial_side == RE_PARTIAL_LEFT; - - return count; - case RE_OP_CHARACTER: - count = min_size_t((size_t)(state->slice_end - text_pos), max_count); - - count = (size_t)(match_many_CHARACTER(state, node, text_pos, text_pos + - (Py_ssize_t)count, TRUE) - text_pos); - - *is_partial = count == (size_t)(state->text_length - text_pos) && count - < max_count && state->partial_side == RE_PARTIAL_RIGHT; - - return count; - case RE_OP_CHARACTER_IGN: - count = min_size_t((size_t)(state->slice_end - text_pos), max_count); - - count = (size_t)(match_many_CHARACTER_IGN(state, node, text_pos, - text_pos + (Py_ssize_t)count, TRUE) - text_pos); - - *is_partial = count == (size_t)(state->text_length - text_pos) && count - < max_count && state->partial_side == RE_PARTIAL_RIGHT; - - return count; - case RE_OP_CHARACTER_IGN_REV: - count = min_size_t((size_t)(text_pos - state->slice_start), max_count); - - count = (size_t)(text_pos - match_many_CHARACTER_IGN_REV(state, node, - text_pos, text_pos - (Py_ssize_t)count, TRUE)); - - *is_partial = count == (size_t)(text_pos) && count < max_count && - state->partial_side == RE_PARTIAL_LEFT; - - return count; - case RE_OP_CHARACTER_REV: - count = min_size_t((size_t)(text_pos - state->slice_start), max_count); - - count = (size_t)(text_pos - match_many_CHARACTER_REV(state, node, - text_pos, text_pos - (Py_ssize_t)count, TRUE)); - - *is_partial = count == (size_t)(text_pos) && count < max_count && - state->partial_side == RE_PARTIAL_LEFT; - - return count; - case RE_OP_PROPERTY: - count = min_size_t((size_t)(state->slice_end - text_pos), max_count); - - count = (size_t)(match_many_PROPERTY(state, node, text_pos, text_pos + - (Py_ssize_t)count, TRUE) - text_pos); - - *is_partial = count == (size_t)(state->text_length - text_pos) && count - < max_count && state->partial_side == RE_PARTIAL_RIGHT; - - return count; - case RE_OP_PROPERTY_IGN: - count = min_size_t((size_t)(state->slice_end - text_pos), 
max_count); - - count = (size_t)(match_many_PROPERTY_IGN(state, node, text_pos, - text_pos + (Py_ssize_t)count, TRUE) - text_pos); - - *is_partial = count == (size_t)(state->text_length - text_pos) && count - < max_count && state->partial_side == RE_PARTIAL_RIGHT; - - return count; - case RE_OP_PROPERTY_IGN_REV: - count = min_size_t((size_t)(text_pos - state->slice_start), max_count); - - count = (size_t)(text_pos - match_many_PROPERTY_IGN_REV(state, node, - text_pos, text_pos - (Py_ssize_t)count, TRUE)); - - *is_partial = count == (size_t)(text_pos) && count < max_count && - state->partial_side == RE_PARTIAL_LEFT; - - return count; - case RE_OP_PROPERTY_REV: - count = min_size_t((size_t)(text_pos - state->slice_start), max_count); - - count = (size_t)(text_pos - match_many_PROPERTY_REV(state, node, - text_pos, text_pos - (Py_ssize_t)count, TRUE)); - - *is_partial = count == (size_t)(text_pos) && count < max_count && - state->partial_side == RE_PARTIAL_LEFT; - - return count; - case RE_OP_RANGE: - count = min_size_t((size_t)(state->slice_end - text_pos), max_count); - - count = (size_t)(match_many_RANGE(state, node, text_pos, text_pos + - (Py_ssize_t)count, TRUE) - text_pos); - - *is_partial = count == (size_t)(state->text_length - text_pos) && count - < max_count && state->partial_side == RE_PARTIAL_RIGHT; - - return count; - case RE_OP_RANGE_IGN: - count = min_size_t((size_t)(state->slice_end - text_pos), max_count); - - count = (size_t)(match_many_RANGE_IGN(state, node, text_pos, text_pos + - (Py_ssize_t)count, TRUE) - text_pos); - - *is_partial = count == (size_t)(state->text_length - text_pos) && count - < max_count && state->partial_side == RE_PARTIAL_RIGHT; - - return count; - case RE_OP_RANGE_IGN_REV: - count = min_size_t((size_t)(text_pos - state->slice_start), max_count); - - count = (size_t)(text_pos - match_many_RANGE_IGN_REV(state, node, - text_pos, text_pos - (Py_ssize_t)count, TRUE)); - - *is_partial = count == (size_t)(text_pos) && count < max_count 
&& - state->partial_side == RE_PARTIAL_LEFT; - - return count; - case RE_OP_RANGE_REV: - count = min_size_t((size_t)(text_pos - state->slice_start), max_count); - - count = (size_t)(text_pos - match_many_RANGE_REV(state, node, text_pos, - text_pos - (Py_ssize_t)count, TRUE)); - - *is_partial = count == (size_t)(text_pos) && count < max_count && - state->partial_side == RE_PARTIAL_LEFT; - - return count; - case RE_OP_SET_DIFF: - case RE_OP_SET_INTER: - case RE_OP_SET_SYM_DIFF: - case RE_OP_SET_UNION: - count = min_size_t((size_t)(state->slice_end - text_pos), max_count); - - count = (size_t)(match_many_SET(state, node, text_pos, text_pos + - (Py_ssize_t)count, TRUE) - text_pos); - - *is_partial = count == (size_t)(state->text_length - text_pos) && count - < max_count && state->partial_side == RE_PARTIAL_RIGHT; - - return count; - case RE_OP_SET_DIFF_IGN: - case RE_OP_SET_INTER_IGN: - case RE_OP_SET_SYM_DIFF_IGN: - case RE_OP_SET_UNION_IGN: - count = min_size_t((size_t)(state->slice_end - text_pos), max_count); - - count = (size_t)(match_many_SET_IGN(state, node, text_pos, text_pos + - (Py_ssize_t)count, TRUE) - text_pos); - - *is_partial = count == (size_t)(state->text_length - text_pos) && count - < max_count && state->partial_side == RE_PARTIAL_RIGHT; - - return count; - case RE_OP_SET_DIFF_IGN_REV: - case RE_OP_SET_INTER_IGN_REV: - case RE_OP_SET_SYM_DIFF_IGN_REV: - case RE_OP_SET_UNION_IGN_REV: - count = min_size_t((size_t)(text_pos - state->slice_start), max_count); - - count = (size_t)(text_pos - match_many_SET_IGN_REV(state, node, - text_pos, text_pos - (Py_ssize_t)count, TRUE)); - - *is_partial = count == (size_t)(text_pos) && count < max_count && - state->partial_side == RE_PARTIAL_LEFT; - - return count; - case RE_OP_SET_DIFF_REV: - case RE_OP_SET_INTER_REV: - case RE_OP_SET_SYM_DIFF_REV: - case RE_OP_SET_UNION_REV: - count = min_size_t((size_t)(text_pos - state->slice_start), max_count); - - count = (size_t)(text_pos - match_many_SET_REV(state, node, 
text_pos, - text_pos - (Py_ssize_t)count, TRUE)); - - *is_partial = count == (size_t)(text_pos) && count < max_count && - state->partial_side == RE_PARTIAL_LEFT; - - return count; - } - - return 0; -} - -/* Performs a simple string search. */ -Py_LOCAL_INLINE(Py_ssize_t) simple_string_search(RE_State* state, RE_Node* - node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL* is_partial) { - Py_ssize_t length; - RE_CODE* values; - RE_EncodingTable* encoding; - Py_UCS4 check_char; - - length = (Py_ssize_t)node->value_count; - values = node->values; - encoding = state->encoding; - check_char = values[0]; - - *is_partial = FALSE; - - switch (state->charsize) { - case 1: - { - Py_UCS1* text = (Py_UCS1*)state->text; - Py_UCS1* text_ptr = text + text_pos; - Py_UCS1* limit_ptr = text + limit; - - while (text_ptr < limit_ptr) { - if (text_ptr[0] == check_char) { - Py_ssize_t s_pos; - - s_pos = 1; - - for (;;) { - if (s_pos >= length) - /* End of search string. */ - return text_ptr - text; - - if (text_ptr + s_pos >= limit_ptr) { - /* Off the end of the text. */ - if (state->partial_side == RE_PARTIAL_RIGHT) { - /* Partial match. */ - *is_partial = TRUE; - return text_ptr - text; - } - - return -1; - - } - - if (!same_char(encoding, text_ptr[s_pos], values[s_pos])) - break; - - ++s_pos; - } - } - - ++text_ptr; - } - text_pos = text_ptr - text; - break; - } - case 2: - { - Py_UCS2* text = (Py_UCS2*)state->text; - Py_UCS2* text_ptr = text + text_pos; - Py_UCS2* limit_ptr = text + limit; - - while (text_ptr < limit_ptr) { - if (text_ptr[0] == check_char) { - Py_ssize_t s_pos; - - s_pos = 1; - - for (;;) { - if (s_pos >= length) - /* End of search string. */ - return text_ptr - text; - - if (text_ptr + s_pos >= limit_ptr) { - /* Off the end of the text. */ - if (state->partial_side == RE_PARTIAL_RIGHT) { - /* Partial match. 
*/ - *is_partial = TRUE; - return text_ptr - text; - } - - return -1; - - } - - if (!same_char(encoding, text_ptr[s_pos], values[s_pos])) - break; - - ++s_pos; - } - } - - ++text_ptr; - } - text_pos = text_ptr - text; - break; - } - case 4: - { - Py_UCS4* text = (Py_UCS4*)state->text; - Py_UCS4* text_ptr = text + text_pos; - Py_UCS4* limit_ptr = text + limit; - - while (text_ptr < limit_ptr) { - if (text_ptr[0] == check_char) { - Py_ssize_t s_pos; - - s_pos = 1; - - for (;;) { - if (s_pos >= length) - /* End of search string. */ - return text_ptr - text; - - if (text_ptr + s_pos >= limit_ptr) { - /* Off the end of the text. */ - if (state->partial_side == RE_PARTIAL_RIGHT) { - /* Partial match. */ - *is_partial = TRUE; - return text_ptr - text; - } - - return -1; - - } - - if (!same_char(encoding, text_ptr[s_pos], values[s_pos])) - break; - - ++s_pos; - } - } - - ++text_ptr; - } - text_pos = text_ptr - text; - break; - } - } - - /* Off the end of the text. */ - if (state->partial_side == RE_PARTIAL_RIGHT) { - /* Partial match. */ - *is_partial = TRUE; - return text_pos; - } - - return -1; -} - -/* Performs a simple string search, ignoring case. */ -Py_LOCAL_INLINE(Py_ssize_t) simple_string_search_ign(RE_State* state, RE_Node* - node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL* is_partial) { - Py_ssize_t length; - RE_CODE* values; - RE_EncodingTable* encoding; - Py_UCS4 cases[RE_MAX_CASES]; - int case_count; - - length = (Py_ssize_t)node->value_count; - values = node->values; - encoding = state->encoding; - case_count = encoding->all_cases(values[0], cases); - - *is_partial = FALSE; - - switch (state->charsize) { - case 1: - { - Py_UCS1* text = (Py_UCS1*)state->text; - Py_UCS1* text_ptr = text + text_pos; - Py_UCS1* limit_ptr = text + limit; - - while (text_ptr < limit_ptr) { - if (any_case(text_ptr[0], case_count, cases)) { - Py_ssize_t s_pos; - - s_pos = 1; - - for (;;) { - if (s_pos >= length) - /* End of search string. 
*/ - return text_ptr - text; - - if (text_ptr + s_pos >= limit_ptr) { - /* Off the end of the text. */ - if (state->partial_side == RE_PARTIAL_RIGHT) { - /* Partial match. */ - *is_partial = TRUE; - return text_ptr - text; - } - - return -1; - - } - - if (!same_char_ign(encoding, text_ptr[s_pos], - values[s_pos])) - break; - - ++s_pos; - } - } - - ++text_ptr; - } - text_pos = text_ptr - text; - break; - } - case 2: - { - Py_UCS2* text = (Py_UCS2*)state->text; - Py_UCS2* text_ptr = text + text_pos; - Py_UCS2* limit_ptr = text + limit; - - while (text_ptr < limit_ptr) { - if (any_case(text_ptr[0], case_count, cases)) { - Py_ssize_t s_pos; - - s_pos = 1; - - for (;;) { - if (s_pos >= length) - /* End of search string. */ - return text_ptr - text; - - if (text_ptr + s_pos >= limit_ptr) { - /* Off the end of the text. */ - if (state->partial_side == RE_PARTIAL_RIGHT) { - /* Partial match. */ - *is_partial = TRUE; - return text_ptr - text; - } - - return -1; - - } - - if (!same_char_ign(encoding, text_ptr[s_pos], - values[s_pos])) - break; - - ++s_pos; - } - } - - ++text_ptr; - } - text_pos = text_ptr - text; - break; - } - case 4: - { - Py_UCS4* text = (Py_UCS4*)state->text; - Py_UCS4* text_ptr = text + text_pos; - Py_UCS4* limit_ptr = text + limit; - - while (text_ptr < limit_ptr) { - if (any_case(text_ptr[0], case_count, cases)) { - Py_ssize_t s_pos; - - s_pos = 1; - - for (;;) { - if (s_pos >= length) - /* End of search string. */ - return text_ptr - text; - - if (text_ptr + s_pos >= limit_ptr) { - /* Off the end of the text. */ - if (state->partial_side == RE_PARTIAL_RIGHT) { - /* Partial match. */ - *is_partial = TRUE; - return text_ptr - text; - } - - return -1; - - } - - if (!same_char_ign(encoding, text_ptr[s_pos], - values[s_pos])) - break; - - ++s_pos; - } - } - - ++text_ptr; - } - text_pos = text_ptr - text; - break; - } - } - - /* Off the end of the text. */ - if (state->partial_side == RE_PARTIAL_RIGHT) { - /* Partial match. 
*/ - *is_partial = TRUE; - return text_pos; - } - - return -1; -} - -/* Performs a simple string search, backwards, ignoring case. */ -Py_LOCAL_INLINE(Py_ssize_t) simple_string_search_ign_rev(RE_State* state, - RE_Node* node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL* is_partial) { - Py_ssize_t length; - RE_CODE* values; - RE_EncodingTable* encoding; - Py_UCS4 cases[RE_MAX_CASES]; - int case_count; - - length = (Py_ssize_t)node->value_count; - values = node->values; - encoding = state->encoding; - case_count = encoding->all_cases(values[length - 1], cases); - - *is_partial = FALSE; - - switch (state->charsize) { - case 1: - { - Py_UCS1* text = (Py_UCS1*)state->text; - Py_UCS1* text_ptr = text + text_pos; - Py_UCS1* limit_ptr = text + limit; - - while (text_ptr > limit_ptr) { - if (any_case(text_ptr[-1], case_count, cases)) { - Py_ssize_t s_pos; - - s_pos = 1; - - for (;;) { - if (s_pos >= length) - /* End of search string. */ - return text_ptr - text; - - if (text_ptr - s_pos <= limit_ptr) { - /* Off the end of the text. */ - if (state->partial_side == RE_PARTIAL_LEFT) { - /* Partial match. */ - *is_partial = TRUE; - return text_ptr - text; - } - - return -1; - - } - - if (!same_char_ign(encoding, text_ptr[- s_pos - 1], - values[length - s_pos - 1])) - break; - - ++s_pos; - } - } - - --text_ptr; - } - text_pos = text_ptr - text; - break; - } - case 2: - { - Py_UCS2* text = (Py_UCS2*)state->text; - Py_UCS2* text_ptr = text + text_pos; - Py_UCS2* limit_ptr = text + limit; - - while (text_ptr > limit_ptr) { - if (any_case(text_ptr[-1], case_count, cases)) { - Py_ssize_t s_pos; - - s_pos = 1; - - for (;;) { - if (s_pos >= length) - /* End of search string. */ - return text_ptr - text; - - if (text_ptr - s_pos <= limit_ptr) { - /* Off the end of the text. */ - if (state->partial_side == RE_PARTIAL_LEFT) { - /* Partial match. 
*/ - *is_partial = TRUE; - return text_ptr - text; - } - - return -1; - - } - - if (!same_char_ign(encoding, text_ptr[- s_pos - 1], - values[length - s_pos - 1])) - break; - - ++s_pos; - } - } - - --text_ptr; - } - text_pos = text_ptr - text; - break; - } - case 4: - { - Py_UCS4* text = (Py_UCS4*)state->text; - Py_UCS4* text_ptr = text + text_pos; - Py_UCS4* limit_ptr = text + limit; - - while (text_ptr > limit_ptr) { - if (any_case(text_ptr[-1], case_count, cases)) { - Py_ssize_t s_pos; - - s_pos = 1; - - for (;;) { - if (s_pos >= length) - /* End of search string. */ - return text_ptr - text; - - if (text_ptr - s_pos <= limit_ptr) { - /* Off the end of the text. */ - if (state->partial_side == RE_PARTIAL_LEFT) { - /* Partial match. */ - *is_partial = TRUE; - return text_ptr - text; - } - - return -1; - - } - - if (!same_char_ign(encoding, text_ptr[- s_pos - 1], - values[length - s_pos - 1])) - break; - - ++s_pos; - } - } - - --text_ptr; - } - text_pos = text_ptr - text; - break; - } - } - - /* Off the end of the text. */ - if (state->partial_side == RE_PARTIAL_LEFT) { - /* Partial match. */ - *is_partial = TRUE; - return text_pos; - } - - return -1; -} - -/* Performs a simple string search, backwards. */ -Py_LOCAL_INLINE(Py_ssize_t) simple_string_search_rev(RE_State* state, RE_Node* - node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL* is_partial) { - Py_ssize_t length; - RE_CODE* values; - RE_EncodingTable* encoding; - Py_UCS4 check_char; - - length = (Py_ssize_t)node->value_count; - values = node->values; - encoding = state->encoding; - check_char = values[length - 1]; - - *is_partial = FALSE; - - switch (state->charsize) { - case 1: - { - Py_UCS1* text = (Py_UCS1*)state->text; - Py_UCS1* text_ptr = text + text_pos; - Py_UCS1* limit_ptr = text + limit; - - while (text_ptr > limit_ptr) { - if (text_ptr[-1] == check_char) { - Py_ssize_t s_pos; - - s_pos = 1; - - for (;;) { - if (s_pos >= length) - /* End of search string. 
*/ - return text_ptr - text; - - if (text_ptr - s_pos <= limit_ptr) { - /* Off the end of the text. */ - if (state->partial_side == RE_PARTIAL_LEFT) { - /* Partial match. */ - *is_partial = TRUE; - return text_ptr - text; - } - - return -1; - - } - - if (!same_char(encoding, text_ptr[- s_pos - 1], - values[length - s_pos - 1])) - break; - - ++s_pos; - } - } - - --text_ptr; - } - text_pos = text_ptr - text; - break; - } - case 2: - { - Py_UCS2* text = (Py_UCS2*)state->text; - Py_UCS2* text_ptr = text + text_pos; - Py_UCS2* limit_ptr = text + limit; - - while (text_ptr > limit_ptr) { - if (text_ptr[-1] == check_char) { - Py_ssize_t s_pos; - - s_pos = 1; - - for (;;) { - if (s_pos >= length) - /* End of search string. */ - return text_ptr - text; - - if (text_ptr - s_pos <= limit_ptr) { - /* Off the end of the text. */ - if (state->partial_side == RE_PARTIAL_LEFT) { - /* Partial match. */ - *is_partial = TRUE; - return text_ptr - text; - } - - return -1; - - } - - if (!same_char(encoding, text_ptr[- s_pos - 1], - values[length - s_pos - 1])) - break; - - ++s_pos; - } - } - - --text_ptr; - } - text_pos = text_ptr - text; - break; - } - case 4: - { - Py_UCS4* text = (Py_UCS4*)state->text; - Py_UCS4* text_ptr = text + text_pos; - Py_UCS4* limit_ptr = text + limit; - - while (text_ptr > limit_ptr) { - if (text_ptr[-1] == check_char) { - Py_ssize_t s_pos; - - s_pos = 1; - - for (;;) { - if (s_pos >= length) - /* End of search string. */ - return text_ptr - text; - - if (text_ptr - s_pos <= limit_ptr) { - /* Off the end of the text. */ - if (state->partial_side == RE_PARTIAL_LEFT) { - /* Partial match. */ - *is_partial = TRUE; - return text_ptr - text; - } - - return -1; - - } - - if (!same_char(encoding, text_ptr[- s_pos - 1], - values[length - s_pos - 1])) - break; - - ++s_pos; - } - } - - --text_ptr; - } - text_pos = text_ptr - text; - break; - } - } - - /* Off the end of the text. */ - if (state->partial_side == RE_PARTIAL_LEFT) { - /* Partial match. 
*/ - *is_partial = TRUE; - return text_pos; - } - - return -1; -} - -/* Performs a Boyer-Moore fast string search. */ -Py_LOCAL_INLINE(Py_ssize_t) fast_string_search(RE_State* state, RE_Node* node, - Py_ssize_t text_pos, Py_ssize_t limit) { - RE_EncodingTable* encoding; - void* text; - Py_ssize_t length; - RE_CODE* values; - Py_ssize_t* bad_character_offset; - Py_ssize_t* good_suffix_offset; - Py_ssize_t last_pos; - Py_UCS4 check_char; - - encoding = state->encoding; - text = state->text; - length = (Py_ssize_t)node->value_count; - values = node->values; - good_suffix_offset = node->string.good_suffix_offset; - bad_character_offset = node->string.bad_character_offset; - last_pos = length - 1; - check_char = values[last_pos]; - limit -= length; - - switch (state->charsize) { - case 1: - { - Py_UCS1* text_ptr; - Py_UCS1* limit_ptr; - - text_ptr = (Py_UCS1*)text + text_pos; - limit_ptr = (Py_UCS1*)text + limit; - - while (text_ptr <= limit_ptr) { - Py_UCS4 ch; - - ch = text_ptr[last_pos]; - if (ch == check_char) { - Py_ssize_t pos; - - pos = last_pos - 1; - while (pos >= 0 && same_char(encoding, - text_ptr[pos], values[pos])) - --pos; - - if (pos < 0) - return text_ptr - (Py_UCS1*)text; - - text_ptr += good_suffix_offset[pos]; - } else - text_ptr += bad_character_offset[ch & 0xFF]; - } - break; - } - case 2: - { - Py_UCS2* text_ptr; - Py_UCS2* limit_ptr; - - text_ptr = (Py_UCS2*)text + text_pos; - limit_ptr = (Py_UCS2*)text + limit; - - while (text_ptr <= limit_ptr) { - Py_UCS4 ch; - - ch = text_ptr[last_pos]; - if (ch == check_char) { - Py_ssize_t pos; - - pos = last_pos - 1; - while (pos >= 0 && same_char(encoding, - text_ptr[pos], values[pos])) - --pos; - - if (pos < 0) - return text_ptr - (Py_UCS2*)text; - - text_ptr += good_suffix_offset[pos]; - } else - text_ptr += bad_character_offset[ch & 0xFF]; - } - break; - } - case 4: - { - Py_UCS4* text_ptr; - Py_UCS4* limit_ptr; - - text_ptr = (Py_UCS4*)text + text_pos; - limit_ptr = (Py_UCS4*)text + limit; - - while 
(text_ptr <= limit_ptr) { - Py_UCS4 ch; - - ch = text_ptr[last_pos]; - if (ch == check_char) { - Py_ssize_t pos; - - pos = last_pos - 1; - while (pos >= 0 && same_char(encoding, - text_ptr[pos], values[pos])) - --pos; - - if (pos < 0) - return text_ptr - (Py_UCS4*)text; - - text_ptr += good_suffix_offset[pos]; - } else - text_ptr += bad_character_offset[ch & 0xFF]; - } - break; - } - } - - return -1; -} - -/* Performs a Boyer-Moore fast string search, ignoring case. */ -Py_LOCAL_INLINE(Py_ssize_t) fast_string_search_ign(RE_State* state, RE_Node* - node, Py_ssize_t text_pos, Py_ssize_t limit) { - RE_EncodingTable* encoding; - void* text; - Py_ssize_t length; - RE_CODE* values; - Py_ssize_t* bad_character_offset; - Py_ssize_t* good_suffix_offset; - Py_ssize_t last_pos; - Py_UCS4 cases[RE_MAX_CASES]; - int case_count; - - encoding = state->encoding; - text = state->text; - length = (Py_ssize_t)node->value_count; - values = node->values; - good_suffix_offset = node->string.good_suffix_offset; - bad_character_offset = node->string.bad_character_offset; - last_pos = length - 1; - case_count = encoding->all_cases(values[last_pos], cases); - limit -= length; - - switch (state->charsize) { - case 1: - { - Py_UCS1* text_ptr; - Py_UCS1* limit_ptr; - - text_ptr = (Py_UCS1*)text + text_pos; - limit_ptr = (Py_UCS1*)text + limit; - - while (text_ptr <= limit_ptr) { - Py_UCS4 ch; - - ch = text_ptr[last_pos]; - if (any_case(ch, case_count, cases)) { - Py_ssize_t pos; - - pos = last_pos - 1; - while (pos >= 0 && same_char_ign(encoding, - text_ptr[pos], values[pos])) - --pos; - - if (pos < 0) - return text_ptr - (Py_UCS1*)text; - - text_ptr += good_suffix_offset[pos]; - } else - text_ptr += bad_character_offset[ch & 0xFF]; - } - break; - } - case 2: - { - Py_UCS2* text_ptr; - Py_UCS2* limit_ptr; - - text_ptr = (Py_UCS2*)text + text_pos; - limit_ptr = (Py_UCS2*)text + limit; - - while (text_ptr <= limit_ptr) { - Py_UCS4 ch; - - ch = text_ptr[last_pos]; - if (any_case(ch, case_count, 
cases)) { - Py_ssize_t pos; - - pos = last_pos - 1; - while (pos >= 0 && same_char_ign(encoding, - text_ptr[pos], values[pos])) - --pos; - - if (pos < 0) - return text_ptr - (Py_UCS2*)text; - - text_ptr += good_suffix_offset[pos]; - } else - text_ptr += bad_character_offset[ch & 0xFF]; - } - break; - } - case 4: - { - Py_UCS4* text_ptr; - Py_UCS4* limit_ptr; - - text_ptr = (Py_UCS4*)text + text_pos; - limit_ptr = (Py_UCS4*)text + limit; - - while (text_ptr <= limit_ptr) { - Py_UCS4 ch; - - ch = text_ptr[last_pos]; - if (any_case(ch, case_count, cases)) { - Py_ssize_t pos; - - pos = last_pos - 1; - while (pos >= 0 && same_char_ign(encoding, - text_ptr[pos], values[pos])) - --pos; - - if (pos < 0) - return text_ptr - (Py_UCS4*)text; - - text_ptr += good_suffix_offset[pos]; - } else - text_ptr += bad_character_offset[ch & 0xFF]; - } - break; - } - } - - return -1; -} - -/* Performs a Boyer-Moore fast string search, backwards, ignoring case. */ -Py_LOCAL_INLINE(Py_ssize_t) fast_string_search_ign_rev(RE_State* state, - RE_Node* node, Py_ssize_t text_pos, Py_ssize_t limit) { - RE_EncodingTable* encoding; - void* text; - Py_ssize_t length; - RE_CODE* values; - Py_ssize_t* bad_character_offset; - Py_ssize_t* good_suffix_offset; - Py_UCS4 cases[RE_MAX_CASES]; - int case_count; - - encoding = state->encoding; - text = state->text; - length = (Py_ssize_t)node->value_count; - values = node->values; - good_suffix_offset = node->string.good_suffix_offset; - bad_character_offset = node->string.bad_character_offset; - case_count = encoding->all_cases(values[0], cases); - text_pos -= length; - - switch (state->charsize) { - case 1: - { - Py_UCS1* text_ptr; - Py_UCS1* limit_ptr; - - text_ptr = (Py_UCS1*)text + text_pos; - limit_ptr = (Py_UCS1*)text + limit; - - while (text_ptr >= limit_ptr) { - Py_UCS4 ch; - - ch = text_ptr[0]; - if (any_case(ch, case_count, cases)) { - Py_ssize_t pos; - - pos = 1; - while (pos < length && same_char_ign(encoding, - text_ptr[pos], values[pos])) - 
++pos; - - if (pos >= length) - return text_ptr - (Py_UCS1*)text + length; - - text_ptr += good_suffix_offset[pos]; - } else - text_ptr += bad_character_offset[ch & 0xFF]; - } - break; - } - case 2: - { - Py_UCS2* text_ptr; - Py_UCS2* limit_ptr; - - text_ptr = (Py_UCS2*)text + text_pos; - limit_ptr = (Py_UCS2*)text + limit; - - while (text_ptr >= limit_ptr) { - Py_UCS4 ch; - - ch = text_ptr[0]; - if (any_case(ch, case_count, cases)) { - Py_ssize_t pos; - - pos = 1; - while (pos < length && same_char_ign(encoding, - text_ptr[pos], values[pos])) - ++pos; - - if (pos >= length) - return text_ptr - (Py_UCS2*)text + length; - - text_ptr += good_suffix_offset[pos]; - } else - text_ptr += bad_character_offset[ch & 0xFF]; - } - break; - } - case 4: - { - Py_UCS4* text_ptr; - Py_UCS4* limit_ptr; - - text_ptr = (Py_UCS4*)text + text_pos; - limit_ptr = (Py_UCS4*)text + limit; - - while (text_ptr >= limit_ptr) { - Py_UCS4 ch; - - ch = text_ptr[0]; - if (any_case(ch, case_count, cases)) { - Py_ssize_t pos; - - pos = 1; - while (pos < length && same_char_ign(encoding, - text_ptr[pos], values[pos])) - ++pos; - - if (pos >= length) - return text_ptr - (Py_UCS4*)text + length; - - text_ptr += good_suffix_offset[pos]; - } else - text_ptr += bad_character_offset[ch & 0xFF]; - } - break; - } - } - - return -1; -} - -/* Performs a Boyer-Moore fast string search, backwards. 
*/ -Py_LOCAL_INLINE(Py_ssize_t) fast_string_search_rev(RE_State* state, RE_Node* - node, Py_ssize_t text_pos, Py_ssize_t limit) { - RE_EncodingTable* encoding; - void* text; - Py_ssize_t length; - RE_CODE* values; - Py_ssize_t* bad_character_offset; - Py_ssize_t* good_suffix_offset; - Py_UCS4 check_char; - - encoding = state->encoding; - text = state->text; - length = (Py_ssize_t)node->value_count; - values = node->values; - good_suffix_offset = node->string.good_suffix_offset; - bad_character_offset = node->string.bad_character_offset; - check_char = values[0]; - text_pos -= length; - - switch (state->charsize) { - case 1: - { - Py_UCS1* text_ptr; - Py_UCS1* limit_ptr; - - text_ptr = (Py_UCS1*)text + text_pos; - limit_ptr = (Py_UCS1*)text + limit; - - while (text_ptr >= limit_ptr) { - Py_UCS4 ch; - - ch = text_ptr[0]; - if (ch == check_char) { - Py_ssize_t pos; - - pos = 1; - while (pos < length && same_char(encoding, - text_ptr[pos], values[pos])) - ++pos; - - if (pos >= length) - return text_ptr - (Py_UCS1*)text + length; - - text_ptr += good_suffix_offset[pos]; - } else - text_ptr += bad_character_offset[ch & 0xFF]; - } - break; - } - case 2: - { - Py_UCS2* text_ptr; - Py_UCS2* limit_ptr; - - text_ptr = (Py_UCS2*)text + text_pos; - limit_ptr = (Py_UCS2*)text + limit; - - while (text_ptr >= limit_ptr) { - Py_UCS4 ch; - - ch = text_ptr[0]; - if (ch == check_char) { - Py_ssize_t pos; - - pos = 1; - while (pos < length && same_char(encoding, - text_ptr[pos], values[pos])) - ++pos; - - if (pos >= length) - return text_ptr - (Py_UCS2*)text + length; - - text_ptr += good_suffix_offset[pos]; - } else - text_ptr += bad_character_offset[ch & 0xFF]; - } - break; - } - case 4: - { - Py_UCS4* text_ptr; - Py_UCS4* limit_ptr; - - text_ptr = (Py_UCS4*)text + text_pos; - limit_ptr = (Py_UCS4*)text + limit; - - while (text_ptr >= limit_ptr) { - Py_UCS4 ch; - - ch = text_ptr[0]; - if (ch == check_char) { - Py_ssize_t pos; - - pos = 1; - while (pos < length && same_char(encoding, 
- text_ptr[pos], values[pos])) - ++pos; - - if (pos >= length) - return text_ptr - (Py_UCS4*)text + length; - - text_ptr += good_suffix_offset[pos]; - } else - text_ptr += bad_character_offset[ch & 0xFF]; - } - break; - } - } - - return -1; -} - -/* Builds the tables for a Boyer-Moore fast string search. */ -Py_LOCAL_INLINE(BOOL) build_fast_tables(RE_EncodingTable* encoding, RE_Node* - node, BOOL ignore) { - Py_ssize_t length; - RE_CODE* values; - Py_ssize_t* bad; - Py_ssize_t* good; - Py_UCS4 ch; - Py_ssize_t last_pos; - Py_ssize_t pos; - BOOL (*is_same_char)(RE_EncodingTable* encoding, Py_UCS4 ch1, Py_UCS4 ch2); - Py_ssize_t suffix_len; - BOOL saved_start; - Py_ssize_t s; - Py_ssize_t i; - Py_ssize_t s_start; - Py_UCS4 codepoints[RE_MAX_CASES]; - - length = (Py_ssize_t)node->value_count; - - if (length < RE_MIN_FAST_LENGTH) - return TRUE; - - values = node->values; - bad = (Py_ssize_t*)re_alloc(256 * sizeof(bad[0])); - good = (Py_ssize_t*)re_alloc((size_t)length * sizeof(good[0])); - - if (!bad || !good) { - re_dealloc(bad); - re_dealloc(good); - - return FALSE; - } - - for (ch = 0; ch < 0x100; ch++) - bad[ch] = length; - - last_pos = length - 1; - - for (pos = 0; pos < last_pos; pos++) { - Py_ssize_t offset; - - offset = last_pos - pos; - ch = values[pos]; - if (ignore) { - int count; - int i; - - count = encoding->all_cases(ch, codepoints); - - for (i = 0; i < count; i++) - bad[codepoints[i] & 0xFF] = offset; - } else - bad[ch & 0xFF] = offset; - } - - is_same_char = ignore ? same_char_ign_wrapper : same_char_wrapper; - - suffix_len = 2; - pos = length - suffix_len; - saved_start = FALSE; - s = pos - 1; - i = suffix_len - 1; - s_start = s; - - while (pos >= 0) { - /* Look for another occurrence of the suffix. */ - while (i > 0) { - /* Have we dropped off the end of the string? */ - if (s + i < 0) - break; - - if (is_same_char(encoding, values[s + i], values[pos + i])) - /* It still matches. */ - --i; - else { - /* Start again further along. 
*/ - --s; - i = suffix_len - 1; - } - } - - if (s >= 0 && is_same_char(encoding, values[s], values[pos])) { - /* We haven't dropped off the end of the string, and the suffix has - * matched this far, so this is a good starting point for the next - * iteration. - */ - --s; - if (!saved_start) { - s_start = s; - saved_start = TRUE; - } - } else { - /* Calculate the suffix offset. */ - good[pos] = pos - s; - - /* Extend the suffix and start searching for _this_ one. */ - --pos; - ++suffix_len; - - /* Where's a good place to start searching? */ - if (saved_start) { - s = s_start; - saved_start = FALSE; - } else - --s; - - /* Can we short-circuit the searching? */ - if (s < 0) - break; - } - - i = suffix_len - 1; - } - - /* Fill-in any remaining entries. */ - while (pos >= 0) { - good[pos] = pos - s; - --pos; - --s; - } - - node->string.bad_character_offset = bad; - node->string.good_suffix_offset = good; - - return TRUE; -} - -/* Builds the tables for a Boyer-Moore fast string search, backwards. 
*/ -Py_LOCAL_INLINE(BOOL) build_fast_tables_rev(RE_EncodingTable* encoding, - RE_Node* node, BOOL ignore) { - Py_ssize_t length; - RE_CODE* values; - Py_ssize_t* bad; - Py_ssize_t* good; - Py_UCS4 ch; - Py_ssize_t last_pos; - Py_ssize_t pos; - BOOL (*is_same_char)(RE_EncodingTable* encoding, Py_UCS4 ch1, Py_UCS4 ch2); - Py_ssize_t suffix_len; - BOOL saved_start; - Py_ssize_t s; - Py_ssize_t i; - Py_ssize_t s_start; - Py_UCS4 codepoints[RE_MAX_CASES]; - - length = (Py_ssize_t)node->value_count; - - if (length < RE_MIN_FAST_LENGTH) - return TRUE; - - values = node->values; - bad = (Py_ssize_t*)re_alloc(256 * sizeof(bad[0])); - good = (Py_ssize_t*)re_alloc((size_t)length * sizeof(good[0])); - - if (!bad || !good) { - re_dealloc(bad); - re_dealloc(good); - - return FALSE; - } - - for (ch = 0; ch < 0x100; ch++) - bad[ch] = -length; - - last_pos = length - 1; - - for (pos = last_pos; pos > 0; pos--) { - Py_ssize_t offset; - - offset = -pos; - ch = values[pos]; - if (ignore) { - int count; - int i; - - count = encoding->all_cases(ch, codepoints); - - for (i = 0; i < count; i++) - bad[codepoints[i] & 0xFF] = offset; - } else - bad[ch & 0xFF] = offset; - } - - is_same_char = ignore ? same_char_ign_wrapper : same_char_wrapper; - - suffix_len = 2; - pos = suffix_len - 1; - saved_start = FALSE; - s = pos + 1; - i = suffix_len - 1; - s_start = s; - - while (pos < length) { - /* Look for another occurrence of the suffix. */ - while (i > 0) { - /* Have we dropped off the end of the string? */ - if (s - i >= length) - break; - - if (is_same_char(encoding, values[s - i], values[pos - i])) - /* It still matches. */ - --i; - else { - /* Start again further along. */ - ++s; - i = suffix_len - 1; - } - } - - if (s < length && is_same_char(encoding, values[s], values[pos])) { - /* We haven't dropped off the end of the string, and the suffix has - * matched this far, so this is a good starting point for the next - * iteration. 
- */ - ++s; - if (!saved_start) { - s_start = s; - saved_start = TRUE; - } - } else { - /* Calculate the suffix offset. */ - good[pos] = pos - s; - - /* Extend the suffix and start searching for _this_ one. */ - ++pos; - ++suffix_len; - - /* Where's a good place to start searching? */ - if (saved_start) { - s = s_start; - saved_start = FALSE; - } else - ++s; - - /* Can we short-circuit the searching? */ - if (s >= length) - break; - } - - i = suffix_len - 1; - } - - /* Fill-in any remaining entries. */ - while (pos < length) { - good[pos] = pos - s; - ++pos; - ++s; - } - - node->string.bad_character_offset = bad; - node->string.good_suffix_offset = good; - - return TRUE; -} - -/* Performs a string search. */ -Py_LOCAL_INLINE(Py_ssize_t) string_search(RE_SafeState* safe_state, RE_Node* - node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL* is_partial) { - RE_State* state; - Py_ssize_t found_pos; - - state = safe_state->re_state; - - *is_partial = FALSE; - - /* Has the node been initialised for fast searching, if necessary? */ - if (!(node->status & RE_STATUS_FAST_INIT)) { - /* Ideally the pattern should immutable and shareable across threads. - * Internally, however, it isn't. For safety we need to hold the GIL. - */ - acquire_GIL(safe_state); - - /* Double-check because of multithreading. */ - if (!(node->status & RE_STATUS_FAST_INIT)) { - build_fast_tables(state->encoding, node, FALSE); - node->status |= RE_STATUS_FAST_INIT; - } - - release_GIL(safe_state); - } - - if (node->string.bad_character_offset) { - /* Start with a fast search. This will find the string if it's complete - * (i.e. not truncated). - */ - found_pos = fast_string_search(state, node, text_pos, limit); - if (found_pos < 0 && state->partial_side == RE_PARTIAL_RIGHT) - /* We didn't find the string, but it could've been truncated, so - * try again, starting close to the end. 
- */ - found_pos = simple_string_search(state, node, limit - - (Py_ssize_t)(node->value_count - 1), limit, is_partial); - } else - found_pos = simple_string_search(state, node, text_pos, limit, - is_partial); - - return found_pos; -} - -/* Performs a string search, ignoring case. */ -Py_LOCAL_INLINE(Py_ssize_t) string_search_fld(RE_SafeState* safe_state, - RE_Node* node, Py_ssize_t text_pos, Py_ssize_t limit, Py_ssize_t* new_pos, - BOOL* is_partial) { - RE_State* state; - RE_EncodingTable* encoding; - int (*full_case_fold)(Py_UCS4 ch, Py_UCS4* folded); - Py_UCS4 (*char_at)(void* text, Py_ssize_t pos); - void* text; - RE_CODE* values; - Py_ssize_t start_pos; - int f_pos; - int folded_len; - Py_ssize_t length; - Py_ssize_t s_pos; - Py_UCS4 folded[RE_MAX_FOLDED]; - - state = safe_state->re_state; - encoding = state->encoding; - full_case_fold = encoding->full_case_fold; - char_at = state->char_at; - text = state->text; - - values = node->values; - start_pos = text_pos; - f_pos = 0; - folded_len = 0; - length = (Py_ssize_t)node->value_count; - s_pos = 0; - - *is_partial = FALSE; - - while (s_pos < length || f_pos < folded_len) { - if (f_pos >= folded_len) { - /* Fetch and casefold another character. */ - if (text_pos >= limit) { - if (text_pos >= state->text_length && state->partial_side == - RE_PARTIAL_RIGHT) { - *is_partial = TRUE; - return start_pos; - } - - return -1; - } - - folded_len = full_case_fold(char_at(text, text_pos), folded); - f_pos = 0; - } - - if (same_char_ign(encoding, values[s_pos], folded[f_pos])) { - ++s_pos; - ++f_pos; - - if (f_pos >= folded_len) - ++text_pos; - } else { - ++start_pos; - text_pos = start_pos; - f_pos = 0; - folded_len = 0; - s_pos = 0; - } - } - - /* We found the string. */ - if (new_pos) - *new_pos = text_pos; - - return start_pos; -} - -/* Performs a string search, backwards, ignoring case. 
*/ -Py_LOCAL_INLINE(Py_ssize_t) string_search_fld_rev(RE_SafeState* safe_state, - RE_Node* node, Py_ssize_t text_pos, Py_ssize_t limit, Py_ssize_t* new_pos, - BOOL* is_partial) { - RE_State* state; - RE_EncodingTable* encoding; - int (*full_case_fold)(Py_UCS4 ch, Py_UCS4* folded); - Py_UCS4 (*char_at)(void* text, Py_ssize_t pos); - void* text; - RE_CODE* values; - Py_ssize_t start_pos; - int f_pos; - int folded_len; - Py_ssize_t length; - Py_ssize_t s_pos; - Py_UCS4 folded[RE_MAX_FOLDED]; - - state = safe_state->re_state; - encoding = state->encoding; - full_case_fold = encoding->full_case_fold; - char_at = state->char_at; - text = state->text; - - values = node->values; - start_pos = text_pos; - f_pos = 0; - folded_len = 0; - length = (Py_ssize_t)node->value_count; - s_pos = 0; - - *is_partial = FALSE; - - while (s_pos < length || f_pos < folded_len) { - if (f_pos >= folded_len) { - /* Fetch and casefold another character. */ - if (text_pos <= limit) { - if (text_pos <= 0 && state->partial_side == RE_PARTIAL_LEFT) { - *is_partial = TRUE; - return start_pos; - } - - return -1; - } - - folded_len = full_case_fold(char_at(text, text_pos - 1), folded); - f_pos = 0; - } - - if (same_char_ign(encoding, values[length - s_pos - 1], - folded[folded_len - f_pos - 1])) { - ++s_pos; - ++f_pos; - - if (f_pos >= folded_len) - --text_pos; - } else { - --start_pos; - text_pos = start_pos; - f_pos = 0; - folded_len = 0; - s_pos = 0; - } - } - - /* We found the string. */ - if (new_pos) - *new_pos = text_pos; - - return start_pos; -} - -/* Performs a string search, ignoring case. */ -Py_LOCAL_INLINE(Py_ssize_t) string_search_ign(RE_SafeState* safe_state, - RE_Node* node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL* is_partial) { - RE_State* state; - Py_ssize_t found_pos; - - state = safe_state->re_state; - - *is_partial = FALSE; - - /* Has the node been initialised for fast searching, if necessary? 
*/ - if (!(node->status & RE_STATUS_FAST_INIT)) { - /* Ideally the pattern should immutable and shareable across threads. - * Internally, however, it isn't. For safety we need to hold the GIL. - */ - acquire_GIL(safe_state); - - /* Double-check because of multithreading. */ - if (!(node->status & RE_STATUS_FAST_INIT)) { - build_fast_tables(state->encoding, node, TRUE); - node->status |= RE_STATUS_FAST_INIT; - } - - release_GIL(safe_state); - } - - if (node->string.bad_character_offset) { - /* Start with a fast search. This will find the string if it's complete - * (i.e. not truncated). - */ - found_pos = fast_string_search_ign(state, node, text_pos, limit); - if (found_pos < 0 && state->partial_side == RE_PARTIAL_RIGHT) - /* We didn't find the string, but it could've been truncated, so - * try again, starting close to the end. - */ - found_pos = simple_string_search_ign(state, node, limit - - (Py_ssize_t)(node->value_count - 1), limit, is_partial); - } else - found_pos = simple_string_search_ign(state, node, text_pos, limit, - is_partial); - - return found_pos; -} - -/* Performs a string search, backwards, ignoring case. */ -Py_LOCAL_INLINE(Py_ssize_t) string_search_ign_rev(RE_SafeState* safe_state, - RE_Node* node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL* is_partial) { - RE_State* state; - Py_ssize_t found_pos; - - state = safe_state->re_state; - - *is_partial = FALSE; - - /* Has the node been initialised for fast searching, if necessary? */ - if (!(node->status & RE_STATUS_FAST_INIT)) { - /* Ideally the pattern should immutable and shareable across threads. - * Internally, however, it isn't. For safety we need to hold the GIL. - */ - acquire_GIL(safe_state); - - /* Double-check because of multithreading. */ - if (!(node->status & RE_STATUS_FAST_INIT)) { - build_fast_tables_rev(state->encoding, node, TRUE); - node->status |= RE_STATUS_FAST_INIT; - } - - release_GIL(safe_state); - } - - if (node->string.bad_character_offset) { - /* Start with a fast search. 
This will find the string if it's complete - * (i.e. not truncated). - */ - found_pos = fast_string_search_ign_rev(state, node, text_pos, limit); - if (found_pos < 0 && state->partial_side == RE_PARTIAL_LEFT) - /* We didn't find the string, but it could've been truncated, so - * try again, starting close to the end. - */ - found_pos = simple_string_search_ign_rev(state, node, limit + - (Py_ssize_t)(node->value_count - 1), limit, is_partial); - } else - found_pos = simple_string_search_ign_rev(state, node, text_pos, limit, - is_partial); - - return found_pos; -} - -/* Performs a string search, backwards. */ -Py_LOCAL_INLINE(Py_ssize_t) string_search_rev(RE_SafeState* safe_state, - RE_Node* node, Py_ssize_t text_pos, Py_ssize_t limit, BOOL* is_partial) { - RE_State* state; - Py_ssize_t found_pos; - - state = safe_state->re_state; - - *is_partial = FALSE; - - /* Has the node been initialised for fast searching, if necessary? */ - if (!(node->status & RE_STATUS_FAST_INIT)) { - /* Ideally the pattern should immutable and shareable across threads. - * Internally, however, it isn't. For safety we need to hold the GIL. - */ - acquire_GIL(safe_state); - - /* Double-check because of multithreading. */ - if (!(node->status & RE_STATUS_FAST_INIT)) { - build_fast_tables_rev(state->encoding, node, FALSE); - node->status |= RE_STATUS_FAST_INIT; - } - - release_GIL(safe_state); - } - - if (node->string.bad_character_offset) { - /* Start with a fast search. This will find the string if it's complete - * (i.e. not truncated). - */ - found_pos = fast_string_search_rev(state, node, text_pos, limit); - if (found_pos < 0 && state->partial_side == RE_PARTIAL_LEFT) - /* We didn't find the string, but it could've been truncated, so - * try again, starting close to the end. 
- */ - found_pos = simple_string_search_rev(state, node, limit + - (Py_ssize_t)(node->value_count - 1), limit, is_partial); - } else - found_pos = simple_string_search_rev(state, node, text_pos, limit, - is_partial); - - return found_pos; -} - -/* Returns how many characters there could be before full case-folding. */ -Py_LOCAL_INLINE(Py_ssize_t) possible_unfolded_length(Py_ssize_t length) { - if (length == 0) - return 0; - - if (length < RE_MAX_FOLDED) - return 1; - - return length / RE_MAX_FOLDED; -} - -/* Checks whether there's any character except a newline at a position. */ -Py_LOCAL_INLINE(int) try_match_ANY(RE_State* state, RE_Node* node, Py_ssize_t - text_pos) { - if (text_pos >= state->text_length) { - if (state->partial_side == RE_PARTIAL_RIGHT) - return RE_ERROR_PARTIAL; - - return RE_ERROR_FAILURE; - } - - return bool_as_status(text_pos < state->slice_end && - matches_ANY(state->encoding, node, state->char_at(state->text, - text_pos))); -} - -/* Checks whether there's any character at all at a position. */ -Py_LOCAL_INLINE(int) try_match_ANY_ALL(RE_State* state, RE_Node* node, - Py_ssize_t text_pos) { - if (text_pos >= state->text_length) { - if (state->partial_side == RE_PARTIAL_RIGHT) - return RE_ERROR_PARTIAL; - - return RE_ERROR_FAILURE; - } - - return bool_as_status(text_pos < state->slice_end); -} - -/* Checks whether there's any character at all at a position, backwards. */ -Py_LOCAL_INLINE(int) try_match_ANY_ALL_REV(RE_State* state, RE_Node* node, - Py_ssize_t text_pos) { - if (text_pos <= 0) { - if (state->partial_side == RE_PARTIAL_LEFT) - return RE_ERROR_PARTIAL; - - return RE_ERROR_FAILURE; - } - - return bool_as_status(text_pos > state->slice_start); -} - -/* Checks whether there's any character except a newline at a position, - * backwards. 
- */ -Py_LOCAL_INLINE(int) try_match_ANY_REV(RE_State* state, RE_Node* node, - Py_ssize_t text_pos) { - if (text_pos <= 0) { - if (state->partial_side == RE_PARTIAL_LEFT) - return RE_ERROR_PARTIAL; - - return RE_ERROR_FAILURE; - } - - return bool_as_status(text_pos > state->slice_start && - matches_ANY(state->encoding, node, state->char_at(state->text, text_pos - - 1))); -} - -/* Checks whether there's any character except a line separator at a position. - */ -Py_LOCAL_INLINE(int) try_match_ANY_U(RE_State* state, RE_Node* node, Py_ssize_t - text_pos) { - if (text_pos >= state->text_length) { - if (state->partial_side == RE_PARTIAL_RIGHT) - return RE_ERROR_PARTIAL; - - return RE_ERROR_FAILURE; - } - - return bool_as_status(text_pos < state->slice_end && - matches_ANY_U(state->encoding, node, state->char_at(state->text, - text_pos))); -} - -/* Checks whether there's any character except a line separator at a position, - * backwards. - */ -Py_LOCAL_INLINE(int) try_match_ANY_U_REV(RE_State* state, RE_Node* node, - Py_ssize_t text_pos) { - if (text_pos <= 0) { - if (state->partial_side == RE_PARTIAL_LEFT) - return RE_ERROR_PARTIAL; - - return RE_ERROR_FAILURE; - } - - return bool_as_status(text_pos > state->slice_start && - matches_ANY_U(state->encoding, node, state->char_at(state->text, text_pos - - 1))); -} - -/* Checks whether a position is on a word boundary. */ -Py_LOCAL_INLINE(int) try_match_BOUNDARY(RE_State* state, RE_Node* node, - Py_ssize_t text_pos) { - return bool_as_status(state->encoding->at_boundary(state, text_pos) == - node->match); -} - -/* Checks whether there's a character at a position. 
*/ -Py_LOCAL_INLINE(int) try_match_CHARACTER(RE_State* state, RE_Node* node, - Py_ssize_t text_pos) { - if (text_pos >= state->text_length) { - if (state->partial_side == RE_PARTIAL_RIGHT) - return RE_ERROR_PARTIAL; - - return RE_ERROR_FAILURE; - } - - return bool_as_status(text_pos < state->slice_end && - matches_CHARACTER(state->encoding, node, state->char_at(state->text, - text_pos)) == node->match); -} - -/* Checks whether there's a character at a position, ignoring case. */ -Py_LOCAL_INLINE(int) try_match_CHARACTER_IGN(RE_State* state, RE_Node* node, - Py_ssize_t text_pos) { - if (text_pos >= state->text_length) { - if (state->partial_side == RE_PARTIAL_RIGHT) - return RE_ERROR_PARTIAL; - - return RE_ERROR_FAILURE; - } - - return bool_as_status(text_pos < state->slice_end && - matches_CHARACTER_IGN(state->encoding, node, state->char_at(state->text, - text_pos)) == node->match); -} - -/* Checks whether there's a character at a position, ignoring case, backwards. - */ -Py_LOCAL_INLINE(int) try_match_CHARACTER_IGN_REV(RE_State* state, RE_Node* - node, Py_ssize_t text_pos) { - if (text_pos <= 0) { - if (state->partial_side == RE_PARTIAL_LEFT) - return RE_ERROR_PARTIAL; - - return RE_ERROR_FAILURE; - } - - return bool_as_status(text_pos > state->slice_start && - matches_CHARACTER_IGN(state->encoding, node, state->char_at(state->text, - text_pos - 1)) == node->match); -} - -/* Checks whether there's a character at a position, backwards. */ -Py_LOCAL_INLINE(int) try_match_CHARACTER_REV(RE_State* state, RE_Node* node, - Py_ssize_t text_pos) { - if (text_pos <= 0) { - if (state->partial_side == RE_PARTIAL_LEFT) - return RE_ERROR_PARTIAL; - - return RE_ERROR_FAILURE; - } - - return bool_as_status(text_pos > state->slice_start && - matches_CHARACTER(state->encoding, node, state->char_at(state->text, - text_pos - 1)) == node->match); -} - -/* Checks whether a position is on a default word boundary. 
*/ -Py_LOCAL_INLINE(int) try_match_DEFAULT_BOUNDARY(RE_State* state, RE_Node* node, - Py_ssize_t text_pos) { - return bool_as_status(state->encoding->at_default_boundary(state, text_pos) - == node->match); -} - -/* Checks whether a position is at the default end of a word. */ -Py_LOCAL_INLINE(int) try_match_DEFAULT_END_OF_WORD(RE_State* state, RE_Node* - node, Py_ssize_t text_pos) { - return bool_as_status(state->encoding->at_default_word_end(state, - text_pos)); -} - -/* Checks whether a position is at the default start of a word. */ -Py_LOCAL_INLINE(int) try_match_DEFAULT_START_OF_WORD(RE_State* state, RE_Node* - node, Py_ssize_t text_pos) { - return bool_as_status(state->encoding->at_default_word_start(state, - text_pos)); -} - -/* Checks whether a position is at the end of a line. */ -Py_LOCAL_INLINE(int) try_match_END_OF_LINE(RE_State* state, RE_Node* node, - Py_ssize_t text_pos) { - return bool_as_status(text_pos >= state->slice_end || - state->char_at(state->text, text_pos) == '\n'); -} - -/* Checks whether a position is at the end of a line. */ -Py_LOCAL_INLINE(int) try_match_END_OF_LINE_U(RE_State* state, RE_Node* node, - Py_ssize_t text_pos) { - return bool_as_status(state->encoding->at_line_end(state, text_pos)); -} - -/* Checks whether a position is at the end of the string. */ -Py_LOCAL_INLINE(int) try_match_END_OF_STRING(RE_State* state, RE_Node* node, - Py_ssize_t text_pos) { - return bool_as_status(text_pos >= state->text_length); -} - -/* Checks whether a position is at the end of a line or the string. */ -Py_LOCAL_INLINE(int) try_match_END_OF_STRING_LINE(RE_State* state, RE_Node* - node, Py_ssize_t text_pos) { - return bool_as_status(text_pos >= state->text_length || text_pos == - state->final_newline); -} - -/* Checks whether a position is at the end of the string. 
*/ -Py_LOCAL_INLINE(int) try_match_END_OF_STRING_LINE_U(RE_State* state, RE_Node* - node, Py_ssize_t text_pos) { - return bool_as_status(text_pos >= state->text_length || text_pos == - state->final_line_sep); -} - -/* Checks whether a position is at the end of a word. */ -Py_LOCAL_INLINE(int) try_match_END_OF_WORD(RE_State* state, RE_Node* node, - Py_ssize_t text_pos) { - return bool_as_status(state->encoding->at_word_end(state, text_pos)); -} - -/* Checks whether a position is on a grapheme boundary. */ -Py_LOCAL_INLINE(int) try_match_GRAPHEME_BOUNDARY(RE_State* state, RE_Node* - node, Py_ssize_t text_pos) { - return bool_as_status(state->encoding->at_grapheme_boundary(state, - text_pos)); -} - -/* Checks whether there's a character with a certain property at a position. */ -Py_LOCAL_INLINE(int) try_match_PROPERTY(RE_State* state, RE_Node* node, - Py_ssize_t text_pos) { - if (text_pos >= state->text_length) { - if (state->partial_side == RE_PARTIAL_RIGHT) - return RE_ERROR_PARTIAL; - - return RE_ERROR_FAILURE; - } - - return bool_as_status(text_pos < state->slice_end && - matches_PROPERTY(state->encoding, node, state->char_at(state->text, - text_pos)) == node->match); -} - -/* Checks whether there's a character with a certain property at a position, - * ignoring case. - */ -Py_LOCAL_INLINE(int) try_match_PROPERTY_IGN(RE_State* state, RE_Node* node, - Py_ssize_t text_pos) { - if (text_pos >= state->text_length) { - if (state->partial_side == RE_PARTIAL_RIGHT) - return RE_ERROR_PARTIAL; - - return RE_ERROR_FAILURE; - } - - return bool_as_status(text_pos < state->slice_end && - matches_PROPERTY_IGN(state->encoding, node, state->char_at(state->text, - text_pos)) == node->match); -} - -/* Checks whether there's a character with a certain property at a position, - * ignoring case, backwards. 
- */ -Py_LOCAL_INLINE(int) try_match_PROPERTY_IGN_REV(RE_State* state, RE_Node* node, - Py_ssize_t text_pos) { - if (text_pos <= 0) { - if (state->partial_side == RE_PARTIAL_LEFT) - return RE_ERROR_PARTIAL; - - return RE_ERROR_FAILURE; - } - - return bool_as_status(text_pos > state->slice_start && - matches_PROPERTY_IGN(state->encoding, node, state->char_at(state->text, - text_pos - 1)) == node->match); -} - -/* Checks whether there's a character with a certain property at a position, - * backwards. - */ -Py_LOCAL_INLINE(int) try_match_PROPERTY_REV(RE_State* state, RE_Node* node, - Py_ssize_t text_pos) { - if (text_pos <= 0) { - if (state->partial_side == RE_PARTIAL_LEFT) - return RE_ERROR_PARTIAL; - - return RE_ERROR_FAILURE; - } - - return bool_as_status(text_pos > state->slice_start && - matches_PROPERTY(state->encoding, node, state->char_at(state->text, - text_pos - 1)) == node->match); -} - -/* Checks whether there's a character in a certain range at a position. */ -Py_LOCAL_INLINE(int) try_match_RANGE(RE_State* state, RE_Node* node, Py_ssize_t - text_pos) { - if (text_pos >= state->text_length) { - if (state->partial_side == RE_PARTIAL_RIGHT) - return RE_ERROR_PARTIAL; - - return RE_ERROR_FAILURE; - } - - return bool_as_status(text_pos < state->slice_end && - matches_RANGE(state->encoding, node, state->char_at(state->text, - text_pos)) == node->match); -} - -/* Checks whether there's a character in a certain range at a position, - * ignoring case. - */ -Py_LOCAL_INLINE(int) try_match_RANGE_IGN(RE_State* state, RE_Node* node, - Py_ssize_t text_pos) { - if (text_pos >= state->text_length) { - if (state->partial_side == RE_PARTIAL_RIGHT) - return RE_ERROR_PARTIAL; - - return RE_ERROR_FAILURE; - } - - return bool_as_status(text_pos < state->slice_end && - matches_RANGE_IGN(state->encoding, node, state->char_at(state->text, - text_pos)) == node->match); -} - -/* Checks whether there's a character in a certain range at a position, - * ignoring case, backwards. 
- */ -Py_LOCAL_INLINE(int) try_match_RANGE_IGN_REV(RE_State* state, RE_Node* node, - Py_ssize_t text_pos) { - if (text_pos <= 0) { - if (state->partial_side == RE_PARTIAL_LEFT) - return RE_ERROR_PARTIAL; - - return RE_ERROR_FAILURE; - } - - return bool_as_status(text_pos > state->slice_start && - matches_RANGE_IGN(state->encoding, node, state->char_at(state->text, - text_pos - 1)) == node->match); -} - -/* Checks whether there's a character in a certain range at a position, - * backwards. - */ -Py_LOCAL_INLINE(int) try_match_RANGE_REV(RE_State* state, RE_Node* node, - Py_ssize_t text_pos) { - if (text_pos <= 0) { - if (state->partial_side == RE_PARTIAL_LEFT) - return RE_ERROR_PARTIAL; - - return RE_ERROR_FAILURE; - } - - return bool_as_status(text_pos > state->slice_start && - matches_RANGE(state->encoding, node, state->char_at(state->text, text_pos - - 1)) == node->match); -} - -/* Checks whether a position is at the search anchor. */ -Py_LOCAL_INLINE(int) try_match_SEARCH_ANCHOR(RE_State* state, RE_Node* node, - Py_ssize_t text_pos) { - return bool_as_status(text_pos == state->search_anchor); -} - -/* Checks whether there's a character in a certain set at a position. */ -Py_LOCAL_INLINE(int) try_match_SET(RE_State* state, RE_Node* node, Py_ssize_t - text_pos) { - if (text_pos >= state->text_length) { - if (state->partial_side == RE_PARTIAL_RIGHT) - return RE_ERROR_PARTIAL; - - return RE_ERROR_FAILURE; - } - - return bool_as_status(text_pos < state->slice_end && - matches_SET(state->encoding, node, state->char_at(state->text, text_pos)) - == node->match); -} - -/* Checks whether there's a character in a certain set at a position, ignoring - * case. 
- */ -Py_LOCAL_INLINE(int) try_match_SET_IGN(RE_State* state, RE_Node* node, - Py_ssize_t text_pos) { - if (text_pos >= state->text_length) { - if (state->partial_side == RE_PARTIAL_RIGHT) - return RE_ERROR_PARTIAL; - - return RE_ERROR_FAILURE; - } - - return bool_as_status(text_pos < state->slice_end && - matches_SET_IGN(state->encoding, node, state->char_at(state->text, - text_pos)) == node->match); -} - -/* Checks whether there's a character in a certain set at a position, ignoring - * case, backwards. - */ -Py_LOCAL_INLINE(int) try_match_SET_IGN_REV(RE_State* state, RE_Node* node, - Py_ssize_t text_pos) { - if (text_pos <= 0) { - if (state->partial_side == RE_PARTIAL_LEFT) - return RE_ERROR_PARTIAL; - - return RE_ERROR_FAILURE; - } - - return bool_as_status(text_pos > state->slice_start && - matches_SET_IGN(state->encoding, node, state->char_at(state->text, - text_pos - 1)) == node->match); -} - -/* Checks whether there's a character in a certain set at a position, - * backwards. - */ -Py_LOCAL_INLINE(int) try_match_SET_REV(RE_State* state, RE_Node* node, - Py_ssize_t text_pos) { - if (text_pos <= 0) { - if (state->partial_side == RE_PARTIAL_LEFT) - return RE_ERROR_PARTIAL; - - return RE_ERROR_FAILURE; - } - - return bool_as_status(text_pos > state->slice_start && - matches_SET(state->encoding, node, state->char_at(state->text, text_pos - - 1)) == node->match); -} - -/* Checks whether a position is at the start of a line. */ -Py_LOCAL_INLINE(int) try_match_START_OF_LINE(RE_State* state, RE_Node* node, - Py_ssize_t text_pos) { - return bool_as_status(text_pos <= 0 || state->char_at(state->text, text_pos - - 1) == '\n'); -} - -/* Checks whether a position is at the start of a line. */ -Py_LOCAL_INLINE(int) try_match_START_OF_LINE_U(RE_State* state, RE_Node* node, - Py_ssize_t text_pos) { - return bool_as_status(state->encoding->at_line_start(state, text_pos)); -} - -/* Checks whether a position is at the start of the string. 
*/ -Py_LOCAL_INLINE(int) try_match_START_OF_STRING(RE_State* state, RE_Node* node, - Py_ssize_t text_pos) { - return bool_as_status(text_pos <= 0); -} - -/* Checks whether a position is at the start of a word. */ -Py_LOCAL_INLINE(int) try_match_START_OF_WORD(RE_State* state, RE_Node* node, - Py_ssize_t text_pos) { - return bool_as_status(state->encoding->at_word_start(state, text_pos)); -} - -/* Checks whether there's a certain string at a position. */ -Py_LOCAL_INLINE(int) try_match_STRING(RE_State* state, RE_NextNode* next, - RE_Node* node, Py_ssize_t text_pos, RE_Position* next_position) { - Py_ssize_t length; - Py_UCS4 (*char_at)(void* text, Py_ssize_t pos); - RE_EncodingTable* encoding; - RE_CODE* values; - Py_ssize_t s_pos; - - length = (Py_ssize_t)node->value_count; - char_at = state->char_at; - encoding = state->encoding; - values = node->values; - - for (s_pos = 0; s_pos < length; s_pos++) { - if (text_pos + s_pos >= state->slice_end) { - if (state->partial_side == RE_PARTIAL_RIGHT) - return RE_ERROR_PARTIAL; - - return RE_ERROR_FAILURE; - } - - if (!same_char(encoding, char_at(state->text, text_pos + s_pos), - values[s_pos])) - return RE_ERROR_FAILURE; - } - - next_position->node = next->match_next; - next_position->text_pos = text_pos + next->match_step; - - return RE_ERROR_SUCCESS; -} - -/* Checks whether there's a certain string at a position, ignoring case. 
*/ -Py_LOCAL_INLINE(int) try_match_STRING_FLD(RE_State* state, RE_NextNode* next, - RE_Node* node, Py_ssize_t text_pos, RE_Position* next_position) { - Py_ssize_t length; - Py_UCS4 (*char_at)(void* text, Py_ssize_t pos); - RE_EncodingTable* encoding; - int (*full_case_fold)(Py_UCS4 ch, Py_UCS4* folded); - Py_ssize_t s_pos; - RE_CODE* values; - int folded_len; - int f_pos; - Py_ssize_t start_pos; - Py_UCS4 folded[RE_MAX_FOLDED]; - - length = (Py_ssize_t)node->value_count; - char_at = state->char_at; - encoding = state->encoding; - full_case_fold = encoding->full_case_fold; - - s_pos = 0; - values = node->values; - folded_len = 0; - f_pos = 0; - start_pos = text_pos; - - while (s_pos < length) { - if (f_pos >= folded_len) { - /* Fetch and casefold another character. */ - if (text_pos >= state->slice_end) { - if (state->partial_side == RE_PARTIAL_RIGHT) - return RE_ERROR_PARTIAL; - - return RE_ERROR_FAILURE; - } - - folded_len = full_case_fold(char_at(state->text, text_pos), - folded); - f_pos = 0; - } - - if (!same_char_ign(encoding, folded[f_pos], values[s_pos])) - return RE_ERROR_FAILURE; - - ++s_pos; - ++f_pos; - - if (f_pos >= folded_len) - ++text_pos; - } - - if (f_pos < folded_len) - return RE_ERROR_FAILURE; - - next_position->node = next->match_next; - if (next->match_step == 0) - next_position->text_pos = start_pos; - else - next_position->text_pos = text_pos; - - return RE_ERROR_SUCCESS; -} - -/* Checks whether there's a certain string at a position, ignoring case, - * backwards. 
- */ -Py_LOCAL_INLINE(int) try_match_STRING_FLD_REV(RE_State* state, RE_NextNode* - next, RE_Node* node, Py_ssize_t text_pos, RE_Position* next_position) { - Py_ssize_t length; - Py_UCS4 (*char_at)(void* text, Py_ssize_t pos); - RE_EncodingTable* encoding; - int (*full_case_fold)(Py_UCS4 ch, Py_UCS4* folded); - Py_ssize_t s_pos; - RE_CODE* values; - int folded_len; - int f_pos; - Py_ssize_t start_pos; - Py_UCS4 folded[RE_MAX_FOLDED]; - - length = (Py_ssize_t)node->value_count; - char_at = state->char_at; - encoding = state->encoding; - full_case_fold = encoding->full_case_fold; - - s_pos = 0; - values = node->values; - folded_len = 0; - f_pos = 0; - start_pos = text_pos; - - while (s_pos < length) { - if (f_pos >= folded_len) { - /* Fetch and casefold another character. */ - if (text_pos <= state->slice_start) { - if (state->partial_side == RE_PARTIAL_LEFT) - return RE_ERROR_PARTIAL; - - return RE_ERROR_FAILURE; - } - - folded_len = full_case_fold(char_at(state->text, text_pos - 1), - folded); - f_pos = 0; - } - - if (!same_char_ign(encoding, folded[folded_len - f_pos - 1], - values[length - s_pos - 1])) - return RE_ERROR_FAILURE; - - ++s_pos; - ++f_pos; - - if (f_pos >= folded_len) - --text_pos; - } - - if (f_pos < folded_len) - return RE_ERROR_FAILURE; - - next_position->node = next->match_next; - if (next->match_step == 0) - next_position->text_pos = start_pos; - else - next_position->text_pos = text_pos; - - return RE_ERROR_SUCCESS; -} - -/* Checks whether there's a certain string at a position, ignoring case. 
*/ -Py_LOCAL_INLINE(int) try_match_STRING_IGN(RE_State* state, RE_NextNode* next, - RE_Node* node, Py_ssize_t text_pos, RE_Position* next_position) { - Py_ssize_t length; - Py_UCS4 (*char_at)(void* text, Py_ssize_t pos); - RE_EncodingTable* encoding; - RE_CODE* values; - Py_ssize_t s_pos; - - length = (Py_ssize_t)node->value_count; - char_at = state->char_at; - encoding = state->encoding; - values = node->values; - - for (s_pos = 0; s_pos < length; s_pos++) { - if (text_pos + s_pos >= state->slice_end) { - if (state->partial_side == RE_PARTIAL_RIGHT) - return RE_ERROR_PARTIAL; - - return RE_ERROR_FAILURE; - } - - if (!same_char_ign(encoding, char_at(state->text, text_pos + s_pos), - values[s_pos])) - return RE_ERROR_FAILURE; - } - - next_position->node = next->match_next; - next_position->text_pos = text_pos + next->match_step; - - return RE_ERROR_SUCCESS; -} - -/* Checks whether there's a certain string at a position, ignoring case, - * backwards. - */ -Py_LOCAL_INLINE(int) try_match_STRING_IGN_REV(RE_State* state, RE_NextNode* - next, RE_Node* node, Py_ssize_t text_pos, RE_Position* next_position) { - Py_ssize_t length; - Py_UCS4 (*char_at)(void* text, Py_ssize_t pos); - RE_EncodingTable* encoding; - RE_CODE* values; - Py_ssize_t s_pos; - - length = (Py_ssize_t)node->value_count; - char_at = state->char_at; - encoding = state->encoding; - values = node->values; - - for (s_pos = 0; s_pos < length; s_pos++) { - if (text_pos - s_pos <= state->slice_start) { - if (state->partial_side == RE_PARTIAL_LEFT) - return RE_ERROR_PARTIAL; - - return RE_ERROR_FAILURE; - } - - if (!same_char_ign(encoding, char_at(state->text, text_pos - s_pos - - 1), values[length - s_pos - 1])) - return RE_ERROR_FAILURE; - } - - next_position->node = next->match_next; - next_position->text_pos = text_pos + next->match_step; - - return RE_ERROR_SUCCESS; -} - -/* Checks whether there's a certain string at a position, backwards. 
*/ -Py_LOCAL_INLINE(int) try_match_STRING_REV(RE_State* state, RE_NextNode* next, - RE_Node* node, Py_ssize_t text_pos, RE_Position* next_position) { - Py_ssize_t length; - Py_UCS4 (*char_at)(void* text, Py_ssize_t pos); - RE_EncodingTable* encoding; - RE_CODE* values; - Py_ssize_t s_pos; - - length = (Py_ssize_t)node->value_count; - char_at = state->char_at; - encoding = state->encoding; - values = node->values; - - for (s_pos = 0; s_pos < length; s_pos++) { - if (text_pos - s_pos <= state->slice_start) { - if (state->partial_side == RE_PARTIAL_LEFT) - return RE_ERROR_PARTIAL; - - return RE_ERROR_FAILURE; - } - - if (!same_char(encoding, char_at(state->text, text_pos - s_pos - 1), - values[length - s_pos - 1])) - return RE_ERROR_FAILURE; - } - - next_position->node = next->match_next; - next_position->text_pos = text_pos + next->match_step; - - return RE_ERROR_SUCCESS; -} - -/* Tries a match at the current text position. - * - * Returns the next node and text position if the match succeeds. 
- */ -Py_LOCAL_INLINE(int) try_match(RE_State* state, RE_NextNode* next, Py_ssize_t - text_pos, RE_Position* next_position) { - RE_Node* test; - int status; - - test = next->test; - - if (test->status & RE_STATUS_FUZZY) { - next_position->node = next->node; - next_position->text_pos = text_pos; - return RE_ERROR_SUCCESS; - } - - switch (test->op) { - case RE_OP_ANY: - status = try_match_ANY(state, test, text_pos); - break; - case RE_OP_ANY_ALL: - status = try_match_ANY_ALL(state, test, text_pos); - break; - case RE_OP_ANY_ALL_REV: - status = try_match_ANY_ALL_REV(state, test, text_pos); - break; - case RE_OP_ANY_REV: - status = try_match_ANY_REV(state, test, text_pos); - break; - case RE_OP_ANY_U: - status = try_match_ANY_U(state, test, text_pos); - break; - case RE_OP_ANY_U_REV: - status = try_match_ANY_U_REV(state, test, text_pos); - break; - case RE_OP_BOUNDARY: - status = try_match_BOUNDARY(state, test, text_pos); - break; - case RE_OP_BRANCH: - status = try_match(state, &test->next_1, text_pos, next_position); - if (status == RE_ERROR_FAILURE) - status = try_match(state, &test->nonstring.next_2, text_pos, - next_position); - break; - case RE_OP_CHARACTER: - status = try_match_CHARACTER(state, test, text_pos); - break; - case RE_OP_CHARACTER_IGN: - status = try_match_CHARACTER_IGN(state, test, text_pos); - break; - case RE_OP_CHARACTER_IGN_REV: - status = try_match_CHARACTER_IGN_REV(state, test, text_pos); - break; - case RE_OP_CHARACTER_REV: - status = try_match_CHARACTER_REV(state, test, text_pos); - break; - case RE_OP_DEFAULT_BOUNDARY: - status = try_match_DEFAULT_BOUNDARY(state, test, text_pos); - break; - case RE_OP_DEFAULT_END_OF_WORD: - status = try_match_DEFAULT_END_OF_WORD(state, test, text_pos); - break; - case RE_OP_DEFAULT_START_OF_WORD: - status = try_match_DEFAULT_START_OF_WORD(state, test, text_pos); - break; - case RE_OP_END_OF_LINE: - status = try_match_END_OF_LINE(state, test, text_pos); - break; - case RE_OP_END_OF_LINE_U: - status = 
try_match_END_OF_LINE_U(state, test, text_pos); - break; - case RE_OP_END_OF_STRING: - status = try_match_END_OF_STRING(state, test, text_pos); - break; - case RE_OP_END_OF_STRING_LINE: - status = try_match_END_OF_STRING_LINE(state, test, text_pos); - break; - case RE_OP_END_OF_STRING_LINE_U: - status = try_match_END_OF_STRING_LINE_U(state, test, text_pos); - break; - case RE_OP_END_OF_WORD: - status = try_match_END_OF_WORD(state, test, text_pos); - break; - case RE_OP_GRAPHEME_BOUNDARY: - status = try_match_GRAPHEME_BOUNDARY(state, test, text_pos); - break; - case RE_OP_PROPERTY: - status = try_match_PROPERTY(state, test, text_pos); - break; - case RE_OP_PROPERTY_IGN: - status = try_match_PROPERTY_IGN(state, test, text_pos); - break; - case RE_OP_PROPERTY_IGN_REV: - status = try_match_PROPERTY_IGN_REV(state, test, text_pos); - break; - case RE_OP_PROPERTY_REV: - status = try_match_PROPERTY_REV(state, test, text_pos); - break; - case RE_OP_RANGE: - status = try_match_RANGE(state, test, text_pos); - break; - case RE_OP_RANGE_IGN: - status = try_match_RANGE_IGN(state, test, text_pos); - break; - case RE_OP_RANGE_IGN_REV: - status = try_match_RANGE_IGN_REV(state, test, text_pos); - break; - case RE_OP_RANGE_REV: - status = try_match_RANGE_REV(state, test, text_pos); - break; - case RE_OP_SEARCH_ANCHOR: - status = try_match_SEARCH_ANCHOR(state, test, text_pos); - break; - case RE_OP_SET_DIFF: - case RE_OP_SET_INTER: - case RE_OP_SET_SYM_DIFF: - case RE_OP_SET_UNION: - status = try_match_SET(state, test, text_pos); - break; - case RE_OP_SET_DIFF_IGN: - case RE_OP_SET_INTER_IGN: - case RE_OP_SET_SYM_DIFF_IGN: - case RE_OP_SET_UNION_IGN: - status = try_match_SET_IGN(state, test, text_pos); - break; - case RE_OP_SET_DIFF_IGN_REV: - case RE_OP_SET_INTER_IGN_REV: - case RE_OP_SET_SYM_DIFF_IGN_REV: - case RE_OP_SET_UNION_IGN_REV: - status = try_match_SET_IGN_REV(state, test, text_pos); - break; - case RE_OP_SET_DIFF_REV: - case RE_OP_SET_INTER_REV: - case 
RE_OP_SET_SYM_DIFF_REV: - case RE_OP_SET_UNION_REV: - status = try_match_SET_REV(state, test, text_pos); - break; - case RE_OP_START_OF_LINE: - status = try_match_START_OF_LINE(state, test, text_pos); - break; - case RE_OP_START_OF_LINE_U: - status = try_match_START_OF_LINE_U(state, test, text_pos); - break; - case RE_OP_START_OF_STRING: - status = try_match_START_OF_STRING(state, test, text_pos); - break; - case RE_OP_START_OF_WORD: - status = try_match_START_OF_WORD(state, test, text_pos); - break; - case RE_OP_STRING: - return try_match_STRING(state, next, test, text_pos, next_position); - case RE_OP_STRING_FLD: - return try_match_STRING_FLD(state, next, test, text_pos, - next_position); - case RE_OP_STRING_FLD_REV: - return try_match_STRING_FLD_REV(state, next, test, text_pos, - next_position); - case RE_OP_STRING_IGN: - return try_match_STRING_IGN(state, next, test, text_pos, - next_position); - case RE_OP_STRING_IGN_REV: - return try_match_STRING_IGN_REV(state, next, test, text_pos, - next_position); - case RE_OP_STRING_REV: - return try_match_STRING_REV(state, next, test, text_pos, - next_position); - default: - next_position->node = next->node; - next_position->text_pos = text_pos; - return RE_ERROR_SUCCESS; - } - - if (status != RE_ERROR_SUCCESS) - return status; - - next_position->node = next->match_next; - next_position->text_pos = text_pos + next->match_step; - - return RE_ERROR_SUCCESS; -} - -/* Searches for a word boundary. */ -Py_LOCAL_INLINE(Py_ssize_t) search_start_BOUNDARY(RE_State* state, RE_Node* - node, Py_ssize_t text_pos, BOOL* is_partial) { - BOOL (*at_boundary)(RE_State* state, Py_ssize_t text_pos); - - at_boundary = state->encoding->at_boundary; - - *is_partial = FALSE; - - for (;;) { - if (at_boundary(state, text_pos) == node->match) - return text_pos; - - if (text_pos >= state->slice_end) - return -1; - - ++text_pos; - } -} - -/* Searches for a word boundary, backwards. 
*/ -Py_LOCAL_INLINE(Py_ssize_t) search_start_BOUNDARY_rev(RE_State* state, RE_Node* - node, Py_ssize_t text_pos, BOOL* is_partial) { - BOOL (*at_boundary)(RE_State* state, Py_ssize_t text_pos); - - at_boundary = state->encoding->at_boundary; - - *is_partial = FALSE; - - for (;;) { - if (at_boundary(state, text_pos) == node->match) - return text_pos; - - if (text_pos <= state->slice_start) - return -1; - - --text_pos; - } -} - -/* Searches for a default word boundary. */ -Py_LOCAL_INLINE(Py_ssize_t) search_start_DEFAULT_BOUNDARY(RE_State* state, - RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { - BOOL (*at_default_boundary)(RE_State* state, Py_ssize_t text_pos); - - at_default_boundary = state->encoding->at_default_boundary; - - *is_partial = FALSE; - - for (;;) { - if (at_default_boundary(state, text_pos) == node->match) - return text_pos; - - if (text_pos >= state->slice_end) - return -1; - - ++text_pos; - } -} - -/* Searches for a default word boundary, backwards. */ -Py_LOCAL_INLINE(Py_ssize_t) search_start_DEFAULT_BOUNDARY_rev(RE_State* state, - RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { - BOOL (*at_default_boundary)(RE_State* state, Py_ssize_t text_pos); - - at_default_boundary = state->encoding->at_default_boundary; - - *is_partial = FALSE; - - for (;;) { - if (at_default_boundary(state, text_pos) == node->match) - return text_pos; - - if (text_pos <= state->slice_start) - return -1; - - --text_pos; - } -} - -/* Searches for the default end of a word. 
*/ -Py_LOCAL_INLINE(Py_ssize_t) search_start_DEFAULT_END_OF_WORD(RE_State* state, - RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { - BOOL (*at_default_word_end)(RE_State* state, Py_ssize_t text_pos); - - at_default_word_end = state->encoding->at_default_word_end; - - *is_partial = FALSE; - - for (;;) { - if (at_default_word_end(state, text_pos) == node->match) - return text_pos; - - if (text_pos >= state->slice_end) - return -1; - - ++text_pos; - } -} - -/* Searches for the default end of a word, backwards. */ -Py_LOCAL_INLINE(Py_ssize_t) search_start_DEFAULT_END_OF_WORD_rev(RE_State* - state, RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { - BOOL (*at_default_word_end)(RE_State* state, Py_ssize_t text_pos); - - at_default_word_end = state->encoding->at_default_word_end; - - *is_partial = FALSE; - - for (;;) { - if (at_default_word_end(state, text_pos) == node->match) - return text_pos; - - if (text_pos <= state->slice_start) - return -1; - - --text_pos; - } -} - -/* Searches for the default start of a word. */ -Py_LOCAL_INLINE(Py_ssize_t) search_start_DEFAULT_START_OF_WORD(RE_State* state, - RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { - BOOL (*at_default_word_start)(RE_State* state, Py_ssize_t text_pos); - - at_default_word_start = state->encoding->at_default_word_start; - - *is_partial = FALSE; - - for (;;) { - if (at_default_word_start(state, text_pos) == node->match) - return text_pos; - - if (text_pos >= state->slice_end) - return -1; - - ++text_pos; - } -} - -/* Searches for the default start of a word, backwards. 
*/ -Py_LOCAL_INLINE(Py_ssize_t) search_start_DEFAULT_START_OF_WORD_rev(RE_State* - state, RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { - BOOL (*at_default_word_start)(RE_State* state, Py_ssize_t text_pos); - - at_default_word_start = state->encoding->at_default_word_start; - - *is_partial = FALSE; - - for (;;) { - if (at_default_word_start(state, text_pos) == node->match) - return text_pos; - - if (text_pos <= state->slice_start) - return -1; - - --text_pos; - } -} - -/* Searches for the end of line. */ -Py_LOCAL_INLINE(Py_ssize_t) search_start_END_OF_LINE(RE_State* state, RE_Node* - node, Py_ssize_t text_pos, BOOL* is_partial) { - *is_partial = FALSE; - - for (;;) { - if (text_pos >= state->text_length || state->char_at(state->text, - text_pos) == '\n') - return text_pos; - - if (text_pos >= state->slice_end) - return -1; - - ++text_pos; - } -} - -/* Searches for the end of line, backwards. */ -Py_LOCAL_INLINE(Py_ssize_t) search_start_END_OF_LINE_rev(RE_State* state, - RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { - *is_partial = FALSE; - - for (;;) { - if (text_pos >= state->text_length || state->char_at(state->text, - text_pos) == '\n') - return text_pos; - - if (text_pos <= state->slice_start) - return -1; - - --text_pos; - } -} - -/* Searches for the end of the string. */ -Py_LOCAL_INLINE(Py_ssize_t) search_start_END_OF_STRING(RE_State* state, - RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { - *is_partial = FALSE; - - if (state->slice_end >= state->text_length) - return state->text_length; - - return -1; -} - -/* Searches for the end of the string, backwards. */ -Py_LOCAL_INLINE(Py_ssize_t) search_start_END_OF_STRING_rev(RE_State* state, - RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { - *is_partial = FALSE; - - if (text_pos >= state->text_length) - return text_pos; - - return -1; -} - -/* Searches for the end of the string or line. 
*/ -Py_LOCAL_INLINE(Py_ssize_t) search_start_END_OF_STRING_LINE(RE_State* state, - RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { - *is_partial = FALSE; - - if (text_pos <= state->final_newline) - text_pos = state->final_newline; - else if (text_pos <= state->text_length) - text_pos = state->text_length; - - if (text_pos > state->slice_end) - return -1; - - if (text_pos >= state->text_length) - return text_pos; - - return text_pos; -} - -/* Searches for the end of the string or line, backwards. */ -Py_LOCAL_INLINE(Py_ssize_t) search_start_END_OF_STRING_LINE_rev(RE_State* - state, RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { - *is_partial = FALSE; - - if (text_pos >= state->text_length) - text_pos = state->text_length; - else if (text_pos >= state->final_newline) - text_pos = state->final_newline; - else - return -1; - - if (text_pos < state->slice_start) - return -1; - - if (text_pos <= 0) - return text_pos; - - return text_pos; -} - -/* Searches for the end of a word. */ -Py_LOCAL_INLINE(Py_ssize_t) search_start_END_OF_WORD(RE_State* state, RE_Node* - node, Py_ssize_t text_pos, BOOL* is_partial) { - BOOL (*at_word_end)(RE_State* state, Py_ssize_t text_pos); - - at_word_end = state->encoding->at_word_end; - - *is_partial = FALSE; - - for (;;) { - if (at_word_end(state, text_pos) == node->match) - return text_pos; - - if (text_pos >= state->slice_end) - return -1; - - ++text_pos; - } -} - -/* Searches for the end of a word, backwards. */ -Py_LOCAL_INLINE(Py_ssize_t) search_start_END_OF_WORD_rev(RE_State* state, - RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { - BOOL (*at_word_end)(RE_State* state, Py_ssize_t text_pos); - - at_word_end = state->encoding->at_word_end; - - *is_partial = FALSE; - - for (;;) { - if (at_word_end(state, text_pos) == node->match) - return text_pos; - - if (text_pos <= state->slice_start) - return -1; - - --text_pos; - } -} - -/* Searches for a grapheme boundary. 
*/ -Py_LOCAL_INLINE(Py_ssize_t) search_start_GRAPHEME_BOUNDARY(RE_State* state, - RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { - BOOL (*at_grapheme_boundary)(RE_State* state, Py_ssize_t text_pos); - - at_grapheme_boundary = state->encoding->at_grapheme_boundary; - - *is_partial = FALSE; - - for (;;) { - if (at_grapheme_boundary(state, text_pos) == node->match) - return text_pos; - - if (text_pos >= state->slice_end) - return -1; - - ++text_pos; - } -} - -/* Searches for a grapheme boundary, backwards. */ -Py_LOCAL_INLINE(Py_ssize_t) search_start_GRAPHEME_BOUNDARY_rev(RE_State* state, - RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { - BOOL (*at_grapheme_boundary)(RE_State* state, Py_ssize_t text_pos); - - at_grapheme_boundary = state->encoding->at_grapheme_boundary; - - *is_partial = FALSE; - - for (;;) { - if (at_grapheme_boundary(state, text_pos) == node->match) - return text_pos; - - if (text_pos <= state->slice_start) - return -1; - - --text_pos; - } -} - -/* Searches for the start of line. */ -Py_LOCAL_INLINE(Py_ssize_t) search_start_START_OF_LINE(RE_State* state, - RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { - *is_partial = FALSE; - - for (;;) { - if (text_pos <= 0 || state->char_at(state->text, text_pos - 1) == '\n') - return text_pos; - - if (text_pos >= state->slice_end) - return -1; - - ++text_pos; - } -} - -/* Searches for the start of line, backwards. */ -Py_LOCAL_INLINE(Py_ssize_t) search_start_START_OF_LINE_rev(RE_State* state, - RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { - *is_partial = FALSE; - - for (;;) { - if (text_pos <= 0 || state->char_at(state->text, text_pos - 1) == '\n') - return text_pos; - - if (text_pos <= state->slice_start) - return -1; - - --text_pos; - } -} - -/* Searches for the start of the string. 
*/ -Py_LOCAL_INLINE(Py_ssize_t) search_start_START_OF_STRING(RE_State* state, - RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { - *is_partial = FALSE; - - if (text_pos <= 0) - return text_pos; - - return -1; -} - -/* Searches for the start of the string, backwards. */ -Py_LOCAL_INLINE(Py_ssize_t) search_start_START_OF_STRING_rev(RE_State* state, - RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { - *is_partial = FALSE; - - if (state->slice_start <= 0) - return 0; - - return -1; -} - -/* Searches for the start of a word. */ -Py_LOCAL_INLINE(Py_ssize_t) search_start_START_OF_WORD(RE_State* state, - RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { - BOOL (*at_word_start)(RE_State* state, Py_ssize_t text_pos); - - at_word_start = state->encoding->at_word_start; - - *is_partial = FALSE; - - for (;;) { - if (at_word_start(state, text_pos) == node->match) - return text_pos; - - if (text_pos >= state->slice_end) - return -1; - - ++text_pos; - } -} - -/* Searches for the start of a word, backwards. */ -Py_LOCAL_INLINE(Py_ssize_t) search_start_START_OF_WORD_rev(RE_State* state, - RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { - BOOL (*at_word_start)(RE_State* state, Py_ssize_t text_pos); - - at_word_start = state->encoding->at_word_start; - - *is_partial = FALSE; - - for (;;) { - if (at_word_start(state, text_pos) == node->match) - return text_pos; - - if (text_pos <= state->slice_start) - return -1; - - --text_pos; - } -} - -/* Searches for a string. */ -Py_LOCAL_INLINE(Py_ssize_t) search_start_STRING(RE_SafeState* safe_state, - RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { - RE_State* state; - - state = safe_state->re_state; - - *is_partial = FALSE; - - if ((node->status & RE_STATUS_REQUIRED) && text_pos == state->req_pos) - return text_pos; - - return string_search(safe_state, node, text_pos, state->slice_end, - is_partial); -} - -/* Searches for a string, ignoring case. 
*/ -Py_LOCAL_INLINE(Py_ssize_t) search_start_STRING_FLD(RE_SafeState* safe_state, - RE_Node* node, Py_ssize_t text_pos, Py_ssize_t* new_pos, BOOL* is_partial) { - RE_State* state; - - state = safe_state->re_state; - - *is_partial = FALSE; - - if ((node->status & RE_STATUS_REQUIRED) && text_pos == state->req_pos) { - *new_pos = state->req_end; - return text_pos; - } - - return string_search_fld(safe_state, node, text_pos, state->slice_end, - new_pos, is_partial); -} - -/* Searches for a string, ignoring case, backwards. */ -Py_LOCAL_INLINE(Py_ssize_t) search_start_STRING_FLD_REV(RE_SafeState* - safe_state, RE_Node* node, Py_ssize_t text_pos, Py_ssize_t* new_pos, BOOL* - is_partial) { - RE_State* state; - - state = safe_state->re_state; - - *is_partial = FALSE; - - if ((node->status & RE_STATUS_REQUIRED) && text_pos == state->req_pos) { - *new_pos = state->req_end; - return text_pos; - } - - return string_search_fld_rev(safe_state, node, text_pos, - state->slice_start, new_pos, is_partial); -} - -/* Searches for a string, ignoring case. */ -Py_LOCAL_INLINE(Py_ssize_t) search_start_STRING_IGN(RE_SafeState* safe_state, - RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { - RE_State* state; - - state = safe_state->re_state; - - *is_partial = FALSE; - - if ((node->status & RE_STATUS_REQUIRED) && text_pos == state->req_pos) - return text_pos; - - return string_search_ign(safe_state, node, text_pos, state->slice_end, - is_partial); -} - -/* Searches for a string, ignoring case, backwards. */ -Py_LOCAL_INLINE(Py_ssize_t) search_start_STRING_IGN_REV(RE_SafeState* - safe_state, RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { - RE_State* state; - - state = safe_state->re_state; - - *is_partial = FALSE; - - if ((node->status & RE_STATUS_REQUIRED) && text_pos == state->req_pos) - return text_pos; - - return string_search_ign_rev(safe_state, node, text_pos, - state->slice_start, is_partial); -} - -/* Searches for a string, backwards. 
*/ -Py_LOCAL_INLINE(Py_ssize_t) search_start_STRING_REV(RE_SafeState* safe_state, - RE_Node* node, Py_ssize_t text_pos, BOOL* is_partial) { - RE_State* state; - - state = safe_state->re_state; - - *is_partial = FALSE; - - if ((node->status & RE_STATUS_REQUIRED) && text_pos == state->req_pos) - return text_pos; - - return string_search_rev(safe_state, node, text_pos, state->slice_start, - is_partial); -} - -/* Searches for the start of a match. */ -Py_LOCAL_INLINE(int) search_start(RE_SafeState* safe_state, RE_NextNode* next, - RE_Position* new_position, int search_index) { - RE_State* state; - Py_ssize_t text_pos; - RE_Node* test; - RE_Node* node; - Py_ssize_t start_pos; - RE_SearchPosition* info; - - state = safe_state->re_state; - - start_pos = state->text_pos; - TRACE(("<> at %d\n", start_pos)) - - test = next->test; - node = next->node; - - if (state->reverse) { - if (start_pos < state->slice_start) { - if (state->partial_side == RE_PARTIAL_LEFT) { - new_position->text_pos = state->slice_start; - return RE_ERROR_PARTIAL; - } - - return RE_ERROR_FAILURE; - } - } else { - if (start_pos > state->slice_end) { - if (state->partial_side == RE_PARTIAL_RIGHT) { - new_position->text_pos = state->slice_end; - return RE_ERROR_PARTIAL; - } - } - } - - if (test->status & RE_STATUS_FUZZY) { - /* Don't call 'search_start' again. 
*/ - state->pattern->do_search_start = FALSE; - - state->match_pos = start_pos; - new_position->node = node; - new_position->text_pos = start_pos; - - return RE_ERROR_SUCCESS; - } - -again: - if (!state->pattern->is_fuzzy && state->partial_side == RE_PARTIAL_NONE) { - if (state->reverse) { - if (start_pos - state->min_width < state->slice_start) - return RE_ERROR_FAILURE; - } else { - if (start_pos + state->min_width > state->slice_end) - return RE_ERROR_FAILURE; - } - } - - if (search_index < MAX_SEARCH_POSITIONS) { - info = &state->search_positions[search_index]; - if (state->reverse) { - if (info->start_pos >= 0 && info->start_pos >= start_pos && - start_pos >= info->match_pos) { - state->match_pos = info->match_pos; - - new_position->text_pos = state->match_pos; - new_position->node = node; - - return RE_ERROR_SUCCESS; - } - } else { - if (info->start_pos >= 0 && info->start_pos <= start_pos && - start_pos <= info->match_pos) { - state->match_pos = info->match_pos; - - new_position->text_pos = state->match_pos; - new_position->node = node; - - return RE_ERROR_SUCCESS; - } - } - } else - info = NULL; - - switch (test->op) { - case RE_OP_ANY: - start_pos = match_many_ANY(state, test, start_pos, state->slice_end, - FALSE); - - if (start_pos >= state->text_length) { - if (state->partial_side == RE_PARTIAL_RIGHT) { - new_position->text_pos = start_pos; - return RE_ERROR_PARTIAL; - } - } - - if (start_pos >= state->slice_end) - return RE_ERROR_FAILURE; - break; - case RE_OP_ANY_ALL: - break; - case RE_OP_ANY_ALL_REV: - break; - case RE_OP_ANY_REV: - start_pos = match_many_ANY_REV(state, test, start_pos, - state->slice_start, FALSE); - - if (start_pos <= 0) { - if (state->partial_side == RE_PARTIAL_LEFT) { - new_position->text_pos = start_pos; - return RE_ERROR_PARTIAL; - } - } - - if (start_pos <= state->slice_start) - return RE_ERROR_FAILURE; - break; - case RE_OP_ANY_U: - start_pos = match_many_ANY_U(state, test, start_pos, state->slice_end, - FALSE); - - if 
(start_pos >= state->text_length) { - if (state->partial_side == RE_PARTIAL_RIGHT) { - new_position->text_pos = start_pos; - return RE_ERROR_PARTIAL; - } - } - - if (start_pos >= state->slice_end) - return RE_ERROR_FAILURE; - break; - case RE_OP_ANY_U_REV: - start_pos = match_many_ANY_U_REV(state, test, start_pos, - state->slice_start, FALSE); - - if (start_pos <= 0) { - if (state->partial_side == RE_PARTIAL_LEFT) { - new_position->text_pos = start_pos; - return RE_ERROR_PARTIAL; - } - } - - if (start_pos <= state->slice_start) - return RE_ERROR_FAILURE; - break; - case RE_OP_BOUNDARY: - { - BOOL is_partial; - - if (state->reverse) - start_pos = search_start_BOUNDARY_rev(state, test, start_pos, - &is_partial); - else - start_pos = search_start_BOUNDARY(state, test, start_pos, - &is_partial); - - if (start_pos < 0) - return RE_ERROR_FAILURE; - - if (is_partial) { - new_position->text_pos = start_pos; - return RE_ERROR_PARTIAL; - } - break; - } - case RE_OP_CHARACTER: - start_pos = match_many_CHARACTER(state, test, start_pos, - state->slice_end, FALSE); - - if (start_pos >= state->text_length) { - if (state->partial_side == RE_PARTIAL_RIGHT) { - new_position->text_pos = start_pos; - return RE_ERROR_PARTIAL; - } - } - - if (start_pos >= state->slice_end) - return RE_ERROR_FAILURE; - break; - case RE_OP_CHARACTER_IGN: - start_pos = match_many_CHARACTER_IGN(state, test, start_pos, - state->slice_end, FALSE); - - if (start_pos >= state->text_length) { - if (state->partial_side == RE_PARTIAL_RIGHT) { - new_position->text_pos = start_pos; - return RE_ERROR_PARTIAL; - } - } - - if (start_pos >= state->slice_end) - return RE_ERROR_FAILURE; - break; - case RE_OP_CHARACTER_IGN_REV: - start_pos = match_many_CHARACTER_IGN_REV(state, test, start_pos, - state->slice_start, FALSE); - - if (start_pos <= 0) { - if (state->partial_side == RE_PARTIAL_LEFT) { - new_position->text_pos = start_pos; - return RE_ERROR_PARTIAL; - } - } - - if (start_pos <= state->slice_start) - return 
RE_ERROR_FAILURE; - break; - case RE_OP_CHARACTER_REV: - start_pos = match_many_CHARACTER_REV(state, test, start_pos, - state->slice_start, FALSE); - - if (start_pos <= 0) { - if (state->partial_side == RE_PARTIAL_LEFT) { - new_position->text_pos = start_pos; - return RE_ERROR_PARTIAL; - } - } - - if (start_pos <= state->slice_start) - return RE_ERROR_FAILURE; - break; - case RE_OP_DEFAULT_BOUNDARY: - { - BOOL is_partial; - - if (state->reverse) - start_pos = search_start_DEFAULT_BOUNDARY_rev(state, test, - start_pos, &is_partial); - else - start_pos = search_start_DEFAULT_BOUNDARY(state, test, start_pos, - &is_partial); - - if (start_pos < 0) - return RE_ERROR_FAILURE; - - if (is_partial) { - new_position->text_pos = start_pos; - return RE_ERROR_PARTIAL; - } - break; - } - case RE_OP_DEFAULT_END_OF_WORD: - { - BOOL is_partial; - - if (state->reverse) - start_pos = search_start_DEFAULT_END_OF_WORD_rev(state, test, - start_pos, &is_partial); - else - start_pos = search_start_DEFAULT_END_OF_WORD(state, test, - start_pos, &is_partial); - - if (start_pos < 0) - return RE_ERROR_FAILURE; - - if (is_partial) { - new_position->text_pos = start_pos; - return RE_ERROR_PARTIAL; - } - break; - } - case RE_OP_DEFAULT_START_OF_WORD: - { - BOOL is_partial; - - if (state->reverse) - start_pos = search_start_DEFAULT_START_OF_WORD_rev(state, test, - start_pos, &is_partial); - else - start_pos = search_start_DEFAULT_START_OF_WORD(state, test, - start_pos, &is_partial); - - if (start_pos < 0) - return RE_ERROR_FAILURE; - - if (is_partial) { - new_position->text_pos = start_pos; - return RE_ERROR_PARTIAL; - } - break; - } - case RE_OP_END_OF_LINE: - { - BOOL is_partial; - - if (state->reverse) - start_pos = search_start_END_OF_LINE_rev(state, test, start_pos, - &is_partial); - else - start_pos = search_start_END_OF_LINE(state, test, start_pos, - &is_partial); - - if (start_pos < 0) - return RE_ERROR_FAILURE; - - if (is_partial) { - new_position->text_pos = start_pos; - return 
RE_ERROR_PARTIAL; - } - break; - } - case RE_OP_END_OF_STRING: - { - BOOL is_partial; - - if (state->reverse) - start_pos = search_start_END_OF_STRING_rev(state, test, start_pos, - &is_partial); - else - start_pos = search_start_END_OF_STRING(state, test, start_pos, - &is_partial); - - if (start_pos < 0) - return RE_ERROR_FAILURE; - - if (is_partial) { - new_position->text_pos = start_pos; - return RE_ERROR_PARTIAL; - } - break; - } - case RE_OP_END_OF_STRING_LINE: - { - BOOL is_partial; - - if (state->reverse) - start_pos = search_start_END_OF_STRING_LINE_rev(state, test, - start_pos, &is_partial); - else - start_pos = search_start_END_OF_STRING_LINE(state, test, start_pos, - &is_partial); - - if (start_pos < 0) - return RE_ERROR_FAILURE; - - if (is_partial) { - new_position->text_pos = start_pos; - return RE_ERROR_PARTIAL; - } - break; - } - case RE_OP_END_OF_WORD: - { - BOOL is_partial; - - if (state->reverse) - start_pos = search_start_END_OF_WORD_rev(state, test, start_pos, - &is_partial); - else - start_pos = search_start_END_OF_WORD(state, test, start_pos, - &is_partial); - - if (start_pos < 0) - return RE_ERROR_FAILURE; - - if (is_partial) { - new_position->text_pos = start_pos; - return RE_ERROR_PARTIAL; - } - break; - } - case RE_OP_GRAPHEME_BOUNDARY: - { - BOOL is_partial; - - if (state->reverse) - start_pos = search_start_GRAPHEME_BOUNDARY_rev(state, test, - start_pos, &is_partial); - else - start_pos = search_start_GRAPHEME_BOUNDARY(state, test, start_pos, - &is_partial); - - if (start_pos < 0) - return RE_ERROR_FAILURE; - - if (is_partial) { - new_position->text_pos = start_pos; - return RE_ERROR_PARTIAL; - } - break; - } - case RE_OP_PROPERTY: - start_pos = match_many_PROPERTY(state, test, start_pos, - state->slice_end, FALSE); - - if (start_pos >= state->text_length) { - if (state->partial_side == RE_PARTIAL_RIGHT) { - new_position->text_pos = start_pos; - return RE_ERROR_PARTIAL; - } - } - - if (start_pos >= state->slice_end) - return 
RE_ERROR_FAILURE; - break; - case RE_OP_PROPERTY_IGN: - start_pos = match_many_PROPERTY_IGN(state, test, start_pos, - state->slice_end, FALSE); - - if (start_pos >= state->text_length) { - if (state->partial_side == RE_PARTIAL_RIGHT) { - new_position->text_pos = start_pos; - return RE_ERROR_PARTIAL; - } - } - - if (start_pos >= state->slice_end) - return RE_ERROR_FAILURE; - break; - case RE_OP_PROPERTY_IGN_REV: - start_pos = match_many_PROPERTY_IGN_REV(state, test, start_pos, - state->slice_start, FALSE); - - if (start_pos <= 0) { - if (state->partial_side == RE_PARTIAL_LEFT) { - new_position->text_pos = start_pos; - return RE_ERROR_PARTIAL; - } - } - - if (start_pos <= state->slice_start) - return RE_ERROR_FAILURE; - break; - case RE_OP_PROPERTY_REV: - start_pos = match_many_PROPERTY_REV(state, test, start_pos, - state->slice_start, FALSE); - - if (start_pos <= 0) { - if (state->partial_side == RE_PARTIAL_LEFT) { - new_position->text_pos = start_pos; - return RE_ERROR_PARTIAL; - } - } - - if (start_pos <= state->slice_start) - return RE_ERROR_FAILURE; - break; - case RE_OP_RANGE: - start_pos = match_many_RANGE(state, test, start_pos, state->slice_end, - FALSE); - - if (start_pos >= state->text_length) { - if (state->partial_side == RE_PARTIAL_RIGHT) { - new_position->text_pos = start_pos; - return RE_ERROR_PARTIAL; - } - } - - if (start_pos >= state->slice_end) - return RE_ERROR_FAILURE; - break; - case RE_OP_RANGE_IGN: - start_pos = match_many_RANGE_IGN(state, test, start_pos, - state->slice_end, FALSE); - - if (start_pos >= state->text_length) { - if (state->partial_side == RE_PARTIAL_RIGHT) { - new_position->text_pos = start_pos; - return RE_ERROR_PARTIAL; - } - } - - if (start_pos >= state->slice_end) - return RE_ERROR_FAILURE; - break; - case RE_OP_RANGE_IGN_REV: - start_pos = match_many_RANGE_IGN_REV(state, test, start_pos, - state->slice_start, FALSE); - - if (start_pos <= 0) { - if (state->partial_side == RE_PARTIAL_LEFT) { - new_position->text_pos = 
start_pos; - return RE_ERROR_PARTIAL; - } - } - - if (start_pos <= state->slice_start) - return RE_ERROR_FAILURE; - break; - case RE_OP_RANGE_REV: - start_pos = match_many_RANGE_REV(state, test, start_pos, - state->slice_start, FALSE); - - if (start_pos <= 0) { - if (state->partial_side == RE_PARTIAL_LEFT) { - new_position->text_pos = start_pos; - return RE_ERROR_PARTIAL; - } - } - - if (start_pos <= state->slice_start) - return RE_ERROR_FAILURE; - break; - case RE_OP_SEARCH_ANCHOR: - if (state->reverse) { - if (start_pos < state->search_anchor) - return RE_ERROR_FAILURE; - } else { - if (start_pos > state->search_anchor) - return RE_ERROR_FAILURE; - } - - start_pos = state->search_anchor; - break; - case RE_OP_SET_DIFF: - case RE_OP_SET_INTER: - case RE_OP_SET_SYM_DIFF: - case RE_OP_SET_UNION: - start_pos = match_many_SET(state, test, start_pos, state->slice_end, - FALSE); - - if (start_pos >= state->text_length) { - if (state->partial_side == RE_PARTIAL_RIGHT) { - new_position->text_pos = start_pos; - return RE_ERROR_PARTIAL; - } - } - - if (start_pos >= state->slice_end) - return FALSE; - break; - case RE_OP_SET_DIFF_IGN: - case RE_OP_SET_INTER_IGN: - case RE_OP_SET_SYM_DIFF_IGN: - case RE_OP_SET_UNION_IGN: - start_pos = match_many_SET_IGN(state, test, start_pos, - state->slice_end, FALSE); - - if (start_pos >= state->text_length) { - if (state->partial_side == RE_PARTIAL_RIGHT) { - new_position->text_pos = start_pos; - return RE_ERROR_PARTIAL; - } - } - - if (start_pos >= state->slice_end) - return FALSE; - break; - case RE_OP_SET_DIFF_IGN_REV: - case RE_OP_SET_INTER_IGN_REV: - case RE_OP_SET_SYM_DIFF_IGN_REV: - case RE_OP_SET_UNION_IGN_REV: - start_pos = match_many_SET_IGN_REV(state, test, start_pos, - state->slice_start, FALSE); - - if (start_pos <= 0) { - if (state->partial_side == RE_PARTIAL_LEFT) { - new_position->text_pos = start_pos; - return RE_ERROR_PARTIAL; - } - } - - if (start_pos <= state->slice_start) - return FALSE; - break; - case 
RE_OP_SET_DIFF_REV: - case RE_OP_SET_INTER_REV: - case RE_OP_SET_SYM_DIFF_REV: - case RE_OP_SET_UNION_REV: - start_pos = match_many_SET_REV(state, test, start_pos, - state->slice_start, FALSE); - - if (start_pos <= 0) { - if (state->partial_side == RE_PARTIAL_LEFT) { - new_position->text_pos = start_pos; - return RE_ERROR_PARTIAL; - } - } - - if (start_pos <= state->slice_start) - return FALSE; - break; - case RE_OP_START_OF_LINE: - { - BOOL is_partial; - - if (state->reverse) - start_pos = search_start_START_OF_LINE_rev(state, test, start_pos, - &is_partial); - else - start_pos = search_start_START_OF_LINE(state, test, start_pos, - &is_partial); - - if (start_pos < 0) - return RE_ERROR_FAILURE; - - if (is_partial) { - new_position->text_pos = start_pos; - return RE_ERROR_PARTIAL; - } - break; - } - case RE_OP_START_OF_STRING: - { - BOOL is_partial; - - if (state->reverse) - start_pos = search_start_START_OF_STRING_rev(state, test, - start_pos, &is_partial); - else - start_pos = search_start_START_OF_STRING(state, test, start_pos, - &is_partial); - - if (start_pos < 0) - return RE_ERROR_FAILURE; - - if (is_partial) { - new_position->text_pos = start_pos; - return RE_ERROR_PARTIAL; - } - break; - } - case RE_OP_START_OF_WORD: - { - BOOL is_partial; - - if (state->reverse) - start_pos = search_start_START_OF_WORD_rev(state, test, start_pos, - &is_partial); - else - start_pos = search_start_START_OF_WORD(state, test, start_pos, - &is_partial); - - if (start_pos < 0) - return RE_ERROR_FAILURE; - - if (is_partial) { - new_position->text_pos = start_pos; - return RE_ERROR_PARTIAL; - } - break; - } - case RE_OP_STRING: - { - BOOL is_partial; - - start_pos = search_start_STRING(safe_state, test, start_pos, - &is_partial); - if (start_pos < 0) - return RE_ERROR_FAILURE; - - if (is_partial) { - new_position->text_pos = start_pos; - return RE_ERROR_PARTIAL; - } - break; - } - case RE_OP_STRING_FLD: - { - Py_ssize_t new_pos; - BOOL is_partial; - - start_pos = 
search_start_STRING_FLD(safe_state, test, start_pos, - &new_pos, &is_partial); - if (start_pos < 0) - return RE_ERROR_FAILURE; - - if (is_partial) { - new_position->text_pos = start_pos; - return RE_ERROR_PARTIAL; - } - - /* Can we look further ahead? */ - if (test == node) { - if (test->next_1.node) { - int status; - - status = try_match(state, &test->next_1, new_pos, - new_position); - if (status < 0) - return status; - - if (status == RE_ERROR_FAILURE) { - ++start_pos; - - if (start_pos >= state->slice_end) { - if (state->partial_side == RE_PARTIAL_RIGHT) { - new_position->text_pos = state->slice_start; - return RE_ERROR_PARTIAL; - } - - return RE_ERROR_FAILURE; - } - - goto again; - } - } - - /* It's a possible match. */ - state->match_pos = start_pos; - - if (info) { - info->start_pos = state->text_pos; - info->match_pos = state->match_pos; - } - - return RE_ERROR_SUCCESS; - } - break; - } - case RE_OP_STRING_FLD_REV: - { - Py_ssize_t new_pos; - BOOL is_partial; - - start_pos = search_start_STRING_FLD_REV(safe_state, test, start_pos, - &new_pos, &is_partial); - if (start_pos < 0) - return RE_ERROR_FAILURE; - - if (is_partial) { - new_position->text_pos = start_pos; - return RE_ERROR_PARTIAL; - } - - /* Can we look further ahead? */ - if (test == node) { - if (test->next_1.node) { - int status; - - status = try_match(state, &test->next_1, new_pos, - new_position); - if (status < 0) - return status; - - if (status == RE_ERROR_FAILURE) { - --start_pos; - - if (start_pos <= state->slice_start) { - if (state->partial_side == RE_PARTIAL_LEFT) { - new_position->text_pos = state->slice_start; - return RE_ERROR_PARTIAL; - } - - return RE_ERROR_FAILURE; - } - - goto again; - } - } - - /* It's a possible match. 
*/ - state->match_pos = start_pos; - - if (info) { - info->start_pos = state->text_pos; - info->match_pos = state->match_pos; - } - - return RE_ERROR_SUCCESS; - } - break; - } - case RE_OP_STRING_IGN: - { - BOOL is_partial; - - start_pos = search_start_STRING_IGN(safe_state, test, start_pos, - &is_partial); - if (start_pos < 0) - return RE_ERROR_FAILURE; - - if (is_partial) { - new_position->text_pos = start_pos; - return RE_ERROR_PARTIAL; - } - break; - } - case RE_OP_STRING_IGN_REV: - { - BOOL is_partial; - - start_pos = search_start_STRING_IGN_REV(safe_state, test, start_pos, - &is_partial); - if (start_pos < 0) - return RE_ERROR_FAILURE; - - if (is_partial) { - new_position->text_pos = start_pos; - return RE_ERROR_PARTIAL; - } - break; - } - case RE_OP_STRING_REV: - { - BOOL is_partial; - - start_pos = search_start_STRING_REV(safe_state, test, start_pos, - &is_partial); - if (start_pos < 0) - return RE_ERROR_FAILURE; - - if (is_partial) { - new_position->text_pos = start_pos; - return RE_ERROR_PARTIAL; - } - break; - } - default: - /* Don't call 'search_start' again. */ - state->pattern->do_search_start = FALSE; - - state->match_pos = start_pos; - new_position->node = node; - new_position->text_pos = start_pos; - return RE_ERROR_SUCCESS; - } - - /* Can we look further ahead? 
*/ - if (test == node) { - text_pos = start_pos + test->step; - - if (test->next_1.node) { - int status; - - status = try_match(state, &test->next_1, text_pos, new_position); - if (status < 0) - return status; - - if (status == RE_ERROR_FAILURE) { - if (state->reverse) { - --start_pos; - - if (start_pos < state->slice_start) { - if (state->partial_side == RE_PARTIAL_LEFT) { - new_position->text_pos = state->slice_start; - return RE_ERROR_PARTIAL; - } - - return RE_ERROR_FAILURE; - } - } else { - ++start_pos; - - if (start_pos > state->slice_end) { - if (state->partial_side == RE_PARTIAL_RIGHT) { - new_position->text_pos = state->slice_end; - return RE_ERROR_PARTIAL; - } - - return RE_ERROR_FAILURE; - } - } - - goto again; - } - } - } else { - new_position->node = node; - new_position->text_pos = start_pos; - } - - /* It's a possible match. */ - state->match_pos = start_pos; - - if (info) { - info->start_pos = state->text_pos; - info->match_pos = state->match_pos; - } - - return RE_ERROR_SUCCESS; -} - -/* Saves a capture group. */ -Py_LOCAL_INLINE(BOOL) save_capture(RE_SafeState* safe_state, size_t - private_index, size_t public_index) { - RE_State* state; - RE_GroupData* private_group; - RE_GroupData* public_group; - - state = safe_state->re_state; - - /* Capture group indexes are 1-based (excluding group 0, which is the - * entire matched string). - */ - private_group = &state->groups[private_index - 1]; - public_group = &state->groups[public_index - 1]; - - /* Will the repeated captures ever be visible? 
*/ - if (!state->visible_captures) { - public_group->captures[0] = private_group->span; - public_group->capture_count = 1; - - return TRUE; - } - - if (public_group->capture_count >= public_group->capture_capacity) { - size_t new_capacity; - RE_GroupSpan* new_captures; - - new_capacity = public_group->capture_capacity * 2; - new_capacity = max_size_t(new_capacity, RE_INIT_CAPTURE_SIZE); - new_captures = (RE_GroupSpan*)safe_realloc(safe_state, - public_group->captures, new_capacity * sizeof(RE_GroupSpan)); - if (!new_captures) - return FALSE; - - public_group->captures = new_captures; - public_group->capture_capacity = new_capacity; - } - - public_group->captures[public_group->capture_count++] = - private_group->span; - - return TRUE; -} - -/* Unsaves a capture group. */ -Py_LOCAL_INLINE(void) unsave_capture(RE_State* state, size_t private_index, - size_t public_index) { - /* Capture group indexes are 1-based (excluding group 0, which is the - * entire matched string). - */ - if (state->groups[public_index - 1].capture_count > 0) - --state->groups[public_index - 1].capture_count; -} - -/* Pushes the groups for backtracking. 
*/ -Py_LOCAL_INLINE(BOOL) push_groups(RE_SafeState* safe_state) { - RE_State* state; - size_t group_count; - RE_SavedGroups* current; - size_t g; - - state = safe_state->re_state; - - group_count = state->pattern->true_group_count; - if (group_count == 0) - return TRUE; - - current = state->current_saved_groups; - - if (current && current->next) - current = current->next; - else if (!current && state->first_saved_groups) - current = state->first_saved_groups; - else { - RE_SavedGroups* new_block; - - new_block = (RE_SavedGroups*)safe_alloc(safe_state, - sizeof(RE_SavedGroups)); - if (!new_block) - return FALSE; - - new_block->spans = (RE_GroupSpan*)safe_alloc(safe_state, group_count * - sizeof(RE_GroupSpan)); - new_block->counts = (size_t*)safe_alloc(safe_state, group_count * - sizeof(Py_ssize_t)); - if (!new_block->spans || !new_block->counts) { - safe_dealloc(safe_state, new_block->spans); - safe_dealloc(safe_state, new_block->counts); - safe_dealloc(safe_state, new_block); - return FALSE; - } - - new_block->previous = current; - new_block->next = NULL; - - if (new_block->previous) - new_block->previous->next = new_block; - else - state->first_saved_groups = new_block; - - current = new_block; - } - - for (g = 0; g < group_count; g++) { - current->spans[g] = state->groups[g].span; - current->counts[g] = state->groups[g].capture_count; - } - - state->current_saved_groups = current; - - return TRUE; -} - -/* Pops the groups for backtracking. */ -Py_LOCAL_INLINE(void) pop_groups(RE_State* state) { - size_t group_count; - RE_SavedGroups* current; - size_t g; - - group_count = state->pattern->true_group_count; - if (group_count == 0) - return; - - current = state->current_saved_groups; - - for (g = 0; g < group_count; g++) { - state->groups[g].span = current->spans[g]; - state->groups[g].capture_count = current->counts[g]; - } - - state->current_saved_groups = current->previous; -} - -/* Drops the groups for backtracking. 
*/ -Py_LOCAL_INLINE(void) drop_groups(RE_State* state) { - if (state->pattern->true_group_count != 0) - state->current_saved_groups = state->current_saved_groups->previous; -} - -/* Pushes the repeats for backtracking. */ -Py_LOCAL_INLINE(BOOL) push_repeats(RE_SafeState* safe_state) { - RE_State* state; - PatternObject* pattern; - size_t repeat_count; - RE_SavedRepeats* current; - size_t r; - - state = safe_state->re_state; - pattern = state->pattern; - - repeat_count = pattern->repeat_count; - if (repeat_count == 0) - return TRUE; - - current = state->current_saved_repeats; - - if (current && current->next) - current = current->next; - else if (!current && state->first_saved_repeats) - current = state->first_saved_repeats; - else { - RE_SavedRepeats* new_block; - - new_block = (RE_SavedRepeats*)safe_alloc(safe_state, - sizeof(RE_SavedRepeats)); - if (!new_block) - return FALSE; - - memset(new_block, 0, sizeof(RE_SavedRepeats)); - - new_block->repeats = (RE_RepeatData*)safe_alloc(safe_state, - repeat_count * sizeof(RE_RepeatData)); - if (!new_block->repeats) { - safe_dealloc(safe_state, new_block); - return FALSE; - } - - memset(new_block->repeats, 0, repeat_count * sizeof(RE_RepeatData)); - - new_block->previous = current; - new_block->next = NULL; - - if (new_block->previous) - new_block->previous->next = new_block; - else - state->first_saved_repeats = new_block; - - current = new_block; - } - - for (r = 0; r < repeat_count; r++) { - if (!copy_repeat_data(safe_state, ¤t->repeats[r], - &state->repeats[r])) - return FALSE; - } - - state->current_saved_repeats = current; - - return TRUE; -} - -/* Pops the repeats for backtracking. 
*/ -Py_LOCAL_INLINE(void) pop_repeats(RE_State* state) { - PatternObject* pattern; - size_t repeat_count; - RE_SavedRepeats* current; - size_t r; - - pattern = state->pattern; - - repeat_count = pattern->repeat_count; - if (repeat_count == 0) - return; - - current = state->current_saved_repeats; - - for (r = 0; r < repeat_count; r++) - copy_repeat_data(NULL, &state->repeats[r], ¤t->repeats[r]); - - state->current_saved_repeats = current->previous; -} - -/* Saves state info before a recusive call by 'basic_match'. */ -Py_LOCAL_INLINE(void) save_info(RE_State* state, RE_Info* info) { - info->backtrack_count = state->current_backtrack_block->count; - info->current_backtrack_block = state->current_backtrack_block; - info->current_saved_groups = state->current_saved_groups; - info->must_advance = state->must_advance; - info->current_group_call_frame = state->current_group_call_frame; -} - -/* Restores state info after a recusive call by 'basic_match'. */ -Py_LOCAL_INLINE(void) restore_info(RE_State* state, RE_Info* info) { - state->current_group_call_frame = info->current_group_call_frame; - state->must_advance = info->must_advance; - state->current_saved_groups = info->current_saved_groups; - state->current_backtrack_block = info->current_backtrack_block; - state->current_backtrack_block->count = info->backtrack_count; -} - -/* Inserts a new span in a guard list. 
*/ -Py_LOCAL_INLINE(BOOL) insert_guard_span(RE_SafeState* safe_state, RE_GuardList* - guard_list, size_t index) { - size_t n; - - if (guard_list->count >= guard_list->capacity) { - size_t new_capacity; - RE_GuardSpan* new_spans; - - new_capacity = guard_list->capacity * 2; - if (new_capacity == 0) - new_capacity = RE_INIT_GUARDS_BLOCK_SIZE; - new_spans = (RE_GuardSpan*)safe_realloc(safe_state, guard_list->spans, - new_capacity * sizeof(RE_GuardSpan)); - if (!new_spans) - return FALSE; - - guard_list->capacity = new_capacity; - guard_list->spans = new_spans; - } - - n = guard_list->count - index; - if (n > 0) - memmove(guard_list->spans + index + 1, guard_list->spans + index, n * - sizeof(RE_GuardSpan)); - ++guard_list->count; - - return TRUE; -} - -/* Deletes a span in a guard list. */ -Py_LOCAL_INLINE(void) delete_guard_span(RE_GuardList* guard_list, size_t index) - { - size_t n; - - n = guard_list->count - index - 1; - if (n > 0) - memmove(guard_list->spans + index, guard_list->spans + index + 1, n * - sizeof(RE_GuardSpan)); - --guard_list->count; -} - -/* Checks whether a position is guarded against further matching. */ -Py_LOCAL_INLINE(BOOL) is_guarded(RE_GuardList* guard_list, Py_ssize_t text_pos) - { - size_t low; - size_t high; - - /* Is this position in the guard list? */ - low = 0; - high = guard_list->count; - while (low < high) { - size_t mid; - RE_GuardSpan* span; - - mid = (low + high) / 2; - span = &guard_list->spans[mid]; - if (text_pos < span->low) - high = mid; - else if (text_pos > span->high) - low = mid + 1; - else - return span->protect; - } - - guard_list->last_text_pos = text_pos; - guard_list->last_low = low; - - return FALSE; -} - -/* Guards a position against further matching. */ -Py_LOCAL_INLINE(BOOL) guard(RE_SafeState* safe_state, RE_GuardList* guard_list, - Py_ssize_t text_pos, BOOL protect) { - size_t low; - size_t high; - - /* Where should be new position be added? 
*/ - if (text_pos == guard_list->last_text_pos) - low = guard_list->last_low; - else { - low = 0; - high = guard_list->count; - while (low < high) { - size_t mid; - RE_GuardSpan* span; - - mid = (low + high) / 2; - span = &guard_list->spans[mid]; - if (text_pos < span->low) - high = mid; - else if (text_pos > span->high) - low = mid + 1; - else - return TRUE; - } - } - - /* Add the position to the guard list. */ - if (low > 0 && guard_list->spans[low - 1].high + 1 == text_pos && - guard_list->spans[low - 1].protect == protect) { - /* The new position is just above this span. */ - if (low < guard_list->count && guard_list->spans[low].low - 1 == - text_pos && guard_list->spans[low].protect == protect) { - /* The new position joins 2 spans */ - guard_list->spans[low - 1].high = guard_list->spans[low].high; - delete_guard_span(guard_list, low); - } else - /* Extend the span. */ - guard_list->spans[low - 1].high = text_pos; - } else if (low < guard_list->count && guard_list->spans[low].low - 1 == - text_pos && guard_list->spans[low].protect == protect) - /* The new position is just below this span. */ - /* Extend the span. */ - guard_list->spans[low].low = text_pos; - else { - /* Insert a new span. */ - if (!insert_guard_span(safe_state, guard_list, low)) - return FALSE; - guard_list->spans[low].low = text_pos; - guard_list->spans[low].high = text_pos; - guard_list->spans[low].protect = protect; - } - - guard_list->last_text_pos = -1; - - return TRUE; -} - -/* Guards a position against further matching for a repeat. */ -Py_LOCAL_INLINE(BOOL) guard_repeat(RE_SafeState* safe_state, size_t index, - Py_ssize_t text_pos, RE_STATUS_T guard_type, BOOL protect) { - RE_State* state; - RE_GuardList* guard_list; - - state = safe_state->re_state; - - /* Is a guard active here? */ - if (!(state->pattern->repeat_info[index].status & guard_type)) - return TRUE; - - /* Which guard list? 
*/ - if (guard_type & RE_STATUS_BODY) - guard_list = &state->repeats[index].body_guard_list; - else - guard_list = &state->repeats[index].tail_guard_list; - - return guard(safe_state, guard_list, text_pos, protect); -} - -/* Checks whether a position is guarded against further matching for a repeat. - */ -Py_LOCAL_INLINE(BOOL) is_repeat_guarded(RE_SafeState* safe_state, size_t index, - Py_ssize_t text_pos, RE_STATUS_T guard_type) { - RE_State* state; - RE_GuardList* guard_list; - - state = safe_state->re_state; - - /* Is a guard active here? */ - if (!(state->pattern->repeat_info[index].status & guard_type)) - return FALSE; - - /* Which guard list? */ - if (guard_type == RE_STATUS_BODY) - guard_list = &state->repeats[index].body_guard_list; - else - guard_list = &state->repeats[index].tail_guard_list; - - return is_guarded(guard_list, text_pos); -} - -/* Resets the guards inside atomic subpatterns and lookarounds. */ -Py_LOCAL_INLINE(void) reset_guards(RE_State* state, RE_CODE* values) { - PatternObject* pattern; - size_t repeat_count; - - pattern = state->pattern; - repeat_count = pattern->repeat_count; - - if (values) { - size_t i; - - for (i = 1; i <= values[0]; i++) { - size_t index; - - index = values[i]; - - if (index < repeat_count) { - reset_guard_list(&state->repeats[index].body_guard_list); - reset_guard_list(&state->repeats[index].tail_guard_list); - } else { - index -= repeat_count; - - reset_guard_list(&state->fuzzy_guards[index].body_guard_list); - reset_guard_list(&state->fuzzy_guards[index].tail_guard_list); - } - } - } else { - size_t index; - size_t fuzzy_count; - - for (index = 0; index < repeat_count; index++) { - reset_guard_list(&state->repeats[index].body_guard_list); - reset_guard_list(&state->repeats[index].tail_guard_list); - } - - fuzzy_count = pattern->fuzzy_count; - - for (index = 0; index < fuzzy_count; index++) { - reset_guard_list(&state->fuzzy_guards[index].body_guard_list); - 
reset_guard_list(&state->fuzzy_guards[index].tail_guard_list); - } - } -} - -/* Builds a Unicode string. */ -Py_LOCAL_INLINE(PyObject*) build_unicode_value(void* buffer, Py_ssize_t len, - Py_ssize_t buffer_charsize) { - return PyUnicode_FromUnicode(buffer, len); -} - -/* Builds a bytestring. Returns NULL if any member is too wide. */ -Py_LOCAL_INLINE(PyObject*) build_bytes_value(void* buffer, Py_ssize_t len, - Py_ssize_t buffer_charsize) -{ - Py_UCS1* byte_buffer; - Py_ssize_t i; - PyObject* result; - - if (buffer_charsize == 1) - return Py_BuildValue("s#", buffer, len); - - byte_buffer = re_alloc((size_t)len); - if (!byte_buffer) - return NULL; - - for (i = 0; i < len; i++) { - Py_UCS2 c = ((Py_UCS2*)buffer)[i]; - if (c > 0xFF) - goto too_wide; - - byte_buffer[i] = (Py_UCS1)c; - } - - result = Py_BuildValue("s#", byte_buffer, len); - - re_dealloc(byte_buffer); - - return result; - -too_wide: - re_dealloc(byte_buffer); - - return NULL; -} - -/* Looks for a string in a string set. */ -Py_LOCAL_INLINE(int) string_set_contains(RE_State* state, PyObject* string_set, - Py_ssize_t first, Py_ssize_t last) { - PyObject* string; - int status; - - if (state->is_unicode) - string = build_unicode_value(state->point_to(state->text, first), last - - first, state->charsize); - else - string = build_bytes_value(state->point_to(state->text, first), last - - first, state->charsize); - if (!string) - return RE_ERROR_INTERNAL; - - status = PySet_Contains(string_set, string); - Py_DECREF(string); - - return status; -} - -/* Looks for a string in a string set, ignoring case. 
*/ -Py_LOCAL_INLINE(int) string_set_contains_ign(RE_State* state, PyObject* - string_set, void* buffer, Py_ssize_t index, Py_ssize_t len, Py_ssize_t - buffer_charsize) { - Py_UCS4 (*char_at)(void* text, Py_ssize_t pos); - void (*set_char_at)(void* text, Py_ssize_t pos, Py_UCS4 ch); - RE_EncodingTable* encoding; - BOOL (*possible_turkic)(Py_UCS4 ch); - Py_UCS4 codepoints[4]; - - switch (buffer_charsize) { - case 1: - char_at = bytes1_char_at; - set_char_at = bytes1_set_char_at; - break; - case 2: - char_at = bytes2_char_at; - set_char_at = bytes2_set_char_at; - break; - case 4: - char_at = bytes4_char_at; - set_char_at = bytes4_set_char_at; - break; - default: - char_at = bytes1_char_at; - set_char_at = bytes1_set_char_at; - break; - } - - encoding = state->encoding; - possible_turkic = encoding->possible_turkic; - - /* Look for a possible Turkic 'I'. */ - while (index < len && !possible_turkic(char_at(buffer, index))) - ++index; - - if (index < len) { - /* Possible Turkic 'I'. */ - int count; - int i; - - /* Try all the alternatives to the 'I'. */ - count = encoding->all_turkic_i(char_at(buffer, index), codepoints); - - for (i = 0; i < count; i++) { - int status; - - set_char_at(buffer, index, codepoints[i]); - - /* Recurse for the remainder of the string. */ - status = string_set_contains_ign(state, string_set, buffer, index + - 1, len, buffer_charsize); - if (status != 0) - return status; - } - - return 0; - } else { - /* No Turkic 'I'. */ - PyObject* string; - int status; - - if (state->is_unicode) - string = build_unicode_value(buffer, len, buffer_charsize); - else - string = build_bytes_value(buffer, len, buffer_charsize); - if (!string) - return RE_ERROR_MEMORY; - - status = PySet_Contains(string_set, string); - Py_DECREF(string); - - return status; - } -} - -/* Creates a partial string set for truncation at the left or right side. 
*/ -Py_LOCAL_INLINE(int) make_partial_string_set(RE_State* state, RE_Node* node) { - PatternObject* pattern; - int partial_side; - PyObject* string_set; - PyObject* partial_set; - PyObject* iter = NULL; - PyObject* item = NULL; - PyObject* slice = NULL; - - pattern = state->pattern; - partial_side = state->partial_side; - if (partial_side != RE_PARTIAL_LEFT && partial_side != RE_PARTIAL_RIGHT) - return RE_ERROR_INTERNAL; - - /* Fetch the full string set. PyList_GET_ITEM borrows a reference. */ - string_set = PyList_GET_ITEM(pattern->named_list_indexes, node->values[0]); - if (!string_set) - return RE_ERROR_INTERNAL; - - /* Gets the list of partial string sets. */ - if (!pattern->partial_named_lists[partial_side]) { - size_t size; - - size = pattern->named_lists_count * sizeof(PyObject*); - pattern->partial_named_lists[partial_side] = re_alloc(size); - if (!pattern->partial_named_lists[partial_side]) - return RE_ERROR_INTERNAL; - - memset(pattern->partial_named_lists[partial_side], 0, size); - } - - /* Get the partial string set. */ - partial_set = pattern->partial_named_lists[partial_side][node->values[0]]; - if (partial_set) - return 1; - - /* Build the partial string set. */ - partial_set = PySet_New(NULL); - if (!partial_set) - return RE_ERROR_INTERNAL; - - iter = PyObject_GetIter(string_set); - if (!iter) - goto error; - - item = PyIter_Next(iter); - - while (item) { - Py_ssize_t len; - Py_ssize_t first; - Py_ssize_t last; - - len = PySequence_Length(item); - if (len == -1) - goto error; - - first = 0; - last = len; - - while (last - first > 1) { - int status; - - /* Shorten the entry. 
*/ - if (partial_side == RE_PARTIAL_LEFT) - ++first; - else - --last; - - slice = PySequence_GetSlice(item, first, last); - if (!slice) - goto error; - - status = PySet_Add(partial_set, slice); - Py_DECREF(slice); - if (status < 0) - goto error; - } - - Py_DECREF(item); - item = PyIter_Next(iter); - } - - if (PyErr_Occurred()) - goto error; - - Py_DECREF(iter); - - pattern->partial_named_lists[partial_side][node->values[0]] = partial_set; - - return 1; - -error: - Py_XDECREF(item); - Py_XDECREF(iter); - Py_DECREF(partial_set); - - return RE_ERROR_INTERNAL; -} - -/* Tries to match a string at the current position with a member of a string - * set, forwards or backwards. - */ -Py_LOCAL_INLINE(int) string_set_match_fwdrev(RE_SafeState* safe_state, RE_Node* - node, BOOL reverse) { - RE_State* state; - Py_ssize_t min_len; - Py_ssize_t max_len; - Py_ssize_t text_available; - Py_ssize_t slice_available; - int partial_side; - Py_ssize_t len; - Py_ssize_t first; - Py_ssize_t last; - int status; - PyObject* string_set; - - state = safe_state->re_state; - - min_len = (Py_ssize_t)node->values[1]; - max_len = (Py_ssize_t)node->values[2]; - - acquire_GIL(safe_state); - - if (reverse) { - text_available = state->text_pos; - slice_available = state->text_pos - state->slice_start; - partial_side = RE_PARTIAL_LEFT; - } else { - text_available = state->text_length - state->text_pos; - slice_available = state->slice_end - state->text_pos; - partial_side = RE_PARTIAL_RIGHT; - } - - /* Get as many characters as we need for the longest possible match. */ - len = min_ssize_t(max_len, slice_available); - - if (reverse) { - first = state->text_pos - len; - last = state->text_pos; - } else { - first = state->text_pos; - last = state->text_pos + len; - } - - /* If we didn't get all of the characters we need, is a partial match - * allowed? 
- */ - if (len < max_len && len == text_available && state->partial_side == - partial_side) { - if (len == 0) { - /* An empty string is always a possible partial match. */ - status = RE_ERROR_PARTIAL; - goto finished; - } - - /* Make a set of the possible partial matches. */ - status = make_partial_string_set(state, node); - if (status < 0) - goto finished; - - /* Fetch the partial string set. */ - string_set = - state->pattern->partial_named_lists[partial_side][node->values[0]]; - - /* Is the text we have a partial match? */ - status = string_set_contains(state, string_set, first, last); - if (status < 0) - goto finished; - - if (status == 1) { - /* Advance past the match. */ - if (reverse) - state->text_pos -= len; - else - state->text_pos += len; - - status = RE_ERROR_PARTIAL; - goto finished; - } - } - - /* Fetch the string set. PyList_GET_ITEM borrows a reference. */ - string_set = PyList_GET_ITEM(state->pattern->named_list_indexes, - node->values[0]); - if (!string_set) { - status = RE_ERROR_INTERNAL; - goto finished; - } - - /* We've already looked for a partial match (if allowed), but what about a - * complete match? - */ - while (len >= min_len) { - status = string_set_contains(state, string_set, first, last); - - if (status == 1) { - /* Advance past the match. */ - if (reverse) - state->text_pos -= len; - else - state->text_pos += len; - - status = 1; - goto finished; - } - - /* Look for a shorter match. */ - --len; - if (reverse) - ++first; - else - --last; - } - - /* No match. */ - status = 0; - -finished: - release_GIL(safe_state); - - return status; -} - -/* Tries to match a string at the current position with a member of a string - * set, ignoring case, forwards or backwards. 
- */ -Py_LOCAL_INLINE(int) string_set_match_fld_fwdrev(RE_SafeState* safe_state, - RE_Node* node, BOOL reverse) { - RE_State* state; - int (*full_case_fold)(Py_UCS4 ch, Py_UCS4* folded); - Py_UCS4 (*char_at)(void* text, Py_ssize_t pos); - Py_ssize_t folded_charsize; - void (*set_char_at)(void* text, Py_ssize_t pos, Py_UCS4 ch); - Py_ssize_t min_len; - Py_ssize_t max_len; - Py_ssize_t buf_len; - void* folded; - int status; - BOOL* end_of_fold = NULL; - Py_ssize_t text_available; - Py_ssize_t slice_available; - Py_ssize_t t_pos; - Py_ssize_t f_pos; - int step; - int partial_side; - Py_ssize_t len; - Py_ssize_t consumed; - Py_UCS4 codepoints[RE_MAX_FOLDED]; - PyObject* string_set; - Py_ssize_t first; - Py_ssize_t last; - - state = safe_state->re_state; - full_case_fold = state->encoding->full_case_fold; - char_at = state->char_at; - - /* The folded string will have the same width as the original string. */ - folded_charsize = state->charsize; - - switch (folded_charsize) { - case 1: - set_char_at = bytes1_set_char_at; - break; - case 2: - set_char_at = bytes2_set_char_at; - break; - case 4: - set_char_at = bytes4_set_char_at; - break; - default: - return RE_ERROR_INTERNAL; - } - - min_len = (Py_ssize_t)node->values[1]; - max_len = (Py_ssize_t)node->values[2]; - - acquire_GIL(safe_state); - - /* Allocate a buffer for the folded string. 
*/ - buf_len = max_len + RE_MAX_FOLDED; - folded = re_alloc((size_t)(buf_len * folded_charsize)); - if (!folded) { - status = RE_ERROR_MEMORY; - goto finished; - } - - end_of_fold = re_alloc((size_t)buf_len * sizeof(BOOL)); - if (!end_of_fold) { - status = RE_ERROR_MEMORY; - goto finished; - } - - memset(end_of_fold, 0, (size_t)buf_len * sizeof(BOOL)); - - if (reverse) { - text_available = state->text_pos; - slice_available = state->text_pos - state->slice_start; - t_pos = state->text_pos - 1; - f_pos = buf_len; - step = -1; - partial_side = RE_PARTIAL_LEFT; - } else { - text_available = state->text_length - state->text_pos; - slice_available = state->slice_end - state->text_pos; - t_pos = state->text_pos; - f_pos = 0; - step = 1; - partial_side = RE_PARTIAL_RIGHT; - } - - /* We can stop getting characters as soon as the case-folded string is long - * enough (each codepoint from the text can expand to more than one folded - * codepoint). - */ - len = 0; - end_of_fold[len] = TRUE; - - consumed = 0; - while (len < max_len && consumed < slice_available) { - int count; - int j; - - count = full_case_fold(char_at(state->text, t_pos), codepoints); - - if (reverse) - f_pos -= count; - - for (j = 0; j < count; j++) - set_char_at(folded, f_pos + j, codepoints[j]); - - if (!reverse) - f_pos += count; - - len += count; - end_of_fold[len] = TRUE; - ++consumed; - t_pos += step; - } - - if (reverse) { - first = f_pos; - last = buf_len; - } else { - first = 0; - last = f_pos; - } - - /* If we didn't get all of the characters we need, is a partial match - * allowed? - */ - if (len < max_len && len == text_available && state->partial_side == - partial_side) { - if (len == 0) { - /* An empty string is always a possible partial match. */ - status = RE_ERROR_PARTIAL; - goto finished; - } - - /* Make a set of the possible partial matches. */ - status = make_partial_string_set(state, node); - if (status < 0) - goto finished; - - /* Fetch the partial string set. 
*/ - string_set = - state->pattern->partial_named_lists[partial_side][node->values[0]]; - - /* Is the text we have a partial match? */ - status = string_set_contains_ign(state, string_set, folded, first, - last, folded_charsize); - if (status < 0) - goto finished; - - if (status == 1) { - /* Advance past the match. */ - if (reverse) - state->text_pos -= consumed; - else - state->text_pos += consumed; - - status = RE_ERROR_PARTIAL; - goto finished; - } - } - - /* Fetch the string set. PyList_GET_ITEM borrows a reference. */ - string_set = PyList_GET_ITEM(state->pattern->named_list_indexes, - node->values[0]); - if (!string_set) { - status = RE_ERROR_INTERNAL; - goto finished; - } - - /* We've already looked for a partial match (if allowed), but what about a - * complete match? - */ - while (len >= min_len) { - if (end_of_fold[len]) { - status = string_set_contains_ign(state, string_set, folded, first, - last, folded_charsize); - - if (status == 1) { - /* Advance past the match. */ - if (reverse) - state->text_pos -= consumed; - else - state->text_pos += consumed; - - status = 1; - goto finished; - } - - --consumed; - } - - /* Look for a shorter match. */ - --len; - if (reverse) - ++first; - else - --last; - } - - /* No match. */ - status = 0; - -finished: - re_dealloc(end_of_fold); - re_dealloc(folded); - - release_GIL(safe_state); - - return status; -} - -/* Tries to match a string at the current position with a member of a string - * set, ignoring case, forwards or backwards. 
- */ -Py_LOCAL_INLINE(int) string_set_match_ign_fwdrev(RE_SafeState* safe_state, - RE_Node* node, BOOL reverse) { - RE_State* state; - Py_UCS4 (*simple_case_fold)(Py_UCS4 ch); - Py_UCS4 (*char_at)(void* text, Py_ssize_t pos); - Py_ssize_t folded_charsize; - void (*set_char_at)(void* text, Py_ssize_t pos, Py_UCS4 ch); - Py_ssize_t min_len; - Py_ssize_t max_len; - void* folded; - int status; - Py_ssize_t text_available; - Py_ssize_t slice_available; - Py_ssize_t t_pos; - Py_ssize_t f_pos; - int step; - int partial_side; - Py_ssize_t len; - Py_ssize_t i; - Py_ssize_t first; - Py_ssize_t last; - PyObject* string_set; - - state = safe_state->re_state; - simple_case_fold = state->encoding->simple_case_fold; - char_at = state->char_at; - - /* The folded string will have the same width as the original string. */ - folded_charsize = state->charsize; - - switch (folded_charsize) { - case 1: - set_char_at = bytes1_set_char_at; - break; - case 2: - set_char_at = bytes2_set_char_at; - break; - case 4: - set_char_at = bytes4_set_char_at; - break; - default: - return RE_ERROR_INTERNAL; - } - - min_len = (Py_ssize_t)node->values[1]; - max_len = (Py_ssize_t)node->values[2]; - - acquire_GIL(safe_state); - - /* Allocate a buffer for the folded string. */ - folded = re_alloc((size_t)(max_len * folded_charsize)); - if (!folded) { - status = RE_ERROR_MEMORY; - goto finished; - } - - if (reverse) { - text_available = state->text_pos; - slice_available = state->text_pos - state->slice_start; - t_pos = state->text_pos - 1; - f_pos = max_len - 1; - step = -1; - partial_side = RE_PARTIAL_LEFT; - } else { - text_available = state->text_length - state->text_pos; - slice_available = state->slice_end - state->text_pos; - t_pos = state->text_pos; - f_pos = 0; - step = 1; - partial_side = RE_PARTIAL_RIGHT; - } - - /* Get as many characters as we need for the longest possible match. 
*/ - len = min_ssize_t(max_len, slice_available); - - for (i = 0; i < len; i ++) { - Py_UCS4 ch; - - ch = simple_case_fold(char_at(state->text, t_pos)); - set_char_at(folded, f_pos, ch); - t_pos += step; - f_pos += step; - } - - if (reverse) { - first = f_pos; - last = max_len; - } else { - first = 0; - last = f_pos; - } - - /* If we didn't get all of the characters we need, is a partial match - * allowed? - */ - if (len < max_len && len == text_available && state->partial_side == - partial_side) { - if (len == 0) { - /* An empty string is always a possible partial match. */ - status = RE_ERROR_PARTIAL; - goto finished; - } - - /* Make a set of the possible partial matches. */ - status = make_partial_string_set(state, node); - if (status < 0) - goto finished; - - /* Fetch the partial string set. */ - string_set = - state->pattern->partial_named_lists[partial_side][node->values[0]]; - - /* Is the text we have a partial match? */ - status = string_set_contains_ign(state, string_set, folded, first, - last, folded_charsize); - if (status < 0) - goto finished; - - if (status == 1) { - /* Advance past the match. */ - if (reverse) - state->text_pos -= len; - else - state->text_pos += len; - - status = RE_ERROR_PARTIAL; - goto finished; - } - } - - /* Fetch the string set. PyList_GET_ITEM borrows a reference. */ - string_set = PyList_GET_ITEM(state->pattern->named_list_indexes, - node->values[0]); - if (!string_set) { - status = RE_ERROR_INTERNAL; - goto finished; - } - - /* We've already looked for a partial match (if allowed), but what about a - * complete match? - */ - while (len >= min_len) { - status = string_set_contains_ign(state, string_set, folded, first, - last, folded_charsize); - - if (status == 1) { - /* Advance past the match. */ - if (reverse) - state->text_pos -= len; - else - state->text_pos += len; - - status = 1; - goto finished; - } - - /* Look for a shorter match. */ - --len; - if (reverse) - ++first; - else - --last; - } - - /* No match. 
*/ - status = 0; - -finished: - re_dealloc(folded); - - release_GIL(safe_state); - - return status; -} - -/* Checks whether any additional fuzzy error is permitted. */ -Py_LOCAL_INLINE(BOOL) any_error_permitted(RE_State* state) { - RE_FuzzyInfo* fuzzy_info; - RE_CODE* values; - - fuzzy_info = &state->fuzzy_info; - values = fuzzy_info->node->values; - - return fuzzy_info->total_cost <= values[RE_FUZZY_VAL_MAX_COST] && - fuzzy_info->counts[RE_FUZZY_ERR] < values[RE_FUZZY_VAL_MAX_ERR] && - state->total_cost <= state->max_cost; -} - -/* Checks whether this additional fuzzy error is permitted. */ -Py_LOCAL_INLINE(BOOL) this_error_permitted(RE_State* state, int fuzzy_type) { - RE_FuzzyInfo* fuzzy_info; - RE_CODE* values; - - fuzzy_info = &state->fuzzy_info; - values = fuzzy_info->node->values; - - return fuzzy_info->total_cost + values[RE_FUZZY_VAL_COST_BASE + fuzzy_type] - <= values[RE_FUZZY_VAL_MAX_COST] && fuzzy_info->counts[fuzzy_type] < - values[RE_FUZZY_VAL_MAX_BASE + fuzzy_type] && state->total_cost + - values[RE_FUZZY_VAL_COST_BASE + fuzzy_type] <= state->max_cost; -} - -/* Checks whether we've reachsd the end of the text during a fuzzy partial - * match. - */ -Py_LOCAL_INLINE(int) check_fuzzy_partial(RE_State* state, Py_ssize_t text_pos) - { - switch (state->partial_side) { - case RE_PARTIAL_LEFT: - if (text_pos < 0) - return RE_ERROR_PARTIAL; - break; - case RE_PARTIAL_RIGHT: - if (text_pos > state->text_length) - return RE_ERROR_PARTIAL; - break; - } - - return RE_ERROR_FAILURE; -} - -/* Checks a fuzzy match of an item. */ -Py_LOCAL_INLINE(int) next_fuzzy_match_item(RE_State* state, RE_FuzzyData* data, - BOOL is_string, int step) { - Py_ssize_t new_pos; - - if (this_error_permitted(state, data->fuzzy_type)) { - switch (data->fuzzy_type) { - case RE_FUZZY_DEL: - /* Could a character at text_pos have been deleted? 
*/ - if (is_string) - data->new_string_pos += step; - else - data->new_node = data->new_node->next_1.node; - return RE_ERROR_SUCCESS; - case RE_FUZZY_INS: - /* Could the character at text_pos have been inserted? */ - if (!data->permit_insertion) - return RE_ERROR_FAILURE; - - new_pos = data->new_text_pos + step; - if (state->slice_start <= new_pos && new_pos <= state->slice_end) { - data->new_text_pos = new_pos; - return RE_ERROR_SUCCESS; - } - - return check_fuzzy_partial(state, new_pos); - case RE_FUZZY_SUB: - /* Could the character at text_pos have been substituted? */ - new_pos = data->new_text_pos + step; - if (state->slice_start <= new_pos && new_pos <= state->slice_end) { - data->new_text_pos = new_pos; - if (is_string) - data->new_string_pos += step; - else - data->new_node = data->new_node->next_1.node; - return RE_ERROR_SUCCESS; - } - - return check_fuzzy_partial(state, new_pos); - } - } - - return RE_ERROR_FAILURE; -} - -/* Tries a fuzzy match of an item of width 0 or 1. */ -Py_LOCAL_INLINE(int) fuzzy_match_item(RE_SafeState* safe_state, BOOL search, - Py_ssize_t* text_pos, RE_Node** node, int step) { - RE_State* state; - RE_FuzzyData data; - RE_FuzzyInfo* fuzzy_info; - RE_CODE* values; - RE_BacktrackData* bt_data; - - state = safe_state->re_state; - - if (!any_error_permitted(state)) { - *node = NULL; - return RE_ERROR_SUCCESS; - } - - data.new_text_pos = *text_pos; - data.new_node = *node; - - fuzzy_info = &state->fuzzy_info; - values = fuzzy_info->node->values; - - if (step == 0) { - if (data.new_node->status & RE_STATUS_REVERSE) { - data.step = -1; - data.limit = state->slice_start; - } else { - data.step = 1; - data.limit = state->slice_end; - } - } else - data.step = step; - - /* Permit insertion except initially when searching (it's better just to - * start searching one character later). 
- */ - data.permit_insertion = !search || data.new_text_pos != - state->search_anchor; - - for (data.fuzzy_type = 0; data.fuzzy_type < RE_FUZZY_COUNT; - data.fuzzy_type++) { - int status; - - status = next_fuzzy_match_item(state, &data, FALSE, step); - if (status < 0) - return status; - - if (status == RE_ERROR_SUCCESS) - goto found; - } - - *node = NULL; - return RE_ERROR_SUCCESS; - -found: - if (!add_backtrack(safe_state, (*node)->op)) - return RE_ERROR_FAILURE; - bt_data = state->backtrack; - bt_data->fuzzy_item.position.text_pos = *text_pos; - bt_data->fuzzy_item.position.node = *node; - bt_data->fuzzy_item.fuzzy_type = (RE_INT8)data.fuzzy_type; - bt_data->fuzzy_item.step = (RE_INT8)step; - - ++fuzzy_info->counts[data.fuzzy_type]; - ++fuzzy_info->counts[RE_FUZZY_ERR]; - fuzzy_info->total_cost += values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type]; - ++state->total_errors; - state->total_cost += values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type]; - - *text_pos = data.new_text_pos; - *node = data.new_node; - - return RE_ERROR_SUCCESS; -} - -/* Retries a fuzzy match of a item of width 0 or 1. 
*/ -Py_LOCAL_INLINE(int) retry_fuzzy_match_item(RE_SafeState* safe_state, BOOL - search, Py_ssize_t* text_pos, RE_Node** node, BOOL advance) { - RE_State* state; - RE_FuzzyData data; - RE_FuzzyInfo* fuzzy_info; - RE_CODE* values; - RE_BacktrackData* bt_data; - int step; - - state = safe_state->re_state; - fuzzy_info = &state->fuzzy_info; - values = fuzzy_info->node->values; - - bt_data = state->backtrack; - data.new_text_pos = bt_data->fuzzy_item.position.text_pos; - data.new_node = bt_data->fuzzy_item.position.node; - data.fuzzy_type = bt_data->fuzzy_item.fuzzy_type; - data.step = bt_data->fuzzy_item.step; - - if (data.fuzzy_type >= 0) { - --fuzzy_info->counts[data.fuzzy_type]; - --fuzzy_info->counts[RE_FUZZY_ERR]; - fuzzy_info->total_cost -= values[RE_FUZZY_VAL_COST_BASE + - data.fuzzy_type]; - --state->total_errors; - state->total_cost -= values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type]; - } - - /* Permit insertion except initially when searching (it's better just to - * start searching one character later). - */ - data.permit_insertion = !search || data.new_text_pos != - state->search_anchor; - - step = advance ? data.step : 0; - - for (++data.fuzzy_type; data.fuzzy_type < RE_FUZZY_COUNT; - data.fuzzy_type++) { - int status; - - status = next_fuzzy_match_item(state, &data, FALSE, step); - if (status < 0) - return status; - - if (status == RE_ERROR_SUCCESS) - goto found; - } - - discard_backtrack(state); - *node = NULL; - return RE_ERROR_SUCCESS; - -found: - bt_data->fuzzy_item.fuzzy_type = (RE_INT8)data.fuzzy_type; - - ++fuzzy_info->counts[data.fuzzy_type]; - ++fuzzy_info->counts[RE_FUZZY_ERR]; - fuzzy_info->total_cost += values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type]; - ++state->total_errors; - state->total_cost += values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type]; - - *text_pos = data.new_text_pos; - *node = data.new_node; - - return RE_ERROR_SUCCESS; -} - -/* Tries a fuzzy insertion. 
*/ -Py_LOCAL_INLINE(int) fuzzy_insert(RE_SafeState* safe_state, Py_ssize_t - text_pos, RE_Node* node) { - RE_State* state; - RE_BacktrackData* bt_data; - RE_FuzzyInfo* fuzzy_info; - RE_CODE* values; - - state = safe_state->re_state; - - /* No insertion or deletion. */ - if (!add_backtrack(safe_state, node->op)) - return RE_ERROR_FAILURE; - bt_data = state->backtrack; - bt_data->fuzzy_insert.position.text_pos = text_pos; - bt_data->fuzzy_insert.position.node = node; - bt_data->fuzzy_insert.count = 0; - bt_data->fuzzy_insert.too_few_errors = state->too_few_errors; - bt_data->fuzzy_insert.fuzzy_node = node; /* END_FUZZY node. */ - - /* Check whether there are too few errors. */ - fuzzy_info = &state->fuzzy_info; - - /* The node in this case is the END_FUZZY node. */ - values = node->values; - - if (fuzzy_info->counts[RE_FUZZY_DEL] < values[RE_FUZZY_VAL_MIN_DEL] || - fuzzy_info->counts[RE_FUZZY_INS] < values[RE_FUZZY_VAL_MIN_INS] || - fuzzy_info->counts[RE_FUZZY_SUB] < values[RE_FUZZY_VAL_MIN_SUB] || - fuzzy_info->counts[RE_FUZZY_ERR] < values[RE_FUZZY_VAL_MIN_ERR]) - state->too_few_errors = RE_ERROR_SUCCESS; - - return RE_ERROR_SUCCESS; -} - -/* Retries a fuzzy insertion. */ -Py_LOCAL_INLINE(int) retry_fuzzy_insert(RE_SafeState* safe_state, Py_ssize_t* - text_pos, RE_Node** node) { - RE_State* state; - RE_FuzzyInfo* fuzzy_info; - RE_BacktrackData* bt_data; - Py_ssize_t new_text_pos; - RE_Node* new_node; - int step; - Py_ssize_t limit; - RE_Node* fuzzy_node; - RE_CODE* values; - - state = safe_state->re_state; - fuzzy_info = &state->fuzzy_info; - values = fuzzy_info->node->values; - - bt_data = state->backtrack; - new_text_pos = bt_data->fuzzy_insert.position.text_pos; - new_node = bt_data->fuzzy_insert.position.node; - - if (new_node->status & RE_STATUS_REVERSE) { - step = -1; - limit = state->slice_start; - } else { - step = 1; - limit = state->slice_end; - } - - /* Could the character at text_pos have been inserted? 
*/ - if (!this_error_permitted(state, RE_FUZZY_INS) || new_text_pos == limit) { - size_t count; - - count = bt_data->fuzzy_insert.count; - - fuzzy_info->counts[RE_FUZZY_INS] -= count; - fuzzy_info->counts[RE_FUZZY_ERR] -= count; - fuzzy_info->total_cost -= values[RE_FUZZY_VAL_INS_COST] * count; - state->total_errors -= count; - state->total_cost -= values[RE_FUZZY_VAL_INS_COST] * count; - state->too_few_errors = bt_data->fuzzy_insert.too_few_errors; - - discard_backtrack(state); - *node = NULL; - return RE_ERROR_SUCCESS; - } - - ++bt_data->fuzzy_insert.count; - - ++fuzzy_info->counts[RE_FUZZY_INS]; - ++fuzzy_info->counts[RE_FUZZY_ERR]; - fuzzy_info->total_cost += values[RE_FUZZY_VAL_INS_COST]; - ++state->total_errors; - state->total_cost += values[RE_FUZZY_VAL_INS_COST]; - - /* Check whether there are too few errors. */ - state->too_few_errors = bt_data->fuzzy_insert.too_few_errors; - fuzzy_node = bt_data->fuzzy_insert.fuzzy_node; /* END_FUZZY node. */ - values = fuzzy_node->values; - if (fuzzy_info->counts[RE_FUZZY_DEL] < values[RE_FUZZY_VAL_MIN_DEL] || - fuzzy_info->counts[RE_FUZZY_INS] < values[RE_FUZZY_VAL_MIN_INS] || - fuzzy_info->counts[RE_FUZZY_SUB] < values[RE_FUZZY_VAL_MIN_SUB] || - fuzzy_info->counts[RE_FUZZY_ERR] < values[RE_FUZZY_VAL_MIN_ERR]) - state->too_few_errors = RE_ERROR_SUCCESS; - - *text_pos = new_text_pos + step * (Py_ssize_t)bt_data->fuzzy_insert.count; - *node = new_node; - - return RE_ERROR_SUCCESS; -} - -/* Tries a fuzzy match of a string. 
*/ -Py_LOCAL_INLINE(int) fuzzy_match_string(RE_SafeState* safe_state, BOOL search, - Py_ssize_t* text_pos, RE_Node* node, Py_ssize_t* string_pos, BOOL* matched, - int step) { - RE_State* state; - RE_FuzzyData data; - RE_FuzzyInfo* fuzzy_info; - RE_CODE* values; - RE_BacktrackData* bt_data; - - state = safe_state->re_state; - - if (!any_error_permitted(state)) { - *matched = FALSE; - return RE_ERROR_SUCCESS; - } - - data.new_text_pos = *text_pos; - data.new_string_pos = *string_pos; - data.step = step; - - fuzzy_info = &state->fuzzy_info; - values = fuzzy_info->node->values; - - /* Permit insertion except initially when searching (it's better just to - * start searching one character later). - */ - data.permit_insertion = !search || data.new_text_pos != - state->search_anchor; - - for (data.fuzzy_type = 0; data.fuzzy_type < RE_FUZZY_COUNT; - data.fuzzy_type++) { - int status; - - status = next_fuzzy_match_item(state, &data, TRUE, data.step); - if (status < 0) - return status; - - if (status == RE_ERROR_SUCCESS) - goto found; - } - - *matched = FALSE; - return RE_ERROR_SUCCESS; - -found: - if (!add_backtrack(safe_state, node->op)) - return RE_ERROR_FAILURE; - bt_data = state->backtrack; - bt_data->fuzzy_string.position.text_pos = *text_pos; - bt_data->fuzzy_string.position.node = node; - bt_data->fuzzy_string.string_pos = *string_pos; - bt_data->fuzzy_string.fuzzy_type = (RE_INT8)data.fuzzy_type; - bt_data->fuzzy_string.step = (RE_INT8)step; - - ++fuzzy_info->counts[data.fuzzy_type]; - ++fuzzy_info->counts[RE_FUZZY_ERR]; - fuzzy_info->total_cost += values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type]; - ++state->total_errors; - state->total_cost += values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type]; - - *text_pos = data.new_text_pos; - *string_pos = data.new_string_pos; - *matched = TRUE; - - return RE_ERROR_SUCCESS; -} - -/* Retries a fuzzy match of a string. 
*/ -Py_LOCAL_INLINE(int) retry_fuzzy_match_string(RE_SafeState* safe_state, BOOL - search, Py_ssize_t* text_pos, RE_Node** node, Py_ssize_t* string_pos, BOOL* - matched) { - RE_State* state; - RE_FuzzyData data; - RE_FuzzyInfo* fuzzy_info; - RE_CODE* values; - RE_BacktrackData* bt_data; - RE_Node* new_node; - - state = safe_state->re_state; - fuzzy_info = &state->fuzzy_info; - values = fuzzy_info->node->values; - - bt_data = state->backtrack; - data.new_text_pos = bt_data->fuzzy_string.position.text_pos; - new_node = bt_data->fuzzy_string.position.node; - data.new_string_pos = bt_data->fuzzy_string.string_pos; - data.fuzzy_type = bt_data->fuzzy_string.fuzzy_type; - data.step = bt_data->fuzzy_string.step; - - --fuzzy_info->counts[data.fuzzy_type]; - --fuzzy_info->counts[RE_FUZZY_ERR]; - fuzzy_info->total_cost -= values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type]; - --state->total_errors; - state->total_cost -= values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type]; - - /* Permit insertion except initially when searching (it's better just to - * start searching one character later). 
- */ - data.permit_insertion = !search || data.new_text_pos != - state->search_anchor; - - for (++data.fuzzy_type; data.fuzzy_type < RE_FUZZY_COUNT; - data.fuzzy_type++) { - int status; - - status = next_fuzzy_match_item(state, &data, TRUE, data.step); - if (status < 0) - return status; - - if (status == RE_ERROR_SUCCESS) - goto found; - } - - discard_backtrack(state); - *matched = FALSE; - return RE_ERROR_SUCCESS; - -found: - bt_data->fuzzy_string.fuzzy_type = (RE_INT8)data.fuzzy_type; - - ++fuzzy_info->counts[data.fuzzy_type]; - ++fuzzy_info->counts[RE_FUZZY_ERR]; - fuzzy_info->total_cost += values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type]; - ++state->total_errors; - state->total_cost += values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type]; - - *text_pos = data.new_text_pos; - *node = new_node; - *string_pos = data.new_string_pos; - *matched = TRUE; - - return RE_ERROR_SUCCESS; -} - -/* Checks a fuzzy match of a atring. */ -Py_LOCAL_INLINE(int) next_fuzzy_match_string_fld(RE_State* state, RE_FuzzyData* - data) { - int new_pos; - - if (this_error_permitted(state, data->fuzzy_type)) { - switch (data->fuzzy_type) { - case RE_FUZZY_DEL: - /* Could a character at text_pos have been deleted? */ - data->new_string_pos += data->step; - return RE_ERROR_SUCCESS; - case RE_FUZZY_INS: - /* Could the character at text_pos have been inserted? */ - if (!data->permit_insertion) - return RE_ERROR_FAILURE; - - new_pos = data->new_folded_pos + data->step; - if (0 <= new_pos && new_pos <= data->folded_len) { - data->new_folded_pos = new_pos; - return RE_ERROR_SUCCESS; - } - - return check_fuzzy_partial(state, new_pos); - case RE_FUZZY_SUB: - /* Could the character at text_pos have been substituted? 
*/ - new_pos = data->new_folded_pos + data->step; - if (0 <= new_pos && new_pos <= data->folded_len) { - data->new_folded_pos = new_pos; - data->new_string_pos += data->step; - return RE_ERROR_SUCCESS; - } - - return check_fuzzy_partial(state, new_pos); - } - } - - return RE_ERROR_FAILURE; -} - -/* Tries a fuzzy match of a string, ignoring case. */ -Py_LOCAL_INLINE(int) fuzzy_match_string_fld(RE_SafeState* safe_state, BOOL - search, Py_ssize_t* text_pos, RE_Node* node, Py_ssize_t* string_pos, int* - folded_pos, int folded_len, BOOL* matched, int step) { - RE_State* state; - RE_FuzzyData data; - RE_FuzzyInfo* fuzzy_info; - RE_CODE* values; - Py_ssize_t new_text_pos; - RE_BacktrackData* bt_data; - - state = safe_state->re_state; - - if (!any_error_permitted(state)) { - *matched = FALSE; - return RE_ERROR_SUCCESS; - } - - new_text_pos = *text_pos; - data.new_string_pos = *string_pos; - data.new_folded_pos = *folded_pos; - data.folded_len = folded_len; - data.step = step; - - fuzzy_info = &state->fuzzy_info; - values = fuzzy_info->node->values; - - /* Permit insertion except initially when searching (it's better just to - * start searching one character later). 
- */ - data.permit_insertion = !search || new_text_pos != state->search_anchor; - if (step > 0) { - if (data.new_folded_pos != 0) - data.permit_insertion = RE_ERROR_SUCCESS; - } else { - if (data.new_folded_pos != folded_len) - data.permit_insertion = RE_ERROR_SUCCESS; - } - - for (data.fuzzy_type = 0; data.fuzzy_type < RE_FUZZY_COUNT; - data.fuzzy_type++) { - int status; - - status = next_fuzzy_match_string_fld(state, &data); - if (status < 0) - return status; - - if (status == RE_ERROR_SUCCESS) - goto found; - } - - *matched = FALSE; - return RE_ERROR_SUCCESS; - -found: - if (!add_backtrack(safe_state, node->op)) - return RE_ERROR_FAILURE; - bt_data = state->backtrack; - bt_data->fuzzy_string.position.text_pos = *text_pos; - bt_data->fuzzy_string.position.node = node; - bt_data->fuzzy_string.string_pos = *string_pos; - bt_data->fuzzy_string.folded_pos = (RE_INT8)(*folded_pos); - bt_data->fuzzy_string.folded_len = (RE_INT8)folded_len; - bt_data->fuzzy_string.fuzzy_type = (RE_INT8)data.fuzzy_type; - bt_data->fuzzy_string.step = (RE_INT8)step; - - ++fuzzy_info->counts[data.fuzzy_type]; - ++fuzzy_info->counts[RE_FUZZY_ERR]; - fuzzy_info->total_cost += values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type]; - ++state->total_errors; - state->total_cost += values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type]; - - *text_pos = new_text_pos; - *string_pos = data.new_string_pos; - *folded_pos = data.new_folded_pos; - *matched = TRUE; - - return RE_ERROR_SUCCESS; -} - -/* Retries a fuzzy match of a string, ignoring case. 
*/ -Py_LOCAL_INLINE(int) retry_fuzzy_match_string_fld(RE_SafeState* safe_state, - BOOL search, Py_ssize_t* text_pos, RE_Node** node, Py_ssize_t* string_pos, - int* folded_pos, BOOL* matched) { - RE_State* state; - RE_FuzzyData data; - RE_FuzzyInfo* fuzzy_info; - RE_CODE* values; - RE_BacktrackData* bt_data; - Py_ssize_t new_text_pos; - RE_Node* new_node; - - state = safe_state->re_state; - fuzzy_info = &state->fuzzy_info; - values = fuzzy_info->node->values; - - bt_data = state->backtrack; - new_text_pos = bt_data->fuzzy_string.position.text_pos; - new_node = bt_data->fuzzy_string.position.node; - data.new_string_pos = bt_data->fuzzy_string.string_pos; - data.new_folded_pos = bt_data->fuzzy_string.folded_pos; - data.folded_len = bt_data->fuzzy_string.folded_len; - data.fuzzy_type = bt_data->fuzzy_string.fuzzy_type; - data.step = bt_data->fuzzy_string.step; - - --fuzzy_info->counts[data.fuzzy_type]; - --fuzzy_info->counts[RE_FUZZY_ERR]; - fuzzy_info->total_cost -= values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type]; - --state->total_errors; - state->total_cost -= values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type]; - - /* Permit insertion except initially when searching (it's better just to - * start searching one character later). 
- */ - data.permit_insertion = !search || new_text_pos != state->search_anchor; - if (data.step > 0) { - if (data.new_folded_pos != 0) - data.permit_insertion = RE_ERROR_SUCCESS; - } else { - if (data.new_folded_pos != bt_data->fuzzy_string.folded_len) - data.permit_insertion = RE_ERROR_SUCCESS; - } - - for (++data.fuzzy_type; data.fuzzy_type < RE_FUZZY_COUNT; - data.fuzzy_type++) { - int status; - - status = next_fuzzy_match_string_fld(state, &data); - if (status < 0) - return status; - - if (status == RE_ERROR_SUCCESS) - goto found; - } - - discard_backtrack(state); - *matched = FALSE; - return RE_ERROR_SUCCESS; - -found: - bt_data->fuzzy_string.fuzzy_type = (RE_INT8)data.fuzzy_type; - - ++fuzzy_info->counts[data.fuzzy_type]; - ++fuzzy_info->counts[RE_FUZZY_ERR]; - fuzzy_info->total_cost += values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type]; - ++state->total_errors; - state->total_cost += values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type]; - - *text_pos = new_text_pos; - *node = new_node; - *string_pos = data.new_string_pos; - *folded_pos = data.new_folded_pos; - *matched = TRUE; - - return RE_ERROR_SUCCESS; -} - -/* Checks a fuzzy match of a atring. */ -Py_LOCAL_INLINE(int) next_fuzzy_match_group_fld(RE_State* state, RE_FuzzyData* - data) { - int new_pos; - - if (this_error_permitted(state, data->fuzzy_type)) { - switch (data->fuzzy_type) { - case RE_FUZZY_DEL: - /* Could a character at text_pos have been deleted? */ - data->new_gfolded_pos += data->step; - return RE_ERROR_SUCCESS; - case RE_FUZZY_INS: - /* Could the character at text_pos have been inserted? */ - if (!data->permit_insertion) - return RE_ERROR_FAILURE; - - new_pos = data->new_folded_pos + data->step; - if (0 <= new_pos && new_pos <= data->folded_len) { - data->new_folded_pos = new_pos; - return RE_ERROR_SUCCESS; - } - - return check_fuzzy_partial(state, new_pos); - case RE_FUZZY_SUB: - /* Could the character at text_pos have been substituted? 
*/ - new_pos = data->new_folded_pos + data->step; - if (0 <= new_pos && new_pos <= data->folded_len) { - data->new_folded_pos = new_pos; - data->new_gfolded_pos += data->step; - return RE_ERROR_SUCCESS; - } - - return check_fuzzy_partial(state, new_pos); - } - } - - return RE_ERROR_FAILURE; -} - -/* Tries a fuzzy match of a group reference, ignoring case. */ -Py_LOCAL_INLINE(int) fuzzy_match_group_fld(RE_SafeState* safe_state, BOOL - search, Py_ssize_t* text_pos, RE_Node* node, int* folded_pos, int folded_len, - Py_ssize_t* group_pos, int* gfolded_pos, int gfolded_len, BOOL* matched, int - step) { - RE_State* state; - RE_FuzzyData data; - RE_FuzzyInfo* fuzzy_info; - RE_CODE* values; - Py_ssize_t new_text_pos; - Py_ssize_t new_group_pos; - RE_BacktrackData* bt_data; - - state = safe_state->re_state; - - if (!any_error_permitted(state)) { - *matched = FALSE; - return RE_ERROR_SUCCESS; - } - - new_text_pos = *text_pos; - data.new_folded_pos = *folded_pos; - data.folded_len = folded_len; - new_group_pos = *group_pos; - data.new_gfolded_pos = *gfolded_pos; - data.step = step; - - fuzzy_info = &state->fuzzy_info; - values = fuzzy_info->node->values; - - /* Permit insertion except initially when searching (it's better just to - * start searching one character later). 
- */ - data.permit_insertion = !search || new_text_pos != state->search_anchor; - if (data.step > 0) { - if (data.new_folded_pos != 0) - data.permit_insertion = RE_ERROR_SUCCESS; - } else { - if (data.new_folded_pos != folded_len) - data.permit_insertion = RE_ERROR_SUCCESS; - } - - for (data.fuzzy_type = 0; data.fuzzy_type < RE_FUZZY_COUNT; - data.fuzzy_type++) { - int status; - - status = next_fuzzy_match_group_fld(state, &data); - if (status < 0) - return status; - - if (status == RE_ERROR_SUCCESS) - goto found; - } - - *matched = FALSE; - return RE_ERROR_SUCCESS; - -found: - if (!add_backtrack(safe_state, node->op)) - return RE_ERROR_FAILURE; - bt_data = state->backtrack; - bt_data->fuzzy_string.position.text_pos = *text_pos; - bt_data->fuzzy_string.position.node = node; - bt_data->fuzzy_string.string_pos = *group_pos; - bt_data->fuzzy_string.folded_pos = (RE_INT8)(*folded_pos); - bt_data->fuzzy_string.folded_len = (RE_INT8)folded_len; - bt_data->fuzzy_string.gfolded_pos = (RE_INT8)(*gfolded_pos); - bt_data->fuzzy_string.gfolded_len = (RE_INT8)gfolded_len; - bt_data->fuzzy_string.fuzzy_type = (RE_INT8)data.fuzzy_type; - bt_data->fuzzy_string.step = (RE_INT8)step; - - ++fuzzy_info->counts[data.fuzzy_type]; - ++fuzzy_info->counts[RE_FUZZY_ERR]; - fuzzy_info->total_cost += values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type]; - ++state->total_errors; - state->total_cost += values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type]; - - *text_pos = new_text_pos; - *group_pos = new_group_pos; - *folded_pos = data.new_folded_pos; - *gfolded_pos = data.new_gfolded_pos; - *matched = TRUE; - - return RE_ERROR_SUCCESS; -} - -/* Retries a fuzzy match of a group reference, ignoring case. 
*/ -Py_LOCAL_INLINE(int) retry_fuzzy_match_group_fld(RE_SafeState* safe_state, BOOL - search, Py_ssize_t* text_pos, RE_Node** node, int* folded_pos, Py_ssize_t* - group_pos, int* gfolded_pos, BOOL* matched) { - RE_State* state; - RE_FuzzyData data; - RE_FuzzyInfo* fuzzy_info; - RE_CODE* values; - RE_BacktrackData* bt_data; - Py_ssize_t new_text_pos; - Py_ssize_t new_group_pos; - RE_Node* new_node; - - state = safe_state->re_state; - fuzzy_info = &state->fuzzy_info; - values = fuzzy_info->node->values; - - bt_data = state->backtrack; - new_text_pos = bt_data->fuzzy_string.position.text_pos; - new_node = bt_data->fuzzy_string.position.node; - new_group_pos = bt_data->fuzzy_string.string_pos; - data.new_folded_pos = bt_data->fuzzy_string.folded_pos; - data.folded_len = bt_data->fuzzy_string.folded_len; - data.new_gfolded_pos = bt_data->fuzzy_string.gfolded_pos; - data.fuzzy_type = bt_data->fuzzy_string.fuzzy_type; - data.step = bt_data->fuzzy_string.step; - - --fuzzy_info->counts[data.fuzzy_type]; - --fuzzy_info->counts[RE_FUZZY_ERR]; - fuzzy_info->total_cost -= values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type]; - --state->total_errors; - state->total_cost -= values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type]; - - /* Permit insertion except initially when searching (it's better just to - * start searching one character later). 
- */ - data.permit_insertion = !search || new_text_pos != state->search_anchor || - data.new_folded_pos != bt_data->fuzzy_string.folded_len; - - for (++data.fuzzy_type; data.fuzzy_type < RE_FUZZY_COUNT; - data.fuzzy_type++) { - int status; - - status = next_fuzzy_match_group_fld(state, &data); - if (status < 0) - return status; - - if (status == RE_ERROR_SUCCESS) - goto found; - } - - discard_backtrack(state); - *matched = FALSE; - return RE_ERROR_SUCCESS; - -found: - bt_data->fuzzy_string.fuzzy_type = (RE_INT8)data.fuzzy_type; - - ++fuzzy_info->counts[data.fuzzy_type]; - ++fuzzy_info->counts[RE_FUZZY_ERR]; - fuzzy_info->total_cost += values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type]; - ++state->total_errors; - state->total_cost += values[RE_FUZZY_VAL_COST_BASE + data.fuzzy_type]; - - *text_pos = new_text_pos; - *node = new_node; - *group_pos = new_group_pos; - *folded_pos = data.new_folded_pos; - *gfolded_pos = data.new_gfolded_pos; - *matched = TRUE; - - return RE_ERROR_SUCCESS; -} - -/* Locates the required string, if there's one. */ -Py_LOCAL_INLINE(Py_ssize_t) locate_required_string(RE_SafeState* safe_state) { - RE_State* state; - PatternObject* pattern; - Py_ssize_t found_pos; - Py_ssize_t end_pos; - - state = safe_state->re_state; - pattern = state->pattern; - - /* We haven't matched the required string yet. */ - state->req_pos = -1; - - if (!pattern->req_string) - /* There isn't a required string, so start matching from the current - * position. - */ - return state->text_pos; - - /* Search for the required string and calculate where to start matching. */ - switch (pattern->req_string->op) { - case RE_OP_STRING: - { - BOOL is_partial; - - found_pos = string_search(safe_state, pattern->req_string, - state->text_pos, state->slice_end, &is_partial); - if (found_pos < 0) - /* The required string wasn't found. */ - return -1; - - if (is_partial) - /* We found a partial match, so start matching from there. 
*/ - return found_pos; - - /* Record where the required string matched. */ - state->req_pos = found_pos; - state->req_end = found_pos + - (Py_ssize_t)pattern->req_string->value_count; - - if (pattern->req_offset >= 0) { - /* Step back from the required string to where we should start - * matching. - */ - found_pos -= pattern->req_offset; - if (found_pos >= state->text_pos) - return found_pos; - } - break; - } - case RE_OP_STRING_FLD: - { - BOOL is_partial; - - found_pos = string_search_fld(safe_state, pattern->req_string, - state->text_pos, state->slice_end, &end_pos, &is_partial); - if (found_pos < 0) - /* The required string wasn't found. */ - return -1; - - if (is_partial) - /* We found a partial match, so start matching from there. */ - return found_pos; - - /* Record where the required string matched. */ - state->req_pos = found_pos; - state->req_end = end_pos; - - if (pattern->req_offset >= 0) { - /* Step back from the required string to where we should start - * matching. - */ - found_pos -= pattern->req_offset; - if (found_pos >= state->text_pos) - return found_pos; - } - break; - } - case RE_OP_STRING_FLD_REV: - { - BOOL is_partial; - - found_pos = string_search_fld_rev(safe_state, pattern->req_string, - state->text_pos, state->slice_start, &end_pos, &is_partial); - if (found_pos < 0) - /* The required string wasn't found. */ - return -1; - - if (is_partial) - /* We found a partial match, so start matching from there. */ - return found_pos; - - /* Record where the required string matched. */ - state->req_pos = found_pos; - state->req_end = end_pos; - - if (pattern->req_offset >= 0) { - /* Step back from the required string to where we should start - * matching. 
- */ - found_pos += pattern->req_offset; - if (found_pos <= state->text_pos) - return found_pos; - } - break; - } - case RE_OP_STRING_IGN: - { - BOOL is_partial; - - found_pos = string_search_ign(safe_state, pattern->req_string, - state->text_pos, state->slice_end, &is_partial); - if (found_pos < 0) - /* The required string wasn't found. */ - return -1; - - if (is_partial) - /* We found a partial match, so start matching from there. */ - return found_pos; - - /* Record where the required string matched. */ - state->req_pos = found_pos; - state->req_end = found_pos + - (Py_ssize_t)pattern->req_string->value_count; - - if (pattern->req_offset >= 0) { - /* Step back from the required string to where we should start - * matching. - */ - found_pos -= pattern->req_offset; - if (found_pos >= state->text_pos) - return found_pos; - } - break; - } - case RE_OP_STRING_IGN_REV: - { - BOOL is_partial; - - found_pos = string_search_ign_rev(safe_state, pattern->req_string, - state->text_pos, state->slice_start, &is_partial); - if (found_pos < 0) - /* The required string wasn't found. */ - return -1; - - if (is_partial) - /* We found a partial match, so start matching from there. */ - return found_pos; - - /* Record where the required string matched. */ - state->req_pos = found_pos; - state->req_end = found_pos - - (Py_ssize_t)pattern->req_string->value_count; - - if (pattern->req_offset >= 0) { - /* Step back from the required string to where we should start - * matching. - */ - found_pos += pattern->req_offset; - if (found_pos <= state->text_pos) - return found_pos; - } - break; - } - case RE_OP_STRING_REV: - { - BOOL is_partial; - - found_pos = string_search_rev(safe_state, pattern->req_string, - state->text_pos, state->slice_start, &is_partial); - if (found_pos < 0) - /* The required string wasn't found. */ - return -1; - - if (is_partial) - /* We found a partial match, so start matching from there. */ - return found_pos; - - /* Record where the required string matched. 
*/ - state->req_pos = found_pos; - state->req_end = found_pos - - (Py_ssize_t)pattern->req_string->value_count; - - if (pattern->req_offset >= 0) { - /* Step back from the required string to where we should start - * matching. - */ - found_pos += pattern->req_offset; - if (found_pos <= state->text_pos) - return found_pos; - } - break; - } - } - - /* Start matching from the current position. */ - return state->text_pos; -} - -/* Tries to match a character pattern. */ -Py_LOCAL_INLINE(int) match_one(RE_State* state, RE_Node* node, Py_ssize_t - text_pos) { - switch (node->op) { - case RE_OP_ANY: - return try_match_ANY(state, node, text_pos); - case RE_OP_ANY_ALL: - return try_match_ANY_ALL(state, node, text_pos); - case RE_OP_ANY_ALL_REV: - return try_match_ANY_ALL_REV(state, node, text_pos); - case RE_OP_ANY_REV: - return try_match_ANY_REV(state, node, text_pos); - case RE_OP_ANY_U: - return try_match_ANY_U(state, node, text_pos); - case RE_OP_ANY_U_REV: - return try_match_ANY_U_REV(state, node, text_pos); - case RE_OP_CHARACTER: - return try_match_CHARACTER(state, node, text_pos); - case RE_OP_CHARACTER_IGN: - return try_match_CHARACTER_IGN(state, node, text_pos); - case RE_OP_CHARACTER_IGN_REV: - return try_match_CHARACTER_IGN_REV(state, node, text_pos); - case RE_OP_CHARACTER_REV: - return try_match_CHARACTER_REV(state, node, text_pos); - case RE_OP_PROPERTY: - return try_match_PROPERTY(state, node, text_pos); - case RE_OP_PROPERTY_IGN: - return try_match_PROPERTY_IGN(state, node, text_pos); - case RE_OP_PROPERTY_IGN_REV: - return try_match_PROPERTY_IGN_REV(state, node, text_pos); - case RE_OP_PROPERTY_REV: - return try_match_PROPERTY_REV(state, node, text_pos); - case RE_OP_RANGE: - return try_match_RANGE(state, node, text_pos); - case RE_OP_RANGE_IGN: - return try_match_RANGE_IGN(state, node, text_pos); - case RE_OP_RANGE_IGN_REV: - return try_match_RANGE_IGN_REV(state, node, text_pos); - case RE_OP_RANGE_REV: - return try_match_RANGE_REV(state, node, text_pos); 
- case RE_OP_SET_DIFF: - case RE_OP_SET_INTER: - case RE_OP_SET_SYM_DIFF: - case RE_OP_SET_UNION: - return try_match_SET(state, node, text_pos); - case RE_OP_SET_DIFF_IGN: - case RE_OP_SET_INTER_IGN: - case RE_OP_SET_SYM_DIFF_IGN: - case RE_OP_SET_UNION_IGN: - return try_match_SET_IGN(state, node, text_pos); - case RE_OP_SET_DIFF_IGN_REV: - case RE_OP_SET_INTER_IGN_REV: - case RE_OP_SET_SYM_DIFF_IGN_REV: - case RE_OP_SET_UNION_IGN_REV: - return try_match_SET_IGN_REV(state, node, text_pos); - case RE_OP_SET_DIFF_REV: - case RE_OP_SET_INTER_REV: - case RE_OP_SET_SYM_DIFF_REV: - case RE_OP_SET_UNION_REV: - return try_match_SET_REV(state, node, text_pos); - } - - return FALSE; -} - -/* Performs a depth-first match or search from the context. */ -Py_LOCAL_INLINE(int) basic_match(RE_SafeState* safe_state, RE_Node* start_node, - BOOL search, BOOL recursive_call) { - RE_State* state; - RE_EncodingTable* encoding; - PatternObject* pattern; - RE_NextNode start_pair; - Py_UCS4 (*char_at)(void* text, Py_ssize_t pos); - Py_ssize_t pattern_step; /* The overall step of the pattern (forwards or backwards). */ - Py_ssize_t string_pos; - BOOL do_search_start; - Py_ssize_t found_pos; - int folded_pos; - int gfolded_pos; - RE_Node* node; - int status; - TRACE(("<>\n")) - - state = safe_state->re_state; - encoding = state->encoding; - pattern = state->pattern; - - /* Look beyond any initial group node. */ - start_pair.node = start_node; - if (recursive_call) - start_pair.test = locate_test_start(start_node); - else - start_pair.test = pattern->start_test; - - /* Is the pattern anchored to the start or end of the string? */ - switch (start_pair.test->op) { - case RE_OP_END_OF_STRING: - if (state->reverse) { - /* Searching backwards. */ - if (state->text_pos != state->text_length) - return RE_ERROR_FAILURE; - - /* Don't bother to search further because it's anchored. */ - search = FALSE; - } - break; - case RE_OP_START_OF_STRING: - if (!state->reverse) { - /* Searching forwards. 
*/ - if (state->text_pos != 0) - return RE_ERROR_FAILURE; - - /* Don't bother to search further because it's anchored. */ - search = FALSE; - } - break; - } - - char_at = state->char_at; - pattern_step = state->reverse ? -1 : 1; - string_pos = -1; - do_search_start = pattern->do_search_start; - - /* Add a backtrack entry for failure. */ - if (!add_backtrack(safe_state, RE_OP_FAILURE)) - return RE_ERROR_BACKTRACKING; - -start_match: - /* If we're searching, advance along the string until there could be a - * match. - */ - if (pattern->pattern_call_ref >= 0) { - RE_GuardList* guard_list; - - guard_list = &state->group_call_guard_list[pattern->pattern_call_ref]; - guard_list->count = 0; - guard_list->last_text_pos = -1; - } - - /* Locate the required string, if there's one, unless this is a recursive - * call of 'basic_match'. - */ - if (!pattern->req_string || recursive_call) - found_pos = state->text_pos; - else { - found_pos = locate_required_string(safe_state); - if (found_pos < 0) - return RE_ERROR_FAILURE; - } - - if (search) { - state->text_pos = found_pos; - - if (do_search_start) { - RE_Position new_position; - -next_match_1: - /* 'search_start' will clear 'do_search_start' if it can't perform - * a fast search for the next possible match. This enables us to - * avoid the overhead of the call subsequently. - */ - status = search_start(safe_state, &start_pair, &new_position, 0); - if (status != RE_ERROR_SUCCESS) - return status; - - node = new_position.node; - state->text_pos = new_position.text_pos; - - if (node->op == RE_OP_SUCCESS) { - /* Must the match advance past its start? */ - if (state->text_pos != state->search_anchor || - !state->must_advance) - return RE_ERROR_SUCCESS; - - state->text_pos = state->match_pos + pattern_step; - goto next_match_1; - } - - /* 'do_search_start' may have been cleared. 
*/ - do_search_start = pattern->do_search_start; - } else { - /* Avoiding 'search_start', which we've found can't perform a fast - * search for the next possible match. - */ - node = start_node; - -next_match_2: - if (state->reverse) { - if (state->text_pos < state->slice_start) { - if (state->partial_side == RE_PARTIAL_LEFT) - return RE_ERROR_PARTIAL; - - return RE_ERROR_FAILURE; - } - } else { - if (state->text_pos > state->slice_end) { - if (state-> partial_side == RE_PARTIAL_RIGHT) - return RE_ERROR_PARTIAL; - - return RE_ERROR_FAILURE; - } - } - - state->match_pos = state->text_pos; - - if (node->op == RE_OP_SUCCESS) { - /* Must the match advance past its start? */ - if (state->text_pos != state->search_anchor || - !state->must_advance) { - BOOL success; - - if (state->match_all && !recursive_call) { - /* We want to match all of the slice. */ - if (state->reverse) - success = state->text_pos == state->slice_start; - else - success = state->text_pos == state->slice_end; - } else - success = TRUE; - - if (success) - return RE_ERROR_SUCCESS; - } - - state->text_pos = state->match_pos + pattern_step; - goto next_match_2; - } - } - } else { - /* The start position is anchored to the current position. */ - if (found_pos != state->text_pos) - return RE_ERROR_FAILURE; - - node = start_node; - } - -advance: - /* The main matching loop. */ - for (;;) { - TRACE(("%d|", state->text_pos)) - - /* Should we abort the matching? */ - ++state->iterations; - - if (state->iterations == 0 && safe_check_signals(safe_state)) - return RE_ERROR_INTERRUPTED; - - switch (node->op) { - case RE_OP_ANY: /* Any character except a newline. 
*/ - TRACE(("%s\n", re_op_text[node->op])) - - status = try_match_ANY(state, node, state->text_pos); - if (status < 0) - return status; - - if (status == RE_ERROR_SUCCESS) { - ++state->text_pos; - node = node->next_1.node; - } else if (node->status & RE_STATUS_FUZZY) { - status = fuzzy_match_item(safe_state, search, &state->text_pos, - &node, 1); - if (status < 0) - return status; - - if (!node) - goto backtrack; - } else - goto backtrack; - break; - case RE_OP_ANY_ALL: /* Any character at all. */ - TRACE(("%s\n", re_op_text[node->op])) - - status = try_match_ANY_ALL(state, node, state->text_pos); - if (status < 0) - return status; - - if (status == RE_ERROR_SUCCESS) { - ++state->text_pos; - node = node->next_1.node; - } else if (node->status & RE_STATUS_FUZZY) { - status = fuzzy_match_item(safe_state, search, &state->text_pos, - &node, 1); - if (status < 0) - return status; - - if (!node) - goto backtrack; - } else - goto backtrack; - break; - case RE_OP_ANY_ALL_REV: /* Any character at all, backwards. */ - TRACE(("%s\n", re_op_text[node->op])) - - status = try_match_ANY_ALL_REV(state, node, state->text_pos); - if (status < 0) - return status; - - if (status == RE_ERROR_SUCCESS) { - --state->text_pos; - node = node->next_1.node; - } else if (node->status & RE_STATUS_FUZZY) { - status = fuzzy_match_item(safe_state, search, &state->text_pos, - &node, -1); - if (status < 0) - return status; - - if (!node) - goto backtrack; - } else - goto backtrack; - break; - case RE_OP_ANY_REV: /* Any character except a newline, backwards. 
*/ - TRACE(("%s\n", re_op_text[node->op])) - - status = try_match_ANY_REV(state, node, state->text_pos); - if (status < 0) - return status; - - if (status == RE_ERROR_SUCCESS) { - --state->text_pos; - node = node->next_1.node; - } else if (node->status & RE_STATUS_FUZZY) { - status = fuzzy_match_item(safe_state, search, &state->text_pos, - &node, -1); - if (status < 0) - return status; - - if (!node) - goto backtrack; - } else - goto backtrack; - break; - case RE_OP_ANY_U: /* Any character except a line separator. */ - TRACE(("%s\n", re_op_text[node->op])) - - status = try_match_ANY_U(state, node, state->text_pos); - if (status < 0) - return status; - - if (status == RE_ERROR_SUCCESS) { - ++state->text_pos; - node = node->next_1.node; - } else if (node->status & RE_STATUS_FUZZY) { - status = fuzzy_match_item(safe_state, search, &state->text_pos, - &node, 1); - if (status < 0) - return status; - - if (!node) - goto backtrack; - } else - goto backtrack; - break; - case RE_OP_ANY_U_REV: /* Any character except a line separator, backwards. */ - TRACE(("%s\n", re_op_text[node->op])) - - status = try_match_ANY_U_REV(state, node, state->text_pos); - if (status < 0) - return status; - - if (status == RE_ERROR_SUCCESS) { - --state->text_pos; - node = node->next_1.node; - } else if (node->status & RE_STATUS_FUZZY) { - status = fuzzy_match_item(safe_state, search, &state->text_pos, - &node, -1); - if (status < 0) - return status; - - if (!node) - goto backtrack; - } else - goto backtrack; - break; - case RE_OP_ATOMIC: /* Atomic subpattern. */ - { - RE_Info info; - int status; - TRACE(("%s\n", re_op_text[node->op])) - - if (!add_backtrack(safe_state, RE_OP_ATOMIC)) - return RE_ERROR_BACKTRACKING; - state->backtrack->atomic.too_few_errors = state->too_few_errors; - state->backtrack->atomic.capture_change = state->capture_change; - - /* Save the groups. 
*/ - if (!push_groups(safe_state)) - return RE_ERROR_MEMORY; - - save_info(state, &info); - - state->must_advance = FALSE; - - status = basic_match(safe_state, node->nonstring.next_2.node, - FALSE, TRUE); - if (status < 0) - return status; - - reset_guards(state, node->values); - - restore_info(state, &info); - - if (status != RE_ERROR_SUCCESS) - goto backtrack; - - node = node->next_1.node; - break; - } - case RE_OP_BOUNDARY: /* On a word boundary. */ - TRACE(("%s %d\n", re_op_text[node->op], node->match)) - - status = try_match_BOUNDARY(state, node, state->text_pos); - if (status < 0) - return status; - - if (status == RE_ERROR_SUCCESS) - node = node->next_1.node; - else if (node->status & RE_STATUS_FUZZY) { - status = fuzzy_match_item(safe_state, search, &state->text_pos, - &node, 0); - if (status < 0) - return status; - - if (!node) - goto backtrack; - } else - goto backtrack; - break; - case RE_OP_BRANCH: /* 2-way branch. */ - { - RE_Position next_position; - TRACE(("%s\n", re_op_text[node->op])) - - status = try_match(state, &node->next_1, state->text_pos, - &next_position); - if (status < 0) - return status; - - if (status == RE_ERROR_SUCCESS) { - if (!add_backtrack(safe_state, RE_OP_BRANCH)) - return RE_ERROR_BACKTRACKING; - state->backtrack->branch.position.node = - node->nonstring.next_2.node; - state->backtrack->branch.position.text_pos = state->text_pos; - - node = next_position.node; - state->text_pos = next_position.text_pos; - } else - node = node->nonstring.next_2.node; - break; - } - case RE_OP_CALL_REF: /* A group call reference. */ - { - TRACE(("%s %d\n", re_op_text[node->op], node->values[0])) - - if (!push_group_return(safe_state, NULL)) - return RE_ERROR_MEMORY; - - if (!add_backtrack(safe_state, RE_OP_CALL_REF)) - return RE_ERROR_BACKTRACKING; - - node = node->next_1.node; - break; - } - case RE_OP_CHARACTER: /* A character. 
*/ - TRACE(("%s %d %d\n", re_op_text[node->op], node->match, - node->values[0])) - - if (state->text_pos >= state->text_length && state->partial_side == - RE_PARTIAL_RIGHT) - return RE_ERROR_PARTIAL; - - if (state->text_pos < state->slice_end && - matches_CHARACTER(encoding, node, char_at(state->text, - state->text_pos)) == node->match) { - state->text_pos += node->step; - node = node->next_1.node; - } else if (node->status & RE_STATUS_FUZZY) { - status = fuzzy_match_item(safe_state, search, &state->text_pos, - &node, 1); - if (status < 0) - return RE_ERROR_PARTIAL; - - if (!node) - goto backtrack; - } else - goto backtrack; - break; - case RE_OP_CHARACTER_IGN: /* A character, ignoring case. */ - TRACE(("%s %d %d\n", re_op_text[node->op], node->match, - node->values[0])) - - if (state->text_pos >= state->text_length && state->partial_side == - RE_PARTIAL_RIGHT) - return RE_ERROR_PARTIAL; - - if (state->text_pos < state->slice_end && - matches_CHARACTER_IGN(encoding, node, char_at(state->text, - state->text_pos)) == node->match) { - state->text_pos += node->step; - node = node->next_1.node; - } else if (node->status & RE_STATUS_FUZZY) { - status = fuzzy_match_item(safe_state, search, &state->text_pos, - &node, 1); - if (status < 0) - return RE_ERROR_PARTIAL; - - if (!node) - goto backtrack; - } else - goto backtrack; - break; - case RE_OP_CHARACTER_IGN_REV: /* A character, backwards, ignoring case. 
*/ - TRACE(("%s %d %d\n", re_op_text[node->op], node->match, - node->values[0])) - - if (state->text_pos <= 0 && state->partial_side == RE_PARTIAL_LEFT) - return RE_ERROR_PARTIAL; - - if (state->text_pos > state->slice_start && - matches_CHARACTER_IGN(encoding, node, char_at(state->text, - state->text_pos - 1)) == node->match) { - state->text_pos += node->step; - node = node->next_1.node; - } else if (node->status & RE_STATUS_FUZZY) { - status = fuzzy_match_item(safe_state, search, &state->text_pos, - &node, -1); - if (status < 0) - return RE_ERROR_PARTIAL; - - if (!node) - goto backtrack; - } else - goto backtrack; - break; - case RE_OP_CHARACTER_REV: /* A character, backwards. */ - TRACE(("%s %d %d\n", re_op_text[node->op], node->match, - node->values[0])) - - if (state->text_pos <= 0 && state->partial_side == RE_PARTIAL_LEFT) - return RE_ERROR_PARTIAL; - - if (state->text_pos > state->slice_start && - matches_CHARACTER(encoding, node, char_at(state->text, - state->text_pos - 1)) == node->match) { - state->text_pos += node->step; - node = node->next_1.node; - } else if (node->status & RE_STATUS_FUZZY) { - status = fuzzy_match_item(safe_state, search, &state->text_pos, - &node, -1); - if (status < 0) - return RE_ERROR_PARTIAL; - - if (!node) - goto backtrack; - } else - goto backtrack; - break; - case RE_OP_DEFAULT_BOUNDARY: /* On a default word boundary. */ - TRACE(("%s %d\n", re_op_text[node->op], node->match)) - - status = try_match_DEFAULT_BOUNDARY(state, node, state->text_pos); - if (status < 0) - return status; - - if (status == RE_ERROR_SUCCESS) - node = node->next_1.node; - else if (node->status & RE_STATUS_FUZZY) { - status = fuzzy_match_item(safe_state, search, &state->text_pos, - &node, 0); - if (status < 0) - return status; - - if (!node) - goto backtrack; - } else - goto backtrack; - break; - case RE_OP_DEFAULT_END_OF_WORD: /* At the default end of a word. 
*/ - TRACE(("%s\n", re_op_text[node->op])) - - status = try_match_DEFAULT_END_OF_WORD(state, node, - state->text_pos); - if (status < 0) - return status; - - if (status == RE_ERROR_SUCCESS) - node = node->next_1.node; - else if (node->status & RE_STATUS_FUZZY) { - status = fuzzy_match_item(safe_state, search, &state->text_pos, - &node, 0); - if (status < 0) - return status; - - if (!node) - goto backtrack; - } else - goto backtrack; - break; - case RE_OP_DEFAULT_START_OF_WORD: /* At the default start of a word. */ - TRACE(("%s\n", re_op_text[node->op])) - - status = try_match_DEFAULT_START_OF_WORD(state, node, - state->text_pos); - if (status < 0) - return status; - - if (status == RE_ERROR_SUCCESS) - node = node->next_1.node; - else if (node->status & RE_STATUS_FUZZY) { - status = fuzzy_match_item(safe_state, search, &state->text_pos, - &node, 0); - if (status < 0) - return status; - - if (!node) - goto backtrack; - } else - goto backtrack; - break; - case RE_OP_END_FUZZY: /* End of fuzzy matching. */ - TRACE(("%s\n", re_op_text[node->op])) - - if (!fuzzy_insert(safe_state, state->text_pos, node)) - return RE_ERROR_BACKTRACKING; - - /* If there were too few errors, in the fuzzy section, try again. - */ - if (state->too_few_errors) { - state->too_few_errors = FALSE; - goto backtrack; - } - - state->total_fuzzy_counts[RE_FUZZY_SUB] += - state->fuzzy_info.counts[RE_FUZZY_SUB]; - state->total_fuzzy_counts[RE_FUZZY_INS] += - state->fuzzy_info.counts[RE_FUZZY_INS]; - state->total_fuzzy_counts[RE_FUZZY_DEL] += - state->fuzzy_info.counts[RE_FUZZY_DEL]; - - node = node->next_1.node; - break; - case RE_OP_END_GREEDY_REPEAT: /* End of a greedy repeat. */ - { - RE_CODE index; - RE_RepeatData* rp_data; - BOOL changed; - BOOL try_body; - int body_status; - RE_Position next_body_position; - BOOL try_tail; - int tail_status; - RE_Position next_tail_position; - RE_BacktrackData* bt_data; - TRACE(("%s %d\n", re_op_text[node->op], node->values[0])) - - /* Repeat indexes are 0-based. 
*/ - index = node->values[0]; - rp_data = &state->repeats[index]; - - /* The body has matched successfully at this position. */ - if (!guard_repeat(safe_state, index, rp_data->start, - RE_STATUS_BODY, FALSE)) - return RE_ERROR_MEMORY; - - ++rp_data->count; - - /* Have we advanced through the text or has a capture group change? - */ - changed = rp_data->capture_change != state->capture_change || - state->text_pos != rp_data->start; - - /* The counts are of type size_t, so the format needs to specify - * that. - */ - TRACE(("min is %" PY_FORMAT_SIZE_T "u, max is %" PY_FORMAT_SIZE_T - "u, count is %" PY_FORMAT_SIZE_T "u\n", node->values[1], - node->values[2], rp_data->count)) - - /* Could the body or tail match? */ - try_body = changed && (rp_data->count < node->values[2] || - ~node->values[2] == 0) && !is_repeat_guarded(safe_state, index, - state->text_pos, RE_STATUS_BODY); - if (try_body) { - body_status = try_match(state, &node->next_1, state->text_pos, - &next_body_position); - - if (body_status == RE_ERROR_FAILURE) - try_body = FALSE; - } else - body_status = RE_ERROR_FAILURE; - - try_tail = (!changed || rp_data->count >= node->values[1]) && - !is_repeat_guarded(safe_state, index, state->text_pos, - RE_STATUS_TAIL); - if(try_tail) { - tail_status = try_match(state, &node->nonstring.next_2, - state->text_pos, &next_tail_position); - - if (tail_status == RE_ERROR_FAILURE) - try_tail = FALSE; - } else - tail_status = RE_ERROR_FAILURE; - - if (!try_body && !try_tail) { - /* Neither the body nor the tail could match. */ - --rp_data->count; - goto backtrack; - } - - if (body_status < 0 || (body_status == 0 && tail_status < 0)) - return RE_ERROR_PARTIAL; - - /* Record info in case we backtrack into the body. 
*/ - if (!add_backtrack(safe_state, RE_OP_BODY_END)) - return RE_ERROR_BACKTRACKING; - bt_data = state->backtrack; - bt_data->repeat.index = index; - bt_data->repeat.count = rp_data->count - 1; - bt_data->repeat.start = rp_data->start; - bt_data->repeat.capture_change = rp_data->capture_change; - - if (try_body) { - /* Both the body and the tail could match. */ - if (try_tail) { - /* The body takes precedence. If the body fails to match - * then we want to try the tail before backtracking - * further. - */ - - /* Record backtracking info for matching the tail. */ - if (!add_backtrack(safe_state, RE_OP_MATCH_TAIL)) - return RE_ERROR_BACKTRACKING; - bt_data = state->backtrack; - bt_data->repeat.position = next_tail_position; - bt_data->repeat.index = index; - bt_data->repeat.count = rp_data->count; - bt_data->repeat.start = rp_data->start; - bt_data->repeat.capture_change = rp_data->capture_change; - bt_data->repeat.text_pos = state->text_pos; - } - - /* Record backtracking info in case the body fails to match. */ - if (!add_backtrack(safe_state, RE_OP_BODY_START)) - return RE_ERROR_BACKTRACKING; - bt_data = state->backtrack; - bt_data->repeat.index = index; - bt_data->repeat.text_pos = state->text_pos; - - rp_data->capture_change = state->capture_change; - rp_data->start = state->text_pos; - - /* Advance into the body. */ - node = next_body_position.node; - state->text_pos = next_body_position.text_pos; - } else { - /* Only the tail could match. */ - - /* Advance into the tail. */ - node = next_tail_position.node; - state->text_pos = next_tail_position.text_pos; - } - break; - } - case RE_OP_END_GROUP: /* End of a capture group. */ - { - RE_CODE private_index; - RE_CODE public_index; - RE_GroupData* group; - RE_BacktrackData* bt_data; - TRACE(("%s %d\n", re_op_text[node->op], node->values[1])) - - /* Capture group indexes are 1-based (excluding group 0, which is - * the entire matched string). 
- */ - private_index = node->values[0]; - public_index = node->values[1]; - group = &state->groups[private_index - 1]; - - if (!add_backtrack(safe_state, RE_OP_END_GROUP)) - return RE_ERROR_BACKTRACKING; - bt_data = state->backtrack; - bt_data->group.private_index = private_index; - bt_data->group.public_index = public_index; - bt_data->group.text_pos = group->span.end; - bt_data->group.capture = (BOOL)node->values[2]; - bt_data->group.current_capture = group->current_capture; - - if (pattern->group_info[private_index - 1].referenced && - group->span.end != state->text_pos) - ++state->capture_change; - group->span.end = state->text_pos; - - /* Save the capture? */ - if (node->values[2]) { - group->current_capture = (Py_ssize_t)group->capture_count; - if (!save_capture(safe_state, private_index, public_index)) - return RE_ERROR_MEMORY; - } - - node = node->next_1.node; - break; - } - case RE_OP_END_LAZY_REPEAT: /* End of a lazy repeat. */ - { - RE_CODE index; - RE_RepeatData* rp_data; - BOOL changed; - BOOL try_body; - int body_status; - RE_Position next_body_position; - BOOL try_tail; - int tail_status; - RE_Position next_tail_position; - RE_BacktrackData* bt_data; - TRACE(("%s %d\n", re_op_text[node->op], node->values[0])) - - /* Repeat indexes are 0-based. */ - index = node->values[0]; - rp_data = &state->repeats[index]; - - /* The body has matched successfully at this position. */ - if (!guard_repeat(safe_state, index, rp_data->start, - RE_STATUS_BODY, FALSE)) - return RE_ERROR_MEMORY; - - ++rp_data->count; - - /* Have we advanced through the text or has a capture group change? - */ - changed = rp_data->capture_change != state->capture_change || - state->text_pos != rp_data->start; - - /* The counts are of type size_t, so the format needs to specify - * that. - */ - TRACE(("min is %" PY_FORMAT_SIZE_T "u, max is %" PY_FORMAT_SIZE_T - "u, count is %" PY_FORMAT_SIZE_T "u\n", node->values[1], - node->values[2], rp_data->count)) - - /* Could the body or tail match? 
*/ - try_body = changed && (rp_data->count < node->values[2] || - ~node->values[2] == 0) && !is_repeat_guarded(safe_state, index, - state->text_pos, RE_STATUS_BODY); - if (try_body) { - body_status = try_match(state, &node->next_1, state->text_pos, - &next_body_position); - - if (body_status == RE_ERROR_FAILURE) - try_body = FALSE; - } else - body_status = RE_ERROR_FAILURE; - - try_tail = (!changed || rp_data->count >= node->values[1]); - if (try_tail) { - tail_status = try_match(state, &node->nonstring.next_2, - state->text_pos, &next_tail_position); - - if (tail_status == RE_ERROR_FAILURE) - try_tail = FALSE; - } else - tail_status = RE_ERROR_FAILURE; - - if (!try_body && !try_tail) { - /* Neither the body nor the tail could match. */ - --rp_data->count; - goto backtrack; - } - - if (body_status < 0 || (body_status == 0 && tail_status < 0)) - return RE_ERROR_PARTIAL; - - /* Record info in case we backtrack into the body. */ - if (!add_backtrack(safe_state, RE_OP_BODY_END)) - return RE_ERROR_BACKTRACKING; - bt_data = state->backtrack; - bt_data->repeat.index = index; - bt_data->repeat.count = rp_data->count - 1; - bt_data->repeat.start = rp_data->start; - bt_data->repeat.capture_change = rp_data->capture_change; - - if (try_body) { - /* Both the body and the tail could match. */ - if (try_tail) { - /* The tail takes precedence. If the tail fails to match - * then we want to try the body before backtracking - * further. - */ - - /* Record backtracking info for matching the body. */ - if (!add_backtrack(safe_state, RE_OP_MATCH_BODY)) - return RE_ERROR_BACKTRACKING; - bt_data = state->backtrack; - bt_data->repeat.position = next_body_position; - bt_data->repeat.index = index; - bt_data->repeat.count = rp_data->count; - bt_data->repeat.start = rp_data->start; - bt_data->repeat.capture_change = rp_data->capture_change; - bt_data->repeat.text_pos = state->text_pos; - - /* Advance into the tail. 
*/ - node = next_tail_position.node; - state->text_pos = next_tail_position.text_pos; - } else { - /* Only the body could match. */ - - /* Record backtracking info in case the body fails to - * match. - */ - if (!add_backtrack(safe_state, RE_OP_BODY_START)) - return RE_ERROR_BACKTRACKING; - bt_data = state->backtrack; - bt_data->repeat.index = index; - bt_data->repeat.text_pos = state->text_pos; - - rp_data->capture_change = state->capture_change; - rp_data->start = state->text_pos; - - /* Advance into the body. */ - node = next_body_position.node; - state->text_pos = next_body_position.text_pos; - } - } else { - /* Only the tail could match. */ - - /* Advance into the tail. */ - node = next_tail_position.node; - state->text_pos = next_tail_position.text_pos; - } - break; - } - case RE_OP_END_OF_LINE: /* At the end of a line. */ - TRACE(("%s\n", re_op_text[node->op])) - - status = try_match_END_OF_LINE(state, node, state->text_pos); - if (status < 0) - return status; - - if (status == RE_ERROR_SUCCESS) - node = node->next_1.node; - else if (node->status & RE_STATUS_FUZZY) { - status = fuzzy_match_item(safe_state, search, &state->text_pos, - &node, 0); - if (status < 0) - return status; - - if (!node) - goto backtrack; - } else - goto backtrack; - break; - case RE_OP_END_OF_LINE_U: /* At the end of a line. */ - TRACE(("%s\n", re_op_text[node->op])) - - status = try_match_END_OF_LINE_U(state, node, state->text_pos); - if (status < 0) - return status; - - if (status == RE_ERROR_SUCCESS) - node = node->next_1.node; - else if (node->status & RE_STATUS_FUZZY) { - status = fuzzy_match_item(safe_state, search, &state->text_pos, - &node, 0); - if (status < 0) - return status; - - if (!node) - goto backtrack; - } else - goto backtrack; - break; - case RE_OP_END_OF_STRING: /* At the end of the string. 
*/ - TRACE(("%s\n", re_op_text[node->op])) - - status = try_match_END_OF_STRING(state, node, state->text_pos); - if (status < 0) - return status; - - if (status == RE_ERROR_SUCCESS) - node = node->next_1.node; - else if (node->status & RE_STATUS_FUZZY) { - status = fuzzy_match_item(safe_state, search, &state->text_pos, - &node, 0); - if (status < 0) - return status; - - if (!node) - goto backtrack; - } else - goto backtrack; - break; - case RE_OP_END_OF_STRING_LINE: /* At end of string or final newline. */ - TRACE(("%s\n", re_op_text[node->op])) - - status = try_match_END_OF_STRING_LINE(state, node, - state->text_pos); - if (status < 0) - return status; - - if (status == RE_ERROR_SUCCESS) - node = node->next_1.node; - else if (node->status & RE_STATUS_FUZZY) { - status = fuzzy_match_item(safe_state, search, &state->text_pos, - &node, 0); - if (status < 0) - return status; - - if (!node) - goto backtrack; - } else - goto backtrack; - break; - case RE_OP_END_OF_STRING_LINE_U: /* At end of string or final newline. */ - TRACE(("%s\n", re_op_text[node->op])) - - status = try_match_END_OF_STRING_LINE_U(state, node, - state->text_pos); - if (status < 0) - return status; - - if (status == RE_ERROR_SUCCESS) - node = node->next_1.node; - else if (node->status & RE_STATUS_FUZZY) { - status = fuzzy_match_item(safe_state, search, &state->text_pos, - &node, 0); - if (status < 0) - return status; - - if (!node) - goto backtrack; - } else - goto backtrack; - break; - case RE_OP_END_OF_WORD: /* At the end of a word. */ - TRACE(("%s\n", re_op_text[node->op])) - - status = try_match_END_OF_WORD(state, node, state->text_pos); - if (status < 0) - return status; - - if (status == RE_ERROR_SUCCESS) - node = node->next_1.node; - else if (node->status & RE_STATUS_FUZZY) { - status = fuzzy_match_item(safe_state, search, &state->text_pos, - &node, 0); - if (status < 0) - return status; - - if (!node) - goto backtrack; - } else - goto backtrack; - break; - case RE_OP_FUZZY: /* Fuzzy matching. 
*/ - { - RE_FuzzyInfo* fuzzy_info; - RE_BacktrackData* bt_data; - TRACE(("%s\n", re_op_text[node->op])) - - fuzzy_info = &state->fuzzy_info; - - /* Save the current fuzzy info. */ - if (!add_backtrack(safe_state, RE_OP_FUZZY)) - return RE_ERROR_BACKTRACKING; - bt_data = state->backtrack; - memmove(&bt_data->fuzzy.fuzzy_info, fuzzy_info, - sizeof(RE_FuzzyInfo)); - bt_data->fuzzy.index = node->values[0]; - bt_data->fuzzy.text_pos = state->text_pos; - - /* Initialise the new fuzzy info. */ - memset(fuzzy_info->counts, 0, 4 * sizeof(fuzzy_info->counts[0])); - fuzzy_info->total_cost = 0; - fuzzy_info->node = node; - - node = node->next_1.node; - break; - } - case RE_OP_GRAPHEME_BOUNDARY: /* On a grapheme boundary. */ - TRACE(("%s\n", re_op_text[node->op])) - - status = try_match_GRAPHEME_BOUNDARY(state, node, state->text_pos); - if (status < 0) - return status; - - if (status == RE_ERROR_SUCCESS) - node = node->next_1.node; - else if (node->status & RE_STATUS_FUZZY) { - status = fuzzy_match_item(safe_state, search, &state->text_pos, - &node, 0); - if (status < 0) - return status; - - if (!node) - goto backtrack; - } else - goto backtrack; - break; - case RE_OP_GREEDY_REPEAT: /* Greedy repeat. */ - { - RE_CODE index; - RE_RepeatData* rp_data; - RE_BacktrackData* bt_data; - BOOL try_body; - int body_status; - RE_Position next_body_position; - BOOL try_tail; - int tail_status; - RE_Position next_tail_position; - TRACE(("%s %d\n", re_op_text[node->op], node->values[0])) - - /* Repeat indexes are 0-based. */ - index = node->values[0]; - rp_data = &state->repeats[index]; - - /* We might need to backtrack into the head, so save the current - * repeat. 
- */ - if (!add_backtrack(safe_state, RE_OP_GREEDY_REPEAT)) - return RE_ERROR_BACKTRACKING; - bt_data = state->backtrack; - bt_data->repeat.index = index; - bt_data->repeat.count = rp_data->count; - bt_data->repeat.start = rp_data->start; - bt_data->repeat.capture_change = rp_data->capture_change; - bt_data->repeat.text_pos = state->text_pos; - - /* Initialise the new repeat. */ - rp_data->count = 0; - rp_data->start = state->text_pos; - rp_data->capture_change = state->capture_change; - - /* Could the body or tail match? */ - try_body = node->values[2] > 0 && !is_repeat_guarded(safe_state, - index, state->text_pos, RE_STATUS_BODY); - if (try_body) { - body_status = try_match(state, &node->next_1, state->text_pos, - &next_body_position); - - if (body_status == RE_ERROR_FAILURE) - try_body = FALSE; - } else - body_status = RE_ERROR_FAILURE; - - try_tail = node->values[1] == 0; - if (try_tail) { - tail_status = try_match(state, &node->nonstring.next_2, - state->text_pos, &next_tail_position); - - if (tail_status == RE_ERROR_FAILURE) - try_tail = FALSE; - } else - tail_status = RE_ERROR_FAILURE; - if (!try_body && !try_tail) - /* Neither the body nor the tail could match. */ - goto backtrack; - - if (body_status < 0 || (body_status == 0 && tail_status < 0)) - return RE_ERROR_PARTIAL; - - if (try_body) { - if (try_tail) { - /* Both the body and the tail could match, but the body - * takes precedence. If the body fails to match then we - * want to try the tail before backtracking further. - */ - - /* Record backtracking info for matching the tail. */ - if (!add_backtrack(safe_state, RE_OP_MATCH_TAIL)) - return RE_ERROR_BACKTRACKING; - bt_data = state->backtrack; - bt_data->repeat.position = next_tail_position; - bt_data->repeat.index = index; - bt_data->repeat.count = rp_data->count; - bt_data->repeat.start = rp_data->start; - bt_data->repeat.capture_change = rp_data->capture_change; - bt_data->repeat.text_pos = state->text_pos; - } - - /* Advance into the body. 
*/ - node = next_body_position.node; - state->text_pos = next_body_position.text_pos; - } else { - /* Only the tail could match. */ - - /* Advance into the tail. */ - node = next_tail_position.node; - state->text_pos = next_tail_position.text_pos; - } - break; - } - case RE_OP_GREEDY_REPEAT_ONE: /* Greedy repeat for one character. */ - { - RE_CODE index; - RE_RepeatData* rp_data; - size_t count; - BOOL is_partial; - BOOL match; - RE_BacktrackData* bt_data; - TRACE(("%s %d\n", re_op_text[node->op], node->values[0])) - - /* Repeat indexes are 0-based. */ - index = node->values[0]; - rp_data = &state->repeats[index]; - - if (is_repeat_guarded(safe_state, index, state->text_pos, - RE_STATUS_BODY)) - goto backtrack; - - /* Count how many times the character repeats, up to the maximum. - */ - count = count_one(state, node->nonstring.next_2.node, - state->text_pos, node->values[2], &is_partial); - if (is_partial) { - state->text_pos += (Py_ssize_t)count * node->step; - return RE_ERROR_PARTIAL; - } - - /* Unmatch until it's not guarded. */ - match = FALSE; - for (;;) { - if (count < node->values[1]) - /* The number of repeats is below the minimum. */ - break; - - if (!is_repeat_guarded(safe_state, index, state->text_pos + - (Py_ssize_t)count * node->step, RE_STATUS_TAIL)) { - /* It's not guarded at this position. */ - match = TRUE; - break; - } - - if (count == 0) - break; - - --count; - } - - if (!match) { - /* The repeat has failed to match at this position. */ - if (!guard_repeat(safe_state, index, state->text_pos, - RE_STATUS_BODY, TRUE)) - return RE_ERROR_MEMORY; - goto backtrack; - } - - /* Record the backtracking info. 
*/ - if (!add_backtrack(safe_state, RE_OP_GREEDY_REPEAT_ONE)) - return RE_ERROR_BACKTRACKING; - bt_data = state->backtrack; - bt_data->repeat.position.node = node; - bt_data->repeat.index = index; - bt_data->repeat.text_pos = rp_data->start; - bt_data->repeat.count = rp_data->count; - - rp_data->start = state->text_pos; - rp_data->count = count; - - /* Advance into the tail. */ - state->text_pos += (Py_ssize_t)count * node->step; - node = node->next_1.node; - break; - } - case RE_OP_GROUP_CALL: /* Group call. */ - { - size_t index; - size_t g; - size_t r; - TRACE(("%s %d\n", re_op_text[node->op], node->values[0])) - - index = node->values[0]; - - /* Save the capture groups and repeat guards. */ - if (!push_group_return(safe_state, node->next_1.node)) - return RE_ERROR_MEMORY; - - /* Clear the capture groups for the group call. They'll be restored - * on return. - */ - for (g = 0; g < state->pattern->true_group_count; g++) { - RE_GroupData* group; - - group = &state->groups[g]; - group->span.start = -1; - group->span.end = -1; - group->current_capture = -1; - } - - /* Clear the repeat guards for the group call. They'll be restored - * on return. - */ - for (r = 0; r < state->pattern->repeat_count; r++) { - RE_RepeatData* repeat; - - repeat = &state->repeats[r]; - repeat->body_guard_list.count = 0; - repeat->body_guard_list.last_text_pos = -1; - repeat->tail_guard_list.count = 0; - repeat->tail_guard_list.last_text_pos = -1; - } - - /* Call a group, skipping its CALL_REF node. */ - node = pattern->call_ref_info[index].node->next_1.node; - - if (!add_backtrack(safe_state, RE_OP_GROUP_CALL)) - return RE_ERROR_BACKTRACKING; - break; - } - case RE_OP_GROUP_EXISTS: /* Capture group exists. */ - { - RE_GroupData* group; - TRACE(("%s %d\n", re_op_text[node->op], node->values[0])) - - /* Capture group indexes are 1-based (excluding group 0, which is - * the entire matched string). - * - * Check whether the captured text, if any, exists at this position - * in the string. 
- */ - group = &state->groups[node->values[0] - 1]; - if (group->current_capture >= 0) - node = node->next_1.node; - else - node = node->nonstring.next_2.node; - break; - } - case RE_OP_GROUP_RETURN: /* Group return. */ - { - RE_Node* return_node; - RE_BacktrackData* bt_data; - TRACE(("%s\n", re_op_text[node->op])) - - return_node = top_group_return(state); - - if (!add_backtrack(safe_state, RE_OP_GROUP_RETURN)) - return RE_ERROR_BACKTRACKING; - bt_data = state->backtrack; - bt_data->group_call.node = return_node; - bt_data->group_call.capture_change = state->capture_change; - - if (return_node) { - /* The group was called. */ - node = return_node; - - /* Save the groups. */ - if (!push_groups(safe_state)) - return RE_ERROR_MEMORY; - - /* Save the repeats. */ - if (!push_repeats(safe_state)) - return RE_ERROR_MEMORY; - } else - /* The group was not called. */ - node = node->next_1.node; - - pop_group_return(state); - break; - } - case RE_OP_LAZY_REPEAT: /* Lazy repeat. */ - { - RE_CODE index; - RE_RepeatData* rp_data; - RE_BacktrackData* bt_data; - BOOL try_body; - int body_status; - RE_Position next_body_position; - BOOL try_tail; - int tail_status; - RE_Position next_tail_position; - TRACE(("%s %d\n", re_op_text[node->op], node->values[0])) - - /* Repeat indexes are 0-based. */ - index = node->values[0]; - rp_data = &state->repeats[index]; - - /* We might need to backtrack into the head, so save the current - * repeat. - */ - if (!add_backtrack(safe_state, RE_OP_LAZY_REPEAT)) - return RE_ERROR_BACKTRACKING; - bt_data = state->backtrack; - bt_data->repeat.index = index; - bt_data->repeat.count = rp_data->count; - bt_data->repeat.start = rp_data->start; - bt_data->repeat.capture_change = rp_data->capture_change; - bt_data->repeat.text_pos = state->text_pos; - - /* Initialise the new repeat. */ - rp_data->count = 0; - rp_data->start = state->text_pos; - rp_data->capture_change = state->capture_change; - - /* Could the body or tail match? 
*/ - try_body = node->values[2] > 0 && !is_repeat_guarded(safe_state, - index, state->text_pos, RE_STATUS_BODY); - if (try_body) { - body_status = try_match(state, &node->next_1, state->text_pos, - &next_body_position); - - if (body_status == RE_ERROR_FAILURE) - try_body = FALSE; - } else - body_status = RE_ERROR_FAILURE; - - try_tail = node->values[1] == 0; - if(try_tail) { - tail_status = try_match(state, &node->nonstring.next_2, - state->text_pos, &next_tail_position); - - if (tail_status == RE_ERROR_FAILURE) - try_tail = FALSE; - } else - tail_status = RE_ERROR_FAILURE; - - if (!try_body && !try_tail) - /* Neither the body nor the tail could match. */ - goto backtrack; - - if (body_status < 0 || (body_status == 0 && tail_status < 0)) - return RE_ERROR_PARTIAL; - - if (try_body) { - if (try_tail) { - /* Both the body and the tail could match, but the tail - * takes precedence. If the tail fails to match then we - * want to try the body before backtracking further. - */ - - /* Record backtracking info for matching the tail. */ - if (!add_backtrack(safe_state, RE_OP_MATCH_BODY)) - return RE_ERROR_BACKTRACKING; - bt_data = state->backtrack; - bt_data->repeat.position = next_body_position; - bt_data->repeat.index = index; - bt_data->repeat.count = rp_data->count; - bt_data->repeat.start = rp_data->start; - bt_data->repeat.capture_change = rp_data->capture_change; - bt_data->repeat.text_pos = state->text_pos; - - /* Advance into the tail. */ - node = next_tail_position.node; - state->text_pos = next_tail_position.text_pos; - } else { - /* Advance into the body. */ - node = next_body_position.node; - state->text_pos = next_body_position.text_pos; - } - } else { - /* Only the tail could match. */ - - /* Advance into the tail. */ - node = next_tail_position.node; - state->text_pos = next_tail_position.text_pos; - } - break; - } - case RE_OP_LAZY_REPEAT_ONE: /* Lazy repeat for one character. 
*/ - { - RE_CODE index; - RE_RepeatData* rp_data; - size_t count; - BOOL is_partial; - TRACE(("%s %d\n", re_op_text[node->op], node->values[0])) - - /* Repeat indexes are 0-based. */ - index = node->values[0]; - rp_data = &state->repeats[index]; - - if (is_repeat_guarded(safe_state, index, state->text_pos, - RE_STATUS_BODY)) - goto backtrack; - - /* Count how many times the character repeats, up to the minimum. - */ - count = count_one(state, node->nonstring.next_2.node, - state->text_pos, node->values[1], &is_partial); - if (is_partial) { - state->text_pos += (Py_ssize_t)count * node->step; - return RE_ERROR_PARTIAL; - } - - /* Have we matched at least the minimum? */ - if (count < node->values[1]) { - /* The repeat has failed to match at this position. */ - if (!guard_repeat(safe_state, index, state->text_pos, - RE_STATUS_BODY, TRUE)) - return RE_ERROR_MEMORY; - goto backtrack; - } - - if (count < node->values[2]) { - /* The match is shorter than the maximum, so we might need to - * backtrack the repeat to consume more. - */ - RE_BacktrackData* bt_data; - - /* Get the offset to the repeat values in the context. */ - rp_data = &state->repeats[index]; - if (!add_backtrack(safe_state, RE_OP_LAZY_REPEAT_ONE)) - return RE_ERROR_BACKTRACKING; - bt_data = state->backtrack; - bt_data->repeat.position.node = node; - bt_data->repeat.index = index; - bt_data->repeat.text_pos = rp_data->start; - bt_data->repeat.count = rp_data->count; - - rp_data->start = state->text_pos; - rp_data->count = count; - } - - /* Advance into the tail. */ - state->text_pos += (Py_ssize_t)count * node->step; - node = node->next_1.node; - break; - } - case RE_OP_LOOKAROUND: /* Lookaround. */ - { - RE_Info info; - size_t capture_change; - Py_ssize_t saved_slice_start; - Py_ssize_t saved_slice_end; - Py_ssize_t saved_text_pos; - BOOL too_few_errors; - int status; - TRACE(("%s %d\n", re_op_text[node->op], node->match)) - - /* Save the groups. 
*/ - if (!push_groups(safe_state)) - return RE_ERROR_MEMORY; - - capture_change = state->capture_change; - - /* Save the other info. */ - save_info(state, &info); - - saved_slice_start = state->slice_start; - saved_slice_end = state->slice_end; - saved_text_pos = state->text_pos; - state->slice_start = 0; - state->slice_end = state->text_length; - state->must_advance = FALSE; - - too_few_errors = state->too_few_errors; - - status = basic_match(safe_state, node->nonstring.next_2.node, - FALSE, TRUE); - if (status < 0) - return status; - - reset_guards(state, node->values + 1); - - state->text_pos = saved_text_pos; - state->slice_end = saved_slice_end; - state->slice_start = saved_slice_start; - - /* Restore the other info. */ - restore_info(state, &info); - - if (node->match) { - /* It's a positive lookaround. */ - if (status == RE_ERROR_SUCCESS) { - /* It succeeded, so the groups and certain flags may have - * changed. - */ - if (!add_backtrack(safe_state, RE_OP_LOOKAROUND)) - return RE_ERROR_BACKTRACKING; - - /* We'll restore the groups and flags on backtracking. */ - state->backtrack->lookaround.too_few_errors = - too_few_errors; - state->backtrack->lookaround.capture_change = - capture_change; - } else { - /* It failed, so the groups and certain flags haven't - * changed. - */ - drop_groups(state); - goto backtrack; - } - } else { - /* It's a negative lookaround. */ - if (status == RE_ERROR_SUCCESS) { - /* It succeeded, so the groups and certain flags may have - * changed. We need to restore them. - */ - pop_groups(state); - state->too_few_errors = too_few_errors; - state->capture_change = capture_change; - goto backtrack; - } else - /* It failed, so the groups and certain flags haven't - * changed. - */ - drop_groups(state); - } - - node = node->next_1.node; - break; - } - case RE_OP_PROPERTY: /* A property. 
*/ - TRACE(("%s %d %d\n", re_op_text[node->op], node->match, - node->values[0])) - - if (state->text_pos >= state->text_length && state->partial_side == - RE_PARTIAL_RIGHT) - return RE_ERROR_PARTIAL; - - if (state->text_pos < state->slice_end && - matches_PROPERTY(encoding, node, char_at(state->text, - state->text_pos)) == node->match) { - state->text_pos += node->step; - node = node->next_1.node; - } else if (node->status & RE_STATUS_FUZZY) { - status = fuzzy_match_item(safe_state, search, &state->text_pos, - &node, 1); - if (status < 0) - return RE_ERROR_PARTIAL; - - if (!node) - goto backtrack; - } else - goto backtrack; - break; - case RE_OP_PROPERTY_IGN: /* A property, ignoring case. */ - TRACE(("%s %d %d\n", re_op_text[node->op], node->match, - node->values[0])) - - if (state->text_pos >= state->text_length && state->partial_side == - RE_PARTIAL_RIGHT) - return RE_ERROR_PARTIAL; - - if (state->text_pos < state->slice_end && - matches_PROPERTY_IGN(encoding, node, char_at(state->text, - state->text_pos)) == node->match) { - state->text_pos += node->step; - node = node->next_1.node; - } else if (node->status & RE_STATUS_FUZZY) { - status = fuzzy_match_item(safe_state, search, &state->text_pos, - &node, 1); - if (status < 0) - return RE_ERROR_PARTIAL; - - if (!node) - goto backtrack; - } else - goto backtrack; - break; - case RE_OP_PROPERTY_IGN_REV: /* A property, backwards, ignoring case. 
*/ - TRACE(("%s %d %d\n", re_op_text[node->op], node->match, - node->values[0])) - - if (state->text_pos <= 0 && state->partial_side == RE_PARTIAL_LEFT) - return RE_ERROR_PARTIAL; - - if (state->text_pos > state->slice_start && - matches_PROPERTY_IGN(encoding, node, char_at(state->text, - state->text_pos - 1)) == node->match) { - state->text_pos += node->step; - node = node->next_1.node; - } else if (node->status & RE_STATUS_FUZZY) { - status = fuzzy_match_item(safe_state, search, &state->text_pos, - &node, -1); - if (status < 0) - return RE_ERROR_PARTIAL; - - if (!node) - goto backtrack; - } else - goto backtrack; - break; - case RE_OP_PROPERTY_REV: /* A property, backwards. */ - TRACE(("%s %d %d\n", re_op_text[node->op], node->match, - node->values[0])) - - if (state->text_pos <= 0 && state->partial_side == RE_PARTIAL_LEFT) - return RE_ERROR_PARTIAL; - - if (state->text_pos > state->slice_start && - matches_PROPERTY(encoding, node, char_at(state->text, - state->text_pos - 1)) == node->match) { - state->text_pos += node->step; - node = node->next_1.node; - } else if (node->status & RE_STATUS_FUZZY) { - status = fuzzy_match_item(safe_state, search, &state->text_pos, - &node, -1); - if (status < 0) - return RE_ERROR_PARTIAL; - - if (!node) - goto backtrack; - } else - goto backtrack; - break; - case RE_OP_RANGE: /* A range. 
*/ - TRACE(("%s %d %d %d\n", re_op_text[node->op], node->match, - node->values[0], node->values[1])) - - if (state->text_pos >= state->text_length && state->partial_side == - RE_PARTIAL_RIGHT) - return RE_ERROR_PARTIAL; - - if (state->text_pos < state->slice_end && matches_RANGE(encoding, - node, char_at(state->text, state->text_pos)) == node->match) { - state->text_pos += node->step; - node = node->next_1.node; - } else if (node->status & RE_STATUS_FUZZY) { - status = fuzzy_match_item(safe_state, search, &state->text_pos, - &node, 1); - if (status < 0) - return RE_ERROR_PARTIAL; - - if (!node) - goto backtrack; - } else - goto backtrack; - break; - case RE_OP_RANGE_IGN: /* A range, ignoring case. */ - TRACE(("%s %d %d %d\n", re_op_text[node->op], node->match, - node->values[0], node->values[1])) - - if (state->text_pos >= state->text_length && state->partial_side == - RE_PARTIAL_RIGHT) - return RE_ERROR_PARTIAL; - - if (state->text_pos < state->slice_end && - matches_RANGE_IGN(encoding, node, char_at(state->text, - state->text_pos)) == node->match) { - state->text_pos += node->step; - node = node->next_1.node; - } else if (node->status & RE_STATUS_FUZZY) { - status = fuzzy_match_item(safe_state, search, &state->text_pos, - &node, 1); - if (status < 0) - return RE_ERROR_PARTIAL; - - if (!node) - goto backtrack; - } else - goto backtrack; - break; - case RE_OP_RANGE_IGN_REV: /* A range, backwards, ignoring case. 
*/ - TRACE(("%s %d %d %d\n", re_op_text[node->op], node->match, - node->values[0], node->values[1])) - - if (state->text_pos <= 0 && state->partial_side == RE_PARTIAL_LEFT) - return RE_ERROR_PARTIAL; - - if (state->text_pos > state->slice_start && - matches_RANGE_IGN(encoding, node, char_at(state->text, - state->text_pos - 1)) == node->match) { - state->text_pos += node->step; - node = node->next_1.node; - } else if (node->status & RE_STATUS_FUZZY) { - status = fuzzy_match_item(safe_state, search, &state->text_pos, - &node, -1); - if (status < 0) - return RE_ERROR_PARTIAL; - - if (!node) - goto backtrack; - } else - goto backtrack; - break; - case RE_OP_RANGE_REV: /* A range, backwards. */ - TRACE(("%s %d %d %d\n", re_op_text[node->op], node->match, - node->values[0], node->values[1])) - - if (state->text_pos <= 0 && state->partial_side == RE_PARTIAL_LEFT) - return RE_ERROR_PARTIAL; - - if (state->text_pos > state->slice_start && matches_RANGE(encoding, - node, char_at(state->text, state->text_pos - 1)) == node->match) - { - state->text_pos += node->step; - node = node->next_1.node; - } else if (node->status & RE_STATUS_FUZZY) { - status = fuzzy_match_item(safe_state, search, &state->text_pos, - &node, -1); - if (status < 0) - return RE_ERROR_PARTIAL; - - if (!node) - goto backtrack; - } else - goto backtrack; - break; - case RE_OP_REF_GROUP: /* Reference to a capture group. */ - { - RE_GroupData* group; - RE_GroupSpan* span; - TRACE(("%s %d\n", re_op_text[node->op], node->values[0])) - - /* Capture group indexes are 1-based (excluding group 0, which is - * the entire matched string). - * - * Check whether the captured text, if any, exists at this position - * in the string. - */ - - /* Did the group capture anything? */ - group = &state->groups[node->values[0] - 1]; - if (group->current_capture < 0) - goto backtrack; - - span = &group->captures[group->current_capture]; - - if (string_pos < 0) - string_pos = span->start; - - /* Try comparing. 
*/ - while (string_pos < span->end) { - if (state->text_pos >= state->text_length && - state->partial_side == RE_PARTIAL_RIGHT) - return RE_ERROR_PARTIAL; - - if (state->text_pos < state->slice_end && same_char(encoding, - char_at(state->text, state->text_pos), char_at(state->text, - string_pos))) { - ++string_pos; - ++state->text_pos; - } else if (node->status & RE_STATUS_FUZZY) { - BOOL matched; - - status = fuzzy_match_string(safe_state, search, - &state->text_pos, node, &string_pos, &matched, 1); - if (status < 0) - return RE_ERROR_PARTIAL; - - if (!matched) { - string_pos = -1; - goto backtrack; - } - } else { - string_pos = -1; - goto backtrack; - } - } - - string_pos = -1; - - /* Successful match. */ - node = node->next_1.node; - break; - } - case RE_OP_REF_GROUP_FLD: /* Reference to a capture group, ignoring case. */ - { - RE_GroupData* group; - RE_GroupSpan* span; - int (*full_case_fold)(Py_UCS4 ch, Py_UCS4* folded); - int folded_len; - int gfolded_len; - Py_UCS4 folded[RE_MAX_FOLDED]; - Py_UCS4 gfolded[RE_MAX_FOLDED]; - TRACE(("%s %d\n", re_op_text[node->op], node->values[0])) - - /* Capture group indexes are 1-based (excluding group 0, which is - * the entire matched string). - * - * Check whether the captured text, if any, exists at this position - * in the string. - */ - - /* Did the group capture anything? */ - group = &state->groups[node->values[0] - 1]; - if (group->current_capture < 0) - goto backtrack; - - span = &group->captures[group->current_capture]; - - full_case_fold = encoding->full_case_fold; - - if (string_pos < 0) { - string_pos = span->start; - folded_pos = 0; - folded_len = 0; - gfolded_pos = 0; - gfolded_len = 0; - } else { - folded_len = full_case_fold(char_at(state->text, - state->text_pos), folded); - gfolded_len = full_case_fold(char_at(state->text, string_pos), - gfolded); - } - - /* Try comparing. */ - while (string_pos < span->end) { - /* Case-fold at current position in text. 
*/ - if (folded_pos >= folded_len) { - if (state->text_pos >= state->text_length && - state->partial_side == RE_PARTIAL_RIGHT) - return RE_ERROR_PARTIAL; - - if (state->text_pos < state->slice_end) - folded_len = full_case_fold(char_at(state->text, - state->text_pos), folded); - else - folded_len = 0; - - folded_pos = 0; - } - - /* Case-fold at current position in group. */ - if (gfolded_pos >= gfolded_len) { - gfolded_len = full_case_fold(char_at(state->text, - string_pos), gfolded); - gfolded_pos = 0; - } - - if (folded_pos < folded_len && folded[folded_pos] == - gfolded[gfolded_pos]) { - ++folded_pos; - ++gfolded_pos; - } else if (node->status & RE_STATUS_FUZZY) { - BOOL matched; - - status = fuzzy_match_group_fld(safe_state, search, - &state->text_pos, node, &folded_pos, folded_len, - &string_pos, &gfolded_pos, gfolded_len, &matched, 1); - if (status < 0) - return RE_ERROR_PARTIAL; - - if (!matched) { - string_pos = -1; - goto backtrack; - } - } else { - string_pos = -1; - goto backtrack; - } - - if (folded_pos >= folded_len && folded_len > 0) - ++state->text_pos; - - if (gfolded_pos >= gfolded_len) - ++string_pos; - } - - string_pos = -1; - - if (folded_pos < folded_len || gfolded_pos < gfolded_len) - goto backtrack; - - /* Successful match. */ - node = node->next_1.node; - break; - } - case RE_OP_REF_GROUP_FLD_REV: /* Reference to a capture group, ignoring case. */ - { - RE_GroupData* group; - RE_GroupSpan* span; - int (*full_case_fold)(Py_UCS4 ch, Py_UCS4* folded); - int folded_len; - int gfolded_len; - Py_UCS4 folded[RE_MAX_FOLDED]; - Py_UCS4 gfolded[RE_MAX_FOLDED]; - TRACE(("%s %d\n", re_op_text[node->op], node->values[0])) - - /* Capture group indexes are 1-based (excluding group 0, which is - * the entire matched string). - * - * Check whether the captured text, if any, exists at this position - * in the string. - */ - - /* Did the group capture anything? 
*/ - group = &state->groups[node->values[0] - 1]; - if (group->current_capture < 0) - goto backtrack; - - span = &group->captures[group->current_capture]; - - full_case_fold = encoding->full_case_fold; - - if (string_pos < 0) { - string_pos = span->end; - folded_pos = 0; - folded_len = 0; - gfolded_pos = 0; - gfolded_len = 0; - } else { - folded_len = full_case_fold(char_at(state->text, - state->text_pos - 1), folded); - gfolded_len = full_case_fold(char_at(state->text, string_pos - - 1), gfolded); - } - - /* Try comparing. */ - while (string_pos > span->start) { - /* Case-fold at current position in text. */ - if (folded_pos <= 0) { - if (state->text_pos <= 0 && state->partial_side == - RE_PARTIAL_LEFT) - return RE_ERROR_PARTIAL; - - if (state->text_pos > state->slice_start) - folded_len = full_case_fold(char_at(state->text, - state->text_pos - 1), folded); - else - folded_len = 0; - - folded_pos = folded_len; - } - - /* Case-fold at current position in group. */ - if (gfolded_pos <= 0) { - gfolded_len = full_case_fold(char_at(state->text, - string_pos - 1), gfolded); - gfolded_pos = gfolded_len; - } - - if (folded_pos > 0 && folded[folded_pos - 1] == - gfolded[gfolded_pos - 1]) { - --folded_pos; - --gfolded_pos; - } else if (node->status & RE_STATUS_FUZZY) { - BOOL matched; - - status = fuzzy_match_group_fld(safe_state, search, - &state->text_pos, node, &folded_pos, folded_len, - &string_pos, &gfolded_pos, gfolded_len, &matched, -1); - if (status < 0) - return RE_ERROR_PARTIAL; - - if (!matched) { - string_pos = -1; - goto backtrack; - } - } else { - string_pos = -1; - goto backtrack; - } - - if (folded_pos <= 0 && folded_len > 0) - --state->text_pos; - - if (gfolded_pos <= 0) - --string_pos; - } - - string_pos = -1; - - if (folded_pos > 0 || gfolded_pos > 0) - goto backtrack; - - /* Successful match. */ - node = node->next_1.node; - break; - } - case RE_OP_REF_GROUP_IGN: /* Reference to a capture group, ignoring case. 
*/ - { - RE_GroupData* group; - RE_GroupSpan* span; - TRACE(("%s %d\n", re_op_text[node->op], node->values[0])) - - /* Capture group indexes are 1-based (excluding group 0, which is - * the entire matched string). - * - * Check whether the captured text, if any, exists at this position - * in the string. - */ - - /* Did the group capture anything? */ - group = &state->groups[node->values[0] - 1]; - if (group->current_capture < 0) - goto backtrack; - - span = &group->captures[group->current_capture]; - - if (string_pos < 0) - string_pos = span->start; - - /* Try comparing. */ - while (string_pos < span->end) { - if (state->text_pos >= state->text_length && - state->partial_side == RE_PARTIAL_RIGHT) - return RE_ERROR_PARTIAL; - - if (state->text_pos < state->slice_end && - same_char_ign(encoding, char_at(state->text, - state->text_pos), char_at(state->text, string_pos))) { - ++string_pos; - ++state->text_pos; - } else if (node->status & RE_STATUS_FUZZY) { - BOOL matched; - - status = fuzzy_match_string(safe_state, search, - &state->text_pos, node, &string_pos, &matched, 1); - if (status < 0) - return RE_ERROR_PARTIAL; - - if (!matched) { - string_pos = -1; - goto backtrack; - } - } else { - string_pos = -1; - goto backtrack; - } - } - - string_pos = -1; - - /* Successful match. */ - node = node->next_1.node; - break; - } - case RE_OP_REF_GROUP_IGN_REV: /* Reference to a capture group, ignoring case. */ - { - RE_GroupData* group; - RE_GroupSpan* span; - TRACE(("%s %d\n", re_op_text[node->op], node->values[0])) - - /* Capture group indexes are 1-based (excluding group 0, which is - * the entire matched string). - * - * Check whether the captured text, if any, exists at this position - * in the string. - */ - - /* Did the group capture anything? */ - group = &state->groups[node->values[0] - 1]; - if (group->current_capture < 0) - goto backtrack; - - span = &group->captures[group->current_capture]; - - if (string_pos < 0) - string_pos = span->end; - - /* Try comparing. 
*/ - while (string_pos > span->start) { - if (state->text_pos <= 0 && state->partial_side == - RE_PARTIAL_LEFT) - return RE_ERROR_PARTIAL; - - if (state->text_pos > state->slice_start && - same_char_ign(encoding, char_at(state->text, state->text_pos - - 1), char_at(state->text, string_pos - 1))) { - --string_pos; - --state->text_pos; - } else if (node->status & RE_STATUS_FUZZY) { - BOOL matched; - - status = fuzzy_match_string(safe_state, search, - &state->text_pos, node, &string_pos, &matched, -1); - if (status < 0) - return RE_ERROR_PARTIAL; - - if (!matched) { - string_pos = -1; - goto backtrack; - } - } else { - string_pos = -1; - goto backtrack; - } - } - - string_pos = -1; - - /* Successful match. */ - node = node->next_1.node; - break; - } - case RE_OP_REF_GROUP_REV: /* Reference to a capture group. */ - { - RE_GroupData* group; - RE_GroupSpan* span; - TRACE(("%s %d\n", re_op_text[node->op], node->values[0])) - - /* Capture group indexes are 1-based (excluding group 0, which is - * the entire matched string). - * - * Check whether the captured text, if any, exists at this position - * in the string. - */ - - /* Did the group capture anything? */ - group = &state->groups[node->values[0] - 1]; - if (group->current_capture < 0) - goto backtrack; - - span = &group->captures[group->current_capture]; - - if (string_pos < 0) - string_pos = span->end; - - /* Try comparing. 
*/ - while (string_pos > span->start) { - if (state->text_pos <= 0 && state->partial_side == - RE_PARTIAL_LEFT) - return RE_ERROR_PARTIAL; - - if (state->text_pos > state->slice_start && same_char(encoding, - char_at(state->text, state->text_pos - 1), - char_at(state->text, string_pos - 1))) { - --string_pos; - --state->text_pos; - } else if (node->status & RE_STATUS_FUZZY) { - BOOL matched; - - status = fuzzy_match_string(safe_state, search, - &state->text_pos, node, &string_pos, &matched, -1); - if (status < 0) - return RE_ERROR_PARTIAL; - - if (!matched) { - string_pos = -1; - goto backtrack; - } - } else { - string_pos = -1; - goto backtrack; - } - } - - string_pos = -1; - - /* Successful match. */ - node = node->next_1.node; - break; - } - case RE_OP_SEARCH_ANCHOR: /* At the start of the search. */ - TRACE(("%s %d\n", re_op_text[node->op], node->values[0])) - - if (state->text_pos == state->search_anchor) - node = node->next_1.node; - else if (node->status & RE_STATUS_FUZZY) { - status = fuzzy_match_item(safe_state, search, &state->text_pos, - &node, 0); - if (status < 0) - return status; - - if (!node) - goto backtrack; - } else - goto backtrack; - break; - case RE_OP_SET_DIFF: /* Character set. */ - case RE_OP_SET_INTER: - case RE_OP_SET_SYM_DIFF: - case RE_OP_SET_UNION: - TRACE(("%s %d\n", re_op_text[node->op], node->match)) - - if (state->text_pos >= state->text_length && state->partial_side == - RE_PARTIAL_RIGHT) - return RE_ERROR_PARTIAL; - - if (state->text_pos < state->slice_end && matches_SET(encoding, - node, char_at(state->text, state->text_pos)) == node->match) { - state->text_pos += node->step; - node = node->next_1.node; - } else if (node->status & RE_STATUS_FUZZY) { - status = fuzzy_match_item(safe_state, search, &state->text_pos, - &node, 1); - if (status < 0) - return RE_ERROR_PARTIAL; - - if (!node) - goto backtrack; - } else - goto backtrack; - break; - case RE_OP_SET_DIFF_IGN: /* Character set, ignoring case. 
*/ - case RE_OP_SET_INTER_IGN: - case RE_OP_SET_SYM_DIFF_IGN: - case RE_OP_SET_UNION_IGN: - TRACE(("%s %d\n", re_op_text[node->op], node->match)) - - if (state->text_pos >= state->text_length && state->partial_side == - RE_PARTIAL_RIGHT) - return RE_ERROR_PARTIAL; - - if (state->text_pos < state->slice_end && matches_SET_IGN(encoding, - node, char_at(state->text, state->text_pos)) == node->match) { - state->text_pos += node->step; - node = node->next_1.node; - } else if (node->status & RE_STATUS_FUZZY) { - status = fuzzy_match_item(safe_state, search, &state->text_pos, - &node, 1); - if (status < 0) - return RE_ERROR_PARTIAL; - - if (!node) - goto backtrack; - } else - goto backtrack; - break; - case RE_OP_SET_DIFF_IGN_REV: /* Character set, ignoring case. */ - case RE_OP_SET_INTER_IGN_REV: - case RE_OP_SET_SYM_DIFF_IGN_REV: - case RE_OP_SET_UNION_IGN_REV: - TRACE(("%s %d\n", re_op_text[node->op], node->match)) - - if (state->text_pos <= 0 && state->partial_side == RE_PARTIAL_LEFT) - return RE_ERROR_PARTIAL; - - if (state->text_pos > state->slice_start && - matches_SET_IGN(encoding, node, char_at(state->text, - state->text_pos - 1)) == node->match) { - state->text_pos += node->step; - node = node->next_1.node; - } else if (node->status & RE_STATUS_FUZZY) { - status = fuzzy_match_item(safe_state, search, &state->text_pos, - &node, -1); - if (status < 0) - return RE_ERROR_PARTIAL; - - if (!node) - goto backtrack; - } else - goto backtrack; - break; - case RE_OP_SET_DIFF_REV: /* Character set. 
*/ - case RE_OP_SET_INTER_REV: - case RE_OP_SET_SYM_DIFF_REV: - case RE_OP_SET_UNION_REV: - TRACE(("%s %d\n", re_op_text[node->op], node->match)) - - if (state->text_pos <= 0 && state->partial_side == RE_PARTIAL_LEFT) - return RE_ERROR_PARTIAL; - - if (state->text_pos > state->slice_start && matches_SET(encoding, - node, char_at(state->text, state->text_pos - 1)) == node->match) - { - state->text_pos += node->step; - node = node->next_1.node; - } else if (node->status & RE_STATUS_FUZZY) { - status = fuzzy_match_item(safe_state, search, &state->text_pos, - &node, -1); - if (status < 0) - return RE_ERROR_PARTIAL; - - if (!node) - goto backtrack; - } else - goto backtrack; - break; - case RE_OP_START_GROUP: /* Start of a capture group. */ - { - RE_CODE private_index; - RE_CODE public_index; - RE_GroupData* group; - RE_BacktrackData* bt_data; - TRACE(("%s %d\n", re_op_text[node->op], node->values[1])) - - /* Capture group indexes are 1-based (excluding group 0, which is - * the entire matched string). - */ - private_index = node->values[0]; - public_index = node->values[1]; - group = &state->groups[private_index - 1]; - - if (!add_backtrack(safe_state, RE_OP_START_GROUP)) - return RE_ERROR_BACKTRACKING; - bt_data = state->backtrack; - bt_data->group.private_index = private_index; - bt_data->group.public_index = public_index; - bt_data->group.text_pos = group->span.start; - bt_data->group.capture = (BOOL)node->values[2]; - bt_data->group.current_capture = group->current_capture; - - if (pattern->group_info[private_index - 1].referenced && - group->span.start != state->text_pos) - ++state->capture_change; - group->span.start = state->text_pos; - - /* Save the capture? */ - if (node->values[2]) { - group->current_capture = (Py_ssize_t)group->capture_count; - if (!save_capture(safe_state, private_index, public_index)) - return RE_ERROR_MEMORY; - } - - node = node->next_1.node; - break; - } - case RE_OP_START_OF_LINE: /* At the start of a line. 
*/ - TRACE(("%s\n", re_op_text[node->op])) - - status = try_match_START_OF_LINE(state, node, state->text_pos); - if (status < 0) - return status; - - if (status == RE_ERROR_SUCCESS) - node = node->next_1.node; - else if (node->status & RE_STATUS_FUZZY) { - status = fuzzy_match_item(safe_state, search, &state->text_pos, - &node, 0); - if (status < 0) - return status; - - if (!node) - goto backtrack; - } else - goto backtrack; - break; - case RE_OP_START_OF_LINE_U: /* At the start of a line. */ - TRACE(("%s\n", re_op_text[node->op])) - - status = try_match_START_OF_LINE_U(state, node, state->text_pos); - if (status < 0) - return status; - - if (status == RE_ERROR_SUCCESS) - node = node->next_1.node; - else if (node->status & RE_STATUS_FUZZY) { - status = fuzzy_match_item(safe_state, search, &state->text_pos, - &node, 0); - if (status < 0) - return status; - - if (!node) - goto backtrack; - } else - goto backtrack; - break; - case RE_OP_START_OF_STRING: /* At the start of the string. */ - TRACE(("%s\n", re_op_text[node->op])) - - status = try_match_START_OF_STRING(state, node, state->text_pos); - if (status < 0) - return status; - - if (status == RE_ERROR_SUCCESS) - node = node->next_1.node; - else if (node->status & RE_STATUS_FUZZY) { - status = fuzzy_match_item(safe_state, search, &state->text_pos, - &node, 0); - if (status < 0) - return status; - - if (!node) - goto backtrack; - } else - goto backtrack; - break; - case RE_OP_START_OF_WORD: /* At the start of a word. */ - TRACE(("%s\n", re_op_text[node->op])) - - status = try_match_START_OF_WORD(state, node, state->text_pos); - if (status < 0) - return status; - - if (status == RE_ERROR_SUCCESS) - node = node->next_1.node; - else if (node->status & RE_STATUS_FUZZY) { - status = fuzzy_match_item(safe_state, search, &state->text_pos, - &node, 0); - if (status < 0) - return status; - - if (!node) - goto backtrack; - } else - goto backtrack; - break; - case RE_OP_STRING: /* A string. 
*/ - { - Py_ssize_t length; - RE_CODE* values; - TRACE(("%s %d\n", re_op_text[node->op], node->value_count)) - - if ((node->status & RE_STATUS_REQUIRED) && state->text_pos == - state->req_pos && string_pos < 0) - state->text_pos = state->req_end; - else { - length = (Py_ssize_t)node->value_count; - - if (string_pos < 0) - string_pos = 0; - - values = node->values; - - /* Try comparing. */ - while (string_pos < length) { - if (state->text_pos >= state->text_length && - state->partial_side == RE_PARTIAL_RIGHT) - return RE_ERROR_PARTIAL; - - if (state->text_pos < state->slice_end && - same_char(encoding, char_at(state->text, - state->text_pos), values[string_pos])) { - ++string_pos; - ++state->text_pos; - } else if (node->status & RE_STATUS_FUZZY) { - BOOL matched; - - status = fuzzy_match_string(safe_state, search, - &state->text_pos, node, &string_pos, &matched, 1); - if (status < 0) - return RE_ERROR_PARTIAL; - - if (!matched) { - string_pos = -1; - goto backtrack; - } - } else { - string_pos = -1; - goto backtrack; - } - } - } - - string_pos = -1; - - /* Successful match. */ - node = node->next_1.node; - break; - } - case RE_OP_STRING_FLD: /* A string, ignoring case. 
*/ - { - Py_ssize_t length; - int (*full_case_fold)(Py_UCS4 ch, Py_UCS4* folded); - RE_CODE* values; - int folded_len; - Py_UCS4 folded[RE_MAX_FOLDED]; - TRACE(("%s %d\n", re_op_text[node->op], node->value_count)) - - if ((node->status & RE_STATUS_REQUIRED) && state->text_pos == - state->req_pos && string_pos < 0) - state->text_pos = state->req_end; - else { - length = (Py_ssize_t)node->value_count; - - full_case_fold = encoding->full_case_fold; - - if (string_pos < 0) { - string_pos = 0; - folded_pos = 0; - folded_len = 0; - } else { - folded_len = full_case_fold(char_at(state->text, - state->text_pos), folded); - if (folded_pos >= folded_len) { - if (state->text_pos >= state->slice_end) - goto backtrack; - - ++state->text_pos; - folded_pos = 0; - folded_len = 0; - } - } - - values = node->values; - - /* Try comparing. */ - while (string_pos < length) { - if (folded_pos >= folded_len) { - if (state->text_pos >= state->text_length && - state->partial_side == RE_PARTIAL_RIGHT) - return RE_ERROR_PARTIAL; - - folded_len = full_case_fold(char_at(state->text, - state->text_pos), folded); - folded_pos = 0; - } - - if (same_char_ign(encoding, folded[folded_pos], - values[string_pos])) { - ++string_pos; - ++folded_pos; - - if (folded_pos >= folded_len) - ++state->text_pos; - } else if (node->status & RE_STATUS_FUZZY) { - BOOL matched; - - status = fuzzy_match_string_fld(safe_state, search, - &state->text_pos, node, &string_pos, &folded_pos, - folded_len, &matched, 1); - if (status < 0) - return RE_ERROR_PARTIAL; - - if (!matched) { - string_pos = -1; - goto backtrack; - } - - if (folded_pos >= folded_len) - ++state->text_pos; - } else { - string_pos = -1; - goto backtrack; - } - } - - if (node->status & RE_STATUS_FUZZY) { - while (folded_pos < folded_len) { - BOOL matched; - - if (!fuzzy_match_string_fld(safe_state, search, - &state->text_pos, node, &string_pos, &folded_pos, - folded_len, &matched, 1)) - return RE_ERROR_BACKTRACKING; - - if (!matched) { - string_pos = -1; 
- goto backtrack; - } - - if (folded_pos >= folded_len) - ++state->text_pos; - } - } - - string_pos = -1; - - if (folded_pos < folded_len) - goto backtrack; - } - - /* Successful match. */ - node = node->next_1.node; - break; - } - case RE_OP_STRING_FLD_REV: /* A string, ignoring case. */ - { - Py_ssize_t length; - int (*full_case_fold)(Py_UCS4 ch, Py_UCS4* folded); - RE_CODE* values; - int folded_len; - Py_UCS4 folded[RE_MAX_FOLDED]; - TRACE(("%s %d\n", re_op_text[node->op], node->value_count)) - - if ((node->status & RE_STATUS_REQUIRED) && state->text_pos == - state->req_pos && string_pos < 0) - state->text_pos = state->req_end; - else { - length = (Py_ssize_t)node->value_count; - - full_case_fold = encoding->full_case_fold; - - if (string_pos < 0) { - string_pos = length; - folded_pos = 0; - folded_len = 0; - } else { - folded_len = full_case_fold(char_at(state->text, - state->text_pos - 1), folded); - if (folded_pos <= 0) { - if (state->text_pos <= state->slice_start) - goto backtrack; - - --state->text_pos; - folded_pos = 0; - folded_len = 0; - } - } - - values = node->values; - - /* Try comparing. 
*/ - while (string_pos > 0) { - if (folded_pos <= 0) { - if (state->text_pos <= 0 && state->partial_side == - RE_PARTIAL_LEFT) - return RE_ERROR_PARTIAL; - - folded_len = full_case_fold(char_at(state->text, - state->text_pos - 1), folded); - folded_pos = folded_len; - } - - if (same_char_ign(encoding, folded[folded_pos - 1], - values[string_pos - 1])) { - --string_pos; - --folded_pos; - - if (folded_pos <= 0) - --state->text_pos; - } else if (node->status & RE_STATUS_FUZZY) { - BOOL matched; - - status = fuzzy_match_string_fld(safe_state, search, - &state->text_pos, node, &string_pos, &folded_pos, - folded_len, &matched, -1); - if (status < 0) - return RE_ERROR_PARTIAL; - - if (!matched) { - string_pos = -1; - goto backtrack; - } - - if (folded_pos <= 0) - --state->text_pos; - } else { - string_pos = -1; - goto backtrack; - } - } - - if (node->status & RE_STATUS_FUZZY) { - while (folded_pos > 0) { - BOOL matched; - - if (!fuzzy_match_string_fld(safe_state, search, - &state->text_pos, node, &string_pos, &folded_pos, - folded_len, &matched, -1)) - return RE_ERROR_BACKTRACKING; - - if (!matched) { - string_pos = -1; - goto backtrack; - } - - if (folded_pos <= 0) - --state->text_pos; - } - } - - string_pos = -1; - - if (folded_pos > 0) - goto backtrack; - } - - /* Successful match. */ - node = node->next_1.node; - break; - } - case RE_OP_STRING_IGN: /* A string, ignoring case. */ - { - Py_ssize_t length; - RE_CODE* values; - TRACE(("%s %d\n", re_op_text[node->op], node->value_count)) - - if ((node->status & RE_STATUS_REQUIRED) && state->text_pos == - state->req_pos && string_pos < 0) - state->text_pos = state->req_end; - else { - length = (Py_ssize_t)node->value_count; - - if (string_pos < 0) - string_pos = 0; - - values = node->values; - - /* Try comparing. 
*/ - while (string_pos < length) { - if (state->text_pos >= state->text_length && - state->partial_side == RE_PARTIAL_RIGHT) - return RE_ERROR_PARTIAL; - - if (state->text_pos < state->slice_end && - same_char_ign(encoding, char_at(state->text, - state->text_pos), values[string_pos])) { - ++string_pos; - ++state->text_pos; - } else if (node->status & RE_STATUS_FUZZY) { - BOOL matched; - - status = fuzzy_match_string(safe_state, search, - &state->text_pos, node, &string_pos, &matched, 1); - if (status < 0) - return RE_ERROR_PARTIAL; - - if (!matched) { - string_pos = -1; - goto backtrack; - } - } else { - string_pos = -1; - goto backtrack; - } - } - } - - string_pos = -1; - - /* Successful match. */ - node = node->next_1.node; - break; - } - case RE_OP_STRING_IGN_REV: /* A string, ignoring case. */ - { - Py_ssize_t length; - RE_CODE* values; - TRACE(("%s %d\n", re_op_text[node->op], node->value_count)) - - if ((node->status & RE_STATUS_REQUIRED) && state->text_pos == - state->req_pos && string_pos < 0) - state->text_pos = state->req_end; - else { - length = (Py_ssize_t)node->value_count; - - if (string_pos < 0) - string_pos = length; - - values = node->values; - - /* Try comparing. */ - while (string_pos > 0) { - if (state->text_pos <= 0 && state->partial_side == - RE_PARTIAL_LEFT) - return RE_ERROR_PARTIAL; - - if (state->text_pos > state->slice_start && - same_char_ign(encoding, char_at(state->text, - state->text_pos - 1), values[string_pos - 1])) { - --string_pos; - --state->text_pos; - } else if (node->status & RE_STATUS_FUZZY) { - BOOL matched; - - status = fuzzy_match_string(safe_state, search, - &state->text_pos, node, &string_pos, &matched, -1); - if (status < 0) - return RE_ERROR_PARTIAL; - - if (!matched) { - string_pos = -1; - goto backtrack; - } - } else { - string_pos = -1; - goto backtrack; - } - } - } - - string_pos = -1; - - /* Successful match. */ - node = node->next_1.node; - break; - } - case RE_OP_STRING_REV: /* A string. 
*/ - { - Py_ssize_t length; - RE_CODE* values; - TRACE(("%s %d\n", re_op_text[node->op], node->value_count)) - - if ((node->status & RE_STATUS_REQUIRED) && state->text_pos == - state->req_pos && string_pos < 0) - state->text_pos = state->req_end; - else { - length = (Py_ssize_t)node->value_count; - - if (string_pos < 0) - string_pos = length; - - values = node->values; - - /* Try comparing. */ - while (string_pos > 0) { - if (state->text_pos <= 0 && state->partial_side == - RE_PARTIAL_LEFT) - return RE_ERROR_PARTIAL; - - if (state->text_pos > state->slice_start && - same_char(encoding, char_at(state->text, state->text_pos - - 1), values[string_pos - 1])) { - --string_pos; - --state->text_pos; - } else if (node->status & RE_STATUS_FUZZY) { - BOOL matched; - - status = fuzzy_match_string(safe_state, search, - &state->text_pos, node, &string_pos, &matched, -1); - if (status < 0) - return RE_ERROR_PARTIAL; - - if (!matched) { - string_pos = -1; - goto backtrack; - } - } else { - string_pos = -1; - goto backtrack; - } - } - } - - string_pos = -1; - - /* Successful match. */ - node = node->next_1.node; - break; - } - case RE_OP_STRING_SET: /* Member of a string set. */ - { - int status; - TRACE(("%s\n", re_op_text[node->op])) - - status = string_set_match_fwdrev(safe_state, node, FALSE); - if (status < 0) - return status; - if (status == 0) - goto backtrack; - node = node->next_1.node; - break; - } - case RE_OP_STRING_SET_FLD: /* Member of a string set, ignoring case. */ - { - int status; - TRACE(("%s\n", re_op_text[node->op])) - - status = string_set_match_fld_fwdrev(safe_state, node, FALSE); - if (status < 0) - return status; - if (status == 0) - goto backtrack; - node = node->next_1.node; - break; - } - case RE_OP_STRING_SET_FLD_REV: /* Member of a string set, ignoring case. 
*/ - { - int status; - TRACE(("%s\n", re_op_text[node->op])) - - status = string_set_match_fld_fwdrev(safe_state, node, TRUE); - if (status < 0) - return status; - if (status == 0) - goto backtrack; - node = node->next_1.node; - break; - } - case RE_OP_STRING_SET_IGN: /* Member of a string set, ignoring case. */ - { - int status; - TRACE(("%s\n", re_op_text[node->op])) - - status = string_set_match_ign_fwdrev(safe_state, node, FALSE); - if (status < 0) - return status; - if (status == 0) - goto backtrack; - node = node->next_1.node; - break; - } - case RE_OP_STRING_SET_IGN_REV: /* Member of a string set, ignoring case. */ - { - int status; - TRACE(("%s\n", re_op_text[node->op])) - - status = string_set_match_ign_fwdrev(safe_state, node, TRUE); - if (status < 0) - return status; - if (status == 0) - goto backtrack; - node = node->next_1.node; - break; - } - case RE_OP_STRING_SET_REV: /* Member of a string set. */ - { - int status; - TRACE(("%s\n", re_op_text[node->op])) - - status = string_set_match_fwdrev(safe_state, node, TRUE); - if (status < 0) - return status; - if (status == 0) - goto backtrack; - node = node->next_1.node; - break; - } - case RE_OP_SUCCESS: /* Success. */ - /* Must the match advance past its start? */ - TRACE(("%s\n", re_op_text[node->op])) - - if (state->text_pos == state->search_anchor && state->must_advance) - goto backtrack; - - if (state->match_all && !recursive_call) { - /* We want to match all of the slice. */ - if (state->reverse) { - if (state->text_pos != state->slice_start) - goto backtrack; - } else { - if (state->text_pos != state->slice_end) - goto backtrack; - } - } - - return RE_ERROR_SUCCESS; - default: /* Illegal opcode! */ - TRACE(("UNKNOWN OP %d\n", node->op)) - return RE_ERROR_ILLEGAL; - } - } - -backtrack: - for (;;) { - RE_BacktrackData* bt_data; - TRACE(("BACKTRACK ")) - - /* Should we abort the matching? 
*/ - ++state->iterations; - - if (state->iterations == 0 && safe_check_signals(safe_state)) - return RE_ERROR_INTERRUPTED; - - bt_data = last_backtrack(state); - - switch (bt_data->op) { - case RE_OP_ANY: /* Any character except a newline. */ - case RE_OP_ANY_ALL: /* Any character at all. */ - case RE_OP_ANY_ALL_REV: /* Any character at all, backwards. */ - case RE_OP_ANY_REV: /* Any character except a newline, backwards. */ - case RE_OP_ANY_U: /* Any character except a line separator. */ - case RE_OP_ANY_U_REV: /* Any character except a line separator, backwards. */ - case RE_OP_CHARACTER: /* A character. */ - case RE_OP_CHARACTER_IGN: /* A character, ignoring case. */ - case RE_OP_CHARACTER_IGN_REV: /* A character, ignoring case, backwards. */ - case RE_OP_CHARACTER_REV: /* A character, backwards. */ - case RE_OP_PROPERTY: /* A property. */ - case RE_OP_PROPERTY_IGN: /* A property, ignoring case. */ - case RE_OP_PROPERTY_IGN_REV: /* A property, ignoring case, backwards. */ - case RE_OP_PROPERTY_REV: /* A property, backwards. */ - case RE_OP_RANGE: /* A range. */ - case RE_OP_RANGE_IGN: /* A range, ignoring case. */ - case RE_OP_RANGE_IGN_REV: /* A range, ignoring case, backwards. */ - case RE_OP_RANGE_REV: /* A range, backwards. */ - case RE_OP_SET_DIFF: /* Set difference. */ - case RE_OP_SET_DIFF_IGN: /* Set difference, ignoring case. */ - case RE_OP_SET_DIFF_IGN_REV: /* Set difference, ignoring case, backwards. */ - case RE_OP_SET_DIFF_REV: /* Set difference, backwards. */ - case RE_OP_SET_INTER: /* Set intersection. */ - case RE_OP_SET_INTER_IGN: /* Set intersection, ignoring case. */ - case RE_OP_SET_INTER_IGN_REV: /* Set intersection, ignoring case, backwards. */ - case RE_OP_SET_INTER_REV: /* Set intersection, backwards. */ - case RE_OP_SET_SYM_DIFF: /* Set symmetric difference. */ - case RE_OP_SET_SYM_DIFF_IGN: /* Set symmetric difference, ignoring case. */ - case RE_OP_SET_SYM_DIFF_IGN_REV: /* Set symmetric difference, ignoring case, backwards. 
*/ - case RE_OP_SET_SYM_DIFF_REV: /* Set symmetric difference, backwards. */ - case RE_OP_SET_UNION: /* Set union. */ - case RE_OP_SET_UNION_IGN: /* Set union, ignoring case. */ - case RE_OP_SET_UNION_IGN_REV: /* Set union, ignoring case, backwards. */ - case RE_OP_SET_UNION_REV: /* Set union, backwards. */ - TRACE(("%s\n", re_op_text[bt_data->op])) - - status = retry_fuzzy_match_item(safe_state, search, - &state->text_pos, &node, TRUE); - if (status < 0) - return RE_ERROR_PARTIAL; - - if (node) - goto advance; - break; - case RE_OP_ATOMIC: /* Atomic subpattern. */ - TRACE(("%s\n", re_op_text[bt_data->op])) - - /* Restore the groups and certain flags and then backtrack. */ - pop_groups(state); - state->too_few_errors = bt_data->atomic.too_few_errors; - state->capture_change = bt_data->atomic.capture_change; - discard_backtrack(state); - break; - case RE_OP_BODY_END: - { - RE_RepeatData* rp_data; - TRACE(("%s %d\n", re_op_text[bt_data->op], bt_data->repeat.index)) - - /* We're backtracking into the body. */ - rp_data = &state->repeats[bt_data->repeat.index]; - - /* Restore the repeat info. */ - rp_data->count = bt_data->repeat.count; - rp_data->start = bt_data->repeat.start; - rp_data->capture_change = bt_data->repeat.capture_change; - - discard_backtrack(state); - break; - } - case RE_OP_BODY_START: - { - TRACE(("%s %d\n", re_op_text[bt_data->op], bt_data->repeat.index)) - - /* The body may have failed to match at this position. */ - if (!guard_repeat(safe_state, bt_data->repeat.index, - bt_data->repeat.text_pos, RE_STATUS_BODY, TRUE)) - return RE_ERROR_MEMORY; - - discard_backtrack(state); - break; - } - case RE_OP_BOUNDARY: /* On a word boundary. */ - case RE_OP_DEFAULT_BOUNDARY: /* On a default word boundary. */ - case RE_OP_DEFAULT_END_OF_WORD: /* At a default end of a word. */ - case RE_OP_DEFAULT_START_OF_WORD: /* At a default start of a word. */ - case RE_OP_END_OF_LINE: /* At the end of a line. */ - case RE_OP_END_OF_LINE_U: /* At the end of a line. 
*/ - case RE_OP_END_OF_STRING: /* At the end of the string. */ - case RE_OP_END_OF_STRING_LINE: /* At end of string or final newline. */ - case RE_OP_END_OF_STRING_LINE_U: /* At end of string or final newline. */ - case RE_OP_END_OF_WORD: /* At end of a word. */ - case RE_OP_GRAPHEME_BOUNDARY: /* On a grapheme boundary. */ - case RE_OP_SEARCH_ANCHOR: /* At the start of the search. */ - case RE_OP_START_OF_LINE: /* At the start of a line. */ - case RE_OP_START_OF_LINE_U: /* At the start of a line. */ - case RE_OP_START_OF_STRING: /* At the start of the string. */ - case RE_OP_START_OF_WORD: /* At start of a word. */ - TRACE(("%s\n", re_op_text[bt_data->op])) - - status = retry_fuzzy_match_item(safe_state, search, - &state->text_pos, &node, FALSE); - if (status < 0) - return RE_ERROR_PARTIAL; - - if (node) - goto advance; - break; - case RE_OP_BRANCH: /* 2-way branch. */ - TRACE(("%s\n", re_op_text[bt_data->op])) - - node = bt_data->branch.position.node; - state->text_pos = bt_data->branch.position.text_pos; - discard_backtrack(state); - goto advance; - case RE_OP_CALL_REF: /* A group call ref. */ - TRACE(("%s\n", re_op_text[bt_data->op])) - - pop_group_return(state); - discard_backtrack(state); - break; - case RE_OP_END_FUZZY: /* End of fuzzy matching. */ - TRACE(("%s\n", re_op_text[bt_data->op])) - - state->total_fuzzy_counts[RE_FUZZY_SUB] -= - state->fuzzy_info.counts[RE_FUZZY_SUB]; - state->total_fuzzy_counts[RE_FUZZY_INS] -= - state->fuzzy_info.counts[RE_FUZZY_INS]; - state->total_fuzzy_counts[RE_FUZZY_DEL] -= - state->fuzzy_info.counts[RE_FUZZY_DEL]; - - /* We need to retry the fuzzy match. */ - status = retry_fuzzy_insert(safe_state, &state->text_pos, &node); - if (status < 0) - return RE_ERROR_PARTIAL; - - /* If there were too few errors, in the fuzzy section, try again. 
- */ - if (state->too_few_errors) { - state->too_few_errors = FALSE; - goto backtrack; - } - - if (node) { - state->total_fuzzy_counts[RE_FUZZY_SUB] += - state->fuzzy_info.counts[RE_FUZZY_SUB]; - state->total_fuzzy_counts[RE_FUZZY_INS] += - state->fuzzy_info.counts[RE_FUZZY_INS]; - state->total_fuzzy_counts[RE_FUZZY_DEL] += - state->fuzzy_info.counts[RE_FUZZY_DEL]; - - node = node->next_1.node; - goto advance; - } - break; - case RE_OP_END_GROUP: /* End of a capture group. */ - { - RE_CODE private_index; - RE_GroupData* group; - TRACE(("%s %d\n", re_op_text[bt_data->op], - bt_data->group.public_index)) - - private_index = bt_data->group.private_index; - group = &state->groups[private_index - 1]; - - /* Unsave the capture? */ - if (bt_data->group.capture) - unsave_capture(state, bt_data->group.private_index, - bt_data->group.public_index); - - if (pattern->group_info[private_index - 1].referenced && - group->span.end != bt_data->group.text_pos) - --state->capture_change; - group->span.end = bt_data->group.text_pos; - group->current_capture = bt_data->group.current_capture; - - discard_backtrack(state); - break; - } - case RE_OP_FAILURE: - { - Py_ssize_t end_pos; - TRACE(("%s\n", re_op_text[bt_data->op])) - - /* Do we have to advance? */ - if (!search) - return RE_ERROR_FAILURE; - - /* Can we advance? */ - state->text_pos = state->match_pos; - end_pos = state->reverse ? state->slice_start : state->slice_end; - if (state->text_pos == end_pos) - return RE_ERROR_FAILURE; - - /* Skip over any repeated leading characters. */ - switch (start_node->op) { - case RE_OP_GREEDY_REPEAT_ONE: - case RE_OP_LAZY_REPEAT_ONE: - { - size_t count; - BOOL is_partial; - - /* How many characters did the repeat actually match? */ - count = count_one(state, start_node->nonstring.next_2.node, - state->text_pos, start_node->values[2], &is_partial); - - /* If it's fewer than the maximum then skip over those - * characters. 
- */ - if (count < start_node->values[2]) - state->text_pos += (Py_ssize_t)count * pattern_step; - break; - } - } - - /* Advance and try to match again. */ - state->text_pos += pattern_step; - - goto start_match; - } - case RE_OP_FUZZY: /* Fuzzy matching. */ - { - RE_FuzzyInfo* fuzzy_info; - TRACE(("%s\n", re_op_text[bt_data->op])) - - /* Restore the previous fuzzy info. */ - fuzzy_info = &state->fuzzy_info; - memmove(fuzzy_info, &bt_data->fuzzy.fuzzy_info, - sizeof(RE_FuzzyInfo)); - - discard_backtrack(state); - break; - } - case RE_OP_GREEDY_REPEAT: /* Greedy repeat. */ - case RE_OP_LAZY_REPEAT: /* Lazy repeat. */ - { - RE_RepeatData* rp_data; - TRACE(("%s\n", re_op_text[bt_data->op])) - - /* The repeat failed to match. */ - rp_data = &state->repeats[bt_data->repeat.index]; - - /* The body may have failed to match at this position. */ - if (!guard_repeat(safe_state, bt_data->repeat.index, - bt_data->repeat.text_pos, RE_STATUS_BODY, TRUE)) - return RE_ERROR_MEMORY; - - /* Restore the previous repeat. */ - rp_data->count = bt_data->repeat.count; - rp_data->start = bt_data->repeat.start; - rp_data->capture_change = bt_data->repeat.capture_change; - - discard_backtrack(state); - break; - } - case RE_OP_GREEDY_REPEAT_ONE: /* Greedy repeat for one character. */ - { - RE_RepeatData* rp_data; - size_t count; - Py_ssize_t step; - Py_ssize_t pos; - Py_ssize_t limit; - RE_Node* test; - BOOL match; - BOOL m; - size_t index; - TRACE(("%s\n", re_op_text[bt_data->op])) - - node = bt_data->repeat.position.node; - - rp_data = &state->repeats[bt_data->repeat.index]; - - /* Unmatch one character at a time until the tail could match or we - * have reached the minimum. - */ - state->text_pos = rp_data->start; - - count = rp_data->count; - step = node->step; - pos = state->text_pos + (Py_ssize_t)count * step; - limit = state->text_pos + (Py_ssize_t)node->values[1] * step; - - /* The tail failed to match at this position. 
*/ - if (!guard_repeat(safe_state, bt_data->repeat.index, pos, - RE_STATUS_TAIL, TRUE)) - return RE_ERROR_MEMORY; - - if (count == node->values[1]) { - /* We've backtracked the repeat as far as we can. */ - rp_data->start = bt_data->repeat.text_pos; - rp_data->count = bt_data->repeat.count; - discard_backtrack(state); - break; - } - - test = node->next_1.test; - - m = test->match; - index = node->values[0]; - - match = FALSE; - - if (test->status & RE_STATUS_FUZZY) { - for (;;) { - RE_Position next_position; - - pos -= step; - - if (try_match(state, &node->next_1, pos, &next_position) && - !is_repeat_guarded(safe_state, index, pos, - RE_STATUS_TAIL)) { - match = TRUE; - break; - } - - if (pos == limit) - break; - } - } else { - /* A repeated single-character match is often followed by a - * literal, so checking specially for it can be a good - * optimisation when working with long strings. - */ - switch (test->op) { - case RE_OP_CHARACTER: - { - Py_UCS4 ch; - - ch = test->values[0]; - - for (;;) { - --pos; - - if (same_char(encoding, char_at(state->text, pos), ch) - == m && !is_repeat_guarded(safe_state, index, pos, - RE_STATUS_TAIL)) { - match = TRUE; - break; - } - - if (pos == limit) - break; - - } - break; - } - case RE_OP_CHARACTER_IGN: - { - Py_UCS4 ch; - - ch = test->values[0]; - - for (;;) { - --pos; - - if (same_char_ign(encoding, char_at(state->text, pos), - ch) == m && !is_repeat_guarded(safe_state, index, - pos, RE_STATUS_TAIL)) { - match = TRUE; - break; - } - - if (pos == limit) - break; - - } - break; - } - case RE_OP_CHARACTER_IGN_REV: - { - Py_UCS4 ch; - - ch = test->values[0]; - - for (;;) { - ++pos; - - if (same_char_ign(encoding, char_at(state->text, pos - - 1), ch) == m && !is_repeat_guarded(safe_state, index, - pos, RE_STATUS_TAIL)) { - match = TRUE; - break; - } - - if (pos == limit) - break; - - } - break; - } - case RE_OP_CHARACTER_REV: - { - Py_UCS4 ch; - - ch = test->values[0]; - - for (;;) { - ++pos; - - if (same_char(encoding, 
char_at(state->text, pos - 1), - ch) == m && !is_repeat_guarded(safe_state, index, - pos, RE_STATUS_TAIL)) { - match = TRUE; - break; - } - - if (pos == limit) - break; - - } - break; - } - case RE_OP_STRING: - { - Py_ssize_t length; - - length = (Py_ssize_t)test->value_count; - - /* The tail is a string. We don't want to go off the end of - * the slice. - */ - pos = min_ssize_t(pos, state->slice_end - length); - - for (;;) { - Py_ssize_t found; - BOOL is_partial; - - if (pos < limit) - break; - - found = string_search_rev(safe_state, test, pos + - length, limit, &is_partial); - if (is_partial) - return RE_ERROR_PARTIAL; - - if (found < 0) - break; - - pos = found - length; - - if (!is_repeat_guarded(safe_state, index, pos, - RE_STATUS_TAIL)) { - match = TRUE; - break; - } - - --pos; - } - break; - } - case RE_OP_STRING_FLD: - { - int (*full_case_fold)(Py_UCS4 ch, Py_UCS4* folded); - Py_ssize_t folded_length; - size_t i; - Py_UCS4 folded[RE_MAX_FOLDED]; - - full_case_fold = encoding->full_case_fold; - - folded_length = 0; - for (i = 0; i < test->value_count; i++) - folded_length += full_case_fold(test->values[i], - folded); - - /* The tail is a string. We don't want to go off the end of - * the slice. 
- */ - pos = min_ssize_t(pos, state->slice_end - folded_length); - - for (;;) { - Py_ssize_t found; - Py_ssize_t new_pos; - BOOL is_partial; - - if (pos < limit) - break; - - found = string_search_fld_rev(safe_state, test, pos + - folded_length, limit, &new_pos, &is_partial); - if (is_partial) - return RE_ERROR_PARTIAL; - - if (found < 0) - break; - - pos = found - folded_length; - - if (!is_repeat_guarded(safe_state, index, pos, - RE_STATUS_TAIL)) { - match = TRUE; - break; - } - - --pos; - } - break; - } - case RE_OP_STRING_FLD_REV: - { - int (*full_case_fold)(Py_UCS4 ch, Py_UCS4* folded); - Py_ssize_t folded_length; - size_t i; - Py_UCS4 folded[RE_MAX_FOLDED]; - - full_case_fold = encoding->full_case_fold; - - folded_length = 0; - for (i = 0; i < test->value_count; i++) - folded_length += full_case_fold(test->values[i], - folded); - - /* The tail is a string. We don't want to go off the end of - * the slice. - */ - pos = max_ssize_t(pos, state->slice_start + folded_length); - - for (;;) { - Py_ssize_t found; - Py_ssize_t new_pos; - BOOL is_partial; - - if (pos > limit) - break; - - found = string_search_fld(safe_state, test, pos - - folded_length, limit, &new_pos, &is_partial); - if (is_partial) - return RE_ERROR_PARTIAL; - - if (found < 0) - break; - - pos = found + folded_length; - - if (!is_repeat_guarded(safe_state, index, pos, - RE_STATUS_TAIL)) { - match = TRUE; - break; - } - - ++pos; - } - break; - } - case RE_OP_STRING_IGN: - { - Py_ssize_t length; - - length = (Py_ssize_t)test->value_count; - - /* The tail is a string. We don't want to go off the end of - * the slice. 
- */ - pos = min_ssize_t(pos, state->slice_end - length); - - for (;;) { - Py_ssize_t found; - BOOL is_partial; - - if (pos < limit) - break; - - found = string_search_ign_rev(safe_state, test, pos + - length, limit, &is_partial); - if (is_partial) - return RE_ERROR_PARTIAL; - - if (found < 0) - break; - - pos = found - length; - - if (!is_repeat_guarded(safe_state, index, pos, - RE_STATUS_TAIL)) { - match = TRUE; - break; - } - - --pos; - } - break; - } - case RE_OP_STRING_IGN_REV: - { - Py_ssize_t length; - - length = (Py_ssize_t)test->value_count; - - /* The tail is a string. We don't want to go off the end of - * the slice. - */ - pos = max_ssize_t(pos, state->slice_start + length); - - for (;;) { - Py_ssize_t found; - BOOL is_partial; - - if (pos > limit) - break; - - found = string_search_ign(safe_state, test, pos - - length, limit, &is_partial); - if (is_partial) - return RE_ERROR_PARTIAL; - - if (found < 0) - break; - - pos = found + length; - - if (!is_repeat_guarded(safe_state, index, pos, - RE_STATUS_TAIL)) { - match = TRUE; - break; - } - - ++pos; - } - break; - } - case RE_OP_STRING_REV: - { - Py_ssize_t length; - - length = (Py_ssize_t)test->value_count; - - /* The tail is a string. We don't want to go off the end of - * the slice. 
- */ - pos = max_ssize_t(pos, state->slice_start + length); - - for (;;) { - Py_ssize_t found; - BOOL is_partial; - - if (pos > limit) - break; - - found = string_search(safe_state, test, pos - length, - limit, &is_partial); - if (is_partial) - return RE_ERROR_PARTIAL; - - if (found < 0) - break; - - pos = found + length; - - if (!is_repeat_guarded(safe_state, index, pos, - RE_STATUS_TAIL)) { - match = TRUE; - break; - } - - ++pos; - } - break; - } - default: - for (;;) { - RE_Position next_position; - - pos -= step; - - status = try_match(state, &node->next_1, pos, - &next_position); - if (status < 0) - return status; - - if (status == RE_ERROR_SUCCESS && - !is_repeat_guarded(safe_state, index, pos, - RE_STATUS_TAIL)) { - match = TRUE; - break; - } - - if (pos == limit) - break; - } - break; - } - } - - if (match) { - count = (size_t)abs_ssize_t(pos - state->text_pos); - - /* The tail could match. */ - if (count > node->values[1]) - /* The match is longer than the minimum, so we might need - * to backtrack the repeat again to consume less. - */ - rp_data->count = count; - else { - /* We've reached or passed the minimum, so we won't need to - * backtrack the repeat again. - */ - rp_data->start = bt_data->repeat.text_pos; - rp_data->count = bt_data->repeat.count; - discard_backtrack(state); - - /* Have we passed the minimum? */ - if (count < node->values[1]) - goto backtrack; - } - - node = node->next_1.node; - state->text_pos = pos; - goto advance; - } else { - /* We've backtracked the repeat as far as we can. */ - rp_data->start = bt_data->repeat.text_pos; - rp_data->count = bt_data->repeat.count; - discard_backtrack(state); - } - break; - } - case RE_OP_GROUP_CALL: /* Group call. */ - TRACE(("%s\n", re_op_text[bt_data->op])) - - pop_group_return(state); - discard_backtrack(state); - break; - case RE_OP_GROUP_RETURN: /* Group return. 
*/ - { - RE_Node* return_node; - TRACE(("%s\n", re_op_text[bt_data->op])) - - return_node = bt_data->group_call.node; - - push_group_return(safe_state, return_node); - - if (return_node) { - /* Restore the groups. */ - pop_groups(state); - state->capture_change = bt_data->group_call.capture_change; - - /* Restore the repeats. */ - pop_repeats(state); - } - - discard_backtrack(state); - break; - } - case RE_OP_LAZY_REPEAT_ONE: /* Lazy repeat for one character. */ - { - RE_RepeatData* rp_data; - size_t count; - Py_ssize_t step; - Py_ssize_t pos; - Py_ssize_t available; - size_t max_count; - Py_ssize_t limit; - RE_Node* repeated; - RE_Node* test; - BOOL match; - BOOL m; - size_t index; - TRACE(("%s\n", re_op_text[bt_data->op])) - - node = bt_data->repeat.position.node; - - rp_data = &state->repeats[bt_data->repeat.index]; - - /* Match one character at a time until the tail could match or we - * have reached the maximum. - */ - state->text_pos = rp_data->start; - count = rp_data->count; - - step = node->step; - pos = state->text_pos + (Py_ssize_t)count * step; - available = step > 0 ? 
state->slice_end - state->text_pos : - state->text_pos - state->slice_start; - max_count = min_size_t((size_t)available, node->values[2]); - limit = state->text_pos + (Py_ssize_t)max_count * step; - - repeated = node->nonstring.next_2.node; - - test = node->next_1.test; - - m = test->match; - index = node->values[0]; - - match = FALSE; - - if (test->status & RE_STATUS_FUZZY) { - for (;;) { - RE_Position next_position; - - status = match_one(state, repeated, pos); - if (status < 0) - return status; - - if (status == RE_ERROR_FAILURE) - break; - - pos += step; - - status = try_match(state, &node->next_1, pos, - &next_position); - if (status < 0) - return status; - - if (status == RE_ERROR_SUCCESS && - !is_repeat_guarded(safe_state, index, pos, - RE_STATUS_TAIL)) { - match = TRUE; - break; - } - - if (pos == limit) - break; - } - } else { - /* A repeated single-character match is often followed by a - * literal, so checking specially for it can be a good - * optimisation when working with long strings. - */ - switch (test->op) { - case RE_OP_CHARACTER: - { - Py_UCS4 ch; - - ch = test->values[0]; - - /* The tail is a character. We don't want to go off the end - * of the slice. - */ - limit = min_ssize_t(limit, state->slice_end - 1); - - for (;;) { - if (pos >= state->text_length && state->partial_side == - RE_PARTIAL_RIGHT) - return RE_ERROR_PARTIAL; - - if (pos >= limit) - break; - - status = match_one(state, repeated, pos); - if (status < 0) - return status; - - if (status == RE_ERROR_FAILURE) - break; - - ++pos; - - if (same_char(encoding, char_at(state->text, pos), ch) - == m && !is_repeat_guarded(safe_state, index, pos, - RE_STATUS_TAIL)) { - match = TRUE; - break; - } - } - break; - } - case RE_OP_CHARACTER_IGN: - { - Py_UCS4 ch; - - ch = test->values[0]; - - /* The tail is a character. We don't want to go off the end - * of the slice. 
- */ - limit = min_ssize_t(limit, state->slice_end - 1); - - for (;;) { - if (pos >= state->text_length && state->partial_side == - RE_PARTIAL_RIGHT) - return RE_ERROR_PARTIAL; - - if (pos >= limit) - break; - - status = match_one(state, repeated, pos); - if (status < 0) - return status; - - if (status == RE_ERROR_FAILURE) - break; - - ++pos; - - if (same_char_ign(encoding, char_at(state->text, pos), - ch) == m && !is_repeat_guarded(safe_state, index, - pos, RE_STATUS_TAIL)) { - match = TRUE; - break; - } - } - break; - } - case RE_OP_CHARACTER_IGN_REV: - { - Py_UCS4 ch; - - ch = test->values[0]; - - /* The tail is a character. We don't want to go off the end - * of the slice. - */ - limit = max_ssize_t(limit, state->slice_start + 1); - - for (;;) { - if (pos <= 0 && state->partial_side == RE_PARTIAL_LEFT) - return RE_ERROR_PARTIAL; - - if (pos <= limit) - break; - - status = match_one(state, repeated, pos); - if (status < 0) - return status; - - if (status == RE_ERROR_FAILURE) - break; - - --pos; - - if (same_char_ign(encoding, char_at(state->text, pos - - 1), ch) == m && !is_repeat_guarded(safe_state, index, - pos, RE_STATUS_TAIL)) { - match = TRUE; - break; - } - } - break; - } - case RE_OP_CHARACTER_REV: - { - Py_UCS4 ch; - - ch = test->values[0]; - - /* The tail is a character. We don't want to go off the end - * of the slice. - */ - limit = max_ssize_t(limit, state->slice_start + 1); - - for (;;) { - if (pos <= 0 && state->partial_side == RE_PARTIAL_LEFT) - return RE_ERROR_PARTIAL; - - if (pos <= limit) - break; - - status = match_one(state, repeated, pos); - if (status < 0) - return status; - - if (status == RE_ERROR_FAILURE) - break; - - --pos; - - if (same_char(encoding, char_at(state->text, pos - 1), - ch) == m && !is_repeat_guarded(safe_state, index, - pos, RE_STATUS_TAIL)) { - match = TRUE; - break; - } - } - break; - } - case RE_OP_STRING: - { - Py_ssize_t length; - - length = (Py_ssize_t)test->value_count; - - /* The tail is a string. 
We don't want to go off the end of - * the slice. - */ - limit = min_ssize_t(limit, state->slice_end - length); - - for (;;) { - Py_ssize_t found; - BOOL is_partial; - - if (pos >= state->text_length && state->partial_side == - RE_PARTIAL_RIGHT) - return RE_ERROR_PARTIAL; - - if (pos >= limit) - break; - - /* Look for the tail string. */ - found = string_search(safe_state, test, pos + 1, limit - + length, &is_partial); - if (is_partial) - return RE_ERROR_PARTIAL; - - if (found < 0) - break; - - if (repeated->op == RE_OP_ANY_ALL) - /* Anything can precede the tail. */ - pos = found; - else { - /* Check that what precedes the tail will match. */ - while (pos != found) { - status = match_one(state, repeated, pos); - if (status < 0) - return status; - - if (status == RE_ERROR_FAILURE) - break; - - ++pos; - } - - if (pos != found) - /* Something preceding the tail didn't match. - */ - break; - } - - if (!is_repeat_guarded(safe_state, index, pos, - RE_STATUS_TAIL)) { - match = TRUE; - break; - } - } - break; - } - case RE_OP_STRING_FLD: - { - /* The tail is a string. We don't want to go off the end of - * the slice. - */ - limit = min_ssize_t(limit, state->slice_end); - - for (;;) { - Py_ssize_t found; - Py_ssize_t new_pos; - BOOL is_partial; - - if (pos >= state->text_length && state->partial_side == - RE_PARTIAL_RIGHT) - return RE_ERROR_PARTIAL; - - if (pos >= limit) - break; - - /* Look for the tail string. */ - found = string_search_fld(safe_state, test, pos + 1, - limit, &new_pos, &is_partial); - if (is_partial) - return RE_ERROR_PARTIAL; - - if (found < 0) - break; - - if (repeated->op == RE_OP_ANY_ALL) - /* Anything can precede the tail. */ - pos = found; - else { - /* Check that what precedes the tail will match. */ - while (pos != found) { - status = match_one(state, repeated, pos); - if (status < 0) - return status; - - if (status == RE_ERROR_FAILURE) - break; - - ++pos; - } - - if (pos != found) - /* Something preceding the tail didn't match. 
- */ - break; - } - - if (!is_repeat_guarded(safe_state, index, pos, - RE_STATUS_TAIL)) { - match = TRUE; - break; - } - } - break; - } - case RE_OP_STRING_FLD_REV: - { - /* The tail is a string. We don't want to go off the end of - * the slice. - */ - limit = max_ssize_t(limit, state->slice_start); - - for (;;) { - Py_ssize_t found; - Py_ssize_t new_pos; - BOOL is_partial; - - if (pos <= 0 && state->partial_side == RE_PARTIAL_LEFT) - return RE_ERROR_PARTIAL; - - if (pos <= limit) - break; - - /* Look for the tail string. */ - found = string_search_fld_rev(safe_state, test, pos - - 1, limit, &new_pos, &is_partial); - if (is_partial) - return RE_ERROR_PARTIAL; - - if (found < 0) - break; - - if (repeated->op == RE_OP_ANY_ALL) - /* Anything can precede the tail. */ - pos = found; - else { - /* Check that what precedes the tail will match. */ - while (pos != found) { - status = match_one(state, repeated, pos); - if (status < 0) - return status; - - if (status == RE_ERROR_FAILURE) - break; - - --pos; - } - - if (pos != found) - /* Something preceding the tail didn't match. - */ - break; - } - - if (!is_repeat_guarded(safe_state, index, pos, - RE_STATUS_TAIL)) { - match = TRUE; - break; - } - } - break; - } - case RE_OP_STRING_IGN: - { - Py_ssize_t length; - - length = (Py_ssize_t)test->value_count; - - /* The tail is a string. We don't want to go off the end of - * the slice. - */ - limit = min_ssize_t(limit, state->slice_end - length); - - for (;;) { - Py_ssize_t found; - BOOL is_partial; - - if (pos >= state->text_length && state->partial_side == - RE_PARTIAL_RIGHT) - return RE_ERROR_PARTIAL; - - if (pos >= limit) - break; - - /* Look for the tail string. */ - found = string_search_ign(safe_state, test, pos + 1, - limit + length, &is_partial); - if (is_partial) - return RE_ERROR_PARTIAL; - - if (found < 0) - break; - - if (repeated->op == RE_OP_ANY_ALL) - /* Anything can precede the tail. */ - pos = found; - else { - /* Check that what precedes the tail will match. 
*/ - while (pos != found) { - status = match_one(state, repeated, pos); - if (status < 0) - return status; - - if (status == RE_ERROR_FAILURE) - break; - - ++pos; - } - - if (pos != found) - /* Something preceding the tail didn't match. - */ - break; - } - - if (!is_repeat_guarded(safe_state, index, pos, - RE_STATUS_TAIL)) { - match = TRUE; - break; - } - } - break; - } - case RE_OP_STRING_IGN_REV: - { - Py_ssize_t length; - - length = (Py_ssize_t)test->value_count; - - /* The tail is a string. We don't want to go off the end of - * the slice. - */ - limit = max_ssize_t(limit, state->slice_start + length); - - for (;;) { - Py_ssize_t found; - BOOL is_partial; - - if (pos <= 0 && state->partial_side == RE_PARTIAL_LEFT) - return RE_ERROR_PARTIAL; - - if (pos <= limit) - break; - - /* Look for the tail string. */ - found = string_search_ign_rev(safe_state, test, pos - - 1, limit - length, &is_partial); - if (is_partial) - return RE_ERROR_PARTIAL; - - if (found < 0) - break; - - if (repeated->op == RE_OP_ANY_ALL) - /* Anything can precede the tail. */ - pos = found; - else { - /* Check that what precedes the tail will match. */ - while (pos != found) { - status = match_one(state, repeated, pos); - if (status < 0) - return status; - - if (status == RE_ERROR_FAILURE) - break; - - --pos; - } - - if (pos != found) - /* Something preceding the tail didn't match. - */ - break; - } - - if (!is_repeat_guarded(safe_state, index, pos, - RE_STATUS_TAIL)) { - match = TRUE; - break; - } - } - break; - } - case RE_OP_STRING_REV: - { - Py_ssize_t length; - - length = (Py_ssize_t)test->value_count; - - /* The tail is a string. We don't want to go off the end of - * the slice. - */ - limit = max_ssize_t(limit, state->slice_start + length); - - for (;;) { - Py_ssize_t found; - BOOL is_partial; - - if (pos <= 0 && state->partial_side == RE_PARTIAL_LEFT) - return RE_ERROR_PARTIAL; - - if (pos <= limit) - break; - - /* Look for the tail string. 
*/ - found = string_search_rev(safe_state, test, pos - 1, - limit - length, &is_partial); - if (is_partial) - return RE_ERROR_PARTIAL; - - if (found < 0) - break; - - if (repeated->op == RE_OP_ANY_ALL) - /* Anything can precede the tail. */ - pos = found; - else { - /* Check that what precedes the tail will match. */ - while (pos != found) { - status = match_one(state, repeated, pos); - if (status < 0) - return status; - - if (status == RE_ERROR_FAILURE) - break; - - --pos; - } - - if (pos != found) - /* Something preceding the tail didn't match. - */ - break; - } - - if (!is_repeat_guarded(safe_state, index, pos, - RE_STATUS_TAIL)) { - match = TRUE; - break; - } - } - break; - } - default: - for (;;) { - RE_Position next_position; - - status = match_one(state, repeated, pos); - if (status < 0) - return status; - - if (status == RE_ERROR_FAILURE) - break; - - pos += step; - - status = try_match(state, &node->next_1, pos, - &next_position); - if (status < 0) - return RE_ERROR_PARTIAL; - - if (status == RE_ERROR_SUCCESS && - !is_repeat_guarded(safe_state, index, pos, - RE_STATUS_TAIL)) { - match = TRUE; - break; - } - - if (pos == limit) - break; - } - break; - } - } - - if (match) { - /* The tail could match. */ - count = (size_t)abs_ssize_t(pos - state->text_pos); - state->text_pos = pos; - - if (count < max_count) { - /* The match is shorter than the maximum, so we might need - * to backtrack the repeat again to consume more. - */ - rp_data->count = count; - } else { - /* We've reached or passed the maximum, so we won't need to - * backtrack the repeat again. - */ - rp_data->start = bt_data->repeat.text_pos; - rp_data->count = bt_data->repeat.count; - discard_backtrack(state); - - /* Have we passed the maximum? */ - if (count > max_count) - goto backtrack; - } - - node = node->next_1.node; - goto advance; - } else { - /* The tail couldn't match. 
*/ - rp_data->start = bt_data->repeat.text_pos; - rp_data->count = bt_data->repeat.count; - discard_backtrack(state); - } - break; - } - case RE_OP_LOOKAROUND: /* Lookaround. */ - TRACE(("%s\n", re_op_text[bt_data->op])) - - /* Restore the groups and certain flags and then backtrack. */ - pop_groups(state); - state->too_few_errors = bt_data->lookaround.too_few_errors; - state->capture_change = bt_data->lookaround.capture_change; - discard_backtrack(state); - break; - case RE_OP_MATCH_BODY: - { - RE_RepeatData* rp_data; - TRACE(("%s %d\n", re_op_text[bt_data->op], bt_data->repeat.index)) - - /* We want to match the body. */ - rp_data = &state->repeats[bt_data->repeat.index]; - - /* Restore the repeat info. */ - rp_data->count = bt_data->repeat.count; - rp_data->start = bt_data->repeat.start; - rp_data->capture_change = bt_data->repeat.capture_change; - - /* Record backtracking info in case the body fails to match. */ - bt_data->op = RE_OP_BODY_START; - - /* Advance into the body. */ - node = bt_data->repeat.position.node; - state->text_pos = bt_data->repeat.position.text_pos; - goto advance; - } - case RE_OP_MATCH_TAIL: - { - RE_RepeatData* rp_data; - TRACE(("%s %d\n", re_op_text[bt_data->op], bt_data->repeat.index)) - - /* We want to match the tail. */ - rp_data = &state->repeats[bt_data->repeat.index]; - - /* Restore the repeat info. */ - rp_data->count = bt_data->repeat.count; - rp_data->start = bt_data->repeat.start; - rp_data->capture_change = bt_data->repeat.capture_change; - - /* Advance into the tail. */ - node = bt_data->repeat.position.node; - state->text_pos = bt_data->repeat.position.text_pos; - - discard_backtrack(state); - goto advance; - } - case RE_OP_REF_GROUP: /* Reference to a capture group. */ - case RE_OP_REF_GROUP_IGN: /* Reference to a capture group, ignoring case. */ - case RE_OP_REF_GROUP_IGN_REV: /* Reference to a capture group, backwards, ignoring case. */ - case RE_OP_REF_GROUP_REV: /* Reference to a capture group, backwards. 
*/ - case RE_OP_STRING: /* A string. */ - case RE_OP_STRING_IGN: /* A string, ignoring case. */ - case RE_OP_STRING_IGN_REV: /* A string, backwards, ignoring case. */ - case RE_OP_STRING_REV: /* A string, backwards. */ - { - BOOL matched; - TRACE(("%s\n", re_op_text[bt_data->op])) - - status = retry_fuzzy_match_string(safe_state, search, - &state->text_pos, &node, &string_pos, &matched); - if (status < 0) - return RE_ERROR_PARTIAL; - - - if (matched) - goto advance; - - string_pos = -1; - break; - } - case RE_OP_REF_GROUP_FLD: /* Reference to a capture group, ignoring case. */ - case RE_OP_REF_GROUP_FLD_REV: /* Reference to a capture group, backwards, ignoring case. */ - { - BOOL matched; - TRACE(("%s\n", re_op_text[bt_data->op])) - - status = retry_fuzzy_match_group_fld(safe_state, search, - &state->text_pos, &node, &folded_pos, &string_pos, &gfolded_pos, - &matched); - if (status < 0) - return RE_ERROR_PARTIAL; - - - if (matched) - goto advance; - - string_pos = -1; - break; - } - case RE_OP_START_GROUP: /* Start of a capture group. */ - { - RE_CODE private_index; - RE_GroupData* group; - TRACE(("%s %d\n", re_op_text[bt_data->op], - bt_data->group.public_index)) - - private_index = bt_data->group.private_index; - group = &state->groups[private_index - 1]; - - /* Unsave the capture? */ - if (bt_data->group.capture) - unsave_capture(state, bt_data->group.private_index, - bt_data->group.public_index); - - if (pattern->group_info[private_index - 1].referenced && - group->span.start != bt_data->group.text_pos) - --state->capture_change; - group->span.start = bt_data->group.text_pos; - group->current_capture = bt_data->group.current_capture; - - discard_backtrack(state); - break; - } - case RE_OP_STRING_FLD: /* A string, ignoring case. */ - case RE_OP_STRING_FLD_REV: /* A string, backwards, ignoring case. 
*/ - { - BOOL matched; - TRACE(("%s\n", re_op_text[bt_data->op])) - - status = retry_fuzzy_match_string_fld(safe_state, search, - &state->text_pos, &node, &string_pos, &folded_pos, &matched); - if (status < 0) - return RE_ERROR_PARTIAL; - - if (matched) - goto advance; - - string_pos = -1; - break; - } - default: - TRACE(("UNKNOWN OP %d\n", bt_data->op)) - return RE_ERROR_ILLEGAL; - } - } -} - -/* Saves group data for fuzzy matching. */ -Py_LOCAL_INLINE(RE_GroupData*) save_groups(RE_SafeState* safe_state, - RE_GroupData* saved_groups) { - RE_State* state; - PatternObject* pattern; - size_t g; - - /* Re-acquire the GIL. */ - acquire_GIL(safe_state); - - state = safe_state->re_state; - pattern = state->pattern; - - if (!saved_groups) { - saved_groups = (RE_GroupData*)re_alloc(pattern->true_group_count * - sizeof(RE_GroupData)); - if (!saved_groups) - goto error; - - memset(saved_groups, 0, pattern->true_group_count * - sizeof(RE_GroupData)); - } - - for (g = 0; g < pattern->true_group_count; g++) { - RE_GroupData* orig; - RE_GroupData* copy; - - orig = &state->groups[g]; - copy = &saved_groups[g]; - - copy->span = orig->span; - - if (orig->capture_count > copy->capture_capacity) { - RE_GroupSpan* cap_copy; - - cap_copy = (RE_GroupSpan*)re_realloc(copy->captures, - orig->capture_count * sizeof(RE_GroupSpan)); - if (!cap_copy) - goto error; - - copy->capture_capacity = orig->capture_count; - copy->captures = cap_copy; - } - - copy->capture_count = orig->capture_count; - Py_MEMCPY(copy->captures, orig->captures, orig->capture_count * - sizeof(RE_GroupSpan)); - } - - /* Release the GIL. */ - release_GIL(safe_state); - - return saved_groups; - -error: - if (saved_groups) { - for (g = 0; g < pattern->true_group_count; g++) - re_dealloc(saved_groups[g].captures); - - re_dealloc(saved_groups); - } - - /* Release the GIL. */ - release_GIL(safe_state); - - return NULL; -} - -/* Restores group data for fuzzy matching. 
*/ -Py_LOCAL_INLINE(void) restore_groups(RE_SafeState* safe_state, RE_GroupData* - saved_groups) { - RE_State* state; - PatternObject* pattern; - size_t g; - - /* Re-acquire the GIL. */ - acquire_GIL(safe_state); - - state = safe_state->re_state; - pattern = state->pattern; - - for (g = 0; g < pattern->true_group_count; g++) - re_dealloc(state->groups[g].captures); - - Py_MEMCPY(state->groups, saved_groups, pattern->true_group_count * - sizeof(RE_GroupData)); - - re_dealloc(saved_groups); - - /* Release the GIL. */ - release_GIL(safe_state); -} - -/* Discards group data for fuzzy matching. */ -Py_LOCAL_INLINE(void) discard_groups(RE_SafeState* safe_state, RE_GroupData* - saved_groups) { - RE_State* state; - PatternObject* pattern; - size_t g; - - /* Re-acquire the GIL. */ - acquire_GIL(safe_state); - - state = safe_state->re_state; - pattern = state->pattern; - - for (g = 0; g < pattern->true_group_count; g++) - re_dealloc(saved_groups[g].captures); - - re_dealloc(saved_groups); - - /* Release the GIL. */ - release_GIL(safe_state); -} - -/* Saves the fuzzy info. */ -Py_LOCAL_INLINE(void) save_fuzzy_counts(RE_State* state, size_t* fuzzy_counts) - { - Py_MEMCPY(fuzzy_counts, state->total_fuzzy_counts, - sizeof(state->total_fuzzy_counts)); -} - -/* Restores the fuzzy info. */ -Py_LOCAL_INLINE(void) restore_fuzzy_counts(RE_State* state, size_t* - fuzzy_counts) { - Py_MEMCPY(state->total_fuzzy_counts, fuzzy_counts, - sizeof(state->total_fuzzy_counts)); -} - -/* Performs a match or search from the current text position. - * - * The state can sometimes be shared across threads. In such instances there's - * a lock (mutex) on it. The lock is held for the duration of matching. 
- */ -Py_LOCAL_INLINE(int) do_match(RE_SafeState* safe_state, BOOL search) { - RE_State* state; - PatternObject* pattern; - Py_ssize_t available; - BOOL get_best; - BOOL enhance_match; - BOOL must_advance; - RE_GroupData* best_groups; - Py_ssize_t best_match_pos; - Py_ssize_t best_text_pos = 0; /* Initialise to stop compiler warning. */ - int status; - Py_ssize_t slice_start; - Py_ssize_t slice_end; - size_t best_fuzzy_counts[RE_FUZZY_COUNT]; - TRACE(("<>\n")) - - state = safe_state->re_state; - pattern = state->pattern; - - /* Release the GIL. */ - release_GIL(safe_state); - - /* Is there enough to search? */ - if (state->reverse) { - if (state->text_pos < state->slice_start) { - acquire_GIL(safe_state); - return FALSE; - } - - available = state->text_pos - state->slice_start; - } else { - if (state->text_pos > state->slice_end) { - acquire_GIL(safe_state); - return FALSE; - } - - available = state->slice_end - state->text_pos; - } - - get_best = (pattern->flags & RE_FLAG_BESTMATCH) != 0; - enhance_match = (pattern->flags & RE_FLAG_ENHANCEMATCH) != 0 && !get_best; - - /* The maximum permitted cost. */ - state->max_cost = pattern->is_fuzzy ? PY_SSIZE_T_MAX : 0; - - best_groups = NULL; - - best_match_pos = state->text_pos; - must_advance = state->must_advance; - - slice_start = state->slice_start; - slice_end = state->slice_end; - - for (;;) { - /* If there's a better match, it won't start earlier in the string than - * the current best match, so there's no need to start earlier than - * that match. - */ - state->text_pos = best_match_pos; - state->must_advance = must_advance; - - /* Initialise the state. */ - init_match(state); - - status = RE_ERROR_SUCCESS; - if (state->max_cost == 0 && state->partial_side == RE_PARTIAL_NONE) { - /* An exact match, and partial matches not permitted. 
*/ - if (available < state->min_width || (available == 0 && - state->must_advance)) - status = RE_ERROR_FAILURE; - } - - if (status == RE_ERROR_SUCCESS) - status = basic_match(safe_state, pattern->start_node, search, - FALSE); - - /* Has an error occurred, or is it a partial match? */ - if (status < 0) - break; - - if (status == RE_ERROR_FAILURE || (status == RE_ERROR_SUCCESS && - state->total_cost == 0)) - break; - - if (!get_best && !enhance_match) - break; - - save_fuzzy_counts(state, best_fuzzy_counts); - - if (!get_best && state->text_pos == state->match_pos) - /* We want the first match. The match is already zero-width, so the - * cost can't get any lower (because the fit can't get any better). - */ - break; - - if (best_groups) { - BOOL same; - size_t g; - - /* Did we get the same match as the best so far? */ - same = state->match_pos == best_match_pos && state->text_pos == - best_text_pos; - for (g = 0; same && g < pattern->public_group_count; g++) { - same = state->groups[g].span.start == best_groups[g].span.start - && state->groups[g].span.end == best_groups[g].span.end; - } - - if (same) - break; - } - - /* Save the best result so far. */ - best_groups = save_groups(safe_state, best_groups); - if (!best_groups) { - status = RE_ERROR_MEMORY; - break; - } - - best_match_pos = state->match_pos; - best_text_pos = state->text_pos; - - if (state->max_cost == 0) - break; - - /* Reduce the maximum permitted cost and try again. */ - state->max_cost = state->total_cost - 1; - - if (enhance_match) { - if (state->reverse) { - state->slice_start = state->text_pos; - state->slice_end = state->match_pos; - } else { - state->slice_start = state->match_pos; - state->slice_end = state->text_pos; - } - } - } - - state->slice_start = slice_start; - state->slice_end = slice_end; - - if (best_groups) { - if (status == RE_ERROR_SUCCESS && state->total_cost == 0) - /* We have a perfect match, so the previous best match. 
*/ - discard_groups(safe_state, best_groups); - else { - /* Restore the previous best match. */ - status = RE_ERROR_SUCCESS; - - state->match_pos = best_match_pos; - state->text_pos = best_text_pos; - - restore_groups(safe_state, best_groups); - restore_fuzzy_counts(state, best_fuzzy_counts); - } - } - - if (status == RE_ERROR_SUCCESS || status == RE_ERROR_PARTIAL) { - Py_ssize_t max_end_index; - RE_GroupInfo* group_info; - size_t g; - - /* Store the results. */ - state->lastindex = -1; - state->lastgroup = -1; - max_end_index = -1; - - /* Store the capture groups. */ - group_info = pattern->group_info; - - for (g = 0; g < pattern->public_group_count; g++) { - RE_GroupSpan* span; - - span = &state->groups[g].span; - /* The string positions are of type Py_ssize_t, so the format needs - * to specify that. - */ - TRACE(("group %d from %" PY_FORMAT_SIZE_T "d to %" PY_FORMAT_SIZE_T - "d\n", g + 1, span->start, span->end)) - - if (span->start >= 0 && span->end >= 0 && group_info[g].end_index > - max_end_index) { - max_end_index = group_info[g].end_index; - state->lastindex = (Py_ssize_t)g + 1; - if (group_info[g].has_name) - state->lastgroup = (Py_ssize_t)g + 1; - } - } - } - - /* Re-acquire the GIL. */ - acquire_GIL(safe_state); - - if (status < 0 && status != RE_ERROR_PARTIAL && !PyErr_Occurred()) - set_error(status, NULL); - - return status; -} - -/* Gets a string from a Python object. - * - * If the function returns true and str_info->should_release is true then it's - * the responsibility of the caller to release the buffer when it's no longer - * needed. - */ -Py_LOCAL_INLINE(BOOL) get_string(PyObject* string, RE_StringInfo* str_info) { - /* Given a Python object, return a data pointer, a length (in characters), - * and a character size. Return FALSE if the object is not a string (or not - * compatible). - */ - PyBufferProcs* buffer; - Py_ssize_t bytes; - Py_ssize_t size; - - /* Unicode objects do not support the buffer API. So, get the data directly - * instead. 
- */ - if (PyUnicode_Check(string)) { - /* Unicode strings doesn't always support the buffer interface. */ - str_info->characters = (void*)PyUnicode_AS_DATA(string); - str_info->length = PyUnicode_GET_SIZE(string); - str_info->charsize = sizeof(Py_UNICODE); - str_info->is_unicode = TRUE; - str_info->should_release = FALSE; - return TRUE; - } - - /* Get pointer to string buffer. */ -#if PY_VERSION_HEX >= 0x02060000 - buffer = Py_TYPE(string)->tp_as_buffer; - str_info->view.len = -1; -#else - buffer = string->ob_type->tp_as_buffer; -#endif - - if (!buffer) { - PyErr_SetString(PyExc_TypeError, "expected string or buffer"); - return FALSE; - } - -#if PY_VERSION_HEX >= 0x02060000 - if (buffer->bf_getbuffer && (*buffer->bf_getbuffer)(string, - &str_info->view, PyBUF_SIMPLE) >= 0) - /* It's a new-style buffer. */ - str_info->should_release = TRUE; - else -#endif - if (buffer->bf_getreadbuffer && buffer->bf_getsegcount && - buffer->bf_getsegcount(string, NULL) == 1) - /* It's an old-style buffer. */ - str_info->should_release = FALSE; - else { - PyErr_SetString(PyExc_TypeError, "expected string or buffer"); - return FALSE; - } - - /* Determine buffer size. */ -#if PY_VERSION_HEX >= 0x02060000 - if (str_info->should_release) { - /* It's a new-style buffer. */ - bytes = str_info->view.len; - str_info->characters = str_info->view.buf; - - if (str_info->characters == NULL) { - PyBuffer_Release(&str_info->view); - PyErr_SetString(PyExc_ValueError, "buffer is NULL"); - return FALSE; - } - } else -#endif - /* It's an old-style buffer. */ - bytes = buffer->bf_getreadbuffer(string, 0, &str_info->characters); - - if (bytes < 0) { -#if PY_VERSION_HEX >= 0x02060000 - if (str_info->should_release) - PyBuffer_Release(&str_info->view); -#endif - PyErr_SetString(PyExc_TypeError, "buffer has negative size"); - return FALSE; - } - - /* Determine character size. 
*/ - size = PyObject_Size(string); - - if (PyString_Check(string) || bytes == size) - str_info->charsize = 1; - else { -#if PY_VERSION_HEX >= 0x02060000 - if (str_info->should_release) - PyBuffer_Release(&str_info->view); -#endif - PyErr_SetString(PyExc_TypeError, "buffer size mismatch"); - return FALSE; - } - - str_info->length = size; - str_info->is_unicode = FALSE; - - return TRUE; -} - -/* Deallocates the groups storage. */ -Py_LOCAL_INLINE(void) dealloc_groups(RE_GroupData* groups, size_t group_count) - { - size_t g; - - if (!groups) - return; - - for (g = 0; g < group_count; g++) - re_dealloc(groups[g].captures); - - re_dealloc(groups); -} - -/* Initialises a state object. */ -Py_LOCAL_INLINE(BOOL) state_init_2(RE_State* state, PatternObject* pattern, - PyObject* string, RE_StringInfo* str_info, Py_ssize_t start, Py_ssize_t end, - BOOL overlapped, int concurrent, BOOL partial, BOOL use_lock, BOOL - visible_captures, BOOL match_all) { - Py_ssize_t final_pos; - int i; - - state->groups = NULL; - state->repeats = NULL; - state->visible_captures = visible_captures; - state->match_all = match_all; - state->backtrack_block.previous = NULL; - state->backtrack_block.next = NULL; - state->backtrack_block.capacity = RE_BACKTRACK_BLOCK_SIZE; - state->backtrack_allocated = RE_BACKTRACK_BLOCK_SIZE; - state->first_saved_groups = NULL; - state->current_saved_groups = NULL; - state->first_saved_repeats = NULL; - state->current_saved_repeats = NULL; - state->lock = NULL; - state->fuzzy_guards = NULL; - state->first_group_call_frame = NULL; - state->current_group_call_frame = NULL; - state->group_call_guard_list = NULL; - state->req_pos = -1; - - /* The call guards used by recursive patterns. 
*/ - if (pattern->call_ref_info_count > 0) { - state->group_call_guard_list = - (RE_GuardList*)re_alloc(pattern->call_ref_info_count * - sizeof(RE_GuardList)); - if (!state->group_call_guard_list) - goto error; - memset(state->group_call_guard_list, 0, pattern->call_ref_info_count * - sizeof(RE_GuardList)); - } - - /* The capture groups. */ - if (pattern->true_group_count) { - size_t g; - - if (pattern->groups_storage) { - state->groups = pattern->groups_storage; - pattern->groups_storage = NULL; - } else { - state->groups = (RE_GroupData*)re_alloc(pattern->true_group_count * - sizeof(RE_GroupData)); - if (!state->groups) - goto error; - memset(state->groups, 0, pattern->true_group_count * - sizeof(RE_GroupData)); - - for (g = 0; g < pattern->true_group_count; g++) { - RE_GroupSpan* captures; - - captures = (RE_GroupSpan*)re_alloc(sizeof(RE_GroupSpan)); - if (!captures) { - size_t i; - - for (i = 0; i < g; i++) - re_dealloc(state->groups[i].captures); - - goto error; - } - - state->groups[g].captures = captures; - state->groups[g].capture_capacity = 1; - } - } - } - - /* Adjust boundaries. */ - if (start < 0) - start += str_info->length; - if (start < 0) - start = 0; - else if (start > str_info->length) - start = str_info->length; - - if (end < 0) - end += str_info->length; - if (end < 0) - end = 0; - else if (end > str_info->length) - end = str_info->length; - - state->overlapped = overlapped; - state->min_width = pattern->min_width; - - /* Initialise the getters and setters for the character size. */ - state->charsize = str_info->charsize; - state->is_unicode = str_info->is_unicode; - -#if PY_VERSION_HEX >= 0x02060000 - /* Are we using a buffer object? If so, we need to copy the info. 
*/ - state->should_release = str_info->should_release; - if (state->should_release) - state->view = str_info->view; - -#endif - switch (state->charsize) { - case 1: - state->char_at = bytes1_char_at; - state->set_char_at = bytes1_set_char_at; - state->point_to = bytes1_point_to; - break; - case 2: - state->char_at = bytes2_char_at; - state->set_char_at = bytes2_set_char_at; - state->point_to = bytes2_point_to; - break; - case 4: - state->char_at = bytes4_char_at; - state->set_char_at = bytes4_set_char_at; - state->point_to = bytes4_point_to; - break; - default: - goto error; - } - - state->encoding = pattern->encoding; - - /* The state object contains a reference to the string and also a pointer - * to its contents. - * - * The documentation says that the end of the slice behaves like the end of - * the string. - */ - state->text = str_info->characters; - state->text_length = end; - - state->reverse = (pattern->flags & RE_FLAG_REVERSE) != 0; - if (partial) - state->partial_side = state->reverse ? RE_PARTIAL_LEFT : - RE_PARTIAL_RIGHT; - else - state->partial_side = RE_PARTIAL_NONE; - - state->slice_start = start; - state->slice_end = state->text_length; - state->text_pos = state->reverse ? state->slice_end : state->slice_start; - - /* Point to the final newline and line separator if it's at the end of the - * string, otherwise just -1. - */ - state->final_newline = -1; - state->final_line_sep = -1; - final_pos = state->text_length - 1; - if (final_pos >= 0) { - Py_UCS4 ch; - - ch = state->char_at(state->text, final_pos); - if (ch == 0x0A) { - /* The string ends with LF. */ - state->final_newline = final_pos; - state->final_line_sep = final_pos; - - /* Does the string end with CR/LF? */ - --final_pos; - if (final_pos >= 0 && state->char_at(state->text, final_pos) == - 0x0D) - state->final_line_sep = final_pos; - } else { - /* The string doesn't end with LF, but it could be another kind of - * line separator. 
- */ - if (state->encoding->is_line_sep(ch)) - state->final_line_sep = final_pos; - } - } - - /* If the 'new' behaviour is enabled then split correctly on zero-width - * matches. - */ - state->version_0 = (pattern->flags & RE_FLAG_VERSION1) == 0; - state->must_advance = FALSE; - - state->pattern = pattern; - state->string = string; - - if (pattern->repeat_count) { - if (pattern->repeats_storage) { - state->repeats = pattern->repeats_storage; - pattern->repeats_storage = NULL; - } else { - state->repeats = (RE_RepeatData*)re_alloc(pattern->repeat_count * - sizeof(RE_RepeatData)); - if (!state->repeats) - goto error; - memset(state->repeats, 0, pattern->repeat_count * - sizeof(RE_RepeatData)); - } - } - - if (pattern->fuzzy_count) { - state->fuzzy_guards = (RE_FuzzyGuards*)re_alloc(pattern->fuzzy_count * - sizeof(RE_FuzzyGuards)); - if (!state->fuzzy_guards) - goto error; - memset(state->fuzzy_guards, 0, pattern->fuzzy_count * - sizeof(RE_FuzzyGuards)); - } - - Py_INCREF(state->pattern); - Py_INCREF(state->string); - - /* Multithreading is allowed during matching when explicitly enabled or on - * immutable strings. - */ - switch (concurrent) { - case RE_CONC_NO: - state->is_multithreaded = FALSE; - break; - case RE_CONC_YES: - state->is_multithreaded = TRUE; - break; - default: - state->is_multithreaded = PyUnicode_Check(string) || - PyString_Check(string); - break; - } - - /* A state struct can sometimes be shared across threads. In such - * instances, if multithreading is enabled we need to protect the state - * with a lock (mutex) during matching. 
- */ - if (state->is_multithreaded && use_lock) - state->lock = PyThread_allocate_lock(); - - for (i = 0; i < MAX_SEARCH_POSITIONS; i++) - state->search_positions[i].start_pos = -1; - - return TRUE; - -error: - re_dealloc(state->group_call_guard_list); - re_dealloc(state->repeats); - dealloc_groups(state->groups, pattern->true_group_count); - re_dealloc(state->fuzzy_guards); - state->repeats = NULL; - state->groups = NULL; - state->fuzzy_guards = NULL; - return FALSE; -} - -#if PY_VERSION_HEX >= 0x02060000 -/* Releases the string's buffer, if necessary. */ -Py_LOCAL_INLINE(void) release_buffer(RE_StringInfo* str_info) { - if (str_info->should_release) - PyBuffer_Release(&str_info->view); -} - -#endif -/* Initialises a state object. */ -Py_LOCAL_INLINE(BOOL) state_init(RE_State* state, PatternObject* pattern, - PyObject* string, Py_ssize_t start, Py_ssize_t end, BOOL overlapped, int - concurrent, BOOL partial, BOOL use_lock, BOOL visible_captures, BOOL - match_all) { - RE_StringInfo str_info; - - /* Get the string to search or match. */ - if (!get_string(string, &str_info)) - return FALSE; - - /* If we fail to initialise the state then we need to release the buffer if - * the string is a buffer object. - */ - if (!state_init_2(state, pattern, string, &str_info, start, end, - overlapped, concurrent, partial, use_lock, visible_captures, match_all)) - { -#if PY_VERSION_HEX >= 0x02060000 - release_buffer(&str_info); - -#endif - return FALSE; - } - - /* The state has been initialised successfully, so now the state has the - * responsibility of releasing the buffer if the string is a buffer object. - */ - return TRUE; -} - -/* Deallocates repeat data. 
*/ -Py_LOCAL_INLINE(void) dealloc_repeats(RE_RepeatData* repeats, size_t - repeat_count) { - size_t i; - - if (!repeats) - return; - - for (i = 0; i < repeat_count; i++) { - re_dealloc(repeats[i].body_guard_list.spans); - re_dealloc(repeats[i].tail_guard_list.spans); - } - - re_dealloc(repeats); -} - -/* Deallocates fuzzy guards. */ -Py_LOCAL_INLINE(void) dealloc_fuzzy_guards(RE_FuzzyGuards* guards, size_t - fuzzy_count) { - size_t i; - - if (!guards) - return; - - for (i = 0; i < fuzzy_count; i++) { - re_dealloc(guards[i].body_guard_list.spans); - re_dealloc(guards[i].tail_guard_list.spans); - } - - re_dealloc(guards); -} - -/* Finalises a state object, discarding its contents. */ -Py_LOCAL_INLINE(void) state_fini(RE_State* state) { - RE_BacktrackBlock* current; - PatternObject* pattern; - RE_SavedGroups* saved_groups; - RE_SavedRepeats* saved_repeats; - RE_GroupCallFrame* frame; - size_t i; - - /* Discard the lock (mutex) if there's one. */ - if (state->lock) - PyThread_free_lock(state->lock); - - /* Deallocate the backtrack blocks. 
*/ - current = state->backtrack_block.next; - while (current) { - RE_BacktrackBlock* next; - - next = current->next; - re_dealloc(current); - state->backtrack_allocated -= RE_BACKTRACK_BLOCK_SIZE; - current = next; - } - - pattern = state->pattern; - - saved_groups = state->first_saved_groups; - while (saved_groups) { - RE_SavedGroups* next; - - next = saved_groups->next; - re_dealloc(saved_groups->spans); - re_dealloc(saved_groups->counts); - re_dealloc(saved_groups); - saved_groups = next; - } - - saved_repeats = state->first_saved_repeats; - while (saved_repeats) { - RE_SavedRepeats* next; - - next = saved_repeats->next; - - dealloc_repeats(saved_repeats->repeats, pattern->repeat_count); - - re_dealloc(saved_repeats); - saved_repeats = next; - } - - if (pattern->groups_storage) - dealloc_groups(state->groups, pattern->true_group_count); - else - pattern->groups_storage = state->groups; - - if (pattern->repeats_storage) - dealloc_repeats(state->repeats, pattern->repeat_count); - else - pattern->repeats_storage = state->repeats; - - frame = state->first_group_call_frame; - while (frame) { - RE_GroupCallFrame* next; - - next = frame->next; - - dealloc_groups(frame->groups, pattern->true_group_count); - dealloc_repeats(frame->repeats, pattern->repeat_count); - - re_dealloc(frame); - frame = next; - } - - for (i = 0; i < pattern->call_ref_info_count; i++) - re_dealloc(state->group_call_guard_list[i].spans); - - if (state->group_call_guard_list) - re_dealloc(state->group_call_guard_list); - - if (state->fuzzy_guards) - dealloc_fuzzy_guards(state->fuzzy_guards, pattern->fuzzy_count); - - Py_DECREF(state->pattern); - Py_DECREF(state->string); -#if PY_VERSION_HEX >= 0x02060000 - - if (state->should_release) - PyBuffer_Release(&state->view); -#endif -} - -/* Converts a string index to an integer. - * - * If the index is None then the default will be returned. 
- */ -Py_LOCAL_INLINE(Py_ssize_t) as_string_index(PyObject* obj, Py_ssize_t def) { - Py_ssize_t value; - - if (obj == Py_None) - return def; - - value = PyInt_AsSsize_t(obj); - if (value != -1 || !PyErr_Occurred()) - return value; - - PyErr_Clear(); - - value = PyLong_AsLong(obj); - if (value != -1 || !PyErr_Occurred()) - return value; - - set_error(RE_ERROR_INDEX, NULL); - return 0; -} - -/* Deallocates a MatchObject. */ -static void match_dealloc(PyObject* self_) { - MatchObject* self; - - self = (MatchObject*)self_; - - Py_XDECREF(self->string); - Py_XDECREF(self->substring); - Py_DECREF(self->pattern); - if (self->groups) - re_dealloc(self->groups); - Py_XDECREF(self->regs); - PyObject_DEL(self); -} - -/* Restricts a value to a range. */ -Py_LOCAL_INLINE(Py_ssize_t) limited_range(Py_ssize_t value, Py_ssize_t lower, - Py_ssize_t upper) { - if (value < lower) - return lower; - - if (value > upper) - return upper; - - return value; -} - -/* Gets a slice from a Unicode string. */ -Py_LOCAL_INLINE(PyObject*) unicode_slice(PyObject* string, Py_ssize_t start, - Py_ssize_t end) { - Py_ssize_t length; - Py_UNICODE* buffer; - - length = PyUnicode_GET_SIZE(string); - start = limited_range(start, 0, length); - end = limited_range(end, 0, length); - - buffer = PyUnicode_AsUnicode(string); - - return PyUnicode_FromUnicode(buffer + start, end - start); -} - -/* Gets a slice from a bytestring. */ -Py_LOCAL_INLINE(PyObject*) bytes_slice(PyObject* string, Py_ssize_t start, - Py_ssize_t end) { - Py_ssize_t length; - char* buffer; - - length = PyString_GET_SIZE(string); - start = limited_range(start, 0, length); - end = limited_range(end, 0, length); - - buffer = PyString_AsString(string); - - return PyString_FromStringAndSize(buffer + start, end - start); -} - -/* Gets a slice from a string, returning either a Unicode string or a - * bytestring. 
- */ -Py_LOCAL_INLINE(PyObject*) get_slice(PyObject* string, Py_ssize_t start, - Py_ssize_t end) { - if (PyUnicode_Check(string)) - return unicode_slice(string, start, end); - - if (PyString_Check(string)) - return bytes_slice(string, start, end); - - return PySequence_GetSlice(string, start, end); -} - -/* Gets a MatchObject's group by integer index. */ -static PyObject* match_get_group_by_index(MatchObject* self, Py_ssize_t index, - PyObject* def) { - RE_GroupSpan* span; - - if (index < 0 || (size_t)index > self->group_count) { - /* Raise error if we were given a bad group number. */ - set_error(RE_ERROR_NO_SUCH_GROUP, NULL); - return NULL; - } - - if (index == 0) - return get_slice(self->substring, self->match_start - - self->substring_offset, self->match_end - self->substring_offset); - - /* Capture group indexes are 1-based (excluding group 0, which is the - * entire matched string). - */ - span = &self->groups[index - 1].span; - - if (span->start < 0 || span->end < 0) { - /* Return default value if the string or group is undefined. */ - Py_INCREF(def); - return def; - } - - return get_slice(self->substring, span->start - self->substring_offset, - span->end - self->substring_offset); -} - -/* Gets a MatchObject's start by integer index. */ -static PyObject* match_get_start_by_index(MatchObject* self, Py_ssize_t index) - { - RE_GroupSpan* span; - - if (index < 0 || (size_t)index > self->group_count) { - /* Raise error if we were given a bad group number. */ - set_error(RE_ERROR_NO_SUCH_GROUP, NULL); - return NULL; - } - - if (index == 0) - return Py_BuildValue("n", self->match_start); - - /* Capture group indexes are 1-based (excluding group 0, which is the - * entire matched string). - */ - span = &self->groups[index - 1].span; - return Py_BuildValue("n", span->start); -} - -/* Gets a MatchObject's starts by integer index. 
*/ -static PyObject* match_get_starts_by_index(MatchObject* self, Py_ssize_t index) - { - RE_GroupData* group; - PyObject* result; - PyObject* item; - size_t i; - - if (index < 0 || (size_t)index > self->group_count) { - /* Raise error if we were given a bad group number. */ - set_error(RE_ERROR_NO_SUCH_GROUP, NULL); - return NULL; - } - - if (index == 0) { - result = PyList_New(1); - if (!result) - return NULL; - - item = Py_BuildValue("n", self->match_start); - if (!item) - goto error; - - /* PyList_SET_ITEM borrows the reference. */ - PyList_SET_ITEM(result, 0, item); - - return result; - } - - /* Capture group indexes are 1-based (excluding group 0, which is the - * entire matched string). - */ - group = &self->groups[index - 1]; - - result = PyList_New((Py_ssize_t)group->capture_count); - if (!result) - return NULL; - - for (i = 0; i < group->capture_count; i++) { - item = Py_BuildValue("n", group->captures[i].start); - if (!item) - goto error; - - /* PyList_SET_ITEM borrows the reference. */ - PyList_SET_ITEM(result, i, item); - } - - return result; - -error: - Py_DECREF(result); - return NULL; -} - -/* Gets a MatchObject's end by integer index. */ -static PyObject* match_get_end_by_index(MatchObject* self, Py_ssize_t index) { - RE_GroupSpan* span; - - if (index < 0 || (size_t)index > self->group_count) { - /* Raise error if we were given a bad group number. */ - set_error(RE_ERROR_NO_SUCH_GROUP, NULL); - return NULL; - } - - if (index == 0) - return Py_BuildValue("n", self->match_end); - - /* Capture group indexes are 1-based (excluding group 0, which is the - * entire matched string). - */ - span = &self->groups[index - 1].span; - return Py_BuildValue("n", span->end); -} - -/* Gets a MatchObject's ends by integer index. 
*/ -static PyObject* match_get_ends_by_index(MatchObject* self, Py_ssize_t index) { - RE_GroupData* group; - PyObject* result; - PyObject* item; - size_t i; - - if (index < 0 || (size_t)index > self->group_count) { - /* Raise error if we were given a bad group number. */ - set_error(RE_ERROR_NO_SUCH_GROUP, NULL); - return NULL; - } - - if (index == 0) { - result = PyList_New(1); - if (!result) - return NULL; - - item = Py_BuildValue("n", self->match_end); - if (!item) - goto error; - - /* PyList_SET_ITEM borrows the reference. */ - PyList_SET_ITEM(result, 0, item); - - return result; - } - - /* Capture group indexes are 1-based (excluding group 0, which is the - * entire matched string). - */ - group = &self->groups[index - 1]; - - result = PyList_New((Py_ssize_t)group->capture_count); - if (!result) - return NULL; - - for (i = 0; i < group->capture_count; i++) { - item = Py_BuildValue("n", group->captures[i].end); - if (!item) - goto error; - - /* PyList_SET_ITEM borrows the reference. */ - PyList_SET_ITEM(result, i, item); - } - - return result; - -error: - Py_DECREF(result); - return NULL; -} - -/* Gets a MatchObject's span by integer index. */ -static PyObject* match_get_span_by_index(MatchObject* self, Py_ssize_t index) { - RE_GroupSpan* span; - - if (index < 0 || (size_t)index > self->group_count) { - /* Raise error if we were given a bad group number. */ - set_error(RE_ERROR_NO_SUCH_GROUP, NULL); - return NULL; - } - - if (index == 0) - return Py_BuildValue("nn", self->match_start, self->match_end); - - /* Capture group indexes are 1-based (excluding group 0, which is the - * entire matched string). - */ - span = &self->groups[index - 1].span; - return Py_BuildValue("nn", span->start, span->end); -} - -/* Gets a MatchObject's spans by integer index. 
*/ -static PyObject* match_get_spans_by_index(MatchObject* self, Py_ssize_t index) - { - RE_GroupData* group; - PyObject* result; - PyObject* item; - size_t i; - - if (index < 0 || (size_t)index > self->group_count) { - /* Raise error if we were given a bad group number. */ - set_error(RE_ERROR_NO_SUCH_GROUP, NULL); - return NULL; - } - - if (index == 0) { - result = PyList_New(1); - if (!result) - return NULL; - - item = Py_BuildValue("nn", self->match_start, self->match_end); - if (!item) - goto error; - - /* PyList_SET_ITEM borrows the reference. */ - PyList_SET_ITEM(result, 0, item); - - return result; - } - - /* Capture group indexes are 1-based (excluding group 0, which is the - * entire matched string). - */ - group = &self->groups[index - 1]; - - result = PyList_New((Py_ssize_t)group->capture_count); - if (!result) - return NULL; - - for (i = 0; i < group->capture_count; i++) { - item = Py_BuildValue("nn", group->captures[i].start, - group->captures[i].end); - if (!item) - goto error; - - /* PyList_SET_ITEM borrows the reference. */ - PyList_SET_ITEM(result, i, item); - } - - return result; - -error: - Py_DECREF(result); - return NULL; -} - -/* Gets a MatchObject's captures by integer index. */ -static PyObject* match_get_captures_by_index(MatchObject* self, Py_ssize_t - index) { - RE_GroupData* group; - PyObject* result; - PyObject* slice; - size_t i; - - if (index < 0 || (size_t)index > self->group_count) { - /* Raise error if we were given a bad group number. */ - set_error(RE_ERROR_NO_SUCH_GROUP, NULL); - return NULL; - } - - if (index == 0) { - result = PyList_New(1); - if (!result) - return NULL; - - slice = get_slice(self->substring, self->match_start - - self->substring_offset, self->match_end - self->substring_offset); - if (!slice) - goto error; - - /* PyList_SET_ITEM borrows the reference. 
*/ - PyList_SET_ITEM(result, 0, slice); - - return result; - } - - /* Capture group indexes are 1-based (excluding group 0, which is the - * entire matched string). - */ - group = &self->groups[index - 1]; - - result = PyList_New((Py_ssize_t)group->capture_count); - if (!result) - return NULL; - - for (i = 0; i < group->capture_count; i++) { - slice = get_slice(self->substring, group->captures[i].start - - self->substring_offset, group->captures[i].end - - self->substring_offset); - if (!slice) - goto error; - - /* PyList_SET_ITEM borrows the reference. */ - PyList_SET_ITEM(result, i, slice); - } - - return result; - -error: - Py_DECREF(result); - return NULL; -} - -/* Converts a group index to an integer. */ -Py_LOCAL_INLINE(Py_ssize_t) as_group_index(PyObject* obj) { - Py_ssize_t value; - - value = PyInt_AsSsize_t(obj); - if (value != -1 || !PyErr_Occurred()) - return value; - - PyErr_Clear(); - - value = PyLong_AsLong(obj); - if (value != -1 || !PyErr_Occurred()) - return value; - - set_error(RE_ERROR_INDEX, NULL); - return -1; -} - -/* Gets a MatchObject's group index. - * - * The supplied index can be an integer or a string (group name) object. - */ -Py_LOCAL_INLINE(Py_ssize_t) match_get_group_index(MatchObject* self, PyObject* - index, BOOL allow_neg) { - Py_ssize_t group; - - /* Is the index an integer? */ - group = as_group_index(index); - if (group != -1 || !PyErr_Occurred()) { - Py_ssize_t min_group = 0; - - /* Adjust negative indices where valid and allowed. */ - if (group < 0 && allow_neg) { - group += (Py_ssize_t)self->group_count + 1; - min_group = 1; - } - - if (min_group <= group && (size_t)group <= self->group_count) - return group; - - return -1; - } - - /* The index might be a group name. */ - if (self->pattern->groupindex) { - /* Look up the name. */ - PyErr_Clear(); - - index = PyObject_GetItem(self->pattern->groupindex, index); - if (index) { - /* Check that we have an integer. 
*/ - group = as_group_index(index); - Py_DECREF(index); - if (group != -1 || !PyErr_Occurred()) - return group; - } - } - - PyErr_Clear(); - return -1; -} - -/* Gets a MatchObject's group by object index. */ -Py_LOCAL_INLINE(PyObject*) match_get_group(MatchObject* self, PyObject* index, - PyObject* def, BOOL allow_neg) { - /* Check that the index is an integer or a string. */ - if (PyInt_Check(index) || PyLong_Check(index) || PyUnicode_Check(index) || - PyString_Check(index)) - return match_get_group_by_index(self, match_get_group_index(self, - index, allow_neg), def); - - set_error(RE_ERROR_GROUP_INDEX_TYPE, index); - return NULL; -} - -/* Gets info from a MatchObject by object index. */ -Py_LOCAL_INLINE(PyObject*) get_by_arg(MatchObject* self, PyObject* index, - RE_GetByIndexFunc get_by_index) { - /* Check that the index is an integer or a string. */ - if (PyInt_Check(index) || PyLong_Check(index) || PyUnicode_Check(index) || - PyString_Check(index)) - return get_by_index(self, match_get_group_index(self, index, FALSE)); - - set_error(RE_ERROR_GROUP_INDEX_TYPE, index); - return NULL; -} - -/* MatchObject's 'group' method. */ -static PyObject* match_group(MatchObject* self, PyObject* args) { - Py_ssize_t size; - PyObject* result; - Py_ssize_t i; - - size = PyTuple_GET_SIZE(args); - - switch (size) { - case 0: - /* group() */ - result = match_get_group_by_index(self, 0, Py_None); - break; - case 1: - /* group(x). PyTuple_GET_ITEM borrows the reference. */ - result = match_get_group(self, PyTuple_GET_ITEM(args, 0), Py_None, - FALSE); - break; - default: - /* group(x, y, z, ...) */ - /* Fetch multiple items. */ - result = PyTuple_New(size); - if (!result) - return NULL; - - for (i = 0; i < size; i++) { - PyObject* item; - - /* PyTuple_GET_ITEM borrows the reference. */ - item = match_get_group(self, PyTuple_GET_ITEM(args, i), Py_None, - FALSE); - if (!item) { - Py_DECREF(result); - return NULL; - } - - /* PyTuple_SET_ITEM borrows the reference. 
*/ - PyTuple_SET_ITEM(result, i, item); - } - break; - } - - return result; -} - -/* Generic method for getting info from a MatchObject. */ -Py_LOCAL_INLINE(PyObject*) get_from_match(MatchObject* self, PyObject* args, - RE_GetByIndexFunc get_by_index) { - Py_ssize_t size; - PyObject* result; - Py_ssize_t i; - - size = PyTuple_GET_SIZE(args); - - switch (size) { - case 0: - /* get() */ - result = get_by_index(self, 0); - break; - case 1: - /* get(x). PyTuple_GET_ITEM borrows the reference. */ - result = get_by_arg(self, PyTuple_GET_ITEM(args, 0), get_by_index); - break; - default: - /* get(x, y, z, ...) */ - /* Fetch multiple items. */ - result = PyTuple_New(size); - if (!result) - return NULL; - - for (i = 0; i < size; i++) { - PyObject* item; - - /* PyTuple_GET_ITEM borrows the reference. */ - item = get_by_arg(self, PyTuple_GET_ITEM(args, i), get_by_index); - if (!item) { - Py_DECREF(result); - return NULL; - } - - /* PyTuple_SET_ITEM borrows the reference. */ - PyTuple_SET_ITEM(result, i, item); - } - break; - } - - return result; -} - -/* MatchObject's 'start' method. */ -static PyObject* match_start(MatchObject* self, PyObject* args) { - return get_from_match(self, args, match_get_start_by_index); -} - -/* MatchObject's 'starts' method. */ -static PyObject* match_starts(MatchObject* self, PyObject* args) { - return get_from_match(self, args, match_get_starts_by_index); -} - -/* MatchObject's 'end' method. */ -static PyObject* match_end(MatchObject* self, PyObject* args) { - return get_from_match(self, args, match_get_end_by_index); -} - -/* MatchObject's 'ends' method. */ -static PyObject* match_ends(MatchObject* self, PyObject* args) { - return get_from_match(self, args, match_get_ends_by_index); -} - -/* MatchObject's 'span' method. */ -static PyObject* match_span(MatchObject* self, PyObject* args) { - return get_from_match(self, args, match_get_span_by_index); -} - -/* MatchObject's 'spans' method. 
*/ -static PyObject* match_spans(MatchObject* self, PyObject* args) { - return get_from_match(self, args, match_get_spans_by_index); -} - -/* MatchObject's 'captures' method. */ -static PyObject* match_captures(MatchObject* self, PyObject* args) { - return get_from_match(self, args, match_get_captures_by_index); -} - -/* MatchObject's 'groups' method. */ -static PyObject* match_groups(MatchObject* self, PyObject* args, PyObject* - kwargs) { - PyObject* result; - size_t g; - - PyObject* def = Py_None; - static char* kwlist[] = { "default", NULL }; - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|O:groups", kwlist, &def)) - return NULL; - - result = PyTuple_New((Py_ssize_t)self->group_count); - if (!result) - return NULL; - - /* Group 0 is the entire matched portion of the string. */ - for (g = 0; g < self->group_count; g++) { - PyObject* item; - - item = match_get_group_by_index(self, (Py_ssize_t)g + 1, def); - if (!item) - goto error; - - /* PyTuple_SET_ITEM borrows the reference. */ - PyTuple_SET_ITEM(result, g, item); - } - - return result; - -error: - Py_DECREF(result); - return NULL; -} - -/* MatchObject's 'groupdict' method. */ -static PyObject* match_groupdict(MatchObject* self, PyObject* args, PyObject* - kwargs) { - PyObject* result; - PyObject* keys; - Py_ssize_t g; - - PyObject* def = Py_None; - static char* kwlist[] = { "default", NULL }; - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|O:groupdict", kwlist, - &def)) - return NULL; - - result = PyDict_New(); - if (!result || !self->pattern->groupindex) - return result; - - keys = PyMapping_Keys(self->pattern->groupindex); - if (!keys) - goto failed; - - for (g = 0; g < PyList_GET_SIZE(keys); g++) { - PyObject* key; - PyObject* value; - int status; - - /* PyList_GET_ITEM borrows a reference. 
*/ - key = PyList_GET_ITEM(keys, g); - if (!key) - goto failed; - - value = match_get_group(self, key, def, FALSE); - if (!value) - goto failed; - - status = PyDict_SetItem(result, key, value); - Py_DECREF(value); - if (status < 0) - goto failed; - } - - Py_DECREF(keys); - - return result; - -failed: - Py_XDECREF(keys); - Py_DECREF(result); - return NULL; -} - -/* MatchObject's 'capturesdict' method. */ -static PyObject* match_capturesdict(MatchObject* self) { - PyObject* result; - PyObject* keys; - Py_ssize_t g; - - result = PyDict_New(); - if (!result || !self->pattern->groupindex) - return result; - - keys = PyMapping_Keys(self->pattern->groupindex); - if (!keys) - goto failed; - - for (g = 0; g < PyList_GET_SIZE(keys); g++) { - PyObject* key; - Py_ssize_t group; - PyObject* captures; - int status; - - /* PyList_GET_ITEM borrows a reference. */ - key = PyList_GET_ITEM(keys, g); - if (!key) - goto failed; - - group = match_get_group_index(self, key, FALSE); - if (group < 0) - goto failed; - - captures = match_get_captures_by_index(self, group); - if (!captures) - goto failed; - - status = PyDict_SetItem(result, key, captures); - Py_DECREF(captures); - if (status < 0) - goto failed; - } - - Py_DECREF(keys); - - return result; - -failed: - Py_XDECREF(keys); - Py_DECREF(result); - return NULL; -} - -/* Gets a Python object by name from a named module. */ -Py_LOCAL_INLINE(PyObject*) get_object(char* module_name, char* object_name) { - PyObject* module; - PyObject* object; - - module = PyImport_ImportModule(module_name); - if (!module) - return NULL; - - object = PyObject_GetAttrString(module, object_name); - Py_DECREF(module); - - return object; -} - -/* Calls a function in a module. 
*/ -Py_LOCAL_INLINE(PyObject*) call(char* module_name, char* function_name, - PyObject* args) { - PyObject* function; - PyObject* result; - - if (!args) - return NULL; - - function = get_object(module_name, function_name); - if (!function) - return NULL; - - result = PyObject_CallObject(function, args); - Py_DECREF(function); - Py_DECREF(args); - - return result; -} - -/* Gets a replacement item from the replacement list. - * - * The replacement item could be a string literal or a group. - */ -Py_LOCAL_INLINE(PyObject*) get_match_replacement(MatchObject* self, PyObject* - item, size_t group_count) { - Py_ssize_t index; - - if (PyUnicode_Check(item) || PyString_Check(item)) { - /* It's a literal, which can be added directly to the list. */ - Py_INCREF(item); - return item; - } - - /* Is it a group reference? */ - index = as_group_index(item); - if (index == -1 && PyErr_Occurred()) { - /* Not a group either! */ - set_error(RE_ERROR_REPLACEMENT, NULL); - return NULL; - } - - if (index == 0) { - /* The entire matched portion of the string. */ - return get_slice(self->substring, self->match_start - - self->substring_offset, self->match_end - self->substring_offset); - } else if (index >= 1 && (size_t)index <= group_count) { - /* A group. If it didn't match then return None instead. */ - RE_GroupData* group; - - group = &self->groups[index - 1]; - - if (group->capture_count > 0) - return get_slice(self->substring, group->span.start - - self->substring_offset, group->span.end - - self->substring_offset); - else { - Py_INCREF(Py_None); - return Py_None; - } - } else { - /* No such group. */ - set_error(RE_ERROR_NO_SUCH_GROUP, NULL); - return NULL; - } -} - -/* Initialises the join list. */ -Py_LOCAL_INLINE(void) init_join_list(JoinInfo* join_info, BOOL reversed, BOOL - is_unicode) { - join_info->list = NULL; - join_info->item = NULL; - join_info->reversed = reversed; - join_info->is_unicode = is_unicode; -} - -/* Adds an item to the join list. 
*/ -Py_LOCAL_INLINE(int) add_to_join_list(JoinInfo* join_info, PyObject* item) { - PyObject* new_item; - int status; - - if (join_info->is_unicode) { - if (PyUnicode_Check(item)) { - new_item = item; - Py_INCREF(new_item); - } else { - new_item = PyUnicode_FromObject(item); - if (!new_item) { - set_error(RE_ERROR_NOT_UNICODE, item); - return RE_ERROR_NOT_UNICODE; - } - } - } else { - if (PyString_Check(item)) { - new_item = item; - Py_INCREF(new_item); - } else { - new_item = PyUnicode_FromObject(item); - if (!new_item) { - set_error(RE_ERROR_NOT_STRING, item); - return RE_ERROR_NOT_STRING; - } - } - } - - /* If the list already exists then just add the item to it. */ - if (join_info->list) { - status = PyList_Append(join_info->list, new_item); - if (status < 0) - goto error; - - Py_DECREF(new_item); - return status; - } - - /* If we already have an item then we now have 2(!) and we need to put them - * into a list. - */ - if (join_info->item) { - join_info->list = PyList_New(2); - if (!join_info->list) { - status = RE_ERROR_MEMORY; - goto error; - } - - /* PyList_SET_ITEM borrows the reference. */ - PyList_SET_ITEM(join_info->list, 0, join_info->item); - join_info->item = NULL; - - /* PyList_SET_ITEM borrows the reference. */ - PyList_SET_ITEM(join_info->list, 1, new_item); - return 0; - } - - /* This is the first item. */ - join_info->item = new_item; - - return 0; - -error: - Py_DECREF(new_item); - set_error(status, NULL); - return status; -} - -/* Clears the join list. */ -Py_LOCAL_INLINE(void) clear_join_list(JoinInfo* join_info) { - Py_XDECREF(join_info->list); - Py_XDECREF(join_info->item); -} - -/* Joins together a list of strings for pattern_subx. */ -Py_LOCAL_INLINE(PyObject*) join_list_info(JoinInfo* join_info) { - /* If the list already exists then just do the join. */ - if (join_info->list) { - PyObject* joiner; - PyObject* result; - - if (join_info->reversed) - /* The list needs to be reversed before being joined. 
*/ - PyList_Reverse(join_info->list); - - if (join_info->is_unicode) { - /* Concatenate the Unicode strings. */ - joiner = PyUnicode_FromUnicode(NULL, 0); - if (!joiner) { - clear_join_list(join_info); - return NULL; - } - - result = PyUnicode_Join(joiner, join_info->list); - } else { - joiner = PyString_FromString(""); - if (!joiner) { - clear_join_list(join_info); - return NULL; - } - - /* Concatenate the bytestrings. */ - result = _PyString_Join(joiner, join_info->list); - } - - Py_DECREF(joiner); - clear_join_list(join_info); - - return result; - } - - /* If we have only 1 item, so we'll just return it. */ - if (join_info->item) - return join_info->item; - - /* There are no items, so return an empty string. */ - if (join_info->is_unicode) - return PyUnicode_FromUnicode(NULL, 0); - else - return PyString_FromString(""); -} - -/* Checks whether a string replacement is a literal. - * - * To keep it simple we'll say that a literal is a string which can be used - * as-is. - * - * Returns its length if it is a literal, otherwise -1. - */ -Py_LOCAL_INLINE(Py_ssize_t) check_replacement_string(PyObject* str_replacement, - unsigned char special_char) { - RE_StringInfo str_info; - Py_UCS4 (*char_at)(void* text, Py_ssize_t pos); - Py_ssize_t pos; - - if (!get_string(str_replacement, &str_info)) - return -1; - - switch (str_info.charsize) { - case 1: - char_at = bytes1_char_at; - break; - case 2: - char_at = bytes2_char_at; - break; - case 4: - char_at = bytes4_char_at; - break; - default: -#if PY_VERSION_HEX >= 0x02060000 - release_buffer(&str_info); -#endif - return -1; - } - - for (pos = 0; pos < str_info.length; pos++) { - if (char_at(str_info.characters, pos) == special_char) { -#if PY_VERSION_HEX >= 0x02060000 - release_buffer(&str_info); - -#endif - return -1; - } - } - -#if PY_VERSION_HEX >= 0x02060000 - release_buffer(&str_info); - -#endif - return str_info.length; -} - -/* MatchObject's 'expand' method. 
*/ -static PyObject* match_expand(MatchObject* self, PyObject* str_template) { - Py_ssize_t literal_length; - PyObject* replacement; - JoinInfo join_info; - Py_ssize_t size; - Py_ssize_t i; - - /* Is the template just a literal? */ - literal_length = check_replacement_string(str_template, '\\'); - if (literal_length >= 0) { - /* It's a literal. */ - Py_INCREF(str_template); - return str_template; - } - - /* Hand the template to the template compiler. */ - replacement = call(RE_MODULE, "_compile_replacement_helper", - PyTuple_Pack(2, self->pattern, str_template)); - if (!replacement) - return NULL; - - init_join_list(&join_info, FALSE, PyUnicode_Check(self->string)); - - /* Add each part of the template to the list. */ - size = PyList_GET_SIZE(replacement); - for (i = 0; i < size; i++) { - PyObject* item; - PyObject* str_item; - - /* PyList_GET_ITEM borrows a reference. */ - item = PyList_GET_ITEM(replacement, i); - str_item = get_match_replacement(self, item, self->group_count); - if (!str_item) - goto error; - - /* Add to the list. */ - if (str_item == Py_None) - Py_DECREF(str_item); - else { - int status; - - status = add_to_join_list(&join_info, str_item); - Py_DECREF(str_item); - if (status < 0) - goto error; - } - } - - Py_DECREF(replacement); - - /* Convert the list to a single string (also cleans up join_info). */ - return join_list_info(&join_info); - -error: - clear_join_list(&join_info); - Py_DECREF(replacement); - return NULL; -} - -#if PY_VERSION_HEX >= 0x02060000 -/* Gets a MatchObject's group dictionary. */ -Py_LOCAL_INLINE(PyObject*) match_get_group_dict(MatchObject* self) { - PyObject* result; - PyObject* keys; - Py_ssize_t g; - - result = PyDict_New(); - if (!result || !self->pattern->groupindex) - return result; - - keys = PyMapping_Keys(self->pattern->groupindex); - if (!keys) - goto failed; - - for (g = 0; g < PyList_GET_SIZE(keys); g++) { - int status; - PyObject* key; - PyObject* value; - - /* PyList_GET_ITEM borrows a reference. 
*/ - key = PyList_GET_ITEM(keys, g); - if (!key) - goto failed; - - value = match_get_group(self, key, Py_None, FALSE); - if (!value) - goto failed; - - status = PyDict_SetItem(result, key, value); - Py_DECREF(value); - if (status < 0) - goto failed; - } - - Py_DECREF(keys); - - return result; - -failed: - Py_XDECREF(keys); - Py_DECREF(result); - return NULL; -} - -/* MatchObject's 'expandf' method. */ -static PyObject* match_expandf(MatchObject* self, PyObject* str_template) { - PyObject* format_func; - PyObject* args = NULL; - size_t g; - PyObject* kwargs = NULL; - PyObject* result; - - format_func = PyObject_GetAttrString(str_template, "format"); - if (!format_func) - return NULL; - - args = PyTuple_New((Py_ssize_t)self->group_count + 1); - if (!args) - goto error; - - for (g = 0; g < self->group_count + 1; g++) - /* PyTuple_SetItem borrows the reference. */ - PyTuple_SetItem(args, (Py_ssize_t)g, match_get_group_by_index(self, - (Py_ssize_t)g, Py_None)); - - kwargs = match_get_group_dict(self); - if (!kwargs) - goto error; - - result = PyObject_Call(format_func, args, kwargs); - Py_DECREF(kwargs); - Py_DECREF(args); - Py_DECREF(format_func); - - return result; - -error: - Py_XDECREF(args); - Py_DECREF(format_func); - return NULL; -} - -#endif -Py_LOCAL_INLINE(PyObject*) make_match_copy(MatchObject* self); - -/* MatchObject's '__copy__' method. */ -static PyObject* match_copy(MatchObject* self, PyObject *unused) { - return make_match_copy(self); -} - -/* MatchObject's '__deepcopy__' method. */ -static PyObject* match_deepcopy(MatchObject* self, PyObject* memo) { - return make_match_copy(self); -} - -/* MatchObject's 'regs' attribute. */ -static PyObject* match_regs(MatchObject* self) { - PyObject* regs; - PyObject* item; - size_t g; - - regs = PyTuple_New((Py_ssize_t)self->group_count + 1); - if (!regs) - return NULL; - - item = Py_BuildValue("nn", self->match_start, self->match_end); - if (!item) - goto error; - - /* PyTuple_SET_ITEM borrows the reference. 
*/ - PyTuple_SET_ITEM(regs, 0, item); - - for (g = 0; g < self->group_count; g++) { - RE_GroupSpan* span; - - span = &self->groups[g].span; - item = Py_BuildValue("nn", span->start, span->end); - if (!item) - goto error; - - /* PyTuple_SET_ITEM borrows the reference. */ - PyTuple_SET_ITEM(regs, g + 1, item); - } - - Py_INCREF(regs); - self->regs = regs; - - return regs; - -error: - Py_DECREF(regs); - return NULL; -} - -/* MatchObject's slice method. */ -Py_LOCAL_INLINE(PyObject*) match_get_group_slice(MatchObject* self, PyObject* - slice) { - Py_ssize_t start; - Py_ssize_t end; - Py_ssize_t step; - Py_ssize_t slice_length; - - if (PySlice_GetIndicesEx((PySliceObject*)slice, - (Py_ssize_t)self->group_count + 1, &start, &end, &step, &slice_length) < - 0) - return NULL; - - if (slice_length <= 0) - return PyTuple_New(0); - else { - PyObject* result; - Py_ssize_t cur; - Py_ssize_t i; - - result = PyTuple_New(slice_length); - if (!result) - return NULL; - - cur = start; - for (i = 0; i < slice_length; i++) { - /* PyTuple_SetItem borrows the reference. */ - PyTuple_SetItem(result, i, match_get_group_by_index(self, cur, - Py_None)); - cur += step; - } - - return result; - } -} - -/* MatchObject's length method. */ -Py_LOCAL_INLINE(Py_ssize_t) match_length(MatchObject* self) { - return (Py_ssize_t)self->group_count + 1; -} - -/* MatchObject's '__getitem__' method. */ -static PyObject* match_getitem(MatchObject* self, PyObject* item) { - if (PySlice_Check(item)) - return match_get_group_slice(self, item); - - return match_get_group(self, item, Py_None, TRUE); -} - -/* Determines the portion of the target string which is covered by the group - * captures. 
- */ -Py_LOCAL_INLINE(void) determine_target_substring(MatchObject* match, - Py_ssize_t* slice_start, Py_ssize_t* slice_end) { - Py_ssize_t start; - Py_ssize_t end; - size_t g; - - start = match->pos; - end = match->endpos; - - for (g = 0; g < match->group_count; g++) { - RE_GroupSpan* span; - size_t c; - - span = &match->groups[g].span; - if (span->start >= 0 && span->start < start) - start = span->start; - if (span->end >= 0 && span->end > end) - end = span->end; - - for (c = 0; c < match->groups[g].capture_count; c++) { - RE_GroupSpan* span; - - span = match->groups[g].captures; - if (span->start >= 0 && span->start < start) - start = span->start; - if (span->end >= 0 && span->end > end) - end = span->end; - } - } - - *slice_start = start; - *slice_end = end; -} - -/* MatchObject's 'detach_string' method. */ -static PyObject* match_detach_string(MatchObject* self, PyObject* unused) { - if (self->string) { - Py_ssize_t start; - Py_ssize_t end; - PyObject* substring; - - determine_target_substring(self, &start, &end); - - substring = get_slice(self->string, start, end); - if (substring) { - Py_XDECREF(self->substring); - self->substring = substring; - self->substring_offset = start; - - Py_DECREF(self->string); - self->string = NULL; - } - } - - Py_INCREF(Py_None); - return Py_None; -} - -/* The documentation of a MatchObject. */ -PyDoc_STRVAR(match_group_doc, - "group([group1, ...]) --> string or tuple of strings.\n\ - Return one or more subgroups of the match. If there is a single argument,\n\ - the result is a single string, or None if the group did not contribute to\n\ - the match; if there are multiple arguments, the result is a tuple with one\n\ - item per argument; if there are no arguments, the whole match is returned.\n\ - Group 0 is the whole match."); - -PyDoc_STRVAR(match_start_doc, - "start([group1, ...]) --> int or tuple of ints.\n\ - Return the index of the start of one or more subgroups of the match. 
If\n\ - there is a single argument, the result is an index, or -1 if the group did\n\ - not contribute to the match; if there are multiple arguments, the result is\n\ - a tuple with one item per argument; if there are no arguments, the index of\n\ - the start of the whole match is returned. Group 0 is the whole match."); - -PyDoc_STRVAR(match_end_doc, - "end([group1, ...]) --> int or tuple of ints.\n\ - Return the index of the end of one or more subgroups of the match. If there\n\ - is a single argument, the result is an index, or -1 if the group did not\n\ - contribute to the match; if there are multiple arguments, the result is a\n\ - tuple with one item per argument; if there are no arguments, the index of\n\ - the end of the whole match is returned. Group 0 is the whole match."); - -PyDoc_STRVAR(match_span_doc, - "span([group1, ...]) --> 2-tuple of int or tuple of 2-tuple of ints.\n\ - Return the span (a 2-tuple of the indices of the start and end) of one or\n\ - more subgroups of the match. If there is a single argument, the result is a\n\ - span, or (-1, -1) if the group did not contribute to the match; if there are\n\ - multiple arguments, the result is a tuple with one item per argument; if\n\ - there are no arguments, the span of the whole match is returned. Group 0 is\n\ - the whole match."); - -PyDoc_STRVAR(match_groups_doc, - "groups(default=None) --> tuple of strings.\n\ - Return a tuple containing all the subgroups of the match. The argument is\n\ - the default for groups that did not participate in the match."); - -PyDoc_STRVAR(match_groupdict_doc, - "groupdict(default=None) --> dict.\n\ - Return a dictionary containing all the named subgroups of the match, keyed\n\ - by the subgroup name. 
The argument is the value to be given for groups that\n\ - did not participate in the match."); - -PyDoc_STRVAR(match_capturesdict_doc, - "capturesdict() --> dict.\n\ - Return a dictionary containing the captures of all the named subgroups of the\n\ - match, keyed by the subgroup name."); - -PyDoc_STRVAR(match_expand_doc, - "expand(template) --> string.\n\ - Return the string obtained by doing backslash substitution on the template,\n\ - as done by the sub() method."); - -#if PY_VERSION_HEX >= 0x02060000 -PyDoc_STRVAR(match_expandf_doc, - "expandf(format) --> string.\n\ - Return the string obtained by using the format, as done by the subf()\n\ - method."); - -#endif -PyDoc_STRVAR(match_captures_doc, - "captures([group1, ...]) --> list of strings or tuple of list of strings.\n\ - Return the captures of one or more subgroups of the match. If there is a\n\ - single argument, the result is a list of strings; if there are multiple\n\ - arguments, the result is a tuple of lists with one item per argument; if\n\ - there are no arguments, the captures of the whole match is returned. Group\n\ - 0 is the whole match."); - -PyDoc_STRVAR(match_starts_doc, - "starts([group1, ...]) --> list of ints or tuple of list of ints.\n\ - Return the indices of the starts of the captures of one or more subgroups of\n\ - the match. If there is a single argument, the result is a list of indices;\n\ - if there are multiple arguments, the result is a tuple of lists with one\n\ - item per argument; if there are no arguments, the indices of the starts of\n\ - the captures of the whole match is returned. Group 0 is the whole match."); - -PyDoc_STRVAR(match_ends_doc, - "ends([group1, ...]) --> list of ints or tuple of list of ints.\n\ - Return the indices of the ends of the captures of one or more subgroups of\n\ - the match. 
If there is a single argument, the result is a list of indices;\n\ - if there are multiple arguments, the result is a tuple of lists with one\n\ - item per argument; if there are no arguments, the indices of the ends of the\n\ - captures of the whole match is returned. Group 0 is the whole match."); - -PyDoc_STRVAR(match_spans_doc, - "spans([group1, ...]) --> list of 2-tuple of ints or tuple of list of 2-tuple of ints.\n\ - Return the spans (a 2-tuple of the indices of the start and end) of the\n\ - captures of one or more subgroups of the match. If there is a single\n\ - argument, the result is a list of spans; if there are multiple arguments,\n\ - the result is a tuple of lists with one item per argument; if there are no\n\ - arguments, the spans of the captures of the whole match is returned. Group\n\ - 0 is the whole match."); - -PyDoc_STRVAR(match_detach_string_doc, - "detach_string()\n\ - Detaches the target string from the match object. The 'string' attribute\n\ - will become None."); - -/* MatchObject's methods. 
*/ -static PyMethodDef match_methods[] = { - {"group", (PyCFunction)match_group, METH_VARARGS, match_group_doc}, - {"start", (PyCFunction)match_start, METH_VARARGS, match_start_doc}, - {"end", (PyCFunction)match_end, METH_VARARGS, match_end_doc}, - {"span", (PyCFunction)match_span, METH_VARARGS, match_span_doc}, - {"groups", (PyCFunction)match_groups, METH_VARARGS|METH_KEYWORDS, - match_groups_doc}, - {"groupdict", (PyCFunction)match_groupdict, METH_VARARGS|METH_KEYWORDS, - match_groupdict_doc}, - {"capturesdict", (PyCFunction)match_capturesdict, METH_NOARGS, - match_capturesdict_doc}, - {"expand", (PyCFunction)match_expand, METH_O, match_expand_doc}, -#if PY_VERSION_HEX >= 0x02060000 - {"expandf", (PyCFunction)match_expandf, METH_O, match_expandf_doc}, -#endif - {"captures", (PyCFunction)match_captures, METH_VARARGS, - match_captures_doc}, - {"starts", (PyCFunction)match_starts, METH_VARARGS, match_starts_doc}, - {"ends", (PyCFunction)match_ends, METH_VARARGS, match_ends_doc}, - {"spans", (PyCFunction)match_spans, METH_VARARGS, match_spans_doc}, - {"detach_string", (PyCFunction)match_detach_string, METH_NOARGS, - match_detach_string_doc}, - {"__copy__", (PyCFunction)match_copy, METH_NOARGS}, - {"__deepcopy__", (PyCFunction)match_deepcopy, METH_O}, - {"__getitem__", (PyCFunction)match_getitem, METH_O|METH_COEXIST}, - {NULL, NULL} -}; - -PyDoc_STRVAR(match_doc, "Match object"); - -/* MatchObject's 'lastindex' attribute. */ -static PyObject* match_lastindex(PyObject* self_) { - MatchObject* self; - - self = (MatchObject*)self_; - - if (self->lastindex >= 0) - return Py_BuildValue("n", self->lastindex); - - Py_INCREF(Py_None); - return Py_None; -} - -/* MatchObject's 'lastgroup' attribute. 
*/ -static PyObject* match_lastgroup(PyObject* self_) { - MatchObject* self; - - self = (MatchObject*)self_; - - if (self->pattern->indexgroup && self->lastgroup >= 0) { - PyObject* index; - PyObject* result; - - index = Py_BuildValue("n", self->lastgroup); - - /* PyDict_GetItem returns borrows a reference. */ - result = PyDict_GetItem(self->pattern->indexgroup, index); - Py_DECREF(index); - if (result) { - Py_INCREF(result); - return result; - } - PyErr_Clear(); - } - - Py_INCREF(Py_None); - return Py_None; -} - -/* MatchObject's 'string' attribute. */ -static PyObject* match_string(PyObject* self_) { - MatchObject* self; - - self = (MatchObject*)self_; - - if (self->string) { - Py_INCREF(self->string); - return self->string; - } else { - Py_INCREF(Py_None); - return Py_None; - } -} -#if PY_VERSION_HEX < 0x02060000 - -/* MatchObject's 'partial' attribute. */ -static PyObject* match_partial(PyObject* self_) { - MatchObject* self; - PyObject* result; - - self = (MatchObject*)self_; - - result = self->partial ? Py_True : Py_False; - Py_INCREF(result); - - return result; -} -#endif - -/* MatchObject's 'fuzzy_counts' attribute. 
*/ -static PyObject* match_fuzzy_counts(PyObject* self_) { - MatchObject* self; - - self = (MatchObject*)self_; - - return Py_BuildValue("nnn", self->fuzzy_counts[RE_FUZZY_SUB], - self->fuzzy_counts[RE_FUZZY_INS], self->fuzzy_counts[RE_FUZZY_DEL]); -} - -static PyGetSetDef match_getset[] = { - {"lastindex", (getter)match_lastindex, (setter)NULL, - "The group number of the last matched capturing group, or None."}, - {"lastgroup", (getter)match_lastgroup, (setter)NULL, - "The name of the last matched capturing group, or None."}, - {"regs", (getter)match_regs, (setter)NULL, - "A tuple of the spans of the capturing groups."}, - {"string", (getter)match_string, (setter)NULL, - "The string that was searched, or None if it has been detached."}, -#if PY_VERSION_HEX < 0x02060000 - {"partial", (getter)match_partial, (setter)NULL, - "Whether it's a partial match."}, -#endif - {"fuzzy_counts", (getter)match_fuzzy_counts, (setter)NULL, - "A tuple of the number of substitutions, insertions and deletions."}, - {NULL} /* Sentinel */ -}; - -static PyMemberDef match_members[] = { - {"re", T_OBJECT, offsetof(MatchObject, pattern), READONLY, - "The regex object that produced this match object."}, - {"pos", T_PYSSIZET, offsetof(MatchObject, pos), READONLY, - "The position at which the regex engine starting searching."}, - {"endpos", T_PYSSIZET, offsetof(MatchObject, endpos), READONLY, - "The final position beyond which the regex engine won't search."}, -#if PY_VERSION_HEX >= 0x02060000 - {"partial", T_BOOL, offsetof(MatchObject, partial), READONLY, - "Whether it's a partial match."}, -#endif - {NULL} /* Sentinel */ -}; - -static PyMappingMethods match_as_mapping = { - (lenfunc)match_length, /* mp_length */ - (binaryfunc)match_getitem, /* mp_subscript */ - 0, /* mp_ass_subscript */ -}; - -static PyTypeObject Match_Type = { - PyObject_HEAD_INIT(NULL) - 0, - "_" RE_MODULE "." "Match", - sizeof(MatchObject) -}; - -/* Copies the groups. 
*/ -Py_LOCAL_INLINE(RE_GroupData*) copy_groups(RE_GroupData* groups, size_t - group_count) { - size_t span_count; - size_t g; - RE_GroupData* groups_copy; - RE_GroupSpan* spans_copy; - size_t offset; - - /* Calculate the total size of the group info. */ - span_count = 0; - for (g = 0; g < group_count; g++) - span_count += groups[g].capture_count; - - /* Allocate the storage for the group info in a single block. */ - groups_copy = (RE_GroupData*)re_alloc(group_count * sizeof(RE_GroupData) + - span_count * sizeof(RE_GroupSpan)); - if (!groups_copy) - return NULL; - - /* The storage for the spans comes after the other group info. */ - spans_copy = (RE_GroupSpan*)&groups_copy[group_count]; - - /* There's no need to initialise the spans info. */ - memset(groups_copy, 0, group_count * sizeof(RE_GroupData)); - - offset = 0; - for (g = 0; g < group_count; g++) { - RE_GroupData* orig; - RE_GroupData* copy; - - orig = &groups[g]; - copy = &groups_copy[g]; - copy->span = orig->span; - - copy->captures = &spans_copy[offset]; - offset += orig->capture_count; - - if (orig->capture_count > 0) { - Py_MEMCPY(copy->captures, orig->captures, orig->capture_count * - sizeof(RE_GroupSpan)); - copy->capture_capacity = orig->capture_count; - copy->capture_count = orig->capture_count; - } - } - - return groups_copy; -} - -/* Makes a copy of a MatchObject. */ -Py_LOCAL_INLINE(PyObject*) make_match_copy(MatchObject* self) { - MatchObject* match; - - if (!self->string) { - /* The target string has been detached, so the MatchObject is now - * immutable. - */ - Py_INCREF(self); - return (PyObject*)self; - } - - /* Create a MatchObject. */ - match = PyObject_NEW(MatchObject, &Match_Type); - if (!match) - return NULL; - - Py_MEMCPY(match, self, sizeof(MatchObject)); - - Py_INCREF(match->string); - Py_INCREF(match->substring); - Py_INCREF(match->pattern); - - /* Copy the groups to the MatchObject. 
*/ - if (self->group_count > 0) { - match->groups = copy_groups(self->groups, self->group_count); - if (!match->groups) { - Py_DECREF(match); - return NULL; - } - } - - return (PyObject*)match; -} - -/* Creates a new MatchObject. */ -Py_LOCAL_INLINE(PyObject*) pattern_new_match(PatternObject* pattern, RE_State* - state, int status) { - /* Create MatchObject (from state object). */ - if (status > 0 || status == RE_ERROR_PARTIAL) { - MatchObject* match; - - /* Create a MatchObject. */ - match = PyObject_NEW(MatchObject, &Match_Type); - if (!match) - return NULL; - - match->string = state->string; - match->substring = state->string; - match->substring_offset = 0; - match->pattern = pattern; - match->regs = NULL; - match->fuzzy_counts[RE_FUZZY_SUB] = - state->total_fuzzy_counts[RE_FUZZY_SUB]; - match->fuzzy_counts[RE_FUZZY_INS] = - state->total_fuzzy_counts[RE_FUZZY_INS]; - match->fuzzy_counts[RE_FUZZY_DEL] = - state->total_fuzzy_counts[RE_FUZZY_DEL]; - match->partial = status == RE_ERROR_PARTIAL; - Py_INCREF(match->string); - Py_INCREF(match->substring); - Py_INCREF(match->pattern); - - /* Copy the groups to the MatchObject. */ - if (pattern->public_group_count > 0) { - match->groups = copy_groups(state->groups, - pattern->public_group_count); - if (!match->groups) { - Py_DECREF(match); - return NULL; - } - } else - match->groups = NULL; - - match->group_count = pattern->public_group_count; - - match->pos = state->slice_start; - match->endpos = state->slice_end; - - if (state->reverse) { - match->match_start = state->text_pos; - match->match_end = state->match_pos; - } else { - match->match_start = state->match_pos; - match->match_end = state->text_pos; - } - - match->lastindex = state->lastindex; - match->lastgroup = state->lastgroup; - - return (PyObject*)match; - } else if (status == 0) { - /* No match. */ - Py_INCREF(Py_None); - return Py_None; - } else { - /* Internal error. 
*/ - set_error(status, NULL); - return NULL; - } -} - -/* Gets the text of a capture group from a state. */ -Py_LOCAL_INLINE(PyObject*) state_get_group(RE_State* state, Py_ssize_t index, - PyObject* string, BOOL empty) { - RE_GroupData* group; - Py_ssize_t start; - Py_ssize_t end; - - group = &state->groups[index - 1]; - - if (string != Py_None && index >= 1 && (size_t)index <= - state->pattern->public_group_count && group->capture_count > 0) { - start = group->span.start; - end = group->span.end; - } else { - if (empty) - /* Want an empty string. */ - start = end = 0; - else { - Py_INCREF(Py_None); - return Py_None; - } - } - - return get_slice(string, start, end); -} - -/* Acquires the lock (mutex) on the state if there's one. - * - * It also increments the owner's refcount just to ensure that it won't be - * destroyed by another thread. - */ -Py_LOCAL_INLINE(void) acquire_state_lock(PyObject* owner, RE_SafeState* - safe_state) { - RE_State* state; - - state = safe_state->re_state; - - if (state->lock) { - /* In order to avoid deadlock we need to release the GIL while trying - * to acquire the lock. - */ - Py_INCREF(owner); - if (!PyThread_acquire_lock(state->lock, 0)) { - release_GIL(safe_state); - PyThread_acquire_lock(state->lock, 1); - acquire_GIL(safe_state); - } - } -} - -/* Releases the lock (mutex) on the state if there's one. - * - * It also decrements the owner's refcount, which was incremented when the lock - * was acquired. - */ -Py_LOCAL_INLINE(void) release_state_lock(PyObject* owner, RE_SafeState* - safe_state) { - RE_State* state; - - state = safe_state->re_state; - - if (state->lock) { - PyThread_release_lock(state->lock); - Py_DECREF(owner); - } -} - -/* Implements the functionality of ScanObject's search and match methods. 
*/ -Py_LOCAL_INLINE(PyObject*) scanner_search_or_match(ScannerObject* self, BOOL - search) { - RE_State* state; - RE_SafeState safe_state; - PyObject* match; - - state = &self->state; - - /* Initialise the "safe state" structure. */ - safe_state.re_state = state; - safe_state.thread_state = NULL; - - /* Acquire the state lock in case we're sharing the scanner object across - * threads. - */ - acquire_state_lock((PyObject*)self, &safe_state); - - if (self->status == RE_ERROR_FAILURE || self->status == RE_ERROR_PARTIAL) { - /* No or partial match. */ - release_state_lock((PyObject*)self, &safe_state); - Py_INCREF(Py_None); - return Py_None; - } else if (self->status < 0) { - /* Internal error. */ - release_state_lock((PyObject*)self, &safe_state); - set_error(self->status, NULL); - return NULL; - } - - /* Look for another match. */ - self->status = do_match(&safe_state, search); - if (self->status >= 0 || self->status == RE_ERROR_PARTIAL) { - /* Create the match object. */ - match = pattern_new_match(self->pattern, state, self->status); - - if (search && state->overlapped) { - /* Advance one character. */ - Py_ssize_t step; - - step = state->reverse ? -1 : 1; - state->text_pos = state->match_pos + step; - state->must_advance = FALSE; - } else - /* Continue from where we left off, but don't allow 2 contiguous - * zero-width matches. - */ - state->must_advance = state->text_pos == state->match_pos; - } else - /* Internal error. */ - match = NULL; - - /* Release the state lock. */ - release_state_lock((PyObject*)self, &safe_state); - - return match; -} - -/* ScannerObject's 'match' method. */ -static PyObject* scanner_match(ScannerObject* self, PyObject* unused) { - return scanner_search_or_match(self, FALSE); -} - -/* ScannerObject's 'search' method. */ -static PyObject* scanner_search(ScannerObject* self, PyObject *unused) { - return scanner_search_or_match(self, TRUE); -} - -/* ScannerObject's 'next' method. 
*/ -static PyObject* scanner_next(PyObject* self) { - PyObject* match; - - match = scanner_search((ScannerObject*)self, NULL); - - if (match == Py_None) { - /* No match. */ - Py_DECREF(Py_None); - PyErr_SetNone(PyExc_StopIteration); - return NULL; - } - - return match; -} - -/* Returns an iterator for a ScannerObject. - * - * The iterator is actually the ScannerObject itself. - */ -static PyObject* scanner_iter(PyObject* self) { - Py_INCREF(self); - return self; -} - -/* Gets the next result from a scanner iterator. */ -static PyObject* scanner_iternext(PyObject* self) { - PyObject* match; - - match = scanner_search((ScannerObject*)self, NULL); - - if (match == Py_None) { - /* No match. */ - Py_DECREF(match); - return NULL; - } - - return match; -} - -/* Makes a copy of a ScannerObject. - * - * It actually doesn't make a copy, just returns the original object. - */ -Py_LOCAL_INLINE(PyObject*) make_scanner_copy(ScannerObject* self) { - Py_INCREF(self); - return (PyObject*)self; -} - -/* ScannerObject's '__copy__' method. */ -static PyObject* scanner_copy(ScannerObject* self, PyObject *unused) { - return make_scanner_copy(self); -} - -/* ScannerObject's '__deepcopy__' method. */ -static PyObject* scanner_deepcopy(ScannerObject* self, PyObject* memo) { - return make_scanner_copy(self); -} - -/* The documentation of a ScannerObject. */ -PyDoc_STRVAR(scanner_match_doc, - "match() --> MatchObject or None.\n\ - Match at the current position in the string."); - -PyDoc_STRVAR(scanner_search_doc, - "search() --> MatchObject or None.\n\ - Search from the current position in the string."); - -/* ScannerObject's methods. 
*/ -static PyMethodDef scanner_methods[] = { - {"next", (PyCFunction)scanner_next, METH_NOARGS}, - {"match", (PyCFunction)scanner_match, METH_NOARGS, scanner_match_doc}, - {"search", (PyCFunction)scanner_search, METH_NOARGS, scanner_search_doc}, - {"__copy__", (PyCFunction)scanner_copy, METH_NOARGS}, - {"__deepcopy__", (PyCFunction)scanner_deepcopy, METH_O}, - {NULL, NULL} -}; - -PyDoc_STRVAR(scanner_doc, "Scanner object"); - -/* Deallocates a ScannerObject. */ -static void scanner_dealloc(PyObject* self_) { - ScannerObject* self; - - self = (ScannerObject*)self_; - - state_fini(&self->state); - Py_DECREF(self->pattern); - PyObject_DEL(self); -} - -static PyMemberDef scanner_members[] = { - {"pattern", T_OBJECT, offsetof(ScannerObject, pattern), READONLY, - "The regex object that produced this scanner object."}, - {NULL} /* Sentinel */ -}; - -static PyTypeObject Scanner_Type = { - PyObject_HEAD_INIT(NULL) - 0, - "_" RE_MODULE "." "Scanner", - sizeof(ScannerObject) -}; - -/* Decodes a 'concurrent' argument. */ -Py_LOCAL_INLINE(int) decode_concurrent(PyObject* concurrent) { - Py_ssize_t value; - - if (concurrent == Py_None) - return RE_CONC_DEFAULT; - - value = PyLong_AsLong(concurrent); - if (value == -1 && PyErr_Occurred()) { - set_error(RE_ERROR_CONCURRENT, NULL); - return -1; - } - - return value ? RE_CONC_YES : RE_CONC_NO; -} - -/* Decodes a 'partial' argument. */ -Py_LOCAL_INLINE(BOOL) decode_partial(PyObject* partial) { - Py_ssize_t value; - - if (partial == Py_False) - return FALSE; - - if (partial == Py_True) - return TRUE; - - value = PyLong_AsLong(partial); - if (value == -1 && PyErr_Occurred()) { - PyErr_Clear(); - return TRUE; - } - - return value != 0; -} - -/* Creates a new ScannerObject. */ -static PyObject* pattern_scanner(PatternObject* pattern, PyObject* args, - PyObject* kwargs) { - /* Create search state object. 
*/ - ScannerObject* self; - Py_ssize_t start; - Py_ssize_t end; - int conc; - BOOL part; - - PyObject* string; - PyObject* pos = Py_None; - PyObject* endpos = Py_None; - Py_ssize_t overlapped = FALSE; - PyObject* concurrent = Py_None; - PyObject* partial = Py_False; - static char* kwlist[] = { "string", "pos", "endpos", "overlapped", - "concurrent", "partial", NULL }; - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|OOnOO:scanner", kwlist, - &string, &pos, &endpos, &overlapped, &concurrent, &partial)) - return NULL; - - start = as_string_index(pos, 0); - if (start == -1 && PyErr_Occurred()) - return NULL; - - end = as_string_index(endpos, PY_SSIZE_T_MAX); - if (end == -1 && PyErr_Occurred()) - return NULL; - - conc = decode_concurrent(concurrent); - if (conc < 0) - return NULL; - - part = decode_partial(partial); - - /* Create a scanner object. */ - self = PyObject_NEW(ScannerObject, &Scanner_Type); - if (!self) - return NULL; - - self->pattern = pattern; - Py_INCREF(self->pattern); - - /* The MatchObject, and therefore repeated captures, will be visible. */ - if (!state_init(&self->state, pattern, string, start, end, overlapped != 0, - conc, part, TRUE, TRUE, FALSE)) { - PyObject_DEL(self); - return NULL; - } - - self->status = RE_ERROR_SUCCESS; - - return (PyObject*) self; -} - -/* Performs the split for the SplitterObject. */ -Py_LOCAL_INLINE(PyObject*) next_split_part(SplitterObject* self) { - RE_State* state; - RE_SafeState safe_state; - PyObject* result = NULL; /* Initialise to stop compiler warning. */ - - state = &self->state; - - /* Initialise the "safe state" structure. */ - safe_state.re_state = state; - safe_state.thread_state = NULL; - - /* Acquire the state lock in case we're sharing the splitter object across - * threads. - */ - acquire_state_lock((PyObject*)self, &safe_state); - - if (self->status == RE_ERROR_FAILURE || self->status == RE_ERROR_PARTIAL) { - /* Finished. 
*/ - release_state_lock((PyObject*)self, &safe_state); - result = Py_False; - Py_INCREF(result); - return result; - } else if (self->status < 0) { - /* Internal error. */ - release_state_lock((PyObject*)self, &safe_state); - set_error(self->status, NULL); - return NULL; - } - - if (self->index == 0) { - if (self->split_count < self->maxsplit) { - Py_ssize_t step; - Py_ssize_t end_pos; - - if (state->reverse) { - step = -1; - end_pos = state->slice_start; - } else { - step = 1; - end_pos = state->slice_end; - } - -retry: - self->status = do_match(&safe_state, TRUE); - if (self->status < 0) - goto error; - - if (self->status == RE_ERROR_SUCCESS) { - if (state->version_0) { - /* Version 0 behaviour is to advance one character if the - * split was zero-width. Unfortunately, this can give an - * incorrect result. GvR wants this behaviour to be - * retained so as not to break any existing software which - * might rely on it. - */ - if (state->text_pos == state->match_pos) { - if (self->last_pos == end_pos) - goto no_match; - - /* Advance one character. */ - state->text_pos += step; - state->must_advance = FALSE; - goto retry; - } - } - - ++self->split_count; - - /* Get segment before this match. */ - if (state->reverse) - result = get_slice(state->string, state->match_pos, - self->last_pos); - else - result = get_slice(state->string, self->last_pos, - state->match_pos); - if (!result) - goto error; - - self->last_pos = state->text_pos; - - /* Version 0 behaviour is to advance one character if the match - * was zero-width. Unfortunately, this can give an incorrect - * result. GvR wants this behaviour to be retained so as not to - * break any existing software which might rely on it. - */ - if (state->version_0) { - if (state->text_pos == state->match_pos) - /* Advance one character. */ - state->text_pos += step; - - state->must_advance = FALSE; - } else - /* Continue from where we left off, but don't allow a - * contiguous zero-width match. 
- */ - state->must_advance = TRUE; - } - } else - goto no_match; - - if (self->status == RE_ERROR_FAILURE || self->status == - RE_ERROR_PARTIAL) { -no_match: - /* Get segment following last match (even if empty). */ - if (state->reverse) - result = get_slice(state->string, 0, self->last_pos); - else - result = get_slice(state->string, self->last_pos, - state->text_length); - if (!result) - goto error; - } - } else { - /* Add group. */ - result = state_get_group(state, self->index, state->string, FALSE); - if (!result) - goto error; - } - - ++self->index; - if ((size_t)self->index > state->pattern->public_group_count) - self->index = 0; - - /* Release the state lock. */ - release_state_lock((PyObject*)self, &safe_state); - - return result; - -error: - /* Release the state lock. */ - release_state_lock((PyObject*)self, &safe_state); - - return NULL; -} - -/* SplitterObject's 'split' method. */ -static PyObject* splitter_split(SplitterObject* self, PyObject *unused) { - PyObject* result; - - result = next_split_part(self); - - if (result == Py_False) { - /* The sentinel. */ - Py_DECREF(Py_False); - Py_INCREF(Py_None); - return Py_None; - } - - return result; -} - -/* SplitterObject's 'next' method. */ -static PyObject* splitter_next(PyObject* self) { - PyObject* result; - - result = next_split_part((SplitterObject*)self); - - if (result == Py_False) { - /* No match. */ - Py_DECREF(Py_False); - PyErr_SetNone(PyExc_StopIteration); - return NULL; - } - - return result; -} - -/* Returns an iterator for a SplitterObject. - * - * The iterator is actually the SplitterObject itself. - */ -static PyObject* splitter_iter(PyObject* self) { - Py_INCREF(self); - return self; -} - -/* Gets the next result from a splitter iterator. */ -static PyObject* splitter_iternext(PyObject* self) { - PyObject* result; - - result = next_split_part((SplitterObject*)self); - - if (result == Py_False) { - /* No match. 
*/ - Py_DECREF(result); - return NULL; - } - - return result; -} - -/* Makes a copy of a SplitterObject. - * - * It actually doesn't make a copy, just returns the original object. - */ -Py_LOCAL_INLINE(PyObject*) make_splitter_copy(SplitterObject* self) { - Py_INCREF(self); - return (PyObject*)self; -} - -/* SplitterObject's '__copy__' method. */ -static PyObject* splitter_copy(SplitterObject* self, PyObject *unused) { - return make_splitter_copy(self); -} - -/* SplitterObject's '__deepcopy__' method. */ -static PyObject* splitter_deepcopy(SplitterObject* self, PyObject* memo) { - return make_splitter_copy(self); -} - -/* The documentation of a SplitterObject. */ -PyDoc_STRVAR(splitter_split_doc, - "split() --> string or None.\n\ - Return the next part of the split string."); - -/* SplitterObject's methods. */ -static PyMethodDef splitter_methods[] = { - {"next", (PyCFunction)splitter_next, METH_NOARGS}, - {"split", (PyCFunction)splitter_split, METH_NOARGS, splitter_split_doc}, - {"__copy__", (PyCFunction)splitter_copy, METH_NOARGS}, - {"__deepcopy__", (PyCFunction)splitter_deepcopy, METH_O}, - {NULL, NULL} -}; - -PyDoc_STRVAR(splitter_doc, "Splitter object"); - -/* Deallocates a SplitterObject. */ -static void splitter_dealloc(PyObject* self_) { - SplitterObject* self; - - self = (SplitterObject*)self_; - - state_fini(&self->state); - Py_DECREF(self->pattern); - PyObject_DEL(self); -} - -static PyMemberDef splitter_members[] = { - {"pattern", T_OBJECT, offsetof(SplitterObject, pattern), READONLY, - "The regex object that produced this splitter object."}, - {NULL} /* Sentinel */ -}; - -static PyTypeObject Splitter_Type = { - PyObject_HEAD_INIT(NULL) - 0, - "_" RE_MODULE "." "Splitter", - sizeof(SplitterObject) -}; - -/* Creates a new SplitterObject. */ -Py_LOCAL_INLINE(PyObject*) pattern_splitter(PatternObject* pattern, PyObject* - args, PyObject* kwargs) { - /* Create split state object. 
*/ - int conc; - SplitterObject* self; - RE_State* state; - - PyObject* string; - Py_ssize_t maxsplit = 0; - PyObject* concurrent = Py_None; - static char* kwlist[] = { "string", "maxsplit", "concurrent", NULL }; - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|nO:splitter", kwlist, - &string, &maxsplit, &concurrent)) - return NULL; - - conc = decode_concurrent(concurrent); - if (conc < 0) - return NULL; - - /* Create a splitter object. */ - self = PyObject_NEW(SplitterObject, &Splitter_Type); - if (!self) - return NULL; - - self->pattern = pattern; - Py_INCREF(self->pattern); - - if (maxsplit == 0) - maxsplit = PY_SSIZE_T_MAX; - - state = &self->state; - - /* The MatchObject, and therefore repeated captures, will not be visible. - */ - if (!state_init(state, pattern, string, 0, PY_SSIZE_T_MAX, FALSE, conc, - FALSE, TRUE, FALSE, FALSE)) { - PyObject_DEL(self); - return NULL; - } - - self->maxsplit = maxsplit; - self->last_pos = state->reverse ? state->text_length : 0; - self->split_count = 0; - self->index = 0; - self->status = 1; - - return (PyObject*) self; -} - -/* Implements the functionality of PatternObject's search and match methods. */ -Py_LOCAL_INLINE(PyObject*) pattern_search_or_match(PatternObject* self, - PyObject* args, PyObject* kwargs, char* args_desc, BOOL search, BOOL - match_all) { - Py_ssize_t start; - Py_ssize_t end; - int conc; - BOOL part; - RE_State state; - RE_SafeState safe_state; - int status; - PyObject* match; - - PyObject* string; - PyObject* pos = Py_None; - PyObject* endpos = Py_None; - PyObject* concurrent = Py_None; - PyObject* partial = Py_False; - static char* kwlist[] = { "string", "pos", "endpos", "concurrent", - "partial", NULL }; - /* When working with a short string, such as a line from a file, the - * relative cost of PyArg_ParseTupleAndKeywords can be significant, and - * it's worth not using it when there are only positional arguments. 
- */ - Py_ssize_t arg_count; - if (args && !kwargs && PyTuple_CheckExact(args)) - arg_count = PyTuple_GET_SIZE(args); - else - arg_count = -1; - - if (1 <= arg_count && arg_count <= 5) { - /* PyTuple_GET_ITEM borrows the reference. */ - string = PyTuple_GET_ITEM(args, 0); - if (arg_count >= 2) - pos = PyTuple_GET_ITEM(args, 1); - if (arg_count >= 3) - endpos = PyTuple_GET_ITEM(args, 2); - if (arg_count >= 4) - concurrent = PyTuple_GET_ITEM(args, 3); - if (arg_count >= 5) - partial = PyTuple_GET_ITEM(args, 4); - } else if (!PyArg_ParseTupleAndKeywords(args, kwargs, args_desc, kwlist, - &string, &pos, &endpos, &concurrent, &partial)) - return NULL; - - start = as_string_index(pos, 0); - if (start == -1 && PyErr_Occurred()) - return NULL; - - end = as_string_index(endpos, PY_SSIZE_T_MAX); - if (end == -1 && PyErr_Occurred()) - return NULL; - - conc = decode_concurrent(concurrent); - if (conc < 0) - return NULL; - - part = decode_partial(partial); - - /* The MatchObject, and therefore repeated captures, will be visible. */ - if (!state_init(&state, self, string, start, end, FALSE, conc, part, FALSE, - TRUE, match_all)) - return NULL; - - /* Initialise the "safe state" structure. */ - safe_state.re_state = &state; - safe_state.thread_state = NULL; - - status = do_match(&safe_state, search); - - if (status >= 0 || status == RE_ERROR_PARTIAL) - /* Create the match object. */ - match = pattern_new_match(self, &state, status); - else - match = NULL; - - state_fini(&state); - - return match; -} - -/* PatternObject's 'match' method. */ -static PyObject* pattern_match(PatternObject* self, PyObject* args, PyObject* - kwargs) { - return pattern_search_or_match(self, args, kwargs, "O|OOOO:match", FALSE, - FALSE); -} - -/* PatternObject's 'fullmatch' method. 
*/ -static PyObject* pattern_fullmatch(PatternObject* self, PyObject* args, - PyObject* kwargs) { - return pattern_search_or_match(self, args, kwargs, "O|OOOO:fullmatch", - FALSE, TRUE); -} - -/* PatternObject's 'search' method. */ -static PyObject* pattern_search(PatternObject* self, PyObject* args, PyObject* - kwargs) { - return pattern_search_or_match(self, args, kwargs, "O|OOOO:search", TRUE, - FALSE); -} - -/* Gets the limits of the matching. */ -Py_LOCAL_INLINE(BOOL) get_limits(PyObject* pos, PyObject* endpos, Py_ssize_t - length, Py_ssize_t* start, Py_ssize_t* end) { - Py_ssize_t s; - Py_ssize_t e; - - s = as_string_index(pos, 0); - if (s == -1 && PyErr_Occurred()) - return FALSE; - - e = as_string_index(endpos, PY_SSIZE_T_MAX); - if (e == -1 && PyErr_Occurred()) - return FALSE; - - /* Adjust boundaries. */ - if (s < 0) - s += length; - if (s < 0) - s = 0; - else if (s > length) - s = length; - - if (e < 0) - e += length; - if (e < 0) - e = 0; - else if (e > length) - e = length; - - *start = s; - *end = e; - - return TRUE; -} - -/* Gets a replacement item from the replacement list. - * - * The replacement item could be a string literal or a group. - * - * It can return None to represent an empty string. - */ -Py_LOCAL_INLINE(PyObject*) get_sub_replacement(PyObject* item, PyObject* - string, RE_State* state, size_t group_count) { - Py_ssize_t index; - - if (PyUnicode_CheckExact(item) || PyString_CheckExact(item)) { - /* It's a literal, which can be added directly to the list. */ - Py_INCREF(item); - return item; - } - - /* Is it a group reference? */ - index = as_group_index(item); - if (index == -1 && PyErr_Occurred()) { - /* Not a group either! */ - set_error(RE_ERROR_REPLACEMENT, NULL); - return NULL; - } - - if (index == 0) { - /* The entire matched portion of the string. */ - if (state->match_pos == state->text_pos) { - /* Return None for "". 
*/ - Py_INCREF(Py_None); - return Py_None; - } - - if (state->reverse) - return get_slice(string, state->text_pos, state->match_pos); - else - return get_slice(string, state->match_pos, state->text_pos); - } else if (1 <= index && (size_t)index <= group_count) { - /* A group. */ - RE_GroupData* group; - - group = &state->groups[index - 1]; - - if (group->capture_count == 0 && group->span.start != group->span.end) - { - /* The group didn't match or is "", so return None for "". */ - Py_INCREF(Py_None); - return Py_None; - } - - return get_slice(string, group->span.start, group->span.end); - } else { - /* No such group. */ - set_error(RE_ERROR_INVALID_GROUP_REF, NULL); - return NULL; - } -} - -/* PatternObject's 'subx' method. */ -Py_LOCAL_INLINE(PyObject*) pattern_subx(PatternObject* self, PyObject* - str_template, PyObject* string, Py_ssize_t maxsub, int sub_type, PyObject* - pos, PyObject* endpos, int concurrent) { - RE_StringInfo str_info; - Py_ssize_t start; - Py_ssize_t end; - BOOL is_callable = FALSE; - BOOL is_literal = FALSE; - BOOL is_template = FALSE; - PyObject* replacement = NULL; -#if PY_VERSION_HEX >= 0x02060000 - BOOL is_format = FALSE; -#endif - RE_State state; - RE_SafeState safe_state; - JoinInfo join_info; - Py_ssize_t sub_count; - Py_ssize_t last_pos; - PyObject* item; - Py_ssize_t end_pos; - Py_ssize_t step; - - /* Get the string. */ - if (!get_string(string, &str_info)) - return NULL; - - /* Get the limits of the search. */ - if (!get_limits(pos, endpos, str_info.length, &start, &end)) { -#if PY_VERSION_HEX >= 0x02060000 - release_buffer(&str_info); - -#endif - return NULL; - } - - /* If the pattern is too long for the string, then take a shortcut, unless - * it's a fuzzy pattern. 
- */ - if (!self->is_fuzzy && self->min_width > end - start) { - PyObject* result; - - Py_INCREF(string); - - if (sub_type & RE_SUBN) - result = Py_BuildValue("Nn", string, 0); - else - result = string; - -#if PY_VERSION_HEX >= 0x02060000 - release_buffer(&str_info); - -#endif - return result; - } - - if (maxsub == 0) - maxsub = PY_SSIZE_T_MAX; - - /* sub/subn takes either a function or a string template. */ - if (PyCallable_Check(str_template)) { - /* It's callable. */ - is_callable = TRUE; - - replacement = str_template; - Py_INCREF(replacement); -#if PY_VERSION_HEX >= 0x02060000 - } else if (sub_type & RE_SUBF) { - /* Is it a literal format? - * - * To keep it simple we'll say that a literal is a string which can be - * used as-is, so no placeholders. - */ - Py_ssize_t literal_length; - - literal_length = check_replacement_string(str_template, '{'); - if (literal_length > 0) { - /* It's a literal. */ - is_literal = TRUE; - - replacement = str_template; - Py_INCREF(replacement); - } else if (literal_length < 0) { - /* It isn't a literal, so get the 'format' method. */ - is_format = TRUE; - - replacement = PyObject_GetAttrString(str_template, "format"); - if (!replacement) { - release_buffer(&str_info); - return NULL; - } - } -#endif - } else { - /* Is it a literal template? - * - * To keep it simple we'll say that a literal is a string which can be - * used as-is, so no backslashes. - */ - Py_ssize_t literal_length; - - literal_length = check_replacement_string(str_template, '\\'); - if (literal_length > 0) { - /* It's a literal. */ - is_literal = TRUE; - - replacement = str_template; - Py_INCREF(replacement); - } else if (literal_length < 0 ) { - /* It isn't a literal, so hand it over to the template compiler. 
*/ - is_template = TRUE; - - replacement = call(RE_MODULE, "_compile_replacement_helper", - PyTuple_Pack(2, self, str_template)); - if (!replacement) { -#if PY_VERSION_HEX >= 0x02060000 - release_buffer(&str_info); - -#endif - return NULL; - } - } - } - - /* The MatchObject, and therefore repeated captures, will be visible only - * if the replacement is callable. - */ - if (!state_init_2(&state, self, string, &str_info, start, end, FALSE, - concurrent, FALSE, FALSE, is_callable, FALSE)) { -#if PY_VERSION_HEX >= 0x02060000 - release_buffer(&str_info); - -#endif - Py_XDECREF(replacement); - return NULL; - } - - /* Initialise the "safe state" structure. */ - safe_state.re_state = &state; - safe_state.thread_state = NULL; - - init_join_list(&join_info, state.reverse, PyUnicode_Check(string)); - - sub_count = 0; - last_pos = state.reverse ? state.text_length : 0; - step = state.reverse ? -1 : 1; - while (sub_count < maxsub) { - int status; - - status = do_match(&safe_state, TRUE); - if (status < 0) - goto error; - - if (status == 0) - break; - - /* Append the segment before this match. */ - if (state.match_pos != last_pos) { - if (state.reverse) - item = get_slice(string, state.match_pos, last_pos); - else - item = get_slice(string, last_pos, state.match_pos); - if (!item) - goto error; - - /* Add to the list. */ - status = add_to_join_list(&join_info, item); - Py_DECREF(item); - if (status < 0) - goto error; - } - - /* Add this match. */ - if (is_literal) { - /* The replacement is a literal string. */ - status = add_to_join_list(&join_info, replacement); - if (status < 0) - goto error; -#if PY_VERSION_HEX >= 0x02060000 - } else if (is_format) { - /* The replacement is a format string. */ - MatchObject* match; - PyObject* args; - size_t g; - PyObject* kwargs; - - /* We need to create the arguments for the 'format' method. We'll - * start by creating a MatchObject. 
- */ - match = (MatchObject*)pattern_new_match(self, &state, 1); - if (!match) - goto error; - - /* The args are a tuple of the capture group matches. */ - args = PyTuple_New((Py_ssize_t)state.pattern->public_group_count + - 1); - if (!args) { - Py_DECREF(match); - goto error; - } - - for (g = 0; g < state.pattern->public_group_count + 1; g++) - /* PyTuple_SetItem borrows the reference. */ - PyTuple_SetItem(args, (Py_ssize_t)g, - match_get_group_by_index(match, (Py_ssize_t)g, Py_None)); - - /* The kwargs are a dict of the named capture group matches. */ - kwargs = match_get_group_dict(match); - if (!kwargs) { - Py_DECREF(args); - Py_DECREF(match); - goto error; - } - - /* Call the 'format' method. */ - item = PyObject_Call(replacement, args, kwargs); - Py_DECREF(kwargs); - Py_DECREF(args); - Py_DECREF(match); - if (!item) - goto error; - - /* Add the result to the list. */ - status = add_to_join_list(&join_info, item); - Py_DECREF(item); - if (status < 0) - goto error; -#endif - } else if (is_template) { - /* The replacement is a list template. */ - Py_ssize_t size; - Py_ssize_t i; - - /* Add each part of the template to the list. */ - size = PyList_GET_SIZE(replacement); - for (i = 0; i < size; i++) { - PyObject* item; - PyObject* str_item; - - /* PyList_GET_ITEM borrows a reference. */ - item = PyList_GET_ITEM(replacement, i); - str_item = get_sub_replacement(item, string, &state, - self->public_group_count); - if (!str_item) - goto error; - - /* Add the result to the list. */ - if (str_item == Py_None) - /* None for "". */ - Py_DECREF(str_item); - else { - status = add_to_join_list(&join_info, str_item); - Py_DECREF(str_item); - if (status < 0) - goto error; - } - } - } else if (is_callable) { - /* Pass a MatchObject to the replacement function. */ - PyObject* match; - PyObject* args; - - /* We need to create a MatchObject to pass to the replacement - * function. 
- */ - match = pattern_new_match(self, &state, 1); - if (!match) - goto error; - - /* The args for the replacement function. */ - args = PyTuple_Pack(1, match); - if (!args) { - Py_DECREF(match); - goto error; - } - - /* Call the replacement function. */ - item = PyObject_CallObject(replacement, args); - Py_DECREF(args); - Py_DECREF(match); - if (!item) - goto error; - - /* Add the result to the list. */ - status = add_to_join_list(&join_info, item); - Py_DECREF(item); - if (status < 0) - goto error; - } - - ++sub_count; - - last_pos = state.text_pos; - - if (state.version_0) { - /* Always advance after a zero-width match. */ - if (state.match_pos == state.text_pos) { - state.text_pos += step; - state.must_advance = FALSE; - } else - state.must_advance = TRUE; - } else - /* Continue from where we left off, but don't allow a contiguous - * zero-width match. - */ - state.must_advance = state.match_pos == state.text_pos; - } - - /* Get the segment following the last match. We use 'length' instead of - * 'text_length' because the latter is truncated to 'slice_end', a - * documented idiosyncracy of the 're' module. - */ - end_pos = state.reverse ? 0 : str_info.length; - if (last_pos != end_pos) { - int status; - - /* The segment is part of the original string. */ - if (state.reverse) - item = get_slice(string, 0, last_pos); - else - item = get_slice(string, last_pos, str_info.length); - if (!item) - goto error; - - status = add_to_join_list(&join_info, item); - Py_DECREF(item); - if (status < 0) - goto error; - } - - Py_XDECREF(replacement); - - /* Convert the list to a single string (also cleans up join_info). */ - item = join_list_info(&join_info); - - state_fini(&state); - - if (!item) - return NULL; - - if (sub_type & RE_SUBN) - return Py_BuildValue("Nn", item, sub_count); - - return item; - -error: - clear_join_list(&join_info); - state_fini(&state); - Py_XDECREF(replacement); - return NULL; -} - -/* PatternObject's 'sub' method. 
*/ -static PyObject* pattern_sub(PatternObject* self, PyObject* args, PyObject* - kwargs) { - int conc; - - PyObject* replacement; - PyObject* string; - Py_ssize_t count = 0; - PyObject* pos = Py_None; - PyObject* endpos = Py_None; - PyObject* concurrent = Py_None; - static char* kwlist[] = { "repl", "string", "count", "pos", "endpos", - "concurrent", NULL }; - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|nOOO:sub", kwlist, - &replacement, &string, &count, &pos, &endpos, &concurrent)) - return NULL; - - conc = decode_concurrent(concurrent); - if (conc < 0) - return NULL; - - return pattern_subx(self, replacement, string, count, RE_SUB, pos, endpos, - conc); -} - -#if PY_VERSION_HEX >= 0x02060000 -/* PatternObject's 'subf' method. */ -static PyObject* pattern_subf(PatternObject* self, PyObject* args, PyObject* - kwargs) { - int conc; - - PyObject* format; - PyObject* string; - Py_ssize_t count = 0; - PyObject* pos = Py_None; - PyObject* endpos = Py_None; - PyObject* concurrent = Py_None; - static char* kwlist[] = { "format", "string", "count", "pos", "endpos", - "concurrent", NULL }; - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|nOOO:sub", kwlist, - &format, &string, &count, &pos, &endpos, &concurrent)) - return NULL; - - conc = decode_concurrent(concurrent); - if (conc < 0) - return NULL; - - return pattern_subx(self, format, string, count, RE_SUBF, pos, endpos, - conc); -} - -#endif -/* PatternObject's 'subn' method. 
*/ -static PyObject* pattern_subn(PatternObject* self, PyObject* args, PyObject* - kwargs) { - int conc; - - PyObject* replacement; - PyObject* string; - Py_ssize_t count = 0; - PyObject* pos = Py_None; - PyObject* endpos = Py_None; - PyObject* concurrent = Py_None; - static char* kwlist[] = { "repl", "string", "count", "pos", "endpos", - "concurrent", NULL }; - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|nOOO:subn", kwlist, - &replacement, &string, &count, &pos, &endpos, &concurrent)) - return NULL; - - conc = decode_concurrent(concurrent); - if (conc < 0) - return NULL; - - return pattern_subx(self, replacement, string, count, RE_SUBN, pos, endpos, - conc); -} - -#if PY_VERSION_HEX >= 0x02060000 -/* PatternObject's 'subfn' method. */ -static PyObject* pattern_subfn(PatternObject* self, PyObject* args, PyObject* - kwargs) { - int conc; - - PyObject* format; - PyObject* string; - Py_ssize_t count = 0; - PyObject* pos = Py_None; - PyObject* endpos = Py_None; - PyObject* concurrent = Py_None; - static char* kwlist[] = { "format", "string", "count", "pos", "endpos", - "concurrent", NULL }; - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|nOOO:subn", kwlist, - &format, &string, &count, &pos, &endpos, &concurrent)) - return NULL; - - conc = decode_concurrent(concurrent); - if (conc < 0) - return NULL; - - return pattern_subx(self, format, string, count, RE_SUBF | RE_SUBN, pos, - endpos, conc); -} - -#endif -/* PatternObject's 'split' method. 
*/ -static PyObject* pattern_split(PatternObject* self, PyObject* args, PyObject* - kwargs) { - int conc; - - RE_State state; - RE_SafeState safe_state; - PyObject* list; - PyObject* item; - int status; - Py_ssize_t split_count; - size_t g; - Py_ssize_t start_pos; - Py_ssize_t end_pos; - Py_ssize_t step; - Py_ssize_t last_pos; - - PyObject* string; - Py_ssize_t maxsplit = 0; - PyObject* concurrent = Py_None; - static char* kwlist[] = { "string", "maxsplit", "concurrent", NULL }; - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|nO:split", kwlist, - &string, &maxsplit, &concurrent)) - return NULL; - - if (maxsplit == 0) - maxsplit = PY_SSIZE_T_MAX; - - conc = decode_concurrent(concurrent); - if (conc < 0) - return NULL; - - /* The MatchObject, and therefore repeated captures, will not be visible. - */ - if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX, FALSE, conc, - FALSE, FALSE, FALSE, FALSE)) - return NULL; - - /* Initialise the "safe state" structure. */ - safe_state.re_state = &state; - safe_state.thread_state = NULL; - - list = PyList_New(0); - if (!list) { - state_fini(&state); - return NULL; - } - - split_count = 0; - if (state.reverse) { - start_pos = state.text_length; - end_pos = 0; - step = -1; - } else { - start_pos = 0; - end_pos = state.text_length; - step = 1; - } - - last_pos = start_pos; - while (split_count < maxsplit) { - status = do_match(&safe_state, TRUE); - if (status < 0) - goto error; - - if (status == 0) - /* No more matches. */ - break; - - if (state.version_0) { - /* Version 0 behaviour is to advance one character if the split was - * zero-width. Unfortunately, this can give an incorrect result. - * GvR wants this behaviour to be retained so as not to break any - * existing software which might rely on it. - */ - if (state.text_pos == state.match_pos) { - if (last_pos == end_pos) - break; - - /* Advance one character. 
*/ - state.text_pos += step; - state.must_advance = FALSE; - continue; - } - } - - /* Get segment before this match. */ - if (state.reverse) - item = get_slice(string, state.match_pos, last_pos); - else - item = get_slice(string, last_pos, state.match_pos); - if (!item) - goto error; - - status = PyList_Append(list, item); - Py_DECREF(item); - if (status < 0) - goto error; - - /* Add groups (if any). */ - for (g = 1; g <= self->public_group_count; g++) { - item = state_get_group(&state, (Py_ssize_t)g, string, FALSE); - if (!item) - goto error; - - status = PyList_Append(list, item); - Py_DECREF(item); - if (status < 0) - goto error; - } - - ++split_count; - last_pos = state.text_pos; - - /* Version 0 behaviour is to advance one character if the match was - * zero-width. Unfortunately, this can give an incorrect result. GvR - * wants this behaviour to be retained so as not to break any existing - * software which might rely on it. - */ - if (state.version_0) { - if (state.text_pos == state.match_pos) - /* Advance one character. */ - state.text_pos += step; - - state.must_advance = FALSE; - } else - /* Continue from where we left off, but don't allow a contiguous - * zero-width match. - */ - state.must_advance = TRUE; - } - - /* Get segment following last match (even if empty). */ - if (state.reverse) - item = get_slice(string, 0, last_pos); - else - item = get_slice(string, last_pos, state.text_length); - if (!item) - goto error; - - status = PyList_Append(list, item); - Py_DECREF(item); - if (status < 0) - goto error; - - state_fini(&state); - - return list; - -error: - Py_DECREF(list); - state_fini(&state); - return NULL; -} - -/* PatternObject's 'splititer' method. */ -static PyObject* pattern_splititer(PatternObject* pattern, PyObject* args, - PyObject* kwargs) { - return pattern_splitter(pattern, args, kwargs); -} - -/* PatternObject's 'findall' method. 
*/ -static PyObject* pattern_findall(PatternObject* self, PyObject* args, PyObject* - kwargs) { - Py_ssize_t start; - Py_ssize_t end; - RE_State state; - int conc; - RE_SafeState safe_state; - PyObject* list; - Py_ssize_t step; - int status; - size_t g; - Py_ssize_t b; - Py_ssize_t e; - - PyObject* string; - PyObject* pos = Py_None; - PyObject* endpos = Py_None; - Py_ssize_t overlapped = FALSE; - PyObject* concurrent = Py_None; - static char* kwlist[] = { "string", "pos", "endpos", "overlapped", - "concurrent", NULL }; - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|OOnO:findall", kwlist, - &string, &pos, &endpos, &overlapped, &concurrent)) - return NULL; - - start = as_string_index(pos, 0); - if (start == -1 && PyErr_Occurred()) - return NULL; - - end = as_string_index(endpos, PY_SSIZE_T_MAX); - if (end == -1 && PyErr_Occurred()) - return NULL; - - conc = decode_concurrent(concurrent); - if (conc < 0) - return NULL; - - /* The MatchObject, and therefore repeated captures, will not be visible. - */ - if (!state_init(&state, self, string, start, end, overlapped != 0, conc, - FALSE, FALSE, FALSE, FALSE)) - return NULL; - - /* Initialise the "safe state" structure. */ - safe_state.re_state = &state; - safe_state.thread_state = NULL; - - list = PyList_New(0); - if (!list) { - state_fini(&state); - return NULL; - } - - step = state.reverse ? -1 : 1; - while (state.slice_start <= state.text_pos && state.text_pos <= - state.slice_end) { - PyObject* item; - - status = do_match(&safe_state, TRUE); - if (status < 0) - goto error; - - if (status == 0) - break; - - /* Don't bother to build a MatchObject. 
*/ - switch (self->public_group_count) { - case 0: - if (state.reverse) { - b = state.text_pos; - e = state.match_pos; - } else { - b = state.match_pos; - e = state.text_pos; - } - item = get_slice(string, b, e); - if (!item) - goto error; - break; - case 1: - item = state_get_group(&state, 1, string, TRUE); - if (!item) - goto error; - break; - default: - item = PyTuple_New((Py_ssize_t)self->public_group_count); - if (!item) - goto error; - - for (g = 0; g < self->public_group_count; g++) { - PyObject* o; - - o = state_get_group(&state, (Py_ssize_t)g + 1, string, TRUE); - if (!o) { - Py_DECREF(item); - goto error; - } - - /* PyTuple_SET_ITEM borrows the reference. */ - PyTuple_SET_ITEM(item, g, o); - } - break; - } - - status = PyList_Append(list, item); - Py_DECREF(item); - if (status < 0) - goto error; - - if (state.overlapped) { - /* Advance one character. */ - state.text_pos = state.match_pos + step; - state.must_advance = FALSE; - } else - /* Continue from where we left off, but don't allow 2 contiguous - * zero-width matches. - */ - state.must_advance = state.text_pos == state.match_pos; - } - - state_fini(&state); - - return list; - -error: - Py_DECREF(list); - state_fini(&state); - return NULL; -} - -/* PatternObject's 'finditer' method. */ -static PyObject* pattern_finditer(PatternObject* pattern, PyObject* args, - PyObject* kwargs) { - return pattern_scanner(pattern, args, kwargs); -} - -/* Makes a copy of a PatternObject. */ -Py_LOCAL_INLINE(PyObject*) make_pattern_copy(PatternObject* self) { - Py_INCREF(self); - return (PyObject*)self; -} - -/* PatternObject's '__copy__' method. */ -static PyObject* pattern_copy(PatternObject* self, PyObject *unused) { - return make_pattern_copy(self); -} - -/* PatternObject's '__deepcopy__' method. */ -static PyObject* pattern_deepcopy(PatternObject* self, PyObject* memo) { - return make_pattern_copy(self); -} - -/* The documentation of a PatternObject. 
*/ -PyDoc_STRVAR(pattern_match_doc, - "match(string, pos=None, endpos=None, concurrent=None) --> MatchObject or None.\n\ - Match zero or more characters at the beginning of the string."); - -PyDoc_STRVAR(pattern_fullmatch_doc, - "fullmatch(string, pos=None, endpos=None, concurrent=None) --> MatchObject or None.\n\ - Match zero or more characters against all of the string."); - -PyDoc_STRVAR(pattern_search_doc, - "search(string, pos=None, endpos=None, concurrent=None) --> MatchObject or None.\n\ - Search through string looking for a match, and return a corresponding\n\ - match object instance. Return None if no match is found."); - -PyDoc_STRVAR(pattern_sub_doc, - "sub(repl, string, count=0, flags=0, pos=None, endpos=None, concurrent=None) --> newstring\n\ - Return the string obtained by replacing the leftmost (or rightmost with a\n\ - reverse pattern) non-overlapping occurrences of pattern in string by the\n\ - replacement repl."); - -#if PY_VERSION_HEX >= 0x02060000 -PyDoc_STRVAR(pattern_subf_doc, - "subf(format, string, count=0, flags=0, pos=None, endpos=None, concurrent=None) --> newstring\n\ - Return the string obtained by replacing the leftmost (or rightmost with a\n\ - reverse pattern) non-overlapping occurrences of pattern in string by the\n\ - replacement format."); - -#endif -PyDoc_STRVAR(pattern_subn_doc, - "subn(repl, string, count=0, flags=0, pos=None, endpos=None, concurrent=None) --> (newstring, number of subs)\n\ - Return the tuple (new_string, number_of_subs_made) found by replacing the\n\ - leftmost (or rightmost with a reverse pattern) non-overlapping occurrences\n\ - of pattern with the replacement repl."); - -#if PY_VERSION_HEX >= 0x02060000 -PyDoc_STRVAR(pattern_subfn_doc, - "subfn(format, string, count=0, flags=0, pos=None, endpos=None, concurrent=None) --> (newstring, number of subs)\n\ - Return the tuple (new_string, number_of_subs_made) found by replacing the\n\ - leftmost (or rightmost with a reverse pattern) non-overlapping occurrences\n\ 
- of pattern with the replacement format."); - -#endif -PyDoc_STRVAR(pattern_split_doc, - "split(string, string, maxsplit=0, concurrent=None) --> list.\n\ - Split string by the occurrences of pattern."); - -PyDoc_STRVAR(pattern_splititer_doc, - "splititer(string, maxsplit=0, concurrent=None) --> iterator.\n\ - Return an iterator yielding the parts of a split string."); - -PyDoc_STRVAR(pattern_findall_doc, - "findall(string, pos=None, endpos=None, overlapped=False, concurrent=None) --> list.\n\ - Return a list of all matches of pattern in string. The matches may be\n\ - overlapped if overlapped is True."); - -PyDoc_STRVAR(pattern_finditer_doc, - "finditer(string, pos=None, endpos=None, overlapped=False, concurrent=None) --> iterator.\n\ - Return an iterator over all matches for the RE pattern in string. The\n\ - matches may be overlapped if overlapped is True. For each match, the\n\ - iterator returns a MatchObject."); - -PyDoc_STRVAR(pattern_scanner_doc, - "scanner(string, pos=None, endpos=None, overlapped=False, concurrent=None) --> scanner.\n\ - Return an scanner for the RE pattern in string. The matches may be overlapped\n\ - if overlapped is True."); - -/* The methods of a PatternObject. 
*/ -static PyMethodDef pattern_methods[] = { - {"match", (PyCFunction)pattern_match, METH_VARARGS|METH_KEYWORDS, - pattern_match_doc}, - {"fullmatch", (PyCFunction)pattern_fullmatch, METH_VARARGS|METH_KEYWORDS, - pattern_fullmatch_doc}, - {"search", (PyCFunction)pattern_search, METH_VARARGS|METH_KEYWORDS, - pattern_search_doc}, - {"sub", (PyCFunction)pattern_sub, METH_VARARGS|METH_KEYWORDS, - pattern_sub_doc}, -#if PY_VERSION_HEX >= 0x02060000 - {"subf", (PyCFunction)pattern_subf, METH_VARARGS|METH_KEYWORDS, - pattern_subf_doc}, -#endif - {"subn", (PyCFunction)pattern_subn, METH_VARARGS|METH_KEYWORDS, - pattern_subn_doc}, -#if PY_VERSION_HEX >= 0x02060000 - {"subfn", (PyCFunction)pattern_subfn, METH_VARARGS|METH_KEYWORDS, - pattern_subfn_doc}, -#endif - {"split", (PyCFunction)pattern_split, METH_VARARGS|METH_KEYWORDS, - pattern_split_doc}, - {"splititer", (PyCFunction)pattern_splititer, METH_VARARGS|METH_KEYWORDS, - pattern_splititer_doc}, - {"findall", (PyCFunction)pattern_findall, METH_VARARGS|METH_KEYWORDS, - pattern_findall_doc}, - {"finditer", (PyCFunction)pattern_finditer, METH_VARARGS|METH_KEYWORDS, - pattern_finditer_doc}, - {"scanner", (PyCFunction)pattern_scanner, METH_VARARGS|METH_KEYWORDS, - pattern_scanner_doc}, - {"__copy__", (PyCFunction)pattern_copy, METH_NOARGS}, - {"__deepcopy__", (PyCFunction)pattern_deepcopy, METH_O}, - {NULL, NULL} -}; - -PyDoc_STRVAR(pattern_doc, "Compiled regex object"); - -/* Deallocates a PatternObject. */ -static void pattern_dealloc(PyObject* self_) { - PatternObject* self; - int partial_side; - size_t i; - - self = (PatternObject*)self_; - - /* Discard the nodes. */ - for (i = 0; i < self->node_count; i++) { - RE_Node* node; - - node = self->node_list[i]; - re_dealloc(node->values); - if (node->status & RE_STATUS_STRING) { - re_dealloc(node->string.bad_character_offset); - re_dealloc(node->string.good_suffix_offset); - } - re_dealloc(node); - } - re_dealloc(self->node_list); - - /* Discard the group info. 
*/ - re_dealloc(self->group_info); - - /* Discard the call_ref info. */ - re_dealloc(self->call_ref_info); - - /* Discard the repeat info. */ - re_dealloc(self->repeat_info); - - dealloc_groups(self->groups_storage, self->true_group_count); - - dealloc_repeats(self->repeats_storage, self->repeat_count); - - if (self->weakreflist) - PyObject_ClearWeakRefs((PyObject*)self); - Py_XDECREF(self->pattern); - Py_XDECREF(self->groupindex); - Py_XDECREF(self->indexgroup); - - for (partial_side = 0; partial_side < 2; partial_side++) { - if (self->partial_named_lists[partial_side]) { - for (i = 0; i < self->named_lists_count; i++) - Py_XDECREF(self->partial_named_lists[partial_side][i]); - - re_dealloc(self->partial_named_lists[partial_side]); - } - } - - Py_DECREF(self->named_lists); - Py_DECREF(self->named_list_indexes); - PyObject_DEL(self); -} - -/* Info about the various flags that can be passed in. */ -typedef struct RE_FlagName { - char* name; - int value; -} RE_FlagName; - -/* We won't bother about the A flag in Python 2. */ -static RE_FlagName flag_names[] = { - {"B", RE_FLAG_BESTMATCH}, - {"D", RE_FLAG_DEBUG}, - {"S", RE_FLAG_DOTALL}, - {"F", RE_FLAG_FULLCASE}, - {"I", RE_FLAG_IGNORECASE}, - {"L", RE_FLAG_LOCALE}, - {"M", RE_FLAG_MULTILINE}, - {"R", RE_FLAG_REVERSE}, - {"T", RE_FLAG_TEMPLATE}, - {"U", RE_FLAG_UNICODE}, - {"X", RE_FLAG_VERBOSE}, - {"V0", RE_FLAG_VERSION0}, - {"V1", RE_FLAG_VERSION1}, - {"W", RE_FLAG_WORD}, -}; - -/* Appends a string to a list. */ -Py_LOCAL_INLINE(BOOL) append_string(PyObject* list, char* string) { - PyObject* item; - int status; - - item = Py_BuildValue("s", string); - if (!item) - return FALSE; - - status = PyList_Append(list, item); - Py_DECREF(item); - if (status < 0) - return FALSE; - - return TRUE; -} - -/* Appends a (decimal) integer to a list. 
*/ -Py_LOCAL_INLINE(BOOL) append_integer(PyObject* list, Py_ssize_t value) { - PyObject* int_obj; - PyObject* repr_obj; - int status; - - int_obj = Py_BuildValue("n", value); - if (!int_obj) - return FALSE; - - repr_obj = PyObject_Repr(int_obj); - Py_DECREF(int_obj); - if (!repr_obj) - return FALSE; - - status = PyList_Append(list, repr_obj); - Py_DECREF(repr_obj); - if (status < 0) - return FALSE; - - return TRUE; -} - -/* MatchObject's '__repr__' method. */ -static PyObject* match_repr(PyObject* self_) { - MatchObject* self; - PyObject* list; - PyObject* matched_substring; - PyObject* matched_repr; - int status; - PyObject* separator; - PyObject* result; - - self = (MatchObject*)self_; - - list = PyList_New(0); - if (!list) - return NULL; - - if (!append_string(list, "<regex.Match object; span=(")) - goto error; - - if (!append_integer(list, self->match_start)) - goto error; - - if (! append_string(list, ", ")) - goto error; - - if (!append_integer(list, self->match_end)) - goto error; - - if (!append_string(list, "), match=")) - goto error; - - matched_substring = get_slice(self->substring, self->match_start - - self->substring_offset, self->match_end - self->substring_offset); - if (!matched_substring) - goto error; - - matched_repr = PyObject_Repr(matched_substring); - Py_DECREF(matched_substring); - if (!matched_repr) - goto error; - - status = PyList_Append(list, matched_repr); - Py_DECREF(matched_repr); - if (status < 0) - goto error; - - if (self->fuzzy_counts[RE_FUZZY_SUB] != 0 || - self->fuzzy_counts[RE_FUZZY_INS] != 0 || self->fuzzy_counts[RE_FUZZY_DEL] - != 0) { - if (! append_string(list, ", fuzzy_counts=(")) - goto error; - - if (!append_integer(list, - (Py_ssize_t)self->fuzzy_counts[RE_FUZZY_SUB])) - goto error; - - if (! append_string(list, ", ")) - goto error; - - if (!append_integer(list, - (Py_ssize_t)self->fuzzy_counts[RE_FUZZY_INS])) - goto error; - - if (! append_string(list, ", ")) - goto error; - if (!append_integer(list, - (Py_ssize_t)self->fuzzy_counts[RE_FUZZY_DEL])) - goto error; - - if (! 
append_string(list, ")")) - goto error; - } - - if (self->partial) { - if (!append_string(list, ", partial=True")) - goto error; - } - - if (! append_string(list, ">")) - goto error; - - separator = Py_BuildValue("s", ""); - if (!separator) - goto error; - - result = PyUnicode_Join(separator, list); - Py_DECREF(separator); - Py_DECREF(list); - - return result; - -error: - Py_DECREF(list); - return NULL; -} - -/* PatternObject's '__repr__' method. */ -static PyObject* pattern_repr(PyObject* self_) { - PatternObject* self; - PyObject* list; - PyObject* item; - int status; - int flag_count; - unsigned int i; - Py_ssize_t pos; - PyObject *key; - PyObject *value; - PyObject* separator; - PyObject* result; - - self = (PatternObject*)self_; - - list = PyList_New(0); - if (!list) - return NULL; - - if (!append_string(list, "regex.Regex(")) - goto error; - - item = PyObject_Repr(self->pattern); - if (!item) - goto error; - - status = PyList_Append(list, item); - Py_DECREF(item); - if (status < 0) - goto error; - - flag_count = 0; - for (i = 0; i < sizeof(flag_names) / sizeof(flag_names[0]); i++) { - if (self->flags & flag_names[i].value) { - if (flag_count == 0) { - if (!append_string(list, ", flags=")) - goto error; - } else { - if (!append_string(list, " | ")) - goto error; - } - - if (!append_string(list, "regex.")) - goto error; - - if (!append_string(list, flag_names[i].name)) - goto error; - - ++flag_count; - } - } - - pos = 0; - /* PyDict_Next borrows references. 
*/ - while (PyDict_Next(self->named_lists, &pos, &key, &value)) { - if (!append_string(list, ", ")) - goto error; - - status = PyList_Append(list, key); - if (status < 0) - goto error; - - if (!append_string(list, "=")) - goto error; - - item = PyObject_Repr(value); - if (!item) - goto error; - - status = PyList_Append(list, item); - Py_DECREF(item); - if (status < 0) - goto error; - } - - if (!append_string(list, ")")) - goto error; - - separator = Py_BuildValue("s", ""); - if (!separator) - goto error; - - result = PyUnicode_Join(separator, list); - Py_DECREF(separator); - Py_DECREF(list); - - return result; - -error: - Py_DECREF(list); - return NULL; -} - -/* PatternObject's 'groupindex' method. */ -static PyObject* pattern_groupindex(PyObject* self_) { - PatternObject* self; - - self = (PatternObject*)self_; - - return PyDict_Copy(self->groupindex); -} - -static PyGetSetDef pattern_getset[] = { - {"groupindex", (getter)pattern_groupindex, (setter)NULL, - "A dictionary mapping group names to group numbers."}, - {NULL} /* Sentinel */ -}; - -static PyMemberDef pattern_members[] = { - {"pattern", T_OBJECT, offsetof(PatternObject, pattern), READONLY, - "The pattern string from which the regex object was compiled."}, - {"flags", T_PYSSIZET, offsetof(PatternObject, flags), READONLY, - "The regex matching flags."}, - {"groups", T_PYSSIZET, offsetof(PatternObject, public_group_count), - READONLY, "The number of capturing groups in the pattern."}, - {"named_lists", T_OBJECT, offsetof(PatternObject, named_lists), READONLY, - "The named lists used by the regex."}, - {NULL} /* Sentinel */ -}; - -static PyTypeObject Pattern_Type = { - PyObject_HEAD_INIT(NULL) - 0, - "_" RE_MODULE "." "Pattern", - sizeof(PatternObject) -}; - -/* Building the nodes is made simpler by allowing branches to have a single - * exit. These need to be removed. 
- */ -Py_LOCAL_INLINE(void) skip_one_way_branches(PatternObject* pattern) { - BOOL modified; - - /* If a node refers to a 1-way branch then make the former refer to the - * latter's destination. Repeat until they're all done. - */ - do { - size_t i; - - modified = FALSE; - - for (i = 0; i < pattern->node_count; i++) { - RE_Node* node; - RE_Node* next; - - node = pattern->node_list[i]; - - /* Check the first destination. */ - next = node->next_1.node; - if (next && next->op == RE_OP_BRANCH && - !next->nonstring.next_2.node) { - node->next_1.node = next->next_1.node; - modified = TRUE; - } - - /* Check the second destination. */ - next = node->nonstring.next_2.node; - if (next && next->op == RE_OP_BRANCH && - !next->nonstring.next_2.node) { - node->nonstring.next_2.node = next->next_1.node; - modified = TRUE; - } - } - } while (modified); - - /* The start node might be a 1-way branch. Skip over it because it'll be - * removed. It might even be the first in a chain. - */ - while (pattern->start_node->op == RE_OP_BRANCH && - !pattern->start_node->nonstring.next_2.node) - pattern->start_node = pattern->start_node->next_1.node; -} - -/* Adds guards to repeats which are followed by a reference to a group. - * - * Returns whether a guard was added for a node at or after the given node. 
- */ -Py_LOCAL_INLINE(RE_STATUS_T) add_repeat_guards(PatternObject* pattern, RE_Node* - node) { - RE_STATUS_T result; - - result = RE_STATUS_NEITHER; - - for (;;) { - if (node->status & RE_STATUS_VISITED_AG) - return node->status & (RE_STATUS_REPEAT | RE_STATUS_REF); - - switch (node->op) { - case RE_OP_ATOMIC: - case RE_OP_LOOKAROUND: - { - RE_STATUS_T body_result; - RE_STATUS_T tail_result; - RE_STATUS_T status; - - body_result = add_repeat_guards(pattern, - node->nonstring.next_2.node); - tail_result = add_repeat_guards(pattern, node->next_1.node); - status = max_status_3(result, body_result, tail_result); - node->status = RE_STATUS_VISITED_AG | status; - return status; - } - case RE_OP_BRANCH: - { - RE_STATUS_T branch_1_result; - RE_STATUS_T branch_2_result; - RE_STATUS_T status; - - branch_1_result = add_repeat_guards(pattern, node->next_1.node); - branch_2_result = add_repeat_guards(pattern, - node->nonstring.next_2.node); - status = max_status_3(result, branch_1_result, branch_2_result); - node->status = RE_STATUS_VISITED_AG | status; - return status; - } - case RE_OP_END_GREEDY_REPEAT: - case RE_OP_END_LAZY_REPEAT: - node->status |= RE_STATUS_VISITED_AG; - return result; - case RE_OP_GREEDY_REPEAT: - case RE_OP_LAZY_REPEAT: - { - BOOL limited; - RE_STATUS_T body_result; - RE_STATUS_T tail_result; - RE_RepeatInfo* repeat_info; - RE_STATUS_T status; - - limited = ~node->values[2] != 0; - if (limited) - body_result = RE_STATUS_LIMITED; - else - body_result = add_repeat_guards(pattern, node->next_1.node); - tail_result = add_repeat_guards(pattern, - node->nonstring.next_2.node); - - repeat_info = &pattern->repeat_info[node->values[0]]; - if (body_result != RE_STATUS_REF) - repeat_info->status |= RE_STATUS_BODY; - if (tail_result != RE_STATUS_REF) - repeat_info->status |= RE_STATUS_TAIL; - if (limited) - result = max_status_2(result, RE_STATUS_LIMITED); - else - result = max_status_2(result, RE_STATUS_REPEAT); - status = max_status_3(result, body_result, 
tail_result); - node->status |= RE_STATUS_VISITED_AG | status; - return status; - } - case RE_OP_GREEDY_REPEAT_ONE: - case RE_OP_LAZY_REPEAT_ONE: - { - BOOL limited; - RE_STATUS_T tail_result; - RE_RepeatInfo* repeat_info; - RE_STATUS_T status; - - limited = ~node->values[2] != 0; - tail_result = add_repeat_guards(pattern, node->next_1.node); - - repeat_info = &pattern->repeat_info[node->values[0]]; - repeat_info->status |= RE_STATUS_BODY; - if (tail_result != RE_STATUS_REF) - repeat_info->status |= RE_STATUS_TAIL; - if (limited) - result = max_status_2(result, RE_STATUS_LIMITED); - else - result = max_status_2(result, RE_STATUS_REPEAT); - status = max_status_3(result, RE_STATUS_REPEAT, tail_result); - node->status = RE_STATUS_VISITED_AG | status; - return status; - } - case RE_OP_GROUP_EXISTS: - { - RE_STATUS_T branch_1_result; - RE_STATUS_T branch_2_result; - RE_STATUS_T status; - - branch_1_result = add_repeat_guards(pattern, node->next_1.node); - branch_2_result = add_repeat_guards(pattern, - node->nonstring.next_2.node); - status = max_status_4(result, branch_1_result, branch_2_result, - RE_STATUS_REF); - node->status = RE_STATUS_VISITED_AG | status; - return status; - } - case RE_OP_GROUP_CALL: - case RE_OP_REF_GROUP: - case RE_OP_REF_GROUP_FLD: - case RE_OP_REF_GROUP_FLD_REV: - case RE_OP_REF_GROUP_IGN: - case RE_OP_REF_GROUP_IGN_REV: - case RE_OP_REF_GROUP_REV: - result = RE_STATUS_REF; - node = node->next_1.node; - break; - case RE_OP_SUCCESS: - node->status = RE_STATUS_VISITED_AG | result; - return result; - default: - node = node->next_1.node; - break; - } - } -} - -/* Adds an index to a node's values unless it's already present. - * - * 'offset' is the offset of the index count within the values. 
- */ -Py_LOCAL_INLINE(BOOL) add_index(RE_Node* node, size_t offset, size_t index) { - size_t index_count; - size_t first_index; - size_t i; - RE_CODE* new_values; - - if (!node) - return TRUE; - - index_count = node->values[offset]; - first_index = offset + 1; - - /* Is the index already present? */ - for (i = 0; i < index_count; i++) { - if (node->values[first_index + i] == index) - return TRUE; - } - - /* Allocate more space for the new index. */ - new_values = re_realloc(node->values, (node->value_count + 1) * - sizeof(RE_CODE)); - if (!new_values) - return FALSE; - - ++node->value_count; - node->values = new_values; - - node->values[first_index + node->values[offset]++] = (RE_CODE)index; - - return TRUE; -} - -/* Records the index of every repeat and fuzzy section within atomic - * subpatterns and lookarounds. - */ -Py_LOCAL_INLINE(BOOL) record_subpattern_repeats_and_fuzzy_sections(RE_Node* - parent_node, size_t offset, size_t repeat_count, RE_Node* node) { - while (node) { - if (node->status & RE_STATUS_VISITED_REP) - return TRUE; - - node->status |= RE_STATUS_VISITED_REP; - - switch (node->op) { - case RE_OP_ATOMIC: - if (!record_subpattern_repeats_and_fuzzy_sections(node, 0, - repeat_count, node->nonstring.next_2.node)) - return FALSE; - node = node->next_1.node; - break; - case RE_OP_BRANCH: - if (!record_subpattern_repeats_and_fuzzy_sections(parent_node, - offset, repeat_count, node->next_1.node)) - return FALSE; - node = node->nonstring.next_2.node; - break; - case RE_OP_END_FUZZY: - node = node->next_1.node; - break; - case RE_OP_END_GREEDY_REPEAT: - case RE_OP_END_LAZY_REPEAT: - return TRUE; - case RE_OP_FUZZY: - /* Record the fuzzy index. */ - if (!add_index(parent_node, offset, repeat_count + - node->values[0])) - return FALSE; - node = node->next_1.node; - break; - case RE_OP_GREEDY_REPEAT: - case RE_OP_LAZY_REPEAT: - /* Record the repeat index. 
*/ - if (!add_index(parent_node, offset, node->values[0])) - return FALSE; - if (!record_subpattern_repeats_and_fuzzy_sections(parent_node, - offset, repeat_count, node->next_1.node)) - return FALSE; - node = node->nonstring.next_2.node; - break; - case RE_OP_GREEDY_REPEAT_ONE: - case RE_OP_LAZY_REPEAT_ONE: - /* Record the repeat index. */ - if (!add_index(parent_node, offset, node->values[0])) - return FALSE; - node = node->next_1.node; - break; - case RE_OP_GROUP_EXISTS: - if (!record_subpattern_repeats_and_fuzzy_sections(parent_node, - offset, repeat_count, node->next_1.node)) - return FALSE; - node = node->nonstring.next_2.node; - break; - case RE_OP_LOOKAROUND: - if (!record_subpattern_repeats_and_fuzzy_sections(node, 1, - repeat_count, node->nonstring.next_2.node)) - return FALSE; - node = node->next_1.node; - break; - default: - node = node->next_1.node; - break; - } - } - - return TRUE; -} - -/* Marks nodes which are being used as used. */ -Py_LOCAL_INLINE(void) use_nodes(RE_Node* node) { - while (node && !(node->status & RE_STATUS_USED)) { - node->status |= RE_STATUS_USED; - if (!(node->status & RE_STATUS_STRING)) { - if (node->nonstring.next_2.node) - use_nodes(node->nonstring.next_2.node); - } - node = node->next_1.node; - } -} - -/* Discards any unused nodes. - * - * Optimising the nodes might result in some nodes no longer being used. - */ -Py_LOCAL_INLINE(void) discard_unused_nodes(PatternObject* pattern) { - size_t new_count; - size_t i; - - /* Mark the nodes which are being used. 
*/ - use_nodes(pattern->start_node); - - for (i = 0; i < pattern->call_ref_info_capacity; i++) - use_nodes(pattern->call_ref_info[i].node); - - new_count = 0; - for (i = 0; i < pattern->node_count; i++) { - RE_Node* node; - - node = pattern->node_list[i]; - if (node->status & RE_STATUS_USED) - pattern->node_list[new_count++] = node; - else { - re_dealloc(node->values); - if (node->status & RE_STATUS_STRING) { - re_dealloc(node->string.bad_character_offset); - re_dealloc(node->string.good_suffix_offset); - } - re_dealloc(node); - } - } - - pattern->node_count = new_count; -} - -/* Marks all the group which are named. Returns FALSE if there's an error. */ -Py_LOCAL_INLINE(BOOL) mark_named_groups(PatternObject* pattern) { - size_t i; - - for (i = 0; i < pattern->public_group_count; i++) { - RE_GroupInfo* group_info; - PyObject* index; - int status; - - group_info = &pattern->group_info[i]; - index = Py_BuildValue("n", i + 1); - if (!index) - return FALSE; - - status = PyDict_Contains(pattern->indexgroup, index); - Py_DECREF(index); - if (status < 0) - return FALSE; - - group_info->has_name = status == 1; - } - - return TRUE; -} - -/* Gets the test node. - * - * The test node lets the matcher look ahead in the pattern, allowing it to - * avoid the cost of housekeeping, only to find that what follows doesn't match - * anyway. 
- */ -Py_LOCAL_INLINE(void) set_test_node(RE_NextNode* next) { - RE_Node* node = next->node; - RE_Node* test; - - next->test = node; - next->match_next = node; - next->match_step = 0; - - if (!node) - return; - - test = node; - while (test->op == RE_OP_END_GROUP || test->op == RE_OP_START_GROUP) - test = test->next_1.node; - - next->test = test; - - if (test != node) - return; - - switch (test->op) { - case RE_OP_ANY: - case RE_OP_ANY_ALL: - case RE_OP_ANY_ALL_REV: - case RE_OP_ANY_REV: - case RE_OP_ANY_U: - case RE_OP_ANY_U_REV: - case RE_OP_BOUNDARY: - case RE_OP_CHARACTER: - case RE_OP_CHARACTER_IGN: - case RE_OP_CHARACTER_IGN_REV: - case RE_OP_CHARACTER_REV: - case RE_OP_DEFAULT_BOUNDARY: - case RE_OP_DEFAULT_END_OF_WORD: - case RE_OP_DEFAULT_START_OF_WORD: - case RE_OP_END_OF_LINE: - case RE_OP_END_OF_LINE_U: - case RE_OP_END_OF_STRING: - case RE_OP_END_OF_STRING_LINE: - case RE_OP_END_OF_STRING_LINE_U: - case RE_OP_END_OF_WORD: - case RE_OP_GRAPHEME_BOUNDARY: - case RE_OP_PROPERTY: - case RE_OP_PROPERTY_IGN: - case RE_OP_PROPERTY_IGN_REV: - case RE_OP_PROPERTY_REV: - case RE_OP_RANGE: - case RE_OP_RANGE_IGN: - case RE_OP_RANGE_IGN_REV: - case RE_OP_RANGE_REV: - case RE_OP_SEARCH_ANCHOR: - case RE_OP_SET_DIFF: - case RE_OP_SET_DIFF_IGN: - case RE_OP_SET_DIFF_IGN_REV: - case RE_OP_SET_DIFF_REV: - case RE_OP_SET_INTER: - case RE_OP_SET_INTER_IGN: - case RE_OP_SET_INTER_IGN_REV: - case RE_OP_SET_INTER_REV: - case RE_OP_SET_SYM_DIFF: - case RE_OP_SET_SYM_DIFF_IGN: - case RE_OP_SET_SYM_DIFF_IGN_REV: - case RE_OP_SET_SYM_DIFF_REV: - case RE_OP_SET_UNION: - case RE_OP_SET_UNION_IGN: - case RE_OP_SET_UNION_IGN_REV: - case RE_OP_SET_UNION_REV: - case RE_OP_START_OF_LINE: - case RE_OP_START_OF_LINE_U: - case RE_OP_START_OF_STRING: - case RE_OP_START_OF_WORD: - case RE_OP_STRING: - case RE_OP_STRING_FLD: - case RE_OP_STRING_FLD_REV: - case RE_OP_STRING_IGN: - case RE_OP_STRING_IGN_REV: - case RE_OP_STRING_REV: - next->match_next = test->next_1.node; - next->match_step = 
test->step; - break; - case RE_OP_GREEDY_REPEAT_ONE: - case RE_OP_LAZY_REPEAT_ONE: - if (test->values[1] > 0) - next->test = test; - break; - } -} - -/* Sets the test nodes. */ -Py_LOCAL_INLINE(void) set_test_nodes(PatternObject* pattern) { - RE_Node** node_list; - size_t i; - - node_list = pattern->node_list; - for (i = 0; i < pattern->node_count; i++) { - RE_Node* node; - - node = node_list[i]; - set_test_node(&node->next_1); - if (!(node->status & RE_STATUS_STRING)) - set_test_node(&node->nonstring.next_2); - } -} - -/* Optimises the pattern. */ -Py_LOCAL_INLINE(BOOL) optimise_pattern(PatternObject* pattern) { - size_t i; - - /* Building the nodes is made simpler by allowing branches to have a single - * exit. These need to be removed. - */ - skip_one_way_branches(pattern); - - /* Add position guards for repeat bodies containing a reference to a group - * or repeat tails followed at some point by a reference to a group. - */ - add_repeat_guards(pattern, pattern->start_node); - - /* Record the index of repeats and fuzzy sections within the body of atomic - * and lookaround nodes. - */ - if (!record_subpattern_repeats_and_fuzzy_sections(NULL, 0, - pattern->repeat_count, pattern->start_node)) - return FALSE; - - for (i = 0; i < pattern->call_ref_info_count; i++) { - RE_Node* node; - - node = pattern->call_ref_info[i].node; - if (!record_subpattern_repeats_and_fuzzy_sections(NULL, 0, - pattern->repeat_count, node)) - return FALSE; - } - - /* Discard any unused nodes. */ - discard_unused_nodes(pattern); - - /* Set the test nodes. */ - set_test_nodes(pattern); - - /* Mark all the group that are named. */ - if (!mark_named_groups(pattern)) - return FALSE; - - return TRUE; -} - -/* Creates a new pattern node. 
*/ -Py_LOCAL_INLINE(RE_Node*) create_node(PatternObject* pattern, RE_UINT8 op, - RE_CODE flags, Py_ssize_t step, size_t value_count) { - RE_Node* node; - - node = (RE_Node*)re_alloc(sizeof(*node)); - if (!node) - return NULL; - memset(node, 0, sizeof(RE_Node)); - - node->value_count = value_count; - if (node->value_count > 0) { - node->values = (RE_CODE*)re_alloc(node->value_count * sizeof(RE_CODE)); - if (!node->values) - goto error; - } else - node->values = NULL; - - node->op = op; - node->match = (flags & RE_POSITIVE_OP) != 0; - node->status = (RE_STATUS_T)(flags << RE_STATUS_SHIFT); - node->step = step; - - /* Ensure that there's enough storage to record the new node. */ - if (pattern->node_count >= pattern->node_capacity) { - RE_Node** new_node_list; - - pattern->node_capacity *= 2; - if (pattern->node_capacity == 0) - pattern->node_capacity = RE_INIT_NODE_LIST_SIZE; - new_node_list = (RE_Node**)re_realloc(pattern->node_list, - pattern->node_capacity * sizeof(RE_Node*)); - if (!new_node_list) - goto error; - pattern->node_list = new_node_list; - } - - /* Record the new node. */ - pattern->node_list[pattern->node_count++] = node; - - return node; - -error: - re_dealloc(node->values); - re_dealloc(node); - return NULL; -} - -/* Adds a node as a next node for another node. */ -Py_LOCAL_INLINE(void) add_node(RE_Node* node_1, RE_Node* node_2) { - if (!node_1->next_1.node) - node_1->next_1.node = node_2; - else - node_1->nonstring.next_2.node = node_2; -} - -/* Ensures that the entry for a group's details actually exists. */ -Py_LOCAL_INLINE(BOOL) ensure_group(PatternObject* pattern, size_t group) { - size_t old_capacity; - size_t new_capacity; - RE_GroupInfo* new_group_info; - - if (group <= pattern->true_group_count) - /* We already have an entry for the group. */ - return TRUE; - - /* Increase the storage capacity to include the new entry if it's - * insufficient. 
- */ - old_capacity = pattern->group_info_capacity; - new_capacity = pattern->group_info_capacity; - while (group > new_capacity) - new_capacity += RE_LIST_SIZE_INC; - - if (new_capacity > old_capacity) { - new_group_info = (RE_GroupInfo*)re_realloc(pattern->group_info, - new_capacity * sizeof(RE_GroupInfo)); - if (!new_group_info) - return FALSE; - memset(new_group_info + old_capacity, 0, (new_capacity - old_capacity) - * sizeof(RE_GroupInfo)); - - pattern->group_info = new_group_info; - pattern->group_info_capacity = new_capacity; - } - - pattern->true_group_count = group; - - return TRUE; -} - -/* Records that there's a reference to a group. */ -Py_LOCAL_INLINE(BOOL) record_ref_group(PatternObject* pattern, size_t group) { - if (!ensure_group(pattern, group)) - return FALSE; - - pattern->group_info[group - 1].referenced = TRUE; - - return TRUE; -} - -/* Records that there's a new group. */ -Py_LOCAL_INLINE(BOOL) record_group(PatternObject* pattern, size_t group, - RE_Node* node) { - if (!ensure_group(pattern, group)) - return FALSE; - - if (group >= 1) { - RE_GroupInfo* info; - - info = &pattern->group_info[group - 1]; - info->end_index = (Py_ssize_t)pattern->true_group_count; - info->node = node; - } - - return TRUE; -} - -/* Records that a group has closed. */ -Py_LOCAL_INLINE(void) record_group_end(PatternObject* pattern, size_t group) { - if (group >= 1) - pattern->group_info[group - 1].end_index = ++pattern->group_end_index; -} - -/* Ensures that the entry for a call_ref's details actually exists. */ -Py_LOCAL_INLINE(BOOL) ensure_call_ref(PatternObject* pattern, size_t call_ref) - { - size_t old_capacity; - size_t new_capacity; - RE_CallRefInfo* new_call_ref_info; - - if (call_ref < pattern->call_ref_info_count) - /* We already have an entry for the call_ref. */ - return TRUE; - - /* Increase the storage capacity to include the new entry if it's - * insufficient. 
- */ - old_capacity = pattern->call_ref_info_capacity; - new_capacity = pattern->call_ref_info_capacity; - while (call_ref >= new_capacity) - new_capacity += RE_LIST_SIZE_INC; - - if (new_capacity > old_capacity) { - new_call_ref_info = (RE_CallRefInfo*)re_realloc(pattern->call_ref_info, - new_capacity * sizeof(RE_CallRefInfo)); - if (!new_call_ref_info) - return FALSE; - memset(new_call_ref_info + old_capacity, 0, (new_capacity - - old_capacity) * sizeof(RE_CallRefInfo)); - - pattern->call_ref_info = new_call_ref_info; - pattern->call_ref_info_capacity = new_capacity; - } - - pattern->call_ref_info_count = 1 + call_ref; - - return TRUE; -} - -/* Records that a call_ref is defined. */ -Py_LOCAL_INLINE(BOOL) record_call_ref_defined(PatternObject* pattern, size_t - call_ref, RE_Node* node) { - if (!ensure_call_ref(pattern, call_ref)) - return FALSE; - - pattern->call_ref_info[call_ref].defined = TRUE; - pattern->call_ref_info[call_ref].node = node; - - return TRUE; -} - -/* Records that a call_ref is used. */ -Py_LOCAL_INLINE(BOOL) record_call_ref_used(PatternObject* pattern, size_t - call_ref) { - if (!ensure_call_ref(pattern, call_ref)) - return FALSE; - - pattern->call_ref_info[call_ref].used = TRUE; - - return TRUE; -} - -/* Checks whether a node matches one and only one character. */ -Py_LOCAL_INLINE(BOOL) sequence_matches_one(RE_Node* node) { - while (node->op == RE_OP_BRANCH && !node->nonstring.next_2.node) - node = node->next_1.node; - - if (node->next_1.node || (node->status & RE_STATUS_FUZZY)) - return FALSE; - - return node_matches_one_character(node); -} - -/* Records a repeat. */ -Py_LOCAL_INLINE(BOOL) record_repeat(PatternObject* pattern, size_t index, - size_t repeat_depth) { - size_t old_capacity; - size_t new_capacity; - - /* Increase the storage capacity to include the new entry if it's - * insufficient. 
- */ - old_capacity = pattern->repeat_info_capacity; - new_capacity = pattern->repeat_info_capacity; - while (index >= new_capacity) - new_capacity += RE_LIST_SIZE_INC; - - if (new_capacity > old_capacity) { - RE_RepeatInfo* new_repeat_info; - - new_repeat_info = (RE_RepeatInfo*)re_realloc(pattern->repeat_info, - new_capacity * sizeof(RE_RepeatInfo)); - if (!new_repeat_info) - return FALSE; - memset(new_repeat_info + old_capacity, 0, (new_capacity - old_capacity) - * sizeof(RE_RepeatInfo)); - - pattern->repeat_info = new_repeat_info; - pattern->repeat_info_capacity = new_capacity; - } - - if (index >= pattern->repeat_count) - pattern->repeat_count = index + 1; - - if (repeat_depth > 0) - pattern->repeat_info[index].status |= RE_STATUS_INNER; - - return TRUE; -} - -Py_LOCAL_INLINE(Py_ssize_t) get_step(RE_CODE op) { - switch (op) { - case RE_OP_ANY: - case RE_OP_ANY_ALL: - case RE_OP_ANY_U: - case RE_OP_CHARACTER: - case RE_OP_CHARACTER_IGN: - case RE_OP_PROPERTY: - case RE_OP_PROPERTY_IGN: - case RE_OP_RANGE: - case RE_OP_RANGE_IGN: - case RE_OP_SET_DIFF: - case RE_OP_SET_DIFF_IGN: - case RE_OP_SET_INTER: - case RE_OP_SET_INTER_IGN: - case RE_OP_SET_SYM_DIFF: - case RE_OP_SET_SYM_DIFF_IGN: - case RE_OP_SET_UNION: - case RE_OP_SET_UNION_IGN: - case RE_OP_STRING: - case RE_OP_STRING_FLD: - case RE_OP_STRING_IGN: - return 1; - case RE_OP_ANY_ALL_REV: - case RE_OP_ANY_REV: - case RE_OP_ANY_U_REV: - case RE_OP_CHARACTER_IGN_REV: - case RE_OP_CHARACTER_REV: - case RE_OP_PROPERTY_IGN_REV: - case RE_OP_PROPERTY_REV: - case RE_OP_RANGE_IGN_REV: - case RE_OP_RANGE_REV: - case RE_OP_SET_DIFF_IGN_REV: - case RE_OP_SET_DIFF_REV: - case RE_OP_SET_INTER_IGN_REV: - case RE_OP_SET_INTER_REV: - case RE_OP_SET_SYM_DIFF_IGN_REV: - case RE_OP_SET_SYM_DIFF_REV: - case RE_OP_SET_UNION_IGN_REV: - case RE_OP_SET_UNION_REV: - case RE_OP_STRING_FLD_REV: - case RE_OP_STRING_IGN_REV: - case RE_OP_STRING_REV: - return -1; - } - - return 0; -} - -Py_LOCAL_INLINE(int) 
build_sequence(RE_CompileArgs* args); - -/* Builds an ANY node. */ -Py_LOCAL_INLINE(int) build_ANY(RE_CompileArgs* args) { - RE_UINT8 op; - RE_CODE flags; - Py_ssize_t step; - RE_Node* node; - - /* codes: opcode, flags. */ - if (args->code + 1 > args->end_code) - return RE_ERROR_ILLEGAL; - - op = (RE_UINT8)args->code[0]; - flags = args->code[1]; - - step = get_step(op); - - /* Create the node. */ - node = create_node(args->pattern, op, flags, step, 0); - if (!node) - return RE_ERROR_MEMORY; - - args->code += 2; - - /* Append the node. */ - add_node(args->end, node); - args->end = node; - - ++args->min_width; - - return RE_ERROR_SUCCESS; -} - -/* Builds a FUZZY node. */ -Py_LOCAL_INLINE(int) build_FUZZY(RE_CompileArgs* args) { - RE_CODE flags; - RE_Node* start_node; - RE_Node* end_node; - RE_CODE index; - RE_CompileArgs subargs; - int status; - - /* codes: opcode, flags, constraints, sequence, end. */ - if (args->code + 13 > args->end_code) - return RE_ERROR_ILLEGAL; - - flags = args->code[1]; - - /* Create nodes for the start and end of the fuzzy sequence. */ - start_node = create_node(args->pattern, RE_OP_FUZZY, flags, 0, 9); - end_node = create_node(args->pattern, RE_OP_END_FUZZY, flags, 0, 5); - if (!start_node || !end_node) - return RE_ERROR_MEMORY; - - index = (RE_CODE)args->pattern->fuzzy_count++; - start_node->values[0] = index; - end_node->values[0] = index; - - /* The constraints consist of 4 pairs of limits and the cost equation. */ - end_node->values[RE_FUZZY_VAL_MIN_DEL] = args->code[2]; /* Deletion minimum. */ - end_node->values[RE_FUZZY_VAL_MIN_INS] = args->code[4]; /* Insertion minimum. */ - end_node->values[RE_FUZZY_VAL_MIN_SUB] = args->code[6]; /* Substitution minimum. */ - end_node->values[RE_FUZZY_VAL_MIN_ERR] = args->code[8]; /* Error minimum. */ - - start_node->values[RE_FUZZY_VAL_MAX_DEL] = args->code[3]; /* Deletion maximum. */ - start_node->values[RE_FUZZY_VAL_MAX_INS] = args->code[5]; /* Insertion maximum. 
*/ - start_node->values[RE_FUZZY_VAL_MAX_SUB] = args->code[7]; /* Substitution maximum. */ - start_node->values[RE_FUZZY_VAL_MAX_ERR] = args->code[9]; /* Error maximum. */ - - start_node->values[RE_FUZZY_VAL_DEL_COST] = args->code[10]; /* Deletion cost. */ - start_node->values[RE_FUZZY_VAL_INS_COST] = args->code[11]; /* Insertion cost. */ - start_node->values[RE_FUZZY_VAL_SUB_COST] = args->code[12]; /* Substitution cost. */ - start_node->values[RE_FUZZY_VAL_MAX_COST] = args->code[13]; /* Total cost. */ - - args->code += 14; - - subargs = *args; - subargs.has_captures = FALSE; - subargs.is_fuzzy = TRUE; - subargs.within_fuzzy = TRUE; - - /* Compile the sequence and check that we've reached the end of the - * subpattern. - */ - status = build_sequence(&subargs); - if (status != RE_ERROR_SUCCESS) - return status; - - if (subargs.code[0] != RE_OP_END) - return RE_ERROR_ILLEGAL; - - args->code = subargs.code; - args->min_width = subargs.min_width; - args->has_captures |= subargs.has_captures; - - ++args->code; - - /* Append the fuzzy sequence. */ - add_node(args->end, start_node); - add_node(start_node, subargs.start); - add_node(subargs.end, end_node); - args->end = end_node; - - args->is_fuzzy = TRUE; - - return RE_ERROR_SUCCESS; -} - -/* Builds an ATOMIC node. */ -Py_LOCAL_INLINE(int) build_ATOMIC(RE_CompileArgs* args) { - RE_Node* atomic_node; - RE_CompileArgs subargs; - RE_Node* success_node; - int status; - - /* codes: opcode, sequence, end. */ - if (args->code + 1 > args->end_code) - return RE_ERROR_ILLEGAL; - - atomic_node = create_node(args->pattern, RE_OP_ATOMIC, 0, 0, 1); - if (!atomic_node) - return RE_ERROR_MEMORY; - - /* The number of repeat indexes. */ - atomic_node->values[0] = 0; - - ++args->code; - - subargs = *args; - subargs.has_captures = FALSE; - subargs.is_fuzzy = FALSE; - - /* Compile the sequence and check that we've reached the end of the - * subpattern. 
- */ - status = build_sequence(&subargs); - if (status != RE_ERROR_SUCCESS) - return status; - - if (subargs.code[0] != RE_OP_END) - return RE_ERROR_ILLEGAL; - - /* Create the success node to terminate the subpattern. */ - success_node = create_node(subargs.pattern, RE_OP_SUCCESS, 0, 0, 0); - if (!success_node) - return RE_ERROR_MEMORY; - - /* Append the SUCCESS node. */ - add_node(subargs.end, success_node); - - /* Insert the subpattern. */ - atomic_node->nonstring.next_2.node = subargs.start; - - args->code = subargs.code; - args->min_width = subargs.min_width; - args->has_captures |= subargs.has_captures; - args->is_fuzzy |= subargs.is_fuzzy; - - ++args->code; - - /* Append the node. */ - add_node(args->end, atomic_node); - args->end = atomic_node; - - return RE_ERROR_SUCCESS; -} - -/* Builds a BOUNDARY node. */ -Py_LOCAL_INLINE(int) build_BOUNDARY(RE_CompileArgs* args) { - RE_UINT8 op; - RE_CODE flags; - RE_Node* node; - - /* codes: opcode, flags. */ - if (args->code + 1 > args->end_code) - return RE_ERROR_ILLEGAL; - - op = (RE_UINT8)args->code[0]; - flags = args->code[1]; - - args->code += 2; - - /* Create the node. */ - node = create_node(args->pattern, op, flags, 0, 0); - if (!node) - return RE_ERROR_MEMORY; - - /* Append the node. */ - add_node(args->end, node); - args->end = node; - - return RE_ERROR_SUCCESS; -} - -/* Builds a BRANCH node. */ -Py_LOCAL_INLINE(int) build_BRANCH(RE_CompileArgs* args) { - RE_Node* branch_node; - RE_Node* join_node; - Py_ssize_t smallest_min_width; - RE_CompileArgs subargs; - int status; - - /* codes: opcode, branch, next, branch, end. */ - if (args->code + 2 > args->end_code) - return RE_ERROR_ILLEGAL; - - /* Create nodes for the start and end of the branch sequence. */ - branch_node = create_node(args->pattern, RE_OP_BRANCH, 0, 0, 0); - join_node = create_node(args->pattern, RE_OP_BRANCH, 0, 0, 0); - if (!branch_node || !join_node) - return RE_ERROR_MEMORY; - - /* Append the node. 
*/ - add_node(args->end, branch_node); - args->end = join_node; - - smallest_min_width = PY_SSIZE_T_MAX; - - subargs = *args; - - /* A branch in the regular expression is compiled into a series of 2-way - * branches. - */ - do { - RE_Node* next_branch_node; - - /* Skip over the 'BRANCH' or 'NEXT' opcode. */ - ++subargs.code; - - /* Compile the sequence until the next 'BRANCH' or 'NEXT' opcode. */ - subargs.min_width = 0; - subargs.has_captures = FALSE; - subargs.is_fuzzy = FALSE; - status = build_sequence(&subargs); - if (status != RE_ERROR_SUCCESS) - return status; - - smallest_min_width = min_ssize_t(smallest_min_width, - subargs.min_width); - - args->has_captures |= subargs.has_captures; - args->is_fuzzy |= subargs.is_fuzzy; - - /* Append the sequence. */ - add_node(branch_node, subargs.start); - add_node(subargs.end, join_node); - - /* Create a start node for the next sequence and append it. */ - next_branch_node = create_node(subargs.pattern, RE_OP_BRANCH, 0, 0, 0); - if (!next_branch_node) - return RE_ERROR_MEMORY; - - add_node(branch_node, next_branch_node); - branch_node = next_branch_node; - } while (subargs.code < subargs.end_code && subargs.code[0] == RE_OP_NEXT); - - /* We should have reached the end of the branch. */ - if (subargs.code[0] != RE_OP_END) - return RE_ERROR_ILLEGAL; - - args->code = subargs.code; - - ++args->code; - args->min_width += smallest_min_width; - - return RE_ERROR_SUCCESS; -} - -/* Builds a CALL_REF node. */ -Py_LOCAL_INLINE(int) build_CALL_REF(RE_CompileArgs* args) { - RE_CODE call_ref; - RE_Node* start_node; - RE_Node* end_node; - RE_CompileArgs subargs; - int status; - - /* codes: opcode, call_ref. */ - if (args->code + 1 > args->end_code) - return RE_ERROR_ILLEGAL; - - call_ref = args->code[1]; - - args->code += 2; - - /* Create nodes for the start and end of the subpattern. 
*/ - start_node = create_node(args->pattern, RE_OP_CALL_REF, 0, 0, 1); - end_node = create_node(args->pattern, RE_OP_GROUP_RETURN, 0, 0, 0); - if (!start_node || !end_node) - return RE_ERROR_MEMORY; - - start_node->values[0] = call_ref; - - /* Compile the sequence and check that we've reached the end of the - * subpattern. - */ - subargs = *args; - subargs.has_captures = FALSE; - subargs.is_fuzzy = FALSE; - status = build_sequence(&subargs); - if (status != RE_ERROR_SUCCESS) - return status; - - if (subargs.code[0] != RE_OP_END) - return RE_ERROR_ILLEGAL; - - args->code = subargs.code; - args->min_width = subargs.min_width; - args->has_captures |= subargs.has_captures; - args->is_fuzzy |= subargs.is_fuzzy; - - ++args->code; - - /* Record that we defined a call_ref. */ - if (!record_call_ref_defined(args->pattern, call_ref, start_node)) - return RE_ERROR_MEMORY; - - /* Append the node. */ - add_node(args->end, start_node); - add_node(start_node, subargs.start); - add_node(subargs.end, end_node); - args->end = end_node; - - return RE_ERROR_SUCCESS; -} - -/* Builds a CHARACTER or PROPERTY node. */ -Py_LOCAL_INLINE(int) build_CHARACTER_or_PROPERTY(RE_CompileArgs* args) { - RE_UINT8 op; - RE_CODE flags; - Py_ssize_t step; - RE_Node* node; - - /* codes: opcode, flags, value. */ - if (args->code + 2 > args->end_code) - return RE_ERROR_ILLEGAL; - - op = (RE_UINT8)args->code[0]; - flags = args->code[1]; - - step = get_step(op); - - if (flags & RE_ZEROWIDTH_OP) - step = 0; - - /* Create the node. */ - node = create_node(args->pattern, op, flags, step, 1); - if (!node) - return RE_ERROR_MEMORY; - - node->values[0] = args->code[2]; - - args->code += 3; - - /* Append the node. */ - add_node(args->end, node); - args->end = node; - - if (step != 0) - ++args->min_width; - - return RE_ERROR_SUCCESS; -} - -/* Builds a GROUP node. 
*/ -Py_LOCAL_INLINE(int) build_GROUP(RE_CompileArgs* args) { - RE_CODE private_group; - RE_CODE public_group; - RE_Node* start_node; - RE_Node* end_node; - RE_CompileArgs subargs; - int status; - - /* codes: opcode, private_group, public_group. */ - if (args->code + 2 > args->end_code) - return RE_ERROR_ILLEGAL; - - private_group = args->code[1]; - public_group = args->code[2]; - - args->code += 3; - - /* Create nodes for the start and end of the capture group. */ - start_node = create_node(args->pattern, args->forward ? RE_OP_START_GROUP : - RE_OP_END_GROUP, 0, 0, 3); - end_node = create_node(args->pattern, args->forward ? RE_OP_END_GROUP : - RE_OP_START_GROUP, 0, 0, 3); - if (!start_node || !end_node) - return RE_ERROR_MEMORY; - - start_node->values[0] = private_group; - end_node->values[0] = private_group; - start_node->values[1] = public_group; - end_node->values[1] = public_group; - - /* Signal that the capture should be saved when it's complete. */ - start_node->values[2] = 0; - end_node->values[2] = 1; - - /* Record that we have a new capture group. */ - if (!record_group(args->pattern, private_group, start_node)) - return RE_ERROR_MEMORY; - - /* Compile the sequence and check that we've reached the end of the capture - * group. - */ - subargs = *args; - subargs.has_captures = FALSE; - subargs.is_fuzzy = FALSE; - status = build_sequence(&subargs); - if (status != RE_ERROR_SUCCESS) - return status; - - if (subargs.code[0] != RE_OP_END) - return RE_ERROR_ILLEGAL; - - args->code = subargs.code; - args->min_width = subargs.min_width; - if (subargs.has_captures || subargs.visible_captures) - args->has_captures = TRUE; - args->is_fuzzy |= subargs.is_fuzzy; - - ++args->code; - - /* Record that the capture group has closed. */ - record_group_end(args->pattern, private_group); - - /* Append the capture group. 
*/ - add_node(args->end, start_node); - add_node(start_node, subargs.start); - add_node(subargs.end, end_node); - args->end = end_node; - - return RE_ERROR_SUCCESS; -} - -/* Builds a GROUP_CALL node. */ -Py_LOCAL_INLINE(int) build_GROUP_CALL(RE_CompileArgs* args) { - RE_CODE call_ref; - RE_Node* node; - - /* codes: opcode, call_ref. */ - if (args->code + 1 > args->end_code) - return RE_ERROR_ILLEGAL; - - call_ref = args->code[1]; - - /* Create the node. */ - node = create_node(args->pattern, RE_OP_GROUP_CALL, 0, 0, 1); - if (!node) - return RE_ERROR_MEMORY; - - node->values[0] = call_ref; - - args->code += 2; - - /* Record that we used a call_ref. */ - if (!record_call_ref_used(args->pattern, call_ref)) - return RE_ERROR_MEMORY; - - /* Append the node. */ - add_node(args->end, node); - args->end = node; - - return RE_ERROR_SUCCESS; -} - -/* Builds a GROUP_EXISTS node. */ -Py_LOCAL_INLINE(int) build_GROUP_EXISTS(RE_CompileArgs* args) { - RE_CODE group; - RE_Node* start_node; - RE_Node* end_node; - RE_CompileArgs subargs; - Py_ssize_t min_width; - int status; - - /* codes: opcode, sequence, next, sequence, end. */ - if (args->code + 2 > args->end_code) - return RE_ERROR_ILLEGAL; - - group = args->code[1]; - - args->code += 2; - - /* Create nodes for the start and end of the structure. */ - start_node = create_node(args->pattern, RE_OP_GROUP_EXISTS, 0, 0, 1); - end_node = create_node(args->pattern, RE_OP_BRANCH, 0, 0, 0); - if (!start_node || !end_node) - return RE_ERROR_MEMORY; - - start_node->values[0] = group; - - subargs = *args; - subargs.min_width = 0; - subargs.has_captures = FALSE; - subargs.is_fuzzy = FALSE; - status = build_sequence(&subargs); - if (status != RE_ERROR_SUCCESS) - return status; - - args->code = subargs.code; - args->has_captures |= subargs.has_captures; - args->is_fuzzy |= subargs.is_fuzzy; - - min_width = subargs.min_width; - - /* Append the start node. 
*/ - add_node(args->end, start_node); - add_node(start_node, subargs.start); - add_node(subargs.end, end_node); - - if (args->code[0] == RE_OP_NEXT) { - ++args->code; - - subargs.code = args->code; - subargs.min_width = 0; - subargs.has_captures = FALSE; - subargs.is_fuzzy = FALSE; - status = build_sequence(&subargs); - if (status != RE_ERROR_SUCCESS) - return status; - - args->code = subargs.code; - args->has_captures |= subargs.has_captures; - args->is_fuzzy |= subargs.is_fuzzy; - - min_width = min_ssize_t(min_width, subargs.min_width); - - add_node(start_node, subargs.start); - add_node(subargs.end, end_node); - } else { - add_node(start_node, end_node); - - min_width = 0; - } - - args->min_width += min_width; - - if (args->code[0] != RE_OP_END) - return RE_ERROR_ILLEGAL; - - ++args->code; - - args->end = end_node; - - return RE_ERROR_SUCCESS; -} - -/* Builds a LOOKAROUND node. */ -Py_LOCAL_INLINE(int) build_LOOKAROUND(RE_CompileArgs* args) { - RE_CODE flags; - BOOL forward; - RE_Node* lookaround_node; - RE_Node* success_node; - RE_CompileArgs subargs; - int status; - - /* codes: opcode, flags, forward, sequence, end. */ - if (args->code + 3 > args->end_code) - return RE_ERROR_ILLEGAL; - - flags = args->code[1]; - forward = (BOOL)args->code[2]; - - /* Create a node for the lookaround. */ - lookaround_node = create_node(args->pattern, RE_OP_LOOKAROUND, flags, 0, - 2); - if (!lookaround_node) - return RE_ERROR_MEMORY; - - /* The number of repeat indexes. */ - lookaround_node->values[1] = 0; - - args->code += 3; - - /* Compile the sequence and check that we've reached the end of the - * subpattern. 
- */ - subargs = *args; - subargs.forward = forward; - subargs.has_captures = FALSE; - subargs.is_fuzzy = FALSE; - status = build_sequence(&subargs); - if (status != RE_ERROR_SUCCESS) - return status; - - lookaround_node->values[0] = subargs.has_captures; - - if (subargs.code[0] != RE_OP_END) - return RE_ERROR_ILLEGAL; - - args->code = subargs.code; - args->has_captures |= subargs.has_captures; - args->is_fuzzy |= subargs.is_fuzzy; - ++args->code; - - /* Create the 'SUCCESS' node and append it to the subpattern. */ - success_node = create_node(args->pattern, RE_OP_SUCCESS, 0, 0, 0); - if (!success_node) - return RE_ERROR_MEMORY; - - /* Append the SUCCESS node. */ - add_node(subargs.end, success_node); - - /* Insert the subpattern into the node. */ - lookaround_node->nonstring.next_2.node = subargs.start; - - /* Append the lookaround. */ - add_node(args->end, lookaround_node); - args->end = lookaround_node; - - return RE_ERROR_SUCCESS; -} - -/* Builds a RANGE node. */ -Py_LOCAL_INLINE(int) build_RANGE(RE_CompileArgs* args) { - RE_UINT8 op; - RE_CODE flags; - Py_ssize_t step; - RE_Node* node; - - /* codes: opcode, flags, lower, upper. */ - if (args->code + 3 > args->end_code) - return RE_ERROR_ILLEGAL; - - op = (RE_UINT8)args->code[0]; - flags = args->code[1]; - - step = get_step(op); - - if (flags & RE_ZEROWIDTH_OP) - step = 0; - - /* Create the node. */ - node = create_node(args->pattern, op, flags, step, 2); - if (!node) - return RE_ERROR_MEMORY; - - node->values[0] = args->code[2]; - node->values[1] = args->code[3]; - - args->code += 4; - - /* Append the node. */ - add_node(args->end, node); - args->end = node; - - if (step != 0) - ++args->min_width; - - return RE_ERROR_SUCCESS; -} - -/* Builds a REF_GROUP node. */ -Py_LOCAL_INLINE(int) build_REF_GROUP(RE_CompileArgs* args) { - RE_CODE flags; - RE_CODE group; - RE_Node* node; - - /* codes: opcode, flags, group. 
*/ - if (args->code + 2 > args->end_code) - return RE_ERROR_ILLEGAL; - - flags = args->code[1]; - group = args->code[2]; - node = create_node(args->pattern, (RE_UINT8)args->code[0], flags, 0, 1); - if (!node) - return RE_ERROR_MEMORY; - - node->values[0] = group; - - args->code += 3; - - /* Record that we have a reference to a group. */ - if (!record_ref_group(args->pattern, group)) - return RE_ERROR_MEMORY; - - /* Append the reference. */ - add_node(args->end, node); - args->end = node; - - return RE_ERROR_SUCCESS; -} - -/* Builds a REPEAT node. */ -Py_LOCAL_INLINE(int) build_REPEAT(RE_CompileArgs* args) { - BOOL greedy; - RE_CODE min_count; - RE_CODE max_count; - int status; - - /* codes: opcode, min_count, max_count, sequence, end. */ - if (args->code + 3 > args->end_code) - return RE_ERROR_ILLEGAL; - - /* This includes special cases such as optional items, which we'll check - * for and treat specially. They don't need repeat counts, which helps us - * avoid unnecessary work when matching. - */ - greedy = args->code[0] == RE_OP_GREEDY_REPEAT; - min_count = args->code[1]; - max_count = args->code[2]; - if (args->code[1] > args->code[2]) - return RE_ERROR_ILLEGAL; - - args->code += 3; - - if (min_count == 0 && max_count == 1) { - /* Optional sequence. */ - RE_Node* branch_node; - RE_Node* join_node; - RE_CompileArgs subargs; - - /* Create the start and end nodes. */ - branch_node = create_node(args->pattern, RE_OP_BRANCH, 0, 0, 0); - join_node = create_node(args->pattern, RE_OP_BRANCH, 0, 0, 0); - if (!branch_node || !join_node) - return RE_ERROR_MEMORY; - - /* Compile the sequence and check that we've reached the end of it. 
*/ - subargs = *args; - subargs.has_captures = FALSE; - subargs.is_fuzzy = FALSE; - status = build_sequence(&subargs); - if (status != RE_ERROR_SUCCESS) - return status; - - if (subargs.code[0] != RE_OP_END) - return RE_ERROR_ILLEGAL; - - args->code = subargs.code; - args->has_captures |= subargs.has_captures; - args->is_fuzzy |= subargs.is_fuzzy; - - ++args->code; - - if (greedy) { - /* It's a greedy option. */ - add_node(branch_node, subargs.start); - add_node(branch_node, join_node); - } else { - /* It's a lazy option. */ - add_node(branch_node, join_node); - add_node(branch_node, subargs.start); - } - add_node(subargs.end, join_node); - - /* Append the optional sequence. */ - add_node(args->end, branch_node); - args->end = join_node; - } else if (min_count == 1 && max_count == 1) { - /* Singly-repeated sequence. */ - RE_CompileArgs subargs; - - subargs = *args; - subargs.has_captures = FALSE; - subargs.is_fuzzy = FALSE; - status = build_sequence(&subargs); - if (status != RE_ERROR_SUCCESS) - return status; - - if (subargs.code[0] != RE_OP_END) - return RE_ERROR_ILLEGAL; - - args->code = subargs.code; - args->min_width = subargs.min_width; - args->has_captures |= subargs.has_captures; - args->is_fuzzy |= subargs.is_fuzzy; - - ++args->code; - - /* Append the sequence. */ - add_node(args->end, subargs.start); - args->end = subargs.end; - } else { - size_t index; - RE_Node* repeat_node; - RE_CompileArgs subargs; - - index = args->pattern->repeat_count; - - /* Create the nodes for the repeat. */ - repeat_node = create_node(args->pattern, greedy ? RE_OP_GREEDY_REPEAT : - RE_OP_LAZY_REPEAT, 0, args->forward ? 
1 : -1, 4); - if (!repeat_node || !record_repeat(args->pattern, index, - args->repeat_depth)) - return RE_ERROR_MEMORY; - - repeat_node->values[0] = (RE_CODE)index; - repeat_node->values[1] = min_count; - repeat_node->values[2] = max_count; - repeat_node->values[3] = args->forward; - - if (args->within_fuzzy) - args->pattern->repeat_info[index].status |= RE_STATUS_BODY; - - /* Compile the 'body' and check that we've reached the end of it. */ - subargs = *args; - subargs.min_width = 0; - subargs.visible_captures = TRUE; - subargs.has_captures = FALSE; - subargs.is_fuzzy = FALSE; - ++subargs.repeat_depth; - status = build_sequence(&subargs); - if (status != RE_ERROR_SUCCESS) - return status; - - if (subargs.code[0] != RE_OP_END) - return RE_ERROR_ILLEGAL; - - args->code = subargs.code; - args->min_width += (Py_ssize_t)min_count * subargs.min_width; - args->has_captures |= subargs.has_captures; - args->is_fuzzy |= subargs.is_fuzzy; - - ++args->code; - - /* Is it a repeat of something which will match a single character? - * - * If it's in a fuzzy section then it won't be optimised as a - * single-character repeat. - */ - if (sequence_matches_one(subargs.start)) { - repeat_node->op = greedy ? RE_OP_GREEDY_REPEAT_ONE : - RE_OP_LAZY_REPEAT_ONE; - - /* Append the new sequence. */ - add_node(args->end, repeat_node); - repeat_node->nonstring.next_2.node = subargs.start; - args->end = repeat_node; - } else { - RE_Node* end_repeat_node; - RE_Node* end_node; - - end_repeat_node = create_node(args->pattern, greedy ? - RE_OP_END_GREEDY_REPEAT : RE_OP_END_LAZY_REPEAT, 0, args->forward - ? 
1 : -1, 4); - if (!end_repeat_node) - return RE_ERROR_MEMORY; - - end_repeat_node->values[0] = repeat_node->values[0]; - end_repeat_node->values[1] = repeat_node->values[1]; - end_repeat_node->values[2] = repeat_node->values[2]; - end_repeat_node->values[3] = args->forward; - - end_node = create_node(args->pattern, RE_OP_BRANCH, 0, 0, 0); - if (!end_node) - return RE_ERROR_MEMORY; - - /* Append the new sequence. */ - add_node(args->end, repeat_node); - add_node(repeat_node, subargs.start); - add_node(repeat_node, end_node); - add_node(subargs.end, end_repeat_node); - add_node(end_repeat_node, subargs.start); - add_node(end_repeat_node, end_node); - args->end = end_node; - } - } - - return RE_ERROR_SUCCESS; -} - -/* Builds a STRING node. */ -Py_LOCAL_INLINE(int) build_STRING(RE_CompileArgs* args, BOOL is_charset) { - RE_CODE flags; - RE_CODE length; - RE_UINT8 op; - Py_ssize_t step; - RE_Node* node; - size_t i; - - /* codes: opcode, flags, length, characters. */ - flags = args->code[1]; - length = args->code[2]; - if (args->code + 3 + length > args->end_code) - return RE_ERROR_ILLEGAL; - - op = (RE_UINT8)args->code[0]; - - step = get_step(op); - - /* Create the node. */ - node = create_node(args->pattern, op, flags, step * (Py_ssize_t)length, - length); - if (!node) - return RE_ERROR_MEMORY; - if (!is_charset) - node->status |= RE_STATUS_STRING; - - for (i = 0; i < length; i++) - node->values[i] = args->code[3 + i]; - - args->code += 3 + length; - - /* Append the node. */ - add_node(args->end, node); - args->end = node; - - /* Because of full case-folding, one character in the text could match - * multiple characters in the pattern. - */ - if (op == RE_OP_STRING_FLD || op == RE_OP_STRING_FLD_REV) - args->min_width += possible_unfolded_length((Py_ssize_t)length); - else - args->min_width += (Py_ssize_t)length; - - return RE_ERROR_SUCCESS; -} - -/* Builds a SET node. 
*/ -Py_LOCAL_INLINE(int) build_SET(RE_CompileArgs* args) { - RE_UINT8 op; - RE_CODE flags; - Py_ssize_t step; - RE_Node* node; - Py_ssize_t saved_min_width; - int status; - - /* codes: opcode, flags, members. */ - op = (RE_UINT8)args->code[0]; - flags = args->code[1]; - - step = get_step(op); - - if (flags & RE_ZEROWIDTH_OP) - step = 0; - - node = create_node(args->pattern, op, flags, step, 0); - if (!node) - return RE_ERROR_MEMORY; - - args->code += 2; - - /* Append the node. */ - add_node(args->end, node); - args->end = node; - - saved_min_width = args->min_width; - - /* Compile the character set. */ - do { - switch (args->code[0]) { - case RE_OP_CHARACTER: - case RE_OP_PROPERTY: - status = build_CHARACTER_or_PROPERTY(args); - if (status != RE_ERROR_SUCCESS) - return status; - break; - case RE_OP_RANGE: - status = build_RANGE(args); - if (status != RE_ERROR_SUCCESS) - return status; - break; - case RE_OP_SET_DIFF: - case RE_OP_SET_INTER: - case RE_OP_SET_SYM_DIFF: - case RE_OP_SET_UNION: - status = build_SET(args); - if (status != RE_ERROR_SUCCESS) - return status; - break; - case RE_OP_STRING: - /* A set of characters. */ - if (!build_STRING(args, TRUE)) - return FALSE; - break; - default: - /* Illegal opcode for a character set. */ - return RE_ERROR_ILLEGAL; - } - } while (args->code < args->end_code && args->code[0] != RE_OP_END); - - /* Check that we've reached the end correctly. (The last opcode should be - * 'END'.) - */ - if (args->code >= args->end_code || args->code[0] != RE_OP_END) - return RE_ERROR_ILLEGAL; - - ++args->code; - - /* At this point the set's members are in the main sequence. They need to - * be moved out-of-line. - */ - node->nonstring.next_2.node = node->next_1.node; - node->next_1.node = NULL; - args->end = node; - - args->min_width = saved_min_width; - - if (step != 0) - ++args->min_width; - - return RE_ERROR_SUCCESS; -} - -/* Builds a STRING_SET node. 
*/ -Py_LOCAL_INLINE(int) build_STRING_SET(RE_CompileArgs* args) { - RE_CODE index; - RE_CODE min_len; - RE_CODE max_len; - RE_Node* node; - - /* codes: opcode, index, min_len, max_len. */ - if (args->code + 3 > args->end_code) - return RE_ERROR_ILLEGAL; - - index = args->code[1]; - min_len = args->code[2]; - max_len = args->code[3]; - node = create_node(args->pattern, (RE_UINT8)args->code[0], 0, 0, 3); - if (!node) - return RE_ERROR_MEMORY; - - node->values[0] = index; - node->values[1] = min_len; - node->values[2] = max_len; - - args->code += 4; - - /* Append the reference. */ - add_node(args->end, node); - args->end = node; - - return RE_ERROR_SUCCESS; -} - -/* Builds a SUCCESS node . */ -Py_LOCAL_INLINE(int) build_SUCCESS(RE_CompileArgs* args) { - RE_Node* node; - /* code: opcode. */ - - /* Create the node. */ - node = create_node(args->pattern, RE_OP_SUCCESS, 0, 0, 0); - if (!node) - return RE_ERROR_MEMORY; - - ++args->code; - - /* Append the node. */ - add_node(args->end, node); - args->end = node; - - return RE_ERROR_SUCCESS; -} - -/* Builds a zero-width node. */ -Py_LOCAL_INLINE(int) build_zerowidth(RE_CompileArgs* args) { - RE_CODE flags; - RE_Node* node; - - /* codes: opcode, flags. */ - if (args->code + 1 > args->end_code) - return RE_ERROR_ILLEGAL; - - flags = args->code[1]; - - /* Create the node. */ - node = create_node(args->pattern, (RE_UINT8)args->code[0], flags, 0, 0); - if (!node) - return RE_ERROR_MEMORY; - - args->code += 2; - - /* Append the node. */ - add_node(args->end, node); - args->end = node; - - return RE_ERROR_SUCCESS; -} - -/* Builds a sequence of nodes from regular expression code. */ -Py_LOCAL_INLINE(int) build_sequence(RE_CompileArgs* args) { - int status; - - /* Guarantee that there's something to attach to. */ - args->start = create_node(args->pattern, RE_OP_BRANCH, 0, 0, 0); - args->end = args->start; - - /* The sequence should end with an opcode we don't understand. If it - * doesn't then the code is illegal. 
- */ - while (args->code < args->end_code) { - /* The following code groups opcodes by format, not function. */ - switch (args->code[0]) { - case RE_OP_ANY: - case RE_OP_ANY_ALL: - case RE_OP_ANY_ALL_REV: - case RE_OP_ANY_REV: - case RE_OP_ANY_U: - case RE_OP_ANY_U_REV: - /* A simple opcode with no trailing codewords and width of 1. */ - status = build_ANY(args); - if (status != RE_ERROR_SUCCESS) - return status; - break; - case RE_OP_ATOMIC: - /* An atomic sequence. */ - status = build_ATOMIC(args); - if (status != RE_ERROR_SUCCESS) - return status; - break; - case RE_OP_BOUNDARY: - case RE_OP_DEFAULT_BOUNDARY: - case RE_OP_DEFAULT_END_OF_WORD: - case RE_OP_DEFAULT_START_OF_WORD: - case RE_OP_END_OF_WORD: - case RE_OP_GRAPHEME_BOUNDARY: - case RE_OP_START_OF_WORD: - /* A word or grapheme boundary. */ - status = build_BOUNDARY(args); - if (status != RE_ERROR_SUCCESS) - return status; - break; - case RE_OP_BRANCH: - /* A 2-way branch. */ - status = build_BRANCH(args); - if (status != RE_ERROR_SUCCESS) - return status; - break; - case RE_OP_CALL_REF: - /* A group call ref. */ - status = build_CALL_REF(args); - if (status != RE_ERROR_SUCCESS) - return status; - break; - case RE_OP_CHARACTER: - case RE_OP_CHARACTER_IGN: - case RE_OP_CHARACTER_IGN_REV: - case RE_OP_CHARACTER_REV: - case RE_OP_PROPERTY: - case RE_OP_PROPERTY_IGN: - case RE_OP_PROPERTY_IGN_REV: - case RE_OP_PROPERTY_REV: - /* A character literal or a property. */ - status = build_CHARACTER_or_PROPERTY(args); - if (status != RE_ERROR_SUCCESS) - return status; - break; - case RE_OP_END_OF_LINE: - case RE_OP_END_OF_LINE_U: - case RE_OP_END_OF_STRING: - case RE_OP_END_OF_STRING_LINE: - case RE_OP_END_OF_STRING_LINE_U: - case RE_OP_SEARCH_ANCHOR: - case RE_OP_START_OF_LINE: - case RE_OP_START_OF_LINE_U: - case RE_OP_START_OF_STRING: - /* A simple opcode with no trailing codewords and width of 0. 
*/ - status = build_zerowidth(args); - if (status != RE_ERROR_SUCCESS) - return status; - break; - case RE_OP_FUZZY: - /* A fuzzy sequence. */ - status = build_FUZZY(args); - if (status != RE_ERROR_SUCCESS) - return status; - break; - case RE_OP_GREEDY_REPEAT: - case RE_OP_LAZY_REPEAT: - /* A repeated sequence. */ - status = build_REPEAT(args); - if (status != RE_ERROR_SUCCESS) - return status; - break; - case RE_OP_GROUP: - /* A capture group. */ - status = build_GROUP(args); - if (status != RE_ERROR_SUCCESS) - return status; - break; - case RE_OP_GROUP_CALL: - /* A group call. */ - status = build_GROUP_CALL(args); - if (status != RE_ERROR_SUCCESS) - return status; - break; - case RE_OP_GROUP_EXISTS: - /* A conditional sequence. */ - status = build_GROUP_EXISTS(args); - if (status != RE_ERROR_SUCCESS) - return status; - break; - case RE_OP_LOOKAROUND: - /* A lookaround. */ - status = build_LOOKAROUND(args); - if (status != RE_ERROR_SUCCESS) - return status; - break; - case RE_OP_RANGE: - case RE_OP_RANGE_IGN: - case RE_OP_RANGE_IGN_REV: - case RE_OP_RANGE_REV: - /* A range. */ - status = build_RANGE(args); - if (status != RE_ERROR_SUCCESS) - return status; - break; - case RE_OP_REF_GROUP: - case RE_OP_REF_GROUP_FLD: - case RE_OP_REF_GROUP_FLD_REV: - case RE_OP_REF_GROUP_IGN: - case RE_OP_REF_GROUP_IGN_REV: - case RE_OP_REF_GROUP_REV: - /* A reference to a group. */ - status = build_REF_GROUP(args); - if (status != RE_ERROR_SUCCESS) - return status; - break; - case RE_OP_SET_DIFF: - case RE_OP_SET_DIFF_IGN: - case RE_OP_SET_DIFF_IGN_REV: - case RE_OP_SET_DIFF_REV: - case RE_OP_SET_INTER: - case RE_OP_SET_INTER_IGN: - case RE_OP_SET_INTER_IGN_REV: - case RE_OP_SET_INTER_REV: - case RE_OP_SET_SYM_DIFF: - case RE_OP_SET_SYM_DIFF_IGN: - case RE_OP_SET_SYM_DIFF_IGN_REV: - case RE_OP_SET_SYM_DIFF_REV: - case RE_OP_SET_UNION: - case RE_OP_SET_UNION_IGN: - case RE_OP_SET_UNION_IGN_REV: - case RE_OP_SET_UNION_REV: - /* A set. 
*/ - status = build_SET(args); - if (status != RE_ERROR_SUCCESS) - return status; - break; - case RE_OP_STRING: - case RE_OP_STRING_FLD: - case RE_OP_STRING_FLD_REV: - case RE_OP_STRING_IGN: - case RE_OP_STRING_IGN_REV: - case RE_OP_STRING_REV: - /* A string literal. */ - if (!build_STRING(args, FALSE)) - return FALSE; - break; - case RE_OP_STRING_SET: - case RE_OP_STRING_SET_FLD: - case RE_OP_STRING_SET_FLD_REV: - case RE_OP_STRING_SET_IGN: - case RE_OP_STRING_SET_IGN_REV: - case RE_OP_STRING_SET_REV: - /* A reference to a list. */ - status = build_STRING_SET(args); - if (status != RE_ERROR_SUCCESS) - return status; - break; - case RE_OP_SUCCESS: - /* Success. */ - status = build_SUCCESS(args); - if (status != RE_ERROR_SUCCESS) - return status; - break; - default: - /* We've found an opcode which we don't recognise. We'll leave it - * for the caller. - */ - return RE_ERROR_SUCCESS; - } - } - - /* If we're here then we should be at the end of the code, otherwise we - * have an error. - */ - return args->code == args->end_code; -} - -/* Compiles the regular expression code to 'nodes'. - * - * Various details about the regular expression are discovered during - * compilation and stored in the PatternObject. - */ -Py_LOCAL_INLINE(BOOL) compile_to_nodes(RE_CODE* code, RE_CODE* end_code, - PatternObject* pattern) { - RE_CompileArgs args; - int status; - - /* Compile a regex sequence and then check that we've reached the end - * correctly. (The last opcode should be 'SUCCESS'.) - * - * If successful, 'start' and 'end' will point to the start and end nodes - * of the compiled sequence. 
- */ - args.code = code; - args.end_code = end_code; - args.pattern = pattern; - args.forward = (pattern->flags & RE_FLAG_REVERSE) == 0; - args.min_width = 0; - args.visible_captures = FALSE; - args.has_captures = FALSE; - args.repeat_depth = 0; - args.is_fuzzy = FALSE; - args.within_fuzzy = FALSE; - status = build_sequence(&args); - if (status == RE_ERROR_ILLEGAL) - set_error(RE_ERROR_ILLEGAL, NULL); - - if (status != RE_ERROR_SUCCESS) - return FALSE; - - pattern->min_width = args.min_width; - pattern->is_fuzzy = args.is_fuzzy; - pattern->do_search_start = TRUE; - pattern->start_node = args.start; - - /* Optimise the pattern. */ - if (!optimise_pattern(pattern)) - return FALSE; - - pattern->start_test = locate_test_start(pattern->start_node); - - /* Get the call_ref for the entire pattern, if any. */ - if (pattern->start_node->op == RE_OP_CALL_REF) - pattern->pattern_call_ref = (Py_ssize_t)pattern->start_node->values[0]; - else - pattern->pattern_call_ref = -1; - - return TRUE; -} - -/* Gets the required characters for a regex. - * - * In the event of an error, it just pretends that there are no required - * characters. - */ -Py_LOCAL_INLINE(void) get_required_chars(PyObject* required_chars, RE_CODE** - req_chars, size_t* req_length) { - Py_ssize_t len; - RE_CODE* chars; - Py_ssize_t i; - - *req_chars = NULL; - *req_length = 0; - - len = PyTuple_GET_SIZE(required_chars); - if (len < 1 || PyErr_Occurred()) { - PyErr_Clear(); - return; - } - - chars = (RE_CODE*)re_alloc((size_t)len * sizeof(RE_CODE)); - if (!chars) - goto error; - - for (i = 0; i < len; i++) { - PyObject* o; - size_t value; - - /* PyTuple_SET_ITEM borrows the reference. 
*/ - o = PyTuple_GET_ITEM(required_chars, i); - - value = PyLong_AsUnsignedLong(o); - if ((Py_ssize_t)value == -1 && PyErr_Occurred()) - goto error; - - chars[i] = (RE_CODE)value; - if (chars[i] != value) - goto error; - } - - *req_chars = chars; - *req_length = (size_t)len; - - return; - -error: - PyErr_Clear(); - re_dealloc(chars); -} - -/* Makes a STRING node. */ -Py_LOCAL_INLINE(RE_Node*) make_STRING_node(PatternObject* pattern, RE_UINT8 op, - size_t length, RE_CODE* chars) { - Py_ssize_t step; - RE_Node* node; - size_t i; - - step = get_step(op); - - /* Create the node. */ - node = create_node(pattern, op, 0, step * (Py_ssize_t)length, length); - if (!node) - return NULL; - - node->status |= RE_STATUS_STRING; - - for (i = 0; i < length; i++) - node->values[i] = chars[i]; - - return node; -} - -/* Compiles regular expression code to a PatternObject. - * - * The regular expression code is provided as a list and is then compiled to - * 'nodes'. Various details about the regular expression are discovered during - * compilation and stored in the PatternObject. - */ -static PyObject* re_compile(PyObject* self_, PyObject* args) { - PyObject* pattern; - Py_ssize_t flags = 0; - PyObject* code_list; - PyObject* groupindex; - PyObject* indexgroup; - PyObject* named_lists; - PyObject* named_list_indexes; - Py_ssize_t req_offset; - PyObject* required_chars; - size_t req_length; - RE_CODE* req_chars; - Py_ssize_t req_flags; - size_t public_group_count; - Py_ssize_t code_len; - RE_CODE* code; - Py_ssize_t i; - PatternObject* self; - BOOL ascii; - BOOL locale; - BOOL unicode; - BOOL ok; - - if (!PyArg_ParseTuple(args, "OnOOOOOnOnn:re_compile", &pattern, &flags, - &code_list, &groupindex, &indexgroup, &named_lists, &named_list_indexes, - &req_offset, &required_chars, &req_flags, &public_group_count)) - return NULL; - - /* Read the regex code. 
*/ - code_len = PyList_GET_SIZE(code_list); - code = (RE_CODE*)re_alloc((size_t)code_len * sizeof(RE_CODE)); - if (!code) - return NULL; - - for (i = 0; i < code_len; i++) { - PyObject* o; - size_t value; - - /* PyList_GET_ITEM borrows a reference. */ - o = PyList_GET_ITEM(code_list, i); - - value = PyLong_AsUnsignedLong(o); - if ((Py_ssize_t)value == -1 && PyErr_Occurred()) - goto error; - - code[i] = (RE_CODE)value; - if (code[i] != value) - goto error; - } - - /* Get the required characters. */ - get_required_chars(required_chars, &req_chars, &req_length); - - /* Create the PatternObject. */ - self = PyObject_NEW(PatternObject, &Pattern_Type); - if (!self) { - set_error(RE_ERROR_MEMORY, NULL); - re_dealloc(req_chars); - re_dealloc(code); - return NULL; - } - - /* Initialise the PatternObject. */ - self->pattern = pattern; - self->flags = flags; - self->weakreflist = NULL; - self->start_node = NULL; - self->repeat_count = 0; - self->true_group_count = 0; - self->public_group_count = public_group_count; - self->group_end_index = 0; - self->groupindex = groupindex; - self->indexgroup = indexgroup; - self->named_lists = named_lists; - self->named_lists_count = (size_t)PyDict_Size(named_lists); - self->partial_named_lists[0] = NULL; - self->partial_named_lists[1] = NULL; - self->named_list_indexes = named_list_indexes; - self->node_capacity = 0; - self->node_count = 0; - self->node_list = NULL; - self->group_info_capacity = 0; - self->group_info = NULL; - self->call_ref_info_capacity = 0; - self->call_ref_info_count = 0; - self->call_ref_info = NULL; - self->repeat_info_capacity = 0; - self->repeat_info = NULL; - self->groups_storage = NULL; - self->repeats_storage = NULL; - self->fuzzy_count = 0; - self->recursive = FALSE; - self->req_offset = req_offset; - self->req_string = NULL; - Py_INCREF(self->pattern); - Py_INCREF(self->groupindex); - Py_INCREF(self->indexgroup); - Py_INCREF(self->named_lists); - Py_INCREF(self->named_list_indexes); - - /* Initialise the 
character encoding. */ - unicode = (flags & RE_FLAG_UNICODE) != 0; - locale = (flags & RE_FLAG_LOCALE) != 0; - ascii = (flags & RE_FLAG_ASCII) != 0; - if (!unicode && !locale && !ascii) { - if (PyString_Check(self->pattern)) - ascii = RE_FLAG_ASCII; - else - unicode = RE_FLAG_UNICODE; - } - if (unicode) - self->encoding = &unicode_encoding; - else if (locale) - self->encoding = &locale_encoding; - else if (ascii) - self->encoding = &ascii_encoding; - - /* Compile the regular expression code to nodes. */ - ok = compile_to_nodes(code, code + code_len, self); - - /* We no longer need the regular expression code. */ - re_dealloc(code); - - if (!ok) { - Py_DECREF(self); - re_dealloc(req_chars); - return NULL; - } - - /* Make a node for the required string, if there's one. */ - if (req_chars) { - /* Remove the FULLCASE flag if it's not a Unicode pattern. */ - if (!(self->flags & RE_FLAG_UNICODE)) - req_flags &= ~RE_FLAG_FULLCASE; - - if (self->flags & RE_FLAG_REVERSE) { - switch (req_flags) { - case 0: - self->req_string = make_STRING_node(self, RE_OP_STRING_REV, - req_length, req_chars); - break; - case RE_FLAG_IGNORECASE | RE_FLAG_FULLCASE: - self->req_string = make_STRING_node(self, RE_OP_STRING_FLD_REV, - req_length, req_chars); - break; - case RE_FLAG_IGNORECASE: - self->req_string = make_STRING_node(self, RE_OP_STRING_IGN_REV, - req_length, req_chars); - break; - } - } else { - switch (req_flags) { - case 0: - self->req_string = make_STRING_node(self, RE_OP_STRING, - req_length, req_chars); - break; - case RE_FLAG_IGNORECASE | RE_FLAG_FULLCASE: - self->req_string = make_STRING_node(self, RE_OP_STRING_FLD, - req_length, req_chars); - break; - case RE_FLAG_IGNORECASE: - self->req_string = make_STRING_node(self, RE_OP_STRING_IGN, - req_length, req_chars); - break; - } - } - - re_dealloc(req_chars); - } - - return (PyObject*)self; - -error: - re_dealloc(code); - set_error(RE_ERROR_ILLEGAL, NULL); - return NULL; -} - -/* Gets the size of the codewords. 
*/ -static PyObject* get_code_size(PyObject* self, PyObject* unused) { - return Py_BuildValue("n", sizeof(RE_CODE)); -} - -/* Gets the property dict. */ -static PyObject* get_properties(PyObject* self_, PyObject* args) { - Py_INCREF(property_dict); - - return property_dict; -} - -/* Folds the case of a string. */ -static PyObject* fold_case(PyObject* self_, PyObject* args) { - RE_StringInfo str_info; - Py_UCS4 (*char_at)(void* text, Py_ssize_t pos); - Py_ssize_t folded_charsize; - void (*set_char_at)(void* text, Py_ssize_t pos, Py_UCS4 ch); - RE_EncodingTable* encoding; - Py_ssize_t buf_size; - void* folded; - Py_ssize_t folded_len; - PyObject* result; - - Py_ssize_t flags; - PyObject* string; - if (!PyArg_ParseTuple(args, "nO:fold_case", &flags, &string)) - return NULL; - - if (!(flags & RE_FLAG_IGNORECASE)) { - Py_INCREF(string); - return string; - } - - /* Get the string. */ - if (!get_string(string, &str_info)) - return NULL; - - /* Get the function for reading from the original string. */ - switch (str_info.charsize) { - case 1: - char_at = bytes1_char_at; - break; - case 2: - char_at = bytes2_char_at; - break; - case 4: - char_at = bytes4_char_at; - break; - default: -#if PY_VERSION_HEX >= 0x02060000 - release_buffer(&str_info); - -#endif - return NULL; - } - - /* What's the encoding? */ - if (flags & RE_FLAG_UNICODE) - encoding = &unicode_encoding; - else if (flags & RE_FLAG_LOCALE) - encoding = &locale_encoding; - else if (flags & RE_FLAG_ASCII) - encoding = &ascii_encoding; - else - encoding = &unicode_encoding; - - /* The folded string will have the same width as the original string. */ - folded_charsize = str_info.charsize; - - /* Get the function for writing to the folded string. 
*/ - switch (folded_charsize) { - case 1: - set_char_at = bytes1_set_char_at; - break; - case 2: - set_char_at = bytes2_set_char_at; - break; - case 4: - set_char_at = bytes4_set_char_at; - break; - default: -#if PY_VERSION_HEX >= 0x02060000 - release_buffer(&str_info); - -#endif - return NULL; - } - - /* Allocate a buffer for the folded string. */ - if (flags & RE_FLAG_FULLCASE) - /* When using full case-folding with Unicode, some single codepoints - * are mapped to multiple codepoints. - */ - buf_size = str_info.length * RE_MAX_FOLDED; - else - buf_size = str_info.length; - - folded = re_alloc((size_t)(buf_size * folded_charsize)); - if (!folded) { -#if PY_VERSION_HEX >= 0x02060000 - release_buffer(&str_info); - -#endif - return NULL; - } - - /* Fold the case of the string. */ - folded_len = 0; - - if (flags & RE_FLAG_FULLCASE) { - /* Full case-folding. */ - int (*full_case_fold)(Py_UCS4 ch, Py_UCS4* folded); - Py_ssize_t i; - Py_UCS4 codepoints[RE_MAX_FOLDED]; - - full_case_fold = encoding->full_case_fold; - - for (i = 0; i < str_info.length; i++) { - int count; - int j; - - count = full_case_fold(char_at(str_info.characters, i), - codepoints); - for (j = 0; j < count; j++) - set_char_at(folded, folded_len + j, codepoints[j]); - - folded_len += count; - } - } else { - /* Simple case-folding. */ - Py_UCS4 (*simple_case_fold)(Py_UCS4 ch); - Py_ssize_t i; - - simple_case_fold = encoding->simple_case_fold; - - for (i = 0; i < str_info.length; i++) { - Py_UCS4 ch; - - ch = simple_case_fold(char_at(str_info.characters, i)); - set_char_at(folded, i, ch); - } - - folded_len = str_info.length; - } - - /* Build the result string. */ - if (str_info.is_unicode) - result = build_unicode_value(folded, folded_len, folded_charsize); - else - result = build_bytes_value(folded, folded_len, folded_charsize); - - re_dealloc(folded); - -#if PY_VERSION_HEX >= 0x02060000 - /* Release the original string's buffer. 
*/ - release_buffer(&str_info); - -#endif - return result; -} - -/* Returns a tuple of the Unicode characters that expand on full case-folding. - */ -static PyObject* get_expand_on_folding(PyObject* self, PyObject* unused) { - int count; - int i; - PyObject* result; - - /* How many characters are there? */ - count = sizeof(re_expand_on_folding) / sizeof(re_expand_on_folding[0]); - - /* Put all the characters in a tuple. */ - result = PyTuple_New(count); - if (!result) - return NULL; - - for (i = 0; i < count; i++) { - Py_UNICODE codepoint; - PyObject* item; - - codepoint = re_expand_on_folding[i]; - - item = build_unicode_value(&codepoint, 1, sizeof(codepoint)); - if (!item) - goto error; - - /* PyTuple_SetItem borrows the reference. */ - PyTuple_SetItem(result, i, item); - } - - return result; - -error: - Py_DECREF(result); - return NULL; -} - -/* Returns whether a character has a given value for a Unicode property. */ -static PyObject* has_property_value(PyObject* self_, PyObject* args) { - BOOL v; - - Py_ssize_t property_value; - Py_ssize_t character; - if (!PyArg_ParseTuple(args, "nn:has_property_value", &property_value, - &character)) - return NULL; - - v = unicode_has_property((RE_CODE)property_value, (Py_UCS4)character) ? 1 : - 0; - - return Py_BuildValue("n", v); -} - -/* Returns a list of all the simple cases of a character. - * - * If full case-folding is turned on and the character also expands on full - * case-folding, a None is appended to the list. - */ -static PyObject* get_all_cases(PyObject* self_, PyObject* args) { - RE_EncodingTable* encoding; - int count; - Py_UCS4 cases[RE_MAX_CASES]; - Py_UCS4 folded[RE_MAX_FOLDED]; - PyObject* result; - int i; - - Py_ssize_t flags; - Py_ssize_t character; - if (!PyArg_ParseTuple(args, "nn:get_all_cases", &flags, &character)) - return NULL; - - /* What's the encoding? 
*/ - if (flags & RE_FLAG_UNICODE) - encoding = &unicode_encoding; - else if (flags & RE_FLAG_LOCALE) - encoding = &locale_encoding; - else if (flags & RE_FLAG_ASCII) - encoding = &ascii_encoding; - else - encoding = &ascii_encoding; - - /* Get all the simple cases. */ - count = encoding->all_cases((Py_UCS4)character, cases); - - result = PyList_New(count); - if (!result) - return NULL; - - for (i = 0; i < count; i++) { - PyObject* item; - - item = Py_BuildValue("n", cases[i]); - if (!item) - goto error; - - /* PyList_SetItem borrows the reference. */ - PyList_SetItem(result, i, item); - } - - /* If the character also expands on full case-folding, append a None. */ - if ((flags & RE_FULL_CASE_FOLDING) == RE_FULL_CASE_FOLDING) { - count = encoding->full_case_fold((Py_UCS4)character, folded); - if (count > 1) - PyList_Append(result, Py_None); - } - - return result; - -error: - Py_DECREF(result); - return NULL; -} - -/* The table of the module's functions. */ -static PyMethodDef _functions[] = { - {"compile", (PyCFunction)re_compile, METH_VARARGS}, - {"get_code_size", (PyCFunction)get_code_size, METH_NOARGS}, - {"get_properties", (PyCFunction)get_properties, METH_VARARGS}, - {"fold_case", (PyCFunction)fold_case, METH_VARARGS}, - {"get_expand_on_folding", (PyCFunction)get_expand_on_folding, METH_NOARGS}, - {"has_property_value", (PyCFunction)has_property_value, METH_VARARGS}, - {"get_all_cases", (PyCFunction)get_all_cases, METH_VARARGS}, - {NULL, NULL} -}; - -/* Initialises the property dictionary. */ -Py_LOCAL_INLINE(BOOL) init_property_dict(void) { - size_t value_set_count; - size_t i; - PyObject** value_dicts; - - property_dict = NULL; - - /* How many value sets are there? 
*/ - value_set_count = 0; - - for (i = 0; i < sizeof(re_property_values) / sizeof(re_property_values[0]); - i++) { - RE_PropertyValue* value; - - value = &re_property_values[i]; - if (value->value_set >= value_set_count) - value_set_count = (size_t)value->value_set + 1; - } - - /* Quick references for the value sets. */ - value_dicts = (PyObject**)re_alloc(value_set_count * - sizeof(value_dicts[0])); - if (!value_dicts) - return FALSE; - - memset(value_dicts, 0, value_set_count * sizeof(value_dicts[0])); - - /* Build the property values dictionaries. */ - for (i = 0; i < sizeof(re_property_values) / sizeof(re_property_values[0]); - i++) { - RE_PropertyValue* value; - PyObject* v; - int status; - - value = &re_property_values[i]; - if (!value_dicts[value->value_set]) { - value_dicts[value->value_set] = PyDict_New(); - if (!value_dicts[value->value_set]) - goto error; - } - - v = Py_BuildValue("i", value->id); - if (!v) - goto error; - - status = PyDict_SetItemString(value_dicts[value->value_set], - re_strings[value->name], v); - Py_DECREF(v); - if (status < 0) - goto error; - } - - /* Build the property dictionary. */ - property_dict = PyDict_New(); - if (!property_dict) - goto error; - - for (i = 0; i < sizeof(re_properties) / sizeof(re_properties[0]); i++) { - RE_Property* property; - PyObject* v; - int status; - - property = &re_properties[i]; - v = Py_BuildValue("iO", property->id, - value_dicts[property->value_set]); - if (!v) - goto error; - - status = PyDict_SetItemString(property_dict, - re_strings[property->name], v); - Py_DECREF(v); - if (status < 0) - goto error; - } - - /* DECREF the value sets. Any unused ones will be deallocated. */ - for (i = 0; i < value_set_count; i++) - Py_XDECREF(value_dicts[i]); - - re_dealloc(value_dicts); - - return TRUE; - -error: - Py_XDECREF(property_dict); - - /* DECREF the value sets. 
*/ - for (i = 0; i < value_set_count; i++) - Py_XDECREF(value_dicts[i]); - - re_dealloc(value_dicts); - - return FALSE; -} - -/* Initialises the module. */ -PyMODINIT_FUNC init_regex(void) { - PyObject* m; - PyObject* d; - PyObject* x; - -#if defined(VERBOSE) - /* Unbuffered in case it crashes! */ - setvbuf(stdout, NULL, _IONBF, 0); - -#endif - /* Initialise Pattern_Type. */ - Pattern_Type.tp_dealloc = pattern_dealloc; - Pattern_Type.tp_repr = pattern_repr; - Pattern_Type.tp_flags = Py_TPFLAGS_HAVE_WEAKREFS; - Pattern_Type.tp_doc = pattern_doc; - Pattern_Type.tp_weaklistoffset = offsetof(PatternObject, weakreflist); - Pattern_Type.tp_methods = pattern_methods; - Pattern_Type.tp_members = pattern_members; - Pattern_Type.tp_getset = pattern_getset; - - /* Initialise Match_Type. */ - Match_Type.tp_dealloc = match_dealloc; - Match_Type.tp_repr = match_repr; - Match_Type.tp_as_mapping = &match_as_mapping; - Match_Type.tp_flags = Py_TPFLAGS_DEFAULT; - Match_Type.tp_doc = match_doc; - Match_Type.tp_methods = match_methods; - Match_Type.tp_members = match_members; - Match_Type.tp_getset = match_getset; - - /* Initialise Scanner_Type. */ - Scanner_Type.tp_dealloc = scanner_dealloc; - Scanner_Type.tp_flags = Py_TPFLAGS_DEFAULT; - Scanner_Type.tp_doc = scanner_doc; - Scanner_Type.tp_iter = scanner_iter; - Scanner_Type.tp_iternext = scanner_iternext; - Scanner_Type.tp_methods = scanner_methods; - Scanner_Type.tp_members = scanner_members; - - /* Initialise Splitter_Type. 
*/ - Splitter_Type.tp_dealloc = splitter_dealloc; - Splitter_Type.tp_flags = Py_TPFLAGS_DEFAULT; - Splitter_Type.tp_doc = splitter_doc; - Splitter_Type.tp_iter = splitter_iter; - Splitter_Type.tp_iternext = splitter_iternext; - Splitter_Type.tp_methods = splitter_methods; - Splitter_Type.tp_members = splitter_members; - - /* Initialize object types */ - if (PyType_Ready(&Pattern_Type) < 0) - return; - if (PyType_Ready(&Match_Type) < 0) - return; - if (PyType_Ready(&Scanner_Type) < 0) - return; - if (PyType_Ready(&Splitter_Type) < 0) - return; - - error_exception = NULL; - - m = Py_InitModule("_" RE_MODULE, _functions); - if (!m) - return; - - d = PyModule_GetDict(m); - - x = PyInt_FromLong(RE_MAGIC); - if (x) { - PyDict_SetItemString(d, "MAGIC", x); - Py_DECREF(x); - } - - x = PyInt_FromLong(sizeof(RE_CODE)); - if (x) { - PyDict_SetItemString(d, "CODE_SIZE", x); - Py_DECREF(x); - } - - x = PyString_FromString(copyright); - if (x) { - PyDict_SetItemString(d, "copyright", x); - Py_DECREF(x); - } - - /* Initialise the property dictionary. */ - if (!init_property_dict()) - return; -} - -/* vim:ts=4:sw=4:et */ diff --git a/lib/regex/_regex.h b/lib/regex/_regex.h deleted file mode 100644 index 33dc1540..00000000 --- a/lib/regex/_regex.h +++ /dev/null @@ -1,228 +0,0 @@ -/* - * Secret Labs' Regular Expression Engine - * - * regular expression matching engine - * - * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved. - * - * NOTE: This file is generated by regex.py. If you need - * to change anything in here, edit regex.py and run it. - * - * 2010-01-16 mrab Re-written - */ - -/* Supports Unicode version 6.3.0. */ - -#define RE_MAGIC 20100116 - -#include "_regex_unicode.h" - -/* Operators. 
*/ -#define RE_OP_FAILURE 0 -#define RE_OP_SUCCESS 1 -#define RE_OP_ANY 2 -#define RE_OP_ANY_ALL 3 -#define RE_OP_ANY_ALL_REV 4 -#define RE_OP_ANY_REV 5 -#define RE_OP_ANY_U 6 -#define RE_OP_ANY_U_REV 7 -#define RE_OP_ATOMIC 8 -#define RE_OP_BOUNDARY 9 -#define RE_OP_BRANCH 10 -#define RE_OP_CALL_REF 11 -#define RE_OP_CHARACTER 12 -#define RE_OP_CHARACTER_IGN 13 -#define RE_OP_CHARACTER_IGN_REV 14 -#define RE_OP_CHARACTER_REV 15 -#define RE_OP_DEFAULT_BOUNDARY 16 -#define RE_OP_DEFAULT_END_OF_WORD 17 -#define RE_OP_DEFAULT_START_OF_WORD 18 -#define RE_OP_END 19 -#define RE_OP_END_OF_LINE 20 -#define RE_OP_END_OF_LINE_U 21 -#define RE_OP_END_OF_STRING 22 -#define RE_OP_END_OF_STRING_LINE 23 -#define RE_OP_END_OF_STRING_LINE_U 24 -#define RE_OP_END_OF_WORD 25 -#define RE_OP_FUZZY 26 -#define RE_OP_GRAPHEME_BOUNDARY 27 -#define RE_OP_GREEDY_REPEAT 28 -#define RE_OP_GROUP 29 -#define RE_OP_GROUP_CALL 30 -#define RE_OP_GROUP_EXISTS 31 -#define RE_OP_LAZY_REPEAT 32 -#define RE_OP_LOOKAROUND 33 -#define RE_OP_NEXT 34 -#define RE_OP_PROPERTY 35 -#define RE_OP_PROPERTY_IGN 36 -#define RE_OP_PROPERTY_IGN_REV 37 -#define RE_OP_PROPERTY_REV 38 -#define RE_OP_RANGE 39 -#define RE_OP_RANGE_IGN 40 -#define RE_OP_RANGE_IGN_REV 41 -#define RE_OP_RANGE_REV 42 -#define RE_OP_REF_GROUP 43 -#define RE_OP_REF_GROUP_FLD 44 -#define RE_OP_REF_GROUP_FLD_REV 45 -#define RE_OP_REF_GROUP_IGN 46 -#define RE_OP_REF_GROUP_IGN_REV 47 -#define RE_OP_REF_GROUP_REV 48 -#define RE_OP_SEARCH_ANCHOR 49 -#define RE_OP_SET_DIFF 50 -#define RE_OP_SET_DIFF_IGN 51 -#define RE_OP_SET_DIFF_IGN_REV 52 -#define RE_OP_SET_DIFF_REV 53 -#define RE_OP_SET_INTER 54 -#define RE_OP_SET_INTER_IGN 55 -#define RE_OP_SET_INTER_IGN_REV 56 -#define RE_OP_SET_INTER_REV 57 -#define RE_OP_SET_SYM_DIFF 58 -#define RE_OP_SET_SYM_DIFF_IGN 59 -#define RE_OP_SET_SYM_DIFF_IGN_REV 60 -#define RE_OP_SET_SYM_DIFF_REV 61 -#define RE_OP_SET_UNION 62 -#define RE_OP_SET_UNION_IGN 63 -#define RE_OP_SET_UNION_IGN_REV 64 -#define 
RE_OP_SET_UNION_REV 65 -#define RE_OP_START_OF_LINE 66 -#define RE_OP_START_OF_LINE_U 67 -#define RE_OP_START_OF_STRING 68 -#define RE_OP_START_OF_WORD 69 -#define RE_OP_STRING 70 -#define RE_OP_STRING_FLD 71 -#define RE_OP_STRING_FLD_REV 72 -#define RE_OP_STRING_IGN 73 -#define RE_OP_STRING_IGN_REV 74 -#define RE_OP_STRING_REV 75 -#define RE_OP_STRING_SET 76 -#define RE_OP_STRING_SET_FLD 77 -#define RE_OP_STRING_SET_FLD_REV 78 -#define RE_OP_STRING_SET_IGN 79 -#define RE_OP_STRING_SET_IGN_REV 80 -#define RE_OP_STRING_SET_REV 81 -#define RE_OP_BODY_END 82 -#define RE_OP_BODY_START 83 -#define RE_OP_END_FUZZY 84 -#define RE_OP_END_GREEDY_REPEAT 85 -#define RE_OP_END_GROUP 86 -#define RE_OP_END_LAZY_REPEAT 87 -#define RE_OP_GREEDY_REPEAT_ONE 88 -#define RE_OP_GROUP_RETURN 89 -#define RE_OP_LAZY_REPEAT_ONE 90 -#define RE_OP_MATCH_BODY 91 -#define RE_OP_MATCH_TAIL 92 -#define RE_OP_START_GROUP 93 - -char* re_op_text[] = { - "RE_OP_FAILURE", - "RE_OP_SUCCESS", - "RE_OP_ANY", - "RE_OP_ANY_ALL", - "RE_OP_ANY_ALL_REV", - "RE_OP_ANY_REV", - "RE_OP_ANY_U", - "RE_OP_ANY_U_REV", - "RE_OP_ATOMIC", - "RE_OP_BOUNDARY", - "RE_OP_BRANCH", - "RE_OP_CALL_REF", - "RE_OP_CHARACTER", - "RE_OP_CHARACTER_IGN", - "RE_OP_CHARACTER_IGN_REV", - "RE_OP_CHARACTER_REV", - "RE_OP_DEFAULT_BOUNDARY", - "RE_OP_DEFAULT_END_OF_WORD", - "RE_OP_DEFAULT_START_OF_WORD", - "RE_OP_END", - "RE_OP_END_OF_LINE", - "RE_OP_END_OF_LINE_U", - "RE_OP_END_OF_STRING", - "RE_OP_END_OF_STRING_LINE", - "RE_OP_END_OF_STRING_LINE_U", - "RE_OP_END_OF_WORD", - "RE_OP_FUZZY", - "RE_OP_GRAPHEME_BOUNDARY", - "RE_OP_GREEDY_REPEAT", - "RE_OP_GROUP", - "RE_OP_GROUP_CALL", - "RE_OP_GROUP_EXISTS", - "RE_OP_LAZY_REPEAT", - "RE_OP_LOOKAROUND", - "RE_OP_NEXT", - "RE_OP_PROPERTY", - "RE_OP_PROPERTY_IGN", - "RE_OP_PROPERTY_IGN_REV", - "RE_OP_PROPERTY_REV", - "RE_OP_RANGE", - "RE_OP_RANGE_IGN", - "RE_OP_RANGE_IGN_REV", - "RE_OP_RANGE_REV", - "RE_OP_REF_GROUP", - "RE_OP_REF_GROUP_FLD", - "RE_OP_REF_GROUP_FLD_REV", - "RE_OP_REF_GROUP_IGN", 
- "RE_OP_REF_GROUP_IGN_REV", - "RE_OP_REF_GROUP_REV", - "RE_OP_SEARCH_ANCHOR", - "RE_OP_SET_DIFF", - "RE_OP_SET_DIFF_IGN", - "RE_OP_SET_DIFF_IGN_REV", - "RE_OP_SET_DIFF_REV", - "RE_OP_SET_INTER", - "RE_OP_SET_INTER_IGN", - "RE_OP_SET_INTER_IGN_REV", - "RE_OP_SET_INTER_REV", - "RE_OP_SET_SYM_DIFF", - "RE_OP_SET_SYM_DIFF_IGN", - "RE_OP_SET_SYM_DIFF_IGN_REV", - "RE_OP_SET_SYM_DIFF_REV", - "RE_OP_SET_UNION", - "RE_OP_SET_UNION_IGN", - "RE_OP_SET_UNION_IGN_REV", - "RE_OP_SET_UNION_REV", - "RE_OP_START_OF_LINE", - "RE_OP_START_OF_LINE_U", - "RE_OP_START_OF_STRING", - "RE_OP_START_OF_WORD", - "RE_OP_STRING", - "RE_OP_STRING_FLD", - "RE_OP_STRING_FLD_REV", - "RE_OP_STRING_IGN", - "RE_OP_STRING_IGN_REV", - "RE_OP_STRING_REV", - "RE_OP_STRING_SET", - "RE_OP_STRING_SET_FLD", - "RE_OP_STRING_SET_FLD_REV", - "RE_OP_STRING_SET_IGN", - "RE_OP_STRING_SET_IGN_REV", - "RE_OP_STRING_SET_REV", - "RE_OP_BODY_END", - "RE_OP_BODY_START", - "RE_OP_END_FUZZY", - "RE_OP_END_GREEDY_REPEAT", - "RE_OP_END_GROUP", - "RE_OP_END_LAZY_REPEAT", - "RE_OP_GREEDY_REPEAT_ONE", - "RE_OP_GROUP_RETURN", - "RE_OP_LAZY_REPEAT_ONE", - "RE_OP_MATCH_BODY", - "RE_OP_MATCH_TAIL", - "RE_OP_START_GROUP", -}; - -#define RE_FLAG_ASCII 0x80 -#define RE_FLAG_BESTMATCH 0x1000 -#define RE_FLAG_DEBUG 0x200 -#define RE_FLAG_DOTALL 0x10 -#define RE_FLAG_ENHANCEMATCH 0x8000 -#define RE_FLAG_FULLCASE 0x4000 -#define RE_FLAG_IGNORECASE 0x2 -#define RE_FLAG_LOCALE 0x4 -#define RE_FLAG_MULTILINE 0x8 -#define RE_FLAG_REVERSE 0x400 -#define RE_FLAG_TEMPLATE 0x1 -#define RE_FLAG_UNICODE 0x20 -#define RE_FLAG_VERBOSE 0x40 -#define RE_FLAG_VERSION0 0x2000 -#define RE_FLAG_VERSION1 0x100 -#define RE_FLAG_WORD 0x800 diff --git a/lib/regex/_regex_core.py b/lib/regex/_regex_core.py deleted file mode 100644 index 5adbb524..00000000 --- a/lib/regex/_regex_core.py +++ /dev/null @@ -1,4086 +0,0 @@ -# -# Secret Labs' Regular Expression Engine core module -# -# Copyright (c) 1998-2001 by Secret Labs AB. All rights reserved. 
-# -# This version of the SRE library can be redistributed under CNRI's -# Python 1.6 license. For any other use, please contact Secret Labs -# AB (info@pythonware.com). -# -# Portions of this engine have been developed in cooperation with -# CNRI. Hewlett-Packard provided funding for 1.6 integration and -# other compatibility work. -# -# 2010-01-16 mrab Python front-end re-written and extended - -import string -import sys -import unicodedata -from collections import defaultdict - -if sys.version_info < (2, 6): - from Python25 import _regex -elif sys.version_info < (2, 7): - from Python26 import _regex -else: - from Python27 import _regex - - -__all__ = ["A", "ASCII", "B", "BESTMATCH", "D", "DEBUG", "E", "ENHANCEMATCH", - "F", "FULLCASE", "I", "IGNORECASE", "L", "LOCALE", "M", "MULTILINE", "R", - "REVERSE", "S", "DOTALL", "T", "TEMPLATE", "U", "UNICODE", "V0", "VERSION0", - "V1", "VERSION1", "W", "WORD", "X", "VERBOSE", "error", - "Scanner"] - -# The regex exception. -class error(Exception): - def __init__(self, message, set_error=False): - Exception.__init__(self, message) - self.set_error = set_error - -# The exception for when a positional flag has been turned on in the old -# behaviour. -class _UnscopedFlagSet(Exception): - pass - -# The exception for when parsing fails and we want to try something else. -class ParseError(Exception): - pass - -# The exception for when there isn't a valid first set. -class _FirstSetError(Exception): - pass - -# Flags. -A = ASCII = 0x80 # Assume ASCII locale. -B = BESTMATCH = 0x1000 # Best fuzzy match. -D = DEBUG = 0x200 # Print parsed pattern. -E = ENHANCEMATCH = 0x8000 # Attempt to improve the fit after finding the first - # fuzzy match. -F = FULLCASE = 0x4000 # Unicode full case-folding. -I = IGNORECASE = 0x2 # Ignore case. -L = LOCALE = 0x4 # Assume current 8-bit locale. -M = MULTILINE = 0x8 # Make anchors look for newline. -R = REVERSE = 0x400 # Search backwards. -S = DOTALL = 0x10 # Make dot match newline. 
-U = UNICODE = 0x20 # Assume Unicode locale. -V0 = VERSION0 = 0x2000 # Old legacy behaviour. -V1 = VERSION1 = 0x100 # New enhanced behaviour. -W = WORD = 0x800 # Default Unicode word breaks. -X = VERBOSE = 0x40 # Ignore whitespace and comments. -T = TEMPLATE = 0x1 # Template (present because re module has it). - -DEFAULT_VERSION = VERSION1 - -_ALL_VERSIONS = VERSION0 | VERSION1 -_ALL_ENCODINGS = ASCII | LOCALE | UNICODE - -# The default flags for the various versions. -DEFAULT_FLAGS = {VERSION0: 0, VERSION1: FULLCASE} - -# The mask for the flags. -GLOBAL_FLAGS = (_ALL_ENCODINGS | _ALL_VERSIONS | BESTMATCH | DEBUG | - ENHANCEMATCH | REVERSE) -SCOPED_FLAGS = FULLCASE | IGNORECASE | MULTILINE | DOTALL | WORD | VERBOSE - -ALPHA = frozenset(string.ascii_letters) -DIGITS = frozenset(string.digits) -ALNUM = ALPHA | DIGITS -OCT_DIGITS = frozenset(string.octdigits) -HEX_DIGITS = frozenset(string.hexdigits) -SPECIAL_CHARS = frozenset("()|?*+{^$.[\\#") | frozenset([""]) -NAMED_CHAR_PART = ALNUM | frozenset(" -") -PROPERTY_NAME_PART = ALNUM | frozenset(" &_-.") -SET_OPS = ("||", "~~", "&&", "--") - -# The width of the code words inside the regex engine. -BYTES_PER_CODE = _regex.get_code_size() -BITS_PER_CODE = BYTES_PER_CODE * 8 - -# The repeat count which represents infinity. -UNLIMITED = (1 << BITS_PER_CODE) - 1 - -# The regular expression flags. -REGEX_FLAGS = {"a": ASCII, "b": BESTMATCH, "e": ENHANCEMATCH, "f": FULLCASE, - "i": IGNORECASE, "L": LOCALE, "m": MULTILINE, "r": REVERSE, "s": DOTALL, "u": - UNICODE, "V0": VERSION0, "V1": VERSION1, "w": WORD, "x": VERBOSE} - -# The case flags. -CASE_FLAGS = FULLCASE | IGNORECASE -NOCASE = 0 -FULLIGNORECASE = FULLCASE | IGNORECASE - -FULL_CASE_FOLDING = UNICODE | FULLIGNORECASE - -# The number of digits in hexadecimal escapes. -HEX_ESCAPES = {"x": 2, "u": 4, "U": 8} - -# A singleton which indicates a comment within a pattern. -COMMENT = object() -FLAGS = object() - -# The names of the opcodes. 
-OPCODES = """ -FAILURE -SUCCESS -ANY -ANY_ALL -ANY_ALL_REV -ANY_REV -ANY_U -ANY_U_REV -ATOMIC -BOUNDARY -BRANCH -CALL_REF -CHARACTER -CHARACTER_IGN -CHARACTER_IGN_REV -CHARACTER_REV -DEFAULT_BOUNDARY -DEFAULT_END_OF_WORD -DEFAULT_START_OF_WORD -END -END_OF_LINE -END_OF_LINE_U -END_OF_STRING -END_OF_STRING_LINE -END_OF_STRING_LINE_U -END_OF_WORD -FUZZY -GRAPHEME_BOUNDARY -GREEDY_REPEAT -GROUP -GROUP_CALL -GROUP_EXISTS -LAZY_REPEAT -LOOKAROUND -NEXT -PROPERTY -PROPERTY_IGN -PROPERTY_IGN_REV -PROPERTY_REV -RANGE -RANGE_IGN -RANGE_IGN_REV -RANGE_REV -REF_GROUP -REF_GROUP_FLD -REF_GROUP_FLD_REV -REF_GROUP_IGN -REF_GROUP_IGN_REV -REF_GROUP_REV -SEARCH_ANCHOR -SET_DIFF -SET_DIFF_IGN -SET_DIFF_IGN_REV -SET_DIFF_REV -SET_INTER -SET_INTER_IGN -SET_INTER_IGN_REV -SET_INTER_REV -SET_SYM_DIFF -SET_SYM_DIFF_IGN -SET_SYM_DIFF_IGN_REV -SET_SYM_DIFF_REV -SET_UNION -SET_UNION_IGN -SET_UNION_IGN_REV -SET_UNION_REV -START_OF_LINE -START_OF_LINE_U -START_OF_STRING -START_OF_WORD -STRING -STRING_FLD -STRING_FLD_REV -STRING_IGN -STRING_IGN_REV -STRING_REV -STRING_SET -STRING_SET_FLD -STRING_SET_FLD_REV -STRING_SET_IGN -STRING_SET_IGN_REV -STRING_SET_REV -""" - -# Define the opcodes in a namespace. -class Namespace(object): - pass - -OP = Namespace() -for i, op in enumerate(OPCODES.split()): - setattr(OP, op, i) - -def _shrink_cache(cache_dict, args_dict, max_length, divisor=5): - """Make room in the given cache. - - Args: - cache_dict: The cache dictionary to modify. - args_dict: The dictionary of named list args used by patterns. - max_length: Maximum # of entries in cache_dict before it is shrunk. - divisor: Cache will shrink to max_length - 1/divisor*max_length items. - """ - # Toss out a fraction of the entries at random to make room for new ones. - # A random algorithm was chosen as opposed to simply cache_dict.popitem() - # as popitem could penalize the same regular expression repeatedly based - # on its internal hash value. 
Being random should spread the cache miss - # love around. - cache_keys = tuple(cache_dict.keys()) - overage = len(cache_keys) - max_length - if overage < 0: - # Cache is already within limits. Normally this should not happen - # but it could due to multithreading. - return - - number_to_toss = max_length // divisor + overage - - # The import is done here to avoid a circular dependency. - import random - if not hasattr(random, 'sample'): - # Do nothing while resolving the circular dependency: - # re->random->warnings->tokenize->string->re - return - - for doomed_key in random.sample(cache_keys, number_to_toss): - try: - del cache_dict[doomed_key] - except KeyError: - # Ignore problems if the cache changed from another thread. - pass - - # Rebuild the arguments dictionary. - args_dict.clear() - for pattern, pattern_type, flags, args, default_version in cache_dict: - args_dict[pattern, pattern_type, flags, default_version] = args - -def _fold_case(info, string): - "Folds the case of a string." - flags = info.flags - if (flags & _ALL_ENCODINGS) == 0: - flags |= info.guess_encoding - - return _regex.fold_case(flags, string) - -def is_cased(info, char): - "Checks whether a character is cased." - return len(_regex.get_all_cases(info.flags, char)) > 1 - -def _compile_firstset(info, fs): - "Compiles the firstset for the pattern." - if not fs or None in fs: - return [] - - # If we ignore the case, for simplicity we won't build a firstset. - members = set() - for i in fs: - if i.case_flags: - if isinstance(i, Character): - if is_cased(info, i.value): - return [] - elif isinstance(i, SetBase): - return [] - - members.add(i.with_flags(case_flags=NOCASE)) - - # Build the firstset. - fs = SetUnion(info, list(members), zerowidth=True) - fs = fs.optimise(info, in_set=True) - - # Compile the firstset. - return fs.compile(bool(info.flags & REVERSE)) - -def _flatten_code(code): - "Flattens the code from a list of tuples." 
- flat_code = [] - for c in code: - flat_code.extend(c) - - return flat_code - -def make_character(info, value, in_set=False): - "Makes a character literal." - if in_set: - # A character set is built case-sensitively. - return Character(value) - - return Character(value, case_flags=info.flags & CASE_FLAGS) - -def make_ref_group(info, name, position): - "Makes a group reference." - return RefGroup(info, name, position, case_flags=info.flags & CASE_FLAGS) - -def make_string_set(info, name): - "Makes a string set." - return StringSet(info, name, case_flags=info.flags & CASE_FLAGS) - -def make_property(info, prop, in_set): - "Makes a property." - if in_set: - return prop - - return prop.with_flags(case_flags=info.flags & CASE_FLAGS) - -def _parse_pattern(source, info): - "Parses a pattern, eg. 'a|b|c'." - branches = [parse_sequence(source, info)] - while source.match("|"): - branches.append(parse_sequence(source, info)) - - if len(branches) == 1: - return branches[0] - return Branch(branches) - -def parse_sequence(source, info): - "Parses a sequence, eg. 'abc'." - sequence = [] - applied = False - while True: - # Get literal characters followed by an element. - characters, case_flags, element = parse_literal_and_element(source, - info) - if not element: - # No element, just a literal. We've also reached the end of the - # sequence. - append_literal(characters, case_flags, sequence) - break - - if element is COMMENT or element is FLAGS: - append_literal(characters, case_flags, sequence) - elif type(element) is tuple: - # It looks like we've found a quantifier. - ch, saved_pos = element - - counts = parse_quantifier(source, info, ch) - if counts: - # It _is_ a quantifier. - apply_quantifier(source, info, counts, characters, case_flags, - ch, saved_pos, applied, sequence) - applied = True - else: - # It's not a quantifier. Maybe it's a fuzzy constraint. - constraints = parse_fuzzy(source, ch) - if constraints: - # It _is_ a fuzzy constraint. 
- apply_constraint(source, info, constraints, characters, - case_flags, saved_pos, applied, sequence) - applied = True - else: - # The element was just a literal. - characters.append(ord(ch)) - append_literal(characters, case_flags, sequence) - applied = False - else: - # We have a literal followed by something else. - append_literal(characters, case_flags, sequence) - sequence.append(element) - applied = False - - return make_sequence(sequence) - -def apply_quantifier(source, info, counts, characters, case_flags, ch, - saved_pos, applied, sequence): - if characters: - # The quantifier applies to the last character. - append_literal(characters[ : -1], case_flags, sequence) - element = Character(characters[-1], case_flags=case_flags) - else: - # The quantifier applies to the last item in the sequence. - if applied or not sequence: - raise error("nothing to repeat at position %d" % saved_pos) - - element = sequence.pop() - - min_count, max_count = counts - saved_pos = source.pos - ch = source.get() - if ch == "?": - # The "?" suffix that means it's a lazy repeat. - repeated = LazyRepeat - elif ch == "+": - # The "+" suffix that means it's a possessive repeat. - repeated = PossessiveRepeat - else: - # No suffix means that it's a greedy repeat. - source.pos = saved_pos - repeated = GreedyRepeat - - # Ignore the quantifier if it applies to a zero-width item or the number of - # repeats is fixed at 1. - if not element.is_empty() and (min_count != 1 or max_count != 1): - element = repeated(element, min_count, max_count) - - sequence.append(element) - -def apply_constraint(source, info, constraints, characters, case_flags, - saved_pos, applied, sequence): - if characters: - # The constraint applies to the last character. - append_literal(characters[ : -1], case_flags, sequence) - element = Character(characters[-1], case_flags=case_flags) - sequence.append(Fuzzy(element, constraints)) - else: - # The constraint applies to the last item in the sequence. 
- if applied or not sequence: - raise error("nothing for fuzzy constraint at position %d" % saved_pos) - - element = sequence.pop() - - # If a group is marked as fuzzy then put all of the fuzzy part in the - # group. - if isinstance(element, Group): - element.subpattern = Fuzzy(element.subpattern, constraints) - sequence.append(element) - else: - sequence.append(Fuzzy(element, constraints)) - -def append_literal(characters, case_flags, sequence): - if characters: - sequence.append(Literal(characters, case_flags=case_flags)) - -def PossessiveRepeat(element, min_count, max_count): - "Builds a possessive repeat." - return Atomic(GreedyRepeat(element, min_count, max_count)) - -_QUANTIFIERS = {"?": (0, 1), "*": (0, None), "+": (1, None)} - -def parse_quantifier(source, info, ch): - "Parses a quantifier." - q = _QUANTIFIERS.get(ch) - if q: - # It's a quantifier. - return q - - if ch == "{": - # Looks like a limited repeated element, eg. 'a{2,3}'. - counts = parse_limited_quantifier(source) - if counts: - return counts - - return None - -def is_above_limit(count): - "Checks whether a count is above the maximum." - return count is not None and count >= UNLIMITED - -def parse_limited_quantifier(source): - "Parses a limited quantifier." - saved_pos = source.pos - min_count = parse_count(source) - if source.match(","): - max_count = parse_count(source) - - # No minimum means 0 and no maximum means unlimited. 
- min_count = int(min_count or 0) - max_count = int(max_count) if max_count else None - - if max_count is not None and min_count > max_count: - raise error("min repeat greater than max repeat at position %d" % saved_pos) - else: - if not min_count: - source.pos = saved_pos - return None - - min_count = max_count = int(min_count) - - if is_above_limit(min_count) or is_above_limit(max_count): - raise error("repeat count too big at position %d" % saved_pos) - - if not source.match ("}"): - source.pos = saved_pos - return None - - return min_count, max_count - -def parse_fuzzy(source, ch): - "Parses a fuzzy setting, if present." - if ch != "{": - return None - - saved_pos = source.pos - - constraints = {} - try: - parse_fuzzy_item(source, constraints) - while source.match(","): - parse_fuzzy_item(source, constraints) - except ParseError: - source.pos = saved_pos - return None - - if not source.match("}"): - raise error("expected } at position %d" % source.pos) - - return constraints - -def parse_fuzzy_item(source, constraints): - "Parses a fuzzy setting item." - saved_pos = source.pos - try: - parse_cost_constraint(source, constraints) - except ParseError: - source.pos = saved_pos - - parse_cost_equation(source, constraints) - -def parse_cost_constraint(source, constraints): - "Parses a cost constraint." - saved_pos = source.pos - ch = source.get() - if ch in ALPHA: - # Syntax: constraint [("<=" | "<") cost] - constraint = parse_constraint(source, constraints, ch) - - max_inc = parse_fuzzy_compare(source) - - if max_inc is None: - # No maximum cost. - constraints[constraint] = 0, None - else: - # There's a maximum cost. - cost_pos = source.pos - max_cost = int(parse_count(source)) - - # Inclusive or exclusive limit? 
- if not max_inc: - max_cost -= 1 - - if max_cost < 0: - raise error("bad fuzzy cost limit at position %d" % cost_pos) - - constraints[constraint] = 0, max_cost - elif ch in DIGITS: - # Syntax: cost ("<=" | "<") constraint ("<=" | "<") cost - source.pos = saved_pos - try: - # Minimum cost. - min_cost = int(parse_count(source)) - - min_inc = parse_fuzzy_compare(source) - if min_inc is None: - raise ParseError() - - constraint = parse_constraint(source, constraints, source.get()) - - max_inc = parse_fuzzy_compare(source) - if max_inc is None: - raise ParseError() - - # Maximum cost. - cost_pos = source.pos - max_cost = int(parse_count(source)) - - # Inclusive or exclusive limits? - if not min_inc: - min_cost += 1 - if not max_inc: - max_cost -= 1 - - if not 0 <= min_cost <= max_cost: - raise error("bad fuzzy cost limit at position %d" % cost_pos) - - constraints[constraint] = min_cost, max_cost - except ValueError: - raise ParseError() - else: - raise ParseError() - -def parse_constraint(source, constraints, ch): - "Parses a constraint." - if ch not in "deis": - raise error("bad fuzzy constraint at position %d" % source.pos) - - if ch in constraints: - raise error("repeated fuzzy constraint at position %d" % source.pos) - - return ch - -def parse_fuzzy_compare(source): - "Parses a cost comparator." - if source.match("<="): - return True - elif source.match("<"): - return False - else: - return None - -def parse_cost_equation(source, constraints): - "Parses a cost equation." 
- if "cost" in constraints: - raise error("more than one cost equation at position %d" % source.pos) - - cost = {} - - parse_cost_term(source, cost) - while source.match("+"): - parse_cost_term(source, cost) - - max_inc = parse_fuzzy_compare(source) - if max_inc is None: - raise error("missing fuzzy cost limit at position %d" % source.pos) - - max_cost = int(parse_count(source)) - - if not max_inc: - max_cost -= 1 - - if max_cost < 0: - raise error("bad fuzzy cost limit at position %d" % source.pos) - - cost["max"] = max_cost - - constraints["cost"] = cost - -def parse_cost_term(source, cost): - "Parses a cost equation term." - coeff = parse_count(source) - ch = source.get() - if ch not in "dis": - raise ParseError() - - if ch in cost: - raise error("repeated fuzzy cost at position %d" % source.pos) - - cost[ch] = int(coeff or 1) - -def parse_count(source): - "Parses a quantifier's count, which can be empty." - return source.get_while(DIGITS) - -def parse_literal_and_element(source, info): - """Parses a literal followed by an element. The element is FLAGS if it's an - inline flag or None if it has reached the end of a sequence. - """ - characters = [] - case_flags = info.flags & CASE_FLAGS - while True: - saved_pos = source.pos - ch = source.get() - if ch in SPECIAL_CHARS: - if ch in ")|": - # The end of a sequence. At the end of the pattern ch is "". - source.pos = saved_pos - return characters, case_flags, None - elif ch == "\\": - # An escape sequence outside a set. - element = parse_escape(source, info, False) - return characters, case_flags, element - elif ch == "(": - # A parenthesised subpattern or a flag. - element = parse_paren(source, info) - if element and element is not COMMENT: - return characters, case_flags, element - elif ch == ".": - # Any character. - if info.flags & DOTALL: - element = AnyAll() - elif info.flags & WORD: - element = AnyU() - else: - element = Any() - - return characters, case_flags, element - elif ch == "[": - # A character set. 
- element = parse_set(source, info) - return characters, case_flags, element - elif ch == "^": - # The start of a line or the string. - if info.flags & MULTILINE: - if info.flags & WORD: - element = StartOfLineU() - else: - element = StartOfLine() - else: - element = StartOfString() - - return characters, case_flags, element - elif ch == "$": - # The end of a line or the string. - if info.flags & MULTILINE: - if info.flags & WORD: - element = EndOfLineU() - else: - element = EndOfLine() - else: - if info.flags & WORD: - element = EndOfStringLineU() - else: - element = EndOfStringLine() - - return characters, case_flags, element - elif ch in "?*+{": - # Looks like a quantifier. - return characters, case_flags, (ch, saved_pos) - else: - # A literal. - characters.append(ord(ch)) - else: - # A literal. - characters.append(ord(ch)) - -def parse_paren(source, info): - """Parses a parenthesised subpattern or a flag. Returns FLAGS if it's an - inline flag. - """ - saved_pos = source.pos - ch = source.get() - if ch == "?": - # (?... - saved_pos_2 = source.pos - ch = source.get() - if ch == "<": - # (?<... - saved_pos_3 = source.pos - ch = source.get() - if ch in ("=", "!"): - # (?<=... or (?") - saved_flags = info.flags - try: - subpattern = _parse_pattern(source, info) - source.expect(")") - finally: - info.flags = saved_flags - source.ignore_space = bool(info.flags & VERBOSE) - - info.close_group() - return Group(info, group, subpattern) - if ch in ("=", "!"): - # (?=... or (?!...: lookahead. - return parse_lookaround(source, info, False, ch == "=") - if ch == "P": - # (?P...: a Python extension. - return parse_extension(source, info) - if ch == "#": - # (?#...: a comment. - return parse_comment(source) - if ch == "(": - # (?(...: a conditional subpattern. - return parse_conditional(source, info) - if ch == ">": - # (?>...: an atomic subpattern. - return parse_atomic(source, info) - if ch == "|": - # (?|...: a common/reset groups branch. 
- return parse_common(source, info) - if ch == "R" or "0" <= ch <= "9": - # (?R...: probably a call to a group. - return parse_call_group(source, info, ch, saved_pos_2) - if ch == "&": - # (?&...: a call to a named group. - return parse_call_named_group(source, info, saved_pos_2) - - # (?...: probably a flags subpattern. - source.pos = saved_pos_2 - return parse_flags_subpattern(source, info) - - # (...: an unnamed capture group. - source.pos = saved_pos - group = info.open_group() - saved_flags = info.flags - try: - subpattern = _parse_pattern(source, info) - source.expect(")") - finally: - info.flags = saved_flags - source.ignore_space = bool(info.flags & VERBOSE) - - info.close_group() - - return Group(info, group, subpattern) - -def parse_extension(source, info): - "Parses a Python extension." - saved_pos = source.pos - ch = source.get() - if ch == "<": - # (?P<...: a named capture group. - name = parse_name(source) - group = info.open_group(name) - source.expect(">") - saved_flags = info.flags - try: - subpattern = _parse_pattern(source, info) - source.expect(")") - finally: - info.flags = saved_flags - source.ignore_space = bool(info.flags & VERBOSE) - - info.close_group() - - return Group(info, group, subpattern) - if ch == "=": - # (?P=...: a named group reference. - name = parse_name(source) - source.expect(")") - if info.is_open_group(name): - raise error("can't refer to an open group at position %d" % saved_pos) - - return make_ref_group(info, name, saved_pos) - if ch == ">" or ch == "&": - # (?P>...: a call to a group. - return parse_call_named_group(source, info, saved_pos) - - source.pos = saved_pos - raise error("unknown extension at position %d" % saved_pos) - -def parse_comment(source): - "Parses a comment." - source.skip_while(set(")"), include=False) - source.expect(")") - - return COMMENT - -def parse_lookaround(source, info, behind, positive): - "Parses a lookaround." 
- saved_flags = info.flags - try: - subpattern = _parse_pattern(source, info) - source.expect(")") - finally: - info.flags = saved_flags - source.ignore_space = bool(info.flags & VERBOSE) - - return LookAround(behind, positive, subpattern) - -def parse_conditional(source, info): - "Parses a conditional subpattern." - saved_flags = info.flags - saved_pos = source.pos - try: - group = parse_name(source, True) - source.expect(")") - yes_branch = parse_sequence(source, info) - if source.match("|"): - no_branch = parse_sequence(source, info) - else: - no_branch = Sequence() - - source.expect(")") - finally: - info.flags = saved_flags - source.ignore_space = bool(info.flags & VERBOSE) - - if yes_branch.is_empty() and no_branch.is_empty(): - return Sequence() - - return Conditional(info, group, yes_branch, no_branch, saved_pos) - -def parse_atomic(source, info): - "Parses an atomic subpattern." - saved_flags = info.flags - try: - subpattern = _parse_pattern(source, info) - source.expect(")") - finally: - info.flags = saved_flags - source.ignore_space = bool(info.flags & VERBOSE) - - return Atomic(subpattern) - -def parse_common(source, info): - "Parses a common groups branch." - # Capture group numbers in different branches can reuse the group numbers. - initial_group_count = info.group_count - branches = [parse_sequence(source, info)] - final_group_count = info.group_count - while source.match("|"): - info.group_count = initial_group_count - branches.append(parse_sequence(source, info)) - final_group_count = max(final_group_count, info.group_count) - - info.group_count = final_group_count - source.expect(")") - - if len(branches) == 1: - return branches[0] - return Branch(branches) - -def parse_call_group(source, info, ch, pos): - "Parses a call to a group." 
- if ch == "R": - group = "0" - else: - group = ch + source.get_while(DIGITS) - - source.expect(")") - - return CallGroup(info, group, pos) - -def parse_call_named_group(source, info, pos): - "Parses a call to a named group." - group = parse_name(source) - source.expect(")") - - return CallGroup(info, group, pos) - -def parse_flag_set(source): - "Parses a set of inline flags." - flags = 0 - - try: - while True: - saved_pos = source.pos - ch = source.get() - if ch == "V": - ch += source.get() - flags |= REGEX_FLAGS[ch] - except KeyError: - source.pos = saved_pos - - return flags - -def parse_flags(source, info): - "Parses flags being turned on/off." - flags_on = parse_flag_set(source) - if source.match("-"): - flags_off = parse_flag_set(source) - if not flags_off: - raise error("bad inline flags: no flags after '-' at position %d" % source.pos) - else: - flags_off = 0 - - return flags_on, flags_off - -def parse_subpattern(source, info, flags_on, flags_off): - "Parses a subpattern with scoped flags." - saved_flags = info.flags - info.flags = (info.flags | flags_on) & ~flags_off - source.ignore_space = bool(info.flags & VERBOSE) - try: - subpattern = _parse_pattern(source, info) - source.expect(")") - finally: - info.flags = saved_flags - source.ignore_space = bool(info.flags & VERBOSE) - - return subpattern - -def parse_flags_subpattern(source, info): - """Parses a flags subpattern. It could be inline flags or a subpattern - possibly with local flags. If it's a subpattern, then that's returned; - if it's a inline flags, then FLAGS is returned. - """ - flags_on, flags_off = parse_flags(source, info) - - if flags_off & GLOBAL_FLAGS: - raise error("bad inline flags: can't turn off global flag at position %d" % source.pos) - - if flags_on & flags_off: - raise error("bad inline flags: flag turned on and off at position %d" % source.pos) - - # Handle flags which are global in all regex behaviours. 
- new_global_flags = (flags_on & ~info.global_flags) & GLOBAL_FLAGS - if new_global_flags: - info.global_flags |= new_global_flags - - # A global has been turned on, so reparse the pattern. - raise _UnscopedFlagSet(info.global_flags) - - # Ensure that from now on we have only scoped flags. - flags_on &= ~GLOBAL_FLAGS - - if source.match(":"): - return parse_subpattern(source, info, flags_on, flags_off) - - if source.match(")"): - parse_positional_flags(source, info, flags_on, flags_off) - return FLAGS - - raise error("unknown extension at position %d" % source.pos) - -def parse_positional_flags(source, info, flags_on, flags_off): - "Parses positional flags." - version = (info.flags & _ALL_VERSIONS) or DEFAULT_VERSION - if version == VERSION0: - # Positional flags are global and can only be turned on. - if flags_off: - raise error("bad inline flags: can't turn flags off at position %d" % source.pos) - - new_global_flags = flags_on & ~info.global_flags - if new_global_flags: - info.global_flags |= new_global_flags - - # A global has been turned on, so reparse the pattern. - raise _UnscopedFlagSet(info.global_flags) - else: - info.flags = (info.flags | flags_on) & ~flags_off - - source.ignore_space = bool(info.flags & VERBOSE) - -def parse_name(source, allow_numeric=False): - "Parses a name." - name = source.get_while(set(")>"), include=False) - - if not name: - raise error("bad group name at position %d" % source.pos) - - if name.isdigit(): - if not allow_numeric: - raise error("bad group name at position %d" % source.pos) - else: - if not is_identifier(name): - raise error("bad group name at position %d" % source.pos) - - return name - -def is_identifier(name): - if not name: - return False - - if name[0] not in ALPHA and name[0] != "_": - return False - - name = name.replace("_", "") - - return not name or all(c in ALNUM for c in name) - -def is_octal(string): - "Checks whether a string is octal." 
- return all(ch in OCT_DIGITS for ch in string) - -def is_decimal(string): - "Checks whether a string is decimal." - return all(ch in DIGITS for ch in string) - -def is_hexadecimal(string): - "Checks whether a string is hexadecimal." - return all(ch in HEX_DIGITS for ch in string) - -def parse_escape(source, info, in_set): - "Parses an escape sequence." - saved_ignore = source.ignore_space - source.ignore_space = False - ch = source.get() - source.ignore_space = saved_ignore - if not ch: - # A backslash at the end of the pattern. - raise error("bad escape at position %d" % source.pos) - if ch in HEX_ESCAPES: - # A hexadecimal escape sequence. - return parse_hex_escape(source, info, HEX_ESCAPES[ch], in_set) - elif ch == "g" and not in_set: - # A group reference. - saved_pos = source.pos - try: - return parse_group_ref(source, info) - except error: - # Invalid as a group reference, so assume it's a literal. - source.pos = saved_pos - - return make_character(info, ord(ch), in_set) - elif ch == "G" and not in_set: - # A search anchor. - return SearchAnchor() - elif ch == "L" and not in_set: - # A string set. - return parse_string_set(source, info) - elif ch == "N": - # A named codepoint. - return parse_named_char(source, info, in_set) - elif ch in "pP": - # A Unicode property, positive or negative. - return parse_property(source, info, ch == "p", in_set) - elif ch == "X" and not in_set: - # A grapheme cluster. - return Grapheme() - elif ch in ALPHA: - # An alphabetic escape sequence. - # Positional escapes aren't allowed inside a character set. - if not in_set: - if info.flags & WORD: - value = WORD_POSITION_ESCAPES.get(ch) - else: - value = POSITION_ESCAPES.get(ch) - - if value: - return value - - value = CHARSET_ESCAPES.get(ch) - if value: - return value - - value = CHARACTER_ESCAPES.get(ch) - if value: - return Character(ord(value)) - - return make_character(info, ord(ch), in_set) - elif ch in DIGITS: - # A numeric escape sequence. 
- return parse_numeric_escape(source, info, ch, in_set) - else: - # A literal. - return make_character(info, ord(ch), in_set) - -def parse_numeric_escape(source, info, ch, in_set): - "Parses a numeric escape sequence." - if in_set or ch == "0": - # Octal escape sequence, max 3 digits. - return parse_octal_escape(source, info, [ch], in_set) - - # At least 1 digit, so either octal escape or group. - digits = ch - saved_pos = source.pos - ch = source.get() - if ch in DIGITS: - # At least 2 digits, so either octal escape or group. - digits += ch - saved_pos = source.pos - ch = source.get() - if is_octal(digits) and ch in OCT_DIGITS: - # 3 octal digits, so octal escape sequence. - encoding = info.flags & _ALL_ENCODINGS - if encoding == ASCII or encoding == LOCALE: - octal_mask = 0xFF - else: - octal_mask = 0x1FF - - value = int(digits + ch, 8) & octal_mask - return make_character(info, value) - - # Group reference. - source.pos = saved_pos - if info.is_open_group(digits): - raise error("can't refer to an open group at position %d" % source.pos) - - return make_ref_group(info, digits, source.pos) - -def parse_octal_escape(source, info, digits, in_set): - "Parses an octal escape sequence." - saved_pos = source.pos - ch = source.get() - while len(digits) < 3 and ch in OCT_DIGITS: - digits.append(ch) - saved_pos = source.pos - ch = source.get() - - source.pos = saved_pos - try: - value = int("".join(digits), 8) - return make_character(info, value, in_set) - except ValueError: - raise error("bad octal escape at position %d" % source.pos) - -def parse_hex_escape(source, info, expected_len, in_set): - "Parses a hex escape sequence." - digits = [] - for i in range(expected_len): - ch = source.get() - if ch not in HEX_DIGITS: - raise error("bad hex escape at position %d" % source.pos) - digits.append(ch) - - value = int("".join(digits), 16) - return make_character(info, value, in_set) - -def parse_group_ref(source, info): - "Parses a group reference." 
- source.expect("<") - saved_pos = source.pos - name = parse_name(source, True) - source.expect(">") - if info.is_open_group(name): - raise error("can't refer to an open group at position %d" % source.pos) - - return make_ref_group(info, name, saved_pos) - -def parse_string_set(source, info): - "Parses a string set reference." - source.expect("<") - name = parse_name(source, True) - source.expect(">") - if name is None or name not in info.kwargs: - raise error("undefined named list at position %d" % source.pos) - - return make_string_set(info, name) - -def parse_named_char(source, info, in_set): - "Parses a named character." - saved_pos = source.pos - if source.match("{"): - name = source.get_while(NAMED_CHAR_PART) - if source.match("}"): - try: - value = unicodedata.lookup(name) - return make_character(info, ord(value), in_set) - except KeyError: - raise error("undefined character name at position %d" % source.pos) - - source.pos = saved_pos - return make_character(info, ord("N"), in_set) - -def parse_property(source, info, positive, in_set): - "Parses a Unicode property." - saved_pos = source.pos - ch = source.get() - if ch == "{": - negate = source.match("^") - prop_name, name = parse_property_name(source) - if source.match("}"): - # It's correctly delimited. - prop = lookup_property(prop_name, name, positive != negate, source_pos=source.pos) - return make_property(info, prop, in_set) - elif ch and ch in "CLMNPSZ": - # An abbreviated property, eg \pL. - prop = lookup_property(None, ch, positive) - return make_property(info, prop, in_set, source_pos=source.pos) - - # Not a property, so treat as a literal "p" or "P". - source.pos = saved_pos - ch = "p" if positive else "P" - return make_character(info, ord(ch), in_set) - -def parse_property_name(source): - "Parses a property name, which may be qualified." 
- name = source.get_while(PROPERTY_NAME_PART) - saved_pos = source.pos - - ch = source.get() - if ch and ch in ":=": - prop_name = name - name = source.get_while(ALNUM | set(" &_-./")).strip() - - if name: - # Name after the ":" or "=", so it's a qualified name. - saved_pos = source.pos - else: - # No name after the ":" or "=", so assume it's an unqualified name. - prop_name, name = None, prop_name - else: - prop_name = None - - source.pos = saved_pos - return prop_name, name - -def parse_set(source, info): - "Parses a character set." - version = (info.flags & _ALL_VERSIONS) or DEFAULT_VERSION - - saved_ignore = source.ignore_space - source.ignore_space = False - # Negative set? - negate = source.match("^") - try: - if version == VERSION0: - item = parse_set_imp_union(source, info) - else: - item = parse_set_union(source, info) - - if not source.match("]"): - raise error("missing ] at position %d" % source.pos) - finally: - source.ignore_space = saved_ignore - - if negate: - item = item.with_flags(positive=not item.positive) - - item = item.with_flags(case_flags=info.flags & CASE_FLAGS) - - return item - -def parse_set_union(source, info): - "Parses a set union ([x||y])." - items = [parse_set_symm_diff(source, info)] - while source.match("||"): - items.append(parse_set_symm_diff(source, info)) - - if len(items) == 1: - return items[0] - return SetUnion(info, items) - -def parse_set_symm_diff(source, info): - "Parses a set symmetric difference ([x~~y])." - items = [parse_set_inter(source, info)] - while source.match("~~"): - items.append(parse_set_inter(source, info)) - - if len(items) == 1: - return items[0] - return SetSymDiff(info, items) - -def parse_set_inter(source, info): - "Parses a set intersection ([x&&y])." 
- items = [parse_set_diff(source, info)] - while source.match("&&"): - items.append(parse_set_diff(source, info)) - - if len(items) == 1: - return items[0] - return SetInter(info, items) - -def parse_set_diff(source, info): - "Parses a set difference ([x--y])." - items = [parse_set_imp_union(source, info)] - while source.match("--"): - items.append(parse_set_imp_union(source, info)) - - if len(items) == 1: - return items[0] - return SetDiff(info, items) - -def parse_set_imp_union(source, info): - "Parses a set implicit union ([xy])." - version = (info.flags & _ALL_VERSIONS) or DEFAULT_VERSION - - items = [parse_set_member(source, info)] - while True: - saved_pos = source.pos - if source.match("]"): - # End of the set. - source.pos = saved_pos - break - - if version == VERSION1 and any(source.match(op) for op in SET_OPS): - # The new behaviour has set operators. - source.pos = saved_pos - break - - items.append(parse_set_member(source, info)) - - if len(items) == 1: - return items[0] - return SetUnion(info, items) - -def parse_set_member(source, info): - "Parses a member in a character set." - # Parse a set item. - start = parse_set_item(source, info) - if (not isinstance(start, Character) or not start.positive or not - source.match("-")): - # It's not the start of a range. - return start - - # It looks like the start of a range of characters. - saved_pos = source.pos - if source.match("]"): - # We've reached the end of the set, so return both the character and - # hyphen. - source.pos = saved_pos - return SetUnion(info, [start, Character(ord("-"))]) - - # Parse a set item. - end = parse_set_item(source, info) - if not isinstance(end, Character) or not end.positive: - # It's not a range, so return the character, hyphen and property. - return SetUnion(info, [start, Character(ord("-")), end]) - - # It _is_ a range. 
- if start.value > end.value: - raise error("bad character range at position %d" % source.pos) - - if start.value == end.value: - return start - - return Range(start.value, end.value) - -def parse_set_item(source, info): - "Parses an item in a character set." - version = (info.flags & _ALL_VERSIONS) or DEFAULT_VERSION - - if source.match("\\"): - # An escape sequence in a set. - return parse_escape(source, info, True) - - saved_pos = source.pos - if source.match("[:"): - # Looks like a POSIX character class. - try: - return parse_posix_class(source, info) - except ParseError: - # Not a POSIX character class. - source.pos = saved_pos - - if version == VERSION1 and source.match("["): - # It's the start of a nested set. - - # Negative set? - negate = source.match("^") - item = parse_set_union(source, info) - - if not source.match("]"): - raise error("missing ] at position %d" % source.pos) - - if negate: - item = item.with_flags(positive=not item.positive) - - return item - - ch = source.get() - if not ch: - raise error("bad set at position %d" % source.pos, True) - - return Character(ord(ch)) - -def parse_posix_class(source, info): - "Parses a POSIX character class." - negate = source.match("^") - prop_name, name = parse_property_name(source) - if not source.match(":]"): - raise ParseError() - - return lookup_property(prop_name, name, positive=not negate, source_pos=source.pos) - -def float_to_rational(flt): - "Converts a float to a rational pair." - int_part = int(flt) - error = flt - int_part - if abs(error) < 0.0001: - return int_part, 1 - - den, num = float_to_rational(1.0 / error) - - return int_part * den + num, den - -def numeric_to_rational(numeric): - "Converts a numeric string to a rational string, if possible." 
- if numeric[0] == "-": - sign, numeric = numeric[0], numeric[1 : ] - else: - sign = "" - - parts = numeric.split("/") - if len(parts) == 2: - num, den = float_to_rational(float(parts[0]) / float(parts[1])) - elif len(parts) == 1: - num, den = float_to_rational(float(parts[0])) - else: - raise ValueError() - - result = "%s%s/%s" % (sign, num, den) - if result.endswith("/1"): - return result[ : -2] - - return result - -def standardise_name(name): - "Standardises a property or value name." - try: - return numeric_to_rational("".join(name)) - except (ValueError, ZeroDivisionError): - return "".join(ch for ch in name if ch not in "_- ").upper() - -def lookup_property(property, value, positive, source_pos=None): - "Looks up a property." - # Normalise the names (which may still be lists). - property = standardise_name(property) if property else None - value = standardise_name(value) - - if (property, value) == ("GENERALCATEGORY", "ASSIGNED"): - property, value, positive = "GENERALCATEGORY", "UNASSIGNED", not positive - - if property: - # Both the property and the value are provided. - prop = PROPERTIES.get(property) - if not prop: - raise error("unknown property at position %d" % source_pos) - - prop_id, value_dict = prop - val_id = value_dict.get(value) - if val_id is None: - raise error("unknown property value at position %d" % source_pos) - - if "YES" in value_dict and val_id == 0: - positive, val_id = not positive, 1 - - return Property((prop_id << 16) | val_id, positive) - - # Only the value is provided. - # It might be the name of a GC, script or block value. - for property in ("GC", "SCRIPT", "BLOCK"): - prop_id, value_dict = PROPERTIES.get(property) - val_id = value_dict.get(value) - if val_id is not None: - return Property((prop_id << 16) | val_id, positive) - - # It might be the name of a binary property. 
- prop = PROPERTIES.get(value) - if prop: - prop_id, value_dict = prop - - if "YES" in value_dict: - return Property((prop_id << 16) | 1, positive) - - # It might be the name of a binary property starting with a prefix. - if value.startswith("IS"): - prop = PROPERTIES.get(value[2 : ]) - if prop: - prop_id, value_dict = prop - if "YES" in value_dict: - return Property((prop_id << 16) | 1, positive) - - # It might be the name of a script or block starting with a prefix. - for prefix, property in (("IS", "SCRIPT"), ("IN", "BLOCK")): - if value.startswith(prefix): - prop_id, value_dict = PROPERTIES.get(property) - val_id = value_dict.get(value[2 : ]) - if val_id is not None: - return Property((prop_id << 16) | val_id, positive) - - # Unknown property. - raise error("unknown property at position %d" % source_pos) - -def _compile_replacement(source, pattern, is_unicode): - "Compiles a replacement template escape sequence." - ch = source.get() - if ch in ALPHA: - # An alphabetic escape sequence. - value = CHARACTER_ESCAPES.get(ch) - if value: - return False, [ord(value)] - - if ch in HEX_ESCAPES and (ch == "x" or is_unicode): - # A hexadecimal escape sequence. - return False, [parse_repl_hex_escape(source, HEX_ESCAPES[ch])] - - if ch == "g": - # A group preference. - return True, [compile_repl_group(source, pattern)] - - if ch == "N" and is_unicode: - # A named character. - value = parse_repl_named_char(source) - if value is not None: - return False, [value] - - return False, [ord("\\"), ord(ch)] - - if isinstance(source.sep, str): - octal_mask = 0xFF - else: - octal_mask = 0x1FF - - if ch == "0": - # An octal escape sequence. - digits = ch - while len(digits) < 3: - saved_pos = source.pos - ch = source.get() - if ch not in OCT_DIGITS: - source.pos = saved_pos - break - digits += ch - - return False, [int(digits, 8) & octal_mask] - - if ch in DIGITS: - # Either an octal escape sequence (3 digits) or a group reference (max - # 2 digits). 
- digits = ch - saved_pos = source.pos - ch = source.get() - if ch in DIGITS: - digits += ch - saved_pos = source.pos - ch = source.get() - if ch and is_octal(digits + ch): - # An octal escape sequence. - return False, [int(digits + ch, 8) & octal_mask] - - # A group reference. - source.pos = saved_pos - return True, [int(digits)] - - if ch == "\\": - # An escaped backslash is a backslash. - return False, [ord("\\")] - - if not ch: - # A trailing backslash. - raise error("bad escape at position %d" % source.pos) - - # An escaped non-backslash is a backslash followed by the literal. - return False, [ord("\\"), ord(ch)] - -def parse_repl_hex_escape(source, expected_len): - "Parses a hex escape sequence in a replacement string." - digits = [] - for i in range(expected_len): - ch = source.get() - if ch not in HEX_DIGITS: - raise error("bad hex escape at position %d" % source.pos) - digits.append(ch) - - return int("".join(digits), 16) - -def parse_repl_named_char(source): - "Parses a named character in a replacement string." - saved_pos = source.pos - if source.match("{"): - name = source.get_while(ALPHA | set(" ")) - - if source.match("}"): - try: - value = unicodedata.lookup(name) - return ord(value) - except KeyError: - raise error("undefined character name at position %d" % source.pos) - - source.pos = saved_pos - return None - -def compile_repl_group(source, pattern): - "Compiles a replacement template group reference." - source.expect("<") - name = parse_name(source, True) - - source.expect(">") - if name.isdigit(): - index = int(name) - if not 0 <= index <= pattern.groups: - raise error("invalid group at position %d" % source.pos) - - return index - - try: - return pattern.groupindex[name] - except KeyError: - raise IndexError("unknown group") - -# The regular expression is parsed into a syntax tree. The different types of -# node are defined below. 
- -INDENT = " " -POSITIVE_OP = 0x1 -ZEROWIDTH_OP = 0x2 -FUZZY_OP = 0x4 -REVERSE_OP = 0x8 -REQUIRED_OP = 0x10 - -POS_TEXT = {False: "NON-MATCH", True: "MATCH"} -CASE_TEXT = {NOCASE: "", IGNORECASE: " SIMPLE_IGNORE_CASE", FULLCASE: "", - FULLIGNORECASE: " FULL_IGNORE_CASE"} - -def make_sequence(items): - if len(items) == 1: - return items[0] - return Sequence(items) - -# Common base class for all nodes. -class RegexBase(object): - def __init__(self): - self._key = self.__class__ - - def with_flags(self, positive=None, case_flags=None, zerowidth=None): - if positive is None: - positive = self.positive - else: - positive = bool(positive) - if case_flags is None: - case_flags = self.case_flags - else: - case_flags = case_flags & CASE_FLAGS - if zerowidth is None: - zerowidth = self.zerowidth - else: - zerowidth = bool(zerowidth) - - if (positive == self.positive and case_flags == self.case_flags and - zerowidth == self.zerowidth): - return self - - return self.rebuild(positive, case_flags, zerowidth) - - def fix_groups(self, reverse, fuzzy): - pass - - def optimise(self, info): - return self - - def pack_characters(self, info): - return self - - def remove_captures(self): - return self - - def is_atomic(self): - return True - - def can_be_affix(self): - return True - - def contains_group(self): - return False - - def get_firstset(self, reverse): - raise _FirstSetError() - - def has_simple_start(self): - return False - - def compile(self, reverse=False, fuzzy=False): - return self._compile(reverse, fuzzy) - - def dump(self, indent, reverse): - self._dump(indent, reverse) - - def is_empty(self): - return False - - def __hash__(self): - return hash(self._key) - - def __eq__(self, other): - return type(self) is type(other) and self._key == other._key - - def __ne__(self, other): - return not self.__eq__(other) - - def get_required_string(self, reverse): - return self.max_width(), None - -# Base class for zero-width nodes. 
-class ZeroWidthBase(RegexBase): - def __init__(self, positive=True): - RegexBase.__init__(self) - self.positive = bool(positive) - - self._key = self.__class__, self.positive - - def get_firstset(self, reverse): - return set([None]) - - def _compile(self, reverse, fuzzy): - flags = 0 - if self.positive: - flags |= POSITIVE_OP - if fuzzy: - flags |= FUZZY_OP - if reverse: - flags |= REVERSE_OP - return [(self._opcode, flags)] - - def _dump(self, indent, reverse): - print "%s%s %s" % (INDENT * indent, self._op_name, - POS_TEXT[self.positive]) - - def max_width(self): - return 0 - -class Any(RegexBase): - _opcode = {False: OP.ANY, True: OP.ANY_REV} - _op_name = "ANY" - - def has_simple_start(self): - return True - - def _compile(self, reverse, fuzzy): - flags = 0 - if fuzzy: - flags |= FUZZY_OP - return [(self._opcode[reverse], flags)] - - def _dump(self, indent, reverse): - print "%s%s" % (INDENT * indent, self._op_name) - - def max_width(self): - return 1 - -class AnyAll(Any): - _opcode = {False: OP.ANY_ALL, True: OP.ANY_ALL_REV} - _op_name = "ANY_ALL" - -class AnyU(Any): - _opcode = {False: OP.ANY_U, True: OP.ANY_U_REV} - _op_name = "ANY_U" - -class Atomic(RegexBase): - def __init__(self, subpattern): - RegexBase.__init__(self) - self.subpattern = subpattern - - def fix_groups(self, reverse, fuzzy): - self.subpattern.fix_groups(reverse, fuzzy) - - def optimise(self, info): - self.subpattern = self.subpattern.optimise(info) - - if self.subpattern.is_empty(): - return self.subpattern - return self - - def pack_characters(self, info): - self.subpattern = self.subpattern.pack_characters(info) - return self - - def remove_captures(self): - self.subpattern = self.subpattern.remove_captures() - return self - - def can_be_affix(self): - return self.subpattern.can_be_affix() - - def contains_group(self): - return self.subpattern.contains_group() - - def get_firstset(self, reverse): - return self.subpattern.get_firstset(reverse) - - def has_simple_start(self): - return 
self.subpattern.has_simple_start() - - def _compile(self, reverse, fuzzy): - return ([(OP.ATOMIC, )] + self.subpattern.compile(reverse, fuzzy) + - [(OP.END, )]) - - def _dump(self, indent, reverse): - print "%sATOMIC" % (INDENT * indent) - self.subpattern.dump(indent + 1, reverse) - - def is_empty(self): - return self.subpattern.is_empty() - - def __eq__(self, other): - return (type(self) is type(other) and self.subpattern == - other.subpattern) - - def max_width(self): - return self.subpattern.max_width() - - def get_required_string(self, reverse): - return self.subpattern.get_required_string(reverse) - -class Boundary(ZeroWidthBase): - _opcode = OP.BOUNDARY - _op_name = "BOUNDARY" - -class Branch(RegexBase): - def __init__(self, branches): - RegexBase.__init__(self) - self.branches = branches - - def fix_groups(self, reverse, fuzzy): - for b in self.branches: - b.fix_groups(reverse, fuzzy) - - def optimise(self, info): - # Flatten branches within branches. - branches = Branch._flatten_branches(info, self.branches) - - # Move any common prefix or suffix out of the branches. - prefix, branches = Branch._split_common_prefix(info, branches) - suffix, branches = Branch._split_common_suffix(info, branches) - - # Merge branches starting with the same character. (If a character - # prefix doesn't match in one branch, it won't match in any of the - # others starting with that same character.) - branches = Branch._merge_common_prefixes(info, branches) - - # Try to reduce adjacent single-character branches to sets. - branches = Branch._reduce_to_set(info, branches) - - if len(branches) > 1: - sequence = prefix + [Branch(branches)] + suffix - else: - sequence = prefix + branches + suffix - - return make_sequence(sequence) - - def optimise(self, info): - # Flatten branches within branches. - branches = Branch._flatten_branches(info, self.branches) - - # Try to reduce adjacent single-character branches to sets. 
- branches = Branch._reduce_to_set(info, branches) - - if len(branches) > 1: - sequence = [Branch(branches)] - else: - sequence = branches - - return make_sequence(sequence) - - def pack_characters(self, info): - self.branches = [b.pack_characters(info) for b in self.branches] - return self - - def remove_captures(self): - self.branches = [b.remove_captures() for b in self.branches] - return self - - def is_atomic(self): - return all(b.is_atomic() for b in self.branches) - - def can_be_affix(self): - return all(b.can_be_affix() for b in self.branches) - - def contains_group(self): - return any(b.contains_group() for b in self.branches) - - def get_firstset(self, reverse): - fs = set() - for b in self.branches: - fs |= b.get_firstset(reverse) - - return fs or set([None]) - - def _compile(self, reverse, fuzzy): - code = [(OP.BRANCH, )] - for b in self.branches: - code.extend(b.compile(reverse, fuzzy)) - code.append((OP.NEXT, )) - - code[-1] = (OP.END, ) - - return code - - def _dump(self, indent, reverse): - print "%sBRANCH" % (INDENT * indent) - self.branches[0].dump(indent + 1, reverse) - for b in self.branches[1 : ]: - print "%sOR" % (INDENT * indent) - b.dump(indent + 1, reverse) - - @staticmethod - def _flatten_branches(info, branches): - # Flatten the branches so that there aren't branches of branches. - new_branches = [] - for b in branches: - b = b.optimise(info) - if isinstance(b, Branch): - new_branches.extend(b.branches) - else: - new_branches.append(b) - - return new_branches - - @staticmethod - def _split_common_prefix(info, branches): - # Common leading items can be moved out of the branches. - # Get the items in the branches. - alternatives = [] - for b in branches: - if isinstance(b, Sequence): - alternatives.append(b.items) - else: - alternatives.append([b]) - - # What is the maximum possible length of the prefix? - max_count = min(len(a) for a in alternatives) - - # What is the longest common prefix? 
- prefix = alternatives[0] - pos = 0 - end_pos = max_count - while pos < end_pos and prefix[pos].can_be_affix() and all(a[pos] == - prefix[pos] for a in alternatives): - pos += 1 - count = pos - - if info.flags & UNICODE: - # We need to check that we're not splitting a sequence of - # characters which could form part of full case-folding. - count = pos - while count > 0 and not all(Branch._can_split(a, count) for a in - alternatives): - count -= 1 - - # No common prefix is possible. - if count == 0: - return [], branches - - # Rebuild the branches. - new_branches = [] - for a in alternatives: - new_branches.append(make_sequence(a[count : ])) - - return prefix[ : count], new_branches - - @staticmethod - def _split_common_suffix(info, branches): - # Common trailing items can be moved out of the branches. - # Get the items in the branches. - alternatives = [] - for b in branches: - if isinstance(b, Sequence): - alternatives.append(b.items) - else: - alternatives.append([b]) - - # What is the maximum possible length of the suffix? - max_count = min(len(a) for a in alternatives) - - # What is the longest common suffix? - suffix = alternatives[0] - pos = -1 - end_pos = -1 - max_count - while pos > end_pos and suffix[pos].can_be_affix() and all(a[pos] == - suffix[pos] for a in alternatives): - pos -= 1 - count = -1 - pos - - if info.flags & UNICODE: - # We need to check that we're not splitting a sequence of - # characters which could form part of full case-folding. - while count > 0 and not all(Branch._can_split_rev(a, count) for a - in alternatives): - count -= 1 - - # No common suffix is possible. - if count == 0: - return [], branches - - # Rebuild the branches. - new_branches = [] - for a in alternatives: - new_branches.append(make_sequence(a[ : -count])) - - return suffix[-count : ], new_branches - - @staticmethod - def _can_split(items, count): - # Check the characters either side of the proposed split. 
- if not Branch._is_full_case(items, count - 1): - return True - - if not Branch._is_full_case(items, count): - return True - - # Check whether a 1-1 split would be OK. - if Branch._is_folded(items[count - 1 : count + 1]): - return False - - # Check whether a 1-2 split would be OK. - if (Branch._is_full_case(items, count + 2) and - Branch._is_folded(items[count - 1 : count + 2])): - return False - - # Check whether a 2-1 split would be OK. - if (Branch._is_full_case(items, count - 2) and - Branch._is_folded(items[count - 2 : count + 1])): - return False - - return True - - @staticmethod - def _can_split_rev(items, count): - end = len(items) - - # Check the characters either side of the proposed split. - if not Branch._is_full_case(items, end - count): - return True - - if not Branch._is_full_case(items, end - count - 1): - return True - - # Check whether a 1-1 split would be OK. - if Branch._is_folded(items[end - count - 1 : end - count + 1]): - return False - - # Check whether a 1-2 split would be OK. - if (Branch._is_full_case(items, end - count + 2) and - Branch._is_folded(items[end - count - 1 : end - count + 2])): - return False - - # Check whether a 2-1 split would be OK. - if (Branch._is_full_case(items, end - count - 2) and - Branch._is_folded(items[end - count - 2 : end - count + 1])): - return False - - return True - - @staticmethod - def _merge_common_prefixes(info, branches): - # Branches with the same case-sensitive character prefix can be grouped - # together if they are separated only by other branches with a - # character prefix. - prefixed = defaultdict(list) - order = {} - new_branches = [] - for b in branches: - if Branch._is_simple_character(b): - # Branch starts with a simple character. - prefixed[b.value].append([b]) - order.setdefault(b.value, len(order)) - elif (isinstance(b, Sequence) and b.items and - Branch._is_simple_character(b.items[0])): - # Branch starts with a simple character. 
- prefixed[b.items[0].value].append(b.items) - order.setdefault(b.items[0].value, len(order)) - else: - Branch._flush_char_prefix(info, prefixed, order, new_branches) - - new_branches.append(b) - - Branch._flush_char_prefix(info, prefixed, order, new_branches) - - return new_branches - - @staticmethod - def _is_simple_character(c): - return isinstance(c, Character) and c.positive and not c.case_flags - - @staticmethod - def _reduce_to_set(info, branches): - # Can the branches be reduced to a set? - new_branches = [] - items = set() - case_flags = NOCASE - for b in branches: - if isinstance(b, (Character, Property, SetBase)): - # Branch starts with a single character. - if b.case_flags != case_flags: - # Different case sensitivity, so flush. - Branch._flush_set_members(info, items, case_flags, - new_branches) - - case_flags = b.case_flags - - items.add(b.with_flags(case_flags=NOCASE)) - else: - Branch._flush_set_members(info, items, case_flags, - new_branches) - - new_branches.append(b) - - Branch._flush_set_members(info, items, case_flags, new_branches) - - return new_branches - - @staticmethod - def _flush_char_prefix(info, prefixed, order, new_branches): - # Flush the prefixed branches. - if not prefixed: - return - - for value, branches in sorted(prefixed.items(), key=lambda pair: - order[pair[0]]): - if len(branches) == 1: - new_branches.append(make_sequence(branches[0])) - else: - subbranches = [] - optional = False - for b in branches: - if len(b) > 1: - subbranches.append(make_sequence(b[1 : ])) - elif not optional: - subbranches.append(Sequence()) - optional = True - - sequence = Sequence([Character(value), Branch(subbranches)]) - new_branches.append(sequence.optimise(info)) - - prefixed.clear() - order.clear() - - @staticmethod - def _flush_set_members(info, items, case_flags, new_branches): - # Flush the set members. 
- if not items: - return - - if len(items) == 1: - item = list(items)[0] - else: - item = SetUnion(info, list(items)).optimise(info) - - new_branches.append(item.with_flags(case_flags=case_flags)) - - items.clear() - - @staticmethod - def _is_full_case(items, i): - if not 0 <= i < len(items): - return False - - item = items[i] - return (isinstance(item, Character) and item.positive and - (item.case_flags & FULLIGNORECASE) == FULLIGNORECASE) - - @staticmethod - def _is_folded(items): - if len(items) < 2: - return False - - for i in items: - if (not isinstance(i, Character) or not i.positive or not - i.case_flags): - return False - - folded = u"".join(unichr(i.value) for i in items) - folded = _regex.fold_case(FULL_CASE_FOLDING, folded) - - # Get the characters which expand to multiple codepoints on folding. - expanding_chars = _regex.get_expand_on_folding() - - for c in expanding_chars: - if folded == _regex.fold_case(FULL_CASE_FOLDING, c): - return True - - return False - - def is_empty(self): - return all(b.is_empty() for b in self.branches) - - def __eq__(self, other): - return type(self) is type(other) and self.branches == other.branches - - def max_width(self): - return max(b.max_width() for b in self.branches) - -class CallGroup(RegexBase): - def __init__(self, info, group, position): - RegexBase.__init__(self) - self.info = info - self.group = group - self.position = position - - self._key = self.__class__, self.group - - def fix_groups(self, reverse, fuzzy): - try: - self.group = int(self.group) - except ValueError: - try: - self.group = self.info.group_index[self.group] - except KeyError: - raise error("unknown group at position %d" % self.position) - - if not 0 <= self.group <= self.info.group_count: - raise error("unknown group at position %d" % self.position) - - if self.group > 0 and self.info.open_group_count[self.group] > 1: - raise error("ambiguous group reference at position %d" % self.position) - - self.info.group_calls.append((self, reverse, 
fuzzy)) - - self._key = self.__class__, self.group - - def remove_captures(self): - raise error("group reference not allowed at position %d" % self.position) - - def _compile(self, reverse, fuzzy): - return [(OP.GROUP_CALL, self.call_ref)] - - def _dump(self, indent, reverse): - print "%sGROUP_CALL %s" % (INDENT * indent, self.group) - - def __eq__(self, other): - return type(self) is type(other) and self.group == other.group - - def max_width(self): - return UNLIMITED - -class Character(RegexBase): - _opcode = {(NOCASE, False): OP.CHARACTER, (IGNORECASE, False): - OP.CHARACTER_IGN, (FULLCASE, False): OP.CHARACTER, (FULLIGNORECASE, - False): OP.CHARACTER_IGN, (NOCASE, True): OP.CHARACTER_REV, (IGNORECASE, - True): OP.CHARACTER_IGN_REV, (FULLCASE, True): OP.CHARACTER_REV, - (FULLIGNORECASE, True): OP.CHARACTER_IGN_REV} - - def __init__(self, value, positive=True, case_flags=NOCASE, - zerowidth=False): - RegexBase.__init__(self) - self.value = value - self.positive = bool(positive) - self.case_flags = case_flags - self.zerowidth = bool(zerowidth) - - if (self.positive and (self.case_flags & FULLIGNORECASE) == - FULLIGNORECASE): - self.folded = _regex.fold_case(FULL_CASE_FOLDING, unichr(self.value)) - else: - self.folded = unichr(self.value) - - self._key = (self.__class__, self.value, self.positive, - self.case_flags, self.zerowidth) - - def rebuild(self, positive, case_flags, zerowidth): - return Character(self.value, positive, case_flags, zerowidth) - - def optimise(self, info, in_set=False): - return self - - def get_firstset(self, reverse): - return set([self]) - - def has_simple_start(self): - return True - - def _compile(self, reverse, fuzzy): - flags = 0 - if self.positive: - flags |= POSITIVE_OP - if self.zerowidth: - flags |= ZEROWIDTH_OP - if fuzzy: - flags |= FUZZY_OP - - code = PrecompiledCode([self._opcode[self.case_flags, reverse], flags, - self.value]) - - if len(self.folded) > 1: - # The character expands on full case-folding. 
- code = Branch([code, String([ord(c) for c in self.folded], - case_flags=self.case_flags)]) - - return code.compile(reverse, fuzzy) - - def _dump(self, indent, reverse): - display = repr(unichr(self.value)).lstrip("bu") - print "%sCHARACTER %s %s%s" % (INDENT * indent, - POS_TEXT[self.positive], display, CASE_TEXT[self.case_flags]) - - def matches(self, ch): - return (ch == self.value) == self.positive - - def max_width(self): - return len(self.folded) - - def get_required_string(self, reverse): - if not self.positive: - return 1, None - - self.folded_characters = tuple(ord(c) for c in self.folded) - - return 0, self - -class Conditional(RegexBase): - def __init__(self, info, group, yes_item, no_item, position): - RegexBase.__init__(self) - self.info = info - self.group = group - self.yes_item = yes_item - self.no_item = no_item - self.position = position - - def fix_groups(self, reverse, fuzzy): - try: - self.group = int(self.group) - except ValueError: - try: - self.group = self.info.group_index[self.group] - except KeyError: - raise error("unknown group at position %d" % self.position) - - if not 1 <= self.group <= self.info.group_count: - raise error("unknown group at position %d" % self.position) - - self.yes_item.fix_groups(reverse, fuzzy) - self.no_item.fix_groups(reverse, fuzzy) - - def optimise(self, info): - yes_item = self.yes_item.optimise(info) - no_item = self.no_item.optimise(info) - - return Conditional(info, self.group, yes_item, no_item, self.position) - - def pack_characters(self, info): - self.yes_item = self.yes_item.pack_characters(info) - self.no_item = self.no_item.pack_characters(info) - return self - - def remove_captures(self): - self.yes_item = self.yes_item.remove_captures() - self.no_item = self.no_item.remove_captures() - - def is_atomic(self): - return self.yes_item.is_atomic() and self.no_item.is_atomic() - - def can_be_affix(self): - return self.yes_item.can_be_affix() and self.no_item.can_be_affix() - - def contains_group(self): 
- return self.yes_item.contains_group() or self.no_item.contains_group() - - def get_firstset(self, reverse): - return (self.yes_item.get_firstset(reverse) | - self.no_item.get_firstset(reverse)) - - def _compile(self, reverse, fuzzy): - code = [(OP.GROUP_EXISTS, self.group)] - code.extend(self.yes_item.compile(reverse, fuzzy)) - add_code = self.no_item.compile(reverse, fuzzy) - if add_code: - code.append((OP.NEXT, )) - code.extend(add_code) - - code.append((OP.END, )) - - return code - - def _dump(self, indent, reverse): - print "%sGROUP_EXISTS %s" % (INDENT * indent, self.group) - self.yes_item.dump(indent + 1, reverse) - if self.no_item: - print "%sOR" % (INDENT * indent) - self.no_item.dump(indent + 1, reverse) - - def is_empty(self): - return self.yes_item.is_empty() and self.no_item.is_empty() - - def __eq__(self, other): - return type(self) is type(other) and (self.group, self.yes_item, - self.no_item) == (other.group, other.yes_item, other.no_item) - - def max_width(self): - return max(self.yes_item.max_width(), self.no_item.max_width()) - -class DefaultBoundary(ZeroWidthBase): - _opcode = OP.DEFAULT_BOUNDARY - _op_name = "DEFAULT_BOUNDARY" - -class DefaultEndOfWord(ZeroWidthBase): - _opcode = OP.DEFAULT_END_OF_WORD - _op_name = "DEFAULT_END_OF_WORD" - -class DefaultStartOfWord(ZeroWidthBase): - _opcode = OP.DEFAULT_START_OF_WORD - _op_name = "DEFAULT_START_OF_WORD" - -class EndOfLine(ZeroWidthBase): - _opcode = OP.END_OF_LINE - _op_name = "END_OF_LINE" - -class EndOfLineU(EndOfLine): - _opcode = OP.END_OF_LINE_U - _op_name = "END_OF_LINE_U" - -class EndOfString(ZeroWidthBase): - _opcode = OP.END_OF_STRING - _op_name = "END_OF_STRING" - -class EndOfStringLine(ZeroWidthBase): - _opcode = OP.END_OF_STRING_LINE - _op_name = "END_OF_STRING_LINE" - -class EndOfStringLineU(EndOfStringLine): - _opcode = OP.END_OF_STRING_LINE_U - _op_name = "END_OF_STRING_LINE_U" - -class EndOfWord(ZeroWidthBase): - _opcode = OP.END_OF_WORD - _op_name = "END_OF_WORD" - -class 
Fuzzy(RegexBase): - def __init__(self, subpattern, constraints=None): - RegexBase.__init__(self) - if constraints is None: - constraints = {} - self.subpattern = subpattern - self.constraints = constraints - - # If an error type is mentioned in the cost equation, then its maximum - # defaults to unlimited. - if "cost" in constraints: - for e in "dis": - if e in constraints["cost"]: - constraints.setdefault(e, (0, None)) - - # If any error type is mentioned, then all the error maxima default to - # 0, otherwise they default to unlimited. - if set(constraints) & set("dis"): - for e in "dis": - constraints.setdefault(e, (0, 0)) - else: - for e in "dis": - constraints.setdefault(e, (0, None)) - - # The maximum of the generic error type defaults to unlimited. - constraints.setdefault("e", (0, None)) - - # The cost equation defaults to equal costs. Also, the cost of any - # error type not mentioned in the cost equation defaults to 0. - if "cost" in constraints: - for e in "dis": - constraints["cost"].setdefault(e, 0) - else: - constraints["cost"] = {"d": 1, "i": 1, "s": 1, "max": - constraints["e"][1]} - - def fix_groups(self, reverse, fuzzy): - self.subpattern.fix_groups(reverse, True) - - def pack_characters(self, info): - self.subpattern = self.subpattern.pack_characters(info) - return self - - def remove_captures(self): - self.subpattern = self.subpattern.remove_captures() - return self - - def is_atomic(self): - return self.subpattern.is_atomic() - - def contains_group(self): - return self.subpattern.contains_group() - - def _compile(self, reverse, fuzzy): - # The individual limits. - arguments = [] - for e in "dise": - v = self.constraints[e] - arguments.append(v[0]) - arguments.append(UNLIMITED if v[1] is None else v[1]) - - # The coeffs of the cost equation. - for e in "dis": - arguments.append(self.constraints["cost"][e]) - - # The maximum of the cost equation. 
- v = self.constraints["cost"]["max"] - arguments.append(UNLIMITED if v is None else v) - - flags = 0 - if reverse: - flags |= REVERSE_OP - - return ([(OP.FUZZY, flags) + tuple(arguments)] + - self.subpattern.compile(reverse, True) + [(OP.END,)]) - - def _dump(self, indent, reverse): - constraints = self._constraints_to_string() - if constraints: - constraints = " " + constraints - print "%sFUZZY%s" % (INDENT * indent, constraints) - self.subpattern.dump(indent + 1, reverse) - - def is_empty(self): - return self.subpattern.is_empty() - - def __eq__(self, other): - return (type(self) is type(other) and self.subpattern == - other.subpattern) - - def max_width(self): - return UNLIMITED - - def _constraints_to_string(self): - constraints = [] - - for name in "ids": - min, max = self.constraints[name] - if max == 0: - continue - - con = "" - - if min > 0: - con = "%s<=" % min - - con += name - - if max is not None: - con += "<=%s" % max - - constraints.append(con) - - cost = [] - for name in "ids": - coeff = self.constraints["cost"][name] - if coeff > 0: - cost.append("%s%s" % (coeff, name)) - - limit = self.constraints["cost"]["max"] - if limit is not None and limit > 0: - cost = "%s<=%s" % ("+".join(cost), limit) - constraints.append(cost) - - return ",".join(constraints) - -class Grapheme(RegexBase): - def _compile(self, reverse, fuzzy): - # Match at least 1 character until a grapheme boundary is reached. Note - # that this is the same whether matching forwards or backwards. 
- character_matcher = LazyRepeat(AnyAll(), 1, None).compile(reverse, - fuzzy) - boundary_matcher = [(OP.GRAPHEME_BOUNDARY, 1)] - - return character_matcher + boundary_matcher - - def _dump(self, indent, reverse): - print "%sGRAPHEME" % (INDENT * indent) - - def max_width(self): - return UNLIMITED - -class GreedyRepeat(RegexBase): - _opcode = OP.GREEDY_REPEAT - _op_name = "GREEDY_REPEAT" - - def __init__(self, subpattern, min_count, max_count): - RegexBase.__init__(self) - self.subpattern = subpattern - self.min_count = min_count - self.max_count = max_count - - def fix_groups(self, reverse, fuzzy): - self.subpattern.fix_groups(reverse, fuzzy) - - def optimise(self, info): - subpattern = self.subpattern.optimise(info) - - return type(self)(subpattern, self.min_count, self.max_count) - - def pack_characters(self, info): - self.subpattern = self.subpattern.pack_characters(info) - return self - - def remove_captures(self): - self.subpattern = self.subpattern.remove_captures() - return self - - def is_atomic(self): - return self.min_count == self.max_count and self.subpattern.is_atomic() - - def contains_group(self): - return self.subpattern.contains_group() - - def get_firstset(self, reverse): - fs = self.subpattern.get_firstset(reverse) - if self.min_count == 0: - fs.add(None) - - return fs - - def _compile(self, reverse, fuzzy): - repeat = [self._opcode, self.min_count] - if self.max_count is None: - repeat.append(UNLIMITED) - else: - repeat.append(self.max_count) - - subpattern = self.subpattern.compile(reverse, fuzzy) - if not subpattern: - return [] - - return ([tuple(repeat)] + subpattern + [(OP.END, )]) - - def _dump(self, indent, reverse): - if self.max_count is None: - limit = "INF" - else: - limit = self.max_count - print "%s%s %s %s" % (INDENT * indent, self._op_name, self.min_count, - limit) - - self.subpattern.dump(indent + 1, reverse) - - def is_empty(self): - return self.subpattern.is_empty() - - def __eq__(self, other): - return type(self) is 
type(other) and (self.subpattern, self.min_count, - self.max_count) == (other.subpattern, other.min_count, - other.max_count) - - def max_width(self): - if self.max_count is None: - return UNLIMITED - - return self.subpattern.max_width() * self.max_count - - def get_required_string(self, reverse): - max_count = UNLIMITED if self.max_count is None else self.max_count - if self.min_count == 0: - w = self.subpattern.max_width() * max_count - return min(w, UNLIMITED), None - - ofs, req = self.subpattern.get_required_string(reverse) - if req: - return ofs, req - - w = self.subpattern.max_width() * max_count - return min(w, UNLIMITED), None - -class Group(RegexBase): - def __init__(self, info, group, subpattern): - RegexBase.__init__(self) - self.info = info - self.group = group - self.subpattern = subpattern - - self.call_ref = None - - def fix_groups(self, reverse, fuzzy): - self.info.defined_groups[self.group] = (self, reverse, fuzzy) - self.subpattern.fix_groups(reverse, fuzzy) - - def optimise(self, info): - subpattern = self.subpattern.optimise(info) - - return Group(self.info, self.group, subpattern) - - def pack_characters(self, info): - self.subpattern = self.subpattern.pack_characters(info) - return self - - def remove_captures(self): - return self.subpattern.remove_captures() - - def is_atomic(self): - return self.subpattern.is_atomic() - - def can_be_affix(self): - return False - - def contains_group(self): - return True - - def get_firstset(self, reverse): - return self.subpattern.get_firstset(reverse) - - def has_simple_start(self): - return self.subpattern.has_simple_start() - - def _compile(self, reverse, fuzzy): - code = [] - - key = self.group, reverse, fuzzy - ref = self.info.call_refs.get(key) - if ref is not None: - code += [(OP.CALL_REF, ref)] - - public_group = private_group = self.group - if private_group < 0: - public_group = self.info.private_groups[private_group] - private_group = self.info.group_count - private_group - - code += ([(OP.GROUP, 
private_group, public_group)] + - self.subpattern.compile(reverse, fuzzy) + [(OP.END, )]) - - if ref is not None: - code += [(OP.END, )] - - return code - - def _dump(self, indent, reverse): - group = self.group - if group < 0: - group = private_groups[group] - print "%sGROUP %s" % (INDENT * indent, group) - self.subpattern.dump(indent + 1, reverse) - - def __eq__(self, other): - return (type(self) is type(other) and (self.group, self.subpattern) == - (other.group, other.subpattern)) - - def max_width(self): - return self.subpattern.max_width() - - def get_required_string(self, reverse): - return self.subpattern.get_required_string(reverse) - -class LazyRepeat(GreedyRepeat): - _opcode = OP.LAZY_REPEAT - _op_name = "LAZY_REPEAT" - -class LookAround(RegexBase): - _dir_text = {False: "AHEAD", True: "BEHIND"} - - def __new__(cls, behind, positive, subpattern): - if positive and subpattern.is_empty(): - return subpattern - - return RegexBase.__new__(cls) - - def __init__(self, behind, positive, subpattern): - RegexBase.__init__(self) - self.behind = bool(behind) - self.positive = bool(positive) - self.subpattern = subpattern - - def fix_groups(self, reverse, fuzzy): - self.subpattern.fix_groups(self.behind, fuzzy) - - def optimise(self, info): - subpattern = self.subpattern.optimise(info) - - return LookAround(self.behind, self.positive, subpattern) - - def pack_characters(self, info): - self.subpattern = self.subpattern.pack_characters(info) - return self - - def remove_captures(self): - return self.subpattern.remove_captures() - - def is_atomic(self): - return self.subpattern.is_atomic() - - def can_be_affix(self): - return self.subpattern.can_be_affix() - - def contains_group(self): - return self.subpattern.contains_group() - - def _compile(self, reverse, fuzzy): - return ([(OP.LOOKAROUND, int(self.positive), int(not self.behind))] + - self.subpattern.compile(self.behind) + [(OP.END, )]) - - def _dump(self, indent, reverse): - print "%sLOOK%s %s" % (INDENT * indent, 
self._dir_text[self.behind], - POS_TEXT[self.positive]) - self.subpattern.dump(indent + 1, self.behind) - - def is_empty(self): - return self.subpattern.is_empty() - - def __eq__(self, other): - return type(self) is type(other) and (self.behind, self.positive, - self.subpattern) == (other.behind, other.positive, other.subpattern) - - def max_width(self): - return 0 - -class PrecompiledCode(RegexBase): - def __init__(self, code): - self.code = code - - def _compile(self, reverse, fuzzy): - return [tuple(self.code)] - -class Property(RegexBase): - _opcode = {(NOCASE, False): OP.PROPERTY, (IGNORECASE, False): - OP.PROPERTY_IGN, (FULLCASE, False): OP.PROPERTY, (FULLIGNORECASE, False): - OP.PROPERTY_IGN, (NOCASE, True): OP.PROPERTY_REV, (IGNORECASE, True): - OP.PROPERTY_IGN_REV, (FULLCASE, True): OP.PROPERTY_REV, (FULLIGNORECASE, - True): OP.PROPERTY_IGN_REV} - - def __init__(self, value, positive=True, case_flags=NOCASE, - zerowidth=False): - RegexBase.__init__(self) - self.value = value - self.positive = bool(positive) - self.case_flags = case_flags - self.zerowidth = bool(zerowidth) - - self._key = (self.__class__, self.value, self.positive, - self.case_flags, self.zerowidth) - - def rebuild(self, positive, case_flags, zerowidth): - return Property(self.value, positive, case_flags, zerowidth) - - def optimise(self, info, in_set=False): - return self - - def get_firstset(self, reverse): - return set([self]) - - def has_simple_start(self): - return True - - def _compile(self, reverse, fuzzy): - flags = 0 - if self.positive: - flags |= POSITIVE_OP - if self.zerowidth: - flags |= ZEROWIDTH_OP - if fuzzy: - flags |= FUZZY_OP - return [(self._opcode[self.case_flags, reverse], flags, self.value)] - - def _dump(self, indent, reverse): - prop = PROPERTY_NAMES[self.value >> 16] - name, value = prop[0], prop[1][self.value & 0xFFFF] - print "%sPROPERTY %s %s:%s%s" % (INDENT * indent, - POS_TEXT[self.positive], name, value, CASE_TEXT[self.case_flags]) - - def matches(self, ch): - 
return _regex.has_property_value(self.value, ch) == self.positive - - def max_width(self): - return 1 - -class Range(RegexBase): - _opcode = {(NOCASE, False): OP.RANGE, (IGNORECASE, False): OP.RANGE_IGN, - (FULLCASE, False): OP.RANGE, (FULLIGNORECASE, False): OP.RANGE_IGN, - (NOCASE, True): OP.RANGE_REV, (IGNORECASE, True): OP.RANGE_IGN_REV, - (FULLCASE, True): OP.RANGE_REV, (FULLIGNORECASE, True): OP.RANGE_IGN_REV} - _op_name = "RANGE" - - def __init__(self, lower, upper, positive=True, case_flags=NOCASE, - zerowidth=False): - RegexBase.__init__(self) - self.lower = lower - self.upper = upper - self.positive = bool(positive) - self.case_flags = case_flags - self.zerowidth = bool(zerowidth) - - self._key = (self.__class__, self.lower, self.upper, self.positive, - self.case_flags, self.zerowidth) - - def rebuild(self, positive, case_flags, zerowidth): - return Range(self.lower, self.upper, positive, case_flags, zerowidth) - - def optimise(self, info, in_set=False): - # Is the range case-sensitive? - if not self.positive or not (self.case_flags & IGNORECASE) or in_set: - return self - - # Is full case-folding possible? - if (not (info.flags & UNICODE) or (self.case_flags & FULLIGNORECASE) != - FULLIGNORECASE): - return self - - # Get the characters which expand to multiple codepoints on folding. - expanding_chars = _regex.get_expand_on_folding() - - # Get the folded characters in the range. - items = [] - for ch in expanding_chars: - if self.lower <= ord(ch) <= self.upper: - folded = _regex.fold_case(FULL_CASE_FOLDING, ch) - items.append(String([ord(c) for c in folded], - case_flags=self.case_flags)) - - if not items: - # We can fall back to simple case-folding. - return self - - if len(items) < self.upper - self.lower + 1: - # Not all the characters are covered by the full case-folding. 
- items.insert(0, self) - - return Branch(items) - - def _compile(self, reverse, fuzzy): - flags = 0 - if self.positive: - flags |= POSITIVE_OP - if self.zerowidth: - flags |= ZEROWIDTH_OP - if fuzzy: - flags |= FUZZY_OP - return [(self._opcode[self.case_flags, reverse], flags, self.lower, - self.upper)] - - def _dump(self, indent, reverse): - display_lower = repr(unichr(self.lower)).lstrip("bu") - display_upper = repr(unichr(self.upper)).lstrip("bu") - print "%sRANGE %s %s %s%s" % (INDENT * indent, POS_TEXT[self.positive], - display_lower, display_upper, CASE_TEXT[self.case_flags]) - - def matches(self, ch): - return (self.lower <= ch <= self.upper) == self.positive - - def max_width(self): - return 1 - -class RefGroup(RegexBase): - _opcode = {(NOCASE, False): OP.REF_GROUP, (IGNORECASE, False): - OP.REF_GROUP_IGN, (FULLCASE, False): OP.REF_GROUP, (FULLIGNORECASE, - False): OP.REF_GROUP_FLD, (NOCASE, True): OP.REF_GROUP_REV, (IGNORECASE, - True): OP.REF_GROUP_IGN_REV, (FULLCASE, True): OP.REF_GROUP_REV, - (FULLIGNORECASE, True): OP.REF_GROUP_FLD_REV} - - def __init__(self, info, group, position, case_flags=NOCASE): - RegexBase.__init__(self) - self.info = info - self.group = group - self.position = position - self.case_flags = case_flags - - self._key = self.__class__, self.group, self.case_flags - - def fix_groups(self, reverse, fuzzy): - try: - self.group = int(self.group) - except ValueError: - try: - self.group = self.info.group_index[self.group] - except KeyError: - raise error("unknown group at position %d" % self.position) - - if not 1 <= self.group <= self.info.group_count: - raise error("unknown group at position %d" % self.position) - - self._key = self.__class__, self.group, self.case_flags - - def remove_captures(self): - raise error("group reference not allowed at position %d" % self.position) - - def _compile(self, reverse, fuzzy): - flags = 0 - if fuzzy: - flags |= FUZZY_OP - return [(self._opcode[self.case_flags, reverse], flags, self.group)] - - 
def _dump(self, indent, reverse): - print "%sREF_GROUP %s%s" % (INDENT * indent, self.group, - CASE_TEXT[self.case_flags]) - - def max_width(self): - return UNLIMITED - -class SearchAnchor(ZeroWidthBase): - _opcode = OP.SEARCH_ANCHOR - _op_name = "SEARCH_ANCHOR" - -class Sequence(RegexBase): - def __init__(self, items=None): - RegexBase.__init__(self) - if items is None: - items = [] - - self.items = items - - def fix_groups(self, reverse, fuzzy): - for s in self.items: - s.fix_groups(reverse, fuzzy) - - def optimise(self, info): - # Flatten the sequences. - items = [] - for s in self.items: - s = s.optimise(info) - if isinstance(s, Sequence): - items.extend(s.items) - else: - items.append(s) - - return make_sequence(items) - - def pack_characters(self, info): - "Packs sequences of characters into strings." - items = [] - characters = [] - case_flags = NOCASE - for s in self.items: - if type(s) is Character and s.positive: - if s.case_flags != case_flags: - # Different case sensitivity, so flush, unless neither the - # previous nor the new character are cased. - if s.case_flags or is_cased(info, s.value): - Sequence._flush_characters(info, characters, - case_flags, items) - - case_flags = s.case_flags - - characters.append(s.value) - elif type(s) is String or type(s) is Literal: - if s.case_flags != case_flags: - # Different case sensitivity, so flush, unless the neither - # the previous nor the new string are cased. 
- if s.case_flags or any(is_cased(info, c) for c in - characters): - Sequence._flush_characters(info, characters, - case_flags, items) - - case_flags = s.case_flags - - characters.extend(s.characters) - else: - Sequence._flush_characters(info, characters, case_flags, items) - - items.append(s.pack_characters(info)) - - Sequence._flush_characters(info, characters, case_flags, items) - - return make_sequence(items) - - def remove_captures(self): - self.items = [s.remove_captures() for s in self.items] - return self - - def is_atomic(self): - return all(s.is_atomic() for s in self.items) - - def can_be_affix(self): - return False - - def contains_group(self): - return any(s.contains_group() for s in self.items) - - def get_firstset(self, reverse): - fs = set() - items = self.items - if reverse: - items.reverse() - for s in items: - fs |= s.get_firstset(reverse) - if None not in fs: - return fs - fs.discard(None) - - return fs | set([None]) - - def has_simple_start(self): - return self.items and self.items[0].has_simple_start() - - def _compile(self, reverse, fuzzy): - seq = self.items - if reverse: - seq = seq[::-1] - - code = [] - for s in seq: - code.extend(s.compile(reverse, fuzzy)) - - return code - - def _dump(self, indent, reverse): - for s in self.items: - s.dump(indent, reverse) - - @staticmethod - def _flush_characters(info, characters, case_flags, items): - if not characters: - return - - # Disregard case_flags if all of the characters are case-less. 
- if case_flags & IGNORECASE: - if not any(is_cased(info, c) for c in characters): - case_flags = NOCASE - - if len(characters) == 1: - items.append(Character(characters[0], case_flags=case_flags)) - else: - items.append(String(characters, case_flags=case_flags)) - - characters[:] = [] - - def is_empty(self): - return all(i.is_empty() for i in self.items) - - def __eq__(self, other): - return type(self) is type(other) and self.items == other.items - - def max_width(self): - return sum(s.max_width() for s in self.items) - - def get_required_string(self, reverse): - seq = self.items - if reverse: - seq = seq[::-1] - - offset = 0 - - for s in seq: - ofs, req = s.get_required_string(reverse) - offset += ofs - if req: - return offset, req - - return offset, None - -class SetBase(RegexBase): - def __init__(self, info, items, positive=True, case_flags=NOCASE, - zerowidth=False): - RegexBase.__init__(self) - self.info = info - self.items = tuple(items) - self.positive = bool(positive) - self.case_flags = case_flags - self.zerowidth = bool(zerowidth) - - self.char_width = 1 - - self._key = (self.__class__, self.items, self.positive, - self.case_flags, self.zerowidth) - - def rebuild(self, positive, case_flags, zerowidth): - return type(self)(self.info, self.items, positive, case_flags, - zerowidth).optimise(self.info) - - def get_firstset(self, reverse): - return set([self]) - - def has_simple_start(self): - return True - - def _compile(self, reverse, fuzzy): - flags = 0 - if self.positive: - flags |= POSITIVE_OP - if self.zerowidth: - flags |= ZEROWIDTH_OP - if fuzzy: - flags |= FUZZY_OP - code = [(self._opcode[self.case_flags, reverse], flags)] - for m in self.items: - code.extend(m.compile()) - - code.append((OP.END, )) - - return code - - def _dump(self, indent, reverse): - print "%s%s %s%s" % (INDENT * indent, self._op_name, - POS_TEXT[self.positive], CASE_TEXT[self.case_flags]) - for i in self.items: - i.dump(indent + 1) - - def _handle_case_folding(self, info, 
in_set): - # Is the set case-sensitive? - if not self.positive or not (self.case_flags & IGNORECASE) or in_set: - return self - - # Is full case-folding possible? - if (not (self.info.flags & UNICODE) or (self.case_flags & - FULLIGNORECASE) != - FULLIGNORECASE): - return self - - # Get the characters which expand to multiple codepoints on folding. - expanding_chars = _regex.get_expand_on_folding() - - # Get the folded characters in the set. - items = [] - seen = set() - for ch in expanding_chars: - if self.matches(ord(ch)): - folded = _regex.fold_case(FULL_CASE_FOLDING, ch) - if folded not in seen: - items.append(String([ord(c) for c in folded], - case_flags=self.case_flags)) - seen.add(folded) - - if not items: - # We can fall back to simple case-folding. - return self - - return Branch([self] + items) - - def max_width(self): - # Is the set case-sensitive? - if not self.positive or not (self.case_flags & IGNORECASE): - return 1 - - # Is full case-folding possible? - if (not (self.info.flags & UNICODE) or (self.case_flags & - FULLIGNORECASE) != FULLIGNORECASE): - return 1 - - # Get the characters which expand to multiple codepoints on folding. - expanding_chars = _regex.get_expand_on_folding() - - # Get the folded characters in the set. 
- seen = set() - for ch in expanding_chars: - if self.matches(ord(ch)): - folded = _regex.fold_case(FULL_CASE_FOLDING, ch) - seen.add(folded) - - if not seen: - return 1 - - return max(len(folded) for folded in seen) - -class SetDiff(SetBase): - _opcode = {(NOCASE, False): OP.SET_DIFF, (IGNORECASE, False): - OP.SET_DIFF_IGN, (FULLCASE, False): OP.SET_DIFF, (FULLIGNORECASE, False): - OP.SET_DIFF_IGN, (NOCASE, True): OP.SET_DIFF_REV, (IGNORECASE, True): - OP.SET_DIFF_IGN_REV, (FULLCASE, True): OP.SET_DIFF_REV, (FULLIGNORECASE, - True): OP.SET_DIFF_IGN_REV} - _op_name = "SET_DIFF" - - def optimise(self, info, in_set=False): - items = self.items - if len(items) > 2: - items = [items[0], SetUnion(info, items[1 : ])] - - if len(items) == 1: - return items[0].with_flags(case_flags=self.case_flags, - zerowidth=self.zerowidth).optimise(info, in_set) - - self.items = tuple(m.optimise(info, in_set=True) for m in items) - - return self._handle_case_folding(info, in_set) - - def matches(self, ch): - m = self.items[0].matches(ch) and not self.items[1].matches(ch) - return m == self.positive - -class SetInter(SetBase): - _opcode = {(NOCASE, False): OP.SET_INTER, (IGNORECASE, False): - OP.SET_INTER_IGN, (FULLCASE, False): OP.SET_INTER, (FULLIGNORECASE, - False): OP.SET_INTER_IGN, (NOCASE, True): OP.SET_INTER_REV, (IGNORECASE, - True): OP.SET_INTER_IGN_REV, (FULLCASE, True): OP.SET_INTER_REV, - (FULLIGNORECASE, True): OP.SET_INTER_IGN_REV} - _op_name = "SET_INTER" - - def optimise(self, info, in_set=False): - items = [] - for m in self.items: - m = m.optimise(info, in_set=True) - if isinstance(m, SetInter) and m.positive: - # Intersection in intersection. 
- items.extend(m.items) - else: - items.append(m) - - if len(items) == 1: - return items[0].with_flags(case_flags=self.case_flags, - zerowidth=self.zerowidth).optimise(info, in_set) - - self.items = tuple(items) - - return self._handle_case_folding(info, in_set) - - def matches(self, ch): - m = all(i.matches(ch) for i in self.items) - return m == self.positive - -class SetSymDiff(SetBase): - _opcode = {(NOCASE, False): OP.SET_SYM_DIFF, (IGNORECASE, False): - OP.SET_SYM_DIFF_IGN, (FULLCASE, False): OP.SET_SYM_DIFF, (FULLIGNORECASE, - False): OP.SET_SYM_DIFF_IGN, (NOCASE, True): OP.SET_SYM_DIFF_REV, - (IGNORECASE, True): OP.SET_SYM_DIFF_IGN_REV, (FULLCASE, True): - OP.SET_SYM_DIFF_REV, (FULLIGNORECASE, True): OP.SET_SYM_DIFF_IGN_REV} - _op_name = "SET_SYM_DIFF" - - def optimise(self, info, in_set=False): - items = [] - for m in self.items: - m = m.optimise(info, in_set=True) - if isinstance(m, SetSymDiff) and m.positive: - # Symmetric difference in symmetric difference. - items.extend(m.items) - else: - items.append(m) - - if len(items) == 1: - return items[0].with_flags(case_flags=self.case_flags, - zerowidth=self.zerowidth).optimise(info, in_set) - - self.items = tuple(items) - - return self._handle_case_folding(info, in_set) - - def matches(self, ch): - m = False - for i in self.items: - m = m != i.matches(ch) - - return m == self.positive - -class SetUnion(SetBase): - _opcode = {(NOCASE, False): OP.SET_UNION, (IGNORECASE, False): - OP.SET_UNION_IGN, (FULLCASE, False): OP.SET_UNION, (FULLIGNORECASE, - False): OP.SET_UNION_IGN, (NOCASE, True): OP.SET_UNION_REV, (IGNORECASE, - True): OP.SET_UNION_IGN_REV, (FULLCASE, True): OP.SET_UNION_REV, - (FULLIGNORECASE, True): OP.SET_UNION_IGN_REV} - _op_name = "SET_UNION" - - def optimise(self, info, in_set=False): - items = [] - for m in self.items: - m = m.optimise(info, in_set=True) - if isinstance(m, SetUnion) and m.positive: - # Union in union. 
- items.extend(m.items) - else: - items.append(m) - - if len(items) == 1: - i = items[0] - return i.with_flags(positive=i.positive == self.positive, - case_flags=self.case_flags, - zerowidth=self.zerowidth).optimise(info, in_set) - - self.items = tuple(items) - - return self._handle_case_folding(info, in_set) - - def _compile(self, reverse, fuzzy): - flags = 0 - if self.positive: - flags |= POSITIVE_OP - if self.zerowidth: - flags |= ZEROWIDTH_OP - if fuzzy: - flags |= FUZZY_OP - - characters, others = defaultdict(list), [] - for m in self.items: - if isinstance(m, Character): - characters[m.positive].append(m.value) - else: - others.append(m) - - code = [(self._opcode[self.case_flags, reverse], flags)] - - for positive, values in characters.items(): - flags = 0 - if positive: - flags |= POSITIVE_OP - if len(values) == 1: - code.append((OP.CHARACTER, flags, values[0])) - else: - code.append((OP.STRING, flags, len(values)) + tuple(values)) - - for m in others: - code.extend(m.compile()) - - code.append((OP.END, )) - - return code - - def matches(self, ch): - m = any(i.matches(ch) for i in self.items) - return m == self.positive - -class StartOfLine(ZeroWidthBase): - _opcode = OP.START_OF_LINE - _op_name = "START_OF_LINE" - -class StartOfLineU(StartOfLine): - _opcode = OP.START_OF_LINE_U - _op_name = "START_OF_LINE_U" - -class StartOfString(ZeroWidthBase): - _opcode = OP.START_OF_STRING - _op_name = "START_OF_STRING" - -class StartOfWord(ZeroWidthBase): - _opcode = OP.START_OF_WORD - _op_name = "START_OF_WORD" - -class String(RegexBase): - _opcode = {(NOCASE, False): OP.STRING, (IGNORECASE, False): OP.STRING_IGN, - (FULLCASE, False): OP.STRING, (FULLIGNORECASE, False): OP.STRING_FLD, - (NOCASE, True): OP.STRING_REV, (IGNORECASE, True): OP.STRING_IGN_REV, - (FULLCASE, True): OP.STRING_REV, (FULLIGNORECASE, True): - OP.STRING_FLD_REV} - - def __init__(self, characters, case_flags=NOCASE): - self.characters = tuple(characters) - self.case_flags = case_flags - - if 
(self.case_flags & FULLIGNORECASE) == FULLIGNORECASE: - folded_characters = [] - for char in self.characters: - folded = _regex.fold_case(FULL_CASE_FOLDING, unichr(char)) - folded_characters.extend(ord(c) for c in folded) - else: - folded_characters = self.characters - - self.folded_characters = tuple(folded_characters) - self.required = False - - self._key = self.__class__, self.characters, self.case_flags - - def get_firstset(self, reverse): - if reverse: - pos = -1 - else: - pos = 0 - return set([Character(self.characters[pos], - case_flags=self.case_flags)]) - - def has_simple_start(self): - return True - - def _compile(self, reverse, fuzzy): - flags = 0 - if fuzzy: - flags |= FUZZY_OP - if self.required: - flags |= REQUIRED_OP - return [(self._opcode[self.case_flags, reverse], flags, - len(self.folded_characters)) + self.folded_characters] - - def _dump(self, indent, reverse): - display = repr("".join(unichr(c) for c in self.characters)).lstrip("bu") - print "%sSTRING %s%s" % (INDENT * indent, display, - CASE_TEXT[self.case_flags]) - - def max_width(self): - return len(self.folded_characters) - - def get_required_string(self, reverse): - return 0, self - -class Literal(String): - def _dump(self, indent, reverse): - for c in self.characters: - display = ascii("".join(chr(c))).lstrip("bu") - print("{}CHARACTER MATCH {}{}".format(INDENT * indent, - display, CASE_TEXT[self.case_flags])) - -class StringSet(RegexBase): - _opcode = {(NOCASE, False): OP.STRING_SET, (IGNORECASE, False): - OP.STRING_SET_IGN, (FULLCASE, False): OP.STRING_SET, (FULLIGNORECASE, - False): OP.STRING_SET_FLD, (NOCASE, True): OP.STRING_SET_REV, - (IGNORECASE, True): OP.STRING_SET_IGN_REV, (FULLCASE, True): - OP.STRING_SET_REV, (FULLIGNORECASE, True): OP.STRING_SET_FLD_REV} - - def __init__(self, info, name, case_flags=NOCASE): - self.info = info - self.name = name - self.case_flags = case_flags - - self._key = self.__class__, self.name, self.case_flags - - self.set_key = (name, 
self.case_flags) - if self.set_key not in info.named_lists_used: - info.named_lists_used[self.set_key] = len(info.named_lists_used) - - def _compile(self, reverse, fuzzy): - index = self.info.named_lists_used[self.set_key] - items = self.info.kwargs[self.name] - - case_flags = self.case_flags - - if not items: - return [] - - encoding = self.info.flags & _ALL_ENCODINGS - fold_flags = encoding | case_flags - - if fuzzy: - choices = [self._folded(fold_flags, i) for i in items] - - # Sort from longest to shortest. - choices.sort(key=lambda s: (-len(s), s)) - - branches = [] - for string in choices: - branches.append(Sequence([Character(c, case_flags=case_flags) - for c in string])) - - if len(branches) > 1: - branch = Branch(branches) - else: - branch = branches[0] - branch = branch.optimise(self.info).pack_characters(self.info) - - return branch.compile(reverse, fuzzy) - else: - min_len = min(len(i) for i in items) - max_len = max(len(self._folded(fold_flags, i)) for i in items) - return [(self._opcode[case_flags, reverse], index, min_len, - max_len)] - - def _dump(self, indent, reverse): - print "%sSTRING_SET %s%s" % (INDENT * indent, self.name, - CASE_TEXT[self.case_flags]) - - def _folded(self, fold_flags, item): - if isinstance(item, unicode): - return [ord(c) for c in _regex.fold_case(fold_flags, item)] - else: - return [ord(c) for c in item] - - def _flatten(self, s): - # Flattens the branches. 
- if isinstance(s, Branch): - for b in s.branches: - self._flatten(b) - elif isinstance(s, Sequence) and s.items: - seq = s.items - - while isinstance(seq[-1], Sequence): - seq[-1 : ] = seq[-1].items - - n = 0 - while n < len(seq) and isinstance(seq[n], Character): - n += 1 - - if n > 1: - seq[ : n] = [String([c.value for c in seq[ : n]], - case_flags=self.case_flags)] - - self._flatten(seq[-1]) - - def max_width(self): - if not self.info.kwargs[self.name]: - return 0 - - if self.case_flags & IGNORECASE: - fold_flags = (self.info.flags & _ALL_ENCODINGS) | self.case_flags - return max(len(_regex.fold_case(fold_flags, i)) for i in - self.info.kwargs[self.name]) - else: - return max(len(i) for i in self.info.kwargs[self.name]) - -class Source(object): - "Scanner for the regular expression source string." - def __init__(self, string): - if isinstance(string, unicode): - self.string = string - self.char_type = unichr - else: - self.string = string - self.char_type = chr - - self.pos = 0 - self.ignore_space = False - self.sep = string[ : 0] - - def get(self): - string = self.string - pos = self.pos - - try: - if self.ignore_space: - while True: - if string[pos].isspace(): - # Skip over the whitespace. - pos += 1 - elif string[pos] == "#": - # Skip over the comment to the end of the line. - pos = string.index("\n", pos) - else: - break - - ch = string[pos] - self.pos = pos + 1 - return ch - except IndexError: - # We've reached the end of the string. - self.pos = pos - return string[ : 0] - except ValueError: - # The comment extended to the end of the string. - self.pos = len(string) - return string[ : 0] - - def get_many(self, count=1): - string = self.string - pos = self.pos - - try: - if self.ignore_space: - substring = [] - - while len(substring) < count: - while True: - if string[pos].isspace(): - # Skip over the whitespace. - pos += 1 - elif string[pos] == "#": - # Skip over the comment to the end of the line. 
- pos = string.index("\n", pos) - else: - break - - substring.append(string[pos]) - pos += 1 - - substring = "".join(substring) - else: - substring = string[pos : pos + count] - pos += len(substring) - - self.pos = pos - return substring - except IndexError: - # We've reached the end of the string. - self.pos = len(string) - return "".join(substring) - except ValueError: - # The comment extended to the end of the string. - self.pos = len(string) - return "".join(substring) - - def get_while(self, test_set, include=True): - string = self.string - pos = self.pos - - if self.ignore_space: - try: - substring = [] - - while True: - if string[pos].isspace(): - # Skip over the whitespace. - pos += 1 - elif string[pos] == "#": - # Skip over the comment to the end of the line. - pos = string.index("\n", pos) - elif (string[pos] in test_set) == include: - substring.append(string[pos]) - pos += 1 - else: - break - - self.pos = pos - except IndexError: - # We've reached the end of the string. - self.pos = len(string) - except ValueError: - # The comment extended to the end of the string. - self.pos = len(string) - - return "".join(substring) - else: - try: - while (string[pos] in test_set) == include: - pos += 1 - - substring = string[self.pos : pos] - - self.pos = pos - - return substring - except IndexError: - # We've reached the end of the string. - substring = string[self.pos : pos] - - self.pos = pos - - return substring - - def skip_while(self, test_set, include=True): - string = self.string - pos = self.pos - - try: - if self.ignore_space: - while True: - if string[pos].isspace(): - # Skip over the whitespace. - pos += 1 - elif string[pos] == "#": - # Skip over the comment to the end of the line. - pos = string.index("\n", pos) - elif (string[pos] in test_set) == include: - pos += 1 - else: - break - else: - while (string[pos] in test_set) == include: - pos += 1 - - self.pos = pos - except IndexError: - # We've reached the end of the string. 
- self.pos = len(string) - except ValueError: - # The comment extended to the end of the string. - self.pos = len(string) - - def match(self, substring): - string = self.string - pos = self.pos - - if self.ignore_space: - try: - for c in substring: - while True: - if string[pos].isspace(): - # Skip over the whitespace. - pos += 1 - elif string[pos] == "#": - # Skip over the comment to the end of the line. - pos = string.index("\n", pos) - else: - break - - if string[pos] != c: - return False - - pos += 1 - - self.pos = pos - - return True - except IndexError: - # We've reached the end of the string. - return False - except ValueError: - # The comment extended to the end of the string. - return False - else: - if not string.startswith(substring, pos): - return False - - self.pos = pos + len(substring) - - return True - - def expect(self, substring): - if not self.match(substring): - raise error("missing %s at position %d" % (substring, self.pos)) - - def at_end(self): - string = self.string - pos = self.pos - - try: - if self.ignore_space: - while True: - if string[pos].isspace(): - pos += 1 - elif string[pos] == "#": - pos = string.index("\n", pos) - else: - break - - return pos >= len(string) - except IndexError: - # We've reached the end of the string. - return True - except ValueError: - # The comment extended to the end of the string. - return True - -class Info(object): - "Info about the regular expression." 
- - def __init__(self, flags=0, char_type=None, kwargs={}): - flags |= DEFAULT_FLAGS[(flags & _ALL_VERSIONS) or DEFAULT_VERSION] - self.flags = flags - self.global_flags = flags - - self.kwargs = kwargs - - self.group_count = 0 - self.group_index = {} - self.group_name = {} - self.char_type = char_type - self.named_lists_used = {} - self.open_groups = [] - self.open_group_count = {} - self.defined_groups = {} - self.group_calls = [] - self.private_groups = {} - - def open_group(self, name=None): - group = self.group_index.get(name) - if group is None: - while True: - self.group_count += 1 - if name is None or self.group_count not in self.group_name: - break - - group = self.group_count - if name: - self.group_index[name] = group - self.group_name[group] = name - - if group in self.open_groups: - # We have a nested named group. We'll assign it a private group - # number, initially negative until we can assign a proper - # (positive) number. - group_alias = -(len(self.private_groups) + 1) - self.private_groups[group_alias] = group - group = group_alias - - self.open_groups.append(group) - self.open_group_count[group] = self.open_group_count.get(group, 0) + 1 - - return group - - def close_group(self): - self.open_groups.pop() - - def is_open_group(self, name): - # In version 1, a group reference can refer to an open group. We'll - # just pretend the group isn't open. - version = (self.flags & _ALL_VERSIONS) or DEFAULT_VERSION - if version == VERSION1: - return False - - if name.isdigit(): - group = int(name) - else: - group = self.group_index.get(name) - - return group in self.open_groups - -def _check_group_features(info, parsed): - """Checks whether the reverse and fuzzy features of the group calls match - the groups which they call. - """ - call_refs = {} - additional_groups = [] - for call, reverse, fuzzy in info.group_calls: - # Look up the reference of this group call. 
- key = (call.group, reverse, fuzzy) - ref = call_refs.get(key) - if ref is None: - # This group doesn't have a reference yet, so look up its features. - if call.group == 0: - # Calling the pattern as a whole. - rev = bool(info.flags & REVERSE) - fuz = isinstance(parsed, Fuzzy) - if (rev, fuz) != (reverse, fuzzy): - # The pattern as a whole doesn't have the features we want, - # so we'll need to make a copy of it with the desired - # features. - additional_groups.append((parsed, reverse, fuzzy)) - else: - # Calling a capture group. - def_info = info.defined_groups[call.group] - group = def_info[0] - if def_info[1 : ] != (reverse, fuzzy): - # The group doesn't have the features we want, so we'll - # need to make a copy of it with the desired features. - additional_groups.append((group, reverse, fuzzy)) - - ref = len(call_refs) - call_refs[key] = ref - - call.call_ref = ref - - info.call_refs = call_refs - info.additional_groups = additional_groups - -def _get_required_string(parsed, flags): - "Gets the required string and related info of a parsed pattern." - - req_offset, required = parsed.get_required_string(bool(flags & REVERSE)) - if required: - required.required = True - if req_offset >= UNLIMITED: - req_offset = -1 - - req_flags = required.case_flags - if not (flags & UNICODE): - req_flags &= ~UNICODE - - req_chars = required.folded_characters - else: - req_offset = 0 - req_chars = () - req_flags = 0 - - return req_offset, req_chars, req_flags - -class Scanner: - def __init__(self, lexicon, flags=0): - self.lexicon = lexicon - - # Combine phrases into a compound pattern. - patterns = [] - for phrase, action in lexicon: - # Parse the regular expression. - source = Source(phrase) - info = Info(flags, source.char_type) - source.ignore_space = bool(info.flags & VERBOSE) - parsed = _parse_pattern(source, info) - if not source.at_end(): - raise error("trailing characters at position %d" % source.pos) - - # We want to forbid capture groups within each phrase. 
- patterns.append(parsed.remove_captures()) - - # Combine all the subpatterns into one pattern. - info = Info(flags) - patterns = [Group(info, g + 1, p) for g, p in enumerate(patterns)] - parsed = Branch(patterns) - - # Optimise the compound pattern. - parsed = parsed.optimise(info) - parsed = parsed.pack_characters(info) - - # Get the required string. - req_offset, req_chars, req_flags = _get_required_string(parsed, - info.flags) - - # Check the features of the groups. - _check_group_features(info, parsed) - - # Complain if there are any group calls. They are not supported by the - # Scanner class. - if info.call_refs: - raise error("recursive regex not supported by Scanner") - - reverse = bool(info.flags & REVERSE) - - # Compile the compound pattern. The result is a list of tuples. - code = parsed.compile(reverse) + [(OP.SUCCESS, )] - - # Flatten the code into a list of ints. - code = _flatten_code(code) - - if not parsed.has_simple_start(): - # Get the first set, if possible. - try: - fs_code = _compile_firstset(info, parsed.get_firstset(reverse)) - fs_code = _flatten_code(fs_code) - code = fs_code + code - except _FirstSetError: - pass - - # Check the global flags for conflicts. - version = (info.flags & _ALL_VERSIONS) or DEFAULT_VERSION - if version not in (0, VERSION0, VERSION1): - raise ValueError("VERSION0 and VERSION1 flags are mutually incompatible") - - # Create the PatternObject. - # - # Local flags like IGNORECASE affect the code generation, but aren't - # needed by the PatternObject itself. Conversely, global flags like - # LOCALE _don't_ affect the code generation but _are_ needed by the - # PatternObject. 
- self.scanner = _regex.compile(None, (flags & GLOBAL_FLAGS) | version, - code, {}, {}, {}, [], req_offset, req_chars, req_flags, - len(patterns)) - - def scan(self, string): - result = [] - append = result.append - match = self.scanner.scanner(string).match - i = 0 - while True: - m = match() - if not m: - break - j = m.end() - if i == j: - break - action = self.lexicon[m.lastindex - 1][1] - if hasattr(action, '__call__'): - self.match = m - action = action(self, m.group()) - if action is not None: - append(action) - i = j - - return result, string[i : ] - -# Get the known properties dict. -PROPERTIES = _regex.get_properties() - -# Build the inverse of the properties dict. -PROPERTY_NAMES = {} -for prop_name, (prop_id, values) in PROPERTIES.items(): - name, prop_values = PROPERTY_NAMES.get(prop_id, ("", {})) - name = max(name, prop_name, key=len) - PROPERTY_NAMES[prop_id] = name, prop_values - - for val_name, val_id in values.items(): - prop_values[val_id] = max(prop_values.get(val_id, ""), val_name, - key=len) - -# Character escape sequences. -CHARACTER_ESCAPES = { - "a": "\a", - "b": "\b", - "f": "\f", - "n": "\n", - "r": "\r", - "t": "\t", - "v": "\v", -} - -# Predefined character set escape sequences. -CHARSET_ESCAPES = { - "d": lookup_property(None, "Digit", True), - "D": lookup_property(None, "Digit", False), - "s": lookup_property(None, "Space", True), - "S": lookup_property(None, "Space", False), - "w": lookup_property(None, "Word", True), - "W": lookup_property(None, "Word", False), -} - -# Positional escape sequences. -POSITION_ESCAPES = { - "A": StartOfString(), - "b": Boundary(), - "B": Boundary(False), - "m": StartOfWord(), - "M": EndOfWord(), - "Z": EndOfString(), -} - -# Positional escape sequences when WORD flag set. 
-WORD_POSITION_ESCAPES = dict(POSITION_ESCAPES) -WORD_POSITION_ESCAPES.update({ - "b": DefaultBoundary(), - "B": DefaultBoundary(False), - "m": DefaultStartOfWord(), - "M": DefaultEndOfWord(), -}) diff --git a/lib/regex/_regex_unicode.c b/lib/regex/_regex_unicode.c deleted file mode 100644 index 663a6bed..00000000 --- a/lib/regex/_regex_unicode.c +++ /dev/null @@ -1,12748 +0,0 @@ -/* For Unicode version 6.3.0 */ - -#include "_regex_unicode.h" - -#define RE_BLANK_MASK ((1 << RE_PROP_ZL) | (1 << RE_PROP_ZP)) -#define RE_GRAPH_MASK ((1 << RE_PROP_CC) | (1 << RE_PROP_CS) | (1 << RE_PROP_CN)) -#define RE_WORD_MASK (RE_PROP_M_MASK | (1 << RE_PROP_ND) | (1 << RE_PROP_PC)) - -typedef struct RE_AllCases { - RE_INT32 diffs[RE_MAX_CASES - 1]; -} RE_AllCases; - -typedef struct RE_FullCaseFolding { - RE_INT32 diff; - RE_UINT16 codepoints[RE_MAX_FOLDED - 1]; -} RE_FullCaseFolding; - -/* strings. */ - -char* re_strings[] = { - "-1/2", - "0", - "1", - "1/10", - "1/16", - "1/2", - "1/3", - "1/4", - "1/5", - "1/6", - "1/7", - "1/8", - "1/9", - "10", - "100", - "1000", - "10000", - "100000", - "100000000", - "1000000000000", - "103", - "107", - "11", - "11/2", - "118", - "12", - "122", - "129", - "13", - "13/2", - "130", - "132", - "133", - "14", - "15", - "15/2", - "16", - "17", - "17/2", - "18", - "19", - "2", - "2/3", - "2/5", - "20", - "200", - "2000", - "20000", - "202", - "21", - "214", - "216", - "216000", - "218", - "22", - "220", - "222", - "224", - "226", - "228", - "23", - "230", - "232", - "233", - "234", - "24", - "240", - "25", - "26", - "27", - "28", - "29", - "3", - "3/16", - "3/2", - "3/4", - "3/5", - "3/8", - "30", - "300", - "3000", - "30000", - "31", - "32", - "33", - "34", - "35", - "36", - "37", - "38", - "39", - "4", - "4/5", - "40", - "400", - "4000", - "40000", - "41", - "42", - "43", - "432000", - "44", - "45", - "46", - "47", - "48", - "49", - "5", - "5/2", - "5/6", - "5/8", - "50", - "500", - "5000", - "50000", - "6", - "60", - "600", - "6000", - "60000", 
- "7", - "7/2", - "7/8", - "70", - "700", - "7000", - "70000", - "8", - "80", - "800", - "8000", - "80000", - "84", - "9", - "9/2", - "90", - "900", - "9000", - "90000", - "91", - "A", - "ABOVE", - "ABOVELEFT", - "ABOVERIGHT", - "AEGEANNUMBERS", - "AHEX", - "AI", - "AIN", - "AL", - "ALAPH", - "ALCHEMICAL", - "ALCHEMICALSYMBOLS", - "ALEF", - "ALETTER", - "ALNUM", - "ALPHA", - "ALPHABETIC", - "ALPHABETICPF", - "ALPHABETICPRESENTATIONFORMS", - "ALPHANUMERIC", - "AMBIGUOUS", - "AN", - "ANCIENTGREEKMUSIC", - "ANCIENTGREEKMUSICALNOTATION", - "ANCIENTGREEKNUMBERS", - "ANCIENTSYMBOLS", - "ANY", - "AR", - "ARAB", - "ARABIC", - "ARABICEXTA", - "ARABICEXTENDEDA", - "ARABICLETTER", - "ARABICMATH", - "ARABICMATHEMATICALALPHABETICSYMBOLS", - "ARABICNUMBER", - "ARABICPFA", - "ARABICPFB", - "ARABICPRESENTATIONFORMSA", - "ARABICPRESENTATIONFORMSB", - "ARABICSUP", - "ARABICSUPPLEMENT", - "ARMENIAN", - "ARMI", - "ARMN", - "ARROWS", - "ASCII", - "ASCIIHEXDIGIT", - "ASSIGNED", - "AT", - "ATA", - "ATAR", - "ATB", - "ATBL", - "ATERM", - "ATTACHEDABOVE", - "ATTACHEDABOVERIGHT", - "ATTACHEDBELOW", - "ATTACHEDBELOWLEFT", - "AVAGRAHA", - "AVESTAN", - "AVST", - "B", - "B2", - "BA", - "BALI", - "BALINESE", - "BAMU", - "BAMUM", - "BAMUMSUP", - "BAMUMSUPPLEMENT", - "BASICLATIN", - "BATAK", - "BATK", - "BB", - "BC", - "BEH", - "BELOW", - "BELOWLEFT", - "BELOWRIGHT", - "BENG", - "BENGALI", - "BETH", - "BIDIC", - "BIDICLASS", - "BIDICONTROL", - "BIDIM", - "BIDIMIRRORED", - "BINDU", - "BK", - "BL", - "BLANK", - "BLK", - "BLOCK", - "BLOCKELEMENTS", - "BN", - "BOPO", - "BOPOMOFO", - "BOPOMOFOEXT", - "BOPOMOFOEXTENDED", - "BOTTOM", - "BOTTOMANDRIGHT", - "BOUNDARYNEUTRAL", - "BOXDRAWING", - "BR", - "BRAH", - "BRAHMI", - "BRAI", - "BRAILLE", - "BRAILLEPATTERNS", - "BREAKAFTER", - "BREAKBEFORE", - "BREAKBOTH", - "BREAKSYMBOLS", - "BUGI", - "BUGINESE", - "BUHD", - "BUHID", - "BURUSHASKIYEHBARREE", - "BYZANTINEMUSIC", - "BYZANTINEMUSICALSYMBOLS", - "C", - "C&", - "CAKM", - "CAN", - "CANADIANABORIGINAL", - 
"CANADIANSYLLABICS", - "CANONICAL", - "CANONICALCOMBININGCLASS", - "CANS", - "CARI", - "CARIAN", - "CARRIAGERETURN", - "CASED", - "CASEDLETTER", - "CASEIGNORABLE", - "CB", - "CC", - "CCC", - "CCC10", - "CCC103", - "CCC107", - "CCC11", - "CCC118", - "CCC12", - "CCC122", - "CCC129", - "CCC13", - "CCC130", - "CCC132", - "CCC133", - "CCC14", - "CCC15", - "CCC16", - "CCC17", - "CCC18", - "CCC19", - "CCC20", - "CCC21", - "CCC22", - "CCC23", - "CCC24", - "CCC25", - "CCC26", - "CCC27", - "CCC28", - "CCC29", - "CCC30", - "CCC31", - "CCC32", - "CCC33", - "CCC34", - "CCC35", - "CCC36", - "CCC84", - "CCC91", - "CF", - "CHAKMA", - "CHAM", - "CHANGESWHENCASEFOLDED", - "CHANGESWHENCASEMAPPED", - "CHANGESWHENLOWERCASED", - "CHANGESWHENTITLECASED", - "CHANGESWHENUPPERCASED", - "CHER", - "CHEROKEE", - "CI", - "CIRCLE", - "CJ", - "CJK", - "CJKCOMPAT", - "CJKCOMPATFORMS", - "CJKCOMPATIBILITY", - "CJKCOMPATIBILITYFORMS", - "CJKCOMPATIBILITYIDEOGRAPHS", - "CJKCOMPATIBILITYIDEOGRAPHSSUPPLEMENT", - "CJKCOMPATIDEOGRAPHS", - "CJKCOMPATIDEOGRAPHSSUP", - "CJKEXTA", - "CJKEXTB", - "CJKEXTC", - "CJKEXTD", - "CJKRADICALSSUP", - "CJKRADICALSSUPPLEMENT", - "CJKSTROKES", - "CJKSYMBOLS", - "CJKSYMBOLSANDPUNCTUATION", - "CJKUNIFIEDIDEOGRAPHS", - "CJKUNIFIEDIDEOGRAPHSEXTENSIONA", - "CJKUNIFIEDIDEOGRAPHSEXTENSIONB", - "CJKUNIFIEDIDEOGRAPHSEXTENSIONC", - "CJKUNIFIEDIDEOGRAPHSEXTENSIOND", - "CL", - "CLOSE", - "CLOSEPARENTHESIS", - "CLOSEPUNCTUATION", - "CM", - "CN", - "CNTRL", - "CO", - "COM", - "COMBININGDIACRITICALMARKS", - "COMBININGDIACRITICALMARKSFORSYMBOLS", - "COMBININGDIACRITICALMARKSSUPPLEMENT", - "COMBININGHALFMARKS", - "COMBININGMARK", - "COMBININGMARKSFORSYMBOLS", - "COMMON", - "COMMONINDICNUMBERFORMS", - "COMMONSEPARATOR", - "COMPAT", - "COMPATJAMO", - "COMPLEXCONTEXT", - "CONDITIONALJAPANESESTARTER", - "CONNECTORPUNCTUATION", - "CONSONANT", - "CONSONANTDEAD", - "CONSONANTFINAL", - "CONSONANTHEADLETTER", - "CONSONANTMEDIAL", - "CONSONANTPLACEHOLDER", - "CONSONANTREPHA", - 
"CONSONANTSUBJOINED", - "CONTINGENTBREAK", - "CONTROL", - "CONTROLPICTURES", - "COPT", - "COPTIC", - "COUNTINGROD", - "COUNTINGRODNUMERALS", - "CP", - "CPRT", - "CR", - "CS", - "CUNEIFORM", - "CUNEIFORMNUMBERS", - "CUNEIFORMNUMBERSANDPUNCTUATION", - "CURRENCYSYMBOL", - "CURRENCYSYMBOLS", - "CWCF", - "CWCM", - "CWL", - "CWT", - "CWU", - "CYPRIOT", - "CYPRIOTSYLLABARY", - "CYRILLIC", - "CYRILLICEXTA", - "CYRILLICEXTB", - "CYRILLICEXTENDEDA", - "CYRILLICEXTENDEDB", - "CYRILLICSUP", - "CYRILLICSUPPLEMENT", - "CYRILLICSUPPLEMENTARY", - "CYRL", - "D", - "DA", - "DAL", - "DALATHRISH", - "DASH", - "DASHPUNCTUATION", - "DB", - "DE", - "DECIMAL", - "DECIMALNUMBER", - "DECOMPOSITIONTYPE", - "DEFAULTIGNORABLECODEPOINT", - "DEP", - "DEPRECATED", - "DESERET", - "DEVA", - "DEVANAGARI", - "DEVANAGARIEXT", - "DEVANAGARIEXTENDED", - "DI", - "DIA", - "DIACRITIC", - "DIACRITICALS", - "DIACRITICALSFORSYMBOLS", - "DIACRITICALSSUP", - "DIGIT", - "DINGBATS", - "DOMINO", - "DOMINOTILES", - "DOUBLEABOVE", - "DOUBLEBELOW", - "DOUBLEQUOTE", - "DQ", - "DSRT", - "DT", - "DUALJOINING", - "E", - "EA", - "EASTASIANWIDTH", - "EGYP", - "EGYPTIANHIEROGLYPHS", - "EMOTICONS", - "EN", - "ENC", - "ENCLOSEDALPHANUM", - "ENCLOSEDALPHANUMERICS", - "ENCLOSEDALPHANUMERICSUPPLEMENT", - "ENCLOSEDALPHANUMSUP", - "ENCLOSEDCJK", - "ENCLOSEDCJKLETTERSANDMONTHS", - "ENCLOSEDIDEOGRAPHICSUP", - "ENCLOSEDIDEOGRAPHICSUPPLEMENT", - "ENCLOSINGMARK", - "ES", - "ET", - "ETHI", - "ETHIOPIC", - "ETHIOPICEXT", - "ETHIOPICEXTA", - "ETHIOPICEXTENDED", - "ETHIOPICEXTENDEDA", - "ETHIOPICSUP", - "ETHIOPICSUPPLEMENT", - "EUROPEANNUMBER", - "EUROPEANSEPARATOR", - "EUROPEANTERMINATOR", - "EX", - "EXCLAMATION", - "EXT", - "EXTEND", - "EXTENDER", - "EXTENDNUMLET", - "F", - "FALSE", - "FARSIYEH", - "FE", - "FEH", - "FIN", - "FINAL", - "FINALPUNCTUATION", - "FINALSEMKATH", - "FIRSTSTRONGISOLATE", - "FO", - "FONT", - "FORMAT", - "FRA", - "FRACTION", - "FSI", - "FULLWIDTH", - "GAF", - "GAMAL", - "GC", - "GCB", - "GENERALCATEGORY", - 
"GENERALPUNCTUATION", - "GEOMETRICSHAPES", - "GEOR", - "GEORGIAN", - "GEORGIANSUP", - "GEORGIANSUPPLEMENT", - "GL", - "GLAG", - "GLAGOLITIC", - "GLUE", - "GOTH", - "GOTHIC", - "GRAPH", - "GRAPHEMEBASE", - "GRAPHEMECLUSTERBREAK", - "GRAPHEMEEXTEND", - "GRAPHEMELINK", - "GRBASE", - "GREEK", - "GREEKANDCOPTIC", - "GREEKEXT", - "GREEKEXTENDED", - "GREK", - "GREXT", - "GRLINK", - "GUJARATI", - "GUJR", - "GURMUKHI", - "GURU", - "H", - "H2", - "H3", - "HAH", - "HALFANDFULLFORMS", - "HALFMARKS", - "HALFWIDTH", - "HALFWIDTHANDFULLWIDTHFORMS", - "HAMZAONHEHGOAL", - "HAN", - "HANG", - "HANGUL", - "HANGULCOMPATIBILITYJAMO", - "HANGULJAMO", - "HANGULJAMOEXTENDEDA", - "HANGULJAMOEXTENDEDB", - "HANGULSYLLABLES", - "HANGULSYLLABLETYPE", - "HANI", - "HANO", - "HANUNOO", - "HE", - "HEBR", - "HEBREW", - "HEBREWLETTER", - "HEH", - "HEHGOAL", - "HETH", - "HEX", - "HEXDIGIT", - "HIGHPRIVATEUSESURROGATES", - "HIGHPUSURROGATES", - "HIGHSURROGATES", - "HIRA", - "HIRAGANA", - "HL", - "HRKT", - "HST", - "HY", - "HYPHEN", - "ID", - "IDC", - "IDCONTINUE", - "IDEO", - "IDEOGRAPHIC", - "IDEOGRAPHICDESCRIPTIONCHARACTERS", - "IDS", - "IDSB", - "IDSBINARYOPERATOR", - "IDST", - "IDSTART", - "IDSTRINARYOPERATOR", - "IMPERIALARAMAIC", - "IN", - "INDICMATRACATEGORY", - "INDICNUMBERFORMS", - "INDICSYLLABICCATEGORY", - "INFIXNUMERIC", - "INHERITED", - "INIT", - "INITIAL", - "INITIALPUNCTUATION", - "INMC", - "INSC", - "INSCRIPTIONALPAHLAVI", - "INSCRIPTIONALPARTHIAN", - "INSEPARABLE", - "INSEPERABLE", - "INVISIBLE", - "IOTASUBSCRIPT", - "IPAEXT", - "IPAEXTENSIONS", - "IS", - "ISO", - "ISOLATED", - "ITAL", - "JAMO", - "JAMOEXTA", - "JAMOEXTB", - "JAVA", - "JAVANESE", - "JG", - "JL", - "JOINC", - "JOINCAUSING", - "JOINCONTROL", - "JOININGGROUP", - "JOININGTYPE", - "JT", - "JV", - "KA", - "KAF", - "KAITHI", - "KALI", - "KANA", - "KANASUP", - "KANASUPPLEMENT", - "KANAVOICING", - "KANBUN", - "KANGXI", - "KANGXIRADICALS", - "KANNADA", - "KAPH", - "KATAKANA", - "KATAKANAEXT", - "KATAKANAORHIRAGANA", - 
"KATAKANAPHONETICEXTENSIONS", - "KAYAHLI", - "KHAPH", - "KHAR", - "KHAROSHTHI", - "KHMER", - "KHMERSYMBOLS", - "KHMR", - "KNDA", - "KNOTTEDHEH", - "KTHI", - "KV", - "L", - "L&", - "LAM", - "LAMADH", - "LANA", - "LAO", - "LAOO", - "LATIN", - "LATIN1", - "LATIN1SUP", - "LATIN1SUPPLEMENT", - "LATINEXTA", - "LATINEXTADDITIONAL", - "LATINEXTB", - "LATINEXTC", - "LATINEXTD", - "LATINEXTENDEDA", - "LATINEXTENDEDADDITIONAL", - "LATINEXTENDEDB", - "LATINEXTENDEDC", - "LATINEXTENDEDD", - "LATN", - "LB", - "LC", - "LE", - "LEADINGJAMO", - "LEFT", - "LEFTANDRIGHT", - "LEFTJOINING", - "LEFTTORIGHT", - "LEFTTORIGHTEMBEDDING", - "LEFTTORIGHTISOLATE", - "LEFTTORIGHTOVERRIDE", - "LEPC", - "LEPCHA", - "LETTER", - "LETTERLIKESYMBOLS", - "LETTERNUMBER", - "LF", - "LIMB", - "LIMBU", - "LINB", - "LINEARB", - "LINEARBIDEOGRAMS", - "LINEARBSYLLABARY", - "LINEBREAK", - "LINEFEED", - "LINESEPARATOR", - "LISU", - "LL", - "LM", - "LO", - "LOE", - "LOGICALORDEREXCEPTION", - "LOWER", - "LOWERCASE", - "LOWERCASELETTER", - "LOWSURROGATES", - "LRE", - "LRI", - "LRO", - "LT", - "LU", - "LV", - "LVSYLLABLE", - "LVT", - "LVTSYLLABLE", - "LYCI", - "LYCIAN", - "LYDI", - "LYDIAN", - "M", - "M&", - "MAHJONG", - "MAHJONGTILES", - "MALAYALAM", - "MAND", - "MANDAIC", - "MANDATORYBREAK", - "MARK", - "MATH", - "MATHALPHANUM", - "MATHEMATICALALPHANUMERICSYMBOLS", - "MATHEMATICALOPERATORS", - "MATHOPERATORS", - "MATHSYMBOL", - "MB", - "MC", - "ME", - "MED", - "MEDIAL", - "MEEM", - "MEETEIMAYEK", - "MEETEIMAYEKEXT", - "MEETEIMAYEKEXTENSIONS", - "MERC", - "MERO", - "MEROITICCURSIVE", - "MEROITICHIEROGLYPHS", - "MIAO", - "MIDLETTER", - "MIDNUM", - "MIDNUMLET", - "MIM", - "MISCARROWS", - "MISCELLANEOUSMATHEMATICALSYMBOLSA", - "MISCELLANEOUSMATHEMATICALSYMBOLSB", - "MISCELLANEOUSSYMBOLS", - "MISCELLANEOUSSYMBOLSANDARROWS", - "MISCELLANEOUSSYMBOLSANDPICTOGRAPHS", - "MISCELLANEOUSTECHNICAL", - "MISCMATHSYMBOLSA", - "MISCMATHSYMBOLSB", - "MISCPICTOGRAPHS", - "MISCSYMBOLS", - "MISCTECHNICAL", - "ML", - "MLYM", - "MN", - 
"MODIFIERLETTER", - "MODIFIERLETTERS", - "MODIFIERSYMBOL", - "MODIFIERTONELETTERS", - "MODIFYINGLETTER", - "MONG", - "MONGOLIAN", - "MTEI", - "MUSIC", - "MUSICALSYMBOLS", - "MYANMAR", - "MYANMAREXTA", - "MYANMAREXTENDEDA", - "MYMR", - "N", - "N&", - "NA", - "NAN", - "NAR", - "NARROW", - "NB", - "NCHAR", - "ND", - "NEUTRAL", - "NEWLINE", - "NEWTAILUE", - "NEXTLINE", - "NK", - "NKO", - "NKOO", - "NL", - "NO", - "NOBLOCK", - "NOBREAK", - "NOJOININGGROUP", - "NONCHARACTERCODEPOINT", - "NONE", - "NONJOINING", - "NONSPACINGMARK", - "NONSTARTER", - "NOON", - "NOTAPPLICABLE", - "NOTREORDERED", - "NR", - "NS", - "NSM", - "NT", - "NU", - "NUKTA", - "NUMBER", - "NUMBERFORMS", - "NUMERIC", - "NUMERICTYPE", - "NUMERICVALUE", - "NUN", - "NV", - "NYA", - "OALPHA", - "OCR", - "ODI", - "OGAM", - "OGHAM", - "OGREXT", - "OIDC", - "OIDS", - "OLCHIKI", - "OLCK", - "OLDITALIC", - "OLDPERSIAN", - "OLDSOUTHARABIAN", - "OLDTURKIC", - "OLETTER", - "OLOWER", - "OMATH", - "ON", - "OP", - "OPENPUNCTUATION", - "OPTICALCHARACTERRECOGNITION", - "ORIYA", - "ORKH", - "ORYA", - "OSMA", - "OSMANYA", - "OTHER", - "OTHERALPHABETIC", - "OTHERDEFAULTIGNORABLECODEPOINT", - "OTHERGRAPHEMEEXTEND", - "OTHERIDCONTINUE", - "OTHERIDSTART", - "OTHERLETTER", - "OTHERLOWERCASE", - "OTHERMATH", - "OTHERNEUTRAL", - "OTHERNUMBER", - "OTHERPUNCTUATION", - "OTHERSYMBOL", - "OTHERUPPERCASE", - "OUPPER", - "OV", - "OVERLAY", - "OVERSTRUCK", - "P", - "P&", - "PARAGRAPHSEPARATOR", - "PATSYN", - "PATTERNSYNTAX", - "PATTERNWHITESPACE", - "PATWS", - "PC", - "PD", - "PDF", - "PDI", - "PE", - "PF", - "PHAG", - "PHAGSPA", - "PHAISTOS", - "PHAISTOSDISC", - "PHLI", - "PHNX", - "PHOENICIAN", - "PHONETICEXT", - "PHONETICEXTENSIONS", - "PHONETICEXTENSIONSSUPPLEMENT", - "PHONETICEXTSUP", - "PI", - "PLAYINGCARDS", - "PLRD", - "PO", - "POPDIRECTIONALFORMAT", - "POPDIRECTIONALISOLATE", - "POSTFIXNUMERIC", - "PP", - "PR", - "PREFIXNUMERIC", - "PREPEND", - "PRINT", - "PRIVATEUSE", - "PRIVATEUSEAREA", - "PRTI", - "PS", - "PUA", - "PUNCT", - 
"PUNCTUATION", - "QAAC", - "QAAI", - "QAF", - "QAPH", - "QMARK", - "QU", - "QUOTATION", - "QUOTATIONMARK", - "R", - "RADICAL", - "REGIONALINDICATOR", - "REGISTERSHIFTER", - "REH", - "REJANG", - "REVERSEDPE", - "RI", - "RIGHT", - "RIGHTJOINING", - "RIGHTTOLEFT", - "RIGHTTOLEFTEMBEDDING", - "RIGHTTOLEFTISOLATE", - "RIGHTTOLEFTOVERRIDE", - "RJNG", - "RLE", - "RLI", - "RLO", - "ROHINGYAYEH", - "RUMI", - "RUMINUMERALSYMBOLS", - "RUNIC", - "RUNR", - "S", - "S&", - "SA", - "SAD", - "SADHE", - "SAMARITAN", - "SAMR", - "SARB", - "SAUR", - "SAURASHTRA", - "SB", - "SC", - "SCONTINUE", - "SCRIPT", - "SD", - "SE", - "SEEN", - "SEGMENTSEPARATOR", - "SEMKATH", - "SENTENCEBREAK", - "SEP", - "SEPARATOR", - "SG", - "SHARADA", - "SHAVIAN", - "SHAW", - "SHIN", - "SHRD", - "SINGLEQUOTE", - "SINH", - "SINHALA", - "SK", - "SM", - "SMALL", - "SMALLFORMS", - "SMALLFORMVARIANTS", - "SML", - "SO", - "SOFTDOTTED", - "SORA", - "SORASOMPENG", - "SP", - "SPACE", - "SPACESEPARATOR", - "SPACINGMARK", - "SPACINGMODIFIERLETTERS", - "SPECIALS", - "SQ", - "SQR", - "SQUARE", - "ST", - "STERM", - "SUB", - "SUND", - "SUNDANESE", - "SUNDANESESUP", - "SUNDANESESUPPLEMENT", - "SUP", - "SUPARROWSA", - "SUPARROWSB", - "SUPER", - "SUPERANDSUB", - "SUPERSCRIPTSANDSUBSCRIPTS", - "SUPMATHOPERATORS", - "SUPPLEMENTALARROWSA", - "SUPPLEMENTALARROWSB", - "SUPPLEMENTALMATHEMATICALOPERATORS", - "SUPPLEMENTALPUNCTUATION", - "SUPPLEMENTARYPRIVATEUSEAREAA", - "SUPPLEMENTARYPRIVATEUSEAREAB", - "SUPPUAA", - "SUPPUAB", - "SUPPUNCTUATION", - "SURROGATE", - "SWASHKAF", - "SY", - "SYLO", - "SYLOTINAGRI", - "SYMBOL", - "SYRC", - "SYRIAC", - "SYRIACWAW", - "T", - "TAGALOG", - "TAGB", - "TAGBANWA", - "TAGS", - "TAH", - "TAILE", - "TAITHAM", - "TAIVIET", - "TAIXUANJING", - "TAIXUANJINGSYMBOLS", - "TAKR", - "TAKRI", - "TALE", - "TALU", - "TAMIL", - "TAML", - "TAVT", - "TAW", - "TEHMARBUTA", - "TEHMARBUTAGOAL", - "TELU", - "TELUGU", - "TERM", - "TERMINALPUNCTUATION", - "TETH", - "TFNG", - "TGLG", - "THAA", - "THAANA", - "THAI", - 
"TIBETAN", - "TIBT", - "TIFINAGH", - "TITLECASELETTER", - "TONELETTER", - "TONEMARK", - "TOP", - "TOPANDBOTTOM", - "TOPANDBOTTOMANDRIGHT", - "TOPANDLEFT", - "TOPANDLEFTANDRIGHT", - "TOPANDRIGHT", - "TRAILINGJAMO", - "TRANSPARENT", - "TRANSPORTANDMAP", - "TRANSPORTANDMAPSYMBOLS", - "TRUE", - "U", - "UCAS", - "UCASEXT", - "UGAR", - "UGARITIC", - "UIDEO", - "UNASSIGNED", - "UNIFIEDCANADIANABORIGINALSYLLABICS", - "UNIFIEDCANADIANABORIGINALSYLLABICSEXTENDED", - "UNIFIEDIDEOGRAPH", - "UNKNOWN", - "UP", - "UPPER", - "UPPERCASE", - "UPPERCASELETTER", - "V", - "VAI", - "VAII", - "VARIATIONSELECTOR", - "VARIATIONSELECTORS", - "VARIATIONSELECTORSSUPPLEMENT", - "VEDICEXT", - "VEDICEXTENSIONS", - "VERT", - "VERTICAL", - "VERTICALFORMS", - "VIRAMA", - "VISARGA", - "VISUALORDERLEFT", - "VOWEL", - "VOWELDEPENDENT", - "VOWELINDEPENDENT", - "VOWELJAMO", - "VR", - "VS", - "VSSUP", - "W", - "WAW", - "WB", - "WHITESPACE", - "WIDE", - "WJ", - "WORD", - "WORDBREAK", - "WORDJOINER", - "WS", - "WSPACE", - "XDIGIT", - "XIDC", - "XIDCONTINUE", - "XIDS", - "XIDSTART", - "XPEO", - "XSUX", - "XX", - "Y", - "YEH", - "YEHBARREE", - "YEHWITHTAIL", - "YES", - "YI", - "YIII", - "YIJING", - "YIJINGHEXAGRAMSYMBOLS", - "YIRADICALS", - "YISYLLABLES", - "YUDH", - "YUDHHE", - "Z", - "Z&", - "ZAIN", - "ZHAIN", - "ZINH", - "ZL", - "ZP", - "ZS", - "ZW", - "ZWSPACE", - "ZYYY", - "ZZZZ", -}; - -/* strings: 10595 bytes. */ - -/* properties. 
*/ - -RE_Property re_properties[] = { - { 508, 0, 0}, - { 506, 0, 0}, - { 233, 1, 1}, - { 232, 1, 1}, - { 963, 2, 2}, - { 961, 2, 2}, - {1123, 3, 3}, - {1118, 3, 3}, - { 523, 4, 4}, - { 507, 4, 4}, - { 969, 5, 5}, - { 960, 5, 5}, - { 736, 6, 6}, - { 156, 7, 6}, - { 155, 7, 6}, - { 711, 8, 6}, - { 710, 8, 6}, - {1093, 9, 6}, - {1092, 9, 6}, - { 273, 10, 6}, - { 275, 11, 6}, - { 326, 11, 6}, - { 321, 12, 6}, - { 401, 12, 6}, - { 323, 13, 6}, - { 403, 13, 6}, - { 322, 14, 6}, - { 402, 14, 6}, - { 319, 15, 6}, - { 399, 15, 6}, - { 320, 16, 6}, - { 400, 16, 6}, - { 588, 17, 6}, - { 584, 17, 6}, - { 580, 18, 6}, - { 579, 18, 6}, - {1131, 19, 6}, - {1130, 19, 6}, - {1129, 20, 6}, - {1128, 20, 6}, - { 426, 21, 6}, - { 434, 21, 6}, - { 524, 22, 6}, - { 532, 22, 6}, - { 522, 23, 6}, - { 526, 23, 6}, - { 525, 24, 6}, - { 533, 24, 6}, - {1119, 25, 6}, - {1126, 25, 6}, - { 992, 25, 6}, - { 225, 26, 6}, - { 223, 26, 6}, - { 623, 27, 6}, - { 621, 27, 6}, - { 419, 28, 6}, - { 577, 29, 6}, - { 926, 30, 6}, - { 923, 30, 6}, - {1056, 31, 6}, - {1055, 31, 6}, - { 866, 32, 6}, - { 848, 32, 6}, - { 567, 33, 6}, - { 566, 33, 6}, - { 187, 34, 6}, - { 145, 34, 6}, - { 859, 35, 6}, - { 832, 35, 6}, - { 582, 36, 6}, - { 581, 36, 6}, - { 436, 37, 6}, - { 435, 37, 6}, - { 485, 38, 6}, - { 483, 38, 6}, - { 865, 39, 6}, - { 847, 39, 6}, - { 871, 40, 6}, - { 872, 40, 6}, - { 810, 41, 6}, - { 796, 41, 6}, - { 861, 42, 6}, - { 837, 42, 6}, - { 586, 43, 6}, - { 585, 43, 6}, - { 589, 44, 6}, - { 587, 44, 6}, - { 928, 45, 6}, - {1089, 46, 6}, - {1085, 46, 6}, - { 860, 47, 6}, - { 834, 47, 6}, - { 428, 48, 6}, - { 427, 48, 6}, - { 988, 49, 6}, - { 964, 49, 6}, - { 709, 50, 6}, - { 708, 50, 6}, - { 863, 51, 6}, - { 839, 51, 6}, - { 862, 52, 6}, - { 838, 52, 6}, - {1001, 53, 6}, - {1098, 54, 6}, - {1114, 54, 6}, - { 881, 55, 6}, - { 882, 55, 6}, - { 880, 56, 6}, - { 879, 56, 6}, - { 555, 57, 7}, - { 575, 57, 7}, - { 224, 58, 8}, - { 215, 58, 8}, - { 268, 59, 9}, - { 278, 59, 9}, - { 425, 60, 10}, - { 
449, 60, 10}, - { 453, 61, 11}, - { 452, 61, 11}, - { 624, 62, 12}, - { 619, 62, 12}, - { 625, 63, 13}, - { 626, 63, 13}, - { 701, 64, 14}, - { 678, 64, 14}, - { 827, 65, 15}, - { 821, 65, 15}, - { 828, 66, 16}, - { 830, 66, 16}, - { 227, 67, 6}, - { 226, 67, 6}, - { 592, 68, 17}, - { 600, 68, 17}, - { 594, 69, 18}, - { 601, 69, 18}, - { 159, 70, 6}, - { 154, 70, 6}, - { 166, 71, 6}, - { 231, 72, 6}, - { 521, 73, 6}, - { 911, 74, 6}, - {1122, 75, 6}, - {1127, 76, 6}, -}; - -/* properties: 572 bytes. */ - -/* property values. */ - -RE_PropertyValue re_property_values[] = { - {1086, 0, 0}, - { 357, 0, 0}, - {1094, 0, 1}, - { 718, 0, 1}, - { 712, 0, 2}, - { 705, 0, 2}, - {1066, 0, 3}, - { 717, 0, 3}, - { 775, 0, 4}, - { 706, 0, 4}, - { 864, 0, 5}, - { 707, 0, 5}, - { 813, 0, 6}, - { 774, 0, 6}, - { 467, 0, 7}, - { 744, 0, 7}, - { 994, 0, 8}, - { 743, 0, 8}, - { 424, 0, 9}, - { 797, 0, 9}, - { 440, 0, 9}, - { 693, 0, 10}, - { 805, 0, 10}, - { 868, 0, 11}, - { 806, 0, 11}, - { 993, 0, 12}, - {1155, 0, 12}, - { 703, 0, 13}, - {1153, 0, 13}, - { 878, 0, 14}, - {1154, 0, 14}, - { 384, 0, 15}, - { 277, 0, 15}, - { 358, 0, 15}, - { 499, 0, 16}, - { 316, 0, 16}, - { 912, 0, 17}, - { 359, 0, 17}, - {1023, 0, 18}, - { 393, 0, 18}, - { 420, 0, 19}, - { 884, 0, 19}, - { 851, 0, 20}, - { 915, 0, 20}, - { 355, 0, 21}, - { 887, 0, 21}, - { 374, 0, 22}, - { 883, 0, 22}, - { 869, 0, 23}, - { 903, 0, 23}, - { 741, 0, 24}, - { 982, 0, 24}, - { 397, 0, 25}, - { 961, 0, 25}, - { 777, 0, 26}, - { 981, 0, 26}, - { 870, 0, 27}, - { 987, 0, 27}, - { 599, 0, 28}, - { 900, 0, 28}, - { 494, 0, 29}, - { 888, 0, 29}, - { 858, 0, 30}, - { 261, 0, 30}, - { 262, 0, 30}, - { 691, 0, 31}, - { 656, 0, 31}, - { 657, 0, 31}, - { 735, 0, 32}, - { 727, 0, 32}, - { 365, 0, 32}, - { 728, 0, 32}, - { 824, 0, 33}, - { 789, 0, 33}, - { 790, 0, 33}, - { 918, 0, 34}, - { 876, 0, 34}, - { 917, 0, 34}, - { 877, 0, 34}, - {1028, 0, 35}, - { 950, 0, 35}, - { 951, 0, 35}, - { 971, 0, 36}, - {1148, 0, 36}, - {1149, 0, 
36}, - { 274, 0, 37}, - { 679, 0, 37}, - { 188, 0, 38}, - { 807, 1, 0}, - { 795, 1, 0}, - { 211, 1, 1}, - { 186, 1, 1}, - { 666, 1, 2}, - { 665, 1, 2}, - { 664, 1, 2}, - { 672, 1, 3}, - { 667, 1, 3}, - { 674, 1, 4}, - { 669, 1, 4}, - { 609, 1, 5}, - { 608, 1, 5}, - { 995, 1, 6}, - { 776, 1, 6}, - { 361, 1, 7}, - { 437, 1, 7}, - { 528, 1, 8}, - { 527, 1, 8}, - { 406, 1, 9}, - { 412, 1, 10}, - { 411, 1, 10}, - { 413, 1, 10}, - { 182, 1, 11}, - { 561, 1, 12}, - { 169, 1, 13}, - {1030, 1, 14}, - { 181, 1, 15}, - { 180, 1, 15}, - {1061, 1, 16}, - { 803, 1, 17}, - { 955, 1, 18}, - { 733, 1, 19}, - { 171, 1, 20}, - { 170, 1, 20}, - { 431, 1, 21}, - { 221, 1, 22}, - { 536, 1, 23}, - { 534, 1, 24}, - { 853, 1, 25}, - {1047, 1, 26}, - {1054, 1, 27}, - { 639, 1, 28}, - { 731, 1, 29}, - { 980, 1, 30}, - {1062, 1, 31}, - { 661, 1, 32}, - {1063, 1, 33}, - { 785, 1, 34}, - { 512, 1, 35}, - { 551, 1, 36}, - { 614, 1, 36}, - { 471, 1, 37}, - { 477, 1, 38}, - { 476, 1, 38}, - { 325, 1, 39}, - {1087, 1, 40}, - {1081, 1, 40}, - { 266, 1, 40}, - { 836, 1, 41}, - { 948, 1, 42}, - {1033, 1, 43}, - { 558, 1, 44}, - { 257, 1, 45}, - {1035, 1, 46}, - { 649, 1, 47}, - { 781, 1, 48}, - {1088, 1, 49}, - {1082, 1, 49}, - { 696, 1, 50}, - {1038, 1, 51}, - { 800, 1, 52}, - { 650, 1, 53}, - { 255, 1, 54}, - {1039, 1, 55}, - { 206, 1, 56}, - {1004, 1, 57}, - { 212, 1, 58}, - { 690, 1, 59}, - { 840, 1, 60}, - {1006, 1, 61}, - {1005, 1, 61}, - {1102, 1, 62}, - {1101, 1, 62}, - { 897, 1, 63}, - { 896, 1, 63}, - { 898, 1, 64}, - { 899, 1, 64}, - { 363, 1, 65}, - { 439, 1, 65}, - { 673, 1, 66}, - { 668, 1, 66}, - { 530, 1, 67}, - { 529, 1, 67}, - { 509, 1, 68}, - { 918, 1, 68}, - {1012, 1, 69}, - {1011, 1, 69}, - { 398, 1, 70}, - { 362, 1, 71}, - { 438, 1, 71}, - { 366, 1, 71}, - { 692, 1, 72}, - { 825, 1, 73}, - { 185, 1, 74}, - { 739, 1, 75}, - { 740, 1, 75}, - { 766, 1, 76}, - { 771, 1, 76}, - { 385, 1, 77}, - { 852, 1, 78}, - { 833, 1, 78}, - { 460, 1, 79}, - { 459, 1, 79}, - { 243, 1, 80}, - { 234, 
1, 81}, - { 510, 1, 82}, - { 763, 1, 83}, - { 770, 1, 83}, - { 441, 1, 84}, - { 761, 1, 85}, - { 767, 1, 85}, - {1014, 1, 86}, - {1008, 1, 86}, - { 249, 1, 87}, - { 248, 1, 87}, - {1015, 1, 88}, - {1009, 1, 88}, - { 762, 1, 89}, - { 768, 1, 89}, - {1016, 1, 90}, - {1013, 1, 90}, - { 764, 1, 91}, - { 760, 1, 91}, - { 517, 1, 92}, - { 675, 1, 93}, - { 670, 1, 93}, - { 387, 1, 94}, - { 514, 1, 95}, - { 513, 1, 95}, - {1065, 1, 96}, - { 474, 1, 97}, - { 472, 1, 97}, - { 409, 1, 98}, - { 407, 1, 98}, - {1017, 1, 99}, - {1022, 1, 99}, - { 343, 1, 100}, - { 342, 1, 100}, - { 638, 1, 101}, - { 637, 1, 101}, - { 583, 1, 102}, - { 579, 1, 102}, - { 346, 1, 103}, - { 345, 1, 103}, - { 572, 1, 104}, - { 641, 1, 105}, - { 237, 1, 106}, - { 550, 1, 107}, - { 371, 1, 107}, - { 636, 1, 108}, - { 239, 1, 109}, - { 238, 1, 109}, - { 344, 1, 110}, - { 644, 1, 111}, - { 642, 1, 111}, - { 464, 1, 112}, - { 463, 1, 112}, - { 332, 1, 113}, - { 330, 1, 113}, - { 348, 1, 114}, - { 338, 1, 114}, - {1143, 1, 115}, - {1142, 1, 115}, - { 347, 1, 116}, - { 329, 1, 116}, - {1145, 1, 117}, - {1144, 1, 118}, - { 704, 1, 119}, - {1096, 1, 120}, - { 410, 1, 121}, - { 408, 1, 121}, - { 208, 1, 122}, - { 778, 1, 123}, - { 676, 1, 124}, - { 671, 1, 124}, - {1027, 1, 125}, - { 368, 1, 126}, - { 593, 1, 126}, - { 890, 1, 127}, - { 959, 1, 128}, - { 433, 1, 129}, - { 432, 1, 129}, - { 645, 1, 130}, - { 932, 1, 131}, - { 552, 1, 132}, - { 615, 1, 132}, - { 618, 1, 133}, - { 318, 1, 134}, - { 787, 1, 135}, - { 786, 1, 135}, - {1040, 1, 136}, - { 750, 1, 137}, - { 749, 1, 137}, - { 475, 1, 138}, - { 473, 1, 138}, - { 748, 1, 139}, - { 554, 1, 140}, - { 549, 1, 140}, - { 553, 1, 141}, - { 616, 1, 141}, - { 570, 1, 142}, - { 568, 1, 143}, - { 569, 1, 143}, - { 713, 1, 144}, - { 913, 1, 145}, - { 916, 1, 145}, - { 912, 1, 145}, - { 334, 1, 146}, - { 336, 1, 146}, - { 158, 1, 147}, - { 157, 1, 147}, - { 178, 1, 148}, - { 176, 1, 148}, - {1099, 1, 149}, - {1114, 1, 149}, - {1105, 1, 150}, - { 364, 1, 151}, - { 
543, 1, 151}, - { 333, 1, 152}, - { 331, 1, 152}, - { 985, 1, 153}, - { 984, 1, 153}, - { 179, 1, 154}, - { 177, 1, 154}, - { 545, 1, 155}, - { 542, 1, 155}, - { 996, 1, 156}, - { 700, 1, 157}, - { 699, 1, 158}, - { 144, 1, 159}, - { 164, 1, 160}, - { 165, 1, 161}, - { 892, 1, 162}, - { 891, 1, 162}, - { 724, 1, 163}, - { 271, 1, 164}, - { 842, 1, 165}, - { 520, 1, 166}, - {1084, 1, 167}, - { 843, 1, 168}, - { 429, 1, 169}, - { 974, 1, 170}, - { 857, 1, 171}, - { 405, 1, 172}, - { 590, 1, 173}, - { 895, 1, 174}, - { 726, 1, 175}, - { 754, 1, 176}, - { 753, 1, 177}, - { 648, 1, 178}, - { 844, 1, 179}, - { 200, 1, 180}, - { 603, 1, 181}, - { 602, 1, 182}, - { 845, 1, 183}, - { 947, 1, 184}, - { 946, 1, 184}, - { 246, 1, 185}, - { 630, 1, 186}, - { 990, 1, 187}, - { 317, 1, 188}, - { 973, 1, 189}, - {1044, 1, 190}, - { 394, 1, 191}, - { 396, 1, 192}, - { 395, 1, 192}, - { 455, 1, 193}, - { 210, 1, 194}, - { 209, 1, 194}, - { 755, 1, 195}, - { 634, 1, 196}, - { 633, 1, 196}, - { 260, 1, 197}, - { 259, 1, 197}, - { 784, 1, 198}, - { 783, 1, 198}, - { 163, 1, 199}, - { 162, 1, 199}, - {1042, 1, 200}, - {1041, 1, 200}, - { 389, 1, 201}, - { 388, 1, 201}, - { 738, 1, 202}, - { 737, 1, 202}, - { 174, 1, 203}, - { 173, 1, 203}, - { 730, 1, 204}, - { 729, 1, 204}, - { 443, 1, 205}, - { 442, 1, 205}, - { 901, 1, 206}, - { 461, 1, 207}, - { 462, 1, 207}, - { 466, 1, 208}, - { 465, 1, 208}, - { 765, 1, 209}, - { 769, 1, 209}, - { 456, 1, 210}, - {1078, 1, 211}, - {1077, 1, 211}, - { 151, 1, 212}, - { 150, 1, 212}, - { 349, 1, 213}, - { 339, 1, 213}, - { 350, 1, 214}, - { 340, 1, 214}, - { 351, 1, 215}, - { 341, 1, 215}, - { 335, 1, 216}, - { 337, 1, 216}, - {1036, 1, 217}, - {1100, 1, 218}, - {1115, 1, 218}, - {1018, 1, 219}, - {1020, 1, 219}, - {1019, 1, 220}, - {1021, 1, 220}, - {1090, 2, 0}, - {1159, 2, 0}, - { 367, 2, 1}, - {1158, 2, 1}, - { 663, 2, 2}, - { 677, 2, 2}, - { 527, 2, 3}, - { 531, 2, 3}, - { 406, 2, 4}, - { 414, 2, 4}, - { 182, 2, 5}, - { 184, 2, 5}, - { 561, 2, 
6}, - { 560, 2, 6}, - { 169, 2, 7}, - { 168, 2, 7}, - {1030, 2, 8}, - {1029, 2, 8}, - {1061, 2, 9}, - {1060, 2, 9}, - { 431, 2, 10}, - { 430, 2, 10}, - { 221, 2, 11}, - { 220, 2, 11}, - { 536, 2, 12}, - { 537, 2, 12}, - { 534, 2, 13}, - { 535, 2, 13}, - { 853, 2, 14}, - { 855, 2, 14}, - {1047, 2, 15}, - {1048, 2, 15}, - {1054, 2, 16}, - {1053, 2, 16}, - { 639, 2, 17}, - { 652, 2, 17}, - { 731, 2, 18}, - { 773, 2, 18}, - { 980, 2, 19}, - { 979, 2, 19}, - {1062, 2, 20}, - { 661, 2, 21}, - { 662, 2, 21}, - {1063, 2, 22}, - {1064, 2, 22}, - { 785, 2, 23}, - { 788, 2, 23}, - { 512, 2, 24}, - { 511, 2, 24}, - { 549, 2, 25}, - { 548, 2, 25}, - { 471, 2, 26}, - { 470, 2, 26}, - { 325, 2, 27}, - { 324, 2, 27}, - { 265, 2, 28}, - { 269, 2, 28}, - { 836, 2, 29}, - { 835, 2, 29}, - { 948, 2, 30}, - { 949, 2, 30}, - { 649, 2, 31}, - { 651, 2, 31}, - { 781, 2, 32}, - { 780, 2, 32}, - { 572, 2, 33}, - { 571, 2, 33}, - { 641, 2, 34}, - { 632, 2, 34}, - { 237, 2, 35}, - { 236, 2, 35}, - { 547, 2, 36}, - { 556, 2, 36}, - {1140, 2, 37}, - {1141, 2, 37}, - { 842, 2, 38}, - { 613, 2, 38}, - { 520, 2, 39}, - { 519, 2, 39}, - { 429, 2, 40}, - { 448, 2, 40}, - { 596, 2, 41}, - {1152, 2, 41}, - { 920, 2, 41}, - {1033, 2, 42}, - {1059, 2, 42}, - { 558, 2, 43}, - { 557, 2, 43}, - { 257, 2, 44}, - { 256, 2, 44}, - {1035, 2, 45}, - {1034, 2, 45}, - { 696, 2, 46}, - { 695, 2, 46}, - {1038, 2, 47}, - {1045, 2, 47}, - { 698, 2, 48}, - { 697, 2, 48}, - {1084, 2, 49}, - {1083, 2, 49}, - { 974, 2, 50}, - { 975, 2, 50}, - { 857, 2, 51}, - { 856, 2, 51}, - { 404, 2, 52}, - { 391, 2, 52}, - { 248, 2, 53}, - { 247, 2, 53}, - { 255, 2, 54}, - { 254, 2, 54}, - { 387, 2, 55}, - { 386, 2, 55}, - { 919, 2, 55}, - { 800, 2, 56}, - {1046, 2, 56}, - { 517, 2, 57}, - { 516, 2, 57}, - {1065, 2, 58}, - {1058, 2, 58}, - {1027, 2, 59}, - {1026, 2, 59}, - { 843, 2, 60}, - {1132, 2, 60}, - { 648, 2, 61}, - { 647, 2, 61}, - { 206, 2, 62}, - { 205, 2, 62}, - { 394, 2, 63}, - {1133, 2, 63}, - { 895, 2, 64}, - { 894, 2, 
64}, - { 890, 2, 65}, - { 889, 2, 65}, - { 803, 2, 66}, - { 804, 2, 66}, - {1004, 2, 67}, - {1003, 2, 67}, - { 690, 2, 68}, - { 689, 2, 68}, - { 840, 2, 69}, - { 841, 2, 69}, - {1096, 2, 70}, - {1097, 2, 70}, - { 959, 2, 71}, - { 958, 2, 71}, - { 645, 2, 72}, - { 631, 2, 72}, - { 932, 2, 73}, - { 941, 2, 73}, - { 724, 2, 74}, - { 723, 2, 74}, - { 271, 2, 75}, - { 270, 2, 75}, - { 726, 2, 76}, - { 725, 2, 76}, - { 318, 2, 77}, - {1039, 2, 78}, - { 660, 2, 78}, - {1040, 2, 79}, - {1049, 2, 79}, - { 200, 2, 80}, - { 201, 2, 80}, - { 455, 2, 81}, - { 454, 2, 81}, - { 955, 2, 82}, - { 956, 2, 82}, - { 704, 2, 83}, - { 208, 2, 84}, - { 207, 2, 84}, - { 618, 2, 85}, - { 617, 2, 85}, - { 748, 2, 86}, - { 782, 2, 86}, - { 590, 2, 87}, - { 183, 2, 87}, - { 844, 2, 88}, - { 957, 2, 88}, - { 603, 2, 89}, - { 914, 2, 89}, - { 602, 2, 90}, - { 893, 2, 90}, - { 845, 2, 91}, - { 854, 2, 91}, - { 630, 2, 92}, - { 654, 2, 92}, - { 212, 2, 93}, - { 213, 2, 93}, - { 246, 2, 94}, - { 245, 2, 94}, - { 733, 2, 95}, - { 732, 2, 95}, - { 317, 2, 96}, - { 263, 2, 96}, - { 753, 2, 97}, - { 751, 2, 97}, - { 754, 2, 98}, - { 752, 2, 98}, - { 755, 2, 99}, - { 902, 2, 99}, - { 973, 2, 100}, - { 977, 2, 100}, - { 990, 2, 101}, - { 989, 2, 101}, - {1044, 2, 102}, - {1043, 2, 102}, - { 643, 2, 103}, - { 574, 2, 103}, - { 858, 3, 0}, - {1134, 3, 0}, - { 446, 3, 1}, - { 447, 3, 1}, - { 978, 3, 2}, - { 997, 3, 2}, - { 562, 3, 3}, - { 573, 3, 3}, - { 392, 3, 4}, - { 694, 3, 5}, - { 799, 3, 6}, - { 805, 3, 6}, - { 484, 3, 7}, - { 929, 3, 8}, - { 934, 3, 8}, - { 499, 3, 9}, - { 497, 3, 9}, - { 641, 3, 10}, - { 628, 3, 10}, - { 153, 3, 11}, - { 680, 3, 11}, - { 756, 3, 12}, - { 772, 3, 12}, - { 757, 3, 13}, - { 774, 3, 13}, - { 758, 3, 14}, - { 742, 3, 14}, - { 826, 3, 15}, - { 822, 3, 15}, - { 486, 3, 16}, - { 481, 3, 16}, - { 858, 4, 0}, - {1134, 4, 0}, - { 392, 4, 1}, - { 694, 4, 2}, - { 384, 4, 3}, - { 357, 4, 3}, - { 484, 4, 4}, - { 481, 4, 4}, - { 929, 4, 5}, - { 934, 4, 5}, - { 994, 4, 6}, - { 982, 
4, 6}, - { 656, 4, 7}, - {1095, 4, 8}, - {1032, 4, 9}, - { 719, 4, 10}, - { 721, 4, 11}, - { 910, 4, 12}, - { 907, 4, 12}, - { 858, 5, 0}, - {1134, 5, 0}, - { 392, 5, 1}, - { 694, 5, 2}, - { 484, 5, 3}, - { 481, 5, 3}, - { 970, 5, 4}, - { 965, 5, 4}, - { 499, 5, 5}, - { 497, 5, 5}, - { 991, 5, 6}, - { 710, 5, 7}, - { 707, 5, 7}, - {1092, 5, 8}, - {1091, 5, 8}, - { 846, 5, 9}, - { 680, 5, 9}, - { 826, 5, 10}, - { 822, 5, 10}, - { 194, 5, 11}, - { 189, 5, 11}, - {1001, 5, 12}, - {1000, 5, 12}, - { 353, 5, 13}, - { 352, 5, 13}, - { 962, 5, 14}, - { 961, 5, 14}, - { 806, 6, 0}, - { 789, 6, 0}, - { 487, 6, 0}, - { 488, 6, 0}, - {1139, 6, 1}, - {1135, 6, 1}, - {1032, 6, 1}, - {1079, 6, 1}, - { 816, 7, 0}, - { 791, 7, 0}, - { 681, 7, 1}, - { 656, 7, 1}, - {1112, 7, 2}, - {1095, 7, 2}, - {1075, 7, 3}, - {1032, 7, 3}, - { 720, 7, 4}, - { 719, 7, 4}, - { 722, 7, 5}, - { 721, 7, 5}, - { 685, 8, 0}, - { 656, 8, 0}, - { 937, 8, 1}, - { 927, 8, 1}, - { 478, 8, 2}, - { 457, 8, 2}, - { 479, 8, 3}, - { 468, 8, 3}, - { 480, 8, 4}, - { 469, 8, 4}, - { 175, 8, 5}, - { 161, 8, 5}, - { 369, 8, 6}, - { 393, 8, 6}, - { 878, 8, 7}, - { 202, 8, 7}, - { 967, 8, 8}, - { 950, 8, 8}, - {1119, 8, 9}, - {1125, 8, 9}, - { 867, 8, 10}, - { 849, 8, 10}, - { 242, 8, 11}, - { 235, 8, 11}, - { 813, 8, 12}, - { 820, 8, 12}, - { 172, 8, 13}, - { 148, 8, 13}, - { 688, 8, 14}, - { 716, 8, 14}, - { 940, 8, 15}, - { 944, 8, 15}, - { 686, 8, 16}, - { 714, 8, 16}, - { 938, 8, 17}, - { 942, 8, 17}, - { 904, 8, 18}, - { 885, 8, 18}, - { 687, 8, 19}, - { 715, 8, 19}, - { 939, 8, 20}, - { 943, 8, 20}, - { 496, 8, 21}, - { 502, 8, 21}, - { 905, 8, 22}, - { 886, 8, 22}, - { 817, 9, 0}, - { 1, 9, 0}, - { 818, 9, 0}, - { 874, 9, 1}, - { 2, 9, 1}, - { 873, 9, 1}, - { 823, 9, 2}, - { 120, 9, 2}, - { 802, 9, 2}, - { 635, 9, 3}, - { 127, 9, 3}, - { 655, 9, 3}, - {1106, 9, 4}, - { 133, 9, 4}, - {1113, 9, 4}, - { 279, 9, 5}, - { 13, 9, 5}, - { 282, 9, 6}, - { 22, 9, 6}, - { 284, 9, 7}, - { 25, 9, 7}, - { 287, 9, 8}, - { 28, 
9, 8}, - { 291, 9, 9}, - { 33, 9, 9}, - { 292, 9, 10}, - { 34, 9, 10}, - { 293, 9, 11}, - { 36, 9, 11}, - { 294, 9, 12}, - { 37, 9, 12}, - { 295, 9, 13}, - { 39, 9, 13}, - { 296, 9, 14}, - { 40, 9, 14}, - { 297, 9, 15}, - { 44, 9, 15}, - { 298, 9, 16}, - { 49, 9, 16}, - { 299, 9, 17}, - { 54, 9, 17}, - { 300, 9, 18}, - { 60, 9, 18}, - { 301, 9, 19}, - { 65, 9, 19}, - { 302, 9, 20}, - { 67, 9, 20}, - { 303, 9, 21}, - { 68, 9, 21}, - { 304, 9, 22}, - { 69, 9, 22}, - { 305, 9, 23}, - { 70, 9, 23}, - { 306, 9, 24}, - { 71, 9, 24}, - { 307, 9, 25}, - { 78, 9, 25}, - { 308, 9, 26}, - { 82, 9, 26}, - { 309, 9, 27}, - { 83, 9, 27}, - { 310, 9, 28}, - { 84, 9, 28}, - { 311, 9, 29}, - { 85, 9, 29}, - { 312, 9, 30}, - { 86, 9, 30}, - { 313, 9, 31}, - { 87, 9, 31}, - { 314, 9, 32}, - { 132, 9, 32}, - { 315, 9, 33}, - { 139, 9, 33}, - { 280, 9, 34}, - { 20, 9, 34}, - { 281, 9, 35}, - { 21, 9, 35}, - { 283, 9, 36}, - { 24, 9, 36}, - { 285, 9, 37}, - { 26, 9, 37}, - { 286, 9, 38}, - { 27, 9, 38}, - { 288, 9, 39}, - { 30, 9, 39}, - { 289, 9, 40}, - { 31, 9, 40}, - { 197, 9, 41}, - { 48, 9, 41}, - { 192, 9, 41}, - { 195, 9, 42}, - { 50, 9, 42}, - { 190, 9, 42}, - { 196, 9, 43}, - { 51, 9, 43}, - { 191, 9, 43}, - { 218, 9, 44}, - { 53, 9, 44}, - { 230, 9, 44}, - { 217, 9, 45}, - { 55, 9, 45}, - { 202, 9, 45}, - { 219, 9, 46}, - { 56, 9, 46}, - { 244, 9, 46}, - { 682, 9, 47}, - { 57, 9, 47}, - { 656, 9, 47}, - { 935, 9, 48}, - { 58, 9, 48}, - { 927, 9, 48}, - { 142, 9, 49}, - { 59, 9, 49}, - { 148, 9, 49}, - { 141, 9, 50}, - { 61, 9, 50}, - { 140, 9, 50}, - { 143, 9, 51}, - { 62, 9, 51}, - { 167, 9, 51}, - { 445, 9, 52}, - { 63, 9, 52}, - { 421, 9, 52}, - { 444, 9, 53}, - { 64, 9, 53}, - { 416, 9, 53}, - { 607, 9, 54}, - { 66, 9, 54}, - { 610, 9, 54}, - { 290, 9, 55}, - { 32, 9, 55}, - { 198, 9, 56}, - { 45, 9, 56}, - { 193, 9, 56}, - { 811, 10, 0}, - { 267, 10, 1}, - { 264, 10, 1}, - { 370, 10, 2}, - { 360, 10, 2}, - { 498, 10, 3}, - { 808, 10, 4}, - { 795, 10, 4}, - { 598, 10, 5}, 
- { 597, 10, 5}, - { 746, 10, 6}, - { 745, 10, 6}, - { 493, 10, 7}, - { 492, 10, 7}, - { 612, 10, 8}, - { 611, 10, 8}, - { 327, 10, 9}, - { 458, 10, 9}, - {1010, 10, 10}, - {1007, 10, 10}, - {1002, 10, 11}, - {1104, 10, 12}, - {1103, 10, 12}, - {1120, 10, 13}, - { 794, 10, 14}, - { 793, 10, 14}, - { 983, 10, 15}, - { 986, 10, 15}, - { 999, 10, 16}, - { 998, 10, 16}, - { 501, 10, 17}, - { 500, 10, 17}, - { 798, 11, 0}, - { 789, 11, 0}, - { 160, 11, 1}, - { 140, 11, 1}, - { 544, 11, 2}, - { 538, 11, 2}, - {1120, 11, 3}, - {1116, 11, 3}, - { 503, 11, 4}, - { 487, 11, 4}, - { 794, 11, 5}, - { 791, 11, 5}, - { 809, 12, 0}, - { 147, 12, 1}, - { 149, 12, 2}, - { 152, 12, 3}, - { 216, 12, 4}, - { 222, 12, 5}, - { 417, 12, 6}, - { 418, 12, 7}, - { 451, 12, 8}, - { 491, 12, 9}, - { 495, 12, 10}, - { 504, 12, 11}, - { 505, 12, 12}, - { 541, 12, 13}, - { 546, 12, 14}, - {1052, 12, 14}, - { 559, 12, 15}, - { 563, 12, 16}, - { 564, 12, 17}, - { 565, 12, 18}, - { 629, 12, 19}, - { 640, 12, 20}, - { 653, 12, 21}, - { 658, 12, 22}, - { 659, 12, 23}, - { 747, 12, 24}, - { 759, 12, 25}, - { 815, 12, 26}, - { 829, 12, 27}, - { 887, 12, 28}, - { 921, 12, 29}, - { 922, 12, 30}, - { 931, 12, 31}, - { 933, 12, 32}, - { 953, 12, 33}, - { 954, 12, 34}, - { 966, 12, 35}, - { 968, 12, 36}, - { 976, 12, 37}, - {1024, 12, 38}, - {1037, 12, 39}, - {1050, 12, 40}, - {1051, 12, 41}, - {1057, 12, 42}, - {1117, 12, 43}, - {1031, 12, 44}, - {1136, 12, 45}, - {1137, 12, 46}, - {1138, 12, 47}, - {1146, 12, 48}, - {1147, 12, 49}, - {1150, 12, 50}, - {1151, 12, 51}, - { 646, 12, 52}, - { 490, 12, 53}, - { 258, 12, 54}, - { 489, 12, 55}, - { 831, 12, 56}, - { 945, 12, 57}, - { 812, 13, 0}, - {1080, 13, 0}, - { 622, 13, 1}, - { 261, 13, 1}, - { 450, 13, 2}, - { 415, 13, 2}, - { 936, 13, 3}, - { 927, 13, 3}, - { 684, 13, 4}, - { 656, 13, 4}, - {1076, 13, 5}, - {1032, 13, 5}, - {1090, 14, 0}, - {1134, 14, 0}, - { 851, 14, 1}, - { 850, 14, 1}, - { 355, 14, 2}, - { 352, 14, 2}, - { 925, 14, 3}, - { 924, 14, 
3}, - { 518, 14, 4}, - { 515, 14, 4}, - { 814, 14, 5}, - { 819, 14, 5}, - { 482, 14, 6}, - { 481, 14, 6}, - { 253, 14, 7}, - {1025, 14, 7}, - { 595, 14, 8}, - { 610, 14, 8}, - { 909, 14, 9}, - { 908, 14, 9}, - { 906, 14, 10}, - { 903, 14, 10}, - { 826, 14, 11}, - { 822, 14, 11}, - { 156, 14, 12}, - { 148, 14, 12}, - { 582, 14, 13}, - { 578, 14, 13}, - { 604, 14, 14}, - { 591, 14, 14}, - { 605, 14, 14}, - { 577, 14, 15}, - { 576, 14, 15}, - { 365, 14, 16}, - { 356, 14, 16}, - { 251, 14, 17}, - { 214, 14, 17}, - { 250, 14, 18}, - { 204, 14, 18}, - { 992, 14, 19}, - { 991, 14, 19}, - { 734, 14, 20}, - { 229, 14, 20}, - { 272, 14, 21}, - { 392, 14, 21}, - { 702, 14, 22}, - { 694, 14, 22}, - { 383, 14, 23}, - { 276, 14, 23}, - { 372, 14, 24}, - { 952, 14, 24}, - { 160, 14, 25}, - { 146, 14, 25}, - { 252, 14, 26}, - { 203, 14, 26}, - {1023, 14, 27}, - { 972, 14, 27}, - {1157, 14, 28}, - {1156, 14, 28}, - { 801, 14, 29}, - { 805, 14, 29}, - {1124, 14, 30}, - {1121, 14, 30}, - { 620, 14, 31}, - { 627, 14, 32}, - { 626, 14, 33}, - { 539, 14, 34}, - { 540, 14, 35}, - { 354, 14, 36}, - { 390, 14, 36}, - { 562, 14, 37}, - { 573, 14, 37}, - { 373, 14, 38}, - { 328, 14, 38}, - { 929, 14, 39}, - { 934, 14, 39}, - { 811, 15, 0}, - { 826, 15, 1}, - { 822, 15, 1}, - { 440, 15, 2}, - { 434, 15, 2}, - { 423, 15, 3}, - { 422, 15, 3}, - { 792, 16, 0}, - { 0, 16, 1}, - { 1, 16, 2}, - { 4, 16, 3}, - { 3, 16, 4}, - { 12, 16, 5}, - { 11, 16, 6}, - { 10, 16, 7}, - { 9, 16, 8}, - { 73, 16, 9}, - { 8, 16, 10}, - { 7, 16, 11}, - { 6, 16, 12}, - { 77, 16, 13}, - { 43, 16, 14}, - { 5, 16, 15}, - { 76, 16, 16}, - { 110, 16, 17}, - { 42, 16, 18}, - { 75, 16, 19}, - { 92, 16, 20}, - { 109, 16, 21}, - { 122, 16, 22}, - { 2, 16, 23}, - { 74, 16, 24}, - { 41, 16, 25}, - { 108, 16, 26}, - { 72, 16, 27}, - { 121, 16, 28}, - { 91, 16, 29}, - { 134, 16, 30}, - { 107, 16, 31}, - { 23, 16, 32}, - { 115, 16, 33}, - { 29, 16, 34}, - { 120, 16, 35}, - { 35, 16, 36}, - { 127, 16, 37}, - { 38, 16, 38}, - { 133, 
16, 39}, - { 13, 16, 40}, - { 22, 16, 41}, - { 25, 16, 42}, - { 28, 16, 43}, - { 33, 16, 44}, - { 34, 16, 45}, - { 36, 16, 46}, - { 37, 16, 47}, - { 39, 16, 48}, - { 40, 16, 49}, - { 44, 16, 50}, - { 49, 16, 51}, - { 54, 16, 52}, - { 60, 16, 53}, - { 65, 16, 54}, - { 67, 16, 55}, - { 68, 16, 56}, - { 69, 16, 57}, - { 70, 16, 58}, - { 71, 16, 59}, - { 78, 16, 60}, - { 82, 16, 61}, - { 83, 16, 62}, - { 84, 16, 63}, - { 85, 16, 64}, - { 86, 16, 65}, - { 87, 16, 66}, - { 88, 16, 67}, - { 89, 16, 68}, - { 90, 16, 69}, - { 93, 16, 70}, - { 97, 16, 71}, - { 98, 16, 72}, - { 99, 16, 73}, - { 101, 16, 74}, - { 102, 16, 75}, - { 103, 16, 76}, - { 104, 16, 77}, - { 105, 16, 78}, - { 106, 16, 79}, - { 111, 16, 80}, - { 116, 16, 81}, - { 123, 16, 82}, - { 128, 16, 83}, - { 135, 16, 84}, - { 14, 16, 85}, - { 45, 16, 86}, - { 79, 16, 87}, - { 94, 16, 88}, - { 112, 16, 89}, - { 117, 16, 90}, - { 124, 16, 91}, - { 129, 16, 92}, - { 136, 16, 93}, - { 15, 16, 94}, - { 46, 16, 95}, - { 80, 16, 96}, - { 95, 16, 97}, - { 113, 16, 98}, - { 118, 16, 99}, - { 125, 16, 100}, - { 130, 16, 101}, - { 137, 16, 102}, - { 16, 16, 103}, - { 47, 16, 104}, - { 81, 16, 105}, - { 96, 16, 106}, - { 114, 16, 107}, - { 119, 16, 108}, - { 126, 16, 109}, - { 131, 16, 110}, - { 138, 16, 111}, - { 17, 16, 112}, - { 52, 16, 113}, - { 100, 16, 114}, - { 18, 16, 115}, - { 19, 16, 116}, - { 791, 17, 0}, - { 935, 17, 1}, - { 682, 17, 2}, - {1108, 17, 3}, - { 683, 17, 4}, - {1069, 17, 5}, - { 240, 17, 6}, - {1070, 17, 7}, - {1074, 17, 8}, - {1072, 17, 9}, - {1073, 17, 10}, - { 241, 17, 11}, - {1071, 17, 12}, - { 875, 17, 13}, - { 606, 17, 14}, - { 858, 18, 0}, - { 228, 18, 1}, - {1107, 18, 2}, - { 199, 18, 3}, - { 823, 18, 4}, - {1106, 18, 5}, - {1111, 18, 6}, - {1110, 18, 7}, - {1109, 18, 8}, - { 380, 18, 9}, - { 375, 18, 10}, - { 376, 18, 11}, - { 381, 18, 12}, - { 382, 18, 13}, - { 379, 18, 14}, - { 377, 18, 15}, - { 378, 18, 16}, - { 779, 18, 17}, - {1067, 18, 18}, - {1068, 18, 19}, - { 930, 18, 20}, -}; - -/* 
property values: 5004 bytes. */ - -/* Codepoints which expand on full case-folding. */ - -RE_UINT16 re_expand_on_folding[] = { - 223, 304, 329, 496, 912, 944, 1415, 7830, - 7831, 7832, 7833, 7834, 7838, 8016, 8018, 8020, - 8022, 8064, 8065, 8066, 8067, 8068, 8069, 8070, - 8071, 8072, 8073, 8074, 8075, 8076, 8077, 8078, - 8079, 8080, 8081, 8082, 8083, 8084, 8085, 8086, - 8087, 8088, 8089, 8090, 8091, 8092, 8093, 8094, - 8095, 8096, 8097, 8098, 8099, 8100, 8101, 8102, - 8103, 8104, 8105, 8106, 8107, 8108, 8109, 8110, - 8111, 8114, 8115, 8116, 8118, 8119, 8124, 8130, - 8131, 8132, 8134, 8135, 8140, 8146, 8147, 8150, - 8151, 8162, 8163, 8164, 8166, 8167, 8178, 8179, - 8180, 8182, 8183, 8188, 64256, 64257, 64258, 64259, - 64260, 64261, 64262, 64275, 64276, 64277, 64278, 64279, -}; - -/* expand_on_folding: 208 bytes. */ - -/* General_Category. */ - -static RE_UINT8 re_general_category_stage_1[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 9, 10, 11, 7, 7, 7, 7, 12, 13, 14, 14, 14, 15, - 16, 17, 18, 19, 20, 19, 21, 19, 19, 19, 19, 19, 19, 22, 19, 19, - 19, 19, 19, 19, 19, 19, 23, 19, 19, 19, 24, 19, 19, 25, 26, 19, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 27, 7, 28, 29, 19, 19, 19, 19, 19, 19, 19, 30, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 31, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 32, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 32, -}; - -static RE_UINT8 re_general_category_stage_2[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, - 32, 33, 34, 34, 35, 36, 37, 38, 39, 34, 34, 34, 40, 41, 42, 43, - 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, - 60, 61, 62, 63, 64, 64, 65, 66, 67, 68, 69, 70, 71, 69, 72, 73, - 69, 69, 64, 74, 64, 64, 75, 76, 77, 78, 79, 80, 81, 82, 69, 83, - 84, 85, 86, 87, 88, 89, 69, 69, 34, 34, 34, 34, 34, 34, 34, 34, - 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, - 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 90, 34, 34, 34, 34, - 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 91, - 92, 34, 34, 34, 34, 34, 34, 34, 34, 93, 34, 34, 94, 95, 96, 97, - 98, 99, 100, 101, 102, 103, 104, 105, 34, 34, 34, 34, 34, 34, 34, 34, - 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 
34, 34, 34, 34, 106, - 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, - 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, - 108, 108, 34, 34, 109, 110, 111, 112, 34, 34, 113, 114, 115, 116, 117, 118, - 119, 120, 121, 122, 76, 123, 124, 125, 126, 127, 76, 76, 76, 76, 76, 76, - 128, 76, 129, 130, 131, 76, 132, 76, 133, 76, 76, 76, 134, 76, 76, 76, - 135, 136, 137, 138, 76, 76, 76, 76, 76, 76, 76, 76, 76, 139, 76, 76, - 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, - 34, 34, 34, 34, 34, 34, 140, 76, 141, 76, 76, 76, 76, 76, 76, 76, - 34, 34, 34, 34, 34, 34, 34, 34, 142, 76, 76, 76, 76, 76, 76, 76, - 34, 34, 34, 34, 143, 76, 76, 76, 76, 76, 76, 76, 76, 76, 144, 145, - 146, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, - 69, 147, 148, 149, 150, 76, 151, 76, 152, 153, 154, 155, 156, 157, 158, 159, - 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 160, 161, 76, 76, - 162, 163, 164, 165, 166, 76, 167, 168, 169, 170, 171, 172, 173, 174, 175, 76, - 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 176, 34, 34, - 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 177, 34, - 178, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, - 34, 34, 34, 34, 178, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, - 179, 76, 180, 181, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, - 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 182, -}; - -static RE_UINT16 re_general_category_stage_3[] = { - 0, 0, 1, 2, 3, 4, 5, 6, 0, 0, 7, 8, 9, 10, 11, 12, - 13, 13, 13, 14, 15, 13, 13, 16, 17, 18, 19, 20, 21, 22, 13, 23, - 13, 13, 13, 24, 25, 11, 11, 11, 11, 26, 11, 27, 28, 29, 30, 31, - 32, 32, 32, 32, 32, 32, 32, 33, 34, 35, 36, 11, 37, 38, 13, 39, - 9, 9, 9, 11, 11, 11, 13, 13, 40, 13, 13, 13, 41, 13, 13, 13, - 13, 13, 42, 43, 9, 44, 45, 11, 46, 47, 32, 48, 49, 50, 51, 52, - 53, 54, 50, 50, 55, 32, 56, 57, 50, 50, 50, 50, 50, 58, 59, 60, - 61, 62, 50, 32, 63, 50, 50, 
50, 50, 50, 64, 65, 66, 50, 67, 68, - 50, 69, 70, 71, 50, 72, 73, 73, 73, 73, 74, 73, 73, 73, 75, 76, - 77, 50, 50, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, - 91, 84, 85, 92, 93, 94, 95, 96, 97, 98, 85, 99, 100, 101, 89, 102, - 83, 84, 85, 103, 104, 105, 89, 106, 107, 108, 109, 110, 111, 112, 95, 113, - 114, 115, 85, 116, 117, 118, 89, 119, 120, 115, 85, 121, 122, 123, 89, 124, - 120, 115, 50, 125, 126, 127, 89, 128, 129, 130, 50, 131, 132, 133, 73, 134, - 135, 50, 50, 136, 137, 138, 73, 73, 139, 140, 141, 142, 143, 144, 73, 73, - 145, 146, 147, 148, 149, 50, 150, 151, 152, 153, 32, 154, 155, 156, 73, 73, - 50, 50, 157, 158, 159, 160, 161, 162, 163, 164, 9, 9, 165, 50, 50, 166, - 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 167, 168, 50, 50, - 167, 50, 50, 169, 170, 171, 50, 50, 50, 170, 50, 50, 50, 172, 173, 174, - 50, 175, 50, 50, 50, 50, 50, 176, 177, 50, 50, 50, 50, 50, 50, 50, - 50, 50, 50, 50, 50, 50, 178, 50, 179, 180, 50, 50, 50, 50, 181, 182, - 183, 184, 50, 185, 50, 186, 183, 187, 50, 50, 50, 188, 189, 190, 191, 192, - 193, 191, 50, 50, 194, 50, 50, 195, 50, 50, 196, 50, 50, 50, 50, 197, - 50, 150, 198, 199, 200, 50, 201, 176, 50, 50, 202, 203, 204, 205, 206, 206, - 50, 207, 50, 50, 50, 208, 209, 210, 191, 191, 211, 73, 73, 73, 73, 73, - 212, 50, 50, 213, 214, 159, 215, 216, 217, 50, 218, 66, 50, 50, 219, 220, - 50, 50, 221, 222, 223, 66, 50, 224, 73, 73, 73, 73, 225, 226, 227, 228, - 11, 11, 229, 27, 27, 27, 230, 231, 11, 232, 27, 27, 32, 32, 233, 234, - 13, 13, 13, 13, 13, 13, 13, 13, 13, 235, 13, 13, 13, 13, 13, 13, - 236, 237, 236, 236, 237, 238, 236, 239, 240, 240, 240, 241, 242, 243, 244, 245, - 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 73, 258, 259, 260, - 261, 262, 263, 264, 265, 266, 267, 267, 268, 269, 270, 206, 271, 272, 206, 273, - 274, 274, 274, 274, 274, 274, 274, 274, 275, 206, 276, 206, 206, 206, 206, 277, - 206, 278, 274, 279, 206, 280, 281, 282, 206, 206, 283, 73, 284, 73, 266, 266, - 266, 285, 206, 206, 
206, 206, 286, 266, 206, 206, 206, 206, 206, 206, 206, 206, - 206, 206, 206, 287, 288, 206, 206, 289, 206, 206, 206, 206, 206, 206, 290, 206, - 291, 206, 206, 206, 206, 206, 292, 293, 266, 294, 206, 206, 295, 274, 296, 274, - 297, 298, 274, 274, 274, 299, 274, 300, 206, 206, 206, 274, 301, 175, 73, 73, - 73, 73, 73, 73, 73, 73, 73, 73, 9, 9, 302, 11, 11, 303, 304, 305, - 13, 13, 13, 13, 13, 13, 306, 307, 11, 11, 308, 50, 50, 50, 309, 310, - 50, 311, 312, 312, 312, 312, 32, 32, 313, 314, 315, 316, 73, 73, 73, 73, - 206, 317, 206, 206, 206, 206, 206, 282, 206, 206, 206, 206, 206, 318, 73, 319, - 320, 321, 322, 323, 135, 50, 50, 50, 50, 324, 177, 50, 50, 50, 50, 325, - 326, 50, 201, 135, 50, 50, 50, 50, 327, 328, 50, 51, 206, 206, 282, 50, - 206, 329, 330, 206, 331, 332, 206, 206, 330, 206, 206, 332, 206, 206, 206, 329, - 50, 50, 50, 197, 206, 206, 206, 206, 50, 50, 50, 50, 150, 73, 73, 73, - 50, 333, 50, 50, 50, 50, 50, 50, 150, 206, 206, 206, 283, 50, 50, 224, - 334, 50, 335, 73, 13, 13, 336, 337, 13, 338, 50, 50, 50, 50, 339, 340, - 31, 341, 342, 343, 13, 13, 13, 344, 345, 346, 347, 73, 73, 73, 73, 348, - 349, 50, 350, 351, 50, 50, 50, 352, 353, 50, 50, 354, 355, 191, 32, 356, - 66, 50, 357, 50, 358, 359, 50, 150, 77, 50, 50, 360, 361, 362, 73, 73, - 50, 50, 363, 364, 365, 366, 50, 367, 50, 50, 50, 368, 369, 370, 371, 372, - 373, 374, 312, 73, 73, 73, 73, 73, 73, 73, 73, 73, 50, 50, 375, 191, - 50, 50, 376, 50, 377, 50, 50, 202, 378, 378, 378, 378, 378, 378, 378, 378, - 379, 379, 379, 379, 379, 379, 379, 379, 50, 50, 50, 50, 50, 50, 201, 50, - 50, 50, 50, 50, 50, 380, 73, 73, 381, 382, 383, 384, 385, 50, 50, 50, - 50, 50, 50, 386, 387, 388, 50, 50, 50, 50, 50, 389, 73, 50, 50, 50, - 50, 390, 50, 50, 195, 73, 73, 391, 32, 392, 233, 393, 394, 395, 396, 397, - 50, 50, 50, 50, 50, 50, 50, 398, 399, 2, 3, 4, 5, 400, 401, 402, - 50, 403, 50, 327, 404, 405, 406, 407, 408, 50, 171, 409, 201, 201, 73, 73, - 50, 50, 50, 50, 50, 50, 50, 51, 410, 266, 266, 411, 267, 267, 267, 
412, - 413, 319, 73, 73, 73, 206, 206, 414, 50, 150, 50, 50, 50, 101, 73, 73, - 50, 327, 415, 50, 416, 73, 73, 73, 50, 417, 50, 50, 418, 419, 73, 73, - 9, 9, 420, 11, 11, 50, 50, 50, 50, 201, 191, 73, 73, 73, 73, 73, - 421, 50, 50, 422, 50, 423, 73, 73, 50, 424, 50, 425, 73, 73, 73, 73, - 50, 50, 50, 426, 73, 73, 73, 73, 427, 428, 50, 429, 430, 431, 50, 432, - 50, 50, 50, 433, 50, 434, 50, 435, 50, 50, 50, 50, 436, 73, 73, 73, - 73, 73, 73, 73, 73, 73, 266, 437, 438, 50, 50, 439, 440, 441, 442, 73, - 217, 50, 50, 443, 444, 50, 436, 191, 445, 50, 446, 447, 448, 73, 73, 73, - 217, 50, 50, 449, 450, 191, 73, 73, 50, 50, 451, 452, 191, 73, 73, 73, - 50, 50, 50, 50, 50, 50, 327, 73, 267, 267, 267, 267, 267, 267, 453, 448, - 50, 50, 327, 73, 73, 73, 73, 73, 50, 50, 50, 436, 73, 73, 73, 73, - 50, 50, 50, 50, 176, 454, 203, 455, 456, 457, 73, 73, 73, 73, 73, 73, - 458, 73, 73, 73, 73, 73, 73, 73, 206, 206, 206, 206, 206, 206, 206, 318, - 206, 206, 459, 206, 206, 206, 460, 461, 462, 206, 463, 206, 206, 464, 73, 73, - 206, 206, 206, 206, 465, 73, 73, 73, 206, 206, 206, 206, 206, 283, 266, 466, - 9, 467, 11, 468, 469, 470, 236, 9, 471, 472, 473, 474, 475, 9, 467, 11, - 476, 477, 11, 478, 479, 480, 481, 9, 482, 11, 9, 467, 11, 468, 469, 11, - 236, 9, 471, 481, 9, 482, 11, 9, 467, 11, 483, 9, 484, 485, 486, 487, - 11, 488, 9, 489, 490, 491, 492, 11, 493, 9, 494, 11, 495, 496, 496, 496, - 497, 50, 498, 499, 500, 501, 502, 503, 504, 202, 505, 202, 73, 73, 73, 506, - 206, 206, 319, 206, 206, 206, 206, 206, 206, 282, 329, 507, 291, 291, 73, 73, - 508, 206, 329, 206, 206, 206, 319, 206, 206, 284, 73, 73, 73, 73, 509, 206, - 510, 206, 206, 284, 511, 512, 73, 73, 206, 206, 513, 514, 206, 206, 206, 515, - 206, 282, 206, 206, 516, 73, 206, 513, 206, 206, 206, 329, 517, 206, 206, 206, - 206, 206, 206, 206, 206, 206, 206, 518, 206, 206, 206, 464, 282, 206, 519, 73, - 73, 73, 73, 73, 73, 73, 73, 520, 206, 206, 206, 206, 521, 73, 73, 73, - 206, 206, 206, 206, 318, 73, 73, 73, 206, 206, 206, 
206, 206, 206, 206, 282, - 50, 50, 50, 50, 50, 311, 73, 73, 50, 50, 50, 176, 50, 50, 50, 50, - 50, 201, 73, 73, 73, 73, 73, 73, 522, 73, 523, 523, 523, 523, 523, 523, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 73, - 379, 379, 379, 379, 379, 379, 379, 524, -}; - -static RE_UINT8 re_general_category_stage_4[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 2, 4, 5, 6, 2, - 7, 7, 7, 7, 7, 2, 8, 9, 10, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 12, 13, 14, 15, 16, 16, 16, 16, 16, 16, 16, - 16, 16, 16, 16, 16, 17, 18, 19, 1, 20, 20, 21, 22, 23, 24, 25, - 26, 27, 15, 2, 28, 29, 27, 30, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 31, 11, 11, 11, 32, 16, 16, 16, 16, 16, 16, 16, 16, - 16, 16, 16, 33, 16, 16, 16, 16, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 34, 34, 34, 34, 34, 34, 34, 34, 16, 32, 32, 32, - 32, 32, 32, 32, 11, 34, 34, 16, 34, 32, 32, 11, 34, 11, 16, 11, - 11, 34, 32, 11, 32, 16, 11, 34, 32, 32, 32, 11, 34, 16, 32, 11, - 34, 11, 34, 34, 32, 35, 32, 16, 36, 36, 37, 34, 38, 37, 34, 34, - 34, 34, 34, 34, 34, 34, 16, 32, 34, 38, 32, 11, 32, 32, 32, 32, - 32, 32, 16, 16, 16, 11, 34, 32, 34, 34, 11, 32, 32, 32, 32, 32, - 16, 16, 39, 16, 16, 16, 16, 16, 40, 40, 40, 40, 40, 40, 40, 40, - 40, 41, 41, 40, 40, 40, 40, 40, 40, 41, 41, 41, 41, 41, 41, 41, - 40, 40, 42, 41, 41, 41, 42, 42, 41, 41, 41, 41, 41, 41, 41, 41, - 43, 43, 43, 43, 43, 43, 43, 43, 32, 32, 42, 32, 44, 45, 16, 46, - 44, 44, 41, 47, 11, 48, 48, 11, 34, 11, 11, 11, 11, 11, 11, 11, - 11, 49, 11, 11, 11, 11, 16, 16, 16, 16, 16, 16, 16, 16, 16, 34, - 16, 11, 32, 16, 32, 32, 32, 32, 16, 16, 32, 50, 34, 32, 34, 11, - 32, 51, 43, 43, 52, 32, 32, 32, 11, 34, 34, 34, 34, 34, 34, 16, - 32, 32, 32, 32, 44, 44, 44, 44, 49, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 48, 53, 2, 2, 2, 54, 16, 16, 16, 16, 16, 16, 16, - 16, 16, 16, 16, 55, 56, 44, 57, 58, 43, 43, 43, 43, 43, 43, 43, - 43, 43, 43, 43, 43, 43, 43, 59, 60, 61, 43, 60, 44, 44, 44, 44, - 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 
36, 36, 62, 44, 44, - 36, 63, 46, 44, 44, 44, 44, 44, 64, 64, 65, 8, 9, 66, 2, 67, - 43, 43, 43, 43, 43, 61, 65, 2, 68, 36, 36, 36, 36, 69, 43, 43, - 7, 7, 7, 7, 7, 2, 2, 36, 70, 36, 36, 36, 36, 36, 36, 36, - 36, 36, 71, 43, 43, 43, 72, 51, 43, 43, 73, 74, 75, 43, 43, 36, - 7, 7, 7, 7, 7, 36, 76, 77, 2, 2, 2, 2, 2, 2, 2, 78, - 69, 36, 36, 36, 36, 36, 36, 36, 43, 43, 43, 43, 43, 79, 80, 36, - 36, 36, 36, 43, 43, 43, 43, 43, 70, 44, 44, 44, 44, 44, 44, 44, - 7, 7, 7, 7, 7, 36, 36, 36, 36, 36, 36, 36, 36, 69, 43, 43, - 43, 43, 40, 21, 2, 81, 44, 44, 36, 36, 36, 43, 43, 74, 43, 43, - 43, 43, 74, 43, 74, 43, 43, 44, 2, 2, 2, 2, 2, 2, 2, 46, - 36, 36, 36, 36, 69, 43, 44, 46, 44, 44, 44, 44, 44, 44, 44, 44, - 62, 36, 36, 36, 36, 36, 62, 44, 44, 44, 43, 43, 43, 43, 43, 43, - 43, 43, 43, 43, 43, 43, 43, 79, 43, 82, 36, 36, 36, 36, 36, 36, - 36, 36, 36, 36, 36, 82, 70, 83, 84, 43, 43, 43, 82, 83, 84, 83, - 69, 43, 43, 43, 36, 36, 36, 36, 36, 43, 2, 7, 7, 7, 7, 7, - 85, 36, 36, 36, 80, 36, 36, 36, 58, 83, 80, 36, 36, 36, 62, 80, - 62, 80, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 62, 36, 36, 36, - 62, 62, 44, 36, 36, 44, 70, 83, 84, 43, 79, 86, 87, 86, 84, 62, - 44, 44, 44, 86, 44, 44, 36, 80, 36, 43, 44, 7, 7, 7, 7, 7, - 36, 20, 27, 27, 27, 88, 44, 44, 58, 82, 80, 36, 36, 62, 44, 80, - 62, 36, 80, 62, 36, 44, 79, 83, 84, 79, 44, 58, 79, 58, 43, 44, - 58, 44, 44, 44, 80, 36, 62, 62, 44, 44, 44, 7, 7, 7, 7, 7, - 43, 36, 69, 44, 44, 44, 44, 44, 58, 82, 80, 36, 36, 36, 36, 80, - 36, 80, 36, 36, 36, 36, 36, 36, 62, 36, 80, 36, 36, 44, 70, 83, - 84, 43, 43, 58, 82, 86, 84, 44, 62, 44, 44, 44, 44, 44, 44, 44, - 66, 44, 44, 44, 44, 44, 44, 44, 62, 36, 80, 36, 36, 44, 70, 84, - 84, 43, 79, 86, 87, 86, 84, 44, 44, 44, 44, 82, 44, 44, 36, 80, - 77, 27, 27, 27, 44, 44, 44, 44, 44, 70, 80, 36, 36, 62, 44, 36, - 62, 36, 36, 44, 80, 62, 62, 36, 44, 80, 62, 44, 36, 62, 44, 36, - 36, 36, 36, 36, 36, 44, 44, 83, 82, 87, 44, 83, 87, 83, 84, 44, - 62, 44, 44, 86, 44, 44, 44, 44, 27, 89, 67, 67, 
88, 90, 44, 44, - 86, 83, 80, 36, 36, 36, 62, 36, 62, 36, 36, 36, 36, 36, 36, 36, - 36, 36, 80, 36, 36, 44, 80, 43, 82, 83, 87, 43, 79, 43, 43, 44, - 44, 44, 58, 79, 36, 44, 44, 44, 44, 44, 44, 44, 27, 27, 27, 89, - 44, 83, 80, 36, 36, 36, 62, 36, 36, 36, 80, 36, 36, 44, 70, 84, - 83, 83, 87, 82, 87, 83, 43, 44, 44, 44, 86, 87, 44, 44, 44, 62, - 80, 62, 44, 44, 44, 44, 44, 44, 36, 36, 36, 36, 36, 62, 80, 83, - 84, 43, 79, 83, 87, 83, 84, 62, 44, 44, 44, 86, 44, 44, 44, 44, - 27, 27, 27, 44, 91, 36, 36, 36, 44, 83, 80, 36, 36, 36, 36, 36, - 36, 36, 36, 62, 44, 36, 36, 36, 36, 80, 36, 36, 36, 36, 80, 44, - 36, 36, 36, 62, 44, 79, 44, 86, 83, 43, 79, 79, 83, 83, 83, 83, - 44, 83, 46, 44, 44, 44, 44, 44, 80, 36, 36, 36, 36, 36, 36, 36, - 69, 36, 43, 43, 43, 79, 44, 57, 36, 36, 36, 74, 43, 43, 43, 61, - 7, 7, 7, 7, 7, 2, 44, 44, 80, 62, 62, 80, 62, 62, 80, 44, - 44, 44, 36, 36, 80, 36, 36, 36, 80, 36, 80, 80, 44, 36, 80, 36, - 69, 36, 43, 43, 43, 58, 70, 44, 36, 36, 62, 81, 43, 43, 43, 44, - 7, 7, 7, 7, 7, 44, 36, 36, 76, 67, 2, 2, 2, 2, 2, 2, - 2, 92, 92, 67, 43, 67, 67, 67, 7, 7, 7, 7, 7, 27, 27, 27, - 27, 27, 51, 51, 51, 4, 4, 83, 36, 36, 36, 36, 80, 36, 36, 36, - 36, 36, 36, 36, 36, 36, 62, 44, 58, 43, 43, 43, 43, 43, 43, 82, - 43, 43, 61, 43, 36, 36, 69, 43, 43, 43, 43, 43, 58, 43, 43, 43, - 43, 43, 43, 43, 43, 43, 79, 67, 67, 67, 67, 75, 67, 67, 90, 67, - 2, 2, 92, 67, 21, 46, 44, 44, 36, 36, 36, 36, 36, 93, 84, 43, - 82, 43, 43, 43, 84, 82, 84, 70, 7, 7, 7, 7, 7, 2, 2, 2, - 36, 36, 36, 83, 43, 36, 36, 43, 70, 83, 94, 93, 83, 83, 83, 36, - 69, 43, 70, 36, 36, 36, 36, 36, 36, 82, 84, 82, 83, 83, 84, 93, - 7, 7, 7, 7, 7, 83, 84, 67, 11, 11, 11, 49, 44, 44, 49, 44, - 36, 36, 36, 36, 36, 63, 68, 36, 36, 36, 36, 36, 62, 36, 36, 44, - 36, 36, 36, 62, 62, 36, 36, 44, 62, 36, 36, 44, 36, 36, 36, 62, - 62, 36, 36, 44, 36, 36, 36, 36, 36, 36, 36, 62, 36, 36, 36, 36, - 36, 36, 36, 36, 36, 62, 58, 43, 2, 2, 2, 2, 95, 27, 27, 27, - 27, 27, 27, 27, 27, 27, 96, 44, 67, 67, 67, 
67, 67, 44, 44, 44, - 36, 36, 62, 44, 44, 44, 44, 44, 97, 36, 36, 36, 36, 36, 36, 36, - 36, 36, 36, 36, 36, 36, 63, 71, 98, 36, 36, 36, 36, 36, 36, 36, - 36, 36, 36, 36, 36, 99, 100, 44, 36, 36, 36, 36, 36, 63, 2, 101, - 102, 44, 44, 44, 44, 44, 44, 44, 36, 36, 36, 36, 36, 36, 62, 36, - 36, 43, 79, 44, 44, 44, 44, 44, 36, 43, 61, 46, 44, 44, 44, 44, - 36, 43, 44, 44, 44, 44, 44, 44, 62, 43, 44, 44, 44, 44, 44, 44, - 36, 36, 43, 84, 43, 43, 43, 83, 83, 83, 83, 82, 84, 43, 43, 43, - 43, 43, 2, 85, 2, 66, 69, 44, 7, 7, 7, 7, 7, 44, 44, 44, - 27, 27, 27, 27, 27, 44, 44, 44, 2, 2, 2, 103, 2, 60, 43, 65, - 36, 104, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 44, 44, 44, 44, - 36, 36, 36, 36, 69, 62, 44, 44, 36, 36, 36, 44, 44, 44, 44, 44, - 43, 82, 83, 84, 82, 83, 44, 44, 83, 82, 83, 83, 84, 43, 44, 44, - 90, 44, 2, 7, 7, 7, 7, 7, 36, 36, 36, 36, 36, 36, 36, 44, - 36, 36, 36, 36, 36, 36, 44, 44, 83, 83, 83, 83, 83, 83, 83, 83, - 94, 36, 36, 36, 83, 44, 44, 44, 7, 7, 7, 7, 7, 96, 44, 67, - 67, 67, 67, 67, 67, 67, 67, 67, 36, 36, 36, 69, 82, 84, 44, 2, - 36, 36, 93, 82, 43, 43, 43, 79, 82, 82, 84, 43, 43, 43, 82, 83, - 83, 84, 43, 43, 43, 43, 79, 58, 2, 2, 2, 85, 2, 2, 2, 44, - 43, 43, 94, 36, 36, 36, 36, 36, 36, 36, 82, 43, 43, 82, 82, 83, - 83, 82, 94, 36, 36, 36, 44, 44, 92, 67, 67, 67, 67, 51, 43, 43, - 43, 43, 67, 67, 67, 67, 90, 44, 43, 94, 36, 36, 36, 36, 36, 36, - 93, 43, 43, 83, 43, 84, 83, 36, 36, 36, 36, 82, 43, 83, 84, 84, - 43, 83, 44, 44, 44, 44, 2, 2, 36, 36, 83, 83, 83, 83, 43, 43, - 43, 43, 83, 43, 44, 55, 2, 2, 7, 7, 7, 7, 7, 44, 80, 36, - 36, 36, 36, 36, 40, 40, 40, 2, 2, 2, 2, 2, 44, 44, 44, 44, - 43, 61, 43, 43, 43, 43, 43, 43, 82, 43, 43, 43, 70, 36, 69, 36, - 36, 83, 70, 62, 44, 44, 44, 44, 16, 16, 16, 16, 16, 16, 40, 40, - 40, 40, 40, 40, 40, 45, 16, 16, 16, 16, 16, 16, 45, 16, 16, 16, - 16, 16, 16, 16, 16, 105, 40, 40, 43, 43, 43, 79, 44, 44, 44, 44, - 44, 44, 44, 44, 44, 44, 43, 43, 32, 32, 32, 16, 16, 16, 16, 32, - 16, 16, 16, 16, 11, 11, 11, 11, 16, 
16, 16, 44, 11, 11, 11, 44, - 16, 16, 16, 16, 49, 49, 49, 49, 16, 16, 16, 16, 16, 16, 16, 44, - 16, 16, 16, 16, 106, 106, 106, 106, 16, 16, 107, 16, 11, 11, 108, 109, - 41, 16, 107, 16, 11, 11, 108, 41, 16, 16, 44, 16, 11, 11, 110, 41, - 16, 16, 16, 16, 11, 11, 111, 41, 44, 16, 107, 16, 11, 11, 108, 112, - 113, 113, 113, 113, 113, 114, 64, 64, 115, 115, 115, 2, 116, 117, 116, 117, - 2, 2, 2, 2, 118, 64, 64, 119, 2, 2, 2, 2, 120, 121, 2, 122, - 123, 2, 124, 125, 2, 2, 2, 2, 2, 9, 123, 2, 2, 2, 2, 126, - 64, 64, 65, 64, 64, 64, 64, 64, 127, 44, 27, 27, 27, 8, 124, 128, - 27, 27, 27, 27, 27, 8, 124, 100, 40, 40, 40, 40, 40, 40, 81, 44, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 129, 44, 44, - 43, 43, 43, 43, 43, 43, 130, 52, 131, 52, 131, 43, 43, 43, 43, 43, - 79, 44, 44, 44, 44, 44, 44, 44, 67, 132, 67, 133, 67, 34, 11, 16, - 11, 32, 133, 67, 50, 11, 11, 67, 67, 67, 132, 132, 132, 11, 11, 134, - 11, 11, 35, 36, 39, 67, 16, 11, 8, 8, 50, 16, 16, 26, 67, 135, - 27, 27, 27, 27, 27, 27, 27, 27, 101, 101, 101, 101, 101, 101, 101, 101, - 101, 136, 137, 101, 138, 44, 44, 44, 8, 8, 139, 67, 67, 8, 67, 67, - 139, 26, 67, 139, 67, 67, 67, 139, 67, 67, 67, 67, 67, 67, 67, 8, - 67, 139, 139, 67, 67, 67, 67, 67, 67, 67, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 67, 67, 67, 67, 4, 4, 67, 67, - 8, 67, 67, 67, 140, 141, 67, 67, 67, 67, 67, 67, 67, 67, 139, 67, - 67, 67, 67, 67, 67, 26, 8, 8, 8, 8, 67, 67, 67, 67, 67, 67, - 67, 67, 67, 67, 67, 67, 8, 8, 8, 67, 67, 67, 67, 67, 67, 67, - 67, 67, 44, 44, 44, 44, 44, 44, 67, 67, 67, 90, 44, 44, 44, 44, - 67, 67, 67, 67, 67, 90, 44, 44, 27, 27, 27, 27, 27, 27, 67, 67, - 67, 67, 67, 67, 67, 27, 27, 27, 67, 67, 67, 26, 67, 67, 67, 67, - 26, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 8, 8, 8, 8, - 67, 67, 67, 67, 67, 67, 67, 26, 91, 67, 67, 67, 67, 67, 67, 67, - 67, 67, 67, 67, 4, 4, 4, 4, 4, 4, 4, 27, 27, 27, 27, 27, - 27, 27, 67, 67, 67, 67, 67, 67, 8, 8, 124, 142, 8, 8, 8, 8, - 8, 8, 8, 4, 4, 4, 4, 4, 8, 124, 143, 143, 143, 
143, 143, 143, - 143, 143, 143, 143, 142, 8, 8, 8, 8, 8, 8, 8, 4, 4, 8, 8, - 8, 8, 8, 8, 8, 8, 4, 8, 8, 8, 139, 26, 8, 8, 144, 44, - 11, 11, 11, 11, 11, 11, 11, 48, 16, 16, 16, 16, 16, 16, 16, 107, - 32, 11, 32, 34, 34, 34, 34, 11, 32, 32, 34, 16, 16, 16, 40, 11, - 32, 32, 135, 67, 67, 133, 34, 145, 43, 32, 44, 44, 55, 2, 95, 2, - 16, 16, 16, 54, 44, 44, 54, 44, 36, 36, 36, 36, 44, 44, 44, 53, - 46, 44, 44, 44, 44, 44, 44, 58, 36, 36, 36, 62, 44, 44, 44, 44, - 36, 36, 36, 62, 36, 36, 36, 62, 2, 116, 116, 2, 120, 121, 116, 2, - 2, 2, 2, 6, 2, 103, 116, 2, 116, 4, 4, 4, 4, 2, 2, 85, - 2, 2, 2, 2, 2, 115, 44, 44, 67, 67, 67, 67, 67, 91, 67, 67, - 67, 67, 67, 44, 44, 44, 44, 44, 67, 67, 67, 67, 67, 67, 44, 44, - 1, 2, 146, 147, 4, 4, 4, 4, 4, 67, 4, 4, 4, 4, 148, 149, - 150, 101, 101, 101, 101, 43, 43, 83, 151, 40, 40, 67, 101, 152, 63, 67, - 36, 36, 36, 62, 58, 153, 154, 68, 36, 36, 36, 36, 36, 63, 40, 68, - 44, 44, 80, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 62, - 67, 27, 27, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 90, - 27, 27, 27, 27, 27, 67, 67, 67, 67, 67, 67, 67, 27, 27, 27, 27, - 155, 27, 27, 27, 27, 27, 27, 27, 36, 36, 104, 36, 36, 36, 36, 36, - 36, 36, 36, 36, 36, 36, 156, 2, 7, 7, 7, 7, 7, 36, 44, 44, - 32, 32, 32, 32, 32, 32, 32, 69, 52, 157, 43, 43, 43, 43, 43, 85, - 32, 32, 32, 32, 44, 44, 44, 58, 36, 36, 36, 101, 101, 101, 101, 101, - 43, 2, 2, 2, 44, 44, 44, 44, 41, 41, 41, 154, 40, 40, 40, 40, - 41, 32, 32, 32, 32, 32, 32, 32, 16, 32, 32, 32, 32, 32, 32, 32, - 45, 16, 16, 16, 34, 34, 34, 32, 32, 32, 32, 32, 42, 158, 34, 107, - 32, 32, 44, 44, 44, 44, 44, 44, 32, 32, 32, 32, 32, 48, 44, 44, - 44, 44, 44, 44, 40, 35, 36, 36, 36, 70, 36, 70, 36, 69, 36, 36, - 36, 93, 84, 82, 67, 67, 44, 44, 27, 27, 27, 67, 159, 44, 44, 44, - 36, 36, 2, 2, 44, 44, 44, 44, 83, 36, 36, 36, 36, 36, 36, 36, - 36, 36, 83, 83, 83, 83, 83, 83, 83, 83, 79, 44, 44, 44, 44, 2, - 43, 36, 36, 36, 2, 71, 44, 44, 36, 36, 36, 43, 43, 43, 43, 2, - 36, 36, 36, 69, 43, 43, 
43, 43, 43, 83, 44, 44, 44, 44, 44, 55, - 36, 69, 83, 43, 43, 83, 82, 83, 160, 2, 2, 2, 2, 2, 2, 53, - 7, 7, 7, 7, 7, 44, 44, 2, 36, 36, 36, 36, 69, 43, 43, 82, - 84, 82, 84, 79, 44, 44, 44, 44, 36, 69, 36, 36, 36, 36, 82, 44, - 7, 7, 7, 7, 7, 44, 2, 2, 68, 36, 36, 76, 67, 93, 44, 44, - 70, 43, 70, 69, 70, 36, 36, 43, 69, 62, 44, 44, 44, 44, 44, 44, - 44, 44, 44, 44, 44, 80, 104, 2, 36, 36, 36, 36, 36, 93, 43, 83, - 2, 104, 161, 79, 44, 44, 44, 44, 80, 36, 36, 62, 80, 36, 36, 62, - 80, 36, 36, 62, 44, 44, 44, 44, 36, 93, 84, 83, 82, 160, 84, 44, - 36, 36, 44, 44, 44, 44, 44, 44, 36, 36, 36, 62, 44, 80, 36, 36, - 162, 162, 162, 162, 162, 162, 162, 162, 163, 163, 163, 163, 163, 163, 163, 163, - 36, 36, 36, 36, 36, 44, 44, 44, 16, 16, 16, 107, 44, 44, 44, 44, - 44, 54, 16, 16, 44, 44, 80, 70, 36, 36, 36, 36, 164, 36, 36, 36, - 36, 36, 36, 62, 36, 36, 62, 62, 36, 80, 62, 36, 36, 36, 36, 36, - 36, 41, 41, 41, 41, 41, 41, 41, 41, 44, 44, 44, 44, 44, 44, 44, - 44, 80, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 4, - 44, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 159, 44, - 2, 2, 2, 165, 125, 44, 44, 44, 6, 166, 167, 143, 143, 143, 143, 143, - 143, 143, 125, 165, 125, 2, 122, 168, 2, 46, 2, 2, 148, 143, 143, 125, - 2, 169, 8, 144, 66, 2, 44, 44, 36, 36, 62, 36, 36, 36, 36, 36, - 36, 36, 36, 36, 36, 36, 62, 78, 55, 2, 3, 2, 4, 5, 6, 2, - 16, 16, 16, 16, 16, 17, 18, 124, 125, 4, 2, 36, 36, 36, 36, 36, - 68, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 40, - 44, 36, 36, 36, 44, 36, 36, 36, 44, 36, 36, 36, 44, 36, 62, 44, - 20, 170, 88, 129, 26, 8, 139, 90, 44, 44, 44, 44, 78, 64, 67, 44, - 36, 36, 36, 36, 36, 36, 80, 36, 36, 36, 36, 36, 36, 62, 36, 80, - 2, 46, 44, 171, 27, 27, 27, 27, 27, 27, 44, 91, 67, 67, 67, 67, - 101, 101, 138, 27, 89, 67, 67, 67, 67, 67, 67, 67, 67, 96, 44, 44, - 67, 67, 67, 67, 67, 67, 51, 44, 27, 27, 44, 44, 44, 44, 44, 44, - 147, 36, 36, 36, 36, 102, 44, 44, 36, 36, 36, 36, 36, 36, 36, 55, - 36, 36, 44, 44, 36, 36, 36, 
36, 172, 101, 101, 44, 44, 44, 44, 44, - 11, 11, 11, 11, 16, 16, 16, 16, 36, 36, 36, 44, 62, 36, 36, 36, - 36, 36, 36, 80, 62, 44, 62, 80, 36, 36, 36, 55, 27, 27, 27, 27, - 36, 36, 36, 27, 27, 27, 44, 55, 36, 36, 36, 36, 36, 44, 44, 55, - 36, 36, 36, 36, 44, 44, 44, 36, 69, 43, 58, 79, 44, 44, 43, 43, - 36, 36, 80, 36, 80, 36, 36, 36, 36, 36, 44, 44, 43, 79, 44, 58, - 27, 27, 27, 27, 44, 44, 44, 44, 2, 2, 2, 2, 46, 44, 44, 44, - 36, 36, 36, 36, 36, 36, 173, 30, 36, 36, 36, 44, 55, 2, 2, 2, - 36, 36, 36, 44, 27, 27, 27, 27, 36, 62, 44, 44, 27, 27, 27, 27, - 36, 36, 36, 36, 62, 44, 44, 44, 27, 27, 27, 27, 27, 27, 27, 96, - 84, 94, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 43, 43, 43, 43, - 43, 43, 43, 61, 2, 2, 2, 44, 44, 27, 27, 27, 27, 27, 27, 27, - 27, 27, 27, 7, 7, 7, 7, 7, 83, 84, 43, 82, 84, 61, 174, 2, - 2, 44, 44, 44, 44, 44, 44, 44, 43, 70, 36, 36, 36, 36, 36, 36, - 36, 36, 36, 69, 43, 43, 84, 43, 43, 43, 79, 7, 7, 7, 7, 7, - 2, 2, 44, 44, 44, 44, 44, 44, 36, 93, 83, 43, 43, 43, 43, 82, - 94, 36, 63, 2, 46, 44, 44, 44, 36, 36, 36, 36, 36, 69, 84, 83, - 43, 43, 43, 84, 44, 44, 44, 44, 101, 102, 44, 44, 44, 44, 44, 44, - 93, 83, 83, 83, 83, 83, 83, 83, 83, 83, 83, 83, 83, 83, 83, 87, - 44, 44, 44, 44, 44, 44, 44, 58, 43, 73, 40, 40, 40, 40, 40, 40, - 36, 44, 44, 44, 44, 44, 44, 44, 67, 67, 67, 90, 91, 67, 67, 67, - 67, 67, 175, 84, 43, 67, 175, 83, 83, 176, 64, 64, 64, 177, 43, 43, - 43, 75, 51, 43, 43, 43, 67, 67, 67, 67, 67, 67, 67, 43, 43, 67, - 67, 67, 67, 67, 67, 67, 67, 44, 67, 43, 75, 44, 44, 44, 44, 44, - 27, 44, 44, 44, 44, 44, 44, 44, 11, 11, 11, 11, 11, 16, 16, 16, - 16, 16, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 16, - 16, 16, 107, 16, 16, 16, 16, 16, 11, 16, 16, 16, 16, 16, 16, 16, - 16, 16, 16, 16, 16, 16, 48, 11, 44, 48, 49, 48, 49, 11, 48, 11, - 11, 11, 11, 16, 16, 54, 54, 16, 16, 16, 54, 16, 16, 16, 16, 16, - 16, 16, 11, 49, 11, 48, 49, 11, 11, 11, 48, 11, 11, 11, 48, 16, - 16, 16, 16, 16, 11, 49, 11, 48, 11, 11, 48, 48, 44, 11, 11, 
11, - 48, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 11, 11, - 11, 11, 11, 16, 16, 16, 16, 16, 16, 16, 16, 44, 11, 11, 11, 11, - 31, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 33, 16, 16, - 16, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 31, 16, 16, - 16, 16, 33, 16, 16, 16, 11, 11, 11, 11, 31, 16, 16, 16, 16, 16, - 16, 16, 16, 16, 16, 16, 16, 33, 16, 16, 16, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 31, 16, 16, 16, 16, 33, 16, 16, 16, - 11, 11, 11, 11, 31, 16, 16, 16, 16, 33, 16, 16, 16, 32, 44, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 36, 36, 80, 36, 36, 36, 36, 36, - 80, 62, 62, 80, 80, 36, 36, 36, 36, 62, 36, 36, 80, 80, 44, 44, - 44, 62, 44, 80, 80, 80, 80, 36, 80, 62, 62, 80, 80, 80, 80, 80, - 80, 62, 62, 80, 36, 62, 36, 36, 36, 62, 36, 36, 80, 36, 62, 62, - 36, 36, 36, 36, 36, 80, 36, 36, 80, 36, 80, 36, 36, 80, 36, 36, - 8, 44, 44, 44, 44, 44, 44, 44, 91, 67, 67, 67, 67, 67, 67, 90, - 27, 27, 27, 27, 27, 96, 44, 44, 44, 44, 44, 67, 67, 67, 67, 67, - 67, 90, 44, 44, 44, 44, 44, 44, 67, 67, 67, 67, 90, 44, 44, 44, - 67, 44, 44, 44, 44, 44, 44, 44, 90, 44, 44, 44, 44, 44, 44, 44, - 67, 67, 67, 91, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 90, 44, - 67, 67, 90, 67, 67, 90, 44, 44, 90, 67, 67, 67, 67, 67, 67, 67, - 67, 67, 67, 67, 91, 67, 90, 44, 67, 67, 67, 67, 44, 44, 44, 44, - 44, 44, 44, 44, 44, 91, 67, 67, 90, 44, 91, 67, 67, 67, 67, 67, - 78, 44, 44, 44, 44, 44, 44, 44, 64, 64, 64, 64, 64, 64, 64, 64, - 163, 163, 163, 163, 163, 163, 163, 44, -}; - -static RE_UINT8 re_general_category_stage_5[] = { - 15, 15, 12, 23, 23, 23, 25, 23, 20, 21, 23, 24, 23, 19, 9, 9, - 24, 24, 24, 23, 23, 1, 1, 1, 1, 20, 23, 21, 26, 22, 26, 2, - 2, 2, 2, 20, 24, 21, 24, 15, 25, 25, 27, 23, 26, 27, 5, 28, - 24, 16, 27, 26, 27, 24, 11, 11, 26, 11, 5, 29, 11, 23, 1, 24, - 1, 2, 2, 24, 2, 1, 2, 5, 5, 5, 1, 3, 3, 2, 5, 2, - 4, 4, 26, 26, 4, 26, 6, 6, 0, 0, 4, 2, 23, 0, 1, 23, - 1, 0, 0, 1, 24, 1, 27, 6, 7, 7, 0, 4, 0, 2, 0, 23, - 19, 0, 0, 25, 0, 6, 19, 6, 23, 6, 6, 23, 5, 
0, 5, 23, - 16, 16, 16, 0, 23, 25, 27, 27, 4, 5, 5, 6, 6, 5, 23, 5, - 6, 16, 6, 4, 4, 6, 6, 27, 5, 27, 27, 5, 0, 16, 6, 0, - 0, 5, 4, 0, 6, 8, 8, 8, 8, 6, 23, 4, 0, 8, 8, 0, - 27, 25, 11, 27, 27, 0, 0, 27, 23, 27, 5, 8, 8, 5, 23, 11, - 11, 0, 19, 5, 12, 5, 5, 20, 21, 0, 10, 10, 10, 0, 19, 23, - 5, 4, 2, 4, 3, 3, 2, 0, 3, 26, 2, 26, 0, 26, 1, 26, - 26, 0, 12, 12, 12, 16, 19, 19, 28, 29, 20, 28, 13, 14, 16, 12, - 23, 28, 29, 23, 23, 22, 22, 23, 24, 20, 21, 23, 23, 12, 11, 4, - 21, 4, 25, 0, 6, 7, 7, 6, 1, 27, 27, 1, 27, 2, 2, 27, - 10, 1, 2, 10, 10, 11, 24, 27, 27, 20, 21, 27, 21, 24, 21, 20, - 24, 0, 2, 6, 27, 4, 5, 10, 19, 20, 21, 21, 27, 10, 19, 4, - 10, 4, 6, 26, 26, 4, 27, 11, 4, 23, 7, 23, 26, 1, 25, 27, - 8, 23, 4, 8, 18, 18, 17, 17, 5, 24, 23, 20, 19, 22, 22, 20, - 22, 22, 24, 19, 24, 26, 0, 11, 23, 10, 5, 11, 23, 16, 27, 8, - 8, 16, 16, 6, -}; - -/* General_Category: 8556 bytes. */ - -RE_UINT32 re_get_general_category(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 11; - code = ch ^ (f << 11); - pos = (RE_UINT32)re_general_category_stage_1[f] << 4; - f = code >> 7; - code ^= f << 7; - pos = (RE_UINT32)re_general_category_stage_2[pos + f] << 3; - f = code >> 4; - code ^= f << 4; - pos = (RE_UINT32)re_general_category_stage_3[pos + f] << 3; - f = code >> 1; - code ^= f << 1; - pos = (RE_UINT32)re_general_category_stage_4[pos + f] << 1; - value = re_general_category_stage_5[pos + code]; - - return value; -} - -/* Block. 
*/ - -static RE_UINT8 re_block_stage_1[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 10, 11, 12, 12, 12, 12, 13, 14, 15, 15, 15, 16, - 17, 18, 19, 20, 21, 20, 22, 20, 20, 20, 20, 20, 20, 23, 20, 20, - 20, 20, 20, 20, 20, 20, 24, 20, 20, 20, 25, 20, 20, 26, 27, 20, - 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, - 28, 28, 28, 28, 29, 30, 31, 32, 20, 20, 20, 20, 20, 20, 20, 33, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 34, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 
20, 20, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, - 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, -}; - -static RE_UINT8 re_block_stage_2[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 9, 10, 11, 11, 12, 13, - 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 28, - 29, 30, 31, 31, 32, 32, 32, 33, 34, 34, 34, 34, 34, 35, 36, 37, - 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 50, 51, 51, - 52, 53, 54, 55, 56, 56, 57, 57, 58, 59, 60, 61, 62, 62, 63, 64, - 65, 65, 66, 67, 68, 68, 69, 69, 70, 71, 72, 73, 74, 75, 76, 77, - 78, 79, 80, 81, 82, 82, 83, 83, 84, 84, 84, 84, 84, 84, 84, 84, - 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, - 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 85, 86, 86, 86, 86, - 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, - 87, 87, 87, 87, 87, 87, 87, 87, 87, 88, 89, 89, 90, 91, 92, 93, - 94, 95, 96, 97, 98, 99, 100, 101, 102, 102, 102, 102, 102, 102, 102, 102, - 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, - 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 103, - 104, 104, 104, 104, 104, 104, 104, 105, 106, 106, 106, 106, 106, 106, 106, 106, - 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, 107, - 107, 107, 108, 108, 108, 108, 109, 110, 110, 110, 110, 110, 111, 112, 113, 114, - 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 119, 119, 119, 119, 119, 119, - 125, 119, 126, 127, 128, 119, 129, 119, 130, 119, 119, 119, 131, 119, 119, 119, - 132, 133, 134, 135, 119, 119, 119, 119, 119, 119, 119, 119, 119, 136, 119, 119, - 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, - 137, 137, 137, 137, 137, 137, 137, 137, 138, 119, 119, 119, 119, 119, 119, 119, - 139, 139, 139, 139, 139, 139, 139, 139, 140, 119, 119, 119, 119, 
119, 119, 119, - 141, 141, 141, 141, 142, 119, 119, 119, 119, 119, 119, 119, 119, 119, 143, 144, - 145, 145, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, - 146, 146, 147, 147, 148, 119, 149, 119, 150, 150, 150, 150, 150, 150, 150, 150, - 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 151, 151, 119, 119, - 152, 153, 154, 154, 155, 155, 156, 156, 156, 156, 156, 156, 157, 158, 159, 119, - 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, - 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 161, 162, 162, - 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, - 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 163, 164, - 165, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, - 166, 166, 166, 166, 167, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, - 168, 119, 169, 170, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, - 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, - 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, 172, -}; - -static RE_UINT8 re_block_stage_3[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, - 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, - 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 10, 10, 10, 10, 10, - 10, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, - 13, 13, 13, 13, 13, 14, 14, 14, 15, 15, 15, 15, 16, 16, 16, 16, - 17, 17, 17, 17, 18, 18, 19, 19, 19, 19, 20, 20, 20, 20, 20, 20, - 21, 21, 21, 21, 21, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, - 23, 23, 23, 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, - 25, 25, 25, 25, 25, 25, 25, 25, 26, 26, 26, 26, 26, 26, 26, 26, - 27, 27, 27, 27, 27, 27, 27, 27, 28, 28, 28, 28, 28, 28, 28, 28, - 29, 29, 29, 29, 29, 29, 29, 29, 30, 30, 30, 30, 30, 30, 30, 30, - 31, 
31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, - 33, 33, 33, 33, 33, 33, 33, 33, 34, 34, 34, 34, 34, 34, 34, 34, - 34, 34, 35, 35, 35, 35, 35, 35, 36, 36, 36, 36, 36, 36, 36, 36, - 37, 37, 37, 37, 37, 37, 37, 37, 38, 38, 39, 39, 39, 39, 39, 39, - 40, 40, 40, 40, 40, 40, 40, 40, 41, 41, 42, 42, 42, 42, 42, 42, - 43, 43, 44, 44, 45, 45, 46, 46, 47, 47, 47, 47, 47, 47, 47, 47, - 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 49, 49, 49, 49, 49, - 50, 50, 50, 50, 50, 51, 51, 51, 52, 52, 52, 52, 52, 52, 53, 53, - 54, 54, 55, 55, 55, 55, 55, 55, 55, 55, 55, 19, 19, 19, 19, 19, - 56, 56, 56, 56, 56, 56, 56, 56, 57, 57, 57, 57, 58, 58, 58, 58, - 59, 59, 59, 59, 59, 60, 60, 60, 19, 19, 19, 19, 61, 62, 62, 62, - 63, 63, 63, 63, 63, 63, 63, 63, 64, 64, 64, 64, 65, 65, 65, 65, - 66, 66, 66, 66, 66, 66, 66, 66, 67, 67, 67, 67, 67, 67, 67, 67, - 68, 68, 68, 68, 68, 68, 68, 69, 69, 69, 70, 70, 70, 71, 71, 71, - 72, 72, 72, 72, 72, 73, 73, 73, 73, 74, 74, 74, 74, 74, 74, 74, - 75, 75, 75, 75, 75, 75, 75, 75, 76, 76, 76, 76, 76, 76, 76, 76, - 77, 77, 77, 77, 78, 78, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, - 80, 80, 80, 80, 80, 80, 80, 80, 81, 81, 82, 82, 82, 82, 82, 82, - 83, 83, 83, 83, 83, 83, 83, 83, 84, 84, 84, 84, 84, 84, 84, 84, - 84, 84, 84, 84, 85, 85, 85, 86, 87, 87, 87, 87, 87, 87, 87, 87, - 88, 88, 88, 88, 88, 88, 88, 88, 89, 89, 89, 89, 89, 89, 89, 89, - 90, 90, 90, 90, 90, 90, 90, 90, 91, 91, 91, 91, 91, 91, 91, 91, - 92, 92, 92, 92, 92, 92, 93, 93, 94, 94, 94, 94, 94, 94, 94, 94, - 95, 95, 95, 96, 96, 96, 96, 96, 97, 97, 97, 97, 97, 97, 98, 98, - 99, 99, 99, 99, 99, 99, 99, 99, 100, 100, 100, 100, 100, 100, 100, 100, - 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 19, 102, - 103, 103, 103, 103, 104, 104, 104, 104, 104, 104, 105, 105, 105, 105, 105, 105, - 106, 106, 106, 107, 107, 107, 107, 107, 107, 108, 109, 109, 110, 110, 110, 111, - 112, 112, 112, 112, 112, 112, 112, 112, 113, 113, 113, 113, 113, 113, 113, 113, - 114, 114, 114, 
114, 114, 114, 114, 114, 114, 114, 114, 114, 115, 115, 115, 115, - 116, 116, 116, 116, 116, 116, 116, 116, 117, 117, 117, 117, 117, 117, 117, 117, - 117, 118, 118, 118, 118, 119, 119, 119, 120, 120, 120, 120, 120, 120, 120, 120, - 120, 120, 120, 120, 121, 121, 121, 121, 121, 121, 122, 122, 122, 122, 122, 122, - 123, 123, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, - 125, 125, 125, 126, 127, 127, 127, 127, 128, 128, 128, 128, 128, 128, 129, 129, - 130, 130, 130, 131, 131, 131, 132, 132, 133, 133, 133, 133, 133, 133, 19, 19, - 134, 134, 134, 134, 134, 134, 135, 135, 136, 136, 136, 136, 136, 136, 137, 137, - 138, 138, 138, 19, 19, 19, 19, 19, 19, 19, 19, 19, 139, 139, 139, 139, - 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 141, 141, 141, 141, 141, - 142, 142, 142, 142, 142, 142, 142, 142, 143, 143, 143, 143, 143, 143, 143, 143, - 144, 144, 144, 144, 144, 144, 144, 144, 145, 145, 145, 145, 145, 145, 145, 145, - 146, 146, 146, 146, 146, 146, 146, 146, 147, 147, 147, 147, 147, 148, 148, 148, - 148, 148, 148, 148, 148, 148, 148, 148, 149, 150, 151, 152, 152, 153, 153, 154, - 154, 154, 154, 154, 154, 154, 154, 154, 155, 155, 155, 155, 155, 155, 155, 155, - 155, 155, 155, 155, 155, 155, 155, 156, 157, 157, 157, 157, 157, 157, 157, 157, - 158, 158, 158, 158, 158, 158, 158, 158, 159, 159, 159, 159, 160, 160, 160, 160, - 160, 161, 161, 161, 161, 162, 162, 162, 19, 19, 19, 19, 19, 19, 19, 19, - 163, 163, 164, 164, 164, 164, 19, 19, 165, 165, 165, 166, 166, 19, 19, 19, - 167, 167, 168, 168, 168, 168, 19, 19, 169, 169, 169, 169, 169, 170, 170, 170, - 171, 171, 171, 19, 19, 19, 19, 19, 172, 172, 172, 172, 173, 173, 19, 19, - 174, 174, 175, 175, 19, 19, 19, 19, 176, 176, 177, 177, 177, 177, 177, 177, - 178, 178, 178, 178, 178, 178, 179, 179, 180, 180, 180, 180, 181, 181, 182, 182, - 183, 183, 183, 183, 183, 19, 19, 19, 19, 19, 19, 19, 19, 19, 184, 184, - 185, 185, 185, 185, 185, 185, 185, 185, 186, 186, 186, 186, 186, 187, 187, 187, - 188, 188, 
188, 188, 188, 19, 19, 19, 189, 189, 189, 189, 189, 189, 19, 19, - 190, 190, 190, 190, 190, 19, 19, 19, 191, 191, 191, 191, 191, 191, 191, 191, - 192, 192, 192, 192, 192, 192, 192, 192, 193, 193, 193, 193, 193, 193, 193, 193, - 193, 193, 193, 19, 19, 19, 19, 19, 194, 194, 194, 194, 194, 194, 194, 194, - 194, 194, 194, 194, 19, 19, 19, 19, 195, 195, 195, 195, 195, 195, 195, 195, - 195, 195, 19, 19, 19, 19, 19, 19, 196, 196, 196, 196, 196, 196, 196, 196, - 197, 197, 197, 197, 197, 197, 197, 197, 198, 198, 198, 198, 198, 198, 198, 198, - 199, 199, 199, 199, 199, 19, 19, 19, 200, 200, 200, 200, 200, 200, 201, 201, - 202, 202, 202, 202, 202, 202, 202, 202, 203, 203, 203, 203, 203, 203, 203, 203, - 204, 204, 204, 205, 205, 205, 205, 205, 205, 205, 206, 206, 206, 206, 206, 206, - 207, 207, 207, 207, 207, 207, 207, 207, 208, 208, 208, 208, 208, 208, 208, 208, - 209, 209, 209, 209, 209, 209, 209, 209, 210, 210, 210, 210, 210, 19, 19, 19, - 211, 211, 211, 211, 211, 211, 211, 211, 212, 212, 212, 212, 212, 212, 212, 212, - 213, 213, 213, 213, 213, 213, 213, 213, 213, 213, 213, 213, 213, 213, 19, 19, - 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 215, 215, 215, 215, - 215, 215, 215, 215, 215, 215, 215, 215, 215, 215, 19, 19, 19, 19, 19, 19, - 216, 216, 216, 216, 216, 216, 216, 216, 216, 216, 19, 19, 19, 19, 19, 19, - 217, 217, 217, 217, 217, 217, 217, 217, 218, 218, 218, 218, 218, 218, 218, 218, - 218, 218, 218, 218, 218, 218, 218, 19, 219, 219, 219, 219, 219, 219, 219, 219, - 220, 220, 220, 220, 220, 220, 220, 220, -}; - -static RE_UINT8 re_block_stage_4[] = { - 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, - 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, - 8, 8, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 11, 11, 11, 11, - 12, 12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15, - 16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18, 19, 19, 19, 19, - 20, 20, 20, 20, 21, 21, 21, 21, 22, 22, 22, 22, 23, 23, 23, 23, - 24, 24, 24, 24, 25, 25, 25, 25, 26, 26, 26, 26, 27, 
27, 27, 27, - 28, 28, 28, 28, 29, 29, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31, - 32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 34, 35, 35, 35, 35, - 36, 36, 36, 36, 37, 37, 37, 37, 38, 38, 38, 38, 39, 39, 39, 39, - 40, 40, 40, 40, 41, 41, 41, 41, 42, 42, 42, 42, 43, 43, 43, 43, - 44, 44, 44, 44, 45, 45, 45, 45, 46, 46, 46, 46, 47, 47, 47, 47, - 48, 48, 48, 48, 49, 49, 49, 49, 50, 50, 50, 50, 51, 51, 51, 51, - 52, 52, 52, 52, 53, 53, 53, 53, 54, 54, 54, 54, 55, 55, 55, 55, - 56, 56, 56, 56, 57, 57, 57, 57, 58, 58, 58, 58, 59, 59, 59, 59, - 60, 60, 60, 60, 61, 61, 61, 61, 62, 62, 62, 62, 63, 63, 63, 63, - 64, 64, 64, 64, 65, 65, 65, 65, 66, 66, 66, 66, 67, 67, 67, 67, - 68, 68, 68, 68, 69, 69, 69, 69, 70, 70, 70, 70, 71, 71, 71, 71, - 72, 72, 72, 72, 73, 73, 73, 73, 74, 74, 74, 74, 75, 75, 75, 75, - 76, 76, 76, 76, 77, 77, 77, 77, 78, 78, 78, 78, 79, 79, 79, 79, - 80, 80, 80, 80, 81, 81, 81, 81, 82, 82, 82, 82, 83, 83, 83, 83, - 84, 84, 84, 84, 85, 85, 85, 85, 86, 86, 86, 86, 87, 87, 87, 87, - 88, 88, 88, 88, 89, 89, 89, 89, 90, 90, 90, 90, 91, 91, 91, 91, - 92, 92, 92, 92, 93, 93, 93, 93, 94, 94, 94, 94, 95, 95, 95, 95, - 96, 96, 96, 96, 97, 97, 97, 97, 98, 98, 98, 98, 99, 99, 99, 99, - 100, 100, 100, 100, 101, 101, 101, 101, 102, 102, 102, 102, 103, 103, 103, 103, - 104, 104, 104, 104, 105, 105, 105, 105, 106, 106, 106, 106, 107, 107, 107, 107, - 108, 108, 108, 108, 109, 109, 109, 109, 110, 110, 110, 110, 111, 111, 111, 111, - 112, 112, 112, 112, 113, 113, 113, 113, 114, 114, 114, 114, 115, 115, 115, 115, - 116, 116, 116, 116, 117, 117, 117, 117, 118, 118, 118, 118, 119, 119, 119, 119, - 120, 120, 120, 120, 121, 121, 121, 121, 122, 122, 122, 122, 123, 123, 123, 123, - 124, 124, 124, 124, 125, 125, 125, 125, 126, 126, 126, 126, 127, 127, 127, 127, - 128, 128, 128, 128, 129, 129, 129, 129, 130, 130, 130, 130, 131, 131, 131, 131, - 132, 132, 132, 132, 133, 133, 133, 133, 134, 134, 134, 134, 135, 135, 135, 135, - 136, 136, 136, 136, 137, 137, 137, 137, 138, 138, 138, 138, 
139, 139, 139, 139, - 140, 140, 140, 140, 141, 141, 141, 141, 142, 142, 142, 142, 143, 143, 143, 143, - 144, 144, 144, 144, 145, 145, 145, 145, 146, 146, 146, 146, 147, 147, 147, 147, - 148, 148, 148, 148, 149, 149, 149, 149, 150, 150, 150, 150, 151, 151, 151, 151, - 152, 152, 152, 152, 153, 153, 153, 153, 154, 154, 154, 154, 155, 155, 155, 155, - 156, 156, 156, 156, 157, 157, 157, 157, 158, 158, 158, 158, 159, 159, 159, 159, - 160, 160, 160, 160, 161, 161, 161, 161, 162, 162, 162, 162, 163, 163, 163, 163, - 164, 164, 164, 164, 165, 165, 165, 165, 166, 166, 166, 166, 167, 167, 167, 167, - 168, 168, 168, 168, 169, 169, 169, 169, 170, 170, 170, 170, 171, 171, 171, 171, - 172, 172, 172, 172, 173, 173, 173, 173, 174, 174, 174, 174, 175, 175, 175, 175, - 176, 176, 176, 176, 177, 177, 177, 177, 178, 178, 178, 178, 179, 179, 179, 179, - 180, 180, 180, 180, 181, 181, 181, 181, 182, 182, 182, 182, 183, 183, 183, 183, - 184, 184, 184, 184, 185, 185, 185, 185, 186, 186, 186, 186, 187, 187, 187, 187, - 188, 188, 188, 188, 189, 189, 189, 189, 190, 190, 190, 190, 191, 191, 191, 191, - 192, 192, 192, 192, 193, 193, 193, 193, 194, 194, 194, 194, 195, 195, 195, 195, - 196, 196, 196, 196, 197, 197, 197, 197, 198, 198, 198, 198, 199, 199, 199, 199, - 200, 200, 200, 200, 201, 201, 201, 201, 202, 202, 202, 202, 203, 203, 203, 203, - 204, 204, 204, 204, 205, 205, 205, 205, 206, 206, 206, 206, 207, 207, 207, 207, - 208, 208, 208, 208, 209, 209, 209, 209, 210, 210, 210, 210, 211, 211, 211, 211, - 212, 212, 212, 212, 213, 213, 213, 213, 214, 214, 214, 214, 215, 215, 215, 215, - 216, 216, 216, 216, 217, 217, 217, 217, 218, 218, 218, 218, 219, 219, 219, 219, - 220, 220, 220, 220, -}; - -static RE_UINT8 re_block_stage_5[] = { - 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, - 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8, - 9, 9, 9, 9, 10, 10, 10, 10, 11, 11, 11, 11, 12, 12, 12, 12, - 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15, 16, 16, 16, 16, - 17, 17, 17, 17, 18, 18, 18, 18, 19, 19, 
19, 19, 0, 0, 0, 0, - 20, 20, 20, 20, 21, 21, 21, 21, 22, 22, 22, 22, 23, 23, 23, 23, - 24, 24, 24, 24, 25, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27, - 28, 28, 28, 28, 29, 29, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31, - 32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 34, 35, 35, 35, 35, - 36, 36, 36, 36, 37, 37, 37, 37, 38, 38, 38, 38, 39, 39, 39, 39, - 40, 40, 40, 40, 41, 41, 41, 41, 42, 42, 42, 42, 43, 43, 43, 43, - 44, 44, 44, 44, 45, 45, 45, 45, 46, 46, 46, 46, 47, 47, 47, 47, - 48, 48, 48, 48, 49, 49, 49, 49, 50, 50, 50, 50, 51, 51, 51, 51, - 52, 52, 52, 52, 53, 53, 53, 53, 54, 54, 54, 54, 55, 55, 55, 55, - 56, 56, 56, 56, 57, 57, 57, 57, 58, 58, 58, 58, 59, 59, 59, 59, - 60, 60, 60, 60, 61, 61, 61, 61, 62, 62, 62, 62, 63, 63, 63, 63, - 64, 64, 64, 64, 65, 65, 65, 65, 66, 66, 66, 66, 67, 67, 67, 67, - 68, 68, 68, 68, 69, 69, 69, 69, 70, 70, 70, 70, 71, 71, 71, 71, - 72, 72, 72, 72, 73, 73, 73, 73, 74, 74, 74, 74, 75, 75, 75, 75, - 76, 76, 76, 76, 77, 77, 77, 77, 78, 78, 78, 78, 79, 79, 79, 79, - 80, 80, 80, 80, 81, 81, 81, 81, 82, 82, 82, 82, 83, 83, 83, 83, - 84, 84, 84, 84, 85, 85, 85, 85, 86, 86, 86, 86, 87, 87, 87, 87, - 88, 88, 88, 88, 89, 89, 89, 89, 90, 90, 90, 90, 91, 91, 91, 91, - 92, 92, 92, 92, 93, 93, 93, 93, 94, 94, 94, 94, 95, 95, 95, 95, - 96, 96, 96, 96, 97, 97, 97, 97, 98, 98, 98, 98, 99, 99, 99, 99, - 100, 100, 100, 100, 101, 101, 101, 101, 102, 102, 102, 102, 103, 103, 103, 103, - 104, 104, 104, 104, 105, 105, 105, 105, 106, 106, 106, 106, 107, 107, 107, 107, - 108, 108, 108, 108, 109, 109, 109, 109, 110, 110, 110, 110, 111, 111, 111, 111, - 112, 112, 112, 112, 113, 113, 113, 113, 114, 114, 114, 114, 115, 115, 115, 115, - 116, 116, 116, 116, 117, 117, 117, 117, 118, 118, 118, 118, 119, 119, 119, 119, - 120, 120, 120, 120, 121, 121, 121, 121, 122, 122, 122, 122, 123, 123, 123, 123, - 124, 124, 124, 124, 125, 125, 125, 125, 126, 126, 126, 126, 127, 127, 127, 127, - 128, 128, 128, 128, 129, 129, 129, 129, 130, 130, 130, 130, 131, 131, 131, 131, - 
132, 132, 132, 132, 133, 133, 133, 133, 134, 134, 134, 134, 135, 135, 135, 135, - 136, 136, 136, 136, 137, 137, 137, 137, 138, 138, 138, 138, 139, 139, 139, 139, - 140, 140, 140, 140, 141, 141, 141, 141, 142, 142, 142, 142, 143, 143, 143, 143, - 144, 144, 144, 144, 145, 145, 145, 145, 146, 146, 146, 146, 147, 147, 147, 147, - 148, 148, 148, 148, 149, 149, 149, 149, 150, 150, 150, 150, 151, 151, 151, 151, - 152, 152, 152, 152, 153, 153, 153, 153, 154, 154, 154, 154, 155, 155, 155, 155, - 156, 156, 156, 156, 157, 157, 157, 157, 158, 158, 158, 158, 159, 159, 159, 159, - 160, 160, 160, 160, 161, 161, 161, 161, 162, 162, 162, 162, 163, 163, 163, 163, - 164, 164, 164, 164, 165, 165, 165, 165, 166, 166, 166, 166, 167, 167, 167, 167, - 168, 168, 168, 168, 169, 169, 169, 169, 170, 170, 170, 170, 171, 171, 171, 171, - 172, 172, 172, 172, 173, 173, 173, 173, 174, 174, 174, 174, 175, 175, 175, 175, - 176, 176, 176, 176, 177, 177, 177, 177, 178, 178, 178, 178, 179, 179, 179, 179, - 180, 180, 180, 180, 181, 181, 181, 181, 182, 182, 182, 182, 183, 183, 183, 183, - 184, 184, 184, 184, 185, 185, 185, 185, 186, 186, 186, 186, 187, 187, 187, 187, - 188, 188, 188, 188, 189, 189, 189, 189, 190, 190, 190, 190, 191, 191, 191, 191, - 192, 192, 192, 192, 193, 193, 193, 193, 194, 194, 194, 194, 195, 195, 195, 195, - 196, 196, 196, 196, 197, 197, 197, 197, 198, 198, 198, 198, 199, 199, 199, 199, - 200, 200, 200, 200, 201, 201, 201, 201, 202, 202, 202, 202, 203, 203, 203, 203, - 204, 204, 204, 204, 205, 205, 205, 205, 206, 206, 206, 206, 207, 207, 207, 207, - 208, 208, 208, 208, 209, 209, 209, 209, 210, 210, 210, 210, 211, 211, 211, 211, - 212, 212, 212, 212, 213, 213, 213, 213, 214, 214, 214, 214, 215, 215, 215, 215, - 216, 216, 216, 216, 217, 217, 217, 217, 218, 218, 218, 218, 219, 219, 219, 219, - 220, 220, 220, 220, -}; - -/* Block: 4288 bytes. 
*/ - -RE_UINT32 re_get_block(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 11; - code = ch ^ (f << 11); - pos = (RE_UINT32)re_block_stage_1[f] << 4; - f = code >> 7; - code ^= f << 7; - pos = (RE_UINT32)re_block_stage_2[pos + f] << 3; - f = code >> 4; - code ^= f << 4; - pos = (RE_UINT32)re_block_stage_3[pos + f] << 2; - f = code >> 2; - code ^= f << 2; - pos = (RE_UINT32)re_block_stage_4[pos + f] << 2; - value = re_block_stage_5[pos + code]; - - return value; -} - -/* Script. */ - -static RE_UINT8 re_script_stage_1[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 9, 10, 11, 12, 12, 12, 12, 13, 14, 14, 14, 14, 15, - 16, 17, 18, 14, 19, 14, 20, 14, 14, 14, 14, 14, 14, 21, 14, 14, - 14, 14, 14, 14, 14, 14, 22, 14, 14, 14, 23, 14, 14, 24, 25, 14, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 26, 7, 27, 28, 14, 14, 14, 14, 14, 14, 14, 29, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 30, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, -}; - -static RE_UINT8 re_script_stage_2[] = { - 0, 1, 2, 2, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, - 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, - 30, 31, 32, 32, 33, 34, 35, 36, 37, 37, 37, 37, 37, 38, 39, 40, - 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 2, 2, 53, 54, - 55, 56, 57, 58, 59, 59, 59, 60, 61, 59, 59, 59, 59, 59, 62, 59, - 63, 63, 59, 59, 59, 59, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, - 74, 75, 76, 77, 78, 79, 80, 59, 72, 72, 72, 72, 72, 72, 72, 72, - 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, - 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 81, 72, 72, 72, 72, - 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 82, - 83, 83, 83, 83, 83, 83, 83, 83, 83, 84, 85, 85, 86, 87, 88, 89, - 90, 91, 92, 93, 94, 95, 96, 97, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 98, - 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, - 65, 65, 72, 72, 99, 100, 101, 102, 103, 103, 104, 105, 106, 107, 108, 109, - 110, 111, 112, 113, 65, 114, 115, 116, 117, 118, 65, 65, 65, 65, 65, 65, - 119, 65, 120, 121, 122, 65, 123, 65, 
124, 65, 65, 65, 125, 65, 65, 65, - 126, 127, 128, 129, 65, 65, 65, 65, 65, 65, 65, 65, 65, 130, 65, 65, - 131, 131, 131, 131, 131, 131, 132, 65, 133, 65, 65, 65, 65, 65, 65, 65, - 134, 134, 134, 134, 134, 134, 134, 134, 135, 65, 65, 65, 65, 65, 65, 65, - 136, 136, 136, 136, 137, 65, 65, 65, 65, 65, 65, 65, 65, 65, 138, 139, - 140, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, - 59, 141, 142, 143, 144, 65, 145, 65, 146, 147, 148, 59, 59, 149, 59, 150, - 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 151, 152, 65, 65, - 153, 154, 155, 156, 157, 65, 158, 159, 160, 161, 162, 163, 164, 165, 60, 65, - 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 166, 72, 72, - 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 167, 72, - 168, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, - 72, 72, 72, 72, 168, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, - 169, 65, 170, 171, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, -}; - -static RE_UINT16 re_script_stage_3[] = { - 0, 0, 0, 0, 1, 2, 1, 2, 0, 0, 3, 3, 4, 5, 4, 5, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 6, 0, 0, 7, 0, - 8, 8, 8, 8, 8, 8, 8, 9, 10, 11, 12, 11, 11, 11, 13, 11, - 14, 14, 14, 14, 14, 14, 14, 14, 15, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 16, 17, 18, 19, 17, 18, 20, 21, 22, 22, 23, 22, 24, 25, - 26, 27, 28, 28, 29, 30, 31, 32, 28, 28, 28, 28, 28, 33, 28, 28, - 34, 35, 35, 35, 36, 28, 28, 28, 37, 37, 37, 38, 39, 39, 39, 40, - 41, 41, 42, 43, 44, 45, 46, 46, 46, 46, 47, 46, 46, 46, 48, 49, - 50, 50, 50, 50, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, - 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, - 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, - 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, - 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 46, 124, - 125, 126, 126, 127, 126, 128, 46, 46, 129, 130, 131, 132, 133, 134, 46, 46, - 135, 135, 135, 135, 136, 135, 137, 138, 135, 136, 135, 139, 139, 140, 46, 
46, - 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 142, 142, 143, 142, 142, 144, - 145, 145, 145, 145, 145, 145, 145, 145, 146, 146, 146, 146, 147, 148, 146, 146, - 147, 146, 146, 149, 150, 151, 146, 146, 146, 150, 146, 146, 146, 152, 146, 153, - 146, 154, 155, 155, 155, 155, 155, 156, 157, 157, 157, 157, 157, 157, 157, 157, - 158, 159, 160, 160, 160, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, - 171, 171, 171, 171, 171, 172, 173, 173, 174, 175, 176, 176, 176, 176, 176, 177, - 176, 176, 178, 157, 157, 157, 157, 179, 180, 181, 182, 182, 183, 184, 185, 186, - 187, 187, 188, 187, 189, 190, 171, 171, 191, 192, 193, 193, 193, 194, 193, 195, - 196, 196, 197, 46, 46, 46, 46, 46, 198, 198, 198, 198, 199, 198, 198, 200, - 201, 201, 201, 201, 202, 202, 202, 203, 204, 204, 204, 205, 206, 207, 207, 207, - 46, 46, 46, 46, 208, 209, 210, 211, 4, 4, 212, 4, 4, 213, 214, 215, - 4, 4, 4, 216, 8, 8, 217, 218, 11, 219, 11, 11, 219, 220, 11, 221, - 11, 11, 11, 222, 222, 223, 11, 224, 225, 0, 0, 0, 0, 0, 226, 227, - 228, 229, 0, 230, 46, 8, 8, 231, 0, 0, 232, 233, 234, 0, 4, 4, - 235, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 236, 0, 0, 237, 46, 230, 46, 0, 0, - 238, 0, 0, 0, 0, 0, 0, 0, 239, 239, 239, 239, 239, 239, 239, 239, - 0, 0, 0, 0, 240, 241, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, - 242, 242, 243, 242, 242, 243, 4, 4, 244, 244, 244, 244, 244, 244, 244, 245, - 142, 142, 143, 246, 246, 246, 247, 248, 146, 249, 250, 250, 250, 250, 14, 14, - 0, 0, 0, 251, 46, 46, 46, 46, 252, 253, 252, 252, 252, 252, 252, 254, - 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 255, 46, 251, - 256, 0, 257, 258, 259, 260, 260, 260, 260, 261, 262, 263, 263, 263, 263, 264, - 265, 266, 267, 268, 145, 145, 145, 145, 269, 0, 266, 270, 0, 0, 236, 263, - 145, 269, 0, 0, 0, 0, 145, 271, 0, 0, 0, 0, 0, 263, 263, 272, - 263, 263, 263, 263, 263, 273, 0, 0, 252, 252, 252, 255, 0, 0, 0, 0, - 252, 252, 252, 252, 274, 46, 46, 46, 275, 275, 275, 275, 275, 
275, 275, 275, - 276, 275, 275, 275, 277, 278, 278, 278, 279, 279, 279, 279, 279, 279, 279, 279, - 279, 279, 280, 46, 14, 14, 14, 14, 14, 281, 282, 282, 282, 282, 282, 283, - 0, 0, 284, 4, 4, 4, 4, 4, 285, 286, 287, 46, 46, 46, 46, 288, - 289, 289, 290, 241, 291, 291, 291, 292, 293, 293, 293, 293, 294, 295, 50, 296, - 297, 297, 297, 298, 298, 299, 145, 300, 301, 301, 301, 301, 302, 303, 46, 46, - 304, 304, 304, 305, 306, 307, 141, 308, 309, 309, 309, 309, 310, 311, 312, 313, - 314, 315, 250, 46, 46, 46, 46, 46, 46, 46, 46, 46, 312, 312, 316, 317, - 145, 145, 318, 145, 319, 145, 145, 320, 252, 252, 252, 252, 252, 252, 321, 252, - 252, 252, 252, 252, 252, 322, 46, 46, 323, 324, 22, 325, 326, 28, 28, 28, - 28, 28, 28, 28, 327, 328, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, - 28, 28, 28, 329, 46, 28, 28, 28, 28, 330, 28, 28, 331, 46, 46, 332, - 8, 241, 217, 0, 0, 333, 334, 335, 28, 28, 28, 28, 28, 28, 28, 336, - 238, 0, 1, 2, 1, 2, 337, 262, 263, 338, 145, 269, 339, 340, 341, 342, - 343, 344, 345, 346, 347, 347, 46, 46, 344, 344, 344, 344, 344, 344, 344, 348, - 349, 0, 0, 350, 11, 11, 11, 11, 351, 251, 46, 46, 46, 0, 0, 352, - 353, 354, 355, 355, 355, 356, 46, 46, 357, 358, 359, 360, 361, 46, 46, 46, - 362, 363, 364, 364, 365, 366, 46, 46, 367, 367, 367, 367, 367, 368, 368, 368, - 369, 370, 371, 46, 46, 46, 46, 46, 372, 373, 373, 374, 375, 376, 46, 46, - 377, 378, 379, 380, 46, 46, 46, 46, 381, 381, 382, 383, 46, 46, 46, 46, - 384, 385, 386, 387, 388, 389, 390, 390, 391, 391, 391, 392, 393, 394, 395, 396, - 397, 397, 397, 397, 398, 46, 46, 46, 46, 46, 46, 46, 46, 46, 28, 49, - 399, 399, 399, 399, 400, 401, 399, 46, 402, 402, 402, 402, 403, 404, 405, 406, - 407, 407, 407, 408, 409, 46, 46, 46, 410, 410, 410, 410, 411, 412, 46, 46, - 413, 413, 413, 414, 415, 46, 46, 46, 416, 416, 416, 416, 416, 416, 416, 416, - 416, 416, 416, 416, 416, 416, 417, 46, 416, 416, 416, 416, 416, 416, 418, 419, - 420, 420, 420, 420, 420, 420, 420, 420, 420, 420, 421, 46, 46, 46, 46, 46, - 282, 
282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 422, 46, 46, 46, 46, - 423, 423, 423, 423, 424, 423, 423, 425, 426, 423, 46, 46, 46, 46, 46, 46, - 427, 46, 46, 46, 46, 46, 46, 46, 0, 0, 0, 0, 0, 0, 0, 428, - 0, 0, 429, 0, 0, 0, 430, 431, 432, 0, 433, 0, 0, 434, 46, 46, - 11, 11, 11, 11, 435, 46, 46, 46, 0, 0, 0, 0, 0, 237, 0, 436, - 0, 0, 0, 0, 0, 226, 0, 0, 0, 437, 438, 439, 440, 0, 0, 0, - 441, 442, 0, 443, 444, 445, 0, 0, 0, 0, 446, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 447, 0, 0, 0, 448, 28, 449, 450, 451, 452, 453, 454, - 455, 456, 457, 456, 46, 46, 46, 327, 0, 0, 251, 0, 0, 0, 0, 0, - 0, 236, 228, 458, 238, 238, 46, 46, 230, 0, 228, 0, 0, 0, 251, 0, - 0, 230, 46, 46, 46, 46, 459, 0, 460, 0, 0, 230, 461, 436, 46, 46, - 0, 0, 462, 463, 0, 0, 0, 240, 0, 236, 0, 0, 464, 46, 0, 462, - 0, 0, 0, 228, 445, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 465, - 0, 0, 0, 434, 236, 0, 466, 46, 46, 46, 46, 46, 46, 46, 46, 467, - 0, 0, 0, 0, 468, 46, 46, 46, 0, 0, 0, 0, 428, 46, 46, 46, - 252, 252, 252, 252, 252, 469, 46, 46, 252, 252, 252, 470, 252, 252, 252, 252, - 252, 321, 46, 46, 46, 46, 46, 46, 471, 46, 0, 0, 0, 0, 0, 0, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 46, -}; - -static RE_UINT8 re_script_stage_4[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 3, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 2, - 2, 2, 2, 2, 3, 0, 0, 0, 2, 2, 3, 0, 0, 4, 0, 0, - 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 7, 6, 8, 6, 6, 9, - 8, 8, 10, 10, 6, 11, 11, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 12, 6, 6, 6, 6, 6, 6, 6, 13, 13, 13, 13, 13, 13, 13, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15, 16, 14, 14, 14, 14, - 14, 14, 14, 14, 8, 8, 8, 8, 17, 18, 18, 18, 18, 18, 18, 18, - 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 19, 17, 18, 18, 18, - 18, 18, 18, 18, 20, 19, 8, 17, 21, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 8, 8, 8, 8, - 22, 22, 22, 22, 22, 23, 8, 8, 22, 22, 23, 8, 8, 8, 8, 8, - 24, 24, 25, 24, 24, 24, 26, 24, 24, 24, 24, 
24, 24, 27, 25, 27, - 24, 24, 24, 24, 24, 24, 24, 24, 26, 24, 24, 24, 24, 28, 5, 5, - 5, 5, 5, 24, 24, 24, 24, 24, 0, 0, 0, 0, 0, 24, 24, 24, - 29, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 27, 24, - 30, 30, 30, 30, 30, 30, 30, 31, 30, 30, 30, 30, 30, 30, 30, 30, - 30, 30, 30, 30, 30, 32, 31, 30, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 8, 8, 8, 8, 8, 8, 8, 34, 34, 34, 34, 34, 34, 34, 34, - 34, 34, 34, 34, 34, 35, 8, 8, 36, 36, 36, 36, 36, 36, 36, 36, - 36, 36, 36, 36, 36, 36, 36, 8, 36, 36, 36, 36, 36, 36, 36, 37, - 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 8, 39, - 8, 8, 8, 8, 8, 8, 8, 8, 25, 24, 24, 24, 24, 24, 25, 8, - 8, 8, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 25, - 40, 40, 40, 40, 40, 40, 40, 40, 41, 42, 40, 40, 40, 40, 40, 40, - 40, 40, 0, 40, 40, 40, 40, 40, 40, 40, 40, 40, 43, 40, 40, 40, - 44, 45, 44, 45, 45, 45, 46, 44, 46, 44, 45, 45, 45, 45, 45, 45, - 45, 45, 45, 45, 46, 45, 45, 45, 46, 46, 8, 45, 45, 8, 45, 45, - 45, 45, 46, 44, 46, 44, 45, 46, 8, 8, 8, 44, 8, 8, 45, 44, - 45, 45, 8, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 8, 8, - 47, 48, 47, 48, 48, 49, 8, 47, 49, 47, 48, 48, 48, 48, 48, 48, - 48, 48, 48, 48, 49, 48, 48, 48, 49, 48, 47, 49, 48, 8, 49, 48, - 48, 49, 8, 47, 49, 47, 48, 8, 47, 8, 8, 8, 47, 48, 49, 49, - 8, 8, 8, 48, 48, 48, 48, 48, 48, 48, 48, 8, 8, 8, 8, 8, - 50, 51, 50, 51, 51, 51, 51, 50, 51, 50, 51, 51, 51, 51, 51, 51, - 51, 51, 51, 51, 52, 51, 51, 51, 52, 51, 50, 51, 51, 8, 51, 51, - 51, 51, 51, 50, 51, 50, 51, 8, 52, 8, 8, 8, 8, 8, 8, 8, - 51, 51, 8, 51, 51, 51, 51, 51, 51, 8, 8, 8, 8, 8, 8, 8, - 53, 54, 53, 54, 54, 54, 55, 53, 55, 53, 54, 54, 54, 54, 54, 54, - 54, 54, 54, 54, 55, 54, 54, 54, 55, 54, 53, 54, 54, 8, 54, 54, - 54, 54, 55, 53, 55, 53, 54, 8, 8, 8, 8, 54, 8, 8, 54, 53, - 54, 54, 8, 54, 54, 54, 54, 54, 54, 54, 54, 54, 8, 8, 8, 8, - 8, 56, 57, 56, 56, 58, 8, 56, 58, 56, 56, 8, 57, 58, 58, 56, - 8, 57, 58, 8, 56, 58, 8, 56, 56, 56, 56, 56, 56, 8, 8, 56, - 56, 58, 8, 56, 58, 56, 56, 
8, 58, 8, 8, 57, 8, 8, 8, 8, - 8, 8, 8, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 58, 8, 8, - 59, 60, 59, 60, 60, 60, 61, 60, 61, 60, 60, 60, 60, 60, 60, 60, - 60, 60, 60, 60, 61, 60, 60, 60, 60, 60, 59, 60, 60, 8, 59, 60, - 60, 60, 61, 60, 61, 60, 60, 8, 8, 8, 59, 61, 60, 8, 8, 8, - 60, 60, 8, 60, 60, 60, 60, 60, 8, 8, 8, 8, 60, 60, 60, 60, - 8, 62, 63, 62, 62, 62, 64, 62, 64, 62, 62, 62, 62, 62, 62, 62, - 62, 62, 62, 62, 64, 62, 62, 62, 62, 62, 63, 62, 62, 8, 62, 62, - 62, 62, 64, 62, 64, 62, 62, 8, 8, 8, 63, 64, 8, 8, 8, 64, - 62, 62, 8, 62, 62, 62, 62, 62, 63, 64, 8, 8, 8, 8, 8, 8, - 8, 65, 66, 65, 65, 65, 67, 65, 67, 65, 65, 65, 65, 65, 65, 65, - 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 67, 66, 65, - 65, 65, 67, 65, 67, 65, 65, 67, 8, 8, 8, 66, 8, 8, 8, 8, - 65, 65, 8, 65, 65, 65, 65, 65, 65, 65, 65, 8, 66, 65, 65, 65, - 8, 68, 69, 68, 68, 68, 68, 68, 68, 68, 68, 70, 8, 68, 68, 68, - 68, 68, 68, 68, 68, 68, 68, 68, 68, 69, 68, 68, 68, 68, 69, 8, - 68, 68, 68, 70, 8, 70, 8, 69, 68, 68, 70, 70, 68, 68, 68, 68, - 8, 68, 70, 8, 8, 8, 8, 8, 71, 72, 72, 72, 72, 72, 72, 72, - 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 73, 8, 20, - 72, 72, 72, 72, 72, 72, 8, 8, 74, 75, 75, 74, 75, 75, 74, 8, - 8, 8, 76, 76, 74, 76, 76, 76, 74, 76, 74, 74, 8, 76, 74, 76, - 76, 76, 76, 76, 76, 74, 76, 8, 76, 76, 75, 75, 76, 76, 76, 8, - 76, 76, 76, 76, 76, 8, 76, 76, 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 78, 77, 77, 77, 77, 77, 77, 77, 77, 77, 79, 8, - 78, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 79, 77, - 77, 77, 80, 0, 81, 79, 8, 8, 82, 82, 82, 82, 82, 82, 82, 82, - 83, 83, 83, 83, 83, 83, 83, 83, 83, 83, 83, 84, 8, 8, 84, 8, - 83, 83, 83, 83, 83, 85, 83, 83, 86, 86, 86, 86, 86, 86, 86, 86, - 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 88, 87, 87, 8, - 87, 87, 87, 88, 88, 87, 87, 8, 88, 87, 87, 8, 87, 87, 87, 88, - 88, 87, 87, 8, 87, 87, 87, 87, 87, 87, 87, 88, 87, 87, 87, 87, - 87, 87, 87, 87, 87, 88, 89, 87, 87, 87, 87, 87, 87, 87, 88, 8, 
- 87, 87, 87, 87, 87, 8, 8, 8, 90, 90, 90, 90, 90, 90, 90, 90, - 90, 90, 91, 8, 8, 8, 8, 8, 92, 92, 92, 92, 92, 92, 92, 92, - 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 94, 8, - 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 96, 0, 95, - 97, 8, 8, 8, 8, 8, 8, 8, 98, 98, 98, 98, 98, 98, 99, 98, - 98, 98, 99, 8, 8, 8, 8, 8, 100, 100, 100, 100, 100, 100, 100, 100, - 100, 100, 101, 9, 8, 8, 8, 8, 102, 102, 102, 102, 102, 102, 102, 102, - 102, 102, 8, 8, 8, 8, 8, 8, 103, 103, 103, 103, 103, 103, 104, 103, - 104, 103, 8, 8, 8, 8, 8, 8, 105, 105, 105, 105, 105, 105, 105, 105, - 105, 105, 105, 105, 105, 105, 105, 8, 105, 105, 105, 105, 105, 8, 8, 8, - 106, 0, 107, 106, 106, 106, 106, 108, 106, 106, 106, 106, 106, 8, 8, 8, - 106, 106, 106, 106, 106, 106, 106, 106, 106, 106, 106, 106, 8, 8, 8, 8, - 106, 106, 106, 106, 106, 108, 8, 8, 92, 92, 92, 8, 8, 8, 8, 8, - 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 110, 8, - 109, 109, 109, 109, 109, 109, 8, 8, 110, 8, 109, 109, 109, 109, 109, 109, - 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 8, - 111, 111, 112, 8, 8, 8, 8, 8, 113, 113, 113, 113, 113, 113, 113, 113, - 113, 113, 113, 113, 113, 113, 8, 8, 113, 113, 113, 113, 113, 8, 8, 8, - 113, 113, 113, 113, 113, 114, 8, 113, 115, 115, 115, 115, 115, 115, 115, 115, - 115, 115, 115, 115, 115, 115, 8, 115, 116, 116, 116, 116, 116, 116, 116, 116, - 116, 116, 116, 116, 116, 116, 116, 117, 116, 116, 116, 116, 116, 116, 117, 118, - 116, 116, 116, 116, 116, 8, 8, 8, 116, 116, 116, 116, 116, 116, 116, 8, - 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 8, 8, - 119, 119, 119, 119, 119, 119, 120, 8, 121, 121, 121, 121, 121, 121, 121, 121, - 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 8, 8, 8, 8, 122, 122, - 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 8, 124, 123, 123, - 123, 123, 123, 123, 123, 8, 124, 123, 125, 125, 125, 125, 125, 125, 125, 125, - 121, 121, 121, 121, 8, 8, 8, 
8, 5, 126, 5, 5, 5, 5, 5, 5, - 126, 5, 5, 5, 126, 0, 127, 0, 0, 0, 126, 9, 8, 8, 8, 8, - 2, 2, 2, 6, 6, 128, 2, 2, 2, 2, 2, 2, 2, 2, 129, 6, - 6, 2, 2, 6, 6, 130, 2, 2, 2, 2, 2, 2, 131, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 129, 5, 5, 5, 132, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 5, 5, 6, 6, 6, 8, 6, 6, 6, 8, - 6, 6, 6, 6, 12, 12, 12, 12, 6, 6, 6, 6, 6, 6, 6, 8, - 6, 6, 11, 6, 6, 6, 6, 6, 6, 6, 8, 6, 6, 6, 12, 6, - 8, 6, 11, 6, 6, 6, 6, 11, 0, 0, 0, 0, 0, 0, 5, 0, - 0, 0, 9, 0, 0, 0, 0, 0, 1, 8, 0, 0, 0, 0, 0, 1, - 0, 0, 0, 0, 0, 0, 0, 9, 2, 2, 2, 2, 2, 2, 133, 8, - 0, 0, 0, 0, 0, 9, 8, 8, 132, 8, 8, 8, 8, 8, 8, 8, - 0, 0, 0, 10, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 3, 2, 2, 2, 2, 3, 8, 8, 8, - 0, 0, 8, 8, 8, 8, 8, 8, 0, 0, 0, 9, 8, 8, 8, 8, - 20, 0, 0, 0, 0, 0, 0, 0, 134, 134, 134, 134, 134, 134, 134, 134, - 0, 0, 0, 0, 0, 0, 9, 8, 0, 0, 0, 0, 0, 8, 8, 8, - 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, 136, - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 8, 8, 137, 13, 13, 13, - 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 138, 8, 8, 8, 139, - 140, 8, 8, 8, 8, 8, 8, 139, 87, 87, 87, 88, 8, 8, 8, 8, - 87, 87, 87, 88, 87, 87, 87, 88, 0, 0, 0, 0, 0, 0, 8, 8, - 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 142, 141, 141, - 141, 141, 8, 8, 8, 8, 8, 8, 141, 141, 141, 8, 8, 8, 8, 8, - 0, 0, 143, 143, 0, 0, 0, 0, 143, 141, 141, 141, 141, 5, 5, 86, - 0, 0, 0, 0, 141, 141, 0, 0, 144, 145, 145, 145, 145, 145, 145, 145, - 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 146, 147, 126, 148, 145, - 149, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, - 150, 150, 150, 150, 150, 151, 149, 150, 8, 8, 152, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, - 153, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 154, - 4, 4, 4, 4, 4, 155, 8, 8, 86, 86, 86, 86, 86, 86, 86, 156, - 150, 150, 150, 150, 150, 150, 150, 157, 150, 150, 150, 150, 0, 0, 0, 0, - 141, 141, 141, 
141, 141, 141, 158, 8, 159, 159, 159, 159, 159, 159, 159, 159, - 159, 159, 159, 159, 159, 159, 160, 8, 159, 159, 159, 160, 8, 8, 8, 8, - 161, 161, 161, 161, 161, 161, 161, 161, 162, 162, 162, 162, 162, 162, 162, 162, - 162, 162, 162, 162, 162, 162, 8, 8, 14, 14, 14, 14, 8, 8, 8, 163, - 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 8, 8, 8, 8, - 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 1, 2, 133, - 2, 2, 8, 8, 8, 8, 8, 8, 2, 2, 2, 2, 2, 133, 8, 8, - 8, 8, 8, 8, 2, 2, 2, 2, 165, 165, 165, 165, 165, 165, 165, 165, - 165, 165, 165, 165, 165, 165, 8, 8, 166, 166, 166, 166, 166, 166, 166, 166, - 166, 166, 166, 166, 8, 8, 8, 8, 167, 167, 167, 167, 167, 167, 167, 167, - 167, 167, 168, 8, 8, 8, 8, 167, 167, 167, 167, 167, 167, 8, 8, 8, - 40, 40, 40, 40, 40, 40, 8, 8, 169, 169, 169, 169, 169, 169, 169, 169, - 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 8, 8, 8, 8, 8, 171, - 86, 86, 86, 86, 86, 86, 154, 8, 172, 172, 172, 172, 172, 172, 172, 172, - 172, 172, 172, 172, 172, 172, 172, 20, 172, 172, 172, 172, 172, 8, 8, 172, - 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, 174, 8, 8, 8, 8, - 173, 173, 173, 173, 173, 173, 173, 8, 173, 173, 173, 173, 173, 8, 173, 173, - 82, 82, 82, 82, 82, 82, 8, 8, 175, 175, 175, 175, 175, 175, 175, 175, - 175, 176, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 177, 175, 175, - 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 178, 179, 8, 8, 8, 8, - 89, 87, 87, 88, 89, 87, 87, 88, 89, 87, 87, 88, 8, 8, 8, 8, - 178, 178, 178, 178, 178, 178, 178, 8, 178, 178, 178, 178, 178, 8, 8, 8, - 86, 86, 8, 8, 8, 8, 8, 8, 86, 86, 86, 154, 8, 153, 86, 86, - 86, 86, 86, 86, 86, 86, 8, 8, 141, 141, 141, 141, 141, 141, 141, 8, - 141, 141, 141, 141, 141, 8, 8, 8, 2, 2, 2, 133, 8, 8, 8, 8, - 8, 17, 18, 18, 8, 8, 21, 22, 22, 22, 22, 23, 22, 22, 23, 23, - 22, 21, 23, 22, 22, 22, 22, 22, 24, 8, 8, 8, 8, 8, 8, 8, - 8, 180, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, - 8, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 8, 8, 8, 8, - 24, 24, 24, 24, 
24, 24, 27, 8, 0, 9, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 9, 0, 0, 8, 8, 24, 24, 25, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 25, 20, 0, 0, 0, 150, 150, 150, 150, 150, - 150, 150, 150, 150, 150, 150, 150, 0, 8, 86, 86, 86, 8, 86, 86, 86, - 8, 86, 86, 86, 8, 86, 154, 8, 0, 0, 0, 9, 0, 0, 0, 9, - 8, 8, 8, 8, 20, 0, 0, 8, 181, 181, 181, 181, 181, 181, 182, 181, - 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 181, 183, 181, 181, 181, 181, - 181, 181, 181, 181, 181, 183, 181, 182, 181, 181, 181, 181, 181, 181, 181, 8, - 181, 181, 181, 181, 181, 183, 8, 8, 0, 9, 8, 20, 0, 0, 0, 0, - 0, 0, 8, 20, 0, 0, 0, 0, 6, 6, 6, 6, 6, 11, 8, 8, - 0, 0, 0, 0, 0, 0, 127, 8, 184, 184, 184, 184, 184, 184, 184, 184, - 184, 184, 184, 184, 184, 184, 185, 8, 186, 186, 186, 186, 186, 186, 186, 186, - 187, 8, 8, 8, 8, 8, 8, 8, 188, 188, 188, 188, 188, 188, 188, 188, - 188, 188, 188, 188, 188, 188, 188, 189, 188, 188, 8, 8, 8, 8, 8, 8, - 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 191, 8, 8, - 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 193, - 194, 194, 194, 194, 194, 194, 194, 194, 194, 194, 8, 8, 194, 194, 194, 194, - 194, 194, 194, 8, 8, 8, 8, 8, 195, 195, 195, 195, 195, 195, 195, 195, - 196, 196, 196, 196, 196, 196, 196, 196, 197, 197, 197, 197, 197, 197, 197, 197, - 197, 197, 197, 197, 197, 197, 197, 8, 197, 197, 197, 197, 197, 8, 8, 8, - 198, 198, 198, 8, 199, 198, 198, 198, 198, 198, 198, 198, 198, 198, 198, 198, - 198, 198, 198, 200, 199, 8, 199, 200, 201, 201, 201, 201, 201, 201, 201, 201, - 201, 201, 201, 202, 201, 201, 201, 201, 203, 203, 203, 203, 203, 203, 203, 203, - 203, 203, 203, 203, 203, 203, 8, 204, 205, 205, 205, 205, 205, 205, 205, 205, - 205, 205, 205, 205, 205, 8, 8, 206, 207, 207, 207, 207, 207, 207, 207, 207, - 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, 8, 8, 8, 208, - 209, 209, 210, 211, 8, 8, 209, 209, 209, 209, 210, 209, 210, 209, 209, 209, - 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 8, 8, 
209, 211, 8, 210, - 209, 209, 209, 209, 8, 8, 8, 8, 209, 209, 209, 209, 211, 8, 8, 8, - 212, 212, 212, 212, 212, 212, 212, 212, 213, 213, 213, 213, 213, 213, 213, 213, - 213, 213, 213, 8, 214, 213, 213, 213, 215, 215, 215, 215, 215, 215, 215, 215, - 215, 215, 215, 8, 215, 215, 215, 215, 216, 216, 216, 216, 216, 216, 216, 216, - 216, 217, 8, 8, 216, 216, 216, 216, 218, 218, 218, 218, 218, 218, 218, 218, - 218, 218, 218, 218, 219, 8, 8, 8, 220, 220, 220, 220, 220, 220, 220, 220, - 220, 220, 220, 220, 220, 220, 220, 8, 8, 220, 220, 220, 220, 220, 220, 220, - 221, 221, 221, 221, 221, 221, 221, 221, 221, 8, 8, 8, 8, 8, 8, 8, - 222, 222, 222, 222, 222, 222, 222, 222, 222, 222, 222, 222, 223, 8, 8, 8, - 222, 222, 222, 222, 222, 8, 8, 8, 224, 224, 224, 224, 224, 224, 224, 224, - 224, 224, 225, 224, 224, 224, 224, 224, 224, 224, 8, 8, 8, 8, 8, 8, - 226, 226, 226, 226, 226, 226, 226, 226, 226, 226, 226, 226, 227, 8, 8, 8, - 226, 226, 226, 226, 226, 8, 8, 8, 228, 228, 228, 228, 228, 228, 228, 228, - 228, 228, 228, 228, 8, 8, 8, 8, 228, 228, 228, 228, 228, 8, 8, 8, - 229, 229, 229, 229, 229, 229, 229, 229, 229, 229, 229, 229, 229, 229, 229, 230, - 229, 230, 8, 8, 8, 8, 8, 8, 229, 229, 8, 8, 8, 8, 8, 8, - 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 232, - 164, 164, 164, 164, 233, 8, 8, 8, 234, 234, 234, 234, 234, 234, 234, 234, - 234, 234, 235, 8, 8, 8, 8, 8, 234, 234, 234, 234, 234, 234, 234, 235, - 8, 8, 8, 8, 8, 8, 8, 236, 237, 8, 8, 8, 8, 8, 8, 8, - 0, 0, 0, 8, 8, 8, 8, 8, 0, 0, 0, 9, 20, 0, 0, 0, - 0, 0, 0, 127, 5, 0, 0, 0, 0, 0, 0, 0, 0, 127, 5, 5, - 5, 126, 127, 5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 5, 5, 0, - 0, 0, 0, 0, 0, 0, 0, 8, 6, 6, 6, 8, 8, 8, 8, 8, - 0, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, 0, 9, 0, - 8, 9, 20, 9, 20, 0, 9, 0, 0, 0, 0, 0, 0, 20, 20, 0, - 0, 0, 20, 0, 0, 0, 0, 0, 0, 0, 0, 20, 0, 9, 20, 0, - 0, 0, 9, 0, 0, 0, 9, 0, 0, 0, 0, 0, 0, 20, 0, 9, - 0, 0, 9, 9, 8, 0, 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 8, 0, - 24, 24, 180, 24, 24, 24, 24, 24, 180, 25, 25, 180, 180, 24, 24, 24, - 24, 25, 24, 24, 180, 180, 8, 8, 8, 25, 8, 180, 180, 180, 180, 24, - 180, 25, 25, 180, 180, 180, 180, 180, 180, 25, 25, 180, 24, 25, 24, 24, - 24, 25, 24, 24, 180, 24, 25, 25, 24, 24, 24, 24, 24, 180, 24, 24, - 24, 24, 24, 24, 24, 24, 8, 8, 180, 24, 180, 24, 24, 180, 24, 24, - 20, 0, 0, 0, 0, 0, 0, 9, 8, 8, 8, 0, 0, 0, 0, 0, - 238, 9, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 9, 8, 8, 8, - 9, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 20, 0, 0, 0, 0, - 0, 0, 9, 0, 0, 9, 8, 8, 0, 0, 0, 0, 20, 0, 9, 8, - 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8, 8, 20, 0, 0, - 9, 8, 20, 0, 0, 0, 0, 0, 141, 141, 141, 158, 8, 8, 8, 8, - 141, 141, 158, 8, 8, 8, 8, 8, 20, 8, 8, 8, 8, 8, 8, 8, -}; - -static RE_UINT8 re_script_stage_5[] = { - 1, 1, 1, 2, 2, 2, 2, 1, 35, 35, 41, 41, 3, 3, 1, 3, - 0, 0, 1, 0, 3, 1, 3, 0, 0, 3, 55, 55, 4, 4, 4, 41, - 41, 4, 0, 5, 5, 5, 5, 0, 0, 1, 0, 6, 6, 6, 6, 0, - 7, 7, 7, 0, 1, 7, 7, 1, 7, 41, 41, 7, 8, 8, 0, 8, - 8, 0, 9, 9, 66, 66, 66, 0, 82, 82, 82, 0, 95, 95, 95, 0, - 10, 10, 10, 41, 41, 10, 0, 10, 0, 11, 11, 11, 11, 0, 0, 12, - 12, 12, 12, 0, 0, 13, 13, 13, 13, 0, 0, 14, 14, 14, 14, 0, - 15, 15, 0, 15, 15, 0, 0, 16, 16, 16, 16, 0, 17, 17, 0, 17, - 17, 0, 18, 18, 0, 18, 18, 0, 19, 19, 0, 19, 19, 0, 0, 20, - 20, 20, 20, 0, 0, 21, 21, 0, 21, 21, 22, 22, 0, 22, 22, 0, - 22, 1, 1, 22, 23, 23, 24, 24, 0, 24, 24, 1, 25, 25, 26, 26, - 26, 0, 0, 26, 27, 27, 27, 0, 28, 28, 29, 29, 29, 0, 30, 30, - 30, 1, 30, 0, 42, 42, 42, 0, 43, 43, 43, 1, 44, 44, 45, 45, - 45, 0, 31, 31, 32, 32, 32, 1, 32, 0, 46, 46, 46, 0, 47, 47, - 47, 0, 56, 56, 56, 0, 54, 54, 78, 78, 78, 0, 0, 78, 62, 62, - 62, 0, 67, 67, 93, 93, 68, 68, 0, 68, 69, 69, 41, 1, 1, 41, - 3, 4, 2, 3, 3, 2, 4, 2, 41, 0, 2, 0, 53, 53, 57, 57, - 57, 0, 0, 55, 58, 58, 0, 58, 58, 0, 36, 36, 0, 36, 1, 36, - 0, 33, 33, 33, 33, 0, 0, 41, 1, 33, 1, 34, 34, 34, 34, 1, - 0, 35, 0, 25, 25, 0, 35, 0, 25, 1, 34, 0, 36, 0, 37, 37, - 37, 0, 83, 83, 70, 70, 0, 4, 84, 84, 
59, 59, 65, 65, 71, 71, - 71, 0, 72, 72, 73, 73, 0, 73, 85, 85, 77, 77, 77, 0, 79, 79, - 79, 0, 0, 79, 86, 86, 86, 0, 0, 7, 48, 48, 0, 48, 48, 0, - 74, 74, 74, 0, 75, 75, 75, 0, 38, 38, 38, 0, 39, 39, 39, 0, - 49, 49, 0, 49, 60, 60, 40, 40, 50, 50, 51, 51, 52, 52, 52, 0, - 0, 52, 87, 87, 0, 87, 64, 64, 0, 64, 76, 76, 0, 76, 98, 98, - 97, 97, 61, 61, 0, 61, 61, 0, 88, 88, 80, 80, 0, 80, 89, 89, - 90, 90, 90, 0, 91, 91, 91, 0, 94, 94, 92, 92, 101, 101, 101, 0, - 96, 96, 96, 0, 100, 100, 100, 0, 102, 102, 63, 63, 63, 0, 81, 81, - 81, 0, 84, 0, 99, 99, 99, 0, 0, 99, 34, 33, 33, 1, -}; - -/* Script: 8046 bytes. */ - -RE_UINT32 re_get_script(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 11; - code = ch ^ (f << 11); - pos = (RE_UINT32)re_script_stage_1[f] << 4; - f = code >> 7; - code ^= f << 7; - pos = (RE_UINT32)re_script_stage_2[pos + f] << 3; - f = code >> 4; - code ^= f << 4; - pos = (RE_UINT32)re_script_stage_3[pos + f] << 3; - f = code >> 1; - code ^= f << 1; - pos = (RE_UINT32)re_script_stage_4[pos + f] << 1; - value = re_script_stage_5[pos + code]; - - return value; -} - -/* Word_Break. 
*/ - -static RE_UINT8 re_word_break_stage_1[] = { - 0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 5, 6, 6, 7, 4, 8, - 9, 10, 11, 12, 4, 4, 13, 4, 4, 4, 4, 14, 4, 15, 16, 17, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 18, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, -}; - -static RE_UINT8 re_word_break_stage_2[] = { - 0, 1, 2, 2, 2, 3, 4, 5, 2, 6, 7, 8, 9, 10, 11, 12, - 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, - 29, 30, 2, 2, 31, 32, 33, 34, 35, 2, 2, 2, 36, 37, 38, 39, - 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 2, 50, 2, 2, 51, 52, - 53, 54, 55, 56, 57, 57, 57, 57, 57, 58, 57, 57, 57, 57, 57, 57, - 57, 57, 57, 57, 57, 57, 57, 57, 59, 60, 61, 62, 63, 57, 57, 57, - 64, 65, 66, 67, 57, 68, 69, 57, 57, 57, 57, 57, 57, 57, 57, 57, - 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, - 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, - 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 70, 2, 2, 71, 72, 73, 74, - 75, 76, 77, 78, 79, 80, 81, 82, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 83, - 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, - 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, - 
57, 57, 57, 57, 57, 57, 84, 85, 2, 2, 86, 87, 88, 89, 90, 91, - 92, 93, 94, 95, 57, 96, 97, 98, 2, 99, 57, 57, 57, 57, 57, 57, - 100, 57, 101, 102, 103, 57, 104, 57, 105, 57, 57, 57, 57, 57, 57, 57, - 106, 107, 108, 109, 57, 57, 57, 57, 57, 57, 57, 57, 57, 110, 57, 57, - 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, - 2, 2, 2, 2, 2, 2, 111, 57, 112, 57, 57, 57, 57, 57, 57, 57, - 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, - 2, 2, 2, 2, 2, 2, 2, 2, 113, 57, 57, 57, 57, 57, 57, 57, - 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, - 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, - 2, 2, 2, 2, 114, 57, 57, 57, 57, 57, 57, 57, 57, 57, 115, 116, - 117, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, - 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, - 57, 57, 118, 119, 120, 57, 57, 57, 121, 122, 123, 2, 2, 124, 125, 126, - 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, - 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, - 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 127, 128, 57, 57, - 57, 57, 57, 129, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, - 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, - 130, 57, 131, 132, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, - 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, -}; - -static RE_UINT8 re_word_break_stage_3[] = { - 0, 1, 0, 0, 2, 3, 4, 5, 6, 7, 7, 8, 6, 7, 7, 9, - 10, 0, 0, 0, 0, 11, 12, 13, 7, 7, 14, 7, 7, 7, 14, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 15, 7, 16, 0, 17, 18, 0, 0, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 20, 21, - 22, 23, 7, 7, 24, 7, 7, 7, 7, 7, 7, 7, 7, 7, 25, 7, - 26, 27, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 0, 6, 7, 7, 7, 14, 28, 6, 7, 7, 7, - 7, 29, 30, 19, 19, 19, 19, 31, 32, 0, 33, 33, 33, 34, 35, 0, - 36, 37, 19, 38, 7, 7, 7, 7, 7, 39, 19, 19, 4, 40, 41, 
7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 42, 43, 44, 45, 4, 46, - 0, 47, 48, 7, 7, 7, 19, 19, 19, 49, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 50, 19, 51, 0, 4, 52, 7, 7, 7, 39, 53, 54, - 7, 7, 50, 55, 56, 57, 0, 0, 7, 7, 7, 58, 0, 0, 0, 0, - 0, 0, 0, 0, 59, 17, 0, 0, 0, 0, 0, 0, 60, 19, 19, 61, - 62, 7, 7, 7, 7, 7, 7, 63, 19, 19, 64, 7, 65, 4, 6, 6, - 66, 67, 68, 7, 7, 59, 69, 70, 71, 72, 73, 74, 65, 4, 75, 0, - 66, 76, 68, 7, 7, 59, 77, 78, 79, 80, 81, 82, 83, 4, 84, 0, - 66, 25, 24, 7, 7, 59, 85, 70, 31, 86, 87, 0, 65, 4, 0, 0, - 66, 67, 68, 7, 7, 59, 85, 70, 71, 80, 88, 74, 65, 4, 28, 0, - 89, 90, 91, 92, 93, 90, 7, 94, 95, 96, 97, 0, 83, 4, 0, 0, - 66, 20, 59, 7, 7, 59, 98, 99, 100, 96, 101, 75, 65, 4, 0, 0, - 102, 20, 59, 7, 7, 59, 98, 70, 100, 96, 101, 103, 65, 4, 104, 0, - 102, 20, 59, 7, 7, 7, 7, 105, 100, 106, 73, 0, 65, 4, 0, 107, - 102, 7, 14, 107, 7, 7, 24, 108, 14, 109, 110, 19, 0, 0, 111, 0, - 0, 0, 0, 0, 0, 0, 112, 113, 73, 61, 4, 114, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 112, 115, 0, 116, 4, 114, 0, 0, 0, 0, - 87, 0, 0, 117, 4, 114, 118, 119, 7, 6, 7, 7, 7, 17, 30, 19, - 100, 120, 19, 30, 19, 19, 19, 121, 122, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 123, 19, 61, 4, 114, 88, 124, 125, 116, 126, 0, - 127, 31, 4, 128, 7, 7, 7, 7, 25, 129, 7, 7, 7, 7, 7, 130, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 91, 14, 91, 7, 7, 7, 7, - 7, 91, 7, 7, 7, 7, 91, 14, 91, 7, 14, 7, 7, 7, 7, 7, - 7, 7, 91, 7, 7, 7, 7, 7, 7, 7, 7, 131, 0, 0, 0, 0, - 7, 7, 0, 0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 17, 0, - 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 67, 7, 7, - 6, 7, 7, 9, 7, 7, 7, 7, 7, 7, 7, 7, 7, 90, 87, 0, - 7, 20, 132, 0, 7, 7, 132, 0, 7, 7, 133, 0, 7, 20, 134, 0, - 0, 0, 0, 0, 0, 0, 60, 19, 19, 19, 135, 136, 4, 114, 0, 0, - 0, 137, 4, 114, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, - 7, 7, 7, 7, 7, 138, 7, 7, 7, 7, 7, 7, 7, 7, 139, 0, - 7, 7, 7, 17, 19, 135, 19, 135, 83, 4, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 19, 19, 140, 117, 4, 114, 0, 0, 0, 0, - 7, 7, 141, 135, 
0, 0, 0, 0, 0, 0, 142, 61, 19, 19, 19, 71, - 4, 114, 4, 114, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 143, 7, 7, 7, 7, 7, 144, 19, 143, 145, 4, 114, 0, 123, 135, 0, - 146, 7, 7, 7, 64, 147, 4, 52, 7, 7, 7, 7, 50, 19, 135, 0, - 7, 7, 7, 7, 144, 19, 19, 0, 4, 148, 4, 52, 7, 7, 7, 139, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 149, 19, 19, 150, 151, 0, - 7, 7, 7, 7, 7, 7, 7, 7, 19, 19, 19, 19, 61, 0, 0, 60, - 7, 7, 139, 139, 7, 7, 7, 7, 139, 139, 7, 152, 7, 7, 7, 139, - 7, 7, 7, 7, 7, 7, 20, 153, 154, 17, 155, 145, 7, 17, 154, 17, - 0, 156, 0, 157, 158, 159, 0, 160, 161, 0, 162, 0, 163, 164, 28, 165, - 0, 0, 7, 17, 0, 0, 0, 0, 0, 0, 19, 19, 19, 19, 140, 0, - 166, 107, 108, 167, 18, 168, 7, 169, 170, 171, 0, 0, 7, 7, 7, 7, - 7, 87, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 172, 7, 7, 7, 7, 7, 7, 75, 0, 0, - 7, 7, 7, 7, 7, 14, 7, 7, 7, 7, 7, 14, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 17, 173, 174, 0, - 7, 7, 7, 7, 25, 129, 7, 7, 7, 7, 7, 7, 7, 165, 0, 73, - 7, 7, 14, 0, 14, 14, 14, 14, 14, 14, 14, 14, 19, 19, 19, 19, - 0, 0, 0, 0, 0, 165, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 129, 0, 0, 0, 0, 127, 175, 93, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 176, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 178, - 170, 7, 7, 7, 7, 139, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 14, 0, 0, 7, 7, 7, 9, 0, 0, 0, 0, 0, 0, 177, 177, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 177, 177, 177, 177, 177, 179, - 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 0, 0, 0, 0, 0, - 7, 17, 0, 0, 0, 0, 0, 0, 0, 0, 7, 7, 7, 7, 7, 139, - 7, 17, 7, 7, 4, 180, 0, 0, 7, 7, 7, 7, 7, 141, 149, 181, - 7, 7, 7, 73, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 117, 0, - 0, 0, 165, 7, 107, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 182, 145, 0, 7, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, - 183, 184, 7, 7, 39, 0, 0, 0, 7, 7, 7, 7, 7, 7, 145, 0, - 27, 7, 7, 7, 7, 7, 144, 19, 121, 0, 4, 114, 19, 19, 27, 185, - 4, 52, 7, 7, 50, 116, 7, 7, 141, 19, 135, 0, 7, 7, 7, 17, - 62, 7, 7, 7, 7, 7, 
39, 19, 140, 165, 4, 114, 0, 0, 0, 0, - 7, 7, 7, 7, 7, 64, 61, 0, 184, 186, 4, 114, 0, 0, 0, 187, - 0, 0, 0, 0, 0, 0, 125, 188, 81, 0, 0, 0, 7, 39, 189, 0, - 190, 190, 190, 0, 14, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 7, 7, 7, 7, 39, 191, 4, 114, - 7, 7, 7, 7, 145, 0, 7, 7, 14, 192, 7, 7, 7, 7, 7, 145, - 14, 0, 192, 193, 33, 194, 195, 196, 197, 33, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 75, 0, 0, 0, 192, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 139, 0, 0, 7, 7, 7, 7, 7, 7, - 7, 7, 107, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, 7, 145, - 19, 19, 198, 0, 61, 0, 199, 0, 0, 200, 201, 0, 0, 0, 20, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 202, - 203, 3, 0, 204, 6, 7, 7, 8, 6, 7, 7, 9, 205, 177, 177, 177, - 177, 177, 177, 206, 7, 7, 7, 14, 107, 107, 107, 207, 0, 0, 0, 208, - 7, 98, 7, 7, 14, 7, 7, 209, 7, 139, 7, 139, 0, 0, 0, 0, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 9, - 0, 0, 0, 0, 0, 0, 0, 0, 7, 7, 7, 7, 7, 7, 17, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 136, - 7, 7, 7, 17, 7, 7, 7, 7, 7, 7, 87, 0, 0, 0, 0, 0, - 7, 7, 7, 14, 0, 0, 7, 7, 7, 9, 0, 0, 0, 0, 0, 0, - 7, 7, 7, 139, 7, 7, 7, 7, 145, 7, 167, 0, 0, 0, 0, 0, - 7, 7, 7, 139, 4, 114, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 139, 59, 7, 7, 7, 7, 25, 210, 7, 7, 139, 0, 0, 0, 0, 0, - 7, 7, 139, 0, 7, 7, 7, 75, 0, 0, 0, 0, 0, 0, 0, 0, - 7, 7, 7, 7, 7, 7, 7, 172, 0, 0, 0, 0, 0, 0, 0, 0, - 211, 60, 98, 6, 7, 7, 145, 79, 0, 0, 0, 0, 7, 7, 7, 17, - 7, 7, 7, 7, 7, 7, 139, 0, 7, 7, 139, 0, 7, 7, 9, 0, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 87, 0, 0, 0, 0, 0, 0, - 146, 7, 7, 7, 7, 7, 7, 19, 61, 0, 0, 0, 83, 4, 0, 0, - 146, 7, 7, 7, 7, 7, 19, 212, 0, 0, 7, 7, 7, 87, 4, 114, - 146, 7, 7, 7, 141, 19, 213, 4, 0, 0, 0, 0, 0, 0, 0, 0, - 146, 7, 7, 7, 7, 7, 39, 19, 214, 0, 4, 114, 0, 0, 0, 0, - 7, 7, 7, 7, 7, 39, 19, 0, 4, 114, 0, 0, 0, 0, 0, 0, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 14, 0, 0, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 9, 0, 0, 0, - 7, 7, 7, 7, 7, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 7, 7, 7, 7, 7, 7, 
7, 87, 0, 0, 0, 0, 0, 0, 0, 0, - 7, 7, 7, 7, 7, 7, 7, 7, 17, 0, 64, 19, 19, 19, 19, 61, - 0, 73, 146, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 215, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 142, 216, 217, 218, - 219, 135, 0, 0, 0, 220, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 221, 0, 0, 0, 0, 0, 0, 0, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 20, 7, 7, 7, 7, 7, - 7, 7, 7, 20, 222, 223, 7, 224, 98, 7, 7, 7, 7, 7, 7, 7, - 25, 225, 20, 20, 7, 7, 7, 226, 153, 107, 59, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 139, 7, 7, 7, 59, 7, 7, 130, 7, 7, 7, 130, - 7, 7, 20, 7, 7, 7, 20, 7, 7, 14, 7, 7, 7, 14, 7, 7, - 7, 59, 7, 7, 7, 59, 7, 7, 130, 227, 4, 4, 4, 4, 4, 4, - 98, 7, 7, 7, 228, 6, 130, 229, 166, 230, 228, 152, 228, 130, 130, 82, - 7, 24, 7, 145, 231, 24, 7, 145, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 232, 233, 233, 233, - 234, 0, 0, 0, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 0, 0, -}; - -static RE_UINT8 re_word_break_stage_4[] = { - 0, 0, 1, 2, 3, 4, 0, 5, 6, 6, 7, 0, 8, 9, 9, 9, - 10, 11, 10, 0, 0, 12, 13, 14, 0, 15, 13, 0, 9, 10, 16, 17, - 16, 18, 9, 19, 0, 20, 21, 21, 9, 22, 17, 23, 0, 24, 10, 22, - 25, 9, 9, 25, 26, 21, 27, 9, 28, 0, 29, 0, 30, 21, 21, 31, - 32, 31, 33, 33, 34, 0, 35, 36, 37, 38, 0, 39, 40, 38, 41, 21, - 42, 43, 44, 9, 9, 45, 21, 46, 21, 47, 48, 27, 49, 50, 0, 51, - 52, 9, 40, 8, 9, 53, 54, 0, 49, 9, 21, 16, 55, 0, 56, 21, - 21, 57, 57, 58, 57, 0, 22, 9, 0, 21, 21, 40, 21, 9, 53, 59, - 57, 21, 53, 60, 30, 8, 9, 50, 50, 9, 20, 17, 16, 59, 21, 61, - 61, 62, 0, 63, 0, 25, 16, 0, 10, 64, 22, 65, 16, 48, 40, 63, - 61, 58, 66, 0, 8, 20, 0, 60, 27, 67, 22, 8, 31, 58, 19, 0, - 0, 68, 69, 8, 10, 17, 22, 16, 65, 22, 64, 19, 16, 68, 40, 68, - 48, 58, 19, 63, 9, 8, 16, 45, 21, 48, 0, 32, 68, 8, 0, 13, - 65, 0, 10, 45, 48, 62, 17, 9, 9, 28, 70, 63, 21, 71, 68, 0, 
- 66, 21, 40, 0, 72, 0, 31, 73, 21, 58, 58, 0, 0, 74, 66, 68, - 9, 57, 21, 73, 0, 70, 63, 21, 58, 68, 48, 61, 30, 73, 68, 21, - 75, 58, 0, 28, 10, 9, 10, 30, 53, 73, 53, 0, 76, 0, 21, 0, - 0, 66, 63, 77, 78, 0, 9, 16, 73, 0, 9, 41, 0, 30, 21, 44, - 9, 21, 9, 0, 79, 9, 21, 27, 72, 8, 40, 21, 44, 52, 53, 80, - 81, 81, 9, 20, 17, 22, 9, 17, 0, 82, 83, 0, 0, 84, 85, 86, - 0, 11, 87, 88, 0, 87, 37, 89, 37, 37, 0, 64, 13, 64, 8, 16, - 22, 25, 16, 9, 0, 8, 16, 13, 0, 17, 64, 41, 27, 0, 90, 91, - 92, 93, 94, 94, 95, 94, 94, 95, 49, 0, 21, 96, 50, 10, 97, 97, - 41, 9, 64, 0, 9, 58, 63, 0, 73, 68, 17, 98, 8, 10, 40, 58, - 64, 9, 0, 99, 100, 33, 33, 34, 33, 101, 102, 100, 103, 88, 11, 87, - 0, 104, 5, 105, 9, 106, 0, 107, 108, 0, 0, 109, 94, 110, 17, 19, - 111, 0, 10, 25, 19, 50, 57, 32, 40, 14, 21, 112, 44, 19, 93, 0, - 58, 30, 113, 37, 114, 21, 40, 30, 68, 58, 68, 73, 13, 65, 8, 22, - 25, 8, 10, 8, 25, 10, 9, 60, 65, 50, 81, 0, 81, 8, 8, 8, - 0, 115, 116, 116, 14, 0, -}; - -static RE_UINT8 re_word_break_stage_5[] = { - 0, 0, 0, 0, 0, 0, 5, 6, 6, 4, 0, 0, 0, 0, 1, 0, - 0, 0, 0, 2, 13, 0, 14, 0, 15, 15, 15, 15, 15, 15, 12, 13, - 0, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 0, 0, 0, 0, 16, - 0, 6, 0, 0, 0, 0, 11, 0, 0, 9, 0, 0, 0, 11, 0, 12, - 11, 11, 0, 0, 0, 0, 11, 11, 0, 0, 0, 12, 11, 0, 0, 0, - 11, 0, 11, 0, 7, 7, 7, 7, 11, 0, 11, 11, 11, 11, 13, 0, - 0, 0, 11, 12, 11, 11, 0, 11, 11, 11, 0, 7, 7, 7, 11, 11, - 0, 11, 0, 0, 0, 13, 0, 0, 0, 7, 7, 7, 7, 7, 0, 7, - 0, 7, 7, 0, 3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 11, - 12, 0, 0, 0, 9, 9, 9, 9, 9, 0, 0, 0, 13, 13, 0, 0, - 7, 7, 7, 0, 11, 11, 11, 7, 15, 15, 0, 15, 13, 0, 11, 11, - 7, 11, 11, 11, 0, 11, 7, 7, 7, 9, 0, 7, 7, 11, 11, 7, - 7, 0, 7, 7, 15, 15, 11, 11, 11, 0, 0, 11, 0, 0, 0, 9, - 11, 7, 11, 11, 11, 11, 7, 7, 7, 11, 0, 0, 13, 0, 11, 0, - 7, 7, 11, 7, 11, 7, 7, 7, 7, 7, 0, 0, 7, 11, 7, 7, - 0, 0, 15, 15, 7, 0, 0, 7, 7, 7, 11, 0, 0, 0, 0, 7, - 0, 0, 0, 11, 0, 11, 11, 0, 0, 7, 0, 0, 11, 7, 0, 0, - 0, 0, 7, 7, 0, 0, 7, 11, 0, 0, 
7, 0, 7, 0, 7, 0, - 15, 15, 0, 0, 7, 0, 0, 0, 0, 7, 0, 7, 15, 15, 7, 7, - 11, 0, 7, 7, 7, 7, 9, 0, 11, 7, 11, 0, 7, 7, 7, 11, - 7, 11, 11, 0, 0, 11, 0, 11, 7, 7, 9, 9, 14, 14, 0, 0, - 14, 0, 0, 12, 6, 6, 9, 9, 9, 9, 9, 0, 16, 0, 0, 0, - 13, 0, 0, 0, 9, 0, 9, 9, 0, 10, 10, 10, 10, 10, 0, 0, - 0, 7, 7, 10, 10, 0, 0, 0, 10, 10, 10, 10, 10, 10, 10, 0, - 7, 7, 0, 11, 11, 11, 7, 11, 11, 7, 7, 0, 0, 3, 7, 3, - 3, 0, 3, 3, 3, 0, 3, 0, 3, 3, 0, 3, 13, 0, 0, 12, - 0, 16, 16, 16, 13, 12, 0, 0, 11, 0, 0, 9, 0, 0, 0, 14, - 0, 0, 12, 13, 0, 0, 10, 10, 10, 10, 7, 7, 0, 9, 9, 9, - 7, 0, 15, 15, 7, 7, 7, 9, 9, 9, 9, 7, 0, 0, 8, 8, - 8, 8, 8, 8, -}; - -/* Word_Break: 3946 bytes. */ - -RE_UINT32 re_get_word_break(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 12; - code = ch ^ (f << 12); - pos = (RE_UINT32)re_word_break_stage_1[f] << 5; - f = code >> 7; - code ^= f << 7; - pos = (RE_UINT32)re_word_break_stage_2[pos + f] << 4; - f = code >> 3; - code ^= f << 3; - pos = (RE_UINT32)re_word_break_stage_3[pos + f] << 1; - f = code >> 2; - code ^= f << 2; - pos = (RE_UINT32)re_word_break_stage_4[pos + f] << 2; - value = re_word_break_stage_5[pos + code]; - - return value; -} - -/* Grapheme_Cluster_Break. 
*/ - -static RE_UINT8 re_grapheme_cluster_break_stage_1[] = { - 0, 1, 2, 2, 2, 3, 4, 5, 6, 2, 2, 7, 2, 2, 8, 9, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 10, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, -}; - -static RE_UINT8 re_grapheme_cluster_break_stage_2[] = { - 0, 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, - 15, 16, 1, 17, 1, 1, 1, 18, 19, 20, 21, 22, 23, 24, 1, 1, - 25, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 26, 27, 1, 1, - 28, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 29, 1, 30, 31, 32, 33, 34, 35, 36, 37, - 38, 39, 40, 34, 35, 36, 37, 38, 39, 40, 34, 35, 36, 37, 38, 39, - 40, 34, 35, 36, 37, 38, 39, 40, 34, 35, 36, 37, 38, 39, 40, 34, - 35, 36, 37, 38, 39, 40, 34, 41, 42, 42, 42, 42, 42, 42, 42, 42, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 43, 1, 1, 44, 45, - 1, 46, 1, 1, 1, 1, 1, 1, 1, 1, 47, 1, 1, 1, 1, 1, - 48, 49, 1, 1, 1, 1, 50, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 51, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 52, 53, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 54, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 42, 55, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -}; - -static RE_UINT8 re_grapheme_cluster_break_stage_3[] = { - 0, 1, 2, 2, 2, 2, 2, 3, 1, 1, 4, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 5, 5, 5, 5, 5, 5, 5, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 6, 
2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 7, 5, 8, 9, 2, 2, 2, - 10, 11, 2, 2, 12, 5, 2, 13, 2, 2, 2, 2, 2, 14, 15, 2, - 3, 16, 2, 5, 17, 2, 2, 2, 2, 2, 18, 13, 2, 2, 12, 19, - 2, 20, 21, 2, 2, 22, 2, 2, 2, 2, 2, 2, 2, 2, 23, 24, - 25, 2, 2, 26, 27, 28, 29, 2, 30, 2, 2, 31, 32, 33, 29, 2, - 34, 2, 2, 35, 36, 16, 2, 37, 34, 2, 2, 35, 38, 2, 29, 2, - 30, 2, 2, 39, 32, 40, 29, 2, 41, 2, 2, 42, 43, 33, 2, 2, - 44, 2, 2, 45, 46, 47, 29, 2, 48, 2, 2, 49, 50, 47, 29, 2, - 48, 2, 2, 42, 51, 33, 29, 2, 48, 2, 2, 2, 52, 53, 2, 48, - 2, 2, 2, 54, 55, 2, 2, 2, 2, 2, 2, 56, 57, 2, 2, 2, - 2, 58, 2, 59, 2, 2, 2, 60, 61, 62, 5, 63, 64, 2, 2, 2, - 2, 2, 65, 66, 2, 67, 13, 68, 69, 70, 2, 2, 2, 2, 2, 2, - 71, 71, 71, 71, 71, 71, 72, 72, 72, 72, 73, 74, 74, 74, 74, 74, - 2, 2, 2, 2, 2, 65, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 75, 2, 75, 2, 29, 2, 29, 2, 2, 2, 76, 77, 78, 2, 2, - 79, 2, 2, 2, 2, 2, 2, 2, 2, 2, 80, 2, 2, 2, 2, 2, - 2, 2, 81, 82, 2, 2, 2, 2, 2, 2, 2, 83, 2, 2, 2, 2, - 2, 84, 2, 2, 2, 85, 86, 87, 2, 2, 2, 2, 2, 2, 2, 2, - 88, 2, 2, 89, 90, 2, 12, 19, 91, 2, 92, 2, 2, 2, 93, 94, - 2, 2, 95, 96, 2, 2, 2, 2, 2, 2, 2, 2, 2, 97, 98, 99, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 5, 5, 100, 101, - 102, 2, 103, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 5, 5, 13, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 104, 105, - 2, 2, 2, 2, 2, 2, 2, 104, 2, 2, 2, 2, 2, 2, 5, 5, - 2, 2, 106, 2, 2, 2, 2, 2, 2, 107, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 104, 108, 2, 104, 2, 2, 2, 2, 2, 105, - 109, 2, 110, 2, 2, 2, 2, 2, 111, 2, 2, 112, 113, 2, 5, 105, - 2, 2, 114, 2, 115, 94, 71, 116, 25, 2, 2, 117, 118, 2, 2, 2, - 2, 2, 119, 120, 121, 2, 2, 2, 2, 2, 2, 122, 16, 2, 123, 124, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 125, 2, - 126, 127, 128, 129, 128, 130, 128, 126, 127, 128, 129, 128, 130, 128, 126, 127, - 128, 129, 128, 130, 128, 126, 127, 128, 129, 128, 130, 128, 126, 127, 128, 129, - 128, 130, 128, 126, 127, 128, 129, 128, 130, 128, 126, 127, 128, 129, 128, 130, - 128, 126, 127, 128, 129, 128, 130, 
128, 126, 127, 128, 129, 128, 130, 128, 126, - 127, 128, 129, 128, 130, 128, 126, 127, 128, 129, 128, 130, 128, 126, 127, 128, - 129, 128, 130, 128, 126, 127, 128, 129, 128, 130, 128, 126, 127, 128, 129, 128, - 130, 128, 126, 127, 128, 129, 128, 130, 128, 126, 127, 128, 129, 128, 130, 128, - 128, 129, 128, 130, 128, 126, 127, 128, 129, 128, 131, 72, 132, 74, 74, 133, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 2, 134, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 5, 2, 100, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 45, 2, 2, 2, 2, 2, 135, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 70, - 136, 2, 2, 137, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 138, 2, 2, 139, 100, 2, 2, 2, 91, 2, 2, 140, 2, 2, 2, 2, - 141, 2, 142, 143, 2, 2, 2, 2, 91, 2, 2, 144, 118, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 145, 146, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 147, 148, 149, 104, 141, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 150, 151, 152, 2, 153, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 75, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 154, 155, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, -}; - -static RE_UINT8 re_grapheme_cluster_break_stage_4[] = { - 0, 0, 1, 2, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 4, - 3, 3, 3, 5, 6, 6, 6, 6, 7, 6, 8, 3, 9, 6, 6, 6, - 6, 6, 6, 10, 11, 10, 3, 3, 0, 12, 3, 3, 6, 6, 13, 12, - 3, 3, 7, 6, 14, 3, 3, 3, 3, 15, 6, 16, 6, 17, 18, 8, - 19, 3, 3, 3, 6, 6, 13, 3, 3, 15, 6, 6, 6, 3, 3, 3, - 3, 15, 10, 6, 6, 9, 9, 8, 3, 3, 9, 3, 3, 6, 6, 6, - 6, 6, 6, 13, 20, 3, 3, 3, 3, 3, 21, 22, 23, 6, 24, 25, - 9, 6, 3, 3, 15, 3, 3, 3, 26, 3, 3, 3, 3, 3, 3, 27, - 23, 28, 29, 30, 3, 7, 3, 3, 31, 3, 3, 3, 3, 3, 3, 22, - 32, 7, 17, 8, 8, 19, 3, 3, 23, 10, 33, 30, 3, 3, 3, 18, - 3, 15, 3, 3, 34, 3, 3, 3, 3, 3, 3, 21, 35, 36, 37, 30, - 38, 3, 3, 3, 3, 3, 3, 15, 24, 39, 18, 8, 3, 11, 3, 3, - 36, 3, 3, 3, 3, 3, 3, 40, 41, 42, 37, 8, 23, 22, 37, 30, - 3, 3, 34, 7, 43, 44, 45, 46, 47, 6, 13, 3, 3, 7, 6, 13, - 47, 6, 10, 14, 3, 3, 6, 8, 3, 3, 8, 
3, 3, 48, 19, 36, - 9, 6, 6, 20, 6, 18, 3, 9, 6, 6, 9, 6, 6, 6, 6, 14, - 3, 34, 3, 3, 3, 3, 3, 9, 49, 6, 31, 32, 3, 36, 8, 15, - 9, 14, 3, 3, 34, 32, 3, 19, 3, 3, 3, 19, 50, 50, 50, 50, - 51, 51, 51, 51, 51, 51, 52, 52, 52, 52, 52, 52, 15, 14, 3, 3, - 3, 53, 6, 54, 45, 41, 23, 6, 6, 3, 3, 19, 3, 3, 7, 55, - 3, 3, 19, 3, 20, 46, 24, 3, 41, 45, 23, 3, 3, 38, 56, 3, - 3, 7, 57, 3, 3, 58, 6, 13, 44, 9, 6, 24, 46, 6, 6, 17, - 6, 59, 3, 3, 3, 49, 20, 24, 41, 59, 3, 3, 60, 3, 3, 3, - 61, 54, 53, 62, 3, 21, 54, 63, 54, 3, 3, 3, 3, 45, 45, 6, - 6, 43, 3, 3, 13, 6, 6, 6, 49, 6, 14, 19, 36, 14, 3, 3, - 6, 13, 3, 3, 3, 3, 3, 6, 3, 3, 4, 64, 3, 3, 0, 65, - 3, 3, 3, 7, 8, 3, 3, 3, 3, 3, 15, 6, 3, 3, 11, 3, - 13, 6, 6, 8, 34, 34, 7, 3, 66, 67, 3, 3, 62, 3, 3, 3, - 3, 45, 45, 45, 45, 14, 3, 3, 3, 15, 6, 8, 3, 7, 6, 6, - 50, 50, 50, 68, 7, 43, 54, 24, 59, 3, 3, 3, 3, 3, 9, 20, - 67, 32, 3, 3, 7, 3, 3, 69, 18, 17, 14, 15, 3, 3, 66, 54, - 3, 70, 3, 3, 66, 25, 35, 30, 71, 72, 72, 72, 72, 72, 72, 71, - 72, 72, 72, 72, 72, 72, 71, 72, 72, 71, 72, 72, 72, 3, 3, 3, - 51, 73, 74, 52, 52, 52, 52, 3, 3, 3, 3, 34, 0, 0, 0, 3, - 9, 11, 3, 6, 3, 3, 13, 7, 75, 3, 3, 3, 3, 3, 6, 6, - 46, 20, 32, 5, 13, 3, 3, 3, 3, 7, 6, 23, 6, 14, 3, 3, - 66, 43, 6, 20, 3, 3, 7, 25, 6, 53, 3, 3, 38, 45, 45, 45, - 45, 45, 45, 45, 45, 45, 45, 76, 3, 77, 8, 61, 78, 0, 79, 6, - 13, 9, 6, 3, 3, 3, 15, 8, 3, 80, 81, 81, 81, 81, 81, 81, -}; - -static RE_UINT8 re_grapheme_cluster_break_stage_5[] = { - 3, 3, 3, 3, 3, 3, 2, 3, 3, 1, 3, 3, 0, 0, 0, 0, - 0, 0, 0, 3, 0, 3, 0, 0, 4, 4, 4, 4, 0, 0, 0, 4, - 4, 4, 0, 0, 0, 4, 4, 4, 4, 4, 0, 4, 0, 4, 4, 0, - 3, 0, 0, 0, 4, 4, 4, 0, 4, 0, 0, 0, 0, 0, 4, 4, - 4, 3, 0, 4, 4, 0, 0, 4, 4, 0, 4, 4, 0, 4, 0, 0, - 4, 4, 4, 6, 0, 0, 4, 6, 4, 0, 6, 6, 6, 4, 4, 4, - 4, 6, 6, 6, 6, 4, 6, 6, 0, 4, 6, 6, 4, 0, 4, 6, - 4, 0, 0, 6, 6, 0, 0, 6, 6, 4, 0, 0, 0, 4, 4, 6, - 6, 4, 4, 0, 4, 6, 0, 6, 0, 0, 4, 0, 4, 6, 6, 0, - 0, 0, 6, 6, 6, 0, 6, 6, 0, 6, 6, 6, 6, 0, 4, 4, - 4, 0, 6, 4, 6, 6, 4, 6, 6, 0, 
4, 6, 6, 6, 4, 4, - 4, 0, 4, 0, 6, 6, 6, 6, 6, 6, 6, 4, 0, 4, 0, 6, - 0, 4, 0, 4, 4, 6, 4, 4, 7, 7, 7, 7, 8, 8, 8, 8, - 9, 9, 9, 9, 4, 4, 6, 4, 4, 4, 6, 6, 4, 4, 3, 0, - 0, 0, 6, 0, 4, 6, 6, 4, 0, 6, 4, 6, 6, 0, 0, 0, - 4, 4, 6, 0, 0, 6, 4, 4, 6, 6, 0, 0, 6, 4, 6, 4, - 4, 4, 3, 3, 3, 3, 3, 0, 0, 0, 0, 6, 6, 4, 4, 6, - 7, 0, 0, 0, 4, 6, 0, 0, 0, 6, 4, 0, 10, 11, 11, 11, - 11, 11, 11, 11, 8, 8, 8, 0, 0, 0, 0, 9, 6, 4, 6, 0, - 6, 6, 6, 0, 0, 4, 6, 4, 4, 4, 4, 3, 3, 3, 3, 4, - 0, 0, 5, 5, 5, 5, 5, 5, -}; - -/* Grapheme_Cluster_Break: 2336 bytes. */ - -RE_UINT32 re_get_grapheme_cluster_break(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 13; - code = ch ^ (f << 13); - pos = (RE_UINT32)re_grapheme_cluster_break_stage_1[f] << 5; - f = code >> 8; - code ^= f << 8; - pos = (RE_UINT32)re_grapheme_cluster_break_stage_2[pos + f] << 4; - f = code >> 4; - code ^= f << 4; - pos = (RE_UINT32)re_grapheme_cluster_break_stage_3[pos + f] << 2; - f = code >> 2; - code ^= f << 2; - pos = (RE_UINT32)re_grapheme_cluster_break_stage_4[pos + f] << 2; - value = re_grapheme_cluster_break_stage_5[pos + code]; - - return value; -} - -/* Sentence_Break. 
*/ - -static RE_UINT8 re_sentence_break_stage_1[] = { - 0, 1, 2, 3, 4, 5, 5, 5, 5, 6, 7, 5, 5, 8, 9, 10, - 11, 12, 13, 14, 9, 9, 15, 9, 9, 9, 9, 16, 9, 17, 18, 9, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 19, 20, 9, 9, 9, 21, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 22, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, -}; - -static RE_UINT8 re_sentence_break_stage_2[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 17, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, - 31, 32, 33, 34, 35, 33, 33, 36, 33, 37, 33, 33, 38, 39, 40, 33, - 41, 42, 33, 33, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, - 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 43, 17, 17, - 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, - 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 44, - 17, 17, 17, 17, 45, 17, 46, 47, 48, 49, 50, 51, 17, 17, 17, 17, - 17, 17, 17, 17, 17, 17, 17, 52, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 17, 53, 54, 17, 55, 56, 57, - 58, 59, 60, 61, 62, 33, 33, 33, 63, 64, 65, 66, 67, 33, 33, 33, - 68, 69, 33, 33, 33, 33, 70, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 17, 17, 17, 71, 72, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 17, 17, 17, 17, 73, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 17, 17, 74, 
33, 33, 33, 33, 75, - 76, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 77, 78, 33, 79, 80, 81, 82, 33, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 83, 33, - 17, 17, 17, 17, 17, 17, 84, 17, 17, 17, 17, 17, 17, 17, 17, 17, - 17, 17, 17, 17, 17, 17, 17, 85, 86, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 17, 17, 86, 33, 33, 33, 33, 33, - 87, 88, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, -}; - -static RE_UINT16 re_sentence_break_stage_3[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 8, 16, 17, 18, 19, 20, 21, 22, 23, 23, 23, 24, 25, 26, 27, 28, - 29, 30, 18, 8, 31, 8, 32, 8, 8, 33, 34, 35, 36, 37, 38, 39, - 40, 41, 42, 43, 41, 41, 44, 45, 46, 47, 48, 41, 41, 49, 50, 51, - 52, 53, 54, 55, 55, 56, 55, 57, 58, 59, 60, 61, 62, 63, 64, 65, - 66, 67, 68, 69, 70, 71, 72, 73, 62, 71, 74, 75, 76, 77, 78, 79, - 80, 81, 82, 73, 83, 84, 85, 86, 83, 87, 88, 89, 90, 91, 92, 93, - 94, 95, 96, 55, 97, 98, 99, 55, 100, 101, 102, 103, 104, 105, 106, 55, - 41, 107, 108, 109, 110, 29, 111, 112, 41, 41, 41, 41, 41, 41, 41, 41, - 41, 41, 113, 41, 114, 115, 116, 41, 117, 41, 118, 119, 120, 41, 41, 121, - 94, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 122, 123, 41, 41, 124, - 125, 126, 127, 128, 41, 129, 130, 131, 132, 41, 41, 133, 41, 134, 41, 135, - 136, 137, 138, 139, 41, 140, 141, 55, 142, 41, 143, 144, 145, 146, 55, 55, - 147, 129, 148, 149, 150, 151, 41, 152, 41, 153, 154, 155, 55, 55, 156, 157, - 18, 18, 18, 18, 18, 18, 23, 158, 8, 8, 8, 8, 159, 8, 8, 8, - 160, 161, 162, 163, 161, 164, 165, 166, 167, 168, 169, 170, 171, 55, 172, 173, - 174, 175, 176, 30, 177, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, - 178, 179, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 180, 30, 181, - 55, 55, 182, 183, 55, 55, 184, 185, 55, 55, 55, 55, 186, 55, 187, 188, - 29, 189, 190, 191, 8, 8, 8, 192, 18, 193, 41, 194, 195, 196, 196, 23, - 197, 198, 55, 55, 55, 55, 55, 55, 199, 200, 94, 41, 201, 94, 41, 
112, - 202, 203, 41, 41, 204, 205, 55, 206, 41, 41, 41, 41, 41, 135, 55, 55, - 41, 41, 41, 41, 41, 41, 207, 55, 41, 41, 41, 41, 207, 55, 206, 208, - 209, 210, 8, 211, 212, 41, 41, 213, 214, 215, 8, 216, 217, 218, 55, 219, - 220, 221, 41, 222, 223, 129, 224, 225, 50, 226, 227, 136, 58, 228, 229, 55, - 41, 230, 231, 232, 41, 233, 234, 235, 236, 237, 55, 55, 55, 55, 41, 238, - 41, 41, 41, 41, 41, 239, 240, 241, 41, 41, 41, 242, 41, 41, 243, 55, - 244, 245, 246, 41, 41, 247, 248, 41, 41, 249, 206, 41, 250, 41, 251, 252, - 253, 254, 255, 256, 41, 41, 41, 257, 258, 2, 259, 260, 261, 262, 263, 264, - 265, 266, 267, 55, 41, 41, 41, 205, 55, 55, 41, 121, 55, 55, 55, 268, - 55, 55, 55, 55, 136, 41, 269, 55, 262, 206, 270, 55, 271, 41, 272, 55, - 29, 273, 274, 41, 271, 131, 55, 55, 275, 276, 135, 55, 55, 55, 55, 55, - 135, 243, 55, 55, 41, 277, 55, 55, 278, 279, 280, 136, 55, 55, 55, 55, - 41, 135, 135, 281, 55, 55, 55, 55, 41, 41, 282, 55, 55, 55, 55, 55, - 150, 283, 284, 79, 150, 285, 286, 287, 150, 288, 289, 55, 150, 228, 290, 55, - 55, 55, 55, 55, 41, 291, 131, 55, 41, 41, 41, 204, 55, 55, 55, 55, - 41, 41, 41, 292, 55, 55, 55, 55, 41, 204, 55, 55, 55, 55, 55, 55, - 41, 293, 55, 55, 55, 55, 55, 55, 41, 41, 294, 295, 296, 55, 55, 55, - 297, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 298, 299, 300, 55, 55, - 55, 55, 301, 55, 55, 55, 55, 55, 302, 303, 304, 305, 306, 307, 308, 309, - 310, 311, 312, 313, 314, 302, 303, 315, 305, 316, 317, 318, 309, 319, 320, 321, - 322, 323, 324, 189, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 55, 55, - 41, 41, 41, 41, 41, 41, 195, 55, 41, 121, 41, 41, 41, 41, 41, 41, - 271, 55, 55, 55, 55, 55, 55, 55, 335, 336, 336, 336, 55, 55, 55, 55, - 23, 23, 23, 23, 23, 23, 23, 337, -}; - -static RE_UINT8 re_sentence_break_stage_4[] = { - 0, 0, 1, 2, 0, 0, 0, 0, 3, 4, 5, 6, 7, 7, 8, 9, - 10, 11, 11, 11, 11, 11, 12, 13, 14, 15, 15, 15, 15, 15, 16, 13, - 0, 17, 0, 0, 0, 0, 0, 0, 18, 0, 19, 20, 0, 21, 19, 0, - 11, 11, 11, 11, 11, 22, 11, 23, 15, 15, 15, 
15, 15, 24, 15, 15, - 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 26, 26, - 26, 26, 27, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 28, 29, - 30, 31, 32, 33, 28, 31, 34, 28, 25, 31, 29, 31, 32, 26, 35, 34, - 36, 28, 31, 26, 26, 26, 26, 27, 25, 25, 25, 25, 30, 31, 25, 25, - 25, 25, 25, 25, 25, 15, 33, 30, 26, 23, 25, 25, 15, 15, 15, 15, - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 37, 15, 15, - 15, 15, 15, 15, 15, 15, 38, 36, 39, 40, 36, 36, 41, 0, 0, 0, - 15, 42, 0, 43, 0, 0, 0, 0, 44, 44, 44, 44, 44, 44, 44, 44, - 44, 44, 44, 44, 25, 45, 46, 39, 0, 47, 22, 48, 32, 11, 11, 11, - 49, 11, 11, 15, 15, 15, 15, 15, 15, 15, 15, 50, 33, 34, 25, 25, - 25, 25, 25, 25, 15, 51, 30, 32, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 15, 15, 15, 15, 52, 44, 53, 25, 25, 25, 25, 25, - 28, 26, 26, 29, 25, 25, 25, 25, 25, 25, 0, 0, 10, 11, 11, 11, - 11, 11, 11, 11, 11, 22, 54, 55, 14, 15, 15, 15, 15, 15, 15, 15, - 15, 15, 56, 0, 57, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 58, - 59, 58, 0, 0, 36, 36, 36, 36, 36, 36, 60, 0, 36, 0, 0, 0, - 61, 62, 0, 63, 44, 44, 64, 65, 36, 36, 36, 36, 36, 36, 36, 36, - 36, 36, 66, 44, 44, 44, 44, 44, 7, 7, 67, 68, 69, 36, 36, 36, - 36, 36, 36, 36, 36, 70, 44, 71, 44, 72, 73, 74, 7, 7, 75, 76, - 77, 0, 0, 78, 79, 36, 36, 36, 36, 36, 36, 36, 44, 44, 44, 44, - 44, 44, 64, 80, 36, 36, 36, 36, 36, 81, 44, 44, 82, 0, 0, 0, - 7, 7, 75, 36, 36, 36, 36, 36, 36, 36, 66, 44, 44, 41, 83, 0, - 36, 36, 36, 36, 36, 81, 84, 44, 44, 85, 85, 86, 0, 0, 0, 0, - 36, 36, 36, 36, 36, 36, 85, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 87, 36, 36, 88, 0, 0, 0, 0, 0, 44, 44, 44, 44, 44, 44, 64, - 44, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 81, 89, - 44, 44, 44, 44, 85, 44, 36, 36, 81, 90, 7, 7, 80, 36, 80, 36, - 57, 80, 36, 76, 76, 36, 36, 36, 36, 36, 87, 36, 43, 40, 41, 89, - 44, 91, 91, 92, 0, 93, 0, 94, 81, 95, 7, 7, 41, 0, 0, 0, - 57, 80, 60, 96, 76, 36, 36, 36, 36, 36, 87, 36, 87, 97, 41, 73, - 64, 93, 91, 86, 98, 0, 80, 43, 0, 95, 7, 7, 74, 99, 0, 
0, - 57, 80, 36, 94, 94, 36, 36, 36, 36, 36, 87, 36, 87, 80, 41, 89, - 44, 58, 58, 86, 88, 0, 0, 0, 81, 95, 7, 7, 0, 0, 0, 0, - 44, 91, 91, 86, 0, 100, 0, 94, 81, 95, 7, 7, 54, 0, 0, 0, - 101, 80, 60, 40, 87, 41, 97, 87, 96, 88, 60, 40, 36, 36, 41, 100, - 64, 100, 73, 86, 88, 93, 0, 0, 0, 95, 7, 7, 0, 0, 0, 0, - 57, 80, 36, 87, 87, 36, 36, 36, 36, 36, 87, 36, 36, 80, 41, 102, - 44, 73, 73, 86, 0, 59, 41, 0, 100, 80, 36, 87, 87, 36, 36, 36, - 36, 36, 87, 36, 36, 80, 41, 89, 44, 73, 73, 86, 0, 59, 0, 103, - 81, 95, 7, 7, 97, 0, 0, 0, 36, 36, 36, 36, 36, 36, 60, 102, - 44, 73, 73, 92, 0, 93, 0, 0, 81, 95, 7, 7, 0, 0, 40, 36, - 100, 80, 36, 36, 36, 60, 40, 36, 36, 36, 36, 36, 94, 36, 36, 54, - 36, 60, 104, 93, 44, 105, 44, 44, 0, 0, 0, 0, 100, 0, 0, 0, - 80, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 79, 44, 64, 0, - 36, 66, 44, 64, 7, 7, 106, 0, 97, 76, 43, 54, 0, 36, 80, 36, - 80, 107, 40, 80, 79, 44, 58, 82, 36, 43, 44, 86, 7, 7, 106, 36, - 88, 0, 0, 0, 0, 0, 86, 0, 7, 7, 106, 0, 0, 108, 109, 110, - 36, 36, 80, 36, 36, 36, 36, 36, 36, 36, 36, 88, 57, 44, 44, 44, - 44, 73, 36, 85, 44, 44, 57, 44, 44, 44, 44, 44, 44, 44, 44, 111, - 0, 104, 0, 0, 0, 0, 0, 0, 36, 36, 66, 44, 44, 44, 44, 112, - 7, 7, 113, 0, 36, 81, 74, 81, 89, 72, 44, 74, 85, 69, 36, 36, - 81, 44, 44, 84, 7, 7, 114, 86, 11, 49, 0, 115, 36, 36, 36, 36, - 36, 36, 36, 36, 36, 36, 60, 36, 36, 36, 87, 41, 36, 60, 87, 41, - 36, 36, 87, 41, 36, 36, 36, 36, 36, 36, 36, 36, 87, 41, 36, 60, - 87, 41, 36, 36, 36, 60, 36, 36, 36, 36, 36, 36, 87, 41, 36, 36, - 36, 36, 36, 36, 36, 36, 60, 57, 116, 9, 117, 0, 0, 0, 0, 0, - 36, 36, 36, 36, 0, 0, 0, 0, 36, 36, 36, 36, 36, 88, 0, 0, - 36, 36, 36, 118, 36, 36, 36, 36, 119, 36, 36, 36, 36, 36, 120, 121, - 36, 36, 60, 40, 88, 0, 0, 0, 36, 36, 36, 87, 81, 111, 0, 0, - 36, 36, 36, 36, 81, 122, 0, 0, 36, 36, 36, 36, 81, 0, 0, 0, - 36, 36, 36, 87, 123, 0, 0, 0, 36, 36, 36, 36, 36, 44, 44, 44, - 44, 44, 44, 44, 44, 96, 0, 99, 7, 7, 106, 0, 0, 0, 0, 0, - 124, 0, 125, 126, 7, 7, 
106, 0, 36, 36, 36, 36, 36, 36, 0, 0, - 36, 36, 127, 0, 36, 36, 36, 36, 36, 36, 36, 36, 36, 41, 0, 0, - 36, 36, 36, 36, 36, 36, 36, 88, 44, 44, 44, 0, 44, 44, 44, 0, - 0, 90, 7, 7, 36, 36, 36, 36, 36, 36, 36, 41, 36, 88, 0, 0, - 36, 36, 36, 0, 44, 44, 44, 44, 69, 36, 86, 0, 7, 7, 106, 0, - 36, 36, 36, 36, 36, 66, 44, 0, 36, 36, 36, 36, 36, 85, 44, 64, - 44, 44, 44, 44, 44, 44, 44, 91, 7, 7, 106, 0, 7, 7, 106, 0, - 0, 96, 128, 0, 0, 0, 0, 0, 44, 69, 36, 36, 36, 36, 36, 36, - 44, 69, 36, 0, 7, 7, 113, 129, 0, 0, 93, 44, 44, 0, 0, 0, - 112, 36, 36, 36, 36, 36, 36, 36, 85, 44, 44, 74, 7, 7, 75, 36, - 36, 81, 44, 44, 44, 0, 0, 0, 36, 44, 44, 44, 44, 44, 9, 117, - 7, 7, 106, 80, 7, 7, 75, 36, 36, 36, 36, 36, 36, 36, 36, 130, - 0, 0, 0, 0, 64, 44, 44, 44, 44, 44, 69, 79, 81, 131, 0, 0, - 44, 64, 0, 0, 0, 0, 0, 44, 25, 25, 25, 25, 25, 34, 15, 27, - 15, 15, 11, 11, 15, 39, 11, 132, 15, 15, 11, 11, 15, 15, 11, 11, - 15, 39, 11, 132, 15, 15, 133, 133, 15, 15, 11, 11, 15, 15, 15, 39, - 15, 15, 11, 11, 15, 134, 11, 135, 46, 134, 11, 136, 15, 46, 11, 0, - 15, 15, 11, 136, 46, 134, 11, 136, 137, 137, 138, 139, 140, 141, 142, 142, - 0, 143, 144, 145, 0, 0, 146, 147, 0, 148, 147, 0, 0, 0, 0, 149, - 61, 150, 61, 61, 21, 0, 0, 151, 0, 0, 0, 146, 15, 15, 15, 42, - 0, 0, 0, 0, 44, 44, 44, 44, 44, 44, 44, 44, 111, 0, 0, 0, - 47, 152, 153, 154, 23, 115, 10, 132, 0, 155, 48, 156, 11, 38, 157, 33, - 0, 158, 39, 159, 0, 0, 0, 0, 160, 38, 88, 0, 0, 0, 0, 0, - 0, 0, 142, 0, 0, 0, 0, 0, 0, 0, 146, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 161, 11, 11, 15, 15, 39, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 4, 162, 0, 0, 142, 142, 142, 5, 0, 0, - 0, 146, 0, 0, 0, 0, 0, 0, 0, 163, 142, 142, 0, 0, 0, 0, - 4, 142, 142, 142, 142, 142, 121, 0, 0, 0, 0, 0, 0, 0, 142, 0, - 0, 0, 0, 0, 0, 0, 0, 5, 11, 11, 11, 22, 15, 15, 15, 15, - 15, 15, 15, 15, 15, 15, 15, 24, 31, 164, 26, 32, 25, 29, 15, 33, - 25, 42, 152, 165, 53, 0, 0, 0, 15, 166, 0, 21, 36, 36, 36, 36, - 36, 36, 0, 96, 0, 0, 0, 93, 36, 36, 36, 36, 36, 60, 0, 0, - 
36, 60, 36, 60, 36, 60, 36, 60, 142, 142, 142, 5, 0, 0, 0, 5, - 142, 142, 5, 167, 0, 0, 0, 0, 168, 80, 142, 142, 5, 142, 142, 169, - 80, 36, 81, 44, 80, 41, 36, 88, 36, 36, 36, 36, 36, 60, 59, 80, - 0, 80, 36, 36, 36, 36, 36, 36, 36, 36, 36, 41, 80, 36, 36, 36, - 36, 36, 36, 60, 0, 0, 0, 0, 36, 36, 36, 36, 36, 36, 60, 0, - 0, 0, 0, 0, 36, 36, 36, 36, 36, 36, 36, 88, 0, 0, 0, 0, - 36, 36, 36, 36, 36, 36, 36, 170, 36, 36, 36, 171, 36, 36, 36, 36, - 7, 7, 75, 0, 0, 0, 0, 0, 25, 25, 25, 172, 64, 44, 44, 173, - 25, 25, 25, 25, 25, 25, 0, 93, 36, 36, 36, 36, 174, 9, 0, 0, - 0, 0, 0, 0, 0, 96, 36, 36, 175, 25, 25, 25, 27, 25, 25, 25, - 25, 25, 25, 25, 15, 15, 26, 30, 25, 25, 176, 177, 25, 0, 0, 0, - 25, 25, 178, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 179, 36, - 180, 180, 66, 36, 36, 36, 36, 36, 66, 44, 0, 0, 0, 0, 0, 0, - 36, 36, 36, 36, 36, 129, 0, 0, 74, 36, 36, 36, 36, 36, 36, 36, - 44, 111, 0, 129, 7, 7, 106, 0, 44, 44, 44, 44, 74, 36, 96, 0, - 36, 81, 44, 174, 36, 36, 36, 36, 36, 66, 44, 44, 44, 0, 0, 0, - 36, 36, 36, 36, 66, 44, 44, 44, 111, 0, 147, 96, 7, 7, 106, 0, - 36, 36, 85, 44, 44, 64, 0, 0, 66, 36, 36, 86, 7, 7, 106, 181, - 36, 36, 36, 36, 36, 60, 182, 0, 36, 36, 36, 36, 89, 72, 69, 81, - 127, 0, 0, 0, 0, 0, 96, 41, 36, 36, 66, 44, 183, 184, 0, 0, - 80, 60, 80, 60, 80, 60, 0, 0, 36, 60, 36, 60, 0, 0, 0, 0, - 66, 44, 185, 86, 7, 7, 106, 0, 36, 0, 0, 0, 36, 36, 36, 36, - 36, 60, 96, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 0, - 36, 36, 36, 41, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 41, 0, - 15, 24, 0, 0, 186, 15, 0, 187, 36, 36, 87, 36, 36, 60, 36, 43, - 94, 87, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 41, 0, 0, 0, - 0, 0, 0, 0, 96, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 188, - 36, 36, 36, 36, 40, 36, 36, 36, 36, 36, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 36, 36, 36, 0, 44, 44, 44, 44, 189, 4, 121, 0, - 44, 64, 0, 0, 190, 169, 142, 142, 142, 191, 121, 0, 6, 192, 193, 162, - 140, 0, 0, 0, 36, 87, 36, 36, 36, 36, 36, 36, 36, 36, 36, 194, - 56, 0, 5, 6, 0, 0, 195, 9, 
14, 15, 15, 15, 15, 15, 16, 196, - 197, 198, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 81, - 36, 36, 36, 36, 36, 36, 36, 60, 40, 36, 40, 36, 40, 36, 40, 88, - 0, 0, 0, 0, 0, 0, 199, 0, 36, 36, 36, 80, 36, 36, 36, 36, - 36, 60, 36, 36, 36, 36, 60, 94, 36, 36, 36, 41, 36, 36, 36, 41, - 0, 0, 0, 0, 0, 0, 0, 98, 36, 36, 36, 36, 88, 0, 0, 0, - 36, 36, 60, 0, 0, 0, 0, 0, 36, 36, 36, 36, 36, 36, 36, 41, - 36, 0, 36, 36, 80, 41, 0, 0, 11, 11, 15, 15, 15, 15, 15, 15, - 15, 15, 15, 15, 36, 36, 36, 36, 36, 41, 87, 36, 36, 36, 36, 36, - 36, 36, 36, 36, 36, 94, 88, 76, 36, 36, 36, 36, 36, 36, 0, 40, - 85, 59, 0, 44, 36, 80, 80, 36, 36, 36, 36, 36, 36, 0, 64, 93, - 0, 0, 0, 0, 0, 129, 0, 0, 36, 36, 36, 36, 60, 0, 0, 0, - 36, 36, 88, 0, 0, 0, 0, 0, 36, 36, 36, 36, 36, 36, 44, 44, - 44, 185, 117, 0, 0, 0, 0, 0, 36, 36, 36, 36, 44, 44, 64, 200, - 147, 0, 0, 0, 36, 36, 36, 36, 36, 36, 88, 0, 7, 7, 106, 0, - 36, 66, 44, 44, 44, 201, 7, 7, 181, 0, 0, 0, 0, 0, 0, 0, - 69, 202, 0, 0, 7, 7, 106, 0, 36, 36, 66, 44, 44, 44, 0, 0, - 60, 0, 0, 0, 0, 0, 0, 0, 36, 36, 36, 36, 36, 36, 88, 0, - 36, 88, 0, 0, 85, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 64, - 0, 0, 0, 93, 112, 36, 36, 36, 41, 0, 0, 0, 0, 0, 0, 0, - 0, 57, 86, 57, 203, 61, 204, 44, 64, 57, 44, 0, 0, 0, 0, 0, - 0, 0, 100, 86, 0, 0, 0, 0, 100, 111, 0, 0, 0, 0, 0, 0, - 11, 11, 11, 11, 11, 11, 154, 15, 15, 15, 15, 15, 15, 11, 11, 11, - 11, 11, 11, 154, 15, 134, 15, 15, 15, 15, 11, 11, 11, 11, 11, 11, - 154, 15, 15, 15, 15, 15, 15, 48, 47, 205, 10, 48, 11, 154, 166, 14, - 15, 14, 15, 15, 11, 11, 11, 11, 11, 11, 154, 15, 15, 15, 15, 15, - 15, 49, 22, 10, 11, 48, 11, 206, 15, 15, 15, 15, 15, 15, 49, 22, - 11, 155, 161, 11, 206, 15, 15, 15, 15, 15, 15, 11, 11, 11, 11, 11, - 11, 154, 15, 15, 15, 15, 15, 15, 11, 11, 11, 154, 15, 15, 15, 15, - 154, 15, 15, 15, 15, 15, 15, 11, 11, 11, 11, 11, 11, 154, 15, 15, - 15, 15, 15, 15, 11, 11, 11, 11, 15, 39, 11, 11, 11, 11, 11, 11, - 206, 15, 15, 15, 15, 15, 24, 15, 33, 11, 11, 11, 11, 11, 
22, 15, - 15, 15, 15, 15, 15, 134, 15, 11, 11, 11, 11, 11, 11, 206, 15, 15, - 15, 15, 15, 24, 15, 33, 11, 11, 15, 15, 134, 15, 11, 11, 11, 11, - 11, 11, 206, 15, 15, 15, 15, 15, 24, 15, 27, 95, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 36, 80, 36, 36, 36, 36, 36, 36, - 97, 76, 80, 36, 60, 36, 107, 0, 103, 96, 107, 80, 97, 76, 107, 107, - 97, 76, 60, 36, 60, 36, 80, 43, 36, 36, 94, 36, 36, 36, 36, 0, - 80, 80, 94, 36, 36, 36, 36, 0, 20, 0, 0, 0, 0, 0, 0, 0, - 61, 61, 61, 61, 61, 61, 61, 61, 44, 44, 44, 44, 0, 0, 0, 0, -}; - -static RE_UINT8 re_sentence_break_stage_5[] = { - 0, 0, 0, 0, 0, 6, 2, 6, 6, 1, 0, 0, 6, 12, 13, 0, - 0, 0, 0, 13, 13, 13, 0, 0, 14, 14, 11, 0, 10, 10, 10, 10, - 10, 10, 14, 0, 0, 0, 0, 12, 0, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 13, 0, 13, 0, 0, 0, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 13, 0, 4, 0, 0, 6, 0, 0, 0, 0, 0, 7, 13, - 0, 5, 0, 0, 0, 7, 0, 0, 8, 8, 8, 0, 8, 8, 8, 7, - 7, 7, 7, 0, 8, 7, 8, 7, 7, 8, 7, 8, 7, 7, 8, 7, - 8, 8, 7, 8, 7, 8, 7, 7, 7, 8, 8, 7, 8, 7, 8, 8, - 7, 8, 8, 8, 7, 7, 8, 8, 8, 7, 7, 7, 8, 7, 7, 9, - 9, 9, 9, 9, 9, 7, 7, 7, 7, 9, 9, 9, 7, 7, 0, 0, - 0, 0, 9, 9, 9, 9, 0, 0, 7, 0, 0, 0, 9, 0, 9, 0, - 3, 3, 3, 3, 9, 0, 8, 7, 0, 0, 7, 7, 0, 0, 8, 0, - 8, 0, 8, 8, 8, 8, 0, 8, 7, 7, 7, 8, 8, 7, 0, 8, - 8, 7, 0, 3, 3, 3, 8, 7, 0, 9, 0, 0, 12, 14, 12, 0, - 0, 12, 0, 0, 0, 3, 3, 3, 3, 3, 0, 3, 0, 3, 3, 0, - 9, 9, 9, 0, 5, 5, 5, 5, 5, 0, 0, 0, 14, 14, 0, 0, - 3, 3, 3, 0, 5, 0, 0, 12, 9, 9, 9, 3, 10, 10, 0, 10, - 10, 0, 9, 9, 3, 9, 9, 9, 12, 9, 3, 3, 3, 5, 0, 3, - 3, 9, 9, 3, 3, 0, 3, 3, 3, 3, 9, 9, 10, 10, 9, 9, - 9, 0, 0, 9, 12, 12, 12, 0, 0, 0, 0, 5, 9, 3, 9, 9, - 0, 9, 9, 9, 9, 9, 3, 3, 3, 9, 0, 0, 14, 12, 9, 0, - 3, 3, 9, 3, 9, 3, 3, 3, 3, 3, 0, 0, 9, 0, 9, 9, - 9, 0, 0, 0, 3, 9, 3, 3, 12, 12, 10, 10, 3, 0, 0, 3, - 3, 3, 9, 0, 0, 0, 0, 3, 9, 9, 0, 9, 0, 0, 10, 10, - 0, 0, 0, 9, 0, 9, 9, 0, 0, 3, 0, 0, 9, 3, 0, 0, - 0, 0, 3, 3, 0, 0, 3, 9, 0, 9, 3, 3, 0, 0, 9, 0, - 0, 0, 3, 0, 3, 0, 3, 0, 10, 10, 0, 0, 0, 9, 0, 9, - 0, 3, 0, 3, 0, 3, 13, 13, 
13, 13, 3, 3, 3, 0, 0, 0, - 3, 3, 3, 9, 10, 10, 12, 12, 10, 10, 3, 3, 0, 8, 0, 0, - 0, 0, 12, 0, 12, 0, 0, 0, 9, 0, 12, 9, 6, 9, 9, 9, - 9, 9, 9, 13, 13, 0, 0, 0, 3, 12, 12, 0, 9, 0, 3, 3, - 0, 0, 14, 12, 14, 12, 0, 3, 3, 3, 5, 0, 9, 3, 9, 0, - 12, 12, 12, 12, 0, 0, 12, 12, 9, 9, 12, 12, 3, 9, 9, 0, - 8, 8, 0, 0, 0, 8, 0, 8, 7, 0, 7, 7, 8, 0, 7, 0, - 8, 0, 0, 0, 6, 6, 6, 6, 6, 6, 6, 5, 3, 3, 5, 5, - 0, 0, 0, 14, 14, 0, 0, 0, 13, 13, 13, 13, 11, 0, 0, 0, - 4, 4, 5, 5, 5, 5, 5, 6, 0, 13, 13, 0, 12, 12, 0, 0, - 0, 13, 13, 12, 0, 0, 0, 6, 5, 0, 5, 5, 0, 13, 13, 7, - 0, 0, 0, 8, 0, 0, 7, 8, 8, 8, 7, 7, 8, 0, 8, 0, - 8, 8, 0, 7, 9, 7, 0, 0, 0, 8, 7, 7, 0, 0, 7, 0, - 9, 9, 9, 8, 0, 0, 8, 8, 13, 13, 13, 0, 0, 0, 13, 13, - 8, 7, 7, 8, 7, 8, 7, 3, 7, 7, 0, 7, 0, 0, 12, 9, - 6, 14, 12, 0, 0, 13, 13, 13, 9, 9, 0, 12, 9, 0, 12, 12, - 8, 7, 9, 3, 3, 3, 0, 9, 3, 3, 0, 12, 0, 0, 8, 7, - 9, 0, 0, 8, 7, 8, 7, 0, 8, 7, 8, 0, 7, 7, 7, 9, - 9, 9, 3, 9, 0, 12, 12, 12, 0, 0, 9, 3, 12, 12, 9, 9, - 9, 3, 3, 0, 3, 3, 3, 12, 0, 0, 0, 7, 0, 9, 3, 9, - 9, 9, 13, 13, 14, 14, 0, 14, 0, 14, 14, 0, 13, 0, 0, 13, - 0, 14, 12, 12, 14, 13, 13, 13, 9, 0, 0, 5, 0, 0, 14, 0, - 0, 13, 0, 13, 13, 12, 13, 13, 14, 0, 9, 9, 0, 5, 5, 5, - 0, 5, 12, 12, 3, 0, 10, 10, 9, 12, 12, 0, 3, 3, 3, 5, - 5, 5, 5, 3, 0, 8, 8, 0, 8, 0, 7, 7, -}; - -/* Sentence_Break: 5596 bytes. */ - -RE_UINT32 re_get_sentence_break(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 12; - code = ch ^ (f << 12); - pos = (RE_UINT32)re_sentence_break_stage_1[f] << 4; - f = code >> 8; - code ^= f << 8; - pos = (RE_UINT32)re_sentence_break_stage_2[pos + f] << 3; - f = code >> 5; - code ^= f << 5; - pos = (RE_UINT32)re_sentence_break_stage_3[pos + f] << 3; - f = code >> 2; - code ^= f << 2; - pos = (RE_UINT32)re_sentence_break_stage_4[pos + f] << 2; - value = re_sentence_break_stage_5[pos + code]; - - return value; -} - -/* Math. 
*/ - -static RE_UINT8 re_math_stage_1[] = { - 0, 1, 2, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, -}; - -static RE_UINT8 re_math_stage_2[] = { - 0, 1, 1, 1, 2, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5, 1, 1, 6, 1, 1, -}; - -static RE_UINT8 re_math_stage_3[] = { - 0, 1, 1, 2, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 4, 5, 6, 7, 1, 8, 9, 10, 1, 6, 6, 11, 1, 1, 1, 1, - 1, 1, 1, 12, 1, 1, 13, 14, 1, 1, 1, 1, 15, 16, 17, 18, - 1, 1, 1, 1, 1, 1, 19, 1, -}; - -static RE_UINT8 re_math_stage_4[] = { - 0, 1, 2, 3, 0, 4, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, - 9, 10, 11, 12, 13, 0, 14, 15, 16, 17, 18, 0, 19, 20, 21, 22, - 23, 23, 23, 23, 23, 23, 23, 23, 24, 25, 0, 26, 27, 28, 29, 30, - 0, 0, 0, 0, 0, 31, 32, 33, 34, 0, 35, 36, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 23, 23, 0, 19, 37, 0, 0, 0, 0, 0, - 0, 38, 0, 0, 0, 0, 0, 0, 0, 0, 0, 39, 0, 0, 0, 0, - 1, 3, 3, 0, 0, 0, 0, 40, 23, 23, 41, 23, 42, 43, 44, 23, - 45, 46, 47, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 48, 23, 23, - 23, 23, 23, 23, 23, 23, 49, 23, 44, 50, 51, 52, 53, 54, 0, 55, -}; - -static RE_UINT8 re_math_stage_5[] = { - 0, 0, 0, 0, 0, 8, 0, 112, 0, 0, 0, 64, 0, 0, 0, 80, - 0, 16, 2, 0, 0, 0, 128, 0, 0, 0, 39, 0, 0, 0, 115, 0, - 192, 1, 0, 0, 0, 0, 64, 0, 0, 0, 28, 0, 17, 0, 4, 0, - 30, 0, 0, 124, 0, 124, 0, 0, 0, 0, 255, 31, 98, 248, 0, 0, - 132, 252, 47, 63, 16, 179, 251, 241, 255, 11, 0, 0, 0, 0, 255, 255, - 255, 126, 195, 240, 255, 255, 255, 47, 48, 0, 240, 255, 255, 255, 255, 255, - 0, 15, 0, 0, 3, 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 248, - 255, 255, 191, 0, 0, 0, 1, 240, 7, 0, 0, 0, 3, 192, 255, 240, - 195, 140, 15, 0, 148, 31, 0, 255, 96, 0, 0, 0, 5, 0, 0, 0, - 15, 224, 0, 0, 159, 31, 0, 0, 0, 2, 0, 0, 126, 1, 0, 0, - 4, 30, 0, 0, 255, 255, 223, 255, 255, 255, 255, 223, 100, 222, 255, 235, - 239, 255, 
255, 255, 191, 231, 223, 223, 255, 255, 255, 123, 95, 252, 253, 255, - 63, 255, 255, 255, 255, 207, 255, 255, 150, 254, 247, 10, 132, 234, 150, 170, - 150, 247, 247, 94, 255, 251, 255, 15, 238, 251, 255, 15, 0, 0, 3, 0, -}; - -/* Math: 538 bytes. */ - -RE_UINT32 re_get_math(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 15; - code = ch ^ (f << 15); - pos = (RE_UINT32)re_math_stage_1[f] << 4; - f = code >> 11; - code ^= f << 11; - pos = (RE_UINT32)re_math_stage_2[pos + f] << 3; - f = code >> 8; - code ^= f << 8; - pos = (RE_UINT32)re_math_stage_3[pos + f] << 3; - f = code >> 5; - code ^= f << 5; - pos = (RE_UINT32)re_math_stage_4[pos + f] << 5; - pos += code; - value = (re_math_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Alphabetic. */ - -static RE_UINT8 re_alphabetic_stage_1[] = { - 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, -}; - -static RE_UINT8 re_alphabetic_stage_2[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 9, 10, 11, 7, 7, 7, 7, 12, 13, 13, 13, 13, 14, - 15, 16, 17, 13, 18, 13, 19, 13, 13, 13, 13, 13, 13, 20, 13, 13, - 13, 13, 13, 13, 13, 13, 21, 13, 13, 13, 22, 13, 13, 23, 13, 13, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 24, 7, 25, 26, 13, 13, 13, 13, 13, 13, 13, 27, - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, -}; - -static RE_UINT8 re_alphabetic_stage_3[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 1, 17, 18, 19, 1, 20, 21, 22, 23, 24, 25, 26, 27, 1, 28, - 29, 30, 31, 31, 32, 31, 31, 31, 31, 31, 31, 31, 33, 34, 35, 31, - 36, 37, 31, 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 38, 1, 1, 1, 1, 1, 1, 1, 1, 1, 39, - 1, 1, 1, 1, 40, 1, 41, 42, 43, 44, 45, 46, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 47, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 1, 48, 49, 1, 50, 51, 52, 53, 54, 55, 56, 57, 31, 31, 31, - 58, 59, 60, 61, 
62, 31, 31, 31, 63, 64, 31, 31, 31, 31, 65, 31, - 1, 1, 1, 66, 67, 31, 31, 31, 1, 1, 1, 1, 68, 31, 31, 31, - 1, 1, 69, 31, 31, 31, 31, 70, 71, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 72, 73, 74, 75, 31, 31, 31, 31, 31, 31, 76, 31, - 1, 1, 1, 1, 1, 1, 77, 1, 1, 1, 1, 1, 1, 1, 1, 78, - 79, 31, 31, 31, 31, 31, 31, 31, 1, 1, 79, 31, 31, 31, 31, 31, -}; - -static RE_UINT8 re_alphabetic_stage_4[] = { - 0, 0, 1, 1, 0, 2, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 5, 6, 0, 0, 7, 8, 9, 10, 4, 11, - 4, 4, 4, 4, 12, 4, 4, 4, 4, 13, 14, 15, 16, 17, 18, 19, - 20, 4, 21, 22, 4, 4, 23, 24, 25, 4, 26, 4, 4, 27, 28, 29, - 30, 31, 32, 0, 0, 33, 0, 34, 4, 35, 36, 37, 38, 39, 40, 41, - 42, 43, 44, 45, 46, 47, 48, 49, 38, 47, 50, 51, 52, 53, 54, 0, - 55, 56, 57, 49, 58, 56, 59, 60, 58, 61, 62, 63, 64, 65, 66, 67, - 15, 68, 69, 0, 70, 71, 72, 0, 73, 0, 74, 75, 76, 77, 0, 0, - 4, 78, 25, 79, 80, 4, 81, 82, 4, 4, 83, 4, 84, 85, 86, 4, - 87, 4, 88, 0, 89, 4, 4, 90, 15, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 91, 1, 4, 4, 92, 93, 94, 94, 95, 4, 96, 97, 0, - 0, 4, 4, 98, 4, 99, 4, 100, 77, 101, 25, 102, 4, 103, 104, 0, - 105, 4, 106, 107, 0, 108, 0, 0, 4, 109, 110, 0, 4, 111, 4, 112, - 4, 100, 113, 114, 0, 0, 0, 115, 4, 4, 4, 4, 4, 4, 0, 0, - 116, 4, 117, 114, 4, 118, 119, 120, 0, 0, 0, 121, 122, 0, 0, 0, - 123, 124, 125, 4, 126, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 127, 4, 104, 4, 128, 106, 4, 4, 4, 4, 129, - 4, 81, 4, 130, 131, 132, 132, 4, 0, 133, 0, 0, 0, 0, 0, 0, - 134, 135, 15, 4, 136, 15, 4, 82, 137, 138, 4, 4, 139, 68, 0, 25, - 4, 4, 4, 4, 4, 100, 0, 0, 4, 4, 4, 4, 4, 4, 31, 0, - 4, 4, 4, 4, 31, 0, 25, 114, 140, 141, 4, 142, 143, 4, 4, 89, - 144, 145, 4, 4, 146, 147, 0, 148, 149, 16, 4, 94, 4, 4, 49, 150, - 28, 99, 151, 77, 4, 152, 133, 0, 4, 131, 153, 154, 4, 106, 155, 156, - 157, 158, 0, 0, 0, 0, 4, 147, 4, 4, 4, 4, 4, 159, 160, 105, - 4, 4, 4, 161, 4, 4, 162, 0, 163, 164, 165, 4, 4, 27, 166, 4, - 4, 114, 25, 4, 167, 4, 16, 168, 0, 0, 0, 169, 4, 4, 4, 77, - 0, 1, 
1, 170, 4, 106, 171, 0, 172, 173, 174, 0, 4, 4, 4, 68, - 0, 0, 4, 90, 0, 0, 0, 0, 0, 0, 0, 0, 77, 4, 175, 0, - 106, 25, 147, 0, 114, 4, 176, 0, 4, 4, 4, 4, 114, 0, 0, 0, - 177, 178, 100, 0, 0, 0, 0, 0, 100, 162, 0, 0, 4, 179, 0, 0, - 180, 94, 0, 77, 0, 0, 0, 0, 4, 100, 100, 151, 0, 0, 0, 0, - 4, 4, 126, 0, 0, 0, 0, 0, 4, 4, 181, 0, 145, 32, 25, 126, - 4, 151, 0, 0, 4, 4, 182, 0, 0, 0, 0, 0, 4, 100, 0, 0, - 4, 4, 4, 139, 0, 0, 0, 0, 4, 4, 4, 183, 0, 0, 0, 0, - 4, 139, 0, 0, 0, 0, 0, 0, 4, 32, 0, 0, 0, 0, 0, 0, - 4, 4, 184, 106, 166, 0, 0, 0, 185, 0, 0, 0, 0, 0, 0, 0, - 4, 4, 186, 4, 187, 188, 189, 4, 190, 191, 192, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 193, 194, 82, 186, 186, 128, 128, 195, 195, 196, 0, - 189, 197, 198, 199, 200, 201, 0, 0, 4, 4, 4, 4, 4, 4, 131, 0, - 4, 90, 4, 4, 4, 4, 4, 4, 114, 0, 0, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_alphabetic_stage_5[] = { - 0, 0, 0, 0, 254, 255, 255, 7, 0, 4, 32, 4, 255, 255, 127, 255, - 255, 255, 255, 255, 195, 255, 3, 0, 31, 80, 0, 0, 32, 0, 0, 0, - 0, 0, 223, 60, 64, 215, 255, 255, 251, 255, 255, 255, 255, 255, 191, 255, - 3, 252, 255, 255, 255, 0, 254, 255, 255, 255, 127, 2, 254, 255, 255, 255, - 255, 0, 0, 0, 0, 0, 255, 191, 182, 0, 255, 255, 255, 7, 7, 0, - 0, 0, 255, 7, 255, 255, 255, 254, 0, 192, 255, 255, 255, 255, 239, 31, - 254, 225, 0, 156, 0, 0, 255, 255, 0, 224, 255, 255, 255, 255, 3, 0, - 0, 252, 255, 255, 255, 7, 48, 4, 255, 255, 255, 252, 255, 31, 0, 0, - 255, 255, 255, 1, 253, 31, 0, 0, 240, 3, 255, 127, 255, 255, 255, 239, - 255, 223, 225, 255, 15, 0, 254, 254, 238, 159, 249, 255, 255, 253, 197, 227, - 159, 89, 128, 176, 15, 0, 3, 0, 238, 135, 249, 255, 255, 253, 109, 195, - 135, 25, 2, 94, 0, 0, 63, 0, 238, 191, 251, 255, 255, 253, 237, 227, - 191, 27, 1, 0, 15, 0, 0, 0, 159, 25, 192, 176, 15, 0, 2, 0, - 236, 199, 61, 214, 24, 199, 255, 195, 199, 29, 129, 0, 238, 223, 253, 255, - 255, 253, 239, 227, 223, 29, 96, 3, 236, 223, 253, 255, 223, 29, 96, 64, - 15, 0, 6, 0, 255, 255, 255, 231, 223, 93, 128, 0, 15, 
0, 0, 252, - 236, 255, 127, 252, 255, 255, 251, 47, 127, 128, 95, 255, 0, 0, 12, 0, - 255, 255, 255, 7, 127, 32, 0, 0, 150, 37, 240, 254, 174, 236, 255, 59, - 95, 32, 0, 240, 1, 0, 0, 0, 255, 254, 255, 255, 255, 31, 254, 255, - 3, 255, 255, 254, 255, 255, 255, 31, 255, 255, 127, 249, 231, 193, 255, 255, - 127, 64, 0, 48, 191, 32, 255, 255, 255, 255, 255, 247, 255, 61, 127, 61, - 255, 61, 255, 255, 255, 255, 61, 127, 61, 255, 127, 255, 255, 255, 61, 255, - 255, 255, 255, 135, 255, 255, 0, 0, 255, 255, 31, 0, 255, 159, 255, 255, - 255, 199, 1, 0, 255, 223, 15, 0, 255, 255, 15, 0, 255, 223, 13, 0, - 255, 255, 207, 255, 255, 1, 128, 16, 255, 255, 255, 0, 255, 7, 255, 255, - 255, 255, 63, 0, 255, 15, 255, 1, 255, 63, 31, 0, 255, 15, 255, 255, - 255, 3, 0, 0, 255, 255, 255, 15, 255, 255, 255, 127, 254, 255, 31, 0, - 128, 0, 0, 0, 255, 255, 239, 255, 239, 15, 0, 0, 255, 243, 0, 252, - 191, 255, 3, 0, 0, 224, 0, 252, 255, 255, 255, 63, 0, 222, 111, 0, - 255, 255, 63, 63, 63, 63, 255, 170, 255, 255, 223, 95, 220, 31, 207, 15, - 255, 31, 220, 31, 0, 0, 2, 128, 0, 0, 255, 31, 132, 252, 47, 62, - 80, 189, 255, 243, 224, 67, 0, 0, 255, 1, 0, 0, 0, 0, 192, 255, - 255, 127, 255, 255, 31, 120, 12, 0, 255, 128, 0, 0, 255, 255, 127, 0, - 127, 127, 127, 127, 0, 128, 0, 0, 224, 0, 0, 0, 254, 3, 62, 31, - 255, 255, 127, 224, 224, 255, 255, 255, 255, 63, 254, 255, 255, 127, 0, 0, - 255, 31, 255, 255, 0, 12, 0, 0, 255, 127, 240, 143, 255, 255, 255, 128, - 0, 0, 128, 255, 252, 255, 255, 255, 255, 121, 15, 0, 255, 7, 0, 0, - 0, 0, 0, 255, 187, 247, 255, 255, 0, 0, 252, 8, 255, 255, 7, 0, - 255, 255, 247, 255, 255, 63, 0, 0, 255, 255, 127, 4, 5, 0, 0, 56, - 255, 255, 60, 0, 126, 126, 126, 0, 127, 127, 0, 0, 15, 0, 255, 255, - 127, 248, 255, 255, 255, 63, 255, 255, 255, 255, 255, 3, 127, 0, 248, 224, - 255, 253, 127, 95, 219, 255, 255, 255, 0, 0, 248, 255, 255, 255, 252, 255, - 0, 0, 255, 15, 0, 0, 223, 255, 192, 255, 255, 255, 252, 252, 252, 28, - 255, 239, 255, 255, 127, 255, 255, 183, 
255, 63, 255, 63, 255, 255, 1, 0, - 15, 255, 62, 0, 63, 253, 255, 255, 255, 255, 191, 145, 255, 255, 255, 192, - 111, 240, 239, 254, 63, 0, 0, 0, 30, 0, 0, 0, 7, 0, 0, 0, - 31, 0, 255, 255, 3, 0, 0, 0, 255, 255, 223, 255, 255, 255, 255, 223, - 100, 222, 255, 235, 239, 255, 255, 255, 191, 231, 223, 223, 255, 255, 255, 123, - 95, 252, 253, 255, 63, 255, 255, 255, 253, 255, 255, 247, 255, 253, 255, 255, - 247, 15, 0, 0, 150, 254, 247, 10, 132, 234, 150, 170, 150, 247, 247, 94, - 255, 251, 255, 15, 238, 251, 255, 15, -}; - -/* Alphabetic: 1817 bytes. */ - -RE_UINT32 re_get_alphabetic(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 16; - code = ch ^ (f << 16); - pos = (RE_UINT32)re_alphabetic_stage_1[f] << 5; - f = code >> 11; - code ^= f << 11; - pos = (RE_UINT32)re_alphabetic_stage_2[pos + f] << 3; - f = code >> 8; - code ^= f << 8; - pos = (RE_UINT32)re_alphabetic_stage_3[pos + f] << 3; - f = code >> 5; - code ^= f << 5; - pos = (RE_UINT32)re_alphabetic_stage_4[pos + f] << 5; - pos += code; - value = (re_alphabetic_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Lowercase. 
*/ - -static RE_UINT8 re_lowercase_stage_1[] = { - 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, -}; - -static RE_UINT8 re_lowercase_stage_2[] = { - 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, 3, 3, 5, - 6, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, -}; - -static RE_UINT8 re_lowercase_stage_3[] = { - 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 5, - 6, 3, 7, 3, 3, 3, 8, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 9, 3, 3, 3, 3, 3, 3, 3, 3, 3, 10, 3, 11, - 3, 3, 12, 3, 3, 3, 3, 3, 3, 3, 13, 14, 3, 3, 3, 3, -}; - -static RE_UINT8 re_lowercase_stage_4[] = { - 0, 0, 0, 1, 0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, - 5, 13, 14, 15, 16, 17, 18, 19, 0, 0, 20, 21, 22, 23, 24, 25, - 0, 26, 15, 5, 27, 5, 28, 5, 5, 29, 0, 30, 31, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 15, 15, 15, 15, 15, 15, 0, 0, - 5, 5, 5, 5, 32, 5, 5, 5, 33, 34, 35, 36, 34, 37, 38, 39, - 0, 0, 0, 40, 41, 0, 0, 0, 42, 43, 44, 26, 45, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 26, 46, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 26, 47, 48, 5, 5, 5, 49, 15, 50, 0, 0, 0, 0, 0, 0, - 0, 0, 5, 51, 52, 0, 0, 0, 0, 53, 5, 54, 55, 56, 0, 57, - 0, 0, 0, 0, 0, 0, 0, 0, 58, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, - 0, 59, 60, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 61, 62, 63, 31, 64, 65, 66, 67, 68, 69, 70, 71, 72, 61, 62, 73, - 31, 64, 74, 60, 67, 75, 76, 77, 78, 74, 79, 26, 80, 67, 81, 0, -}; - -static RE_UINT8 re_lowercase_stage_5[] = { - 0, 0, 0, 0, 254, 255, 255, 7, 0, 4, 32, 4, 0, 0, 0, 128, - 255, 255, 127, 255, 170, 170, 170, 170, 170, 170, 170, 85, 85, 171, 170, 170, - 170, 170, 170, 212, 41, 49, 36, 78, 42, 45, 81, 230, 64, 82, 85, 181, - 170, 170, 41, 170, 170, 170, 250, 147, 133, 170, 255, 255, 255, 255, 255, 255, - 255, 255, 239, 255, 255, 255, 255, 1, 3, 0, 0, 0, 31, 0, 0, 0, - 32, 0, 0, 0, 0, 0, 138, 60, 0, 0, 1, 0, 0, 240, 255, 255, - 255, 127, 227, 170, 170, 170, 47, 25, 0, 0, 255, 255, 2, 168, 170, 170, - 84, 
213, 170, 170, 170, 0, 0, 0, 254, 255, 255, 255, 255, 0, 0, 0, - 170, 170, 234, 191, 255, 0, 63, 0, 255, 0, 255, 0, 63, 0, 255, 0, - 255, 0, 255, 63, 255, 0, 223, 64, 220, 0, 207, 0, 255, 0, 220, 0, - 0, 0, 2, 128, 0, 0, 255, 31, 0, 196, 8, 0, 0, 128, 16, 50, - 192, 67, 0, 0, 16, 0, 0, 0, 255, 3, 0, 0, 255, 255, 255, 127, - 98, 21, 218, 63, 26, 80, 8, 0, 191, 32, 0, 0, 170, 42, 0, 0, - 170, 170, 170, 0, 168, 170, 171, 170, 170, 170, 255, 149, 170, 80, 10, 0, - 170, 2, 0, 0, 0, 0, 0, 7, 127, 0, 248, 0, 0, 255, 255, 255, - 255, 255, 0, 0, 0, 0, 0, 252, 255, 255, 15, 0, 0, 192, 223, 255, - 252, 255, 255, 15, 0, 0, 192, 235, 239, 255, 0, 0, 0, 252, 255, 255, - 15, 0, 0, 192, 255, 255, 255, 0, 0, 0, 252, 255, 255, 15, 0, 0, - 192, 255, 255, 255, 0, 192, 255, 255, 0, 0, 192, 255, 63, 0, 0, 0, - 252, 255, 255, 247, 3, 0, 0, 240, 255, 255, 223, 15, 255, 127, 63, 0, - 255, 253, 0, 0, 247, 11, 0, 0, -}; - -/* Lowercase: 697 bytes. */ - -RE_UINT32 re_get_lowercase(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 16; - code = ch ^ (f << 16); - pos = (RE_UINT32)re_lowercase_stage_1[f] << 4; - f = code >> 12; - code ^= f << 12; - pos = (RE_UINT32)re_lowercase_stage_2[pos + f] << 3; - f = code >> 9; - code ^= f << 9; - pos = (RE_UINT32)re_lowercase_stage_3[pos + f] << 4; - f = code >> 5; - code ^= f << 5; - pos = (RE_UINT32)re_lowercase_stage_4[pos + f] << 5; - pos += code; - value = (re_lowercase_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Uppercase. 
*/ - -static RE_UINT8 re_uppercase_stage_1[] = { - 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, -}; - -static RE_UINT8 re_uppercase_stage_2[] = { - 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, 3, 3, 5, - 6, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, -}; - -static RE_UINT8 re_uppercase_stage_3[] = { - 0, 1, 2, 3, 3, 3, 3, 3, 4, 3, 3, 3, 3, 3, 3, 5, - 6, 3, 7, 3, 3, 3, 8, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 9, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 10, - 3, 3, 11, 3, 3, 3, 3, 3, 3, 3, 12, 13, 3, 3, 3, 3, -}; - -static RE_UINT8 re_uppercase_stage_4[] = { - 0, 0, 1, 0, 0, 0, 2, 0, 3, 4, 5, 6, 7, 8, 9, 10, - 3, 11, 12, 0, 0, 0, 0, 0, 0, 0, 0, 13, 14, 15, 16, 17, - 18, 19, 0, 3, 20, 3, 21, 3, 3, 22, 23, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 18, 24, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 3, 3, 3, 3, 25, 3, 3, 3, 26, 27, 28, 29, 0, 30, 31, 32, - 0, 0, 0, 0, 0, 0, 0, 0, 33, 34, 35, 19, 36, 0, 0, 0, - 0, 0, 0, 0, 0, 37, 19, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 18, 38, 0, 39, 3, 3, 3, 40, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 3, 41, 42, 0, 0, 0, 0, 43, 3, 44, 45, 46, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, - 18, 47, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 48, 49, 50, - 51, 61, 62, 54, 55, 51, 63, 64, 65, 66, 37, 38, 54, 67, 68, 0, -}; - -static RE_UINT8 re_uppercase_stage_5[] = { - 0, 0, 0, 0, 254, 255, 255, 7, 255, 255, 127, 127, 85, 85, 85, 85, - 85, 85, 85, 170, 170, 84, 85, 85, 85, 85, 85, 43, 214, 206, 219, 177, - 213, 210, 174, 17, 144, 164, 170, 74, 85, 85, 210, 85, 85, 85, 5, 108, - 122, 85, 0, 0, 0, 0, 69, 0, 64, 215, 254, 255, 251, 15, 0, 0, - 0, 128, 28, 85, 85, 85, 144, 230, 255, 255, 255, 255, 255, 255, 0, 0, - 1, 84, 85, 85, 171, 42, 85, 85, 85, 0, 254, 255, 255, 255, 127, 0, - 191, 32, 0, 0, 85, 85, 21, 64, 0, 255, 0, 63, 0, 255, 0, 255, - 0, 63, 0, 170, 0, 255, 0, 0, 0, 0, 0, 15, 0, 15, 0, 15, - 0, 31, 0, 15, 132, 56, 
39, 62, 80, 61, 15, 192, 32, 0, 0, 0, - 8, 0, 0, 0, 0, 0, 192, 255, 255, 127, 0, 0, 157, 234, 37, 192, - 5, 40, 4, 0, 85, 21, 0, 0, 85, 85, 85, 0, 84, 85, 84, 85, - 85, 85, 0, 106, 85, 40, 5, 0, 85, 5, 0, 0, 255, 0, 0, 0, - 255, 255, 255, 3, 0, 0, 240, 255, 255, 63, 0, 0, 0, 255, 255, 255, - 3, 0, 0, 208, 100, 222, 63, 0, 0, 0, 255, 255, 255, 3, 0, 0, - 176, 231, 223, 31, 0, 0, 0, 123, 95, 252, 1, 0, 0, 240, 255, 255, - 63, 0, 0, 0, 3, 0, 0, 240, 255, 255, 63, 0, 1, 0, 0, 0, - 252, 255, 255, 7, 0, 0, 0, 240, 255, 255, 31, 0, 255, 1, 0, 0, - 0, 4, 0, 0, -}; - -/* Uppercase: 629 bytes. */ - -RE_UINT32 re_get_uppercase(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 16; - code = ch ^ (f << 16); - pos = (RE_UINT32)re_uppercase_stage_1[f] << 4; - f = code >> 12; - code ^= f << 12; - pos = (RE_UINT32)re_uppercase_stage_2[pos + f] << 3; - f = code >> 9; - code ^= f << 9; - pos = (RE_UINT32)re_uppercase_stage_3[pos + f] << 4; - f = code >> 5; - code ^= f << 5; - pos = (RE_UINT32)re_uppercase_stage_4[pos + f] << 5; - pos += code; - value = (re_uppercase_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Cased. 
*/ - -static RE_UINT8 re_cased_stage_1[] = { - 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, -}; - -static RE_UINT8 re_cased_stage_2[] = { - 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, 3, 3, 5, - 6, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, -}; - -static RE_UINT8 re_cased_stage_3[] = { - 0, 1, 2, 3, 3, 3, 3, 3, 4, 3, 3, 3, 3, 3, 5, 6, - 7, 3, 8, 3, 3, 3, 9, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 10, 3, 3, 3, 3, 3, 3, 3, 3, 3, 11, 3, 12, - 3, 3, 13, 3, 3, 3, 3, 3, 3, 3, 14, 15, 3, 3, 3, 3, -}; - -static RE_UINT8 re_cased_stage_4[] = { - 0, 0, 1, 1, 0, 2, 3, 3, 4, 4, 4, 4, 4, 5, 6, 4, - 4, 4, 4, 4, 7, 8, 9, 10, 0, 0, 11, 12, 13, 14, 4, 15, - 4, 4, 4, 4, 16, 4, 4, 4, 4, 17, 18, 19, 20, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 4, 21, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 0, 0, - 4, 4, 4, 4, 4, 4, 4, 4, 22, 4, 23, 24, 4, 25, 26, 27, - 0, 0, 0, 28, 29, 0, 0, 0, 30, 31, 32, 4, 33, 0, 0, 0, - 0, 0, 0, 0, 0, 34, 4, 35, 0, 0, 0, 0, 0, 0, 0, 0, - 4, 36, 37, 4, 4, 4, 4, 38, 4, 21, 0, 0, 0, 0, 0, 0, - 0, 0, 4, 39, 40, 0, 0, 0, 0, 41, 4, 4, 42, 43, 0, 44, - 0, 0, 0, 0, 0, 0, 0, 0, 45, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, - 4, 4, 46, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 4, 4, 47, 4, 48, 49, 50, 4, 51, 52, 53, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 54, 55, 5, 47, 47, 36, 36, 56, 56, 57, 0, -}; - -static RE_UINT8 re_cased_stage_5[] = { - 0, 0, 0, 0, 254, 255, 255, 7, 0, 4, 32, 4, 255, 255, 127, 255, - 255, 255, 255, 255, 255, 255, 255, 247, 240, 255, 255, 255, 255, 255, 239, 255, - 255, 255, 255, 1, 3, 0, 0, 0, 31, 0, 0, 0, 32, 0, 0, 0, - 0, 0, 207, 60, 64, 215, 255, 255, 251, 255, 255, 255, 255, 255, 191, 255, - 3, 252, 255, 255, 255, 0, 254, 255, 255, 255, 127, 0, 254, 255, 255, 255, - 255, 0, 0, 0, 191, 32, 0, 0, 255, 255, 63, 63, 63, 63, 255, 170, - 255, 255, 255, 63, 255, 255, 223, 95, 220, 31, 207, 15, 255, 31, 220, 31, - 0, 
0, 2, 128, 0, 0, 255, 31, 132, 252, 47, 62, 80, 189, 31, 242, - 224, 67, 0, 0, 24, 0, 0, 0, 0, 0, 192, 255, 255, 3, 0, 0, - 255, 127, 255, 255, 255, 255, 255, 127, 31, 120, 12, 0, 255, 63, 0, 0, - 255, 255, 255, 0, 252, 255, 255, 255, 255, 120, 15, 0, 255, 7, 0, 0, - 0, 0, 0, 7, 127, 0, 248, 0, 255, 255, 0, 0, 255, 255, 223, 255, - 255, 255, 255, 223, 100, 222, 255, 235, 239, 255, 255, 255, 191, 231, 223, 223, - 255, 255, 255, 123, 95, 252, 253, 255, 63, 255, 255, 255, 253, 255, 255, 247, - 255, 253, 255, 255, 247, 15, 0, 0, -}; - -/* Cased: 617 bytes. */ - -RE_UINT32 re_get_cased(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 16; - code = ch ^ (f << 16); - pos = (RE_UINT32)re_cased_stage_1[f] << 4; - f = code >> 12; - code ^= f << 12; - pos = (RE_UINT32)re_cased_stage_2[pos + f] << 3; - f = code >> 9; - code ^= f << 9; - pos = (RE_UINT32)re_cased_stage_3[pos + f] << 4; - f = code >> 5; - code ^= f << 5; - pos = (RE_UINT32)re_cased_stage_4[pos + f] << 5; - pos += code; - value = (re_cased_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Case_Ignorable. 
*/ - -static RE_UINT8 re_case_ignorable_stage_1[] = { - 0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 4, 4, 4, - 4, 4, -}; - -static RE_UINT8 re_case_ignorable_stage_2[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 8, 9, 7, 7, 7, 7, 7, 7, 7, 7, 7, 10, - 11, 12, 13, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 14, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 15, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 16, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, -}; - -static RE_UINT8 re_case_ignorable_stage_3[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 1, 1, 17, 1, 1, 1, 18, 19, 20, 21, 22, 23, 24, 1, 25, - 26, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 27, 28, 29, 1, - 30, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 31, 1, 1, 1, 32, 1, 33, 34, 35, 36, 37, 38, 1, 1, 1, 1, - 1, 1, 1, 39, 1, 1, 40, 41, 1, 42, 1, 1, 1, 1, 1, 1, - 1, 1, 43, 1, 1, 1, 1, 1, 44, 45, 1, 1, 1, 1, 46, 1, - 1, 1, 1, 1, 1, 1, 1, 47, 1, 48, 49, 1, 1, 1, 1, 1, - 50, 51, 1, 1, 1, 1, 1, 1, -}; - -static RE_UINT8 re_case_ignorable_stage_4[] = { - 0, 1, 2, 3, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 5, 6, 6, 6, 6, 6, 7, 8, 0, 0, 0, - 0, 0, 0, 0, 9, 0, 0, 0, 0, 0, 10, 0, 11, 12, 13, 14, - 15, 0, 16, 17, 0, 0, 18, 19, 20, 5, 21, 0, 0, 22, 0, 23, - 24, 25, 26, 0, 0, 0, 0, 27, 28, 29, 30, 31, 32, 33, 34, 35, - 36, 33, 37, 38, 36, 33, 39, 35, 32, 40, 41, 35, 42, 0, 43, 0, - 0, 44, 45, 35, 0, 40, 46, 35, 0, 0, 34, 35, 0, 0, 47, 0, - 0, 48, 49, 0, 0, 50, 51, 0, 52, 53, 0, 54, 55, 56, 57, 0, - 0, 58, 59, 60, 61, 0, 0, 33, 0, 0, 62, 0, 0, 0, 0, 0, - 63, 63, 64, 64, 0, 65, 66, 0, 67, 0, 68, 0, 0, 69, 0, 0, - 0, 70, 0, 0, 0, 0, 0, 0, 71, 0, 72, 73, 0, 74, 0, 0, - 75, 76, 42, 77, 78, 79, 0, 80, 0, 81, 0, 82, 0, 0, 83, 84, - 0, 85, 6, 86, 87, 6, 6, 88, 0, 0, 0, 0, 0, 89, 90, 91, - 92, 93, 0, 94, 95, 0, 5, 96, 0, 0, 0, 97, 0, 0, 0, 98, - 0, 0, 0, 99, 0, 0, 0, 6, 0, 100, 0, 0, 0, 0, 0, 0, - 101, 102, 0, 0, 103, 0, 0, 104, 105, 
0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 82, 106, 0, 0, 107, 108, 0, 0, 109, - 6, 78, 0, 17, 110, 0, 0, 52, 111, 112, 0, 0, 0, 0, 113, 114, - 0, 115, 116, 0, 28, 117, 100, 0, 0, 118, 119, 17, 0, 120, 121, 122, - 0, 0, 0, 0, 0, 0, 0, 123, 2, 0, 0, 0, 0, 124, 78, 0, - 125, 126, 127, 0, 0, 0, 0, 108, 1, 2, 3, 17, 44, 0, 0, 128, - 0, 0, 0, 0, 0, 0, 0, 129, 130, 131, 0, 0, 0, 0, 0, 0, - 32, 132, 126, 0, 78, 133, 0, 0, 28, 134, 0, 0, 78, 135, 0, 0, - 0, 0, 0, 0, 0, 136, 0, 0, 0, 0, 0, 0, 137, 0, 0, 0, - 0, 0, 0, 138, 139, 140, 0, 0, 0, 0, 141, 0, 0, 0, 0, 0, - 32, 6, 6, 6, 0, 0, 0, 0, 6, 6, 6, 6, 6, 6, 6, 142, -}; - -static RE_UINT8 re_case_ignorable_stage_5[] = { - 0, 0, 0, 0, 128, 64, 0, 4, 0, 0, 0, 64, 1, 0, 0, 0, - 0, 161, 144, 1, 0, 0, 255, 255, 255, 255, 255, 255, 255, 255, 48, 4, - 176, 0, 0, 0, 248, 3, 0, 0, 0, 0, 0, 2, 0, 0, 254, 255, - 255, 255, 255, 191, 182, 0, 0, 0, 0, 0, 16, 0, 31, 0, 255, 23, - 1, 248, 255, 255, 0, 0, 1, 0, 0, 0, 192, 191, 255, 61, 0, 0, - 0, 128, 2, 0, 255, 7, 0, 0, 192, 255, 1, 0, 0, 248, 63, 4, - 0, 0, 192, 255, 255, 63, 0, 0, 0, 0, 0, 14, 240, 255, 255, 127, - 7, 0, 0, 0, 0, 0, 0, 20, 254, 33, 254, 0, 12, 0, 2, 0, - 2, 0, 0, 0, 0, 0, 0, 16, 30, 32, 0, 0, 12, 0, 0, 0, - 6, 0, 0, 0, 134, 57, 2, 0, 0, 0, 35, 0, 190, 33, 0, 0, - 0, 0, 0, 144, 30, 32, 64, 0, 4, 0, 0, 0, 1, 32, 0, 0, - 0, 0, 0, 192, 193, 61, 96, 0, 64, 48, 0, 0, 0, 4, 92, 0, - 0, 0, 242, 7, 192, 127, 0, 0, 0, 0, 242, 27, 64, 63, 0, 0, - 0, 0, 0, 3, 0, 0, 160, 2, 0, 0, 254, 127, 223, 224, 255, 254, - 255, 255, 255, 31, 64, 0, 0, 0, 0, 224, 253, 102, 0, 0, 0, 195, - 1, 0, 30, 0, 100, 32, 0, 32, 0, 0, 0, 224, 0, 0, 28, 0, - 0, 0, 12, 0, 0, 0, 176, 63, 64, 254, 143, 32, 0, 120, 0, 0, - 8, 0, 0, 0, 0, 2, 0, 0, 135, 1, 4, 14, 0, 0, 128, 9, - 0, 0, 64, 127, 229, 31, 248, 159, 128, 0, 0, 0, 15, 0, 0, 0, - 0, 0, 208, 23, 0, 248, 15, 0, 3, 0, 0, 0, 60, 11, 0, 0, - 64, 163, 3, 0, 0, 240, 207, 0, 0, 0, 0, 63, 0, 0, 247, 255, - 253, 33, 16, 0, 0, 240, 255, 255, 255, 7, 0, 1, 0, 0, 0, 
248, - 127, 0, 0, 240, 0, 0, 0, 160, 3, 224, 0, 224, 0, 224, 0, 96, - 0, 248, 0, 3, 144, 124, 0, 0, 223, 255, 2, 128, 0, 0, 255, 31, - 255, 255, 1, 0, 0, 0, 0, 48, 0, 128, 3, 0, 0, 128, 0, 128, - 0, 128, 0, 0, 32, 0, 0, 0, 0, 60, 62, 8, 0, 0, 0, 126, - 0, 0, 0, 112, 0, 0, 32, 0, 0, 16, 0, 0, 0, 128, 247, 191, - 0, 0, 0, 128, 0, 0, 3, 0, 0, 7, 0, 0, 68, 8, 0, 0, - 96, 0, 0, 0, 16, 0, 0, 0, 255, 255, 3, 0, 192, 63, 0, 0, - 128, 255, 3, 0, 0, 0, 200, 19, 0, 126, 102, 0, 8, 16, 0, 0, - 0, 0, 157, 193, 2, 0, 0, 32, 0, 48, 88, 0, 32, 33, 0, 0, - 0, 0, 252, 255, 255, 255, 8, 0, 127, 0, 0, 0, 0, 0, 36, 0, - 8, 0, 0, 14, 0, 0, 0, 32, 110, 240, 0, 0, 0, 0, 0, 135, - 0, 0, 0, 255, 0, 0, 120, 38, 128, 239, 31, 0, 0, 0, 192, 127, - 0, 40, 191, 0, 0, 128, 255, 255, 128, 3, 248, 255, 231, 15, 0, 0, - 0, 60, 0, 0, 28, 0, 0, 0, 255, 255, 0, 0, -}; - -/* Case_Ignorable: 1254 bytes. */ - -RE_UINT32 re_get_case_ignorable(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 15; - code = ch ^ (f << 15); - pos = (RE_UINT32)re_case_ignorable_stage_1[f] << 4; - f = code >> 11; - code ^= f << 11; - pos = (RE_UINT32)re_case_ignorable_stage_2[pos + f] << 3; - f = code >> 8; - code ^= f << 8; - pos = (RE_UINT32)re_case_ignorable_stage_3[pos + f] << 3; - f = code >> 5; - code ^= f << 5; - pos = (RE_UINT32)re_case_ignorable_stage_4[pos + f] << 5; - pos += code; - value = (re_case_ignorable_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Changes_When_Lowercased. 
*/ - -static RE_UINT8 re_changes_when_lowercased_stage_1[] = { - 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, -}; - -static RE_UINT8 re_changes_when_lowercased_stage_2[] = { - 0, 1, 2, 3, 4, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 6, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 7, - 8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -}; - -static RE_UINT8 re_changes_when_lowercased_stage_3[] = { - 0, 1, 2, 3, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 8, 9, - 6, 10, 6, 6, 11, 6, 6, 6, 6, 6, 6, 6, 12, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 13, 14, 6, 6, 6, 6, 6, 6, 6, 15, - 6, 6, 6, 6, 16, 6, 6, 6, -}; - -static RE_UINT8 re_changes_when_lowercased_stage_4[] = { - 0, 0, 1, 0, 0, 0, 2, 0, 3, 4, 5, 6, 7, 8, 9, 10, - 3, 11, 12, 0, 0, 0, 0, 0, 0, 0, 0, 13, 14, 15, 16, 17, - 18, 19, 0, 3, 20, 3, 21, 3, 3, 22, 23, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 18, 24, 0, - 3, 3, 3, 3, 25, 3, 3, 3, 26, 27, 28, 29, 27, 30, 31, 32, - 0, 33, 0, 19, 34, 0, 0, 0, 0, 0, 0, 0, 0, 35, 19, 0, - 18, 36, 0, 37, 3, 3, 3, 38, 0, 0, 3, 39, 40, 0, 0, 0, - 0, 41, 3, 42, 43, 44, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, - 18, 45, 0, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_changes_when_lowercased_stage_5[] = { - 0, 0, 0, 0, 254, 255, 255, 7, 255, 255, 127, 127, 85, 85, 85, 85, - 85, 85, 85, 170, 170, 84, 85, 85, 85, 85, 85, 43, 214, 206, 219, 177, - 213, 210, 174, 17, 176, 173, 170, 74, 85, 85, 214, 85, 85, 85, 5, 108, - 122, 85, 0, 0, 0, 0, 69, 0, 64, 215, 254, 255, 251, 15, 0, 0, - 0, 128, 0, 85, 85, 85, 144, 230, 255, 255, 255, 255, 255, 255, 0, 0, - 1, 84, 85, 85, 171, 42, 85, 85, 85, 0, 254, 255, 255, 255, 127, 0, - 191, 32, 0, 0, 85, 85, 21, 64, 0, 255, 0, 63, 0, 255, 0, 255, - 0, 63, 0, 170, 0, 255, 0, 0, 0, 255, 0, 31, 0, 31, 0, 15, - 0, 31, 0, 31, 64, 12, 4, 0, 8, 0, 0, 0, 0, 0, 192, 255, - 255, 127, 0, 0, 157, 234, 37, 192, 5, 40, 4, 0, 85, 21, 0, 0, - 85, 
85, 85, 0, 84, 85, 84, 85, 85, 85, 0, 106, 85, 40, 5, 0, - 85, 5, 0, 0, 255, 0, 0, 0, -}; - -/* Changes_When_Lowercased: 490 bytes. */ - -RE_UINT32 re_get_changes_when_lowercased(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 15; - code = ch ^ (f << 15); - pos = (RE_UINT32)re_changes_when_lowercased_stage_1[f] << 4; - f = code >> 11; - code ^= f << 11; - pos = (RE_UINT32)re_changes_when_lowercased_stage_2[pos + f] << 3; - f = code >> 8; - code ^= f << 8; - pos = (RE_UINT32)re_changes_when_lowercased_stage_3[pos + f] << 3; - f = code >> 5; - code ^= f << 5; - pos = (RE_UINT32)re_changes_when_lowercased_stage_4[pos + f] << 5; - pos += code; - value = (re_changes_when_lowercased_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Changes_When_Uppercased. */ - -static RE_UINT8 re_changes_when_uppercased_stage_1[] = { - 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, -}; - -static RE_UINT8 re_changes_when_uppercased_stage_2[] = { - 0, 1, 1, 2, 3, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6, - 7, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -}; - -static RE_UINT8 re_changes_when_uppercased_stage_3[] = { - 0, 1, 2, 3, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 7, 8, 9, 6, 10, 6, 6, 11, 6, 6, 6, - 6, 6, 6, 6, 12, 13, 6, 6, 6, 6, 6, 6, 6, 6, 14, 15, - 6, 6, 6, 16, 6, 6, 6, 17, 6, 6, 6, 6, 18, 6, 6, 6, -}; - -static RE_UINT8 re_changes_when_uppercased_stage_4[] = { - 0, 0, 0, 1, 0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, - 5, 13, 14, 15, 16, 0, 0, 0, 0, 0, 17, 18, 19, 20, 21, 22, - 0, 23, 24, 5, 25, 5, 26, 5, 5, 27, 0, 28, 29, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 30, 0, 0, 0, 0, - 5, 5, 5, 5, 31, 5, 5, 5, 32, 33, 34, 35, 24, 36, 37, 38, - 0, 0, 39, 23, 40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 23, 41, - 0, 23, 42, 43, 5, 5, 5, 44, 24, 45, 0, 0, 0, 0, 0, 0, - 0, 
0, 5, 46, 47, 0, 0, 0, 0, 48, 5, 49, 50, 51, 0, 0, - 52, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, - 0, 53, 54, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_changes_when_uppercased_stage_5[] = { - 0, 0, 0, 0, 254, 255, 255, 7, 0, 0, 32, 0, 0, 0, 0, 128, - 255, 255, 127, 255, 170, 170, 170, 170, 170, 170, 170, 84, 85, 171, 170, 170, - 170, 170, 170, 212, 41, 17, 36, 70, 42, 33, 81, 162, 96, 91, 85, 181, - 170, 170, 45, 170, 168, 170, 10, 144, 133, 170, 223, 10, 105, 139, 38, 32, - 9, 31, 4, 0, 32, 0, 0, 0, 0, 0, 138, 56, 0, 0, 1, 0, - 0, 240, 255, 255, 255, 127, 227, 170, 170, 170, 39, 9, 0, 0, 255, 255, - 255, 255, 255, 255, 2, 168, 170, 170, 84, 213, 170, 170, 170, 0, 0, 0, - 254, 255, 255, 255, 255, 0, 0, 0, 0, 0, 0, 34, 170, 170, 234, 15, - 255, 0, 63, 0, 255, 0, 255, 0, 63, 0, 255, 0, 255, 0, 255, 63, - 255, 255, 223, 80, 220, 16, 207, 0, 255, 0, 220, 16, 0, 64, 0, 0, - 16, 0, 0, 0, 255, 3, 0, 0, 255, 255, 255, 127, 98, 21, 72, 0, - 10, 80, 8, 0, 191, 32, 0, 0, 170, 42, 0, 0, 170, 170, 170, 0, - 168, 170, 168, 170, 170, 170, 0, 148, 170, 16, 10, 0, 170, 2, 0, 0, - 127, 0, 248, 0, 0, 255, 255, 255, 255, 255, 0, 0, -}; - -/* Changes_When_Uppercased: 534 bytes. */ - -RE_UINT32 re_get_changes_when_uppercased(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 15; - code = ch ^ (f << 15); - pos = (RE_UINT32)re_changes_when_uppercased_stage_1[f] << 4; - f = code >> 11; - code ^= f << 11; - pos = (RE_UINT32)re_changes_when_uppercased_stage_2[pos + f] << 3; - f = code >> 8; - code ^= f << 8; - pos = (RE_UINT32)re_changes_when_uppercased_stage_3[pos + f] << 3; - f = code >> 5; - code ^= f << 5; - pos = (RE_UINT32)re_changes_when_uppercased_stage_4[pos + f] << 5; - pos += code; - value = (re_changes_when_uppercased_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Changes_When_Titlecased. 
*/ - -static RE_UINT8 re_changes_when_titlecased_stage_1[] = { - 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, -}; - -static RE_UINT8 re_changes_when_titlecased_stage_2[] = { - 0, 1, 1, 2, 3, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6, - 7, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -}; - -static RE_UINT8 re_changes_when_titlecased_stage_3[] = { - 0, 1, 2, 3, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 7, 8, 9, 6, 10, 6, 6, 11, 6, 6, 6, - 6, 6, 6, 6, 12, 13, 6, 6, 6, 6, 6, 6, 6, 6, 14, 15, - 6, 6, 6, 16, 6, 6, 6, 17, 6, 6, 6, 6, 18, 6, 6, 6, -}; - -static RE_UINT8 re_changes_when_titlecased_stage_4[] = { - 0, 0, 0, 1, 0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, - 5, 13, 14, 15, 16, 0, 0, 0, 0, 0, 17, 18, 19, 20, 21, 22, - 0, 23, 24, 5, 25, 5, 26, 5, 5, 27, 0, 28, 29, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 30, 0, 0, 0, 0, - 5, 5, 5, 5, 31, 5, 5, 5, 32, 33, 34, 35, 33, 36, 37, 38, - 0, 0, 39, 23, 40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 23, 41, - 0, 23, 42, 43, 5, 5, 5, 44, 24, 45, 0, 0, 0, 0, 0, 0, - 0, 0, 5, 46, 47, 0, 0, 0, 0, 48, 5, 49, 50, 51, 0, 0, - 52, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, - 0, 53, 54, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_changes_when_titlecased_stage_5[] = { - 0, 0, 0, 0, 254, 255, 255, 7, 0, 0, 32, 0, 0, 0, 0, 128, - 255, 255, 127, 255, 170, 170, 170, 170, 170, 170, 170, 84, 85, 171, 170, 170, - 170, 170, 170, 212, 41, 17, 36, 70, 42, 33, 81, 162, 208, 86, 85, 181, - 170, 170, 43, 170, 168, 170, 10, 144, 133, 170, 223, 10, 105, 139, 38, 32, - 9, 31, 4, 0, 32, 0, 0, 0, 0, 0, 138, 56, 0, 0, 1, 0, - 0, 240, 255, 255, 255, 127, 227, 170, 170, 170, 39, 9, 0, 0, 255, 255, - 255, 255, 255, 255, 2, 168, 170, 170, 84, 213, 170, 170, 170, 0, 0, 0, - 254, 255, 255, 255, 255, 0, 0, 0, 0, 0, 0, 34, 170, 170, 234, 15, - 255, 0, 63, 0, 255, 0, 255, 0, 63, 0, 255, 0, 255, 0, 255, 63, - 255, 0, 223, 
64, 220, 0, 207, 0, 255, 0, 220, 0, 0, 64, 0, 0, - 16, 0, 0, 0, 255, 3, 0, 0, 255, 255, 255, 127, 98, 21, 72, 0, - 10, 80, 8, 0, 191, 32, 0, 0, 170, 42, 0, 0, 170, 170, 170, 0, - 168, 170, 168, 170, 170, 170, 0, 148, 170, 16, 10, 0, 170, 2, 0, 0, - 127, 0, 248, 0, 0, 255, 255, 255, 255, 255, 0, 0, -}; - -/* Changes_When_Titlecased: 534 bytes. */ - -RE_UINT32 re_get_changes_when_titlecased(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 15; - code = ch ^ (f << 15); - pos = (RE_UINT32)re_changes_when_titlecased_stage_1[f] << 4; - f = code >> 11; - code ^= f << 11; - pos = (RE_UINT32)re_changes_when_titlecased_stage_2[pos + f] << 3; - f = code >> 8; - code ^= f << 8; - pos = (RE_UINT32)re_changes_when_titlecased_stage_3[pos + f] << 3; - f = code >> 5; - code ^= f << 5; - pos = (RE_UINT32)re_changes_when_titlecased_stage_4[pos + f] << 5; - pos += code; - value = (re_changes_when_titlecased_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Changes_When_Casefolded. 
*/ - -static RE_UINT8 re_changes_when_casefolded_stage_1[] = { - 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, -}; - -static RE_UINT8 re_changes_when_casefolded_stage_2[] = { - 0, 1, 2, 3, 4, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 6, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 7, - 8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -}; - -static RE_UINT8 re_changes_when_casefolded_stage_3[] = { - 0, 1, 2, 3, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 8, 9, - 6, 10, 6, 6, 11, 6, 6, 6, 6, 6, 6, 6, 12, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 13, 14, 6, 6, 6, 15, 6, 6, 6, 16, - 6, 6, 6, 6, 17, 6, 6, 6, -}; - -static RE_UINT8 re_changes_when_casefolded_stage_4[] = { - 0, 0, 1, 0, 0, 2, 3, 0, 4, 5, 6, 7, 8, 9, 10, 11, - 4, 12, 13, 0, 0, 0, 0, 0, 0, 0, 14, 15, 16, 17, 18, 19, - 20, 21, 0, 4, 22, 4, 23, 4, 4, 24, 25, 0, 26, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 20, 27, 0, - 4, 4, 4, 4, 28, 4, 4, 4, 29, 30, 31, 32, 20, 33, 34, 35, - 0, 36, 0, 21, 37, 0, 0, 0, 0, 0, 0, 0, 0, 38, 21, 0, - 20, 39, 0, 40, 4, 4, 4, 41, 0, 0, 4, 42, 43, 0, 0, 0, - 0, 44, 4, 45, 46, 47, 0, 0, 48, 0, 0, 0, 0, 0, 0, 0, - 0, 1, 0, 0, 0, 0, 0, 0, 20, 49, 0, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_changes_when_casefolded_stage_5[] = { - 0, 0, 0, 0, 254, 255, 255, 7, 0, 0, 32, 0, 255, 255, 127, 255, - 85, 85, 85, 85, 85, 85, 85, 170, 170, 86, 85, 85, 85, 85, 85, 171, - 214, 206, 219, 177, 213, 210, 174, 17, 176, 173, 170, 74, 85, 85, 214, 85, - 85, 85, 5, 108, 122, 85, 0, 0, 32, 0, 0, 0, 0, 0, 69, 0, - 64, 215, 254, 255, 251, 15, 0, 0, 4, 128, 99, 85, 85, 85, 179, 230, - 255, 255, 255, 255, 255, 255, 0, 0, 1, 84, 85, 85, 171, 42, 85, 85, - 85, 0, 254, 255, 255, 255, 127, 0, 128, 0, 0, 0, 191, 32, 0, 0, - 85, 85, 21, 76, 0, 255, 0, 63, 0, 255, 0, 255, 0, 63, 0, 170, - 0, 255, 0, 0, 255, 255, 156, 31, 156, 31, 0, 15, 0, 31, 156, 31, - 64, 12, 4, 0, 8, 0, 0, 0, 0, 0, 
192, 255, 255, 127, 0, 0, - 157, 234, 37, 192, 5, 40, 4, 0, 85, 21, 0, 0, 85, 85, 85, 0, - 84, 85, 84, 85, 85, 85, 0, 106, 85, 40, 5, 0, 85, 5, 0, 0, - 127, 0, 248, 0, 255, 0, 0, 0, -}; - -/* Changes_When_Casefolded: 514 bytes. */ - -RE_UINT32 re_get_changes_when_casefolded(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 15; - code = ch ^ (f << 15); - pos = (RE_UINT32)re_changes_when_casefolded_stage_1[f] << 4; - f = code >> 11; - code ^= f << 11; - pos = (RE_UINT32)re_changes_when_casefolded_stage_2[pos + f] << 3; - f = code >> 8; - code ^= f << 8; - pos = (RE_UINT32)re_changes_when_casefolded_stage_3[pos + f] << 3; - f = code >> 5; - code ^= f << 5; - pos = (RE_UINT32)re_changes_when_casefolded_stage_4[pos + f] << 5; - pos += code; - value = (re_changes_when_casefolded_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Changes_When_Casemapped. */ - -static RE_UINT8 re_changes_when_casemapped_stage_1[] = { - 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, -}; - -static RE_UINT8 re_changes_when_casemapped_stage_2[] = { - 0, 1, 2, 3, 4, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 6, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 7, - 8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -}; - -static RE_UINT8 re_changes_when_casemapped_stage_3[] = { - 0, 1, 2, 3, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 8, 9, 10, - 6, 11, 6, 6, 12, 6, 6, 6, 6, 6, 6, 6, 13, 14, 6, 6, - 6, 6, 6, 6, 6, 6, 15, 16, 6, 6, 6, 17, 6, 6, 6, 18, - 6, 6, 6, 6, 19, 6, 6, 6, -}; - -static RE_UINT8 re_changes_when_casemapped_stage_4[] = { - 0, 0, 1, 1, 0, 2, 3, 3, 4, 5, 4, 4, 6, 7, 8, 4, - 4, 9, 10, 11, 12, 0, 0, 0, 0, 0, 13, 14, 15, 16, 17, 18, - 4, 4, 4, 4, 19, 4, 4, 4, 4, 20, 21, 22, 23, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 24, 0, - 0, 0, 0, 25, 0, 0, 0, 0, 4, 4, 4, 4, 26, 4, 4, 4, - 27, 
4, 28, 29, 4, 30, 31, 32, 0, 33, 34, 4, 35, 0, 0, 0, - 0, 0, 0, 0, 0, 36, 4, 37, 4, 38, 39, 40, 4, 4, 4, 41, - 4, 24, 0, 0, 0, 0, 0, 0, 0, 0, 4, 42, 43, 0, 0, 0, - 0, 44, 4, 45, 46, 47, 0, 0, 48, 0, 0, 0, 0, 0, 0, 0, - 0, 1, 1, 0, 0, 0, 0, 0, 4, 4, 49, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_changes_when_casemapped_stage_5[] = { - 0, 0, 0, 0, 254, 255, 255, 7, 0, 0, 32, 0, 255, 255, 127, 255, - 255, 255, 255, 255, 255, 255, 255, 254, 255, 223, 255, 247, 255, 243, 255, 179, - 240, 255, 255, 255, 253, 255, 15, 252, 255, 255, 223, 10, 105, 139, 38, 32, - 9, 31, 4, 0, 32, 0, 0, 0, 0, 0, 207, 56, 64, 215, 255, 255, - 251, 255, 255, 255, 255, 255, 227, 255, 255, 255, 183, 239, 3, 252, 255, 255, - 255, 0, 254, 255, 255, 255, 127, 0, 254, 255, 255, 255, 255, 0, 0, 0, - 191, 32, 0, 0, 0, 0, 0, 34, 255, 255, 255, 79, 255, 255, 63, 63, - 63, 63, 255, 170, 255, 255, 255, 63, 255, 255, 223, 95, 220, 31, 207, 15, - 255, 31, 220, 31, 64, 12, 4, 0, 0, 64, 0, 0, 24, 0, 0, 0, - 0, 0, 192, 255, 255, 3, 0, 0, 255, 127, 255, 255, 255, 255, 255, 127, - 255, 255, 109, 192, 15, 120, 12, 0, 255, 63, 0, 0, 255, 255, 255, 0, - 252, 255, 252, 255, 255, 255, 0, 254, 255, 56, 15, 0, 255, 7, 0, 0, - 127, 0, 248, 0, 255, 255, 0, 0, -}; - -/* Changes_When_Casemapped: 530 bytes. */ - -RE_UINT32 re_get_changes_when_casemapped(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 15; - code = ch ^ (f << 15); - pos = (RE_UINT32)re_changes_when_casemapped_stage_1[f] << 4; - f = code >> 11; - code ^= f << 11; - pos = (RE_UINT32)re_changes_when_casemapped_stage_2[pos + f] << 3; - f = code >> 8; - code ^= f << 8; - pos = (RE_UINT32)re_changes_when_casemapped_stage_3[pos + f] << 3; - f = code >> 5; - code ^= f << 5; - pos = (RE_UINT32)re_changes_when_casemapped_stage_4[pos + f] << 5; - pos += code; - value = (re_changes_when_casemapped_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* ID_Start. 
*/ - -static RE_UINT8 re_id_start_stage_1[] = { - 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, -}; - -static RE_UINT8 re_id_start_stage_2[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 9, 10, 11, 7, 7, 7, 7, 12, 13, 13, 13, 13, 14, - 15, 16, 17, 13, 18, 13, 19, 13, 13, 13, 13, 13, 13, 20, 13, 13, - 13, 13, 13, 13, 13, 13, 21, 13, 13, 13, 22, 13, 13, 23, 13, 13, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 24, 7, 25, 26, 13, 13, 13, 13, 13, 13, 13, 27, - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, -}; - -static RE_UINT8 re_id_start_stage_3[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 1, 17, 18, 19, 1, 20, 21, 22, 23, 24, 25, 26, 27, 1, 28, - 29, 30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 31, 31, - 34, 35, 31, 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 36, 1, 1, 1, 1, 1, 1, 1, 1, 1, 37, - 1, 1, 1, 1, 38, 1, 39, 40, 41, 42, 43, 44, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 45, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 1, 46, 47, 1, 48, 49, 50, 51, 52, 53, 54, 55, 31, 31, 31, - 56, 57, 58, 59, 60, 31, 31, 31, 61, 62, 31, 31, 31, 31, 63, 31, - 1, 1, 1, 64, 65, 31, 31, 31, 1, 1, 1, 1, 66, 31, 31, 31, - 1, 1, 67, 31, 31, 31, 31, 68, 69, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 70, 71, 72, 73, 31, 31, 31, 31, 31, 31, 74, 31, - 1, 1, 1, 1, 1, 1, 75, 1, 1, 1, 1, 1, 1, 1, 1, 76, - 77, 31, 31, 31, 31, 31, 31, 31, 1, 1, 77, 31, 31, 31, 31, 31, -}; - -static RE_UINT8 re_id_start_stage_4[] = { - 0, 0, 1, 1, 0, 2, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 5, 6, 0, 0, 0, 7, 8, 9, 4, 10, - 4, 4, 4, 4, 11, 4, 4, 4, 4, 12, 13, 14, 15, 0, 16, 17, - 0, 4, 18, 19, 4, 4, 20, 21, 22, 23, 24, 4, 4, 25, 26, 27, - 28, 29, 30, 0, 0, 31, 0, 0, 32, 33, 34, 35, 36, 37, 38, 39, - 40, 41, 42, 43, 44, 45, 46, 47, 36, 45, 48, 49, 50, 51, 46, 0, - 52, 53, 54, 47, 52, 53, 55, 56, 52, 57, 58, 59, 60, 61, 62, 0, - 14, 63, 62, 
0, 64, 65, 66, 0, 67, 0, 68, 69, 70, 0, 0, 0, - 4, 71, 72, 73, 74, 4, 75, 76, 4, 4, 77, 4, 78, 79, 80, 4, - 81, 4, 82, 0, 23, 4, 4, 83, 14, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 84, 1, 4, 4, 85, 86, 87, 87, 88, 4, 89, 90, 0, - 0, 4, 4, 91, 4, 92, 4, 93, 94, 0, 16, 95, 4, 96, 97, 0, - 98, 4, 83, 0, 0, 99, 0, 0, 100, 89, 101, 0, 102, 103, 4, 104, - 4, 105, 106, 107, 0, 0, 0, 108, 4, 4, 4, 4, 4, 4, 0, 0, - 109, 4, 110, 107, 4, 111, 112, 113, 0, 0, 0, 114, 115, 0, 0, 0, - 116, 117, 118, 4, 119, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 4, 120, 121, 4, 4, 4, 4, 122, 4, 75, 4, 123, 98, 124, 124, 0, - 125, 126, 14, 4, 127, 14, 4, 76, 100, 128, 4, 4, 129, 82, 0, 16, - 4, 4, 4, 4, 4, 93, 0, 0, 4, 4, 4, 4, 4, 4, 69, 0, - 4, 4, 4, 4, 69, 0, 16, 107, 130, 131, 4, 132, 91, 4, 4, 23, - 133, 134, 4, 4, 135, 18, 0, 136, 137, 138, 4, 89, 134, 89, 0, 139, - 26, 140, 62, 94, 32, 141, 142, 0, 4, 119, 143, 144, 4, 145, 146, 147, - 148, 149, 0, 0, 0, 0, 4, 138, 4, 4, 4, 4, 4, 150, 151, 152, - 4, 4, 4, 153, 4, 4, 154, 0, 155, 156, 157, 4, 4, 87, 158, 4, - 4, 107, 16, 4, 159, 4, 15, 160, 0, 0, 0, 161, 4, 4, 4, 94, - 0, 1, 1, 162, 4, 121, 163, 0, 164, 165, 166, 0, 4, 4, 4, 82, - 0, 0, 4, 83, 0, 0, 0, 0, 0, 0, 0, 0, 94, 4, 167, 0, - 121, 16, 18, 0, 107, 4, 168, 0, 4, 4, 4, 4, 107, 0, 0, 0, - 169, 170, 93, 0, 0, 0, 0, 0, 93, 154, 0, 0, 4, 171, 0, 0, - 172, 89, 0, 94, 0, 0, 0, 0, 4, 93, 93, 141, 0, 0, 0, 0, - 4, 4, 119, 0, 0, 0, 0, 0, 102, 91, 0, 0, 102, 23, 16, 119, - 102, 62, 0, 0, 102, 141, 173, 0, 0, 0, 0, 0, 4, 18, 0, 0, - 4, 4, 4, 129, 0, 0, 0, 0, 4, 4, 4, 138, 0, 0, 0, 0, - 4, 129, 0, 0, 0, 0, 0, 0, 4, 30, 0, 0, 0, 0, 0, 0, - 4, 4, 174, 0, 158, 0, 0, 0, 47, 0, 0, 0, 0, 0, 0, 0, - 4, 4, 175, 4, 176, 177, 178, 4, 179, 180, 181, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 182, 183, 76, 175, 175, 120, 120, 184, 184, 143, 0, - 178, 185, 186, 187, 188, 189, 0, 0, 4, 4, 4, 4, 4, 4, 98, 0, - 4, 83, 4, 4, 4, 4, 4, 4, 107, 0, 0, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_id_start_stage_5[] = { - 0, 0, 0, 0, 254, 255, 255, 
7, 0, 4, 32, 4, 255, 255, 127, 255, - 255, 255, 255, 255, 195, 255, 3, 0, 31, 80, 0, 0, 0, 0, 223, 60, - 64, 215, 255, 255, 251, 255, 255, 255, 255, 255, 191, 255, 3, 252, 255, 255, - 255, 0, 254, 255, 255, 255, 127, 2, 254, 255, 255, 255, 255, 0, 0, 0, - 0, 0, 255, 255, 255, 7, 7, 0, 255, 7, 0, 0, 0, 192, 254, 255, - 255, 255, 47, 0, 96, 192, 0, 156, 0, 0, 253, 255, 255, 255, 0, 0, - 0, 224, 255, 255, 63, 0, 2, 0, 0, 252, 255, 255, 255, 7, 48, 4, - 255, 255, 63, 4, 16, 1, 0, 0, 255, 255, 255, 1, 253, 31, 0, 0, - 240, 255, 255, 255, 255, 255, 255, 35, 0, 0, 1, 255, 3, 0, 254, 254, - 224, 159, 249, 255, 255, 253, 197, 35, 0, 64, 0, 176, 3, 0, 3, 0, - 224, 135, 249, 255, 255, 253, 109, 3, 0, 0, 0, 94, 0, 0, 28, 0, - 224, 191, 251, 255, 255, 253, 237, 35, 0, 0, 1, 0, 3, 0, 0, 0, - 0, 0, 0, 176, 3, 0, 2, 0, 232, 199, 61, 214, 24, 199, 255, 3, - 224, 223, 253, 255, 255, 253, 239, 35, 0, 0, 0, 3, 0, 0, 0, 64, - 3, 0, 6, 0, 255, 255, 255, 39, 0, 64, 0, 0, 3, 0, 0, 252, - 224, 255, 127, 252, 255, 255, 251, 47, 127, 0, 0, 0, 255, 255, 13, 0, - 150, 37, 240, 254, 174, 236, 13, 32, 95, 0, 0, 240, 1, 0, 0, 0, - 255, 254, 255, 255, 255, 31, 0, 0, 0, 31, 0, 0, 255, 7, 0, 128, - 0, 0, 63, 60, 98, 192, 225, 255, 3, 64, 0, 0, 191, 32, 255, 255, - 255, 255, 255, 247, 255, 61, 127, 61, 255, 61, 255, 255, 255, 255, 61, 127, - 61, 255, 127, 255, 255, 255, 61, 255, 255, 255, 255, 7, 255, 255, 31, 0, - 255, 159, 255, 255, 255, 199, 1, 0, 255, 223, 3, 0, 255, 255, 3, 0, - 255, 223, 1, 0, 255, 255, 15, 0, 0, 0, 128, 16, 255, 255, 255, 0, - 255, 5, 255, 255, 255, 255, 63, 0, 255, 255, 255, 31, 255, 63, 31, 0, - 255, 15, 0, 0, 254, 0, 0, 0, 255, 255, 127, 0, 128, 0, 0, 0, - 224, 255, 255, 255, 224, 15, 0, 0, 248, 255, 255, 255, 1, 192, 0, 252, - 63, 0, 0, 0, 15, 0, 0, 0, 0, 224, 0, 252, 255, 255, 255, 63, - 0, 222, 99, 0, 255, 255, 63, 63, 63, 63, 255, 170, 255, 255, 223, 95, - 220, 31, 207, 15, 255, 31, 220, 31, 0, 0, 2, 128, 0, 0, 255, 31, - 132, 252, 47, 63, 80, 253, 255, 243, 224, 67, 0, 
0, 255, 1, 0, 0, - 255, 127, 255, 255, 255, 255, 255, 127, 31, 120, 12, 0, 255, 128, 0, 0, - 127, 127, 127, 127, 224, 0, 0, 0, 254, 3, 62, 31, 255, 255, 127, 248, - 255, 63, 254, 255, 255, 127, 0, 0, 255, 31, 255, 255, 0, 12, 0, 0, - 255, 127, 0, 128, 0, 0, 128, 255, 252, 255, 255, 255, 255, 121, 15, 0, - 0, 0, 0, 255, 187, 247, 255, 255, 7, 0, 0, 0, 0, 0, 252, 8, - 63, 0, 255, 255, 255, 255, 7, 0, 0, 128, 0, 0, 247, 15, 0, 0, - 255, 255, 127, 4, 255, 255, 98, 62, 5, 0, 0, 56, 255, 7, 28, 0, - 126, 126, 126, 0, 127, 127, 0, 0, 15, 0, 255, 255, 127, 248, 255, 255, - 255, 255, 255, 15, 255, 63, 255, 255, 255, 255, 255, 3, 127, 0, 248, 160, - 255, 253, 127, 95, 219, 255, 255, 255, 0, 0, 248, 255, 255, 255, 252, 255, - 0, 0, 255, 15, 0, 0, 223, 255, 192, 255, 255, 255, 252, 252, 252, 28, - 255, 239, 255, 255, 127, 255, 255, 183, 255, 63, 255, 63, 255, 255, 1, 0, - 15, 255, 62, 0, 63, 253, 255, 255, 255, 255, 191, 145, 255, 255, 255, 192, - 1, 0, 239, 254, 30, 0, 0, 0, 31, 0, 1, 0, 255, 255, 223, 255, - 255, 255, 255, 223, 100, 222, 255, 235, 239, 255, 255, 255, 191, 231, 223, 223, - 255, 255, 255, 123, 95, 252, 253, 255, 63, 255, 255, 255, 253, 255, 255, 247, - 255, 253, 255, 255, 150, 254, 247, 10, 132, 234, 150, 170, 150, 247, 247, 94, - 255, 251, 255, 15, 238, 251, 255, 15, -}; - -/* ID_Start: 1753 bytes. */ - -RE_UINT32 re_get_id_start(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 16; - code = ch ^ (f << 16); - pos = (RE_UINT32)re_id_start_stage_1[f] << 5; - f = code >> 11; - code ^= f << 11; - pos = (RE_UINT32)re_id_start_stage_2[pos + f] << 3; - f = code >> 8; - code ^= f << 8; - pos = (RE_UINT32)re_id_start_stage_3[pos + f] << 3; - f = code >> 5; - code ^= f << 5; - pos = (RE_UINT32)re_id_start_stage_4[pos + f] << 5; - pos += code; - value = (re_id_start_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* ID_Continue. 
*/ - -static RE_UINT8 re_id_continue_stage_1[] = { - 0, 1, 2, 3, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 6, 6, 6, - 6, 6, -}; - -static RE_UINT8 re_id_continue_stage_2[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 9, 10, 11, 7, 7, 7, 7, 12, 13, 13, 13, 13, 14, - 15, 16, 17, 13, 18, 13, 19, 13, 13, 13, 13, 13, 13, 20, 13, 13, - 13, 13, 13, 13, 13, 13, 21, 13, 13, 13, 22, 13, 13, 23, 13, 13, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 24, 7, 25, 26, 13, 13, 13, 13, 13, 13, 13, 27, - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, - 28, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, -}; - -static RE_UINT8 re_id_continue_stage_3[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 1, 17, 18, 19, 1, 20, 21, 22, 23, 24, 25, 26, 27, 1, 28, - 29, 30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 31, 31, - 34, 35, 31, 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 36, 1, 1, 1, 1, 1, 1, 1, 1, 1, 37, - 1, 1, 1, 1, 38, 1, 39, 40, 41, 42, 43, 44, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 45, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 1, 46, 47, 1, 48, 49, 50, 51, 52, 53, 54, 55, 31, 31, 31, - 56, 57, 58, 59, 60, 31, 31, 31, 61, 62, 31, 31, 31, 31, 63, 31, - 1, 1, 1, 64, 65, 31, 31, 31, 1, 1, 1, 1, 66, 31, 31, 31, - 1, 1, 67, 31, 31, 31, 31, 68, 69, 31, 31, 31, 31, 31, 31, 31, - 31, 70, 71, 31, 72, 73, 74, 75, 31, 31, 31, 31, 31, 31, 76, 31, - 1, 1, 1, 1, 1, 1, 77, 1, 1, 1, 1, 1, 1, 1, 1, 78, - 79, 31, 31, 31, 31, 31, 31, 31, 1, 1, 79, 31, 31, 31, 31, 31, - 31, 80, 31, 31, 31, 31, 31, 31, -}; - -static RE_UINT8 re_id_continue_stage_4[] = { - 0, 1, 2, 3, 0, 4, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 7, 8, 6, 6, 6, 9, 10, 11, 6, 12, - 6, 6, 6, 6, 13, 6, 6, 6, 6, 14, 15, 16, 14, 17, 18, 19, - 20, 6, 6, 21, 6, 6, 22, 23, 24, 6, 25, 6, 6, 26, 6, 27, - 6, 28, 29, 0, 0, 30, 0, 31, 6, 6, 6, 32, 33, 34, 35, 36, - 37, 38, 39, 40, 41, 42, 43, 44, 33, 42, 45, 
46, 47, 48, 49, 50, - 51, 52, 53, 44, 54, 55, 56, 57, 54, 58, 59, 60, 61, 62, 63, 64, - 16, 65, 66, 0, 67, 68, 69, 0, 70, 71, 72, 73, 74, 75, 76, 0, - 6, 6, 77, 6, 78, 6, 79, 80, 6, 6, 81, 6, 82, 83, 84, 6, - 85, 6, 58, 86, 87, 6, 6, 88, 16, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 89, 3, 6, 6, 90, 91, 88, 92, 93, 6, 6, 94, 95, - 96, 6, 6, 97, 6, 98, 6, 99, 75, 100, 101, 102, 6, 103, 104, 0, - 29, 6, 105, 106, 107, 108, 0, 0, 6, 6, 109, 110, 6, 6, 6, 92, - 6, 97, 111, 78, 0, 0, 112, 113, 6, 6, 6, 6, 6, 6, 6, 114, - 115, 6, 116, 78, 6, 117, 118, 119, 0, 120, 121, 122, 123, 0, 123, 124, - 125, 126, 127, 6, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 6, 129, 105, 6, 6, 6, 6, 130, 6, 79, 6, 131, 113, 132, 132, 6, - 133, 134, 16, 6, 135, 16, 6, 80, 136, 137, 6, 6, 138, 65, 0, 24, - 6, 6, 6, 6, 6, 99, 0, 0, 6, 6, 6, 6, 6, 6, 139, 0, - 6, 6, 6, 6, 139, 0, 24, 78, 140, 141, 6, 142, 143, 6, 6, 26, - 144, 145, 6, 6, 146, 147, 0, 148, 6, 149, 6, 92, 6, 6, 150, 151, - 6, 152, 92, 75, 6, 6, 153, 0, 6, 113, 154, 155, 6, 6, 156, 157, - 158, 159, 0, 0, 0, 0, 6, 160, 6, 6, 6, 6, 6, 161, 162, 29, - 6, 6, 6, 152, 6, 6, 163, 0, 164, 165, 166, 6, 6, 26, 167, 6, - 6, 78, 24, 6, 168, 6, 149, 169, 87, 170, 171, 172, 6, 6, 6, 75, - 1, 2, 3, 101, 6, 105, 173, 0, 174, 175, 176, 0, 6, 6, 6, 65, - 0, 0, 6, 88, 0, 0, 0, 177, 0, 0, 0, 0, 75, 6, 178, 0, - 105, 24, 147, 0, 78, 6, 179, 0, 6, 6, 6, 6, 78, 95, 0, 0, - 180, 181, 99, 0, 0, 0, 0, 0, 99, 163, 0, 0, 6, 182, 0, 0, - 183, 184, 0, 75, 0, 0, 0, 0, 6, 99, 99, 185, 0, 0, 0, 0, - 6, 6, 128, 0, 0, 0, 0, 0, 6, 6, 186, 50, 6, 65, 24, 187, - 6, 188, 0, 0, 6, 6, 150, 0, 0, 0, 0, 0, 6, 97, 95, 0, - 6, 6, 6, 138, 0, 0, 0, 0, 6, 6, 6, 189, 0, 0, 0, 0, - 6, 138, 0, 0, 0, 0, 0, 0, 6, 190, 0, 0, 0, 0, 0, 0, - 6, 6, 191, 105, 192, 0, 0, 0, 193, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 194, 195, 196, 0, 0, 0, 0, 197, 0, 0, 0, 0, 0, - 6, 6, 188, 6, 198, 199, 200, 6, 201, 202, 203, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 204, 205, 80, 188, 188, 129, 129, 206, 206, 207, 6, - 200, 208, 209, 
210, 211, 212, 0, 0, 6, 6, 6, 6, 6, 6, 113, 0, - 6, 88, 6, 6, 6, 6, 6, 6, 78, 0, 0, 0, 0, 0, 0, 0, - 6, 6, 6, 6, 6, 6, 6, 87, -}; - -static RE_UINT8 re_id_continue_stage_5[] = { - 0, 0, 0, 0, 0, 0, 255, 3, 254, 255, 255, 135, 254, 255, 255, 7, - 0, 4, 160, 4, 255, 255, 127, 255, 255, 255, 255, 255, 195, 255, 3, 0, - 31, 80, 0, 0, 255, 255, 223, 60, 192, 215, 255, 255, 251, 255, 255, 255, - 255, 255, 191, 255, 251, 252, 255, 255, 255, 0, 254, 255, 255, 255, 127, 2, - 254, 255, 255, 255, 255, 255, 255, 191, 182, 0, 255, 255, 255, 7, 7, 0, - 0, 0, 255, 7, 255, 195, 255, 255, 255, 255, 239, 159, 255, 253, 255, 159, - 0, 0, 255, 255, 255, 231, 255, 255, 255, 255, 3, 0, 255, 255, 63, 4, - 255, 63, 0, 0, 255, 255, 255, 15, 253, 31, 0, 0, 240, 255, 255, 127, - 207, 255, 254, 254, 238, 159, 249, 255, 255, 253, 197, 243, 159, 121, 128, 176, - 207, 255, 3, 0, 238, 135, 249, 255, 255, 253, 109, 211, 135, 57, 2, 94, - 192, 255, 63, 0, 238, 191, 251, 255, 255, 253, 237, 243, 191, 59, 1, 0, - 207, 255, 0, 0, 159, 57, 192, 176, 207, 255, 2, 0, 236, 199, 61, 214, - 24, 199, 255, 195, 199, 61, 129, 0, 192, 255, 0, 0, 238, 223, 253, 255, - 255, 253, 239, 227, 223, 61, 96, 3, 236, 223, 253, 255, 255, 253, 239, 243, - 223, 61, 96, 64, 207, 255, 6, 0, 255, 255, 255, 231, 223, 125, 128, 0, - 207, 255, 0, 252, 236, 255, 127, 252, 255, 255, 251, 47, 127, 132, 95, 255, - 0, 0, 12, 0, 255, 255, 255, 7, 255, 127, 255, 3, 150, 37, 240, 254, - 174, 236, 255, 59, 95, 63, 255, 243, 1, 0, 0, 3, 255, 3, 160, 194, - 255, 254, 255, 255, 255, 31, 254, 255, 223, 255, 255, 254, 255, 255, 255, 31, - 64, 0, 0, 0, 255, 3, 255, 255, 255, 255, 255, 63, 191, 32, 255, 255, - 255, 255, 255, 247, 255, 61, 127, 61, 255, 61, 255, 255, 255, 255, 61, 127, - 61, 255, 127, 255, 255, 255, 61, 255, 0, 254, 3, 0, 255, 255, 0, 0, - 255, 255, 31, 0, 255, 159, 255, 255, 255, 199, 1, 0, 255, 223, 31, 0, - 255, 255, 15, 0, 255, 223, 13, 0, 255, 255, 143, 48, 255, 3, 0, 0, - 0, 56, 255, 3, 255, 255, 255, 0, 255, 7, 255, 255, 
255, 255, 63, 0, - 255, 15, 255, 15, 192, 255, 255, 255, 255, 63, 31, 0, 255, 15, 255, 255, - 255, 3, 255, 7, 255, 255, 255, 127, 255, 255, 255, 159, 255, 3, 255, 3, - 128, 0, 0, 0, 255, 15, 255, 3, 0, 248, 15, 0, 255, 227, 255, 255, - 0, 0, 247, 255, 255, 255, 127, 0, 127, 0, 0, 240, 255, 255, 63, 63, - 63, 63, 255, 170, 255, 255, 223, 95, 220, 31, 207, 15, 255, 31, 220, 31, - 0, 0, 0, 128, 1, 0, 16, 0, 0, 0, 2, 128, 0, 0, 255, 31, - 226, 255, 1, 0, 132, 252, 47, 63, 80, 253, 255, 243, 224, 67, 0, 0, - 255, 1, 0, 0, 255, 127, 255, 255, 31, 248, 15, 0, 255, 128, 0, 128, - 127, 127, 127, 127, 224, 0, 0, 0, 254, 255, 62, 31, 255, 255, 127, 254, - 224, 255, 255, 255, 255, 63, 254, 255, 255, 127, 0, 0, 255, 31, 0, 0, - 255, 31, 255, 255, 255, 15, 0, 0, 255, 255, 240, 191, 255, 255, 255, 128, - 0, 0, 128, 255, 252, 255, 255, 255, 255, 121, 15, 0, 255, 7, 0, 0, - 0, 0, 0, 255, 255, 0, 0, 0, 31, 0, 255, 3, 255, 255, 255, 8, - 255, 63, 255, 255, 1, 128, 255, 3, 255, 63, 255, 3, 255, 255, 127, 12, - 7, 0, 0, 56, 255, 255, 124, 0, 126, 126, 126, 0, 127, 127, 0, 0, - 255, 55, 255, 3, 15, 0, 255, 255, 127, 248, 255, 255, 255, 255, 255, 3, - 127, 0, 248, 224, 255, 253, 127, 95, 219, 255, 255, 255, 0, 0, 248, 255, - 255, 255, 252, 255, 0, 0, 255, 15, 127, 0, 24, 0, 0, 224, 0, 0, - 0, 0, 223, 255, 252, 252, 252, 28, 255, 239, 255, 255, 127, 255, 255, 183, - 255, 63, 255, 63, 0, 0, 0, 32, 255, 255, 1, 0, 15, 255, 62, 0, - 63, 253, 255, 255, 255, 255, 191, 145, 255, 255, 255, 192, 111, 240, 239, 254, - 255, 255, 15, 135, 255, 255, 7, 0, 127, 0, 0, 0, 255, 1, 255, 3, - 255, 255, 223, 255, 7, 0, 0, 0, 255, 255, 255, 1, 31, 0, 255, 255, - 0, 128, 255, 255, 3, 0, 0, 0, 224, 227, 7, 248, 231, 15, 0, 0, - 0, 60, 0, 0, 28, 0, 0, 0, 255, 255, 255, 223, 100, 222, 255, 235, - 239, 255, 255, 255, 191, 231, 223, 223, 255, 255, 255, 123, 95, 252, 253, 255, - 63, 255, 255, 255, 253, 255, 255, 247, 255, 253, 255, 255, 247, 207, 255, 255, - 150, 254, 247, 10, 132, 234, 150, 170, 150, 247, 247, 94, 
255, 251, 255, 15, - 238, 251, 255, 15, -}; - -/* ID_Continue: 1894 bytes. */ - -RE_UINT32 re_get_id_continue(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 15; - code = ch ^ (f << 15); - pos = (RE_UINT32)re_id_continue_stage_1[f] << 4; - f = code >> 11; - code ^= f << 11; - pos = (RE_UINT32)re_id_continue_stage_2[pos + f] << 3; - f = code >> 8; - code ^= f << 8; - pos = (RE_UINT32)re_id_continue_stage_3[pos + f] << 3; - f = code >> 5; - code ^= f << 5; - pos = (RE_UINT32)re_id_continue_stage_4[pos + f] << 5; - pos += code; - value = (re_id_continue_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* XID_Start. */ - -static RE_UINT8 re_xid_start_stage_1[] = { - 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, -}; - -static RE_UINT8 re_xid_start_stage_2[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 9, 10, 11, 7, 7, 7, 7, 12, 13, 13, 13, 13, 14, - 15, 16, 17, 13, 18, 13, 19, 13, 13, 13, 13, 13, 13, 20, 13, 13, - 13, 13, 13, 13, 13, 13, 21, 13, 13, 13, 22, 13, 13, 23, 13, 13, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 24, 7, 25, 26, 13, 13, 13, 13, 13, 13, 13, 27, - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, -}; - -static RE_UINT8 re_xid_start_stage_3[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 1, 17, 18, 19, 1, 20, 21, 22, 23, 24, 25, 26, 27, 1, 28, - 29, 30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 31, 31, - 34, 35, 31, 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 36, 1, 1, 1, 1, 1, 1, 1, 1, 1, 37, - 1, 1, 1, 1, 38, 1, 39, 40, 41, 42, 43, 44, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 45, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 1, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 31, 31, 31, - 57, 58, 59, 60, 61, 31, 31, 31, 62, 63, 31, 31, 31, 31, 64, 31, - 1, 1, 1, 65, 66, 31, 31, 31, 1, 1, 1, 1, 67, 31, 31, 31, - 1, 1, 68, 31, 31, 31, 31, 
69, 70, 31, 31, 31, 31, 31, 31, 31, - 31, 31, 31, 31, 71, 72, 73, 74, 31, 31, 31, 31, 31, 31, 75, 31, - 1, 1, 1, 1, 1, 1, 76, 1, 1, 1, 1, 1, 1, 1, 1, 77, - 78, 31, 31, 31, 31, 31, 31, 31, 1, 1, 78, 31, 31, 31, 31, 31, -}; - -static RE_UINT8 re_xid_start_stage_4[] = { - 0, 0, 1, 1, 0, 2, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 5, 6, 0, 0, 0, 7, 8, 9, 4, 10, - 4, 4, 4, 4, 11, 4, 4, 4, 4, 12, 13, 14, 15, 0, 16, 17, - 0, 4, 18, 19, 4, 4, 20, 21, 22, 23, 24, 4, 4, 25, 26, 27, - 28, 29, 30, 0, 0, 31, 0, 0, 32, 33, 34, 35, 36, 37, 38, 39, - 40, 41, 42, 43, 44, 45, 46, 47, 36, 45, 48, 49, 50, 51, 46, 0, - 52, 53, 54, 47, 52, 53, 55, 56, 52, 57, 58, 59, 60, 61, 62, 0, - 14, 63, 62, 0, 64, 65, 66, 0, 67, 0, 68, 69, 70, 0, 0, 0, - 4, 71, 72, 73, 74, 4, 75, 76, 4, 4, 77, 4, 78, 79, 80, 4, - 81, 4, 82, 0, 23, 4, 4, 83, 14, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 84, 1, 4, 4, 85, 86, 87, 87, 88, 4, 89, 90, 0, - 0, 4, 4, 91, 4, 92, 4, 93, 94, 0, 16, 95, 4, 96, 97, 0, - 98, 4, 83, 0, 0, 99, 0, 0, 100, 89, 101, 0, 102, 103, 4, 104, - 4, 105, 106, 107, 0, 0, 0, 108, 4, 4, 4, 4, 4, 4, 0, 0, - 109, 4, 110, 107, 4, 111, 112, 113, 0, 0, 0, 114, 115, 0, 0, 0, - 116, 117, 118, 4, 119, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 4, 120, 121, 4, 4, 4, 4, 122, 4, 75, 4, 123, 98, 124, 124, 0, - 125, 126, 14, 4, 127, 14, 4, 76, 100, 128, 4, 4, 129, 82, 0, 16, - 4, 4, 4, 4, 4, 93, 0, 0, 4, 4, 4, 4, 4, 4, 69, 0, - 4, 4, 4, 4, 69, 0, 16, 107, 130, 131, 4, 132, 91, 4, 4, 23, - 133, 134, 4, 4, 135, 18, 0, 136, 137, 138, 4, 89, 134, 89, 0, 139, - 26, 140, 62, 94, 32, 141, 142, 0, 4, 119, 143, 144, 4, 145, 146, 147, - 148, 149, 0, 0, 0, 0, 4, 138, 4, 4, 4, 4, 4, 150, 151, 152, - 4, 4, 4, 153, 4, 4, 154, 0, 155, 156, 157, 4, 4, 87, 158, 4, - 4, 4, 107, 32, 4, 4, 4, 4, 4, 107, 16, 4, 159, 4, 15, 160, - 0, 0, 0, 161, 4, 4, 4, 94, 0, 1, 1, 162, 107, 121, 163, 0, - 164, 165, 166, 0, 4, 4, 4, 82, 0, 0, 4, 83, 0, 0, 0, 0, - 0, 0, 0, 0, 94, 4, 167, 0, 121, 16, 18, 0, 107, 4, 168, 0, - 4, 4, 4, 4, 107, 0, 0, 0, 169, 
170, 93, 0, 0, 0, 0, 0, - 93, 154, 0, 0, 4, 171, 0, 0, 172, 89, 0, 94, 0, 0, 0, 0, - 4, 93, 93, 141, 0, 0, 0, 0, 4, 4, 119, 0, 0, 0, 0, 0, - 102, 91, 0, 0, 102, 23, 16, 119, 102, 62, 0, 0, 102, 141, 173, 0, - 0, 0, 0, 0, 4, 18, 0, 0, 4, 4, 4, 129, 0, 0, 0, 0, - 4, 4, 4, 138, 0, 0, 0, 0, 4, 129, 0, 0, 0, 0, 0, 0, - 4, 30, 0, 0, 0, 0, 0, 0, 4, 4, 174, 0, 158, 0, 0, 0, - 47, 0, 0, 0, 0, 0, 0, 0, 4, 4, 175, 4, 176, 177, 178, 4, - 179, 180, 181, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 182, 183, 76, - 175, 175, 120, 120, 184, 184, 143, 0, 178, 185, 186, 187, 188, 189, 0, 0, - 4, 4, 4, 4, 4, 4, 98, 0, 4, 83, 4, 4, 4, 4, 4, 4, - 107, 0, 0, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_xid_start_stage_5[] = { - 0, 0, 0, 0, 254, 255, 255, 7, 0, 4, 32, 4, 255, 255, 127, 255, - 255, 255, 255, 255, 195, 255, 3, 0, 31, 80, 0, 0, 0, 0, 223, 56, - 64, 215, 255, 255, 251, 255, 255, 255, 255, 255, 191, 255, 3, 252, 255, 255, - 255, 0, 254, 255, 255, 255, 127, 2, 254, 255, 255, 255, 255, 0, 0, 0, - 0, 0, 255, 255, 255, 7, 7, 0, 255, 7, 0, 0, 0, 192, 254, 255, - 255, 255, 47, 0, 96, 192, 0, 156, 0, 0, 253, 255, 255, 255, 0, 0, - 0, 224, 255, 255, 63, 0, 2, 0, 0, 252, 255, 255, 255, 7, 48, 4, - 255, 255, 63, 4, 16, 1, 0, 0, 255, 255, 255, 1, 253, 31, 0, 0, - 240, 255, 255, 255, 255, 255, 255, 35, 0, 0, 1, 255, 3, 0, 254, 254, - 224, 159, 249, 255, 255, 253, 197, 35, 0, 64, 0, 176, 3, 0, 3, 0, - 224, 135, 249, 255, 255, 253, 109, 3, 0, 0, 0, 94, 0, 0, 28, 0, - 224, 191, 251, 255, 255, 253, 237, 35, 0, 0, 1, 0, 3, 0, 0, 0, - 0, 0, 0, 176, 3, 0, 2, 0, 232, 199, 61, 214, 24, 199, 255, 3, - 224, 223, 253, 255, 255, 253, 239, 35, 0, 0, 0, 3, 0, 0, 0, 64, - 3, 0, 6, 0, 255, 255, 255, 39, 0, 64, 0, 0, 3, 0, 0, 252, - 224, 255, 127, 252, 255, 255, 251, 47, 127, 0, 0, 0, 255, 255, 5, 0, - 150, 37, 240, 254, 174, 236, 5, 32, 95, 0, 0, 240, 1, 0, 0, 0, - 255, 254, 255, 255, 255, 31, 0, 0, 0, 31, 0, 0, 255, 7, 0, 128, - 0, 0, 63, 60, 98, 192, 225, 255, 3, 64, 0, 0, 191, 32, 255, 255, - 255, 255, 255, 247, 255, 
61, 127, 61, 255, 61, 255, 255, 255, 255, 61, 127, - 61, 255, 127, 255, 255, 255, 61, 255, 255, 255, 255, 7, 255, 255, 31, 0, - 255, 159, 255, 255, 255, 199, 1, 0, 255, 223, 3, 0, 255, 255, 3, 0, - 255, 223, 1, 0, 255, 255, 15, 0, 0, 0, 128, 16, 255, 255, 255, 0, - 255, 5, 255, 255, 255, 255, 63, 0, 255, 255, 255, 31, 255, 63, 31, 0, - 255, 15, 0, 0, 254, 0, 0, 0, 255, 255, 127, 0, 128, 0, 0, 0, - 224, 255, 255, 255, 224, 15, 0, 0, 248, 255, 255, 255, 1, 192, 0, 252, - 63, 0, 0, 0, 15, 0, 0, 0, 0, 224, 0, 252, 255, 255, 255, 63, - 0, 222, 99, 0, 255, 255, 63, 63, 63, 63, 255, 170, 255, 255, 223, 95, - 220, 31, 207, 15, 255, 31, 220, 31, 0, 0, 2, 128, 0, 0, 255, 31, - 132, 252, 47, 63, 80, 253, 255, 243, 224, 67, 0, 0, 255, 1, 0, 0, - 255, 127, 255, 255, 255, 255, 255, 127, 31, 120, 12, 0, 255, 128, 0, 0, - 127, 127, 127, 127, 224, 0, 0, 0, 254, 3, 62, 31, 255, 255, 127, 224, - 255, 63, 254, 255, 255, 127, 0, 0, 255, 31, 255, 255, 0, 12, 0, 0, - 255, 127, 0, 128, 0, 0, 128, 255, 252, 255, 255, 255, 255, 121, 15, 0, - 0, 0, 0, 255, 187, 247, 255, 255, 7, 0, 0, 0, 0, 0, 252, 8, - 63, 0, 255, 255, 255, 255, 7, 0, 0, 128, 0, 0, 247, 15, 0, 0, - 255, 255, 127, 4, 255, 255, 98, 62, 5, 0, 0, 56, 255, 7, 28, 0, - 126, 126, 126, 0, 127, 127, 0, 0, 15, 0, 255, 255, 127, 248, 255, 255, - 255, 255, 255, 15, 255, 63, 255, 255, 255, 255, 255, 3, 127, 0, 248, 160, - 255, 253, 127, 95, 219, 255, 255, 255, 0, 0, 248, 255, 255, 255, 252, 255, - 0, 0, 255, 3, 0, 0, 138, 170, 192, 255, 255, 255, 252, 252, 252, 28, - 255, 239, 255, 255, 127, 255, 255, 183, 255, 63, 255, 63, 255, 255, 1, 0, - 15, 255, 62, 0, 63, 253, 255, 255, 255, 255, 191, 145, 255, 255, 255, 192, - 1, 0, 239, 254, 30, 0, 0, 0, 31, 0, 1, 0, 255, 255, 223, 255, - 255, 255, 255, 223, 100, 222, 255, 235, 239, 255, 255, 255, 191, 231, 223, 223, - 255, 255, 255, 123, 95, 252, 253, 255, 63, 255, 255, 255, 253, 255, 255, 247, - 255, 253, 255, 255, 150, 254, 247, 10, 132, 234, 150, 170, 150, 247, 247, 94, - 255, 251, 255, 15, 
238, 251, 255, 15, -}; - -/* XID_Start: 1761 bytes. */ - -RE_UINT32 re_get_xid_start(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 16; - code = ch ^ (f << 16); - pos = (RE_UINT32)re_xid_start_stage_1[f] << 5; - f = code >> 11; - code ^= f << 11; - pos = (RE_UINT32)re_xid_start_stage_2[pos + f] << 3; - f = code >> 8; - code ^= f << 8; - pos = (RE_UINT32)re_xid_start_stage_3[pos + f] << 3; - f = code >> 5; - code ^= f << 5; - pos = (RE_UINT32)re_xid_start_stage_4[pos + f] << 5; - pos += code; - value = (re_xid_start_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* XID_Continue. */ - -static RE_UINT8 re_xid_continue_stage_1[] = { - 0, 1, 2, 3, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 6, 6, 6, - 6, 6, -}; - -static RE_UINT8 re_xid_continue_stage_2[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 9, 10, 11, 7, 7, 7, 7, 12, 13, 13, 13, 13, 14, - 15, 16, 17, 13, 18, 13, 19, 13, 13, 13, 13, 13, 13, 20, 13, 13, - 13, 13, 13, 13, 13, 13, 21, 13, 13, 13, 22, 13, 13, 23, 13, 13, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 24, 7, 25, 26, 13, 13, 13, 13, 13, 13, 13, 27, - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, - 28, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, -}; - -static RE_UINT8 re_xid_continue_stage_3[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 1, 17, 18, 19, 1, 20, 21, 22, 23, 24, 25, 26, 27, 1, 28, - 29, 30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 31, 31, - 34, 35, 31, 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 36, 1, 1, 1, 1, 1, 1, 1, 1, 1, 37, - 1, 1, 1, 1, 38, 1, 39, 40, 41, 42, 43, 44, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 45, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 1, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 31, 31, 31, - 57, 58, 59, 60, 61, 31, 31, 31, 62, 63, 31, 31, 31, 31, 64, 31, - 1, 1, 1, 65, 66, 31, 31, 31, 1, 1, 1, 1, 67, 31, 31, 
31, - 1, 1, 68, 31, 31, 31, 31, 69, 70, 31, 31, 31, 31, 31, 31, 31, - 31, 71, 72, 31, 73, 74, 75, 76, 31, 31, 31, 31, 31, 31, 77, 31, - 1, 1, 1, 1, 1, 1, 78, 1, 1, 1, 1, 1, 1, 1, 1, 79, - 80, 31, 31, 31, 31, 31, 31, 31, 1, 1, 80, 31, 31, 31, 31, 31, - 31, 81, 31, 31, 31, 31, 31, 31, -}; - -static RE_UINT8 re_xid_continue_stage_4[] = { - 0, 1, 2, 3, 0, 4, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 7, 8, 6, 6, 6, 9, 10, 11, 6, 12, - 6, 6, 6, 6, 13, 6, 6, 6, 6, 14, 15, 16, 14, 17, 18, 19, - 20, 6, 6, 21, 6, 6, 22, 23, 24, 6, 25, 6, 6, 26, 6, 27, - 6, 28, 29, 0, 0, 30, 0, 31, 6, 6, 6, 32, 33, 34, 35, 36, - 37, 38, 39, 40, 41, 42, 43, 44, 33, 42, 45, 46, 47, 48, 49, 50, - 51, 52, 53, 44, 54, 55, 56, 57, 54, 58, 59, 60, 61, 62, 63, 64, - 16, 65, 66, 0, 67, 68, 69, 0, 70, 71, 72, 73, 74, 75, 76, 0, - 6, 6, 77, 6, 78, 6, 79, 80, 6, 6, 81, 6, 82, 83, 84, 6, - 85, 6, 58, 86, 87, 6, 6, 88, 16, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 89, 3, 6, 6, 90, 91, 88, 92, 93, 6, 6, 94, 95, - 96, 6, 6, 97, 6, 98, 6, 99, 75, 100, 101, 102, 6, 103, 104, 0, - 29, 6, 105, 106, 107, 108, 0, 0, 6, 6, 109, 110, 6, 6, 6, 92, - 6, 97, 111, 78, 0, 0, 112, 113, 6, 6, 6, 6, 6, 6, 6, 114, - 115, 6, 116, 78, 6, 117, 118, 119, 0, 120, 121, 122, 123, 0, 123, 124, - 125, 126, 127, 6, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 6, 129, 105, 6, 6, 6, 6, 130, 6, 79, 6, 131, 113, 132, 132, 6, - 133, 134, 16, 6, 135, 16, 6, 80, 136, 137, 6, 6, 138, 65, 0, 24, - 6, 6, 6, 6, 6, 99, 0, 0, 6, 6, 6, 6, 6, 6, 139, 0, - 6, 6, 6, 6, 139, 0, 24, 78, 140, 141, 6, 142, 143, 6, 6, 26, - 144, 145, 6, 6, 146, 147, 0, 148, 6, 149, 6, 92, 6, 6, 150, 151, - 6, 152, 92, 75, 6, 6, 153, 0, 6, 113, 154, 155, 6, 6, 156, 157, - 158, 159, 0, 0, 0, 0, 6, 160, 6, 6, 6, 6, 6, 161, 162, 29, - 6, 6, 6, 152, 6, 6, 163, 0, 164, 165, 166, 6, 6, 26, 167, 6, - 6, 6, 78, 168, 6, 6, 6, 6, 6, 78, 24, 6, 169, 6, 149, 1, - 87, 170, 171, 172, 6, 6, 6, 75, 1, 2, 3, 101, 6, 105, 173, 0, - 174, 175, 176, 0, 6, 6, 6, 65, 0, 0, 6, 88, 0, 0, 0, 177, - 0, 0, 
0, 0, 75, 6, 178, 0, 105, 24, 147, 0, 78, 6, 179, 0, - 6, 6, 6, 6, 78, 95, 0, 0, 180, 181, 99, 0, 0, 0, 0, 0, - 99, 163, 0, 0, 6, 182, 0, 0, 183, 184, 0, 75, 0, 0, 0, 0, - 6, 99, 99, 185, 0, 0, 0, 0, 6, 6, 128, 0, 0, 0, 0, 0, - 6, 6, 186, 50, 6, 65, 24, 187, 6, 188, 0, 0, 6, 6, 150, 0, - 0, 0, 0, 0, 6, 97, 95, 0, 6, 6, 6, 138, 0, 0, 0, 0, - 6, 6, 6, 189, 0, 0, 0, 0, 6, 138, 0, 0, 0, 0, 0, 0, - 6, 190, 0, 0, 0, 0, 0, 0, 6, 6, 191, 105, 192, 0, 0, 0, - 193, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 194, 195, 196, 0, 0, - 0, 0, 197, 0, 0, 0, 0, 0, 6, 6, 188, 6, 198, 199, 200, 6, - 201, 202, 203, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 204, 205, 80, - 188, 188, 129, 129, 206, 206, 207, 6, 200, 208, 209, 210, 211, 212, 0, 0, - 6, 6, 6, 6, 6, 6, 113, 0, 6, 88, 6, 6, 6, 6, 6, 6, - 78, 0, 0, 0, 0, 0, 0, 0, 6, 6, 6, 6, 6, 6, 6, 87, -}; - -static RE_UINT8 re_xid_continue_stage_5[] = { - 0, 0, 0, 0, 0, 0, 255, 3, 254, 255, 255, 135, 254, 255, 255, 7, - 0, 4, 160, 4, 255, 255, 127, 255, 255, 255, 255, 255, 195, 255, 3, 0, - 31, 80, 0, 0, 255, 255, 223, 56, 192, 215, 255, 255, 251, 255, 255, 255, - 255, 255, 191, 255, 251, 252, 255, 255, 255, 0, 254, 255, 255, 255, 127, 2, - 254, 255, 255, 255, 255, 255, 255, 191, 182, 0, 255, 255, 255, 7, 7, 0, - 0, 0, 255, 7, 255, 195, 255, 255, 255, 255, 239, 159, 255, 253, 255, 159, - 0, 0, 255, 255, 255, 231, 255, 255, 255, 255, 3, 0, 255, 255, 63, 4, - 255, 63, 0, 0, 255, 255, 255, 15, 253, 31, 0, 0, 240, 255, 255, 127, - 207, 255, 254, 254, 238, 159, 249, 255, 255, 253, 197, 243, 159, 121, 128, 176, - 207, 255, 3, 0, 238, 135, 249, 255, 255, 253, 109, 211, 135, 57, 2, 94, - 192, 255, 63, 0, 238, 191, 251, 255, 255, 253, 237, 243, 191, 59, 1, 0, - 207, 255, 0, 0, 159, 57, 192, 176, 207, 255, 2, 0, 236, 199, 61, 214, - 24, 199, 255, 195, 199, 61, 129, 0, 192, 255, 0, 0, 238, 223, 253, 255, - 255, 253, 239, 227, 223, 61, 96, 3, 236, 223, 253, 255, 255, 253, 239, 243, - 223, 61, 96, 64, 207, 255, 6, 0, 255, 255, 255, 231, 223, 125, 128, 0, - 207, 255, 0, 252, 
236, 255, 127, 252, 255, 255, 251, 47, 127, 132, 95, 255, - 0, 0, 12, 0, 255, 255, 255, 7, 255, 127, 255, 3, 150, 37, 240, 254, - 174, 236, 255, 59, 95, 63, 255, 243, 1, 0, 0, 3, 255, 3, 160, 194, - 255, 254, 255, 255, 255, 31, 254, 255, 223, 255, 255, 254, 255, 255, 255, 31, - 64, 0, 0, 0, 255, 3, 255, 255, 255, 255, 255, 63, 191, 32, 255, 255, - 255, 255, 255, 247, 255, 61, 127, 61, 255, 61, 255, 255, 255, 255, 61, 127, - 61, 255, 127, 255, 255, 255, 61, 255, 0, 254, 3, 0, 255, 255, 0, 0, - 255, 255, 31, 0, 255, 159, 255, 255, 255, 199, 1, 0, 255, 223, 31, 0, - 255, 255, 15, 0, 255, 223, 13, 0, 255, 255, 143, 48, 255, 3, 0, 0, - 0, 56, 255, 3, 255, 255, 255, 0, 255, 7, 255, 255, 255, 255, 63, 0, - 255, 15, 255, 15, 192, 255, 255, 255, 255, 63, 31, 0, 255, 15, 255, 255, - 255, 3, 255, 7, 255, 255, 255, 127, 255, 255, 255, 159, 255, 3, 255, 3, - 128, 0, 0, 0, 255, 15, 255, 3, 0, 248, 15, 0, 255, 227, 255, 255, - 0, 0, 247, 255, 255, 255, 127, 0, 127, 0, 0, 240, 255, 255, 63, 63, - 63, 63, 255, 170, 255, 255, 223, 95, 220, 31, 207, 15, 255, 31, 220, 31, - 0, 0, 0, 128, 1, 0, 16, 0, 0, 0, 2, 128, 0, 0, 255, 31, - 226, 255, 1, 0, 132, 252, 47, 63, 80, 253, 255, 243, 224, 67, 0, 0, - 255, 1, 0, 0, 255, 127, 255, 255, 31, 248, 15, 0, 255, 128, 0, 128, - 127, 127, 127, 127, 224, 0, 0, 0, 254, 255, 62, 31, 255, 255, 127, 230, - 224, 255, 255, 255, 255, 63, 254, 255, 255, 127, 0, 0, 255, 31, 0, 0, - 255, 31, 255, 255, 255, 15, 0, 0, 255, 255, 240, 191, 255, 255, 255, 128, - 0, 0, 128, 255, 252, 255, 255, 255, 255, 121, 15, 0, 255, 7, 0, 0, - 0, 0, 0, 255, 255, 0, 0, 0, 31, 0, 255, 3, 255, 255, 255, 8, - 255, 63, 255, 255, 1, 128, 255, 3, 255, 63, 255, 3, 255, 255, 127, 12, - 7, 0, 0, 56, 255, 255, 124, 0, 126, 126, 126, 0, 127, 127, 0, 0, - 255, 55, 255, 3, 15, 0, 255, 255, 127, 248, 255, 255, 255, 255, 255, 3, - 127, 0, 248, 224, 255, 253, 127, 95, 219, 255, 255, 255, 0, 0, 248, 255, - 240, 255, 255, 255, 255, 255, 252, 255, 127, 0, 24, 0, 0, 224, 0, 0, - 0, 0, 138, 170, 
252, 252, 252, 28, 255, 239, 255, 255, 127, 255, 255, 183, - 255, 63, 255, 63, 0, 0, 0, 32, 255, 255, 1, 0, 15, 255, 62, 0, - 63, 253, 255, 255, 255, 255, 191, 145, 255, 255, 255, 192, 111, 240, 239, 254, - 255, 255, 15, 135, 255, 255, 7, 0, 127, 0, 0, 0, 255, 1, 255, 3, - 255, 255, 223, 255, 7, 0, 0, 0, 255, 255, 255, 1, 31, 0, 255, 255, - 0, 128, 255, 255, 3, 0, 0, 0, 224, 227, 7, 248, 231, 15, 0, 0, - 0, 60, 0, 0, 28, 0, 0, 0, 255, 255, 255, 223, 100, 222, 255, 235, - 239, 255, 255, 255, 191, 231, 223, 223, 255, 255, 255, 123, 95, 252, 253, 255, - 63, 255, 255, 255, 253, 255, 255, 247, 255, 253, 255, 255, 247, 207, 255, 255, - 150, 254, 247, 10, 132, 234, 150, 170, 150, 247, 247, 94, 255, 251, 255, 15, - 238, 251, 255, 15, -}; - -/* XID_Continue: 1902 bytes. */ - -RE_UINT32 re_get_xid_continue(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 15; - code = ch ^ (f << 15); - pos = (RE_UINT32)re_xid_continue_stage_1[f] << 4; - f = code >> 11; - code ^= f << 11; - pos = (RE_UINT32)re_xid_continue_stage_2[pos + f] << 3; - f = code >> 8; - code ^= f << 8; - pos = (RE_UINT32)re_xid_continue_stage_3[pos + f] << 3; - f = code >> 5; - code ^= f << 5; - pos = (RE_UINT32)re_xid_continue_stage_4[pos + f] << 5; - pos += code; - value = (re_xid_continue_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Default_Ignorable_Code_Point. 
*/ - -static RE_UINT8 re_default_ignorable_code_point_stage_1[] = { - 0, 1, 1, 2, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 4, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, -}; - -static RE_UINT8 re_default_ignorable_code_point_stage_2[] = { - 0, 1, 2, 3, 4, 1, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 6, 1, 1, 7, 1, 1, 1, 1, 1, - 8, 8, 1, 1, 1, 1, 1, 1, -}; - -static RE_UINT8 re_default_ignorable_code_point_stage_3[] = { - 0, 1, 1, 2, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 4, 1, 1, 1, 1, 1, 5, 6, 1, 1, 1, 1, 1, 1, 1, - 7, 1, 1, 1, 1, 1, 1, 1, 1, 8, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 9, 10, 1, 11, 1, 1, 1, 1, 1, 1, - 12, 12, 12, 12, 12, 12, 12, 12, -}; - -static RE_UINT8 re_default_ignorable_code_point_stage_4[] = { - 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 2, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 4, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, - 7, 0, 0, 0, 0, 0, 0, 0, 8, 9, 0, 10, 0, 0, 0, 0, - 0, 0, 0, 11, 0, 0, 0, 0, 10, 0, 0, 0, 0, 0, 0, 4, - 0, 0, 0, 0, 0, 5, 0, 12, 0, 0, 0, 13, 0, 0, 0, 0, - 14, 14, 14, 14, 14, 14, 14, 14, -}; - -static RE_UINT8 re_default_ignorable_code_point_stage_5[] = { - 0, 0, 0, 0, 0, 32, 0, 0, 0, 128, 0, 0, 0, 0, 0, 16, - 0, 0, 0, 128, 1, 0, 0, 0, 0, 0, 48, 0, 0, 120, 0, 0, - 0, 248, 0, 0, 0, 124, 0, 0, 255, 255, 0, 0, 16, 0, 0, 0, - 0, 0, 255, 1, 0, 0, 248, 7, 255, 255, 255, 255, -}; - -/* Default_Ignorable_Code_Point: 344 bytes. 
*/ - -RE_UINT32 re_get_default_ignorable_code_point(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 14; - code = ch ^ (f << 14); - pos = (RE_UINT32)re_default_ignorable_code_point_stage_1[f] << 3; - f = code >> 11; - code ^= f << 11; - pos = (RE_UINT32)re_default_ignorable_code_point_stage_2[pos + f] << 3; - f = code >> 8; - code ^= f << 8; - pos = (RE_UINT32)re_default_ignorable_code_point_stage_3[pos + f] << 3; - f = code >> 5; - code ^= f << 5; - pos = (RE_UINT32)re_default_ignorable_code_point_stage_4[pos + f] << 5; - pos += code; - value = (re_default_ignorable_code_point_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Grapheme_Extend. */ - -static RE_UINT8 re_grapheme_extend_stage_1[] = { - 0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 4, 4, 4, - 4, 4, -}; - -static RE_UINT8 re_grapheme_extend_stage_2[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 8, 9, 7, 7, 7, 7, 7, 7, 7, 7, 7, 10, - 11, 12, 13, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 14, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 15, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 16, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, -}; - -static RE_UINT8 re_grapheme_extend_stage_3[] = { - 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, - 14, 0, 0, 15, 0, 0, 0, 16, 17, 18, 19, 20, 21, 22, 0, 0, - 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24, 25, 0, 0, - 26, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 27, 0, 28, 29, 30, 31, 0, 0, 0, 0, - 0, 0, 0, 32, 0, 0, 33, 34, 0, 35, 0, 0, 0, 0, 0, 0, - 0, 0, 36, 0, 0, 0, 0, 0, 37, 38, 0, 0, 0, 0, 39, 0, - 0, 0, 0, 0, 0, 0, 0, 40, 0, 41, 42, 0, 0, 0, 0, 0, - 0, 43, 0, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_grapheme_extend_stage_4[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 0, 0, 0, 0, - 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 4, 5, 6, 0, - 7, 0, 8, 9, 0, 0, 10, 11, 12, 13, 14, 0, 0, 15, 0, 16, - 17, 18, 19, 0, 0, 0, 
0, 20, 21, 22, 23, 24, 25, 26, 27, 24, - 28, 29, 30, 31, 28, 29, 32, 24, 25, 33, 34, 24, 35, 36, 37, 0, - 0, 38, 39, 24, 0, 40, 41, 24, 0, 36, 27, 24, 0, 0, 42, 0, - 0, 43, 44, 0, 0, 45, 46, 0, 47, 48, 0, 49, 50, 51, 52, 0, - 0, 53, 54, 55, 56, 0, 0, 0, 0, 0, 57, 0, 0, 0, 0, 0, - 58, 58, 59, 59, 0, 60, 61, 0, 62, 0, 0, 0, 0, 63, 0, 0, - 0, 64, 0, 0, 0, 0, 0, 0, 65, 0, 66, 67, 0, 0, 0, 0, - 68, 69, 35, 16, 70, 71, 0, 72, 0, 73, 0, 0, 0, 0, 74, 75, - 0, 0, 0, 0, 0, 0, 1, 76, 77, 0, 0, 0, 0, 0, 13, 78, - 0, 0, 0, 0, 0, 0, 0, 79, 0, 0, 0, 80, 0, 0, 0, 1, - 0, 81, 0, 0, 82, 0, 0, 0, 0, 0, 0, 83, 80, 0, 0, 84, - 85, 86, 0, 0, 0, 0, 87, 88, 0, 89, 90, 0, 21, 91, 0, 0, - 0, 92, 93, 0, 0, 94, 25, 95, 0, 0, 0, 0, 0, 0, 0, 96, - 36, 0, 0, 0, 0, 0, 0, 0, 2, 97, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 38, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 98, - 99, 100, 0, 0, 0, 0, 0, 0, 25, 101, 97, 0, 70, 102, 0, 0, - 21, 103, 0, 0, 70, 104, 0, 0, 0, 0, 0, 0, 0, 105, 0, 0, - 0, 0, 0, 0, 106, 0, 0, 0, 0, 0, 0, 107, 108, 109, 0, 0, - 0, 0, 110, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 2, -}; - -static RE_UINT8 re_grapheme_extend_stage_5[] = { - 0, 0, 0, 0, 255, 255, 255, 255, 255, 255, 0, 0, 248, 3, 0, 0, - 0, 0, 254, 255, 255, 255, 255, 191, 182, 0, 0, 0, 0, 0, 255, 7, - 0, 248, 255, 255, 0, 0, 1, 0, 0, 0, 192, 159, 159, 61, 0, 0, - 0, 0, 2, 0, 0, 0, 255, 255, 255, 7, 0, 0, 192, 255, 1, 0, - 0, 248, 15, 0, 0, 0, 192, 251, 239, 62, 0, 0, 0, 0, 0, 14, - 240, 255, 255, 127, 7, 0, 0, 0, 0, 0, 0, 20, 254, 33, 254, 0, - 12, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 80, 30, 32, 128, 0, - 6, 0, 0, 0, 0, 0, 0, 16, 134, 57, 2, 0, 0, 0, 35, 0, - 190, 33, 0, 0, 0, 0, 0, 208, 30, 32, 192, 0, 4, 0, 0, 0, - 0, 0, 0, 64, 1, 32, 128, 0, 0, 0, 0, 192, 193, 61, 96, 0, - 0, 0, 0, 144, 68, 48, 96, 0, 0, 132, 92, 128, 0, 0, 242, 7, - 128, 127, 0, 0, 0, 0, 242, 27, 0, 63, 0, 0, 0, 0, 0, 3, - 0, 0, 160, 2, 0, 0, 254, 127, 223, 224, 255, 254, 255, 255, 255, 31, - 64, 0, 0, 0, 0, 224, 253, 102, 0, 0, 0, 195, 1, 0, 30, 0, - 100, 32, 0, 32, 0, 0, 0, 
224, 0, 0, 28, 0, 0, 0, 12, 0, - 0, 0, 176, 63, 64, 254, 15, 32, 0, 56, 0, 0, 0, 2, 0, 0, - 135, 1, 4, 14, 0, 0, 128, 9, 0, 0, 64, 127, 229, 31, 248, 159, - 15, 0, 0, 0, 0, 0, 208, 23, 3, 0, 0, 0, 60, 11, 0, 0, - 64, 163, 3, 0, 0, 240, 207, 0, 0, 0, 247, 255, 253, 33, 16, 0, - 127, 0, 0, 240, 0, 48, 0, 0, 255, 255, 1, 0, 0, 128, 3, 0, - 0, 0, 0, 128, 0, 252, 0, 0, 0, 0, 0, 6, 0, 128, 247, 63, - 0, 0, 3, 0, 68, 8, 0, 0, 96, 0, 0, 0, 16, 0, 0, 0, - 255, 255, 3, 0, 192, 63, 0, 0, 128, 255, 3, 0, 0, 0, 200, 19, - 0, 126, 102, 0, 8, 16, 0, 0, 0, 0, 157, 193, 0, 48, 64, 0, - 32, 33, 0, 0, 127, 0, 0, 0, 0, 0, 0, 32, 110, 240, 0, 0, - 0, 0, 0, 135, 0, 0, 0, 255, 0, 0, 120, 6, 128, 239, 31, 0, - 0, 0, 192, 127, 0, 40, 191, 0, 0, 128, 7, 0, 160, 195, 7, 248, - 231, 15, 0, 0, 0, 60, 0, 0, 28, 0, 0, 0, -}; - -/* Grapheme_Extend: 1062 bytes. */ - -RE_UINT32 re_get_grapheme_extend(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 15; - code = ch ^ (f << 15); - pos = (RE_UINT32)re_grapheme_extend_stage_1[f] << 4; - f = code >> 11; - code ^= f << 11; - pos = (RE_UINT32)re_grapheme_extend_stage_2[pos + f] << 3; - f = code >> 8; - code ^= f << 8; - pos = (RE_UINT32)re_grapheme_extend_stage_3[pos + f] << 3; - f = code >> 5; - code ^= f << 5; - pos = (RE_UINT32)re_grapheme_extend_stage_4[pos + f] << 5; - pos += code; - value = (re_grapheme_extend_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Grapheme_Base. 
*/ - -static RE_UINT8 re_grapheme_base_stage_1[] = { - 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, -}; - -static RE_UINT8 re_grapheme_base_stage_2[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 9, 10, 11, 7, 7, 7, 7, 12, 13, 13, 13, 13, 14, - 15, 16, 17, 13, 18, 13, 19, 13, 13, 13, 13, 13, 13, 20, 13, 13, - 13, 13, 13, 13, 13, 13, 21, 13, 13, 13, 22, 13, 13, 23, 24, 13, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 25, 7, 26, 27, 13, 13, 13, 13, 13, 13, 13, 28, - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, -}; - -static RE_UINT8 re_grapheme_base_stage_3[] = { - 0, 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, - 15, 1, 16, 17, 1, 1, 18, 19, 20, 21, 22, 23, 24, 25, 1, 26, - 27, 28, 1, 29, 30, 1, 1, 31, 1, 1, 1, 32, 33, 34, 35, 36, - 37, 38, 39, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 41, - 1, 1, 1, 1, 42, 1, 43, 44, 45, 46, 47, 48, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 49, 50, 50, 50, 50, 50, 50, 50, 50, - 50, 1, 51, 52, 1, 53, 54, 55, 56, 57, 58, 59, 60, 50, 50, 50, - 61, 62, 63, 64, 65, 50, 66, 50, 67, 68, 50, 50, 50, 50, 69, 50, - 1, 1, 1, 70, 71, 50, 50, 50, 1, 1, 1, 1, 72, 50, 50, 50, - 1, 1, 73, 50, 50, 50, 50, 74, 75, 50, 50, 50, 50, 50, 50, 50, - 76, 77, 78, 79, 80, 81, 82, 83, 50, 50, 50, 50, 50, 50, 84, 50, - 85, 86, 87, 88, 89, 90, 91, 92, 1, 1, 1, 1, 1, 1, 93, 1, - 1, 1, 1, 1, 1, 1, 1, 94, 95, 50, 50, 50, 50, 50, 50, 50, - 1, 1, 95, 50, 50, 50, 50, 50, -}; - -static RE_UINT8 re_grapheme_base_stage_4[] = { - 0, 1, 1, 2, 0, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 0, 0, 0, 4, 5, 6, 1, 1, 1, 1, 1, 1, 7, 1, 1, 1, - 1, 8, 9, 10, 11, 12, 13, 14, 15, 1, 16, 17, 1, 1, 18, 19, - 20, 21, 22, 1, 1, 23, 1, 24, 25, 26, 27, 0, 0, 28, 0, 0, - 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, - 33, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, - 56, 60, 61, 62, 63, 64, 
65, 66, 10, 67, 68, 0, 69, 70, 71, 0, - 72, 73, 74, 75, 76, 77, 78, 0, 1, 79, 80, 81, 82, 1, 83, 1, - 1, 1, 84, 1, 85, 86, 87, 1, 88, 1, 89, 90, 91, 1, 1, 92, - 1, 1, 1, 1, 90, 1, 1, 93, 94, 95, 96, 97, 1, 98, 99, 100, - 101, 1, 1, 102, 1, 103, 1, 104, 90, 105, 106, 107, 1, 108, 109, 1, - 110, 1, 111, 112, 100, 113, 0, 0, 114, 115, 116, 117, 118, 119, 1, 120, - 1, 121, 122, 1, 0, 0, 123, 124, 1, 1, 1, 1, 1, 1, 0, 0, - 125, 1, 126, 127, 1, 128, 129, 130, 131, 132, 1, 133, 134, 89, 0, 0, - 1, 1, 1, 1, 135, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 136, - 1, 137, 16, 1, 1, 1, 1, 1, 10, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 138, 0, 0, 0, 0, 0, 1, 139, 2, 1, 1, 1, 1, 140, - 1, 83, 1, 141, 142, 143, 143, 0, 1, 144, 0, 0, 145, 1, 1, 136, - 1, 1, 1, 1, 1, 1, 104, 146, 1, 135, 10, 1, 147, 1, 1, 1, - 148, 149, 1, 1, 139, 89, 1, 150, 2, 1, 1, 1, 1, 1, 1, 2, - 1, 1, 1, 1, 1, 104, 1, 1, 1, 1, 1, 1, 1, 1, 151, 0, - 1, 1, 1, 1, 152, 1, 153, 1, 1, 154, 1, 155, 102, 1, 1, 156, - 1, 1, 1, 1, 157, 16, 0, 158, 159, 160, 1, 102, 1, 1, 161, 162, - 1, 163, 164, 90, 29, 165, 166, 0, 1, 167, 168, 144, 1, 169, 170, 171, - 172, 173, 0, 0, 0, 0, 1, 174, 1, 1, 1, 1, 1, 150, 175, 144, - 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 176, 1, 1, 91, 0, - 177, 178, 179, 1, 1, 1, 180, 1, 1, 1, 181, 1, 182, 1, 183, 184, - 185, 181, 186, 187, 1, 1, 1, 90, 10, 1, 1, 1, 127, 2, 188, 189, - 190, 191, 192, 0, 1, 1, 1, 89, 193, 194, 1, 1, 195, 0, 181, 90, - 0, 0, 0, 0, 90, 1, 93, 0, 2, 150, 16, 0, 196, 1, 197, 0, - 1, 1, 1, 1, 127, 198, 0, 0, 199, 200, 201, 0, 0, 0, 0, 0, - 202, 203, 0, 0, 1, 204, 0, 0, 205, 136, 206, 1, 0, 0, 0, 0, - 1, 207, 208, 209, 0, 0, 0, 0, 1, 1, 210, 0, 0, 0, 0, 0, - 0, 0, 0, 2, 0, 0, 0, 0, 211, 102, 212, 21, 118, 213, 214, 215, - 29, 216, 217, 0, 118, 218, 215, 0, 0, 0, 0, 0, 1, 219, 198, 0, - 1, 1, 1, 220, 0, 0, 0, 0, 1, 1, 1, 221, 0, 0, 0, 0, - 1, 220, 0, 0, 0, 0, 0, 0, 1, 222, 0, 0, 0, 0, 0, 0, - 1, 1, 223, 2, 224, 0, 0, 0, 225, 0, 0, 0, 0, 0, 0, 0, - 1, 1, 1, 1, 1, 1, 1, 104, 1, 226, 1, 227, 228, 229, 127, 0, 
- 1, 1, 230, 0, 0, 0, 0, 0, 1, 1, 142, 96, 0, 0, 0, 0, - 1, 1, 128, 1, 231, 232, 233, 1, 234, 235, 236, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 237, 1, 1, 1, 1, 1, 1, 1, 1, 238, 1, - 233, 239, 240, 241, 242, 243, 0, 244, 1, 108, 1, 1, 136, 245, 246, 0, - 131, 139, 1, 108, 89, 0, 0, 247, 248, 89, 249, 0, 0, 0, 0, 0, - 1, 250, 1, 90, 136, 1, 251, 93, 1, 2, 211, 1, 1, 1, 1, 252, - 1, 127, 150, 183, 0, 0, 0, 253, 1, 1, 254, 0, 1, 1, 255, 0, - 1, 1, 1, 136, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 142, 0, - 1, 92, 1, 1, 1, 1, 1, 1, 127, 0, 0, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_grapheme_base_stage_5[] = { - 0, 0, 0, 0, 255, 255, 255, 255, 255, 255, 255, 127, 255, 223, 255, 255, - 0, 0, 255, 124, 240, 215, 255, 255, 251, 255, 255, 255, 7, 252, 255, 255, - 255, 0, 254, 255, 255, 255, 127, 254, 254, 255, 255, 255, 255, 134, 0, 0, - 0, 0, 0, 64, 73, 0, 255, 255, 255, 7, 31, 0, 192, 255, 0, 200, - 255, 7, 0, 0, 255, 255, 254, 255, 255, 255, 63, 64, 96, 194, 255, 255, - 255, 63, 253, 255, 255, 255, 0, 0, 0, 224, 255, 255, 63, 0, 2, 0, - 255, 7, 240, 7, 255, 255, 63, 4, 16, 1, 255, 127, 255, 255, 255, 65, - 253, 31, 0, 0, 248, 255, 255, 255, 255, 255, 255, 235, 1, 222, 1, 255, - 243, 255, 255, 254, 236, 159, 249, 255, 255, 253, 197, 163, 129, 89, 0, 176, - 195, 255, 255, 15, 232, 135, 249, 255, 255, 253, 109, 195, 1, 0, 0, 94, - 192, 255, 28, 0, 232, 191, 251, 255, 255, 253, 237, 227, 1, 26, 1, 0, - 195, 255, 3, 0, 255, 253, 237, 35, 129, 25, 0, 176, 195, 255, 255, 0, - 232, 199, 61, 214, 24, 199, 255, 131, 198, 29, 1, 0, 192, 255, 255, 7, - 238, 223, 253, 255, 255, 253, 239, 35, 30, 0, 0, 3, 195, 255, 0, 255, - 236, 223, 253, 255, 255, 253, 239, 99, 155, 13, 0, 64, 195, 255, 6, 0, - 255, 255, 255, 167, 193, 93, 0, 0, 195, 255, 63, 254, 236, 255, 127, 252, - 255, 255, 251, 47, 127, 0, 3, 127, 0, 0, 28, 0, 255, 255, 13, 128, - 127, 128, 255, 15, 150, 37, 240, 254, 174, 236, 13, 32, 95, 0, 255, 243, - 255, 255, 255, 252, 255, 255, 95, 253, 255, 254, 255, 255, 255, 31, 0, 128, - 32, 31, 0, 0, 
0, 0, 0, 192, 191, 223, 255, 7, 255, 31, 2, 153, - 255, 255, 255, 60, 254, 255, 225, 255, 155, 223, 255, 223, 191, 32, 255, 255, - 255, 61, 127, 61, 255, 61, 255, 255, 255, 255, 61, 127, 61, 255, 127, 255, - 255, 255, 61, 255, 255, 255, 255, 7, 255, 255, 255, 31, 255, 255, 255, 3, - 255, 255, 31, 0, 255, 255, 1, 0, 255, 223, 3, 0, 255, 255, 99, 0, - 255, 255, 3, 0, 255, 223, 1, 0, 255, 255, 79, 192, 191, 1, 240, 31, - 255, 3, 255, 3, 255, 7, 255, 3, 255, 255, 255, 0, 255, 5, 255, 255, - 255, 255, 63, 0, 120, 14, 251, 1, 241, 255, 255, 255, 255, 63, 31, 0, - 255, 15, 255, 255, 255, 3, 255, 199, 255, 255, 127, 198, 255, 255, 191, 0, - 26, 224, 7, 0, 255, 63, 0, 0, 240, 255, 255, 255, 255, 255, 47, 232, - 251, 15, 255, 255, 255, 7, 240, 31, 252, 255, 255, 255, 195, 244, 255, 255, - 191, 92, 12, 240, 255, 15, 48, 248, 255, 227, 255, 255, 255, 0, 8, 0, - 2, 222, 111, 0, 255, 255, 63, 63, 63, 63, 255, 170, 255, 255, 255, 63, - 255, 255, 223, 255, 223, 255, 207, 239, 255, 255, 220, 127, 255, 7, 255, 255, - 255, 128, 255, 255, 0, 0, 243, 255, 255, 127, 255, 31, 255, 3, 255, 255, - 255, 255, 15, 0, 127, 0, 0, 0, 255, 31, 255, 3, 255, 127, 255, 255, - 255, 127, 12, 254, 255, 128, 1, 0, 255, 255, 127, 0, 127, 127, 127, 127, - 255, 255, 255, 15, 255, 255, 255, 251, 0, 0, 255, 15, 255, 255, 127, 248, - 224, 255, 255, 255, 255, 63, 254, 255, 15, 0, 255, 255, 255, 31, 0, 0, - 255, 31, 255, 255, 127, 0, 255, 255, 255, 15, 0, 0, 255, 127, 8, 192, - 255, 255, 252, 0, 255, 127, 15, 0, 0, 0, 0, 255, 187, 247, 255, 255, - 159, 15, 255, 3, 15, 192, 255, 3, 0, 0, 252, 15, 63, 192, 255, 255, - 127, 0, 12, 128, 255, 255, 55, 236, 255, 191, 255, 195, 255, 129, 25, 0, - 247, 47, 255, 243, 255, 255, 98, 62, 5, 0, 0, 248, 255, 207, 63, 0, - 126, 126, 126, 0, 127, 127, 0, 0, 223, 30, 255, 3, 127, 248, 255, 255, - 255, 63, 255, 255, 127, 0, 248, 160, 255, 255, 127, 95, 219, 255, 255, 255, - 3, 0, 248, 255, 0, 0, 255, 255, 255, 255, 252, 255, 255, 0, 0, 0, - 0, 0, 255, 63, 0, 0, 255, 3, 255, 255, 
247, 255, 127, 15, 223, 255, - 252, 252, 252, 28, 127, 127, 0, 48, 255, 239, 255, 255, 127, 255, 255, 183, - 255, 63, 255, 63, 135, 255, 255, 255, 255, 255, 143, 255, 255, 7, 255, 15, - 255, 255, 255, 191, 15, 255, 63, 0, 255, 3, 0, 0, 63, 253, 255, 255, - 255, 255, 191, 145, 255, 255, 191, 255, 255, 255, 255, 143, 255, 255, 255, 131, - 255, 255, 255, 192, 1, 0, 239, 254, 255, 0, 255, 1, 255, 255, 63, 254, - 255, 255, 63, 255, 255, 255, 7, 255, 255, 1, 0, 0, 253, 255, 255, 255, - 128, 63, 252, 255, 255, 255, 135, 217, 3, 0, 255, 255, 255, 1, 255, 3, - 127, 16, 192, 255, 15, 0, 0, 0, 255, 255, 63, 128, 255, 215, 64, 0, - 255, 127, 0, 0, 7, 0, 15, 0, 255, 255, 255, 1, 31, 0, 255, 255, - 0, 0, 248, 255, 3, 0, 0, 0, 127, 254, 255, 255, 95, 60, 0, 0, - 24, 240, 255, 255, 255, 195, 255, 255, 35, 0, 0, 0, 255, 255, 255, 223, - 100, 222, 255, 235, 239, 255, 255, 255, 191, 231, 223, 223, 255, 255, 255, 123, - 95, 252, 253, 255, 63, 255, 255, 255, 255, 207, 255, 255, 150, 254, 247, 10, - 132, 234, 150, 170, 150, 247, 247, 94, 255, 251, 255, 15, 238, 251, 255, 15, - 0, 0, 3, 0, 255, 127, 254, 127, 254, 255, 254, 255, 192, 255, 255, 255, - 7, 0, 255, 255, 255, 1, 3, 0, 1, 0, 191, 255, 223, 7, 0, 0, - 255, 255, 255, 30, 0, 0, 0, 248, 225, 255, 0, 0, 63, 0, 0, 0, -}; - -/* Grapheme_Base: 2169 bytes. */ - -RE_UINT32 re_get_grapheme_base(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 16; - code = ch ^ (f << 16); - pos = (RE_UINT32)re_grapheme_base_stage_1[f] << 5; - f = code >> 11; - code ^= f << 11; - pos = (RE_UINT32)re_grapheme_base_stage_2[pos + f] << 3; - f = code >> 8; - code ^= f << 8; - pos = (RE_UINT32)re_grapheme_base_stage_3[pos + f] << 3; - f = code >> 5; - code ^= f << 5; - pos = (RE_UINT32)re_grapheme_base_stage_4[pos + f] << 5; - pos += code; - value = (re_grapheme_base_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Grapheme_Link. 
*/ - -static RE_UINT8 re_grapheme_link_stage_1[] = { - 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, -}; - -static RE_UINT8 re_grapheme_link_stage_2[] = { - 0, 1, 2, 3, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_grapheme_link_stage_3[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 3, 4, - 5, 0, 0, 0, 0, 0, 0, 6, 0, 0, 7, 8, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 9, 0, 0, 10, 11, 12, 13, 0, 0, 0, 0, - 0, 0, 14, 0, 0, 0, 0, 0, 15, 16, 0, 0, 0, 0, 17, 0, -}; - -static RE_UINT8 re_grapheme_link_stage_4[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, - 0, 0, 1, 0, 0, 0, 2, 0, 0, 3, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 4, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, - 6, 6, 0, 0, 0, 0, 7, 0, 0, 0, 0, 8, 0, 0, 0, 0, - 0, 0, 4, 0, 0, 9, 0, 10, 0, 0, 0, 11, 0, 0, 0, 0, - 12, 0, 0, 0, 0, 0, 4, 0, 0, 0, 13, 0, 0, 0, 8, 0, - 0, 0, 0, 0, 0, 0, 0, 14, 0, 0, 0, 0, 0, 0, 0, 1, - 0, 11, 0, 0, 0, 0, 0, 0, 0, 0, 12, 0, 0, 15, 0, 0, - 0, 16, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 14, 0, 0, -}; - -static RE_UINT8 re_grapheme_link_stage_5[] = { - 0, 0, 0, 0, 0, 32, 0, 0, 0, 4, 0, 0, 0, 0, 0, 4, - 16, 0, 0, 0, 0, 0, 0, 6, 0, 0, 16, 0, 0, 0, 4, 0, - 1, 0, 0, 0, 0, 12, 0, 0, 0, 0, 12, 0, 0, 0, 0, 128, - 64, 0, 0, 0, 0, 0, 8, 0, 0, 0, 64, 0, 0, 0, 0, 2, - 0, 0, 24, 0, -}; - -/* Grapheme_Link: 374 bytes. 
*/ - -RE_UINT32 re_get_grapheme_link(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 15; - code = ch ^ (f << 15); - pos = (RE_UINT32)re_grapheme_link_stage_1[f] << 4; - f = code >> 11; - code ^= f << 11; - pos = (RE_UINT32)re_grapheme_link_stage_2[pos + f] << 3; - f = code >> 8; - code ^= f << 8; - pos = (RE_UINT32)re_grapheme_link_stage_3[pos + f] << 3; - f = code >> 5; - code ^= f << 5; - pos = (RE_UINT32)re_grapheme_link_stage_4[pos + f] << 5; - pos += code; - value = (re_grapheme_link_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* White_Space. */ - -static RE_UINT8 re_white_space_stage_1[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, -}; - -static RE_UINT8 re_white_space_stage_2[] = { - 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, -}; - -static RE_UINT8 re_white_space_stage_3[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, - 3, 1, 1, 1, 1, 1, 1, 1, 4, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -}; - -static RE_UINT8 re_white_space_stage_4[] = { - 0, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 3, 1, 1, 1, 1, 1, 4, 5, 1, 1, 1, 1, 1, 1, - 3, 1, 1, 1, 1, 1, 1, 1, -}; - -static RE_UINT8 re_white_space_stage_5[] = { - 0, 62, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 32, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, - 255, 7, 0, 0, 0, 131, 0, 0, 0, 0, 0, 128, 0, 0, 0, 0, -}; - -/* White_Space: 169 bytes. 
*/ - -RE_UINT32 re_get_white_space(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 16; - code = ch ^ (f << 16); - pos = (RE_UINT32)re_white_space_stage_1[f] << 3; - f = code >> 13; - code ^= f << 13; - pos = (RE_UINT32)re_white_space_stage_2[pos + f] << 4; - f = code >> 9; - code ^= f << 9; - pos = (RE_UINT32)re_white_space_stage_3[pos + f] << 3; - f = code >> 6; - code ^= f << 6; - pos = (RE_UINT32)re_white_space_stage_4[pos + f] << 6; - pos += code; - value = (re_white_space_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Bidi_Control. */ - -static RE_UINT8 re_bidi_control_stage_1[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, -}; - -static RE_UINT8 re_bidi_control_stage_2[] = { - 0, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -}; - -static RE_UINT8 re_bidi_control_stage_3[] = { - 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 2, 0, 0, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_bidi_control_stage_4[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, - 2, 3, 0, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_bidi_control_stage_5[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 0, - 0, 192, 0, 0, 0, 124, 0, 0, 0, 0, 0, 0, 192, 3, 0, 0, -}; - -/* Bidi_Control: 129 bytes. */ - -RE_UINT32 re_get_bidi_control(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 16; - code = ch ^ (f << 16); - pos = (RE_UINT32)re_bidi_control_stage_1[f] << 4; - f = code >> 12; - code ^= f << 12; - pos = (RE_UINT32)re_bidi_control_stage_2[pos + f] << 3; - f = code >> 9; - code ^= f << 9; - pos = (RE_UINT32)re_bidi_control_stage_3[pos + f] << 3; - f = code >> 6; - code ^= f << 6; - pos = (RE_UINT32)re_bidi_control_stage_4[pos + f] << 6; - pos += code; - value = (re_bidi_control_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Join_Control. 
*/ - -static RE_UINT8 re_join_control_stage_1[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, -}; - -static RE_UINT8 re_join_control_stage_2[] = { - 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_join_control_stage_3[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_join_control_stage_4[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_join_control_stage_5[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, 0, 0, 0, 0, 0, -}; - -/* Join_Control: 97 bytes. */ - -RE_UINT32 re_get_join_control(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 16; - code = ch ^ (f << 16); - pos = (RE_UINT32)re_join_control_stage_1[f] << 4; - f = code >> 12; - code ^= f << 12; - pos = (RE_UINT32)re_join_control_stage_2[pos + f] << 3; - f = code >> 9; - code ^= f << 9; - pos = (RE_UINT32)re_join_control_stage_3[pos + f] << 3; - f = code >> 6; - code ^= f << 6; - pos = (RE_UINT32)re_join_control_stage_4[pos + f] << 6; - pos += code; - value = (re_join_control_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Dash. 
*/ - -static RE_UINT8 re_dash_stage_1[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, -}; - -static RE_UINT8 re_dash_stage_2[] = { - 0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, -}; - -static RE_UINT8 re_dash_stage_3[] = { - 0, 1, 2, 1, 1, 1, 1, 1, 1, 1, 3, 1, 4, 1, 1, 1, - 5, 6, 1, 1, 1, 1, 1, 7, 8, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, -}; - -static RE_UINT8 re_dash_stage_4[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 2, 1, 3, 1, 1, 1, 1, 1, 1, 1, - 4, 1, 1, 1, 1, 1, 1, 1, 5, 6, 7, 1, 1, 1, 1, 1, - 8, 1, 1, 1, 1, 1, 1, 1, 9, 1, 1, 1, 1, 1, 1, 1, - 10, 1, 11, 1, 1, 1, 1, 1, 12, 13, 1, 1, 14, 1, 1, 1, -}; - -static RE_UINT8 re_dash_stage_5[] = { - 0, 0, 0, 0, 0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 4, 0, 0, 0, 0, 0, 64, 1, 0, 0, 0, 0, 0, 0, 0, - 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 63, 0, 0, 0, 0, 0, - 0, 0, 8, 0, 0, 0, 0, 8, 0, 8, 0, 0, 0, 0, 0, 0, - 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 128, 4, 0, 0, 0, 12, - 0, 0, 0, 16, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 1, 8, 0, 0, 0, - 0, 32, 0, 0, 0, 0, 0, 0, -}; - -/* Dash: 297 bytes. */ - -RE_UINT32 re_get_dash(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 16; - code = ch ^ (f << 16); - pos = (RE_UINT32)re_dash_stage_1[f] << 4; - f = code >> 12; - code ^= f << 12; - pos = (RE_UINT32)re_dash_stage_2[pos + f] << 3; - f = code >> 9; - code ^= f << 9; - pos = (RE_UINT32)re_dash_stage_3[pos + f] << 3; - f = code >> 6; - code ^= f << 6; - pos = (RE_UINT32)re_dash_stage_4[pos + f] << 6; - pos += code; - value = (re_dash_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Hyphen. 
*/ - -static RE_UINT8 re_hyphen_stage_1[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, -}; - -static RE_UINT8 re_hyphen_stage_2[] = { - 0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, -}; - -static RE_UINT8 re_hyphen_stage_3[] = { - 0, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, - 4, 1, 1, 1, 1, 1, 1, 5, 6, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 7, -}; - -static RE_UINT8 re_hyphen_stage_4[] = { - 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 2, 1, 3, 1, 1, 1, 1, 1, 1, 1, - 4, 1, 1, 1, 1, 1, 1, 1, 5, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 6, 1, 1, 1, 1, 1, 7, 1, 1, 8, 9, 1, 1, -}; - -static RE_UINT8 re_hyphen_stage_5[] = { - 0, 0, 0, 0, 0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 4, 0, 0, 0, 0, 0, 0, 64, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 128, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 8, 0, 0, 0, - 0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, 0, 0, 0, -}; - -/* Hyphen: 241 bytes. */ - -RE_UINT32 re_get_hyphen(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 16; - code = ch ^ (f << 16); - pos = (RE_UINT32)re_hyphen_stage_1[f] << 4; - f = code >> 12; - code ^= f << 12; - pos = (RE_UINT32)re_hyphen_stage_2[pos + f] << 3; - f = code >> 9; - code ^= f << 9; - pos = (RE_UINT32)re_hyphen_stage_3[pos + f] << 3; - f = code >> 6; - code ^= f << 6; - pos = (RE_UINT32)re_hyphen_stage_4[pos + f] << 6; - pos += code; - value = (re_hyphen_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Quotation_Mark. 
*/ - -static RE_UINT8 re_quotation_mark_stage_1[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, -}; - -static RE_UINT8 re_quotation_mark_stage_2[] = { - 0, 1, 2, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -}; - -static RE_UINT8 re_quotation_mark_stage_3[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 2, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 4, -}; - -static RE_UINT8 re_quotation_mark_stage_4[] = { - 0, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 3, 1, 1, 1, 1, 1, 1, 1, 4, 1, 1, 1, 1, 1, 1, 1, - 1, 5, 1, 1, 6, 7, 1, 1, -}; - -static RE_UINT8 re_quotation_mark_stage_5[] = { - 0, 0, 0, 0, 132, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 8, 0, 8, 0, 0, 0, 255, 0, 0, 0, 6, - 0, 240, 0, 224, 0, 0, 0, 0, 30, 0, 0, 0, 0, 0, 0, 0, - 132, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, 0, 0, 0, -}; - -/* Quotation_Mark: 193 bytes. */ - -RE_UINT32 re_get_quotation_mark(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 16; - code = ch ^ (f << 16); - pos = (RE_UINT32)re_quotation_mark_stage_1[f] << 4; - f = code >> 12; - code ^= f << 12; - pos = (RE_UINT32)re_quotation_mark_stage_2[pos + f] << 3; - f = code >> 9; - code ^= f << 9; - pos = (RE_UINT32)re_quotation_mark_stage_3[pos + f] << 3; - f = code >> 6; - code ^= f << 6; - pos = (RE_UINT32)re_quotation_mark_stage_4[pos + f] << 6; - pos += code; - value = (re_quotation_mark_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Terminal_Punctuation. 
*/ - -static RE_UINT8 re_terminal_punctuation_stage_1[] = { - 0, 1, 2, 3, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, -}; - -static RE_UINT8 re_terminal_punctuation_stage_2[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 9, 10, 11, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 12, 13, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 14, - 15, 9, 16, 9, 17, 9, 9, 9, 9, 18, 9, 9, 9, 9, 9, 9, -}; - -static RE_UINT8 re_terminal_punctuation_stage_3[] = { - 0, 1, 1, 1, 1, 1, 2, 3, 1, 1, 1, 4, 5, 6, 7, 8, - 9, 1, 10, 1, 1, 1, 1, 1, 1, 1, 1, 1, 11, 1, 12, 1, - 13, 1, 1, 1, 1, 1, 14, 1, 1, 1, 1, 1, 15, 16, 1, 17, - 18, 1, 19, 1, 1, 20, 21, 1, 22, 1, 1, 1, 1, 1, 1, 1, - 23, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 24, 1, 1, 1, 25, 1, 1, 1, 1, 1, 1, 1, - 1, 26, 1, 1, 27, 28, 1, 1, 29, 30, 31, 32, 33, 34, 1, 35, - 1, 1, 1, 1, 36, 1, 37, 1, 1, 1, 1, 1, 1, 1, 1, 38, - 39, 1, 40, 1, 1, 1, 41, 1, 42, 43, 44, 45, 1, 1, 1, 1, - 46, 1, 1, 1, 1, 1, 1, 1, -}; - -static RE_UINT8 re_terminal_punctuation_stage_4[] = { - 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 3, 0, 0, 0, - 4, 0, 5, 0, 6, 0, 0, 0, 0, 0, 7, 0, 8, 0, 0, 0, - 0, 0, 0, 9, 0, 10, 2, 0, 0, 0, 0, 11, 0, 0, 12, 0, - 13, 0, 0, 0, 0, 0, 14, 0, 0, 0, 0, 15, 0, 0, 0, 16, - 0, 0, 0, 17, 0, 0, 18, 0, 19, 0, 0, 0, 0, 0, 11, 0, - 0, 20, 0, 0, 0, 0, 21, 0, 0, 22, 0, 23, 0, 24, 25, 0, - 0, 26, 0, 0, 27, 0, 0, 0, 0, 0, 0, 23, 28, 0, 0, 0, - 0, 0, 0, 29, 0, 0, 0, 30, 0, 0, 31, 0, 0, 32, 0, 0, - 0, 0, 25, 0, 0, 0, 33, 0, 0, 0, 34, 35, 0, 0, 0, 36, - 0, 0, 37, 0, 1, 0, 0, 38, 34, 0, 39, 0, 0, 0, 40, 0, - 34, 0, 0, 0, 0, 41, 0, 0, 0, 0, 42, 0, 0, 23, 43, 0, - 0, 0, 44, 0, 0, 0, 45, 0, 0, 0, 0, 46, -}; - -static RE_UINT8 re_terminal_punctuation_stage_5[] = { - 0, 0, 0, 0, 2, 80, 0, 140, 0, 0, 0, 64, 128, 0, 0, 0, - 0, 2, 0, 0, 8, 0, 0, 
0, 0, 16, 0, 136, 0, 0, 16, 0, - 255, 23, 0, 0, 0, 0, 0, 3, 0, 0, 255, 127, 48, 0, 0, 0, - 0, 0, 0, 12, 0, 225, 7, 0, 0, 12, 0, 0, 254, 1, 0, 0, - 0, 96, 0, 0, 0, 56, 0, 0, 0, 0, 112, 4, 60, 3, 0, 0, - 0, 15, 0, 0, 0, 0, 0, 236, 0, 0, 0, 248, 0, 0, 0, 192, - 0, 0, 0, 48, 128, 3, 0, 0, 0, 64, 0, 0, 6, 0, 0, 0, - 0, 224, 0, 0, 0, 0, 248, 0, 0, 0, 192, 0, 0, 192, 0, 0, - 0, 128, 0, 0, 0, 0, 0, 224, 0, 0, 0, 128, 0, 0, 3, 0, - 0, 8, 0, 0, 0, 0, 247, 0, 18, 0, 0, 0, 0, 0, 1, 0, - 0, 0, 128, 0, 0, 0, 0, 252, 128, 63, 0, 0, 3, 0, 0, 0, - 14, 0, 0, 0, 96, 0, 0, 0, 0, 0, 15, 0, -}; - -/* Terminal_Punctuation: 676 bytes. */ - -RE_UINT32 re_get_terminal_punctuation(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 14; - code = ch ^ (f << 14); - pos = (RE_UINT32)re_terminal_punctuation_stage_1[f] << 4; - f = code >> 10; - code ^= f << 10; - pos = (RE_UINT32)re_terminal_punctuation_stage_2[pos + f] << 3; - f = code >> 7; - code ^= f << 7; - pos = (RE_UINT32)re_terminal_punctuation_stage_3[pos + f] << 2; - f = code >> 5; - code ^= f << 5; - pos = (RE_UINT32)re_terminal_punctuation_stage_4[pos + f] << 5; - pos += code; - value = (re_terminal_punctuation_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Other_Math. 
*/ - -static RE_UINT8 re_other_math_stage_1[] = { - 0, 1, 2, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, -}; - -static RE_UINT8 re_other_math_stage_2[] = { - 0, 1, 1, 1, 2, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5, 1, 1, 6, 1, 1, -}; - -static RE_UINT8 re_other_math_stage_3[] = { - 0, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 3, 4, 1, 5, 1, 6, 7, 8, 1, 9, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 10, 11, 1, 1, 1, 1, 12, 13, 14, 15, - 1, 1, 1, 1, 1, 1, 16, 1, -}; - -static RE_UINT8 re_other_math_stage_4[] = { - 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 2, 3, 4, 5, 6, 7, 8, 0, 9, 10, - 11, 12, 13, 0, 14, 15, 16, 17, 18, 0, 0, 0, 0, 19, 20, 21, - 0, 0, 0, 0, 0, 22, 23, 24, 25, 0, 26, 27, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 25, 28, 0, 0, 0, 0, 29, 0, 30, 31, - 0, 0, 0, 32, 0, 0, 0, 0, 0, 33, 0, 0, 0, 0, 0, 0, - 34, 34, 35, 34, 36, 37, 38, 34, 39, 40, 41, 34, 34, 34, 34, 34, - 34, 34, 34, 34, 34, 42, 43, 44, 35, 35, 45, 45, 46, 46, 47, 34, - 38, 48, 49, 50, 51, 52, 0, 0, -}; - -static RE_UINT8 re_other_math_stage_5[] = { - 0, 0, 0, 0, 0, 0, 0, 64, 0, 0, 39, 0, 0, 0, 51, 0, - 0, 0, 64, 0, 0, 0, 28, 0, 1, 0, 0, 0, 30, 0, 0, 96, - 0, 96, 0, 0, 0, 0, 255, 31, 98, 248, 0, 0, 132, 252, 47, 62, - 16, 179, 251, 241, 224, 3, 0, 0, 0, 0, 224, 243, 182, 62, 195, 240, - 255, 63, 235, 47, 48, 0, 0, 0, 0, 15, 0, 0, 0, 0, 176, 0, - 0, 0, 1, 0, 4, 0, 0, 0, 3, 192, 127, 240, 193, 140, 15, 0, - 148, 31, 0, 0, 96, 0, 0, 0, 5, 0, 0, 0, 15, 96, 0, 0, - 192, 255, 0, 0, 248, 255, 255, 1, 0, 0, 0, 15, 0, 0, 0, 48, - 10, 1, 0, 0, 0, 0, 0, 80, 255, 255, 255, 255, 255, 255, 223, 255, - 255, 255, 255, 223, 100, 222, 255, 235, 239, 255, 255, 255, 191, 231, 223, 223, - 255, 255, 255, 123, 95, 252, 253, 255, 63, 255, 255, 255, 253, 255, 255, 247, - 255, 255, 255, 247, 255, 127, 255, 255, 255, 253, 255, 
255, 247, 207, 255, 255, - 150, 254, 247, 10, 132, 234, 150, 170, 150, 247, 247, 94, 255, 251, 255, 15, - 238, 251, 255, 15, -}; - -/* Other_Math: 502 bytes. */ - -RE_UINT32 re_get_other_math(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 15; - code = ch ^ (f << 15); - pos = (RE_UINT32)re_other_math_stage_1[f] << 4; - f = code >> 11; - code ^= f << 11; - pos = (RE_UINT32)re_other_math_stage_2[pos + f] << 3; - f = code >> 8; - code ^= f << 8; - pos = (RE_UINT32)re_other_math_stage_3[pos + f] << 3; - f = code >> 5; - code ^= f << 5; - pos = (RE_UINT32)re_other_math_stage_4[pos + f] << 5; - pos += code; - value = (re_other_math_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Hex_Digit. */ - -static RE_UINT8 re_hex_digit_stage_1[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, -}; - -static RE_UINT8 re_hex_digit_stage_2[] = { - 0, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, -}; - -static RE_UINT8 re_hex_digit_stage_3[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 2, -}; - -static RE_UINT8 re_hex_digit_stage_4[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 2, 1, -}; - -static RE_UINT8 re_hex_digit_stage_5[] = { - 0, 0, 0, 0, 0, 0, 255, 3, 126, 0, 0, 0, 126, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 255, 3, 126, 0, 0, 0, 126, 0, 0, 0, 0, 0, 0, 0, -}; - -/* Hex_Digit: 129 bytes. 
*/ - -RE_UINT32 re_get_hex_digit(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 16; - code = ch ^ (f << 16); - pos = (RE_UINT32)re_hex_digit_stage_1[f] << 3; - f = code >> 13; - code ^= f << 13; - pos = (RE_UINT32)re_hex_digit_stage_2[pos + f] << 3; - f = code >> 10; - code ^= f << 10; - pos = (RE_UINT32)re_hex_digit_stage_3[pos + f] << 3; - f = code >> 7; - code ^= f << 7; - pos = (RE_UINT32)re_hex_digit_stage_4[pos + f] << 7; - pos += code; - value = (re_hex_digit_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* ASCII_Hex_Digit. */ - -static RE_UINT8 re_ascii_hex_digit_stage_1[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, -}; - -static RE_UINT8 re_ascii_hex_digit_stage_2[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -}; - -static RE_UINT8 re_ascii_hex_digit_stage_3[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -}; - -static RE_UINT8 re_ascii_hex_digit_stage_4[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -}; - -static RE_UINT8 re_ascii_hex_digit_stage_5[] = { - 0, 0, 0, 0, 0, 0, 255, 3, 126, 0, 0, 0, 126, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -}; - -/* ASCII_Hex_Digit: 97 bytes. */ - -RE_UINT32 re_get_ascii_hex_digit(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 16; - code = ch ^ (f << 16); - pos = (RE_UINT32)re_ascii_hex_digit_stage_1[f] << 3; - f = code >> 13; - code ^= f << 13; - pos = (RE_UINT32)re_ascii_hex_digit_stage_2[pos + f] << 3; - f = code >> 10; - code ^= f << 10; - pos = (RE_UINT32)re_ascii_hex_digit_stage_3[pos + f] << 3; - f = code >> 7; - code ^= f << 7; - pos = (RE_UINT32)re_ascii_hex_digit_stage_4[pos + f] << 7; - pos += code; - value = (re_ascii_hex_digit_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Other_Alphabetic. 
*/ - -static RE_UINT8 re_other_alphabetic_stage_1[] = { - 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, -}; - -static RE_UINT8 re_other_alphabetic_stage_2[] = { - 0, 1, 2, 3, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 7, 8, 6, 6, 6, 6, 6, 6, 6, 6, 6, 9, - 6, 10, 11, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 12, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, -}; - -static RE_UINT8 re_other_alphabetic_stage_3[] = { - 0, 0, 0, 1, 0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, - 13, 0, 0, 14, 0, 0, 0, 15, 16, 17, 18, 19, 20, 0, 0, 0, - 0, 0, 0, 0, 21, 0, 0, 0, 0, 0, 0, 0, 0, 22, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 23, 0, - 24, 25, 26, 27, 0, 0, 0, 0, 0, 0, 0, 28, 0, 0, 0, 0, - 0, 0, 29, 0, 0, 0, 0, 0, 30, 31, 0, 0, 0, 0, 32, 0, - 0, 0, 0, 0, 0, 0, 0, 33, -}; - -static RE_UINT8 re_other_alphabetic_stage_4[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 2, 3, 0, 4, 0, 5, 6, 0, 0, 7, 8, - 9, 10, 0, 0, 0, 11, 0, 0, 12, 13, 0, 0, 0, 0, 0, 14, - 15, 16, 17, 18, 19, 20, 21, 18, 19, 20, 22, 23, 19, 20, 24, 18, - 19, 20, 25, 18, 26, 20, 27, 0, 19, 20, 28, 18, 18, 20, 28, 18, - 18, 20, 29, 18, 18, 0, 30, 31, 0, 32, 33, 0, 0, 34, 33, 0, - 0, 0, 0, 35, 36, 37, 0, 0, 0, 38, 39, 40, 41, 0, 0, 0, - 0, 0, 42, 0, 0, 0, 0, 0, 31, 31, 31, 31, 0, 43, 44, 0, - 0, 0, 0, 0, 0, 45, 0, 0, 0, 46, 0, 0, 0, 10, 47, 0, - 48, 0, 49, 50, 0, 0, 0, 0, 51, 52, 15, 0, 53, 54, 0, 55, - 0, 56, 0, 0, 0, 0, 0, 31, 0, 0, 0, 0, 0, 43, 57, 58, - 0, 0, 0, 0, 0, 0, 0, 57, 0, 0, 0, 59, 42, 0, 0, 0, - 0, 60, 0, 0, 61, 62, 15, 0, 0, 63, 64, 0, 15, 62, 0, 0, - 0, 65, 66, 0, 0, 67, 0, 68, 0, 0, 0, 0, 0, 0, 0, 69, - 70, 0, 0, 0, 0, 0, 0, 0, 71, 0, 0, 0, 0, 0, 0, 0, - 53, 72, 73, 0, 26, 74, 0, 0, 53, 64, 0, 0, 53, 75, 0, 0, - 0, 0, 0, 0, 0, 76, 0, 0, 0, 0, 35, 77, 0, 0, 0, 0, -}; - -static RE_UINT8 re_other_alphabetic_stage_5[] = { - 0, 0, 0, 0, 32, 0, 0, 0, 0, 0, 255, 191, 182, 0, 0, 0, - 0, 0, 255, 7, 0, 248, 255, 254, 0, 
0, 1, 0, 0, 0, 192, 31, - 158, 33, 0, 0, 0, 0, 2, 0, 0, 0, 255, 255, 192, 255, 1, 0, - 0, 0, 192, 248, 239, 30, 0, 0, 240, 3, 255, 127, 15, 0, 0, 0, - 0, 0, 0, 204, 255, 223, 224, 0, 12, 0, 0, 0, 14, 0, 0, 0, - 0, 0, 0, 192, 159, 25, 128, 0, 135, 25, 2, 0, 0, 0, 35, 0, - 191, 27, 0, 0, 159, 25, 192, 0, 4, 0, 0, 0, 199, 29, 128, 0, - 223, 29, 96, 0, 223, 29, 128, 0, 0, 128, 95, 255, 0, 0, 12, 0, - 0, 0, 242, 7, 0, 32, 0, 0, 0, 0, 242, 27, 0, 0, 254, 255, - 3, 224, 255, 254, 255, 255, 255, 31, 0, 248, 127, 121, 0, 0, 192, 195, - 133, 1, 30, 0, 124, 0, 0, 48, 0, 0, 0, 128, 0, 0, 192, 255, - 255, 1, 0, 0, 0, 2, 0, 0, 255, 15, 255, 1, 1, 3, 0, 0, - 0, 0, 128, 15, 0, 0, 224, 127, 254, 255, 31, 0, 31, 0, 0, 0, - 0, 0, 224, 255, 7, 0, 0, 0, 254, 51, 0, 0, 128, 255, 3, 0, - 240, 255, 63, 0, 255, 255, 255, 255, 255, 3, 0, 0, 0, 0, 240, 15, - 248, 0, 0, 0, 3, 0, 0, 0, 0, 0, 240, 255, 192, 7, 0, 0, - 128, 255, 7, 0, 0, 254, 127, 0, 8, 48, 0, 0, 0, 0, 157, 65, - 0, 248, 32, 0, 248, 7, 0, 0, 0, 0, 0, 64, 110, 240, 0, 0, - 0, 0, 0, 255, 63, 0, 0, 0, 0, 0, 255, 1, 0, 0, 248, 255, - 0, 248, 63, 0, 255, 255, 255, 127, -}; - -/* Other_Alphabetic: 786 bytes. */ - -RE_UINT32 re_get_other_alphabetic(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 15; - code = ch ^ (f << 15); - pos = (RE_UINT32)re_other_alphabetic_stage_1[f] << 4; - f = code >> 11; - code ^= f << 11; - pos = (RE_UINT32)re_other_alphabetic_stage_2[pos + f] << 3; - f = code >> 8; - code ^= f << 8; - pos = (RE_UINT32)re_other_alphabetic_stage_3[pos + f] << 3; - f = code >> 5; - code ^= f << 5; - pos = (RE_UINT32)re_other_alphabetic_stage_4[pos + f] << 5; - pos += code; - value = (re_other_alphabetic_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Ideographic. 
*/ - -static RE_UINT8 re_ideographic_stage_1[] = { - 0, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, -}; - -static RE_UINT8 re_ideographic_stage_2[] = { - 0, 0, 0, 1, 2, 3, 3, 3, 3, 4, 0, 0, 0, 0, 0, 5, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 6, 7, 0, 0, 0, 8, -}; - -static RE_UINT8 re_ideographic_stage_3[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 4, 0, 0, 0, 0, 5, 6, 0, 0, - 2, 2, 2, 7, 2, 2, 2, 2, 2, 2, 2, 8, 9, 0, 0, 0, - 0, 0, 0, 0, 2, 9, 0, 0, -}; - -static RE_UINT8 re_ideographic_stage_4[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 0, - 2, 2, 2, 2, 2, 2, 2, 4, 0, 0, 0, 0, 2, 2, 2, 2, - 2, 5, 2, 6, 0, 0, 0, 0, 2, 2, 2, 7, 2, 2, 2, 2, - 2, 2, 2, 2, 8, 2, 2, 2, 9, 0, 0, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_ideographic_stage_5[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 192, 0, 0, 0, 254, 3, 0, 7, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 63, 0, - 255, 31, 0, 0, 0, 0, 0, 0, 255, 255, 255, 255, 255, 63, 255, 255, - 255, 255, 255, 3, 0, 0, 0, 0, 255, 255, 127, 0, 0, 0, 0, 0, - 255, 255, 255, 255, 255, 255, 31, 0, 255, 255, 255, 63, 0, 0, 0, 0, -}; - -/* Ideographic: 297 bytes. */ - -RE_UINT32 re_get_ideographic(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 16; - code = ch ^ (f << 16); - pos = (RE_UINT32)re_ideographic_stage_1[f] << 4; - f = code >> 12; - code ^= f << 12; - pos = (RE_UINT32)re_ideographic_stage_2[pos + f] << 3; - f = code >> 9; - code ^= f << 9; - pos = (RE_UINT32)re_ideographic_stage_3[pos + f] << 3; - f = code >> 6; - code ^= f << 6; - pos = (RE_UINT32)re_ideographic_stage_4[pos + f] << 6; - pos += code; - value = (re_ideographic_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Diacritic. 
*/ - -static RE_UINT8 re_diacritic_stage_1[] = { - 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, -}; - -static RE_UINT8 re_diacritic_stage_2[] = { - 0, 1, 2, 3, 4, 5, 6, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 7, 8, 4, 4, 4, 4, 4, 4, 4, 4, 4, 9, - 4, 4, 10, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 11, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 12, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, -}; - -static RE_UINT8 re_diacritic_stage_3[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 1, 1, 1, 1, 1, 1, 17, 1, 18, 19, 20, 21, 22, 1, 23, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 24, 1, 25, 1, - 26, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 27, 28, - 29, 30, 31, 32, 1, 1, 1, 1, 1, 1, 1, 33, 1, 1, 34, 35, - 36, 37, 1, 1, 1, 1, 38, 1, 1, 1, 1, 1, 1, 1, 1, 39, - 1, 40, 1, 1, 1, 1, 1, 1, -}; - -static RE_UINT8 re_diacritic_stage_4[] = { - 0, 0, 1, 2, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 4, 5, 5, 5, 5, 6, 7, 8, 0, 0, 0, - 0, 0, 0, 0, 9, 0, 0, 0, 0, 0, 10, 0, 11, 12, 13, 0, - 0, 0, 14, 0, 0, 0, 15, 16, 0, 4, 17, 0, 0, 18, 0, 19, - 20, 0, 0, 0, 0, 0, 0, 21, 0, 22, 23, 24, 0, 22, 25, 0, - 0, 22, 25, 0, 0, 22, 25, 0, 0, 22, 25, 0, 0, 0, 25, 0, - 0, 0, 25, 0, 0, 22, 25, 0, 0, 0, 25, 0, 0, 0, 26, 0, - 0, 0, 27, 0, 0, 0, 28, 0, 20, 29, 0, 0, 30, 0, 31, 0, - 0, 32, 0, 0, 33, 0, 0, 0, 0, 0, 0, 0, 0, 0, 34, 0, - 0, 35, 0, 0, 0, 0, 0, 0, 0, 0, 0, 36, 0, 0, 0, 0, - 0, 37, 38, 39, 0, 40, 0, 0, 0, 41, 0, 42, 0, 0, 4, 43, - 0, 44, 5, 17, 0, 0, 45, 46, 0, 0, 0, 0, 0, 47, 48, 49, - 0, 0, 0, 0, 0, 0, 0, 50, 0, 51, 0, 0, 0, 0, 0, 0, - 0, 52, 0, 0, 53, 0, 0, 22, 0, 0, 0, 54, 0, 0, 0, 55, - 56, 57, 0, 0, 58, 0, 0, 20, 0, 0, 0, 0, 0, 0, 38, 59, - 0, 60, 61, 0, 0, 61, 2, 0, 0, 0, 0, 62, 0, 15, 63, 64, - 0, 0, 0, 0, 0, 0, 0, 65, 1, 0, 0, 0, 0, 0, 0, 0, - 0, 66, 0, 0, 0, 0, 0, 0, 0, 1, 2, 67, 68, 0, 0, 69, - 0, 0, 0, 0, 0, 70, 0, 0, 0, 71, 0, 0, 0, 0, 2, 0, - 0, 0, 0, 0, 0, 41, 0, 0, 0, 0, 0, 0, 72, 0, 0, 0, - 
0, 0, 0, 73, 74, 75, 0, 0, -}; - -static RE_UINT8 re_diacritic_stage_5[] = { - 0, 0, 0, 0, 0, 0, 0, 64, 1, 0, 0, 0, 0, 129, 144, 1, - 0, 0, 255, 255, 255, 255, 255, 255, 255, 127, 255, 224, 7, 0, 48, 4, - 48, 0, 0, 0, 248, 0, 0, 0, 0, 0, 0, 2, 0, 0, 254, 255, - 251, 255, 255, 191, 22, 0, 0, 0, 0, 248, 135, 1, 0, 0, 0, 128, - 97, 28, 0, 0, 255, 7, 0, 0, 192, 255, 1, 0, 0, 248, 63, 0, - 0, 0, 0, 3, 240, 255, 255, 127, 0, 0, 0, 16, 0, 32, 30, 0, - 0, 0, 2, 0, 0, 32, 0, 0, 0, 4, 0, 0, 128, 95, 0, 0, - 0, 31, 0, 0, 0, 0, 160, 194, 220, 0, 0, 0, 64, 0, 0, 0, - 0, 0, 128, 6, 128, 191, 0, 12, 0, 254, 15, 32, 0, 0, 0, 14, - 0, 0, 224, 159, 0, 0, 16, 0, 16, 0, 0, 0, 0, 248, 15, 0, - 0, 12, 0, 0, 0, 0, 192, 0, 0, 0, 0, 63, 255, 33, 16, 0, - 0, 240, 255, 255, 240, 255, 0, 0, 0, 0, 0, 224, 0, 0, 0, 160, - 3, 224, 0, 224, 0, 224, 0, 96, 0, 128, 3, 0, 0, 128, 0, 0, - 0, 252, 0, 0, 0, 0, 0, 30, 0, 128, 0, 176, 0, 0, 3, 0, - 0, 0, 128, 255, 3, 0, 0, 0, 0, 1, 0, 0, 255, 255, 3, 0, - 0, 120, 0, 0, 0, 0, 8, 0, 0, 0, 0, 8, 7, 0, 0, 0, - 0, 0, 64, 0, 0, 48, 0, 0, 127, 0, 0, 0, 0, 0, 1, 0, - 0, 0, 0, 192, 8, 0, 0, 0, 0, 0, 0, 6, 0, 0, 24, 0, - 0, 128, 255, 255, 128, 227, 7, 248, 231, 15, 0, 0, 0, 60, 0, 0, -}; - -/* Diacritic: 849 bytes. */ - -RE_UINT32 re_get_diacritic(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 16; - code = ch ^ (f << 16); - pos = (RE_UINT32)re_diacritic_stage_1[f] << 5; - f = code >> 11; - code ^= f << 11; - pos = (RE_UINT32)re_diacritic_stage_2[pos + f] << 3; - f = code >> 8; - code ^= f << 8; - pos = (RE_UINT32)re_diacritic_stage_3[pos + f] << 3; - f = code >> 5; - code ^= f << 5; - pos = (RE_UINT32)re_diacritic_stage_4[pos + f] << 5; - pos += code; - value = (re_diacritic_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Extender. 
*/ - -static RE_UINT8 re_extender_stage_1[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, -}; - -static RE_UINT8 re_extender_stage_2[] = { - 0, 1, 2, 3, 2, 2, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 5, 6, 2, 2, 2, 2, 2, 2, 2, 2, 2, 7, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, -}; - -static RE_UINT8 re_extender_stage_3[] = { - 0, 1, 2, 1, 1, 1, 3, 4, 1, 1, 1, 1, 1, 1, 5, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 6, 1, 7, 1, 8, 1, 1, 1, - 9, 1, 1, 1, 1, 1, 1, 1, 10, 1, 1, 1, 1, 1, 11, 1, - 1, 12, 13, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 14, -}; - -static RE_UINT8 re_extender_stage_4[] = { - 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 5, 0, 0, 0, 5, 0, - 6, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, - 0, 9, 0, 10, 0, 0, 0, 0, 11, 12, 0, 0, 13, 0, 0, 14, - 15, 0, 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 17, 0, 0, 0, 0, 18, 0, 0, 19, 20, - 0, 0, 0, 18, 0, 0, 0, 0, -}; - -static RE_UINT8 re_extender_stage_5[] = { - 0, 0, 0, 0, 0, 0, 128, 0, 0, 0, 3, 0, 1, 0, 0, 0, - 0, 0, 0, 4, 64, 0, 0, 0, 0, 4, 0, 0, 8, 0, 0, 0, - 128, 0, 0, 0, 0, 0, 64, 0, 0, 0, 0, 8, 32, 0, 0, 0, - 0, 0, 62, 0, 0, 0, 0, 96, 0, 0, 0, 112, 0, 0, 32, 0, - 0, 16, 0, 0, 0, 128, 0, 0, 0, 0, 1, 0, 0, 0, 0, 32, - 0, 0, 24, 0, -}; - -/* Extender: 349 bytes. 
*/ - -RE_UINT32 re_get_extender(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 16; - code = ch ^ (f << 16); - pos = (RE_UINT32)re_extender_stage_1[f] << 5; - f = code >> 11; - code ^= f << 11; - pos = (RE_UINT32)re_extender_stage_2[pos + f] << 3; - f = code >> 8; - code ^= f << 8; - pos = (RE_UINT32)re_extender_stage_3[pos + f] << 3; - f = code >> 5; - code ^= f << 5; - pos = (RE_UINT32)re_extender_stage_4[pos + f] << 5; - pos += code; - value = (re_extender_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Other_Lowercase. */ - -static RE_UINT8 re_other_lowercase_stage_1[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, -}; - -static RE_UINT8 re_other_lowercase_stage_2[] = { - 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, -}; - -static RE_UINT8 re_other_lowercase_stage_3[] = { - 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, - 4, 2, 5, 2, 2, 2, 6, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 7, 2, 2, 2, 2, -}; - -static RE_UINT8 re_other_lowercase_stage_4[] = { - 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 2, 3, 0, 4, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 6, 7, 0, - 0, 8, 9, 0, 0, 10, 0, 0, 0, 0, 0, 11, 0, 0, 0, 0, - 0, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 13, 0, 14, -}; - -static RE_UINT8 re_other_lowercase_stage_5[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 4, - 0, 0, 0, 0, 0, 0, 255, 1, 3, 0, 0, 0, 31, 0, 0, 0, - 32, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 240, 255, 255, - 255, 255, 255, 255, 255, 7, 0, 1, 0, 0, 0, 248, 255, 255, 255, 255, - 0, 0, 0, 0, 0, 0, 2, 128, 0, 0, 255, 31, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 255, 255, 0, 0, 255, 255, 255, 3, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 48, 0, 0, 0, 0, 0, 0, 1, 0, - 0, 0, 0, 0, 0, 0, 0, 3, -}; - -/* Other_Lowercase: 273 bytes. 
*/ - -RE_UINT32 re_get_other_lowercase(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 16; - code = ch ^ (f << 16); - pos = (RE_UINT32)re_other_lowercase_stage_1[f] << 4; - f = code >> 12; - code ^= f << 12; - pos = (RE_UINT32)re_other_lowercase_stage_2[pos + f] << 3; - f = code >> 9; - code ^= f << 9; - pos = (RE_UINT32)re_other_lowercase_stage_3[pos + f] << 3; - f = code >> 6; - code ^= f << 6; - pos = (RE_UINT32)re_other_lowercase_stage_4[pos + f] << 6; - pos += code; - value = (re_other_lowercase_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Other_Uppercase. */ - -static RE_UINT8 re_other_uppercase_stage_1[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, -}; - -static RE_UINT8 re_other_uppercase_stage_2[] = { - 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_other_uppercase_stage_3[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 1, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_other_uppercase_stage_4[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 2, 1, 0, -}; - -static RE_UINT8 re_other_uppercase_stage_5[] = { - 0, 0, 0, 0, 255, 255, 0, 0, 0, 0, 192, 255, -}; - -/* Other_Uppercase: 117 bytes. */ - -RE_UINT32 re_get_other_uppercase(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 16; - code = ch ^ (f << 16); - pos = (RE_UINT32)re_other_uppercase_stage_1[f] << 4; - f = code >> 12; - code ^= f << 12; - pos = (RE_UINT32)re_other_uppercase_stage_2[pos + f] << 4; - f = code >> 8; - code ^= f << 8; - pos = (RE_UINT32)re_other_uppercase_stage_3[pos + f] << 3; - f = code >> 5; - code ^= f << 5; - pos = (RE_UINT32)re_other_uppercase_stage_4[pos + f] << 5; - pos += code; - value = (re_other_uppercase_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Noncharacter_Code_Point. 
*/ - -static RE_UINT8 re_noncharacter_code_point_stage_1[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, -}; - -static RE_UINT8 re_noncharacter_code_point_stage_2[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, -}; - -static RE_UINT8 re_noncharacter_code_point_stage_3[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, - 0, 0, 0, 0, 0, 0, 0, 2, -}; - -static RE_UINT8 re_noncharacter_code_point_stage_4[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, - 0, 0, 0, 0, 0, 0, 0, 2, -}; - -static RE_UINT8 re_noncharacter_code_point_stage_5[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255, 255, 255, 255, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 192, -}; - -/* Noncharacter_Code_Point: 121 bytes. */ - -RE_UINT32 re_get_noncharacter_code_point(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 16; - code = ch ^ (f << 16); - pos = (RE_UINT32)re_noncharacter_code_point_stage_1[f] << 4; - f = code >> 12; - code ^= f << 12; - pos = (RE_UINT32)re_noncharacter_code_point_stage_2[pos + f] << 3; - f = code >> 9; - code ^= f << 9; - pos = (RE_UINT32)re_noncharacter_code_point_stage_3[pos + f] << 3; - f = code >> 6; - code ^= f << 6; - pos = (RE_UINT32)re_noncharacter_code_point_stage_4[pos + f] << 6; - pos += code; - value = (re_noncharacter_code_point_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Other_Grapheme_Extend. 
*/ - -static RE_UINT8 re_other_grapheme_extend_stage_1[] = { - 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, -}; - -static RE_UINT8 re_other_grapheme_extend_stage_2[] = { - 0, 1, 2, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -}; - -static RE_UINT8 re_other_grapheme_extend_stage_3[] = { - 0, 0, 0, 0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 4, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 6, 7, 0, 0, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_other_grapheme_extend_stage_4[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, - 0, 0, 0, 0, 1, 2, 1, 2, 0, 0, 0, 3, 1, 2, 0, 4, - 5, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 8, 0, 0, -}; - -static RE_UINT8 re_other_grapheme_extend_stage_5[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, - 0, 0, 128, 0, 0, 0, 0, 0, 4, 0, 96, 0, 0, 0, 0, 0, - 0, 128, 0, 128, 0, 0, 0, 0, 0, 48, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 192, 0, 0, 0, 0, 0, 192, 0, 0, 0, 0, - 0, 0, 0, 0, 32, 192, 7, 0, -}; - -/* Other_Grapheme_Extend: 249 bytes. */ - -RE_UINT32 re_get_other_grapheme_extend(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 16; - code = ch ^ (f << 16); - pos = (RE_UINT32)re_other_grapheme_extend_stage_1[f] << 4; - f = code >> 12; - code ^= f << 12; - pos = (RE_UINT32)re_other_grapheme_extend_stage_2[pos + f] << 3; - f = code >> 9; - code ^= f << 9; - pos = (RE_UINT32)re_other_grapheme_extend_stage_3[pos + f] << 3; - f = code >> 6; - code ^= f << 6; - pos = (RE_UINT32)re_other_grapheme_extend_stage_4[pos + f] << 6; - pos += code; - value = (re_other_grapheme_extend_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* IDS_Binary_Operator. 
*/ - -static RE_UINT8 re_ids_binary_operator_stage_1[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, -}; - -static RE_UINT8 re_ids_binary_operator_stage_2[] = { - 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_ids_binary_operator_stage_3[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, -}; - -static RE_UINT8 re_ids_binary_operator_stage_4[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, -}; - -static RE_UINT8 re_ids_binary_operator_stage_5[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 243, 15, -}; - -/* IDS_Binary_Operator: 97 bytes. */ - -RE_UINT32 re_get_ids_binary_operator(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 16; - code = ch ^ (f << 16); - pos = (RE_UINT32)re_ids_binary_operator_stage_1[f] << 4; - f = code >> 12; - code ^= f << 12; - pos = (RE_UINT32)re_ids_binary_operator_stage_2[pos + f] << 3; - f = code >> 9; - code ^= f << 9; - pos = (RE_UINT32)re_ids_binary_operator_stage_3[pos + f] << 3; - f = code >> 6; - code ^= f << 6; - pos = (RE_UINT32)re_ids_binary_operator_stage_4[pos + f] << 6; - pos += code; - value = (re_ids_binary_operator_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* IDS_Trinary_Operator. */ - -static RE_UINT8 re_ids_trinary_operator_stage_1[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, -}; - -static RE_UINT8 re_ids_trinary_operator_stage_2[] = { - 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_ids_trinary_operator_stage_3[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, -}; - -static RE_UINT8 re_ids_trinary_operator_stage_4[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, -}; - -static RE_UINT8 re_ids_trinary_operator_stage_5[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, 0, -}; - -/* IDS_Trinary_Operator: 97 bytes. 
*/ - -RE_UINT32 re_get_ids_trinary_operator(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 16; - code = ch ^ (f << 16); - pos = (RE_UINT32)re_ids_trinary_operator_stage_1[f] << 4; - f = code >> 12; - code ^= f << 12; - pos = (RE_UINT32)re_ids_trinary_operator_stage_2[pos + f] << 3; - f = code >> 9; - code ^= f << 9; - pos = (RE_UINT32)re_ids_trinary_operator_stage_3[pos + f] << 3; - f = code >> 6; - code ^= f << 6; - pos = (RE_UINT32)re_ids_trinary_operator_stage_4[pos + f] << 6; - pos += code; - value = (re_ids_trinary_operator_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Radical. */ - -static RE_UINT8 re_radical_stage_1[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, -}; - -static RE_UINT8 re_radical_stage_2[] = { - 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_radical_stage_3[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, -}; - -static RE_UINT8 re_radical_stage_4[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 1, 2, 2, 3, 2, 2, 2, 2, 2, 2, 4, 0, -}; - -static RE_UINT8 re_radical_stage_5[] = { - 0, 0, 0, 0, 255, 255, 255, 251, 255, 255, 255, 255, 255, 255, 15, 0, - 255, 255, 63, 0, -}; - -/* Radical: 117 bytes. */ - -RE_UINT32 re_get_radical(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 16; - code = ch ^ (f << 16); - pos = (RE_UINT32)re_radical_stage_1[f] << 4; - f = code >> 12; - code ^= f << 12; - pos = (RE_UINT32)re_radical_stage_2[pos + f] << 3; - f = code >> 9; - code ^= f << 9; - pos = (RE_UINT32)re_radical_stage_3[pos + f] << 4; - f = code >> 5; - code ^= f << 5; - pos = (RE_UINT32)re_radical_stage_4[pos + f] << 5; - pos += code; - value = (re_radical_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Unified_Ideograph. 
*/ - -static RE_UINT8 re_unified_ideograph_stage_1[] = { - 0, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, -}; - -static RE_UINT8 re_unified_ideograph_stage_2[] = { - 0, 0, 0, 1, 2, 3, 3, 3, 3, 4, 0, 0, 0, 0, 0, 5, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 6, 7, 0, 0, 0, 0, -}; - -static RE_UINT8 re_unified_ideograph_stage_3[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 3, 0, 0, 0, 0, 0, 4, 0, 0, - 1, 1, 1, 5, 1, 1, 1, 1, 1, 1, 1, 6, 7, 0, 0, 0, -}; - -static RE_UINT8 re_unified_ideograph_stage_4[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 2, 0, 1, 1, 1, 1, 1, 1, 1, 3, - 4, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 5, 1, 1, 1, 1, - 1, 1, 1, 1, 6, 1, 1, 1, 7, 0, 0, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_unified_ideograph_stage_5[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 63, 0, 255, 31, 0, 0, 0, 0, 0, 0, - 0, 192, 26, 128, 154, 3, 0, 0, 255, 255, 127, 0, 0, 0, 0, 0, - 255, 255, 255, 255, 255, 255, 31, 0, 255, 255, 255, 63, 0, 0, 0, 0, -}; - -/* Unified_Ideograph: 257 bytes. */ - -RE_UINT32 re_get_unified_ideograph(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 16; - code = ch ^ (f << 16); - pos = (RE_UINT32)re_unified_ideograph_stage_1[f] << 4; - f = code >> 12; - code ^= f << 12; - pos = (RE_UINT32)re_unified_ideograph_stage_2[pos + f] << 3; - f = code >> 9; - code ^= f << 9; - pos = (RE_UINT32)re_unified_ideograph_stage_3[pos + f] << 3; - f = code >> 6; - code ^= f << 6; - pos = (RE_UINT32)re_unified_ideograph_stage_4[pos + f] << 6; - pos += code; - value = (re_unified_ideograph_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Other_Default_Ignorable_Code_Point. 
*/ - -static RE_UINT8 re_other_default_ignorable_code_point_stage_1[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, - 1, -}; - -static RE_UINT8 re_other_default_ignorable_code_point_stage_2[] = { - 0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 6, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, -}; - -static RE_UINT8 re_other_default_ignorable_code_point_stage_3[] = { - 0, 1, 0, 0, 0, 0, 0, 0, 2, 0, 0, 3, 0, 0, 0, 0, - 4, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, - 7, 8, 8, 8, 8, 8, 8, 8, -}; - -static RE_UINT8 re_other_default_ignorable_code_point_stage_4[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, - 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, - 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, - 0, 0, 0, 0, 0, 0, 6, 7, 8, 0, 9, 9, 0, 0, 0, 10, - 9, 9, 9, 9, 9, 9, 9, 9, -}; - -static RE_UINT8 re_other_default_ignorable_code_point_stage_5[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, - 0, 0, 0, 0, 32, 0, 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, - 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255, 1, - 253, 255, 255, 255, 0, 0, 0, 0, 255, 255, 255, 255, 255, 255, 255, 255, - 0, 0, 0, 0, 0, 0, 255, 255, -}; - -/* Other_Default_Ignorable_Code_Point: 281 bytes. 
*/ - -RE_UINT32 re_get_other_default_ignorable_code_point(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 16; - code = ch ^ (f << 16); - pos = (RE_UINT32)re_other_default_ignorable_code_point_stage_1[f] << 4; - f = code >> 12; - code ^= f << 12; - pos = (RE_UINT32)re_other_default_ignorable_code_point_stage_2[pos + f] << 3; - f = code >> 9; - code ^= f << 9; - pos = (RE_UINT32)re_other_default_ignorable_code_point_stage_3[pos + f] << 3; - f = code >> 6; - code ^= f << 6; - pos = (RE_UINT32)re_other_default_ignorable_code_point_stage_4[pos + f] << 6; - pos += code; - value = (re_other_default_ignorable_code_point_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Deprecated. */ - -static RE_UINT8 re_deprecated_stage_1[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, - 1, 1, -}; - -static RE_UINT8 re_deprecated_stage_2[] = { - 0, 1, 2, 3, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 5, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, -}; - -static RE_UINT8 re_deprecated_stage_3[] = { - 0, 1, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 3, - 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, - 5, 0, 0, 6, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_deprecated_stage_4[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, - 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, - 0, 6, 0, 0, 0, 0, 0, 0, 7, 8, 8, 8, 0, 0, 0, 0, -}; - -static RE_UINT8 re_deprecated_stage_5[] = { - 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 8, 0, 0, 0, 128, 2, - 24, 0, 0, 0, 0, 252, 0, 0, 0, 6, 0, 0, 2, 0, 0, 0, - 255, 255, 255, 255, -}; - -/* Deprecated: 230 bytes. 
*/ - -RE_UINT32 re_get_deprecated(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 15; - code = ch ^ (f << 15); - pos = (RE_UINT32)re_deprecated_stage_1[f] << 4; - f = code >> 11; - code ^= f << 11; - pos = (RE_UINT32)re_deprecated_stage_2[pos + f] << 3; - f = code >> 8; - code ^= f << 8; - pos = (RE_UINT32)re_deprecated_stage_3[pos + f] << 3; - f = code >> 5; - code ^= f << 5; - pos = (RE_UINT32)re_deprecated_stage_4[pos + f] << 5; - pos += code; - value = (re_deprecated_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Soft_Dotted. */ - -static RE_UINT8 re_soft_dotted_stage_1[] = { - 0, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, -}; - -static RE_UINT8 re_soft_dotted_stage_2[] = { - 0, 1, 1, 2, 3, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5, 1, 1, 1, 1, 1, -}; - -static RE_UINT8 re_soft_dotted_stage_3[] = { - 0, 1, 2, 3, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 6, 7, 5, 8, 9, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 10, 5, 5, 5, 5, 5, 5, 5, 11, 12, 13, 5, -}; - -static RE_UINT8 re_soft_dotted_stage_4[] = { - 0, 0, 0, 1, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, - 0, 0, 3, 4, 5, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, - 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 9, 10, 11, 0, 0, 0, 12, 0, 0, 0, 0, 13, 0, - 0, 0, 0, 14, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0, 0, - 0, 0, 0, 16, 0, 0, 0, 0, 0, 17, 18, 0, 19, 20, 0, 21, - 0, 22, 23, 0, 24, 0, 17, 18, 0, 19, 20, 0, 21, 0, 0, 0, -}; - -static RE_UINT8 re_soft_dotted_stage_5[] = { - 0, 0, 0, 0, 0, 6, 0, 0, 0, 128, 0, 0, 0, 2, 0, 0, - 0, 1, 0, 0, 0, 0, 0, 32, 0, 0, 4, 0, 0, 0, 8, 0, - 0, 0, 64, 1, 4, 0, 0, 0, 0, 0, 64, 0, 16, 1, 0, 0, - 0, 32, 0, 0, 0, 8, 0, 0, 0, 0, 2, 0, 0, 3, 0, 0, - 0, 0, 0, 16, 12, 0, 0, 0, 0, 0, 192, 0, 0, 12, 0, 0, - 0, 0, 0, 192, 0, 0, 12, 0, 192, 0, 0, 0, 0, 0, 0, 12, - 0, 192, 0, 0, -}; - -/* 
Soft_Dotted: 342 bytes. */ - -RE_UINT32 re_get_soft_dotted(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 15; - code = ch ^ (f << 15); - pos = (RE_UINT32)re_soft_dotted_stage_1[f] << 4; - f = code >> 11; - code ^= f << 11; - pos = (RE_UINT32)re_soft_dotted_stage_2[pos + f] << 3; - f = code >> 8; - code ^= f << 8; - pos = (RE_UINT32)re_soft_dotted_stage_3[pos + f] << 3; - f = code >> 5; - code ^= f << 5; - pos = (RE_UINT32)re_soft_dotted_stage_4[pos + f] << 5; - pos += code; - value = (re_soft_dotted_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Logical_Order_Exception. */ - -static RE_UINT8 re_logical_order_exception_stage_1[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, -}; - -static RE_UINT8 re_logical_order_exception_stage_2[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -}; - -static RE_UINT8 re_logical_order_exception_stage_3[] = { - 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 2, 0, 0, -}; - -static RE_UINT8 re_logical_order_exception_stage_4[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, - 0, 0, 2, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_logical_order_exception_stage_5[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 31, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 96, 26, -}; - -/* Logical_Order_Exception: 121 bytes. 
*/ - -RE_UINT32 re_get_logical_order_exception(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 16; - code = ch ^ (f << 16); - pos = (RE_UINT32)re_logical_order_exception_stage_1[f] << 4; - f = code >> 12; - code ^= f << 12; - pos = (RE_UINT32)re_logical_order_exception_stage_2[pos + f] << 3; - f = code >> 9; - code ^= f << 9; - pos = (RE_UINT32)re_logical_order_exception_stage_3[pos + f] << 3; - f = code >> 6; - code ^= f << 6; - pos = (RE_UINT32)re_logical_order_exception_stage_4[pos + f] << 6; - pos += code; - value = (re_logical_order_exception_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Other_ID_Start. */ - -static RE_UINT8 re_other_id_start_stage_1[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, -}; - -static RE_UINT8 re_other_id_start_stage_2[] = { - 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_other_id_start_stage_3[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_other_id_start_stage_4[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, - 0, 0, 2, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_other_id_start_stage_5[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 64, 0, 0, - 0, 0, 0, 24, 0, 0, 0, 0, -}; - -/* Other_ID_Start: 113 bytes. */ - -RE_UINT32 re_get_other_id_start(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 16; - code = ch ^ (f << 16); - pos = (RE_UINT32)re_other_id_start_stage_1[f] << 3; - f = code >> 13; - code ^= f << 13; - pos = (RE_UINT32)re_other_id_start_stage_2[pos + f] << 4; - f = code >> 9; - code ^= f << 9; - pos = (RE_UINT32)re_other_id_start_stage_3[pos + f] << 3; - f = code >> 6; - code ^= f << 6; - pos = (RE_UINT32)re_other_id_start_stage_4[pos + f] << 6; - pos += code; - value = (re_other_id_start_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Other_ID_Continue. 
*/ - -static RE_UINT8 re_other_id_continue_stage_1[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, -}; - -static RE_UINT8 re_other_id_continue_stage_2[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -}; - -static RE_UINT8 re_other_id_continue_stage_3[] = { - 0, 1, 2, 2, 2, 2, 2, 2, 2, 3, 2, 2, 4, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, -}; - -static RE_UINT8 re_other_id_continue_stage_4[] = { - 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 4, -}; - -static RE_UINT8 re_other_id_continue_stage_5[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 0, - 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 254, 3, 0, - 0, 0, 0, 4, 0, 0, 0, 0, -}; - -/* Other_ID_Continue: 145 bytes. */ - -RE_UINT32 re_get_other_id_continue(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 16; - code = ch ^ (f << 16); - pos = (RE_UINT32)re_other_id_continue_stage_1[f] << 3; - f = code >> 13; - code ^= f << 13; - pos = (RE_UINT32)re_other_id_continue_stage_2[pos + f] << 4; - f = code >> 9; - code ^= f << 9; - pos = (RE_UINT32)re_other_id_continue_stage_3[pos + f] << 3; - f = code >> 6; - code ^= f << 6; - pos = (RE_UINT32)re_other_id_continue_stage_4[pos + f] << 6; - pos += code; - value = (re_other_id_continue_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* STerm. 
*/ - -static RE_UINT8 re_sterm_stage_1[] = { - 0, 1, 2, 3, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, -}; - -static RE_UINT8 re_sterm_stage_2[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 3, 3, 9, 10, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 11, 12, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 13, - 3, 3, 14, 3, 15, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, -}; - -static RE_UINT8 re_sterm_stage_3[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, 4, 5, 6, 7, - 1, 1, 8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 9, 1, 1, 1, 1, 1, 10, 1, 1, 1, 1, 1, 11, 1, 12, 1, - 13, 1, 14, 1, 1, 15, 16, 1, 17, 1, 1, 1, 1, 1, 1, 1, - 18, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 19, 1, 1, 1, - 20, 1, 1, 1, 1, 1, 1, 1, 1, 21, 1, 1, 22, 23, 1, 1, - 24, 25, 26, 27, 28, 29, 1, 30, 1, 1, 1, 1, 31, 1, 32, 1, - 1, 1, 1, 1, 33, 1, 1, 1, 34, 35, 36, 37, 1, 1, 1, 1, -}; - -static RE_UINT8 re_sterm_stage_4[] = { - 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 3, 0, 0, 0, - 4, 0, 0, 0, 0, 0, 5, 0, 6, 0, 0, 0, 0, 0, 0, 7, - 0, 0, 0, 8, 0, 0, 9, 0, 0, 0, 0, 10, 0, 0, 0, 11, - 0, 12, 0, 0, 13, 0, 0, 0, 0, 0, 8, 0, 0, 14, 0, 0, - 0, 0, 15, 0, 0, 16, 0, 17, 0, 18, 19, 0, 0, 11, 0, 0, - 20, 0, 0, 0, 0, 0, 0, 4, 21, 0, 0, 0, 0, 0, 0, 22, - 0, 0, 0, 23, 0, 0, 21, 0, 0, 24, 0, 0, 0, 0, 25, 0, - 0, 0, 26, 0, 0, 0, 0, 27, 0, 0, 0, 28, 0, 0, 29, 0, - 1, 0, 0, 30, 0, 0, 23, 0, 0, 0, 31, 0, 0, 17, 32, 0, - 0, 0, 33, 0, 0, 0, 34, 0, -}; - -static RE_UINT8 re_sterm_stage_5[] = { - 0, 0, 0, 0, 2, 64, 0, 128, 0, 0, 0, 80, 0, 2, 0, 0, - 0, 0, 0, 128, 0, 0, 16, 0, 7, 0, 0, 0, 0, 0, 0, 2, - 48, 0, 0, 0, 0, 12, 0, 0, 132, 1, 0, 0, 0, 64, 0, 0, - 0, 0, 96, 0, 8, 2, 0, 0, 0, 15, 0, 0, 0, 0, 0, 204, - 0, 0, 0, 24, 0, 0, 0, 192, 0, 0, 0, 48, 128, 3, 0, 0, - 4, 0, 0, 0, 0, 192, 0, 0, 0, 0, 136, 0, 0, 0, 192, 0, - 0, 128, 0, 0, 0, 3, 0, 0, 0, 0, 0, 
224, 0, 0, 3, 0, - 0, 8, 0, 0, 0, 0, 196, 0, 2, 0, 0, 0, 128, 1, 0, 0, - 3, 0, 0, 0, 14, 0, 0, 0, 96, 0, 0, 0, -}; - -/* STerm: 568 bytes. */ - -RE_UINT32 re_get_sterm(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 14; - code = ch ^ (f << 14); - pos = (RE_UINT32)re_sterm_stage_1[f] << 4; - f = code >> 10; - code ^= f << 10; - pos = (RE_UINT32)re_sterm_stage_2[pos + f] << 3; - f = code >> 7; - code ^= f << 7; - pos = (RE_UINT32)re_sterm_stage_3[pos + f] << 2; - f = code >> 5; - code ^= f << 5; - pos = (RE_UINT32)re_sterm_stage_4[pos + f] << 5; - pos += code; - value = (re_sterm_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Variation_Selector. */ - -static RE_UINT8 re_variation_selector_stage_1[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, - 1, -}; - -static RE_UINT8 re_variation_selector_stage_2[] = { - 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_variation_selector_stage_3[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_variation_selector_stage_4[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, - 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 4, -}; - -static RE_UINT8 re_variation_selector_stage_5[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 56, 0, 0, 0, 0, 0, 0, - 255, 255, 0, 0, 0, 0, 0, 0, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 0, 0, -}; - -/* Variation_Selector: 169 bytes. 
*/ - -RE_UINT32 re_get_variation_selector(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 16; - code = ch ^ (f << 16); - pos = (RE_UINT32)re_variation_selector_stage_1[f] << 4; - f = code >> 12; - code ^= f << 12; - pos = (RE_UINT32)re_variation_selector_stage_2[pos + f] << 3; - f = code >> 9; - code ^= f << 9; - pos = (RE_UINT32)re_variation_selector_stage_3[pos + f] << 3; - f = code >> 6; - code ^= f << 6; - pos = (RE_UINT32)re_variation_selector_stage_4[pos + f] << 6; - pos += code; - value = (re_variation_selector_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Pattern_White_Space. */ - -static RE_UINT8 re_pattern_white_space_stage_1[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, -}; - -static RE_UINT8 re_pattern_white_space_stage_2[] = { - 0, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -}; - -static RE_UINT8 re_pattern_white_space_stage_3[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 2, 1, 1, 1, 1, 1, 1, 1, -}; - -static RE_UINT8 re_pattern_white_space_stage_4[] = { - 0, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 3, 1, 1, 1, 1, 1, 1, 1, -}; - -static RE_UINT8 re_pattern_white_space_stage_5[] = { - 0, 62, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 32, 0, 0, 0, 0, 0, 0, 0, 0, 192, 0, 0, 0, 3, 0, 0, -}; - -/* Pattern_White_Space: 129 bytes. 
*/ - -RE_UINT32 re_get_pattern_white_space(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 16; - code = ch ^ (f << 16); - pos = (RE_UINT32)re_pattern_white_space_stage_1[f] << 4; - f = code >> 12; - code ^= f << 12; - pos = (RE_UINT32)re_pattern_white_space_stage_2[pos + f] << 3; - f = code >> 9; - code ^= f << 9; - pos = (RE_UINT32)re_pattern_white_space_stage_3[pos + f] << 3; - f = code >> 6; - code ^= f << 6; - pos = (RE_UINT32)re_pattern_white_space_stage_4[pos + f] << 6; - pos += code; - value = (re_pattern_white_space_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Pattern_Syntax. */ - -static RE_UINT8 re_pattern_syntax_stage_1[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, -}; - -static RE_UINT8 re_pattern_syntax_stage_2[] = { - 0, 1, 1, 1, 2, 3, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -}; - -static RE_UINT8 re_pattern_syntax_stage_3[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 2, 3, 4, 4, 5, 4, 4, 6, 4, 4, 4, 4, 1, 1, 7, 1, - 8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 10, 1, -}; - -static RE_UINT8 re_pattern_syntax_stage_4[] = { - 0, 1, 2, 2, 0, 3, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, - 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, - 8, 8, 8, 9, 10, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, - 11, 12, 0, 0, 0, 0, 0, 0, 0, 13, 0, 0, 0, 0, 0, 0, - 0, 0, 14, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_pattern_syntax_stage_5[] = { - 0, 0, 0, 0, 254, 255, 0, 252, 1, 0, 0, 120, 254, 90, 67, 136, - 0, 0, 128, 0, 0, 0, 255, 255, 255, 0, 255, 127, 254, 255, 239, 127, - 255, 255, 255, 255, 255, 255, 63, 0, 0, 0, 240, 255, 14, 255, 255, 255, - 1, 0, 1, 0, 0, 0, 0, 192, 96, 0, 0, 0, -}; - -/* Pattern_Syntax: 277 bytes. 
*/ - -RE_UINT32 re_get_pattern_syntax(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 16; - code = ch ^ (f << 16); - pos = (RE_UINT32)re_pattern_syntax_stage_1[f] << 5; - f = code >> 11; - code ^= f << 11; - pos = (RE_UINT32)re_pattern_syntax_stage_2[pos + f] << 3; - f = code >> 8; - code ^= f << 8; - pos = (RE_UINT32)re_pattern_syntax_stage_3[pos + f] << 3; - f = code >> 5; - code ^= f << 5; - pos = (RE_UINT32)re_pattern_syntax_stage_4[pos + f] << 5; - pos += code; - value = (re_pattern_syntax_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Hangul_Syllable_Type. */ - -static RE_UINT8 re_hangul_syllable_type_stage_1[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, -}; - -static RE_UINT8 re_hangul_syllable_type_stage_2[] = { - 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_hangul_syllable_type_stage_3[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 3, 0, 0, 0, 0, 0, 4, 5, 6, 7, 8, 9, 10, 4, - 5, 6, 7, 8, 9, 10, 4, 5, 6, 7, 8, 9, 10, 4, 5, 6, - 7, 8, 9, 10, 4, 5, 6, 7, 8, 9, 10, 4, 5, 6, 7, 8, - 9, 10, 4, 5, 6, 7, 8, 9, 10, 4, 5, 6, 7, 8, 9, 10, - 4, 5, 6, 7, 8, 9, 10, 4, 5, 6, 7, 8, 9, 10, 4, 5, - 6, 7, 8, 9, 10, 4, 5, 6, 7, 8, 9, 10, 4, 5, 6, 11, -}; - -static RE_UINT8 re_hangul_syllable_type_stage_4[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 4, - 5, 6, 6, 7, 6, 6, 6, 5, 6, 6, 7, 6, 6, 6, 5, 6, - 6, 7, 6, 6, 6, 5, 6, 6, 7, 6, 6, 6, 5, 6, 6, 7, - 6, 6, 6, 5, 6, 6, 7, 6, 6, 6, 5, 6, 6, 7, 6, 6, - 6, 5, 6, 6, 7, 6, 6, 6, 5, 6, 6, 7, 6, 6, 6, 5, - 6, 6, 7, 6, 6, 6, 5, 6, 6, 7, 6, 6, 6, 5, 6, 
6, - 7, 6, 6, 6, 5, 6, 6, 7, 6, 6, 6, 5, 6, 6, 7, 6, - 6, 6, 5, 6, 6, 7, 6, 6, 6, 5, 6, 6, 7, 6, 6, 6, - 6, 5, 6, 6, 8, 0, 2, 2, 9, 10, 3, 3, 3, 3, 3, 11, -}; - -static RE_UINT8 re_hangul_syllable_type_stage_5[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, - 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, - 1, 1, 1, 1, 1, 0, 0, 0, 4, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 5, 5, 5, - 5, 5, 5, 5, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 0, - 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, -}; - -/* Hangul_Syllable_Type: 497 bytes. */ - -RE_UINT32 re_get_hangul_syllable_type(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 16; - code = ch ^ (f << 16); - pos = (RE_UINT32)re_hangul_syllable_type_stage_1[f] << 5; - f = code >> 11; - code ^= f << 11; - pos = (RE_UINT32)re_hangul_syllable_type_stage_2[pos + f] << 4; - f = code >> 7; - code ^= f << 7; - pos = (RE_UINT32)re_hangul_syllable_type_stage_3[pos + f] << 4; - f = code >> 3; - code ^= f << 3; - pos = (RE_UINT32)re_hangul_syllable_type_stage_4[pos + f] << 3; - value = re_hangul_syllable_type_stage_5[pos + code]; - - return value; -} - -/* Bidi_Class. 
*/ - -static RE_UINT8 re_bidi_class_stage_1[] = { - 0, 1, 2, 3, 4, 5, 5, 5, 5, 5, 6, 5, 5, 5, 5, 7, - 8, 9, 5, 5, 5, 5, 10, 5, 5, 5, 5, 5, 5, 11, 12, 13, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 14, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 14, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 14, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 14, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 14, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 14, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 14, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 14, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 14, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 14, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 14, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 14, - 15, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 14, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 14, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 14, -}; - -static RE_UINT8 re_bidi_class_stage_2[] = { - 0, 1, 2, 2, 2, 3, 4, 5, 2, 6, 2, 7, 8, 9, 10, 11, - 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, - 28, 29, 2, 2, 2, 2, 30, 31, 32, 2, 2, 2, 2, 33, 34, 35, - 36, 37, 38, 39, 40, 2, 41, 42, 43, 44, 2, 45, 2, 2, 2, 46, - 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 52, 52, 52, 57, 58, 52, - 2, 2, 52, 52, 52, 52, 59, 2, 2, 60, 61, 62, 63, 64, 52, 65, - 66, 67, 2, 68, 69, 70, 71, 72, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 73, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 74, 2, 2, 75, 76, 77, 78, - 79, 80, 81, 82, 83, 84, 2, 85, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 86, 87, 87, 87, 88, 89, 90, 91, 92, 93, - 2, 2, 94, 95, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 96, 96, 97, 96, 98, 96, 99, 96, 96, 96, 96, 96, 100, 96, 96, 96, - 101, 102, 
103, 104, 2, 2, 2, 2, 2, 2, 2, 2, 2, 105, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 106, - 2, 2, 107, 108, 109, 2, 110, 2, 2, 2, 2, 2, 2, 111, 112, 113, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 87, 114, 96, 96, - 115, 116, 117, 2, 2, 2, 118, 119, 120, 121, 122, 123, 124, 125, 126, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 127, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 127, - 128, 128, 129, 130, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, -}; - -static RE_UINT8 re_bidi_class_stage_3[] = { - 0, 1, 2, 3, 4, 5, 4, 6, 7, 8, 9, 10, 11, 12, 11, 12, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 13, 14, 14, 15, 16, - 17, 17, 17, 17, 17, 17, 17, 18, 19, 11, 11, 11, 11, 11, 11, 20, - 21, 11, 11, 11, 11, 11, 11, 11, 22, 23, 17, 24, 25, 26, 26, 26, - 27, 28, 29, 29, 30, 17, 31, 32, 29, 29, 29, 29, 29, 33, 34, 35, - 29, 36, 29, 17, 28, 29, 29, 29, 29, 29, 37, 32, 26, 26, 38, 39, - 26, 40, 41, 26, 26, 42, 26, 26, 26, 26, 29, 29, 29, 29, 43, 44, - 45, 11, 11, 46, 47, 48, 49, 11, 50, 11, 11, 51, 52, 11, 49, 53, - 54, 11, 11, 51, 55, 50, 11, 56, 54, 11, 11, 51, 57, 11, 49, 58, - 50, 11, 11, 59, 52, 60, 49, 11, 61, 11, 11, 11, 62, 11, 11, 63, - 11, 11, 11, 64, 65, 66, 49, 67, 11, 11, 11, 51, 68, 11, 49, 11, - 11, 11, 11, 11, 52, 11, 49, 11, 11, 11, 11, 11, 69, 70, 11, 11, - 11, 11, 11, 71, 72, 11, 11, 11, 11, 11, 11, 73, 74, 11, 11, 11, - 11, 75, 11, 76, 11, 11, 11, 77, 78, 79, 17, 80, 60, 11, 11, 11, - 11, 11, 81, 82, 11, 83, 84, 85, 86, 87, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 81, 11, 11, 11, 88, 11, 11, 11, 11, 11, 11, - 4, 11, 11, 11, 11, 11, 11, 11, 89, 90, 11, 11, 11, 11, 11, 11, - 11, 91, 11, 
91, 11, 49, 11, 49, 11, 11, 11, 92, 93, 94, 11, 88, - 95, 11, 11, 11, 11, 11, 11, 11, 11, 11, 96, 11, 11, 11, 11, 11, - 11, 11, 97, 98, 99, 11, 11, 11, 11, 11, 11, 11, 11, 100, 16, 16, - 11, 101, 11, 11, 11, 102, 103, 104, 105, 11, 11, 106, 61, 11, 107, 105, - 108, 11, 109, 11, 11, 11, 110, 108, 11, 11, 111, 112, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 113, 114, 115, 11, 11, 11, 11, 17, 17, 116, 111, - 11, 11, 11, 117, 118, 119, 119, 120, 121, 16, 122, 123, 124, 125, 126, 127, - 128, 11, 129, 129, 129, 17, 17, 84, 130, 131, 132, 133, 134, 16, 11, 11, - 135, 16, 16, 16, 16, 16, 16, 16, 16, 136, 16, 16, 16, 16, 16, 16, - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 137, 11, 11, 11, 5, - 16, 138, 16, 16, 16, 16, 16, 139, 16, 16, 140, 11, 141, 11, 16, 16, - 142, 143, 11, 11, 11, 11, 144, 16, 16, 16, 145, 16, 16, 16, 16, 16, - 146, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 147, 88, 11, 11, - 11, 11, 11, 11, 11, 11, 148, 149, 11, 11, 11, 11, 11, 11, 11, 150, - 11, 11, 11, 11, 11, 11, 17, 17, 16, 16, 16, 151, 11, 11, 11, 11, - 16, 152, 16, 16, 16, 16, 16, 139, 16, 16, 16, 16, 16, 137, 11, 151, - 153, 16, 154, 155, 11, 11, 11, 11, 11, 156, 4, 11, 11, 11, 11, 157, - 11, 11, 11, 11, 16, 16, 139, 11, 11, 120, 11, 11, 11, 16, 11, 158, - 11, 11, 11, 146, 159, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 160, - 11, 11, 11, 11, 11, 100, 11, 161, 11, 11, 11, 11, 16, 16, 16, 16, - 11, 16, 16, 16, 140, 11, 11, 11, 119, 11, 11, 11, 11, 11, 150, 162, - 11, 150, 11, 11, 11, 11, 11, 108, 16, 16, 163, 11, 11, 11, 11, 11, - 164, 11, 11, 11, 11, 11, 11, 11, 165, 11, 166, 167, 11, 11, 11, 168, - 11, 11, 11, 11, 115, 11, 17, 108, 11, 11, 169, 11, 170, 108, 11, 11, - 45, 11, 11, 171, 11, 11, 11, 11, 11, 11, 172, 173, 174, 11, 11, 11, - 11, 11, 11, 175, 50, 11, 68, 60, 11, 11, 11, 11, 11, 11, 176, 11, - 11, 177, 178, 26, 26, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, - 29, 29, 29, 179, 29, 29, 29, 29, 29, 29, 29, 29, 29, 8, 8, 180, - 17, 88, 116, 16, 16, 181, 182, 29, 29, 29, 29, 29, 29, 29, 29, 
183, - 184, 3, 4, 5, 4, 5, 137, 11, 11, 11, 11, 11, 11, 11, 185, 186, - 187, 11, 11, 11, 16, 16, 16, 16, 141, 151, 11, 11, 11, 11, 11, 87, - 26, 26, 26, 26, 26, 26, 26, 26, 26, 188, 26, 26, 26, 26, 26, 26, - 189, 26, 26, 190, 26, 26, 26, 26, 26, 26, 26, 191, 26, 26, 26, 26, - 26, 26, 26, 26, 26, 26, 192, 193, 50, 11, 11, 194, 116, 14, 137, 11, - 108, 11, 11, 195, 11, 11, 11, 11, 45, 11, 196, 197, 11, 11, 11, 11, - 108, 11, 11, 198, 11, 11, 11, 11, 11, 11, 199, 200, 11, 11, 11, 11, - 150, 45, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 201, 202, - 203, 11, 204, 11, 11, 11, 11, 11, 16, 16, 16, 16, 205, 11, 11, 11, - 16, 16, 16, 16, 16, 140, 11, 11, 11, 11, 11, 11, 11, 157, 11, 11, - 11, 206, 11, 11, 161, 11, 11, 11, 135, 11, 11, 11, 207, 208, 208, 208, - 29, 29, 29, 29, 29, 29, 29, 209, 16, 16, 151, 16, 16, 16, 16, 16, - 16, 139, 210, 211, 146, 146, 11, 11, 212, 11, 11, 11, 11, 11, 133, 11, - 16, 16, 4, 213, 16, 16, 16, 147, 16, 139, 16, 16, 214, 11, 16, 4, - 16, 16, 16, 210, 215, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 216, - 16, 16, 16, 217, 139, 16, 218, 11, 11, 11, 11, 11, 11, 11, 11, 5, - 16, 16, 16, 16, 219, 11, 11, 11, 16, 16, 16, 16, 137, 11, 11, 11, - 16, 16, 16, 16, 16, 16, 16, 139, 11, 11, 11, 11, 11, 11, 11, 220, - 8, 8, 8, 8, 8, 8, 8, 8, 17, 17, 17, 17, 17, 17, 17, 17, - 17, 17, 17, 17, 17, 17, 17, 8, -}; - -static RE_UINT8 re_bidi_class_stage_4[] = { - 0, 0, 1, 2, 0, 0, 0, 3, 4, 5, 6, 7, 8, 8, 9, 10, - 11, 12, 12, 12, 12, 12, 13, 10, 12, 12, 13, 14, 0, 15, 0, 0, - 0, 0, 0, 0, 16, 5, 17, 18, 19, 20, 21, 10, 12, 12, 12, 12, - 12, 13, 12, 12, 12, 12, 22, 12, 23, 10, 10, 10, 12, 24, 10, 17, - 10, 10, 10, 10, 25, 25, 25, 25, 12, 26, 12, 27, 12, 17, 12, 12, - 12, 27, 12, 12, 28, 25, 29, 12, 12, 12, 27, 30, 31, 25, 25, 25, - 25, 25, 25, 32, 33, 32, 34, 34, 34, 34, 34, 34, 35, 36, 37, 38, - 25, 25, 39, 40, 40, 40, 40, 40, 40, 40, 41, 25, 35, 35, 42, 43, - 44, 40, 40, 40, 40, 45, 25, 46, 25, 47, 48, 49, 8, 8, 50, 40, - 51, 40, 40, 40, 40, 45, 25, 25, 34, 34, 
52, 25, 25, 53, 54, 34, - 34, 55, 32, 25, 25, 31, 31, 56, 34, 34, 31, 34, 40, 25, 25, 25, - 25, 25, 25, 39, 57, 12, 12, 12, 12, 12, 58, 59, 60, 25, 59, 61, - 60, 25, 12, 12, 62, 12, 12, 12, 61, 12, 12, 12, 12, 12, 12, 59, - 60, 59, 12, 61, 63, 12, 30, 12, 64, 12, 12, 12, 64, 28, 65, 29, - 29, 61, 12, 12, 60, 66, 59, 61, 67, 12, 12, 12, 12, 12, 12, 65, - 12, 58, 12, 12, 58, 12, 12, 12, 59, 12, 12, 61, 13, 10, 68, 12, - 12, 12, 12, 62, 59, 62, 69, 29, 12, 64, 12, 12, 12, 12, 10, 70, - 12, 12, 12, 29, 12, 12, 58, 12, 62, 71, 12, 12, 61, 25, 57, 30, - 12, 28, 25, 57, 61, 25, 66, 59, 12, 12, 25, 29, 12, 12, 29, 12, - 12, 72, 73, 26, 60, 25, 25, 57, 25, 69, 12, 60, 25, 25, 60, 25, - 25, 25, 25, 59, 12, 12, 12, 60, 69, 25, 64, 64, 12, 12, 29, 62, - 59, 12, 12, 12, 60, 59, 12, 12, 58, 64, 12, 61, 12, 12, 12, 61, - 10, 10, 26, 12, 74, 12, 12, 12, 12, 12, 13, 11, 62, 59, 12, 12, - 12, 66, 25, 29, 12, 58, 60, 25, 25, 12, 30, 61, 10, 10, 75, 76, - 12, 12, 61, 12, 57, 28, 59, 12, 58, 12, 60, 12, 11, 26, 12, 12, - 12, 12, 12, 23, 12, 28, 65, 12, 12, 58, 25, 57, 71, 60, 25, 59, - 28, 25, 25, 65, 25, 12, 12, 12, 12, 69, 57, 59, 12, 12, 28, 25, - 29, 12, 12, 12, 62, 29, 66, 12, 12, 58, 29, 72, 12, 12, 12, 25, - 25, 62, 12, 12, 57, 25, 25, 25, 69, 25, 59, 61, 12, 59, 12, 12, - 25, 57, 12, 12, 12, 12, 12, 77, 26, 12, 12, 24, 12, 12, 12, 24, - 12, 12, 12, 22, 78, 78, 79, 80, 10, 10, 81, 82, 83, 84, 10, 10, - 10, 85, 10, 10, 10, 10, 10, 86, 0, 87, 88, 0, 89, 8, 90, 70, - 8, 8, 90, 70, 83, 83, 83, 83, 17, 70, 26, 12, 12, 20, 11, 23, - 10, 77, 91, 92, 12, 12, 23, 12, 10, 11, 23, 26, 12, 12, 91, 12, - 93, 10, 10, 10, 10, 26, 12, 12, 10, 20, 10, 10, 10, 12, 12, 12, - 10, 70, 12, 12, 10, 10, 70, 12, 10, 10, 8, 8, 8, 8, 8, 12, - 12, 12, 23, 10, 10, 10, 10, 24, 24, 10, 10, 10, 10, 10, 10, 11, - 12, 24, 70, 28, 29, 12, 24, 10, 12, 12, 12, 28, 10, 10, 10, 12, - 10, 10, 17, 10, 94, 11, 10, 10, 11, 12, 62, 29, 11, 23, 12, 24, - 12, 12, 95, 11, 12, 12, 13, 12, 12, 12, 12, 70, 12, 12, 12, 10, - 12, 
13, 70, 12, 12, 12, 12, 13, 96, 25, 25, 97, 26, 12, 12, 12, - 12, 12, 11, 12, 58, 58, 28, 12, 12, 64, 10, 12, 12, 12, 98, 12, - 12, 10, 12, 12, 12, 62, 25, 29, 12, 28, 25, 25, 28, 62, 29, 59, - 12, 12, 60, 57, 64, 64, 12, 12, 28, 12, 12, 59, 69, 65, 59, 62, - 12, 61, 59, 61, 12, 12, 12, 99, 34, 34, 100, 34, 40, 40, 40, 101, - 40, 40, 40, 102, 103, 104, 10, 105, 106, 70, 107, 12, 40, 40, 40, 108, - 109, 5, 6, 7, 5, 110, 10, 70, 0, 0, 111, 112, 91, 12, 12, 12, - 34, 34, 34, 113, 31, 33, 34, 25, 34, 34, 114, 52, 34, 34, 115, 10, - 35, 35, 35, 35, 35, 35, 35, 116, 12, 12, 25, 25, 28, 57, 64, 12, - 12, 28, 25, 60, 25, 59, 12, 12, 12, 62, 25, 57, 12, 12, 28, 61, - 25, 66, 12, 12, 12, 28, 29, 12, 117, 0, 118, 25, 57, 60, 25, 12, - 12, 12, 62, 29, 119, 120, 12, 12, 12, 91, 12, 12, 13, 12, 12, 121, - 8, 8, 8, 8, 122, 40, 40, 40, 10, 10, 10, 70, 24, 10, 10, 70, - 8, 8, 123, 12, 10, 17, 10, 10, 10, 20, 70, 12, 20, 10, 10, 10, - 10, 10, 24, 11, 10, 10, 10, 26, 10, 10, 12, 12, 11, 24, 10, 10, - 12, 12, 12, 124, -}; - -static RE_UINT8 re_bidi_class_stage_5[] = { - 11, 11, 11, 11, 11, 8, 7, 8, 9, 7, 11, 11, 7, 7, 7, 8, - 9, 10, 10, 4, 4, 4, 10, 10, 10, 10, 10, 3, 6, 3, 6, 6, - 2, 2, 2, 2, 2, 2, 6, 10, 10, 10, 10, 10, 10, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 10, 10, 10, 10, 11, 11, 7, 11, 11, - 6, 10, 4, 4, 10, 10, 0, 10, 10, 11, 10, 10, 4, 4, 2, 2, - 10, 0, 10, 10, 10, 2, 0, 10, 0, 10, 10, 0, 0, 0, 10, 10, - 0, 10, 10, 10, 12, 12, 12, 12, 10, 10, 0, 0, 0, 0, 10, 0, - 0, 0, 0, 12, 12, 12, 0, 0, 0, 0, 0, 4, 1, 12, 12, 12, - 12, 12, 1, 12, 1, 12, 12, 1, 1, 1, 1, 1, 5, 5, 5, 5, - 5, 13, 10, 10, 13, 4, 4, 13, 6, 13, 10, 10, 12, 12, 12, 13, - 13, 13, 13, 13, 13, 13, 13, 12, 5, 5, 4, 5, 5, 13, 13, 13, - 12, 13, 13, 13, 13, 13, 12, 12, 12, 5, 10, 12, 12, 13, 13, 12, - 12, 10, 12, 12, 12, 12, 13, 13, 2, 2, 13, 13, 13, 12, 13, 13, - 1, 1, 1, 12, 1, 1, 10, 10, 10, 10, 1, 1, 1, 1, 12, 12, - 12, 12, 1, 1, 12, 12, 12, 0, 0, 0, 12, 0, 12, 0, 0, 0, - 0, 12, 12, 12, 0, 12, 0, 0, 0, 0, 12, 12, 0, 0, 4, 
4, - 0, 12, 12, 0, 12, 0, 0, 12, 12, 12, 0, 12, 0, 4, 0, 0, - 10, 4, 10, 0, 12, 0, 12, 12, 10, 10, 10, 0, 12, 0, 12, 0, - 0, 12, 0, 12, 0, 12, 10, 10, 9, 0, 0, 0, 10, 10, 10, 12, - 12, 12, 11, 0, 0, 10, 0, 10, 9, 9, 9, 9, 9, 9, 9, 11, - 11, 11, 0, 1, 9, 7, 16, 17, 18, 14, 15, 6, 4, 4, 4, 4, - 4, 10, 10, 10, 6, 10, 10, 10, 10, 10, 10, 9, 11, 11, 19, 20, - 21, 22, 11, 11, 2, 0, 0, 0, 2, 2, 3, 3, 0, 10, 0, 0, - 0, 0, 4, 0, 10, 10, 3, 4, 9, 10, 10, 10, 0, 12, 12, 10, - 12, 12, 12, 10, 12, 12, 10, 10, 4, 4, 0, 0, 0, 1, 12, 1, - 1, 3, 1, 1, 13, 13, 10, 10, 13, 10, 13, 13, 6, 10, 6, 0, - 10, 6, 10, 10, 10, 10, 10, 4, 10, 10, 3, 3, 10, 4, 4, 10, - 13, 13, 13, 11, 0, 10, 10, 4, 10, 4, 4, 0, 11, 10, 10, 10, - 10, 10, 11, 11, 1, 1, 1, 10, 12, 12, 12, 1, 1, 10, 10, 10, - 5, 5, 5, 1, 0, 0, 0, 11, 11, 11, 11, 12, 10, 10, 12, 12, - 12, 10, 0, 0, 0, 0, 2, 2, 10, 10, 13, 13, 2, 2, 2, 0, - 0, 0, 11, 11, -}; - -/* Bidi_Class: 3216 bytes. */ - -RE_UINT32 re_get_bidi_class(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 12; - code = ch ^ (f << 12); - pos = (RE_UINT32)re_bidi_class_stage_1[f] << 5; - f = code >> 7; - code ^= f << 7; - pos = (RE_UINT32)re_bidi_class_stage_2[pos + f] << 3; - f = code >> 4; - code ^= f << 4; - pos = (RE_UINT32)re_bidi_class_stage_3[pos + f] << 2; - f = code >> 2; - code ^= f << 2; - pos = (RE_UINT32)re_bidi_class_stage_4[pos + f] << 2; - value = re_bidi_class_stage_5[pos + code]; - - return value; -} - -/* Canonical_Combining_Class. 
*/ - -static RE_UINT8 re_canonical_combining_class_stage_1[] = { - 0, 1, 2, 2, 2, 3, 2, 4, 5, 2, 2, 2, 2, 2, 6, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, -}; - -static RE_UINT8 re_canonical_combining_class_stage_2[] = { - 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, - 14, 0, 0, 15, 0, 0, 0, 16, 17, 18, 19, 20, 21, 22, 0, 0, - 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24, 25, 0, 0, - 26, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 27, 0, 28, 29, 30, 31, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, 0, 0, 33, 0, - 0, 34, 0, 0, 0, 0, 0, 0, 0, 0, 35, 0, 0, 0, 0, 0, - 36, 37, 0, 0, 0, 0, 38, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 39, 40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_canonical_combining_class_stage_3[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 0, 0, 0, 0, - 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 6, 7, 8, 0, - 9, 0, 10, 11, 0, 0, 12, 13, 14, 15, 16, 0, 0, 0, 0, 17, - 18, 19, 20, 0, 0, 0, 0, 21, 0, 22, 23, 0, 0, 22, 24, 0, - 0, 22, 24, 0, 0, 22, 24, 0, 0, 22, 24, 0, 0, 0, 24, 0, - 0, 0, 25, 0, 0, 22, 24, 0, 0, 0, 24, 0, 0, 0, 26, 0, - 0, 27, 28, 0, 0, 29, 30, 0, 31, 32, 0, 33, 34, 0, 35, 0, - 0, 36, 0, 0, 37, 0, 0, 0, 0, 0, 38, 0, 0, 0, 0, 0, - 39, 39, 0, 0, 0, 0, 40, 0, 0, 0, 0, 0, 0, 41, 0, 0, - 0, 42, 0, 0, 0, 0, 0, 0, 43, 0, 0, 44, 0, 0, 0, 0, - 0, 45, 46, 47, 0, 48, 0, 49, 0, 50, 0, 0, 0, 0, 51, 52, - 0, 0, 0, 0, 0, 0, 53, 54, 0, 0, 0, 0, 
0, 0, 55, 56, - 0, 0, 0, 0, 0, 0, 0, 57, 0, 0, 0, 58, 0, 0, 0, 59, - 0, 60, 0, 0, 61, 0, 0, 0, 0, 0, 0, 62, 63, 0, 0, 64, - 65, 0, 0, 0, 0, 0, 46, 66, 0, 67, 68, 0, 0, 69, 70, 0, - 0, 0, 0, 0, 0, 71, 72, 73, 0, 0, 0, 0, 0, 0, 0, 24, - 74, 0, 0, 0, 0, 0, 0, 0, 0, 75, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 76, 77, 78, 0, 0, 0, 0, 0, 0, - 0, 0, 65, 0, 0, 79, 0, 0, 80, 81, 0, 0, 0, 0, 70, 0, - 0, 0, 0, 0, 0, 82, 0, 0, 0, 0, 0, 83, 84, 85, 0, 0, - 0, 0, 86, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_canonical_combining_class_stage_4[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 3, 4, - 5, 6, 7, 4, 4, 8, 9, 10, 1, 11, 12, 13, 14, 15, 16, 17, - 18, 1, 1, 1, 0, 0, 0, 0, 19, 1, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 20, 21, 22, 1, 23, 4, 21, 24, 25, 26, 27, 28, - 29, 30, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 31, 0, - 0, 0, 32, 33, 34, 35, 1, 36, 0, 0, 0, 0, 37, 0, 0, 0, - 0, 0, 0, 0, 0, 38, 1, 39, 14, 39, 40, 41, 0, 0, 0, 0, - 0, 0, 0, 0, 42, 0, 0, 0, 0, 0, 0, 0, 43, 36, 44, 45, - 21, 45, 46, 0, 0, 0, 0, 0, 0, 0, 19, 1, 21, 0, 0, 0, - 0, 0, 0, 0, 0, 38, 47, 1, 1, 48, 48, 49, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 50, 0, 0, 21, 43, 51, 52, 21, 35, 53, - 0, 0, 0, 0, 0, 0, 0, 54, 0, 0, 0, 55, 56, 57, 0, 0, - 0, 0, 0, 55, 0, 0, 0, 0, 0, 0, 0, 55, 0, 58, 0, 0, - 0, 0, 59, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 60, 0, - 0, 0, 61, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 62, 0, - 0, 0, 63, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 0, - 0, 0, 0, 0, 0, 65, 66, 0, 0, 0, 0, 0, 67, 68, 69, 70, - 71, 72, 0, 0, 0, 0, 0, 0, 0, 73, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 74, 75, 0, 0, 0, 0, 76, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 48, 0, 0, 0, 0, 0, 77, 0, 0, - 0, 0, 0, 0, 59, 0, 0, 78, 0, 0, 79, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 80, 0, 0, 0, 0, 0, 0, 19, 81, 0, - 77, 0, 0, 0, 0, 48, 1, 82, 0, 0, 0, 0, 0, 54, 0, 0, - 0, 77, 0, 0, 0, 0, 0, 0, 0, 0, 19, 10, 1, 0, 0, 0, - 0, 0, 83, 0, 0, 0, 0, 0, 0, 84, 0, 0, 83, 0, 0, 0, - 0, 0, 0, 0, 0, 74, 0, 0, 0, 0, 0, 0, 53, 9, 12, 4, - 85, 8, 86, 76, 0, 57, 0, 0, 21, 1, 21, 87, 88, 1, 1, 
1, - 1, 53, 0, 0, 0, 0, 0, 89, 0, 0, 0, 0, 90, 1, 91, 57, - 78, 92, 93, 4, 57, 0, 0, 0, 0, 0, 0, 19, 49, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 94, 1, 1, 1, 1, 1, 1, 1, 1, - 0, 0, 95, 96, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 97, 0, - 0, 0, 0, 19, 0, 1, 1, 49, 0, 0, 0, 0, 0, 0, 0, 19, - 0, 0, 0, 0, 49, 0, 0, 0, 0, 59, 0, 0, 0, 0, 0, 0, - 1, 1, 1, 1, 49, 0, 0, 0, 0, 0, 98, 64, 0, 0, 0, 0, - 0, 0, 0, 0, 94, 0, 0, 0, 0, 0, 0, 0, 74, 0, 0, 0, - 77, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 99, 100, 57, 38, - 78, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 59, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 101, 1, 53, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 76, 0, 0, 0, 102, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 103, 94, 0, 0, 0, 0, 0, 0, 104, 0, - 53, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 94, 77, 0, 0, - 0, 0, 0, 0, 0, 105, 0, 0, 0, 106, 107, 108, 109, 0, 98, 4, - 110, 48, 23, 0, 0, 0, 0, 0, 0, 0, 38, 49, 0, 0, 0, 0, - 38, 57, 0, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_canonical_combining_class_stage_5[] = { - 0, 0, 0, 0, 50, 50, 50, 50, 50, 51, 45, 45, 45, 45, 51, 43, - 45, 45, 45, 45, 45, 41, 41, 45, 45, 45, 45, 41, 41, 45, 45, 45, - 1, 1, 1, 1, 1, 45, 45, 45, 45, 50, 50, 50, 50, 54, 50, 45, - 45, 45, 50, 50, 50, 45, 45, 0, 50, 50, 50, 45, 45, 45, 45, 50, - 51, 45, 45, 50, 52, 53, 53, 52, 53, 53, 52, 50, 0, 0, 0, 50, - 0, 45, 50, 50, 50, 50, 45, 50, 50, 50, 46, 45, 50, 50, 45, 45, - 50, 46, 49, 50, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 14, 15, - 16, 17, 0, 18, 0, 19, 20, 0, 50, 45, 0, 13, 25, 26, 27, 0, - 0, 0, 0, 22, 23, 24, 25, 26, 27, 28, 29, 50, 50, 45, 45, 50, - 45, 50, 50, 45, 30, 0, 0, 0, 0, 0, 50, 50, 50, 0, 0, 50, - 50, 0, 45, 50, 50, 45, 0, 0, 0, 31, 0, 0, 50, 45, 50, 50, - 45, 45, 50, 45, 45, 50, 45, 50, 45, 50, 50, 0, 50, 50, 0, 50, - 0, 50, 50, 50, 50, 50, 0, 0, 0, 45, 45, 45, 50, 45, 45, 45, - 22, 23, 24, 50, 50, 50, 50, 0, 2, 0, 0, 0, 0, 4, 0, 0, - 0, 50, 45, 50, 50, 0, 0, 0, 0, 32, 33, 0, 0, 0, 4, 0, - 34, 34, 4, 0, 35, 35, 35, 35, 36, 36, 0, 0, 37, 37, 37, 37, - 45, 45, 0, 0, 0, 45, 0, 45, 0, 43, 0, 0, 0, 38, 
39, 0, - 40, 0, 0, 0, 0, 0, 39, 39, 39, 39, 0, 0, 39, 0, 50, 50, - 4, 0, 50, 50, 0, 0, 45, 0, 0, 0, 0, 2, 0, 4, 4, 0, - 0, 45, 0, 0, 4, 0, 0, 0, 0, 50, 0, 0, 0, 49, 0, 0, - 0, 46, 50, 45, 45, 0, 0, 0, 50, 0, 0, 45, 0, 0, 4, 4, - 0, 0, 2, 0, 50, 0, 1, 1, 1, 0, 0, 0, 50, 53, 42, 45, - 41, 50, 50, 50, 52, 45, 50, 45, 50, 50, 1, 1, 1, 1, 1, 50, - 0, 1, 1, 50, 45, 50, 1, 1, 0, 0, 0, 4, 0, 0, 44, 49, - 51, 46, 47, 47, 0, 3, 3, 0, 0, 0, 0, 45, 50, 0, 50, 50, - 45, 0, 0, 50, 0, 0, 21, 0, 0, 45, 0, 50, 50, 1, 45, 0, - 0, 4, 2, 0, 0, 0, 4, 2, 0, 43, 43, 1, 1, 1, 0, 0, - 0, 48, 43, 43, 43, 43, 43, 0, 45, 45, 45, 0, -}; - -/* Canonical_Combining_Class: 1828 bytes. */ - -RE_UINT32 re_get_canonical_combining_class(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 13; - code = ch ^ (f << 13); - pos = (RE_UINT32)re_canonical_combining_class_stage_1[f] << 5; - f = code >> 8; - code ^= f << 8; - pos = (RE_UINT32)re_canonical_combining_class_stage_2[pos + f] << 3; - f = code >> 5; - code ^= f << 5; - pos = (RE_UINT32)re_canonical_combining_class_stage_3[pos + f] << 3; - f = code >> 2; - code ^= f << 2; - pos = (RE_UINT32)re_canonical_combining_class_stage_4[pos + f] << 2; - value = re_canonical_combining_class_stage_5[pos + code]; - - return value; -} - -/* Decomposition_Type. 
*/ - -static RE_UINT8 re_decomposition_type_stage_1[] = { - 0, 1, 2, 2, 2, 3, 4, 5, 6, 2, 2, 2, 2, 2, 7, 8, - 2, 2, 2, 2, 2, 2, 2, 9, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, -}; - -static RE_UINT8 re_decomposition_type_stage_2[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9, 10, 11, 12, 13, 14, - 15, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 16, 7, 17, 18, 19, - 20, 21, 22, 23, 24, 7, 7, 7, 7, 7, 25, 7, 26, 27, 28, 29, - 30, 31, 32, 33, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 34, 7, 7, 7, 7, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 36, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 35, 37, 38, 39, 40, 41, 42, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 43, 44, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 45, 7, 7, 46, 47, 48, 49, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 50, 7, - 7, 51, 52, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 35, 35, 53, 7, 7, 7, 7, 7, -}; - -static RE_UINT8 re_decomposition_type_stage_3[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 3, 5, - 6, 7, 8, 9, 10, 11, 8, 12, 0, 0, 13, 14, 15, 16, 17, 18, - 6, 19, 20, 21, 0, 0, 0, 0, 0, 0, 0, 22, 0, 23, 24, 0, - 0, 0, 0, 0, 25, 0, 0, 26, 27, 14, 28, 14, 29, 30, 0, 31, - 32, 33, 0, 33, 0, 32, 0, 34, 0, 0, 0, 0, 35, 36, 37, 38, - 0, 0, 0, 0, 0, 0, 0, 0, 
39, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 40, 0, 0, 0, 0, 41, 0, 0, 0, 0, 42, 43, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 33, 44, 0, 45, 0, 0, 0, 0, 0, 0, 46, 47, 0, 0, - 0, 0, 0, 48, 0, 49, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 50, 51, 0, 0, 0, 52, 0, 0, 53, 0, 0, 0, - 0, 0, 0, 0, 54, 0, 0, 0, 0, 0, 0, 0, 55, 0, 0, 0, - 0, 0, 0, 0, 53, 0, 0, 0, 0, 0, 0, 0, 0, 56, 0, 0, - 0, 0, 0, 57, 0, 0, 0, 0, 0, 0, 0, 57, 0, 58, 0, 0, - 59, 0, 0, 0, 60, 61, 33, 62, 63, 60, 61, 33, 0, 0, 0, 0, - 0, 0, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 65, - 66, 67, 0, 68, 69, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 70, 71, 72, 73, 74, 75, 0, 76, 73, 73, 0, 0, 0, 0, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 77, 6, 6, 6, 6, 6, 78, - 6, 79, 6, 6, 79, 80, 6, 81, 6, 6, 6, 82, 83, 84, 6, 85, - 86, 87, 88, 89, 90, 91, 0, 92, 93, 94, 95, 0, 0, 0, 0, 0, - 96, 97, 98, 99, 100, 101, 102, 102, 103, 104, 105, 0, 106, 0, 0, 0, - 107, 0, 108, 109, 110, 0, 111, 112, 112, 0, 113, 0, 0, 0, 114, 0, - 0, 0, 115, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 116, 117, 102, 102, 102, 118, 116, 116, 119, 0, - 120, 0, 0, 0, 0, 0, 0, 121, 0, 0, 0, 0, 0, 122, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 123, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 124, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 125, 0, 0, 0, 0, 0, 57, - 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 126, 0, 0, - 127, 0, 0, 128, 129, 130, 131, 132, 0, 133, 129, 130, 131, 132, 0, 134, - 0, 0, 0, 135, 102, 102, 102, 102, 136, 137, 0, 0, 0, 0, 0, 0, - 102, 136, 102, 102, 138, 139, 116, 140, 116, 116, 116, 116, 141, 116, 116, 140, - 142, 142, 142, 142, 142, 143, 102, 144, 142, 142, 142, 142, 142, 142, 102, 145, - 0, 0, 0, 0, 0, 0, 0, 146, 0, 0, 0, 0, 0, 0, 0, 147, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 21, 0, 0, 0, 0, 0, - 81, 148, 149, 6, 6, 6, 81, 6, 6, 6, 6, 6, 6, 78, 0, 0, - 150, 151, 152, 153, 154, 155, 156, 156, 157, 156, 158, 159, 0, 160, 161, 162, - 163, 163, 
163, 163, 163, 163, 164, 165, 165, 166, 167, 167, 167, 168, 169, 170, - 163, 171, 172, 173, 0, 174, 175, 176, 177, 178, 165, 179, 180, 0, 0, 181, - 0, 182, 0, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 192, 193, 194, - 195, 196, 196, 196, 196, 196, 197, 198, 198, 198, 198, 199, 200, 201, 202, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 203, 204, 0, 0, 0, 0, 0, - 0, 0, 205, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 205, 206, 0, 0, 0, 0, 207, 14, 0, 0, 0, - 208, 208, 208, 208, 208, 209, 208, 208, 208, 210, 211, 212, 213, 208, 208, 208, - 214, 215, 208, 216, 217, 218, 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, - 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, 219, 208, 208, 208, 208, 208, - 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, 208, 220, 208, 208, 208, - 213, 208, 221, 222, 223, 224, 225, 226, 227, 228, 229, 228, 0, 0, 0, 0, - 230, 102, 231, 142, 142, 0, 232, 0, 0, 233, 0, 0, 0, 0, 0, 0, - 234, 142, 142, 235, 236, 237, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 6, 81, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_decomposition_type_stage_4[] = { - 0, 0, 0, 0, 1, 0, 2, 3, 4, 5, 6, 7, 8, 9, 8, 8, - 10, 11, 10, 12, 10, 11, 10, 9, 8, 8, 8, 8, 13, 8, 8, 8, - 8, 12, 8, 8, 14, 8, 10, 15, 16, 8, 17, 8, 12, 8, 8, 8, - 8, 8, 8, 15, 12, 0, 0, 18, 19, 0, 0, 0, 0, 20, 20, 21, - 8, 8, 8, 22, 8, 13, 8, 8, 23, 12, 8, 8, 8, 8, 8, 13, - 0, 13, 8, 8, 8, 0, 0, 0, 24, 24, 25, 0, 0, 0, 20, 5, - 24, 25, 0, 0, 9, 19, 0, 0, 0, 19, 26, 27, 0, 21, 11, 22, - 0, 0, 13, 8, 0, 0, 13, 11, 28, 29, 0, 0, 30, 5, 31, 0, - 9, 18, 0, 11, 0, 0, 32, 0, 0, 13, 0, 0, 33, 0, 0, 0, - 8, 13, 13, 8, 13, 8, 13, 8, 8, 12, 12, 0, 0, 3, 0, 0, - 13, 11, 0, 0, 0, 34, 35, 0, 36, 0, 0, 0, 18, 0, 0, 0, - 32, 19, 0, 0, 0, 0, 8, 8, 0, 0, 18, 19, 0, 0, 0, 9, - 18, 27, 0, 0, 0, 0, 10, 27, 0, 0, 37, 19, 0, 0, 0, 12, - 0, 19, 0, 0, 0, 0, 13, 19, 0, 0, 19, 0, 19, 18, 22, 0, - 0, 0, 27, 11, 3, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 1, - 18, 0, 0, 32, 27, 18, 0, 19, 18, 38, 17, 0, 32, 0, 0, 0, 
- 0, 27, 0, 0, 0, 0, 0, 25, 0, 27, 36, 36, 27, 0, 0, 0, - 0, 0, 18, 32, 9, 0, 0, 0, 0, 0, 0, 39, 24, 24, 39, 24, - 24, 24, 24, 40, 24, 24, 24, 24, 41, 42, 43, 0, 0, 0, 25, 0, - 0, 0, 44, 24, 8, 8, 45, 0, 8, 8, 12, 0, 8, 12, 8, 12, - 8, 8, 46, 46, 8, 8, 8, 12, 8, 22, 8, 47, 21, 22, 8, 8, - 8, 13, 8, 10, 13, 22, 8, 48, 49, 50, 30, 0, 51, 3, 0, 0, - 0, 30, 0, 52, 3, 53, 0, 54, 0, 3, 5, 0, 0, 3, 0, 3, - 55, 24, 24, 24, 42, 42, 42, 43, 42, 42, 42, 56, 0, 0, 35, 0, - 57, 34, 58, 59, 59, 60, 61, 62, 63, 64, 65, 66, 66, 67, 68, 59, - 69, 61, 62, 0, 70, 70, 70, 70, 20, 20, 20, 20, 0, 0, 71, 0, - 0, 0, 13, 0, 0, 0, 0, 27, 0, 0, 0, 10, 0, 19, 32, 19, - 0, 36, 0, 72, 35, 0, 0, 0, 32, 37, 32, 0, 36, 0, 0, 10, - 12, 12, 12, 0, 0, 0, 0, 8, 8, 0, 13, 12, 0, 0, 33, 0, - 73, 73, 73, 73, 73, 20, 20, 20, 20, 74, 73, 73, 73, 73, 75, 0, - 0, 0, 0, 35, 0, 30, 0, 0, 0, 0, 0, 19, 0, 0, 0, 76, - 0, 0, 0, 44, 0, 0, 0, 3, 20, 5, 0, 0, 77, 0, 0, 0, - 0, 26, 30, 0, 0, 0, 0, 36, 36, 36, 36, 36, 36, 46, 32, 0, - 9, 22, 33, 12, 0, 19, 3, 78, 0, 37, 11, 79, 34, 20, 20, 20, - 20, 20, 20, 30, 4, 24, 24, 24, 20, 73, 0, 0, 80, 73, 73, 73, - 73, 73, 73, 75, 20, 20, 20, 81, 81, 81, 81, 81, 81, 81, 20, 20, - 82, 81, 81, 81, 20, 20, 20, 83, 25, 0, 0, 0, 0, 0, 55, 0, - 36, 10, 8, 11, 36, 33, 13, 8, 20, 30, 0, 0, 3, 20, 0, 46, - 59, 59, 84, 8, 8, 11, 8, 36, 9, 22, 8, 15, 85, 86, 86, 86, - 86, 86, 86, 86, 86, 85, 85, 85, 87, 85, 86, 86, 88, 0, 0, 0, - 89, 90, 91, 92, 85, 87, 86, 85, 85, 85, 93, 87, 94, 94, 94, 94, - 94, 95, 95, 95, 95, 95, 95, 95, 95, 96, 97, 97, 97, 97, 97, 97, - 97, 97, 97, 98, 99, 99, 99, 99, 99, 100, 94, 94, 101, 95, 95, 95, - 95, 95, 95, 102, 97, 99, 99, 103, 104, 97, 105, 106, 107, 105, 108, 105, - 104, 96, 95, 105, 96, 109, 110, 97, 111, 106, 112, 105, 95, 106, 113, 95, - 96, 106, 0, 0, 94, 94, 94, 114, 115, 115, 116, 0, 115, 115, 115, 115, - 115, 117, 118, 20, 119, 120, 120, 120, 120, 119, 120, 0, 121, 122, 123, 123, - 124, 91, 125, 126, 90, 125, 127, 127, 127, 127, 126, 91, 125, 127, 
127, 127, - 127, 127, 127, 127, 127, 127, 127, 126, 125, 126, 91, 128, 129, 130, 130, 130, - 130, 130, 130, 130, 131, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 133, - 134, 132, 134, 132, 134, 132, 134, 135, 130, 136, 132, 133, 0, 0, 27, 19, - 0, 0, 18, 0, 0, 0, 0, 13, 8, 19, 0, 0, 0, 0, 18, 8, - 59, 59, 59, 59, 59, 137, 59, 59, 59, 59, 59, 137, 138, 139, 61, 137, - 59, 59, 66, 61, 59, 61, 59, 59, 59, 66, 140, 61, 59, 137, 59, 137, - 59, 59, 66, 140, 59, 141, 142, 59, 137, 59, 59, 59, 59, 62, 59, 59, - 59, 59, 59, 142, 139, 143, 61, 59, 140, 59, 144, 0, 138, 145, 144, 61, - 139, 143, 144, 144, 139, 143, 140, 59, 140, 59, 61, 141, 59, 59, 66, 59, - 59, 59, 59, 0, 61, 61, 66, 59, 20, 20, 30, 0, 20, 20, 146, 75, - 0, 0, 4, 0, 147, 0, 0, 0, 148, 0, 0, 0, 81, 81, 148, 0, - 20, 20, 35, 0, 149, 0, 0, 0, -}; - -static RE_UINT8 re_decomposition_type_stage_5[] = { - 0, 0, 0, 0, 4, 0, 0, 0, 2, 0, 10, 0, 0, 0, 0, 2, - 0, 0, 10, 10, 2, 2, 0, 0, 2, 10, 10, 0, 17, 17, 17, 0, - 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, - 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 2, 2, 1, 1, 1, 2, - 2, 0, 0, 1, 1, 2, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, - 2, 2, 2, 2, 2, 1, 1, 1, 1, 0, 1, 1, 1, 2, 2, 2, - 10, 10, 10, 10, 10, 0, 0, 0, 0, 0, 2, 0, 0, 0, 1, 0, - 2, 2, 2, 1, 1, 2, 2, 0, 2, 2, 2, 0, 0, 2, 0, 0, - 0, 1, 0, 0, 0, 1, 1, 0, 0, 2, 2, 2, 2, 0, 0, 0, - 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 2, 10, 10, 10, 0, - 10, 10, 0, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 0, - 0, 0, 0, 10, 1, 1, 2, 1, 0, 1, 0, 1, 1, 2, 1, 2, - 1, 1, 2, 0, 1, 1, 2, 2, 2, 2, 2, 4, 0, 4, 0, 0, - 0, 0, 0, 4, 2, 0, 2, 2, 2, 0, 2, 0, 10, 10, 0, 0, - 11, 0, 0, 0, 2, 2, 3, 2, 0, 2, 3, 3, 3, 3, 3, 3, - 0, 3, 2, 0, 0, 3, 3, 3, 3, 3, 0, 0, 10, 2, 10, 0, - 3, 0, 1, 0, 3, 0, 1, 1, 3, 3, 0, 3, 3, 2, 2, 2, - 2, 3, 0, 2, 3, 0, 0, 0, 17, 17, 17, 17, 0, 17, 0, 0, - 2, 2, 0, 2, 9, 9, 9, 9, 2, 2, 9, 9, 9, 9, 9, 0, - 11, 10, 0, 0, 13, 0, 0, 0, 2, 0, 1, 12, 0, 0, 1, 12, - 16, 9, 9, 9, 16, 16, 16, 16, 2, 16, 16, 16, 2, 2, 2, 16, - 3, 3, 1, 1, 8, 7, 
8, 7, 5, 6, 8, 7, 8, 7, 5, 6, - 8, 7, 0, 0, 0, 0, 0, 8, 7, 5, 6, 8, 7, 8, 7, 8, - 7, 8, 8, 7, 5, 8, 7, 5, 8, 8, 8, 8, 7, 7, 7, 7, - 7, 7, 7, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, - 6, 8, 8, 8, 8, 7, 7, 7, 7, 5, 5, 5, 7, 8, 0, 0, - 5, 7, 5, 5, 7, 5, 7, 7, 5, 5, 7, 7, 5, 5, 7, 5, - 5, 7, 7, 5, 7, 7, 5, 7, 5, 5, 5, 7, 0, 0, 5, 5, - 5, 7, 7, 7, 5, 7, 5, 7, 8, 0, 0, 0, 12, 12, 12, 12, - 12, 12, 0, 0, 12, 0, 0, 12, 12, 2, 2, 2, 15, 15, 15, 0, - 15, 15, 15, 15, 8, 6, 8, 0, 8, 0, 8, 6, 8, 6, 8, 6, - 8, 8, 7, 8, 7, 8, 7, 5, 6, 8, 7, 8, 6, 8, 7, 5, - 7, 0, 0, 0, 0, 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 0, 0, 0, 14, 14, 14, 0, 0, 0, - 13, 13, 13, 0, 3, 0, 3, 3, 0, 0, 3, 0, 0, 3, 3, 0, - 3, 3, 3, 0, 3, 0, 3, 0, 0, 0, 3, 3, 3, 0, 0, 3, - 0, 3, 0, 3, 0, 0, 0, 3, 2, 2, 2, 9, 16, 0, 0, 0, - 16, 16, 16, 0, 9, 9, 0, 0, -}; - -/* Decomposition_Type: 2872 bytes. */ - -RE_UINT32 re_get_decomposition_type(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 13; - code = ch ^ (f << 13); - pos = (RE_UINT32)re_decomposition_type_stage_1[f] << 5; - f = code >> 8; - code ^= f << 8; - pos = (RE_UINT32)re_decomposition_type_stage_2[pos + f] << 4; - f = code >> 4; - code ^= f << 4; - pos = (RE_UINT32)re_decomposition_type_stage_3[pos + f] << 2; - f = code >> 2; - code ^= f << 2; - pos = (RE_UINT32)re_decomposition_type_stage_4[pos + f] << 2; - value = re_decomposition_type_stage_5[pos + code]; - - return value; -} - -/* East_Asian_Width. 
*/ - -static RE_UINT8 re_east_asian_width_stage_1[] = { - 0, 1, 2, 3, 4, 5, 5, 5, 5, 5, 6, 5, 5, 7, 8, 9, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 11, 10, 10, 10, 12, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 13, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 13, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 14, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 15, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 15, -}; - -static RE_UINT8 re_east_asian_width_stage_2[] = { - 0, 1, 2, 3, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 6, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 7, 8, 9, 10, 11, 12, 13, 14, 5, 15, 5, 16, 5, 5, 17, 18, - 19, 20, 21, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 23, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 24, 5, 5, 5, 5, 25, 5, 5, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 26, 5, 5, 5, 5, 5, 5, 5, 5, - 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, - 27, 27, 27, 27, 27, 27, 27, 27, 27, 22, 22, 5, 5, 5, 28, 29, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 30, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 31, 32, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 33, - 5, 34, 5, 5, 5, 
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 35, -}; - -static RE_UINT8 re_east_asian_width_stage_3[] = { - 0, 0, 1, 1, 1, 1, 1, 2, 0, 0, 3, 4, 5, 6, 7, 8, - 9, 10, 11, 12, 13, 14, 11, 0, 0, 0, 0, 0, 15, 16, 0, 0, - 0, 0, 0, 0, 0, 9, 9, 0, 0, 0, 0, 0, 17, 18, 0, 0, - 19, 19, 19, 19, 19, 19, 19, 0, 0, 20, 21, 20, 21, 0, 0, 0, - 9, 19, 19, 19, 19, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 22, 22, 22, 22, 22, 22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 23, 24, 25, 0, 0, 0, 26, 27, 0, 28, 0, 0, 0, 0, 0, - 29, 30, 31, 0, 0, 32, 33, 34, 35, 34, 0, 36, 0, 37, 38, 0, - 39, 40, 41, 42, 43, 44, 45, 0, 46, 47, 48, 49, 0, 0, 0, 0, - 0, 44, 50, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 19, 19, 19, 19, 19, 19, 19, 19, 51, 19, - 19, 19, 19, 19, 33, 19, 19, 52, 19, 53, 21, 54, 55, 56, 57, 0, - 58, 59, 0, 0, 60, 0, 61, 0, 0, 62, 0, 62, 63, 19, 64, 19, - 0, 0, 0, 65, 0, 38, 0, 66, 0, 0, 0, 0, 0, 0, 67, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 68, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 69, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 22, 70, 22, 22, 22, 22, 22, 71, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 72, 0, 73, - 74, 22, 22, 75, 76, 22, 22, 22, 22, 77, 22, 22, 22, 22, 22, 22, - 78, 22, 79, 76, 22, 22, 22, 22, 75, 22, 22, 80, 22, 22, 71, 22, - 22, 75, 22, 22, 81, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 75, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 0, 0, 0, 0, - 22, 22, 22, 22, 22, 22, 22, 22, 82, 22, 22, 22, 83, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 22, 82, 0, 0, 0, 0, 0, 0, 0, 0, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 71, 0, 0, 0, 0, 0, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 84, 0, 22, 22, 85, 86, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 87, 88, 88, 88, 88, 88, 89, 90, 90, 90, 90, 91, 92, 93, 94, 65, - 95, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 96, 19, 97, 19, 19, 19, 
34, 19, 19, 96, 0, 0, 0, 0, 0, 0, - 98, 22, 22, 80, 99, 95, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 79, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 0, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 97, -}; - -static RE_UINT8 re_east_asian_width_stage_4[] = { - 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 2, 3, 4, 5, 6, - 7, 8, 9, 7, 0, 10, 0, 0, 11, 12, 11, 13, 14, 10, 9, 14, - 8, 12, 9, 5, 15, 0, 0, 0, 16, 0, 12, 0, 0, 13, 12, 0, - 17, 0, 11, 12, 9, 11, 7, 15, 13, 0, 0, 0, 0, 0, 0, 10, - 5, 5, 5, 11, 0, 18, 17, 15, 11, 0, 7, 16, 7, 7, 7, 7, - 17, 7, 7, 7, 19, 7, 14, 0, 20, 20, 20, 20, 18, 9, 14, 14, - 9, 7, 0, 0, 8, 15, 12, 10, 0, 11, 0, 12, 17, 11, 0, 0, - 0, 0, 21, 11, 12, 15, 15, 0, 12, 10, 0, 0, 22, 10, 12, 0, - 12, 11, 12, 9, 7, 7, 7, 0, 7, 7, 14, 0, 0, 0, 15, 0, - 0, 0, 14, 0, 10, 11, 0, 0, 0, 12, 0, 0, 8, 12, 18, 12, - 15, 15, 10, 17, 18, 16, 7, 5, 0, 7, 0, 14, 0, 0, 11, 11, - 10, 0, 0, 0, 14, 7, 13, 13, 13, 13, 0, 0, 0, 15, 15, 0, - 0, 15, 0, 0, 0, 0, 0, 12, 0, 0, 23, 0, 7, 7, 19, 7, - 7, 0, 0, 0, 13, 14, 0, 0, 13, 13, 0, 14, 14, 13, 18, 13, - 14, 0, 0, 0, 13, 14, 0, 12, 0, 22, 15, 13, 0, 14, 0, 5, - 5, 0, 0, 0, 19, 19, 9, 19, 0, 0, 0, 13, 0, 7, 7, 19, - 19, 0, 7, 7, 0, 0, 0, 15, 0, 13, 7, 7, 0, 24, 1, 25, - 0, 26, 0, 0, 0, 17, 14, 0, 20, 20, 27, 20, 20, 0, 0, 0, - 20, 28, 0, 0, 20, 20, 20, 0, 29, 20, 20, 20, 20, 20, 20, 30, - 31, 20, 20, 20, 20, 30, 31, 20, 0, 31, 20, 20, 20, 20, 20, 28, - 20, 20, 30, 0, 20, 20, 7, 7, 20, 20, 20, 32, 20, 30, 0, 0, - 20, 20, 28, 0, 30, 20, 20, 20, 20, 30, 20, 0, 33, 34, 34, 34, - 34, 34, 34, 34, 35, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 37, - 38, 36, 38, 36, 38, 36, 38, 39, 34, 40, 36, 37, 28, 0, 0, 0, - 7, 7, 9, 0, 7, 7, 7, 14, 30, 0, 0, 0, 20, 20, 32, 0, -}; - -static RE_UINT8 re_east_asian_width_stage_5[] = { - 0, 0, 0, 0, 5, 5, 5, 5, 5, 5, 5, 0, 0, 1, 5, 5, - 1, 5, 5, 1, 1, 0, 1, 0, 5, 1, 1, 5, 1, 1, 1, 1, - 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 
1, 0, 0, 0, - 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, - 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, - 3, 3, 3, 3, 0, 2, 0, 0, 0, 1, 1, 0, 0, 3, 3, 0, - 0, 0, 5, 5, 5, 5, 0, 0, 0, 5, 5, 0, 3, 3, 0, 3, - 3, 3, 0, 0, 4, 3, 3, 3, 3, 3, 3, 0, 0, 3, 3, 3, - 3, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 2, 2, 2, 0, 0, 0, - 4, 4, 4, 0, -}; - -/* East_Asian_Width: 1668 bytes. */ - -RE_UINT32 re_get_east_asian_width(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 12; - code = ch ^ (f << 12); - pos = (RE_UINT32)re_east_asian_width_stage_1[f] << 4; - f = code >> 8; - code ^= f << 8; - pos = (RE_UINT32)re_east_asian_width_stage_2[pos + f] << 4; - f = code >> 4; - code ^= f << 4; - pos = (RE_UINT32)re_east_asian_width_stage_3[pos + f] << 2; - f = code >> 2; - code ^= f << 2; - pos = (RE_UINT32)re_east_asian_width_stage_4[pos + f] << 2; - value = re_east_asian_width_stage_5[pos + code]; - - return value; -} - -/* Joining_Group. 
*/ - -static RE_UINT8 re_joining_group_stage_1[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, -}; - -static RE_UINT8 re_joining_group_stage_2[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -}; - -static RE_UINT8 re_joining_group_stage_3[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 0, - 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_joining_group_stage_4[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 0, 0, 0, 7, 8, 9, - 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 0, 0, 21, 0, 22, - 0, 0, 23, 24, 25, 26, 0, 0, 0, 27, 28, 29, 30, 31, 32, 33, - 0, 0, 0, 0, 34, 35, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_joining_group_stage_5[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 45, 0, 3, 3, 43, 3, 45, 3, - 4, 41, 4, 4, 13, 13, 13, 6, 6, 31, 31, 35, 35, 33, 33, 39, - 39, 1, 1, 11, 11, 55, 55, 55, 0, 9, 29, 19, 22, 24, 26, 16, - 43, 45, 45, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 29, - 0, 3, 3, 3, 0, 3, 43, 43, 45, 4, 4, 4, 4, 4, 4, 4, - 4, 13, 13, 13, 13, 13, 13, 13, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 31, 31, 31, 31, 31, 31, 31, 31, 31, 35, 35, 35, 33, 33, 39, - 1, 9, 9, 9, 9, 9, 9, 29, 29, 11, 38, 11, 19, 19, 19, 11, - 11, 11, 11, 11, 11, 22, 22, 22, 22, 26, 26, 26, 26, 56, 21, 13, - 41, 17, 17, 14, 43, 43, 43, 43, 43, 43, 43, 43, 55, 47, 55, 43, - 45, 45, 46, 46, 0, 41, 0, 0, 0, 0, 0, 0, 0, 0, 6, 31, - 0, 0, 35, 33, 1, 0, 0, 21, 2, 0, 5, 12, 12, 7, 7, 15, - 44, 50, 18, 42, 42, 48, 49, 20, 23, 25, 27, 36, 10, 8, 28, 32, - 34, 30, 7, 37, 40, 5, 12, 7, 0, 0, 0, 0, 0, 51, 52, 53, - 4, 4, 4, 4, 4, 4, 4, 13, 13, 6, 6, 31, 35, 1, 1, 1, - 9, 9, 11, 11, 11, 24, 24, 26, 26, 26, 22, 31, 31, 35, 13, 13, - 35, 31, 13, 3, 3, 55, 55, 45, 43, 43, 54, 54, 13, 35, 35, 19, - 4, 0, 13, 39, 9, 29, 22, 24, 45, 45, 31, 43, 57, 0, 0, 0, -}; - -/* Joining_Group: 
481 bytes. */ - -RE_UINT32 re_get_joining_group(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 16; - code = ch ^ (f << 16); - pos = (RE_UINT32)re_joining_group_stage_1[f] << 4; - f = code >> 12; - code ^= f << 12; - pos = (RE_UINT32)re_joining_group_stage_2[pos + f] << 5; - f = code >> 7; - code ^= f << 7; - pos = (RE_UINT32)re_joining_group_stage_3[pos + f] << 4; - f = code >> 3; - code ^= f << 3; - pos = (RE_UINT32)re_joining_group_stage_4[pos + f] << 3; - value = re_joining_group_stage_5[pos + code]; - - return value; -} - -/* Joining_Type. */ - -static RE_UINT8 re_joining_type_stage_1[] = { - 0, 1, 2, 2, 2, 3, 2, 4, 5, 2, 2, 6, 2, 2, 7, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 8, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, -}; - -static RE_UINT8 re_joining_type_stage_2[] = { - 0, 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, - 15, 1, 1, 16, 1, 1, 1, 17, 18, 19, 20, 21, 22, 23, 1, 1, - 24, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 25, 26, 1, 1, - 27, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 28, 1, 29, 30, 31, 32, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 33, 1, 1, 34, 35, - 1, 36, 1, 1, 1, 1, 1, 1, 1, 1, 37, 1, 1, 1, 1, 1, - 38, 39, 1, 1, 1, 1, 40, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 41, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 42, 43, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 44, 45, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -}; - -static RE_UINT8 re_joining_type_stage_3[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 2, 5, 6, 0, 0, 0, - 0, 7, 8, 9, 10, 2, 11, 12, 13, 14, 15, 15, 16, 17, 18, 19, - 20, 21, 22, 2, 23, 24, 25, 26, 0, 0, 27, 28, 29, 15, 30, 31, - 0, 32, 33, 0, 34, 35, 0, 0, 0, 0, 36, 0, 0, 0, 37, 38, - 39, 0, 0, 40, 41, 42, 43, 0, 44, 0, 0, 45, 46, 0, 43, 0, - 47, 0, 0, 45, 48, 44, 0, 49, 47, 0, 0, 45, 50, 0, 43, 0, - 44, 0, 0, 51, 46, 52, 43, 0, 53, 0, 0, 0, 54, 0, 0, 0, - 0, 0, 0, 55, 56, 57, 43, 0, 0, 0, 0, 51, 58, 0, 43, 0, - 0, 0, 0, 0, 46, 0, 43, 0, 0, 0, 0, 0, 59, 60, 0, 0, - 0, 0, 0, 61, 62, 0, 0, 0, 0, 0, 0, 63, 64, 0, 0, 0, - 0, 65, 0, 66, 0, 0, 0, 67, 68, 69, 2, 70, 52, 0, 0, 0, - 0, 0, 71, 72, 0, 73, 28, 74, 75, 1, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 71, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 76, 0, 76, 0, 43, 0, 43, 0, 0, 0, 77, 78, 79, 0, 0, - 80, 0, 15, 15, 15, 15, 15, 81, 82, 15, 83, 0, 0, 0, 0, 0, - 0, 0, 84, 85, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 86, 0, 0, 0, 87, 88, 89, 0, 0, 0, 0, 0, 0, 0, 0, - 90, 0, 0, 91, 53, 0, 92, 90, 93, 0, 94, 0, 0, 0, 95, 93, - 0, 0, 96, 97, 0, 0, 0, 0, 0, 0, 0, 0, 0, 98, 99, 100, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 101, 96, - 102, 0, 103, 0, 0, 0, 104, 0, 0, 0, 0, 0, 0, 2, 2, 28, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 20, 93, - 0, 0, 0, 0, 0, 0, 0, 20, 0, 0, 0, 0, 0, 0, 2, 2, - 0, 0, 105, 0, 0, 0, 0, 0, 0, 106, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 20, 107, 0, 20, 0, 0, 0, 0, 0, 93, - 108, 0, 57, 0, 15, 15, 15, 109, 0, 0, 0, 0, 100, 0, 2, 93, - 0, 0, 110, 0, 111, 93, 0, 0, 39, 0, 0, 112, 0, 0, 0, 0, - 0, 0, 113, 114, 115, 0, 0, 0, 0, 0, 0, 116, 44, 0, 117, 52, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 118, 0, - 0, 119, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, - 2, 0, 101, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 20, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 120, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, - 121, 0, 0, 122, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 44, 0, 0, 123, 101, 0, 0, 0, 93, 0, 0, 124, 0, 0, 0, 0, - 39, 0, 125, 126, 0, 0, 0, 0, 93, 0, 0, 127, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 129, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 20, 39, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 130, 131, 132, 0, 105, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 76, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 44, 0, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, -}; - -static RE_UINT8 re_joining_type_stage_4[] = { - 0, 0, 0, 0, 0, 0, 0, 1, 2, 2, 2, 2, 3, 2, 4, 0, - 5, 2, 2, 2, 2, 2, 2, 6, 7, 6, 0, 0, 2, 2, 8, 9, - 10, 11, 12, 13, 14, 15, 15, 15, 16, 15, 17, 2, 0, 0, 0, 18, - 19, 20, 15, 15, 15, 15, 21, 21, 21, 21, 22, 15, 15, 15, 15, 15, - 23, 21, 21, 24, 25, 26, 2, 27, 2, 27, 28, 29, 0, 0, 18, 30, - 0, 0, 0, 3, 31, 32, 22, 33, 15, 15, 34, 23, 2, 2, 8, 35, - 15, 15, 32, 15, 15, 15, 13, 36, 24, 36, 22, 15, 0, 37, 2, 2, - 9, 0, 0, 0, 0, 0, 18, 15, 15, 15, 38, 2, 2, 0, 39, 0, - 0, 37, 6, 2, 2, 5, 5, 4, 36, 33, 12, 13, 15, 40, 5, 0, - 41, 15, 25, 42, 0, 2, 2, 2, 2, 2, 2, 8, 8, 0, 0, 0, - 0, 0, 43, 9, 5, 2, 9, 1, 5, 2, 0, 0, 37, 0, 0, 0, - 1, 0, 0, 0, 0, 0, 0, 9, 5, 9, 0, 1, 7, 0, 0, 0, - 7, 3, 27, 4, 4, 1, 0, 0, 5, 6, 9, 1, 0, 0, 0, 27, - 0, 43, 0, 0, 43, 0, 0, 0, 9, 0, 0, 1, 0, 0, 0, 37, - 9, 37, 28, 4, 0, 7, 0, 0, 0, 43, 0, 4, 0, 0, 43, 0, - 37, 44, 0, 0, 1, 2, 8, 0, 0, 3, 2, 8, 1, 2, 6, 9, - 0, 0, 2, 4, 0, 0, 4, 0, 0, 45, 1, 0, 5, 2, 2, 8, - 2, 28, 0, 5, 2, 2, 5, 2, 2, 2, 2, 9, 0, 0, 0, 5, - 28, 2, 7, 7, 0, 0, 4, 37, 5, 9, 0, 0, 43, 7, 0, 1, - 37, 9, 0, 0, 0, 6, 2, 4, 0, 43, 5, 2, 2, 0, 0, 1, - 0, 46, 47, 4, 15, 15, 0, 0, 0, 46, 15, 15, 15, 15, 48, 0, - 8, 3, 9, 0, 43, 0, 5, 0, 0, 3, 27, 0, 0, 43, 2, 8, - 44, 5, 2, 9, 3, 2, 2, 27, 2, 0, 0, 0, 0, 28, 8, 9, - 0, 0, 3, 2, 4, 0, 0, 0, 37, 4, 
6, 0, 0, 43, 4, 45, - 0, 0, 0, 2, 2, 37, 0, 0, 8, 2, 2, 2, 28, 2, 9, 1, - 0, 9, 0, 0, 2, 8, 0, 0, 0, 0, 3, 49, 0, 0, 37, 8, - 2, 9, 37, 2, 0, 0, 37, 4, 0, 0, 7, 0, 8, 2, 2, 4, - 43, 43, 3, 0, 50, 0, 0, 0, 0, 37, 2, 4, 0, 3, 2, 2, - 3, 37, 4, 9, 0, 0, 5, 8, 7, 7, 0, 0, 3, 0, 0, 9, - 28, 27, 9, 37, 0, 0, 0, 4, 0, 1, 9, 1, 0, 0, 0, 43, - 0, 0, 5, 0, 5, 7, 0, 2, 0, 0, 8, 3, 0, 0, 2, 2, - 3, 8, 7, 1, 0, 3, 2, 5, 2, 9, 0, 0, 0, 37, 2, 8, - 0, 0, 3, 1, 2, 6, 0, 0, 0, 3, 4, 0, 3, 2, 2, 2, - 8, 5, 2, 0, -}; - -static RE_UINT8 re_joining_type_stage_5[] = { - 0, 0, 0, 0, 0, 5, 0, 0, 5, 5, 5, 5, 0, 0, 0, 5, - 5, 5, 0, 0, 0, 5, 5, 5, 5, 5, 0, 5, 0, 5, 5, 0, - 5, 5, 5, 0, 5, 0, 0, 0, 2, 0, 3, 3, 3, 3, 2, 3, - 2, 3, 2, 2, 2, 2, 2, 3, 3, 3, 3, 2, 2, 2, 2, 2, - 1, 2, 2, 2, 3, 2, 2, 5, 0, 0, 2, 2, 5, 3, 3, 3, - 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 2, 2, 3, - 2, 3, 2, 3, 2, 2, 3, 3, 0, 3, 5, 5, 5, 0, 0, 5, - 5, 0, 5, 5, 5, 5, 3, 3, 2, 0, 0, 2, 3, 5, 2, 2, - 2, 3, 3, 3, 2, 2, 3, 2, 3, 2, 3, 2, 0, 3, 2, 2, - 3, 2, 2, 2, 0, 0, 5, 5, 2, 2, 2, 5, 0, 0, 1, 0, - 3, 2, 0, 0, 2, 0, 2, 2, 3, 0, 0, 0, 0, 0, 5, 0, - 5, 0, 5, 0, 0, 5, 0, 5, 0, 0, 0, 2, 0, 0, 1, 5, - 2, 5, 2, 0, 0, 1, 5, 5, 2, 2, 4, 0, -}; - -/* Joining_Type: 1896 bytes. */ - -RE_UINT32 re_get_joining_type(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 13; - code = ch ^ (f << 13); - pos = (RE_UINT32)re_joining_type_stage_1[f] << 5; - f = code >> 8; - code ^= f << 8; - pos = (RE_UINT32)re_joining_type_stage_2[pos + f] << 4; - f = code >> 4; - code ^= f << 4; - pos = (RE_UINT32)re_joining_type_stage_3[pos + f] << 2; - f = code >> 2; - code ^= f << 2; - pos = (RE_UINT32)re_joining_type_stage_4[pos + f] << 2; - value = re_joining_type_stage_5[pos + code]; - - return value; -} - -/* Line_Break. 
*/ - -static RE_UINT8 re_line_break_stage_1[] = { - 0, 1, 2, 3, 4, 5, 5, 5, 5, 5, 6, 7, 8, 9, 10, 11, - 12, 13, 14, 15, 10, 10, 16, 10, 10, 10, 10, 17, 10, 18, 19, 20, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 21, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 21, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 22, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, -}; - -static RE_UINT8 re_line_break_stage_2[] = { - 0, 1, 2, 2, 2, 3, 4, 5, 2, 6, 7, 8, 9, 10, 11, 12, - 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, - 29, 30, 31, 32, 33, 34, 35, 36, 37, 2, 2, 2, 2, 38, 39, 40, - 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 2, 51, 2, 2, 52, 53, - 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, - 2, 2, 2, 70, 2, 2, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, - 81, 82, 83, 84, 85, 86, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, - 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, - 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, - 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 87, 79, 79, 79, 79, - 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, - 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, - 88, 79, 79, 79, 79, 79, 79, 79, 79, 89, 2, 2, 90, 
91, 2, 92, - 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 101, - 102, 103, 104, 105, 106, 107, 101, 102, 103, 104, 105, 106, 107, 101, 102, 103, - 104, 105, 106, 107, 101, 102, 103, 104, 105, 106, 107, 101, 102, 103, 104, 105, - 106, 107, 101, 102, 103, 104, 105, 106, 107, 101, 102, 103, 104, 105, 106, 107, - 101, 102, 103, 104, 105, 106, 107, 101, 102, 103, 104, 105, 106, 107, 101, 102, - 103, 104, 105, 106, 107, 101, 102, 103, 104, 105, 106, 107, 101, 102, 103, 108, - 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, 109, - 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, - 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, - 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, - 72, 72, 79, 79, 79, 79, 110, 111, 2, 2, 112, 113, 114, 115, 116, 117, - 118, 119, 120, 121, 72, 122, 123, 124, 2, 125, 72, 72, 72, 72, 72, 72, - 126, 72, 127, 128, 129, 72, 130, 72, 131, 72, 72, 72, 132, 72, 72, 72, - 133, 134, 135, 136, 72, 72, 72, 72, 72, 72, 72, 72, 72, 137, 72, 72, - 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, - 2, 2, 2, 2, 2, 2, 138, 72, 139, 72, 72, 72, 72, 72, 72, 72, - 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, - 2, 2, 2, 2, 140, 141, 142, 2, 143, 72, 72, 72, 72, 72, 72, 72, - 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, - 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, - 2, 2, 2, 2, 144, 72, 72, 72, 72, 72, 72, 72, 72, 72, 145, 146, - 147, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, - 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, - 2, 148, 149, 150, 151, 72, 152, 72, 153, 154, 155, 2, 2, 156, 2, 157, - 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, - 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, - 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 158, 159, 72, 72, - 160, 161, 162, 163, 164, 72, 165, 166, 167, 168, 169, 170, 171, 172, 
173, 72, - 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, - 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, - 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 174, - 175, 72, 176, 177, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, - 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, -}; - -static RE_UINT16 re_line_break_stage_3[] = { - 0, 1, 2, 3, 4, 5, 4, 6, 7, 1, 8, 9, 4, 10, 4, 10, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 11, 12, 4, 4, - 1, 1, 1, 1, 13, 14, 15, 16, 17, 4, 18, 4, 4, 4, 4, 4, - 19, 4, 4, 4, 4, 4, 4, 4, 4, 4, 20, 21, 4, 22, 21, 4, - 23, 24, 1, 25, 26, 27, 28, 29, 30, 31, 4, 4, 32, 1, 33, 34, - 4, 4, 4, 4, 4, 35, 36, 37, 38, 39, 4, 1, 40, 4, 4, 4, - 4, 4, 41, 42, 37, 4, 32, 43, 4, 44, 45, 46, 4, 47, 48, 48, - 48, 48, 49, 48, 48, 48, 50, 51, 52, 4, 4, 53, 1, 54, 55, 56, - 57, 58, 59, 60, 61, 62, 63, 64, 65, 58, 59, 66, 67, 68, 69, 70, - 71, 18, 59, 72, 73, 74, 63, 75, 57, 58, 59, 72, 76, 77, 63, 20, - 78, 79, 80, 81, 82, 83, 69, 84, 85, 86, 59, 87, 88, 89, 63, 90, - 91, 86, 59, 92, 88, 93, 63, 94, 91, 86, 4, 95, 96, 97, 63, 98, - 99, 100, 4, 101, 102, 103, 48, 104, 105, 106, 106, 107, 108, 109, 48, 48, - 110, 111, 112, 113, 114, 115, 48, 48, 116, 117, 37, 118, 56, 4, 119, 120, - 121, 122, 1, 123, 124, 125, 48, 48, 106, 106, 106, 106, 126, 106, 106, 106, - 106, 127, 4, 4, 128, 4, 4, 4, 129, 129, 129, 129, 129, 129, 130, 130, - 130, 130, 131, 132, 132, 132, 132, 132, 4, 4, 4, 4, 133, 134, 4, 4, - 133, 4, 4, 135, 136, 137, 4, 4, 4, 136, 4, 4, 4, 138, 139, 119, - 4, 140, 4, 4, 4, 4, 4, 141, 142, 4, 4, 4, 4, 4, 4, 4, - 142, 143, 4, 4, 4, 4, 144, 74, 145, 146, 4, 147, 4, 148, 145, 149, - 106, 106, 106, 106, 106, 150, 151, 140, 152, 151, 4, 4, 4, 4, 4, 20, - 4, 4, 153, 4, 4, 4, 4, 154, 4, 119, 155, 155, 156, 106, 157, 158, - 106, 106, 159, 106, 160, 161, 4, 4, 4, 162, 106, 106, 106, 163, 106, 164, - 151, 151, 157, 48, 48, 48, 48, 48, 165, 4, 4, 166, 167, 168, 169, 170, - 171, 4, 172, 37, 4, 4, 41, 173, 
4, 4, 166, 174, 175, 37, 4, 176, - 48, 48, 48, 48, 20, 177, 178, 179, 4, 4, 4, 4, 1, 1, 180, 181, - 4, 182, 4, 4, 182, 183, 4, 184, 4, 4, 4, 185, 185, 186, 4, 187, - 188, 189, 190, 191, 192, 193, 194, 195, 196, 119, 197, 198, 199, 1, 1, 200, - 201, 202, 203, 4, 4, 204, 205, 206, 207, 206, 4, 4, 4, 208, 4, 4, - 209, 210, 211, 212, 213, 214, 215, 4, 216, 217, 218, 219, 4, 4, 4, 4, - 4, 220, 221, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 222, - 4, 4, 223, 48, 224, 48, 225, 225, 225, 225, 225, 225, 225, 225, 225, 226, - 225, 225, 225, 225, 205, 225, 225, 227, 225, 228, 229, 230, 231, 232, 233, 4, - 234, 235, 4, 236, 237, 4, 238, 239, 4, 240, 4, 241, 242, 243, 244, 245, - 246, 4, 4, 4, 4, 247, 248, 249, 225, 250, 4, 4, 251, 4, 252, 4, - 253, 254, 4, 4, 4, 255, 4, 256, 4, 4, 4, 4, 119, 257, 48, 48, - 48, 48, 48, 48, 48, 48, 48, 48, 4, 4, 46, 4, 4, 46, 4, 4, - 4, 4, 4, 4, 4, 4, 258, 259, 4, 4, 128, 4, 4, 4, 260, 261, - 4, 223, 262, 262, 262, 262, 1, 1, 263, 264, 265, 266, 48, 48, 48, 48, - 267, 268, 267, 267, 267, 267, 267, 222, 267, 267, 267, 267, 267, 267, 267, 267, - 267, 267, 267, 267, 267, 269, 48, 270, 271, 272, 273, 274, 275, 267, 276, 267, - 277, 278, 279, 267, 276, 267, 277, 280, 281, 267, 282, 283, 267, 267, 267, 267, - 284, 267, 267, 285, 267, 267, 222, 286, 267, 284, 267, 267, 287, 267, 267, 267, - 267, 267, 267, 267, 267, 267, 267, 284, 267, 267, 267, 267, 4, 4, 4, 4, - 267, 288, 267, 267, 267, 267, 267, 267, 289, 267, 267, 267, 290, 4, 4, 176, - 291, 4, 292, 48, 4, 4, 258, 293, 4, 294, 4, 4, 4, 4, 4, 295, - 46, 296, 224, 48, 48, 48, 48, 90, 297, 4, 298, 299, 4, 4, 4, 300, - 301, 4, 4, 166, 302, 151, 1, 303, 37, 4, 304, 4, 305, 306, 129, 307, - 52, 4, 4, 308, 309, 310, 48, 48, 4, 4, 311, 180, 312, 313, 106, 159, - 106, 106, 106, 106, 314, 315, 32, 316, 317, 318, 262, 48, 48, 48, 48, 48, - 48, 48, 48, 48, 4, 4, 319, 151, 320, 321, 322, 323, 322, 324, 322, 320, - 321, 322, 323, 322, 324, 322, 320, 321, 322, 323, 322, 324, 322, 320, 321, 322, - 323, 322, 324, 322, 
320, 321, 322, 323, 322, 324, 322, 320, 321, 322, 323, 322, - 324, 322, 320, 321, 322, 323, 322, 324, 322, 320, 321, 322, 323, 322, 324, 322, - 323, 322, 325, 130, 326, 132, 132, 327, 328, 328, 328, 328, 328, 328, 328, 328, - 223, 329, 330, 331, 332, 4, 4, 4, 4, 4, 4, 4, 333, 334, 4, 4, - 4, 4, 4, 335, 48, 4, 4, 4, 4, 336, 4, 4, 20, 48, 48, 337, - 1, 338, 180, 339, 340, 341, 342, 185, 4, 4, 4, 4, 4, 4, 4, 343, - 344, 345, 267, 346, 267, 347, 348, 349, 4, 350, 4, 46, 351, 352, 353, 354, - 355, 4, 137, 356, 184, 184, 48, 48, 4, 4, 4, 4, 4, 4, 4, 224, - 357, 4, 4, 358, 4, 4, 4, 4, 224, 359, 48, 48, 48, 4, 4, 360, - 4, 119, 4, 4, 4, 74, 48, 48, 4, 46, 296, 4, 224, 48, 48, 48, - 4, 361, 4, 4, 362, 363, 48, 48, 4, 184, 151, 48, 48, 48, 48, 48, - 364, 4, 4, 365, 4, 366, 48, 48, 4, 367, 4, 368, 48, 48, 48, 48, - 4, 4, 4, 369, 48, 48, 48, 48, 370, 371, 4, 372, 20, 373, 4, 4, - 4, 4, 4, 374, 4, 375, 4, 376, 4, 4, 4, 4, 377, 48, 48, 48, - 48, 48, 48, 48, 48, 48, 4, 46, 171, 4, 4, 378, 379, 336, 380, 48, - 171, 4, 4, 381, 382, 4, 377, 151, 171, 4, 305, 383, 384, 48, 48, 48, - 171, 4, 4, 308, 385, 151, 48, 48, 4, 4, 32, 386, 151, 48, 48, 48, - 4, 4, 4, 4, 4, 4, 46, 48, 4, 4, 4, 4, 4, 4, 387, 384, - 4, 4, 4, 4, 4, 388, 4, 4, 389, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 390, 4, 4, 46, 48, 48, 48, 48, 48, - 4, 4, 4, 377, 48, 48, 48, 48, 4, 4, 4, 4, 141, 391, 1, 51, - 392, 171, 48, 48, 48, 48, 48, 48, 393, 48, 48, 48, 48, 48, 48, 48, - 4, 4, 4, 4, 4, 4, 4, 154, 4, 4, 22, 4, 4, 4, 394, 1, - 395, 4, 396, 4, 4, 184, 48, 48, 4, 4, 4, 4, 397, 48, 48, 48, - 4, 4, 4, 4, 4, 223, 4, 333, 4, 4, 4, 4, 4, 185, 4, 4, - 4, 145, 398, 399, 400, 4, 4, 4, 401, 402, 4, 403, 404, 86, 4, 4, - 4, 4, 375, 4, 4, 4, 4, 4, 4, 4, 4, 4, 405, 406, 406, 406, - 400, 4, 407, 408, 409, 410, 411, 412, 413, 359, 414, 359, 48, 48, 48, 333, - 267, 267, 270, 267, 267, 267, 267, 267, 267, 222, 284, 415, 283, 283, 48, 48, - 416, 225, 417, 225, 225, 225, 418, 225, 225, 416, 48, 48, 48, 48, 419, 420, - 421, 267, 267, 
285, 422, 393, 48, 48, 267, 267, 423, 424, 267, 267, 267, 289, - 267, 222, 267, 425, 426, 48, 267, 423, 267, 267, 267, 284, 427, 267, 267, 267, - 267, 267, 428, 429, 267, 267, 267, 430, 431, 432, 433, 434, 296, 267, 435, 48, - 48, 48, 48, 48, 48, 48, 48, 436, 267, 267, 267, 267, 437, 48, 48, 48, - 267, 267, 267, 267, 269, 48, 48, 48, 4, 4, 4, 4, 4, 4, 4, 296, - 267, 267, 267, 267, 267, 267, 267, 282, 438, 48, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 48, -}; - -static RE_UINT8 re_line_break_stage_4[] = { - 0, 0, 0, 0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 4, 5, 6, 7, 8, 9, 10, 11, 12, 12, 12, 12, 12, 13, 14, 15, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 16, 17, 14, - 14, 14, 14, 14, 14, 16, 18, 19, 0, 0, 20, 0, 0, 0, 0, 0, - 21, 22, 23, 24, 25, 26, 27, 14, 22, 28, 29, 28, 28, 26, 28, 30, - 14, 14, 14, 24, 14, 14, 14, 14, 14, 14, 14, 24, 31, 28, 31, 14, - 25, 14, 14, 14, 28, 28, 24, 32, 0, 0, 0, 0, 0, 0, 0, 33, - 0, 0, 0, 0, 0, 0, 34, 34, 34, 35, 0, 0, 0, 0, 0, 0, - 14, 14, 14, 14, 36, 14, 14, 37, 36, 36, 14, 14, 14, 38, 38, 14, - 14, 39, 14, 14, 14, 14, 14, 14, 14, 19, 0, 0, 0, 14, 14, 14, - 14, 14, 14, 14, 36, 36, 36, 36, 39, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 38, 39, 14, 14, 14, 14, 14, 14, 14, 40, 41, 36, 42, - 43, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 44, - 19, 45, 0, 46, 36, 36, 36, 36, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 48, 36, 36, 47, 49, 38, 36, 36, 36, 36, 36, - 14, 14, 38, 14, 50, 51, 13, 14, 0, 0, 0, 0, 0, 52, 53, 54, - 14, 14, 14, 14, 14, 19, 0, 0, 12, 12, 12, 12, 12, 55, 56, 14, - 45, 14, 14, 14, 14, 14, 14, 14, 14, 14, 57, 0, 0, 0, 45, 19, - 0, 0, 45, 19, 45, 0, 0, 14, 12, 12, 12, 12, 12, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 39, 19, 14, 14, 14, 14, 14, 14, 14, - 0, 0, 0, 0, 0, 53, 39, 14, 14, 14, 14, 0, 0, 0, 0, 0, - 45, 36, 36, 36, 36, 36, 36, 36, 0, 0, 14, 14, 58, 38, 36, 36, - 14, 14, 14, 0, 0, 19, 0, 0, 0, 0, 19, 0, 19, 0, 0, 36, - 14, 14, 14, 14, 14, 14, 14, 38, 14, 14, 14, 14, 
19, 0, 36, 38, - 36, 36, 36, 36, 36, 36, 36, 36, 38, 14, 14, 14, 14, 14, 38, 36, - 36, 36, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 53, - 0, 0, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 0, 45, 0, - 19, 0, 0, 0, 14, 14, 14, 14, 14, 0, 59, 12, 12, 12, 12, 12, - 14, 14, 14, 14, 39, 14, 14, 14, 43, 0, 39, 14, 14, 14, 38, 39, - 38, 39, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 38, 14, 14, 14, - 38, 38, 36, 14, 14, 36, 45, 0, 0, 0, 53, 43, 53, 43, 0, 38, - 36, 36, 36, 43, 36, 36, 14, 39, 14, 0, 36, 12, 12, 12, 12, 12, - 14, 51, 14, 14, 50, 9, 36, 36, 43, 0, 39, 14, 14, 38, 36, 39, - 38, 14, 39, 38, 14, 36, 53, 0, 0, 53, 36, 43, 53, 43, 0, 36, - 43, 36, 36, 36, 39, 14, 38, 38, 36, 36, 36, 12, 12, 12, 12, 12, - 0, 14, 19, 36, 36, 36, 36, 36, 43, 0, 39, 14, 14, 14, 14, 39, - 38, 14, 39, 14, 14, 36, 45, 0, 0, 0, 0, 43, 0, 43, 0, 36, - 38, 36, 36, 36, 36, 36, 36, 36, 9, 36, 36, 36, 36, 36, 36, 36, - 0, 0, 53, 43, 53, 43, 0, 36, 36, 36, 36, 0, 36, 36, 14, 39, - 36, 45, 39, 14, 14, 38, 36, 14, 38, 14, 14, 36, 39, 38, 38, 14, - 36, 39, 38, 36, 14, 38, 36, 14, 14, 14, 14, 14, 14, 36, 36, 0, - 0, 53, 36, 0, 53, 0, 0, 36, 38, 36, 36, 43, 36, 36, 36, 36, - 14, 14, 14, 14, 9, 38, 36, 36, 43, 0, 39, 14, 14, 14, 38, 14, - 38, 14, 14, 14, 14, 14, 14, 14, 14, 14, 39, 14, 14, 36, 39, 0, - 0, 0, 53, 0, 53, 0, 0, 36, 36, 36, 43, 53, 14, 36, 36, 36, - 36, 36, 36, 36, 14, 14, 14, 14, 36, 0, 39, 14, 14, 14, 38, 14, - 14, 14, 39, 14, 14, 36, 45, 0, 36, 36, 43, 53, 36, 36, 36, 38, - 39, 38, 36, 36, 36, 36, 36, 36, 14, 14, 14, 14, 14, 38, 39, 0, - 0, 0, 53, 0, 53, 0, 0, 38, 36, 36, 36, 43, 36, 36, 36, 36, - 14, 14, 14, 36, 60, 14, 14, 14, 36, 0, 39, 14, 14, 14, 14, 14, - 14, 14, 14, 38, 36, 14, 14, 14, 14, 39, 14, 14, 14, 14, 39, 36, - 14, 14, 14, 38, 36, 53, 36, 43, 0, 0, 53, 53, 0, 0, 0, 0, - 36, 0, 38, 36, 36, 36, 36, 36, 61, 62, 62, 62, 62, 62, 62, 62, - 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 63, 36, 42, - 62, 62, 62, 62, 62, 62, 62, 64, 12, 12, 12, 12, 12, 59, 36, 36, - 61, 63, 
63, 61, 63, 63, 61, 36, 36, 36, 62, 62, 61, 62, 62, 62, - 61, 62, 61, 61, 36, 62, 61, 62, 62, 62, 62, 62, 62, 61, 62, 36, - 62, 62, 63, 63, 62, 62, 62, 36, 12, 12, 12, 12, 12, 36, 62, 62, - 32, 65, 29, 65, 66, 67, 68, 54, 54, 69, 57, 14, 0, 14, 14, 14, - 14, 14, 44, 19, 19, 70, 70, 0, 14, 14, 14, 14, 14, 14, 38, 36, - 43, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 14, 14, 19, 0, - 0, 0, 0, 0, 43, 0, 0, 0, 0, 0, 0, 0, 0, 0, 53, 59, - 14, 14, 14, 45, 14, 14, 38, 14, 65, 71, 14, 14, 72, 73, 36, 36, - 12, 12, 12, 12, 12, 59, 14, 14, 12, 12, 12, 12, 12, 62, 62, 62, - 14, 14, 14, 39, 36, 36, 39, 36, 74, 74, 74, 74, 74, 74, 74, 74, - 75, 75, 75, 75, 75, 75, 75, 75, 75, 75, 75, 75, 76, 76, 76, 76, - 76, 76, 76, 76, 76, 76, 76, 76, 14, 14, 14, 14, 38, 14, 14, 36, - 14, 14, 14, 38, 38, 14, 14, 36, 38, 14, 14, 36, 14, 14, 14, 38, - 38, 14, 14, 36, 14, 14, 14, 14, 14, 14, 14, 38, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 38, 43, 0, 27, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 36, 36, 36, 14, 14, 38, 36, 36, 36, 36, 36, - 77, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 16, 78, 36, - 14, 14, 14, 14, 14, 27, 59, 14, 14, 14, 14, 14, 14, 14, 38, 14, - 14, 0, 53, 36, 36, 36, 36, 36, 14, 0, 1, 41, 36, 36, 36, 36, - 14, 0, 36, 36, 36, 36, 36, 36, 38, 0, 36, 36, 36, 36, 36, 36, - 62, 62, 59, 79, 77, 80, 62, 36, 12, 12, 12, 12, 12, 36, 36, 36, - 14, 54, 59, 29, 54, 19, 0, 73, 14, 14, 14, 14, 19, 38, 36, 36, - 14, 14, 14, 36, 36, 36, 36, 36, 0, 0, 0, 0, 0, 0, 36, 36, - 38, 36, 54, 12, 12, 12, 12, 12, 62, 62, 62, 62, 62, 62, 62, 36, - 62, 62, 63, 36, 36, 36, 36, 36, 62, 62, 62, 62, 62, 62, 36, 36, - 62, 62, 62, 62, 62, 36, 36, 36, 12, 12, 12, 12, 12, 63, 36, 62, - 14, 14, 14, 19, 0, 0, 36, 14, 62, 62, 62, 62, 62, 62, 62, 63, - 62, 62, 62, 62, 62, 62, 63, 43, 0, 0, 45, 14, 14, 14, 14, 14, - 14, 14, 0, 0, 0, 0, 0, 0, 0, 0, 45, 14, 14, 14, 36, 36, - 12, 12, 12, 12, 12, 59, 27, 59, 77, 14, 14, 14, 14, 19, 0, 0, - 0, 0, 14, 14, 14, 14, 38, 36, 0, 45, 14, 14, 14, 14, 14, 14, - 19, 0, 0, 0, 0, 0, 
0, 14, 0, 0, 36, 36, 36, 36, 14, 14, - 0, 0, 0, 0, 36, 81, 59, 59, 12, 12, 12, 12, 12, 36, 39, 14, - 14, 14, 14, 14, 14, 14, 14, 59, 0, 45, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 45, 14, 19, 14, 14, 0, 45, 38, 36, 36, 36, 36, - 0, 0, 0, 53, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 0, 0, - 14, 14, 14, 36, 14, 14, 14, 36, 14, 14, 14, 14, 39, 39, 39, 39, - 14, 14, 14, 14, 14, 14, 14, 36, 14, 14, 38, 14, 14, 14, 14, 14, - 14, 14, 36, 14, 14, 14, 39, 14, 36, 14, 38, 14, 14, 14, 32, 38, - 59, 59, 59, 82, 59, 83, 0, 0, 82, 59, 84, 25, 85, 86, 85, 86, - 28, 14, 87, 88, 89, 0, 0, 33, 51, 51, 51, 51, 7, 90, 91, 14, - 14, 14, 92, 93, 91, 14, 14, 14, 14, 14, 14, 77, 59, 59, 27, 59, - 94, 14, 38, 0, 0, 0, 0, 0, 14, 36, 25, 14, 14, 14, 16, 95, - 24, 28, 25, 14, 14, 14, 16, 78, 23, 23, 23, 6, 23, 23, 23, 23, - 23, 23, 23, 22, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, - 53, 36, 36, 36, 36, 36, 36, 36, 14, 50, 24, 14, 50, 14, 14, 14, - 14, 24, 14, 96, 14, 14, 14, 14, 24, 25, 14, 14, 14, 24, 14, 14, - 14, 14, 28, 14, 14, 24, 14, 25, 28, 28, 28, 28, 28, 28, 14, 14, - 28, 28, 28, 28, 28, 14, 14, 14, 14, 14, 14, 14, 24, 36, 36, 36, - 14, 25, 25, 14, 14, 14, 14, 14, 25, 28, 14, 24, 25, 24, 14, 24, - 24, 23, 24, 14, 14, 25, 24, 28, 25, 24, 24, 24, 28, 28, 25, 25, - 14, 14, 28, 28, 14, 14, 28, 14, 14, 14, 14, 14, 25, 14, 25, 14, - 14, 25, 14, 14, 14, 14, 14, 14, 28, 14, 28, 28, 14, 28, 14, 28, - 14, 28, 14, 28, 14, 14, 14, 14, 14, 14, 24, 14, 24, 14, 14, 14, - 14, 14, 24, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 24, - 14, 25, 14, 14, 14, 97, 14, 14, 14, 14, 14, 14, 16, 98, 14, 14, - 97, 97, 36, 36, 36, 36, 36, 36, 14, 14, 14, 38, 36, 36, 36, 36, - 14, 14, 14, 14, 14, 38, 36, 36, 28, 28, 28, 28, 28, 28, 28, 28, - 28, 28, 28, 28, 28, 28, 28, 25, 28, 28, 25, 14, 14, 14, 14, 14, - 14, 28, 28, 14, 14, 14, 14, 14, 28, 24, 28, 28, 28, 14, 14, 14, - 14, 28, 14, 28, 14, 14, 28, 14, 28, 14, 14, 28, 25, 24, 14, 28, - 28, 14, 14, 14, 14, 14, 14, 14, 14, 28, 28, 14, 14, 14, 14, 24, - 97, 97, 24, 
25, 24, 14, 14, 28, 14, 14, 97, 28, 99, 97, 97, 97, - 14, 14, 14, 14, 100, 97, 14, 14, 25, 25, 14, 14, 14, 14, 14, 14, - 28, 24, 28, 24, 101, 25, 28, 24, 14, 14, 14, 14, 14, 14, 14, 100, - 14, 14, 14, 14, 14, 14, 14, 28, 14, 14, 14, 14, 14, 14, 100, 97, - 97, 97, 97, 97, 101, 28, 102, 100, 97, 102, 101, 28, 97, 28, 101, 102, - 97, 24, 14, 14, 28, 101, 28, 28, 102, 97, 97, 102, 97, 101, 102, 97, - 103, 97, 99, 14, 97, 97, 97, 14, 14, 14, 14, 24, 14, 7, 85, 5, - 14, 54, 14, 14, 70, 70, 70, 70, 70, 70, 70, 28, 28, 28, 28, 28, - 28, 28, 14, 14, 14, 14, 14, 14, 14, 14, 16, 98, 14, 14, 14, 14, - 14, 14, 14, 70, 70, 70, 70, 70, 14, 16, 104, 104, 104, 104, 104, 104, - 104, 104, 104, 104, 98, 14, 14, 14, 14, 14, 14, 14, 70, 70, 14, 14, - 14, 14, 14, 14, 14, 14, 70, 14, 14, 14, 24, 28, 28, 36, 36, 36, - 14, 14, 14, 14, 14, 14, 14, 19, 0, 14, 36, 36, 105, 59, 77, 106, - 14, 14, 14, 14, 36, 36, 36, 39, 41, 36, 36, 36, 36, 36, 36, 43, - 14, 14, 14, 38, 14, 14, 14, 38, 85, 85, 85, 85, 85, 85, 85, 59, - 59, 59, 59, 27, 107, 14, 85, 14, 85, 70, 70, 70, 70, 59, 59, 57, - 59, 27, 77, 14, 14, 108, 36, 36, 97, 97, 97, 97, 97, 97, 97, 97, - 97, 97, 97, 97, 97, 103, 97, 97, 97, 97, 97, 36, 36, 36, 36, 36, - 97, 97, 97, 97, 97, 97, 36, 36, 18, 109, 110, 97, 70, 70, 70, 70, - 70, 97, 70, 70, 70, 70, 111, 112, 97, 97, 97, 97, 97, 0, 0, 0, - 97, 97, 113, 97, 97, 110, 114, 97, 115, 116, 116, 116, 116, 97, 97, 97, - 97, 116, 97, 97, 97, 97, 97, 97, 97, 116, 116, 116, 97, 97, 97, 117, - 97, 97, 116, 118, 43, 119, 91, 114, 120, 116, 116, 116, 116, 97, 97, 97, - 97, 97, 116, 117, 97, 110, 121, 114, 36, 36, 103, 97, 97, 97, 97, 97, - 97, 97, 97, 97, 97, 97, 97, 36, 103, 97, 97, 97, 97, 97, 97, 97, - 97, 97, 97, 97, 97, 97, 97, 122, 97, 97, 97, 97, 97, 122, 36, 36, - 123, 123, 123, 123, 123, 123, 123, 123, 97, 97, 97, 97, 28, 28, 28, 28, - 97, 97, 110, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 122, 36, - 97, 97, 97, 122, 36, 36, 36, 36, 14, 14, 14, 14, 14, 14, 27, 106, - 12, 12, 12, 12, 12, 14, 
36, 36, 0, 45, 0, 0, 0, 0, 0, 14, - 14, 14, 14, 14, 36, 36, 36, 43, 0, 27, 59, 59, 36, 36, 36, 36, - 14, 14, 36, 36, 36, 36, 36, 36, 14, 45, 14, 45, 14, 19, 14, 14, - 14, 19, 0, 0, 14, 14, 36, 36, 14, 14, 14, 14, 124, 36, 36, 36, - 14, 14, 65, 54, 36, 36, 36, 36, 0, 14, 14, 14, 14, 14, 14, 14, - 0, 0, 53, 36, 36, 36, 36, 59, 0, 14, 14, 14, 14, 14, 36, 36, - 14, 14, 14, 0, 0, 0, 0, 59, 14, 14, 14, 19, 0, 0, 0, 0, - 0, 0, 36, 36, 36, 36, 36, 39, 74, 74, 74, 74, 74, 74, 125, 36, - 14, 19, 0, 0, 0, 0, 0, 0, 45, 14, 14, 27, 59, 14, 14, 39, - 12, 12, 12, 12, 12, 36, 36, 14, 14, 14, 14, 14, 19, 0, 0, 0, - 14, 19, 14, 14, 14, 14, 0, 36, 12, 12, 12, 12, 12, 36, 27, 59, - 62, 63, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 61, 62, 62, - 59, 14, 19, 53, 36, 36, 36, 36, 39, 14, 14, 38, 39, 14, 14, 38, - 39, 14, 14, 38, 36, 36, 36, 36, 14, 19, 0, 0, 0, 1, 0, 36, - 126, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 126, 127, - 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 126, 127, 127, 127, - 127, 127, 126, 127, 127, 127, 127, 127, 127, 127, 36, 36, 36, 36, 36, 36, - 75, 75, 75, 128, 36, 129, 76, 76, 76, 76, 76, 76, 76, 76, 36, 36, - 130, 130, 130, 130, 130, 130, 130, 130, 36, 39, 14, 14, 36, 36, 131, 132, - 47, 47, 47, 47, 49, 47, 47, 47, 47, 47, 47, 48, 47, 47, 48, 48, - 47, 131, 48, 47, 47, 47, 47, 47, 14, 36, 36, 36, 36, 36, 36, 36, - 36, 39, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 70, - 36, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 124, 36, - 133, 134, 58, 135, 136, 36, 36, 36, 97, 97, 137, 104, 104, 104, 104, 104, - 104, 104, 109, 137, 109, 97, 97, 97, 109, 78, 91, 54, 137, 104, 104, 109, - 97, 97, 97, 122, 138, 139, 36, 36, 14, 14, 14, 14, 14, 14, 38, 140, - 105, 97, 6, 97, 70, 97, 109, 109, 97, 97, 97, 97, 97, 91, 97, 141, - 97, 97, 97, 97, 97, 137, 142, 97, 97, 97, 97, 97, 97, 137, 142, 137, - 112, 70, 93, 143, 123, 123, 123, 123, 144, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 91, 36, 14, 14, 14, 
36, 14, 14, 14, - 36, 14, 14, 14, 36, 14, 38, 36, 22, 97, 138, 145, 14, 14, 14, 38, - 36, 36, 36, 36, 43, 0, 146, 36, 14, 14, 14, 14, 14, 14, 39, 14, - 14, 14, 14, 14, 14, 38, 14, 39, 59, 41, 36, 39, 14, 14, 14, 14, - 14, 14, 36, 39, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 36, 36, - 14, 14, 14, 14, 14, 14, 19, 36, 14, 14, 14, 14, 14, 14, 14, 81, - 14, 14, 36, 36, 14, 14, 14, 14, 77, 14, 14, 36, 36, 36, 36, 36, - 14, 14, 14, 36, 38, 14, 14, 14, 14, 14, 14, 39, 38, 36, 38, 39, - 14, 14, 14, 81, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 36, 81, - 14, 14, 14, 14, 14, 36, 36, 39, 14, 14, 14, 14, 36, 36, 36, 14, - 19, 0, 43, 53, 36, 36, 0, 0, 14, 14, 39, 14, 39, 14, 14, 14, - 14, 14, 36, 36, 0, 53, 36, 43, 59, 59, 59, 59, 38, 36, 36, 36, - 14, 14, 14, 36, 81, 59, 59, 59, 14, 14, 14, 36, 14, 14, 14, 14, - 14, 38, 36, 36, 14, 14, 14, 14, 14, 14, 14, 14, 38, 36, 36, 36, - 14, 14, 14, 14, 0, 0, 0, 0, 0, 0, 0, 1, 77, 14, 14, 36, - 14, 14, 14, 12, 12, 12, 12, 12, 0, 0, 0, 0, 0, 45, 14, 59, - 59, 36, 36, 36, 36, 36, 36, 36, 0, 0, 53, 12, 12, 12, 12, 12, - 59, 59, 36, 36, 36, 36, 36, 36, 45, 14, 27, 77, 41, 36, 36, 36, - 0, 0, 0, 0, 36, 36, 36, 36, 14, 38, 36, 36, 36, 36, 36, 36, - 14, 14, 14, 14, 147, 70, 112, 14, 14, 98, 14, 70, 70, 14, 14, 14, - 14, 14, 14, 14, 16, 112, 14, 14, 19, 0, 0, 0, 0, 0, 0, 0, - 36, 36, 36, 36, 36, 36, 36, 43, 97, 36, 36, 36, 36, 36, 36, 36, - 14, 14, 19, 0, 0, 14, 19, 0, 0, 45, 19, 0, 0, 0, 14, 14, - 14, 14, 14, 14, 14, 0, 0, 14, 14, 0, 45, 36, 36, 36, 36, 36, - 36, 38, 39, 38, 39, 14, 38, 14, 14, 14, 14, 14, 14, 39, 39, 14, - 14, 14, 39, 14, 14, 14, 14, 14, 14, 14, 14, 39, 14, 38, 39, 14, - 14, 14, 38, 14, 14, 14, 38, 14, 14, 14, 14, 14, 14, 39, 14, 38, - 14, 14, 38, 38, 36, 14, 14, 14, 14, 14, 14, 14, 14, 14, 36, 12, - 12, 12, 12, 12, 12, 12, 12, 12, 39, 38, 38, 39, 39, 14, 14, 14, - 14, 38, 14, 14, 39, 39, 36, 36, 36, 38, 36, 39, 39, 39, 39, 14, - 39, 38, 38, 39, 39, 39, 39, 39, 39, 38, 38, 39, 14, 38, 14, 14, - 14, 38, 14, 14, 39, 14, 38, 38, 14, 
14, 14, 14, 14, 39, 14, 14, - 39, 14, 39, 14, 14, 39, 14, 14, 103, 97, 97, 97, 97, 97, 97, 122, - 28, 28, 28, 28, 28, 148, 36, 36, 28, 28, 28, 28, 28, 28, 28, 38, - 28, 28, 28, 28, 28, 14, 36, 36, 36, 36, 36, 149, 149, 149, 149, 149, - 149, 149, 149, 149, 149, 149, 149, 149, 97, 122, 36, 36, 36, 36, 36, 36, - 97, 97, 97, 97, 122, 36, 36, 36, 122, 36, 36, 36, 36, 36, 36, 36, - 97, 97, 97, 103, 97, 97, 97, 97, 97, 97, 99, 100, 97, 97, 100, 97, - 97, 97, 122, 97, 97, 122, 36, 36, 122, 97, 97, 97, 97, 97, 97, 97, - 100, 100, 100, 97, 97, 97, 97, 99, 99, 100, 97, 97, 97, 97, 97, 97, - 97, 97, 97, 97, 103, 97, 122, 36, 14, 14, 14, 100, 97, 97, 97, 97, - 97, 97, 97, 99, 14, 14, 14, 14, 14, 14, 100, 97, 97, 97, 97, 97, - 97, 14, 14, 14, 14, 14, 14, 36, 97, 97, 97, 97, 36, 36, 36, 36, - 36, 36, 36, 36, 36, 103, 97, 97, 122, 36, 103, 97, 97, 97, 97, 97, - 43, 36, 36, 36, 36, 36, 36, 36, -}; - -static RE_UINT8 re_line_break_stage_5[] = { - 16, 16, 16, 18, 22, 20, 20, 21, 19, 6, 3, 12, 9, 10, 12, 3, - 1, 36, 12, 9, 8, 15, 8, 7, 11, 11, 8, 8, 12, 12, 12, 6, - 12, 1, 9, 36, 18, 2, 12, 16, 16, 29, 4, 1, 10, 9, 9, 9, - 12, 25, 25, 12, 25, 3, 12, 18, 25, 25, 17, 12, 25, 1, 17, 25, - 12, 17, 16, 4, 4, 4, 4, 16, 0, 0, 8, 0, 12, 0, 0, 12, - 0, 8, 18, 0, 0, 9, 0, 16, 18, 16, 16, 12, 6, 16, 37, 37, - 37, 0, 37, 12, 12, 10, 10, 10, 16, 6, 16, 0, 6, 6, 10, 11, - 11, 12, 6, 12, 8, 6, 18, 18, 0, 10, 0, 24, 24, 24, 24, 0, - 24, 12, 17, 17, 4, 17, 17, 18, 4, 6, 4, 12, 1, 2, 18, 17, - 12, 4, 4, 0, 31, 31, 32, 32, 33, 33, 18, 12, 2, 0, 5, 24, - 18, 9, 0, 18, 18, 4, 18, 28, 26, 25, 3, 3, 1, 3, 14, 14, - 14, 18, 20, 20, 3, 25, 5, 5, 8, 1, 2, 5, 30, 12, 2, 25, - 9, 12, 13, 13, 2, 12, 13, 12, 12, 13, 13, 25, 25, 13, 0, 13, - 2, 1, 0, 6, 6, 18, 1, 18, 26, 26, 2, 13, 13, 5, 5, 1, - 2, 2, 13, 16, 5, 13, 0, 38, 13, 38, 38, 13, 38, 0, 16, 5, - 5, 38, 38, 5, 13, 0, 38, 38, 10, 12, 31, 0, 34, 35, 35, 35, - 32, 0, 0, 33, 27, 27, 0, 37, 16, 37, 8, 2, 2, 8, 6, 1, - 2, 14, 13, 1, 13, 9, 10, 13, 0, 30, 13, 6, 
13, 2, 12, 38, - 38, 12, 9, 0, 23, 25, 1, 1, 25, 0, 39, 39, -}; - -/* Line_Break: 7668 bytes. */ - -RE_UINT32 re_get_line_break(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 12; - code = ch ^ (f << 12); - pos = (RE_UINT32)re_line_break_stage_1[f] << 5; - f = code >> 7; - code ^= f << 7; - pos = (RE_UINT32)re_line_break_stage_2[pos + f] << 3; - f = code >> 4; - code ^= f << 4; - pos = (RE_UINT32)re_line_break_stage_3[pos + f] << 3; - f = code >> 1; - code ^= f << 1; - pos = (RE_UINT32)re_line_break_stage_4[pos + f] << 1; - value = re_line_break_stage_5[pos + code]; - - return value; -} - -/* Numeric_Type. */ - -static RE_UINT8 re_numeric_type_stage_1[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 11, 11, 11, 12, - 13, 14, 15, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 16, 11, 17, - 18, 11, 19, 20, 11, 11, 21, 11, 11, 11, 11, 11, 11, 11, 11, 22, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, -}; - -static RE_UINT8 re_numeric_type_stage_2[] = { - 0, 1, 1, 1, 1, 1, 2, 3, 1, 4, 5, 6, 7, 8, 9, 10, - 11, 1, 1, 12, 1, 1, 13, 14, 15, 16, 17, 18, 19, 1, 1, 1, - 20, 
21, 1, 1, 22, 1, 1, 23, 1, 1, 1, 1, 24, 1, 1, 1, - 25, 26, 27, 1, 28, 1, 1, 1, 29, 1, 1, 30, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 31, 32, - 1, 33, 1, 34, 1, 1, 35, 1, 36, 1, 1, 1, 1, 1, 37, 38, - 1, 1, 39, 40, 1, 1, 1, 41, 1, 1, 1, 1, 1, 1, 1, 42, - 1, 1, 1, 43, 1, 1, 44, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 45, 1, 1, 1, 46, 1, 1, 1, 1, 1, 1, 1, 47, 48, 1, 1, - 1, 1, 1, 1, 1, 1, 49, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 50, 1, 51, 52, 53, 54, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 55, 1, 1, 1, 1, 1, 15, - 1, 56, 1, 57, 58, 1, 1, 1, 59, 60, 61, 62, 1, 1, 63, 1, - 64, 65, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 66, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 67, 1, 1, 1, 68, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 69, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 70, 71, 1, 1, 1, 1, 1, 1, 1, 72, 73, 74, 1, 1, 1, 1, - 1, 1, 1, 75, 1, 1, 1, 1, 1, 76, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 77, 1, 1, 1, 1, - 1, 1, 78, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 75, 1, 1, 1, 1, 1, 1, 1, -}; - -static RE_UINT8 re_numeric_type_stage_3[] = { - 0, 1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 3, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 3, 0, - 0, 0, 0, 4, 0, 0, 0, 5, 0, 0, 0, 4, 0, 0, 0, 4, - 0, 0, 0, 6, 0, 0, 0, 7, 0, 0, 0, 8, 0, 0, 0, 4, - 0, 0, 0, 9, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, - 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 1, 0, 0, 0, - 0, 0, 0, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, - 0, 0, 0, 0, 0, 0, 0, 13, 1, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 4, 0, 0, 0, 14, 0, 0, 0, 0, 0, 15, 0, 0, 0, - 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 15, 0, 0, 0, 0, 0, - 0, 0, 0, 16, 17, 0, 0, 0, 0, 0, 18, 19, 20, 0, 0, 0, - 0, 0, 0, 21, 22, 0, 0, 23, 0, 0, 0, 24, 25, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 26, 27, 28, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 29, 0, 0, 0, 0, 30, 31, 0, 30, 32, 0, 0, - 33, 0, 0, 0, 34, 0, 0, 0, 0, 35, 0, 0, 0, 0, 0, 0, - 0, 0, 36, 0, 0, 0, 0, 0, 37, 0, 26, 0, 38, 39, 40, 41, - 36, 
0, 0, 42, 0, 0, 0, 0, 43, 0, 44, 45, 0, 0, 0, 0, - 0, 0, 46, 0, 0, 0, 47, 0, 0, 0, 0, 0, 0, 0, 48, 0, - 0, 0, 0, 0, 0, 0, 0, 49, 0, 0, 0, 50, 0, 0, 0, 51, - 52, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 53, - 0, 0, 54, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 55, 0, - 44, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 56, 0, 0, 0, - 0, 0, 0, 53, 0, 0, 0, 0, 0, 0, 0, 0, 44, 0, 0, 0, - 0, 54, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 57, 0, 0, - 0, 42, 0, 0, 0, 0, 0, 0, 0, 58, 59, 60, 0, 0, 0, 56, - 0, 3, 0, 0, 0, 0, 0, 61, 0, 62, 0, 0, 0, 0, 1, 0, - 3, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 63, 0, 55, 64, 26, - 65, 66, 19, 67, 35, 0, 0, 0, 0, 68, 69, 0, 0, 0, 70, 0, - 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 71, 0, 0, 0, 0, 0, - 72, 0, 0, 0, 0, 0, 0, 0, 0, 0, 73, 74, 0, 0, 0, 0, - 0, 0, 71, 71, 0, 0, 0, 0, 0, 0, 0, 75, 0, 0, 0, 0, - 0, 0, 76, 77, 0, 0, 0, 1, 0, 78, 0, 0, 0, 0, 1, 0, - 19, 19, 19, 79, 0, 0, 0, 0, 0, 0, 0, 80, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 81, 82, 83, 0, 0, 0, 0, 0, 0, 0, - 58, 0, 0, 43, 0, 0, 0, 84, 0, 58, 0, 0, 0, 0, 0, 0, - 0, 35, 0, 0, 85, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 86, - 87, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 0, 0, 0, - 0, 0, 0, 0, 60, 0, 0, 0, 48, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 36, 0, 0, 0, 0, -}; - -static RE_UINT8 re_numeric_type_stage_4[] = { - 0, 0, 0, 0, 0, 0, 1, 2, 0, 0, 3, 4, 1, 2, 0, 0, - 5, 1, 0, 0, 5, 1, 6, 7, 5, 1, 8, 0, 5, 1, 9, 0, - 5, 1, 0, 10, 5, 1, 11, 0, 1, 12, 13, 0, 0, 14, 15, 16, - 0, 17, 18, 0, 1, 2, 19, 7, 0, 0, 1, 20, 1, 2, 1, 2, - 0, 0, 21, 22, 23, 22, 0, 0, 0, 0, 19, 19, 19, 19, 19, 19, - 24, 7, 0, 0, 23, 25, 26, 27, 19, 23, 25, 13, 0, 28, 29, 30, - 0, 0, 31, 32, 23, 33, 34, 0, 0, 0, 0, 35, 36, 0, 0, 0, - 37, 7, 0, 9, 0, 0, 38, 0, 19, 7, 0, 0, 0, 19, 37, 19, - 0, 0, 37, 19, 35, 0, 0, 0, 39, 0, 0, 0, 0, 40, 0, 0, - 0, 35, 0, 0, 41, 42, 0, 0, 0, 43, 44, 0, 0, 0, 0, 36, - 18, 0, 0, 36, 0, 18, 0, 0, 0, 0, 18, 0, 43, 0, 0, 0, - 45, 0, 0, 0, 0, 46, 0, 0, 47, 43, 0, 0, 48, 0, 0, 0, - 0, 0, 0, 39, 0, 0, 42, 42, 0, 0, 0, 
40, 0, 0, 0, 17, - 0, 49, 18, 0, 0, 0, 0, 45, 0, 43, 0, 0, 0, 0, 40, 0, - 0, 0, 45, 0, 0, 45, 39, 0, 42, 0, 0, 0, 45, 43, 0, 0, - 0, 0, 0, 18, 17, 19, 0, 0, 0, 0, 11, 0, 0, 39, 39, 18, - 0, 0, 50, 0, 36, 19, 19, 19, 19, 19, 13, 0, 19, 19, 19, 18, - 13, 0, 0, 0, 42, 40, 0, 0, 0, 0, 51, 0, 0, 0, 0, 19, - 0, 0, 17, 13, 52, 0, 0, 0, 0, 0, 0, 53, 23, 25, 19, 10, - 0, 0, 54, 55, 56, 1, 0, 0, 0, 0, 5, 1, 9, 0, 0, 0, - 19, 19, 7, 0, 0, 5, 1, 1, 1, 1, 1, 1, 23, 57, 0, 0, - 40, 0, 0, 0, 39, 43, 0, 43, 0, 40, 0, 35, 0, 0, 0, 42, -}; - -static RE_UINT8 re_numeric_type_stage_5[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, - 0, 2, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 3, 3, - 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, - 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, - 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, - 3, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, - 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, - 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, - 3, 3, 2, 0, 0, 0, 0, 0, 2, 0, 0, 0, 2, 2, 2, 2, - 2, 2, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, - 1, 1, 1, 0, 0, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, - 0, 0, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 1, 2, 0, 0, 0, 0, 0, 0, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 1, 2, 1, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, - 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, - 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, - 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, - 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, - 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, - 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, - 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, - 2, 2, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, - 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 3, 
3, 2, 2, 2, 0, 0, 0, 0, 0, -}; - -/* Numeric_Type: 2088 bytes. */ - -RE_UINT32 re_get_numeric_type(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 12; - code = ch ^ (f << 12); - pos = (RE_UINT32)re_numeric_type_stage_1[f] << 4; - f = code >> 8; - code ^= f << 8; - pos = (RE_UINT32)re_numeric_type_stage_2[pos + f] << 3; - f = code >> 5; - code ^= f << 5; - pos = (RE_UINT32)re_numeric_type_stage_3[pos + f] << 2; - f = code >> 3; - code ^= f << 3; - pos = (RE_UINT32)re_numeric_type_stage_4[pos + f] << 3; - value = re_numeric_type_stage_5[pos + code]; - - return value; -} - -/* Numeric_Value. */ - -static RE_UINT8 re_numeric_value_stage_1[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 11, 11, 11, 12, - 13, 14, 15, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 16, 11, 17, - 18, 11, 19, 20, 11, 11, 21, 11, 11, 11, 11, 11, 11, 11, 11, 22, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, -}; - -static RE_UINT8 re_numeric_value_stage_2[] = { - 0, 1, 1, 1, 1, 1, 2, 3, 1, 4, 5, 6, 7, 8, 9, 10, - 11, 1, 1, 12, 1, 1, 13, 14, 15, 16, 17, 18, 19, 1, 1, 1, - 20, 21, 1, 1, 22, 1, 
1, 23, 1, 1, 1, 1, 24, 1, 1, 1, - 25, 26, 27, 1, 28, 1, 1, 1, 29, 1, 1, 30, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 31, 32, - 1, 33, 1, 34, 1, 1, 35, 1, 36, 1, 1, 1, 1, 1, 37, 38, - 1, 1, 39, 40, 1, 1, 1, 41, 1, 1, 1, 1, 1, 1, 1, 42, - 1, 1, 1, 43, 1, 1, 44, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 45, 1, 1, 1, 46, 1, 1, 1, 1, 1, 1, 1, 47, 48, 1, 1, - 1, 1, 1, 1, 1, 1, 49, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 50, 1, 51, 52, 53, 54, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 55, 1, 1, 1, 1, 1, 15, - 1, 56, 1, 57, 58, 1, 1, 1, 59, 60, 61, 62, 1, 1, 63, 1, - 64, 65, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 66, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 67, 1, 1, 1, 68, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 69, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 70, 71, 1, 1, 1, 1, 1, 1, 1, 72, 73, 74, 1, 1, 1, 1, - 1, 1, 1, 75, 1, 1, 1, 1, 1, 76, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 77, 1, 1, 1, 1, - 1, 1, 78, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 79, 1, 1, 1, 1, 1, 1, 1, -}; - -static RE_UINT8 re_numeric_value_stage_3[] = { - 0, 1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 3, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 3, 0, - 0, 0, 0, 4, 0, 0, 0, 5, 0, 0, 0, 4, 0, 0, 0, 4, - 0, 0, 0, 6, 0, 0, 0, 7, 0, 0, 0, 8, 0, 0, 0, 4, - 0, 0, 0, 9, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, - 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 1, 0, 0, 0, - 0, 0, 0, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, - 0, 0, 0, 0, 0, 0, 0, 13, 1, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 4, 0, 0, 0, 14, 0, 0, 0, 0, 0, 13, 0, 0, 0, - 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 13, 0, 0, 0, 0, 0, - 0, 0, 0, 15, 3, 0, 0, 0, 0, 0, 16, 17, 18, 0, 0, 0, - 0, 0, 0, 19, 20, 0, 0, 21, 0, 0, 0, 22, 23, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 24, 25, 26, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 27, 0, 0, 0, 0, 28, 29, 0, 28, 30, 0, 0, - 31, 0, 0, 0, 32, 0, 0, 0, 0, 33, 0, 0, 0, 0, 0, 0, - 0, 0, 34, 0, 0, 0, 0, 0, 35, 0, 36, 0, 37, 38, 39, 40, - 41, 0, 0, 42, 0, 0, 
0, 0, 43, 0, 44, 45, 0, 0, 0, 0, - 0, 0, 46, 0, 0, 0, 47, 0, 0, 0, 0, 0, 0, 0, 48, 0, - 0, 0, 0, 0, 0, 0, 0, 49, 0, 0, 0, 50, 0, 0, 0, 51, - 52, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 53, - 0, 0, 54, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 55, 0, - 56, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 57, 0, 0, 0, - 0, 0, 0, 58, 0, 0, 0, 0, 0, 0, 0, 0, 59, 0, 0, 0, - 0, 60, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 61, 0, 0, - 0, 62, 0, 0, 0, 0, 0, 0, 0, 63, 64, 65, 0, 0, 0, 66, - 0, 3, 0, 0, 0, 0, 0, 67, 0, 68, 0, 0, 0, 0, 1, 0, - 3, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 69, 0, 70, 71, 72, - 73, 74, 75, 76, 77, 0, 0, 0, 0, 78, 79, 0, 0, 0, 80, 0, - 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 81, 0, 0, 0, 0, 0, - 82, 0, 0, 0, 0, 0, 0, 0, 0, 0, 83, 84, 0, 0, 0, 0, - 0, 0, 85, 85, 0, 0, 0, 0, 0, 0, 0, 86, 0, 0, 0, 0, - 0, 0, 87, 88, 0, 0, 0, 1, 0, 89, 0, 0, 0, 0, 1, 0, - 90, 91, 92, 93, 0, 0, 0, 0, 0, 0, 0, 94, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 95, 96, 97, 0, 0, 0, 0, 0, 0, 0, - 98, 0, 0, 99, 0, 0, 0, 100, 0, 101, 0, 0, 0, 0, 0, 0, - 0, 102, 0, 0, 103, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 104, - 105, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 62, 0, 0, 0, - 0, 0, 0, 0, 106, 0, 0, 0, 107, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 108, 0, 0, 0, 0, 0, 0, 0, 0, 109, 0, 0, 0, -}; - -static RE_UINT8 re_numeric_value_stage_4[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 0, - 0, 0, 0, 0, 4, 0, 5, 6, 1, 2, 3, 0, 0, 0, 0, 0, - 0, 7, 8, 9, 0, 0, 0, 0, 0, 7, 8, 9, 0, 10, 11, 0, - 0, 7, 8, 9, 12, 13, 0, 0, 0, 7, 8, 9, 14, 0, 0, 0, - 0, 7, 8, 9, 0, 0, 1, 15, 0, 7, 8, 9, 16, 17, 0, 0, - 1, 2, 18, 19, 20, 0, 0, 0, 0, 0, 21, 2, 22, 23, 24, 25, - 0, 0, 0, 26, 27, 0, 0, 0, 1, 2, 3, 0, 1, 2, 3, 0, - 0, 0, 0, 0, 1, 2, 28, 0, 0, 0, 0, 0, 29, 2, 3, 0, - 0, 0, 0, 0, 30, 31, 32, 33, 34, 35, 36, 37, 34, 35, 36, 37, - 38, 39, 40, 0, 0, 0, 0, 0, 34, 35, 36, 41, 42, 34, 35, 36, - 41, 42, 34, 35, 36, 41, 42, 0, 0, 0, 43, 44, 45, 46, 2, 47, - 0, 0, 0, 0, 0, 48, 49, 50, 34, 35, 51, 49, 50, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 
52, 0, 53, 0, 0, 0, 0, 0, 0, - 21, 2, 3, 0, 0, 0, 54, 0, 0, 0, 0, 0, 48, 55, 0, 0, - 34, 35, 56, 0, 0, 0, 0, 0, 0, 0, 57, 58, 59, 60, 61, 62, - 0, 0, 0, 0, 63, 64, 65, 66, 0, 67, 0, 0, 0, 0, 0, 0, - 68, 0, 0, 0, 0, 0, 0, 0, 0, 0, 69, 0, 0, 0, 0, 0, - 0, 0, 0, 70, 0, 0, 0, 0, 71, 72, 73, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 74, 0, 0, 0, 75, 0, 76, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 77, 78, 0, 0, 0, 0, 0, 0, 79, - 0, 0, 80, 0, 0, 0, 0, 0, 0, 0, 0, 67, 0, 0, 0, 0, - 0, 0, 0, 0, 81, 0, 0, 0, 0, 82, 0, 0, 0, 0, 0, 0, - 0, 83, 0, 0, 0, 0, 0, 0, 0, 0, 84, 85, 0, 0, 0, 0, - 86, 87, 0, 88, 0, 0, 0, 0, 89, 80, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 90, 0, 0, 0, 0, 0, 5, 0, 5, 0, - 0, 0, 0, 0, 0, 0, 91, 0, 0, 0, 0, 0, 0, 0, 0, 92, - 0, 0, 0, 15, 75, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 93, - 0, 0, 0, 94, 0, 0, 0, 0, 0, 0, 0, 0, 95, 0, 0, 0, - 0, 95, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 96, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 97, 0, 98, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 25, 0, 0, 0, 0, 0, 0, 0, 99, 68, 0, 0, 0, - 0, 0, 0, 0, 75, 0, 0, 0, 100, 0, 0, 0, 0, 0, 0, 0, - 0, 101, 0, 81, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 102, 0, - 0, 0, 0, 0, 0, 103, 0, 0, 0, 48, 49, 104, 0, 0, 0, 0, - 0, 0, 0, 0, 105, 106, 0, 0, 0, 0, 107, 0, 108, 0, 75, 0, - 0, 0, 0, 0, 103, 0, 0, 0, 0, 0, 0, 0, 109, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 110, 0, 111, 8, 9, 57, 58, 112, 113, - 114, 115, 116, 117, 118, 0, 0, 0, 119, 120, 121, 122, 123, 124, 125, 126, - 127, 128, 129, 130, 122, 131, 132, 0, 0, 0, 103, 0, 0, 0, 0, 0, - 133, 0, 0, 0, 0, 0, 0, 0, 134, 0, 135, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 136, 137, 0, 0, 0, 0, 0, 0, 0, 0, 138, 139, - 0, 0, 0, 0, 0, 140, 141, 0, 34, 142, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 143, 0, 0, 0, 0, 0, 0, 34, 142, - 34, 35, 144, 145, 146, 147, 148, 149, 0, 0, 0, 0, 48, 49, 50, 150, - 151, 152, 8, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 8, 9, - 8, 9, 49, 153, 35, 154, 2, 155, 156, 157, 9, 158, 159, 158, 160, 161, - 162, 163, 164, 165, 166, 167, 168, 169, 170, 0, 0, 0, 0, 0, 0, 0, - 34, 35, 144, 145, 171, 0, 0, 0, 0, 0, 0, 7, 
8, 9, 1, 2, - 172, 8, 9, 1, 2, 172, 8, 9, 173, 49, 174, 0, 0, 0, 0, 0, - 70, 0, 0, 0, 0, 0, 0, 0, 0, 175, 0, 0, 0, 0, 0, 0, - 98, 0, 0, 0, 0, 0, 0, 0, 67, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 91, 0, 0, 0, 0, 0, 176, 0, 0, 88, 0, 0, 0, 88, - 0, 0, 101, 0, 0, 0, 0, 73, 0, 0, 0, 0, 0, 0, 73, 0, - 0, 0, 0, 0, 0, 0, 80, 0, 0, 0, 0, 0, 0, 0, 107, 0, - 0, 0, 0, 177, 0, 0, 0, 0, 0, 0, 0, 0, 178, 0, 0, 0, -}; - -static RE_UINT8 re_numeric_value_stage_5[] = { - 0, 0, 0, 0, 2, 23, 25, 27, 29, 31, 33, 35, 37, 39, 0, 0, - 0, 0, 25, 27, 0, 23, 0, 0, 11, 15, 19, 0, 0, 0, 2, 23, - 25, 27, 29, 31, 33, 35, 37, 39, 3, 6, 9, 11, 19, 46, 0, 0, - 0, 0, 11, 15, 19, 3, 6, 9, 40, 85, 94, 0, 23, 25, 27, 0, - 40, 85, 94, 11, 15, 19, 0, 0, 37, 39, 15, 24, 26, 28, 30, 32, - 34, 36, 38, 1, 0, 23, 25, 27, 37, 39, 40, 50, 60, 70, 80, 81, - 82, 83, 84, 85, 103, 0, 0, 0, 0, 0, 47, 48, 49, 0, 0, 0, - 37, 39, 23, 0, 2, 0, 0, 0, 7, 5, 4, 12, 18, 10, 14, 16, - 20, 8, 21, 6, 13, 17, 22, 23, 23, 25, 27, 29, 31, 33, 35, 37, - 39, 40, 41, 42, 80, 85, 89, 94, 94, 98, 103, 0, 0, 33, 80, 107, - 112, 2, 0, 0, 43, 44, 45, 46, 47, 48, 49, 50, 0, 0, 2, 41, - 42, 43, 44, 45, 46, 47, 48, 49, 50, 23, 25, 27, 37, 39, 40, 2, - 0, 0, 23, 25, 27, 29, 31, 33, 35, 37, 39, 40, 39, 40, 23, 25, - 0, 15, 0, 0, 0, 0, 0, 2, 40, 50, 60, 0, 27, 29, 0, 0, - 39, 40, 0, 0, 40, 50, 60, 70, 80, 81, 82, 83, 0, 51, 52, 53, - 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 0, 66, 67, 68, - 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 0, 31, 0, 0, - 0, 0, 0, 25, 0, 0, 31, 0, 0, 35, 0, 0, 23, 0, 0, 35, - 0, 0, 0, 103, 0, 27, 0, 0, 0, 39, 0, 0, 25, 0, 0, 0, - 31, 0, 29, 0, 0, 0, 0, 115, 40, 0, 0, 0, 0, 0, 0, 94, - 27, 0, 0, 0, 85, 0, 0, 0, 115, 0, 0, 0, 0, 0, 116, 0, - 0, 25, 0, 37, 0, 33, 0, 0, 0, 40, 0, 94, 50, 60, 0, 0, - 70, 0, 0, 0, 0, 27, 27, 27, 0, 0, 0, 29, 0, 0, 23, 0, - 0, 0, 39, 50, 0, 0, 40, 0, 37, 0, 0, 0, 0, 0, 35, 0, - 0, 0, 39, 0, 0, 0, 85, 0, 0, 0, 29, 0, 0, 0, 25, 0, - 0, 94, 0, 0, 0, 0, 33, 0, 33, 0, 0, 0, 0, 0, 2, 0, - 35, 
37, 39, 2, 11, 15, 19, 3, 6, 9, 0, 0, 0, 0, 0, 27, - 0, 0, 0, 40, 0, 33, 0, 33, 0, 40, 0, 0, 0, 0, 0, 23, - 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, - 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 11, 15, 23, 31, - 80, 89, 98, 107, 31, 40, 80, 85, 89, 94, 98, 31, 40, 80, 85, 89, - 94, 103, 107, 40, 23, 23, 23, 25, 25, 25, 25, 31, 40, 40, 40, 40, - 40, 60, 80, 80, 80, 80, 85, 87, 89, 89, 89, 89, 80, 15, 15, 18, - 19, 0, 0, 0, 23, 31, 40, 80, 0, 84, 0, 0, 0, 0, 93, 0, - 0, 23, 25, 40, 50, 85, 0, 0, 23, 25, 27, 40, 50, 85, 94, 103, - 0, 0, 23, 40, 50, 85, 25, 27, 40, 50, 85, 94, 0, 23, 80, 0, - 39, 40, 50, 60, 70, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, - 91, 92, 93, 15, 11, 12, 18, 0, 50, 60, 70, 80, 81, 82, 83, 84, - 85, 94, 2, 23, 35, 37, 39, 29, 39, 23, 25, 27, 37, 39, 23, 25, - 27, 29, 31, 25, 27, 27, 29, 31, 23, 25, 27, 27, 29, 31, 113, 114, - 29, 31, 27, 27, 29, 29, 29, 29, 33, 35, 35, 35, 37, 37, 39, 39, - 39, 39, 25, 27, 29, 31, 33, 23, 25, 27, 29, 29, 31, 31, 25, 27, - 23, 25, 12, 18, 21, 12, 18, 6, 11, 8, 11, 0, 83, 84, 0, 0, - 37, 39, 2, 23, 2, 2, 23, 25, 35, 37, 39, 0, 29, 0, 0, 0, - 0, 0, 0, 60, 0, 29, 0, 0, 39, 0, 0, 0, -}; - -/* Numeric_Value: 2876 bytes. */ - -RE_UINT32 re_get_numeric_value(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 12; - code = ch ^ (f << 12); - pos = (RE_UINT32)re_numeric_value_stage_1[f] << 4; - f = code >> 8; - code ^= f << 8; - pos = (RE_UINT32)re_numeric_value_stage_2[pos + f] << 3; - f = code >> 5; - code ^= f << 5; - pos = (RE_UINT32)re_numeric_value_stage_3[pos + f] << 3; - f = code >> 2; - code ^= f << 2; - pos = (RE_UINT32)re_numeric_value_stage_4[pos + f] << 2; - value = re_numeric_value_stage_5[pos + code]; - - return value; -} - -/* Bidi_Mirrored. 
*/ - -static RE_UINT8 re_bidi_mirrored_stage_1[] = { - 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, -}; - -static RE_UINT8 re_bidi_mirrored_stage_2[] = { - 0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 6, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, -}; - -static RE_UINT8 re_bidi_mirrored_stage_3[] = { - 0, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 3, 1, 1, 1, 1, - 4, 5, 1, 6, 7, 8, 1, 9, 10, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 11, - 1, 1, 1, 12, 1, 1, 1, 1, -}; - -static RE_UINT8 re_bidi_mirrored_stage_4[] = { - 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 4, 3, 3, 3, 3, 3, 5, 3, 3, 3, 3, 3, - 6, 7, 8, 3, 3, 9, 3, 3, 10, 11, 12, 13, 14, 3, 3, 3, - 3, 3, 3, 3, 3, 15, 3, 16, 3, 3, 3, 3, 3, 3, 17, 18, - 19, 20, 21, 22, 3, 3, 3, 3, 23, 3, 3, 3, 3, 3, 3, 3, - 24, 3, 3, 3, 3, 3, 3, 3, 3, 25, 3, 3, 26, 27, 3, 3, - 3, 3, 3, 28, 29, 30, 31, 32, -}; - -static RE_UINT8 re_bidi_mirrored_stage_5[] = { - 0, 0, 0, 0, 0, 3, 0, 80, 0, 0, 0, 40, 0, 0, 0, 40, - 0, 0, 0, 0, 0, 8, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 60, 0, 0, 0, 24, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 6, 96, 0, 0, 0, 0, 0, 0, 96, - 0, 96, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, - 30, 63, 98, 188, 87, 248, 15, 250, 255, 31, 60, 128, 245, 207, 255, 255, - 255, 159, 7, 1, 204, 255, 255, 193, 0, 62, 195, 255, 255, 63, 255, 255, - 0, 15, 0, 0, 3, 6, 0, 0, 0, 0, 0, 0, 0, 255, 63, 0, - 121, 59, 120, 112, 252, 255, 0, 0, 248, 255, 255, 249, 255, 255, 0, 1, - 63, 194, 55, 31, 58, 3, 240, 51, 0, 252, 255, 223, 83, 122, 48, 112, - 0, 0, 128, 1, 48, 188, 25, 254, 255, 255, 255, 255, 207, 191, 255, 255, - 255, 255, 127, 80, 124, 112, 136, 47, 60, 54, 0, 48, 255, 3, 0, 0, - 0, 255, 243, 15, 0, 0, 0, 0, 0, 0, 0, 126, 48, 0, 0, 0, - 0, 3, 0, 80, 0, 0, 0, 40, 0, 0, 0, 168, 13, 0, 0, 0, - 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 32, 0, 0, 0, 0, 0, - 0, 128, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, - 8, 0, 0, 0, 0, 0, 0, 0, -}; - 
-/* Bidi_Mirrored: 489 bytes. */ - -RE_UINT32 re_get_bidi_mirrored(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 16; - code = ch ^ (f << 16); - pos = (RE_UINT32)re_bidi_mirrored_stage_1[f] << 4; - f = code >> 12; - code ^= f << 12; - pos = (RE_UINT32)re_bidi_mirrored_stage_2[pos + f] << 3; - f = code >> 9; - code ^= f << 9; - pos = (RE_UINT32)re_bidi_mirrored_stage_3[pos + f] << 3; - f = code >> 6; - code ^= f << 6; - pos = (RE_UINT32)re_bidi_mirrored_stage_4[pos + f] << 6; - pos += code; - value = (re_bidi_mirrored_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Indic_Matra_Category. */ - -static RE_UINT8 re_indic_matra_category_stage_1[] = { - 0, 1, 1, 1, 1, 2, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, -}; - -static RE_UINT8 re_indic_matra_category_stage_2[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, - 8, 0, 0, 0, 0, 0, 0, 9, 0, 10, 11, 12, 13, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 14, 15, 16, 17, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 18, 0, 0, 0, 0, 0, - 19, 20, 0, 0, 0, 0, 21, 0, 0, 0, 0, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_indic_matra_category_stage_3[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 1, 2, 3, 4, 0, 0, 0, 0, 5, 6, 7, 4, 0, - 0, 0, 0, 5, 8, 0, 0, 0, 0, 0, 0, 5, 9, 0, 4, 0, - 0, 0, 0, 10, 11, 12, 4, 0, 0, 0, 0, 13, 14, 7, 0, 0, - 0, 0, 0, 15, 16, 17, 4, 0, 0, 0, 0, 10, 18, 19, 4, 0, - 0, 0, 0, 13, 20, 7, 4, 0, 0, 0, 0, 0, 21, 22, 0, 
23, - 0, 0, 0, 24, 25, 0, 0, 0, 0, 0, 0, 26, 27, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 28, 29, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 30, 31, 0, 32, 33, 34, 35, 36, 0, 0, 0, 0, 0, 0, - 0, 37, 0, 37, 0, 38, 0, 38, 0, 0, 0, 39, 40, 41, 0, 0, - 0, 0, 42, 0, 0, 0, 0, 0, 0, 0, 0, 43, 44, 0, 0, 0, - 0, 45, 0, 0, 0, 0, 46, 47, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 48, 49, 0, 0, 0, 0, 0, 50, 0, 0, 0, 0, 23, - 0, 0, 51, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 52, 0, 53, 0, 0, 0, 0, 0, 0, 0, 0, 54, 55, 0, 0, 0, - 0, 0, 0, 0, 56, 57, 0, 0, 0, 0, 0, 58, 59, 0, 0, 0, - 0, 0, 60, 61, 0, 0, 0, 0, 0, 0, 0, 62, 0, 0, 63, 64, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 65, 0, - 66, 0, 0, 67, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 68, 69, 0, 0, 0, 0, 0, 0, 70, 0, 0, 0, 0, - 0, 0, 71, 72, 0, 0, 0, 0, 0, 0, 0, 73, 44, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 74, 69, 0, 0, 0, 0, -}; - -static RE_UINT8 re_indic_matra_category_stage_4[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 2, - 3, 4, 5, 6, 1, 7, 3, 8, 0, 0, 9, 4, 0, 0, 0, 0, - 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, - 3, 4, 10, 11, 12, 13, 14, 0, 0, 0, 0, 15, 0, 0, 0, 0, - 3, 10, 0, 9, 16, 9, 17, 0, 3, 4, 5, 9, 18, 15, 3, 0, - 0, 0, 0, 0, 0, 0, 0, 19, 3, 4, 10, 11, 20, 13, 21, 0, - 0, 0, 0, 18, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, - 17, 10, 0, 22, 12, 23, 24, 0, 0, 0, 0, 0, 0, 0, 0, 6, - 1, 7, 25, 6, 26, 6, 6, 0, 0, 0, 9, 10, 0, 0, 0, 0, - 27, 7, 25, 18, 28, 29, 6, 0, 0, 0, 15, 25, 0, 0, 0, 0, - 7, 3, 10, 22, 12, 23, 24, 0, 0, 0, 0, 0, 0, 16, 0, 15, - 7, 6, 10, 10, 2, 30, 23, 31, 0, 7, 0, 0, 0, 0, 0, 0, - 19, 7, 6, 6, 4, 10, 0, 0, 32, 32, 33, 9, 0, 0, 0, 16, - 19, 7, 6, 6, 4, 9, 0, 0, 32, 32, 34, 0, 0, 0, 0, 0, - 35, 36, 4, 37, 37, 6, 6, 0, 36, 0, 10, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 15, 19, 17, 38, 6, 6, 0, 39, 16, 0, 0, - 0, 0, 0, 7, 4, 0, 0, 0, 0, 25, 0, 15, 25, 0, 0, 0, - 9, 6, 16, 0, 0, 0, 0, 0, 0, 15, 40, 16, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 19, 0, 0, 17, 10, 0, 0, 0, 0, 0, - 0, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 19, 6, 17, 4, 
41, - 42, 22, 23, 0, 25, 0, 0, 0, 9, 43, 0, 0, 0, 0, 0, 0, - 6, 44, 45, 46, 16, 0, 0, 0, 7, 7, 2, 22, 7, 8, 7, 7, - 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 38, 2, 0, 0, - 47, 1, 19, 6, 17, 5, 44, 22, 22, 40, 16, 0, 0, 0, 0, 0, - 0, 0, 15, 6, 4, 48, 49, 22, 23, 18, 25, 0, 0, 0, 0, 0, - 0, 0, 17, 8, 6, 25, 0, 0, 0, 0, 0, 2, 50, 7, 10, 0, - 0, 0, 0, 16, 0, 0, 0, 0, 0, 15, 3, 1, 0, 0, 0, 0, - 0, 0, 15, 7, 7, 7, 7, 7, 7, 7, 10, 0, 0, 0, 0, 0, - 0, 0, 0, 35, 4, 17, 4, 10, 0, 15, 0, 0, 0, 0, 0, 0, - 0, 0, 7, 6, 4, 22, 16, 0, 51, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 9, 6, 17, 52, 40, 10, 0, 0, 0, 0, 0, 0, - 1, 6, 53, 54, 55, 56, 33, 16, 0, 0, 0, 0, 0, 11, 5, 8, - 0, 0, 0, 43, 0, 0, 0, 0, 0, 15, 19, 7, 44, 25, 35, 0, - 57, 4, 9, 58, 0, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 39, - 0, 0, 0, 0, 6, 6, 4, 4, 4, 6, 6, 16, 0, 0, 0, 0, - 2, 3, 5, 1, 3, 0, 0, 0, 0, 0, 0, 9, 6, 4, 40, 37, - 17, 59, 16, 0, 0, 0, 0, 0, 0, 15, 8, 4, 4, 4, 6, 18, - 0, 0, 0, 0, 0, 0, 9, 8, -}; - -static RE_UINT8 re_indic_matra_category_stage_5[] = { - 0, 0, 5, 1, 1, 2, 1, 6, 6, 6, 6, 5, 5, 5, 1, 1, - 2, 1, 0, 5, 6, 0, 0, 2, 2, 0, 0, 4, 4, 6, 0, 1, - 5, 0, 5, 6, 5, 8, 1, 5, 9, 0, 10, 6, 2, 2, 4, 4, - 4, 5, 1, 0, 7, 0, 8, 1, 8, 0, 8, 8, 9, 2, 4, 1, - 3, 3, 3, 1, 3, 0, 0, 6, 5, 7, 7, 7, 6, 2, 0, 14, - 2, 5, 9, 10, 4, 2, 14, 0, 6, 1, 1, 8, 8, 5, 14, 1, - 6, 11, 7, 12, 2, 9, 11, 0, 5, 2, 6, 3, 3, 5, 5, 3, - 1, 3, 0, 13, 13, 0, 6, 14, -}; - -/* Indic_Matra_Category: 1336 bytes. 
*/ - -RE_UINT32 re_get_indic_matra_category(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 13; - code = ch ^ (f << 13); - pos = (RE_UINT32)re_indic_matra_category_stage_1[f] << 5; - f = code >> 8; - code ^= f << 8; - pos = (RE_UINT32)re_indic_matra_category_stage_2[pos + f] << 4; - f = code >> 4; - code ^= f << 4; - pos = (RE_UINT32)re_indic_matra_category_stage_3[pos + f] << 3; - f = code >> 1; - code ^= f << 1; - pos = (RE_UINT32)re_indic_matra_category_stage_4[pos + f] << 1; - value = re_indic_matra_category_stage_5[pos + code]; - - return value; -} - -/* Indic_Syllabic_Category. */ - -static RE_UINT8 re_indic_syllabic_category_stage_1[] = { - 0, 1, 2, 2, 2, 3, 2, 2, 4, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, -}; - -static RE_UINT8 re_indic_syllabic_category_stage_2[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, 4, 5, 6, 7, 8, - 9, 1, 1, 1, 1, 1, 1, 10, 1, 11, 12, 13, 14, 1, 1, 1, - 1, 1, 1, 1, 1, 15, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 16, 17, 18, 19, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 20, 1, 1, 1, 1, 1, - 21, 22, 1, 1, 1, 1, 23, 1, 1, 1, 1, 1, 1, 1, 1, 1, -}; - -static RE_UINT8 re_indic_syllabic_category_stage_3[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 8, 16, - 17, 11, 12, 18, 19, 20, 0, 21, 22, 23, 12, 24, 25, 0, 8, 0, - 10, 
11, 12, 24, 26, 27, 8, 28, 29, 30, 31, 32, 33, 34, 0, 0, - 35, 36, 12, 37, 38, 39, 8, 0, 40, 36, 12, 41, 38, 42, 8, 0, - 40, 36, 4, 43, 44, 34, 8, 45, 46, 47, 4, 48, 49, 50, 0, 51, - 52, 4, 53, 54, 55, 0, 0, 0, 56, 57, 58, 59, 60, 61, 0, 0, - 0, 0, 0, 0, 62, 4, 63, 64, 65, 66, 67, 68, 0, 0, 0, 0, - 4, 4, 69, 70, 0, 71, 72, 73, 74, 75, 0, 0, 0, 0, 0, 0, - 76, 77, 78, 77, 78, 79, 76, 80, 4, 4, 81, 82, 83, 84, 0, 0, - 85, 63, 86, 87, 0, 4, 88, 89, 4, 4, 90, 91, 92, 0, 0, 0, - 4, 93, 4, 4, 94, 95, 96, 97, 0, 0, 0, 0, 0, 0, 0, 0, - 98, 78, 4, 99, 100, 0, 0, 0, 101, 4, 102, 103, 4, 4, 104, 105, - 4, 4, 106, 107, 108, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 109, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 110, 0, 0, 0, - 111, 4, 112, 0, 4, 113, 114, 115, 116, 117, 4, 118, 119, 0, 0, 0, - 120, 4, 121, 4, 122, 123, 0, 0, 124, 4, 4, 125, 126, 0, 0, 0, - 127, 4, 128, 129, 130, 0, 4, 131, 4, 4, 4, 132, 133, 0, 134, 135, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 136, 137, 138, 0, - 139, 140, 4, 141, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 142, 78, 4, 143, 144, 0, 0, 0, 145, 4, 4, 146, 0, 0, 0, 0, - 147, 4, 148, 149, 0, 0, 0, 0, 150, 151, 4, 152, 153, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 154, 4, 155, 156, 0, 0, 0, 0, -}; - -static RE_UINT8 re_indic_syllabic_category_stage_4[] = { - 0, 0, 0, 0, 1, 0, 0, 0, 2, 3, 3, 3, 3, 4, 5, 5, - 5, 5, 5, 5, 5, 5, 6, 7, 8, 8, 8, 9, 0, 10, 5, 5, - 11, 0, 0, 0, 12, 3, 13, 5, 14, 15, 3, 16, 16, 4, 5, 5, - 5, 5, 17, 5, 18, 19, 20, 7, 8, 21, 21, 22, 0, 23, 0, 24, - 20, 0, 0, 0, 14, 15, 25, 26, 17, 27, 20, 28, 29, 23, 21, 30, - 0, 0, 13, 18, 31, 32, 0, 0, 14, 15, 3, 33, 33, 4, 5, 5, - 17, 13, 20, 7, 8, 34, 34, 30, 8, 21, 21, 30, 0, 35, 0, 24, - 36, 0, 0, 0, 37, 15, 25, 12, 38, 39, 27, 17, 40, 41, 42, 19, - 5, 5, 20, 35, 29, 35, 43, 30, 0, 23, 0, 0, 14, 15, 3, 38, - 38, 4, 5, 5, 5, 13, 20, 44, 8, 43, 43, 30, 0, 45, 20, 0, - 46, 15, 3, 38, 5, 13, 20, 7, 0, 45, 0, 47, 5, 5, 42, 44, - 8, 43, 43, 48, 0, 0, 49, 50, 46, 15, 3, 3, 3, 25, 19, 5, - 24, 5, 5, 36, 5, 42, 51, 23, 8, 52, 
8, 8, 35, 0, 0, 0, - 13, 5, 5, 5, 5, 5, 5, 42, 8, 8, 53, 0, 8, 34, 54, 55, - 27, 56, 18, 36, 0, 5, 13, 5, 13, 57, 19, 27, 8, 8, 34, 58, - 8, 59, 54, 60, 0, 0, 0, 20, 5, 5, 13, 5, 5, 5, 5, 41, - 10, 8, 8, 61, 62, 63, 64, 65, 66, 66, 67, 66, 66, 66, 66, 66, - 66, 66, 66, 68, 69, 3, 70, 8, 8, 71, 72, 73, 74, 11, 75, 76, - 77, 78, 79, 80, 81, 82, 5, 5, 83, 84, 54, 85, 0, 0, 86, 87, - 88, 5, 5, 17, 6, 89, 0, 0, 88, 5, 5, 5, 6, 0, 0, 0, - 90, 0, 0, 0, 91, 3, 3, 3, 3, 35, 8, 8, 8, 61, 92, 93, - 94, 0, 0, 95, 96, 5, 5, 5, 8, 8, 97, 0, 98, 99, 100, 0, - 101, 102, 102, 103, 104, 105, 0, 0, 5, 5, 5, 0, 8, 8, 8, 8, - 106, 99, 107, 0, 5, 108, 8, 0, 5, 5, 5, 69, 88, 109, 99, 110, - 111, 8, 8, 8, 8, 79, 107, 0, 112, 113, 3, 3, 5, 114, 8, 8, - 8, 115, 5, 0, 116, 3, 117, 5, 118, 8, 119, 120, 0, 0, 121, 122, - 5, 123, 8, 8, 124, 0, 0, 0, 5, 125, 8, 106, 99, 126, 0, 0, - 0, 0, 0, 13, 127, 0, 0, 0, 0, 0, 0, 1, 33, 128, 129, 5, - 108, 8, 0, 0, 5, 5, 5, 130, 131, 132, 133, 5, 134, 0, 0, 0, - 135, 3, 3, 3, 117, 5, 5, 5, 5, 136, 8, 8, 8, 89, 0, 0, - 0, 0, 19, 5, 130, 102, 137, 107, 5, 108, 8, 138, 139, 0, 0, 0, - 140, 3, 4, 88, 141, 8, 8, 142, 89, 0, 0, 0, 3, 117, 5, 5, - 5, 5, 81, 8, 143, 144, 0, 0, 99, 99, 99, 145, 13, 0, 146, 0, - 8, 8, 8, 84, 147, 0, 0, 0, 117, 5, 108, 8, 0, 148, 0, 0, - 5, 5, 5, 74, 149, 5, 150, 99, 151, 8, 29, 152, 81, 45, 0, 153, - 5, 13, 13, 5, 5, 0, 0, 154, 155, 15, 3, 3, 5, 5, 8, 8, - 8, 53, 0, 0, 156, 3, 3, 4, 8, 8, 157, 0, 156, 88, 5, 5, - 5, 108, 8, 8, 158, 89, 0, 0, 156, 3, 3, 3, 4, 5, 5, 5, - 108, 8, 8, 8, 63, 0, 0, 0, 3, 3, 117, 5, 5, 5, 129, 159, - 8, 160, 0, 0, -}; - -static RE_UINT8 re_indic_syllabic_category_stage_5[] = { - 0, 0, 0, 0, 9, 0, 0, 0, 1, 1, 1, 2, 6, 6, 6, 6, - 6, 10, 10, 10, 10, 10, 10, 10, 10, 10, 7, 7, 4, 3, 7, 7, - 7, 7, 7, 7, 7, 5, 7, 7, 0, 7, 7, 7, 6, 6, 7, 7, - 0, 0, 6, 6, 0, 10, 10, 10, 0, 1, 1, 2, 0, 6, 6, 6, - 6, 0, 0, 6, 10, 0, 10, 10, 10, 0, 10, 0, 0, 0, 10, 10, - 10, 10, 0, 0, 7, 0, 0, 7, 7, 5, 11, 0, 0, 0, 0, 7, - 10, 10, 0, 
10, 6, 6, 6, 0, 0, 0, 0, 6, 0, 10, 10, 0, - 4, 0, 7, 7, 7, 7, 7, 0, 7, 5, 0, 0, 1, 0, 9, 9, - 0, 14, 0, 0, 6, 6, 0, 6, 7, 7, 0, 7, 0, 0, 7, 7, - 0, 10, 0, 0, 0, 0, 1, 17, 6, 0, 6, 6, 6, 10, 0, 0, - 0, 0, 0, 10, 10, 0, 0, 0, 10, 10, 10, 0, 7, 0, 7, 7, - 0, 3, 7, 7, 0, 7, 7, 0, 0, 0, 1, 2, 0, 0, 10, 0, - 7, 5, 12, 0, 0, 0, 11, 11, 11, 11, 11, 11, 0, 0, 5, 0, - 7, 0, 7, 0, 7, 7, 5, 0, 19, 19, 19, 19, 0, 1, 5, 0, - 10, 0, 0, 10, 0, 10, 0, 10, 14, 14, 0, 0, 7, 0, 0, 0, - 0, 1, 0, 0, 7, 7, 1, 2, 7, 7, 1, 1, 5, 3, 0, 0, - 16, 16, 16, 16, 16, 13, 13, 13, 13, 13, 13, 13, 0, 13, 13, 13, - 13, 0, 0, 0, 10, 6, 6, 6, 6, 6, 6, 7, 7, 7, 1, 19, - 2, 5, 5, 14, 14, 14, 14, 10, 10, 10, 6, 6, 7, 7, 10, 10, - 10, 10, 14, 14, 14, 10, 7, 19, 19, 10, 10, 7, 7, 19, 19, 19, - 19, 19, 10, 10, 10, 7, 7, 7, 7, 10, 10, 10, 10, 10, 14, 7, - 7, 7, 7, 19, 19, 19, 10, 19, 0, 0, 19, 19, 7, 7, 0, 0, - 6, 6, 6, 10, 5, 0, 0, 0, 10, 0, 7, 7, 10, 10, 10, 6, - 7, 20, 20, 0, 12, 0, 0, 0, 0, 5, 5, 0, 3, 0, 0, 0, - 9, 10, 10, 10, 7, 13, 13, 13, 15, 15, 1, 15, 15, 15, 15, 15, - 15, 0, 0, 0, 10, 10, 10, 8, 8, 8, 8, 8, 8, 8, 0, 0, - 18, 18, 18, 18, 18, 0, 0, 0, 7, 15, 15, 15, 19, 19, 0, 0, - 10, 10, 10, 7, 10, 14, 14, 15, 15, 15, 15, 0, 5, 7, 7, 7, - 1, 1, 1, 12, 2, 6, 6, 6, 4, 7, 7, 7, 5, 10, 10, 10, - 1, 12, 2, 6, 6, 6, 10, 10, 10, 13, 13, 13, 7, 7, 5, 5, - 13, 13, 10, 10, 0, 0, 3, 10, 10, 10, 15, 15, 6, 6, 4, 7, - 15, 15, 5, 5, 13, 13, 7, 7, 1, 1, 0, 4, 0, 0, 2, 2, - 6, 6, 5, 10, 10, 10, 10, 1, 10, 10, 8, 8, 8, 8, 10, 10, - 10, 10, 8, 13, 13, 10, 10, 10, 10, 13, 10, 1, 1, 2, 6, 6, - 15, 7, 7, 7, 8, 8, 8, 19, 7, 7, 7, 15, 15, 15, 15, 5, - 1, 1, 12, 2, 10, 10, 10, 4, 7, 13, 14, 14, 7, 7, 7, 14, - 14, 14, 14, 0, 15, 15, 0, 0, 0, 0, 10, 19, 18, 19, 18, 0, - 0, 2, 5, 0, 10, 6, 10, 10, 10, 10, 10, 15, 15, 15, 15, 7, - 19, 5, 0, 0, 7, 0, 1, 2, 0, 0, 0, 5, 1, 1, 2, 0, - 1, 1, 2, 6, 7, 5, 4, 0, 7, 7, 7, 5, 2, 7, 7, 7, - 7, 7, 5, 4, -}; - -/* Indic_Syllabic_Category: 1952 bytes. 
*/ - -RE_UINT32 re_get_indic_syllabic_category(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 13; - code = ch ^ (f << 13); - pos = (RE_UINT32)re_indic_syllabic_category_stage_1[f] << 5; - f = code >> 8; - code ^= f << 8; - pos = (RE_UINT32)re_indic_syllabic_category_stage_2[pos + f] << 4; - f = code >> 4; - code ^= f << 4; - pos = (RE_UINT32)re_indic_syllabic_category_stage_3[pos + f] << 2; - f = code >> 2; - code ^= f << 2; - pos = (RE_UINT32)re_indic_syllabic_category_stage_4[pos + f] << 2; - value = re_indic_syllabic_category_stage_5[pos + code]; - - return value; -} - -/* Alphanumeric. */ - -static RE_UINT8 re_alphanumeric_stage_1[] = { - 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, -}; - -static RE_UINT8 re_alphanumeric_stage_2[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 9, 10, 11, 7, 7, 7, 7, 12, 13, 13, 13, 13, 14, - 15, 16, 17, 13, 18, 13, 19, 13, 13, 13, 13, 13, 13, 20, 13, 13, - 13, 13, 13, 13, 13, 13, 21, 13, 13, 13, 22, 13, 13, 23, 13, 13, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 24, 7, 25, 26, 13, 13, 13, 13, 13, 13, 13, 27, - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, -}; - -static RE_UINT8 re_alphanumeric_stage_3[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 1, 17, 18, 19, 1, 20, 21, 22, 23, 24, 25, 26, 27, 1, 28, - 29, 30, 31, 31, 32, 31, 31, 31, 31, 31, 31, 31, 33, 34, 35, 31, - 36, 37, 31, 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 38, 1, 1, 1, 1, 1, 1, 1, 1, 1, 39, - 1, 1, 1, 1, 40, 1, 41, 42, 43, 44, 45, 46, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 47, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 1, 48, 49, 1, 50, 51, 52, 53, 54, 55, 56, 57, 31, 31, 31, - 58, 59, 60, 61, 62, 31, 31, 31, 63, 64, 31, 31, 31, 31, 65, 31, - 1, 1, 1, 66, 67, 31, 31, 31, 1, 1, 1, 1, 68, 31, 31, 31, - 1, 1, 69, 31, 31, 31, 31, 70, 71, 31, 31, 31, 31, 31, 
31, 31, - 31, 31, 31, 31, 72, 73, 74, 75, 31, 31, 31, 31, 31, 31, 76, 31, - 1, 1, 1, 1, 1, 1, 77, 1, 1, 1, 1, 1, 1, 1, 1, 78, - 79, 31, 31, 31, 31, 31, 31, 31, 1, 1, 79, 31, 31, 31, 31, 31, -}; - -static RE_UINT8 re_alphanumeric_stage_4[] = { - 0, 1, 2, 2, 0, 3, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 6, 7, 0, 0, 8, 9, 10, 11, 5, 12, - 5, 5, 5, 5, 13, 5, 5, 5, 5, 14, 15, 16, 17, 18, 19, 20, - 21, 5, 22, 23, 5, 5, 24, 25, 26, 5, 27, 5, 5, 28, 5, 29, - 30, 31, 32, 0, 0, 33, 0, 34, 5, 35, 36, 37, 38, 39, 40, 41, - 42, 43, 44, 45, 46, 47, 48, 49, 38, 47, 50, 51, 52, 53, 54, 55, - 56, 57, 58, 49, 59, 57, 60, 61, 59, 62, 63, 64, 65, 66, 67, 68, - 16, 69, 70, 0, 71, 72, 73, 0, 74, 75, 76, 77, 78, 79, 0, 0, - 5, 80, 81, 82, 83, 5, 84, 85, 5, 5, 86, 5, 87, 88, 89, 5, - 90, 5, 91, 0, 92, 5, 5, 93, 16, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 94, 2, 5, 5, 95, 96, 97, 97, 98, 5, 99, 100, 75, - 1, 5, 5, 101, 5, 102, 5, 103, 79, 104, 105, 106, 5, 107, 108, 0, - 109, 5, 110, 111, 108, 112, 0, 0, 5, 113, 114, 0, 5, 115, 5, 116, - 5, 103, 117, 118, 0, 0, 0, 119, 5, 5, 5, 5, 5, 5, 0, 0, - 120, 5, 121, 118, 5, 122, 123, 124, 0, 0, 0, 125, 126, 0, 0, 0, - 127, 128, 129, 5, 130, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 131, 5, 75, 5, 132, 110, 5, 5, 5, 5, 133, - 5, 84, 5, 134, 135, 136, 136, 5, 0, 137, 0, 0, 0, 0, 0, 0, - 138, 139, 16, 5, 140, 16, 5, 85, 141, 142, 5, 5, 143, 69, 0, 26, - 5, 5, 5, 5, 5, 103, 0, 0, 5, 5, 5, 5, 5, 5, 31, 0, - 5, 5, 5, 5, 31, 0, 26, 118, 144, 145, 5, 146, 147, 5, 5, 92, - 148, 149, 5, 5, 150, 151, 0, 152, 153, 17, 5, 97, 5, 5, 154, 155, - 5, 102, 156, 79, 5, 157, 158, 0, 5, 135, 159, 160, 5, 110, 161, 162, - 163, 164, 0, 0, 0, 0, 5, 165, 5, 5, 5, 5, 5, 166, 167, 109, - 5, 5, 5, 168, 5, 5, 169, 0, 170, 171, 172, 5, 5, 28, 173, 5, - 5, 118, 26, 5, 174, 5, 17, 175, 0, 0, 0, 176, 5, 5, 5, 79, - 1, 2, 2, 105, 5, 110, 177, 0, 178, 179, 180, 0, 5, 5, 5, 69, - 0, 0, 5, 93, 0, 0, 0, 0, 0, 0, 0, 0, 79, 5, 181, 0, - 110, 26, 151, 0, 118, 5, 182, 0, 5, 5, 5, 
5, 118, 75, 0, 0, - 183, 184, 103, 0, 0, 0, 0, 0, 103, 169, 0, 0, 5, 185, 0, 0, - 186, 97, 0, 79, 0, 0, 0, 0, 5, 103, 103, 156, 0, 0, 0, 0, - 5, 5, 130, 0, 0, 0, 0, 0, 5, 5, 187, 55, 149, 32, 26, 188, - 5, 189, 0, 0, 5, 5, 190, 0, 0, 0, 0, 0, 5, 103, 75, 0, - 5, 5, 5, 143, 0, 0, 0, 0, 5, 5, 5, 191, 0, 0, 0, 0, - 5, 143, 0, 0, 0, 0, 0, 0, 5, 32, 0, 0, 0, 0, 0, 0, - 5, 5, 192, 110, 173, 0, 0, 0, 193, 0, 0, 0, 0, 0, 0, 0, - 5, 5, 194, 5, 195, 196, 197, 5, 198, 199, 200, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 201, 202, 85, 194, 194, 132, 132, 203, 203, 204, 5, - 197, 205, 206, 207, 208, 209, 0, 0, 5, 5, 5, 5, 5, 5, 135, 0, - 5, 93, 5, 5, 5, 5, 5, 5, 118, 0, 0, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_alphanumeric_stage_5[] = { - 0, 0, 0, 0, 0, 0, 255, 3, 254, 255, 255, 7, 0, 4, 32, 4, - 255, 255, 127, 255, 255, 255, 255, 255, 195, 255, 3, 0, 31, 80, 0, 0, - 32, 0, 0, 0, 0, 0, 223, 60, 64, 215, 255, 255, 251, 255, 255, 255, - 255, 255, 191, 255, 3, 252, 255, 255, 255, 0, 254, 255, 255, 255, 127, 2, - 254, 255, 255, 255, 255, 0, 0, 0, 0, 0, 255, 191, 182, 0, 255, 255, - 255, 7, 7, 0, 0, 0, 255, 7, 255, 255, 255, 254, 255, 195, 255, 255, - 255, 255, 239, 31, 254, 225, 255, 159, 0, 0, 255, 255, 0, 224, 255, 255, - 255, 255, 3, 0, 255, 7, 48, 4, 255, 255, 255, 252, 255, 31, 0, 0, - 255, 255, 255, 1, 253, 31, 0, 0, 240, 3, 255, 127, 255, 255, 255, 239, - 255, 223, 225, 255, 207, 255, 254, 254, 238, 159, 249, 255, 255, 253, 197, 227, - 159, 89, 128, 176, 207, 255, 3, 0, 238, 135, 249, 255, 255, 253, 109, 195, - 135, 25, 2, 94, 192, 255, 63, 0, 238, 191, 251, 255, 255, 253, 237, 227, - 191, 27, 1, 0, 207, 255, 0, 0, 159, 25, 192, 176, 207, 255, 2, 0, - 236, 199, 61, 214, 24, 199, 255, 195, 199, 29, 129, 0, 192, 255, 0, 0, - 238, 223, 253, 255, 255, 253, 239, 227, 223, 29, 96, 3, 236, 223, 253, 255, - 223, 29, 96, 64, 207, 255, 6, 0, 255, 255, 255, 231, 223, 93, 128, 0, - 207, 255, 0, 252, 236, 255, 127, 252, 255, 255, 251, 47, 127, 128, 95, 255, - 0, 0, 12, 0, 255, 255, 255, 7, 127, 
32, 255, 3, 150, 37, 240, 254, - 174, 236, 255, 59, 95, 32, 255, 243, 1, 0, 0, 0, 255, 3, 0, 0, - 255, 254, 255, 255, 255, 31, 254, 255, 3, 255, 255, 254, 255, 255, 255, 31, - 255, 255, 127, 249, 255, 3, 255, 255, 231, 193, 255, 255, 127, 64, 255, 51, - 191, 32, 255, 255, 255, 255, 255, 247, 255, 61, 127, 61, 255, 61, 255, 255, - 255, 255, 61, 127, 61, 255, 127, 255, 255, 255, 61, 255, 255, 255, 255, 135, - 255, 255, 0, 0, 255, 255, 31, 0, 255, 159, 255, 255, 255, 199, 1, 0, - 255, 223, 15, 0, 255, 255, 15, 0, 255, 223, 13, 0, 255, 255, 207, 255, - 255, 1, 128, 16, 255, 255, 255, 0, 255, 7, 255, 255, 255, 255, 63, 0, - 255, 15, 255, 1, 192, 255, 255, 255, 255, 63, 31, 0, 255, 15, 255, 255, - 255, 3, 255, 3, 255, 255, 255, 15, 255, 255, 255, 127, 254, 255, 31, 0, - 128, 0, 0, 0, 255, 255, 239, 255, 239, 15, 255, 3, 255, 243, 255, 255, - 191, 255, 3, 0, 255, 227, 255, 255, 255, 255, 255, 63, 0, 222, 111, 0, - 255, 255, 63, 63, 63, 63, 255, 170, 255, 255, 223, 95, 220, 31, 207, 15, - 255, 31, 220, 31, 0, 0, 2, 128, 0, 0, 255, 31, 132, 252, 47, 62, - 80, 189, 255, 243, 224, 67, 0, 0, 255, 1, 0, 0, 0, 0, 192, 255, - 255, 127, 255, 255, 31, 120, 12, 0, 255, 128, 0, 0, 255, 255, 127, 0, - 127, 127, 127, 127, 0, 128, 0, 0, 224, 0, 0, 0, 254, 3, 62, 31, - 255, 255, 127, 224, 224, 255, 255, 255, 255, 63, 254, 255, 255, 127, 0, 0, - 255, 31, 255, 255, 255, 15, 0, 0, 255, 127, 240, 143, 255, 255, 255, 128, - 0, 0, 128, 255, 252, 255, 255, 255, 255, 121, 15, 0, 255, 7, 0, 0, - 0, 0, 0, 255, 187, 247, 255, 255, 15, 0, 255, 3, 0, 0, 252, 8, - 255, 255, 7, 0, 255, 255, 247, 255, 0, 128, 255, 3, 255, 63, 255, 3, - 255, 255, 127, 4, 5, 0, 0, 56, 255, 255, 60, 0, 126, 126, 126, 0, - 127, 127, 0, 0, 255, 7, 255, 3, 15, 0, 255, 255, 127, 248, 255, 255, - 255, 63, 255, 255, 255, 255, 255, 3, 127, 0, 248, 224, 255, 253, 127, 95, - 219, 255, 255, 255, 0, 0, 248, 255, 255, 255, 252, 255, 0, 0, 255, 15, - 0, 0, 223, 255, 252, 252, 252, 28, 255, 239, 255, 255, 127, 255, 255, 183, - 255, 63, 
255, 63, 255, 255, 1, 0, 15, 255, 62, 0, 63, 253, 255, 255, - 255, 255, 191, 145, 255, 255, 255, 192, 111, 240, 239, 254, 63, 0, 0, 0, - 255, 1, 255, 3, 255, 255, 199, 255, 30, 0, 255, 3, 7, 0, 0, 0, - 31, 0, 255, 255, 3, 0, 0, 0, 255, 255, 223, 255, 255, 255, 255, 223, - 100, 222, 255, 235, 239, 255, 255, 255, 191, 231, 223, 223, 255, 255, 255, 123, - 95, 252, 253, 255, 63, 255, 255, 255, 253, 255, 255, 247, 255, 253, 255, 255, - 247, 207, 255, 255, 150, 254, 247, 10, 132, 234, 150, 170, 150, 247, 247, 94, - 255, 251, 255, 15, 238, 251, 255, 15, -}; - -/* Alphanumeric: 1849 bytes. */ - -RE_UINT32 re_get_alphanumeric(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 16; - code = ch ^ (f << 16); - pos = (RE_UINT32)re_alphanumeric_stage_1[f] << 5; - f = code >> 11; - code ^= f << 11; - pos = (RE_UINT32)re_alphanumeric_stage_2[pos + f] << 3; - f = code >> 8; - code ^= f << 8; - pos = (RE_UINT32)re_alphanumeric_stage_3[pos + f] << 3; - f = code >> 5; - code ^= f << 5; - pos = (RE_UINT32)re_alphanumeric_stage_4[pos + f] << 5; - pos += code; - value = (re_alphanumeric_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Any. */ - -RE_UINT32 re_get_any(RE_UINT32 ch) { - return 1; -} - -/* Blank. 
*/ - -static RE_UINT8 re_blank_stage_1[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, -}; - -static RE_UINT8 re_blank_stage_2[] = { - 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, -}; - -static RE_UINT8 re_blank_stage_3[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, - 3, 1, 1, 1, 1, 1, 1, 1, 4, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -}; - -static RE_UINT8 re_blank_stage_4[] = { - 0, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 3, 1, 1, 1, 1, 1, 4, 5, 1, 1, 1, 1, 1, 1, - 3, 1, 1, 1, 1, 1, 1, 1, -}; - -static RE_UINT8 re_blank_stage_5[] = { - 0, 2, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, - 255, 7, 0, 0, 0, 128, 0, 0, 0, 0, 0, 128, 0, 0, 0, 0, -}; - -/* Blank: 169 bytes. */ - -RE_UINT32 re_get_blank(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 16; - code = ch ^ (f << 16); - pos = (RE_UINT32)re_blank_stage_1[f] << 3; - f = code >> 13; - code ^= f << 13; - pos = (RE_UINT32)re_blank_stage_2[pos + f] << 4; - f = code >> 9; - code ^= f << 9; - pos = (RE_UINT32)re_blank_stage_3[pos + f] << 3; - f = code >> 6; - code ^= f << 6; - pos = (RE_UINT32)re_blank_stage_4[pos + f] << 6; - pos += code; - value = (re_blank_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Graph. 
*/ - -static RE_UINT8 re_graph_stage_1[] = { - 0, 1, 2, 3, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 6, 4, 8, - 4, 8, -}; - -static RE_UINT8 re_graph_stage_2[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 9, 10, 11, 7, 7, 7, 7, 12, 13, 7, 7, 7, 14, - 15, 16, 17, 13, 18, 13, 19, 13, 13, 13, 13, 13, 13, 20, 13, 13, - 13, 13, 13, 13, 13, 13, 21, 13, 13, 13, 22, 13, 13, 23, 24, 13, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 25, 7, 26, 27, 13, 13, 13, 13, 13, 13, 13, 28, - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, - 29, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 30, -}; - -static RE_UINT8 re_graph_stage_3[] = { - 0, 1, 1, 2, 1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, - 14, 1, 15, 16, 1, 1, 17, 18, 19, 20, 21, 22, 23, 24, 1, 25, - 26, 27, 1, 28, 29, 1, 1, 30, 1, 1, 1, 31, 32, 33, 34, 35, - 36, 37, 38, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 39, 1, 1, 1, 1, 1, 1, 1, 1, 1, 40, - 1, 1, 1, 1, 41, 1, 42, 43, 44, 45, 46, 47, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 48, 49, 49, 49, 49, 49, 49, 49, 49, - 1, 1, 50, 51, 1, 52, 53, 54, 55, 56, 57, 58, 59, 49, 49, 49, - 60, 61, 62, 63, 64, 49, 65, 49, 66, 67, 49, 49, 49, 49, 68, 49, - 1, 1, 1, 69, 70, 49, 49, 49, 1, 1, 1, 1, 71, 49, 49, 49, - 1, 1, 72, 49, 49, 49, 49, 73, 74, 49, 49, 49, 49, 49, 49, 49, - 75, 76, 77, 78, 79, 80, 81, 82, 49, 49, 49, 49, 49, 49, 83, 49, - 84, 85, 86, 87, 88, 89, 90, 91, 1, 1, 1, 1, 1, 1, 92, 1, - 1, 1, 1, 1, 1, 1, 1, 93, 94, 49, 49, 49, 49, 49, 49, 49, - 1, 1, 94, 49, 49, 49, 49, 49, 95, 96, 49, 49, 49, 49, 49, 49, - 1, 1, 1, 1, 1, 1, 1, 97, -}; - -static RE_UINT8 re_graph_stage_4[] = { - 0, 1, 2, 3, 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 4, 5, 6, 2, 2, 2, 7, 8, 1, 9, 2, 10, 11, - 12, 2, 2, 2, 2, 2, 2, 2, 13, 2, 14, 2, 2, 15, 2, 16, - 2, 17, 18, 0, 0, 19, 0, 20, 2, 2, 2, 21, 22, 23, 24, 25, - 26, 27, 28, 29, 30, 31, 32, 33, 22, 
31, 34, 35, 36, 37, 38, 39, - 40, 41, 42, 43, 44, 45, 46, 47, 44, 48, 49, 50, 51, 52, 53, 54, - 1, 55, 56, 0, 57, 58, 59, 0, 2, 2, 60, 61, 21, 62, 63, 0, - 2, 2, 2, 2, 2, 2, 64, 2, 2, 2, 65, 2, 66, 67, 68, 2, - 69, 2, 48, 70, 71, 2, 2, 72, 2, 2, 2, 2, 73, 2, 2, 74, - 75, 76, 77, 78, 2, 2, 79, 80, 81, 2, 2, 82, 2, 83, 2, 84, - 70, 85, 86, 87, 2, 88, 89, 2, 90, 2, 3, 91, 80, 92, 0, 0, - 2, 2, 88, 70, 2, 2, 2, 93, 2, 94, 95, 2, 0, 0, 10, 76, - 2, 2, 2, 2, 2, 2, 2, 96, 97, 2, 98, 79, 2, 99, 100, 101, - 102, 103, 3, 104, 105, 16, 106, 74, 2, 2, 2, 2, 107, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 77, 2, 108, 109, 2, 2, 2, 2, 2, - 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 110, 0, 0, 0, 0, 0, - 2, 111, 3, 2, 2, 2, 2, 112, 2, 64, 2, 113, 76, 114, 114, 2, - 2, 56, 0, 0, 115, 2, 2, 77, 2, 2, 2, 2, 2, 2, 84, 116, - 1, 2, 1, 2, 8, 2, 2, 2, 117, 118, 2, 2, 111, 16, 2, 119, - 3, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 2, 2, 84, 2, 2, - 2, 2, 2, 2, 2, 2, 120, 0, 2, 2, 2, 2, 121, 2, 122, 2, - 2, 123, 2, 2, 124, 2, 2, 82, 2, 2, 2, 2, 125, 109, 0, 126, - 2, 127, 2, 82, 2, 2, 128, 56, 2, 2, 129, 70, 2, 2, 130, 0, - 2, 76, 131, 56, 2, 2, 132, 76, 133, 134, 0, 0, 0, 0, 2, 135, - 2, 2, 2, 2, 2, 119, 136, 56, 0, 0, 0, 0, 0, 0, 0, 0, - 2, 2, 2, 137, 2, 2, 71, 0, 138, 139, 140, 2, 2, 2, 141, 2, - 2, 2, 106, 2, 142, 2, 143, 144, 71, 122, 145, 146, 2, 2, 2, 91, - 1, 2, 2, 2, 2, 3, 147, 148, 149, 150, 151, 0, 2, 2, 2, 16, - 152, 153, 2, 2, 154, 0, 106, 79, 0, 0, 0, 0, 70, 2, 74, 0, - 3, 119, 109, 0, 155, 2, 156, 0, 2, 2, 2, 2, 79, 157, 0, 0, - 158, 159, 160, 0, 0, 0, 0, 0, 161, 162, 0, 0, 2, 163, 0, 0, - 164, 165, 166, 2, 0, 0, 0, 0, 2, 167, 168, 169, 0, 0, 0, 0, - 2, 2, 170, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, - 2, 2, 171, 172, 2, 2, 173, 174, 2, 99, 175, 0, 2, 2, 174, 0, - 0, 0, 0, 0, 2, 82, 157, 0, 2, 2, 2, 176, 0, 0, 0, 0, - 2, 2, 2, 177, 0, 0, 0, 0, 2, 176, 0, 0, 0, 0, 0, 0, - 2, 178, 0, 0, 0, 0, 0, 0, 2, 2, 179, 3, 180, 0, 0, 0, - 181, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 84, - 2, 182, 2, 2, 2, 2, 79, 0, 2, 2, 
183, 0, 0, 0, 0, 0, - 2, 2, 76, 15, 0, 0, 0, 0, 2, 2, 99, 2, 62, 184, 185, 2, - 186, 187, 188, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 189, 2, 2, - 2, 2, 2, 2, 2, 2, 190, 2, 185, 191, 192, 193, 194, 195, 0, 196, - 2, 88, 2, 2, 77, 197, 198, 0, 83, 111, 2, 88, 16, 0, 0, 199, - 200, 16, 201, 0, 0, 0, 0, 0, 2, 202, 2, 70, 77, 2, 203, 74, - 2, 3, 204, 2, 2, 2, 2, 205, 2, 79, 119, 143, 0, 0, 0, 206, - 2, 2, 207, 0, 2, 2, 183, 0, 2, 2, 2, 77, 0, 0, 0, 0, - 2, 2, 2, 2, 2, 2, 76, 0, 2, 72, 2, 2, 2, 2, 2, 2, - 79, 0, 0, 0, 0, 0, 0, 0, 208, 2, 2, 2, 0, 0, 0, 0, - 2, 2, 2, 2, 2, 2, 2, 172, 2, 2, 2, 2, 2, 2, 2, 79, -}; - -static RE_UINT8 re_graph_stage_5[] = { - 0, 0, 0, 0, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 127, - 255, 255, 255, 124, 240, 215, 255, 255, 251, 255, 255, 255, 255, 0, 254, 255, - 255, 255, 127, 254, 255, 134, 254, 255, 255, 0, 255, 255, 255, 7, 31, 0, - 223, 255, 255, 223, 255, 191, 255, 255, 255, 231, 255, 255, 255, 255, 3, 0, - 255, 255, 255, 7, 255, 63, 255, 127, 255, 255, 255, 79, 253, 31, 0, 0, - 240, 255, 255, 127, 255, 255, 255, 254, 238, 159, 249, 255, 255, 253, 197, 243, - 159, 121, 128, 176, 207, 255, 255, 15, 238, 135, 249, 255, 255, 253, 109, 211, - 135, 57, 2, 94, 192, 255, 63, 0, 238, 191, 251, 255, 255, 253, 237, 243, - 191, 59, 1, 0, 207, 255, 3, 0, 159, 57, 192, 176, 207, 255, 255, 0, - 236, 199, 61, 214, 24, 199, 255, 195, 199, 61, 129, 0, 192, 255, 255, 7, - 238, 223, 253, 255, 255, 253, 239, 227, 223, 61, 96, 3, 207, 255, 0, 255, - 236, 223, 253, 255, 255, 253, 239, 243, 223, 61, 96, 64, 207, 255, 6, 0, - 255, 255, 255, 231, 223, 125, 128, 0, 207, 255, 63, 254, 236, 255, 127, 252, - 255, 255, 251, 47, 127, 132, 95, 255, 0, 0, 28, 0, 255, 255, 255, 135, - 255, 255, 255, 15, 150, 37, 240, 254, 174, 236, 255, 59, 95, 63, 255, 243, - 255, 254, 255, 255, 255, 31, 254, 255, 255, 255, 255, 223, 255, 223, 255, 7, - 191, 32, 255, 255, 255, 61, 127, 61, 255, 61, 255, 255, 255, 255, 61, 127, - 61, 255, 127, 255, 255, 255, 61, 255, 255, 255, 
255, 31, 255, 255, 255, 3, - 255, 255, 31, 0, 254, 255, 255, 31, 255, 255, 1, 0, 255, 223, 31, 0, - 255, 255, 127, 0, 255, 255, 15, 0, 255, 223, 13, 0, 255, 255, 255, 63, - 255, 3, 255, 3, 255, 127, 255, 3, 255, 255, 255, 0, 255, 7, 255, 255, - 255, 255, 63, 0, 255, 15, 255, 15, 241, 255, 255, 255, 255, 63, 31, 0, - 255, 15, 255, 255, 255, 3, 255, 199, 255, 255, 255, 207, 255, 255, 255, 159, - 255, 63, 0, 0, 255, 255, 15, 240, 255, 255, 255, 248, 255, 227, 255, 255, - 127, 0, 0, 240, 255, 255, 63, 63, 63, 63, 255, 170, 255, 255, 223, 255, - 223, 255, 207, 239, 255, 255, 220, 127, 0, 248, 255, 255, 255, 124, 255, 255, - 223, 255, 243, 255, 255, 127, 255, 31, 0, 0, 255, 255, 255, 3, 255, 255, - 127, 0, 0, 0, 255, 7, 0, 0, 255, 31, 255, 3, 255, 127, 255, 255, - 255, 255, 15, 254, 255, 128, 1, 128, 127, 127, 127, 127, 255, 255, 255, 251, - 0, 0, 255, 15, 224, 255, 255, 255, 255, 63, 254, 255, 15, 0, 255, 255, - 255, 31, 0, 0, 255, 31, 255, 255, 127, 0, 255, 255, 255, 15, 0, 0, - 255, 255, 255, 128, 255, 127, 15, 0, 0, 0, 0, 255, 255, 15, 255, 3, - 31, 192, 255, 3, 255, 255, 15, 128, 255, 191, 255, 195, 255, 63, 255, 243, - 7, 0, 0, 248, 126, 126, 126, 0, 127, 127, 0, 0, 255, 63, 255, 3, - 127, 248, 255, 255, 255, 63, 255, 255, 127, 0, 248, 224, 255, 255, 127, 95, - 219, 255, 255, 255, 3, 0, 248, 255, 255, 255, 252, 255, 255, 0, 0, 0, - 0, 0, 255, 63, 255, 255, 247, 255, 127, 15, 223, 255, 252, 252, 252, 28, - 127, 127, 0, 62, 255, 239, 255, 255, 127, 255, 255, 183, 255, 63, 255, 63, - 135, 255, 255, 255, 255, 255, 143, 255, 255, 7, 255, 15, 255, 255, 255, 191, - 15, 255, 63, 0, 255, 3, 0, 0, 63, 253, 255, 255, 255, 255, 191, 145, - 255, 255, 191, 255, 255, 255, 255, 143, 255, 255, 255, 131, 255, 255, 255, 192, - 111, 240, 239, 254, 255, 255, 15, 135, 255, 0, 255, 1, 255, 255, 63, 254, - 255, 255, 63, 255, 255, 255, 7, 255, 255, 1, 0, 0, 255, 63, 252, 255, - 255, 255, 0, 0, 3, 0, 255, 255, 255, 1, 255, 3, 15, 0, 0, 0, - 255, 127, 0, 0, 7, 0, 15, 0, 255, 255, 255, 1, 31, 
0, 255, 255, - 0, 128, 255, 255, 3, 0, 0, 0, 127, 254, 255, 255, 63, 0, 0, 0, - 100, 222, 255, 235, 239, 255, 255, 255, 191, 231, 223, 223, 255, 255, 255, 123, - 95, 252, 253, 255, 63, 255, 255, 255, 255, 207, 255, 255, 150, 254, 247, 10, - 132, 234, 150, 170, 150, 247, 247, 94, 255, 251, 255, 15, 238, 251, 255, 15, - 0, 0, 3, 0, 255, 127, 254, 127, 254, 255, 254, 255, 192, 255, 255, 255, - 7, 0, 255, 255, 255, 1, 3, 0, 1, 0, 191, 255, 223, 7, 0, 0, - 253, 255, 255, 255, 255, 255, 255, 30, 0, 0, 0, 248, 225, 255, 0, 0, - 2, 0, 0, 0, -}; - -/* Graph: 2046 bytes. */ - -RE_UINT32 re_get_graph(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 15; - code = ch ^ (f << 15); - pos = (RE_UINT32)re_graph_stage_1[f] << 4; - f = code >> 11; - code ^= f << 11; - pos = (RE_UINT32)re_graph_stage_2[pos + f] << 3; - f = code >> 8; - code ^= f << 8; - pos = (RE_UINT32)re_graph_stage_3[pos + f] << 3; - f = code >> 5; - code ^= f << 5; - pos = (RE_UINT32)re_graph_stage_4[pos + f] << 5; - pos += code; - value = (re_graph_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Print. 
*/ - -static RE_UINT8 re_print_stage_1[] = { - 0, 1, 2, 3, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 6, 4, 8, - 4, 8, -}; - -static RE_UINT8 re_print_stage_2[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 9, 10, 11, 7, 7, 7, 7, 12, 13, 7, 7, 7, 14, - 15, 16, 17, 13, 18, 13, 19, 13, 13, 13, 13, 13, 13, 20, 13, 13, - 13, 13, 13, 13, 13, 13, 21, 13, 13, 13, 22, 13, 13, 23, 24, 13, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 25, 7, 26, 27, 13, 13, 13, 13, 13, 13, 13, 28, - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, - 29, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 30, -}; - -static RE_UINT8 re_print_stage_3[] = { - 0, 1, 1, 2, 1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, - 14, 1, 15, 16, 1, 1, 17, 18, 19, 20, 21, 22, 23, 24, 1, 25, - 26, 27, 1, 28, 29, 1, 1, 30, 1, 1, 1, 31, 32, 33, 34, 35, - 36, 37, 38, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 39, 1, 1, 1, 1, 1, 1, 1, 1, 1, 40, - 1, 1, 1, 1, 41, 1, 42, 43, 44, 45, 46, 47, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 48, 49, 49, 49, 49, 49, 49, 49, 49, - 1, 1, 50, 51, 1, 52, 53, 54, 55, 56, 57, 58, 59, 49, 49, 49, - 60, 61, 62, 63, 64, 49, 65, 49, 66, 67, 49, 49, 49, 49, 68, 49, - 1, 1, 1, 69, 70, 49, 49, 49, 1, 1, 1, 1, 71, 49, 49, 49, - 1, 1, 72, 49, 49, 49, 49, 73, 74, 49, 49, 49, 49, 49, 49, 49, - 75, 76, 77, 78, 79, 80, 81, 82, 49, 49, 49, 49, 49, 49, 83, 49, - 84, 85, 86, 87, 88, 89, 90, 91, 1, 1, 1, 1, 1, 1, 92, 1, - 1, 1, 1, 1, 1, 1, 1, 93, 94, 49, 49, 49, 49, 49, 49, 49, - 1, 1, 94, 49, 49, 49, 49, 49, 95, 96, 49, 49, 49, 49, 49, 49, - 1, 1, 1, 1, 1, 1, 1, 97, -}; - -static RE_UINT8 re_print_stage_4[] = { - 0, 1, 1, 2, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 3, 4, 5, 1, 1, 1, 6, 7, 8, 9, 1, 10, 11, - 12, 1, 1, 1, 1, 1, 1, 1, 13, 1, 14, 1, 1, 15, 1, 16, - 1, 17, 18, 0, 0, 19, 0, 20, 1, 1, 1, 21, 22, 23, 24, 25, - 26, 27, 28, 29, 30, 31, 32, 33, 22, 
31, 34, 35, 36, 37, 38, 39, - 40, 41, 42, 43, 44, 45, 46, 47, 44, 48, 49, 50, 51, 52, 53, 54, - 8, 55, 56, 0, 57, 58, 59, 0, 1, 1, 60, 61, 21, 62, 63, 0, - 1, 1, 1, 1, 1, 1, 64, 1, 1, 1, 65, 1, 66, 67, 68, 1, - 69, 1, 48, 70, 71, 1, 1, 72, 1, 1, 1, 1, 70, 1, 1, 73, - 74, 75, 76, 77, 1, 1, 78, 79, 80, 1, 1, 81, 1, 82, 1, 83, - 70, 84, 85, 86, 1, 87, 88, 1, 89, 1, 2, 90, 79, 91, 0, 0, - 1, 1, 87, 70, 1, 1, 1, 92, 1, 93, 94, 1, 0, 0, 10, 75, - 1, 1, 1, 1, 1, 1, 1, 95, 96, 1, 97, 78, 1, 98, 99, 100, - 1, 101, 1, 102, 103, 16, 104, 73, 1, 1, 1, 1, 105, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 76, 1, 106, 107, 1, 1, 1, 1, 1, - 8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 108, 0, 0, 0, 0, 0, - 1, 109, 2, 1, 1, 1, 1, 110, 1, 64, 1, 111, 75, 112, 112, 1, - 1, 56, 0, 0, 113, 1, 1, 76, 1, 1, 1, 1, 1, 1, 83, 114, - 1, 1, 8, 1, 7, 1, 1, 1, 115, 116, 1, 1, 109, 16, 1, 117, - 2, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 83, 1, 1, - 1, 1, 1, 1, 1, 1, 118, 0, 1, 1, 1, 1, 119, 1, 120, 1, - 1, 121, 1, 1, 122, 1, 1, 81, 1, 1, 1, 1, 123, 107, 0, 124, - 1, 125, 1, 81, 1, 1, 126, 56, 1, 1, 127, 70, 1, 1, 128, 0, - 1, 75, 129, 56, 1, 1, 130, 75, 131, 132, 0, 0, 0, 0, 1, 133, - 1, 1, 1, 1, 1, 117, 134, 56, 0, 0, 0, 0, 0, 0, 0, 0, - 1, 1, 1, 135, 1, 1, 71, 0, 136, 137, 138, 1, 1, 1, 139, 1, - 1, 1, 104, 1, 140, 1, 141, 142, 71, 120, 143, 144, 1, 1, 1, 90, - 8, 1, 1, 1, 1, 2, 145, 146, 147, 148, 149, 0, 1, 1, 1, 16, - 150, 151, 1, 1, 152, 0, 104, 78, 0, 0, 0, 0, 70, 1, 73, 0, - 2, 117, 107, 0, 153, 1, 154, 0, 1, 1, 1, 1, 78, 155, 0, 0, - 156, 157, 158, 0, 0, 0, 0, 0, 159, 160, 0, 0, 1, 161, 0, 0, - 162, 163, 164, 1, 0, 0, 0, 0, 1, 165, 166, 167, 0, 0, 0, 0, - 1, 1, 168, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, - 1, 1, 169, 170, 1, 1, 171, 172, 1, 98, 173, 0, 1, 1, 172, 0, - 0, 0, 0, 0, 1, 81, 155, 0, 1, 1, 1, 174, 0, 0, 0, 0, - 1, 1, 1, 175, 0, 0, 0, 0, 1, 174, 0, 0, 0, 0, 0, 0, - 1, 176, 0, 0, 0, 0, 0, 0, 1, 1, 177, 2, 178, 0, 0, 0, - 179, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 83, - 1, 180, 1, 1, 1, 1, 78, 0, 1, 1, 
181, 0, 0, 0, 0, 0, - 1, 1, 75, 15, 0, 0, 0, 0, 1, 1, 98, 1, 62, 182, 183, 1, - 184, 185, 186, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 187, 1, 1, - 1, 1, 1, 1, 1, 1, 188, 1, 183, 189, 190, 191, 192, 193, 0, 194, - 1, 87, 1, 1, 76, 195, 196, 0, 82, 109, 1, 87, 16, 0, 0, 197, - 198, 16, 199, 0, 0, 0, 0, 0, 1, 200, 1, 70, 76, 1, 201, 73, - 1, 2, 202, 1, 1, 1, 1, 203, 1, 78, 117, 141, 0, 0, 0, 204, - 1, 1, 205, 0, 1, 1, 181, 0, 1, 1, 1, 76, 0, 0, 0, 0, - 1, 1, 1, 1, 1, 1, 75, 0, 1, 72, 1, 1, 1, 1, 1, 1, - 78, 0, 0, 0, 0, 0, 0, 0, 206, 1, 1, 1, 0, 0, 0, 0, - 1, 1, 1, 1, 1, 1, 1, 170, 1, 1, 1, 1, 1, 1, 1, 78, -}; - -static RE_UINT8 re_print_stage_5[] = { - 0, 0, 0, 0, 255, 255, 255, 255, 255, 255, 255, 127, 255, 255, 255, 124, - 240, 215, 255, 255, 251, 255, 255, 255, 255, 0, 254, 255, 255, 255, 127, 254, - 254, 255, 255, 255, 255, 134, 254, 255, 255, 0, 255, 255, 255, 7, 31, 0, - 223, 255, 255, 223, 255, 191, 255, 255, 255, 231, 255, 255, 255, 255, 3, 0, - 255, 255, 255, 7, 255, 63, 255, 127, 255, 255, 255, 79, 253, 31, 0, 0, - 240, 255, 255, 127, 255, 255, 255, 254, 238, 159, 249, 255, 255, 253, 197, 243, - 159, 121, 128, 176, 207, 255, 255, 15, 238, 135, 249, 255, 255, 253, 109, 211, - 135, 57, 2, 94, 192, 255, 63, 0, 238, 191, 251, 255, 255, 253, 237, 243, - 191, 59, 1, 0, 207, 255, 3, 0, 159, 57, 192, 176, 207, 255, 255, 0, - 236, 199, 61, 214, 24, 199, 255, 195, 199, 61, 129, 0, 192, 255, 255, 7, - 238, 223, 253, 255, 255, 253, 239, 227, 223, 61, 96, 3, 207, 255, 0, 255, - 236, 223, 253, 255, 255, 253, 239, 243, 223, 61, 96, 64, 207, 255, 6, 0, - 255, 255, 255, 231, 223, 125, 128, 0, 207, 255, 63, 254, 236, 255, 127, 252, - 255, 255, 251, 47, 127, 132, 95, 255, 0, 0, 28, 0, 255, 255, 255, 135, - 255, 255, 255, 15, 150, 37, 240, 254, 174, 236, 255, 59, 95, 63, 255, 243, - 255, 254, 255, 255, 255, 31, 254, 255, 255, 255, 255, 223, 255, 223, 255, 7, - 191, 32, 255, 255, 255, 61, 127, 61, 255, 61, 255, 255, 255, 255, 61, 127, - 61, 255, 127, 255, 255, 255, 61, 255, 255, 255, 
255, 31, 255, 255, 255, 3, - 255, 255, 31, 0, 255, 255, 1, 0, 255, 223, 31, 0, 255, 255, 127, 0, - 255, 255, 15, 0, 255, 223, 13, 0, 255, 255, 255, 63, 255, 3, 255, 3, - 255, 127, 255, 3, 255, 255, 255, 0, 255, 7, 255, 255, 255, 255, 63, 0, - 255, 15, 255, 15, 241, 255, 255, 255, 255, 63, 31, 0, 255, 15, 255, 255, - 255, 3, 255, 199, 255, 255, 255, 207, 255, 255, 255, 159, 255, 63, 0, 0, - 255, 255, 15, 240, 255, 255, 255, 248, 255, 227, 255, 255, 127, 0, 0, 240, - 255, 255, 63, 63, 63, 63, 255, 170, 255, 255, 223, 255, 223, 255, 207, 239, - 255, 255, 220, 127, 255, 252, 255, 255, 223, 255, 243, 255, 255, 127, 255, 31, - 0, 0, 255, 255, 255, 3, 255, 255, 127, 0, 0, 0, 255, 7, 0, 0, - 255, 31, 255, 3, 255, 127, 255, 255, 255, 255, 15, 254, 255, 128, 1, 128, - 127, 127, 127, 127, 255, 255, 255, 251, 0, 0, 255, 15, 224, 255, 255, 255, - 255, 63, 254, 255, 15, 0, 255, 255, 255, 31, 0, 0, 255, 31, 255, 255, - 127, 0, 255, 255, 255, 15, 0, 0, 255, 255, 255, 128, 255, 127, 15, 0, - 0, 0, 0, 255, 255, 15, 255, 3, 31, 192, 255, 3, 255, 255, 15, 128, - 255, 191, 255, 195, 255, 63, 255, 243, 7, 0, 0, 248, 126, 126, 126, 0, - 127, 127, 0, 0, 255, 63, 255, 3, 127, 248, 255, 255, 255, 63, 255, 255, - 127, 0, 248, 224, 255, 255, 127, 95, 219, 255, 255, 255, 3, 0, 248, 255, - 255, 255, 252, 255, 255, 0, 0, 0, 0, 0, 255, 63, 255, 255, 247, 255, - 127, 15, 223, 255, 252, 252, 252, 28, 127, 127, 0, 62, 255, 239, 255, 255, - 127, 255, 255, 183, 255, 63, 255, 63, 135, 255, 255, 255, 255, 255, 143, 255, - 255, 7, 255, 15, 255, 255, 255, 191, 15, 255, 63, 0, 255, 3, 0, 0, - 63, 253, 255, 255, 255, 255, 191, 145, 255, 255, 191, 255, 255, 255, 255, 143, - 255, 255, 255, 131, 255, 255, 255, 192, 111, 240, 239, 254, 255, 255, 15, 135, - 255, 0, 255, 1, 255, 255, 63, 254, 255, 255, 63, 255, 255, 255, 7, 255, - 255, 1, 0, 0, 255, 63, 252, 255, 255, 255, 0, 0, 3, 0, 255, 255, - 255, 1, 255, 3, 15, 0, 0, 0, 255, 127, 0, 0, 7, 0, 15, 0, - 255, 255, 255, 1, 31, 0, 255, 255, 0, 128, 255, 255, 3, 0, 
0, 0, - 127, 254, 255, 255, 63, 0, 0, 0, 100, 222, 255, 235, 239, 255, 255, 255, - 191, 231, 223, 223, 255, 255, 255, 123, 95, 252, 253, 255, 63, 255, 255, 255, - 255, 207, 255, 255, 150, 254, 247, 10, 132, 234, 150, 170, 150, 247, 247, 94, - 255, 251, 255, 15, 238, 251, 255, 15, 0, 0, 3, 0, 255, 127, 254, 127, - 254, 255, 254, 255, 192, 255, 255, 255, 7, 0, 255, 255, 255, 1, 3, 0, - 1, 0, 191, 255, 223, 7, 0, 0, 253, 255, 255, 255, 255, 255, 255, 30, - 0, 0, 0, 248, 225, 255, 0, 0, 2, 0, 0, 0, -}; - -/* Print: 2038 bytes. */ - -RE_UINT32 re_get_print(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 15; - code = ch ^ (f << 15); - pos = (RE_UINT32)re_print_stage_1[f] << 4; - f = code >> 11; - code ^= f << 11; - pos = (RE_UINT32)re_print_stage_2[pos + f] << 3; - f = code >> 8; - code ^= f << 8; - pos = (RE_UINT32)re_print_stage_3[pos + f] << 3; - f = code >> 5; - code ^= f << 5; - pos = (RE_UINT32)re_print_stage_4[pos + f] << 5; - pos += code; - value = (re_print_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* Word. 
*/ - -static RE_UINT8 re_word_stage_1[] = { - 0, 1, 2, 3, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 6, 6, 6, - 6, 6, -}; - -static RE_UINT8 re_word_stage_2[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 9, 10, 11, 7, 7, 7, 7, 12, 13, 13, 13, 13, 14, - 15, 16, 17, 13, 18, 13, 19, 13, 13, 13, 13, 13, 13, 20, 13, 13, - 13, 13, 13, 13, 13, 13, 21, 13, 13, 13, 22, 13, 13, 23, 13, 13, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 24, 7, 25, 26, 13, 13, 13, 13, 13, 13, 13, 27, - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, - 28, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, -}; - -static RE_UINT8 re_word_stage_3[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 1, 17, 18, 19, 1, 20, 21, 22, 23, 24, 25, 26, 27, 1, 28, - 29, 30, 31, 31, 32, 31, 31, 31, 31, 31, 31, 31, 33, 34, 35, 31, - 36, 37, 31, 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 38, 1, 1, 1, 1, 1, 1, 1, 1, 1, 39, - 1, 1, 1, 1, 40, 1, 41, 42, 43, 44, 45, 46, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 47, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 1, 48, 49, 1, 50, 51, 52, 53, 54, 55, 56, 57, 31, 31, 31, - 58, 59, 60, 61, 62, 31, 31, 31, 63, 64, 31, 31, 31, 31, 65, 31, - 1, 1, 1, 66, 67, 31, 31, 31, 1, 1, 1, 1, 68, 31, 31, 31, - 1, 1, 69, 31, 31, 31, 31, 70, 71, 31, 31, 31, 31, 31, 31, 31, - 31, 72, 73, 31, 74, 75, 76, 77, 31, 31, 31, 31, 31, 31, 78, 31, - 1, 1, 1, 1, 1, 1, 79, 1, 1, 1, 1, 1, 1, 1, 1, 80, - 81, 31, 31, 31, 31, 31, 31, 31, 1, 1, 81, 31, 31, 31, 31, 31, - 31, 82, 31, 31, 31, 31, 31, 31, -}; - -static RE_UINT8 re_word_stage_4[] = { - 0, 1, 2, 3, 0, 4, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 7, 8, 6, 6, 6, 9, 10, 11, 6, 12, - 6, 6, 6, 6, 11, 6, 6, 6, 6, 13, 14, 15, 13, 16, 17, 18, - 19, 6, 6, 20, 6, 6, 21, 22, 23, 6, 24, 6, 6, 25, 6, 26, - 6, 27, 28, 0, 0, 29, 0, 30, 6, 6, 6, 31, 32, 33, 34, 35, - 36, 37, 38, 39, 40, 41, 42, 43, 32, 41, 44, 45, 46, 47, 48, 49, - 50, 51, 
52, 43, 53, 54, 55, 56, 53, 57, 58, 59, 60, 61, 62, 63, - 15, 64, 65, 0, 66, 67, 68, 0, 69, 70, 71, 72, 73, 74, 75, 0, - 6, 6, 76, 6, 77, 6, 78, 79, 6, 6, 80, 6, 81, 82, 83, 6, - 84, 6, 57, 0, 85, 6, 6, 86, 15, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 87, 3, 6, 6, 88, 89, 86, 90, 91, 6, 6, 92, 93, - 94, 6, 6, 95, 6, 96, 6, 97, 74, 98, 99, 100, 6, 101, 102, 0, - 28, 6, 103, 104, 102, 105, 0, 0, 6, 6, 106, 107, 6, 6, 6, 90, - 6, 95, 108, 77, 0, 0, 109, 110, 6, 6, 6, 6, 6, 6, 6, 111, - 112, 6, 113, 77, 6, 114, 115, 116, 117, 118, 119, 120, 121, 0, 23, 122, - 123, 124, 125, 6, 126, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 127, 6, 93, 6, 128, 103, 6, 6, 6, 6, 129, - 6, 78, 6, 130, 110, 131, 131, 6, 0, 132, 0, 0, 0, 0, 0, 0, - 133, 134, 15, 6, 135, 15, 6, 79, 136, 137, 6, 6, 138, 64, 0, 23, - 6, 6, 6, 6, 6, 97, 0, 0, 6, 6, 6, 6, 6, 6, 139, 0, - 6, 6, 6, 6, 139, 0, 23, 77, 140, 141, 6, 142, 143, 6, 6, 25, - 144, 145, 6, 6, 146, 147, 0, 148, 6, 149, 6, 90, 6, 6, 150, 151, - 6, 152, 90, 74, 6, 6, 153, 0, 6, 110, 154, 155, 6, 6, 156, 157, - 158, 159, 0, 0, 0, 0, 6, 160, 6, 6, 6, 6, 6, 161, 162, 28, - 6, 6, 6, 152, 6, 6, 163, 0, 164, 165, 166, 6, 6, 25, 167, 6, - 6, 77, 23, 6, 168, 6, 149, 169, 85, 170, 171, 172, 6, 6, 6, 74, - 1, 2, 3, 99, 6, 103, 173, 0, 174, 175, 176, 0, 6, 6, 6, 64, - 0, 0, 6, 86, 0, 0, 0, 177, 0, 0, 0, 0, 74, 6, 122, 0, - 103, 23, 147, 0, 77, 6, 178, 0, 6, 6, 6, 6, 77, 93, 0, 0, - 179, 180, 97, 0, 0, 0, 0, 0, 97, 163, 0, 0, 6, 181, 0, 0, - 182, 183, 0, 74, 0, 0, 0, 0, 6, 97, 97, 184, 0, 0, 0, 0, - 6, 6, 126, 0, 0, 0, 0, 0, 6, 6, 185, 49, 6, 64, 23, 186, - 6, 187, 0, 0, 6, 6, 150, 0, 0, 0, 0, 0, 6, 95, 93, 0, - 6, 6, 6, 138, 0, 0, 0, 0, 6, 6, 6, 188, 0, 0, 0, 0, - 6, 138, 0, 0, 0, 0, 0, 0, 6, 189, 0, 0, 0, 0, 0, 0, - 6, 6, 190, 103, 191, 0, 0, 0, 192, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 193, 194, 195, 0, 0, 0, 0, 196, 0, 0, 0, 0, 0, - 6, 6, 187, 6, 197, 198, 199, 6, 200, 201, 202, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 203, 204, 79, 187, 187, 128, 128, 205, 205, 
206, 6, - 199, 207, 208, 209, 210, 211, 0, 0, 6, 6, 6, 6, 6, 6, 110, 0, - 6, 86, 6, 6, 6, 6, 6, 6, 77, 0, 0, 0, 0, 0, 0, 0, - 6, 6, 6, 6, 6, 6, 6, 85, -}; - -static RE_UINT8 re_word_stage_5[] = { - 0, 0, 0, 0, 0, 0, 255, 3, 254, 255, 255, 135, 254, 255, 255, 7, - 0, 4, 32, 4, 255, 255, 127, 255, 255, 255, 255, 255, 195, 255, 3, 0, - 31, 80, 0, 0, 255, 255, 223, 60, 64, 215, 255, 255, 251, 255, 255, 255, - 255, 255, 191, 255, 255, 0, 254, 255, 255, 255, 127, 2, 254, 255, 255, 255, - 255, 255, 255, 191, 182, 0, 255, 255, 255, 7, 7, 0, 0, 0, 255, 7, - 255, 195, 255, 255, 255, 255, 239, 159, 255, 253, 255, 159, 0, 0, 255, 255, - 255, 231, 255, 255, 255, 255, 3, 0, 255, 255, 63, 4, 255, 63, 0, 0, - 255, 255, 255, 15, 253, 31, 0, 0, 240, 255, 255, 127, 207, 255, 254, 254, - 238, 159, 249, 255, 255, 253, 197, 243, 159, 121, 128, 176, 207, 255, 3, 0, - 238, 135, 249, 255, 255, 253, 109, 211, 135, 57, 2, 94, 192, 255, 63, 0, - 238, 191, 251, 255, 255, 253, 237, 243, 191, 59, 1, 0, 207, 255, 0, 0, - 159, 57, 192, 176, 207, 255, 2, 0, 236, 199, 61, 214, 24, 199, 255, 195, - 199, 61, 129, 0, 192, 255, 0, 0, 238, 223, 253, 255, 255, 253, 239, 227, - 223, 61, 96, 3, 236, 223, 253, 255, 255, 253, 239, 243, 223, 61, 96, 64, - 207, 255, 6, 0, 255, 255, 255, 231, 223, 125, 128, 0, 207, 255, 0, 252, - 236, 255, 127, 252, 255, 255, 251, 47, 127, 132, 95, 255, 0, 0, 12, 0, - 255, 255, 255, 7, 255, 127, 255, 3, 150, 37, 240, 254, 174, 236, 255, 59, - 95, 63, 255, 243, 1, 0, 0, 3, 255, 3, 160, 194, 255, 254, 255, 255, - 255, 31, 254, 255, 223, 255, 255, 254, 255, 255, 255, 31, 64, 0, 0, 0, - 255, 3, 255, 255, 255, 255, 255, 63, 191, 32, 255, 255, 255, 255, 255, 247, - 255, 61, 127, 61, 255, 61, 255, 255, 255, 255, 61, 127, 61, 255, 127, 255, - 255, 255, 61, 255, 255, 255, 0, 0, 255, 255, 31, 0, 255, 159, 255, 255, - 255, 199, 1, 0, 255, 223, 31, 0, 255, 255, 15, 0, 255, 223, 13, 0, - 255, 255, 143, 48, 255, 3, 0, 0, 0, 56, 255, 3, 255, 255, 255, 0, - 255, 7, 255, 255, 255, 255, 63, 0, 
255, 15, 255, 15, 192, 255, 255, 255, - 255, 63, 31, 0, 255, 15, 255, 255, 255, 3, 255, 3, 255, 255, 255, 127, - 255, 255, 255, 159, 128, 0, 0, 0, 255, 15, 255, 3, 0, 248, 15, 0, - 255, 227, 255, 255, 0, 0, 247, 255, 255, 255, 127, 0, 127, 0, 0, 240, - 255, 255, 63, 63, 63, 63, 255, 170, 255, 255, 223, 95, 220, 31, 207, 15, - 255, 31, 220, 31, 0, 48, 0, 0, 0, 0, 0, 128, 1, 0, 16, 0, - 0, 0, 2, 128, 0, 0, 255, 31, 255, 255, 1, 0, 132, 252, 47, 62, - 80, 189, 255, 243, 224, 67, 0, 0, 255, 1, 0, 0, 0, 0, 192, 255, - 255, 127, 255, 255, 31, 248, 15, 0, 255, 128, 0, 128, 127, 127, 127, 127, - 0, 128, 0, 0, 224, 0, 0, 0, 254, 255, 62, 31, 255, 255, 127, 230, - 224, 255, 255, 255, 255, 63, 254, 255, 255, 127, 0, 0, 255, 31, 0, 0, - 255, 31, 255, 255, 255, 15, 0, 0, 255, 255, 247, 191, 255, 255, 255, 128, - 0, 0, 128, 255, 252, 255, 255, 255, 255, 121, 15, 0, 255, 7, 0, 0, - 0, 0, 0, 255, 255, 0, 0, 0, 31, 0, 255, 3, 255, 255, 255, 8, - 255, 63, 255, 255, 1, 128, 255, 3, 255, 63, 255, 3, 255, 255, 127, 12, - 7, 0, 0, 56, 255, 255, 124, 0, 126, 126, 126, 0, 127, 127, 0, 0, - 255, 55, 255, 3, 15, 0, 255, 255, 127, 248, 255, 255, 255, 255, 255, 3, - 127, 0, 248, 224, 255, 253, 127, 95, 219, 255, 255, 255, 0, 0, 248, 255, - 255, 255, 252, 255, 0, 0, 255, 15, 127, 0, 24, 0, 0, 224, 0, 0, - 0, 0, 223, 255, 252, 252, 252, 28, 255, 239, 255, 255, 127, 255, 255, 183, - 255, 63, 255, 63, 0, 0, 0, 32, 15, 255, 62, 0, 63, 253, 255, 255, - 255, 255, 191, 145, 255, 255, 255, 192, 111, 240, 239, 254, 255, 255, 15, 135, - 255, 255, 7, 0, 127, 0, 0, 0, 255, 1, 255, 3, 255, 255, 223, 255, - 7, 0, 0, 0, 255, 255, 255, 1, 31, 0, 255, 255, 0, 128, 255, 255, - 3, 0, 0, 0, 224, 227, 7, 248, 231, 15, 0, 0, 0, 60, 0, 0, - 28, 0, 0, 0, 255, 255, 255, 223, 100, 222, 255, 235, 239, 255, 255, 255, - 191, 231, 223, 223, 255, 255, 255, 123, 95, 252, 253, 255, 63, 255, 255, 255, - 253, 255, 255, 247, 255, 253, 255, 255, 247, 207, 255, 255, 150, 254, 247, 10, - 132, 234, 150, 170, 150, 247, 247, 94, 255, 
251, 255, 15, 238, 251, 255, 15, -}; - -/* Word: 1906 bytes. */ - -RE_UINT32 re_get_word(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 15; - code = ch ^ (f << 15); - pos = (RE_UINT32)re_word_stage_1[f] << 4; - f = code >> 11; - code ^= f << 11; - pos = (RE_UINT32)re_word_stage_2[pos + f] << 3; - f = code >> 8; - code ^= f << 8; - pos = (RE_UINT32)re_word_stage_3[pos + f] << 3; - f = code >> 5; - code ^= f << 5; - pos = (RE_UINT32)re_word_stage_4[pos + f] << 5; - pos += code; - value = (re_word_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* XDigit. */ - -static RE_UINT8 re_xdigit_stage_1[] = { - 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, -}; - -static RE_UINT8 re_xdigit_stage_2[] = { - 0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 5, 6, 4, 4, 4, 4, 4, 4, 4, 4, 4, 7, - 8, 4, 9, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 10, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, -}; - -static RE_UINT8 re_xdigit_stage_3[] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, 1, 4, - 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 6, 6, 7, 1, - 4, 8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, - 8, 1, 9, 6, 1, 10, 6, 11, 12, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 7, 1, 1, 1, - 1, 6, 13, 6, 6, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 14, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 7, 1, 1, 1, 1, 1, 1, - 5, 3, 15, 6, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 16, -}; - -static RE_UINT8 re_xdigit_stage_4[] = { - 0, 1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 1, - 0, 0, 3, 0, 0, 0, 0, 4, 0, 0, 1, 0, 0, 3, 0, 0, - 1, 0, 0, 0, 0, 0, 4, 0, 5, 0, 0, 0, 0, 1, 0, 0, - 0, 0, 5, 0, 3, 0, 0, 0, 1, 2, 2, 0, 0, 6, 0, 0, - 0, 0, 7, 8, -}; - -static RE_UINT8 re_xdigit_stage_5[] = { - 0, 0, 0, 
0, 0, 0, 255, 3, 126, 0, 0, 0, 255, 3, 0, 0, - 192, 255, 0, 0, 255, 3, 255, 3, 0, 0, 192, 255, 0, 192, 255, 255, - 255, 255, 255, 255, -}; - -/* XDigit: 393 bytes. */ - -RE_UINT32 re_get_xdigit(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - - f = ch >> 16; - code = ch ^ (f << 16); - pos = (RE_UINT32)re_xdigit_stage_1[f] << 5; - f = code >> 11; - code ^= f << 11; - pos = (RE_UINT32)re_xdigit_stage_2[pos + f] << 4; - f = code >> 7; - code ^= f << 7; - pos = (RE_UINT32)re_xdigit_stage_3[pos + f] << 2; - f = code >> 5; - code ^= f << 5; - pos = (RE_UINT32)re_xdigit_stage_4[pos + f] << 5; - pos += code; - value = (re_xdigit_stage_5[pos >> 3] >> (pos & 0x7)) & 0x1; - - return value; -} - -/* All_Cases. */ - -static RE_UINT8 re_all_cases_stage_1[] = { - 0, 1, 2, 2, 2, 3, 2, 4, 5, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, -}; - -static RE_UINT8 re_all_cases_stage_2[] = { - 0, 1, 2, 3, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 8, 9, 10, - 6, 11, 6, 6, 12, 6, 6, 6, 6, 6, 6, 6, 13, 14, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 15, 16, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 17, 6, 6, 6, 18, - 6, 6, 6, 6, 19, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, -}; - -static RE_UINT8 re_all_cases_stage_3[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, - 0, 0, 0, 0, 0, 
0, 9, 0, 10, 11, 12, 13, 14, 15, 16, 17, - 18, 18, 18, 18, 18, 18, 19, 20, 21, 22, 18, 18, 18, 18, 18, 23, - 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 21, 34, 18, 18, 35, 18, - 18, 18, 18, 18, 36, 18, 37, 38, 39, 18, 40, 41, 42, 43, 44, 45, - 46, 47, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 49, 0, 0, 0, 0, 0, 50, 51, - 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 18, 18, 18, 63, 64, - 65, 65, 11, 11, 11, 11, 15, 15, 15, 15, 66, 66, 18, 18, 18, 18, - 67, 68, 18, 18, 18, 18, 18, 18, 69, 70, 18, 18, 18, 18, 18, 18, - 18, 18, 18, 18, 18, 0, 71, 72, 72, 72, 73, 0, 74, 75, 75, 75, - 76, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 77, 77, 77, 77, 78, 79, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 80, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 81, 18, 18, 18, - 18, 18, 82, 83, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, - 84, 85, 86, 87, 84, 85, 84, 85, 86, 87, 88, 89, 84, 85, 90, 91, - 84, 85, 84, 85, 84, 85, 92, 93, 94, 95, 96, 97, 98, 99, 94, 100, - 0, 0, 0, 0, 101, 102, 103, 0, 0, 104, 0, 0, 105, 105, 106, 106, - 107, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 108, 109, 109, 109, 110, 110, 110, 111, 0, 0, - 72, 72, 72, 72, 72, 73, 75, 75, 75, 75, 75, 76, 112, 113, 114, 115, - 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 37, 116, 117, 0, - 118, 118, 118, 118, 119, 120, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 18, 18, 18, 18, 18, 82, 0, 0, - 18, 18, 18, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 68, 18, 68, 18, 18, 18, 18, 18, 18, 18, 0, 121, - 18, 122, 37, 0, 18, 123, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 124, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 1, 11, 11, 4, 5, 15, 15, 8, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 125, 125, 125, 125, 125, 126, 126, 126, 126, 126, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_all_cases_stage_4[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, - 1, 2, 1, 3, 1, 1, 1, 1, 1, 1, 1, 4, 1, 1, 1, 1, - 1, 1, 1, 0, 0, 0, 0, 0, 0, 5, 5, 5, 5, 5, 5, 5, - 5, 6, 5, 7, 5, 5, 5, 5, 5, 5, 5, 8, 5, 5, 5, 5, - 5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, - 1, 1, 1, 1, 1, 10, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 11, - 5, 5, 5, 5, 5, 12, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 0, 5, 5, 5, 5, 5, 5, 5, 13, - 14, 15, 14, 15, 14, 15, 14, 15, 16, 17, 14, 15, 14, 15, 14, 15, - 0, 14, 15, 14, 15, 14, 15, 14, 15, 14, 15, 14, 15, 14, 15, 14, - 15, 0, 14, 15, 14, 15, 14, 15, 18, 14, 15, 14, 15, 14, 15, 19, - 20, 21, 14, 15, 14, 15, 22, 14, 15, 23, 23, 14, 15, 0, 24, 25, - 26, 14, 15, 23, 27, 28, 29, 30, 14, 15, 31, 0, 29, 32, 33, 34, - 14, 15, 14, 15, 14, 15, 35, 14, 15, 35, 0, 0, 14, 15, 35, 14, - 15, 36, 36, 14, 15, 14, 15, 37, 14, 15, 0, 0, 14, 15, 0, 38, - 0, 0, 0, 0, 39, 40, 41, 39, 40, 41, 39, 40, 41, 14, 15, 14, - 15, 14, 15, 14, 15, 42, 14, 15, 0, 39, 40, 41, 14, 15, 43, 44, - 45, 0, 14, 15, 14, 15, 14, 15, 14, 15, 14, 15, 0, 0, 0, 0, - 0, 0, 46, 14, 15, 47, 48, 49, 49, 14, 15, 50, 51, 52, 14, 15, - 53, 54, 55, 56, 57, 0, 58, 58, 0, 59, 0, 60, 0, 0, 0, 0, - 58, 0, 0, 61, 0, 62, 63, 0, 64, 65, 0, 66, 0, 0, 0, 65, - 0, 67, 68, 0, 0, 69, 0, 0, 0, 0, 0, 0, 0, 70, 0, 0, - 71, 0, 0, 71, 0, 0, 0, 0, 71, 72, 73, 73, 74, 0, 0, 0, - 0, 0, 75, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 76, 0, 0, - 14, 15, 14, 15, 0, 0, 14, 15, 0, 0, 0, 33, 33, 33, 0, 0, - 0, 0, 0, 0, 0, 0, 77, 0, 78, 78, 78, 0, 79, 0, 80, 80, - 81, 1, 82, 1, 1, 83, 1, 1, 84, 85, 86, 1, 87, 1, 1, 1, - 88, 89, 0, 90, 1, 1, 91, 1, 1, 92, 1, 1, 93, 94, 94, 94, - 95, 5, 
96, 5, 5, 97, 5, 5, 98, 99, 100, 5, 101, 5, 5, 5, - 102, 103, 104, 105, 5, 5, 106, 5, 5, 107, 5, 5, 108, 109, 109, 110, - 111, 112, 0, 0, 0, 113, 114, 115, 116, 117, 118, 0, 119, 120, 0, 14, - 15, 121, 14, 15, 0, 45, 45, 45, 122, 122, 122, 122, 122, 122, 122, 122, - 123, 123, 123, 123, 123, 123, 123, 123, 14, 15, 0, 0, 0, 0, 0, 0, - 0, 0, 14, 15, 14, 15, 14, 15, 124, 14, 15, 14, 15, 14, 15, 14, - 15, 14, 15, 14, 15, 14, 15, 125, 0, 126, 126, 126, 126, 126, 126, 126, - 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 0, - 0, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, - 127, 127, 127, 127, 127, 127, 127, 0, 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 0, 128, 0, 0, 0, 0, 0, 128, 0, 0, - 0, 129, 0, 0, 0, 130, 0, 0, 131, 132, 14, 15, 14, 15, 14, 15, - 14, 15, 14, 15, 14, 15, 0, 0, 0, 0, 0, 133, 0, 0, 134, 0, - 110, 110, 110, 110, 110, 110, 110, 110, 115, 115, 115, 115, 115, 115, 115, 115, - 110, 110, 110, 110, 110, 110, 0, 0, 115, 115, 115, 115, 115, 115, 0, 0, - 0, 110, 0, 110, 0, 110, 0, 110, 0, 115, 0, 115, 0, 115, 0, 115, - 135, 135, 136, 136, 136, 136, 137, 137, 138, 138, 139, 139, 140, 140, 0, 0, - 110, 110, 0, 141, 0, 0, 0, 0, 115, 115, 142, 142, 143, 0, 144, 0, - 0, 0, 0, 141, 0, 0, 0, 0, 145, 145, 145, 145, 143, 0, 0, 0, - 110, 110, 0, 146, 0, 0, 0, 0, 115, 115, 147, 147, 0, 0, 0, 0, - 110, 110, 0, 148, 0, 118, 0, 0, 115, 115, 149, 149, 121, 0, 0, 0, - 150, 150, 151, 151, 143, 0, 0, 0, 0, 0, 0, 0, 0, 0, 152, 0, - 0, 0, 153, 154, 0, 0, 0, 0, 0, 0, 155, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 156, 0, 157, 157, 157, 157, 157, 157, 157, 157, - 158, 158, 158, 158, 158, 158, 158, 158, 0, 0, 0, 14, 15, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159, - 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 0, 0, 0, 0, 0, 0, - 14, 15, 161, 162, 163, 164, 165, 14, 15, 14, 15, 14, 15, 166, 167, 168, - 169, 0, 14, 15, 0, 14, 15, 0, 0, 0, 0, 0, 0, 0, 170, 170, - 0, 0, 
0, 14, 15, 14, 15, 0, 0, 0, 14, 15, 0, 0, 0, 0, - 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 0, 171, - 0, 0, 0, 0, 0, 171, 0, 0, 0, 14, 15, 14, 15, 172, 14, 15, - 0, 0, 0, 14, 15, 173, 0, 0, 14, 15, 174, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 14, 15, 0, 175, 175, 175, 175, 175, 175, 175, 175, - 176, 176, 176, 176, 176, 176, 176, 176, -}; - -/* All_Cases: 1984 bytes. */ - -static RE_AllCases re_all_cases_table[] = { - {{ 0, 0, 0}}, - {{ 32, 0, 0}}, - {{ 32, 232, 0}}, - {{ 32, 8415, 0}}, - {{ 32, 300, 0}}, - {{ -32, 0, 0}}, - {{ -32, 199, 0}}, - {{ -32, 8383, 0}}, - {{ -32, 268, 0}}, - {{ 743, 775, 0}}, - {{ 32, 8294, 0}}, - {{ 7615, 0, 0}}, - {{ -32, 8262, 0}}, - {{ 121, 0, 0}}, - {{ 1, 0, 0}}, - {{ -1, 0, 0}}, - {{ -199, 0, 0}}, - {{ -232, 0, 0}}, - {{ -121, 0, 0}}, - {{ -300, -268, 0}}, - {{ 195, 0, 0}}, - {{ 210, 0, 0}}, - {{ 206, 0, 0}}, - {{ 205, 0, 0}}, - {{ 79, 0, 0}}, - {{ 202, 0, 0}}, - {{ 203, 0, 0}}, - {{ 207, 0, 0}}, - {{ 97, 0, 0}}, - {{ 211, 0, 0}}, - {{ 209, 0, 0}}, - {{ 163, 0, 0}}, - {{ 213, 0, 0}}, - {{ 130, 0, 0}}, - {{ 214, 0, 0}}, - {{ 218, 0, 0}}, - {{ 217, 0, 0}}, - {{ 219, 0, 0}}, - {{ 56, 0, 0}}, - {{ 1, 2, 0}}, - {{ -1, 1, 0}}, - {{ -2, -1, 0}}, - {{ -79, 0, 0}}, - {{ -97, 0, 0}}, - {{ -56, 0, 0}}, - {{ -130, 0, 0}}, - {{ 10795, 0, 0}}, - {{ -163, 0, 0}}, - {{ 10792, 0, 0}}, - {{ 10815, 0, 0}}, - {{ -195, 0, 0}}, - {{ 69, 0, 0}}, - {{ 71, 0, 0}}, - {{ 10783, 0, 0}}, - {{ 10780, 0, 0}}, - {{ 10782, 0, 0}}, - {{ -210, 0, 0}}, - {{ -206, 0, 0}}, - {{ -205, 0, 0}}, - {{ -202, 0, 0}}, - {{ -203, 0, 0}}, - {{ -207, 0, 0}}, - {{ 42280, 0, 0}}, - {{ 42308, 0, 0}}, - {{ -209, 0, 0}}, - {{ -211, 0, 0}}, - {{ 10743, 0, 0}}, - {{ 10749, 0, 0}}, - {{ -213, 0, 0}}, - {{ -214, 0, 0}}, - {{ 10727, 0, 0}}, - {{ -218, 0, 0}}, - {{ -69, 0, 0}}, - {{ -217, 0, 0}}, - {{ -71, 0, 0}}, - {{ -219, 0, 0}}, - {{ 84, 116, 7289}}, - {{ 38, 0, 0}}, - {{ 37, 0, 0}}, - {{ 64, 0, 0}}, - {{ 63, 0, 0}}, - {{ 7235, 0, 0}}, - {{ 32, 62, 0}}, - {{ 32, 96, 
0}}, - {{ 32, 57, 92}}, - {{ -84, 32, 7205}}, - {{ 32, 86, 0}}, - {{ -743, 32, 0}}, - {{ 32, 54, 0}}, - {{ 32, 80, 0}}, - {{ 31, 32, 0}}, - {{ 32, 47, 0}}, - {{ 32, 7549, 0}}, - {{ -38, 0, 0}}, - {{ -37, 0, 0}}, - {{ 7219, 0, 0}}, - {{ -32, 30, 0}}, - {{ -32, 64, 0}}, - {{ -32, 25, 60}}, - {{ -116, -32, 7173}}, - {{ -32, 54, 0}}, - {{ -775, -32, 0}}, - {{ -32, 22, 0}}, - {{ -32, 48, 0}}, - {{ -31, 1, 0}}, - {{ -32, -1, 0}}, - {{ -32, 15, 0}}, - {{ -32, 7517, 0}}, - {{ -64, 0, 0}}, - {{ -63, 0, 0}}, - {{ 8, 0, 0}}, - {{ -62, -30, 0}}, - {{ -57, -25, 35}}, - {{ -47, -15, 0}}, - {{ -54, -22, 0}}, - {{ -8, 0, 0}}, - {{ -86, -54, 0}}, - {{ -80, -48, 0}}, - {{ 7, 0, 0}}, - {{ -92, -60, -35}}, - {{ -96, -64, 0}}, - {{ -7, 0, 0}}, - {{ 80, 0, 0}}, - {{ -80, 0, 0}}, - {{ 15, 0, 0}}, - {{ -15, 0, 0}}, - {{ 48, 0, 0}}, - {{ -48, 0, 0}}, - {{ 7264, 0, 0}}, - {{ 35332, 0, 0}}, - {{ 3814, 0, 0}}, - {{ 1, 59, 0}}, - {{ -1, 58, 0}}, - {{ -59, -58, 0}}, - {{ -7615, 0, 0}}, - {{ 74, 0, 0}}, - {{ 86, 0, 0}}, - {{ 100, 0, 0}}, - {{ 128, 0, 0}}, - {{ 112, 0, 0}}, - {{ 126, 0, 0}}, - {{ 9, 0, 0}}, - {{ -74, 0, 0}}, - {{ -9, 0, 0}}, - {{ -7289, -7205, -7173}}, - {{ -86, 0, 0}}, - {{ -7235, 0, 0}}, - {{ -100, 0, 0}}, - {{ -7219, 0, 0}}, - {{ -112, 0, 0}}, - {{ -128, 0, 0}}, - {{ -126, 0, 0}}, - {{ -7549, -7517, 0}}, - {{ -8415, -8383, 0}}, - {{ -8294, -8262, 0}}, - {{ 28, 0, 0}}, - {{ -28, 0, 0}}, - {{ 16, 0, 0}}, - {{ -16, 0, 0}}, - {{ 26, 0, 0}}, - {{ -26, 0, 0}}, - {{-10743, 0, 0}}, - {{ -3814, 0, 0}}, - {{-10727, 0, 0}}, - {{-10795, 0, 0}}, - {{-10792, 0, 0}}, - {{-10780, 0, 0}}, - {{-10749, 0, 0}}, - {{-10783, 0, 0}}, - {{-10782, 0, 0}}, - {{-10815, 0, 0}}, - {{ -7264, 0, 0}}, - {{-35332, 0, 0}}, - {{-42280, 0, 0}}, - {{-42308, 0, 0}}, - {{ 40, 0, 0}}, - {{ -40, 0, 0}}, -}; - -/* All_Cases: 2124 bytes. 
*/ - -int re_get_all_cases(RE_UINT32 ch, RE_UINT32* codepoints) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - RE_AllCases* all_cases; - int count; - - f = ch >> 13; - code = ch ^ (f << 13); - pos = (RE_UINT32)re_all_cases_stage_1[f] << 5; - f = code >> 8; - code ^= f << 8; - pos = (RE_UINT32)re_all_cases_stage_2[pos + f] << 5; - f = code >> 3; - code ^= f << 3; - pos = (RE_UINT32)re_all_cases_stage_3[pos + f] << 3; - value = re_all_cases_stage_4[pos + code]; - - all_cases = &re_all_cases_table[value]; - - codepoints[0] = ch; - count = 1; - - while (count < RE_MAX_CASES && all_cases->diffs[count - 1] != 0) { - codepoints[count] = (RE_UINT32)((RE_INT32)ch + all_cases->diffs[count - - 1]); - ++count; - } - - return count; -} - -/* Simple_Case_Folding. */ - -static RE_UINT8 re_simple_case_folding_stage_1[] = { - 0, 1, 2, 2, 2, 3, 2, 4, 5, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, -}; - -static RE_UINT8 re_simple_case_folding_stage_2[] = { - 0, 1, 2, 3, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 8, 9, - 6, 10, 6, 6, 11, 6, 6, 6, 6, 6, 6, 6, 12, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 13, 14, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 15, - 6, 6, 6, 6, 16, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, -}; - -static RE_UINT8 re_simple_case_folding_stage_3[] = { - 
0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 2, 3, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 4, 0, 2, 2, 5, 5, 0, 0, 0, 0, - 6, 6, 6, 6, 6, 6, 7, 8, 8, 7, 6, 6, 6, 6, 6, 9, - 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 8, 20, 6, 6, 21, 6, - 6, 6, 6, 6, 22, 6, 23, 24, 25, 6, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 26, 0, 0, 0, 0, 0, 27, 0, - 28, 29, 1, 2, 30, 31, 0, 0, 32, 33, 34, 6, 6, 6, 35, 36, - 37, 37, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 6, 6, 6, 6, - 38, 7, 6, 6, 6, 6, 6, 6, 39, 40, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 0, 41, 42, 42, 42, 43, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 44, 44, 44, 44, 45, 46, 0, 0, 0, 0, 0, 0, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 47, 48, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 0, 49, 0, 50, 0, 49, 0, 49, 0, 50, 0, 51, 0, 49, 0, 0, - 0, 49, 0, 49, 0, 49, 0, 52, 0, 53, 0, 54, 0, 55, 0, 56, - 0, 0, 0, 0, 57, 58, 59, 0, 0, 0, 0, 0, 60, 60, 0, 0, - 61, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 62, 63, 63, 63, 0, 0, 0, 0, 0, 0, - 42, 42, 42, 42, 42, 43, 0, 0, 0, 0, 0, 0, 64, 65, 66, 67, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 23, 68, 32, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 6, 6, 6, 6, 6, 47, 0, 0, - 6, 6, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 7, 6, 7, 6, 6, 6, 6, 6, 6, 6, 0, 69, - 6, 70, 23, 0, 6, 71, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 1, 2, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 72, 72, 72, 72, 72, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_simple_case_folding_stage_4[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 2, 0, 0, 1, 1, 1, 1, 1, 1, 1, 
0, - 3, 0, 3, 0, 3, 0, 3, 0, 0, 0, 3, 0, 3, 0, 3, 0, - 0, 3, 0, 3, 0, 3, 0, 3, 4, 3, 0, 3, 0, 3, 0, 5, - 0, 6, 3, 0, 3, 0, 7, 3, 0, 8, 8, 3, 0, 0, 9, 10, - 11, 3, 0, 8, 12, 0, 13, 14, 3, 0, 0, 0, 13, 15, 0, 16, - 3, 0, 3, 0, 3, 0, 17, 3, 0, 17, 0, 0, 3, 0, 17, 3, - 0, 18, 18, 3, 0, 3, 0, 19, 3, 0, 0, 0, 3, 0, 0, 0, - 0, 0, 0, 0, 20, 3, 0, 20, 3, 0, 20, 3, 0, 3, 0, 3, - 0, 3, 0, 3, 0, 0, 3, 0, 0, 20, 3, 0, 3, 0, 21, 22, - 23, 0, 3, 0, 3, 0, 3, 0, 3, 0, 3, 0, 0, 0, 0, 0, - 0, 0, 24, 3, 0, 25, 26, 0, 0, 3, 0, 27, 28, 29, 3, 0, - 0, 0, 0, 0, 0, 30, 0, 0, 3, 0, 3, 0, 0, 0, 3, 0, - 0, 0, 0, 0, 0, 0, 31, 0, 32, 32, 32, 0, 33, 0, 34, 34, - 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, - 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 35, - 36, 37, 0, 0, 0, 38, 39, 0, 40, 41, 0, 0, 42, 43, 0, 3, - 0, 44, 3, 0, 0, 23, 23, 23, 45, 45, 45, 45, 45, 45, 45, 45, - 3, 0, 0, 0, 0, 0, 0, 0, 46, 3, 0, 3, 0, 3, 0, 3, - 0, 3, 0, 3, 0, 3, 0, 0, 0, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 0, - 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 0, 48, - 0, 0, 0, 0, 0, 48, 0, 0, 3, 0, 3, 0, 3, 0, 0, 0, - 0, 0, 0, 49, 0, 0, 50, 0, 51, 51, 51, 51, 51, 51, 51, 51, - 51, 51, 51, 51, 51, 51, 0, 0, 0, 51, 0, 51, 0, 51, 0, 51, - 51, 51, 52, 52, 53, 0, 54, 0, 55, 55, 55, 55, 53, 0, 0, 0, - 51, 51, 56, 56, 0, 0, 0, 0, 51, 51, 57, 57, 44, 0, 0, 0, - 58, 58, 59, 59, 53, 0, 0, 0, 0, 0, 0, 0, 0, 0, 60, 0, - 0, 0, 61, 62, 0, 0, 0, 0, 0, 0, 63, 0, 0, 0, 0, 0, - 64, 64, 64, 64, 64, 64, 64, 64, 0, 0, 0, 3, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, - 3, 0, 66, 67, 68, 0, 0, 3, 0, 3, 0, 3, 0, 69, 70, 71, - 72, 0, 3, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 73, 73, - 0, 0, 0, 3, 0, 3, 0, 0, 0, 3, 0, 3, 0, 74, 3, 0, - 0, 0, 0, 3, 0, 75, 0, 0, 3, 0, 76, 0, 0, 0, 0, 0, - 77, 77, 77, 77, 77, 77, 77, 77, -}; - -/* Simple_Case_Folding: 1456 bytes. 
*/ - -static RE_INT32 re_simple_case_folding_table[] = { - 0, - 32, - 775, - 1, - -121, - -268, - 210, - 206, - 205, - 79, - 202, - 203, - 207, - 211, - 209, - 213, - 214, - 218, - 217, - 219, - 2, - -97, - -56, - -130, - 10795, - -163, - 10792, - -195, - 69, - 71, - 116, - 38, - 37, - 64, - 63, - 8, - -30, - -25, - -15, - -22, - -54, - -48, - -60, - -64, - -7, - 80, - 15, - 48, - 7264, - -58, - -7615, - -8, - -74, - -9, - -7173, - -86, - -100, - -112, - -128, - -126, - -7517, - -8383, - -8262, - 28, - 16, - 26, - -10743, - -3814, - -10727, - -10780, - -10749, - -10783, - -10782, - -10815, - -35332, - -42280, - -42308, - 40, -}; - -/* Simple_Case_Folding: 312 bytes. */ - -RE_UINT32 re_get_simple_case_folding(RE_UINT32 ch) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - RE_INT32 diff; - - f = ch >> 13; - code = ch ^ (f << 13); - pos = (RE_UINT32)re_simple_case_folding_stage_1[f] << 5; - f = code >> 8; - code ^= f << 8; - pos = (RE_UINT32)re_simple_case_folding_stage_2[pos + f] << 5; - f = code >> 3; - code ^= f << 3; - pos = (RE_UINT32)re_simple_case_folding_stage_3[pos + f] << 3; - value = re_simple_case_folding_stage_4[pos + code]; - - diff = re_simple_case_folding_table[value]; - - return (RE_UINT32)((RE_INT32)ch + diff); -} - -/* Full_Case_Folding. 
*/ - -static RE_UINT8 re_full_case_folding_stage_1[] = { - 0, 1, 2, 2, 2, 3, 2, 4, 5, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, -}; - -static RE_UINT8 re_full_case_folding_stage_2[] = { - 0, 1, 2, 3, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 8, 9, - 6, 10, 6, 6, 11, 6, 6, 6, 6, 6, 6, 6, 12, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 13, 14, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 15, 6, 6, 6, 16, - 6, 6, 6, 6, 17, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, -}; - -static RE_UINT8 re_full_case_folding_stage_3[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 2, 3, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 4, 0, 2, 2, 5, 6, 0, 0, 0, 0, - 7, 7, 7, 7, 7, 7, 8, 9, 9, 10, 7, 7, 7, 7, 7, 11, - 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 9, 22, 7, 7, 23, 7, - 7, 7, 7, 7, 24, 7, 25, 26, 27, 7, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 28, 0, 0, 0, 0, 0, 29, 0, - 30, 31, 32, 2, 33, 34, 35, 0, 36, 37, 38, 7, 7, 7, 39, 40, - 41, 41, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 7, 7, 7, 7, - 42, 43, 7, 7, 7, 7, 7, 7, 44, 45, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 0, 46, 47, 47, 47, 48, 0, 0, 0, 0, 0, - 49, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
- 0, 0, 0, 0, 50, 50, 50, 50, 51, 52, 0, 0, 0, 0, 0, 0, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 53, 54, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 0, 55, 0, 56, 0, 55, 0, 55, 0, 56, 57, 58, 0, 55, 0, 0, - 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, - 0, 0, 0, 0, 75, 76, 77, 0, 0, 0, 0, 0, 78, 78, 0, 0, - 79, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 80, 81, 81, 81, 0, 0, 0, 0, 0, 0, - 47, 47, 47, 47, 47, 48, 0, 0, 0, 0, 0, 0, 82, 83, 84, 85, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 25, 86, 36, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 7, 7, 7, 7, 7, 87, 0, 0, - 7, 7, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 43, 7, 43, 7, 7, 7, 7, 7, 7, 7, 0, 88, - 7, 89, 25, 0, 7, 90, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 91, 0, 92, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 1, 2, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 93, 93, 93, 93, 93, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -}; - -static RE_UINT8 re_full_case_folding_stage_4[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 2, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, - 1, 1, 1, 1, 1, 1, 1, 3, 4, 0, 4, 0, 4, 0, 4, 0, - 5, 0, 4, 0, 4, 0, 4, 0, 0, 4, 0, 4, 0, 4, 0, 4, - 0, 6, 4, 0, 4, 0, 4, 0, 7, 4, 0, 4, 0, 4, 0, 8, - 0, 9, 4, 0, 4, 0, 10, 4, 0, 11, 11, 4, 0, 0, 12, 13, - 14, 4, 0, 11, 15, 0, 16, 17, 4, 0, 0, 0, 16, 18, 0, 19, - 4, 0, 4, 0, 4, 0, 20, 4, 0, 20, 0, 0, 4, 0, 20, 4, - 0, 21, 21, 4, 0, 4, 0, 22, 4, 0, 0, 0, 4, 0, 0, 0, - 0, 0, 0, 0, 23, 4, 0, 23, 4, 0, 23, 4, 0, 4, 0, 4, - 0, 4, 0, 4, 0, 0, 4, 0, 24, 23, 4, 0, 4, 0, 25, 26, - 27, 0, 4, 0, 4, 0, 4, 0, 4, 0, 4, 0, 0, 0, 0, 0, - 0, 0, 28, 4, 0, 29, 30, 0, 0, 4, 0, 31, 32, 33, 4, 0, - 0, 0, 0, 0, 0, 34, 0, 0, 4, 0, 4, 0, 0, 0, 4, 0, - 0, 0, 0, 0, 0, 0, 35, 0, 36, 36, 36, 
0, 37, 0, 38, 38, - 39, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 0, 0, 0, 0, 40, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 41, - 42, 43, 0, 0, 0, 44, 45, 0, 46, 47, 0, 0, 48, 49, 0, 4, - 0, 50, 4, 0, 0, 27, 27, 27, 51, 51, 51, 51, 51, 51, 51, 51, - 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 4, 0, 4, 0, - 52, 4, 0, 4, 0, 4, 0, 4, 0, 4, 0, 4, 0, 4, 0, 0, - 0, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, - 53, 53, 53, 53, 53, 53, 53, 0, 0, 0, 0, 0, 0, 0, 0, 54, - 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 0, 55, - 0, 0, 0, 0, 0, 55, 0, 0, 4, 0, 4, 0, 4, 0, 56, 57, - 58, 59, 60, 61, 0, 0, 62, 0, 63, 63, 63, 63, 63, 63, 63, 63, - 63, 63, 63, 63, 63, 63, 0, 0, 64, 0, 65, 0, 66, 0, 67, 0, - 0, 63, 0, 63, 0, 63, 0, 63, 68, 68, 68, 68, 68, 68, 68, 68, - 69, 69, 69, 69, 69, 69, 69, 69, 70, 70, 70, 70, 70, 70, 70, 70, - 71, 71, 71, 71, 71, 71, 71, 71, 72, 72, 72, 72, 72, 72, 72, 72, - 73, 73, 73, 73, 73, 73, 73, 73, 0, 0, 74, 75, 76, 0, 77, 78, - 63, 63, 79, 79, 80, 0, 81, 0, 0, 0, 82, 83, 84, 0, 85, 86, - 87, 87, 87, 87, 88, 0, 0, 0, 0, 0, 89, 90, 0, 0, 91, 92, - 63, 63, 93, 93, 0, 0, 0, 0, 0, 0, 94, 95, 96, 0, 97, 98, - 63, 63, 99, 99, 50, 0, 0, 0, 0, 0, 100, 101, 102, 0, 103, 104, - 105, 105, 106, 106, 107, 0, 0, 0, 0, 0, 0, 0, 0, 0, 108, 0, - 0, 0, 109, 110, 0, 0, 0, 0, 0, 0, 111, 0, 0, 0, 0, 0, - 112, 112, 112, 112, 112, 112, 112, 112, 0, 0, 0, 4, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, - 4, 0, 114, 115, 116, 0, 0, 4, 0, 4, 0, 4, 0, 117, 118, 119, - 120, 0, 4, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 121, 121, - 0, 0, 0, 4, 0, 4, 0, 0, 4, 0, 4, 0, 4, 0, 0, 0, - 0, 4, 0, 4, 0, 122, 4, 0, 0, 0, 0, 4, 0, 123, 0, 0, - 4, 0, 124, 0, 0, 0, 0, 0, 125, 126, 127, 128, 129, 130, 131, 0, - 0, 0, 0, 132, 133, 134, 135, 136, 137, 137, 137, 137, 137, 137, 137, 137, -}; - -/* Full_Case_Folding: 1656 bytes. 
*/ - -static RE_FullCaseFolding re_full_case_folding_table[] = { - { 0, { 0, 0}}, - { 32, { 0, 0}}, - { 775, { 0, 0}}, - { -108, { 115, 0}}, - { 1, { 0, 0}}, - { -199, { 775, 0}}, - { 371, { 110, 0}}, - { -121, { 0, 0}}, - { -268, { 0, 0}}, - { 210, { 0, 0}}, - { 206, { 0, 0}}, - { 205, { 0, 0}}, - { 79, { 0, 0}}, - { 202, { 0, 0}}, - { 203, { 0, 0}}, - { 207, { 0, 0}}, - { 211, { 0, 0}}, - { 209, { 0, 0}}, - { 213, { 0, 0}}, - { 214, { 0, 0}}, - { 218, { 0, 0}}, - { 217, { 0, 0}}, - { 219, { 0, 0}}, - { 2, { 0, 0}}, - { -390, { 780, 0}}, - { -97, { 0, 0}}, - { -56, { 0, 0}}, - { -130, { 0, 0}}, - { 10795, { 0, 0}}, - { -163, { 0, 0}}, - { 10792, { 0, 0}}, - { -195, { 0, 0}}, - { 69, { 0, 0}}, - { 71, { 0, 0}}, - { 116, { 0, 0}}, - { 38, { 0, 0}}, - { 37, { 0, 0}}, - { 64, { 0, 0}}, - { 63, { 0, 0}}, - { 41, { 776, 769}}, - { 21, { 776, 769}}, - { 8, { 0, 0}}, - { -30, { 0, 0}}, - { -25, { 0, 0}}, - { -15, { 0, 0}}, - { -22, { 0, 0}}, - { -54, { 0, 0}}, - { -48, { 0, 0}}, - { -60, { 0, 0}}, - { -64, { 0, 0}}, - { -7, { 0, 0}}, - { 80, { 0, 0}}, - { 15, { 0, 0}}, - { 48, { 0, 0}}, - { -34, {1410, 0}}, - { 7264, { 0, 0}}, - { -7726, { 817, 0}}, - { -7715, { 776, 0}}, - { -7713, { 778, 0}}, - { -7712, { 778, 0}}, - { -7737, { 702, 0}}, - { -58, { 0, 0}}, - { -7723, { 115, 0}}, - { -8, { 0, 0}}, - { -7051, { 787, 0}}, - { -7053, { 787, 768}}, - { -7055, { 787, 769}}, - { -7057, { 787, 834}}, - { -128, { 953, 0}}, - { -136, { 953, 0}}, - { -112, { 953, 0}}, - { -120, { 953, 0}}, - { -64, { 953, 0}}, - { -72, { 953, 0}}, - { -66, { 953, 0}}, - { -7170, { 953, 0}}, - { -7176, { 953, 0}}, - { -7173, { 834, 0}}, - { -7174, { 834, 953}}, - { -74, { 0, 0}}, - { -7179, { 953, 0}}, - { -7173, { 0, 0}}, - { -78, { 953, 0}}, - { -7180, { 953, 0}}, - { -7190, { 953, 0}}, - { -7183, { 834, 0}}, - { -7184, { 834, 953}}, - { -86, { 0, 0}}, - { -7189, { 953, 0}}, - { -7193, { 776, 768}}, - { -7194, { 776, 769}}, - { -7197, { 834, 0}}, - { -7198, { 776, 834}}, - { -100, { 0, 0}}, - { 
-7197, { 776, 768}}, - { -7198, { 776, 769}}, - { -7203, { 787, 0}}, - { -7201, { 834, 0}}, - { -7202, { 776, 834}}, - { -112, { 0, 0}}, - { -118, { 953, 0}}, - { -7210, { 953, 0}}, - { -7206, { 953, 0}}, - { -7213, { 834, 0}}, - { -7214, { 834, 953}}, - { -128, { 0, 0}}, - { -126, { 0, 0}}, - { -7219, { 953, 0}}, - { -7517, { 0, 0}}, - { -8383, { 0, 0}}, - { -8262, { 0, 0}}, - { 28, { 0, 0}}, - { 16, { 0, 0}}, - { 26, { 0, 0}}, - {-10743, { 0, 0}}, - { -3814, { 0, 0}}, - {-10727, { 0, 0}}, - {-10780, { 0, 0}}, - {-10749, { 0, 0}}, - {-10783, { 0, 0}}, - {-10782, { 0, 0}}, - {-10815, { 0, 0}}, - {-35332, { 0, 0}}, - {-42280, { 0, 0}}, - {-42308, { 0, 0}}, - {-64154, { 102, 0}}, - {-64155, { 105, 0}}, - {-64156, { 108, 0}}, - {-64157, { 102, 105}}, - {-64158, { 102, 108}}, - {-64146, { 116, 0}}, - {-64147, { 116, 0}}, - {-62879, {1398, 0}}, - {-62880, {1381, 0}}, - {-62881, {1387, 0}}, - {-62872, {1398, 0}}, - {-62883, {1389, 0}}, - { 40, { 0, 0}}, -}; - -/* Full_Case_Folding: 1104 bytes. */ - -int re_get_full_case_folding(RE_UINT32 ch, RE_UINT32* codepoints) { - RE_UINT32 code; - RE_UINT32 f; - RE_UINT32 pos; - RE_UINT32 value; - RE_FullCaseFolding* case_folding; - int count; - - f = ch >> 13; - code = ch ^ (f << 13); - pos = (RE_UINT32)re_full_case_folding_stage_1[f] << 5; - f = code >> 8; - code ^= f << 8; - pos = (RE_UINT32)re_full_case_folding_stage_2[pos + f] << 5; - f = code >> 3; - code ^= f << 3; - pos = (RE_UINT32)re_full_case_folding_stage_3[pos + f] << 3; - value = re_full_case_folding_stage_4[pos + code]; - - case_folding = &re_full_case_folding_table[value]; - - codepoints[0] = (RE_UINT32)((RE_INT32)ch + case_folding->diff); - count = 1; - - while (count < RE_MAX_FOLDED && case_folding->codepoints[count - 1] != 0) { - codepoints[count] = case_folding->codepoints[count - 1]; - ++count; - } - - return count; -} - -/* Property function table. 
*/ - -RE_GetPropertyFunc re_get_property[] = { - re_get_general_category, - re_get_block, - re_get_script, - re_get_word_break, - re_get_grapheme_cluster_break, - re_get_sentence_break, - re_get_math, - re_get_alphabetic, - re_get_lowercase, - re_get_uppercase, - re_get_cased, - re_get_case_ignorable, - re_get_changes_when_lowercased, - re_get_changes_when_uppercased, - re_get_changes_when_titlecased, - re_get_changes_when_casefolded, - re_get_changes_when_casemapped, - re_get_id_start, - re_get_id_continue, - re_get_xid_start, - re_get_xid_continue, - re_get_default_ignorable_code_point, - re_get_grapheme_extend, - re_get_grapheme_base, - re_get_grapheme_link, - re_get_white_space, - re_get_bidi_control, - re_get_join_control, - re_get_dash, - re_get_hyphen, - re_get_quotation_mark, - re_get_terminal_punctuation, - re_get_other_math, - re_get_hex_digit, - re_get_ascii_hex_digit, - re_get_other_alphabetic, - re_get_ideographic, - re_get_diacritic, - re_get_extender, - re_get_other_lowercase, - re_get_other_uppercase, - re_get_noncharacter_code_point, - re_get_other_grapheme_extend, - re_get_ids_binary_operator, - re_get_ids_trinary_operator, - re_get_radical, - re_get_unified_ideograph, - re_get_other_default_ignorable_code_point, - re_get_deprecated, - re_get_soft_dotted, - re_get_logical_order_exception, - re_get_other_id_start, - re_get_other_id_continue, - re_get_sterm, - re_get_variation_selector, - re_get_pattern_white_space, - re_get_pattern_syntax, - re_get_hangul_syllable_type, - re_get_bidi_class, - re_get_canonical_combining_class, - re_get_decomposition_type, - re_get_east_asian_width, - re_get_joining_group, - re_get_joining_type, - re_get_line_break, - re_get_numeric_type, - re_get_numeric_value, - re_get_bidi_mirrored, - re_get_indic_matra_category, - re_get_indic_syllabic_category, - re_get_alphanumeric, - re_get_any, - re_get_blank, - re_get_graph, - re_get_print, - re_get_word, - re_get_xdigit, -}; diff --git a/lib/regex/_regex_unicode.h 
b/lib/regex/_regex_unicode.h deleted file mode 100644 index fa8114be..00000000 --- a/lib/regex/_regex_unicode.h +++ /dev/null @@ -1,218 +0,0 @@ -typedef unsigned char RE_UINT8; -typedef signed char RE_INT8; -typedef unsigned short RE_UINT16; -typedef signed short RE_INT16; -typedef unsigned int RE_UINT32; -typedef signed int RE_INT32; - -typedef unsigned char BOOL; -enum {FALSE, TRUE}; - -#define RE_ASCII_MAX 0x7F -#define RE_LOCALE_MAX 0xFF -#define RE_UNICODE_MAX 0x10FFFF - -#define RE_MAX_CASES 4 -#define RE_MAX_FOLDED 3 - -typedef struct RE_Property { - RE_UINT16 name; - RE_UINT8 id; - RE_UINT8 value_set; -} RE_Property; - -typedef struct RE_PropertyValue { - RE_UINT16 name; - RE_UINT8 value_set; - RE_UINT8 id; -} RE_PropertyValue; - -typedef RE_UINT32 (*RE_GetPropertyFunc)(RE_UINT32 ch); - -#define RE_PROP_GC 0x0 -#define RE_PROP_CASED 0xA -#define RE_PROP_UPPERCASE 0x9 -#define RE_PROP_LOWERCASE 0x8 - -#define RE_PROP_C 30 -#define RE_PROP_L 31 -#define RE_PROP_M 32 -#define RE_PROP_N 33 -#define RE_PROP_P 34 -#define RE_PROP_S 35 -#define RE_PROP_Z 36 -#define RE_PROP_ASSIGNED 38 -#define RE_PROP_CASEDLETTER 37 - -#define RE_PROP_CN 0 -#define RE_PROP_LU 1 -#define RE_PROP_LL 2 -#define RE_PROP_LT 3 -#define RE_PROP_LM 4 -#define RE_PROP_LO 5 -#define RE_PROP_MN 6 -#define RE_PROP_ME 7 -#define RE_PROP_MC 8 -#define RE_PROP_ND 9 -#define RE_PROP_NL 10 -#define RE_PROP_NO 11 -#define RE_PROP_ZS 12 -#define RE_PROP_ZL 13 -#define RE_PROP_ZP 14 -#define RE_PROP_CC 15 -#define RE_PROP_CF 16 -#define RE_PROP_CO 17 -#define RE_PROP_CS 18 -#define RE_PROP_PD 19 -#define RE_PROP_PS 20 -#define RE_PROP_PE 21 -#define RE_PROP_PC 22 -#define RE_PROP_PO 23 -#define RE_PROP_SM 24 -#define RE_PROP_SC 25 -#define RE_PROP_SK 26 -#define RE_PROP_SO 27 -#define RE_PROP_PI 28 -#define RE_PROP_PF 29 - -#define RE_PROP_C_MASK 0x00078001 -#define RE_PROP_L_MASK 0x0000003E -#define RE_PROP_M_MASK 0x000001C0 -#define RE_PROP_N_MASK 0x00000E00 -#define RE_PROP_P_MASK 0x30F80000 
-#define RE_PROP_S_MASK 0x0F000000 -#define RE_PROP_Z_MASK 0x00007000 - -#define RE_PROP_ALNUM 0x460001 -#define RE_PROP_ALPHA 0x070001 -#define RE_PROP_ANY 0x470001 -#define RE_PROP_ASCII 0x010001 -#define RE_PROP_BLANK 0x480001 -#define RE_PROP_CNTRL 0x00000F -#define RE_PROP_DIGIT 0x000009 -#define RE_PROP_GRAPH 0x490001 -#define RE_PROP_LOWER 0x080001 -#define RE_PROP_PRINT 0x4A0001 -#define RE_PROP_SPACE 0x190001 -#define RE_PROP_UPPER 0x090001 -#define RE_PROP_WORD 0x4B0001 -#define RE_PROP_XDIGIT 0x4C0001 - -#define RE_BREAK_OTHER 0 -#define RE_BREAK_DOUBLEQUOTE 1 -#define RE_BREAK_SINGLEQUOTE 2 -#define RE_BREAK_HEBREWLETTER 3 -#define RE_BREAK_CR 4 -#define RE_BREAK_LF 5 -#define RE_BREAK_NEWLINE 6 -#define RE_BREAK_EXTEND 7 -#define RE_BREAK_REGIONALINDICATOR 8 -#define RE_BREAK_FORMAT 9 -#define RE_BREAK_KATAKANA 10 -#define RE_BREAK_ALETTER 11 -#define RE_BREAK_MIDLETTER 12 -#define RE_BREAK_MIDNUM 13 -#define RE_BREAK_MIDNUMLET 14 -#define RE_BREAK_NUMERIC 15 -#define RE_BREAK_EXTENDNUMLET 16 - -#define RE_GBREAK_OTHER 0 -#define RE_GBREAK_CR 1 -#define RE_GBREAK_LF 2 -#define RE_GBREAK_CONTROL 3 -#define RE_GBREAK_EXTEND 4 -#define RE_GBREAK_REGIONALINDICATOR 5 -#define RE_GBREAK_SPACINGMARK 6 -#define RE_GBREAK_L 7 -#define RE_GBREAK_V 8 -#define RE_GBREAK_T 9 -#define RE_GBREAK_LV 10 -#define RE_GBREAK_LVT 11 -#define RE_GBREAK_PREPEND 12 - -extern char* re_strings[1160]; -extern RE_Property re_properties[143]; -extern RE_PropertyValue re_property_values[1251]; -extern RE_UINT16 re_expand_on_folding[104]; -extern RE_GetPropertyFunc re_get_property[77]; - -RE_UINT32 re_get_general_category(RE_UINT32 ch); -RE_UINT32 re_get_block(RE_UINT32 ch); -RE_UINT32 re_get_script(RE_UINT32 ch); -RE_UINT32 re_get_word_break(RE_UINT32 ch); -RE_UINT32 re_get_grapheme_cluster_break(RE_UINT32 ch); -RE_UINT32 re_get_sentence_break(RE_UINT32 ch); -RE_UINT32 re_get_math(RE_UINT32 ch); -RE_UINT32 re_get_alphabetic(RE_UINT32 ch); -RE_UINT32 re_get_lowercase(RE_UINT32 ch); 
-RE_UINT32 re_get_uppercase(RE_UINT32 ch); -RE_UINT32 re_get_cased(RE_UINT32 ch); -RE_UINT32 re_get_case_ignorable(RE_UINT32 ch); -RE_UINT32 re_get_changes_when_lowercased(RE_UINT32 ch); -RE_UINT32 re_get_changes_when_uppercased(RE_UINT32 ch); -RE_UINT32 re_get_changes_when_titlecased(RE_UINT32 ch); -RE_UINT32 re_get_changes_when_casefolded(RE_UINT32 ch); -RE_UINT32 re_get_changes_when_casemapped(RE_UINT32 ch); -RE_UINT32 re_get_id_start(RE_UINT32 ch); -RE_UINT32 re_get_id_continue(RE_UINT32 ch); -RE_UINT32 re_get_xid_start(RE_UINT32 ch); -RE_UINT32 re_get_xid_continue(RE_UINT32 ch); -RE_UINT32 re_get_default_ignorable_code_point(RE_UINT32 ch); -RE_UINT32 re_get_grapheme_extend(RE_UINT32 ch); -RE_UINT32 re_get_grapheme_base(RE_UINT32 ch); -RE_UINT32 re_get_grapheme_link(RE_UINT32 ch); -RE_UINT32 re_get_white_space(RE_UINT32 ch); -RE_UINT32 re_get_bidi_control(RE_UINT32 ch); -RE_UINT32 re_get_join_control(RE_UINT32 ch); -RE_UINT32 re_get_dash(RE_UINT32 ch); -RE_UINT32 re_get_hyphen(RE_UINT32 ch); -RE_UINT32 re_get_quotation_mark(RE_UINT32 ch); -RE_UINT32 re_get_terminal_punctuation(RE_UINT32 ch); -RE_UINT32 re_get_other_math(RE_UINT32 ch); -RE_UINT32 re_get_hex_digit(RE_UINT32 ch); -RE_UINT32 re_get_ascii_hex_digit(RE_UINT32 ch); -RE_UINT32 re_get_other_alphabetic(RE_UINT32 ch); -RE_UINT32 re_get_ideographic(RE_UINT32 ch); -RE_UINT32 re_get_diacritic(RE_UINT32 ch); -RE_UINT32 re_get_extender(RE_UINT32 ch); -RE_UINT32 re_get_other_lowercase(RE_UINT32 ch); -RE_UINT32 re_get_other_uppercase(RE_UINT32 ch); -RE_UINT32 re_get_noncharacter_code_point(RE_UINT32 ch); -RE_UINT32 re_get_other_grapheme_extend(RE_UINT32 ch); -RE_UINT32 re_get_ids_binary_operator(RE_UINT32 ch); -RE_UINT32 re_get_ids_trinary_operator(RE_UINT32 ch); -RE_UINT32 re_get_radical(RE_UINT32 ch); -RE_UINT32 re_get_unified_ideograph(RE_UINT32 ch); -RE_UINT32 re_get_other_default_ignorable_code_point(RE_UINT32 ch); -RE_UINT32 re_get_deprecated(RE_UINT32 ch); -RE_UINT32 re_get_soft_dotted(RE_UINT32 ch); 
-RE_UINT32 re_get_logical_order_exception(RE_UINT32 ch); -RE_UINT32 re_get_other_id_start(RE_UINT32 ch); -RE_UINT32 re_get_other_id_continue(RE_UINT32 ch); -RE_UINT32 re_get_sterm(RE_UINT32 ch); -RE_UINT32 re_get_variation_selector(RE_UINT32 ch); -RE_UINT32 re_get_pattern_white_space(RE_UINT32 ch); -RE_UINT32 re_get_pattern_syntax(RE_UINT32 ch); -RE_UINT32 re_get_hangul_syllable_type(RE_UINT32 ch); -RE_UINT32 re_get_bidi_class(RE_UINT32 ch); -RE_UINT32 re_get_canonical_combining_class(RE_UINT32 ch); -RE_UINT32 re_get_decomposition_type(RE_UINT32 ch); -RE_UINT32 re_get_east_asian_width(RE_UINT32 ch); -RE_UINT32 re_get_joining_group(RE_UINT32 ch); -RE_UINT32 re_get_joining_type(RE_UINT32 ch); -RE_UINT32 re_get_line_break(RE_UINT32 ch); -RE_UINT32 re_get_numeric_type(RE_UINT32 ch); -RE_UINT32 re_get_numeric_value(RE_UINT32 ch); -RE_UINT32 re_get_bidi_mirrored(RE_UINT32 ch); -RE_UINT32 re_get_indic_matra_category(RE_UINT32 ch); -RE_UINT32 re_get_indic_syllabic_category(RE_UINT32 ch); -RE_UINT32 re_get_alphanumeric(RE_UINT32 ch); -RE_UINT32 re_get_any(RE_UINT32 ch); -RE_UINT32 re_get_blank(RE_UINT32 ch); -RE_UINT32 re_get_graph(RE_UINT32 ch); -RE_UINT32 re_get_print(RE_UINT32 ch); -RE_UINT32 re_get_word(RE_UINT32 ch); -RE_UINT32 re_get_xdigit(RE_UINT32 ch); -int re_get_all_cases(RE_UINT32 ch, RE_UINT32* codepoints); -RE_UINT32 re_get_simple_case_folding(RE_UINT32 ch); -int re_get_full_case_folding(RE_UINT32 ch, RE_UINT32* codepoints); diff --git a/lib/regex/regex.py b/lib/regex/regex.py deleted file mode 100644 index e5e40d1f..00000000 --- a/lib/regex/regex.py +++ /dev/null @@ -1,684 +0,0 @@ -# -# Secret Labs' Regular Expression Engine -# -# Copyright (c) 1998-2001 by Secret Labs AB. All rights reserved. -# -# This version of the SRE library can be redistributed under CNRI's -# Python 1.6 license. For any other use, please contact Secret Labs -# AB (info@pythonware.com). -# -# Portions of this engine have been developed in cooperation with -# CNRI. 
Hewlett-Packard provided funding for 1.6 integration and -# other compatibility work. -# -# 2010-01-16 mrab Python front-end re-written and extended - -r"""Support for regular expressions (RE). - -This module provides regular expression matching operations similar to those -found in Perl. It supports both 8-bit and Unicode strings; both the pattern and -the strings being processed can contain null bytes and characters outside the -US ASCII range. - -Regular expressions can contain both special and ordinary characters. Most -ordinary characters, like "A", "a", or "0", are the simplest regular -expressions; they simply match themselves. You can concatenate ordinary -characters, so last matches the string 'last'. - -There are a few differences between the old (legacy) behaviour and the new -(enhanced) behaviour, which are indicated by VERSION0 or VERSION1. - -The special characters are: - "." Matches any character except a newline. - "^" Matches the start of the string. - "$" Matches the end of the string or just before the - newline at the end of the string. - "*" Matches 0 or more (greedy) repetitions of the preceding - RE. Greedy means that it will match as many repetitions - as possible. - "+" Matches 1 or more (greedy) repetitions of the preceding - RE. - "?" Matches 0 or 1 (greedy) of the preceding RE. - *?,+?,?? Non-greedy versions of the previous three special - characters. - *+,++,?+ Possessive versions of the previous three special - characters. - {m,n} Matches from m to n repetitions of the preceding RE. - {m,n}? Non-greedy version of the above. - {m,n}+ Possessive version of the above. - {...} Fuzzy matching constraints. - "\\" Either escapes special characters or signals a special - sequence. - [...] Indicates a set of characters. A "^" as the first - character indicates a complementing set. - "|" A|B, creates an RE that will match either A or B. - (...) Matches the RE inside the parentheses. 
The contents are - captured and can be retrieved or matched later in the - string. - (?flags-flags) VERSION1: Sets/clears the flags for the remainder of - the group or pattern; VERSION0: Sets the flags for the - entire pattern. - (?:...) Non-capturing version of regular parentheses. - (?>...) Atomic non-capturing version of regular parentheses. - (?flags-flags:...) Non-capturing version of regular parentheses with local - flags. - (?P...) The substring matched by the group is accessible by - name. - (?...) The substring matched by the group is accessible by - name. - (?P=name) Matches the text matched earlier by the group named - name. - (?#...) A comment; ignored. - (?=...) Matches if ... matches next, but doesn't consume the - string. - (?!...) Matches if ... doesn't match next. - (?<=...) Matches if preceded by .... - (? Matches the text matched by the group named name. - \G Matches the empty string, but only at the position where - the search started. - \L Named list. The list is provided as a keyword argument. - \m Matches the empty string, but only at the start of a word. - \M Matches the empty string, but only at the end of a word. - \n Matches the newline character. - \N{name} Matches the named character. - \p{name=value} Matches the character if its property has the specified - value. - \P{name=value} Matches the character if its property hasn't the specified - value. - \r Matches the carriage-return character. - \s Matches any whitespace character; equivalent to - [ \t\n\r\f\v]. - \S Matches any non-whitespace character; equivalent to [^\s]. - \t Matches the tab character. - \uXXXX Matches the Unicode codepoint with 4-digit hex code XXXX. - \UXXXXXXXX Matches the Unicode codepoint with 8-digit hex code - XXXXXXXX. - \v Matches the vertical tab character. 
- \w Matches any alphanumeric character; equivalent to - [a-zA-Z0-9_] when matching a bytestring or a Unicode string - with the ASCII flag, or the whole range of Unicode - alphanumeric characters (letters plus digits plus - underscore) when matching a Unicode string. With LOCALE, it - will match the set [0-9_] plus characters defined as - letters for the current locale. - \W Matches the complement of \w; equivalent to [^\w]. - \xXX Matches the character with 2-digit hex code XX. - \X Matches a grapheme. - \Z Matches only at the end of the string. - \\ Matches a literal backslash. - -This module exports the following functions: - match Match a regular expression pattern at the beginning of a string. - fullmatch Match a regular expression pattern against all of a string. - search Search a string for the presence of a pattern. - sub Substitute occurrences of a pattern found in a string using a - template string. - subf Substitute occurrences of a pattern found in a string using a - format string. - subn Same as sub, but also return the number of substitutions made. - subfn Same as subf, but also return the number of substitutions made. - split Split a string by the occurrences of a pattern. VERSION1: will - split at zero-width match; VERSION0: won't split at zero-width - match. - splititer Return an iterator yielding the parts of a split string. - findall Find all occurrences of a pattern in a string. - finditer Return an iterator yielding a match object for each match. - compile Compile a pattern into a Pattern object. - purge Clear the regular expression cache. - escape Backslash all non-alphanumerics or special characters in a - string. - -Most of the functions support a concurrent parameter: if True, the GIL will be -released during matching, allowing other Python threads to run concurrently. If -the string changes during matching, the behaviour is undefined. This parameter -is not needed when working on the builtin (immutable) string classes. 
- -Some of the functions in this module take flags as optional parameters. Most of -these flags can also be set within an RE: - A a ASCII Make \w, \W, \b, \B, \d, and \D match the - corresponding ASCII character categories. Default - when matching a bytestring. - B b BESTMATCH Find the best fuzzy match (default is first). - D DEBUG Print the parsed pattern. - F f FULLCASE Use full case-folding when performing - case-insensitive matching in Unicode. - I i IGNORECASE Perform case-insensitive matching. - L L LOCALE Make \w, \W, \b, \B, \d, and \D dependent on the - current locale. (One byte per character only.) - M m MULTILINE "^" matches the beginning of lines (after a newline) - as well as the string. "$" matches the end of lines - (before a newline) as well as the end of the string. - E e ENHANCEMATCH Attempt to improve the fit after finding the first - fuzzy match. - R r REVERSE Searches backwards. - S s DOTALL "." matches any character at all, including the - newline. - U u UNICODE Make \w, \W, \b, \B, \d, and \D dependent on the - Unicode locale. Default when matching a Unicode - string. - V0 V0 VERSION0 Turn on the old legacy behaviour. - V1 V1 VERSION1 Turn on the new enhanced behaviour. This flag - includes the FULLCASE flag. - W w WORD Make \b and \B work with default Unicode word breaks - and make ".", "^" and "$" work with Unicode line - breaks. - X x VERBOSE Ignore whitespace and comments for nicer looking REs. - -This module also defines an exception 'error'. - -""" - -# Public symbols. 
-__all__ = ["compile", "escape", "findall", "finditer", "fullmatch", "match", - "purge", "search", "split", "splititer", "sub", "subf", "subfn", "subn", - "template", "Scanner", "A", "ASCII", "B", "BESTMATCH", "D", "DEBUG", "E", - "ENHANCEMATCH", "S", "DOTALL", "F", "FULLCASE", "I", "IGNORECASE", "L", - "LOCALE", "M", "MULTILINE", "R", "REVERSE", "T", "TEMPLATE", "U", "UNICODE", - "V0", "VERSION0", "V1", "VERSION1", "X", "VERBOSE", "W", "WORD", "error", - "Regex"] - -__version__ = "2.4.45" - -# -------------------------------------------------------------------- -# Public interface. - -def match(pattern, string, flags=0, pos=None, endpos=None, partial=False, - concurrent=None, **kwargs): - """Try to apply the pattern at the start of the string, returning a match - object, or None if no match was found.""" - return _compile(pattern, flags, kwargs).match(string, pos, endpos, - concurrent, partial) - -def fullmatch(pattern, string, flags=0, pos=None, endpos=None, partial=False, - concurrent=None, **kwargs): - """Try to apply the pattern against all of the string, returning a match - object, or None if no match was found.""" - return _compile(pattern, flags, kwargs).fullmatch(string, pos, endpos, - concurrent, partial) - -def search(pattern, string, flags=0, pos=None, endpos=None, partial=False, - concurrent=None, **kwargs): - """Search through string looking for a match to the pattern, returning a - match object, or None if no match was found.""" - return _compile(pattern, flags, kwargs).search(string, pos, endpos, - concurrent, partial) - -def sub(pattern, repl, string, count=0, flags=0, pos=None, endpos=None, - concurrent=None, **kwargs): - """Return the string obtained by replacing the leftmost (or rightmost with a - reverse pattern) non-overlapping occurrences of the pattern in string by the - replacement repl. 
repl can be either a string or a callable; if a string, - backslash escapes in it are processed; if a callable, it's passed the match - object and must return a replacement string to be used.""" - return _compile(pattern, flags, kwargs).sub(repl, string, count, pos, - endpos, concurrent) - -def subf(pattern, format, string, count=0, flags=0, pos=None, endpos=None, - concurrent=None, **kwargs): - """Return the string obtained by replacing the leftmost (or rightmost with a - reverse pattern) non-overlapping occurrences of the pattern in string by the - replacement format. format can be either a string or a callable; if a string, - it's treated as a format string; if a callable, it's passed the match object - and must return a replacement string to be used.""" - return _compile(pattern, flags, kwargs).subf(format, string, count, pos, - endpos, concurrent) - -def subn(pattern, repl, string, count=0, flags=0, pos=None, endpos=None, - concurrent=None, **kwargs): - """Return a 2-tuple containing (new_string, number). new_string is the string - obtained by replacing the leftmost (or rightmost with a reverse pattern) - non-overlapping occurrences of the pattern in the source string by the - replacement repl. number is the number of substitutions that were made. repl - can be either a string or a callable; if a string, backslash escapes in it - are processed; if a callable, it's passed the match object and must return a - replacement string to be used.""" - return _compile(pattern, flags, kwargs).subn(repl, string, count, pos, - endpos, concurrent) - -def subfn(pattern, format, string, count=0, flags=0, pos=None, endpos=None, - concurrent=None, **kwargs): - """Return a 2-tuple containing (new_string, number). new_string is the string - obtained by replacing the leftmost (or rightmost with a reverse pattern) - non-overlapping occurrences of the pattern in the source string by the - replacement format. number is the number of substitutions that were made. 
format - can be either a string or a callable; if a string, it's treated as a format - string; if a callable, it's passed the match object and must return a - replacement string to be used.""" - return _compile(pattern, flags, kwargs).subfn(format, string, count, pos, - endpos, concurrent) - -def split(pattern, string, maxsplit=0, flags=0, concurrent=None, **kwargs): - """Split the source string by the occurrences of the pattern, returning a - list containing the resulting substrings. If capturing parentheses are used - in pattern, then the text of all groups in the pattern are also returned as - part of the resulting list. If maxsplit is nonzero, at most maxsplit splits - occur, and the remainder of the string is returned as the final element of - the list.""" - return _compile(pattern, flags, kwargs).split(string, maxsplit, concurrent) - -def splititer(pattern, string, maxsplit=0, flags=0, concurrent=None, **kwargs): - "Return an iterator yielding the parts of a split string." - return _compile(pattern, flags, kwargs).splititer(string, maxsplit, - concurrent) - -def findall(pattern, string, flags=0, pos=None, endpos=None, overlapped=False, - concurrent=None, **kwargs): - """Return a list of all matches in the string. The matches may be overlapped - if overlapped is True. If one or more groups are present in the pattern, - return a list of groups; this will be a list of tuples if the pattern has - more than one group. Empty matches are included in the result.""" - return _compile(pattern, flags, kwargs).findall(string, pos, endpos, - overlapped, concurrent) - -def finditer(pattern, string, flags=0, pos=None, endpos=None, overlapped=False, - partial=False, concurrent=None, **kwargs): - """Return an iterator over all matches in the string. The matches may be - overlapped if overlapped is True. For each match, the iterator returns a - match object. 
Empty matches are included in the result.""" - return _compile(pattern, flags, kwargs).finditer(string, pos, endpos, - overlapped, concurrent, partial) - -def compile(pattern, flags=0, **kwargs): - "Compile a regular expression pattern, returning a pattern object." - return _compile(pattern, flags, kwargs) - -def purge(): - "Clear the regular expression cache" - _cache.clear() - -def template(pattern, flags=0): - "Compile a template pattern, returning a pattern object." - return _compile(pattern, flags | TEMPLATE) - -def escape(pattern, special_only=False): - "Escape all non-alphanumeric characters or special characters in pattern." - if isinstance(pattern, unicode): - s = [] - if special_only: - for c in pattern: - if c in _METACHARS: - s.append(u"\\") - s.append(c) - elif c == u"\x00": - s.append(u"\\000") - else: - s.append(c) - else: - for c in pattern: - if c in _ALNUM: - s.append(c) - elif c == u"\x00": - s.append(u"\\000") - else: - s.append(u"\\") - s.append(c) - - return u"".join(s) - else: - s = [] - if special_only: - for c in pattern: - if c in _METACHARS: - s.append("\\") - s.append(c) - elif c == "\x00": - s.append("\\000") - else: - s.append(c) - else: - for c in pattern: - if c in _ALNUM: - s.append(c) - elif c == "\x00": - s.append("\\000") - else: - s.append("\\") - s.append(c) - - return "".join(s) - -# -------------------------------------------------------------------- -# Internals. 
- -import _regex_core -import sys -if sys.version_info < (2, 6): - from Python25 import _regex -elif sys.version_info < (2, 7): - from Python26 import _regex -else: - from Python27 import _regex -from threading import RLock as _RLock -from _regex_core import * -from _regex_core import (_ALL_VERSIONS, _ALL_ENCODINGS, _FirstSetError, - _UnscopedFlagSet, _check_group_features, _compile_firstset, - _compile_replacement, _flatten_code, _fold_case, _get_required_string, - _parse_pattern, _shrink_cache) -from _regex_core import (ALNUM as _ALNUM, Info as _Info, OP as _OP, Source as - _Source, Fuzzy as _Fuzzy) - -# Version 0 is the old behaviour, compatible with the original 're' module. -# Version 1 is the new behaviour, which differs slightly. - -DEFAULT_VERSION = VERSION0 - -_METACHARS = frozenset("()[]{}?*+|^$\\.") - -_regex_core.DEFAULT_VERSION = DEFAULT_VERSION - -# Caches for the patterns and replacements. -_cache = {} -_cache_lock = _RLock() -_named_args = {} -_replacement_cache = {} - -# Maximum size of the cache. -_MAXCACHE = 500 -_MAXREPCACHE = 500 - -def _compile(pattern, flags=0, kwargs={}): - "Compiles a regular expression to a PatternObject." - try: - # Do we know what keyword arguments are needed? - args_key = pattern, type(pattern), flags - args_needed = _named_args[args_key] - - # Are we being provided with its required keyword arguments? - args_supplied = set() - if args_needed: - for k, v in args_needed: - try: - args_supplied.add((k, frozenset(kwargs[k]))) - except KeyError: - raise error("missing named list") - - args_supplied = frozenset(args_supplied) - - # Have we already seen this regular expression and named list? - pattern_key = (pattern, type(pattern), flags, args_supplied, - DEFAULT_VERSION) - return _cache[pattern_key] - except KeyError: - # It's a new pattern, or new named list for a known pattern. - pass - - # Guess the encoding from the class of the pattern string. 
- if isinstance(pattern, unicode): - guess_encoding = UNICODE - elif isinstance(pattern, str): - guess_encoding = ASCII - elif isinstance(pattern, _pattern_type): - if flags: - raise ValueError("can't process flags argument with a compiled pattern") - - return pattern - else: - raise TypeError("first argument must be a string or compiled pattern") - - # Set the default version in the core code in case it has been changed. - _regex_core.DEFAULT_VERSION = DEFAULT_VERSION - - caught_exception = None - global_flags = flags - - while True: - try: - source = _Source(pattern) - info = _Info(global_flags, source.char_type, kwargs) - info.guess_encoding = guess_encoding - source.ignore_space = bool(info.flags & VERBOSE) - parsed = _parse_pattern(source, info) - break - except _UnscopedFlagSet: - # Remember the global flags for the next attempt. - global_flags = info.global_flags - except error, e: - caught_exception = e - - if caught_exception: - raise error(str(caught_exception)) - - if not source.at_end(): - raise error("trailing characters in pattern at position %d" % source.pos) - - # Check the global flags for conflicts. - version = (info.flags & _ALL_VERSIONS) or DEFAULT_VERSION - if version not in (0, VERSION0, VERSION1): - raise ValueError("VERSION0 and VERSION1 flags are mutually incompatible") - - if (info.flags & _ALL_ENCODINGS) not in (0, ASCII, LOCALE, UNICODE): - raise ValueError("ASCII, LOCALE and UNICODE flags are mutually incompatible") - - if not (info.flags & _ALL_ENCODINGS): - if isinstance(pattern, unicode): - info.flags |= UNICODE - else: - info.flags |= ASCII - - reverse = bool(info.flags & REVERSE) - fuzzy = isinstance(parsed, _Fuzzy) - - # Should we print the parsed pattern? - if flags & DEBUG: - parsed.dump(indent=0, reverse=reverse) - - # Fix the group references. - parsed.fix_groups(reverse, False) - - # Optimise the parsed pattern. - parsed = parsed.optimise(info) - parsed = parsed.pack_characters(info) - - # Get the required string. 
- req_offset, req_chars, req_flags = _get_required_string(parsed, info.flags) - - # Build the named lists. - named_lists = {} - named_list_indexes = [None] * len(info.named_lists_used) - args_needed = set() - for key, index in info.named_lists_used.items(): - name, case_flags = key - values = frozenset(kwargs[name]) - if case_flags: - items = frozenset(_fold_case(info, v) for v in values) - else: - items = values - named_lists[name] = values - named_list_indexes[index] = items - args_needed.add((name, values)) - - # Check the features of the groups. - _check_group_features(info, parsed) - - # Compile the parsed pattern. The result is a list of tuples. - code = parsed.compile(reverse) - - # Is there a group call to the pattern as a whole? - key = (0, reverse, fuzzy) - ref = info.call_refs.get(key) - if ref is not None: - code = [(_OP.CALL_REF, ref)] + code + [(_OP.END, )] - - # Add the final 'success' opcode. - code += [(_OP.SUCCESS, )] - - # Compile the additional copies of the groups that we need. - for group, rev, fuz in info.additional_groups: - code += group.compile(rev, fuz) - - # Flatten the code into a list of ints. - code = _flatten_code(code) - - if not parsed.has_simple_start(): - # Get the first set, if possible. - try: - fs_code = _compile_firstset(info, parsed.get_firstset(reverse)) - fs_code = _flatten_code(fs_code) - code = fs_code + code - except _FirstSetError: - pass - - # The named capture groups. - index_group = dict((v, n) for n, v in info.group_index.items()) - - # Create the PatternObject. - # - # Local flags like IGNORECASE affect the code generation, but aren't needed - # by the PatternObject itself. Conversely, global flags like LOCALE _don't_ - # affect the code generation but _are_ needed by the PatternObject. 
- compiled_pattern = _regex.compile(pattern, info.flags | version, code, - info.group_index, index_group, named_lists, named_list_indexes, - req_offset, req_chars, req_flags, info.group_count) - - # Do we need to reduce the size of the cache? - if len(_cache) >= _MAXCACHE: - _cache_lock.acquire() - try: - _shrink_cache(_cache, _named_args, _MAXCACHE) - finally: - _cache_lock.release() - - args_needed = frozenset(args_needed) - - # Store this regular expression and named list. - pattern_key = (pattern, type(pattern), flags, args_needed, DEFAULT_VERSION) - _cache[pattern_key] = compiled_pattern - - # Store what keyword arguments are needed. - _named_args[args_key] = args_needed - - return compiled_pattern - -def _compile_replacement_helper(pattern, template): - "Compiles a replacement template." - # This function is called by the _regex module. - - # Have we seen this before? - key = pattern.pattern, pattern.flags, template - compiled = _replacement_cache.get(key) - if compiled is not None: - return compiled - - if len(_replacement_cache) >= _MAXREPCACHE: - _replacement_cache.clear() - - is_unicode = isinstance(template, unicode) - source = _Source(template) - if is_unicode: - def make_string(char_codes): - return u"".join(unichr(c) for c in char_codes) - else: - def make_string(char_codes): - return "".join(chr(c) for c in char_codes) - - compiled = [] - literal = [] - while True: - ch = source.get() - if not ch: - break - if ch == "\\": - # '_compile_replacement' will return either an int group reference - # or a string literal. It returns items (plural) in order to handle - # a 2-character literal (an invalid escape sequence). - is_group, items = _compile_replacement(source, pattern, is_unicode) - if is_group: - # It's a group, so first flush the literal. - if literal: - compiled.append(make_string(literal)) - literal = [] - compiled.extend(items) - else: - literal.extend(items) - else: - literal.append(ord(ch)) - - # Flush the literal. 
- if literal: - compiled.append(make_string(literal)) - - _replacement_cache[key] = compiled - - return compiled - -# We define _pattern_type here after all the support objects have been defined. -_pattern_type = type(_compile("", 0, {})) - -# We'll define an alias for the 'compile' function so that the repr of a -# pattern object is eval-able. -Regex = compile - -# Register myself for pickling. -import copy_reg as _copy_reg - -def _pickle(p): - return _compile, (p.pattern, p.flags) - -_copy_reg.pickle(_pattern_type, _pickle, _compile) - -if not hasattr(str, "format"): - # Strings don't have the .format method (below Python 2.6). - while True: - _start = __doc__.find(" subf") - if _start < 0: - break - - _end = __doc__.find("\n", _start) + 1 - while __doc__.startswith(" ", _end): - _end = __doc__.find("\n", _end) + 1 - - __doc__ = __doc__[ : _start] + __doc__[_end : ] - - __all__ = [_name for _name in __all__ if not _name.startswith("subf")] - - del _start, _end - - del subf, subfn diff --git a/lib/regex/test_regex.py b/lib/regex/test_regex.py deleted file mode 100644 index 55b14c4d..00000000 --- a/lib/regex/test_regex.py +++ /dev/null @@ -1,3230 +0,0 @@ -from __future__ import with_statement -import regex -import string -from weakref import proxy -import unittest -import copy -from test.test_support import run_unittest -import re - -# _AssertRaisesContext is defined here because the class doesn't exist before -# Python 2.7. 
-class _AssertRaisesContext(object): - """A context manager used to implement TestCase.assertRaises* methods.""" - - def __init__(self, expected, test_case, expected_regexp=None): - self.expected = expected - self.failureException = test_case.failureException - self.expected_regexp = expected_regexp - - def __enter__(self): - return self - - def __exit__(self, exc_type, exc_value, tb): - if exc_type is None: - try: - exc_name = self.expected.__name__ - except AttributeError: - exc_name = str(self.expected) - raise self.failureException( - "{0} not raised".format(exc_name)) - if not issubclass(exc_type, self.expected): - # let unexpected exceptions pass through - return False - self.exception = exc_value # store for later retrieval - if self.expected_regexp is None: - return True - - expected_regexp = self.expected_regexp - if isinstance(expected_regexp, basestring): - expected_regexp = re.compile(expected_regexp) - if not expected_regexp.search(str(exc_value)): - raise self.failureException('"%s" does not match "%s"' % - (expected_regexp.pattern, str(exc_value))) - return True - -class RegexTests(unittest.TestCase): - PATTERN_CLASS = "" - FLAGS_WITH_COMPILED_PAT = "can't process flags argument with a compiled pattern" - INVALID_GROUP_REF = "invalid group reference" - MISSING_GT = "missing >" - BAD_GROUP_NAME = "bad group name" - MISSING_LT = "missing <" - UNKNOWN_GROUP_I = "unknown group" - UNKNOWN_GROUP = "unknown group" - BAD_ESCAPE = "bad escape" - BAD_OCTAL_ESCAPE = "bad octal escape" - BAD_SET = "bad set" - STR_PAT_ON_BYTES = "can't use a string pattern on a bytes-like object" - BYTES_PAT_ON_STR = "can't use a bytes pattern on a string-like object" - STR_PAT_BYTES_TEMPL = "expected str instance, bytes found" - BYTES_PAT_STR_TEMPL = "expected bytes instance, str found" - BYTES_PAT_UNI_FLAG = "can't use UNICODE flag with a bytes pattern" - MIXED_FLAGS = "ASCII, LOCALE and UNICODE flags are mutually incompatible" - MISSING_RPAREN = "missing \\)" # Need to escape 
parenthesis for unittest. - TRAILING_CHARS = "trailing characters in pattern" - BAD_CHAR_RANGE = "bad character range" - NOTHING_TO_REPEAT = "nothing to repeat" - OPEN_GROUP = "can't refer to an open group" - DUPLICATE_GROUP = "duplicate group" - CANT_TURN_OFF = "bad inline flags: can't turn flags off" - UNDEF_CHAR_NAME = "undefined character name" - - # assertRaisesRegex is defined here because the method isn't in the - # superclass before Python 2.7. - def assertRaisesRegex(self, expected_exception, expected_regexp, - callable_obj=None, *args, **kwargs): - """Asserts that the message in a raised exception matches a regexp. - - Args: - expected_exception: Exception class expected to be raised. - expected_regexp: Regexp (re pattern object or string) expected - to be found in error message. - callable_obj: Function to be called. - args: Extra args. - kwargs: Extra kwargs. - """ - context = _AssertRaisesContext(expected_exception, self, expected_regexp) - if callable_obj is None: - return context - with context: - callable_obj(*args, **kwargs) - - def assertTypedEqual(self, actual, expect, msg=None): - self.assertEqual(actual, expect, msg) - - def recurse(actual, expect): - if isinstance(expect, (tuple, list)): - for x, y in zip(actual, expect): - recurse(x, y) - else: - self.assertIs(type(actual), type(expect), msg) - - recurse(actual, expect) - - def test_weakref(self): - s = 'QabbbcR' - x = regex.compile('ab+c') - y = proxy(x) - if x.findall('QabbbcR') != y.findall('QabbbcR'): - self.fail() - - def test_search_star_plus(self): - self.assertEqual(regex.search('a*', 'xxx').span(0), (0, 0)) - self.assertEqual(regex.search('x*', 'axx').span(), (0, 0)) - self.assertEqual(regex.search('x+', 'axx').span(0), (1, 3)) - self.assertEqual(regex.search('x+', 'axx').span(), (1, 3)) - self.assertEqual(regex.search('x', 'aaa'), None) - self.assertEqual(regex.match('a*', 'xxx').span(0), (0, 0)) - self.assertEqual(regex.match('a*', 'xxx').span(), (0, 0)) - 
self.assertEqual(regex.match('x*', 'xxxa').span(0), (0, 3)) - self.assertEqual(regex.match('x*', 'xxxa').span(), (0, 3)) - self.assertEqual(regex.match('a+', 'xxx'), None) - - def bump_num(self, matchobj): - int_value = int(matchobj[0]) - return str(int_value + 1) - - def test_basic_regex_sub(self): - self.assertEqual(regex.sub("(?i)b+", "x", "bbbb BBBB"), 'x x') - self.assertEqual(regex.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'), - '9.3 -3 24x100y') - self.assertEqual(regex.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3), - '9.3 -3 23x99y') - - self.assertEqual(regex.sub('.', lambda m: r"\n", 'x'), "\\n") - self.assertEqual(regex.sub('.', r"\n", 'x'), "\n") - - self.assertEqual(regex.sub('(?Px)', r'\g\g', 'xx'), 'xxxx') - self.assertEqual(regex.sub('(?Px)', r'\g\g<1>', 'xx'), 'xxxx') - self.assertEqual(regex.sub('(?Px)', r'\g\g', 'xx'), - 'xxxx') - self.assertEqual(regex.sub('(?Px)', r'\g<1>\g<1>', 'xx'), 'xxxx') - - self.assertEqual(regex.sub('a', r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D', - 'a'), "\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D") - self.assertEqual(regex.sub('a', '\t\n\v\r\f\a', 'a'), "\t\n\v\r\f\a") - self.assertEqual(regex.sub('a', '\t\n\v\r\f\a', 'a'), chr(9) + chr(10) - + chr(11) + chr(13) + chr(12) + chr(7)) - - self.assertEqual(regex.sub(r'^\s*', 'X', 'test'), 'Xtest') - - self.assertEqual(regex.sub(ur"x", ur"\x0A", u"x"), u"\n") - self.assertEqual(regex.sub(ur"x", ur"\u000A", u"x"), u"\n") - self.assertEqual(regex.sub(ur"x", ur"\U0000000A", u"x"), u"\n") - self.assertEqual(regex.sub(ur"x", ur"\N{LATIN CAPITAL LETTER A}", - u"x"), u"A") - - self.assertEqual(regex.sub(r"x", r"\x0A", "x"), "\n") - self.assertEqual(regex.sub(r"x", r"\u000A", "x"), "\\u000A") - self.assertEqual(regex.sub(r"x", r"\U0000000A", "x"), "\\U0000000A") - self.assertEqual(regex.sub(r"x", r"\N{LATIN CAPITAL LETTER A}", "x"), - "\\N{LATIN CAPITAL LETTER A}") - - def test_bug_449964(self): - # Fails for group followed by other escape. 
- self.assertEqual(regex.sub(r'(?Px)', r'\g<1>\g<1>\b', 'xx'), - "xx\bxx\b") - - def test_bug_449000(self): - # Test for sub() on escaped characters. - self.assertEqual(regex.sub(r'\r\n', r'\n', 'abc\r\ndef\r\n'), - "abc\ndef\n") - self.assertEqual(regex.sub('\r\n', r'\n', 'abc\r\ndef\r\n'), - "abc\ndef\n") - self.assertEqual(regex.sub(r'\r\n', '\n', 'abc\r\ndef\r\n'), - "abc\ndef\n") - self.assertEqual(regex.sub('\r\n', '\n', 'abc\r\ndef\r\n'), - "abc\ndef\n") - - def test_bug_1140(self): - # regex.sub(x, y, u'') should return u'', not '', and - # regex.sub(x, y, '') should return '', not u''. - # Also: - # regex.sub(x, y, unicode(x)) should return unicode(y), and - # regex.sub(x, y, str(x)) should return - # str(y) if isinstance(y, str) else unicode(y). - for x in 'x', u'x': - for y in 'y', u'y': - z = regex.sub(x, y, u'') - self.assertEqual((type(z), z), (unicode, u'')) - z = regex.sub(x, y, '') - self.assertEqual((type(z), z), (str, '')) - z = regex.sub(x, y, unicode(x)) - self.assertEqual((type(z), z), (unicode, unicode(y))) - z = regex.sub(x, y, str(x)) - self.assertEqual((type(z), z), (type(y), y)) - - def test_bug_1661(self): - # Verify that flags do not get silently ignored with compiled patterns - pattern = regex.compile('.') - self.assertRaisesRegex(ValueError, self.FLAGS_WITH_COMPILED_PAT, - lambda: regex.match(pattern, 'A', regex.I)) - self.assertRaisesRegex(ValueError, self.FLAGS_WITH_COMPILED_PAT, - lambda: regex.search(pattern, 'A', regex.I)) - self.assertRaisesRegex(ValueError, self.FLAGS_WITH_COMPILED_PAT, - lambda: regex.findall(pattern, 'A', regex.I)) - self.assertRaisesRegex(ValueError, self.FLAGS_WITH_COMPILED_PAT, - lambda: regex.compile(pattern, regex.I)) - - def test_bug_3629(self): - # A regex that triggered a bug in the sre-code validator - self.assertEqual(repr(type(regex.compile("(?P)(?(quote))"))), - self.PATTERN_CLASS) - - def test_sub_template_numeric_escape(self): - # Bug 776311 and friends. 
- self.assertEqual(regex.sub('x', r'\0', 'x'), "\0") - self.assertEqual(regex.sub('x', r'\000', 'x'), "\000") - self.assertEqual(regex.sub('x', r'\001', 'x'), "\001") - self.assertEqual(regex.sub('x', r'\008', 'x'), "\0" + "8") - self.assertEqual(regex.sub('x', r'\009', 'x'), "\0" + "9") - self.assertEqual(regex.sub('x', r'\111', 'x'), "\111") - self.assertEqual(regex.sub('x', r'\117', 'x'), "\117") - - self.assertEqual(regex.sub('x', r'\1111', 'x'), "\1111") - self.assertEqual(regex.sub('x', r'\1111', 'x'), "\111" + "1") - - self.assertEqual(regex.sub('x', r'\00', 'x'), '\x00') - self.assertEqual(regex.sub('x', r'\07', 'x'), '\x07') - self.assertEqual(regex.sub('x', r'\08', 'x'), "\0" + "8") - self.assertEqual(regex.sub('x', r'\09', 'x'), "\0" + "9") - self.assertEqual(regex.sub('x', r'\0a', 'x'), "\0" + "a") - - self.assertEqual(regex.sub(u'x', ur'\400', u'x'), u"\u0100") - self.assertEqual(regex.sub(u'x', ur'\777', u'x'), u"\u01FF") - self.assertEqual(regex.sub('x', r'\400', 'x'), "\x00") - self.assertEqual(regex.sub('x', r'\777', 'x'), "\xFF") - - self.assertRaisesRegex(regex.error, self.INVALID_GROUP_REF, lambda: - regex.sub('x', r'\1', 'x')) - self.assertRaisesRegex(regex.error, self.INVALID_GROUP_REF, lambda: - regex.sub('x', r'\8', 'x')) - self.assertRaisesRegex(regex.error, self.INVALID_GROUP_REF, lambda: - regex.sub('x', r'\9', 'x')) - self.assertRaisesRegex(regex.error, self.INVALID_GROUP_REF, lambda: - regex.sub('x', r'\11', 'x')) - self.assertRaisesRegex(regex.error, self.INVALID_GROUP_REF, lambda: - regex.sub('x', r'\18', 'x')) - self.assertRaisesRegex(regex.error, self.INVALID_GROUP_REF, lambda: - regex.sub('x', r'\1a', 'x')) - self.assertRaisesRegex(regex.error, self.INVALID_GROUP_REF, lambda: - regex.sub('x', r'\90', 'x')) - self.assertRaisesRegex(regex.error, self.INVALID_GROUP_REF, lambda: - regex.sub('x', r'\99', 'x')) - self.assertRaisesRegex(regex.error, self.INVALID_GROUP_REF, lambda: - regex.sub('x', r'\118', 'x')) # r'\11' + '8' - 
self.assertRaisesRegex(regex.error, self.INVALID_GROUP_REF, lambda: - regex.sub('x', r'\11a', 'x')) - self.assertRaisesRegex(regex.error, self.INVALID_GROUP_REF, lambda: - regex.sub('x', r'\181', 'x')) # r'\18' + '1' - self.assertRaisesRegex(regex.error, self.INVALID_GROUP_REF, lambda: - regex.sub('x', r'\800', 'x')) # r'\80' + '0' - - # In Python 2.3 (etc), these loop endlessly in sre_parser.py. - self.assertEqual(regex.sub('(((((((((((x)))))))))))', r'\11', 'x'), - 'x') - self.assertEqual(regex.sub('((((((((((y))))))))))(.)', r'\118', 'xyz'), - 'xz8') - self.assertEqual(regex.sub('((((((((((y))))))))))(.)', r'\11a', 'xyz'), - 'xza') - - def test_qualified_re_sub(self): - self.assertEqual(regex.sub('a', 'b', 'aaaaa'), 'bbbbb') - self.assertEqual(regex.sub('a', 'b', 'aaaaa', 1), 'baaaa') - - def test_bug_114660(self): - self.assertEqual(regex.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello there'), - 'hello there') - - def test_bug_462270(self): - # Test for empty sub() behaviour, see SF bug #462270 - self.assertEqual(regex.sub('(?V0)x*', '-', 'abxd'), '-a-b-d-') - self.assertEqual(regex.sub('(?V1)x*', '-', 'abxd'), '-a-b--d-') - self.assertEqual(regex.sub('x+', '-', 'abxd'), 'ab-d') - - def test_bug_14462(self): - # chr(255) is not a valid identifier in Python 2. 
- group_name = u'\xFF' - self.assertRaisesRegex(regex.error, self.BAD_GROUP_NAME, lambda: - regex.search(ur'(?P<' + group_name + '>a)', u'a')) - - def test_symbolic_refs(self): - self.assertRaisesRegex(regex.error, self.MISSING_GT, lambda: - regex.sub('(?Px)', r'\gx)', r'\g<', 'xx')) - self.assertRaisesRegex(regex.error, self.MISSING_LT, lambda: - regex.sub('(?Px)', r'\g', 'xx')) - self.assertRaisesRegex(regex.error, self.BAD_GROUP_NAME, lambda: - regex.sub('(?Px)', r'\g', 'xx')) - self.assertRaisesRegex(regex.error, self.BAD_GROUP_NAME, lambda: - regex.sub('(?Px)', r'\g<1a1>', 'xx')) - self.assertRaisesRegex(IndexError, self.UNKNOWN_GROUP_I, lambda: - regex.sub('(?Px)', r'\g', 'xx')) - - # The new behaviour of unmatched but valid groups is to treat them like - # empty matches in the replacement template, like in Perl. - self.assertEqual(regex.sub('(?Px)|(?Py)', r'\g', 'xx'), '') - self.assertEqual(regex.sub('(?Px)|(?Py)', r'\2', 'xx'), '') - - # The old behaviour was to raise it as an IndexError. 
- self.assertRaisesRegex(regex.error, self.BAD_GROUP_NAME, lambda: - regex.sub('(?Px)', r'\g<-1>', 'xx')) - - def test_re_subn(self): - self.assertEqual(regex.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2)) - self.assertEqual(regex.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1)) - self.assertEqual(regex.subn("b+", "x", "xyz"), ('xyz', 0)) - self.assertEqual(regex.subn("b*", "x", "xyz"), ('xxxyxzx', 4)) - self.assertEqual(regex.subn("b*", "x", "xyz", 2), ('xxxyz', 2)) - - def test_re_split(self): - self.assertEqual(regex.split(":", ":a:b::c"), ['', 'a', 'b', '', 'c']) - self.assertEqual(regex.split(":*", ":a:b::c"), ['', 'a', 'b', 'c']) - self.assertEqual(regex.split("(:*)", ":a:b::c"), ['', ':', 'a', ':', - 'b', '::', 'c']) - self.assertEqual(regex.split("(?::*)", ":a:b::c"), ['', 'a', 'b', 'c']) - self.assertEqual(regex.split("(:)*", ":a:b::c"), ['', ':', 'a', ':', - 'b', ':', 'c']) - self.assertEqual(regex.split("([b:]+)", ":a:b::c"), ['', ':', 'a', - ':b::', 'c']) - self.assertEqual(regex.split("(b)|(:+)", ":a:b::c"), ['', None, ':', - 'a', None, ':', '', 'b', None, '', None, '::', 'c']) - self.assertEqual(regex.split("(?:b)|(?::+)", ":a:b::c"), ['', 'a', '', - '', 'c']) - - self.assertEqual(regex.split("x", "xaxbxc"), ['', 'a', 'b', 'c']) - self.assertEqual([m for m in regex.splititer("x", "xaxbxc")], ['', 'a', - 'b', 'c']) - - self.assertEqual(regex.split("(?r)x", "xaxbxc"), ['c', 'b', 'a', '']) - self.assertEqual([m for m in regex.splititer("(?r)x", "xaxbxc")], ['c', - 'b', 'a', '']) - - self.assertEqual(regex.split("(x)|(y)", "xaxbxc"), ['', 'x', None, 'a', - 'x', None, 'b', 'x', None, 'c']) - self.assertEqual([m for m in regex.splititer("(x)|(y)", "xaxbxc")], - ['', 'x', None, 'a', 'x', None, 'b', 'x', None, 'c']) - - self.assertEqual(regex.split("(?r)(x)|(y)", "xaxbxc"), ['c', 'x', None, - 'b', 'x', None, 'a', 'x', None, '']) - self.assertEqual([m for m in regex.splititer("(?r)(x)|(y)", "xaxbxc")], - ['c', 'x', None, 'b', 'x', None, 'a', 'x', None, '']) - - 
self.assertEqual(regex.split(r"(?V1)\b", "a b c"), ['', 'a', ' ', 'b', - ' ', 'c', '']) - self.assertEqual(regex.split(r"(?V1)\m", "a b c"), ['', 'a ', 'b ', - 'c']) - self.assertEqual(regex.split(r"(?V1)\M", "a b c"), ['a', ' b', ' c', - '']) - - def test_qualified_re_split(self): - self.assertEqual(regex.split(":", ":a:b::c", 2), ['', 'a', 'b::c']) - self.assertEqual(regex.split(':', 'a:b:c:d', 2), ['a', 'b', 'c:d']) - self.assertEqual(regex.split("(:)", ":a:b::c", 2), ['', ':', 'a', ':', - 'b::c']) - self.assertEqual(regex.split("(:*)", ":a:b::c", 2), ['', ':', 'a', ':', - 'b::c']) - - def test_re_findall(self): - self.assertEqual(regex.findall(":+", "abc"), []) - self.assertEqual(regex.findall(":+", "a:b::c:::d"), [':', '::', ':::']) - self.assertEqual(regex.findall("(:+)", "a:b::c:::d"), [':', '::', - ':::']) - self.assertEqual(regex.findall("(:)(:*)", "a:b::c:::d"), [(':', ''), - (':', ':'), (':', '::')]) - - self.assertEqual(regex.findall(r"\((?P.{0,5}?TEST)\)", - "(MY TEST)"), ["MY TEST"]) - self.assertEqual(regex.findall(r"\((?P.{0,3}?TEST)\)", - "(MY TEST)"), ["MY TEST"]) - self.assertEqual(regex.findall(r"\((?P.{0,3}?T)\)", "(MY T)"), - ["MY T"]) - - self.assertEqual(regex.findall(r"[^a]{2}[A-Z]", "\n S"), [' S']) - self.assertEqual(regex.findall(r"[^a]{2,3}[A-Z]", "\n S"), ['\n S']) - self.assertEqual(regex.findall(r"[^a]{2,3}[A-Z]", "\n S"), [' S']) - - self.assertEqual(regex.findall(r"X(Y[^Y]+?){1,2}( |Q)+DEF", - "XYABCYPPQ\nQ DEF"), [('YPPQ\n', ' ')]) - - self.assertEqual(regex.findall(r"(\nTest(\n+.+?){0,2}?)?\n+End", - "\nTest\nxyz\nxyz\nEnd"), [('\nTest\nxyz\nxyz', '\nxyz')]) - - def test_bug_117612(self): - self.assertEqual(regex.findall(r"(a|(b))", "aba"), [('a', ''), ('b', - 'b'), ('a', '')]) - - def test_re_match(self): - self.assertEqual(regex.match('a', 'a')[:], ('a',)) - self.assertEqual(regex.match('(a)', 'a')[:], ('a', 'a')) - self.assertEqual(regex.match(r'(a)', 'a')[0], 'a') - self.assertEqual(regex.match(r'(a)', 'a')[1], 'a') - 
self.assertEqual(regex.match(r'(a)', 'a').group(1, 1), ('a', 'a')) - - pat = regex.compile('((a)|(b))(c)?') - self.assertEqual(pat.match('a')[:], ('a', 'a', 'a', None, None)) - self.assertEqual(pat.match('b')[:], ('b', 'b', None, 'b', None)) - self.assertEqual(pat.match('ac')[:], ('ac', 'a', 'a', None, 'c')) - self.assertEqual(pat.match('bc')[:], ('bc', 'b', None, 'b', 'c')) - self.assertEqual(pat.match('bc')[:], ('bc', 'b', None, 'b', 'c')) - - # A single group. - m = regex.match('(a)', 'a') - self.assertEqual(m.group(), 'a') - self.assertEqual(m.group(0), 'a') - self.assertEqual(m.group(1), 'a') - self.assertEqual(m.group(1, 1), ('a', 'a')) - - pat = regex.compile('(?:(?Pa)|(?Pb))(?Pc)?') - self.assertEqual(pat.match('a').group(1, 2, 3), ('a', None, None)) - self.assertEqual(pat.match('b').group('a1', 'b2', 'c3'), (None, 'b', - None)) - self.assertEqual(pat.match('ac').group(1, 'b2', 3), ('a', None, 'c')) - - def test_re_groupref_exists(self): - self.assertEqual(regex.match(r'^(\()?([^()]+)(?(1)\))$', '(a)')[:], - ('(a)', '(', 'a')) - self.assertEqual(regex.match(r'^(\()?([^()]+)(?(1)\))$', 'a')[:], ('a', - None, 'a')) - self.assertEqual(regex.match(r'^(\()?([^()]+)(?(1)\))$', 'a)'), None) - self.assertEqual(regex.match(r'^(\()?([^()]+)(?(1)\))$', '(a'), None) - self.assertEqual(regex.match('^(?:(a)|c)((?(1)b|d))$', 'ab')[:], ('ab', - 'a', 'b')) - self.assertEqual(regex.match('^(?:(a)|c)((?(1)b|d))$', 'cd')[:], ('cd', - None, 'd')) - self.assertEqual(regex.match('^(?:(a)|c)((?(1)|d))$', 'cd')[:], ('cd', - None, 'd')) - self.assertEqual(regex.match('^(?:(a)|c)((?(1)|d))$', 'a')[:], ('a', - 'a', '')) - - # Tests for bug #1177831: exercise groups other than the first group. 
- p = regex.compile('(?Pa)(?Pb)?((?(g2)c|d))') - self.assertEqual(p.match('abc')[:], ('abc', 'a', 'b', 'c')) - self.assertEqual(p.match('ad')[:], ('ad', 'a', None, 'd')) - self.assertEqual(p.match('abd'), None) - self.assertEqual(p.match('ac'), None) - - def test_re_groupref(self): - self.assertEqual(regex.match(r'^(\|)?([^()]+)\1$', '|a|')[:], ('|a|', - '|', 'a')) - self.assertEqual(regex.match(r'^(\|)?([^()]+)\1?$', 'a')[:], ('a', - None, 'a')) - self.assertEqual(regex.match(r'^(\|)?([^()]+)\1$', 'a|'), None) - self.assertEqual(regex.match(r'^(\|)?([^()]+)\1$', '|a'), None) - self.assertEqual(regex.match(r'^(?:(a)|c)(\1)$', 'aa')[:], ('aa', 'a', - 'a')) - self.assertEqual(regex.match(r'^(?:(a)|c)(\1)?$', 'c')[:], ('c', None, - None)) - - self.assertEqual(regex.findall("(?i)(.{1,40}?),(.{1,40}?)(?:;)+(.{1,80}).{1,40}?\\3(\ |;)+(.{1,80}?)\\1", - "TEST, BEST; LEST ; Lest 123 Test, Best"), [('TEST', ' BEST', - ' LEST', ' ', '123 ')]) - - def test_groupdict(self): - self.assertEqual(regex.match('(?Pfirst) (?Psecond)', - 'first second').groupdict(), {'first': 'first', 'second': 'second'}) - - def test_expand(self): - self.assertEqual(regex.match("(?Pfirst) (?Psecond)", - "first second").expand(r"\2 \1 \g \g"), - 'second first second first') - - def test_repeat_minmax(self): - self.assertEqual(regex.match(r"^(\w){1}$", "abc"), None) - self.assertEqual(regex.match(r"^(\w){1}?$", "abc"), None) - self.assertEqual(regex.match(r"^(\w){1,2}$", "abc"), None) - self.assertEqual(regex.match(r"^(\w){1,2}?$", "abc"), None) - - self.assertEqual(regex.match(r"^(\w){3}$", "abc")[1], 'c') - self.assertEqual(regex.match(r"^(\w){1,3}$", "abc")[1], 'c') - self.assertEqual(regex.match(r"^(\w){1,4}$", "abc")[1], 'c') - self.assertEqual(regex.match(r"^(\w){3,4}?$", "abc")[1], 'c') - self.assertEqual(regex.match(r"^(\w){3}?$", "abc")[1], 'c') - self.assertEqual(regex.match(r"^(\w){1,3}?$", "abc")[1], 'c') - self.assertEqual(regex.match(r"^(\w){1,4}?$", "abc")[1], 'c') - 
self.assertEqual(regex.match(r"^(\w){3,4}?$", "abc")[1], 'c') - - self.assertEqual(regex.match("^x{1}$", "xxx"), None) - self.assertEqual(regex.match("^x{1}?$", "xxx"), None) - self.assertEqual(regex.match("^x{1,2}$", "xxx"), None) - self.assertEqual(regex.match("^x{1,2}?$", "xxx"), None) - - self.assertEqual(regex.match("^x{1}", "xxx")[0], 'x') - self.assertEqual(regex.match("^x{1}?", "xxx")[0], 'x') - self.assertEqual(regex.match("^x{0,1}", "xxx")[0], 'x') - self.assertEqual(regex.match("^x{0,1}?", "xxx")[0], '') - - self.assertEqual(bool(regex.match("^x{3}$", "xxx")), True) - self.assertEqual(bool(regex.match("^x{1,3}$", "xxx")), True) - self.assertEqual(bool(regex.match("^x{1,4}$", "xxx")), True) - self.assertEqual(bool(regex.match("^x{3,4}?$", "xxx")), True) - self.assertEqual(bool(regex.match("^x{3}?$", "xxx")), True) - self.assertEqual(bool(regex.match("^x{1,3}?$", "xxx")), True) - self.assertEqual(bool(regex.match("^x{1,4}?$", "xxx")), True) - self.assertEqual(bool(regex.match("^x{3,4}?$", "xxx")), True) - - self.assertEqual(regex.match("^x{}$", "xxx"), None) - self.assertEqual(bool(regex.match("^x{}$", "x{}")), True) - - def test_getattr(self): - self.assertEqual(regex.compile("(?i)(a)(b)").pattern, '(?i)(a)(b)') - self.assertEqual(regex.compile("(?i)(a)(b)").flags, regex.A | regex.I | - regex.DEFAULT_VERSION) - self.assertEqual(regex.compile(u"(?i)(a)(b)").flags, regex.I | regex.U - | regex.DEFAULT_VERSION) - self.assertEqual(regex.compile("(?i)(a)(b)").groups, 2) - self.assertEqual(regex.compile("(?i)(a)(b)").groupindex, {}) - - self.assertEqual(regex.compile("(?i)(?Pa)(?Pb)").groupindex, - {'first': 1, 'other': 2}) - - self.assertEqual(regex.match("(a)", "a").pos, 0) - self.assertEqual(regex.match("(a)", "a").endpos, 1) - - self.assertEqual(regex.search("b(c)", "abcdef").pos, 0) - self.assertEqual(regex.search("b(c)", "abcdef").endpos, 6) - self.assertEqual(regex.search("b(c)", "abcdef").span(), (1, 3)) - self.assertEqual(regex.search("b(c)", 
"abcdef").span(1), (2, 3)) - - self.assertEqual(regex.match("(a)", "a").string, 'a') - self.assertEqual(regex.match("(a)", "a").regs, ((0, 1), (0, 1))) - self.assertEqual(repr(type(regex.match("(a)", "a").re)), - self.PATTERN_CLASS) - - # Issue 14260. - p = regex.compile(r'abc(?Pdef)') - p.groupindex["n"] = 0 - self.assertEqual(p.groupindex["n"], 1) - - def test_special_escapes(self): - self.assertEqual(regex.search(r"\b(b.)\b", "abcd abc bcd bx")[1], 'bx') - self.assertEqual(regex.search(r"\B(b.)\B", "abc bcd bc abxd")[1], 'bx') - self.assertEqual(regex.search(r"\b(b.)\b", "abcd abc bcd bx", - regex.LOCALE)[1], 'bx') - self.assertEqual(regex.search(r"\B(b.)\B", "abc bcd bc abxd", - regex.LOCALE)[1], 'bx') - self.assertEqual(regex.search(ur"\b(b.)\b", u"abcd abc bcd bx", - regex.UNICODE)[1], u'bx') - self.assertEqual(regex.search(ur"\B(b.)\B", u"abc bcd bc abxd", - regex.UNICODE)[1], u'bx') - - self.assertEqual(regex.search(r"^abc$", "\nabc\n", regex.M)[0], 'abc') - self.assertEqual(regex.search(r"^\Aabc\Z$", "abc", regex.M)[0], 'abc') - self.assertEqual(regex.search(r"^\Aabc\Z$", "\nabc\n", regex.M), None) - - self.assertEqual(regex.search(ur"\b(b.)\b", u"abcd abc bcd bx")[1], - u'bx') - self.assertEqual(regex.search(ur"\B(b.)\B", u"abc bcd bc abxd")[1], - u'bx') - self.assertEqual(regex.search(ur"^abc$", u"\nabc\n", regex.M)[0], - u'abc') - self.assertEqual(regex.search(ur"^\Aabc\Z$", u"abc", regex.M)[0], - u'abc') - self.assertEqual(regex.search(ur"^\Aabc\Z$", u"\nabc\n", regex.M), - None) - - self.assertEqual(regex.search(r"\d\D\w\W\s\S", "1aa! a")[0], '1aa! a') - self.assertEqual(regex.search(r"\d\D\w\W\s\S", "1aa! a", - regex.LOCALE)[0], '1aa! a') - self.assertEqual(regex.search(ur"\d\D\w\W\s\S", u"1aa! a", - regex.UNICODE)[0], u'1aa! 
a') - - def test_bigcharset(self): - self.assertEqual(regex.match(ur"(?u)([\u2222\u2223])", u"\u2222")[1], - u'\u2222') - self.assertEqual(regex.match(ur"(?u)([\u2222\u2223])", u"\u2222", - regex.UNICODE)[1], u'\u2222') - self.assertEqual(u"".join(regex.findall(u".", - u"e\xe8\xe9\xea\xeb\u0113\u011b\u0117", flags=regex.UNICODE)), - u'e\xe8\xe9\xea\xeb\u0113\u011b\u0117') - self.assertEqual(u"".join(regex.findall(ur"[e\xe8\xe9\xea\xeb\u0113\u011b\u0117]", - u"e\xe8\xe9\xea\xeb\u0113\u011b\u0117", flags=regex.UNICODE)), - u'e\xe8\xe9\xea\xeb\u0113\u011b\u0117') - self.assertEqual(u"".join(regex.findall(ur"e|\xe8|\xe9|\xea|\xeb|\u0113|\u011b|\u0117", - u"e\xe8\xe9\xea\xeb\u0113\u011b\u0117", flags=regex.UNICODE)), - u'e\xe8\xe9\xea\xeb\u0113\u011b\u0117') - - def test_anyall(self): - self.assertEqual(regex.match("a.b", "a\nb", regex.DOTALL)[0], "a\nb") - self.assertEqual(regex.match("a.*b", "a\n\nb", regex.DOTALL)[0], - "a\n\nb") - - def test_non_consuming(self): - self.assertEqual(regex.match(r"(a(?=\s[^a]))", "a b")[1], 'a') - self.assertEqual(regex.match(r"(a(?=\s[^a]*))", "a b")[1], 'a') - self.assertEqual(regex.match(r"(a(?=\s[abc]))", "a b")[1], 'a') - self.assertEqual(regex.match(r"(a(?=\s[abc]*))", "a bc")[1], 'a') - self.assertEqual(regex.match(r"(a)(?=\s\1)", "a a")[1], 'a') - self.assertEqual(regex.match(r"(a)(?=\s\1*)", "a aa")[1], 'a') - self.assertEqual(regex.match(r"(a)(?=\s(abc|a))", "a a")[1], 'a') - - self.assertEqual(regex.match(r"(a(?!\s[^a]))", "a a")[1], 'a') - self.assertEqual(regex.match(r"(a(?!\s[abc]))", "a d")[1], 'a') - self.assertEqual(regex.match(r"(a)(?!\s\1)", "a b")[1], 'a') - self.assertEqual(regex.match(r"(a)(?!\s(abc|a))", "a b")[1], 'a') - - def test_ignore_case(self): - self.assertEqual(regex.match("abc", "ABC", regex.I)[0], 'ABC') - self.assertEqual(regex.match(u"abc", u"ABC", regex.I)[0], u'ABC') - - self.assertEqual(regex.match(r"(a\s[^a]*)", "a bb", regex.I)[1], - 'a bb') - self.assertEqual(regex.match(r"(a\s[abc])", "a b", 
regex.I)[1], 'a b') - self.assertEqual(regex.match(r"(a\s[abc]*)", "a bb", regex.I)[1], - 'a bb') - self.assertEqual(regex.match(r"((a)\s\2)", "a a", regex.I)[1], 'a a') - self.assertEqual(regex.match(r"((a)\s\2*)", "a aa", regex.I)[1], - 'a aa') - self.assertEqual(regex.match(r"((a)\s(abc|a))", "a a", regex.I)[1], - 'a a') - self.assertEqual(regex.match(r"((a)\s(abc|a)*)", "a aa", regex.I)[1], - 'a aa') - - # Issue 3511. - self.assertEqual(regex.match(r"[Z-a]", "_").span(), (0, 1)) - self.assertEqual(regex.match(r"(?i)[Z-a]", "_").span(), (0, 1)) - - self.assertEqual(bool(regex.match(ur"(?iu)nao", u"nAo")), True) - self.assertEqual(bool(regex.match(ur"(?iu)n\xE3o", u"n\xC3o")), True) - self.assertEqual(bool(regex.match(ur"(?iu)n\xE3o", u"N\xC3O")), True) - self.assertEqual(bool(regex.match(ur"(?iu)s", u"\u017F")), True) - - def test_case_folding(self): - self.assertEqual(regex.search(ur"(?fiu)ss", u"SS").span(), (0, 2)) - self.assertEqual(regex.search(ur"(?fiu)SS", u"ss").span(), (0, 2)) - self.assertEqual(regex.search(ur"(?fiu)SS", - u"\N{LATIN SMALL LETTER SHARP S}").span(), (0, 1)) - self.assertEqual(regex.search(ur"(?fi)\N{LATIN SMALL LETTER SHARP S}", - u"SS").span(), (0, 2)) - - self.assertEqual(regex.search(ur"(?fiu)\N{LATIN SMALL LIGATURE ST}", - u"ST").span(), (0, 2)) - self.assertEqual(regex.search(ur"(?fiu)ST", - u"\N{LATIN SMALL LIGATURE ST}").span(), (0, 1)) - self.assertEqual(regex.search(ur"(?fiu)ST", - u"\N{LATIN SMALL LIGATURE LONG S T}").span(), (0, 1)) - - self.assertEqual(regex.search(ur"(?fiu)SST", - u"\N{LATIN SMALL LETTER SHARP S}t").span(), (0, 2)) - self.assertEqual(regex.search(ur"(?fiu)SST", - u"s\N{LATIN SMALL LIGATURE LONG S T}").span(), (0, 2)) - self.assertEqual(regex.search(ur"(?fiu)SST", - u"s\N{LATIN SMALL LIGATURE ST}").span(), (0, 2)) - self.assertEqual(regex.search(ur"(?fiu)\N{LATIN SMALL LIGATURE ST}", - u"SST").span(), (1, 3)) - self.assertEqual(regex.search(ur"(?fiu)SST", - u"s\N{LATIN SMALL LIGATURE ST}").span(), (0, 2)) - 
- self.assertEqual(regex.search(ur"(?fiu)FFI", - u"\N{LATIN SMALL LIGATURE FFI}").span(), (0, 1)) - self.assertEqual(regex.search(ur"(?fiu)FFI", - u"\N{LATIN SMALL LIGATURE FF}i").span(), (0, 2)) - self.assertEqual(regex.search(ur"(?fiu)FFI", - u"f\N{LATIN SMALL LIGATURE FI}").span(), (0, 2)) - self.assertEqual(regex.search(ur"(?fiu)\N{LATIN SMALL LIGATURE FFI}", - u"FFI").span(), (0, 3)) - self.assertEqual(regex.search(ur"(?fiu)\N{LATIN SMALL LIGATURE FF}i", - u"FFI").span(), (0, 3)) - self.assertEqual(regex.search(ur"(?fiu)f\N{LATIN SMALL LIGATURE FI}", - u"FFI").span(), (0, 3)) - - sigma = u"\u03A3\u03C3\u03C2" - for ch1 in sigma: - for ch2 in sigma: - if not regex.match(ur"(?fiu)" + ch1, ch2): - self.fail() - - self.assertEqual(bool(regex.search(ur"(?iuV1)ff", u"\uFB00\uFB01")), - True) - self.assertEqual(bool(regex.search(ur"(?iuV1)ff", u"\uFB01\uFB00")), - True) - self.assertEqual(bool(regex.search(ur"(?iuV1)fi", u"\uFB00\uFB01")), - True) - self.assertEqual(bool(regex.search(ur"(?iuV1)fi", u"\uFB01\uFB00")), - True) - self.assertEqual(bool(regex.search(ur"(?iuV1)fffi", u"\uFB00\uFB01")), - True) - self.assertEqual(bool(regex.search(ur"(?iuV1)f\uFB03", - u"\uFB00\uFB01")), True) - self.assertEqual(bool(regex.search(ur"(?iuV1)ff", u"\uFB00\uFB01")), - True) - self.assertEqual(bool(regex.search(ur"(?iuV1)fi", u"\uFB00\uFB01")), - True) - self.assertEqual(bool(regex.search(ur"(?iuV1)fffi", u"\uFB00\uFB01")), - True) - self.assertEqual(bool(regex.search(ur"(?iuV1)f\uFB03", - u"\uFB00\uFB01")), True) - self.assertEqual(bool(regex.search(ur"(?iuV1)f\uFB01", u"\uFB00i")), - True) - self.assertEqual(bool(regex.search(ur"(?iuV1)f\uFB01", u"\uFB00i")), - True) - - self.assertEqual(regex.findall(ur"(?iuV0)\m(?:word){e<=3}\M(?ne", u"affine", - options=[u"\N{LATIN SMALL LIGATURE FFI}"]).span(), (0, 6)) - self.assertEqual(regex.search(ur"(?fi)a\Lne", - u"a\N{LATIN SMALL LIGATURE FFI}ne", options=[u"ffi"]).span(), (0, 4)) - - def test_category(self): - 
self.assertEqual(regex.match(r"(\s)", " ")[1], ' ') - - def test_not_literal(self): - self.assertEqual(regex.search(r"\s([^a])", " b")[1], 'b') - self.assertEqual(regex.search(r"\s([^a]*)", " bb")[1], 'bb') - - def test_search_coverage(self): - self.assertEqual(regex.search(r"\s(b)", " b")[1], 'b') - self.assertEqual(regex.search(r"a\s", "a ")[0], 'a ') - - def test_re_escape(self): - p = "" - self.assertEqual(regex.escape(p), p) - for i in range(0, 256): - p += chr(i) - self.assertEqual(bool(regex.match(regex.escape(chr(i)), chr(i))), - True) - self.assertEqual(regex.match(regex.escape(chr(i)), chr(i)).span(), - (0, 1)) - - pat = regex.compile(regex.escape(p)) - self.assertEqual(pat.match(p).span(), (0, 256)) - - def test_constants(self): - if regex.I != regex.IGNORECASE: - self.fail() - if regex.L != regex.LOCALE: - self.fail() - if regex.M != regex.MULTILINE: - self.fail() - if regex.S != regex.DOTALL: - self.fail() - if regex.X != regex.VERBOSE: - self.fail() - - def test_flags(self): - for flag in [regex.I, regex.M, regex.X, regex.S, regex.L]: - self.assertEqual(repr(type(regex.compile('^pattern$', flag))), - self.PATTERN_CLASS) - - def test_sre_character_literals(self): - for i in [0, 8, 16, 32, 64, 127, 128, 255]: - self.assertEqual(bool(regex.match(r"\%03o" % i, chr(i))), True) - self.assertEqual(bool(regex.match(r"\%03o0" % i, chr(i) + "0")), - True) - self.assertEqual(bool(regex.match(r"\%03o8" % i, chr(i) + "8")), - True) - self.assertEqual(bool(regex.match(r"\x%02x" % i, chr(i))), True) - self.assertEqual(bool(regex.match(r"\x%02x0" % i, chr(i) + "0")), - True) - self.assertEqual(bool(regex.match(r"\x%02xz" % i, chr(i) + "z")), - True) - - self.assertRaisesRegex(regex.error, self.UNKNOWN_GROUP, lambda: - regex.match(r"\911", "")) - - def test_sre_character_class_literals(self): - for i in [0, 8, 16, 32, 64, 127, 128, 255]: - self.assertEqual(bool(regex.match(r"[\%03o]" % i, chr(i))), True) - self.assertEqual(bool(regex.match(r"[\%03o0]" % i, chr(i))), 
True) - self.assertEqual(bool(regex.match(r"[\%03o8]" % i, chr(i))), True) - self.assertEqual(bool(regex.match(r"[\x%02x]" % i, chr(i))), True) - self.assertEqual(bool(regex.match(r"[\x%02x0]" % i, chr(i))), True) - self.assertEqual(bool(regex.match(r"[\x%02xz]" % i, chr(i))), True) - - self.assertRaisesRegex(regex.error, self.BAD_OCTAL_ESCAPE, lambda: - regex.match(r"[\911]", "")) - - def test_bug_113254(self): - self.assertEqual(regex.match(r'(a)|(b)', 'b').start(1), -1) - self.assertEqual(regex.match(r'(a)|(b)', 'b').end(1), -1) - self.assertEqual(regex.match(r'(a)|(b)', 'b').span(1), (-1, -1)) - - def test_bug_527371(self): - # Bug described in patches 527371/672491. - self.assertEqual(regex.match(r'(a)?a','a').lastindex, None) - self.assertEqual(regex.match(r'(a)(b)?b','ab').lastindex, 1) - self.assertEqual(regex.match(r'(?Pa)(?Pb)?b','ab').lastgroup, - 'a') - self.assertEqual(regex.match("(?Pa(b))", "ab").lastgroup, 'a') - self.assertEqual(regex.match("((a))", "a").lastindex, 1) - - def test_bug_545855(self): - # Bug 545855 -- This pattern failed to cause a compile error as it - # should, instead provoking a TypeError. - self.assertRaisesRegex(regex.error, self.BAD_SET, lambda: - regex.compile('foo[a-')) - - def test_bug_418626(self): - # Bugs 418626 at al. -- Testing Greg Chapman's addition of op code - # SRE_OP_MIN_REPEAT_ONE for eliminating recursion on simple uses of - # pattern '*?' on a long string. - self.assertEqual(regex.match('.*?c', 10000 * 'ab' + 'cd').end(0), - 20001) - self.assertEqual(regex.match('.*?cd', 5000 * 'ab' + 'c' + 5000 * 'ab' + - 'cde').end(0), 20003) - self.assertEqual(regex.match('.*?cd', 20000 * 'abc' + 'de').end(0), - 60001) - # Non-simple '*?' still used to hit the recursion limit, before the - # non-recursive scheme was implemented. 
- self.assertEqual(regex.search('(a|b)*?c', 10000 * 'ab' + 'cd').end(0), - 20001) - - def test_bug_612074(self): - pat = u"[" + regex.escape(u"\u2039") + u"]" - self.assertEqual(regex.compile(pat) and 1, 1) - - def test_stack_overflow(self): - # Nasty cases that used to overflow the straightforward recursive - # implementation of repeated groups. - self.assertEqual(regex.match('(x)*', 50000 * 'x')[1], 'x') - self.assertEqual(regex.match('(x)*y', 50000 * 'x' + 'y')[1], 'x') - self.assertEqual(regex.match('(x)*?y', 50000 * 'x' + 'y')[1], 'x') - - def test_scanner(self): - def s_ident(scanner, token): return token - def s_operator(scanner, token): return "op%s" % token - def s_float(scanner, token): return float(token) - def s_int(scanner, token): return int(token) - - scanner = regex.Scanner([(r"[a-zA-Z_]\w*", s_ident), (r"\d+\.\d*", - s_float), (r"\d+", s_int), (r"=|\+|-|\*|/", s_operator), (r"\s+", - None), ]) - - self.assertEqual(repr(type(scanner.scanner.scanner("").pattern)), - self.PATTERN_CLASS) - - self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"), (['sum', - 'op=', 3, 'op*', 'foo', 'op+', 312.5, 'op+', 'bar'], '')) - - def test_bug_448951(self): - # Bug 448951 (similar to 429357, but with single char match). - # (Also test greedy matches.) - for op in '', '?', '*': - self.assertEqual(regex.match(r'((.%s):)?z' % op, 'z')[:], ('z', - None, None)) - self.assertEqual(regex.match(r'((.%s):)?z' % op, 'a:z')[:], ('a:z', - 'a:', 'a')) - - def test_bug_725106(self): - # Capturing groups in alternatives in repeats. 
- self.assertEqual(regex.match('^((a)|b)*', 'abc')[:], ('ab', 'b', 'a')) - self.assertEqual(regex.match('^(([ab])|c)*', 'abc')[:], ('abc', 'c', - 'b')) - self.assertEqual(regex.match('^((d)|[ab])*', 'abc')[:], ('ab', 'b', - None)) - self.assertEqual(regex.match('^((a)c|[ab])*', 'abc')[:], ('ab', 'b', - None)) - self.assertEqual(regex.match('^((a)|b)*?c', 'abc')[:], ('abc', 'b', - 'a')) - self.assertEqual(regex.match('^(([ab])|c)*?d', 'abcd')[:], ('abcd', - 'c', 'b')) - self.assertEqual(regex.match('^((d)|[ab])*?c', 'abc')[:], ('abc', 'b', - None)) - self.assertEqual(regex.match('^((a)c|[ab])*?c', 'abc')[:], ('abc', 'b', - None)) - - def test_bug_725149(self): - # Mark_stack_base restoring before restoring marks. - self.assertEqual(regex.match('(a)(?:(?=(b)*)c)*', 'abb')[:], ('a', 'a', - None)) - self.assertEqual(regex.match('(a)((?!(b)*))*', 'abb')[:], ('a', 'a', - None, None)) - - def test_bug_764548(self): - # Bug 764548, regex.compile() barfs on str/unicode subclasses. - class my_unicode(str): pass - pat = regex.compile(my_unicode("abc")) - self.assertEqual(pat.match("xyz"), None) - - def test_finditer(self): - it = regex.finditer(r":+", "a:b::c:::d") - self.assertEqual([item[0] for item in it], [':', '::', ':::']) - - def test_bug_926075(self): - if regex.compile('bug_926075') is regex.compile(u'bug_926075'): - self.fail() - - def test_bug_931848(self): - pattern = u"[\u002E\u3002\uFF0E\uFF61]" - self.assertEqual(regex.compile(pattern).split("a.b.c"), ['a', 'b', - 'c']) - - def test_bug_581080(self): - it = regex.finditer(r"\s", "a b") - self.assertEqual(it.next().span(), (1, 2)) - self.assertRaises(StopIteration, lambda: it.next()) - - scanner = regex.compile(r"\s").scanner("a b") - self.assertEqual(scanner.search().span(), (1, 2)) - self.assertEqual(scanner.search(), None) - - def test_bug_817234(self): - it = regex.finditer(r".*", "asdf") - self.assertEqual(it.next().span(), (0, 4)) - self.assertEqual(it.next().span(), (4, 4)) - 
self.assertRaises(StopIteration, lambda: it.next()) - - def test_empty_array(self): - # SF buf 1647541. - import array - for typecode in 'cbBuhHiIlLfd': - a = array.array(typecode) - self.assertEqual(regex.compile("bla").match(a), None) - self.assertEqual(regex.compile("").match(a)[1 : ], ()) - - def test_inline_flags(self): - # Bug #1700. - upper_char = unichr(0x1ea0) # Latin Capital Letter A with Dot Below - lower_char = unichr(0x1ea1) # Latin Small Letter A with Dot Below - - p = regex.compile(upper_char, regex.I | regex.U) - self.assertEqual(bool(p.match(lower_char)), True) - - p = regex.compile(lower_char, regex.I | regex.U) - self.assertEqual(bool(p.match(upper_char)), True) - - p = regex.compile('(?i)' + upper_char, regex.U) - self.assertEqual(bool(p.match(lower_char)), True) - - p = regex.compile('(?i)' + lower_char, regex.U) - self.assertEqual(bool(p.match(upper_char)), True) - - p = regex.compile('(?iu)' + upper_char) - self.assertEqual(bool(p.match(lower_char)), True) - - p = regex.compile('(?iu)' + lower_char) - self.assertEqual(bool(p.match(upper_char)), True) - - self.assertEqual(bool(regex.match(r"(?i)a", "A")), True) - self.assertEqual(bool(regex.match(r"a(?i)", "A")), True) - self.assertEqual(bool(regex.match(r"(?iV1)a", "A")), True) - self.assertEqual(regex.match(r"a(?iV1)", "A"), None) - - def test_dollar_matches_twice(self): - # $ matches the end of string, and just before the terminating \n. - pattern = regex.compile('$') - self.assertEqual(pattern.sub('#', 'a\nb\n'), 'a\nb#\n#') - self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a\nb\nc#') - self.assertEqual(pattern.sub('#', '\n'), '#\n#') - - pattern = regex.compile('$', regex.MULTILINE) - self.assertEqual(pattern.sub('#', 'a\nb\n' ), 'a#\nb#\n#') - self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a#\nb#\nc#') - self.assertEqual(pattern.sub('#', '\n'), '#\n#') - - def test_ascii_and_unicode_flag(self): - # Unicode patterns. 
- for flags in (0, regex.UNICODE): - pat = regex.compile(u'\xc0', flags | regex.IGNORECASE) - self.assertEqual(bool(pat.match(u'\xe0')), True) - pat = regex.compile(u'\w', flags) - self.assertEqual(bool(pat.match(u'\xe0')), True) - - pat = regex.compile(u'\xc0', regex.ASCII | regex.IGNORECASE) - self.assertEqual(pat.match(u'\xe0'), None) - pat = regex.compile(u'(?a)\xc0', regex.IGNORECASE) - self.assertEqual(pat.match(u'\xe0'), None) - pat = regex.compile(u'\w', regex.ASCII) - self.assertEqual(pat.match(u'\xe0'), None) - pat = regex.compile(u'(?a)\w') - self.assertEqual(pat.match(u'\xe0'), None) - - # String patterns. - for flags in (0, regex.ASCII): - pat = regex.compile('\xc0', flags | regex.IGNORECASE) - self.assertEqual(pat.match('\xe0'), None) - pat = regex.compile('\w') - self.assertEqual(pat.match('\xe0'), None) - self.assertRaisesRegex(ValueError, self.MIXED_FLAGS, lambda: - regex.compile('(?au)\w')) - - def test_subscripting_match(self): - m = regex.match(r'(?\w)', 'xy') - if not m: - self.fail("Failed: expected match but returned None") - elif not m or m[0] != m.group(0) or m[1] != m.group(1): - self.fail("Failed") - if not m: - self.fail("Failed: expected match but returned None") - elif m[:] != ('x', 'x'): - self.fail("Failed: expected \"('x', 'x')\" but got %s instead" % - repr(m[:])) - - def test_new_named_groups(self): - m0 = regex.match(r'(?P\w)', 'x') - m1 = regex.match(r'(?\w)', 'x') - if not (m0 and m1 and m0[:] == m1[:]): - self.fail("Failed") - - def test_properties(self): - self.assertEqual(regex.match('(?i)\xC0', '\xE0'), None) - self.assertEqual(regex.match(r'(?i)\xC0', '\xE0'), None) - self.assertEqual(regex.match(r'\w', '\xE0'), None) - self.assertEqual(bool(regex.match(ur'(?u)\w', u'\xE0')), True) - - # Dropped the following test. It's not possible to determine what the - # correct result should be in the general case. 
-# self.assertEqual(bool(regex.match(r'(?L)\w', '\xE0')), -# '\xE0'.isalnum()) - - self.assertEqual(bool(regex.match(r'(?L)\d', '0')), True) - self.assertEqual(bool(regex.match(r'(?L)\s', ' ')), True) - self.assertEqual(bool(regex.match(r'(?L)\w', 'a')), True) - self.assertEqual(regex.match(r'(?L)\d', '?'), None) - self.assertEqual(regex.match(r'(?L)\s', '?'), None) - self.assertEqual(regex.match(r'(?L)\w', '?'), None) - - self.assertEqual(regex.match(r'(?L)\D', '0'), None) - self.assertEqual(regex.match(r'(?L)\S', ' '), None) - self.assertEqual(regex.match(r'(?L)\W', 'a'), None) - self.assertEqual(bool(regex.match(r'(?L)\D', '?')), True) - self.assertEqual(bool(regex.match(r'(?L)\S', '?')), True) - self.assertEqual(bool(regex.match(r'(?L)\W', '?')), True) - - self.assertEqual(bool(regex.match(ur'(?u)\p{Cyrillic}', - u'\N{CYRILLIC CAPITAL LETTER A}')), True) - self.assertEqual(bool(regex.match(ur'(?u)\p{IsCyrillic}', - u'\N{CYRILLIC CAPITAL LETTER A}')), True) - self.assertEqual(bool(regex.match(ur'(?u)\p{Script=Cyrillic}', - u'\N{CYRILLIC CAPITAL LETTER A}')), True) - self.assertEqual(bool(regex.match(ur'(?u)\p{InCyrillic}', - u'\N{CYRILLIC CAPITAL LETTER A}')), True) - self.assertEqual(bool(regex.match(ur'(?u)\p{Block=Cyrillic}', - u'\N{CYRILLIC CAPITAL LETTER A}')), True) - self.assertEqual(bool(regex.match(ur'(?u)[[:Cyrillic:]]', - u'\N{CYRILLIC CAPITAL LETTER A}')), True) - self.assertEqual(bool(regex.match(ur'(?u)[[:IsCyrillic:]]', - u'\N{CYRILLIC CAPITAL LETTER A}')), True) - self.assertEqual(bool(regex.match(ur'(?u)[[:Script=Cyrillic:]]', - u'\N{CYRILLIC CAPITAL LETTER A}')), True) - self.assertEqual(bool(regex.match(ur'(?u)[[:InCyrillic:]]', - u'\N{CYRILLIC CAPITAL LETTER A}')), True) - self.assertEqual(bool(regex.match(ur'(?u)[[:Block=Cyrillic:]]', - u'\N{CYRILLIC CAPITAL LETTER A}')), True) - - self.assertEqual(bool(regex.match(ur'(?u)\P{Cyrillic}', - u'\N{LATIN CAPITAL LETTER A}')), True) - self.assertEqual(bool(regex.match(ur'(?u)\P{IsCyrillic}', - 
u'\N{LATIN CAPITAL LETTER A}')), True) - self.assertEqual(bool(regex.match(ur'(?u)\P{Script=Cyrillic}', - u'\N{LATIN CAPITAL LETTER A}')), True) - self.assertEqual(bool(regex.match(ur'(?u)\P{InCyrillic}', - u'\N{LATIN CAPITAL LETTER A}')), True) - self.assertEqual(bool(regex.match(ur'(?u)\P{Block=Cyrillic}', - u'\N{LATIN CAPITAL LETTER A}')), True) - self.assertEqual(bool(regex.match(ur'(?u)\p{^Cyrillic}', - u'\N{LATIN CAPITAL LETTER A}')), True) - self.assertEqual(bool(regex.match(ur'(?u)\p{^IsCyrillic}', - u'\N{LATIN CAPITAL LETTER A}')), True) - self.assertEqual(bool(regex.match(ur'(?u)\p{^Script=Cyrillic}', - u'\N{LATIN CAPITAL LETTER A}')), True) - self.assertEqual(bool(regex.match(ur'(?u)\p{^InCyrillic}', - u'\N{LATIN CAPITAL LETTER A}')), True) - self.assertEqual(bool(regex.match(ur'(?u)\p{^Block=Cyrillic}', - u'\N{LATIN CAPITAL LETTER A}')), True) - self.assertEqual(bool(regex.match(ur'(?u)[[:^Cyrillic:]]', - u'\N{LATIN CAPITAL LETTER A}')), True) - self.assertEqual(bool(regex.match(ur'(?u)[[:^IsCyrillic:]]', - u'\N{LATIN CAPITAL LETTER A}')), True) - self.assertEqual(bool(regex.match(ur'(?u)[[:^Script=Cyrillic:]]', - u'\N{LATIN CAPITAL LETTER A}')), True) - self.assertEqual(bool(regex.match(ur'(?u)[[:^InCyrillic:]]', - u'\N{LATIN CAPITAL LETTER A}')), True) - self.assertEqual(bool(regex.match(ur'(?u)[[:^Block=Cyrillic:]]', - u'\N{LATIN CAPITAL LETTER A}')), True) - - self.assertEqual(bool(regex.match(ur'(?u)\d', u'0')), True) - self.assertEqual(bool(regex.match(ur'(?u)\s', u' ')), True) - self.assertEqual(bool(regex.match(ur'(?u)\w', u'A')), True) - self.assertEqual(regex.match(ur"(?u)\d", u"?"), None) - self.assertEqual(regex.match(ur"(?u)\s", u"?"), None) - self.assertEqual(regex.match(ur"(?u)\w", u"?"), None) - self.assertEqual(regex.match(ur"(?u)\D", u"0"), None) - self.assertEqual(regex.match(ur"(?u)\S", u" "), None) - self.assertEqual(regex.match(ur"(?u)\W", u"A"), None) - self.assertEqual(bool(regex.match(ur'(?u)\D', u'?')), True) - 
self.assertEqual(bool(regex.match(ur'(?u)\S', u'?')), True) - self.assertEqual(bool(regex.match(ur'(?u)\W', u'?')), True) - - self.assertEqual(bool(regex.match(ur'(?u)\p{L}', u'A')), True) - self.assertEqual(bool(regex.match(ur'(?u)\p{L}', u'a')), True) - self.assertEqual(bool(regex.match(ur'(?u)\p{Lu}', u'A')), True) - self.assertEqual(bool(regex.match(ur'(?u)\p{Ll}', u'a')), True) - - self.assertEqual(bool(regex.match(ur'(?u)(?i)a', u'a')), True) - self.assertEqual(bool(regex.match(ur'(?u)(?i)a', u'A')), True) - - self.assertEqual(bool(regex.match(ur'(?u)\w', u'0')), True) - self.assertEqual(bool(regex.match(ur'(?u)\w', u'a')), True) - self.assertEqual(bool(regex.match(ur'(?u)\w', u'_')), True) - - self.assertEqual(regex.match(ur"(?u)\X", u"\xE0").span(), (0, 1)) - self.assertEqual(regex.match(ur"(?u)\X", u"a\u0300").span(), (0, 2)) - self.assertEqual(regex.findall(ur"(?u)\X", - u"a\xE0a\u0300e\xE9e\u0301"), [u'a', u'\xe0', u'a\u0300', u'e', - u'\xe9', u'e\u0301']) - self.assertEqual(regex.findall(ur"(?u)\X{3}", - u"a\xE0a\u0300e\xE9e\u0301"), [u'a\xe0a\u0300', u'e\xe9e\u0301']) - self.assertEqual(regex.findall(ur"(?u)\X", u"\r\r\n\u0301A\u0301"), - [u'\r', u'\r\n', u'\u0301', u'A\u0301']) - - self.assertEqual(bool(regex.match(ur'(?u)\p{Ll}', u'a')), True) - - chars_u = u"-09AZaz_\u0393\u03b3" - chars_b = "-09AZaz_" - word_set = set("Ll Lm Lo Lt Lu Mc Me Mn Nd Nl No Pc".split()) - - tests = [ - (ur"(?u)\w", chars_u, u"09AZaz_\u0393\u03b3"), - (ur"(?u)[[:word:]]", chars_u, u"09AZaz_\u0393\u03b3"), - (ur"(?u)\W", chars_u, u"-"), - (ur"(?u)[[:^word:]]", chars_u, u"-"), - (ur"(?u)\d", chars_u, u"09"), - (ur"(?u)[[:digit:]]", chars_u, u"09"), - (ur"(?u)\D", chars_u, u"-AZaz_\u0393\u03b3"), - (ur"(?u)[[:^digit:]]", chars_u, u"-AZaz_\u0393\u03b3"), - (ur"(?u)[[:alpha:]]", chars_u, u"AZaz\u0393\u03b3"), - (ur"(?u)[[:^alpha:]]", chars_u, u"-09_"), - (ur"(?u)[[:alnum:]]", chars_u, u"09AZaz\u0393\u03b3"), - (ur"(?u)[[:^alnum:]]", chars_u, u"-_"), - (ur"(?u)[[:xdigit:]]", 
chars_u, u"09Aa"), - (ur"(?u)[[:^xdigit:]]", chars_u, u"-Zz_\u0393\u03b3"), - (ur"(?u)\p{InBasicLatin}", u"a\xE1", u"a"), - (ur"(?u)\P{InBasicLatin}", u"a\xE1", u"\xE1"), - (ur"(?iu)\p{InBasicLatin}", u"a\xE1", u"a"), - (ur"(?iu)\P{InBasicLatin}", u"a\xE1", u"\xE1"), - - (r"(?L)\w", chars_b, "09AZaz_"), - (r"(?L)[[:word:]]", chars_b, "09AZaz_"), - (r"(?L)\W", chars_b, "-"), - (r"(?L)[[:^word:]]", chars_b, "-"), - (r"(?L)\d", chars_b, "09"), - (r"(?L)[[:digit:]]", chars_b, "09"), - (r"(?L)\D", chars_b, "-AZaz_"), - (r"(?L)[[:^digit:]]", chars_b, "-AZaz_"), - (r"(?L)[[:alpha:]]", chars_b, "AZaz"), - (r"(?L)[[:^alpha:]]", chars_b, "-09_"), - (r"(?L)[[:alnum:]]", chars_b, "09AZaz"), - (r"(?L)[[:^alnum:]]", chars_b, "-_"), - (r"(?L)[[:xdigit:]]", chars_b, "09Aa"), - (r"(?L)[[:^xdigit:]]", chars_b, "-Zz_"), - - (r"\w", chars_b, "09AZaz_"), - (r"[[:word:]]", chars_b, "09AZaz_"), - (r"\W", chars_b, "-"), - (r"[[:^word:]]", chars_b, "-"), - (r"\d", chars_b, "09"), - (r"[[:digit:]]", chars_b, "09"), - (r"\D", chars_b, "-AZaz_"), - (r"[[:^digit:]]", chars_b, "-AZaz_"), - (r"[[:alpha:]]", chars_b, "AZaz"), - (r"[[:^alpha:]]", chars_b, "-09_"), - (r"[[:alnum:]]", chars_b, "09AZaz"), - (r"[[:^alnum:]]", chars_b, "-_"), - (r"[[:xdigit:]]", chars_b, "09Aa"), - (r"[[:^xdigit:]]", chars_b, "-Zz_"), - ] - for pattern, chars, expected in tests: - try: - if chars[ : 0].join(regex.findall(pattern, chars)) != expected: - self.fail("Failed: %s" % pattern) - except Exception, e: - self.fail("Failed: %s raised %s" % (pattern, repr(e))) - - self.assertEqual(bool(regex.match(ur"(?u)\p{NumericValue=0}", u"0")), - True) - self.assertEqual(bool(regex.match(ur"(?u)\p{NumericValue=1/2}", - u"\N{VULGAR FRACTION ONE HALF}")), True) - self.assertEqual(bool(regex.match(ur"(?u)\p{NumericValue=0.5}", - u"\N{VULGAR FRACTION ONE HALF}")), True) - - def test_word_class(self): - self.assertEqual(regex.findall(ur"(?u)\w+", - u" \u0939\u093f\u0928\u094d\u0926\u0940,"), - 
[u'\u0939\u093f\u0928\u094d\u0926\u0940']) - self.assertEqual(regex.findall(ur"(?u)\W+", - u" \u0939\u093f\u0928\u094d\u0926\u0940,"), [u' ', u',']) - self.assertEqual(regex.split(ur"(?uV1)\b", - u" \u0939\u093f\u0928\u094d\u0926\u0940,"), [u' ', - u'\u0939\u093f\u0928\u094d\u0926\u0940', u',']) - self.assertEqual(regex.split(ur"(?uV1)\B", - u" \u0939\u093f\u0928\u094d\u0926\u0940,"), [u'', u' \u0939', - u'\u093f', u'\u0928', u'\u094d', u'\u0926', u'\u0940,', u'']) - - def test_search_anchor(self): - self.assertEqual(regex.findall(r"\G\w{2}", "abcd ef"), ['ab', 'cd']) - - def test_search_reverse(self): - self.assertEqual(regex.findall(r"(?r).", "abc"), ['c', 'b', 'a']) - self.assertEqual(regex.findall(r"(?r).", "abc", overlapped=True), ['c', - 'b', 'a']) - self.assertEqual(regex.findall(r"(?r)..", "abcde"), ['de', 'bc']) - self.assertEqual(regex.findall(r"(?r)..", "abcde", overlapped=True), - ['de', 'cd', 'bc', 'ab']) - self.assertEqual(regex.findall(r"(?r)(.)(-)(.)", "a-b-c", - overlapped=True), [("b", "-", "c"), ("a", "-", "b")]) - - self.assertEqual([m[0] for m in regex.finditer(r"(?r).", "abc")], ['c', - 'b', 'a']) - self.assertEqual([m[0] for m in regex.finditer(r"(?r)..", "abcde", - overlapped=True)], ['de', 'cd', 'bc', 'ab']) - self.assertEqual([m[0] for m in regex.finditer(r"(?r).", "abc")], ['c', - 'b', 'a']) - self.assertEqual([m[0] for m in regex.finditer(r"(?r)..", "abcde", - overlapped=True)], ['de', 'cd', 'bc', 'ab']) - - self.assertEqual(regex.findall(r"^|\w+", "foo bar"), ['', 'foo', - 'bar']) - self.assertEqual(regex.findall(r"(?V1)^|\w+", "foo bar"), ['', 'foo', - 'bar']) - self.assertEqual(regex.findall(r"(?r)^|\w+", "foo bar"), ['bar', 'foo', - '']) - self.assertEqual(regex.findall(r"(?rV1)^|\w+", "foo bar"), ['bar', - 'foo', '']) - - self.assertEqual([m[0] for m in regex.finditer(r"^|\w+", "foo bar")], - ['', 'foo', 'bar']) - self.assertEqual([m[0] for m in regex.finditer(r"(?V1)^|\w+", - "foo bar")], ['', 'foo', 'bar']) - 
self.assertEqual([m[0] for m in regex.finditer(r"(?r)^|\w+", - "foo bar")], ['bar', 'foo', '']) - self.assertEqual([m[0] for m in regex.finditer(r"(?rV1)^|\w+", - "foo bar")], ['bar', 'foo', '']) - - self.assertEqual(regex.findall(r"\G\w{2}", "abcd ef"), ['ab', 'cd']) - self.assertEqual(regex.findall(r".{2}(?<=\G.*)", "abcd"), ['ab', 'cd']) - self.assertEqual(regex.findall(r"(?r)\G\w{2}", "abcd ef"), []) - self.assertEqual(regex.findall(r"(?r)\w{2}\G", "abcd ef"), ['ef']) - - self.assertEqual(regex.findall(r"q*", "qqwe"), ['qq', '', '', '']) - self.assertEqual(regex.findall(r"(?V1)q*", "qqwe"), ['qq', '', '', '']) - self.assertEqual(regex.findall(r"(?r)q*", "qqwe"), ['', '', 'qq', '']) - self.assertEqual(regex.findall(r"(?rV1)q*", "qqwe"), ['', '', 'qq', - '']) - - self.assertEqual(regex.findall(".", "abcd", pos=1, endpos=3), ['b', - 'c']) - self.assertEqual(regex.findall(".", "abcd", pos=1, endpos=-1), ['b', - 'c']) - self.assertEqual([m[0] for m in regex.finditer(".", "abcd", pos=1, - endpos=3)], ['b', 'c']) - self.assertEqual([m[0] for m in regex.finditer(".", "abcd", pos=1, - endpos=-1)], ['b', 'c']) - - self.assertEqual([m[0] for m in regex.finditer("(?r).", "abcd", pos=1, - endpos=3)], ['c', 'b']) - self.assertEqual([m[0] for m in regex.finditer("(?r).", "abcd", pos=1, - endpos=-1)], ['c', 'b']) - self.assertEqual(regex.findall("(?r).", "abcd", pos=1, endpos=3), ['c', - 'b']) - self.assertEqual(regex.findall("(?r).", "abcd", pos=1, endpos=-1), - ['c', 'b']) - - self.assertEqual(regex.findall(r"[ab]", "aB", regex.I), ['a', 'B']) - self.assertEqual(regex.findall(r"(?r)[ab]", "aB", regex.I), ['B', 'a']) - - self.assertEqual(regex.findall(r"(?r).{2}", "abc"), ['bc']) - self.assertEqual(regex.findall(r"(?r).{2}", "abc", overlapped=True), - ['bc', 'ab']) - self.assertEqual(regex.findall(r"(\w+) (\w+)", - "first second third fourth fifth"), [('first', 'second'), ('third', - 'fourth')]) - self.assertEqual(regex.findall(r"(?r)(\w+) (\w+)", - "first second third fourth 
fifth"), [('fourth', 'fifth'), ('second', - 'third')]) - - self.assertEqual([m[0] for m in regex.finditer(r"(?r).{2}", "abc")], - ['bc']) - self.assertEqual([m[0] for m in regex.finditer(r"(?r).{2}", "abc", - overlapped=True)], ['bc', 'ab']) - self.assertEqual([m[0] for m in regex.finditer(r"(\w+) (\w+)", - "first second third fourth fifth")], ['first second', - 'third fourth']) - self.assertEqual([m[0] for m in regex.finditer(r"(?r)(\w+) (\w+)", - "first second third fourth fifth")], ['fourth fifth', - 'second third']) - - self.assertEqual(regex.search("abcdef", "abcdef").span(), (0, 6)) - self.assertEqual(regex.search("(?r)abcdef", "abcdef").span(), (0, 6)) - self.assertEqual(regex.search("(?i)abcdef", "ABCDEF").span(), (0, 6)) - self.assertEqual(regex.search("(?ir)abcdef", "ABCDEF").span(), (0, 6)) - - self.assertEqual(regex.sub(r"(.)", r"\1", "abc"), 'abc') - self.assertEqual(regex.sub(r"(?r)(.)", r"\1", "abc"), 'abc') - - def test_atomic(self): - # Issue 433030. - self.assertEqual(regex.search(r"(?>a*)a", "aa"), None) - - def test_possessive(self): - # Single-character non-possessive. - self.assertEqual(regex.search(r"a?a", "a").span(), (0, 1)) - self.assertEqual(regex.search(r"a*a", "aaa").span(), (0, 3)) - self.assertEqual(regex.search(r"a+a", "aaa").span(), (0, 3)) - self.assertEqual(regex.search(r"a{1,3}a", "aaa").span(), (0, 3)) - - # Multiple-character non-possessive. - self.assertEqual(regex.search(r"(?:ab)?ab", "ab").span(), (0, 2)) - self.assertEqual(regex.search(r"(?:ab)*ab", "ababab").span(), (0, 6)) - self.assertEqual(regex.search(r"(?:ab)+ab", "ababab").span(), (0, 6)) - self.assertEqual(regex.search(r"(?:ab){1,3}ab", "ababab").span(), (0, - 6)) - - # Single-character possessive. - self.assertEqual(regex.search(r"a?+a", "a"), None) - self.assertEqual(regex.search(r"a*+a", "aaa"), None) - self.assertEqual(regex.search(r"a++a", "aaa"), None) - self.assertEqual(regex.search(r"a{1,3}+a", "aaa"), None) - - # Multiple-character possessive. 
- self.assertEqual(regex.search(r"(?:ab)?+ab", "ab"), None) - self.assertEqual(regex.search(r"(?:ab)*+ab", "ababab"), None) - self.assertEqual(regex.search(r"(?:ab)++ab", "ababab"), None) - self.assertEqual(regex.search(r"(?:ab){1,3}+ab", "ababab"), None) - - def test_zerowidth(self): - # Issue 3262. - self.assertEqual(regex.split(r"\b", "a b"), ['a b']) - self.assertEqual(regex.split(r"(?V1)\b", "a b"), ['', 'a', ' ', 'b', - '']) - - # Issue 1647489. - self.assertEqual(regex.findall(r"^|\w+", "foo bar"), ['', 'foo', - 'bar']) - self.assertEqual([m[0] for m in regex.finditer(r"^|\w+", "foo bar")], - ['', 'foo', 'bar']) - self.assertEqual(regex.findall(r"(?r)^|\w+", "foo bar"), ['bar', 'foo', - '']) - self.assertEqual([m[0] for m in regex.finditer(r"(?r)^|\w+", - "foo bar")], ['bar', 'foo', '']) - self.assertEqual(regex.findall(r"(?V1)^|\w+", "foo bar"), ['', 'foo', - 'bar']) - self.assertEqual([m[0] for m in regex.finditer(r"(?V1)^|\w+", - "foo bar")], ['', 'foo', 'bar']) - self.assertEqual(regex.findall(r"(?rV1)^|\w+", "foo bar"), ['bar', - 'foo', '']) - self.assertEqual([m[0] for m in regex.finditer(r"(?rV1)^|\w+", - "foo bar")], ['bar', 'foo', '']) - - self.assertEqual(regex.split("", "xaxbxc"), ['xaxbxc']) - self.assertEqual([m for m in regex.splititer("", "xaxbxc")], - ['xaxbxc']) - - self.assertEqual(regex.split("(?r)", "xaxbxc"), ['xaxbxc']) - self.assertEqual([m for m in regex.splititer("(?r)", "xaxbxc")], - ['xaxbxc']) - - self.assertEqual(regex.split("(?V1)", "xaxbxc"), ['', 'x', 'a', 'x', - 'b', 'x', 'c', '']) - self.assertEqual([m for m in regex.splititer("(?V1)", "xaxbxc")], ['', - 'x', 'a', 'x', 'b', 'x', 'c', '']) - - self.assertEqual(regex.split("(?rV1)", "xaxbxc"), ['', 'c', 'x', 'b', - 'x', 'a', 'x', '']) - self.assertEqual([m for m in regex.splititer("(?rV1)", "xaxbxc")], ['', - 'c', 'x', 'b', 'x', 'a', 'x', '']) - - def test_scoped_and_inline_flags(self): - # Issues 433028, 433024, 433027. 
- self.assertEqual(regex.search(r"(?i)Ab", "ab").span(), (0, 2)) - self.assertEqual(regex.search(r"(?i:A)b", "ab").span(), (0, 2)) - self.assertEqual(regex.search(r"A(?i)b", "ab").span(), (0, 2)) - self.assertEqual(regex.search(r"A(?iV1)b", "ab"), None) - - self.assertRaisesRegex(regex.error, self.CANT_TURN_OFF, lambda: - regex.search(r"(?V0-i)Ab", "ab", flags=regex.I)) - - self.assertEqual(regex.search(r"(?V0)Ab", "ab"), None) - self.assertEqual(regex.search(r"(?V1)Ab", "ab"), None) - self.assertEqual(regex.search(r"(?V1-i)Ab", "ab", flags=regex.I), None) - self.assertEqual(regex.search(r"(?-i:A)b", "ab", flags=regex.I), None) - self.assertEqual(regex.search(r"A(?V1-i)b", "ab", - flags=regex.I).span(), (0, 2)) - - def test_repeated_repeats(self): - # Issue 2537. - self.assertEqual(regex.search(r"(?:a+)+", "aaa").span(), (0, 3)) - self.assertEqual(regex.search(r"(?:(?:ab)+c)+", "abcabc").span(), (0, - 6)) - - def test_lookbehind(self): - self.assertEqual(regex.search(r"123(?<=a\d+)", "a123").span(), (1, 4)) - self.assertEqual(regex.search(r"123(?<=a\d+)", "b123"), None) - self.assertEqual(regex.search(r"123(?[ \t]+\r*$)|(?P(?<=[^\n])\Z)') - self.assertEqual(pat.subn(lambda m: '<' + m.lastgroup + '>', - 'foobar '), ('foobar', 1)) - self.assertEqual([m.group() for m in pat.finditer('foobar ')], [' ', - '']) - pat = regex.compile(r'(?mV1)(?P[ \t]+\r*$)|(?P(?<=[^\n])\Z)') - self.assertEqual(pat.subn(lambda m: '<' + m.lastgroup + '>', - 'foobar '), ('foobar', 2)) - self.assertEqual([m.group() for m in pat.finditer('foobar ')], [' ', - '']) - - def test_overlapped(self): - self.assertEqual(regex.findall(r"..", "abcde"), ['ab', 'cd']) - self.assertEqual(regex.findall(r"..", "abcde", overlapped=True), ['ab', - 'bc', 'cd', 'de']) - self.assertEqual(regex.findall(r"(?r)..", "abcde"), ['de', 'bc']) - self.assertEqual(regex.findall(r"(?r)..", "abcde", overlapped=True), - ['de', 'cd', 'bc', 'ab']) - self.assertEqual(regex.findall(r"(.)(-)(.)", "a-b-c", overlapped=True), - 
[("a", "-", "b"), ("b", "-", "c")]) - - self.assertEqual([m[0] for m in regex.finditer(r"..", "abcde")], ['ab', - 'cd']) - self.assertEqual([m[0] for m in regex.finditer(r"..", "abcde", - overlapped=True)], ['ab', 'bc', 'cd', 'de']) - self.assertEqual([m[0] for m in regex.finditer(r"(?r)..", "abcde")], - ['de', 'bc']) - self.assertEqual([m[0] for m in regex.finditer(r"(?r)..", "abcde", - overlapped=True)], ['de', 'cd', 'bc', 'ab']) - - self.assertEqual([m.groups() for m in regex.finditer(r"(.)(-)(.)", - "a-b-c", overlapped=True)], [("a", "-", "b"), ("b", "-", "c")]) - self.assertEqual([m.groups() for m in regex.finditer(r"(?r)(.)(-)(.)", - "a-b-c", overlapped=True)], [("b", "-", "c"), ("a", "-", "b")]) - - def test_splititer(self): - self.assertEqual(regex.split(r",", "a,b,,c,"), ['a', 'b', '', 'c', '']) - self.assertEqual([m for m in regex.splititer(r",", "a,b,,c,")], ['a', - 'b', '', 'c', '']) - - def test_grapheme(self): - self.assertEqual(regex.match(ur"(?u)\X", u"\xE0").span(), (0, 1)) - self.assertEqual(regex.match(ur"(?u)\X", u"a\u0300").span(), (0, 2)) - - self.assertEqual(regex.findall(ur"(?u)\X", - u"a\xE0a\u0300e\xE9e\u0301"), [u'a', u'\xe0', u'a\u0300', u'e', - u'\xe9', u'e\u0301']) - self.assertEqual(regex.findall(ur"(?u)\X{3}", - u"a\xE0a\u0300e\xE9e\u0301"), [u'a\xe0a\u0300', u'e\xe9e\u0301']) - self.assertEqual(regex.findall(ur"(?u)\X", u"\r\r\n\u0301A\u0301"), - [u'\r', u'\r\n', u'\u0301', u'A\u0301']) - - def test_word_boundary(self): - text = u'The quick ("brown") fox can\'t jump 32.3 feet, right?' 
- self.assertEqual(regex.split(ur'(?V1)\b', text), [u'', u'The', u' ', - u'quick', u' ("', u'brown', u'") ', u'fox', u' ', u'can', u"'", u't', - u' ', u'jump', u' ', u'32', u'.', u'3', u' ', u'feet', u', ', - u'right', u'?']) - self.assertEqual(regex.split(ur'(?V1w)\b', text), [u'', u'The', u' ', - u'quick', u' ', u'(', u'"', u'brown', u'"', u')', u' ', u'fox', u' ', - u"can't", u' ', u'jump', u' ', u'32.3', u' ', u'feet', u',', u' ', - u'right', u'?', u'']) - - text = u"The fox" - self.assertEqual(regex.split(ur'(?V1)\b', text), [u'', u'The', u' ', - u'fox', u'']) - self.assertEqual(regex.split(ur'(?V1w)\b', text), [u'', u'The', u' ', - u' ', u'fox', u'']) - - text = u"can't aujourd'hui l'objectif" - self.assertEqual(regex.split(ur'(?V1)\b', text), [u'', u'can', u"'", - u't', u' ', u'aujourd', u"'", u'hui', u' ', u'l', u"'", u'objectif', - u'']) - self.assertEqual(regex.split(ur'(?V1w)\b', text), [u'', u"can't", u' ', - u"aujourd'hui", u' ', u"l'", u'objectif', u'']) - - def test_line_boundary(self): - self.assertEqual(regex.findall(r".+", "Line 1\nLine 2\n"), ["Line 1", - "Line 2"]) - self.assertEqual(regex.findall(r".+", "Line 1\rLine 2\r"), - ["Line 1\rLine 2\r"]) - self.assertEqual(regex.findall(r".+", "Line 1\r\nLine 2\r\n"), - ["Line 1\r", "Line 2\r"]) - self.assertEqual(regex.findall(r"(?w).+", "Line 1\nLine 2\n"), - ["Line 1", "Line 2"]) - self.assertEqual(regex.findall(r"(?w).+", "Line 1\rLine 2\r"), - ["Line 1", "Line 2"]) - self.assertEqual(regex.findall(r"(?w).+", "Line 1\r\nLine 2\r\n"), - ["Line 1", "Line 2"]) - - self.assertEqual(regex.search(r"^abc", "abc").start(), 0) - self.assertEqual(regex.search(r"^abc", "\nabc"), None) - self.assertEqual(regex.search(r"^abc", "\rabc"), None) - self.assertEqual(regex.search(r"(?w)^abc", "abc").start(), 0) - self.assertEqual(regex.search(r"(?w)^abc", "\nabc"), None) - self.assertEqual(regex.search(r"(?w)^abc", "\rabc"), None) - - self.assertEqual(regex.search(r"abc$", "abc").start(), 0) - 
self.assertEqual(regex.search(r"abc$", "abc\n").start(), 0) - self.assertEqual(regex.search(r"abc$", "abc\r"), None) - self.assertEqual(regex.search(r"(?w)abc$", "abc").start(), 0) - self.assertEqual(regex.search(r"(?w)abc$", "abc\n").start(), 0) - self.assertEqual(regex.search(r"(?w)abc$", "abc\r").start(), 0) - - self.assertEqual(regex.search(r"(?m)^abc", "abc").start(), 0) - self.assertEqual(regex.search(r"(?m)^abc", "\nabc").start(), 1) - self.assertEqual(regex.search(r"(?m)^abc", "\rabc"), None) - self.assertEqual(regex.search(r"(?mw)^abc", "abc").start(), 0) - self.assertEqual(regex.search(r"(?mw)^abc", "\nabc").start(), 1) - self.assertEqual(regex.search(r"(?mw)^abc", "\rabc").start(), 1) - - self.assertEqual(regex.search(r"(?m)abc$", "abc").start(), 0) - self.assertEqual(regex.search(r"(?m)abc$", "abc\n").start(), 0) - self.assertEqual(regex.search(r"(?m)abc$", "abc\r"), None) - self.assertEqual(regex.search(r"(?mw)abc$", "abc").start(), 0) - self.assertEqual(regex.search(r"(?mw)abc$", "abc\n").start(), 0) - self.assertEqual(regex.search(r"(?mw)abc$", "abc\r").start(), 0) - - def test_branch_reset(self): - self.assertEqual(regex.match(r"(?:(a)|(b))(c)", "ac").groups(), ('a', - None, 'c')) - self.assertEqual(regex.match(r"(?:(a)|(b))(c)", "bc").groups(), (None, - 'b', 'c')) - self.assertEqual(regex.match(r"(?:(?a)|(?b))(?c)", - "ac").groups(), ('a', None, 'c')) - self.assertEqual(regex.match(r"(?:(?a)|(?b))(?c)", - "bc").groups(), (None, 'b', 'c')) - - self.assertEqual(regex.match(r"(?a)(?:(?b)|(?c))(?d)", - "abd").groups(), ('a', 'b', None, 'd')) - self.assertEqual(regex.match(r"(?a)(?:(?b)|(?c))(?d)", - "acd").groups(), ('a', None, 'c', 'd')) - self.assertEqual(regex.match(r"(a)(?:(b)|(c))(d)", "abd").groups(), - ('a', 'b', None, 'd')) - - self.assertEqual(regex.match(r"(a)(?:(b)|(c))(d)", "acd").groups(), - ('a', None, 'c', 'd')) - self.assertEqual(regex.match(r"(a)(?|(b)|(b))(d)", "abd").groups(), - ('a', 'b', 'd')) - 
self.assertEqual(regex.match(r"(?|(?a)|(?b))(c)", "ac").groups(), - ('a', None, 'c')) - self.assertEqual(regex.match(r"(?|(?a)|(?b))(c)", "bc").groups(), - (None, 'b', 'c')) - self.assertEqual(regex.match(r"(?|(?a)|(?b))(c)", "ac").groups(), - ('a', 'c')) - - self.assertEqual(regex.match(r"(?|(?a)|(?b))(c)", "bc").groups(), - ('b', 'c')) - - self.assertEqual(regex.match(r"(?|(?a)(?b)|(?c)(?d))(e)", - "abe").groups(), ('a', 'b', 'e')) - self.assertEqual(regex.match(r"(?|(?a)(?b)|(?c)(?d))(e)", - "cde").groups(), ('d', 'c', 'e')) - self.assertEqual(regex.match(r"(?|(?a)(?b)|(?c)(d))(e)", - "abe").groups(), ('a', 'b', 'e')) - self.assertEqual(regex.match(r"(?|(?a)(?b)|(?c)(d))(e)", - "cde").groups(), ('d', 'c', 'e')) - self.assertEqual(regex.match(r"(?|(?a)(?b)|(c)(d))(e)", - "abe").groups(), ('a', 'b', 'e')) - self.assertEqual(regex.match(r"(?|(?a)(?b)|(c)(d))(e)", - "cde").groups(), ('c', 'd', 'e')) - - # Hg issue 87. - self.assertEqual(regex.match(r"(?|(?a)(?b)|(c)(?d))(e)", - "abe").groups(), ("a", "b", "e")) - self.assertEqual(regex.match(r"(?|(?a)(?b)|(c)(?d))(e)", - "abe").capturesdict(), {"a": ["a"], "b": ["b"]}) - self.assertEqual(regex.match(r"(?|(?a)(?b)|(c)(?d))(e)", - "cde").groups(), ("d", None, "e")) - self.assertEqual(regex.match(r"(?|(?a)(?b)|(c)(?d))(e)", - "cde").capturesdict(), {"a": ["c", "d"], "b": []}) - - def test_set(self): - self.assertEqual(regex.match(r"[a]", "a").span(), (0, 1)) - self.assertEqual(regex.match(r"(?i)[a]", "A").span(), (0, 1)) - self.assertEqual(regex.match(r"[a-b]", r"a").span(), (0, 1)) - self.assertEqual(regex.match(r"(?i)[a-b]", r"A").span(), (0, 1)) - - self.assertEqual(regex.sub(r"(?V0)([][])", r"-", "a[b]c"), "a-b-c") - - self.assertEqual(regex.findall(ur"[\p{Alpha}]", u"a0"), [u"a"]) - self.assertEqual(regex.findall(ur"(?i)[\p{Alpha}]", u"A0"), [u"A"]) - - self.assertEqual(regex.findall(ur"[a\p{Alpha}]", u"ab0"), [u"a", u"b"]) - self.assertEqual(regex.findall(ur"[a\P{Alpha}]", u"ab0"), [u"a", u"0"]) - 
self.assertEqual(regex.findall(ur"(?i)[a\p{Alpha}]", u"ab0"), [u"a", - u"b"]) - self.assertEqual(regex.findall(ur"(?i)[a\P{Alpha}]", u"ab0"), [u"a", - u"0"]) - - self.assertEqual(regex.findall(ur"[a-b\p{Alpha}]", u"abC0"), [u"a", - u"b", u"C"]) - self.assertEqual(regex.findall(ur"(?i)[a-b\p{Alpha}]", u"AbC0"), [u"A", - u"b", u"C"]) - - self.assertEqual(regex.findall(ur"[\p{Alpha}]", u"a0"), [u"a"]) - self.assertEqual(regex.findall(ur"[\P{Alpha}]", u"a0"), [u"0"]) - self.assertEqual(regex.findall(ur"[^\p{Alpha}]", u"a0"), [u"0"]) - self.assertEqual(regex.findall(ur"[^\P{Alpha}]", u"a0"), [u"a"]) - - self.assertEqual("".join(regex.findall(r"[^\d-h]", "a^b12c-h")), - 'a^bc') - self.assertEqual("".join(regex.findall(r"[^\dh]", "a^b12c-h")), - 'a^bc-') - self.assertEqual("".join(regex.findall(r"[^h\s\db]", "a^b 12c-h")), - 'a^c-') - self.assertEqual("".join(regex.findall(r"[^b\w]", "a b")), ' ') - self.assertEqual("".join(regex.findall(r"[^b\S]", "a b")), ' ') - self.assertEqual("".join(regex.findall(r"[^8\d]", "a 1b2")), 'a b') - - all_chars = u"".join(unichr(c) for c in range(0x100)) - self.assertEqual(len(regex.findall(ur"(?u)\p{ASCII}", all_chars)), 128) - self.assertEqual(len(regex.findall(ur"(?u)\p{Letter}", all_chars)), - 117) - self.assertEqual(len(regex.findall(ur"(?u)\p{Digit}", all_chars)), 10) - - # Set operators - self.assertEqual(len(regex.findall(ur"(?uV1)[\p{ASCII}&&\p{Letter}]", - all_chars)), 52) - self.assertEqual(len(regex.findall(ur"(?uV1)[\p{ASCII}&&\p{Alnum}&&\p{Letter}]", - all_chars)), 52) - self.assertEqual(len(regex.findall(ur"(?uV1)[\p{ASCII}&&\p{Alnum}&&\p{Digit}]", - all_chars)), 10) - self.assertEqual(len(regex.findall(ur"(?uV1)[\p{ASCII}&&\p{Cc}]", - all_chars)), 33) - self.assertEqual(len(regex.findall(ur"(?uV1)[\p{ASCII}&&\p{Graph}]", - all_chars)), 94) - self.assertEqual(len(regex.findall(ur"(?uV1)[\p{ASCII}--\p{Cc}]", - all_chars)), 95) - self.assertEqual(len(regex.findall(ur"(?u)[\p{Letter}\p{Digit}]", - all_chars)), 127) - 
self.assertEqual(len(regex.findall(ur"(?uV1)[\p{Letter}||\p{Digit}]", - all_chars)), 127) - self.assertEqual(len(regex.findall(ur"(?u)\p{HexDigit}", all_chars)), - 22) - self.assertEqual(len(regex.findall(ur"(?uV1)[\p{HexDigit}~~\p{Digit}]", - all_chars)), 12) - self.assertEqual(len(regex.findall(ur"(?uV1)[\p{Digit}~~\p{HexDigit}]", - all_chars)), 12) - - self.assertEqual(repr(type(regex.compile(r"(?V0)([][-])"))), - self.PATTERN_CLASS) - self.assertEqual(regex.findall(r"(?V1)[[a-z]--[aei]]", "abc"), ["b", - "c"]) - self.assertEqual(regex.findall(r"(?iV1)[[a-z]--[aei]]", "abc"), ["b", - "c"]) - self.assertEqual(regex.findall("(?V1)[\w--a]","abc"), ["b", "c"]) - self.assertEqual(regex.findall("(?iV1)[\w--a]","abc"), ["b", "c"]) - - def test_various(self): - tests = [ - # Test ?P< and ?P= extensions. - ('(?Pa)', '', '', regex.error, self.BAD_GROUP_NAME), # Begins with a digit. - ('(?Pa)', '', '', regex.error, self.BAD_GROUP_NAME), # Begins with an illegal char. - ('(?Pa)', '', '', regex.error, self.BAD_GROUP_NAME), # Begins with an illegal char. - - # Same tests, for the ?P= form. - ('(?Pa)(?P=foo_123', 'aa', '', regex.error, - self.MISSING_RPAREN), - ('(?Pa)(?P=1)', 'aa', '', regex.error, - self.BAD_GROUP_NAME), - ('(?Pa)(?P=!)', 'aa', '', regex.error, - self.BAD_GROUP_NAME), - ('(?Pa)(?P=foo_124)', 'aa', '', regex.error, - self.UNKNOWN_GROUP), # Backref to undefined group. - - ('(?Pa)', 'a', '1', repr('a')), - ('(?Pa)(?P=foo_123)', 'aa', '1', repr('a')), - - # Mal-formed \g in pattern treated as literal for compatibility. - (r'(?a)\ga)\g<1>', 'aa', '1', repr('a')), - (r'(?a)\g', 'aa', '', repr(None)), - (r'(?a)\g', 'aa', '', regex.error, - self.UNKNOWN_GROUP), # Backref to undefined group. - - ('(?a)', 'a', '1', repr('a')), - (r'(?a)\g', 'aa', '1', repr('a')), - - # Test octal escapes. - ('\\1', 'a', '', regex.error, self.UNKNOWN_GROUP), # Backreference. - ('[\\1]', '\1', '0', "'\\x01'"), # Character. 
- ('\\09', chr(0) + '9', '0', repr(chr(0) + '9')), - ('\\141', 'a', '0', repr('a')), - ('(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)(l)\\119', 'abcdefghijklk9', - '0,11', repr(('abcdefghijklk9', 'k'))), - - # Test \0 is handled everywhere. - (r'\0', '\0', '0', repr('\0')), - (r'[\0a]', '\0', '0', repr('\0')), - (r'[a\0]', '\0', '0', repr('\0')), - (r'[^a\0]', '\0', '', repr(None)), - - # Test various letter escapes. - (r'\a[\b]\f\n\r\t\v', '\a\b\f\n\r\t\v', '0', - repr('\a\b\f\n\r\t\v')), - (r'[\a][\b][\f][\n][\r][\t][\v]', '\a\b\f\n\r\t\v', '0', - repr('\a\b\f\n\r\t\v')), - (r'\c\e\g\h\i\j\k\o\p\q\y\z', 'ceghijkopqyz', '0', - repr('ceghijkopqyz')), - (r'\xff', '\377', '0', repr(chr(255))), - - # New \x semantics. - (r'\x00ffffffffffffff', '\377', '', repr(None)), - (r'\x00f', '\017', '', repr(None)), - (r'\x00fe', '\376', '', repr(None)), - - (r'\x00ff', '\377', '', repr(None)), - (r'\t\n\v\r\f\a\g', '\t\n\v\r\f\ag', '0', repr('\t\n\v\r\f\ag')), - ('\t\n\v\r\f\a\g', '\t\n\v\r\f\ag', '0', repr('\t\n\v\r\f\ag')), - (r'\t\n\v\r\f\a', '\t\n\v\r\f\a', '0', repr(chr(9) + chr(10) + - chr(11) + chr(13) + chr(12) + chr(7))), - (r'[\t][\n][\v][\r][\f][\b]', '\t\n\v\r\f\b', '0', - repr('\t\n\v\r\f\b')), - - (r"^\w+=(\\[\000-\277]|[^\n\\])*", - "SRC=eval.c g.c blah blah blah \\\\\n\tapes.c", '0', - repr("SRC=eval.c g.c blah blah blah \\\\")), - - # Test that . only matches \n in DOTALL mode. - ('a.b', 'acb', '0', repr('acb')), - ('a.b', 'a\nb', '', repr(None)), - ('a.*b', 'acc\nccb', '', repr(None)), - ('a.{4,5}b', 'acc\nccb', '', repr(None)), - ('a.b', 'a\rb', '0', repr('a\rb')), - # The new behaviour is that the inline flag affects only what follows. 
- ('a.b(?s)', 'a\nb', '0', repr('a\nb')), - ('a.b(?sV1)', 'a\nb', '', repr(None)), - ('(?s)a.b', 'a\nb', '0', repr('a\nb')), - ('a.*(?s)b', 'acc\nccb', '0', repr('acc\nccb')), - ('a.*(?sV1)b', 'acc\nccb', '', repr(None)), - ('(?s)a.*b', 'acc\nccb', '0', repr('acc\nccb')), - ('(?s)a.{4,5}b', 'acc\nccb', '0', repr('acc\nccb')), - - (')', '', '', regex.error, self.TRAILING_CHARS), # Unmatched right bracket. - ('', '', '0', "''"), # Empty pattern. - ('abc', 'abc', '0', repr('abc')), - ('abc', 'xbc', '', repr(None)), - ('abc', 'axc', '', repr(None)), - ('abc', 'abx', '', repr(None)), - ('abc', 'xabcy', '0', repr('abc')), - ('abc', 'ababc', '0', repr('abc')), - ('ab*c', 'abc', '0', repr('abc')), - ('ab*bc', 'abc', '0', repr('abc')), - - ('ab*bc', 'abbc', '0', repr('abbc')), - ('ab*bc', 'abbbbc', '0', repr('abbbbc')), - ('ab+bc', 'abbc', '0', repr('abbc')), - ('ab+bc', 'abc', '', repr(None)), - ('ab+bc', 'abq', '', repr(None)), - ('ab+bc', 'abbbbc', '0', repr('abbbbc')), - ('ab?bc', 'abbc', '0', repr('abbc')), - ('ab?bc', 'abc', '0', repr('abc')), - ('ab?bc', 'abbbbc', '', repr(None)), - ('ab?c', 'abc', '0', repr('abc')), - - ('^abc$', 'abc', '0', repr('abc')), - ('^abc$', 'abcc', '', repr(None)), - ('^abc', 'abcc', '0', repr('abc')), - ('^abc$', 'aabc', '', repr(None)), - ('abc$', 'aabc', '0', repr('abc')), - ('^', 'abc', '0', repr('')), - ('$', 'abc', '0', repr('')), - ('a.c', 'abc', '0', repr('abc')), - ('a.c', 'axc', '0', repr('axc')), - ('a.*c', 'axyzc', '0', repr('axyzc')), - - ('a.*c', 'axyzd', '', repr(None)), - ('a[bc]d', 'abc', '', repr(None)), - ('a[bc]d', 'abd', '0', repr('abd')), - ('a[b-d]e', 'abd', '', repr(None)), - ('a[b-d]e', 'ace', '0', repr('ace')), - ('a[b-d]', 'aac', '0', repr('ac')), - ('a[-b]', 'a-', '0', repr('a-')), - ('a[\\-b]', 'a-', '0', repr('a-')), - ('a[b-]', 'a-', '0', repr('a-')), - ('a[]b', '-', '', regex.error, self.BAD_SET), - - ('a[', '-', '', regex.error, self.BAD_SET), - ('a\\', '-', '', regex.error, self.BAD_ESCAPE), - ('abc)', 
'-', '', regex.error, self.TRAILING_CHARS), - ('(abc', '-', '', regex.error, self.MISSING_RPAREN), - ('a]', 'a]', '0', repr('a]')), - ('a[]]b', 'a]b', '0', repr('a]b')), - ('a[]]b', 'a]b', '0', repr('a]b')), - ('a[^bc]d', 'aed', '0', repr('aed')), - ('a[^bc]d', 'abd', '', repr(None)), - ('a[^-b]c', 'adc', '0', repr('adc')), - - ('a[^-b]c', 'a-c', '', repr(None)), - ('a[^]b]c', 'a]c', '', repr(None)), - ('a[^]b]c', 'adc', '0', repr('adc')), - ('\\ba\\b', 'a-', '0', repr('a')), - ('\\ba\\b', '-a', '0', repr('a')), - ('\\ba\\b', '-a-', '0', repr('a')), - ('\\by\\b', 'xy', '', repr(None)), - ('\\by\\b', 'yz', '', repr(None)), - ('\\by\\b', 'xyz', '', repr(None)), - ('x\\b', 'xyz', '', repr(None)), - - ('x\\B', 'xyz', '0', repr('x')), - ('\\Bz', 'xyz', '0', repr('z')), - ('z\\B', 'xyz', '', repr(None)), - ('\\Bx', 'xyz', '', repr(None)), - ('\\Ba\\B', 'a-', '', repr(None)), - ('\\Ba\\B', '-a', '', repr(None)), - ('\\Ba\\B', '-a-', '', repr(None)), - ('\\By\\B', 'xy', '', repr(None)), - ('\\By\\B', 'yz', '', repr(None)), - ('\\By\\b', 'xy', '0', repr('y')), - - ('\\by\\B', 'yz', '0', repr('y')), - ('\\By\\B', 'xyz', '0', repr('y')), - ('ab|cd', 'abc', '0', repr('ab')), - ('ab|cd', 'abcd', '0', repr('ab')), - ('()ef', 'def', '0,1', repr(('ef', ''))), - ('$b', 'b', '', repr(None)), - ('a\\(b', 'a(b', '', repr(('a(b',))), - ('a\\(*b', 'ab', '0', repr('ab')), - ('a\\(*b', 'a((b', '0', repr('a((b')), - ('a\\\\b', 'a\\b', '0', repr('a\\b')), - - ('((a))', 'abc', '0,1,2', repr(('a', 'a', 'a'))), - ('(a)b(c)', 'abc', '0,1,2', repr(('abc', 'a', 'c'))), - ('a+b+c', 'aabbabc', '0', repr('abc')), - ('(a+|b)*', 'ab', '0,1', repr(('ab', 'b'))), - ('(a+|b)+', 'ab', '0,1', repr(('ab', 'b'))), - ('(a+|b)?', 'ab', '0,1', repr(('a', 'a'))), - (')(', '-', '', regex.error, self.TRAILING_CHARS), - ('[^ab]*', 'cde', '0', repr('cde')), - ('abc', '', '', repr(None)), - ('a*', '', '0', repr('')), - - ('a|b|c|d|e', 'e', '0', repr('e')), - ('(a|b|c|d|e)f', 'ef', '0,1', repr(('ef', 'e'))), - 
('abcd*efg', 'abcdefg', '0', repr('abcdefg')), - ('ab*', 'xabyabbbz', '0', repr('ab')), - ('ab*', 'xayabbbz', '0', repr('a')), - ('(ab|cd)e', 'abcde', '0,1', repr(('cde', 'cd'))), - ('[abhgefdc]ij', 'hij', '0', repr('hij')), - ('^(ab|cd)e', 'abcde', '', repr(None)), - ('(abc|)ef', 'abcdef', '0,1', repr(('ef', ''))), - ('(a|b)c*d', 'abcd', '0,1', repr(('bcd', 'b'))), - - ('(ab|ab*)bc', 'abc', '0,1', repr(('abc', 'a'))), - ('a([bc]*)c*', 'abc', '0,1', repr(('abc', 'bc'))), - ('a([bc]*)(c*d)', 'abcd', '0,1,2', repr(('abcd', 'bc', 'd'))), - ('a([bc]+)(c*d)', 'abcd', '0,1,2', repr(('abcd', 'bc', 'd'))), - ('a([bc]*)(c+d)', 'abcd', '0,1,2', repr(('abcd', 'b', 'cd'))), - ('a[bcd]*dcdcde', 'adcdcde', '0', repr('adcdcde')), - ('a[bcd]+dcdcde', 'adcdcde', '', repr(None)), - ('(ab|a)b*c', 'abc', '0,1', repr(('abc', 'ab'))), - ('((a)(b)c)(d)', 'abcd', '1,2,3,4', repr(('abc', 'a', 'b', 'd'))), - ('[a-zA-Z_][a-zA-Z0-9_]*', 'alpha', '0', repr('alpha')), - - ('^a(bc+|b[eh])g|.h$', 'abh', '0,1', repr(('bh', None))), - ('(bc+d$|ef*g.|h?i(j|k))', 'effgz', '0,1,2', repr(('effgz', - 'effgz', None))), - ('(bc+d$|ef*g.|h?i(j|k))', 'ij', '0,1,2', repr(('ij', 'ij', - 'j'))), - ('(bc+d$|ef*g.|h?i(j|k))', 'effg', '', repr(None)), - ('(bc+d$|ef*g.|h?i(j|k))', 'bcdd', '', repr(None)), - ('(bc+d$|ef*g.|h?i(j|k))', 'reffgz', '0,1,2', repr(('effgz', - 'effgz', None))), - ('(((((((((a)))))))))', 'a', '0', repr('a')), - ('multiple words of text', 'uh-uh', '', repr(None)), - ('multiple words', 'multiple words, yeah', '0', - repr('multiple words')), - ('(.*)c(.*)', 'abcde', '0,1,2', repr(('abcde', 'ab', 'de'))), - - ('\\((.*), (.*)\\)', '(a, b)', '2,1', repr(('b', 'a'))), - ('[k]', 'ab', '', repr(None)), - ('a[-]?c', 'ac', '0', repr('ac')), - ('(abc)\\1', 'abcabc', '1', repr('abc')), - ('([a-c]*)\\1', 'abcabc', '1', repr('abc')), - ('^(.+)?B', 'AB', '1', repr('A')), - ('(a+).\\1$', 'aaaaa', '0,1', repr(('aaaaa', 'aa'))), - ('^(a+).\\1$', 'aaaa', '', repr(None)), - ('(abc)\\1', 'abcabc', '0,1', 
repr(('abcabc', 'abc'))), - ('([a-c]+)\\1', 'abcabc', '0,1', repr(('abcabc', 'abc'))), - - ('(a)\\1', 'aa', '0,1', repr(('aa', 'a'))), - ('(a+)\\1', 'aa', '0,1', repr(('aa', 'a'))), - ('(a+)+\\1', 'aa', '0,1', repr(('aa', 'a'))), - ('(a).+\\1', 'aba', '0,1', repr(('aba', 'a'))), - ('(a)ba*\\1', 'aba', '0,1', repr(('aba', 'a'))), - ('(aa|a)a\\1$', 'aaa', '0,1', repr(('aaa', 'a'))), - ('(a|aa)a\\1$', 'aaa', '0,1', repr(('aaa', 'a'))), - ('(a+)a\\1$', 'aaa', '0,1', repr(('aaa', 'a'))), - ('([abc]*)\\1', 'abcabc', '0,1', repr(('abcabc', 'abc'))), - ('(a)(b)c|ab', 'ab', '0,1,2', repr(('ab', None, None))), - - ('(a)+x', 'aaax', '0,1', repr(('aaax', 'a'))), - ('([ac])+x', 'aacx', '0,1', repr(('aacx', 'c'))), - ('([^/]*/)*sub1/', 'd:msgs/tdir/sub1/trial/away.cpp', '0,1', - repr(('d:msgs/tdir/sub1/', 'tdir/'))), - ('([^.]*)\\.([^:]*):[T ]+(.*)', 'track1.title:TBlah blah blah', - '0,1,2,3', repr(('track1.title:TBlah blah blah', 'track1', - 'title', 'Blah blah blah'))), - ('([^N]*N)+', 'abNNxyzN', '0,1', repr(('abNNxyzN', 'xyzN'))), - ('([^N]*N)+', 'abNNxyz', '0,1', repr(('abNN', 'N'))), - ('([abc]*)x', 'abcx', '0,1', repr(('abcx', 'abc'))), - ('([abc]*)x', 'abc', '', repr(None)), - ('([xyz]*)x', 'abcx', '0,1', repr(('x', ''))), - ('(a)+b|aac', 'aac', '0,1', repr(('aac', None))), - - # Test symbolic groups. - ('(?Paaa)a', 'aaaa', '', regex.error, self.BAD_GROUP_NAME), - ('(?Paaa)a', 'aaaa', '0,id', repr(('aaaa', 'aaa'))), - ('(?Paa)(?P=id)', 'aaaa', '0,id', repr(('aaaa', 'aa'))), - ('(?Paa)(?P=xd)', 'aaaa', '', regex.error, self.UNKNOWN_GROUP), - - # Character properties. 
- (ur"\g", u"g", '0', repr(u'g')), - (ur"\g<1>", u"g", '', regex.error, self.UNKNOWN_GROUP), - (ur"(.)\g<1>", u"gg", '0', repr(u'gg')), - (ur"(.)\g<1>", u"gg", '', repr((u'gg', u'g'))), - (ur"\N", u"N", '0', repr(u'N')), - (ur"\N{LATIN SMALL LETTER A}", u"a", '0', repr(u'a')), - (ur"\p", u"p", '0', repr(u'p')), - (ur"\p{Ll}", u"a", '0', repr(u'a')), - (ur"\P", u"P", '0', repr(u'P')), - (ur"\P{Lu}", u"p", '0', repr(u'p')), - - # All tests from Perl. - ('abc', 'abc', '0', repr('abc')), - ('abc', 'xbc', '', repr(None)), - ('abc', 'axc', '', repr(None)), - ('abc', 'abx', '', repr(None)), - ('abc', 'xabcy', '0', repr('abc')), - ('abc', 'ababc', '0', repr('abc')), - - ('ab*c', 'abc', '0', repr('abc')), - ('ab*bc', 'abc', '0', repr('abc')), - ('ab*bc', 'abbc', '0', repr('abbc')), - ('ab*bc', 'abbbbc', '0', repr('abbbbc')), - ('ab{0,}bc', 'abbbbc', '0', repr('abbbbc')), - ('ab+bc', 'abbc', '0', repr('abbc')), - ('ab+bc', 'abc', '', repr(None)), - ('ab+bc', 'abq', '', repr(None)), - ('ab{1,}bc', 'abq', '', repr(None)), - ('ab+bc', 'abbbbc', '0', repr('abbbbc')), - - ('ab{1,}bc', 'abbbbc', '0', repr('abbbbc')), - ('ab{1,3}bc', 'abbbbc', '0', repr('abbbbc')), - ('ab{3,4}bc', 'abbbbc', '0', repr('abbbbc')), - ('ab{4,5}bc', 'abbbbc', '', repr(None)), - ('ab?bc', 'abbc', '0', repr('abbc')), - ('ab?bc', 'abc', '0', repr('abc')), - ('ab{0,1}bc', 'abc', '0', repr('abc')), - ('ab?bc', 'abbbbc', '', repr(None)), - ('ab?c', 'abc', '0', repr('abc')), - ('ab{0,1}c', 'abc', '0', repr('abc')), - - ('^abc$', 'abc', '0', repr('abc')), - ('^abc$', 'abcc', '', repr(None)), - ('^abc', 'abcc', '0', repr('abc')), - ('^abc$', 'aabc', '', repr(None)), - ('abc$', 'aabc', '0', repr('abc')), - ('^', 'abc', '0', repr('')), - ('$', 'abc', '0', repr('')), - ('a.c', 'abc', '0', repr('abc')), - ('a.c', 'axc', '0', repr('axc')), - ('a.*c', 'axyzc', '0', repr('axyzc')), - - ('a.*c', 'axyzd', '', repr(None)), - ('a[bc]d', 'abc', '', repr(None)), - ('a[bc]d', 'abd', '0', repr('abd')), - ('a[b-d]e', 'abd', '', 
repr(None)), - ('a[b-d]e', 'ace', '0', repr('ace')), - ('a[b-d]', 'aac', '0', repr('ac')), - ('a[-b]', 'a-', '0', repr('a-')), - ('a[b-]', 'a-', '0', repr('a-')), - ('a[b-a]', '-', '', regex.error, self.BAD_CHAR_RANGE), - ('a[]b', '-', '', regex.error, self.BAD_SET), - - ('a[', '-', '', regex.error, self.BAD_SET), - ('a]', 'a]', '0', repr('a]')), - ('a[]]b', 'a]b', '0', repr('a]b')), - ('a[^bc]d', 'aed', '0', repr('aed')), - ('a[^bc]d', 'abd', '', repr(None)), - ('a[^-b]c', 'adc', '0', repr('adc')), - ('a[^-b]c', 'a-c', '', repr(None)), - ('a[^]b]c', 'a]c', '', repr(None)), - ('a[^]b]c', 'adc', '0', repr('adc')), - ('ab|cd', 'abc', '0', repr('ab')), - - ('ab|cd', 'abcd', '0', repr('ab')), - ('()ef', 'def', '0,1', repr(('ef', ''))), - ('*a', '-', '', regex.error, self.NOTHING_TO_REPEAT), - ('(*)b', '-', '', regex.error, self.NOTHING_TO_REPEAT), - ('$b', 'b', '', repr(None)), - ('a\\', '-', '', regex.error, self.BAD_ESCAPE), - ('a\\(b', 'a(b', '', repr(('a(b',))), - ('a\\(*b', 'ab', '0', repr('ab')), - ('a\\(*b', 'a((b', '0', repr('a((b')), - ('a\\\\b', 'a\\b', '0', repr('a\\b')), - - ('abc)', '-', '', regex.error, self.TRAILING_CHARS), - ('(abc', '-', '', regex.error, self.MISSING_RPAREN), - ('((a))', 'abc', '0,1,2', repr(('a', 'a', 'a'))), - ('(a)b(c)', 'abc', '0,1,2', repr(('abc', 'a', 'c'))), - ('a+b+c', 'aabbabc', '0', repr('abc')), - ('a{1,}b{1,}c', 'aabbabc', '0', repr('abc')), - ('a**', '-', '', regex.error, self.NOTHING_TO_REPEAT), - ('a.+?c', 'abcabc', '0', repr('abc')), - ('(a+|b)*', 'ab', '0,1', repr(('ab', 'b'))), - ('(a+|b){0,}', 'ab', '0,1', repr(('ab', 'b'))), - - ('(a+|b)+', 'ab', '0,1', repr(('ab', 'b'))), - ('(a+|b){1,}', 'ab', '0,1', repr(('ab', 'b'))), - ('(a+|b)?', 'ab', '0,1', repr(('a', 'a'))), - ('(a+|b){0,1}', 'ab', '0,1', repr(('a', 'a'))), - (')(', '-', '', regex.error, self.TRAILING_CHARS), - ('[^ab]*', 'cde', '0', repr('cde')), - ('abc', '', '', repr(None)), - ('a*', '', '0', repr('')), - ('([abc])*d', 'abbbcd', '0,1', repr(('abbbcd', 
'c'))), - ('([abc])*bcd', 'abcd', '0,1', repr(('abcd', 'a'))), - - ('a|b|c|d|e', 'e', '0', repr('e')), - ('(a|b|c|d|e)f', 'ef', '0,1', repr(('ef', 'e'))), - ('abcd*efg', 'abcdefg', '0', repr('abcdefg')), - ('ab*', 'xabyabbbz', '0', repr('ab')), - ('ab*', 'xayabbbz', '0', repr('a')), - ('(ab|cd)e', 'abcde', '0,1', repr(('cde', 'cd'))), - ('[abhgefdc]ij', 'hij', '0', repr('hij')), - ('^(ab|cd)e', 'abcde', '', repr(None)), - ('(abc|)ef', 'abcdef', '0,1', repr(('ef', ''))), - ('(a|b)c*d', 'abcd', '0,1', repr(('bcd', 'b'))), - - ('(ab|ab*)bc', 'abc', '0,1', repr(('abc', 'a'))), - ('a([bc]*)c*', 'abc', '0,1', repr(('abc', 'bc'))), - ('a([bc]*)(c*d)', 'abcd', '0,1,2', repr(('abcd', 'bc', 'd'))), - ('a([bc]+)(c*d)', 'abcd', '0,1,2', repr(('abcd', 'bc', 'd'))), - ('a([bc]*)(c+d)', 'abcd', '0,1,2', repr(('abcd', 'b', 'cd'))), - ('a[bcd]*dcdcde', 'adcdcde', '0', repr('adcdcde')), - ('a[bcd]+dcdcde', 'adcdcde', '', repr(None)), - ('(ab|a)b*c', 'abc', '0,1', repr(('abc', 'ab'))), - ('((a)(b)c)(d)', 'abcd', '1,2,3,4', repr(('abc', 'a', 'b', 'd'))), - ('[a-zA-Z_][a-zA-Z0-9_]*', 'alpha', '0', repr('alpha')), - - ('^a(bc+|b[eh])g|.h$', 'abh', '0,1', repr(('bh', None))), - ('(bc+d$|ef*g.|h?i(j|k))', 'effgz', '0,1,2', repr(('effgz', - 'effgz', None))), - ('(bc+d$|ef*g.|h?i(j|k))', 'ij', '0,1,2', repr(('ij', 'ij', - 'j'))), - ('(bc+d$|ef*g.|h?i(j|k))', 'effg', '', repr(None)), - ('(bc+d$|ef*g.|h?i(j|k))', 'bcdd', '', repr(None)), - ('(bc+d$|ef*g.|h?i(j|k))', 'reffgz', '0,1,2', repr(('effgz', - 'effgz', None))), - ('((((((((((a))))))))))', 'a', '10', repr('a')), - ('((((((((((a))))))))))\\10', 'aa', '0', repr('aa')), - - # Python does not have the same rules for \\41 so this is a syntax error - # ('((((((((((a))))))))))\\41', 'aa', '', repr(None)), - # ('((((((((((a))))))))))\\41', 'a!', '0', repr('a!')), - ('((((((((((a))))))))))\\41', '', '', regex.error, - self.UNKNOWN_GROUP), - ('(?i)((((((((((a))))))))))\\41', '', '', regex.error, - self.UNKNOWN_GROUP), - - ('(((((((((a)))))))))', 
'a', '0', repr('a')), - ('multiple words of text', 'uh-uh', '', repr(None)), - ('multiple words', 'multiple words, yeah', '0', - repr('multiple words')), - ('(.*)c(.*)', 'abcde', '0,1,2', repr(('abcde', 'ab', 'de'))), - ('\\((.*), (.*)\\)', '(a, b)', '2,1', repr(('b', 'a'))), - ('[k]', 'ab', '', repr(None)), - ('a[-]?c', 'ac', '0', repr('ac')), - ('(abc)\\1', 'abcabc', '1', repr('abc')), - ('([a-c]*)\\1', 'abcabc', '1', repr('abc')), - ('(?i)abc', 'ABC', '0', repr('ABC')), - - ('(?i)abc', 'XBC', '', repr(None)), - ('(?i)abc', 'AXC', '', repr(None)), - ('(?i)abc', 'ABX', '', repr(None)), - ('(?i)abc', 'XABCY', '0', repr('ABC')), - ('(?i)abc', 'ABABC', '0', repr('ABC')), - ('(?i)ab*c', 'ABC', '0', repr('ABC')), - ('(?i)ab*bc', 'ABC', '0', repr('ABC')), - ('(?i)ab*bc', 'ABBC', '0', repr('ABBC')), - ('(?i)ab*?bc', 'ABBBBC', '0', repr('ABBBBC')), - ('(?i)ab{0,}?bc', 'ABBBBC', '0', repr('ABBBBC')), - - ('(?i)ab+?bc', 'ABBC', '0', repr('ABBC')), - ('(?i)ab+bc', 'ABC', '', repr(None)), - ('(?i)ab+bc', 'ABQ', '', repr(None)), - ('(?i)ab{1,}bc', 'ABQ', '', repr(None)), - ('(?i)ab+bc', 'ABBBBC', '0', repr('ABBBBC')), - ('(?i)ab{1,}?bc', 'ABBBBC', '0', repr('ABBBBC')), - ('(?i)ab{1,3}?bc', 'ABBBBC', '0', repr('ABBBBC')), - ('(?i)ab{3,4}?bc', 'ABBBBC', '0', repr('ABBBBC')), - ('(?i)ab{4,5}?bc', 'ABBBBC', '', repr(None)), - ('(?i)ab??bc', 'ABBC', '0', repr('ABBC')), - - ('(?i)ab??bc', 'ABC', '0', repr('ABC')), - ('(?i)ab{0,1}?bc', 'ABC', '0', repr('ABC')), - ('(?i)ab??bc', 'ABBBBC', '', repr(None)), - ('(?i)ab??c', 'ABC', '0', repr('ABC')), - ('(?i)ab{0,1}?c', 'ABC', '0', repr('ABC')), - ('(?i)^abc$', 'ABC', '0', repr('ABC')), - ('(?i)^abc$', 'ABCC', '', repr(None)), - ('(?i)^abc', 'ABCC', '0', repr('ABC')), - ('(?i)^abc$', 'AABC', '', repr(None)), - ('(?i)abc$', 'AABC', '0', repr('ABC')), - - ('(?i)^', 'ABC', '0', repr('')), - ('(?i)$', 'ABC', '0', repr('')), - ('(?i)a.c', 'ABC', '0', repr('ABC')), - ('(?i)a.c', 'AXC', '0', repr('AXC')), - ('(?i)a.*?c', 'AXYZC', '0', 
repr('AXYZC')), - ('(?i)a.*c', 'AXYZD', '', repr(None)), - ('(?i)a[bc]d', 'ABC', '', repr(None)), - ('(?i)a[bc]d', 'ABD', '0', repr('ABD')), - ('(?i)a[b-d]e', 'ABD', '', repr(None)), - ('(?i)a[b-d]e', 'ACE', '0', repr('ACE')), - - ('(?i)a[b-d]', 'AAC', '0', repr('AC')), - ('(?i)a[-b]', 'A-', '0', repr('A-')), - ('(?i)a[b-]', 'A-', '0', repr('A-')), - ('(?i)a[b-a]', '-', '', regex.error, self.BAD_CHAR_RANGE), - ('(?i)a[]b', '-', '', regex.error, self.BAD_SET), - ('(?i)a[', '-', '', regex.error, self.BAD_SET), - ('(?i)a]', 'A]', '0', repr('A]')), - ('(?i)a[]]b', 'A]B', '0', repr('A]B')), - ('(?i)a[^bc]d', 'AED', '0', repr('AED')), - ('(?i)a[^bc]d', 'ABD', '', repr(None)), - - ('(?i)a[^-b]c', 'ADC', '0', repr('ADC')), - ('(?i)a[^-b]c', 'A-C', '', repr(None)), - ('(?i)a[^]b]c', 'A]C', '', repr(None)), - ('(?i)a[^]b]c', 'ADC', '0', repr('ADC')), - ('(?i)ab|cd', 'ABC', '0', repr('AB')), - ('(?i)ab|cd', 'ABCD', '0', repr('AB')), - ('(?i)()ef', 'DEF', '0,1', repr(('EF', ''))), - ('(?i)*a', '-', '', regex.error, self.NOTHING_TO_REPEAT), - ('(?i)(*)b', '-', '', regex.error, self.NOTHING_TO_REPEAT), - ('(?i)$b', 'B', '', repr(None)), - - ('(?i)a\\', '-', '', regex.error, self.BAD_ESCAPE), - ('(?i)a\\(b', 'A(B', '', repr(('A(B',))), - ('(?i)a\\(*b', 'AB', '0', repr('AB')), - ('(?i)a\\(*b', 'A((B', '0', repr('A((B')), - ('(?i)a\\\\b', 'A\\B', '0', repr('A\\B')), - ('(?i)abc)', '-', '', regex.error, self.TRAILING_CHARS), - ('(?i)(abc', '-', '', regex.error, self.MISSING_RPAREN), - ('(?i)((a))', 'ABC', '0,1,2', repr(('A', 'A', 'A'))), - ('(?i)(a)b(c)', 'ABC', '0,1,2', repr(('ABC', 'A', 'C'))), - ('(?i)a+b+c', 'AABBABC', '0', repr('ABC')), - - ('(?i)a{1,}b{1,}c', 'AABBABC', '0', repr('ABC')), - ('(?i)a**', '-', '', regex.error, self.NOTHING_TO_REPEAT), - ('(?i)a.+?c', 'ABCABC', '0', repr('ABC')), - ('(?i)a.*?c', 'ABCABC', '0', repr('ABC')), - ('(?i)a.{0,5}?c', 'ABCABC', '0', repr('ABC')), - ('(?i)(a+|b)*', 'AB', '0,1', repr(('AB', 'B'))), - ('(?i)(a+|b){0,}', 'AB', '0,1', 
repr(('AB', 'B'))), - ('(?i)(a+|b)+', 'AB', '0,1', repr(('AB', 'B'))), - ('(?i)(a+|b){1,}', 'AB', '0,1', repr(('AB', 'B'))), - ('(?i)(a+|b)?', 'AB', '0,1', repr(('A', 'A'))), - - ('(?i)(a+|b){0,1}', 'AB', '0,1', repr(('A', 'A'))), - ('(?i)(a+|b){0,1}?', 'AB', '0,1', repr(('', None))), - ('(?i))(', '-', '', regex.error, self.TRAILING_CHARS), - ('(?i)[^ab]*', 'CDE', '0', repr('CDE')), - ('(?i)abc', '', '', repr(None)), - ('(?i)a*', '', '0', repr('')), - ('(?i)([abc])*d', 'ABBBCD', '0,1', repr(('ABBBCD', 'C'))), - ('(?i)([abc])*bcd', 'ABCD', '0,1', repr(('ABCD', 'A'))), - ('(?i)a|b|c|d|e', 'E', '0', repr('E')), - ('(?i)(a|b|c|d|e)f', 'EF', '0,1', repr(('EF', 'E'))), - - ('(?i)abcd*efg', 'ABCDEFG', '0', repr('ABCDEFG')), - ('(?i)ab*', 'XABYABBBZ', '0', repr('AB')), - ('(?i)ab*', 'XAYABBBZ', '0', repr('A')), - ('(?i)(ab|cd)e', 'ABCDE', '0,1', repr(('CDE', 'CD'))), - ('(?i)[abhgefdc]ij', 'HIJ', '0', repr('HIJ')), - ('(?i)^(ab|cd)e', 'ABCDE', '', repr(None)), - ('(?i)(abc|)ef', 'ABCDEF', '0,1', repr(('EF', ''))), - ('(?i)(a|b)c*d', 'ABCD', '0,1', repr(('BCD', 'B'))), - ('(?i)(ab|ab*)bc', 'ABC', '0,1', repr(('ABC', 'A'))), - ('(?i)a([bc]*)c*', 'ABC', '0,1', repr(('ABC', 'BC'))), - - ('(?i)a([bc]*)(c*d)', 'ABCD', '0,1,2', repr(('ABCD', 'BC', 'D'))), - ('(?i)a([bc]+)(c*d)', 'ABCD', '0,1,2', repr(('ABCD', 'BC', 'D'))), - ('(?i)a([bc]*)(c+d)', 'ABCD', '0,1,2', repr(('ABCD', 'B', 'CD'))), - ('(?i)a[bcd]*dcdcde', 'ADCDCDE', '0', repr('ADCDCDE')), - ('(?i)a[bcd]+dcdcde', 'ADCDCDE', '', repr(None)), - ('(?i)(ab|a)b*c', 'ABC', '0,1', repr(('ABC', 'AB'))), - ('(?i)((a)(b)c)(d)', 'ABCD', '1,2,3,4', repr(('ABC', 'A', 'B', - 'D'))), - ('(?i)[a-zA-Z_][a-zA-Z0-9_]*', 'ALPHA', '0', repr('ALPHA')), - ('(?i)^a(bc+|b[eh])g|.h$', 'ABH', '0,1', repr(('BH', None))), - ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'EFFGZ', '0,1,2', repr(('EFFGZ', - 'EFFGZ', None))), - - ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'IJ', '0,1,2', repr(('IJ', 'IJ', - 'J'))), - ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'EFFG', '', repr(None)), - 
('(?i)(bc+d$|ef*g.|h?i(j|k))', 'BCDD', '', repr(None)), - ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'REFFGZ', '0,1,2', repr(('EFFGZ', - 'EFFGZ', None))), - ('(?i)((((((((((a))))))))))', 'A', '10', repr('A')), - ('(?i)((((((((((a))))))))))\\10', 'AA', '0', repr('AA')), - #('(?i)((((((((((a))))))))))\\41', 'AA', '', repr(None)), - #('(?i)((((((((((a))))))))))\\41', 'A!', '0', repr('A!')), - ('(?i)(((((((((a)))))))))', 'A', '0', repr('A')), - ('(?i)(?:(?:(?:(?:(?:(?:(?:(?:(?:(a))))))))))', 'A', '1', - repr('A')), - ('(?i)(?:(?:(?:(?:(?:(?:(?:(?:(?:(a|b|c))))))))))', 'C', '1', - repr('C')), - ('(?i)multiple words of text', 'UH-UH', '', repr(None)), - - ('(?i)multiple words', 'MULTIPLE WORDS, YEAH', '0', - repr('MULTIPLE WORDS')), - ('(?i)(.*)c(.*)', 'ABCDE', '0,1,2', repr(('ABCDE', 'AB', 'DE'))), - ('(?i)\\((.*), (.*)\\)', '(A, B)', '2,1', repr(('B', 'A'))), - ('(?i)[k]', 'AB', '', repr(None)), - # ('(?i)abcd', 'ABCD', SUCCEED, 'found+"-"+\\found+"-"+\\\\found', repr(ABCD-$&-\\ABCD)), - # ('(?i)a(bc)d', 'ABCD', SUCCEED, 'g1+"-"+\\g1+"-"+\\\\g1', repr(BC-$1-\\BC)), - ('(?i)a[-]?c', 'AC', '0', repr('AC')), - ('(?i)(abc)\\1', 'ABCABC', '1', repr('ABC')), - ('(?i)([a-c]*)\\1', 'ABCABC', '1', repr('ABC')), - ('a(?!b).', 'abad', '0', repr('ad')), - ('a(?=d).', 'abad', '0', repr('ad')), - ('a(?=c|d).', 'abad', '0', repr('ad')), - - ('a(?:b|c|d)(.)', 'ace', '1', repr('e')), - ('a(?:b|c|d)*(.)', 'ace', '1', repr('e')), - ('a(?:b|c|d)+?(.)', 'ace', '1', repr('e')), - ('a(?:b|(c|e){1,2}?|d)+?(.)', 'ace', '1,2', repr(('c', 'e'))), - - # Lookbehind: split by : but not if it is escaped by -. - ('(?]*?b', 'a>b', '', repr(None)), - # Bug 490573: minimizing repeat problem. - (r'^a*?$', 'foo', '', repr(None)), - # Bug 470582: nested groups problem. - (r'^((a)c)?(ab)$', 'ab', '1,2,3', repr((None, None, 'ab'))), - # Another minimizing repeat problem (capturing groups in assertions). 
- ('^([ab]*?)(?=(b)?)c', 'abc', '1,2', repr(('ab', None))), - ('^([ab]*?)(?!(b))c', 'abc', '1,2', repr(('ab', None))), - ('^([ab]*?)(?(.){0,2})d", "abcd").captures(1), - ['b', 'c']) - self.assertEqual(regex.search(r"(.)+", "a").captures(1), ['a']) - - def test_guards(self): - m = regex.search(r"(X.*?Y\s*){3}(X\s*)+AB:", - "XY\nX Y\nX Y\nXY\nXX AB:") - self.assertEqual(m.span(0, 1, 2), ((3, 21), (12, 15), (16, 18))) - - m = regex.search(r"(X.*?Y\s*){3,}(X\s*)+AB:", - "XY\nX Y\nX Y\nXY\nXX AB:") - self.assertEqual(m.span(0, 1, 2), ((0, 21), (12, 15), (16, 18))) - - m = regex.search(r'\d{4}(\s*\w)?\W*((?!\d)\w){2}', "9999XX") - self.assertEqual(m.span(0, 1, 2), ((0, 6), (-1, -1), (5, 6))) - - m = regex.search(r'A\s*?.*?(\n+.*?\s*?){0,2}\(X', 'A\n1\nS\n1 (X') - self.assertEqual(m.span(0, 1), ((0, 10), (5, 8))) - - m = regex.search('Derde\s*:', 'aaaaaa:\nDerde:') - self.assertEqual(m.span(), (8, 14)) - m = regex.search('Derde\s*:', 'aaaaa:\nDerde:') - self.assertEqual(m.span(), (7, 13)) - - def test_turkic(self): - # Turkish has dotted and dotless I/i. 
- pairs = u"I=i;I=\u0131;i=\u0130" - - all_chars = set() - matching = set() - for pair in pairs.split(";"): - ch1, ch2 = pair.split("=") - all_chars.update((ch1, ch2)) - matching.add((ch1, ch1)) - matching.add((ch1, ch2)) - matching.add((ch2, ch1)) - matching.add((ch2, ch2)) - - for ch1 in all_chars: - for ch2 in all_chars: - m = regex.match(ur"(?iu)\A" + ch1 + ur"\Z", ch2) - if m: - if (ch1, ch2) not in matching: - self.fail("%s matching %s" % (repr(ch1), repr(ch2))) - else: - if (ch1, ch2) in matching: - self.fail("%s not matching %s" % (repr(ch1), - repr(ch2))) - - def test_named_lists(self): - options = [u"one", u"two", u"three"] - self.assertEqual(regex.match(ur"333\L444", u"333one444", - bar=options).group(), u"333one444") - self.assertEqual(regex.match(ur"(?i)333\L444", u"333TWO444", - bar=options).group(), u"333TWO444") - self.assertEqual(regex.match(ur"333\L444", u"333four444", - bar=options), None) - - options = ["one", "two", "three"] - self.assertEqual(regex.match(r"333\L444", "333one444", - bar=options).group(), "333one444") - self.assertEqual(regex.match(r"(?i)333\L444", "333TWO444", - bar=options).group(), "333TWO444") - self.assertEqual(regex.match(r"333\L444", "333four444", - bar=options), None) - - self.assertEqual(repr(type(regex.compile(r"3\L4\L+5", - bar=["one", "two", "three"]))), self.PATTERN_CLASS) - - self.assertEqual(regex.findall(r"^\L", "solid QWERT", - options=set(['good', 'brilliant', '+s\\ol[i}d'])), []) - self.assertEqual(regex.findall(r"^\L", "+solid QWERT", - options=set(['good', 'brilliant', '+solid'])), ['+solid']) - - options = [u"STRASSE"] - self.assertEqual(regex.match(ur"(?fiu)\L", - u"stra\N{LATIN SMALL LETTER SHARP S}e", words=options).span(), (0, - 6)) - - options = [u"STRASSE", u"stress"] - self.assertEqual(regex.match(ur"(?fiu)\L", - u"stra\N{LATIN SMALL LETTER SHARP S}e", words=options).span(), (0, - 6)) - - options = [u"stra\N{LATIN SMALL LETTER SHARP S}e"] - self.assertEqual(regex.match(ur"(?fiu)\L", u"STRASSE", - 
words=options).span(), (0, 7)) - - options = ["kit"] - self.assertEqual(regex.search(ur"(?iu)\L", u"SKITS", - words=options).span(), (1, 4)) - self.assertEqual(regex.search(ur"(?iu)\L", - u"SK\N{LATIN CAPITAL LETTER I WITH DOT ABOVE}TS", - words=options).span(), (1, 4)) - - self.assertEqual(regex.search(ur"(?fiu)\b(\w+) +\1\b", - u" stra\N{LATIN SMALL LETTER SHARP S}e STRASSE ").span(), (1, 15)) - self.assertEqual(regex.search(ur"(?fiu)\b(\w+) +\1\b", - u" STRASSE stra\N{LATIN SMALL LETTER SHARP S}e ").span(), (1, 15)) - - self.assertEqual(regex.search(r"^\L$", "", options=[]).span(), - (0, 0)) - - def test_fuzzy(self): - # Some tests borrowed from TRE library tests. - self.assertEqual(repr(type(regex.compile('(fou){s,e<=1}'))), - self.PATTERN_CLASS) - self.assertEqual(repr(type(regex.compile('(fuu){s}'))), - self.PATTERN_CLASS) - self.assertEqual(repr(type(regex.compile('(fuu){s,e}'))), - self.PATTERN_CLASS) - self.assertEqual(repr(type(regex.compile('(anaconda){1i+1d<1,s<=1}'))), - self.PATTERN_CLASS) - self.assertEqual(repr(type(regex.compile('(anaconda){1i+1d<1,s<=1,e<=10}'))), - self.PATTERN_CLASS) - self.assertEqual(repr(type(regex.compile('(anaconda){s<=1,e<=1,1i+1d<1}'))), - self.PATTERN_CLASS) - - text = 'molasses anaconda foo bar baz smith anderson ' - self.assertEqual(regex.search('(znacnda){s<=1,e<=3,1i+1d<1}', text), - None) - self.assertEqual(regex.search('(znacnda){s<=1,e<=3,1i+1d<2}', - text).span(0, 1), ((9, 17), (9, 17))) - self.assertEqual(regex.search('(ananda){1i+1d<2}', text), None) - self.assertEqual(regex.search(r"(?:\bznacnda){e<=2}", text)[0], - "anaconda") - self.assertEqual(regex.search(r"(?:\bnacnda){e<=2}", text)[0], - "anaconda") - - text = 'anaconda foo bar baz smith anderson' - self.assertEqual(regex.search('(fuu){i<=3,d<=3,e<=5}', text).span(0, - 1), ((0, 0), (0, 0))) - self.assertEqual(regex.search('(?b)(fuu){i<=3,d<=3,e<=5}', - text).span(0, 1), ((9, 10), (9, 10))) - self.assertEqual(regex.search('(fuu){i<=2,d<=2,e<=5}', 
text).span(0, - 1), ((7, 10), (7, 10))) - self.assertEqual(regex.search('(?e)(fuu){i<=2,d<=2,e<=5}', - text).span(0, 1), ((9, 10), (9, 10))) - self.assertEqual(regex.search('(fuu){i<=3,d<=3,e}', text).span(0, 1), - ((0, 0), (0, 0))) - self.assertEqual(regex.search('(?b)(fuu){i<=3,d<=3,e}', text).span(0, - 1), ((9, 10), (9, 10))) - - self.assertEqual(repr(type(regex.compile('(approximate){s<=3,1i+1d<3}'))), - self.PATTERN_CLASS) - - # No cost limit. - self.assertEqual(regex.search('(foobar){e}', - 'xirefoabralfobarxie').span(0, 1), ((0, 6), (0, 6))) - self.assertEqual(regex.search('(?e)(foobar){e}', - 'xirefoabralfobarxie').span(0, 1), ((0, 3), (0, 3))) - self.assertEqual(regex.search('(?b)(foobar){e}', - 'xirefoabralfobarxie').span(0, 1), ((11, 16), (11, 16))) - - # At most two errors. - self.assertEqual(regex.search('(foobar){e<=2}', - 'xirefoabrzlfd').span(0, 1), ((4, 9), (4, 9))) - self.assertEqual(regex.search('(foobar){e<=2}', 'xirefoabzlfd'), None) - - # At most two inserts or substitutions and max two errors total. - self.assertEqual(regex.search('(foobar){i<=2,s<=2,e<=2}', - 'oobargoobaploowap').span(0, 1), ((5, 11), (5, 11))) - - # Find best whole word match for "foobar". - self.assertEqual(regex.search('\\b(foobar){e}\\b', 'zfoobarz').span(0, - 1), ((0, 8), (0, 8))) - self.assertEqual(regex.search('\\b(foobar){e}\\b', - 'boing zfoobarz goobar woop').span(0, 1), ((0, 6), (0, 6))) - self.assertEqual(regex.search('(?b)\\b(foobar){e}\\b', - 'boing zfoobarz goobar woop').span(0, 1), ((15, 21), (15, 21))) - - # Match whole string, allow only 1 error. 
- self.assertEqual(regex.search('^(foobar){e<=1}$', 'foobar').span(0, 1), - ((0, 6), (0, 6))) - self.assertEqual(regex.search('^(foobar){e<=1}$', 'xfoobar').span(0, - 1), ((0, 7), (0, 7))) - self.assertEqual(regex.search('^(foobar){e<=1}$', 'foobarx').span(0, - 1), ((0, 7), (0, 7))) - self.assertEqual(regex.search('^(foobar){e<=1}$', 'fooxbar').span(0, - 1), ((0, 7), (0, 7))) - self.assertEqual(regex.search('^(foobar){e<=1}$', 'foxbar').span(0, 1), - ((0, 6), (0, 6))) - self.assertEqual(regex.search('^(foobar){e<=1}$', 'xoobar').span(0, 1), - ((0, 6), (0, 6))) - self.assertEqual(regex.search('^(foobar){e<=1}$', 'foobax').span(0, 1), - ((0, 6), (0, 6))) - self.assertEqual(regex.search('^(foobar){e<=1}$', 'oobar').span(0, 1), - ((0, 5), (0, 5))) - self.assertEqual(regex.search('^(foobar){e<=1}$', 'fobar').span(0, 1), - ((0, 5), (0, 5))) - self.assertEqual(regex.search('^(foobar){e<=1}$', 'fooba').span(0, 1), - ((0, 5), (0, 5))) - self.assertEqual(regex.search('^(foobar){e<=1}$', 'xfoobarx'), None) - self.assertEqual(regex.search('^(foobar){e<=1}$', 'foobarxx'), None) - self.assertEqual(regex.search('^(foobar){e<=1}$', 'xxfoobar'), None) - self.assertEqual(regex.search('^(foobar){e<=1}$', 'xfoxbar'), None) - self.assertEqual(regex.search('^(foobar){e<=1}$', 'foxbarx'), None) - - # At most one insert, two deletes, and three substitutions. - # Additionally, deletes cost two and substitutes one, and total - # cost must be less than 4. - self.assertEqual(regex.search('(foobar){i<=1,d<=2,s<=3,2d+1s<4}', - '3oifaowefbaoraofuiebofasebfaobfaorfeoaro').span(0, 1), ((6, 13), (6, - 13))) - self.assertEqual(regex.search('(?b)(foobar){i<=1,d<=2,s<=3,2d+1s<4}', - '3oifaowefbaoraofuiebofasebfaobfaorfeoaro').span(0, 1), ((26, 33), - (26, 33))) - - # Partially fuzzy matches. 
- self.assertEqual(regex.search('foo(bar){e<=1}zap', 'foobarzap').span(0, - 1), ((0, 9), (3, 6))) - self.assertEqual(regex.search('foo(bar){e<=1}zap', 'fobarzap'), None) - self.assertEqual(regex.search('foo(bar){e<=1}zap', 'foobrzap').span(0, - 1), ((0, 8), (3, 5))) - - text = ('www.cnn.com 64.236.16.20\nwww.slashdot.org 66.35.250.150\n' - 'For useful information, use www.slashdot.org\nthis is demo data!\n') - self.assertEqual(regex.search(r'(?s)^.*(dot.org){e}.*$', text).span(0, - 1), ((0, 120), (120, 120))) - self.assertEqual(regex.search(r'(?es)^.*(dot.org){e}.*$', text).span(0, - 1), ((0, 120), (93, 100))) - self.assertEqual(regex.search(r'^.*(dot.org){e}.*$', text).span(0, 1), - ((0, 119), (24, 101))) - - # Behaviour is unexpected, but arguably not wrong. It first finds the - # best match, then the best in what follows, etc. - self.assertEqual(regex.findall(r"\b\L{e<=1}\b", - " book cot dog desk ", words="cat dog".split()), ["cot", "dog"]) - self.assertEqual(regex.findall(r"\b\L{e<=1}\b", - " book dog cot desk ", words="cat dog".split()), [" dog", "cot"]) - self.assertEqual(regex.findall(r"(?e)\b\L{e<=1}\b", - " book dog cot desk ", words="cat dog".split()), ["dog", "cot"]) - self.assertEqual(regex.findall(r"(?r)\b\L{e<=1}\b", - " book cot dog desk ", words="cat dog".split()), ["dog ", "cot"]) - self.assertEqual(regex.findall(r"(?er)\b\L{e<=1}\b", - " book cot dog desk ", words="cat dog".split()), ["dog", "cot"]) - self.assertEqual(regex.findall(r"(?r)\b\L{e<=1}\b", - " book dog cot desk ", words="cat dog".split()), ["cot", "dog"]) - self.assertEqual(regex.findall(ur"\b\L{e<=1}\b", - u" book cot dog desk ", words=u"cat dog".split()), [u"cot", u"dog"]) - self.assertEqual(regex.findall(ur"\b\L{e<=1}\b", - u" book dog cot desk ", words=u"cat dog".split()), [u" dog", u"cot"]) - self.assertEqual(regex.findall(ur"(?e)\b\L{e<=1}\b", - u" book dog cot desk ", words=u"cat dog".split()), [u"dog", u"cot"]) - self.assertEqual(regex.findall(ur"(?r)\b\L{e<=1}\b", - u" book 
cot dog desk ", words=u"cat dog".split()), [u"dog ", u"cot"]) - self.assertEqual(regex.findall(ur"(?er)\b\L{e<=1}\b", - u" book cot dog desk ", words=u"cat dog".split()), [u"dog", u"cot"]) - self.assertEqual(regex.findall(ur"(?r)\b\L{e<=1}\b", - u" book dog cot desk ", words=u"cat dog".split()), [u"cot", u"dog"]) - - self.assertEqual(regex.search(r"(\w+) (\1{e<=1})", "foo fou").groups(), - ("foo", "fou")) - self.assertEqual(regex.search(r"(?r)(\2{e<=1}) (\w+)", - "foo fou").groups(), ("foo", "fou")) - self.assertEqual(regex.search(ur"(\w+) (\1{e<=1})", - u"foo fou").groups(), (u"foo", u"fou")) - - self.assertEqual(regex.findall(r"(?:(?:QR)+){e}","abcde"), ["abcde", - ""]) - self.assertEqual(regex.findall(r"(?:Q+){e}","abc"), ["abc", ""]) - - # Hg issue 41. - self.assertEqual(regex.match(r"(?:service detection){0[^()]+)|(?R))*\)", "(ab(cd)ef)")[ - : ], ("(ab(cd)ef)", "ef")) - self.assertEqual(regex.search(r"\(((?>[^()]+)|(?R))*\)", - "(ab(cd)ef)").captures(1), ["ab", "cd", "(cd)", "ef"]) - - self.assertEqual(regex.search(r"(?r)\(((?R)|(?>[^()]+))*\)", - "(ab(cd)ef)")[ : ], ("(ab(cd)ef)", "ab")) - self.assertEqual(regex.search(r"(?r)\(((?R)|(?>[^()]+))*\)", - "(ab(cd)ef)").captures(1), ["ef", "cd", "(cd)", "ab"]) - - self.assertEqual(regex.search(r"\(([^()]+|(?R))*\)", - "some text (a(b(c)d)e) more text")[ : ], ("(a(b(c)d)e)", "e")) - - self.assertEqual(regex.search(r"(?r)\(((?R)|[^()]+)*\)", - "some text (a(b(c)d)e) more text")[ : ], ("(a(b(c)d)e)", "a")) - - self.assertEqual(regex.search(r"(foo(\(((?:(?>[^()]+)|(?2))*)\)))", - "foo(bar(baz)+baz(bop))")[ : ], ("foo(bar(baz)+baz(bop))", - "foo(bar(baz)+baz(bop))", "(bar(baz)+baz(bop))", - "bar(baz)+baz(bop)")) - - self.assertEqual(regex.search(r"(?r)(foo(\(((?:(?2)|(?>[^()]+))*)\)))", - "foo(bar(baz)+baz(bop))")[ : ], ("foo(bar(baz)+baz(bop))", - "foo(bar(baz)+baz(bop))", "(bar(baz)+baz(bop))", - "bar(baz)+baz(bop)")) - - rgx = 
regex.compile(r"""^\s*(<\s*([a-zA-Z:]+)(?:\s*[a-zA-Z:]*\s*=\s*(?:'[^']*'|"[^"]*"))*\s*(/\s*)?>(?:[^<>]*|(?1))*(?(3)|<\s*/\s*\2\s*>))\s*$""") - self.assertEqual(bool(rgx.search('')), True) - self.assertEqual(bool(rgx.search('')), False) - self.assertEqual(bool(rgx.search('')), True) - self.assertEqual(bool(rgx.search('')), False) - self.assertEqual(bool(rgx.search('')), False) - - self.assertEqual(bool(rgx.search('')), False) - self.assertEqual(bool(rgx.search('')), True) - self.assertEqual(bool(rgx.search('< fooo / >')), True) - # The next regex should and does match. Perl 5.14 agrees. - #self.assertEqual(bool(rgx.search('foo')), False) - self.assertEqual(bool(rgx.search('foo')), False) - - self.assertEqual(bool(rgx.search('foo')), True) - self.assertEqual(bool(rgx.search('foo')), True) - self.assertEqual(bool(rgx.search('')), True) - - def test_copy(self): - # PatternObjects are immutable, therefore there's no need to clone them. - r = regex.compile("a") - self.assert_(copy.copy(r) is r) - self.assert_(copy.deepcopy(r) is r) - - # MatchObjects are normally mutable because the target string can be - # detached. However, after the target string has been detached, a - # MatchObject becomes immutable, so there's no need to clone it. - m = r.match("a") - self.assert_(copy.copy(m) is not m) - self.assert_(copy.deepcopy(m) is not m) - - self.assert_(m.string is not None) - m2 = copy.copy(m) - m2.detach_string() - self.assert_(m.string is not None) - self.assert_(m2.string is None) - - # The following behaviour matches that of the re module. - it = regex.finditer(".", "ab") - it2 = copy.copy(it) - self.assertEqual(it.next().group(), "a") - self.assertEqual(it2.next().group(), "b") - - # The following behaviour matches that of the re module. - it = regex.finditer(".", "ab") - it2 = copy.deepcopy(it) - self.assertEqual(it.next().group(), "a") - self.assertEqual(it2.next().group(), "b") - - # The following behaviour is designed to match that of copying 'finditer'. 
- it = regex.splititer(" ", "a b") - it2 = copy.copy(it) - self.assertEqual(it.next(), "a") - self.assertEqual(it2.next(), "b") - - # The following behaviour is designed to match that of copying 'finditer'. - it = regex.splititer(" ", "a b") - it2 = copy.deepcopy(it) - self.assertEqual(it.next(), "a") - self.assertEqual(it2.next(), "b") - - def test_format(self): - self.assertEqual(regex.subf(r"(\w+) (\w+)", "{0} => {2} {1}", - "foo bar"), "foo bar => bar foo") - self.assertEqual(regex.subf(r"(?\w+) (?\w+)", - "{word2} {word1}", "foo bar"), "bar foo") - - self.assertEqual(regex.subfn(r"(\w+) (\w+)", "{0} => {2} {1}", - "foo bar"), ("foo bar => bar foo", 1)) - self.assertEqual(regex.subfn(r"(?\w+) (?\w+)", - "{word2} {word1}", "foo bar"), ("bar foo", 1)) - - self.assertEqual(regex.match(r"(\w+) (\w+)", - "foo bar").expandf("{0} => {2} {1}"), "foo bar => bar foo") - - def test_fullmatch(self): - self.assertEqual(bool(regex.fullmatch(r"abc", "abc")), True) - self.assertEqual(bool(regex.fullmatch(r"abc", "abcx")), False) - self.assertEqual(bool(regex.fullmatch(r"abc", "abcx", endpos=3)), True) - - self.assertEqual(bool(regex.fullmatch(r"abc", "xabc", pos=1)), True) - self.assertEqual(bool(regex.fullmatch(r"abc", "xabcy", pos=1)), False) - self.assertEqual(bool(regex.fullmatch(r"abc", "xabcy", pos=1, - endpos=4)), True) - - self.assertEqual(bool(regex.fullmatch(r"(?r)abc", "abc")), True) - self.assertEqual(bool(regex.fullmatch(r"(?r)abc", "abcx")), False) - self.assertEqual(bool(regex.fullmatch(r"(?r)abc", "abcx", endpos=3)), - True) - - self.assertEqual(bool(regex.fullmatch(r"(?r)abc", "xabc", pos=1)), - True) - self.assertEqual(bool(regex.fullmatch(r"(?r)abc", "xabcy", pos=1)), - False) - self.assertEqual(bool(regex.fullmatch(r"(?r)abc", "xabcy", pos=1, - endpos=4)), True) - - def test_hg_bugs(self): - # Hg issue 28. - self.assertEqual(bool(regex.compile("(?>b)", flags=regex.V1)), True) - - # Hg issue 29. 
- self.assertEqual(bool(regex.compile("^((?>\w+)|(?>\s+))*$", - flags=regex.V1)), True) - - # Hg issue 31. - self.assertEqual(regex.findall(r"\((?:(?>[^()]+)|(?R))*\)", - "a(bcd(e)f)g(h)"), ['(bcd(e)f)', '(h)']) - self.assertEqual(regex.findall(r"\((?:(?:[^()]+)|(?R))*\)", - "a(bcd(e)f)g(h)"), ['(bcd(e)f)', '(h)']) - self.assertEqual(regex.findall(r"\((?:(?>[^()]+)|(?R))*\)", - "a(b(cd)e)f)g)h"), ['(b(cd)e)']) - self.assertEqual(regex.findall(r"\((?:(?>[^()]+)|(?R))*\)", - "a(bc(d(e)f)gh"), ['(d(e)f)']) - self.assertEqual(regex.findall(r"(?r)\((?:(?>[^()]+)|(?R))*\)", - "a(bc(d(e)f)gh"), ['(d(e)f)']) - self.assertEqual([m.group() for m in - regex.finditer(r"\((?:[^()]*+|(?0))*\)", "a(b(c(de)fg)h")], - ['(c(de)fg)']) - - # Hg issue 32. - self.assertEqual(regex.search("a(bc)d", "abcd", regex.I | - regex.V1).group(0), "abcd") - - # Hg issue 33. - self.assertEqual(regex.search("([\da-f:]+)$", "E", regex.I | - regex.V1).group(0), "E") - self.assertEqual(regex.search("([\da-f:]+)$", "e", regex.I | - regex.V1).group(0), "e") - - # Hg issue 34. - self.assertEqual(regex.search("^(?=ab(de))(abd)(e)", "abde").groups(), - ('de', 'abd', 'e')) - - # Hg issue 35. - self.assertEqual(bool(regex.match(r"\ ", " ", flags=regex.X)), True) - - # Hg issue 36. - self.assertEqual(regex.search(r"^(a|)\1{2}b", "b").group(0, 1), ('b', - '')) - - # Hg issue 37. - self.assertEqual(regex.search("^(a){0,0}", "abc").group(0, 1), ('', - None)) - - # Hg issue 38. - self.assertEqual(regex.search("(?>.*/)b", "a/b").group(0), "a/b") - - # Hg issue 39. - self.assertEqual(regex.search(r"(?V0)((?i)blah)\s+\1", - "blah BLAH").group(0, 1), ("blah BLAH", "blah")) - self.assertEqual(regex.search(r"(?V1)((?i)blah)\s+\1", "blah BLAH"), - None) - - # Hg issue 40. - self.assertEqual(regex.search(r"(\()?[^()]+(?(1)\)|)", - "(abcd").group(0), "abcd") - - # Hg issue 42. 
- self.assertEqual(regex.search("(a*)*", "a").span(1), (1, 1)) - self.assertEqual(regex.search("(a*)*", "aa").span(1), (2, 2)) - self.assertEqual(regex.search("(a*)*", "aaa").span(1), (3, 3)) - - # Hg issue 43. - self.assertEqual(regex.search("a(?#xxx)*", "aaa").group(), "aaa") - - # Hg issue 44. - self.assertEqual(regex.search("(?=abc){3}abc", "abcabcabc").span(), (0, - 3)) - - # Hg issue 45. - self.assertEqual(regex.search("^(?:a(?:(?:))+)+", "a").span(), (0, 1)) - self.assertEqual(regex.search("^(?:a(?:(?:))+)+", "aa").span(), (0, 2)) - - # Hg issue 46. - self.assertEqual(regex.search("a(?x: b c )d", "abcd").group(0), "abcd") - - # Hg issue 47. - self.assertEqual(regex.search("a#comment\n*", "aaa", - flags=regex.X).group(0), "aaa") - - # Hg issue 48. - self.assertEqual(regex.search(r"(?V1)(a(?(1)\1)){1}", - "aaaaaaaaaa").span(0, 1), ((0, 1), (0, 1))) - self.assertEqual(regex.search(r"(?V1)(a(?(1)\1)){2}", - "aaaaaaaaaa").span(0, 1), ((0, 3), (1, 3))) - self.assertEqual(regex.search(r"(?V1)(a(?(1)\1)){3}", - "aaaaaaaaaa").span(0, 1), ((0, 6), (3, 6))) - self.assertEqual(regex.search(r"(?V1)(a(?(1)\1)){4}", - "aaaaaaaaaa").span(0, 1), ((0, 10), (6, 10))) - - # Hg issue 49. - self.assertEqual(regex.search("(?V1)(a)(?<=b(?1))", "baz").group(0), - "a") - - # Hg issue 50. 
- self.assertEqual(regex.findall(ur'(?fi)\L', - u'POST, Post, post, po\u017Ft, po\uFB06, and po\uFB05', - keywords=['post','pos']), [u'POST', u'Post', u'post', u'po\u017Ft', - u'po\uFB06', u'po\uFB05']) - self.assertEqual(regex.findall(ur'(?fi)pos|post', - u'POST, Post, post, po\u017Ft, po\uFB06, and po\uFB05'), [u'POS', - u'Pos', u'pos', u'po\u017F', u'po\uFB06', u'po\uFB05']) - self.assertEqual(regex.findall(ur'(?fi)post|pos', - u'POST, Post, post, po\u017Ft, po\uFB06, and po\uFB05'), [u'POST', - u'Post', u'post', u'po\u017Ft', u'po\uFB06', u'po\uFB05']) - self.assertEqual(regex.findall(ur'(?fi)post|another', - u'POST, Post, post, po\u017Ft, po\uFB06, and po\uFB05'), [u'POST', - u'Post', u'post', u'po\u017Ft', u'po\uFB06', u'po\uFB05']) - - # Hg issue 51. - self.assertEqual(regex.search("(?V1)((a)(?1)|(?2))", "a").group(0, 1, - 2), ('a', 'a', None)) - - # Hg issue 52. - self.assertEqual(regex.search(r"(?V1)(\1xx|){6}", "xx").span(0, 1), - ((0, 2), (2, 2))) - - # Hg issue 53. - self.assertEqual(regex.search("(a|)+", "a").group(0, 1), ("a", "")) - - # Hg issue 54. - self.assertEqual(regex.search(r"(a|)*\d", "a" * 80), None) - - # Hg issue 55. - self.assertEqual(regex.search("^(?:a?b?)*$", "ac"), None) - - # Hg issue 58. - self.assertRaisesRegex(regex.error, self.UNDEF_CHAR_NAME, lambda: - regex.compile("\\N{1}")) - - # Hg issue 59. - self.assertEqual(regex.search("\\Z", "a\na\n").span(0), (4, 4)) - - # Hg issue 60. - self.assertEqual(regex.search("(q1|.)*(q2|.)*(x(a|bc)*y){2,}", - "xayxay").group(0), "xayxay") - - # Hg issue 61. - self.assertEqual(regex.search("(?i)[^a]", "A"), None) - - # Hg issue 63. - self.assertEqual(regex.search(u"(?iu)[[:ascii:]]", u"\N{KELVIN SIGN}"), - None) - - # Hg issue 66. - self.assertEqual(regex.search("((a|b(?1)c){3,5})", "baaaaca").group(0, - 1, 2), ('aaaa', 'aaaa', 'a')) - - # Hg issue 71. 
- self.assertEqual(regex.findall(r"(?<=:\S+ )\w+", ":9 abc :10 def"), - ['abc', 'def']) - self.assertEqual(regex.findall(r"(?<=:\S* )\w+", ":9 abc :10 def"), - ['abc', 'def']) - self.assertEqual(regex.findall(r"(?<=:\S+? )\w+", ":9 abc :10 def"), - ['abc', 'def']) - self.assertEqual(regex.findall(r"(?<=:\S*? )\w+", ":9 abc :10 def"), - ['abc', 'def']) - - # Hg issue 73. - self.assertEqual(regex.search(r"(?:fe)?male", "female").group(), - "female") - self.assertEqual([m.group() for m in - regex.finditer(r"(fe)?male: h(?(1)(er)|(is)) (\w+)", - "female: her dog; male: his cat. asdsasda")], ['female: her dog', - 'male: his cat']) - - # Hg issue 78. - self.assertEqual(regex.search(r'(?\((?:[^()]++|(?&rec))*\))', - 'aaa(((1+0)+1)+1)bbb').captures('rec'), ['(1+0)', '((1+0)+1)', - '(((1+0)+1)+1)']) - - # Hg issue 80. - self.assertRaisesRegex(regex.error, self.BAD_ESCAPE, lambda: - regex.sub('x', '\\', 'x'), ) - - # Hg issue 82. - fz = "(CAGCCTCCCATTTCAGAATATACATCC){1a(?b))', "ab").spans("x"), [(1, - 2), (0, 2)]) - - # Hg issue 91. - # Check that the replacement cache works. - self.assertEqual(regex.sub(r'(-)', lambda m: m.expand(r'x'), 'a-b-c'), - 'axbxc') - - # Hg issue 94. - rx = regex.compile(r'\bt(est){i<2}', flags=regex.V1) - self.assertEqual(rx.search("Some text"), None) - self.assertEqual(rx.findall("Some text"), []) - - # Hg issue 95. - self.assertRaisesRegex(regex.error, - '^nothing to repeat at position 3$', lambda: regex.compile(r'.???')) - - # Hg issue 97. - self.assertEquals(regex.escape(u'foo!?'), u'foo\\!\\?') - self.assertEquals(regex.escape(u'foo!?', special_only=True), - u'foo!\\?') - - self.assertEquals(regex.escape('foo!?'), 'foo\\!\\?') - self.assertEquals(regex.escape('foo!?', special_only=True), 'foo!\\?') - - # Hg issue 100. 
- self.assertEquals(regex.search('^([^z]*(?:WWWi|W))?$', - 'WWWi').groups(), ('WWWi', )) - self.assertEquals(regex.search('^([^z]*(?:WWWi|w))?$', - 'WWWi').groups(), ('WWWi', )) - self.assertEquals(regex.search('^([^z]*?(?:WWWi|W))?$', - 'WWWi').groups(), ('WWWi', )) - - # Hg issue 101. - pat = regex.compile(r'xxx', flags=regex.FULLCASE | regex.UNICODE) - self.assertEquals([x.group() for x in pat.finditer('yxxx')], ['xxx']) - self.assertEquals(pat.findall('yxxx'), ['xxx']) - - raw = 'yxxx' - self.assertEquals([x.group() for x in pat.finditer(raw)], ['xxx']) - self.assertEquals(pat.findall(raw), ['xxx']) - - pat = regex.compile(r'xxx', flags=regex.FULLCASE | regex.IGNORECASE | - regex.UNICODE) - self.assertEquals([x.group() for x in pat.finditer('yxxx')], ['xxx']) - self.assertEquals(pat.findall('yxxx'), ['xxx']) - - raw = 'yxxx' - self.assertEquals([x.group() for x in pat.finditer(raw)], ['xxx']) - self.assertEquals(pat.findall(raw), ['xxx']) - - # Hg issue 106. - self.assertEquals(regex.sub('(?V0).*', 'x', 'test'), 'x') - self.assertEquals(regex.sub('(?V1).*', 'x', 'test'), 'xx') - - self.assertEquals(regex.sub('(?V0).*?', '|', 'test'), '|t|e|s|t|') - self.assertEquals(regex.sub('(?V1).*?', '|', 'test'), '|||||||||') - - # Hg issue 112. - self.assertEquals(regex.sub(r'^(@)\n(?!.*?@)(.*)', - r'\1\n==========\n\2', '@\n', flags=regex.DOTALL), '@\n==========\n') - - # Hg issue 109. 
- self.assertEquals(regex.match(r'(?:cats|cat){e<=1}', - 'caz').fuzzy_counts, (1, 0, 0)) - self.assertEquals(regex.match(r'(?e)(?:cats|cat){e<=1}', - 'caz').fuzzy_counts, (1, 0, 0)) - self.assertEquals(regex.match(r'(?b)(?:cats|cat){e<=1}', - 'caz').fuzzy_counts, (1, 0, 0)) - - self.assertEquals(regex.match(r'(?:cat){e<=1}', 'caz').fuzzy_counts, - (1, 0, 0)) - self.assertEquals(regex.match(r'(?e)(?:cat){e<=1}', - 'caz').fuzzy_counts, (1, 0, 0)) - self.assertEquals(regex.match(r'(?b)(?:cat){e<=1}', - 'caz').fuzzy_counts, (1, 0, 0)) - - self.assertEquals(regex.match(r'(?:cats){e<=2}', 'c ats').fuzzy_counts, - (1, 1, 0)) - self.assertEquals(regex.match(r'(?e)(?:cats){e<=2}', - 'c ats').fuzzy_counts, (0, 1, 0)) - self.assertEquals(regex.match(r'(?b)(?:cats){e<=2}', - 'c ats').fuzzy_counts, (0, 1, 0)) - - self.assertEquals(regex.match(r'(?:cats){e<=2}', - 'c a ts').fuzzy_counts, (0, 2, 0)) - self.assertEquals(regex.match(r'(?e)(?:cats){e<=2}', - 'c a ts').fuzzy_counts, (0, 2, 0)) - self.assertEquals(regex.match(r'(?b)(?:cats){e<=2}', - 'c a ts').fuzzy_counts, (0, 2, 0)) - - self.assertEquals(regex.match(r'(?:cats){e<=1}', - 'c ats').fuzzy_counts, (0, 1, 0)) - self.assertEquals(regex.match(r'(?e)(?:cats){e<=1}', - 'c ats').fuzzy_counts, (0, 1, 0)) - self.assertEquals(regex.match(r'(?b)(?:cats){e<=1}', - 'c ats').fuzzy_counts, (0, 1, 0)) - -if not hasattr(str, "format"): - # Strings don't have the .format method (below Python 2.6). 
- del RegexTests.test_format - -def test_main(): - run_unittest(RegexTests) - -if __name__ == "__main__": - test_main() diff --git a/sickbeard/name_parser/parser.py b/sickbeard/name_parser/parser.py index bfa0cf8c..12aad199 100644 --- a/sickbeard/name_parser/parser.py +++ b/sickbeard/name_parser/parser.py @@ -24,7 +24,6 @@ import regexes import sickbeard from sickbeard import logger, helpers, scene_numbering -from regex import regex from dateutil import parser nameparser_lock = threading.Lock() @@ -104,8 +103,8 @@ class NameParser(object): for regex_type, regex_pattern in regexItem.items(): for (cur_pattern_name, cur_pattern) in regex_pattern: try: - cur_regex = regex.compile(cur_pattern, regex.V1 | regex.VERBOSE | regex.IGNORECASE | regex.BESTMATCH) - except regex.error, errormsg: + cur_regex = re.compile(cur_pattern, re.VERBOSE | re.IGNORECASE) + except re.error, errormsg: logger.log(u"WARNING: Invalid episode_pattern, %s. %s" % (errormsg, cur_pattern)) else: self.compiled_regexes[(regex_type,cur_pattern_name)] = cur_regex @@ -116,7 +115,7 @@ class NameParser(object): result = ParseResult(name) for (cur_regex_type, cur_regex_name), cur_regex in self.compiled_regexes.items(): - match = cur_regex.fullmatch(name) + match = cur_regex.match(name) if not match: continue @@ -185,8 +184,8 @@ class NameParser(object): tmp_extra_info = match.group('extra_info') # Show.S04.Special or Show.S05.Part.2.Extras is almost certainly not every episode in the season - if tmp_extra_info and cur_regex_name == 'season_only' and regex.search( - r'([. _-]|^)(special|extra)s?\w*([. _-]|$)', tmp_extra_info, regex.I): + if tmp_extra_info and cur_regex_name == 'season_only' and re.search( + r'([. _-]|^)(special|extra)s?\w*([. 
_-]|$)', tmp_extra_info, re.I): continue result.extra_info = tmp_extra_info @@ -286,7 +285,7 @@ class NameParser(object): # break it into parts if there are any (dirname, file name, extension) dir_name, file_name = os.path.split(name) - ext_match = regex.match('(.*)\.\w{3,4}$', file_name) + ext_match = re.match('(.*)\.\w{3,4}$', file_name) if ext_match and self.file_name: base_file_name = ext_match.group(1) else: