Merge pull request #38363 from akien-mga/pcre2-10.34
pcre2: Update to upstream version 10.34
This commit is contained in:
commit
f870118323
3
thirdparty/README.md
vendored
3
thirdparty/README.md
vendored
@ -457,14 +457,13 @@ Files extracted from upstream source:
|
||||
## pcre2
|
||||
|
||||
- Upstream: http://www.pcre.org
|
||||
- Version: 10.33
|
||||
- Version: 10.34
|
||||
- License: BSD-3-Clause
|
||||
|
||||
Files extracted from upstream source:
|
||||
|
||||
- Files listed in the file NON-AUTOTOOLS-BUILD steps 1-4
|
||||
- All .h files in src/ apart from pcre2posix.h
|
||||
- src/pcre2_jit_compile.c
|
||||
- src/pcre2_jit_match.c
|
||||
- src/pcre2_jit_misc.c
|
||||
- src/sljit/*
|
||||
|
6
thirdparty/pcre2/src/config.h
vendored
6
thirdparty/pcre2/src/config.h
vendored
@ -218,7 +218,7 @@ sure both macros are undefined; an emulation function will then be used. */
|
||||
#define PACKAGE_NAME "PCRE2"
|
||||
|
||||
/* Define to the full name and version of this package. */
|
||||
#define PACKAGE_STRING "PCRE2 10.33"
|
||||
#define PACKAGE_STRING "PCRE2 10.34"
|
||||
|
||||
/* Define to the one symbol short name of this package. */
|
||||
#define PACKAGE_TARNAME "pcre2"
|
||||
@ -227,7 +227,7 @@ sure both macros are undefined; an emulation function will then be used. */
|
||||
#define PACKAGE_URL ""
|
||||
|
||||
/* Define to the version of this package. */
|
||||
#define PACKAGE_VERSION "10.33"
|
||||
#define PACKAGE_VERSION "10.34"
|
||||
|
||||
/* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested
|
||||
parentheses (of any kind) in a pattern. This limits the amount of system
|
||||
@ -352,7 +352,7 @@ sure both macros are undefined; an emulation function will then be used. */
|
||||
#endif
|
||||
|
||||
/* Version number of package */
|
||||
#define VERSION "10.33"
|
||||
#define VERSION "10.34"
|
||||
|
||||
/* Define to 1 if on MINIX. */
|
||||
/* #undef _MINIX */
|
||||
|
19
thirdparty/pcre2/src/pcre2.h
vendored
19
thirdparty/pcre2/src/pcre2.h
vendored
@ -5,7 +5,7 @@
|
||||
/* This is the public header file for the PCRE library, second API, to be
|
||||
#included by applications that call PCRE2 functions.
|
||||
|
||||
Copyright (c) 2016-2018 University of Cambridge
|
||||
Copyright (c) 2016-2019 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@ -42,9 +42,9 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||
/* The current PCRE version information. */
|
||||
|
||||
#define PCRE2_MAJOR 10
|
||||
#define PCRE2_MINOR 33
|
||||
#define PCRE2_MINOR 34
|
||||
#define PCRE2_PRERELEASE
|
||||
#define PCRE2_DATE 2019-04-16
|
||||
#define PCRE2_DATE 2019-11-21
|
||||
|
||||
/* When an application links to a PCRE DLL in Windows, the symbols that are
|
||||
imported have to be identified as such. When building PCRE2, the appropriate
|
||||
@ -142,6 +142,7 @@ D is inspected during pcre2_dfa_match() execution
|
||||
#define PCRE2_USE_OFFSET_LIMIT 0x00800000u /* J M D */
|
||||
#define PCRE2_EXTENDED_MORE 0x01000000u /* C */
|
||||
#define PCRE2_LITERAL 0x02000000u /* C */
|
||||
#define PCRE2_MATCH_INVALID_UTF 0x04000000u /* J M D */
|
||||
|
||||
/* An additional compile options word is available in the compile context. */
|
||||
|
||||
@ -305,6 +306,8 @@ pcre2_pattern_convert(). */
|
||||
#define PCRE2_ERROR_INVALID_HYPHEN_IN_OPTIONS 194
|
||||
#define PCRE2_ERROR_ALPHA_ASSERTION_UNKNOWN 195
|
||||
#define PCRE2_ERROR_SCRIPT_RUN_NOT_AVAILABLE 196
|
||||
#define PCRE2_ERROR_TOO_MANY_CAPTURES 197
|
||||
#define PCRE2_ERROR_CONDITION_ATOMIC_ASSERTION_EXPECTED 198
|
||||
|
||||
|
||||
/* "Expected" matching error codes: no match and partial match. */
|
||||
@ -390,6 +393,7 @@ released, the numbers must not be changed. */
|
||||
#define PCRE2_ERROR_HEAPLIMIT (-63)
|
||||
#define PCRE2_ERROR_CONVERT_SYNTAX (-64)
|
||||
#define PCRE2_ERROR_INTERNAL_DUPMATCH (-65)
|
||||
#define PCRE2_ERROR_DFA_UINVALID_UTF (-66)
|
||||
|
||||
|
||||
/* Request types for pcre2_pattern_info() */
|
||||
@ -580,7 +584,7 @@ PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_bsr(pcre2_compile_context *, uint32_t); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_character_tables(pcre2_compile_context *, const unsigned char *); \
|
||||
pcre2_set_character_tables(pcre2_compile_context *, const uint8_t *); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_compile_extra_options(pcre2_compile_context *, uint32_t); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
@ -675,6 +679,8 @@ PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||
pcre2_match_data_free(pcre2_match_data *); \
|
||||
PCRE2_EXP_DECL PCRE2_SPTR PCRE2_CALL_CONVENTION \
|
||||
pcre2_get_mark(pcre2_match_data *); \
|
||||
PCRE2_EXP_DECL PCRE2_SIZE PCRE2_CALL_CONVENTION \
|
||||
pcre2_get_match_data_size(pcre2_match_data *); \
|
||||
PCRE2_EXP_DECL uint32_t PCRE2_CALL_CONVENTION \
|
||||
pcre2_get_ovector_count(pcre2_match_data *); \
|
||||
PCRE2_EXP_DECL PCRE2_SIZE PCRE2_CALL_CONVENTION \
|
||||
@ -773,7 +779,8 @@ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_get_error_message(int, PCRE2_UCHAR *, PCRE2_SIZE); \
|
||||
PCRE2_EXP_DECL const uint8_t PCRE2_CALL_CONVENTION \
|
||||
*pcre2_maketables(pcre2_general_context *); \
|
||||
|
||||
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||
pcre2_maketables_free(pcre2_general_context *, const uint8_t *);
|
||||
|
||||
/* Define macros that generate width-specific names from generic versions. The
|
||||
three-level macro scheme is necessary to get the macros expanded when we want
|
||||
@ -838,6 +845,7 @@ pcre2_compile are called by application code. */
|
||||
#define pcre2_general_context_free PCRE2_SUFFIX(pcre2_general_context_free_)
|
||||
#define pcre2_get_error_message PCRE2_SUFFIX(pcre2_get_error_message_)
|
||||
#define pcre2_get_mark PCRE2_SUFFIX(pcre2_get_mark_)
|
||||
#define pcre2_get_match_data_size PCRE2_SUFFIX(pcre2_get_match_data_size_)
|
||||
#define pcre2_get_ovector_pointer PCRE2_SUFFIX(pcre2_get_ovector_pointer_)
|
||||
#define pcre2_get_ovector_count PCRE2_SUFFIX(pcre2_get_ovector_count_)
|
||||
#define pcre2_get_startchar PCRE2_SUFFIX(pcre2_get_startchar_)
|
||||
@ -848,6 +856,7 @@ pcre2_compile are called by application code. */
|
||||
#define pcre2_jit_stack_create PCRE2_SUFFIX(pcre2_jit_stack_create_)
|
||||
#define pcre2_jit_stack_free PCRE2_SUFFIX(pcre2_jit_stack_free_)
|
||||
#define pcre2_maketables PCRE2_SUFFIX(pcre2_maketables_)
|
||||
#define pcre2_maketables_free PCRE2_SUFFIX(pcre2_maketables_free_)
|
||||
#define pcre2_match PCRE2_SUFFIX(pcre2_match_)
|
||||
#define pcre2_match_context_copy PCRE2_SUFFIX(pcre2_match_context_copy_)
|
||||
#define pcre2_match_context_create PCRE2_SUFFIX(pcre2_match_context_create_)
|
||||
|
7
thirdparty/pcre2/src/pcre2_auto_possess.c
vendored
7
thirdparty/pcre2/src/pcre2_auto_possess.c
vendored
@ -624,6 +624,13 @@ for(;;)
|
||||
case OP_ASSERTBACK_NOT:
|
||||
case OP_ONCE:
|
||||
return !entered_a_group;
|
||||
|
||||
/* Non-atomic assertions - don't possessify last iterator. This needs
|
||||
more thought. */
|
||||
|
||||
case OP_ASSERT_NA:
|
||||
case OP_ASSERTBACK_NA:
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
/* Skip over the bracket and inspect what comes next. */
|
||||
|
545
thirdparty/pcre2/src/pcre2_compile.c
vendored
545
thirdparty/pcre2/src/pcre2_compile.c
vendored
File diff suppressed because it is too large
Load Diff
2
thirdparty/pcre2/src/pcre2_context.c
vendored
2
thirdparty/pcre2/src/pcre2_context.c
vendored
@ -323,7 +323,7 @@ data. */
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_character_tables(pcre2_compile_context *ccontext,
|
||||
const unsigned char *tables)
|
||||
const uint8_t *tables)
|
||||
{
|
||||
ccontext->tables = tables;
|
||||
return 0;
|
||||
|
129
thirdparty/pcre2/src/pcre2_dfa_match.c
vendored
129
thirdparty/pcre2/src/pcre2_dfa_match.c
vendored
@ -173,6 +173,8 @@ static const uint8_t coptable[] = {
|
||||
0, /* Assert not */
|
||||
0, /* Assert behind */
|
||||
0, /* Assert behind not */
|
||||
0, /* NA assert */
|
||||
0, /* NA assert behind */
|
||||
0, /* ONCE */
|
||||
0, /* SCRIPT_RUN */
|
||||
0, 0, 0, 0, 0, /* BRA, BRAPOS, CBRA, CBRAPOS, COND */
|
||||
@ -248,6 +250,8 @@ static const uint8_t poptable[] = {
|
||||
0, /* Assert not */
|
||||
0, /* Assert behind */
|
||||
0, /* Assert behind not */
|
||||
0, /* NA assert */
|
||||
0, /* NA assert behind */
|
||||
0, /* ONCE */
|
||||
0, /* SCRIPT_RUN */
|
||||
0, 0, 0, 0, 0, /* BRA, BRAPOS, CBRA, CBRAPOS, COND */
|
||||
@ -962,7 +966,7 @@ for (;;)
|
||||
if (ptr >= end_subject)
|
||||
{
|
||||
if ((mb->moptions & PCRE2_PARTIAL_HARD) != 0)
|
||||
could_continue = TRUE;
|
||||
return PCRE2_ERROR_PARTIAL;
|
||||
else { ADD_ACTIVE(state_offset + 1, 0); }
|
||||
}
|
||||
break;
|
||||
@ -1011,10 +1015,12 @@ for (;;)
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
case OP_EODN:
|
||||
if (clen == 0 && (mb->moptions & PCRE2_PARTIAL_HARD) != 0)
|
||||
could_continue = TRUE;
|
||||
else if (clen == 0 || (IS_NEWLINE(ptr) && ptr == end_subject - mb->nllen))
|
||||
{ ADD_ACTIVE(state_offset + 1, 0); }
|
||||
if (clen == 0 || (IS_NEWLINE(ptr) && ptr == end_subject - mb->nllen))
|
||||
{
|
||||
if ((mb->moptions & PCRE2_PARTIAL_HARD) != 0)
|
||||
return PCRE2_ERROR_PARTIAL;
|
||||
ADD_ACTIVE(state_offset + 1, 0);
|
||||
}
|
||||
break;
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
@ -3152,8 +3158,8 @@ for (;;)
|
||||
|
||||
/* We have finished the processing at the current subject character. If no
|
||||
new states have been set for the next character, we have found all the
|
||||
matches that we are going to find. If we are at the top level and partial
|
||||
matching has been requested, check for appropriate conditions.
|
||||
matches that we are going to find. If partial matching has been requested,
|
||||
check for appropriate conditions.
|
||||
|
||||
The "forced_fail" variable counts the number of (*F) encountered for the
|
||||
character. If it is equal to the original active_count (saved in
|
||||
@ -3165,22 +3171,24 @@ for (;;)
|
||||
|
||||
if (new_count <= 0)
|
||||
{
|
||||
if (rlevel == 1 && /* Top level, and */
|
||||
could_continue && /* Some could go on, and */
|
||||
if (could_continue && /* Some could go on, and */
|
||||
forced_fail != workspace[1] && /* Not all forced fail & */
|
||||
( /* either... */
|
||||
(mb->moptions & PCRE2_PARTIAL_HARD) != 0 /* Hard partial */
|
||||
|| /* or... */
|
||||
((mb->moptions & PCRE2_PARTIAL_SOFT) != 0 && /* Soft partial and */
|
||||
match_count < 0) /* no matches */
|
||||
match_count < 0) /* no matches */
|
||||
) && /* And... */
|
||||
(
|
||||
partial_newline || /* Either partial NL */
|
||||
( /* or ... */
|
||||
ptr >= end_subject && /* End of subject and */
|
||||
ptr > mb->start_used_ptr) /* Inspected non-empty string */
|
||||
partial_newline || /* Either partial NL */
|
||||
( /* or ... */
|
||||
ptr >= end_subject && /* End of subject and */
|
||||
( /* either */
|
||||
ptr > mb->start_used_ptr || /* Inspected non-empty string */
|
||||
mb->allowemptypartial /* or pattern has lookbehind */
|
||||
) /* or could match empty */
|
||||
)
|
||||
)
|
||||
))
|
||||
match_count = PCRE2_ERROR_PARTIAL;
|
||||
break; /* Exit from loop along the subject string */
|
||||
}
|
||||
@ -3246,6 +3254,11 @@ BOOL utf, anchored, startline, firstline;
|
||||
BOOL has_first_cu = FALSE;
|
||||
BOOL has_req_cu = FALSE;
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
BOOL memchr_not_found_first_cu = FALSE;
|
||||
BOOL memchr_not_found_first_cu2 = FALSE;
|
||||
#endif
|
||||
|
||||
PCRE2_UCHAR first_cu = 0;
|
||||
PCRE2_UCHAR first_cu2 = 0;
|
||||
PCRE2_UCHAR req_cu = 0;
|
||||
@ -3295,6 +3308,11 @@ if ((options & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) != 0 &&
|
||||
((re->overall_options | options) & PCRE2_ENDANCHORED) != 0)
|
||||
return PCRE2_ERROR_BADOPTION;
|
||||
|
||||
/* Invalid UTF support is not available for DFA matching. */
|
||||
|
||||
if ((re->overall_options & PCRE2_MATCH_INVALID_UTF) != 0)
|
||||
return PCRE2_ERROR_DFA_UINVALID_UTF;
|
||||
|
||||
/* Check that the first field in the block is the magic number. If it is not,
|
||||
return with PCRE2_ERROR_BADMAGIC. */
|
||||
|
||||
@ -3404,6 +3422,8 @@ mb->tables = re->tables;
|
||||
mb->start_subject = subject;
|
||||
mb->end_subject = end_subject;
|
||||
mb->start_offset = start_offset;
|
||||
mb->allowemptypartial = (re->max_lookbehind > 0) ||
|
||||
(re->flags & PCRE2_MATCH_EMPTY) != 0;
|
||||
mb->moptions = options;
|
||||
mb->poptions = re->overall_options;
|
||||
mb->match_call_count = 0;
|
||||
@ -3619,7 +3639,10 @@ for (;;)
|
||||
/* Not anchored. Advance to a unique first code unit if there is one. In
|
||||
8-bit mode, the use of memchr() gives a big speed up, even though we have
|
||||
to call it twice in caseless mode, in order to find the earliest occurrence
|
||||
of the character in either of its cases. */
|
||||
of the character in either of its cases. If a call to memchr() that
|
||||
searches the rest of the subject fails to find one case, remember that in
|
||||
order not to keep on repeating the search. This can make a huge difference
|
||||
when the strings are very long and only one case is present. */
|
||||
|
||||
else
|
||||
{
|
||||
@ -3633,11 +3656,29 @@ for (;;)
|
||||
(smc = UCHAR21TEST(start_match)) != first_cu &&
|
||||
smc != first_cu2)
|
||||
start_match++;
|
||||
|
||||
#else /* 8-bit code units */
|
||||
PCRE2_SPTR pp1 =
|
||||
memchr(start_match, first_cu, end_subject-start_match);
|
||||
PCRE2_SPTR pp2 =
|
||||
memchr(start_match, first_cu2, end_subject-start_match);
|
||||
PCRE2_SPTR pp1 = NULL;
|
||||
PCRE2_SPTR pp2 = NULL;
|
||||
PCRE2_SIZE cu2size = end_subject - start_match;
|
||||
|
||||
if (!memchr_not_found_first_cu)
|
||||
{
|
||||
pp1 = memchr(start_match, first_cu, end_subject - start_match);
|
||||
if (pp1 == NULL) memchr_not_found_first_cu = TRUE;
|
||||
else cu2size = pp1 - start_match;
|
||||
}
|
||||
|
||||
/* If pp1 is not NULL, we have arranged to search only as far as pp1,
|
||||
to see if the other case is earlier, so we can set "not found" only
|
||||
when both searches have returned NULL. */
|
||||
|
||||
if (!memchr_not_found_first_cu2)
|
||||
{
|
||||
pp2 = memchr(start_match, first_cu2, cu2size);
|
||||
memchr_not_found_first_cu2 = (pp2 == NULL && pp1 == NULL);
|
||||
}
|
||||
|
||||
if (pp1 == NULL)
|
||||
start_match = (pp2 == NULL)? end_subject : pp2;
|
||||
else
|
||||
@ -3653,7 +3694,7 @@ for (;;)
|
||||
while (start_match < end_subject && UCHAR21TEST(start_match) !=
|
||||
first_cu)
|
||||
start_match++;
|
||||
#else
|
||||
#else /* 8-bit code units */
|
||||
start_match = memchr(start_match, first_cu, end_subject - start_match);
|
||||
if (start_match == NULL) start_match = end_subject;
|
||||
#endif
|
||||
@ -3740,6 +3781,8 @@ for (;;)
|
||||
|
||||
if ((mb->moptions & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) == 0)
|
||||
{
|
||||
PCRE2_SPTR p;
|
||||
|
||||
/* The minimum matching length is a lower bound; no actual string of that
|
||||
length may actually match the pattern. Although the value is, strictly,
|
||||
in characters, we treat it as code units to avoid spending too much time
|
||||
@ -3753,37 +3796,63 @@ for (;;)
|
||||
point. This optimization can save a huge amount of backtracking in
|
||||
patterns with nested unlimited repeats that aren't going to match.
|
||||
Writing separate code for cased/caseless versions makes it go faster, as
|
||||
does using an autoincrement and backing off on a match.
|
||||
does using an autoincrement and backing off on a match. As in the case of
|
||||
the first code unit, using memchr() in the 8-bit library gives a big
|
||||
speed up. Unlike the first_cu check above, we do not need to call
|
||||
memchr() twice in the caseless case because we only need to check for the
|
||||
presence of the character in either case, not find the first occurrence.
|
||||
|
||||
The search can be skipped if the code unit was found later than the
|
||||
current starting point in a previous iteration of the bumpalong loop.
|
||||
|
||||
HOWEVER: when the subject string is very, very long, searching to its end
|
||||
can take a long time, and give bad performance on quite ordinary
|
||||
patterns. This showed up when somebody was matching something like
|
||||
/^\d+C/ on a 32-megabyte string... so we don't do this when the string is
|
||||
sufficiently long. */
|
||||
sufficiently long, but it's worth searching a lot more for unanchored
|
||||
patterns. */
|
||||
|
||||
if (has_req_cu && end_subject - start_match < REQ_CU_MAX)
|
||||
p = start_match + (has_first_cu? 1:0);
|
||||
if (has_req_cu && p > req_cu_ptr)
|
||||
{
|
||||
PCRE2_SPTR p = start_match + (has_first_cu? 1:0);
|
||||
PCRE2_SIZE check_length = end_subject - start_match;
|
||||
|
||||
/* We don't need to repeat the search if we haven't yet reached the
|
||||
place we found it at last time. */
|
||||
|
||||
if (p > req_cu_ptr)
|
||||
if (check_length < REQ_CU_MAX ||
|
||||
(!anchored && check_length < REQ_CU_MAX * 1000))
|
||||
{
|
||||
if (req_cu != req_cu2)
|
||||
if (req_cu != req_cu2) /* Caseless */
|
||||
{
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 8
|
||||
while (p < end_subject)
|
||||
{
|
||||
uint32_t pp = UCHAR21INCTEST(p);
|
||||
if (pp == req_cu || pp == req_cu2) { p--; break; }
|
||||
}
|
||||
#else /* 8-bit code units */
|
||||
PCRE2_SPTR pp = p;
|
||||
p = memchr(pp, req_cu, end_subject - pp);
|
||||
if (p == NULL)
|
||||
{
|
||||
p = memchr(pp, req_cu2, end_subject - pp);
|
||||
if (p == NULL) p = end_subject;
|
||||
}
|
||||
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */
|
||||
}
|
||||
|
||||
/* The caseful case */
|
||||
|
||||
else
|
||||
{
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 8
|
||||
while (p < end_subject)
|
||||
{
|
||||
if (UCHAR21INCTEST(p) == req_cu) { p--; break; }
|
||||
}
|
||||
|
||||
#else /* 8-bit code units */
|
||||
p = memchr(p, req_cu, end_subject - p);
|
||||
if (p == NULL) p = end_subject;
|
||||
#endif
|
||||
}
|
||||
|
||||
/* If we can't find the required code unit, break the matching loop,
|
||||
|
3
thirdparty/pcre2/src/pcre2_error.c
vendored
3
thirdparty/pcre2/src/pcre2_error.c
vendored
@ -184,6 +184,8 @@ static const unsigned char compile_error_texts[] =
|
||||
/* 95 */
|
||||
"(*alpha_assertion) not recognized\0"
|
||||
"script runs require Unicode support, which this version of PCRE2 does not have\0"
|
||||
"too many capturing groups (maximum 65535)\0"
|
||||
"atomic assertion expected after (?( or (?(?C)\0"
|
||||
;
|
||||
|
||||
/* Match-time and UTF error texts are in the same format. */
|
||||
@ -268,6 +270,7 @@ static const unsigned char match_error_texts[] =
|
||||
"invalid syntax\0"
|
||||
/* 65 */
|
||||
"internal error - duplicate substitution match\0"
|
||||
"PCRE2_MATCH_INVALID_UTF is not supported for DFA matching\0"
|
||||
;
|
||||
|
||||
|
||||
|
105
thirdparty/pcre2/src/pcre2_internal.h
vendored
105
thirdparty/pcre2/src/pcre2_internal.h
vendored
@ -517,6 +517,7 @@ bytes in a code unit in that mode. */
|
||||
#define PCRE2_HASBKPORX 0x00100000 /* contains \P, \p, or \X */
|
||||
#define PCRE2_DUPCAPUSED 0x00200000 /* contains (?| */
|
||||
#define PCRE2_HASBKC 0x00400000 /* contains \C */
|
||||
#define PCRE2_HASACCEPT 0x00800000 /* contains (*ACCEPT) */
|
||||
|
||||
#define PCRE2_MODE_MASK (PCRE2_MODE8 | PCRE2_MODE16 | PCRE2_MODE32)
|
||||
|
||||
@ -535,13 +536,14 @@ enum { PCRE2_MATCHEDBY_INTERPRETER, /* pcre2_match() */
|
||||
#define MAGIC_NUMBER 0x50435245UL /* 'PCRE' */
|
||||
|
||||
/* The maximum remaining length of subject we are prepared to search for a
|
||||
req_unit match. In 8-bit mode, memchr() is used and is much faster than the
|
||||
search loop that has to be used in 16-bit and 32-bit modes. */
|
||||
req_unit match from an anchored pattern. In 8-bit mode, memchr() is used and is
|
||||
much faster than the search loop that has to be used in 16-bit and 32-bit
|
||||
modes. */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
#define REQ_CU_MAX 2000
|
||||
#define REQ_CU_MAX 5000
|
||||
#else
|
||||
#define REQ_CU_MAX 1000
|
||||
#define REQ_CU_MAX 2000
|
||||
#endif
|
||||
|
||||
/* Offsets for the bitmap tables in the cbits set of tables. Each table
|
||||
@ -881,12 +883,16 @@ a positive value. */
|
||||
#define STRING_atomic0 "atomic\0"
|
||||
#define STRING_pla0 "pla\0"
|
||||
#define STRING_plb0 "plb\0"
|
||||
#define STRING_napla0 "napla\0"
|
||||
#define STRING_naplb0 "naplb\0"
|
||||
#define STRING_nla0 "nla\0"
|
||||
#define STRING_nlb0 "nlb\0"
|
||||
#define STRING_sr0 "sr\0"
|
||||
#define STRING_asr0 "asr\0"
|
||||
#define STRING_positive_lookahead0 "positive_lookahead\0"
|
||||
#define STRING_positive_lookbehind0 "positive_lookbehind\0"
|
||||
#define STRING_non_atomic_positive_lookahead0 "non_atomic_positive_lookahead\0"
|
||||
#define STRING_non_atomic_positive_lookbehind0 "non_atomic_positive_lookbehind\0"
|
||||
#define STRING_negative_lookahead0 "negative_lookahead\0"
|
||||
#define STRING_negative_lookbehind0 "negative_lookbehind\0"
|
||||
#define STRING_script_run0 "script_run\0"
|
||||
@ -1171,12 +1177,16 @@ only. */
|
||||
#define STRING_atomic0 STR_a STR_t STR_o STR_m STR_i STR_c "\0"
|
||||
#define STRING_pla0 STR_p STR_l STR_a "\0"
|
||||
#define STRING_plb0 STR_p STR_l STR_b "\0"
|
||||
#define STRING_napla0 STR_n STR_a STR_p STR_l STR_a "\0"
|
||||
#define STRING_naplb0 STR_n STR_a STR_p STR_l STR_b "\0"
|
||||
#define STRING_nla0 STR_n STR_l STR_a "\0"
|
||||
#define STRING_nlb0 STR_n STR_l STR_b "\0"
|
||||
#define STRING_sr0 STR_s STR_r "\0"
|
||||
#define STRING_asr0 STR_a STR_s STR_r "\0"
|
||||
#define STRING_positive_lookahead0 STR_p STR_o STR_s STR_i STR_t STR_i STR_v STR_e STR_UNDERSCORE STR_l STR_o STR_o STR_k STR_a STR_h STR_e STR_a STR_d "\0"
|
||||
#define STRING_positive_lookbehind0 STR_p STR_o STR_s STR_i STR_t STR_i STR_v STR_e STR_UNDERSCORE STR_l STR_o STR_o STR_k STR_b STR_e STR_h STR_i STR_n STR_d "\0"
|
||||
#define STRING_non_atomic_positive_lookahead0 STR_n STR_o STR_n STR_UNDERSCORE STR_a STR_t STR_o STR_m STR_i STR_c STR_UNDERSCORE STR_p STR_o STR_s STR_i STR_t STR_i STR_v STR_e STR_UNDERSCORE STR_l STR_o STR_o STR_k STR_a STR_h STR_e STR_a STR_d "\0"
|
||||
#define STRING_non_atomic_positive_lookbehind0 STR_n STR_o STR_n STR_UNDERSCORE STR_a STR_t STR_o STR_m STR_i STR_c STR_UNDERSCORE STR_p STR_o STR_s STR_i STR_t STR_i STR_v STR_e STR_UNDERSCORE STR_l STR_o STR_o STR_k STR_b STR_e STR_h STR_i STR_n STR_d "\0"
|
||||
#define STRING_negative_lookahead0 STR_n STR_e STR_g STR_a STR_t STR_i STR_v STR_e STR_UNDERSCORE STR_l STR_o STR_o STR_k STR_a STR_h STR_e STR_a STR_d "\0"
|
||||
#define STRING_negative_lookbehind0 STR_n STR_e STR_g STR_a STR_t STR_i STR_v STR_e STR_UNDERSCORE STR_l STR_o STR_o STR_k STR_b STR_e STR_h STR_i STR_n STR_d "\0"
|
||||
#define STRING_script_run0 STR_s STR_c STR_r STR_i STR_p STR_t STR_UNDERSCORE STR_r STR_u STR_n "\0"
|
||||
@ -1301,7 +1311,7 @@ enum { ESC_A = 1, ESC_G, ESC_K, ESC_B, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s,
|
||||
Starting from 1 (i.e. after OP_END), the values up to OP_EOD must correspond in
|
||||
order to the list of escapes immediately above. Furthermore, values up to
|
||||
OP_DOLLM must not be changed without adjusting the table called autoposstab in
|
||||
pcre2_auto_possess.c
|
||||
pcre2_auto_possess.c.
|
||||
|
||||
Whenever this list is updated, the two macro definitions that follow must be
|
||||
updated to match. The possessification table called "opcode_possessify" in
|
||||
@ -1499,80 +1509,81 @@ enum {
|
||||
OP_KETRMIN, /* 123 order. They are for groups that repeat for ever. */
|
||||
OP_KETRPOS, /* 124 Possessive unlimited repeat. */
|
||||
|
||||
/* The assertions must come before BRA, CBRA, ONCE, and COND, and the four
|
||||
asserts must remain in order. */
|
||||
/* The assertions must come before BRA, CBRA, ONCE, and COND. */
|
||||
|
||||
OP_REVERSE, /* 125 Move pointer back - used in lookbehind assertions */
|
||||
OP_ASSERT, /* 126 Positive lookahead */
|
||||
OP_ASSERT_NOT, /* 127 Negative lookahead */
|
||||
OP_ASSERTBACK, /* 128 Positive lookbehind */
|
||||
OP_ASSERTBACK_NOT, /* 129 Negative lookbehind */
|
||||
OP_ASSERT_NA, /* 130 Positive non-atomic lookahead */
|
||||
OP_ASSERTBACK_NA, /* 131 Positive non-atomic lookbehind */
|
||||
|
||||
/* ONCE, SCRIPT_RUN, BRA, BRAPOS, CBRA, CBRAPOS, and COND must come
|
||||
immediately after the assertions, with ONCE first, as there's a test for >=
|
||||
ONCE for a subpattern that isn't an assertion. The POS versions must
|
||||
immediately follow the non-POS versions in each case. */
|
||||
|
||||
OP_ONCE, /* 130 Atomic group, contains captures */
|
||||
OP_SCRIPT_RUN, /* 131 Non-capture, but check characters' scripts */
|
||||
OP_BRA, /* 132 Start of non-capturing bracket */
|
||||
OP_BRAPOS, /* 133 Ditto, with unlimited, possessive repeat */
|
||||
OP_CBRA, /* 134 Start of capturing bracket */
|
||||
OP_CBRAPOS, /* 135 Ditto, with unlimited, possessive repeat */
|
||||
OP_COND, /* 136 Conditional group */
|
||||
OP_ONCE, /* 132 Atomic group, contains captures */
|
||||
OP_SCRIPT_RUN, /* 133 Non-capture, but check characters' scripts */
|
||||
OP_BRA, /* 134 Start of non-capturing bracket */
|
||||
OP_BRAPOS, /* 135 Ditto, with unlimited, possessive repeat */
|
||||
OP_CBRA, /* 136 Start of capturing bracket */
|
||||
OP_CBRAPOS, /* 137 Ditto, with unlimited, possessive repeat */
|
||||
OP_COND, /* 138 Conditional group */
|
||||
|
||||
/* These five must follow the previous five, in the same order. There's a
|
||||
check for >= SBRA to distinguish the two sets. */
|
||||
|
||||
OP_SBRA, /* 137 Start of non-capturing bracket, check empty */
|
||||
OP_SBRAPOS, /* 138 Ditto, with unlimited, possessive repeat */
|
||||
OP_SCBRA, /* 139 Start of capturing bracket, check empty */
|
||||
OP_SCBRAPOS, /* 140 Ditto, with unlimited, possessive repeat */
|
||||
OP_SCOND, /* 141 Conditional group, check empty */
|
||||
OP_SBRA, /* 139 Start of non-capturing bracket, check empty */
|
||||
OP_SBRAPOS, /* 140 Ditto, with unlimited, possessive repeat */
|
||||
OP_SCBRA, /* 141 Start of capturing bracket, check empty */
|
||||
OP_SCBRAPOS, /* 142 Ditto, with unlimited, possessive repeat */
|
||||
OP_SCOND, /* 143 Conditional group, check empty */
|
||||
|
||||
/* The next two pairs must (respectively) be kept together. */
|
||||
|
||||
OP_CREF, /* 142 Used to hold a capture number as condition */
|
||||
OP_DNCREF, /* 143 Used to point to duplicate names as a condition */
|
||||
OP_RREF, /* 144 Used to hold a recursion number as condition */
|
||||
OP_DNRREF, /* 145 Used to point to duplicate names as a condition */
|
||||
OP_FALSE, /* 146 Always false (used by DEFINE and VERSION) */
|
||||
OP_TRUE, /* 147 Always true (used by VERSION) */
|
||||
OP_CREF, /* 144 Used to hold a capture number as condition */
|
||||
OP_DNCREF, /* 145 Used to point to duplicate names as a condition */
|
||||
OP_RREF, /* 146 Used to hold a recursion number as condition */
|
||||
OP_DNRREF, /* 147 Used to point to duplicate names as a condition */
|
||||
OP_FALSE, /* 148 Always false (used by DEFINE and VERSION) */
|
||||
OP_TRUE, /* 149 Always true (used by VERSION) */
|
||||
|
||||
OP_BRAZERO, /* 148 These two must remain together and in this */
|
||||
OP_BRAMINZERO, /* 149 order. */
|
||||
OP_BRAPOSZERO, /* 150 */
|
||||
OP_BRAZERO, /* 150 These two must remain together and in this */
|
||||
OP_BRAMINZERO, /* 151 order. */
|
||||
OP_BRAPOSZERO, /* 152 */
|
||||
|
||||
/* These are backtracking control verbs */
|
||||
|
||||
OP_MARK, /* 151 always has an argument */
|
||||
OP_PRUNE, /* 152 */
|
||||
OP_PRUNE_ARG, /* 153 same, but with argument */
|
||||
OP_SKIP, /* 154 */
|
||||
OP_SKIP_ARG, /* 155 same, but with argument */
|
||||
OP_THEN, /* 156 */
|
||||
OP_THEN_ARG, /* 157 same, but with argument */
|
||||
OP_COMMIT, /* 158 */
|
||||
OP_COMMIT_ARG, /* 159 same, but with argument */
|
||||
OP_MARK, /* 153 always has an argument */
|
||||
OP_PRUNE, /* 154 */
|
||||
OP_PRUNE_ARG, /* 155 same, but with argument */
|
||||
OP_SKIP, /* 156 */
|
||||
OP_SKIP_ARG, /* 157 same, but with argument */
|
||||
OP_THEN, /* 158 */
|
||||
OP_THEN_ARG, /* 159 same, but with argument */
|
||||
OP_COMMIT, /* 160 */
|
||||
OP_COMMIT_ARG, /* 161 same, but with argument */
|
||||
|
||||
/* These are forced failure and success verbs. FAIL and ACCEPT do accept an
|
||||
argument, but these cases can be compiled as, for example, (*MARK:X)(*FAIL)
|
||||
without the need for a special opcode. */
|
||||
|
||||
OP_FAIL, /* 160 */
|
||||
OP_ACCEPT, /* 161 */
|
||||
OP_ASSERT_ACCEPT, /* 162 Used inside assertions */
|
||||
OP_CLOSE, /* 163 Used before OP_ACCEPT to close open captures */
|
||||
OP_FAIL, /* 162 */
|
||||
OP_ACCEPT, /* 163 */
|
||||
OP_ASSERT_ACCEPT, /* 164 Used inside assertions */
|
||||
OP_CLOSE, /* 165 Used before OP_ACCEPT to close open captures */
|
||||
|
||||
/* This is used to skip a subpattern with a {0} quantifier */
|
||||
|
||||
OP_SKIPZERO, /* 164 */
|
||||
OP_SKIPZERO, /* 166 */
|
||||
|
||||
/* This is used to identify a DEFINE group during compilation so that it can
|
||||
be checked for having only one branch. It is changed to OP_FALSE before
|
||||
compilation finishes. */
|
||||
|
||||
OP_DEFINE, /* 165 */
|
||||
OP_DEFINE, /* 167 */
|
||||
|
||||
/* This is not an opcode, but is used to check that tables indexed by opcode
|
||||
are the correct length, in order to catch updating errors - there have been
|
||||
@ -1585,7 +1596,7 @@ enum {
|
||||
/* *** NOTE NOTE NOTE *** Whenever the list above is updated, the two macro
|
||||
definitions that follow must also be updated to match. There are also tables
|
||||
called "opcode_possessify" in pcre2_compile.c and "coptable" and "poptable" in
|
||||
pcre2_dfa_exec.c that must be updated. */
|
||||
pcre2_dfa_match.c that must be updated. */
|
||||
|
||||
|
||||
/* This macro defines textual names for all the opcodes. These are used only
|
||||
@ -1618,7 +1629,9 @@ some cases doesn't actually use these names at all). */
|
||||
"class", "nclass", "xclass", "Ref", "Refi", "DnRef", "DnRefi", \
|
||||
"Recurse", "Callout", "CalloutStr", \
|
||||
"Alt", "Ket", "KetRmax", "KetRmin", "KetRpos", \
|
||||
"Reverse", "Assert", "Assert not", "AssertB", "AssertB not", \
|
||||
"Reverse", "Assert", "Assert not", \
|
||||
"Assert back", "Assert back not", \
|
||||
"Non-atomic assert", "Non-atomic assert back", \
|
||||
"Once", \
|
||||
"Script run", \
|
||||
"Bra", "BraPos", "CBra", "CBraPos", \
|
||||
@ -1703,6 +1716,8 @@ in UTF-8 mode. The code that uses this table must know about such things. */
|
||||
1+LINK_SIZE, /* Assert not */ \
|
||||
1+LINK_SIZE, /* Assert behind */ \
|
||||
1+LINK_SIZE, /* Assert behind not */ \
|
||||
1+LINK_SIZE, /* NA Assert */ \
|
||||
1+LINK_SIZE, /* NA Assert behind */ \
|
||||
1+LINK_SIZE, /* ONCE */ \
|
||||
1+LINK_SIZE, /* SCRIPT_RUN */ \
|
||||
1+LINK_SIZE, /* BRA */ \
|
||||
|
10
thirdparty/pcre2/src/pcre2_intmodedep.h
vendored
10
thirdparty/pcre2/src/pcre2_intmodedep.h
vendored
@ -205,19 +205,19 @@ whether its argument, which is assumed to be one code unit, is less than 256.
|
||||
The CHMAX_255 macro does not assume one code unit. The maximum length of a MARK
|
||||
name must fit in one code unit; currently it is set to 255 or 65535. The
|
||||
TABLE_GET macro is used to access elements of tables containing exactly 256
|
||||
items. When code points can be greater than 255, a check is needed before
|
||||
accessing these tables. */
|
||||
items. Its argument is a code unit. When code points can be greater than 255, a
|
||||
check is needed before accessing these tables. */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
#define MAX_255(c) TRUE
|
||||
#define MAX_MARK ((1u << 8) - 1)
|
||||
#define TABLE_GET(c, table, default) ((table)[c])
|
||||
#ifdef SUPPORT_UNICODE
|
||||
#define SUPPORT_WIDE_CHARS
|
||||
#define CHMAX_255(c) ((c) <= 255u)
|
||||
#else
|
||||
#define CHMAX_255(c) TRUE
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
#define TABLE_GET(c, table, default) ((table)[c])
|
||||
|
||||
#else /* Code units are 16 or 32 bits */
|
||||
#define CHMAX_255(c) ((c) <= 255u)
|
||||
@ -228,7 +228,6 @@ accessing these tables. */
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
/* ----------------- Character-handling macros ----------------- */
|
||||
|
||||
/* There is a proposed future special "UTF-21" mode, in which only the lowest
|
||||
@ -854,6 +853,7 @@ typedef struct match_block {
|
||||
uint32_t match_call_count; /* Number of times a new frame is created */
|
||||
BOOL hitend; /* Hit the end of the subject at some point */
|
||||
BOOL hasthen; /* Pattern contains (*THEN) */
|
||||
BOOL allowemptypartial; /* Allow empty hard partial */
|
||||
const uint8_t *lcc; /* Points to lower casing table */
|
||||
const uint8_t *fcc; /* Points to case-flipping table */
|
||||
const uint8_t *ctypes; /* Points to table of type maps */
|
||||
@ -866,6 +866,7 @@ typedef struct match_block {
|
||||
PCRE2_SPTR name_table; /* Table of group names */
|
||||
PCRE2_SPTR start_code; /* For use when recursing */
|
||||
PCRE2_SPTR start_subject; /* Start of the subject string */
|
||||
PCRE2_SPTR check_subject; /* Where UTF-checked from */
|
||||
PCRE2_SPTR end_subject; /* End of the subject string */
|
||||
PCRE2_SPTR end_match_ptr; /* Subject position at end match */
|
||||
PCRE2_SPTR start_used_ptr; /* Earliest consulted character */
|
||||
@ -908,6 +909,7 @@ typedef struct dfa_match_block {
|
||||
uint32_t poptions; /* Pattern options */
|
||||
uint32_t nltype; /* Newline type */
|
||||
uint32_t nllen; /* Newline string length */
|
||||
BOOL allowemptypartial; /* Allow empty hard partial */
|
||||
PCRE2_UCHAR nl[4]; /* Newline string when fixed */
|
||||
uint16_t bsr_convention; /* \R interpretation */
|
||||
pcre2_callout_block *cb; /* Points to a callout block */
|
||||
|
1899
thirdparty/pcre2/src/pcre2_jit_compile.c
vendored
1899
thirdparty/pcre2/src/pcre2_jit_compile.c
vendored
File diff suppressed because it is too large
Load Diff
1
thirdparty/pcre2/src/pcre2_jit_match.c
vendored
1
thirdparty/pcre2/src/pcre2_jit_match.c
vendored
@ -74,7 +74,6 @@ Arguments:
|
||||
options option bits
|
||||
match_data points to a match_data block
|
||||
mcontext points to a match context
|
||||
jit_stack points to a JIT stack
|
||||
|
||||
Returns: > 0 => success; value is the number of ovector pairs filled
|
||||
= 0 => success, but ovector is not big enough
|
||||
|
321
thirdparty/pcre2/src/pcre2_jit_neon_inc.h
vendored
Normal file
321
thirdparty/pcre2/src/pcre2_jit_neon_inc.h
vendored
Normal file
@ -0,0 +1,321 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
This module by Zoltan Herczeg and Sebastian Pop
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2019 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
# if defined(FFCS)
|
||||
# if defined(FF_UTF)
|
||||
# define FF_FUN ffcs_utf
|
||||
# else
|
||||
# define FF_FUN ffcs
|
||||
# endif
|
||||
|
||||
# elif defined(FFCS_2)
|
||||
# if defined(FF_UTF)
|
||||
# define FF_FUN ffcs_2_utf
|
||||
# else
|
||||
# define FF_FUN ffcs_2
|
||||
# endif
|
||||
|
||||
# elif defined(FFCS_MASK)
|
||||
# if defined(FF_UTF)
|
||||
# define FF_FUN ffcs_mask_utf
|
||||
# else
|
||||
# define FF_FUN ffcs_mask
|
||||
# endif
|
||||
|
||||
# elif defined(FFCPS_0)
|
||||
# if defined (FF_UTF)
|
||||
# define FF_FUN ffcps_0_utf
|
||||
# else
|
||||
# define FF_FUN ffcps_0
|
||||
# endif
|
||||
|
||||
# elif defined (FFCPS_1)
|
||||
# if defined (FF_UTF)
|
||||
# define FF_FUN ffcps_1_utf
|
||||
# else
|
||||
# define FF_FUN ffcps_1
|
||||
# endif
|
||||
|
||||
# elif defined (FFCPS_DEFAULT)
|
||||
# if defined (FF_UTF)
|
||||
# define FF_FUN ffcps_default_utf
|
||||
# else
|
||||
# define FF_FUN ffcps_default
|
||||
# endif
|
||||
# endif
|
||||
|
||||
/* Vectorised fast-forward search. The body is specialised by the FFCS,
FFCS_2, FFCS_MASK, FFCPS (with FFCPS_DIFF1 / FFCPS_CHAR1A2A) and FF_UTF
macros that the including file defines before each inclusion.

Arguments:
  str_end   end of the area to search
  str_ptr   position at which the search starts
  offs1     offset of the first character of a pair (FFCPS variants)
  offs2     offset of the second character of a pair (FFCPS variants)
  chars     the character(s) to look for, unpacked through the int_char union

Returns:    pointer to the position that was found, or NULL when nothing
            was found before str_end

The first block is loaded from str_ptr rounded down to a 16-byte boundary;
results that belong to bytes before the real str_ptr are shifted out. All
following blocks are loaded from aligned addresses. */

static sljit_u8* SLJIT_FUNC FF_FUN(sljit_u8 *str_end, sljit_u8 *str_ptr, sljit_uw offs1, sljit_uw offs2, sljit_uw chars)
#undef FF_FUN
{
quad_word qw;
int_char ic;
ic.x = chars;

#if defined(FFCS)
sljit_u8 c1 = ic.c.c1;
vect_t vc1 = VDUPQ(c1);

#elif defined(FFCS_2)
sljit_u8 c1 = ic.c.c1;
vect_t vc1 = VDUPQ(c1);
sljit_u8 c2 = ic.c.c2;
vect_t vc2 = VDUPQ(c2);

#elif defined(FFCS_MASK)
sljit_u8 c1 = ic.c.c1;
vect_t vc1 = VDUPQ(c1);
sljit_u8 mask = ic.c.c2;
vect_t vmask = VDUPQ(mask);
#endif

#if defined(FFCPS)
compare_type compare1_type = compare_match1;
compare_type compare2_type = compare_match1;
vect_t cmp1a, cmp1b, cmp2a, cmp2b;
const sljit_u32 diff = IN_UCHARS(offs1 - offs2);
PCRE2_UCHAR char1a = ic.c.c1;
PCRE2_UCHAR char2a = ic.c.c3;

/* The "b" operands are only assigned below when the two candidates of a
position differ, but they are always passed by value to
fast_forward_char_pair_compare() for the first block. Give them a defined
value so that no indeterminate vector is ever read (this also silences
"may be used uninitialized" diagnostics). */
cmp1b = VDUPQ(0);
cmp2b = VDUPQ(0);

# ifdef FFCPS_CHAR1A2A
cmp1a = VDUPQ(char1a);
cmp2a = VDUPQ(char2a);
# else
PCRE2_UCHAR char1b = ic.c.c2;
PCRE2_UCHAR char2b = ic.c.c4;
if (char1a == char1b)
  cmp1a = VDUPQ(char1a);
else
  {
  sljit_u32 bit1 = char1a ^ char1b;
  if (is_powerof2(bit1))
    {
    /* The candidates differ in a single bit: one compare is enough once
    that bit has been forced on in the data. */
    compare1_type = compare_match1i;
    cmp1a = VDUPQ(char1a | bit1);
    cmp1b = VDUPQ(bit1);
    }
  else
    {
    compare1_type = compare_match2;
    cmp1a = VDUPQ(char1a);
    cmp1b = VDUPQ(char1b);
    }
  }

if (char2a == char2b)
  cmp2a = VDUPQ(char2a);
else
  {
  sljit_u32 bit2 = char2a ^ char2b;
  if (is_powerof2(bit2))
    {
    compare2_type = compare_match1i;
    cmp2a = VDUPQ(char2a | bit2);
    cmp2b = VDUPQ(bit2);
    }
  else
    {
    compare2_type = compare_match2;
    cmp2a = VDUPQ(char2a);
    cmp2b = VDUPQ(char2b);
    }
  }
# endif

/* From here on str_ptr tracks the position of the character at offs1; the
adjustment is undone just before a match is returned. */
str_ptr += IN_UCHARS(offs1);
#endif

#if PCRE2_CODE_UNIT_WIDTH != 8
vect_t char_mask = VDUPQ(0xff);
#endif

#if defined(FF_UTF)
restart:;
#endif

#if defined(FFCPS)
sljit_u8 *p1 = str_ptr - diff;
#endif
/* Split str_ptr into a 16-byte aligned address and the offset within it. */
sljit_s32 align_offset = ((uint64_t)str_ptr & 0xf);
str_ptr = (sljit_u8 *) ((uint64_t)str_ptr & ~0xf);
vect_t data = VLD1Q(str_ptr);
#if PCRE2_CODE_UNIT_WIDTH != 8
data = VANDQ(data, char_mask);
#endif

#if defined(FFCS)
vect_t eq = VCEQQ(data, vc1);

#elif defined(FFCS_2)
vect_t eq1 = VCEQQ(data, vc1);
vect_t eq2 = VCEQQ(data, vc2);
vect_t eq = VORRQ(eq1, eq2);

#elif defined(FFCS_MASK)
vect_t eq = VORRQ(data, vmask);
eq = VCEQQ(eq, vc1);

#elif defined(FFCPS)
# if defined(FFCPS_DIFF1)
vect_t prev_data = data;
# endif

vect_t data2;
if (p1 < str_ptr)
  {
  data2 = VLD1Q(str_ptr - diff);
#if PCRE2_CODE_UNIT_WIDTH != 8
  data2 = VANDQ(data2, char_mask);
#endif
  }
else
  data2 = shift_left_n_lanes(data, offs1 - offs2);

data = fast_forward_char_pair_compare(compare1_type, data, cmp1a, cmp1b);
data2 = fast_forward_char_pair_compare(compare2_type, data2, cmp2a, cmp2b);
vect_t eq = VANDQ(data, data2);
#endif

VST1Q(qw.mem, eq);
/* Ignore matches before the first STR_PTR. */
if (align_offset < 8)
  {
  qw.dw[0] >>= align_offset * 8;
  if (qw.dw[0])
    {
    str_ptr += align_offset + __builtin_ctzll(qw.dw[0]) / 8;
    goto match;
    }
  if (qw.dw[1])
    {
    str_ptr += 8 + __builtin_ctzll(qw.dw[1]) / 8;
    goto match;
    }
  }
else
  {
  qw.dw[1] >>= (align_offset - 8) * 8;
  if (qw.dw[1])
    {
    str_ptr += align_offset + __builtin_ctzll(qw.dw[1]) / 8;
    goto match;
    }
  }
str_ptr += 16;

/* Aligned blocks. The "match" label sits inside the loop so that a hit in
the first (unaligned) block shares the checks at the end of the loop body. */
while (str_ptr < str_end)
  {
  vect_t orig_data = VLD1Q(str_ptr);
#if PCRE2_CODE_UNIT_WIDTH != 8
  orig_data = VANDQ(orig_data, char_mask);
#endif
  data = orig_data;

#if defined(FFCS)
  eq = VCEQQ(data, vc1);

#elif defined(FFCS_2)
  eq1 = VCEQQ(data, vc1);
  eq2 = VCEQQ(data, vc2);
  eq = VORRQ(eq1, eq2);

#elif defined(FFCS_MASK)
  eq = VORRQ(data, vmask);
  eq = VCEQQ(eq, vc1);
#endif

#if defined(FFCPS)
# if defined (FFCPS_DIFF1)
  data2 = VEXTQ(prev_data, data, VECTOR_FACTOR - 1);
# else
  data2 = VLD1Q(str_ptr - diff);
#  if PCRE2_CODE_UNIT_WIDTH != 8
  data2 = VANDQ(data2, char_mask);
#  endif
# endif

# ifdef FFCPS_CHAR1A2A
  data = VCEQQ(data, cmp1a);
  data2 = VCEQQ(data2, cmp2a);
# else
  data = fast_forward_char_pair_compare(compare1_type, data, cmp1a, cmp1b);
  data2 = fast_forward_char_pair_compare(compare2_type, data2, cmp2a, cmp2b);
# endif

  eq = VANDQ(data, data2);
#endif

  VST1Q(qw.mem, eq);
  if (qw.dw[0])
    str_ptr += __builtin_ctzll(qw.dw[0]) / 8;
  else if (qw.dw[1])
    str_ptr += 8 + __builtin_ctzll(qw.dw[1]) / 8;
  else
    {
    /* Nothing in this block: move on to the next one. */
    str_ptr += 16;
#if defined (FFCPS_DIFF1)
    prev_data = orig_data;
#endif
    continue;
    }

match:;
  if (str_ptr >= str_end)
    /* Failed match. */
    return NULL;

#if defined(FF_UTF)
  if (utf_continue(str_ptr + IN_UCHARS(-offs1)))
    {
    /* Not a match. */
    str_ptr += IN_UCHARS(1);
    goto restart;
    }
#endif

  /* Match. */
#if defined (FFCPS)
  str_ptr -= IN_UCHARS(offs1);
#endif
  return str_ptr;
  }

/* Failed match. */
return NULL;
}
|
993
thirdparty/pcre2/src/pcre2_jit_simd_inc.h
vendored
Normal file
993
thirdparty/pcre2/src/pcre2_jit_simd_inc.h
vendored
Normal file
@ -0,0 +1,993 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
This module by Zoltan Herczeg
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2019 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) && !(defined SUPPORT_VALGRIND)
|
||||
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
||||
/* Emits a test on the code unit held in reg and returns the jump that is
generated for it. The code unit is masked in place (reg is overwritten) and
compared for inequality with the pattern of a trailing code unit:
  8-bit:  (unit & 0xc0) != 0x80     (not a 10xxxxxx byte)
  16-bit: (unit & 0xfc00) != 0xdc00 (not in the 0xdc00..0xdfff range) */

static struct sljit_jump *jump_if_utf_char_start(struct sljit_compiler *compiler, sljit_s32 reg)
{
struct sljit_jump *not_trailing;

#if PCRE2_CODE_UNIT_WIDTH == 8
OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xc0);
not_trailing = CMP(SLJIT_NOT_EQUAL, reg, 0, SLJIT_IMM, 0x80);
#elif PCRE2_CODE_UNIT_WIDTH == 16
OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xfc00);
not_trailing = CMP(SLJIT_NOT_EQUAL, reg, 0, SLJIT_IMM, 0xdc00);
#else
#error "Unknown code width"
#endif

return not_trailing;
}
|
||||
#endif
|
||||
|
||||
/* Replicates one code unit across a 32-bit value: four copies for 8-bit
code units, two copies for 16-bit code units, one copy for 32-bit code units.

As a side effect of being compiled, this function also defines
SSE2_COMPARE_TYPE_INDEX (0, 1 or 2 for 8, 16 or 32-bit code units), which the
code below adds to the base PCMPEQB opcode to get the B/W/D variant. */

static sljit_s32 character_to_int32(PCRE2_UCHAR chr)
{
sljit_u32 replicated = chr;

#if PCRE2_CODE_UNIT_WIDTH == 8
#define SSE2_COMPARE_TYPE_INDEX 0
/* c -> cc -> cccc */
replicated |= replicated << 8;
replicated |= replicated << 16;
#elif PCRE2_CODE_UNIT_WIDTH == 16
#define SSE2_COMPARE_TYPE_INDEX 1
replicated |= replicated << 16;
#elif PCRE2_CODE_UNIT_WIDTH == 32
#define SSE2_COMPARE_TYPE_INDEX 2
#else
#error "Unsupported unit width"
#endif

return (sljit_s32)replicated;
}
|
||||
|
||||
/* Emits a 128-bit load into an XMM register from [src_general_reg + offset].

Arguments:
  compiler         the sljit compiler
  dst_xmm_reg      index of the destination XMM register (must be < 8)
  src_general_reg  machine index of the base register (must be < 8)
  offset           signed 8-bit displacement

The prefix byte is 0x66 (MOVDQA, aligned load) when the low four bits of the
displacement are zero and 0xf3 (MOVDQU, unaligned load) otherwise. A zero
displacement is encoded without a displacement byte. */

static void load_from_mem_sse2(struct sljit_compiler *compiler, sljit_s32 dst_xmm_reg, sljit_s32 src_general_reg, sljit_s8 offset)
{
sljit_u8 instruction[5];
sljit_u8 displacement = (sljit_u8)offset;

SLJIT_ASSERT(dst_xmm_reg < 8);
SLJIT_ASSERT(src_general_reg < 8);

/* MOVDQA / MOVDQU xmm1, xmm2/m128 */
instruction[0] = (displacement & 0xf) != 0 ? 0xf3 : 0x66;
instruction[1] = 0x0f;
instruction[2] = 0x6f;
instruction[3] = (dst_xmm_reg << 3) | src_general_reg;

if (offset != 0)
  {
  /* Switch the ModRM byte to the "base + disp8" form. */
  instruction[3] |= 0x40;
  instruction[4] = displacement;
  sljit_emit_op_custom(compiler, instruction, 5);
  return;
  }

sljit_emit_op_custom(compiler, instruction, 4);
}
|
||||
|
||||
/* How a block of subject data is compared with the character(s) wanted. */

typedef enum {
  /* A single character: one equality compare. */
  sse2_compare_match1 = 0,
  /* Two characters that differ in one bit: OR that bit into the data, then
  one equality compare. */
  sse2_compare_match1i = 1,
  /* Two unrelated characters: two equality compares, results ORed. */
  sse2_compare_match2 = 2
} sse2_compare_type;
|
||||
|
||||
/* Emits (at most) one SSE2 instruction of a comparison sequence. The caller
invokes this function with step = 0, 1, 2, 3 in turn; interleaving the steps
of two sequences is what the step argument is for.

Arguments:
  compiler      the sljit compiler
  compare_type  kind of comparison
  step          which instruction of the sequence to emit (0..3)
  dst_ind       XMM register holding the data; receives the result
  cmp1_ind      XMM register holding the first comparison value
  cmp2_ind      XMM register holding the second value (or the bit to set)
  tmp_ind       scratch XMM register (used by sse2_compare_match2 only)

Instruction emitted per step:
  match1:   step 2: PCMPEQ dst, cmp1
  match1i:  step 0: POR dst, cmp2       step 2: PCMPEQ dst, cmp1
  match2:   step 0: MOVDQA tmp, dst     step 1: PCMPEQ dst, cmp1
            step 2: PCMPEQ tmp, cmp2    step 3: POR dst, tmp
Every other combination emits nothing. */

static void fast_forward_char_pair_sse2_compare(struct sljit_compiler *compiler, sse2_compare_type compare_type,
  int step, sljit_s32 dst_ind, sljit_s32 cmp1_ind, sljit_s32 cmp2_ind, sljit_s32 tmp_ind)
{
sljit_u8 instruction[4];
sljit_u8 opcode;
sljit_s32 reg_field;
sljit_s32 rm_field;

SLJIT_ASSERT(step >= 0 && step <= 3);

if (compare_type == sse2_compare_match2)
  {
  switch (step)
    {
    case 0:
    /* MOVDQA xmm1, xmm2/m128 */
    opcode = 0x6f;
    reg_field = tmp_ind;
    rm_field = dst_ind;
    break;

    case 1:
    /* PCMPEQB/W/D xmm1, xmm2/m128 */
    opcode = 0x74 + SSE2_COMPARE_TYPE_INDEX;
    reg_field = dst_ind;
    rm_field = cmp1_ind;
    break;

    case 2:
    /* PCMPEQB/W/D xmm1, xmm2/m128 */
    opcode = 0x74 + SSE2_COMPARE_TYPE_INDEX;
    reg_field = tmp_ind;
    rm_field = cmp2_ind;
    break;

    case 3:
    /* POR xmm1, xmm2/m128 */
    opcode = 0xeb;
    reg_field = dst_ind;
    rm_field = tmp_ind;
    break;

    default:
    return;
    }
  }
else if (step == 0 && compare_type == sse2_compare_match1i)
  {
  /* POR xmm1, xmm2/m128 */
  opcode = 0xeb;
  reg_field = dst_ind;
  rm_field = cmp2_ind;
  }
else if (step == 2)
  {
  /* PCMPEQB/W/D xmm1, xmm2/m128 */
  opcode = 0x74 + SSE2_COMPARE_TYPE_INDEX;
  reg_field = dst_ind;
  rm_field = cmp1_ind;
  }
else
  return;

/* All of the above are 0x66 0x0f <opcode> with a register-register ModRM. */
instruction[0] = 0x66;
instruction[1] = 0x0f;
instruction[2] = opcode;
instruction[3] = 0xc0 | (reg_field << 3) | rm_field;
sljit_emit_op_custom(compiler, instruction, 4);
}
|
||||
|
||||
#define JIT_HAS_FAST_FORWARD_CHAR_SIMD (sljit_has_cpu_feature(SLJIT_HAS_SSE2))
|
||||
|
||||
/* Generates SSE2 code that advances STR_PTR to the next code unit that is
equal to char1 or char2, scanning 16 bytes at a time.

Arguments:
  common   the compiler common block
  char1    first character wanted
  char2    second character wanted (equal to char1 when only one is wanted)
  offset   only read in UTF mode for code unit widths other than 32; when it
           is greater than zero, a hit whose code unit at STR_PTR - offset is
           not a character start is skipped and the search is restarted

The generated code first handles the 16-byte aligned block that contains
STR_PTR (discarding results for bytes before STR_PTR) and then loops over
whole aligned blocks. TMP1 and TMP2 are used as scratch registers. */

static void fast_forward_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset)
{
DEFINE_COMPILER;
struct sljit_label *aligned_loop;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_label *restart;
#endif
struct sljit_jump *found;
struct sljit_jump *partial_quit[2];
sse2_compare_type compare_type;
sljit_u8 instruction[8];
/* Machine indexes of the general purpose registers used in raw encodings. */
sljit_s32 tmp1_gpr = sljit_get_register_index(TMP1);
sljit_s32 str_ptr_gpr = sljit_get_register_index(STR_PTR);
/* XMM register allocation. */
sljit_s32 data_xmm = 0;
sljit_s32 scratch_xmm = 1;
sljit_s32 cmp1_xmm = 2;
sljit_s32 cmp2_xmm = 3;
sljit_u32 bit;
int step;

SLJIT_UNUSED_ARG(offset);

/* Choose the cheapest comparison that covers both characters. */
if (char1 == char2)
  {
  compare_type = sse2_compare_match1;
  bit = 0;
  }
else
  {
  bit = char1 ^ char2;
  if (is_powerof2(bit))
    compare_type = sse2_compare_match1i;
  else
    {
    compare_type = sse2_compare_match2;
    bit = 0;
    }
  }

partial_quit[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
if (common->mode == PCRE2_JIT_COMPLETE)
  add_jump(compiler, &common->failed_match, partial_quit[0]);

/* First part (unaligned start) */

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1 | bit));

SLJIT_ASSERT(tmp1_gpr < 8);

/* MOVD xmm, r/m32 */
instruction[0] = 0x66;
instruction[1] = 0x0f;
instruction[2] = 0x6e;
instruction[3] = 0xc0 | (cmp1_xmm << 3) | tmp1_gpr;
sljit_emit_op_custom(compiler, instruction, 4);

if (compare_type != sse2_compare_match1)
  {
  /* Second operand: the distinguishing bit, or the second character. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(bit != 0 ? bit : char2));

  /* MOVD xmm, r/m32 */
  instruction[3] = 0xc0 | (cmp2_xmm << 3) | tmp1_gpr;
  sljit_emit_op_custom(compiler, instruction, 4);
  }

OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);

/* PSHUFD xmm1, xmm2/m128, imm8 (broadcast the low 32 bits) */
/* instruction[0] = 0x66; */
/* instruction[1] = 0x0f; */
instruction[2] = 0x70;
instruction[3] = 0xc0 | (cmp1_xmm << 3) | cmp1_xmm;
instruction[4] = 0;
sljit_emit_op_custom(compiler, instruction, 5);

if (compare_type != sse2_compare_match1)
  {
  /* PSHUFD xmm1, xmm2/m128, imm8 */
  instruction[3] = 0xc0 | (cmp2_xmm << 3) | cmp2_xmm;
  sljit_emit_op_custom(compiler, instruction, 5);
  }

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
restart = LABEL();
#endif
/* STR_PTR becomes the aligned block address, TMP2 the offset within it. */
OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, ~0xf);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf);

load_from_mem_sse2(compiler, data_xmm, str_ptr_gpr, 0);
for (step = 0; step < 4; step++)
  fast_forward_char_pair_sse2_compare(compiler, compare_type, step, data_xmm, cmp1_xmm, cmp2_xmm, scratch_xmm);

/* PMOVMSKB reg, xmm */
/* instruction[0] = 0x66; */
/* instruction[1] = 0x0f; */
instruction[2] = 0xd7;
instruction[3] = 0xc0 | (tmp1_gpr << 3) | data_xmm;
sljit_emit_op_custom(compiler, instruction, 4);

/* Shift out the mask bits that belong to bytes before the start. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0);

found = CMP(SLJIT_NOT_ZERO, TMP1, 0, SLJIT_IMM, 0);

OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

/* Second part (aligned) */
aligned_loop = LABEL();

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16);

partial_quit[1] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
if (common->mode == PCRE2_JIT_COMPLETE)
  add_jump(compiler, &common->failed_match, partial_quit[1]);

load_from_mem_sse2(compiler, data_xmm, str_ptr_gpr, 0);
for (step = 0; step < 4; step++)
  fast_forward_char_pair_sse2_compare(compiler, compare_type, step, data_xmm, cmp1_xmm, cmp2_xmm, scratch_xmm);

/* PMOVMSKB reg, xmm */
/* instruction[0] = 0x66; */
/* instruction[1] = 0x0f; */
instruction[2] = 0xd7;
instruction[3] = 0xc0 | (tmp1_gpr << 3) | data_xmm;
sljit_emit_op_custom(compiler, instruction, 4);

CMPTO(SLJIT_ZERO, TMP1, 0, SLJIT_IMM, 0, aligned_loop);

JUMPHERE(found);

/* BSF r32, r/m32 (index of the lowest set mask bit) */
instruction[0] = 0x0f;
instruction[1] = 0xbc;
instruction[2] = 0xc0 | (tmp1_gpr << 3) | tmp1_gpr;
sljit_emit_op_custom(compiler, instruction, 3);

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);

if (common->mode == PCRE2_JIT_COMPLETE)
  {
  add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  }
else
  {
  /* Partial matching: the early exits land here, and STR_PTR is clamped
  to STR_END. */
  JUMPHERE(partial_quit[0]);
  JUMPHERE(partial_quit[1]);
  OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0);
  CMOV(SLJIT_GREATER, STR_PTR, STR_END, 0);
  }

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf && offset > 0)
  {
  SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE);

  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset));

  found = jump_if_utf_char_start(compiler, TMP1);

  /* Not at a character start: step one code unit forward and search again. */
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);
  JUMPTO(SLJIT_JUMP, restart);

  JUMPHERE(found);
  }
#endif
}
|
||||
|
||||
#ifndef _WIN64
|
||||
|
||||
/* Returns the largest distance, in code units, supported between the two
characters of a pair: 15, 7 or 3 for 8, 16 or 32-bit code units (one less
than the number of code units in 16 bytes). */

static SLJIT_INLINE sljit_u32 max_fast_forward_char_pair_offset(void)
{
sljit_u32 max_offset;

#if PCRE2_CODE_UNIT_WIDTH == 8
max_offset = 15;
#elif PCRE2_CODE_UNIT_WIDTH == 16
max_offset = 7;
#elif PCRE2_CODE_UNIT_WIDTH == 32
max_offset = 3;
#else
#error "Unsupported unit width"
#endif

return max_offset;
}
|
||||
|
||||
#define JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD (sljit_has_cpu_feature(SLJIT_HAS_SSE2))
|
||||
|
||||
static void fast_forward_char_pair_simd(compiler_common *common, sljit_s32 offs1,
|
||||
PCRE2_UCHAR char1a, PCRE2_UCHAR char1b, sljit_s32 offs2, PCRE2_UCHAR char2a, PCRE2_UCHAR char2b)
|
||||
{
|
||||
DEFINE_COMPILER;
|
||||
sse2_compare_type compare1_type = sse2_compare_match1;
|
||||
sse2_compare_type compare2_type = sse2_compare_match1;
|
||||
sljit_u32 bit1 = 0;
|
||||
sljit_u32 bit2 = 0;
|
||||
sljit_u32 diff = IN_UCHARS(offs1 - offs2);
|
||||
sljit_s32 tmp1_reg_ind = sljit_get_register_index(TMP1);
|
||||
sljit_s32 tmp2_reg_ind = sljit_get_register_index(TMP2);
|
||||
sljit_s32 str_ptr_reg_ind = sljit_get_register_index(STR_PTR);
|
||||
sljit_s32 data1_ind = 0;
|
||||
sljit_s32 data2_ind = 1;
|
||||
sljit_s32 tmp1_ind = 2;
|
||||
sljit_s32 tmp2_ind = 3;
|
||||
sljit_s32 cmp1a_ind = 4;
|
||||
sljit_s32 cmp1b_ind = 5;
|
||||
sljit_s32 cmp2a_ind = 6;
|
||||
sljit_s32 cmp2b_ind = 7;
|
||||
struct sljit_label *start;
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
||||
struct sljit_label *restart;
|
||||
#endif
|
||||
struct sljit_jump *jump[2];
|
||||
sljit_u8 instruction[8];
|
||||
int i;
|
||||
|
||||
SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE && offs1 > offs2);
|
||||
SLJIT_ASSERT(diff <= IN_UCHARS(max_fast_forward_char_pair_offset()));
|
||||
SLJIT_ASSERT(tmp1_reg_ind < 8 && tmp2_reg_ind == 1);
|
||||
|
||||
/* Initialize. */
|
||||
if (common->match_end_ptr != 0)
|
||||
{
|
||||
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
|
||||
OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
|
||||
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offs1 + 1));
|
||||
|
||||
OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, STR_END, 0);
|
||||
CMOV(SLJIT_LESS, STR_END, TMP1, 0);
|
||||
}
|
||||
|
||||
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1));
|
||||
add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
|
||||
|
||||
/* MOVD xmm, r/m32 */
|
||||
instruction[0] = 0x66;
|
||||
instruction[1] = 0x0f;
|
||||
instruction[2] = 0x6e;
|
||||
|
||||
if (char1a == char1b)
|
||||
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1a));
|
||||
else
|
||||
{
|
||||
bit1 = char1a ^ char1b;
|
||||
if (is_powerof2(bit1))
|
||||
{
|
||||
compare1_type = sse2_compare_match1i;
|
||||
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1a | bit1));
|
||||
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(bit1));
|
||||
}
|
||||
else
|
||||
{
|
||||
compare1_type = sse2_compare_match2;
|
||||
bit1 = 0;
|
||||
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1a));
|
||||
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(char1b));
|
||||
}
|
||||
}
|
||||
|
||||
instruction[3] = 0xc0 | (cmp1a_ind << 3) | tmp1_reg_ind;
|
||||
sljit_emit_op_custom(compiler, instruction, 4);
|
||||
|
||||
if (char1a != char1b)
|
||||
{
|
||||
instruction[3] = 0xc0 | (cmp1b_ind << 3) | tmp2_reg_ind;
|
||||
sljit_emit_op_custom(compiler, instruction, 4);
|
||||
}
|
||||
|
||||
if (char2a == char2b)
|
||||
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char2a));
|
||||
else
|
||||
{
|
||||
bit2 = char2a ^ char2b;
|
||||
if (is_powerof2(bit2))
|
||||
{
|
||||
compare2_type = sse2_compare_match1i;
|
||||
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char2a | bit2));
|
||||
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(bit2));
|
||||
}
|
||||
else
|
||||
{
|
||||
compare2_type = sse2_compare_match2;
|
||||
bit2 = 0;
|
||||
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char2a));
|
||||
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(char2b));
|
||||
}
|
||||
}
|
||||
|
||||
instruction[3] = 0xc0 | (cmp2a_ind << 3) | tmp1_reg_ind;
|
||||
sljit_emit_op_custom(compiler, instruction, 4);
|
||||
|
||||
if (char2a != char2b)
|
||||
{
|
||||
instruction[3] = 0xc0 | (cmp2b_ind << 3) | tmp2_reg_ind;
|
||||
sljit_emit_op_custom(compiler, instruction, 4);
|
||||
}
|
||||
|
||||
/* PSHUFD xmm1, xmm2/m128, imm8 */
|
||||
/* instruction[0] = 0x66; */
|
||||
/* instruction[1] = 0x0f; */
|
||||
instruction[2] = 0x70;
|
||||
instruction[4] = 0;
|
||||
|
||||
instruction[3] = 0xc0 | (cmp1a_ind << 3) | cmp1a_ind;
|
||||
sljit_emit_op_custom(compiler, instruction, 5);
|
||||
|
||||
if (char1a != char1b)
|
||||
{
|
||||
instruction[3] = 0xc0 | (cmp1b_ind << 3) | cmp1b_ind;
|
||||
sljit_emit_op_custom(compiler, instruction, 5);
|
||||
}
|
||||
|
||||
instruction[3] = 0xc0 | (cmp2a_ind << 3) | cmp2a_ind;
|
||||
sljit_emit_op_custom(compiler, instruction, 5);
|
||||
|
||||
if (char2a != char2b)
|
||||
{
|
||||
instruction[3] = 0xc0 | (cmp2b_ind << 3) | cmp2b_ind;
|
||||
sljit_emit_op_custom(compiler, instruction, 5);
|
||||
}
|
||||
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
||||
restart = LABEL();
|
||||
#endif
|
||||
|
||||
OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, diff);
|
||||
OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);
|
||||
OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, ~0xf);
|
||||
|
||||
load_from_mem_sse2(compiler, data1_ind, str_ptr_reg_ind, 0);
|
||||
|
||||
jump[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_PTR, 0);
|
||||
|
||||
load_from_mem_sse2(compiler, data2_ind, str_ptr_reg_ind, -(sljit_s8)diff);
|
||||
jump[1] = JUMP(SLJIT_JUMP);
|
||||
|
||||
JUMPHERE(jump[0]);
|
||||
|
||||
/* MOVDQA xmm1, xmm2/m128 */
|
||||
/* instruction[0] = 0x66; */
|
||||
/* instruction[1] = 0x0f; */
|
||||
instruction[2] = 0x6f;
|
||||
instruction[3] = 0xc0 | (data2_ind << 3) | data1_ind;
|
||||
sljit_emit_op_custom(compiler, instruction, 4);
|
||||
|
||||
/* PSLLDQ xmm1, imm8 */
|
||||
/* instruction[0] = 0x66; */
|
||||
/* instruction[1] = 0x0f; */
|
||||
instruction[2] = 0x73;
|
||||
instruction[3] = 0xc0 | (7 << 3) | data2_ind;
|
||||
instruction[4] = diff;
|
||||
sljit_emit_op_custom(compiler, instruction, 5);
|
||||
|
||||
JUMPHERE(jump[1]);
|
||||
|
||||
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf);
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
fast_forward_char_pair_sse2_compare(compiler, compare2_type, i, data2_ind, cmp2a_ind, cmp2b_ind, tmp2_ind);
|
||||
fast_forward_char_pair_sse2_compare(compiler, compare1_type, i, data1_ind, cmp1a_ind, cmp1b_ind, tmp1_ind);
|
||||
}
|
||||
|
||||
/* PAND xmm1, xmm2/m128 */
|
||||
/* instruction[0] = 0x66; */
|
||||
/* instruction[1] = 0x0f; */
|
||||
instruction[2] = 0xdb;
|
||||
instruction[3] = 0xc0 | (data1_ind << 3) | data2_ind;
|
||||
sljit_emit_op_custom(compiler, instruction, 4);
|
||||
|
||||
/* PMOVMSKB reg, xmm */
|
||||
/* instruction[0] = 0x66; */
|
||||
/* instruction[1] = 0x0f; */
|
||||
instruction[2] = 0xd7;
|
||||
instruction[3] = 0xc0 | (tmp1_reg_ind << 3) | 0;
|
||||
sljit_emit_op_custom(compiler, instruction, 4);
|
||||
|
||||
/* Ignore matches before the first STR_PTR. */
|
||||
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
|
||||
OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0);
|
||||
|
||||
jump[0] = CMP(SLJIT_NOT_ZERO, TMP1, 0, SLJIT_IMM, 0);
|
||||
|
||||
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
|
||||
|
||||
/* Main loop. */
|
||||
start = LABEL();
|
||||
|
||||
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16);
|
||||
add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
|
||||
|
||||
load_from_mem_sse2(compiler, data1_ind, str_ptr_reg_ind, 0);
|
||||
load_from_mem_sse2(compiler, data2_ind, str_ptr_reg_ind, -(sljit_s8)diff);
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
fast_forward_char_pair_sse2_compare(compiler, compare1_type, i, data1_ind, cmp1a_ind, cmp1b_ind, tmp2_ind);
|
||||
fast_forward_char_pair_sse2_compare(compiler, compare2_type, i, data2_ind, cmp2a_ind, cmp2b_ind, tmp1_ind);
|
||||
}
|
||||
|
||||
/* PAND xmm1, xmm2/m128 */
|
||||
/* instruction[0] = 0x66; */
|
||||
/* instruction[1] = 0x0f; */
|
||||
instruction[2] = 0xdb;
|
||||
instruction[3] = 0xc0 | (data1_ind << 3) | data2_ind;
|
||||
sljit_emit_op_custom(compiler, instruction, 4);
|
||||
|
||||
/* PMOVMSKB reg, xmm */
|
||||
/* instruction[0] = 0x66; */
|
||||
/* instruction[1] = 0x0f; */
|
||||
instruction[2] = 0xd7;
|
||||
instruction[3] = 0xc0 | (tmp1_reg_ind << 3) | 0;
|
||||
sljit_emit_op_custom(compiler, instruction, 4);
|
||||
|
||||
CMPTO(SLJIT_ZERO, TMP1, 0, SLJIT_IMM, 0, start);
|
||||
|
||||
JUMPHERE(jump[0]);
|
||||
|
||||
/* BSF r32, r/m32 */
|
||||
instruction[0] = 0x0f;
|
||||
instruction[1] = 0xbc;
|
||||
instruction[2] = 0xc0 | (tmp1_reg_ind << 3) | tmp1_reg_ind;
|
||||
sljit_emit_op_custom(compiler, instruction, 3);
|
||||
|
||||
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
|
||||
|
||||
add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
|
||||
|
||||
if (common->match_end_ptr != 0)
|
||||
OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
|
||||
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
||||
if (common->utf)
|
||||
{
|
||||
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offs1));
|
||||
|
||||
jump[0] = jump_if_utf_char_start(compiler, TMP1);
|
||||
|
||||
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
|
||||
CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, restart);
|
||||
|
||||
add_jump(compiler, &common->failed_match, JUMP(SLJIT_JUMP));
|
||||
|
||||
JUMPHERE(jump[0]);
|
||||
}
|
||||
#endif
|
||||
|
||||
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1));
|
||||
|
||||
if (common->match_end_ptr != 0)
|
||||
OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
|
||||
}
|
||||
|
||||
#endif /* !_WIN64 */
|
||||
|
||||
#undef SSE2_COMPARE_TYPE_INDEX
|
||||
|
||||
#endif /* SLJIT_CONFIG_X86 && !SUPPORT_VALGRIND */
|
||||
|
||||
#if (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64 && (defined __ARM_NEON || defined __ARM_NEON__))
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
/* Overlay of one unsigned int with four individually addressable bytes.
The JIT emitters in this file fill in the c1..c4 members and then pass the
combined word x as a single immediate. */
typedef union {
  unsigned int x;
  struct {
    unsigned char c1;
    unsigned char c2;
    unsigned char c3;
    unsigned char c4;
  } c;
} int_char;
|
||||
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32

/* Tests whether the code unit at s is a UTF continuation unit: a 10xxxxxx
byte in the 8-bit library, or a low surrogate (0xdc00..0xdfff) in the 16-bit
library.

Arguments:
  s         points to the code unit to be tested

Returns:    non-zero if the unit continues a multi-unit character, else 0
*/

static SLJIT_INLINE int utf_continue(sljit_u8 *s)
{
#if PCRE2_CODE_UNIT_WIDTH == 8
return (*s & 0xc0) == 0x80;
#elif PCRE2_CODE_UNIT_WIDTH == 16
/* The parameter is a byte pointer, so the whole 16-bit unit has to be loaded
explicitly. Masking a single byte with 0xfc00 always yields zero, which would
make this test permanently false.
NOTE(review): assumes s is the (suitably aligned) start of a 16-bit code unit -
confirm against the callers in pcre2_jit_neon_inc.h. */
return (*(sljit_u16 *)s & 0xfc00) == 0xdc00;
#else
#error "Unknown code width"
#endif
}

#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
|
||||
|
||||
/* Width-specific aliases for the NEON intrinsics used by the fast-forward
helpers. VECTOR_FACTOR is the number of code units held by one 128-bit vector,
and quad_word lets a stored vector be read back either per code unit or as two
64-bit halves. Any width other than 8 or 16 selects the 32-bit definitions. */

#if PCRE2_CODE_UNIT_WIDTH == 8

#define VECTOR_FACTOR 16
#define vect_t uint8x16_t
#define VLD1Q(X) vld1q_u8((sljit_u8 *)(X))
#define VCEQQ vceqq_u8
#define VORRQ vorrq_u8
#define VST1Q vst1q_u8
#define VDUPQ vdupq_n_u8
#define VEXTQ vextq_u8
#define VANDQ vandq_u8

typedef union {
  uint8_t mem[16];
  uint64_t dw[2];
} quad_word;

#elif PCRE2_CODE_UNIT_WIDTH == 16

#define VECTOR_FACTOR 8
#define vect_t uint16x8_t
#define VLD1Q(X) vld1q_u16((sljit_u16 *)(X))
#define VCEQQ vceqq_u16
#define VORRQ vorrq_u16
#define VST1Q vst1q_u16
#define VDUPQ vdupq_n_u16
#define VEXTQ vextq_u16
#define VANDQ vandq_u16

typedef union {
  uint16_t mem[8];
  uint64_t dw[2];
} quad_word;

#else

#define VECTOR_FACTOR 4
#define vect_t uint32x4_t
#define VLD1Q(X) vld1q_u32((sljit_u32 *)(X))
#define VCEQQ vceqq_u32
#define VORRQ vorrq_u32
#define VST1Q vst1q_u32
#define VDUPQ vdupq_n_u32
#define VEXTQ vextq_u32
#define VANDQ vandq_u32

typedef union {
  uint32_t mem[4];
  uint64_t dw[2];
} quad_word;

#endif
|
||||
|
||||
#define FFCS
|
||||
#include "pcre2_jit_neon_inc.h"
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
||||
# define FF_UTF
|
||||
# include "pcre2_jit_neon_inc.h"
|
||||
# undef FF_UTF
|
||||
#endif
|
||||
#undef FFCS
|
||||
|
||||
#define FFCS_2
|
||||
#include "pcre2_jit_neon_inc.h"
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
||||
# define FF_UTF
|
||||
# include "pcre2_jit_neon_inc.h"
|
||||
# undef FF_UTF
|
||||
#endif
|
||||
#undef FFCS_2
|
||||
|
||||
#define FFCS_MASK
|
||||
#include "pcre2_jit_neon_inc.h"
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
||||
# define FF_UTF
|
||||
# include "pcre2_jit_neon_inc.h"
|
||||
# undef FF_UTF
|
||||
#endif
|
||||
#undef FFCS_MASK
|
||||
|
||||
#define JIT_HAS_FAST_FORWARD_CHAR_SIMD 1
|
||||
|
||||
/* Emits JIT code that advances STR_PTR by calling one of the NEON search
helpers (ffcs, ffcs_mask, ffcs_2, or their _utf variants).

The helper is chosen from the two characters:
  char1 == char2                 plain single-character search (ffcs)
  they differ in exactly one bit masked search (ffcs_mask); c1 is the
                                 character with that bit set, c2 is the bit
  otherwise                      two-character search (ffcs_2)

When UTF support is compiled in for 8/16-bit units, the _utf variant is used
if common->utf is set and offset is greater than zero.

Arguments:
  common      the JIT compiler state
  char1       first acceptable character
  char2       second acceptable character (may equal char1)
  offset      offset value handed to the helper in SLJIT_R2

A zero return from the helper is treated as "not found": in
PCRE2_JIT_COMPLETE mode the emitted code jumps to common->failed_match,
otherwise it leaves STR_PTR at its saved value. */

static void fast_forward_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset)
{
DEFINE_COMPILER;
int_char ic;
struct sljit_jump *partial_quit;
sljit_sw helper;
PCRE2_UCHAR mask = char1 ^ char2;
const sljit_s32 arg_types = SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(UW)
  | SLJIT_ARG3(UW) | SLJIT_ARG4(UW);
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
const int utf_variant = common->utf && offset > 0;
#endif

/* Only c1 and c2 are assigned below. Clear the whole word first so that the
immediate embedded in the generated code never contains indeterminate bytes. */
ic.x = 0;

if (char1 == char2)
  {
  ic.c.c1 = char1;
  ic.c.c2 = char2;
  helper = SLJIT_FUNC_OFFSET(ffcs);
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
  if (utf_variant) helper = SLJIT_FUNC_OFFSET(ffcs_utf);
#endif
  }
else if (is_powerof2(mask))
  {
  ic.c.c1 = char1 | mask;
  ic.c.c2 = mask;
  helper = SLJIT_FUNC_OFFSET(ffcs_mask);
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
  if (utf_variant) helper = SLJIT_FUNC_OFFSET(ffcs_mask_utf);
#endif
  }
else
  {
  ic.c.c1 = char1;
  ic.c.c2 = char2;
  helper = SLJIT_FUNC_OFFSET(ffcs_2);
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
  if (utf_variant) helper = SLJIT_FUNC_OFFSET(ffcs_2_utf);
#endif
  }

/* Save temporary registers. */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STR_PTR, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP3, 0);

/* Prepare function arguments.
NOTE(review): the packed characters go into SLJIT_R4 while SLJIT_R3 is not set
here and the call is declared with four arguments - presumably the helpers
take five parameters and ignore the fourth; confirm against
pcre2_jit_neon_inc.h. */
OP1(SLJIT_MOV, SLJIT_R0, 0, STR_END, 0);
OP1(SLJIT_MOV, SLJIT_R1, 0, STR_PTR, 0);
OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, offset);
OP1(SLJIT_MOV, SLJIT_R4, 0, SLJIT_IMM, ic.x);

sljit_emit_icall(compiler, SLJIT_CALL, arg_types, SLJIT_IMM, helper);

/* Restore registers. */
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);

/* Check return value. */
partial_quit = CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
if (common->mode == PCRE2_JIT_COMPLETE)
  add_jump(compiler, &common->failed_match, partial_quit);

/* Fast forward STR_PTR to the position returned by the helper. */
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);

if (common->mode != PCRE2_JIT_COMPLETE)
  JUMPHERE(partial_quit);
}
|
||||
|
||||
/* Selects how fast_forward_char_pair_compare() matches a data vector against
its two comparison vectors. */
typedef enum {
  compare_match1 = 0,   /* equal to cmp1 */
  compare_match1i = 1,  /* OR with cmp2 first, then equal to cmp1 */
  compare_match2 = 2    /* equal to cmp1 or equal to cmp2 */
} compare_type;
|
||||
|
||||
/* Compares every lane of dst against the comparison vectors according to
ctype and returns the lane-wise result of VCEQQ.

  compare_match2    lane matches if it equals cmp1 or equals cmp2
  compare_match1i   lane is ORed with cmp2, then must equal cmp1
  anything else     lane must equal cmp1
*/

static inline vect_t fast_forward_char_pair_compare(compare_type ctype, vect_t dst, vect_t cmp1, vect_t cmp2)
{
switch (ctype)
  {
  case compare_match2:
  return VORRQ(VCEQQ(dst, cmp1), VCEQQ(dst, cmp2));

  case compare_match1i:
  return VCEQQ(VORRQ(dst, cmp2), cmp1);

  default:
  return VCEQQ(dst, cmp1);
  }
}
|
||||
|
||||
/* Returns the largest character-pair offset supported by the SIMD pair search
for the configured code unit width: 15 for 8-bit, 7 for 16-bit and 3 for
32-bit code units. */

static SLJIT_INLINE sljit_u32 max_fast_forward_char_pair_offset(void)
{
#if PCRE2_CODE_UNIT_WIDTH == 8
const sljit_u32 max_offset = 15;
#elif PCRE2_CODE_UNIT_WIDTH == 16
const sljit_u32 max_offset = 7;
#elif PCRE2_CODE_UNIT_WIDTH == 32
const sljit_u32 max_offset = 3;
#else
#error "Unsupported unit width"
#endif
return max_offset;
}
|
||||
|
||||
/* ARM doesn't have a shift left across lanes. */
|
||||
static SLJIT_INLINE vect_t shift_left_n_lanes(vect_t a, sljit_u8 n)
|
||||
{
|
||||
vect_t zero = VDUPQ(0);
|
||||
SLJIT_ASSERT(0 < n && n < VECTOR_FACTOR);
|
||||
/* VEXTQ takes an immediate as last argument. */
|
||||
#define C(X) case X: return VEXTQ(zero, a, VECTOR_FACTOR - X);
|
||||
switch (n)
|
||||
{
|
||||
C(1); C(2); C(3);
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 32
|
||||
C(4); C(5); C(6); C(7);
|
||||
# if PCRE2_CODE_UNIT_WIDTH != 16
|
||||
C(8); C(9); C(10); C(11); C(12); C(13); C(14); C(15);
|
||||
# endif
|
||||
#endif
|
||||
default:
|
||||
/* Based on the ASSERT(0 < n && n < VECTOR_FACTOR) above, this won't
|
||||
happen. The return is still here for compilers to not warn. */
|
||||
return a;
|
||||
}
|
||||
}
|
||||
|
||||
#define FFCPS
|
||||
#define FFCPS_DIFF1
|
||||
#define FFCPS_CHAR1A2A
|
||||
|
||||
#define FFCPS_0
|
||||
#include "pcre2_jit_neon_inc.h"
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
||||
# define FF_UTF
|
||||
# include "pcre2_jit_neon_inc.h"
|
||||
# undef FF_UTF
|
||||
#endif
|
||||
#undef FFCPS_0
|
||||
|
||||
#undef FFCPS_CHAR1A2A
|
||||
|
||||
#define FFCPS_1
|
||||
#include "pcre2_jit_neon_inc.h"
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
||||
# define FF_UTF
|
||||
# include "pcre2_jit_neon_inc.h"
|
||||
# undef FF_UTF
|
||||
#endif
|
||||
#undef FFCPS_1
|
||||
|
||||
#undef FFCPS_DIFF1
|
||||
|
||||
#define FFCPS_DEFAULT
|
||||
#include "pcre2_jit_neon_inc.h"
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
||||
# define FF_UTF
|
||||
# include "pcre2_jit_neon_inc.h"
|
||||
# undef FF_UTF
|
||||
#endif
|
||||
#undef FFCPS
|
||||
|
||||
#define JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD 1
|
||||
|
||||
/* Emits JIT code that advances STR_PTR by calling one of the NEON
character-pair search helpers.

The helper is chosen as follows (the _utf variant is used when UTF support is
compiled in for 8/16-bit units and common->utf is set):
  diff == 1 and both pairs are single characters   ffcps_0
  diff == 1 otherwise                              ffcps_1
  any other diff                                   ffcps_default

Arguments:
  common      the JIT compiler state; mode must be PCRE2_JIT_COMPLETE
  offs1       offset of the first character; must be greater than offs2
  char1a      first alternative for the character at offs1
  char1b      second alternative for the character at offs1
  offs2       offset of the second character
  char2a      first alternative for the character at offs2
  char2b      second alternative for the character at offs2

A zero return from the helper makes the emitted code jump to
common->failed_match. */

static void fast_forward_char_pair_simd(compiler_common *common, sljit_s32 offs1,
  PCRE2_UCHAR char1a, PCRE2_UCHAR char1b, sljit_s32 offs2, PCRE2_UCHAR char2a, PCRE2_UCHAR char2b)
{
DEFINE_COMPILER;
sljit_u32 diff = IN_UCHARS(offs1 - offs2);
struct sljit_jump *partial_quit;
int_char ic;
sljit_sw search_func;
const sljit_s32 call_signature = SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW)
  | SLJIT_ARG3(SW) | SLJIT_ARG4(SW);
const int single_chars = (char1a == char1b && char2a == char2b);

SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE && offs1 > offs2);
SLJIT_ASSERT(diff <= IN_UCHARS(max_fast_forward_char_pair_offset()));
SLJIT_ASSERT(compiler->scratches == 5);

/* Pick the helper up front; this is decided entirely at JIT-compile time. */
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf)
  {
  if (diff != 1)
    search_func = SLJIT_FUNC_OFFSET(ffcps_default_utf);
  else if (single_chars)
    search_func = SLJIT_FUNC_OFFSET(ffcps_0_utf);
  else
    search_func = SLJIT_FUNC_OFFSET(ffcps_1_utf);
  }
else
#endif
  {
  if (diff != 1)
    search_func = SLJIT_FUNC_OFFSET(ffcps_default);
  else if (single_chars)
    search_func = SLJIT_FUNC_OFFSET(ffcps_0);
  else
    search_func = SLJIT_FUNC_OFFSET(ffcps_1);
  }

/* Pack the four characters into one word for the helper. */
ic.c.c1 = char1a;
ic.c.c2 = char1b;
ic.c.c3 = char2a;
ic.c.c4 = char2b;

/* Save temporary register STR_PTR. */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STR_PTR, 0);

/* First argument: the end pointer handed to the helper. When a match end
pointer slot exists, it is that value plus IN_UCHARS(offs1 + 1), replaced by
STR_END when STR_END is lower. */
if (common->match_end_ptr != 0)
  {
  OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
  OP2(SLJIT_ADD, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, IN_UCHARS(offs1 + 1));

  OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, STR_END, 0, SLJIT_R0, 0);
  CMOV(SLJIT_LESS, SLJIT_R0, STR_END, 0);
  }
else
  {
  OP1(SLJIT_MOV, SLJIT_R0, 0, STR_END, 0);
  }

/* Remaining arguments: subject pointer, both offsets, packed characters. */
OP1(SLJIT_MOV, SLJIT_R1, 0, STR_PTR, 0);
OP1(SLJIT_MOV_S32, SLJIT_R2, 0, SLJIT_IMM, offs1);
OP1(SLJIT_MOV_S32, SLJIT_R3, 0, SLJIT_IMM, offs2);
OP1(SLJIT_MOV_U32, SLJIT_R4, 0, SLJIT_IMM, ic.x);

sljit_emit_icall(compiler, SLJIT_CALL, call_signature, SLJIT_IMM, search_func);

/* Restore STR_PTR register. */
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);

/* Check return value. */
partial_quit = CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
add_jump(compiler, &common->failed_match, partial_quit);

/* Fast forward STR_PTR to the position returned by the helper. */
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);

JUMPHERE(partial_quit);
}
|
||||
|
||||
#endif /* SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64 */
|
11
thirdparty/pcre2/src/pcre2_maketables.c
vendored
11
thirdparty/pcre2/src/pcre2_maketables.c
vendored
@ -147,4 +147,15 @@ for (i = 0; i < 256; i++)
|
||||
return yield;
|
||||
}
|
||||
|
||||
#ifndef DFTABLES
|
||||
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
|
||||
pcre2_maketables_free(pcre2_general_context *gcontext, const uint8_t *tables)
|
||||
{
|
||||
if (gcontext)
|
||||
gcontext->memctl.free((void *)tables, gcontext->memctl.memory_data);
|
||||
else
|
||||
free((void *)tables);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* End of pcre2_maketables.c */
|
||||
|
617
thirdparty/pcre2/src/pcre2_match.c
vendored
617
thirdparty/pcre2/src/pcre2_match.c
vendored
@ -415,8 +415,7 @@ if (caseless)
|
||||
else
|
||||
#endif
|
||||
|
||||
/* Not in UTF mode */
|
||||
|
||||
/* Not in UTF mode */
|
||||
{
|
||||
for (; length > 0; length--)
|
||||
{
|
||||
@ -491,27 +490,32 @@ heap is used for a larger vector.
|
||||
*************************************************/
|
||||
|
||||
/* These macros pack up tests that are used for partial matching several times
|
||||
in the code. We set the "hit end" flag if the pointer is at the end of the
|
||||
subject and also past the earliest inspected character (i.e. something has been
|
||||
matched, even if not part of the actual matched string). For hard partial
|
||||
matching, we then return immediately. The second one is used when we already
|
||||
know we are past the end of the subject. */
|
||||
in the code. The second one is used when we already know we are past the end of
|
||||
the subject. We set the "hit end" flag if the pointer is at the end of the
|
||||
subject and either (a) the pointer is past the earliest inspected character
|
||||
(i.e. something has been matched, even if not part of the actual matched
|
||||
string), or (b) the pattern contains a lookbehind. These are the conditions for
|
||||
which adding more characters may allow the current match to continue.
|
||||
|
||||
For hard partial matching, we immediately return a partial match. Otherwise,
|
||||
carrying on means that a complete match on the current subject will be sought.
|
||||
A partial match is returned only if no complete match can be found. */
|
||||
|
||||
#define CHECK_PARTIAL()\
|
||||
if (mb->partial != 0 && Feptr >= mb->end_subject && \
|
||||
Feptr > mb->start_used_ptr) \
|
||||
if (Feptr >= mb->end_subject) \
|
||||
{ \
|
||||
mb->hitend = TRUE; \
|
||||
if (mb->partial > 1) return PCRE2_ERROR_PARTIAL; \
|
||||
SCHECK_PARTIAL(); \
|
||||
}
|
||||
|
||||
#define SCHECK_PARTIAL()\
|
||||
if (mb->partial != 0 && Feptr > mb->start_used_ptr) \
|
||||
if (mb->partial != 0 && \
|
||||
(Feptr > mb->start_used_ptr || mb->allowemptypartial)) \
|
||||
{ \
|
||||
mb->hitend = TRUE; \
|
||||
if (mb->partial > 1) return PCRE2_ERROR_PARTIAL; \
|
||||
}
|
||||
|
||||
|
||||
/* These macros are used to implement backtracking. They simulate a recursive
|
||||
call to the match() function by means of a local vector of frames which
|
||||
remember the backtracking points. */
|
||||
@ -5127,6 +5131,8 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
||||
|
||||
case OP_ASSERT:
|
||||
case OP_ASSERTBACK:
|
||||
case OP_ASSERT_NA:
|
||||
case OP_ASSERTBACK_NA:
|
||||
Lframe_type = GF_NOCAPTURE | Fop;
|
||||
for (;;)
|
||||
{
|
||||
@ -5412,7 +5418,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
||||
{
|
||||
while (number-- > 0)
|
||||
{
|
||||
if (Feptr <= mb->start_subject) RRETURN(MATCH_NOMATCH);
|
||||
if (Feptr <= mb->check_subject) RRETURN(MATCH_NOMATCH);
|
||||
Feptr--;
|
||||
BACKCHAR(Feptr);
|
||||
}
|
||||
@ -5420,7 +5426,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
||||
else
|
||||
#endif
|
||||
|
||||
/* No UTF-8 support, or not in UTF-8 mode: count is byte count */
|
||||
/* No UTF-8 support, or not in UTF-8 mode: count is code unit count */
|
||||
|
||||
{
|
||||
if ((ptrdiff_t)number > Feptr - mb->start_subject) RRETURN(MATCH_NOMATCH);
|
||||
@ -5472,15 +5478,16 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
||||
|
||||
/* If we are at the end of an assertion that is a condition, return a
|
||||
match, discarding any intermediate backtracking points. Copy back the
|
||||
captures into the frame before N so that they are set on return. Doing
|
||||
this for all assertions, both positive and negative, seems to match what
|
||||
Perl does. */
|
||||
mark setting and the captures into the frame before N so that they are
|
||||
set on return. Doing this for all assertions, both positive and negative,
|
||||
seems to match what Perl does. */
|
||||
|
||||
if (GF_IDMASK(N->group_frame_type) == GF_CONDASSERT)
|
||||
{
|
||||
memcpy((char *)P + offsetof(heapframe, ovector), Fovector,
|
||||
Foffset_top * sizeof(PCRE2_SIZE));
|
||||
P->offset_top = Foffset_top;
|
||||
P->mark = Fmark;
|
||||
Fback_frame = (char *)F - (char *)P;
|
||||
RRETURN(MATCH_MATCH);
|
||||
}
|
||||
@ -5496,10 +5503,20 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
||||
case OP_SCOND:
|
||||
break;
|
||||
|
||||
/* Positive assertions are like OP_ONCE, except that in addition the
|
||||
/* Non-atomic positive assertions are like OP_BRA, except that the
|
||||
subject pointer must be put back to where it was at the start of the
|
||||
assertion. */
|
||||
|
||||
case OP_ASSERT_NA:
|
||||
case OP_ASSERTBACK_NA:
|
||||
if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
|
||||
Feptr = P->eptr;
|
||||
break;
|
||||
|
||||
/* Atomic positive assertions are like OP_ONCE, except that in addition
|
||||
the subject pointer must be put back to where it was at the start of the
|
||||
assertion. */
|
||||
|
||||
case OP_ASSERT:
|
||||
case OP_ASSERTBACK:
|
||||
if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
|
||||
@ -5640,7 +5657,11 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
||||
|
||||
case OP_EOD:
|
||||
if (Feptr < mb->end_subject) RRETURN(MATCH_NOMATCH);
|
||||
SCHECK_PARTIAL();
|
||||
if (mb->partial != 0)
|
||||
{
|
||||
mb->hitend = TRUE;
|
||||
if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
|
||||
}
|
||||
Fecode++;
|
||||
break;
|
||||
|
||||
@ -5665,7 +5686,11 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
||||
|
||||
/* Either at end of string or \n before end. */
|
||||
|
||||
SCHECK_PARTIAL();
|
||||
if (mb->partial != 0)
|
||||
{
|
||||
mb->hitend = TRUE;
|
||||
if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
|
||||
}
|
||||
Fecode++;
|
||||
break;
|
||||
|
||||
@ -5743,7 +5768,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
||||
|
||||
case OP_NOT_WORD_BOUNDARY:
|
||||
case OP_WORD_BOUNDARY:
|
||||
if (Feptr == mb->start_subject) prev_is_word = FALSE; else
|
||||
if (Feptr == mb->check_subject) prev_is_word = FALSE; else
|
||||
{
|
||||
PCRE2_SPTR lastptr = Feptr - 1;
|
||||
#ifdef SUPPORT_UNICODE
|
||||
@ -5946,6 +5971,7 @@ in rrc. */
|
||||
#define LBL(val) case val: goto L_RM##val;
|
||||
|
||||
RETURN_SWITCH:
|
||||
if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
|
||||
if (Frdepth == 0) return rrc; /* Exit from the top level */
|
||||
F = (heapframe *)((char *)F - Fback_frame); /* Backtrack */
|
||||
mb->cb->callout_flags |= PCRE2_CALLOUT_BACKTRACK; /* Note for callouts */
|
||||
@ -5999,9 +6025,9 @@ Arguments:
|
||||
|
||||
Returns: > 0 => success; value is the number of ovector pairs filled
|
||||
= 0 => success, but ovector is not big enough
|
||||
-1 => failed to match (PCRE2_ERROR_NOMATCH)
|
||||
-2 => partial match (PCRE2_ERROR_PARTIAL)
|
||||
< -2 => some kind of unexpected problem
|
||||
= -1 => failed to match (PCRE2_ERROR_NOMATCH)
|
||||
= -2 => partial match (PCRE2_ERROR_PARTIAL)
|
||||
< -2 => some kind of unexpected problem
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
@ -6014,7 +6040,6 @@ int was_zero_terminated = 0;
|
||||
const uint8_t *start_bits = NULL;
|
||||
const pcre2_real_code *re = (const pcre2_real_code *)code;
|
||||
|
||||
|
||||
BOOL anchored;
|
||||
BOOL firstline;
|
||||
BOOL has_first_cu = FALSE;
|
||||
@ -6022,6 +6047,11 @@ BOOL has_req_cu = FALSE;
|
||||
BOOL startline;
|
||||
BOOL utf;
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
BOOL memchr_not_found_first_cu = FALSE;
|
||||
BOOL memchr_not_found_first_cu2 = FALSE;
|
||||
#endif
|
||||
|
||||
PCRE2_UCHAR first_cu = 0;
|
||||
PCRE2_UCHAR first_cu2 = 0;
|
||||
PCRE2_UCHAR req_cu = 0;
|
||||
@ -6029,10 +6059,23 @@ PCRE2_UCHAR req_cu2 = 0;
|
||||
|
||||
PCRE2_SPTR bumpalong_limit;
|
||||
PCRE2_SPTR end_subject;
|
||||
PCRE2_SPTR true_end_subject;
|
||||
PCRE2_SPTR start_match = subject + start_offset;
|
||||
PCRE2_SPTR req_cu_ptr = start_match - 1;
|
||||
PCRE2_SPTR start_partial = NULL;
|
||||
PCRE2_SPTR match_partial = NULL;
|
||||
PCRE2_SPTR start_partial;
|
||||
PCRE2_SPTR match_partial;
|
||||
|
||||
#ifdef SUPPORT_JIT
|
||||
BOOL use_jit;
|
||||
#endif
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
BOOL allow_invalid;
|
||||
uint32_t fragment_options = 0;
|
||||
#ifdef SUPPORT_JIT
|
||||
BOOL jit_checked_utf = FALSE;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
PCRE2_SIZE frame_size;
|
||||
|
||||
@ -6059,7 +6102,7 @@ if (length == PCRE2_ZERO_TERMINATED)
|
||||
length = PRIV(strlen)(subject);
|
||||
was_zero_terminated = 1;
|
||||
}
|
||||
end_subject = subject + length;
|
||||
true_end_subject = end_subject = subject + length;
|
||||
|
||||
/* Plausibility checks */
|
||||
|
||||
@ -6095,12 +6138,24 @@ options |= (re->flags & FF) / ((FF & (~FF+1)) / (OO & (~OO+1)));
|
||||
#undef FF
|
||||
#undef OO
|
||||
|
||||
/* These two settings are used in the code for checking a UTF string that
|
||||
follows immediately afterwards. Other values in the mb block are used only
|
||||
during interpretive processing, not when the JIT support is in use, so they are
|
||||
set up later. */
|
||||
/* If the pattern was successfully studied with JIT support, we will run the
|
||||
JIT executable instead of the rest of this function. Most options must be set
|
||||
at compile time for the JIT code to be usable. */
|
||||
|
||||
#ifdef SUPPORT_JIT
|
||||
use_jit = (re->executable_jit != NULL &&
|
||||
(options & ~PUBLIC_JIT_MATCH_OPTIONS) == 0);
|
||||
#endif
|
||||
|
||||
/* Initialize UTF parameters. */
|
||||
|
||||
utf = (re->overall_options & PCRE2_UTF) != 0;
|
||||
#ifdef SUPPORT_UNICODE
|
||||
allow_invalid = (re->overall_options & PCRE2_MATCH_INVALID_UTF) != 0;
|
||||
#endif
|
||||
|
||||
/* Convert the partial matching flags into an integer. */
|
||||
|
||||
mb->partial = ((options & PCRE2_PARTIAL_HARD) != 0)? 2 :
|
||||
((options & PCRE2_PARTIAL_SOFT) != 0)? 1 : 0;
|
||||
|
||||
@ -6111,61 +6166,6 @@ if (mb->partial != 0 &&
|
||||
((re->overall_options | options) & PCRE2_ENDANCHORED) != 0)
|
||||
return PCRE2_ERROR_BADOPTION;
|
||||
|
||||
/* Check a UTF string for validity if required. For 8-bit and 16-bit strings,
|
||||
we must also check that a starting offset does not point into the middle of a
|
||||
multiunit character. We check only the portion of the subject that is going to
|
||||
be inspected during matching - from the offset minus the maximum back reference
|
||||
to the given length. This saves time when a small part of a large subject is
|
||||
being matched by the use of a starting offset. Note that the maximum lookbehind
|
||||
is a number of characters, not code units. */
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf && (options & PCRE2_NO_UTF_CHECK) == 0)
|
||||
{
|
||||
PCRE2_SPTR check_subject = start_match; /* start_match includes offset */
|
||||
|
||||
if (start_offset > 0)
|
||||
{
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 32
|
||||
unsigned int i;
|
||||
if (start_match < end_subject && NOT_FIRSTCU(*start_match))
|
||||
return PCRE2_ERROR_BADUTFOFFSET;
|
||||
for (i = re->max_lookbehind; i > 0 && check_subject > subject; i--)
|
||||
{
|
||||
check_subject--;
|
||||
while (check_subject > subject &&
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
(*check_subject & 0xc0) == 0x80)
|
||||
#else /* 16-bit */
|
||||
(*check_subject & 0xfc00) == 0xdc00)
|
||||
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
|
||||
check_subject--;
|
||||
}
|
||||
#else
|
||||
/* In the 32-bit library, one code unit equals one character. However,
|
||||
we cannot just subtract the lookbehind and then compare pointers, because
|
||||
a very large lookbehind could create an invalid pointer. */
|
||||
|
||||
if (start_offset >= re->max_lookbehind)
|
||||
check_subject -= re->max_lookbehind;
|
||||
else
|
||||
check_subject = subject;
|
||||
#endif /* PCRE2_CODE_UNIT_WIDTH != 32 */
|
||||
}
|
||||
|
||||
/* Validate the relevant portion of the subject. After an error, adjust the
|
||||
offset to be an absolute offset in the whole string. */
|
||||
|
||||
match_data->rc = PRIV(valid_utf)(check_subject,
|
||||
length - (check_subject - subject), &(match_data->startchar));
|
||||
if (match_data->rc != 0)
|
||||
{
|
||||
match_data->startchar += check_subject - subject;
|
||||
return match_data->rc;
|
||||
}
|
||||
}
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
/* It is an error to set an offset limit without setting the flag at compile
|
||||
time. */
|
||||
|
||||
@ -6184,15 +6184,89 @@ if ((match_data->flags & PCRE2_MD_COPIED_SUBJECT) != 0)
|
||||
}
|
||||
match_data->subject = NULL;
|
||||
|
||||
/* If the pattern was successfully studied with JIT support, run the JIT
|
||||
executable instead of the rest of this function. Most options must be set at
|
||||
compile time for the JIT code to be usable. Fallback to the normal code path if
|
||||
an unsupported option is set or if JIT returns BADOPTION (which means that the
|
||||
selected normal or partial matching mode was not compiled). */
|
||||
/* Zero the error offset in case the first code unit is invalid UTF. */
|
||||
|
||||
match_data->startchar = 0;
|
||||
|
||||
|
||||
/* ============================= JIT matching ============================== */
|
||||
|
||||
/* Prepare for JIT matching. Check a UTF string for validity unless no check is
|
||||
requested or invalid UTF can be handled. We check only the portion of the
|
||||
subject that might be inspected during matching - from the offset minus the
|
||||
maximum lookbehind to the given length. This saves time when a small part of a
|
||||
large subject is being matched by the use of a starting offset. Note that the
|
||||
maximum lookbehind is a number of characters, not code units. */
|
||||
|
||||
#ifdef SUPPORT_JIT
|
||||
if (re->executable_jit != NULL && (options & ~PUBLIC_JIT_MATCH_OPTIONS) == 0)
|
||||
if (use_jit)
|
||||
{
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf && (options & PCRE2_NO_UTF_CHECK) == 0 && !allow_invalid)
|
||||
{
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 32
|
||||
unsigned int i;
|
||||
#endif
|
||||
|
||||
/* For 8-bit and 16-bit UTF, check that the first code unit is a valid
|
||||
character start. */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 32
|
||||
if (start_match < end_subject && NOT_FIRSTCU(*start_match))
|
||||
{
|
||||
if (start_offset > 0) return PCRE2_ERROR_BADUTFOFFSET;
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
return PCRE2_ERROR_UTF8_ERR20; /* Isolated 0x80 byte */
|
||||
#else
|
||||
return PCRE2_ERROR_UTF16_ERR3; /* Isolated low surrogate */
|
||||
#endif
|
||||
}
|
||||
#endif /* WIDTH != 32 */
|
||||
|
||||
/* Move back by the maximum lookbehind, just in case it happens at the very
|
||||
start of matching. */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 32
|
||||
for (i = re->max_lookbehind; i > 0 && start_match > subject; i--)
|
||||
{
|
||||
start_match--;
|
||||
while (start_match > subject &&
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
(*start_match & 0xc0) == 0x80)
|
||||
#else /* 16-bit */
|
||||
(*start_match & 0xfc00) == 0xdc00)
|
||||
#endif
|
||||
start_match--;
|
||||
}
|
||||
#else /* PCRE2_CODE_UNIT_WIDTH != 32 */
|
||||
|
||||
/* In the 32-bit library, one code unit equals one character. However,
|
||||
we cannot just subtract the lookbehind and then compare pointers, because
|
||||
a very large lookbehind could create an invalid pointer. */
|
||||
|
||||
if (start_offset >= re->max_lookbehind)
|
||||
start_match -= re->max_lookbehind;
|
||||
else
|
||||
start_match = subject;
|
||||
#endif /* PCRE2_CODE_UNIT_WIDTH != 32 */
|
||||
|
||||
/* Validate the relevant portion of the subject. Adjust the offset of an
|
||||
invalid code point to be an absolute offset in the whole string. */
|
||||
|
||||
match_data->rc = PRIV(valid_utf)(start_match,
|
||||
length - (start_match - subject), &(match_data->startchar));
|
||||
if (match_data->rc != 0)
|
||||
{
|
||||
match_data->startchar += start_match - subject;
|
||||
return match_data->rc;
|
||||
}
|
||||
jit_checked_utf = TRUE;
|
||||
}
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
/* If JIT returns BADOPTION, which means that the selected complete or
|
||||
partial matching mode was not compiled, fall through to the interpreter. */
|
||||
|
||||
rc = pcre2_jit_match(code, subject, length, start_offset, options,
|
||||
match_data, mcontext);
|
||||
if (rc != PCRE2_ERROR_JIT_BADOPTION)
|
||||
@ -6209,10 +6283,152 @@ if (re->executable_jit != NULL && (options & ~PUBLIC_JIT_MATCH_OPTIONS) == 0)
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
#endif /* SUPPORT_JIT */
|
||||
|
||||
/* ========================= End of JIT matching ========================== */
|
||||
|
||||
|
||||
/* Proceed with non-JIT matching. The default is to allow lookbehinds to the
|
||||
start of the subject. A UTF check when there is a non-zero offset may change
|
||||
this. */
|
||||
|
||||
mb->check_subject = subject;
|
||||
|
||||
/* If a UTF subject string was not checked for validity in the JIT code above,
|
||||
check it here, and handle support for invalid UTF strings. The check above
|
||||
happens only when invalid UTF is not supported and PCRE2_NO_UTF_CHECK is unset.
|
||||
If we get here in those circumstances, it means the subject string is valid,
|
||||
but for some reason JIT matching was not successful. There is no need to check
|
||||
the subject again.
|
||||
|
||||
We check only the portion of the subject that might be inspected during
|
||||
matching - from the offset minus the maximum lookbehind to the given length.
|
||||
This saves time when a small part of a large subject is being matched by the
|
||||
use of a starting offset. Note that the maximum lookbehind is a number of
|
||||
characters, not code units.
|
||||
|
||||
Note also that support for invalid UTF forces a check, overriding the setting
|
||||
of PCRE2_NO_UTF_CHECK. */
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf &&
|
||||
#ifdef SUPPORT_JIT
|
||||
!jit_checked_utf &&
|
||||
#endif
|
||||
((options & PCRE2_NO_UTF_CHECK) == 0 || allow_invalid))
|
||||
{
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 32
|
||||
BOOL skipped_bad_start = FALSE;
|
||||
#endif
|
||||
|
||||
/* Carry on with non-JIT matching. A NULL match context means "use a default
|
||||
context", but we take the memory control functions from the pattern. */
|
||||
/* For 8-bit and 16-bit UTF, check that the first code unit is a valid
|
||||
character start. If we are handling invalid UTF, just skip over such code
|
||||
units. Otherwise, give an appropriate error. */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 32
|
||||
if (allow_invalid)
|
||||
{
|
||||
while (start_match < end_subject && NOT_FIRSTCU(*start_match))
|
||||
{
|
||||
start_match++;
|
||||
skipped_bad_start = TRUE;
|
||||
}
|
||||
}
|
||||
else if (start_match < end_subject && NOT_FIRSTCU(*start_match))
|
||||
{
|
||||
if (start_offset > 0) return PCRE2_ERROR_BADUTFOFFSET;
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
return PCRE2_ERROR_UTF8_ERR20; /* Isolated 0x80 byte */
|
||||
#else
|
||||
return PCRE2_ERROR_UTF16_ERR3; /* Isolated low surrogate */
|
||||
#endif
|
||||
}
|
||||
#endif /* WIDTH != 32 */
|
||||
|
||||
/* The mb->check_subject field points to the start of UTF checking;
|
||||
lookbehinds can go back no further than this. */
|
||||
|
||||
mb->check_subject = start_match;
|
||||
|
||||
/* Move back by the maximum lookbehind, just in case it happens at the very
|
||||
start of matching, but don't do this if we skipped bad 8-bit or 16-bit code
|
||||
units above. */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 32
|
||||
if (!skipped_bad_start)
|
||||
{
|
||||
unsigned int i;
|
||||
for (i = re->max_lookbehind; i > 0 && mb->check_subject > subject; i--)
|
||||
{
|
||||
mb->check_subject--;
|
||||
while (mb->check_subject > subject &&
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
(*mb->check_subject & 0xc0) == 0x80)
|
||||
#else /* 16-bit */
|
||||
(*mb->check_subject & 0xfc00) == 0xdc00)
|
||||
#endif
|
||||
mb->check_subject--;
|
||||
}
|
||||
}
|
||||
#else /* PCRE2_CODE_UNIT_WIDTH != 32 */
|
||||
|
||||
/* In the 32-bit library, one code unit equals one character. However,
|
||||
we cannot just subtract the lookbehind and then compare pointers, because
|
||||
a very large lookbehind could create an invalid pointer. */
|
||||
|
||||
if (start_offset >= re->max_lookbehind)
|
||||
mb->check_subject -= re->max_lookbehind;
|
||||
else
|
||||
mb->check_subject = subject;
|
||||
#endif /* PCRE2_CODE_UNIT_WIDTH != 32 */
|
||||
|
||||
/* Validate the relevant portion of the subject. There's a loop in case we
|
||||
encounter bad UTF in the characters preceding start_match which we are
|
||||
scanning because of a lookbehind. */
|
||||
|
||||
for (;;)
|
||||
{
|
||||
match_data->rc = PRIV(valid_utf)(mb->check_subject,
|
||||
length - (mb->check_subject - subject), &(match_data->startchar));
|
||||
|
||||
if (match_data->rc == 0) break; /* Valid UTF string */
|
||||
|
||||
/* Invalid UTF string. Adjust the offset to be an absolute offset in the
|
||||
whole string. If we are handling invalid UTF strings, set end_subject to
|
||||
stop before the bad code unit, and set the options to "not end of line".
|
||||
Otherwise return the error. */
|
||||
|
||||
match_data->startchar += mb->check_subject - subject;
|
||||
if (!allow_invalid || match_data->rc > 0) return match_data->rc;
|
||||
end_subject = subject + match_data->startchar;
|
||||
|
||||
/* If the end precedes start_match, it means there is invalid UTF in the
|
||||
extra code units we reversed over because of a lookbehind. Advance past the
|
||||
first bad code unit, and then skip invalid character starting code units in
|
||||
8-bit and 16-bit modes, and try again. */
|
||||
|
||||
if (end_subject < start_match)
|
||||
{
|
||||
mb->check_subject = end_subject + 1;
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 32
|
||||
while (mb->check_subject < start_match && NOT_FIRSTCU(*mb->check_subject))
|
||||
mb->check_subject++;
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Otherwise, set the not end of line option, and do the match. */
|
||||
|
||||
else
|
||||
{
|
||||
fragment_options = PCRE2_NOTEOL;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
/* A NULL match context means "use a default context", but we take the memory
|
||||
control functions from the pattern. */
|
||||
|
||||
if (mcontext == NULL)
|
||||
{
|
||||
@ -6224,8 +6440,8 @@ else mb->memctl = mcontext->memctl;
|
||||
anchored = ((re->overall_options | options) & PCRE2_ANCHORED) != 0;
|
||||
firstline = (re->overall_options & PCRE2_FIRSTLINE) != 0;
|
||||
startline = (re->flags & PCRE2_STARTLINE) != 0;
|
||||
bumpalong_limit = (mcontext->offset_limit == PCRE2_UNSET)?
|
||||
end_subject : subject + mcontext->offset_limit;
|
||||
bumpalong_limit = (mcontext->offset_limit == PCRE2_UNSET)?
|
||||
true_end_subject : subject + mcontext->offset_limit;
|
||||
|
||||
/* Initialize and set up the fixed fields in the callout block, with a pointer
|
||||
in the match block. */
|
||||
@ -6236,7 +6452,8 @@ cb.subject = subject;
|
||||
cb.subject_length = (PCRE2_SIZE)(end_subject - subject);
|
||||
cb.callout_flags = 0;
|
||||
|
||||
/* Fill in the remaining fields in the match block. */
|
||||
/* Fill in the remaining fields in the match block, except for moptions, which
|
||||
gets set later. */
|
||||
|
||||
mb->callout = mcontext->callout;
|
||||
mb->callout_data = mcontext->callout_data;
|
||||
@ -6245,13 +6462,11 @@ mb->start_subject = subject;
|
||||
mb->start_offset = start_offset;
|
||||
mb->end_subject = end_subject;
|
||||
mb->hasthen = (re->flags & PCRE2_HASTHEN) != 0;
|
||||
|
||||
mb->moptions = options; /* Match options */
|
||||
mb->poptions = re->overall_options; /* Pattern options */
|
||||
|
||||
mb->allowemptypartial = (re->max_lookbehind > 0) ||
|
||||
(re->flags & PCRE2_MATCH_EMPTY) != 0;
|
||||
mb->poptions = re->overall_options; /* Pattern options */
|
||||
mb->ignore_skip_arg = 0;
|
||||
mb->mark = mb->nomatch_mark = NULL; /* In case never set */
|
||||
mb->hitend = FALSE;
|
||||
mb->mark = mb->nomatch_mark = NULL; /* In case never set */
|
||||
|
||||
/* The name table is needed for finding all the numbers associated with a
|
||||
given name, for condition testing. The code follows the name table. */
|
||||
@ -6404,6 +6619,13 @@ if ((re->flags & PCRE2_LASTSET) != 0)
|
||||
/* Loop for handling unanchored repeated matching attempts; for anchored regexs
|
||||
the loop runs just once. */
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
FRAGMENT_RESTART:
|
||||
#endif
|
||||
|
||||
start_partial = match_partial = NULL;
|
||||
mb->hitend = FALSE;
|
||||
|
||||
for(;;)
|
||||
{
|
||||
PCRE2_SPTR new_start_match;
|
||||
@ -6473,7 +6695,10 @@ for(;;)
|
||||
/* Not anchored. Advance to a unique first code unit if there is one. In
|
||||
8-bit mode, the use of memchr() gives a big speed up, even though we have
|
||||
to call it twice in caseless mode, in order to find the earliest occurrence
|
||||
of the character in either of its cases. */
|
||||
of the character in either of its cases. If a call to memchr() that
|
||||
searches the rest of the subject fails to find one case, remember that in
|
||||
order not to keep on repeating the search. This can make a huge difference
|
||||
when the strings are very long and only one case is present. */
|
||||
|
||||
else
|
||||
{
|
||||
@ -6487,11 +6712,29 @@ for(;;)
|
||||
(smc = UCHAR21TEST(start_match)) != first_cu &&
|
||||
smc != first_cu2)
|
||||
start_match++;
|
||||
|
||||
#else /* 8-bit code units */
|
||||
PCRE2_SPTR pp1 =
|
||||
memchr(start_match, first_cu, end_subject-start_match);
|
||||
PCRE2_SPTR pp2 =
|
||||
memchr(start_match, first_cu2, end_subject-start_match);
|
||||
PCRE2_SPTR pp1 = NULL;
|
||||
PCRE2_SPTR pp2 = NULL;
|
||||
PCRE2_SIZE cu2size = end_subject - start_match;
|
||||
|
||||
if (!memchr_not_found_first_cu)
|
||||
{
|
||||
pp1 = memchr(start_match, first_cu, end_subject - start_match);
|
||||
if (pp1 == NULL) memchr_not_found_first_cu = TRUE;
|
||||
else cu2size = pp1 - start_match;
|
||||
}
|
||||
|
||||
/* If pp1 is not NULL, we have arranged to search only as far as pp1,
|
||||
to see if the other case is earlier, so we can set "not found" only
|
||||
when both searches have returned NULL. */
|
||||
|
||||
if (!memchr_not_found_first_cu2)
|
||||
{
|
||||
pp2 = memchr(start_match, first_cu2, cu2size);
|
||||
memchr_not_found_first_cu2 = (pp2 == NULL && pp1 == NULL);
|
||||
}
|
||||
|
||||
if (pp1 == NULL)
|
||||
start_match = (pp2 == NULL)? end_subject : pp2;
|
||||
else
|
||||
@ -6523,7 +6766,7 @@ for(;;)
|
||||
we also let the cycle run, because the matching string is legitimately
|
||||
allowed to start with the first code unit of a newline. */
|
||||
|
||||
if (!mb->partial && start_match >= mb->end_subject)
|
||||
if (mb->partial == 0 && start_match >= mb->end_subject)
|
||||
{
|
||||
rc = MATCH_NOMATCH;
|
||||
break;
|
||||
@ -6582,7 +6825,7 @@ for(;;)
|
||||
|
||||
/* See comment above in first_cu checking about the next few lines. */
|
||||
|
||||
if (!mb->partial && start_match >= mb->end_subject)
|
||||
if (mb->partial == 0 && start_match >= mb->end_subject)
|
||||
{
|
||||
rc = MATCH_NOMATCH;
|
||||
break;
|
||||
@ -6596,8 +6839,10 @@ for(;;)
|
||||
|
||||
/* The following two optimizations must be disabled for partial matching. */
|
||||
|
||||
if (!mb->partial)
|
||||
if (mb->partial == 0)
|
||||
{
|
||||
PCRE2_SPTR p;
|
||||
|
||||
/* The minimum matching length is a lower bound; no string of that length
|
||||
may actually match the pattern. Although the value is, strictly, in
|
||||
characters, we treat it as code units to avoid spending too much time in
|
||||
@ -6621,60 +6866,57 @@ for(;;)
|
||||
memchr() twice in the caseless case because we only need to check for the
|
||||
presence of the character in either case, not find the first occurrence.
|
||||
|
||||
The search can be skipped if the code unit was found later than the
|
||||
current starting point in a previous iteration of the bumpalong loop.
|
||||
|
||||
HOWEVER: when the subject string is very, very long, searching to its end
|
||||
can take a long time, and give bad performance on quite ordinary
|
||||
patterns. This showed up when somebody was matching something like
|
||||
/^\d+C/ on a 32-megabyte string... so we don't do this when the string is
|
||||
sufficiently long. */
|
||||
anchored patterns. This showed up when somebody was matching something
|
||||
like /^\d+C/ on a 32-megabyte string... so we don't do this when the
|
||||
string is sufficiently long, but it's worth searching a lot more for
|
||||
unanchored patterns. */
|
||||
|
||||
if (has_req_cu && end_subject - start_match < REQ_CU_MAX)
|
||||
p = start_match + (has_first_cu? 1:0);
|
||||
if (has_req_cu && p > req_cu_ptr)
|
||||
{
|
||||
PCRE2_SPTR p = start_match + (has_first_cu? 1:0);
|
||||
PCRE2_SIZE check_length = end_subject - start_match;
|
||||
|
||||
/* We don't need to repeat the search if we haven't yet reached the
|
||||
place we found it last time round the bumpalong loop. */
|
||||
|
||||
if (p > req_cu_ptr)
|
||||
if (check_length < REQ_CU_MAX ||
|
||||
(!anchored && check_length < REQ_CU_MAX * 1000))
|
||||
{
|
||||
if (p < end_subject)
|
||||
if (req_cu != req_cu2) /* Caseless */
|
||||
{
|
||||
if (req_cu != req_cu2) /* Caseless */
|
||||
{
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 8
|
||||
do
|
||||
{
|
||||
uint32_t pp = UCHAR21INCTEST(p);
|
||||
if (pp == req_cu || pp == req_cu2) { p--; break; }
|
||||
}
|
||||
while (p < end_subject);
|
||||
|
||||
#else /* 8-bit code units */
|
||||
PCRE2_SPTR pp = p;
|
||||
p = memchr(pp, req_cu, end_subject - pp);
|
||||
if (p == NULL)
|
||||
{
|
||||
p = memchr(pp, req_cu2, end_subject - pp);
|
||||
if (p == NULL) p = end_subject;
|
||||
}
|
||||
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */
|
||||
while (p < end_subject)
|
||||
{
|
||||
uint32_t pp = UCHAR21INCTEST(p);
|
||||
if (pp == req_cu || pp == req_cu2) { p--; break; }
|
||||
}
|
||||
|
||||
/* The caseful case */
|
||||
|
||||
else
|
||||
{
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 8
|
||||
do
|
||||
{
|
||||
if (UCHAR21INCTEST(p) == req_cu) { p--; break; }
|
||||
}
|
||||
while (p < end_subject);
|
||||
|
||||
#else /* 8-bit code units */
|
||||
p = memchr(p, req_cu, end_subject - p);
|
||||
PCRE2_SPTR pp = p;
|
||||
p = memchr(pp, req_cu, end_subject - pp);
|
||||
if (p == NULL)
|
||||
{
|
||||
p = memchr(pp, req_cu2, end_subject - pp);
|
||||
if (p == NULL) p = end_subject;
|
||||
#endif
|
||||
}
|
||||
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */
|
||||
}
|
||||
|
||||
/* The caseful case */
|
||||
|
||||
else
|
||||
{
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 8
|
||||
while (p < end_subject)
|
||||
{
|
||||
if (UCHAR21INCTEST(p) == req_cu) { p--; break; }
|
||||
}
|
||||
|
||||
#else /* 8-bit code units */
|
||||
p = memchr(p, req_cu, end_subject - p);
|
||||
if (p == NULL) p = end_subject;
|
||||
#endif
|
||||
}
|
||||
|
||||
/* If we can't find the required code unit, break the bumpalong loop,
|
||||
@ -6714,6 +6956,11 @@ for(;;)
|
||||
|
||||
mb->start_used_ptr = start_match;
|
||||
mb->last_used_ptr = start_match;
|
||||
#ifdef SUPPORT_UNICODE
|
||||
mb->moptions = options | fragment_options;
|
||||
#else
|
||||
mb->moptions = options;
|
||||
#endif
|
||||
mb->match_call_count = 0;
|
||||
mb->end_offset_top = 0;
|
||||
mb->skip_arg_count = 0;
|
||||
@ -6839,6 +7086,68 @@ for(;;)
|
||||
|
||||
ENDLOOP:
|
||||
|
||||
/* If end_subject != true_end_subject, it means we are handling invalid UTF,
|
||||
and have just processed a non-terminal fragment. If this resulted in no match
|
||||
or a partial match we must carry on to the next fragment (a partial match is
|
||||
returned to the caller only at the very end of the subject). A loop is used to
|
||||
avoid trying to match against empty fragments; if the pattern can match an
|
||||
empty string it would have done so already. */
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf && end_subject != true_end_subject &&
|
||||
(rc == MATCH_NOMATCH || rc == PCRE2_ERROR_PARTIAL))
|
||||
{
|
||||
for (;;)
|
||||
{
|
||||
/* Advance past the first bad code unit, and then skip invalid character
|
||||
starting code units in 8-bit and 16-bit modes. */
|
||||
|
||||
start_match = end_subject + 1;
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 32
|
||||
while (start_match < true_end_subject && NOT_FIRSTCU(*start_match))
|
||||
start_match++;
|
||||
#endif
|
||||
|
||||
/* If we have hit the end of the subject, there isn't another non-empty
|
||||
fragment, so give up. */
|
||||
|
||||
if (start_match >= true_end_subject)
|
||||
{
|
||||
rc = MATCH_NOMATCH; /* In case it was partial */
|
||||
break;
|
||||
}
|
||||
|
||||
/* Check the rest of the subject */
|
||||
|
||||
mb->check_subject = start_match;
|
||||
rc = PRIV(valid_utf)(start_match, length - (start_match - subject),
|
||||
&(match_data->startchar));
|
||||
|
||||
/* The rest of the subject is valid UTF. */
|
||||
|
||||
if (rc == 0)
|
||||
{
|
||||
mb->end_subject = end_subject = true_end_subject;
|
||||
fragment_options = PCRE2_NOTBOL;
|
||||
goto FRAGMENT_RESTART;
|
||||
}
|
||||
|
||||
/* A subsequent UTF error has been found; if the next fragment is
|
||||
non-empty, set up to process it. Otherwise, let the loop advance. */
|
||||
|
||||
else if (rc < 0)
|
||||
{
|
||||
mb->end_subject = end_subject = start_match + match_data->startchar;
|
||||
if (end_subject > start_match)
|
||||
{
|
||||
fragment_options = PCRE2_NOTBOL|PCRE2_NOTEOL;
|
||||
goto FRAGMENT_RESTART;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
/* Release an enlarged frame vector that is on the heap. */
|
||||
|
||||
if (mb->match_frames != mb->stack_frames)
|
||||
|
15
thirdparty/pcre2/src/pcre2_match_data.c
vendored
15
thirdparty/pcre2/src/pcre2_match_data.c
vendored
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2018 University of Cambridge
|
||||
New API code Copyright (c) 2016-2019 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@ -150,4 +150,17 @@ pcre2_get_startchar(pcre2_match_data *match_data)
|
||||
return match_data->startchar;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Get size of match data block *
|
||||
*************************************************/
|
||||
|
||||
/* Returns the size in bytes of the given match data block, computed as the
offset of the ovector field within pcre2_match_data plus two PCRE2_SIZE
entries for each unit of match_data->oveccount. The match_data pointer is
dereferenced without a NULL check. */
PCRE2_EXP_DEFN PCRE2_SIZE PCRE2_CALL_CONVENTION
|
||||
pcre2_get_match_data_size(pcre2_match_data *match_data)
|
||||
{
|
||||
return offsetof(pcre2_match_data, ovector) +
|
||||
2 * (match_data->oveccount) * sizeof(PCRE2_SIZE);
|
||||
}
|
||||
|
||||
/* End of pcre2_match_data.c */
|
||||
|
304
thirdparty/pcre2/src/pcre2_study.c
vendored
304
thirdparty/pcre2/src/pcre2_study.c
vendored
@ -88,11 +88,13 @@ Arguments:
|
||||
countptr pointer to call count (to catch over complexity)
|
||||
backref_cache vector for caching back references.
|
||||
|
||||
This function is no longer called when the pattern contains (*ACCEPT); however,
|
||||
the old code for returning -1 is retained, just in case.
|
||||
|
||||
Returns: the minimum length
|
||||
-1 \C in UTF-8 mode
|
||||
or (*ACCEPT)
|
||||
or pattern too complicated
|
||||
or back reference to duplicate name/number
|
||||
-2 internal error (missing capturing bracket)
|
||||
-3 internal error (opcode not listed)
|
||||
*/
|
||||
@ -103,6 +105,7 @@ find_minlength(const pcre2_real_code *re, PCRE2_SPTR code,
|
||||
int *backref_cache)
|
||||
{
|
||||
int length = -1;
|
||||
int branchlength = 0;
|
||||
int prev_cap_recno = -1;
|
||||
int prev_cap_d = 0;
|
||||
int prev_recurse_recno = -1;
|
||||
@ -110,9 +113,9 @@ int prev_recurse_d = 0;
|
||||
uint32_t once_fudge = 0;
|
||||
BOOL had_recurse = FALSE;
|
||||
BOOL dupcapused = (re->flags & PCRE2_DUPCAPUSED) != 0;
|
||||
recurse_check this_recurse;
|
||||
int branchlength = 0;
|
||||
PCRE2_SPTR nextbranch = code + GET(code, 1);
|
||||
PCRE2_UCHAR *cc = (PCRE2_UCHAR *)code + 1 + LINK_SIZE;
|
||||
recurse_check this_recurse;
|
||||
|
||||
/* If this is a "could be empty" group, its minimum length is 0. */
|
||||
|
||||
@ -128,16 +131,20 @@ if ((*countptr)++ > 1000) return -1;
|
||||
|
||||
/* Scan along the opcodes for this branch. If we get to the end of the branch,
|
||||
check the length against that of the other branches. If the accumulated length
|
||||
passes 16-bits, stop. */
|
||||
passes 16-bits, reset to that value and skip the rest of the branch. */
|
||||
|
||||
for (;;)
|
||||
{
|
||||
int d, min, recno;
|
||||
PCRE2_UCHAR *cs, *ce;
|
||||
PCRE2_UCHAR op = *cc;
|
||||
PCRE2_UCHAR op, *cs, *ce;
|
||||
|
||||
if (branchlength >= UINT16_MAX) return UINT16_MAX;
|
||||
if (branchlength >= UINT16_MAX)
|
||||
{
|
||||
branchlength = UINT16_MAX;
|
||||
cc = (PCRE2_UCHAR *)nextbranch;
|
||||
}
|
||||
|
||||
op = *cc;
|
||||
switch (op)
|
||||
{
|
||||
case OP_COND:
|
||||
@ -206,7 +213,9 @@ for (;;)
|
||||
cc += 1 + LINK_SIZE;
|
||||
break;
|
||||
|
||||
/* ACCEPT makes things far too complicated; we have to give up. */
|
||||
/* ACCEPT makes things far too complicated; we have to give up. In fact,
|
||||
from 10.34 onwards, if a pattern contains (*ACCEPT), this function is not
|
||||
used. However, leave the code in place, just in case. */
|
||||
|
||||
case OP_ACCEPT:
|
||||
case OP_ASSERT_ACCEPT:
|
||||
@ -214,9 +223,9 @@ for (;;)
|
||||
|
||||
/* Reached end of a branch; if it's a ket it is the end of a nested
|
||||
call. If it's ALT it is an alternation in a nested call. If it is END it's
|
||||
the end of the outer call. All can be handled by the same code. If an
|
||||
ACCEPT was previously encountered, use the length that was in force at that
|
||||
time, and pass back the shortest ACCEPT length. */
|
||||
the end of the outer call. All can be handled by the same code. If the
|
||||
length of any branch is zero, there is no need to scan any subsequent
|
||||
branches. */
|
||||
|
||||
case OP_ALT:
|
||||
case OP_KET:
|
||||
@ -226,7 +235,8 @@ for (;;)
|
||||
case OP_END:
|
||||
if (length < 0 || (!had_recurse && branchlength < length))
|
||||
length = branchlength;
|
||||
if (op != OP_ALT) return length;
|
||||
if (op != OP_ALT || length == 0) return length;
|
||||
nextbranch = cc + GET(cc, 1);
|
||||
cc += 1 + LINK_SIZE;
|
||||
branchlength = 0;
|
||||
had_recurse = FALSE;
|
||||
@ -238,6 +248,8 @@ for (;;)
|
||||
case OP_ASSERT_NOT:
|
||||
case OP_ASSERTBACK:
|
||||
case OP_ASSERTBACK_NOT:
|
||||
case OP_ASSERT_NA:
|
||||
case OP_ASSERTBACK_NA:
|
||||
do cc += GET(cc, 1); while (*cc == OP_ALT);
|
||||
/* Fall through */
|
||||
|
||||
@ -451,15 +463,17 @@ for (;;)
|
||||
|
||||
If PCRE2_MATCH_UNSET_BACKREF is set, a backreference to an unset bracket
|
||||
matches an empty string (by default it causes a matching failure), so in
|
||||
that case we must set the minimum length to zero. */
|
||||
that case we must set the minimum length to zero.
|
||||
|
||||
/* Duplicate named pattern back reference. We cannot reliably find a length
|
||||
for this if duplicate numbers are present in the pattern. */
|
||||
For backreferences, if duplicate numbers are present in the pattern we check
|
||||
for a reference to a duplicate. If it is, we don't know which version will
|
||||
be referenced, so we have to set the minimum length to zero. */
|
||||
|
||||
/* Duplicate named pattern back reference. */
|
||||
|
||||
case OP_DNREF:
|
||||
case OP_DNREFI:
|
||||
if (dupcapused) return -1;
|
||||
if ((re->overall_options & PCRE2_MATCH_UNSET_BACKREF) == 0)
|
||||
if (!dupcapused && (re->overall_options & PCRE2_MATCH_UNSET_BACKREF) == 0)
|
||||
{
|
||||
int count = GET2(cc, 1+IMM2_SIZE);
|
||||
PCRE2_UCHAR *slot =
|
||||
@ -482,28 +496,32 @@ for (;;)
|
||||
ce = cs = (PCRE2_UCHAR *)PRIV(find_bracket)(startcode, utf, recno);
|
||||
if (cs == NULL) return -2;
|
||||
do ce += GET(ce, 1); while (*ce == OP_ALT);
|
||||
if (cc > cs && cc < ce) /* Simple recursion */
|
||||
|
||||
dd = 0;
|
||||
if (!dupcapused ||
|
||||
(PCRE2_UCHAR *)PRIV(find_bracket)(ce, utf, recno) == NULL)
|
||||
{
|
||||
dd = 0;
|
||||
had_recurse = TRUE;
|
||||
}
|
||||
else
|
||||
{
|
||||
recurse_check *r = recurses;
|
||||
for (r = recurses; r != NULL; r = r->prev)
|
||||
if (r->group == cs) break;
|
||||
if (r != NULL) /* Mutual recursion */
|
||||
if (cc > cs && cc < ce) /* Simple recursion */
|
||||
{
|
||||
dd = 0;
|
||||
had_recurse = TRUE;
|
||||
}
|
||||
else
|
||||
{
|
||||
this_recurse.prev = recurses;
|
||||
this_recurse.group = cs;
|
||||
dd = find_minlength(re, cs, startcode, utf, &this_recurse,
|
||||
countptr, backref_cache);
|
||||
if (dd < 0) return dd;
|
||||
recurse_check *r = recurses;
|
||||
for (r = recurses; r != NULL; r = r->prev)
|
||||
if (r->group == cs) break;
|
||||
if (r != NULL) /* Mutual recursion */
|
||||
{
|
||||
had_recurse = TRUE;
|
||||
}
|
||||
else
|
||||
{
|
||||
this_recurse.prev = recurses; /* No recursion */
|
||||
this_recurse.group = cs;
|
||||
dd = find_minlength(re, cs, startcode, utf, &this_recurse,
|
||||
countptr, backref_cache);
|
||||
if (dd < 0) return dd;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -521,48 +539,51 @@ for (;;)
|
||||
cc += 1 + 2*IMM2_SIZE;
|
||||
goto REPEAT_BACK_REFERENCE;
|
||||
|
||||
/* Single back reference. We cannot find a length for this if duplicate
|
||||
numbers are present in the pattern. */
|
||||
/* Single back reference by number. References by name are converted to by
|
||||
number when there is no duplication. */
|
||||
|
||||
case OP_REF:
|
||||
case OP_REFI:
|
||||
if (dupcapused) return -1;
|
||||
recno = GET2(cc, 1);
|
||||
if (recno <= backref_cache[0] && backref_cache[recno] >= 0)
|
||||
d = backref_cache[recno];
|
||||
else
|
||||
{
|
||||
int i;
|
||||
d = 0;
|
||||
|
||||
if ((re->overall_options & PCRE2_MATCH_UNSET_BACKREF) == 0)
|
||||
{
|
||||
ce = cs = (PCRE2_UCHAR *)PRIV(find_bracket)(startcode, utf, recno);
|
||||
if (cs == NULL) return -2;
|
||||
do ce += GET(ce, 1); while (*ce == OP_ALT);
|
||||
if (cc > cs && cc < ce) /* Simple recursion */
|
||||
|
||||
if (!dupcapused ||
|
||||
(PCRE2_UCHAR *)PRIV(find_bracket)(ce, utf, recno) == NULL)
|
||||
{
|
||||
d = 0;
|
||||
had_recurse = TRUE;
|
||||
}
|
||||
else
|
||||
{
|
||||
recurse_check *r = recurses;
|
||||
for (r = recurses; r != NULL; r = r->prev) if (r->group == cs) break;
|
||||
if (r != NULL) /* Mutual recursion */
|
||||
if (cc > cs && cc < ce) /* Simple recursion */
|
||||
{
|
||||
d = 0;
|
||||
had_recurse = TRUE;
|
||||
}
|
||||
else
|
||||
{
|
||||
this_recurse.prev = recurses;
|
||||
this_recurse.group = cs;
|
||||
d = find_minlength(re, cs, startcode, utf, &this_recurse, countptr,
|
||||
backref_cache);
|
||||
if (d < 0) return d;
|
||||
recurse_check *r = recurses;
|
||||
for (r = recurses; r != NULL; r = r->prev) if (r->group == cs) break;
|
||||
if (r != NULL) /* Mutual recursion */
|
||||
{
|
||||
had_recurse = TRUE;
|
||||
}
|
||||
else /* No recursion */
|
||||
{
|
||||
this_recurse.prev = recurses;
|
||||
this_recurse.group = cs;
|
||||
d = find_minlength(re, cs, startcode, utf, &this_recurse, countptr,
|
||||
backref_cache);
|
||||
if (d < 0) return d;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else d = 0;
|
||||
|
||||
backref_cache[recno] = d;
|
||||
for (i = backref_cache[0] + 1; i < recno; i++) backref_cache[i] = -1;
|
||||
@ -888,7 +909,7 @@ if (table_limit != 32) for (c = 24; c < 32; c++) re->start_bitmap[c] = 0xff;
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Create bitmap of starting bytes *
|
||||
* Create bitmap of starting code units *
|
||||
*************************************************/
|
||||
|
||||
/* This function scans a compiled unanchored expression recursively and
|
||||
@ -938,6 +959,9 @@ do
|
||||
{
|
||||
int rc;
|
||||
uint8_t *classmap = NULL;
|
||||
#ifdef SUPPORT_WIDE_CHARS
|
||||
PCRE2_UCHAR xclassflags;
|
||||
#endif
|
||||
|
||||
switch(*tcode)
|
||||
{
|
||||
@ -1078,6 +1102,7 @@ do
|
||||
case OP_ONCE:
|
||||
case OP_SCRIPT_RUN:
|
||||
case OP_ASSERT:
|
||||
case OP_ASSERT_NA:
|
||||
rc = set_start_bits(re, tcode, utf);
|
||||
if (rc == SSB_FAIL || rc == SSB_UNKNOWN) return rc;
|
||||
if (rc == SSB_DONE) try_next = FALSE; else
|
||||
@ -1120,6 +1145,7 @@ do
|
||||
case OP_ASSERT_NOT:
|
||||
case OP_ASSERTBACK:
|
||||
case OP_ASSERTBACK_NOT:
|
||||
case OP_ASSERTBACK_NA:
|
||||
do tcode += GET(tcode, 1); while (*tcode == OP_ALT);
|
||||
tcode += 1 + LINK_SIZE;
|
||||
break;
|
||||
@ -1444,20 +1470,59 @@ do
|
||||
negative XCLASS without a map, give up. If there are no property checks,
|
||||
there must be wide characters on the XCLASS list, because otherwise an
|
||||
XCLASS would not have been created. This means that code points >= 255
|
||||
are always potential starters. */
|
||||
are potential starters. In the UTF-8 case we can scan them and set bits
|
||||
for the relevant leading bytes. */
|
||||
|
||||
#ifdef SUPPORT_WIDE_CHARS
|
||||
case OP_XCLASS:
|
||||
if ((tcode[1 + LINK_SIZE] & XCL_HASPROP) != 0 ||
|
||||
(tcode[1 + LINK_SIZE] & (XCL_MAP|XCL_NOT)) == XCL_NOT)
|
||||
xclassflags = tcode[1 + LINK_SIZE];
|
||||
if ((xclassflags & XCL_HASPROP) != 0 ||
|
||||
(xclassflags & (XCL_MAP|XCL_NOT)) == XCL_NOT)
|
||||
return SSB_FAIL;
|
||||
|
||||
/* We have a positive XCLASS or a negative one without a map. Set up the
|
||||
map pointer if there is one, and fall through. */
|
||||
|
||||
classmap = ((tcode[1 + LINK_SIZE] & XCL_MAP) == 0)? NULL :
|
||||
classmap = ((xclassflags & XCL_MAP) == 0)? NULL :
|
||||
(uint8_t *)(tcode + 1 + LINK_SIZE + 1);
|
||||
#endif
|
||||
|
||||
/* In UTF-8 mode, scan the character list and set bits for leading bytes,
|
||||
then jump to handle the map. */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
if (utf && (xclassflags & XCL_NOT) == 0)
|
||||
{
|
||||
PCRE2_UCHAR b, e;
|
||||
PCRE2_SPTR p = tcode + 1 + LINK_SIZE + 1 + ((classmap == NULL)? 0:32);
|
||||
tcode += GET(tcode, 1);
|
||||
|
||||
for (;;) switch (*p++)
|
||||
{
|
||||
case XCL_SINGLE:
|
||||
b = *p++;
|
||||
while ((*p & 0xc0) == 0x80) p++;
|
||||
re->start_bitmap[b/8] |= (1u << (b&7));
|
||||
break;
|
||||
|
||||
case XCL_RANGE:
|
||||
b = *p++;
|
||||
while ((*p & 0xc0) == 0x80) p++;
|
||||
e = *p++;
|
||||
while ((*p & 0xc0) == 0x80) p++;
|
||||
for (; b <= e; b++)
|
||||
re->start_bitmap[b/8] |= (1u << (b&7));
|
||||
break;
|
||||
|
||||
case XCL_END:
|
||||
goto HANDLE_CLASSMAP;
|
||||
|
||||
default:
|
||||
return SSB_UNKNOWN; /* Internal error, should not occur */
|
||||
}
|
||||
}
|
||||
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */
|
||||
#endif /* SUPPORT_WIDE_CHARS */
|
||||
|
||||
/* It seems that the fall through comment must be outside the #ifdef if
|
||||
it is to avoid the gcc compiler warning. */
|
||||
|
||||
@ -1499,6 +1564,9 @@ do
|
||||
greater than 127. In fact, there are only two possible starting bytes for
|
||||
characters in the range 128 - 255. */
|
||||
|
||||
#if defined SUPPORT_WIDE_CHARS && PCRE2_CODE_UNIT_WIDTH == 8
|
||||
HANDLE_CLASSMAP:
|
||||
#endif
|
||||
if (classmap != NULL)
|
||||
{
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
|
||||
@ -1569,7 +1637,9 @@ return yield;
|
||||
/* This function is handed a compiled expression that it must study to produce
|
||||
information that will speed up the matching.
|
||||
|
||||
Argument: points to the compiled expression
|
||||
Argument:
|
||||
re points to the compiled expression
|
||||
|
||||
Returns: 0 normally; non-zero should never normally occur
|
||||
1 unknown opcode in set_start_bits
|
||||
2 missing capturing bracket
|
||||
@ -1579,7 +1649,6 @@ Returns: 0 normally; non-zero should never normally occur
|
||||
int
|
||||
PRIV(study)(pcre2_real_code *re)
|
||||
{
|
||||
int min;
|
||||
int count = 0;
|
||||
PCRE2_UCHAR *code;
|
||||
BOOL utf = (re->overall_options & PCRE2_UTF) != 0;
|
||||
@ -1597,25 +1666,121 @@ if ((re->flags & (PCRE2_FIRSTSET|PCRE2_STARTLINE)) == 0)
|
||||
{
|
||||
int rc = set_start_bits(re, code, utf);
|
||||
if (rc == SSB_UNKNOWN) return 1;
|
||||
if (rc == SSB_DONE) re->flags |= PCRE2_FIRSTMAPSET;
|
||||
|
||||
/* If a list of starting code units was set up, scan the list to see if only
|
||||
one or two were listed. Having only one listed is rare because usually a
|
||||
single starting code unit will have been recognized and PCRE2_FIRSTSET set.
|
||||
If two are listed, see if they are caseless versions of the same character;
|
||||
if so we can replace the list with a caseless first code unit. This gives
|
||||
better performance and is plausibly worth doing for patterns such as [Ww]ord
|
||||
or (word|WORD). */
|
||||
|
||||
if (rc == SSB_DONE)
|
||||
{
|
||||
int i;
|
||||
int a = -1;
|
||||
int b = -1;
|
||||
uint8_t *p = re->start_bitmap;
|
||||
uint32_t flags = PCRE2_FIRSTMAPSET;
|
||||
|
||||
for (i = 0; i < 256; p++, i += 8)
|
||||
{
|
||||
uint8_t x = *p;
|
||||
if (x != 0)
|
||||
{
|
||||
int c;
|
||||
uint8_t y = x & (~x + 1); /* Least significant bit */
|
||||
if (y != x) goto DONE; /* More than one bit set */
|
||||
|
||||
/* In the 16-bit and 32-bit libraries, the bit for 0xff means "0xff and
|
||||
all wide characters", so we cannot use it here. */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 8
|
||||
if (i == 248 && x == 0x80) goto DONE;
|
||||
#endif
|
||||
|
||||
/* Compute the character value */
|
||||
|
||||
c = i;
|
||||
switch (x)
|
||||
{
|
||||
case 1: break;
|
||||
case 2: c += 1; break; case 4: c += 2; break;
|
||||
case 8: c += 3; break; case 16: c += 4; break;
|
||||
case 32: c += 5; break; case 64: c += 6; break;
|
||||
case 128: c += 7; break;
|
||||
}
|
||||
|
||||
/* c contains the code unit value, in the range 0-255. In 8-bit UTF
|
||||
mode, only values < 128 can be used. */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
if (c > 127) goto DONE;
|
||||
#endif
|
||||
if (a < 0) a = c; /* First one found */
|
||||
else if (b < 0) /* Second one found */
|
||||
{
|
||||
int d = TABLE_GET((unsigned int)c, re->tables + fcc_offset, c);
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
if (utf && UCD_CASESET(c) != 0) goto DONE; /* Multiple case set */
|
||||
#else /* 16-bit or 32-bit */
|
||||
if (UCD_CASESET(c) != 0) goto DONE; /* Multiple case set */
|
||||
if (utf && c > 127) d = UCD_OTHERCASE(c);
|
||||
#endif /* Code width */
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
if (d != a) goto DONE; /* Not other case of a */
|
||||
b = c;
|
||||
}
|
||||
else goto DONE; /* More than two characters found */
|
||||
}
|
||||
}
|
||||
|
||||
/* Replace the start code unit bits with a first code unit, but only if it
|
||||
is not the same as a required later code unit. This is because a search for
|
||||
a required code unit starts after an explicit first code unit, but at a
|
||||
code unit found from the bitmap. Patterns such as /a*a/ don't work
|
||||
if both the start unit and required unit are the same. */
|
||||
|
||||
if (a >= 0 &&
|
||||
(
|
||||
(re->flags & PCRE2_LASTSET) == 0 ||
|
||||
(
|
||||
re->last_codeunit != (uint32_t)a &&
|
||||
(b < 0 || re->last_codeunit != (uint32_t)b)
|
||||
)
|
||||
))
|
||||
{
|
||||
re->first_codeunit = a;
|
||||
flags = PCRE2_FIRSTSET;
|
||||
if (b >= 0) flags |= PCRE2_FIRSTCASELESS;
|
||||
}
|
||||
|
||||
DONE:
|
||||
re->flags |= flags;
|
||||
}
|
||||
}
|
||||
|
||||
/* Find the minimum length of subject string. If the pattern can match an empty
|
||||
string, the minimum length is already known. If there are more back references
|
||||
than the size of the vector we are going to cache them in, do nothing. A
|
||||
pattern that complicated will probably take a long time to analyze and may in
|
||||
any case turn out to be too complicated. Note that back reference minima are
|
||||
held as 16-bit numbers. */
|
||||
string, the minimum length is already known. If the pattern contains (*ACCEPT)
|
||||
all bets are off, and we don't even try to find a minimum length. If there are
|
||||
more back references than the size of the vector we are going to cache them in,
|
||||
do nothing. A pattern that complicated will probably take a long time to
|
||||
analyze and may in any case turn out to be too complicated. Note that back
|
||||
reference minima are held as 16-bit numbers. */
|
||||
|
||||
if ((re->flags & PCRE2_MATCH_EMPTY) == 0 &&
|
||||
if ((re->flags & (PCRE2_MATCH_EMPTY|PCRE2_HASACCEPT)) == 0 &&
|
||||
re->top_backref <= MAX_CACHE_BACKREF)
|
||||
{
|
||||
int min;
|
||||
int backref_cache[MAX_CACHE_BACKREF+1];
|
||||
backref_cache[0] = 0; /* Highest one that is set */
|
||||
min = find_minlength(re, code, code, utf, NULL, &count, backref_cache);
|
||||
switch(min)
|
||||
{
|
||||
case -1: /* \C in UTF mode or (*ACCEPT) or over-complex regex */
|
||||
case -1: /* \C in UTF mode or over-complex regex */
|
||||
break; /* Leave minlength unchanged (will be zero) */
|
||||
|
||||
case -2:
|
||||
@ -1625,8 +1790,7 @@ if ((re->flags & PCRE2_MATCH_EMPTY) == 0 &&
|
||||
return 3; /* unrecognized opcode */
|
||||
|
||||
default:
|
||||
if (min > UINT16_MAX) min = UINT16_MAX;
|
||||
re->minlength = min;
|
||||
re->minlength = (min > UINT16_MAX)? UINT16_MAX : min;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
316
thirdparty/pcre2/src/pcre2_tables.c
vendored
316
thirdparty/pcre2/src/pcre2_tables.c
vendored
@ -279,6 +279,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
|
||||
#define STRING_Duployan0 STR_D STR_u STR_p STR_l STR_o STR_y STR_a STR_n "\0"
|
||||
#define STRING_Egyptian_Hieroglyphs0 STR_E STR_g STR_y STR_p STR_t STR_i STR_a STR_n STR_UNDERSCORE STR_H STR_i STR_e STR_r STR_o STR_g STR_l STR_y STR_p STR_h STR_s "\0"
|
||||
#define STRING_Elbasan0 STR_E STR_l STR_b STR_a STR_s STR_a STR_n "\0"
|
||||
#define STRING_Elymaic0 STR_E STR_l STR_y STR_m STR_a STR_i STR_c "\0"
|
||||
#define STRING_Ethiopic0 STR_E STR_t STR_h STR_i STR_o STR_p STR_i STR_c "\0"
|
||||
#define STRING_Georgian0 STR_G STR_e STR_o STR_r STR_g STR_i STR_a STR_n "\0"
|
||||
#define STRING_Glagolitic0 STR_G STR_l STR_a STR_g STR_o STR_l STR_i STR_t STR_i STR_c "\0"
|
||||
@ -348,6 +349,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
|
||||
#define STRING_Myanmar0 STR_M STR_y STR_a STR_n STR_m STR_a STR_r "\0"
|
||||
#define STRING_N0 STR_N "\0"
|
||||
#define STRING_Nabataean0 STR_N STR_a STR_b STR_a STR_t STR_a STR_e STR_a STR_n "\0"
|
||||
#define STRING_Nandinagari0 STR_N STR_a STR_n STR_d STR_i STR_n STR_a STR_g STR_a STR_r STR_i "\0"
|
||||
#define STRING_Nd0 STR_N STR_d "\0"
|
||||
#define STRING_New_Tai_Lue0 STR_N STR_e STR_w STR_UNDERSCORE STR_T STR_a STR_i STR_UNDERSCORE STR_L STR_u STR_e "\0"
|
||||
#define STRING_Newa0 STR_N STR_e STR_w STR_a "\0"
|
||||
@ -355,6 +357,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
|
||||
#define STRING_Nl0 STR_N STR_l "\0"
|
||||
#define STRING_No0 STR_N STR_o "\0"
|
||||
#define STRING_Nushu0 STR_N STR_u STR_s STR_h STR_u "\0"
|
||||
#define STRING_Nyiakeng_Puachue_Hmong0 STR_N STR_y STR_i STR_a STR_k STR_e STR_n STR_g STR_UNDERSCORE STR_P STR_u STR_a STR_c STR_h STR_u STR_e STR_UNDERSCORE STR_H STR_m STR_o STR_n STR_g "\0"
|
||||
#define STRING_Ogham0 STR_O STR_g STR_h STR_a STR_m "\0"
|
||||
#define STRING_Ol_Chiki0 STR_O STR_l STR_UNDERSCORE STR_C STR_h STR_i STR_k STR_i "\0"
|
||||
#define STRING_Old_Hungarian0 STR_O STR_l STR_d STR_UNDERSCORE STR_H STR_u STR_n STR_g STR_a STR_r STR_i STR_a STR_n "\0"
|
||||
@ -419,6 +422,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
|
||||
#define STRING_Ugaritic0 STR_U STR_g STR_a STR_r STR_i STR_t STR_i STR_c "\0"
|
||||
#define STRING_Unknown0 STR_U STR_n STR_k STR_n STR_o STR_w STR_n "\0"
|
||||
#define STRING_Vai0 STR_V STR_a STR_i "\0"
|
||||
#define STRING_Wancho0 STR_W STR_a STR_n STR_c STR_h STR_o "\0"
|
||||
#define STRING_Warang_Citi0 STR_W STR_a STR_r STR_a STR_n STR_g STR_UNDERSCORE STR_C STR_i STR_t STR_i "\0"
|
||||
#define STRING_Xan0 STR_X STR_a STR_n "\0"
|
||||
#define STRING_Xps0 STR_X STR_p STR_s "\0"
|
||||
@ -474,6 +478,7 @@ const char PRIV(utt_names)[] =
|
||||
STRING_Duployan0
|
||||
STRING_Egyptian_Hieroglyphs0
|
||||
STRING_Elbasan0
|
||||
STRING_Elymaic0
|
||||
STRING_Ethiopic0
|
||||
STRING_Georgian0
|
||||
STRING_Glagolitic0
|
||||
@ -543,6 +548,7 @@ const char PRIV(utt_names)[] =
|
||||
STRING_Myanmar0
|
||||
STRING_N0
|
||||
STRING_Nabataean0
|
||||
STRING_Nandinagari0
|
||||
STRING_Nd0
|
||||
STRING_New_Tai_Lue0
|
||||
STRING_Newa0
|
||||
@ -550,6 +556,7 @@ const char PRIV(utt_names)[] =
|
||||
STRING_Nl0
|
||||
STRING_No0
|
||||
STRING_Nushu0
|
||||
STRING_Nyiakeng_Puachue_Hmong0
|
||||
STRING_Ogham0
|
||||
STRING_Ol_Chiki0
|
||||
STRING_Old_Hungarian0
|
||||
@ -614,6 +621,7 @@ const char PRIV(utt_names)[] =
|
||||
STRING_Ugaritic0
|
||||
STRING_Unknown0
|
||||
STRING_Vai0
|
||||
STRING_Wancho0
|
||||
STRING_Warang_Citi0
|
||||
STRING_Xan0
|
||||
STRING_Xps0
|
||||
@ -669,158 +677,162 @@ const ucp_type_table PRIV(utt)[] = {
|
||||
{ 299, PT_SC, ucp_Duployan },
|
||||
{ 308, PT_SC, ucp_Egyptian_Hieroglyphs },
|
||||
{ 329, PT_SC, ucp_Elbasan },
|
||||
{ 337, PT_SC, ucp_Ethiopic },
|
||||
{ 346, PT_SC, ucp_Georgian },
|
||||
{ 355, PT_SC, ucp_Glagolitic },
|
||||
{ 366, PT_SC, ucp_Gothic },
|
||||
{ 373, PT_SC, ucp_Grantha },
|
||||
{ 381, PT_SC, ucp_Greek },
|
||||
{ 387, PT_SC, ucp_Gujarati },
|
||||
{ 396, PT_SC, ucp_Gunjala_Gondi },
|
||||
{ 410, PT_SC, ucp_Gurmukhi },
|
||||
{ 419, PT_SC, ucp_Han },
|
||||
{ 423, PT_SC, ucp_Hangul },
|
||||
{ 430, PT_SC, ucp_Hanifi_Rohingya },
|
||||
{ 446, PT_SC, ucp_Hanunoo },
|
||||
{ 454, PT_SC, ucp_Hatran },
|
||||
{ 461, PT_SC, ucp_Hebrew },
|
||||
{ 468, PT_SC, ucp_Hiragana },
|
||||
{ 477, PT_SC, ucp_Imperial_Aramaic },
|
||||
{ 494, PT_SC, ucp_Inherited },
|
||||
{ 504, PT_SC, ucp_Inscriptional_Pahlavi },
|
||||
{ 526, PT_SC, ucp_Inscriptional_Parthian },
|
||||
{ 549, PT_SC, ucp_Javanese },
|
||||
{ 558, PT_SC, ucp_Kaithi },
|
||||
{ 565, PT_SC, ucp_Kannada },
|
||||
{ 573, PT_SC, ucp_Katakana },
|
||||
{ 582, PT_SC, ucp_Kayah_Li },
|
||||
{ 591, PT_SC, ucp_Kharoshthi },
|
||||
{ 602, PT_SC, ucp_Khmer },
|
||||
{ 608, PT_SC, ucp_Khojki },
|
||||
{ 615, PT_SC, ucp_Khudawadi },
|
||||
{ 625, PT_GC, ucp_L },
|
||||
{ 627, PT_LAMP, 0 },
|
||||
{ 630, PT_SC, ucp_Lao },
|
||||
{ 634, PT_SC, ucp_Latin },
|
||||
{ 640, PT_SC, ucp_Lepcha },
|
||||
{ 647, PT_SC, ucp_Limbu },
|
||||
{ 653, PT_SC, ucp_Linear_A },
|
||||
{ 662, PT_SC, ucp_Linear_B },
|
||||
{ 671, PT_SC, ucp_Lisu },
|
||||
{ 676, PT_PC, ucp_Ll },
|
||||
{ 679, PT_PC, ucp_Lm },
|
||||
{ 682, PT_PC, ucp_Lo },
|
||||
{ 685, PT_PC, ucp_Lt },
|
||||
{ 688, PT_PC, ucp_Lu },
|
||||
{ 691, PT_SC, ucp_Lycian },
|
||||
{ 698, PT_SC, ucp_Lydian },
|
||||
{ 705, PT_GC, ucp_M },
|
||||
{ 707, PT_SC, ucp_Mahajani },
|
||||
{ 716, PT_SC, ucp_Makasar },
|
||||
{ 724, PT_SC, ucp_Malayalam },
|
||||
{ 734, PT_SC, ucp_Mandaic },
|
||||
{ 742, PT_SC, ucp_Manichaean },
|
||||
{ 753, PT_SC, ucp_Marchen },
|
||||
{ 761, PT_SC, ucp_Masaram_Gondi },
|
||||
{ 775, PT_PC, ucp_Mc },
|
||||
{ 778, PT_PC, ucp_Me },
|
||||
{ 781, PT_SC, ucp_Medefaidrin },
|
||||
{ 793, PT_SC, ucp_Meetei_Mayek },
|
||||
{ 806, PT_SC, ucp_Mende_Kikakui },
|
||||
{ 820, PT_SC, ucp_Meroitic_Cursive },
|
||||
{ 837, PT_SC, ucp_Meroitic_Hieroglyphs },
|
||||
{ 858, PT_SC, ucp_Miao },
|
||||
{ 863, PT_PC, ucp_Mn },
|
||||
{ 866, PT_SC, ucp_Modi },
|
||||
{ 871, PT_SC, ucp_Mongolian },
|
||||
{ 881, PT_SC, ucp_Mro },
|
||||
{ 885, PT_SC, ucp_Multani },
|
||||
{ 893, PT_SC, ucp_Myanmar },
|
||||
{ 901, PT_GC, ucp_N },
|
||||
{ 903, PT_SC, ucp_Nabataean },
|
||||
{ 913, PT_PC, ucp_Nd },
|
||||
{ 916, PT_SC, ucp_New_Tai_Lue },
|
||||
{ 928, PT_SC, ucp_Newa },
|
||||
{ 933, PT_SC, ucp_Nko },
|
||||
{ 937, PT_PC, ucp_Nl },
|
||||
{ 940, PT_PC, ucp_No },
|
||||
{ 943, PT_SC, ucp_Nushu },
|
||||
{ 949, PT_SC, ucp_Ogham },
|
||||
{ 955, PT_SC, ucp_Ol_Chiki },
|
||||
{ 964, PT_SC, ucp_Old_Hungarian },
|
||||
{ 978, PT_SC, ucp_Old_Italic },
|
||||
{ 989, PT_SC, ucp_Old_North_Arabian },
|
||||
{ 1007, PT_SC, ucp_Old_Permic },
|
||||
{ 1018, PT_SC, ucp_Old_Persian },
|
||||
{ 1030, PT_SC, ucp_Old_Sogdian },
|
||||
{ 1042, PT_SC, ucp_Old_South_Arabian },
|
||||
{ 1060, PT_SC, ucp_Old_Turkic },
|
||||
{ 1071, PT_SC, ucp_Oriya },
|
||||
{ 1077, PT_SC, ucp_Osage },
|
||||
{ 1083, PT_SC, ucp_Osmanya },
|
||||
{ 1091, PT_GC, ucp_P },
|
||||
{ 1093, PT_SC, ucp_Pahawh_Hmong },
|
||||
{ 1106, PT_SC, ucp_Palmyrene },
|
||||
{ 1116, PT_SC, ucp_Pau_Cin_Hau },
|
||||
{ 1128, PT_PC, ucp_Pc },
|
||||
{ 1131, PT_PC, ucp_Pd },
|
||||
{ 1134, PT_PC, ucp_Pe },
|
||||
{ 1137, PT_PC, ucp_Pf },
|
||||
{ 1140, PT_SC, ucp_Phags_Pa },
|
||||
{ 1149, PT_SC, ucp_Phoenician },
|
||||
{ 1160, PT_PC, ucp_Pi },
|
||||
{ 1163, PT_PC, ucp_Po },
|
||||
{ 1166, PT_PC, ucp_Ps },
|
||||
{ 1169, PT_SC, ucp_Psalter_Pahlavi },
|
||||
{ 1185, PT_SC, ucp_Rejang },
|
||||
{ 1192, PT_SC, ucp_Runic },
|
||||
{ 1198, PT_GC, ucp_S },
|
||||
{ 1200, PT_SC, ucp_Samaritan },
|
||||
{ 1210, PT_SC, ucp_Saurashtra },
|
||||
{ 1221, PT_PC, ucp_Sc },
|
||||
{ 1224, PT_SC, ucp_Sharada },
|
||||
{ 1232, PT_SC, ucp_Shavian },
|
||||
{ 1240, PT_SC, ucp_Siddham },
|
||||
{ 1248, PT_SC, ucp_SignWriting },
|
||||
{ 1260, PT_SC, ucp_Sinhala },
|
||||
{ 1268, PT_PC, ucp_Sk },
|
||||
{ 1271, PT_PC, ucp_Sm },
|
||||
{ 1274, PT_PC, ucp_So },
|
||||
{ 1277, PT_SC, ucp_Sogdian },
|
||||
{ 1285, PT_SC, ucp_Sora_Sompeng },
|
||||
{ 1298, PT_SC, ucp_Soyombo },
|
||||
{ 1306, PT_SC, ucp_Sundanese },
|
||||
{ 1316, PT_SC, ucp_Syloti_Nagri },
|
||||
{ 1329, PT_SC, ucp_Syriac },
|
||||
{ 1336, PT_SC, ucp_Tagalog },
|
||||
{ 1344, PT_SC, ucp_Tagbanwa },
|
||||
{ 1353, PT_SC, ucp_Tai_Le },
|
||||
{ 1360, PT_SC, ucp_Tai_Tham },
|
||||
{ 1369, PT_SC, ucp_Tai_Viet },
|
||||
{ 1378, PT_SC, ucp_Takri },
|
||||
{ 1384, PT_SC, ucp_Tamil },
|
||||
{ 1390, PT_SC, ucp_Tangut },
|
||||
{ 1397, PT_SC, ucp_Telugu },
|
||||
{ 1404, PT_SC, ucp_Thaana },
|
||||
{ 1411, PT_SC, ucp_Thai },
|
||||
{ 1416, PT_SC, ucp_Tibetan },
|
||||
{ 1424, PT_SC, ucp_Tifinagh },
|
||||
{ 1433, PT_SC, ucp_Tirhuta },
|
||||
{ 1441, PT_SC, ucp_Ugaritic },
|
||||
{ 1450, PT_SC, ucp_Unknown },
|
||||
{ 1458, PT_SC, ucp_Vai },
|
||||
{ 1462, PT_SC, ucp_Warang_Citi },
|
||||
{ 1474, PT_ALNUM, 0 },
|
||||
{ 1478, PT_PXSPACE, 0 },
|
||||
{ 1482, PT_SPACE, 0 },
|
||||
{ 1486, PT_UCNC, 0 },
|
||||
{ 1490, PT_WORD, 0 },
|
||||
{ 1494, PT_SC, ucp_Yi },
|
||||
{ 1497, PT_GC, ucp_Z },
|
||||
{ 1499, PT_SC, ucp_Zanabazar_Square },
|
||||
{ 1516, PT_PC, ucp_Zl },
|
||||
{ 1519, PT_PC, ucp_Zp },
|
||||
{ 1522, PT_PC, ucp_Zs }
|
||||
{ 337, PT_SC, ucp_Elymaic },
|
||||
{ 345, PT_SC, ucp_Ethiopic },
|
||||
{ 354, PT_SC, ucp_Georgian },
|
||||
{ 363, PT_SC, ucp_Glagolitic },
|
||||
{ 374, PT_SC, ucp_Gothic },
|
||||
{ 381, PT_SC, ucp_Grantha },
|
||||
{ 389, PT_SC, ucp_Greek },
|
||||
{ 395, PT_SC, ucp_Gujarati },
|
||||
{ 404, PT_SC, ucp_Gunjala_Gondi },
|
||||
{ 418, PT_SC, ucp_Gurmukhi },
|
||||
{ 427, PT_SC, ucp_Han },
|
||||
{ 431, PT_SC, ucp_Hangul },
|
||||
{ 438, PT_SC, ucp_Hanifi_Rohingya },
|
||||
{ 454, PT_SC, ucp_Hanunoo },
|
||||
{ 462, PT_SC, ucp_Hatran },
|
||||
{ 469, PT_SC, ucp_Hebrew },
|
||||
{ 476, PT_SC, ucp_Hiragana },
|
||||
{ 485, PT_SC, ucp_Imperial_Aramaic },
|
||||
{ 502, PT_SC, ucp_Inherited },
|
||||
{ 512, PT_SC, ucp_Inscriptional_Pahlavi },
|
||||
{ 534, PT_SC, ucp_Inscriptional_Parthian },
|
||||
{ 557, PT_SC, ucp_Javanese },
|
||||
{ 566, PT_SC, ucp_Kaithi },
|
||||
{ 573, PT_SC, ucp_Kannada },
|
||||
{ 581, PT_SC, ucp_Katakana },
|
||||
{ 590, PT_SC, ucp_Kayah_Li },
|
||||
{ 599, PT_SC, ucp_Kharoshthi },
|
||||
{ 610, PT_SC, ucp_Khmer },
|
||||
{ 616, PT_SC, ucp_Khojki },
|
||||
{ 623, PT_SC, ucp_Khudawadi },
|
||||
{ 633, PT_GC, ucp_L },
|
||||
{ 635, PT_LAMP, 0 },
|
||||
{ 638, PT_SC, ucp_Lao },
|
||||
{ 642, PT_SC, ucp_Latin },
|
||||
{ 648, PT_SC, ucp_Lepcha },
|
||||
{ 655, PT_SC, ucp_Limbu },
|
||||
{ 661, PT_SC, ucp_Linear_A },
|
||||
{ 670, PT_SC, ucp_Linear_B },
|
||||
{ 679, PT_SC, ucp_Lisu },
|
||||
{ 684, PT_PC, ucp_Ll },
|
||||
{ 687, PT_PC, ucp_Lm },
|
||||
{ 690, PT_PC, ucp_Lo },
|
||||
{ 693, PT_PC, ucp_Lt },
|
||||
{ 696, PT_PC, ucp_Lu },
|
||||
{ 699, PT_SC, ucp_Lycian },
|
||||
{ 706, PT_SC, ucp_Lydian },
|
||||
{ 713, PT_GC, ucp_M },
|
||||
{ 715, PT_SC, ucp_Mahajani },
|
||||
{ 724, PT_SC, ucp_Makasar },
|
||||
{ 732, PT_SC, ucp_Malayalam },
|
||||
{ 742, PT_SC, ucp_Mandaic },
|
||||
{ 750, PT_SC, ucp_Manichaean },
|
||||
{ 761, PT_SC, ucp_Marchen },
|
||||
{ 769, PT_SC, ucp_Masaram_Gondi },
|
||||
{ 783, PT_PC, ucp_Mc },
|
||||
{ 786, PT_PC, ucp_Me },
|
||||
{ 789, PT_SC, ucp_Medefaidrin },
|
||||
{ 801, PT_SC, ucp_Meetei_Mayek },
|
||||
{ 814, PT_SC, ucp_Mende_Kikakui },
|
||||
{ 828, PT_SC, ucp_Meroitic_Cursive },
|
||||
{ 845, PT_SC, ucp_Meroitic_Hieroglyphs },
|
||||
{ 866, PT_SC, ucp_Miao },
|
||||
{ 871, PT_PC, ucp_Mn },
|
||||
{ 874, PT_SC, ucp_Modi },
|
||||
{ 879, PT_SC, ucp_Mongolian },
|
||||
{ 889, PT_SC, ucp_Mro },
|
||||
{ 893, PT_SC, ucp_Multani },
|
||||
{ 901, PT_SC, ucp_Myanmar },
|
||||
{ 909, PT_GC, ucp_N },
|
||||
{ 911, PT_SC, ucp_Nabataean },
|
||||
{ 921, PT_SC, ucp_Nandinagari },
|
||||
{ 933, PT_PC, ucp_Nd },
|
||||
{ 936, PT_SC, ucp_New_Tai_Lue },
|
||||
{ 948, PT_SC, ucp_Newa },
|
||||
{ 953, PT_SC, ucp_Nko },
|
||||
{ 957, PT_PC, ucp_Nl },
|
||||
{ 960, PT_PC, ucp_No },
|
||||
{ 963, PT_SC, ucp_Nushu },
|
||||
{ 969, PT_SC, ucp_Nyiakeng_Puachue_Hmong },
|
||||
{ 992, PT_SC, ucp_Ogham },
|
||||
{ 998, PT_SC, ucp_Ol_Chiki },
|
||||
{ 1007, PT_SC, ucp_Old_Hungarian },
|
||||
{ 1021, PT_SC, ucp_Old_Italic },
|
||||
{ 1032, PT_SC, ucp_Old_North_Arabian },
|
||||
{ 1050, PT_SC, ucp_Old_Permic },
|
||||
{ 1061, PT_SC, ucp_Old_Persian },
|
||||
{ 1073, PT_SC, ucp_Old_Sogdian },
|
||||
{ 1085, PT_SC, ucp_Old_South_Arabian },
|
||||
{ 1103, PT_SC, ucp_Old_Turkic },
|
||||
{ 1114, PT_SC, ucp_Oriya },
|
||||
{ 1120, PT_SC, ucp_Osage },
|
||||
{ 1126, PT_SC, ucp_Osmanya },
|
||||
{ 1134, PT_GC, ucp_P },
|
||||
{ 1136, PT_SC, ucp_Pahawh_Hmong },
|
||||
{ 1149, PT_SC, ucp_Palmyrene },
|
||||
{ 1159, PT_SC, ucp_Pau_Cin_Hau },
|
||||
{ 1171, PT_PC, ucp_Pc },
|
||||
{ 1174, PT_PC, ucp_Pd },
|
||||
{ 1177, PT_PC, ucp_Pe },
|
||||
{ 1180, PT_PC, ucp_Pf },
|
||||
{ 1183, PT_SC, ucp_Phags_Pa },
|
||||
{ 1192, PT_SC, ucp_Phoenician },
|
||||
{ 1203, PT_PC, ucp_Pi },
|
||||
{ 1206, PT_PC, ucp_Po },
|
||||
{ 1209, PT_PC, ucp_Ps },
|
||||
{ 1212, PT_SC, ucp_Psalter_Pahlavi },
|
||||
{ 1228, PT_SC, ucp_Rejang },
|
||||
{ 1235, PT_SC, ucp_Runic },
|
||||
{ 1241, PT_GC, ucp_S },
|
||||
{ 1243, PT_SC, ucp_Samaritan },
|
||||
{ 1253, PT_SC, ucp_Saurashtra },
|
||||
{ 1264, PT_PC, ucp_Sc },
|
||||
{ 1267, PT_SC, ucp_Sharada },
|
||||
{ 1275, PT_SC, ucp_Shavian },
|
||||
{ 1283, PT_SC, ucp_Siddham },
|
||||
{ 1291, PT_SC, ucp_SignWriting },
|
||||
{ 1303, PT_SC, ucp_Sinhala },
|
||||
{ 1311, PT_PC, ucp_Sk },
|
||||
{ 1314, PT_PC, ucp_Sm },
|
||||
{ 1317, PT_PC, ucp_So },
|
||||
{ 1320, PT_SC, ucp_Sogdian },
|
||||
{ 1328, PT_SC, ucp_Sora_Sompeng },
|
||||
{ 1341, PT_SC, ucp_Soyombo },
|
||||
{ 1349, PT_SC, ucp_Sundanese },
|
||||
{ 1359, PT_SC, ucp_Syloti_Nagri },
|
||||
{ 1372, PT_SC, ucp_Syriac },
|
||||
{ 1379, PT_SC, ucp_Tagalog },
|
||||
{ 1387, PT_SC, ucp_Tagbanwa },
|
||||
{ 1396, PT_SC, ucp_Tai_Le },
|
||||
{ 1403, PT_SC, ucp_Tai_Tham },
|
||||
{ 1412, PT_SC, ucp_Tai_Viet },
|
||||
{ 1421, PT_SC, ucp_Takri },
|
||||
{ 1427, PT_SC, ucp_Tamil },
|
||||
{ 1433, PT_SC, ucp_Tangut },
|
||||
{ 1440, PT_SC, ucp_Telugu },
|
||||
{ 1447, PT_SC, ucp_Thaana },
|
||||
{ 1454, PT_SC, ucp_Thai },
|
||||
{ 1459, PT_SC, ucp_Tibetan },
|
||||
{ 1467, PT_SC, ucp_Tifinagh },
|
||||
{ 1476, PT_SC, ucp_Tirhuta },
|
||||
{ 1484, PT_SC, ucp_Ugaritic },
|
||||
{ 1493, PT_SC, ucp_Unknown },
|
||||
{ 1501, PT_SC, ucp_Vai },
|
||||
{ 1505, PT_SC, ucp_Wancho },
|
||||
{ 1512, PT_SC, ucp_Warang_Citi },
|
||||
{ 1524, PT_ALNUM, 0 },
|
||||
{ 1528, PT_PXSPACE, 0 },
|
||||
{ 1532, PT_SPACE, 0 },
|
||||
{ 1536, PT_UCNC, 0 },
|
||||
{ 1540, PT_WORD, 0 },
|
||||
{ 1544, PT_SC, ucp_Yi },
|
||||
{ 1547, PT_GC, ucp_Z },
|
||||
{ 1549, PT_SC, ucp_Zanabazar_Square },
|
||||
{ 1566, PT_PC, ucp_Zl },
|
||||
{ 1569, PT_PC, ucp_Zp },
|
||||
{ 1572, PT_PC, ucp_Zs }
|
||||
};
|
||||
|
||||
const size_t PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table);
|
||||
|
5747
thirdparty/pcre2/src/pcre2_ucd.c
vendored
5747
thirdparty/pcre2/src/pcre2_ucd.c
vendored
File diff suppressed because it is too large
Load Diff
7
thirdparty/pcre2/src/pcre2_ucp.h
vendored
7
thirdparty/pcre2/src/pcre2_ucp.h
vendored
@ -281,7 +281,12 @@ enum {
|
||||
ucp_Makasar,
|
||||
ucp_Medefaidrin,
|
||||
ucp_Old_Sogdian,
|
||||
ucp_Sogdian
|
||||
ucp_Sogdian,
|
||||
/* New for Unicode 12.0.0 */
|
||||
ucp_Elymaic,
|
||||
ucp_Nandinagari,
|
||||
ucp_Nyiakeng_Puachue_Hmong,
|
||||
ucp_Wancho
|
||||
};
|
||||
|
||||
#endif /* PCRE2_UCP_H_IDEMPOTENT_GUARD */
|
||||
|
@ -214,6 +214,10 @@
|
||||
#define SLJIT_MEMCPY(dest, src, len) memcpy(dest, src, len)
|
||||
#endif
|
||||
|
||||
#ifndef SLJIT_MEMMOVE
|
||||
#define SLJIT_MEMMOVE(dest, src, len) memmove(dest, src, len)
|
||||
#endif
|
||||
|
||||
#ifndef SLJIT_ZEROMEM
|
||||
#define SLJIT_ZEROMEM(dest, len) memset(dest, 0, len)
|
||||
#endif
|
||||
|
28
thirdparty/pcre2/src/sljit/sljitExecAllocator.c
vendored
28
thirdparty/pcre2/src/sljit/sljitExecAllocator.c
vendored
@ -118,10 +118,20 @@ static SLJIT_INLINE int get_map_jit_flag()
|
||||
if (map_jit_flag == -1) {
|
||||
struct utsname name;
|
||||
|
||||
map_jit_flag = 0;
|
||||
uname(&name);
|
||||
|
||||
/* Kernel version for 10.14.0 (Mojave) */
|
||||
map_jit_flag = (atoi(name.release) >= 18) ? MAP_JIT : 0;
|
||||
if (atoi(name.release) >= 18) {
|
||||
/* Only use MAP_JIT if a hardened runtime is used, because MAP_JIT is incompatible with fork(). */
|
||||
void *ptr = mmap(NULL, getpagesize(), PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
|
||||
|
||||
if (ptr == MAP_FAILED) {
|
||||
map_jit_flag = MAP_JIT;
|
||||
} else {
|
||||
munmap(ptr, getpagesize());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return map_jit_flag;
|
||||
@ -137,6 +147,7 @@ static SLJIT_INLINE int get_map_jit_flag()
|
||||
static SLJIT_INLINE void* alloc_chunk(sljit_uw size)
|
||||
{
|
||||
void *retval;
|
||||
const int prot = PROT_READ | PROT_WRITE | PROT_EXEC;
|
||||
|
||||
#ifdef MAP_ANON
|
||||
|
||||
@ -146,16 +157,25 @@ static SLJIT_INLINE void* alloc_chunk(sljit_uw size)
|
||||
flags |= get_map_jit_flag();
|
||||
#endif
|
||||
|
||||
retval = mmap(NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC, flags, -1, 0);
|
||||
retval = mmap(NULL, size, prot, flags, -1, 0);
|
||||
#else /* !MAP_ANON */
|
||||
if (dev_zero < 0) {
|
||||
if (open_dev_zero())
|
||||
return NULL;
|
||||
}
|
||||
retval = mmap(NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE, dev_zero, 0);
|
||||
retval = mmap(NULL, size, prot, MAP_PRIVATE, dev_zero, 0);
|
||||
#endif /* MAP_ANON */
|
||||
|
||||
return (retval != MAP_FAILED) ? retval : NULL;
|
||||
if (retval == MAP_FAILED)
|
||||
retval = NULL;
|
||||
else {
|
||||
if (mprotect(retval, size, prot) < 0) {
|
||||
munmap(retval, size);
|
||||
retval = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
return retval;
|
||||
}
|
||||
|
||||
static SLJIT_INLINE void free_chunk(void *chunk, sljit_uw size)
|
||||
|
69
thirdparty/pcre2/src/sljit/sljitLir.c
vendored
69
thirdparty/pcre2/src/sljit/sljitLir.c
vendored
@ -144,6 +144,7 @@
|
||||
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
|
||||
# define PATCH_MD 0x10
|
||||
#endif
|
||||
# define TYPE_SHIFT 13
|
||||
#endif
|
||||
|
||||
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) || (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
|
||||
@ -521,6 +522,12 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_target(struct sljit_jump *jump, sljit_uw
|
||||
}
|
||||
}
|
||||
|
||||
/* Associates `label` with `put_label` by storing it in put_label->label.
   A NULL `put_label` is tolerated: the call then does nothing. */
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_put_label(struct sljit_put_label *put_label, struct sljit_label *label)
|
||||
{
|
||||
if (SLJIT_LIKELY(!!put_label))
|
||||
put_label->label = label;
|
||||
}
|
||||
|
||||
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_current_flags(struct sljit_compiler *compiler, sljit_s32 current_flags)
|
||||
{
|
||||
SLJIT_UNUSED_ARG(compiler);
|
||||
@ -620,6 +627,30 @@ static SLJIT_INLINE sljit_s32 get_arg_count(sljit_s32 arg_types)
|
||||
return arg_count;
|
||||
}
|
||||
|
||||
#if !(defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
|
||||
|
||||
/* Returns the smallest position among the supplied records: label->size,
   jump->addr, const_->addr and put_label->addr. Any of the four pointers may
   be NULL, in which case that record is ignored. When all four are NULL the
   result is ~(sljit_uw)0, i.e. the largest sljit_uw value. */
static SLJIT_INLINE sljit_uw compute_next_addr(struct sljit_label *label, struct sljit_jump *jump,
|
||||
struct sljit_const *const_, struct sljit_put_label *put_label)
|
||||
{
|
||||
/* Start from the maximum value so that any real position replaces it. */
sljit_uw result = ~(sljit_uw)0;
|
||||

|
||||
if (label)
|
||||
result = label->size;
|
||||

|
||||
if (jump && jump->addr < result)
|
||||
result = jump->addr;
|
||||

|
||||
if (const_ && const_->addr < result)
|
||||
result = const_->addr;
|
||||

|
||||
if (put_label && put_label->addr < result)
|
||||
result = put_label->addr;
|
||||

|
||||
return result;
|
||||
}
|
||||
|
||||
#endif /* !SLJIT_CONFIG_X86 */
|
||||
|
||||
static SLJIT_INLINE void set_emit_enter(struct sljit_compiler *compiler,
|
||||
sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds,
|
||||
sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
|
||||
@ -687,6 +718,19 @@ static SLJIT_INLINE void set_const(struct sljit_const *const_, struct sljit_comp
|
||||
compiler->last_const = const_;
|
||||
}
|
||||
|
||||
/* Initializes a freshly allocated put_label record and appends it to the
   compiler's put_label list.

   put_label  record to initialize (next and label are cleared, flags set to 0)
   compiler   compiler whose put_labels / last_put_label list is extended
   offset     subtracted from compiler->size to form the record's addr */
static SLJIT_INLINE void set_put_label(struct sljit_put_label *put_label, struct sljit_compiler *compiler, sljit_uw offset)
|
||||
{
|
||||
put_label->next = NULL;
|
||||
put_label->label = NULL;
|
||||
put_label->addr = compiler->size - offset;
|
||||
put_label->flags = 0;
|
||||
/* Link after the current tail, or become the head if the list is empty. */
if (compiler->last_put_label)
|
||||
compiler->last_put_label->next = put_label;
|
||||
else
|
||||
compiler->put_labels = put_label;
|
||||
compiler->last_put_label = put_label;
|
||||
}
|
||||
|
||||
#define ADDRESSING_DEPENDS_ON(exp, reg) \
|
||||
(((exp) & SLJIT_MEM) && (((exp) & REG_MASK) == reg || OFFS_REG(exp) == reg))
|
||||
|
||||
@ -1905,6 +1949,21 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_const(struct sljit_compil
|
||||
CHECK_RETURN_OK;
|
||||
}
|
||||
|
||||
/* Checking/tracing companion for sljit_emit_put_label.
   With SLJIT_ARGUMENT_CHECKS enabled, the destination operand (dst, dstw) is
   validated through FUNCTION_CHECK_DST. With SLJIT_VERBOSE enabled and
   compiler->verbose set, a "put_label" line followed by the destination
   operand is written to compiler->verbose. Finishes with CHECK_RETURN_OK. */
static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
|
||||
{
|
||||
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
|
||||
FUNCTION_CHECK_DST(dst, dstw, 0);
|
||||
#endif
|
||||
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
|
||||
if (SLJIT_UNLIKELY(!!compiler->verbose)) {
|
||||
fprintf(compiler->verbose, " put_label ");
|
||||
sljit_verbose_param(compiler, dst, dstw);
|
||||
fprintf(compiler->verbose, "\n");
|
||||
}
|
||||
#endif
|
||||
CHECK_RETURN_OK;
|
||||
}
|
||||
|
||||
#endif /* SLJIT_ARGUMENT_CHECKS || SLJIT_VERBOSE */
|
||||
|
||||
#define SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw) \
|
||||
@ -2581,6 +2640,14 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Placeholder implementation of sljit_emit_put_label: every argument is
   ignored and NULL is always returned.
   NOTE(review): this looks like the variant compiled when no target
   architecture is supported (it sits next to the other NULL-returning stubs)
   -- confirm against the enclosing #if, which is not visible here. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
|
||||
{
|
||||
SLJIT_UNUSED_ARG(compiler);
|
||||
SLJIT_UNUSED_ARG(dst);
|
||||
SLJIT_UNUSED_ARG(dstw);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
|
||||
{
|
||||
SLJIT_UNUSED_ARG(addr);
|
||||
@ -2597,4 +2664,4 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_consta
|
||||
SLJIT_UNREACHABLE();
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif /* !SLJIT_CONFIG_UNSUPPORTED */
|
||||
|
22
thirdparty/pcre2/src/sljit/sljitLir.h
vendored
22
thirdparty/pcre2/src/sljit/sljitLir.h
vendored
@ -348,13 +348,20 @@ struct sljit_label {
|
||||
struct sljit_jump {
|
||||
struct sljit_jump *next;
|
||||
sljit_uw addr;
|
||||
sljit_sw flags;
|
||||
sljit_uw flags;
|
||||
union {
|
||||
sljit_uw target;
|
||||
struct sljit_label* label;
|
||||
struct sljit_label *label;
|
||||
} u;
|
||||
};
|
||||
|
||||
/* Describes one "put label" operation, i.e. a place where the value of a
   label is stored (see sljit_emit_put_label and sljit_set_put_label).
   Records are chained into a singly linked list through `next`. */
struct sljit_put_label {
|
||||
/* Next record in the list, or NULL for the last one. */
struct sljit_put_label *next;
|
||||
/* Label whose value is stored; assigned by sljit_set_put_label. */
struct sljit_label *label;
|
||||
sljit_uw addr;
|
||||
sljit_uw flags;
|
||||
};
|
||||
|
||||
struct sljit_const {
|
||||
struct sljit_const *next;
|
||||
sljit_uw addr;
|
||||
@ -366,10 +373,12 @@ struct sljit_compiler {
|
||||
|
||||
struct sljit_label *labels;
|
||||
struct sljit_jump *jumps;
|
||||
struct sljit_put_label *put_labels;
|
||||
struct sljit_const *consts;
|
||||
struct sljit_label *last_label;
|
||||
struct sljit_jump *last_jump;
|
||||
struct sljit_const *last_const;
|
||||
struct sljit_put_label *last_put_label;
|
||||
|
||||
void *allocator_data;
|
||||
struct sljit_memory_fragment *buf;
|
||||
@ -1314,10 +1323,17 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compil
|
||||
Flags: - (may destroy flags) */
|
||||
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset);
|
||||
|
||||
/* The constant can be changed runtime (see: sljit_set_const)
|
||||
/* Store a value that can be changed at runtime (see: sljit_get_const_addr / sljit_set_const)
|
||||
Flags: - (does not modify flags) */
|
||||
SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value);
|
||||
|
||||
/* Store the value of a label (see: sljit_set_put_label)
|
||||
Flags: - (does not modify flags) */
|
||||
SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw);
|
||||
|
||||
/* Set the value stored by put_label to this label. */
|
||||
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_put_label(struct sljit_put_label *put_label, struct sljit_label *label);
|
||||
|
||||
/* After the code generation the address for label, jump and const instructions
|
||||
are computed. Since these structures are freed by sljit_free_compiler, the
|
||||
addresses must be preserved by the user program elsewhere. */
|
||||
|
128
thirdparty/pcre2/src/sljit/sljitNativeARM_32.c
vendored
128
thirdparty/pcre2/src/sljit/sljitNativeARM_32.c
vendored
@ -583,8 +583,9 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
|
||||
sljit_uw *buf_end;
|
||||
sljit_uw size;
|
||||
sljit_uw word_count;
|
||||
sljit_uw next_addr;
|
||||
sljit_sw executable_offset;
|
||||
sljit_sw jump_addr;
|
||||
sljit_sw addr;
|
||||
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
|
||||
sljit_uw cpool_size;
|
||||
sljit_uw cpool_skip_alignment;
|
||||
@ -597,6 +598,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
|
||||
struct sljit_label *label;
|
||||
struct sljit_jump *jump;
|
||||
struct sljit_const *const_;
|
||||
struct sljit_put_label *put_label;
|
||||
|
||||
CHECK_ERROR_PTR();
|
||||
CHECK_PTR(check_sljit_generate_code(compiler));
|
||||
@ -625,11 +627,13 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
|
||||
|
||||
code_ptr = code;
|
||||
word_count = 0;
|
||||
next_addr = 1;
|
||||
executable_offset = SLJIT_EXEC_OFFSET(code);
|
||||
|
||||
label = compiler->labels;
|
||||
jump = compiler->jumps;
|
||||
const_ = compiler->consts;
|
||||
put_label = compiler->put_labels;
|
||||
|
||||
if (label && label->size == 0) {
|
||||
label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code, executable_offset);
|
||||
@ -669,35 +673,45 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
|
||||
else if ((*buf_ptr & 0xff000000) != PUSH_POOL) {
|
||||
#endif
|
||||
*code_ptr = *buf_ptr++;
|
||||
if (next_addr == word_count) {
|
||||
SLJIT_ASSERT(!label || label->size >= word_count);
|
||||
SLJIT_ASSERT(!jump || jump->addr >= word_count);
|
||||
SLJIT_ASSERT(!const_ || const_->addr >= word_count);
|
||||
SLJIT_ASSERT(!put_label || put_label->addr >= word_count);
|
||||
|
||||
/* These structures are ordered by their address. */
|
||||
SLJIT_ASSERT(!label || label->size >= word_count);
|
||||
SLJIT_ASSERT(!jump || jump->addr >= word_count);
|
||||
SLJIT_ASSERT(!const_ || const_->addr >= word_count);
|
||||
if (jump && jump->addr == word_count) {
|
||||
if (jump && jump->addr == word_count) {
|
||||
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
|
||||
if (detect_jump_type(jump, code_ptr, code, executable_offset))
|
||||
code_ptr--;
|
||||
jump->addr = (sljit_uw)code_ptr;
|
||||
if (detect_jump_type(jump, code_ptr, code, executable_offset))
|
||||
code_ptr--;
|
||||
jump->addr = (sljit_uw)code_ptr;
|
||||
#else
|
||||
jump->addr = (sljit_uw)(code_ptr - 2);
|
||||
if (detect_jump_type(jump, code_ptr, code, executable_offset))
|
||||
code_ptr -= 2;
|
||||
jump->addr = (sljit_uw)(code_ptr - 2);
|
||||
if (detect_jump_type(jump, code_ptr, code, executable_offset))
|
||||
code_ptr -= 2;
|
||||
#endif
|
||||
jump = jump->next;
|
||||
}
|
||||
if (label && label->size == word_count) {
|
||||
/* code_ptr can be affected above. */
|
||||
label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr + 1, executable_offset);
|
||||
label->size = (code_ptr + 1) - code;
|
||||
label = label->next;
|
||||
}
|
||||
if (const_ && const_->addr == word_count) {
|
||||
jump = jump->next;
|
||||
}
|
||||
if (label && label->size == word_count) {
|
||||
/* code_ptr can be affected above. */
|
||||
label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr + 1, executable_offset);
|
||||
label->size = (code_ptr + 1) - code;
|
||||
label = label->next;
|
||||
}
|
||||
if (const_ && const_->addr == word_count) {
|
||||
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
|
||||
const_->addr = (sljit_uw)code_ptr;
|
||||
const_->addr = (sljit_uw)code_ptr;
|
||||
#else
|
||||
const_->addr = (sljit_uw)(code_ptr - 1);
|
||||
const_->addr = (sljit_uw)(code_ptr - 1);
|
||||
#endif
|
||||
const_ = const_->next;
|
||||
const_ = const_->next;
|
||||
}
|
||||
if (put_label && put_label->addr == word_count) {
|
||||
SLJIT_ASSERT(put_label->label);
|
||||
put_label->addr = (sljit_uw)code_ptr;
|
||||
put_label = put_label->next;
|
||||
}
|
||||
next_addr = compute_next_addr(label, jump, const_, put_label);
|
||||
}
|
||||
code_ptr++;
|
||||
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
|
||||
@ -725,6 +739,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
|
||||
SLJIT_ASSERT(!label);
|
||||
SLJIT_ASSERT(!jump);
|
||||
SLJIT_ASSERT(!const_);
|
||||
SLJIT_ASSERT(!put_label);
|
||||
|
||||
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
|
||||
SLJIT_ASSERT(cpool_size == 0);
|
||||
@ -755,15 +770,15 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
|
||||
buf_ptr = (sljit_uw *)jump->addr;
|
||||
|
||||
if (jump->flags & PATCH_B) {
|
||||
jump_addr = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(buf_ptr + 2, executable_offset);
|
||||
addr = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(buf_ptr + 2, executable_offset);
|
||||
if (!(jump->flags & JUMP_ADDR)) {
|
||||
SLJIT_ASSERT(jump->flags & JUMP_LABEL);
|
||||
SLJIT_ASSERT(((sljit_sw)jump->u.label->addr - jump_addr) <= 0x01ffffff && ((sljit_sw)jump->u.label->addr - jump_addr) >= -0x02000000);
|
||||
*buf_ptr |= (((sljit_sw)jump->u.label->addr - jump_addr) >> 2) & 0x00ffffff;
|
||||
SLJIT_ASSERT(((sljit_sw)jump->u.label->addr - addr) <= 0x01ffffff && ((sljit_sw)jump->u.label->addr - addr) >= -0x02000000);
|
||||
*buf_ptr |= (((sljit_sw)jump->u.label->addr - addr) >> 2) & 0x00ffffff;
|
||||
}
|
||||
else {
|
||||
SLJIT_ASSERT(((sljit_sw)jump->u.target - jump_addr) <= 0x01ffffff && ((sljit_sw)jump->u.target - jump_addr) >= -0x02000000);
|
||||
*buf_ptr |= (((sljit_sw)jump->u.target - jump_addr) >> 2) & 0x00ffffff;
|
||||
SLJIT_ASSERT(((sljit_sw)jump->u.target - addr) <= 0x01ffffff && ((sljit_sw)jump->u.target - addr) >= -0x02000000);
|
||||
*buf_ptr |= (((sljit_sw)jump->u.target - addr) >> 2) & 0x00ffffff;
|
||||
}
|
||||
}
|
||||
else if (jump->flags & SLJIT_REWRITABLE_JUMP) {
|
||||
@ -813,6 +828,22 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
|
||||
}
|
||||
#endif
|
||||
|
||||
put_label = compiler->put_labels;
|
||||
while (put_label) {
|
||||
addr = put_label->label->addr;
|
||||
buf_ptr = (sljit_uw*)put_label->addr;
|
||||
|
||||
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
|
||||
SLJIT_ASSERT((buf_ptr[0] & 0xffff0000) == 0xe59f0000);
|
||||
buf_ptr[((buf_ptr[0] & 0xfff) >> 2) + 2] = addr;
|
||||
#else
|
||||
SLJIT_ASSERT((buf_ptr[-1] & 0xfff00000) == MOVW && (buf_ptr[0] & 0xfff00000) == MOVT);
|
||||
buf_ptr[-1] |= ((addr << 4) & 0xf0000) | (addr & 0xfff);
|
||||
buf_ptr[0] |= ((addr >> 12) & 0xf0000) | ((addr >> 16) & 0xfff);
|
||||
#endif
|
||||
put_label = put_label->next;
|
||||
}
|
||||
|
||||
SLJIT_ASSERT(code_ptr - code <= (sljit_s32)size);
|
||||
|
||||
compiler->error = SLJIT_ERR_COMPILED;
|
||||
@ -2639,23 +2670,23 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compile
|
||||
SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
|
||||
{
|
||||
struct sljit_const *const_;
|
||||
sljit_s32 reg;
|
||||
sljit_s32 dst_r;
|
||||
|
||||
CHECK_ERROR_PTR();
|
||||
CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
|
||||
ADJUST_LOCAL_OFFSET(dst, dstw);
|
||||
|
||||
const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
|
||||
PTR_FAIL_IF(!const_);
|
||||
|
||||
reg = SLOW_IS_REG(dst) ? dst : TMP_REG2;
|
||||
dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG2;
|
||||
|
||||
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
|
||||
PTR_FAIL_IF(push_inst_with_unique_literal(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, reg, TMP_PC, 0), init_value));
|
||||
PTR_FAIL_IF(push_inst_with_unique_literal(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, dst_r, TMP_PC, 0), init_value));
|
||||
compiler->patches++;
|
||||
#else
|
||||
PTR_FAIL_IF(emit_imm(compiler, reg, init_value));
|
||||
PTR_FAIL_IF(emit_imm(compiler, dst_r, init_value));
|
||||
#endif
|
||||
|
||||
const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
|
||||
PTR_FAIL_IF(!const_);
|
||||
set_const(const_, compiler);
|
||||
|
||||
if (dst & SLJIT_MEM)
|
||||
@ -2663,6 +2694,33 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi
|
||||
return const_;
|
||||
}
|
||||
|
||||
/* Emits a placeholder load of 0 into the destination and registers a
   put_label entry for it, so that the code generator can later replace the
   placeholder with the address of the attached label.

   compiler: active compiler instance.
   dst/dstw: destination operand; when it is not a register the value is
             produced in TMP_REG2 and then stored to memory.

   Returns the new put_label, or NULL if any emission or allocation fails. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
{
	struct sljit_put_label *new_put_label;
	sljit_s32 target_reg;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	/* Use dst itself when it is a register, TMP_REG2 otherwise. */
	target_reg = TMP_REG2;
	if (SLOW_IS_REG(dst))
		target_reg = dst;

#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
	/* ARMv5: PC-relative load of a unique literal, counted as a patch. */
	PTR_FAIL_IF(push_inst_with_unique_literal(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, target_reg, TMP_PC, 0), 0));
	compiler->patches++;
#else
	PTR_FAIL_IF(emit_imm(compiler, target_reg, 0));
#endif

	/* The entry is recorded after the placeholder has been emitted. */
	new_put_label = (struct sljit_put_label*)ensure_abuf(compiler, sizeof(struct sljit_put_label));
	PTR_FAIL_IF(!new_put_label);
	set_put_label(new_put_label, compiler, 0);

	if (dst & SLJIT_MEM) {
		PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, dst, dstw, TMP_REG1));
	}

	return new_put_label;
}
|
||||
|
||||
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
|
||||
{
|
||||
inline_set_jump_addr(addr, executable_offset, new_target, 1);
|
||||
|
111
thirdparty/pcre2/src/sljit/sljitNativeARM_64.c
vendored
111
thirdparty/pcre2/src/sljit/sljitNativeARM_64.c
vendored
@ -161,7 +161,7 @@ static SLJIT_INLINE void modify_imm64_const(sljit_ins* inst, sljit_uw new_imm)
|
||||
inst[3] = MOVK | dst | ((new_imm >> 48) << 5) | (3 << 21);
|
||||
}
|
||||
|
||||
static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset)
|
||||
static SLJIT_INLINE sljit_sw detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset)
|
||||
{
|
||||
sljit_sw diff;
|
||||
sljit_uw target_addr;
|
||||
@ -196,14 +196,14 @@ static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_in
|
||||
return 4;
|
||||
}
|
||||
|
||||
if (target_addr <= 0xffffffffl) {
|
||||
if (target_addr < 0x100000000l) {
|
||||
if (jump->flags & IS_COND)
|
||||
code_ptr[-5] -= (2 << 5);
|
||||
code_ptr[-2] = code_ptr[0];
|
||||
return 2;
|
||||
}
|
||||
|
||||
if (target_addr <= 0xffffffffffffl) {
|
||||
if (target_addr < 0x1000000000000l) {
|
||||
if (jump->flags & IS_COND)
|
||||
code_ptr[-5] -= (1 << 5);
|
||||
jump->flags |= PATCH_ABS48;
|
||||
@ -215,6 +215,22 @@ static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_in
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Chooses how many 16-bit chunks are needed to materialise an address no
   larger than max_label and records the choice in put_label->flags:
     flags 0: max_label fits in 32 bits, returns 2
     flags 1: max_label fits in 48 bits, returns 1
     flags 2: anything larger,           returns 0
   The caller subtracts the return value from its code pointer, i.e. it is
   the number of instruction words that can be dropped. */
static SLJIT_INLINE sljit_sw put_label_get_length(struct sljit_put_label *put_label, sljit_uw max_label)
{
	sljit_uw size_class;
	sljit_sw dropped_words;

	if (max_label >= 0x1000000000000l) {
		size_class = 2;
		dropped_words = 0;
	} else if (max_label >= 0x100000000l) {
		size_class = 1;
		dropped_words = 1;
	} else {
		size_class = 0;
		dropped_words = 2;
	}

	put_label->flags = size_class;
	return dropped_words;
}
|
||||
|
||||
SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
|
||||
{
|
||||
struct sljit_memory_fragment *buf;
|
||||
@ -223,6 +239,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
|
||||
sljit_ins *buf_ptr;
|
||||
sljit_ins *buf_end;
|
||||
sljit_uw word_count;
|
||||
sljit_uw next_addr;
|
||||
sljit_sw executable_offset;
|
||||
sljit_uw addr;
|
||||
sljit_s32 dst;
|
||||
@ -230,6 +247,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
|
||||
struct sljit_label *label;
|
||||
struct sljit_jump *jump;
|
||||
struct sljit_const *const_;
|
||||
struct sljit_put_label *put_label;
|
||||
|
||||
CHECK_ERROR_PTR();
|
||||
CHECK_PTR(check_sljit_generate_code(compiler));
|
||||
@ -241,34 +259,47 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
|
||||
|
||||
code_ptr = code;
|
||||
word_count = 0;
|
||||
next_addr = 0;
|
||||
executable_offset = SLJIT_EXEC_OFFSET(code);
|
||||
|
||||
label = compiler->labels;
|
||||
jump = compiler->jumps;
|
||||
const_ = compiler->consts;
|
||||
put_label = compiler->put_labels;
|
||||
|
||||
do {
|
||||
buf_ptr = (sljit_ins*)buf->memory;
|
||||
buf_end = buf_ptr + (buf->used_size >> 2);
|
||||
do {
|
||||
*code_ptr = *buf_ptr++;
|
||||
/* These structures are ordered by their address. */
|
||||
SLJIT_ASSERT(!label || label->size >= word_count);
|
||||
SLJIT_ASSERT(!jump || jump->addr >= word_count);
|
||||
SLJIT_ASSERT(!const_ || const_->addr >= word_count);
|
||||
if (label && label->size == word_count) {
|
||||
label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
|
||||
label->size = code_ptr - code;
|
||||
label = label->next;
|
||||
}
|
||||
if (jump && jump->addr == word_count) {
|
||||
jump->addr = (sljit_uw)(code_ptr - 4);
|
||||
code_ptr -= detect_jump_type(jump, code_ptr, code, executable_offset);
|
||||
jump = jump->next;
|
||||
}
|
||||
if (const_ && const_->addr == word_count) {
|
||||
const_->addr = (sljit_uw)code_ptr;
|
||||
const_ = const_->next;
|
||||
if (next_addr == word_count) {
|
||||
SLJIT_ASSERT(!label || label->size >= word_count);
|
||||
SLJIT_ASSERT(!jump || jump->addr >= word_count);
|
||||
SLJIT_ASSERT(!const_ || const_->addr >= word_count);
|
||||
SLJIT_ASSERT(!put_label || put_label->addr >= word_count);
|
||||
|
||||
/* These structures are ordered by their address. */
|
||||
if (label && label->size == word_count) {
|
||||
label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
|
||||
label->size = code_ptr - code;
|
||||
label = label->next;
|
||||
}
|
||||
if (jump && jump->addr == word_count) {
|
||||
jump->addr = (sljit_uw)(code_ptr - 4);
|
||||
code_ptr -= detect_jump_type(jump, code_ptr, code, executable_offset);
|
||||
jump = jump->next;
|
||||
}
|
||||
if (const_ && const_->addr == word_count) {
|
||||
const_->addr = (sljit_uw)code_ptr;
|
||||
const_ = const_->next;
|
||||
}
|
||||
if (put_label && put_label->addr == word_count) {
|
||||
SLJIT_ASSERT(put_label->label);
|
||||
put_label->addr = (sljit_uw)(code_ptr - 3);
|
||||
code_ptr -= put_label_get_length(put_label, (sljit_uw)(SLJIT_ADD_EXEC_OFFSET(code, executable_offset) + put_label->label->size));
|
||||
put_label = put_label->next;
|
||||
}
|
||||
next_addr = compute_next_addr(label, jump, const_, put_label);
|
||||
}
|
||||
code_ptr ++;
|
||||
word_count ++;
|
||||
@ -286,6 +317,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
|
||||
SLJIT_ASSERT(!label);
|
||||
SLJIT_ASSERT(!jump);
|
||||
SLJIT_ASSERT(!const_);
|
||||
SLJIT_ASSERT(!put_label);
|
||||
SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size);
|
||||
|
||||
jump = compiler->jumps;
|
||||
@ -323,6 +355,23 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
|
||||
jump = jump->next;
|
||||
}
|
||||
|
||||
put_label = compiler->put_labels;
|
||||
while (put_label) {
|
||||
addr = put_label->label->addr;
|
||||
buf_ptr = (sljit_ins *)put_label->addr;
|
||||
|
||||
buf_ptr[0] |= (addr & 0xffff) << 5;
|
||||
buf_ptr[1] |= ((addr >> 16) & 0xffff) << 5;
|
||||
|
||||
if (put_label->flags >= 1)
|
||||
buf_ptr[2] |= ((addr >> 32) & 0xffff) << 5;
|
||||
|
||||
if (put_label->flags >= 2)
|
||||
buf_ptr[3] |= ((addr >> 48) & 0xffff) << 5;
|
||||
|
||||
put_label = put_label->next;
|
||||
}
|
||||
|
||||
compiler->error = SLJIT_ERR_COMPILED;
|
||||
compiler->executable_offset = executable_offset;
|
||||
compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins);
|
||||
@ -1947,6 +1996,28 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi
|
||||
return const_;
|
||||
}
|
||||
|
||||
/* Emits a 64-bit immediate load of 0 as a placeholder and registers a
   put_label entry for it; the placeholder is patched with the label address
   during code generation.

   compiler: active compiler instance.
   dst/dstw: destination operand; when it is not a register the value is
             built in TMP_REG1 and then stored to memory.

   Returns the new put_label, or NULL if any emission or allocation fails. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
{
	struct sljit_put_label *new_put_label;
	sljit_s32 target_reg;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	/* Use dst itself when it is a register, TMP_REG1 otherwise. */
	target_reg = TMP_REG1;
	if (FAST_IS_REG(dst))
		target_reg = dst;

	PTR_FAIL_IF(emit_imm64_const(compiler, target_reg, 0));

	/* The entry is recorded after the placeholder sequence has been emitted. */
	new_put_label = (struct sljit_put_label*)ensure_abuf(compiler, sizeof(struct sljit_put_label));
	PTR_FAIL_IF(!new_put_label);
	set_put_label(new_put_label, compiler, 1);

	if (dst & SLJIT_MEM) {
		PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, target_reg, dst, dstw, TMP_REG2));
	}

	return new_put_label;
}
|
||||
|
||||
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
|
||||
{
|
||||
sljit_ins* inst = (sljit_ins*)addr;
|
||||
|
@ -365,11 +365,13 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
|
||||
sljit_u16 *buf_ptr;
|
||||
sljit_u16 *buf_end;
|
||||
sljit_uw half_count;
|
||||
sljit_uw next_addr;
|
||||
sljit_sw executable_offset;
|
||||
|
||||
struct sljit_label *label;
|
||||
struct sljit_jump *jump;
|
||||
struct sljit_const *const_;
|
||||
struct sljit_put_label *put_label;
|
||||
|
||||
CHECK_ERROR_PTR();
|
||||
CHECK_PTR(check_sljit_generate_code(compiler));
|
||||
@ -381,34 +383,46 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
|
||||
|
||||
code_ptr = code;
|
||||
half_count = 0;
|
||||
next_addr = 0;
|
||||
executable_offset = SLJIT_EXEC_OFFSET(code);
|
||||
|
||||
label = compiler->labels;
|
||||
jump = compiler->jumps;
|
||||
const_ = compiler->consts;
|
||||
put_label = compiler->put_labels;
|
||||
|
||||
do {
|
||||
buf_ptr = (sljit_u16*)buf->memory;
|
||||
buf_end = buf_ptr + (buf->used_size >> 1);
|
||||
do {
|
||||
*code_ptr = *buf_ptr++;
|
||||
/* These structures are ordered by their address. */
|
||||
SLJIT_ASSERT(!label || label->size >= half_count);
|
||||
SLJIT_ASSERT(!jump || jump->addr >= half_count);
|
||||
SLJIT_ASSERT(!const_ || const_->addr >= half_count);
|
||||
if (label && label->size == half_count) {
|
||||
label->addr = ((sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset)) | 0x1;
|
||||
label->size = code_ptr - code;
|
||||
label = label->next;
|
||||
}
|
||||
if (jump && jump->addr == half_count) {
|
||||
jump->addr = (sljit_uw)code_ptr - ((jump->flags & IS_COND) ? 10 : 8);
|
||||
code_ptr -= detect_jump_type(jump, code_ptr, code, executable_offset);
|
||||
jump = jump->next;
|
||||
}
|
||||
if (const_ && const_->addr == half_count) {
|
||||
const_->addr = (sljit_uw)code_ptr;
|
||||
const_ = const_->next;
|
||||
if (next_addr == half_count) {
|
||||
SLJIT_ASSERT(!label || label->size >= half_count);
|
||||
SLJIT_ASSERT(!jump || jump->addr >= half_count);
|
||||
SLJIT_ASSERT(!const_ || const_->addr >= half_count);
|
||||
SLJIT_ASSERT(!put_label || put_label->addr >= half_count);
|
||||
|
||||
/* These structures are ordered by their address. */
|
||||
if (label && label->size == half_count) {
|
||||
label->addr = ((sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset)) | 0x1;
|
||||
label->size = code_ptr - code;
|
||||
label = label->next;
|
||||
}
|
||||
if (jump && jump->addr == half_count) {
|
||||
jump->addr = (sljit_uw)code_ptr - ((jump->flags & IS_COND) ? 10 : 8);
|
||||
code_ptr -= detect_jump_type(jump, code_ptr, code, executable_offset);
|
||||
jump = jump->next;
|
||||
}
|
||||
if (const_ && const_->addr == half_count) {
|
||||
const_->addr = (sljit_uw)code_ptr;
|
||||
const_ = const_->next;
|
||||
}
|
||||
if (put_label && put_label->addr == half_count) {
|
||||
SLJIT_ASSERT(put_label->label);
|
||||
put_label->addr = (sljit_uw)code_ptr;
|
||||
put_label = put_label->next;
|
||||
}
|
||||
next_addr = compute_next_addr(label, jump, const_, put_label);
|
||||
}
|
||||
code_ptr ++;
|
||||
half_count ++;
|
||||
@ -426,6 +440,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
|
||||
SLJIT_ASSERT(!label);
|
||||
SLJIT_ASSERT(!jump);
|
||||
SLJIT_ASSERT(!const_);
|
||||
SLJIT_ASSERT(!put_label);
|
||||
SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size);
|
||||
|
||||
jump = compiler->jumps;
|
||||
@ -434,6 +449,12 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
|
||||
jump = jump->next;
|
||||
}
|
||||
|
||||
put_label = compiler->put_labels;
|
||||
while (put_label) {
|
||||
modify_imm32_const((sljit_u16 *)put_label->addr, put_label->label->addr);
|
||||
put_label = put_label->next;
|
||||
}
|
||||
|
||||
compiler->error = SLJIT_ERR_COMPILED;
|
||||
compiler->executable_offset = executable_offset;
|
||||
compiler->executable_size = (code_ptr - code) * sizeof(sljit_u16);
|
||||
@ -2311,6 +2332,27 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi
|
||||
return const_;
|
||||
}
|
||||
|
||||
/* Registers a put_label entry and then emits a 32-bit immediate load of 0
   as the placeholder that code generation later rewrites with the label
   address (via modify_imm32_const).

   compiler: active compiler instance.
   dst/dstw: destination operand; when it is not a register the value is
             built in TMP_REG1 and then stored to memory.

   Returns the new put_label, or NULL if any emission or allocation fails. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
{
	struct sljit_put_label *new_put_label;
	sljit_s32 target_reg;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	/* Here the entry is recorded before the placeholder is emitted. */
	new_put_label = (struct sljit_put_label*)ensure_abuf(compiler, sizeof(struct sljit_put_label));
	PTR_FAIL_IF(!new_put_label);
	set_put_label(new_put_label, compiler, 0);

	/* Use dst itself when it is a register, TMP_REG1 otherwise. */
	target_reg = TMP_REG1;
	if (FAST_IS_REG(dst))
		target_reg = dst;

	PTR_FAIL_IF(emit_imm32_const(compiler, target_reg, 0));

	if (dst & SLJIT_MEM) {
		PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, target_reg, dst, dstw, TMP_REG2));
	}

	return new_put_label;
}
|
||||
|
||||
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
|
||||
{
|
||||
sljit_u16 *inst = (sljit_u16*)addr;
|
||||
|
@ -425,6 +425,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_ta
|
||||
{
|
||||
sljit_ins *inst = (sljit_ins *)addr;
|
||||
|
||||
SLJIT_ASSERT((inst[0] & 0xffe00000) == LUI && (inst[1] & 0xfc000000) == ORI);
|
||||
inst[0] = (inst[0] & 0xffff0000) | ((new_target >> 16) & 0xffff);
|
||||
inst[1] = (inst[1] & 0xffff0000) | (new_target & 0xffff);
|
||||
inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
|
||||
@ -435,6 +436,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_consta
|
||||
{
|
||||
sljit_ins *inst = (sljit_ins *)addr;
|
||||
|
||||
SLJIT_ASSERT((inst[0] & 0xffe00000) == LUI && (inst[1] & 0xfc000000) == ORI);
|
||||
inst[0] = (inst[0] & 0xffff0000) | ((new_constant >> 16) & 0xffff);
|
||||
inst[1] = (inst[1] & 0xffff0000) | (new_constant & 0xffff);
|
||||
inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
|
||||
|
155
thirdparty/pcre2/src/sljit/sljitNativeMIPS_common.c
vendored
155
thirdparty/pcre2/src/sljit/sljitNativeMIPS_common.c
vendored
@ -449,6 +449,55 @@ static __attribute__ ((noinline)) void sljit_cache_flush(void* code, void* code_
|
||||
}
|
||||
#endif
|
||||
|
||||
#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
|
||||
|
||||
/* Selects the encoding used to load an address no larger than max_label and
   stores the choice in put_label->flags:
     flags 0: max_label < 0x80000000,     returns 1
     flags 1: max_label < 0x800000000000, returns 3
     flags 2: anything larger,            returns 5
   The caller adds the return value to its code pointer, i.e. it is the
   number of extra instruction words needed after the placeholder word. */
static SLJIT_INLINE sljit_sw put_label_get_length(struct sljit_put_label *put_label, sljit_uw max_label)
{
	sljit_uw size_class;
	sljit_sw extra_words;

	if (max_label >= 0x800000000000l) {
		size_class = 2;
		extra_words = 5;
	} else if (max_label >= 0x80000000l) {
		size_class = 1;
		extra_words = 3;
	} else {
		size_class = 0;
		extra_words = 1;
	}

	put_label->flags = size_class;
	return extra_words;
}
|
||||
|
||||
static SLJIT_INLINE void put_label_set(struct sljit_put_label *put_label)
|
||||
{
|
||||
sljit_uw addr = put_label->label->addr;
|
||||
sljit_ins *inst = (sljit_ins *)put_label->addr;
|
||||
sljit_s32 reg = *inst;
|
||||
|
||||
if (put_label->flags == 0) {
|
||||
SLJIT_ASSERT(addr < 0x80000000l);
|
||||
inst[0] = LUI | T(reg) | IMM(addr >> 16);
|
||||
}
|
||||
else if (put_label->flags == 1) {
|
||||
SLJIT_ASSERT(addr < 0x800000000000l);
|
||||
inst[0] = LUI | T(reg) | IMM(addr >> 32);
|
||||
inst[1] = ORI | S(reg) | T(reg) | IMM((addr >> 16) & 0xffff);
|
||||
inst[2] = DSLL | T(reg) | D(reg) | SH_IMM(16);
|
||||
inst += 2;
|
||||
}
|
||||
else {
|
||||
inst[0] = LUI | T(reg) | IMM(addr >> 48);
|
||||
inst[1] = ORI | S(reg) | T(reg) | IMM((addr >> 32) & 0xffff);
|
||||
inst[2] = DSLL | T(reg) | D(reg) | SH_IMM(16);
|
||||
inst[3] = ORI | S(reg) | T(reg) | IMM((addr >> 16) & 0xffff);
|
||||
inst[4] = DSLL | T(reg) | D(reg) | SH_IMM(16);
|
||||
inst += 4;
|
||||
}
|
||||
|
||||
inst[1] = ORI | S(reg) | T(reg) | IMM(addr & 0xffff);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
|
||||
{
|
||||
struct sljit_memory_fragment *buf;
|
||||
@ -457,12 +506,14 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
|
||||
sljit_ins *buf_ptr;
|
||||
sljit_ins *buf_end;
|
||||
sljit_uw word_count;
|
||||
sljit_uw next_addr;
|
||||
sljit_sw executable_offset;
|
||||
sljit_uw addr;
|
||||
|
||||
struct sljit_label *label;
|
||||
struct sljit_jump *jump;
|
||||
struct sljit_const *const_;
|
||||
struct sljit_put_label *put_label;
|
||||
|
||||
CHECK_ERROR_PTR();
|
||||
CHECK_PTR(check_sljit_generate_code(compiler));
|
||||
@ -474,39 +525,54 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
|
||||
|
||||
code_ptr = code;
|
||||
word_count = 0;
|
||||
next_addr = 0;
|
||||
executable_offset = SLJIT_EXEC_OFFSET(code);
|
||||
|
||||
label = compiler->labels;
|
||||
jump = compiler->jumps;
|
||||
const_ = compiler->consts;
|
||||
put_label = compiler->put_labels;
|
||||
|
||||
do {
|
||||
buf_ptr = (sljit_ins*)buf->memory;
|
||||
buf_end = buf_ptr + (buf->used_size >> 2);
|
||||
do {
|
||||
*code_ptr = *buf_ptr++;
|
||||
SLJIT_ASSERT(!label || label->size >= word_count);
|
||||
SLJIT_ASSERT(!jump || jump->addr >= word_count);
|
||||
SLJIT_ASSERT(!const_ || const_->addr >= word_count);
|
||||
/* These structures are ordered by their address. */
|
||||
if (label && label->size == word_count) {
|
||||
label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
|
||||
label->size = code_ptr - code;
|
||||
label = label->next;
|
||||
}
|
||||
if (jump && jump->addr == word_count) {
|
||||
if (next_addr == word_count) {
|
||||
SLJIT_ASSERT(!label || label->size >= word_count);
|
||||
SLJIT_ASSERT(!jump || jump->addr >= word_count);
|
||||
SLJIT_ASSERT(!const_ || const_->addr >= word_count);
|
||||
SLJIT_ASSERT(!put_label || put_label->addr >= word_count);
|
||||
|
||||
/* These structures are ordered by their address. */
|
||||
if (label && label->size == word_count) {
|
||||
label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
|
||||
label->size = code_ptr - code;
|
||||
label = label->next;
|
||||
}
|
||||
if (jump && jump->addr == word_count) {
|
||||
#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
|
||||
jump->addr = (sljit_uw)(code_ptr - 3);
|
||||
jump->addr = (sljit_uw)(code_ptr - 3);
|
||||
#else
|
||||
jump->addr = (sljit_uw)(code_ptr - 7);
|
||||
jump->addr = (sljit_uw)(code_ptr - 7);
|
||||
#endif
|
||||
code_ptr = detect_jump_type(jump, code_ptr, code, executable_offset);
|
||||
jump = jump->next;
|
||||
}
|
||||
if (const_ && const_->addr == word_count) {
|
||||
/* Just recording the address. */
|
||||
const_->addr = (sljit_uw)code_ptr;
|
||||
const_ = const_->next;
|
||||
code_ptr = detect_jump_type(jump, code_ptr, code, executable_offset);
|
||||
jump = jump->next;
|
||||
}
|
||||
if (const_ && const_->addr == word_count) {
|
||||
const_->addr = (sljit_uw)code_ptr;
|
||||
const_ = const_->next;
|
||||
}
|
||||
if (put_label && put_label->addr == word_count) {
|
||||
SLJIT_ASSERT(put_label->label);
|
||||
put_label->addr = (sljit_uw)code_ptr;
|
||||
#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
|
||||
code_ptr += put_label_get_length(put_label, (sljit_uw)(SLJIT_ADD_EXEC_OFFSET(code, executable_offset) + put_label->label->size));
|
||||
word_count += 5;
|
||||
#endif
|
||||
put_label = put_label->next;
|
||||
}
|
||||
next_addr = compute_next_addr(label, jump, const_, put_label);
|
||||
}
|
||||
code_ptr ++;
|
||||
word_count ++;
|
||||
@ -524,6 +590,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
|
||||
SLJIT_ASSERT(!label);
|
||||
SLJIT_ASSERT(!jump);
|
||||
SLJIT_ASSERT(!const_);
|
||||
SLJIT_ASSERT(!put_label);
|
||||
SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size);
|
||||
|
||||
jump = compiler->jumps;
|
||||
@ -571,6 +638,21 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
|
||||
jump = jump->next;
|
||||
}
|
||||
|
||||
put_label = compiler->put_labels;
|
||||
while (put_label) {
|
||||
#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
|
||||
addr = put_label->label->addr;
|
||||
buf_ptr = (sljit_ins *)put_label->addr;
|
||||
|
||||
SLJIT_ASSERT((buf_ptr[0] & 0xffe00000) == LUI && (buf_ptr[1] & 0xfc000000) == ORI);
|
||||
buf_ptr[0] |= (addr >> 16) & 0xffff;
|
||||
buf_ptr[1] |= addr & 0xffff;
|
||||
#else
|
||||
put_label_set(put_label);
|
||||
#endif
|
||||
put_label = put_label->next;
|
||||
}
|
||||
|
||||
compiler->error = SLJIT_ERR_COMPILED;
|
||||
compiler->executable_offset = executable_offset;
|
||||
compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins);
|
||||
@ -2157,7 +2239,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil
|
||||
SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
|
||||
{
|
||||
struct sljit_const *const_;
|
||||
sljit_s32 reg;
|
||||
sljit_s32 dst_r;
|
||||
|
||||
CHECK_ERROR_PTR();
|
||||
CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
|
||||
@ -2167,11 +2249,38 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi
|
||||
PTR_FAIL_IF(!const_);
|
||||
set_const(const_, compiler);
|
||||
|
||||
reg = FAST_IS_REG(dst) ? dst : TMP_REG2;
|
||||
|
||||
PTR_FAIL_IF(emit_const(compiler, reg, init_value));
|
||||
dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
|
||||
PTR_FAIL_IF(emit_const(compiler, dst_r, init_value));
|
||||
|
||||
if (dst & SLJIT_MEM)
|
||||
PTR_FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0));
|
||||
|
||||
return const_;
|
||||
}
|
||||
|
||||
/* Registers a put_label entry and emits the placeholder that code generation
   later replaces with a load of the label address.

   On 32-bit MIPS the placeholder is a constant load of 0. Otherwise a single
   word holding the destination register number is pushed and compiler->size
   is increased by 5 to reserve room for the longest load sequence.

   compiler: active compiler instance.
   dst/dstw: destination operand; when it is not a register the value is
             produced in TMP_REG2 and then moved to memory.

   Returns the new put_label, or NULL if any emission or allocation fails. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
{
	struct sljit_put_label *new_put_label;
	sljit_s32 target_reg;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	/* Use dst itself when it is a register, TMP_REG2 otherwise. */
	target_reg = TMP_REG2;
	if (FAST_IS_REG(dst))
		target_reg = dst;

	/* The entry is recorded before the placeholder is emitted. */
	new_put_label = (struct sljit_put_label*)ensure_abuf(compiler, sizeof(struct sljit_put_label));
	PTR_FAIL_IF(!new_put_label);
	set_put_label(new_put_label, compiler, 0);

#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
	PTR_FAIL_IF(emit_const(compiler, target_reg, 0));
#else
	/* The pushed "instruction" is just the register number. */
	PTR_FAIL_IF(push_inst(compiler, target_reg, UNMOVABLE_INS));
	compiler->size += 5;
#endif

	if (dst & SLJIT_MEM) {
		PTR_FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0));
	}

	return new_put_label;
}
|
||||
|
@ -259,6 +259,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_ta
|
||||
{
|
||||
sljit_ins *inst = (sljit_ins *)addr;
|
||||
|
||||
SLJIT_ASSERT((inst[0] & 0xfc1f0000) == ADDIS && (inst[1] & 0xfc000000) == ORI);
|
||||
inst[0] = (inst[0] & 0xffff0000) | ((new_target >> 16) & 0xffff);
|
||||
inst[1] = (inst[1] & 0xffff0000) | (new_target & 0xffff);
|
||||
inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
|
||||
@ -269,6 +270,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_consta
|
||||
{
|
||||
sljit_ins *inst = (sljit_ins *)addr;
|
||||
|
||||
SLJIT_ASSERT((inst[0] & 0xfc1f0000) == ADDIS && (inst[1] & 0xfc000000) == ORI);
|
||||
inst[0] = (inst[0] & 0xffff0000) | ((new_constant >> 16) & 0xffff);
|
||||
inst[1] = (inst[1] & 0xffff0000) | (new_constant & 0xffff);
|
||||
inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
|
||||
|
@ -35,9 +35,6 @@
|
||||
#error "Must implement count leading zeroes"
|
||||
#endif
|
||||
|
||||
#define RLDI(dst, src, sh, mb, type) \
|
||||
(HI(30) | S(src) | A(dst) | ((type) << 2) | (((sh) & 0x1f) << 11) | (((sh) & 0x20) >> 4) | (((mb) & 0x1f) << 6) | ((mb) & 0x20))
|
||||
|
||||
#define PUSH_RLDICR(reg, shift) \
|
||||
push_inst(compiler, RLDI(reg, reg, 63 - shift, shift, 1))
|
||||
|
||||
|
228
thirdparty/pcre2/src/sljit/sljitNativePPC_common.c
vendored
228
thirdparty/pcre2/src/sljit/sljitNativePPC_common.c
vendored
@ -231,6 +231,9 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
|
||||
#define SIMM_MIN (-0x8000)
|
||||
#define UIMM_MAX (0xffff)
|
||||
|
||||
/* Composes an instruction word from HI(30), the S(src) / A(dst) register
   fields, the 'type' selector shifted left by 2, and the two 6 bit values
   'sh' and 'mb'. For sh the low five bits go to bit 11 and bit 5 moves to
   bit 1; for mb the low five bits go to bit 6 and bit 5 stays in place. */
#define RLDI(dst, src, sh, mb, type) \
	(HI(30) \
	| S(src) \
	| A(dst) \
	| ((type) << 2) \
	| (((sh) & 0x1f) << 11) \
	| (((sh) & 0x20) >> 4) \
	| (((mb) & 0x1f) << 6) \
	| ((mb) & 0x20))
|
||||
|
||||
#if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
|
||||
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_function_context(void** func_ptr, struct sljit_function_context* context, sljit_sw addr, void* func)
|
||||
{
|
||||
@ -324,6 +327,55 @@ keep_address:
|
||||
return 0;
|
||||
}
|
||||
|
||||
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
|
||||
|
||||
/* Classifies max_label by magnitude and stores the class in put_label->flags:
     below 2^32 -> flags 0, returns 1
     below 2^48 -> flags 1, returns 3
     otherwise  -> flags 2, returns 4
   The return value is the number of extra instruction words the caller
   should skip over. */
static SLJIT_INLINE sljit_sw put_label_get_length(struct sljit_put_label *put_label, sljit_uw max_label)
{
	sljit_sw extra_words;

	if (max_label >= 0x1000000000000L) {
		put_label->flags = 2;
		extra_words = 4;
	}
	else if (max_label >= 0x100000000L) {
		put_label->flags = 1;
		extra_words = 3;
	}
	else {
		put_label->flags = 0;
		extra_words = 1;
	}

	return extra_words;
}
|
||||
|
||||
static SLJIT_INLINE void put_label_set(struct sljit_put_label *put_label)
|
||||
{
|
||||
sljit_uw addr = put_label->label->addr;
|
||||
sljit_ins *inst = (sljit_ins *)put_label->addr;
|
||||
sljit_s32 reg = *inst;
|
||||
|
||||
if (put_label->flags == 0) {
|
||||
SLJIT_ASSERT(addr < 0x100000000l);
|
||||
inst[0] = ORIS | S(TMP_ZERO) | A(reg) | IMM(addr >> 16);
|
||||
}
|
||||
else {
|
||||
if (put_label->flags == 1) {
|
||||
SLJIT_ASSERT(addr < 0x1000000000000l);
|
||||
inst[0] = ORI | S(TMP_ZERO) | A(reg) | IMM(addr >> 32);
|
||||
}
|
||||
else {
|
||||
inst[0] = ORIS | S(TMP_ZERO) | A(reg) | IMM(addr >> 48);
|
||||
inst[1] = ORI | S(reg) | A(reg) | IMM((addr >> 32) & 0xffff);
|
||||
inst ++;
|
||||
}
|
||||
|
||||
inst[1] = RLDI(reg, reg, 32, 31, 1);
|
||||
inst[2] = ORIS | S(reg) | A(reg) | IMM((addr >> 16) & 0xffff);
|
||||
inst += 2;
|
||||
}
|
||||
|
||||
inst[1] = ORI | S(reg) | A(reg) | IMM(addr & 0xffff);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
|
||||
{
|
||||
struct sljit_memory_fragment *buf;
|
||||
@ -332,12 +384,14 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
|
||||
sljit_ins *buf_ptr;
|
||||
sljit_ins *buf_end;
|
||||
sljit_uw word_count;
|
||||
sljit_uw next_addr;
|
||||
sljit_sw executable_offset;
|
||||
sljit_uw addr;
|
||||
|
||||
struct sljit_label *label;
|
||||
struct sljit_jump *jump;
|
||||
struct sljit_const *const_;
|
||||
struct sljit_put_label *put_label;
|
||||
|
||||
CHECK_ERROR_PTR();
|
||||
CHECK_PTR(check_sljit_generate_code(compiler));
|
||||
@ -356,71 +410,87 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
|
||||
|
||||
code_ptr = code;
|
||||
word_count = 0;
|
||||
next_addr = 0;
|
||||
executable_offset = SLJIT_EXEC_OFFSET(code);
|
||||
|
||||
label = compiler->labels;
|
||||
jump = compiler->jumps;
|
||||
const_ = compiler->consts;
|
||||
put_label = compiler->put_labels;
|
||||
|
||||
do {
|
||||
buf_ptr = (sljit_ins*)buf->memory;
|
||||
buf_end = buf_ptr + (buf->used_size >> 2);
|
||||
do {
|
||||
*code_ptr = *buf_ptr++;
|
||||
SLJIT_ASSERT(!label || label->size >= word_count);
|
||||
SLJIT_ASSERT(!jump || jump->addr >= word_count);
|
||||
SLJIT_ASSERT(!const_ || const_->addr >= word_count);
|
||||
/* These structures are ordered by their address. */
|
||||
if (label && label->size == word_count) {
|
||||
/* Just recording the address. */
|
||||
label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
|
||||
label->size = code_ptr - code;
|
||||
label = label->next;
|
||||
}
|
||||
if (jump && jump->addr == word_count) {
|
||||
#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
|
||||
jump->addr = (sljit_uw)(code_ptr - 3);
|
||||
#else
|
||||
jump->addr = (sljit_uw)(code_ptr - 6);
|
||||
#endif
|
||||
if (detect_jump_type(jump, code_ptr, code, executable_offset)) {
|
||||
#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
|
||||
code_ptr[-3] = code_ptr[0];
|
||||
code_ptr -= 3;
|
||||
#else
|
||||
if (jump->flags & PATCH_ABS32) {
|
||||
code_ptr -= 3;
|
||||
code_ptr[-1] = code_ptr[2];
|
||||
code_ptr[0] = code_ptr[3];
|
||||
}
|
||||
else if (jump->flags & PATCH_ABS48) {
|
||||
code_ptr--;
|
||||
code_ptr[-1] = code_ptr[0];
|
||||
code_ptr[0] = code_ptr[1];
|
||||
/* rldicr rX,rX,32,31 -> rX,rX,16,47 */
|
||||
SLJIT_ASSERT((code_ptr[-3] & 0xfc00ffff) == 0x780007c6);
|
||||
code_ptr[-3] ^= 0x8422;
|
||||
/* oris -> ori */
|
||||
code_ptr[-2] ^= 0x4000000;
|
||||
}
|
||||
else {
|
||||
code_ptr[-6] = code_ptr[0];
|
||||
code_ptr -= 6;
|
||||
}
|
||||
#endif
|
||||
if (jump->flags & REMOVE_COND) {
|
||||
code_ptr[0] = BCx | (2 << 2) | ((code_ptr[0] ^ (8 << 21)) & 0x03ff0001);
|
||||
code_ptr++;
|
||||
jump->addr += sizeof(sljit_ins);
|
||||
code_ptr[0] = Bx;
|
||||
jump->flags -= IS_COND;
|
||||
}
|
||||
if (next_addr == word_count) {
|
||||
SLJIT_ASSERT(!label || label->size >= word_count);
|
||||
SLJIT_ASSERT(!jump || jump->addr >= word_count);
|
||||
SLJIT_ASSERT(!const_ || const_->addr >= word_count);
|
||||
SLJIT_ASSERT(!put_label || put_label->addr >= word_count);
|
||||
|
||||
/* These structures are ordered by their address. */
|
||||
if (label && label->size == word_count) {
|
||||
/* Just recording the address. */
|
||||
label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
|
||||
label->size = code_ptr - code;
|
||||
label = label->next;
|
||||
}
|
||||
jump = jump->next;
|
||||
}
|
||||
if (const_ && const_->addr == word_count) {
|
||||
const_->addr = (sljit_uw)code_ptr;
|
||||
const_ = const_->next;
|
||||
if (jump && jump->addr == word_count) {
|
||||
#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
|
||||
jump->addr = (sljit_uw)(code_ptr - 3);
|
||||
#else
|
||||
jump->addr = (sljit_uw)(code_ptr - 6);
|
||||
#endif
|
||||
if (detect_jump_type(jump, code_ptr, code, executable_offset)) {
|
||||
#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
|
||||
code_ptr[-3] = code_ptr[0];
|
||||
code_ptr -= 3;
|
||||
#else
|
||||
if (jump->flags & PATCH_ABS32) {
|
||||
code_ptr -= 3;
|
||||
code_ptr[-1] = code_ptr[2];
|
||||
code_ptr[0] = code_ptr[3];
|
||||
}
|
||||
else if (jump->flags & PATCH_ABS48) {
|
||||
code_ptr--;
|
||||
code_ptr[-1] = code_ptr[0];
|
||||
code_ptr[0] = code_ptr[1];
|
||||
/* rldicr rX,rX,32,31 -> rX,rX,16,47 */
|
||||
SLJIT_ASSERT((code_ptr[-3] & 0xfc00ffff) == 0x780007c6);
|
||||
code_ptr[-3] ^= 0x8422;
|
||||
/* oris -> ori */
|
||||
code_ptr[-2] ^= 0x4000000;
|
||||
}
|
||||
else {
|
||||
code_ptr[-6] = code_ptr[0];
|
||||
code_ptr -= 6;
|
||||
}
|
||||
#endif
|
||||
if (jump->flags & REMOVE_COND) {
|
||||
code_ptr[0] = BCx | (2 << 2) | ((code_ptr[0] ^ (8 << 21)) & 0x03ff0001);
|
||||
code_ptr++;
|
||||
jump->addr += sizeof(sljit_ins);
|
||||
code_ptr[0] = Bx;
|
||||
jump->flags -= IS_COND;
|
||||
}
|
||||
}
|
||||
jump = jump->next;
|
||||
}
|
||||
if (const_ && const_->addr == word_count) {
|
||||
const_->addr = (sljit_uw)code_ptr;
|
||||
const_ = const_->next;
|
||||
}
|
||||
if (put_label && put_label->addr == word_count) {
|
||||
SLJIT_ASSERT(put_label->label);
|
||||
put_label->addr = (sljit_uw)code_ptr;
|
||||
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
|
||||
code_ptr += put_label_get_length(put_label, (sljit_uw)(SLJIT_ADD_EXEC_OFFSET(code, executable_offset) + put_label->label->size));
|
||||
word_count += 4;
|
||||
#endif
|
||||
put_label = put_label->next;
|
||||
}
|
||||
next_addr = compute_next_addr(label, jump, const_, put_label);
|
||||
}
|
||||
code_ptr ++;
|
||||
word_count ++;
|
||||
@ -438,6 +508,8 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
|
||||
SLJIT_ASSERT(!label);
|
||||
SLJIT_ASSERT(!jump);
|
||||
SLJIT_ASSERT(!const_);
|
||||
SLJIT_ASSERT(!put_label);
|
||||
|
||||
#if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
|
||||
SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size - (sizeof(struct sljit_function_context) / sizeof(sljit_ins)));
|
||||
#else
|
||||
@ -503,6 +575,21 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
|
||||
jump = jump->next;
|
||||
}
|
||||
|
||||
put_label = compiler->put_labels;
|
||||
while (put_label) {
|
||||
#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
|
||||
addr = put_label->label->addr;
|
||||
buf_ptr = (sljit_ins *)put_label->addr;
|
||||
|
||||
SLJIT_ASSERT((buf_ptr[0] & 0xfc1f0000) == ADDIS && (buf_ptr[1] & 0xfc000000) == ORI);
|
||||
buf_ptr[0] |= (addr >> 16) & 0xffff;
|
||||
buf_ptr[1] |= addr & 0xffff;
|
||||
#else
|
||||
put_label_set(put_label);
|
||||
#endif
|
||||
put_label = put_label->next;
|
||||
}
|
||||
|
||||
compiler->error = SLJIT_ERR_COMPILED;
|
||||
compiler->executable_offset = executable_offset;
|
||||
compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins);
|
||||
@ -2261,7 +2348,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compil
|
||||
SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
|
||||
{
|
||||
struct sljit_const *const_;
|
||||
sljit_s32 reg;
|
||||
sljit_s32 dst_r;
|
||||
|
||||
CHECK_ERROR_PTR();
|
||||
CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
|
||||
@ -2271,11 +2358,38 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi
|
||||
PTR_FAIL_IF(!const_);
|
||||
set_const(const_, compiler);
|
||||
|
||||
reg = FAST_IS_REG(dst) ? dst : TMP_REG2;
|
||||
|
||||
PTR_FAIL_IF(emit_const(compiler, reg, init_value));
|
||||
dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
|
||||
PTR_FAIL_IF(emit_const(compiler, dst_r, init_value));
|
||||
|
||||
if (dst & SLJIT_MEM)
|
||||
PTR_FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0));
|
||||
|
||||
return const_;
|
||||
}
|
||||
|
||||
/* Records a put_label entry and emits the code that will receive a label
   address, targeting dst / dstw.

   Returns the new put_label record, or NULL when an argument check, the
   record allocation, or any instruction emission fails. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
{
	struct sljit_put_label *record;
	sljit_s32 target_reg;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	record = (struct sljit_put_label*)ensure_abuf(compiler, sizeof(struct sljit_put_label));
	PTR_FAIL_IF(!record);
	set_put_label(record, compiler, 0);

	/* A memory destination is served through TMP_REG2 and stored afterwards. */
	if (FAST_IS_REG(dst))
		target_reg = dst;
	else
		target_reg = TMP_REG2;

#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
	/* 32 bit: a constant load of 0 serves as the placeholder. */
	PTR_FAIL_IF(emit_const(compiler, target_reg, 0));
#else
	/* Otherwise a single word holding the register number is pushed and
	   compiler->size is grown by 4 more words.
	   NOTE(review): presumably the space for the final load sequence - confirm. */
	PTR_FAIL_IF(push_inst(compiler, target_reg));
	compiler->size += 4;
#endif

	if (dst & SLJIT_MEM) {
		PTR_FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0));
	}

	return record;
}
|
||||
|
@ -267,6 +267,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_ta
|
||||
{
|
||||
sljit_ins *inst = (sljit_ins *)addr;
|
||||
|
||||
SLJIT_ASSERT(((inst[0] & 0xc1c00000) == 0x01000000) && ((inst[1] & 0xc1f82000) == 0x80102000));
|
||||
inst[0] = (inst[0] & 0xffc00000) | ((new_target >> 10) & 0x3fffff);
|
||||
inst[1] = (inst[1] & 0xfffffc00) | (new_target & 0x3ff);
|
||||
inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
|
||||
@ -277,6 +278,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_consta
|
||||
{
|
||||
sljit_ins *inst = (sljit_ins *)addr;
|
||||
|
||||
SLJIT_ASSERT(((inst[0] & 0xc1c00000) == 0x01000000) && ((inst[1] & 0xc1f82000) == 0x80102000));
|
||||
inst[0] = (inst[0] & 0xffc00000) | ((new_constant >> 10) & 0x3fffff);
|
||||
inst[1] = (inst[1] & 0xfffffc00) | (new_constant & 0x3ff);
|
||||
inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
|
||||
|
102
thirdparty/pcre2/src/sljit/sljitNativeSPARC_common.c
vendored
102
thirdparty/pcre2/src/sljit/sljitNativeSPARC_common.c
vendored
@ -298,12 +298,14 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
|
||||
sljit_ins *buf_ptr;
|
||||
sljit_ins *buf_end;
|
||||
sljit_uw word_count;
|
||||
sljit_uw next_addr;
|
||||
sljit_sw executable_offset;
|
||||
sljit_uw addr;
|
||||
|
||||
struct sljit_label *label;
|
||||
struct sljit_jump *jump;
|
||||
struct sljit_const *const_;
|
||||
struct sljit_put_label *put_label;
|
||||
|
||||
CHECK_ERROR_PTR();
|
||||
CHECK_PTR(check_sljit_generate_code(compiler));
|
||||
@ -315,40 +317,52 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
|
||||
|
||||
code_ptr = code;
|
||||
word_count = 0;
|
||||
next_addr = 0;
|
||||
executable_offset = SLJIT_EXEC_OFFSET(code);
|
||||
|
||||
label = compiler->labels;
|
||||
jump = compiler->jumps;
|
||||
const_ = compiler->consts;
|
||||
put_label = compiler->put_labels;
|
||||
|
||||
do {
|
||||
buf_ptr = (sljit_ins*)buf->memory;
|
||||
buf_end = buf_ptr + (buf->used_size >> 2);
|
||||
do {
|
||||
*code_ptr = *buf_ptr++;
|
||||
SLJIT_ASSERT(!label || label->size >= word_count);
|
||||
SLJIT_ASSERT(!jump || jump->addr >= word_count);
|
||||
SLJIT_ASSERT(!const_ || const_->addr >= word_count);
|
||||
/* These structures are ordered by their address. */
|
||||
if (label && label->size == word_count) {
|
||||
/* Just recording the address. */
|
||||
label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
|
||||
label->size = code_ptr - code;
|
||||
label = label->next;
|
||||
}
|
||||
if (jump && jump->addr == word_count) {
|
||||
if (next_addr == word_count) {
|
||||
SLJIT_ASSERT(!label || label->size >= word_count);
|
||||
SLJIT_ASSERT(!jump || jump->addr >= word_count);
|
||||
SLJIT_ASSERT(!const_ || const_->addr >= word_count);
|
||||
SLJIT_ASSERT(!put_label || put_label->addr >= word_count);
|
||||
|
||||
/* These structures are ordered by their address. */
|
||||
if (label && label->size == word_count) {
|
||||
/* Just recording the address. */
|
||||
label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
|
||||
label->size = code_ptr - code;
|
||||
label = label->next;
|
||||
}
|
||||
if (jump && jump->addr == word_count) {
|
||||
#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
|
||||
jump->addr = (sljit_uw)(code_ptr - 3);
|
||||
jump->addr = (sljit_uw)(code_ptr - 3);
|
||||
#else
|
||||
jump->addr = (sljit_uw)(code_ptr - 6);
|
||||
jump->addr = (sljit_uw)(code_ptr - 6);
|
||||
#endif
|
||||
code_ptr = detect_jump_type(jump, code_ptr, code, executable_offset);
|
||||
jump = jump->next;
|
||||
}
|
||||
if (const_ && const_->addr == word_count) {
|
||||
/* Just recording the address. */
|
||||
const_->addr = (sljit_uw)code_ptr;
|
||||
const_ = const_->next;
|
||||
code_ptr = detect_jump_type(jump, code_ptr, code, executable_offset);
|
||||
jump = jump->next;
|
||||
}
|
||||
if (const_ && const_->addr == word_count) {
|
||||
/* Just recording the address. */
|
||||
const_->addr = (sljit_uw)code_ptr;
|
||||
const_ = const_->next;
|
||||
}
|
||||
if (put_label && put_label->addr == word_count) {
|
||||
SLJIT_ASSERT(put_label->label);
|
||||
put_label->addr = (sljit_uw)code_ptr;
|
||||
put_label = put_label->next;
|
||||
}
|
||||
next_addr = compute_next_addr(label, jump, const_, put_label);
|
||||
}
|
||||
code_ptr ++;
|
||||
word_count ++;
|
||||
@ -366,6 +380,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
|
||||
SLJIT_ASSERT(!label);
|
||||
SLJIT_ASSERT(!jump);
|
||||
SLJIT_ASSERT(!const_);
|
||||
SLJIT_ASSERT(!put_label);
|
||||
SLJIT_ASSERT(code_ptr - code <= (sljit_s32)compiler->size);
|
||||
|
||||
jump = compiler->jumps;
|
||||
@ -389,8 +404,9 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
|
||||
|
||||
/* Set the fields of immediate loads. */
|
||||
#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
|
||||
buf_ptr[0] = (buf_ptr[0] & 0xffc00000) | ((addr >> 10) & 0x3fffff);
|
||||
buf_ptr[1] = (buf_ptr[1] & 0xfffffc00) | (addr & 0x3ff);
|
||||
SLJIT_ASSERT(((buf_ptr[0] & 0xc1cfffff) == 0x01000000) && ((buf_ptr[1] & 0xc1f83fff) == 0x80102000));
|
||||
buf_ptr[0] |= (addr >> 10) & 0x3fffff;
|
||||
buf_ptr[1] |= addr & 0x3ff;
|
||||
#else
|
||||
#error "Implementation required"
|
||||
#endif
|
||||
@ -398,6 +414,20 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
|
||||
jump = jump->next;
|
||||
}
|
||||
|
||||
put_label = compiler->put_labels;
|
||||
while (put_label) {
|
||||
addr = put_label->label->addr;
|
||||
buf_ptr = (sljit_ins *)put_label->addr;
|
||||
|
||||
#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
|
||||
SLJIT_ASSERT(((buf_ptr[0] & 0xc1cfffff) == 0x01000000) && ((buf_ptr[1] & 0xc1f83fff) == 0x80102000));
|
||||
buf_ptr[0] |= (addr >> 10) & 0x3fffff;
|
||||
buf_ptr[1] |= addr & 0x3ff;
|
||||
#else
|
||||
#error "Implementation required"
|
||||
#endif
|
||||
put_label = put_label->next;
|
||||
}
|
||||
|
||||
compiler->error = SLJIT_ERR_COMPILED;
|
||||
compiler->executable_offset = executable_offset;
|
||||
@ -1465,8 +1495,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil
|
||||
|
||||
SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
|
||||
{
|
||||
sljit_s32 reg;
|
||||
struct sljit_const *const_;
|
||||
sljit_s32 dst_r;
|
||||
|
||||
CHECK_ERROR_PTR();
|
||||
CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
|
||||
@ -1476,11 +1506,31 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi
|
||||
PTR_FAIL_IF(!const_);
|
||||
set_const(const_, compiler);
|
||||
|
||||
reg = FAST_IS_REG(dst) ? dst : TMP_REG2;
|
||||
|
||||
PTR_FAIL_IF(emit_const(compiler, reg, init_value));
|
||||
dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
|
||||
PTR_FAIL_IF(emit_const(compiler, dst_r, init_value));
|
||||
|
||||
if (dst & SLJIT_MEM)
|
||||
PTR_FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw));
|
||||
return const_;
|
||||
}
|
||||
|
||||
/* Records a put_label entry and emits a constant load of 0 as the
   placeholder for a label address, targeting dst / dstw.

   Returns the new put_label record, or NULL when an argument check, the
   record allocation, or any instruction emission fails. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
{
	struct sljit_put_label *record;
	sljit_s32 target_reg;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	record = (struct sljit_put_label*)ensure_abuf(compiler, sizeof(struct sljit_put_label));
	PTR_FAIL_IF(!record);
	set_put_label(record, compiler, 0);

	/* A memory destination is served through TMP_REG2 and stored afterwards. */
	if (FAST_IS_REG(dst))
		target_reg = dst;
	else
		target_reg = TMP_REG2;

	PTR_FAIL_IF(emit_const(compiler, target_reg, 0));

	if (dst & SLJIT_MEM) {
		PTR_FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw));
	}

	return record;
}
|
||||
|
@ -38,8 +38,10 @@ static sljit_s32 emit_do_imm(struct sljit_compiler *compiler, sljit_u8 opcode, s
|
||||
return SLJIT_SUCCESS;
|
||||
}
|
||||
|
||||
static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_s32 type, sljit_sw executable_offset)
|
||||
static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_sw executable_offset)
|
||||
{
|
||||
sljit_s32 type = jump->flags >> TYPE_SHIFT;
|
||||
|
||||
if (type == SLJIT_JUMP) {
|
||||
*code_ptr++ = JMP_i32;
|
||||
jump->addr++;
|
||||
|
54
thirdparty/pcre2/src/sljit/sljitNativeX86_64.c
vendored
54
thirdparty/pcre2/src/sljit/sljitNativeX86_64.c
vendored
@ -39,8 +39,10 @@ static sljit_s32 emit_load_imm64(struct sljit_compiler *compiler, sljit_s32 reg,
|
||||
return SLJIT_SUCCESS;
|
||||
}
|
||||
|
||||
static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_s32 type)
|
||||
static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr)
|
||||
{
|
||||
sljit_s32 type = jump->flags >> TYPE_SHIFT;
|
||||
|
||||
int short_addr = !(jump->flags & SLJIT_REWRITABLE_JUMP) && !(jump->flags & JUMP_LABEL) && (jump->u.target <= 0xffffffff);
|
||||
|
||||
/* The relative jump below specialized for this case. */
|
||||
@ -72,6 +74,56 @@ static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_
|
||||
return code_ptr;
|
||||
}
|
||||
|
||||
/* Decides how the already copied put_label code is finalized, based on
   max_label.

   When max_label exceeds HALFWORD_MAX the code is left untouched: only
   put_label->addr is moved back by put_label->flags bytes and the record is
   marked PATCH_MD.

   Otherwise the instruction bytes are rewritten in place to a form carrying a
   32 bit immediate, put_label->addr is set to the end of that immediate, and
   the new end of the generated code is returned. put_label->flags == 0 means
   the destination is a register; a non-zero value is used as the byte length
   of the trailing store instruction. */
static sljit_u8* generate_put_label_code(struct sljit_put_label *put_label, sljit_u8 *code_ptr, sljit_uw max_label)
{
	/* Two leading bytes followed by a word sized immediate. */
	const sljit_uw load_len = 2 + sizeof(sljit_uw);
	sljit_u8 *inst;

	if (max_label > HALFWORD_MAX) {
		put_label->addr -= put_label->flags;
		put_label->flags = PATCH_MD;
		return code_ptr;
	}

	if (put_label->flags != 0) {
		/* Memory destination: slide the store over the immediate load. */
		inst = code_ptr - (put_label->flags + load_len);
		SLJIT_MEMMOVE(inst, inst + load_len, put_label->flags);

		SLJIT_ASSERT((inst[0] & 0xf8) == REX_W);

		if ((inst[1] & 0xf8) == MOV_r_i32) {
			inst += load_len;
			SLJIT_ASSERT((inst[0] & 0xf8) == REX_W);
		}

		SLJIT_ASSERT(inst[1] == MOV_rm_r);

		/* Clear bit 2 of the prefix byte, switch the opcode to the
		   immediate form and zero bits 3-5 of the following byte. */
		inst[0] = (sljit_u8)(inst[0] & 0xfb);
		inst[1] = MOV_rm_i32;
		inst[2] = (sljit_u8)(inst[2] & 0xc7);

		inst = (sljit_u8*)(put_label->addr - load_len + sizeof(sljit_s32));
		put_label->addr = (sljit_uw)inst;
		put_label->flags = 0;
		return inst;
	}

	/* Destination is register. */
	inst = (sljit_u8*)put_label->addr - load_len;

	SLJIT_ASSERT((inst[0] & 0xf8) == REX_W);
	SLJIT_ASSERT((inst[1] & 0xf8) == MOV_r_i32);

	if ((inst[0] & 0x07) == 0) {
		/* No low prefix bits are set: overwrite the prefix with the opcode. */
		inst[0] = inst[1];
		inst += 1 + sizeof(sljit_s32);
	}
	else {
		/* Keep the prefix byte but clear its bit 3. */
		inst[0] = (sljit_u8)(inst[0] & 0xf7);
		inst += 2 + sizeof(sljit_s32);
	}

	put_label->addr = (sljit_uw)inst;
	return inst;
}
|
||||
|
||||
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
|
||||
sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
|
||||
sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
|
||||
|
125
thirdparty/pcre2/src/sljit/sljitNativeX86_common.c
vendored
125
thirdparty/pcre2/src/sljit/sljitNativeX86_common.c
vendored
@ -428,13 +428,15 @@ static sljit_u8 get_jump_code(sljit_s32 type)
|
||||
}
|
||||
|
||||
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
|
||||
static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_s32 type, sljit_sw executable_offset);
|
||||
static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_sw executable_offset);
|
||||
#else
|
||||
static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_s32 type);
|
||||
static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr);
|
||||
static sljit_u8* generate_put_label_code(struct sljit_put_label *put_label, sljit_u8 *code_ptr, sljit_uw max_label);
|
||||
#endif
|
||||
|
||||
static sljit_u8* generate_near_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_u8 *code, sljit_s32 type, sljit_sw executable_offset)
|
||||
static sljit_u8* generate_near_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_u8 *code, sljit_sw executable_offset)
|
||||
{
|
||||
sljit_s32 type = jump->flags >> TYPE_SHIFT;
|
||||
sljit_s32 short_jump;
|
||||
sljit_uw label_addr;
|
||||
|
||||
@ -447,7 +449,7 @@ static sljit_u8* generate_near_jump_code(struct sljit_jump *jump, sljit_u8 *code
|
||||
|
||||
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
|
||||
if ((sljit_sw)(label_addr - (jump->addr + 1)) > HALFWORD_MAX || (sljit_sw)(label_addr - (jump->addr + 1)) < HALFWORD_MIN)
|
||||
return generate_far_jump_code(jump, code_ptr, type);
|
||||
return generate_far_jump_code(jump, code_ptr);
|
||||
#endif
|
||||
|
||||
if (type == SLJIT_JUMP) {
|
||||
@ -497,6 +499,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
|
||||
struct sljit_label *label;
|
||||
struct sljit_jump *jump;
|
||||
struct sljit_const *const_;
|
||||
struct sljit_put_label *put_label;
|
||||
|
||||
CHECK_ERROR_PTR();
|
||||
CHECK_PTR(check_sljit_generate_code(compiler));
|
||||
@ -511,6 +514,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
|
||||
label = compiler->labels;
|
||||
jump = compiler->jumps;
|
||||
const_ = compiler->consts;
|
||||
put_label = compiler->put_labels;
|
||||
executable_offset = SLJIT_EXEC_OFFSET(code);
|
||||
|
||||
do {
|
||||
@ -525,27 +529,38 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
|
||||
buf_ptr += len;
|
||||
}
|
||||
else {
|
||||
if (*buf_ptr >= 2) {
|
||||
switch (*buf_ptr) {
|
||||
case 0:
|
||||
label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
|
||||
label->size = code_ptr - code;
|
||||
label = label->next;
|
||||
break;
|
||||
case 1:
|
||||
jump->addr = (sljit_uw)code_ptr;
|
||||
if (!(jump->flags & SLJIT_REWRITABLE_JUMP))
|
||||
code_ptr = generate_near_jump_code(jump, code_ptr, code, *buf_ptr - 2, executable_offset);
|
||||
code_ptr = generate_near_jump_code(jump, code_ptr, code, executable_offset);
|
||||
else {
|
||||
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
|
||||
code_ptr = generate_far_jump_code(jump, code_ptr, *buf_ptr - 2, executable_offset);
|
||||
code_ptr = generate_far_jump_code(jump, code_ptr, executable_offset);
|
||||
#else
|
||||
code_ptr = generate_far_jump_code(jump, code_ptr, *buf_ptr - 2);
|
||||
code_ptr = generate_far_jump_code(jump, code_ptr);
|
||||
#endif
|
||||
}
|
||||
jump = jump->next;
|
||||
}
|
||||
else if (*buf_ptr == 0) {
|
||||
label->addr = ((sljit_uw)code_ptr) + executable_offset;
|
||||
label->size = code_ptr - code;
|
||||
label = label->next;
|
||||
}
|
||||
else { /* *buf_ptr is 1 */
|
||||
break;
|
||||
case 2:
|
||||
const_->addr = ((sljit_uw)code_ptr) - sizeof(sljit_sw);
|
||||
const_ = const_->next;
|
||||
break;
|
||||
default:
|
||||
SLJIT_ASSERT(*buf_ptr == 3);
|
||||
SLJIT_ASSERT(put_label->label);
|
||||
put_label->addr = (sljit_uw)code_ptr;
|
||||
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
|
||||
code_ptr = generate_put_label_code(put_label, code_ptr, (sljit_uw)(SLJIT_ADD_EXEC_OFFSET(code, executable_offset) + put_label->label->size));
|
||||
#endif
|
||||
put_label = put_label->next;
|
||||
break;
|
||||
}
|
||||
buf_ptr++;
|
||||
}
|
||||
@ -557,6 +572,8 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
|
||||
SLJIT_ASSERT(!label);
|
||||
SLJIT_ASSERT(!jump);
|
||||
SLJIT_ASSERT(!const_);
|
||||
SLJIT_ASSERT(!put_label);
|
||||
SLJIT_ASSERT(code_ptr <= code + compiler->size);
|
||||
|
||||
jump = compiler->jumps;
|
||||
while (jump) {
|
||||
@ -591,8 +608,24 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
|
||||
jump = jump->next;
|
||||
}
|
||||
|
||||
/* Some space may be wasted because of short jumps. */
|
||||
SLJIT_ASSERT(code_ptr <= code + compiler->size);
|
||||
put_label = compiler->put_labels;
|
||||
while (put_label) {
|
||||
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
|
||||
sljit_unaligned_store_sw((void*)(put_label->addr - sizeof(sljit_sw)), (sljit_sw)put_label->label->addr);
|
||||
#else
|
||||
if (put_label->flags & PATCH_MD) {
|
||||
SLJIT_ASSERT(put_label->label->addr > HALFWORD_MAX);
|
||||
sljit_unaligned_store_sw((void*)(put_label->addr - sizeof(sljit_sw)), (sljit_sw)put_label->label->addr);
|
||||
}
|
||||
else {
|
||||
SLJIT_ASSERT(put_label->label->addr <= HALFWORD_MAX);
|
||||
sljit_unaligned_store_s32((void*)(put_label->addr - sizeof(sljit_s32)), (sljit_s32)put_label->label->addr);
|
||||
}
|
||||
#endif
|
||||
|
||||
put_label = put_label->next;
|
||||
}
|
||||
|
||||
compiler->error = SLJIT_ERR_COMPILED;
|
||||
compiler->executable_offset = executable_offset;
|
||||
compiler->executable_size = code_ptr - code;
|
||||
@ -2481,7 +2514,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile
|
||||
|
||||
jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
|
||||
PTR_FAIL_IF_NULL(jump);
|
||||
set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
|
||||
set_jump(jump, compiler, (type & SLJIT_REWRITABLE_JUMP) | ((type & 0xff) << TYPE_SHIFT));
|
||||
type &= 0xff;
|
||||
|
||||
/* Worst case size. */
|
||||
@ -2495,7 +2528,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile
|
||||
PTR_FAIL_IF_NULL(inst);
|
||||
|
||||
*inst++ = 0;
|
||||
*inst++ = type + 2;
|
||||
*inst++ = 1;
|
||||
return jump;
|
||||
}
|
||||
|
||||
@ -2513,7 +2546,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi
|
||||
if (src == SLJIT_IMM) {
|
||||
jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
|
||||
FAIL_IF_NULL(jump);
|
||||
set_jump(jump, compiler, JUMP_ADDR);
|
||||
set_jump(jump, compiler, JUMP_ADDR | (type << TYPE_SHIFT));
|
||||
jump->u.target = srcw;
|
||||
|
||||
/* Worst case size. */
|
||||
@ -2527,7 +2560,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi
|
||||
FAIL_IF_NULL(inst);
|
||||
|
||||
*inst++ = 0;
|
||||
*inst++ = type + 2;
|
||||
*inst++ = 1;
|
||||
}
|
||||
else {
|
||||
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
|
||||
@ -2831,7 +2864,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi
|
||||
PTR_FAIL_IF(!inst);
|
||||
|
||||
*inst++ = 0;
|
||||
*inst++ = 1;
|
||||
*inst++ = 2;
|
||||
|
||||
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
|
||||
if (dst & SLJIT_MEM)
|
||||
@ -2842,6 +2875,54 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi
|
||||
return const_;
|
||||
}
|
||||
|
||||
/* Emits a move of a placeholder immediate (0) into dst/dstw and returns the
   put_label record describing it, or NULL if any step fails.

   On x86-64 the placeholder is loaded with emit_load_imm64 into dst itself
   when FAST_IS_REG(dst) holds, otherwise into TMP_REG1. When dst is a memory
   operand, TMP_REG1 is then stored to dst/dstw and the growth of
   compiler->size caused by that store is saved in the record's flags field.
   On other x86 configurations a single emit_mov of the immediate is used.

   Finally a two byte record { 0, 3 } is appended to the instruction buffer.
   NOTE(review): presumably this marks the put_label position so that code
   generation can later patch in the label address -- confirm against
   sljit_generate_code. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
{
	struct sljit_put_label *record;
	sljit_u8 *marker;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	sljit_s32 target_reg;
	sljit_uw size_before_store;
#endif

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	CHECK_EXTRA_REGS(dst, dstw, (void)0);

	/* The record is allocated and registered before any code is emitted. */
	record = (struct sljit_put_label*)ensure_abuf(compiler, sizeof(struct sljit_put_label));
	PTR_FAIL_IF(!record);
	set_put_label(record, compiler, 0);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 0;

	/* Load the placeholder into a register first. */
	if (FAST_IS_REG(dst))
		target_reg = dst;
	else
		target_reg = TMP_REG1;

	PTR_FAIL_IF(emit_load_imm64(compiler, target_reg, 0));

	if (dst & SLJIT_MEM) {
		/* Memory destination: store TMP_REG1 and remember how much
		   compiler->size grew while emitting the store. */
		size_before_store = compiler->size;
		PTR_FAIL_IF(emit_mov(compiler, dst, dstw, TMP_REG1, 0));
		record->flags = compiler->size - size_before_store;
	}
#else
	PTR_FAIL_IF(emit_mov(compiler, dst, dstw, SLJIT_IMM, 0));
#endif

	marker = (sljit_u8*)ensure_buf(compiler, 2);
	PTR_FAIL_IF(!marker);

	marker[0] = 0;
	marker[1] = 3;

	return record;
}
|
||||
|
||||
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
|
||||
{
|
||||
SLJIT_UNUSED_ARG(executable_offset);
|
||||
|
6
thirdparty/pcre2/src/sljit/sljitUtils.c
vendored
6
thirdparty/pcre2/src/sljit/sljitUtils.c
vendored
@ -154,7 +154,13 @@ SLJIT_API_FUNC_ATTRIBUTE void SLJIT_FUNC sljit_release_lock(void)
|
||||
#include "windows.h"
|
||||
#else
|
||||
/* Provides mmap function. */
|
||||
#include <sys/types.h>
|
||||
#include <sys/mman.h>
|
||||
#ifndef MAP_ANON
|
||||
#ifdef MAP_ANONYMOUS
|
||||
#define MAP_ANON MAP_ANONYMOUS
|
||||
#endif
|
||||
#endif
|
||||
/* For detecting the page size. */
|
||||
#include <unistd.h>
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user