Merge pull request #38363 from akien-mga/pcre2-10.34

pcre2: Update to upstream version 10.34
This commit is contained in:
Rémi Verschelde 2020-04-30 16:36:39 +02:00 committed by GitHub
commit f870118323
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
39 changed files with 7446 additions and 4735 deletions

View File

@ -457,14 +457,13 @@ Files extracted from upstream source:
## pcre2
- Upstream: http://www.pcre.org
- Version: 10.33
- Version: 10.34
- License: BSD-3-Clause
Files extracted from upstream source:
- Files listed in the file NON-AUTOTOOLS-BUILD steps 1-4
- All .h files in src/ apart from pcre2posix.h
- src/pcre2_jit_compile.c
- src/pcre2_jit_match.c
- src/pcre2_jit_misc.c
- src/sljit/*

View File

@ -218,7 +218,7 @@ sure both macros are undefined; an emulation function will then be used. */
#define PACKAGE_NAME "PCRE2"
/* Define to the full name and version of this package. */
#define PACKAGE_STRING "PCRE2 10.33"
#define PACKAGE_STRING "PCRE2 10.34"
/* Define to the one symbol short name of this package. */
#define PACKAGE_TARNAME "pcre2"
@ -227,7 +227,7 @@ sure both macros are undefined; an emulation function will then be used. */
#define PACKAGE_URL ""
/* Define to the version of this package. */
#define PACKAGE_VERSION "10.33"
#define PACKAGE_VERSION "10.34"
/* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested
parentheses (of any kind) in a pattern. This limits the amount of system
@ -352,7 +352,7 @@ sure both macros are undefined; an emulation function will then be used. */
#endif
/* Version number of package */
#define VERSION "10.33"
#define VERSION "10.34"
/* Define to 1 if on MINIX. */
/* #undef _MINIX */

View File

@ -5,7 +5,7 @@
/* This is the public header file for the PCRE library, second API, to be
#included by applications that call PCRE2 functions.
Copyright (c) 2016-2018 University of Cambridge
Copyright (c) 2016-2019 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@ -42,9 +42,9 @@ POSSIBILITY OF SUCH DAMAGE.
/* The current PCRE version information. */
#define PCRE2_MAJOR 10
#define PCRE2_MINOR 33
#define PCRE2_MINOR 34
#define PCRE2_PRERELEASE
#define PCRE2_DATE 2019-04-16
#define PCRE2_DATE 2019-11-21
/* When an application links to a PCRE DLL in Windows, the symbols that are
imported have to be identified as such. When building PCRE2, the appropriate
@ -142,6 +142,7 @@ D is inspected during pcre2_dfa_match() execution
#define PCRE2_USE_OFFSET_LIMIT 0x00800000u /* J M D */
#define PCRE2_EXTENDED_MORE 0x01000000u /* C */
#define PCRE2_LITERAL 0x02000000u /* C */
#define PCRE2_MATCH_INVALID_UTF 0x04000000u /* J M D */
/* An additional compile options word is available in the compile context. */
@ -305,6 +306,8 @@ pcre2_pattern_convert(). */
#define PCRE2_ERROR_INVALID_HYPHEN_IN_OPTIONS 194
#define PCRE2_ERROR_ALPHA_ASSERTION_UNKNOWN 195
#define PCRE2_ERROR_SCRIPT_RUN_NOT_AVAILABLE 196
#define PCRE2_ERROR_TOO_MANY_CAPTURES 197
#define PCRE2_ERROR_CONDITION_ATOMIC_ASSERTION_EXPECTED 198
/* "Expected" matching error codes: no match and partial match. */
@ -390,6 +393,7 @@ released, the numbers must not be changed. */
#define PCRE2_ERROR_HEAPLIMIT (-63)
#define PCRE2_ERROR_CONVERT_SYNTAX (-64)
#define PCRE2_ERROR_INTERNAL_DUPMATCH (-65)
#define PCRE2_ERROR_DFA_UINVALID_UTF (-66)
/* Request types for pcre2_pattern_info() */
@ -580,7 +584,7 @@ PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_set_bsr(pcre2_compile_context *, uint32_t); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_set_character_tables(pcre2_compile_context *, const unsigned char *); \
pcre2_set_character_tables(pcre2_compile_context *, const uint8_t *); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_set_compile_extra_options(pcre2_compile_context *, uint32_t); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
@ -675,6 +679,8 @@ PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
pcre2_match_data_free(pcre2_match_data *); \
PCRE2_EXP_DECL PCRE2_SPTR PCRE2_CALL_CONVENTION \
pcre2_get_mark(pcre2_match_data *); \
PCRE2_EXP_DECL PCRE2_SIZE PCRE2_CALL_CONVENTION \
pcre2_get_match_data_size(pcre2_match_data *); \
PCRE2_EXP_DECL uint32_t PCRE2_CALL_CONVENTION \
pcre2_get_ovector_count(pcre2_match_data *); \
PCRE2_EXP_DECL PCRE2_SIZE PCRE2_CALL_CONVENTION \
@ -773,7 +779,8 @@ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_get_error_message(int, PCRE2_UCHAR *, PCRE2_SIZE); \
PCRE2_EXP_DECL const uint8_t PCRE2_CALL_CONVENTION \
*pcre2_maketables(pcre2_general_context *); \
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
pcre2_maketables_free(pcre2_general_context *, const uint8_t *);
/* Define macros that generate width-specific names from generic versions. The
three-level macro scheme is necessary to get the macros expanded when we want
@ -838,6 +845,7 @@ pcre2_compile are called by application code. */
#define pcre2_general_context_free PCRE2_SUFFIX(pcre2_general_context_free_)
#define pcre2_get_error_message PCRE2_SUFFIX(pcre2_get_error_message_)
#define pcre2_get_mark PCRE2_SUFFIX(pcre2_get_mark_)
#define pcre2_get_match_data_size PCRE2_SUFFIX(pcre2_get_match_data_size_)
#define pcre2_get_ovector_pointer PCRE2_SUFFIX(pcre2_get_ovector_pointer_)
#define pcre2_get_ovector_count PCRE2_SUFFIX(pcre2_get_ovector_count_)
#define pcre2_get_startchar PCRE2_SUFFIX(pcre2_get_startchar_)
@ -848,6 +856,7 @@ pcre2_compile are called by application code. */
#define pcre2_jit_stack_create PCRE2_SUFFIX(pcre2_jit_stack_create_)
#define pcre2_jit_stack_free PCRE2_SUFFIX(pcre2_jit_stack_free_)
#define pcre2_maketables PCRE2_SUFFIX(pcre2_maketables_)
#define pcre2_maketables_free PCRE2_SUFFIX(pcre2_maketables_free_)
#define pcre2_match PCRE2_SUFFIX(pcre2_match_)
#define pcre2_match_context_copy PCRE2_SUFFIX(pcre2_match_context_copy_)
#define pcre2_match_context_create PCRE2_SUFFIX(pcre2_match_context_create_)

View File

@ -624,6 +624,13 @@ for(;;)
case OP_ASSERTBACK_NOT:
case OP_ONCE:
return !entered_a_group;
/* Non-atomic assertions - don't possessify last iterator. This needs
more thought. */
case OP_ASSERT_NA:
case OP_ASSERTBACK_NA:
return FALSE;
}
/* Skip over the bracket and inspect what comes next. */

File diff suppressed because it is too large Load Diff

View File

@ -323,7 +323,7 @@ data. */
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_character_tables(pcre2_compile_context *ccontext,
const unsigned char *tables)
const uint8_t *tables)
{
ccontext->tables = tables;
return 0;

View File

@ -173,6 +173,8 @@ static const uint8_t coptable[] = {
0, /* Assert not */
0, /* Assert behind */
0, /* Assert behind not */
0, /* NA assert */
0, /* NA assert behind */
0, /* ONCE */
0, /* SCRIPT_RUN */
0, 0, 0, 0, 0, /* BRA, BRAPOS, CBRA, CBRAPOS, COND */
@ -248,6 +250,8 @@ static const uint8_t poptable[] = {
0, /* Assert not */
0, /* Assert behind */
0, /* Assert behind not */
0, /* NA assert */
0, /* NA assert behind */
0, /* ONCE */
0, /* SCRIPT_RUN */
0, 0, 0, 0, 0, /* BRA, BRAPOS, CBRA, CBRAPOS, COND */
@ -962,7 +966,7 @@ for (;;)
if (ptr >= end_subject)
{
if ((mb->moptions & PCRE2_PARTIAL_HARD) != 0)
could_continue = TRUE;
return PCRE2_ERROR_PARTIAL;
else { ADD_ACTIVE(state_offset + 1, 0); }
}
break;
@ -1011,10 +1015,12 @@ for (;;)
/*-----------------------------------------------------------------*/
case OP_EODN:
if (clen == 0 && (mb->moptions & PCRE2_PARTIAL_HARD) != 0)
could_continue = TRUE;
else if (clen == 0 || (IS_NEWLINE(ptr) && ptr == end_subject - mb->nllen))
{ ADD_ACTIVE(state_offset + 1, 0); }
if (clen == 0 || (IS_NEWLINE(ptr) && ptr == end_subject - mb->nllen))
{
if ((mb->moptions & PCRE2_PARTIAL_HARD) != 0)
return PCRE2_ERROR_PARTIAL;
ADD_ACTIVE(state_offset + 1, 0);
}
break;
/*-----------------------------------------------------------------*/
@ -3152,8 +3158,8 @@ for (;;)
/* We have finished the processing at the current subject character. If no
new states have been set for the next character, we have found all the
matches that we are going to find. If we are at the top level and partial
matching has been requested, check for appropriate conditions.
matches that we are going to find. If partial matching has been requested,
check for appropriate conditions.
The "forced_ fail" variable counts the number of (*F) encountered for the
character. If it is equal to the original active_count (saved in
@ -3165,22 +3171,24 @@ for (;;)
if (new_count <= 0)
{
if (rlevel == 1 && /* Top level, and */
could_continue && /* Some could go on, and */
if (could_continue && /* Some could go on, and */
forced_fail != workspace[1] && /* Not all forced fail & */
( /* either... */
(mb->moptions & PCRE2_PARTIAL_HARD) != 0 /* Hard partial */
|| /* or... */
((mb->moptions & PCRE2_PARTIAL_SOFT) != 0 && /* Soft partial and */
match_count < 0) /* no matches */
match_count < 0) /* no matches */
) && /* And... */
(
partial_newline || /* Either partial NL */
( /* or ... */
ptr >= end_subject && /* End of subject and */
ptr > mb->start_used_ptr) /* Inspected non-empty string */
partial_newline || /* Either partial NL */
( /* or ... */
ptr >= end_subject && /* End of subject and */
( /* either */
ptr > mb->start_used_ptr || /* Inspected non-empty string */
mb->allowemptypartial /* or pattern has lookbehind */
) /* or could match empty */
)
)
))
match_count = PCRE2_ERROR_PARTIAL;
break; /* Exit from loop along the subject string */
}
@ -3246,6 +3254,11 @@ BOOL utf, anchored, startline, firstline;
BOOL has_first_cu = FALSE;
BOOL has_req_cu = FALSE;
#if PCRE2_CODE_UNIT_WIDTH == 8
BOOL memchr_not_found_first_cu = FALSE;
BOOL memchr_not_found_first_cu2 = FALSE;
#endif
PCRE2_UCHAR first_cu = 0;
PCRE2_UCHAR first_cu2 = 0;
PCRE2_UCHAR req_cu = 0;
@ -3295,6 +3308,11 @@ if ((options & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) != 0 &&
((re->overall_options | options) & PCRE2_ENDANCHORED) != 0)
return PCRE2_ERROR_BADOPTION;
/* Invalid UTF support is not available for DFA matching. */
if ((re->overall_options & PCRE2_MATCH_INVALID_UTF) != 0)
return PCRE2_ERROR_DFA_UINVALID_UTF;
/* Check that the first field in the block is the magic number. If it is not,
return with PCRE2_ERROR_BADMAGIC. */
@ -3404,6 +3422,8 @@ mb->tables = re->tables;
mb->start_subject = subject;
mb->end_subject = end_subject;
mb->start_offset = start_offset;
mb->allowemptypartial = (re->max_lookbehind > 0) ||
(re->flags & PCRE2_MATCH_EMPTY) != 0;
mb->moptions = options;
mb->poptions = re->overall_options;
mb->match_call_count = 0;
@ -3619,7 +3639,10 @@ for (;;)
/* Not anchored. Advance to a unique first code unit if there is one. In
8-bit mode, the use of memchr() gives a big speed up, even though we have
to call it twice in caseless mode, in order to find the earliest occurrence
of the character in either of its cases. */
of the character in either of its cases. If a call to memchr() that
searches the rest of the subject fails to find one case, remember that in
order not to keep on repeating the search. This can make a huge difference
when the strings are very long and only one case is present. */
else
{
@ -3633,11 +3656,29 @@ for (;;)
(smc = UCHAR21TEST(start_match)) != first_cu &&
smc != first_cu2)
start_match++;
#else /* 8-bit code units */
PCRE2_SPTR pp1 =
memchr(start_match, first_cu, end_subject-start_match);
PCRE2_SPTR pp2 =
memchr(start_match, first_cu2, end_subject-start_match);
PCRE2_SPTR pp1 = NULL;
PCRE2_SPTR pp2 = NULL;
PCRE2_SIZE cu2size = end_subject - start_match;
if (!memchr_not_found_first_cu)
{
pp1 = memchr(start_match, first_cu, end_subject - start_match);
if (pp1 == NULL) memchr_not_found_first_cu = TRUE;
else cu2size = pp1 - start_match;
}
/* If pp1 is not NULL, we have arranged to search only as far as pp1,
to see if the other case is earlier, so we can set "not found" only
when both searches have returned NULL. */
if (!memchr_not_found_first_cu2)
{
pp2 = memchr(start_match, first_cu2, cu2size);
memchr_not_found_first_cu2 = (pp2 == NULL && pp1 == NULL);
}
if (pp1 == NULL)
start_match = (pp2 == NULL)? end_subject : pp2;
else
@ -3653,7 +3694,7 @@ for (;;)
while (start_match < end_subject && UCHAR21TEST(start_match) !=
first_cu)
start_match++;
#else
#else /* 8-bit code units */
start_match = memchr(start_match, first_cu, end_subject - start_match);
if (start_match == NULL) start_match = end_subject;
#endif
@ -3740,6 +3781,8 @@ for (;;)
if ((mb->moptions & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) == 0)
{
PCRE2_SPTR p;
/* The minimum matching length is a lower bound; no actual string of that
length may actually match the pattern. Although the value is, strictly,
in characters, we treat it as code units to avoid spending too much time
@ -3753,37 +3796,63 @@ for (;;)
point. This optimization can save a huge amount of backtracking in
patterns with nested unlimited repeats that aren't going to match.
Writing separate code for cased/caseless versions makes it go faster, as
does using an autoincrement and backing off on a match.
does using an autoincrement and backing off on a match. As in the case of
the first code unit, using memchr() in the 8-bit library gives a big
speed up. Unlike the first_cu check above, we do not need to call
memchr() twice in the caseless case because we only need to check for the
presence of the character in either case, not find the first occurrence.
The search can be skipped if the code unit was found later than the
current starting point in a previous iteration of the bumpalong loop.
HOWEVER: when the subject string is very, very long, searching to its end
can take a long time, and give bad performance on quite ordinary
patterns. This showed up when somebody was matching something like
/^\d+C/ on a 32-megabyte string... so we don't do this when the string is
sufficiently long. */
sufficiently long, but it's worth searching a lot more for unanchored
patterns. */
if (has_req_cu && end_subject - start_match < REQ_CU_MAX)
p = start_match + (has_first_cu? 1:0);
if (has_req_cu && p > req_cu_ptr)
{
PCRE2_SPTR p = start_match + (has_first_cu? 1:0);
PCRE2_SIZE check_length = end_subject - start_match;
/* We don't need to repeat the search if we haven't yet reached the
place we found it at last time. */
if (p > req_cu_ptr)
if (check_length < REQ_CU_MAX ||
(!anchored && check_length < REQ_CU_MAX * 1000))
{
if (req_cu != req_cu2)
if (req_cu != req_cu2) /* Caseless */
{
#if PCRE2_CODE_UNIT_WIDTH != 8
while (p < end_subject)
{
uint32_t pp = UCHAR21INCTEST(p);
if (pp == req_cu || pp == req_cu2) { p--; break; }
}
#else /* 8-bit code units */
PCRE2_SPTR pp = p;
p = memchr(pp, req_cu, end_subject - pp);
if (p == NULL)
{
p = memchr(pp, req_cu2, end_subject - pp);
if (p == NULL) p = end_subject;
}
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */
}
/* The caseful case */
else
{
#if PCRE2_CODE_UNIT_WIDTH != 8
while (p < end_subject)
{
if (UCHAR21INCTEST(p) == req_cu) { p--; break; }
}
#else /* 8-bit code units */
p = memchr(p, req_cu, end_subject - p);
if (p == NULL) p = end_subject;
#endif
}
/* If we can't find the required code unit, break the matching loop,

View File

@ -184,6 +184,8 @@ static const unsigned char compile_error_texts[] =
/* 95 */
"(*alpha_assertion) not recognized\0"
"script runs require Unicode support, which this version of PCRE2 does not have\0"
"too many capturing groups (maximum 65535)\0"
"atomic assertion expected after (?( or (?(?C)\0"
;
/* Match-time and UTF error texts are in the same format. */
@ -268,6 +270,7 @@ static const unsigned char match_error_texts[] =
"invalid syntax\0"
/* 65 */
"internal error - duplicate substitution match\0"
"PCRE2_MATCH_INVALID_UTF is not supported for DFA matching\0"
;

View File

@ -517,6 +517,7 @@ bytes in a code unit in that mode. */
#define PCRE2_HASBKPORX 0x00100000 /* contains \P, \p, or \X */
#define PCRE2_DUPCAPUSED 0x00200000 /* contains (?| */
#define PCRE2_HASBKC 0x00400000 /* contains \C */
#define PCRE2_HASACCEPT 0x00800000 /* contains (*ACCEPT) */
#define PCRE2_MODE_MASK (PCRE2_MODE8 | PCRE2_MODE16 | PCRE2_MODE32)
@ -535,13 +536,14 @@ enum { PCRE2_MATCHEDBY_INTERPRETER, /* pcre2_match() */
#define MAGIC_NUMBER 0x50435245UL /* 'PCRE' */
/* The maximum remaining length of subject we are prepared to search for a
req_unit match. In 8-bit mode, memchr() is used and is much faster than the
search loop that has to be used in 16-bit and 32-bit modes. */
req_unit match from an anchored pattern. In 8-bit mode, memchr() is used and is
much faster than the search loop that has to be used in 16-bit and 32-bit
modes. */
#if PCRE2_CODE_UNIT_WIDTH == 8
#define REQ_CU_MAX 2000
#define REQ_CU_MAX 5000
#else
#define REQ_CU_MAX 1000
#define REQ_CU_MAX 2000
#endif
/* Offsets for the bitmap tables in the cbits set of tables. Each table
@ -881,12 +883,16 @@ a positive value. */
#define STRING_atomic0 "atomic\0"
#define STRING_pla0 "pla\0"
#define STRING_plb0 "plb\0"
#define STRING_napla0 "napla\0"
#define STRING_naplb0 "naplb\0"
#define STRING_nla0 "nla\0"
#define STRING_nlb0 "nlb\0"
#define STRING_sr0 "sr\0"
#define STRING_asr0 "asr\0"
#define STRING_positive_lookahead0 "positive_lookahead\0"
#define STRING_positive_lookbehind0 "positive_lookbehind\0"
#define STRING_non_atomic_positive_lookahead0 "non_atomic_positive_lookahead\0"
#define STRING_non_atomic_positive_lookbehind0 "non_atomic_positive_lookbehind\0"
#define STRING_negative_lookahead0 "negative_lookahead\0"
#define STRING_negative_lookbehind0 "negative_lookbehind\0"
#define STRING_script_run0 "script_run\0"
@ -1171,12 +1177,16 @@ only. */
#define STRING_atomic0 STR_a STR_t STR_o STR_m STR_i STR_c "\0"
#define STRING_pla0 STR_p STR_l STR_a "\0"
#define STRING_plb0 STR_p STR_l STR_b "\0"
#define STRING_napla0 STR_n STR_a STR_p STR_l STR_a "\0"
#define STRING_naplb0 STR_n STR_a STR_p STR_l STR_b "\0"
#define STRING_nla0 STR_n STR_l STR_a "\0"
#define STRING_nlb0 STR_n STR_l STR_b "\0"
#define STRING_sr0 STR_s STR_r "\0"
#define STRING_asr0 STR_a STR_s STR_r "\0"
#define STRING_positive_lookahead0 STR_p STR_o STR_s STR_i STR_t STR_i STR_v STR_e STR_UNDERSCORE STR_l STR_o STR_o STR_k STR_a STR_h STR_e STR_a STR_d "\0"
#define STRING_positive_lookbehind0 STR_p STR_o STR_s STR_i STR_t STR_i STR_v STR_e STR_UNDERSCORE STR_l STR_o STR_o STR_k STR_b STR_e STR_h STR_i STR_n STR_d "\0"
#define STRING_non_atomic_positive_lookahead0 STR_n STR_o STR_n STR_UNDERSCORE STR_a STR_t STR_o STR_m STR_i STR_c STR_UNDERSCORE STR_p STR_o STR_s STR_i STR_t STR_i STR_v STR_e STR_UNDERSCORE STR_l STR_o STR_o STR_k STR_a STR_h STR_e STR_a STR_d "\0"
#define STRING_non_atomic_positive_lookbehind0 STR_n STR_o STR_n STR_UNDERSCORE STR_a STR_t STR_o STR_m STR_i STR_c STR_UNDERSCORE STR_p STR_o STR_s STR_i STR_t STR_i STR_v STR_e STR_UNDERSCORE STR_l STR_o STR_o STR_k STR_b STR_e STR_h STR_i STR_n STR_d "\0"
#define STRING_negative_lookahead0 STR_n STR_e STR_g STR_a STR_t STR_i STR_v STR_e STR_UNDERSCORE STR_l STR_o STR_o STR_k STR_a STR_h STR_e STR_a STR_d "\0"
#define STRING_negative_lookbehind0 STR_n STR_e STR_g STR_a STR_t STR_i STR_v STR_e STR_UNDERSCORE STR_l STR_o STR_o STR_k STR_b STR_e STR_h STR_i STR_n STR_d "\0"
#define STRING_script_run0 STR_s STR_c STR_r STR_i STR_p STR_t STR_UNDERSCORE STR_r STR_u STR_n "\0"
@ -1301,7 +1311,7 @@ enum { ESC_A = 1, ESC_G, ESC_K, ESC_B, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s,
Starting from 1 (i.e. after OP_END), the values up to OP_EOD must correspond in
order to the list of escapes immediately above. Furthermore, values up to
OP_DOLLM must not be changed without adjusting the table called autoposstab in
pcre2_auto_possess.c
pcre2_auto_possess.c.
Whenever this list is updated, the two macro definitions that follow must be
updated to match. The possessification table called "opcode_possessify" in
@ -1499,80 +1509,81 @@ enum {
OP_KETRMIN, /* 123 order. They are for groups the repeat for ever. */
OP_KETRPOS, /* 124 Possessive unlimited repeat. */
/* The assertions must come before BRA, CBRA, ONCE, and COND, and the four
asserts must remain in order. */
/* The assertions must come before BRA, CBRA, ONCE, and COND. */
OP_REVERSE, /* 125 Move pointer back - used in lookbehind assertions */
OP_ASSERT, /* 126 Positive lookahead */
OP_ASSERT_NOT, /* 127 Negative lookahead */
OP_ASSERTBACK, /* 128 Positive lookbehind */
OP_ASSERTBACK_NOT, /* 129 Negative lookbehind */
OP_ASSERT_NA, /* 130 Positive non-atomic lookahead */
OP_ASSERTBACK_NA, /* 131 Positive non-atomic lookbehind */
/* ONCE, SCRIPT_RUN, BRA, BRAPOS, CBRA, CBRAPOS, and COND must come
immediately after the assertions, with ONCE first, as there's a test for >=
ONCE for a subpattern that isn't an assertion. The POS versions must
immediately follow the non-POS versions in each case. */
OP_ONCE, /* 130 Atomic group, contains captures */
OP_SCRIPT_RUN, /* 131 Non-capture, but check characters' scripts */
OP_BRA, /* 132 Start of non-capturing bracket */
OP_BRAPOS, /* 133 Ditto, with unlimited, possessive repeat */
OP_CBRA, /* 134 Start of capturing bracket */
OP_CBRAPOS, /* 135 Ditto, with unlimited, possessive repeat */
OP_COND, /* 136 Conditional group */
OP_ONCE, /* 132 Atomic group, contains captures */
OP_SCRIPT_RUN, /* 133 Non-capture, but check characters' scripts */
OP_BRA, /* 134 Start of non-capturing bracket */
OP_BRAPOS, /* 135 Ditto, with unlimited, possessive repeat */
OP_CBRA, /* 136 Start of capturing bracket */
OP_CBRAPOS, /* 137 Ditto, with unlimited, possessive repeat */
OP_COND, /* 138 Conditional group */
/* These five must follow the previous five, in the same order. There's a
check for >= SBRA to distinguish the two sets. */
OP_SBRA, /* 137 Start of non-capturing bracket, check empty */
OP_SBRAPOS, /* 138 Ditto, with unlimited, possessive repeat */
OP_SCBRA, /* 139 Start of capturing bracket, check empty */
OP_SCBRAPOS, /* 140 Ditto, with unlimited, possessive repeat */
OP_SCOND, /* 141 Conditional group, check empty */
OP_SBRA, /* 139 Start of non-capturing bracket, check empty */
OP_SBRAPOS, /* 149 Ditto, with unlimited, possessive repeat */
OP_SCBRA, /* 141 Start of capturing bracket, check empty */
OP_SCBRAPOS, /* 142 Ditto, with unlimited, possessive repeat */
OP_SCOND, /* 143 Conditional group, check empty */
/* The next two pairs must (respectively) be kept together. */
OP_CREF, /* 142 Used to hold a capture number as condition */
OP_DNCREF, /* 143 Used to point to duplicate names as a condition */
OP_RREF, /* 144 Used to hold a recursion number as condition */
OP_DNRREF, /* 145 Used to point to duplicate names as a condition */
OP_FALSE, /* 146 Always false (used by DEFINE and VERSION) */
OP_TRUE, /* 147 Always true (used by VERSION) */
OP_CREF, /* 144 Used to hold a capture number as condition */
OP_DNCREF, /* 145 Used to point to duplicate names as a condition */
OP_RREF, /* 146 Used to hold a recursion number as condition */
OP_DNRREF, /* 147 Used to point to duplicate names as a condition */
OP_FALSE, /* 148 Always false (used by DEFINE and VERSION) */
OP_TRUE, /* 149 Always true (used by VERSION) */
OP_BRAZERO, /* 148 These two must remain together and in this */
OP_BRAMINZERO, /* 149 order. */
OP_BRAPOSZERO, /* 150 */
OP_BRAZERO, /* 150 These two must remain together and in this */
OP_BRAMINZERO, /* 151 order. */
OP_BRAPOSZERO, /* 152 */
/* These are backtracking control verbs */
OP_MARK, /* 151 always has an argument */
OP_PRUNE, /* 152 */
OP_PRUNE_ARG, /* 153 same, but with argument */
OP_SKIP, /* 154 */
OP_SKIP_ARG, /* 155 same, but with argument */
OP_THEN, /* 156 */
OP_THEN_ARG, /* 157 same, but with argument */
OP_COMMIT, /* 158 */
OP_COMMIT_ARG, /* 159 same, but with argument */
OP_MARK, /* 153 always has an argument */
OP_PRUNE, /* 154 */
OP_PRUNE_ARG, /* 155 same, but with argument */
OP_SKIP, /* 156 */
OP_SKIP_ARG, /* 157 same, but with argument */
OP_THEN, /* 158 */
OP_THEN_ARG, /* 159 same, but with argument */
OP_COMMIT, /* 160 */
OP_COMMIT_ARG, /* 161 same, but with argument */
/* These are forced failure and success verbs. FAIL and ACCEPT do accept an
argument, but these cases can be compiled as, for example, (*MARK:X)(*FAIL)
without the need for a special opcode. */
OP_FAIL, /* 160 */
OP_ACCEPT, /* 161 */
OP_ASSERT_ACCEPT, /* 162 Used inside assertions */
OP_CLOSE, /* 163 Used before OP_ACCEPT to close open captures */
OP_FAIL, /* 162 */
OP_ACCEPT, /* 163 */
OP_ASSERT_ACCEPT, /* 164 Used inside assertions */
OP_CLOSE, /* 165 Used before OP_ACCEPT to close open captures */
/* This is used to skip a subpattern with a {0} quantifier */
OP_SKIPZERO, /* 164 */
OP_SKIPZERO, /* 166 */
/* This is used to identify a DEFINE group during compilation so that it can
be checked for having only one branch. It is changed to OP_FALSE before
compilation finishes. */
OP_DEFINE, /* 165 */
OP_DEFINE, /* 167 */
/* This is not an opcode, but is used to check that tables indexed by opcode
are the correct length, in order to catch updating errors - there have been
@ -1585,7 +1596,7 @@ enum {
/* *** NOTE NOTE NOTE *** Whenever the list above is updated, the two macro
definitions that follow must also be updated to match. There are also tables
called "opcode_possessify" in pcre2_compile.c and "coptable" and "poptable" in
pcre2_dfa_exec.c that must be updated. */
pcre2_dfa_match.c that must be updated. */
/* This macro defines textual names for all the opcodes. These are used only
@ -1618,7 +1629,9 @@ some cases doesn't actually use these names at all). */
"class", "nclass", "xclass", "Ref", "Refi", "DnRef", "DnRefi", \
"Recurse", "Callout", "CalloutStr", \
"Alt", "Ket", "KetRmax", "KetRmin", "KetRpos", \
"Reverse", "Assert", "Assert not", "AssertB", "AssertB not", \
"Reverse", "Assert", "Assert not", \
"Assert back", "Assert back not", \
"Non-atomic assert", "Non-atomic assert back", \
"Once", \
"Script run", \
"Bra", "BraPos", "CBra", "CBraPos", \
@ -1703,6 +1716,8 @@ in UTF-8 mode. The code that uses this table must know about such things. */
1+LINK_SIZE, /* Assert not */ \
1+LINK_SIZE, /* Assert behind */ \
1+LINK_SIZE, /* Assert behind not */ \
1+LINK_SIZE, /* NA Assert */ \
1+LINK_SIZE, /* NA Assert behind */ \
1+LINK_SIZE, /* ONCE */ \
1+LINK_SIZE, /* SCRIPT_RUN */ \
1+LINK_SIZE, /* BRA */ \

View File

@ -205,19 +205,19 @@ whether its argument, which is assumed to be one code unit, is less than 256.
The CHMAX_255 macro does not assume one code unit. The maximum length of a MARK
name must fit in one code unit; currently it is set to 255 or 65535. The
TABLE_GET macro is used to access elements of tables containing exactly 256
items. When code points can be greater than 255, a check is needed before
accessing these tables. */
items. Its argument is a code unit. When code points can be greater than 255, a
check is needed before accessing these tables. */
#if PCRE2_CODE_UNIT_WIDTH == 8
#define MAX_255(c) TRUE
#define MAX_MARK ((1u << 8) - 1)
#define TABLE_GET(c, table, default) ((table)[c])
#ifdef SUPPORT_UNICODE
#define SUPPORT_WIDE_CHARS
#define CHMAX_255(c) ((c) <= 255u)
#else
#define CHMAX_255(c) TRUE
#endif /* SUPPORT_UNICODE */
#define TABLE_GET(c, table, default) ((table)[c])
#else /* Code units are 16 or 32 bits */
#define CHMAX_255(c) ((c) <= 255u)
@ -228,7 +228,6 @@ accessing these tables. */
#endif
/* ----------------- Character-handling macros ----------------- */
/* There is a proposed future special "UTF-21" mode, in which only the lowest
@ -854,6 +853,7 @@ typedef struct match_block {
uint32_t match_call_count; /* Number of times a new frame is created */
BOOL hitend; /* Hit the end of the subject at some point */
BOOL hasthen; /* Pattern contains (*THEN) */
BOOL allowemptypartial; /* Allow empty hard partial */
const uint8_t *lcc; /* Points to lower casing table */
const uint8_t *fcc; /* Points to case-flipping table */
const uint8_t *ctypes; /* Points to table of type maps */
@ -866,6 +866,7 @@ typedef struct match_block {
PCRE2_SPTR name_table; /* Table of group names */
PCRE2_SPTR start_code; /* For use when recursing */
PCRE2_SPTR start_subject; /* Start of the subject string */
PCRE2_SPTR check_subject; /* Where UTF-checked from */
PCRE2_SPTR end_subject; /* End of the subject string */
PCRE2_SPTR end_match_ptr; /* Subject position at end match */
PCRE2_SPTR start_used_ptr; /* Earliest consulted character */
@ -908,6 +909,7 @@ typedef struct dfa_match_block {
uint32_t poptions; /* Pattern options */
uint32_t nltype; /* Newline type */
uint32_t nllen; /* Newline string length */
BOOL allowemptypartial; /* Allow empty hard partial */
PCRE2_UCHAR nl[4]; /* Newline string when fixed */
uint16_t bsr_convention; /* \R interpretation */
pcre2_callout_block *cb; /* Points to a callout block */

File diff suppressed because it is too large Load Diff

View File

@ -74,7 +74,6 @@ Arguments:
options option bits
match_data points to a match_data block
mcontext points to a match context
jit_stack points to a JIT stack
Returns: > 0 => success; value is the number of ovector pairs filled
= 0 => success, but ovector is not big enough

View File

@ -0,0 +1,321 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
This module by Zoltan Herczeg and Sebastian Pop
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016-2019 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
# if defined(FFCS)
# if defined(FF_UTF)
# define FF_FUN ffcs_utf
# else
# define FF_FUN ffcs
# endif
# elif defined(FFCS_2)
# if defined(FF_UTF)
# define FF_FUN ffcs_2_utf
# else
# define FF_FUN ffcs_2
# endif
# elif defined(FFCS_MASK)
# if defined(FF_UTF)
# define FF_FUN ffcs_mask_utf
# else
# define FF_FUN ffcs_mask
# endif
# elif defined(FFCPS_0)
# if defined (FF_UTF)
# define FF_FUN ffcps_0_utf
# else
# define FF_FUN ffcps_0
# endif
# elif defined (FFCPS_1)
# if defined (FF_UTF)
# define FF_FUN ffcps_1_utf
# else
# define FF_FUN ffcps_1
# endif
# elif defined (FFCPS_DEFAULT)
# if defined (FF_UTF)
# define FF_FUN ffcps_default_utf
# else
# define FF_FUN ffcps_default
# endif
# endif
static sljit_u8* SLJIT_FUNC FF_FUN(sljit_u8 *str_end, sljit_u8 *str_ptr, sljit_uw offs1, sljit_uw offs2, sljit_uw chars)
#undef FF_FUN
{
quad_word qw;
int_char ic;
ic.x = chars;
#if defined(FFCS)
sljit_u8 c1 = ic.c.c1;
vect_t vc1 = VDUPQ(c1);
#elif defined(FFCS_2)
sljit_u8 c1 = ic.c.c1;
vect_t vc1 = VDUPQ(c1);
sljit_u8 c2 = ic.c.c2;
vect_t vc2 = VDUPQ(c2);
#elif defined(FFCS_MASK)
sljit_u8 c1 = ic.c.c1;
vect_t vc1 = VDUPQ(c1);
sljit_u8 mask = ic.c.c2;
vect_t vmask = VDUPQ(mask);
#endif
#if defined(FFCPS)
compare_type compare1_type = compare_match1;
compare_type compare2_type = compare_match1;
vect_t cmp1a, cmp1b, cmp2a, cmp2b;
const sljit_u32 diff = IN_UCHARS(offs1 - offs2);
PCRE2_UCHAR char1a = ic.c.c1;
PCRE2_UCHAR char2a = ic.c.c3;
# ifdef FFCPS_CHAR1A2A
cmp1a = VDUPQ(char1a);
cmp2a = VDUPQ(char2a);
# else
PCRE2_UCHAR char1b = ic.c.c2;
PCRE2_UCHAR char2b = ic.c.c4;
if (char1a == char1b)
cmp1a = VDUPQ(char1a);
else
{
sljit_u32 bit1 = char1a ^ char1b;
if (is_powerof2(bit1))
{
compare1_type = compare_match1i;
cmp1a = VDUPQ(char1a | bit1);
cmp1b = VDUPQ(bit1);
}
else
{
compare1_type = compare_match2;
cmp1a = VDUPQ(char1a);
cmp1b = VDUPQ(char1b);
}
}
if (char2a == char2b)
cmp2a = VDUPQ(char2a);
else
{
sljit_u32 bit2 = char2a ^ char2b;
if (is_powerof2(bit2))
{
compare2_type = compare_match1i;
cmp2a = VDUPQ(char2a | bit2);
cmp2b = VDUPQ(bit2);
}
else
{
compare2_type = compare_match2;
cmp2a = VDUPQ(char2a);
cmp2b = VDUPQ(char2b);
}
}
# endif
str_ptr += IN_UCHARS(offs1);
#endif
#if PCRE2_CODE_UNIT_WIDTH != 8
vect_t char_mask = VDUPQ(0xff);
#endif
#if defined(FF_UTF)
restart:;
#endif
#if defined(FFCPS)
sljit_u8 *p1 = str_ptr - diff;
#endif
sljit_s32 align_offset = ((uint64_t)str_ptr & 0xf);
str_ptr = (sljit_u8 *) ((uint64_t)str_ptr & ~0xf);
vect_t data = VLD1Q(str_ptr);
#if PCRE2_CODE_UNIT_WIDTH != 8
data = VANDQ(data, char_mask);
#endif
#if defined(FFCS)
vect_t eq = VCEQQ(data, vc1);
#elif defined(FFCS_2)
vect_t eq1 = VCEQQ(data, vc1);
vect_t eq2 = VCEQQ(data, vc2);
vect_t eq = VORRQ(eq1, eq2);
#elif defined(FFCS_MASK)
vect_t eq = VORRQ(data, vmask);
eq = VCEQQ(eq, vc1);
#elif defined(FFCPS)
# if defined(FFCPS_DIFF1)
vect_t prev_data = data;
# endif
vect_t data2;
if (p1 < str_ptr)
{
data2 = VLD1Q(str_ptr - diff);
#if PCRE2_CODE_UNIT_WIDTH != 8
data2 = VANDQ(data2, char_mask);
#endif
}
else
data2 = shift_left_n_lanes(data, offs1 - offs2);
data = fast_forward_char_pair_compare(compare1_type, data, cmp1a, cmp1b);
data2 = fast_forward_char_pair_compare(compare2_type, data2, cmp2a, cmp2b);
vect_t eq = VANDQ(data, data2);
#endif
VST1Q(qw.mem, eq);
/* Ignore matches before the first STR_PTR. */
if (align_offset < 8)
{
qw.dw[0] >>= align_offset * 8;
if (qw.dw[0])
{
str_ptr += align_offset + __builtin_ctzll(qw.dw[0]) / 8;
goto match;
}
if (qw.dw[1])
{
str_ptr += 8 + __builtin_ctzll(qw.dw[1]) / 8;
goto match;
}
}
else
{
qw.dw[1] >>= (align_offset - 8) * 8;
if (qw.dw[1])
{
str_ptr += align_offset + __builtin_ctzll(qw.dw[1]) / 8;
goto match;
}
}
str_ptr += 16;
while (str_ptr < str_end)
{
vect_t orig_data = VLD1Q(str_ptr);
#if PCRE2_CODE_UNIT_WIDTH != 8
orig_data = VANDQ(orig_data, char_mask);
#endif
data = orig_data;
#if defined(FFCS)
eq = VCEQQ(data, vc1);
#elif defined(FFCS_2)
eq1 = VCEQQ(data, vc1);
eq2 = VCEQQ(data, vc2);
eq = VORRQ(eq1, eq2);
#elif defined(FFCS_MASK)
eq = VORRQ(data, vmask);
eq = VCEQQ(eq, vc1);
#endif
#if defined(FFCPS)
# if defined (FFCPS_DIFF1)
data2 = VEXTQ(prev_data, data, VECTOR_FACTOR - 1);
# else
data2 = VLD1Q(str_ptr - diff);
# if PCRE2_CODE_UNIT_WIDTH != 8
data2 = VANDQ(data2, char_mask);
# endif
# endif
# ifdef FFCPS_CHAR1A2A
data = VCEQQ(data, cmp1a);
data2 = VCEQQ(data2, cmp2a);
# else
data = fast_forward_char_pair_compare(compare1_type, data, cmp1a, cmp1b);
data2 = fast_forward_char_pair_compare(compare2_type, data2, cmp2a, cmp2b);
# endif
eq = VANDQ(data, data2);
#endif
VST1Q(qw.mem, eq);
if (qw.dw[0])
str_ptr += __builtin_ctzll(qw.dw[0]) / 8;
else if (qw.dw[1])
str_ptr += 8 + __builtin_ctzll(qw.dw[1]) / 8;
else {
str_ptr += 16;
#if defined (FFCPS_DIFF1)
prev_data = orig_data;
#endif
continue;
}
match:;
if (str_ptr >= str_end)
/* Failed match. */
return NULL;
#if defined(FF_UTF)
if (utf_continue(str_ptr + IN_UCHARS(-offs1)))
{
/* Not a match. */
str_ptr += IN_UCHARS(1);
goto restart;
}
#endif
/* Match. */
#if defined (FFCPS)
str_ptr -= IN_UCHARS(offs1);
#endif
return str_ptr;
}
/* Failed match. */
return NULL;
}

View File

@ -0,0 +1,993 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
This module by Zoltan Herczeg
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016-2019 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) && !(defined SUPPORT_VALGRIND)
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
static struct sljit_jump *jump_if_utf_char_start(struct sljit_compiler *compiler, sljit_s32 reg)
{
#if PCRE2_CODE_UNIT_WIDTH == 8
OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xc0);
return CMP(SLJIT_NOT_EQUAL, reg, 0, SLJIT_IMM, 0x80);
#elif PCRE2_CODE_UNIT_WIDTH == 16
OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xfc00);
return CMP(SLJIT_NOT_EQUAL, reg, 0, SLJIT_IMM, 0xdc00);
#else
#error "Unknown code width"
#endif
}
#endif
static sljit_s32 character_to_int32(PCRE2_UCHAR chr)
{
sljit_u32 value = chr;
#if PCRE2_CODE_UNIT_WIDTH == 8
#define SSE2_COMPARE_TYPE_INDEX 0
return (sljit_s32)((value << 24) | (value << 16) | (value << 8) | value);
#elif PCRE2_CODE_UNIT_WIDTH == 16
#define SSE2_COMPARE_TYPE_INDEX 1
return (sljit_s32)((value << 16) | value);
#elif PCRE2_CODE_UNIT_WIDTH == 32
#define SSE2_COMPARE_TYPE_INDEX 2
return (sljit_s32)(value);
#else
#error "Unsupported unit width"
#endif
}
static void load_from_mem_sse2(struct sljit_compiler *compiler, sljit_s32 dst_xmm_reg, sljit_s32 src_general_reg, sljit_s8 offset)
{
sljit_u8 instruction[5];
SLJIT_ASSERT(dst_xmm_reg < 8);
SLJIT_ASSERT(src_general_reg < 8);
/* MOVDQA xmm1, xmm2/m128 */
instruction[0] = ((sljit_u8)offset & 0xf) == 0 ? 0x66 : 0xf3;
instruction[1] = 0x0f;
instruction[2] = 0x6f;
if (offset == 0)
{
instruction[3] = (dst_xmm_reg << 3) | src_general_reg;
sljit_emit_op_custom(compiler, instruction, 4);
return;
}
instruction[3] = 0x40 | (dst_xmm_reg << 3) | src_general_reg;
instruction[4] = (sljit_u8)offset;
sljit_emit_op_custom(compiler, instruction, 5);
}
typedef enum {
sse2_compare_match1,
sse2_compare_match1i,
sse2_compare_match2,
} sse2_compare_type;
static void fast_forward_char_pair_sse2_compare(struct sljit_compiler *compiler, sse2_compare_type compare_type,
int step, sljit_s32 dst_ind, sljit_s32 cmp1_ind, sljit_s32 cmp2_ind, sljit_s32 tmp_ind)
{
sljit_u8 instruction[4];
instruction[0] = 0x66;
instruction[1] = 0x0f;
SLJIT_ASSERT(step >= 0 && step <= 3);
if (compare_type != sse2_compare_match2)
{
if (step == 0)
{
if (compare_type == sse2_compare_match1i)
{
/* POR xmm1, xmm2/m128 */
/* instruction[0] = 0x66; */
/* instruction[1] = 0x0f; */
instruction[2] = 0xeb;
instruction[3] = 0xc0 | (dst_ind << 3) | cmp2_ind;
sljit_emit_op_custom(compiler, instruction, 4);
}
return;
}
if (step != 2)
return;
/* PCMPEQB/W/D xmm1, xmm2/m128 */
/* instruction[0] = 0x66; */
/* instruction[1] = 0x0f; */
instruction[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX;
instruction[3] = 0xc0 | (dst_ind << 3) | cmp1_ind;
sljit_emit_op_custom(compiler, instruction, 4);
return;
}
switch (step)
{
case 0:
/* MOVDQA xmm1, xmm2/m128 */
/* instruction[0] = 0x66; */
/* instruction[1] = 0x0f; */
instruction[2] = 0x6f;
instruction[3] = 0xc0 | (tmp_ind << 3) | dst_ind;
sljit_emit_op_custom(compiler, instruction, 4);
return;
case 1:
/* PCMPEQB/W/D xmm1, xmm2/m128 */
/* instruction[0] = 0x66; */
/* instruction[1] = 0x0f; */
instruction[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX;
instruction[3] = 0xc0 | (dst_ind << 3) | cmp1_ind;
sljit_emit_op_custom(compiler, instruction, 4);
return;
case 2:
/* PCMPEQB/W/D xmm1, xmm2/m128 */
/* instruction[0] = 0x66; */
/* instruction[1] = 0x0f; */
instruction[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX;
instruction[3] = 0xc0 | (tmp_ind << 3) | cmp2_ind;
sljit_emit_op_custom(compiler, instruction, 4);
return;
case 3:
/* POR xmm1, xmm2/m128 */
/* instruction[0] = 0x66; */
/* instruction[1] = 0x0f; */
instruction[2] = 0xeb;
instruction[3] = 0xc0 | (dst_ind << 3) | tmp_ind;
sljit_emit_op_custom(compiler, instruction, 4);
return;
}
}
#define JIT_HAS_FAST_FORWARD_CHAR_SIMD (sljit_has_cpu_feature(SLJIT_HAS_SSE2))
static void fast_forward_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset)
{
DEFINE_COMPILER;
struct sljit_label *start;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_label *restart;
#endif
struct sljit_jump *quit;
struct sljit_jump *partial_quit[2];
sse2_compare_type compare_type = sse2_compare_match1;
sljit_u8 instruction[8];
sljit_s32 tmp1_reg_ind = sljit_get_register_index(TMP1);
sljit_s32 str_ptr_reg_ind = sljit_get_register_index(STR_PTR);
sljit_s32 data_ind = 0;
sljit_s32 tmp_ind = 1;
sljit_s32 cmp1_ind = 2;
sljit_s32 cmp2_ind = 3;
sljit_u32 bit = 0;
int i;
SLJIT_UNUSED_ARG(offset);
if (char1 != char2)
{
bit = char1 ^ char2;
compare_type = sse2_compare_match1i;
if (!is_powerof2(bit))
{
bit = 0;
compare_type = sse2_compare_match2;
}
}
partial_quit[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
if (common->mode == PCRE2_JIT_COMPLETE)
add_jump(compiler, &common->failed_match, partial_quit[0]);
/* First part (unaligned start) */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1 | bit));
SLJIT_ASSERT(tmp1_reg_ind < 8);
/* MOVD xmm, r/m32 */
instruction[0] = 0x66;
instruction[1] = 0x0f;
instruction[2] = 0x6e;
instruction[3] = 0xc0 | (cmp1_ind << 3) | tmp1_reg_ind;
sljit_emit_op_custom(compiler, instruction, 4);
if (char1 != char2)
{
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(bit != 0 ? bit : char2));
/* MOVD xmm, r/m32 */
instruction[3] = 0xc0 | (cmp2_ind << 3) | tmp1_reg_ind;
sljit_emit_op_custom(compiler, instruction, 4);
}
OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);
/* PSHUFD xmm1, xmm2/m128, imm8 */
/* instruction[0] = 0x66; */
/* instruction[1] = 0x0f; */
instruction[2] = 0x70;
instruction[3] = 0xc0 | (cmp1_ind << 3) | cmp1_ind;
instruction[4] = 0;
sljit_emit_op_custom(compiler, instruction, 5);
if (char1 != char2)
{
/* PSHUFD xmm1, xmm2/m128, imm8 */
instruction[3] = 0xc0 | (cmp2_ind << 3) | cmp2_ind;
sljit_emit_op_custom(compiler, instruction, 5);
}
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
restart = LABEL();
#endif
OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, ~0xf);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf);
load_from_mem_sse2(compiler, data_ind, str_ptr_reg_ind, 0);
for (i = 0; i < 4; i++)
fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
/* PMOVMSKB reg, xmm */
/* instruction[0] = 0x66; */
/* instruction[1] = 0x0f; */
instruction[2] = 0xd7;
instruction[3] = 0xc0 | (tmp1_reg_ind << 3) | data_ind;
sljit_emit_op_custom(compiler, instruction, 4);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0);
quit = CMP(SLJIT_NOT_ZERO, TMP1, 0, SLJIT_IMM, 0);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
/* Second part (aligned) */
start = LABEL();
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16);
partial_quit[1] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
if (common->mode == PCRE2_JIT_COMPLETE)
add_jump(compiler, &common->failed_match, partial_quit[1]);
load_from_mem_sse2(compiler, data_ind, str_ptr_reg_ind, 0);
for (i = 0; i < 4; i++)
fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
/* PMOVMSKB reg, xmm */
/* instruction[0] = 0x66; */
/* instruction[1] = 0x0f; */
instruction[2] = 0xd7;
instruction[3] = 0xc0 | (tmp1_reg_ind << 3) | data_ind;
sljit_emit_op_custom(compiler, instruction, 4);
CMPTO(SLJIT_ZERO, TMP1, 0, SLJIT_IMM, 0, start);
JUMPHERE(quit);
/* BSF r32, r/m32 */
instruction[0] = 0x0f;
instruction[1] = 0xbc;
instruction[2] = 0xc0 | (tmp1_reg_ind << 3) | tmp1_reg_ind;
sljit_emit_op_custom(compiler, instruction, 3);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
if (common->mode != PCRE2_JIT_COMPLETE)
{
JUMPHERE(partial_quit[0]);
JUMPHERE(partial_quit[1]);
OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0);
CMOV(SLJIT_GREATER, STR_PTR, STR_END, 0);
}
else
add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf && offset > 0)
{
SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE);
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset));
quit = jump_if_utf_char_start(compiler, TMP1);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);
JUMPTO(SLJIT_JUMP, restart);
JUMPHERE(quit);
}
#endif
}
#ifndef _WIN64
static SLJIT_INLINE sljit_u32 max_fast_forward_char_pair_offset(void)
{
#if PCRE2_CODE_UNIT_WIDTH == 8
return 15;
#elif PCRE2_CODE_UNIT_WIDTH == 16
return 7;
#elif PCRE2_CODE_UNIT_WIDTH == 32
return 3;
#else
#error "Unsupported unit width"
#endif
}
#define JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD (sljit_has_cpu_feature(SLJIT_HAS_SSE2))
static void fast_forward_char_pair_simd(compiler_common *common, sljit_s32 offs1,
PCRE2_UCHAR char1a, PCRE2_UCHAR char1b, sljit_s32 offs2, PCRE2_UCHAR char2a, PCRE2_UCHAR char2b)
{
DEFINE_COMPILER;
sse2_compare_type compare1_type = sse2_compare_match1;
sse2_compare_type compare2_type = sse2_compare_match1;
sljit_u32 bit1 = 0;
sljit_u32 bit2 = 0;
sljit_u32 diff = IN_UCHARS(offs1 - offs2);
sljit_s32 tmp1_reg_ind = sljit_get_register_index(TMP1);
sljit_s32 tmp2_reg_ind = sljit_get_register_index(TMP2);
sljit_s32 str_ptr_reg_ind = sljit_get_register_index(STR_PTR);
sljit_s32 data1_ind = 0;
sljit_s32 data2_ind = 1;
sljit_s32 tmp1_ind = 2;
sljit_s32 tmp2_ind = 3;
sljit_s32 cmp1a_ind = 4;
sljit_s32 cmp1b_ind = 5;
sljit_s32 cmp2a_ind = 6;
sljit_s32 cmp2b_ind = 7;
struct sljit_label *start;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_label *restart;
#endif
struct sljit_jump *jump[2];
sljit_u8 instruction[8];
int i;
SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE && offs1 > offs2);
SLJIT_ASSERT(diff <= IN_UCHARS(max_fast_forward_char_pair_offset()));
SLJIT_ASSERT(tmp1_reg_ind < 8 && tmp2_reg_ind == 1);
/* Initialize. */
if (common->match_end_ptr != 0)
{
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offs1 + 1));
OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, STR_END, 0);
CMOV(SLJIT_LESS, STR_END, TMP1, 0);
}
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1));
add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
/* MOVD xmm, r/m32 */
instruction[0] = 0x66;
instruction[1] = 0x0f;
instruction[2] = 0x6e;
if (char1a == char1b)
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1a));
else
{
bit1 = char1a ^ char1b;
if (is_powerof2(bit1))
{
compare1_type = sse2_compare_match1i;
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1a | bit1));
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(bit1));
}
else
{
compare1_type = sse2_compare_match2;
bit1 = 0;
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1a));
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(char1b));
}
}
instruction[3] = 0xc0 | (cmp1a_ind << 3) | tmp1_reg_ind;
sljit_emit_op_custom(compiler, instruction, 4);
if (char1a != char1b)
{
instruction[3] = 0xc0 | (cmp1b_ind << 3) | tmp2_reg_ind;
sljit_emit_op_custom(compiler, instruction, 4);
}
if (char2a == char2b)
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char2a));
else
{
bit2 = char2a ^ char2b;
if (is_powerof2(bit2))
{
compare2_type = sse2_compare_match1i;
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char2a | bit2));
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(bit2));
}
else
{
compare2_type = sse2_compare_match2;
bit2 = 0;
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char2a));
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(char2b));
}
}
instruction[3] = 0xc0 | (cmp2a_ind << 3) | tmp1_reg_ind;
sljit_emit_op_custom(compiler, instruction, 4);
if (char2a != char2b)
{
instruction[3] = 0xc0 | (cmp2b_ind << 3) | tmp2_reg_ind;
sljit_emit_op_custom(compiler, instruction, 4);
}
/* PSHUFD xmm1, xmm2/m128, imm8 */
/* instruction[0] = 0x66; */
/* instruction[1] = 0x0f; */
instruction[2] = 0x70;
instruction[4] = 0;
instruction[3] = 0xc0 | (cmp1a_ind << 3) | cmp1a_ind;
sljit_emit_op_custom(compiler, instruction, 5);
if (char1a != char1b)
{
instruction[3] = 0xc0 | (cmp1b_ind << 3) | cmp1b_ind;
sljit_emit_op_custom(compiler, instruction, 5);
}
instruction[3] = 0xc0 | (cmp2a_ind << 3) | cmp2a_ind;
sljit_emit_op_custom(compiler, instruction, 5);
if (char2a != char2b)
{
instruction[3] = 0xc0 | (cmp2b_ind << 3) | cmp2b_ind;
sljit_emit_op_custom(compiler, instruction, 5);
}
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
restart = LABEL();
#endif
OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, diff);
OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);
OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, ~0xf);
load_from_mem_sse2(compiler, data1_ind, str_ptr_reg_ind, 0);
jump[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_PTR, 0);
load_from_mem_sse2(compiler, data2_ind, str_ptr_reg_ind, -(sljit_s8)diff);
jump[1] = JUMP(SLJIT_JUMP);
JUMPHERE(jump[0]);
/* MOVDQA xmm1, xmm2/m128 */
/* instruction[0] = 0x66; */
/* instruction[1] = 0x0f; */
instruction[2] = 0x6f;
instruction[3] = 0xc0 | (data2_ind << 3) | data1_ind;
sljit_emit_op_custom(compiler, instruction, 4);
/* PSLLDQ xmm1, imm8 */
/* instruction[0] = 0x66; */
/* instruction[1] = 0x0f; */
instruction[2] = 0x73;
instruction[3] = 0xc0 | (7 << 3) | data2_ind;
instruction[4] = diff;
sljit_emit_op_custom(compiler, instruction, 5);
JUMPHERE(jump[1]);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf);
for (i = 0; i < 4; i++)
{
fast_forward_char_pair_sse2_compare(compiler, compare2_type, i, data2_ind, cmp2a_ind, cmp2b_ind, tmp2_ind);
fast_forward_char_pair_sse2_compare(compiler, compare1_type, i, data1_ind, cmp1a_ind, cmp1b_ind, tmp1_ind);
}
/* PAND xmm1, xmm2/m128 */
/* instruction[0] = 0x66; */
/* instruction[1] = 0x0f; */
instruction[2] = 0xdb;
instruction[3] = 0xc0 | (data1_ind << 3) | data2_ind;
sljit_emit_op_custom(compiler, instruction, 4);
/* PMOVMSKB reg, xmm */
/* instruction[0] = 0x66; */
/* instruction[1] = 0x0f; */
instruction[2] = 0xd7;
instruction[3] = 0xc0 | (tmp1_reg_ind << 3) | 0;
sljit_emit_op_custom(compiler, instruction, 4);
/* Ignore matches before the first STR_PTR. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0);
jump[0] = CMP(SLJIT_NOT_ZERO, TMP1, 0, SLJIT_IMM, 0);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
/* Main loop. */
start = LABEL();
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16);
add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
load_from_mem_sse2(compiler, data1_ind, str_ptr_reg_ind, 0);
load_from_mem_sse2(compiler, data2_ind, str_ptr_reg_ind, -(sljit_s8)diff);
for (i = 0; i < 4; i++)
{
fast_forward_char_pair_sse2_compare(compiler, compare1_type, i, data1_ind, cmp1a_ind, cmp1b_ind, tmp2_ind);
fast_forward_char_pair_sse2_compare(compiler, compare2_type, i, data2_ind, cmp2a_ind, cmp2b_ind, tmp1_ind);
}
/* PAND xmm1, xmm2/m128 */
/* instruction[0] = 0x66; */
/* instruction[1] = 0x0f; */
instruction[2] = 0xdb;
instruction[3] = 0xc0 | (data1_ind << 3) | data2_ind;
sljit_emit_op_custom(compiler, instruction, 4);
/* PMOVMSKB reg, xmm */
/* instruction[0] = 0x66; */
/* instruction[1] = 0x0f; */
instruction[2] = 0xd7;
instruction[3] = 0xc0 | (tmp1_reg_ind << 3) | 0;
sljit_emit_op_custom(compiler, instruction, 4);
CMPTO(SLJIT_ZERO, TMP1, 0, SLJIT_IMM, 0, start);
JUMPHERE(jump[0]);
/* BSF r32, r/m32 */
instruction[0] = 0x0f;
instruction[1] = 0xbc;
instruction[2] = 0xc0 | (tmp1_reg_ind << 3) | tmp1_reg_ind;
sljit_emit_op_custom(compiler, instruction, 3);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
if (common->match_end_ptr != 0)
OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf)
{
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offs1));
jump[0] = jump_if_utf_char_start(compiler, TMP1);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, restart);
add_jump(compiler, &common->failed_match, JUMP(SLJIT_JUMP));
JUMPHERE(jump[0]);
}
#endif
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1));
if (common->match_end_ptr != 0)
OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
}
#endif /* !_WIN64 */
#undef SSE2_COMPARE_TYPE_INDEX
#endif /* SLJIT_CONFIG_X86 && !SUPPORT_VALGRIND */
#if (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64 && (defined __ARM_NEON || defined __ARM_NEON__))
#include <arm_neon.h>
typedef union {
unsigned int x;
struct { unsigned char c1, c2, c3, c4; } c;
} int_char;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
static SLJIT_INLINE int utf_continue(sljit_u8 *s)
{
#if PCRE2_CODE_UNIT_WIDTH == 8
return (*s & 0xc0) == 0x80;
#elif PCRE2_CODE_UNIT_WIDTH == 16
return (*s & 0xfc00) == 0xdc00;
#else
#error "Unknown code width"
#endif
}
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
#if PCRE2_CODE_UNIT_WIDTH == 8
# define VECTOR_FACTOR 16
# define vect_t uint8x16_t
# define VLD1Q(X) vld1q_u8((sljit_u8 *)(X))
# define VCEQQ vceqq_u8
# define VORRQ vorrq_u8
# define VST1Q vst1q_u8
# define VDUPQ vdupq_n_u8
# define VEXTQ vextq_u8
# define VANDQ vandq_u8
typedef union {
uint8_t mem[16];
uint64_t dw[2];
} quad_word;
#elif PCRE2_CODE_UNIT_WIDTH == 16
# define VECTOR_FACTOR 8
# define vect_t uint16x8_t
# define VLD1Q(X) vld1q_u16((sljit_u16 *)(X))
# define VCEQQ vceqq_u16
# define VORRQ vorrq_u16
# define VST1Q vst1q_u16
# define VDUPQ vdupq_n_u16
# define VEXTQ vextq_u16
# define VANDQ vandq_u16
typedef union {
uint16_t mem[8];
uint64_t dw[2];
} quad_word;
#else
# define VECTOR_FACTOR 4
# define vect_t uint32x4_t
# define VLD1Q(X) vld1q_u32((sljit_u32 *)(X))
# define VCEQQ vceqq_u32
# define VORRQ vorrq_u32
# define VST1Q vst1q_u32
# define VDUPQ vdupq_n_u32
# define VEXTQ vextq_u32
# define VANDQ vandq_u32
typedef union {
uint32_t mem[4];
uint64_t dw[2];
} quad_word;
#endif
#define FFCS
#include "pcre2_jit_neon_inc.h"
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
# define FF_UTF
# include "pcre2_jit_neon_inc.h"
# undef FF_UTF
#endif
#undef FFCS
#define FFCS_2
#include "pcre2_jit_neon_inc.h"
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
# define FF_UTF
# include "pcre2_jit_neon_inc.h"
# undef FF_UTF
#endif
#undef FFCS_2
#define FFCS_MASK
#include "pcre2_jit_neon_inc.h"
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
# define FF_UTF
# include "pcre2_jit_neon_inc.h"
# undef FF_UTF
#endif
#undef FFCS_MASK
#define JIT_HAS_FAST_FORWARD_CHAR_SIMD 1
static void fast_forward_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset)
{
DEFINE_COMPILER;
int_char ic;
struct sljit_jump *partial_quit;
/* Save temporary registers. */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STR_PTR, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP3, 0);
/* Prepare function arguments */
OP1(SLJIT_MOV, SLJIT_R0, 0, STR_END, 0);
OP1(SLJIT_MOV, SLJIT_R1, 0, STR_PTR, 0);
OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, offset);
if (char1 == char2)
{
ic.c.c1 = char1;
ic.c.c2 = char2;
OP1(SLJIT_MOV, SLJIT_R4, 0, SLJIT_IMM, ic.x);
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf && offset > 0)
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(UW) | SLJIT_ARG3(UW) | SLJIT_ARG4(UW),
SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcs_utf));
else
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(UW) | SLJIT_ARG3(UW) | SLJIT_ARG4(UW),
SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcs));
#else
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(UW) | SLJIT_ARG3(UW) | SLJIT_ARG4(UW),
SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcs));
#endif
}
else
{
PCRE2_UCHAR mask = char1 ^ char2;
if (is_powerof2(mask))
{
ic.c.c1 = char1 | mask;
ic.c.c2 = mask;
OP1(SLJIT_MOV, SLJIT_R4, 0, SLJIT_IMM, ic.x);
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf && offset > 0)
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(UW) | SLJIT_ARG3(UW) | SLJIT_ARG4(UW),
SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcs_mask_utf));
else
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(UW) | SLJIT_ARG3(UW) | SLJIT_ARG4(UW),
SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcs_mask));
#else
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(UW) | SLJIT_ARG3(UW) | SLJIT_ARG4(UW),
SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcs_mask));
#endif
}
else
{
ic.c.c1 = char1;
ic.c.c2 = char2;
OP1(SLJIT_MOV, SLJIT_R4, 0, SLJIT_IMM, ic.x);
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf && offset > 0)
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(UW) | SLJIT_ARG3(UW) | SLJIT_ARG4(UW),
SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcs_2_utf));
else
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(UW) | SLJIT_ARG3(UW) | SLJIT_ARG4(UW),
SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcs_2));
#else
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(UW) | SLJIT_ARG3(UW) | SLJIT_ARG4(UW),
SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcs_2));
#endif
}
}
/* Restore registers. */
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
/* Check return value. */
partial_quit = CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
if (common->mode == PCRE2_JIT_COMPLETE)
add_jump(compiler, &common->failed_match, partial_quit);
/* Fast forward STR_PTR to the result of memchr. */
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
if (common->mode != PCRE2_JIT_COMPLETE)
JUMPHERE(partial_quit);
}
typedef enum {
compare_match1,
compare_match1i,
compare_match2,
} compare_type;
static inline vect_t fast_forward_char_pair_compare(compare_type ctype, vect_t dst, vect_t cmp1, vect_t cmp2)
{
if (ctype == compare_match2)
{
vect_t tmp = dst;
dst = VCEQQ(dst, cmp1);
tmp = VCEQQ(tmp, cmp2);
dst = VORRQ(dst, tmp);
return dst;
}
if (ctype == compare_match1i)
dst = VORRQ(dst, cmp2);
dst = VCEQQ(dst, cmp1);
return dst;
}
static SLJIT_INLINE sljit_u32 max_fast_forward_char_pair_offset(void)
{
#if PCRE2_CODE_UNIT_WIDTH == 8
return 15;
#elif PCRE2_CODE_UNIT_WIDTH == 16
return 7;
#elif PCRE2_CODE_UNIT_WIDTH == 32
return 3;
#else
#error "Unsupported unit width"
#endif
}
/* ARM doesn't have a shift left across lanes. */
static SLJIT_INLINE vect_t shift_left_n_lanes(vect_t a, sljit_u8 n)
{
vect_t zero = VDUPQ(0);
SLJIT_ASSERT(0 < n && n < VECTOR_FACTOR);
/* VEXTQ takes an immediate as last argument. */
#define C(X) case X: return VEXTQ(zero, a, VECTOR_FACTOR - X);
switch (n)
{
C(1); C(2); C(3);
#if PCRE2_CODE_UNIT_WIDTH != 32
C(4); C(5); C(6); C(7);
# if PCRE2_CODE_UNIT_WIDTH != 16
C(8); C(9); C(10); C(11); C(12); C(13); C(14); C(15);
# endif
#endif
default:
/* Based on the ASSERT(0 < n && n < VECTOR_FACTOR) above, this won't
happen. The return is still here for compilers to not warn. */
return a;
}
}
#define FFCPS
#define FFCPS_DIFF1
#define FFCPS_CHAR1A2A
#define FFCPS_0
#include "pcre2_jit_neon_inc.h"
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
# define FF_UTF
# include "pcre2_jit_neon_inc.h"
# undef FF_UTF
#endif
#undef FFCPS_0
#undef FFCPS_CHAR1A2A
#define FFCPS_1
#include "pcre2_jit_neon_inc.h"
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
# define FF_UTF
# include "pcre2_jit_neon_inc.h"
# undef FF_UTF
#endif
#undef FFCPS_1
#undef FFCPS_DIFF1
#define FFCPS_DEFAULT
#include "pcre2_jit_neon_inc.h"
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
# define FF_UTF
# include "pcre2_jit_neon_inc.h"
# undef FF_UTF
#endif
#undef FFCPS
#define JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD 1
static void fast_forward_char_pair_simd(compiler_common *common, sljit_s32 offs1,
PCRE2_UCHAR char1a, PCRE2_UCHAR char1b, sljit_s32 offs2, PCRE2_UCHAR char2a, PCRE2_UCHAR char2b)
{
DEFINE_COMPILER;
sljit_u32 diff = IN_UCHARS(offs1 - offs2);
struct sljit_jump *partial_quit;
int_char ic;
SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE && offs1 > offs2);
SLJIT_ASSERT(diff <= IN_UCHARS(max_fast_forward_char_pair_offset()));
SLJIT_ASSERT(compiler->scratches == 5);
/* Save temporary register STR_PTR. */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STR_PTR, 0);
/* Prepare arguments for the function call. */
if (common->match_end_ptr == 0)
OP1(SLJIT_MOV, SLJIT_R0, 0, STR_END, 0);
else
{
OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
OP2(SLJIT_ADD, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, IN_UCHARS(offs1 + 1));
OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, STR_END, 0, SLJIT_R0, 0);
CMOV(SLJIT_LESS, SLJIT_R0, STR_END, 0);
}
OP1(SLJIT_MOV, SLJIT_R1, 0, STR_PTR, 0);
OP1(SLJIT_MOV_S32, SLJIT_R2, 0, SLJIT_IMM, offs1);
OP1(SLJIT_MOV_S32, SLJIT_R3, 0, SLJIT_IMM, offs2);
ic.c.c1 = char1a;
ic.c.c2 = char1b;
ic.c.c3 = char2a;
ic.c.c4 = char2b;
OP1(SLJIT_MOV_U32, SLJIT_R4, 0, SLJIT_IMM, ic.x);
if (diff == 1) {
if (char1a == char1b && char2a == char2b) {
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf)
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW) | SLJIT_ARG4(SW),
SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcps_0_utf));
else
#endif
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW) | SLJIT_ARG4(SW),
SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcps_0));
} else {
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf)
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW) | SLJIT_ARG4(SW),
SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcps_1_utf));
else
#endif
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW) | SLJIT_ARG4(SW),
SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcps_1));
}
} else {
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf)
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW) | SLJIT_ARG4(SW),
SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcps_default_utf));
else
#endif
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW) | SLJIT_ARG4(SW),
SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcps_default));
}
/* Restore STR_PTR register. */
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
/* Check return value. */
partial_quit = CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
add_jump(compiler, &common->failed_match, partial_quit);
/* Fast forward STR_PTR to the result of memchr. */
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
JUMPHERE(partial_quit);
}
#endif /* SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64 */

View File

@ -147,4 +147,15 @@ for (i = 0; i < 256; i++)
return yield;
}
#ifndef DFTABLES
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
pcre2_maketables_free(pcre2_general_context *gcontext, const uint8_t *tables)
{
if (gcontext)
gcontext->memctl.free((void *)tables, gcontext->memctl.memory_data);
else
free((void *)tables);
}
#endif
/* End of pcre2_maketables.c */

View File

@ -415,8 +415,7 @@ if (caseless)
else
#endif
/* Not in UTF mode */
/* Not in UTF mode */
{
for (; length > 0; length--)
{
@ -491,27 +490,32 @@ heap is used for a larger vector.
*************************************************/
/* These macros pack up tests that are used for partial matching several times
in the code. We set the "hit end" flag if the pointer is at the end of the
subject and also past the earliest inspected character (i.e. something has been
matched, even if not part of the actual matched string). For hard partial
matching, we then return immediately. The second one is used when we already
know we are past the end of the subject. */
in the code. The second one is used when we already know we are past the end of
the subject. We set the "hit end" flag if the pointer is at the end of the
subject and either (a) the pointer is past the earliest inspected character
(i.e. something has been matched, even if not part of the actual matched
string), or (b) the pattern contains a lookbehind. These are the conditions for
which adding more characters may allow the current match to continue.
For hard partial matching, we immediately return a partial match. Otherwise,
carrying on means that a complete match on the current subject will be sought.
A partial match is returned only if no complete match can be found. */
#define CHECK_PARTIAL()\
if (mb->partial != 0 && Feptr >= mb->end_subject && \
Feptr > mb->start_used_ptr) \
if (Feptr >= mb->end_subject) \
{ \
mb->hitend = TRUE; \
if (mb->partial > 1) return PCRE2_ERROR_PARTIAL; \
SCHECK_PARTIAL(); \
}
#define SCHECK_PARTIAL()\
if (mb->partial != 0 && Feptr > mb->start_used_ptr) \
if (mb->partial != 0 && \
(Feptr > mb->start_used_ptr || mb->allowemptypartial)) \
{ \
mb->hitend = TRUE; \
if (mb->partial > 1) return PCRE2_ERROR_PARTIAL; \
}
/* These macros are used to implement backtracking. They simulate a recursive
call to the match() function by means of a local vector of frames which
remember the backtracking points. */
@ -5127,6 +5131,8 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
case OP_ASSERT:
case OP_ASSERTBACK:
case OP_ASSERT_NA:
case OP_ASSERTBACK_NA:
Lframe_type = GF_NOCAPTURE | Fop;
for (;;)
{
@ -5412,7 +5418,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
{
while (number-- > 0)
{
if (Feptr <= mb->start_subject) RRETURN(MATCH_NOMATCH);
if (Feptr <= mb->check_subject) RRETURN(MATCH_NOMATCH);
Feptr--;
BACKCHAR(Feptr);
}
@ -5420,7 +5426,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
else
#endif
/* No UTF-8 support, or not in UTF-8 mode: count is byte count */
/* No UTF-8 support, or not in UTF-8 mode: count is code unit count */
{
if ((ptrdiff_t)number > Feptr - mb->start_subject) RRETURN(MATCH_NOMATCH);
@ -5472,15 +5478,16 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
/* If we are at the end of an assertion that is a condition, return a
match, discarding any intermediate backtracking points. Copy back the
captures into the frame before N so that they are set on return. Doing
this for all assertions, both positive and negative, seems to match what
Perl does. */
mark setting and the captures into the frame before N so that they are
set on return. Doing this for all assertions, both positive and negative,
seems to match what Perl does. */
if (GF_IDMASK(N->group_frame_type) == GF_CONDASSERT)
{
memcpy((char *)P + offsetof(heapframe, ovector), Fovector,
Foffset_top * sizeof(PCRE2_SIZE));
P->offset_top = Foffset_top;
P->mark = Fmark;
Fback_frame = (char *)F - (char *)P;
RRETURN(MATCH_MATCH);
}
@ -5496,10 +5503,20 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
case OP_SCOND:
break;
/* Positive assertions are like OP_ONCE, except that in addition the
/* Non-atomic positive assertions are like OP_BRA, except that the
subject pointer must be put back to where it was at the start of the
assertion. */
case OP_ASSERT_NA:
case OP_ASSERTBACK_NA:
if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
Feptr = P->eptr;
break;
/* Atomic positive assertions are like OP_ONCE, except that in addition
the subject pointer must be put back to where it was at the start of the
assertion. */
case OP_ASSERT:
case OP_ASSERTBACK:
if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
@ -5640,7 +5657,11 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
case OP_EOD:
if (Feptr < mb->end_subject) RRETURN(MATCH_NOMATCH);
SCHECK_PARTIAL();
if (mb->partial != 0)
{
mb->hitend = TRUE;
if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
}
Fecode++;
break;
@ -5665,7 +5686,11 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
/* Either at end of string or \n before end. */
SCHECK_PARTIAL();
if (mb->partial != 0)
{
mb->hitend = TRUE;
if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
}
Fecode++;
break;
@ -5743,7 +5768,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
case OP_NOT_WORD_BOUNDARY:
case OP_WORD_BOUNDARY:
if (Feptr == mb->start_subject) prev_is_word = FALSE; else
if (Feptr == mb->check_subject) prev_is_word = FALSE; else
{
PCRE2_SPTR lastptr = Feptr - 1;
#ifdef SUPPORT_UNICODE
@ -5946,6 +5971,7 @@ in rrc. */
#define LBL(val) case val: goto L_RM##val;
RETURN_SWITCH:
if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
if (Frdepth == 0) return rrc; /* Exit from the top level */
F = (heapframe *)((char *)F - Fback_frame); /* Backtrack */
mb->cb->callout_flags |= PCRE2_CALLOUT_BACKTRACK; /* Note for callouts */
@ -5999,9 +6025,9 @@ Arguments:
Returns: > 0 => success; value is the number of ovector pairs filled
= 0 => success, but ovector is not big enough
-1 => failed to match (PCRE2_ERROR_NOMATCH)
-2 => partial match (PCRE2_ERROR_PARTIAL)
< -2 => some kind of unexpected problem
= -1 => failed to match (PCRE2_ERROR_NOMATCH)
= -2 => partial match (PCRE2_ERROR_PARTIAL)
< -2 => some kind of unexpected problem
*/
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
@ -6014,7 +6040,6 @@ int was_zero_terminated = 0;
const uint8_t *start_bits = NULL;
const pcre2_real_code *re = (const pcre2_real_code *)code;
BOOL anchored;
BOOL firstline;
BOOL has_first_cu = FALSE;
@ -6022,6 +6047,11 @@ BOOL has_req_cu = FALSE;
BOOL startline;
BOOL utf;
#if PCRE2_CODE_UNIT_WIDTH == 8
BOOL memchr_not_found_first_cu = FALSE;
BOOL memchr_not_found_first_cu2 = FALSE;
#endif
PCRE2_UCHAR first_cu = 0;
PCRE2_UCHAR first_cu2 = 0;
PCRE2_UCHAR req_cu = 0;
@ -6029,10 +6059,23 @@ PCRE2_UCHAR req_cu2 = 0;
PCRE2_SPTR bumpalong_limit;
PCRE2_SPTR end_subject;
PCRE2_SPTR true_end_subject;
PCRE2_SPTR start_match = subject + start_offset;
PCRE2_SPTR req_cu_ptr = start_match - 1;
PCRE2_SPTR start_partial = NULL;
PCRE2_SPTR match_partial = NULL;
PCRE2_SPTR start_partial;
PCRE2_SPTR match_partial;
#ifdef SUPPORT_JIT
BOOL use_jit;
#endif
#ifdef SUPPORT_UNICODE
BOOL allow_invalid;
uint32_t fragment_options = 0;
#ifdef SUPPORT_JIT
BOOL jit_checked_utf = FALSE;
#endif
#endif
PCRE2_SIZE frame_size;
@ -6059,7 +6102,7 @@ if (length == PCRE2_ZERO_TERMINATED)
length = PRIV(strlen)(subject);
was_zero_terminated = 1;
}
end_subject = subject + length;
true_end_subject = end_subject = subject + length;
/* Plausibility checks */
@ -6095,12 +6138,24 @@ options |= (re->flags & FF) / ((FF & (~FF+1)) / (OO & (~OO+1)));
#undef FF
#undef OO
/* These two settings are used in the code for checking a UTF string that
follows immediately afterwards. Other values in the mb block are used only
during interpretive processing, not when the JIT support is in use, so they are
set up later. */
/* If the pattern was successfully studied with JIT support, we will run the
JIT executable instead of the rest of this function. Most options must be set
at compile time for the JIT code to be usable. */
#ifdef SUPPORT_JIT
use_jit = (re->executable_jit != NULL &&
(options & ~PUBLIC_JIT_MATCH_OPTIONS) == 0);
#endif
/* Initialize UTF parameters. */
utf = (re->overall_options & PCRE2_UTF) != 0;
#ifdef SUPPORT_UNICODE
allow_invalid = (re->overall_options & PCRE2_MATCH_INVALID_UTF) != 0;
#endif
/* Convert the partial matching flags into an integer. */
mb->partial = ((options & PCRE2_PARTIAL_HARD) != 0)? 2 :
((options & PCRE2_PARTIAL_SOFT) != 0)? 1 : 0;
@ -6111,61 +6166,6 @@ if (mb->partial != 0 &&
((re->overall_options | options) & PCRE2_ENDANCHORED) != 0)
return PCRE2_ERROR_BADOPTION;
/* Check a UTF string for validity if required. For 8-bit and 16-bit strings,
we must also check that a starting offset does not point into the middle of a
multiunit character. We check only the portion of the subject that is going to
be inspected during matching - from the offset minus the maximum back reference
to the given length. This saves time when a small part of a large subject is
being matched by the use of a starting offset. Note that the maximum lookbehind
is a number of characters, not code units. */
#ifdef SUPPORT_UNICODE
if (utf && (options & PCRE2_NO_UTF_CHECK) == 0)
{
PCRE2_SPTR check_subject = start_match; /* start_match includes offset */
if (start_offset > 0)
{
#if PCRE2_CODE_UNIT_WIDTH != 32
unsigned int i;
if (start_match < end_subject && NOT_FIRSTCU(*start_match))
return PCRE2_ERROR_BADUTFOFFSET;
for (i = re->max_lookbehind; i > 0 && check_subject > subject; i--)
{
check_subject--;
while (check_subject > subject &&
#if PCRE2_CODE_UNIT_WIDTH == 8
(*check_subject & 0xc0) == 0x80)
#else /* 16-bit */
(*check_subject & 0xfc00) == 0xdc00)
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
check_subject--;
}
#else
/* In the 32-bit library, one code unit equals one character. However,
we cannot just subtract the lookbehind and then compare pointers, because
a very large lookbehind could create an invalid pointer. */
if (start_offset >= re->max_lookbehind)
check_subject -= re->max_lookbehind;
else
check_subject = subject;
#endif /* PCRE2_CODE_UNIT_WIDTH != 32 */
}
/* Validate the relevant portion of the subject. After an error, adjust the
offset to be an absolute offset in the whole string. */
match_data->rc = PRIV(valid_utf)(check_subject,
length - (check_subject - subject), &(match_data->startchar));
if (match_data->rc != 0)
{
match_data->startchar += check_subject - subject;
return match_data->rc;
}
}
#endif /* SUPPORT_UNICODE */
/* It is an error to set an offset limit without setting the flag at compile
time. */
@ -6184,15 +6184,89 @@ if ((match_data->flags & PCRE2_MD_COPIED_SUBJECT) != 0)
}
match_data->subject = NULL;
/* If the pattern was successfully studied with JIT support, run the JIT
executable instead of the rest of this function. Most options must be set at
compile time for the JIT code to be usable. Fallback to the normal code path if
an unsupported option is set or if JIT returns BADOPTION (which means that the
selected normal or partial matching mode was not compiled). */
/* Zero the error offset in case the first code unit is invalid UTF. */
match_data->startchar = 0;
/* ============================= JIT matching ============================== */
/* Prepare for JIT matching. Check a UTF string for validity unless no check is
requested or invalid UTF can be handled. We check only the portion of the
subject that might be be inspected during matching - from the offset minus the
maximum lookbehind to the given length. This saves time when a small part of a
large subject is being matched by the use of a starting offset. Note that the
maximum lookbehind is a number of characters, not code units. */
#ifdef SUPPORT_JIT
if (re->executable_jit != NULL && (options & ~PUBLIC_JIT_MATCH_OPTIONS) == 0)
if (use_jit)
{
#ifdef SUPPORT_UNICODE
if (utf && (options & PCRE2_NO_UTF_CHECK) == 0 && !allow_invalid)
{
#if PCRE2_CODE_UNIT_WIDTH != 32
unsigned int i;
#endif
/* For 8-bit and 16-bit UTF, check that the first code unit is a valid
character start. */
#if PCRE2_CODE_UNIT_WIDTH != 32
if (start_match < end_subject && NOT_FIRSTCU(*start_match))
{
if (start_offset > 0) return PCRE2_ERROR_BADUTFOFFSET;
#if PCRE2_CODE_UNIT_WIDTH == 8
return PCRE2_ERROR_UTF8_ERR20; /* Isolated 0x80 byte */
#else
return PCRE2_ERROR_UTF16_ERR3; /* Isolated low surrogate */
#endif
}
#endif /* WIDTH != 32 */
/* Move back by the maximum lookbehind, just in case it happens at the very
start of matching. */
#if PCRE2_CODE_UNIT_WIDTH != 32
for (i = re->max_lookbehind; i > 0 && start_match > subject; i--)
{
start_match--;
while (start_match > subject &&
#if PCRE2_CODE_UNIT_WIDTH == 8
(*start_match & 0xc0) == 0x80)
#else /* 16-bit */
(*start_match & 0xfc00) == 0xdc00)
#endif
start_match--;
}
#else /* PCRE2_CODE_UNIT_WIDTH != 32 */
/* In the 32-bit library, one code unit equals one character. However,
we cannot just subtract the lookbehind and then compare pointers, because
a very large lookbehind could create an invalid pointer. */
if (start_offset >= re->max_lookbehind)
start_match -= re->max_lookbehind;
else
start_match = subject;
#endif /* PCRE2_CODE_UNIT_WIDTH != 32 */
/* Validate the relevant portion of the subject. Adjust the offset of an
invalid code point to be an absolute offset in the whole string. */
match_data->rc = PRIV(valid_utf)(start_match,
length - (start_match - subject), &(match_data->startchar));
if (match_data->rc != 0)
{
match_data->startchar += start_match - subject;
return match_data->rc;
}
jit_checked_utf = TRUE;
}
#endif /* SUPPORT_UNICODE */
/* If JIT returns BADOPTION, which means that the selected complete or
partial matching mode was not compiled, fall through to the interpreter. */
rc = pcre2_jit_match(code, subject, length, start_offset, options,
match_data, mcontext);
if (rc != PCRE2_ERROR_JIT_BADOPTION)
@ -6209,10 +6283,152 @@ if (re->executable_jit != NULL && (options & ~PUBLIC_JIT_MATCH_OPTIONS) == 0)
return rc;
}
}
#endif /* SUPPORT_JIT */
/* ========================= End of JIT matching ========================== */
/* Proceed with non-JIT matching. The default is to allow lookbehinds to the
start of the subject. A UTF check when there is a non-zero offset may change
this. */
mb->check_subject = subject;
/* If a UTF subject string was not checked for validity in the JIT code above,
check it here, and handle support for invalid UTF strings. The check above
happens only when invalid UTF is not supported and PCRE2_NO_CHECK_UTF is unset.
If we get here in those circumstances, it means the subject string is valid,
but for some reason JIT matching was not successful. There is no need to check
the subject again.
We check only the portion of the subject that might be be inspected during
matching - from the offset minus the maximum lookbehind to the given length.
This saves time when a small part of a large subject is being matched by the
use of a starting offset. Note that the maximum lookbehind is a number of
characters, not code units.
Note also that support for invalid UTF forces a check, overriding the setting
of PCRE2_NO_CHECK_UTF. */
#ifdef SUPPORT_UNICODE
if (utf &&
#ifdef SUPPORT_JIT
!jit_checked_utf &&
#endif
((options & PCRE2_NO_UTF_CHECK) == 0 || allow_invalid))
{
#if PCRE2_CODE_UNIT_WIDTH != 32
BOOL skipped_bad_start = FALSE;
#endif
/* Carry on with non-JIT matching. A NULL match context means "use a default
context", but we take the memory control functions from the pattern. */
/* For 8-bit and 16-bit UTF, check that the first code unit is a valid
character start. If we are handling invalid UTF, just skip over such code
units. Otherwise, give an appropriate error. */
#if PCRE2_CODE_UNIT_WIDTH != 32
if (allow_invalid)
{
while (start_match < end_subject && NOT_FIRSTCU(*start_match))
{
start_match++;
skipped_bad_start = TRUE;
}
}
else if (start_match < end_subject && NOT_FIRSTCU(*start_match))
{
if (start_offset > 0) return PCRE2_ERROR_BADUTFOFFSET;
#if PCRE2_CODE_UNIT_WIDTH == 8
return PCRE2_ERROR_UTF8_ERR20; /* Isolated 0x80 byte */
#else
return PCRE2_ERROR_UTF16_ERR3; /* Isolated low surrogate */
#endif
}
#endif /* WIDTH != 32 */
/* The mb->check_subject field points to the start of UTF checking;
lookbehinds can go back no further than this. */
mb->check_subject = start_match;
/* Move back by the maximum lookbehind, just in case it happens at the very
start of matching, but don't do this if we skipped bad 8-bit or 16-bit code
units above. */
#if PCRE2_CODE_UNIT_WIDTH != 32
if (!skipped_bad_start)
{
unsigned int i;
for (i = re->max_lookbehind; i > 0 && mb->check_subject > subject; i--)
{
mb->check_subject--;
while (mb->check_subject > subject &&
#if PCRE2_CODE_UNIT_WIDTH == 8
(*mb->check_subject & 0xc0) == 0x80)
#else /* 16-bit */
(*mb->check_subject & 0xfc00) == 0xdc00)
#endif
mb->check_subject--;
}
}
#else /* PCRE2_CODE_UNIT_WIDTH != 32 */
/* In the 32-bit library, one code unit equals one character. However,
we cannot just subtract the lookbehind and then compare pointers, because
a very large lookbehind could create an invalid pointer. */
if (start_offset >= re->max_lookbehind)
mb->check_subject -= re->max_lookbehind;
else
mb->check_subject = subject;
#endif /* PCRE2_CODE_UNIT_WIDTH != 32 */
/* Validate the relevant portion of the subject. There's a loop in case we
encounter bad UTF in the characters preceding start_match which we are
scanning because of a lookbehind. */
for (;;)
{
match_data->rc = PRIV(valid_utf)(mb->check_subject,
length - (mb->check_subject - subject), &(match_data->startchar));
if (match_data->rc == 0) break; /* Valid UTF string */
/* Invalid UTF string. Adjust the offset to be an absolute offset in the
whole string. If we are handling invalid UTF strings, set end_subject to
stop before the bad code unit, and set the options to "not end of line".
Otherwise return the error. */
match_data->startchar += mb->check_subject - subject;
if (!allow_invalid || match_data->rc > 0) return match_data->rc;
end_subject = subject + match_data->startchar;
/* If the end precedes start_match, it means there is invalid UTF in the
extra code units we reversed over because of a lookbehind. Advance past the
first bad code unit, and then skip invalid character starting code units in
8-bit and 16-bit modes, and try again. */
if (end_subject < start_match)
{
mb->check_subject = end_subject + 1;
#if PCRE2_CODE_UNIT_WIDTH != 32
while (mb->check_subject < start_match && NOT_FIRSTCU(*mb->check_subject))
mb->check_subject++;
#endif
}
/* Otherwise, set the not end of line option, and do the match. */
else
{
fragment_options = PCRE2_NOTEOL;
break;
}
}
}
#endif /* SUPPORT_UNICODE */
/* A NULL match context means "use a default context", but we take the memory
control functions from the pattern. */
if (mcontext == NULL)
{
@ -6224,8 +6440,8 @@ else mb->memctl = mcontext->memctl;
anchored = ((re->overall_options | options) & PCRE2_ANCHORED) != 0;
firstline = (re->overall_options & PCRE2_FIRSTLINE) != 0;
startline = (re->flags & PCRE2_STARTLINE) != 0;
bumpalong_limit = (mcontext->offset_limit == PCRE2_UNSET)?
end_subject : subject + mcontext->offset_limit;
bumpalong_limit = (mcontext->offset_limit == PCRE2_UNSET)?
true_end_subject : subject + mcontext->offset_limit;
/* Initialize and set up the fixed fields in the callout block, with a pointer
in the match block. */
@ -6236,7 +6452,8 @@ cb.subject = subject;
cb.subject_length = (PCRE2_SIZE)(end_subject - subject);
cb.callout_flags = 0;
/* Fill in the remaining fields in the match block. */
/* Fill in the remaining fields in the match block, except for moptions, which
gets set later. */
mb->callout = mcontext->callout;
mb->callout_data = mcontext->callout_data;
@ -6245,13 +6462,11 @@ mb->start_subject = subject;
mb->start_offset = start_offset;
mb->end_subject = end_subject;
mb->hasthen = (re->flags & PCRE2_HASTHEN) != 0;
mb->moptions = options; /* Match options */
mb->poptions = re->overall_options; /* Pattern options */
mb->allowemptypartial = (re->max_lookbehind > 0) ||
(re->flags & PCRE2_MATCH_EMPTY) != 0;
mb->poptions = re->overall_options; /* Pattern options */
mb->ignore_skip_arg = 0;
mb->mark = mb->nomatch_mark = NULL; /* In case never set */
mb->hitend = FALSE;
mb->mark = mb->nomatch_mark = NULL; /* In case never set */
/* The name table is needed for finding all the numbers associated with a
given name, for condition testing. The code follows the name table. */
@ -6404,6 +6619,13 @@ if ((re->flags & PCRE2_LASTSET) != 0)
/* Loop for handling unanchored repeated matching attempts; for anchored regexs
the loop runs just once. */
#ifdef SUPPORT_UNICODE
FRAGMENT_RESTART:
#endif
start_partial = match_partial = NULL;
mb->hitend = FALSE;
for(;;)
{
PCRE2_SPTR new_start_match;
@ -6473,7 +6695,10 @@ for(;;)
/* Not anchored. Advance to a unique first code unit if there is one. In
8-bit mode, the use of memchr() gives a big speed up, even though we have
to call it twice in caseless mode, in order to find the earliest occurrence
of the character in either of its cases. */
of the character in either of its cases. If a call to memchr() that
searches the rest of the subject fails to find one case, remember that in
order not to keep on repeating the search. This can make a huge difference
when the strings are very long and only one case is present. */
else
{
@ -6487,11 +6712,29 @@ for(;;)
(smc = UCHAR21TEST(start_match)) != first_cu &&
smc != first_cu2)
start_match++;
#else /* 8-bit code units */
PCRE2_SPTR pp1 =
memchr(start_match, first_cu, end_subject-start_match);
PCRE2_SPTR pp2 =
memchr(start_match, first_cu2, end_subject-start_match);
PCRE2_SPTR pp1 = NULL;
PCRE2_SPTR pp2 = NULL;
PCRE2_SIZE cu2size = end_subject - start_match;
if (!memchr_not_found_first_cu)
{
pp1 = memchr(start_match, first_cu, end_subject - start_match);
if (pp1 == NULL) memchr_not_found_first_cu = TRUE;
else cu2size = pp1 - start_match;
}
/* If pp1 is not NULL, we have arranged to search only as far as pp1,
to see if the other case is earlier, so we can set "not found" only
when both searches have returned NULL. */
if (!memchr_not_found_first_cu2)
{
pp2 = memchr(start_match, first_cu2, cu2size);
memchr_not_found_first_cu2 = (pp2 == NULL && pp1 == NULL);
}
if (pp1 == NULL)
start_match = (pp2 == NULL)? end_subject : pp2;
else
@ -6523,7 +6766,7 @@ for(;;)
we also let the cycle run, because the matching string is legitimately
allowed to start with the first code unit of a newline. */
if (!mb->partial && start_match >= mb->end_subject)
if (mb->partial == 0 && start_match >= mb->end_subject)
{
rc = MATCH_NOMATCH;
break;
@ -6582,7 +6825,7 @@ for(;;)
/* See comment above in first_cu checking about the next few lines. */
if (!mb->partial && start_match >= mb->end_subject)
if (mb->partial == 0 && start_match >= mb->end_subject)
{
rc = MATCH_NOMATCH;
break;
@ -6596,8 +6839,10 @@ for(;;)
/* The following two optimizations must be disabled for partial matching. */
if (!mb->partial)
if (mb->partial == 0)
{
PCRE2_SPTR p;
/* The minimum matching length is a lower bound; no string of that length
may actually match the pattern. Although the value is, strictly, in
characters, we treat it as code units to avoid spending too much time in
@ -6621,60 +6866,57 @@ for(;;)
memchr() twice in the caseless case because we only need to check for the
presence of the character in either case, not find the first occurrence.
The search can be skipped if the code unit was found later than the
current starting point in a previous iteration of the bumpalong loop.
HOWEVER: when the subject string is very, very long, searching to its end
can take a long time, and give bad performance on quite ordinary
patterns. This showed up when somebody was matching something like
/^\d+C/ on a 32-megabyte string... so we don't do this when the string is
sufficiently long. */
anchored patterns. This showed up when somebody was matching something
like /^\d+C/ on a 32-megabyte string... so we don't do this when the
string is sufficiently long, but it's worth searching a lot more for
unanchored patterns. */
if (has_req_cu && end_subject - start_match < REQ_CU_MAX)
p = start_match + (has_first_cu? 1:0);
if (has_req_cu && p > req_cu_ptr)
{
PCRE2_SPTR p = start_match + (has_first_cu? 1:0);
PCRE2_SIZE check_length = end_subject - start_match;
/* We don't need to repeat the search if we haven't yet reached the
place we found it last time round the bumpalong loop. */
if (p > req_cu_ptr)
if (check_length < REQ_CU_MAX ||
(!anchored && check_length < REQ_CU_MAX * 1000))
{
if (p < end_subject)
if (req_cu != req_cu2) /* Caseless */
{
if (req_cu != req_cu2) /* Caseless */
{
#if PCRE2_CODE_UNIT_WIDTH != 8
do
{
uint32_t pp = UCHAR21INCTEST(p);
if (pp == req_cu || pp == req_cu2) { p--; break; }
}
while (p < end_subject);
#else /* 8-bit code units */
PCRE2_SPTR pp = p;
p = memchr(pp, req_cu, end_subject - pp);
if (p == NULL)
{
p = memchr(pp, req_cu2, end_subject - pp);
if (p == NULL) p = end_subject;
}
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */
while (p < end_subject)
{
uint32_t pp = UCHAR21INCTEST(p);
if (pp == req_cu || pp == req_cu2) { p--; break; }
}
/* The caseful case */
else
{
#if PCRE2_CODE_UNIT_WIDTH != 8
do
{
if (UCHAR21INCTEST(p) == req_cu) { p--; break; }
}
while (p < end_subject);
#else /* 8-bit code units */
p = memchr(p, req_cu, end_subject - p);
PCRE2_SPTR pp = p;
p = memchr(pp, req_cu, end_subject - pp);
if (p == NULL)
{
p = memchr(pp, req_cu2, end_subject - pp);
if (p == NULL) p = end_subject;
#endif
}
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */
}
/* The caseful case */
else
{
#if PCRE2_CODE_UNIT_WIDTH != 8
while (p < end_subject)
{
if (UCHAR21INCTEST(p) == req_cu) { p--; break; }
}
#else /* 8-bit code units */
p = memchr(p, req_cu, end_subject - p);
if (p == NULL) p = end_subject;
#endif
}
/* If we can't find the required code unit, break the bumpalong loop,
@ -6714,6 +6956,11 @@ for(;;)
mb->start_used_ptr = start_match;
mb->last_used_ptr = start_match;
#ifdef SUPPORT_UNICODE
mb->moptions = options | fragment_options;
#else
mb->moptions = options;
#endif
mb->match_call_count = 0;
mb->end_offset_top = 0;
mb->skip_arg_count = 0;
@ -6839,6 +7086,68 @@ for(;;)
ENDLOOP:
/* If end_subject != true_end_subject, it means we are handling invalid UTF,
and have just processed a non-terminal fragment. If this resulted in no match
or a partial match we must carry on to the next fragment (a partial match is
returned to the caller only at the very end of the subject). A loop is used to
avoid trying to match against empty fragments; if the pattern can match an
empty string it would have done so already. */
#ifdef SUPPORT_UNICODE
if (utf && end_subject != true_end_subject &&
(rc == MATCH_NOMATCH || rc == PCRE2_ERROR_PARTIAL))
{
for (;;)
{
/* Advance past the first bad code unit, and then skip invalid character
starting code units in 8-bit and 16-bit modes. */
start_match = end_subject + 1;
#if PCRE2_CODE_UNIT_WIDTH != 32
while (start_match < true_end_subject && NOT_FIRSTCU(*start_match))
start_match++;
#endif
/* If we have hit the end of the subject, there isn't another non-empty
fragment, so give up. */
if (start_match >= true_end_subject)
{
rc = MATCH_NOMATCH; /* In case it was partial */
break;
}
/* Check the rest of the subject */
mb->check_subject = start_match;
rc = PRIV(valid_utf)(start_match, length - (start_match - subject),
&(match_data->startchar));
/* The rest of the subject is valid UTF. */
if (rc == 0)
{
mb->end_subject = end_subject = true_end_subject;
fragment_options = PCRE2_NOTBOL;
goto FRAGMENT_RESTART;
}
/* A subsequent UTF error has been found; if the next fragment is
non-empty, set up to process it. Otherwise, let the loop advance. */
else if (rc < 0)
{
mb->end_subject = end_subject = start_match + match_data->startchar;
if (end_subject > start_match)
{
fragment_options = PCRE2_NOTBOL|PCRE2_NOTEOL;
goto FRAGMENT_RESTART;
}
}
}
}
#endif /* SUPPORT_UNICODE */
/* Release an enlarged frame vector that is on the heap. */
if (mb->match_frames != mb->stack_frames)

View File

@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016-2018 University of Cambridge
New API code Copyright (c) 2016-2019 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@ -150,4 +150,17 @@ pcre2_get_startchar(pcre2_match_data *match_data)
return match_data->startchar;
}
/*************************************************
* Get size of match data block *
*************************************************/
PCRE2_EXP_DEFN PCRE2_SIZE PCRE2_CALL_CONVENTION
pcre2_get_match_data_size(pcre2_match_data *match_data)
{
return offsetof(pcre2_match_data, ovector) +
2 * (match_data->oveccount) * sizeof(PCRE2_SIZE);
}
/* End of pcre2_match_data.c */

View File

@ -88,11 +88,13 @@ Arguments:
countptr pointer to call count (to catch over complexity)
backref_cache vector for caching back references.
This function is no longer called when the pattern contains (*ACCEPT); however,
the old code for returning -1 is retained, just in case.
Returns: the minimum length
-1 \C in UTF-8 mode
or (*ACCEPT)
or pattern too complicated
or back reference to duplicate name/number
-2 internal error (missing capturing bracket)
-3 internal error (opcode not listed)
*/
@ -103,6 +105,7 @@ find_minlength(const pcre2_real_code *re, PCRE2_SPTR code,
int *backref_cache)
{
int length = -1;
int branchlength = 0;
int prev_cap_recno = -1;
int prev_cap_d = 0;
int prev_recurse_recno = -1;
@ -110,9 +113,9 @@ int prev_recurse_d = 0;
uint32_t once_fudge = 0;
BOOL had_recurse = FALSE;
BOOL dupcapused = (re->flags & PCRE2_DUPCAPUSED) != 0;
recurse_check this_recurse;
int branchlength = 0;
PCRE2_SPTR nextbranch = code + GET(code, 1);
PCRE2_UCHAR *cc = (PCRE2_UCHAR *)code + 1 + LINK_SIZE;
recurse_check this_recurse;
/* If this is a "could be empty" group, its minimum length is 0. */
@ -128,16 +131,20 @@ if ((*countptr)++ > 1000) return -1;
/* Scan along the opcodes for this branch. If we get to the end of the branch,
check the length against that of the other branches. If the accumulated length
passes 16-bits, stop. */
passes 16-bits, reset to that value and skip the rest of the branch. */
for (;;)
{
int d, min, recno;
PCRE2_UCHAR *cs, *ce;
PCRE2_UCHAR op = *cc;
PCRE2_UCHAR op, *cs, *ce;
if (branchlength >= UINT16_MAX) return UINT16_MAX;
if (branchlength >= UINT16_MAX)
{
branchlength = UINT16_MAX;
cc = (PCRE2_UCHAR *)nextbranch;
}
op = *cc;
switch (op)
{
case OP_COND:
@ -206,7 +213,9 @@ for (;;)
cc += 1 + LINK_SIZE;
break;
/* ACCEPT makes things far too complicated; we have to give up. */
/* ACCEPT makes things far too complicated; we have to give up. In fact,
from 10.34 onwards, if a pattern contains (*ACCEPT), this function is not
used. However, leave the code in place, just in case. */
case OP_ACCEPT:
case OP_ASSERT_ACCEPT:
@ -214,9 +223,9 @@ for (;;)
/* Reached end of a branch; if it's a ket it is the end of a nested
call. If it's ALT it is an alternation in a nested call. If it is END it's
the end of the outer call. All can be handled by the same code. If an
ACCEPT was previously encountered, use the length that was in force at that
time, and pass back the shortest ACCEPT length. */
the end of the outer call. All can be handled by the same code. If the
length of any branch is zero, there is no need to scan any subsequent
branches. */
case OP_ALT:
case OP_KET:
@ -226,7 +235,8 @@ for (;;)
case OP_END:
if (length < 0 || (!had_recurse && branchlength < length))
length = branchlength;
if (op != OP_ALT) return length;
if (op != OP_ALT || length == 0) return length;
nextbranch = cc + GET(cc, 1);
cc += 1 + LINK_SIZE;
branchlength = 0;
had_recurse = FALSE;
@ -238,6 +248,8 @@ for (;;)
case OP_ASSERT_NOT:
case OP_ASSERTBACK:
case OP_ASSERTBACK_NOT:
case OP_ASSERT_NA:
case OP_ASSERTBACK_NA:
do cc += GET(cc, 1); while (*cc == OP_ALT);
/* Fall through */
@ -451,15 +463,17 @@ for (;;)
If PCRE2_MATCH_UNSET_BACKREF is set, a backreference to an unset bracket
matches an empty string (by default it causes a matching failure), so in
that case we must set the minimum length to zero. */
that case we must set the minimum length to zero.
/* Duplicate named pattern back reference. We cannot reliably find a length
for this if duplicate numbers are present in the pattern. */
For backreferenes, if duplicate numbers are present in the pattern we check
for a reference to a duplicate. If it is, we don't know which version will
be referenced, so we have to set the minimum length to zero. */
/* Duplicate named pattern back reference. */
case OP_DNREF:
case OP_DNREFI:
if (dupcapused) return -1;
if ((re->overall_options & PCRE2_MATCH_UNSET_BACKREF) == 0)
if (!dupcapused && (re->overall_options & PCRE2_MATCH_UNSET_BACKREF) == 0)
{
int count = GET2(cc, 1+IMM2_SIZE);
PCRE2_UCHAR *slot =
@ -482,28 +496,32 @@ for (;;)
ce = cs = (PCRE2_UCHAR *)PRIV(find_bracket)(startcode, utf, recno);
if (cs == NULL) return -2;
do ce += GET(ce, 1); while (*ce == OP_ALT);
if (cc > cs && cc < ce) /* Simple recursion */
dd = 0;
if (!dupcapused ||
(PCRE2_UCHAR *)PRIV(find_bracket)(ce, utf, recno) == NULL)
{
dd = 0;
had_recurse = TRUE;
}
else
{
recurse_check *r = recurses;
for (r = recurses; r != NULL; r = r->prev)
if (r->group == cs) break;
if (r != NULL) /* Mutual recursion */
if (cc > cs && cc < ce) /* Simple recursion */
{
dd = 0;
had_recurse = TRUE;
}
else
{
this_recurse.prev = recurses;
this_recurse.group = cs;
dd = find_minlength(re, cs, startcode, utf, &this_recurse,
countptr, backref_cache);
if (dd < 0) return dd;
recurse_check *r = recurses;
for (r = recurses; r != NULL; r = r->prev)
if (r->group == cs) break;
if (r != NULL) /* Mutual recursion */
{
had_recurse = TRUE;
}
else
{
this_recurse.prev = recurses; /* No recursion */
this_recurse.group = cs;
dd = find_minlength(re, cs, startcode, utf, &this_recurse,
countptr, backref_cache);
if (dd < 0) return dd;
}
}
}
@ -521,48 +539,51 @@ for (;;)
cc += 1 + 2*IMM2_SIZE;
goto REPEAT_BACK_REFERENCE;
/* Single back reference. We cannot find a length for this if duplicate
numbers are present in the pattern. */
/* Single back reference by number. References by name are converted to by
number when there is no duplication. */
case OP_REF:
case OP_REFI:
if (dupcapused) return -1;
recno = GET2(cc, 1);
if (recno <= backref_cache[0] && backref_cache[recno] >= 0)
d = backref_cache[recno];
else
{
int i;
d = 0;
if ((re->overall_options & PCRE2_MATCH_UNSET_BACKREF) == 0)
{
ce = cs = (PCRE2_UCHAR *)PRIV(find_bracket)(startcode, utf, recno);
if (cs == NULL) return -2;
do ce += GET(ce, 1); while (*ce == OP_ALT);
if (cc > cs && cc < ce) /* Simple recursion */
if (!dupcapused ||
(PCRE2_UCHAR *)PRIV(find_bracket)(ce, utf, recno) == NULL)
{
d = 0;
had_recurse = TRUE;
}
else
{
recurse_check *r = recurses;
for (r = recurses; r != NULL; r = r->prev) if (r->group == cs) break;
if (r != NULL) /* Mutual recursion */
if (cc > cs && cc < ce) /* Simple recursion */
{
d = 0;
had_recurse = TRUE;
}
else
{
this_recurse.prev = recurses;
this_recurse.group = cs;
d = find_minlength(re, cs, startcode, utf, &this_recurse, countptr,
backref_cache);
if (d < 0) return d;
recurse_check *r = recurses;
for (r = recurses; r != NULL; r = r->prev) if (r->group == cs) break;
if (r != NULL) /* Mutual recursion */
{
had_recurse = TRUE;
}
else /* No recursion */
{
this_recurse.prev = recurses;
this_recurse.group = cs;
d = find_minlength(re, cs, startcode, utf, &this_recurse, countptr,
backref_cache);
if (d < 0) return d;
}
}
}
}
else d = 0;
backref_cache[recno] = d;
for (i = backref_cache[0] + 1; i < recno; i++) backref_cache[i] = -1;
@ -888,7 +909,7 @@ if (table_limit != 32) for (c = 24; c < 32; c++) re->start_bitmap[c] = 0xff;
/*************************************************
* Create bitmap of starting bytes *
* Create bitmap of starting code units *
*************************************************/
/* This function scans a compiled unanchored expression recursively and
@ -938,6 +959,9 @@ do
{
int rc;
uint8_t *classmap = NULL;
#ifdef SUPPORT_WIDE_CHARS
PCRE2_UCHAR xclassflags;
#endif
switch(*tcode)
{
@ -1078,6 +1102,7 @@ do
case OP_ONCE:
case OP_SCRIPT_RUN:
case OP_ASSERT:
case OP_ASSERT_NA:
rc = set_start_bits(re, tcode, utf);
if (rc == SSB_FAIL || rc == SSB_UNKNOWN) return rc;
if (rc == SSB_DONE) try_next = FALSE; else
@ -1120,6 +1145,7 @@ do
case OP_ASSERT_NOT:
case OP_ASSERTBACK:
case OP_ASSERTBACK_NOT:
case OP_ASSERTBACK_NA:
do tcode += GET(tcode, 1); while (*tcode == OP_ALT);
tcode += 1 + LINK_SIZE;
break;
@ -1444,20 +1470,59 @@ do
negative XCLASS without a map, give up. If there are no property checks,
there must be wide characters on the XCLASS list, because otherwise an
XCLASS would not have been created. This means that code points >= 255
are always potential starters. */
are potential starters. In the UTF-8 case we can scan them and set bits
for the relevant leading bytes. */
#ifdef SUPPORT_WIDE_CHARS
case OP_XCLASS:
if ((tcode[1 + LINK_SIZE] & XCL_HASPROP) != 0 ||
(tcode[1 + LINK_SIZE] & (XCL_MAP|XCL_NOT)) == XCL_NOT)
xclassflags = tcode[1 + LINK_SIZE];
if ((xclassflags & XCL_HASPROP) != 0 ||
(xclassflags & (XCL_MAP|XCL_NOT)) == XCL_NOT)
return SSB_FAIL;
/* We have a positive XCLASS or a negative one without a map. Set up the
map pointer if there is one, and fall through. */
classmap = ((tcode[1 + LINK_SIZE] & XCL_MAP) == 0)? NULL :
classmap = ((xclassflags & XCL_MAP) == 0)? NULL :
(uint8_t *)(tcode + 1 + LINK_SIZE + 1);
#endif
/* In UTF-8 mode, scan the character list and set bits for leading bytes,
then jump to handle the map. */
#if PCRE2_CODE_UNIT_WIDTH == 8
if (utf && (xclassflags & XCL_NOT) == 0)
{
PCRE2_UCHAR b, e;
PCRE2_SPTR p = tcode + 1 + LINK_SIZE + 1 + ((classmap == NULL)? 0:32);
tcode += GET(tcode, 1);
for (;;) switch (*p++)
{
case XCL_SINGLE:
b = *p++;
while ((*p & 0xc0) == 0x80) p++;
re->start_bitmap[b/8] |= (1u << (b&7));
break;
case XCL_RANGE:
b = *p++;
while ((*p & 0xc0) == 0x80) p++;
e = *p++;
while ((*p & 0xc0) == 0x80) p++;
for (; b <= e; b++)
re->start_bitmap[b/8] |= (1u << (b&7));
break;
case XCL_END:
goto HANDLE_CLASSMAP;
default:
return SSB_UNKNOWN; /* Internal error, should not occur */
}
}
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */
#endif /* SUPPORT_WIDE_CHARS */
/* It seems that the fall through comment must be outside the #ifdef if
it is to avoid the gcc compiler warning. */
@ -1499,6 +1564,9 @@ do
greater than 127. In fact, there are only two possible starting bytes for
characters in the range 128 - 255. */
#if defined SUPPORT_WIDE_CHARS && PCRE2_CODE_UNIT_WIDTH == 8
HANDLE_CLASSMAP:
#endif
if (classmap != NULL)
{
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
@ -1569,7 +1637,9 @@ return yield;
/* This function is handed a compiled expression that it must study to produce
information that will speed up the matching.
Argument: points to the compiled expression
Argument:
re points to the compiled expression
Returns: 0 normally; non-zero should never normally occur
1 unknown opcode in set_start_bits
2 missing capturing bracket
@ -1579,7 +1649,6 @@ Returns: 0 normally; non-zero should never normally occur
int
PRIV(study)(pcre2_real_code *re)
{
int min;
int count = 0;
PCRE2_UCHAR *code;
BOOL utf = (re->overall_options & PCRE2_UTF) != 0;
@ -1597,25 +1666,121 @@ if ((re->flags & (PCRE2_FIRSTSET|PCRE2_STARTLINE)) == 0)
{
int rc = set_start_bits(re, code, utf);
if (rc == SSB_UNKNOWN) return 1;
if (rc == SSB_DONE) re->flags |= PCRE2_FIRSTMAPSET;
/* If a list of starting code units was set up, scan the list to see if only
one or two were listed. Having only one listed is rare because usually a
single starting code unit will have been recognized and PCRE2_FIRSTSET set.
If two are listed, see if they are caseless versions of the same character;
if so we can replace the list with a caseless first code unit. This gives
better performance and is plausibly worth doing for patterns such as [Ww]ord
or (word|WORD). */
if (rc == SSB_DONE)
{
int i;
int a = -1;
int b = -1;
uint8_t *p = re->start_bitmap;
uint32_t flags = PCRE2_FIRSTMAPSET;
for (i = 0; i < 256; p++, i += 8)
{
uint8_t x = *p;
if (x != 0)
{
int c;
uint8_t y = x & (~x + 1); /* Least significant bit */
if (y != x) goto DONE; /* More than one bit set */
/* In the 16-bit and 32-bit libraries, the bit for 0xff means "0xff and
all wide characters", so we cannot use it here. */
#if PCRE2_CODE_UNIT_WIDTH != 8
if (i == 248 && x == 0x80) goto DONE;
#endif
/* Compute the character value */
c = i;
switch (x)
{
case 1: break;
case 2: c += 1; break; case 4: c += 2; break;
case 8: c += 3; break; case 16: c += 4; break;
case 32: c += 5; break; case 64: c += 6; break;
case 128: c += 7; break;
}
/* c contains the code unit value, in the range 0-255. In 8-bit UTF
mode, only values < 128 can be used. */
#if PCRE2_CODE_UNIT_WIDTH == 8
if (c > 127) goto DONE;
#endif
if (a < 0) a = c; /* First one found */
else if (b < 0) /* Second one found */
{
int d = TABLE_GET((unsigned int)c, re->tables + fcc_offset, c);
#ifdef SUPPORT_UNICODE
#if PCRE2_CODE_UNIT_WIDTH == 8
if (utf && UCD_CASESET(c) != 0) goto DONE; /* Multiple case set */
#else /* 16-bit or 32-bit */
if (UCD_CASESET(c) != 0) goto DONE; /* Multiple case set */
if (utf && c > 127) d = UCD_OTHERCASE(c);
#endif /* Code width */
#endif /* SUPPORT_UNICODE */
if (d != a) goto DONE; /* Not other case of a */
b = c;
}
else goto DONE; /* More than two characters found */
}
}
/* Replace the start code unit bits with a first code unit, but only if it
is not the same as a required later code unit. This is because a search for
a required code unit starts after an explicit first code unit, but at a
code unit found from the bitmap. Patterns such as /a*a/ don't work
if both the start unit and required unit are the same. */
if (a >= 0 &&
(
(re->flags & PCRE2_LASTSET) == 0 ||
(
re->last_codeunit != (uint32_t)a &&
(b < 0 || re->last_codeunit != (uint32_t)b)
)
))
{
re->first_codeunit = a;
flags = PCRE2_FIRSTSET;
if (b >= 0) flags |= PCRE2_FIRSTCASELESS;
}
DONE:
re->flags |= flags;
}
}
/* Find the minimum length of subject string. If the pattern can match an empty
string, the minimum length is already known. If there are more back references
than the size of the vector we are going to cache them in, do nothing. A
pattern that complicated will probably take a long time to analyze and may in
any case turn out to be too complicated. Note that back reference minima are
held as 16-bit numbers. */
string, the minimum length is already known. If the pattern contains (*ACCEPT)
all bets are off, and we don't even try to find a minimum length. If there are
more back references than the size of the vector we are going to cache them in,
do nothing. A pattern that complicated will probably take a long time to
analyze and may in any case turn out to be too complicated. Note that back
reference minima are held as 16-bit numbers. */
if ((re->flags & PCRE2_MATCH_EMPTY) == 0 &&
if ((re->flags & (PCRE2_MATCH_EMPTY|PCRE2_HASACCEPT)) == 0 &&
re->top_backref <= MAX_CACHE_BACKREF)
{
int min;
int backref_cache[MAX_CACHE_BACKREF+1];
backref_cache[0] = 0; /* Highest one that is set */
min = find_minlength(re, code, code, utf, NULL, &count, backref_cache);
switch(min)
{
case -1: /* \C in UTF mode or (*ACCEPT) or over-complex regex */
case -1: /* \C in UTF mode or over-complex regex */
break; /* Leave minlength unchanged (will be zero) */
case -2:
@ -1625,8 +1790,7 @@ if ((re->flags & PCRE2_MATCH_EMPTY) == 0 &&
return 3; /* unrecognized opcode */
default:
if (min > UINT16_MAX) min = UINT16_MAX;
re->minlength = min;
re->minlength = (min > UINT16_MAX)? UINT16_MAX : min;
break;
}
}

View File

@ -279,6 +279,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
#define STRING_Duployan0 STR_D STR_u STR_p STR_l STR_o STR_y STR_a STR_n "\0"
#define STRING_Egyptian_Hieroglyphs0 STR_E STR_g STR_y STR_p STR_t STR_i STR_a STR_n STR_UNDERSCORE STR_H STR_i STR_e STR_r STR_o STR_g STR_l STR_y STR_p STR_h STR_s "\0"
#define STRING_Elbasan0 STR_E STR_l STR_b STR_a STR_s STR_a STR_n "\0"
#define STRING_Elymaic0 STR_E STR_l STR_y STR_m STR_a STR_i STR_c "\0"
#define STRING_Ethiopic0 STR_E STR_t STR_h STR_i STR_o STR_p STR_i STR_c "\0"
#define STRING_Georgian0 STR_G STR_e STR_o STR_r STR_g STR_i STR_a STR_n "\0"
#define STRING_Glagolitic0 STR_G STR_l STR_a STR_g STR_o STR_l STR_i STR_t STR_i STR_c "\0"
@ -348,6 +349,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
#define STRING_Myanmar0 STR_M STR_y STR_a STR_n STR_m STR_a STR_r "\0"
#define STRING_N0 STR_N "\0"
#define STRING_Nabataean0 STR_N STR_a STR_b STR_a STR_t STR_a STR_e STR_a STR_n "\0"
#define STRING_Nandinagari0 STR_N STR_a STR_n STR_d STR_i STR_n STR_a STR_g STR_a STR_r STR_i "\0"
#define STRING_Nd0 STR_N STR_d "\0"
#define STRING_New_Tai_Lue0 STR_N STR_e STR_w STR_UNDERSCORE STR_T STR_a STR_i STR_UNDERSCORE STR_L STR_u STR_e "\0"
#define STRING_Newa0 STR_N STR_e STR_w STR_a "\0"
@ -355,6 +357,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
#define STRING_Nl0 STR_N STR_l "\0"
#define STRING_No0 STR_N STR_o "\0"
#define STRING_Nushu0 STR_N STR_u STR_s STR_h STR_u "\0"
#define STRING_Nyiakeng_Puachue_Hmong0 STR_N STR_y STR_i STR_a STR_k STR_e STR_n STR_g STR_UNDERSCORE STR_P STR_u STR_a STR_c STR_h STR_u STR_e STR_UNDERSCORE STR_H STR_m STR_o STR_n STR_g "\0"
#define STRING_Ogham0 STR_O STR_g STR_h STR_a STR_m "\0"
#define STRING_Ol_Chiki0 STR_O STR_l STR_UNDERSCORE STR_C STR_h STR_i STR_k STR_i "\0"
#define STRING_Old_Hungarian0 STR_O STR_l STR_d STR_UNDERSCORE STR_H STR_u STR_n STR_g STR_a STR_r STR_i STR_a STR_n "\0"
@ -419,6 +422,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
#define STRING_Ugaritic0 STR_U STR_g STR_a STR_r STR_i STR_t STR_i STR_c "\0"
#define STRING_Unknown0 STR_U STR_n STR_k STR_n STR_o STR_w STR_n "\0"
#define STRING_Vai0 STR_V STR_a STR_i "\0"
#define STRING_Wancho0 STR_W STR_a STR_n STR_c STR_h STR_o "\0"
#define STRING_Warang_Citi0 STR_W STR_a STR_r STR_a STR_n STR_g STR_UNDERSCORE STR_C STR_i STR_t STR_i "\0"
#define STRING_Xan0 STR_X STR_a STR_n "\0"
#define STRING_Xps0 STR_X STR_p STR_s "\0"
@ -474,6 +478,7 @@ const char PRIV(utt_names)[] =
STRING_Duployan0
STRING_Egyptian_Hieroglyphs0
STRING_Elbasan0
STRING_Elymaic0
STRING_Ethiopic0
STRING_Georgian0
STRING_Glagolitic0
@ -543,6 +548,7 @@ const char PRIV(utt_names)[] =
STRING_Myanmar0
STRING_N0
STRING_Nabataean0
STRING_Nandinagari0
STRING_Nd0
STRING_New_Tai_Lue0
STRING_Newa0
@ -550,6 +556,7 @@ const char PRIV(utt_names)[] =
STRING_Nl0
STRING_No0
STRING_Nushu0
STRING_Nyiakeng_Puachue_Hmong0
STRING_Ogham0
STRING_Ol_Chiki0
STRING_Old_Hungarian0
@ -614,6 +621,7 @@ const char PRIV(utt_names)[] =
STRING_Ugaritic0
STRING_Unknown0
STRING_Vai0
STRING_Wancho0
STRING_Warang_Citi0
STRING_Xan0
STRING_Xps0
@ -669,158 +677,162 @@ const ucp_type_table PRIV(utt)[] = {
{ 299, PT_SC, ucp_Duployan },
{ 308, PT_SC, ucp_Egyptian_Hieroglyphs },
{ 329, PT_SC, ucp_Elbasan },
{ 337, PT_SC, ucp_Ethiopic },
{ 346, PT_SC, ucp_Georgian },
{ 355, PT_SC, ucp_Glagolitic },
{ 366, PT_SC, ucp_Gothic },
{ 373, PT_SC, ucp_Grantha },
{ 381, PT_SC, ucp_Greek },
{ 387, PT_SC, ucp_Gujarati },
{ 396, PT_SC, ucp_Gunjala_Gondi },
{ 410, PT_SC, ucp_Gurmukhi },
{ 419, PT_SC, ucp_Han },
{ 423, PT_SC, ucp_Hangul },
{ 430, PT_SC, ucp_Hanifi_Rohingya },
{ 446, PT_SC, ucp_Hanunoo },
{ 454, PT_SC, ucp_Hatran },
{ 461, PT_SC, ucp_Hebrew },
{ 468, PT_SC, ucp_Hiragana },
{ 477, PT_SC, ucp_Imperial_Aramaic },
{ 494, PT_SC, ucp_Inherited },
{ 504, PT_SC, ucp_Inscriptional_Pahlavi },
{ 526, PT_SC, ucp_Inscriptional_Parthian },
{ 549, PT_SC, ucp_Javanese },
{ 558, PT_SC, ucp_Kaithi },
{ 565, PT_SC, ucp_Kannada },
{ 573, PT_SC, ucp_Katakana },
{ 582, PT_SC, ucp_Kayah_Li },
{ 591, PT_SC, ucp_Kharoshthi },
{ 602, PT_SC, ucp_Khmer },
{ 608, PT_SC, ucp_Khojki },
{ 615, PT_SC, ucp_Khudawadi },
{ 625, PT_GC, ucp_L },
{ 627, PT_LAMP, 0 },
{ 630, PT_SC, ucp_Lao },
{ 634, PT_SC, ucp_Latin },
{ 640, PT_SC, ucp_Lepcha },
{ 647, PT_SC, ucp_Limbu },
{ 653, PT_SC, ucp_Linear_A },
{ 662, PT_SC, ucp_Linear_B },
{ 671, PT_SC, ucp_Lisu },
{ 676, PT_PC, ucp_Ll },
{ 679, PT_PC, ucp_Lm },
{ 682, PT_PC, ucp_Lo },
{ 685, PT_PC, ucp_Lt },
{ 688, PT_PC, ucp_Lu },
{ 691, PT_SC, ucp_Lycian },
{ 698, PT_SC, ucp_Lydian },
{ 705, PT_GC, ucp_M },
{ 707, PT_SC, ucp_Mahajani },
{ 716, PT_SC, ucp_Makasar },
{ 724, PT_SC, ucp_Malayalam },
{ 734, PT_SC, ucp_Mandaic },
{ 742, PT_SC, ucp_Manichaean },
{ 753, PT_SC, ucp_Marchen },
{ 761, PT_SC, ucp_Masaram_Gondi },
{ 775, PT_PC, ucp_Mc },
{ 778, PT_PC, ucp_Me },
{ 781, PT_SC, ucp_Medefaidrin },
{ 793, PT_SC, ucp_Meetei_Mayek },
{ 806, PT_SC, ucp_Mende_Kikakui },
{ 820, PT_SC, ucp_Meroitic_Cursive },
{ 837, PT_SC, ucp_Meroitic_Hieroglyphs },
{ 858, PT_SC, ucp_Miao },
{ 863, PT_PC, ucp_Mn },
{ 866, PT_SC, ucp_Modi },
{ 871, PT_SC, ucp_Mongolian },
{ 881, PT_SC, ucp_Mro },
{ 885, PT_SC, ucp_Multani },
{ 893, PT_SC, ucp_Myanmar },
{ 901, PT_GC, ucp_N },
{ 903, PT_SC, ucp_Nabataean },
{ 913, PT_PC, ucp_Nd },
{ 916, PT_SC, ucp_New_Tai_Lue },
{ 928, PT_SC, ucp_Newa },
{ 933, PT_SC, ucp_Nko },
{ 937, PT_PC, ucp_Nl },
{ 940, PT_PC, ucp_No },
{ 943, PT_SC, ucp_Nushu },
{ 949, PT_SC, ucp_Ogham },
{ 955, PT_SC, ucp_Ol_Chiki },
{ 964, PT_SC, ucp_Old_Hungarian },
{ 978, PT_SC, ucp_Old_Italic },
{ 989, PT_SC, ucp_Old_North_Arabian },
{ 1007, PT_SC, ucp_Old_Permic },
{ 1018, PT_SC, ucp_Old_Persian },
{ 1030, PT_SC, ucp_Old_Sogdian },
{ 1042, PT_SC, ucp_Old_South_Arabian },
{ 1060, PT_SC, ucp_Old_Turkic },
{ 1071, PT_SC, ucp_Oriya },
{ 1077, PT_SC, ucp_Osage },
{ 1083, PT_SC, ucp_Osmanya },
{ 1091, PT_GC, ucp_P },
{ 1093, PT_SC, ucp_Pahawh_Hmong },
{ 1106, PT_SC, ucp_Palmyrene },
{ 1116, PT_SC, ucp_Pau_Cin_Hau },
{ 1128, PT_PC, ucp_Pc },
{ 1131, PT_PC, ucp_Pd },
{ 1134, PT_PC, ucp_Pe },
{ 1137, PT_PC, ucp_Pf },
{ 1140, PT_SC, ucp_Phags_Pa },
{ 1149, PT_SC, ucp_Phoenician },
{ 1160, PT_PC, ucp_Pi },
{ 1163, PT_PC, ucp_Po },
{ 1166, PT_PC, ucp_Ps },
{ 1169, PT_SC, ucp_Psalter_Pahlavi },
{ 1185, PT_SC, ucp_Rejang },
{ 1192, PT_SC, ucp_Runic },
{ 1198, PT_GC, ucp_S },
{ 1200, PT_SC, ucp_Samaritan },
{ 1210, PT_SC, ucp_Saurashtra },
{ 1221, PT_PC, ucp_Sc },
{ 1224, PT_SC, ucp_Sharada },
{ 1232, PT_SC, ucp_Shavian },
{ 1240, PT_SC, ucp_Siddham },
{ 1248, PT_SC, ucp_SignWriting },
{ 1260, PT_SC, ucp_Sinhala },
{ 1268, PT_PC, ucp_Sk },
{ 1271, PT_PC, ucp_Sm },
{ 1274, PT_PC, ucp_So },
{ 1277, PT_SC, ucp_Sogdian },
{ 1285, PT_SC, ucp_Sora_Sompeng },
{ 1298, PT_SC, ucp_Soyombo },
{ 1306, PT_SC, ucp_Sundanese },
{ 1316, PT_SC, ucp_Syloti_Nagri },
{ 1329, PT_SC, ucp_Syriac },
{ 1336, PT_SC, ucp_Tagalog },
{ 1344, PT_SC, ucp_Tagbanwa },
{ 1353, PT_SC, ucp_Tai_Le },
{ 1360, PT_SC, ucp_Tai_Tham },
{ 1369, PT_SC, ucp_Tai_Viet },
{ 1378, PT_SC, ucp_Takri },
{ 1384, PT_SC, ucp_Tamil },
{ 1390, PT_SC, ucp_Tangut },
{ 1397, PT_SC, ucp_Telugu },
{ 1404, PT_SC, ucp_Thaana },
{ 1411, PT_SC, ucp_Thai },
{ 1416, PT_SC, ucp_Tibetan },
{ 1424, PT_SC, ucp_Tifinagh },
{ 1433, PT_SC, ucp_Tirhuta },
{ 1441, PT_SC, ucp_Ugaritic },
{ 1450, PT_SC, ucp_Unknown },
{ 1458, PT_SC, ucp_Vai },
{ 1462, PT_SC, ucp_Warang_Citi },
{ 1474, PT_ALNUM, 0 },
{ 1478, PT_PXSPACE, 0 },
{ 1482, PT_SPACE, 0 },
{ 1486, PT_UCNC, 0 },
{ 1490, PT_WORD, 0 },
{ 1494, PT_SC, ucp_Yi },
{ 1497, PT_GC, ucp_Z },
{ 1499, PT_SC, ucp_Zanabazar_Square },
{ 1516, PT_PC, ucp_Zl },
{ 1519, PT_PC, ucp_Zp },
{ 1522, PT_PC, ucp_Zs }
{ 337, PT_SC, ucp_Elymaic },
{ 345, PT_SC, ucp_Ethiopic },
{ 354, PT_SC, ucp_Georgian },
{ 363, PT_SC, ucp_Glagolitic },
{ 374, PT_SC, ucp_Gothic },
{ 381, PT_SC, ucp_Grantha },
{ 389, PT_SC, ucp_Greek },
{ 395, PT_SC, ucp_Gujarati },
{ 404, PT_SC, ucp_Gunjala_Gondi },
{ 418, PT_SC, ucp_Gurmukhi },
{ 427, PT_SC, ucp_Han },
{ 431, PT_SC, ucp_Hangul },
{ 438, PT_SC, ucp_Hanifi_Rohingya },
{ 454, PT_SC, ucp_Hanunoo },
{ 462, PT_SC, ucp_Hatran },
{ 469, PT_SC, ucp_Hebrew },
{ 476, PT_SC, ucp_Hiragana },
{ 485, PT_SC, ucp_Imperial_Aramaic },
{ 502, PT_SC, ucp_Inherited },
{ 512, PT_SC, ucp_Inscriptional_Pahlavi },
{ 534, PT_SC, ucp_Inscriptional_Parthian },
{ 557, PT_SC, ucp_Javanese },
{ 566, PT_SC, ucp_Kaithi },
{ 573, PT_SC, ucp_Kannada },
{ 581, PT_SC, ucp_Katakana },
{ 590, PT_SC, ucp_Kayah_Li },
{ 599, PT_SC, ucp_Kharoshthi },
{ 610, PT_SC, ucp_Khmer },
{ 616, PT_SC, ucp_Khojki },
{ 623, PT_SC, ucp_Khudawadi },
{ 633, PT_GC, ucp_L },
{ 635, PT_LAMP, 0 },
{ 638, PT_SC, ucp_Lao },
{ 642, PT_SC, ucp_Latin },
{ 648, PT_SC, ucp_Lepcha },
{ 655, PT_SC, ucp_Limbu },
{ 661, PT_SC, ucp_Linear_A },
{ 670, PT_SC, ucp_Linear_B },
{ 679, PT_SC, ucp_Lisu },
{ 684, PT_PC, ucp_Ll },
{ 687, PT_PC, ucp_Lm },
{ 690, PT_PC, ucp_Lo },
{ 693, PT_PC, ucp_Lt },
{ 696, PT_PC, ucp_Lu },
{ 699, PT_SC, ucp_Lycian },
{ 706, PT_SC, ucp_Lydian },
{ 713, PT_GC, ucp_M },
{ 715, PT_SC, ucp_Mahajani },
{ 724, PT_SC, ucp_Makasar },
{ 732, PT_SC, ucp_Malayalam },
{ 742, PT_SC, ucp_Mandaic },
{ 750, PT_SC, ucp_Manichaean },
{ 761, PT_SC, ucp_Marchen },
{ 769, PT_SC, ucp_Masaram_Gondi },
{ 783, PT_PC, ucp_Mc },
{ 786, PT_PC, ucp_Me },
{ 789, PT_SC, ucp_Medefaidrin },
{ 801, PT_SC, ucp_Meetei_Mayek },
{ 814, PT_SC, ucp_Mende_Kikakui },
{ 828, PT_SC, ucp_Meroitic_Cursive },
{ 845, PT_SC, ucp_Meroitic_Hieroglyphs },
{ 866, PT_SC, ucp_Miao },
{ 871, PT_PC, ucp_Mn },
{ 874, PT_SC, ucp_Modi },
{ 879, PT_SC, ucp_Mongolian },
{ 889, PT_SC, ucp_Mro },
{ 893, PT_SC, ucp_Multani },
{ 901, PT_SC, ucp_Myanmar },
{ 909, PT_GC, ucp_N },
{ 911, PT_SC, ucp_Nabataean },
{ 921, PT_SC, ucp_Nandinagari },
{ 933, PT_PC, ucp_Nd },
{ 936, PT_SC, ucp_New_Tai_Lue },
{ 948, PT_SC, ucp_Newa },
{ 953, PT_SC, ucp_Nko },
{ 957, PT_PC, ucp_Nl },
{ 960, PT_PC, ucp_No },
{ 963, PT_SC, ucp_Nushu },
{ 969, PT_SC, ucp_Nyiakeng_Puachue_Hmong },
{ 992, PT_SC, ucp_Ogham },
{ 998, PT_SC, ucp_Ol_Chiki },
{ 1007, PT_SC, ucp_Old_Hungarian },
{ 1021, PT_SC, ucp_Old_Italic },
{ 1032, PT_SC, ucp_Old_North_Arabian },
{ 1050, PT_SC, ucp_Old_Permic },
{ 1061, PT_SC, ucp_Old_Persian },
{ 1073, PT_SC, ucp_Old_Sogdian },
{ 1085, PT_SC, ucp_Old_South_Arabian },
{ 1103, PT_SC, ucp_Old_Turkic },
{ 1114, PT_SC, ucp_Oriya },
{ 1120, PT_SC, ucp_Osage },
{ 1126, PT_SC, ucp_Osmanya },
{ 1134, PT_GC, ucp_P },
{ 1136, PT_SC, ucp_Pahawh_Hmong },
{ 1149, PT_SC, ucp_Palmyrene },
{ 1159, PT_SC, ucp_Pau_Cin_Hau },
{ 1171, PT_PC, ucp_Pc },
{ 1174, PT_PC, ucp_Pd },
{ 1177, PT_PC, ucp_Pe },
{ 1180, PT_PC, ucp_Pf },
{ 1183, PT_SC, ucp_Phags_Pa },
{ 1192, PT_SC, ucp_Phoenician },
{ 1203, PT_PC, ucp_Pi },
{ 1206, PT_PC, ucp_Po },
{ 1209, PT_PC, ucp_Ps },
{ 1212, PT_SC, ucp_Psalter_Pahlavi },
{ 1228, PT_SC, ucp_Rejang },
{ 1235, PT_SC, ucp_Runic },
{ 1241, PT_GC, ucp_S },
{ 1243, PT_SC, ucp_Samaritan },
{ 1253, PT_SC, ucp_Saurashtra },
{ 1264, PT_PC, ucp_Sc },
{ 1267, PT_SC, ucp_Sharada },
{ 1275, PT_SC, ucp_Shavian },
{ 1283, PT_SC, ucp_Siddham },
{ 1291, PT_SC, ucp_SignWriting },
{ 1303, PT_SC, ucp_Sinhala },
{ 1311, PT_PC, ucp_Sk },
{ 1314, PT_PC, ucp_Sm },
{ 1317, PT_PC, ucp_So },
{ 1320, PT_SC, ucp_Sogdian },
{ 1328, PT_SC, ucp_Sora_Sompeng },
{ 1341, PT_SC, ucp_Soyombo },
{ 1349, PT_SC, ucp_Sundanese },
{ 1359, PT_SC, ucp_Syloti_Nagri },
{ 1372, PT_SC, ucp_Syriac },
{ 1379, PT_SC, ucp_Tagalog },
{ 1387, PT_SC, ucp_Tagbanwa },
{ 1396, PT_SC, ucp_Tai_Le },
{ 1403, PT_SC, ucp_Tai_Tham },
{ 1412, PT_SC, ucp_Tai_Viet },
{ 1421, PT_SC, ucp_Takri },
{ 1427, PT_SC, ucp_Tamil },
{ 1433, PT_SC, ucp_Tangut },
{ 1440, PT_SC, ucp_Telugu },
{ 1447, PT_SC, ucp_Thaana },
{ 1454, PT_SC, ucp_Thai },
{ 1459, PT_SC, ucp_Tibetan },
{ 1467, PT_SC, ucp_Tifinagh },
{ 1476, PT_SC, ucp_Tirhuta },
{ 1484, PT_SC, ucp_Ugaritic },
{ 1493, PT_SC, ucp_Unknown },
{ 1501, PT_SC, ucp_Vai },
{ 1505, PT_SC, ucp_Wancho },
{ 1512, PT_SC, ucp_Warang_Citi },
{ 1524, PT_ALNUM, 0 },
{ 1528, PT_PXSPACE, 0 },
{ 1532, PT_SPACE, 0 },
{ 1536, PT_UCNC, 0 },
{ 1540, PT_WORD, 0 },
{ 1544, PT_SC, ucp_Yi },
{ 1547, PT_GC, ucp_Z },
{ 1549, PT_SC, ucp_Zanabazar_Square },
{ 1566, PT_PC, ucp_Zl },
{ 1569, PT_PC, ucp_Zp },
{ 1572, PT_PC, ucp_Zs }
};
const size_t PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table);

File diff suppressed because it is too large Load Diff

View File

@ -281,7 +281,12 @@ enum {
ucp_Makasar,
ucp_Medefaidrin,
ucp_Old_Sogdian,
ucp_Sogdian
ucp_Sogdian,
/* New for Unicode 12.0.0 */
ucp_Elymaic,
ucp_Nandinagari,
ucp_Nyiakeng_Puachue_Hmong,
ucp_Wancho
};
#endif /* PCRE2_UCP_H_IDEMPOTENT_GUARD */

View File

@ -214,6 +214,10 @@
#define SLJIT_MEMCPY(dest, src, len) memcpy(dest, src, len)
#endif
#ifndef SLJIT_MEMMOVE
#define SLJIT_MEMMOVE(dest, src, len) memmove(dest, src, len)
#endif
#ifndef SLJIT_ZEROMEM
#define SLJIT_ZEROMEM(dest, len) memset(dest, 0, len)
#endif

View File

@ -118,10 +118,20 @@ static SLJIT_INLINE int get_map_jit_flag()
if (map_jit_flag == -1) {
struct utsname name;
map_jit_flag = 0;
uname(&name);
/* Kernel version for 10.14.0 (Mojave) */
map_jit_flag = (atoi(name.release) >= 18) ? MAP_JIT : 0;
if (atoi(name.release) >= 18) {
/* Only use MAP_JIT if a hardened runtime is used, because MAP_JIT is incompatible with fork(). */
void *ptr = mmap(NULL, getpagesize(), PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
if (ptr == MAP_FAILED) {
map_jit_flag = MAP_JIT;
} else {
munmap(ptr, getpagesize());
}
}
}
return map_jit_flag;
@ -137,6 +147,7 @@ static SLJIT_INLINE int get_map_jit_flag()
static SLJIT_INLINE void* alloc_chunk(sljit_uw size)
{
void *retval;
const int prot = PROT_READ | PROT_WRITE | PROT_EXEC;
#ifdef MAP_ANON
@ -146,16 +157,25 @@ static SLJIT_INLINE void* alloc_chunk(sljit_uw size)
flags |= get_map_jit_flag();
#endif
retval = mmap(NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC, flags, -1, 0);
retval = mmap(NULL, size, prot, flags, -1, 0);
#else /* !MAP_ANON */
if (dev_zero < 0) {
if (open_dev_zero())
return NULL;
}
retval = mmap(NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE, dev_zero, 0);
retval = mmap(NULL, size, prot, MAP_PRIVATE, dev_zero, 0);
#endif /* MAP_ANON */
return (retval != MAP_FAILED) ? retval : NULL;
if (retval == MAP_FAILED)
retval = NULL;
else {
if (mprotect(retval, size, prot) < 0) {
munmap(retval, size);
retval = NULL;
}
}
return retval;
}
static SLJIT_INLINE void free_chunk(void *chunk, sljit_uw size)

View File

@ -144,6 +144,7 @@
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
# define PATCH_MD 0x10
#endif
# define TYPE_SHIFT 13
#endif
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) || (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
@ -521,6 +522,12 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_target(struct sljit_jump *jump, sljit_uw
}
}
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_put_label(struct sljit_put_label *put_label, struct sljit_label *label)
{
if (SLJIT_LIKELY(!!put_label))
put_label->label = label;
}
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_current_flags(struct sljit_compiler *compiler, sljit_s32 current_flags)
{
SLJIT_UNUSED_ARG(compiler);
@ -620,6 +627,30 @@ static SLJIT_INLINE sljit_s32 get_arg_count(sljit_s32 arg_types)
return arg_count;
}
#if !(defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
static SLJIT_INLINE sljit_uw compute_next_addr(struct sljit_label *label, struct sljit_jump *jump,
struct sljit_const *const_, struct sljit_put_label *put_label)
{
sljit_uw result = ~(sljit_uw)0;
if (label)
result = label->size;
if (jump && jump->addr < result)
result = jump->addr;
if (const_ && const_->addr < result)
result = const_->addr;
if (put_label && put_label->addr < result)
result = put_label->addr;
return result;
}
#endif /* !SLJIT_CONFIG_X86 */
static SLJIT_INLINE void set_emit_enter(struct sljit_compiler *compiler,
sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds,
sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
@ -687,6 +718,19 @@ static SLJIT_INLINE void set_const(struct sljit_const *const_, struct sljit_comp
compiler->last_const = const_;
}
static SLJIT_INLINE void set_put_label(struct sljit_put_label *put_label, struct sljit_compiler *compiler, sljit_uw offset)
{
put_label->next = NULL;
put_label->label = NULL;
put_label->addr = compiler->size - offset;
put_label->flags = 0;
if (compiler->last_put_label)
compiler->last_put_label->next = put_label;
else
compiler->put_labels = put_label;
compiler->last_put_label = put_label;
}
#define ADDRESSING_DEPENDS_ON(exp, reg) \
(((exp) & SLJIT_MEM) && (((exp) & REG_MASK) == reg || OFFS_REG(exp) == reg))
@ -1905,6 +1949,21 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_const(struct sljit_compil
CHECK_RETURN_OK;
}
static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
{
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
FUNCTION_CHECK_DST(dst, dstw, 0);
#endif
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
if (SLJIT_UNLIKELY(!!compiler->verbose)) {
fprintf(compiler->verbose, " put_label ");
sljit_verbose_param(compiler, dst, dstw);
fprintf(compiler->verbose, "\n");
}
#endif
CHECK_RETURN_OK;
}
#endif /* SLJIT_ARGUMENT_CHECKS || SLJIT_VERBOSE */
#define SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw) \
@ -2581,6 +2640,14 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi
return NULL;
}
SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
{
SLJIT_UNUSED_ARG(compiler);
SLJIT_UNUSED_ARG(dst);
SLJIT_UNUSED_ARG(dstw);
return NULL;
}
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
{
SLJIT_UNUSED_ARG(addr);
@ -2597,4 +2664,4 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_consta
SLJIT_UNREACHABLE();
}
#endif
#endif /* !SLJIT_CONFIG_UNSUPPORTED */

View File

@ -348,13 +348,20 @@ struct sljit_label {
struct sljit_jump {
struct sljit_jump *next;
sljit_uw addr;
sljit_sw flags;
sljit_uw flags;
union {
sljit_uw target;
struct sljit_label* label;
struct sljit_label *label;
} u;
};
struct sljit_put_label {
struct sljit_put_label *next;
struct sljit_label *label;
sljit_uw addr;
sljit_uw flags;
};
struct sljit_const {
struct sljit_const *next;
sljit_uw addr;
@ -366,10 +373,12 @@ struct sljit_compiler {
struct sljit_label *labels;
struct sljit_jump *jumps;
struct sljit_put_label *put_labels;
struct sljit_const *consts;
struct sljit_label *last_label;
struct sljit_jump *last_jump;
struct sljit_const *last_const;
struct sljit_put_label *last_put_label;
void *allocator_data;
struct sljit_memory_fragment *buf;
@ -1314,10 +1323,17 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compil
Flags: - (may destroy flags) */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset);
/* The constant can be changed runtime (see: sljit_set_const)
/* Store a value that can be changed runtime (see: sljit_get_const_addr / sljit_set_const)
Flags: - (does not modify flags) */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value);
/* Store the value of a label (see: sljit_set_put_label)
Flags: - (does not modify flags) */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw);
/* Set the value stored by put_label to this label. */
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_put_label(struct sljit_put_label *put_label, struct sljit_label *label);
/* After the code generation the address for label, jump and const instructions
are computed. Since these structures are freed by sljit_free_compiler, the
addresses must be preserved by the user program elsewere. */

View File

@ -583,8 +583,9 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
sljit_uw *buf_end;
sljit_uw size;
sljit_uw word_count;
sljit_uw next_addr;
sljit_sw executable_offset;
sljit_sw jump_addr;
sljit_sw addr;
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
sljit_uw cpool_size;
sljit_uw cpool_skip_alignment;
@ -597,6 +598,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
struct sljit_label *label;
struct sljit_jump *jump;
struct sljit_const *const_;
struct sljit_put_label *put_label;
CHECK_ERROR_PTR();
CHECK_PTR(check_sljit_generate_code(compiler));
@ -625,11 +627,13 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
code_ptr = code;
word_count = 0;
next_addr = 1;
executable_offset = SLJIT_EXEC_OFFSET(code);
label = compiler->labels;
jump = compiler->jumps;
const_ = compiler->consts;
put_label = compiler->put_labels;
if (label && label->size == 0) {
label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code, executable_offset);
@ -669,35 +673,45 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
else if ((*buf_ptr & 0xff000000) != PUSH_POOL) {
#endif
*code_ptr = *buf_ptr++;
if (next_addr == word_count) {
SLJIT_ASSERT(!label || label->size >= word_count);
SLJIT_ASSERT(!jump || jump->addr >= word_count);
SLJIT_ASSERT(!const_ || const_->addr >= word_count);
SLJIT_ASSERT(!put_label || put_label->addr >= word_count);
/* These structures are ordered by their address. */
SLJIT_ASSERT(!label || label->size >= word_count);
SLJIT_ASSERT(!jump || jump->addr >= word_count);
SLJIT_ASSERT(!const_ || const_->addr >= word_count);
if (jump && jump->addr == word_count) {
if (jump && jump->addr == word_count) {
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
if (detect_jump_type(jump, code_ptr, code, executable_offset))
code_ptr--;
jump->addr = (sljit_uw)code_ptr;
if (detect_jump_type(jump, code_ptr, code, executable_offset))
code_ptr--;
jump->addr = (sljit_uw)code_ptr;
#else
jump->addr = (sljit_uw)(code_ptr - 2);
if (detect_jump_type(jump, code_ptr, code, executable_offset))
code_ptr -= 2;
jump->addr = (sljit_uw)(code_ptr - 2);
if (detect_jump_type(jump, code_ptr, code, executable_offset))
code_ptr -= 2;
#endif
jump = jump->next;
}
if (label && label->size == word_count) {
/* code_ptr can be affected above. */
label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr + 1, executable_offset);
label->size = (code_ptr + 1) - code;
label = label->next;
}
if (const_ && const_->addr == word_count) {
jump = jump->next;
}
if (label && label->size == word_count) {
/* code_ptr can be affected above. */
label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr + 1, executable_offset);
label->size = (code_ptr + 1) - code;
label = label->next;
}
if (const_ && const_->addr == word_count) {
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
const_->addr = (sljit_uw)code_ptr;
const_->addr = (sljit_uw)code_ptr;
#else
const_->addr = (sljit_uw)(code_ptr - 1);
const_->addr = (sljit_uw)(code_ptr - 1);
#endif
const_ = const_->next;
const_ = const_->next;
}
if (put_label && put_label->addr == word_count) {
SLJIT_ASSERT(put_label->label);
put_label->addr = (sljit_uw)code_ptr;
put_label = put_label->next;
}
next_addr = compute_next_addr(label, jump, const_, put_label);
}
code_ptr++;
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
@ -725,6 +739,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
SLJIT_ASSERT(!label);
SLJIT_ASSERT(!jump);
SLJIT_ASSERT(!const_);
SLJIT_ASSERT(!put_label);
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
SLJIT_ASSERT(cpool_size == 0);
@ -755,15 +770,15 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
buf_ptr = (sljit_uw *)jump->addr;
if (jump->flags & PATCH_B) {
jump_addr = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(buf_ptr + 2, executable_offset);
addr = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(buf_ptr + 2, executable_offset);
if (!(jump->flags & JUMP_ADDR)) {
SLJIT_ASSERT(jump->flags & JUMP_LABEL);
SLJIT_ASSERT(((sljit_sw)jump->u.label->addr - jump_addr) <= 0x01ffffff && ((sljit_sw)jump->u.label->addr - jump_addr) >= -0x02000000);
*buf_ptr |= (((sljit_sw)jump->u.label->addr - jump_addr) >> 2) & 0x00ffffff;
SLJIT_ASSERT(((sljit_sw)jump->u.label->addr - addr) <= 0x01ffffff && ((sljit_sw)jump->u.label->addr - addr) >= -0x02000000);
*buf_ptr |= (((sljit_sw)jump->u.label->addr - addr) >> 2) & 0x00ffffff;
}
else {
SLJIT_ASSERT(((sljit_sw)jump->u.target - jump_addr) <= 0x01ffffff && ((sljit_sw)jump->u.target - jump_addr) >= -0x02000000);
*buf_ptr |= (((sljit_sw)jump->u.target - jump_addr) >> 2) & 0x00ffffff;
SLJIT_ASSERT(((sljit_sw)jump->u.target - addr) <= 0x01ffffff && ((sljit_sw)jump->u.target - addr) >= -0x02000000);
*buf_ptr |= (((sljit_sw)jump->u.target - addr) >> 2) & 0x00ffffff;
}
}
else if (jump->flags & SLJIT_REWRITABLE_JUMP) {
@ -813,6 +828,22 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
}
#endif
put_label = compiler->put_labels;
while (put_label) {
addr = put_label->label->addr;
buf_ptr = (sljit_uw*)put_label->addr;
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
SLJIT_ASSERT((buf_ptr[0] & 0xffff0000) == 0xe59f0000);
buf_ptr[((buf_ptr[0] & 0xfff) >> 2) + 2] = addr;
#else
SLJIT_ASSERT((buf_ptr[-1] & 0xfff00000) == MOVW && (buf_ptr[0] & 0xfff00000) == MOVT);
buf_ptr[-1] |= ((addr << 4) & 0xf0000) | (addr & 0xfff);
buf_ptr[0] |= ((addr >> 12) & 0xf0000) | ((addr >> 16) & 0xfff);
#endif
put_label = put_label->next;
}
SLJIT_ASSERT(code_ptr - code <= (sljit_s32)size);
compiler->error = SLJIT_ERR_COMPILED;
@ -2639,23 +2670,23 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compile
SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
{
struct sljit_const *const_;
sljit_s32 reg;
sljit_s32 dst_r;
CHECK_ERROR_PTR();
CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
ADJUST_LOCAL_OFFSET(dst, dstw);
const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
PTR_FAIL_IF(!const_);
reg = SLOW_IS_REG(dst) ? dst : TMP_REG2;
dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG2;
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
PTR_FAIL_IF(push_inst_with_unique_literal(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, reg, TMP_PC, 0), init_value));
PTR_FAIL_IF(push_inst_with_unique_literal(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, dst_r, TMP_PC, 0), init_value));
compiler->patches++;
#else
PTR_FAIL_IF(emit_imm(compiler, reg, init_value));
PTR_FAIL_IF(emit_imm(compiler, dst_r, init_value));
#endif
const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
PTR_FAIL_IF(!const_);
set_const(const_, compiler);
if (dst & SLJIT_MEM)
@ -2663,6 +2694,33 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi
return const_;
}
SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
{
struct sljit_put_label *put_label;
sljit_s32 dst_r;
CHECK_ERROR_PTR();
CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw));
ADJUST_LOCAL_OFFSET(dst, dstw);
dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG2;
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
PTR_FAIL_IF(push_inst_with_unique_literal(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, dst_r, TMP_PC, 0), 0));
compiler->patches++;
#else
PTR_FAIL_IF(emit_imm(compiler, dst_r, 0));
#endif
put_label = (struct sljit_put_label*)ensure_abuf(compiler, sizeof(struct sljit_put_label));
PTR_FAIL_IF(!put_label);
set_put_label(put_label, compiler, 0);
if (dst & SLJIT_MEM)
PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, dst, dstw, TMP_REG1));
return put_label;
}
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
{
inline_set_jump_addr(addr, executable_offset, new_target, 1);

View File

@ -161,7 +161,7 @@ static SLJIT_INLINE void modify_imm64_const(sljit_ins* inst, sljit_uw new_imm)
inst[3] = MOVK | dst | ((new_imm >> 48) << 5) | (3 << 21);
}
static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset)
static SLJIT_INLINE sljit_sw detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset)
{
sljit_sw diff;
sljit_uw target_addr;
@ -196,14 +196,14 @@ static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_in
return 4;
}
if (target_addr <= 0xffffffffl) {
if (target_addr < 0x100000000l) {
if (jump->flags & IS_COND)
code_ptr[-5] -= (2 << 5);
code_ptr[-2] = code_ptr[0];
return 2;
}
if (target_addr <= 0xffffffffffffl) {
if (target_addr < 0x1000000000000l) {
if (jump->flags & IS_COND)
code_ptr[-5] -= (1 << 5);
jump->flags |= PATCH_ABS48;
@ -215,6 +215,22 @@ static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_in
return 0;
}
static SLJIT_INLINE sljit_sw put_label_get_length(struct sljit_put_label *put_label, sljit_uw max_label)
{
if (max_label < 0x100000000l) {
put_label->flags = 0;
return 2;
}
if (max_label < 0x1000000000000l) {
put_label->flags = 1;
return 1;
}
put_label->flags = 2;
return 0;
}
SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
{
struct sljit_memory_fragment *buf;
@ -223,6 +239,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
sljit_ins *buf_ptr;
sljit_ins *buf_end;
sljit_uw word_count;
sljit_uw next_addr;
sljit_sw executable_offset;
sljit_uw addr;
sljit_s32 dst;
@ -230,6 +247,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
struct sljit_label *label;
struct sljit_jump *jump;
struct sljit_const *const_;
struct sljit_put_label *put_label;
CHECK_ERROR_PTR();
CHECK_PTR(check_sljit_generate_code(compiler));
@ -241,34 +259,47 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
code_ptr = code;
word_count = 0;
next_addr = 0;
executable_offset = SLJIT_EXEC_OFFSET(code);
label = compiler->labels;
jump = compiler->jumps;
const_ = compiler->consts;
put_label = compiler->put_labels;
do {
buf_ptr = (sljit_ins*)buf->memory;
buf_end = buf_ptr + (buf->used_size >> 2);
do {
*code_ptr = *buf_ptr++;
/* These structures are ordered by their address. */
SLJIT_ASSERT(!label || label->size >= word_count);
SLJIT_ASSERT(!jump || jump->addr >= word_count);
SLJIT_ASSERT(!const_ || const_->addr >= word_count);
if (label && label->size == word_count) {
label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
label->size = code_ptr - code;
label = label->next;
}
if (jump && jump->addr == word_count) {
jump->addr = (sljit_uw)(code_ptr - 4);
code_ptr -= detect_jump_type(jump, code_ptr, code, executable_offset);
jump = jump->next;
}
if (const_ && const_->addr == word_count) {
const_->addr = (sljit_uw)code_ptr;
const_ = const_->next;
if (next_addr == word_count) {
SLJIT_ASSERT(!label || label->size >= word_count);
SLJIT_ASSERT(!jump || jump->addr >= word_count);
SLJIT_ASSERT(!const_ || const_->addr >= word_count);
SLJIT_ASSERT(!put_label || put_label->addr >= word_count);
/* These structures are ordered by their address. */
if (label && label->size == word_count) {
label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
label->size = code_ptr - code;
label = label->next;
}
if (jump && jump->addr == word_count) {
jump->addr = (sljit_uw)(code_ptr - 4);
code_ptr -= detect_jump_type(jump, code_ptr, code, executable_offset);
jump = jump->next;
}
if (const_ && const_->addr == word_count) {
const_->addr = (sljit_uw)code_ptr;
const_ = const_->next;
}
if (put_label && put_label->addr == word_count) {
SLJIT_ASSERT(put_label->label);
put_label->addr = (sljit_uw)(code_ptr - 3);
code_ptr -= put_label_get_length(put_label, (sljit_uw)(SLJIT_ADD_EXEC_OFFSET(code, executable_offset) + put_label->label->size));
put_label = put_label->next;
}
next_addr = compute_next_addr(label, jump, const_, put_label);
}
code_ptr ++;
word_count ++;
@ -286,6 +317,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
SLJIT_ASSERT(!label);
SLJIT_ASSERT(!jump);
SLJIT_ASSERT(!const_);
SLJIT_ASSERT(!put_label);
SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size);
jump = compiler->jumps;
@ -323,6 +355,23 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
jump = jump->next;
}
put_label = compiler->put_labels;
while (put_label) {
addr = put_label->label->addr;
buf_ptr = (sljit_ins *)put_label->addr;
buf_ptr[0] |= (addr & 0xffff) << 5;
buf_ptr[1] |= ((addr >> 16) & 0xffff) << 5;
if (put_label->flags >= 1)
buf_ptr[2] |= ((addr >> 32) & 0xffff) << 5;
if (put_label->flags >= 2)
buf_ptr[3] |= ((addr >> 48) & 0xffff) << 5;
put_label = put_label->next;
}
compiler->error = SLJIT_ERR_COMPILED;
compiler->executable_offset = executable_offset;
compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins);
@ -1947,6 +1996,28 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi
return const_;
}
SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
{
struct sljit_put_label *put_label;
sljit_s32 dst_r;
CHECK_ERROR_PTR();
CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw));
ADJUST_LOCAL_OFFSET(dst, dstw);
dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
PTR_FAIL_IF(emit_imm64_const(compiler, dst_r, 0));
put_label = (struct sljit_put_label*)ensure_abuf(compiler, sizeof(struct sljit_put_label));
PTR_FAIL_IF(!put_label);
set_put_label(put_label, compiler, 1);
if (dst & SLJIT_MEM)
PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, dst_r, dst, dstw, TMP_REG2));
return put_label;
}
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
{
sljit_ins* inst = (sljit_ins*)addr;

View File

@ -365,11 +365,13 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
sljit_u16 *buf_ptr;
sljit_u16 *buf_end;
sljit_uw half_count;
sljit_uw next_addr;
sljit_sw executable_offset;
struct sljit_label *label;
struct sljit_jump *jump;
struct sljit_const *const_;
struct sljit_put_label *put_label;
CHECK_ERROR_PTR();
CHECK_PTR(check_sljit_generate_code(compiler));
@ -381,34 +383,46 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
code_ptr = code;
half_count = 0;
next_addr = 0;
executable_offset = SLJIT_EXEC_OFFSET(code);
label = compiler->labels;
jump = compiler->jumps;
const_ = compiler->consts;
put_label = compiler->put_labels;
do {
buf_ptr = (sljit_u16*)buf->memory;
buf_end = buf_ptr + (buf->used_size >> 1);
do {
*code_ptr = *buf_ptr++;
/* These structures are ordered by their address. */
SLJIT_ASSERT(!label || label->size >= half_count);
SLJIT_ASSERT(!jump || jump->addr >= half_count);
SLJIT_ASSERT(!const_ || const_->addr >= half_count);
if (label && label->size == half_count) {
label->addr = ((sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset)) | 0x1;
label->size = code_ptr - code;
label = label->next;
}
if (jump && jump->addr == half_count) {
jump->addr = (sljit_uw)code_ptr - ((jump->flags & IS_COND) ? 10 : 8);
code_ptr -= detect_jump_type(jump, code_ptr, code, executable_offset);
jump = jump->next;
}
if (const_ && const_->addr == half_count) {
const_->addr = (sljit_uw)code_ptr;
const_ = const_->next;
if (next_addr == half_count) {
SLJIT_ASSERT(!label || label->size >= half_count);
SLJIT_ASSERT(!jump || jump->addr >= half_count);
SLJIT_ASSERT(!const_ || const_->addr >= half_count);
SLJIT_ASSERT(!put_label || put_label->addr >= half_count);
/* These structures are ordered by their address. */
if (label && label->size == half_count) {
label->addr = ((sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset)) | 0x1;
label->size = code_ptr - code;
label = label->next;
}
if (jump && jump->addr == half_count) {
jump->addr = (sljit_uw)code_ptr - ((jump->flags & IS_COND) ? 10 : 8);
code_ptr -= detect_jump_type(jump, code_ptr, code, executable_offset);
jump = jump->next;
}
if (const_ && const_->addr == half_count) {
const_->addr = (sljit_uw)code_ptr;
const_ = const_->next;
}
if (put_label && put_label->addr == half_count) {
SLJIT_ASSERT(put_label->label);
put_label->addr = (sljit_uw)code_ptr;
put_label = put_label->next;
}
next_addr = compute_next_addr(label, jump, const_, put_label);
}
code_ptr ++;
half_count ++;
@ -426,6 +440,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
SLJIT_ASSERT(!label);
SLJIT_ASSERT(!jump);
SLJIT_ASSERT(!const_);
SLJIT_ASSERT(!put_label);
SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size);
jump = compiler->jumps;
@ -434,6 +449,12 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
jump = jump->next;
}
put_label = compiler->put_labels;
while (put_label) {
modify_imm32_const((sljit_u16 *)put_label->addr, put_label->label->addr);
put_label = put_label->next;
}
compiler->error = SLJIT_ERR_COMPILED;
compiler->executable_offset = executable_offset;
compiler->executable_size = (code_ptr - code) * sizeof(sljit_u16);
@ -2311,6 +2332,27 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi
return const_;
}
SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
{
struct sljit_put_label *put_label;
sljit_s32 dst_r;
CHECK_ERROR_PTR();
CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw));
ADJUST_LOCAL_OFFSET(dst, dstw);
put_label = (struct sljit_put_label*)ensure_abuf(compiler, sizeof(struct sljit_put_label));
PTR_FAIL_IF(!put_label);
set_put_label(put_label, compiler, 0);
dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
PTR_FAIL_IF(emit_imm32_const(compiler, dst_r, 0));
if (dst & SLJIT_MEM)
PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, dst_r, dst, dstw, TMP_REG2));
return put_label;
}
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
{
sljit_u16 *inst = (sljit_u16*)addr;

View File

@ -425,6 +425,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_ta
{
sljit_ins *inst = (sljit_ins *)addr;
SLJIT_ASSERT((inst[0] & 0xffe00000) == LUI && (inst[1] & 0xfc000000) == ORI);
inst[0] = (inst[0] & 0xffff0000) | ((new_target >> 16) & 0xffff);
inst[1] = (inst[1] & 0xffff0000) | (new_target & 0xffff);
inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
@ -435,6 +436,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_consta
{
sljit_ins *inst = (sljit_ins *)addr;
SLJIT_ASSERT((inst[0] & 0xffe00000) == LUI && (inst[1] & 0xfc000000) == ORI);
inst[0] = (inst[0] & 0xffff0000) | ((new_constant >> 16) & 0xffff);
inst[1] = (inst[1] & 0xffff0000) | (new_constant & 0xffff);
inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);

View File

@ -449,6 +449,55 @@ static __attribute__ ((noinline)) void sljit_cache_flush(void* code, void* code_
}
#endif
#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
static SLJIT_INLINE sljit_sw put_label_get_length(struct sljit_put_label *put_label, sljit_uw max_label)
{
if (max_label < 0x80000000l) {
put_label->flags = 0;
return 1;
}
if (max_label < 0x800000000000l) {
put_label->flags = 1;
return 3;
}
put_label->flags = 2;
return 5;
}
static SLJIT_INLINE void put_label_set(struct sljit_put_label *put_label)
{
sljit_uw addr = put_label->label->addr;
sljit_ins *inst = (sljit_ins *)put_label->addr;
sljit_s32 reg = *inst;
if (put_label->flags == 0) {
SLJIT_ASSERT(addr < 0x80000000l);
inst[0] = LUI | T(reg) | IMM(addr >> 16);
}
else if (put_label->flags == 1) {
SLJIT_ASSERT(addr < 0x800000000000l);
inst[0] = LUI | T(reg) | IMM(addr >> 32);
inst[1] = ORI | S(reg) | T(reg) | IMM((addr >> 16) & 0xffff);
inst[2] = DSLL | T(reg) | D(reg) | SH_IMM(16);
inst += 2;
}
else {
inst[0] = LUI | T(reg) | IMM(addr >> 48);
inst[1] = ORI | S(reg) | T(reg) | IMM((addr >> 32) & 0xffff);
inst[2] = DSLL | T(reg) | D(reg) | SH_IMM(16);
inst[3] = ORI | S(reg) | T(reg) | IMM((addr >> 16) & 0xffff);
inst[4] = DSLL | T(reg) | D(reg) | SH_IMM(16);
inst += 4;
}
inst[1] = ORI | S(reg) | T(reg) | IMM(addr & 0xffff);
}
#endif
SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
{
struct sljit_memory_fragment *buf;
@ -457,12 +506,14 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
sljit_ins *buf_ptr;
sljit_ins *buf_end;
sljit_uw word_count;
sljit_uw next_addr;
sljit_sw executable_offset;
sljit_uw addr;
struct sljit_label *label;
struct sljit_jump *jump;
struct sljit_const *const_;
struct sljit_put_label *put_label;
CHECK_ERROR_PTR();
CHECK_PTR(check_sljit_generate_code(compiler));
@ -474,39 +525,54 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
code_ptr = code;
word_count = 0;
next_addr = 0;
executable_offset = SLJIT_EXEC_OFFSET(code);
label = compiler->labels;
jump = compiler->jumps;
const_ = compiler->consts;
put_label = compiler->put_labels;
do {
buf_ptr = (sljit_ins*)buf->memory;
buf_end = buf_ptr + (buf->used_size >> 2);
do {
*code_ptr = *buf_ptr++;
SLJIT_ASSERT(!label || label->size >= word_count);
SLJIT_ASSERT(!jump || jump->addr >= word_count);
SLJIT_ASSERT(!const_ || const_->addr >= word_count);
/* These structures are ordered by their address. */
if (label && label->size == word_count) {
label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
label->size = code_ptr - code;
label = label->next;
}
if (jump && jump->addr == word_count) {
if (next_addr == word_count) {
SLJIT_ASSERT(!label || label->size >= word_count);
SLJIT_ASSERT(!jump || jump->addr >= word_count);
SLJIT_ASSERT(!const_ || const_->addr >= word_count);
SLJIT_ASSERT(!put_label || put_label->addr >= word_count);
/* These structures are ordered by their address. */
if (label && label->size == word_count) {
label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
label->size = code_ptr - code;
label = label->next;
}
if (jump && jump->addr == word_count) {
#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
jump->addr = (sljit_uw)(code_ptr - 3);
jump->addr = (sljit_uw)(code_ptr - 3);
#else
jump->addr = (sljit_uw)(code_ptr - 7);
jump->addr = (sljit_uw)(code_ptr - 7);
#endif
code_ptr = detect_jump_type(jump, code_ptr, code, executable_offset);
jump = jump->next;
}
if (const_ && const_->addr == word_count) {
/* Just recording the address. */
const_->addr = (sljit_uw)code_ptr;
const_ = const_->next;
code_ptr = detect_jump_type(jump, code_ptr, code, executable_offset);
jump = jump->next;
}
if (const_ && const_->addr == word_count) {
const_->addr = (sljit_uw)code_ptr;
const_ = const_->next;
}
if (put_label && put_label->addr == word_count) {
SLJIT_ASSERT(put_label->label);
put_label->addr = (sljit_uw)code_ptr;
#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
code_ptr += put_label_get_length(put_label, (sljit_uw)(SLJIT_ADD_EXEC_OFFSET(code, executable_offset) + put_label->label->size));
word_count += 5;
#endif
put_label = put_label->next;
}
next_addr = compute_next_addr(label, jump, const_, put_label);
}
code_ptr ++;
word_count ++;
@ -524,6 +590,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
SLJIT_ASSERT(!label);
SLJIT_ASSERT(!jump);
SLJIT_ASSERT(!const_);
SLJIT_ASSERT(!put_label);
SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size);
jump = compiler->jumps;
@ -571,6 +638,21 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
jump = jump->next;
}
put_label = compiler->put_labels;
while (put_label) {
#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
addr = put_label->label->addr;
buf_ptr = (sljit_ins *)put_label->addr;
SLJIT_ASSERT((buf_ptr[0] & 0xffe00000) == LUI && (buf_ptr[1] & 0xfc000000) == ORI);
buf_ptr[0] |= (addr >> 16) & 0xffff;
buf_ptr[1] |= addr & 0xffff;
#else
put_label_set(put_label);
#endif
put_label = put_label->next;
}
compiler->error = SLJIT_ERR_COMPILED;
compiler->executable_offset = executable_offset;
compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins);
@ -2157,7 +2239,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil
SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
{
struct sljit_const *const_;
sljit_s32 reg;
sljit_s32 dst_r;
CHECK_ERROR_PTR();
CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
@ -2167,11 +2249,38 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi
PTR_FAIL_IF(!const_);
set_const(const_, compiler);
reg = FAST_IS_REG(dst) ? dst : TMP_REG2;
PTR_FAIL_IF(emit_const(compiler, reg, init_value));
dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
PTR_FAIL_IF(emit_const(compiler, dst_r, init_value));
if (dst & SLJIT_MEM)
PTR_FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0));
return const_;
}
SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
{
struct sljit_put_label *put_label;
sljit_s32 dst_r;
CHECK_ERROR_PTR();
CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw));
ADJUST_LOCAL_OFFSET(dst, dstw);
put_label = (struct sljit_put_label*)ensure_abuf(compiler, sizeof(struct sljit_put_label));
PTR_FAIL_IF(!put_label);
set_put_label(put_label, compiler, 0);
dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
PTR_FAIL_IF(emit_const(compiler, dst_r, 0));
#else
PTR_FAIL_IF(push_inst(compiler, dst_r, UNMOVABLE_INS));
compiler->size += 5;
#endif
if (dst & SLJIT_MEM)
PTR_FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0));
return put_label;
}

View File

@ -259,6 +259,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_ta
{
sljit_ins *inst = (sljit_ins *)addr;
SLJIT_ASSERT((inst[0] & 0xfc1f0000) == ADDIS && (inst[1] & 0xfc000000) == ORI);
inst[0] = (inst[0] & 0xffff0000) | ((new_target >> 16) & 0xffff);
inst[1] = (inst[1] & 0xffff0000) | (new_target & 0xffff);
inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
@ -269,6 +270,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_consta
{
sljit_ins *inst = (sljit_ins *)addr;
SLJIT_ASSERT((inst[0] & 0xfc1f0000) == ADDIS && (inst[1] & 0xfc000000) == ORI);
inst[0] = (inst[0] & 0xffff0000) | ((new_constant >> 16) & 0xffff);
inst[1] = (inst[1] & 0xffff0000) | (new_constant & 0xffff);
inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);

View File

@ -35,9 +35,6 @@
#error "Must implement count leading zeroes"
#endif
#define RLDI(dst, src, sh, mb, type) \
(HI(30) | S(src) | A(dst) | ((type) << 2) | (((sh) & 0x1f) << 11) | (((sh) & 0x20) >> 4) | (((mb) & 0x1f) << 6) | ((mb) & 0x20))
#define PUSH_RLDICR(reg, shift) \
push_inst(compiler, RLDI(reg, reg, 63 - shift, shift, 1))

View File

@ -231,6 +231,9 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
#define SIMM_MIN (-0x8000)
#define UIMM_MAX (0xffff)
#define RLDI(dst, src, sh, mb, type) \
(HI(30) | S(src) | A(dst) | ((type) << 2) | (((sh) & 0x1f) << 11) | (((sh) & 0x20) >> 4) | (((mb) & 0x1f) << 6) | ((mb) & 0x20))
#if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_function_context(void** func_ptr, struct sljit_function_context* context, sljit_sw addr, void* func)
{
@ -324,6 +327,55 @@ keep_address:
return 0;
}
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
static SLJIT_INLINE sljit_sw put_label_get_length(struct sljit_put_label *put_label, sljit_uw max_label)
{
if (max_label < 0x100000000l) {
put_label->flags = 0;
return 1;
}
if (max_label < 0x1000000000000l) {
put_label->flags = 1;
return 3;
}
put_label->flags = 2;
return 4;
}
static SLJIT_INLINE void put_label_set(struct sljit_put_label *put_label)
{
sljit_uw addr = put_label->label->addr;
sljit_ins *inst = (sljit_ins *)put_label->addr;
sljit_s32 reg = *inst;
if (put_label->flags == 0) {
SLJIT_ASSERT(addr < 0x100000000l);
inst[0] = ORIS | S(TMP_ZERO) | A(reg) | IMM(addr >> 16);
}
else {
if (put_label->flags == 1) {
SLJIT_ASSERT(addr < 0x1000000000000l);
inst[0] = ORI | S(TMP_ZERO) | A(reg) | IMM(addr >> 32);
}
else {
inst[0] = ORIS | S(TMP_ZERO) | A(reg) | IMM(addr >> 48);
inst[1] = ORI | S(reg) | A(reg) | IMM((addr >> 32) & 0xffff);
inst ++;
}
inst[1] = RLDI(reg, reg, 32, 31, 1);
inst[2] = ORIS | S(reg) | A(reg) | IMM((addr >> 16) & 0xffff);
inst += 2;
}
inst[1] = ORI | S(reg) | A(reg) | IMM(addr & 0xffff);
}
#endif
SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
{
struct sljit_memory_fragment *buf;
@ -332,12 +384,14 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
sljit_ins *buf_ptr;
sljit_ins *buf_end;
sljit_uw word_count;
sljit_uw next_addr;
sljit_sw executable_offset;
sljit_uw addr;
struct sljit_label *label;
struct sljit_jump *jump;
struct sljit_const *const_;
struct sljit_put_label *put_label;
CHECK_ERROR_PTR();
CHECK_PTR(check_sljit_generate_code(compiler));
@ -356,71 +410,87 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
code_ptr = code;
word_count = 0;
next_addr = 0;
executable_offset = SLJIT_EXEC_OFFSET(code);
label = compiler->labels;
jump = compiler->jumps;
const_ = compiler->consts;
put_label = compiler->put_labels;
do {
buf_ptr = (sljit_ins*)buf->memory;
buf_end = buf_ptr + (buf->used_size >> 2);
do {
*code_ptr = *buf_ptr++;
SLJIT_ASSERT(!label || label->size >= word_count);
SLJIT_ASSERT(!jump || jump->addr >= word_count);
SLJIT_ASSERT(!const_ || const_->addr >= word_count);
/* These structures are ordered by their address. */
if (label && label->size == word_count) {
/* Just recording the address. */
label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
label->size = code_ptr - code;
label = label->next;
}
if (jump && jump->addr == word_count) {
#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
jump->addr = (sljit_uw)(code_ptr - 3);
#else
jump->addr = (sljit_uw)(code_ptr - 6);
#endif
if (detect_jump_type(jump, code_ptr, code, executable_offset)) {
#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
code_ptr[-3] = code_ptr[0];
code_ptr -= 3;
#else
if (jump->flags & PATCH_ABS32) {
code_ptr -= 3;
code_ptr[-1] = code_ptr[2];
code_ptr[0] = code_ptr[3];
}
else if (jump->flags & PATCH_ABS48) {
code_ptr--;
code_ptr[-1] = code_ptr[0];
code_ptr[0] = code_ptr[1];
/* rldicr rX,rX,32,31 -> rX,rX,16,47 */
SLJIT_ASSERT((code_ptr[-3] & 0xfc00ffff) == 0x780007c6);
code_ptr[-3] ^= 0x8422;
/* oris -> ori */
code_ptr[-2] ^= 0x4000000;
}
else {
code_ptr[-6] = code_ptr[0];
code_ptr -= 6;
}
#endif
if (jump->flags & REMOVE_COND) {
code_ptr[0] = BCx | (2 << 2) | ((code_ptr[0] ^ (8 << 21)) & 0x03ff0001);
code_ptr++;
jump->addr += sizeof(sljit_ins);
code_ptr[0] = Bx;
jump->flags -= IS_COND;
}
if (next_addr == word_count) {
SLJIT_ASSERT(!label || label->size >= word_count);
SLJIT_ASSERT(!jump || jump->addr >= word_count);
SLJIT_ASSERT(!const_ || const_->addr >= word_count);
SLJIT_ASSERT(!put_label || put_label->addr >= word_count);
/* These structures are ordered by their address. */
if (label && label->size == word_count) {
/* Just recording the address. */
label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
label->size = code_ptr - code;
label = label->next;
}
jump = jump->next;
}
if (const_ && const_->addr == word_count) {
const_->addr = (sljit_uw)code_ptr;
const_ = const_->next;
if (jump && jump->addr == word_count) {
#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
jump->addr = (sljit_uw)(code_ptr - 3);
#else
jump->addr = (sljit_uw)(code_ptr - 6);
#endif
if (detect_jump_type(jump, code_ptr, code, executable_offset)) {
#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
code_ptr[-3] = code_ptr[0];
code_ptr -= 3;
#else
if (jump->flags & PATCH_ABS32) {
code_ptr -= 3;
code_ptr[-1] = code_ptr[2];
code_ptr[0] = code_ptr[3];
}
else if (jump->flags & PATCH_ABS48) {
code_ptr--;
code_ptr[-1] = code_ptr[0];
code_ptr[0] = code_ptr[1];
/* rldicr rX,rX,32,31 -> rX,rX,16,47 */
SLJIT_ASSERT((code_ptr[-3] & 0xfc00ffff) == 0x780007c6);
code_ptr[-3] ^= 0x8422;
/* oris -> ori */
code_ptr[-2] ^= 0x4000000;
}
else {
code_ptr[-6] = code_ptr[0];
code_ptr -= 6;
}
#endif
if (jump->flags & REMOVE_COND) {
code_ptr[0] = BCx | (2 << 2) | ((code_ptr[0] ^ (8 << 21)) & 0x03ff0001);
code_ptr++;
jump->addr += sizeof(sljit_ins);
code_ptr[0] = Bx;
jump->flags -= IS_COND;
}
}
jump = jump->next;
}
if (const_ && const_->addr == word_count) {
const_->addr = (sljit_uw)code_ptr;
const_ = const_->next;
}
if (put_label && put_label->addr == word_count) {
SLJIT_ASSERT(put_label->label);
put_label->addr = (sljit_uw)code_ptr;
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
code_ptr += put_label_get_length(put_label, (sljit_uw)(SLJIT_ADD_EXEC_OFFSET(code, executable_offset) + put_label->label->size));
word_count += 4;
#endif
put_label = put_label->next;
}
next_addr = compute_next_addr(label, jump, const_, put_label);
}
code_ptr ++;
word_count ++;
@ -438,6 +508,8 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
SLJIT_ASSERT(!label);
SLJIT_ASSERT(!jump);
SLJIT_ASSERT(!const_);
SLJIT_ASSERT(!put_label);
#if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size - (sizeof(struct sljit_function_context) / sizeof(sljit_ins)));
#else
@ -503,6 +575,21 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
jump = jump->next;
}
put_label = compiler->put_labels;
while (put_label) {
#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
addr = put_label->label->addr;
buf_ptr = (sljit_ins *)put_label->addr;
SLJIT_ASSERT((buf_ptr[0] & 0xfc1f0000) == ADDIS && (buf_ptr[1] & 0xfc000000) == ORI);
buf_ptr[0] |= (addr >> 16) & 0xffff;
buf_ptr[1] |= addr & 0xffff;
#else
put_label_set(put_label);
#endif
put_label = put_label->next;
}
compiler->error = SLJIT_ERR_COMPILED;
compiler->executable_offset = executable_offset;
compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins);
@ -2261,7 +2348,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compil
SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
{
struct sljit_const *const_;
sljit_s32 reg;
sljit_s32 dst_r;
CHECK_ERROR_PTR();
CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
@ -2271,11 +2358,38 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi
PTR_FAIL_IF(!const_);
set_const(const_, compiler);
reg = FAST_IS_REG(dst) ? dst : TMP_REG2;
PTR_FAIL_IF(emit_const(compiler, reg, init_value));
dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
PTR_FAIL_IF(emit_const(compiler, dst_r, init_value));
if (dst & SLJIT_MEM)
PTR_FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0));
return const_;
}
SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
{
struct sljit_put_label *put_label;
sljit_s32 dst_r;
CHECK_ERROR_PTR();
CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw));
ADJUST_LOCAL_OFFSET(dst, dstw);
put_label = (struct sljit_put_label*)ensure_abuf(compiler, sizeof(struct sljit_put_label));
PTR_FAIL_IF(!put_label);
set_put_label(put_label, compiler, 0);
dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
PTR_FAIL_IF(emit_const(compiler, dst_r, 0));
#else
PTR_FAIL_IF(push_inst(compiler, dst_r));
compiler->size += 4;
#endif
if (dst & SLJIT_MEM)
PTR_FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0));
return put_label;
}

View File

@ -267,6 +267,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_ta
{
sljit_ins *inst = (sljit_ins *)addr;
SLJIT_ASSERT(((inst[0] & 0xc1c00000) == 0x01000000) && ((inst[1] & 0xc1f82000) == 0x80102000));
inst[0] = (inst[0] & 0xffc00000) | ((new_target >> 10) & 0x3fffff);
inst[1] = (inst[1] & 0xfffffc00) | (new_target & 0x3ff);
inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
@ -277,6 +278,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_consta
{
sljit_ins *inst = (sljit_ins *)addr;
SLJIT_ASSERT(((inst[0] & 0xc1c00000) == 0x01000000) && ((inst[1] & 0xc1f82000) == 0x80102000));
inst[0] = (inst[0] & 0xffc00000) | ((new_constant >> 10) & 0x3fffff);
inst[1] = (inst[1] & 0xfffffc00) | (new_constant & 0x3ff);
inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);

View File

@ -298,12 +298,14 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
sljit_ins *buf_ptr;
sljit_ins *buf_end;
sljit_uw word_count;
sljit_uw next_addr;
sljit_sw executable_offset;
sljit_uw addr;
struct sljit_label *label;
struct sljit_jump *jump;
struct sljit_const *const_;
struct sljit_put_label *put_label;
CHECK_ERROR_PTR();
CHECK_PTR(check_sljit_generate_code(compiler));
@ -315,40 +317,52 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
code_ptr = code;
word_count = 0;
next_addr = 0;
executable_offset = SLJIT_EXEC_OFFSET(code);
label = compiler->labels;
jump = compiler->jumps;
const_ = compiler->consts;
put_label = compiler->put_labels;
do {
buf_ptr = (sljit_ins*)buf->memory;
buf_end = buf_ptr + (buf->used_size >> 2);
do {
*code_ptr = *buf_ptr++;
SLJIT_ASSERT(!label || label->size >= word_count);
SLJIT_ASSERT(!jump || jump->addr >= word_count);
SLJIT_ASSERT(!const_ || const_->addr >= word_count);
/* These structures are ordered by their address. */
if (label && label->size == word_count) {
/* Just recording the address. */
label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
label->size = code_ptr - code;
label = label->next;
}
if (jump && jump->addr == word_count) {
if (next_addr == word_count) {
SLJIT_ASSERT(!label || label->size >= word_count);
SLJIT_ASSERT(!jump || jump->addr >= word_count);
SLJIT_ASSERT(!const_ || const_->addr >= word_count);
SLJIT_ASSERT(!put_label || put_label->addr >= word_count);
/* These structures are ordered by their address. */
if (label && label->size == word_count) {
/* Just recording the address. */
label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
label->size = code_ptr - code;
label = label->next;
}
if (jump && jump->addr == word_count) {
#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
jump->addr = (sljit_uw)(code_ptr - 3);
jump->addr = (sljit_uw)(code_ptr - 3);
#else
jump->addr = (sljit_uw)(code_ptr - 6);
jump->addr = (sljit_uw)(code_ptr - 6);
#endif
code_ptr = detect_jump_type(jump, code_ptr, code, executable_offset);
jump = jump->next;
}
if (const_ && const_->addr == word_count) {
/* Just recording the address. */
const_->addr = (sljit_uw)code_ptr;
const_ = const_->next;
code_ptr = detect_jump_type(jump, code_ptr, code, executable_offset);
jump = jump->next;
}
if (const_ && const_->addr == word_count) {
/* Just recording the address. */
const_->addr = (sljit_uw)code_ptr;
const_ = const_->next;
}
if (put_label && put_label->addr == word_count) {
SLJIT_ASSERT(put_label->label);
put_label->addr = (sljit_uw)code_ptr;
put_label = put_label->next;
}
next_addr = compute_next_addr(label, jump, const_, put_label);
}
code_ptr ++;
word_count ++;
@ -366,6 +380,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
SLJIT_ASSERT(!label);
SLJIT_ASSERT(!jump);
SLJIT_ASSERT(!const_);
SLJIT_ASSERT(!put_label);
SLJIT_ASSERT(code_ptr - code <= (sljit_s32)compiler->size);
jump = compiler->jumps;
@ -389,8 +404,9 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
/* Set the fields of immediate loads. */
#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
buf_ptr[0] = (buf_ptr[0] & 0xffc00000) | ((addr >> 10) & 0x3fffff);
buf_ptr[1] = (buf_ptr[1] & 0xfffffc00) | (addr & 0x3ff);
SLJIT_ASSERT(((buf_ptr[0] & 0xc1cfffff) == 0x01000000) && ((buf_ptr[1] & 0xc1f83fff) == 0x80102000));
buf_ptr[0] |= (addr >> 10) & 0x3fffff;
buf_ptr[1] |= addr & 0x3ff;
#else
#error "Implementation required"
#endif
@ -398,6 +414,20 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
jump = jump->next;
}
put_label = compiler->put_labels;
while (put_label) {
addr = put_label->label->addr;
buf_ptr = (sljit_ins *)put_label->addr;
#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
SLJIT_ASSERT(((buf_ptr[0] & 0xc1cfffff) == 0x01000000) && ((buf_ptr[1] & 0xc1f83fff) == 0x80102000));
buf_ptr[0] |= (addr >> 10) & 0x3fffff;
buf_ptr[1] |= addr & 0x3ff;
#else
#error "Implementation required"
#endif
put_label = put_label->next;
}
compiler->error = SLJIT_ERR_COMPILED;
compiler->executable_offset = executable_offset;
@ -1465,8 +1495,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil
SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
{
sljit_s32 reg;
struct sljit_const *const_;
sljit_s32 dst_r;
CHECK_ERROR_PTR();
CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
@ -1476,11 +1506,31 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi
PTR_FAIL_IF(!const_);
set_const(const_, compiler);
reg = FAST_IS_REG(dst) ? dst : TMP_REG2;
PTR_FAIL_IF(emit_const(compiler, reg, init_value));
dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
PTR_FAIL_IF(emit_const(compiler, dst_r, init_value));
if (dst & SLJIT_MEM)
PTR_FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw));
return const_;
}
SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
{
struct sljit_put_label *put_label;
sljit_s32 dst_r;
CHECK_ERROR_PTR();
CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw));
ADJUST_LOCAL_OFFSET(dst, dstw);
put_label = (struct sljit_put_label*)ensure_abuf(compiler, sizeof(struct sljit_put_label));
PTR_FAIL_IF(!put_label);
set_put_label(put_label, compiler, 0);
dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
PTR_FAIL_IF(emit_const(compiler, dst_r, 0));
if (dst & SLJIT_MEM)
PTR_FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw));
return put_label;
}

View File

@ -38,8 +38,10 @@ static sljit_s32 emit_do_imm(struct sljit_compiler *compiler, sljit_u8 opcode, s
return SLJIT_SUCCESS;
}
static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_s32 type, sljit_sw executable_offset)
static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_sw executable_offset)
{
sljit_s32 type = jump->flags >> TYPE_SHIFT;
if (type == SLJIT_JUMP) {
*code_ptr++ = JMP_i32;
jump->addr++;

View File

@ -39,8 +39,10 @@ static sljit_s32 emit_load_imm64(struct sljit_compiler *compiler, sljit_s32 reg,
return SLJIT_SUCCESS;
}
static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_s32 type)
static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr)
{
sljit_s32 type = jump->flags >> TYPE_SHIFT;
int short_addr = !(jump->flags & SLJIT_REWRITABLE_JUMP) && !(jump->flags & JUMP_LABEL) && (jump->u.target <= 0xffffffff);
/* The relative jump below specialized for this case. */
@ -72,6 +74,56 @@ static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_
return code_ptr;
}
static sljit_u8* generate_put_label_code(struct sljit_put_label *put_label, sljit_u8 *code_ptr, sljit_uw max_label)
{
if (max_label > HALFWORD_MAX) {
put_label->addr -= put_label->flags;
put_label->flags = PATCH_MD;
return code_ptr;
}
if (put_label->flags == 0) {
/* Destination is register. */
code_ptr = (sljit_u8*)put_label->addr - 2 - sizeof(sljit_uw);
SLJIT_ASSERT((code_ptr[0] & 0xf8) == REX_W);
SLJIT_ASSERT((code_ptr[1] & 0xf8) == MOV_r_i32);
if ((code_ptr[0] & 0x07) != 0) {
code_ptr[0] = (sljit_u8)(code_ptr[0] & ~0x08);
code_ptr += 2 + sizeof(sljit_s32);
}
else {
code_ptr[0] = code_ptr[1];
code_ptr += 1 + sizeof(sljit_s32);
}
put_label->addr = (sljit_uw)code_ptr;
return code_ptr;
}
code_ptr -= put_label->flags + (2 + sizeof(sljit_uw));
SLJIT_MEMMOVE(code_ptr, code_ptr + (2 + sizeof(sljit_uw)), put_label->flags);
SLJIT_ASSERT((code_ptr[0] & 0xf8) == REX_W);
if ((code_ptr[1] & 0xf8) == MOV_r_i32) {
code_ptr += 2 + sizeof(sljit_uw);
SLJIT_ASSERT((code_ptr[0] & 0xf8) == REX_W);
}
SLJIT_ASSERT(code_ptr[1] == MOV_rm_r);
code_ptr[0] = (sljit_u8)(code_ptr[0] & ~0x4);
code_ptr[1] = MOV_rm_i32;
code_ptr[2] = (sljit_u8)(code_ptr[2] & ~(0x7 << 3));
code_ptr = (sljit_u8*)(put_label->addr - (2 + sizeof(sljit_uw)) + sizeof(sljit_s32));
put_label->addr = (sljit_uw)code_ptr;
put_label->flags = 0;
return code_ptr;
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)

View File

@ -428,13 +428,15 @@ static sljit_u8 get_jump_code(sljit_s32 type)
}
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_s32 type, sljit_sw executable_offset);
static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_sw executable_offset);
#else
static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_s32 type);
static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr);
static sljit_u8* generate_put_label_code(struct sljit_put_label *put_label, sljit_u8 *code_ptr, sljit_uw max_label);
#endif
static sljit_u8* generate_near_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_u8 *code, sljit_s32 type, sljit_sw executable_offset)
static sljit_u8* generate_near_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_u8 *code, sljit_sw executable_offset)
{
sljit_s32 type = jump->flags >> TYPE_SHIFT;
sljit_s32 short_jump;
sljit_uw label_addr;
@ -447,7 +449,7 @@ static sljit_u8* generate_near_jump_code(struct sljit_jump *jump, sljit_u8 *code
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
if ((sljit_sw)(label_addr - (jump->addr + 1)) > HALFWORD_MAX || (sljit_sw)(label_addr - (jump->addr + 1)) < HALFWORD_MIN)
return generate_far_jump_code(jump, code_ptr, type);
return generate_far_jump_code(jump, code_ptr);
#endif
if (type == SLJIT_JUMP) {
@ -497,6 +499,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
struct sljit_label *label;
struct sljit_jump *jump;
struct sljit_const *const_;
struct sljit_put_label *put_label;
CHECK_ERROR_PTR();
CHECK_PTR(check_sljit_generate_code(compiler));
@ -511,6 +514,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
label = compiler->labels;
jump = compiler->jumps;
const_ = compiler->consts;
put_label = compiler->put_labels;
executable_offset = SLJIT_EXEC_OFFSET(code);
do {
@ -525,27 +529,38 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
buf_ptr += len;
}
else {
if (*buf_ptr >= 2) {
switch (*buf_ptr) {
case 0:
label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
label->size = code_ptr - code;
label = label->next;
break;
case 1:
jump->addr = (sljit_uw)code_ptr;
if (!(jump->flags & SLJIT_REWRITABLE_JUMP))
code_ptr = generate_near_jump_code(jump, code_ptr, code, *buf_ptr - 2, executable_offset);
code_ptr = generate_near_jump_code(jump, code_ptr, code, executable_offset);
else {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
code_ptr = generate_far_jump_code(jump, code_ptr, *buf_ptr - 2, executable_offset);
code_ptr = generate_far_jump_code(jump, code_ptr, executable_offset);
#else
code_ptr = generate_far_jump_code(jump, code_ptr, *buf_ptr - 2);
code_ptr = generate_far_jump_code(jump, code_ptr);
#endif
}
jump = jump->next;
}
else if (*buf_ptr == 0) {
label->addr = ((sljit_uw)code_ptr) + executable_offset;
label->size = code_ptr - code;
label = label->next;
}
else { /* *buf_ptr is 1 */
break;
case 2:
const_->addr = ((sljit_uw)code_ptr) - sizeof(sljit_sw);
const_ = const_->next;
break;
default:
SLJIT_ASSERT(*buf_ptr == 3);
SLJIT_ASSERT(put_label->label);
put_label->addr = (sljit_uw)code_ptr;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
code_ptr = generate_put_label_code(put_label, code_ptr, (sljit_uw)(SLJIT_ADD_EXEC_OFFSET(code, executable_offset) + put_label->label->size));
#endif
put_label = put_label->next;
break;
}
buf_ptr++;
}
@ -557,6 +572,8 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
SLJIT_ASSERT(!label);
SLJIT_ASSERT(!jump);
SLJIT_ASSERT(!const_);
SLJIT_ASSERT(!put_label);
SLJIT_ASSERT(code_ptr <= code + compiler->size);
jump = compiler->jumps;
while (jump) {
@ -591,8 +608,24 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
jump = jump->next;
}
/* Some space may be wasted because of short jumps. */
SLJIT_ASSERT(code_ptr <= code + compiler->size);
put_label = compiler->put_labels;
while (put_label) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
sljit_unaligned_store_sw((void*)(put_label->addr - sizeof(sljit_sw)), (sljit_sw)put_label->label->addr);
#else
if (put_label->flags & PATCH_MD) {
SLJIT_ASSERT(put_label->label->addr > HALFWORD_MAX);
sljit_unaligned_store_sw((void*)(put_label->addr - sizeof(sljit_sw)), (sljit_sw)put_label->label->addr);
}
else {
SLJIT_ASSERT(put_label->label->addr <= HALFWORD_MAX);
sljit_unaligned_store_s32((void*)(put_label->addr - sizeof(sljit_s32)), (sljit_s32)put_label->label->addr);
}
#endif
put_label = put_label->next;
}
compiler->error = SLJIT_ERR_COMPILED;
compiler->executable_offset = executable_offset;
compiler->executable_size = code_ptr - code;
@ -2481,7 +2514,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile
jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
PTR_FAIL_IF_NULL(jump);
set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
set_jump(jump, compiler, (type & SLJIT_REWRITABLE_JUMP) | ((type & 0xff) << TYPE_SHIFT));
type &= 0xff;
/* Worst case size. */
@ -2495,7 +2528,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile
PTR_FAIL_IF_NULL(inst);
*inst++ = 0;
*inst++ = type + 2;
*inst++ = 1;
return jump;
}
@ -2513,7 +2546,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi
if (src == SLJIT_IMM) {
jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
FAIL_IF_NULL(jump);
set_jump(jump, compiler, JUMP_ADDR);
set_jump(jump, compiler, JUMP_ADDR | (type << TYPE_SHIFT));
jump->u.target = srcw;
/* Worst case size. */
@ -2527,7 +2560,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi
FAIL_IF_NULL(inst);
*inst++ = 0;
*inst++ = type + 2;
*inst++ = 1;
}
else {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
@ -2831,7 +2864,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi
PTR_FAIL_IF(!inst);
*inst++ = 0;
*inst++ = 1;
*inst++ = 2;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
if (dst & SLJIT_MEM)
@ -2842,6 +2875,54 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi
return const_;
}
SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
{
struct sljit_put_label *put_label;
sljit_u8 *inst;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
sljit_s32 reg;
sljit_uw start_size;
#endif
CHECK_ERROR_PTR();
CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw));
ADJUST_LOCAL_OFFSET(dst, dstw);
CHECK_EXTRA_REGS(dst, dstw, (void)0);
put_label = (struct sljit_put_label*)ensure_abuf(compiler, sizeof(struct sljit_put_label));
PTR_FAIL_IF(!put_label);
set_put_label(put_label, compiler, 0);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
compiler->mode32 = 0;
reg = FAST_IS_REG(dst) ? dst : TMP_REG1;
if (emit_load_imm64(compiler, reg, 0))
return NULL;
#else
if (emit_mov(compiler, dst, dstw, SLJIT_IMM, 0))
return NULL;
#endif
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
if (dst & SLJIT_MEM) {
start_size = compiler->size;
if (emit_mov(compiler, dst, dstw, TMP_REG1, 0))
return NULL;
put_label->flags = compiler->size - start_size;
}
#endif
inst = (sljit_u8*)ensure_buf(compiler, 2);
PTR_FAIL_IF(!inst);
*inst++ = 0;
*inst++ = 3;
return put_label;
}
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
{
SLJIT_UNUSED_ARG(executable_offset);

View File

@ -154,7 +154,13 @@ SLJIT_API_FUNC_ATTRIBUTE void SLJIT_FUNC sljit_release_lock(void)
#include "windows.h"
#else
/* Provides mmap function. */
#include <sys/types.h>
#include <sys/mman.h>
#ifndef MAP_ANON
#ifdef MAP_ANONYMOUS
#define MAP_ANON MAP_ANONYMOUS
#endif
#endif
/* For detecting the page size. */
#include <unistd.h>