pcre2: Sync with upstream 10.32
This commit is contained in:
parent
2bc981948d
commit
0455bc64b6
|
@ -435,16 +435,16 @@ Files extracted from upstream source:
|
|||
## pcre2
|
||||
|
||||
- Upstream: http://www.pcre.org/
|
||||
- Version: 10.31
|
||||
- Version: 10.32
|
||||
- License: BSD-3-Clause
|
||||
|
||||
Files extracted from upstream source:
|
||||
|
||||
- Files listed in the file NON-AUTOTOOLS-BUILD steps 1-4
|
||||
- All .h files in src/
|
||||
- All .h files in src/ apart from pcre2posix.h
|
||||
- src/pcre2_jit_compile.c
|
||||
- src/pcre2_jit_match.c
|
||||
- src/pcre2_jit_misc.c
|
||||
- src/pcre2_jit_maketables.c
|
||||
- src/sljit/*
|
||||
- AUTHORS and LICENCE
|
||||
|
||||
|
|
|
@ -4,11 +4,11 @@ PCRE2 LICENCE
|
|||
PCRE2 is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Release 10 of PCRE2 is distributed under the terms of the "BSD" licence, as
|
||||
specified below, with one exemption for certain binary redistributions. The
|
||||
documentation for PCRE2, supplied in the "doc" directory, is distributed under
|
||||
the same terms as the software itself. The data in the testdata directory is
|
||||
not copyrighted and is in the public domain.
|
||||
Releases 10.00 and above of PCRE2 are distributed under the terms of the "BSD"
|
||||
licence, as specified below, with one exemption for certain binary
|
||||
redistributions. The documentation for PCRE2, supplied in the "doc" directory,
|
||||
is distributed under the same terms as the software itself. The data in the
|
||||
testdata directory is not copyrighted and is in the public domain.
|
||||
|
||||
The basic library functions are written in C and are freestanding. Also
|
||||
included in the distribution is a just-in-time compiler that can be used to
|
||||
|
@ -35,7 +35,7 @@ PCRE2 JUST-IN-TIME COMPILATION SUPPORT
|
|||
|
||||
Written by: Zoltan Herczeg
|
||||
Email local part: hzmester
|
||||
Emain domain: freemail.hu
|
||||
Email domain: freemail.hu
|
||||
|
||||
Copyright(c) 2010-2018 Zoltan Herczeg
|
||||
All rights reserved.
|
||||
|
@ -46,7 +46,7 @@ STACK-LESS JUST-IN-TIME COMPILER
|
|||
|
||||
Written by: Zoltan Herczeg
|
||||
Email local part: hzmester
|
||||
Emain domain: freemail.hu
|
||||
Email domain: freemail.hu
|
||||
|
||||
Copyright(c) 2009-2018 Zoltan Herczeg
|
||||
All rights reserved.
|
||||
|
|
|
@ -18,10 +18,10 @@ to set the macro values. In this case, you do not have to set -DHAVE_CONFIG_H,
|
|||
but if you do, default values will be taken from config.h for non-boolean
|
||||
macros that are not defined on the command line.
|
||||
|
||||
Boolean macros such as HAVE_STDLIB_H and SUPPORT_PCRE2_8 should either be defined
|
||||
(conventionally to 1) for TRUE, and not defined at all for FALSE. All such
|
||||
macros are listed as a commented #undef in config.h.generic. Macros such as
|
||||
MATCH_LIMIT, whose actual value is relevant, have defaults defined, but are
|
||||
Boolean macros such as HAVE_STDLIB_H and SUPPORT_PCRE2_8 should either be
|
||||
defined (conventionally to 1) for TRUE, or not defined at all for FALSE. All
|
||||
such macros are listed as a commented #undef in config.h.generic. Macros such
|
||||
as MATCH_LIMIT, whose actual value is relevant, have defaults defined, but are
|
||||
surrounded by #ifndef/#endif lines so that the value can be overridden by -D.
|
||||
|
||||
PCRE2 uses memmove() if HAVE_MEMMOVE is defined; otherwise it uses bcopy() if
|
||||
|
@ -132,17 +132,18 @@ sure both macros are undefined; an emulation function will then be used. */
|
|||
/* Define to 1 if you have the <zlib.h> header file. */
|
||||
/* #undef HAVE_ZLIB_H */
|
||||
|
||||
/* This limits the amount of memory that pcre2_match() may use while matching
|
||||
a pattern. The value is in kilobytes. */
|
||||
/* This limits the amount of memory that may be used while matching a pattern.
|
||||
It applies to both pcre2_match() and pcre2_dfa_match(). It does not apply
|
||||
to JIT matching. The value is in kibibytes (units of 1024 bytes). */
|
||||
#ifndef HEAP_LIMIT
|
||||
#define HEAP_LIMIT 20000000
|
||||
#endif
|
||||
|
||||
/* The value of LINK_SIZE determines the number of bytes used to store links
|
||||
as offsets within the compiled regex. The default is 2, which allows for
|
||||
compiled patterns up to 64K long. This covers the vast majority of cases.
|
||||
However, PCRE2 can also be compiled to use 3 or 4 bytes instead. This
|
||||
allows for longer patterns in extreme cases. */
|
||||
compiled patterns up to 65535 code units long. This covers the vast
|
||||
majority of cases. However, PCRE2 can also be compiled to use 3 or 4 bytes
|
||||
instead. This allows for longer patterns in extreme cases. */
|
||||
#ifndef LINK_SIZE
|
||||
#define LINK_SIZE 2
|
||||
#endif
|
||||
|
@ -155,7 +156,8 @@ sure both macros are undefined; an emulation function will then be used. */
|
|||
|
||||
/* The value of MATCH_LIMIT determines the default number of times the
|
||||
pcre2_match() function can record a backtrack position during a single
|
||||
matching attempt. There is a runtime interface for setting a different
|
||||
matching attempt. The value is also used to limit a loop counter in
|
||||
pcre2_dfa_match(). There is a runtime interface for setting a different
|
||||
limit. The limit exists in order to catch runaway regular expressions that
|
||||
take for ever to determine that they do not match. The default is set very
|
||||
large so that it does not accidentally catch legitimate cases. */
|
||||
|
@ -170,7 +172,9 @@ sure both macros are undefined; an emulation function will then be used. */
|
|||
MATCH_LIMIT_DEPTH provides this facility. To have any useful effect, it
|
||||
must be less than the value of MATCH_LIMIT. The default is to use the same
|
||||
value as MATCH_LIMIT. There is a runtime method for setting a different
|
||||
limit. */
|
||||
limit. In the case of pcre2_dfa_match(), this limit controls the depth of
|
||||
the internal nested function calls that are used for pattern recursions,
|
||||
lookarounds, and atomic groups. */
|
||||
#ifndef MATCH_LIMIT_DEPTH
|
||||
#define MATCH_LIMIT_DEPTH MATCH_LIMIT
|
||||
#endif
|
||||
|
@ -210,7 +214,7 @@ sure both macros are undefined; an emulation function will then be used. */
|
|||
#define PACKAGE_NAME "PCRE2"
|
||||
|
||||
/* Define to the full name and version of this package. */
|
||||
#define PACKAGE_STRING "PCRE2 10.31"
|
||||
#define PACKAGE_STRING "PCRE2 10.32"
|
||||
|
||||
/* Define to the one symbol short name of this package. */
|
||||
#define PACKAGE_TARNAME "pcre2"
|
||||
|
@ -219,7 +223,7 @@ sure both macros are undefined; an emulation function will then be used. */
|
|||
#define PACKAGE_URL ""
|
||||
|
||||
/* Define to the version of this package. */
|
||||
#define PACKAGE_VERSION "10.31"
|
||||
#define PACKAGE_VERSION "10.32"
|
||||
|
||||
/* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested
|
||||
parentheses (of any kind) in a pattern. This limits the amount of system
|
||||
|
@ -339,7 +343,7 @@ sure both macros are undefined; an emulation function will then be used. */
|
|||
#endif
|
||||
|
||||
/* Version number of package */
|
||||
#define VERSION "10.31"
|
||||
#define VERSION "10.32"
|
||||
|
||||
/* Define to 1 if on MINIX. */
|
||||
/* #undef _MINIX */
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
/* This is the public header file for the PCRE library, second API, to be
|
||||
#included by applications that call PCRE2 functions.
|
||||
|
||||
Copyright (c) 2016-2017 University of Cambridge
|
||||
Copyright (c) 2016-2018 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
@ -41,10 +41,16 @@ POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
/* The current PCRE version information. */
|
||||
|
||||
#define PCRE2_MAJOR 10
|
||||
#define PCRE2_MINOR 31
|
||||
#define PCRE2_PRERELEASE
|
||||
#define PCRE2_DATE 2018-02-12
|
||||
#define PCRE2_MAJOR 10
|
||||
#define PCRE2_MINOR 32
|
||||
#define PCRE2_PRERELEASE
|
||||
#define PCRE2_DATE 2018-09-10
|
||||
|
||||
/* For the benefit of systems without stdint.h, an alternative is to use
|
||||
inttypes.h. The existence of these headers is checked by configure or CMake. */
|
||||
|
||||
#define PCRE2_HAVE_STDINT_H 1
|
||||
#define PCRE2_HAVE_INTTYPES_H 1
|
||||
|
||||
/* When an application links to a PCRE DLL in Windows, the symbols that are
|
||||
imported have to be identified as such. When building PCRE2, the appropriate
|
||||
|
@ -81,12 +87,18 @@ set, we ensure here that it has no effect. */
|
|||
#define PCRE2_CALL_CONVENTION
|
||||
#endif
|
||||
|
||||
/* Have to include limits.h, stdlib.h and stdint.h to ensure that size_t and
|
||||
uint8_t, UCHAR_MAX, etc are defined. */
|
||||
/* Have to include limits.h, stdlib.h and stdint.h (or inttypes.h) to ensure
|
||||
that size_t and uint8_t, UCHAR_MAX, etc are defined. If the system has neither
|
||||
header, the relevant values must be provided by some other means. */
|
||||
|
||||
#include <limits.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#if PCRE2_HAVE_STDINT_H
|
||||
#include <stdint.h>
|
||||
#elif PCRE2_HAVE_INTTYPES_H
|
||||
#include <inttypes.h>
|
||||
#endif
|
||||
|
||||
/* Allow for C++ users compiling this directly. */
|
||||
|
||||
|
@ -269,6 +281,7 @@ pcre2_pattern_convert(). */
|
|||
#define PCRE2_ERROR_INTERNAL_UNKNOWN_NEWLINE 156
|
||||
#define PCRE2_ERROR_BACKSLASH_G_SYNTAX 157
|
||||
#define PCRE2_ERROR_PARENS_QUERY_R_MISSING_CLOSING 158
|
||||
/* Error 159 is obsolete and should now never occur */
|
||||
#define PCRE2_ERROR_VERB_ARGUMENT_NOT_ALLOWED 159
|
||||
#define PCRE2_ERROR_VERB_UNKNOWN 160
|
||||
#define PCRE2_ERROR_SUBPATTERN_NUMBER_TOO_BIG 161
|
||||
|
@ -303,6 +316,8 @@ pcre2_pattern_convert(). */
|
|||
#define PCRE2_ERROR_INTERNAL_BAD_CODE_IN_SKIP 190
|
||||
#define PCRE2_ERROR_NO_SURROGATES_IN_UTF16 191
|
||||
#define PCRE2_ERROR_BAD_LITERAL_OPTIONS 192
|
||||
#define PCRE2_ERROR_SUPPORTED_ONLY_IN_UNICODE 193
|
||||
#define PCRE2_ERROR_INVALID_HYPHEN_IN_OPTIONS 194
|
||||
|
||||
|
||||
/* "Expected" matching error codes: no match and partial match. */
|
||||
|
@ -387,6 +402,7 @@ released, the numbers must not be changed. */
|
|||
#define PCRE2_ERROR_BADSERIALIZEDDATA (-62)
|
||||
#define PCRE2_ERROR_HEAPLIMIT (-63)
|
||||
#define PCRE2_ERROR_CONVERT_SYNTAX (-64)
|
||||
#define PCRE2_ERROR_INTERNAL_DUPMATCH (-65)
|
||||
|
||||
|
||||
/* Request types for pcre2_pattern_info() */
|
||||
|
|
|
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
|
|||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2017 University of Cambridge
|
||||
New API code Copyright (c) 2016-2018 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
@ -505,7 +505,7 @@ Arguments:
|
|||
utf TRUE in UTF mode
|
||||
cb compile data block
|
||||
base_list the data list of the base opcode
|
||||
base_end the end of the data list
|
||||
base_end the end of the base opcode
|
||||
rec_limit points to recursion depth counter
|
||||
|
||||
Returns: TRUE if the auto-possessification is possible
|
||||
|
@ -730,7 +730,7 @@ for(;;)
|
|||
if ((*xclass_flags & XCL_MAP) == 0)
|
||||
{
|
||||
/* No bits are set for characters < 256. */
|
||||
if (list[1] == 0) return TRUE;
|
||||
if (list[1] == 0) return (*xclass_flags & XCL_NOT) == 0;
|
||||
/* Might be an empty repeat. */
|
||||
continue;
|
||||
}
|
||||
|
@ -1235,6 +1235,7 @@ for (;;)
|
|||
#endif
|
||||
|
||||
case OP_MARK:
|
||||
case OP_COMMIT_ARG:
|
||||
case OP_PRUNE_ARG:
|
||||
case OP_SKIP_ARG:
|
||||
case OP_THEN_ARG:
|
||||
|
|
|
@ -2,23 +2,24 @@
|
|||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* This file contains character tables that are used when no external tables
|
||||
are passed to PCRE2 by the application that calls it. The tables are used only
|
||||
for characters whose code values are less than 256.
|
||||
/* This file was automatically written by the dftables auxiliary
|
||||
program. It contains character tables that are used when no external
|
||||
tables are passed to PCRE2 by the application that calls it. The tables
|
||||
are used only for characters whose code values are less than 256. */
|
||||
|
||||
This is a default version of the tables that assumes ASCII encoding. A program
|
||||
called dftables (which is distributed with PCRE2) can be used to build
|
||||
alternative versions of this file. This is necessary if you are running in an
|
||||
EBCDIC environment, or if you want to default to a different encoding, for
|
||||
example ISO-8859-1. When dftables is run, it creates these tables in the
|
||||
current locale. If PCRE2 is configured with --enable-rebuild-chartables, this
|
||||
happens automatically.
|
||||
/* The dftables program (which is distributed with PCRE2) can be used to
|
||||
build alternative versions of this file. This is necessary if you are
|
||||
running in an EBCDIC environment, or if you want to default to a different
|
||||
encoding, for example ISO-8859-1. When dftables is run, it creates these
|
||||
tables in the current locale. This happens automatically if PCRE2 is
|
||||
configured with --enable-rebuild-chartables. */
|
||||
|
||||
The following #includes are present because without them gcc 4.x may remove the
|
||||
array definition from the final binary if PCRE2 is built into a static library
|
||||
and dead code stripping is activated. This leads to link errors. Pulling in the
|
||||
header ensures that the array gets flagged as "someone outside this compilation
|
||||
unit might reference this" and so it will always be supplied to the linker. */
|
||||
/* The following #include is present because without it gcc 4.x may remove
|
||||
the array definition from the final binary if PCRE2 is built into a static
|
||||
library and dead code stripping is activated. This leads to link errors.
|
||||
Pulling in the header ensures that the array gets flagged as "someone
|
||||
outside this compilation unit might reference this" and so it will always
|
||||
be supplied to the linker. */
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
|
@ -101,7 +102,7 @@ const uint8_t PRIV(default_tables)[] = {
|
|||
/* This table contains bit maps for various character classes. Each map is 32
|
||||
bytes long and the bits run from the least significant end of each byte. The
|
||||
classes that have their own maps are: space, xdigit, digit, upper, lower, word,
|
||||
graph, print, punct, and cntrl. Other classes are built from combinations. */
|
||||
graph, print, punct, and cntrl. Other classes are built from combinations. */
|
||||
|
||||
0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
@ -159,25 +160,24 @@ graph, print, punct, and cntrl. Other classes are built from combinations. */
|
|||
0x04 decimal digit
|
||||
0x08 hexadecimal digit
|
||||
0x10 alphanumeric or '_'
|
||||
0x80 regular expression metacharacter or binary zero
|
||||
*/
|
||||
|
||||
0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
|
||||
0x00,0x01,0x01,0x01,0x01,0x01,0x00,0x00, /* 8- 15 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
|
||||
0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
|
||||
0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
|
||||
0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* - ' */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* ( - / */
|
||||
0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
|
||||
0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
|
||||
0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x00, /* 8 - ? */
|
||||
0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
|
||||
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
|
||||
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
|
||||
0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
|
||||
0x12,0x12,0x12,0x00,0x00,0x00,0x00,0x10, /* X - _ */
|
||||
0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
|
||||
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
|
||||
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
|
||||
0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
|
||||
0x12,0x12,0x12,0x00,0x00,0x00,0x00,0x00, /* x -127 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
|
||||
|
|
|
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
|
|||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2017 University of Cambridge
|
||||
New API code Copyright (c) 2016-2018 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
@ -63,8 +63,8 @@ POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
/* Other debugging code can be enabled by these defines. */
|
||||
|
||||
// #define DEBUG_SHOW_CAPTURES
|
||||
// #define DEBUG_SHOW_PARSED
|
||||
/* #define DEBUG_SHOW_CAPTURES */
|
||||
/* #define DEBUG_SHOW_PARSED */
|
||||
|
||||
/* There are a few things that vary with different code unit sizes. Handle them
|
||||
by defining macros in order to minimize #if usage. */
|
||||
|
@ -250,34 +250,35 @@ is present where expected in a conditional group. */
|
|||
#define META_LOOKBEHINDNOT 0x80250000u /* (?<! */
|
||||
|
||||
/* These must be kept in this order, with consecutive values, and the _ARG
|
||||
versions of PRUNE, SKIP, and THEN immediately after their non-argument
|
||||
versions of COMMIT, PRUNE, SKIP, and THEN immediately after their non-argument
|
||||
versions. */
|
||||
|
||||
#define META_MARK 0x80260000u /* (*MARK) */
|
||||
#define META_ACCEPT 0x80270000u /* (*ACCEPT) */
|
||||
#define META_COMMIT 0x80280000u /* (*COMMIT) */
|
||||
#define META_FAIL 0x80290000u /* (*FAIL) */
|
||||
#define META_PRUNE 0x802a0000u /* These pairs must */
|
||||
#define META_PRUNE_ARG 0x802b0000u /* be */
|
||||
#define META_SKIP 0x802c0000u /* kept */
|
||||
#define META_SKIP_ARG 0x802d0000u /* in */
|
||||
#define META_THEN 0x802e0000u /* this */
|
||||
#define META_THEN_ARG 0x802f0000u /* order */
|
||||
#define META_FAIL 0x80280000u /* (*FAIL) */
|
||||
#define META_COMMIT 0x80290000u /* These */
|
||||
#define META_COMMIT_ARG 0x802a0000u /* pairs */
|
||||
#define META_PRUNE 0x802b0000u /* must */
|
||||
#define META_PRUNE_ARG 0x802c0000u /* be */
|
||||
#define META_SKIP 0x802d0000u /* kept */
|
||||
#define META_SKIP_ARG 0x802e0000u /* in */
|
||||
#define META_THEN 0x802f0000u /* this */
|
||||
#define META_THEN_ARG 0x80300000u /* order */
|
||||
|
||||
/* These must be kept in groups of adjacent 3 values, and all together. */
|
||||
|
||||
#define META_ASTERISK 0x80300000u /* * */
|
||||
#define META_ASTERISK_PLUS 0x80310000u /* *+ */
|
||||
#define META_ASTERISK_QUERY 0x80320000u /* *? */
|
||||
#define META_PLUS 0x80330000u /* + */
|
||||
#define META_PLUS_PLUS 0x80340000u /* ++ */
|
||||
#define META_PLUS_QUERY 0x80350000u /* +? */
|
||||
#define META_QUERY 0x80360000u /* ? */
|
||||
#define META_QUERY_PLUS 0x80370000u /* ?+ */
|
||||
#define META_QUERY_QUERY 0x80380000u /* ?? */
|
||||
#define META_MINMAX 0x80390000u /* {n,m} repeat */
|
||||
#define META_MINMAX_PLUS 0x803a0000u /* {n,m}+ repeat */
|
||||
#define META_MINMAX_QUERY 0x803b0000u /* {n,m}? repeat */
|
||||
#define META_ASTERISK 0x80310000u /* * */
|
||||
#define META_ASTERISK_PLUS 0x80320000u /* *+ */
|
||||
#define META_ASTERISK_QUERY 0x80330000u /* *? */
|
||||
#define META_PLUS 0x80340000u /* + */
|
||||
#define META_PLUS_PLUS 0x80350000u /* ++ */
|
||||
#define META_PLUS_QUERY 0x80360000u /* +? */
|
||||
#define META_QUERY 0x80370000u /* ? */
|
||||
#define META_QUERY_PLUS 0x80380000u /* ?+ */
|
||||
#define META_QUERY_QUERY 0x80390000u /* ?? */
|
||||
#define META_MINMAX 0x803a0000u /* {n,m} repeat */
|
||||
#define META_MINMAX_PLUS 0x803b0000u /* {n,m}+ repeat */
|
||||
#define META_MINMAX_QUERY 0x803c0000u /* {n,m}? repeat */
|
||||
|
||||
#define META_FIRST_QUANTIFIER META_ASTERISK
|
||||
#define META_LAST_QUANTIFIER META_MINMAX_QUERY
|
||||
|
@ -327,8 +328,9 @@ static unsigned char meta_extra_lengths[] = {
|
|||
SIZEOFFSET, /* META_LOOKBEHINDNOT */
|
||||
1, /* META_MARK - plus the string length */
|
||||
0, /* META_ACCEPT */
|
||||
0, /* META_COMMIT */
|
||||
0, /* META_FAIL */
|
||||
0, /* META_COMMIT */
|
||||
1, /* META_COMMIT_ARG - plus the string length */
|
||||
0, /* META_PRUNE */
|
||||
1, /* META_PRUNE_ARG - plus the string length */
|
||||
0, /* META_SKIP */
|
||||
|
@ -510,17 +512,17 @@ static const short int escapes[] = {
|
|||
-ESC_Z, CHAR_LEFT_SQUARE_BRACKET,
|
||||
CHAR_BACKSLASH, CHAR_RIGHT_SQUARE_BRACKET,
|
||||
CHAR_CIRCUMFLEX_ACCENT, CHAR_UNDERSCORE,
|
||||
CHAR_GRAVE_ACCENT, ESC_a,
|
||||
CHAR_GRAVE_ACCENT, CHAR_BEL,
|
||||
-ESC_b, 0,
|
||||
-ESC_d, ESC_e,
|
||||
ESC_f, 0,
|
||||
-ESC_d, CHAR_ESC,
|
||||
CHAR_FF, 0,
|
||||
-ESC_h, 0,
|
||||
0, -ESC_k,
|
||||
0, 0,
|
||||
ESC_n, 0,
|
||||
CHAR_LF, 0,
|
||||
-ESC_p, 0,
|
||||
ESC_r, -ESC_s,
|
||||
ESC_tee, 0,
|
||||
CHAR_CR, -ESC_s,
|
||||
CHAR_HT, 0,
|
||||
-ESC_v, -ESC_w,
|
||||
0, 0,
|
||||
-ESC_z
|
||||
|
@ -544,22 +546,22 @@ because it is defined as 'a', which of course picks up the ASCII value. */
|
|||
#endif
|
||||
|
||||
static const short int escapes[] = {
|
||||
/* 80 */ ESC_a, -ESC_b, 0, -ESC_d, ESC_e, ESC_f, 0,
|
||||
/* 88 */-ESC_h, 0, 0, '{', 0, 0, 0, 0,
|
||||
/* 90 */ 0, 0, -ESC_k, 0, 0, ESC_n, 0, -ESC_p,
|
||||
/* 98 */ 0, ESC_r, 0, '}', 0, 0, 0, 0,
|
||||
/* A0 */ 0, '~', -ESC_s, ESC_tee, 0,-ESC_v, -ESC_w, 0,
|
||||
/* A8 */ 0,-ESC_z, 0, 0, 0, '[', 0, 0,
|
||||
/* B0 */ 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
/* B8 */ 0, 0, 0, 0, 0, ']', '=', '-',
|
||||
/* C0 */ '{',-ESC_A, -ESC_B, -ESC_C, -ESC_D,-ESC_E, 0, -ESC_G,
|
||||
/* C8 */-ESC_H, 0, 0, 0, 0, 0, 0, 0,
|
||||
/* D0 */ '}', 0, -ESC_K, 0, 0,-ESC_N, 0, -ESC_P,
|
||||
/* D8 */-ESC_Q,-ESC_R, 0, 0, 0, 0, 0, 0,
|
||||
/* E0 */ '\\', 0, -ESC_S, 0, 0,-ESC_V, -ESC_W, -ESC_X,
|
||||
/* E8 */ 0,-ESC_Z, 0, 0, 0, 0, 0, 0,
|
||||
/* F0 */ 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
/* F8 */ 0, 0
|
||||
/* 80 */ CHAR_BEL, -ESC_b, 0, -ESC_d, CHAR_ESC, CHAR_FF, 0,
|
||||
/* 88 */ -ESC_h, 0, 0, '{', 0, 0, 0, 0,
|
||||
/* 90 */ 0, 0, -ESC_k, 0, 0, CHAR_LF, 0, -ESC_p,
|
||||
/* 98 */ 0, CHAR_CR, 0, '}', 0, 0, 0, 0,
|
||||
/* A0 */ 0, '~', -ESC_s, CHAR_HT, 0, -ESC_v, -ESC_w, 0,
|
||||
/* A8 */ 0, -ESC_z, 0, 0, 0, '[', 0, 0,
|
||||
/* B0 */ 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
/* B8 */ 0, 0, 0, 0, 0, ']', '=', '-',
|
||||
/* C0 */ '{', -ESC_A, -ESC_B, -ESC_C, -ESC_D, -ESC_E, 0, -ESC_G,
|
||||
/* C8 */ -ESC_H, 0, 0, 0, 0, 0, 0, 0,
|
||||
/* D0 */ '}', 0, -ESC_K, 0, 0, -ESC_N, 0, -ESC_P,
|
||||
/* D8 */ -ESC_Q, -ESC_R, 0, 0, 0, 0, 0, 0,
|
||||
/* E0 */ '\\', 0, -ESC_S, 0, 0, -ESC_V, -ESC_W, -ESC_X,
|
||||
/* E8 */ 0, -ESC_Z, 0, 0, 0, 0, 0, 0,
|
||||
/* F0 */ 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
/* F8 */ 0, 0
|
||||
};
|
||||
|
||||
/* We also need a table of characters that may follow \c in an EBCDIC
|
||||
|
@ -586,9 +588,9 @@ static const char verbnames[] =
|
|||
"\0" /* Empty name is a shorthand for MARK */
|
||||
STRING_MARK0
|
||||
STRING_ACCEPT0
|
||||
STRING_COMMIT0
|
||||
STRING_F0
|
||||
STRING_FAIL0
|
||||
STRING_COMMIT0
|
||||
STRING_PRUNE0
|
||||
STRING_SKIP0
|
||||
STRING_THEN;
|
||||
|
@ -596,11 +598,11 @@ static const char verbnames[] =
|
|||
static const verbitem verbs[] = {
|
||||
{ 0, META_MARK, +1 }, /* > 0 => must have an argument */
|
||||
{ 4, META_MARK, +1 },
|
||||
{ 6, META_ACCEPT, -1 }, /* < 0 => must not have an argument */
|
||||
{ 6, META_COMMIT, -1 },
|
||||
{ 6, META_ACCEPT, -1 }, /* < 0 => Optional argument, convert to pre-MARK */
|
||||
{ 1, META_FAIL, -1 },
|
||||
{ 4, META_FAIL, -1 },
|
||||
{ 5, META_PRUNE, 0 }, /* Argument is optional; bump META code if found */
|
||||
{ 6, META_COMMIT, 0 },
|
||||
{ 5, META_PRUNE, 0 }, /* Optional argument; bump META code if found */
|
||||
{ 4, META_SKIP, 0 },
|
||||
{ 4, META_THEN, 0 }
|
||||
};
|
||||
|
@ -610,8 +612,8 @@ static const int verbcount = sizeof(verbs)/sizeof(verbitem);
|
|||
/* Verb opcodes, indexed by their META code offset from META_MARK. */
|
||||
|
||||
static const uint32_t verbops[] = {
|
||||
OP_MARK, OP_ACCEPT, OP_COMMIT, OP_FAIL, OP_PRUNE, OP_PRUNE_ARG, OP_SKIP,
|
||||
OP_SKIP_ARG, OP_THEN, OP_THEN_ARG };
|
||||
OP_MARK, OP_ACCEPT, OP_FAIL, OP_COMMIT, OP_COMMIT_ARG, OP_PRUNE,
|
||||
OP_PRUNE_ARG, OP_SKIP, OP_SKIP_ARG, OP_THEN, OP_THEN_ARG };
|
||||
|
||||
/* Offsets from OP_STAR for case-independent and negative repeat opcodes. */
|
||||
|
||||
|
@ -729,7 +731,7 @@ enum { ERR0 = COMPILE_ERROR_BASE,
|
|||
ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69, ERR70,
|
||||
ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78, ERR79, ERR80,
|
||||
ERR81, ERR82, ERR83, ERR84, ERR85, ERR86, ERR87, ERR88, ERR89, ERR90,
|
||||
ERR91, ERR92};
|
||||
ERR91, ERR92, ERR93, ERR94 };
|
||||
|
||||
/* This is a table of start-of-pattern options such as (*UTF) and settings such
|
||||
as (*LIMIT_MATCH=nnnn) and (*CRLF). For completeness and backward
|
||||
|
@ -976,8 +978,8 @@ for (;;)
|
|||
case META_POSIX_NEG: fprintf(stderr, "META_POSIX_NEG %d", *pptr++); break;
|
||||
|
||||
case META_ACCEPT: fprintf(stderr, "META (*ACCEPT)"); break;
|
||||
case META_COMMIT: fprintf(stderr, "META (*COMMIT)"); break;
|
||||
case META_FAIL: fprintf(stderr, "META (*FAIL)"); break;
|
||||
case META_COMMIT: fprintf(stderr, "META (*COMMIT)"); break;
|
||||
case META_PRUNE: fprintf(stderr, "META (*PRUNE)"); break;
|
||||
case META_SKIP: fprintf(stderr, "META (*SKIP)"); break;
|
||||
case META_THEN: fprintf(stderr, "META (*THEN)"); break;
|
||||
|
@ -1067,6 +1069,10 @@ for (;;)
|
|||
fprintf(stderr, "META (*MARK:");
|
||||
goto SHOWARG;
|
||||
|
||||
case META_COMMIT_ARG:
|
||||
fprintf(stderr, "META (*COMMIT:");
|
||||
goto SHOWARG;
|
||||
|
||||
case META_PRUNE_ARG:
|
||||
fprintf(stderr, "META (*PRUNE:");
|
||||
goto SHOWARG;
|
||||
|
@ -1435,6 +1441,48 @@ else if ((i = escapes[c - ESCAPES_FIRST]) != 0)
|
|||
escape = -i; /* Else return a special escape */
|
||||
if (cb != NULL && (escape == ESC_P || escape == ESC_p || escape == ESC_X))
|
||||
cb->external_flags |= PCRE2_HASBKPORX; /* Note \P, \p, or \X */
|
||||
|
||||
/* Perl supports \N{name} for character names and \N{U+dddd} for numerical
|
||||
Unicode code points, as well as plain \N for "not newline". PCRE does not
|
||||
support \N{name}. However, it does support quantification such as \N{2,3},
|
||||
so if \N{ is not followed by U+dddd we check for a quantifier. */
|
||||
|
||||
if (escape == ESC_N && ptr < ptrend && *ptr == CHAR_LEFT_CURLY_BRACKET)
|
||||
{
|
||||
PCRE2_SPTR p = ptr + 1;
|
||||
|
||||
/* \N{U+ can be handled by the \x{ code. However, this construction is
|
||||
not valid in EBCDIC environments because it specifies a Unicode
|
||||
character, not a codepoint in the local code. For example \N{U+0041}
|
||||
must be "A" in all environments. Also, in Perl, \N{U+ forces Unicode
|
||||
casing semantics for the entire pattern, so allow it only in UTF (i.e.
|
||||
Unicode) mode. */
|
||||
|
||||
if (ptrend - p > 1 && *p == CHAR_U && p[1] == CHAR_PLUS)
|
||||
{
|
||||
#ifdef EBCDIC
|
||||
*errorcodeptr = ERR93;
|
||||
#else
|
||||
if (utf)
|
||||
{
|
||||
ptr = p + 1;
|
||||
escape = 0; /* Not a fancy escape after all */
|
||||
goto COME_FROM_NU;
|
||||
}
|
||||
else *errorcodeptr = ERR93;
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Give an error if what follows is not a quantifier, but don't override
|
||||
an error set by the quantifier reader (e.g. number overflow). */
|
||||
|
||||
else
|
||||
{
|
||||
if (!read_repeat_counts(&p, ptrend, NULL, NULL, errorcodeptr) &&
|
||||
*errorcodeptr == 0)
|
||||
*errorcodeptr = ERR37;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1462,6 +1510,7 @@ else
|
|||
/* A number of Perl escapes are not handled by PCRE. We give an explicit
|
||||
error. */
|
||||
|
||||
case CHAR_F:
|
||||
case CHAR_l:
|
||||
case CHAR_L:
|
||||
*errorcodeptr = ERR37;
|
||||
|
@ -1719,6 +1768,9 @@ else
|
|||
{
|
||||
if (ptr < ptrend && *ptr == CHAR_LEFT_CURLY_BRACKET)
|
||||
{
|
||||
#ifndef EBCDIC
|
||||
COME_FROM_NU:
|
||||
#endif
|
||||
if (++ptr >= ptrend || *ptr == CHAR_RIGHT_CURLY_BRACKET)
|
||||
{
|
||||
*errorcodeptr = ERR78;
|
||||
|
@ -1852,19 +1904,6 @@ else
|
|||
}
|
||||
}
|
||||
|
||||
/* Perl supports \N{name} for character names, as well as plain \N for "not
|
||||
newline". PCRE does not support \N{name}. However, it does support
|
||||
quantification such as \N{2,3}. */
|
||||
|
||||
if (escape == ESC_N && ptr < ptrend && *ptr == CHAR_LEFT_CURLY_BRACKET &&
|
||||
ptrend - ptr > 2)
|
||||
{
|
||||
PCRE2_SPTR p = ptr + 1;
|
||||
if (!read_repeat_counts(&p, ptrend, NULL, NULL, errorcodeptr) &&
|
||||
*errorcodeptr == 0)
|
||||
*errorcodeptr = ERR37;
|
||||
}
|
||||
|
||||
/* Set the pointer to the next character before returning. */
|
||||
|
||||
*ptrptr = ptr;
|
||||
|
@ -2251,11 +2290,14 @@ typedef struct nest_save {
|
|||
#define NSF_RESET 0x0001u
|
||||
#define NSF_CONDASSERT 0x0002u
|
||||
|
||||
/* Of the options that are changeable within the pattern, these are tracked
|
||||
during parsing. The rest are used from META_OPTIONS items when compiling. */
|
||||
/* Options that are changeable within the pattern must be tracked during
|
||||
parsing. Some (e.g. PCRE2_EXTENDED) are implemented entirely during parsing,
|
||||
but all must be tracked so that META_OPTIONS items set the correct values for
|
||||
the main compiling phase. */
|
||||
|
||||
#define PARSE_TRACKED_OPTIONS \
|
||||
(PCRE2_DUPNAMES|PCRE2_EXTENDED|PCRE2_EXTENDED_MORE|PCRE2_NO_AUTO_CAPTURE)
|
||||
#define PARSE_TRACKED_OPTIONS (PCRE2_CASELESS|PCRE2_DOTALL|PCRE2_DUPNAMES| \
|
||||
PCRE2_EXTENDED|PCRE2_EXTENDED_MORE|PCRE2_MULTILINE|PCRE2_NO_AUTO_CAPTURE| \
|
||||
PCRE2_UNGREEDY)
|
||||
|
||||
/* States used for analyzing ranges in character classes. The two OK values
|
||||
must be last. */
|
||||
|
@ -2290,6 +2332,7 @@ uint32_t *previous_callout = NULL;
|
|||
uint32_t *parsed_pattern = cb->parsed_pattern;
|
||||
uint32_t *parsed_pattern_end = cb->parsed_pattern_end;
|
||||
uint32_t meta_quantifier = 0;
|
||||
uint32_t add_after_mark = 0;
|
||||
uint16_t nest_depth = 0;
|
||||
int after_manual_callout = 0;
|
||||
int expect_cond_assert = 0;
|
||||
|
@ -2434,11 +2477,17 @@ while (ptr < ptrend)
|
|||
/* EITHER: not both options set */
|
||||
((options & (PCRE2_EXTENDED | PCRE2_ALT_VERBNAMES)) !=
|
||||
(PCRE2_EXTENDED | PCRE2_ALT_VERBNAMES)) ||
|
||||
/* OR: character > 255 */
|
||||
c > 255 ||
|
||||
/* OR: not a # comment or white space */
|
||||
(c != CHAR_NUMBER_SIGN && (cb->ctypes[c] & ctype_space) == 0)
|
||||
))
|
||||
#ifdef SUPPORT_UNICODE
|
||||
/* OR: character > 255 AND not Unicode Pattern White Space */
|
||||
(c > 255 && (c|1) != 0x200f && (c|1) != 0x2029) ||
|
||||
#endif
|
||||
/* OR: not a # comment or isspace() white space */
|
||||
(c < 256 && c != CHAR_NUMBER_SIGN && (cb->ctypes[c] & ctype_space) == 0
|
||||
#ifdef SUPPORT_UNICODE
|
||||
/* and not CHAR_NEL when Unicode is supported */
|
||||
&& c != CHAR_NEL
|
||||
#endif
|
||||
)))
|
||||
{
|
||||
PCRE2_SIZE verbnamelength;
|
||||
|
||||
|
@ -2461,6 +2510,16 @@ while (ptr < ptrend)
|
|||
goto FAILED;
|
||||
}
|
||||
*verblengthptr = (uint32_t)verbnamelength;
|
||||
|
||||
/* If this name was on a verb such as (*ACCEPT) which does not continue,
|
||||
a (*MARK) was generated for the name. We now add the original verb as the
|
||||
next item. */
|
||||
|
||||
if (add_after_mark != 0)
|
||||
{
|
||||
*parsed_pattern++ = add_after_mark;
|
||||
add_after_mark = 0;
|
||||
}
|
||||
break;
|
||||
|
||||
case CHAR_BACKSLASH:
|
||||
|
@ -2510,11 +2569,18 @@ while (ptr < ptrend)
|
|||
|
||||
/* Skip over whitespace and # comments in extended mode. Note that c is a
|
||||
character, not a code unit, so we must not use MAX_255 to test its size
|
||||
because MAX_255 tests code units and is assumed TRUE in 8-bit mode. */
|
||||
because MAX_255 tests code units and is assumed TRUE in 8-bit mode. The
|
||||
whitespace characters are those designated as "Pattern White Space" by
|
||||
Unicode, which are the isspace() characters plus CHAR_NEL (newline), which is
|
||||
U+0085 in Unicode, plus U+200E, U+200F, U+2028, and U+2029. These are a
|
||||
subset of space characters that match \h and \v. */
|
||||
|
||||
if ((options & PCRE2_EXTENDED) != 0)
|
||||
{
|
||||
if (c < 256 && (cb->ctypes[c] & ctype_space) != 0) continue;
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (c == CHAR_NEL || (c|1) == 0x200f || (c|1) == 0x2029) continue;
|
||||
#endif
|
||||
if (c == CHAR_NUMBER_SIGN)
|
||||
{
|
||||
while (ptr < ptrend)
|
||||
|
@ -3206,7 +3272,6 @@ while (ptr < ptrend)
|
|||
tempptr = ptr;
|
||||
escape = PRIV(check_escape)(&ptr, ptrend, &c, &errorcode,
|
||||
options, TRUE, cb);
|
||||
|
||||
if (errorcode != 0)
|
||||
{
|
||||
CLASS_ESCAPE_FAILED:
|
||||
|
@ -3454,13 +3519,25 @@ while (ptr < ptrend)
|
|||
|
||||
if (*ptr++ == CHAR_COLON) /* Skip past : or ) */
|
||||
{
|
||||
if (verbs[i].has_arg < 0) /* Argument is forbidden */
|
||||
/* Some optional arguments can be treated as a preceding (*MARK) */
|
||||
|
||||
if (verbs[i].has_arg < 0)
|
||||
{
|
||||
errorcode = ERR59;
|
||||
goto FAILED;
|
||||
add_after_mark = verbs[i].meta;
|
||||
*parsed_pattern++ = META_MARK;
|
||||
}
|
||||
*parsed_pattern++ = verbs[i].meta +
|
||||
((verbs[i].meta != META_MARK)? 0x00010000u:0);
|
||||
|
||||
/* The remaining verbs with arguments (except *MARK) need a different
|
||||
opcode. */
|
||||
|
||||
else
|
||||
{
|
||||
*parsed_pattern++ = verbs[i].meta +
|
||||
((verbs[i].meta != META_MARK)? 0x00010000u:0);
|
||||
}
|
||||
|
||||
/* Set up for reading the name in the main loop. */
|
||||
|
||||
verblengthptr = parsed_pattern++;
|
||||
verbnamestart = ptr;
|
||||
inverbname = TRUE;
|
||||
|
@ -3521,17 +3598,39 @@ while (ptr < ptrend)
|
|||
|
||||
else
|
||||
{
|
||||
BOOL hyphenok = TRUE;
|
||||
uint32_t oldoptions = options;
|
||||
|
||||
top_nest->reset_group = 0;
|
||||
top_nest->max_group = 0;
|
||||
set = unset = 0;
|
||||
optset = &set;
|
||||
|
||||
/* ^ at the start unsets imnsx and disables the subsequent use of - */
|
||||
|
||||
if (ptr < ptrend && *ptr == CHAR_CIRCUMFLEX_ACCENT)
|
||||
{
|
||||
options &= ~(PCRE2_CASELESS|PCRE2_MULTILINE|PCRE2_NO_AUTO_CAPTURE|
|
||||
PCRE2_DOTALL|PCRE2_EXTENDED|PCRE2_EXTENDED_MORE);
|
||||
hyphenok = FALSE;
|
||||
ptr++;
|
||||
}
|
||||
|
||||
while (ptr < ptrend && *ptr != CHAR_RIGHT_PARENTHESIS &&
|
||||
*ptr != CHAR_COLON)
|
||||
{
|
||||
switch (*ptr++)
|
||||
{
|
||||
case CHAR_MINUS: optset = &unset; break;
|
||||
case CHAR_MINUS:
|
||||
if (!hyphenok)
|
||||
{
|
||||
errorcode = ERR94;
|
||||
ptr--; /* Correct the offset */
|
||||
goto FAILED;
|
||||
}
|
||||
optset = &unset;
|
||||
hyphenok = FALSE;
|
||||
break;
|
||||
|
||||
case CHAR_J: /* Record that it changed in the external options */
|
||||
*optset |= PCRE2_DUPNAMES;
|
||||
|
@ -3591,7 +3690,7 @@ while (ptr < ptrend)
|
|||
|
||||
/* If nothing changed, no need to record. */
|
||||
|
||||
if (set != 0 || unset != 0)
|
||||
if (options != oldoptions)
|
||||
{
|
||||
*parsed_pattern++ = META_OPTIONS;
|
||||
*parsed_pattern++ = options;
|
||||
|
@ -3896,9 +3995,8 @@ while (ptr < ptrend)
|
|||
if (*ptr == CHAR_DOT)
|
||||
{
|
||||
if (++ptr >= ptrend || !IS_DIGIT(*ptr)) goto BAD_VERSION_CONDITION;
|
||||
if (!read_number(&ptr, ptrend, -1, 99 , ERR79, &minor, &errorcode))
|
||||
goto FAILED;
|
||||
if (minor < 10) minor *= 10;
|
||||
minor = (*ptr++ - CHAR_0) * 10;
|
||||
if (IS_DIGIT(*ptr)) minor += *ptr++ - CHAR_0;
|
||||
if (ptr >= ptrend || *ptr != CHAR_RIGHT_PARENTHESIS)
|
||||
goto BAD_VERSION_CONDITION;
|
||||
}
|
||||
|
@ -4261,11 +4359,11 @@ goto FAILED;
|
|||
|
||||
|
||||
/*************************************************
|
||||
* Find first significant op code *
|
||||
* Find first significant opcode *
|
||||
*************************************************/
|
||||
|
||||
/* This is called by several functions that scan a compiled expression looking
|
||||
for a fixed first character, or an anchoring op code etc. It skips over things
|
||||
for a fixed first character, or an anchoring opcode etc. It skips over things
|
||||
that do not influence this. For some calls, it makes sense to skip negative
|
||||
forward and all backward assertions, and also the \b assertion; for others it
|
||||
does not.
|
||||
|
@ -5472,7 +5570,7 @@ for (;; pptr++)
|
|||
set xclass = TRUE. Then, in the pre-compile phase, accumulate the length
|
||||
of the extra data and reset the pointer. This is so that very large
|
||||
classes that contain a zillion wide characters or Unicode property tests
|
||||
do not overwrite the work space (which is on the stack). */
|
||||
do not overwrite the workspace (which is on the stack). */
|
||||
|
||||
if (class_uchardata > class_uchardata_base)
|
||||
{
|
||||
|
@ -5563,7 +5661,7 @@ for (;; pptr++)
|
|||
if (class_has_8bitchar > 0)
|
||||
{
|
||||
*code++ |= XCL_MAP;
|
||||
memmove(code + (32 / sizeof(PCRE2_UCHAR)), code,
|
||||
(void)memmove(code + (32 / sizeof(PCRE2_UCHAR)), code,
|
||||
CU2BYTES(class_uchardata - code));
|
||||
if (negate_class && !xclass_has_prop)
|
||||
for (i = 0; i < 32; i++) classbits[i] = ~classbits[i];
|
||||
|
@ -5655,6 +5753,7 @@ for (;; pptr++)
|
|||
cb->had_pruneorskip = TRUE;
|
||||
/* Fall through */
|
||||
case META_MARK:
|
||||
case META_COMMIT_ARG:
|
||||
VERB_ARG:
|
||||
*code++ = verbops[(meta - META_MARK) >> 16];
|
||||
/* The length is in characters. */
|
||||
|
@ -6509,7 +6608,7 @@ for (;; pptr++)
|
|||
|
||||
/* Wrap the recursion call in OP_BRA brackets. */
|
||||
|
||||
memmove(previous + 1 + LINK_SIZE, previous, CU2BYTES(1 + LINK_SIZE));
|
||||
(void)memmove(previous + 1 + LINK_SIZE, previous, CU2BYTES(1 + LINK_SIZE));
|
||||
op_previous = *previous = OP_BRA;
|
||||
PUT(previous, 1, 2 + 2*LINK_SIZE);
|
||||
previous[2 + 2*LINK_SIZE] = OP_KET;
|
||||
|
@ -6589,7 +6688,7 @@ for (;; pptr++)
|
|||
|
||||
if (repeat_max <= 1 || repeat_max == REPEAT_UNLIMITED)
|
||||
{
|
||||
memmove(previous + 1, previous, CU2BYTES(len));
|
||||
(void)memmove(previous + 1, previous, CU2BYTES(len));
|
||||
code++;
|
||||
if (repeat_max == 0)
|
||||
{
|
||||
|
@ -6610,7 +6709,7 @@ for (;; pptr++)
|
|||
else
|
||||
{
|
||||
int linkoffset;
|
||||
memmove(previous + 2 + LINK_SIZE, previous, CU2BYTES(len));
|
||||
(void)memmove(previous + 2 + LINK_SIZE, previous, CU2BYTES(len));
|
||||
code += 2 + LINK_SIZE;
|
||||
*previous++ = OP_BRAZERO + repeat_type;
|
||||
*previous++ = OP_BRA;
|
||||
|
@ -6811,7 +6910,7 @@ for (;; pptr++)
|
|||
if (*bracode == OP_COND || *bracode == OP_SCOND)
|
||||
{
|
||||
int nlen = (int)(code - bracode);
|
||||
memmove(bracode + 1 + LINK_SIZE, bracode, CU2BYTES(nlen));
|
||||
(void)memmove(bracode + 1 + LINK_SIZE, bracode, CU2BYTES(nlen));
|
||||
code += 1 + LINK_SIZE;
|
||||
nlen += 1 + LINK_SIZE;
|
||||
*bracode = (*bracode == OP_COND)? OP_BRAPOS : OP_SBRAPOS;
|
||||
|
@ -7082,7 +7181,7 @@ for (;; pptr++)
|
|||
|
||||
else
|
||||
{
|
||||
memmove(tempcode + 1 + LINK_SIZE, tempcode, CU2BYTES(len));
|
||||
(void)memmove(tempcode + 1 + LINK_SIZE, tempcode, CU2BYTES(len));
|
||||
code += 1 + LINK_SIZE;
|
||||
len += 1 + LINK_SIZE;
|
||||
tempcode[0] = OP_ONCE;
|
||||
|
@ -7460,7 +7559,7 @@ length of the BRA and KET and any extra code units that are required at the
|
|||
beginning. We accumulate in a local variable to save frequent testing of
|
||||
lengthptr for NULL. We cannot do this by looking at the value of 'code' at the
|
||||
start and end of each alternative, because compiled items are discarded during
|
||||
the pre-compile phase so that the work space is not exceeded. */
|
||||
the pre-compile phase so that the workspace is not exceeded. */
|
||||
|
||||
length = 2 + 2*LINK_SIZE + skipunits;
|
||||
|
||||
|
@ -7622,7 +7721,7 @@ for (;;)
|
|||
{
|
||||
if (cb->open_caps->flag)
|
||||
{
|
||||
memmove(start_bracket + 1 + LINK_SIZE, start_bracket,
|
||||
(void)memmove(start_bracket + 1 + LINK_SIZE, start_bracket,
|
||||
CU2BYTES(code - start_bracket));
|
||||
*start_bracket = OP_ONCE;
|
||||
code += 1 + LINK_SIZE;
|
||||
|
@ -7765,10 +7864,11 @@ do {
|
|||
if (!is_anchored(scode, bracket_map, cb, atomcount, TRUE)) return FALSE;
|
||||
}
|
||||
|
||||
/* Condition */
|
||||
/* Condition. If there is no second branch, it can't be anchored. */
|
||||
|
||||
else if (op == OP_COND)
|
||||
else if (op == OP_COND || op == OP_SCOND)
|
||||
{
|
||||
if (scode[GET(scode,1)] != OP_ALT) return FALSE;
|
||||
if (!is_anchored(scode, bracket_map, cb, atomcount, inassert))
|
||||
return FALSE;
|
||||
}
|
||||
|
@ -8003,6 +8103,7 @@ for (;;)
|
|||
break;
|
||||
|
||||
case OP_MARK:
|
||||
case OP_COMMIT_ARG:
|
||||
case OP_PRUNE_ARG:
|
||||
case OP_SKIP_ARG:
|
||||
case OP_THEN_ARG:
|
||||
|
@ -8221,7 +8322,7 @@ for (i = 0; i < tablecount; i++)
|
|||
|
||||
if (crc < 0)
|
||||
{
|
||||
memmove(slot + cb->name_entry_size, slot,
|
||||
(void)memmove(slot + cb->name_entry_size, slot,
|
||||
CU2BYTES((tablecount - i) * cb->name_entry_size));
|
||||
break;
|
||||
}
|
||||
|
@ -8311,6 +8412,7 @@ for (;; pptr++)
|
|||
break;
|
||||
|
||||
case META_MARK: /* Add the length of the name. */
|
||||
case META_COMMIT_ARG:
|
||||
case META_PRUNE_ARG:
|
||||
case META_SKIP_ARG:
|
||||
case META_THEN_ARG:
|
||||
|
@ -8501,6 +8603,7 @@ for (;; pptr++)
|
|||
goto EXIT;
|
||||
|
||||
case META_MARK:
|
||||
case META_COMMIT_ARG:
|
||||
case META_PRUNE_ARG:
|
||||
case META_SKIP_ARG:
|
||||
case META_THEN_ARG:
|
||||
|
@ -8572,6 +8675,32 @@ for (;; pptr++)
|
|||
case META_LOOKAHEADNOT:
|
||||
pptr = parsed_skip(pptr + 1, PSKIP_KET);
|
||||
if (pptr == NULL) goto PARSED_SKIP_FAILED;
|
||||
|
||||
/* Also ignore any qualifiers that follow a lookahead assertion. */
|
||||
|
||||
switch (pptr[1])
|
||||
{
|
||||
case META_ASTERISK:
|
||||
case META_ASTERISK_PLUS:
|
||||
case META_ASTERISK_QUERY:
|
||||
case META_PLUS:
|
||||
case META_PLUS_PLUS:
|
||||
case META_PLUS_QUERY:
|
||||
case META_QUERY:
|
||||
case META_QUERY_PLUS:
|
||||
case META_QUERY_QUERY:
|
||||
pptr++;
|
||||
break;
|
||||
|
||||
case META_MINMAX:
|
||||
case META_MINMAX_PLUS:
|
||||
case META_MINMAX_QUERY:
|
||||
pptr += 3;
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
/* Lookbehinds can be ignored, but must themselves be checked. */
|
||||
|
@ -8942,6 +9071,7 @@ for (pptr = cb->parsed_pattern; *pptr != META_END; pptr++)
|
|||
break;
|
||||
|
||||
case META_MARK:
|
||||
case META_COMMIT_ARG:
|
||||
case META_PRUNE_ARG:
|
||||
case META_SKIP_ARG:
|
||||
case META_THEN_ARG:
|
||||
|
|
|
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
|
|||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2017 University of Cambridge
|
||||
New API code Copyright (c) 2016-2018 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
@ -1066,11 +1066,12 @@ BOOL utf = (options & PCRE2_CONVERT_UTF) != 0;
|
|||
uint32_t pattype = options & TYPE_OPTIONS;
|
||||
|
||||
if (pattern == NULL || bufflenptr == NULL) return PCRE2_ERROR_NULL;
|
||||
|
||||
if ((options & ~ALL_OPTIONS) != 0 || /* Undefined bit set */
|
||||
(pattype & (~pattype+1)) != pattype || /* More than one type set */
|
||||
pattype == 0) /* No type set */
|
||||
{
|
||||
*bufflenptr = 0; /* Error offset */
|
||||
*bufflenptr = 0; /* Error offset */
|
||||
return PCRE2_ERROR_BADOPTION;
|
||||
}
|
||||
|
||||
|
@ -1081,7 +1082,11 @@ if (ccontext == NULL) ccontext =
|
|||
/* Check UTF if required. */
|
||||
|
||||
#ifndef SUPPORT_UNICODE
|
||||
if (utf) return PCRE2_ERROR_UNICODE_NOT_SUPPORTED;
|
||||
if (utf)
|
||||
{
|
||||
*bufflenptr = 0; /* Error offset */
|
||||
return PCRE2_ERROR_UNICODE_NOT_SUPPORTED;
|
||||
}
|
||||
#else
|
||||
if (utf && (options & PCRE2_CONVERT_NO_UTF_CHECK) == 0)
|
||||
{
|
||||
|
@ -1126,6 +1131,7 @@ for (i = 0; i < 2; i++)
|
|||
break;
|
||||
|
||||
default:
|
||||
*bufflenptr = 0; /* Error offset */
|
||||
return PCRE2_ERROR_INTERNAL;
|
||||
}
|
||||
|
||||
|
|
|
@ -181,7 +181,8 @@ static const uint8_t coptable[] = {
|
|||
0, 0, 0, /* BRAZERO, BRAMINZERO, BRAPOSZERO */
|
||||
0, 0, 0, /* MARK, PRUNE, PRUNE_ARG */
|
||||
0, 0, 0, 0, /* SKIP, SKIP_ARG, THEN, THEN_ARG */
|
||||
0, 0, 0, 0, /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT */
|
||||
0, 0, /* COMMIT, COMMIT_ARG */
|
||||
0, 0, 0, /* FAIL, ACCEPT, ASSERT_ACCEPT */
|
||||
0, 0, 0 /* CLOSE, SKIPZERO, DEFINE */
|
||||
};
|
||||
|
||||
|
@ -254,7 +255,8 @@ static const uint8_t poptable[] = {
|
|||
0, 0, 0, /* BRAZERO, BRAMINZERO, BRAPOSZERO */
|
||||
0, 0, 0, /* MARK, PRUNE, PRUNE_ARG */
|
||||
0, 0, 0, 0, /* SKIP, SKIP_ARG, THEN, THEN_ARG */
|
||||
0, 0, 0, 0, /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT */
|
||||
0, 0, /* COMMIT, COMMIT_ARG */
|
||||
0, 0, 0, /* FAIL, ACCEPT, ASSERT_ACCEPT */
|
||||
0, 0, 0 /* CLOSE, SKIPZERO, DEFINE */
|
||||
};
|
||||
|
||||
|
@ -292,6 +294,35 @@ typedef struct stateblock {
|
|||
#define INTS_PER_STATEBLOCK (int)(sizeof(stateblock)/sizeof(int))
|
||||
|
||||
|
||||
/* Before version 10.32 the recursive calls of internal_dfa_match() were passed
|
||||
local working space and output vectors that were created on the stack. This has
|
||||
caused issues for some patterns, especially in small-stack environments such as
|
||||
Windows. A new scheme is now in use which sets up a vector on the stack, but if
|
||||
this is too small, heap memory is used, up to the heap_limit. The main
|
||||
parameters are all numbers of ints because the workspace is a vector of ints.
|
||||
|
||||
The size of the starting stack vector, DFA_START_RWS_SIZE, is in bytes, and is
|
||||
defined in pcre2_internal.h so as to be available to pcre2test when it is
|
||||
finding the minimum heap requirement for a match. */
|
||||
|
||||
#define OVEC_UNIT (sizeof(PCRE2_SIZE)/sizeof(int))
|
||||
|
||||
#define RWS_BASE_SIZE (DFA_START_RWS_SIZE/sizeof(int)) /* Stack vector */
|
||||
#define RWS_RSIZE 1000 /* Work size for recursion */
|
||||
#define RWS_OVEC_RSIZE (1000*OVEC_UNIT) /* Ovector for recursion */
|
||||
#define RWS_OVEC_OSIZE (2*OVEC_UNIT) /* Ovector in other cases */
|
||||
|
||||
/* This structure is at the start of each workspace block. */
|
||||
|
||||
/* Header stored at the very beginning of every recursion workspace block.
Blocks form a singly linked chain: the first one is the caller's stack vector
and any further ones are obtained through the memctl allocator when more room
is required. The free count is lowered before each recursive call to
internal_dfa_match() and raised again when that call returns. */
typedef struct RWS_anchor {
|
||||
struct RWS_anchor *next; /* Following block in the chain, or NULL */
|
||||
unsigned int size; /* Total size of this block in ints, anchor included */
|
||||
unsigned int free; /* Number of ints at the end of the block still unused */
|
||||
} RWS_anchor;
|
||||
|
||||
#define RWS_ANCHOR_SIZE (sizeof(RWS_anchor)/sizeof(int))
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Process a callout *
|
||||
|
@ -353,6 +384,61 @@ return (mb->callout)(cb, mb->callout_data);
|
|||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Expand local workspace memory *
|
||||
*************************************************/
|
||||
|
||||
/* This function is called when internal_dfa_match() is about to be called
|
||||
recursively and there is insufficient working space left in the current
|
||||
workspace block. If there's an existing next block, use it; otherwise get a new
|
||||
block unless the heap limit is reached.
|
||||
|
||||
Arguments:
|
||||
rwsptr pointer to block pointer (updated)
|
||||
ovecsize space needed for an ovector
|
||||
mb the match block
|
||||
|
||||
Returns: 0 rwsptr has been updated
|
||||
!0 an error code
|
||||
*/
|
||||
|
||||
static int
|
||||
more_workspace(RWS_anchor **rwsptr, unsigned int ovecsize, dfa_match_block *mb)
|
||||
{
|
||||
RWS_anchor *rws = *rwsptr;
|
||||
RWS_anchor *new;
|
||||
|
||||
if (rws->next != NULL)
|
||||
{
|
||||
new = rws->next;
|
||||
}
|
||||
|
||||
/* All sizes are in units of sizeof(int), except for mb->heaplimit, which is in
|
||||
kibibytes. */
|
||||
|
||||
else
|
||||
{
|
||||
unsigned int newsize = rws->size * 2;
|
||||
unsigned int heapleft = (unsigned int)
|
||||
(((1024/sizeof(int))*mb->heap_limit - mb->heap_used));
|
||||
if (newsize > heapleft) newsize = heapleft;
|
||||
if (newsize < RWS_RSIZE + ovecsize + RWS_ANCHOR_SIZE)
|
||||
return PCRE2_ERROR_HEAPLIMIT;
|
||||
new = mb->memctl.malloc(newsize*sizeof(int), mb->memctl.memory_data);
|
||||
if (new == NULL) return PCRE2_ERROR_NOMEMORY;
|
||||
mb->heap_used += newsize;
|
||||
new->next = NULL;
|
||||
new->size = newsize;
|
||||
rws->next = new;
|
||||
}
|
||||
|
||||
new->free = new->size - RWS_ANCHOR_SIZE;
|
||||
*rwsptr = new;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Match a Regular Expression - DFA engine *
|
||||
*************************************************/
|
||||
|
@ -431,7 +517,8 @@ internal_dfa_match(
|
|||
uint32_t offsetcount,
|
||||
int *workspace,
|
||||
int wscount,
|
||||
uint32_t rlevel)
|
||||
uint32_t rlevel,
|
||||
int *RWS)
|
||||
{
|
||||
stateblock *active_states, *new_states, *temp_states;
|
||||
stateblock *next_active_state, *next_new_state;
|
||||
|
@ -788,7 +875,7 @@ for (;;)
|
|||
else if (match_count > 0 && ++match_count * 2 > (int)offsetcount)
|
||||
match_count = 0;
|
||||
count = ((match_count == 0)? (int)offsetcount : match_count * 2) - 2;
|
||||
if (count > 0) memmove(offsets + 2, offsets,
|
||||
if (count > 0) (void)memmove(offsets + 2, offsets,
|
||||
(size_t)count * sizeof(PCRE2_SIZE));
|
||||
if (offsetcount >= 2)
|
||||
{
|
||||
|
@ -2587,10 +2674,22 @@ for (;;)
|
|||
case OP_ASSERTBACK:
|
||||
case OP_ASSERTBACK_NOT:
|
||||
{
|
||||
PCRE2_SPTR endasscode = code + GET(code, 1);
|
||||
PCRE2_SIZE local_offsets[2];
|
||||
int rc;
|
||||
int local_workspace[1000];
|
||||
int *local_workspace;
|
||||
PCRE2_SIZE *local_offsets;
|
||||
PCRE2_SPTR endasscode = code + GET(code, 1);
|
||||
RWS_anchor *rws = (RWS_anchor *)RWS;
|
||||
|
||||
if (rws->free < RWS_RSIZE + RWS_OVEC_OSIZE)
|
||||
{
|
||||
rc = more_workspace(&rws, RWS_OVEC_OSIZE, mb);
|
||||
if (rc != 0) return rc;
|
||||
RWS = (int *)rws;
|
||||
}
|
||||
|
||||
local_offsets = (PCRE2_SIZE *)(RWS + rws->size - rws->free);
|
||||
local_workspace = ((int *)local_offsets) + RWS_OVEC_OSIZE;
|
||||
rws->free -= RWS_RSIZE + RWS_OVEC_OSIZE;
|
||||
|
||||
while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);
|
||||
|
||||
|
@ -2600,10 +2699,13 @@ for (;;)
|
|||
ptr, /* where we currently are */
|
||||
(PCRE2_SIZE)(ptr - start_subject), /* start offset */
|
||||
local_offsets, /* offset vector */
|
||||
sizeof(local_offsets)/sizeof(PCRE2_SIZE), /* size of same */
|
||||
RWS_OVEC_OSIZE/OVEC_UNIT, /* size of same */
|
||||
local_workspace, /* workspace vector */
|
||||
sizeof(local_workspace)/sizeof(int), /* size of same */
|
||||
rlevel); /* function recursion level */
|
||||
RWS_RSIZE, /* size of same */
|
||||
rlevel, /* function recursion level */
|
||||
RWS); /* recursion workspace */
|
||||
|
||||
rws->free += RWS_RSIZE + RWS_OVEC_OSIZE;
|
||||
|
||||
if (rc < 0 && rc != PCRE2_ERROR_NOMATCH) return rc;
|
||||
if ((rc >= 0) == (codevalue == OP_ASSERT || codevalue == OP_ASSERTBACK))
|
||||
|
@ -2615,8 +2717,6 @@ for (;;)
|
|||
case OP_COND:
|
||||
case OP_SCOND:
|
||||
{
|
||||
PCRE2_SIZE local_offsets[1000];
|
||||
int local_workspace[1000];
|
||||
int codelink = (int)GET(code, 1);
|
||||
PCRE2_UCHAR condcode;
|
||||
|
||||
|
@ -2673,8 +2773,22 @@ for (;;)
|
|||
else
|
||||
{
|
||||
int rc;
|
||||
int *local_workspace;
|
||||
PCRE2_SIZE *local_offsets;
|
||||
PCRE2_SPTR asscode = code + LINK_SIZE + 1;
|
||||
PCRE2_SPTR endasscode = asscode + GET(asscode, 1);
|
||||
RWS_anchor *rws = (RWS_anchor *)RWS;
|
||||
|
||||
if (rws->free < RWS_RSIZE + RWS_OVEC_OSIZE)
|
||||
{
|
||||
rc = more_workspace(&rws, RWS_OVEC_OSIZE, mb);
|
||||
if (rc != 0) return rc;
|
||||
RWS = (int *)rws;
|
||||
}
|
||||
|
||||
local_offsets = (PCRE2_SIZE *)(RWS + rws->size - rws->free);
|
||||
local_workspace = ((int *)local_offsets) + RWS_OVEC_OSIZE;
|
||||
rws->free -= RWS_RSIZE + RWS_OVEC_OSIZE;
|
||||
|
||||
while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);
|
||||
|
||||
|
@ -2684,10 +2798,13 @@ for (;;)
|
|||
ptr, /* where we currently are */
|
||||
(PCRE2_SIZE)(ptr - start_subject), /* start offset */
|
||||
local_offsets, /* offset vector */
|
||||
sizeof(local_offsets)/sizeof(PCRE2_SIZE), /* size of same */
|
||||
RWS_OVEC_OSIZE/OVEC_UNIT, /* size of same */
|
||||
local_workspace, /* workspace vector */
|
||||
sizeof(local_workspace)/sizeof(int), /* size of same */
|
||||
rlevel); /* function recursion level */
|
||||
RWS_RSIZE, /* size of same */
|
||||
rlevel, /* function recursion level */
|
||||
RWS); /* recursion workspace */
|
||||
|
||||
rws->free += RWS_RSIZE + RWS_OVEC_OSIZE;
|
||||
|
||||
if (rc < 0 && rc != PCRE2_ERROR_NOMATCH) return rc;
|
||||
if ((rc >= 0) ==
|
||||
|
@ -2702,13 +2819,25 @@ for (;;)
|
|||
/*-----------------------------------------------------------------*/
|
||||
case OP_RECURSE:
|
||||
{
|
||||
int rc;
|
||||
int *local_workspace;
|
||||
PCRE2_SIZE *local_offsets;
|
||||
RWS_anchor *rws = (RWS_anchor *)RWS;
|
||||
dfa_recursion_info *ri;
|
||||
PCRE2_SIZE local_offsets[1000];
|
||||
int local_workspace[1000];
|
||||
PCRE2_SPTR callpat = start_code + GET(code, 1);
|
||||
uint32_t recno = (callpat == mb->start_code)? 0 :
|
||||
GET2(callpat, 1 + LINK_SIZE);
|
||||
int rc;
|
||||
|
||||
if (rws->free < RWS_RSIZE + RWS_OVEC_RSIZE)
|
||||
{
|
||||
rc = more_workspace(&rws, RWS_OVEC_RSIZE, mb);
|
||||
if (rc != 0) return rc;
|
||||
RWS = (int *)rws;
|
||||
}
|
||||
|
||||
local_offsets = (PCRE2_SIZE *)(RWS + rws->size - rws->free);
|
||||
local_workspace = ((int *)local_offsets) + RWS_OVEC_RSIZE;
|
||||
rws->free -= RWS_RSIZE + RWS_OVEC_RSIZE;
|
||||
|
||||
/* Check for repeating a recursion without advancing the subject
|
||||
pointer. This should catch convoluted mutual recursions. (Some simple
|
||||
|
@ -2732,11 +2861,13 @@ for (;;)
|
|||
ptr, /* where we currently are */
|
||||
(PCRE2_SIZE)(ptr - start_subject), /* start offset */
|
||||
local_offsets, /* offset vector */
|
||||
sizeof(local_offsets)/sizeof(PCRE2_SIZE), /* size of same */
|
||||
RWS_OVEC_RSIZE/OVEC_UNIT, /* size of same */
|
||||
local_workspace, /* workspace vector */
|
||||
sizeof(local_workspace)/sizeof(int), /* size of same */
|
||||
rlevel); /* function recursion level */
|
||||
RWS_RSIZE, /* size of same */
|
||||
rlevel, /* function recursion level */
|
||||
RWS); /* recursion workspace */
|
||||
|
||||
rws->free += RWS_RSIZE + RWS_OVEC_RSIZE;
|
||||
mb->recursive = new_recursive.prevrec; /* Done this recursion */
|
||||
|
||||
/* Ran out of internal offsets */
|
||||
|
@ -2782,10 +2913,25 @@ for (;;)
|
|||
case OP_SCBRAPOS:
|
||||
case OP_BRAPOSZERO:
|
||||
{
|
||||
int rc;
|
||||
int *local_workspace;
|
||||
PCRE2_SIZE *local_offsets;
|
||||
PCRE2_SIZE charcount, matched_count;
|
||||
PCRE2_SPTR local_ptr = ptr;
|
||||
RWS_anchor *rws = (RWS_anchor *)RWS;
|
||||
BOOL allow_zero;
|
||||
|
||||
if (rws->free < RWS_RSIZE + RWS_OVEC_OSIZE)
|
||||
{
|
||||
rc = more_workspace(&rws, RWS_OVEC_OSIZE, mb);
|
||||
if (rc != 0) return rc;
|
||||
RWS = (int *)rws;
|
||||
}
|
||||
|
||||
local_offsets = (PCRE2_SIZE *)(RWS + rws->size - rws->free);
|
||||
local_workspace = ((int *)local_offsets) + RWS_OVEC_OSIZE;
|
||||
rws->free -= RWS_RSIZE + RWS_OVEC_OSIZE;
|
||||
|
||||
if (codevalue == OP_BRAPOSZERO)
|
||||
{
|
||||
allow_zero = TRUE;
|
||||
|
@ -2798,19 +2944,17 @@ for (;;)
|
|||
|
||||
for (matched_count = 0;; matched_count++)
|
||||
{
|
||||
PCRE2_SIZE local_offsets[2];
|
||||
int local_workspace[1000];
|
||||
|
||||
int rc = internal_dfa_match(
|
||||
rc = internal_dfa_match(
|
||||
mb, /* fixed match data */
|
||||
code, /* this subexpression's code */
|
||||
local_ptr, /* where we currently are */
|
||||
(PCRE2_SIZE)(ptr - start_subject), /* start offset */
|
||||
local_offsets, /* offset vector */
|
||||
sizeof(local_offsets)/sizeof(PCRE2_SIZE), /* size of same */
|
||||
RWS_OVEC_OSIZE/OVEC_UNIT, /* size of same */
|
||||
local_workspace, /* workspace vector */
|
||||
sizeof(local_workspace)/sizeof(int), /* size of same */
|
||||
rlevel); /* function recursion level */
|
||||
RWS_RSIZE, /* size of same */
|
||||
rlevel, /* function recursion level */
|
||||
RWS); /* recursion workspace */
|
||||
|
||||
/* Failed to match */
|
||||
|
||||
|
@ -2827,6 +2971,8 @@ for (;;)
|
|||
local_ptr += charcount; /* Advance temporary position ptr */
|
||||
}
|
||||
|
||||
rws->free += RWS_RSIZE + RWS_OVEC_OSIZE;
|
||||
|
||||
/* At this point we have matched the subpattern matched_count
|
||||
times, and local_ptr is pointing to the character after the end of the
|
||||
last match. */
|
||||
|
@ -2869,19 +3015,35 @@ for (;;)
|
|||
/*-----------------------------------------------------------------*/
|
||||
case OP_ONCE:
|
||||
{
|
||||
PCRE2_SIZE local_offsets[2];
|
||||
int local_workspace[1000];
|
||||
int rc;
|
||||
int *local_workspace;
|
||||
PCRE2_SIZE *local_offsets;
|
||||
RWS_anchor *rws = (RWS_anchor *)RWS;
|
||||
|
||||
int rc = internal_dfa_match(
|
||||
if (rws->free < RWS_RSIZE + RWS_OVEC_OSIZE)
|
||||
{
|
||||
rc = more_workspace(&rws, RWS_OVEC_OSIZE, mb);
|
||||
if (rc != 0) return rc;
|
||||
RWS = (int *)rws;
|
||||
}
|
||||
|
||||
local_offsets = (PCRE2_SIZE *)(RWS + rws->size - rws->free);
|
||||
local_workspace = ((int *)local_offsets) + RWS_OVEC_OSIZE;
|
||||
rws->free -= RWS_RSIZE + RWS_OVEC_OSIZE;
|
||||
|
||||
rc = internal_dfa_match(
|
||||
mb, /* fixed match data */
|
||||
code, /* this subexpression's code */
|
||||
ptr, /* where we currently are */
|
||||
(PCRE2_SIZE)(ptr - start_subject), /* start offset */
|
||||
local_offsets, /* offset vector */
|
||||
sizeof(local_offsets)/sizeof(PCRE2_SIZE), /* size of same */
|
||||
RWS_OVEC_OSIZE/OVEC_UNIT, /* size of same */
|
||||
local_workspace, /* workspace vector */
|
||||
sizeof(local_workspace)/sizeof(int), /* size of same */
|
||||
rlevel); /* function recursion level */
|
||||
RWS_RSIZE, /* size of same */
|
||||
rlevel, /* function recursion level */
|
||||
RWS); /* recursion workspace */
|
||||
|
||||
rws->free += RWS_RSIZE + RWS_OVEC_OSIZE;
|
||||
|
||||
if (rc >= 0)
|
||||
{
|
||||
|
@ -3063,6 +3225,7 @@ pcre2_dfa_match(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length,
|
|||
PCRE2_SIZE start_offset, uint32_t options, pcre2_match_data *match_data,
|
||||
pcre2_match_context *mcontext, int *workspace, PCRE2_SIZE wscount)
|
||||
{
|
||||
int rc;
|
||||
const pcre2_real_code *re = (const pcre2_real_code *)code;
|
||||
|
||||
PCRE2_SPTR start_match;
|
||||
|
@ -3071,9 +3234,9 @@ PCRE2_SPTR bumpalong_limit;
|
|||
PCRE2_SPTR req_cu_ptr;
|
||||
|
||||
BOOL utf, anchored, startline, firstline;
|
||||
|
||||
BOOL has_first_cu = FALSE;
|
||||
BOOL has_req_cu = FALSE;
|
||||
|
||||
PCRE2_UCHAR first_cu = 0;
|
||||
PCRE2_UCHAR first_cu2 = 0;
|
||||
PCRE2_UCHAR req_cu = 0;
|
||||
|
@ -3088,6 +3251,17 @@ pcre2_callout_block cb;
|
|||
dfa_match_block actual_match_block;
|
||||
dfa_match_block *mb = &actual_match_block;
|
||||
|
||||
/* Set up a starting block of memory for use during recursive calls to
|
||||
internal_dfa_match(). By putting this on the stack, it minimizes resource use
|
||||
in the case when it is not needed. If this is too small, more memory is
|
||||
obtained from the heap. At the start of each block is an anchor structure. */
|
||||
|
||||
int base_recursion_workspace[RWS_BASE_SIZE];
|
||||
RWS_anchor *rws = (RWS_anchor *)base_recursion_workspace;
|
||||
rws->next = NULL;
|
||||
rws->size = RWS_BASE_SIZE;
|
||||
rws->free = RWS_BASE_SIZE - RWS_ANCHOR_SIZE;
|
||||
|
||||
/* A length equal to PCRE2_ZERO_TERMINATED implies a zero-terminated
|
||||
subject string. */
|
||||
|
||||
|
@ -3184,6 +3358,7 @@ if (mcontext == NULL)
|
|||
mb->memctl = re->memctl;
|
||||
mb->match_limit = PRIV(default_match_context).match_limit;
|
||||
mb->match_limit_depth = PRIV(default_match_context).depth_limit;
|
||||
mb->heap_limit = PRIV(default_match_context).heap_limit;
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -3198,6 +3373,7 @@ else
|
|||
mb->memctl = mcontext->memctl;
|
||||
mb->match_limit = mcontext->match_limit;
|
||||
mb->match_limit_depth = mcontext->depth_limit;
|
||||
mb->heap_limit = mcontext->heap_limit;
|
||||
}
|
||||
|
||||
if (mb->match_limit > re->limit_match)
|
||||
|
@ -3206,6 +3382,9 @@ if (mb->match_limit > re->limit_match)
|
|||
if (mb->match_limit_depth > re->limit_depth)
|
||||
mb->match_limit_depth = re->limit_depth;
|
||||
|
||||
if (mb->heap_limit > re->limit_heap)
|
||||
mb->heap_limit = re->limit_heap;
|
||||
|
||||
mb->start_code = (PCRE2_UCHAR *)((uint8_t *)re + sizeof(pcre2_real_code)) +
|
||||
re->name_count * re->name_entry_size;
|
||||
mb->tables = re->tables;
|
||||
|
@ -3215,6 +3394,7 @@ mb->start_offset = start_offset;
|
|||
mb->moptions = options;
|
||||
mb->poptions = re->overall_options;
|
||||
mb->match_call_count = 0;
|
||||
mb->heap_used = 0;
|
||||
|
||||
/* Process the \R and newline settings. */
|
||||
|
||||
|
@ -3351,8 +3531,6 @@ a match. */
|
|||
|
||||
for (;;)
|
||||
{
|
||||
int rc;
|
||||
|
||||
/* ----------------- Start of match optimizations ---------------- */
|
||||
|
||||
/* There are some optimizations that avoid running the match if a known
|
||||
|
@ -3544,7 +3722,7 @@ for (;;)
|
|||
in characters, we treat it as code units to avoid spending too much time
|
||||
in this optimization. */
|
||||
|
||||
if (end_subject - start_match < re->minlength) return PCRE2_ERROR_NOMATCH;
|
||||
if (end_subject - start_match < re->minlength) goto NOMATCH_EXIT;
|
||||
|
||||
/* If req_cu is set, we know that that code unit must appear in the
|
||||
subject for the match to succeed. If the first code unit is set, req_cu
|
||||
|
@ -3621,7 +3799,8 @@ for (;;)
|
|||
(uint32_t)match_data->oveccount * 2, /* actual size of same */
|
||||
workspace, /* workspace vector */
|
||||
(int)wscount, /* size of same */
|
||||
0); /* function recurse level */
|
||||
0, /* function recurse level */
|
||||
base_recursion_workspace); /* initial workspace for recursion */
|
||||
|
||||
/* Anything other than "no match" means we are done, always; otherwise, carry
|
||||
on only if not anchored. */
|
||||
|
@ -3637,7 +3816,7 @@ for (;;)
|
|||
match_data->rightchar = (PCRE2_SIZE)( mb->last_used_ptr - subject);
|
||||
match_data->startchar = (PCRE2_SIZE)(start_match - subject);
|
||||
match_data->rc = rc;
|
||||
return rc;
|
||||
goto EXIT;
|
||||
}
|
||||
|
||||
/* Advance to the next subject character unless we are at the end of a line
|
||||
|
@ -3668,8 +3847,18 @@ for (;;)
|
|||
|
||||
} /* "Bumpalong" loop */
|
||||
|
||||
NOMATCH_EXIT:
|
||||
rc = PCRE2_ERROR_NOMATCH;
|
||||
|
||||
return PCRE2_ERROR_NOMATCH;
|
||||
EXIT:
|
||||
while (rws->next != NULL)
|
||||
{
|
||||
RWS_anchor *next = rws->next;
|
||||
rws->next = next->next;
|
||||
mb->memctl.free(next, mb->memctl.memory_data);
|
||||
}
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* End of pcre2_dfa_match.c */
|
||||
|
|
|
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
|
|||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2017 University of Cambridge
|
||||
New API code Copyright (c) 2016-2018 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
@ -107,7 +107,7 @@ static const unsigned char compile_error_texts[] =
|
|||
/* 35 */
|
||||
"lookbehind is too complicated\0"
|
||||
"\\C is not allowed in a lookbehind assertion in UTF-" XSTRING(PCRE2_CODE_UNIT_WIDTH) " mode\0"
|
||||
"PCRE does not support \\L, \\l, \\N{name}, \\U, or \\u\0"
|
||||
"PCRE2 does not support \\F, \\L, \\l, \\N{name}, \\U, or \\u\0"
|
||||
"number after (?C is greater than 255\0"
|
||||
"closing parenthesis for (?C expected\0"
|
||||
/* 40 */
|
||||
|
@ -133,7 +133,8 @@ static const unsigned char compile_error_texts[] =
|
|||
"internal error: unknown newline setting\0"
|
||||
"\\g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number\0"
|
||||
"(?R (recursive pattern call) must be followed by a closing parenthesis\0"
|
||||
"an argument is not allowed for (*ACCEPT), (*FAIL), or (*COMMIT)\0"
|
||||
/* "an argument is not allowed for (*ACCEPT), (*FAIL), or (*COMMIT)\0" */
|
||||
"obsolete error (should not occur)\0" /* Was the above */
|
||||
/* 60 */
|
||||
"(*VERB) not recognized or malformed\0"
|
||||
"group number is too big\0"
|
||||
|
@ -160,7 +161,7 @@ static const unsigned char compile_error_texts[] =
|
|||
"using UCP is disabled by the application\0"
|
||||
"name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)\0"
|
||||
"character code point value in \\u.... sequence is too large\0"
|
||||
"digits missing in \\x{} or \\o{}\0"
|
||||
"digits missing in \\x{} or \\o{} or \\N{U+}\0"
|
||||
"syntax error or number too big in (?(VERSION condition\0"
|
||||
/* 80 */
|
||||
"internal error: unknown opcode in auto_possessify()\0"
|
||||
|
@ -178,6 +179,8 @@ static const unsigned char compile_error_texts[] =
|
|||
"internal error: bad code value in parsed_skip()\0"
|
||||
"PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES is not allowed in UTF-16 mode\0"
|
||||
"invalid option bits with PCRE2_LITERAL\0"
|
||||
"\\N{U+dddd} is supported only in Unicode (UTF) mode\0"
|
||||
"invalid hyphen in option setting\0"
|
||||
;
|
||||
|
||||
/* Match-time and UTF error texts are in the same format. */
|
||||
|
@ -255,11 +258,13 @@ static const unsigned char match_error_texts[] =
|
|||
"expected closing curly bracket in replacement string\0"
|
||||
"bad substitution in replacement string\0"
|
||||
/* 60 */
|
||||
"match with end before start is not supported\0"
|
||||
"match with end before start or start moved backwards is not supported\0"
|
||||
"too many replacements (more than INT_MAX)\0"
|
||||
"bad serialized data\0"
|
||||
"heap limit exceeded\0"
|
||||
"invalid syntax\0"
|
||||
/* 65 */
|
||||
"internal error - duplicate substitution match\0"
|
||||
;
|
||||
|
||||
|
||||
|
|
|
@ -129,11 +129,11 @@ while (eptr < end_subject)
|
|||
if ((ricount & 1) != 0) break; /* Grapheme break required */
|
||||
}
|
||||
|
||||
/* If Extend follows E_Base[_GAZ] do not update lgb; this allows
|
||||
any number of Extend before a following E_Modifier. */
|
||||
/* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this
|
||||
allows any number of them before a following Extended_Pictographic. */
|
||||
|
||||
if (rgb != ucp_gbExtend ||
|
||||
(lgb != ucp_gbE_Base && lgb != ucp_gbE_Base_GAZ))
|
||||
if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) ||
|
||||
lgb != ucp_gbExtended_Pictographic)
|
||||
lgb = rgb;
|
||||
|
||||
eptr += len;
|
||||
|
|
|
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
|
|||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016 University of Cambridge
|
||||
New API code Copyright (c) 2016-2018 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
@ -131,6 +131,7 @@ for (;;)
|
|||
break;
|
||||
|
||||
case OP_MARK:
|
||||
case OP_COMMIT_ARG:
|
||||
case OP_PRUNE_ARG:
|
||||
case OP_SKIP_ARG:
|
||||
case OP_THEN_ARG:
|
||||
|
|
|
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
|
|||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2017 University of Cambridge
|
||||
New API code Copyright (c) 2016-2018 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
@ -165,6 +165,16 @@ by "configure". */
|
|||
#define INT64_OR_DOUBLE double
|
||||
#endif
|
||||
|
||||
/* External (in the C sense) functions and tables that are private to the
|
||||
libraries are always referenced using the PRIV macro. This makes it possible
|
||||
for pcre2test.c to include some of the source files from the libraries using a
|
||||
different PRIV definition to avoid name clashes. It also makes it clear in the
|
||||
code that a non-static object is being referenced. */
|
||||
|
||||
#ifndef PRIV
|
||||
#define PRIV(name) _pcre2_##name
|
||||
#endif
|
||||
|
||||
/* When compiling for use with the Virtual Pascal compiler, these functions
|
||||
need to have their names changed. PCRE2 must be compiled with the -DVPCOMPAT
|
||||
option on the command line. */
|
||||
|
@ -178,50 +188,15 @@ option on the command line. */
|
|||
#define memset(s,c,n) _memset(s,c,n)
|
||||
#else /* VPCOMPAT */
|
||||
|
||||
/* To cope with SunOS4 and other systems that lack memmove() but have bcopy(),
|
||||
define a macro for memmove() if HAVE_MEMMOVE is false, provided that HAVE_BCOPY
|
||||
is set. Otherwise, include an emulating function for those systems that have
|
||||
neither (there some non-Unix environments where this is the case). */
|
||||
/* Otherwise, to cope with SunOS4 and other systems that lack memmove(), define
|
||||
a macro that calls an emulating function. */
|
||||
|
||||
#ifndef HAVE_MEMMOVE
|
||||
#undef memmove /* some systems may have a macro */
|
||||
#ifdef HAVE_BCOPY
|
||||
#define memmove(a, b, c) bcopy(b, a, c)
|
||||
#else /* HAVE_BCOPY */
|
||||
static void *
|
||||
pcre2_memmove(void *d, const void *s, size_t n)
|
||||
{
|
||||
size_t i;
|
||||
unsigned char *dest = (unsigned char *)d;
|
||||
const unsigned char *src = (const unsigned char *)s;
|
||||
if (dest > src)
|
||||
{
|
||||
dest += n;
|
||||
src += n;
|
||||
for (i = 0; i < n; ++i) *(--dest) = *(--src);
|
||||
return (void *)dest;
|
||||
}
|
||||
else
|
||||
{
|
||||
for (i = 0; i < n; ++i) *dest++ = *src++;
|
||||
return (void *)(dest - n);
|
||||
}
|
||||
}
|
||||
#define memmove(a, b, c) pcre2_memmove(a, b, c)
|
||||
#endif /* not HAVE_BCOPY */
|
||||
#undef memmove /* Some systems may have a macro */
|
||||
#define memmove(a, b, c) PRIV(memmove)(a, b, c)
|
||||
#endif /* not HAVE_MEMMOVE */
|
||||
#endif /* not VPCOMPAT */
|
||||
|
||||
/* External (in the C sense) functions and tables that are private to the
|
||||
libraries are always referenced using the PRIV macro. This makes it possible
|
||||
for pcre2test.c to include some of the source files from the libraries using a
|
||||
different PRIV definition to avoid name clashes. It also makes it clear in the
|
||||
code that a non-static object is being referenced. */
|
||||
|
||||
#ifndef PRIV
|
||||
#define PRIV(name) _pcre2_##name
|
||||
#endif
|
||||
|
||||
/* This is an unsigned int value that no UTF character can ever have, as
|
||||
Unicode doesn't go beyond 0x0010ffff. */
|
||||
|
||||
|
@ -247,12 +222,17 @@ not rely on this. */
|
|||
pcre2_match() is allocated on the system stack, of this size (bytes). The size
|
||||
must be a multiple of sizeof(PCRE2_SPTR) in all environments, so making it a
|
||||
multiple of 8 is best. Typical frame sizes are a few hundred bytes (it depends
|
||||
on the number of capturing parentheses) so 20K handles quite a few frames. A
|
||||
on the number of capturing parentheses) so 20KiB handles quite a few frames. A
|
||||
larger vector on the heap is obtained for patterns that need more frames. The
|
||||
maximum size of this can be limited. */
|
||||
|
||||
#define START_FRAMES_SIZE 20480
|
||||
|
||||
/* Similarly, for DFA matching, an initial internal workspace vector is
|
||||
allocated on the stack. */
|
||||
|
||||
#define DFA_START_RWS_SIZE 30720
|
||||
|
||||
/* Define the default BSR convention. */
|
||||
|
||||
#ifdef BSR_ANYCRLF
|
||||
|
@ -585,14 +565,15 @@ these tables. */
|
|||
#define cbit_cntrl 288 /* [:cntrl:] */
|
||||
#define cbit_length 320 /* Length of the cbits table */
|
||||
|
||||
/* Bit definitions for entries in the ctypes table. */
|
||||
/* Bit definitions for entries in the ctypes table. Do not change these values
|
||||
without checking pcre2_jit_compile.c, which has an assertion to ensure that
|
||||
ctype_word has the value 16. */
|
||||
|
||||
#define ctype_space 0x01
|
||||
#define ctype_letter 0x02
|
||||
#define ctype_digit 0x04
|
||||
#define ctype_xdigit 0x08
|
||||
#define ctype_xdigit 0x08 /* not actually used any more */
|
||||
#define ctype_word 0x10 /* alphanumeric or '_' */
|
||||
#define ctype_meta 0x80 /* regexp meta char or zero (end pattern) */
|
||||
|
||||
/* Offsets of the various tables from the base tables pointer, and
|
||||
total length of the tables. */
|
||||
|
@ -1267,36 +1248,6 @@ contain characters with values greater than 255. */
|
|||
#define XCL_PROP 3 /* Unicode property (2-byte property code follows) */
|
||||
#define XCL_NOTPROP 4 /* Unicode inverted property (ditto) */
|
||||
|
||||
/* Escape items that are just an encoding of a particular data value. These
|
||||
appear in the escapes[] table in pcre2_compile.c as positive numbers. */
|
||||
|
||||
#ifndef ESC_a
|
||||
#define ESC_a CHAR_BEL
|
||||
#endif
|
||||
|
||||
#ifndef ESC_e
|
||||
#define ESC_e CHAR_ESC
|
||||
#endif
|
||||
|
||||
#ifndef ESC_f
|
||||
#define ESC_f CHAR_FF
|
||||
#endif
|
||||
|
||||
#ifndef ESC_n
|
||||
#define ESC_n CHAR_LF
|
||||
#endif
|
||||
|
||||
#ifndef ESC_r
|
||||
#define ESC_r CHAR_CR
|
||||
#endif
|
||||
|
||||
/* We can't officially use ESC_t because it is a POSIX reserved identifier
|
||||
(presumably because of all the others like size_t). */
|
||||
|
||||
#ifndef ESC_tee
|
||||
#define ESC_tee CHAR_HT
|
||||
#endif
|
||||
|
||||
/* These are escaped items that aren't just an encoding of a particular data
|
||||
value such as \n. They must have non-zero values, as check_escape() returns 0
|
||||
for a data character. In the escapes[] table in pcre2_compile.c their values
|
||||
|
@ -1578,23 +1529,26 @@ enum {
|
|||
OP_THEN, /* 155 */
|
||||
OP_THEN_ARG, /* 156 same, but with argument */
|
||||
OP_COMMIT, /* 157 */
|
||||
OP_COMMIT_ARG, /* 158 same, but with argument */
|
||||
|
||||
/* These are forced failure and success verbs */
|
||||
/* These are forced failure and success verbs. FAIL and ACCEPT do accept an
|
||||
argument, but these cases can be compiled as, for example, (*MARK:X)(*FAIL)
|
||||
without the need for a special opcode. */
|
||||
|
||||
OP_FAIL, /* 158 */
|
||||
OP_ACCEPT, /* 159 */
|
||||
OP_ASSERT_ACCEPT, /* 160 Used inside assertions */
|
||||
OP_CLOSE, /* 161 Used before OP_ACCEPT to close open captures */
|
||||
OP_FAIL, /* 159 */
|
||||
OP_ACCEPT, /* 160 */
|
||||
OP_ASSERT_ACCEPT, /* 161 Used inside assertions */
|
||||
OP_CLOSE, /* 162 Used before OP_ACCEPT to close open captures */
|
||||
|
||||
/* This is used to skip a subpattern with a {0} quantifier */
|
||||
|
||||
OP_SKIPZERO, /* 162 */
|
||||
OP_SKIPZERO, /* 163 */
|
||||
|
||||
/* This is used to identify a DEFINE group during compilation so that it can
|
||||
be checked for having only one branch. It is changed to OP_FALSE before
|
||||
compilation finishes. */
|
||||
|
||||
OP_DEFINE, /* 163 */
|
||||
OP_DEFINE, /* 164 */
|
||||
|
||||
/* This is not an opcode, but is used to check that tables indexed by opcode
|
||||
are the correct length, in order to catch updating errors - there have been
|
||||
|
@ -1650,7 +1604,7 @@ some cases doesn't actually use these names at all). */
|
|||
"Cond false", "Cond true", \
|
||||
"Brazero", "Braminzero", "Braposzero", \
|
||||
"*MARK", "*PRUNE", "*PRUNE", "*SKIP", "*SKIP", \
|
||||
"*THEN", "*THEN", "*COMMIT", "*FAIL", \
|
||||
"*THEN", "*THEN", "*COMMIT", "*COMMIT", "*FAIL", \
|
||||
"*ACCEPT", "*ASSERT_ACCEPT", \
|
||||
"Close", "Skip zero", "Define"
|
||||
|
||||
|
@ -1742,7 +1696,8 @@ in UTF-8 mode. The code that uses this table must know about such things. */
|
|||
3, 1, 3, /* MARK, PRUNE, PRUNE_ARG */ \
|
||||
1, 3, /* SKIP, SKIP_ARG */ \
|
||||
1, 3, /* THEN, THEN_ARG */ \
|
||||
1, 1, 1, 1, /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT */ \
|
||||
1, 3, /* COMMIT, COMMIT_ARG */ \
|
||||
1, 1, 1, /* FAIL, ACCEPT, ASSERT_ACCEPT */ \
|
||||
1+IMM2_SIZE, 1, /* CLOSE, SKIPZERO */ \
|
||||
1 /* DEFINE */
|
||||
|
||||
|
@ -1896,7 +1851,7 @@ extern const ucd_record PRIV(ucd_records)[];
|
|||
#if PCRE2_CODE_UNIT_WIDTH == 32
|
||||
extern const ucd_record PRIV(dummy_ucd_record)[];
|
||||
#endif
|
||||
extern const uint8_t PRIV(ucd_stage1)[];
|
||||
extern const uint16_t PRIV(ucd_stage1)[];
|
||||
extern const uint16_t PRIV(ucd_stage2)[];
|
||||
extern const uint32_t PRIV(ucp_gbtable)[];
|
||||
extern const uint32_t PRIV(ucp_gentype)[];
|
||||
|
@ -1976,6 +1931,14 @@ extern int _pcre2_valid_utf(PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE *);
|
|||
extern BOOL _pcre2_was_newline(PCRE2_SPTR, uint32_t, PCRE2_SPTR,
|
||||
uint32_t *, BOOL);
|
||||
extern BOOL _pcre2_xclass(uint32_t, PCRE2_SPTR, BOOL);
|
||||
|
||||
/* This function is needed only when memmove() is not available. */
|
||||
|
||||
#if !defined(VPCOMPAT) && !defined(HAVE_MEMMOVE)
|
||||
#define _pcre2_memmove PCRE2_SUFFIX(_pcre2_memmove)
|
||||
extern void * _pcre2_memmove(void *, const void *, size_t);
|
||||
#endif
|
||||
|
||||
#endif /* PCRE2_CODE_UNIT_WIDTH */
|
||||
#endif /* PCRE2_INTERNAL_H_IDEMPOTENT_GUARD */
|
||||
|
||||
|
|
|
@ -793,11 +793,23 @@ typedef struct heapframe {
|
|||
uint8_t return_id; /* Where to go on in internal "return" */
|
||||
uint8_t op; /* Processing opcode */
|
||||
|
||||
/* At this point, the structure is 16-bit aligned. On most architectures
|
||||
the alignment requirement for a pointer will ensure that the eptr field below
|
||||
is 32-bit or 64-bit aligned. However, on m68k it is fine to have a pointer
|
||||
that is 16-bit aligned. We must therefore ensure that what comes between here
|
||||
and eptr is an odd multiple of 16 bits so as to get back into 32-bit
|
||||
alignment. This happens naturally when PCRE2_UCHAR is 8 bits wide, but needs
|
||||
fudges in the other cases. In the 32-bit case the padding comes first so that
|
||||
the occu field itself is 32-bit aligned. Without the padding, this structure
|
||||
is no longer a multiple of sizeof(PCRE2_SIZE) on m68k, and the check below fails. */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
PCRE2_UCHAR occu[6]; /* Used for other case code units */
|
||||
#elif PCRE2_CODE_UNIT_WIDTH == 16
|
||||
PCRE2_UCHAR occu[2]; /* Used for other case code units */
|
||||
uint8_t unused[2]; /* Ensure 32-bit alignment (see above) */
|
||||
#else
|
||||
uint8_t unused[2]; /* Ensure 32-bit alignment (see above) */
|
||||
PCRE2_UCHAR occu[1]; /* Used for other case code units */
|
||||
#endif
|
||||
|
||||
|
@ -818,6 +830,9 @@ typedef struct heapframe {
|
|||
PCRE2_SIZE ovector[131072]; /* Must be last in the structure */
|
||||
} heapframe;
|
||||
|
||||
/* This typedef is a check that the size of the heapframe structure is a
|
||||
multiple of sizeof(PCRE2_SIZE). See various comments above. */
|
||||
|
||||
typedef char check_heapframe_size[
|
||||
((sizeof(heapframe) % sizeof(PCRE2_SIZE)) == 0)? (+1):(-1)];
|
||||
|
||||
|
@ -881,6 +896,8 @@ typedef struct dfa_match_block {
|
|||
PCRE2_SPTR last_used_ptr; /* Latest consulted character */
|
||||
const uint8_t *tables; /* Character tables */
|
||||
PCRE2_SIZE start_offset; /* The start offset value */
|
||||
PCRE2_SIZE heap_limit; /* As it says */
|
||||
PCRE2_SIZE heap_used; /* As it says */
|
||||
uint32_t match_limit; /* As it says */
|
||||
uint32_t match_limit_depth; /* As it says */
|
||||
uint32_t match_call_count; /* Number of calls of internal function */
|
||||
|
|
|
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
|
|||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2017 University of Cambridge
|
||||
New API code Copyright (c) 2016-2018 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
@ -839,6 +839,7 @@ switch(*cc)
|
|||
#endif
|
||||
|
||||
case OP_MARK:
|
||||
case OP_COMMIT_ARG:
|
||||
case OP_PRUNE_ARG:
|
||||
case OP_SKIP_ARG:
|
||||
case OP_THEN_ARG:
|
||||
|
@ -939,6 +940,7 @@ while (cc < ccend)
|
|||
common->control_head_ptr = 1;
|
||||
/* Fall through. */
|
||||
|
||||
case OP_COMMIT_ARG:
|
||||
case OP_PRUNE_ARG:
|
||||
case OP_MARK:
|
||||
if (common->mark_ptr == 0)
|
||||
|
@ -1553,6 +1555,7 @@ while (cc < ccend)
|
|||
break;
|
||||
|
||||
case OP_MARK:
|
||||
case OP_COMMIT_ARG:
|
||||
case OP_PRUNE_ARG:
|
||||
case OP_THEN_ARG:
|
||||
SLJIT_ASSERT(common->mark_ptr != 0);
|
||||
|
@ -1733,6 +1736,7 @@ while (cc < ccend)
|
|||
break;
|
||||
|
||||
case OP_MARK:
|
||||
case OP_COMMIT_ARG:
|
||||
case OP_PRUNE_ARG:
|
||||
case OP_THEN_ARG:
|
||||
SLJIT_ASSERT(common->mark_ptr != 0);
|
||||
|
@ -2041,6 +2045,7 @@ while (cc < ccend)
|
|||
break;
|
||||
|
||||
case OP_MARK:
|
||||
case OP_COMMIT_ARG:
|
||||
case OP_PRUNE_ARG:
|
||||
case OP_THEN_ARG:
|
||||
SLJIT_ASSERT(common->mark_ptr != 0);
|
||||
|
@ -2428,6 +2433,7 @@ while (cc < ccend)
|
|||
break;
|
||||
|
||||
case OP_MARK:
|
||||
case OP_COMMIT_ARG:
|
||||
case OP_PRUNE_ARG:
|
||||
case OP_THEN_ARG:
|
||||
SLJIT_ASSERT(common->mark_ptr != 0);
|
||||
|
@ -3666,7 +3672,8 @@ if (!common->utf)
|
|||
#endif
|
||||
|
||||
OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
|
||||
OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
|
||||
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
|
||||
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
|
||||
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
|
||||
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
|
||||
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
|
||||
|
@ -5894,6 +5901,8 @@ for (i = 0; i < 32; i++)
|
|||
}
|
||||
}
|
||||
|
||||
if (len == 0) return FALSE; /* Should never occur, but stops analyzers complaining. */
|
||||
|
||||
i = 0;
|
||||
j = 0;
|
||||
|
||||
|
@ -6627,7 +6636,8 @@ if (needstype || needsscript)
|
|||
#endif
|
||||
|
||||
OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
|
||||
OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
|
||||
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
|
||||
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
|
||||
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
|
||||
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
|
||||
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
|
||||
|
@ -7254,10 +7264,11 @@ while (cc < end_subject)
|
|||
if ((ricount & 1) != 0) break; /* Grapheme break required */
|
||||
}
|
||||
|
||||
/* If Extend follows E_Base[_GAZ] do not update lgb; this allows
|
||||
any number of Extend before a following E_Modifier. */
|
||||
/* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this
|
||||
allows any number of them before a following Extended_Pictographic. */
|
||||
|
||||
if (rgb != ucp_gbExtend || (lgb != ucp_gbE_Base && lgb != ucp_gbE_Base_GAZ))
|
||||
if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) ||
|
||||
lgb != ucp_gbExtended_Pictographic)
|
||||
lgb = rgb;
|
||||
|
||||
prevcc = cc;
|
||||
|
@ -7309,10 +7320,11 @@ while (cc < end_subject)
|
|||
if ((ricount & 1) != 0) break; /* Grapheme break required */
|
||||
}
|
||||
|
||||
/* If Extend follows E_Base[_GAZ] do not update lgb; this allows
|
||||
any number of Extend before a following E_Modifier. */
|
||||
/* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this
|
||||
allows any number of them before a following Extended_Pictographic. */
|
||||
|
||||
if (rgb != ucp_gbExtend || (lgb != ucp_gbE_Base && lgb != ucp_gbE_Base_GAZ))
|
||||
if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) ||
|
||||
lgb != ucp_gbExtended_Pictographic)
|
||||
lgb = rgb;
|
||||
|
||||
cc++;
|
||||
|
@ -10346,7 +10358,8 @@ backtrack_common *backtrack;
|
|||
PCRE2_UCHAR opcode = *cc;
|
||||
PCRE2_SPTR ccend = cc + 1;
|
||||
|
||||
if (opcode == OP_PRUNE_ARG || opcode == OP_SKIP_ARG || opcode == OP_THEN_ARG)
|
||||
if (opcode == OP_COMMIT_ARG || opcode == OP_PRUNE_ARG ||
|
||||
opcode == OP_SKIP_ARG || opcode == OP_THEN_ARG)
|
||||
ccend += 2 + cc[1];
|
||||
|
||||
PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
|
||||
|
@ -10358,7 +10371,7 @@ if (opcode == OP_SKIP)
|
|||
return ccend;
|
||||
}
|
||||
|
||||
if (opcode == OP_PRUNE_ARG || opcode == OP_THEN_ARG)
|
||||
if (opcode == OP_COMMIT_ARG || opcode == OP_PRUNE_ARG || opcode == OP_THEN_ARG)
|
||||
{
|
||||
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
|
||||
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
|
||||
|
@ -10677,6 +10690,7 @@ while (cc < ccend)
|
|||
case OP_THEN:
|
||||
case OP_THEN_ARG:
|
||||
case OP_COMMIT:
|
||||
case OP_COMMIT_ARG:
|
||||
cc = compile_control_verb_matchingpath(common, cc, parent);
|
||||
break;
|
||||
|
||||
|
@ -11751,6 +11765,7 @@ while (current)
|
|||
break;
|
||||
|
||||
case OP_COMMIT:
|
||||
case OP_COMMIT_ARG:
|
||||
if (!common->local_quit_available)
|
||||
OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
|
||||
if (common->quit_label == NULL)
|
||||
|
|
|
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
|
|||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016 University of Cambridge
|
||||
New API code Copyright (c) 2016-2018 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
@ -141,13 +141,6 @@ for (i = 0; i < 256; i++)
|
|||
if (isdigit(i)) x += ctype_digit;
|
||||
if (isxdigit(i)) x += ctype_xdigit;
|
||||
if (isalnum(i) || i == '_') x += ctype_word;
|
||||
|
||||
/* Note: strchr includes the terminating zero in the characters it considers.
|
||||
In this instance, that is ok because we want binary zero to be flagged as a
|
||||
meta-character, which in this sense is any character that terminates a run
|
||||
of data characters. */
|
||||
|
||||
if (strchr("\\*+?{^.$|()[", i) != 0) x += ctype_meta;
|
||||
*p++ = x;
|
||||
}
|
||||
|
||||
|
|
|
@ -43,11 +43,11 @@ POSSIBILITY OF SUCH DAMAGE.
|
|||
#include "config.h"
|
||||
#endif
|
||||
|
||||
/* These defines enables debugging code */
|
||||
/* These defines enable debugging code */
|
||||
|
||||
//#define DEBUG_FRAMES_DISPLAY
|
||||
//#define DEBUG_SHOW_OPS
|
||||
//#define DEBUG_SHOW_RMATCH
|
||||
/* #define DEBUG_FRAMES_DISPLAY */
|
||||
/* #define DEBUG_SHOW_OPS */
|
||||
/* #define DEBUG_SHOW_RMATCH */
|
||||
|
||||
#ifdef DEBUG_FRAMES_DISPLAY  /* must match the macro name listed above */
|
||||
#include <stdarg.h>
|
||||
|
@ -149,7 +149,7 @@ changed, the code at RETURN_SWITCH below must be updated in sync. */
|
|||
enum { RM1=1, RM2, RM3, RM4, RM5, RM6, RM7, RM8, RM9, RM10,
|
||||
RM11, RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
|
||||
RM21, RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
|
||||
RM31, RM32, RM33, RM34, RM35 };
|
||||
RM31, RM32, RM33, RM34, RM35, RM36 };
|
||||
|
||||
#ifdef SUPPORT_WIDE_CHARS
|
||||
enum { RM100=100, RM101 };
|
||||
|
@ -770,7 +770,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
|||
/* ===================================================================== */
|
||||
/* Real or forced end of the pattern, assertion, or recursion. In an
|
||||
assertion ACCEPT, update the last used pointer and remember the current
|
||||
frame so that the captures can be fished out of it. */
|
||||
frame so that the captures and mark can be fished out of it. */
|
||||
|
||||
case OP_ASSERT_ACCEPT:
|
||||
if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
|
||||
|
@ -1776,7 +1776,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
|||
|
||||
|
||||
/* ===================================================================== */
|
||||
/* Match a bit-mapped character class, possibly repeatedly. These op codes
|
||||
/* Match a bit-mapped character class, possibly repeatedly. These opcodes
|
||||
are used when all the characters in the class have values in the range
|
||||
0-255, and either the matching is caseful, or the characters are in the
|
||||
range 0-127 when UTF processing is enabled. The only difference between
|
||||
|
@ -1962,11 +1962,15 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
|||
|
||||
if (reptype == REPTYPE_POS) continue; /* No backtracking */
|
||||
|
||||
/* After \C in UTF mode, Lstart_eptr might be in the middle of a
|
||||
Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
|
||||
go too far. */
|
||||
|
||||
for (;;)
|
||||
{
|
||||
RMATCH(Fecode, RM201);
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
if (Feptr-- == Lstart_eptr) break; /* Tried at original position */
|
||||
if (Feptr-- <= Lstart_eptr) break; /* Tried at original position */
|
||||
BACKCHAR(Feptr);
|
||||
}
|
||||
}
|
||||
|
@ -2126,11 +2130,15 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
|||
|
||||
if (reptype == REPTYPE_POS) continue; /* No backtracking */
|
||||
|
||||
/* After \C in UTF mode, Lstart_eptr might be in the middle of a
|
||||
Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
|
||||
go too far. */
|
||||
|
||||
for(;;)
|
||||
{
|
||||
RMATCH(Fecode, RM101);
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
if (Feptr-- == Lstart_eptr) break; /* Tried at original position */
|
||||
if (Feptr-- <= Lstart_eptr) break; /* Tried at original position */
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf) BACKCHAR(Feptr);
|
||||
#endif
|
||||
|
@ -2456,7 +2464,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
|||
|
||||
/* ===================================================================== */
|
||||
/* Match a single character type repeatedly. Note that the property type
|
||||
does not need to be in a stack frame as it not used within an RMATCH()
|
||||
does not need to be in a stack frame as it is not used within an RMATCH()
|
||||
loop. */
|
||||
|
||||
#define Lstart_eptr F->temp_sptr[0]
|
||||
|
@ -4002,8 +4010,8 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
|||
if (reptype == REPTYPE_POS) continue; /* No backtracking */
|
||||
|
||||
/* After \C in UTF mode, Lstart_eptr might be in the middle of a
|
||||
Unicode character. Use <= pp to ensure backtracking doesn't go too far.
|
||||
*/
|
||||
Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
|
||||
go too far. */
|
||||
|
||||
for(;;)
|
||||
{
|
||||
|
@ -4135,7 +4143,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
|||
}
|
||||
break;
|
||||
|
||||
/* The "byte" (i.e. "code unit") case is the same as non-UTF */
|
||||
/* The "byte" (i.e. "code unit") case is the same as non-UTF */
|
||||
|
||||
case OP_ANYBYTE:
|
||||
fc = Lmax - Lmin;
|
||||
|
@ -5111,7 +5119,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
|||
/* Positive assertions are like other groups except that PCRE doesn't allow
|
||||
the effect of (*THEN) to escape beyond an assertion; it is therefore
|
||||
treated as NOMATCH. (*ACCEPT) is treated as a successful assertion, with its
|
||||
captures retained. Any other return is an error. */
|
||||
captures and mark retained. Any other return is an error. */
|
||||
|
||||
#define Lframe_type F->temp_32[0]
|
||||
|
||||
|
@ -5128,6 +5136,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
|||
(char *)assert_accept_frame + offsetof(heapframe, ovector),
|
||||
assert_accept_frame->offset_top * sizeof(PCRE2_SIZE));
|
||||
Foffset_top = assert_accept_frame->offset_top;
|
||||
Fmark = assert_accept_frame->mark;
|
||||
break;
|
||||
}
|
||||
if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
|
||||
|
@ -5416,7 +5425,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
|||
Feptr -= number;
|
||||
}
|
||||
|
||||
/* Save the earliest consulted character, then skip to next op code */
|
||||
/* Save the earliest consulted character, then skip to next opcode */
|
||||
|
||||
if (Feptr < mb->start_used_ptr) mb->start_used_ptr = Feptr;
|
||||
Fecode += 1 + LINK_SIZE;
|
||||
|
@ -5501,7 +5510,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
|||
frame so that it points to the final branch. */
|
||||
|
||||
case OP_ONCE:
|
||||
Fback_frame = ((char *)F - (char *)P) + frame_size;
|
||||
Fback_frame = ((char *)F - (char *)P);
|
||||
for (;;)
|
||||
{
|
||||
uint32_t y = GET(P->ecode,1);
|
||||
|
@ -5829,6 +5838,13 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
|||
mb->verb_current_recurse = Fcurrent_recurse;
|
||||
RRETURN(MATCH_COMMIT);
|
||||
|
||||
case OP_COMMIT_ARG:
|
||||
Fmark = mb->nomatch_mark = Fecode + 2;
|
||||
RMATCH(Fecode + PRIV(OP_lengths)[*Fecode] + Fecode[1], RM36);
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
mb->verb_current_recurse = Fcurrent_recurse;
|
||||
RRETURN(MATCH_COMMIT);
|
||||
|
||||
case OP_PRUNE:
|
||||
RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM14);
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
|
@ -5921,7 +5937,7 @@ in rrc. */
|
|||
|
||||
RETURN_SWITCH:
|
||||
if (Frdepth == 0) return rrc; /* Exit from the top level */
|
||||
F = (heapframe *)((char *)F - Fback_frame); /* Back track */
|
||||
F = (heapframe *)((char *)F - Fback_frame); /* Backtrack */
|
||||
mb->cb->callout_flags |= PCRE2_CALLOUT_BACKTRACK; /* Note for callouts */
|
||||
|
||||
#ifdef DEBUG_SHOW_RMATCH
|
||||
|
@ -5934,7 +5950,7 @@ switch (Freturn_id)
|
|||
LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(16)
|
||||
LBL(17) LBL(18) LBL(19) LBL(20) LBL(21) LBL(22) LBL(23) LBL(24)
|
||||
LBL(25) LBL(26) LBL(27) LBL(28) LBL(29) LBL(30) LBL(31) LBL(32)
|
||||
LBL(33) LBL(34) LBL(35)
|
||||
LBL(33) LBL(34) LBL(35) LBL(36)
|
||||
|
||||
#ifdef SUPPORT_WIDE_CHARS
|
||||
LBL(100) LBL(101)
|
||||
|
@ -6275,7 +6291,7 @@ mb->match_limit_depth = (mcontext->depth_limit < re->limit_depth)?
|
|||
/* If a pattern has very many capturing parentheses, the frame size may be very
|
||||
large. Ensure that there are at least 10 available frames by getting an initial
|
||||
vector on the heap if necessary, except when the heap limit prevents this. Get
|
||||
fewer if possible. (The heap limit is in kilobytes.) */
|
||||
fewer if possible. (The heap limit is in kibibytes.) */
|
||||
|
||||
if (frame_size <= START_FRAMES_SIZE/10)
|
||||
{
|
||||
|
|
|
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
|
|||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2017 University of Cambridge
|
||||
New API code Copyright (c) 2016-2018 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
@ -390,6 +390,7 @@ while (TRUE)
|
|||
#endif
|
||||
|
||||
case OP_MARK:
|
||||
case OP_COMMIT_ARG:
|
||||
case OP_PRUNE_ARG:
|
||||
case OP_SKIP_ARG:
|
||||
case OP_THEN_ARG:
|
||||
|
|
|
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
|
|||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2017 University of Cambridge
|
||||
New API code Copyright (c) 2016-2018 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
@ -127,7 +127,25 @@ dst_bytes += tables_length;
|
|||
for (i = 0; i < number_of_codes; i++)
|
||||
{
|
||||
re = (const pcre2_real_code *)(codes[i]);
|
||||
memcpy(dst_bytes, (char *)re, re->blocksize);
|
||||
(void)memcpy(dst_bytes, (char *)re, re->blocksize);
|
||||
|
||||
/* Certain fields in the compiled code block are re-set during
|
||||
deserialization. In order to ensure that the serialized data stream is always
|
||||
the same for the same pattern, set them to zero here. We can't assume the
|
||||
copy of the pattern is correctly aligned for accessing the fields as part of
|
||||
a structure. Note the use of sizeof(void *) in the second of these, to
|
||||
specify the size of a pointer. If sizeof(uint8_t *) is used (tables is a
|
||||
pointer to uint8_t), gcc gives a warning because the first argument is also a
|
||||
pointer to uint8_t. Casting the first argument to (void *) can stop this, but
|
||||
it didn't stop Coverity giving the same complaint. */
|
||||
|
||||
(void)memset(dst_bytes + offsetof(pcre2_real_code, memctl), 0,
|
||||
sizeof(pcre2_memctl));
|
||||
(void)memset(dst_bytes + offsetof(pcre2_real_code, tables), 0,
|
||||
sizeof(void *));
|
||||
(void)memset(dst_bytes + offsetof(pcre2_real_code, executable_jit), 0,
|
||||
sizeof(void *));
|
||||
|
||||
dst_bytes += re->blocksize;
|
||||
}
|
||||
|
||||
|
|
|
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
|
|||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016 University of Cambridge
|
||||
New API code Copyright (c) 2018 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
@ -50,6 +50,42 @@ functions work only on 8-bit data. */
|
|||
#include "pcre2_internal.h"
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Emulated memmove() for systems without it *
|
||||
*************************************************/
|
||||
|
||||
/* Copy n bytes from s to d, returning d, in builds where neither VPCOMPAT nor
|
||||
HAVE_MEMMOVE is defined. This function can make use of bcopy() if it is
|
||||
available. Otherwise do it by steam, as there are some non-Unix environments
|
||||
that lack both memmove() and bcopy(). The by-hand version picks the copy
|
||||
direction from the relative order of d and s, which is what allows the two
|
||||
regions to overlap. */
|
||||
|
||||
#if !defined(VPCOMPAT) && !defined(HAVE_MEMMOVE)
|
||||
void *
|
||||
PRIV(memmove)(void *d, const void *s, size_t n)
|
||||
{
|
||||
#ifdef HAVE_BCOPY
|
||||
bcopy(s, d, n);  /* note the argument order: source first, then destination */
|
||||
return d;
|
||||
#else
|
||||
size_t i;
|
||||
unsigned char *dest = (unsigned char *)d;
|
||||
const unsigned char *src = (const unsigned char *)s;
|
||||
if (dest > src)  /* destination above source: copy backwards from the end */
|
||||
{
|
||||
dest += n;
|
||||
src += n;
|
||||
for (i = 0; i < n; ++i) *(--dest) = *(--src);
|
||||
return (void *)dest;  /* n decrements have brought dest back to d */
|
||||
}
|
||||
else  /* destination at or below source: a forward copy is safe */
|
||||
{
|
||||
for (i = 0; i < n; ++i) *dest++ = *src++;
|
||||
return (void *)(dest - n);  /* undo the n increments to recover d */
|
||||
}
|
||||
#endif /* not HAVE_BCOPY */
|
||||
}
|
||||
#endif /* not VPCOMPAT && not HAVE_MEMMOVE */
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Compare two zero-terminated PCRE2 strings *
|
||||
*************************************************/
|
||||
|
|
|
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
|
|||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2017 University of Cambridge
|
||||
New API code Copyright (c) 2016-2018 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
@ -707,6 +707,7 @@ for (;;)
|
|||
/* Skip these, but we need to add in the name length. */
|
||||
|
||||
case OP_MARK:
|
||||
case OP_COMMIT_ARG:
|
||||
case OP_PRUNE_ARG:
|
||||
case OP_SKIP_ARG:
|
||||
case OP_THEN_ARG:
|
||||
|
@ -956,6 +957,7 @@ do
|
|||
case OP_CIRCM:
|
||||
case OP_CLOSE:
|
||||
case OP_COMMIT:
|
||||
case OP_COMMIT_ARG:
|
||||
case OP_COND:
|
||||
case OP_CREF:
|
||||
case OP_FALSE:
|
||||
|
@ -1274,7 +1276,7 @@ do
|
|||
break;
|
||||
|
||||
/* Single character types set the bits and stop. Note that if PCRE2_UCP
|
||||
is set, we do not see these op codes because \d etc are converted to
|
||||
is set, we do not see these opcodes because \d etc are converted to
|
||||
properties. Therefore, these apply in the case when only characters less
|
||||
than 256 are recognized to match the types. */
|
||||
|
||||
|
|
|
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
|
|||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016 University of Cambridge
|
||||
New API code Copyright (c) 2016-2018 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
@ -238,10 +238,12 @@ PCRE2_SPTR repend;
|
|||
PCRE2_SIZE extra_needed = 0;
|
||||
PCRE2_SIZE buff_offset, buff_length, lengthleft, fraglength;
|
||||
PCRE2_SIZE *ovector;
|
||||
PCRE2_SIZE ovecsave[3];
|
||||
|
||||
buff_offset = 0;
|
||||
lengthleft = buff_length = *blength;
|
||||
*blength = PCRE2_UNSET;
|
||||
ovecsave[0] = ovecsave[1] = ovecsave[2] = PCRE2_UNSET;
|
||||
|
||||
/* Partial matching is not valid. */
|
||||
|
||||
|
@ -361,13 +363,33 @@ do
|
|||
}
|
||||
|
||||
/* Handle a successful match. Matches that use \K to end before they start
|
||||
are not supported. */
|
||||
|
||||
if (ovector[1] < ovector[0])
|
||||
or start before the current point in the subject are not supported. */
|
||||
|
||||
if (ovector[1] < ovector[0] || ovector[0] < start_offset)
|
||||
{
|
||||
rc = PCRE2_ERROR_BADSUBSPATTERN;
|
||||
goto EXIT;
|
||||
}
|
||||
|
||||
/* Check for the same match as previous. This is legitimate after matching an
|
||||
empty string that starts after the initial match offset. We have tried again
|
||||
at the match point in case the pattern is one like /(?<=\G.)/ which can never
|
||||
match at its starting point, so running the match achieves the bumpalong. If
|
||||
we do get the same (null) match at the original match point, it isn't such a
|
||||
pattern, so we now do the empty string magic. In all other cases, a repeat
|
||||
match should never occur. */
|
||||
|
||||
if (ovecsave[0] == ovector[0] && ovecsave[1] == ovector[1])
|
||||
{
|
||||
if (ovector[0] == ovector[1] && ovecsave[2] != start_offset)
|
||||
{
|
||||
goptions = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
|
||||
ovecsave[2] = start_offset;
|
||||
continue; /* Back to the top of the loop */
|
||||
}
|
||||
rc = PCRE2_ERROR_INTERNAL_DUPMATCH;
|
||||
goto EXIT;
|
||||
}
|
||||
|
||||
/* Count substitutions with a paranoid check for integer overflow; surely no
|
||||
real call to this function would ever hit this! */
|
||||
|
@ -799,13 +821,18 @@ do
|
|||
} /* End handling a literal code unit */
|
||||
} /* End of loop for scanning the replacement. */
|
||||
|
||||
/* The replacement has been copied to the output. Update the start offset to
|
||||
point to the rest of the subject string. If we matched an empty string,
|
||||
do the magic for global matches. */
|
||||
|
||||
start_offset = ovector[1];
|
||||
goptions = (ovector[0] != ovector[1])? 0 :
|
||||
/* The replacement has been copied to the output. Save the details of this
|
||||
match. See above for how this data is used. If we matched an empty string, do
|
||||
the magic for global matches. Finally, update the start offset to point to
|
||||
the rest of the subject string. */
|
||||
|
||||
ovecsave[0] = ovector[0];
|
||||
ovecsave[1] = ovector[1];
|
||||
ovecsave[2] = start_offset;
|
||||
|
||||
goptions = (ovector[0] != ovector[1] || ovector[0] > start_offset)? 0 :
|
||||
PCRE2_ANCHORED|PCRE2_NOTEMPTY_ATSTART;
|
||||
start_offset = ovector[1];
|
||||
} while ((suboptions & PCRE2_SUBSTITUTE_GLOBAL) != 0); /* Repeat "do" loop */
|
||||
|
||||
/* Copy the rest of the subject. */
|
||||
|
|
|
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
|
|||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2017 University of Cambridge
|
||||
New API code Copyright (c) 2016-2018 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
@ -137,9 +137,10 @@ const uint32_t PRIV(ucp_gentype)[] = {
|
|||
|
||||
/* This table encodes the rules for finding the end of an extended grapheme
|
||||
cluster. Every code point has a grapheme break property which is one of the
|
||||
ucp_gbXX values defined in pcre2_ucp.h. The 2-dimensional table is indexed by
|
||||
the properties of two adjacent code points. The left property selects a word
|
||||
from the table, and the right property selects a bit from that word like this:
|
||||
ucp_gbXX values defined in pcre2_ucp.h. These changed between Unicode versions
|
||||
10 and 11. The 2-dimensional table is indexed by the properties of two adjacent
|
||||
code points. The left property selects a word from the table, and the right
|
||||
property selects a bit from that word like this:
|
||||
|
||||
PRIV(ucp_gbtable)[left-property] & (1 << right-property)
|
||||
|
||||
|
@ -166,49 +167,41 @@ are implementing).
|
|||
|
||||
6. Do not break after Prepend characters.
|
||||
|
||||
7. Do not break within emoji modifier sequences (E_Base or E_Base_GAZ followed
|
||||
by E_Modifier). Extend characters are allowed before the modifier; this
|
||||
cannot be represented in this table, the code has to deal with it.
|
||||
7. Do not break within emoji modifier sequences or emoji zwj sequences. That
|
||||
is, do not break between characters with the Extended_Pictographic property.
|
||||
Extend and ZWJ characters are allowed between the characters; this cannot be
|
||||
represented in this table, the code has to deal with it.
|
||||
|
||||
8. Do not break within emoji zwj sequences (ZWJ followed by Glue_After_Zwj or
|
||||
E_Base_GAZ).
|
||||
|
||||
9. Do not break within emoji flag sequences. That is, do not break between
|
||||
8. Do not break within emoji flag sequences. That is, do not break between
|
||||
regional indicator (RI) symbols if there are an odd number of RI characters
|
||||
before the break point. This table encodes "join RI characters"; the code
|
||||
has to deal with checking for previous adjoining RIs.
|
||||
|
||||
10. Otherwise, break everywhere.
|
||||
9. Otherwise, break everywhere.
|
||||
*/
|
||||
|
||||
#define ESZ (1<<ucp_gbExtend)|(1<<ucp_gbSpacingMark)|(1<<ucp_gbZWJ)
|
||||
|
||||
const uint32_t PRIV(ucp_gbtable)[] = {
|
||||
(1<<ucp_gbLF), /* 0 CR */
|
||||
0, /* 1 LF */
|
||||
0, /* 2 Control */
|
||||
ESZ, /* 3 Extend */
|
||||
ESZ|(1<<ucp_gbPrepend)| /* 4 Prepend */
|
||||
(1<<ucp_gbLF), /* 0 CR */
|
||||
0, /* 1 LF */
|
||||
0, /* 2 Control */
|
||||
ESZ, /* 3 Extend */
|
||||
ESZ|(1<<ucp_gbPrepend)| /* 4 Prepend */
|
||||
(1<<ucp_gbL)|(1<<ucp_gbV)|(1<<ucp_gbT)|
|
||||
(1<<ucp_gbLV)|(1<<ucp_gbLVT)|(1<<ucp_gbOther)|
|
||||
(1<<ucp_gbRegionalIndicator)|
|
||||
(1<<ucp_gbE_Base)|(1<<ucp_gbE_Modifier)|
|
||||
(1<<ucp_gbE_Base_GAZ)|
|
||||
(1<<ucp_gbZWJ)|(1<<ucp_gbGlue_After_Zwj),
|
||||
ESZ, /* 5 SpacingMark */
|
||||
ESZ|(1<<ucp_gbL)|(1<<ucp_gbV)|(1<<ucp_gbLV)| /* 6 L */
|
||||
(1<<ucp_gbRegionalIndicator),
|
||||
ESZ, /* 5 SpacingMark */
|
||||
ESZ|(1<<ucp_gbL)|(1<<ucp_gbV)|(1<<ucp_gbLV)| /* 6 L */
|
||||
(1<<ucp_gbLVT),
|
||||
ESZ|(1<<ucp_gbV)|(1<<ucp_gbT), /* 7 V */
|
||||
ESZ|(1<<ucp_gbT), /* 8 T */
|
||||
ESZ|(1<<ucp_gbV)|(1<<ucp_gbT), /* 9 LV */
|
||||
ESZ|(1<<ucp_gbT), /* 10 LVT */
|
||||
(1<<ucp_gbRegionalIndicator), /* 11 RegionalIndicator */
|
||||
ESZ, /* 12 Other */
|
||||
ESZ|(1<<ucp_gbE_Modifier), /* 13 E_Base */
|
||||
ESZ, /* 14 E_Modifier */
|
||||
ESZ|(1<<ucp_gbE_Modifier), /* 15 E_Base_GAZ */
|
||||
ESZ|(1<<ucp_gbGlue_After_Zwj)|(1<<ucp_gbE_Base_GAZ), /* 16 ZWJ */
|
||||
ESZ /* 12 Glue_After_Zwj */
|
||||
ESZ|(1<<ucp_gbV)|(1<<ucp_gbT), /* 7 V */
|
||||
ESZ|(1<<ucp_gbT), /* 8 T */
|
||||
ESZ|(1<<ucp_gbV)|(1<<ucp_gbT), /* 9 LV */
|
||||
ESZ|(1<<ucp_gbT), /* 10 LVT */
|
||||
(1<<ucp_gbRegionalIndicator), /* 11 RegionalIndicator */
|
||||
ESZ, /* 12 Other */
|
||||
ESZ, /* 13 ZWJ */
|
||||
ESZ|(1<<ucp_gbExtended_Pictographic) /* 14 Extended Pictographic */
|
||||
};
|
||||
|
||||
#undef ESZ
|
||||
|
@ -282,6 +275,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
|
|||
#define STRING_Cyrillic0 STR_C STR_y STR_r STR_i STR_l STR_l STR_i STR_c "\0"
|
||||
#define STRING_Deseret0 STR_D STR_e STR_s STR_e STR_r STR_e STR_t "\0"
|
||||
#define STRING_Devanagari0 STR_D STR_e STR_v STR_a STR_n STR_a STR_g STR_a STR_r STR_i "\0"
|
||||
#define STRING_Dogra0 STR_D STR_o STR_g STR_r STR_a "\0"
|
||||
#define STRING_Duployan0 STR_D STR_u STR_p STR_l STR_o STR_y STR_a STR_n "\0"
|
||||
#define STRING_Egyptian_Hieroglyphs0 STR_E STR_g STR_y STR_p STR_t STR_i STR_a STR_n STR_UNDERSCORE STR_H STR_i STR_e STR_r STR_o STR_g STR_l STR_y STR_p STR_h STR_s "\0"
|
||||
#define STRING_Elbasan0 STR_E STR_l STR_b STR_a STR_s STR_a STR_n "\0"
|
||||
|
@ -292,9 +286,11 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
|
|||
#define STRING_Grantha0 STR_G STR_r STR_a STR_n STR_t STR_h STR_a "\0"
|
||||
#define STRING_Greek0 STR_G STR_r STR_e STR_e STR_k "\0"
|
||||
#define STRING_Gujarati0 STR_G STR_u STR_j STR_a STR_r STR_a STR_t STR_i "\0"
|
||||
#define STRING_Gunjala_Gondi0 STR_G STR_u STR_n STR_j STR_a STR_l STR_a STR_UNDERSCORE STR_G STR_o STR_n STR_d STR_i "\0"
|
||||
#define STRING_Gurmukhi0 STR_G STR_u STR_r STR_m STR_u STR_k STR_h STR_i "\0"
|
||||
#define STRING_Han0 STR_H STR_a STR_n "\0"
|
||||
#define STRING_Hangul0 STR_H STR_a STR_n STR_g STR_u STR_l "\0"
|
||||
#define STRING_Hanifi_Rohingya0 STR_H STR_a STR_n STR_i STR_f STR_i STR_UNDERSCORE STR_R STR_o STR_h STR_i STR_n STR_g STR_y STR_a "\0"
|
||||
#define STRING_Hanunoo0 STR_H STR_a STR_n STR_u STR_n STR_o STR_o "\0"
|
||||
#define STRING_Hatran0 STR_H STR_a STR_t STR_r STR_a STR_n "\0"
|
||||
#define STRING_Hebrew0 STR_H STR_e STR_b STR_r STR_e STR_w "\0"
|
||||
|
@ -330,6 +326,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
|
|||
#define STRING_Lydian0 STR_L STR_y STR_d STR_i STR_a STR_n "\0"
|
||||
#define STRING_M0 STR_M "\0"
|
||||
#define STRING_Mahajani0 STR_M STR_a STR_h STR_a STR_j STR_a STR_n STR_i "\0"
|
||||
#define STRING_Makasar0 STR_M STR_a STR_k STR_a STR_s STR_a STR_r "\0"
|
||||
#define STRING_Malayalam0 STR_M STR_a STR_l STR_a STR_y STR_a STR_l STR_a STR_m "\0"
|
||||
#define STRING_Mandaic0 STR_M STR_a STR_n STR_d STR_a STR_i STR_c "\0"
|
||||
#define STRING_Manichaean0 STR_M STR_a STR_n STR_i STR_c STR_h STR_a STR_e STR_a STR_n "\0"
|
||||
|
@ -337,6 +334,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
|
|||
#define STRING_Masaram_Gondi0 STR_M STR_a STR_s STR_a STR_r STR_a STR_m STR_UNDERSCORE STR_G STR_o STR_n STR_d STR_i "\0"
|
||||
#define STRING_Mc0 STR_M STR_c "\0"
|
||||
#define STRING_Me0 STR_M STR_e "\0"
|
||||
#define STRING_Medefaidrin0 STR_M STR_e STR_d STR_e STR_f STR_a STR_i STR_d STR_r STR_i STR_n "\0"
|
||||
#define STRING_Meetei_Mayek0 STR_M STR_e STR_e STR_t STR_e STR_i STR_UNDERSCORE STR_M STR_a STR_y STR_e STR_k "\0"
|
||||
#define STRING_Mende_Kikakui0 STR_M STR_e STR_n STR_d STR_e STR_UNDERSCORE STR_K STR_i STR_k STR_a STR_k STR_u STR_i "\0"
|
||||
#define STRING_Meroitic_Cursive0 STR_M STR_e STR_r STR_o STR_i STR_t STR_i STR_c STR_UNDERSCORE STR_C STR_u STR_r STR_s STR_i STR_v STR_e "\0"
|
||||
|
@ -364,6 +362,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
|
|||
#define STRING_Old_North_Arabian0 STR_O STR_l STR_d STR_UNDERSCORE STR_N STR_o STR_r STR_t STR_h STR_UNDERSCORE STR_A STR_r STR_a STR_b STR_i STR_a STR_n "\0"
|
||||
#define STRING_Old_Permic0 STR_O STR_l STR_d STR_UNDERSCORE STR_P STR_e STR_r STR_m STR_i STR_c "\0"
|
||||
#define STRING_Old_Persian0 STR_O STR_l STR_d STR_UNDERSCORE STR_P STR_e STR_r STR_s STR_i STR_a STR_n "\0"
|
||||
#define STRING_Old_Sogdian0 STR_O STR_l STR_d STR_UNDERSCORE STR_S STR_o STR_g STR_d STR_i STR_a STR_n "\0"
|
||||
#define STRING_Old_South_Arabian0 STR_O STR_l STR_d STR_UNDERSCORE STR_S STR_o STR_u STR_t STR_h STR_UNDERSCORE STR_A STR_r STR_a STR_b STR_i STR_a STR_n "\0"
|
||||
#define STRING_Old_Turkic0 STR_O STR_l STR_d STR_UNDERSCORE STR_T STR_u STR_r STR_k STR_i STR_c "\0"
|
||||
#define STRING_Oriya0 STR_O STR_r STR_i STR_y STR_a "\0"
|
||||
|
@ -397,6 +396,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
|
|||
#define STRING_Sk0 STR_S STR_k "\0"
|
||||
#define STRING_Sm0 STR_S STR_m "\0"
|
||||
#define STRING_So0 STR_S STR_o "\0"
|
||||
#define STRING_Sogdian0 STR_S STR_o STR_g STR_d STR_i STR_a STR_n "\0"
|
||||
#define STRING_Sora_Sompeng0 STR_S STR_o STR_r STR_a STR_UNDERSCORE STR_S STR_o STR_m STR_p STR_e STR_n STR_g "\0"
|
||||
#define STRING_Soyombo0 STR_S STR_o STR_y STR_o STR_m STR_b STR_o "\0"
|
||||
#define STRING_Sundanese0 STR_S STR_u STR_n STR_d STR_a STR_n STR_e STR_s STR_e "\0"
|
||||
|
@ -469,6 +469,7 @@ const char PRIV(utt_names)[] =
|
|||
STRING_Cyrillic0
|
||||
STRING_Deseret0
|
||||
STRING_Devanagari0
|
||||
STRING_Dogra0
|
||||
STRING_Duployan0
|
||||
STRING_Egyptian_Hieroglyphs0
|
||||
STRING_Elbasan0
|
||||
|
@ -479,9 +480,11 @@ const char PRIV(utt_names)[] =
|
|||
STRING_Grantha0
|
||||
STRING_Greek0
|
||||
STRING_Gujarati0
|
||||
STRING_Gunjala_Gondi0
|
||||
STRING_Gurmukhi0
|
||||
STRING_Han0
|
||||
STRING_Hangul0
|
||||
STRING_Hanifi_Rohingya0
|
||||
STRING_Hanunoo0
|
||||
STRING_Hatran0
|
||||
STRING_Hebrew0
|
||||
|
@ -517,6 +520,7 @@ const char PRIV(utt_names)[] =
|
|||
STRING_Lydian0
|
||||
STRING_M0
|
||||
STRING_Mahajani0
|
||||
STRING_Makasar0
|
||||
STRING_Malayalam0
|
||||
STRING_Mandaic0
|
||||
STRING_Manichaean0
|
||||
|
@ -524,6 +528,7 @@ const char PRIV(utt_names)[] =
|
|||
STRING_Masaram_Gondi0
|
||||
STRING_Mc0
|
||||
STRING_Me0
|
||||
STRING_Medefaidrin0
|
||||
STRING_Meetei_Mayek0
|
||||
STRING_Mende_Kikakui0
|
||||
STRING_Meroitic_Cursive0
|
||||
|
@ -551,6 +556,7 @@ const char PRIV(utt_names)[] =
|
|||
STRING_Old_North_Arabian0
|
||||
STRING_Old_Permic0
|
||||
STRING_Old_Persian0
|
||||
STRING_Old_Sogdian0
|
||||
STRING_Old_South_Arabian0
|
||||
STRING_Old_Turkic0
|
||||
STRING_Oriya0
|
||||
|
@ -584,6 +590,7 @@ const char PRIV(utt_names)[] =
|
|||
STRING_Sk0
|
||||
STRING_Sm0
|
||||
STRING_So0
|
||||
STRING_Sogdian0
|
||||
STRING_Sora_Sompeng0
|
||||
STRING_Soyombo0
|
||||
STRING_Sundanese0
|
||||
|
@ -656,154 +663,161 @@ const ucp_type_table PRIV(utt)[] = {
|
|||
{ 265, PT_SC, ucp_Cyrillic },
|
||||
{ 274, PT_SC, ucp_Deseret },
|
||||
{ 282, PT_SC, ucp_Devanagari },
|
||||
{ 293, PT_SC, ucp_Duployan },
|
||||
{ 302, PT_SC, ucp_Egyptian_Hieroglyphs },
|
||||
{ 323, PT_SC, ucp_Elbasan },
|
||||
{ 331, PT_SC, ucp_Ethiopic },
|
||||
{ 340, PT_SC, ucp_Georgian },
|
||||
{ 349, PT_SC, ucp_Glagolitic },
|
||||
{ 360, PT_SC, ucp_Gothic },
|
||||
{ 367, PT_SC, ucp_Grantha },
|
||||
{ 375, PT_SC, ucp_Greek },
|
||||
{ 381, PT_SC, ucp_Gujarati },
|
||||
{ 390, PT_SC, ucp_Gurmukhi },
|
||||
{ 399, PT_SC, ucp_Han },
|
||||
{ 403, PT_SC, ucp_Hangul },
|
||||
{ 410, PT_SC, ucp_Hanunoo },
|
||||
{ 418, PT_SC, ucp_Hatran },
|
||||
{ 425, PT_SC, ucp_Hebrew },
|
||||
{ 432, PT_SC, ucp_Hiragana },
|
||||
{ 441, PT_SC, ucp_Imperial_Aramaic },
|
||||
{ 458, PT_SC, ucp_Inherited },
|
||||
{ 468, PT_SC, ucp_Inscriptional_Pahlavi },
|
||||
{ 490, PT_SC, ucp_Inscriptional_Parthian },
|
||||
{ 513, PT_SC, ucp_Javanese },
|
||||
{ 522, PT_SC, ucp_Kaithi },
|
||||
{ 529, PT_SC, ucp_Kannada },
|
||||
{ 537, PT_SC, ucp_Katakana },
|
||||
{ 546, PT_SC, ucp_Kayah_Li },
|
||||
{ 555, PT_SC, ucp_Kharoshthi },
|
||||
{ 566, PT_SC, ucp_Khmer },
|
||||
{ 572, PT_SC, ucp_Khojki },
|
||||
{ 579, PT_SC, ucp_Khudawadi },
|
||||
{ 589, PT_GC, ucp_L },
|
||||
{ 591, PT_LAMP, 0 },
|
||||
{ 594, PT_SC, ucp_Lao },
|
||||
{ 598, PT_SC, ucp_Latin },
|
||||
{ 604, PT_SC, ucp_Lepcha },
|
||||
{ 611, PT_SC, ucp_Limbu },
|
||||
{ 617, PT_SC, ucp_Linear_A },
|
||||
{ 626, PT_SC, ucp_Linear_B },
|
||||
{ 635, PT_SC, ucp_Lisu },
|
||||
{ 640, PT_PC, ucp_Ll },
|
||||
{ 643, PT_PC, ucp_Lm },
|
||||
{ 646, PT_PC, ucp_Lo },
|
||||
{ 649, PT_PC, ucp_Lt },
|
||||
{ 652, PT_PC, ucp_Lu },
|
||||
{ 655, PT_SC, ucp_Lycian },
|
||||
{ 662, PT_SC, ucp_Lydian },
|
||||
{ 669, PT_GC, ucp_M },
|
||||
{ 671, PT_SC, ucp_Mahajani },
|
||||
{ 680, PT_SC, ucp_Malayalam },
|
||||
{ 690, PT_SC, ucp_Mandaic },
|
||||
{ 698, PT_SC, ucp_Manichaean },
|
||||
{ 709, PT_SC, ucp_Marchen },
|
||||
{ 717, PT_SC, ucp_Masaram_Gondi },
|
||||
{ 731, PT_PC, ucp_Mc },
|
||||
{ 734, PT_PC, ucp_Me },
|
||||
{ 737, PT_SC, ucp_Meetei_Mayek },
|
||||
{ 750, PT_SC, ucp_Mende_Kikakui },
|
||||
{ 764, PT_SC, ucp_Meroitic_Cursive },
|
||||
{ 781, PT_SC, ucp_Meroitic_Hieroglyphs },
|
||||
{ 802, PT_SC, ucp_Miao },
|
||||
{ 807, PT_PC, ucp_Mn },
|
||||
{ 810, PT_SC, ucp_Modi },
|
||||
{ 815, PT_SC, ucp_Mongolian },
|
||||
{ 825, PT_SC, ucp_Mro },
|
||||
{ 829, PT_SC, ucp_Multani },
|
||||
{ 837, PT_SC, ucp_Myanmar },
|
||||
{ 845, PT_GC, ucp_N },
|
||||
{ 847, PT_SC, ucp_Nabataean },
|
||||
{ 857, PT_PC, ucp_Nd },
|
||||
{ 860, PT_SC, ucp_New_Tai_Lue },
|
||||
{ 872, PT_SC, ucp_Newa },
|
||||
{ 877, PT_SC, ucp_Nko },
|
||||
{ 881, PT_PC, ucp_Nl },
|
||||
{ 884, PT_PC, ucp_No },
|
||||
{ 887, PT_SC, ucp_Nushu },
|
||||
{ 893, PT_SC, ucp_Ogham },
|
||||
{ 899, PT_SC, ucp_Ol_Chiki },
|
||||
{ 908, PT_SC, ucp_Old_Hungarian },
|
||||
{ 922, PT_SC, ucp_Old_Italic },
|
||||
{ 933, PT_SC, ucp_Old_North_Arabian },
|
||||
{ 951, PT_SC, ucp_Old_Permic },
|
||||
{ 962, PT_SC, ucp_Old_Persian },
|
||||
{ 974, PT_SC, ucp_Old_South_Arabian },
|
||||
{ 992, PT_SC, ucp_Old_Turkic },
|
||||
{ 1003, PT_SC, ucp_Oriya },
|
||||
{ 1009, PT_SC, ucp_Osage },
|
||||
{ 1015, PT_SC, ucp_Osmanya },
|
||||
{ 1023, PT_GC, ucp_P },
|
||||
{ 1025, PT_SC, ucp_Pahawh_Hmong },
|
||||
{ 1038, PT_SC, ucp_Palmyrene },
|
||||
{ 1048, PT_SC, ucp_Pau_Cin_Hau },
|
||||
{ 1060, PT_PC, ucp_Pc },
|
||||
{ 1063, PT_PC, ucp_Pd },
|
||||
{ 1066, PT_PC, ucp_Pe },
|
||||
{ 1069, PT_PC, ucp_Pf },
|
||||
{ 1072, PT_SC, ucp_Phags_Pa },
|
||||
{ 1081, PT_SC, ucp_Phoenician },
|
||||
{ 1092, PT_PC, ucp_Pi },
|
||||
{ 1095, PT_PC, ucp_Po },
|
||||
{ 1098, PT_PC, ucp_Ps },
|
||||
{ 1101, PT_SC, ucp_Psalter_Pahlavi },
|
||||
{ 1117, PT_SC, ucp_Rejang },
|
||||
{ 1124, PT_SC, ucp_Runic },
|
||||
{ 1130, PT_GC, ucp_S },
|
||||
{ 1132, PT_SC, ucp_Samaritan },
|
||||
{ 1142, PT_SC, ucp_Saurashtra },
|
||||
{ 1153, PT_PC, ucp_Sc },
|
||||
{ 1156, PT_SC, ucp_Sharada },
|
||||
{ 1164, PT_SC, ucp_Shavian },
|
||||
{ 1172, PT_SC, ucp_Siddham },
|
||||
{ 1180, PT_SC, ucp_SignWriting },
|
||||
{ 1192, PT_SC, ucp_Sinhala },
|
||||
{ 1200, PT_PC, ucp_Sk },
|
||||
{ 1203, PT_PC, ucp_Sm },
|
||||
{ 1206, PT_PC, ucp_So },
|
||||
{ 1209, PT_SC, ucp_Sora_Sompeng },
|
||||
{ 1222, PT_SC, ucp_Soyombo },
|
||||
{ 1230, PT_SC, ucp_Sundanese },
|
||||
{ 1240, PT_SC, ucp_Syloti_Nagri },
|
||||
{ 1253, PT_SC, ucp_Syriac },
|
||||
{ 1260, PT_SC, ucp_Tagalog },
|
||||
{ 1268, PT_SC, ucp_Tagbanwa },
|
||||
{ 1277, PT_SC, ucp_Tai_Le },
|
||||
{ 1284, PT_SC, ucp_Tai_Tham },
|
||||
{ 1293, PT_SC, ucp_Tai_Viet },
|
||||
{ 1302, PT_SC, ucp_Takri },
|
||||
{ 1308, PT_SC, ucp_Tamil },
|
||||
{ 1314, PT_SC, ucp_Tangut },
|
||||
{ 1321, PT_SC, ucp_Telugu },
|
||||
{ 1328, PT_SC, ucp_Thaana },
|
||||
{ 1335, PT_SC, ucp_Thai },
|
||||
{ 1340, PT_SC, ucp_Tibetan },
|
||||
{ 1348, PT_SC, ucp_Tifinagh },
|
||||
{ 1357, PT_SC, ucp_Tirhuta },
|
||||
{ 1365, PT_SC, ucp_Ugaritic },
|
||||
{ 1374, PT_SC, ucp_Vai },
|
||||
{ 1378, PT_SC, ucp_Warang_Citi },
|
||||
{ 1390, PT_ALNUM, 0 },
|
||||
{ 1394, PT_PXSPACE, 0 },
|
||||
{ 1398, PT_SPACE, 0 },
|
||||
{ 1402, PT_UCNC, 0 },
|
||||
{ 1406, PT_WORD, 0 },
|
||||
{ 1410, PT_SC, ucp_Yi },
|
||||
{ 1413, PT_GC, ucp_Z },
|
||||
{ 1415, PT_SC, ucp_Zanabazar_Square },
|
||||
{ 1432, PT_PC, ucp_Zl },
|
||||
{ 1435, PT_PC, ucp_Zp },
|
||||
{ 1438, PT_PC, ucp_Zs }
|
||||
{ 293, PT_SC, ucp_Dogra },
|
||||
{ 299, PT_SC, ucp_Duployan },
|
||||
{ 308, PT_SC, ucp_Egyptian_Hieroglyphs },
|
||||
{ 329, PT_SC, ucp_Elbasan },
|
||||
{ 337, PT_SC, ucp_Ethiopic },
|
||||
{ 346, PT_SC, ucp_Georgian },
|
||||
{ 355, PT_SC, ucp_Glagolitic },
|
||||
{ 366, PT_SC, ucp_Gothic },
|
||||
{ 373, PT_SC, ucp_Grantha },
|
||||
{ 381, PT_SC, ucp_Greek },
|
||||
{ 387, PT_SC, ucp_Gujarati },
|
||||
{ 396, PT_SC, ucp_Gunjala_Gondi },
|
||||
{ 410, PT_SC, ucp_Gurmukhi },
|
||||
{ 419, PT_SC, ucp_Han },
|
||||
{ 423, PT_SC, ucp_Hangul },
|
||||
{ 430, PT_SC, ucp_Hanifi_Rohingya },
|
||||
{ 446, PT_SC, ucp_Hanunoo },
|
||||
{ 454, PT_SC, ucp_Hatran },
|
||||
{ 461, PT_SC, ucp_Hebrew },
|
||||
{ 468, PT_SC, ucp_Hiragana },
|
||||
{ 477, PT_SC, ucp_Imperial_Aramaic },
|
||||
{ 494, PT_SC, ucp_Inherited },
|
||||
{ 504, PT_SC, ucp_Inscriptional_Pahlavi },
|
||||
{ 526, PT_SC, ucp_Inscriptional_Parthian },
|
||||
{ 549, PT_SC, ucp_Javanese },
|
||||
{ 558, PT_SC, ucp_Kaithi },
|
||||
{ 565, PT_SC, ucp_Kannada },
|
||||
{ 573, PT_SC, ucp_Katakana },
|
||||
{ 582, PT_SC, ucp_Kayah_Li },
|
||||
{ 591, PT_SC, ucp_Kharoshthi },
|
||||
{ 602, PT_SC, ucp_Khmer },
|
||||
{ 608, PT_SC, ucp_Khojki },
|
||||
{ 615, PT_SC, ucp_Khudawadi },
|
||||
{ 625, PT_GC, ucp_L },
|
||||
{ 627, PT_LAMP, 0 },
|
||||
{ 630, PT_SC, ucp_Lao },
|
||||
{ 634, PT_SC, ucp_Latin },
|
||||
{ 640, PT_SC, ucp_Lepcha },
|
||||
{ 647, PT_SC, ucp_Limbu },
|
||||
{ 653, PT_SC, ucp_Linear_A },
|
||||
{ 662, PT_SC, ucp_Linear_B },
|
||||
{ 671, PT_SC, ucp_Lisu },
|
||||
{ 676, PT_PC, ucp_Ll },
|
||||
{ 679, PT_PC, ucp_Lm },
|
||||
{ 682, PT_PC, ucp_Lo },
|
||||
{ 685, PT_PC, ucp_Lt },
|
||||
{ 688, PT_PC, ucp_Lu },
|
||||
{ 691, PT_SC, ucp_Lycian },
|
||||
{ 698, PT_SC, ucp_Lydian },
|
||||
{ 705, PT_GC, ucp_M },
|
||||
{ 707, PT_SC, ucp_Mahajani },
|
||||
{ 716, PT_SC, ucp_Makasar },
|
||||
{ 724, PT_SC, ucp_Malayalam },
|
||||
{ 734, PT_SC, ucp_Mandaic },
|
||||
{ 742, PT_SC, ucp_Manichaean },
|
||||
{ 753, PT_SC, ucp_Marchen },
|
||||
{ 761, PT_SC, ucp_Masaram_Gondi },
|
||||
{ 775, PT_PC, ucp_Mc },
|
||||
{ 778, PT_PC, ucp_Me },
|
||||
{ 781, PT_SC, ucp_Medefaidrin },
|
||||
{ 793, PT_SC, ucp_Meetei_Mayek },
|
||||
{ 806, PT_SC, ucp_Mende_Kikakui },
|
||||
{ 820, PT_SC, ucp_Meroitic_Cursive },
|
||||
{ 837, PT_SC, ucp_Meroitic_Hieroglyphs },
|
||||
{ 858, PT_SC, ucp_Miao },
|
||||
{ 863, PT_PC, ucp_Mn },
|
||||
{ 866, PT_SC, ucp_Modi },
|
||||
{ 871, PT_SC, ucp_Mongolian },
|
||||
{ 881, PT_SC, ucp_Mro },
|
||||
{ 885, PT_SC, ucp_Multani },
|
||||
{ 893, PT_SC, ucp_Myanmar },
|
||||
{ 901, PT_GC, ucp_N },
|
||||
{ 903, PT_SC, ucp_Nabataean },
|
||||
{ 913, PT_PC, ucp_Nd },
|
||||
{ 916, PT_SC, ucp_New_Tai_Lue },
|
||||
{ 928, PT_SC, ucp_Newa },
|
||||
{ 933, PT_SC, ucp_Nko },
|
||||
{ 937, PT_PC, ucp_Nl },
|
||||
{ 940, PT_PC, ucp_No },
|
||||
{ 943, PT_SC, ucp_Nushu },
|
||||
{ 949, PT_SC, ucp_Ogham },
|
||||
{ 955, PT_SC, ucp_Ol_Chiki },
|
||||
{ 964, PT_SC, ucp_Old_Hungarian },
|
||||
{ 978, PT_SC, ucp_Old_Italic },
|
||||
{ 989, PT_SC, ucp_Old_North_Arabian },
|
||||
{ 1007, PT_SC, ucp_Old_Permic },
|
||||
{ 1018, PT_SC, ucp_Old_Persian },
|
||||
{ 1030, PT_SC, ucp_Old_Sogdian },
|
||||
{ 1042, PT_SC, ucp_Old_South_Arabian },
|
||||
{ 1060, PT_SC, ucp_Old_Turkic },
|
||||
{ 1071, PT_SC, ucp_Oriya },
|
||||
{ 1077, PT_SC, ucp_Osage },
|
||||
{ 1083, PT_SC, ucp_Osmanya },
|
||||
{ 1091, PT_GC, ucp_P },
|
||||
{ 1093, PT_SC, ucp_Pahawh_Hmong },
|
||||
{ 1106, PT_SC, ucp_Palmyrene },
|
||||
{ 1116, PT_SC, ucp_Pau_Cin_Hau },
|
||||
{ 1128, PT_PC, ucp_Pc },
|
||||
{ 1131, PT_PC, ucp_Pd },
|
||||
{ 1134, PT_PC, ucp_Pe },
|
||||
{ 1137, PT_PC, ucp_Pf },
|
||||
{ 1140, PT_SC, ucp_Phags_Pa },
|
||||
{ 1149, PT_SC, ucp_Phoenician },
|
||||
{ 1160, PT_PC, ucp_Pi },
|
||||
{ 1163, PT_PC, ucp_Po },
|
||||
{ 1166, PT_PC, ucp_Ps },
|
||||
{ 1169, PT_SC, ucp_Psalter_Pahlavi },
|
||||
{ 1185, PT_SC, ucp_Rejang },
|
||||
{ 1192, PT_SC, ucp_Runic },
|
||||
{ 1198, PT_GC, ucp_S },
|
||||
{ 1200, PT_SC, ucp_Samaritan },
|
||||
{ 1210, PT_SC, ucp_Saurashtra },
|
||||
{ 1221, PT_PC, ucp_Sc },
|
||||
{ 1224, PT_SC, ucp_Sharada },
|
||||
{ 1232, PT_SC, ucp_Shavian },
|
||||
{ 1240, PT_SC, ucp_Siddham },
|
||||
{ 1248, PT_SC, ucp_SignWriting },
|
||||
{ 1260, PT_SC, ucp_Sinhala },
|
||||
{ 1268, PT_PC, ucp_Sk },
|
||||
{ 1271, PT_PC, ucp_Sm },
|
||||
{ 1274, PT_PC, ucp_So },
|
||||
{ 1277, PT_SC, ucp_Sogdian },
|
||||
{ 1285, PT_SC, ucp_Sora_Sompeng },
|
||||
{ 1298, PT_SC, ucp_Soyombo },
|
||||
{ 1306, PT_SC, ucp_Sundanese },
|
||||
{ 1316, PT_SC, ucp_Syloti_Nagri },
|
||||
{ 1329, PT_SC, ucp_Syriac },
|
||||
{ 1336, PT_SC, ucp_Tagalog },
|
||||
{ 1344, PT_SC, ucp_Tagbanwa },
|
||||
{ 1353, PT_SC, ucp_Tai_Le },
|
||||
{ 1360, PT_SC, ucp_Tai_Tham },
|
||||
{ 1369, PT_SC, ucp_Tai_Viet },
|
||||
{ 1378, PT_SC, ucp_Takri },
|
||||
{ 1384, PT_SC, ucp_Tamil },
|
||||
{ 1390, PT_SC, ucp_Tangut },
|
||||
{ 1397, PT_SC, ucp_Telugu },
|
||||
{ 1404, PT_SC, ucp_Thaana },
|
||||
{ 1411, PT_SC, ucp_Thai },
|
||||
{ 1416, PT_SC, ucp_Tibetan },
|
||||
{ 1424, PT_SC, ucp_Tifinagh },
|
||||
{ 1433, PT_SC, ucp_Tirhuta },
|
||||
{ 1441, PT_SC, ucp_Ugaritic },
|
||||
{ 1450, PT_SC, ucp_Vai },
|
||||
{ 1454, PT_SC, ucp_Warang_Citi },
|
||||
{ 1466, PT_ALNUM, 0 },
|
||||
{ 1470, PT_PXSPACE, 0 },
|
||||
{ 1474, PT_SPACE, 0 },
|
||||
{ 1478, PT_UCNC, 0 },
|
||||
{ 1482, PT_WORD, 0 },
|
||||
{ 1486, PT_SC, ucp_Yi },
|
||||
{ 1489, PT_GC, ucp_Z },
|
||||
{ 1491, PT_SC, ucp_Zanabazar_Square },
|
||||
{ 1508, PT_PC, ucp_Zl },
|
||||
{ 1511, PT_PC, ucp_Zp },
|
||||
{ 1514, PT_PC, ucp_Zs }
|
||||
};
|
||||
|
||||
const size_t PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table);
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
|
|||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016 University of Cambridge
|
||||
New API code Copyright (c) 2016-2018 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
@ -100,27 +100,25 @@ enum {
|
|||
ucp_Zs /* Space separator */
|
||||
};
|
||||
|
||||
/* These are grapheme break properties. */
|
||||
/* These are grapheme break properties. The Extended Pictographic property
|
||||
comes from the emoji-data.txt file. */
|
||||
|
||||
enum {
|
||||
ucp_gbCR, /* 0 */
|
||||
ucp_gbLF, /* 1 */
|
||||
ucp_gbControl, /* 2 */
|
||||
ucp_gbExtend, /* 3 */
|
||||
ucp_gbPrepend, /* 4 */
|
||||
ucp_gbSpacingMark, /* 5 */
|
||||
ucp_gbL, /* 6 Hangul syllable type L */
|
||||
ucp_gbV, /* 7 Hangul syllable type V */
|
||||
ucp_gbT, /* 8 Hangul syllable type T */
|
||||
ucp_gbLV, /* 9 Hangul syllable type LV */
|
||||
ucp_gbLVT, /* 10 Hangul syllable type LVT */
|
||||
ucp_gbRegionalIndicator, /* 11 */
|
||||
ucp_gbOther, /* 12 */
|
||||
ucp_gbE_Base, /* 13 */
|
||||
ucp_gbE_Modifier, /* 14 */
|
||||
ucp_gbE_Base_GAZ, /* 15 */
|
||||
ucp_gbZWJ, /* 16 */
|
||||
ucp_gbGlue_After_Zwj /* 17 */
|
||||
ucp_gbCR, /* 0 */
|
||||
ucp_gbLF, /* 1 */
|
||||
ucp_gbControl, /* 2 */
|
||||
ucp_gbExtend, /* 3 */
|
||||
ucp_gbPrepend, /* 4 */
|
||||
ucp_gbSpacingMark, /* 5 */
|
||||
ucp_gbL, /* 6 Hangul syllable type L */
|
||||
ucp_gbV, /* 7 Hangul syllable type V */
|
||||
ucp_gbT, /* 8 Hangul syllable type T */
|
||||
ucp_gbLV, /* 9 Hangul syllable type LV */
|
||||
ucp_gbLVT, /* 10 Hangul syllable type LVT */
|
||||
ucp_gbRegionalIndicator, /* 11 */
|
||||
ucp_gbOther, /* 12 */
|
||||
ucp_gbZWJ, /* 13 */
|
||||
ucp_gbExtended_Pictographic /* 14 */
|
||||
};
|
||||
|
||||
/* These are the script identifications. */
|
||||
|
@ -274,7 +272,15 @@ enum {
|
|||
ucp_Masaram_Gondi,
|
||||
ucp_Nushu,
|
||||
ucp_Soyombo,
|
||||
ucp_Zanabazar_Square
|
||||
ucp_Zanabazar_Square,
|
||||
/* New for Unicode 11.0.0 */
|
||||
ucp_Dogra,
|
||||
ucp_Gunjala_Gondi,
|
||||
ucp_Hanifi_Rohingya,
|
||||
ucp_Makasar,
|
||||
ucp_Medefaidrin,
|
||||
ucp_Old_Sogdian,
|
||||
ucp_Sogdian
|
||||
};
|
||||
|
||||
#endif /* PCRE2_UCP_H_IDEMPOTENT_GUARD */
|
||||
|
|
|
@ -66,7 +66,7 @@
|
|||
SLJIT_RETURN_ADDRESS_OFFSET : a return instruction always adds this offset to the return address
|
||||
|
||||
Other macros:
|
||||
SLJIT_FUNC : calling convention attribute for both calling JIT form C and C calling back from JIT
|
||||
SLJIT_FUNC : calling convention attribute for both calling JIT from C and C calling back from JIT
|
||||
SLJIT_W(number) : defining 64 bit constants on 64 bit architectures (compiler independent helper)
|
||||
*/
|
||||
|
||||
|
@ -147,17 +147,23 @@
|
|||
#define SLJIT_CONFIG_UNSUPPORTED 1
|
||||
#endif
|
||||
|
||||
#else /* !_WIN32 */
|
||||
#else /* _WIN32 */
|
||||
|
||||
#if defined(_M_X64) || defined(__x86_64__)
|
||||
#define SLJIT_CONFIG_X86_64 1
|
||||
#elif (defined(_M_ARM) && _M_ARM >= 7 && defined(_M_ARMT)) || defined(__thumb2__)
|
||||
#define SLJIT_CONFIG_ARM_THUMB2 1
|
||||
#elif (defined(_M_ARM) && _M_ARM >= 7)
|
||||
#define SLJIT_CONFIG_ARM_V7 1
|
||||
#elif defined(_ARM_)
|
||||
#define SLJIT_CONFIG_ARM_V5 1
|
||||
#elif defined(_M_ARM64) || defined(__aarch64__)
|
||||
#define SLJIT_CONFIG_ARM_64 1
|
||||
#else
|
||||
#define SLJIT_CONFIG_X86_32 1
|
||||
#endif
|
||||
|
||||
#endif /* !WIN32 */
|
||||
#endif /* !_WIN32 */
|
||||
#endif /* SLJIT_CONFIG_AUTO */
|
||||
|
||||
#if (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED)
|
||||
|
@ -324,6 +330,11 @@
|
|||
sparc_cache_flush((from), (to))
|
||||
#define SLJIT_CACHE_FLUSH_OWN_IMPL 1
|
||||
|
||||
#elif defined _WIN32
|
||||
|
||||
#define SLJIT_CACHE_FLUSH(from, to) \
|
||||
FlushInstructionCache(GetCurrentProcess(), (char*)(from), (char*)(to) - (char*)(from))
|
||||
|
||||
#else
|
||||
|
||||
/* Calls __ARM_NR_cacheflush on ARM-Linux. */
|
||||
|
@ -371,12 +382,18 @@ typedef int sljit_sw;
|
|||
#define SLJIT_64BIT_ARCHITECTURE 1
|
||||
#define SLJIT_WORD_SHIFT 3
|
||||
#ifdef _WIN32
|
||||
#ifdef __GNUC__
|
||||
/* These types do not require windows.h */
|
||||
typedef unsigned long long sljit_uw;
|
||||
typedef long long sljit_sw;
|
||||
#else
|
||||
typedef unsigned __int64 sljit_uw;
|
||||
typedef __int64 sljit_sw;
|
||||
#else
|
||||
#endif
|
||||
#else /* !_WIN32 */
|
||||
typedef unsigned long int sljit_uw;
|
||||
typedef long int sljit_sw;
|
||||
#endif
|
||||
#endif /* _WIN32 */
|
||||
#endif
|
||||
|
||||
typedef sljit_uw sljit_p;
|
||||
|
@ -590,7 +607,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr);
|
|||
|
||||
#define SLJIT_NUMBER_OF_REGISTERS 26
|
||||
#define SLJIT_NUMBER_OF_SAVED_REGISTERS 10
|
||||
#define SLJIT_LOCALS_OFFSET_BASE (2 * sizeof(sljit_sw))
|
||||
#define SLJIT_LOCALS_OFFSET_BASE 0
|
||||
|
||||
#elif (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC)
|
||||
|
||||
|
|
|
@ -99,7 +99,14 @@ static SLJIT_INLINE void* alloc_chunk(sljit_uw size)
|
|||
void *retval;
|
||||
|
||||
#ifdef MAP_ANON
|
||||
retval = mmap(NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANON, -1, 0);
|
||||
|
||||
int flags = MAP_PRIVATE | MAP_ANON;
|
||||
|
||||
#ifdef MAP_JIT
|
||||
flags |= MAP_JIT;
|
||||
#endif
|
||||
|
||||
retval = mmap(NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC, flags, -1, 0);
|
||||
#else
|
||||
if (dev_zero < 0) {
|
||||
if (open_dev_zero())
|
||||
|
|
|
@ -26,6 +26,13 @@
|
|||
|
||||
#include "sljitLir.h"
|
||||
|
||||
#ifdef _WIN32
|
||||
|
||||
/* For SLJIT_CACHE_FLUSH, which can expand to FlushInstructionCache. */
|
||||
#include <windows.h>
|
||||
|
||||
#endif /* _WIN32 */
|
||||
|
||||
#if !(defined SLJIT_STD_MACROS_DEFINED && SLJIT_STD_MACROS_DEFINED)
|
||||
|
||||
/* These libraries are needed for the macros below. */
|
||||
|
@ -2178,7 +2185,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compil
|
|||
|
||||
#endif
|
||||
|
||||
#if !(defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
|
||||
#if !(defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) \
|
||||
&& !(defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64)
|
||||
|
||||
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset)
|
||||
{
|
||||
|
|
|
@ -138,7 +138,7 @@ of sljitConfigInternal.h */
|
|||
be specified as scratch registers and the fifth one as saved register
|
||||
on the CPU above and any user code which requires four scratch
|
||||
registers can run unmodified. The SLJIT compiler automatically saves
|
||||
the content of the two extra scrath register on the stack. Scratch
|
||||
the content of the two extra scratch registers on the stack. Scratch
|
||||
registers can also be preserved by saving their value on the stack
|
||||
but this needs to be done manually.
|
||||
|
||||
|
@ -746,7 +746,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler
|
|||
be mixed. The only exception is SLJIT_MOV32 and SLJIT_MOVU32 whose source
|
||||
register can hold any 32 or 64 bit value, and it is converted to a 32 bit
|
||||
compatible format first. This conversion is free (no instructions are
|
||||
emitted) on most CPUs. A 32 bit value can also be coverted to a 64 bit
|
||||
emitted) on most CPUs. A 32 bit value can also be converted to a 64 bit
|
||||
value by SLJIT_MOV_S32 (sign extension) or SLJIT_MOV_U32 (zero extension).
|
||||
|
||||
Note: memory addressing always uses 64 bit values on 64 bit systems so
|
||||
|
@ -773,8 +773,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler
|
|||
*/
|
||||
#define SLJIT_F32_OP SLJIT_I32_OP
|
||||
|
||||
/* Many CPUs (x86, ARM, PPC) has status flags which can be set according
|
||||
to the result of an operation. Other CPUs (MIPS) does not have status
|
||||
/* Many CPUs (x86, ARM, PPC) have status flags which can be set according
|
||||
to the result of an operation. Other CPUs (MIPS) do not have status
|
||||
flags, and results must be stored in registers. To cover both architecture
|
||||
types efficiently only two flags are defined by SLJIT:
|
||||
|
||||
|
@ -810,14 +810,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler
|
|||
|
||||
Using these flags can reduce the number of emitted instructions. E.g. a
|
||||
fast loop can be implemented by decreasing a counter register and setting the
|
||||
zero flag to jump back if the counter register is not reached zero.
|
||||
zero flag to jump back if the counter register has not reached zero.
|
||||
|
||||
Motivation: although CPUs can set a large number of flags, usually their
|
||||
values are ignored or only one of them is used. Emulating a large number
|
||||
of flags on systems without a flag register is complicated so SLJIT
|
||||
instructions must specify the flag they want to use and only that flag
|
||||
will be emulated. The last arithmetic instruction can be repeated if
|
||||
multiple flags needs to be checked.
|
||||
multiple flags need to be checked.
|
||||
*/
|
||||
|
||||
/* Set Zero status flag. */
|
||||
|
@ -884,7 +884,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile
|
|||
/* Starting index of opcodes for sljit_emit_op1. */
|
||||
#define SLJIT_OP1_BASE 32
|
||||
|
||||
/* The MOV instruction transfer data from source to destination.
|
||||
/* The MOV instruction transfers data from source to destination.
|
||||
|
||||
MOV instruction suffixes:
|
||||
|
||||
|
@ -1156,7 +1156,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compi
|
|||
#define SLJIT_FAST_CALL 25
|
||||
/* Called function must be declared with the SLJIT_FUNC attribute. */
|
||||
#define SLJIT_CALL 26
|
||||
/* Called function must be decalred with cdecl attribute.
|
||||
/* Called function must be declared with cdecl attribute.
|
||||
This is the default attribute for C functions. */
|
||||
#define SLJIT_CALL_CDECL 27
|
||||
|
||||
|
@ -1210,7 +1210,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_label(struct sljit_jump *jump, struct sl
|
|||
/* Set the destination address of the jump to this label. */
|
||||
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_target(struct sljit_jump *jump, sljit_uw target);
|
||||
|
||||
/* Emit an indirect jump or fast call. Both direct and indirect form
|
||||
/* Emit an indirect jump or fast call.
|
||||
Direct form: set src to SLJIT_IMM() and srcw to the address
|
||||
Indirect form: any other valid addressing mode
|
||||
type must be between SLJIT_JUMP and SLJIT_FAST_CALL
|
||||
|
@ -1274,7 +1274,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil
|
|||
#define SLJIT_MEM_POST 0x1000
|
||||
|
||||
/* Emit a single memory load or store with update instruction. When the
|
||||
requested instruction from is not supported by the CPU, it returns
|
||||
requested instruction form is not supported by the CPU, it returns
|
||||
with SLJIT_ERR_UNSUPPORTED instead of emulating the instruction. This
|
||||
allows specializing tight loops based on the supported instruction
|
||||
forms (see SLJIT_MEM_SUPP flag).
|
||||
|
|
|
@ -37,14 +37,14 @@ typedef sljit_u32 sljit_ins;
|
|||
#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)
|
||||
#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3)
|
||||
#define TMP_LR (SLJIT_NUMBER_OF_REGISTERS + 4)
|
||||
#define TMP_SP (SLJIT_NUMBER_OF_REGISTERS + 5)
|
||||
#define TMP_FP (SLJIT_NUMBER_OF_REGISTERS + 5)
|
||||
|
||||
#define TMP_FREG1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
|
||||
#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2)
|
||||
|
||||
/* r18 - platform register, currently not used */
|
||||
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 8] = {
|
||||
31, 0, 1, 2, 3, 4, 5, 6, 7, 11, 12, 13, 14, 15, 16, 17, 8, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 29, 9, 10, 30, 31
|
||||
31, 0, 1, 2, 3, 4, 5, 6, 7, 11, 12, 13, 14, 15, 16, 17, 8, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 31, 9, 10, 30, 29
|
||||
};
|
||||
|
||||
static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
|
||||
|
@ -68,6 +68,7 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
|
|||
|
||||
#define ADC 0x9a000000
|
||||
#define ADD 0x8b000000
|
||||
#define ADDE 0x8b200000
|
||||
#define ADDI 0x91000000
|
||||
#define AND 0x8a000000
|
||||
#define ANDI 0x92000000
|
||||
|
@ -96,7 +97,8 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
|
|||
#define FSUB 0x1e603800
|
||||
#define LDRI 0xf9400000
|
||||
#define LDP 0xa9400000
|
||||
#define LDP_PST 0xa8c00000
|
||||
#define LDP_PRE 0xa9c00000
|
||||
#define LDR_PRE 0xf8400c00
|
||||
#define LSLV 0x9ac02000
|
||||
#define LSRV 0x9ac02400
|
||||
#define MADD 0x9b000000
|
||||
|
@ -873,73 +875,51 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
|
|||
CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
|
||||
set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
|
||||
|
||||
saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 0);
|
||||
local_size += saved_regs_size + SLJIT_LOCALS_OFFSET;
|
||||
local_size = (local_size + 15) & ~0xf;
|
||||
compiler->local_size = local_size;
|
||||
saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 2);
|
||||
if (saved_regs_size & 0x8)
|
||||
saved_regs_size += sizeof(sljit_sw);
|
||||
|
||||
if (local_size <= (63 * sizeof(sljit_sw))) {
|
||||
FAIL_IF(push_inst(compiler, STP_PRE | 29 | RT2(TMP_LR)
|
||||
| RN(TMP_SP) | ((-(local_size >> 3) & 0x7f) << 15)));
|
||||
FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(TMP_SP) | (0 << 10)));
|
||||
offs = (local_size - saved_regs_size) << (15 - 3);
|
||||
} else {
|
||||
offs = 0 << 15;
|
||||
if (saved_regs_size & 0x8) {
|
||||
offs = 1 << 15;
|
||||
saved_regs_size += sizeof(sljit_sw);
|
||||
}
|
||||
local_size -= saved_regs_size + SLJIT_LOCALS_OFFSET;
|
||||
if (saved_regs_size > 0)
|
||||
FAIL_IF(push_inst(compiler, SUBI | RD(TMP_SP) | RN(TMP_SP) | (saved_regs_size << 10)));
|
||||
}
|
||||
local_size = (local_size + 15) & ~0xf;
|
||||
compiler->local_size = local_size + saved_regs_size;
|
||||
|
||||
FAIL_IF(push_inst(compiler, STP_PRE | RT(TMP_FP) | RT2(TMP_LR)
|
||||
| RN(SLJIT_SP) | ((-(saved_regs_size >> 3) & 0x7f) << 15)));
|
||||
|
||||
#ifdef _WIN32
|
||||
if (local_size >= 4096)
|
||||
FAIL_IF(push_inst(compiler, SUBI | RD(TMP_REG1) | RN(SLJIT_SP) | (1 << 10) | (1 << 22)));
|
||||
else if (local_size > 256)
|
||||
FAIL_IF(push_inst(compiler, SUBI | RD(TMP_REG1) | RN(SLJIT_SP) | (local_size << 10)));
|
||||
#endif
|
||||
|
||||
tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG;
|
||||
prev = -1;
|
||||
offs = 2 << 15;
|
||||
for (i = SLJIT_S0; i >= tmp; i--) {
|
||||
if (prev == -1) {
|
||||
if (!(offs & (1 << 15))) {
|
||||
prev = i;
|
||||
continue;
|
||||
}
|
||||
FAIL_IF(push_inst(compiler, STRI | RT(i) | RN(TMP_SP) | (offs >> 5)));
|
||||
offs += 1 << 15;
|
||||
prev = i;
|
||||
continue;
|
||||
}
|
||||
FAIL_IF(push_inst(compiler, STP | RT(prev) | RT2(i) | RN(TMP_SP) | offs));
|
||||
FAIL_IF(push_inst(compiler, STP | RT(prev) | RT2(i) | RN(SLJIT_SP) | offs));
|
||||
offs += 2 << 15;
|
||||
prev = -1;
|
||||
}
|
||||
|
||||
for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
|
||||
if (prev == -1) {
|
||||
if (!(offs & (1 << 15))) {
|
||||
prev = i;
|
||||
continue;
|
||||
}
|
||||
FAIL_IF(push_inst(compiler, STRI | RT(i) | RN(TMP_SP) | (offs >> 5)));
|
||||
offs += 1 << 15;
|
||||
prev = i;
|
||||
continue;
|
||||
}
|
||||
FAIL_IF(push_inst(compiler, STP | RT(prev) | RT2(i) | RN(TMP_SP) | offs));
|
||||
FAIL_IF(push_inst(compiler, STP | RT(prev) | RT2(i) | RN(SLJIT_SP) | offs));
|
||||
offs += 2 << 15;
|
||||
prev = -1;
|
||||
}
|
||||
|
||||
SLJIT_ASSERT(prev == -1);
|
||||
if (prev != -1)
|
||||
FAIL_IF(push_inst(compiler, STRI | RT(prev) | RN(SLJIT_SP) | (offs >> 5)));
|
||||
|
||||
if (compiler->local_size > (63 * sizeof(sljit_sw))) {
|
||||
/* The local_size is already adjusted by the saved registers. */
|
||||
if (local_size > 0xfff) {
|
||||
FAIL_IF(push_inst(compiler, SUBI | RD(TMP_SP) | RN(TMP_SP) | ((local_size >> 12) << 10) | (1 << 22)));
|
||||
local_size &= 0xfff;
|
||||
}
|
||||
if (local_size)
|
||||
FAIL_IF(push_inst(compiler, SUBI | RD(TMP_SP) | RN(TMP_SP) | (local_size << 10)));
|
||||
FAIL_IF(push_inst(compiler, STP_PRE | 29 | RT2(TMP_LR)
|
||||
| RN(TMP_SP) | ((-(16 >> 3) & 0x7f) << 15)));
|
||||
FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(TMP_SP) | (0 << 10)));
|
||||
}
|
||||
|
||||
FAIL_IF(push_inst(compiler, ADDI | RD(TMP_FP) | RN(SLJIT_SP) | (0 << 10)));
|
||||
|
||||
args = get_arg_count(arg_types);
|
||||
|
||||
|
@ -950,6 +930,64 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
|
|||
if (args >= 3)
|
||||
FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_S2) | RN(TMP_ZERO) | RM(SLJIT_R2)));
|
||||
|
||||
#ifdef _WIN32
|
||||
if (local_size >= 4096) {
|
||||
if (local_size < 4 * 4096) {
|
||||
/* No need for a loop. */
|
||||
if (local_size >= 2 * 4096) {
|
||||
FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1)));
|
||||
FAIL_IF(push_inst(compiler, SUBI | RD(TMP_REG1) | RN(TMP_REG1) | (1 << 10) | (1 << 22)));
|
||||
local_size -= 4096;
|
||||
}
|
||||
|
||||
if (local_size >= 2 * 4096) {
|
||||
FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1)));
|
||||
FAIL_IF(push_inst(compiler, SUBI | RD(TMP_REG1) | RN(TMP_REG1) | (1 << 10) | (1 << 22)));
|
||||
local_size -= 4096;
|
||||
}
|
||||
|
||||
FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1)));
|
||||
local_size -= 4096;
|
||||
}
|
||||
else {
|
||||
FAIL_IF(push_inst(compiler, MOVZ | RD(TMP_REG2) | (((local_size >> 12) - 1) << 5)));
|
||||
FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1)));
|
||||
FAIL_IF(push_inst(compiler, SUBI | RD(TMP_REG1) | RN(TMP_REG1) | (1 << 10) | (1 << 22)));
|
||||
FAIL_IF(push_inst(compiler, SUBI | (1 << 29) | RD(TMP_REG2) | RN(TMP_REG2) | (1 << 10)));
|
||||
FAIL_IF(push_inst(compiler, B_CC | ((((sljit_ins) -3) & 0x7ffff) << 5) | 0x1 /* not-equal */));
|
||||
FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1)));
|
||||
|
||||
local_size &= 0xfff;
|
||||
}
|
||||
|
||||
if (local_size > 256) {
|
||||
FAIL_IF(push_inst(compiler, SUBI | RD(TMP_REG1) | RN(TMP_REG1) | (local_size << 10)));
|
||||
FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1)));
|
||||
}
|
||||
else if (local_size > 0)
|
||||
FAIL_IF(push_inst(compiler, LDR_PRE | RT(TMP_ZERO) | RN(TMP_REG1) | ((-local_size & 0x1ff) << 12)));
|
||||
|
||||
FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(TMP_REG1) | (0 << 10)));
|
||||
}
|
||||
else if (local_size > 256) {
|
||||
FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1)));
|
||||
FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(TMP_REG1) | (0 << 10)));
|
||||
}
|
||||
else if (local_size > 0)
|
||||
FAIL_IF(push_inst(compiler, LDR_PRE | RT(TMP_ZERO) | RN(SLJIT_SP) | ((-local_size & 0x1ff) << 12)));
|
||||
|
||||
#else /* !_WIN32 */
|
||||
|
||||
/* The local_size does not include saved registers size. */
|
||||
if (local_size > 0xfff) {
|
||||
FAIL_IF(push_inst(compiler, SUBI | RD(SLJIT_SP) | RN(SLJIT_SP) | ((local_size >> 12) << 10) | (1 << 22)));
|
||||
local_size &= 0xfff;
|
||||
}
|
||||
if (local_size != 0)
|
||||
FAIL_IF(push_inst(compiler, SUBI | RD(SLJIT_SP) | RN(SLJIT_SP) | (local_size << 10)));
|
||||
|
||||
#endif /* _WIN32 */
|
||||
|
||||
return SLJIT_SUCCESS;
|
||||
}
|
||||
|
||||
|
@ -957,13 +995,17 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *comp
|
|||
sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
|
||||
sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
|
||||
{
|
||||
sljit_s32 saved_regs_size;
|
||||
|
||||
CHECK_ERROR();
|
||||
CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
|
||||
set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
|
||||
|
||||
local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 0) + SLJIT_LOCALS_OFFSET;
|
||||
local_size = (local_size + 15) & ~0xf;
|
||||
compiler->local_size = local_size;
|
||||
saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 2);
|
||||
if (saved_regs_size & 0x8)
|
||||
saved_regs_size += sizeof(sljit_sw);
|
||||
|
||||
compiler->local_size = saved_regs_size + ((local_size + 15) & ~0xf);
|
||||
return SLJIT_SUCCESS;
|
||||
}
|
||||
|
||||
|
@ -977,71 +1019,59 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *comp
|
|||
|
||||
FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));
|
||||
|
||||
local_size = compiler->local_size;
|
||||
saved_regs_size = GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 2);
|
||||
if (saved_regs_size & 0x8)
|
||||
saved_regs_size += sizeof(sljit_sw);
|
||||
|
||||
saved_regs_size = GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 0);
|
||||
if (local_size <= (63 * sizeof(sljit_sw)))
|
||||
offs = (local_size - saved_regs_size) << (15 - 3);
|
||||
local_size = compiler->local_size - saved_regs_size;
|
||||
|
||||
/* Load LR as early as possible. */
|
||||
if (local_size == 0)
|
||||
FAIL_IF(push_inst(compiler, LDP | RT(TMP_FP) | RT2(TMP_LR) | RN(SLJIT_SP)));
|
||||
else if (local_size < 63 * sizeof(sljit_sw)) {
|
||||
FAIL_IF(push_inst(compiler, LDP_PRE | RT(TMP_FP) | RT2(TMP_LR)
|
||||
| RN(SLJIT_SP) | (local_size << (15 - 3))));
|
||||
}
|
||||
else {
|
||||
FAIL_IF(push_inst(compiler, LDP_PST | 29 | RT2(TMP_LR)
|
||||
| RN(TMP_SP) | (((16 >> 3) & 0x7f) << 15)));
|
||||
offs = 0 << 15;
|
||||
if (saved_regs_size & 0x8) {
|
||||
offs = 1 << 15;
|
||||
saved_regs_size += sizeof(sljit_sw);
|
||||
}
|
||||
local_size -= saved_regs_size + SLJIT_LOCALS_OFFSET;
|
||||
if (local_size > 0xfff) {
|
||||
FAIL_IF(push_inst(compiler, ADDI | RD(TMP_SP) | RN(TMP_SP) | ((local_size >> 12) << 10) | (1 << 22)));
|
||||
FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(SLJIT_SP) | ((local_size >> 12) << 10) | (1 << 22)));
|
||||
local_size &= 0xfff;
|
||||
}
|
||||
if (local_size)
|
||||
FAIL_IF(push_inst(compiler, ADDI | RD(TMP_SP) | RN(TMP_SP) | (local_size << 10)));
|
||||
FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(SLJIT_SP) | (local_size << 10)));
|
||||
|
||||
FAIL_IF(push_inst(compiler, LDP | RT(TMP_FP) | RT2(TMP_LR) | RN(SLJIT_SP)));
|
||||
}
|
||||
|
||||
tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG;
|
||||
prev = -1;
|
||||
offs = 2 << 15;
|
||||
for (i = SLJIT_S0; i >= tmp; i--) {
|
||||
if (prev == -1) {
|
||||
if (!(offs & (1 << 15))) {
|
||||
prev = i;
|
||||
continue;
|
||||
}
|
||||
FAIL_IF(push_inst(compiler, LDRI | RT(i) | RN(TMP_SP) | (offs >> 5)));
|
||||
offs += 1 << 15;
|
||||
prev = i;
|
||||
continue;
|
||||
}
|
||||
FAIL_IF(push_inst(compiler, LDP | RT(prev) | RT2(i) | RN(TMP_SP) | offs));
|
||||
FAIL_IF(push_inst(compiler, LDP | RT(prev) | RT2(i) | RN(SLJIT_SP) | offs));
|
||||
offs += 2 << 15;
|
||||
prev = -1;
|
||||
}
|
||||
|
||||
for (i = compiler->scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
|
||||
if (prev == -1) {
|
||||
if (!(offs & (1 << 15))) {
|
||||
prev = i;
|
||||
continue;
|
||||
}
|
||||
FAIL_IF(push_inst(compiler, LDRI | RT(i) | RN(TMP_SP) | (offs >> 5)));
|
||||
offs += 1 << 15;
|
||||
prev = i;
|
||||
continue;
|
||||
}
|
||||
FAIL_IF(push_inst(compiler, LDP | RT(prev) | RT2(i) | RN(TMP_SP) | offs));
|
||||
FAIL_IF(push_inst(compiler, LDP | RT(prev) | RT2(i) | RN(SLJIT_SP) | offs));
|
||||
offs += 2 << 15;
|
||||
prev = -1;
|
||||
}
|
||||
|
||||
SLJIT_ASSERT(prev == -1);
|
||||
if (prev != -1)
|
||||
FAIL_IF(push_inst(compiler, LDRI | RT(prev) | RN(SLJIT_SP) | (offs >> 5)));
|
||||
|
||||
if (compiler->local_size <= (63 * sizeof(sljit_sw))) {
|
||||
FAIL_IF(push_inst(compiler, LDP_PST | 29 | RT2(TMP_LR)
|
||||
| RN(TMP_SP) | (((local_size >> 3) & 0x7f) << 15)));
|
||||
} else if (saved_regs_size > 0) {
|
||||
FAIL_IF(push_inst(compiler, ADDI | RD(TMP_SP) | RN(TMP_SP) | (saved_regs_size << 10)));
|
||||
}
|
||||
|
||||
FAIL_IF(push_inst(compiler, RET | RN(TMP_LR)));
|
||||
return SLJIT_SUCCESS;
|
||||
/* These two can be executed in parallel. */
|
||||
FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(SLJIT_SP) | (saved_regs_size << 10)));
|
||||
return push_inst(compiler, RET | RN(TMP_LR));
|
||||
}
|
||||
|
||||
/* --------------------------------------------------------------------- */
|
||||
|
@ -1856,6 +1886,46 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compil
|
|||
return push_inst(compiler, inst | VT(freg) | RN(mem & REG_MASK) | ((memw & 0x1ff) << 12));
|
||||
}
|
||||
|
||||
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset)
|
||||
{
|
||||
sljit_s32 dst_reg;
|
||||
sljit_ins ins;
|
||||
|
||||
CHECK_ERROR();
|
||||
CHECK(check_sljit_get_local_base(compiler, dst, dstw, offset));
|
||||
|
||||
SLJIT_ASSERT (SLJIT_LOCALS_OFFSET_BASE == 0);
|
||||
|
||||
dst_reg = FAST_IS_REG(dst) ? dst : TMP_REG1;
|
||||
|
||||
if (offset <= 0xffffff && offset >= -0xffffff) {
|
||||
ins = ADDI;
|
||||
if (offset < 0) {
|
||||
offset = -offset;
|
||||
ins = SUBI;
|
||||
}
|
||||
|
||||
if (offset <= 0xfff)
|
||||
FAIL_IF(push_inst(compiler, ins | RD(dst_reg) | RN(SLJIT_SP) | (offset << 10)));
|
||||
else {
|
||||
FAIL_IF(push_inst(compiler, ins | RD(dst_reg) | RN(SLJIT_SP) | ((offset & 0xfff000) >> (12 - 10)) | (1 << 22)));
|
||||
|
||||
offset &= 0xfff;
|
||||
if (offset != 0)
|
||||
FAIL_IF(push_inst(compiler, ins | RD(dst_reg) | RN(dst_reg) | (offset << 10)));
|
||||
}
|
||||
}
|
||||
else {
|
||||
FAIL_IF(load_immediate (compiler, dst_reg, offset));
|
||||
/* Add extended register form. */
|
||||
FAIL_IF(push_inst(compiler, ADDE | (0x3 << 13) | RD(dst_reg) | RN(SLJIT_SP) | RM(dst_reg)));
|
||||
}
|
||||
|
||||
if (SLJIT_UNLIKELY(dst & SLJIT_MEM))
|
||||
return emit_op_mem(compiler, WORD_SIZE | STORE, dst_reg, dst, dstw, TMP_REG1);
|
||||
return SLJIT_SUCCESS;
|
||||
}
|
||||
|
||||
SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
|
||||
{
|
||||
struct sljit_const *const_;
|
||||
|
|
|
@ -110,6 +110,7 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
|
|||
#define ASRSI 0x1000
|
||||
#define ASR_W 0xfa40f000
|
||||
#define ASR_WI 0xea4f0020
|
||||
#define BCC 0xd000
|
||||
#define BICI 0xf0200000
|
||||
#define BKPT 0xbe00
|
||||
#define BLX 0x4780
|
||||
|
@ -125,6 +126,7 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
|
|||
#define EORS 0x4040
|
||||
#define EOR_W 0xea800000
|
||||
#define IT 0xbf00
|
||||
#define LDRI 0xf8500800
|
||||
#define LSLS 0x4080
|
||||
#define LSLSI 0x0000
|
||||
#define LSL_W 0xfa00f000
|
||||
|
@ -158,6 +160,7 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
|
|||
#define SBCI 0xf1600000
|
||||
#define SBCS 0x4180
|
||||
#define SBC_W 0xeb600000
|
||||
#define SDIV 0xfb90f0f0
|
||||
#define SMULL 0xfb800000
|
||||
#define STR_SP 0x9000
|
||||
#define SUBS 0x1a00
|
||||
|
@ -172,6 +175,7 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
|
|||
#define SXTH 0xb200
|
||||
#define SXTH_W 0xfa0ff080
|
||||
#define TST 0x4200
|
||||
#define UDIV 0xfbb0f0f0
|
||||
#define UMULL 0xfba00000
|
||||
#define UXTB 0xb2c0
|
||||
#define UXTB_W 0xfa5ff080
|
||||
|
@ -339,8 +343,8 @@ static SLJIT_INLINE void set_jump_instruction(struct sljit_jump *jump, sljit_sw
|
|||
|
||||
/* Really complex instruction form for branches. */
|
||||
s = (diff >> 23) & 0x1;
|
||||
j1 = (~(diff >> 21) ^ s) & 0x1;
|
||||
j2 = (~(diff >> 22) ^ s) & 0x1;
|
||||
j1 = (~(diff >> 22) ^ s) & 0x1;
|
||||
j2 = (~(diff >> 21) ^ s) & 0x1;
|
||||
jump_inst[0] = 0xf000 | (s << 10) | COPY_BITS(diff, 11, 0, 10);
|
||||
jump_inst[1] = (j1 << 13) | (j2 << 11) | (diff & 0x7ff);
|
||||
|
||||
|
@ -520,6 +524,8 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst,
|
|||
{
|
||||
sljit_uw tmp;
|
||||
|
||||
/* MOVS cannot be used since it destroys flags. */
|
||||
|
||||
if (imm >= 0x10000) {
|
||||
tmp = get_imm(imm);
|
||||
if (tmp != INVALID_IMM)
|
||||
|
@ -1032,6 +1038,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
|
|||
{
|
||||
sljit_s32 args, size, i, tmp;
|
||||
sljit_ins push = 0;
|
||||
#ifdef _WIN32
|
||||
sljit_uw imm;
|
||||
#endif
|
||||
|
||||
CHECK_ERROR();
|
||||
CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
|
||||
|
@ -1052,12 +1061,25 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
|
|||
size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1);
|
||||
local_size = ((size + local_size + 7) & ~7) - size;
|
||||
compiler->local_size = local_size;
|
||||
|
||||
#ifdef _WIN32
|
||||
if (local_size >= 256) {
|
||||
if (local_size > 4096)
|
||||
imm = get_imm(4096);
|
||||
else
|
||||
imm = get_imm(local_size & ~0xff);
|
||||
|
||||
SLJIT_ASSERT(imm != INVALID_IMM);
|
||||
FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(SLJIT_SP) | imm));
|
||||
}
|
||||
#else
|
||||
if (local_size > 0) {
|
||||
if (local_size <= (127 << 2))
|
||||
FAIL_IF(push_inst16(compiler, SUB_SP | (local_size >> 2)));
|
||||
else
|
||||
FAIL_IF(emit_op_imm(compiler, SLJIT_SUB | ARG2_IMM, SLJIT_SP, SLJIT_SP, local_size));
|
||||
}
|
||||
#endif
|
||||
|
||||
args = get_arg_count(arg_types);
|
||||
|
||||
|
@ -1068,6 +1090,61 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
|
|||
if (args >= 3)
|
||||
FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_S2, SLJIT_R2)));
|
||||
|
||||
#ifdef _WIN32
|
||||
if (local_size >= 256) {
|
||||
if (local_size > 4096) {
|
||||
imm = get_imm(4096);
|
||||
SLJIT_ASSERT(imm != INVALID_IMM);
|
||||
|
||||
if (local_size < 4 * 4096) {
|
||||
if (local_size > 2 * 4096) {
|
||||
FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG2) | RN4(TMP_REG1)));
|
||||
FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(TMP_REG1) | imm));
|
||||
local_size -= 4096;
|
||||
}
|
||||
|
||||
if (local_size > 2 * 4096) {
|
||||
FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG2) | RN4(TMP_REG1)));
|
||||
FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(TMP_REG1) | imm));
|
||||
local_size -= 4096;
|
||||
}
|
||||
|
||||
FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG2) | RN4(TMP_REG1)));
|
||||
local_size -= 4096;
|
||||
|
||||
SLJIT_ASSERT(local_size > 0);
|
||||
}
|
||||
else {
|
||||
FAIL_IF(load_immediate(compiler, SLJIT_R3, (local_size >> 12) - 1));
|
||||
FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG2) | RN4(TMP_REG1)));
|
||||
FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(TMP_REG1) | imm));
|
||||
SLJIT_ASSERT(reg_map[SLJIT_R3] < 7);
|
||||
FAIL_IF(push_inst16(compiler, SUBSI8 | RDN3(SLJIT_R3) | 1));
|
||||
FAIL_IF(push_inst16(compiler, BCC | (0x1 << 8) /* not-equal */ | (-7 & 0xff)));
|
||||
|
||||
local_size &= 0xfff;
|
||||
|
||||
if (local_size != 0)
|
||||
FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG2) | RN4(TMP_REG1)));
|
||||
}
|
||||
|
||||
if (local_size >= 256) {
|
||||
imm = get_imm(local_size & ~0xff);
|
||||
SLJIT_ASSERT(imm != INVALID_IMM);
|
||||
|
||||
FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(TMP_REG1) | imm));
|
||||
}
|
||||
}
|
||||
|
||||
local_size &= 0xff;
|
||||
FAIL_IF(push_inst32(compiler, LDRI | 0x400 | (local_size > 0 ? 0x100 : 0) | RT4(TMP_REG2) | RN4(TMP_REG1) | local_size));
|
||||
|
||||
FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_SP, TMP_REG1)));
|
||||
}
|
||||
else if (local_size > 0)
|
||||
FAIL_IF(push_inst32(compiler, LDRI | 0x500 | RT4(TMP_REG1) | RN4(SLJIT_SP) | local_size));
|
||||
#endif
|
||||
|
||||
return SLJIT_SUCCESS;
|
||||
}
|
||||
|
||||
|
@ -1119,11 +1196,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *comp
|
|||
/* Operators */
|
||||
/* --------------------------------------------------------------------- */
|
||||
|
||||
#if !(defined __ARM_FEATURE_IDIV) && !(defined __ARM_ARCH_EXT_IDIV__)
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#if defined(__GNUC__)
|
||||
#ifdef _WIN32
|
||||
extern unsigned long long __rt_udiv(unsigned int denominator, unsigned int numerator);
|
||||
extern long long __rt_sdiv(int denominator, int numerator);
|
||||
#elif defined(__GNUC__)
|
||||
extern unsigned int __aeabi_uidivmod(unsigned int numerator, int unsigned denominator);
|
||||
extern int __aeabi_idivmod(int numerator, int denominator);
|
||||
#else
|
||||
|
@ -1134,10 +1216,14 @@ extern int __aeabi_idivmod(int numerator, int denominator);
|
|||
}
|
||||
#endif
|
||||
|
||||
#endif /* !__ARM_FEATURE_IDIV && !__ARM_ARCH_EXT_IDIV__ */
|
||||
|
||||
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
|
||||
{
|
||||
#if !(defined __ARM_FEATURE_IDIV) && !(defined __ARM_ARCH_EXT_IDIV__)
|
||||
sljit_sw saved_reg_list[3];
|
||||
sljit_sw saved_reg_count;
|
||||
#endif
|
||||
|
||||
CHECK_ERROR();
|
||||
CHECK(check_sljit_emit_op0(compiler, op));
|
||||
|
@ -1155,6 +1241,17 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile
|
|||
| (reg_map[SLJIT_R0] << 12)
|
||||
| (reg_map[SLJIT_R0] << 16)
|
||||
| reg_map[SLJIT_R1]);
|
||||
#if (defined __ARM_FEATURE_IDIV) || (defined __ARM_ARCH_EXT_IDIV__)
|
||||
case SLJIT_DIVMOD_UW:
|
||||
case SLJIT_DIVMOD_SW:
|
||||
FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG1, SLJIT_R0)));
|
||||
FAIL_IF(push_inst32(compiler, (op == SLJIT_DIVMOD_UW ? UDIV : SDIV) | RD4(SLJIT_R0) | RN4(SLJIT_R0) | RM4(SLJIT_R1)));
|
||||
FAIL_IF(push_inst32(compiler, MUL | RD4(SLJIT_R1) | RN4(SLJIT_R0) | RM4(SLJIT_R1)));
|
||||
return push_inst32(compiler, SUB_W | RD4(SLJIT_R1) | RN4(TMP_REG1) | RM4(SLJIT_R1));
|
||||
case SLJIT_DIV_UW:
|
||||
case SLJIT_DIV_SW:
|
||||
return push_inst32(compiler, (op == SLJIT_DIV_UW ? UDIV : SDIV) | RD4(SLJIT_R0) | RN4(SLJIT_R0) | RM4(SLJIT_R1));
|
||||
#else /* !__ARM_FEATURE_IDIV && !__ARM_ARCH_EXT_IDIV__ */
|
||||
case SLJIT_DIVMOD_UW:
|
||||
case SLJIT_DIVMOD_SW:
|
||||
case SLJIT_DIV_UW:
|
||||
|
@ -1183,7 +1280,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile
|
|||
}
|
||||
}
|
||||
|
||||
#if defined(__GNUC__)
|
||||
#ifdef _WIN32
|
||||
FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG1, SLJIT_R0)));
|
||||
FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_R0, SLJIT_R1)));
|
||||
FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_R1, TMP_REG1)));
|
||||
FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM,
|
||||
((op | 0x2) == SLJIT_DIV_UW ? SLJIT_FUNC_OFFSET(__rt_udiv) : SLJIT_FUNC_OFFSET(__rt_sdiv))));
|
||||
#elif defined(__GNUC__)
|
||||
FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM,
|
||||
((op | 0x2) == SLJIT_DIV_UW ? SLJIT_FUNC_OFFSET(__aeabi_uidivmod) : SLJIT_FUNC_OFFSET(__aeabi_idivmod))));
|
||||
#else
|
||||
|
@ -1203,6 +1306,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile
|
|||
| (saved_reg_list[0] << 12) /* ldr rX, [sp], #8/16 */);
|
||||
}
|
||||
return SLJIT_SUCCESS;
|
||||
#endif /* __ARM_FEATURE_IDIV || __ARM_ARCH_EXT_IDIV__ */
|
||||
}
|
||||
|
||||
return SLJIT_SUCCESS;
|
||||
|
|
|
@ -448,7 +448,7 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t
|
|||
sljit_ins ins = NOP;
|
||||
sljit_u8 offsets[4];
|
||||
|
||||
SLJIT_ASSERT(reg_map[TMP_REG3] == 4 && freg_map[TMP_FREG1] == 12);
|
||||
SLJIT_ASSERT(reg_map[TMP_REG1] == 4 && freg_map[TMP_FREG1] == 12);
|
||||
|
||||
arg_types >>= SLJIT_DEF_SHIFT;
|
||||
|
||||
|
@ -516,7 +516,7 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t
|
|||
else if (arg_count != word_arg_count)
|
||||
ins = ADDU | S(word_arg_count) | TA(0) | DA(4 + (offsets[arg_count - 1] >> 2));
|
||||
else if (arg_count == 1)
|
||||
ins = ADDU | S(SLJIT_R0) | TA(0) | D(TMP_REG3);
|
||||
ins = ADDU | S(SLJIT_R0) | TA(0) | DA(4);
|
||||
|
||||
arg_count--;
|
||||
word_arg_count--;
|
||||
|
|
|
@ -547,7 +547,7 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t
|
|||
sljit_ins prev_ins = NOP;
|
||||
sljit_ins ins = NOP;
|
||||
|
||||
SLJIT_ASSERT(reg_map[TMP_REG3] == 4 && freg_map[TMP_FREG1] == 12);
|
||||
SLJIT_ASSERT(reg_map[TMP_REG1] == 4 && freg_map[TMP_FREG1] == 12);
|
||||
|
||||
arg_types >>= SLJIT_DEF_SHIFT;
|
||||
|
||||
|
@ -591,7 +591,7 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t
|
|||
if (arg_count != word_arg_count)
|
||||
ins = DADDU | S(word_arg_count) | TA(0) | D(arg_count);
|
||||
else if (arg_count == 1)
|
||||
ins = DADDU | S(SLJIT_R0) | TA(0) | D(TMP_REG3);
|
||||
ins = DADDU | S(SLJIT_R0) | TA(0) | DA(4);
|
||||
arg_count--;
|
||||
word_arg_count--;
|
||||
break;
|
||||
|
|
|
@ -57,14 +57,14 @@ typedef sljit_u32 sljit_ins;
|
|||
#define RETURN_ADDR_REG 31
|
||||
|
||||
/* Flags are kept in volatile registers. */
|
||||
#define EQUAL_FLAG 31
|
||||
#define EQUAL_FLAG 3
|
||||
#define OTHER_FLAG 1
|
||||
|
||||
#define TMP_FREG1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
|
||||
#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2)
|
||||
|
||||
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
|
||||
0, 2, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 24, 23, 22, 21, 20, 19, 18, 17, 16, 29, 3, 25, 4
|
||||
0, 2, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 24, 23, 22, 21, 20, 19, 18, 17, 16, 29, 4, 25, 31
|
||||
};
|
||||
|
||||
#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
|
||||
|
@ -612,16 +612,17 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
|
|||
/* Frequent case. */
|
||||
FAIL_IF(push_inst(compiler, ADDIU_W | S(SLJIT_SP) | T(SLJIT_SP) | IMM(-local_size), DR(SLJIT_SP)));
|
||||
base = S(SLJIT_SP);
|
||||
offs = local_size - (sljit_sw)sizeof(sljit_sw);
|
||||
}
|
||||
else {
|
||||
FAIL_IF(load_immediate(compiler, DR(TMP_REG1), local_size));
|
||||
FAIL_IF(load_immediate(compiler, DR(OTHER_FLAG), local_size));
|
||||
FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_SP) | TA(0) | D(TMP_REG2), DR(TMP_REG2)));
|
||||
FAIL_IF(push_inst(compiler, SUBU_W | S(SLJIT_SP) | T(TMP_REG1) | D(SLJIT_SP), DR(SLJIT_SP)));
|
||||
FAIL_IF(push_inst(compiler, SUBU_W | S(SLJIT_SP) | T(OTHER_FLAG) | D(SLJIT_SP), DR(SLJIT_SP)));
|
||||
base = S(TMP_REG2);
|
||||
local_size = 0;
|
||||
offs = -(sljit_sw)sizeof(sljit_sw);
|
||||
}
|
||||
|
||||
offs = local_size - (sljit_sw)(sizeof(sljit_sw));
|
||||
FAIL_IF(push_inst(compiler, STACK_STORE | base | TA(RETURN_ADDR_REG) | IMM(offs), MOVABLE_INS));
|
||||
|
||||
tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG;
|
||||
|
@ -805,7 +806,8 @@ static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sl
|
|||
if ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA)) {
|
||||
tmp_ar = reg_ar;
|
||||
delay_slot = reg_ar;
|
||||
} else {
|
||||
}
|
||||
else {
|
||||
tmp_ar = DR(TMP_REG1);
|
||||
delay_slot = MOVABLE_INS;
|
||||
}
|
||||
|
@ -881,11 +883,39 @@ static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sl
|
|||
|
||||
static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg_ar, sljit_s32 arg, sljit_sw argw)
|
||||
{
|
||||
sljit_s32 tmp_ar, base, delay_slot;
|
||||
|
||||
if (getput_arg_fast(compiler, flags, reg_ar, arg, argw))
|
||||
return compiler->error;
|
||||
compiler->cache_arg = 0;
|
||||
compiler->cache_argw = 0;
|
||||
return getput_arg(compiler, flags, reg_ar, arg, argw, 0, 0);
|
||||
|
||||
if ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA)) {
|
||||
tmp_ar = reg_ar;
|
||||
delay_slot = reg_ar;
|
||||
}
|
||||
else {
|
||||
tmp_ar = DR(TMP_REG1);
|
||||
delay_slot = MOVABLE_INS;
|
||||
}
|
||||
base = arg & REG_MASK;
|
||||
|
||||
if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
|
||||
argw &= 0x3;
|
||||
|
||||
if (SLJIT_UNLIKELY(argw)) {
|
||||
FAIL_IF(push_inst(compiler, SLL_W | T(OFFS_REG(arg)) | DA(tmp_ar) | SH_IMM(argw), tmp_ar));
|
||||
FAIL_IF(push_inst(compiler, ADDU_W | S(base) | TA(tmp_ar) | DA(tmp_ar), tmp_ar));
|
||||
}
|
||||
else
|
||||
FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(OFFS_REG(arg)) | DA(tmp_ar), tmp_ar));
|
||||
return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | SA(tmp_ar) | TA(reg_ar), delay_slot);
|
||||
}
|
||||
|
||||
FAIL_IF(load_immediate(compiler, tmp_ar, argw));
|
||||
|
||||
if (base != 0)
|
||||
FAIL_IF(push_inst(compiler, ADDU_W | S(base) | TA(tmp_ar) | DA(tmp_ar), tmp_ar));
|
||||
|
||||
return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | SA(tmp_ar) | TA(reg_ar), delay_slot);
|
||||
}
|
||||
|
||||
static SLJIT_INLINE sljit_s32 emit_op_mem2(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg1, sljit_sw arg1w, sljit_s32 arg2, sljit_sw arg2w)
|
||||
|
|
|
@ -123,34 +123,38 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
|
|||
|
||||
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
|
||||
if (args > 0) {
|
||||
*inst++ = MOV_r_rm;
|
||||
*inst++ = MOD_REG | (reg_map[SLJIT_S0] << 3) | reg_map[SLJIT_R2];
|
||||
inst[0] = MOV_r_rm;
|
||||
inst[1] = MOD_REG | (reg_map[SLJIT_S0] << 3) | reg_map[SLJIT_R2];
|
||||
inst += 2;
|
||||
}
|
||||
if (args > 1) {
|
||||
*inst++ = MOV_r_rm;
|
||||
*inst++ = MOD_REG | (reg_map[SLJIT_S1] << 3) | reg_map[SLJIT_R1];
|
||||
inst[0] = MOV_r_rm;
|
||||
inst[1] = MOD_REG | (reg_map[SLJIT_S1] << 3) | reg_map[SLJIT_R1];
|
||||
inst += 2;
|
||||
}
|
||||
if (args > 2) {
|
||||
*inst++ = MOV_r_rm;
|
||||
*inst++ = MOD_DISP8 | (reg_map[SLJIT_S2] << 3) | 0x4 /* esp */;
|
||||
*inst++ = 0x24;
|
||||
*inst++ = sizeof(sljit_sw) * (3 + 2); /* saveds >= 3 as well. */
|
||||
inst[0] = MOV_r_rm;
|
||||
inst[1] = MOD_DISP8 | (reg_map[SLJIT_S2] << 3) | 0x4 /* esp */;
|
||||
inst[2] = 0x24;
|
||||
inst[3] = sizeof(sljit_sw) * (3 + 2); /* saveds >= 3 as well. */
|
||||
}
|
||||
#else
|
||||
if (args > 0) {
|
||||
*inst++ = MOV_r_rm;
|
||||
*inst++ = MOD_DISP8 | (reg_map[SLJIT_S0] << 3) | reg_map[TMP_REG1];
|
||||
*inst++ = sizeof(sljit_sw) * 2;
|
||||
inst[0] = MOV_r_rm;
|
||||
inst[1] = MOD_DISP8 | (reg_map[SLJIT_S0] << 3) | reg_map[TMP_REG1];
|
||||
inst[2] = sizeof(sljit_sw) * 2;
|
||||
inst += 3;
|
||||
}
|
||||
if (args > 1) {
|
||||
*inst++ = MOV_r_rm;
|
||||
*inst++ = MOD_DISP8 | (reg_map[SLJIT_S1] << 3) | reg_map[TMP_REG1];
|
||||
*inst++ = sizeof(sljit_sw) * 3;
|
||||
inst[0] = MOV_r_rm;
|
||||
inst[1] = MOD_DISP8 | (reg_map[SLJIT_S1] << 3) | reg_map[TMP_REG1];
|
||||
inst[2] = sizeof(sljit_sw) * 3;
|
||||
inst += 3;
|
||||
}
|
||||
if (args > 2) {
|
||||
*inst++ = MOV_r_rm;
|
||||
*inst++ = MOD_DISP8 | (reg_map[SLJIT_S2] << 3) | reg_map[TMP_REG1];
|
||||
*inst++ = sizeof(sljit_sw) * 4;
|
||||
inst[0] = MOV_r_rm;
|
||||
inst[1] = MOD_DISP8 | (reg_map[SLJIT_S2] << 3) | reg_map[TMP_REG1];
|
||||
inst[2] = sizeof(sljit_sw) * 4;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -170,17 +174,36 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
|
|||
compiler->local_size = local_size;
|
||||
|
||||
#ifdef _WIN32
|
||||
if (local_size > 1024) {
|
||||
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
|
||||
FAIL_IF(emit_do_imm(compiler, MOV_r_i32 + reg_map[SLJIT_R0], local_size));
|
||||
#else
|
||||
/* Space for a single argument. This amount is excluded when the stack is allocated below. */
|
||||
local_size -= sizeof(sljit_sw);
|
||||
FAIL_IF(emit_do_imm(compiler, MOV_r_i32 + reg_map[SLJIT_R0], local_size));
|
||||
FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
|
||||
SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, sizeof(sljit_sw)));
|
||||
#endif
|
||||
FAIL_IF(sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARG1(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_grow_stack)));
|
||||
if (local_size > 0) {
|
||||
if (local_size <= 4 * 4096) {
|
||||
if (local_size > 4096)
|
||||
EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096);
|
||||
if (local_size > 2 * 4096)
|
||||
EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 2);
|
||||
if (local_size > 3 * 4096)
|
||||
EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 3);
|
||||
}
|
||||
else {
|
||||
EMIT_MOV(compiler, SLJIT_R0, 0, SLJIT_SP, 0);
|
||||
EMIT_MOV(compiler, SLJIT_R1, 0, SLJIT_IMM, (local_size - 1) >> 12);
|
||||
|
||||
SLJIT_ASSERT (reg_map[SLJIT_R0] == 0);
|
||||
|
||||
EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_R0), -4096);
|
||||
FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
|
||||
SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 4096));
|
||||
FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
|
||||
SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1));
|
||||
|
||||
inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
|
||||
FAIL_IF(!inst);
|
||||
|
||||
INC_SIZE(2);
|
||||
inst[0] = JNE_i8;
|
||||
inst[1] = (sljit_s8) -16;
|
||||
}
|
||||
|
||||
EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -local_size);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
|
|
@ -83,6 +83,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
|
|||
CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
|
||||
set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
|
||||
|
||||
compiler->mode32 = 0;
|
||||
|
||||
#ifdef _WIN64
|
||||
/* Two/four register slots for parameters plus space for xmm6 register if needed. */
|
||||
if (fscratches >= 6 || fsaveds >= 1)
|
||||
|
@ -126,35 +128,39 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
|
|||
|
||||
#ifndef _WIN64
|
||||
if (args > 0) {
|
||||
*inst++ = REX_W;
|
||||
*inst++ = MOV_r_rm;
|
||||
*inst++ = MOD_REG | (reg_map[SLJIT_S0] << 3) | 0x7 /* rdi */;
|
||||
inst[0] = REX_W;
|
||||
inst[1] = MOV_r_rm;
|
||||
inst[2] = MOD_REG | (reg_map[SLJIT_S0] << 3) | 0x7 /* rdi */;
|
||||
inst += 3;
|
||||
}
|
||||
if (args > 1) {
|
||||
*inst++ = REX_W | REX_R;
|
||||
*inst++ = MOV_r_rm;
|
||||
*inst++ = MOD_REG | (reg_lmap[SLJIT_S1] << 3) | 0x6 /* rsi */;
|
||||
inst[0] = REX_W | REX_R;
|
||||
inst[1] = MOV_r_rm;
|
||||
inst[2] = MOD_REG | (reg_lmap[SLJIT_S1] << 3) | 0x6 /* rsi */;
|
||||
inst += 3;
|
||||
}
|
||||
if (args > 2) {
|
||||
*inst++ = REX_W | REX_R;
|
||||
*inst++ = MOV_r_rm;
|
||||
*inst++ = MOD_REG | (reg_lmap[SLJIT_S2] << 3) | 0x2 /* rdx */;
|
||||
inst[0] = REX_W | REX_R;
|
||||
inst[1] = MOV_r_rm;
|
||||
inst[2] = MOD_REG | (reg_lmap[SLJIT_S2] << 3) | 0x2 /* rdx */;
|
||||
}
|
||||
#else
|
||||
if (args > 0) {
|
||||
*inst++ = REX_W;
|
||||
*inst++ = MOV_r_rm;
|
||||
*inst++ = MOD_REG | (reg_map[SLJIT_S0] << 3) | 0x1 /* rcx */;
|
||||
inst[0] = REX_W;
|
||||
inst[1] = MOV_r_rm;
|
||||
inst[2] = MOD_REG | (reg_map[SLJIT_S0] << 3) | 0x1 /* rcx */;
|
||||
inst += 3;
|
||||
}
|
||||
if (args > 1) {
|
||||
*inst++ = REX_W;
|
||||
*inst++ = MOV_r_rm;
|
||||
*inst++ = MOD_REG | (reg_map[SLJIT_S1] << 3) | 0x2 /* rdx */;
|
||||
inst[0] = REX_W;
|
||||
inst[1] = MOV_r_rm;
|
||||
inst[2] = MOD_REG | (reg_map[SLJIT_S1] << 3) | 0x2 /* rdx */;
|
||||
inst += 3;
|
||||
}
|
||||
if (args > 2) {
|
||||
*inst++ = REX_W | REX_B;
|
||||
*inst++ = MOV_r_rm;
|
||||
*inst++ = MOD_REG | (reg_map[SLJIT_S2] << 3) | 0x0 /* r8 */;
|
||||
inst[0] = REX_W | REX_B;
|
||||
inst[1] = MOV_r_rm;
|
||||
inst[2] = MOD_REG | (reg_map[SLJIT_S2] << 3) | 0x0 /* r8 */;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
@ -163,58 +169,42 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
|
|||
compiler->local_size = local_size;
|
||||
|
||||
#ifdef _WIN64
|
||||
if (local_size > 1024) {
|
||||
/* Allocate stack for the callback, which grows the stack. */
|
||||
inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + (3 + sizeof(sljit_s32)));
|
||||
FAIL_IF(!inst);
|
||||
INC_SIZE(4 + (3 + sizeof(sljit_s32)));
|
||||
*inst++ = REX_W;
|
||||
*inst++ = GROUP_BINARY_83;
|
||||
*inst++ = MOD_REG | SUB | reg_map[SLJIT_SP];
|
||||
/* Allocated size for registers must be divisible by 8. */
|
||||
SLJIT_ASSERT(!(saved_register_size & 0x7));
|
||||
/* Aligned to 16 byte. */
|
||||
if (saved_register_size & 0x8) {
|
||||
*inst++ = 5 * sizeof(sljit_sw);
|
||||
local_size -= 5 * sizeof(sljit_sw);
|
||||
} else {
|
||||
*inst++ = 4 * sizeof(sljit_sw);
|
||||
local_size -= 4 * sizeof(sljit_sw);
|
||||
if (local_size > 0) {
|
||||
if (local_size <= 4 * 4096) {
|
||||
if (local_size > 4096)
|
||||
EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096);
|
||||
if (local_size > 2 * 4096)
|
||||
EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 2);
|
||||
if (local_size > 3 * 4096)
|
||||
EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 3);
|
||||
}
|
||||
/* Second instruction */
|
||||
SLJIT_ASSERT(reg_map[SLJIT_R0] < 8);
|
||||
*inst++ = REX_W;
|
||||
*inst++ = MOV_rm_i32;
|
||||
*inst++ = MOD_REG | reg_lmap[SLJIT_R0];
|
||||
sljit_unaligned_store_s32(inst, local_size);
|
||||
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
|
||||
|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
|
||||
compiler->skip_checks = 1;
|
||||
#endif
|
||||
FAIL_IF(sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARG1(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_grow_stack)));
|
||||
else {
|
||||
EMIT_MOV(compiler, SLJIT_R0, 0, SLJIT_SP, 0);
|
||||
EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, (local_size - 1) >> 12);
|
||||
|
||||
SLJIT_ASSERT (reg_map[SLJIT_R0] == 0);
|
||||
|
||||
EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_MEM1(SLJIT_R0), -4096);
|
||||
FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
|
||||
SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 4096));
|
||||
FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
|
||||
TMP_REG1, 0, TMP_REG1, 0, SLJIT_IMM, 1));
|
||||
|
||||
inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
|
||||
FAIL_IF(!inst);
|
||||
|
||||
INC_SIZE(2);
|
||||
inst[0] = JNE_i8;
|
||||
inst[1] = (sljit_s8) -19;
|
||||
}
|
||||
|
||||
EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -local_size);
|
||||
}
|
||||
#endif
|
||||
|
||||
if (local_size > 0) {
|
||||
if (local_size <= 127) {
|
||||
inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
|
||||
FAIL_IF(!inst);
|
||||
INC_SIZE(4);
|
||||
*inst++ = REX_W;
|
||||
*inst++ = GROUP_BINARY_83;
|
||||
*inst++ = MOD_REG | SUB | reg_map[SLJIT_SP];
|
||||
*inst++ = local_size;
|
||||
}
|
||||
else {
|
||||
inst = (sljit_u8*)ensure_buf(compiler, 1 + 7);
|
||||
FAIL_IF(!inst);
|
||||
INC_SIZE(7);
|
||||
*inst++ = REX_W;
|
||||
*inst++ = GROUP_BINARY_81;
|
||||
*inst++ = MOD_REG | SUB | reg_map[SLJIT_SP];
|
||||
sljit_unaligned_store_s32(inst, local_size);
|
||||
inst += sizeof(sljit_s32);
|
||||
}
|
||||
FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
|
||||
SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size));
|
||||
}
|
||||
|
||||
#ifdef _WIN64
|
||||
|
|
|
@ -669,23 +669,6 @@ static SLJIT_INLINE sljit_s32 emit_sse2_store(struct sljit_compiler *compiler,
|
|||
static SLJIT_INLINE sljit_s32 emit_sse2_load(struct sljit_compiler *compiler,
|
||||
sljit_s32 single, sljit_s32 dst, sljit_s32 src, sljit_sw srcw);
|
||||
|
||||
#ifdef _WIN32
|
||||
#include <malloc.h>
|
||||
|
||||
static void SLJIT_FUNC sljit_grow_stack(sljit_sw local_size)
|
||||
{
|
||||
/* Workaround for calling the internal _chkstk() function on Windows.
|
||||
This function touches all 4k pages belongs to the requested stack space,
|
||||
which size is passed in local_size. This is necessary on Windows where
|
||||
the stack can only grow in 4k steps. However, this function just burn
|
||||
CPU cycles if the stack is large enough. However, you don't know it in
|
||||
advance, so it must always be called. I think this is a bad design in
|
||||
general even if it has some reasons. */
|
||||
*(volatile sljit_s32*)alloca(local_size) = 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
|
||||
#include "sljitNativeX86_32.c"
|
||||
#else
|
||||
|
|
Loading…
Reference in New Issue