2018-05-24 06:13:24 +00:00
|
|
|
/*************************************************
|
|
|
|
* Perl-Compatible Regular Expressions *
|
|
|
|
*************************************************/
|
|
|
|
|
|
|
|
/* PCRE is a library of functions to support regular expressions whose syntax
|
|
|
|
and semantics are as close as possible to those of the Perl 5 language.
|
|
|
|
|
|
|
|
Written by Philip Hazel
|
|
|
|
Original API code Copyright (c) 1997-2012 University of Cambridge
|
2022-05-17 14:38:55 +00:00
|
|
|
New API code Copyright (c) 2016-2021 University of Cambridge
|
2018-05-24 06:13:24 +00:00
|
|
|
|
|
|
|
-----------------------------------------------------------------------------
|
|
|
|
Redistribution and use in source and binary forms, with or without
|
|
|
|
modification, are permitted provided that the following conditions are met:
|
|
|
|
|
|
|
|
* Redistributions of source code must retain the above copyright notice,
|
|
|
|
this list of conditions and the following disclaimer.
|
|
|
|
|
|
|
|
* Redistributions in binary form must reproduce the above copyright
|
|
|
|
notice, this list of conditions and the following disclaimer in the
|
|
|
|
documentation and/or other materials provided with the distribution.
|
|
|
|
|
|
|
|
* Neither the name of the University of Cambridge nor the names of its
|
|
|
|
contributors may be used to endorse or promote products derived from
|
|
|
|
this software without specific prior written permission.
|
|
|
|
|
|
|
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
|
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
|
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
|
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
|
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
|
|
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
|
|
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
|
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
|
|
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
|
|
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
|
|
POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
-----------------------------------------------------------------------------
|
|
|
|
*/
|
|
|
|
|
|
|
|
/* This module contains an internal function that is used to match a Unicode
|
|
|
|
extended grapheme sequence. It is used by both pcre2_match() and
|
|
|
|
pcre2_dfa_match(). However, it is called only when Unicode support is being
|
|
|
|
compiled. Nevertheless, we provide a dummy function when there is no Unicode
|
|
|
|
support, because some compilers do not like functionless source files. */
|
|
|
|
|
|
|
|
|
|
|
|
#ifdef HAVE_CONFIG_H
|
|
|
|
#include "config.h"
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
|
|
#include "pcre2_internal.h"
|
|
|
|
|
|
|
|
|
|
|
|
/* Dummy function */
|
|
|
|
|
|
|
|
#ifndef SUPPORT_UNICODE
|
|
|
|
PCRE2_SPTR
|
|
|
|
PRIV(extuni)(uint32_t c, PCRE2_SPTR eptr, PCRE2_SPTR start_subject,
|
|
|
|
PCRE2_SPTR end_subject, BOOL utf, int *xcount)
|
|
|
|
{
|
|
|
|
(void)c;
|
|
|
|
(void)eptr;
|
|
|
|
(void)start_subject;
|
|
|
|
(void)end_subject;
|
|
|
|
(void)utf;
|
|
|
|
(void)xcount;
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
#else
|
|
|
|
|
|
|
|
|
|
|
|
/*************************************************
|
|
|
|
* Match an extended grapheme sequence *
|
|
|
|
*************************************************/
|
|
|
|
|
|
|
|
/*
|
|
|
|
Arguments:
|
|
|
|
c the first character
|
|
|
|
eptr pointer to next character
|
|
|
|
start_subject pointer to start of subject
|
|
|
|
end_subject pointer to end of subject
|
|
|
|
utf TRUE if in UTF mode
|
|
|
|
xcount pointer to count of additional characters,
|
|
|
|
or NULL if count not needed
|
|
|
|
|
|
|
|
Returns: pointer after the end of the sequence
|
|
|
|
*/
|
|
|
|
|
|
|
|
PCRE2_SPTR
|
|
|
|
PRIV(extuni)(uint32_t c, PCRE2_SPTR eptr, PCRE2_SPTR start_subject,
|
|
|
|
PCRE2_SPTR end_subject, BOOL utf, int *xcount)
|
|
|
|
{
|
|
|
|
int lgb = UCD_GRAPHBREAK(c);
|
|
|
|
|
|
|
|
while (eptr < end_subject)
|
|
|
|
{
|
|
|
|
int rgb;
|
|
|
|
int len = 1;
|
|
|
|
if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
|
|
|
|
rgb = UCD_GRAPHBREAK(c);
|
2019-07-11 08:27:11 +00:00
|
|
|
if ((PRIV(ucp_gbtable)[lgb] & (1u << rgb)) == 0) break;
|
2018-05-24 06:13:24 +00:00
|
|
|
|
|
|
|
/* Not breaking between Regional Indicators is allowed only if there
|
|
|
|
are an even number of preceding RIs. */
|
|
|
|
|
2022-05-17 14:38:55 +00:00
|
|
|
if (lgb == ucp_gbRegional_Indicator && rgb == ucp_gbRegional_Indicator)
|
2018-05-24 06:13:24 +00:00
|
|
|
{
|
|
|
|
int ricount = 0;
|
|
|
|
PCRE2_SPTR bptr = eptr - 1;
|
|
|
|
if (utf) BACKCHAR(bptr);
|
|
|
|
|
|
|
|
/* bptr is pointing to the left-hand character */
|
|
|
|
|
|
|
|
while (bptr > start_subject)
|
|
|
|
{
|
|
|
|
bptr--;
|
|
|
|
if (utf)
|
|
|
|
{
|
|
|
|
BACKCHAR(bptr);
|
|
|
|
GETCHAR(c, bptr);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
c = *bptr;
|
2022-05-17 14:38:55 +00:00
|
|
|
if (UCD_GRAPHBREAK(c) != ucp_gbRegional_Indicator) break;
|
2018-05-24 06:13:24 +00:00
|
|
|
ricount++;
|
|
|
|
}
|
|
|
|
if ((ricount & 1) != 0) break; /* Grapheme break required */
|
|
|
|
}
|
|
|
|
|
2019-03-04 13:25:49 +00:00
|
|
|
/* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this
|
|
|
|
allows any number of them before a following Extended_Pictographic. */
|
2018-05-24 06:13:24 +00:00
|
|
|
|
2019-03-04 13:25:49 +00:00
|
|
|
if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) ||
|
|
|
|
lgb != ucp_gbExtended_Pictographic)
|
2018-05-24 06:13:24 +00:00
|
|
|
lgb = rgb;
|
|
|
|
|
|
|
|
eptr += len;
|
|
|
|
if (xcount != NULL) *xcount += 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
return eptr;
|
|
|
|
}
|
|
|
|
|
|
|
|
#endif /* SUPPORT_UNICODE */
|
|
|
|
|
|
|
|
/* End of pcre2_extuni.c */
|