e3e2f06324
The pattern and replacement matching behaviour has been changed purely due to the nature of switching to a standards-compliant library. One mistake in the previous behaviour was that named groups didn't have a number. This has been corrected. As names are actually just aliases of numbered groups, RegExMatch::get_name_dict() is now get_names() and is a dict mapping each name to the group number it represents. Duplicate names are enabled, with the first matching instance used. Due to the lack of a suitable equivalent in PCRE2, RegExMatch::expand() was removed.
121 lines
3.7 KiB
C
121 lines
3.7 KiB
C
/*************************************************
*      Perl-Compatible Regular Expressions       *
*************************************************/

/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
     Original API code Copyright (c) 1997-2012 University of Cambridge
         New API code Copyright (c) 2016 University of Cambridge

-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

    * Neither the name of the University of Cambridge nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
|
|
|
|
|
|
/* This file contains a function that converts a Unicode character code point
into a UTF string. The behaviour is different for each code unit width. */
|
|
|
|
|
|
#ifdef HAVE_CONFIG_H
|
|
#include "config.h"
|
|
#endif
|
|
|
|
#include "pcre2_internal.h"
|
|
|
|
|
|
/* If SUPPORT_UNICODE is not defined, this function will never be called.
Supply a dummy function because some compilers do not like empty source
modules. */
|
|
|
|
#ifndef SUPPORT_UNICODE
|
|
unsigned int
|
|
PRIV(ord2utf)(uint32_t cvalue, PCRE2_UCHAR *buffer)
|
|
{
|
|
(void)(cvalue);
|
|
(void)(buffer);
|
|
return 0;
|
|
}
|
|
#else /* SUPPORT_UNICODE */
|
|
|
|
|
|
/*************************************************
|
|
* Convert code point to UTF *
|
|
*************************************************/
|
|
|
|
/*
|
|
Arguments:
|
|
cvalue the character value
|
|
buffer pointer to buffer for result
|
|
|
|
Returns: number of code units placed in the buffer
|
|
*/
|
|
|
|
unsigned int
|
|
PRIV(ord2utf)(uint32_t cvalue, PCRE2_UCHAR *buffer)
|
|
{
|
|
/* Convert to UTF-8 */
|
|
|
|
#if PCRE2_CODE_UNIT_WIDTH == 8
|
|
int i, j;
|
|
for (i = 0; i < PRIV(utf8_table1_size); i++)
|
|
if ((int)cvalue <= PRIV(utf8_table1)[i]) break;
|
|
buffer += i;
|
|
for (j = i; j > 0; j--)
|
|
{
|
|
*buffer-- = 0x80 | (cvalue & 0x3f);
|
|
cvalue >>= 6;
|
|
}
|
|
*buffer = PRIV(utf8_table2)[i] | cvalue;
|
|
return i + 1;
|
|
|
|
/* Convert to UTF-16 */
|
|
|
|
#elif PCRE2_CODE_UNIT_WIDTH == 16
|
|
if (cvalue <= 0xffff)
|
|
{
|
|
*buffer = (PCRE2_UCHAR)cvalue;
|
|
return 1;
|
|
}
|
|
cvalue -= 0x10000;
|
|
*buffer++ = 0xd800 | (cvalue >> 10);
|
|
*buffer = 0xdc00 | (cvalue & 0x3ff);
|
|
return 2;
|
|
|
|
/* Convert to UTF-32 */
|
|
|
|
#else
|
|
*buffer = (PCRE2_UCHAR)cvalue;
|
|
return 1;
|
|
#endif
|
|
}
|
|
#endif /* SUPPORT_UNICODE */
|
|
|
|
/* End of pcre2_ord2utf.c */
|