Merge pull request #2993 from leezh/nrex-v0.1
updated the RegEx library nrex to v0.1
This commit is contained in:
commit
afb895f197
|
@ -26960,7 +26960,7 @@ This method controls whether the position between two cached points is interpola
|
|||
Lazy (non-greedy) quantifiers [code]*?[/code]
|
||||
Beginning [code]^[/code] and end [code]$[/code] anchors
|
||||
Alternation [code]|[/code]
|
||||
Backreferences [code]\1[/code] to [code]\9[/code]
|
||||
Backreferences [code]\1[/code] and [code]\g{1}[/code]
|
||||
POSIX character classes [code][[:alnum:]][/code]
|
||||
Lookahead [code](?=)[/code], [code](?!)[/code] and lookbehind [code](?<=)[/code], [code](?<!)[/code]
|
||||
ASCII [code]\xFF[/code] and Unicode [code]\uFFFF[/code] code points (in a style similar to Python)
|
||||
|
@ -26972,9 +26972,10 @@ This method controls whether the position between two cached points is interpola
|
|||
</return>
|
||||
<argument index="0" name="pattern" type="String">
|
||||
</argument>
|
||||
<argument index="1" name="expanded" type="bool" default="true">
|
||||
<argument index="1" name="capture" type="int" default="9">
|
||||
</argument>
|
||||
<description>
|
||||
Compiles and assigns the regular expression pattern to use. The limit on the number of capturing groups can be specified or made unlimited if negative.
|
||||
</description>
|
||||
</method>
|
||||
<method name="find" qualifiers="const">
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
# NREX: Node RegEx
|
||||
|
||||
Version 0.1
|
||||
|
||||
Small node-based regular expression library. It only does text pattern
|
||||
matching, not replacement. To use add the files `nrex.hpp`, `nrex.cpp`
|
||||
and `nrex_config.h` to your project and follow the example:
|
||||
|
@ -32,7 +34,7 @@ Currently supported features:
|
|||
* Unicode `\uFFFF` code points
|
||||
* Positive `(?=)` and negative `(?!)` lookahead
|
||||
* Positive `(?<=)` and negative `(?<!)` lookbehind (fixed length and no alternations)
|
||||
* Backreferences `\1` to `\9` (with option to expand to `\99`)
|
||||
* Backreferences `\1` and `\g{1}` (limited by default to 9 - can be unlimited)
|
||||
|
||||
## License
|
||||
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
// NREX: Node RegEx
|
||||
// Version 0.1
|
||||
//
|
||||
// Copyright (c) 2015, Zher Huei Lee
|
||||
// All rights reserved.
|
||||
|
@ -299,6 +300,10 @@ struct nrex_node_group : public nrex_node
|
|||
{
|
||||
length = 1;
|
||||
}
|
||||
if (mode == LookAhead || mode == LookBehind)
|
||||
{
|
||||
quantifiable = false;
|
||||
}
|
||||
}
|
||||
|
||||
virtual ~nrex_node_group()
|
||||
|
@ -322,6 +327,10 @@ struct nrex_node_group : public nrex_node
|
|||
int offset = 0;
|
||||
if (mode == LookBehind)
|
||||
{
|
||||
if (pos < length)
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
offset = length;
|
||||
}
|
||||
int res = childset[i]->test(s, pos - offset);
|
||||
|
@ -450,7 +459,7 @@ struct nrex_node_char : public nrex_node
|
|||
|
||||
int test(nrex_search* s, int pos) const
|
||||
{
|
||||
if (s->end == pos || s->at(pos) != ch)
|
||||
if (s->end <= pos || 0 > pos || s->at(pos) != ch)
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
|
@ -473,7 +482,7 @@ struct nrex_node_range : public nrex_node
|
|||
|
||||
int test(nrex_search* s, int pos) const
|
||||
{
|
||||
if (s->end == pos)
|
||||
if (s->end <= pos || 0 > pos)
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
|
@ -555,7 +564,7 @@ struct nrex_node_class : public nrex_node
|
|||
|
||||
int test(nrex_search* s, int pos) const
|
||||
{
|
||||
if (s->end == pos)
|
||||
if (s->end <= pos || 0 > pos)
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
|
@ -727,7 +736,7 @@ struct nrex_node_shorthand : public nrex_node
|
|||
|
||||
int test(nrex_search* s, int pos) const
|
||||
{
|
||||
if (s->end == pos)
|
||||
if (s->end <= pos || 0 > pos)
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
|
@ -811,16 +820,12 @@ struct nrex_node_quantifier : public nrex_node
|
|||
|
||||
int test(nrex_search* s, int pos) const
|
||||
{
|
||||
return test_step(s, pos, 1);
|
||||
return test_step(s, pos, 0, pos);
|
||||
}
|
||||
|
||||
int test_step(nrex_search* s, int pos, int level) const
|
||||
int test_step(nrex_search* s, int pos, int level, int start) const
|
||||
{
|
||||
if (max == 0)
|
||||
{
|
||||
return pos;
|
||||
}
|
||||
if ((max >= 1 && level > max) || pos > s->end)
|
||||
if (pos > s->end)
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
|
@ -840,14 +845,26 @@ struct nrex_node_quantifier : public nrex_node
|
|||
return res;
|
||||
}
|
||||
}
|
||||
int res = child->test(s, pos);
|
||||
if (s->complete)
|
||||
if (max >= 0 && level > max)
|
||||
{
|
||||
return res;
|
||||
return -1;
|
||||
}
|
||||
if (level > 1 && level > min + 1 && pos == start)
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
int res = pos;
|
||||
if (level >= 1)
|
||||
{
|
||||
res = child->test(s, pos);
|
||||
if (s->complete)
|
||||
{
|
||||
return res;
|
||||
}
|
||||
}
|
||||
if (res >= 0)
|
||||
{
|
||||
int res_step = test_step(s, res, level + 1);
|
||||
int res_step = test_step(s, res, level + 1, start);
|
||||
if (res_step >= 0)
|
||||
{
|
||||
return res_step;
|
||||
|
@ -983,6 +1000,13 @@ nrex::nrex()
|
|||
{
|
||||
}
|
||||
|
||||
nrex::nrex(const nrex_char* pattern, int captures)
|
||||
: _capturing(0)
|
||||
, _root(NULL)
|
||||
{
|
||||
compile(pattern, captures);
|
||||
}
|
||||
|
||||
nrex::~nrex()
|
||||
{
|
||||
if (_root)
|
||||
|
@ -1008,10 +1032,14 @@ void nrex::reset()
|
|||
|
||||
int nrex::capture_size() const
|
||||
{
|
||||
return _capturing + 1;
|
||||
if (_root)
|
||||
{
|
||||
return _capturing + 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool nrex::compile(const nrex_char* pattern, bool extended)
|
||||
bool nrex::compile(const nrex_char* pattern, int captures)
|
||||
{
|
||||
reset();
|
||||
nrex_node_group* root = NREX_NEW(nrex_node_group(_capturing));
|
||||
|
@ -1053,7 +1081,7 @@ bool nrex::compile(const nrex_char* pattern, bool extended)
|
|||
NREX_COMPILE_ERROR("unrecognised qualifier for group");
|
||||
}
|
||||
}
|
||||
else if ((!extended && _capturing < 9) || (extended && _capturing < 99))
|
||||
else if (captures >= 0 && _capturing < captures)
|
||||
{
|
||||
nrex_node_group* group = NREX_NEW(nrex_node_group(++_capturing));
|
||||
stack.top()->add_child(group);
|
||||
|
@ -1190,15 +1218,6 @@ bool nrex::compile(const nrex_char* pattern, bool extended)
|
|||
}
|
||||
else if (nrex_is_quantifier(c[0]))
|
||||
{
|
||||
if (stack.top()->back == NULL || !stack.top()->back->quantifiable)
|
||||
{
|
||||
if (c[0] == '{')
|
||||
{
|
||||
stack.top()->add_child(NREX_NEW(nrex_node_char('{')));
|
||||
continue;
|
||||
}
|
||||
NREX_COMPILE_ERROR("element not quantifiable");
|
||||
}
|
||||
int min = 0;
|
||||
int max = -1;
|
||||
bool valid_quantifier = true;
|
||||
|
@ -1270,6 +1289,10 @@ bool nrex::compile(const nrex_char* pattern, bool extended)
|
|||
}
|
||||
if (valid_quantifier)
|
||||
{
|
||||
if (stack.top()->back == NULL || !stack.top()->back->quantifiable)
|
||||
{
|
||||
NREX_COMPILE_ERROR("element not quantifiable");
|
||||
}
|
||||
nrex_node_quantifier* quant = NREX_NEW(nrex_node_quantifier(min, max));
|
||||
if (min == max)
|
||||
{
|
||||
|
@ -1323,20 +1346,26 @@ bool nrex::compile(const nrex_char* pattern, bool extended)
|
|||
stack.top()->add_child(NREX_NEW(nrex_node_shorthand(c[1])));
|
||||
++c;
|
||||
}
|
||||
else if ('1' <= c[1] && c[1] <= '9')
|
||||
else if (('1' <= c[1] && c[1] <= '9') || (c[1] == 'g' && c[2] == '{'))
|
||||
{
|
||||
int ref = 0;
|
||||
if (extended && '0' <= c[2] && c[2] <= '9')
|
||||
bool unclosed = false;
|
||||
if (c[1] == 'g')
|
||||
{
|
||||
ref = int(c[1] - '0') * 10 + int(c[2] - '0');
|
||||
unclosed = true;
|
||||
c = &c[2];
|
||||
}
|
||||
else
|
||||
while ('0' <= c[1] && c[1] <= '9')
|
||||
{
|
||||
ref = int(c[1] - '0');
|
||||
ref = ref * 10 + int(c[1] - '0');
|
||||
++c;
|
||||
}
|
||||
if (ref > _capturing)
|
||||
if (c[1] == '}')
|
||||
{
|
||||
unclosed = false;
|
||||
++c;
|
||||
}
|
||||
if (ref > _capturing || ref <= 0 || unclosed)
|
||||
{
|
||||
NREX_COMPILE_ERROR("backreference to non-existent capture");
|
||||
}
|
||||
|
@ -1377,6 +1406,10 @@ bool nrex::compile(const nrex_char* pattern, bool extended)
|
|||
|
||||
bool nrex::match(const nrex_char* str, nrex_result* captures, int offset, int end) const
|
||||
{
|
||||
if (!_root)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
nrex_search s(str, captures);
|
||||
if (end >= offset)
|
||||
{
|
||||
|
@ -1386,7 +1419,7 @@ bool nrex::match(const nrex_char* str, nrex_result* captures, int offset, int en
|
|||
{
|
||||
s.end = NREX_STRLEN(str);
|
||||
}
|
||||
for (int i = offset; i < s.end; ++i)
|
||||
for (int i = offset; i <= s.end; ++i)
|
||||
{
|
||||
for (int c = 0; c <= _capturing; ++c)
|
||||
{
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
// NREX: Node RegEx
|
||||
// Version 0.1
|
||||
//
|
||||
// Copyright (c) 2015, Zher Huei Lee
|
||||
// All rights reserved.
|
||||
|
@ -59,7 +60,32 @@ class nrex
|
|||
int _capturing;
|
||||
nrex_node* _root;
|
||||
public:
|
||||
|
||||
/*!
|
||||
* \brief Initialises an empty regex container
|
||||
*/
|
||||
nrex();
|
||||
|
||||
/*!
|
||||
* \brief Initialises and compiles the regex pattern
|
||||
*
|
||||
* This calls nrex::compile() with the same arguments. To check whether
|
||||
* the compilation was successful, use nrex::valid().
|
||||
*
|
||||
* If the NREX_THROW_ERROR was defined it would automatically throw a
|
||||
* runtime error nrex_compile_error if it encounters a problem when
|
||||
* parsing the pattern.
|
||||
*
|
||||
* \param pattern The regex pattern
|
||||
* \param captures The maximum number of capture groups to allow. Any
|
||||
* extra would be converted to non-capturing groups.
|
||||
* If negative, no limit would be imposed. Defaults
|
||||
* to 9.
|
||||
*
|
||||
* \see nrex::compile()
|
||||
*/
|
||||
nrex(const nrex_char* pattern, int captures = 9);
|
||||
|
||||
~nrex();
|
||||
|
||||
/*!
|
||||
|
@ -78,9 +104,9 @@ class nrex
|
|||
*
|
||||
* This is used to provide the array size of the captures needed for
|
||||
* nrex::match() to work. The size is actually the number of capture
|
||||
* groups + one for the matching of the entire pattern. The result is
|
||||
* always capped at 10 or 100, depending on the extend option given in
|
||||
* nrex::compile() (default 10).
|
||||
* groups + one for the matching of the entire pattern. This can be
|
||||
* capped using the extra argument given in nrex::compile()
|
||||
* (default 10).
|
||||
*
|
||||
* \return The number of captures
|
||||
*/
|
||||
|
@ -97,12 +123,13 @@ class nrex
|
|||
* parsing the pattern.
|
||||
*
|
||||
* \param pattern The regex pattern
|
||||
* \param extended If true, raises the limit on number of capture
|
||||
* groups and back-references to 99. Otherwise limited
|
||||
* to 9. Defaults to false.
|
||||
* \param captures The maximum number of capture groups to allow. Any
|
||||
* extra would be converted to non-capturing groups.
|
||||
* If negative, no limit would be imposed. Defaults
|
||||
* to 9.
|
||||
* \return True if the pattern was successfully compiled
|
||||
*/
|
||||
bool compile(const nrex_char* pattern, bool extended = false);
|
||||
bool compile(const nrex_char* pattern, int captures = 9);
|
||||
|
||||
/*!
|
||||
* \brief Uses the pattern to search through the provided string
|
||||
|
|
|
@ -15,7 +15,7 @@
|
|||
|
||||
void RegEx::_bind_methods() {
|
||||
|
||||
ObjectTypeDB::bind_method(_MD("compile","pattern", "expanded"),&RegEx::compile, DEFVAL(true));
|
||||
ObjectTypeDB::bind_method(_MD("compile","pattern", "capture"),&RegEx::compile, DEFVAL(9));
|
||||
ObjectTypeDB::bind_method(_MD("find","text","start","end"),&RegEx::find, DEFVAL(0), DEFVAL(-1));
|
||||
ObjectTypeDB::bind_method(_MD("clear"),&RegEx::clear);
|
||||
ObjectTypeDB::bind_method(_MD("is_valid"),&RegEx::is_valid);
|
||||
|
@ -68,11 +68,11 @@ String RegEx::get_capture(int capture) const {
|
|||
|
||||
}
|
||||
|
||||
Error RegEx::compile(const String& p_pattern, bool expanded) {
|
||||
Error RegEx::compile(const String& p_pattern, int capture) {
|
||||
|
||||
clear();
|
||||
|
||||
exp.compile(p_pattern.c_str(), expanded);
|
||||
exp.compile(p_pattern.c_str(), capture);
|
||||
|
||||
ERR_FAIL_COND_V( !exp.valid(), FAILED );
|
||||
|
||||
|
|
|
@ -36,7 +36,7 @@ public:
|
|||
bool is_valid() const;
|
||||
int get_capture_count() const;
|
||||
String get_capture(int capture) const;
|
||||
Error compile(const String& p_pattern, bool expanded = false);
|
||||
Error compile(const String& p_pattern, int capture = 9);
|
||||
int find(const String& p_text, int p_start = 0, int p_end = -1) const;
|
||||
|
||||
RegEx();
|
||||
|
|
Loading…
Reference in New Issue