diff --git a/doc/base/classes.xml b/doc/base/classes.xml index ab1320908eb..15804b24cc4 100644 --- a/doc/base/classes.xml +++ b/doc/base/classes.xml @@ -26960,7 +26960,7 @@ This method controls whether the position between two cached points is interpola Lazy (non-greedy) quantifiers [code]*?[/code] Begining [code]^[/code] and end [code]$[/code] anchors Alternation [code]|[/code] - Backreferences [code]\1[/code] to [code]\9[/code] + Backreferences [code]\1[/code] and [code]\g{1}[/code] POSIX character classes [code][[:alnum:]][/code] Lookahead [code](?=)[/code], [code](?!)[/code] and lookbehind [code](?<=)[/code], [code](?<!)[/code] ASCII [code]\xFF[/code] and Unicode [code]\uFFFF[/code] code points (in a style similar to Python) @@ -26972,9 +26972,10 @@ This method controls whether the position between two cached points is interpola - + + Compiles and assign the regular expression pattern to use. The limit on the number of capturing groups can be specified or made unlimited if negative. diff --git a/drivers/nrex/README.md b/drivers/nrex/README.md index 951b301c1ed..9ff67992dca 100644 --- a/drivers/nrex/README.md +++ b/drivers/nrex/README.md @@ -1,5 +1,7 @@ # NREX: Node RegEx +Version 0.1 + Small node-based regular expression library. It only does text pattern matchhing, not replacement. To use add the files `nrex.hpp`, `nrex.cpp` and `nrex_config.h` to your project and follow the example: @@ -32,7 +34,7 @@ Currently supported features: * Unicode `\uFFFF` code points * Positive `(?=)` and negative `(?!)` lookahead * Positive `(?<=)` and negative `(?test(s, pos - offset); @@ -450,7 +459,7 @@ struct nrex_node_char : public nrex_node int test(nrex_search* s, int pos) const { - if (s->end == pos || s->at(pos) != ch) + if (s->end <= pos || 0 > pos || s->at(pos) != ch) { return -1; } @@ -473,7 +482,7 @@ struct nrex_node_range : public nrex_node int test(nrex_search* s, int pos) const { - if (s->end == pos) + if (s->end <= pos || 0 > pos) { return -1; } @@ -555,7 +564,7 @@ struct nrex_node_class : public nrex_node int test(nrex_search* s, int pos) const { - if (s->end == pos) + if (s->end <= pos || 0 > pos) { return -1; } @@ -727,7 +736,7 @@ struct nrex_node_shorthand : public nrex_node int test(nrex_search* s, int pos) const { - if (s->end == pos) + if (s->end <= pos || 0 > pos) { return -1; } @@ -811,16 +820,12 @@ struct nrex_node_quantifier : public nrex_node int test(nrex_search* s, int pos) const { - return test_step(s, pos, 1); + return test_step(s, pos, 0, pos); } - int test_step(nrex_search* s, int pos, int level) const + int test_step(nrex_search* s, int pos, int level, int start) const { - if (max == 0) - { - return pos; - } - if ((max >= 1 && level > max) || pos > s->end) + if (pos > s->end) { return -1; } @@ -840,14 +845,26 @@ struct nrex_node_quantifier : public nrex_node return res; } } - int res = child->test(s, pos); - if (s->complete) + if (max >= 0 && level > max) { - return res; + return -1; + } + if (level > 1 && level > min + 1 && pos == start) + { + return -1; + } + int res = pos; + if (level >= 1) + { + res = child->test(s, pos); + if (s->complete) + { + return res; + } } if (res >= 0) { - int res_step = test_step(s, res, level + 1); + int res_step = test_step(s, res, level + 1, start); if (res_step >= 0) { return res_step; @@ -983,6 +1000,13 @@ nrex::nrex() { } +nrex::nrex(const nrex_char* pattern, int captures) + : _capturing(0) + , _root(NULL) +{ + compile(pattern, captures); +} + nrex::~nrex() { if (_root) @@ -1008,10 +1032,14 @@ void nrex::reset() int nrex::capture_size() const { - return _capturing + 1; + if (_root) + { + return _capturing + 1; + } + return 0; } -bool nrex::compile(const nrex_char* pattern, bool extended) +bool nrex::compile(const nrex_char* pattern, int captures) { reset(); nrex_node_group* root = NREX_NEW(nrex_node_group(_capturing)); @@ -1053,7 +1081,7 @@ bool nrex::compile(const nrex_char* pattern, bool extended) NREX_COMPILE_ERROR("unrecognised qualifier for group"); } } - else if ((!extended && _capturing < 9) || (extended && _capturing < 99)) + else if (captures >= 0 && _capturing < captures) { nrex_node_group* group = NREX_NEW(nrex_node_group(++_capturing)); stack.top()->add_child(group); @@ -1190,15 +1218,6 @@ bool nrex::compile(const nrex_char* pattern, bool extended) } else if (nrex_is_quantifier(c[0])) { - if (stack.top()->back == NULL || !stack.top()->back->quantifiable) - { - if (c[0] == '{') - { - stack.top()->add_child(NREX_NEW(nrex_node_char('{'))); - continue; - } - NREX_COMPILE_ERROR("element not quantifiable"); - } int min = 0; int max = -1; bool valid_quantifier = true; @@ -1270,6 +1289,10 @@ bool nrex::compile(const nrex_char* pattern, bool extended) } if (valid_quantifier) { + if (stack.top()->back == NULL || !stack.top()->back->quantifiable) + { + NREX_COMPILE_ERROR("element not quantifiable"); + } nrex_node_quantifier* quant = NREX_NEW(nrex_node_quantifier(min, max)); if (min == max) { @@ -1323,20 +1346,26 @@ bool nrex::compile(const nrex_char* pattern, bool extended) stack.top()->add_child(NREX_NEW(nrex_node_shorthand(c[1]))); ++c; } - else if ('1' <= c[1] && c[1] <= '9') + else if (('1' <= c[1] && c[1] <= '9') || (c[1] == 'g' && c[2] == '{')) { int ref = 0; - if (extended && '0' <= c[2] && c[2] <= '9') + bool unclosed = false; + if (c[1] == 'g') { - ref = int(c[1] - '0') * 10 + int(c[2] - '0'); + unclosed = true; c = &c[2]; } - else + while ('0' <= c[1] && c[1] <= '9') { - ref = int(c[1] - '0'); + ref = ref * 10 + int(c[1] - '0'); ++c; } - if (ref > _capturing) + if (c[1] == '}') + { + unclosed = false; + ++c; + } + if (ref > _capturing || ref <= 0 || unclosed) { NREX_COMPILE_ERROR("backreference to non-existent capture"); } @@ -1377,6 +1406,10 @@ bool nrex::compile(const nrex_char* pattern, bool extended) bool nrex::match(const nrex_char* str, nrex_result* captures, int offset, int end) const { + if (!_root) + { + return false; + } nrex_search s(str, captures); if (end >= offset) { @@ -1386,7 +1419,7 @@ bool nrex::match(const nrex_char* str, nrex_result* captures, int offset, int en { s.end = NREX_STRLEN(str); } - for (int i = offset; i < s.end; ++i) + for (int i = offset; i <= s.end; ++i) { for (int c = 0; c <= _capturing; ++c) { diff --git a/drivers/nrex/nrex.hpp b/drivers/nrex/nrex.hpp index e26a61c39ae..44e950c5178 100644 --- a/drivers/nrex/nrex.hpp +++ b/drivers/nrex/nrex.hpp @@ -1,4 +1,5 @@ // NREX: Node RegEx +// Version 0.1 // // Copyright (c) 2015, Zher Huei Lee // All rights reserved. @@ -59,7 +60,32 @@ class nrex int _capturing; nrex_node* _root; public: + + /*! + * \brief Initialises an empty regex container + */ nrex(); + + /*! + * \brief Initialises and compiles the regex pattern + * + * This calls nrex::compile() with the same arguments. To check whether + * the compilation was successfull, use nrex::valid(). + * + * If the NREX_THROW_ERROR was defined it would automatically throw a + * runtime error nrex_compile_error if it encounters a problem when + * parsing the pattern. + * + * \param pattern The regex pattern + * \param captures The maximum number of capture groups to allow. Any + * extra would be converted to non-capturing groups. + * If negative, no limit would be imposed. Defaults + * to 9. + * + * \see nrex::compile() + */ + nrex(const nrex_char* pattern, int captures = 9); + ~nrex(); /*! @@ -78,9 +104,9 @@ class nrex * * This is used to provide the array size of the captures needed for * nrex::match() to work. The size is actually the number of capture - * groups + one for the matching of the entire pattern. The result is - * always capped at 10 or 100, depending on the extend option given in - * nrex::compile() (default 10). + * groups + one for the matching of the entire pattern. This can be + * capped using the extra argument given in nrex::compile() + * (default 10). * * \return The number of captures */ @@ -97,12 +123,13 @@ class nrex * parsing the pattern. * * \param pattern The regex pattern - * \param extended If true, raises the limit on number of capture - * groups and back-references to 99. Otherwise limited - * to 9. Defaults to false. + * \param captures The maximum number of capture groups to allow. Any + * extra would be converted to non-capturing groups. + * If negative, no limit would be imposed. Defaults + * to 9. * \return True if the pattern was succesfully compiled */ - bool compile(const nrex_char* pattern, bool extended = false); + bool compile(const nrex_char* pattern, int captures = 9); /*! * \brief Uses the pattern to search through the provided string diff --git a/drivers/nrex/regex.cpp b/drivers/nrex/regex.cpp index 246384b10a6..e8578221a99 100644 --- a/drivers/nrex/regex.cpp +++ b/drivers/nrex/regex.cpp @@ -15,7 +15,7 @@ void RegEx::_bind_methods() { - ObjectTypeDB::bind_method(_MD("compile","pattern", "expanded"),&RegEx::compile, DEFVAL(true)); + ObjectTypeDB::bind_method(_MD("compile","pattern", "capture"),&RegEx::compile, DEFVAL(9)); ObjectTypeDB::bind_method(_MD("find","text","start","end"),&RegEx::find, DEFVAL(0), DEFVAL(-1)); ObjectTypeDB::bind_method(_MD("clear"),&RegEx::clear); ObjectTypeDB::bind_method(_MD("is_valid"),&RegEx::is_valid); @@ -68,11 +68,11 @@ String RegEx::get_capture(int capture) const { } -Error RegEx::compile(const String& p_pattern, bool expanded) { +Error RegEx::compile(const String& p_pattern, int capture) { clear(); - exp.compile(p_pattern.c_str(), expanded); + exp.compile(p_pattern.c_str(), capture); ERR_FAIL_COND_V( !exp.valid(), FAILED ); diff --git a/drivers/nrex/regex.h b/drivers/nrex/regex.h index be52da81495..76aab2aea65 100644 --- a/drivers/nrex/regex.h +++ b/drivers/nrex/regex.h @@ -36,7 +36,7 @@ public: bool is_valid() const; int get_capture_count() const; String get_capture(int capture) const; - Error compile(const String& p_pattern, bool expanded = false); + Error compile(const String& p_pattern, int capture = 9); int find(const String& p_text, int p_start = 0, int p_end = -1) const; RegEx();