From cd29fb22a007b6cbdadde9cb3f912f0b47b786fa Mon Sep 17 00:00:00 2001 From: Sofox Date: Tue, 5 Dec 2023 13:21:57 +0000 Subject: [PATCH] Fixed RegEx search_all for zero length matches/lookahead/lookbehind (cherry picked from commit 7b2fd342e32a87ae57c16f568709dc4bb3fae2a5) --- modules/regex/regex.cpp | 14 ++-- modules/regex/tests/test_regex.h | 139 ++++++++++++++++++++++++++++++- 2 files changed, 146 insertions(+), 7 deletions(-) diff --git a/modules/regex/regex.cpp b/modules/regex/regex.cpp index 704c107f204..d49578d2a9b 100644 --- a/modules/regex/regex.cpp +++ b/modules/regex/regex.cpp @@ -270,16 +270,18 @@ Ref RegEx::search(const String &p_subject, int p_offset, int p_end) TypedArray RegEx::search_all(const String &p_subject, int p_offset, int p_end) const { ERR_FAIL_COND_V_MSG(p_offset < 0, Array(), "RegEx search offset must be >= 0"); - int last_end = -1; + int last_end = 0; TypedArray result; Ref match = search(p_subject, p_offset, p_end); + while (match.is_valid()) { - if (last_end == match->get_end(0)) { - break; - } - result.push_back(match); last_end = match->get_end(0); - match = search(p_subject, match->get_end(0), p_end); + if (match->get_start(0) == last_end) { + last_end++; + } + + result.push_back(match); + match = search(p_subject, last_end, p_end); } return result; } diff --git a/modules/regex/tests/test_regex.h b/modules/regex/tests/test_regex.h index 3e4d7693771..0b401da831c 100644 --- a/modules/regex/tests/test_regex.h +++ b/modules/regex/tests/test_regex.h @@ -164,7 +164,7 @@ TEST_CASE("[RegEx] Uninitialized use") { ERR_PRINT_ON } -TEST_CASE("[RegEx] Empty Pattern") { +TEST_CASE("[RegEx] Empty pattern") { const String s = "Godot"; RegEx re; @@ -222,6 +222,143 @@ TEST_CASE("[RegEx] Match start and end positions") { CHECK(match->get_start("vowel") == 2); CHECK(match->get_end("vowel") == 3); } + +TEST_CASE("[RegEx] Asterisk search all") { + const String s = "Godot Engine"; + + RegEx re("o*"); + REQUIRE(re.is_valid()); + Ref match; + const Array all_results = re.search_all(s); + CHECK(all_results.size() == 13); + + match = all_results[0]; + CHECK(match->get_string(0) == ""); + match = all_results[1]; + CHECK(match->get_string(0) == "o"); + match = all_results[2]; + CHECK(match->get_string(0) == ""); + match = all_results[3]; + CHECK(match->get_string(0) == "o"); + + for (int i = 4; i < 13; i++) { + match = all_results[i]; + CHECK(match->get_string(0) == ""); + } +} + +TEST_CASE("[RegEx] Simple lookahead") { + const String s = "Godot Engine"; + + RegEx re("o(?=t)"); + REQUIRE(re.is_valid()); + Ref match = re.search(s); + REQUIRE(match != nullptr); + CHECK(match->get_start(0) == 3); + CHECK(match->get_end(0) == 4); +} + +TEST_CASE("[RegEx] Lookahead groups empty matches") { + const String s = "12"; + + RegEx re("(?=(\\d+))"); + REQUIRE(re.is_valid()); + Ref match = re.search(s); + CHECK(match->get_string(0) == ""); + CHECK(match->get_string(1) == "12"); + + const Array all_results = re.search_all(s); + CHECK(all_results.size() == 2); + + match = all_results[0]; + REQUIRE(match != nullptr); + CHECK(match->get_string(0) == String("")); + CHECK(match->get_string(1) == String("12")); + + match = all_results[1]; + REQUIRE(match != nullptr); + CHECK(match->get_string(0) == String("")); + CHECK(match->get_string(1) == String("2")); +} + +TEST_CASE("[RegEx] Simple lookbehind") { + const String s = "Godot Engine"; + + RegEx re("(?<=d)o"); + REQUIRE(re.is_valid()); + Ref match = re.search(s); + REQUIRE(match != nullptr); + CHECK(match->get_start(0) == 3); + CHECK(match->get_end(0) == 4); +} + +TEST_CASE("[RegEx] Simple lookbehind search all") { + const String s = "ababbaabab"; + + RegEx re("(?<=a)b"); + REQUIRE(re.is_valid()); + const Array all_results = re.search_all(s); + CHECK(all_results.size() == 4); + + Ref match = all_results[0]; + REQUIRE(match != nullptr); + CHECK(match->get_start(0) == 1); + CHECK(match->get_end(0) == 2); + + match = all_results[1]; + REQUIRE(match != nullptr); + CHECK(match->get_start(0) == 3); + CHECK(match->get_end(0) == 4); + + match = all_results[2]; + REQUIRE(match != nullptr); + CHECK(match->get_start(0) == 7); + CHECK(match->get_end(0) == 8); + + match = all_results[3]; + REQUIRE(match != nullptr); + CHECK(match->get_start(0) == 9); + CHECK(match->get_end(0) == 10); +} + +TEST_CASE("[RegEx] Lookbehind groups empty matches") { + const String s = "abaaabab"; + + RegEx re("(?<=(b))"); + REQUIRE(re.is_valid()); + Ref match; + + const Array all_results = re.search_all(s); + CHECK(all_results.size() == 3); + + match = all_results[0]; + REQUIRE(match != nullptr); + CHECK(match->get_start(0) == 2); + CHECK(match->get_end(0) == 2); + CHECK(match->get_start(1) == 1); + CHECK(match->get_end(1) == 2); + CHECK(match->get_string(0) == String("")); + CHECK(match->get_string(1) == String("b")); + + match = all_results[1]; + REQUIRE(match != nullptr); + CHECK(match->get_start(0) == 6); + CHECK(match->get_end(0) == 6); + CHECK(match->get_start(1) == 5); + CHECK(match->get_end(1) == 6); + CHECK(match->get_string(0) == String("")); + CHECK(match->get_string(1) == String("b")); + + match = all_results[2]; + REQUIRE(match != nullptr); + CHECK(match->get_start(0) == 8); + CHECK(match->get_end(0) == 8); + CHECK(match->get_start(1) == 7); + CHECK(match->get_end(1) == 8); + CHECK(match->get_string(0) == String("")); + CHECK(match->get_string(1) == String("b")); +} + } // namespace TestRegEx #endif // TEST_REGEX_H