diff --git a/modules/text_server_adv/script_iterator.cpp b/modules/text_server_adv/script_iterator.cpp index f9bbd25a5fa..d1e849def8a 100644 --- a/modules/text_server_adv/script_iterator.cpp +++ b/modules/text_server_adv/script_iterator.cpp @@ -30,6 +30,8 @@ #include "script_iterator.h" +// This implementation is derived from ICU: icu4c/source/extra/scrptrun/scrptrun.cpp + bool ScriptIterator::same_script(int32_t p_script_one, int32_t p_script_two) { return p_script_one <= USCRIPT_INHERITED || p_script_two <= USCRIPT_INHERITED || p_script_one == p_script_two; } @@ -48,7 +50,8 @@ ScriptIterator::ScriptIterator(const String &p_string, int p_start, int p_length p_start = 0; } - ParenStackEntry paren_stack[128]; + int paren_size = PAREN_STACK_DEPTH; + ParenStackEntry *paren_stack = (ParenStackEntry *)memalloc(paren_size * sizeof(ParenStackEntry)); int script_start; int script_end = p_start; @@ -64,13 +67,22 @@ ScriptIterator::ScriptIterator(const String &p_string, int p_start, int p_length UChar32 ch = str[script_end]; UScriptCode sc = uscript_getScript(ch, &err); if (U_FAILURE(err)) { + memfree(paren_stack); ERR_FAIL_MSG(u_errorName(err)); } if (u_getIntPropertyValue(ch, UCHAR_BIDI_PAIRED_BRACKET_TYPE) != U_BPT_NONE) { if (u_getIntPropertyValue(ch, UCHAR_BIDI_PAIRED_BRACKET_TYPE) == U_BPT_OPEN) { - paren_stack[++paren_sp].pair_index = ch; + // If it's an open character, push it onto the stack. + paren_sp++; + if (unlikely(paren_sp >= paren_size)) { + // If the stack is full, allocate more space to handle deeply nested parentheses. This is unlikely to happen with any real text. + paren_size += PAREN_STACK_DEPTH; + paren_stack = (ParenStackEntry *)memrealloc(paren_stack, paren_size * sizeof(ParenStackEntry)); + } + paren_stack[paren_sp].pair_index = ch; paren_stack[paren_sp].script_code = script_code; } else if (paren_sp >= 0) { + // If it's a close character, find the matching open on the stack, and use that script code. Any non-matching open characters above it on the stack will be poped. UChar32 paired_ch = u_getBidiPairedBracket(ch); while (paren_sp >= 0 && paren_stack[paren_sp].pair_index != paired_ch) { paren_sp -= 1; @@ -87,11 +99,13 @@ ScriptIterator::ScriptIterator(const String &p_string, int p_start, int p_length if (same_script(script_code, sc)) { if (script_code <= USCRIPT_INHERITED && sc > USCRIPT_INHERITED) { script_code = sc; + // Now that we have a final script code, fix any open characters we pushed before we knew the script code. while (start_sp < paren_sp) { paren_stack[++start_sp].script_code = script_code; } } if ((u_getIntPropertyValue(ch, UCHAR_BIDI_PAIRED_BRACKET_TYPE) == U_BPT_CLOSE) && paren_sp >= 0) { + // If this character is a close paired character pop the matching open character from the stack. paren_sp -= 1; if (start_sp >= 0) { start_sp -= 1; @@ -109,4 +123,6 @@ ScriptIterator::ScriptIterator(const String &p_string, int p_start, int p_length script_ranges.push_back(rng); } while (script_end < p_length); + + memfree(paren_stack); } diff --git a/modules/text_server_adv/script_iterator.h b/modules/text_server_adv/script_iterator.h index 896a0e5c15b..5efd40f7c47 100644 --- a/modules/text_server_adv/script_iterator.h +++ b/modules/text_server_adv/script_iterator.h @@ -43,6 +43,8 @@ #include class ScriptIterator { + static const int PAREN_STACK_DEPTH = 128; + public: struct ScriptRange { int start = 0;