From 207e52c161a44869f1af022030c3129b8c38a5f7 Mon Sep 17 00:00:00 2001 From: Haoyu Qiu Date: Thu, 18 Aug 2022 16:20:20 +0800 Subject: [PATCH] Fix String::word_wrap() for long words - Changes `TextServer.string_get_word_breaks()` - Returns pairs of boundary start and end offsets - Accepts `chars_per_line` to return line breaks - Removes `String::word_wrap()` Co-authored-by: bruvzg <7645683+bruvzg@users.noreply.github.com> --- core/string/ustring.cpp | 31 ------- core/string/ustring.h | 1 - doc/classes/TextServer.xml | 9 +- doc/classes/TextServerExtension.xml | 1 + editor/debugger/script_editor_debugger.cpp | 11 ++- editor/scene_tree_editor.cpp | 12 ++- modules/text_server_adv/text_server_adv.cpp | 94 ++++++++++++++++----- modules/text_server_adv/text_server_adv.h | 2 +- modules/text_server_fb/text_server_fb.cpp | 71 +++++++++++++--- modules/text_server_fb/text_server_fb.h | 2 +- platform/linuxbsd/tts_linux.cpp | 11 ++- servers/text/text_server_extension.cpp | 6 +- servers/text/text_server_extension.h | 4 +- servers/text_server.cpp | 2 +- servers/text_server.h | 2 +- tests/servers/test_text_server.h | 48 +++++++---- 16 files changed, 203 insertions(+), 104 deletions(-) diff --git a/core/string/ustring.cpp b/core/string/ustring.cpp index 4e26b613348..adab6d07c7f 100644 --- a/core/string/ustring.cpp +++ b/core/string/ustring.cpp @@ -220,37 +220,6 @@ void CharString::copy_from(const char *p_cstr) { /* String */ /*************************************************************************/ -//kind of poor should be rewritten properly -String String::word_wrap(int p_chars_per_line) const { - int from = 0; - int last_space = 0; - String ret; - for (int i = 0; i < length(); i++) { - if (i - from >= p_chars_per_line) { - if (last_space == -1) { - ret += substr(from, i - from + 1) + "\n"; - } else { - ret += substr(from, last_space - from) + "\n"; - i = last_space; //rewind - } - from = i + 1; - last_space = -1; - } else if (operator[](i) == ' ' || operator[](i) == '\t') { - 
last_space = i; - } else if (operator[](i) == '\n') { - ret += substr(from, i - from) + "\n"; - from = i + 1; - last_space = -1; - } - } - - if (from < length()) { - ret += substr(from, length()); - } - - return ret; -} - Error String::parse_url(String &r_scheme, String &r_host, int &r_port, String &r_path) const { // Splits the URL into scheme, host, port, path. Strip credentials when present. String base = *this; diff --git a/core/string/ustring.h b/core/string/ustring.h index ed3848fb8a6..559f679f0f6 100644 --- a/core/string/ustring.h +++ b/core/string/ustring.h @@ -425,7 +425,6 @@ public: String c_escape_multiline() const; String c_unescape() const; String json_escape() const; - String word_wrap(int p_chars_per_line) const; Error parse_url(String &r_scheme, String &r_host, int &r_port, String &r_path) const; String property_name_encode() const; diff --git a/doc/classes/TextServer.xml b/doc/classes/TextServer.xml index a4bccfb545d..637e502d496 100644 --- a/doc/classes/TextServer.xml +++ b/doc/classes/TextServer.xml @@ -1548,8 +1548,15 @@ + - Returns array of the word break character offsets. + Returns an array of the word break boundaries. Elements in the returned array are the offsets of the start and end of words. Therefore the length of the array is always even. + When [param chars_per_line] is greater than zero, line break boundaries are returned instead. 
+ [codeblock] + var ts = TextServerManager.get_primary_interface() + print(ts.string_get_word_breaks("Godot Engine")) # Prints [0, 5, 6, 12] + print(ts.string_get_word_breaks("Godot Engine", "en", 5)) # Prints [0, 5, 6, 11, 11, 12] + [/codeblock] diff --git a/doc/classes/TextServerExtension.xml b/doc/classes/TextServerExtension.xml index 44e65efc8cd..e144b09eb66 100644 --- a/doc/classes/TextServerExtension.xml +++ b/doc/classes/TextServerExtension.xml @@ -1346,6 +1346,7 @@ + diff --git a/editor/debugger/script_editor_debugger.cpp b/editor/debugger/script_editor_debugger.cpp index deca638f3b0..5cb7016b355 100644 --- a/editor/debugger/script_editor_debugger.cpp +++ b/editor/debugger/script_editor_debugger.cpp @@ -751,7 +751,16 @@ void ScriptEditorDebugger::_set_reason_text(const String &p_reason, MessageType reason->add_theme_color_override("font_color", get_theme_color(SNAME("success_color"), SNAME("Editor"))); } reason->set_text(p_reason); - reason->set_tooltip_text(p_reason.word_wrap(80)); + + const PackedInt32Array boundaries = TS->string_get_word_breaks(p_reason, "", 80); + PackedStringArray lines; + for (int i = 0; i < boundaries.size(); i += 2) { + const int start = boundaries[i]; + const int end = boundaries[i + 1]; + lines.append(p_reason.substr(start, end - start)); // Boundary pairs are [start, end): `end` is exclusive. + } + + reason->set_tooltip_text(String("\n").join(lines)); } void ScriptEditorDebugger::_notification(int p_what) { diff --git a/editor/scene_tree_editor.cpp b/editor/scene_tree_editor.cpp index 092ef306789..30a9dc5bbfd 100644 --- a/editor/scene_tree_editor.cpp +++ b/editor/scene_tree_editor.cpp @@ -132,8 +132,16 @@ void SceneTreeEditor::_cell_button_pressed(Object *p_item, int p_column, int p_i if (config_err.is_empty()) { return; } - config_err = config_err.word_wrap(80); - warning->set_text(config_err); + + const PackedInt32Array boundaries = TS->string_get_word_breaks(config_err, "", 80); + PackedStringArray lines; + for (int i = 0; i < boundaries.size(); i += 2) { + const int start = 
boundaries[i]; + const int end = boundaries[i + 1]; + lines.append(config_err.substr(start, end - start)); // Boundary pairs are [start, end): `end` is exclusive. + } + + warning->set_text(String("\n").join(lines)); warning->popup_centered(); } else if (p_id == BUTTON_SIGNALS) { diff --git a/modules/text_server_adv/text_server_adv.cpp b/modules/text_server_adv/text_server_adv.cpp index b4f3389c36f..046973d193f 100644 --- a/modules/text_server_adv/text_server_adv.cpp +++ b/modules/text_server_adv/text_server_adv.cpp @@ -6246,7 +6246,7 @@ String TextServerAdvanced::_string_to_lower(const String &p_string, const String return String::utf16(lower.ptr(), len); } -PackedInt32Array TextServerAdvanced::_string_get_word_breaks(const String &p_string, const String &p_language) const { +PackedInt32Array TextServerAdvanced::_string_get_word_breaks(const String &p_string, const String &p_language, int p_chars_per_line) const { const String lang = (p_language.is_empty()) ? TranslationServer::get_singleton()->get_tool_locale() : p_language; // Convert to UTF-16. Char16String utf16 = p_string.utf16(); @@ -6254,15 +6254,7 @@ PackedInt32Array TextServerAdvanced::_string_get_word_breaks(const String &p_str HashSet breaks; UErrorCode err = U_ZERO_ERROR; UBreakIterator *bi = ubrk_open(UBRK_LINE, lang.ascii().get_data(), (const UChar *)utf16.get_data(), utf16.length(), &err); - if (U_FAILURE(err)) { - // No data loaded - use fallback. - for (int i = 0; i < p_string.length(); i++) { - char32_t c = p_string[i]; - if (is_whitespace(c) || is_linebreak(c)) { - breaks.insert(i); - } - } - } else { + if (U_SUCCESS(err)) { while (ubrk_next(bi) != UBRK_DONE) { int pos = _convert_pos(p_string, utf16, ubrk_current(bi)) - 1; if (pos != p_string.length() - 1) { @@ -6273,24 +6265,80 @@ PackedInt32Array TextServerAdvanced::_string_get_word_breaks(const String &p_str ubrk_close(bi); PackedInt32Array ret; + + int line_start = 0; + int line_end = 0; // End of last word on current line. + int word_start = 0; // -1 if no word encountered. 
Leading spaces are part of a word. + int word_length = 0; + for (int i = 0; i < p_string.length(); i++) { - char32_t c = p_string[i]; - if (c == 0xfffc) { - continue; - } - if (u_ispunct(c) && c != 0x005F) { - ret.push_back(i); - continue; - } - if (is_underscore(c)) { - ret.push_back(i); - continue; - } - if (breaks.has(i)) { + const char32_t c = p_string[i]; + + if (is_linebreak(c)) { + // Force newline. + ret.push_back(line_start); ret.push_back(i); + line_start = i + 1; + line_end = line_start; + word_start = line_start; + word_length = 0; + } else if (c == 0xfffc) { continue; + } else if ((u_ispunct(c) && c != 0x005F) || is_underscore(c) || c == '\t' || is_whitespace(c)) { + // A whitespace ends current word. + if (word_length > 0) { + line_end = i - 1; + word_start = -1; + word_length = 0; + } + } else if (breaks.has(i)) { + // End current word, no space. + if (word_length > 0) { + line_end = i; + word_start = i + 1; + word_length = 0; + } + if (p_chars_per_line <= 0) { + ret.push_back(line_start); + ret.push_back(line_end + 1); + line_start = word_start; + line_end = line_start; + } + } else { + if (word_start == -1) { + word_start = i; + if (p_chars_per_line <= 0) { + ret.push_back(line_start); + ret.push_back(line_end + 1); + line_start = word_start; + line_end = line_start; + } + } + word_length += 1; + + if (p_chars_per_line > 0) { + if (word_length > p_chars_per_line) { + // Word too long: wrap before current character. + ret.push_back(line_start); + ret.push_back(i); + line_start = i; + line_end = i; + word_start = i; + word_length = 1; + } else if (i - line_start + 1 > p_chars_per_line) { + // Line too long: wrap after the last word. 
+ ret.push_back(line_start); + ret.push_back(line_end + 1); + line_start = word_start; + line_end = line_start; + } + } } } + if (line_start < p_string.length()) { + ret.push_back(line_start); + ret.push_back(p_string.length()); + } return ret; } diff --git a/modules/text_server_adv/text_server_adv.h b/modules/text_server_adv/text_server_adv.h index 8a9aa4356b3..59f44cf1429 100644 --- a/modules/text_server_adv/text_server_adv.h +++ b/modules/text_server_adv/text_server_adv.h @@ -915,7 +915,7 @@ public: MODBIND2RC(String, parse_number, const String &, const String &); MODBIND1RC(String, percent_sign, const String &); - MODBIND2RC(PackedInt32Array, string_get_word_breaks, const String &, const String &); + MODBIND3RC(PackedInt32Array, string_get_word_breaks, const String &, const String &, int); MODBIND2RC(int64_t, is_confusable, const String &, const PackedStringArray &); MODBIND1RC(bool, spoof_check, const String &); diff --git a/modules/text_server_fb/text_server_fb.cpp b/modules/text_server_fb/text_server_fb.cpp index 19abcde1fd4..2cee360f428 100644 --- a/modules/text_server_fb/text_server_fb.cpp +++ b/modules/text_server_fb/text_server_fb.cpp @@ -4099,26 +4099,69 @@ String TextServerFallback::_string_to_lower(const String &p_string, const String return lower; } -PackedInt32Array TextServerFallback::_string_get_word_breaks(const String &p_string, const String &p_language) const { +PackedInt32Array TextServerFallback::_string_get_word_breaks(const String &p_string, const String &p_language, int p_chars_per_line) const { PackedInt32Array ret; + + int line_start = 0; + int line_end = 0; // End of last word on current line. + int word_start = 0; // -1 if no word encountered. Leading spaces are part of a word. 
+ int word_length = 0; + for (int i = 0; i < p_string.length(); i++) { - char32_t c = p_string[i]; - if (c == 0xfffc) { - continue; - } - if (is_punct(c) && c != 0x005F) { - ret.push_back(i); - continue; - } - if (is_underscore(c)) { - ret.push_back(i); - continue; - } - if (is_whitespace(c) || is_linebreak(c)) { + const char32_t c = p_string[i]; + + if (is_linebreak(c)) { + // Force newline. + ret.push_back(line_start); ret.push_back(i); + line_start = i + 1; + line_end = line_start; + word_start = line_start; + word_length = 0; + } else if (c == 0xfffc) { continue; + } else if ((is_punct(c) && c != 0x005F) || is_underscore(c) || c == '\t' || is_whitespace(c)) { + // A whitespace ends current word. + if (word_length > 0) { + line_end = i - 1; + word_start = -1; + word_length = 0; + } + } else { + if (word_start == -1) { + word_start = i; + if (p_chars_per_line <= 0) { + ret.push_back(line_start); + ret.push_back(line_end + 1); + line_start = word_start; + line_end = line_start; + } + } + word_length += 1; + + if (p_chars_per_line > 0) { + if (word_length > p_chars_per_line) { + // Word too long: wrap before current character. + ret.push_back(line_start); + ret.push_back(i); + line_start = i; + line_end = i; + word_start = i; + word_length = 1; + } else if (i - line_start + 1 > p_chars_per_line) { + // Line too long: wrap after the last word. 
+ ret.push_back(line_start); + ret.push_back(line_end + 1); + line_start = word_start; + line_end = line_start; + } + } } } + if (line_start < p_string.length()) { + ret.push_back(line_start); + ret.push_back(p_string.length()); + } return ret; } diff --git a/modules/text_server_fb/text_server_fb.h b/modules/text_server_fb/text_server_fb.h index 11f37ab6d16..49e89214ecd 100644 --- a/modules/text_server_fb/text_server_fb.h +++ b/modules/text_server_fb/text_server_fb.h @@ -786,7 +786,7 @@ public: MODBIND1RC(double, shaped_text_get_underline_position, const RID &); MODBIND1RC(double, shaped_text_get_underline_thickness, const RID &); - MODBIND2RC(PackedInt32Array, string_get_word_breaks, const String &, const String &); + MODBIND3RC(PackedInt32Array, string_get_word_breaks, const String &, const String &, int); MODBIND2RC(String, string_to_upper, const String &, const String &); MODBIND2RC(String, string_to_lower, const String &, const String &); diff --git a/platform/linuxbsd/tts_linux.cpp b/platform/linuxbsd/tts_linux.cpp index aea1183d3dc..8fa708aad6d 100644 --- a/platform/linuxbsd/tts_linux.cpp +++ b/platform/linuxbsd/tts_linux.cpp @@ -117,13 +117,12 @@ void TTS_Linux::speech_event_callback(size_t p_msg_id, size_t p_client_id, SPDNo free_spd_voices(voices); } PackedInt32Array breaks = TS->string_get_word_breaks(message.text, language); - int prev = 0; - for (int i = 0; i < breaks.size(); i++) { - text += message.text.substr(prev, breaks[i] - prev); - text += ""; - prev = breaks[i]; + for (int i = 0; i < breaks.size(); i += 2) { + const int start = breaks[i]; + const int end = breaks[i + 1]; + text += message.text.substr(start, end - start + 1); + text += ""; } - text += message.text.substr(prev, -1); spd_set_synthesis_voice(tts->synth, message.voice.utf8().get_data()); spd_set_volume(tts->synth, message.volume * 2 - 100); diff --git a/servers/text/text_server_extension.cpp b/servers/text/text_server_extension.cpp index c2190296506..4baa1db9bfd 100644 --- 
a/servers/text/text_server_extension.cpp +++ b/servers/text/text_server_extension.cpp @@ -308,7 +308,7 @@ void TextServerExtension::_bind_methods() { GDVIRTUAL_BIND(_strip_diacritics, "string"); GDVIRTUAL_BIND(_is_valid_identifier, "string"); - GDVIRTUAL_BIND(_string_get_word_breaks, "string", "language"); + GDVIRTUAL_BIND(_string_get_word_breaks, "string", "language", "chars_per_line"); GDVIRTUAL_BIND(_is_confusable, "string", "dict"); GDVIRTUAL_BIND(_spoof_check, "string"); @@ -1379,9 +1379,9 @@ TypedArray TextServerExtension::parse_structured_text(StructuredTextPa return ret; } -PackedInt32Array TextServerExtension::string_get_word_breaks(const String &p_string, const String &p_language) const { +PackedInt32Array TextServerExtension::string_get_word_breaks(const String &p_string, const String &p_language, int p_chars_per_line) const { PackedInt32Array ret; - GDVIRTUAL_CALL(_string_get_word_breaks, p_string, p_language, ret); + GDVIRTUAL_CALL(_string_get_word_breaks, p_string, p_language, p_chars_per_line, ret); return ret; } diff --git a/servers/text/text_server_extension.h b/servers/text/text_server_extension.h index 56ed2e41ecb..551e4e90874 100644 --- a/servers/text/text_server_extension.h +++ b/servers/text/text_server_extension.h @@ -510,8 +510,8 @@ public: virtual String strip_diacritics(const String &p_string) const override; GDVIRTUAL1RC(String, _strip_diacritics, const String &); - virtual PackedInt32Array string_get_word_breaks(const String &p_string, const String &p_language = "") const override; - GDVIRTUAL2RC(PackedInt32Array, _string_get_word_breaks, const String &, const String &); + virtual PackedInt32Array string_get_word_breaks(const String &p_string, const String &p_language = "", int p_chars_per_line = 0) const override; + GDVIRTUAL3RC(PackedInt32Array, _string_get_word_breaks, const String &, const String &, int); virtual bool is_valid_identifier(const String &p_string) const override; GDVIRTUAL1RC(bool, _is_valid_identifier, const String &); 
diff --git a/servers/text_server.cpp b/servers/text_server.cpp index e11da53852e..c0f235fe503 100644 --- a/servers/text_server.cpp +++ b/servers/text_server.cpp @@ -454,7 +454,7 @@ void TextServer::_bind_methods() { ClassDB::bind_method(D_METHOD("parse_number", "number", "language"), &TextServer::parse_number, DEFVAL("")); ClassDB::bind_method(D_METHOD("percent_sign", "language"), &TextServer::percent_sign, DEFVAL("")); - ClassDB::bind_method(D_METHOD("string_get_word_breaks", "string", "language"), &TextServer::string_get_word_breaks, DEFVAL("")); + ClassDB::bind_method(D_METHOD("string_get_word_breaks", "string", "language", "chars_per_line"), &TextServer::string_get_word_breaks, DEFVAL(""), DEFVAL(0)); ClassDB::bind_method(D_METHOD("is_confusable", "string", "dict"), &TextServer::is_confusable); ClassDB::bind_method(D_METHOD("spoof_check", "string"), &TextServer::spoof_check); diff --git a/servers/text_server.h b/servers/text_server.h index 9508187cbc8..0d94f45b79c 100644 --- a/servers/text_server.h +++ b/servers/text_server.h @@ -493,7 +493,7 @@ public: virtual String percent_sign(const String &p_language = "") const = 0; // String functions. 
- virtual PackedInt32Array string_get_word_breaks(const String &p_string, const String &p_language = "") const = 0; + virtual PackedInt32Array string_get_word_breaks(const String &p_string, const String &p_language = "", int p_chars_per_line = 0) const = 0; virtual int64_t is_confusable(const String &p_string, const PackedStringArray &p_dict) const { return -1; }; virtual bool spoof_check(const String &p_string) const { return false; }; diff --git a/tests/servers/test_text_server.h b/tests/servers/test_text_server.h index 297f7d2068a..b3c120e0ba8 100644 --- a/tests/servers/test_text_server.h +++ b/tests/servers/test_text_server.h @@ -593,12 +593,18 @@ TEST_SUITE("[TextServer]") { String text1 = U"linguistically similar and effectively form"; // 14^ 22^ 26^ 38^ PackedInt32Array breaks = ts->string_get_word_breaks(text1, "en"); - CHECK(breaks.size() == 4); - if (breaks.size() == 4) { - CHECK(breaks[0] == 14); - CHECK(breaks[1] == 22); - CHECK(breaks[2] == 26); - CHECK(breaks[3] == 38); + CHECK(breaks.size() == 10); + if (breaks.size() == 10) { + CHECK(breaks[0] == 0); + CHECK(breaks[1] == 14); + CHECK(breaks[2] == 15); + CHECK(breaks[3] == 22); + CHECK(breaks[4] == 23); + CHECK(breaks[5] == 26); + CHECK(breaks[6] == 27); + CHECK(breaks[7] == 38); + CHECK(breaks[8] == 39); + CHECK(breaks[9] == 43); } } @@ -608,16 +614,26 @@ TEST_SUITE("[TextServer]") { // 3^ 7^ 13^ 16^ 20^ 25^ 29^ 32^ PackedInt32Array breaks = ts->string_get_word_breaks(text2, "th"); - CHECK(breaks.size() == 8); - if (breaks.size() == 8) { - CHECK(breaks[0] == 3); - CHECK(breaks[1] == 7); - CHECK(breaks[2] == 13); - CHECK(breaks[3] == 16); - CHECK(breaks[4] == 20); - CHECK(breaks[5] == 25); - CHECK(breaks[6] == 29); - CHECK(breaks[7] == 32); + CHECK(breaks.size() == 18); + if (breaks.size() == 18) { + CHECK(breaks[0] == 0); + CHECK(breaks[1] == 4); + CHECK(breaks[2] == 4); + CHECK(breaks[3] == 8); + CHECK(breaks[4] == 8); + CHECK(breaks[5] == 14); + CHECK(breaks[6] == 14); + CHECK(breaks[7] == 17); + 
CHECK(breaks[8] == 17); + CHECK(breaks[9] == 21); + CHECK(breaks[10] == 21); + CHECK(breaks[11] == 26); + CHECK(breaks[12] == 26); + CHECK(breaks[13] == 30); + CHECK(breaks[14] == 30); + CHECK(breaks[15] == 33); + CHECK(breaks[16] == 33); + CHECK(breaks[17] == 42); } } }