Fix String::word_wrap() for long words

- Changes `TextServer.string_get_word_breaks()`
  - Returns pairs of boundary start and end offsets
  - Accepts `chars_per_line` to return line breaks
- Removes `String::word_wrap()`

Co-authored-by: bruvzg <7645683+bruvzg@users.noreply.github.com>
This commit is contained in:
Haoyu Qiu 2022-08-18 16:20:20 +08:00
parent f18f2740da
commit 207e52c161
16 changed files with 203 additions and 104 deletions

View File

@ -220,37 +220,6 @@ void CharString::copy_from(const char *p_cstr) {
/* String */
/*************************************************************************/
// Returns a copy of this string with line breaks inserted so that no
// non-whitespace character sits beyond column `p_chars_per_line` of its line.
//
// - Existing '\n' characters are kept and always start a new line.
// - A line is preferably broken at its last space or tab; that single
//   whitespace character is replaced by the inserted '\n'. Any whitespace that
//   precedes it stays at the end of the line, so a run of whitespace may extend
//   past the requested width.
// - A word longer than `p_chars_per_line` is split into chunks of exactly
//   `p_chars_per_line` characters.
// - A non-positive `p_chars_per_line` disables wrapping and returns the string
//   unchanged.
String String::word_wrap(int p_chars_per_line) const {
	// No character fits in a non-positive width; wrapping would only shred the text.
	if (p_chars_per_line <= 0) {
		return *this;
	}

	const int len = length();
	String ret;
	int line_start = 0; // Index of the first character of the line being built.
	int last_space = -1; // Index of the last space/tab on the current line, -1 if none yet.

	for (int i = 0; i < len; i++) {
		// Classify the character before testing the width, so that a newline or a
		// space arriving exactly at the limit is handled as a break, not as text.
		const char32_t c = operator[](i);
		if (c == '\n') {
			// Explicit line break: flush the current line as is.
			ret += substr(line_start, i - line_start) + "\n";
			line_start = i + 1;
			last_space = -1;
		} else if (c == ' ' || c == '\t') {
			// Whitespace never overflows a line by itself; it marks a break opportunity.
			last_space = i;
		} else if (i - line_start >= p_chars_per_line) {
			// Appending this character would exceed the requested width.
			if (last_space == -1) {
				// No break opportunity: the word is longer than a line, so split
				// right before the current character.
				ret += substr(line_start, i - line_start) + "\n";
				line_start = i;
			} else {
				// Break at the last whitespace. Everything after it up to `i` is
				// part of the current word and moves to the new line.
				ret += substr(line_start, last_space - line_start) + "\n";
				line_start = last_space + 1;
			}
			last_space = -1;
		}
	}

	// Emit whatever remains after the last break.
	if (line_start < len) {
		ret += substr(line_start, len - line_start);
	}
	return ret;
}
Error String::parse_url(String &r_scheme, String &r_host, int &r_port, String &r_path) const {
// Splits the URL into scheme, host, port, path. Strip credentials when present.
String base = *this;

View File

@ -425,7 +425,6 @@ public:
String c_escape_multiline() const;
String c_unescape() const;
String json_escape() const;
String word_wrap(int p_chars_per_line) const;
Error parse_url(String &r_scheme, String &r_host, int &r_port, String &r_path) const;
String property_name_encode() const;

View File

@ -1548,8 +1548,15 @@
<return type="PackedInt32Array" />
<param index="0" name="string" type="String" />
<param index="1" name="language" type="String" default="&quot;&quot;" />
<param index="2" name="chars_per_line" type="int" default="0" />
<description>
Returns array of the word break character offsets.
Returns an array of the word break boundaries. Elements in the returned array are stored as consecutive pairs: the offset of the start of a word followed by the offset of its end (exclusive). Therefore, the length of the array is always even.
When [param chars_per_line] is greater than zero, line break boundaries are returned instead.
[codeblock]
var ts = TextServerManager.get_primary_interface()
print(ts.string_get_word_breaks("Godot Engine")) # Prints [0, 5, 6, 12]
print(ts.string_get_word_breaks("Godot Engine", "en", 5)) # Prints [0, 5, 6, 11, 11, 12]
[/codeblock]
</description>
</method>
<method name="string_to_lower" qualifiers="const">

View File

@ -1346,6 +1346,7 @@
<return type="PackedInt32Array" />
<param index="0" name="string" type="String" />
<param index="1" name="language" type="String" />
<param index="2" name="chars_per_line" type="int" />
<description>
</description>
</method>

View File

@ -751,7 +751,16 @@ void ScriptEditorDebugger::_set_reason_text(const String &p_reason, MessageType
reason->add_theme_color_override("font_color", get_theme_color(SNAME("success_color"), SNAME("Editor")));
}
reason->set_text(p_reason);
reason->set_tooltip_text(p_reason.word_wrap(80));
const PackedInt32Array boundaries = TS->string_get_word_breaks(p_reason, "", 80);
PackedStringArray lines;
for (int i = 0; i < boundaries.size(); i += 2) {
const int start = boundaries[i];
const int end = boundaries[i + 1];
lines.append(p_reason.substr(start, end - start + 1));
}
reason->set_tooltip_text(String("\n").join(lines));
}
void ScriptEditorDebugger::_notification(int p_what) {

View File

@ -132,8 +132,16 @@ void SceneTreeEditor::_cell_button_pressed(Object *p_item, int p_column, int p_i
if (config_err.is_empty()) {
return;
}
config_err = config_err.word_wrap(80);
warning->set_text(config_err);
const PackedInt32Array boundaries = TS->string_get_word_breaks(config_err, "", 80);
PackedStringArray lines;
for (int i = 0; i < boundaries.size(); i += 2) {
const int start = boundaries[i];
const int end = boundaries[i + 1];
lines.append(config_err.substr(start, end - start + 1));
}
warning->set_text(String("\n").join(lines));
warning->popup_centered();
} else if (p_id == BUTTON_SIGNALS) {

View File

@ -6246,7 +6246,7 @@ String TextServerAdvanced::_string_to_lower(const String &p_string, const String
return String::utf16(lower.ptr(), len);
}
PackedInt32Array TextServerAdvanced::_string_get_word_breaks(const String &p_string, const String &p_language) const {
PackedInt32Array TextServerAdvanced::_string_get_word_breaks(const String &p_string, const String &p_language, int p_chars_per_line) const {
const String lang = (p_language.is_empty()) ? TranslationServer::get_singleton()->get_tool_locale() : p_language;
// Convert to UTF-16.
Char16String utf16 = p_string.utf16();
@ -6254,15 +6254,7 @@ PackedInt32Array TextServerAdvanced::_string_get_word_breaks(const String &p_str
HashSet<int> breaks;
UErrorCode err = U_ZERO_ERROR;
UBreakIterator *bi = ubrk_open(UBRK_LINE, lang.ascii().get_data(), (const UChar *)utf16.get_data(), utf16.length(), &err);
if (U_FAILURE(err)) {
// No data loaded - use fallback.
for (int i = 0; i < p_string.length(); i++) {
char32_t c = p_string[i];
if (is_whitespace(c) || is_linebreak(c)) {
breaks.insert(i);
}
}
} else {
if (U_SUCCESS(err)) {
while (ubrk_next(bi) != UBRK_DONE) {
int pos = _convert_pos(p_string, utf16, ubrk_current(bi)) - 1;
if (pos != p_string.length() - 1) {
@ -6273,24 +6265,80 @@ PackedInt32Array TextServerAdvanced::_string_get_word_breaks(const String &p_str
ubrk_close(bi);
PackedInt32Array ret;
int line_start = 0;
int line_end = 0; // End of last word on current line.
int word_start = 0; // -1 if no word encountered. Leading spaces are part of a word.
int word_length = 0;
for (int i = 0; i < p_string.length(); i++) {
char32_t c = p_string[i];
if (c == 0xfffc) {
continue;
}
if (u_ispunct(c) && c != 0x005F) {
const char32_t c = p_string[i];
if (is_linebreak(c)) {
// Force newline.
ret.push_back(line_start);
ret.push_back(i);
line_start = i + 1;
line_end = line_start;
word_start = line_start;
word_length = 0;
} else if (c == 0xfffc) {
continue;
} else if ((u_ispunct(c) && c != 0x005F) || is_underscore(c) || c == '\t' || is_whitespace(c)) {
// Whitespace or punctuation (including an underscore) ends the current word.
if (word_length > 0) {
line_end = i - 1;
word_start = -1;
word_length = 0;
}
if (is_underscore(c)) {
} else if (breaks.has(i)) {
// End current word, no space.
if (word_length > 0) {
line_end = i;
word_start = i + 1;
word_length = 0;
}
if (p_chars_per_line <= 0) {
ret.push_back(line_start);
ret.push_back(line_end + 1);
line_start = word_start;
line_end = line_start;
}
} else {
if (word_start == -1) {
word_start = i;
if (p_chars_per_line <= 0) {
ret.push_back(line_start);
ret.push_back(line_end + 1);
line_start = word_start;
line_end = line_start;
}
}
word_length += 1;
if (p_chars_per_line > 0) {
if (word_length > p_chars_per_line) {
// Word too long: wrap before current character.
ret.push_back(line_start);
ret.push_back(i);
continue;
line_start = i;
line_end = i;
word_start = i;
word_length = 1;
} else if (i - line_start + 1 > p_chars_per_line) {
// Line too long: wrap after the last word.
ret.push_back(line_start);
ret.push_back(line_end + 1);
line_start = word_start;
line_end = line_start;
}
if (breaks.has(i)) {
ret.push_back(i);
continue;
}
}
}
if (line_start < p_string.length()) {
ret.push_back(line_start);
ret.push_back(p_string.length());
}
return ret;
}

View File

@ -915,7 +915,7 @@ public:
MODBIND2RC(String, parse_number, const String &, const String &);
MODBIND1RC(String, percent_sign, const String &);
MODBIND2RC(PackedInt32Array, string_get_word_breaks, const String &, const String &);
MODBIND3RC(PackedInt32Array, string_get_word_breaks, const String &, const String &, int);
MODBIND2RC(int64_t, is_confusable, const String &, const PackedStringArray &);
MODBIND1RC(bool, spoof_check, const String &);

View File

@ -4099,26 +4099,69 @@ String TextServerFallback::_string_to_lower(const String &p_string, const String
return lower;
}
PackedInt32Array TextServerFallback::_string_get_word_breaks(const String &p_string, const String &p_language) const {
PackedInt32Array TextServerFallback::_string_get_word_breaks(const String &p_string, const String &p_language, int p_chars_per_line) const {
PackedInt32Array ret;
int line_start = 0;
int line_end = 0; // End of last word on current line.
int word_start = 0; // -1 if no word encountered. Leading spaces are part of a word.
int word_length = 0;
for (int i = 0; i < p_string.length(); i++) {
char32_t c = p_string[i];
if (c == 0xfffc) {
continue;
}
if (is_punct(c) && c != 0x005F) {
const char32_t c = p_string[i];
if (is_linebreak(c)) {
// Force newline.
ret.push_back(line_start);
ret.push_back(i);
line_start = i + 1;
line_end = line_start;
word_start = line_start;
word_length = 0;
} else if (c == 0xfffc) {
continue;
} else if ((is_punct(c) && c != 0x005F) || is_underscore(c) || c == '\t' || is_whitespace(c)) {
// Whitespace or punctuation (including an underscore) ends the current word.
if (word_length > 0) {
line_end = i - 1;
word_start = -1;
word_length = 0;
}
if (is_underscore(c)) {
} else {
if (word_start == -1) {
word_start = i;
if (p_chars_per_line <= 0) {
ret.push_back(line_start);
ret.push_back(line_end + 1);
line_start = word_start;
line_end = line_start;
}
}
word_length += 1;
if (p_chars_per_line > 0) {
if (word_length > p_chars_per_line) {
// Word too long: wrap before current character.
ret.push_back(line_start);
ret.push_back(i);
continue;
line_start = i;
line_end = i;
word_start = i;
word_length = 1;
} else if (i - line_start + 1 > p_chars_per_line) {
// Line too long: wrap after the last word.
ret.push_back(line_start);
ret.push_back(line_end + 1);
line_start = word_start;
line_end = line_start;
}
if (is_whitespace(c) || is_linebreak(c)) {
ret.push_back(i);
continue;
}
}
}
if (line_start < p_string.length()) {
ret.push_back(line_start);
ret.push_back(p_string.length());
}
return ret;
}

View File

@ -786,7 +786,7 @@ public:
MODBIND1RC(double, shaped_text_get_underline_position, const RID &);
MODBIND1RC(double, shaped_text_get_underline_thickness, const RID &);
MODBIND2RC(PackedInt32Array, string_get_word_breaks, const String &, const String &);
MODBIND3RC(PackedInt32Array, string_get_word_breaks, const String &, const String &, int);
MODBIND2RC(String, string_to_upper, const String &, const String &);
MODBIND2RC(String, string_to_lower, const String &, const String &);

View File

@ -117,13 +117,12 @@ void TTS_Linux::speech_event_callback(size_t p_msg_id, size_t p_client_id, SPDNo
free_spd_voices(voices);
}
PackedInt32Array breaks = TS->string_get_word_breaks(message.text, language);
int prev = 0;
for (int i = 0; i < breaks.size(); i++) {
text += message.text.substr(prev, breaks[i] - prev);
text += "<mark name=\"" + String::num_int64(breaks[i], 10) + "\"/>";
prev = breaks[i];
for (int i = 0; i < breaks.size(); i += 2) {
const int start = breaks[i];
const int end = breaks[i + 1];
text += message.text.substr(start, end - start + 1);
text += "<mark name=\"" + String::num_int64(end, 10) + "\"/>";
}
text += message.text.substr(prev, -1);
spd_set_synthesis_voice(tts->synth, message.voice.utf8().get_data());
spd_set_volume(tts->synth, message.volume * 2 - 100);

View File

@ -308,7 +308,7 @@ void TextServerExtension::_bind_methods() {
GDVIRTUAL_BIND(_strip_diacritics, "string");
GDVIRTUAL_BIND(_is_valid_identifier, "string");
GDVIRTUAL_BIND(_string_get_word_breaks, "string", "language");
GDVIRTUAL_BIND(_string_get_word_breaks, "string", "language", "chars_per_line");
GDVIRTUAL_BIND(_is_confusable, "string", "dict");
GDVIRTUAL_BIND(_spoof_check, "string");
@ -1379,9 +1379,9 @@ TypedArray<Vector2i> TextServerExtension::parse_structured_text(StructuredTextPa
return ret;
}
PackedInt32Array TextServerExtension::string_get_word_breaks(const String &p_string, const String &p_language) const {
PackedInt32Array TextServerExtension::string_get_word_breaks(const String &p_string, const String &p_language, int p_chars_per_line) const {
PackedInt32Array ret;
GDVIRTUAL_CALL(_string_get_word_breaks, p_string, p_language, ret);
GDVIRTUAL_CALL(_string_get_word_breaks, p_string, p_language, p_chars_per_line, ret);
return ret;
}

View File

@ -510,8 +510,8 @@ public:
virtual String strip_diacritics(const String &p_string) const override;
GDVIRTUAL1RC(String, _strip_diacritics, const String &);
virtual PackedInt32Array string_get_word_breaks(const String &p_string, const String &p_language = "") const override;
GDVIRTUAL2RC(PackedInt32Array, _string_get_word_breaks, const String &, const String &);
virtual PackedInt32Array string_get_word_breaks(const String &p_string, const String &p_language = "", int p_chars_per_line = 0) const override;
GDVIRTUAL3RC(PackedInt32Array, _string_get_word_breaks, const String &, const String &, int);
virtual bool is_valid_identifier(const String &p_string) const override;
GDVIRTUAL1RC(bool, _is_valid_identifier, const String &);

View File

@ -454,7 +454,7 @@ void TextServer::_bind_methods() {
ClassDB::bind_method(D_METHOD("parse_number", "number", "language"), &TextServer::parse_number, DEFVAL(""));
ClassDB::bind_method(D_METHOD("percent_sign", "language"), &TextServer::percent_sign, DEFVAL(""));
ClassDB::bind_method(D_METHOD("string_get_word_breaks", "string", "language"), &TextServer::string_get_word_breaks, DEFVAL(""));
ClassDB::bind_method(D_METHOD("string_get_word_breaks", "string", "language", "chars_per_line"), &TextServer::string_get_word_breaks, DEFVAL(""), DEFVAL(0));
ClassDB::bind_method(D_METHOD("is_confusable", "string", "dict"), &TextServer::is_confusable);
ClassDB::bind_method(D_METHOD("spoof_check", "string"), &TextServer::spoof_check);

View File

@ -493,7 +493,7 @@ public:
virtual String percent_sign(const String &p_language = "") const = 0;
// String functions.
virtual PackedInt32Array string_get_word_breaks(const String &p_string, const String &p_language = "") const = 0;
virtual PackedInt32Array string_get_word_breaks(const String &p_string, const String &p_language = "", int p_chars_per_line = 0) const = 0;
virtual int64_t is_confusable(const String &p_string, const PackedStringArray &p_dict) const { return -1; };
virtual bool spoof_check(const String &p_string) const { return false; };

View File

@ -593,12 +593,18 @@ TEST_SUITE("[TextServer]") {
String text1 = U"linguistically similar and effectively form";
// 14^ 22^ 26^ 38^
PackedInt32Array breaks = ts->string_get_word_breaks(text1, "en");
CHECK(breaks.size() == 4);
if (breaks.size() == 4) {
CHECK(breaks[0] == 14);
CHECK(breaks[1] == 22);
CHECK(breaks[2] == 26);
CHECK(breaks[3] == 38);
CHECK(breaks.size() == 10);
if (breaks.size() == 10) {
CHECK(breaks[0] == 0);
CHECK(breaks[1] == 14);
CHECK(breaks[2] == 15);
CHECK(breaks[3] == 22);
CHECK(breaks[4] == 23);
CHECK(breaks[5] == 26);
CHECK(breaks[6] == 27);
CHECK(breaks[7] == 38);
CHECK(breaks[8] == 39);
CHECK(breaks[9] == 43);
}
}
@ -608,16 +614,26 @@ TEST_SUITE("[TextServer]") {
// 3^ 7^ 13^ 16^ 20^ 25^ 29^ 32^
PackedInt32Array breaks = ts->string_get_word_breaks(text2, "th");
CHECK(breaks.size() == 8);
if (breaks.size() == 8) {
CHECK(breaks[0] == 3);
CHECK(breaks[1] == 7);
CHECK(breaks[2] == 13);
CHECK(breaks[3] == 16);
CHECK(breaks[4] == 20);
CHECK(breaks[5] == 25);
CHECK(breaks[6] == 29);
CHECK(breaks[7] == 32);
CHECK(breaks.size() == 18);
if (breaks.size() == 18) {
CHECK(breaks[0] == 0);
CHECK(breaks[1] == 4);
CHECK(breaks[2] == 4);
CHECK(breaks[3] == 8);
CHECK(breaks[4] == 8);
CHECK(breaks[5] == 14);
CHECK(breaks[6] == 14);
CHECK(breaks[7] == 17);
CHECK(breaks[8] == 17);
CHECK(breaks[9] == 21);
CHECK(breaks[10] == 21);
CHECK(breaks[11] == 26);
CHECK(breaks[12] == 26);
CHECK(breaks[13] == 30);
CHECK(breaks[14] == 30);
CHECK(breaks[15] == 33);
CHECK(breaks[16] == 33);
CHECK(breaks[17] == 42);
}
}
}