[TextServer] Fix get_word_breaks and its uses.
This commit is contained in:
parent
04a530f91f
commit
b5e3238109
|
@ -1739,8 +1739,9 @@
|
||||||
When [param chars_per_line] is greater than zero, line break boundaries are returned instead.
|
When [param chars_per_line] is greater than zero, line break boundaries are returned instead.
|
||||||
[codeblock]
|
[codeblock]
|
||||||
var ts = TextServerManager.get_primary_interface()
|
var ts = TextServerManager.get_primary_interface()
|
||||||
print(ts.string_get_word_breaks("Godot Engine")) # Prints [0, 5, 6, 12]
|
print(ts.string_get_word_breaks("The Godot Engine, 4")) # Prints [0, 3, 4, 9, 10, 16, 18, 19], which corresponds to the following substrings: "The", "Godot", "Engine", "4"
|
||||||
print(ts.string_get_word_breaks("Godot Engine", "en", 5)) # Prints [0, 5, 6, 11, 11, 12]
|
print(ts.string_get_word_breaks("The Godot Engine, 4", "en", 5)) # Prints [0, 3, 4, 9, 10, 15, 15, 19], which corresponds to the following substrings: "The", "Godot", "Engin", "e, 4"
|
||||||
|
print(ts.string_get_word_breaks("The Godot Engine, 4", "en", 10)) # Prints [0, 9, 10, 19], which corresponds to the following substrings: "The Godot", "Engine, 4"
|
||||||
[/codeblock]
|
[/codeblock]
|
||||||
</description>
|
</description>
|
||||||
</method>
|
</method>
|
||||||
|
|
|
@ -840,7 +840,7 @@ void ScriptEditorDebugger::_set_reason_text(const String &p_reason, MessageType
|
||||||
for (int i = 0; i < boundaries.size(); i += 2) {
|
for (int i = 0; i < boundaries.size(); i += 2) {
|
||||||
const int start = boundaries[i];
|
const int start = boundaries[i];
|
||||||
const int end = boundaries[i + 1];
|
const int end = boundaries[i + 1];
|
||||||
lines.append(p_reason.substr(start, end - start + 1));
|
lines.append(p_reason.substr(start, end - start));
|
||||||
}
|
}
|
||||||
|
|
||||||
reason->set_tooltip_text(String("\n").join(lines));
|
reason->set_tooltip_text(String("\n").join(lines));
|
||||||
|
|
|
@ -7048,10 +7048,10 @@ PackedInt32Array TextServerAdvanced::_string_get_word_breaks(const String &p_str
|
||||||
|
|
||||||
HashSet<int> breaks;
|
HashSet<int> breaks;
|
||||||
UErrorCode err = U_ZERO_ERROR;
|
UErrorCode err = U_ZERO_ERROR;
|
||||||
UBreakIterator *bi = ubrk_open(UBRK_LINE, lang.ascii().get_data(), (const UChar *)utf16.get_data(), utf16.length(), &err);
|
UBreakIterator *bi = ubrk_open(UBRK_WORD, lang.ascii().get_data(), (const UChar *)utf16.get_data(), utf16.length(), &err);
|
||||||
if (U_SUCCESS(err)) {
|
if (U_SUCCESS(err)) {
|
||||||
while (ubrk_next(bi) != UBRK_DONE) {
|
while (ubrk_next(bi) != UBRK_DONE) {
|
||||||
int pos = _convert_pos(p_string, utf16, ubrk_current(bi)) - 1;
|
int pos = _convert_pos(p_string, utf16, ubrk_current(bi));
|
||||||
if (pos != p_string.length() - 1) {
|
if (pos != p_string.length() - 1) {
|
||||||
breaks.insert(pos);
|
breaks.insert(pos);
|
||||||
}
|
}
|
||||||
|
@ -7061,79 +7061,111 @@ PackedInt32Array TextServerAdvanced::_string_get_word_breaks(const String &p_str
|
||||||
|
|
||||||
PackedInt32Array ret;
|
PackedInt32Array ret;
|
||||||
|
|
||||||
int line_start = 0;
|
if (p_chars_per_line > 0) {
|
||||||
int line_end = 0; // End of last word on current line.
|
int line_start = 0;
|
||||||
int word_start = 0; // -1 if no word encountered. Leading spaces are part of a word.
|
int last_break = -1;
|
||||||
int word_length = 0;
|
int line_length = 0;
|
||||||
|
|
||||||
for (int i = 0; i < p_string.length(); i++) {
|
for (int i = 0; i < p_string.length(); i++) {
|
||||||
const char32_t c = p_string[i];
|
const char32_t c = p_string[i];
|
||||||
|
|
||||||
if (is_linebreak(c)) {
|
bool is_lb = is_linebreak(c);
|
||||||
// Force newline.
|
bool is_ws = is_whitespace(c);
|
||||||
ret.push_back(line_start);
|
bool is_p = (u_ispunct(c) && c != 0x005F) || is_underscore(c) || c == '\t' || c == 0xfffc;
|
||||||
ret.push_back(i);
|
|
||||||
line_start = i + 1;
|
if (is_lb) {
|
||||||
line_end = line_start;
|
if (line_length > 0) {
|
||||||
word_start = line_start;
|
|
||||||
word_length = 0;
|
|
||||||
} else if (c == 0xfffc) {
|
|
||||||
continue;
|
|
||||||
} else if ((u_ispunct(c) && c != 0x005F) || is_underscore(c) || c == '\t' || is_whitespace(c)) {
|
|
||||||
// A whitespace ends current word.
|
|
||||||
if (word_length > 0) {
|
|
||||||
line_end = i - 1;
|
|
||||||
word_start = -1;
|
|
||||||
word_length = 0;
|
|
||||||
}
|
|
||||||
} else if (breaks.has(i)) {
|
|
||||||
// End current word, no space.
|
|
||||||
if (word_length > 0) {
|
|
||||||
line_end = i;
|
|
||||||
word_start = i + 1;
|
|
||||||
word_length = 0;
|
|
||||||
}
|
|
||||||
if (p_chars_per_line <= 0) {
|
|
||||||
ret.push_back(line_start);
|
|
||||||
ret.push_back(line_end + 1);
|
|
||||||
line_start = word_start;
|
|
||||||
line_end = line_start;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if (word_start == -1) {
|
|
||||||
word_start = i;
|
|
||||||
if (p_chars_per_line <= 0) {
|
|
||||||
ret.push_back(line_start);
|
ret.push_back(line_start);
|
||||||
ret.push_back(line_end + 1);
|
ret.push_back(i);
|
||||||
line_start = word_start;
|
|
||||||
line_end = line_start;
|
|
||||||
}
|
}
|
||||||
|
line_start = i;
|
||||||
|
line_length = 0;
|
||||||
|
last_break = -1;
|
||||||
|
continue;
|
||||||
|
} else if (breaks.has(i) || is_ws || is_p) {
|
||||||
|
last_break = i;
|
||||||
}
|
}
|
||||||
word_length += 1;
|
|
||||||
|
|
||||||
if (p_chars_per_line > 0) {
|
if (line_length == p_chars_per_line) {
|
||||||
if (word_length > p_chars_per_line) {
|
if (last_break != -1) {
|
||||||
// Word too long: wrap before current character.
|
int last_break_w_spaces = last_break;
|
||||||
|
while (last_break > line_start && is_whitespace(p_string[last_break - 1])) {
|
||||||
|
last_break--;
|
||||||
|
}
|
||||||
|
if (line_start != last_break) {
|
||||||
|
ret.push_back(line_start);
|
||||||
|
ret.push_back(last_break);
|
||||||
|
}
|
||||||
|
while (last_break_w_spaces < p_string.length() && is_whitespace(p_string[last_break_w_spaces])) {
|
||||||
|
last_break_w_spaces++;
|
||||||
|
}
|
||||||
|
line_start = last_break_w_spaces;
|
||||||
|
if (last_break_w_spaces < i) {
|
||||||
|
line_length = i - last_break_w_spaces;
|
||||||
|
} else {
|
||||||
|
i = last_break_w_spaces;
|
||||||
|
line_length = 0;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
ret.push_back(line_start);
|
ret.push_back(line_start);
|
||||||
ret.push_back(i);
|
ret.push_back(i);
|
||||||
line_start = i;
|
line_start = i;
|
||||||
line_end = i;
|
line_length = 0;
|
||||||
word_start = i;
|
|
||||||
word_length = 1;
|
|
||||||
} else if (i - line_start + 1 > p_chars_per_line) {
|
|
||||||
// Line too long: wrap after the last word.
|
|
||||||
ret.push_back(line_start);
|
|
||||||
ret.push_back(line_end + 1);
|
|
||||||
line_start = word_start;
|
|
||||||
line_end = line_start;
|
|
||||||
}
|
}
|
||||||
|
last_break = -1;
|
||||||
}
|
}
|
||||||
|
line_length++;
|
||||||
|
}
|
||||||
|
if (line_length > 0) {
|
||||||
|
ret.push_back(line_start);
|
||||||
|
ret.push_back(p_string.length());
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
int word_start = 0; // -1 if no word encountered. Leading spaces are part of a word.
|
||||||
|
int word_length = 0;
|
||||||
|
|
||||||
|
for (int i = 0; i < p_string.length(); i++) {
|
||||||
|
const char32_t c = p_string[i];
|
||||||
|
|
||||||
|
bool is_lb = is_linebreak(c);
|
||||||
|
bool is_ws = is_whitespace(c);
|
||||||
|
bool is_p = (u_ispunct(c) && c != 0x005F) || is_underscore(c) || c == '\t' || c == 0xfffc;
|
||||||
|
|
||||||
|
if (word_start == -1) {
|
||||||
|
if (!is_lb && !is_ws && !is_p) {
|
||||||
|
word_start = i;
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (is_lb) {
|
||||||
|
if (word_start != -1 && word_length > 0) {
|
||||||
|
ret.push_back(word_start);
|
||||||
|
ret.push_back(i);
|
||||||
|
}
|
||||||
|
word_start = -1;
|
||||||
|
word_length = 0;
|
||||||
|
} else if (breaks.has(i) || is_ws || is_p) {
|
||||||
|
if (word_start != -1 && word_length > 0) {
|
||||||
|
ret.push_back(word_start);
|
||||||
|
ret.push_back(i);
|
||||||
|
}
|
||||||
|
if (is_ws || is_p) {
|
||||||
|
word_start = -1;
|
||||||
|
} else {
|
||||||
|
word_start = i;
|
||||||
|
}
|
||||||
|
word_length = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
word_length++;
|
||||||
|
}
|
||||||
|
if (word_start != -1 && word_length > 0) {
|
||||||
|
ret.push_back(word_start);
|
||||||
|
ret.push_back(p_string.length());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (line_start < p_string.length()) {
|
|
||||||
ret.push_back(line_start);
|
|
||||||
ret.push_back(p_string.length());
|
|
||||||
}
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -4492,65 +4492,105 @@ String TextServerFallback::_string_to_title(const String &p_string, const String
|
||||||
PackedInt32Array TextServerFallback::_string_get_word_breaks(const String &p_string, const String &p_language, int64_t p_chars_per_line) const {
|
PackedInt32Array TextServerFallback::_string_get_word_breaks(const String &p_string, const String &p_language, int64_t p_chars_per_line) const {
|
||||||
PackedInt32Array ret;
|
PackedInt32Array ret;
|
||||||
|
|
||||||
int line_start = 0;
|
if (p_chars_per_line > 0) {
|
||||||
int line_end = 0; // End of last word on current line.
|
int line_start = 0;
|
||||||
int word_start = 0; // -1 if no word encountered. Leading spaces are part of a word.
|
int last_break = -1;
|
||||||
int word_length = 0;
|
int line_length = 0;
|
||||||
|
|
||||||
for (int i = 0; i < p_string.length(); i++) {
|
for (int i = 0; i < p_string.length(); i++) {
|
||||||
const char32_t c = p_string[i];
|
const char32_t c = p_string[i];
|
||||||
|
|
||||||
if (is_linebreak(c)) {
|
bool is_lb = is_linebreak(c);
|
||||||
// Force newline.
|
bool is_ws = is_whitespace(c);
|
||||||
ret.push_back(line_start);
|
bool is_p = (is_punct(c) && c != 0x005F) || is_underscore(c) || c == '\t' || c == 0xfffc;
|
||||||
ret.push_back(i);
|
|
||||||
line_start = i + 1;
|
if (is_lb) {
|
||||||
line_end = line_start;
|
if (line_length > 0) {
|
||||||
word_start = line_start;
|
|
||||||
word_length = 0;
|
|
||||||
} else if (c == 0xfffc) {
|
|
||||||
continue;
|
|
||||||
} else if ((is_punct(c) && c != 0x005F) || is_underscore(c) || c == '\t' || is_whitespace(c)) {
|
|
||||||
// A whitespace ends current word.
|
|
||||||
if (word_length > 0) {
|
|
||||||
line_end = i - 1;
|
|
||||||
word_start = -1;
|
|
||||||
word_length = 0;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if (word_start == -1) {
|
|
||||||
word_start = i;
|
|
||||||
if (p_chars_per_line <= 0) {
|
|
||||||
ret.push_back(line_start);
|
ret.push_back(line_start);
|
||||||
ret.push_back(line_end + 1);
|
ret.push_back(i);
|
||||||
line_start = word_start;
|
|
||||||
line_end = line_start;
|
|
||||||
}
|
}
|
||||||
|
line_start = i;
|
||||||
|
line_length = 0;
|
||||||
|
last_break = -1;
|
||||||
|
continue;
|
||||||
|
} else if (is_ws || is_p) {
|
||||||
|
last_break = i;
|
||||||
}
|
}
|
||||||
word_length += 1;
|
|
||||||
|
|
||||||
if (p_chars_per_line > 0) {
|
if (line_length == p_chars_per_line) {
|
||||||
if (word_length > p_chars_per_line) {
|
if (last_break != -1) {
|
||||||
// Word too long: wrap before current character.
|
int last_break_w_spaces = last_break;
|
||||||
|
while (last_break > line_start && is_whitespace(p_string[last_break - 1])) {
|
||||||
|
last_break--;
|
||||||
|
}
|
||||||
|
if (line_start != last_break) {
|
||||||
|
ret.push_back(line_start);
|
||||||
|
ret.push_back(last_break);
|
||||||
|
}
|
||||||
|
while (last_break_w_spaces < p_string.length() && is_whitespace(p_string[last_break_w_spaces])) {
|
||||||
|
last_break_w_spaces++;
|
||||||
|
}
|
||||||
|
line_start = last_break_w_spaces;
|
||||||
|
if (last_break_w_spaces < i) {
|
||||||
|
line_length = i - last_break_w_spaces;
|
||||||
|
} else {
|
||||||
|
i = last_break_w_spaces;
|
||||||
|
line_length = 0;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
ret.push_back(line_start);
|
ret.push_back(line_start);
|
||||||
ret.push_back(i);
|
ret.push_back(i);
|
||||||
line_start = i;
|
line_start = i;
|
||||||
line_end = i;
|
line_length = 0;
|
||||||
word_start = i;
|
|
||||||
word_length = 1;
|
|
||||||
} else if (i - line_start + 1 > p_chars_per_line) {
|
|
||||||
// Line too long: wrap after the last word.
|
|
||||||
ret.push_back(line_start);
|
|
||||||
ret.push_back(line_end + 1);
|
|
||||||
line_start = word_start;
|
|
||||||
line_end = line_start;
|
|
||||||
}
|
}
|
||||||
|
last_break = -1;
|
||||||
}
|
}
|
||||||
|
line_length++;
|
||||||
|
}
|
||||||
|
if (line_length > 0) {
|
||||||
|
ret.push_back(line_start);
|
||||||
|
ret.push_back(p_string.length());
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
int word_start = 0; // -1 if no word encountered. Leading spaces are part of a word.
|
||||||
|
int word_length = 0;
|
||||||
|
|
||||||
|
for (int i = 0; i < p_string.length(); i++) {
|
||||||
|
const char32_t c = p_string[i];
|
||||||
|
|
||||||
|
bool is_lb = is_linebreak(c);
|
||||||
|
bool is_ws = is_whitespace(c);
|
||||||
|
bool is_p = (is_punct(c) && c != 0x005F) || is_underscore(c) || c == '\t' || c == 0xfffc;
|
||||||
|
|
||||||
|
if (word_start == -1) {
|
||||||
|
if (!is_lb && !is_ws && !is_p) {
|
||||||
|
word_start = i;
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (is_lb) {
|
||||||
|
if (word_start != -1 && word_length > 0) {
|
||||||
|
ret.push_back(word_start);
|
||||||
|
ret.push_back(i);
|
||||||
|
}
|
||||||
|
word_start = -1;
|
||||||
|
word_length = 0;
|
||||||
|
} else if (is_ws || is_p) {
|
||||||
|
if (word_start != -1 && word_length > 0) {
|
||||||
|
ret.push_back(word_start);
|
||||||
|
ret.push_back(i);
|
||||||
|
}
|
||||||
|
word_start = -1;
|
||||||
|
word_length = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
word_length++;
|
||||||
|
}
|
||||||
|
if (word_start != -1 && word_length > 0) {
|
||||||
|
ret.push_back(word_start);
|
||||||
|
ret.push_back(p_string.length());
|
||||||
}
|
}
|
||||||
}
|
|
||||||
if (line_start < p_string.length()) {
|
|
||||||
ret.push_back(line_start);
|
|
||||||
ret.push_back(p_string.length());
|
|
||||||
}
|
}
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
|
@ -149,12 +149,18 @@ void TTS_Linux::_speech_event(int p_msg_id, int p_type) {
|
||||||
}
|
}
|
||||||
|
|
||||||
PackedInt32Array breaks = TS->string_get_word_breaks(message.text, language);
|
PackedInt32Array breaks = TS->string_get_word_breaks(message.text, language);
|
||||||
|
int prev_end = -1;
|
||||||
for (int i = 0; i < breaks.size(); i += 2) {
|
for (int i = 0; i < breaks.size(); i += 2) {
|
||||||
const int start = breaks[i];
|
const int start = breaks[i];
|
||||||
const int end = breaks[i + 1];
|
const int end = breaks[i + 1];
|
||||||
text += message.text.substr(start, end - start + 1);
|
if (prev_end != -1 && prev_end != start) {
|
||||||
|
text += message.text.substr(prev_end, start - prev_end);
|
||||||
|
}
|
||||||
|
text += message.text.substr(start, end - start);
|
||||||
text += "<mark name=\"" + String::num_int64(end, 10) + "\"/>";
|
text += "<mark name=\"" + String::num_int64(end, 10) + "\"/>";
|
||||||
|
prev_end = end;
|
||||||
}
|
}
|
||||||
|
|
||||||
spd_set_synthesis_voice(synth, message.voice.utf8().get_data());
|
spd_set_synthesis_voice(synth, message.voice.utf8().get_data());
|
||||||
spd_set_volume(synth, message.volume * 2 - 100);
|
spd_set_volume(synth, message.volume * 2 - 100);
|
||||||
spd_set_voice_pitch(synth, (message.pitch - 1) * 100);
|
spd_set_voice_pitch(synth, (message.pitch - 1) * 100);
|
||||||
|
|
Loading…
Reference in New Issue