From ee19a092d9b5fa81404c7ab5778b70be8b4eab5a Mon Sep 17 00:00:00 2001 From: A Thousand Ships <96648715+AThousandShips@users.noreply.github.com> Date: Thu, 30 May 2024 16:36:50 +0200 Subject: [PATCH] [Core] Optimize some `String` methods Avoids unnecessary COW checks by using pointers directly. --- core/string/ustring.cpp | 81 +++++++++++++++++++++++++---------------- 1 file changed, 50 insertions(+), 31 deletions(-) diff --git a/core/string/ustring.cpp b/core/string/ustring.cpp index 3d37e17ef83..6be8ad126cc 100644 --- a/core/string/ustring.cpp +++ b/core/string/ustring.cpp @@ -1658,30 +1658,40 @@ char32_t String::char_lowercase(char32_t p_char) { } String String::to_upper() const { - String upper = *this; - - for (int i = 0; i < upper.size(); i++) { - const char32_t s = upper[i]; - const char32_t t = _find_upper(s); - if (s != t) { // avoid copy on write - upper[i] = t; - } + if (is_empty()) { + return *this; } + String upper; + upper.resize(size()); + const char32_t *old_ptr = ptr(); + char32_t *upper_ptrw = upper.ptrw(); + + while (*old_ptr) { + *upper_ptrw++ = _find_upper(*old_ptr++); + } + + *upper_ptrw = 0; + return upper; } String String::to_lower() const { - String lower = *this; - - for (int i = 0; i < lower.size(); i++) { - const char32_t s = lower[i]; - const char32_t t = _find_lower(s); - if (s != t) { // avoid copy on write - lower[i] = t; - } + if (is_empty()) { + return *this; } + String lower; + lower.resize(size()); + const char32_t *old_ptr = ptr(); + char32_t *lower_ptrw = lower.ptrw(); + + while (*old_ptr) { + *lower_ptrw++ = _find_lower(*old_ptr++); + } + + *lower_ptrw = 0; + return lower; } @@ -1919,15 +1929,16 @@ String String::hex_encode_buffer(const uint8_t *p_buffer, int p_len) { static const char hex[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' }; String ret; - char v[2] = { 0, 0 }; + ret.resize(p_len * 2 + 1); + char32_t *ret_ptrw = ret.ptrw(); for (int i = 0; i < p_len; i++) { - v[0] = hex[p_buffer[i] >> 4]; - ret += v; - v[0] = hex[p_buffer[i] & 0xF]; - ret += v; + *ret_ptrw++ = hex[p_buffer[i] >> 4]; + *ret_ptrw++ = hex[p_buffer[i] & 0xF]; } + *ret_ptrw = 0; + return ret; } @@ -1950,11 +1961,12 @@ Vector String::hex_decode() const { Vector out; int len = length() / 2; out.resize(len); + uint8_t *out_ptrw = out.ptrw(); for (int i = 0; i < len; i++) { char32_t c; HEX_TO_BYTE(first, i * 2); HEX_TO_BYTE(second, i * 2 + 1); - out.write[i] = first * 16 + second; + out_ptrw[i] = first * 16 + second; } return out; #undef HEX_TO_BYTE @@ -1975,14 +1987,16 @@ CharString String::ascii(bool p_allow_extended) const { CharString cs; cs.resize(size()); + char *cs_ptrw = cs.ptrw(); + const char32_t *this_ptr = ptr(); for (int i = 0; i < size(); i++) { - char32_t c = operator[](i); + char32_t c = this_ptr[i]; if ((c <= 0x7f) || (c <= 0xff && p_allow_extended)) { - cs[i] = c; + cs_ptrw[i] = c; } else { print_unicode_error(vformat("Invalid unicode codepoint (%x), cannot represent as ASCII/Latin-1", (uint32_t)c)); - cs[i] = 0x20; // ascii doesn't have a replacement character like unicode, 0x1a is sometimes used but is kinda arcane + cs_ptrw[i] = 0x20; // ASCII doesn't have a replacement character like unicode, 0x1a is sometimes used but is kinda arcane. } } @@ -3115,8 +3129,9 @@ Vector String::md5_buffer() const { Vector ret; ret.resize(16); + uint8_t *ret_ptrw = ret.ptrw(); for (int i = 0; i < 16; i++) { - ret.write[i] = hash[i]; + ret_ptrw[i] = hash[i]; } return ret; } @@ -3128,8 +3143,9 @@ Vector String::sha1_buffer() const { Vector ret; ret.resize(20); + uint8_t *ret_ptrw = ret.ptrw(); for (int i = 0; i < 20; i++) { - ret.write[i] = hash[i]; + ret_ptrw[i] = hash[i]; } return ret; @@ -3142,8 +3158,9 @@ Vector String::sha256_buffer() const { Vector ret; ret.resize(32); + uint8_t *ret_ptrw = ret.ptrw(); for (int i = 0; i < 32; i++) { - ret.write[i] = hash[i]; + ret_ptrw[i] = hash[i]; } return ret; } @@ -3871,8 +3888,9 @@ Vector String::bigrams() const { return b; } b.resize(n_pairs); + String *b_ptrw = b.ptrw(); for (int i = 0; i < n_pairs; i++) { - b.write[i] = substr(i, 2); + b_ptrw[i] = substr(i, 2); } return b; } @@ -4757,8 +4775,9 @@ String String::xml_unescape() const { return String(); } str.resize(len + 1); - _xml_unescape(get_data(), l, str.ptrw()); - str[len] = 0; + char32_t *str_ptrw = str.ptrw(); + _xml_unescape(get_data(), l, str_ptrw); + str_ptrw[len] = 0; return str; }