Merge pull request #92548 from AThousandShips/string_cleaning

[Core] Optimize some `String` methods
This commit is contained in:
Rémi Verschelde 2024-08-26 23:28:24 +02:00
commit c7702b350a
No known key found for this signature in database
GPG Key ID: C3336907360768E1

View File

@ -1694,30 +1694,40 @@ char32_t String::char_lowercase(char32_t p_char) {
}
String String::to_upper() const {
String upper = *this;
for (int i = 0; i < upper.size(); i++) {
const char32_t s = upper[i];
const char32_t t = _find_upper(s);
if (s != t) { // avoid copy on write
upper[i] = t;
}
if (is_empty()) {
return *this;
}
String upper;
upper.resize(size());
const char32_t *old_ptr = ptr();
char32_t *upper_ptrw = upper.ptrw();
while (*old_ptr) {
*upper_ptrw++ = _find_upper(*old_ptr++);
}
*upper_ptrw = 0;
return upper;
}
String String::to_lower() const {
String lower = *this;
for (int i = 0; i < lower.size(); i++) {
const char32_t s = lower[i];
const char32_t t = _find_lower(s);
if (s != t) { // avoid copy on write
lower[i] = t;
}
if (is_empty()) {
return *this;
}
String lower;
lower.resize(size());
const char32_t *old_ptr = ptr();
char32_t *lower_ptrw = lower.ptrw();
while (*old_ptr) {
*lower_ptrw++ = _find_lower(*old_ptr++);
}
*lower_ptrw = 0;
return lower;
}
@ -1955,15 +1965,16 @@ String String::hex_encode_buffer(const uint8_t *p_buffer, int p_len) {
static const char hex[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' };
String ret;
char v[2] = { 0, 0 };
ret.resize(p_len * 2 + 1);
char32_t *ret_ptrw = ret.ptrw();
for (int i = 0; i < p_len; i++) {
v[0] = hex[p_buffer[i] >> 4];
ret += v;
v[0] = hex[p_buffer[i] & 0xF];
ret += v;
*ret_ptrw++ = hex[p_buffer[i] >> 4];
*ret_ptrw++ = hex[p_buffer[i] & 0xF];
}
*ret_ptrw = 0;
return ret;
}
@ -1986,11 +1997,12 @@ Vector<uint8_t> String::hex_decode() const {
Vector<uint8_t> out;
int len = length() / 2;
out.resize(len);
uint8_t *out_ptrw = out.ptrw();
for (int i = 0; i < len; i++) {
char32_t c;
HEX_TO_BYTE(first, i * 2);
HEX_TO_BYTE(second, i * 2 + 1);
out.write[i] = first * 16 + second;
out_ptrw[i] = first * 16 + second;
}
return out;
#undef HEX_TO_BYTE
@ -2011,14 +2023,16 @@ CharString String::ascii(bool p_allow_extended) const {
CharString cs;
cs.resize(size());
char *cs_ptrw = cs.ptrw();
const char32_t *this_ptr = ptr();
for (int i = 0; i < size(); i++) {
char32_t c = operator[](i);
char32_t c = this_ptr[i];
if ((c <= 0x7f) || (c <= 0xff && p_allow_extended)) {
cs[i] = c;
cs_ptrw[i] = c;
} else {
print_unicode_error(vformat("Invalid unicode codepoint (%x), cannot represent as ASCII/Latin-1", (uint32_t)c));
cs[i] = 0x20; // ascii doesn't have a replacement character like unicode, 0x1a is sometimes used but is kinda arcane
cs_ptrw[i] = 0x20; // ASCII doesn't have a replacement character like unicode, 0x1a is sometimes used but is kinda arcane.
}
}
@ -3151,8 +3165,9 @@ Vector<uint8_t> String::md5_buffer() const {
Vector<uint8_t> ret;
ret.resize(16);
uint8_t *ret_ptrw = ret.ptrw();
for (int i = 0; i < 16; i++) {
ret.write[i] = hash[i];
ret_ptrw[i] = hash[i];
}
return ret;
}
@ -3164,8 +3179,9 @@ Vector<uint8_t> String::sha1_buffer() const {
Vector<uint8_t> ret;
ret.resize(20);
uint8_t *ret_ptrw = ret.ptrw();
for (int i = 0; i < 20; i++) {
ret.write[i] = hash[i];
ret_ptrw[i] = hash[i];
}
return ret;
@ -3178,8 +3194,9 @@ Vector<uint8_t> String::sha256_buffer() const {
Vector<uint8_t> ret;
ret.resize(32);
uint8_t *ret_ptrw = ret.ptrw();
for (int i = 0; i < 32; i++) {
ret.write[i] = hash[i];
ret_ptrw[i] = hash[i];
}
return ret;
}
@ -3917,8 +3934,9 @@ Vector<String> String::bigrams() const {
return b;
}
b.resize(n_pairs);
String *b_ptrw = b.ptrw();
for (int i = 0; i < n_pairs; i++) {
b.write[i] = substr(i, 2);
b_ptrw[i] = substr(i, 2);
}
return b;
}
@ -4897,8 +4915,9 @@ String String::xml_unescape() const {
return String();
}
str.resize(len + 1);
_xml_unescape(get_data(), l, str.ptrw());
str[len] = 0;
char32_t *str_ptrw = str.ptrw();
_xml_unescape(get_data(), l, str_ptrw);
str_ptrw[len] = 0;
return str;
}