Merge pull request #46551 from bruvzg/json_utf16

JSON parser: add UTF-16 surrogate pairs support.
This commit is contained in:
Rémi Verschelde 2021-03-01 12:42:09 +01:00 committed by GitHub
commit b98b2744a6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -234,6 +234,52 @@ Error JSON::_get_token(const char32_t *p_str, int &index, int p_len, Token &r_to
}
index += 4; //will add at the end anyway
if ((res & 0xfffffc00) == 0xd800) {
if (p_str[index + 1] != '\\' || p_str[index + 2] != 'u') {
r_err_str = "Invalid UTF-16 sequence in string, unpaired lead surrogate";
return ERR_PARSE_ERROR;
}
index += 2;
char32_t trail = 0;
for (int j = 0; j < 4; j++) {
char32_t c = p_str[index + j + 1];
if (c == 0) {
r_err_str = "Unterminated String";
return ERR_PARSE_ERROR;
}
if (!((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'))) {
r_err_str = "Malformed hex constant in string";
return ERR_PARSE_ERROR;
}
char32_t v;
if (c >= '0' && c <= '9') {
v = c - '0';
} else if (c >= 'a' && c <= 'f') {
v = c - 'a';
v += 10;
} else if (c >= 'A' && c <= 'F') {
v = c - 'A';
v += 10;
} else {
ERR_PRINT("Bug parsing hex constant.");
v = 0;
}
trail <<= 4;
trail |= v;
}
if ((trail & 0xfffffc00) == 0xdc00) {
res = (res << 10UL) + trail - ((0xd800 << 10UL) + 0xdc00 - 0x10000);
index += 4; //will add at the end anyway
} else {
r_err_str = "Invalid UTF-16 sequence in string, unpaired lead surrogate";
return ERR_PARSE_ERROR;
}
} else if ((res & 0xfffffc00) == 0xdc00) {
r_err_str = "Invalid UTF-16 sequence in string, unpaired trail surrogate";
return ERR_PARSE_ERROR;
}
} break;
default: {
res = next;