Fix XMLParser behavior for comments and premature endings

(cherry picked from commit 549ad70760)
This commit is contained in:
Haoyu Qiu 2021-05-18 15:01:21 +08:00 committed by Rémi Verschelde
parent c311b4c039
commit 3f89287ddb
No known key found for this signature in database
GPG Key ID: C3336907360768E1

View File

@@ -132,7 +132,7 @@ void XMLParser::_parse_closing_xml_element() {
++P; ++P;
const char *pBeginClose = P; const char *pBeginClose = P;
while (*P != '>') { while (*P && *P != '>') {
++P; ++P;
} }
@@ -140,20 +140,26 @@ void XMLParser::_parse_closing_xml_element() {
#ifdef DEBUG_XML #ifdef DEBUG_XML
print_line("XML CLOSE: " + node_name); print_line("XML CLOSE: " + node_name);
#endif #endif
if (*P) {
++P; ++P;
} }
}
void XMLParser::_ignore_definition() { void XMLParser::_ignore_definition() {
node_type = NODE_UNKNOWN; node_type = NODE_UNKNOWN;
char *F = P; char *F = P;
// move until end marked with '>' reached // move until end marked with '>' reached
while (*P != '>') { while (*P && *P != '>') {
++P; ++P;
} }
node_name.parse_utf8(F, P - F); node_name.parse_utf8(F, P - F);
if (*P) {
++P; ++P;
} }
}
bool XMLParser::_parse_cdata() { bool XMLParser::_parse_cdata() {
if (*(P + 1) != '[') { if (*(P + 1) != '[') {
@@ -170,6 +176,7 @@ bool XMLParser::_parse_cdata() {
} }
if (!*P) { if (!*P) {
node_name = "";
return true; return true;
} }
@@ -188,10 +195,9 @@ bool XMLParser::_parse_cdata() {
} }
if (cDataEnd) { if (cDataEnd) {
node_name = String::utf8(cDataBegin, (int)(cDataEnd - cDataBegin)); cDataEnd = P;
} else {
node_name = "";
} }
node_name = String::utf8(cDataBegin, (int)(cDataEnd - cDataBegin));
#ifdef DEBUG_XML #ifdef DEBUG_XML
print_line("XML CDATA: " + node_name); print_line("XML CDATA: " + node_name);
#endif #endif
@@ -203,24 +209,45 @@ void XMLParser::_parse_comment() {
node_type = NODE_COMMENT; node_type = NODE_COMMENT;
P += 1; P += 1;
char *pCommentBegin = P; char *pEndOfInput = data + length;
char *pCommentBegin;
char *pCommentEnd;
if (P + 1 < pEndOfInput && P[0] == '-' && P[1] == '-') {
// Comment, use '-->' as end.
pCommentBegin = P + 2;
for (pCommentEnd = pCommentBegin; pCommentEnd + 2 < pEndOfInput; pCommentEnd++) {
if (pCommentEnd[0] == '-' && pCommentEnd[1] == '-' && pCommentEnd[2] == '>') {
break;
}
}
if (pCommentEnd + 2 < pEndOfInput) {
P = pCommentEnd + 3;
} else {
P = pCommentEnd = pEndOfInput;
}
} else {
// Like document type definition, match angle brackets.
pCommentBegin = P;
int count = 1; int count = 1;
while (*P && count) {
// move until end of comment reached
while (count) {
if (*P == '>') { if (*P == '>') {
--count; --count;
} else if (*P == '<') { } else if (*P == '<') {
++count; ++count;
} }
++P; ++P;
} }
P -= 3; if (count) {
node_name = String::utf8(pCommentBegin + 2, (int)(P - pCommentBegin - 2)); pCommentEnd = P;
P += 3; } else {
pCommentEnd = P - 1;
}
}
node_name = String::utf8(pCommentBegin, (int)(pCommentEnd - pCommentBegin));
#ifdef DEBUG_XML #ifdef DEBUG_XML
print_line("XML COMMENT: " + node_name); print_line("XML COMMENT: " + node_name);
#endif #endif
@@ -235,14 +262,14 @@ void XMLParser::_parse_opening_xml_element() {
const char *startName = P; const char *startName = P;
// find end of element // find end of element
while (*P != '>' && !_is_white_space(*P)) { while (*P && *P != '>' && !_is_white_space(*P)) {
++P; ++P;
} }
const char *endName = P; const char *endName = P;
// find attributes // find attributes
while (*P != '>') { while (*P && *P != '>') {
if (_is_white_space(*P)) { if (_is_white_space(*P)) {
++P; ++P;
} else { } else {
@@ -252,10 +279,14 @@ void XMLParser::_parse_opening_xml_element() {
// read the attribute names // read the attribute names
const char *attributeNameBegin = P; const char *attributeNameBegin = P;
while (!_is_white_space(*P) && *P != '=') { while (*P && !_is_white_space(*P) && *P != '=') {
++P; ++P;
} }
if (!*P) {
break;
}
const char *attributeNameEnd = P; const char *attributeNameEnd = P;
++P; ++P;
@@ -266,7 +297,7 @@ void XMLParser::_parse_opening_xml_element() {
} }
if (!*P) { // malformatted xml file if (!*P) { // malformatted xml file
return; break;
} }
const char attributeQuoteChar = *P; const char attributeQuoteChar = *P;
@@ -278,12 +309,10 @@ void XMLParser::_parse_opening_xml_element() {
++P; ++P;
} }
if (!*P) { // malformatted xml file
return;
}
const char *attributeValueEnd = P; const char *attributeValueEnd = P;
if (*P) {
++P; ++P;
}
Attribute attr; Attribute attr;
attr.name = String::utf8(attributeNameBegin, attr.name = String::utf8(attributeNameBegin,
@@ -315,8 +344,10 @@ void XMLParser::_parse_opening_xml_element() {
print_line("XML OPEN: " + node_name); print_line("XML OPEN: " + node_name);
#endif #endif
if (*P) {
++P; ++P;
} }
}
void XMLParser::_parse_current_node() { void XMLParser::_parse_current_node() {
char *start = P; char *start = P;
@@ -327,10 +358,6 @@ void XMLParser::_parse_current_node() {
++P; ++P;
} }
if (!*P) {
return;
}
if (P - start > 0) { if (P - start > 0) {
// we found some text, store it // we found some text, store it
if (_set_text(start, P)) { if (_set_text(start, P)) {
@@ -338,6 +365,10 @@ void XMLParser::_parse_current_node() {
} }
} }
if (!*P) {
return;
}
++P; ++P;
// based on current token, parse and report next element // based on current token, parse and report next element