From 03ea77407c8a1e3d525e367e72d67c5152937747 Mon Sep 17 00:00:00 2001 From: George Marques Date: Thu, 9 Feb 2023 11:17:37 -0300 Subject: [PATCH] GDScript: Be more lenient with identifiers - Allow identifiers similar to keywords if they are in ASCII range. - Allow constants to be treated as regular identifiers. - Allow keywords that can be used as identifiers in expressions. --- modules/gdscript/gdscript_parser.cpp | 16 ++++++++++++++-- modules/gdscript/gdscript_tokenizer.cpp | 17 ++++++++++++++--- .../allow_id_similar_to_keyword_in_ascii.gd | 3 +++ .../allow_id_similar_to_keyword_in_ascii.out | 2 ++ .../features/allowed_keywords_as_identifiers.gd | 16 ++++++++++++++++ .../allowed_keywords_as_identifiers.out | 6 ++++++ .../parser/warnings/confusable_identifier.gd | 7 +++++++ .../parser/warnings/confusable_identifier.out | 6 +++++- 8 files changed, 67 insertions(+), 6 deletions(-) create mode 100644 modules/gdscript/tests/scripts/parser/features/allow_id_similar_to_keyword_in_ascii.gd create mode 100644 modules/gdscript/tests/scripts/parser/features/allow_id_similar_to_keyword_in_ascii.out create mode 100644 modules/gdscript/tests/scripts/parser/features/allowed_keywords_as_identifiers.gd create mode 100644 modules/gdscript/tests/scripts/parser/features/allowed_keywords_as_identifiers.out diff --git a/modules/gdscript/gdscript_parser.cpp b/modules/gdscript/gdscript_parser.cpp index c6e4222213b..0393a8c241f 100644 --- a/modules/gdscript/gdscript_parser.cpp +++ b/modules/gdscript/gdscript_parser.cpp @@ -2145,7 +2145,12 @@ GDScriptParser::ExpressionNode *GDScriptParser::parse_precedence(Precedence p_pr make_completion_context(COMPLETION_IDENTIFIER, nullptr); GDScriptTokenizer::Token token = current; - ParseFunction prefix_rule = get_rule(token.type)->prefix; + GDScriptTokenizer::Token::Type token_type = token.type; + if (token.is_identifier()) { + // Allow keywords that can be treated as identifiers. + token_type = GDScriptTokenizer::Token::IDENTIFIER; + } + ParseFunction prefix_rule = get_rule(token_type)->prefix; if (prefix_rule == nullptr) { // Expected expression. Let the caller give the proper error message. @@ -3010,7 +3015,14 @@ GDScriptParser::ExpressionNode *GDScriptParser::parse_get_node(ExpressionNode *p path_state = PATH_STATE_NODE_NAME; } else if (current.is_node_name()) { advance(); - get_node->full_path += previous.get_identifier(); + String identifier = previous.get_identifier(); +#ifdef DEBUG_ENABLED + // Check spoofing. + if (TS->has_feature(TextServer::FEATURE_UNICODE_SECURITY) && TS->spoof_check(identifier)) { + push_warning(get_node, GDScriptWarning::CONFUSABLE_IDENTIFIER, identifier); + } +#endif + get_node->full_path += identifier; path_state = PATH_STATE_NODE_NAME; } else if (!check(GDScriptTokenizer::Token::SLASH) && !check(GDScriptTokenizer::Token::PERCENT)) { diff --git a/modules/gdscript/gdscript_tokenizer.cpp b/modules/gdscript/gdscript_tokenizer.cpp index d7f1114fd3b..d586380c413 100644 --- a/modules/gdscript/gdscript_tokenizer.cpp +++ b/modules/gdscript/gdscript_tokenizer.cpp @@ -168,7 +168,11 @@ bool GDScriptTokenizer::Token::is_identifier() const { switch (type) { case IDENTIFIER: case MATCH: // Used in String.match(). - case CONST_INF: // Used in Vector{2,3,4}.INF + // Allow constants to be treated as regular identifiers. + case CONST_PI: + case CONST_INF: + case CONST_NAN: + case CONST_TAU: return true; default: return false; @@ -188,6 +192,10 @@ bool GDScriptTokenizer::Token::is_node_name() const { case CLASS_NAME: case CLASS: case CONST: + case CONST_PI: + case CONST_INF: + case CONST_NAN: + case CONST_TAU: case CONTINUE: case ELIF: case ELSE: @@ -530,9 +538,12 @@ void GDScriptTokenizer::make_keyword_list() { #endif // DEBUG_ENABLED GDScriptTokenizer::Token GDScriptTokenizer::potential_identifier() { + bool only_ascii = _peek(-1) < 128; + // Consume all identifier characters. while (is_unicode_identifier_continue(_peek())) { - _advance(); + char32_t c = _advance(); + only_ascii = only_ascii && c < 128; } int len = _current - _start; @@ -587,7 +598,7 @@ GDScriptTokenizer::Token GDScriptTokenizer::potential_identifier() { #ifdef DEBUG_ENABLED // Additional checks for identifiers but only in debug and if it's available in TextServer. - if (TS->has_feature(TextServer::FEATURE_UNICODE_SECURITY)) { + if (!only_ascii && TS->has_feature(TextServer::FEATURE_UNICODE_SECURITY)) { int64_t confusable = TS->is_confusable(name, keyword_list); if (confusable >= 0) { push_error(vformat(R"(Identifier "%s" is visually similar to the GDScript keyword "%s" and thus not allowed.)", name, keyword_list[confusable])); diff --git a/modules/gdscript/tests/scripts/parser/features/allow_id_similar_to_keyword_in_ascii.gd b/modules/gdscript/tests/scripts/parser/features/allow_id_similar_to_keyword_in_ascii.gd new file mode 100644 index 00000000000..390d314b948 --- /dev/null +++ b/modules/gdscript/tests/scripts/parser/features/allow_id_similar_to_keyword_in_ascii.gd @@ -0,0 +1,3 @@ +func test(): + var P1 = "ok" # Technically it is visually similar to keyword "PI" but allowed since it's in ASCII range. + print(P1) diff --git a/modules/gdscript/tests/scripts/parser/features/allow_id_similar_to_keyword_in_ascii.out b/modules/gdscript/tests/scripts/parser/features/allow_id_similar_to_keyword_in_ascii.out new file mode 100644 index 00000000000..1b47ed10dc0 --- /dev/null +++ b/modules/gdscript/tests/scripts/parser/features/allow_id_similar_to_keyword_in_ascii.out @@ -0,0 +1,2 @@ +GDTEST_OK +ok diff --git a/modules/gdscript/tests/scripts/parser/features/allowed_keywords_as_identifiers.gd b/modules/gdscript/tests/scripts/parser/features/allowed_keywords_as_identifiers.gd new file mode 100644 index 00000000000..7e1982597c8 --- /dev/null +++ b/modules/gdscript/tests/scripts/parser/features/allowed_keywords_as_identifiers.gd @@ -0,0 +1,16 @@ +func test(): + # The following keywords are allowed as identifiers: + var match = "match" + print(match) + + var PI = "PI" + print(PI) + + var INF = "INF" + print(INF) + + var NAN = "NAN" + print(NAN) + + var TAU = "TAU" + print(TAU) diff --git a/modules/gdscript/tests/scripts/parser/features/allowed_keywords_as_identifiers.out b/modules/gdscript/tests/scripts/parser/features/allowed_keywords_as_identifiers.out new file mode 100644 index 00000000000..aae2ae13d5c --- /dev/null +++ b/modules/gdscript/tests/scripts/parser/features/allowed_keywords_as_identifiers.out @@ -0,0 +1,6 @@ +GDTEST_OK +match +PI +INF +NAN +TAU diff --git a/modules/gdscript/tests/scripts/parser/warnings/confusable_identifier.gd b/modules/gdscript/tests/scripts/parser/warnings/confusable_identifier.gd index e2caac8ffd9..41b38c4bba9 100644 --- a/modules/gdscript/tests/scripts/parser/warnings/confusable_identifier.gd +++ b/modules/gdscript/tests/scripts/parser/warnings/confusable_identifier.gd @@ -1,5 +1,12 @@ +extends Node + func test(): var port = 0 # Only latin characters. var pοrt = 1 # The "ο" is Greek omicron. prints(port, pοrt) + +# Do not call this since nodes aren't in the tree. It is just a parser check. +func nodes(): + var _node1 = $port # Only latin characters. + var _node2 = $pοrt # The "ο" is Greek omicron. diff --git a/modules/gdscript/tests/scripts/parser/warnings/confusable_identifier.out b/modules/gdscript/tests/scripts/parser/warnings/confusable_identifier.out index c4833964438..c189204285a 100644 --- a/modules/gdscript/tests/scripts/parser/warnings/confusable_identifier.out +++ b/modules/gdscript/tests/scripts/parser/warnings/confusable_identifier.out @@ -1,6 +1,10 @@ GDTEST_OK >> WARNING ->> Line: 3 +>> Line: 5 +>> CONFUSABLE_IDENTIFIER +>> The identifier "pοrt" has misleading characters and might be confused with something else. +>> WARNING +>> Line: 12 >> CONFUSABLE_IDENTIFIER >> The identifier "pοrt" has misleading characters and might be confused with something else. 0 1