Merge pull request #71676 from vnen/gdscript-unicode-identifiers
Add support for Unicode identifiers in GDScript and Expression
This commit is contained in:
commit
5726bf578d
@ -434,14 +434,13 @@ Error Expression::_get_token(Token &r_token) {
|
|||||||
}
|
}
|
||||||
return OK;
|
return OK;
|
||||||
|
|
||||||
} else if (is_ascii_char(cchar) || is_underscore(cchar)) {
|
} else if (is_unicode_identifier_start(cchar)) {
|
||||||
String id;
|
String id = String::chr(cchar);
|
||||||
bool first = true;
|
cchar = GET_CHAR();
|
||||||
|
|
||||||
while (is_ascii_char(cchar) || is_underscore(cchar) || (!first && is_digit(cchar))) {
|
while (is_unicode_identifier_continue(cchar)) {
|
||||||
id += String::chr(cchar);
|
id += String::chr(cchar);
|
||||||
cchar = GET_CHAR();
|
cchar = GET_CHAR();
|
||||||
first = false;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
str_ofs--; //go back one
|
str_ofs--; //go back one
|
||||||
|
@ -384,6 +384,9 @@
|
|||||||
<member name="debug/gdscript/warnings/assert_always_true" type="int" setter="" getter="" default="1">
|
<member name="debug/gdscript/warnings/assert_always_true" type="int" setter="" getter="" default="1">
|
||||||
When set to [code]warn[/code] or [code]error[/code], produces a warning or an error respectively when an [code]assert[/code] call always evaluates to true.
|
When set to [code]warn[/code] or [code]error[/code], produces a warning or an error respectively when an [code]assert[/code] call always evaluates to true.
|
||||||
</member>
|
</member>
|
||||||
|
<member name="debug/gdscript/warnings/confusable_identifier" type="int" setter="" getter="" default="1">
|
||||||
|
When set to [code]warn[/code] or [code]error[/code], produces a warning or an error respectively when an identifier contains characters that can be confused with something else, like when mixing different alphabets.
|
||||||
|
</member>
|
||||||
<member name="debug/gdscript/warnings/constant_used_as_function" type="int" setter="" getter="" default="1">
|
<member name="debug/gdscript/warnings/constant_used_as_function" type="int" setter="" getter="" default="1">
|
||||||
When set to [code]warn[/code] or [code]error[/code], produces a warning or an error respectively when a constant is used as a function.
|
When set to [code]warn[/code] or [code]error[/code], produces a warning or an error respectively when a constant is used as a function.
|
||||||
</member>
|
</member>
|
||||||
|
@ -41,6 +41,7 @@
|
|||||||
#include "core/os/os.h"
|
#include "core/os/os.h"
|
||||||
#include "core/string/string_builder.h"
|
#include "core/string/string_builder.h"
|
||||||
#include "gdscript_warning.h"
|
#include "gdscript_warning.h"
|
||||||
|
#include "servers/text_server.h"
|
||||||
#endif // DEBUG_ENABLED
|
#endif // DEBUG_ENABLED
|
||||||
|
|
||||||
#ifdef TOOLS_ENABLED
|
#ifdef TOOLS_ENABLED
|
||||||
@ -186,24 +187,6 @@ void GDScriptParser::push_error(const String &p_message, const Node *p_origin) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#ifdef DEBUG_ENABLED
|
#ifdef DEBUG_ENABLED
|
||||||
void GDScriptParser::push_warning(const Node *p_source, GDScriptWarning::Code p_code, const String &p_symbol1, const String &p_symbol2, const String &p_symbol3, const String &p_symbol4) {
|
|
||||||
ERR_FAIL_COND(p_source == nullptr);
|
|
||||||
Vector<String> symbols;
|
|
||||||
if (!p_symbol1.is_empty()) {
|
|
||||||
symbols.push_back(p_symbol1);
|
|
||||||
}
|
|
||||||
if (!p_symbol2.is_empty()) {
|
|
||||||
symbols.push_back(p_symbol2);
|
|
||||||
}
|
|
||||||
if (!p_symbol3.is_empty()) {
|
|
||||||
symbols.push_back(p_symbol3);
|
|
||||||
}
|
|
||||||
if (!p_symbol4.is_empty()) {
|
|
||||||
symbols.push_back(p_symbol4);
|
|
||||||
}
|
|
||||||
push_warning(p_source, p_code, symbols);
|
|
||||||
}
|
|
||||||
|
|
||||||
void GDScriptParser::push_warning(const Node *p_source, GDScriptWarning::Code p_code, const Vector<String> &p_symbols) {
|
void GDScriptParser::push_warning(const Node *p_source, GDScriptWarning::Code p_code, const Vector<String> &p_symbols) {
|
||||||
ERR_FAIL_COND(p_source == nullptr);
|
ERR_FAIL_COND(p_source == nullptr);
|
||||||
if (is_ignoring_warnings) {
|
if (is_ignoring_warnings) {
|
||||||
@ -2251,7 +2234,14 @@ GDScriptParser::ExpressionNode *GDScriptParser::parse_expression(bool p_can_assi
|
|||||||
}
|
}
|
||||||
|
|
||||||
GDScriptParser::IdentifierNode *GDScriptParser::parse_identifier() {
|
GDScriptParser::IdentifierNode *GDScriptParser::parse_identifier() {
|
||||||
return static_cast<IdentifierNode *>(parse_identifier(nullptr, false));
|
IdentifierNode *identifier = static_cast<IdentifierNode *>(parse_identifier(nullptr, false));
|
||||||
|
#ifdef DEBUG_ENABLED
|
||||||
|
// Check for spoofing here (if available in TextServer) since this isn't called inside expressions. This is only relevant for declarations.
|
||||||
|
if (identifier && TS->has_feature(TextServer::FEATURE_UNICODE_SECURITY) && TS->spoof_check(identifier->name.operator String())) {
|
||||||
|
push_warning(identifier, GDScriptWarning::CONFUSABLE_IDENTIFIER, identifier->name.operator String());
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
return identifier;
|
||||||
}
|
}
|
||||||
|
|
||||||
GDScriptParser::ExpressionNode *GDScriptParser::parse_identifier(ExpressionNode *p_previous_operand, bool p_can_assign) {
|
GDScriptParser::ExpressionNode *GDScriptParser::parse_identifier(ExpressionNode *p_previous_operand, bool p_can_assign) {
|
||||||
|
@ -1361,8 +1361,11 @@ private:
|
|||||||
void clear();
|
void clear();
|
||||||
void push_error(const String &p_message, const Node *p_origin = nullptr);
|
void push_error(const String &p_message, const Node *p_origin = nullptr);
|
||||||
#ifdef DEBUG_ENABLED
|
#ifdef DEBUG_ENABLED
|
||||||
void push_warning(const Node *p_source, GDScriptWarning::Code p_code, const String &p_symbol1 = String(), const String &p_symbol2 = String(), const String &p_symbol3 = String(), const String &p_symbol4 = String());
|
|
||||||
void push_warning(const Node *p_source, GDScriptWarning::Code p_code, const Vector<String> &p_symbols);
|
void push_warning(const Node *p_source, GDScriptWarning::Code p_code, const Vector<String> &p_symbols);
|
||||||
|
template <typename... Symbols>
|
||||||
|
void push_warning(const Node *p_source, GDScriptWarning::Code p_code, const Symbols &...p_symbols) {
|
||||||
|
push_warning(p_source, p_code, Vector<String>{ p_symbols... });
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void make_completion_context(CompletionType p_type, Node *p_node, int p_argument = -1, bool p_force = false);
|
void make_completion_context(CompletionType p_type, Node *p_node, int p_argument = -1, bool p_force = false);
|
||||||
|
@ -31,10 +31,14 @@
|
|||||||
#include "gdscript_tokenizer.h"
|
#include "gdscript_tokenizer.h"
|
||||||
|
|
||||||
#include "core/error/error_macros.h"
|
#include "core/error/error_macros.h"
|
||||||
|
#include "core/string/char_utils.h"
|
||||||
|
|
||||||
#ifdef TOOLS_ENABLED
|
#ifdef TOOLS_ENABLED
|
||||||
#include "editor/editor_settings.h"
|
#include "editor/editor_settings.h"
|
||||||
#endif
|
#endif
|
||||||
|
#ifdef DEBUG_ENABLED
|
||||||
|
#include "servers/text_server.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
static const char *token_names[] = {
|
static const char *token_names[] = {
|
||||||
"Empty", // EMPTY,
|
"Empty", // EMPTY,
|
||||||
@ -435,10 +439,12 @@ GDScriptTokenizer::Token GDScriptTokenizer::check_vcs_marker(char32_t p_test, To
|
|||||||
}
|
}
|
||||||
|
|
||||||
GDScriptTokenizer::Token GDScriptTokenizer::annotation() {
|
GDScriptTokenizer::Token GDScriptTokenizer::annotation() {
|
||||||
if (!is_ascii_identifier_char(_peek())) {
|
if (is_unicode_identifier_start(_peek())) {
|
||||||
|
_advance(); // Consume start character.
|
||||||
|
} else {
|
||||||
push_error("Expected annotation identifier after \"@\".");
|
push_error("Expected annotation identifier after \"@\".");
|
||||||
}
|
}
|
||||||
while (is_ascii_identifier_char(_peek())) {
|
while (is_unicode_identifier_continue(_peek())) {
|
||||||
// Consume all identifier characters.
|
// Consume all identifier characters.
|
||||||
_advance();
|
_advance();
|
||||||
}
|
}
|
||||||
@ -447,7 +453,6 @@ GDScriptTokenizer::Token GDScriptTokenizer::annotation() {
|
|||||||
return annotation;
|
return annotation;
|
||||||
}
|
}
|
||||||
|
|
||||||
GDScriptTokenizer::Token GDScriptTokenizer::potential_identifier() {
|
|
||||||
#define KEYWORDS(KEYWORD_GROUP, KEYWORD) \
|
#define KEYWORDS(KEYWORD_GROUP, KEYWORD) \
|
||||||
KEYWORD_GROUP('a') \
|
KEYWORD_GROUP('a') \
|
||||||
KEYWORD("as", Token::AS) \
|
KEYWORD("as", Token::AS) \
|
||||||
@ -512,8 +517,21 @@ GDScriptTokenizer::Token GDScriptTokenizer::potential_identifier() {
|
|||||||
#define MIN_KEYWORD_LENGTH 2
|
#define MIN_KEYWORD_LENGTH 2
|
||||||
#define MAX_KEYWORD_LENGTH 10
|
#define MAX_KEYWORD_LENGTH 10
|
||||||
|
|
||||||
// Consume all alphanumeric characters.
|
#ifdef DEBUG_ENABLED
|
||||||
while (is_ascii_identifier_char(_peek())) {
|
void GDScriptTokenizer::make_keyword_list() {
|
||||||
|
#define KEYWORD_LINE(keyword, token_type) keyword,
|
||||||
|
#define KEYWORD_GROUP_IGNORE(group)
|
||||||
|
keyword_list = {
|
||||||
|
KEYWORDS(KEYWORD_GROUP_IGNORE, KEYWORD_LINE)
|
||||||
|
};
|
||||||
|
#undef KEYWORD_LINE
|
||||||
|
#undef KEYWORD_GROUP_IGNORE
|
||||||
|
}
|
||||||
|
#endif // DEBUG_ENABLED
|
||||||
|
|
||||||
|
GDScriptTokenizer::Token GDScriptTokenizer::potential_identifier() {
|
||||||
|
// Consume all identifier characters.
|
||||||
|
while (is_unicode_identifier_continue(_peek())) {
|
||||||
_advance();
|
_advance();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -565,15 +583,28 @@ GDScriptTokenizer::Token GDScriptTokenizer::potential_identifier() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Not a keyword, so must be an identifier.
|
// Not a keyword, so must be an identifier.
|
||||||
return make_identifier(name);
|
Token id = make_identifier(name);
|
||||||
|
|
||||||
|
#ifdef DEBUG_ENABLED
|
||||||
|
// Additional checks for identifiers but only in debug and if it's available in TextServer.
|
||||||
|
if (TS->has_feature(TextServer::FEATURE_UNICODE_SECURITY)) {
|
||||||
|
int64_t confusable = TS->is_confusable(name, keyword_list);
|
||||||
|
if (confusable >= 0) {
|
||||||
|
push_error(vformat(R"(Identifier "%s" is visually similar to the GDScript keyword "%s" and thus not allowed.)", name, keyword_list[confusable]));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif // DEBUG_ENABLED
|
||||||
|
|
||||||
|
return id;
|
||||||
|
|
||||||
#undef KEYWORDS
|
|
||||||
#undef MIN_KEYWORD_LENGTH
|
|
||||||
#undef MAX_KEYWORD_LENGTH
|
|
||||||
#undef KEYWORD_GROUP_CASE
|
#undef KEYWORD_GROUP_CASE
|
||||||
#undef KEYWORD
|
#undef KEYWORD
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#undef MAX_KEYWORD_LENGTH
|
||||||
|
#undef MIN_KEYWORD_LENGTH
|
||||||
|
#undef KEYWORDS
|
||||||
|
|
||||||
void GDScriptTokenizer::newline(bool p_make_token) {
|
void GDScriptTokenizer::newline(bool p_make_token) {
|
||||||
// Don't overwrite previous newline, nor create if we want a line continuation.
|
// Don't overwrite previous newline, nor create if we want a line continuation.
|
||||||
if (p_make_token && !pending_newline && !line_continuation) {
|
if (p_make_token && !pending_newline && !line_continuation) {
|
||||||
@ -720,7 +751,7 @@ GDScriptTokenizer::Token GDScriptTokenizer::number() {
|
|||||||
error.rightmost_column = column + 1;
|
error.rightmost_column = column + 1;
|
||||||
push_error(error);
|
push_error(error);
|
||||||
has_error = true;
|
has_error = true;
|
||||||
} else if (is_ascii_identifier_char(_peek())) {
|
} else if (is_unicode_identifier_start(_peek()) || is_unicode_identifier_continue(_peek())) {
|
||||||
// Letter at the end of the number.
|
// Letter at the end of the number.
|
||||||
push_error("Invalid numeric notation.");
|
push_error("Invalid numeric notation.");
|
||||||
}
|
}
|
||||||
@ -1311,7 +1342,7 @@ GDScriptTokenizer::Token GDScriptTokenizer::scan() {
|
|||||||
|
|
||||||
if (is_digit(c)) {
|
if (is_digit(c)) {
|
||||||
return number();
|
return number();
|
||||||
} else if (is_ascii_identifier_char(c)) {
|
} else if (is_unicode_identifier_start(c)) {
|
||||||
return potential_identifier();
|
return potential_identifier();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1504,7 +1535,11 @@ GDScriptTokenizer::Token GDScriptTokenizer::scan() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
default:
|
default:
|
||||||
return make_error(vformat(R"(Unknown character "%s".)", String(&c, 1)));
|
if (is_whitespace(c)) {
|
||||||
|
return make_error(vformat(R"(Invalid white space character "\\u%X".)", static_cast<int32_t>(c)));
|
||||||
|
} else {
|
||||||
|
return make_error(vformat(R"(Unknown character "%s".)", String(&c, 1)));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1514,4 +1549,7 @@ GDScriptTokenizer::GDScriptTokenizer() {
|
|||||||
tab_size = EditorSettings::get_singleton()->get_setting("text_editor/behavior/indent/size");
|
tab_size = EditorSettings::get_singleton()->get_setting("text_editor/behavior/indent/size");
|
||||||
}
|
}
|
||||||
#endif // TOOLS_ENABLED
|
#endif // TOOLS_ENABLED
|
||||||
|
#ifdef DEBUG_ENABLED
|
||||||
|
make_keyword_list();
|
||||||
|
#endif // DEBUG_ENABLED
|
||||||
}
|
}
|
||||||
|
@ -224,6 +224,9 @@ private:
|
|||||||
char32_t indent_char = '\0';
|
char32_t indent_char = '\0';
|
||||||
int position = 0;
|
int position = 0;
|
||||||
int length = 0;
|
int length = 0;
|
||||||
|
#ifdef DEBUG_ENABLED
|
||||||
|
Vector<String> keyword_list;
|
||||||
|
#endif // DEBUG_ENABLED
|
||||||
|
|
||||||
#ifdef TOOLS_ENABLED
|
#ifdef TOOLS_ENABLED
|
||||||
HashMap<int, CommentData> comments;
|
HashMap<int, CommentData> comments;
|
||||||
@ -239,6 +242,10 @@ private:
|
|||||||
void _skip_whitespace();
|
void _skip_whitespace();
|
||||||
void check_indent();
|
void check_indent();
|
||||||
|
|
||||||
|
#ifdef DEBUG_ENABLED
|
||||||
|
void make_keyword_list();
|
||||||
|
#endif // DEBUG_ENABLED
|
||||||
|
|
||||||
Token make_error(const String &p_message);
|
Token make_error(const String &p_message);
|
||||||
void push_error(const String &p_message);
|
void push_error(const String &p_message);
|
||||||
void push_error(const Token &p_error);
|
void push_error(const Token &p_error);
|
||||||
|
@ -155,6 +155,10 @@ String GDScriptWarning::get_message() const {
|
|||||||
CHECK_SYMBOLS(2);
|
CHECK_SYMBOLS(2);
|
||||||
return vformat(R"(The function '%s()' is a static function but was called from an instance. Instead, it should be directly called from the type: '%s.%s()'.)", symbols[0], symbols[1], symbols[0]);
|
return vformat(R"(The function '%s()' is a static function but was called from an instance. Instead, it should be directly called from the type: '%s.%s()'.)", symbols[0], symbols[1], symbols[0]);
|
||||||
}
|
}
|
||||||
|
case CONFUSABLE_IDENTIFIER: {
|
||||||
|
CHECK_SYMBOLS(1);
|
||||||
|
return vformat(R"(The identifier "%s" has misleading characters and might be confused with something else.)", symbols[0]);
|
||||||
|
}
|
||||||
case WARNING_MAX:
|
case WARNING_MAX:
|
||||||
break; // Can't happen, but silences warning
|
break; // Can't happen, but silences warning
|
||||||
}
|
}
|
||||||
@ -219,6 +223,7 @@ String GDScriptWarning::get_name_from_code(Code p_code) {
|
|||||||
"SHADOWED_GLOBAL_IDENTIFIER",
|
"SHADOWED_GLOBAL_IDENTIFIER",
|
||||||
"INT_ASSIGNED_TO_ENUM",
|
"INT_ASSIGNED_TO_ENUM",
|
||||||
"STATIC_CALLED_ON_INSTANCE",
|
"STATIC_CALLED_ON_INSTANCE",
|
||||||
|
"CONFUSABLE_IDENTIFIER",
|
||||||
};
|
};
|
||||||
|
|
||||||
static_assert((sizeof(names) / sizeof(*names)) == WARNING_MAX, "Amount of warning types don't match the amount of warning names.");
|
static_assert((sizeof(names) / sizeof(*names)) == WARNING_MAX, "Amount of warning types don't match the amount of warning names.");
|
||||||
|
@ -78,6 +78,7 @@ public:
|
|||||||
SHADOWED_GLOBAL_IDENTIFIER, // A global class or function has the same name as variable.
|
SHADOWED_GLOBAL_IDENTIFIER, // A global class or function has the same name as variable.
|
||||||
INT_ASSIGNED_TO_ENUM, // An integer value was assigned to an enum-typed variable without casting.
|
INT_ASSIGNED_TO_ENUM, // An integer value was assigned to an enum-typed variable without casting.
|
||||||
STATIC_CALLED_ON_INSTANCE, // A static method was called on an instance of a class instead of on the class itself.
|
STATIC_CALLED_ON_INSTANCE, // A static method was called on an instance of a class instead of on the class itself.
|
||||||
|
CONFUSABLE_IDENTIFIER, // The identifier contains misleading characters that can be confused. E.g. "usеr" (has Cyrillic "е" instead of Latin "e").
|
||||||
WARNING_MAX,
|
WARNING_MAX,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -2,4 +2,4 @@ GDTEST_OK
|
|||||||
>> WARNING
|
>> WARNING
|
||||||
>> Line: 2
|
>> Line: 2
|
||||||
>> UNUSED_PARAMETER
|
>> UNUSED_PARAMETER
|
||||||
>>
|
>> The parameter 'unused' is never used in the function ''. If this is intended, prefix it with an underscore: '_unused'
|
||||||
|
@ -0,0 +1,3 @@
|
|||||||
|
func test():
|
||||||
|
var аs # Using Cyrillic "а".
|
||||||
|
print(аs)
|
@ -0,0 +1,2 @@
|
|||||||
|
GDTEST_PARSER_ERROR
|
||||||
|
Identifier "аs" is visually similar to the GDScript keyword "as" and thus not allowed.
|
@ -0,0 +1,35 @@
|
|||||||
|
const π = PI
|
||||||
|
var ㄥ = π
|
||||||
|
|
||||||
|
func test():
|
||||||
|
var փորձարկում = "test"
|
||||||
|
prints("փորձարկում", փորձարկում)
|
||||||
|
var امتحان = "test"
|
||||||
|
prints("امتحان", امتحان)
|
||||||
|
var পরীক্ষা = "test"
|
||||||
|
prints("পরীক্ষা", পরীক্ষা)
|
||||||
|
var тест = "test"
|
||||||
|
prints("тест", тест)
|
||||||
|
var जाँच = "test"
|
||||||
|
prints("जाँच", जाँच)
|
||||||
|
var 기준 = "test"
|
||||||
|
prints("기준", 기준)
|
||||||
|
var 测试 = "test"
|
||||||
|
prints("测试", 测试)
|
||||||
|
var テスト = "test"
|
||||||
|
prints("テスト", テスト)
|
||||||
|
var 試験 = "test"
|
||||||
|
prints("試験", 試験)
|
||||||
|
var പരീക്ഷ = "test"
|
||||||
|
prints("പരീക്ഷ", പരീക്ഷ)
|
||||||
|
var ทดสอบ = "test"
|
||||||
|
prints("ทดสอบ", ทดสอบ)
|
||||||
|
var δοκιμή = "test"
|
||||||
|
prints("δοκιμή", δοκιμή)
|
||||||
|
|
||||||
|
const d = 1.1
|
||||||
|
_process(d)
|
||||||
|
print(is_equal_approx(ㄥ, PI + (d * PI)))
|
||||||
|
|
||||||
|
func _process(Δ: float) -> void:
|
||||||
|
ㄥ += Δ * π
|
@ -0,0 +1,14 @@
|
|||||||
|
GDTEST_OK
|
||||||
|
փորձարկում test
|
||||||
|
امتحان test
|
||||||
|
পরীক্ষা test
|
||||||
|
тест test
|
||||||
|
जाँच test
|
||||||
|
기준 test
|
||||||
|
测试 test
|
||||||
|
テスト test
|
||||||
|
試験 test
|
||||||
|
പരീക്ഷ test
|
||||||
|
ทดสอบ test
|
||||||
|
δοκιμή test
|
||||||
|
true
|
@ -0,0 +1,5 @@
|
|||||||
|
func test():
|
||||||
|
var port = 0 # Only latin characters.
|
||||||
|
var pοrt = 1 # The "ο" is Greek omicron.
|
||||||
|
|
||||||
|
prints(port, pοrt)
|
@ -0,0 +1,6 @@
|
|||||||
|
GDTEST_OK
|
||||||
|
>> WARNING
|
||||||
|
>> Line: 3
|
||||||
|
>> CONFUSABLE_IDENTIFIER
|
||||||
|
>> The identifier "pοrt" has misleading characters and might be confused with something else.
|
||||||
|
0 1
|
Loading…
Reference in New Issue
Block a user