diff --git a/modules/gdscript/gdscript_analyzer.cpp b/modules/gdscript/gdscript_analyzer.cpp index 5da2bb5cc18..e754ba70ec8 100644 --- a/modules/gdscript/gdscript_analyzer.cpp +++ b/modules/gdscript/gdscript_analyzer.cpp @@ -872,6 +872,9 @@ void GDScriptAnalyzer::resolve_node(GDScriptParser::Node *p_node) { case GDScriptParser::Node::SIGNAL: // Nothing to do. break; + case GDScriptParser::Node::LAMBDA: + // FIXME: Recurse into lambda. + break; } } @@ -1489,6 +1492,7 @@ void GDScriptAnalyzer::reduce_expression(GDScriptParser::ExpressionNode *p_expre case GDScriptParser::Node::FOR: case GDScriptParser::Node::FUNCTION: case GDScriptParser::Node::IF: + case GDScriptParser::Node::LAMBDA: case GDScriptParser::Node::MATCH: case GDScriptParser::Node::MATCH_BRANCH: case GDScriptParser::Node::PARAMETER: diff --git a/modules/gdscript/gdscript_parser.cpp b/modules/gdscript/gdscript_parser.cpp index d910137510d..b13b03b3199 100644 --- a/modules/gdscript/gdscript_parser.cpp +++ b/modules/gdscript/gdscript_parser.cpp @@ -402,6 +402,8 @@ Error GDScriptParser::parse(const String &p_source_code, const String &p_script_ } GDScriptTokenizer::Token GDScriptParser::advance() { + lambda_ended = false; // Empty marker since we're past the end in any case. + if (current.type == GDScriptTokenizer::Token::TK_EOF) { ERR_FAIL_COND_V_MSG(current.type == GDScriptTokenizer::Token::TK_EOF, current, "GDScript parser bug: Trying to advance past the end of stream."); } @@ -428,7 +430,7 @@ bool GDScriptParser::match(GDScriptTokenizer::Token::Type p_token_type) { return true; } -bool GDScriptParser::check(GDScriptTokenizer::Token::Type p_token_type) { +bool GDScriptParser::check(GDScriptTokenizer::Token::Type p_token_type) const { if (p_token_type == GDScriptTokenizer::Token::IDENTIFIER) { return current.is_identifier(); } @@ -443,7 +445,7 @@ bool GDScriptParser::consume(GDScriptTokenizer::Token::Type p_token_type, const return false; } -bool GDScriptParser::is_at_end() { +bool GDScriptParser::is_at_end() const { return check(GDScriptTokenizer::Token::TK_EOF); } @@ -494,16 +496,34 @@ void GDScriptParser::pop_multiline() { tokenizer.set_multiline_mode(multiline_stack.size() > 0 ? multiline_stack.back()->get() : false); } -bool GDScriptParser::is_statement_end() { +bool GDScriptParser::is_statement_end_token() const { return check(GDScriptTokenizer::Token::NEWLINE) || check(GDScriptTokenizer::Token::SEMICOLON) || check(GDScriptTokenizer::Token::TK_EOF); } +bool GDScriptParser::is_statement_end() const { + return lambda_ended || in_lambda || is_statement_end_token(); +} + void GDScriptParser::end_statement(const String &p_context) { bool found = false; while (is_statement_end() && !is_at_end()) { // Remove sequential newlines/semicolons. + if (is_statement_end_token()) { + // Only consume if this is an actual token. + advance(); + } else if (lambda_ended) { + lambda_ended = false; // Consume this "token". + found = true; + break; + } else { + if (!found) { + lambda_ended = true; // Mark the lambda as done since we found something else to end the statement. + found = true; + } + break; + } + found = true; - advance(); } if (!found && !is_at_end()) { push_error(vformat(R"(Expected end of statement after %s, found "%s" instead.)", p_context, current.get_name())); @@ -1182,6 +1202,51 @@ GDScriptParser::EnumNode *GDScriptParser::parse_enum() { return enum_node; } +void GDScriptParser::parse_function_signature(FunctionNode *p_function, SuiteNode *p_body, const String &p_type) { + if (!check(GDScriptTokenizer::Token::PARENTHESIS_CLOSE) && !is_at_end()) { + bool default_used = false; + do { + if (check(GDScriptTokenizer::Token::PARENTHESIS_CLOSE)) { + // Allow for trailing comma. + break; + } + ParameterNode *parameter = parse_parameter(); + if (parameter == nullptr) { + break; + } + if (parameter->default_value != nullptr) { + default_used = true; + } else { + if (default_used) { + push_error("Cannot have a mandatory parameters after optional parameters."); + continue; + } + } + if (p_function->parameters_indices.has(parameter->identifier->name)) { + push_error(vformat(R"(Parameter with name "%s" was already declared for this %s.)", parameter->identifier->name, p_type)); + } else { + p_function->parameters_indices[parameter->identifier->name] = p_function->parameters.size(); + p_function->parameters.push_back(parameter); + p_body->add_local(parameter); + } + } while (match(GDScriptTokenizer::Token::COMMA)); + } + + pop_multiline(); + consume(GDScriptTokenizer::Token::PARENTHESIS_CLOSE, vformat(R"*(Expected closing ")" after %s parameters.)*", p_type)); + + if (match(GDScriptTokenizer::Token::FORWARD_ARROW)) { + make_completion_context(COMPLETION_TYPE_NAME_OR_VOID, p_function); + p_function->return_type = parse_type(true); + if (p_function->return_type == nullptr) { + push_error(R"(Expected return type or "void" after "->".)"); + } + } + + // TODO: Improve token consumption so it synchronizes to a statement boundary. This way we can get into the function body with unrecognized tokens. + consume(GDScriptTokenizer::Token::COLON, vformat(R"(Expected ":" after %s declaration.)", p_type)); +} + GDScriptParser::FunctionNode *GDScriptParser::parse_function() { bool _static = false; if (previous.type == GDScriptTokenizer::Token::STATIC) { @@ -1205,55 +1270,13 @@ GDScriptParser::FunctionNode *GDScriptParser::parse_function() { function->identifier = parse_identifier(); function->is_static = _static; - push_multiline(true); - consume(GDScriptTokenizer::Token::PARENTHESIS_OPEN, R"(Expected opening "(" after function name.)"); - SuiteNode *body = alloc_node(); SuiteNode *previous_suite = current_suite; current_suite = body; - if (!check(GDScriptTokenizer::Token::PARENTHESIS_CLOSE) && !is_at_end()) { - bool default_used = false; - do { - if (check(GDScriptTokenizer::Token::PARENTHESIS_CLOSE)) { - // Allow for trailing comma. - break; - } - ParameterNode *parameter = parse_parameter(); - if (parameter == nullptr) { - break; - } - if (parameter->default_value != nullptr) { - default_used = true; - } else { - if (default_used) { - push_error("Cannot have a mandatory parameters after optional parameters."); - continue; - } - } - if (function->parameters_indices.has(parameter->identifier->name)) { - push_error(vformat(R"(Parameter with name "%s" was already declared for this function.)", parameter->identifier->name)); - } else { - function->parameters_indices[parameter->identifier->name] = function->parameters.size(); - function->parameters.push_back(parameter); - body->add_local(parameter); - } - } while (match(GDScriptTokenizer::Token::COMMA)); - } - - pop_multiline(); - consume(GDScriptTokenizer::Token::PARENTHESIS_CLOSE, R"*(Expected closing ")" after function parameters.)*"); - - if (match(GDScriptTokenizer::Token::FORWARD_ARROW)) { - make_completion_context(COMPLETION_TYPE_NAME_OR_VOID, function); - function->return_type = parse_type(true); - if (function->return_type == nullptr) { - push_error(R"(Expected return type or "void" after "->".)"); - } - } - - // TODO: Improve token consumption so it synchronizes to a statement boundary. This way we can get into the function body with unrecognized tokens. - consume(GDScriptTokenizer::Token::COLON, R"(Expected ":" after function declaration.)"); + push_multiline(true); + consume(GDScriptTokenizer::Token::PARENTHESIS_OPEN, R"(Expected opening "(" after function name.)"); + parse_function_signature(function, body, "function"); current_suite = previous_suite; function->body = parse_suite("function declaration", body); @@ -1339,29 +1362,33 @@ bool GDScriptParser::register_annotation(const MethodInfo &p_info, uint32_t p_ta return true; } -GDScriptParser::SuiteNode *GDScriptParser::parse_suite(const String &p_context, SuiteNode *p_suite) { +GDScriptParser::SuiteNode *GDScriptParser::parse_suite(const String &p_context, SuiteNode *p_suite, bool p_for_lambda) { SuiteNode *suite = p_suite != nullptr ? p_suite : alloc_node(); suite->parent_block = current_suite; current_suite = suite; bool multiline = false; - if (check(GDScriptTokenizer::Token::NEWLINE)) { + if (match(GDScriptTokenizer::Token::NEWLINE)) { multiline = true; } if (multiline) { - consume(GDScriptTokenizer::Token::NEWLINE, vformat(R"(Expected newline after %s.)", p_context)); - if (!consume(GDScriptTokenizer::Token::INDENT, vformat(R"(Expected indented block after %s.)", p_context))) { current_suite = suite->parent_block; return suite; } } + int error_count = 0; + do { Node *statement = parse_statement(); if (statement == nullptr) { + if (error_count++ > 100) { + push_error("Too many statement errors.", suite); + break; + } continue; } suite->statements.push_back(statement); @@ -1396,12 +1423,22 @@ GDScriptParser::SuiteNode *GDScriptParser::parse_suite(const String &p_context, break; } - } while (multiline && !check(GDScriptTokenizer::Token::DEDENT) && !is_at_end()); + } while (multiline && !check(GDScriptTokenizer::Token::DEDENT) && !lambda_ended && !is_at_end()); if (multiline) { - consume(GDScriptTokenizer::Token::DEDENT, vformat(R"(Missing unindent at the end of %s.)", p_context)); + if (!lambda_ended) { + consume(GDScriptTokenizer::Token::DEDENT, vformat(R"(Missing unindent at the end of %s.)", p_context)); + + } else { + match(GDScriptTokenizer::Token::DEDENT); + } + } else if (previous.type == GDScriptTokenizer::Token::SEMICOLON) { + consume(GDScriptTokenizer::Token::NEWLINE, vformat(R"(Expected newline after ";" at the end of %s.)", p_context)); } + if (p_for_lambda) { + lambda_ended = true; + } current_suite = suite->parent_block; return suite; } @@ -1458,6 +1495,10 @@ GDScriptParser::Node *GDScriptParser::parse_statement() { push_error(R"(Constructor cannot return a value.)"); } n_return->return_value = parse_expression(false); + } else if (in_lambda && !is_statement_end_token()) { + // Try to parse it anyway as this might not be the statement end in a lambda. + // If this fails the expression will be nullptr, but that's the same as no return, so it's fine. + n_return->return_value = parse_expression(false); } result = n_return; @@ -1486,10 +1527,18 @@ GDScriptParser::Node *GDScriptParser::parse_statement() { default: { // Expression statement. ExpressionNode *expression = parse_expression(true); // Allow assignment here. + bool has_ended_lambda = false; if (expression == nullptr) { - push_error(vformat(R"(Expected statement, found "%s" instead.)", previous.get_name())); + if (in_lambda) { + // If it's not a valid expression beginning, it might be the continuation of the outer expression where this lambda is. + lambda_ended = true; + has_ended_lambda = true; + } else { + push_error(vformat(R"(Expected statement, found "%s" instead.)", previous.get_name())); + } } end_statement("expression"); + lambda_ended = lambda_ended || has_ended_lambda; result = expression; #ifdef DEBUG_ENABLED @@ -1513,7 +1562,7 @@ GDScriptParser::Node *GDScriptParser::parse_statement() { if (unreachable && result != nullptr) { current_suite->has_unreachable_code = true; if (current_function) { - push_warning(result, GDScriptWarning::UNREACHABLE_CODE, current_function->identifier->name); + push_warning(result, GDScriptWarning::UNREACHABLE_CODE, current_function->identifier ? current_function->identifier->name : ""); } else { // TODO: Properties setters and getters with unreachable code are not being warned } @@ -1953,7 +2002,7 @@ GDScriptParser::ExpressionNode *GDScriptParser::parse_precedence(Precedence p_pr // Completion can appear whenever an expression is expected. make_completion_context(COMPLETION_IDENTIFIER, nullptr); - GDScriptTokenizer::Token token = advance(); + GDScriptTokenizer::Token token = current; ParseFunction prefix_rule = get_rule(token.type)->prefix; if (prefix_rule == nullptr) { @@ -1961,6 +2010,8 @@ GDScriptParser::ExpressionNode *GDScriptParser::parse_precedence(Precedence p_pr return nullptr; } + advance(); // Only consume the token if there's a valid rule. + ExpressionNode *previous_operand = (this->*prefix_rule)(nullptr, p_can_assign); while (p_precedence <= get_rule(current.type)->precedence) { @@ -2055,6 +2106,9 @@ GDScriptParser::ExpressionNode *GDScriptParser::parse_self(ExpressionNode *p_pre if (current_function && current_function->is_static) { push_error(R"(Cannot use "self" inside a static function.)"); } + if (in_lambda) { + push_error(R"(Cannot use "self" inside a lambda.)"); + } SelfNode *self = alloc_node(); self->current_class = current_class; return self; @@ -2675,6 +2729,61 @@ GDScriptParser::ExpressionNode *GDScriptParser::parse_preload(ExpressionNode *p_ return preload; } +GDScriptParser::ExpressionNode *GDScriptParser::parse_lambda(ExpressionNode *p_previous_operand, bool p_can_assign) { + LambdaNode *lambda = alloc_node(); + FunctionNode *function = alloc_node(); + + if (match(GDScriptTokenizer::Token::IDENTIFIER)) { + function->identifier = parse_identifier(); + } + + bool multiline_context = multiline_stack.back()->get(); + + // Reset the multiline stack since we don't want the multiline mode one in the lambda body. + push_multiline(false); + if (multiline_context) { + tokenizer.push_expression_indented_block(); + } + + push_multiline(true); // For the parameters. + if (function->identifier) { + consume(GDScriptTokenizer::Token::PARENTHESIS_OPEN, R"(Expected opening "(" after lambda name.)"); + } else { + consume(GDScriptTokenizer::Token::PARENTHESIS_OPEN, R"(Expected opening "(" after "func".)"); + } + + FunctionNode *previous_function = current_function; + current_function = function; + + SuiteNode *body = alloc_node(); + SuiteNode *previous_suite = current_suite; + current_suite = body; + + parse_function_signature(function, body, "lambda"); + + current_suite = previous_suite; + + bool previous_in_lambda = in_lambda; + in_lambda = true; + + function->body = parse_suite("lambda declaration", body, true); + + pop_multiline(); + + if (multiline_context) { + // If we're in multiline mode, we want to skip the spurious DEDENT and NEWLINE tokens. + while (check(GDScriptTokenizer::Token::DEDENT) || check(GDScriptTokenizer::Token::INDENT) || check(GDScriptTokenizer::Token::NEWLINE)) { + current = tokenizer.scan(); // Not advance() since we don't want to change the previous token. + } + tokenizer.pop_expression_indented_block(); + } + + current_function = previous_function; + in_lambda = previous_in_lambda; + lambda->function = function; + return lambda; +} + GDScriptParser::ExpressionNode *GDScriptParser::parse_invalid_token(ExpressionNode *p_previous_operand, bool p_can_assign) { // Just for better error messages. GDScriptTokenizer::Token::Type invalid = previous.type; @@ -3019,7 +3128,7 @@ GDScriptParser::ParseRule *GDScriptParser::get_rule(GDScriptTokenizer::Token::Ty { nullptr, nullptr, PREC_NONE }, // CONST, { nullptr, nullptr, PREC_NONE }, // ENUM, { nullptr, nullptr, PREC_NONE }, // EXTENDS, - { nullptr, nullptr, PREC_NONE }, // FUNC, + { &GDScriptParser::parse_lambda, nullptr, PREC_NONE }, // FUNC, { nullptr, &GDScriptParser::parse_binary_operator, PREC_CONTENT_TEST }, // IN, { nullptr, &GDScriptParser::parse_binary_operator, PREC_TYPE_TEST }, // IS, { nullptr, nullptr, PREC_NONE }, // NAMESPACE, @@ -3755,6 +3864,10 @@ void GDScriptParser::TreePrinter::print_dictionary(DictionaryNode *p_dictionary) } void GDScriptParser::TreePrinter::print_expression(ExpressionNode *p_expression) { + if (p_expression == nullptr) { + push_text(""); + return; + } switch (p_expression->type) { case Node::ARRAY: print_array(static_cast(p_expression)); @@ -3783,6 +3896,9 @@ void GDScriptParser::TreePrinter::print_expression(ExpressionNode *p_expression) case Node::IDENTIFIER: print_identifier(static_cast(p_expression)); break; + case Node::LAMBDA: + print_lambda(static_cast(p_expression)); + break; case Node::LITERAL: print_literal(static_cast(p_expression)); break; @@ -3842,12 +3958,17 @@ void GDScriptParser::TreePrinter::print_for(ForNode *p_for) { decrease_indent(); } -void GDScriptParser::TreePrinter::print_function(FunctionNode *p_function) { +void GDScriptParser::TreePrinter::print_function(FunctionNode *p_function, const String &p_context) { for (const List::Element *E = p_function->annotations.front(); E != nullptr; E = E->next()) { print_annotation(E->get()); } - push_text("Function "); - print_identifier(p_function->identifier); + push_text(p_context); + push_text(" "); + if (p_function->identifier) { + print_identifier(p_function->identifier); + } else { + push_text(""); + } push_text("( "); for (int i = 0; i < p_function->parameters.size(); i++) { if (i > 0) { @@ -3901,6 +4022,10 @@ void GDScriptParser::TreePrinter::print_if(IfNode *p_if, bool p_is_elif) { } } +void GDScriptParser::TreePrinter::print_lambda(LambdaNode *p_lambda) { + print_function(p_lambda->function, "Lambda"); +} + void GDScriptParser::TreePrinter::print_literal(LiteralNode *p_literal) { // Prefix for string types. switch (p_literal->value.get_type()) { diff --git a/modules/gdscript/gdscript_parser.h b/modules/gdscript/gdscript_parser.h index 272d21ffce3..522b47feba3 100644 --- a/modules/gdscript/gdscript_parser.h +++ b/modules/gdscript/gdscript_parser.h @@ -267,6 +267,7 @@ public: GET_NODE, IDENTIFIER, IF, + LAMBDA, LITERAL, MATCH, MATCH_BRANCH, @@ -789,6 +790,18 @@ public: } }; + struct LambdaNode : public ExpressionNode { + FunctionNode *function = nullptr; + + bool has_name() const { + return function && function->identifier; + } + + LambdaNode() { + type = LAMBDA; + } + }; + struct LiteralNode : public ExpressionNode { Variant value; @@ -1191,6 +1204,8 @@ private: CompletionCall completion_call; List completion_call_stack; bool passed_cursor = false; + bool in_lambda = false; + bool lambda_ended = false; // Marker for when a lambda ends, to apply an end of statement if needed. typedef bool (GDScriptParser::*AnnotationAction)(const AnnotationNode *p_annotation, Node *p_target); struct AnnotationInfo { @@ -1278,10 +1293,11 @@ private: GDScriptTokenizer::Token advance(); bool match(GDScriptTokenizer::Token::Type p_token_type); - bool check(GDScriptTokenizer::Token::Type p_token_type); + bool check(GDScriptTokenizer::Token::Type p_token_type) const; bool consume(GDScriptTokenizer::Token::Type p_token_type, const String &p_error_message); - bool is_at_end(); - bool is_statement_end(); + bool is_at_end() const; + bool is_statement_end_token() const; + bool is_statement_end() const; void end_statement(const String &p_context); void synchronize(); void push_multiline(bool p_state); @@ -1299,7 +1315,8 @@ private: EnumNode *parse_enum(); ParameterNode *parse_parameter(); FunctionNode *parse_function(); - SuiteNode *parse_suite(const String &p_context, SuiteNode *p_suite = nullptr); + void parse_function_signature(FunctionNode *p_function, SuiteNode *p_body, const String &p_type); + SuiteNode *parse_suite(const String &p_context, SuiteNode *p_suite = nullptr, bool p_for_lambda = false); // Annotations AnnotationNode *parse_annotation(uint32_t p_valid_targets); bool register_annotation(const MethodInfo &p_info, uint32_t p_target_kinds, AnnotationAction p_apply, int p_optional_arguments = 0, bool p_is_vararg = false); @@ -1354,6 +1371,7 @@ private: ExpressionNode *parse_await(ExpressionNode *p_previous_operand, bool p_can_assign); ExpressionNode *parse_attribute(ExpressionNode *p_previous_operand, bool p_can_assign); ExpressionNode *parse_subscript(ExpressionNode *p_previous_operand, bool p_can_assign); + ExpressionNode *parse_lambda(ExpressionNode *p_previous_operand, bool p_can_assign); ExpressionNode *parse_invalid_token(ExpressionNode *p_previous_operand, bool p_can_assign); TypeNode *parse_type(bool p_allow_void = false); #ifdef TOOLS_ENABLED @@ -1415,10 +1433,11 @@ public: void print_expression(ExpressionNode *p_expression); void print_enum(EnumNode *p_enum); void print_for(ForNode *p_for); - void print_function(FunctionNode *p_function); + void print_function(FunctionNode *p_function, const String &p_context = "Function"); void print_get_node(GetNodeNode *p_get_node); void print_if(IfNode *p_if, bool p_is_elif = false); void print_identifier(IdentifierNode *p_identifier); + void print_lambda(LambdaNode *p_lambda); void print_literal(LiteralNode *p_literal); void print_match(MatchNode *p_match); void print_match_branch(MatchBranchNode *p_match_branch); diff --git a/modules/gdscript/gdscript_tokenizer.cpp b/modules/gdscript/gdscript_tokenizer.cpp index e432dfc891e..2e6388d92f8 100644 --- a/modules/gdscript/gdscript_tokenizer.cpp +++ b/modules/gdscript/gdscript_tokenizer.cpp @@ -242,6 +242,16 @@ void GDScriptTokenizer::set_multiline_mode(bool p_state) { multiline_mode = p_state; } +void GDScriptTokenizer::push_expression_indented_block() { + indent_stack_stack.push_back(indent_stack); +} + +void GDScriptTokenizer::pop_expression_indented_block() { + ERR_FAIL_COND(indent_stack_stack.size() == 0); + indent_stack = indent_stack_stack.back()->get(); + indent_stack_stack.pop_back(); +} + int GDScriptTokenizer::get_cursor_line() const { return cursor_line; } diff --git a/modules/gdscript/gdscript_tokenizer.h b/modules/gdscript/gdscript_tokenizer.h index bea4b140197..84b82c07f05 100644 --- a/modules/gdscript/gdscript_tokenizer.h +++ b/modules/gdscript/gdscript_tokenizer.h @@ -217,6 +217,7 @@ private: Token last_newline; int pending_indents = 0; List indent_stack; + List> indent_stack_stack; // For lambdas, which require manipulating the indentation point. List paren_stack; char32_t indent_char = '\0'; int position = 0; @@ -263,6 +264,8 @@ public: void set_multiline_mode(bool p_state); bool is_past_cursor() const; static String get_token_name(Token::Type p_token_type); + void push_expression_indented_block(); // For lambdas, or blocks inside expressions. + void pop_expression_indented_block(); // For lambdas, or blocks inside expressions. GDScriptTokenizer(); }; diff --git a/modules/gdscript/tests/test_gdscript.cpp b/modules/gdscript/tests/test_gdscript.cpp index e70f221c0ae..445de79e9ef 100644 --- a/modules/gdscript/tests/test_gdscript.cpp +++ b/modules/gdscript/tests/test_gdscript.cpp @@ -66,7 +66,7 @@ static void test_tokenizer(const String &p_code, const Vector &p_lines) StringBuilder token; token += " --> "; // Padding for line number. - for (int l = current.start_line; l <= current.end_line; l++) { + for (int l = current.start_line; l <= current.end_line && l <= p_lines.size(); l++) { print_line(vformat("%04d %s", l, p_lines[l - 1]).replace("\t", tab)); }