godot/modules/gdscript/gdscript_tokenizer.h

317 lines
8.7 KiB
C++

/**************************************************************************/
/* gdscript_tokenizer.h */
/**************************************************************************/
/* This file is part of: */
/* GODOT ENGINE */
/* https://godotengine.org */
/**************************************************************************/
/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
/* */
/* Permission is hereby granted, free of charge, to any person obtaining */
/* a copy of this software and associated documentation files (the */
/* "Software"), to deal in the Software without restriction, including */
/* without limitation the rights to use, copy, modify, merge, publish, */
/* distribute, sublicense, and/or sell copies of the Software, and to */
/* permit persons to whom the Software is furnished to do so, subject to */
/* the following conditions: */
/* */
/* The above copyright notice and this permission notice shall be */
/* included in all copies or substantial portions of the Software. */
/* */
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
/**************************************************************************/
#ifndef GDSCRIPT_TOKENIZER_H
#define GDSCRIPT_TOKENIZER_H
#include "core/templates/hash_map.h"
#include "core/templates/hash_set.h"
#include "core/templates/list.h"
#include "core/templates/vector.h"
#include "core/variant/variant.h"
#ifdef MINGW_ENABLED
#undef CONST
#undef IN
#undef VOID
#endif
// Abstract tokenizer interface for GDScript.
//
// Declares the token data model (`Token`, `Token::Type`), the cursor placement
// enum, and the pure virtual scanning/cursor API that concrete tokenizers
// implement. Every operation except `get_token_name()` is pure virtual.
class GDScriptTokenizer {
public:
	// Cursor placement value stored in `Token::cursor_place`.
	// NOTE(review): presumably describes where the cursor sits relative to the
	// token it was found in -- confirm against the implementation.
	enum CursorPlace {
		CURSOR_NONE,
		CURSOR_BEGINNING,
		CURSOR_MIDDLE,
		CURSOR_END,
	};

	// A single token produced by `scan()`, with its type, optional literal
	// value and source span.
	struct Token {
		// Token kinds. The enumerator order defines the numeric values; keep
		// `TK_MAX` last.
		enum Type {
			EMPTY,
			// Basic
			ANNOTATION,
			IDENTIFIER,
			LITERAL,
			// Comparison
			LESS,
			LESS_EQUAL,
			GREATER,
			GREATER_EQUAL,
			EQUAL_EQUAL,
			BANG_EQUAL,
			// Logical
			AND,
			OR,
			NOT,
			AMPERSAND_AMPERSAND,
			PIPE_PIPE,
			BANG,
			// Bitwise
			AMPERSAND,
			PIPE,
			TILDE,
			CARET,
			LESS_LESS,
			GREATER_GREATER,
			// Math
			PLUS,
			MINUS,
			STAR,
			STAR_STAR,
			SLASH,
			PERCENT,
			// Assignment
			EQUAL,
			PLUS_EQUAL,
			MINUS_EQUAL,
			STAR_EQUAL,
			STAR_STAR_EQUAL,
			SLASH_EQUAL,
			PERCENT_EQUAL,
			LESS_LESS_EQUAL,
			GREATER_GREATER_EQUAL,
			AMPERSAND_EQUAL,
			PIPE_EQUAL,
			CARET_EQUAL,
			// Control flow
			IF,
			ELIF,
			ELSE,
			FOR,
			WHILE,
			BREAK,
			CONTINUE,
			PASS,
			RETURN,
			MATCH,
			WHEN,
			// Keywords
			AS,
			ASSERT,
			AWAIT,
			BREAKPOINT,
			CLASS,
			CLASS_NAME,
			CONST,
			ENUM,
			EXTENDS,
			FUNC,
			IN,
			IS,
			NAMESPACE,
			PRELOAD,
			SELF,
			SIGNAL,
			STATIC,
			SUPER,
			TRAIT,
			VAR,
			VOID,
			YIELD,
			// Punctuation
			BRACKET_OPEN,
			BRACKET_CLOSE,
			BRACE_OPEN,
			BRACE_CLOSE,
			PARENTHESIS_OPEN,
			PARENTHESIS_CLOSE,
			COMMA,
			SEMICOLON,
			PERIOD,
			PERIOD_PERIOD,
			COLON,
			DOLLAR,
			FORWARD_ARROW,
			UNDERSCORE,
			// Whitespace
			NEWLINE,
			INDENT,
			DEDENT,
			// Constants
			CONST_PI,
			CONST_TAU,
			CONST_INF,
			CONST_NAN,
			// Error message improvement
			VCS_CONFLICT_MARKER,
			BACKTICK,
			QUESTION_MARK,
			// Special
			ERROR,
			TK_EOF, // "EOF" is reserved
			TK_MAX
		};

		Type type = EMPTY;
		// Value carried by the token; `get_identifier()` converts it to a StringName.
		Variant literal;

		// Source span of the token.
		int start_line = 0;
		int end_line = 0;
		int start_column = 0;
		int end_column = 0;

		// Column span for multiline tokens.
		int leftmost_column = 0;
		int rightmost_column = 0;

		// Defaults to -1.
		// NOTE(review): presumably "cursor not inside this token" -- confirm.
		int cursor_position = -1;
		CursorPlace cursor_place = CURSOR_NONE;
		String source;

		const char *get_name() const;
		bool can_precede_bin_op() const;
		bool is_identifier() const;
		bool is_node_name() const;

		// Returns `literal` converted to a StringName.
		StringName get_identifier() const { return literal; }

		// Default token: type EMPTY, all positions zeroed.
		Token() {}

		// Builds a token of the given type; every other field keeps its default.
		Token(Type p_type) :
				type(p_type) {}
	};

#ifdef TOOLS_ENABLED
	// A comment's text plus a flag telling how it is placed on its line.
	struct CommentData {
		String comment;
		// true: Comment starts at beginning of line or after indentation.
		// false: Inline comment (starts after some code).
		bool new_line = false;

		CommentData() {}

		CommentData(const String &p_comment, bool p_new_line) :
				comment(p_comment),
				new_line(p_new_line) {}
	};

	// Comments keyed by int.
	// NOTE(review): the key is presumably the line number -- confirm.
	virtual const HashMap<int, CommentData> &get_comments() const = 0;
#endif // TOOLS_ENABLED

	static String get_token_name(Token::Type p_token_type);

	// Cursor tracking.
	virtual int get_cursor_line() const = 0;
	virtual int get_cursor_column() const = 0;
	virtual void set_cursor_position(int p_line, int p_column) = 0;

	virtual void set_multiline_mode(bool p_state) = 0;
	virtual bool is_past_cursor() const = 0;

	// For lambdas, or blocks inside expressions.
	virtual void push_expression_indented_block() = 0;
	// For lambdas, or blocks inside expressions.
	virtual void pop_expression_indented_block() = 0;

	virtual bool is_text() = 0;

	// Produces a token from the underlying source.
	virtual Token scan() = 0;

	virtual ~GDScriptTokenizer() {}
};
// GDScriptTokenizer implementation fed from source text supplied through
// `set_source_code()`. `is_text()` always reports true for this tokenizer.
class GDScriptTokenizerText : public GDScriptTokenizer {
	String source;
	const char32_t *_source = nullptr;
	const char32_t *_current = nullptr;
	int line = -1;
	int column = -1;
	int cursor_line = -1;
	int cursor_column = -1;
	int tab_size = 4;

	// Keep track of multichar tokens.
	const char32_t *_start = nullptr;
	int start_line = 0;
	int start_column = 0;
	int leftmost_column = 0;
	int rightmost_column = 0;

	// Info cache.
	bool line_continuation = false; // Whether this line is a continuation of the previous, like when using '\'.
	bool multiline_mode = false;
	List<Token> error_stack;
	bool pending_newline = false;
	Token last_token;
	Token last_newline;
	int pending_indents = 0;
	List<int> indent_stack;
	List<List<int>> indent_stack_stack; // For lambdas, which require manipulating the indentation point.
	List<char32_t> paren_stack;
	char32_t indent_char = '\0';
	int position = 0;
	int length = 0;
	Vector<int> continuation_lines;

#ifdef DEBUG_ENABLED
	Vector<String> keyword_list;
#endif // DEBUG_ENABLED

#ifdef TOOLS_ENABLED
	HashMap<int, CommentData> comments;
#endif // TOOLS_ENABLED

	// True once `position` has reached or passed `length`.
	_FORCE_INLINE_ bool _is_at_end() { return length <= position; }

	// Returns the character `p_offset` places away from `_current`, or '\0'
	// when `position + p_offset` falls outside [0, length).
	_FORCE_INLINE_ char32_t _peek(int p_offset = 0) {
		const int target = position + p_offset;
		if (target < 0) {
			return '\0';
		}
		if (target >= length) {
			return '\0';
		}
		return _current[p_offset];
	}

	// Number of entries currently on the indent stack.
	int indent_level() const { return indent_stack.size(); }

	// Whether any error token is waiting on the error stack.
	bool has_error() const { return !error_stack.is_empty(); }

	Token pop_error();
	char32_t _advance();
	String _get_indent_char_name(char32_t ch);
	void _skip_whitespace();
	void check_indent();

#ifdef DEBUG_ENABLED
	void make_keyword_list();
#endif // DEBUG_ENABLED

	// Error helpers.
	Token make_error(const String &p_message);
	void push_error(const String &p_message);
	void push_error(const Token &p_error);
	Token make_paren_error(char32_t p_paren);

	// Token construction helpers.
	Token make_token(Token::Type p_type);
	Token make_literal(const Variant &p_literal);
	Token make_identifier(const StringName &p_identifier);
	Token check_vcs_marker(char32_t p_test, Token::Type p_double_type);

	void push_paren(char32_t p_char);
	bool pop_paren(char32_t p_expected);
	void newline(bool p_make_token);

	// Scanners for specific token categories.
	Token number();
	Token potential_identifier();
	Token string();
	Token annotation();

public:
	void set_source_code(const String &p_source_code);

	// Read-only view of the recorded continuation lines.
	const Vector<int> &get_continuation_lines() const { return continuation_lines; }

	virtual int get_cursor_line() const override;
	virtual int get_cursor_column() const override;
	virtual void set_cursor_position(int p_line, int p_column) override;
	virtual void set_multiline_mode(bool p_state) override;
	virtual bool is_past_cursor() const override;

	// For lambdas, or blocks inside expressions.
	virtual void push_expression_indented_block() override;
	// For lambdas, or blocks inside expressions.
	virtual void pop_expression_indented_block() override;

	// This tokenizer always reports itself as text-based.
	virtual bool is_text() override { return true; }

#ifdef TOOLS_ENABLED
	// Exposes the stored comment map.
	virtual const HashMap<int, CommentData> &get_comments() const override { return comments; }
#endif // TOOLS_ENABLED

	virtual Token scan() override;

	GDScriptTokenizerText();
};
#endif // GDSCRIPT_TOKENIZER_H