godot/modules/gdscript/gdscript_tokenizer.h
Rémi Verschelde b5334d14f7
Update copyright statements to 2021
Happy new year to the wonderful Godot community!

2020 has been a tough year for most of us personally, but a good year for
Godot development nonetheless with a huge amount of work done towards Godot
4.0 and great improvements backported to the long-lived 3.2 branch.

We've had close to 400 contributors to engine code this year, authoring nearly
7,000 commits! (And that's only for the `master` branch and for the engine code,
there's a lot more when counting docs, demos and other first-party repos.)

Here's to a great year 2021 for all Godot users 🎆
2021-01-01 20:19:21 +01:00

272 lines
6.9 KiB
C++

/*************************************************************************/
/* gdscript_tokenizer.h */
/*************************************************************************/
/* This file is part of: */
/* GODOT ENGINE */
/* https://godotengine.org */
/*************************************************************************/
/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */
/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */
/* */
/* Permission is hereby granted, free of charge, to any person obtaining */
/* a copy of this software and associated documentation files (the */
/* "Software"), to deal in the Software without restriction, including */
/* without limitation the rights to use, copy, modify, merge, publish, */
/* distribute, sublicense, and/or sell copies of the Software, and to */
/* permit persons to whom the Software is furnished to do so, subject to */
/* the following conditions: */
/* */
/* The above copyright notice and this permission notice shall be */
/* included in all copies or substantial portions of the Software. */
/* */
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
/*************************************************************************/
#ifndef GDSCRIPT_TOKENIZER_H
#define GDSCRIPT_TOKENIZER_H
#include "core/templates/list.h"
#include "core/templates/map.h"
#include "core/templates/set.h"
#include "core/templates/vector.h"
#include "core/variant/variant.h"
// Tokenizer for GDScript source text. The source is supplied through
// set_source_code() and Token values are obtained from scan().
class GDScriptTokenizer {
public:
	// Placement of the cursor relative to a token; stored in Token::cursor_place.
	enum CursorPlace {
		CURSOR_NONE,
		CURSOR_BEGINNING,
		CURSOR_MIDDLE,
		CURSOR_END,
	};

	// One token: its type, an optional literal value, its line/column span,
	// cursor information and the associated source text.
	struct Token {
		// All token kinds. TK_MAX is the last enumerator and therefore equals
		// the number of kinds declared before it.
		enum Type {
			EMPTY,
			// Basic
			ANNOTATION,
			IDENTIFIER,
			LITERAL,
			// Comparison
			LESS,
			LESS_EQUAL,
			GREATER,
			GREATER_EQUAL,
			EQUAL_EQUAL,
			BANG_EQUAL,
			// Logical
			AND,
			OR,
			NOT,
			AMPERSAND_AMPERSAND,
			PIPE_PIPE,
			BANG,
			// Bitwise
			AMPERSAND,
			PIPE,
			TILDE,
			CARET,
			LESS_LESS,
			GREATER_GREATER,
			// Math
			PLUS,
			MINUS,
			STAR,
			SLASH,
			PERCENT,
			// Assignment
			EQUAL,
			PLUS_EQUAL,
			MINUS_EQUAL,
			STAR_EQUAL,
			SLASH_EQUAL,
			PERCENT_EQUAL,
			LESS_LESS_EQUAL,
			GREATER_GREATER_EQUAL,
			AMPERSAND_EQUAL,
			PIPE_EQUAL,
			CARET_EQUAL,
			// Control flow
			IF,
			ELIF,
			ELSE,
			FOR,
			WHILE,
			BREAK,
			CONTINUE,
			PASS,
			RETURN,
			MATCH,
			// Keywords
			AS,
			ASSERT,
			AWAIT,
			BREAKPOINT,
			CLASS,
			CLASS_NAME,
			CONST,
			ENUM,
			EXTENDS,
			FUNC,
			IN,
			IS,
			NAMESPACE,
			PRELOAD,
			SELF,
			SIGNAL,
			STATIC,
			SUPER,
			TRAIT,
			VAR,
			VOID,
			YIELD,
			// Punctuation
			BRACKET_OPEN,
			BRACKET_CLOSE,
			BRACE_OPEN,
			BRACE_CLOSE,
			PARENTHESIS_OPEN,
			PARENTHESIS_CLOSE,
			COMMA,
			SEMICOLON,
			PERIOD,
			PERIOD_PERIOD,
			COLON,
			DOLLAR,
			FORWARD_ARROW,
			UNDERSCORE,
			// Whitespace
			NEWLINE,
			INDENT,
			DEDENT,
			// Constants
			CONST_PI,
			CONST_TAU,
			CONST_INF,
			CONST_NAN,
			// Error message improvement
			VCS_CONFLICT_MARKER,
			BACKTICK,
			QUESTION_MARK,
			// Special
			ERROR,
			TK_EOF, // "EOF" is reserved
			TK_MAX
		};

		Type type = EMPTY;
		Variant literal;

		// Line/column span of the token.
		int start_line = 0;
		int end_line = 0;
		int start_column = 0;
		int end_column = 0;

		// Column span for multiline tokens.
		int leftmost_column = 0;
		int rightmost_column = 0;

		int cursor_position = -1;
		CursorPlace cursor_place = CURSOR_NONE;
		String source;

		const char *get_name() const;
		bool is_identifier() const;
		bool is_node_name() const;

		// Returns `source` converted to a StringName.
		StringName get_identifier() const {
			return source;
		}

		// Builds a token of the given type; every other field keeps its
		// default member initializer.
		Token(Type p_type) :
				type(p_type) {
		}

		// Builds an EMPTY token (the default member initializer of `type`).
		Token() {
		}
	};

#ifdef TOOLS_ENABLED
	// A comment's text plus a `new_line` flag.
	// NOTE(review): `new_line` presumably marks a comment that sits on its
	// own line - confirm against the tokenizer implementation.
	struct CommentData {
		String comment;
		bool new_line = false;

		CommentData() {
		}

		CommentData(const String &p_comment, bool p_new_line) :
				comment(p_comment),
				new_line(p_new_line) {
		}
	};

	// Read-only access to the collected comments, keyed by an int
	// (presumably the line number - confirm against the implementation).
	const Map<int, CommentData> &get_comments() const {
		return comments;
	}
#endif // TOOLS_ENABLED

private:
	String source;
	const char32_t *_source = nullptr;
	const char32_t *_current = nullptr;
	int line = -1;
	int column = -1;
	int cursor_line = -1;
	int cursor_column = -1;
	int tab_size = 4;

	// Keep track of multichar tokens.
	const char32_t *_start = nullptr;
	int start_line = 0;
	int start_column = 0;
	int leftmost_column = 0;
	int rightmost_column = 0;

	// Info cache.
	bool line_continuation = false; // Whether this line is a continuation of the previous, like when using '\'.
	bool multiline_mode = false;
	List<Token> error_stack;
	bool pending_newline = false;
	Token last_newline;
	int pending_indents = 0;
	List<int> indent_stack;
	List<char32_t> paren_stack;
	char32_t indent_char = '\0';
	int position = 0;
	int length = 0;

#ifdef TOOLS_ENABLED
	Map<int, CommentData> comments;
#endif // TOOLS_ENABLED

	// True once `position` has reached or passed `length`.
	_FORCE_INLINE_ bool _is_at_end() {
		return position >= length;
	}

	// Returns the character `p_offset` places away from `_current`, or '\0'
	// when `position + p_offset` falls outside [0, length).
	_FORCE_INLINE_ char32_t _peek(int p_offset = 0) {
		const int target = position + p_offset;
		if (target < 0 || target >= length) {
			return '\0';
		}
		return _current[p_offset];
	}

	// Number of entries currently held in `indent_stack`.
	int indent_level() const {
		return indent_stack.size();
	}

	// True while `error_stack` holds at least one token.
	bool has_error() const {
		return !error_stack.is_empty();
	}

	Token pop_error();

	// Low-level scanning helpers.
	char32_t _advance();
	void _skip_whitespace();
	void check_indent();

	// Error helpers.
	Token make_error(const String &p_message);
	void push_error(const String &p_message);
	void push_error(const Token &p_error);
	Token make_paren_error(char32_t p_paren);

	// Token construction helpers.
	Token make_token(Token::Type p_type);
	Token make_literal(const Variant &p_literal);
	Token make_identifier(const StringName &p_identifier);
	Token check_vcs_marker(char32_t p_test, Token::Type p_double_type);

	// Helpers operating on paired characters (see `paren_stack`).
	void push_paren(char32_t p_char);
	bool pop_paren(char32_t p_expected);

	void newline(bool p_make_token);

	// Scanners for specific token categories.
	Token number();
	Token potential_identifier();
	Token string();
	Token annotation();

public:
	Token scan();

	void set_source_code(const String &p_source_code);

	int get_cursor_line() const;
	int get_cursor_column() const;
	void set_cursor_position(int p_line, int p_column);
	void set_multiline_mode(bool p_state);
	bool is_past_cursor() const;
	static String get_token_name(Token::Type p_token_type);

	GDScriptTokenizer();
};
#endif