godot/modules/fbx/fbx_parser/FBXTokenizer.cpp

/*************************************************************************/
/*  FBXTokenizer.cpp                                                     */
/*************************************************************************/
/*                       This file is part of:                           */
/*                           GODOT ENGINE                                */
/*                      https://godotengine.org                          */
/*************************************************************************/
/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur.                 */
/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md).   */
/*                                                                       */
/* Permission is hereby granted, free of charge, to any person obtaining */
/* a copy of this software and associated documentation files (the       */
/* "Software"), to deal in the Software without restriction, including   */
/* without limitation the rights to use, copy, modify, merge, publish,   */
/* distribute, sublicense, and/or sell copies of the Software, and to    */
/* permit persons to whom the Software is furnished to do so, subject to */
/* the following conditions:                                             */
/*                                                                       */
/* The above copyright notice and this permission notice shall be        */
/* included in all copies or substantial portions of the Software.       */
/*                                                                       */
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
/*************************************************************************/

/*
Open Asset Import Library (assimp)
----------------------------------------------------------------------

Copyright (c) 2006-2019, assimp team


All rights reserved.

Redistribution and use of this software in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:

* Redistributions of source code must retain the above
  copyright notice, this list of conditions and the
  following disclaimer.

* Redistributions in binary form must reproduce the above
  copyright notice, this list of conditions and the
  following disclaimer in the documentation and/or other
  materials provided with the distribution.

* Neither the name of the assimp team, nor the names of its
  contributors may be used to endorse or promote products
  derived from this software without specific prior
  written permission of the assimp team.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

----------------------------------------------------------------------
*/

/** @file  FBXTokenizer.cpp
 *  @brief Implementation of the FBX broadphase lexer
 */

// tab width for logging columns
#define ASSIMP_FBX_TAB_WIDTH 4

#include "FBXTokenizer.h"
#include "core/string/print_string.h"

namespace FBXDocParser {

// ------------------------------------------------------------------------------------------------
// Creates a token describing the characters in [p_sbegin, p_send) together with
// its type and the line/column it was found at. The token only stores the two
// pointers; it does not take ownership of the underlying buffer.
Token::Token(const char *p_sbegin, const char *p_send, TokenType p_type, unsigned int p_line, unsigned int p_column) :
		sbegin(p_sbegin),
		send(p_send),
		type(p_type),
		line(p_line),
		column(p_column) {
#ifdef DEBUG_ENABLED
	// Debug builds additionally keep a copy of the token text in 'contents'.
	const size_t text_length = static_cast<size_t>(send - sbegin);
	contents = std::string(sbegin, text_length);
#endif
}

// ------------------------------------------------------------------------------------------------
// Nothing to release explicitly here; members clean up after themselves.
Token::~Token() = default;

namespace {

// ------------------------------------------------------------------------------------------------
void TokenizeError(const std::string &message, unsigned int line, unsigned int column) {
	print_error("[FBX-Tokenize]" + String(message.c_str()) + " " + itos(line) + ":" + itos(column));
}

// Emits the pending token delimited by the inclusive range [start, end], if
// there is one, and appends it to 'output_tokens' with the given 'type'.
//
// Before the token is stored its characters are sanity-checked: whitespace
// outside of double quotes and an odd number of double quotes are both
// reported through TokenizeError(), but the token is still appended.
//
// When no token is pending (either pointer is null) nothing is appended; an
// error is reported only if 'must_have_token' is set.
//
// In every case 'start' and 'end' are reset to nullptr before returning, so
// the caller's cursor pair is ready for the next token.
// ------------------------------------------------------------------------------------------------
void ProcessDataToken(TokenList &output_tokens, const char *&start, const char *&end,
		unsigned int line,
		unsigned int column,
		TokenType type = TokenType_DATA,
		bool must_have_token = false) {
	if (!start || !end) {
		if (must_have_token) {
			TokenizeError("unexpected character, expected data token", line, column);
		}
	} else {
		// 'end' points at the last character of the token, so the half-open
		// range used below stops one past it.
		const char *const past_end = end + 1;

		bool quoted = false;
		for (const char *it = start; it != past_end; ++it) {
			const char ch = *it;
			if (ch == '\"') {
				quoted = !quoted;
			}

			if (quoted) {
				continue;
			}
			if (IsSpaceOrNewLine(ch)) {
				TokenizeError("unexpected whitespace in token", line, column);
			}
		}

		if (quoted) {
			TokenizeError("non-terminated double quotes", line, column);
		}

		output_tokens.push_back(new_Token(start, past_end, type, line, column));
	}

	end = nullptr;
	start = nullptr;
}
} // namespace

// ------------------------------------------------------------------------------------------------
// Splits the ASCII FBX text stored in input[0 .. length) into tokens and
// appends them to 'output_tokens'.
//
// 'input' is never read at or beyond index 'length', so the buffer does not
// have to be NUL-terminated.
//
// Recognised structure:
//   - ';' starts a comment that lasts until the next line end,
//   - '"' ... '"' forms a single data token (quotes included),
//   - '{', '}' and ',' each produce their own token,
//   - ':' turns the data token in front of it into a KEY token; whitespace
//     between the key and the colon is tolerated,
//   - any other run of non-whitespace characters is a DATA token.
//
// 'corrupt' is set to true (it is never cleared here) when the input cannot be
// tokenized sensibly: a double quote in the middle of an unquoted token, or a
// quoted string that is still open when the input ends. Other problems are
// only logged through TokenizeError() and tokenization carries on.
void Tokenize(TokenList &output_tokens, const char *input, size_t length, bool &corrupt) {
	// line and column counters both start at one; the column is reset to 0 on a
	// line end and incremented before each character is examined
	unsigned int line = 1;
	unsigned int column = 1;

	bool comment = false;
	bool in_double_quotes = false;
	bool pending_data_token = false;
	// set when the whitespace look-ahead below already emitted a KEY token; the
	// ':' that follows belongs to that key and must not be reported as an error
	bool key_colon_expected = false;

	const char *token_begin = nullptr, *token_end = nullptr;

	// iterate by index so that every access can be checked against 'length'
	for (size_t x = 0; x < length; x++) {
		const char c = input[x];
		const char *cur = &input[x];
		column += (c == '\t' ? ASSIMP_FBX_TAB_WIDTH : 1);

		if (IsLineEnd(c)) {
			// comments never span lines
			comment = false;

			column = 0;
			++line;
		}

		if (comment) {
			continue;
		}

		if (in_double_quotes) {
			if (c == '\"') {
				in_double_quotes = false;
				token_end = cur;

				ProcessDataToken(output_tokens, token_begin, token_end, line, column);
				pending_data_token = false;
			}
			continue;
		}

		switch (c) {
			case '\"':
				if (token_begin) {
					TokenizeError("unexpected double-quote", line, column);
					corrupt = true;
					return;
				}
				token_begin = cur;
				in_double_quotes = true;
				continue;

			case ';':
				ProcessDataToken(output_tokens, token_begin, token_end, line, column);
				comment = true;
				continue;

			case '{':
				ProcessDataToken(output_tokens, token_begin, token_end, line, column);
				output_tokens.push_back(new_Token(cur, cur + 1, TokenType_OPEN_BRACKET, line, column));
				continue;

			case '}':
				ProcessDataToken(output_tokens, token_begin, token_end, line, column);
				output_tokens.push_back(new_Token(cur, cur + 1, TokenType_CLOSE_BRACKET, line, column));
				continue;

			case ',':
				if (pending_data_token) {
					ProcessDataToken(output_tokens, token_begin, token_end, line, column, TokenType_DATA, true);
				}
				output_tokens.push_back(new_Token(cur, cur + 1, TokenType_COMMA, line, column));
				continue;

			case ':':
				if (pending_data_token) {
					ProcessDataToken(output_tokens, token_begin, token_end, line, column, TokenType_KEY, true);
				} else if (key_colon_expected) {
					// the key was already emitted by the look-ahead; just consume its colon
					key_colon_expected = false;
				} else {
					TokenizeError("unexpected colon", line, column);
				}
				continue;
		}

		if (IsSpaceOrNewLine(c)) {
			if (token_begin) {
				// peek ahead, without leaving the buffer, past the whitespace run
				// and check if the next character is a colon, in which case the
				// token that just ended counts as a KEY token.
				TokenType type = TokenType_DATA;
				size_t peek = x;
				while (peek < length && input[peek] != '\0' && IsSpaceOrNewLine(input[peek])) {
					++peek;
				}
				if (peek < length && input[peek] == ':') {
					type = TokenType_KEY;
					key_colon_expected = true;
				}

				ProcessDataToken(output_tokens, token_begin, token_end, line, column, type);
			}

			pending_data_token = false;
		} else {
			token_end = cur;
			if (!token_begin) {
				token_begin = cur;
			}

			pending_data_token = true;
		}
	}

	if (in_double_quotes) {
		// the input ended inside a quoted string, so everything after the
		// opening quote was swallowed and the token stream is incomplete
		TokenizeError("non-terminated double quotes", line, column);
		corrupt = true;
	}
}
} // namespace FBXDocParser
Port FBX module from commit 68013d23932688e57b489600f4517dd280edc464 Ports FBX module from 3.2 branch to 4.0 This is the only time the plugin will be updated from 3.2 and marks the final time we do this, from now on we will backport FBX to 3.2 with fixes. Changelog: - fixed crash importing files with buggy format (because of bad newlines in ASCII data, this is yet to be fixed fully) - fixed const correctness with C++/C version change - rewrote material handling to be simpler and better - ports from 3.2 to 4.0 the fbx importer 2020-12-22 22:29:48 +00:00			`/*************************************************************************/`
			`/* FBXTokenizer.cpp */`
			`/*************************************************************************/`
			`/* This file is part of: */`
			`/* GODOT ENGINE */`
			`/* https://godotengine.org */`
			`/*************************************************************************/`
Update copyright statements to 2021 Happy new year to the wonderful Godot community! 2020 has been a tough year for most of us personally, but a good year for Godot development nonetheless with a huge amount of work done towards Godot 4.0 and great improvements backported to the long-lived 3.2 branch. We've had close to 400 contributors to engine code this year, authoring near 7,000 commit! (And that's only for the `master` branch and for the engine code, there's a lot more when counting docs, demos and other first-party repos.) Here's to a great year 2021 for all Godot users 🎆 2021-01-01 19:13:46 +00:00			`/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */`
			`/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */`
Port FBX module from commit 68013d23932688e57b489600f4517dd280edc464 Ports FBX module from 3.2 branch to 4.0 This is the only time the plugin will be updated from 3.2 and marks the final time we do this, from now on we will backport FBX to 3.2 with fixes. Changelog: - fixed crash importing files with buggy format (because of bad newlines in ASCII data, this is yet to be fixed fully) - fixed const correctness with C++/C version change - rewrote material handling to be simpler and better - ports from 3.2 to 4.0 the fbx importer 2020-12-22 22:29:48 +00:00			`/* */`
			`/* Permission is hereby granted, free of charge, to any person obtaining */`
			`/* a copy of this software and associated documentation files (the */`
			`/* "Software"), to deal in the Software without restriction, including */`
			`/* without limitation the rights to use, copy, modify, merge, publish, */`
			`/* distribute, sublicense, and/or sell copies of the Software, and to */`
			`/* permit persons to whom the Software is furnished to do so, subject to */`
			`/* the following conditions: */`
			`/* */`
			`/* The above copyright notice and this permission notice shall be */`
			`/* included in all copies or substantial portions of the Software. */`
			`/* */`
			`/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */`
			`/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */`
			`/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/`
			`/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */`
			`/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */`
			`/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */`
			`/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */`
			`/*************************************************************************/`

			`/*`
			`Open Asset Import Library (assimp)`
			`----------------------------------------------------------------------`

			`Copyright (c) 2006-2019, assimp team`


			`All rights reserved.`

			`Redistribution and use of this software in source and binary forms,`
			`with or without modification, are permitted provided that the`
			`following conditions are met:`

			`* Redistributions of source code must retain the above`
			`copyright notice, this list of conditions and the`
			`following disclaimer.`

			`* Redistributions in binary form must reproduce the above`
			`copyright notice, this list of conditions and the`
			`following disclaimer in the documentation and/or other`
			`materials provided with the distribution.`

			`* Neither the name of the assimp team, nor the names of its`
			`contributors may be used to endorse or promote products`
			`derived from this software without specific prior`
			`written permission of the assimp team.`

			`THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS`
			`"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT`
			`LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR`
			`A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT`
			`OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,`
			`SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT`
			`LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,`
			`DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY`
			`THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT`
			`(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE`
			`OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.`

			`----------------------------------------------------------------------`
			`*/`

			`/** @file FBXTokenizer.cpp`
			`* @brief Implementation of the FBX broadphase lexer`
			`*/`

			`// tab width for logging columns`
			`#define ASSIMP_FBX_TAB_WIDTH 4`

			`#include "FBXTokenizer.h"`
			`#include "core/string/print_string.h"`

			`namespace FBXDocParser {`

			`// ------------------------------------------------------------------------------------------------`
			`Token::Token(const char *p_sbegin, const char *p_send, TokenType p_type, unsigned int p_line, unsigned int p_column) :`
			`sbegin(p_sbegin),`
			`send(p_send),`
			`type(p_type),`
			`line(p_line),`
			`column(p_column) {`
			`#ifdef DEBUG_ENABLED`
			`contents = std::string(sbegin, static_cast<size_t>(send - sbegin));`
			`#endif`
			`}`

			`// ------------------------------------------------------------------------------------------------`
			`Token::~Token() {`
			`}`

			`namespace {`

			`// ------------------------------------------------------------------------------------------------`
			`void TokenizeError(const std::string &message, unsigned int line, unsigned int column) {`
			`print_error("[FBX-Tokenize]" + String(message.c_str()) + " " + itos(line) + ":" + itos(column));`
			`}`

			`// process a potential data token up to 'cur', adding it to 'output_tokens'.`
			`// ------------------------------------------------------------------------------------------------`
			`void ProcessDataToken(TokenList &output_tokens, const char *&start, const char *&end,`
			`unsigned int line,`
			`unsigned int column,`
			`TokenType type = TokenType_DATA,`
			`bool must_have_token = false) {`
			`if (start && end) {`
			`// sanity check:`
			`// tokens should have no whitespace outside quoted text and [start,end] should`
			`// properly delimit the valid range.`
			`bool in_double_quotes = false;`
			`for (const char *c = start; c != end + 1; ++c) {`
			`if (*c == '\"') {`
			`in_double_quotes = !in_double_quotes;`
			`}`

			`if (!in_double_quotes && IsSpaceOrNewLine(*c)) {`
			`TokenizeError("unexpected whitespace in token", line, column);`
			`}`
			`}`

			`if (in_double_quotes) {`
			`TokenizeError("non-terminated double quotes", line, column);`
			`}`

			`output_tokens.push_back(new_Token(start, end + 1, type, line, column));`
			`} else if (must_have_token) {`
			`TokenizeError("unexpected character, expected data token", line, column);`
			`}`

			`start = end = nullptr;`
			`}`
			`} // namespace`

			`// ------------------------------------------------------------------------------------------------`
This stops using FBXPropertyTable as a pointer. The base object will inherit the property table, for every FBX object, if it doesn't exist it will be ignored. The previous code was dangerous and not simple to understand, this makes the code simpler and should result in no leaks with PropertyTable. Features/Fixes: Adds ability for multiple millions of polygons to be loaded. Fixes memory leaks with tokens Fixes memory leaks with property table Fixes loading some corrupt files Fixes meshes not having a unique name to the mesh node. Opens up loading for two more versions: 7100 and 7200, up to 2020. Preliminary support for Cinema4D files in parser now, before this was not possible it would cause memory corruption, which is gone now. FBXProperties not being pointers presented simpler challenges in the long run also, fixed a bunch of bugs. 2021-03-30 01:33:06 +00:00			`void Tokenize(TokenList &output_tokens, const char *input, size_t length, bool &corrupt) {`
Port FBX module from commit 68013d23932688e57b489600f4517dd280edc464 Ports FBX module from 3.2 branch to 4.0 This is the only time the plugin will be updated from 3.2 and marks the final time we do this, from now on we will backport FBX to 3.2 with fixes. Changelog: - fixed crash importing files with buggy format (because of bad newlines in ASCII data, this is yet to be fixed fully) - fixed const correctness with C++/C version change - rewrote material handling to be simpler and better - ports from 3.2 to 4.0 the fbx importer 2020-12-22 22:29:48 +00:00			`// line and column numbers numbers are one-based`
			`unsigned int line = 1;`
			`unsigned int column = 1;`

			`bool comment = false;`
			`bool in_double_quotes = false;`
			`bool pending_data_token = false;`

			`const char *token_begin = nullptr, *token_end = nullptr;`

			`// input (starting string), *cur the current string, column +=`
			`// modified to fix strlen() and stop buffer overflow`
			`for (size_t x = 0; x < length; x++) {`
			`const char c = input[x];`
			`const char *cur = &input[x];`
			`column += (c == '\t' ? ASSIMP_FBX_TAB_WIDTH : 1);`

			`if (IsLineEnd(c)) {`
			`comment = false;`

			`column = 0;`
			`++line;`
			`}`

			`if (comment) {`
			`continue;`
			`}`

			`if (in_double_quotes) {`
			`if (c == '\"') {`
			`in_double_quotes = false;`
			`token_end = cur;`

			`ProcessDataToken(output_tokens, token_begin, token_end, line, column);`
			`pending_data_token = false;`
			`}`
			`continue;`
			`}`

			`switch (c) {`
			`case '\"':`
			`if (token_begin) {`
			`TokenizeError("unexpected double-quote", line, column);`
This stops using FBXPropertyTable as a pointer. The base object will inherit the property table, for every FBX object, if it doesn't exist it will be ignored. The previous code was dangerous and not simple to understand, this makes the code simpler and should result in no leaks with PropertyTable. Features/Fixes: Adds ability for multiple millions of polygons to be loaded. Fixes memory leaks with tokens Fixes memory leaks with property table Fixes loading some corrupt files Fixes meshes not having a unique name to the mesh node. Opens up loading for two more versions: 7100 and 7200, up to 2020. Preliminary support for Cinema4D files in parser now, before this was not possible it would cause memory corruption, which is gone now. FBXProperties not being pointers presented simpler challenges in the long run also, fixed a bunch of bugs. 2021-03-30 01:33:06 +00:00			`corrupt = true;`
			`return;`
Port FBX module from commit 68013d23932688e57b489600f4517dd280edc464 Ports FBX module from 3.2 branch to 4.0 This is the only time the plugin will be updated from 3.2 and marks the final time we do this, from now on we will backport FBX to 3.2 with fixes. Changelog: - fixed crash importing files with buggy format (because of bad newlines in ASCII data, this is yet to be fixed fully) - fixed const correctness with C++/C version change - rewrote material handling to be simpler and better - ports from 3.2 to 4.0 the fbx importer 2020-12-22 22:29:48 +00:00			`}`
			`token_begin = cur;`
			`in_double_quotes = true;`
			`continue;`

			`case ';':`
			`ProcessDataToken(output_tokens, token_begin, token_end, line, column);`
			`comment = true;`
			`continue;`

			`case '{':`
			`ProcessDataToken(output_tokens, token_begin, token_end, line, column);`
			`output_tokens.push_back(new_Token(cur, cur + 1, TokenType_OPEN_BRACKET, line, column));`
			`continue;`

			`case '}':`
			`ProcessDataToken(output_tokens, token_begin, token_end, line, column);`
			`output_tokens.push_back(new_Token(cur, cur + 1, TokenType_CLOSE_BRACKET, line, column));`
			`continue;`

			`case ',':`
			`if (pending_data_token) {`
			`ProcessDataToken(output_tokens, token_begin, token_end, line, column, TokenType_DATA, true);`
			`}`
			`output_tokens.push_back(new_Token(cur, cur + 1, TokenType_COMMA, line, column));`
			`continue;`

			`case ':':`
			`if (pending_data_token) {`
			`ProcessDataToken(output_tokens, token_begin, token_end, line, column, TokenType_KEY, true);`
			`} else {`
			`TokenizeError("unexpected colon", line, column);`
			`}`
			`continue;`
			`}`

			`if (IsSpaceOrNewLine(c)) {`
			`if (token_begin) {`
			`// peek ahead and check if the next token is a colon in which`
			`// case this counts as KEY token.`
			`TokenType type = TokenType_DATA;`
			`for (const char *peek = cur; *peek && IsSpaceOrNewLine(*peek); ++peek) {`
			`if (*peek == ':') {`
			`type = TokenType_KEY;`
			`cur = peek;`
			`break;`
			`}`
			`}`

			`ProcessDataToken(output_tokens, token_begin, token_end, line, column, type);`
			`}`

			`pending_data_token = false;`
			`} else {`
			`token_end = cur;`
			`if (!token_begin) {`
			`token_begin = cur;`
			`}`

			`pending_data_token = true;`
			`}`
			`}`
			`}`
			`} // namespace FBXDocParser`