From 0d2e02945b07073ed8c76ca118e36da825c0c1ec Mon Sep 17 00:00:00 2001 From: reduz Date: Mon, 24 May 2021 21:25:11 -0300 Subject: [PATCH] Implement shader caching * Shader compilation is now cached. Subsequent loads take less than a millisecond. * Improved game, editor and project manager startup time. * Editor uses .godot/shader_cache to store shaders. * Game uses user://shader_cache * Project manager uses $config_dir/shader_cache * Options to tweak shader caching in project settings. * Editor path configuration moved from EditorSettings to new class, EditorPaths, so it can be available early on (before shaders are compiled). * Reworked ShaderCompilerRD to ensure deterministic shader code creation (else shader may change and cache will be invalidated). * Added shader compression with SMOLV: https://github.com/aras-p/smol-v --- COPYRIGHT.txt | 6 + core/SCsub | 1 + core/config/engine.cpp | 8 + core/config/engine.h | 5 + doc/classes/EditorInterface.xml | 6 + doc/classes/EditorPaths.xml | 49 + doc/classes/EditorSettings.xml | 9 - doc/classes/ProjectSettings.xml | 10 + editor/editor_export.cpp | 10 +- editor/editor_node.cpp | 15 +- editor/editor_node.h | 1 + editor/editor_paths.cpp | 156 ++ editor/editor_paths.h | 72 + editor/editor_plugin.cpp | 5 + editor/editor_plugin.h | 2 + editor/editor_resource_preview.cpp | 2 +- editor/editor_settings.cpp | 105 +- editor/editor_settings.h | 8 +- editor/export_template_manager.cpp | 2 +- .../plugins/asset_library_editor_plugin.cpp | 8 +- editor/plugins/editor_preview_plugins.cpp | 2 +- main/main.cpp | 6 + modules/glslang/register_types.cpp | 7 + modules/mono/godotsharp_dirs.cpp | 4 +- platform/android/export/export.cpp | 4 +- platform/javascript/export/export.cpp | 6 +- platform/osx/export/export.cpp | 8 +- platform/uwp/export/export.cpp | 6 +- .../renderer_rd/renderer_compositor_rd.cpp | 42 + .../renderer_rd/renderer_compositor_rd.h | 2 +- .../renderer_rd/shader_compiler_rd.cpp | 101 +- 
servers/rendering/renderer_rd/shader_rd.cpp | 290 ++- servers/rendering/renderer_rd/shader_rd.h | 23 +- servers/rendering/rendering_device.cpp | 12 + servers/rendering/rendering_device.h | 4 + servers/rendering_server.cpp | 6 + thirdparty/README.md | 5 + thirdparty/misc/smolv.cpp | 2108 +++++++++++++++++ thirdparty/misc/smolv.h | 169 ++ 39 files changed, 3113 insertions(+), 172 deletions(-) create mode 100644 doc/classes/EditorPaths.xml create mode 100644 editor/editor_paths.cpp create mode 100644 editor/editor_paths.h create mode 100644 thirdparty/misc/smolv.cpp create mode 100644 thirdparty/misc/smolv.h diff --git a/COPYRIGHT.txt b/COPYRIGHT.txt index c007a7e2b36..b2a930a4bb0 100644 --- a/COPYRIGHT.txt +++ b/COPYRIGHT.txt @@ -360,6 +360,12 @@ Comment: SMAZ Copyright: 2006-2009, Salvatore Sanfilippo License: BSD-3-clause +Files: ./thirdparty/misc/smolv.cpp + ./thirdparty/misc/smolv.h +Comment: SMOL-V +Copyright: 2016-2020, Aras Pranckevicius +License: public-domain or Unlicense or Expat + Files: ./thirdparty/misc/stb_rect_pack.h ./thirdparty/misc/stb_vorbis.c Comment: stb libraries diff --git a/core/SCsub b/core/SCsub index bdf8544840b..e3ba46be02c 100644 --- a/core/SCsub +++ b/core/SCsub @@ -59,6 +59,7 @@ thirdparty_misc_sources = [ "pcg.cpp", "polypartition.cpp", "clipper.cpp", + "smolv.cpp", ] thirdparty_misc_sources = [thirdparty_misc_dir + file for file in thirdparty_misc_sources] env_thirdparty.add_source_files(thirdparty_obj, thirdparty_misc_sources) diff --git a/core/config/engine.cpp b/core/config/engine.cpp index 2360d66438d..c43e32868c0 100644 --- a/core/config/engine.cpp +++ b/core/config/engine.cpp @@ -31,6 +31,7 @@ #include "engine.h" #include "core/authors.gen.h" +#include "core/config/project_settings.h" #include "core/donors.gen.h" #include "core/license.gen.h" #include "core/version.h" @@ -210,6 +211,13 @@ void Engine::get_singletons(List *p_singletons) { } } +void Engine::set_shader_cache_path(const String &p_path) { + shader_cache_path = 
p_path; +} +String Engine::get_shader_cache_path() const { + return shader_cache_path; +} + Engine *Engine::singleton = nullptr; Engine *Engine::get_singleton() { diff --git a/core/config/engine.h b/core/config/engine.h index a9080e3dfd2..276da1c7eaa 100644 --- a/core/config/engine.h +++ b/core/config/engine.h @@ -72,6 +72,8 @@ private: static Engine *singleton; + String shader_cache_path; + public: static Engine *get_singleton(); @@ -121,6 +123,9 @@ public: Dictionary get_license_info() const; String get_license_text() const; + void set_shader_cache_path(const String &p_path); + String get_shader_cache_path() const; + bool is_abort_on_gpu_errors_enabled() const; bool is_validation_layers_enabled() const; diff --git a/doc/classes/EditorInterface.xml b/doc/classes/EditorInterface.xml index a5328ce382e..3f324bf1a07 100644 --- a/doc/classes/EditorInterface.xml +++ b/doc/classes/EditorInterface.xml @@ -57,6 +57,12 @@ [b]Note:[/b] This returns the main editor control containing the whole editor, not the 2D or 3D viewports specifically. + + + + + + diff --git a/doc/classes/EditorPaths.xml b/doc/classes/EditorPaths.xml new file mode 100644 index 00000000000..b92927fd53c --- /dev/null +++ b/doc/classes/EditorPaths.xml @@ -0,0 +1,49 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/doc/classes/EditorSettings.xml b/doc/classes/EditorSettings.xml index 016d0128eb8..e7322235169 100644 --- a/doc/classes/EditorSettings.xml +++ b/doc/classes/EditorSettings.xml @@ -124,15 +124,6 @@ Returns the value of the setting specified by [code]name[/code]. This is equivalent to using [method Object.get] on the EditorSettings instance. - - - - - Gets the global settings path for the engine. 
Inside this path, you can find some standard paths such as: - [code]settings/tmp[/code] - Used for temporary storage of files - [code]settings/templates[/code] - Where export templates are located - - diff --git a/doc/classes/ProjectSettings.xml b/doc/classes/ProjectSettings.xml index f6ade5583e3..73899daaea5 100644 --- a/doc/classes/ProjectSettings.xml +++ b/doc/classes/ProjectSettings.xml @@ -1502,6 +1502,16 @@ Lower-end override for [member rendering/reflections/sky_reflections/texture_array_reflections] on mobile devices, due to performance concerns or driver support. + + + + + + + + + + If [code]true[/code], uses faster but lower-quality Blinn model to generate blurred reflections instead of the GGX model. diff --git a/editor/editor_export.cpp b/editor/editor_export.cpp index b3755bef80f..40313fbeff0 100644 --- a/editor/editor_export.cpp +++ b/editor/editor_export.cpp @@ -1065,7 +1065,7 @@ Error EditorExportPlatform::export_project_files(const Ref & } } else { // Use default text server data. 
- String icu_data_file = EditorSettings::get_singleton()->get_cache_dir().plus_file("tmp_icu_data"); + String icu_data_file = EditorPaths::get_singleton()->get_cache_dir().plus_file("tmp_icu_data"); TS->save_support_data(icu_data_file); Vector array = FileAccess::get_file_as_array(icu_data_file); err = p_func(p_udata, ts_data, array, idx, total, enc_in_filters, enc_ex_filters, key); @@ -1078,7 +1078,7 @@ Error EditorExportPlatform::export_project_files(const Ref & } String config_file = "project.binary"; - String engine_cfb = EditorSettings::get_singleton()->get_cache_dir().plus_file("tmp" + config_file); + String engine_cfb = EditorPaths::get_singleton()->get_cache_dir().plus_file("tmp" + config_file); ProjectSettings::get_singleton()->save_custom(engine_cfb, custom_map, custom_list); Vector data = FileAccess::get_file_as_array(engine_cfb); DirAccess::remove_file_or_error(engine_cfb); @@ -1100,9 +1100,9 @@ Error EditorExportPlatform::save_pack(const Ref &p_preset, c // Create the temporary export directory if it doesn't exist. 
DirAccessRef da = DirAccess::create(DirAccess::ACCESS_FILESYSTEM); - da->make_dir_recursive(EditorSettings::get_singleton()->get_cache_dir()); + da->make_dir_recursive(EditorPaths::get_singleton()->get_cache_dir()); - String tmppath = EditorSettings::get_singleton()->get_cache_dir().plus_file("packtmp"); + String tmppath = EditorPaths::get_singleton()->get_cache_dir().plus_file("packtmp"); FileAccess *ftmp = FileAccess::open(tmppath, FileAccess::WRITE); ERR_FAIL_COND_V_MSG(!ftmp, ERR_CANT_CREATE, "Cannot create file '" + tmppath + "'."); @@ -1984,7 +1984,7 @@ void EditorExportTextSceneToBinaryPlugin::_export_file(const String &p_path, con if (!convert) { return; } - String tmp_path = EditorSettings::get_singleton()->get_cache_dir().plus_file("tmpfile.res"); + String tmp_path = EditorPaths::get_singleton()->get_cache_dir().plus_file("tmpfile.res"); Error err = ResourceFormatLoaderText::convert_file_to_binary(p_path, tmp_path); if (err != OK) { DirAccess::remove_file_or_error(tmp_path); diff --git a/editor/editor_node.cpp b/editor/editor_node.cpp index 72963012d68..e433ddc6f5e 100644 --- a/editor/editor_node.cpp +++ b/editor/editor_node.cpp @@ -80,6 +80,7 @@ #include "editor/editor_inspector.h" #include "editor/editor_layouts_dialog.h" #include "editor/editor_log.h" +#include "editor/editor_paths.h" #include "editor/editor_plugin.h" #include "editor/editor_properties.h" #include "editor/editor_resource_picker.h" @@ -1457,7 +1458,7 @@ void EditorNode::_save_scene_with_preview(String p_file, int p_idx) { img->convert(Image::FORMAT_RGB8); //save thumbnail directly, as thumbnailer may not update due to actual scene not changing md5 - String temp_path = EditorSettings::get_singleton()->get_cache_dir(); + String temp_path = EditorPaths::get_singleton()->get_cache_dir(); String cache_base = ProjectSettings::get_singleton()->globalize_path(p_file).md5_text(); cache_base = temp_path.plus_file("resthumb-" + cache_base); @@ -2745,10 +2746,10 @@ void 
EditorNode::_menu_option_confirm(int p_option, bool p_confirmed) { settings_config_dialog->popup_edit_settings(); } break; case SETTINGS_EDITOR_DATA_FOLDER: { - OS::get_singleton()->shell_open(String("file://") + EditorSettings::get_singleton()->get_data_dir()); + OS::get_singleton()->shell_open(String("file://") + EditorPaths::get_singleton()->get_data_dir()); } break; case SETTINGS_EDITOR_CONFIG_FOLDER: { - OS::get_singleton()->shell_open(String("file://") + EditorSettings::get_singleton()->get_settings_dir()); + OS::get_singleton()->shell_open(String("file://") + EditorPaths::get_singleton()->get_settings_dir()); } break; case SETTINGS_MANAGE_EXPORT_TEMPLATES: { export_template_manager->popup_manager(); @@ -3727,10 +3728,15 @@ bool EditorNode::is_scene_in_use(const String &p_path) { return false; } +void EditorNode::register_editor_paths(bool p_for_project_manager) { + EditorPaths::create(p_for_project_manager); +} + void EditorNode::register_editor_types() { ResourceLoader::set_timestamp_on_load(true); ResourceSaver::set_timestamp_on_save(true); + ClassDB::register_class(); ClassDB::register_class(); ClassDB::register_class(); ClassDB::register_class(); @@ -3774,6 +3780,9 @@ void EditorNode::register_editor_types() { void EditorNode::unregister_editor_types() { _init_callbacks.clear(); + if (EditorPaths::get_singleton()) { + EditorPaths::free(); + } } void EditorNode::stop_child_process(OS::ProcessID p_pid) { diff --git a/editor/editor_node.h b/editor/editor_node.h index 9824702d7ba..0680cc85c71 100644 --- a/editor/editor_node.h +++ b/editor/editor_node.h @@ -798,6 +798,7 @@ public: Error export_preset(const String &p_preset, const String &p_path, bool p_debug, bool p_pack_only); + static void register_editor_paths(bool p_for_project_manager); static void register_editor_types(); static void unregister_editor_types(); diff --git a/editor/editor_paths.cpp b/editor/editor_paths.cpp new file mode 100644 index 00000000000..96469d3143d --- /dev/null +++ 
b/editor/editor_paths.cpp @@ -0,0 +1,156 @@ +/*************************************************************************/ +/* editor_paths.cpp */ +/*************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/*************************************************************************/ +/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ +/*************************************************************************/ + +#include "editor_paths.h" +#include "core/os/dir_access.h" +#include "core/os/os.h" + +EditorPaths *EditorPaths::singleton = nullptr; + +bool EditorPaths::are_paths_valid() const { + return paths_valid; +} + +String EditorPaths::get_settings_dir() const { + return settings_dir; +} +String EditorPaths::get_data_dir() const { + return data_dir; +} +String EditorPaths::get_config_dir() const { + return config_dir; +} +String EditorPaths::get_cache_dir() const { + return cache_dir; +} +bool EditorPaths::is_self_contained() const { + return self_contained; +} +String EditorPaths::get_self_contained_file() const { + return self_contained_file; +} + +void EditorPaths::create(bool p_for_project_manager) { + ERR_FAIL_COND(singleton != nullptr); + memnew(EditorPaths(p_for_project_manager)); +} +void EditorPaths::free() { + ERR_FAIL_COND(singleton == nullptr); + memdelete(singleton); +} + +void EditorPaths::_bind_methods() { + ClassDB::bind_method(D_METHOD("get_settings_dir"), &EditorPaths::get_settings_dir); + ClassDB::bind_method(D_METHOD("get_data_dir"), &EditorPaths::get_data_dir); + ClassDB::bind_method(D_METHOD("get_config_dir"), &EditorPaths::get_config_dir); + ClassDB::bind_method(D_METHOD("get_cache_dir"), &EditorPaths::get_cache_dir); + ClassDB::bind_method(D_METHOD("is_self_contained"), &EditorPaths::is_self_contained); + ClassDB::bind_method(D_METHOD("get_self_contained_file"), &EditorPaths::get_self_contained_file); +} + +EditorPaths::EditorPaths(bool p_for_project_mamanger) { + singleton = this; + + String exe_path = OS::get_singleton()->get_executable_path().get_base_dir(); + { + DirAccessRef d = DirAccess::create_for_path(exe_path); + + if (d->file_exists(exe_path + "/._sc_")) { + self_contained = true; + self_contained_file = exe_path + "/._sc_"; + } else if (d->file_exists(exe_path + "/_sc_")) { + self_contained = true; + self_contained_file = exe_path + "/_sc_"; + } + } + + 
String data_path; + String config_path; + String cache_path; + + if (self_contained) { + // editor is self contained, all in same folder + data_path = exe_path; + data_dir = data_path.plus_file("editor_data"); + config_path = exe_path; + config_dir = data_dir; + cache_path = exe_path; + cache_dir = data_dir.plus_file("cache"); + } else { + // Typically XDG_DATA_HOME or %APPDATA% + data_path = OS::get_singleton()->get_data_path(); + data_dir = data_path.plus_file(OS::get_singleton()->get_godot_dir_name()); + // Can be different from data_path e.g. on Linux or macOS + config_path = OS::get_singleton()->get_config_path(); + config_dir = config_path.plus_file(OS::get_singleton()->get_godot_dir_name()); + // Can be different from above paths, otherwise a subfolder of data_dir + cache_path = OS::get_singleton()->get_cache_path(); + if (cache_path == data_path) { + cache_dir = data_dir.plus_file("cache"); + } else { + cache_dir = cache_path.plus_file(OS::get_singleton()->get_godot_dir_name()); + } + } + + paths_valid = (data_path != "" && config_path != "" && cache_path != ""); + + if (paths_valid) { + DirAccessRef dir = DirAccess::create(DirAccess::ACCESS_FILESYSTEM); + if (dir->change_dir(data_dir) != OK) { + dir->make_dir_recursive(data_dir); + if (dir->change_dir(data_dir) != OK) { + ERR_PRINT("Cannot create data directory!"); + paths_valid = false; + } + } + + // Validate/create cache dir + + if (dir->change_dir(EditorPaths::get_singleton()->get_cache_dir()) != OK) { + dir->make_dir_recursive(cache_dir); + if (dir->change_dir(cache_dir) != OK) { + ERR_PRINT("Cannot create cache directory!"); + } + } + + if (p_for_project_mamanger) { + Engine::get_singleton()->set_shader_cache_path(get_data_dir()); + } else { + DirAccessRef dir2 = DirAccess::open("res://"); + if (dir2->change_dir(".godot") != OK) { //ensure the .godot subdir exists + if (dir2->make_dir(".godot") != OK) { + ERR_PRINT("Cannot create res://.godot directory!"); + } + } + + 
Engine::get_singleton()->set_shader_cache_path("res://.godot"); + } + } +} diff --git a/editor/editor_paths.h b/editor/editor_paths.h new file mode 100644 index 00000000000..096174943d9 --- /dev/null +++ b/editor/editor_paths.h @@ -0,0 +1,72 @@ +/*************************************************************************/ +/* editor_paths.h */ +/*************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/*************************************************************************/ +/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ +/*************************************************************************/ + +#ifndef EDITORPATHS_H +#define EDITORPATHS_H + +#include "core/config/engine.h" + +class EditorPaths : public Object { + GDCLASS(EditorPaths, Object) + + bool paths_valid = false; + String settings_dir; + String data_dir; //editor data dir + String config_dir; //editor config dir + String cache_dir; //editor cache dir + bool self_contained = false; //true if running self contained + String self_contained_file; //self contained file with configuration + + static EditorPaths *singleton; + +protected: + static void _bind_methods(); + +public: + bool are_paths_valid() const; + + String get_settings_dir() const; + String get_data_dir() const; + String get_config_dir() const; + String get_cache_dir() const; + bool is_self_contained() const; + String get_self_contained_file() const; + + static EditorPaths *get_singleton() { + return singleton; + } + + static void create(bool p_for_project_manager); + static void free(); + + EditorPaths(bool p_for_project_mamanger = false); +}; + +#endif // EDITORPATHS_H diff --git a/editor/editor_plugin.cpp b/editor/editor_plugin.cpp index 6b96cb0f5c7..c5097a17a50 100644 --- a/editor/editor_plugin.cpp +++ b/editor/editor_plugin.cpp @@ -32,6 +32,7 @@ #include "editor/editor_export.h" #include "editor/editor_node.h" +#include "editor/editor_paths.h" #include "editor/editor_settings.h" #include "editor/filesystem_dock.h" #include "editor/project_settings_editor.h" @@ -257,6 +258,9 @@ EditorSelection *EditorInterface::get_selection() { Ref EditorInterface::get_editor_settings() { return EditorSettings::get_singleton(); } +EditorPaths *EditorInterface::get_editor_paths() { + return EditorPaths::get_singleton(); +} EditorResourcePreview *EditorInterface::get_resource_previewer() { return EditorResourcePreview::get_singleton(); @@ -335,6 +339,7 @@ void EditorInterface::_bind_methods() { ClassDB::bind_method(D_METHOD("get_selected_path"), 
&EditorInterface::get_selected_path); ClassDB::bind_method(D_METHOD("get_current_path"), &EditorInterface::get_current_path); ClassDB::bind_method(D_METHOD("get_file_system_dock"), &EditorInterface::get_file_system_dock); + ClassDB::bind_method(D_METHOD("get_editor_paths"), &EditorInterface::get_editor_paths); ClassDB::bind_method(D_METHOD("set_plugin_enabled", "plugin", "enabled"), &EditorInterface::set_plugin_enabled); ClassDB::bind_method(D_METHOD("is_plugin_enabled", "plugin"), &EditorInterface::is_plugin_enabled); diff --git a/editor/editor_plugin.h b/editor/editor_plugin.h index 37412e5ebe7..3f72e468b27 100644 --- a/editor/editor_plugin.h +++ b/editor/editor_plugin.h @@ -54,6 +54,7 @@ class EditorNode3DGizmoPlugin; class EditorResourcePreview; class EditorFileSystem; class EditorToolAddons; +class EditorPaths; class FileSystemDock; class ScriptEditor; @@ -95,6 +96,7 @@ public: EditorSelection *get_selection(); //EditorImportExport *get_import_export(); Ref get_editor_settings(); + EditorPaths *get_editor_paths(); EditorResourcePreview *get_resource_previewer(); EditorFileSystem *get_resource_file_system(); diff --git a/editor/editor_resource_preview.cpp b/editor/editor_resource_preview.cpp index 138830cdc65..35cf08b4d73 100644 --- a/editor/editor_resource_preview.cpp +++ b/editor/editor_resource_preview.cpp @@ -241,7 +241,7 @@ void EditorResourcePreview::_thread() { _preview_ready(item.path + ":" + itos(item.resource->hash_edited_version()), texture, small_texture, item.id, item.function, item.userdata); } else { - String temp_path = EditorSettings::get_singleton()->get_cache_dir(); + String temp_path = EditorPaths::get_singleton()->get_cache_dir(); String cache_base = ProjectSettings::get_singleton()->globalize_path(item.path).md5_text(); cache_base = temp_path.plus_file("resthumb-" + cache_base); diff --git a/editor/editor_settings.cpp b/editor/editor_settings.cpp index eb8ad9bac43..4e4849871bb 100644 --- a/editor/editor_settings.cpp +++ 
b/editor/editor_settings.cpp @@ -902,67 +902,26 @@ void EditorSettings::create() { return; //pointless } - DirAccess *dir = nullptr; - - String data_path; - String data_dir; - String config_path; - String config_dir; - String cache_path; - String cache_dir; - Ref extra_config = memnew(ConfigFile); - String exe_path = OS::get_singleton()->get_executable_path().get_base_dir(); - DirAccess *d = DirAccess::create_for_path(exe_path); - bool self_contained = false; - - if (d->file_exists(exe_path + "/._sc_")) { - self_contained = true; - Error err = extra_config->load(exe_path + "/._sc_"); + if (EditorPaths::get_singleton()->is_self_contained()) { + Error err = extra_config->load(EditorPaths::get_singleton()->get_self_contained_file()); if (err != OK) { - ERR_PRINT("Can't load config from path '" + exe_path + "/._sc_'."); - } - } else if (d->file_exists(exe_path + "/_sc_")) { - self_contained = true; - Error err = extra_config->load(exe_path + "/_sc_"); - if (err != OK) { - ERR_PRINT("Can't load config from path '" + exe_path + "/_sc_'."); + ERR_PRINT("Can't load extra config from path :" + EditorPaths::get_singleton()->get_self_contained_file()); } } - memdelete(d); - if (self_contained) { - // editor is self contained, all in same folder - data_path = exe_path; - data_dir = data_path.plus_file("editor_data"); - config_path = exe_path; - config_dir = data_dir; - cache_path = exe_path; - cache_dir = data_dir.plus_file("cache"); - } else { - // Typically XDG_DATA_HOME or %APPDATA% - data_path = OS::get_singleton()->get_data_path(); - data_dir = data_path.plus_file(OS::get_singleton()->get_godot_dir_name()); - // Can be different from data_path e.g. 
on Linux or macOS - config_path = OS::get_singleton()->get_config_path(); - config_dir = config_path.plus_file(OS::get_singleton()->get_godot_dir_name()); - // Can be different from above paths, otherwise a subfolder of data_dir - cache_path = OS::get_singleton()->get_cache_path(); - if (cache_path == data_path) { - cache_dir = data_dir.plus_file("cache"); - } else { - cache_dir = cache_path.plus_file(OS::get_singleton()->get_godot_dir_name()); - } - } + DirAccess *dir = nullptr; ClassDB::register_class(); //otherwise it can't be unserialized String config_file_path; - if (data_path != "" && config_path != "" && cache_path != "") { + if (EditorPaths::get_singleton()->are_paths_valid()) { // Validate/create data dir and subdirectories + String data_dir = EditorPaths::get_singleton()->get_data_dir(); + dir = DirAccess::create(DirAccess::ACCESS_FILESYSTEM); if (dir->change_dir(data_dir) != OK) { dir->make_dir_recursive(data_dir); @@ -979,22 +938,11 @@ void EditorSettings::create() { dir->change_dir(".."); } - // Validate/create cache dir - - if (dir->change_dir(cache_dir) != OK) { - dir->make_dir_recursive(cache_dir); - if (dir->change_dir(cache_dir) != OK) { - ERR_PRINT("Cannot create cache directory!"); - memdelete(dir); - goto fail; - } - } - // Validate/create config dir and subdirectories - if (dir->change_dir(config_dir) != OK) { - dir->make_dir_recursive(config_dir); - if (dir->change_dir(config_dir) != OK) { + if (dir->change_dir(EditorPaths::get_singleton()->get_config_dir()) != OK) { + dir->make_dir_recursive(EditorPaths::get_singleton()->get_config_dir()); + if (dir->change_dir(EditorPaths::get_singleton()->get_config_dir()) != OK) { ERR_PRINT("Cannot create config directory!"); memdelete(dir); goto fail; @@ -1035,7 +983,7 @@ void EditorSettings::create() { // Validate editor config file String config_file_name = "editor_settings-" + itos(VERSION_MAJOR) + ".tres"; - config_file_path = config_dir.plus_file(config_file_name); + config_file_path = 
EditorPaths::get_singleton()->get_config_dir().plus_file(config_file_name); if (!dir->file_exists(config_file_name)) { memdelete(dir); goto fail; @@ -1052,9 +1000,6 @@ void EditorSettings::create() { singleton->save_changed_setting = true; singleton->config_file_path = config_file_path; - singleton->settings_dir = config_dir; - singleton->data_dir = data_dir; - singleton->cache_dir = cache_dir; print_verbose("EditorSettings: Load OK!"); @@ -1069,6 +1014,8 @@ void EditorSettings::create() { fail: // patch init projects + String exe_path = OS::get_singleton()->get_executable_path().get_base_dir(); + if (extra_config->has_section("init_projects")) { Vector list = extra_config->get_value("init_projects", "list"); for (int i = 0; i < list.size(); i++) { @@ -1080,9 +1027,6 @@ fail: singleton = Ref(memnew(EditorSettings)); singleton->save_changed_setting = true; singleton->config_file_path = config_file_path; - singleton->settings_dir = config_dir; - singleton->data_dir = data_dir; - singleton->cache_dir = cache_dir; singleton->_load_defaults(extra_config); singleton->setup_language(); singleton->setup_network(); @@ -1312,30 +1256,22 @@ void EditorSettings::add_property_hint(const PropertyInfo &p_hint) { // Data directories -String EditorSettings::get_data_dir() const { - return data_dir; -} - String EditorSettings::get_templates_dir() const { - return get_data_dir().plus_file("templates"); + return EditorPaths::get_singleton()->get_data_dir().plus_file("templates"); } // Config directories -String EditorSettings::get_settings_dir() const { - return settings_dir; -} - String EditorSettings::get_project_settings_dir() const { return EditorSettings::PROJECT_EDITOR_SETTINGS_PATH; } String EditorSettings::get_text_editor_themes_dir() const { - return get_settings_dir().plus_file("text_editor_themes"); + return EditorPaths::get_singleton()->get_settings_dir().plus_file("text_editor_themes"); } String EditorSettings::get_script_templates_dir() const { - return 
get_settings_dir().plus_file("script_templates"); + return EditorPaths::get_singleton()->get_settings_dir().plus_file("script_templates"); } String EditorSettings::get_project_script_templates_dir() const { @@ -1344,12 +1280,8 @@ String EditorSettings::get_project_script_templates_dir() const { // Cache directory -String EditorSettings::get_cache_dir() const { - return cache_dir; -} - String EditorSettings::get_feature_profiles_dir() const { - return get_settings_dir().plus_file("feature_profiles"); + return EditorPaths::get_singleton()->get_settings_dir().plus_file("feature_profiles"); } // Metadata @@ -1576,7 +1508,7 @@ Vector EditorSettings::get_script_templates(const String &p_extension, c } String EditorSettings::get_editor_layouts_config() const { - return get_settings_dir().plus_file("editor_layouts.cfg"); + return EditorPaths::get_singleton()->get_settings_dir().plus_file("editor_layouts.cfg"); } // Shortcuts @@ -1778,7 +1710,6 @@ void EditorSettings::_bind_methods() { ClassDB::bind_method(D_METHOD("property_get_revert", "name"), &EditorSettings::property_get_revert); ClassDB::bind_method(D_METHOD("add_property_info", "info"), &EditorSettings::_add_property_info_bind); - ClassDB::bind_method(D_METHOD("get_settings_dir"), &EditorSettings::get_settings_dir); ClassDB::bind_method(D_METHOD("get_project_settings_dir"), &EditorSettings::get_project_settings_dir); ClassDB::bind_method(D_METHOD("set_project_metadata", "section", "key", "data"), &EditorSettings::set_project_metadata); diff --git a/editor/editor_settings.h b/editor/editor_settings.h index e5f8527faf1..5c9eec34a29 100644 --- a/editor/editor_settings.h +++ b/editor/editor_settings.h @@ -36,6 +36,7 @@ #include "core/object/class_db.h" #include "core/os/thread_safe.h" #include "core/string/translation.h" +#include "editor/editor_paths.h" #include "scene/gui/shortcut.h" class EditorPlugin; @@ -87,12 +88,7 @@ private: mutable Map> shortcuts; Map>> builtin_action_overrides; - String resource_path; - String 
settings_dir; - String data_dir; - String cache_dir; String config_file_path; - String project_config_dir; Vector favorites; Vector recent_dirs; @@ -153,12 +149,10 @@ public: String get_data_dir() const; String get_templates_dir() const; - String get_settings_dir() const; String get_project_settings_dir() const; String get_text_editor_themes_dir() const; String get_script_templates_dir() const; String get_project_script_templates_dir() const; - String get_cache_dir() const; String get_feature_profiles_dir() const; void set_project_metadata(const String &p_section, const String &p_key, Variant p_data); diff --git a/editor/export_template_manager.cpp b/editor/export_template_manager.cpp index 0f5c01be0ef..6e0ae403a28 100644 --- a/editor/export_template_manager.cpp +++ b/editor/export_template_manager.cpp @@ -444,7 +444,7 @@ void ExportTemplateManager::_begin_template_download(const String &p_url) { } download_data.clear(); - download_templates->set_download_file(EditorSettings::get_singleton()->get_cache_dir().plus_file("tmp_templates.tpz")); + download_templates->set_download_file(EditorPaths::get_singleton()->get_cache_dir().plus_file("tmp_templates.tpz")); download_templates->set_use_threads(true); Error err = download_templates->request(p_url); diff --git a/editor/plugins/asset_library_editor_plugin.cpp b/editor/plugins/asset_library_editor_plugin.cpp index a0d9afee740..93bb170128f 100644 --- a/editor/plugins/asset_library_editor_plugin.cpp +++ b/editor/plugins/asset_library_editor_plugin.cpp @@ -464,7 +464,7 @@ void EditorAssetLibraryItemDownload::_make_request() { retry->hide(); download->cancel_request(); - download->set_download_file(EditorSettings::get_singleton()->get_cache_dir().plus_file("tmp_asset_" + itos(asset_id)) + ".zip"); + download->set_download_file(EditorPaths::get_singleton()->get_cache_dir().plus_file("tmp_asset_" + itos(asset_id)) + ".zip"); Error err = download->request(host); if (err != OK) { @@ -702,7 +702,7 @@ void 
EditorAssetLibrary::_image_update(bool use_cache, bool final, const PackedB PackedByteArray image_data = p_data; if (use_cache) { - String cache_filename_base = EditorSettings::get_singleton()->get_cache_dir().plus_file("assetimage_" + image_queue[p_queue_id].image_url.md5_text()); + String cache_filename_base = EditorPaths::get_singleton()->get_cache_dir().plus_file("assetimage_" + image_queue[p_queue_id].image_url.md5_text()); FileAccess *file = FileAccess::open(cache_filename_base + ".data", FileAccess::READ); @@ -781,7 +781,7 @@ void EditorAssetLibrary::_image_request_completed(int p_status, int p_code, cons if (p_code != HTTPClient::RESPONSE_NOT_MODIFIED) { for (int i = 0; i < headers.size(); i++) { if (headers[i].findn("ETag:") == 0) { // Save etag - String cache_filename_base = EditorSettings::get_singleton()->get_cache_dir().plus_file("assetimage_" + image_queue[p_queue_id].image_url.md5_text()); + String cache_filename_base = EditorPaths::get_singleton()->get_cache_dir().plus_file("assetimage_" + image_queue[p_queue_id].image_url.md5_text()); String new_etag = headers[i].substr(headers[i].find(":") + 1, headers[i].length()).strip_edges(); FileAccess *file; @@ -829,7 +829,7 @@ void EditorAssetLibrary::_update_image_queue() { List to_delete; for (Map::Element *E = image_queue.front(); E; E = E->next()) { if (!E->get().active && current_images < max_images) { - String cache_filename_base = EditorSettings::get_singleton()->get_cache_dir().plus_file("assetimage_" + E->get().image_url.md5_text()); + String cache_filename_base = EditorPaths::get_singleton()->get_cache_dir().plus_file("assetimage_" + E->get().image_url.md5_text()); Vector headers; if (FileAccess::exists(cache_filename_base + ".etag") && FileAccess::exists(cache_filename_base + ".data")) { diff --git a/editor/plugins/editor_preview_plugins.cpp b/editor/plugins/editor_preview_plugins.cpp index 18cc5d43fbb..2d79e4f3e34 100644 --- a/editor/plugins/editor_preview_plugins.cpp +++ 
b/editor/plugins/editor_preview_plugins.cpp @@ -265,7 +265,7 @@ Ref EditorPackedScenePreviewPlugin::generate(const RES &p_from, const } Ref EditorPackedScenePreviewPlugin::generate_from_path(const String &p_path, const Size2 &p_size) const { - String temp_path = EditorSettings::get_singleton()->get_cache_dir(); + String temp_path = EditorPaths::get_singleton()->get_cache_dir(); String cache_base = ProjectSettings::get_singleton()->globalize_path(p_path).md5_text(); cache_base = temp_path.plus_file("resthumb-" + cache_base); diff --git a/main/main.cpp b/main/main.cpp index 2f191b5f63c..d67761db556 100644 --- a/main/main.cpp +++ b/main/main.cpp @@ -1445,6 +1445,12 @@ Error Main::setup2(Thread::ID p_main_tid_override) { } #endif +#ifdef TOOLS_ENABLED + if (editor || project_manager) { + EditorNode::register_editor_paths(project_manager); + } +#endif + /* Determine text driver */ if (text_driver == "") { diff --git a/modules/glslang/register_types.cpp b/modules/glslang/register_types.cpp index 4331daadfc8..8979eabfc35 100644 --- a/modules/glslang/register_types.cpp +++ b/modules/glslang/register_types.cpp @@ -179,11 +179,18 @@ static Vector _compile_shader_glsl(RenderingDevice::ShaderStage p_stage return ret; } +static String _get_cache_key_function_glsl(const RenderingDevice::Capabilities *p_capabilities) { + String version; + version = "SpirVGen=" + itos(glslang::GetSpirvGeneratorVersion()) + ", major=" + itos(p_capabilities->version_major) + ", minor=" + itos(p_capabilities->version_minor) + " , subgroup_size=" + itos(p_capabilities->subgroup_size) + " , subgroup_ops=" + itos(p_capabilities->subgroup_operations) + " , subgroup_in_shaders=" + itos(p_capabilities->subgroup_in_shaders); //every capability that affects generated SPIR-V goes into the key; subgroup_size reads its own field so devices differing only in subgroup size do not share cached shaders + return version; +} + void preregister_glslang_types() { // initialize in case it's not initialized.
This is done once per thread // and it's safe to call multiple times glslang::InitializeProcess(); RenderingDevice::shader_set_compile_function(_compile_shader_glsl); + RenderingDevice::shader_set_get_cache_key_function(_get_cache_key_function_glsl); } void register_glslang_types() { diff --git a/modules/mono/godotsharp_dirs.cpp b/modules/mono/godotsharp_dirs.cpp index 020a40575c0..68134b9b209 100644 --- a/modules/mono/godotsharp_dirs.cpp +++ b/modules/mono/godotsharp_dirs.cpp @@ -63,8 +63,8 @@ String _get_expected_build_config() { String _get_mono_user_dir() { #ifdef TOOLS_ENABLED - if (EditorSettings::get_singleton()) { - return EditorSettings::get_singleton()->get_data_dir().plus_file("mono"); + if (EditorPaths::get_singleton()) { + return EditorPaths::get_singleton()->get_data_dir().plus_file("mono"); } else { String settings_path; diff --git a/platform/android/export/export.cpp b/platform/android/export/export.cpp index 073c9dc6ef8..1338b31a649 100644 --- a/platform/android/export/export.cpp +++ b/platform/android/export/export.cpp @@ -1793,7 +1793,7 @@ public: p_debug_flags |= DEBUG_FLAG_REMOTE_DEBUG_LOCALHOST; } - String tmp_export_path = EditorSettings::get_singleton()->get_cache_dir().plus_file("tmpexport." + uitos(OS::get_singleton()->get_unix_time()) + ".apk"); + String tmp_export_path = EditorPaths::get_singleton()->get_cache_dir().plus_file("tmpexport." + uitos(OS::get_singleton()->get_unix_time()) + ".apk"); #define CLEANUP_AND_RETURN(m_err) \ { \ @@ -2651,7 +2651,7 @@ public: FileAccess *dst_f = nullptr; io2.opaque = &dst_f; - String tmp_unaligned_path = EditorSettings::get_singleton()->get_cache_dir().plus_file("tmpexport-unaligned." + uitos(OS::get_singleton()->get_unix_time()) + ".apk"); + String tmp_unaligned_path = EditorPaths::get_singleton()->get_cache_dir().plus_file("tmpexport-unaligned." 
+ uitos(OS::get_singleton()->get_unix_time()) + ".apk"); #define CLEANUP_AND_RETURN(m_err) \ { \ diff --git a/platform/javascript/export/export.cpp b/platform/javascript/export/export.cpp index 5096285e339..8ce294f31be 100644 --- a/platform/javascript/export/export.cpp +++ b/platform/javascript/export/export.cpp @@ -63,7 +63,7 @@ private: } void _set_internal_certs(Ref p_crypto) { - const String cache_path = EditorSettings::get_singleton()->get_cache_dir(); + const String cache_path = EditorPaths::get_singleton()->get_cache_dir(); const String key_path = cache_path.plus_file("html5_server.key"); const String crt_path = cache_path.plus_file("html5_server.crt"); bool regen = !FileAccess::exists(key_path) || !FileAccess::exists(crt_path); @@ -138,7 +138,7 @@ public: const String req_file = req[1].get_file(); const String req_ext = req[1].get_extension(); - const String cache_path = EditorSettings::get_singleton()->get_cache_dir().plus_file("web"); + const String cache_path = EditorPaths::get_singleton()->get_cache_dir().plus_file("web"); const String filepath = cache_path.plus_file(req_file); if (!mimes.has(req_ext) || !FileAccess::exists(filepath)) { @@ -888,7 +888,7 @@ Error EditorExportPlatformJavaScript::run(const Ref &p_prese return OK; } - const String dest = EditorSettings::get_singleton()->get_cache_dir().plus_file("web"); + const String dest = EditorPaths::get_singleton()->get_cache_dir().plus_file("web"); DirAccessRef da = DirAccess::create(DirAccess::ACCESS_FILESYSTEM); if (!da->dir_exists(dest)) { Error err = da->make_dir_recursive(dest); diff --git a/platform/osx/export/export.cpp b/platform/osx/export/export.cpp index 3a6a5333dd3..6d995412abb 100644 --- a/platform/osx/export/export.cpp +++ b/platform/osx/export/export.cpp @@ -301,7 +301,7 @@ void EditorExportPlatformOSX::_make_icon(const Ref &p_icon, Vectorcreate_from_image(copy); - String path = EditorSettings::get_singleton()->get_cache_dir().plus_file("icon.png"); + String path = 
EditorPaths::get_singleton()->get_cache_dir().plus_file("icon.png"); ResourceSaver::save(path, it); FileAccess *f = FileAccess::open(path, FileAccess::READ); @@ -610,7 +610,7 @@ Error EditorExportPlatformOSX::export_project(const Ref &p_p // Create our application bundle. String tmp_app_dir_name = pkg_name + ".app"; - String tmp_app_path_name = EditorSettings::get_singleton()->get_cache_dir().plus_file(tmp_app_dir_name); + String tmp_app_path_name = EditorPaths::get_singleton()->get_cache_dir().plus_file(tmp_app_dir_name); print_line("Exporting to " + tmp_app_path_name); Error err = OK; @@ -774,7 +774,7 @@ Error EditorExportPlatformOSX::export_project(const Ref &p_p String ent_path = p_preset->get("codesign/entitlements/custom_file"); if (sign_enabled && (ent_path == "")) { - ent_path = EditorSettings::get_singleton()->get_cache_dir().plus_file(pkg_name + ".entitlements"); + ent_path = EditorPaths::get_singleton()->get_cache_dir().plus_file(pkg_name + ".entitlements"); FileAccess *ent_f = FileAccess::open(ent_path, FileAccess::WRITE); if (ent_f) { @@ -959,7 +959,7 @@ Error EditorExportPlatformOSX::export_project(const Ref &p_p zlib_filefunc_def io_dst = zipio_create_io_from_file(&dst_f); zipFile zip = zipOpen2(p_path.utf8().get_data(), APPEND_STATUS_CREATE, nullptr, &io_dst); - _zip_folder_recursive(zip, EditorSettings::get_singleton()->get_cache_dir(), pkg_name + ".app", pkg_name); + _zip_folder_recursive(zip, EditorPaths::get_singleton()->get_cache_dir(), pkg_name + ".app", pkg_name); zipClose(zip, nullptr); } diff --git a/platform/uwp/export/export.cpp b/platform/uwp/export/export.cpp index f1a857d4145..351aaa59575 100644 --- a/platform/uwp/export/export.cpp +++ b/platform/uwp/export/export.cpp @@ -567,7 +567,7 @@ void AppxPackager::finish() { // Create and add block map file EditorNode::progress_task_step("export", "Creating block map...", 4); - const String &tmp_blockmap_file_path = EditorSettings::get_singleton()->get_cache_dir().plus_file("tmpblockmap.xml"); 
+ const String &tmp_blockmap_file_path = EditorPaths::get_singleton()->get_cache_dir().plus_file("tmpblockmap.xml"); make_block_map(tmp_blockmap_file_path); FileAccess *blockmap_file = FileAccess::open(tmp_blockmap_file_path, FileAccess::READ); @@ -585,7 +585,7 @@ void AppxPackager::finish() { EditorNode::progress_task_step("export", "Setting content types...", 5); - const String &tmp_content_types_file_path = EditorSettings::get_singleton()->get_cache_dir().plus_file("tmpcontenttypes.xml"); + const String &tmp_content_types_file_path = EditorPaths::get_singleton()->get_cache_dir().plus_file("tmpcontenttypes.xml"); make_content_types(tmp_content_types_file_path); FileAccess *types_file = FileAccess::open(tmp_content_types_file_path, FileAccess::READ); @@ -879,7 +879,7 @@ class EditorExportPlatformUWP : public EditorExportPlatform { return data; } - String tmp_path = EditorSettings::get_singleton()->get_cache_dir().plus_file("uwp_tmp_logo.png"); + String tmp_path = EditorPaths::get_singleton()->get_cache_dir().plus_file("uwp_tmp_logo.png"); Error err = texture->get_image()->save_png(tmp_path); diff --git a/servers/rendering/renderer_rd/renderer_compositor_rd.cpp b/servers/rendering/renderer_rd/renderer_compositor_rd.cpp index 0012ba9c277..1337d367622 100644 --- a/servers/rendering/renderer_rd/renderer_compositor_rd.cpp +++ b/servers/rendering/renderer_rd/renderer_compositor_rd.cpp @@ -31,6 +31,7 @@ #include "renderer_compositor_rd.h" #include "core/config/project_settings.h" +#include "core/os/dir_access.h" void RendererCompositorRD::prepare_for_blitting_render_targets() { RD::get_singleton()->prepare_screen_for_drawing(); @@ -155,6 +156,43 @@ void RendererCompositorRD::finalize() { RendererCompositorRD *RendererCompositorRD::singleton = nullptr; RendererCompositorRD::RendererCompositorRD() { + { + String shader_cache_dir = Engine::get_singleton()->get_shader_cache_path(); + if (shader_cache_dir == String()) { + shader_cache_dir = "user://"; + } + DirAccessRef da = 
DirAccess::open(shader_cache_dir); + if (!da) { + ERR_PRINT("Can't create shader cache folder, no shader caching will happen: " + shader_cache_dir); + } else { + Error err = da->change_dir("shader_cache"); + if (err != OK) { + err = da->make_dir("shader_cache"); + } + if (err != OK) { + ERR_PRINT("Can't create shader cache folder, no shader caching will happen: " + shader_cache_dir); + } else { + shader_cache_dir = shader_cache_dir.plus_file("shader_cache"); + + bool shader_cache_enabled = GLOBAL_GET("rendering/shader_compiler/shader_cache/enabled"); + if (!Engine::get_singleton()->is_editor_hint() && !shader_cache_enabled) { + shader_cache_dir = String(); //disable only if not editor + } + + if (shader_cache_dir != String()) { + bool compress = GLOBAL_GET("rendering/shader_compiler/shader_cache/compress"); + bool use_zstd = GLOBAL_GET("rendering/shader_compiler/shader_cache/use_zstd_compression"); + bool strip_debug = GLOBAL_GET("rendering/shader_compiler/shader_cache/strip_debug"); + + ShaderRD::set_shader_cache_dir(shader_cache_dir); + ShaderRD::set_shader_cache_save_compressed(compress); + ShaderRD::set_shader_cache_save_compressed_zstd(use_zstd); + ShaderRD::set_shader_cache_save_debug(!strip_debug); + } + } + } + } + singleton = this; time = 0; @@ -171,3 +209,7 @@ RendererCompositorRD::RendererCompositorRD() { scene = memnew(RendererSceneRenderImplementation::RenderForwardClustered(storage)); } } + +RendererCompositorRD::~RendererCompositorRD() { + ShaderRD::set_shader_cache_dir(String()); +} diff --git a/servers/rendering/renderer_rd/renderer_compositor_rd.h b/servers/rendering/renderer_rd/renderer_compositor_rd.h index 52552f7ee3b..7a783220511 100644 --- a/servers/rendering/renderer_rd/renderer_compositor_rd.h +++ b/servers/rendering/renderer_rd/renderer_compositor_rd.h @@ -118,6 +118,6 @@ public: static RendererCompositorRD *singleton; RendererCompositorRD(); - ~RendererCompositorRD() {} + ~RendererCompositorRD(); }; #endif // RASTERIZER_RD_H diff --git 
a/servers/rendering/renderer_rd/shader_compiler_rd.cpp b/servers/rendering/renderer_rd/shader_compiler_rd.cpp index 056c8113a7b..7deedb80c3f 100644 --- a/servers/rendering/renderer_rd/shader_compiler_rd.cpp +++ b/servers/rendering/renderer_rd/shader_compiler_rd.cpp @@ -369,17 +369,24 @@ void ShaderCompilerRD::_dump_function_deps(const SL::ShaderNode *p_node, const S ERR_FAIL_COND(fidx == -1); + Vector uses_functions; + for (Set::Element *E = p_node->functions[fidx].uses_function.front(); E; E = E->next()) { - if (added.has(E->get())) { + uses_functions.push_back(E->get()); + } + uses_functions.sort_custom(); //ensure order is deterministic so the same shader is always produced + + for (int k = 0; k < uses_functions.size(); k++) { + if (added.has(uses_functions[k])) { continue; //was added already } - _dump_function_deps(p_node, E->get(), p_func_code, r_to_add, added); + _dump_function_deps(p_node, uses_functions[k], p_func_code, r_to_add, added); SL::FunctionNode *fnode = nullptr; for (int i = 0; i < p_node->functions.size(); i++) { - if (p_node->functions[i].name == E->get()) { + if (p_node->functions[i].name == uses_functions[k]) { fnode = p_node->functions[i].function; break; } @@ -427,9 +434,9 @@ void ShaderCompilerRD::_dump_function_deps(const SL::ShaderNode *p_node, const S header += ")\n"; r_to_add += header; - r_to_add += p_func_code[E->get()]; + r_to_add += p_func_code[uses_functions[k]]; - added.insert(E->get()); + added.insert(uses_functions[k]); } } @@ -581,63 +588,74 @@ String ShaderCompilerRD::_dump_node_code(const SL::Node *p_node, int p_level, Ge uniform_defines.resize(max_uniforms); bool uses_uniforms = false; + Vector uniform_names; + for (Map::Element *E = pnode->uniforms.front(); E; E = E->next()) { + uniform_names.push_back(E->key()); + } + + uniform_names.sort_custom(); //ensure order is deterministic so the same shader is always produced + + for (int k = 0; k < uniform_names.size(); k++) { + StringName uniform_name = uniform_names[k]; + const 
SL::ShaderNode::Uniform &uniform = pnode->uniforms[uniform_name]; + String ucode; - if (E->get().scope == SL::ShaderNode::Uniform::SCOPE_INSTANCE) { + if (uniform.scope == SL::ShaderNode::Uniform::SCOPE_INSTANCE) { //insert, but don't generate any code. - p_actions.uniforms->insert(E->key(), E->get()); + p_actions.uniforms->insert(uniform_name, uniform); continue; //instances are indexed directly, dont need index uniforms } - if (SL::is_sampler_type(E->get().type)) { - ucode = "layout(set = " + itos(actions.texture_layout_set) + ", binding = " + itos(actions.base_texture_binding_index + E->get().texture_order) + ") uniform "; + if (SL::is_sampler_type(uniform.type)) { + ucode = "layout(set = " + itos(actions.texture_layout_set) + ", binding = " + itos(actions.base_texture_binding_index + uniform.texture_order) + ") uniform "; } - bool is_buffer_global = !SL::is_sampler_type(E->get().type) && E->get().scope == SL::ShaderNode::Uniform::SCOPE_GLOBAL; + bool is_buffer_global = !SL::is_sampler_type(uniform.type) && uniform.scope == SL::ShaderNode::Uniform::SCOPE_GLOBAL; if (is_buffer_global) { //this is an integer to index the global table ucode += _typestr(ShaderLanguage::TYPE_UINT); } else { - ucode += _prestr(E->get().precision); - ucode += _typestr(E->get().type); + ucode += _prestr(uniform.precision); + ucode += _typestr(uniform.type); } - ucode += " " + _mkid(E->key()); + ucode += " " + _mkid(uniform_name); ucode += ";\n"; - if (SL::is_sampler_type(E->get().type)) { + if (SL::is_sampler_type(uniform.type)) { for (int j = 0; j < STAGE_MAX; j++) { r_gen_code.stage_globals[j] += ucode; } GeneratedCode::Texture texture; - texture.name = E->key(); - texture.hint = E->get().hint; - texture.type = E->get().type; - texture.filter = E->get().filter; - texture.repeat = E->get().repeat; - texture.global = E->get().scope == ShaderLanguage::ShaderNode::Uniform::SCOPE_GLOBAL; + texture.name = uniform_name; + texture.hint = uniform.hint; + texture.type = uniform.type; + 
texture.filter = uniform.filter; + texture.repeat = uniform.repeat; + texture.global = uniform.scope == ShaderLanguage::ShaderNode::Uniform::SCOPE_GLOBAL; if (texture.global) { r_gen_code.uses_global_textures = true; } - r_gen_code.texture_uniforms.write[E->get().texture_order] = texture; + r_gen_code.texture_uniforms.write[uniform.texture_order] = texture; } else { if (!uses_uniforms) { uses_uniforms = true; } - uniform_defines.write[E->get().order] = ucode; + uniform_defines.write[uniform.order] = ucode; if (is_buffer_global) { //globals are indices into the global table - uniform_sizes.write[E->get().order] = _get_datatype_size(ShaderLanguage::TYPE_UINT); - uniform_alignments.write[E->get().order] = _get_datatype_alignment(ShaderLanguage::TYPE_UINT); + uniform_sizes.write[uniform.order] = _get_datatype_size(ShaderLanguage::TYPE_UINT); + uniform_alignments.write[uniform.order] = _get_datatype_alignment(ShaderLanguage::TYPE_UINT); } else { - uniform_sizes.write[E->get().order] = _get_datatype_size(E->get().type); - uniform_alignments.write[E->get().order] = _get_datatype_alignment(E->get().type); + uniform_sizes.write[uniform.order] = _get_datatype_size(uniform.type); + uniform_alignments.write[uniform.order] = _get_datatype_alignment(uniform.type); } } - p_actions.uniforms->insert(E->key(), E->get()); + p_actions.uniforms->insert(uniform_name, uniform); } for (int i = 0; i < max_uniforms; i++) { @@ -704,21 +722,32 @@ String ShaderCompilerRD::_dump_node_code(const SL::Node *p_node, int p_level, Ge List> var_frag_to_light; + Vector varying_names; + for (Map::Element *E = pnode->varyings.front(); E; E = E->next()) { - if (E->get().stage == SL::ShaderNode::Varying::STAGE_FRAGMENT_TO_LIGHT || E->get().stage == SL::ShaderNode::Varying::STAGE_FRAGMENT) { - var_frag_to_light.push_back(Pair(E->key(), E->get())); - fragment_varyings.insert(E->key()); + varying_names.push_back(E->key()); + } + + varying_names.sort_custom(); //ensure order is deterministic so the same shader 
is always produced + + for (int k = 0; k < varying_names.size(); k++) { + StringName varying_name = varying_names[k]; + const SL::ShaderNode::Varying &varying = pnode->varyings[varying_name]; + + if (varying.stage == SL::ShaderNode::Varying::STAGE_FRAGMENT_TO_LIGHT || varying.stage == SL::ShaderNode::Varying::STAGE_FRAGMENT) { + var_frag_to_light.push_back(Pair(varying_name, varying)); + fragment_varyings.insert(varying_name); continue; } String vcode; - String interp_mode = _interpstr(E->get().interpolation); - vcode += _prestr(E->get().precision); - vcode += _typestr(E->get().type); - vcode += " " + _mkid(E->key()); - if (E->get().array_size > 0) { + String interp_mode = _interpstr(varying.interpolation); + vcode += _prestr(varying.precision); + vcode += _typestr(varying.type); + vcode += " " + _mkid(varying_name); + if (varying.array_size > 0) { vcode += "["; - vcode += itos(E->get().array_size); + vcode += itos(varying.array_size); vcode += "]"; } vcode += ";\n"; diff --git a/servers/rendering/renderer_rd/shader_rd.cpp b/servers/rendering/renderer_rd/shader_rd.cpp index f7242a2b17e..6f29ff42bcd 100644 --- a/servers/rendering/renderer_rd/shader_rd.cpp +++ b/servers/rendering/renderer_rd/shader_rd.cpp @@ -30,8 +30,12 @@ #include "shader_rd.h" +#include "core/io/compression.h" +#include "core/os/dir_access.h" +#include "core/os/file_access.h" #include "renderer_compositor_rd.h" #include "servers/rendering/rendering_device.h" +#include "thirdparty/misc/smolv.h" void ShaderRD::_add_stage(const char *p_code, StageType p_stage_type) { Vector lines = String(p_code).split("\n"); @@ -97,6 +101,7 @@ void ShaderRD::_add_stage(const char *p_code, StageType p_stage_type) { void ShaderRD::setup(const char *p_vertex_code, const char *p_fragment_code, const char *p_compute_code, const char *p_name) { name = p_name; + if (p_compute_code) { _add_stage(p_compute_code, STAGE_TYPE_COMPUTE); is_compute = true; @@ -109,6 +114,18 @@ void ShaderRD::setup(const char *p_vertex_code, const 
char *p_fragment_code, con _add_stage(p_fragment_code, STAGE_TYPE_FRAGMENT); } } + + StringBuilder tohash; + tohash.append("[VersionKey]"); + tohash.append(RenderingDevice::get_singleton()->shader_get_cache_key()); + tohash.append("[Vertex]"); + tohash.append(p_vertex_code ? p_vertex_code : ""); + tohash.append("[Fragment]"); + tohash.append(p_fragment_code ? p_fragment_code : ""); + tohash.append("[Compute]"); + tohash.append(p_compute_code ? p_compute_code : ""); + + base_sha256 = tohash.as_string().sha256_text(); } RID ShaderRD::version_create() { @@ -131,6 +148,9 @@ void ShaderRD::_clear_version(Version *p_version) { } memdelete_arr(p_version->variants); + if (p_version->variant_stages) { + memdelete_arr(p_version->variant_stages); + } p_version->variants = nullptr; } } @@ -183,7 +203,7 @@ void ShaderRD::_compile_variant(uint32_t p_variant, Version *p_version) { return; //variant is disabled, return } - Vector stages; + Vector &stages = p_version->variant_stages[p_variant]; String error; String current_source; @@ -313,6 +333,197 @@ RS::ShaderNativeSourceCode ShaderRD::version_get_native_source_code(RID p_versio return source_code; } +String ShaderRD::_version_get_sha1(Version *p_version) const { + StringBuilder hash_build; + + hash_build.append("[uniforms]"); + hash_build.append(p_version->uniforms.get_data()); + hash_build.append("[vertex_globals]"); + hash_build.append(p_version->vertex_globals.get_data()); + hash_build.append("[fragment_globals]"); + hash_build.append(p_version->fragment_globals.get_data()); + hash_build.append("[compute_globals]"); + hash_build.append(p_version->compute_globals.get_data()); + + Vector code_sections; + for (Map::Element *E = p_version->code_sections.front(); E; E = E->next()) { + code_sections.push_back(E->key()); + } + code_sections.sort_custom(); + + for (int i = 0; i < code_sections.size(); i++) { + hash_build.append(String("[code:") + String(code_sections[i]) + "]"); + 
hash_build.append(p_version->code_sections[code_sections[i]].get_data()); + } + for (int i = 0; i < p_version->custom_defines.size(); i++) { + hash_build.append("[custom_defines:" + itos(i) + "]"); + hash_build.append(p_version->custom_defines[i].get_data()); + } + + return hash_build.as_string().sha1_text(); +} + +static const char *shader_file_header = "GDSC"; +static const uint32_t cache_file_version = 1; + +bool ShaderRD::_load_from_cache(Version *p_version) { + String sha1 = _version_get_sha1(p_version); + String path = shader_cache_dir.plus_file(name).plus_file(base_sha256).plus_file(sha1) + ".cache"; + + uint64_t time_from = OS::get_singleton()->get_ticks_usec(); + + FileAccessRef f = FileAccess::open(path, FileAccess::READ); + if (!f) { + return false; + } + + char header[5] = { 0, 0, 0, 0, 0 }; + f->get_buffer((uint8_t *)header, 4); + ERR_FAIL_COND_V(header != String(shader_file_header), false); + + uint32_t file_version = f->get_32(); + if (file_version != cache_file_version) { + return false; // wrong version + } + + uint32_t variant_count = f->get_32(); + + ERR_FAIL_COND_V(variant_count != (uint32_t)variant_defines.size(), false); //should not happen but check + + bool success = true; + for (uint32_t i = 0; success && i < variant_count; i++) { //stop at the first stage that fails to decode: the inner break skips the rest of that variant, so later variants would be parsed from a misaligned file offset + uint32_t stage_count = f->get_32(); + p_version->variant_stages[i].resize(stage_count); + for (uint32_t j = 0; j < stage_count; j++) { + p_version->variant_stages[i].write[j].shader_stage = RD::ShaderStage(f->get_32()); + + int compression = f->get_32(); + uint32_t length = f->get_32(); + + if (compression == 0) { + Vector data; + data.resize(length); + + f->get_buffer(data.ptrw(), length); + + p_version->variant_stages[i].write[j].spir_v = data; + } else { + Vector data; + + if (compression == 2) { + //zstd + int smol_length = f->get_32(); + Vector zstd_data; + + zstd_data.resize(smol_length); + f->get_buffer(zstd_data.ptrw(), smol_length); + + data.resize(length); + Compression::decompress(data.ptrw(), data.size(),
zstd_data.ptr(), zstd_data.size(), Compression::MODE_ZSTD); + + } else { + data.resize(length); + f->get_buffer(data.ptrw(), length); + } + + Vector spirv; + uint32_t spirv_size = smolv::GetDecodedBufferSize(data.ptr(), data.size()); + spirv.resize(spirv_size); + if (!smolv::Decode(data.ptr(), data.size(), spirv.ptrw(), spirv_size)) { + ERR_PRINT("Malformed smolv input uncompressing shader " + name + ", variant #" + itos(i) + " stage :" + itos(j)); + success = false; + break; + } + p_version->variant_stages[i].write[j].spir_v = spirv; + } + } + } + + if (!success) { + for (uint32_t i = 0; i < variant_count; i++) { + p_version->variant_stages[i].resize(0); + } + return false; + } + + float time_ms = double(OS::get_singleton()->get_ticks_usec() - time_from) / 1000.0; + + print_verbose("Shader cache load success '" + path + "' " + rtos(time_ms) + "ms."); + + for (uint32_t i = 0; i < variant_count; i++) { + RID shader = RD::get_singleton()->shader_create(p_version->variant_stages[i]); + { + MutexLock lock(variant_set_mutex); + p_version->variants[i] = shader; + } + } + + memdelete_arr(p_version->variant_stages); //clear stages + p_version->variant_stages = nullptr; + p_version->valid = true; + return true; +} + +void ShaderRD::_save_to_cache(Version *p_version) { + String sha1 = _version_get_sha1(p_version); + String path = shader_cache_dir.plus_file(name).plus_file(base_sha256).plus_file(sha1) + ".cache"; + + FileAccessRef f = FileAccess::open(path, FileAccess::WRITE); + ERR_FAIL_COND(!f); + f->store_buffer((const uint8_t *)shader_file_header, 4); + f->store_32(cache_file_version); //file version + uint32_t variant_count = variant_defines.size(); + f->store_32(variant_count); //variant count + + for (uint32_t i = 0; i < variant_count; i++) { + f->store_32(p_version->variant_stages[i].size()); //stage count + for (int j = 0; j < p_version->variant_stages[i].size(); j++) { + f->store_32(p_version->variant_stages[i][j].shader_stage); //stage count + Vector spirv = 
p_version->variant_stages[i][j].spir_v; + + bool save_uncompressed = true; + if (shader_cache_save_compressed) { + smolv::ByteArray smolv; + bool strip_debug = !shader_cache_save_debug; + if (!smolv::Encode(spirv.ptr(), spirv.size(), smolv, strip_debug ? smolv::kEncodeFlagStripDebugInfo : 0)) { + ERR_PRINT("Error compressing shader " + name + ", variant #" + itos(i) + " stage :" + itos(j)); + } else { + bool compress_zstd = shader_cache_save_compressed_zstd; + + if (compress_zstd) { + Vector zstd; + zstd.resize(Compression::get_max_compressed_buffer_size(smolv.size(), Compression::MODE_ZSTD)); + int dst_size = Compression::compress(zstd.ptrw(), &smolv[0], smolv.size(), Compression::MODE_ZSTD); + if (dst_size >= 0 && (uint32_t)dst_size < smolv.size()) { + f->store_32(2); //compressed zstd + f->store_32(smolv.size()); //size of smolv buffer (decompressed size) + f->store_32(dst_size); //size of zstd buffer + f->store_buffer(zstd.ptr(), dst_size); //zstd-compressed smolv buffer + } else { + compress_zstd = false; + } + } + + if (!compress_zstd) { + f->store_32(1); //compressed + f->store_32(smolv.size()); //size of smolv buffer + f->store_buffer(&smolv[0], smolv.size()); //smolv buffer + } + save_uncompressed = false; + } + } + + if (save_uncompressed) { + f->store_32(0); //uncompressed + f->store_32(spirv.size()); //size of spir-v buffer + f->store_buffer(spirv.ptr(), spirv.size()); //spir-v buffer + } + } + } + + f->close(); +} + void ShaderRD::_compile_version(Version *p_version) { _clear_version(p_version); @@ -320,6 +531,15 @@ void ShaderRD::_compile_version(Version *p_version) { p_version->dirty = false; p_version->variants = memnew_arr(RID, variant_defines.size()); + typedef Vector ShaderStageArray; + p_version->variant_stages = memnew_arr(ShaderStageArray, variant_defines.size()); + + if (shader_cache_dir_valid) { + if (_load_from_cache(p_version)) { + return; + } + } + #if 1 RendererThreadPool::singleton->thread_work_pool.do_work(variant_defines.size(), this, &ShaderRD::_compile_variant, p_version); @@
-351,10 +571,20 @@ void ShaderRD::_compile_version(Version *p_version) { } } memdelete_arr(p_version->variants); + if (p_version->variant_stages) { + memdelete_arr(p_version->variant_stages); + } p_version->variants = nullptr; + p_version->variant_stages = nullptr; return; + } else if (shader_cache_dir_valid) { + //save shader cache + _save_to_cache(p_version); } + memdelete_arr(p_version->variant_stages); //clear stages + p_version->variant_stages = nullptr; + p_version->valid = true; } @@ -443,6 +673,8 @@ bool ShaderRD::is_variant_enabled(int p_variant) const { return variants_enabled[p_variant]; } +bool ShaderRD::shader_cache_cleanup_on_start = false; + ShaderRD::ShaderRD() { // Do not feel forced to use this, in most cases it makes little to no difference. bool use_32_threads = false; @@ -469,8 +701,64 @@ void ShaderRD::initialize(const Vector &p_variant_defines, const String variant_defines.push_back(p_variant_defines[i].utf8()); variants_enabled.push_back(true); } + + if (shader_cache_dir != String()) { + StringBuilder hash_build; + + hash_build.append("[base_hash]"); + hash_build.append(base_sha256); + hash_build.append("[general_defines]"); + hash_build.append(general_defines.get_data()); + for (int i = 0; i < variant_defines.size(); i++) { + hash_build.append("[variant_defines:" + itos(i) + "]"); + hash_build.append(variant_defines[i].get_data()); + } + + base_sha256 = hash_build.as_string().sha256_text(); + + DirAccessRef d = DirAccess::open(shader_cache_dir); + ERR_FAIL_COND(!d); + if (d->change_dir(name) != OK) { + Error err = d->make_dir(name); + ERR_FAIL_COND(err != OK); + d->change_dir(name); + } + + //erase other versions? 
+ if (shader_cache_cleanup_on_start) { + } + // + if (d->change_dir(base_sha256) != OK) { + Error err = d->make_dir(base_sha256); + ERR_FAIL_COND(err != OK); + } + shader_cache_dir_valid = true; + + print_verbose("Shader '" + name + "' SHA256: " + base_sha256); + } } +void ShaderRD::set_shader_cache_dir(const String &p_dir) { + shader_cache_dir = p_dir; +} + +void ShaderRD::set_shader_cache_save_compressed(bool p_enable) { + shader_cache_save_compressed = p_enable; +} + +void ShaderRD::set_shader_cache_save_compressed_zstd(bool p_enable) { + shader_cache_save_compressed_zstd = p_enable; +} + +void ShaderRD::set_shader_cache_save_debug(bool p_enable) { + shader_cache_save_debug = p_enable; +} + +String ShaderRD::shader_cache_dir; +bool ShaderRD::shader_cache_save_compressed = true; +bool ShaderRD::shader_cache_save_compressed_zstd = true; +bool ShaderRD::shader_cache_save_debug = true; + ShaderRD::~ShaderRD() { List remaining; version_owner.get_owned_list(&remaining); diff --git a/servers/rendering/renderer_rd/shader_rd.h b/servers/rendering/renderer_rd/shader_rd.h index f20d5396217..9a68e02007d 100644 --- a/servers/rendering/renderer_rd/shader_rd.h +++ b/servers/rendering/renderer_rd/shader_rd.h @@ -59,7 +59,8 @@ class ShaderRD { Map code_sections; Vector custom_defines; - RID *variants; //same size as version defines + Vector *variant_stages = nullptr; + RID *variants = nullptr; //same size as version defines bool valid; bool dirty; @@ -96,10 +97,19 @@ class ShaderRD { bool is_compute = false; - const char *name; + String name; CharString base_compute_defines; + String base_sha256; + + static String shader_cache_dir; + static bool shader_cache_cleanup_on_start; + static bool shader_cache_save_compressed; + static bool shader_cache_save_compressed_zstd; + static bool shader_cache_save_debug; + bool shader_cache_dir_valid = false; + enum StageType { STAGE_TYPE_VERTEX, STAGE_TYPE_FRAGMENT, @@ -113,6 +123,10 @@ class ShaderRD { void _add_stage(const char *p_code, 
StageType p_stage_type); + String _version_get_sha1(Version *p_version) const; + bool _load_from_cache(Version *p_version); + void _save_to_cache(Version *p_version); + protected: ShaderRD(); void setup(const char *p_vertex_code, const char *p_fragment_code, const char *p_compute_code, const char *p_name); @@ -148,6 +162,11 @@ public: void set_variant_enabled(int p_variant, bool p_enabled); bool is_variant_enabled(int p_variant) const; + static void set_shader_cache_dir(const String &p_dir); + static void set_shader_cache_save_compressed(bool p_enable); + static void set_shader_cache_save_compressed_zstd(bool p_enable); + static void set_shader_cache_save_debug(bool p_enable); + RS::ShaderNativeSourceCode version_get_native_source_code(RID p_version); void initialize(const Vector &p_variant_defines, const String &p_general_defines = ""); diff --git a/servers/rendering/rendering_device.cpp b/servers/rendering/rendering_device.cpp index e6ad0018076..056cec4c1fe 100644 --- a/servers/rendering/rendering_device.cpp +++ b/servers/rendering/rendering_device.cpp @@ -40,6 +40,7 @@ RenderingDevice *RenderingDevice::get_singleton() { RenderingDevice::ShaderCompileFunction RenderingDevice::compile_function = nullptr; RenderingDevice::ShaderCacheFunction RenderingDevice::cache_function = nullptr; +RenderingDevice::ShaderGetCacheKeyFunction RenderingDevice::get_cache_key_function = nullptr; void RenderingDevice::shader_set_compile_function(ShaderCompileFunction p_function) { compile_function = p_function; @@ -49,6 +50,10 @@ void RenderingDevice::shader_set_cache_function(ShaderCacheFunction p_function) cache_function = p_function; } +void RenderingDevice::shader_set_get_cache_key_function(ShaderGetCacheKeyFunction p_function) { + get_cache_key_function = p_function; +} + Vector RenderingDevice::shader_compile_from_source(ShaderStage p_stage, const String &p_source_code, ShaderLanguage p_language, String *r_error, bool p_allow_cache) { if (p_allow_cache && cache_function) { 
Vector cache = cache_function(p_stage, p_source_code, p_language); @@ -62,6 +67,13 @@ Vector RenderingDevice::shader_compile_from_source(ShaderStage p_stage, return compile_function(p_stage, p_source_code, p_language, r_error, &device_capabilities); } +String RenderingDevice::shader_get_cache_key() const { + if (get_cache_key_function) { + return get_cache_key_function(&device_capabilities); + } + return String(); +} + RID RenderingDevice::_texture_create(const Ref &p_format, const Ref &p_view, const TypedArray &p_data) { ERR_FAIL_COND_V(p_format.is_null(), RID()); ERR_FAIL_COND_V(p_view.is_null(), RID()); diff --git a/servers/rendering/rendering_device.h b/servers/rendering/rendering_device.h index 27bded9810f..4dcb9b963ef 100644 --- a/servers/rendering/rendering_device.h +++ b/servers/rendering/rendering_device.h @@ -103,12 +103,14 @@ public: bool supports_multiview = false; // If true this device supports multiview options }; + typedef String (*ShaderGetCacheKeyFunction)(const Capabilities *p_capabilities); typedef Vector (*ShaderCompileFunction)(ShaderStage p_stage, const String &p_source_code, ShaderLanguage p_language, String *r_error, const Capabilities *p_capabilities); typedef Vector (*ShaderCacheFunction)(ShaderStage p_stage, const String &p_source_code, ShaderLanguage p_language); private: static ShaderCompileFunction compile_function; static ShaderCacheFunction cache_function; + static ShaderGetCacheKeyFunction get_cache_key_function; static RenderingDevice *singleton; @@ -635,9 +637,11 @@ public: const Capabilities *get_device_capabilities() const { return &device_capabilities; }; virtual Vector shader_compile_from_source(ShaderStage p_stage, const String &p_source_code, ShaderLanguage p_language = SHADER_LANGUAGE_GLSL, String *r_error = nullptr, bool p_allow_cache = true); + virtual String shader_get_cache_key() const; static void shader_set_compile_function(ShaderCompileFunction p_function); static void shader_set_cache_function(ShaderCacheFunction 
p_function); + static void shader_set_get_cache_key_function(ShaderGetCacheKeyFunction p_function); struct ShaderStageData { ShaderStage shader_stage; diff --git a/servers/rendering_server.cpp b/servers/rendering_server.cpp index a9601fd661c..4741e90a819 100644 --- a/servers/rendering_server.cpp +++ b/servers/rendering_server.cpp @@ -2306,6 +2306,12 @@ RenderingServer::RenderingServer() { "rendering/vulkan/rendering/back_end", PROPERTY_HINT_ENUM, "ForwardClustered,ForwardMobile")); + GLOBAL_DEF("rendering/shader_compiler/shader_cache/enabled", true); + GLOBAL_DEF("rendering/shader_compiler/shader_cache/compress", true); + GLOBAL_DEF("rendering/shader_compiler/shader_cache/use_zstd_compression", true); + GLOBAL_DEF("rendering/shader_compiler/shader_cache/strip_debug", false); + GLOBAL_DEF("rendering/shader_compiler/shader_cache/strip_debug.release", true); + GLOBAL_DEF("rendering/reflections/sky_reflections/roughness_layers", 8); GLOBAL_DEF("rendering/reflections/sky_reflections/texture_array_reflections", true); GLOBAL_DEF("rendering/reflections/sky_reflections/texture_array_reflections.mobile", false); diff --git a/thirdparty/README.md b/thirdparty/README.md index 1500e8d6568..03a2ddf5e4f 100644 --- a/thirdparty/README.md +++ b/thirdparty/README.md @@ -469,6 +469,10 @@ Collection of single-file libraries used in Godot components. 
* Version: git (2f625846a775501fb69456567409a8b12f10ea25, 2012) * License: BSD-3-Clause * Modifications: use `const char*` instead of `char*` for input string +- `smolv.h` + * Upstream: https://github.com/aras-p/smol-v + * Version: git (4b52c165c13763051a18e80ffbc2ee436314ceb2, 2020) + * License: Public Domain or MIT - `stb_rect_pack.h` * Upstream: https://github.com/nothings/stb * Version: 1.00 (2bb4a0accd4003c1db4c24533981e01b1adfd656, 2019) @@ -731,3 +735,4 @@ Files extracted from upstream source: - lib/{common/,compress/,decompress/,zstd.h} - LICENSE + diff --git a/thirdparty/misc/smolv.cpp b/thirdparty/misc/smolv.cpp new file mode 100644 index 00000000000..26ed7294f9a --- /dev/null +++ b/thirdparty/misc/smolv.cpp @@ -0,0 +1,2108 @@ +// smol-v - public domain - https://github.com/aras-p/smol-v +// authored 2016-2020 by Aras Pranckevicius +// no warranty implied; use at your own risk +// See end of file for license information. + +#include "smolv.h" +#include +#include +#include +#include +#include + +#if !defined(_MSC_VER) && __cplusplus < 201103L +#define static_assert(x,y) +#endif + +#define _SMOLV_ARRAY_SIZE(a) (sizeof(a)/sizeof((a)[0])) + +// -------------------------------------------------------------------------------------------- +// Metadata about known SPIR-V operations + +enum SpvOp +{ + SpvOpNop = 0, + SpvOpUndef = 1, + SpvOpSourceContinued = 2, + SpvOpSource = 3, + SpvOpSourceExtension = 4, + SpvOpName = 5, + SpvOpMemberName = 6, + SpvOpString = 7, + SpvOpLine = 8, + SpvOpExtension = 10, + SpvOpExtInstImport = 11, + SpvOpExtInst = 12, + SpvOpVectorShuffleCompact = 13, // not in SPIR-V, added for SMOL-V! 
+ SpvOpMemoryModel = 14, + SpvOpEntryPoint = 15, + SpvOpExecutionMode = 16, + SpvOpCapability = 17, + SpvOpTypeVoid = 19, + SpvOpTypeBool = 20, + SpvOpTypeInt = 21, + SpvOpTypeFloat = 22, + SpvOpTypeVector = 23, + SpvOpTypeMatrix = 24, + SpvOpTypeImage = 25, + SpvOpTypeSampler = 26, + SpvOpTypeSampledImage = 27, + SpvOpTypeArray = 28, + SpvOpTypeRuntimeArray = 29, + SpvOpTypeStruct = 30, + SpvOpTypeOpaque = 31, + SpvOpTypePointer = 32, + SpvOpTypeFunction = 33, + SpvOpTypeEvent = 34, + SpvOpTypeDeviceEvent = 35, + SpvOpTypeReserveId = 36, + SpvOpTypeQueue = 37, + SpvOpTypePipe = 38, + SpvOpTypeForwardPointer = 39, + SpvOpConstantTrue = 41, + SpvOpConstantFalse = 42, + SpvOpConstant = 43, + SpvOpConstantComposite = 44, + SpvOpConstantSampler = 45, + SpvOpConstantNull = 46, + SpvOpSpecConstantTrue = 48, + SpvOpSpecConstantFalse = 49, + SpvOpSpecConstant = 50, + SpvOpSpecConstantComposite = 51, + SpvOpSpecConstantOp = 52, + SpvOpFunction = 54, + SpvOpFunctionParameter = 55, + SpvOpFunctionEnd = 56, + SpvOpFunctionCall = 57, + SpvOpVariable = 59, + SpvOpImageTexelPointer = 60, + SpvOpLoad = 61, + SpvOpStore = 62, + SpvOpCopyMemory = 63, + SpvOpCopyMemorySized = 64, + SpvOpAccessChain = 65, + SpvOpInBoundsAccessChain = 66, + SpvOpPtrAccessChain = 67, + SpvOpArrayLength = 68, + SpvOpGenericPtrMemSemantics = 69, + SpvOpInBoundsPtrAccessChain = 70, + SpvOpDecorate = 71, + SpvOpMemberDecorate = 72, + SpvOpDecorationGroup = 73, + SpvOpGroupDecorate = 74, + SpvOpGroupMemberDecorate = 75, + SpvOpVectorExtractDynamic = 77, + SpvOpVectorInsertDynamic = 78, + SpvOpVectorShuffle = 79, + SpvOpCompositeConstruct = 80, + SpvOpCompositeExtract = 81, + SpvOpCompositeInsert = 82, + SpvOpCopyObject = 83, + SpvOpTranspose = 84, + SpvOpSampledImage = 86, + SpvOpImageSampleImplicitLod = 87, + SpvOpImageSampleExplicitLod = 88, + SpvOpImageSampleDrefImplicitLod = 89, + SpvOpImageSampleDrefExplicitLod = 90, + SpvOpImageSampleProjImplicitLod = 91, + SpvOpImageSampleProjExplicitLod = 92, + 
SpvOpImageSampleProjDrefImplicitLod = 93, + SpvOpImageSampleProjDrefExplicitLod = 94, + SpvOpImageFetch = 95, + SpvOpImageGather = 96, + SpvOpImageDrefGather = 97, + SpvOpImageRead = 98, + SpvOpImageWrite = 99, + SpvOpImage = 100, + SpvOpImageQueryFormat = 101, + SpvOpImageQueryOrder = 102, + SpvOpImageQuerySizeLod = 103, + SpvOpImageQuerySize = 104, + SpvOpImageQueryLod = 105, + SpvOpImageQueryLevels = 106, + SpvOpImageQuerySamples = 107, + SpvOpConvertFToU = 109, + SpvOpConvertFToS = 110, + SpvOpConvertSToF = 111, + SpvOpConvertUToF = 112, + SpvOpUConvert = 113, + SpvOpSConvert = 114, + SpvOpFConvert = 115, + SpvOpQuantizeToF16 = 116, + SpvOpConvertPtrToU = 117, + SpvOpSatConvertSToU = 118, + SpvOpSatConvertUToS = 119, + SpvOpConvertUToPtr = 120, + SpvOpPtrCastToGeneric = 121, + SpvOpGenericCastToPtr = 122, + SpvOpGenericCastToPtrExplicit = 123, + SpvOpBitcast = 124, + SpvOpSNegate = 126, + SpvOpFNegate = 127, + SpvOpIAdd = 128, + SpvOpFAdd = 129, + SpvOpISub = 130, + SpvOpFSub = 131, + SpvOpIMul = 132, + SpvOpFMul = 133, + SpvOpUDiv = 134, + SpvOpSDiv = 135, + SpvOpFDiv = 136, + SpvOpUMod = 137, + SpvOpSRem = 138, + SpvOpSMod = 139, + SpvOpFRem = 140, + SpvOpFMod = 141, + SpvOpVectorTimesScalar = 142, + SpvOpMatrixTimesScalar = 143, + SpvOpVectorTimesMatrix = 144, + SpvOpMatrixTimesVector = 145, + SpvOpMatrixTimesMatrix = 146, + SpvOpOuterProduct = 147, + SpvOpDot = 148, + SpvOpIAddCarry = 149, + SpvOpISubBorrow = 150, + SpvOpUMulExtended = 151, + SpvOpSMulExtended = 152, + SpvOpAny = 154, + SpvOpAll = 155, + SpvOpIsNan = 156, + SpvOpIsInf = 157, + SpvOpIsFinite = 158, + SpvOpIsNormal = 159, + SpvOpSignBitSet = 160, + SpvOpLessOrGreater = 161, + SpvOpOrdered = 162, + SpvOpUnordered = 163, + SpvOpLogicalEqual = 164, + SpvOpLogicalNotEqual = 165, + SpvOpLogicalOr = 166, + SpvOpLogicalAnd = 167, + SpvOpLogicalNot = 168, + SpvOpSelect = 169, + SpvOpIEqual = 170, + SpvOpINotEqual = 171, + SpvOpUGreaterThan = 172, + SpvOpSGreaterThan = 173, + SpvOpUGreaterThanEqual = 
174, + SpvOpSGreaterThanEqual = 175, + SpvOpULessThan = 176, + SpvOpSLessThan = 177, + SpvOpULessThanEqual = 178, + SpvOpSLessThanEqual = 179, + SpvOpFOrdEqual = 180, + SpvOpFUnordEqual = 181, + SpvOpFOrdNotEqual = 182, + SpvOpFUnordNotEqual = 183, + SpvOpFOrdLessThan = 184, + SpvOpFUnordLessThan = 185, + SpvOpFOrdGreaterThan = 186, + SpvOpFUnordGreaterThan = 187, + SpvOpFOrdLessThanEqual = 188, + SpvOpFUnordLessThanEqual = 189, + SpvOpFOrdGreaterThanEqual = 190, + SpvOpFUnordGreaterThanEqual = 191, + SpvOpShiftRightLogical = 194, + SpvOpShiftRightArithmetic = 195, + SpvOpShiftLeftLogical = 196, + SpvOpBitwiseOr = 197, + SpvOpBitwiseXor = 198, + SpvOpBitwiseAnd = 199, + SpvOpNot = 200, + SpvOpBitFieldInsert = 201, + SpvOpBitFieldSExtract = 202, + SpvOpBitFieldUExtract = 203, + SpvOpBitReverse = 204, + SpvOpBitCount = 205, + SpvOpDPdx = 207, + SpvOpDPdy = 208, + SpvOpFwidth = 209, + SpvOpDPdxFine = 210, + SpvOpDPdyFine = 211, + SpvOpFwidthFine = 212, + SpvOpDPdxCoarse = 213, + SpvOpDPdyCoarse = 214, + SpvOpFwidthCoarse = 215, + SpvOpEmitVertex = 218, + SpvOpEndPrimitive = 219, + SpvOpEmitStreamVertex = 220, + SpvOpEndStreamPrimitive = 221, + SpvOpControlBarrier = 224, + SpvOpMemoryBarrier = 225, + SpvOpAtomicLoad = 227, + SpvOpAtomicStore = 228, + SpvOpAtomicExchange = 229, + SpvOpAtomicCompareExchange = 230, + SpvOpAtomicCompareExchangeWeak = 231, + SpvOpAtomicIIncrement = 232, + SpvOpAtomicIDecrement = 233, + SpvOpAtomicIAdd = 234, + SpvOpAtomicISub = 235, + SpvOpAtomicSMin = 236, + SpvOpAtomicUMin = 237, + SpvOpAtomicSMax = 238, + SpvOpAtomicUMax = 239, + SpvOpAtomicAnd = 240, + SpvOpAtomicOr = 241, + SpvOpAtomicXor = 242, + SpvOpPhi = 245, + SpvOpLoopMerge = 246, + SpvOpSelectionMerge = 247, + SpvOpLabel = 248, + SpvOpBranch = 249, + SpvOpBranchConditional = 250, + SpvOpSwitch = 251, + SpvOpKill = 252, + SpvOpReturn = 253, + SpvOpReturnValue = 254, + SpvOpUnreachable = 255, + SpvOpLifetimeStart = 256, + SpvOpLifetimeStop = 257, + SpvOpGroupAsyncCopy = 259, + 
SpvOpGroupWaitEvents = 260, + SpvOpGroupAll = 261, + SpvOpGroupAny = 262, + SpvOpGroupBroadcast = 263, + SpvOpGroupIAdd = 264, + SpvOpGroupFAdd = 265, + SpvOpGroupFMin = 266, + SpvOpGroupUMin = 267, + SpvOpGroupSMin = 268, + SpvOpGroupFMax = 269, + SpvOpGroupUMax = 270, + SpvOpGroupSMax = 271, + SpvOpReadPipe = 274, + SpvOpWritePipe = 275, + SpvOpReservedReadPipe = 276, + SpvOpReservedWritePipe = 277, + SpvOpReserveReadPipePackets = 278, + SpvOpReserveWritePipePackets = 279, + SpvOpCommitReadPipe = 280, + SpvOpCommitWritePipe = 281, + SpvOpIsValidReserveId = 282, + SpvOpGetNumPipePackets = 283, + SpvOpGetMaxPipePackets = 284, + SpvOpGroupReserveReadPipePackets = 285, + SpvOpGroupReserveWritePipePackets = 286, + SpvOpGroupCommitReadPipe = 287, + SpvOpGroupCommitWritePipe = 288, + SpvOpEnqueueMarker = 291, + SpvOpEnqueueKernel = 292, + SpvOpGetKernelNDrangeSubGroupCount = 293, + SpvOpGetKernelNDrangeMaxSubGroupSize = 294, + SpvOpGetKernelWorkGroupSize = 295, + SpvOpGetKernelPreferredWorkGroupSizeMultiple = 296, + SpvOpRetainEvent = 297, + SpvOpReleaseEvent = 298, + SpvOpCreateUserEvent = 299, + SpvOpIsValidEvent = 300, + SpvOpSetUserEventStatus = 301, + SpvOpCaptureEventProfilingInfo = 302, + SpvOpGetDefaultQueue = 303, + SpvOpBuildNDRange = 304, + SpvOpImageSparseSampleImplicitLod = 305, + SpvOpImageSparseSampleExplicitLod = 306, + SpvOpImageSparseSampleDrefImplicitLod = 307, + SpvOpImageSparseSampleDrefExplicitLod = 308, + SpvOpImageSparseSampleProjImplicitLod = 309, + SpvOpImageSparseSampleProjExplicitLod = 310, + SpvOpImageSparseSampleProjDrefImplicitLod = 311, + SpvOpImageSparseSampleProjDrefExplicitLod = 312, + SpvOpImageSparseFetch = 313, + SpvOpImageSparseGather = 314, + SpvOpImageSparseDrefGather = 315, + SpvOpImageSparseTexelsResident = 316, + SpvOpNoLine = 317, + SpvOpAtomicFlagTestAndSet = 318, + SpvOpAtomicFlagClear = 319, + SpvOpImageSparseRead = 320, + SpvOpSizeOf = 321, + SpvOpTypePipeStorage = 322, + SpvOpConstantPipeStorage = 323, + 
SpvOpCreatePipeFromPipeStorage = 324, + SpvOpGetKernelLocalSizeForSubgroupCount = 325, + SpvOpGetKernelMaxNumSubgroups = 326, + SpvOpTypeNamedBarrier = 327, + SpvOpNamedBarrierInitialize = 328, + SpvOpMemoryNamedBarrier = 329, + SpvOpModuleProcessed = 330, + SpvOpExecutionModeId = 331, + SpvOpDecorateId = 332, + SpvOpGroupNonUniformElect = 333, + SpvOpGroupNonUniformAll = 334, + SpvOpGroupNonUniformAny = 335, + SpvOpGroupNonUniformAllEqual = 336, + SpvOpGroupNonUniformBroadcast = 337, + SpvOpGroupNonUniformBroadcastFirst = 338, + SpvOpGroupNonUniformBallot = 339, + SpvOpGroupNonUniformInverseBallot = 340, + SpvOpGroupNonUniformBallotBitExtract = 341, + SpvOpGroupNonUniformBallotBitCount = 342, + SpvOpGroupNonUniformBallotFindLSB = 343, + SpvOpGroupNonUniformBallotFindMSB = 344, + SpvOpGroupNonUniformShuffle = 345, + SpvOpGroupNonUniformShuffleXor = 346, + SpvOpGroupNonUniformShuffleUp = 347, + SpvOpGroupNonUniformShuffleDown = 348, + SpvOpGroupNonUniformIAdd = 349, + SpvOpGroupNonUniformFAdd = 350, + SpvOpGroupNonUniformIMul = 351, + SpvOpGroupNonUniformFMul = 352, + SpvOpGroupNonUniformSMin = 353, + SpvOpGroupNonUniformUMin = 354, + SpvOpGroupNonUniformFMin = 355, + SpvOpGroupNonUniformSMax = 356, + SpvOpGroupNonUniformUMax = 357, + SpvOpGroupNonUniformFMax = 358, + SpvOpGroupNonUniformBitwiseAnd = 359, + SpvOpGroupNonUniformBitwiseOr = 360, + SpvOpGroupNonUniformBitwiseXor = 361, + SpvOpGroupNonUniformLogicalAnd = 362, + SpvOpGroupNonUniformLogicalOr = 363, + SpvOpGroupNonUniformLogicalXor = 364, + SpvOpGroupNonUniformQuadBroadcast = 365, + SpvOpGroupNonUniformQuadSwap = 366, +}; +static const int kKnownOpsCount = SpvOpGroupNonUniformQuadSwap+1; + + +static const char* kSpirvOpNames[] = +{ + "Nop", + "Undef", + "SourceContinued", + "Source", + "SourceExtension", + "Name", + "MemberName", + "String", + "Line", + "#9", + "Extension", + "ExtInstImport", + "ExtInst", + "VectorShuffleCompact", + "MemoryModel", + "EntryPoint", + "ExecutionMode", + "Capability", + 
"#18", + "TypeVoid", + "TypeBool", + "TypeInt", + "TypeFloat", + "TypeVector", + "TypeMatrix", + "TypeImage", + "TypeSampler", + "TypeSampledImage", + "TypeArray", + "TypeRuntimeArray", + "TypeStruct", + "TypeOpaque", + "TypePointer", + "TypeFunction", + "TypeEvent", + "TypeDeviceEvent", + "TypeReserveId", + "TypeQueue", + "TypePipe", + "TypeForwardPointer", + "#40", + "ConstantTrue", + "ConstantFalse", + "Constant", + "ConstantComposite", + "ConstantSampler", + "ConstantNull", + "#47", + "SpecConstantTrue", + "SpecConstantFalse", + "SpecConstant", + "SpecConstantComposite", + "SpecConstantOp", + "#53", + "Function", + "FunctionParameter", + "FunctionEnd", + "FunctionCall", + "#58", + "Variable", + "ImageTexelPointer", + "Load", + "Store", + "CopyMemory", + "CopyMemorySized", + "AccessChain", + "InBoundsAccessChain", + "PtrAccessChain", + "ArrayLength", + "GenericPtrMemSemantics", + "InBoundsPtrAccessChain", + "Decorate", + "MemberDecorate", + "DecorationGroup", + "GroupDecorate", + "GroupMemberDecorate", + "#76", + "VectorExtractDynamic", + "VectorInsertDynamic", + "VectorShuffle", + "CompositeConstruct", + "CompositeExtract", + "CompositeInsert", + "CopyObject", + "Transpose", + "#85", + "SampledImage", + "ImageSampleImplicitLod", + "ImageSampleExplicitLod", + "ImageSampleDrefImplicitLod", + "ImageSampleDrefExplicitLod", + "ImageSampleProjImplicitLod", + "ImageSampleProjExplicitLod", + "ImageSampleProjDrefImplicitLod", + "ImageSampleProjDrefExplicitLod", + "ImageFetch", + "ImageGather", + "ImageDrefGather", + "ImageRead", + "ImageWrite", + "Image", + "ImageQueryFormat", + "ImageQueryOrder", + "ImageQuerySizeLod", + "ImageQuerySize", + "ImageQueryLod", + "ImageQueryLevels", + "ImageQuerySamples", + "#108", + "ConvertFToU", + "ConvertFToS", + "ConvertSToF", + "ConvertUToF", + "UConvert", + "SConvert", + "FConvert", + "QuantizeToF16", + "ConvertPtrToU", + "SatConvertSToU", + "SatConvertUToS", + "ConvertUToPtr", + "PtrCastToGeneric", + "GenericCastToPtr", + 
"GenericCastToPtrExplicit", + "Bitcast", + "#125", + "SNegate", + "FNegate", + "IAdd", + "FAdd", + "ISub", + "FSub", + "IMul", + "FMul", + "UDiv", + "SDiv", + "FDiv", + "UMod", + "SRem", + "SMod", + "FRem", + "FMod", + "VectorTimesScalar", + "MatrixTimesScalar", + "VectorTimesMatrix", + "MatrixTimesVector", + "MatrixTimesMatrix", + "OuterProduct", + "Dot", + "IAddCarry", + "ISubBorrow", + "UMulExtended", + "SMulExtended", + "#153", + "Any", + "All", + "IsNan", + "IsInf", + "IsFinite", + "IsNormal", + "SignBitSet", + "LessOrGreater", + "Ordered", + "Unordered", + "LogicalEqual", + "LogicalNotEqual", + "LogicalOr", + "LogicalAnd", + "LogicalNot", + "Select", + "IEqual", + "INotEqual", + "UGreaterThan", + "SGreaterThan", + "UGreaterThanEqual", + "SGreaterThanEqual", + "ULessThan", + "SLessThan", + "ULessThanEqual", + "SLessThanEqual", + "FOrdEqual", + "FUnordEqual", + "FOrdNotEqual", + "FUnordNotEqual", + "FOrdLessThan", + "FUnordLessThan", + "FOrdGreaterThan", + "FUnordGreaterThan", + "FOrdLessThanEqual", + "FUnordLessThanEqual", + "FOrdGreaterThanEqual", + "FUnordGreaterThanEqual", + "#192", + "#193", + "ShiftRightLogical", + "ShiftRightArithmetic", + "ShiftLeftLogical", + "BitwiseOr", + "BitwiseXor", + "BitwiseAnd", + "Not", + "BitFieldInsert", + "BitFieldSExtract", + "BitFieldUExtract", + "BitReverse", + "BitCount", + "#206", + "DPdx", + "DPdy", + "Fwidth", + "DPdxFine", + "DPdyFine", + "FwidthFine", + "DPdxCoarse", + "DPdyCoarse", + "FwidthCoarse", + "#216", + "#217", + "EmitVertex", + "EndPrimitive", + "EmitStreamVertex", + "EndStreamPrimitive", + "#222", + "#223", + "ControlBarrier", + "MemoryBarrier", + "#226", + "AtomicLoad", + "AtomicStore", + "AtomicExchange", + "AtomicCompareExchange", + "AtomicCompareExchangeWeak", + "AtomicIIncrement", + "AtomicIDecrement", + "AtomicIAdd", + "AtomicISub", + "AtomicSMin", + "AtomicUMin", + "AtomicSMax", + "AtomicUMax", + "AtomicAnd", + "AtomicOr", + "AtomicXor", + "#243", + "#244", + "Phi", + "LoopMerge", + 
"SelectionMerge", + "Label", + "Branch", + "BranchConditional", + "Switch", + "Kill", + "Return", + "ReturnValue", + "Unreachable", + "LifetimeStart", + "LifetimeStop", + "#258", + "GroupAsyncCopy", + "GroupWaitEvents", + "GroupAll", + "GroupAny", + "GroupBroadcast", + "GroupIAdd", + "GroupFAdd", + "GroupFMin", + "GroupUMin", + "GroupSMin", + "GroupFMax", + "GroupUMax", + "GroupSMax", + "#272", + "#273", + "ReadPipe", + "WritePipe", + "ReservedReadPipe", + "ReservedWritePipe", + "ReserveReadPipePackets", + "ReserveWritePipePackets", + "CommitReadPipe", + "CommitWritePipe", + "IsValidReserveId", + "GetNumPipePackets", + "GetMaxPipePackets", + "GroupReserveReadPipePackets", + "GroupReserveWritePipePackets", + "GroupCommitReadPipe", + "GroupCommitWritePipe", + "#289", + "#290", + "EnqueueMarker", + "EnqueueKernel", + "GetKernelNDrangeSubGroupCount", + "GetKernelNDrangeMaxSubGroupSize", + "GetKernelWorkGroupSize", + "GetKernelPreferredWorkGroupSizeMultiple", + "RetainEvent", + "ReleaseEvent", + "CreateUserEvent", + "IsValidEvent", + "SetUserEventStatus", + "CaptureEventProfilingInfo", + "GetDefaultQueue", + "BuildNDRange", + "ImageSparseSampleImplicitLod", + "ImageSparseSampleExplicitLod", + "ImageSparseSampleDrefImplicitLod", + "ImageSparseSampleDrefExplicitLod", + "ImageSparseSampleProjImplicitLod", + "ImageSparseSampleProjExplicitLod", + "ImageSparseSampleProjDrefImplicitLod", + "ImageSparseSampleProjDrefExplicitLod", + "ImageSparseFetch", + "ImageSparseGather", + "ImageSparseDrefGather", + "ImageSparseTexelsResident", + "NoLine", + "AtomicFlagTestAndSet", + "AtomicFlagClear", + "ImageSparseRead", + "SizeOf", + "TypePipeStorage", + "ConstantPipeStorage", + "CreatePipeFromPipeStorage", + "GetKernelLocalSizeForSubgroupCount", + "GetKernelMaxNumSubgroups", + "TypeNamedBarrier", + "NamedBarrierInitialize", + "MemoryNamedBarrier", + "ModuleProcessed", + "ExecutionModeId", + "DecorateId", + "GroupNonUniformElect", + "GroupNonUniformAll", + "GroupNonUniformAny", + 
"GroupNonUniformAllEqual", + "GroupNonUniformBroadcast", + "GroupNonUniformBroadcastFirst", + "GroupNonUniformBallot", + "GroupNonUniformInverseBallot", + "GroupNonUniformBallotBitExtract", + "GroupNonUniformBallotBitCount", + "GroupNonUniformBallotFindLSB", + "GroupNonUniformBallotFindMSB", + "GroupNonUniformShuffle", + "GroupNonUniformShuffleXor", + "GroupNonUniformShuffleUp", + "GroupNonUniformShuffleDown", + "GroupNonUniformIAdd", + "GroupNonUniformFAdd", + "GroupNonUniformIMul", + "GroupNonUniformFMul", + "GroupNonUniformSMin", + "GroupNonUniformUMin", + "GroupNonUniformFMin", + "GroupNonUniformSMax", + "GroupNonUniformUMax", + "GroupNonUniformFMax", + "GroupNonUniformBitwiseAnd", + "GroupNonUniformBitwiseOr", + "GroupNonUniformBitwiseXor", + "GroupNonUniformLogicalAnd", + "GroupNonUniformLogicalOr", + "GroupNonUniformLogicalXor", + "GroupNonUniformQuadBroadcast", + "GroupNonUniformQuadSwap", +}; +static_assert(_SMOLV_ARRAY_SIZE(kSpirvOpNames) == kKnownOpsCount, "kSpirvOpNames table mismatch with known SpvOps"); + + +struct OpData +{ + uint8_t hasResult; // does it have result ID? + uint8_t hasType; // does it have type ID? + uint8_t deltaFromResult; // How many words after (optional) type+result to write out as deltas from result? + uint8_t varrest; // should the rest of words be written in varint encoding? 
+}; +static const OpData kSpirvOpData[] = +{ + {0, 0, 0, 0}, // Nop + {1, 1, 0, 0}, // Undef + {0, 0, 0, 0}, // SourceContinued + {0, 0, 0, 1}, // Source + {0, 0, 0, 0}, // SourceExtension + {0, 0, 0, 0}, // Name + {0, 0, 0, 0}, // MemberName + {0, 0, 0, 0}, // String + {0, 0, 0, 1}, // Line + {1, 1, 0, 0}, // #9 + {0, 0, 0, 0}, // Extension + {1, 0, 0, 0}, // ExtInstImport + {1, 1, 0, 1}, // ExtInst + {1, 1, 2, 1}, // VectorShuffleCompact - new in SMOLV + {0, 0, 0, 1}, // MemoryModel + {0, 0, 0, 1}, // EntryPoint + {0, 0, 0, 1}, // ExecutionMode + {0, 0, 0, 1}, // Capability + {1, 1, 0, 0}, // #18 + {1, 0, 0, 1}, // TypeVoid + {1, 0, 0, 1}, // TypeBool + {1, 0, 0, 1}, // TypeInt + {1, 0, 0, 1}, // TypeFloat + {1, 0, 0, 1}, // TypeVector + {1, 0, 0, 1}, // TypeMatrix + {1, 0, 0, 1}, // TypeImage + {1, 0, 0, 1}, // TypeSampler + {1, 0, 0, 1}, // TypeSampledImage + {1, 0, 0, 1}, // TypeArray + {1, 0, 0, 1}, // TypeRuntimeArray + {1, 0, 0, 1}, // TypeStruct + {1, 0, 0, 1}, // TypeOpaque + {1, 0, 0, 1}, // TypePointer + {1, 0, 0, 1}, // TypeFunction + {1, 0, 0, 1}, // TypeEvent + {1, 0, 0, 1}, // TypeDeviceEvent + {1, 0, 0, 1}, // TypeReserveId + {1, 0, 0, 1}, // TypeQueue + {1, 0, 0, 1}, // TypePipe + {0, 0, 0, 1}, // TypeForwardPointer + {1, 1, 0, 0}, // #40 + {1, 1, 0, 0}, // ConstantTrue + {1, 1, 0, 0}, // ConstantFalse + {1, 1, 0, 0}, // Constant + {1, 1, 9, 0}, // ConstantComposite + {1, 1, 0, 1}, // ConstantSampler + {1, 1, 0, 0}, // ConstantNull + {1, 1, 0, 0}, // #47 + {1, 1, 0, 0}, // SpecConstantTrue + {1, 1, 0, 0}, // SpecConstantFalse + {1, 1, 0, 0}, // SpecConstant + {1, 1, 9, 0}, // SpecConstantComposite + {1, 1, 0, 0}, // SpecConstantOp + {1, 1, 0, 0}, // #53 + {1, 1, 0, 1}, // Function + {1, 1, 0, 0}, // FunctionParameter + {0, 0, 0, 0}, // FunctionEnd + {1, 1, 9, 0}, // FunctionCall + {1, 1, 0, 0}, // #58 + {1, 1, 0, 1}, // Variable + {1, 1, 0, 0}, // ImageTexelPointer + {1, 1, 1, 1}, // Load + {0, 0, 2, 1}, // Store + {0, 0, 0, 0}, // CopyMemory + 
{0, 0, 0, 0}, // CopyMemorySized + {1, 1, 0, 1}, // AccessChain + {1, 1, 0, 0}, // InBoundsAccessChain + {1, 1, 0, 0}, // PtrAccessChain + {1, 1, 0, 0}, // ArrayLength + {1, 1, 0, 0}, // GenericPtrMemSemantics + {1, 1, 0, 0}, // InBoundsPtrAccessChain + {0, 0, 0, 1}, // Decorate + {0, 0, 0, 1}, // MemberDecorate + {1, 0, 0, 0}, // DecorationGroup + {0, 0, 0, 0}, // GroupDecorate + {0, 0, 0, 0}, // GroupMemberDecorate + {1, 1, 0, 0}, // #76 + {1, 1, 1, 1}, // VectorExtractDynamic + {1, 1, 2, 1}, // VectorInsertDynamic + {1, 1, 2, 1}, // VectorShuffle + {1, 1, 9, 0}, // CompositeConstruct + {1, 1, 1, 1}, // CompositeExtract + {1, 1, 2, 1}, // CompositeInsert + {1, 1, 1, 0}, // CopyObject + {1, 1, 0, 0}, // Transpose + {1, 1, 0, 0}, // #85 + {1, 1, 0, 0}, // SampledImage + {1, 1, 2, 1}, // ImageSampleImplicitLod + {1, 1, 2, 1}, // ImageSampleExplicitLod + {1, 1, 3, 1}, // ImageSampleDrefImplicitLod + {1, 1, 3, 1}, // ImageSampleDrefExplicitLod + {1, 1, 2, 1}, // ImageSampleProjImplicitLod + {1, 1, 2, 1}, // ImageSampleProjExplicitLod + {1, 1, 3, 1}, // ImageSampleProjDrefImplicitLod + {1, 1, 3, 1}, // ImageSampleProjDrefExplicitLod + {1, 1, 2, 1}, // ImageFetch + {1, 1, 3, 1}, // ImageGather + {1, 1, 3, 1}, // ImageDrefGather + {1, 1, 2, 1}, // ImageRead + {0, 0, 3, 1}, // ImageWrite + {1, 1, 1, 0}, // Image + {1, 1, 1, 0}, // ImageQueryFormat + {1, 1, 1, 0}, // ImageQueryOrder + {1, 1, 2, 0}, // ImageQuerySizeLod + {1, 1, 1, 0}, // ImageQuerySize + {1, 1, 2, 0}, // ImageQueryLod + {1, 1, 1, 0}, // ImageQueryLevels + {1, 1, 1, 0}, // ImageQuerySamples + {1, 1, 0, 0}, // #108 + {1, 1, 1, 0}, // ConvertFToU + {1, 1, 1, 0}, // ConvertFToS + {1, 1, 1, 0}, // ConvertSToF + {1, 1, 1, 0}, // ConvertUToF + {1, 1, 1, 0}, // UConvert + {1, 1, 1, 0}, // SConvert + {1, 1, 1, 0}, // FConvert + {1, 1, 1, 0}, // QuantizeToF16 + {1, 1, 1, 0}, // ConvertPtrToU + {1, 1, 1, 0}, // SatConvertSToU + {1, 1, 1, 0}, // SatConvertUToS + {1, 1, 1, 0}, // ConvertUToPtr + {1, 1, 1, 0}, // 
PtrCastToGeneric + {1, 1, 1, 0}, // GenericCastToPtr + {1, 1, 1, 1}, // GenericCastToPtrExplicit + {1, 1, 1, 0}, // Bitcast + {1, 1, 0, 0}, // #125 + {1, 1, 1, 0}, // SNegate + {1, 1, 1, 0}, // FNegate + {1, 1, 2, 0}, // IAdd + {1, 1, 2, 0}, // FAdd + {1, 1, 2, 0}, // ISub + {1, 1, 2, 0}, // FSub + {1, 1, 2, 0}, // IMul + {1, 1, 2, 0}, // FMul + {1, 1, 2, 0}, // UDiv + {1, 1, 2, 0}, // SDiv + {1, 1, 2, 0}, // FDiv + {1, 1, 2, 0}, // UMod + {1, 1, 2, 0}, // SRem + {1, 1, 2, 0}, // SMod + {1, 1, 2, 0}, // FRem + {1, 1, 2, 0}, // FMod + {1, 1, 2, 0}, // VectorTimesScalar + {1, 1, 2, 0}, // MatrixTimesScalar + {1, 1, 2, 0}, // VectorTimesMatrix + {1, 1, 2, 0}, // MatrixTimesVector + {1, 1, 2, 0}, // MatrixTimesMatrix + {1, 1, 2, 0}, // OuterProduct + {1, 1, 2, 0}, // Dot + {1, 1, 2, 0}, // IAddCarry + {1, 1, 2, 0}, // ISubBorrow + {1, 1, 2, 0}, // UMulExtended + {1, 1, 2, 0}, // SMulExtended + {1, 1, 0, 0}, // #153 + {1, 1, 1, 0}, // Any + {1, 1, 1, 0}, // All + {1, 1, 1, 0}, // IsNan + {1, 1, 1, 0}, // IsInf + {1, 1, 1, 0}, // IsFinite + {1, 1, 1, 0}, // IsNormal + {1, 1, 1, 0}, // SignBitSet + {1, 1, 2, 0}, // LessOrGreater + {1, 1, 2, 0}, // Ordered + {1, 1, 2, 0}, // Unordered + {1, 1, 2, 0}, // LogicalEqual + {1, 1, 2, 0}, // LogicalNotEqual + {1, 1, 2, 0}, // LogicalOr + {1, 1, 2, 0}, // LogicalAnd + {1, 1, 1, 0}, // LogicalNot + {1, 1, 3, 0}, // Select + {1, 1, 2, 0}, // IEqual + {1, 1, 2, 0}, // INotEqual + {1, 1, 2, 0}, // UGreaterThan + {1, 1, 2, 0}, // SGreaterThan + {1, 1, 2, 0}, // UGreaterThanEqual + {1, 1, 2, 0}, // SGreaterThanEqual + {1, 1, 2, 0}, // ULessThan + {1, 1, 2, 0}, // SLessThan + {1, 1, 2, 0}, // ULessThanEqual + {1, 1, 2, 0}, // SLessThanEqual + {1, 1, 2, 0}, // FOrdEqual + {1, 1, 2, 0}, // FUnordEqual + {1, 1, 2, 0}, // FOrdNotEqual + {1, 1, 2, 0}, // FUnordNotEqual + {1, 1, 2, 0}, // FOrdLessThan + {1, 1, 2, 0}, // FUnordLessThan + {1, 1, 2, 0}, // FOrdGreaterThan + {1, 1, 2, 0}, // FUnordGreaterThan + {1, 1, 2, 0}, // FOrdLessThanEqual + 
{1, 1, 2, 0}, // FUnordLessThanEqual + {1, 1, 2, 0}, // FOrdGreaterThanEqual + {1, 1, 2, 0}, // FUnordGreaterThanEqual + {1, 1, 0, 0}, // #192 + {1, 1, 0, 0}, // #193 + {1, 1, 2, 0}, // ShiftRightLogical + {1, 1, 2, 0}, // ShiftRightArithmetic + {1, 1, 2, 0}, // ShiftLeftLogical + {1, 1, 2, 0}, // BitwiseOr + {1, 1, 2, 0}, // BitwiseXor + {1, 1, 2, 0}, // BitwiseAnd + {1, 1, 1, 0}, // Not + {1, 1, 4, 0}, // BitFieldInsert + {1, 1, 3, 0}, // BitFieldSExtract + {1, 1, 3, 0}, // BitFieldUExtract + {1, 1, 1, 0}, // BitReverse + {1, 1, 1, 0}, // BitCount + {1, 1, 0, 0}, // #206 + {1, 1, 0, 0}, // DPdx + {1, 1, 0, 0}, // DPdy + {1, 1, 0, 0}, // Fwidth + {1, 1, 0, 0}, // DPdxFine + {1, 1, 0, 0}, // DPdyFine + {1, 1, 0, 0}, // FwidthFine + {1, 1, 0, 0}, // DPdxCoarse + {1, 1, 0, 0}, // DPdyCoarse + {1, 1, 0, 0}, // FwidthCoarse + {1, 1, 0, 0}, // #216 + {1, 1, 0, 0}, // #217 + {0, 0, 0, 0}, // EmitVertex + {0, 0, 0, 0}, // EndPrimitive + {0, 0, 0, 0}, // EmitStreamVertex + {0, 0, 0, 0}, // EndStreamPrimitive + {1, 1, 0, 0}, // #222 + {1, 1, 0, 0}, // #223 + {0, 0, 3, 0}, // ControlBarrier + {0, 0, 2, 0}, // MemoryBarrier + {1, 1, 0, 0}, // #226 + {1, 1, 0, 0}, // AtomicLoad + {0, 0, 0, 0}, // AtomicStore + {1, 1, 0, 0}, // AtomicExchange + {1, 1, 0, 0}, // AtomicCompareExchange + {1, 1, 0, 0}, // AtomicCompareExchangeWeak + {1, 1, 0, 0}, // AtomicIIncrement + {1, 1, 0, 0}, // AtomicIDecrement + {1, 1, 0, 0}, // AtomicIAdd + {1, 1, 0, 0}, // AtomicISub + {1, 1, 0, 0}, // AtomicSMin + {1, 1, 0, 0}, // AtomicUMin + {1, 1, 0, 0}, // AtomicSMax + {1, 1, 0, 0}, // AtomicUMax + {1, 1, 0, 0}, // AtomicAnd + {1, 1, 0, 0}, // AtomicOr + {1, 1, 0, 0}, // AtomicXor + {1, 1, 0, 0}, // #243 + {1, 1, 0, 0}, // #244 + {1, 1, 0, 0}, // Phi + {0, 0, 2, 1}, // LoopMerge + {0, 0, 1, 1}, // SelectionMerge + {1, 0, 0, 0}, // Label + {0, 0, 1, 0}, // Branch + {0, 0, 3, 1}, // BranchConditional + {0, 0, 0, 0}, // Switch + {0, 0, 0, 0}, // Kill + {0, 0, 0, 0}, // Return + {0, 0, 0, 0}, // 
ReturnValue + {0, 0, 0, 0}, // Unreachable + {0, 0, 0, 0}, // LifetimeStart + {0, 0, 0, 0}, // LifetimeStop + {1, 1, 0, 0}, // #258 + {1, 1, 0, 0}, // GroupAsyncCopy + {0, 0, 0, 0}, // GroupWaitEvents + {1, 1, 0, 0}, // GroupAll + {1, 1, 0, 0}, // GroupAny + {1, 1, 0, 0}, // GroupBroadcast + {1, 1, 0, 0}, // GroupIAdd + {1, 1, 0, 0}, // GroupFAdd + {1, 1, 0, 0}, // GroupFMin + {1, 1, 0, 0}, // GroupUMin + {1, 1, 0, 0}, // GroupSMin + {1, 1, 0, 0}, // GroupFMax + {1, 1, 0, 0}, // GroupUMax + {1, 1, 0, 0}, // GroupSMax + {1, 1, 0, 0}, // #272 + {1, 1, 0, 0}, // #273 + {1, 1, 0, 0}, // ReadPipe + {1, 1, 0, 0}, // WritePipe + {1, 1, 0, 0}, // ReservedReadPipe + {1, 1, 0, 0}, // ReservedWritePipe + {1, 1, 0, 0}, // ReserveReadPipePackets + {1, 1, 0, 0}, // ReserveWritePipePackets + {0, 0, 0, 0}, // CommitReadPipe + {0, 0, 0, 0}, // CommitWritePipe + {1, 1, 0, 0}, // IsValidReserveId + {1, 1, 0, 0}, // GetNumPipePackets + {1, 1, 0, 0}, // GetMaxPipePackets + {1, 1, 0, 0}, // GroupReserveReadPipePackets + {1, 1, 0, 0}, // GroupReserveWritePipePackets + {0, 0, 0, 0}, // GroupCommitReadPipe + {0, 0, 0, 0}, // GroupCommitWritePipe + {1, 1, 0, 0}, // #289 + {1, 1, 0, 0}, // #290 + {1, 1, 0, 0}, // EnqueueMarker + {1, 1, 0, 0}, // EnqueueKernel + {1, 1, 0, 0}, // GetKernelNDrangeSubGroupCount + {1, 1, 0, 0}, // GetKernelNDrangeMaxSubGroupSize + {1, 1, 0, 0}, // GetKernelWorkGroupSize + {1, 1, 0, 0}, // GetKernelPreferredWorkGroupSizeMultiple + {0, 0, 0, 0}, // RetainEvent + {0, 0, 0, 0}, // ReleaseEvent + {1, 1, 0, 0}, // CreateUserEvent + {1, 1, 0, 0}, // IsValidEvent + {0, 0, 0, 0}, // SetUserEventStatus + {0, 0, 0, 0}, // CaptureEventProfilingInfo + {1, 1, 0, 0}, // GetDefaultQueue + {1, 1, 0, 0}, // BuildNDRange + {1, 1, 2, 1}, // ImageSparseSampleImplicitLod + {1, 1, 2, 1}, // ImageSparseSampleExplicitLod + {1, 1, 3, 1}, // ImageSparseSampleDrefImplicitLod + {1, 1, 3, 1}, // ImageSparseSampleDrefExplicitLod + {1, 1, 2, 1}, // ImageSparseSampleProjImplicitLod + {1, 1, 2, 
1}, // ImageSparseSampleProjExplicitLod + {1, 1, 3, 1}, // ImageSparseSampleProjDrefImplicitLod + {1, 1, 3, 1}, // ImageSparseSampleProjDrefExplicitLod + {1, 1, 2, 1}, // ImageSparseFetch + {1, 1, 3, 1}, // ImageSparseGather + {1, 1, 3, 1}, // ImageSparseDrefGather + {1, 1, 1, 0}, // ImageSparseTexelsResident + {0, 0, 0, 0}, // NoLine + {1, 1, 0, 0}, // AtomicFlagTestAndSet + {0, 0, 0, 0}, // AtomicFlagClear + {1, 1, 0, 0}, // ImageSparseRead + {1, 1, 0, 0}, // SizeOf + {1, 1, 0, 0}, // TypePipeStorage + {1, 1, 0, 0}, // ConstantPipeStorage + {1, 1, 0, 0}, // CreatePipeFromPipeStorage + {1, 1, 0, 0}, // GetKernelLocalSizeForSubgroupCount + {1, 1, 0, 0}, // GetKernelMaxNumSubgroups + {1, 1, 0, 0}, // TypeNamedBarrier + {1, 1, 0, 1}, // NamedBarrierInitialize + {0, 0, 2, 1}, // MemoryNamedBarrier + {1, 1, 0, 0}, // ModuleProcessed + {0, 0, 0, 1}, // ExecutionModeId + {0, 0, 0, 1}, // DecorateId + {1, 1, 1, 1}, // GroupNonUniformElect + {1, 1, 1, 1}, // GroupNonUniformAll + {1, 1, 1, 1}, // GroupNonUniformAny + {1, 1, 1, 1}, // GroupNonUniformAllEqual + {1, 1, 1, 1}, // GroupNonUniformBroadcast + {1, 1, 1, 1}, // GroupNonUniformBroadcastFirst + {1, 1, 1, 1}, // GroupNonUniformBallot + {1, 1, 1, 1}, // GroupNonUniformInverseBallot + {1, 1, 1, 1}, // GroupNonUniformBallotBitExtract + {1, 1, 1, 1}, // GroupNonUniformBallotBitCount + {1, 1, 1, 1}, // GroupNonUniformBallotFindLSB + {1, 1, 1, 1}, // GroupNonUniformBallotFindMSB + {1, 1, 1, 1}, // GroupNonUniformShuffle + {1, 1, 1, 1}, // GroupNonUniformShuffleXor + {1, 1, 1, 1}, // GroupNonUniformShuffleUp + {1, 1, 1, 1}, // GroupNonUniformShuffleDown + {1, 1, 1, 1}, // GroupNonUniformIAdd + {1, 1, 1, 1}, // GroupNonUniformFAdd + {1, 1, 1, 1}, // GroupNonUniformIMul + {1, 1, 1, 1}, // GroupNonUniformFMul + {1, 1, 1, 1}, // GroupNonUniformSMin + {1, 1, 1, 1}, // GroupNonUniformUMin + {1, 1, 1, 1}, // GroupNonUniformFMin + {1, 1, 1, 1}, // GroupNonUniformSMax + {1, 1, 1, 1}, // GroupNonUniformUMax + {1, 1, 1, 1}, // 
GroupNonUniformFMax + {1, 1, 1, 1}, // GroupNonUniformBitwiseAnd + {1, 1, 1, 1}, // GroupNonUniformBitwiseOr + {1, 1, 1, 1}, // GroupNonUniformBitwiseXor + {1, 1, 1, 1}, // GroupNonUniformLogicalAnd + {1, 1, 1, 1}, // GroupNonUniformLogicalOr + {1, 1, 1, 1}, // GroupNonUniformLogicalXor + {1, 1, 1, 1}, // GroupNonUniformQuadBroadcast + {1, 1, 1, 1}, // GroupNonUniformQuadSwap +}; +static_assert(_SMOLV_ARRAY_SIZE(kSpirvOpData) == kKnownOpsCount, "kSpirvOpData table mismatch with known SpvOps"); + +// Instruction encoding depends on the table that describes the various SPIR-V opcodes. +// Whenever we change or expand the table, we need to bump up the SMOL-V version, and make +// sure that we can still decode files encoded by an older version. +static int smolv_GetKnownOpsCount(int version) +{ + if (version == 0) + return SpvOpModuleProcessed+1; + if (version == 1) // 2020 February, version 1 added ExecutionModeId..GroupNonUniformQuadSwap + return SpvOpGroupNonUniformQuadSwap+1; + return 0; +} + +static bool smolv_OpHasResult(SpvOp op, int opsCount) +{ + if (op < 0 || op >= opsCount) + return false; + return kSpirvOpData[op].hasResult != 0; +} + +static bool smolv_OpHasType(SpvOp op, int opsCount) +{ + if (op < 0 || op >= opsCount) + return false; + return kSpirvOpData[op].hasType != 0; +} + +static int smolv_OpDeltaFromResult(SpvOp op, int opsCount) +{ + if (op < 0 || op >= opsCount) + return 0; + return kSpirvOpData[op].deltaFromResult; +} + +static bool smolv_OpVarRest(SpvOp op, int opsCount) +{ + if (op < 0 || op >= opsCount) + return false; + return kSpirvOpData[op].varrest != 0; +} + +static bool smolv_OpDebugInfo(SpvOp op, int opsCount) +{ + return + op == SpvOpSourceContinued || + op == SpvOpSource || + op == SpvOpSourceExtension || + op == SpvOpName || + op == SpvOpMemberName || + op == SpvOpString || + op == SpvOpLine || + op == SpvOpNoLine || + op == SpvOpModuleProcessed; +} + + +static int smolv_DecorationExtraOps(int dec) +{ + if (dec == 0 || (dec >= 2 && 
dec <= 5)) // RelaxedPrecision, Block..ColMajor + return 0; + if (dec >= 29 && dec <= 37) // Stream..XfbStride + return 1; + return -1; // unknown, encode length +} + + +// -------------------------------------------------------------------------------------------- + + +static bool smolv_CheckGenericHeader(const uint32_t* words, size_t wordCount, uint32_t expectedMagic, uint32_t versionMask) +{ + if (!words) + return false; + if (wordCount < 5) + return false; + + uint32_t headerMagic = words[0]; + if (headerMagic != expectedMagic) + return false; + uint32_t headerVersion = words[1] & versionMask; + if (headerVersion < 0x00010000 || headerVersion > 0x00010500) + return false; // only support 1.0 through 1.5 + + return true; +} + +static const int kSpirVHeaderMagic = 0x07230203; +static const int kSmolHeaderMagic = 0x534D4F4C; // "SMOL" + +static const int kSmolCurrEncodingVersion = 1; + +static bool smolv_CheckSpirVHeader(const uint32_t* words, size_t wordCount) +{ + //@TODO: if SPIR-V header magic was reversed, that means the file got written + // in a "big endian" order. Need to byteswap all words then. 
+ return smolv_CheckGenericHeader(words, wordCount, kSpirVHeaderMagic, 0xFFFFFFFF); +} +static bool smolv_CheckSmolHeader(const uint8_t* bytes, size_t byteCount) +{ + if (!smolv_CheckGenericHeader((const uint32_t*)bytes, byteCount/4, kSmolHeaderMagic, 0x00FFFFFF)) + return false; + if (byteCount < 24) // one more word past header to store decoded length + return false; + // SMOL-V version + int smolVersion = ((const uint32_t*)bytes)[1] >> 24; + if (smolVersion < 0 || smolVersion > kSmolCurrEncodingVersion) + return false; + return true; +} + + +static void smolv_Write4(smolv::ByteArray& arr, uint32_t v) +{ + arr.push_back(v & 0xFF); + arr.push_back((v >> 8) & 0xFF); + arr.push_back((v >> 16) & 0xFF); + arr.push_back(v >> 24); +} + +static void smolv_Write4(uint8_t*& buf, uint32_t v) +{ + memcpy(buf, &v, 4); + buf += 4; +} + + +static bool smolv_Read4(const uint8_t*& data, const uint8_t* dataEnd, uint32_t& outv) +{ + if (data + 4 > dataEnd) + return false; + outv = (data[0]) | (data[1] << 8) | (data[2] << 16) | (data[3] << 24); + data += 4; + return true; +} + + +// -------------------------------------------------------------------------------------------- + +// Variable-length integer encoding for unsigned integers. In each byte: +// - highest bit set if more bytes follow, cleared if this is last byte. +// - other 7 bits are the actual value payload. +// Takes 1-5 bytes to encode an integer (values between 0 and 127 take one byte, etc.). 
+ +static void smolv_WriteVarint(smolv::ByteArray& arr, uint32_t v) +{ + while (v > 127) + { + arr.push_back((v & 127) | 128); + v >>= 7; + } + arr.push_back(v & 127); +} + +static bool smolv_ReadVarint(const uint8_t*& data, const uint8_t* dataEnd, uint32_t& outVal) +{ + uint32_t v = 0; + uint32_t shift = 0; + while (data < dataEnd) + { + uint8_t b = *data; + v |= (b & 127) << shift; + shift += 7; + data++; + if (!(b & 128)) + break; + } + outVal = v; + return true; //@TODO: report failures +} + +static uint32_t smolv_ZigEncode(int32_t i) +{ + return (uint32_t(i) << 1) ^ (i >> 31); +} + +static int32_t smolv_ZigDecode(uint32_t u) +{ + return (u & 1) ? ((u >> 1) ^ ~0) : (u >> 1); +} + + +// Remap most common Op codes (Load, Store, Decorate, VectorShuffle etc.) to be in < 16 range, for +// more compact varint encoding. This basically swaps rarely used op values that are < 16 with the +// ones that are common. + +static SpvOp smolv_RemapOp(SpvOp op) +{ +# define _SMOLV_SWAP_OP(op1,op2) if (op==op1) return op2; if (op==op2) return op1 + _SMOLV_SWAP_OP(SpvOpDecorate,SpvOpNop); // 0: 24% + _SMOLV_SWAP_OP(SpvOpLoad,SpvOpUndef); // 1: 17% + _SMOLV_SWAP_OP(SpvOpStore,SpvOpSourceContinued); // 2: 9% + _SMOLV_SWAP_OP(SpvOpAccessChain,SpvOpSource); // 3: 7.2% + _SMOLV_SWAP_OP(SpvOpVectorShuffle,SpvOpSourceExtension); // 4: 5.0% + // Name - already small enum value - 5: 4.4% + // MemberName - already small enum value - 6: 2.9% + _SMOLV_SWAP_OP(SpvOpMemberDecorate,SpvOpString); // 7: 4.0% + _SMOLV_SWAP_OP(SpvOpLabel,SpvOpLine); // 8: 0.9% + _SMOLV_SWAP_OP(SpvOpVariable,(SpvOp)9); // 9: 3.9% + _SMOLV_SWAP_OP(SpvOpFMul,SpvOpExtension); // 10: 3.9% + _SMOLV_SWAP_OP(SpvOpFAdd,SpvOpExtInstImport); // 11: 2.5% + // ExtInst - already small enum value - 12: 1.2% + // VectorShuffleCompact - already small enum value - used for compact shuffle encoding + _SMOLV_SWAP_OP(SpvOpTypePointer,SpvOpMemoryModel); // 14: 2.2% + _SMOLV_SWAP_OP(SpvOpFNegate,SpvOpEntryPoint); // 15: 1.1% +# undef 
_SMOLV_SWAP_OP + return op; +} + + +// For most compact varint encoding of common instructions, the instruction length should come out +// into 3 bits (be <8). SPIR-V instruction lengths are always at least 1, and for some other +// instructions they are guaranteed to be some other minimum length. Adjust the length before encoding, +// and after decoding accordingly. + +static uint32_t smolv_EncodeLen(SpvOp op, uint32_t len) +{ + len--; + if (op == SpvOpVectorShuffle) len -= 4; + if (op == SpvOpVectorShuffleCompact) len -= 4; + if (op == SpvOpDecorate) len -= 2; + if (op == SpvOpLoad) len -= 3; + if (op == SpvOpAccessChain) len -= 3; + return len; +} + +static uint32_t smolv_DecodeLen(SpvOp op, uint32_t len) +{ + len++; + if (op == SpvOpVectorShuffle) len += 4; + if (op == SpvOpVectorShuffleCompact) len += 4; + if (op == SpvOpDecorate) len += 2; + if (op == SpvOpLoad) len += 3; + if (op == SpvOpAccessChain) len += 3; + return len; +} + + +// Shuffling bits of length + opcode to be more compact in varint encoding in typical cases: +// 0x LLLL OOOO is how SPIR-V encodes it (L=length, O=op), we shuffle into: +// 0x LLLO OOLO, so that common case (op<16, len<8) is encoded into one byte. + +static bool smolv_WriteLengthOp(smolv::ByteArray& arr, uint32_t len, SpvOp op) +{ + len = smolv_EncodeLen(op, len); + // SPIR-V length field is 16 bits; if we get a larger value that means something + // was wrong, e.g. 
a vector shuffle instruction with less than 4 words (and our + // adjustment to common lengths in smolv_EncodeLen wrapped around) + if (len > 0xFFFF) + return false; + op = smolv_RemapOp(op); + uint32_t oplen = ((len >> 4) << 20) | ((op >> 4) << 8) | ((len & 0xF) << 4) | (op & 0xF); + smolv_WriteVarint(arr, oplen); + return true; +} + +static bool smolv_ReadLengthOp(const uint8_t*& data, const uint8_t* dataEnd, uint32_t& outLen, SpvOp& outOp) +{ + uint32_t val; + if (!smolv_ReadVarint(data, dataEnd, val)) + return false; + outLen = ((val >> 20) << 4) | ((val >> 4) & 0xF); + outOp = (SpvOp)(((val >> 4) & 0xFFF0) | (val & 0xF)); + + outOp = smolv_RemapOp(outOp); + outLen = smolv_DecodeLen(outOp, outLen); + return true; +} + + + +#define _SMOLV_READ_OP(len, words, op) \ + uint32_t len = words[0] >> 16; \ + if (len < 1) return false; /* malformed instruction, length needs to be at least 1 */ \ + if (words + len > wordsEnd) return false; /* malformed instruction, goes past end of data */ \ + SpvOp op = (SpvOp)(words[0] & 0xFFFF) + + +bool smolv::Encode(const void* spirvData, size_t spirvSize, ByteArray& outSmolv, uint32_t flags, StripOpNameFilterFunc stripFilter) +{ + const size_t wordCount = spirvSize / 4; + if (wordCount * 4 != spirvSize) + return false; + const uint32_t* words = (const uint32_t*)spirvData; + const uint32_t* wordsEnd = words + wordCount; + if (!smolv_CheckSpirVHeader(words, wordCount)) + return false; + + // reserve space in output (typical compression is to about 30%; reserve half of input space) + outSmolv.reserve(outSmolv.size() + spirvSize/2); + + // header (matches SPIR-V one, except different magic) + smolv_Write4(outSmolv, kSmolHeaderMagic); + smolv_Write4(outSmolv, (words[1] & 0x00FFFFFF) + (kSmolCurrEncodingVersion<<24)); // SPIR-V version (_XXX) + SMOL-V version (X___) + smolv_Write4(outSmolv, words[2]); // generator + smolv_Write4(outSmolv, words[3]); // bound + smolv_Write4(outSmolv, words[4]); // schema + + const size_t 
headerSpirvSizeOffset = outSmolv.size(); // size field may get updated later if stripping is enabled + smolv_Write4(outSmolv, (uint32_t)spirvSize); // space needed to decode (i.e. original SPIR-V size) + + size_t strippedSpirvWordCount = wordCount; + uint32_t prevResult = 0; + uint32_t prevDecorate = 0; + + const int knownOpsCount = smolv_GetKnownOpsCount(kSmolCurrEncodingVersion); + + words += 5; + while (words < wordsEnd) + { + _SMOLV_READ_OP(instrLen, words, op); + + if ((flags & kEncodeFlagStripDebugInfo) && smolv_OpDebugInfo(op, knownOpsCount)) + { + if (!stripFilter || op != SpvOpName || !stripFilter(reinterpret_cast(&words[2]))) + { + strippedSpirvWordCount -= instrLen; + words += instrLen; + continue; + } + } + + // A usual case of vector shuffle, with less than 4 components, each with a value + // in [0..3] range: encode it in a more compact form, with the swizzle pattern in one byte. + // Turn this into a VectorShuffleCompact instruction, that takes up unused slot in Ops. + uint32_t swizzle = 0; + if (op == SpvOpVectorShuffle && instrLen <= 9) + { + uint32_t swz0 = instrLen > 5 ? words[5] : 0; + uint32_t swz1 = instrLen > 6 ? words[6] : 0; + uint32_t swz2 = instrLen > 7 ? words[7] : 0; + uint32_t swz3 = instrLen > 8 ? 
words[8] : 0; + if (swz0 < 4 && swz1 < 4 && swz2 < 4 && swz3 < 4) + { + op = SpvOpVectorShuffleCompact; + swizzle = (swz0 << 6) | (swz1 << 4) | (swz2 << 2) | (swz3); + } + } + + // length + opcode + if (!smolv_WriteLengthOp(outSmolv, instrLen, op)) + return false; + + size_t ioffs = 1; + // write type as varint, if we have it + if (smolv_OpHasType(op, knownOpsCount)) + { + if (ioffs >= instrLen) + return false; + smolv_WriteVarint(outSmolv, words[ioffs]); + ioffs++; + } + // write result as delta+zig+varint, if we have it + if (smolv_OpHasResult(op, knownOpsCount)) + { + if (ioffs >= instrLen) + return false; + uint32_t v = words[ioffs]; + smolv_WriteVarint(outSmolv, smolv_ZigEncode(v - prevResult)); // some deltas are negative, use zig + prevResult = v; + ioffs++; + } + + // Decorate & MemberDecorate: IDs relative to previous decorate + if (op == SpvOpDecorate || op == SpvOpMemberDecorate) + { + if (ioffs >= instrLen) + return false; + uint32_t v = words[ioffs]; + smolv_WriteVarint(outSmolv, smolv_ZigEncode(v - prevDecorate)); // spirv-remapped deltas often negative, use zig + prevDecorate = v; + ioffs++; + } + + // MemberDecorate special encoding: whole row of MemberDecorate instructions is often referring + // to the same type and linearly increasing member indices. Scan ahead to see how many we have, + // and encode whole bunch as one. 
+ if (op == SpvOpMemberDecorate) + { + // scan ahead until we reach end, non-member-decoration or different type + const uint32_t decorationType = words[ioffs-1]; + const uint32_t* memberWords = words; + uint32_t prevIndex = 0; + uint32_t prevOffset = 0; + // write a byte on how many we have encoded as a bunch + size_t countLocation = outSmolv.size(); + outSmolv.push_back(0); + int count = 0; + while (memberWords < wordsEnd && count < 255) + { + _SMOLV_READ_OP(memberLen, memberWords, memberOp); + if (memberOp != SpvOpMemberDecorate) + break; + if (memberLen < 4) + return false; // invalid input + if (memberWords[1] != decorationType) + break; + + // write member index as delta from previous + uint32_t memberIndex = memberWords[2]; + smolv_WriteVarint(outSmolv, memberIndex - prevIndex); + prevIndex = memberIndex; + + // decoration (and length if not common/known) + uint32_t memberDec = memberWords[3]; + smolv_WriteVarint(outSmolv, memberDec); + const int knownExtraOps = smolv_DecorationExtraOps(memberDec); + if (knownExtraOps == -1) + smolv_WriteVarint(outSmolv, memberLen-4); + else if (unsigned(knownExtraOps) + 4 != memberLen) + return false; // invalid input + + // Offset decorations are most often linearly increasing, so encode as deltas + if (memberDec == 35) // Offset + { + if (memberLen != 5) + return false; + smolv_WriteVarint(outSmolv, memberWords[4]-prevOffset); + prevOffset = memberWords[4]; + } + else + { + // write rest of decorations as varint + for (uint32_t i = 4; i < memberLen; ++i) + smolv_WriteVarint(outSmolv, memberWords[i]); + } + + memberWords += memberLen; + ++count; + } + outSmolv[countLocation] = uint8_t(count); + words = memberWords; + continue; + } + + // Write out this many IDs, encoding them relative+zigzag to result ID + int relativeCount = smolv_OpDeltaFromResult(op, knownOpsCount); + for (int i = 0; i < relativeCount && ioffs < instrLen; ++i, ++ioffs) + { + if (ioffs >= instrLen) + return false; + uint32_t delta = prevResult - 
words[ioffs]; + // some deltas are negative (often on branches, or if program was processed by spirv-remap), + // so use zig encoding + smolv_WriteVarint(outSmolv, smolv_ZigEncode(delta)); + } + + if (op == SpvOpVectorShuffleCompact) + { + // compact vector shuffle, just write out single swizzle byte + outSmolv.push_back(uint8_t(swizzle)); + ioffs = instrLen; + } + else if (smolv_OpVarRest(op, knownOpsCount)) + { + // write out rest of words with variable encoding (expected to be small integers) + for (; ioffs < instrLen; ++ioffs) + smolv_WriteVarint(outSmolv, words[ioffs]); + } + else + { + // write out rest of words without any encoding + for (; ioffs < instrLen; ++ioffs) + smolv_Write4(outSmolv, words[ioffs]); + } + + words += instrLen; + } + + if (strippedSpirvWordCount != wordCount) + { + uint8_t* headerSpirvSize = &outSmolv[headerSpirvSizeOffset]; + smolv_Write4(headerSpirvSize, (uint32_t)strippedSpirvWordCount * 4); + } + + return true; +} + + +size_t smolv::GetDecodedBufferSize(const void* smolvData, size_t smolvSize) +{ + if (!smolv_CheckSmolHeader((const uint8_t*)smolvData, smolvSize)) + return 0; + const uint32_t* words = (const uint32_t*)smolvData; + return words[5]; +} + + +bool smolv::Decode(const void* smolvData, size_t smolvSize, void* spirvOutputBuffer, size_t spirvOutputBufferSize, uint32_t flags) +{ + // check header, and whether we have enough output buffer space + const size_t neededBufferSize = GetDecodedBufferSize(smolvData, smolvSize); + if (neededBufferSize == 0) + return false; // invalid SMOL-V + if (spirvOutputBufferSize < neededBufferSize) + return false; // not enough space in output buffer + if (spirvOutputBuffer == NULL) + return false; // output buffer is null + + const uint8_t* bytes = (const uint8_t*)smolvData; + const uint8_t* bytesEnd = bytes + smolvSize; + + uint8_t* outSpirv = (uint8_t*)spirvOutputBuffer; + + uint32_t val; + int smolVersion = 0; + + // header + smolv_Write4(outSpirv, kSpirVHeaderMagic); bytes += 4; + 
smolv_Read4(bytes, bytesEnd, val); smolVersion = val >> 24; val &= 0x00FFFFFF; smolv_Write4(outSpirv, val); // version + smolv_Read4(bytes, bytesEnd, val); smolv_Write4(outSpirv, val); // generator + smolv_Read4(bytes, bytesEnd, val); smolv_Write4(outSpirv, val); // bound + smolv_Read4(bytes, bytesEnd, val); smolv_Write4(outSpirv, val); // schema + bytes += 4; // decode buffer size + + // there are two SMOL-V encoding versions, both not indicating anything in their header version field: + // one that is called "before zero" here (2016-08-31 code). Support decoding that one only by presence + // of this special flag. + const bool beforeZeroVersion = smolVersion == 0 && (flags & kDecodeFlagUse20160831AsZeroVersion) != 0; + + const int knownOpsCount = smolv_GetKnownOpsCount(smolVersion); + + uint32_t prevResult = 0; + uint32_t prevDecorate = 0; + + while (bytes < bytesEnd) + { + // read length + opcode + uint32_t instrLen; + SpvOp op; + if (!smolv_ReadLengthOp(bytes, bytesEnd, instrLen, op)) + return false; + const bool wasSwizzle = (op == SpvOpVectorShuffleCompact); + if (wasSwizzle) + op = SpvOpVectorShuffle; + smolv_Write4(outSpirv, (instrLen << 16) | op); + + size_t ioffs = 1; + + // read type as varint, if we have it + if (smolv_OpHasType(op, knownOpsCount)) + { + if (!smolv_ReadVarint(bytes, bytesEnd, val)) return false; + smolv_Write4(outSpirv, val); + ioffs++; + } + // read result as delta+varint, if we have it + if (smolv_OpHasResult(op, knownOpsCount)) + { + if (!smolv_ReadVarint(bytes, bytesEnd, val)) return false; + val = prevResult + smolv_ZigDecode(val); + smolv_Write4(outSpirv, val); + prevResult = val; + ioffs++; + } + + // Decorate: IDs relative to previous decorate + if (op == SpvOpDecorate || op == SpvOpMemberDecorate) + { + if (!smolv_ReadVarint(bytes, bytesEnd, val)) return false; + // "before zero" version did not use zig encoding for the value + val = prevDecorate + (beforeZeroVersion ? 
val : smolv_ZigDecode(val)); + smolv_Write4(outSpirv, val); + prevDecorate = val; + ioffs++; + } + + // MemberDecorate special decoding + if (op == SpvOpMemberDecorate && !beforeZeroVersion) + { + if (bytes >= bytesEnd) + return false; // broken input + int count = *bytes++; + int prevIndex = 0; + int prevOffset = 0; + for (int m = 0; m < count; ++m) + { + // read member index + uint32_t memberIndex; + if (!smolv_ReadVarint(bytes, bytesEnd, memberIndex)) return false; + memberIndex += prevIndex; + prevIndex = memberIndex; + + // decoration (and length if not common/known) + uint32_t memberDec; + if (!smolv_ReadVarint(bytes, bytesEnd, memberDec)) return false; + const int knownExtraOps = smolv_DecorationExtraOps(memberDec); + uint32_t memberLen; + if (knownExtraOps == -1) + { + if (!smolv_ReadVarint(bytes, bytesEnd, memberLen)) return false; + memberLen += 4; + } + else + memberLen = 4 + knownExtraOps; + + // write SPIR-V op+length (unless it's first member decoration, in which case it was written before) + if (m != 0) + { + smolv_Write4(outSpirv, (memberLen << 16) | op); + smolv_Write4(outSpirv, prevDecorate); + } + smolv_Write4(outSpirv, memberIndex); + smolv_Write4(outSpirv, memberDec); + // Special case for Offset decorations + if (memberDec == 35) // Offset + { + if (memberLen != 5) + return false; + if (!smolv_ReadVarint(bytes, bytesEnd, val)) return false; + val += prevOffset; + smolv_Write4(outSpirv, val); + prevOffset = val; + } + else + { + for (uint32_t i = 4; i < memberLen; ++i) + { + if (!smolv_ReadVarint(bytes, bytesEnd, val)) return false; + smolv_Write4(outSpirv, val); + } + } + } + continue; + } + + // Read this many IDs, that are relative to result ID + int relativeCount = smolv_OpDeltaFromResult(op, knownOpsCount); + // "before zero" version only used zig encoding for IDs of several ops; after + // that ops got zig encoding for their IDs + bool zigDecodeVals = true; + if (beforeZeroVersion) + { + if (op != SpvOpControlBarrier && op != 
SpvOpMemoryBarrier && op != SpvOpLoopMerge && op != SpvOpSelectionMerge && op != SpvOpBranch && op != SpvOpBranchConditional && op != SpvOpMemoryNamedBarrier) + zigDecodeVals = false; + } + for (int i = 0; i < relativeCount && ioffs < instrLen; ++i, ++ioffs) + { + if (!smolv_ReadVarint(bytes, bytesEnd, val)) return false; + if (zigDecodeVals) + val = smolv_ZigDecode(val); + smolv_Write4(outSpirv, prevResult - val); + } + + if (wasSwizzle && instrLen <= 9) + { + uint32_t swizzle = *bytes++; + if (instrLen > 5) smolv_Write4(outSpirv, (swizzle >> 6) & 3); + if (instrLen > 6) smolv_Write4(outSpirv, (swizzle >> 4) & 3); + if (instrLen > 7) smolv_Write4(outSpirv, (swizzle >> 2) & 3); + if (instrLen > 8) smolv_Write4(outSpirv, swizzle & 3); + } + else if (smolv_OpVarRest(op, knownOpsCount)) + { + // read rest of words with variable encoding + for (; ioffs < instrLen; ++ioffs) + { + if (!smolv_ReadVarint(bytes, bytesEnd, val)) return false; + smolv_Write4(outSpirv, val); + } + } + else + { + // read rest of words without any encoding + for (; ioffs < instrLen; ++ioffs) + { + if (!smolv_Read4(bytes, bytesEnd, val)) return false; + smolv_Write4(outSpirv, val); + } + } + } + + if ((uint8_t*)spirvOutputBuffer + neededBufferSize != outSpirv) + return false; // something went wrong during decoding? 
we should have decoded to exact output size + + return true; +} + + + +// -------------------------------------------------------------------------------------------- +// Calculating instruction count / space stats on SPIR-V and SMOL-V + + +struct smolv::Stats +{ + Stats() { memset(this, 0, sizeof(*this)); } + size_t opCounts[kKnownOpsCount]; + size_t opSizes[kKnownOpsCount]; + size_t smolOpSizes[kKnownOpsCount]; + size_t varintCountsOp[6]; + size_t varintCountsType[6]; + size_t varintCountsRes[6]; + size_t varintCountsOther[6]; + size_t totalOps; + size_t totalSize; + size_t totalSizeSmol; + size_t inputCount; +}; + + +smolv::Stats* smolv::StatsCreate() +{ + return new Stats(); +} + +void smolv::StatsDelete(smolv::Stats *s) +{ + delete s; +} + + +bool smolv::StatsCalculate(smolv::Stats* stats, const void* spirvData, size_t spirvSize) +{ + if (!stats) + return false; + + const size_t wordCount = spirvSize / 4; + if (wordCount * 4 != spirvSize) + return false; + const uint32_t* words = (const uint32_t*)spirvData; + const uint32_t* wordsEnd = words + wordCount; + if (!smolv_CheckSpirVHeader(words, wordCount)) + return false; + words += 5; + + stats->inputCount++; + stats->totalSize += wordCount; + + while (words < wordsEnd) + { + _SMOLV_READ_OP(instrLen, words, op); + + if (op < kKnownOpsCount) + { + stats->opCounts[op]++; + stats->opSizes[op] += instrLen; + } + words += instrLen; + stats->totalOps++; + } + + return true; +} + + +bool smolv::StatsCalculateSmol(smolv::Stats* stats, const void* smolvData, size_t smolvSize) +{ + if (!stats) + return false; + + // debugging helper to dump all encoded bytes to stdout, keep at "if 0" +# if 0 +# define _SMOLV_DEBUG_PRINT_ENCODED_BYTES() { \ + printf("Op %-22s ", op < kKnownOpsCount ? 
kSpirvOpNames[op] : "???"); \ + for (const uint8_t* b = instrBegin; b < bytes; ++b) \ + printf("%02x ", *b); \ + printf("\n"); \ + } +# else +# define _SMOLV_DEBUG_PRINT_ENCODED_BYTES() {} +# endif + + const uint8_t* bytes = (const uint8_t*)smolvData; + const uint8_t* bytesEnd = bytes + smolvSize; + if (!smolv_CheckSmolHeader(bytes, smolvSize)) + return false; + + uint32_t val; + int smolVersion; + bytes += 4; + smolv_Read4(bytes, bytesEnd, val); smolVersion = val >> 24; + const int knownOpsCount = smolv_GetKnownOpsCount(smolVersion); + bytes += 16; + + stats->totalSizeSmol += smolvSize; + + while (bytes < bytesEnd) + { + const uint8_t* instrBegin = bytes; + const uint8_t* varBegin; + + // read length + opcode + uint32_t instrLen; + SpvOp op; + varBegin = bytes; + if (!smolv_ReadLengthOp(bytes, bytesEnd, instrLen, op)) + return false; + const bool wasSwizzle = (op == SpvOpVectorShuffleCompact); + if (wasSwizzle) + op = SpvOpVectorShuffle; + stats->varintCountsOp[bytes-varBegin]++; + + size_t ioffs = 1; + if (smolv_OpHasType(op, knownOpsCount)) + { + varBegin = bytes; + if (!smolv_ReadVarint(bytes, bytesEnd, val)) return false; + stats->varintCountsType[bytes-varBegin]++; + ioffs++; + } + if (smolv_OpHasResult(op, knownOpsCount)) + { + varBegin = bytes; + if (!smolv_ReadVarint(bytes, bytesEnd, val)) return false; + stats->varintCountsRes[bytes-varBegin]++; + ioffs++; + } + + if (op == SpvOpDecorate || op == SpvOpMemberDecorate) + { + if (!smolv_ReadVarint(bytes, bytesEnd, val)) return false; + ioffs++; + } + // MemberDecorate special decoding + if (op == SpvOpMemberDecorate) + { + if (bytes >= bytesEnd) + return false; // broken input + int count = *bytes++; + for (int m = 0; m < count; ++m) + { + uint32_t memberIndex; + if (!smolv_ReadVarint(bytes, bytesEnd, memberIndex)) return false; + uint32_t memberDec; + if (!smolv_ReadVarint(bytes, bytesEnd, memberDec)) return false; + const int knownExtraOps = smolv_DecorationExtraOps(memberDec); + uint32_t memberLen; + if 
(knownExtraOps == -1) + { + if (!smolv_ReadVarint(bytes, bytesEnd, memberLen)) return false; + memberLen += 4; + } + else + memberLen = 4 + knownExtraOps; + for (uint32_t i = 4; i < memberLen; ++i) + { + if (!smolv_ReadVarint(bytes, bytesEnd, val)) return false; + } + } + stats->smolOpSizes[op] += bytes - instrBegin; + _SMOLV_DEBUG_PRINT_ENCODED_BYTES(); + continue; + } + + int relativeCount = smolv_OpDeltaFromResult(op, knownOpsCount); + for (int i = 0; i < relativeCount && ioffs < instrLen; ++i, ++ioffs) + { + varBegin = bytes; + if (!smolv_ReadVarint(bytes, bytesEnd, val)) return false; + stats->varintCountsRes[bytes-varBegin]++; + } + + if (wasSwizzle && instrLen <= 9) + { + bytes++; + } + else if (smolv_OpVarRest(op, knownOpsCount)) + { + for (; ioffs < instrLen; ++ioffs) + { + varBegin = bytes; + if (!smolv_ReadVarint(bytes, bytesEnd, val)) return false; + stats->varintCountsOther[bytes-varBegin]++; + } + } + else + { + for (; ioffs < instrLen; ++ioffs) + { + if (!smolv_Read4(bytes, bytesEnd, val)) return false; + } + } + + if (op < kKnownOpsCount) + { + stats->smolOpSizes[op] += bytes - instrBegin; + } + _SMOLV_DEBUG_PRINT_ENCODED_BYTES(); + } + + return true; +} + +static bool CompareOpCounters (std::pair a, std::pair b) +{ + return a.second > b.second; +} + +void smolv::StatsPrint(const Stats* stats) +{ + if (!stats) + return; + + typedef std::pair OpCounter; + OpCounter counts[kKnownOpsCount]; + OpCounter sizes[kKnownOpsCount]; + OpCounter sizesSmol[kKnownOpsCount]; + for (int i = 0; i < kKnownOpsCount; ++i) + { + counts[i].first = (SpvOp)i; + counts[i].second = stats->opCounts[i]; + sizes[i].first = (SpvOp)i; + sizes[i].second = stats->opSizes[i]; + sizesSmol[i].first = (SpvOp)i; + sizesSmol[i].second = stats->smolOpSizes[i]; + } + std::sort(counts, counts + kKnownOpsCount, CompareOpCounters); + std::sort(sizes, sizes + kKnownOpsCount, CompareOpCounters); + std::sort(sizesSmol, sizesSmol + kKnownOpsCount, CompareOpCounters); + + printf("Stats for %i 
SPIR-V inputs, total size %i words (%.1fKB):\n", (int)stats->inputCount, (int)stats->totalSize, stats->totalSize * 4.0f / 1024.0f); + printf("Most occuring ops:\n"); + for (int i = 0; i < 30; ++i) + { + SpvOp op = counts[i].first; + printf(" #%2i: %4i %-20s %4i (%4.1f%%)\n", i, op, kSpirvOpNames[op], (int)counts[i].second, (float)counts[i].second / (float)stats->totalOps * 100.0f); + } + printf("Largest total size of ops:\n"); + for (int i = 0; i < 30; ++i) + { + SpvOp op = sizes[i].first; + printf(" #%2i: %-22s %6i (%4.1f%%) avg len %.1f\n", + i, + kSpirvOpNames[op], + (int)sizes[i].second*4, + (float)sizes[i].second / (float)stats->totalSize * 100.0f, + (float)sizes[i].second*4 / (float)stats->opCounts[op] + ); + } + printf("SMOL varint encoding counts per byte length:\n"); + printf(" B: %6s %6s %6s %6s\n", "Op", "Type", "Result", "Other"); + for (int i = 1; i < 6; ++i) + { + printf(" %i: %6i %6i %6i %6i\n", i, (int)stats->varintCountsOp[i], (int)stats->varintCountsType[i], (int)stats->varintCountsRes[i], (int)stats->varintCountsOther[i]); + } + printf("Largest total size of ops in SMOL:\n"); + for (int i = 0; i < 30; ++i) + { + SpvOp op = sizesSmol[i].first; + printf(" #%2i: %-22s %6i (%4.1f%%) avg len %.1f\n", + i, + kSpirvOpNames[op], + (int)sizesSmol[i].second, + (float)sizesSmol[i].second / (float)stats->totalSizeSmol * 100.0f, + (float)sizesSmol[i].second / (float)stats->opCounts[op] + ); + } +} + + +// ------------------------------------------------------------------------------ +// This software is available under 2 licenses -- choose whichever you prefer. 
+// ------------------------------------------------------------------------------ +// ALTERNATIVE A - MIT License +// Copyright (c) 2016-2020 Aras Pranckevicius +// Permission is hereby granted, free of charge, to any person obtaining a copy of +// this software and associated documentation files (the "Software"), to deal in +// the Software without restriction, including without limitation the rights to +// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +// of the Software, and to permit persons to whom the Software is furnished to do +// so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +// ------------------------------------------------------------------------------ +// ALTERNATIVE B - Public Domain (www.unlicense.org) +// This is free and unencumbered software released into the public domain. +// Anyone is free to copy, modify, publish, use, compile, sell, or distribute this +// software, either in source code form or as a compiled binary, for any purpose, +// commercial or non-commercial, and by any means. +// In jurisdictions that recognize copyright laws, the author or authors of this +// software dedicate any and all copyright interest in the software to the public +// domain. We make this dedication for the benefit of the public at large and to +// the detriment of our heirs and successors. 
We intend this dedication to be an +// overt act of relinquishment in perpetuity of all present and future rights to +// this software under copyright law. +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +// ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +// ------------------------------------------------------------------------------ diff --git a/thirdparty/misc/smolv.h b/thirdparty/misc/smolv.h new file mode 100644 index 00000000000..798ee4126fd --- /dev/null +++ b/thirdparty/misc/smolv.h @@ -0,0 +1,169 @@ +// smol-v - public domain - https://github.com/aras-p/smol-v +// authored 2016-2020 by Aras Pranckevicius +// no warranty implied; use at your own risk +// See end of file for license information. +// +// +// ### OVERVIEW: +// +// SMOL-V encodes Vulkan/Khronos SPIR-V format programs into a form that is smaller, and is more +// compressible. Normally no changes to the programs are done; they decode +// into exactly same program as what was encoded. Optionally, debug information +// can be removed too. +// +// SPIR-V is a very verbose format, several times larger than same programs expressed in other +// shader formats (e.g. DX11 bytecode, GLSL, DX9 bytecode etc.). The SSA-form with ever increasing +// IDs is not very appreciated by regular data compressors either. SMOL-V does several things +// to improve this: +// - Many words, especially ones that most often have small values, are encoded using +// "varint" scheme (1-5 bytes per word, with just one byte for values in 0..127 range). 
+// See https://developers.google.com/protocol-buffers/docs/encoding +// - Some IDs used in the program are delta-encoded, relative to previously seen IDs (e.g. Result +// IDs). Often instructions reference things that were computed just before, so this results in +// small deltas. These values are also encoded using "varint" scheme. +// - Reordering instruction opcodes so that the most common ones are the smallest values, for smaller +// varint encoding. +// - Encoding several instructions in a more compact form, e.g. the "typical <=4 component swizzle" +// shape of a VectorShuffle instruction, or sequences of MemberDecorate instructions. +// +// A somewhat similar utility is spirv-remap from glslang, see +// https://github.com/KhronosGroup/glslang/blob/master/README-spirv-remap.txt +// +// +// ### USAGE: +// +// Add source/smolv.h and source/smolv.cpp to your C++ project build. +// Currently it might require C++11 or somesuch; I only tested with Visual Studio 2017/2019, Mac Xcode 11 and Gcc 5.4. +// +// smolv::Encode and smolv::Decode are the basic functionality. +// +// Other functions are for development/statistics purposes, to figure out frequencies and +// distributions of the instructions. +// +// There's a test + compression benchmarking suite in testing/testmain.cpp; using that needs adding +// other files under testing/external to the build too (3rd party code: glslang remapper, Zstd, LZ4). +// +// +// ### LIMITATIONS / TODO: +// +// - SPIR-V where the words got stored in big-endian layout is not supported yet. +// - The whole thing might not work on Big-Endian CPUs. It might, but I'm not 100% sure. +// - Not much prevention is done against malformed/corrupted inputs, TODO. +// - Out of memory cases are not handled. The code will either throw an exception +// or crash, depending on your compilation flags.
+ +#pragma once + +#include +#include +#include + +namespace smolv +{ + typedef std::vector ByteArray; + + enum EncodeFlags + { + kEncodeFlagNone = 0, + kEncodeFlagStripDebugInfo = (1<<0), // Strip all optional SPIR-V instructions (debug names etc.) + }; + enum DecodeFlags + { + kDecodeFlagNone = 0, + kDecodeFlagUse20160831AsZeroVersion = (1 << 0), // For "version zero" of SMOL-V encoding, use 2016 08 31 code path (this is what happens to be used by Unity 2017-2020) + }; + + // Preserve *some* OpName debug names. + // Return true to preserve, false to strip. + // This is really only used to implement a workaround for problems with some Vulkan drivers. + typedef bool(*StripOpNameFilterFunc)(const char* name); + + // ------------------------------------------------------------------- + // Encoding / Decoding + + // Encode SPIR-V into SMOL-V. + // + // Resulting data is appended to outSmolv array (the array is not cleared). + // + // flags is bitset of EncodeFlags values. + // + // Returns false on malformed SPIR-V input; if that happens the output array might get + // partial/broken SMOL-V program. + bool Encode(const void* spirvData, size_t spirvSize, ByteArray& outSmolv, uint32_t flags = kEncodeFlagNone, StripOpNameFilterFunc stripFilter = 0); + + + // Decode SMOL-V into SPIR-V. + // + // Resulting data is written into the passed buffer. Get required buffer space with + // GetDecodedBufferSize; this is the size of decoded SPIR-V program. + // + // flags is bitset of DecodeFlags values. + + // Decoding does no memory allocations. + // + // Returns false on malformed input; if that happens the output buffer might be only partially + // written to. + bool Decode(const void* smolvData, size_t smolvSize, void* spirvOutputBuffer, size_t spirvOutputBufferSize, uint32_t flags = kDecodeFlagNone); + + + // Given a SMOL-V program, get size of the decoded SPIR-V program. + // This is the buffer size that Decode expects.
+ // + // Returns zero on malformed input (just checks the header, not the full input). + size_t GetDecodedBufferSize(const void* smolvData, size_t smolvSize); + + + // ------------------------------------------------------------------- + // Computing instruction statistics on SPIR-V/SMOL-V programs + + struct Stats; + + Stats* StatsCreate(); + void StatsDelete(Stats* s); + + bool StatsCalculate(Stats* stats, const void* spirvData, size_t spirvSize); + bool StatsCalculateSmol(Stats* stats, const void* smolvData, size_t smolvSize); + void StatsPrint(const Stats* stats); + +} // namespace smolv + + +// ------------------------------------------------------------------------------ +// This software is available under 2 licenses -- choose whichever you prefer. +// ------------------------------------------------------------------------------ +// ALTERNATIVE A - MIT License +// Copyright (c) 2016-2020 Aras Pranckevicius +// Permission is hereby granted, free of charge, to any person obtaining a copy of +// this software and associated documentation files (the "Software"), to deal in +// the Software without restriction, including without limitation the rights to +// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +// of the Software, and to permit persons to whom the Software is furnished to do +// so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +// ------------------------------------------------------------------------------ +// ALTERNATIVE B - Public Domain (www.unlicense.org) +// This is free and unencumbered software released into the public domain. +// Anyone is free to copy, modify, publish, use, compile, sell, or distribute this +// software, either in source code form or as a compiled binary, for any purpose, +// commercial or non-commercial, and by any means. +// In jurisdictions that recognize copyright laws, the author or authors of this +// software dedicate any and all copyright interest in the software to the public +// domain. We make this dedication for the benefit of the public at large and to +// the detriment of our heirs and successors. We intend this dedication to be an +// overt act of relinquishment in perpetuity of all present and future rights to +// this software under copyright law. +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +// ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +// ------------------------------------------------------------------------------