Use a giant UBO to optimize performance in 2D

This removes the countless small UBO writes we had before
and replaces them with a single large write per render pass.

This results in much faster rendering on low-end devices
and also improves speed on all devices.
This commit is contained in:
clayjohn 2022-06-20 21:56:26 -07:00
parent 0c23a2cfe3
commit 154b9c1c91
12 changed files with 744 additions and 507 deletions

View File

@ -1925,6 +1925,9 @@
<member name="rendering/gl_compatibility/driver.windows" type="String" setter="" getter="" default="&quot;opengl3&quot;"> <member name="rendering/gl_compatibility/driver.windows" type="String" setter="" getter="" default="&quot;opengl3&quot;">
Windows override for [member rendering/gl_compatibility/driver]. Windows override for [member rendering/gl_compatibility/driver].
</member> </member>
<member name="rendering/gl_compatibility/item_buffer_size" type="int" setter="" getter="" default="16384">
Maximum number of canvas item commands that can be drawn in a single viewport update. If more render commands are issued, they will be ignored. Decreasing this limit may improve performance on bandwidth-limited devices. Increase this limit if you find that not all objects are being drawn in a frame.
</member>
<member name="rendering/global_illumination/gi/use_half_resolution" type="bool" setter="" getter="" default="false"> <member name="rendering/global_illumination/gi/use_half_resolution" type="bool" setter="" getter="" default="false">
If [code]true[/code], renders [VoxelGI] and SDFGI ([member Environment.sdfgi_enabled]) buffers at halved resolution (e.g. 960×540 when the viewport size is 1920×1080). This improves performance significantly when VoxelGI or SDFGI is enabled, at the cost of artifacts that may be visible on polygon edges. The loss in quality becomes less noticeable as the viewport resolution increases. [LightmapGI] rendering is not affected by this setting. If [code]true[/code], renders [VoxelGI] and SDFGI ([member Environment.sdfgi_enabled]) buffers at halved resolution (e.g. 960×540 when the viewport size is 1920×1080). This improves performance significantly when VoxelGI or SDFGI is enabled, at the cost of artifacts that may be visible on polygon edges. The loss in quality becomes less noticeable as the viewport resolution increases. [LightmapGI] rendering is not affected by this setting.
[b]Note:[/b] This property is only read when the project starts. To set half-resolution GI at run-time, call [method RenderingServer.gi_set_use_half_resolution] instead. [b]Note:[/b] This property is only read when the project starts. To set half-resolution GI at run-time, call [method RenderingServer.gi_set_use_half_resolution] instead.

File diff suppressed because it is too large Load Diff

View File

@ -125,6 +125,23 @@ public:
uint32_t pad2; uint32_t pad2;
}; };
// Per-polygon record stored in polygon_buffers.polygons, keyed by PolygonID.
// NOTE(review): presumably filled in by request_polygon() and released by
// free_polygon() -- confirm in the implementation file.
struct PolygonBuffers {
// GL object names (GLuint). NOTE(review): these have no default initializer,
// so they are indeterminate until assigned.
GLuint vertex_buffer;
GLuint vertex_array;
GLuint index_buffer;
// Defaults to 0. NOTE(review): presumably the number of elements to draw -- confirm.
int count = 0;
// Defaults to false. NOTE(review): presumably set when no per-vertex colors
// are supplied, in which case `color` would be used instead -- confirm.
bool color_disabled = false;
Color color;
};
// Registry of all PolygonBuffers records, looked up by PolygonID.
struct {
HashMap<PolygonID, PolygonBuffers> polygons;
// Starts at 0. NOTE(review): presumably the most recently issued ID, used
// to generate the next one -- confirm in request_polygon().
PolygonID last_id = 0;
} polygon_buffers;
// Override of the RendererCanvasRender polygon request hook. Takes indices,
// points and colors; UVs, bones and weights default to empty vectors.
// Returns a PolygonID. NOTE(review): presumably the key under which the new
// entry is stored in polygon_buffers.polygons -- confirm in the implementation.
RendererCanvasRender::PolygonID request_polygon(const Vector<int> &p_indices, const Vector<Point2> &p_points, const Vector<Color> &p_colors, const Vector<Point2> &p_uvs = Vector<Point2>(), const Vector<int> &p_bones = Vector<int>(), const Vector<float> &p_weights = Vector<float>()) override;
// Override taking the PolygonID of a polygon to free.
// NOTE(review): presumably the counterpart of request_polygon() -- confirm.
void free_polygon(PolygonID p_polygon) override;
struct InstanceData { struct InstanceData {
float world[6]; float world[6];
float color_texture_pixel_size[2]; float color_texture_pixel_size[2];
@ -156,42 +173,71 @@ public:
GLuint canvas_quad_vertices; GLuint canvas_quad_vertices;
GLuint canvas_quad_array; GLuint canvas_quad_array;
GLuint indexed_quad_buffer;
GLuint indexed_quad_array;
GLuint particle_quad_vertices; GLuint particle_quad_vertices;
GLuint particle_quad_array; GLuint particle_quad_array;
GLuint ninepatch_vertices; GLuint ninepatch_vertices;
GLuint ninepatch_elements; GLuint ninepatch_elements;
RID canvas_shader_default_version;
uint32_t max_lights_per_render;
uint32_t max_lights_per_item;
uint32_t max_instances_per_batch = 512;
uint32_t max_instances_per_ubo = 16384;
uint32_t max_instance_buffer_size = 16384 * 128;
} data; } data;
// Describes one batch of canvas instances; State keeps these in
// canvas_instance_batches. Each field has a default so a fresh Batch is
// fully initialized.
struct Batch {
// Position in the UBO measured in bytes
uint32_t start = 0;
// Number of instances recorded in this batch; starts at 0.
uint32_t instance_count = 0;
// Texture and sampling state. The filter/repeat defaults are the *_MAX
// enum values. NOTE(review): presumably used as a "not set yet" sentinel -- confirm.
RID tex = RID();
RS::CanvasItemTextureFilter filter = RS::CANVAS_ITEM_TEXTURE_FILTER_MAX;
RS::CanvasItemTextureRepeat repeat = RS::CANVAS_ITEM_TEXTURE_REPEAT_MAX;
// Blend state; defaults to mix blending with an opaque white blend color.
GLES3::CanvasShaderData::BlendMode blend_mode = GLES3::CanvasShaderData::BLEND_MODE_MIX;
Color blend_color = Color(1.0, 1.0, 1.0, 1.0);
// Non-owning pointers default to nullptr.
// NOTE(review): lifetime of the pointed-to Item/material data is not visible here.
Item *clip = nullptr;
RID material = RID();
GLES3::CanvasMaterialData *material_data = nullptr;
CanvasShaderGLES3::ShaderVariant shader_variant = CanvasShaderGLES3::MODE_QUAD;
const Item::Command *command = nullptr;
Item::Command::Type command_type = Item::Command::TYPE_ANIMATION_SLICE; // Can default to any type that doesn't form a batch.
// NOTE(review): presumably the point count of a primitive command -- confirm.
uint32_t primitive_points = 0;
};
// One instance-data uniform buffer together with its usage tracking; State
// keeps these in canvas_instance_data_buffers.
struct DataBuffer {
// GL buffer name; 0 until assigned.
GLuint ubo = 0;
// -3 converts to the unsigned value 2^64 - 3.
// NOTE(review): presumably a sentinel so a new buffer never looks recently
// used when compared against the frame counter -- confirm at the use site.
uint64_t last_frame_used = -3;
// Value-initialized sync object.
// NOTE(review): presumably signals when the GPU has finished reading this buffer -- confirm.
GLsync fence = GLsync();
};
struct State { struct State {
GLuint canvas_state_buffer; GLuint canvas_state_buffer;
LocalVector<GLuint> canvas_instance_data_buffers; LocalVector<DataBuffer> canvas_instance_data_buffers;
LocalVector<GLsync> fences; LocalVector<Batch> canvas_instance_batches;
uint32_t current_buffer = 0; uint32_t current_buffer = 0;
uint32_t current_buffer_index = 0;
uint32_t current_batch_index = 0;
InstanceData *instance_data_array = nullptr; InstanceData *instance_data_array = nullptr;
bool canvas_texscreen_used;
RID canvas_shader_current_version;
RID canvas_shader_default_version;
RID current_tex = RID(); RID current_tex = RID();
Size2 current_pixel_size = Size2(); RS::CanvasItemTextureFilter current_filter_mode = RS::CANVAS_ITEM_TEXTURE_FILTER_MAX;
RID current_normal = RID(); RS::CanvasItemTextureRepeat current_repeat_mode = RS::CANVAS_ITEM_TEXTURE_REPEAT_MAX;
RID current_specular = RID();
GLES3::Texture *current_tex_ptr;
RID current_shader_version = RID();
RS::PrimitiveType current_primitive = RS::PRIMITIVE_MAX;
uint32_t current_primitive_points = 0;
Item::Command::Type current_command = Item::Command::TYPE_RECT;
bool transparent_render_target = false; bool transparent_render_target = false;
double time = 0.0; double time = 0.0;
uint32_t max_lights_per_render;
uint32_t max_lights_per_item;
uint32_t max_instances_per_batch;
RS::CanvasItemTextureFilter default_filter = RS::CANVAS_ITEM_TEXTURE_FILTER_DEFAULT; RS::CanvasItemTextureFilter default_filter = RS::CANVAS_ITEM_TEXTURE_FILTER_DEFAULT;
RS::CanvasItemTextureRepeat default_repeat = RS::CANVAS_ITEM_TEXTURE_REPEAT_DEFAULT; RS::CanvasItemTextureRepeat default_repeat = RS::CANVAS_ITEM_TEXTURE_REPEAT_DEFAULT;
} state; } state;
@ -229,31 +275,18 @@ public:
bool free(RID p_rid) override; bool free(RID p_rid) override;
void update() override; void update() override;
void _bind_canvas_texture(RID p_texture, RS::CanvasItemTextureFilter p_base_filter, RS::CanvasItemTextureRepeat p_base_repeat, uint32_t &r_index); void _bind_canvas_texture(RID p_texture, RS::CanvasItemTextureFilter p_base_filter, RS::CanvasItemTextureRepeat p_base_repeat);
void _prepare_canvas_texture(RID p_texture, RS::CanvasItemTextureFilter p_base_filter, RS::CanvasItemTextureRepeat p_base_repeat, uint32_t &r_index, Size2 &r_texpixel_size);
// Per-polygon record stored in polygon_buffers.polygons, keyed by PolygonID.
// NOTE(review): presumably filled in by request_polygon() and released by
// free_polygon() -- confirm in the implementation file.
struct PolygonBuffers {
// GL object names (GLuint). NOTE(review): these have no default initializer,
// so they are indeterminate until assigned.
GLuint vertex_buffer;
GLuint vertex_array;
GLuint index_buffer;
// Defaults to 0. NOTE(review): presumably the number of elements to draw -- confirm.
int count = 0;
// Defaults to false. NOTE(review): presumably set when no per-vertex colors
// are supplied, in which case `color` would be used instead -- confirm.
bool color_disabled = false;
Color color;
};
// Registry of all PolygonBuffers records, looked up by PolygonID.
struct {
HashMap<PolygonID, PolygonBuffers> polygons;
// Starts at 0. NOTE(review): presumably the most recently issued ID, used
// to generate the next one -- confirm in request_polygon().
PolygonID last_id = 0;
} polygon_buffers;
// Override of the RendererCanvasRender polygon request hook. Takes indices,
// points and colors; UVs, bones and weights default to empty vectors.
// Returns a PolygonID. NOTE(review): presumably the key under which the new
// entry is stored in polygon_buffers.polygons -- confirm in the implementation.
RendererCanvasRender::PolygonID request_polygon(const Vector<int> &p_indices, const Vector<Point2> &p_points, const Vector<Color> &p_colors, const Vector<Point2> &p_uvs = Vector<Point2>(), const Vector<int> &p_bones = Vector<int>(), const Vector<float> &p_weights = Vector<float>()) override;
// Override taking the PolygonID of a polygon to free.
// NOTE(review): presumably the counterpart of request_polygon() -- confirm.
void free_polygon(PolygonID p_polygon) override;
void canvas_render_items(RID p_to_render_target, Item *p_item_list, const Color &p_modulate, Light *p_light_list, Light *p_directional_list, const Transform2D &p_canvas_transform, RS::CanvasItemTextureFilter p_default_filter, RS::CanvasItemTextureRepeat p_default_repeat, bool p_snap_2d_vertices_to_pixel, bool &r_sdf_used) override; void canvas_render_items(RID p_to_render_target, Item *p_item_list, const Color &p_modulate, Light *p_light_list, Light *p_directional_list, const Transform2D &p_canvas_transform, RS::CanvasItemTextureFilter p_default_filter, RS::CanvasItemTextureRepeat p_default_repeat, bool p_snap_2d_vertices_to_pixel, bool &r_sdf_used) override;
void _render_items(RID p_to_render_target, int p_item_count, const Transform2D &p_canvas_transform_inverse, Light *p_lights, bool p_to_backbuffer = false); void _render_items(RID p_to_render_target, int p_item_count, const Transform2D &p_canvas_transform_inverse, Light *p_lights, uint32_t &r_last_index, bool p_to_backbuffer = false);
void _render_item(RID p_render_target, const Item *p_item, const Transform2D &p_canvas_transform_inverse, Item *&current_clip, Light *p_lights, uint32_t &r_index, GLES3::CanvasShaderData::BlendMode p_blend_mode, GLES3::CanvasShaderData::BlendMode &r_last_blend_mode, Color &r_last_blend_color); void _record_item_commands(const Item *p_item, const Transform2D &p_canvas_transform_inverse, Item *&current_clip, GLES3::CanvasShaderData::BlendMode p_blend_mode, Light *p_lights, uint32_t &r_index, bool &r_break_batch);
void _render_batch(uint32_t &p_max_index); void _render_batch(Light *p_lights, uint32_t p_index);
void _bind_instance_data_buffer(uint32_t p_max_index); void _bind_material(GLES3::CanvasMaterialData *p_material_data, CanvasShaderGLES3::ShaderVariant p_variant);
void _new_batch(bool &r_batch_broken, uint32_t &r_index);
void _add_to_batch(uint32_t &r_index, bool &r_batch_broken);
void _allocate_instance_data_buffer(); void _allocate_instance_data_buffer();
void _align_instance_data_buffer(uint32_t &r_index);
void set_time(double p_time); void set_time(double p_time);

View File

@ -103,8 +103,9 @@ public:
low_end = true; low_end = true;
} }
uint64_t get_frame_number() const { return frame; } _ALWAYS_INLINE_ uint64_t get_frame_number() const { return frame; }
double get_frame_delta_time() const { return delta; } _ALWAYS_INLINE_ double get_frame_delta_time() const { return delta; }
_ALWAYS_INLINE_ double get_total_time() const { return time_total; }
RasterizerGLES3(); RasterizerGLES3();
~RasterizerGLES3(); ~RasterizerGLES3();

View File

@ -60,20 +60,18 @@ out vec2 pixel_size_interp;
void main() { void main() {
vec4 instance_custom = vec4(0.0); vec4 instance_custom = vec4(0.0);
draw_data_instance = gl_InstanceID;
#ifdef USE_PRIMITIVE
//weird bug, #ifdef USE_PRIMITIVE
//this works draw_data_instance = gl_InstanceID;
vec2 vertex; vec2 vertex;
vec2 uv; vec2 uv;
vec4 color; vec4 color;
if (gl_VertexID == 0) { if (gl_VertexID % 3 == 0) {
vertex = draw_data[draw_data_instance].point_a; vertex = draw_data[draw_data_instance].point_a;
uv = draw_data[draw_data_instance].uv_a; uv = draw_data[draw_data_instance].uv_a;
color = vec4(unpackHalf2x16(draw_data[draw_data_instance].color_a_rg), unpackHalf2x16(draw_data[draw_data_instance].color_a_ba)); color = vec4(unpackHalf2x16(draw_data[draw_data_instance].color_a_rg), unpackHalf2x16(draw_data[draw_data_instance].color_a_ba));
} else if (gl_VertexID == 1) { } else if (gl_VertexID % 3 == 1) {
vertex = draw_data[draw_data_instance].point_b; vertex = draw_data[draw_data_instance].point_b;
uv = draw_data[draw_data_instance].uv_b; uv = draw_data[draw_data_instance].uv_b;
color = vec4(unpackHalf2x16(draw_data[draw_data_instance].color_b_rg), unpackHalf2x16(draw_data[draw_data_instance].color_b_ba)); color = vec4(unpackHalf2x16(draw_data[draw_data_instance].color_b_rg), unpackHalf2x16(draw_data[draw_data_instance].color_b_ba));
@ -86,6 +84,7 @@ void main() {
vec4 bone_weights = vec4(0.0); vec4 bone_weights = vec4(0.0);
#elif defined(USE_ATTRIBUTES) #elif defined(USE_ATTRIBUTES)
draw_data_instance = gl_InstanceID;
#ifdef USE_INSTANCING #ifdef USE_INSTANCING
draw_data_instance = 0; draw_data_instance = 0;
#endif #endif
@ -103,9 +102,9 @@ void main() {
#endif #endif
#else #else
draw_data_instance = gl_VertexID / 6;
vec2 vertex_base_arr[4] = vec2[](vec2(0.0, 0.0), vec2(0.0, 1.0), vec2(1.0, 1.0), vec2(1.0, 0.0)); vec2 vertex_base_arr[6] = vec2[](vec2(0.0, 0.0), vec2(0.0, 1.0), vec2(1.0, 1.0), vec2(1.0, 0.0), vec2(0.0, 0.0), vec2(1.0, 1.0));
vec2 vertex_base = vertex_base_arr[gl_VertexID]; vec2 vertex_base = vertex_base_arr[gl_VertexID % 6];
vec2 uv = draw_data[draw_data_instance].src_rect.xy + abs(draw_data[draw_data_instance].src_rect.zw) * ((draw_data[draw_data_instance].flags & FLAGS_TRANSPOSE_RECT) != uint(0) ? vertex_base.yx : vertex_base.xy); vec2 uv = draw_data[draw_data_instance].src_rect.xy + abs(draw_data[draw_data_instance].src_rect.zw) * ((draw_data[draw_data_instance].flags & FLAGS_TRANSPOSE_RECT) != uint(0) ? vertex_base.yx : vertex_base.xy);
vec4 color = draw_data[draw_data_instance].modulation; vec4 color = draw_data[draw_data_instance].modulation;

View File

@ -86,6 +86,8 @@ Config::Config() {
glGetIntegerv(GL_MAX_TEXTURE_SIZE, &max_texture_size); glGetIntegerv(GL_MAX_TEXTURE_SIZE, &max_texture_size);
glGetIntegerv(GL_MAX_UNIFORM_BLOCK_SIZE, &max_uniform_buffer_size); glGetIntegerv(GL_MAX_UNIFORM_BLOCK_SIZE, &max_uniform_buffer_size);
glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &uniform_buffer_offset_alignment);
// the use skeleton software path should be used if either float texture is not supported, // the use skeleton software path should be used if either float texture is not supported,
// OR max_vertex_texture_image_units is zero // OR max_vertex_texture_image_units is zero
use_skeleton_software = (float_texture_supported == false) || (max_vertex_texture_image_units == 0); use_skeleton_software = (float_texture_supported == false) || (max_vertex_texture_image_units == 0);

View File

@ -64,6 +64,8 @@ public:
int max_renderable_lights = 0; int max_renderable_lights = 0;
int max_lights_per_object = 0; int max_lights_per_object = 0;
int uniform_buffer_offset_alignment = 0;
// TODO implement wireframe in OpenGL // TODO implement wireframe in OpenGL
// bool generate_wireframes; // bool generate_wireframes;

View File

@ -1533,9 +1533,11 @@ void TextureStorage::render_target_do_clear_request(RID p_render_target) {
if (!rt->clear_requested) { if (!rt->clear_requested) {
return; return;
} }
glBindFramebuffer(GL_FRAMEBUFFER, rt->fbo);
glClearBufferfv(GL_COLOR, 0, rt->clear_color.components); glClearBufferfv(GL_COLOR, 0, rt->clear_color.components);
rt->clear_requested = false; rt->clear_requested = false;
glBindFramebuffer(GL_FRAMEBUFFER, system_fbo);
} }
void TextureStorage::render_target_set_sdf_size_and_scale(RID p_render_target, RS::ViewportSDFOversize p_size, RS::ViewportSDFScale p_scale) { void TextureStorage::render_target_set_sdf_size_and_scale(RID p_render_target, RS::ViewportSDFOversize p_size, RS::ViewportSDFScale p_scale) {

View File

@ -126,11 +126,6 @@ struct CanvasTexture {
RS::CanvasItemTextureFilter texture_filter = RS::CANVAS_ITEM_TEXTURE_FILTER_DEFAULT; RS::CanvasItemTextureFilter texture_filter = RS::CANVAS_ITEM_TEXTURE_FILTER_DEFAULT;
RS::CanvasItemTextureRepeat texture_repeat = RS::CANVAS_ITEM_TEXTURE_REPEAT_DEFAULT; RS::CanvasItemTextureRepeat texture_repeat = RS::CANVAS_ITEM_TEXTURE_REPEAT_DEFAULT;
Size2i size_cache = Size2i(1, 1);
bool use_normal_cache = false;
bool use_specular_cache = false;
bool cleared_cache = true;
}; };
/* CANVAS SHADOW */ /* CANVAS SHADOW */

View File

@ -51,6 +51,7 @@ class RasterizerDummy : public RendererCompositor {
private: private:
uint64_t frame = 1; uint64_t frame = 1;
double delta = 0; double delta = 0;
double time = 0.0;
protected: protected:
RasterizerCanvasDummy canvas; RasterizerCanvasDummy canvas;
@ -82,6 +83,7 @@ public:
void begin_frame(double frame_step) override { void begin_frame(double frame_step) override {
frame++; frame++;
delta = frame_step; delta = frame_step;
time += frame_step;
} }
void prepare_for_blitting_render_targets() override {} void prepare_for_blitting_render_targets() override {}
@ -106,6 +108,7 @@ public:
uint64_t get_frame_number() const override { return frame; } uint64_t get_frame_number() const override { return frame; }
double get_frame_delta_time() const override { return delta; } double get_frame_delta_time() const override { return delta; }
double get_total_time() const override { return time; }
RasterizerDummy() {} RasterizerDummy() {}
~RasterizerDummy() {} ~RasterizerDummy() {}

View File

@ -102,6 +102,7 @@ public:
virtual void finalize() = 0; virtual void finalize() = 0;
virtual uint64_t get_frame_number() const = 0; virtual uint64_t get_frame_number() const = 0;
virtual double get_frame_delta_time() const = 0; virtual double get_frame_delta_time() const = 0;
virtual double get_total_time() const = 0;
static bool is_low_end() { return low_end; }; static bool is_low_end() { return low_end; };
virtual bool is_xr_enabled() const; virtual bool is_xr_enabled() const;

View File

@ -2875,6 +2875,10 @@ void RenderingServer::init() {
GLOBAL_DEF("rendering/rendering_device/staging_buffer/texture_upload_region_size_px", 64); GLOBAL_DEF("rendering/rendering_device/staging_buffer/texture_upload_region_size_px", 64);
GLOBAL_DEF("rendering/rendering_device/descriptor_pools/max_descriptors_per_pool", 64); GLOBAL_DEF("rendering/rendering_device/descriptor_pools/max_descriptors_per_pool", 64);
// Number of commands that can be drawn per frame.
GLOBAL_DEF_RST("rendering/gl_compatibility/item_buffer_size", 16384);
ProjectSettings::get_singleton()->set_custom_property_info("rendering/gl_compatibility/item_buffer_size", PropertyInfo(Variant::INT, "rendering/gl_compatibility/item_buffer_size", PROPERTY_HINT_RANGE, "1024,1048576,1"));
GLOBAL_DEF("rendering/shader_compiler/shader_cache/enabled", true); GLOBAL_DEF("rendering/shader_compiler/shader_cache/enabled", true);
GLOBAL_DEF("rendering/shader_compiler/shader_cache/compress", true); GLOBAL_DEF("rendering/shader_compiler/shader_cache/compress", true);
GLOBAL_DEF("rendering/shader_compiler/shader_cache/use_zstd_compression", true); GLOBAL_DEF("rendering/shader_compiler/shader_cache/use_zstd_compression", true);