From 5c8f497a24ab0b2011102cdd809f316d6e6888bc Mon Sep 17 00:00:00 2001 From: lawnjelly Date: Sun, 25 Oct 2020 15:38:54 +0000 Subject: [PATCH] Add project settings to manually specify API usage As a result of the GLES specifications being vague about best practice for how buffers should be used dynamically, different GPUs / platforms appear to have different preferences. Mac in particular seems to have a number of problems in this area, and none of the rendering team uses Macs. So far we have relied on guesswork to choose the best usage, but in an attempt to pin this down, this PR begins to introduce manual selection of options for users to test their configurations. --- .../gles2/rasterizer_canvas_base_gles2.cpp | 22 ++++++++++++------- drivers/gles2/rasterizer_canvas_base_gles2.h | 3 +++ drivers/gles2/rasterizer_canvas_gles2.cpp | 22 +++++++++++++------ drivers/gles2/rasterizer_storage_gles2.cpp | 4 +++- drivers/gles2/rasterizer_storage_gles2.h | 11 +++++++--- .../gles3/rasterizer_canvas_base_gles3.cpp | 19 +++++++++++----- drivers/gles3/rasterizer_canvas_base_gles3.h | 3 +++ drivers/gles3/rasterizer_canvas_gles3.cpp | 22 +++++++++++++------ drivers/gles3/rasterizer_storage_gles3.cpp | 9 +++++++- drivers/gles3/rasterizer_storage_gles3.h | 12 +++++++--- .../gles_common/rasterizer_canvas_batcher.h | 11 ++++++++++ servers/visual_server.cpp | 5 +++++ 12 files changed, 107 insertions(+), 36 deletions(-) diff --git a/drivers/gles2/rasterizer_canvas_base_gles2.cpp b/drivers/gles2/rasterizer_canvas_base_gles2.cpp index d7c85646a0b..5a90ffc84f6 100644 --- a/drivers/gles2/rasterizer_canvas_base_gles2.cpp +++ b/drivers/gles2/rasterizer_canvas_base_gles2.cpp @@ -447,7 +447,7 @@ void RasterizerCanvasBaseGLES2::_draw_polygon(const int *p_indices, int p_index_ glBindBuffer(GL_ARRAY_BUFFER, data.polygon_buffer); uint32_t buffer_ofs = 0; - storage->buffer_orphan_and_upload(data.polygon_buffer_size, 0, sizeof(Vector2) * p_vertex_count, p_vertices); + 
storage->buffer_orphan_and_upload(data.polygon_buffer_size, 0, sizeof(Vector2) * p_vertex_count, p_vertices, GL_ARRAY_BUFFER, _buffer_upload_usage_flag, true); glEnableVertexAttribArray(VS::ARRAY_VERTEX); glVertexAttribPointer(VS::ARRAY_VERTEX, 2, GL_FLOAT, GL_FALSE, sizeof(Vector2), NULL); @@ -495,7 +495,7 @@ void RasterizerCanvasBaseGLES2::_draw_polygon(const int *p_indices, int p_index_ glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, data.polygon_index_buffer); if (storage->config.support_32_bits_indices) { //should check for - storage->buffer_orphan_and_upload(data.polygon_index_buffer_size, 0, sizeof(int) * p_index_count, p_indices, GL_ELEMENT_ARRAY_BUFFER); + storage->buffer_orphan_and_upload(data.polygon_index_buffer_size, 0, sizeof(int) * p_index_count, p_indices, GL_ELEMENT_ARRAY_BUFFER, _buffer_upload_usage_flag, true); glDrawElements(GL_TRIANGLES, p_index_count, GL_UNSIGNED_INT, 0); storage->info.render._2d_draw_call_count++; } else { @@ -503,7 +503,7 @@ void RasterizerCanvasBaseGLES2::_draw_polygon(const int *p_indices, int p_index_ for (int i = 0; i < p_index_count; i++) { index16[i] = uint16_t(p_indices[i]); } - storage->buffer_orphan_and_upload(data.polygon_index_buffer_size, 0, sizeof(uint16_t) * p_index_count, index16, GL_ELEMENT_ARRAY_BUFFER); + storage->buffer_orphan_and_upload(data.polygon_index_buffer_size, 0, sizeof(uint16_t) * p_index_count, index16, GL_ELEMENT_ARRAY_BUFFER, _buffer_upload_usage_flag, true); glDrawElements(GL_TRIANGLES, p_index_count, GL_UNSIGNED_SHORT, 0); storage->info.render._2d_draw_call_count++; } @@ -517,7 +517,7 @@ void RasterizerCanvasBaseGLES2::_draw_generic(GLuint p_primitive, int p_vertex_c glBindBuffer(GL_ARRAY_BUFFER, data.polygon_buffer); uint32_t buffer_ofs = 0; - storage->buffer_orphan_and_upload(data.polygon_buffer_size, 0, sizeof(Vector2) * p_vertex_count, p_vertices); + storage->buffer_orphan_and_upload(data.polygon_buffer_size, 0, sizeof(Vector2) * p_vertex_count, p_vertices, GL_ARRAY_BUFFER, 
_buffer_upload_usage_flag, true); glEnableVertexAttribArray(VS::ARRAY_VERTEX); glVertexAttribPointer(VS::ARRAY_VERTEX, 2, GL_FLOAT, GL_FALSE, sizeof(Vector2), NULL); @@ -556,7 +556,7 @@ void RasterizerCanvasBaseGLES2::_draw_generic_indices(GLuint p_primitive, const glBindBuffer(GL_ARRAY_BUFFER, data.polygon_buffer); uint32_t buffer_ofs = 0; - storage->buffer_orphan_and_upload(data.polygon_buffer_size, 0, sizeof(Vector2) * p_vertex_count, p_vertices); + storage->buffer_orphan_and_upload(data.polygon_buffer_size, 0, sizeof(Vector2) * p_vertex_count, p_vertices, GL_ARRAY_BUFFER, _buffer_upload_usage_flag, true); glEnableVertexAttribArray(VS::ARRAY_VERTEX); glVertexAttribPointer(VS::ARRAY_VERTEX, 2, GL_FLOAT, GL_FALSE, sizeof(Vector2), NULL); @@ -588,7 +588,7 @@ void RasterizerCanvasBaseGLES2::_draw_generic_indices(GLuint p_primitive, const glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, data.polygon_index_buffer); if (storage->config.support_32_bits_indices) { //should check for - storage->buffer_orphan_and_upload(data.polygon_index_buffer_size, 0, sizeof(int) * p_index_count, p_indices, GL_ELEMENT_ARRAY_BUFFER); + storage->buffer_orphan_and_upload(data.polygon_index_buffer_size, 0, sizeof(int) * p_index_count, p_indices, GL_ELEMENT_ARRAY_BUFFER, _buffer_upload_usage_flag, true); glDrawElements(p_primitive, p_index_count, GL_UNSIGNED_INT, 0); storage->info.render._2d_draw_call_count++; } else { @@ -596,7 +596,7 @@ void RasterizerCanvasBaseGLES2::_draw_generic_indices(GLuint p_primitive, const for (int i = 0; i < p_index_count; i++) { index16[i] = uint16_t(p_indices[i]); } - storage->buffer_orphan_and_upload(data.polygon_index_buffer_size, 0, sizeof(uint16_t) * p_index_count, index16, GL_ELEMENT_ARRAY_BUFFER); + storage->buffer_orphan_and_upload(data.polygon_index_buffer_size, 0, sizeof(uint16_t) * p_index_count, index16, GL_ELEMENT_ARRAY_BUFFER, _buffer_upload_usage_flag, true); glDrawElements(p_primitive, p_index_count, GL_UNSIGNED_SHORT, 0); 
storage->info.render._2d_draw_call_count++; } @@ -659,7 +659,7 @@ void RasterizerCanvasBaseGLES2::_draw_gui_primitive(int p_points, const Vector2 } glBindBuffer(GL_ARRAY_BUFFER, data.polygon_buffer); - storage->buffer_orphan_and_upload(data.polygon_buffer_size, 0, p_points * stride * 4 * sizeof(float), buffer_data); + storage->buffer_orphan_and_upload(data.polygon_buffer_size, 0, p_points * stride * 4 * sizeof(float), buffer_data, GL_ARRAY_BUFFER, _buffer_upload_usage_flag, true); glVertexAttribPointer(VS::ARRAY_VERTEX, 2, GL_FLOAT, GL_FALSE, stride * sizeof(float), NULL); @@ -912,6 +912,12 @@ void RasterizerCanvasBaseGLES2::draw_lens_distortion_rect(const Rect2 &p_rect, f void RasterizerCanvasBaseGLES2::initialize() { + bool flag_stream = GLOBAL_GET("rendering/options/api_usage_legacy/flag_stream"); + if (flag_stream) + _buffer_upload_usage_flag = GL_STREAM_DRAW; + else + _buffer_upload_usage_flag = GL_DYNAMIC_DRAW; + // quad buffer { glGenBuffers(1, &data.canvas_quad_vertices); diff --git a/drivers/gles2/rasterizer_canvas_base_gles2.h b/drivers/gles2/rasterizer_canvas_base_gles2.h index f500db55ed7..f4dbafa35e3 100644 --- a/drivers/gles2/rasterizer_canvas_base_gles2.h +++ b/drivers/gles2/rasterizer_canvas_base_gles2.h @@ -107,6 +107,9 @@ public: RasterizerStorageGLES2 *storage; + // allow user to choose api usage + GLenum _buffer_upload_usage_flag; + void _set_uniforms(); virtual RID light_internal_create(); diff --git a/drivers/gles2/rasterizer_canvas_gles2.cpp b/drivers/gles2/rasterizer_canvas_gles2.cpp index fb0284fbd7b..f29f1caae41 100644 --- a/drivers/gles2/rasterizer_canvas_gles2.cpp +++ b/drivers/gles2/rasterizer_canvas_gles2.cpp @@ -53,26 +53,34 @@ void RasterizerCanvasGLES2::_batch_upload_buffers() { glBindBuffer(GL_ARRAY_BUFFER, bdata.gl_vertex_buffer); + // usage flag is a project setting + GLenum buffer_usage_flag = GL_DYNAMIC_DRAW; + if (bdata.buffer_mode_batch_upload_flag_stream) { + buffer_usage_flag = GL_STREAM_DRAW; + } + // orphan the old (for 
now) - //glBufferData(GL_ARRAY_BUFFER, 0, 0, GL_DYNAMIC_DRAW); + if (bdata.buffer_mode_batch_upload_send_null) { + glBufferData(GL_ARRAY_BUFFER, 0, 0, buffer_usage_flag); // GL_DYNAMIC_DRAW); + } switch (bdata.fvf) { case RasterizerStorageCommon::FVF_UNBATCHED: // should not happen break; case RasterizerStorageCommon::FVF_REGULAR: // no change - glBufferData(GL_ARRAY_BUFFER, sizeof(BatchVertex) * bdata.vertices.size(), bdata.vertices.get_data(), GL_DYNAMIC_DRAW); + glBufferData(GL_ARRAY_BUFFER, sizeof(BatchVertex) * bdata.vertices.size(), bdata.vertices.get_data(), buffer_usage_flag); break; case RasterizerStorageCommon::FVF_COLOR: - glBufferData(GL_ARRAY_BUFFER, sizeof(BatchVertexColored) * bdata.unit_vertices.size(), bdata.unit_vertices.get_unit(0), GL_DYNAMIC_DRAW); + glBufferData(GL_ARRAY_BUFFER, sizeof(BatchVertexColored) * bdata.unit_vertices.size(), bdata.unit_vertices.get_unit(0), buffer_usage_flag); break; case RasterizerStorageCommon::FVF_LIGHT_ANGLE: - glBufferData(GL_ARRAY_BUFFER, sizeof(BatchVertexLightAngled) * bdata.unit_vertices.size(), bdata.unit_vertices.get_unit(0), GL_DYNAMIC_DRAW); + glBufferData(GL_ARRAY_BUFFER, sizeof(BatchVertexLightAngled) * bdata.unit_vertices.size(), bdata.unit_vertices.get_unit(0), buffer_usage_flag); break; case RasterizerStorageCommon::FVF_MODULATED: - glBufferData(GL_ARRAY_BUFFER, sizeof(BatchVertexModulated) * bdata.unit_vertices.size(), bdata.unit_vertices.get_unit(0), GL_DYNAMIC_DRAW); + glBufferData(GL_ARRAY_BUFFER, sizeof(BatchVertexModulated) * bdata.unit_vertices.size(), bdata.unit_vertices.get_unit(0), buffer_usage_flag); break; case RasterizerStorageCommon::FVF_LARGE: - glBufferData(GL_ARRAY_BUFFER, sizeof(BatchVertexLarge) * bdata.unit_vertices.size(), bdata.unit_vertices.get_unit(0), GL_DYNAMIC_DRAW); + glBufferData(GL_ARRAY_BUFFER, sizeof(BatchVertexLarge) * bdata.unit_vertices.size(), bdata.unit_vertices.get_unit(0), buffer_usage_flag); break; } @@ -810,7 +818,7 @@ void 
RasterizerCanvasGLES2::render_batches(Item::Command *const *p_commands, Ite } glBindBuffer(GL_ARRAY_BUFFER, data.ninepatch_vertices); - glBufferData(GL_ARRAY_BUFFER, sizeof(float) * (16 + 16) * 2, buffer, GL_DYNAMIC_DRAW); + glBufferData(GL_ARRAY_BUFFER, sizeof(float) * (16 + 16) * 2, buffer, _buffer_upload_usage_flag); glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, data.ninepatch_elements); diff --git a/drivers/gles2/rasterizer_storage_gles2.cpp b/drivers/gles2/rasterizer_storage_gles2.cpp index b2119c1dcef..d155d372173 100644 --- a/drivers/gles2/rasterizer_storage_gles2.cpp +++ b/drivers/gles2/rasterizer_storage_gles2.cpp @@ -3798,7 +3798,7 @@ void RasterizerStorageGLES2::_update_skeleton_transform_buffer(const PoolVector< glBufferData(GL_ARRAY_BUFFER, buffer_size, p_data.read().ptr(), GL_DYNAMIC_DRAW); } else { // this may not be best, it could be better to use glBufferData in both cases. - buffer_orphan_and_upload(resources.skeleton_transform_buffer_size, 0, buffer_size, p_data.read().ptr()); + buffer_orphan_and_upload(resources.skeleton_transform_buffer_size, 0, buffer_size, p_data.read().ptr(), GL_ARRAY_BUFFER, GL_DYNAMIC_DRAW, true); } glBindBuffer(GL_ARRAY_BUFFER, 0); @@ -6289,6 +6289,7 @@ void RasterizerStorageGLES2::initialize() { config.force_vertex_shading = GLOBAL_GET("rendering/quality/shading/force_vertex_shading"); config.use_fast_texture_filter = GLOBAL_GET("rendering/quality/filters/use_nearest_mipmap_filter"); + config.should_orphan = GLOBAL_GET("rendering/options/api_usage_legacy/orphan_buffers"); } void RasterizerStorageGLES2::finalize() { @@ -6308,4 +6309,5 @@ void RasterizerStorageGLES2::update_dirty_resources() { RasterizerStorageGLES2::RasterizerStorageGLES2() { RasterizerStorageGLES2::system_fbo = 0; + config.should_orphan = true; } diff --git a/drivers/gles2/rasterizer_storage_gles2.h b/drivers/gles2/rasterizer_storage_gles2.h index 00ee0f8b0d8..f2024de128f 100644 --- a/drivers/gles2/rasterizer_storage_gles2.h +++ b/drivers/gles2/rasterizer_storage_gles2.h @@ 
-96,6 +96,9 @@ public: GLuint depth_type; GLuint depth_buffer_internalformat; + // in some cases the legacy render didn't orphan. We will mark these + // so the user can switch orphaning off for them. + bool should_orphan; } config; struct Resources { @@ -1334,17 +1337,19 @@ public: virtual String get_video_adapter_name() const; virtual String get_video_adapter_vendor() const; - void buffer_orphan_and_upload(unsigned int p_buffer_size, unsigned int p_offset, unsigned int p_data_size, const void *p_data, GLenum p_target = GL_ARRAY_BUFFER); + void buffer_orphan_and_upload(unsigned int p_buffer_size, unsigned int p_offset, unsigned int p_data_size, const void *p_data, GLenum p_target = GL_ARRAY_BUFFER, GLenum p_usage = GL_DYNAMIC_DRAW, bool p_optional_orphan = false); RasterizerStorageGLES2(); }; // standardize the orphan / upload in one place so it can be changed per platform as necessary, and avoid future // bugs causing pipeline stalls -inline void RasterizerStorageGLES2::buffer_orphan_and_upload(unsigned int p_buffer_size, unsigned int p_offset, unsigned int p_data_size, const void *p_data, GLenum p_target) { +inline void RasterizerStorageGLES2::buffer_orphan_and_upload(unsigned int p_buffer_size, unsigned int p_offset, unsigned int p_data_size, const void *p_data, GLenum p_target, GLenum p_usage, bool p_optional_orphan) { // Orphan the buffer to avoid CPU/GPU sync points caused by glBufferSubData // Was previously #ifndef GLES_OVER_GL however this causes stalls on desktop mac also (and possibly other) - glBufferData(p_target, p_buffer_size, NULL, GL_DYNAMIC_DRAW); + if (!p_optional_orphan || (config.should_orphan)) { + glBufferData(p_target, p_buffer_size, NULL, p_usage); + } glBufferSubData(p_target, p_offset, p_data_size, p_data); } diff --git a/drivers/gles3/rasterizer_canvas_base_gles3.cpp b/drivers/gles3/rasterizer_canvas_base_gles3.cpp index e60e06be40f..7f80a258695 100644 --- a/drivers/gles3/rasterizer_canvas_base_gles3.cpp +++ 
b/drivers/gles3/rasterizer_canvas_base_gles3.cpp @@ -349,7 +349,7 @@ void RasterizerCanvasBaseGLES3::_draw_polygon(const int *p_indices, int p_index_ glBindBuffer(GL_ARRAY_BUFFER, data.polygon_buffer); uint32_t buffer_ofs = 0; - storage->buffer_orphan_and_upload(data.polygon_buffer_size, buffer_ofs, sizeof(Vector2) * p_vertex_count, p_vertices); + storage->buffer_orphan_and_upload(data.polygon_buffer_size, buffer_ofs, sizeof(Vector2) * p_vertex_count, p_vertices, GL_ARRAY_BUFFER, _buffer_upload_usage_flag); glEnableVertexAttribArray(VS::ARRAY_VERTEX); glVertexAttribPointer(VS::ARRAY_VERTEX, 2, GL_FLOAT, false, sizeof(Vector2), CAST_INT_TO_UCHAR_PTR(buffer_ofs)); @@ -417,7 +417,7 @@ void RasterizerCanvasBaseGLES3::_draw_polygon(const int *p_indices, int p_index_ //bind the indices buffer. glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, data.polygon_index_buffer); - storage->buffer_orphan_and_upload(data.polygon_index_buffer_size, 0, sizeof(int) * p_index_count, p_indices, GL_ELEMENT_ARRAY_BUFFER); + storage->buffer_orphan_and_upload(data.polygon_index_buffer_size, 0, sizeof(int) * p_index_count, p_indices, GL_ELEMENT_ARRAY_BUFFER, _buffer_upload_usage_flag); //draw the triangles. 
glDrawElements(GL_TRIANGLES, p_index_count, GL_UNSIGNED_INT, 0); @@ -441,7 +441,7 @@ void RasterizerCanvasBaseGLES3::_draw_generic(GLuint p_primitive, int p_vertex_c //vertex uint32_t buffer_ofs = 0; - storage->buffer_orphan_and_upload(data.polygon_buffer_size, buffer_ofs, sizeof(Vector2) * p_vertex_count, p_vertices); + storage->buffer_orphan_and_upload(data.polygon_buffer_size, buffer_ofs, sizeof(Vector2) * p_vertex_count, p_vertices, GL_ARRAY_BUFFER, _buffer_upload_usage_flag); glEnableVertexAttribArray(VS::ARRAY_VERTEX); glVertexAttribPointer(VS::ARRAY_VERTEX, 2, GL_FLOAT, false, sizeof(Vector2), CAST_INT_TO_UCHAR_PTR(buffer_ofs)); @@ -489,7 +489,7 @@ void RasterizerCanvasBaseGLES3::_draw_generic_indices(GLuint p_primitive, const //vertex uint32_t buffer_ofs = 0; - storage->buffer_orphan_and_upload(data.polygon_buffer_size, buffer_ofs, sizeof(Vector2) * p_vertex_count, p_vertices); + storage->buffer_orphan_and_upload(data.polygon_buffer_size, buffer_ofs, sizeof(Vector2) * p_vertex_count, p_vertices, GL_ARRAY_BUFFER, _buffer_upload_usage_flag); glEnableVertexAttribArray(VS::ARRAY_VERTEX); glVertexAttribPointer(VS::ARRAY_VERTEX, 2, GL_FLOAT, false, sizeof(Vector2), CAST_INT_TO_UCHAR_PTR(buffer_ofs)); @@ -535,7 +535,7 @@ void RasterizerCanvasBaseGLES3::_draw_generic_indices(GLuint p_primitive, const //bind the indices buffer. glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, data.polygon_index_buffer); - storage->buffer_orphan_and_upload(data.polygon_index_buffer_size, 0, sizeof(int) * p_index_count, p_indices, GL_ELEMENT_ARRAY_BUFFER); + storage->buffer_orphan_and_upload(data.polygon_index_buffer_size, 0, sizeof(int) * p_index_count, p_indices, GL_ELEMENT_ARRAY_BUFFER, _buffer_upload_usage_flag); //draw the triangles. 
glDrawElements(p_primitive, p_index_count, GL_UNSIGNED_INT, 0); @@ -610,7 +610,7 @@ void RasterizerCanvasBaseGLES3::_draw_gui_primitive(int p_points, const Vector2 glBindBuffer(GL_ARRAY_BUFFER, data.polygon_buffer); //TODO the below call may need to be replaced with: p_points * stride * 4 * sizeof(float), &b[0]); - storage->buffer_orphan_and_upload(data.polygon_buffer_size, 0, p_points * stride * 4, &b[0]); + storage->buffer_orphan_and_upload(data.polygon_buffer_size, 0, p_points * stride * 4, &b[0], GL_ARRAY_BUFFER, _buffer_upload_usage_flag); glBindVertexArray(data.polygon_buffer_quad_arrays[version]); glDrawArrays(prim[p_points], 0, p_points); @@ -1140,6 +1140,13 @@ void RasterizerCanvasBaseGLES3::draw_window_margins(int *black_margin, RID *blac void RasterizerCanvasBaseGLES3::initialize() { + bool flag_stream = GLOBAL_GET("rendering/options/api_usage_legacy/flag_stream"); + if (flag_stream) { + _buffer_upload_usage_flag = GL_STREAM_DRAW; + } else { + _buffer_upload_usage_flag = GL_DYNAMIC_DRAW; + } + { //quad buffers diff --git a/drivers/gles3/rasterizer_canvas_base_gles3.h b/drivers/gles3/rasterizer_canvas_base_gles3.h index 687b7cae54c..dfb87805799 100644 --- a/drivers/gles3/rasterizer_canvas_base_gles3.h +++ b/drivers/gles3/rasterizer_canvas_base_gles3.h @@ -102,6 +102,9 @@ public: RasterizerStorageGLES3 *storage; + // allow user to choose api usage + GLenum _buffer_upload_usage_flag; + struct LightInternal : public RID_Data { struct UBOData { diff --git a/drivers/gles3/rasterizer_canvas_gles3.cpp b/drivers/gles3/rasterizer_canvas_gles3.cpp index dc74847cca7..9bb66e26bf8 100644 --- a/drivers/gles3/rasterizer_canvas_gles3.cpp +++ b/drivers/gles3/rasterizer_canvas_gles3.cpp @@ -1924,7 +1924,7 @@ void RasterizerCanvasGLES3::canvas_render_items_implementation(Item *p_item_list ris.prev_distance_field = false; glBindBuffer(GL_UNIFORM_BUFFER, state.canvas_item_ubo); - glBufferData(GL_UNIFORM_BUFFER, sizeof(CanvasItemUBO), &state.canvas_item_ubo_data, 
GL_DYNAMIC_DRAW); + glBufferData(GL_UNIFORM_BUFFER, sizeof(CanvasItemUBO), &state.canvas_item_ubo_data, _buffer_upload_usage_flag); glBindBuffer(GL_UNIFORM_BUFFER, 0); state.current_tex = RID(); @@ -1969,26 +1969,34 @@ void RasterizerCanvasGLES3::_batch_upload_buffers() { glBindBuffer(GL_ARRAY_BUFFER, bdata.gl_vertex_buffer); + // usage flag is a project setting + GLenum buffer_usage_flag = GL_DYNAMIC_DRAW; + if (bdata.buffer_mode_batch_upload_flag_stream) { + buffer_usage_flag = GL_STREAM_DRAW; + } + // orphan the old (for now) - //glBufferData(GL_ARRAY_BUFFER, 0, 0, GL_DYNAMIC_DRAW); + if (bdata.buffer_mode_batch_upload_send_null) { + glBufferData(GL_ARRAY_BUFFER, 0, 0, buffer_usage_flag); // GL_DYNAMIC_DRAW); + } switch (bdata.fvf) { case RasterizerStorageCommon::FVF_UNBATCHED: // should not happen break; case RasterizerStorageCommon::FVF_REGULAR: // no change - glBufferData(GL_ARRAY_BUFFER, sizeof(BatchVertex) * bdata.vertices.size(), bdata.vertices.get_data(), GL_DYNAMIC_DRAW); + glBufferData(GL_ARRAY_BUFFER, sizeof(BatchVertex) * bdata.vertices.size(), bdata.vertices.get_data(), buffer_usage_flag); break; case RasterizerStorageCommon::FVF_COLOR: - glBufferData(GL_ARRAY_BUFFER, sizeof(BatchVertexColored) * bdata.unit_vertices.size(), bdata.unit_vertices.get_unit(0), GL_DYNAMIC_DRAW); + glBufferData(GL_ARRAY_BUFFER, sizeof(BatchVertexColored) * bdata.unit_vertices.size(), bdata.unit_vertices.get_unit(0), buffer_usage_flag); break; case RasterizerStorageCommon::FVF_LIGHT_ANGLE: - glBufferData(GL_ARRAY_BUFFER, sizeof(BatchVertexLightAngled) * bdata.unit_vertices.size(), bdata.unit_vertices.get_unit(0), GL_DYNAMIC_DRAW); + glBufferData(GL_ARRAY_BUFFER, sizeof(BatchVertexLightAngled) * bdata.unit_vertices.size(), bdata.unit_vertices.get_unit(0), buffer_usage_flag); break; case RasterizerStorageCommon::FVF_MODULATED: - glBufferData(GL_ARRAY_BUFFER, sizeof(BatchVertexModulated) * bdata.unit_vertices.size(), bdata.unit_vertices.get_unit(0), GL_DYNAMIC_DRAW); + 
glBufferData(GL_ARRAY_BUFFER, sizeof(BatchVertexModulated) * bdata.unit_vertices.size(), bdata.unit_vertices.get_unit(0), buffer_usage_flag); break; case RasterizerStorageCommon::FVF_LARGE: - glBufferData(GL_ARRAY_BUFFER, sizeof(BatchVertexLarge) * bdata.unit_vertices.size(), bdata.unit_vertices.get_unit(0), GL_DYNAMIC_DRAW); + glBufferData(GL_ARRAY_BUFFER, sizeof(BatchVertexLarge) * bdata.unit_vertices.size(), bdata.unit_vertices.get_unit(0), buffer_usage_flag); break; } diff --git a/drivers/gles3/rasterizer_storage_gles3.cpp b/drivers/gles3/rasterizer_storage_gles3.cpp index cd7067aebdf..296258fbee2 100644 --- a/drivers/gles3/rasterizer_storage_gles3.cpp +++ b/drivers/gles3/rasterizer_storage_gles3.cpp @@ -5086,7 +5086,11 @@ void RasterizerStorageGLES3::update_dirty_multimeshes() { glBindBuffer(GL_ARRAY_BUFFER, multimesh->buffer); uint32_t buffer_size = multimesh->data.size() * sizeof(float); - glBufferData(GL_ARRAY_BUFFER, buffer_size, multimesh->data.ptr(), GL_DYNAMIC_DRAW); + if (config.should_orphan) { + glBufferData(GL_ARRAY_BUFFER, buffer_size, multimesh->data.ptr(), GL_DYNAMIC_DRAW); + } else { + glBufferSubData(GL_ARRAY_BUFFER, 0, buffer_size, multimesh->data.ptr()); + } glBindBuffer(GL_ARRAY_BUFFER, 0); } @@ -8550,6 +8554,8 @@ void RasterizerStorageGLES3::initialize() { } } } + + config.should_orphan = GLOBAL_GET("rendering/options/api_usage_legacy/orphan_buffers"); } void RasterizerStorageGLES3::finalize() { @@ -8569,4 +8575,5 @@ void RasterizerStorageGLES3::update_dirty_resources() { } RasterizerStorageGLES3::RasterizerStorageGLES3() { + config.should_orphan = true; } diff --git a/drivers/gles3/rasterizer_storage_gles3.h b/drivers/gles3/rasterizer_storage_gles3.h index bbd8687cff8..a8800ba49b8 100644 --- a/drivers/gles3/rasterizer_storage_gles3.h +++ b/drivers/gles3/rasterizer_storage_gles3.h @@ -106,6 +106,10 @@ public: bool use_depth_prepass; bool force_vertex_shading; + + // in some cases the legacy render didn't orphan. 
We will mark these + // so the user can switch orphaning off for them. + bool should_orphan; } config; mutable struct Shaders { @@ -1499,17 +1503,19 @@ public: virtual String get_video_adapter_name() const; virtual String get_video_adapter_vendor() const; - void buffer_orphan_and_upload(unsigned int p_buffer_size, unsigned int p_offset, unsigned int p_data_size, const void *p_data, GLenum p_target = GL_ARRAY_BUFFER); + void buffer_orphan_and_upload(unsigned int p_buffer_size, unsigned int p_offset, unsigned int p_data_size, const void *p_data, GLenum p_target = GL_ARRAY_BUFFER, GLenum p_usage = GL_DYNAMIC_DRAW, bool p_optional_orphan = false); RasterizerStorageGLES3(); }; // standardize the orphan / upload in one place so it can be changed per platform as necessary, and avoid future // bugs causing pipeline stalls -inline void RasterizerStorageGLES3::buffer_orphan_and_upload(unsigned int p_buffer_size, unsigned int p_offset, unsigned int p_data_size, const void *p_data, GLenum p_target) { +inline void RasterizerStorageGLES3::buffer_orphan_and_upload(unsigned int p_buffer_size, unsigned int p_offset, unsigned int p_data_size, const void *p_data, GLenum p_target, GLenum p_usage, bool p_optional_orphan) { // Orphan the buffer to avoid CPU/GPU sync points caused by glBufferSubData // Was previously #ifndef GLES_OVER_GL however this causes stalls on desktop mac also (and possibly other) - glBufferData(p_target, p_buffer_size, NULL, GL_DYNAMIC_DRAW); + if (!p_optional_orphan || (config.should_orphan)) { + glBufferData(p_target, p_buffer_size, NULL, p_usage); + } glBufferSubData(p_target, p_offset, p_data_size, p_data); } diff --git a/drivers/gles_common/rasterizer_canvas_batcher.h b/drivers/gles_common/rasterizer_canvas_batcher.h index c61d856e0d1..8278d04f511 100644 --- a/drivers/gles_common/rasterizer_canvas_batcher.h +++ b/drivers/gles_common/rasterizer_canvas_batcher.h @@ -279,6 +279,9 @@ public: settings_uv_contract = false; settings_uv_contract_amount = 0.0f; + 
buffer_mode_batch_upload_send_null = true; + buffer_mode_batch_upload_flag_stream = false; + stats_items_sorted = 0; stats_light_items_joined = 0; } @@ -399,6 +402,10 @@ public: int settings_light_max_join_items; int settings_ninepatch_mode; + // buffer orphaning modes + bool buffer_mode_batch_upload_send_null; + bool buffer_mode_batch_upload_flag_stream; + // uv contraction bool settings_uv_contract; float settings_uv_contract_amount; @@ -1028,6 +1035,10 @@ PREAMBLE(void)::batch_initialize() { bdata.settings_light_max_join_items = CLAMP(bdata.settings_light_max_join_items, 0, 65535); bdata.settings_item_reordering_lookahead = CLAMP(bdata.settings_item_reordering_lookahead, 0, 65535); + // allow user to override the api usage techniques using project settings + bdata.buffer_mode_batch_upload_send_null = GLOBAL_GET("rendering/options/api_usage_batching/send_null"); + bdata.buffer_mode_batch_upload_flag_stream = GLOBAL_GET("rendering/options/api_usage_batching/flag_stream"); + // for debug purposes, output a string with the batching options String batching_options_string = "OpenGL ES Batching: "; if (bdata.settings_use_batching) { diff --git a/servers/visual_server.cpp b/servers/visual_server.cpp index e32190ff366..a138af74c0d 100644 --- a/servers/visual_server.cpp +++ b/servers/visual_server.cpp @@ -2450,6 +2450,11 @@ VisualServer::VisualServer() { GLOBAL_DEF("rendering/quality/2d/ninepatch_mode", 0); ProjectSettings::get_singleton()->set_custom_property_info("rendering/quality/2d/ninepatch_mode", PropertyInfo(Variant::INT, "rendering/quality/2d/ninepatch_mode", PROPERTY_HINT_ENUM, "Default,Scaling")); + GLOBAL_DEF_RST("rendering/options/api_usage_batching/send_null", true); + GLOBAL_DEF_RST("rendering/options/api_usage_batching/flag_stream", false); + GLOBAL_DEF_RST("rendering/options/api_usage_legacy/flag_stream", false); + GLOBAL_DEF_RST("rendering/options/api_usage_legacy/orphan_buffers", true); + GLOBAL_DEF("rendering/batching/options/use_batching", true); 
GLOBAL_DEF_RST("rendering/batching/options/use_batching_in_editor", true); GLOBAL_DEF("rendering/batching/options/single_rect_fallback", false);