diff --git a/drivers/gles2/rasterizer_canvas_base_gles2.cpp b/drivers/gles2/rasterizer_canvas_base_gles2.cpp index d7c85646a0b..5a90ffc84f6 100644 --- a/drivers/gles2/rasterizer_canvas_base_gles2.cpp +++ b/drivers/gles2/rasterizer_canvas_base_gles2.cpp @@ -447,7 +447,7 @@ void RasterizerCanvasBaseGLES2::_draw_polygon(const int *p_indices, int p_index_ glBindBuffer(GL_ARRAY_BUFFER, data.polygon_buffer); uint32_t buffer_ofs = 0; - storage->buffer_orphan_and_upload(data.polygon_buffer_size, 0, sizeof(Vector2) * p_vertex_count, p_vertices); + storage->buffer_orphan_and_upload(data.polygon_buffer_size, 0, sizeof(Vector2) * p_vertex_count, p_vertices, GL_ARRAY_BUFFER, _buffer_upload_usage_flag, true); glEnableVertexAttribArray(VS::ARRAY_VERTEX); glVertexAttribPointer(VS::ARRAY_VERTEX, 2, GL_FLOAT, GL_FALSE, sizeof(Vector2), NULL); @@ -495,7 +495,7 @@ void RasterizerCanvasBaseGLES2::_draw_polygon(const int *p_indices, int p_index_ glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, data.polygon_index_buffer); if (storage->config.support_32_bits_indices) { //should check for - storage->buffer_orphan_and_upload(data.polygon_index_buffer_size, 0, sizeof(int) * p_index_count, p_indices, GL_ELEMENT_ARRAY_BUFFER); + storage->buffer_orphan_and_upload(data.polygon_index_buffer_size, 0, sizeof(int) * p_index_count, p_indices, GL_ELEMENT_ARRAY_BUFFER, _buffer_upload_usage_flag, true); glDrawElements(GL_TRIANGLES, p_index_count, GL_UNSIGNED_INT, 0); storage->info.render._2d_draw_call_count++; } else { @@ -503,7 +503,7 @@ void RasterizerCanvasBaseGLES2::_draw_polygon(const int *p_indices, int p_index_ for (int i = 0; i < p_index_count; i++) { index16[i] = uint16_t(p_indices[i]); } - storage->buffer_orphan_and_upload(data.polygon_index_buffer_size, 0, sizeof(uint16_t) * p_index_count, index16, GL_ELEMENT_ARRAY_BUFFER); + storage->buffer_orphan_and_upload(data.polygon_index_buffer_size, 0, sizeof(uint16_t) * p_index_count, index16, GL_ELEMENT_ARRAY_BUFFER, _buffer_upload_usage_flag, true); 
glDrawElements(GL_TRIANGLES, p_index_count, GL_UNSIGNED_SHORT, 0); storage->info.render._2d_draw_call_count++; } @@ -517,7 +517,7 @@ void RasterizerCanvasBaseGLES2::_draw_generic(GLuint p_primitive, int p_vertex_c glBindBuffer(GL_ARRAY_BUFFER, data.polygon_buffer); uint32_t buffer_ofs = 0; - storage->buffer_orphan_and_upload(data.polygon_buffer_size, 0, sizeof(Vector2) * p_vertex_count, p_vertices); + storage->buffer_orphan_and_upload(data.polygon_buffer_size, 0, sizeof(Vector2) * p_vertex_count, p_vertices, GL_ARRAY_BUFFER, _buffer_upload_usage_flag, true); glEnableVertexAttribArray(VS::ARRAY_VERTEX); glVertexAttribPointer(VS::ARRAY_VERTEX, 2, GL_FLOAT, GL_FALSE, sizeof(Vector2), NULL); @@ -556,7 +556,7 @@ void RasterizerCanvasBaseGLES2::_draw_generic_indices(GLuint p_primitive, const glBindBuffer(GL_ARRAY_BUFFER, data.polygon_buffer); uint32_t buffer_ofs = 0; - storage->buffer_orphan_and_upload(data.polygon_buffer_size, 0, sizeof(Vector2) * p_vertex_count, p_vertices); + storage->buffer_orphan_and_upload(data.polygon_buffer_size, 0, sizeof(Vector2) * p_vertex_count, p_vertices, GL_ARRAY_BUFFER, _buffer_upload_usage_flag, true); glEnableVertexAttribArray(VS::ARRAY_VERTEX); glVertexAttribPointer(VS::ARRAY_VERTEX, 2, GL_FLOAT, GL_FALSE, sizeof(Vector2), NULL); @@ -588,7 +588,7 @@ void RasterizerCanvasBaseGLES2::_draw_generic_indices(GLuint p_primitive, const glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, data.polygon_index_buffer); if (storage->config.support_32_bits_indices) { //should check for - storage->buffer_orphan_and_upload(data.polygon_index_buffer_size, 0, sizeof(int) * p_index_count, p_indices, GL_ELEMENT_ARRAY_BUFFER); + storage->buffer_orphan_and_upload(data.polygon_index_buffer_size, 0, sizeof(int) * p_index_count, p_indices, GL_ELEMENT_ARRAY_BUFFER, _buffer_upload_usage_flag, true); glDrawElements(p_primitive, p_index_count, GL_UNSIGNED_INT, 0); storage->info.render._2d_draw_call_count++; } else { @@ -596,7 +596,7 @@ void 
RasterizerCanvasBaseGLES2::_draw_generic_indices(GLuint p_primitive, const for (int i = 0; i < p_index_count; i++) { index16[i] = uint16_t(p_indices[i]); } - storage->buffer_orphan_and_upload(data.polygon_index_buffer_size, 0, sizeof(uint16_t) * p_index_count, index16, GL_ELEMENT_ARRAY_BUFFER); + storage->buffer_orphan_and_upload(data.polygon_index_buffer_size, 0, sizeof(uint16_t) * p_index_count, index16, GL_ELEMENT_ARRAY_BUFFER, _buffer_upload_usage_flag, true); glDrawElements(p_primitive, p_index_count, GL_UNSIGNED_SHORT, 0); storage->info.render._2d_draw_call_count++; } @@ -659,7 +659,7 @@ void RasterizerCanvasBaseGLES2::_draw_gui_primitive(int p_points, const Vector2 } glBindBuffer(GL_ARRAY_BUFFER, data.polygon_buffer); - storage->buffer_orphan_and_upload(data.polygon_buffer_size, 0, p_points * stride * 4 * sizeof(float), buffer_data); + storage->buffer_orphan_and_upload(data.polygon_buffer_size, 0, p_points * stride * 4 * sizeof(float), buffer_data, GL_ARRAY_BUFFER, _buffer_upload_usage_flag, true); glVertexAttribPointer(VS::ARRAY_VERTEX, 2, GL_FLOAT, GL_FALSE, stride * sizeof(float), NULL); @@ -912,6 +912,12 @@ void RasterizerCanvasBaseGLES2::draw_lens_distortion_rect(const Rect2 &p_rect, f void RasterizerCanvasBaseGLES2::initialize() { + bool flag_stream = GLOBAL_GET("rendering/options/api_usage_legacy/flag_stream"); + if (flag_stream) + _buffer_upload_usage_flag = GL_STREAM_DRAW; + else + _buffer_upload_usage_flag = GL_DYNAMIC_DRAW; + // quad buffer { glGenBuffers(1, &data.canvas_quad_vertices); diff --git a/drivers/gles2/rasterizer_canvas_base_gles2.h b/drivers/gles2/rasterizer_canvas_base_gles2.h index f500db55ed7..f4dbafa35e3 100644 --- a/drivers/gles2/rasterizer_canvas_base_gles2.h +++ b/drivers/gles2/rasterizer_canvas_base_gles2.h @@ -107,6 +107,9 @@ public: RasterizerStorageGLES2 *storage; + // allow user to choose api usage + GLenum _buffer_upload_usage_flag; + void _set_uniforms(); virtual RID light_internal_create(); diff --git 
a/drivers/gles2/rasterizer_canvas_gles2.cpp b/drivers/gles2/rasterizer_canvas_gles2.cpp index fb0284fbd7b..f29f1caae41 100644 --- a/drivers/gles2/rasterizer_canvas_gles2.cpp +++ b/drivers/gles2/rasterizer_canvas_gles2.cpp @@ -53,26 +53,34 @@ void RasterizerCanvasGLES2::_batch_upload_buffers() { glBindBuffer(GL_ARRAY_BUFFER, bdata.gl_vertex_buffer); + // usage flag is a project setting + GLenum buffer_usage_flag = GL_DYNAMIC_DRAW; + if (bdata.buffer_mode_batch_upload_flag_stream) { + buffer_usage_flag = GL_STREAM_DRAW; + } + // orphan the old (for now) - //glBufferData(GL_ARRAY_BUFFER, 0, 0, GL_DYNAMIC_DRAW); + if (bdata.buffer_mode_batch_upload_send_null) { + glBufferData(GL_ARRAY_BUFFER, 0, 0, buffer_usage_flag); // GL_DYNAMIC_DRAW); + } switch (bdata.fvf) { case RasterizerStorageCommon::FVF_UNBATCHED: // should not happen break; case RasterizerStorageCommon::FVF_REGULAR: // no change - glBufferData(GL_ARRAY_BUFFER, sizeof(BatchVertex) * bdata.vertices.size(), bdata.vertices.get_data(), GL_DYNAMIC_DRAW); + glBufferData(GL_ARRAY_BUFFER, sizeof(BatchVertex) * bdata.vertices.size(), bdata.vertices.get_data(), buffer_usage_flag); break; case RasterizerStorageCommon::FVF_COLOR: - glBufferData(GL_ARRAY_BUFFER, sizeof(BatchVertexColored) * bdata.unit_vertices.size(), bdata.unit_vertices.get_unit(0), GL_DYNAMIC_DRAW); + glBufferData(GL_ARRAY_BUFFER, sizeof(BatchVertexColored) * bdata.unit_vertices.size(), bdata.unit_vertices.get_unit(0), buffer_usage_flag); break; case RasterizerStorageCommon::FVF_LIGHT_ANGLE: - glBufferData(GL_ARRAY_BUFFER, sizeof(BatchVertexLightAngled) * bdata.unit_vertices.size(), bdata.unit_vertices.get_unit(0), GL_DYNAMIC_DRAW); + glBufferData(GL_ARRAY_BUFFER, sizeof(BatchVertexLightAngled) * bdata.unit_vertices.size(), bdata.unit_vertices.get_unit(0), buffer_usage_flag); break; case RasterizerStorageCommon::FVF_MODULATED: - glBufferData(GL_ARRAY_BUFFER, sizeof(BatchVertexModulated) * bdata.unit_vertices.size(), bdata.unit_vertices.get_unit(0), 
GL_DYNAMIC_DRAW); + glBufferData(GL_ARRAY_BUFFER, sizeof(BatchVertexModulated) * bdata.unit_vertices.size(), bdata.unit_vertices.get_unit(0), buffer_usage_flag); break; case RasterizerStorageCommon::FVF_LARGE: - glBufferData(GL_ARRAY_BUFFER, sizeof(BatchVertexLarge) * bdata.unit_vertices.size(), bdata.unit_vertices.get_unit(0), GL_DYNAMIC_DRAW); + glBufferData(GL_ARRAY_BUFFER, sizeof(BatchVertexLarge) * bdata.unit_vertices.size(), bdata.unit_vertices.get_unit(0), buffer_usage_flag); break; } @@ -810,7 +818,7 @@ void RasterizerCanvasGLES2::render_batches(Item::Command *const *p_commands, Ite } glBindBuffer(GL_ARRAY_BUFFER, data.ninepatch_vertices); - glBufferData(GL_ARRAY_BUFFER, sizeof(float) * (16 + 16) * 2, buffer, GL_DYNAMIC_DRAW); + glBufferData(GL_ARRAY_BUFFER, sizeof(float) * (16 + 16) * 2, buffer, _buffer_upload_usage_flag); glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, data.ninepatch_elements); diff --git a/drivers/gles2/rasterizer_storage_gles2.cpp b/drivers/gles2/rasterizer_storage_gles2.cpp index b2119c1dcef..d155d372173 100644 --- a/drivers/gles2/rasterizer_storage_gles2.cpp +++ b/drivers/gles2/rasterizer_storage_gles2.cpp @@ -3798,7 +3798,7 @@ void RasterizerStorageGLES2::_update_skeleton_transform_buffer(const PoolVector< glBufferData(GL_ARRAY_BUFFER, buffer_size, p_data.read().ptr(), GL_DYNAMIC_DRAW); } else { // this may not be best, it could be better to use glBufferData in both cases. 
- buffer_orphan_and_upload(resources.skeleton_transform_buffer_size, 0, buffer_size, p_data.read().ptr()); + buffer_orphan_and_upload(resources.skeleton_transform_buffer_size, 0, buffer_size, p_data.read().ptr(), GL_ARRAY_BUFFER, GL_DYNAMIC_DRAW, true); } glBindBuffer(GL_ARRAY_BUFFER, 0); @@ -6289,6 +6289,7 @@ void RasterizerStorageGLES2::initialize() { config.force_vertex_shading = GLOBAL_GET("rendering/quality/shading/force_vertex_shading"); config.use_fast_texture_filter = GLOBAL_GET("rendering/quality/filters/use_nearest_mipmap_filter"); + config.should_orphan = GLOBAL_GET("rendering/options/api_usage_legacy/orphan_buffers"); } void RasterizerStorageGLES2::finalize() { @@ -6308,4 +6309,5 @@ void RasterizerStorageGLES2::update_dirty_resources() { RasterizerStorageGLES2::RasterizerStorageGLES2() { RasterizerStorageGLES2::system_fbo = 0; + config.should_orphan = true; } diff --git a/drivers/gles2/rasterizer_storage_gles2.h b/drivers/gles2/rasterizer_storage_gles2.h index 00ee0f8b0d8..f2024de128f 100644 --- a/drivers/gles2/rasterizer_storage_gles2.h +++ b/drivers/gles2/rasterizer_storage_gles2.h @@ -96,6 +96,9 @@ public: GLuint depth_type; GLuint depth_buffer_internalformat; + // in some cases the legacy render didn't orphan. We will mark these + // so the user can switch orphaning off for them. 
+ bool should_orphan; } config; struct Resources { @@ -1334,17 +1337,19 @@ public: virtual String get_video_adapter_name() const; virtual String get_video_adapter_vendor() const; - void buffer_orphan_and_upload(unsigned int p_buffer_size, unsigned int p_offset, unsigned int p_data_size, const void *p_data, GLenum p_target = GL_ARRAY_BUFFER); + void buffer_orphan_and_upload(unsigned int p_buffer_size, unsigned int p_offset, unsigned int p_data_size, const void *p_data, GLenum p_target = GL_ARRAY_BUFFER, GLenum p_usage = GL_DYNAMIC_DRAW, bool p_optional_orphan = false); RasterizerStorageGLES2(); }; // standardize the orphan / upload in one place so it can be changed per platform as necessary, and avoid future // bugs causing pipeline stalls -inline void RasterizerStorageGLES2::buffer_orphan_and_upload(unsigned int p_buffer_size, unsigned int p_offset, unsigned int p_data_size, const void *p_data, GLenum p_target) { +inline void RasterizerStorageGLES2::buffer_orphan_and_upload(unsigned int p_buffer_size, unsigned int p_offset, unsigned int p_data_size, const void *p_data, GLenum p_target, GLenum p_usage, bool p_optional_orphan) { // Orphan the buffer to avoid CPU/GPU sync points caused by glBufferSubData // Was previously #ifndef GLES_OVER_GL however this causes stalls on desktop mac also (and possibly other) - glBufferData(p_target, p_buffer_size, NULL, GL_DYNAMIC_DRAW); + if (!p_optional_orphan || (config.should_orphan)) { + glBufferData(p_target, p_buffer_size, NULL, p_usage); + } glBufferSubData(p_target, p_offset, p_data_size, p_data); } diff --git a/drivers/gles3/rasterizer_canvas_base_gles3.cpp b/drivers/gles3/rasterizer_canvas_base_gles3.cpp index e60e06be40f..7f80a258695 100644 --- a/drivers/gles3/rasterizer_canvas_base_gles3.cpp +++ b/drivers/gles3/rasterizer_canvas_base_gles3.cpp @@ -349,7 +349,7 @@ void RasterizerCanvasBaseGLES3::_draw_polygon(const int *p_indices, int p_index_ glBindBuffer(GL_ARRAY_BUFFER, data.polygon_buffer); uint32_t buffer_ofs = 0; - 
storage->buffer_orphan_and_upload(data.polygon_buffer_size, buffer_ofs, sizeof(Vector2) * p_vertex_count, p_vertices); + storage->buffer_orphan_and_upload(data.polygon_buffer_size, buffer_ofs, sizeof(Vector2) * p_vertex_count, p_vertices, GL_ARRAY_BUFFER, _buffer_upload_usage_flag); glEnableVertexAttribArray(VS::ARRAY_VERTEX); glVertexAttribPointer(VS::ARRAY_VERTEX, 2, GL_FLOAT, false, sizeof(Vector2), CAST_INT_TO_UCHAR_PTR(buffer_ofs)); @@ -417,7 +417,7 @@ void RasterizerCanvasBaseGLES3::_draw_polygon(const int *p_indices, int p_index_ //bind the indices buffer. glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, data.polygon_index_buffer); - storage->buffer_orphan_and_upload(data.polygon_index_buffer_size, 0, sizeof(int) * p_index_count, p_indices, GL_ELEMENT_ARRAY_BUFFER); + storage->buffer_orphan_and_upload(data.polygon_index_buffer_size, 0, sizeof(int) * p_index_count, p_indices, GL_ELEMENT_ARRAY_BUFFER, _buffer_upload_usage_flag); //draw the triangles. glDrawElements(GL_TRIANGLES, p_index_count, GL_UNSIGNED_INT, 0); @@ -441,7 +441,7 @@ void RasterizerCanvasBaseGLES3::_draw_generic(GLuint p_primitive, int p_vertex_c //vertex uint32_t buffer_ofs = 0; - storage->buffer_orphan_and_upload(data.polygon_buffer_size, buffer_ofs, sizeof(Vector2) * p_vertex_count, p_vertices); + storage->buffer_orphan_and_upload(data.polygon_buffer_size, buffer_ofs, sizeof(Vector2) * p_vertex_count, p_vertices, GL_ARRAY_BUFFER, _buffer_upload_usage_flag); glEnableVertexAttribArray(VS::ARRAY_VERTEX); glVertexAttribPointer(VS::ARRAY_VERTEX, 2, GL_FLOAT, false, sizeof(Vector2), CAST_INT_TO_UCHAR_PTR(buffer_ofs)); @@ -489,7 +489,7 @@ void RasterizerCanvasBaseGLES3::_draw_generic_indices(GLuint p_primitive, const //vertex uint32_t buffer_ofs = 0; - storage->buffer_orphan_and_upload(data.polygon_buffer_size, buffer_ofs, sizeof(Vector2) * p_vertex_count, p_vertices); + storage->buffer_orphan_and_upload(data.polygon_buffer_size, buffer_ofs, sizeof(Vector2) * p_vertex_count, p_vertices, GL_ARRAY_BUFFER, 
_buffer_upload_usage_flag); glEnableVertexAttribArray(VS::ARRAY_VERTEX); glVertexAttribPointer(VS::ARRAY_VERTEX, 2, GL_FLOAT, false, sizeof(Vector2), CAST_INT_TO_UCHAR_PTR(buffer_ofs)); @@ -535,7 +535,7 @@ void RasterizerCanvasBaseGLES3::_draw_generic_indices(GLuint p_primitive, const //bind the indices buffer. glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, data.polygon_index_buffer); - storage->buffer_orphan_and_upload(data.polygon_index_buffer_size, 0, sizeof(int) * p_index_count, p_indices, GL_ELEMENT_ARRAY_BUFFER); + storage->buffer_orphan_and_upload(data.polygon_index_buffer_size, 0, sizeof(int) * p_index_count, p_indices, GL_ELEMENT_ARRAY_BUFFER, _buffer_upload_usage_flag); //draw the triangles. glDrawElements(p_primitive, p_index_count, GL_UNSIGNED_INT, 0); @@ -610,7 +610,7 @@ void RasterizerCanvasBaseGLES3::_draw_gui_primitive(int p_points, const Vector2 glBindBuffer(GL_ARRAY_BUFFER, data.polygon_buffer); //TODO the below call may need to be replaced with: p_points * stride * 4 * sizeof(float), &b[0]); - storage->buffer_orphan_and_upload(data.polygon_buffer_size, 0, p_points * stride * 4, &b[0]); + storage->buffer_orphan_and_upload(data.polygon_buffer_size, 0, p_points * stride * 4, &b[0], GL_ARRAY_BUFFER, _buffer_upload_usage_flag); glBindVertexArray(data.polygon_buffer_quad_arrays[version]); glDrawArrays(prim[p_points], 0, p_points); @@ -1140,6 +1140,13 @@ void RasterizerCanvasBaseGLES3::draw_window_margins(int *black_margin, RID *blac void RasterizerCanvasBaseGLES3::initialize() { + bool flag_stream = GLOBAL_GET("rendering/options/api_usage_legacy/flag_stream"); + if (flag_stream) { + _buffer_upload_usage_flag = GL_STREAM_DRAW; + } else { + _buffer_upload_usage_flag = GL_DYNAMIC_DRAW; + } + { //quad buffers diff --git a/drivers/gles3/rasterizer_canvas_base_gles3.h b/drivers/gles3/rasterizer_canvas_base_gles3.h index 687b7cae54c..dfb87805799 100644 --- a/drivers/gles3/rasterizer_canvas_base_gles3.h +++ b/drivers/gles3/rasterizer_canvas_base_gles3.h @@ -102,6 
+102,9 @@ public: RasterizerStorageGLES3 *storage; + // allow user to choose api usage + GLenum _buffer_upload_usage_flag; + struct LightInternal : public RID_Data { struct UBOData { diff --git a/drivers/gles3/rasterizer_canvas_gles3.cpp b/drivers/gles3/rasterizer_canvas_gles3.cpp index dc74847cca7..9bb66e26bf8 100644 --- a/drivers/gles3/rasterizer_canvas_gles3.cpp +++ b/drivers/gles3/rasterizer_canvas_gles3.cpp @@ -1924,7 +1924,7 @@ void RasterizerCanvasGLES3::canvas_render_items_implementation(Item *p_item_list ris.prev_distance_field = false; glBindBuffer(GL_UNIFORM_BUFFER, state.canvas_item_ubo); - glBufferData(GL_UNIFORM_BUFFER, sizeof(CanvasItemUBO), &state.canvas_item_ubo_data, GL_DYNAMIC_DRAW); + glBufferData(GL_UNIFORM_BUFFER, sizeof(CanvasItemUBO), &state.canvas_item_ubo_data, _buffer_upload_usage_flag); glBindBuffer(GL_UNIFORM_BUFFER, 0); state.current_tex = RID(); @@ -1969,26 +1969,34 @@ void RasterizerCanvasGLES3::_batch_upload_buffers() { glBindBuffer(GL_ARRAY_BUFFER, bdata.gl_vertex_buffer); + // usage flag is a project setting + GLenum buffer_usage_flag = GL_DYNAMIC_DRAW; + if (bdata.buffer_mode_batch_upload_flag_stream) { + buffer_usage_flag = GL_STREAM_DRAW; + } + // orphan the old (for now) - //glBufferData(GL_ARRAY_BUFFER, 0, 0, GL_DYNAMIC_DRAW); + if (bdata.buffer_mode_batch_upload_send_null) { + glBufferData(GL_ARRAY_BUFFER, 0, 0, buffer_usage_flag); // GL_DYNAMIC_DRAW); + } switch (bdata.fvf) { case RasterizerStorageCommon::FVF_UNBATCHED: // should not happen break; case RasterizerStorageCommon::FVF_REGULAR: // no change - glBufferData(GL_ARRAY_BUFFER, sizeof(BatchVertex) * bdata.vertices.size(), bdata.vertices.get_data(), GL_DYNAMIC_DRAW); + glBufferData(GL_ARRAY_BUFFER, sizeof(BatchVertex) * bdata.vertices.size(), bdata.vertices.get_data(), buffer_usage_flag); break; case RasterizerStorageCommon::FVF_COLOR: - glBufferData(GL_ARRAY_BUFFER, sizeof(BatchVertexColored) * bdata.unit_vertices.size(), bdata.unit_vertices.get_unit(0), 
GL_DYNAMIC_DRAW); + glBufferData(GL_ARRAY_BUFFER, sizeof(BatchVertexColored) * bdata.unit_vertices.size(), bdata.unit_vertices.get_unit(0), buffer_usage_flag); break; case RasterizerStorageCommon::FVF_LIGHT_ANGLE: - glBufferData(GL_ARRAY_BUFFER, sizeof(BatchVertexLightAngled) * bdata.unit_vertices.size(), bdata.unit_vertices.get_unit(0), GL_DYNAMIC_DRAW); + glBufferData(GL_ARRAY_BUFFER, sizeof(BatchVertexLightAngled) * bdata.unit_vertices.size(), bdata.unit_vertices.get_unit(0), buffer_usage_flag); break; case RasterizerStorageCommon::FVF_MODULATED: - glBufferData(GL_ARRAY_BUFFER, sizeof(BatchVertexModulated) * bdata.unit_vertices.size(), bdata.unit_vertices.get_unit(0), GL_DYNAMIC_DRAW); + glBufferData(GL_ARRAY_BUFFER, sizeof(BatchVertexModulated) * bdata.unit_vertices.size(), bdata.unit_vertices.get_unit(0), buffer_usage_flag); break; case RasterizerStorageCommon::FVF_LARGE: - glBufferData(GL_ARRAY_BUFFER, sizeof(BatchVertexLarge) * bdata.unit_vertices.size(), bdata.unit_vertices.get_unit(0), GL_DYNAMIC_DRAW); + glBufferData(GL_ARRAY_BUFFER, sizeof(BatchVertexLarge) * bdata.unit_vertices.size(), bdata.unit_vertices.get_unit(0), buffer_usage_flag); break; } diff --git a/drivers/gles3/rasterizer_storage_gles3.cpp b/drivers/gles3/rasterizer_storage_gles3.cpp index 1e53e11816b..bba60d10292 100644 --- a/drivers/gles3/rasterizer_storage_gles3.cpp +++ b/drivers/gles3/rasterizer_storage_gles3.cpp @@ -5089,7 +5089,11 @@ void RasterizerStorageGLES3::update_dirty_multimeshes() { glBindBuffer(GL_ARRAY_BUFFER, multimesh->buffer); uint32_t buffer_size = multimesh->data.size() * sizeof(float); - glBufferData(GL_ARRAY_BUFFER, buffer_size, multimesh->data.ptr(), GL_DYNAMIC_DRAW); + if (config.should_orphan) { + glBufferData(GL_ARRAY_BUFFER, buffer_size, multimesh->data.ptr(), GL_DYNAMIC_DRAW); + } else { + glBufferSubData(GL_ARRAY_BUFFER, 0, buffer_size, multimesh->data.ptr()); + } glBindBuffer(GL_ARRAY_BUFFER, 0); } @@ -8553,6 +8557,8 @@ void RasterizerStorageGLES3::initialize() 
{ } } } + + config.should_orphan = GLOBAL_GET("rendering/options/api_usage_legacy/orphan_buffers"); } void RasterizerStorageGLES3::finalize() { @@ -8572,4 +8578,5 @@ void RasterizerStorageGLES3::update_dirty_resources() { } RasterizerStorageGLES3::RasterizerStorageGLES3() { + config.should_orphan = true; } diff --git a/drivers/gles3/rasterizer_storage_gles3.h b/drivers/gles3/rasterizer_storage_gles3.h index bbd8687cff8..a8800ba49b8 100644 --- a/drivers/gles3/rasterizer_storage_gles3.h +++ b/drivers/gles3/rasterizer_storage_gles3.h @@ -106,6 +106,10 @@ public: bool use_depth_prepass; bool force_vertex_shading; + + // in some cases the legacy render didn't orphan. We will mark these + // so the user can switch orphaning off for them. + bool should_orphan; } config; mutable struct Shaders { @@ -1499,17 +1503,19 @@ public: virtual String get_video_adapter_name() const; virtual String get_video_adapter_vendor() const; - void buffer_orphan_and_upload(unsigned int p_buffer_size, unsigned int p_offset, unsigned int p_data_size, const void *p_data, GLenum p_target = GL_ARRAY_BUFFER); + void buffer_orphan_and_upload(unsigned int p_buffer_size, unsigned int p_offset, unsigned int p_data_size, const void *p_data, GLenum p_target = GL_ARRAY_BUFFER, GLenum p_usage = GL_DYNAMIC_DRAW, bool p_optional_orphan = false); RasterizerStorageGLES3(); }; // standardize the orphan / upload in one place so it can be changed per platform as necessary, and avoid future // bugs causing pipeline stalls -inline void RasterizerStorageGLES3::buffer_orphan_and_upload(unsigned int p_buffer_size, unsigned int p_offset, unsigned int p_data_size, const void *p_data, GLenum p_target) { +inline void RasterizerStorageGLES3::buffer_orphan_and_upload(unsigned int p_buffer_size, unsigned int p_offset, unsigned int p_data_size, const void *p_data, GLenum p_target, GLenum p_usage, bool p_optional_orphan) { // Orphan the buffer to avoid CPU/GPU sync points caused by glBufferSubData // Was previously #ifndef 
GLES_OVER_GL however this causes stalls on desktop mac also (and possibly other) - glBufferData(p_target, p_buffer_size, NULL, GL_DYNAMIC_DRAW); + if (!p_optional_orphan || (config.should_orphan)) { + glBufferData(p_target, p_buffer_size, NULL, p_usage); + } glBufferSubData(p_target, p_offset, p_data_size, p_data); } diff --git a/drivers/gles_common/rasterizer_canvas_batcher.h b/drivers/gles_common/rasterizer_canvas_batcher.h index 4054fcd29c0..05b950e789c 100644 --- a/drivers/gles_common/rasterizer_canvas_batcher.h +++ b/drivers/gles_common/rasterizer_canvas_batcher.h @@ -279,6 +279,9 @@ public: settings_uv_contract = false; settings_uv_contract_amount = 0.0f; + buffer_mode_batch_upload_send_null = true; + buffer_mode_batch_upload_flag_stream = false; + stats_items_sorted = 0; stats_light_items_joined = 0; } @@ -399,6 +402,10 @@ public: int settings_light_max_join_items; int settings_ninepatch_mode; + // buffer orphaning modes + bool buffer_mode_batch_upload_send_null; + bool buffer_mode_batch_upload_flag_stream; + // uv contraction bool settings_uv_contract; float settings_uv_contract_amount; @@ -1028,6 +1035,10 @@ PREAMBLE(void)::batch_initialize() { bdata.settings_light_max_join_items = CLAMP(bdata.settings_light_max_join_items, 0, 65535); bdata.settings_item_reordering_lookahead = CLAMP(bdata.settings_item_reordering_lookahead, 0, 65535); + // allow user to override the api usage techniques using project settings + bdata.buffer_mode_batch_upload_send_null = GLOBAL_GET("rendering/options/api_usage_batching/send_null"); + bdata.buffer_mode_batch_upload_flag_stream = GLOBAL_GET("rendering/options/api_usage_batching/flag_stream"); + // for debug purposes, output a string with the batching options String batching_options_string = "OpenGL ES Batching: "; if (bdata.settings_use_batching) { diff --git a/servers/visual_server.cpp b/servers/visual_server.cpp index e32190ff366..a138af74c0d 100644 --- a/servers/visual_server.cpp +++ b/servers/visual_server.cpp @@ -2450,6 
+2450,11 @@ VisualServer::VisualServer() { GLOBAL_DEF("rendering/quality/2d/ninepatch_mode", 0); ProjectSettings::get_singleton()->set_custom_property_info("rendering/quality/2d/ninepatch_mode", PropertyInfo(Variant::INT, "rendering/quality/2d/ninepatch_mode", PROPERTY_HINT_ENUM, "Default,Scaling")); + GLOBAL_DEF_RST("rendering/options/api_usage_batching/send_null", true); + GLOBAL_DEF_RST("rendering/options/api_usage_batching/flag_stream", false); + GLOBAL_DEF_RST("rendering/options/api_usage_legacy/flag_stream", false); + GLOBAL_DEF_RST("rendering/options/api_usage_legacy/orphan_buffers", true); + GLOBAL_DEF("rendering/batching/options/use_batching", true); GLOBAL_DEF_RST("rendering/batching/options/use_batching_in_editor", true); GLOBAL_DEF("rendering/batching/options/single_rect_fallback", false);