From 42bca1a4a90acc36c93bb5ee863f1c7471ab5712 Mon Sep 17 00:00:00 2001 From: lawnjelly Date: Mon, 12 Oct 2020 10:06:30 +0100 Subject: [PATCH] Fix buffer orphaning on desktop Vertex / Index Buffer orphaning was previously turned off on desktop, which was causing performance problems on some platforms, especially Mac. --- .../gles2/rasterizer_canvas_base_gles2.cpp | 40 ++++------------ drivers/gles2/rasterizer_storage_gles2.h | 11 +++++ drivers/gles3/rasterizer_canvas_gles3.cpp | 46 ++++--------------- drivers/gles3/rasterizer_storage_gles3.h | 11 +++++ 4 files changed, 39 insertions(+), 69 deletions(-) diff --git a/drivers/gles2/rasterizer_canvas_base_gles2.cpp b/drivers/gles2/rasterizer_canvas_base_gles2.cpp index 8b5046d5276..dc3ddef886f 100644 --- a/drivers/gles2/rasterizer_canvas_base_gles2.cpp +++ b/drivers/gles2/rasterizer_canvas_base_gles2.cpp @@ -430,14 +430,10 @@ void RasterizerCanvasBaseGLES2::_copy_texscreen(const Rect2 &p_rect) { void RasterizerCanvasBaseGLES2::_draw_polygon(const int *p_indices, int p_index_count, int p_vertex_count, const Vector2 *p_vertices, const Vector2 *p_uvs, const Color *p_colors, bool p_singlecolor, const float *p_weights, const int *p_bones) { glBindBuffer(GL_ARRAY_BUFFER, data.polygon_buffer); -#ifndef GLES_OVER_GL - // Orphan the buffer to avoid CPU/GPU sync points caused by glBufferSubData - glBufferData(GL_ARRAY_BUFFER, data.polygon_buffer_size, NULL, GL_DYNAMIC_DRAW); -#endif uint32_t buffer_ofs = 0; + storage->buffer_orphan_and_upload(data.polygon_buffer_size, 0, sizeof(Vector2) * p_vertex_count, p_vertices); - glBufferSubData(GL_ARRAY_BUFFER, 0, sizeof(Vector2) * p_vertex_count, p_vertices); glEnableVertexAttribArray(VS::ARRAY_VERTEX); glVertexAttribPointer(VS::ARRAY_VERTEX, 2, GL_FLOAT, GL_FALSE, sizeof(Vector2), NULL); buffer_ofs += sizeof(Vector2) * p_vertex_count; @@ -482,13 +478,9 @@ void RasterizerCanvasBaseGLES2::_draw_polygon(const int *p_indices, int p_index_ } glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, data.polygon_index_buffer); -#ifndef GLES_OVER_GL - // Orphan the buffer to avoid CPU/GPU sync points caused by glBufferSubData - glBufferData(GL_ELEMENT_ARRAY_BUFFER, data.polygon_index_buffer_size, NULL, GL_DYNAMIC_DRAW); -#endif if (storage->config.support_32_bits_indices) { //should check for - glBufferSubData(GL_ELEMENT_ARRAY_BUFFER, 0, sizeof(int) * p_index_count, p_indices); + storage->buffer_orphan_and_upload(data.polygon_index_buffer_size, 0, sizeof(int) * p_index_count, p_indices, GL_ELEMENT_ARRAY_BUFFER); glDrawElements(GL_TRIANGLES, p_index_count, GL_UNSIGNED_INT, 0); storage->info.render._2d_draw_call_count++; } else { @@ -496,7 +488,7 @@ void RasterizerCanvasBaseGLES2::_draw_polygon(const int *p_indices, int p_index_ for (int i = 0; i < p_index_count; i++) { index16[i] = uint16_t(p_indices[i]); } - glBufferSubData(GL_ELEMENT_ARRAY_BUFFER, 0, sizeof(uint16_t) * p_index_count, index16); + storage->buffer_orphan_and_upload(data.polygon_index_buffer_size, 0, sizeof(uint16_t) * p_index_count, index16, GL_ELEMENT_ARRAY_BUFFER); glDrawElements(GL_TRIANGLES, p_index_count, GL_UNSIGNED_SHORT, 0); storage->info.render._2d_draw_call_count++; } @@ -508,14 +500,10 @@ void RasterizerCanvasBaseGLES2::_draw_polygon(const int *p_indices, int p_index_ void RasterizerCanvasBaseGLES2::_draw_generic(GLuint p_primitive, int p_vertex_count, const Vector2 *p_vertices, const Vector2 *p_uvs, const Color *p_colors, bool p_singlecolor) { glBindBuffer(GL_ARRAY_BUFFER, data.polygon_buffer); -#ifndef GLES_OVER_GL - // Orphan the buffer to avoid CPU/GPU sync points caused by glBufferSubData - glBufferData(GL_ARRAY_BUFFER, data.polygon_buffer_size, NULL, GL_DYNAMIC_DRAW); -#endif uint32_t buffer_ofs = 0; + storage->buffer_orphan_and_upload(data.polygon_buffer_size, 0, sizeof(Vector2) * p_vertex_count, p_vertices); - glBufferSubData(GL_ARRAY_BUFFER, 0, sizeof(Vector2) * p_vertex_count, p_vertices); glEnableVertexAttribArray(VS::ARRAY_VERTEX); glVertexAttribPointer(VS::ARRAY_VERTEX, 2, GL_FLOAT, GL_FALSE, sizeof(Vector2), NULL); buffer_ofs += sizeof(Vector2) * p_vertex_count; @@ -551,14 +539,10 @@ void RasterizerCanvasBaseGLES2::_draw_generic(GLuint p_primitive, int p_vertex_c void RasterizerCanvasBaseGLES2::_draw_generic_indices(GLuint p_primitive, const int *p_indices, int p_index_count, int p_vertex_count, const Vector2 *p_vertices, const Vector2 *p_uvs, const Color *p_colors, bool p_singlecolor) { glBindBuffer(GL_ARRAY_BUFFER, data.polygon_buffer); -#ifndef GLES_OVER_GL - // Orphan the buffer to avoid CPU/GPU sync points caused by glBufferSubData - glBufferData(GL_ARRAY_BUFFER, data.polygon_buffer_size, NULL, GL_DYNAMIC_DRAW); -#endif uint32_t buffer_ofs = 0; + storage->buffer_orphan_and_upload(data.polygon_buffer_size, 0, sizeof(Vector2) * p_vertex_count, p_vertices); - glBufferSubData(GL_ARRAY_BUFFER, 0, sizeof(Vector2) * p_vertex_count, p_vertices); glEnableVertexAttribArray(VS::ARRAY_VERTEX); glVertexAttribPointer(VS::ARRAY_VERTEX, 2, GL_FLOAT, GL_FALSE, sizeof(Vector2), NULL); buffer_ofs += sizeof(Vector2) * p_vertex_count; @@ -587,13 +571,9 @@ void RasterizerCanvasBaseGLES2::_draw_generic_indices(GLuint p_primitive, const } glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, data.polygon_index_buffer); -#ifndef GLES_OVER_GL - // Orphan the buffer to avoid CPU/GPU sync points caused by glBufferSubData - glBufferData(GL_ELEMENT_ARRAY_BUFFER, data.polygon_index_buffer_size, NULL, GL_DYNAMIC_DRAW); -#endif if (storage->config.support_32_bits_indices) { //should check for - glBufferSubData(GL_ELEMENT_ARRAY_BUFFER, 0, sizeof(int) * p_index_count, p_indices); + storage->buffer_orphan_and_upload(data.polygon_index_buffer_size, 0, sizeof(int) * p_index_count, p_indices, GL_ELEMENT_ARRAY_BUFFER); glDrawElements(p_primitive, p_index_count, GL_UNSIGNED_INT, 0); storage->info.render._2d_draw_call_count++; } else { @@ -601,7 +581,7 @@ void RasterizerCanvasBaseGLES2::_draw_generic_indices(GLuint p_primitive, const for (int i = 0; i < p_index_count; i++) { index16[i] = uint16_t(p_indices[i]); } - glBufferSubData(GL_ELEMENT_ARRAY_BUFFER, 0, sizeof(uint16_t) * p_index_count, index16); + storage->buffer_orphan_and_upload(data.polygon_index_buffer_size, 0, sizeof(uint16_t) * p_index_count, index16, GL_ELEMENT_ARRAY_BUFFER); glDrawElements(p_primitive, p_index_count, GL_UNSIGNED_SHORT, 0); storage->info.render._2d_draw_call_count++; } @@ -664,11 +644,7 @@ void RasterizerCanvasBaseGLES2::_draw_gui_primitive(int p_points, const Vector2 } glBindBuffer(GL_ARRAY_BUFFER, data.polygon_buffer); -#ifndef GLES_OVER_GL - // Orphan the buffer to avoid CPU/GPU sync points caused by glBufferSubData - glBufferData(GL_ARRAY_BUFFER, data.polygon_buffer_size, NULL, GL_DYNAMIC_DRAW); -#endif - glBufferSubData(GL_ARRAY_BUFFER, 0, p_points * stride * 4 * sizeof(float), buffer_data); + storage->buffer_orphan_and_upload(data.polygon_buffer_size, 0, p_points * stride * 4 * sizeof(float), buffer_data); glVertexAttribPointer(VS::ARRAY_VERTEX, 2, GL_FLOAT, GL_FALSE, stride * sizeof(float), NULL); diff --git a/drivers/gles2/rasterizer_storage_gles2.h b/drivers/gles2/rasterizer_storage_gles2.h index 72089f31895..c814cd38675 100644 --- a/drivers/gles2/rasterizer_storage_gles2.h +++ b/drivers/gles2/rasterizer_storage_gles2.h @@ -1333,7 +1333,18 @@ public: virtual String get_video_adapter_name() const; virtual String get_video_adapter_vendor() const; + void buffer_orphan_and_upload(unsigned int p_buffer_size, unsigned int p_offset, unsigned int p_data_size, const void *p_data, GLenum p_target = GL_ARRAY_BUFFER); + RasterizerStorageGLES2(); }; +// standardize the orphan / upload in one place so it can be changed per platform as necessary, and avoid future +// bugs causing pipeline stalls +inline void RasterizerStorageGLES2::buffer_orphan_and_upload(unsigned int p_buffer_size, unsigned int p_offset, unsigned int p_data_size, const void *p_data, GLenum p_target) { + // Orphan the buffer to avoid CPU/GPU sync points caused by glBufferSubData + // Was previously #ifndef GLES_OVER_GL however this causes stalls on desktop mac also (and possibly other) + glBufferData(p_target, p_buffer_size, NULL, GL_DYNAMIC_DRAW); + glBufferSubData(p_target, p_offset, p_data_size, p_data); +} + #endif // RASTERIZERSTORAGEGLES2_H diff --git a/drivers/gles3/rasterizer_canvas_gles3.cpp b/drivers/gles3/rasterizer_canvas_gles3.cpp index 535f118b05b..f8c404ede85 100644 --- a/drivers/gles3/rasterizer_canvas_gles3.cpp +++ b/drivers/gles3/rasterizer_canvas_gles3.cpp @@ -331,15 +331,9 @@ void RasterizerCanvasGLES3::_draw_polygon(const int *p_indices, int p_index_coun glBindVertexArray(data.polygon_buffer_pointer_array); glBindBuffer(GL_ARRAY_BUFFER, data.polygon_buffer); -#ifndef GLES_OVER_GL - // Orphan the buffer to avoid CPU/GPU sync points caused by glBufferSubData - glBufferData(GL_ARRAY_BUFFER, data.polygon_buffer_size, NULL, GL_DYNAMIC_DRAW); -#endif - uint32_t buffer_ofs = 0; + storage->buffer_orphan_and_upload(data.polygon_buffer_size, buffer_ofs, sizeof(Vector2) * p_vertex_count, p_vertices); - //vertex - glBufferSubData(GL_ARRAY_BUFFER, buffer_ofs, sizeof(Vector2) * p_vertex_count, p_vertices); glEnableVertexAttribArray(VS::ARRAY_VERTEX); glVertexAttribPointer(VS::ARRAY_VERTEX, 2, GL_FLOAT, false, sizeof(Vector2), CAST_INT_TO_UCHAR_PTR(buffer_ofs)); buffer_ofs += sizeof(Vector2) * p_vertex_count; @@ -406,11 +400,7 @@ void RasterizerCanvasGLES3::_draw_polygon(const int *p_indices, int p_index_coun //bind the indices buffer. glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, data.polygon_index_buffer); -#ifndef GLES_OVER_GL - // Orphan the buffer to avoid CPU/GPU sync points caused by glBufferSubData - glBufferData(GL_ELEMENT_ARRAY_BUFFER, data.polygon_index_buffer_size, NULL, GL_DYNAMIC_DRAW); -#endif - glBufferSubData(GL_ELEMENT_ARRAY_BUFFER, 0, sizeof(int) * p_index_count, p_indices); + storage->buffer_orphan_and_upload(data.polygon_index_buffer_size, 0, sizeof(int) * p_index_count, p_indices, GL_ELEMENT_ARRAY_BUFFER); //draw the triangles. glDrawElements(GL_TRIANGLES, p_index_count, GL_UNSIGNED_INT, 0); @@ -432,15 +422,9 @@ void RasterizerCanvasGLES3::_draw_generic(GLuint p_primitive, int p_vertex_count glBindVertexArray(data.polygon_buffer_pointer_array); glBindBuffer(GL_ARRAY_BUFFER, data.polygon_buffer); -#ifndef GLES_OVER_GL - // Orphan the buffer to avoid CPU/GPU sync points caused by glBufferSubData - glBufferData(GL_ARRAY_BUFFER, data.polygon_buffer_size, NULL, GL_DYNAMIC_DRAW); -#endif - uint32_t buffer_ofs = 0; + storage->buffer_orphan_and_upload(data.polygon_buffer_size, buffer_ofs, sizeof(Vector2) * p_vertex_count, p_vertices); - //vertex - glBufferSubData(GL_ARRAY_BUFFER, buffer_ofs, sizeof(Vector2) * p_vertex_count, p_vertices); glEnableVertexAttribArray(VS::ARRAY_VERTEX); glVertexAttribPointer(VS::ARRAY_VERTEX, 2, GL_FLOAT, false, sizeof(Vector2), CAST_INT_TO_UCHAR_PTR(buffer_ofs)); buffer_ofs += sizeof(Vector2) * p_vertex_count; @@ -485,15 +469,9 @@ void RasterizerCanvasGLES3::_draw_generic_indices(GLuint p_primitive, const int glBindVertexArray(data.polygon_buffer_pointer_array); glBindBuffer(GL_ARRAY_BUFFER, data.polygon_buffer); -#ifndef GLES_OVER_GL - // Orphan the buffer to avoid CPU/GPU sync points caused by glBufferSubData - glBufferData(GL_ARRAY_BUFFER, data.polygon_buffer_size, NULL, GL_DYNAMIC_DRAW); -#endif - uint32_t buffer_ofs = 0; + storage->buffer_orphan_and_upload(data.polygon_buffer_size, buffer_ofs, sizeof(Vector2) * p_vertex_count, p_vertices); - //vertex - glBufferSubData(GL_ARRAY_BUFFER, buffer_ofs, sizeof(Vector2) * p_vertex_count, p_vertices); glEnableVertexAttribArray(VS::ARRAY_VERTEX); glVertexAttribPointer(VS::ARRAY_VERTEX, 2, GL_FLOAT, false, sizeof(Vector2), CAST_INT_TO_UCHAR_PTR(buffer_ofs)); buffer_ofs += sizeof(Vector2) * p_vertex_count; @@ -538,11 +516,8 @@ void RasterizerCanvasGLES3::_draw_generic_indices(GLuint p_primitive, const int //bind the indices buffer. glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, data.polygon_index_buffer); -#ifndef GLES_OVER_GL - // Orphan the buffer to avoid CPU/GPU sync points caused by glBufferSubData - glBufferData(GL_ELEMENT_ARRAY_BUFFER, data.polygon_index_buffer_size, NULL, GL_DYNAMIC_DRAW); -#endif - glBufferSubData(GL_ELEMENT_ARRAY_BUFFER, 0, sizeof(int) * p_index_count, p_indices); + + storage->buffer_orphan_and_upload(data.polygon_index_buffer_size, 0, sizeof(int) * p_index_count, p_indices, GL_ELEMENT_ARRAY_BUFFER); //draw the triangles. glDrawElements(p_primitive, p_index_count, GL_UNSIGNED_INT, 0); @@ -616,12 +591,9 @@ void RasterizerCanvasGLES3::_draw_gui_primitive(int p_points, const Vector2 *p_v } glBindBuffer(GL_ARRAY_BUFFER, data.polygon_buffer); -#ifndef GLES_OVER_GL - // Orphan the buffer to avoid CPU/GPU sync points caused by glBufferSubData - glBufferData(GL_ARRAY_BUFFER, data.polygon_buffer_size, NULL, GL_DYNAMIC_DRAW); -#endif - //TODO the below call may need to be replaced with: glBufferSubData(GL_ARRAY_BUFFER, 0, p_points * stride * 4 * sizeof(float), &b[0]); - glBufferSubData(GL_ARRAY_BUFFER, 0, p_points * stride * 4, &b[0]); + //TODO the below call may need to be replaced with: (p_points * stride * 4 * sizeof(float), &b[0]); + storage->buffer_orphan_and_upload(data.polygon_buffer_size, 0, p_points * stride * 4, &b[0]); + glBindVertexArray(data.polygon_buffer_quad_arrays[version]); glDrawArrays(prim[p_points], 0, p_points); glBindVertexArray(0); diff --git a/drivers/gles3/rasterizer_storage_gles3.h b/drivers/gles3/rasterizer_storage_gles3.h index 13dc8d7b318..86c97d82126 100644 --- a/drivers/gles3/rasterizer_storage_gles3.h +++ b/drivers/gles3/rasterizer_storage_gles3.h @@ -1481,7 +1481,18 @@ public: virtual String get_video_adapter_name() const; virtual String get_video_adapter_vendor() const; + void buffer_orphan_and_upload(unsigned int p_buffer_size, unsigned int p_offset, unsigned int p_data_size, const void *p_data, GLenum p_target = GL_ARRAY_BUFFER); + RasterizerStorageGLES3(); }; +// standardize the orphan / upload in one place so it can be changed per platform as necessary, and avoid future +// bugs causing pipeline stalls +inline void RasterizerStorageGLES3::buffer_orphan_and_upload(unsigned int p_buffer_size, unsigned int p_offset, unsigned int p_data_size, const void *p_data, GLenum p_target) { + // Orphan the buffer to avoid CPU/GPU sync points caused by glBufferSubData + // Was previously #ifndef GLES_OVER_GL however this causes stalls on desktop mac also (and possibly other) + glBufferData(p_target, p_buffer_size, NULL, GL_DYNAMIC_DRAW); + glBufferSubData(p_target, p_offset, p_data_size, p_data); +} + #endif // RASTERIZERSTORAGEGLES3_H