From 72adefa5cf9d1633f81165ca9480f4a8849a658b Mon Sep 17 00:00:00 2001 From: lawnjelly Date: Fri, 17 Apr 2020 08:44:12 +0100 Subject: [PATCH] Add frame diagnostics for GLES2 Batch renderer Added project setting to enable / disable print frame diagnostics every 10 seconds. This prints out a list of batches and info, which is useful to optimize games and identify performance problems. --- doc/classes/Performance.xml | 30 +++++---- doc/classes/ProjectSettings.xml | 5 +- doc/classes/Viewport.xml | 8 ++- doc/classes/VisualServer.xml | 22 ++++-- drivers/gles2/rasterizer_canvas_gles2.cpp | 82 ++++++++++++++++++++++- drivers/gles2/rasterizer_canvas_gles2.h | 11 +++ servers/visual_server.cpp | 1 + 7 files changed, 139 insertions(+), 20 deletions(-) diff --git a/doc/classes/Performance.xml b/doc/classes/Performance.xml index fbbbb5a99ba..7596e1d2987 100644 --- a/doc/classes/Performance.xml +++ b/doc/classes/Performance.xml @@ -79,40 +79,46 @@ Draw calls per frame. 3D only. - + + Items or joined items drawn per frame. + + + Draw calls per frame. + + The amount of video memory used, i.e. texture and vertex memory combined. - + The amount of texture memory used. - + The amount of vertex memory used. - + Unimplemented in the GLES2 and GLES3 rendering backends, always returns 0. - + Number of active [RigidBody2D] nodes in the game. - + Number of collision pairs in the 2D physics engine. - + Number of islands in the 2D physics engine. - + Number of active [RigidBody] and [VehicleBody] nodes in the game. - + Number of collision pairs in the 3D physics engine. - + Number of islands in the 3D physics engine. - + Output latency of the [AudioServer]. - + Represents the size of the [enum Monitor] enum. diff --git a/doc/classes/ProjectSettings.xml b/doc/classes/ProjectSettings.xml index 713103a29f9..836c776ecb3 100644 --- a/doc/classes/ProjectSettings.xml +++ b/doc/classes/ProjectSettings.xml @@ -984,10 +984,13 @@ Turns batching on and off. Batching increases performance by reducing the amount of graphics API drawcalls. + + When batching is on, this regularly prints a frame diagnosis log. Note that this will degrade performance. + [b]Experimental[/b] For regression testing against the old renderer. If this is switched on, and [code]use_batching[/code] is set, the renderer will swap alternately between using the old renderer, and the batched renderer, on each frame. This makes it easy to identify visual differences. Performance will be degraded. - + [b]Experimental[/b] Switches on batching within the editor. Use with caution - note that if your editor does not render correctly you may need to edit your [code]project.godot[/code] and remove the use_batching_in_editor setting manually. diff --git a/doc/classes/Viewport.xml b/doc/classes/Viewport.xml index c9afc9b1bf0..c55bc9b6a31 100644 --- a/doc/classes/Viewport.xml +++ b/doc/classes/Viewport.xml @@ -377,7 +377,13 @@ Amount of draw calls in frame. - + + Amount of items or joined items in frame. + + + Amount of draw calls in frame. + + Represents the size of the [enum RenderInfo] enum. diff --git a/doc/classes/VisualServer.xml b/doc/classes/VisualServer.xml index 7db734fc09d..f672222fc46 100644 --- a/doc/classes/VisualServer.xml +++ b/doc/classes/VisualServer.xml @@ -4601,7 +4601,13 @@ Number of draw calls during this frame. - + + Number of 2d items drawn this frame. + + + Number of 2d draw calls during this frame. + + Represents the size of the [enum ViewportRenderInfo] enum. @@ -4748,16 +4754,22 @@ The amount of draw calls in frame. - + + The amount of 2d items in the frame. + + + The amount of 2d draw calls in frame. + + Unimplemented in the GLES2 and GLES3 rendering backends, always returns 0. - + The amount of video memory used, i.e. texture and vertex memory combined. - + The amount of texture memory used. - + The amount of vertex memory used. diff --git a/drivers/gles2/rasterizer_canvas_gles2.cpp b/drivers/gles2/rasterizer_canvas_gles2.cpp index 62d7cfab32c..0493ff20f4c 100644 --- a/drivers/gles2/rasterizer_canvas_gles2.cpp +++ b/drivers/gles2/rasterizer_canvas_gles2.cpp @@ -60,9 +60,13 @@ RasterizerCanvasGLES2::BatchData::BatchData() { settings_colored_vertex_format_threshold = 0.0f; settings_batch_buffer_num_verts = 0; scissor_threshold_area = 0.0f; + diagnose_frame = false; + next_diagnose_tick = 10000; + diagnose_frame_number = 9999999999; // some high number settings_use_batching_original_choice = false; settings_flash_batching = false; + settings_diagnose_frame = false; settings_scissor_lights = false; settings_scissor_threshold = -1.0f; } @@ -642,6 +646,32 @@ void RasterizerCanvasGLES2::_batch_render_rects(const Batch &p_batch, Rasterizer glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); } +void RasterizerCanvasGLES2::diagnose_batches(Item::Command *const *p_commands) { + int num_batches = bdata.batches.size(); + + for (int batch_num = 0; batch_num < num_batches; batch_num++) { + const Batch &batch = bdata.batches[batch_num]; + bdata.frame_string += "\t\tbatch "; + + switch (batch.type) { + case Batch::BT_RECT: { + bdata.frame_string += "R "; + bdata.frame_string += itos(batch.num_commands); + bdata.frame_string += " [" + itos(batch.batch_texture_id) + "]"; + if (batch.num_commands > 1) { + bdata.frame_string += " MULTI\n"; + } else { + bdata.frame_string += "\n"; + } + } break; + default: { + bdata.frame_string += "D "; + bdata.frame_string += itos(batch.num_commands) + "\n"; + } break; + } + } +} + void RasterizerCanvasGLES2::render_batches(Item::Command *const *p_commands, Item *p_current_clip, bool &r_reclip, RasterizerStorageGLES2::Material *p_material) { int num_batches = bdata.batches.size(); @@ -1562,6 +1592,10 @@ void RasterizerCanvasGLES2::flush_render_batches(Item *p_first_item, Item *p_cur Item::Command *const *commands = p_first_item->commands.ptr(); + if (bdata.diagnose_frame) { + diagnose_batches(commands); + } + render_batches(commands, p_current_clip, r_reclip, p_material); } @@ -1637,6 +1671,33 @@ void RasterizerCanvasGLES2::join_items(Item *p_item_list, int p_z) { } } +void RasterizerCanvasGLES2::canvas_begin() { + // diagnose_frame? + if (bdata.settings_diagnose_frame) { + bdata.diagnose_frame = false; + + uint32_t tick = OS::get_singleton()->get_ticks_msec(); + uint64_t frame = Engine::get_singleton()->get_frames_drawn(); + + if (tick >= bdata.next_diagnose_tick) { + bdata.next_diagnose_tick = tick + 10000; + + // the plus one is prevent starting diagnosis half way through frame + bdata.diagnose_frame_number = frame + 1; + } + + if (frame == bdata.diagnose_frame_number) { + bdata.diagnose_frame = true; + } + + if (bdata.diagnose_frame) { + bdata.frame_string = "canvas_begin FRAME " + itos(frame) + "\n"; + } + } + + RasterizerCanvasBaseGLES2::canvas_begin(); +} + void RasterizerCanvasGLES2::canvas_render_items_begin(const Color &p_modulate, Light *p_light, const Transform2D &p_base_transform) { // if we are debugging, flash each frame between batching renderer and old version to compare for regressions if (bdata.settings_flash_batching) { @@ -1666,6 +1727,10 @@ void RasterizerCanvasGLES2::canvas_render_items_end() { return; } + if (bdata.diagnose_frame) { + bdata.frame_string += "items\n"; + } + // batching render is deferred until after going through all the z_indices, joining all the items canvas_render_items_implementation(0, 0, _render_item_state.item_group_modulate, _render_item_state.item_group_light, @@ -1673,6 +1738,10 @@ void RasterizerCanvasGLES2::canvas_render_items_end() { bdata.items_joined.reset(); bdata.item_refs.reset(); + + if (bdata.diagnose_frame) { + print_line(bdata.frame_string); + } } void RasterizerCanvasGLES2::canvas_render_items(Item *p_item_list, int p_z, const Color &p_modulate, Light *p_light, const Transform2D &p_base_transform) { @@ -2284,6 +2353,10 @@ void RasterizerCanvasGLES2::render_joined_item(const BItemJoined &p_bij, RenderI storage->info.render._2d_item_count++; + if (bdata.diagnose_frame) { + bdata.frame_string += "\tjoined_item " + itos(p_bij.num_item_refs) + " refs\n"; + } + // all the joined items will share the same state with the first item Item *ci = bdata.item_refs[p_bij.first_item_ref].item; @@ -2798,6 +2871,12 @@ void RasterizerCanvasGLES2::initialize() { bdata.settings_flash_batching = false; } + // frame diagnosis. print out the batches every nth frame + bdata.settings_diagnose_frame = false; + if (!Engine::get_singleton()->is_editor_hint() && bdata.settings_use_batching) { + bdata.settings_diagnose_frame = GLOBAL_GET("rendering/gles2/debug/diagnose_frame"); + } + // the maximum num quads in a batch is limited by GLES2. We can have only 16 bit indices, // which means we can address a vertex buffer of max size 65535. 4 vertices are needed per quad. @@ -2823,7 +2902,8 @@ void RasterizerCanvasGLES2::initialize() { batching_options_string += "\tcolored_vertex_format_threshold " + String(Variant(bdata.settings_colored_vertex_format_threshold)) + "\n"; batching_options_string += "\tbatch_buffer_size " + itos(bdata.settings_batch_buffer_num_verts) + "\n"; batching_options_string += "\tlight_scissor_area_threshold " + String(Variant(bdata.settings_scissor_threshold)) + "\n"; - batching_options_string += "\tdebug_flash " + String(Variant(bdata.settings_flash_batching)); + batching_options_string += "\tdebug_flash " + String(Variant(bdata.settings_flash_batching)) + "\n"; + batching_options_string += "\tdiagnose_frame " + String(Variant(bdata.settings_diagnose_frame)); } else { batching_options_string += "OFF"; } diff --git a/drivers/gles2/rasterizer_canvas_gles2.h b/drivers/gles2/rasterizer_canvas_gles2.h index 4de3a197c27..8669545a7a9 100644 --- a/drivers/gles2/rasterizer_canvas_gles2.h +++ b/drivers/gles2/rasterizer_canvas_gles2.h @@ -169,10 +169,17 @@ class RasterizerCanvasGLES2 : public RasterizerCanvasBaseGLES2 { // measured in pixels, recalculated each frame float scissor_threshold_area; + // diagnose this frame, every nTh frame when settings_diagnose_frame is on + bool diagnose_frame; + String frame_string; + uint32_t next_diagnose_tick; + uint64_t diagnose_frame_number; + // global settings bool settings_use_batching; // the current use_batching (affected by flash) bool settings_use_batching_original_choice; // the choice entered in project settings bool settings_flash_batching; // for regression testing, flash between non-batched and batched renderer + bool settings_diagnose_frame; // print out batches to help optimize / regression test int settings_max_join_item_commands; float settings_colored_vertex_format_threshold; int settings_batch_buffer_num_verts; @@ -227,6 +234,7 @@ public: virtual void canvas_render_items_begin(const Color &p_modulate, Light *p_light, const Transform2D &p_base_transform); virtual void canvas_render_items_end(); virtual void canvas_render_items(Item *p_item_list, int p_z, const Color &p_modulate, Light *p_light, const Transform2D &p_base_transform); + virtual void canvas_begin(); private: // legacy codepath .. to remove after testing @@ -263,6 +271,9 @@ private: bool _light_scissor_begin(const Rect2 &p_item_rect, const Transform2D &p_light_xform, const Rect2 &p_light_rect) const; void _calculate_scissor_threshold_area(); + // debug + void diagnose_batches(Item::Command *const *p_commands); + public: void initialize(); RasterizerCanvasGLES2(); diff --git a/servers/visual_server.cpp b/servers/visual_server.cpp index c749ebc0fb2..18fcdeee370 100644 --- a/servers/visual_server.cpp +++ b/servers/visual_server.cpp @@ -2422,6 +2422,7 @@ VisualServer::VisualServer() { GLOBAL_DEF("rendering/gles2/batching/light_scissor_area_threshold", 1.0f); GLOBAL_DEF("rendering/gles2/batching/batch_buffer_size", 16384); GLOBAL_DEF("rendering/gles2/debug/flash_batching", false); + GLOBAL_DEF("rendering/gles2/debug/diagnose_frame", false); GLOBAL_DEF_RST("rendering/gles2/debug/use_batching_in_editor", true); ProjectSettings::get_singleton()->set_custom_property_info("rendering/gles2/batching/max_join_item_commands", PropertyInfo(Variant::INT, "rendering/gles2/batching/max_join_item_commands", PROPERTY_HINT_RANGE, "0,65535"));