From a4cd274ca72f5a42d12cf5667ac8417be61d4d4c Mon Sep 17 00:00:00 2001 From: lawnjelly Date: Wed, 15 Apr 2020 12:38:13 +0100 Subject: [PATCH] Batching with Extra Matrix commands Defers sending 'transform' commands within a RasterizerCanvas::Item until they are needed for default batches. Instead locally caches the extra matrix and applies it using software transform, preventing unnecessary batch breaks. The logic is relatively complex, and the whole 'extra matrix' of the legacy renderer in addition to the final_transform is not ideal. However this is required to accelerate some user drawing techniques, and later the lines in the IDE. --- drivers/gles2/rasterizer_canvas_gles2.cpp | 77 +++++++++++--- drivers/gles2/rasterizer_canvas_gles2.h | 118 ++++++++++++++++++---- 2 files changed, 159 insertions(+), 36 deletions(-) diff --git a/drivers/gles2/rasterizer_canvas_gles2.cpp b/drivers/gles2/rasterizer_canvas_gles2.cpp index 69d06251fff..73957beb81a 100644 --- a/drivers/gles2/rasterizer_canvas_gles2.cpp +++ b/drivers/gles2/rasterizer_canvas_gles2.cpp @@ -221,9 +221,7 @@ bool RasterizerCanvasGLES2::prefill_joined_item(FillState &r_fill_state, int &r_ int command_count = p_item->commands.size(); Item::Command *const *commands = p_item->commands.ptr(); - Transform2D transform; - TransformMode transform_mode = _find_transform_mode(r_fill_state.use_hardware_transform, p_item->final_transform, transform); - + // just a local, might be more efficient in a register (check) Vector2 texpixel_size = r_fill_state.texpixel_size; // checking the color for not being white makes it 92/90 times faster in the case where it is white @@ -252,7 +250,36 @@ bool RasterizerCanvasGLES2::prefill_joined_item(FillState &r_fill_state, int &r_ switch (command->type) { default: { - _prefill_default_batch(r_fill_state, command_num); + _prefill_default_batch(r_fill_state, command_num, *p_item); + } break; + case Item::Command::TYPE_TRANSFORM: { + // if the extra matrix has been sent already, + // break 
this extra matrix software path (as we don't want to unset it on the GPU etc) + if (r_fill_state.extra_matrix_sent) { + _prefill_default_batch(r_fill_state, command_num, *p_item); + } else { + // Extra matrix fast path. + // Instead of sending the command immediately, we store the modified transform (in combined) + // for software transform, and only flush this transform command if we NEED to (i.e. we want to + // render some default commands) + Item::CommandTransform *transform = static_cast<Item::CommandTransform *>(command); + const Transform2D &extra_matrix = transform->xform; + + if (r_fill_state.use_hardware_transform) { + // if we are using hardware transform mode, we have already sent the final transform, + // so we only want to software transform the extra matrix + r_fill_state.transform_combined = extra_matrix; + } else { + r_fill_state.transform_combined = p_item->final_transform * extra_matrix; + } + // after a transform command, always use some form of software transform (either the combined final + extra, or just the extra) + // until we flush this dirty extra matrix because we need to render default commands. 
+ r_fill_state.transform_mode = _find_transform_mode(r_fill_state.transform_combined); + + // make a note of which command the dirty extra matrix is stored in, so we can send it later + // if necessary + r_fill_state.transform_extra_command_number_p1 = command_num + 1; // plus 1 so we can test against zero + } } break; case Item::Command::TYPE_RECT: { @@ -277,7 +304,7 @@ bool RasterizerCanvasGLES2::prefill_joined_item(FillState &r_fill_state, int &r_ int command_num_next = command_num + 1; if (command_num_next < command_count) { Item::Command *command_next = commands[command_num_next]; - if (command_next->type != Item::Command::TYPE_RECT) { + if ((command_next->type != Item::Command::TYPE_RECT) && (command_next->type != Item::Command::TYPE_TRANSFORM)) { is_single_rect = true; } } else { @@ -285,7 +312,7 @@ bool RasterizerCanvasGLES2::prefill_joined_item(FillState &r_fill_state, int &r_ } // if it is a rect on its own, do exactly the same as the default routine if (is_single_rect) { - _prefill_default_batch(r_fill_state, command_num); + _prefill_default_batch(r_fill_state, command_num, *p_item); break; } } // if use hardware transform @@ -352,8 +379,8 @@ bool RasterizerCanvasGLES2::prefill_joined_item(FillState &r_fill_state, int &r_ // fill the quad geometry Vector2 mins = rect->rect.position; - if (transform_mode == TM_TRANSLATE) { - _software_transform_vertex(mins, transform); + if (r_fill_state.transform_mode == TM_TRANSLATE) { + _software_transform_vertex(mins, r_fill_state.transform_combined); } Vector2 maxs = mins + rect->rect.size; @@ -385,11 +412,11 @@ bool RasterizerCanvasGLES2::prefill_joined_item(FillState &r_fill_state, int &r_ SWAP(bB->pos, bC->pos); } - if (transform_mode == TM_ALL) { - _software_transform_vertex(bA->pos, transform); - _software_transform_vertex(bB->pos, transform); - _software_transform_vertex(bC->pos, transform); - _software_transform_vertex(bD->pos, transform); + if (r_fill_state.transform_mode == TM_ALL) { 
_software_transform_vertex(bA->pos, r_fill_state.transform_combined); + _software_transform_vertex(bB->pos, r_fill_state.transform_combined); + _software_transform_vertex(bC->pos, r_fill_state.transform_combined); + _software_transform_vertex(bD->pos, r_fill_state.transform_combined); } // uvs @@ -1452,6 +1479,7 @@ void RasterizerCanvasGLES2::render_joined_item_commands(const BItemJoined &p_bij FillState fill_state; fill_state.reset(); fill_state.use_hardware_transform = p_bij.use_hardware_transform(); + fill_state.extra_matrix_sent = false; for (unsigned int i = 0; i < p_bij.num_item_refs; i++) { const BItemRef &ref = bdata.item_refs[p_bij.first_item_ref + i]; @@ -1461,6 +1489,23 @@ void RasterizerCanvasGLES2::render_joined_item_commands(const BItemJoined &p_bij int command_count = item->commands.size(); int command_start = 0; + // ONCE OFF fill state setup, that will be retained over multiple calls to + // prefill_joined_item() + fill_state.transform_combined = item->final_transform; + + // decide the initial transform mode, and make a backup + // in orig_transform_mode in case we need to switch back + if (!fill_state.use_hardware_transform) { + fill_state.transform_mode = _find_transform_mode(fill_state.transform_combined); + } else { + fill_state.transform_mode = TM_NONE; + } + fill_state.orig_transform_mode = fill_state.transform_mode; + + // keep track of when we added an extra matrix + // so we can defer sending until we see a default command + fill_state.transform_extra_command_number_p1 = 0; + while (command_start < command_count) { // fill as many batches as possible (until all done, or the vertex buffer is full) bool bFull = prefill_joined_item(fill_state, command_start, item, p_current_clip, r_reclip, p_material); @@ -1469,7 +1514,6 @@ void RasterizerCanvasGLES2::render_joined_item_commands(const BItemJoined &p_bij // always pass first item (commands for default are always first item) flush_render_batches(first_item, p_current_clip, r_reclip, 
p_material); fill_state.reset(); - fill_state.use_hardware_transform = p_bij.use_hardware_transform(); } } } @@ -1799,7 +1843,7 @@ bool RasterizerCanvasGLES2::try_join_item(Item *p_ci, RenderItemState &r_ris, bo } // non rects will break the batching anyway, we don't want to record item changes, detect this - if (_detect_batch_break(p_ci)) { + if (!r_batch_break && _detect_batch_break(p_ci)) { join = false; r_batch_break = true; } @@ -1847,7 +1891,8 @@ bool RasterizerCanvasGLES2::_detect_batch_break(Item *p_ci) { default: { return true; } break; - case Item::Command::TYPE_RECT: { + case Item::Command::TYPE_RECT: + case Item::Command::TYPE_TRANSFORM: { } break; } // switch diff --git a/drivers/gles2/rasterizer_canvas_gles2.h b/drivers/gles2/rasterizer_canvas_gles2.h index cf8adba95e9..4de3a197c27 100644 --- a/drivers/gles2/rasterizer_canvas_gles2.h +++ b/drivers/gles2/rasterizer_canvas_gles2.h @@ -203,9 +203,10 @@ class RasterizerCanvasGLES2 : public RasterizerCanvasBaseGLES2 { struct FillState { void reset() { + // don't reset members that need to be preserved after flushing + // half way through a list of commands curr_batch = 0; batch_tex_id = -1; - use_hardware_transform = true; texpixel_size = Vector2(1, 1); } Batch *curr_batch; @@ -213,6 +214,13 @@ class RasterizerCanvasGLES2 : public RasterizerCanvasBaseGLES2 { bool use_hardware_transform; Vector2 texpixel_size; Color final_modulate; + TransformMode transform_mode; + TransformMode orig_transform_mode; + + // support for extra matrices + bool extra_matrix_sent; // whether sent on this item (in which case software transform can't be used until end of item) + int transform_extra_command_number_p1; // plus one to allow fast checking against zero + Transform2D transform_combined; // final * extra }; public: @@ -247,8 +255,8 @@ private: bool _detect_batch_break(Item *p_ci); void _software_transform_vertex(BatchVector2 &r_v, const Transform2D &p_tr) const; void _software_transform_vertex(Vector2 &r_v, const 
Transform2D &p_tr) const; - TransformMode _find_transform_mode(bool p_use_hardware_transform, const Transform2D &p_tr, Transform2D &r_tr) const; - _FORCE_INLINE_ void _prefill_default_batch(FillState &r_fill_state, int p_command_num); + TransformMode _find_transform_mode(const Transform2D &p_tr) const; + _FORCE_INLINE_ void _prefill_default_batch(FillState &r_fill_state, int p_command_num, const Item &p_item); // light scissoring bool _light_find_intersection(const Rect2 &p_item_rect, const Transform2D &p_light_xform, const Rect2 &p_light_rect, Rect2 &r_cliprect) const; @@ -262,12 +270,88 @@ public: ////////////////////////////////////////////////////////////// -_FORCE_INLINE_ void RasterizerCanvasGLES2::_prefill_default_batch(FillState &r_fill_state, int p_command_num) { +// Default batches will not occur in software transform only items +// EXCEPT IN THE CASE OF SINGLE RECTS (and this may well not occur, check the logic in prefill_joined_item TYPE_RECT) +// but can occur where transform commands have been sent during hardware batch +_FORCE_INLINE_ void RasterizerCanvasGLES2::_prefill_default_batch(FillState &r_fill_state, int p_command_num, const Item &p_item) { if (r_fill_state.curr_batch->type == Batch::BT_DEFAULT) { - // another default command, just add to the existing batch - r_fill_state.curr_batch->num_commands++; + // don't need to flush an extra transform command? 
+ if (!r_fill_state.transform_extra_command_number_p1) { + // another default command, just add to the existing batch + r_fill_state.curr_batch->num_commands++; + } else { +#ifdef DEBUG_ENABLED + if (r_fill_state.transform_extra_command_number_p1 != p_command_num) { + WARN_PRINT_ONCE("_prefill_default_batch : transform_extra_command_number_p1 != p_command_num"); + } +#endif + // we do have a pending extra transform command to flush + // either the extra transform is in the prior command, or not, in which case we need 2 batches + // if (r_fill_state.transform_extra_command_number_p1 == p_command_num) { + // this should be most common case + r_fill_state.curr_batch->num_commands += 2; + // } else { + // // mad ordering .. does this even happen? + // int extra_command = r_fill_state.transform_extra_command_number_p1 - 1; // plus 1 based + + // // send the extra to the GPU in a batch + // r_fill_state.curr_batch = _batch_request_new(); + // r_fill_state.curr_batch->type = Batch::BT_DEFAULT; + // r_fill_state.curr_batch->first_command = extra_command; + // r_fill_state.curr_batch->num_commands = 1; + + // // start default batch + // r_fill_state.curr_batch = _batch_request_new(); + // r_fill_state.curr_batch->type = Batch::BT_DEFAULT; + // r_fill_state.curr_batch->first_command = p_command_num; + // r_fill_state.curr_batch->num_commands = 1; + // } + + r_fill_state.transform_extra_command_number_p1 = 0; // mark as sent + r_fill_state.extra_matrix_sent = true; + + // the original mode should always be hardware transform .. + // test this assumption + r_fill_state.transform_mode = r_fill_state.orig_transform_mode; + + // do we need to restore anything else? 
+ } } else { // end of previous different type batch, so start new default batch + + // first consider whether there is a dirty extra matrix to send + if (r_fill_state.transform_extra_command_number_p1) { + // get which command the extra is in, and blank all the records as it no longer is stored CPU side + int extra_command = r_fill_state.transform_extra_command_number_p1 - 1; // plus 1 based + r_fill_state.transform_extra_command_number_p1 = 0; + r_fill_state.extra_matrix_sent = true; + + // send the extra to the GPU in a batch + r_fill_state.curr_batch = _batch_request_new(); + r_fill_state.curr_batch->type = Batch::BT_DEFAULT; + r_fill_state.curr_batch->first_command = extra_command; + r_fill_state.curr_batch->num_commands = 1; + + // revert to the original transform mode + // e.g. go back to NONE if we were in hardware transform mode + r_fill_state.transform_mode = r_fill_state.orig_transform_mode; + + // reset the original transform if we are going back to software mode, + // because the extra is now done on the GPU... 
+ // (any subsequent extras are sent directly to the GPU, no deferring) + if (r_fill_state.orig_transform_mode != TM_NONE) { + r_fill_state.transform_combined = p_item.final_transform; + } + + // can possibly combine batch with the next one in some cases + // this is more efficient than having an extra batch especially for the extra + if ((extra_command + 1) == p_command_num) { + r_fill_state.curr_batch->num_commands = 2; + return; + } + } + + // start default batch r_fill_state.curr_batch = _batch_request_new(); r_fill_state.curr_batch->type = Batch::BT_DEFAULT; r_fill_state.curr_batch->first_command = p_command_num; @@ -285,22 +369,16 @@ _FORCE_INLINE_ void RasterizerCanvasGLES2::_software_transform_vertex(Vector2 &r r_v = p_tr.xform(r_v); } -_FORCE_INLINE_ RasterizerCanvasGLES2::TransformMode RasterizerCanvasGLES2::_find_transform_mode(bool p_use_hardware_transform, const Transform2D &p_tr, Transform2D &r_tr) const { - if (!p_use_hardware_transform) { - r_tr = p_tr; - - // decided whether to do translate only for software transform - if ((p_tr.elements[0].x == 1.0) && - (p_tr.elements[0].y == 0.0) && - (p_tr.elements[1].x == 0.0) && - (p_tr.elements[1].y == 1.0)) { - return TM_TRANSLATE; - } else { - return TM_ALL; - } +_FORCE_INLINE_ RasterizerCanvasGLES2::TransformMode RasterizerCanvasGLES2::_find_transform_mode(const Transform2D &p_tr) const { + // decided whether to do translate only for software transform + if ((p_tr.elements[0].x == 1.0) && + (p_tr.elements[0].y == 0.0) && + (p_tr.elements[1].x == 0.0) && + (p_tr.elements[1].y == 1.0)) { + return TM_TRANSLATE; } - return TM_NONE; + return TM_ALL; } #endif // RASTERIZERCANVASGLES2_H