diff --git a/drivers/gles2/rasterizer_canvas_gles2.cpp b/drivers/gles2/rasterizer_canvas_gles2.cpp index 69d06251fff..73957beb81a 100644 --- a/drivers/gles2/rasterizer_canvas_gles2.cpp +++ b/drivers/gles2/rasterizer_canvas_gles2.cpp @@ -221,9 +221,7 @@ bool RasterizerCanvasGLES2::prefill_joined_item(FillState &r_fill_state, int &r_ int command_count = p_item->commands.size(); Item::Command *const *commands = p_item->commands.ptr(); - Transform2D transform; - TransformMode transform_mode = _find_transform_mode(r_fill_state.use_hardware_transform, p_item->final_transform, transform); - + // just a local, might be more efficient in a register (check) Vector2 texpixel_size = r_fill_state.texpixel_size; // checking the color for not being white makes it 92/90 times faster in the case where it is white @@ -252,7 +250,36 @@ bool RasterizerCanvasGLES2::prefill_joined_item(FillState &r_fill_state, int &r_ switch (command->type) { default: { - _prefill_default_batch(r_fill_state, command_num); + _prefill_default_batch(r_fill_state, command_num, *p_item); + } break; + case Item::Command::TYPE_TRANSFORM: { + // if the extra matrix has been sent already, + // break this extra matrix software path (as we don't want to unset it on the GPU etc) + if (r_fill_state.extra_matrix_sent) { + _prefill_default_batch(r_fill_state, command_num, *p_item); + } else { + // Extra matrix fast path. + // Instead of sending the command immediately, we store the modified transform (in combined) + // for software transform, and only flush this transform command if we NEED to (i.e. 
we want to + // render some default commands) + Item::CommandTransform *transform = static_cast<Item::CommandTransform *>(command); + const Transform2D &extra_matrix = transform->xform; + + if (r_fill_state.use_hardware_transform) { + // if we are using hardware transform mode, we have already sent the final transform, + // so we only want to software transform the extra matrix + r_fill_state.transform_combined = extra_matrix; + } else { + r_fill_state.transform_combined = p_item->final_transform * extra_matrix; + } + // after a transform command, always use some form of software transform (either the combined final + extra, or just the extra) + // until we flush this dirty extra matrix because we need to render default commands. + r_fill_state.transform_mode = _find_transform_mode(r_fill_state.transform_combined); + + // make a note of which command the dirty extra matrix is stored in, so we can send it later + // if necessary + r_fill_state.transform_extra_command_number_p1 = command_num + 1; // plus 1 so we can test against zero + } } break; case Item::Command::TYPE_RECT: { @@ -277,7 +304,7 @@ bool RasterizerCanvasGLES2::prefill_joined_item(FillState &r_fill_state, int &r_ int command_num_next = command_num + 1; if (command_num_next < command_count) { Item::Command *command_next = commands[command_num_next]; - if (command_next->type != Item::Command::TYPE_RECT) { + if ((command_next->type != Item::Command::TYPE_RECT) && (command_next->type != Item::Command::TYPE_TRANSFORM)) { is_single_rect = true; } } else { @@ -285,7 +312,7 @@ bool RasterizerCanvasGLES2::prefill_joined_item(FillState &r_fill_state, int &r_ } // if it is a rect on its own, do exactly the same as the default routine if (is_single_rect) { - _prefill_default_batch(r_fill_state, command_num); + _prefill_default_batch(r_fill_state, command_num, *p_item); break; } } // if use hardware transform @@ -352,8 +379,8 @@ bool RasterizerCanvasGLES2::prefill_joined_item(FillState &r_fill_state, int &r_ // fill the quad geometry Vector2
mins = rect->rect.position; - if (transform_mode == TM_TRANSLATE) { - _software_transform_vertex(mins, transform); + if (r_fill_state.transform_mode == TM_TRANSLATE) { + _software_transform_vertex(mins, r_fill_state.transform_combined); } Vector2 maxs = mins + rect->rect.size; @@ -385,11 +412,11 @@ bool RasterizerCanvasGLES2::prefill_joined_item(FillState &r_fill_state, int &r_ SWAP(bB->pos, bC->pos); } - if (transform_mode == TM_ALL) { - _software_transform_vertex(bA->pos, transform); - _software_transform_vertex(bB->pos, transform); - _software_transform_vertex(bC->pos, transform); - _software_transform_vertex(bD->pos, transform); + if (r_fill_state.transform_mode == TM_ALL) { + _software_transform_vertex(bA->pos, r_fill_state.transform_combined); + _software_transform_vertex(bB->pos, r_fill_state.transform_combined); + _software_transform_vertex(bC->pos, r_fill_state.transform_combined); + _software_transform_vertex(bD->pos, r_fill_state.transform_combined); } // uvs @@ -1452,6 +1479,7 @@ void RasterizerCanvasGLES2::render_joined_item_commands(const BItemJoined &p_bij FillState fill_state; fill_state.reset(); fill_state.use_hardware_transform = p_bij.use_hardware_transform(); + fill_state.extra_matrix_sent = false; for (unsigned int i = 0; i < p_bij.num_item_refs; i++) { const BItemRef &ref = bdata.item_refs[p_bij.first_item_ref + i]; @@ -1461,6 +1489,23 @@ void RasterizerCanvasGLES2::render_joined_item_commands(const BItemJoined &p_bij int command_count = item->commands.size(); int command_start = 0; + // ONCE OFF fill state setup, that will be retained over multiple calls to + // prefill_joined_item() + fill_state.transform_combined = item->final_transform; + + // decide the initial transform mode, and make a backup + // in orig_transform_mode in case we need to switch back + if (!fill_state.use_hardware_transform) { + fill_state.transform_mode = _find_transform_mode(fill_state.transform_combined); + } else { + fill_state.transform_mode = TM_NONE; + } + 
fill_state.orig_transform_mode = fill_state.transform_mode; + + // keep track of when we added an extra matrix + // so we can defer sending until we see a default command + fill_state.transform_extra_command_number_p1 = 0; + while (command_start < command_count) { // fill as many batches as possible (until all done, or the vertex buffer is full) bool bFull = prefill_joined_item(fill_state, command_start, item, p_current_clip, r_reclip, p_material); @@ -1469,7 +1514,6 @@ void RasterizerCanvasGLES2::render_joined_item_commands(const BItemJoined &p_bij // always pass first item (commands for default are always first item) flush_render_batches(first_item, p_current_clip, r_reclip, p_material); fill_state.reset(); - fill_state.use_hardware_transform = p_bij.use_hardware_transform(); } } } @@ -1799,7 +1843,7 @@ bool RasterizerCanvasGLES2::try_join_item(Item *p_ci, RenderItemState &r_ris, bo } // non rects will break the batching anyway, we don't want to record item changes, detect this - if (_detect_batch_break(p_ci)) { + if (!r_batch_break && _detect_batch_break(p_ci)) { join = false; r_batch_break = true; } @@ -1847,7 +1891,8 @@ bool RasterizerCanvasGLES2::_detect_batch_break(Item *p_ci) { default: { return true; } break; - case Item::Command::TYPE_RECT: { + case Item::Command::TYPE_RECT: + case Item::Command::TYPE_TRANSFORM: { } break; } // switch diff --git a/drivers/gles2/rasterizer_canvas_gles2.h b/drivers/gles2/rasterizer_canvas_gles2.h index cf8adba95e9..4de3a197c27 100644 --- a/drivers/gles2/rasterizer_canvas_gles2.h +++ b/drivers/gles2/rasterizer_canvas_gles2.h @@ -203,9 +203,10 @@ class RasterizerCanvasGLES2 : public RasterizerCanvasBaseGLES2 { struct FillState { void reset() { + // don't reset members that need to be preserved after flushing + // half way through a list of commands curr_batch = 0; batch_tex_id = -1; - use_hardware_transform = true; texpixel_size = Vector2(1, 1); } Batch *curr_batch; @@ -213,6 +214,13 @@ class RasterizerCanvasGLES2 : public 
RasterizerCanvasBaseGLES2 { bool use_hardware_transform; Vector2 texpixel_size; Color final_modulate; + TransformMode transform_mode; + TransformMode orig_transform_mode; + + // support for extra matrices + bool extra_matrix_sent; // whether sent on this item (in which case software transform can't be used until end of item) + int transform_extra_command_number_p1; // plus one to allow fast checking against zero + Transform2D transform_combined; // final * extra }; public: @@ -247,8 +255,8 @@ private: bool _detect_batch_break(Item *p_ci); void _software_transform_vertex(BatchVector2 &r_v, const Transform2D &p_tr) const; void _software_transform_vertex(Vector2 &r_v, const Transform2D &p_tr) const; - TransformMode _find_transform_mode(bool p_use_hardware_transform, const Transform2D &p_tr, Transform2D &r_tr) const; - _FORCE_INLINE_ void _prefill_default_batch(FillState &r_fill_state, int p_command_num); + TransformMode _find_transform_mode(const Transform2D &p_tr) const; + _FORCE_INLINE_ void _prefill_default_batch(FillState &r_fill_state, int p_command_num, const Item &p_item); // light scissoring bool _light_find_intersection(const Rect2 &p_item_rect, const Transform2D &p_light_xform, const Rect2 &p_light_rect, Rect2 &r_cliprect) const; @@ -262,12 +270,88 @@ public: ////////////////////////////////////////////////////////////// -_FORCE_INLINE_ void RasterizerCanvasGLES2::_prefill_default_batch(FillState &r_fill_state, int p_command_num) { +// Default batches will not occur in software transform only items +// EXCEPT IN THE CASE OF SINGLE RECTS (and this may well not occur, check the logic in prefill_joined_item TYPE_RECT) +// but can occur where transform commands have been sent during hardware batch +_FORCE_INLINE_ void RasterizerCanvasGLES2::_prefill_default_batch(FillState &r_fill_state, int p_command_num, const Item &p_item) { if (r_fill_state.curr_batch->type == Batch::BT_DEFAULT) { - // another default command, just add to the existing batch -
r_fill_state.curr_batch->num_commands++; + // don't need to flush an extra transform command? + if (!r_fill_state.transform_extra_command_number_p1) { + // another default command, just add to the existing batch + r_fill_state.curr_batch->num_commands++; + } else { +#ifdef DEBUG_ENABLED + if (r_fill_state.transform_extra_command_number_p1 != p_command_num) { + WARN_PRINT_ONCE("_prefill_default_batch : transform_extra_command_number_p1 != p_command_num"); + } +#endif + // we do have a pending extra transform command to flush + // either the extra transform is in the prior command, or not, in which case we need 2 batches + // if (r_fill_state.transform_extra_command_number_p1 == p_command_num) { + // this should be most common case + r_fill_state.curr_batch->num_commands += 2; + // } else { + // // mad ordering .. does this even happen? + // int extra_command = r_fill_state.transform_extra_command_number_p1 - 1; // plus 1 based + + // // send the extra to the GPU in a batch + // r_fill_state.curr_batch = _batch_request_new(); + // r_fill_state.curr_batch->type = Batch::BT_DEFAULT; + // r_fill_state.curr_batch->first_command = extra_command; + // r_fill_state.curr_batch->num_commands = 1; + + // // start default batch + // r_fill_state.curr_batch = _batch_request_new(); + // r_fill_state.curr_batch->type = Batch::BT_DEFAULT; + // r_fill_state.curr_batch->first_command = p_command_num; + // r_fill_state.curr_batch->num_commands = 1; + // } + + r_fill_state.transform_extra_command_number_p1 = 0; // mark as sent + r_fill_state.extra_matrix_sent = true; + + // the original mode should always be hardware transform .. + // test this assumption + r_fill_state.transform_mode = r_fill_state.orig_transform_mode; + + // do we need to restore anything else? 
+ } } else { // end of previous different type batch, so start new default batch + + // first consider whether there is a dirty extra matrix to send + if (r_fill_state.transform_extra_command_number_p1) { + // get which command the extra is in, and blank all the records as it no longer is stored CPU side + int extra_command = r_fill_state.transform_extra_command_number_p1 - 1; // plus 1 based + r_fill_state.transform_extra_command_number_p1 = 0; + r_fill_state.extra_matrix_sent = true; + + // send the extra to the GPU in a batch + r_fill_state.curr_batch = _batch_request_new(); + r_fill_state.curr_batch->type = Batch::BT_DEFAULT; + r_fill_state.curr_batch->first_command = extra_command; + r_fill_state.curr_batch->num_commands = 1; + + // revert to the original transform mode + // e.g. go back to NONE if we were in hardware transform mode + r_fill_state.transform_mode = r_fill_state.orig_transform_mode; + + // reset the original transform if we are going back to software mode, + // because the extra is now done on the GPU... 
+ // (any subsequent extras are sent directly to the GPU, no deferring) + if (r_fill_state.orig_transform_mode != TM_NONE) { + r_fill_state.transform_combined = p_item.final_transform; + } + + // can possibly combine batch with the next one in some cases + // this is more efficient than having an extra batch especially for the extra + if ((extra_command + 1) == p_command_num) { + r_fill_state.curr_batch->num_commands = 2; + return; + } + } + + // start default batch r_fill_state.curr_batch = _batch_request_new(); r_fill_state.curr_batch->type = Batch::BT_DEFAULT; r_fill_state.curr_batch->first_command = p_command_num; @@ -285,22 +369,16 @@ _FORCE_INLINE_ void RasterizerCanvasGLES2::_software_transform_vertex(Vector2 &r r_v = p_tr.xform(r_v); } -_FORCE_INLINE_ RasterizerCanvasGLES2::TransformMode RasterizerCanvasGLES2::_find_transform_mode(bool p_use_hardware_transform, const Transform2D &p_tr, Transform2D &r_tr) const { - if (!p_use_hardware_transform) { - r_tr = p_tr; - - // decided whether to do translate only for software transform - if ((p_tr.elements[0].x == 1.0) && - (p_tr.elements[0].y == 0.0) && - (p_tr.elements[1].x == 0.0) && - (p_tr.elements[1].y == 1.0)) { - return TM_TRANSLATE; - } else { - return TM_ALL; - } +_FORCE_INLINE_ RasterizerCanvasGLES2::TransformMode RasterizerCanvasGLES2::_find_transform_mode(const Transform2D &p_tr) const { + // decided whether to do translate only for software transform + if ((p_tr.elements[0].x == 1.0) && + (p_tr.elements[0].y == 0.0) && + (p_tr.elements[1].x == 0.0) && + (p_tr.elements[1].y == 1.0)) { + return TM_TRANSLATE; } - return TM_NONE; + return TM_ALL; } #endif // RASTERIZERCANVASGLES2_H