Rewrite culling to be more cache/thread friendly.

-Uses a single array with all data
-Massive performance improvement
-Does not support threads yet, but code is now thread friendly
This commit is contained in:
reduz 2020-12-26 09:05:36 -03:00
parent fb16b1e39b
commit 548524152e
8 changed files with 1076 additions and 743 deletions

View File

@ -206,6 +206,24 @@ public:
count++;
}
// Removes the last element of the array.
// Fails through ERR_FAIL_COND (and changes nothing) when the array is empty.
_FORCE_INLINE_ void pop_back() {
	ERR_FAIL_COND(count == 0);

	const uint32_t last_index = count - 1;
	if (!__has_trivial_destructor(T)) {
		// Only types with a non-trivial destructor need an explicit destructor call.
		page_data[last_index >> page_size_shift][last_index & page_size_mask].~T();
	}

	// `count` still holds the pre-removal size here, which is what both the
	// remainder test and _get_pages_in_use() are evaluated against.
	if (unlikely((count & page_size_mask) == 1)) {
		// One element remained in the last page, so that page must be returned to the pool.
		page_pool->free_page(page_ids[_get_pages_in_use() - 1]);
	}

	count = last_index;
}
void clear() {
//destruct if needed
if (!__has_trivial_destructor(T)) {

View File

@ -1532,7 +1532,7 @@ void RendererSceneRenderForward::_add_geometry_with_material(InstanceBase *p_ins
}
}
void RendererSceneRenderForward::_fill_render_list(const PagedArray<InstanceBase *> &p_instances, PassMode p_pass_mode, bool p_using_sdfgi) {
void RendererSceneRenderForward::_fill_render_list(const PagedArray<InstanceBase *> &p_instances, PassMode p_pass_mode, const CameraMatrix &p_cam_projection, const Transform &p_cam_transform, bool p_using_sdfgi) {
scene_state.current_shader_index = 0;
scene_state.current_material_index = 0;
scene_state.used_sss = false;
@ -1540,6 +1540,10 @@ void RendererSceneRenderForward::_fill_render_list(const PagedArray<InstanceBase
scene_state.used_normal_texture = false;
scene_state.used_depth_texture = false;
Plane near_plane(p_cam_transform.origin, -p_cam_transform.basis.get_axis(Vector3::AXIS_Z));
near_plane.d += p_cam_projection.get_z_near();
float z_max = p_cam_projection.get_z_far() - p_cam_projection.get_z_near();
uint32_t geometry_index = 0;
//fill list
@ -1547,6 +1551,9 @@ void RendererSceneRenderForward::_fill_render_list(const PagedArray<InstanceBase
for (int i = 0; i < (int)p_instances.size(); i++) {
InstanceBase *inst = p_instances[i];
inst->depth = near_plane.distance_to(inst->transform.origin);
inst->depth_layer = CLAMP(int(inst->depth * 16 / z_max), 0, 15);
//add geometry for drawing
switch (inst->base_type) {
case RS::INSTANCE_MESH: {
@ -1782,7 +1789,7 @@ void RendererSceneRenderForward::_render_scene(RID p_render_buffer, const Transf
_update_render_base_uniform_set(); //may have changed due to the above (light buffer enlarged, as an example)
render_list.clear();
_fill_render_list(p_instances, PASS_MODE_COLOR, using_sdfgi);
_fill_render_list(p_instances, PASS_MODE_COLOR, p_cam_projection, p_cam_transform, using_sdfgi);
bool using_sss = !low_end && render_buffer && scene_state.used_sss && sub_surface_scattering_get_quality() != RS::SUB_SURFACE_SCATTERING_QUALITY_DISABLED;
@ -2046,7 +2053,7 @@ void RendererSceneRenderForward::_render_shadow(RID p_framebuffer, const PagedAr
PassMode pass_mode = p_use_dp ? PASS_MODE_SHADOW_DP : PASS_MODE_SHADOW;
_fill_render_list(p_instances, pass_mode);
_fill_render_list(p_instances, pass_mode, p_projection, p_transform);
RID rp_uniform_set = _setup_render_pass_uniform_set(RID(), RID(), RID(), RID(), PagedArray<RID>());
@ -2079,7 +2086,7 @@ void RendererSceneRenderForward::_render_particle_collider_heightfield(RID p_fb,
PassMode pass_mode = PASS_MODE_SHADOW;
_fill_render_list(p_instances, pass_mode);
_fill_render_list(p_instances, pass_mode, p_cam_projection, p_cam_transform);
RID rp_uniform_set = _setup_render_pass_uniform_set(RID(), RID(), RID(), RID(), PagedArray<RID>());
@ -2112,7 +2119,7 @@ void RendererSceneRenderForward::_render_material(const Transform &p_cam_transfo
render_list.clear();
PassMode pass_mode = PASS_MODE_DEPTH_MATERIAL;
_fill_render_list(p_instances, pass_mode);
_fill_render_list(p_instances, pass_mode, p_cam_projection, p_cam_transform);
RID rp_uniform_set = _setup_render_pass_uniform_set(RID(), RID(), RID(), RID(), PagedArray<RID>());
@ -2151,7 +2158,7 @@ void RendererSceneRenderForward::_render_uv2(const PagedArray<InstanceBase *> &p
render_list.clear();
PassMode pass_mode = PASS_MODE_DEPTH_MATERIAL;
_fill_render_list(p_instances, pass_mode);
_fill_render_list(p_instances, pass_mode, CameraMatrix(), Transform());
RID rp_uniform_set = _setup_render_pass_uniform_set(RID(), RID(), RID(), RID(), PagedArray<RID>());
@ -2209,7 +2216,7 @@ void RendererSceneRenderForward::_render_sdfgi(RID p_render_buffers, const Vecto
render_list.clear();
PassMode pass_mode = PASS_MODE_SDF;
_fill_render_list(p_instances, pass_mode);
_fill_render_list(p_instances, pass_mode, CameraMatrix(), Transform());
render_list.sort_by_key(false);
_fill_instances(render_list.elements, render_list.element_count, true);

View File

@ -574,7 +574,7 @@ class RendererSceneRenderForward : public RendererSceneRenderRD {
_FORCE_INLINE_ void _add_geometry(InstanceBase *p_instance, uint32_t p_surface, RID p_material, PassMode p_pass_mode, uint32_t p_geometry_index, bool p_using_sdfgi = false);
_FORCE_INLINE_ void _add_geometry_with_material(InstanceBase *p_instance, uint32_t p_surface, MaterialData *p_material, RID p_material_rid, PassMode p_pass_mode, uint32_t p_geometry_index, bool p_using_sdfgi = false);
void _fill_render_list(const PagedArray<InstanceBase *> &p_instances, PassMode p_pass_mode, bool p_using_sdfgi = false);
void _fill_render_list(const PagedArray<InstanceBase *> &p_instances, PassMode p_pass_mode, const CameraMatrix &p_cam_projection, const Transform &p_cam_transform, bool p_using_sdfgi = false);
Map<Size2i, RID> sdfgi_framebuffer_size_cache;

View File

@ -1153,7 +1153,7 @@ void RendererSceneRenderRD::_sdfgi_update_cascades(RID p_render_buffers) {
RD::get_singleton()->buffer_update(rb->sdfgi->cascades_ubo, 0, sizeof(SDFGI::Cascade::UBO) * SDFGI::MAX_CASCADES, cascade_data, true);
}
void RendererSceneRenderRD::sdfgi_update_probes(RID p_render_buffers, RID p_environment, const PagedArray<RID> &p_directional_light_instances, const RID *p_positional_light_instances, uint32_t p_positional_light_count) {
void RendererSceneRenderRD::sdfgi_update_probes(RID p_render_buffers, RID p_environment, const Vector<RID> &p_directional_lights, const RID *p_positional_light_instances, uint32_t p_positional_light_count) {
RenderBuffers *rb = render_buffers_owner.getornull(p_render_buffers);
ERR_FAIL_COND(rb == nullptr);
if (rb->sdfgi == nullptr) {
@ -1179,12 +1179,12 @@ void RendererSceneRenderRD::sdfgi_update_probes(RID p_render_buffers, RID p_envi
SDGIShader::Light lights[SDFGI::MAX_DYNAMIC_LIGHTS];
uint32_t idx = 0;
for (uint32_t j = 0; j < (uint32_t)p_directional_light_instances.size(); j++) {
for (uint32_t j = 0; j < (uint32_t)p_directional_lights.size(); j++) {
if (idx == SDFGI::MAX_DYNAMIC_LIGHTS) {
break;
}
LightInstance *li = light_instance_owner.getornull(p_directional_light_instances[j]);
LightInstance *li = light_instance_owner.getornull(p_directional_lights[j]);
ERR_CONTINUE(!li);
if (storage->light_directional_is_sky_only(li->light)) {
@ -8485,7 +8485,7 @@ RendererSceneRenderRD::RendererSceneRenderRD(RendererStorageRD *p_storage) {
cluster.lights_instances = memnew_arr(RID, cluster.max_lights);
cluster.lights_shadow_rect_cache = memnew_arr(Rect2i, cluster.max_lights);
cluster.max_directional_lights = 8;
cluster.max_directional_lights = MAX_DIRECTIONAL_LIGHTS;
uint32_t directional_light_buffer_size = cluster.max_directional_lights * sizeof(Cluster::DirectionalLightData);
cluster.directional_lights = memnew_arr(Cluster::DirectionalLightData, cluster.max_directional_lights);
cluster.directional_light_buffer = RD::get_singleton()->uniform_buffer_create(directional_light_buffer_size);

View File

@ -1516,7 +1516,7 @@ public:
virtual int sdfgi_get_pending_region_count(RID p_render_buffers) const;
virtual AABB sdfgi_get_pending_region_bounds(RID p_render_buffers, int p_region) const;
virtual uint32_t sdfgi_get_pending_region_cascade(RID p_render_buffers, int p_region) const;
virtual void sdfgi_update_probes(RID p_render_buffers, RID p_environment, const PagedArray<RID> &p_directional_light_instances, const RID *p_positional_light_instances, uint32_t p_positional_light_count);
virtual void sdfgi_update_probes(RID p_render_buffers, RID p_environment, const Vector<RID> &p_directional_lights, const RID *p_positional_light_instances, uint32_t p_positional_light_count);
RID sdfgi_get_ubo() const { return gi.sdfgi_ubo; }
/* SKY API */

File diff suppressed because it is too large Load Diff

View File

@ -52,7 +52,8 @@ public:
RendererSceneRender *scene_render;
enum {
SDFGI_MAX_CASCADES = 8
SDFGI_MAX_CASCADES = 8,
SDFGI_MAX_REGIONS_PER_CASCADE = 3
};
uint64_t render_pass;
@ -108,6 +109,153 @@ public:
struct Instance;
// Per-plane lookup table: for each axis of the plane normal, stores the index
// into InstanceBounds::bounds that InstanceBounds::in_frustum() reads to build
// the corner it tests against that plane.
struct PlaneSign {
	// Leaves `signs` uninitialised.
	_ALWAYS_INLINE_ PlaneSign() {}

	// A strictly positive normal component selects slot 0/1/2 (the AABB position
	// on that axis); a zero or negative component selects slot 3/4/5
	// (position + size on that axis).
	_ALWAYS_INLINE_ PlaneSign(const Plane &p_plane) {
		signs[0] = (p_plane.normal.x > 0) ? 0 : 3;
		signs[1] = (p_plane.normal.y > 0) ? 1 : 4;
		signs[2] = (p_plane.normal.z > 0) ? 2 : 5;
	}

	uint32_t signs[3];
};
// A set of culling planes together with one precomputed PlaneSign per plane.
// The raw pointers and the count are cached views of the two vectors so that
// InstanceBounds::in_frustum() can iterate without going through Vector.
struct Frustum {
	Vector<Plane> planes;
	Vector<PlaneSign> plane_signs;
	// Default to an empty frustum. Without these initialisers a default-constructed
	// Frustum carried an indeterminate plane_count and dangling pointers, and
	// in_frustum() loops on plane_count.
	const Plane *planes_ptr = nullptr;
	const PlaneSign *plane_signs_ptr = nullptr;
	uint32_t plane_count = 0;

	// Creates an empty frustum (no planes).
	_ALWAYS_INLINE_ Frustum() {}

	// Copies the plane data and re-points the cached pointers at this object's
	// own vectors rather than at the source's.
	_ALWAYS_INLINE_ Frustum(const Frustum &p_frustum) {
		planes = p_frustum.planes;
		plane_signs = p_frustum.plane_signs;
		planes_ptr = planes.ptr();
		plane_signs_ptr = plane_signs.ptr();
		plane_count = p_frustum.plane_count;
	}

	// Same as the copy constructor; the cached pointers are refreshed after the copy.
	_ALWAYS_INLINE_ void operator=(const Frustum &p_frustum) {
		planes = p_frustum.planes;
		plane_signs = p_frustum.plane_signs;
		planes_ptr = planes.ptr();
		plane_signs_ptr = plane_signs.ptr();
		plane_count = p_frustum.plane_count;
	}

	// Builds the frustum from a list of planes, computing one PlaneSign per plane.
	_ALWAYS_INLINE_ Frustum(const Vector<Plane> &p_planes) {
		planes = p_planes;
		// NOTE(review): ptrw() is used here while the copy paths use ptr();
		// presumably this is only to obtain the data pointer - confirm whether
		// the writable accessor is intentional.
		planes_ptr = planes.ptrw();
		plane_count = planes.size();
		for (int i = 0; i < planes.size(); i++) {
			PlaneSign ps(p_planes[i]);
			plane_signs.push_back(ps);
		}

		// Taken after the loop, once plane_signs has reached its final size.
		plane_signs_ptr = plane_signs.ptr();
	}
};
// Compact storage for an instance's bounds.
// Bounds checking is performed before anything else, so the bounds are kept
// separate from the rest of the per-instance data.
struct InstanceBounds {
	// [0..2] = AABB position (x, y, z); [3..5] = AABB position + size (x, y, z).
	real_t bounds[6];

	// Leaves `bounds` uninitialised.
	_ALWAYS_INLINE_ InstanceBounds() {}

	// Flattens an AABB into the six-scalar layout described above.
	_ALWAYS_INLINE_ InstanceBounds(const AABB &p_aabb) {
		const Vector3 &pos = p_aabb.position;
		const Vector3 &extent = p_aabb.size;

		bounds[0] = pos.x;
		bounds[1] = pos.y;
		bounds[2] = pos.z;
		bounds[3] = pos.x + extent.x;
		bounds[4] = pos.y + extent.y;
		bounds[5] = pos.z + extent.z;
	}

	// Returns false as soon as one plane has the selected corner at a
	// non-negative distance; returns true otherwise.
	// This is not a full SAT check, so false positives are possible, but the
	// tradeoff against performance is still very good.
	_ALWAYS_INLINE_ bool in_frustum(const Frustum &p_frustum) const {
		const Plane *plane = p_frustum.planes_ptr;
		const PlaneSign *sign = p_frustum.plane_signs_ptr;

		for (uint32_t remaining = p_frustum.plane_count; remaining > 0; remaining--, plane++, sign++) {
			// Corner picked per axis by the precomputed sign table of this plane.
			const Vector3 corner(
					bounds[sign->signs[0]],
					bounds[sign->signs[1]],
					bounds[sign->signs[2]]);

			if (plane->distance_to(corner) >= 0.0) {
				return false;
			}
		}

		return true;
	}

	// Returns true unless the two boxes are separated (or merely touching)
	// along at least one axis.
	_ALWAYS_INLINE_ bool in_aabb(const AABB &p_aabb) const {
		const Vector3 end = p_aabb.position + p_aabb.size;

		const bool separated =
				bounds[0] >= end.x || bounds[3] <= p_aabb.position.x ||
				bounds[1] >= end.y || bounds[4] <= p_aabb.position.y ||
				bounds[2] >= end.z || bounds[5] <= p_aabb.position.z;

		return !separated;
	}
};
struct InstanceData {
// Store instance pointer as well as common instance processing information,
// to make processing more cache friendly.
// Elements of this type are held in Scenario::instance_data
// (a PagedArray<InstanceData>), next to Scenario::instance_aabbs.
// Bit layout of `flags`.
enum Flags {
// Low 8 bits; presumably hold the instance base type - confirm against the code that fills `flags`.
FLAG_BASE_TYPE_MASK = 0xFF,
// Single-bit flags start at bit 8, above the base type mask.
FLAG_CAST_SHADOWS = (1 << 8),
FLAG_CAST_SHADOWS_ONLY = (1 << 9),
FLAG_REDRAW_IF_VISIBLE = (1 << 10),
FLAG_GEOM_LIGHTING_DIRTY = (1 << 11),
FLAG_GEOM_REFLECTION_DIRTY = (1 << 12),
FLAG_GEOM_DECAL_DIRTY = (1 << 13),
FLAG_GEOM_GI_PROBE_DIRTY = (1 << 14),
FLAG_LIGHTMAP_CAPTURE = (1 << 15),
FLAG_USES_BAKED_LIGHT = (1 << 16),
FLAG_USES_MESH_INSTANCE = (1 << 17),
FLAG_REFLECTION_PROBE_DIRTY = (1 << 18),
};
// Combination of the Flags values above.
uint32_t flags = 0;
uint32_t layer_mask = 0; //for fast layer-mask discard
RID base_rid;
RID instance_data_rid;
// The instance this entry describes; null until assigned.
Instance *instance = nullptr;
};
PagedArrayPool<InstanceBounds> instance_aabb_page_pool;
PagedArrayPool<InstanceData> instance_data_page_pool;
struct Scenario {
enum IndexerType {
INDEXER_GEOMETRY, //for geometry
@ -131,6 +279,9 @@ public:
LocalVector<RID> dynamic_lights;
PagedArray<InstanceBounds> instance_aabbs;
PagedArray<InstanceData> instance_data;
Scenario() {
indexers[INDEXER_GEOMETRY].set_index(INDEXER_GEOMETRY);
indexers[INDEXER_VOLUMES].set_index(INDEXER_VOLUMES);
@ -178,6 +329,7 @@ public:
RID self;
//scenario stuff
DynamicBVH::ID indexer_id;
int32_t array_index;
Scenario *scenario;
SelfList<Instance> scenario_item;
@ -199,7 +351,6 @@ public:
Vector<Color> lightmap_target_sh; //target is used for incrementally changing the SH over time, this avoids pops in some corner cases and when going interior <-> exterior
uint64_t last_render_pass;
uint64_t last_frame_pass;
uint64_t version; // changes to this, and changes to base increase version
@ -240,7 +391,6 @@ public:
lod_begin_hysteresis = 0;
lod_end_hysteresis = 0;
last_render_pass = 0;
last_frame_pass = 0;
version = 1;
base_data = nullptr;
@ -248,6 +398,7 @@ public:
custom_aabb = nullptr;
pair_check = 0;
array_index = -1;
}
~Instance() {
@ -265,28 +416,17 @@ public:
struct InstanceGeometryData : public InstanceBaseData {
Set<Instance *> lights;
bool lighting_dirty;
bool can_cast_shadows;
bool material_is_animated;
Set<Instance *> decals;
bool decal_dirty;
Set<Instance *> reflection_probes;
bool reflection_dirty;
Set<Instance *> gi_probes;
bool gi_probes_dirty;
Set<Instance *> lightmap_captures;
InstanceGeometryData() {
lighting_dirty = false;
reflection_dirty = true;
can_cast_shadows = true;
material_is_animated = true;
gi_probes_dirty = true;
decal_dirty = true;
}
};
@ -296,14 +436,12 @@ public:
Set<Instance *> geometries;
RID instance;
bool reflection_dirty;
SelfList<InstanceReflectionProbeData> update_list;
int render_step;
InstanceReflectionProbeData() :
update_list(this) {
reflection_dirty = true;
render_step = -1;
}
};
@ -334,8 +472,6 @@ public:
RS::LightBakeMode bake_mode;
uint32_t max_sdfgi_cascade = 2;
uint64_t sdfgi_cascade_light_pass = 0;
InstanceLightData() {
bake_mode = RS::LIGHT_BAKE_DISABLED;
shadow_dirty = true;
@ -468,25 +604,19 @@ public:
PagedArrayPool<RID> rid_cull_page_pool;
PagedArray<Instance *> instance_cull_result;
PagedArray<RID> mesh_instance_cull_result;
PagedArray<RendererSceneRender::InstanceBase *> geometry_instances_to_render;
PagedArray<Instance *> instance_shadow_cull_result;
PagedArray<RendererSceneRender::InstanceBase *> geometry_instances_to_shadow_render;
PagedArray<Instance *> instance_sdfgi_cull_result;
PagedArray<RendererSceneRender::InstanceBase *> geometry_instances_to_sdfgi_render;
PagedArray<Instance *> light_cull_result;
PagedArray<RendererSceneRender::InstanceBase *> lightmap_cull_result;
PagedArray<Instance *> directional_shadow_cull_result;
PagedArray<RID> reflection_probe_instance_cull_result;
PagedArray<RID> light_instance_cull_result;
PagedArray<RID> directional_light_cull_result;
PagedArray<RID> gi_probe_instance_cull_result;
PagedArray<RID> decal_instance_cull_result;
PagedArray<RID> sdfgi_cascade_lights[SDFGI_MAX_CASCADES];
uint64_t sdfgi_light_cull_pass = 0;
int directional_light_count;
RID_PtrOwner<Instance> instance_owner;
bool pair_volumes_to_mesh; // used in traditional forward, unnecessary on clustered
@ -536,10 +666,54 @@ public:
_FORCE_INLINE_ void _update_instance_lightmap_captures(Instance *p_instance);
void _unpair_instance(Instance *p_instance);
void _light_instance_setup_directional_shadow(int p_shadow_index, Instance *p_instance, const Transform p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_orthogonal, bool p_cam_vaspect);
_FORCE_INLINE_ bool _light_instance_update_shadow(Instance *p_instance, const Transform p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_orthogonal, bool p_cam_vaspect, RID p_shadow_atlas, Scenario *p_scenario, float p_scren_lod_threshold);
RID _render_get_environment(RID p_camera, RID p_scenario);
// Working state for scene culling: the camera frustum plus the per-shadow and
// per-SDFGI-region result lists.
struct Cull {
	// One entry per directional light shadow.
	struct Shadow {
		RID light_instance;
		// One entry per shadow cascade; each has its own frustum and result list.
		struct Cascade {
			Frustum frustum;

			CameraMatrix projection;
			Transform transform;
			real_t zfar;
			real_t split;
			real_t shadow_texel_size;
			real_t bias_scale;
			real_t range_begin;
			Vector2 uv_scale;

			PagedArray<RendererSceneRender::InstanceBase *> cull_result;

		} cascades[RendererSceneRender::MAX_DIRECTIONAL_LIGHT_CASCADES]; //max 4 cascades
		// Number of entries of `cascades` in use. Starts at zero, like the SDFGI
		// counters below, so a fresh Shadow never reports stale cascades.
		uint32_t cascade_count = 0;

	} shadows[RendererSceneRender::MAX_DIRECTIONAL_LIGHTS];

	// Number of entries of `shadows` in use. Starts at zero for the same reason.
	uint32_t shadow_count = 0;

	struct SDFGI {
		// Fixed-size arrays, because the SDFGI functions expect arrays.
		// All three region arrays are sized for SDFGI_MAX_REGIONS_PER_CASCADE (3)
		// regions in each of the SDFGI_MAX_CASCADES cascades.
		PagedArray<RendererSceneRender::InstanceBase *> region_cull_result[SDFGI_MAX_CASCADES * SDFGI_MAX_REGIONS_PER_CASCADE];
		AABB region_aabb[SDFGI_MAX_CASCADES * SDFGI_MAX_REGIONS_PER_CASCADE]; //max 3 regions per cascade
		uint32_t region_cascade[SDFGI_MAX_CASCADES * SDFGI_MAX_REGIONS_PER_CASCADE]; //max 3 regions per cascade
		uint32_t region_count = 0;

		PagedArray<RID> cascade_lights[SDFGI_MAX_CASCADES];
		uint32_t cascade_light_index[SDFGI_MAX_CASCADES];
		uint32_t cascade_light_count = 0;

	} sdfgi;

	// NOTE(review): presumably guards the result lists once culling runs on
	// several threads - confirm against the users of this struct.
	SpinLock lock;

	// Frustum used for the main cull.
	Frustum frustum;
} cull;
bool _render_reflection_probe_step(Instance *p_instance, int p_step);
void _prepare_scene(const Transform p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_orthogonal, bool p_cam_vaspect, RID p_render_buffers, RID p_environment, uint32_t p_visible_layers, RID p_scenario, RID p_shadow_atlas, RID p_reflection_probe, float p_screen_lod_threshold, bool p_using_shadows = true);
void _render_scene(RID p_render_buffers, const Transform p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_orthogonal, RID p_environment, RID p_force_camera_effects, RID p_scenario, RID p_shadow_atlas, RID p_reflection_probe, int p_reflection_probe_pass, float p_screen_lod_threshold);

View File

@ -37,9 +37,14 @@
class RendererSceneRender {
public:
// Fixed upper bounds for directional lights.
// These values size the Cull::shadows and Shadow::cascades arrays in the scene
// culling code, and MAX_DIRECTIONAL_LIGHTS is also assigned to
// cluster.max_directional_lights in RendererSceneRenderRD.
enum {
MAX_DIRECTIONAL_LIGHTS = 8,
MAX_DIRECTIONAL_LIGHT_CASCADES = 4
};
/* SHADOW ATLAS API */
virtual RID shadow_atlas_create() = 0;
virtual RID
shadow_atlas_create() = 0;
virtual void shadow_atlas_set_size(RID p_atlas, int p_size) = 0;
virtual void shadow_atlas_set_quadrant_subdivision(RID p_atlas, int p_quadrant, int p_subdivision) = 0;
virtual bool shadow_atlas_update_light(RID p_atlas, RID p_light_intance, float p_coverage, uint64_t p_light_version) = 0;
@ -56,7 +61,7 @@ public:
virtual int sdfgi_get_pending_region_count(RID p_render_buffers) const = 0;
virtual AABB sdfgi_get_pending_region_bounds(RID p_render_buffers, int p_region) const = 0;
virtual uint32_t sdfgi_get_pending_region_cascade(RID p_render_buffers, int p_region) const = 0;
virtual void sdfgi_update_probes(RID p_render_buffers, RID p_environment, const PagedArray<RID> &p_directionals, const RID *p_positional_light_instances, uint32_t p_positional_light_count) = 0;
virtual void sdfgi_update_probes(RID p_render_buffers, RID p_environment, const Vector<RID> &p_directional_lights, const RID *p_positional_light_instances, uint32_t p_positional_light_count) = 0;
/* SKY API */