Optimize lightmapper using triangle clusters on the acceleration structure.

Add an additional layer of indirection to the grid used by the lightmapper so that each cell stores fixed-size triangle clusters. This greatly speeds up baking times on scenes with high triangle density, because the tracer checks each cluster's AABB first and can skip the triangle checks that would otherwise be performed unnecessarily.
This commit is contained in:
Dario 2023-10-13 11:39:48 -03:00
parent 325cc0178e
commit 47214ea9f5
4 changed files with 292 additions and 79 deletions

View File

@ -124,7 +124,7 @@ void LightmapperRD::add_probe(const Vector3 &p_position) {
probe_positions.push_back(probe); probe_positions.push_back(probe);
} }
void LightmapperRD::_plot_triangle_into_triangle_index_list(int p_size, const Vector3i &p_ofs, const AABB &p_bounds, const Vector3 p_points[3], uint32_t p_triangle_index, LocalVector<TriangleSort> &triangles, uint32_t p_grid_size) { void LightmapperRD::_plot_triangle_into_triangle_index_list(int p_size, const Vector3i &p_ofs, const AABB &p_bounds, const Vector3 p_points[3], uint32_t p_triangle_index, LocalVector<TriangleSort> &p_triangles_sort, uint32_t p_grid_size) {
int half_size = p_size / 2; int half_size = p_size / 2;
for (int i = 0; i < 8; i++) { for (int i = 0; i < 8; i++) {
@ -159,13 +159,69 @@ void LightmapperRD::_plot_triangle_into_triangle_index_list(int p_size, const Ve
TriangleSort ts; TriangleSort ts;
ts.cell_index = n.x + (n.y * p_grid_size) + (n.z * p_grid_size * p_grid_size); ts.cell_index = n.x + (n.y * p_grid_size) + (n.z * p_grid_size * p_grid_size);
ts.triangle_index = p_triangle_index; ts.triangle_index = p_triangle_index;
triangles.push_back(ts); ts.triangle_aabb.position = p_points[0];
ts.triangle_aabb.size = Vector3();
ts.triangle_aabb.expand_to(p_points[1]);
ts.triangle_aabb.expand_to(p_points[2]);
p_triangles_sort.push_back(ts);
} else { } else {
_plot_triangle_into_triangle_index_list(half_size, n, aabb, p_points, p_triangle_index, triangles, p_grid_size); _plot_triangle_into_triangle_index_list(half_size, n, aabb, p_points, p_triangle_index, p_triangles_sort, p_grid_size);
} }
} }
} }
// Recursively groups the triangles in the range [p_index_start, p_index_start + p_count) of p_triangle_sort
// into clusters of at most p_cluster_size triangles and stores one bounding box per cluster in p_cluster_aabb,
// starting at p_cluster_index.
//
// On every step the range is sorted along the longest axis of its combined AABB and split in two. The left half
// always holds a whole number of clusters, so cluster `k` of the range covers the triangles
// [p_index_start + k * p_cluster_size, p_index_start + (k + 1) * p_cluster_size) and only the very last
// cluster of the range may be partially filled.
//
// p_cluster_size   Maximum amount of triangles per cluster. Must be greater than zero.
// p_cluster_index  Index in p_cluster_aabb of the first cluster produced by this range.
// p_index_start    Index in p_triangle_sort of the first triangle of the range.
// p_count          Amount of triangles in the range. Nothing is done when it is zero.
// p_triangle_sort  Triangle list; the elements of the range are reordered in place.
// p_cluster_aabb   Output bounding boxes. Must already be large enough to hold every cluster of the range.
void LightmapperRD::_sort_triangle_clusters(uint32_t p_cluster_size, uint32_t p_cluster_index, uint32_t p_index_start, uint32_t p_count, LocalVector<TriangleSort> &p_triangle_sort, LocalVector<ClusterAABB> &p_cluster_aabb) {
	if (p_count == 0) {
		return;
	}

	DEV_ASSERT(p_cluster_size > 0);

	// Compute AABB for all triangles in the range.
	AABB cluster_aabb = p_triangle_sort[p_index_start].triangle_aabb;
	for (uint32_t i = 1; i < p_count; i++) {
		cluster_aabb.merge_with(p_triangle_sort[p_index_start + i].triangle_aabb);
	}

	if (p_count <= p_cluster_size) {
		// The range fits in a single cluster: store its bounds and stop.
		ClusterAABB &aabb = p_cluster_aabb[p_cluster_index];
		Vector3 aabb_end = cluster_aabb.get_end();
		aabb.min_bounds[0] = cluster_aabb.position.x;
		aabb.min_bounds[1] = cluster_aabb.position.y;
		aabb.min_bounds[2] = cluster_aabb.position.z;
		aabb.max_bounds[0] = aabb_end.x;
		aabb.max_bounds[1] = aabb_end.y;
		aabb.max_bounds[2] = aabb_end.z;
		return;
	}

	// Sort the range by triangle AABB center along the longest axis before splitting it.
	int longest_axis_index = cluster_aabb.get_longest_axis_index();
	switch (longest_axis_index) {
		case 0: {
			SortArray<TriangleSort, TriangleSortAxis<0>> triangle_sorter_x;
			triangle_sorter_x.sort(&p_triangle_sort[p_index_start], p_count);
		} break;
		case 1: {
			SortArray<TriangleSort, TriangleSortAxis<1>> triangle_sorter_y;
			triangle_sorter_y.sort(&p_triangle_sort[p_index_start], p_count);
		} break;
		case 2: {
			SortArray<TriangleSort, TriangleSortAxis<2>> triangle_sorter_z;
			triangle_sorter_z.sort(&p_triangle_sort[p_index_start], p_count);
		} break;
		default:
			DEV_ASSERT(false && "Invalid axis returned by AABB.");
			break;
	}

	uint32_t left_cluster_count = next_power_of_2(p_count / 2);
	left_cluster_count = MAX(left_cluster_count, p_cluster_size);
	// Round down to a whole number of clusters. This is a no-op when the cluster size is a power of two, but
	// without it any other cluster size would leave partially filled clusters on the left half and make the
	// right half start on a cluster index that the left half already uses.
	left_cluster_count -= left_cluster_count % p_cluster_size;
	left_cluster_count = MIN(left_cluster_count, p_count);

	_sort_triangle_clusters(p_cluster_size, p_cluster_index, p_index_start, left_cluster_count, p_triangle_sort, p_cluster_aabb);

	if (left_cluster_count < p_count) {
		// The left half is a whole number of clusters, so this division is exact.
		uint32_t cluster_index_right = p_cluster_index + (left_cluster_count / p_cluster_size);
		_sort_triangle_clusters(p_cluster_size, cluster_index_right, p_index_start + left_cluster_count, p_count - left_cluster_count, p_triangle_sort, p_cluster_aabb);
	}
}
Lightmapper::BakeError LightmapperRD::_blit_meshes_into_atlas(int p_max_texture_size, Vector<Ref<Image>> &albedo_images, Vector<Ref<Image>> &emission_images, AABB &bounds, Size2i &atlas_size, int &atlas_slices, BakeStepFunc p_step_function, void *p_bake_userdata) { Lightmapper::BakeError LightmapperRD::_blit_meshes_into_atlas(int p_max_texture_size, Vector<Ref<Image>> &albedo_images, Vector<Ref<Image>> &emission_images, AABB &bounds, Size2i &atlas_size, int &atlas_slices, BakeStepFunc p_step_function, void *p_bake_userdata) {
Vector<Size2i> sizes; Vector<Size2i> sizes;
@ -281,7 +337,7 @@ Lightmapper::BakeError LightmapperRD::_blit_meshes_into_atlas(int p_max_texture_
return BAKE_OK; return BAKE_OK;
} }
void LightmapperRD::_create_acceleration_structures(RenderingDevice *rd, Size2i atlas_size, int atlas_slices, AABB &bounds, int grid_size, Vector<Probe> &p_probe_positions, GenerateProbes p_generate_probes, Vector<int> &slice_triangle_count, Vector<int> &slice_seam_count, RID &vertex_buffer, RID &triangle_buffer, RID &lights_buffer, RID &triangle_cell_indices_buffer, RID &probe_positions_buffer, RID &grid_texture, RID &seams_buffer, BakeStepFunc p_step_function, void *p_bake_userdata) { void LightmapperRD::_create_acceleration_structures(RenderingDevice *rd, Size2i atlas_size, int atlas_slices, AABB &bounds, int grid_size, uint32_t p_cluster_size, Vector<Probe> &p_probe_positions, GenerateProbes p_generate_probes, Vector<int> &slice_triangle_count, Vector<int> &slice_seam_count, RID &vertex_buffer, RID &triangle_buffer, RID &lights_buffer, RID &r_triangle_indices_buffer, RID &r_cluster_indices_buffer, RID &r_cluster_aabbs_buffer, RID &probe_positions_buffer, RID &grid_texture, RID &seams_buffer, BakeStepFunc p_step_function, void *p_bake_userdata) {
HashMap<Vertex, uint32_t, VertexHash> vertex_map; HashMap<Vertex, uint32_t, VertexHash> vertex_map;
//fill triangles array and vertex array //fill triangles array and vertex array
@ -433,31 +489,70 @@ void LightmapperRD::_create_acceleration_structures(RenderingDevice *rd, Size2i
//sort it //sort it
triangle_sort.sort(); triangle_sort.sort();
LocalVector<uint32_t> cluster_indices;
LocalVector<ClusterAABB> cluster_aabbs;
Vector<uint32_t> triangle_indices; Vector<uint32_t> triangle_indices;
triangle_indices.resize(triangle_sort.size()); triangle_indices.resize(triangle_sort.size());
Vector<uint32_t> grid_indices; Vector<uint32_t> grid_indices;
grid_indices.resize(grid_size * grid_size * grid_size * 2); grid_indices.resize(grid_size * grid_size * grid_size * 2);
memset(grid_indices.ptrw(), 0, grid_indices.size() * sizeof(uint32_t)); memset(grid_indices.ptrw(), 0, grid_indices.size() * sizeof(uint32_t));
Vector<bool> solid;
solid.resize(grid_size * grid_size * grid_size);
memset(solid.ptrw(), 0, solid.size() * sizeof(bool));
{ {
uint32_t *tiw = triangle_indices.ptrw(); // Fill grid with cell indices.
uint32_t last_cell = 0xFFFFFFFF; uint32_t last_cell = 0xFFFFFFFF;
uint32_t *giw = grid_indices.ptrw(); uint32_t *giw = grid_indices.ptrw();
bool *solidw = solid.ptrw(); uint32_t cluster_count = 0;
uint32_t solid_cell_count = 0;
for (uint32_t i = 0; i < triangle_sort.size(); i++) { for (uint32_t i = 0; i < triangle_sort.size(); i++) {
uint32_t cell = triangle_sort[i].cell_index; uint32_t cell = triangle_sort[i].cell_index;
if (cell != last_cell) { if (cell != last_cell) {
//cell changed, update pointer to indices giw[cell * 2 + 1] = solid_cell_count;
giw[cell * 2 + 1] = i; solid_cell_count++;
solidw[cell] = true;
} }
tiw[i] = triangle_sort[i].triangle_index;
giw[cell * 2]++; //update counter if ((giw[cell * 2] % p_cluster_size) == 0) {
// Add an extra cluster every time the triangle counter reaches a multiple of the cluster size.
cluster_count++;
}
giw[cell * 2]++;
last_cell = cell; last_cell = cell;
} }
// Build fixed-size triangle clusters for all the cells to speed up the traversal. A cell can hold multiple clusters that each contain a fixed
// amount of triangles and an AABB. The tracer will check against the AABBs first to know whether it needs to visit the cell's triangles.
//
// The building algorithm recursively divides the triangles contained inside each cell, sorting them along the longest axis of their combined AABB on each step.
//
// - If the amount of triangles is less than or equal to the cluster size, the AABB will be stored and the algorithm stops.
//
// - The division by two is increased to the next power of two of half the amount of triangles (with cluster size as the minimum value) to
// ensure the first half always fills the cluster.
cluster_indices.resize(solid_cell_count * 2);
cluster_aabbs.resize(cluster_count);
uint32_t i = 0;
uint32_t cluster_index = 0;
uint32_t solid_cell_index = 0;
uint32_t *tiw = triangle_indices.ptrw();
while (i < triangle_sort.size()) {
cluster_indices[solid_cell_index * 2] = cluster_index;
cluster_indices[solid_cell_index * 2 + 1] = i;
uint32_t cell = triangle_sort[i].cell_index;
uint32_t triangle_count = giw[cell * 2];
uint32_t cell_cluster_count = (triangle_count + p_cluster_size - 1) / p_cluster_size;
_sort_triangle_clusters(p_cluster_size, cluster_index, i, triangle_count, triangle_sort, cluster_aabbs);
for (uint32_t j = 0; j < triangle_count; j++) {
tiw[i + j] = triangle_sort[i + j].triangle_index;
}
i += triangle_count;
cluster_index += cell_cluster_count;
solid_cell_index++;
}
} }
#if 0 #if 0
for (int i = 0; i < grid_size; i++) { for (int i = 0; i < grid_size; i++) {
@ -507,7 +602,13 @@ void LightmapperRD::_create_acceleration_structures(RenderingDevice *rd, Size2i
triangle_buffer = rd->storage_buffer_create(tb.size(), tb); triangle_buffer = rd->storage_buffer_create(tb.size(), tb);
Vector<uint8_t> tib = triangle_indices.to_byte_array(); Vector<uint8_t> tib = triangle_indices.to_byte_array();
triangle_cell_indices_buffer = rd->storage_buffer_create(tib.size(), tib); r_triangle_indices_buffer = rd->storage_buffer_create(tib.size(), tib);
Vector<uint8_t> cib = cluster_indices.to_byte_array();
r_cluster_indices_buffer = rd->storage_buffer_create(cib.size(), cib);
Vector<uint8_t> cab = cluster_aabbs.to_byte_array();
r_cluster_aabbs_buffer = rd->storage_buffer_create(cab.size(), cab);
Vector<uint8_t> lb = lights.to_byte_array(); Vector<uint8_t> lb = lights.to_byte_array();
if (lb.size() == 0) { if (lb.size() == 0) {
@ -1020,7 +1121,9 @@ LightmapperRD::BakeError LightmapperRD::bake(BakeQuality p_quality, bool p_use_d
RID vertex_buffer; RID vertex_buffer;
RID triangle_buffer; RID triangle_buffer;
RID lights_buffer; RID lights_buffer;
RID triangle_cell_indices_buffer; RID triangle_indices_buffer;
RID cluster_indices_buffer;
RID cluster_aabbs_buffer;
RID grid_texture; RID grid_texture;
RID seams_buffer; RID seams_buffer;
RID probe_positions_buffer; RID probe_positions_buffer;
@ -1032,12 +1135,15 @@ LightmapperRD::BakeError LightmapperRD::bake(BakeQuality p_quality, bool p_use_d
rd->free(vertex_buffer); \ rd->free(vertex_buffer); \
rd->free(triangle_buffer); \ rd->free(triangle_buffer); \
rd->free(lights_buffer); \ rd->free(lights_buffer); \
rd->free(triangle_cell_indices_buffer); \ rd->free(triangle_indices_buffer); \
rd->free(cluster_indices_buffer); \
rd->free(cluster_aabbs_buffer); \
rd->free(grid_texture); \ rd->free(grid_texture); \
rd->free(seams_buffer); \ rd->free(seams_buffer); \
rd->free(probe_positions_buffer); rd->free(probe_positions_buffer);
_create_acceleration_structures(rd, atlas_size, atlas_slices, bounds, grid_size, probe_positions, p_generate_probes, slice_triangle_count, slice_seam_count, vertex_buffer, triangle_buffer, lights_buffer, triangle_cell_indices_buffer, probe_positions_buffer, grid_texture, seams_buffer, p_step_function, p_bake_userdata); const uint32_t cluster_size = 16;
_create_acceleration_structures(rd, atlas_size, atlas_slices, bounds, grid_size, cluster_size, probe_positions, p_generate_probes, slice_triangle_count, slice_seam_count, vertex_buffer, triangle_buffer, lights_buffer, triangle_indices_buffer, cluster_indices_buffer, cluster_aabbs_buffer, probe_positions_buffer, grid_texture, seams_buffer, p_step_function, p_bake_userdata);
// Create global bake parameters buffer. // Create global bake parameters buffer.
BakeParameters bake_parameters; BakeParameters bake_parameters;
@ -1133,7 +1239,7 @@ LightmapperRD::BakeError LightmapperRD::bake(BakeQuality p_quality, bool p_use_d
RD::Uniform u; RD::Uniform u;
u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER;
u.binding = 3; u.binding = 3;
u.append_id(triangle_cell_indices_buffer); u.append_id(triangle_indices_buffer);
base_uniforms.push_back(u); base_uniforms.push_back(u);
} }
{ {
@ -1185,6 +1291,20 @@ LightmapperRD::BakeError LightmapperRD::bake(BakeQuality p_quality, bool p_use_d
u.append_id(sampler); u.append_id(sampler);
base_uniforms.push_back(u); base_uniforms.push_back(u);
} }
{
RD::Uniform u;
u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER;
u.binding = 11;
u.append_id(cluster_indices_buffer);
base_uniforms.push_back(u);
}
{
RD::Uniform u;
u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER;
u.binding = 12;
u.append_id(cluster_aabbs_buffer);
base_uniforms.push_back(u);
}
} }
RID raster_base_uniform = rd->uniform_set_create(base_uniforms, rasterize_shader, 0); RID raster_base_uniform = rd->uniform_set_create(base_uniforms, rasterize_shader, 0);
@ -1230,6 +1350,8 @@ LightmapperRD::BakeError LightmapperRD::bake(BakeQuality p_quality, bool p_use_d
Ref<RDShaderFile> compute_shader; Ref<RDShaderFile> compute_shader;
String defines = ""; String defines = "";
defines += "\n#define CLUSTER_SIZE " + uitos(cluster_size) + "\n";
if (p_bake_sh) { if (p_bake_sh) {
defines += "\n#define USE_SH_LIGHTMAPS\n"; defines += "\n#define USE_SH_LIGHTMAPS\n";
} }

View File

@ -192,6 +192,13 @@ class LightmapperRD : public Lightmapper {
} }
}; };
// Bounding box of one triangle cluster, laid out for direct upload to the GPU: the array of these structs is
// converted to bytes as-is and read by the shaders through their own `ClusterAABB` declaration
// (vec3 + uint, vec3 + uint), so the field order and the 32-byte size must not change.
struct ClusterAABB {
	float min_bounds[3] = {}; // Minimum corner (x, y, z).
	float pad0 = 0.0f; // Fills the slot after min_bounds so max_bounds starts at byte 16.
	float max_bounds[3] = {}; // Maximum corner (x, y, z).
	float pad1 = 0.0f; // Fills the slot after max_bounds so the struct is 32 bytes.
};
static_assert(sizeof(ClusterAABB) == 32, "ClusterAABB must match the 32-byte layout of the shader-side struct.");
Vector<MeshInstance> mesh_instances; Vector<MeshInstance> mesh_instances;
Vector<Light> lights; Vector<Light> lights;
@ -199,12 +206,22 @@ class LightmapperRD : public Lightmapper {
struct TriangleSort { struct TriangleSort {
uint32_t cell_index = 0; uint32_t cell_index = 0;
uint32_t triangle_index = 0; uint32_t triangle_index = 0;
AABB triangle_aabb;
bool operator<(const TriangleSort &p_triangle_sort) const { bool operator<(const TriangleSort &p_triangle_sort) const {
return cell_index < p_triangle_sort.cell_index; //sorting by triangle index in this case makes no sense return cell_index < p_triangle_sort.cell_index; //sorting by triangle index in this case makes no sense
} }
}; };
// Comparator that orders triangles by the center of their AABB along a single axis.
// T selects the component of the center that is compared (0 = X, 1 = Y, 2 = Z).
template <int T>
struct TriangleSortAxis {
	bool operator()(const TriangleSort &p_a, const TriangleSort &p_b) const {
		const auto center_a = p_a.triangle_aabb.get_center()[T];
		const auto center_b = p_b.triangle_aabb.get_center()[T];
		return center_a < center_b;
	}
};
void _plot_triangle_into_triangle_index_list(int p_size, const Vector3i &p_ofs, const AABB &p_bounds, const Vector3 p_points[3], uint32_t p_triangle_index, LocalVector<TriangleSort> &triangles, uint32_t p_grid_size); void _plot_triangle_into_triangle_index_list(int p_size, const Vector3i &p_ofs, const AABB &p_bounds, const Vector3 p_points[3], uint32_t p_triangle_index, LocalVector<TriangleSort> &triangles, uint32_t p_grid_size);
void _sort_triangle_clusters(uint32_t p_cluster_size, uint32_t p_cluster_index, uint32_t p_index_start, uint32_t p_count, LocalVector<TriangleSort> &p_triangle_sort, LocalVector<ClusterAABB> &p_cluster_aabb);
struct RasterPushConstant { struct RasterPushConstant {
float atlas_size[2] = {}; float atlas_size[2] = {};
@ -250,7 +267,7 @@ class LightmapperRD : public Lightmapper {
}; };
BakeError _blit_meshes_into_atlas(int p_max_texture_size, Vector<Ref<Image>> &albedo_images, Vector<Ref<Image>> &emission_images, AABB &bounds, Size2i &atlas_size, int &atlas_slices, BakeStepFunc p_step_function, void *p_bake_userdata); BakeError _blit_meshes_into_atlas(int p_max_texture_size, Vector<Ref<Image>> &albedo_images, Vector<Ref<Image>> &emission_images, AABB &bounds, Size2i &atlas_size, int &atlas_slices, BakeStepFunc p_step_function, void *p_bake_userdata);
void _create_acceleration_structures(RenderingDevice *rd, Size2i atlas_size, int atlas_slices, AABB &bounds, int grid_size, Vector<Probe> &probe_positions, GenerateProbes p_generate_probes, Vector<int> &slice_triangle_count, Vector<int> &slice_seam_count, RID &vertex_buffer, RID &triangle_buffer, RID &lights_buffer, RID &triangle_cell_indices_buffer, RID &probe_positions_buffer, RID &grid_texture, RID &seams_buffer, BakeStepFunc p_step_function, void *p_bake_userdata); void _create_acceleration_structures(RenderingDevice *rd, Size2i atlas_size, int atlas_slices, AABB &bounds, int grid_size, uint32_t p_cluster_size, Vector<Probe> &probe_positions, GenerateProbes p_generate_probes, Vector<int> &slice_triangle_count, Vector<int> &slice_seam_count, RID &vertex_buffer, RID &triangle_buffer, RID &lights_buffer, RID &r_triangle_indices_buffer, RID &r_cluster_indices_buffer, RID &r_cluster_aabbs_buffer, RID &probe_positions_buffer, RID &grid_texture, RID &seams_buffer, BakeStepFunc p_step_function, void *p_bake_userdata);
void _raster_geometry(RenderingDevice *rd, Size2i atlas_size, int atlas_slices, int grid_size, AABB bounds, float p_bias, Vector<int> slice_triangle_count, RID position_tex, RID unocclude_tex, RID normal_tex, RID raster_depth_buffer, RID rasterize_shader, RID raster_base_uniform); void _raster_geometry(RenderingDevice *rd, Size2i atlas_size, int atlas_slices, int grid_size, AABB bounds, float p_bias, Vector<int> slice_triangle_count, RID position_tex, RID unocclude_tex, RID normal_tex, RID raster_depth_buffer, RID rasterize_shader, RID raster_base_uniform);
BakeError _dilate(RenderingDevice *rd, Ref<RDShaderFile> &compute_shader, RID &compute_base_uniform_set, PushConstant &push_constant, RID &source_light_tex, RID &dest_light_tex, const Size2i &atlas_size, int atlas_slices); BakeError _dilate(RenderingDevice *rd, Ref<RDShaderFile> &compute_shader, RID &compute_base_uniform_set, PushConstant &push_constant, RID &source_light_tex, RID &dest_light_tex, const Size2i &atlas_size, int atlas_slices);

View File

@ -42,15 +42,22 @@ struct Triangle {
uint pad1; uint pad1;
}; };
// Axis-aligned bounding box of one triangle cluster, as stored in the ClusterAABBs storage buffer.
// The field order mirrors the ClusterAABB struct that the C++ lightmapper fills in and uploads.
struct ClusterAABB {
vec3 min_bounds; // Minimum corner of the box.
uint pad0; // Not read by the visible shader code; presumably padding after min_bounds.
vec3 max_bounds; // Maximum corner of the box.
uint pad1; // Not read by the visible shader code; presumably padding after max_bounds.
};
layout(set = 0, binding = 2, std430) restrict readonly buffer Triangles { layout(set = 0, binding = 2, std430) restrict readonly buffer Triangles {
Triangle data[]; Triangle data[];
} }
triangles; triangles;
layout(set = 0, binding = 3, std430) restrict readonly buffer GridIndices { layout(set = 0, binding = 3, std430) restrict readonly buffer TriangleIndices {
uint data[]; uint data[];
} }
grid_indices; triangle_indices;
#define LIGHT_TYPE_DIRECTIONAL 0 #define LIGHT_TYPE_DIRECTIONAL 0
#define LIGHT_TYPE_OMNI 1 #define LIGHT_TYPE_OMNI 1
@ -104,6 +111,16 @@ layout(set = 0, binding = 9) uniform texture2DArray emission_tex;
layout(set = 0, binding = 10) uniform sampler linear_sampler; layout(set = 0, binding = 10) uniform sampler linear_sampler;
// Two uints per grid cell that contains triangles. The ray tracer indexes this buffer with the .y
// component of the grid texel: data[cell * 2] is the index of the cell's first cluster in cluster_aabbs and
// data[cell * 2 + 1] is the offset of the cell's first triangle in triangle_indices.
layout(set = 0, binding = 11, std430) restrict readonly buffer ClusterIndices {
uint data[];
}
cluster_indices;
// One bounding box per triangle cluster. The clusters of a cell are stored contiguously, starting at the
// index read from cluster_indices for that cell.
layout(set = 0, binding = 12, std430) restrict readonly buffer ClusterAABBs {
ClusterAABB data[];
}
cluster_aabbs;
// Fragment action constants // Fragment action constants
const uint FA_NONE = 0; const uint FA_NONE = 0;
const uint FA_SMOOTHEN_POSITION = 1; const uint FA_SMOOTHEN_POSITION = 1;

View File

@ -119,6 +119,17 @@ const uint RAY_FRONT = 1;
const uint RAY_BACK = 2; const uint RAY_BACK = 2;
const uint RAY_ANY = 3; const uint RAY_ANY = 3;
// Slab test between a ray and an axis-aligned box.
// p_from is the ray origin and p_inv_dir the per-component reciprocal of the ray direction.
// Returns true when the largest per-axis entry distance does not exceed the smallest per-axis exit distance.
bool ray_box_test(vec3 p_from, vec3 p_inv_dir, vec3 p_box_min, vec3 p_box_max) {
	vec3 to_min = (p_box_min - p_from) * p_inv_dir;
	vec3 to_max = (p_box_max - p_from) * p_inv_dir;
	vec3 near_planes = min(to_min, to_max);
	vec3 far_planes = max(to_min, to_max);
	float t_enter = max(near_planes.x, max(near_planes.y, near_planes.z));
	float t_leave = min(far_planes.x, min(far_planes.y, far_planes.z));
	return t_enter <= t_leave;
}
// When CLUSTER_SIZE (defined by the C++ side when the shader is compiled) is larger than 32, enable the
// extra `while` loop that trace_ray wraps around its per-cluster triangle tests.
// NOTE(review): that loop advances by CLUSTER_SIZE while triangle hits are collected in a 32-bit mask —
// confirm cluster sizes above 32 are actually handled before raising the cluster size.
#if CLUSTER_SIZE > 32
#define CLUSTER_TRIANGLE_ITERATION
#endif
uint trace_ray(vec3 p_from, vec3 p_to, bool p_any_hit, out float r_distance, out vec3 r_normal, out uint r_triangle, out vec3 r_barycentric) { uint trace_ray(vec3 p_from, vec3 p_to, bool p_any_hit, out float r_distance, out vec3 r_normal, out uint r_triangle, out vec3 r_barycentric) {
// World coordinates. // World coordinates.
vec3 rel = p_to - p_from; vec3 rel = p_to - p_from;
@ -142,24 +153,60 @@ uint trace_ray(vec3 p_from, vec3 p_to, bool p_any_hit, out float r_distance, out
uint iters = 0; uint iters = 0;
while (all(greaterThanEqual(icell, ivec3(0))) && all(lessThan(icell, ivec3(bake_params.grid_size))) && (iters < 1000)) { while (all(greaterThanEqual(icell, ivec3(0))) && all(lessThan(icell, ivec3(bake_params.grid_size))) && (iters < 1000)) {
uvec2 cell_data = texelFetch(usampler3D(grid, linear_sampler), icell, 0).xy; uvec2 cell_data = texelFetch(usampler3D(grid, linear_sampler), icell, 0).xy;
if (cell_data.x > 0) { //triangles here uint triangle_count = cell_data.x;
if (triangle_count > 0) {
uint hit = RAY_MISS; uint hit = RAY_MISS;
float best_distance = 1e20; float best_distance = 1e20;
uint cluster_start = cluster_indices.data[cell_data.y * 2];
for (uint i = 0; i < cell_data.x; i++) { uint cell_triangle_start = cluster_indices.data[cell_data.y * 2 + 1];
uint tidx = grid_indices.data[cell_data.y + i]; uint cluster_count = (triangle_count + CLUSTER_SIZE - 1) / CLUSTER_SIZE;
uint cluster_base_index = 0;
// Ray-Box test. while (cluster_base_index < cluster_count) {
Triangle triangle = triangles.data[tidx]; // To minimize divergence, all Ray-AABB tests on the clusters contained in the cell are performed
vec3 t0 = (triangle.min_bounds - p_from) * inv_dir; // before checking against the triangles. We do this 32 clusters at a time and store the intersected
vec3 t1 = (triangle.max_bounds - p_from) * inv_dir; // clusters on each bit of the 32-bit integer.
vec3 tmin = min(t0, t1), tmax = max(t0, t1); uint cluster_test_count = min(32, cluster_count - cluster_base_index);
uint cluster_hits = 0;
if (max(tmin.x, max(tmin.y, tmin.z)) > min(tmax.x, min(tmax.y, tmax.z))) { for (uint i = 0; i < cluster_test_count; i++) {
continue; // Ray-Box test failed. uint cluster_index = cluster_start + cluster_base_index + i;
ClusterAABB cluster_aabb = cluster_aabbs.data[cluster_index];
if (ray_box_test(p_from, inv_dir, cluster_aabb.min_bounds, cluster_aabb.max_bounds)) {
cluster_hits |= (1 << i);
}
} }
// Prepare triangle vertices. // Check the triangles in any of the clusters that were intersected by toggling off the bits in the
// 32-bit integer counter until no bits are left.
while (cluster_hits > 0) {
uint cluster_index = findLSB(cluster_hits);
cluster_hits &= ~(1 << cluster_index);
cluster_index += cluster_base_index;
// Do the same divergence execution trick with triangles as well.
uint triangle_base_index = 0;
#ifdef CLUSTER_TRIANGLE_ITERATION
while (triangle_base_index < triangle_count)
#endif
{
uint triangle_start_index = cell_triangle_start + cluster_index * CLUSTER_SIZE + triangle_base_index;
uint triangle_test_count = min(CLUSTER_SIZE, triangle_count - triangle_base_index);
uint triangle_hits = 0;
for (uint i = 0; i < triangle_test_count; i++) {
uint triangle_index = triangle_indices.data[triangle_start_index + i];
if (ray_box_test(p_from, inv_dir, triangles.data[triangle_index].min_bounds, triangles.data[triangle_index].max_bounds)) {
triangle_hits |= (1 << i);
}
}
while (triangle_hits > 0) {
uint cluster_triangle_index = findLSB(triangle_hits);
triangle_hits &= ~(1 << cluster_triangle_index);
cluster_triangle_index += triangle_start_index;
uint triangle_index = triangle_indices.data[cluster_triangle_index];
Triangle triangle = triangles.data[triangle_index];
// Gather the triangle vertex positions.
vec3 vtx0 = vertices.data[triangle.indices.x].position; vec3 vtx0 = vertices.data[triangle.indices.x].position;
vec3 vtx1 = vertices.data[triangle.indices.y].position; vec3 vtx1 = vertices.data[triangle.indices.y].position;
vec3 vtx2 = vertices.data[triangle.indices.z].position; vec3 vtx2 = vertices.data[triangle.indices.z].position;
@ -182,8 +229,9 @@ uint trace_ray(vec3 p_from, vec3 p_to, bool p_any_hit, out float r_distance, out
} }
if (!backface) { if (!backface) {
// The case of meshes having both a front and back face in the same plane is more common than expected. // The case of meshes having both a front and back face in the same plane is more common than
// If this is a front-face, bias it closer to the ray origin, so it always wins over the back-face. // expected, so if this is a front-face, bias it closer to the ray origin, so it always wins
// over the back-face.
distance = max(bake_params.bias, distance - bake_params.bias); distance = max(bake_params.bias, distance - bake_params.bias);
} }
@ -192,12 +240,21 @@ uint trace_ray(vec3 p_from, vec3 p_to, bool p_any_hit, out float r_distance, out
best_distance = distance; best_distance = distance;
r_distance = distance; r_distance = distance;
r_normal = normal; r_normal = normal;
r_triangle = tidx; r_triangle = triangle_index;
r_barycentric = barycentric; r_barycentric = barycentric;
} }
} }
} }
#ifdef CLUSTER_TRIANGLE_ITERATION
triangle_base_index += CLUSTER_SIZE;
#endif
}
}
cluster_base_index += 32;
}
if (hit != RAY_MISS) { if (hit != RAY_MISS) {
return hit; return hit;
} }