diff --git a/modules/lightmapper_rd/lightmapper_rd.cpp b/modules/lightmapper_rd/lightmapper_rd.cpp index feb9a2274e1..fe919953c1e 100644 --- a/modules/lightmapper_rd/lightmapper_rd.cpp +++ b/modules/lightmapper_rd/lightmapper_rd.cpp @@ -124,7 +124,7 @@ void LightmapperRD::add_probe(const Vector3 &p_position) { probe_positions.push_back(probe); } -void LightmapperRD::_plot_triangle_into_triangle_index_list(int p_size, const Vector3i &p_ofs, const AABB &p_bounds, const Vector3 p_points[3], uint32_t p_triangle_index, LocalVector &triangles, uint32_t p_grid_size) { +void LightmapperRD::_plot_triangle_into_triangle_index_list(int p_size, const Vector3i &p_ofs, const AABB &p_bounds, const Vector3 p_points[3], uint32_t p_triangle_index, LocalVector &p_triangles_sort, uint32_t p_grid_size) { int half_size = p_size / 2; for (int i = 0; i < 8; i++) { @@ -159,13 +159,69 @@ void LightmapperRD::_plot_triangle_into_triangle_index_list(int p_size, const Ve TriangleSort ts; ts.cell_index = n.x + (n.y * p_grid_size) + (n.z * p_grid_size * p_grid_size); ts.triangle_index = p_triangle_index; - triangles.push_back(ts); + ts.triangle_aabb.position = p_points[0]; + ts.triangle_aabb.size = Vector3(); + ts.triangle_aabb.expand_to(p_points[1]); + ts.triangle_aabb.expand_to(p_points[2]); + p_triangles_sort.push_back(ts); } else { - _plot_triangle_into_triangle_index_list(half_size, n, aabb, p_points, p_triangle_index, triangles, p_grid_size); + _plot_triangle_into_triangle_index_list(half_size, n, aabb, p_points, p_triangle_index, p_triangles_sort, p_grid_size); } } } +void LightmapperRD::_sort_triangle_clusters(uint32_t p_cluster_size, uint32_t p_cluster_index, uint32_t p_index_start, uint32_t p_count, LocalVector &p_triangle_sort, LocalVector &p_cluster_aabb) { + if (p_count == 0) { + return; + } + + // Compute AABB for all triangles in the range. + SortArray> triangle_sorter_x; + SortArray> triangle_sorter_y; + SortArray> triangle_sorter_z; + AABB cluster_aabb = p_triangle_sort[p_index_start].triangle_aabb; + for (uint32_t i = 1; i < p_count; i++) { + cluster_aabb.merge_with(p_triangle_sort[p_index_start + i].triangle_aabb); + } + + if (p_count > p_cluster_size) { + int longest_axis_index = cluster_aabb.get_longest_axis_index(); + switch (longest_axis_index) { + case 0: + triangle_sorter_x.sort(&p_triangle_sort[p_index_start], p_count); + break; + case 1: + triangle_sorter_y.sort(&p_triangle_sort[p_index_start], p_count); + break; + case 2: + triangle_sorter_z.sort(&p_triangle_sort[p_index_start], p_count); + break; + default: + DEV_ASSERT(false && "Invalid axis returned by AABB."); + break; + } + + uint32_t left_cluster_count = next_power_of_2(p_count / 2); + left_cluster_count = MAX(left_cluster_count, p_cluster_size); + left_cluster_count = MIN(left_cluster_count, p_count); + _sort_triangle_clusters(p_cluster_size, p_cluster_index, p_index_start, left_cluster_count, p_triangle_sort, p_cluster_aabb); + + if (left_cluster_count < p_count) { + uint32_t cluster_index_right = p_cluster_index + (left_cluster_count / p_cluster_size); + _sort_triangle_clusters(p_cluster_size, cluster_index_right, p_index_start + left_cluster_count, p_count - left_cluster_count, p_triangle_sort, p_cluster_aabb); + } + } else { + ClusterAABB &aabb = p_cluster_aabb[p_cluster_index]; + Vector3 aabb_end = cluster_aabb.get_end(); + aabb.min_bounds[0] = cluster_aabb.position.x; + aabb.min_bounds[1] = cluster_aabb.position.y; + aabb.min_bounds[2] = cluster_aabb.position.z; + aabb.max_bounds[0] = aabb_end.x; + aabb.max_bounds[1] = aabb_end.y; + aabb.max_bounds[2] = aabb_end.z; + } +} + Lightmapper::BakeError LightmapperRD::_blit_meshes_into_atlas(int p_max_texture_size, Vector> &albedo_images, Vector> &emission_images, AABB &bounds, Size2i &atlas_size, int &atlas_slices, BakeStepFunc p_step_function, void *p_bake_userdata) { Vector sizes; @@ -281,7 +337,7 @@ Lightmapper::BakeError LightmapperRD::_blit_meshes_into_atlas(int p_max_texture_ return BAKE_OK; } -void LightmapperRD::_create_acceleration_structures(RenderingDevice *rd, Size2i atlas_size, int atlas_slices, AABB &bounds, int grid_size, Vector &p_probe_positions, GenerateProbes p_generate_probes, Vector &slice_triangle_count, Vector &slice_seam_count, RID &vertex_buffer, RID &triangle_buffer, RID &lights_buffer, RID &triangle_cell_indices_buffer, RID &probe_positions_buffer, RID &grid_texture, RID &seams_buffer, BakeStepFunc p_step_function, void *p_bake_userdata) { +void LightmapperRD::_create_acceleration_structures(RenderingDevice *rd, Size2i atlas_size, int atlas_slices, AABB &bounds, int grid_size, uint32_t p_cluster_size, Vector &p_probe_positions, GenerateProbes p_generate_probes, Vector &slice_triangle_count, Vector &slice_seam_count, RID &vertex_buffer, RID &triangle_buffer, RID &lights_buffer, RID &r_triangle_indices_buffer, RID &r_cluster_indices_buffer, RID &r_cluster_aabbs_buffer, RID &probe_positions_buffer, RID &grid_texture, RID &seams_buffer, BakeStepFunc p_step_function, void *p_bake_userdata) { HashMap vertex_map; //fill triangles array and vertex array @@ -433,31 +489,70 @@ void LightmapperRD::_create_acceleration_structures(RenderingDevice *rd, Size2i //sort it triangle_sort.sort(); + LocalVector cluster_indices; + LocalVector cluster_aabbs; Vector triangle_indices; triangle_indices.resize(triangle_sort.size()); Vector grid_indices; grid_indices.resize(grid_size * grid_size * grid_size * 2); memset(grid_indices.ptrw(), 0, grid_indices.size() * sizeof(uint32_t)); - Vector solid; - solid.resize(grid_size * grid_size * grid_size); - memset(solid.ptrw(), 0, solid.size() * sizeof(bool)); { - uint32_t *tiw = triangle_indices.ptrw(); + // Fill grid with cell indices. uint32_t last_cell = 0xFFFFFFFF; uint32_t *giw = grid_indices.ptrw(); - bool *solidw = solid.ptrw(); + uint32_t cluster_count = 0; + uint32_t solid_cell_count = 0; for (uint32_t i = 0; i < triangle_sort.size(); i++) { uint32_t cell = triangle_sort[i].cell_index; if (cell != last_cell) { - //cell changed, update pointer to indices - giw[cell * 2 + 1] = i; - solidw[cell] = true; + giw[cell * 2 + 1] = solid_cell_count; + solid_cell_count++; } - tiw[i] = triangle_sort[i].triangle_index; - giw[cell * 2]++; //update counter + + if ((giw[cell * 2] % p_cluster_size) == 0) { + // Add an extra cluster every time the triangle counter reaches a multiple of the cluster size. + cluster_count++; + } + + giw[cell * 2]++; last_cell = cell; } + + // Build fixed-size triangle clusters for all the cells to speed up the traversal. A cell can hold multiple clusters that each contain a fixed + // amount of triangles and an AABB. The tracer will check against the AABBs first to know whether it needs to visit the cell's triangles. + // + // The building algorithm will divide the triangles recursively contained inside each cell, sorting by the longest axis of the AABB on each step. + // + // - If the amount of triangles is less or equal to the cluster size, the AABB will be stored and the algorithm stops. + // + // - The division by two is increased to the next power of two of half the amount of triangles (with cluster size as the minimum value) to + // ensure the first half always fills the cluster. + + cluster_indices.resize(solid_cell_count * 2); + cluster_aabbs.resize(cluster_count); + + uint32_t i = 0; + uint32_t cluster_index = 0; + uint32_t solid_cell_index = 0; + uint32_t *tiw = triangle_indices.ptrw(); + while (i < triangle_sort.size()) { + cluster_indices[solid_cell_index * 2] = cluster_index; + cluster_indices[solid_cell_index * 2 + 1] = i; + + uint32_t cell = triangle_sort[i].cell_index; + uint32_t triangle_count = giw[cell * 2]; + uint32_t cell_cluster_count = (triangle_count + p_cluster_size - 1) / p_cluster_size; + _sort_triangle_clusters(p_cluster_size, cluster_index, i, triangle_count, triangle_sort, cluster_aabbs); + + for (uint32_t j = 0; j < triangle_count; j++) { + tiw[i + j] = triangle_sort[i + j].triangle_index; + } + + i += triangle_count; + cluster_index += cell_cluster_count; + solid_cell_index++; + } } #if 0 for (int i = 0; i < grid_size; i++) { @@ -507,7 +602,13 @@ void LightmapperRD::_create_acceleration_structures(RenderingDevice *rd, Size2i triangle_buffer = rd->storage_buffer_create(tb.size(), tb); Vector tib = triangle_indices.to_byte_array(); - triangle_cell_indices_buffer = rd->storage_buffer_create(tib.size(), tib); + r_triangle_indices_buffer = rd->storage_buffer_create(tib.size(), tib); + + Vector cib = cluster_indices.to_byte_array(); + r_cluster_indices_buffer = rd->storage_buffer_create(cib.size(), cib); + + Vector cab = cluster_aabbs.to_byte_array(); + r_cluster_aabbs_buffer = rd->storage_buffer_create(cab.size(), cab); Vector lb = lights.to_byte_array(); if (lb.size() == 0) { @@ -1020,24 +1121,29 @@ LightmapperRD::BakeError LightmapperRD::bake(BakeQuality p_quality, bool p_use_d RID vertex_buffer; RID triangle_buffer; RID lights_buffer; - RID triangle_cell_indices_buffer; + RID triangle_indices_buffer; + RID cluster_indices_buffer; + RID cluster_aabbs_buffer; RID grid_texture; RID seams_buffer; RID probe_positions_buffer; Vector slice_seam_count; -#define FREE_BUFFERS \ - rd->free(bake_parameters_buffer); \ - rd->free(vertex_buffer); \ - rd->free(triangle_buffer); \ - rd->free(lights_buffer); \ - rd->free(triangle_cell_indices_buffer); \ - rd->free(grid_texture); \ - rd->free(seams_buffer); \ +#define FREE_BUFFERS \ + rd->free(bake_parameters_buffer); \ + rd->free(vertex_buffer); \ + rd->free(triangle_buffer); \ + rd->free(lights_buffer); \ + rd->free(triangle_indices_buffer); \ + rd->free(cluster_indices_buffer); \ + rd->free(cluster_aabbs_buffer); \ + rd->free(grid_texture); \ + rd->free(seams_buffer); \ rd->free(probe_positions_buffer); - _create_acceleration_structures(rd, atlas_size, atlas_slices, bounds, grid_size, probe_positions, p_generate_probes, slice_triangle_count, slice_seam_count, vertex_buffer, triangle_buffer, lights_buffer, triangle_cell_indices_buffer, probe_positions_buffer, grid_texture, seams_buffer, p_step_function, p_bake_userdata); + const uint32_t cluster_size = 16; + _create_acceleration_structures(rd, atlas_size, atlas_slices, bounds, grid_size, cluster_size, probe_positions, p_generate_probes, slice_triangle_count, slice_seam_count, vertex_buffer, triangle_buffer, lights_buffer, triangle_indices_buffer, cluster_indices_buffer, cluster_aabbs_buffer, probe_positions_buffer, grid_texture, seams_buffer, p_step_function, p_bake_userdata); // Create global bake parameters buffer. BakeParameters bake_parameters; @@ -1133,7 +1239,7 @@ LightmapperRD::BakeError LightmapperRD::bake(BakeQuality p_quality, bool p_use_d RD::Uniform u; u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; u.binding = 3; - u.append_id(triangle_cell_indices_buffer); + u.append_id(triangle_indices_buffer); base_uniforms.push_back(u); } { @@ -1185,6 +1291,20 @@ LightmapperRD::BakeError LightmapperRD::bake(BakeQuality p_quality, bool p_use_d u.append_id(sampler); base_uniforms.push_back(u); } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.binding = 11; + u.append_id(cluster_indices_buffer); + base_uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.binding = 12; + u.append_id(cluster_aabbs_buffer); + base_uniforms.push_back(u); + } } RID raster_base_uniform = rd->uniform_set_create(base_uniforms, rasterize_shader, 0); @@ -1230,6 +1350,8 @@ LightmapperRD::BakeError LightmapperRD::bake(BakeQuality p_quality, bool p_use_d Ref compute_shader; String defines = ""; + defines += "\n#define CLUSTER_SIZE " + uitos(cluster_size) + "\n"; + if (p_bake_sh) { defines += "\n#define USE_SH_LIGHTMAPS\n"; } diff --git a/modules/lightmapper_rd/lightmapper_rd.h b/modules/lightmapper_rd/lightmapper_rd.h index 8c1c4deba6f..5414048ddcc 100644 --- a/modules/lightmapper_rd/lightmapper_rd.h +++ b/modules/lightmapper_rd/lightmapper_rd.h @@ -192,6 +192,13 @@ class LightmapperRD : public Lightmapper { } }; + struct ClusterAABB { + float min_bounds[3]; + float pad0 = 0.0f; + float max_bounds[3]; + float pad1 = 0.0f; + }; + Vector mesh_instances; Vector lights; @@ -199,12 +206,22 @@ class LightmapperRD : public Lightmapper { struct TriangleSort { uint32_t cell_index = 0; uint32_t triangle_index = 0; + AABB triangle_aabb; + bool operator<(const TriangleSort &p_triangle_sort) const { return cell_index < p_triangle_sort.cell_index; //sorting by triangle index in this case makes no sense } }; + template + struct TriangleSortAxis { + bool operator()(const TriangleSort &p_a, const TriangleSort &p_b) const { + return p_a.triangle_aabb.get_center()[T] < p_b.triangle_aabb.get_center()[T]; + } + }; + void _plot_triangle_into_triangle_index_list(int p_size, const Vector3i &p_ofs, const AABB &p_bounds, const Vector3 p_points[3], uint32_t p_triangle_index, LocalVector &triangles, uint32_t p_grid_size); + void _sort_triangle_clusters(uint32_t p_cluster_size, uint32_t p_cluster_index, uint32_t p_index_start, uint32_t p_count, LocalVector &p_triangle_sort, LocalVector &p_cluster_aabb); struct RasterPushConstant { float atlas_size[2] = {}; @@ -250,7 +267,7 @@ class LightmapperRD : public Lightmapper { }; BakeError _blit_meshes_into_atlas(int p_max_texture_size, Vector> &albedo_images, Vector> &emission_images, AABB &bounds, Size2i &atlas_size, int &atlas_slices, BakeStepFunc p_step_function, void *p_bake_userdata); - void _create_acceleration_structures(RenderingDevice *rd, Size2i atlas_size, int atlas_slices, AABB &bounds, int grid_size, Vector &probe_positions, GenerateProbes p_generate_probes, Vector &slice_triangle_count, Vector &slice_seam_count, RID &vertex_buffer, RID &triangle_buffer, RID &lights_buffer, RID &triangle_cell_indices_buffer, RID &probe_positions_buffer, RID &grid_texture, RID &seams_buffer, BakeStepFunc p_step_function, void *p_bake_userdata); + void _create_acceleration_structures(RenderingDevice *rd, Size2i atlas_size, int atlas_slices, AABB &bounds, int grid_size, uint32_t p_cluster_size, Vector &probe_positions, GenerateProbes p_generate_probes, Vector &slice_triangle_count, Vector &slice_seam_count, RID &vertex_buffer, RID &triangle_buffer, RID &lights_buffer, RID &r_triangle_indices_buffer, RID &r_cluster_indices_buffer, RID &r_cluster_aabbs_buffer, RID &probe_positions_buffer, RID &grid_texture, RID &seams_buffer, BakeStepFunc p_step_function, void *p_bake_userdata); void _raster_geometry(RenderingDevice *rd, Size2i atlas_size, int atlas_slices, int grid_size, AABB bounds, float p_bias, Vector slice_triangle_count, RID position_tex, RID unocclude_tex, RID normal_tex, RID raster_depth_buffer, RID rasterize_shader, RID raster_base_uniform); BakeError _dilate(RenderingDevice *rd, Ref &compute_shader, RID &compute_base_uniform_set, PushConstant &push_constant, RID &source_light_tex, RID &dest_light_tex, const Size2i &atlas_size, int atlas_slices); diff --git a/modules/lightmapper_rd/lm_common_inc.glsl b/modules/lightmapper_rd/lm_common_inc.glsl index c91f06d0f30..98d11b9e69e 100644 --- a/modules/lightmapper_rd/lm_common_inc.glsl +++ b/modules/lightmapper_rd/lm_common_inc.glsl @@ -42,15 +42,22 @@ struct Triangle { uint pad1; }; +struct ClusterAABB { + vec3 min_bounds; + uint pad0; + vec3 max_bounds; + uint pad1; +}; + layout(set = 0, binding = 2, std430) restrict readonly buffer Triangles { Triangle data[]; } triangles; -layout(set = 0, binding = 3, std430) restrict readonly buffer GridIndices { +layout(set = 0, binding = 3, std430) restrict readonly buffer TriangleIndices { uint data[]; } -grid_indices; +triangle_indices; #define LIGHT_TYPE_DIRECTIONAL 0 #define LIGHT_TYPE_OMNI 1 @@ -104,6 +111,16 @@ layout(set = 0, binding = 9) uniform texture2DArray emission_tex; layout(set = 0, binding = 10) uniform sampler linear_sampler; +layout(set = 0, binding = 11, std430) restrict readonly buffer ClusterIndices { + uint data[]; +} +cluster_indices; + +layout(set = 0, binding = 12, std430) restrict readonly buffer ClusterAABBs { + ClusterAABB data[]; +} +cluster_aabbs; + // Fragment action constants const uint FA_NONE = 0; const uint FA_SMOOTHEN_POSITION = 1; diff --git a/modules/lightmapper_rd/lm_compute.glsl b/modules/lightmapper_rd/lm_compute.glsl index 572e6d55d8d..a2a480043ab 100644 --- a/modules/lightmapper_rd/lm_compute.glsl +++ b/modules/lightmapper_rd/lm_compute.glsl @@ -119,6 +119,17 @@ const uint RAY_FRONT = 1; const uint RAY_BACK = 2; const uint RAY_ANY = 3; +bool ray_box_test(vec3 p_from, vec3 p_inv_dir, vec3 p_box_min, vec3 p_box_max) { + vec3 t0 = (p_box_min - p_from) * p_inv_dir; + vec3 t1 = (p_box_max - p_from) * p_inv_dir; + vec3 tmin = min(t0, t1), tmax = max(t0, t1); + return max(tmin.x, max(tmin.y, tmin.z)) <= min(tmax.x, min(tmax.y, tmax.z)); +} + +#if CLUSTER_SIZE > 32 +#define CLUSTER_TRIANGLE_ITERATION +#endif + uint trace_ray(vec3 p_from, vec3 p_to, bool p_any_hit, out float r_distance, out vec3 r_normal, out uint r_triangle, out vec3 r_barycentric) { // World coordinates. vec3 rel = p_to - p_from; @@ -142,60 +153,106 @@ uint trace_ray(vec3 p_from, vec3 p_to, bool p_any_hit, out float r_distance, out uint iters = 0; while (all(greaterThanEqual(icell, ivec3(0))) && all(lessThan(icell, ivec3(bake_params.grid_size))) && (iters < 1000)) { uvec2 cell_data = texelFetch(usampler3D(grid, linear_sampler), icell, 0).xy; - if (cell_data.x > 0) { //triangles here + uint triangle_count = cell_data.x; + if (triangle_count > 0) { uint hit = RAY_MISS; float best_distance = 1e20; - - for (uint i = 0; i < cell_data.x; i++) { - uint tidx = grid_indices.data[cell_data.y + i]; - - // Ray-Box test. - Triangle triangle = triangles.data[tidx]; - vec3 t0 = (triangle.min_bounds - p_from) * inv_dir; - vec3 t1 = (triangle.max_bounds - p_from) * inv_dir; - vec3 tmin = min(t0, t1), tmax = max(t0, t1); - - if (max(tmin.x, max(tmin.y, tmin.z)) > min(tmax.x, min(tmax.y, tmax.z))) { - continue; // Ray-Box test failed. - } - - // Prepare triangle vertices. - vec3 vtx0 = vertices.data[triangle.indices.x].position; - vec3 vtx1 = vertices.data[triangle.indices.y].position; - vec3 vtx2 = vertices.data[triangle.indices.z].position; - vec3 normal = -normalize(cross((vtx0 - vtx1), (vtx0 - vtx2))); - bool backface = dot(normal, dir) >= 0.0; - float distance; - vec3 barycentric; - if (ray_hits_triangle(p_from, dir, rel_len, vtx0, vtx1, vtx2, distance, barycentric)) { - if (p_any_hit) { - // Return early if any hit was requested. - return RAY_ANY; - } - - vec3 position = p_from + dir * distance; - vec3 hit_cell = (position - bake_params.to_cell_offset) * bake_params.to_cell_size; - if (icell != ivec3(hit_cell)) { - // It's possible for the ray to hit a triangle in a position outside the bounds of the cell - // if it's large enough to cover multiple ones. The hit must be ignored if this is the case. - continue; - } - - if (!backface) { - // The case of meshes having both a front and back face in the same plane is more common than expected. - // If this is a front-face, bias it closer to the ray origin, so it always wins over the back-face. - distance = max(bake_params.bias, distance - bake_params.bias); - } - - if (distance < best_distance) { - hit = backface ? RAY_BACK : RAY_FRONT; - best_distance = distance; - r_distance = distance; - r_normal = normal; - r_triangle = tidx; - r_barycentric = barycentric; + uint cluster_start = cluster_indices.data[cell_data.y * 2]; + uint cell_triangle_start = cluster_indices.data[cell_data.y * 2 + 1]; + uint cluster_count = (triangle_count + CLUSTER_SIZE - 1) / CLUSTER_SIZE; + uint cluster_base_index = 0; + while (cluster_base_index < cluster_count) { + // To minimize divergence, all Ray-AABB tests on the clusters contained in the cell are performed + // before checking against the triangles. We do this 32 clusters at a time and store the intersected + // clusters on each bit of the 32-bit integer. + uint cluster_test_count = min(32, cluster_count - cluster_base_index); + uint cluster_hits = 0; + for (uint i = 0; i < cluster_test_count; i++) { + uint cluster_index = cluster_start + cluster_base_index + i; + ClusterAABB cluster_aabb = cluster_aabbs.data[cluster_index]; + if (ray_box_test(p_from, inv_dir, cluster_aabb.min_bounds, cluster_aabb.max_bounds)) { + cluster_hits |= (1 << i); } } + + // Check the triangles in any of the clusters that were intersected by toggling off the bits in the + // 32-bit integer counter until no bits are left. + while (cluster_hits > 0) { + uint cluster_index = findLSB(cluster_hits); + cluster_hits &= ~(1 << cluster_index); + cluster_index += cluster_base_index; + + // Do the same divergence execution trick with triangles as well. + uint triangle_base_index = 0; +#ifdef CLUSTER_TRIANGLE_ITERATION + while (triangle_base_index < triangle_count) +#endif + { + uint triangle_start_index = cell_triangle_start + cluster_index * CLUSTER_SIZE + triangle_base_index; + uint triangle_test_count = min(CLUSTER_SIZE, triangle_count - triangle_base_index); + uint triangle_hits = 0; + for (uint i = 0; i < triangle_test_count; i++) { + uint triangle_index = triangle_indices.data[triangle_start_index + i]; + if (ray_box_test(p_from, inv_dir, triangles.data[triangle_index].min_bounds, triangles.data[triangle_index].max_bounds)) { + triangle_hits |= (1 << i); + } + } + + while (triangle_hits > 0) { + uint cluster_triangle_index = findLSB(triangle_hits); + triangle_hits &= ~(1 << cluster_triangle_index); + cluster_triangle_index += triangle_start_index; + + uint triangle_index = triangle_indices.data[cluster_triangle_index]; + Triangle triangle = triangles.data[triangle_index]; + + // Gather the triangle vertex positions. + vec3 vtx0 = vertices.data[triangle.indices.x].position; + vec3 vtx1 = vertices.data[triangle.indices.y].position; + vec3 vtx2 = vertices.data[triangle.indices.z].position; + vec3 normal = -normalize(cross((vtx0 - vtx1), (vtx0 - vtx2))); + bool backface = dot(normal, dir) >= 0.0; + float distance; + vec3 barycentric; + if (ray_hits_triangle(p_from, dir, rel_len, vtx0, vtx1, vtx2, distance, barycentric)) { + if (p_any_hit) { + // Return early if any hit was requested. + return RAY_ANY; + } + + vec3 position = p_from + dir * distance; + vec3 hit_cell = (position - bake_params.to_cell_offset) * bake_params.to_cell_size; + if (icell != ivec3(hit_cell)) { + // It's possible for the ray to hit a triangle in a position outside the bounds of the cell + // if it's large enough to cover multiple ones. The hit must be ignored if this is the case. + continue; + } + + if (!backface) { + // The case of meshes having both a front and back face in the same plane is more common than + // expected, so if this is a front-face, bias it closer to the ray origin, so it always wins + // over the back-face. + distance = max(bake_params.bias, distance - bake_params.bias); + } + + if (distance < best_distance) { + hit = backface ? RAY_BACK : RAY_FRONT; + best_distance = distance; + r_distance = distance; + r_normal = normal; + r_triangle = triangle_index; + r_barycentric = barycentric; + } + } + } + +#ifdef CLUSTER_TRIANGLE_ITERATION + triangle_base_index += CLUSTER_SIZE; +#endif + } + } + + cluster_base_index += 32; } if (hit != RAY_MISS) {