update meshoptimizer to 0.16

This commit is contained in:
Hendrik Brucker 2021-04-18 16:15:43 +02:00
parent c05245f539
commit 9d18610190
8 changed files with 897 additions and 123 deletions

View File

@ -261,7 +261,7 @@ License: Apache-2.0
Files: ./thirdparty/meshoptimizer/
Comment: meshoptimizer
Copyright: 2016-2020, Arseny Kapoulkine
Copyright: 2016-2021, Arseny Kapoulkine
License: Expat
Files: ./thirdparty/minimp3/

View File

@ -343,7 +343,7 @@ File extracted from upstream release tarball:
## meshoptimizer
- Upstream: https://github.com/zeux/meshoptimizer
- Version: git (e3f53f66e7a35b9b8764bee478589d79e34fa698, 2021)
- Version: 0.16 (95893c0566646434dd675b708d293fcb2d526d08, 2021)
- License: MIT
Files extracted from upstream repository:

View File

@ -1,6 +1,6 @@
MIT License
Copyright (c) 2016-2020 Arseny Kapoulkine
Copyright (c) 2016-2021 Arseny Kapoulkine
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal

View File

@ -2,6 +2,7 @@
#include "meshoptimizer.h"
#include <assert.h>
#include <float.h>
#include <math.h>
#include <string.h>
@ -12,6 +13,68 @@
namespace meshopt
{
// This must be <= 255 since index 0xff is used internally to indicate a vertex that doesn't belong to a meshlet
const size_t kMeshletMaxVertices = 255;
// A reasonable limit is around 2*max_vertices or less
const size_t kMeshletMaxTriangles = 512;
// Vertex -> triangle adjacency stored as three flat arrays; populated by buildTriangleAdjacency.
struct TriangleAdjacency2
{
// counts[v]: number of triangle corners that reference vertex v
unsigned int* counts;
// offsets[v]: position in data where the triangle list of vertex v begins
unsigned int* offsets;
// data: triangle indices grouped by vertex (one entry per input index)
unsigned int* data;
};
// Fills `adjacency` so that, for every vertex v, the triangles touching v are
// data[offsets[v]] .. data[offsets[v] + counts[v] - 1].
// All three arrays are obtained from `allocator`; `indices` must hold index_count entries, each < vertex_count.
static void buildTriangleAdjacency(TriangleAdjacency2& adjacency, const unsigned int* indices, size_t index_count, size_t vertex_count, meshopt_Allocator& allocator)
{
	// storage: two per-vertex tables plus one slot per triangle corner
	unsigned int* counts = allocator.allocate<unsigned int>(vertex_count);
	unsigned int* offsets = allocator.allocate<unsigned int>(vertex_count);
	unsigned int* data = allocator.allocate<unsigned int>(index_count);

	adjacency.counts = counts;
	adjacency.offsets = offsets;
	adjacency.data = data;

	// pass 1: histogram of how many corners reference each vertex
	memset(counts, 0, vertex_count * sizeof(unsigned int));

	for (size_t corner = 0; corner < index_count; ++corner)
	{
		unsigned int vertex = indices[corner];
		assert(vertex < vertex_count);

		counts[vertex] += 1;
	}

	// pass 2: exclusive prefix sum turns the histogram into list start positions
	unsigned int running = 0;

	for (size_t vertex = 0; vertex < vertex_count; ++vertex)
	{
		offsets[vertex] = running;
		running += counts[vertex];
	}

	assert(running == index_count);

	// pass 3: scatter triangle ids into the per-vertex lists, using offsets[] as write cursors
	const size_t triangle_total = index_count / 3;

	for (size_t tri = 0; tri < triangle_total; ++tri)
	{
		const unsigned int v0 = indices[tri * 3 + 0];
		const unsigned int v1 = indices[tri * 3 + 1];
		const unsigned int v2 = indices[tri * 3 + 2];

		data[offsets[v0]] = unsigned(tri);
		offsets[v0] += 1;

		data[offsets[v1]] = unsigned(tri);
		offsets[v1] += 1;

		data[offsets[v2]] = unsigned(tri);
		offsets[v2] += 1;
	}

	// pass 4: each cursor advanced by counts[v]; rewind so offsets[] points at list starts again
	for (size_t vertex = 0; vertex < vertex_count; ++vertex)
	{
		assert(offsets[vertex] >= counts[vertex]);

		offsets[vertex] -= counts[vertex];
	}
}
static void computeBoundingSphere(float result[4], const float points[][3], size_t count)
{
assert(count > 0);
@ -82,13 +145,310 @@ static void computeBoundingSphere(float result[4], const float points[][3], size
result[3] = radius;
}
// Position + direction pair: (px, py, pz) holds a centroid and (nx, ny, nz) a normal/axis.
// Used both per triangle (see computeTriangleCones) and as a running sum for the meshlet being built.
struct Cone
{
float px, py, pz;
float nx, ny, nz;
};
// Scores a candidate triangle against the current meshlet; lower is better.
//   distance2       - squared distance between triangle centroid and meshlet centroid
//   spread          - dot product of the triangle normal and the meshlet axis
//   cone_weight     - blend factor between the distance term and the normal-alignment term
//   expected_radius - normalization for the distance term (used as a divisor)
static float getMeshletScore(float distance2, float spread, float cone_weight, float expected_radius)
{
	const float kMinAlignment = 1e-3f;

	// alignment term shrinks as the normal agrees with the meshlet axis; clamp so the product never reaches zero
	float alignment = 1.f - spread * cone_weight;

	if (alignment < kMinAlignment)
		alignment = kMinAlignment;

	// distance term grows with distance relative to the expected meshlet radius
	float proximity = 1 + sqrtf(distance2) / expected_radius * (1 - cone_weight);

	return proximity * alignment;
}
// Converts an accumulated cone (sum of triangle centroids and normals) into an averaged one:
// the position is divided by triangle_count and the axis is normalized to unit length.
// A zero triangle count or a zero-length axis yields zeros instead of dividing by zero.
static Cone getMeshletCone(const Cone& acc, unsigned int triangle_count)
{
	const float inv_count = triangle_count == 0 ? 0.f : 1.f / float(triangle_count);

	const float axis_length_sq = acc.nx * acc.nx + acc.ny * acc.ny + acc.nz * acc.nz;
	const float inv_length = axis_length_sq == 0.f ? 0.f : 1.f / sqrtf(axis_length_sq);

	Cone averaged;

	averaged.px = acc.px * inv_count;
	averaged.py = acc.py * inv_count;
	averaged.pz = acc.pz * inv_count;

	averaged.nx = acc.nx * inv_length;
	averaged.ny = acc.ny * inv_length;
	averaged.nz = acc.nz * inv_length;

	return averaged;
}
// For each input triangle, writes its centroid and unit normal into triangles[i].
// Degenerate triangles (zero-length cross product) get a zero normal.
// Returns the sum of cross-product magnitudes over all triangles, i.e. twice the total surface area.
// vertex_count is only used for bounds assertions.
static float computeTriangleCones(Cone* triangles, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
{
	(void)vertex_count;

	const size_t floats_per_vertex = vertex_positions_stride / sizeof(float);
	const size_t triangle_total = index_count / 3;

	float total = 0;

	for (size_t tri = 0; tri < triangle_total; ++tri)
	{
		const unsigned int ia = indices[tri * 3 + 0];
		const unsigned int ib = indices[tri * 3 + 1];
		const unsigned int ic = indices[tri * 3 + 2];
		assert(ia < vertex_count && ib < vertex_count && ic < vertex_count);

		const float* va = vertex_positions + floats_per_vertex * ia;
		const float* vb = vertex_positions + floats_per_vertex * ib;
		const float* vc = vertex_positions + floats_per_vertex * ic;

		// two edges sharing corner a
		const float ab[3] = {vb[0] - va[0], vb[1] - va[1], vb[2] - va[2]};
		const float ac[3] = {vc[0] - va[0], vc[1] - va[1], vc[2] - va[2]};

		// cross product: direction is the face normal, magnitude is twice the face area
		const float cx = ab[1] * ac[2] - ab[2] * ac[1];
		const float cy = ab[2] * ac[0] - ab[0] * ac[2];
		const float cz = ab[0] * ac[1] - ab[1] * ac[0];

		const float magnitude = sqrtf(cx * cx + cy * cy + cz * cz);
		const float inv_magnitude = (magnitude == 0.f) ? 0.f : 1.f / magnitude;

		Cone& out = triangles[tri];

		out.px = (va[0] + vb[0] + vc[0]) / 3.f;
		out.py = (va[1] + vb[1] + vc[1]) / 3.f;
		out.pz = (va[2] + vb[2] + vc[2]) / 3.f;

		out.nx = cx * inv_magnitude;
		out.ny = cy * inv_magnitude;
		out.nz = cz * inv_magnitude;

		total += magnitude;
	}

	return total;
}
// Zero-fills the bytes after the meshlet's last triangle index up to the next multiple of 4,
// so that every meshlet's run inside meshlet_triangles ends on a 4-byte boundary.
static void finishMeshlet(meshopt_Meshlet& meshlet, unsigned char* meshlet_triangles)
{
	size_t end = meshlet.triangle_offset + meshlet.triangle_count * 3;

	for (; (end & 3) != 0; ++end)
		meshlet_triangles[end] = 0;
}
// Appends triangle (a, b, c) to the meshlet under construction.
// If adding the triangle would exceed max_vertices or max_triangles, the current meshlet is first written to
// meshlets[meshlet_offset], its vertices are released in used[], and `meshlet` is reset to start right after it.
//   used              - per mesh vertex: local index inside the current meshlet, or 0xff if not part of it
//   meshlet_vertices  - output: mesh vertex indices, addressed via meshlet.vertex_offset
//   meshlet_triangles - output: local (8-bit) vertex indices, addressed via meshlet.triangle_offset
// Returns true when a full meshlet was flushed (callers in this file advance meshlet_offset in that case).
static bool appendMeshlet(meshopt_Meshlet& meshlet, unsigned int a, unsigned int b, unsigned int c, unsigned char* used, meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, size_t meshlet_offset, size_t max_vertices, size_t max_triangles)
{
// references, not copies: the flush loop below may reset these entries, and the assignments further down write through them
unsigned char& av = used[a];
unsigned char& bv = used[b];
unsigned char& cv = used[c];
bool result = false;
// number of corners that are not yet part of the current meshlet
unsigned int used_extra = (av == 0xff) + (bv == 0xff) + (cv == 0xff);
if (meshlet.vertex_count + used_extra > max_vertices || meshlet.triangle_count >= max_triangles)
{
// emit the finished meshlet
meshlets[meshlet_offset] = meshlet;
// mark all of its vertices as unused again so the next meshlet starts from a clean slate
for (size_t j = 0; j < meshlet.vertex_count; ++j)
used[meshlet_vertices[meshlet.vertex_offset + j]] = 0xff;
finishMeshlet(meshlet, meshlet_triangles);
// the next meshlet's data begins right after this one's
meshlet.vertex_offset += meshlet.vertex_count;
meshlet.triangle_offset += (meshlet.triangle_count * 3 + 3) & ~3; // 4b padding
meshlet.vertex_count = 0;
meshlet.triangle_count = 0;
result = true;
}
// assign local slots to corners that are new to the (possibly just reset) meshlet
if (av == 0xff)
{
av = (unsigned char)meshlet.vertex_count;
meshlet_vertices[meshlet.vertex_offset + meshlet.vertex_count++] = a;
}
if (bv == 0xff)
{
bv = (unsigned char)meshlet.vertex_count;
meshlet_vertices[meshlet.vertex_offset + meshlet.vertex_count++] = b;
}
if (cv == 0xff)
{
cv = (unsigned char)meshlet.vertex_count;
meshlet_vertices[meshlet.vertex_offset + meshlet.vertex_count++] = c;
}
// store the triangle as three local indices
meshlet_triangles[meshlet.triangle_offset + meshlet.triangle_count * 3 + 0] = av;
meshlet_triangles[meshlet.triangle_offset + meshlet.triangle_count * 3 + 1] = bv;
meshlet_triangles[meshlet.triangle_offset + meshlet.triangle_count * 3 + 2] = cv;
meshlet.triangle_count++;
return result;
}
// One node of the flat (array-based) kd-tree built by kdtreeBuild and searched by kdtreeNearest.
struct KDNode
{
// branch nodes store the split coordinate, leaf nodes store a point index
union
{
float split;
unsigned int index;
};
// leaves: axis = 3, children = number of extra points after this one (0 if 'index' is the only point)
// branches: axis != 3, left subtree = skip 1, right subtree = skip 1+children
unsigned int axis : 2;
unsigned int children : 30;
};
// Reorders indices[0..count) so that points whose `axis` coordinate is < pivot come first.
// points is read as points[index * stride + axis].
// Returns the number of entries that ended up in the "< pivot" group.
static size_t kdtreePartition(unsigned int* indices, size_t count, const float* points, size_t stride, unsigned int axis, float pivot)
{
	// [0, boundary) holds entries below the pivot, [boundary, cursor) holds the rest
	size_t boundary = 0;

	for (size_t cursor = 0; cursor < count; ++cursor)
	{
		const unsigned int current = indices[cursor];
		const float coord = points[current * stride + axis];

		// always exchange the current entry with the first entry of the ">= pivot" group
		indices[cursor] = indices[boundary];
		indices[boundary] = current;

		// only claim the slot when the entry belongs to the "< pivot" group;
		// otherwise the exchange merely rotated the ">= pivot" group by one
		if (coord < pivot)
			boundary++;
	}

	return boundary;
}
static size_t kdtreeBuildLeaf(size_t offset, KDNode* nodes, size_t node_count, unsigned int* indices, size_t count)
{
assert(offset + count <= node_count);
(void)node_count;
KDNode& result = nodes[offset];
result.index = indices[0];
result.axis = 3;
result.children = unsigned(count - 1);
// all remaining points are stored in nodes immediately following the leaf
for (size_t i = 1; i < count; ++i)
{
KDNode& tail = nodes[offset + i];
tail.index = indices[i];
tail.axis = 3;
tail.children = ~0u >> 2; // bogus value to prevent misuse
}
return offset + count;
}
// Recursively builds a kd-tree over the points referenced by indices[0..count) into nodes[], starting at `offset`.
// points is read as points[index * stride + k] for k in 0..2; indices is reordered in place by the partition step.
// Subtrees with at most leaf_size points, or with a degenerate split, are stored as a single leaf run.
// Requires count > 0. Returns the offset just past the last node written for this subtree.
static size_t kdtreeBuild(size_t offset, KDNode* nodes, size_t node_count, const float* points, size_t stride, unsigned int* indices, size_t count, size_t leaf_size)
{
assert(count > 0);
assert(offset < node_count);
if (count <= leaf_size)
return kdtreeBuildLeaf(offset, nodes, node_count, indices, count);
float mean[3] = {};
float vars[3] = {};
// runc = number of points seen including the current one, runs = 1 / runc (both advanced in the loop increment)
float runc = 1, runs = 1;
// gather statistics on the points in the subtree using Welford's algorithm
for (size_t i = 0; i < count; ++i, runc += 1.f, runs = 1.f / runc)
{
const float* point = points + indices[i] * stride;
for (int k = 0; k < 3; ++k)
{
float delta = point[k] - mean[k];
mean[k] += delta * runs;
// accumulates the sum of squared deviations (not divided by count; only the relative size matters below)
vars[k] += delta * (point[k] - mean[k]);
}
}
// split axis is one where the variance is largest
unsigned int axis = vars[0] >= vars[1] && vars[0] >= vars[2] ? 0 : vars[1] >= vars[2] ? 1
: 2;
// split at the mean coordinate along that axis
float split = mean[axis];
size_t middle = kdtreePartition(indices, count, points, stride, axis, split);
// when the partition is degenerate simply consolidate the points into a single node
if (middle <= leaf_size / 2 || middle >= count - leaf_size / 2)
return kdtreeBuildLeaf(offset, nodes, node_count, indices, count);
KDNode& result = nodes[offset];
result.split = split;
result.axis = axis;
// left subtree is right after our node
size_t next_offset = kdtreeBuild(offset + 1, nodes, node_count, points, stride, indices, middle, leaf_size);
// distance to the right subtree is represented explicitly
result.children = unsigned(next_offset - offset - 1);
return kdtreeBuild(next_offset, nodes, node_count, points, stride, indices + middle, count - middle, leaf_size);
}
// Searches the subtree rooted at nodes[root] for the point closest to `position`, skipping points whose
// emitted_flags entry is non-zero. `limit` is the best distance found so far and `result` the matching
// point index; both are updated in place whenever a strictly closer point is found.
static void kdtreeNearest(KDNode* nodes, unsigned int root, const float* points, size_t stride, const unsigned char* emitted_flags, const float* position, unsigned int& result, float& limit)
{
	const KDNode& node = nodes[root];

	if (node.axis != 3)
	{
		// branch: descend first into the side that contains the search position
		float delta = position[node.axis] - node.split;

		unsigned int near_skip = (delta <= 0) ? 0 : node.children;
		unsigned int far_skip = near_skip ^ node.children;

		kdtreeNearest(nodes, root + 1 + near_skip, points, stride, emitted_flags, position, result, limit);

		// the far side can only contain a better match if the splitting plane is within the current best distance
		if (fabsf(delta) <= limit)
			kdtreeNearest(nodes, root + 1 + far_skip, points, stride, emitted_flags, position, result, limit);

		return;
	}

	// leaf: the header node and its `children` followers each hold one candidate point
	for (unsigned int slot = 0; slot <= node.children; ++slot)
	{
		unsigned int candidate = nodes[root + slot].index;

		if (emitted_flags[candidate])
			continue;

		const float* point = points + candidate * stride;

		float dx = point[0] - position[0];
		float dy = point[1] - position[1];
		float dz = point[2] - position[2];

		float distance = sqrtf(dx * dx + dy * dy + dz * dz);

		if (distance < limit)
		{
			result = candidate;
			limit = distance;
		}
	}
}
} // namespace meshopt
size_t meshopt_buildMeshletsBound(size_t index_count, size_t max_vertices, size_t max_triangles)
{
using namespace meshopt;
assert(index_count % 3 == 0);
assert(max_vertices >= 3);
assert(max_triangles >= 1);
assert(max_vertices >= 3 && max_vertices <= kMeshletMaxVertices);
assert(max_triangles >= 1 && max_triangles <= kMeshletMaxTriangles);
assert(max_triangles % 4 == 0); // ensures the caller will compute output space properly as index data is 4b aligned
(void)kMeshletMaxVertices;
(void)kMeshletMaxTriangles;
// meshlet construction is limited by max vertices and max triangles per meshlet
// the worst case is that the input is an unindexed stream since this equally stresses both limits
@ -100,80 +460,7 @@ size_t meshopt_buildMeshletsBound(size_t index_count, size_t max_vertices, size_
return meshlet_limit_vertices > meshlet_limit_triangles ? meshlet_limit_vertices : meshlet_limit_triangles;
}
// Splits the index buffer into meshlets by scanning triangles in input order.
// This overload stores vertex and triangle data inside each meshopt_Meshlet (meshlet.vertices / meshlet.indices).
// A new meshlet is started whenever the next triangle would exceed max_vertices or max_triangles.
// Returns the number of meshlets written to destination.
size_t meshopt_buildMeshlets(meshopt_Meshlet* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, size_t max_vertices, size_t max_triangles)
{
assert(index_count % 3 == 0);
assert(max_vertices >= 3);
assert(max_triangles >= 1);
meshopt_Allocator allocator;
meshopt_Meshlet meshlet;
memset(&meshlet, 0, sizeof(meshlet));
// the limits may not exceed the capacity of the arrays embedded in meshopt_Meshlet
assert(max_vertices <= sizeof(meshlet.vertices) / sizeof(meshlet.vertices[0]));
assert(max_triangles <= sizeof(meshlet.indices) / 3);
// index of the vertex in the meshlet, 0xff if the vertex isn't used
unsigned char* used = allocator.allocate<unsigned char>(vertex_count);
memset(used, -1, vertex_count);
size_t offset = 0;
for (size_t i = 0; i < index_count; i += 3)
{
unsigned int a = indices[i + 0], b = indices[i + 1], c = indices[i + 2];
assert(a < vertex_count && b < vertex_count && c < vertex_count);
// references into used[]: the reset loop below and the slot assignments write through them
unsigned char& av = used[a];
unsigned char& bv = used[b];
unsigned char& cv = used[c];
// number of corners that are not yet part of the current meshlet
unsigned int used_extra = (av == 0xff) + (bv == 0xff) + (cv == 0xff);
if (meshlet.vertex_count + used_extra > max_vertices || meshlet.triangle_count >= max_triangles)
{
// current meshlet is full: emit it, release its vertices and start over
destination[offset++] = meshlet;
for (size_t j = 0; j < meshlet.vertex_count; ++j)
used[meshlet.vertices[j]] = 0xff;
memset(&meshlet, 0, sizeof(meshlet));
}
// assign local slots to corners that are new to the meshlet
if (av == 0xff)
{
av = meshlet.vertex_count;
meshlet.vertices[meshlet.vertex_count++] = a;
}
if (bv == 0xff)
{
bv = meshlet.vertex_count;
meshlet.vertices[meshlet.vertex_count++] = b;
}
if (cv == 0xff)
{
cv = meshlet.vertex_count;
meshlet.vertices[meshlet.vertex_count++] = c;
}
// store the triangle as three local indices
meshlet.indices[meshlet.triangle_count][0] = av;
meshlet.indices[meshlet.triangle_count][1] = bv;
meshlet.indices[meshlet.triangle_count][2] = cv;
meshlet.triangle_count++;
}
// emit the trailing, partially filled meshlet
if (meshlet.triangle_count)
destination[offset++] = meshlet;
assert(offset <= meshopt_buildMeshletsBound(index_count, max_vertices, max_triangles));
return offset;
}
meshopt_Bounds meshopt_computeClusterBounds(const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
// Builds meshlets by greedily growing the current meshlet one triangle at a time.
// Candidates are the not-yet-emitted triangles adjacent to vertices already in the meshlet. They are ranked first
// by how many new vertices they would add, then by getMeshletScore (distance to the meshlet centroid and normal
// agreement, blended by cone_weight). When no adjacent candidate exists, the unemitted triangle nearest to the
// current meshlet centroid is taken from a kd-tree built over triangle centroids.
// Outputs:
//   meshlets          - one entry per meshlet (offsets/counts into the two arrays below)
//   meshlet_vertices  - mesh vertex indices used by each meshlet
//   meshlet_triangles - local 8-bit vertex indices, each meshlet's run padded to a multiple of 4 bytes
// Returns the number of meshlets written.
// NOTE(review): with index_count == 0 the kdtreeBuild call below receives count == 0, which its assert(count > 0)
// rejects - confirm whether empty input is meant to be supported here.
size_t meshopt_buildMeshlets(meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t max_vertices, size_t max_triangles, float cone_weight)
{
using namespace meshopt;
// NOTE(review): the next line is a diff hunk marker from the rendered commit view, not source code
@ -181,15 +468,236 @@ meshopt_Bounds meshopt_computeClusterBounds(const unsigned int* indices, size_t
assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256);
assert(vertex_positions_stride % sizeof(float) == 0);
// NOTE(review): the 256-triangle assert below looks like a leftover from the old side of the diff - confirm against the real file
assert(index_count / 3 <= 256);
assert(max_vertices >= 3 && max_vertices <= kMeshletMaxVertices);
assert(max_triangles >= 1 && max_triangles <= kMeshletMaxTriangles);
assert(max_triangles % 4 == 0); // ensures the caller will compute output space properly as index data is 4b aligned
meshopt_Allocator allocator;
TriangleAdjacency2 adjacency = {};
buildTriangleAdjacency(adjacency, indices, index_count, vertex_count, allocator);
// live_triangles[v]: number of triangles referencing v that have not been emitted yet
unsigned int* live_triangles = allocator.allocate<unsigned int>(vertex_count);
memcpy(live_triangles, adjacency.counts, vertex_count * sizeof(unsigned int));
size_t face_count = index_count / 3;
// emitted_flags[t]: non-zero once triangle t has been placed into a meshlet
unsigned char* emitted_flags = allocator.allocate<unsigned char>(face_count);
memset(emitted_flags, 0, face_count);
// for each triangle, precompute centroid & normal to use for scoring
Cone* triangles = allocator.allocate<Cone>(face_count);
float mesh_area = computeTriangleCones(triangles, indices, index_count, vertex_positions, vertex_count, vertex_positions_stride);
// assuming each meshlet is a square patch, expected radius is sqrt(expected area)
// (mesh_area is a sum of cross-product magnitudes, hence the 0.5 factor to get triangle area)
float triangle_area_avg = face_count == 0 ? 0.f : mesh_area / float(face_count) * 0.5f;
float meshlet_expected_radius = sqrtf(triangle_area_avg * max_triangles) * 0.5f;
// build a kd-tree for nearest neighbor lookup
unsigned int* kdindices = allocator.allocate<unsigned int>(face_count);
for (size_t i = 0; i < face_count; ++i)
kdindices[i] = unsigned(i);
KDNode* nodes = allocator.allocate<KDNode>(face_count * 2);
kdtreeBuild(0, nodes, face_count * 2, &triangles[0].px, sizeof(Cone) / sizeof(float), kdindices, face_count, /* leaf_size= */ 8);
// index of the vertex in the meshlet, 0xff if the vertex isn't used
unsigned char* used = allocator.allocate<unsigned char>(vertex_count);
memset(used, -1, vertex_count);
meshopt_Meshlet meshlet = {};
size_t meshlet_offset = 0;
// running sum of centroids and normals of the triangles in the current meshlet
Cone meshlet_cone_acc = {};
// each iteration picks and emits exactly one triangle; the loop ends when none is left
for (;;)
{
unsigned int best_triangle = ~0u;
// larger than any value `extra` can take, so the first candidate always wins
unsigned int best_extra = 5;
float best_score = FLT_MAX;
Cone meshlet_cone = getMeshletCone(meshlet_cone_acc, meshlet.triangle_count);
// scan all live triangles that touch a vertex of the current meshlet
for (size_t i = 0; i < meshlet.vertex_count; ++i)
{
unsigned int index = meshlet_vertices[meshlet.vertex_offset + i];
unsigned int* neighbours = &adjacency.data[0] + adjacency.offsets[index];
size_t neighbours_size = adjacency.counts[index];
for (size_t j = 0; j < neighbours_size; ++j)
{
unsigned int triangle = neighbours[j];
assert(!emitted_flags[triangle]);
unsigned int a = indices[triangle * 3 + 0], b = indices[triangle * 3 + 1], c = indices[triangle * 3 + 2];
assert(a < vertex_count && b < vertex_count && c < vertex_count);
// number of vertices this triangle would add to the meshlet
unsigned int extra = (used[a] == 0xff) + (used[b] == 0xff) + (used[c] == 0xff);
// triangles that don't add new vertices to meshlets are max. priority
if (extra != 0)
{
// artificially increase the priority of dangling triangles as they're expensive to add to new meshlets
if (live_triangles[a] == 1 || live_triangles[b] == 1 || live_triangles[c] == 1)
extra = 0;
extra++;
}
// since topology-based priority is always more important than the score, we can skip scoring in some cases
if (extra > best_extra)
continue;
const Cone& tri_cone = triangles[triangle];
float distance2 =
(tri_cone.px - meshlet_cone.px) * (tri_cone.px - meshlet_cone.px) +
(tri_cone.py - meshlet_cone.py) * (tri_cone.py - meshlet_cone.py) +
(tri_cone.pz - meshlet_cone.pz) * (tri_cone.pz - meshlet_cone.pz);
float spread = tri_cone.nx * meshlet_cone.nx + tri_cone.ny * meshlet_cone.ny + tri_cone.nz * meshlet_cone.nz;
float score = getMeshletScore(distance2, spread, cone_weight, meshlet_expected_radius);
// note that topology-based priority is always more important than the score
// this helps maintain reasonable effectiveness of meshlet data and reduces scoring cost
if (extra < best_extra || score < best_score)
{
best_triangle = triangle;
best_extra = extra;
best_score = score;
}
}
}
// no adjacent candidate: fall back to the nearest unemitted triangle anywhere in the mesh
if (best_triangle == ~0u)
{
float position[3] = {meshlet_cone.px, meshlet_cone.py, meshlet_cone.pz};
unsigned int index = ~0u;
float limit = FLT_MAX;
kdtreeNearest(nodes, 0, &triangles[0].px, sizeof(Cone) / sizeof(float), emitted_flags, position, index, limit);
best_triangle = index;
}
// the kd-tree found nothing either, so every triangle has been emitted
if (best_triangle == ~0u)
break;
unsigned int a = indices[best_triangle * 3 + 0], b = indices[best_triangle * 3 + 1], c = indices[best_triangle * 3 + 2];
assert(a < vertex_count && b < vertex_count && c < vertex_count);
// add meshlet to the output; when the current meshlet is full we reset the accumulated bounds
if (appendMeshlet(meshlet, a, b, c, used, meshlets, meshlet_vertices, meshlet_triangles, meshlet_offset, max_vertices, max_triangles))
{
meshlet_offset++;
memset(&meshlet_cone_acc, 0, sizeof(meshlet_cone_acc));
}
live_triangles[a]--;
live_triangles[b]--;
live_triangles[c]--;
// remove emitted triangle from adjacency data
// this makes sure that we spend less time traversing these lists on subsequent iterations
for (size_t k = 0; k < 3; ++k)
{
unsigned int index = indices[best_triangle * 3 + k];
unsigned int* neighbours = &adjacency.data[0] + adjacency.offsets[index];
size_t neighbours_size = adjacency.counts[index];
for (size_t i = 0; i < neighbours_size; ++i)
{
unsigned int tri = neighbours[i];
if (tri == best_triangle)
{
// unordered removal: overwrite with the last entry and shrink the list by one
neighbours[i] = neighbours[neighbours_size - 1];
adjacency.counts[index]--;
break;
}
}
}
// update aggregated meshlet cone data for scoring subsequent triangles
meshlet_cone_acc.px += triangles[best_triangle].px;
meshlet_cone_acc.py += triangles[best_triangle].py;
meshlet_cone_acc.pz += triangles[best_triangle].pz;
meshlet_cone_acc.nx += triangles[best_triangle].nx;
meshlet_cone_acc.ny += triangles[best_triangle].ny;
meshlet_cone_acc.nz += triangles[best_triangle].nz;
emitted_flags[best_triangle] = 1;
}
// emit the trailing, partially filled meshlet
if (meshlet.triangle_count)
{
finishMeshlet(meshlet, meshlet_triangles);
meshlets[meshlet_offset++] = meshlet;
}
assert(meshlet_offset <= meshopt_buildMeshletsBound(index_count, max_vertices, max_triangles));
return meshlet_offset;
}
// Builds meshlets by walking the index buffer in its existing order: every triangle is appended to the
// current meshlet, and a new meshlet is started whenever the vertex or triangle limit would be exceeded.
// Outputs go to meshlets / meshlet_vertices / meshlet_triangles; returns the number of meshlets written.
size_t meshopt_buildMeshletsScan(meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const unsigned int* indices, size_t index_count, size_t vertex_count, size_t max_vertices, size_t max_triangles)
{
	using namespace meshopt;

	assert(index_count % 3 == 0);
	assert(max_vertices >= 3 && max_vertices <= kMeshletMaxVertices);
	assert(max_triangles >= 1 && max_triangles <= kMeshletMaxTriangles);
	assert(max_triangles % 4 == 0); // ensures the caller will compute output space properly as index data is 4b aligned

	meshopt_Allocator allocator;

	// per mesh vertex: local slot inside the current meshlet, 0xff when the vertex is not part of it
	unsigned char* local_slot = allocator.allocate<unsigned char>(vertex_count);
	memset(local_slot, -1, vertex_count);

	meshopt_Meshlet current = {};
	size_t emitted = 0;

	for (size_t first = 0; first < index_count; first += 3)
	{
		const unsigned int a = indices[first + 0];
		const unsigned int b = indices[first + 1];
		const unsigned int c = indices[first + 2];
		assert(a < vertex_count && b < vertex_count && c < vertex_count);

		// appendMeshlet reports whether it had to flush the previous meshlet to the output
		const bool flushed = appendMeshlet(current, a, b, c, local_slot, meshlets, meshlet_vertices, meshlet_triangles, emitted, max_vertices, max_triangles);

		if (flushed)
			++emitted;
	}

	// write out the last, partially filled meshlet
	if (current.triangle_count)
	{
		finishMeshlet(current, meshlet_triangles);

		meshlets[emitted] = current;
		++emitted;
	}

	assert(emitted <= meshopt_buildMeshletsBound(index_count, max_vertices, max_triangles));

	return emitted;
}
meshopt_Bounds meshopt_computeClusterBounds(const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
{
using namespace meshopt;
assert(index_count % 3 == 0);
assert(index_count / 3 <= kMeshletMaxTriangles);
assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256);
assert(vertex_positions_stride % sizeof(float) == 0);
(void)vertex_count;
size_t vertex_stride_float = vertex_positions_stride / sizeof(float);
// compute triangle normals and gather triangle corners
float normals[256][3];
float corners[256][3][3];
float normals[kMeshletMaxTriangles][3];
float corners[kMeshletMaxTriangles][3][3];
size_t triangles = 0;
for (size_t i = 0; i < index_count; i += 3)
@ -327,25 +835,23 @@ meshopt_Bounds meshopt_computeClusterBounds(const unsigned int* indices, size_t
return bounds;
}
meshopt_Bounds meshopt_computeMeshletBounds(const meshopt_Meshlet* meshlet, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
meshopt_Bounds meshopt_computeMeshletBounds(const unsigned int* meshlet_vertices, const unsigned char* meshlet_triangles, size_t triangle_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
{
using namespace meshopt;
assert(triangle_count <= kMeshletMaxTriangles);
assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256);
assert(vertex_positions_stride % sizeof(float) == 0);
unsigned int indices[sizeof(meshlet->indices) / sizeof(meshlet->indices[0][0])];
unsigned int indices[kMeshletMaxTriangles * 3];
for (size_t i = 0; i < meshlet->triangle_count; ++i)
for (size_t i = 0; i < triangle_count * 3; ++i)
{
unsigned int a = meshlet->vertices[meshlet->indices[i][0]];
unsigned int b = meshlet->vertices[meshlet->indices[i][1]];
unsigned int c = meshlet->vertices[meshlet->indices[i][2]];
unsigned int index = meshlet_vertices[meshlet_triangles[i]];
assert(index < vertex_count);
assert(a < vertex_count && b < vertex_count && c < vertex_count);
indices[i * 3 + 0] = a;
indices[i * 3 + 1] = b;
indices[i * 3 + 2] = c;
indices[i] = index;
}
return meshopt_computeClusterBounds(indices, meshlet->triangle_count * 3, vertex_positions, vertex_count, vertex_positions_stride);
return meshopt_computeClusterBounds(indices, triangle_count * 3, vertex_positions, vertex_count, vertex_positions_stride);
}

View File

@ -4,6 +4,8 @@
#include <assert.h>
#include <string.h>
// This work is based on:
// John McDonald, Mark Kilgard. Crack-Free Point-Normal Triangles using Adjacent Edge Normals. 2010
namespace meshopt
{
@ -83,10 +85,49 @@ struct VertexStreamHasher
}
};
// Hash/equality functor for directed edges packed as (v0 << 32) | v1.
// Both operations look the endpoints up in `remap` first, so two edges compare equal (and hash alike)
// whenever their endpoints map to the same remapped vertices.
struct EdgeHasher
{
	const unsigned int* remap;

	size_t hash(unsigned long long edge) const
	{
		const unsigned int mul = 0x5bd1e995;

		unsigned int hi = remap[unsigned(edge >> 32)];
		unsigned int lo = remap[unsigned(edge)];

		// MurmurHash64B finalizer
		hi = (hi ^ (lo >> 18)) * mul;
		lo = (lo ^ (hi >> 22)) * mul;
		hi = (hi ^ (lo >> 17)) * mul;
		lo = (lo ^ (hi >> 19)) * mul;

		return lo;
	}

	bool equal(unsigned long long lhs, unsigned long long rhs) const
	{
		// compare the first endpoints, then the second ones, after remapping
		if (remap[unsigned(lhs >> 32)] != remap[unsigned(rhs >> 32)])
			return false;

		return remap[unsigned(lhs)] == remap[unsigned(rhs)];
	}
};
static size_t hashBuckets(size_t count)
{
size_t buckets = 1;
while (buckets < count)
while (buckets < count + count / 4)
buckets *= 2;
return buckets;
@ -119,6 +160,26 @@ static T* hashLookup(T* table, size_t buckets, const Hash& hash, const T& key, c
return 0;
}
static void buildPositionRemap(unsigned int* remap, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, meshopt_Allocator& allocator)
{
VertexHasher vertex_hasher = {reinterpret_cast<const unsigned char*>(vertex_positions), 3 * sizeof(float), vertex_positions_stride};
size_t vertex_table_size = hashBuckets(vertex_count);
unsigned int* vertex_table = allocator.allocate<unsigned int>(vertex_table_size);
memset(vertex_table, -1, vertex_table_size * sizeof(unsigned int));
for (size_t i = 0; i < vertex_count; ++i)
{
unsigned int index = unsigned(i);
unsigned int* entry = hashLookup(vertex_table, vertex_table_size, vertex_hasher, index, ~0u);
if (*entry == ~0u)
*entry = index;
remap[index] = *entry;
}
}
} // namespace meshopt
size_t meshopt_generateVertexRemap(unsigned int* destination, const unsigned int* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size)
@ -345,3 +406,146 @@ void meshopt_generateShadowIndexBufferMulti(unsigned int* destination, const uns
destination[i] = remap[index];
}
}
// Expands every input triangle into a 6-index patch written to destination (index_count * 2 entries):
// even slots hold the triangle's own corners, odd slots hold the vertex opposite to the matching reversed
// edge in a neighbouring triangle, or the corner itself when no such edge exists.
// Edges are matched by position (via buildPositionRemap), not by raw index.
void meshopt_generateAdjacencyIndexBuffer(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
{
	using namespace meshopt;

	assert(index_count % 3 == 0);
	assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256);
	assert(vertex_positions_stride % sizeof(float) == 0);

	meshopt_Allocator allocator;

	const unsigned long long kNoEdge = ~0ull;

	// position remap: for each vertex, the canonical vertex sharing its position
	unsigned int* remap = allocator.allocate<unsigned int>(vertex_count);
	buildPositionRemap(remap, vertex_positions, vertex_count, vertex_positions_stride, allocator);

	// edge set keyed by remapped endpoints, with a parallel table holding the vertex opposite to each stored edge
	EdgeHasher edge_hasher = {remap};

	size_t edge_table_size = hashBuckets(index_count);
	unsigned long long* edge_table = allocator.allocate<unsigned long long>(edge_table_size);
	unsigned int* edge_vertex_table = allocator.allocate<unsigned int>(edge_table_size);

	memset(edge_table, -1, edge_table_size * sizeof(unsigned long long));
	memset(edge_vertex_table, -1, edge_table_size * sizeof(unsigned int));

	// pass 1: register every directed edge (first occurrence wins) together with its opposite vertex
	for (size_t base = 0; base < index_count; base += 3)
	{
		for (int e = 0; e < 3; ++e)
		{
			unsigned int from = indices[base + e];
			unsigned int to = indices[base + (e + 1) % 3];
			unsigned int opposite = indices[base + (e + 2) % 3];
			assert(from < vertex_count && to < vertex_count && opposite < vertex_count);

			unsigned long long key = ((unsigned long long)from << 32) | to;
			unsigned long long* slot = hashLookup(edge_table, edge_table_size, edge_hasher, key, kNoEdge);

			if (*slot != kNoEdge)
				continue;

			*slot = key;
			edge_vertex_table[slot - edge_table] = opposite;
		}
	}

	// pass 2: emit 6 indices per triangle
	for (size_t base = 0; base < index_count; base += 3)
	{
		unsigned int patch[6];

		for (int e = 0; e < 3; ++e)
		{
			unsigned int from = indices[base + e];
			unsigned int to = indices[base + (e + 1) % 3];
			assert(from < vertex_count && to < vertex_count);

			// look up the edge running in the opposite direction; it belongs to the neighbouring triangle
			unsigned long long reversed = ((unsigned long long)to << 32) | from;
			unsigned long long* found = hashLookup(edge_table, edge_table_size, edge_hasher, reversed, kNoEdge);

			patch[e * 2 + 0] = from;

			if (*found == kNoEdge)
				patch[e * 2 + 1] = from;
			else
				patch[e * 2 + 1] = edge_vertex_table[found - edge_table];
		}

		memcpy(destination + base * 2, patch, sizeof(patch));
	}
}
// Expands every input triangle into a 12-index patch written to destination (index_count * 4 entries):
//   0..2  - the triangle's own corners
//   3..8  - for each edge, the two endpoints of the matching reversed edge (or of the edge itself on a border)
//   9..11 - for each corner, the canonical vertex sharing its position
// Edges are matched by position (via buildPositionRemap), not by raw index.
void meshopt_generateTessellationIndexBuffer(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
{
	using namespace meshopt;

	assert(index_count % 3 == 0);
	assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256);
	assert(vertex_positions_stride % sizeof(float) == 0);

	meshopt_Allocator allocator;

	const unsigned long long kNoEdge = ~0ull;

	// position remap: for each vertex, the canonical vertex sharing its position
	unsigned int* remap = allocator.allocate<unsigned int>(vertex_count);
	buildPositionRemap(remap, vertex_positions, vertex_count, vertex_positions_stride, allocator);

	// edge set keyed by remapped endpoints; the stored value keeps the original (unremapped) endpoints
	EdgeHasher edge_hasher = {remap};

	size_t edge_table_size = hashBuckets(index_count);
	unsigned long long* edge_table = allocator.allocate<unsigned long long>(edge_table_size);

	memset(edge_table, -1, edge_table_size * sizeof(unsigned long long));

	// pass 1: register every directed edge (first occurrence wins)
	for (size_t base = 0; base < index_count; base += 3)
	{
		for (int e = 0; e < 3; ++e)
		{
			unsigned int from = indices[base + e];
			unsigned int to = indices[base + (e + 1) % 3];
			assert(from < vertex_count && to < vertex_count);

			unsigned long long key = ((unsigned long long)from << 32) | to;
			unsigned long long* slot = hashLookup(edge_table, edge_table_size, edge_hasher, key, kNoEdge);

			if (*slot == kNoEdge)
				*slot = key;
		}
	}

	// pass 2: emit 12 indices per triangle
	for (size_t base = 0; base < index_count; base += 3)
	{
		unsigned int patch[12];

		for (int e = 0; e < 3; ++e)
		{
			unsigned int from = indices[base + e];
			unsigned int to = indices[base + (e + 1) % 3];
			assert(from < vertex_count && to < vertex_count);

			// look up the edge running in the opposite direction; it belongs to the neighbouring triangle
			unsigned long long reversed = ((unsigned long long)to << 32) | from;
			unsigned long long stored = *hashLookup(edge_table, edge_table_size, edge_hasher, reversed, kNoEdge);

			// border edge: no neighbour, so reuse the reversed form of our own edge
			if (stored == kNoEdge)
				stored = reversed;

			// triangle corner (slots 0, 1, 2)
			patch[e] = from;

			// opposing edge (slots 3, 4; 5, 6; 7, 8): low half first, then high half
			patch[3 + e * 2 + 0] = unsigned(stored);
			patch[3 + e * 2 + 1] = unsigned(stored >> 32);

			// dominant vertex (slots 9, 10, 11)
			patch[9 + e] = remap[from];
		}

		memcpy(destination + base * 4, patch, sizeof(patch));
	}
}

View File

@ -1,7 +1,7 @@
/**
* meshoptimizer - version 0.15
* meshoptimizer - version 0.16
*
* Copyright (C) 2016-2020, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
* Copyright (C) 2016-2021, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
* Report bugs and download new versions at https://github.com/zeux/meshoptimizer
*
* This library is distributed under the MIT License. See notice at the end of this file.
@ -12,7 +12,7 @@
#include <stddef.h>
/* Version macro; major * 1000 + minor * 10 + patch */
#define MESHOPTIMIZER_VERSION 150 /* 0.15 */
#define MESHOPTIMIZER_VERSION 160 /* 0.16 */
/* If no API is defined, assume default */
#ifndef MESHOPTIMIZER_API
@ -97,6 +97,35 @@ MESHOPTIMIZER_API void meshopt_generateShadowIndexBuffer(unsigned int* destinati
*/
MESHOPTIMIZER_API void meshopt_generateShadowIndexBufferMulti(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, const struct meshopt_Stream* streams, size_t stream_count);
/**
* Generate index buffer that can be used as a geometry shader input with triangle adjacency topology
* Each triangle is converted into a 6-vertex patch with the following layout:
* - 0, 2, 4: original triangle vertices
* - 1, 3, 5: vertices adjacent to edges 02, 24 and 40
* The resulting patch can be rendered with geometry shaders using e.g. VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY.
* This can be used to implement algorithms like silhouette detection/expansion and other forms of GS-driven rendering.
*
* destination must contain enough space for the resulting index buffer (index_count*2 elements)
* vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer
*/
MESHOPTIMIZER_EXPERIMENTAL void meshopt_generateAdjacencyIndexBuffer(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride);
/**
* Generate index buffer that can be used for PN-AEN tessellation with crack-free displacement
* Each triangle is converted into a 12-vertex patch with the following layout:
* - 0, 1, 2: original triangle vertices
* - 3, 4: opposing edge for edge 0, 1
* - 5, 6: opposing edge for edge 1, 2
* - 7, 8: opposing edge for edge 2, 0
* - 9, 10, 11: dominant vertices for corners 0, 1, 2
* The resulting patch can be rendered with hardware tessellation using PN-AEN and displacement mapping.
* See "Tessellation on Any Budget" (John McDonald, GDC 2011) for implementation details.
*
* destination must contain enough space for the resulting index buffer (index_count*4 elements)
* vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer
*/
MESHOPTIMIZER_EXPERIMENTAL void meshopt_generateTessellationIndexBuffer(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride);
/**
* Vertex transform cache optimizer
* Reorders indices to reduce the number of GPU vertex shader invocations
@ -373,22 +402,31 @@ MESHOPTIMIZER_API struct meshopt_VertexFetchStatistics meshopt_analyzeVertexFetc
struct meshopt_Meshlet
{
unsigned int vertices[64];
unsigned char indices[126][3];
unsigned char triangle_count;
unsigned char vertex_count;
/* offsets within meshlet_vertices and meshlet_triangles arrays with meshlet data */
unsigned int vertex_offset;
unsigned int triangle_offset;
/* number of vertices and triangles used in the meshlet; data is stored in consecutive range defined by offset and count */
unsigned int vertex_count;
unsigned int triangle_count;
};
/**
* Experimental: Meshlet builder
* Splits the mesh into a set of meshlets where each meshlet has a micro index buffer indexing into meshlet vertices that refer to the original vertex buffer
* The resulting data can be used to render meshes using NVidia programmable mesh shading pipeline, or in other cluster-based renderers.
* For maximum efficiency the index buffer being converted has to be optimized for vertex cache first.
* When using buildMeshlets, vertex positions need to be provided to minimize the size of the resulting clusters.
* When using buildMeshletsScan, for maximum efficiency the index buffer being converted has to be optimized for vertex cache first.
*
* destination must contain enough space for all meshlets, worst case size can be computed with meshopt_buildMeshletsBound
* max_vertices and max_triangles can't exceed limits statically declared in meshopt_Meshlet (max_vertices <= 64, max_triangles <= 126)
* meshlets must contain enough space for all meshlets, worst case size can be computed with meshopt_buildMeshletsBound
* meshlet_vertices must contain enough space for all meshlets, worst case size is equal to max_meshlets * max_vertices
* meshlet_triangles must contain enough space for all meshlets, worst case size is equal to max_meshlets * max_triangles * 3
* vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer
* max_vertices and max_triangles must not exceed implementation limits (max_vertices <= 255 - not 256!, max_triangles <= 512)
* cone_weight should be set to 0 when cone culling is not used, and a value between 0 and 1 otherwise to balance between cluster size and cone culling efficiency
*/
MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_buildMeshlets(struct meshopt_Meshlet* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, size_t max_vertices, size_t max_triangles);
MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_buildMeshlets(struct meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t max_vertices, size_t max_triangles, float cone_weight);
MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_buildMeshletsScan(struct meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const unsigned int* indices, size_t index_count, size_t vertex_count, size_t max_vertices, size_t max_triangles);
MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_buildMeshletsBound(size_t index_count, size_t max_vertices, size_t max_triangles);
struct meshopt_Bounds
@ -426,10 +464,10 @@ struct meshopt_Bounds
* to do frustum/occlusion culling, the formula that doesn't use the apex may be preferable.
*
* vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer
* index_count should be less than or equal to 256*3 (the function assumes clusters of limited size)
* index_count/3 should be less than or equal to 512 (the function assumes clusters of limited size)
*/
MESHOPTIMIZER_EXPERIMENTAL struct meshopt_Bounds meshopt_computeClusterBounds(const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride);
MESHOPTIMIZER_EXPERIMENTAL struct meshopt_Bounds meshopt_computeMeshletBounds(const struct meshopt_Meshlet* meshlet, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride);
MESHOPTIMIZER_EXPERIMENTAL struct meshopt_Bounds meshopt_computeMeshletBounds(const unsigned int* meshlet_vertices, const unsigned char* meshlet_triangles, size_t triangle_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride);
/**
* Experimental: Spatial sorter
@ -513,6 +551,10 @@ inline void meshopt_generateShadowIndexBuffer(T* destination, const T* indices,
template <typename T>
inline void meshopt_generateShadowIndexBufferMulti(T* destination, const T* indices, size_t index_count, size_t vertex_count, const meshopt_Stream* streams, size_t stream_count);
template <typename T>
inline void meshopt_generateAdjacencyIndexBuffer(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride);
template <typename T>
inline void meshopt_generateTessellationIndexBuffer(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride);
template <typename T>
inline void meshopt_optimizeVertexCache(T* destination, const T* indices, size_t index_count, size_t vertex_count);
template <typename T>
inline void meshopt_optimizeVertexCacheStrip(T* destination, const T* indices, size_t index_count, size_t vertex_count);
@ -547,7 +589,9 @@ inline meshopt_OverdrawStatistics meshopt_analyzeOverdraw(const T* indices, size
template <typename T>
inline meshopt_VertexFetchStatistics meshopt_analyzeVertexFetch(const T* indices, size_t index_count, size_t vertex_count, size_t vertex_size);
template <typename T>
inline size_t meshopt_buildMeshlets(meshopt_Meshlet* destination, const T* indices, size_t index_count, size_t vertex_count, size_t max_vertices, size_t max_triangles);
inline size_t meshopt_buildMeshlets(meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t max_vertices, size_t max_triangles, float cone_weight);
template <typename T>
inline size_t meshopt_buildMeshletsScan(meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const T* indices, size_t index_count, size_t vertex_count, size_t max_vertices, size_t max_triangles);
template <typename T>
inline meshopt_Bounds meshopt_computeClusterBounds(const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride);
template <typename T>
@ -761,6 +805,24 @@ inline void meshopt_generateShadowIndexBufferMulti(T* destination, const T* indi
meshopt_generateShadowIndexBufferMulti(out.data, in.data, index_count, vertex_count, streams, stream_count);
}
template <typename T>
inline void meshopt_generateAdjacencyIndexBuffer(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
{
meshopt_IndexAdapter<T> in(0, indices, index_count);
meshopt_IndexAdapter<T> out(destination, 0, index_count * 2);
meshopt_generateAdjacencyIndexBuffer(out.data, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride);
}
template <typename T>
inline void meshopt_generateTessellationIndexBuffer(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
{
meshopt_IndexAdapter<T> in(0, indices, index_count);
meshopt_IndexAdapter<T> out(destination, 0, index_count * 4);
meshopt_generateTessellationIndexBuffer(out.data, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride);
}
template <typename T>
inline void meshopt_optimizeVertexCache(T* destination, const T* indices, size_t index_count, size_t vertex_count)
{
@ -908,11 +970,19 @@ inline meshopt_VertexFetchStatistics meshopt_analyzeVertexFetch(const T* indices
}
template <typename T>
inline size_t meshopt_buildMeshlets(meshopt_Meshlet* destination, const T* indices, size_t index_count, size_t vertex_count, size_t max_vertices, size_t max_triangles)
inline size_t meshopt_buildMeshlets(meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t max_vertices, size_t max_triangles, float cone_weight)
{
meshopt_IndexAdapter<T> in(0, indices, index_count);
return meshopt_buildMeshlets(destination, in.data, index_count, vertex_count, max_vertices, max_triangles);
return meshopt_buildMeshlets(meshlets, meshlet_vertices, meshlet_triangles, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride, max_vertices, max_triangles, cone_weight);
}
// Typed-index front end for meshopt_buildMeshletsScan.
// Only the input indices go through meshopt_IndexAdapter<T>; the meshlet output
// arrays are passed along unchanged. Returns whatever the forwarded call returns.
template <typename T>
inline size_t meshopt_buildMeshletsScan(meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const T* indices, size_t index_count, size_t vertex_count, size_t max_vertices, size_t max_triangles)
{
	// input side: no result pointer, index_count source elements
	meshopt_IndexAdapter<T> source_adapter(0, indices, index_count);

	size_t meshlet_total = meshopt_buildMeshletsScan(meshlets, meshlet_vertices, meshlet_triangles, source_adapter.data, index_count, vertex_count, max_vertices, max_triangles);
	return meshlet_total;
}
template <typename T>
@ -934,7 +1004,7 @@ inline void meshopt_spatialSortTriangles(T* destination, const T* indices, size_
#endif
/**
* Copyright (c) 2016-2020 Arseny Kapoulkine
* Copyright (c) 2016-2021 Arseny Kapoulkine
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation

View File

@ -131,7 +131,7 @@ struct PositionHasher
static size_t hashBuckets2(size_t count)
{
size_t buckets = 1;
while (buckets < count)
while (buckets < count + count / 4)
buckets *= 2;
return buckets;

View File

@ -710,18 +710,12 @@ static v128_t decodeShuffleMask(unsigned char mask0, unsigned char mask1)
SIMD_TARGET
static void wasmMoveMask(v128_t mask, unsigned char& mask0, unsigned char& mask1)
{
v128_t mask_0 = wasm_v32x4_shuffle(mask, mask, 0, 2, 1, 3);
uint64_t mask_1a = wasm_i64x2_extract_lane(mask_0, 0) & 0x0804020108040201ull;
uint64_t mask_1b = wasm_i64x2_extract_lane(mask_0, 1) & 0x8040201080402010ull;
// magic constant found using z3 SMT assuming mask has 8 groups of 0xff or 0x00
const uint64_t magic = 0x000103070f1f3f80ull;
// TODO: This can use v8x16_bitmask in the future
uint64_t mask_2 = mask_1a | mask_1b;
uint64_t mask_4 = mask_2 | (mask_2 >> 16);
uint64_t mask_8 = mask_4 | (mask_4 >> 8);
mask0 = uint8_t(mask_8);
mask1 = uint8_t(mask_8 >> 32);
mask0 = uint8_t((wasm_i64x2_extract_lane(mask, 0) * magic) >> 56);
mask1 = uint8_t((wasm_i64x2_extract_lane(mask, 1) * magic) >> 56);
}
SIMD_TARGET