From 8b69f9c18aeba4ed6217559a0e4436568cbf565e Mon Sep 17 00:00:00 2001 From: "K. S. Ernest (iFire) Lee" Date: Sun, 27 Dec 2020 16:54:21 -0800 Subject: [PATCH] Meshoptimizer update library --- thirdparty/README.md | 10 +- thirdparty/meshoptimizer/indexcodec.cpp | 78 ----- thirdparty/meshoptimizer/meshoptimizer.h | 22 +- .../simplifier_get_resulting_error.patch | 96 ------ thirdparty/meshoptimizer/simplifier.cpp | 285 +++++++++++------- thirdparty/meshoptimizer/vertexcodec.cpp | 66 ---- thirdparty/meshoptimizer/vertexfilter.cpp | 31 +- 7 files changed, 223 insertions(+), 365 deletions(-) delete mode 100644 thirdparty/meshoptimizer/patches/simplifier_get_resulting_error.patch diff --git a/thirdparty/README.md b/thirdparty/README.md index 8075ec43ab0..1a325f71112 100644 --- a/thirdparty/README.md +++ b/thirdparty/README.md @@ -339,18 +339,12 @@ File extracted from upstream release tarball: ## meshoptimizer - Upstream: https://github.com/zeux/meshoptimizer -- Version: 0.15 (2020) +- Version: git (e4e43fe36e7a8705e602e7ca2f9fb795ded1d0b9, 2020) - License: MIT -File extracted from upstream release tarball: - +- File extracted from upstream tarball: - All files in `src/`. -Important: Some files have Godot-made changes. -They can be applied with the patch in the `patches` folder, but are meant to be superseded -by upstream API changes. - - ## miniupnpc - Upstream: https://github.com/miniupnp/miniupnp/tree/master/miniupnpc diff --git a/thirdparty/meshoptimizer/indexcodec.cpp b/thirdparty/meshoptimizer/indexcodec.cpp index eeb541e5be0..5c35eb43ae3 100644 --- a/thirdparty/meshoptimizer/indexcodec.cpp +++ b/thirdparty/meshoptimizer/indexcodec.cpp @@ -4,14 +4,6 @@ #include #include -#ifndef TRACE -#define TRACE 0 -#endif - -#if TRACE -#include -#endif - // This work is based on: // Fabian Giesen. Simple lossless index buffer compression & follow-up. 2013 // Conor Stokes. Vertex Cache Optimised Index Buffer Compression. 2014 @@ -167,38 +159,6 @@ static void writeTriangle(void* destination, size_t offset, size_t index_size, u } } -#if TRACE -static size_t sortTop16(unsigned char dest[16], size_t stats[256]) -{ - size_t destsize = 0; - - for (size_t i = 0; i < 256; ++i) - { - size_t j = 0; - for (; j < destsize; ++j) - { - if (stats[i] >= stats[dest[j]]) - { - if (destsize < 16) - destsize++; - - memmove(&dest[j + 1], &dest[j], destsize - 1 - j); - dest[j] = (unsigned char)i; - break; - } - } - - if (j == destsize && destsize < 16) - { - dest[destsize] = (unsigned char)i; - destsize++; - } - } - - return destsize; -} -#endif - } // namespace meshopt size_t meshopt_encodeIndexBuffer(unsigned char* buffer, size_t buffer_size, const unsigned int* indices, size_t index_count) @@ -207,11 +167,6 @@ size_t meshopt_encodeIndexBuffer(unsigned char* buffer, size_t buffer_size, cons assert(index_count % 3 == 0); -#if TRACE - size_t codestats[256] = {}; - size_t codeauxstats[256] = {}; -#endif - // the minimum valid encoding is header, 1 byte per triangle and a 16-byte codeaux table if (buffer_size < 1 + index_count / 3 + 16) return 0; @@ -275,10 +230,6 @@ size_t meshopt_encodeIndexBuffer(unsigned char* buffer, size_t buffer_size, cons *code++ = (unsigned char)((fe << 4) | fec); -#if TRACE - codestats[code[-1]]++; -#endif - // note that we need to update the last index since free indices are delta-encoded if (fec == 15) encodeIndex(data, c, last), last = c; @@ -334,11 +285,6 @@ size_t meshopt_encodeIndexBuffer(unsigned char* buffer, size_t buffer_size, cons *data++ = codeaux; } -#if TRACE - codestats[code[-1]]++; - codeauxstats[codeaux]++; -#endif - // note that we need to update the last index since free indices are delta-encoded if (fea == 15) encodeIndex(data, a, last), last = a; @@ -387,30 +333,6 @@ size_t meshopt_encodeIndexBuffer(unsigned char* buffer, size_t buffer_size, cons assert(data >= buffer + index_count / 3 + 16); assert(data <= buffer + buffer_size); -#if TRACE - unsigned char codetop[16], codeauxtop[16]; - size_t codetopsize = sortTop16(codetop, codestats); - size_t codeauxtopsize = sortTop16(codeauxtop, codeauxstats); - - size_t sumcode = 0, sumcodeaux = 0; - for (size_t i = 0; i < 256; ++i) - sumcode += codestats[i], sumcodeaux += codeauxstats[i]; - - size_t acccode = 0, acccodeaux = 0; - - printf("code\t\t\t\t\tcodeaux\n"); - - for (size_t i = 0; i < codetopsize && i < codeauxtopsize; ++i) - { - acccode += codestats[codetop[i]]; - acccodeaux += codeauxstats[codeauxtop[i]]; - - printf("%2d: %02x = %d (%.1f%% ..%.1f%%)\t\t%2d: %02x = %d (%.1f%% ..%.1f%%)\n", - int(i), codetop[i], int(codestats[codetop[i]]), double(codestats[codetop[i]]) / double(sumcode) * 100, double(acccode) / double(sumcode) * 100, - int(i), codeauxtop[i], int(codeauxstats[codeauxtop[i]]), double(codeauxstats[codeauxtop[i]]) / double(sumcodeaux) * 100, double(acccodeaux) / double(sumcodeaux) * 100); - } -#endif - return data - buffer; } diff --git a/thirdparty/meshoptimizer/meshoptimizer.h b/thirdparty/meshoptimizer/meshoptimizer.h index fde00f9c82b..4071f0a3710 100644 --- a/thirdparty/meshoptimizer/meshoptimizer.h +++ b/thirdparty/meshoptimizer/meshoptimizer.h @@ -239,7 +239,6 @@ MESHOPTIMIZER_API int meshopt_decodeVertexBuffer(void* destination, size_t verte /** * Vertex buffer filters * These functions can be used to filter output of meshopt_decodeVertexBuffer in-place. - * count must be aligned by 4 and stride is fixed for each function to facilitate SIMD implementation. * * meshopt_decodeFilterOct decodes octahedral encoding of a unit vector with K-bit (K <= 16) signed X/Y as an input; Z must store 1.0f. * Each component is stored as an 8-bit or 16-bit normalized integer; stride must be equal to 4 or 8. W is preserved as is. @@ -265,11 +264,10 @@ MESHOPTIMIZER_EXPERIMENTAL void meshopt_decodeFilterExp(void* buffer, size_t ver * * destination must contain enough space for the *source* index buffer (since optimization is iterative, this means index_count elements - *not* target_index_count!) * vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer + * target_error represents the error relative to mesh extents that can be tolerated, e.g. 0.01 = 1% deformation + * result_error can be NULL; when it's not NULL, it will contain the resulting (relative) error after simplification */ -// -- GODOT start -- -//MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error); -MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplify(unsigned int *destination, const unsigned int *indices, size_t index_count, const float *vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, float *r_resulting_error); -// -- GODOT end -- +MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, float* result_error); /** * Experimental: Mesh simplifier (sloppy) @@ -296,6 +294,14 @@ MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplifySloppy(unsigned int* destinati */ MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplifyPoints(unsigned int* destination, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_vertex_count); +/** + * Experimental: Returns the error scaling factor used by the simplifier to convert between absolute and relative extents + * + * Absolute error must be *divided* by the scaling factor before passing it to meshopt_simplify as target_error + * Relative error returned by meshopt_simplify via result_error must be *multiplied* by the scaling factor to get absolute error. + */ +MESHOPTIMIZER_EXPERIMENTAL float meshopt_simplifyScale(const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride); + /** * Mesh stripifier * Converts a previously vertex cache optimized triangle list to triangle strip, stitching strips using restart index or degenerate triangles @@ -525,7 +531,7 @@ inline size_t meshopt_encodeIndexSequence(unsigned char* buffer, size_t buffer_s template inline int meshopt_decodeIndexSequence(T* destination, size_t index_count, const unsigned char* buffer, size_t buffer_size); template -inline size_t meshopt_simplify(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error); +inline size_t meshopt_simplify(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, float* result_error = 0); template inline size_t meshopt_simplifySloppy(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count); template @@ -840,12 +846,12 @@ inline int meshopt_decodeIndexSequence(T* destination, size_t index_count, const } template -inline size_t meshopt_simplify(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error) +inline size_t meshopt_simplify(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, float* result_error) { meshopt_IndexAdapter in(0, indices, index_count); meshopt_IndexAdapter out(destination, 0, index_count); - return meshopt_simplify(out.data, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride, target_index_count, target_error); + return meshopt_simplify(out.data, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride, target_index_count, target_error, result_error); } template diff --git a/thirdparty/meshoptimizer/patches/simplifier_get_resulting_error.patch b/thirdparty/meshoptimizer/patches/simplifier_get_resulting_error.patch deleted file mode 100644 index 1be38e45d23..00000000000 --- a/thirdparty/meshoptimizer/patches/simplifier_get_resulting_error.patch +++ /dev/null @@ -1,96 +0,0 @@ -diff --git a/thirdparty/meshoptimizer/meshoptimizer.h b/thirdparty/meshoptimizer/meshoptimizer.h -index a442d103c8..fde00f9c82 100644 ---- a/thirdparty/meshoptimizer/meshoptimizer.h -+++ b/thirdparty/meshoptimizer/meshoptimizer.h -@@ -266,7 +266,10 @@ MESHOPTIMIZER_EXPERIMENTAL void meshopt_decodeFilterExp(void* buffer, size_t ver - * destination must contain enough space for the *source* index buffer (since optimization is iterative, this means index_count elements - *not* target_index_count!) - * vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer - */ --MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error); -+// -- GODOT start -- -+//MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error); -+MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplify(unsigned int *destination, const unsigned int *indices, size_t index_count, const float *vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, float *r_resulting_error); -+// -- GODOT end -- - - /** - * Experimental: Mesh simplifier (sloppy) -diff --git a/thirdparty/meshoptimizer/simplifier.cpp b/thirdparty/meshoptimizer/simplifier.cpp -index bd523275ce..51cf634186 100644 ---- a/thirdparty/meshoptimizer/simplifier.cpp -+++ b/thirdparty/meshoptimizer/simplifier.cpp -@@ -1143,7 +1143,10 @@ unsigned int* meshopt_simplifyDebugLoop = 0; - unsigned int* meshopt_simplifyDebugLoopBack = 0; - #endif - --size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error) -+// -- GODOT start -- -+//size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error) -+size_t meshopt_simplify(unsigned int *destination, const unsigned int *indices, size_t index_count, const float *vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, float *r_resulting_error) -+// -- GODOT end -- - { - using namespace meshopt; - -@@ -1198,10 +1201,13 @@ size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, - if (result != indices) - memcpy(result, indices, index_count * sizeof(unsigned int)); - -+// -- GODOT start -- - #if TRACE - size_t pass_count = 0; -- float worst_error = 0; -+ //float worst_error = 0; - #endif -+ float worst_error = 0; -+// -- GODOT end -- - - Collapse* edge_collapses = allocator.allocate(index_count); - unsigned int* collapse_order = allocator.allocate(index_count); -@@ -1213,6 +1219,12 @@ size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, - // target_error input is linear; we need to adjust it to match quadricError units - float error_limit = target_error * target_error; - -+// -- GODOT start -- -+ if (r_resulting_error) { -+ *r_resulting_error = 1.0; -+ } -+// -- GODOT end -- -+ - while (result_count > target_index_count) - { - size_t edge_collapse_count = pickEdgeCollapses(edge_collapses, result, result_count, remap, vertex_kind, loop); -@@ -1257,7 +1269,8 @@ size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, - size_t new_count = remapIndexBuffer(result, result_count, collapse_remap); - assert(new_count < result_count); - --#if TRACE -+// -- GODOT start -- -+//#if TRACE - float pass_error = 0.f; - for (size_t i = 0; i < edge_collapse_count; ++i) - { -@@ -1267,15 +1280,24 @@ size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, - pass_error = c.error; - } - -- pass_count++; -+ //pass_count++; - worst_error = (worst_error < pass_error) ? pass_error : worst_error; - -+#if TRACE -+ pass_count++; - printf("pass %d: triangles: %d -> %d, collapses: %d/%d (goal: %d), error: %e (limit %e goal %e)\n", int(pass_count), int(result_count / 3), int(new_count / 3), int(collapses), int(edge_collapse_count), int(edge_collapse_goal), pass_error, error_limit, error_goal); - #endif -+// -- GODOT end -- - - result_count = new_count; - } - -+// -- GODOT start -- -+ if (r_resulting_error) { -+ *r_resulting_error = sqrt(worst_error); -+ } -+// -- GODOT end -- -+ - #if TRACE - printf("passes: %d, worst error: %e\n", int(pass_count), worst_error); - #endif diff --git a/thirdparty/meshoptimizer/simplifier.cpp b/thirdparty/meshoptimizer/simplifier.cpp index b195a8cb5d5..5205b011725 100644 --- a/thirdparty/meshoptimizer/simplifier.cpp +++ b/thirdparty/meshoptimizer/simplifier.cpp @@ -6,7 +6,6 @@ #include #include - #ifndef TRACE #define TRACE 0 #endif @@ -15,6 +14,12 @@ #include #endif +#if TRACE +#define TRACESTATS(i) stats[i]++; +#else +#define TRACESTATS(i) (void)0 +#endif + // This work is based on: // Michael Garland and Paul S. Heckbert. Surface simplification using quadric error metrics. 1997 // Michael Garland. Quadric-based polygonal surface simplification. 1999 @@ -26,28 +31,37 @@ namespace meshopt struct EdgeAdjacency { + struct Edge + { + unsigned int next; + unsigned int prev; + }; + unsigned int* counts; unsigned int* offsets; - unsigned int* data; + Edge* data; }; -static void buildEdgeAdjacency(EdgeAdjacency& adjacency, const unsigned int* indices, size_t index_count, size_t vertex_count, meshopt_Allocator& allocator) +static void prepareEdgeAdjacency(EdgeAdjacency& adjacency, size_t index_count, size_t vertex_count, meshopt_Allocator& allocator) { - size_t face_count = index_count / 3; - - // allocate arrays adjacency.counts = allocator.allocate(vertex_count); adjacency.offsets = allocator.allocate(vertex_count); - adjacency.data = allocator.allocate(index_count); + adjacency.data = allocator.allocate(index_count); +} + +static void updateEdgeAdjacency(EdgeAdjacency& adjacency, const unsigned int* indices, size_t index_count, size_t vertex_count, const unsigned int* remap) +{ + size_t face_count = index_count / 3; // fill edge counts memset(adjacency.counts, 0, vertex_count * sizeof(unsigned int)); for (size_t i = 0; i < index_count; ++i) { - assert(indices[i] < vertex_count); + unsigned int v = remap ? remap[indices[i]] : indices[i]; + assert(v < vertex_count); - adjacency.counts[indices[i]]++; + adjacency.counts[v]++; } // fill offset table @@ -66,9 +80,24 @@ static void buildEdgeAdjacency(EdgeAdjacency& adjacency, const unsigned int* ind { unsigned int a = indices[i * 3 + 0], b = indices[i * 3 + 1], c = indices[i * 3 + 2]; - adjacency.data[adjacency.offsets[a]++] = b; - adjacency.data[adjacency.offsets[b]++] = c; - adjacency.data[adjacency.offsets[c]++] = a; + if (remap) + { + a = remap[a]; + b = remap[b]; + c = remap[c]; + } + + adjacency.data[adjacency.offsets[a]].next = b; + adjacency.data[adjacency.offsets[a]].prev = c; + adjacency.offsets[a]++; + + adjacency.data[adjacency.offsets[b]].next = c; + adjacency.data[adjacency.offsets[b]].prev = a; + adjacency.offsets[b]++; + + adjacency.data[adjacency.offsets[c]].next = a; + adjacency.data[adjacency.offsets[c]].prev = b; + adjacency.offsets[c]++; } // fix offsets that have been disturbed by the previous pass @@ -209,10 +238,10 @@ const unsigned char kHasOpposite[Kind_Count][Kind_Count] = { static bool hasEdge(const EdgeAdjacency& adjacency, unsigned int a, unsigned int b) { unsigned int count = adjacency.counts[a]; - const unsigned int* data = adjacency.data + adjacency.offsets[a]; + const EdgeAdjacency::Edge* edges = adjacency.data + adjacency.offsets[a]; for (size_t i = 0; i < count; ++i) - if (data[i] == b) + if (edges[i].next == b) return true; return false; @@ -234,11 +263,11 @@ static void classifyVertices(unsigned char* result, unsigned int* loop, unsigned unsigned int vertex = unsigned(i); unsigned int count = adjacency.counts[vertex]; - const unsigned int* data = adjacency.data + adjacency.offsets[vertex]; + const EdgeAdjacency::Edge* edges = adjacency.data + adjacency.offsets[vertex]; for (size_t j = 0; j < count; ++j) { - unsigned int target = data[j]; + unsigned int target = edges[j].next; if (!hasEdge(adjacency, target, vertex)) { @@ -249,10 +278,7 @@ static void classifyVertices(unsigned char* result, unsigned int* loop, unsigned } #if TRACE - size_t lockedstats[4] = {}; -#define TRACELOCKED(i) lockedstats[i]++; -#else -#define TRACELOCKED(i) (void)0 + size_t stats[4] = {}; #endif for (size_t i = 0; i < vertex_count; ++i) @@ -278,7 +304,7 @@ static void classifyVertices(unsigned char* result, unsigned int* loop, unsigned else { result[i] = Kind_Locked; - TRACELOCKED(0); + TRACESTATS(0); } } else if (wedge[wedge[i]] == i) @@ -299,20 +325,20 @@ static void classifyVertices(unsigned char* result, unsigned int* loop, unsigned else { result[i] = Kind_Locked; - TRACELOCKED(1); + TRACESTATS(1); } } else { result[i] = Kind_Locked; - TRACELOCKED(2); + TRACESTATS(2); } } else { // more than one vertex maps to this one; we don't have classification available result[i] = Kind_Locked; - TRACELOCKED(3); + TRACESTATS(3); } } else @@ -325,7 +351,7 @@ static void classifyVertices(unsigned char* result, unsigned int* loop, unsigned #if TRACE printf("locked: many open edges %d, disconnected seam %d, many seam edges %d, many wedges %d\n", - int(lockedstats[0]), int(lockedstats[1]), int(lockedstats[2]), int(lockedstats[3])); + int(stats[0]), int(stats[1]), int(stats[2]), int(stats[3])); #endif } @@ -333,11 +359,8 @@ struct Vector3 { float x, y, z; }; -// -- GODOT start -- -//static void rescalePositions(Vector3* result, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride) -static float rescalePositions(Vector3* result, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride) -// -- GODOT end -- +static float rescalePositions(Vector3* result, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride) { size_t vertex_stride_float = vertex_positions_stride / sizeof(float); @@ -348,9 +371,12 @@ static float rescalePositions(Vector3* result, const float* vertex_positions_dat { const float* v = vertex_positions_data + i * vertex_stride_float; - result[i].x = v[0]; - result[i].y = v[1]; - result[i].z = v[2]; + if (result) + { + result[i].x = v[0]; + result[i].y = v[1]; + result[i].z = v[2]; + } for (int j = 0; j < 3; ++j) { @@ -367,18 +393,19 @@ static float rescalePositions(Vector3* result, const float* vertex_positions_dat extent = (maxv[1] - minv[1]) < extent ? extent : (maxv[1] - minv[1]); extent = (maxv[2] - minv[2]) < extent ? extent : (maxv[2] - minv[2]); - float scale = extent == 0 ? 0.f : 1.f / extent; - - for (size_t i = 0; i < vertex_count; ++i) + if (result) { - result[i].x = (result[i].x - minv[0]) * scale; - result[i].y = (result[i].y - minv[1]) * scale; - result[i].z = (result[i].z - minv[2]) * scale; - } -// -- GODOT start -- - return extent; -// -- GODOT end -- + float scale = extent == 0 ? 0.f : 1.f / extent; + for (size_t i = 0; i < vertex_count; ++i) + { + result[i].x = (result[i].x - minv[0]) * scale; + result[i].y = (result[i].y - minv[1]) * scale; + result[i].z = (result[i].z - minv[2]) * scale; + } + } + + return extent; } struct Quadric @@ -594,6 +621,48 @@ static void fillEdgeQuadrics(Quadric* vertex_quadrics, const unsigned int* indic } } +// does triangle ABC flip when C is replaced with D? +static bool hasTriangleFlip(const Vector3& a, const Vector3& b, const Vector3& c, const Vector3& d) +{ + Vector3 eb = {b.x - a.x, b.y - a.y, b.z - a.z}; + Vector3 ec = {c.x - a.x, c.y - a.y, c.z - a.z}; + Vector3 ed = {d.x - a.x, d.y - a.y, d.z - a.z}; + + Vector3 nbc = {eb.y * ec.z - eb.z * ec.y, eb.z * ec.x - eb.x * ec.z, eb.x * ec.y - eb.y * ec.x}; + Vector3 nbd = {eb.y * ed.z - eb.z * ed.y, eb.z * ed.x - eb.x * ed.z, eb.x * ed.y - eb.y * ed.x}; + + return nbc.x * nbd.x + nbc.y * nbd.y + nbc.z * nbd.z < 0; +} + +static bool hasTriangleFlips(const EdgeAdjacency& adjacency, const Vector3* vertex_positions, const unsigned int* collapse_remap, unsigned int i0, unsigned int i1) +{ + assert(collapse_remap[i0] == i0); + assert(collapse_remap[i1] == i1); + + const Vector3& v0 = vertex_positions[i0]; + const Vector3& v1 = vertex_positions[i1]; + + const EdgeAdjacency::Edge* edges = &adjacency.data[adjacency.offsets[i0]]; + size_t count = adjacency.counts[i0]; + + for (size_t i = 0; i < count; ++i) + { + unsigned int a = collapse_remap[edges[i].next]; + unsigned int b = collapse_remap[edges[i].prev]; + + // skip triangles that get collapsed + // note: this is mathematically redundant as if either of these is true, the dot product in hasTriangleFlip should be 0 + if (a == i1 || b == i1) + continue; + + // early-out when at least one triangle flips due to a collapse + if (hasTriangleFlip(vertex_positions[a], vertex_positions[b], v0, v1)) + return true; + } + + return false; +} + static size_t pickEdgeCollapses(Collapse* collapses, const unsigned int* indices, size_t index_count, const unsigned int* remap, const unsigned char* vertex_kind, const unsigned int* loop) { size_t collapse_count = 0; @@ -704,7 +773,7 @@ static void dumpEdgeCollapses(const Collapse* collapses, size_t collapse_count, for (int k0 = 0; k0 < Kind_Count; ++k0) for (int k1 = 0; k1 < Kind_Count; ++k1) if (ckinds[k0][k1]) - printf("collapses %d -> %d: %d, min error %e\n", k0, k1, int(ckinds[k0][k1]), cerrors[k0][k1]); + printf("collapses %d -> %d: %d, min error %e\n", k0, k1, int(ckinds[k0][k1]), ckinds[k0][k1] ? sqrtf(cerrors[k0][k1]) : 0.f); } static void dumpLockedCollapses(const unsigned int* indices, size_t index_count, const unsigned char* vertex_kind) @@ -772,22 +841,38 @@ static void sortEdgeCollapses(unsigned int* sort_order, const Collapse* collapse } } -static size_t performEdgeCollapses(unsigned int* collapse_remap, unsigned char* collapse_locked, Quadric* vertex_quadrics, const Collapse* collapses, size_t collapse_count, const unsigned int* collapse_order, const unsigned int* remap, const unsigned int* wedge, const unsigned char* vertex_kind, size_t triangle_collapse_goal, float error_goal, float error_limit) +static size_t performEdgeCollapses(unsigned int* collapse_remap, unsigned char* collapse_locked, Quadric* vertex_quadrics, const Collapse* collapses, size_t collapse_count, const unsigned int* collapse_order, const unsigned int* remap, const unsigned int* wedge, const unsigned char* vertex_kind, const Vector3* vertex_positions, const EdgeAdjacency& adjacency, size_t triangle_collapse_goal, float error_limit, float& result_error) { size_t edge_collapses = 0; size_t triangle_collapses = 0; + // most collapses remove 2 triangles; use this to establish a bound on the pass in terms of error limit + // note that edge_collapse_goal is an estimate; triangle_collapse_goal will be used to actually limit collapses + size_t edge_collapse_goal = triangle_collapse_goal / 2; + +#if TRACE + size_t stats[4] = {}; +#endif + for (size_t i = 0; i < collapse_count; ++i) { const Collapse& c = collapses[collapse_order[i]]; + TRACESTATS(0); + if (c.error > error_limit) break; - if (c.error > error_goal && triangle_collapses > triangle_collapse_goal / 10) + if (triangle_collapses >= triangle_collapse_goal) break; - if (triangle_collapses >= triangle_collapse_goal) + // we limit the error in each pass based on the error of optimal last collapse; since many collapses will be locked + // as they will share vertices with other successfull collapses, we need to increase the acceptable error by some factor + float error_goal = edge_collapse_goal < collapse_count ? 1.5f * collapses[collapse_order[edge_collapse_goal]].error : FLT_MAX; + + // on average, each collapse is expected to lock 6 other collapses; to avoid degenerate passes on meshes with odd + // topology, we only abort if we got over 1/6 collapses accordingly. + if (c.error > error_goal && triangle_collapses > triangle_collapse_goal / 6) break; unsigned int i0 = c.v0; @@ -800,7 +885,19 @@ static size_t performEdgeCollapses(unsigned int* collapse_remap, unsigned char* // it's important to not move the vertices twice since it complicates the tracking/remapping logic // it's important to not move other vertices towards a moved vertex to preserve error since we don't re-rank collapses mid-pass if (collapse_locked[r0] | collapse_locked[r1]) + { + TRACESTATS(1); continue; + } + + if (hasTriangleFlips(adjacency, vertex_positions, collapse_remap, r0, r1)) + { + // adjust collapse goal since this collapse is invalid and shouldn't factor into error goal + edge_collapse_goal++; + + TRACESTATS(2); + continue; + } assert(collapse_remap[r0] == r0); assert(collapse_remap[r1] == r1); @@ -842,8 +939,18 @@ static size_t performEdgeCollapses(unsigned int* collapse_remap, unsigned char* // border edges collapse 1 triangle, other edges collapse 2 or more triangle_collapses += (vertex_kind[i0] == Kind_Border) ? 1 : 2; edge_collapses++; + + result_error = result_error < c.error ? c.error : result_error; } +#if TRACE + float error_goal_perfect = edge_collapse_goal < collapse_count ? collapses[collapse_order[edge_collapse_goal]].error : 0.f; + + printf("removed %d triangles, error %e (goal %e); evaluated %d/%d collapses (done %d, skipped %d, invalid %d)\n", + int(triangle_collapses), sqrtf(result_error), sqrtf(error_goal_perfect), + int(stats[0]), int(collapse_count), int(edge_collapses), int(stats[1]), int(stats[2])); +#endif + return edge_collapses; } @@ -1151,10 +1258,7 @@ unsigned int* meshopt_simplifyDebugLoop = 0; unsigned int* meshopt_simplifyDebugLoopBack = 0; #endif -// -- GODOT start -- -//size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error) -size_t meshopt_simplify(unsigned int *destination, const unsigned int *indices, size_t index_count, const float *vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, float *r_resulting_error) -// -- GODOT end -- +size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, float* out_result_error) { using namespace meshopt; @@ -1169,7 +1273,8 @@ size_t meshopt_simplify(unsigned int *destination, const unsigned int *indices, // build adjacency information EdgeAdjacency adjacency = {}; - buildEdgeAdjacency(adjacency, indices, index_count, vertex_count, allocator); + prepareEdgeAdjacency(adjacency, index_count, vertex_count, allocator); + updateEdgeAdjacency(adjacency, indices, index_count, vertex_count, NULL); // build position remap that maps each vertex to the one with identical position unsigned int* remap = allocator.allocate(vertex_count); @@ -1198,10 +1303,7 @@ size_t meshopt_simplify(unsigned int *destination, const unsigned int *indices, #endif Vector3* vertex_positions = allocator.allocate(vertex_count); -// -- GODOT start -- - //rescalePositions(vertex_positions, vertex_positions_data, vertex_count, vertex_positions_stride); - float extent = rescalePositions(vertex_positions, vertex_positions_data, vertex_count, vertex_positions_stride); -// -- GODOT end -- + rescalePositions(vertex_positions, vertex_positions_data, vertex_count, vertex_positions_stride); Quadric* vertex_quadrics = allocator.allocate(vertex_count); memset(vertex_quadrics, 0, vertex_count * sizeof(Quadric)); @@ -1212,13 +1314,9 @@ size_t meshopt_simplify(unsigned int *destination, const unsigned int *indices, if (result != indices) memcpy(result, indices, index_count * sizeof(unsigned int)); -// -- GODOT start -- #if TRACE size_t pass_count = 0; - //float worst_error = 0; #endif - float worst_error = 0; -// -- GODOT end -- Collapse* edge_collapses = allocator.allocate(index_count); unsigned int* collapse_order = allocator.allocate(index_count); @@ -1226,18 +1324,16 @@ size_t meshopt_simplify(unsigned int *destination, const unsigned int *indices, unsigned char* collapse_locked = allocator.allocate(vertex_count); size_t result_count = index_count; + float result_error = 0; // target_error input is linear; we need to adjust it to match quadricError units float error_limit = target_error * target_error; -// -- GODOT start -- - if (r_resulting_error) { - *r_resulting_error = 1.0; - } -// -- GODOT end -- - while (result_count > target_index_count) { + // note: throughout the simplification process adjacency structure reflects welded topology for result-in-progress + updateEdgeAdjacency(adjacency, result, result_count, vertex_count, remap); + size_t edge_collapse_count = pickEdgeCollapses(edge_collapses, result, result_count, remap, vertex_kind, loop); // no edges can be collapsed any more due to topology restrictions @@ -1252,23 +1348,18 @@ size_t meshopt_simplify(unsigned int *destination, const unsigned int *indices, sortEdgeCollapses(collapse_order, edge_collapses, edge_collapse_count); - // most collapses remove 2 triangles; use this to establish a bound on the pass in terms of error limit - // note that edge_collapse_goal is an estimate; triangle_collapse_goal will be used to actually limit collapses size_t triangle_collapse_goal = (result_count - target_index_count) / 3; - size_t edge_collapse_goal = triangle_collapse_goal / 2; - - // we limit the error in each pass based on the error of optimal last collapse; since many collapses will be locked - // as they will share vertices with other successfull collapses, we need to increase the acceptable error by this factor - const float kPassErrorBound = 1.5f; - - float error_goal = edge_collapse_goal < edge_collapse_count ? edge_collapses[collapse_order[edge_collapse_goal]].error * kPassErrorBound : FLT_MAX; for (size_t i = 0; i < vertex_count; ++i) collapse_remap[i] = unsigned(i); memset(collapse_locked, 0, vertex_count); - size_t collapses = performEdgeCollapses(collapse_remap, collapse_locked, vertex_quadrics, edge_collapses, edge_collapse_count, collapse_order, remap, wedge, vertex_kind, triangle_collapse_goal, error_goal, error_limit); +#if TRACE + printf("pass %d: ", int(pass_count++)); +#endif + + size_t collapses = performEdgeCollapses(collapse_remap, collapse_locked, vertex_quadrics, edge_collapses, edge_collapse_count, collapse_order, remap, wedge, vertex_kind, vertex_positions, adjacency, triangle_collapse_goal, error_limit, result_error); // no edges can be collapsed any more due to hitting the error limit or triangle collapse limit if (collapses == 0) @@ -1280,37 +1371,11 @@ size_t meshopt_simplify(unsigned int *destination, const unsigned int *indices, size_t new_count = remapIndexBuffer(result, result_count, collapse_remap); assert(new_count < result_count); -// -- GODOT start -- -//#if TRACE - float pass_error = 0.f; - for (size_t i = 0; i < edge_collapse_count; ++i) - { - Collapse& c = edge_collapses[collapse_order[i]]; - - if (collapse_remap[c.v0] == c.v1) - pass_error = c.error; - } - - //pass_count++; - worst_error = (worst_error < pass_error) ? pass_error : worst_error; - -#if TRACE - pass_count++; - printf("pass %d: triangles: %d -> %d, collapses: %d/%d (goal: %d), error: %e (limit %e goal %e)\n", int(pass_count), int(result_count / 3), int(new_count / 3), int(collapses), int(edge_collapse_count), int(edge_collapse_goal), pass_error, error_limit, error_goal); -#endif -// -- GODOT end -- - result_count = new_count; } -// -- GODOT start -- - if (r_resulting_error) { - *r_resulting_error = sqrt(worst_error) * extent; - } -// -- GODOT end -- - #if TRACE - printf("passes: %d, worst error: %e\n", int(pass_count), worst_error); + printf("result: %d triangles, error: %e; total %d passes\n", int(result_count), sqrtf(result_error), int(pass_count)); #endif #if TRACE > 1 @@ -1328,6 +1393,10 @@ size_t meshopt_simplify(unsigned int *destination, const unsigned int *indices, memcpy(meshopt_simplifyDebugLoopBack, loopback, vertex_count * sizeof(unsigned int)); #endif + // result_error is quadratic; we need to remap it back to linear + if (out_result_error) + *out_result_error = sqrtf(result_error); + return result_count; } @@ -1560,3 +1629,15 @@ size_t meshopt_simplifyPoints(unsigned int* destination, const float* vertex_pos return cell_count; } + +float meshopt_simplifyScale(const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride) +{ + using namespace meshopt; + + assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256); + assert(vertex_positions_stride % sizeof(float) == 0); + + float extent = rescalePositions(NULL, vertex_positions, vertex_count, vertex_positions_stride); + + return extent; +} diff --git a/thirdparty/meshoptimizer/vertexcodec.cpp b/thirdparty/meshoptimizer/vertexcodec.cpp index 784c9a13db7..2cbfaac3676 100644 --- a/thirdparty/meshoptimizer/vertexcodec.cpp +++ b/thirdparty/meshoptimizer/vertexcodec.cpp @@ -80,14 +80,6 @@ #include #endif -#ifndef TRACE -#define TRACE 0 -#endif - -#if TRACE -#include -#endif - #ifdef SIMD_WASM #define wasmx_splat_v32x4(v, i) wasm_v32x4_shuffle(v, v, i, i, i, i) #define wasmx_unpacklo_v8x16(a, b) wasm_v8x16_shuffle(a, b, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23) @@ -133,19 +125,6 @@ inline unsigned char unzigzag8(unsigned char v) return -(v & 1) ^ (v >> 1); } -#if TRACE -struct Stats -{ - size_t size; - size_t header; - size_t bitg[4]; - size_t bitb[4]; -}; - -Stats* bytestats; -Stats vertexstats[256]; -#endif - static bool encodeBytesGroupZero(const unsigned char* buffer) { for (size_t i = 0; i < kByteGroupSize; ++i) @@ -267,17 +246,8 @@ static unsigned char* encodeBytes(unsigned char* data, unsigned char* data_end, assert(data + best_size == next); data = next; - -#if TRACE > 1 - bytestats->bitg[bitslog2]++; - bytestats->bitb[bitslog2] += best_size; -#endif } -#if TRACE > 1 - bytestats->header += header_size; -#endif - return data; } @@ -306,19 +276,9 @@ static unsigned char* encodeVertexBlock(unsigned char* data, unsigned char* data vertex_offset += vertex_size; } -#if TRACE - const unsigned char* olddata = data; - bytestats = &vertexstats[k]; -#endif - data = encodeBytes(data, data_end, buffer, (vertex_count + kByteGroupSize - 1) & ~(kByteGroupSize - 1)); if (!data) return 0; - -#if TRACE - bytestats = 0; - vertexstats[k].size += data - olddata; -#endif } memcpy(last_vertex, &vertex_data[vertex_size * (vertex_count - 1)], vertex_size); @@ -1086,10 +1046,6 @@ size_t meshopt_encodeVertexBuffer(unsigned char* buffer, size_t buffer_size, con assert(vertex_size > 0 && vertex_size <= 256); assert(vertex_size % 4 == 0); -#if TRACE - memset(vertexstats, 0, sizeof(vertexstats)); -#endif - const unsigned char* vertex_data = static_cast(vertices); unsigned char* data = buffer; @@ -1142,28 +1098,6 @@ size_t meshopt_encodeVertexBuffer(unsigned char* buffer, size_t buffer_size, con assert(data >= buffer + tail_size); assert(data <= buffer + buffer_size); -#if TRACE - size_t total_size = data - buffer; - - for (size_t k = 0; k < vertex_size; ++k) - { - const Stats& vsk = vertexstats[k]; - - printf("%2d: %d bytes\t%.1f%%\t%.1f bpv", int(k), int(vsk.size), double(vsk.size) / double(total_size) * 100, double(vsk.size) / double(vertex_count) * 8); - -#if TRACE > 1 - printf("\t\thdr %d bytes\tbit0 %d (%d bytes)\tbit1 %d (%d bytes)\tbit2 %d (%d bytes)\tbit3 %d (%d bytes)", - int(vsk.header), - int(vsk.bitg[0]), int(vsk.bitb[0]), - int(vsk.bitg[1]), int(vsk.bitb[1]), - int(vsk.bitg[2]), int(vsk.bitb[2]), - int(vsk.bitg[3]), int(vsk.bitb[3])); -#endif - - printf("\n"); - } -#endif - return data - buffer; } diff --git a/thirdparty/meshoptimizer/vertexfilter.cpp b/thirdparty/meshoptimizer/vertexfilter.cpp index e7ad2c9d39e..39946f46edf 100644 --- a/thirdparty/meshoptimizer/vertexfilter.cpp +++ b/thirdparty/meshoptimizer/vertexfilter.cpp @@ -2,6 +2,7 @@ #include "meshoptimizer.h" #include +#include // The block below auto-detects SIMD ISA that can be used on the target platform #ifndef MESHOPTIMIZER_NO_SIMD @@ -159,6 +160,25 @@ static void decodeFilterExp(unsigned int* data, size_t count) #endif #if defined(SIMD_SSE) || defined(SIMD_NEON) || defined(SIMD_WASM) +template static void dispatchSimd(void (*process)(T*, size_t), T* data, size_t count, size_t stride) +{ + assert(stride <= 4); + + size_t count4 = count & ~size_t(3); + process(data, count4); + + if (count4 < count) + { + T tail[4 * 4] = {}; // max stride 4, max count 4 + size_t tail_size = (count - count4) * stride * sizeof(T); + assert(tail_size <= sizeof(tail)); + + memcpy(tail, data + count4 * stride, tail_size); + process(tail, count - count4); + memcpy(data + count4 * stride, tail, tail_size); + } +} + inline uint64_t rotateleft64(uint64_t v, int x) { #if defined(_MSC_VER) && !defined(__clang__) @@ -775,14 +795,13 @@ void meshopt_decodeFilterOct(void* buffer, size_t vertex_count, size_t vertex_si { using namespace meshopt; - assert(vertex_count % 4 == 0); assert(vertex_size == 4 || vertex_size == 8); #if defined(SIMD_SSE) || defined(SIMD_NEON) || defined(SIMD_WASM) if (vertex_size == 4) - decodeFilterOctSimd(static_cast(buffer), vertex_count); + dispatchSimd(decodeFilterOctSimd, static_cast(buffer), vertex_count, 4); else - decodeFilterOctSimd(static_cast(buffer), vertex_count); + dispatchSimd(decodeFilterOctSimd, static_cast(buffer), vertex_count, 4); #else if (vertex_size == 4) decodeFilterOct(static_cast(buffer), vertex_count); @@ -795,12 +814,11 @@ void meshopt_decodeFilterQuat(void* buffer, size_t vertex_count, size_t vertex_s { using namespace meshopt; - assert(vertex_count % 4 == 0); assert(vertex_size == 8); (void)vertex_size; #if defined(SIMD_SSE) || defined(SIMD_NEON) || defined(SIMD_WASM) - decodeFilterQuatSimd(static_cast(buffer), vertex_count); + dispatchSimd(decodeFilterQuatSimd, static_cast(buffer), vertex_count, 4); #else decodeFilterQuat(static_cast(buffer), vertex_count); #endif @@ -810,11 +828,10 @@ void meshopt_decodeFilterExp(void* buffer, size_t vertex_count, size_t vertex_si { using namespace meshopt; - assert(vertex_count % 4 == 0); assert(vertex_size % 4 == 0); #if defined(SIMD_SSE) || defined(SIMD_NEON) || defined(SIMD_WASM) - decodeFilterExpSimd(static_cast(buffer), vertex_count * (vertex_size / 4)); + dispatchSimd(decodeFilterExpSimd, static_cast(buffer), vertex_count * (vertex_size / 4), 1); #else decodeFilterExp(static_cast(buffer), vertex_count * (vertex_size / 4)); #endif