From c5f73a1783fbf35e2601134678c52ec813837384 Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Thu, 2 Nov 2023 14:03:02 -0700 Subject: [PATCH] Update thirdparty/meshoptimizer to v0.20 Note: this change completely overwrites the meshoptimizer library source (from git SHA c21d3be6ddf627f8ca852ba4b6db9903b0557858) without including any patches; a distance error metric patch is still needed and will be reapplied in the next commit. The changes elsewhere are due to a signature change for meshopt_simplifyWithAttributes. --- scene/resources/importer_mesh.cpp | 7 +- scene/resources/surface_tool.h | 2 +- thirdparty/meshoptimizer/indexcodec.cpp | 2 +- thirdparty/meshoptimizer/indexgenerator.cpp | 41 +- thirdparty/meshoptimizer/meshoptimizer.h | 219 ++++---- thirdparty/meshoptimizer/quantization.cpp | 70 +++ thirdparty/meshoptimizer/simplifier.cpp | 529 ++++++++++--------- thirdparty/meshoptimizer/vcacheoptimizer.cpp | 21 +- thirdparty/meshoptimizer/vertexcodec.cpp | 51 +- thirdparty/meshoptimizer/vertexfilter.cpp | 107 +++- 10 files changed, 631 insertions(+), 418 deletions(-) create mode 100644 thirdparty/meshoptimizer/quantization.cpp diff --git a/scene/resources/importer_mesh.cpp b/scene/resources/importer_mesh.cpp index 1f4171c0720..36baec919a8 100644 --- a/scene/resources/importer_mesh.cpp +++ b/scene/resources/importer_mesh.cpp @@ -460,12 +460,13 @@ void ImporterMesh::generate_lods(float p_normal_merge_angle, float p_normal_spli (const uint32_t *)merged_indices_ptr, index_count, merged_vertices_f32.ptr(), merged_vertex_count, sizeof(float) * 3, // Vertex stride + merged_normals_f32.ptr(), + sizeof(float) * 3, // Attribute stride + normal_weights.ptr(), 3, index_target, max_mesh_error, simplify_options, - &mesh_error, - merged_normals_f32.ptr(), - normal_weights.ptr(), 3); + &mesh_error); if (new_index_count < last_index_count * 1.5f) { index_target = index_target * 1.5f; diff --git a/scene/resources/surface_tool.h b/scene/resources/surface_tool.h index 
2a8ad535259..9dfb298b9e6 100644 --- a/scene/resources/surface_tool.h +++ b/scene/resources/surface_tool.h @@ -86,7 +86,7 @@ public: static OptimizeVertexCacheFunc optimize_vertex_cache_func; typedef size_t (*SimplifyFunc)(unsigned int *destination, const unsigned int *indices, size_t index_count, const float *vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, unsigned int options, float *r_error); static SimplifyFunc simplify_func; - typedef size_t (*SimplifyWithAttribFunc)(unsigned int *destination, const unsigned int *indices, size_t index_count, const float *vertex_data, size_t vertex_count, size_t vertex_stride, size_t target_index_count, float target_error, unsigned int options, float *result_error, const float *attributes, const float *attribute_weights, size_t attribute_count); + typedef size_t (*SimplifyWithAttribFunc)(unsigned int *destination, const unsigned int *indices, size_t index_count, const float *vertex_data, size_t vertex_count, size_t vertex_stride, const float *attributes, size_t attribute_stride, const float *attribute_weights, size_t attribute_count, size_t target_index_count, float target_error, unsigned int options, float *result_error); static SimplifyWithAttribFunc simplify_with_attrib_func; typedef float (*SimplifyScaleFunc)(const float *vertex_positions, size_t vertex_count, size_t vertex_positions_stride); static SimplifyScaleFunc simplify_scale_func; diff --git a/thirdparty/meshoptimizer/indexcodec.cpp b/thirdparty/meshoptimizer/indexcodec.cpp index e4495b85862..4cc2fea63ad 100644 --- a/thirdparty/meshoptimizer/indexcodec.cpp +++ b/thirdparty/meshoptimizer/indexcodec.cpp @@ -13,7 +13,7 @@ namespace meshopt const unsigned char kIndexHeader = 0xe0; const unsigned char kSequenceHeader = 0xd0; -static int gEncodeIndexVersion = 0; +static int gEncodeIndexVersion = 1; typedef unsigned int VertexFifo[16]; typedef unsigned int EdgeFifo[16][2]; diff --git 
a/thirdparty/meshoptimizer/indexgenerator.cpp b/thirdparty/meshoptimizer/indexgenerator.cpp index cad808a2b18..f6728345a9f 100644 --- a/thirdparty/meshoptimizer/indexgenerator.cpp +++ b/thirdparty/meshoptimizer/indexgenerator.cpp @@ -157,7 +157,7 @@ static T* hashLookup(T* table, size_t buckets, const Hash& hash, const T& key, c } assert(false && "Hash table is full"); // unreachable - return 0; + return NULL; } static void buildPositionRemap(unsigned int* remap, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, meshopt_Allocator& allocator) @@ -178,6 +178,22 @@ static void buildPositionRemap(unsigned int* remap, const float* vertex_position remap[index] = *entry; } + + allocator.deallocate(vertex_table); +} + +template +static void remapVertices(void* destination, const void* vertices, size_t vertex_count, size_t vertex_size, const unsigned int* remap) +{ + size_t block_size = BlockSize == 0 ? vertex_size : BlockSize; + assert(block_size == vertex_size); + + for (size_t i = 0; i < vertex_count; ++i) + if (remap[i] != ~0u) + { + assert(remap[i] < vertex_count); + memcpy(static_cast(destination) + remap[i] * block_size, static_cast(vertices) + i * block_size, block_size); + } } } // namespace meshopt @@ -288,6 +304,8 @@ size_t meshopt_generateVertexRemapMulti(unsigned int* destination, const unsigne void meshopt_remapVertexBuffer(void* destination, const void* vertices, size_t vertex_count, size_t vertex_size, const unsigned int* remap) { + using namespace meshopt; + assert(vertex_size > 0 && vertex_size <= 256); meshopt_Allocator allocator; @@ -300,14 +318,23 @@ void meshopt_remapVertexBuffer(void* destination, const void* vertices, size_t v vertices = vertices_copy; } - for (size_t i = 0; i < vertex_count; ++i) + // specialize the loop for common vertex sizes to ensure memcpy is compiled as an inlined intrinsic + switch (vertex_size) { - if (remap[i] != ~0u) - { - assert(remap[i] < vertex_count); + case 4: + return 
remapVertices<4>(destination, vertices, vertex_count, vertex_size, remap); - memcpy(static_cast(destination) + remap[i] * vertex_size, static_cast(vertices) + i * vertex_size, vertex_size); - } + case 8: + return remapVertices<8>(destination, vertices, vertex_count, vertex_size, remap); + + case 12: + return remapVertices<12>(destination, vertices, vertex_count, vertex_size, remap); + + case 16: + return remapVertices<16>(destination, vertices, vertex_count, vertex_size, remap); + + default: + return remapVertices<0>(destination, vertices, vertex_count, vertex_size, remap); } } diff --git a/thirdparty/meshoptimizer/meshoptimizer.h b/thirdparty/meshoptimizer/meshoptimizer.h index 46d28d3ea3b..dbafd4e6e40 100644 --- a/thirdparty/meshoptimizer/meshoptimizer.h +++ b/thirdparty/meshoptimizer/meshoptimizer.h @@ -1,7 +1,7 @@ /** - * meshoptimizer - version 0.18 + * meshoptimizer - version 0.20 * - * Copyright (C) 2016-2022, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) + * Copyright (C) 2016-2023, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) * Report bugs and download new versions at https://github.com/zeux/meshoptimizer * * This library is distributed under the MIT License. See notice at the end of this file. 
@@ -12,7 +12,7 @@ #include /* Version macro; major * 1000 + minor * 10 + patch */ -#define MESHOPTIMIZER_VERSION 180 /* 0.18 */ +#define MESHOPTIMIZER_VERSION 200 /* 0.20 */ /* If no API is defined, assume default */ #ifndef MESHOPTIMIZER_API @@ -67,6 +67,7 @@ MESHOPTIMIZER_API size_t meshopt_generateVertexRemap(unsigned int* destination, * * destination must contain enough space for the resulting remap table (vertex_count elements) * indices can be NULL if the input is unindexed + * stream_count must be <= 16 */ MESHOPTIMIZER_API size_t meshopt_generateVertexRemapMulti(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, const struct meshopt_Stream* streams, size_t stream_count); @@ -103,6 +104,7 @@ MESHOPTIMIZER_API void meshopt_generateShadowIndexBuffer(unsigned int* destinati * Note that binary equivalence considers all size bytes in each stream, including padding which should be zero-initialized. * * destination must contain enough space for the resulting index buffer (index_count elements) + * stream_count must be <= 16 */ MESHOPTIMIZER_API void meshopt_generateShadowIndexBufferMulti(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, const struct meshopt_Stream* streams, size_t stream_count); @@ -304,13 +306,22 @@ MESHOPTIMIZER_EXPERIMENTAL void meshopt_decodeFilterExp(void* buffer, size_t cou * Input data must contain 4 floats for every quaternion (count*4 total). * * meshopt_encodeFilterExp encodes arbitrary (finite) floating-point data with 8-bit exponent and K-bit integer mantissa (1 <= K <= 24). - * Mantissa is shared between all components of a given vector as defined by stride; stride must be divisible by 4. + * Exponent can be shared between all components of a given vector as defined by stride or all values of a given component; stride must be divisible by 4. * Input data must contain stride/4 floats for every vector (count*stride/4 total). 
- * When individual (scalar) encoding is desired, simply pass stride=4 and adjust count accordingly. */ +enum meshopt_EncodeExpMode +{ + /* When encoding exponents, use separate values for each component (maximum quality) */ + meshopt_EncodeExpSeparate, + /* When encoding exponents, use shared value for all components of each vector (better compression) */ + meshopt_EncodeExpSharedVector, + /* When encoding exponents, use shared value for each component of all vectors (best compression) */ + meshopt_EncodeExpSharedComponent, +}; + MESHOPTIMIZER_EXPERIMENTAL void meshopt_encodeFilterOct(void* destination, size_t count, size_t stride, int bits, const float* data); MESHOPTIMIZER_EXPERIMENTAL void meshopt_encodeFilterQuat(void* destination, size_t count, size_t stride, int bits, const float* data); -MESHOPTIMIZER_EXPERIMENTAL void meshopt_encodeFilterExp(void* destination, size_t count, size_t stride, int bits, const float* data); +MESHOPTIMIZER_EXPERIMENTAL void meshopt_encodeFilterExp(void* destination, size_t count, size_t stride, int bits, const float* data, enum meshopt_EncodeExpMode mode); /** * Simplification options @@ -321,11 +332,6 @@ enum meshopt_SimplifyLockBorder = 1 << 0, }; -/** - * Experimental: Mesh simplifier with attribute metric; attributes follow xyz position data atm (vertex data must contain 3 + attribute_count floats per vertex) - */ -MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplifyWithAttributes(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_data, size_t vertex_count, size_t vertex_stride, size_t target_index_count, float target_error, unsigned int options, float* result_error, const float* attributes, const float* attribute_weights, size_t attribute_count); - /** * Mesh simplifier * Reduces the number of triangles in the mesh, attempting to preserve mesh appearance as much as possible @@ -343,6 +349,18 @@ MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplifyWithAttributes(unsigned int* d */ 
MESHOPTIMIZER_API size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, unsigned int options, float* result_error); +/** + * Experimental: Mesh simplifier with attribute metric + * The algorithm enhances meshopt_simplify by incorporating attribute values into the error metric used to prioritize simplification order; see meshopt_simplify documentation for details. + * Note that the number of attributes affects memory requirements and running time; this algorithm requires ~1.5x more memory and time compared to meshopt_simplify when using 4 scalar attributes. + * + * vertex_attributes should have attribute_count floats for each vertex + * attribute_weights should have attribute_count floats in total; the weights determine relative priority of attributes between each other and wrt position. The recommended weight range is [1e-3..1e-1], assuming attribute data is in [0..1] range. 
+ * attribute_count must be <= 16 + * TODO target_error/result_error currently use combined distance+attribute error; this may change in the future + */ +MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplifyWithAttributes(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, const float* vertex_attributes, size_t vertex_attributes_stride, const float* attribute_weights, size_t attribute_count, size_t target_index_count, float target_error, unsigned int options, float* result_error); + /** * Experimental: Mesh simplifier (sloppy) * Reduces the number of triangles in the mesh, sacrificing mesh appearance for simplification performance @@ -367,8 +385,9 @@ MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplifySloppy(unsigned int* destinati * * destination must contain enough space for the target index buffer (target_vertex_count elements) * vertex_positions should have float3 position in the first 12 bytes of each vertex + * vertex_colors can be NULL; when it's not NULL, it should have float3 color in the first 12 bytes of each vertex */ -MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplifyPoints(unsigned int* destination, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_vertex_count); +MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplifyPoints(unsigned int* destination, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, const float* vertex_colors, size_t vertex_colors_stride, float color_weight, size_t target_vertex_count); /** * Returns the error scaling factor used by the simplifier to convert between absolute and relative extents @@ -497,7 +516,7 @@ struct meshopt_Bounds * For backface culling with orthographic projection, use the following formula to reject backfacing clusters: * dot(view, cone_axis) >= cone_cutoff * - * For perspective projection, you can the formula that needs cone 
apex in addition to axis & cutoff: + * For perspective projection, you can use the formula that needs cone apex in addition to axis & cutoff: * dot(normalize(cone_apex - camera_position), cone_axis) >= cone_cutoff * * Alternatively, you can use the formula that doesn't need cone apex and uses bounding sphere instead: @@ -506,7 +525,8 @@ struct meshopt_Bounds * dot(center - camera_position, cone_axis) >= cone_cutoff * length(center - camera_position) + radius * * The formula that uses the apex is slightly more accurate but needs the apex; if you are already using bounding sphere - * to do frustum/occlusion culling, the formula that doesn't use the apex may be preferable. + * to do frustum/occlusion culling, the formula that doesn't use the apex may be preferable (for derivation see + * Real-Time Rendering 4th Edition, section 19.3). * * vertex_positions should have float3 position in the first 12 bytes of each vertex * index_count/3 should be less than or equal to 512 (the function assumes clusters of limited size) @@ -515,13 +535,14 @@ MESHOPTIMIZER_API struct meshopt_Bounds meshopt_computeClusterBounds(const unsig MESHOPTIMIZER_API struct meshopt_Bounds meshopt_computeMeshletBounds(const unsigned int* meshlet_vertices, const unsigned char* meshlet_triangles, size_t triangle_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride); /** - * Experimental: Spatial sorter + * Spatial sorter * Generates a remap table that can be used to reorder points for spatial locality. * Resulting remap table maps old vertices to new vertices and can be used in meshopt_remapVertexBuffer. 
* * destination must contain enough space for the resulting remap table (vertex_count elements) + * vertex_positions should have float3 position in the first 12 bytes of each vertex */ -MESHOPTIMIZER_EXPERIMENTAL void meshopt_spatialSortRemap(unsigned int* destination, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride); +MESHOPTIMIZER_API void meshopt_spatialSortRemap(unsigned int* destination, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride); /** * Experimental: Spatial sorter @@ -561,19 +582,25 @@ inline int meshopt_quantizeUnorm(float v, int N); inline int meshopt_quantizeSnorm(float v, int N); /** - * Quantize a float into half-precision floating point value + * Quantize a float into half-precision (as defined by IEEE-754 fp16) floating point value * Generates +-inf for overflow, preserves NaN, flushes denormals to zero, rounds to nearest * Representable magnitude range: [6e-5; 65504] * Maximum relative reconstruction error: 5e-4 */ -inline unsigned short meshopt_quantizeHalf(float v); +MESHOPTIMIZER_API unsigned short meshopt_quantizeHalf(float v); /** - * Quantize a float into a floating point value with a limited number of significant mantissa bits + * Quantize a float into a floating point value with a limited number of significant mantissa bits, preserving the IEEE-754 fp32 binary representation * Generates +-inf for overflow, preserves NaN, flushes denormals to zero, rounds to nearest * Assumes N is in a valid mantissa precision range, which is 1..23 */ -inline float meshopt_quantizeFloat(float v, int N); +MESHOPTIMIZER_API float meshopt_quantizeFloat(float v, int N); + +/** + * Reverse quantization of a half-precision (as defined by IEEE-754 fp16) floating point value + * Preserves Inf/NaN, flushes denormals to zero + */ +MESHOPTIMIZER_API float meshopt_dequantizeHalf(unsigned short h); #endif /** @@ -620,9 +647,11 @@ inline size_t meshopt_encodeIndexSequence(unsigned char* buffer, 
size_t buffer_s template inline int meshopt_decodeIndexSequence(T* destination, size_t index_count, const unsigned char* buffer, size_t buffer_size); template -inline size_t meshopt_simplify(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, unsigned int options = 0, float* result_error = 0); +inline size_t meshopt_simplify(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, unsigned int options = 0, float* result_error = NULL); template -inline size_t meshopt_simplifySloppy(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, float* result_error = 0); +inline size_t meshopt_simplifyWithAttributes(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, const float* vertex_attributes, size_t vertex_attributes_stride, const float* attribute_weights, size_t attribute_count, size_t target_index_count, float target_error, unsigned int options = 0, float* result_error = NULL); +template +inline size_t meshopt_simplifySloppy(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, float* result_error = NULL); template inline size_t meshopt_stripify(T* destination, const T* indices, size_t index_count, size_t vertex_count, T restart_index); template @@ -666,50 +695,6 @@ inline int meshopt_quantizeSnorm(float v, int N) return int(v * scale + round); } - -inline unsigned short meshopt_quantizeHalf(float v) -{ - union { float f; unsigned int ui; } u = {v}; - unsigned int ui = 
u.ui; - - int s = (ui >> 16) & 0x8000; - int em = ui & 0x7fffffff; - - /* bias exponent and round to nearest; 112 is relative exponent bias (127-15) */ - int h = (em - (112 << 23) + (1 << 12)) >> 13; - - /* underflow: flush to zero; 113 encodes exponent -14 */ - h = (em < (113 << 23)) ? 0 : h; - - /* overflow: infinity; 143 encodes exponent 16 */ - h = (em >= (143 << 23)) ? 0x7c00 : h; - - /* NaN; note that we convert all types of NaN to qNaN */ - h = (em > (255 << 23)) ? 0x7e00 : h; - - return (unsigned short)(s | h); -} - -inline float meshopt_quantizeFloat(float v, int N) -{ - union { float f; unsigned int ui; } u = {v}; - unsigned int ui = u.ui; - - const int mask = (1 << (23 - N)) - 1; - const int round = (1 << (23 - N)) >> 1; - - int e = ui & 0x7f800000; - unsigned int rui = (ui + round) & ~mask; - - /* round all numbers except inf/nan; this is important to make sure nan doesn't overflow into -0 */ - ui = e == 0x7f800000 ? ui : rui; - - /* flush denormals to zero */ - ui = e == 0 ? 0 : ui; - - u.ui = ui; - return u.f; -} #endif /* Internal implementation helpers */ @@ -746,6 +731,13 @@ public: return result; } + void deallocate(void* ptr) + { + assert(count > 0 && blocks[count - 1] == ptr); + Storage::deallocate(ptr); + count--; + } + private: void* blocks[24]; size_t count; @@ -770,7 +762,7 @@ struct meshopt_IndexAdapter meshopt_IndexAdapter(T* result_, const T* input, size_t count_) : result(result_) - , data(0) + , data(NULL) , count(count_) { size_t size = count > size_t(-1) / sizeof(unsigned int) ? size_t(-1) : count * sizeof(unsigned int); @@ -810,33 +802,33 @@ struct meshopt_IndexAdapter template inline size_t meshopt_generateVertexRemap(unsigned int* destination, const T* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size) { - meshopt_IndexAdapter in(0, indices, indices ? index_count : 0); + meshopt_IndexAdapter in(NULL, indices, indices ? 
index_count : 0); - return meshopt_generateVertexRemap(destination, indices ? in.data : 0, index_count, vertices, vertex_count, vertex_size); + return meshopt_generateVertexRemap(destination, indices ? in.data : NULL, index_count, vertices, vertex_count, vertex_size); } template inline size_t meshopt_generateVertexRemapMulti(unsigned int* destination, const T* indices, size_t index_count, size_t vertex_count, const meshopt_Stream* streams, size_t stream_count) { - meshopt_IndexAdapter in(0, indices, indices ? index_count : 0); + meshopt_IndexAdapter in(NULL, indices, indices ? index_count : 0); - return meshopt_generateVertexRemapMulti(destination, indices ? in.data : 0, index_count, vertex_count, streams, stream_count); + return meshopt_generateVertexRemapMulti(destination, indices ? in.data : NULL, index_count, vertex_count, streams, stream_count); } template inline void meshopt_remapIndexBuffer(T* destination, const T* indices, size_t index_count, const unsigned int* remap) { - meshopt_IndexAdapter in(0, indices, indices ? index_count : 0); + meshopt_IndexAdapter in(NULL, indices, indices ? index_count : 0); meshopt_IndexAdapter out(destination, 0, index_count); - meshopt_remapIndexBuffer(out.data, indices ? in.data : 0, index_count, remap); + meshopt_remapIndexBuffer(out.data, indices ? 
in.data : NULL, index_count, remap); } template inline void meshopt_generateShadowIndexBuffer(T* destination, const T* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size, size_t vertex_stride) { - meshopt_IndexAdapter in(0, indices, index_count); - meshopt_IndexAdapter out(destination, 0, index_count); + meshopt_IndexAdapter in(NULL, indices, index_count); + meshopt_IndexAdapter out(destination, NULL, index_count); meshopt_generateShadowIndexBuffer(out.data, in.data, index_count, vertices, vertex_count, vertex_size, vertex_stride); } @@ -844,8 +836,8 @@ inline void meshopt_generateShadowIndexBuffer(T* destination, const T* indices, template inline void meshopt_generateShadowIndexBufferMulti(T* destination, const T* indices, size_t index_count, size_t vertex_count, const meshopt_Stream* streams, size_t stream_count) { - meshopt_IndexAdapter in(0, indices, index_count); - meshopt_IndexAdapter out(destination, 0, index_count); + meshopt_IndexAdapter in(NULL, indices, index_count); + meshopt_IndexAdapter out(destination, NULL, index_count); meshopt_generateShadowIndexBufferMulti(out.data, in.data, index_count, vertex_count, streams, stream_count); } @@ -853,8 +845,8 @@ inline void meshopt_generateShadowIndexBufferMulti(T* destination, const T* indi template inline void meshopt_generateAdjacencyIndexBuffer(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride) { - meshopt_IndexAdapter in(0, indices, index_count); - meshopt_IndexAdapter out(destination, 0, index_count * 2); + meshopt_IndexAdapter in(NULL, indices, index_count); + meshopt_IndexAdapter out(destination, NULL, index_count * 2); meshopt_generateAdjacencyIndexBuffer(out.data, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride); } @@ -862,8 +854,8 @@ inline void meshopt_generateAdjacencyIndexBuffer(T* destination, const T* indice template inline void 
meshopt_generateTessellationIndexBuffer(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride) { - meshopt_IndexAdapter in(0, indices, index_count); - meshopt_IndexAdapter out(destination, 0, index_count * 4); + meshopt_IndexAdapter in(NULL, indices, index_count); + meshopt_IndexAdapter out(destination, NULL, index_count * 4); meshopt_generateTessellationIndexBuffer(out.data, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride); } @@ -871,8 +863,8 @@ inline void meshopt_generateTessellationIndexBuffer(T* destination, const T* ind template inline void meshopt_optimizeVertexCache(T* destination, const T* indices, size_t index_count, size_t vertex_count) { - meshopt_IndexAdapter in(0, indices, index_count); - meshopt_IndexAdapter out(destination, 0, index_count); + meshopt_IndexAdapter in(NULL, indices, index_count); + meshopt_IndexAdapter out(destination, NULL, index_count); meshopt_optimizeVertexCache(out.data, in.data, index_count, vertex_count); } @@ -880,8 +872,8 @@ inline void meshopt_optimizeVertexCache(T* destination, const T* indices, size_t template inline void meshopt_optimizeVertexCacheStrip(T* destination, const T* indices, size_t index_count, size_t vertex_count) { - meshopt_IndexAdapter in(0, indices, index_count); - meshopt_IndexAdapter out(destination, 0, index_count); + meshopt_IndexAdapter in(NULL, indices, index_count); + meshopt_IndexAdapter out(destination, NULL, index_count); meshopt_optimizeVertexCacheStrip(out.data, in.data, index_count, vertex_count); } @@ -889,8 +881,8 @@ inline void meshopt_optimizeVertexCacheStrip(T* destination, const T* indices, s template inline void meshopt_optimizeVertexCacheFifo(T* destination, const T* indices, size_t index_count, size_t vertex_count, unsigned int cache_size) { - meshopt_IndexAdapter in(0, indices, index_count); - meshopt_IndexAdapter out(destination, 0, index_count); + meshopt_IndexAdapter 
in(NULL, indices, index_count); + meshopt_IndexAdapter out(destination, NULL, index_count); meshopt_optimizeVertexCacheFifo(out.data, in.data, index_count, vertex_count, cache_size); } @@ -898,8 +890,8 @@ inline void meshopt_optimizeVertexCacheFifo(T* destination, const T* indices, si template inline void meshopt_optimizeOverdraw(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, float threshold) { - meshopt_IndexAdapter in(0, indices, index_count); - meshopt_IndexAdapter out(destination, 0, index_count); + meshopt_IndexAdapter in(NULL, indices, index_count); + meshopt_IndexAdapter out(destination, NULL, index_count); meshopt_optimizeOverdraw(out.data, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride, threshold); } @@ -907,7 +899,7 @@ inline void meshopt_optimizeOverdraw(T* destination, const T* indices, size_t in template inline size_t meshopt_optimizeVertexFetchRemap(unsigned int* destination, const T* indices, size_t index_count, size_t vertex_count) { - meshopt_IndexAdapter in(0, indices, index_count); + meshopt_IndexAdapter in(NULL, indices, index_count); return meshopt_optimizeVertexFetchRemap(destination, in.data, index_count, vertex_count); } @@ -923,7 +915,7 @@ inline size_t meshopt_optimizeVertexFetch(void* destination, T* indices, size_t template inline size_t meshopt_encodeIndexBuffer(unsigned char* buffer, size_t buffer_size, const T* indices, size_t index_count) { - meshopt_IndexAdapter in(0, indices, index_count); + meshopt_IndexAdapter in(NULL, indices, index_count); return meshopt_encodeIndexBuffer(buffer, buffer_size, in.data, index_count); } @@ -940,7 +932,7 @@ inline int meshopt_decodeIndexBuffer(T* destination, size_t index_count, const u template inline size_t meshopt_encodeIndexSequence(unsigned char* buffer, size_t buffer_size, const T* indices, size_t index_count) { - meshopt_IndexAdapter in(0, indices, index_count); + 
meshopt_IndexAdapter in(NULL, indices, index_count); return meshopt_encodeIndexSequence(buffer, buffer_size, in.data, index_count); } @@ -957,17 +949,26 @@ inline int meshopt_decodeIndexSequence(T* destination, size_t index_count, const template inline size_t meshopt_simplify(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, unsigned int options, float* result_error) { - meshopt_IndexAdapter in(0, indices, index_count); - meshopt_IndexAdapter out(destination, 0, index_count); + meshopt_IndexAdapter in(NULL, indices, index_count); + meshopt_IndexAdapter out(destination, NULL, index_count); return meshopt_simplify(out.data, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride, target_index_count, target_error, options, result_error); } +template +inline size_t meshopt_simplifyWithAttributes(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, const float* vertex_attributes, size_t vertex_attributes_stride, const float* attribute_weights, size_t attribute_count, size_t target_index_count, float target_error, unsigned int options, float* result_error) +{ + meshopt_IndexAdapter in(NULL, indices, index_count); + meshopt_IndexAdapter out(destination, NULL, index_count); + + return meshopt_simplifyWithAttributes(out.data, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride, vertex_attributes, vertex_attributes_stride, attribute_weights, attribute_count, target_index_count, target_error, options, result_error); +} + template inline size_t meshopt_simplifySloppy(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, float* result_error) { - meshopt_IndexAdapter in(0, indices, 
index_count); - meshopt_IndexAdapter out(destination, 0, index_count); + meshopt_IndexAdapter in(NULL, indices, index_count); + meshopt_IndexAdapter out(destination, NULL, index_count); return meshopt_simplifySloppy(out.data, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride, target_index_count, target_error, result_error); } @@ -975,8 +976,8 @@ inline size_t meshopt_simplifySloppy(T* destination, const T* indices, size_t in template inline size_t meshopt_stripify(T* destination, const T* indices, size_t index_count, size_t vertex_count, T restart_index) { - meshopt_IndexAdapter in(0, indices, index_count); - meshopt_IndexAdapter out(destination, 0, (index_count / 3) * 5); + meshopt_IndexAdapter in(NULL, indices, index_count); + meshopt_IndexAdapter out(destination, NULL, (index_count / 3) * 5); return meshopt_stripify(out.data, in.data, index_count, vertex_count, unsigned(restart_index)); } @@ -984,8 +985,8 @@ inline size_t meshopt_stripify(T* destination, const T* indices, size_t index_co template inline size_t meshopt_unstripify(T* destination, const T* indices, size_t index_count, T restart_index) { - meshopt_IndexAdapter in(0, indices, index_count); - meshopt_IndexAdapter out(destination, 0, (index_count - 2) * 3); + meshopt_IndexAdapter in(NULL, indices, index_count); + meshopt_IndexAdapter out(destination, NULL, (index_count - 2) * 3); return meshopt_unstripify(out.data, in.data, index_count, unsigned(restart_index)); } @@ -993,7 +994,7 @@ inline size_t meshopt_unstripify(T* destination, const T* indices, size_t index_ template inline meshopt_VertexCacheStatistics meshopt_analyzeVertexCache(const T* indices, size_t index_count, size_t vertex_count, unsigned int cache_size, unsigned int warp_size, unsigned int buffer_size) { - meshopt_IndexAdapter in(0, indices, index_count); + meshopt_IndexAdapter in(NULL, indices, index_count); return meshopt_analyzeVertexCache(in.data, index_count, vertex_count, cache_size, warp_size, 
buffer_size); } @@ -1001,7 +1002,7 @@ inline meshopt_VertexCacheStatistics meshopt_analyzeVertexCache(const T* indices template inline meshopt_OverdrawStatistics meshopt_analyzeOverdraw(const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride) { - meshopt_IndexAdapter in(0, indices, index_count); + meshopt_IndexAdapter in(NULL, indices, index_count); return meshopt_analyzeOverdraw(in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride); } @@ -1009,7 +1010,7 @@ inline meshopt_OverdrawStatistics meshopt_analyzeOverdraw(const T* indices, size template inline meshopt_VertexFetchStatistics meshopt_analyzeVertexFetch(const T* indices, size_t index_count, size_t vertex_count, size_t vertex_size) { - meshopt_IndexAdapter in(0, indices, index_count); + meshopt_IndexAdapter in(NULL, indices, index_count); return meshopt_analyzeVertexFetch(in.data, index_count, vertex_count, vertex_size); } @@ -1017,7 +1018,7 @@ inline meshopt_VertexFetchStatistics meshopt_analyzeVertexFetch(const T* indices template inline size_t meshopt_buildMeshlets(meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t max_vertices, size_t max_triangles, float cone_weight) { - meshopt_IndexAdapter in(0, indices, index_count); + meshopt_IndexAdapter in(NULL, indices, index_count); return meshopt_buildMeshlets(meshlets, meshlet_vertices, meshlet_triangles, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride, max_vertices, max_triangles, cone_weight); } @@ -1025,7 +1026,7 @@ inline size_t meshopt_buildMeshlets(meshopt_Meshlet* meshlets, unsigned int* mes template inline size_t meshopt_buildMeshletsScan(meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const T* indices, size_t index_count, 
size_t vertex_count, size_t max_vertices, size_t max_triangles) { - meshopt_IndexAdapter in(0, indices, index_count); + meshopt_IndexAdapter in(NULL, indices, index_count); return meshopt_buildMeshletsScan(meshlets, meshlet_vertices, meshlet_triangles, in.data, index_count, vertex_count, max_vertices, max_triangles); } @@ -1033,7 +1034,7 @@ inline size_t meshopt_buildMeshletsScan(meshopt_Meshlet* meshlets, unsigned int* template inline meshopt_Bounds meshopt_computeClusterBounds(const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride) { - meshopt_IndexAdapter in(0, indices, index_count); + meshopt_IndexAdapter in(NULL, indices, index_count); return meshopt_computeClusterBounds(in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride); } @@ -1041,15 +1042,15 @@ inline meshopt_Bounds meshopt_computeClusterBounds(const T* indices, size_t inde template inline void meshopt_spatialSortTriangles(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride) { - meshopt_IndexAdapter in(0, indices, index_count); - meshopt_IndexAdapter out(destination, 0, index_count); + meshopt_IndexAdapter in(NULL, indices, index_count); + meshopt_IndexAdapter out(destination, NULL, index_count); meshopt_spatialSortTriangles(out.data, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride); } #endif /** - * Copyright (c) 2016-2022 Arseny Kapoulkine + * Copyright (c) 2016-2023 Arseny Kapoulkine * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation diff --git a/thirdparty/meshoptimizer/quantization.cpp b/thirdparty/meshoptimizer/quantization.cpp new file mode 100644 index 00000000000..09a314d6028 --- /dev/null +++ b/thirdparty/meshoptimizer/quantization.cpp @@ -0,0 +1,70 @@ +// This file is part of meshoptimizer library; see 
meshoptimizer.h for version/license details +#include "meshoptimizer.h" + +#include + +unsigned short meshopt_quantizeHalf(float v) +{ + union { float f; unsigned int ui; } u = {v}; + unsigned int ui = u.ui; + + int s = (ui >> 16) & 0x8000; + int em = ui & 0x7fffffff; + + // bias exponent and round to nearest; 112 is relative exponent bias (127-15) + int h = (em - (112 << 23) + (1 << 12)) >> 13; + + // underflow: flush to zero; 113 encodes exponent -14 + h = (em < (113 << 23)) ? 0 : h; + + // overflow: infinity; 143 encodes exponent 16 + h = (em >= (143 << 23)) ? 0x7c00 : h; + + // NaN; note that we convert all types of NaN to qNaN + h = (em > (255 << 23)) ? 0x7e00 : h; + + return (unsigned short)(s | h); +} + +float meshopt_quantizeFloat(float v, int N) +{ + assert(N >= 0 && N <= 23); + + union { float f; unsigned int ui; } u = {v}; + unsigned int ui = u.ui; + + const int mask = (1 << (23 - N)) - 1; + const int round = (1 << (23 - N)) >> 1; + + int e = ui & 0x7f800000; + unsigned int rui = (ui + round) & ~mask; + + // round all numbers except inf/nan; this is important to make sure nan doesn't overflow into -0 + ui = e == 0x7f800000 ? ui : rui; + + // flush denormals to zero + ui = e == 0 ? 0 : ui; + + u.ui = ui; + return u.f; +} + +float meshopt_dequantizeHalf(unsigned short h) +{ + unsigned int s = unsigned(h & 0x8000) << 16; + int em = h & 0x7fff; + + // bias exponent and pad mantissa with 0; 112 is relative exponent bias (127-15) + int r = (em + (112 << 10)) << 13; + + // denormal: flush to zero + r = (em < (1 << 10)) ? 0 : r; + + // infinity/NaN; note that we preserve NaN payload as a byproduct of unifying inf/nan cases + // 112 is an exponent bias fixup; since we already applied it once, applying it twice converts 31 to 255 + r += (em >= (31 << 10)) ? 
(112 << 23) : 0; + + union { float f; unsigned int ui; } u; + u.ui = s | r; + return u.f; +} diff --git a/thirdparty/meshoptimizer/simplifier.cpp b/thirdparty/meshoptimizer/simplifier.cpp index 391a77861ab..5ba8570076e 100644 --- a/thirdparty/meshoptimizer/simplifier.cpp +++ b/thirdparty/meshoptimizer/simplifier.cpp @@ -20,14 +20,13 @@ #define TRACESTATS(i) (void)0 #endif -#define ATTRIBUTES 3 - // This work is based on: // Michael Garland and Paul S. Heckbert. Surface simplification using quadric error metrics. 1997 // Michael Garland. Quadric-based polygonal surface simplification. 1999 // Peter Lindstrom. Out-of-Core Simplification of Large Polygonal Models. 2000 // Matthias Teschner, Bruno Heidelberger, Matthias Mueller, Danat Pomeranets, Markus Gross. Optimized Spatial Hashing for Collision Detection of Deformable Objects. 2003 // Peter Van Sandt, Yannis Chronis, Jignesh M. Patel. Efficiently Searching In-Memory Sorted Arrays: Revenge of the Interpolation Search? 2019 +// Hugues Hoppe. New Quadric Metric for Simplifying Meshes with Appearance Attributes. 
1999 namespace meshopt { @@ -39,31 +38,31 @@ struct EdgeAdjacency unsigned int prev; }; - unsigned int* counts; unsigned int* offsets; Edge* data; }; static void prepareEdgeAdjacency(EdgeAdjacency& adjacency, size_t index_count, size_t vertex_count, meshopt_Allocator& allocator) { - adjacency.counts = allocator.allocate(vertex_count); - adjacency.offsets = allocator.allocate(vertex_count); + adjacency.offsets = allocator.allocate(vertex_count + 1); adjacency.data = allocator.allocate(index_count); } static void updateEdgeAdjacency(EdgeAdjacency& adjacency, const unsigned int* indices, size_t index_count, size_t vertex_count, const unsigned int* remap) { size_t face_count = index_count / 3; + unsigned int* offsets = adjacency.offsets + 1; + EdgeAdjacency::Edge* data = adjacency.data; // fill edge counts - memset(adjacency.counts, 0, vertex_count * sizeof(unsigned int)); + memset(offsets, 0, vertex_count * sizeof(unsigned int)); for (size_t i = 0; i < index_count; ++i) { unsigned int v = remap ? 
remap[indices[i]] : indices[i]; assert(v < vertex_count); - adjacency.counts[v]++; + offsets[v]++; } // fill offset table @@ -71,8 +70,9 @@ static void updateEdgeAdjacency(EdgeAdjacency& adjacency, const unsigned int* in for (size_t i = 0; i < vertex_count; ++i) { - adjacency.offsets[i] = offset; - offset += adjacency.counts[i]; + unsigned int count = offsets[i]; + offsets[i] = offset; + offset += count; } assert(offset == index_count); @@ -89,26 +89,22 @@ static void updateEdgeAdjacency(EdgeAdjacency& adjacency, const unsigned int* in c = remap[c]; } - adjacency.data[adjacency.offsets[a]].next = b; - adjacency.data[adjacency.offsets[a]].prev = c; - adjacency.offsets[a]++; + data[offsets[a]].next = b; + data[offsets[a]].prev = c; + offsets[a]++; - adjacency.data[adjacency.offsets[b]].next = c; - adjacency.data[adjacency.offsets[b]].prev = a; - adjacency.offsets[b]++; + data[offsets[b]].next = c; + data[offsets[b]].prev = a; + offsets[b]++; - adjacency.data[adjacency.offsets[c]].next = a; - adjacency.data[adjacency.offsets[c]].prev = b; - adjacency.offsets[c]++; + data[offsets[c]].next = a; + data[offsets[c]].prev = b; + offsets[c]++; } - // fix offsets that have been disturbed by the previous pass - for (size_t i = 0; i < vertex_count; ++i) - { - assert(adjacency.offsets[i] >= adjacency.counts[i]); - - adjacency.offsets[i] -= adjacency.counts[i]; - } + // finalize offsets + adjacency.offsets[0] = 0; + assert(adjacency.offsets[vertex_count] == index_count); } struct PositionHasher @@ -168,7 +164,7 @@ static T* hashLookup2(T* table, size_t buckets, const Hash& hash, const T& key, } assert(false && "Hash table is full"); // unreachable - return 0; + return NULL; } static void buildPositionRemap(unsigned int* remap, unsigned int* wedge, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, meshopt_Allocator& allocator) @@ -205,6 +201,8 @@ static void buildPositionRemap(unsigned int* remap, unsigned int* wedge, const f wedge[i] = 
wedge[r]; wedge[r] = unsigned(i); } + + allocator.deallocate(table); } enum VertexKind @@ -244,7 +242,7 @@ const unsigned char kHasOpposite[Kind_Count][Kind_Count] = { static bool hasEdge(const EdgeAdjacency& adjacency, unsigned int a, unsigned int b) { - unsigned int count = adjacency.counts[a]; + unsigned int count = adjacency.offsets[a + 1] - adjacency.offsets[a]; const EdgeAdjacency::Edge* edges = adjacency.data + adjacency.offsets[a]; for (size_t i = 0; i < count; ++i) @@ -269,7 +267,7 @@ static void classifyVertices(unsigned char* result, unsigned int* loop, unsigned { unsigned int vertex = unsigned(i); - unsigned int count = adjacency.counts[vertex]; + unsigned int count = adjacency.offsets[vertex + 1] - adjacency.offsets[vertex]; const EdgeAdjacency::Edge* edges = adjacency.data + adjacency.offsets[vertex]; for (size_t j = 0; j < count; ++j) @@ -378,10 +376,6 @@ static void classifyVertices(unsigned char* result, unsigned int* loop, unsigned struct Vector3 { float x, y, z; - -#if ATTRIBUTES - float a[ATTRIBUTES]; -#endif }; static float rescalePositions(Vector3* result, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride) @@ -432,19 +426,43 @@ static float rescalePositions(Vector3* result, const float* vertex_positions_dat return extent; } +static void rescaleAttributes(float* result, const float* vertex_attributes_data, size_t vertex_count, size_t vertex_attributes_stride, const float* attribute_weights, size_t attribute_count) +{ + size_t vertex_attributes_stride_float = vertex_attributes_stride / sizeof(float); + + for (size_t i = 0; i < vertex_count; ++i) + { + for (size_t k = 0; k < attribute_count; ++k) + { + float a = vertex_attributes_data[i * vertex_attributes_stride_float + k]; + + result[i * attribute_count + k] = a * attribute_weights[k]; + } + } +} + +static const size_t kMaxAttributes = 16; + struct Quadric { + // a00*x^2 + a11*y^2 + a22*z^2 + 2*(a10*xy + a20*xz + a21*yz) + b0*x + b1*y + b2*z + c float a00, 
a11, a22; float a10, a20, a21; float b0, b1, b2, c; float w; +}; -#if ATTRIBUTES - float gx[ATTRIBUTES]; - float gy[ATTRIBUTES]; - float gz[ATTRIBUTES]; - float gw[ATTRIBUTES]; -#endif +struct QuadricGrad +{ + // gx*x + gy*y + gz*z + gw + float gx, gy, gz, gw; +}; + +struct Reservoir +{ + float x, y, z; + float r, g, b; + float w; }; struct Collapse @@ -458,7 +476,6 @@ struct Collapse float error; unsigned int errorui; }; - float distance_error; }; static float normalize(Vector3& v) @@ -488,16 +505,17 @@ static void quadricAdd(Quadric& Q, const Quadric& R) Q.b2 += R.b2; Q.c += R.c; Q.w += R.w; +} -#if ATTRIBUTES - for (int k = 0; k < ATTRIBUTES; ++k) +static void quadricAdd(QuadricGrad* G, const QuadricGrad* R, size_t attribute_count) +{ + for (size_t k = 0; k < attribute_count; ++k) { - Q.gx[k] += R.gx[k]; - Q.gy[k] += R.gy[k]; - Q.gz[k] += R.gz[k]; - Q.gw[k] += R.gw[k]; + G[k].gx += R[k].gx; + G[k].gy += R[k].gy; + G[k].gz += R[k].gz; + G[k].gw += R[k].gw; } -#endif } static float quadricError(const Quadric& Q, const Vector3& v) @@ -523,23 +541,12 @@ static float quadricError(const Quadric& Q, const Vector3& v) r += ry * v.y; r += rz * v.z; -#if ATTRIBUTES - // see quadricUpdateAttributes for general derivation; here we need to add the parts of (eval(pos) - attr)^2 that depend on attr - for (int k = 0; k < ATTRIBUTES; ++k) - { - float a = v.a[k]; - - r += a * a * Q.w; - r -= 2 * a * (v.x * Q.gx[k] + v.y * Q.gy[k] + v.z * Q.gz[k] + Q.gw[k]); - } -#endif - float s = Q.w == 0.f ? 0.f : 1.f / Q.w; return fabsf(r) * s; } -static float quadricErrorNoAttributes(const Quadric& Q, const Vector3& v) +static float quadricError(const Quadric& Q, const QuadricGrad* G, size_t attribute_count, const Vector3& v, const float* va) { float rx = Q.b0; float ry = Q.b1; @@ -562,7 +569,18 @@ static float quadricErrorNoAttributes(const Quadric& Q, const Vector3& v) r += ry * v.y; r += rz * v.z; - float s = Q.w == 0.f ? 
0.f : 1.f / Q.w; + // see quadricFromAttributes for general derivation; here we need to add the parts of (eval(pos) - attr)^2 that depend on attr + for (size_t k = 0; k < attribute_count; ++k) + { + float a = va[k]; + float g = v.x * G[k].gx + v.y * G[k].gy + v.z * G[k].gz + G[k].gw; + + r += a * a * Q.w; + r -= 2 * a * g; + } + + // TODO: weight normalization is breaking attribute error somehow + float s = 1;// Q.w == 0.f ? 0.f : 1.f / Q.w; return fabsf(r) * s; } @@ -585,29 +603,6 @@ static void quadricFromPlane(Quadric& Q, float a, float b, float c, float d, flo Q.b2 = c * dw; Q.c = d * dw; Q.w = w; - -#if ATTRIBUTES - memset(Q.gx, 0, sizeof(Q.gx)); - memset(Q.gy, 0, sizeof(Q.gy)); - memset(Q.gz, 0, sizeof(Q.gz)); - memset(Q.gw, 0, sizeof(Q.gw)); -#endif -} - -static void quadricFromPoint(Quadric& Q, float x, float y, float z, float w) -{ - // we need to encode (x - X) ^ 2 + (y - Y)^2 + (z - Z)^2 into the quadric - Q.a00 = w; - Q.a11 = w; - Q.a22 = w; - Q.a10 = 0.f; - Q.a20 = 0.f; - Q.a21 = 0.f; - Q.b0 = -2.f * x * w; - Q.b1 = -2.f * y * w; - Q.b2 = -2.f * z * w; - Q.c = (x * x + y * y + z * z) * w; - Q.w = w; } static void quadricFromTriangle(Quadric& Q, const Vector3& p0, const Vector3& p1, const Vector3& p2, float weight) @@ -644,8 +639,7 @@ static void quadricFromTriangleEdge(Quadric& Q, const Vector3& p0, const Vector3 quadricFromPlane(Q, normal.x, normal.y, normal.z, -distance, length * weight); } -#if ATTRIBUTES -static void quadricUpdateAttributes(Quadric& Q, const Vector3& p0, const Vector3& p1, const Vector3& p2, float w) +static void quadricFromAttributes(Quadric& Q, QuadricGrad* G, const Vector3& p0, const Vector3& p1, const Vector3& p2, const float* va0, const float* va1, const float* va2, size_t attribute_count) { // for each attribute we want to encode the following function into the quadric: // (eval(pos) - attr)^2 @@ -655,6 +649,11 @@ static void quadricUpdateAttributes(Quadric& Q, const Vector3& p0, const Vector3 Vector3 p10 = {p1.x - p0.x, p1.y 
- p0.y, p1.z - p0.z}; Vector3 p20 = {p2.x - p0.x, p2.y - p0.y, p2.z - p0.z}; + // weight is scaled linearly with edge length + Vector3 normal = {p10.y * p20.z - p10.z * p20.y, p10.z * p20.x - p10.x * p20.z, p10.x * p20.y - p10.y * p20.x}; + float area = sqrtf(normal.x * normal.x + normal.y * normal.y + normal.z * normal.z); + float w = sqrtf(area); // TODO this needs more experimentation + // we compute gradients using barycentric coordinates; barycentric coordinates can be computed as follows: // v = (d11 * d20 - d01 * d21) / denom // w = (d00 * d21 - d01 * d20) / denom @@ -677,9 +676,13 @@ static void quadricUpdateAttributes(Quadric& Q, const Vector3& p0, const Vector3 float gz1 = (d11 * v0.z - d01 * v1.z) * denomr; float gz2 = (d00 * v1.z - d01 * v0.z) * denomr; - for (int k = 0; k < ATTRIBUTES; ++k) + memset(&Q, 0, sizeof(Quadric)); + + Q.w = w; + + for (size_t k = 0; k < attribute_count; ++k) { - float a0 = p0.a[k], a1 = p1.a[k], a2 = p2.a[k]; + float a0 = va0[k], a1 = va1[k], a2 = va2[k]; // compute gradient of eval(pos) for x/y/z/w // the formulas below are obtained by directly computing derivative of eval(pos) = a0 * u + a1 * v + a2 * w @@ -705,24 +708,14 @@ static void quadricUpdateAttributes(Quadric& Q, const Vector3& p0, const Vector3 Q.c += w * (gw * gw); // the only remaining sum components are ones that depend on attr; these will be addded during error evaluation, see quadricError - Q.gx[k] = w * gx; - Q.gy[k] = w * gy; - Q.gz[k] = w * gz; - Q.gw[k] = w * gw; - -#if TRACE > 2 - printf("attr%d: %e %e %e\n", - k, - (gx * p0.x + gy * p0.y + gz * p0.z + gw - a0), - (gx * p1.x + gy * p1.y + gz * p1.z + gw - a1), - (gx * p2.x + gy * p2.y + gz * p2.z + gw - a2) - ); -#endif + G[k].gx = w * gx; + G[k].gy = w * gy; + G[k].gz = w * gz; + G[k].gw = w * gw; } } -#endif -static void fillFaceQuadrics(Quadric* vertex_quadrics, Quadric* vertex_no_attrib_quadrics, const unsigned int* indices, size_t index_count, const Vector3* vertex_positions, const unsigned int* 
remap) +static void fillFaceQuadrics(Quadric* vertex_quadrics, const unsigned int* indices, size_t index_count, const Vector3* vertex_positions, const unsigned int* remap) { for (size_t i = 0; i < index_count; i += 3) { @@ -732,24 +725,18 @@ static void fillFaceQuadrics(Quadric* vertex_quadrics, Quadric* vertex_no_attrib Quadric Q; quadricFromTriangle(Q, vertex_positions[i0], vertex_positions[i1], vertex_positions[i2], 1.f); - quadricAdd(vertex_no_attrib_quadrics[remap[i0]], Q); - quadricAdd(vertex_no_attrib_quadrics[remap[i1]], Q); - quadricAdd(vertex_no_attrib_quadrics[remap[i2]], Q); -#if ATTRIBUTES - quadricUpdateAttributes(Q, vertex_positions[i0], vertex_positions[i1], vertex_positions[i2], Q.w); -#endif quadricAdd(vertex_quadrics[remap[i0]], Q); quadricAdd(vertex_quadrics[remap[i1]], Q); quadricAdd(vertex_quadrics[remap[i2]], Q); } } -static void fillEdgeQuadrics(Quadric* vertex_quadrics, Quadric* vertex_no_attrib_quadrics, const unsigned int* indices, size_t index_count, const Vector3* vertex_positions, const unsigned int* remap, const unsigned char* vertex_kind, const unsigned int* loop, const unsigned int* loopback) +static void fillEdgeQuadrics(Quadric* vertex_quadrics, const unsigned int* indices, size_t index_count, const Vector3* vertex_positions, const unsigned int* remap, const unsigned char* vertex_kind, const unsigned int* loop, const unsigned int* loopback) { for (size_t i = 0; i < index_count; i += 3) { - static const int next[3] = {1, 2, 0}; + static const int next[4] = {1, 2, 0, 1}; for (int e = 0; e < 3; ++e) { @@ -775,7 +762,7 @@ static void fillEdgeQuadrics(Quadric* vertex_quadrics, Quadric* vertex_no_attrib if (kHasOpposite[k0][k1] && remap[i1] > remap[i0]) continue; - unsigned int i2 = indices[i + next[next[e]]]; + unsigned int i2 = indices[i + next[e + 1]]; // we try hard to maintain border edge geometry; seam edges can move more freely // due to topological restrictions on collapses, seam quadrics slightly improves collapse structure but 
aren't critical @@ -789,13 +776,33 @@ static void fillEdgeQuadrics(Quadric* vertex_quadrics, Quadric* vertex_no_attrib quadricAdd(vertex_quadrics[remap[i0]], Q); quadricAdd(vertex_quadrics[remap[i1]], Q); - - quadricAdd(vertex_no_attrib_quadrics[remap[i0]], Q); - quadricAdd(vertex_no_attrib_quadrics[remap[i1]], Q); } } } +static void fillAttributeQuadrics(Quadric* attribute_quadrics, QuadricGrad* attribute_gradients, const unsigned int* indices, size_t index_count, const Vector3* vertex_positions, const float* vertex_attributes, size_t attribute_count, const unsigned int* remap) +{ + for (size_t i = 0; i < index_count; i += 3) + { + unsigned int i0 = indices[i + 0]; + unsigned int i1 = indices[i + 1]; + unsigned int i2 = indices[i + 2]; + + Quadric QA; + QuadricGrad G[kMaxAttributes]; + quadricFromAttributes(QA, G, vertex_positions[i0], vertex_positions[i1], vertex_positions[i2], &vertex_attributes[i0 * attribute_count], &vertex_attributes[i1 * attribute_count], &vertex_attributes[i2 * attribute_count], attribute_count); + + // TODO: This blends together attribute weights across attribute discontinuities, which is probably not a great idea + quadricAdd(attribute_quadrics[remap[i0]], QA); + quadricAdd(attribute_quadrics[remap[i1]], QA); + quadricAdd(attribute_quadrics[remap[i2]], QA); + + quadricAdd(&attribute_gradients[remap[i0] * attribute_count], G, attribute_count); + quadricAdd(&attribute_gradients[remap[i1] * attribute_count], G, attribute_count); + quadricAdd(&attribute_gradients[remap[i2] * attribute_count], G, attribute_count); + } +} + // does triangle ABC flip when C is replaced with D? 
static bool hasTriangleFlip(const Vector3& a, const Vector3& b, const Vector3& c, const Vector3& d) { @@ -806,7 +813,7 @@ static bool hasTriangleFlip(const Vector3& a, const Vector3& b, const Vector3& c Vector3 nbc = {eb.y * ec.z - eb.z * ec.y, eb.z * ec.x - eb.x * ec.z, eb.x * ec.y - eb.y * ec.x}; Vector3 nbd = {eb.y * ed.z - eb.z * ed.y, eb.z * ed.x - eb.x * ed.z, eb.x * ed.y - eb.y * ed.x}; - return nbc.x * nbd.x + nbc.y * nbd.y + nbc.z * nbd.z < 0; + return nbc.x * nbd.x + nbc.y * nbd.y + nbc.z * nbd.z <= 0; } static bool hasTriangleFlips(const EdgeAdjacency& adjacency, const Vector3* vertex_positions, const unsigned int* collapse_remap, unsigned int i0, unsigned int i1) @@ -818,16 +825,15 @@ static bool hasTriangleFlips(const EdgeAdjacency& adjacency, const Vector3* vert const Vector3& v1 = vertex_positions[i1]; const EdgeAdjacency::Edge* edges = &adjacency.data[adjacency.offsets[i0]]; - size_t count = adjacency.counts[i0]; + size_t count = adjacency.offsets[i0 + 1] - adjacency.offsets[i0]; for (size_t i = 0; i < count; ++i) { unsigned int a = collapse_remap[edges[i].next]; unsigned int b = collapse_remap[edges[i].prev]; - // skip triangles that get collapsed - // note: this is mathematically redundant as if either of these is true, the dot product in hasTriangleFlip should be 0 - if (a == i1 || b == i1) + // skip triangles that will get collapsed by i0->i1 collapse or already got collapsed previously + if (a == i1 || b == i1 || a == b) continue; // early-out when at least one triangle flips due to a collapse @@ -838,7 +844,25 @@ static bool hasTriangleFlips(const EdgeAdjacency& adjacency, const Vector3* vert return false; } -static size_t pickEdgeCollapses(Collapse* collapses, const unsigned int* indices, size_t index_count, const unsigned int* remap, const unsigned char* vertex_kind, const unsigned int* loop) +static size_t boundEdgeCollapses(const EdgeAdjacency& adjacency, size_t vertex_count, size_t index_count, unsigned char* vertex_kind) +{ + size_t 
dual_count = 0; + + for (size_t i = 0; i < vertex_count; ++i) + { + unsigned char k = vertex_kind[i]; + unsigned int e = adjacency.offsets[i + 1] - adjacency.offsets[i]; + + dual_count += (k == Kind_Manifold || k == Kind_Seam) ? e : 0; + } + + assert(dual_count <= index_count); + + // pad capacity by 3 so that we can check for overflow once per triangle instead of once per edge + return (index_count - dual_count / 2) + 3; +} + +static size_t pickEdgeCollapses(Collapse* collapses, size_t collapse_capacity, const unsigned int* indices, size_t index_count, const unsigned int* remap, const unsigned char* vertex_kind, const unsigned int* loop) { size_t collapse_count = 0; @@ -846,6 +870,10 @@ static size_t pickEdgeCollapses(Collapse* collapses, const unsigned int* indices { static const int next[3] = {1, 2, 0}; + // this should never happen as boundEdgeCollapses should give an upper bound for the collapse count, but in an unlikely event it does we can just drop extra collapses + if (collapse_count + 3 > collapse_capacity) + break; + for (int e = 0; e < 3; ++e) { unsigned int i0 = indices[i + e]; @@ -896,7 +924,7 @@ static size_t pickEdgeCollapses(Collapse* collapses, const unsigned int* indices return collapse_count; } -static void rankEdgeCollapses(Collapse* collapses, size_t collapse_count, const Vector3* vertex_positions, const Quadric* vertex_quadrics, const Quadric* vertex_no_attrib_quadrics, const unsigned int* remap) +static void rankEdgeCollapses(Collapse* collapses, size_t collapse_count, const Vector3* vertex_positions, const float* vertex_attributes, const Quadric* vertex_quadrics, const Quadric* attribute_quadrics, const QuadricGrad* attribute_gradients, size_t attribute_count, const unsigned int* remap) { for (size_t i = 0; i < collapse_count; ++i) { @@ -910,78 +938,22 @@ static void rankEdgeCollapses(Collapse* collapses, size_t collapse_count, const unsigned int j0 = c.bidi ? i1 : i0; unsigned int j1 = c.bidi ? 
i0 : i1; - const Quadric& qi = vertex_quadrics[remap[i0]]; - const Quadric& qj = vertex_quadrics[remap[j0]]; + float ei = quadricError(vertex_quadrics[remap[i0]], vertex_positions[i1]); + float ej = quadricError(vertex_quadrics[remap[j0]], vertex_positions[j1]); - float ei = quadricError(qi, vertex_positions[i1]); - float ej = quadricError(qj, vertex_positions[j1]); - - const Quadric& naqi = vertex_no_attrib_quadrics[remap[i0]]; - const Quadric& naqj = vertex_no_attrib_quadrics[remap[j0]]; + if (attribute_count) + { + ei += quadricError(attribute_quadrics[remap[i0]], &attribute_gradients[remap[i0] * attribute_count], attribute_count, vertex_positions[i1], &vertex_attributes[i1 * attribute_count]); + ej += quadricError(attribute_quadrics[remap[j0]], &attribute_gradients[remap[j0] * attribute_count], attribute_count, vertex_positions[j1], &vertex_attributes[j1 * attribute_count]); + } // pick edge direction with minimal error c.v0 = ei <= ej ? i0 : j0; c.v1 = ei <= ej ? i1 : j1; c.error = ei <= ej ? ei : ej; - c.distance_error = ei <= ej ? quadricErrorNoAttributes(naqi, vertex_positions[i1]) : quadricErrorNoAttributes(naqj, vertex_positions[j1]); } } -#if TRACE > 1 -static void dumpEdgeCollapses(const Collapse* collapses, size_t collapse_count, const unsigned char* vertex_kind) -{ - size_t ckinds[Kind_Count][Kind_Count] = {}; - float cerrors[Kind_Count][Kind_Count] = {}; - - for (int k0 = 0; k0 < Kind_Count; ++k0) - for (int k1 = 0; k1 < Kind_Count; ++k1) - cerrors[k0][k1] = FLT_MAX; - - for (size_t i = 0; i < collapse_count; ++i) - { - unsigned int i0 = collapses[i].v0; - unsigned int i1 = collapses[i].v1; - - unsigned char k0 = vertex_kind[i0]; - unsigned char k1 = vertex_kind[i1]; - - ckinds[k0][k1]++; - cerrors[k0][k1] = (collapses[i].error < cerrors[k0][k1]) ? 
collapses[i].error : cerrors[k0][k1]; - } - - for (int k0 = 0; k0 < Kind_Count; ++k0) - for (int k1 = 0; k1 < Kind_Count; ++k1) - if (ckinds[k0][k1]) - printf("collapses %d -> %d: %d, min error %e\n", k0, k1, int(ckinds[k0][k1]), ckinds[k0][k1] ? sqrtf(cerrors[k0][k1]) : 0.f); -} - -static void dumpLockedCollapses(const unsigned int* indices, size_t index_count, const unsigned char* vertex_kind) -{ - size_t locked_collapses[Kind_Count][Kind_Count] = {}; - - for (size_t i = 0; i < index_count; i += 3) - { - static const int next[3] = {1, 2, 0}; - - for (int e = 0; e < 3; ++e) - { - unsigned int i0 = indices[i + e]; - unsigned int i1 = indices[i + next[e]]; - - unsigned char k0 = vertex_kind[i0]; - unsigned char k1 = vertex_kind[i1]; - - locked_collapses[k0][k1] += !kCanCollapse[k0][k1] && !kCanCollapse[k1][k0]; - } - } - - for (int k0 = 0; k0 < Kind_Count; ++k0) - for (int k1 = 0; k1 < Kind_Count; ++k1) - if (locked_collapses[k0][k1]) - printf("locked collapses %d -> %d: %d\n", k0, k1, int(locked_collapses[k0][k1])); -} -#endif - static void sortEdgeCollapses(unsigned int* sort_order, const Collapse* collapses, size_t collapse_count) { const int sort_bits = 11; @@ -1020,7 +992,7 @@ static void sortEdgeCollapses(unsigned int* sort_order, const Collapse* collapse } } -static size_t performEdgeCollapses(unsigned int* collapse_remap, unsigned char* collapse_locked, Quadric* vertex_quadrics, Quadric* vertex_no_attrib_quadrics, const Collapse* collapses, size_t collapse_count, const unsigned int* collapse_order, const unsigned int* remap, const unsigned int* wedge, const unsigned char* vertex_kind, const Vector3* vertex_positions, const EdgeAdjacency& adjacency, size_t triangle_collapse_goal, float error_limit, float& result_error) +static size_t performEdgeCollapses(unsigned int* collapse_remap, unsigned char* collapse_locked, Quadric* vertex_quadrics, Quadric* attribute_quadrics, QuadricGrad* attribute_gradients, size_t attribute_count, const Collapse* collapses, size_t 
collapse_count, const unsigned int* collapse_order, const unsigned int* remap, const unsigned int* wedge, const unsigned char* vertex_kind, const Vector3* vertex_positions, const EdgeAdjacency& adjacency, size_t triangle_collapse_goal, float error_limit, float& result_error) { size_t edge_collapses = 0; size_t triangle_collapses = 0; @@ -1082,7 +1054,12 @@ static size_t performEdgeCollapses(unsigned int* collapse_remap, unsigned char* assert(collapse_remap[r1] == r1); quadricAdd(vertex_quadrics[r1], vertex_quadrics[r0]); - quadricAdd(vertex_no_attrib_quadrics[r1], vertex_no_attrib_quadrics[r0]); + + if (attribute_count) + { + quadricAdd(attribute_quadrics[r1], attribute_quadrics[r0]); + quadricAdd(&attribute_gradients[r1 * attribute_count], &attribute_gradients[r0 * attribute_count], attribute_count); + } if (vertex_kind[i0] == Kind_Complex) { @@ -1120,7 +1097,7 @@ static size_t performEdgeCollapses(unsigned int* collapse_remap, unsigned char* triangle_collapses += (vertex_kind[i0] == Kind_Border) ? 1 : 2; edge_collapses++; - result_error = result_error < c.distance_error ? c.distance_error : result_error; + result_error = result_error < c.error ? 
c.error : result_error; } #if TRACE @@ -1346,17 +1323,41 @@ static void fillCellQuadrics(Quadric* cell_quadrics, const unsigned int* indices } } -static void fillCellQuadrics(Quadric* cell_quadrics, const Vector3* vertex_positions, size_t vertex_count, const unsigned int* vertex_cells) +static void fillCellReservoirs(Reservoir* cell_reservoirs, size_t cell_count, const Vector3* vertex_positions, const float* vertex_colors, size_t vertex_colors_stride, size_t vertex_count, const unsigned int* vertex_cells) { + static const float dummy_color[] = { 0.f, 0.f, 0.f }; + + size_t vertex_colors_stride_float = vertex_colors_stride / sizeof(float); + for (size_t i = 0; i < vertex_count; ++i) { - unsigned int c = vertex_cells[i]; + unsigned int cell = vertex_cells[i]; const Vector3& v = vertex_positions[i]; + Reservoir& r = cell_reservoirs[cell]; - Quadric Q; - quadricFromPoint(Q, v.x, v.y, v.z, 1.f); + const float* color = vertex_colors ? &vertex_colors[i * vertex_colors_stride_float] : dummy_color; - quadricAdd(cell_quadrics[c], Q); + r.x += v.x; + r.y += v.y; + r.z += v.z; + r.r += color[0]; + r.g += color[1]; + r.b += color[2]; + r.w += 1.f; + } + + for (size_t i = 0; i < cell_count; ++i) + { + Reservoir& r = cell_reservoirs[i]; + + float iw = r.w == 0.f ? 
0.f : 1.f / r.w; + + r.x *= iw; + r.y *= iw; + r.z *= iw; + r.r *= iw; + r.g *= iw; + r.b *= iw; } } @@ -1377,6 +1378,34 @@ static void fillCellRemap(unsigned int* cell_remap, float* cell_errors, size_t c } } +static void fillCellRemap(unsigned int* cell_remap, float* cell_errors, size_t cell_count, const unsigned int* vertex_cells, const Reservoir* cell_reservoirs, const Vector3* vertex_positions, const float* vertex_colors, size_t vertex_colors_stride, float color_weight, size_t vertex_count) +{ + static const float dummy_color[] = { 0.f, 0.f, 0.f }; + + size_t vertex_colors_stride_float = vertex_colors_stride / sizeof(float); + + memset(cell_remap, -1, cell_count * sizeof(unsigned int)); + + for (size_t i = 0; i < vertex_count; ++i) + { + unsigned int cell = vertex_cells[i]; + const Vector3& v = vertex_positions[i]; + const Reservoir& r = cell_reservoirs[cell]; + + const float* color = vertex_colors ? &vertex_colors[i * vertex_colors_stride_float] : dummy_color; + + float pos_error = (v.x - r.x) * (v.x - r.x) + (v.y - r.y) * (v.y - r.y) + (v.z - r.z) * (v.z - r.z); + float col_error = (color[0] - r.r) * (color[0] - r.r) + (color[1] - r.g) * (color[1] - r.g) + (color[2] - r.b) * (color[2] - r.b); + float error = pos_error + color_weight * col_error; + + if (cell_remap[cell] == ~0u || cell_errors[cell] > error) + { + cell_remap[cell] = unsigned(i); + cell_errors[cell] = error; + } + } +} + static size_t filterTriangles(unsigned int* destination, unsigned int* tritable, size_t tritable_size, const unsigned int* indices, size_t index_count, const unsigned int* vertex_cells, const unsigned int* cell_remap) { TriangleHasher hasher = {destination}; @@ -1434,26 +1463,23 @@ static float interpolate(float y, float x0, float y0, float x1, float y1, float #ifndef NDEBUG // Note: this is only exposed for debug visualization purposes; do *not* use these in debug builds -MESHOPTIMIZER_API unsigned char* meshopt_simplifyDebugKind = 0; -MESHOPTIMIZER_API unsigned int* 
meshopt_simplifyDebugLoop = 0; -MESHOPTIMIZER_API unsigned int* meshopt_simplifyDebugLoopBack = 0; +MESHOPTIMIZER_API unsigned char* meshopt_simplifyDebugKind = NULL; +MESHOPTIMIZER_API unsigned int* meshopt_simplifyDebugLoop = NULL; +MESHOPTIMIZER_API unsigned int* meshopt_simplifyDebugLoopBack = NULL; #endif -size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, unsigned int options, float* out_result_error) -{ - return meshopt_simplifyWithAttributes(destination, indices, index_count, vertex_positions_data, vertex_count, vertex_positions_stride, target_index_count, target_error, options, out_result_error, 0, 0, 0); -} - -size_t meshopt_simplifyWithAttributes(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_data, size_t vertex_count, size_t vertex_stride, size_t target_index_count, float target_error, unsigned int options, float* out_result_error, const float* attributes, const float* attribute_weights, size_t attribute_count) +size_t meshopt_simplifyEdge(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, const float* vertex_attributes_data, size_t vertex_attributes_stride, const float* attribute_weights, size_t attribute_count, size_t target_index_count, float target_error, unsigned int options, float* out_result_error) { using namespace meshopt; assert(index_count % 3 == 0); - assert(vertex_stride >= 12 && vertex_stride <= 256); - assert(vertex_stride % sizeof(float) == 0); + assert(vertex_positions_stride >= 12 && vertex_positions_stride <= 256); + assert(vertex_positions_stride % sizeof(float) == 0); assert(target_index_count <= index_count); assert((options & ~(meshopt_SimplifyLockBorder)) == 0); - assert(attribute_count 
<= ATTRIBUTES); + assert(vertex_attributes_stride >= attribute_count * sizeof(float) && vertex_attributes_stride <= 256); + assert(vertex_attributes_stride % sizeof(float) == 0); + assert(attribute_count <= kMaxAttributes); meshopt_Allocator allocator; @@ -1467,7 +1493,7 @@ size_t meshopt_simplifyWithAttributes(unsigned int* destination, const unsigned // build position remap that maps each vertex to the one with identical position unsigned int* remap = allocator.allocate(vertex_count); unsigned int* wedge = allocator.allocate(vertex_count); - buildPositionRemap(remap, wedge, vertex_data, vertex_count, vertex_stride, allocator); + buildPositionRemap(remap, wedge, vertex_positions_data, vertex_count, vertex_positions_stride, allocator); // classify vertices; vertex kind determines collapse rules, see kCanCollapse unsigned char* vertex_kind = allocator.allocate(vertex_count); @@ -1491,29 +1517,36 @@ size_t meshopt_simplifyWithAttributes(unsigned int* destination, const unsigned #endif Vector3* vertex_positions = allocator.allocate(vertex_count); - rescalePositions(vertex_positions, vertex_data, vertex_count, vertex_stride); + rescalePositions(vertex_positions, vertex_positions_data, vertex_count, vertex_positions_stride); -#if ATTRIBUTES - for (size_t i = 0; i < vertex_count; ++i) + float* vertex_attributes = NULL; + + if (attribute_count) { - memset(vertex_positions[i].a, 0, sizeof(vertex_positions[i].a)); - - for (size_t k = 0; k < attribute_count; ++k) - { - float a = attributes[i * attribute_count + k]; - - vertex_positions[i].a[k] = a * attribute_weights[k]; - } + vertex_attributes = allocator.allocate(vertex_count * attribute_count); + rescaleAttributes(vertex_attributes, vertex_attributes_data, vertex_count, vertex_attributes_stride, attribute_weights, attribute_count); } -#endif Quadric* vertex_quadrics = allocator.allocate(vertex_count); memset(vertex_quadrics, 0, vertex_count * sizeof(Quadric)); - Quadric* vertex_no_attrib_quadrics = 
allocator.allocate(vertex_count); - memset(vertex_no_attrib_quadrics, 0, vertex_count * sizeof(Quadric)); - fillFaceQuadrics(vertex_quadrics, vertex_no_attrib_quadrics, indices, index_count, vertex_positions, remap); - fillEdgeQuadrics(vertex_quadrics, vertex_no_attrib_quadrics, indices, index_count, vertex_positions, remap, vertex_kind, loop, loopback); + Quadric* attribute_quadrics = NULL; + QuadricGrad* attribute_gradients = NULL; + + if (attribute_count) + { + attribute_quadrics = allocator.allocate(vertex_count); + memset(attribute_quadrics, 0, vertex_count * sizeof(Quadric)); + + attribute_gradients = allocator.allocate(vertex_count * attribute_count); + memset(attribute_gradients, 0, vertex_count * attribute_count * sizeof(QuadricGrad)); + } + + fillFaceQuadrics(vertex_quadrics, indices, index_count, vertex_positions, remap); + fillEdgeQuadrics(vertex_quadrics, indices, index_count, vertex_positions, remap, vertex_kind, loop, loopback); + + if (attribute_count) + fillAttributeQuadrics(attribute_quadrics, attribute_gradients, indices, index_count, vertex_positions, vertex_attributes, attribute_count, remap); if (result != indices) memcpy(result, indices, index_count * sizeof(unsigned int)); @@ -1522,8 +1555,10 @@ size_t meshopt_simplifyWithAttributes(unsigned int* destination, const unsigned size_t pass_count = 0; #endif - Collapse* edge_collapses = allocator.allocate(index_count); - unsigned int* collapse_order = allocator.allocate(index_count); + size_t collapse_capacity = boundEdgeCollapses(adjacency, vertex_count, index_count, vertex_kind); + + Collapse* edge_collapses = allocator.allocate(collapse_capacity); + unsigned int* collapse_order = allocator.allocate(collapse_capacity); unsigned int* collapse_remap = allocator.allocate(vertex_count); unsigned char* collapse_locked = allocator.allocate(vertex_count); @@ -1538,17 +1573,14 @@ size_t meshopt_simplifyWithAttributes(unsigned int* destination, const unsigned // note: throughout the simplification 
process adjacency structure reflects welded topology for result-in-progress updateEdgeAdjacency(adjacency, result, result_count, vertex_count, remap); - size_t edge_collapse_count = pickEdgeCollapses(edge_collapses, result, result_count, remap, vertex_kind, loop); + size_t edge_collapse_count = pickEdgeCollapses(edge_collapses, collapse_capacity, result, result_count, remap, vertex_kind, loop); + assert(edge_collapse_count <= collapse_capacity); // no edges can be collapsed any more due to topology restrictions if (edge_collapse_count == 0) break; - rankEdgeCollapses(edge_collapses, edge_collapse_count, vertex_positions, vertex_quadrics, vertex_no_attrib_quadrics, remap); - -#if TRACE > 1 - dumpEdgeCollapses(edge_collapses, edge_collapse_count, vertex_kind); -#endif + rankEdgeCollapses(edge_collapses, edge_collapse_count, vertex_positions, vertex_attributes, vertex_quadrics, attribute_quadrics, attribute_gradients, attribute_count, remap); sortEdgeCollapses(collapse_order, edge_collapses, edge_collapse_count); @@ -1563,7 +1595,7 @@ size_t meshopt_simplifyWithAttributes(unsigned int* destination, const unsigned printf("pass %d: ", int(pass_count++)); #endif - size_t collapses = performEdgeCollapses(collapse_remap, collapse_locked, vertex_quadrics, vertex_no_attrib_quadrics, edge_collapses, edge_collapse_count, collapse_order, remap, wedge, vertex_kind, vertex_positions, adjacency, triangle_collapse_goal, error_limit, result_error); + size_t collapses = performEdgeCollapses(collapse_remap, collapse_locked, vertex_quadrics, attribute_quadrics, attribute_gradients, attribute_count, edge_collapses, edge_collapse_count, collapse_order, remap, wedge, vertex_kind, vertex_positions, adjacency, triangle_collapse_goal, error_limit, result_error); // no edges can be collapsed any more due to hitting the error limit or triangle collapse limit if (collapses == 0) @@ -1582,10 +1614,6 @@ size_t meshopt_simplifyWithAttributes(unsigned int* destination, const unsigned 
printf("result: %d triangles, error: %e; total %d passes\n", int(result_count), sqrtf(result_error), int(pass_count)); #endif -#if TRACE > 1 - dumpLockedCollapses(result, result_count, vertex_kind); -#endif - #ifndef NDEBUG if (meshopt_simplifyDebugKind) memcpy(meshopt_simplifyDebugKind, vertex_kind, vertex_count); @@ -1599,13 +1627,21 @@ size_t meshopt_simplifyWithAttributes(unsigned int* destination, const unsigned // result_error is quadratic; we need to remap it back to linear if (out_result_error) - { *out_result_error = sqrtf(result_error); - } return result_count; } +size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, unsigned int options, float* out_result_error) +{ + return meshopt_simplifyEdge(destination, indices, index_count, vertex_positions_data, vertex_count, vertex_positions_stride, NULL, 0, NULL, 0, target_index_count, target_error, options, out_result_error); +} + +size_t meshopt_simplifyWithAttributes(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, const float* vertex_attributes_data, size_t vertex_attributes_stride, const float* attribute_weights, size_t attribute_count, size_t target_index_count, float target_error, unsigned int options, float* out_result_error) +{ + return meshopt_simplifyEdge(destination, indices, index_count, vertex_positions_data, vertex_count, vertex_positions_stride, vertex_attributes_data, vertex_attributes_stride, attribute_weights, attribute_count, target_index_count, target_error, options, out_result_error); +} + size_t meshopt_simplifySloppy(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, size_t 
target_index_count, float target_error, float* out_result_error) { using namespace meshopt; @@ -1738,12 +1774,15 @@ size_t meshopt_simplifySloppy(unsigned int* destination, const unsigned int* ind return write; } -size_t meshopt_simplifyPoints(unsigned int* destination, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, size_t target_vertex_count) +size_t meshopt_simplifyPoints(unsigned int* destination, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, const float* vertex_colors, size_t vertex_colors_stride, float color_weight, size_t target_vertex_count) { using namespace meshopt; assert(vertex_positions_stride >= 12 && vertex_positions_stride <= 256); assert(vertex_positions_stride % sizeof(float) == 0); + assert(vertex_colors_stride == 0 || (vertex_colors_stride >= 12 && vertex_colors_stride <= 256)); + assert(vertex_colors_stride % sizeof(float) == 0); + assert(vertex_colors == NULL || vertex_colors_stride != 0); assert(target_vertex_count <= vertex_count); size_t target_cell_count = target_vertex_count; @@ -1827,24 +1866,30 @@ size_t meshopt_simplifyPoints(unsigned int* destination, const float* vertex_pos computeVertexIds(vertex_ids, vertex_positions, vertex_count, min_grid); size_t cell_count = fillVertexCells(table, table_size, vertex_cells, vertex_ids, vertex_count); - // build a quadric for each target cell - Quadric* cell_quadrics = allocator.allocate(cell_count); - memset(cell_quadrics, 0, cell_count * sizeof(Quadric)); + // accumulate points into a reservoir for each target cell + Reservoir* cell_reservoirs = allocator.allocate(cell_count); + memset(cell_reservoirs, 0, cell_count * sizeof(Reservoir)); - fillCellQuadrics(cell_quadrics, vertex_positions, vertex_count, vertex_cells); + fillCellReservoirs(cell_reservoirs, cell_count, vertex_positions, vertex_colors, vertex_colors_stride, vertex_count, vertex_cells); // for each target cell, find the vertex with the minimal error 
unsigned int* cell_remap = allocator.allocate(cell_count); float* cell_errors = allocator.allocate(cell_count); - fillCellRemap(cell_remap, cell_errors, cell_count, vertex_cells, cell_quadrics, vertex_positions, vertex_count); + fillCellRemap(cell_remap, cell_errors, cell_count, vertex_cells, cell_reservoirs, vertex_positions, vertex_colors, vertex_colors_stride, color_weight * color_weight, vertex_count); // copy results to the output assert(cell_count <= target_vertex_count); memcpy(destination, cell_remap, sizeof(unsigned int) * cell_count); #if TRACE - printf("result: %d cells\n", int(cell_count)); + // compute error + float result_error = 0.f; + + for (size_t i = 0; i < cell_count; ++i) + result_error = result_error < cell_errors[i] ? cell_errors[i] : result_error; + + printf("result: %d cells, %e error\n", int(cell_count), sqrtf(result_error)); #endif return cell_count; diff --git a/thirdparty/meshoptimizer/vcacheoptimizer.cpp b/thirdparty/meshoptimizer/vcacheoptimizer.cpp index ce8fd3a887b..d4b08ba3406 100644 --- a/thirdparty/meshoptimizer/vcacheoptimizer.cpp +++ b/thirdparty/meshoptimizer/vcacheoptimizer.cpp @@ -221,9 +221,9 @@ void meshopt_optimizeVertexCacheTable(unsigned int* destination, const unsigned triangle_scores[i] = vertex_scores[a] + vertex_scores[b] + vertex_scores[c]; } - unsigned int cache_holder[2 * (kCacheSizeMax + 3)]; + unsigned int cache_holder[2 * (kCacheSizeMax + 4)]; unsigned int* cache = cache_holder; - unsigned int* cache_new = cache_holder + kCacheSizeMax + 3; + unsigned int* cache_new = cache_holder + kCacheSizeMax + 4; size_t cache_count = 0; unsigned int current_triangle = 0; @@ -260,10 +260,8 @@ void meshopt_optimizeVertexCacheTable(unsigned int* destination, const unsigned { unsigned int index = cache[i]; - if (index != a && index != b && index != c) - { - cache_new[cache_write++] = index; - } + cache_new[cache_write] = index; + cache_write += (index != a && index != b && index != c); } unsigned int* cache_temp = cache; @@ 
-305,6 +303,10 @@ void meshopt_optimizeVertexCacheTable(unsigned int* destination, const unsigned { unsigned int index = cache[i]; + // no need to update scores if we are never going to use this vertex + if (adjacency.counts[index] == 0) + continue; + int cache_position = i >= cache_size ? -1 : int(i); // update vertex score @@ -325,11 +327,8 @@ void meshopt_optimizeVertexCacheTable(unsigned int* destination, const unsigned float tri_score = triangle_scores[tri] + score_diff; assert(tri_score > 0); - if (best_score < tri_score) - { - best_triangle = tri; - best_score = tri_score; - } + best_triangle = best_score < tri_score ? tri : best_triangle; + best_score = best_score < tri_score ? tri_score : best_score; triangle_scores[tri] = tri_score; } diff --git a/thirdparty/meshoptimizer/vertexcodec.cpp b/thirdparty/meshoptimizer/vertexcodec.cpp index 4bd11121d2f..8ab0662d88a 100644 --- a/thirdparty/meshoptimizer/vertexcodec.cpp +++ b/thirdparty/meshoptimizer/vertexcodec.cpp @@ -44,6 +44,10 @@ // When targeting Wasm SIMD we can't use runtime cpuid checks so we unconditionally enable SIMD #if defined(__wasm_simd128__) #define SIMD_WASM +// Prevent compiling other variant when wasm simd compilation is active +#undef SIMD_NEON +#undef SIMD_SSE +#undef SIMD_AVX #endif #ifndef SIMD_TARGET @@ -83,19 +87,17 @@ #endif #ifdef SIMD_WASM -#undef __DEPRECATED -#pragma clang diagnostic ignored "-Wdeprecated-declarations" #include #endif #ifdef SIMD_WASM -#define wasmx_splat_v32x4(v, i) wasm_v32x4_shuffle(v, v, i, i, i, i) -#define wasmx_unpacklo_v8x16(a, b) wasm_v8x16_shuffle(a, b, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23) -#define wasmx_unpackhi_v8x16(a, b) wasm_v8x16_shuffle(a, b, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31) -#define wasmx_unpacklo_v16x8(a, b) wasm_v16x8_shuffle(a, b, 0, 8, 1, 9, 2, 10, 3, 11) -#define wasmx_unpackhi_v16x8(a, b) wasm_v16x8_shuffle(a, b, 4, 12, 5, 13, 6, 14, 7, 15) -#define wasmx_unpacklo_v64x2(a, b) 
wasm_v64x2_shuffle(a, b, 0, 2) -#define wasmx_unpackhi_v64x2(a, b) wasm_v64x2_shuffle(a, b, 1, 3) +#define wasmx_splat_v32x4(v, i) wasm_i32x4_shuffle(v, v, i, i, i, i) +#define wasmx_unpacklo_v8x16(a, b) wasm_i8x16_shuffle(a, b, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23) +#define wasmx_unpackhi_v8x16(a, b) wasm_i8x16_shuffle(a, b, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31) +#define wasmx_unpacklo_v16x8(a, b) wasm_i16x8_shuffle(a, b, 0, 8, 1, 9, 2, 10, 3, 11) +#define wasmx_unpackhi_v16x8(a, b) wasm_i16x8_shuffle(a, b, 4, 12, 5, 13, 6, 14, 7, 15) +#define wasmx_unpacklo_v64x2(a, b) wasm_i64x2_shuffle(a, b, 0, 2) +#define wasmx_unpackhi_v64x2(a, b) wasm_i64x2_shuffle(a, b, 1, 3) #endif namespace meshopt @@ -218,7 +220,7 @@ static unsigned char* encodeBytes(unsigned char* data, unsigned char* data_end, size_t header_size = (buffer_size / kByteGroupSize + 3) / 4; if (size_t(data_end - data) < header_size) - return 0; + return NULL; data += header_size; @@ -227,7 +229,7 @@ static unsigned char* encodeBytes(unsigned char* data, unsigned char* data_end, for (size_t i = 0; i < buffer_size; i += kByteGroupSize) { if (size_t(data_end - data) < kByteGroupDecodeLimit) - return 0; + return NULL; int best_bits = 8; size_t best_size = encodeBytesGroupMeasure(buffer + i, 8); @@ -286,7 +288,7 @@ static unsigned char* encodeVertexBlock(unsigned char* data, unsigned char* data data = encodeBytes(data, data_end, buffer, (vertex_count + kByteGroupSize - 1) & ~(kByteGroupSize - 1)); if (!data) - return 0; + return NULL; } memcpy(last_vertex, &vertex_data[vertex_size * (vertex_count - 1)], vertex_size); @@ -294,7 +296,7 @@ static unsigned char* encodeVertexBlock(unsigned char* data, unsigned char* data return data; } -#if defined(SIMD_FALLBACK) || (!defined(SIMD_SSE) && !defined(SIMD_NEON) && !defined(SIMD_AVX)) +#if defined(SIMD_FALLBACK) || (!defined(SIMD_SSE) && !defined(SIMD_NEON) && !defined(SIMD_AVX) && !defined(SIMD_WASM)) static const unsigned 
char* decodeBytesGroup(const unsigned char* data, unsigned char* buffer, int bitslog2) { #define READ() byte = *data++ @@ -354,14 +356,14 @@ static const unsigned char* decodeBytes(const unsigned char* data, const unsigne size_t header_size = (buffer_size / kByteGroupSize + 3) / 4; if (size_t(data_end - data) < header_size) - return 0; + return NULL; data += header_size; for (size_t i = 0; i < buffer_size; i += kByteGroupSize) { if (size_t(data_end - data) < kByteGroupDecodeLimit) - return 0; + return NULL; size_t header_offset = i / kByteGroupSize; @@ -386,7 +388,7 @@ static const unsigned char* decodeVertexBlock(const unsigned char* data, const u { data = decodeBytes(data, data_end, buffer, vertex_count_aligned); if (!data) - return 0; + return NULL; size_t vertex_offset = k; @@ -757,7 +759,7 @@ static v128_t decodeShuffleMask(unsigned char mask0, unsigned char mask1) v128_t sm1 = wasm_v128_load(&kDecodeBytesGroupShuffle[mask1]); v128_t sm1off = wasm_v128_load(&kDecodeBytesGroupCount[mask0]); - sm1off = wasm_v8x16_shuffle(sm1off, sm1off, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + sm1off = wasm_i8x16_shuffle(sm1off, sm1off, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); v128_t sm1r = wasm_i8x16_add(sm1, sm1off); @@ -777,9 +779,6 @@ static void wasmMoveMask(v128_t mask, unsigned char& mask0, unsigned char& mask1 SIMD_TARGET static const unsigned char* decodeBytesGroupSimd(const unsigned char* data, unsigned char* buffer, int bitslog2) { - unsigned char byte, enc, encv; - const unsigned char* data_var; - switch (bitslog2) { case 0: @@ -807,7 +806,7 @@ static const unsigned char* decodeBytesGroupSimd(const unsigned char* data, unsi v128_t shuf = decodeShuffleMask(mask0, mask1); - v128_t result = wasm_v128_bitselect(wasm_v8x16_swizzle(rest, shuf), sel, mask); + v128_t result = wasm_v128_bitselect(wasm_i8x16_swizzle(rest, shuf), sel, mask); wasm_v128_store(buffer, result); @@ -829,7 +828,7 @@ static const unsigned char* decodeBytesGroupSimd(const unsigned 
char* data, unsi v128_t shuf = decodeShuffleMask(mask0, mask1); - v128_t result = wasm_v128_bitselect(wasm_v8x16_swizzle(rest, shuf), sel, mask); + v128_t result = wasm_v128_bitselect(wasm_i8x16_swizzle(rest, shuf), sel, mask); wasm_v128_store(buffer, result); @@ -939,7 +938,7 @@ static const unsigned char* decodeBytesSimd(const unsigned char* data, const uns size_t header_size = (buffer_size / kByteGroupSize + 3) / 4; if (size_t(data_end - data) < header_size) - return 0; + return NULL; data += header_size; @@ -961,7 +960,7 @@ static const unsigned char* decodeBytesSimd(const unsigned char* data, const uns for (; i < buffer_size; i += kByteGroupSize) { if (size_t(data_end - data) < kByteGroupDecodeLimit) - return 0; + return NULL; size_t header_offset = i / kByteGroupSize; @@ -989,7 +988,7 @@ static const unsigned char* decodeVertexBlockSimd(const unsigned char* data, con { data = decodeBytesSimd(data, data_end, buffer + j * vertex_count_aligned, vertex_count_aligned); if (!data) - return 0; + return NULL; } #if defined(SIMD_SSE) || defined(SIMD_AVX) @@ -1183,7 +1182,7 @@ int meshopt_decodeVertexBuffer(void* destination, size_t vertex_count, size_t ve assert(vertex_size > 0 && vertex_size <= 256); assert(vertex_size % 4 == 0); - const unsigned char* (*decode)(const unsigned char*, const unsigned char*, unsigned char*, size_t, size_t, unsigned char[256]) = 0; + const unsigned char* (*decode)(const unsigned char*, const unsigned char*, unsigned char*, size_t, size_t, unsigned char[256]) = NULL; #if defined(SIMD_SSE) && defined(SIMD_FALLBACK) decode = (cpuid & (1 << 9)) ? 
decodeVertexBlockSimd : decodeVertexBlock; diff --git a/thirdparty/meshoptimizer/vertexfilter.cpp b/thirdparty/meshoptimizer/vertexfilter.cpp index 14a73b1dddc..4b5f444f046 100644 --- a/thirdparty/meshoptimizer/vertexfilter.cpp +++ b/thirdparty/meshoptimizer/vertexfilter.cpp @@ -30,6 +30,9 @@ // When targeting Wasm SIMD we can't use runtime cpuid checks so we unconditionally enable SIMD #if defined(__wasm_simd128__) #define SIMD_WASM +// Prevent compiling other variant when wasm simd compilation is active +#undef SIMD_NEON +#undef SIMD_SSE #endif #endif // !MESHOPTIMIZER_NO_SIMD @@ -63,6 +66,10 @@ #define wasmx_unziphi_v32x4(a, b) wasm_v32x4_shuffle(a, b, 1, 3, 5, 7) #endif +#ifndef __has_builtin +#define __has_builtin(x) 0 +#endif + namespace meshopt { @@ -185,9 +192,7 @@ inline uint64_t rotateleft64(uint64_t v, int x) { #if defined(_MSC_VER) && !defined(__clang__) return _rotl64(v, x); -// Apple's Clang 8 is actually vanilla Clang 3.9, there we need to look for -// version 11 instead: https://en.wikipedia.org/wiki/Xcode#Toolchain_versions -#elif defined(__clang__) && ((!defined(__apple_build_version__) && __clang_major__ >= 8) || __clang_major__ >= 11) +#elif defined(__clang__) && __has_builtin(__builtin_rotateleft64) return __builtin_rotateleft64(v, x); #else return (v << (x & 63)) | (v >> ((64 - x) & 63)); @@ -791,6 +796,33 @@ static void decodeFilterExpSimd(unsigned int* data, size_t count) } #endif +// optimized variant of frexp +inline int optlog2(float v) +{ + union + { + float f; + unsigned int ui; + } u; + + u.f = v; + // +1 accounts for implicit 1. in mantissa; denormalized numbers will end up clamped to min_exp by calling code + return u.ui == 0 ? 
0 : int((u.ui >> 23) & 0xff) - 127 + 1; +} + +// optimized variant of ldexp +inline float optexp2(int e) +{ + union + { + float f; + unsigned int ui; + } u; + + u.ui = unsigned(e + 127) << 23; + return u.f; +} + } // namespace meshopt void meshopt_decodeFilterOct(void* buffer, size_t count, size_t stride) @@ -918,39 +950,78 @@ void meshopt_encodeFilterQuat(void* destination_, size_t count, size_t stride, i } } -void meshopt_encodeFilterExp(void* destination_, size_t count, size_t stride, int bits, const float* data) +void meshopt_encodeFilterExp(void* destination_, size_t count, size_t stride, int bits, const float* data, enum meshopt_EncodeExpMode mode) { - assert(stride > 0 && stride % 4 == 0); + using namespace meshopt; + + assert(stride > 0 && stride % 4 == 0 && stride <= 256); assert(bits >= 1 && bits <= 24); unsigned int* destination = static_cast(destination_); size_t stride_float = stride / sizeof(float); + int component_exp[64]; + assert(stride_float <= sizeof(component_exp) / sizeof(int)); + + const int min_exp = -100; + + if (mode == meshopt_EncodeExpSharedComponent) + { + for (size_t j = 0; j < stride_float; ++j) + component_exp[j] = min_exp; + + for (size_t i = 0; i < count; ++i) + { + const float* v = &data[i * stride_float]; + + // use maximum exponent to encode values; this guarantees that mantissa is [-1, 1] + for (size_t j = 0; j < stride_float; ++j) + { + int e = optlog2(v[j]); + + component_exp[j] = (component_exp[j] < e) ? 
e : component_exp[j]; + } + } + } + for (size_t i = 0; i < count; ++i) { const float* v = &data[i * stride_float]; unsigned int* d = &destination[i * stride_float]; - // use maximum exponent to encode values; this guarantees that mantissa is [-1, 1] - int exp = -100; + int vector_exp = min_exp; - for (size_t j = 0; j < stride_float; ++j) + if (mode == meshopt_EncodeExpSharedVector) { - int e; - frexp(v[j], &e); + // use maximum exponent to encode values; this guarantees that mantissa is [-1, 1] + for (size_t j = 0; j < stride_float; ++j) + { + int e = optlog2(v[j]); - exp = (exp < e) ? e : exp; + vector_exp = (vector_exp < e) ? e : vector_exp; + } + } + else if (mode == meshopt_EncodeExpSeparate) + { + for (size_t j = 0; j < stride_float; ++j) + { + int e = optlog2(v[j]); + + component_exp[j] = (min_exp < e) ? e : min_exp; + } } - // note that we additionally scale the mantissa to make it a K-bit signed integer (K-1 bits for magnitude) - exp -= (bits - 1); - - // compute renormalized rounded mantissa for each component - int mmask = (1 << 24) - 1; - for (size_t j = 0; j < stride_float; ++j) { - int m = int(ldexp(v[j], -exp) + (v[j] >= 0 ? 0.5f : -0.5f)); + int exp = (mode == meshopt_EncodeExpSharedVector) ? vector_exp : component_exp[j]; + + // note that we additionally scale the mantissa to make it a K-bit signed integer (K-1 bits for magnitude) + exp -= (bits - 1); + + // compute renormalized rounded mantissa for each component + int mmask = (1 << 24) - 1; + + int m = int(v[j] * optexp2(-exp) + (v[j] >= 0 ? 0.5f : -0.5f)); d[j] = (m & mmask) | (unsigned(exp) << 24); }