From 78881b3cc3a389c21c1417ad05840e80a234e2d7 Mon Sep 17 00:00:00 2001 From: Omar El Sheikh Date: Tue, 22 Feb 2022 18:10:09 -0500 Subject: [PATCH] Octahedral Normal/Tangent Compression Implementation of Octahedral normal compression into Godot 4.0 --- core/math/vector3.cpp | 16 ++ core/math/vector3.h | 2 + scene/resources/mesh.cpp | 137 ++++++------------ .../shaders/scene_forward_clustered.glsl | 18 ++- .../shaders/scene_forward_mobile.glsl | 18 ++- .../renderer_rd/storage_rd/mesh_storage.cpp | 9 +- servers/rendering_server.cpp | 49 +++---- 7 files changed, 112 insertions(+), 137 deletions(-) diff --git a/core/math/vector3.cpp b/core/math/vector3.cpp index d71d3650534..fdbbb8cb5ca 100644 --- a/core/math/vector3.cpp +++ b/core/math/vector3.cpp @@ -117,6 +117,22 @@ Vector3 Vector3::octahedron_decode(const Vector2 &p_oct) { return n.normalized(); } +Vector2 Vector3::octahedron_tangent_encode(const float sign) const { + Vector2 res = this->octahedron_encode(); + res.y = res.y * 0.5f + 0.5f; + res.y = sign >= 0.0f ? res.y : 1 - res.y; + return res; +} + +Vector3 Vector3::octahedron_tangent_decode(const Vector2 &p_oct, float *sign) { + Vector2 oct_compressed = p_oct; + oct_compressed.y = oct_compressed.y * 2 - 1; + *sign = oct_compressed.y >= 0.0f ? 1.0f : -1.0f; + oct_compressed.y = Math::abs(oct_compressed.y); + Vector3 res = Vector3::octahedron_decode(oct_compressed); + return res; +} + Basis Vector3::outer(const Vector3 &p_with) const { Vector3 row0(x * p_with.x, x * p_with.y, x * p_with.z); Vector3 row1(y * p_with.x, y * p_with.y, y * p_with.z); diff --git a/core/math/vector3.h b/core/math/vector3.h index 4ce01da60e2..890aeeeed85 100644 --- a/core/math/vector3.h +++ b/core/math/vector3.h @@ -111,6 +111,8 @@ struct _NO_DISCARD_ Vector3 { Vector2 octahedron_encode() const; static Vector3 octahedron_decode(const Vector2 &p_oct); + Vector2 octahedron_tangent_encode(const float sign) const; + static Vector3 octahedron_tangent_decode(const Vector2 &p_oct, float *sign); _FORCE_INLINE_ Vector3 cross(const Vector3 &p_with) const; _FORCE_INLINE_ real_t dot(const Vector3 &p_with) const; diff --git a/scene/resources/mesh.cpp b/scene/resources/mesh.cpp index ec9db89794c..b71682744f8 100644 --- a/scene/resources/mesh.cpp +++ b/scene/resources/mesh.cpp @@ -863,27 +863,6 @@ static Mesh::PrimitiveType _old_primitives[7] = { }; #endif // DISABLE_DEPRECATED -// Convert Octahedron-mapped normalized vector back to Cartesian -// Assumes normalized format (elements of v within range [-1, 1]) -Vector3 _oct_to_norm(const Vector2 v) { - Vector3 res(v.x, v.y, 1 - (Math::absf(v.x) + Math::absf(v.y))); - float t = MAX(-res.z, 0.0f); - res.x += t * -SIGN(res.x); - res.y += t * -SIGN(res.y); - return res.normalized(); -} - -// Convert Octahedron-mapped normalized tangent vector back to Cartesian -// out_sign provides the direction for the original cartesian tangent -// Assumes normalized format (elements of v within range [-1, 1]) -Vector3 _oct_to_tangent(const Vector2 v, float *out_sign) { - Vector2 v_decompressed = v; - v_decompressed.y = Math::absf(v_decompressed.y) * 2 - 1; - Vector3 res = _oct_to_norm(v_decompressed); - *out_sign = SIGN(v[1]); - return res; -} - void _fix_array_compatibility(const Vector &p_src, uint32_t p_old_format, uint32_t p_new_format, uint32_t p_elements, Vector &vertex_data, Vector &attribute_data, Vector &skin_data) { uint32_t dst_vertex_stride; uint32_t dst_attribute_stride; @@ -954,127 +933,93 @@ void _fix_array_compatibility(const Vector &p_src, uint32_t p_old_forma if ((p_old_format & OLD_ARRAY_COMPRESS_NORMAL) && (p_old_format & OLD_ARRAY_FORMAT_TANGENT) && (p_old_format & OLD_ARRAY_COMPRESS_TANGENT)) { for (uint32_t i = 0; i < p_elements; i++) { const int8_t *src = (const int8_t *)&src_vertex_ptr[i * src_vertex_stride + src_offset]; - uint32_t *dst = (uint32_t *)&dst_vertex_ptr[i * dst_vertex_stride + dst_offsets[Mesh::ARRAY_NORMAL]]; - const Vector2 src_vec(src[0] / 127.0f, src[1] / 127.0f); + int16_t *dst = (int16_t *)&dst_vertex_ptr[i * dst_vertex_stride + dst_offsets[Mesh::ARRAY_NORMAL]]; - const Vector3 res = _oct_to_norm(src_vec) * Vector3(0.5, 0.5, 0.5) + Vector3(0.5, 0.5, 0.5); - *dst = 0; - *dst |= CLAMP(int(res.x * 1023.0f), 0, 1023); - *dst |= CLAMP(int(res.y * 1023.0f), 0, 1023) << 10; - *dst |= CLAMP(int(res.z * 1023.0f), 0, 1023) << 20; + dst[0] = (int16_t)CLAMP(src[0] / 127.0f * 32767, -32768, 32767); + dst[1] = (int16_t)CLAMP(src[1] / 127.0f * 32767, -32768, 32767); } - src_offset += sizeof(int8_t) * 2; + src_offset += sizeof(int16_t) * 2; } else { for (uint32_t i = 0; i < p_elements; i++) { const int16_t *src = (const int16_t *)&src_vertex_ptr[i * src_vertex_stride + src_offset]; - uint32_t *dst = (uint32_t *)&dst_vertex_ptr[i * dst_vertex_stride + dst_offsets[Mesh::ARRAY_NORMAL]]; - const Vector2 src_vec(src[0] / 32767.0f, src[1] / 32767.0f); + int16_t *dst = (int16_t *)&dst_vertex_ptr[i * dst_vertex_stride + dst_offsets[Mesh::ARRAY_NORMAL]]; - const Vector3 res = _oct_to_norm(src_vec) * Vector3(0.5, 0.5, 0.5) + Vector3(0.5, 0.5, 0.5); - *dst = 0; - *dst |= CLAMP(int(res.x * 1023.0f), 0, 1023); - *dst |= CLAMP(int(res.y * 1023.0f), 0, 1023) << 10; - *dst |= CLAMP(int(res.z * 1023.0f), 0, 1023) << 20; + dst[0] = src[0]; + dst[1] = src[1]; } src_offset += sizeof(int16_t) * 2; } } else { // No Octahedral compression if (p_old_format & OLD_ARRAY_COMPRESS_NORMAL) { - const float multiplier = 1.f / 127.f * 1023.0f; - for (uint32_t i = 0; i < p_elements; i++) { const int8_t *src = (const int8_t *)&src_vertex_ptr[i * src_vertex_stride + src_offset]; - uint32_t *dst = (uint32_t *)&dst_vertex_ptr[i * dst_vertex_stride + dst_offsets[Mesh::ARRAY_NORMAL]]; + const Vector3 original_normal(src[0], src[1], src[2]); + Vector2 res = original_normal.octahedron_encode(); - *dst = 0; - *dst |= CLAMP(int(src[0] * multiplier), 0, 1023); - *dst |= CLAMP(int(src[1] * multiplier), 0, 1023) << 10; - *dst |= CLAMP(int(src[2] * multiplier), 0, 1023) << 20; + uint16_t *dst = (uint16_t *)&dst_vertex_ptr[i * dst_vertex_stride + dst_offsets[Mesh::ARRAY_NORMAL]]; + dst[0] = (uint16_t)CLAMP(res.x * 65535, 0, 65535); + dst[1] = (uint16_t)CLAMP(res.y * 65535, 0, 65535); } - src_offset += sizeof(uint32_t); + src_offset += sizeof(uint16_t) * 2; } else { for (uint32_t i = 0; i < p_elements; i++) { const float *src = (const float *)&src_vertex_ptr[i * src_vertex_stride + src_offset]; - uint32_t *dst = (uint32_t *)&dst_vertex_ptr[i * dst_vertex_stride + dst_offsets[Mesh::ARRAY_NORMAL]]; + const Vector3 original_normal(src[0], src[1], src[2]); + Vector2 res = original_normal.octahedron_encode(); - *dst = 0; - *dst |= CLAMP(int(src[0] * 1023.0), 0, 1023); - *dst |= CLAMP(int(src[1] * 1023.0), 0, 1023) << 10; - *dst |= CLAMP(int(src[2] * 1023.0), 0, 1023) << 20; + uint16_t *dst = (uint16_t *)&dst_vertex_ptr[i * dst_vertex_stride + dst_offsets[Mesh::ARRAY_NORMAL]]; + dst[0] = (uint16_t)CLAMP(res.x * 65535, 0, 65535); + dst[1] = (uint16_t)CLAMP(res.y * 65535, 0, 65535); } - src_offset += sizeof(float) * 3; + src_offset += sizeof(uint16_t) * 2; } } } break; case OLD_ARRAY_TANGENT: { if (p_old_format & OLD_ARRAY_FLAG_USE_OCTAHEDRAL_COMPRESSION) { - if (p_old_format & OLD_ARRAY_COMPRESS_TANGENT) { // int8 + if (p_old_format & OLD_ARRAY_COMPRESS_TANGENT) { // int8 SNORM -> uint16 UNORM for (uint32_t i = 0; i < p_elements; i++) { const int8_t *src = (const int8_t *)&src_vertex_ptr[i * src_vertex_stride + src_offset]; - uint32_t *dst = (uint32_t *)&dst_vertex_ptr[i * dst_vertex_stride + dst_offsets[Mesh::ARRAY_TANGENT]]; - const Vector2 src_vec(src[0] / 127.0f, src[1] / 127.0f); - float out_sign; - const Vector3 res = _oct_to_tangent(src_vec, &out_sign) * Vector3(0.5, 0.5, 0.5) + Vector3(0.5, 0.5, 0.5); + uint16_t *dst = (uint16_t *)&dst_vertex_ptr[i * dst_vertex_stride + dst_offsets[Mesh::ARRAY_TANGENT]]; - *dst = 0; - *dst |= CLAMP(int(res.x * 1023.0), 0, 1023); - *dst |= CLAMP(int(res.y * 1023.0), 0, 1023) << 10; - *dst |= CLAMP(int(res.z * 1023.0), 0, 1023) << 20; - if (out_sign > 0) { - *dst |= 3 << 30; - } + dst[0] = (uint16_t)CLAMP((src[0] / 127.0f * .5f + .5f) * 65535, 0, 65535); + dst[1] = (uint16_t)CLAMP((src[1] / 127.0f * .5f + .5f) * 65535, 0, 65535); } - src_offset += sizeof(int8_t) * 2; - } else { // int16 + src_offset += sizeof(uint16_t) * 2; + } else { // int16 SNORM -> uint16 UNORM for (uint32_t i = 0; i < p_elements; i++) { const int16_t *src = (const int16_t *)&src_vertex_ptr[i * src_vertex_stride + src_offset]; - uint32_t *dst = (uint32_t *)&dst_vertex_ptr[i * dst_vertex_stride + dst_offsets[Mesh::ARRAY_TANGENT]]; - const Vector2 src_vec(src[0] / 32767.0f, src[1] / 32767.0f); - float out_sign; - Vector3 res = _oct_to_tangent(src_vec, &out_sign) * Vector3(0.5, 0.5, 0.5) + Vector3(0.5, 0.5, 0.5); + uint16_t *dst = (uint16_t *)&dst_vertex_ptr[i * dst_vertex_stride + dst_offsets[Mesh::ARRAY_TANGENT]]; - *dst = 0; - *dst |= CLAMP(int(res.x * 1023.0), 0, 1023); - *dst |= CLAMP(int(res.y * 1023.0), 0, 1023) << 10; - *dst |= CLAMP(int(res.z * 1023.0), 0, 1023) << 20; - if (out_sign > 0) { - *dst |= 3 << 30; - } + dst[0] = (uint16_t)CLAMP((src[0] / 32767.0f * .5f + .5f) * 65535, 0, 65535); + dst[1] = (uint16_t)CLAMP((src[1] / 32767.0f * .5f + .5f) * 65535, 0, 65535); } - src_offset += sizeof(int16_t) * 2; + src_offset += sizeof(uint16_t) * 2; } } else { // No Octahedral compression if (p_old_format & OLD_ARRAY_COMPRESS_TANGENT) { - const float multiplier = 1.f / 127.f * 1023.0f; - for (uint32_t i = 0; i < p_elements; i++) { const int8_t *src = (const int8_t *)&src_vertex_ptr[i * src_vertex_stride + src_offset]; - uint32_t *dst = (uint32_t *)&dst_vertex_ptr[i * dst_vertex_stride + dst_offsets[Mesh::ARRAY_TANGENT]]; + const Vector3 original_tangent(src[0], src[1], src[2]); + Vector2 res = original_tangent.octahedron_tangent_encode(src[3]); - *dst = 0; - *dst |= CLAMP(int(src[0] * multiplier), 0, 1023); - *dst |= CLAMP(int(src[1] * multiplier), 0, 1023) << 10; - *dst |= CLAMP(int(src[2] * multiplier), 0, 1023) << 20; - if (src[3] > 0) { - *dst |= 3 << 30; - } + uint16_t *dst = (uint16_t *)&dst_vertex_ptr[i * dst_vertex_stride + dst_offsets[Mesh::ARRAY_NORMAL]]; + dst[0] = (uint16_t)CLAMP(res.x * 65535, 0, 65535); + dst[1] = (uint16_t)CLAMP(res.y * 65535, 0, 65535); } - src_offset += sizeof(uint32_t); + src_offset += sizeof(uint16_t) * 2; } else { for (uint32_t i = 0; i < p_elements; i++) { const float *src = (const float *)&src_vertex_ptr[i * src_vertex_stride + src_offset]; - uint32_t *dst = (uint32_t *)&dst_vertex_ptr[i * dst_vertex_stride + dst_offsets[Mesh::ARRAY_TANGENT]]; + const Vector3 original_tangent(src[0], src[1], src[2]); + Vector2 res = original_tangent.octahedron_tangent_encode(src[3]); - *dst = 0; - *dst |= CLAMP(int(src[0] * 1023.0), 0, 1023); - *dst |= CLAMP(int(src[1] * 1023.0), 0, 1023) << 10; - *dst |= CLAMP(int(src[2] * 1023.0), 0, 1023) << 20; - if (src[3] > 0) { - *dst |= 3 << 30; - } + uint16_t *dst = (uint16_t *)&dst_vertex_ptr[i * dst_vertex_stride + dst_offsets[Mesh::ARRAY_NORMAL]]; + dst[0] = (uint16_t)CLAMP(res.x * 65535, 0, 65535); + dst[1] = (uint16_t)CLAMP(res.y * 65535, 0, 65535); } - src_offset += sizeof(float) * 4; + src_offset += sizeof(uint16_t) * 2; } } } break; diff --git a/servers/rendering/renderer_rd/shaders/scene_forward_clustered.glsl b/servers/rendering/renderer_rd/shaders/scene_forward_clustered.glsl index e9515c76708..6b4e4a5a167 100644 --- a/servers/rendering/renderer_rd/shaders/scene_forward_clustered.glsl +++ b/servers/rendering/renderer_rd/shaders/scene_forward_clustered.glsl @@ -15,11 +15,11 @@ layout(location = 0) in vec3 vertex_attrib; //only for pure render depth when normal is not used #ifdef NORMAL_USED -layout(location = 1) in vec3 normal_attrib; +layout(location = 1) in vec2 normal_attrib; #endif #if defined(TANGENT_USED) || defined(NORMAL_MAP_USED) || defined(LIGHT_ANISOTROPY_USED) -layout(location = 2) in vec4 tangent_attrib; +layout(location = 2) in vec2 tangent_attrib; #endif #if defined(COLOR_USED) @@ -58,6 +58,13 @@ layout(location = 10) in uvec4 bone_attrib; layout(location = 11) in vec4 weight_attrib; #endif +vec3 oct_to_vec3(vec2 e) { + vec3 v = vec3(e.xy, 1.0 - abs(e.x) - abs(e.y)); + float t = max(-v.z, 0.0); + v.xy += t * -sign(v.xy); + return v; +} + /* Varyings */ layout(location = 0) out vec3 vertex_interp; @@ -231,12 +238,13 @@ void vertex_shader(in uint instance_index, in bool is_multimesh, in SceneData sc vec3 vertex = vertex_attrib; #ifdef NORMAL_USED - vec3 normal = normal_attrib * 2.0 - 1.0; + vec3 normal = oct_to_vec3(normal_attrib * 2.0 - 1.0); #endif #if defined(TANGENT_USED) || defined(NORMAL_MAP_USED) || defined(LIGHT_ANISOTROPY_USED) - vec3 tangent = tangent_attrib.xyz * 2.0 - 1.0; - float binormalf = tangent_attrib.a * 2.0 - 1.0; + vec2 signed_tangent_attrib = tangent_attrib * 2.0 - 1.0; + vec3 tangent = oct_to_vec3(vec2(signed_tangent_attrib.x, abs(signed_tangent_attrib.y) * 2.0 - 1.0)); + float binormalf = sign(signed_tangent_attrib.y); vec3 binormal = normalize(cross(normal, tangent) * binormalf); #endif diff --git a/servers/rendering/renderer_rd/shaders/scene_forward_mobile.glsl b/servers/rendering/renderer_rd/shaders/scene_forward_mobile.glsl index 6548793beea..09605339170 100644 --- a/servers/rendering/renderer_rd/shaders/scene_forward_mobile.glsl +++ b/servers/rendering/renderer_rd/shaders/scene_forward_mobile.glsl @@ -16,11 +16,11 @@ layout(location = 0) in vec3 vertex_attrib; //only for pure render depth when normal is not used #ifdef NORMAL_USED -layout(location = 1) in vec3 normal_attrib; +layout(location = 1) in vec2 normal_attrib; #endif #if defined(TANGENT_USED) || defined(NORMAL_MAP_USED) || defined(LIGHT_ANISOTROPY_USED) -layout(location = 2) in vec4 tangent_attrib; +layout(location = 2) in vec2 tangent_attrib; #endif #if defined(COLOR_USED) @@ -59,6 +59,13 @@ layout(location = 10) in uvec4 bone_attrib; layout(location = 11) in vec4 weight_attrib; #endif +vec3 oct_to_vec3(vec2 e) { + vec3 v = vec3(e.xy, 1.0 - abs(e.x) - abs(e.y)); + float t = max(-v.z, 0.0); + v.xy += t * -sign(v.xy); + return v; +} + /* Varyings */ layout(location = 0) highp out vec3 vertex_interp; @@ -229,12 +236,13 @@ void main() { vec3 vertex = vertex_attrib; #ifdef NORMAL_USED - vec3 normal = normal_attrib * 2.0 - 1.0; + vec3 normal = oct_to_vec3(normal_attrib * 2.0 - 1.0); #endif #if defined(TANGENT_USED) || defined(NORMAL_MAP_USED) || defined(LIGHT_ANISOTROPY_USED) - vec3 tangent = tangent_attrib.xyz * 2.0 - 1.0; - float binormalf = tangent_attrib.a * 2.0 - 1.0; + vec3 signed_tangent_attrib = tangent_attrib * 2.0 - 1.0; + vec3 tangent = oct_to_vec3(vec2(signed_tangent_attrib.x, abs(signed_tangent_attrib.y) * 2.0 - 1.0)); + float binormalf = sign(signed_tangent_attrib.y); vec3 binormal = normalize(cross(normal, tangent) * binormalf); #endif diff --git a/servers/rendering/renderer_rd/storage_rd/mesh_storage.cpp b/servers/rendering/renderer_rd/storage_rd/mesh_storage.cpp index dc3f35f9424..71cfb32828b 100644 --- a/servers/rendering/renderer_rd/storage_rd/mesh_storage.cpp +++ b/servers/rendering/renderer_rd/storage_rd/mesh_storage.cpp @@ -1057,10 +1057,9 @@ void MeshStorage::_mesh_surface_generate_version_for_input_mask(Mesh::Surface::V } break; case RS::ARRAY_NORMAL: { vd.offset = stride; + vd.format = RD::DATA_FORMAT_R16G16_UNORM; + stride += sizeof(uint16_t) * 2; - vd.format = RD::DATA_FORMAT_A2B10G10R10_UNORM_PACK32; - - stride += sizeof(uint32_t); if (mis) { buffer = mis->vertex_buffer; } else { @@ -1069,9 +1068,9 @@ void MeshStorage::_mesh_surface_generate_version_for_input_mask(Mesh::Surface::V } break; case RS::ARRAY_TANGENT: { vd.offset = stride; + vd.format = RD::DATA_FORMAT_R16G16_UNORM; + stride += sizeof(uint16_t) * 2; - vd.format = RD::DATA_FORMAT_A2B10G10R10_UNORM_PACK32; - stride += sizeof(uint32_t); if (mis) { buffer = mis->vertex_buffer; } else { diff --git a/servers/rendering_server.cpp b/servers/rendering_server.cpp index a54d4f0384a..f79c4ac54c8 100644 --- a/servers/rendering_server.cpp +++ b/servers/rendering_server.cpp @@ -398,16 +398,14 @@ Error RenderingServer::_surface_set_data(Array p_arrays, uint32_t p_format, uint const Vector3 *src = array.ptr(); for (int i = 0; i < p_vertex_array_len; i++) { - Vector3 n = src[i] * Vector3(0.5, 0.5, 0.5) + Vector3(0.5, 0.5, 0.5); + Vector2 res = src[i].octahedron_encode(); + int16_t vector[2] = { + (int16_t)CLAMP(res.x * 65535, 0, 65535), + (int16_t)CLAMP(res.y * 65535, 0, 65535), + }; - uint32_t value = 0; - value |= CLAMP(int(n.x * 1023.0), 0, 1023); - value |= CLAMP(int(n.y * 1023.0), 0, 1023) << 10; - value |= CLAMP(int(n.z * 1023.0), 0, 1023) << 20; - - memcpy(&vw[p_offsets[ai] + i * p_vertex_stride], &value, 4); + memcpy(&vw[p_offsets[ai] + i * p_vertex_stride], vector, 4); } - } break; case RS::ARRAY_TANGENT: { @@ -416,33 +414,32 @@ Error RenderingServer::_surface_set_data(Array p_arrays, uint32_t p_format, uint if (type == Variant::PACKED_FLOAT32_ARRAY) { Vector array = p_arrays[ai]; ERR_FAIL_COND_V(array.size() != p_vertex_array_len * 4, ERR_INVALID_PARAMETER); - const float *src = array.ptr(); + const float *src_ptr = array.ptr(); for (int i = 0; i < p_vertex_array_len; i++) { - uint32_t value = 0; - value |= CLAMP(int((src[i * 4 + 0] * 0.5 + 0.5) * 1023.0), 0, 1023); - value |= CLAMP(int((src[i * 4 + 1] * 0.5 + 0.5) * 1023.0), 0, 1023) << 10; - value |= CLAMP(int((src[i * 4 + 2] * 0.5 + 0.5) * 1023.0), 0, 1023) << 20; - if (src[i * 4 + 3] > 0) { - value |= 3UL << 30; - } + const Vector3 src(src_ptr[i * 4 + 0], src_ptr[i * 4 + 1], src_ptr[i * 4 + 2]); + Vector2 res = src.octahedron_tangent_encode(src_ptr[i * 4 + 3]); + int16_t vector[2] = { + (int16_t)CLAMP(res.x * 65535, 0, 65535), + (int16_t)CLAMP(res.y * 65535, 0, 65535), + }; - memcpy(&vw[p_offsets[ai] + i * p_vertex_stride], &value, 4); + memcpy(&vw[p_offsets[ai] + i * p_vertex_stride], vector, 4); } } else { // PACKED_FLOAT64_ARRAY Vector array = p_arrays[ai]; ERR_FAIL_COND_V(array.size() != p_vertex_array_len * 4, ERR_INVALID_PARAMETER); - const double *src = array.ptr(); + const double *src_ptr = array.ptr(); for (int i = 0; i < p_vertex_array_len; i++) { - uint32_t value = 0; - value |= CLAMP(int((src[i * 4 + 0] * 0.5 + 0.5) * 1023.0), 0, 1023); - value |= CLAMP(int((src[i * 4 + 1] * 0.5 + 0.5) * 1023.0), 0, 1023) << 10; - value |= CLAMP(int((src[i * 4 + 2] * 0.5 + 0.5) * 1023.0), 0, 1023) << 20; - if (src[i * 4 + 3] > 0) { - value |= 3UL << 30; - } - memcpy(&vw[p_offsets[ai] + i * p_vertex_stride], &value, 4); + const Vector3 src(src_ptr[i * 4 + 0], src_ptr[i * 4 + 1], src_ptr[i * 4 + 2]); + Vector2 res = src.octahedron_tangent_encode(src_ptr[i * 4 + 3]); + int16_t vector[2] = { + (int16_t)CLAMP(res.x * 65535, 0, 65535), + (int16_t)CLAMP(res.y * 65535, 0, 65535), + }; + + memcpy(&vw[p_offsets[ai] + i * p_vertex_stride], vector, 4); } } } break;