From f0de7ec2b6ba817903fb65a2005d531ba5d0d75c Mon Sep 17 00:00:00 2001 From: Omar El Sheikh Date: Fri, 6 Aug 2021 12:34:29 -0400 Subject: [PATCH] Align Vertex Buffer to 4 Bytes With the octahedral compression, we had attributes of a size of 2 bytes which potentially caused performance regressions on iOS/Mac Now add padding to the normal/tangent buffer For octahedral, normal will always be oct32 encoded UNLESS tangent exists and is also compressed then both will be oct16 encoded and packed into a vec4 attribute --- drivers/gles2/rasterizer_storage_gles2.cpp | 47 ++++++++++------------ drivers/gles2/shaders/scene.glsl | 10 ++--- drivers/gles3/rasterizer_storage_gles3.cpp | 27 +++++++------ drivers/gles3/shaders/scene.glsl | 12 +++--- servers/visual_server.cpp | 19 ++++++--- 5 files changed, 59 insertions(+), 56 deletions(-) diff --git a/drivers/gles2/rasterizer_storage_gles2.cpp b/drivers/gles2/rasterizer_storage_gles2.cpp index 42aea361800..d0a231e4bf5 100644 --- a/drivers/gles2/rasterizer_storage_gles2.cpp +++ b/drivers/gles2/rasterizer_storage_gles2.cpp @@ -2099,13 +2099,8 @@ static PoolVector _unpack_half_floats(const PoolVector &array, } break; case VS::ARRAY_NORMAL: { if (p_format & VS::ARRAY_FLAG_USE_OCTAHEDRAL_COMPRESSION) { - if (p_format & VS::ARRAY_COMPRESS_NORMAL) { - src_size[i] = 2; - dst_size[i] = 2; - } else { - src_size[i] = 4; - dst_size[i] = 4; - } + src_size[i] = 4; + dst_size[i] = 4; } else { if (p_format & VS::ARRAY_COMPRESS_NORMAL) { src_size[i] = 4; @@ -2119,13 +2114,12 @@ static PoolVector _unpack_half_floats(const PoolVector &array, } break; case VS::ARRAY_TANGENT: { if (p_format & VS::ARRAY_FLAG_USE_OCTAHEDRAL_COMPRESSION) { - if (p_format & VS::ARRAY_COMPRESS_TANGENT) { - src_size[i] = 2; - dst_size[i] = 2; - } else { - src_size[i] = 4; - dst_size[i] = 4; + if (!(p_format & VS::ARRAY_COMPRESS_TANGENT)) { + src_size[VS::ARRAY_NORMAL] = 8; + dst_size[VS::ARRAY_NORMAL] = 8; } + src_size[i] = 0; + dst_size[i] = 0; } else { if (p_format & VS::ARRAY_COMPRESS_TANGENT) { src_size[i] = 4; @@ -2309,15 +2303,15 @@ void RasterizerStorageGLES2::mesh_add_surface(RID p_mesh, uint32_t p_format, VS: } break; case VS::ARRAY_NORMAL: { if (p_format & VS::ARRAY_FLAG_USE_OCTAHEDRAL_COMPRESSION) { + // Always pack normal and tangent into vec4 + // normal will be xy tangent will be zw + // normal will always be oct32 encoded + // UNLESS tangent exists and is also compressed + // then it will be oct16 encoded along with tangent attribs[i].normalized = GL_TRUE; - attribs[i].size = 2; - if (p_format & VS::ARRAY_COMPRESS_NORMAL) { - attribs[i].type = GL_BYTE; - attributes_stride += 2; - } else { - attribs[i].type = GL_SHORT; - attributes_stride += 4; - } + attribs[i].size = 4; + attribs[i].type = GL_SHORT; + attributes_stride += 4; } else { attribs[i].size = 3; @@ -2335,13 +2329,14 @@ void RasterizerStorageGLES2::mesh_add_surface(RID p_mesh, uint32_t p_format, VS: } break; case VS::ARRAY_TANGENT: { if (p_format & VS::ARRAY_FLAG_USE_OCTAHEDRAL_COMPRESSION) { - attribs[i].normalized = GL_TRUE; - attribs[i].size = 2; + attribs[i].enabled = false; if (p_format & VS::ARRAY_COMPRESS_TANGENT) { - attribs[i].type = GL_BYTE; - attributes_stride += 2; + // normal and tangent will each be oct16 (2 bytes each) + // pack into single vec4 for memory bandwidth + // savings while keeping 4 byte alignment + attribs[VS::ARRAY_NORMAL].type = GL_BYTE; } else { - attribs[i].type = GL_SHORT; + // normal and tangent will each be oct32 (4 bytes each) attributes_stride += 4; } } else { diff --git a/drivers/gles2/shaders/scene.glsl b/drivers/gles2/shaders/scene.glsl index 12a6bbb55ac..dfce4d9212e 100644 --- a/drivers/gles2/shaders/scene.glsl +++ b/drivers/gles2/shaders/scene.glsl @@ -32,14 +32,14 @@ precision highp int; attribute highp vec4 vertex_attrib; // attrib:0 /* clang-format on */ #ifdef ENABLE_OCTAHEDRAL_COMPRESSION -attribute vec2 normal_attrib; // attrib:1 +attribute vec4 normal_tangent_attrib; // attrib:1 #else attribute vec3 normal_attrib; // attrib:1 #endif #if defined(ENABLE_TANGENT_INTERP) || defined(ENABLE_NORMALMAP) #ifdef ENABLE_OCTAHEDRAL_COMPRESSION -attribute vec2 tangent_attrib; // attrib:2 +// packed into normal_attrib zw component #else attribute vec4 tangent_attrib; // attrib:2 #endif @@ -359,15 +359,15 @@ void main() { #endif #ifdef ENABLE_OCTAHEDRAL_COMPRESSION - vec3 normal = oct_to_vec3(normal_attrib); + vec3 normal = oct_to_vec3(normal_tangent_attrib.xy); #else vec3 normal = normal_attrib; #endif #if defined(ENABLE_TANGENT_INTERP) || defined(ENABLE_NORMALMAP) #ifdef ENABLE_OCTAHEDRAL_COMPRESSION - vec3 tangent = oct_to_vec3(vec2(tangent_attrib.x, abs(tangent_attrib.y) * 2.0 - 1.0)); - float binormalf = sign(tangent_attrib.y); + vec3 tangent = oct_to_vec3(vec2(normal_tangent_attrib.z, abs(normal_tangent_attrib.w) * 2.0 - 1.0)); + float binormalf = sign(normal_tangent_attrib.w); #else vec3 tangent = tangent_attrib.xyz; float binormalf = tangent_attrib.a; diff --git a/drivers/gles3/rasterizer_storage_gles3.cpp b/drivers/gles3/rasterizer_storage_gles3.cpp index 802af73017e..99a9365a8a2 100644 --- a/drivers/gles3/rasterizer_storage_gles3.cpp +++ b/drivers/gles3/rasterizer_storage_gles3.cpp @@ -3397,15 +3397,15 @@ void RasterizerStorageGLES3::mesh_add_surface(RID p_mesh, uint32_t p_format, VS: } break; case VS::ARRAY_NORMAL: { if (p_format & VS::ARRAY_FLAG_USE_OCTAHEDRAL_COMPRESSION) { + // Always pack normal and tangent into vec4 + // normal will be xy tangent will be zw + // normal will always be oct32 (4 byte) encoded + // UNLESS tangent exists and is also compressed + // then it will be oct16 encoded along with tangent attribs[i].normalized = GL_TRUE; - attribs[i].size = 2; - if (p_format & VS::ARRAY_COMPRESS_NORMAL) { - attribs[i].type = GL_BYTE; - attributes_stride += 2; - } else { - attribs[i].type = GL_SHORT; - attributes_stride += 4; - } + attribs[i].size = 4; + attribs[i].type = GL_SHORT; + attributes_stride += 4; } else { attribs[i].size = 3; @@ -3423,13 +3423,14 @@ void RasterizerStorageGLES3::mesh_add_surface(RID p_mesh, uint32_t p_format, VS: } break; case VS::ARRAY_TANGENT: { if (p_format & VS::ARRAY_FLAG_USE_OCTAHEDRAL_COMPRESSION) { - attribs[i].normalized = GL_TRUE; - attribs[i].size = 2; + attribs[i].enabled = false; if (p_format & VS::ARRAY_COMPRESS_TANGENT) { - attribs[i].type = GL_BYTE; - attributes_stride += 2; + // normal and tangent will each be oct16 (2 bytes each) + // pack into single vec4 for memory bandwidth + // savings while keeping 4 byte alignment + attribs[VS::ARRAY_NORMAL].type = GL_BYTE; } else { - attribs[i].type = GL_SHORT; + // normal and tangent will each be oct32 (4 bytes each) attributes_stride += 4; } } else { diff --git a/drivers/gles3/shaders/scene.glsl b/drivers/gles3/shaders/scene.glsl index 8de8b478b7d..8433e471008 100644 --- a/drivers/gles3/shaders/scene.glsl +++ b/drivers/gles3/shaders/scene.glsl @@ -26,13 +26,11 @@ ARRAY_INDEX=8, layout(location = 0) in highp vec4 vertex_attrib; /* clang-format on */ #ifdef ENABLE_OCTAHEDRAL_COMPRESSION -layout(location = 1) in vec2 normal_attrib; -#else -layout(location = 1) in vec3 normal_attrib; +layout(location = 1) in vec4 normal_tangent_attrib; #endif #if defined(ENABLE_TANGENT_INTERP) || defined(ENABLE_NORMALMAP) || defined(LIGHT_USE_ANISOTROPY) #ifdef ENABLE_OCTAHEDRAL_COMPRESSION -layout(location = 2) in vec2 tangent_attrib; +// packed into normal_attrib zw component #else layout(location = 2) in vec4 tangent_attrib; #endif @@ -340,15 +338,15 @@ void main() { #endif #ifdef ENABLE_OCTAHEDRAL_COMPRESSION - vec3 normal = oct_to_vec3(normal_attrib); + vec3 normal = oct_to_vec3(normal_tangent_attrib.xy); #else vec3 normal = normal_attrib; #endif #if defined(ENABLE_TANGENT_INTERP) || defined(ENABLE_NORMALMAP) || defined(LIGHT_USE_ANISOTROPY) #ifdef ENABLE_OCTAHEDRAL_COMPRESSION - vec3 tangent = oct_to_vec3(vec2(tangent_attrib.x, abs(tangent_attrib.y) * 2.0 - 1.0)); - float binormalf = sign(tangent_attrib.y); + vec3 tangent = oct_to_vec3(vec2(normal_tangent_attrib.z, abs(normal_tangent_attrib.w) * 2.0 - 1.0)); + float binormalf = sign(normal_tangent_attrib.w); #else vec3 tangent = tangent_attrib.xyz; float binormalf = tangent_attrib.a; diff --git a/servers/visual_server.cpp b/servers/visual_server.cpp index 9e4bbc5d790..99c192da933 100644 --- a/servers/visual_server.cpp +++ b/servers/visual_server.cpp @@ -491,7 +491,7 @@ Error VisualServer::_surface_set_data(Array p_arrays, uint32_t p_format, uint32_ // setting vertices means regenerating the AABB if (p_format & ARRAY_FLAG_USE_OCTAHEDRAL_COMPRESSION) { - if (p_format & ARRAY_COMPRESS_NORMAL) { + if ((p_format & ARRAY_COMPRESS_NORMAL) && (p_format & ARRAY_FORMAT_TANGENT) && (p_format & ARRAY_COMPRESS_TANGENT)) { for (int i = 0; i < p_vertex_array_len; i++) { Vector2 res = norm_to_oct(src[i]); int8_t vector[2] = { @@ -878,7 +878,10 @@ uint32_t VisualServer::mesh_surface_make_offsets_from_format(uint32_t p_format, } break; case VS::ARRAY_NORMAL: { if (p_format & ARRAY_FLAG_USE_OCTAHEDRAL_COMPRESSION) { - if (p_format & ARRAY_COMPRESS_NORMAL) { + // normal will always be oct32 (4 byte) encoded + // UNLESS tangent exists and is also compressed + // then it will be oct16 encoded along with tangent + if ((p_format & ARRAY_COMPRESS_NORMAL) && (p_format & ARRAY_FORMAT_TANGENT) && (p_format & ARRAY_COMPRESS_TANGENT)) { elem_size = sizeof(uint8_t) * 2; } else { elem_size = sizeof(uint16_t) * 2; @@ -1083,7 +1086,10 @@ void VisualServer::mesh_add_surface_from_arrays(RID p_mesh, PrimitiveType p_prim } break; case VS::ARRAY_NORMAL: { if (p_compress_format & ARRAY_FLAG_USE_OCTAHEDRAL_COMPRESSION) { - if (p_compress_format & ARRAY_COMPRESS_NORMAL) { + // normal will always be oct32 (4 byte) encoded + // UNLESS tangent exists and is also compressed + // then it will be oct16 encoded along with tangent + if ((p_compress_format & ARRAY_COMPRESS_NORMAL) && (format & ARRAY_FORMAT_TANGENT) && (p_compress_format & ARRAY_COMPRESS_TANGENT)) { elem_size = sizeof(uint8_t) * 2; } else { elem_size = sizeof(uint16_t) * 2; @@ -1286,7 +1292,10 @@ Array VisualServer::_get_array_from_surface(uint32_t p_format, PoolVector::Write w = arr.write(); for (int j = 0; j < p_vertex_len; j++) {