From 80cf6cbfe9c0611ac649eb0e9cb92d0f5765d220 Mon Sep 17 00:00:00 2001 From: BlueCube3310 <53150244+BlueCube3310@users.noreply.github.com> Date: Thu, 8 Aug 2024 19:13:00 +0200 Subject: [PATCH] Optimize .hdr loading and RGB9E5 conversion --- core/math/color.h | 53 ++++++++++++++++++++------------ modules/hdr/image_loader_hdr.cpp | 32 ++++++++++++------- modules/hdr/image_loader_hdr.h | 1 + 3 files changed, 55 insertions(+), 31 deletions(-) diff --git a/core/math/color.h b/core/math/color.h index e17b8c9fd70..70fad78acbd 100644 --- a/core/math/color.h +++ b/core/math/color.h @@ -129,33 +129,46 @@ struct [[nodiscard]] Color { } _FORCE_INLINE_ uint32_t to_rgbe9995() const { - const float pow2to9 = 512.0f; - const float B = 15.0f; - const float N = 9.0f; + // https://github.com/microsoft/DirectX-Graphics-Samples/blob/v10.0.19041.0/MiniEngine/Core/Color.cpp + static const float kMaxVal = float(0x1FF << 7); + static const float kMinVal = float(1.f / (1 << 16)); - float sharedexp = 65408.000f; // Result of: ((pow2to9 - 1.0f) / pow2to9) * powf(2.0f, 31.0f - 15.0f) + // Clamp RGB to [0, 1.FF*2^15] + const float _r = CLAMP(r, 0.0f, kMaxVal); + const float _g = CLAMP(g, 0.0f, kMaxVal); + const float _b = CLAMP(b, 0.0f, kMaxVal); - float cRed = MAX(0.0f, MIN(sharedexp, r)); - float cGreen = MAX(0.0f, MIN(sharedexp, g)); - float cBlue = MAX(0.0f, MIN(sharedexp, b)); + // Compute the maximum channel, no less than 1.0*2^-16 + const float MaxChannel = MAX(MAX(_r, _g), MAX(_b, kMinVal)); - float cMax = MAX(cRed, MAX(cGreen, cBlue)); + // Take the exponent of the maximum channel (rounding up the 9th bit) and + // add 15 to it. When added to the channels, it causes the implicit '1.0' + // bit and the first 8 mantissa bits to be shifted down to the low 9 bits + // of the mantissa, rounding the truncated bits. 
+ union { + float f; + int32_t i; + } R, G, B, E; - float expp = MAX(-B - 1.0f, floor(Math::log(cMax) / (real_t)Math_LN2)) + 1.0f + B; + E.f = MaxChannel; + E.i += 0x07804000; // Add 15 to the exponent and 0x4000 to the mantissa + E.i &= 0x7F800000; // Zero the mantissa - float sMax = (float)floor((cMax / Math::pow(2.0f, expp - B - N)) + 0.5f); + // This shifts the 9-bit values we need into the lowest bits, rounding as + // needed. Note that if the channel has a smaller exponent than the max + // channel, it will shift even more. This is intentional. + R.f = _r + E.f; + G.f = _g + E.f; + B.f = _b + E.f; - float exps = expp + 1.0f; + // Convert the Bias to the correct exponent in the upper 5 bits. + E.i <<= 4; + E.i += 0x10000000; - if (0.0f <= sMax && sMax < pow2to9) { - exps = expp; - } - - float sRed = Math::floor((cRed / pow(2.0f, exps - B - N)) + 0.5f); - float sGreen = Math::floor((cGreen / pow(2.0f, exps - B - N)) + 0.5f); - float sBlue = Math::floor((cBlue / pow(2.0f, exps - B - N)) + 0.5f); - - return (uint32_t(Math::fast_ftoi(sRed)) & 0x1FF) | ((uint32_t(Math::fast_ftoi(sGreen)) & 0x1FF) << 9) | ((uint32_t(Math::fast_ftoi(sBlue)) & 0x1FF) << 18) | ((uint32_t(Math::fast_ftoi(exps)) & 0x1F) << 27); + // Combine the fields. RGB floats have unwanted data in the upper 9 + // bits. Only red needs to mask them off because green and blue shift + // it out to the left. 
+ return E.i | (B.i << 18) | (G.i << 9) | (R.i & 511); } _FORCE_INLINE_ Color blend(const Color &p_over) const { diff --git a/modules/hdr/image_loader_hdr.cpp b/modules/hdr/image_loader_hdr.cpp index c49c62a08b6..ba59bb25eee 100644 --- a/modules/hdr/image_loader_hdr.cpp +++ b/modules/hdr/image_loader_hdr.cpp @@ -68,9 +68,11 @@ Error ImageLoaderHDR::load_image(Ref p_image, Ref f, BitField imgdata.resize(height * width * (int)sizeof(uint32_t)); { - uint8_t *w = imgdata.ptrw(); + uint8_t *ptr = imgdata.ptrw(); - uint8_t *ptr = (uint8_t *)w; + Vector temp_read_data; + temp_read_data.resize(128); + uint8_t *temp_read_ptr = temp_read_data.ptrw(); if (width < 8 || width >= 32768) { // Read flat data @@ -113,8 +115,9 @@ Error ImageLoaderHDR::load_image(Ref p_image, Ref f, BitField } } else { // Dump + f->get_buffer(temp_read_ptr, count); for (int z = 0; z < count; ++z) { - ptr[(j * width + i++) * 4 + k] = f->get_8(); + ptr[(j * width + i++) * 4 + k] = temp_read_ptr[z]; } } } @@ -122,20 +125,27 @@ Error ImageLoaderHDR::load_image(Ref p_image, Ref f, BitField } } + const bool force_linear = p_flags & FLAG_FORCE_LINEAR; + //convert for (int i = 0; i < width * height; i++) { - float exp = pow(2.0f, ptr[3] - 128.0f); + int e = ptr[3] - 128; - Color c( - ptr[0] * exp / 255.0, - ptr[1] * exp / 255.0, - ptr[2] * exp / 255.0); + if (force_linear || (e < -15 || e > 15)) { + float exp = pow(2.0f, e); + Color c(ptr[0] * exp / 255.0, ptr[1] * exp / 255.0, ptr[2] * exp / 255.0); - if (p_flags & FLAG_FORCE_LINEAR) { - c = c.srgb_to_linear(); + if (force_linear) { + c = c.srgb_to_linear(); + } + + *(uint32_t *)ptr = c.to_rgbe9995(); + } else { + // https://github.com/george-steel/rgbe-rs/blob/e7cc33b7f42b4eb3272c166dac75385e48687c92/src/types.rs#L123-L129 + uint32_t e5 = (uint32_t)(e + 15); + *(uint32_t *)ptr = ((e5 << 27) | ((uint32_t)ptr[2] << 19) | ((uint32_t)ptr[1] << 10) | ((uint32_t)ptr[0] << 1)); } - *(uint32_t *)ptr = c.to_rgbe9995(); ptr += 4; } } diff --git 
a/modules/hdr/image_loader_hdr.h b/modules/hdr/image_loader_hdr.h index 9821db059e1..0a8e91fb9e5 100644 --- a/modules/hdr/image_loader_hdr.h +++ b/modules/hdr/image_loader_hdr.h @@ -37,6 +37,7 @@ class ImageLoaderHDR : public ImageFormatLoader { public: virtual Error load_image(Ref p_image, Ref f, BitField p_flags, float p_scale); virtual void get_recognized_extensions(List *p_extensions) const; + ImageLoaderHDR(); };