Merge pull request #95291 from BlueCube3310/hdr-optimizations
Optimize .hdr loading and RGB9E5 conversion
This commit is contained in:
commit
886d5865a4
|
@ -129,33 +129,46 @@ struct [[nodiscard]] Color {
|
|||
}
|
||||
|
||||
// Packs the r, g and b channels into a single 32-bit value: 9 bits per
// channel (red in bits 0-8, green in bits 9-17, blue in bits 18-26) plus a
// shared 5-bit exponent in bits 27-31.
// NOTE(review): this listing is a diff hunk rendered without +/- markers, so
// it appears to interleave the removed pow()/log()-based code with the added
// bit-manipulation code (`B` is declared twice and there are two `return`
// statements) — confirm against the repository before treating it as one
// compilable body.
_FORCE_INLINE_ uint32_t to_rgbe9995() const {
|
||||
const float pow2to9 = 512.0f;
|
||||
const float B = 15.0f;
|
||||
const float N = 9.0f;
|
||||
// https://github.com/microsoft/DirectX-Graphics-Samples/blob/v10.0.19041.0/MiniEngine/Core/Color.cpp
|
||||
static const float kMaxVal = float(0x1FF << 7);
|
||||
static const float kMinVal = float(1.f / (1 << 16));
|
||||
|
||||
float sharedexp = 65408.000f; // Result of: ((pow2to9 - 1.0f) / pow2to9) * powf(2.0f, 31.0f - 15.0f)
|
||||
// Clamp RGB to [0, kMaxVal]; kMaxVal = 0x1FF << 7 = 65408, i.e. 1.FF (hex) * 2^15
|
||||
const float _r = CLAMP(r, 0.0f, kMaxVal);
|
||||
const float _g = CLAMP(g, 0.0f, kMaxVal);
|
||||
const float _b = CLAMP(b, 0.0f, kMaxVal);
|
||||
|
||||
float cRed = MAX(0.0f, MIN(sharedexp, r));
|
||||
float cGreen = MAX(0.0f, MIN(sharedexp, g));
|
||||
float cBlue = MAX(0.0f, MIN(sharedexp, b));
|
||||
// Compute the maximum channel, no less than kMinVal (1.0*2^-16)
|
||||
const float MaxChannel = MAX(MAX(_r, _g), MAX(_b, kMinVal));
|
||||
|
||||
float cMax = MAX(cRed, MAX(cGreen, cBlue));
|
||||
// Take the exponent of the maximum channel (rounding up the 9th bit) and
|
||||
// add 15 to it. When added to the channels, it causes the implicit '1.0'
|
||||
// bit and the first 8 mantissa bits to be shifted down to the low 9 bits
|
||||
// of the mantissa, rounding the truncated bits.
|
||||
union {
|
||||
float f;
|
||||
int32_t i;
|
||||
} R, G, B, E;
|
||||
|
||||
float expp = MAX(-B - 1.0f, floor(Math::log(cMax) / (real_t)Math_LN2)) + 1.0f + B;
|
||||
E.f = MaxChannel;
|
||||
E.i += 0x07804000; // Add 15 to the exponent and 0x4000 to the mantissa
|
||||
E.i &= 0x7F800000; // Zero the mantissa
|
||||
|
||||
float sMax = (float)floor((cMax / Math::pow(2.0f, expp - B - N)) + 0.5f);
|
||||
// This shifts the 9-bit values we need into the lowest bits, rounding as
|
||||
// needed. Note that if the channel has a smaller exponent than the max
|
||||
// channel, it will shift even more. This is intentional.
|
||||
R.f = _r + E.f;
|
||||
G.f = _g + E.f;
|
||||
B.f = _b + E.f;
|
||||
|
||||
float exps = expp + 1.0f;
|
||||
// Convert the Bias to the correct exponent in the upper 5 bits.
|
||||
E.i <<= 4;
|
||||
E.i += 0x10000000;
|
||||
|
||||
if (0.0f <= sMax && sMax < pow2to9) {
|
||||
exps = expp;
|
||||
}
|
||||
|
||||
float sRed = Math::floor((cRed / pow(2.0f, exps - B - N)) + 0.5f);
|
||||
float sGreen = Math::floor((cGreen / pow(2.0f, exps - B - N)) + 0.5f);
|
||||
float sBlue = Math::floor((cBlue / pow(2.0f, exps - B - N)) + 0.5f);
|
||||
|
||||
return (uint32_t(Math::fast_ftoi(sRed)) & 0x1FF) | ((uint32_t(Math::fast_ftoi(sGreen)) & 0x1FF) << 9) | ((uint32_t(Math::fast_ftoi(sBlue)) & 0x1FF) << 18) | ((uint32_t(Math::fast_ftoi(exps)) & 0x1F) << 27);
|
||||
// Combine the fields. RGB floats have unwanted data in the upper 9
|
||||
// bits. Only red needs to mask them off because green and blue shift
|
||||
// it out to the left.
|
||||
return E.i | (B.i << 18) | (G.i << 9) | (R.i & 511);
|
||||
}
|
||||
|
||||
_FORCE_INLINE_ Color blend(const Color &p_over) const {
|
||||
|
|
|
@ -68,9 +68,11 @@ Error ImageLoaderHDR::load_image(Ref<Image> p_image, Ref<FileAccess> f, BitField
|
|||
imgdata.resize(height * width * (int)sizeof(uint32_t));
|
||||
|
||||
{
|
||||
uint8_t *w = imgdata.ptrw();
|
||||
uint8_t *ptr = imgdata.ptrw();
|
||||
|
||||
uint8_t *ptr = (uint8_t *)w;
|
||||
Vector<uint8_t> temp_read_data;
|
||||
temp_read_data.resize(128);
|
||||
uint8_t *temp_read_ptr = temp_read_data.ptrw();
|
||||
|
||||
if (width < 8 || width >= 32768) {
|
||||
// Read flat data
|
||||
|
@ -113,8 +115,9 @@ Error ImageLoaderHDR::load_image(Ref<Image> p_image, Ref<FileAccess> f, BitField
|
|||
}
|
||||
} else {
|
||||
// Dump
|
||||
f->get_buffer(temp_read_ptr, count);
|
||||
for (int z = 0; z < count; ++z) {
|
||||
ptr[(j * width + i++) * 4 + k] = f->get_8();
|
||||
ptr[(j * width + i++) * 4 + k] = temp_read_ptr[z];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -122,20 +125,27 @@ Error ImageLoaderHDR::load_image(Ref<Image> p_image, Ref<FileAccess> f, BitField
|
|||
}
|
||||
}
|
||||
|
||||
const bool force_linear = p_flags & FLAG_FORCE_LINEAR;
|
||||
|
||||
//convert
|
||||
for (int i = 0; i < width * height; i++) {
|
||||
float exp = pow(2.0f, ptr[3] - 128.0f);
|
||||
int e = ptr[3] - 128;
|
||||
|
||||
Color c(
|
||||
ptr[0] * exp / 255.0,
|
||||
ptr[1] * exp / 255.0,
|
||||
ptr[2] * exp / 255.0);
|
||||
if (force_linear || (e < -15 || e > 15)) {
|
||||
float exp = pow(2.0f, e);
|
||||
Color c(ptr[0] * exp / 255.0, ptr[1] * exp / 255.0, ptr[2] * exp / 255.0);
|
||||
|
||||
if (p_flags & FLAG_FORCE_LINEAR) {
|
||||
if (force_linear) {
|
||||
c = c.srgb_to_linear();
|
||||
}
|
||||
|
||||
*(uint32_t *)ptr = c.to_rgbe9995();
|
||||
} else {
|
||||
// https://github.com/george-steel/rgbe-rs/blob/e7cc33b7f42b4eb3272c166dac75385e48687c92/src/types.rs#L123-L129
|
||||
uint32_t e5 = (uint32_t)(e + 15);
|
||||
*(uint32_t *)ptr = ((e5 << 27) | ((uint32_t)ptr[2] << 19) | ((uint32_t)ptr[1] << 10) | ((uint32_t)ptr[0] << 1));
|
||||
}
|
||||
|
||||
ptr += 4;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -37,6 +37,7 @@ class ImageLoaderHDR : public ImageFormatLoader {
|
|||
public:
|
||||
virtual Error load_image(Ref<Image> p_image, Ref<FileAccess> f, BitField<ImageFormatLoader::LoaderFlags> p_flags, float p_scale);
|
||||
virtual void get_recognized_extensions(List<String> *p_extensions) const;
|
||||
|
||||
ImageLoaderHDR();
|
||||
};
|
||||
|
||||
|
|
Loading…
Reference in New Issue