godot/thirdparty/amd-fsr2/shaders/ffx_core_hlsl.h
Dario 057367bf4f Add FidelityFX Super Resolution 2.2 (FSR 2.2.1) support.
Introduces support for FSR2 as a new upscaler option available from the project settings. Also introduces an specific render list for surfaces that require motion and the ability to derive motion vectors from depth buffer and camera motion.
2023-09-25 10:37:47 -03:00

1503 lines
49 KiB
C++

// This file is part of the FidelityFX SDK.
//
// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
/// A define for abstracting shared memory between shading languages.
///
/// @ingroup GPU
#define FFX_GROUPSHARED groupshared
/// A define for abstracting compute memory barriers between shading languages.
///
/// @ingroup GPU
#define FFX_GROUP_MEMORY_BARRIER GroupMemoryBarrierWithGroupSync
/// A define added to accept static markup on functions to aid CPU/GPU portability of code.
///
/// @ingroup GPU
#define FFX_STATIC static
/// A define for abstracting loop unrolling between shading languages.
///
/// @ingroup GPU
#define FFX_UNROLL [unroll]
/// A define for abstracting a 'greater than' comparison operator between two types.
///
/// @ingroup GPU
#define FFX_GREATER_THAN(x, y) x > y
/// A define for abstracting a 'greater than or equal' comparison operator between two types.
///
/// @ingroup GPU
#define FFX_GREATER_THAN_EQUAL(x, y) x >= y
/// A define for abstracting a 'less than' comparison operator between two types.
///
/// @ingroup GPU
#define FFX_LESS_THAN(x, y) x < y
/// A define for abstracting a 'less than or equal' comparison operator between two types.
///
/// @ingroup GPU
#define FFX_LESS_THAN_EQUAL(x, y) x <= y
/// A define for abstracting an 'equal' comparison operator between two types.
///
/// @ingroup GPU
#define FFX_EQUAL(x, y) x == y
/// A define for abstracting a 'not equal' comparison operator between two types.
///
/// @ingroup GPU
#define FFX_NOT_EQUAL(x, y) x != y
/// Broadcast a scalar value to a 1-dimensional floating point vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_FLOAT32(x) FfxFloat32(x)
/// Broadcast a scalar value to a 2-dimensional floating point vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_FLOAT32X2(x) FfxFloat32(x)
/// Broadcast a scalar value to a 3-dimensional floating point vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_FLOAT32X3(x) FfxFloat32(x)
/// Broadcast a scalar value to a 4-dimensional floating point vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_FLOAT32X4(x) FfxFloat32(x)
/// Broadcast a scalar value to a 1-dimensional unsigned integer vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_UINT32(x) FfxUInt32(x)
/// Broadcast a scalar value to a 2-dimensional unsigned integer vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_UINT32X2(x) FfxUInt32(x)
/// Broadcast a scalar value to a 4-dimensional unsigned integer vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_UINT32X3(x) FfxUInt32(x)
/// Broadcast a scalar value to a 4-dimensional unsigned integer vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_UINT32X4(x) FfxUInt32(x)
/// Broadcast a scalar value to a 1-dimensional signed integer vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_INT32(x) FfxInt32(x)
/// Broadcast a scalar value to a 2-dimensional signed integer vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_INT32X2(x) FfxInt32(x)
/// Broadcast a scalar value to a 3-dimensional signed integer vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_INT32X3(x) FfxInt32(x)
/// Broadcast a scalar value to a 4-dimensional signed integer vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_INT32X4(x) FfxInt32(x)
/// Broadcast a scalar value to a 1-dimensional half-precision floating point vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_MIN_FLOAT16(a) FFX_MIN16_F(a)
/// Broadcast a scalar value to a 2-dimensional half-precision floating point vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_MIN_FLOAT16X2(a) FFX_MIN16_F(a)
/// Broadcast a scalar value to a 3-dimensional half-precision floating point vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_MIN_FLOAT16X3(a) FFX_MIN16_F(a)
/// Broadcast a scalar value to a 4-dimensional half-precision floating point vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_MIN_FLOAT16X4(a) FFX_MIN16_F(a)
/// Broadcast a scalar value to a 1-dimensional half-precision unsigned integer vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_MIN_UINT16(a) FFX_MIN16_U(a)
/// Broadcast a scalar value to a 2-dimensional half-precision unsigned integer vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_MIN_UINT16X2(a) FFX_MIN16_U(a)
/// Broadcast a scalar value to a 3-dimensional half-precision unsigned integer vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_MIN_UINT16X3(a) FFX_MIN16_U(a)
/// Broadcast a scalar value to a 4-dimensional half-precision unsigned integer vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_MIN_UINT16X4(a) FFX_MIN16_U(a)
/// Broadcast a scalar value to a 1-dimensional half-precision signed integer vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_MIN_INT16(a) FFX_MIN16_I(a)
/// Broadcast a scalar value to a 2-dimensional half-precision signed integer vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_MIN_INT16X2(a) FFX_MIN16_I(a)
/// Broadcast a scalar value to a 3-dimensional half-precision signed integer vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_MIN_INT16X3(a) FFX_MIN16_I(a)
/// Broadcast a scalar value to a 4-dimensional half-precision signed integer vector.
///
/// @ingroup GPU
#define FFX_BROADCAST_MIN_INT16X4(a) FFX_MIN16_I(a)
/// Pack 2x32-bit floating point values in a single 32bit value.
///
/// This function first converts each component of <c><i>value</i></c> into their nearest 16-bit floating
/// point representation, and then stores the X and Y components in the lower and upper 16 bits of the
/// 32bit unsigned integer respectively.
///
/// @param [in] value A 2-dimensional floating point value to convert and pack.
///
/// @returns
/// A packed 32bit value containing 2 16bit floating point values.
///
/// @ingroup HLSL
FfxUInt32 packHalf2x16(FfxFloat32x2 value)
{
return f32tof16(value.x) | (f32tof16(value.y) << 16);
}
/// Broadcast a scalar value to a 2-dimensional floating point vector.
///
/// @param [in] value The value to to broadcast.
///
/// @returns
/// A 2-dimensional floating point vector with <c><i>value</i></c> in each component.
///
/// @ingroup HLSL
FfxFloat32x2 ffxBroadcast2(FfxFloat32 value)
{
return FfxFloat32x2(value, value);
}
/// Broadcast a scalar value to a 3-dimensional floating point vector.
///
/// @param [in] value The value to to broadcast.
///
/// @returns
/// A 3-dimensional floating point vector with <c><i>value</i></c> in each component.
///
/// @ingroup HLSL
FfxFloat32x3 ffxBroadcast3(FfxFloat32 value)
{
return FfxFloat32x3(value, value, value);
}
/// Broadcast a scalar value to a 4-dimensional floating point vector.
///
/// @param [in] value The value to to broadcast.
///
/// @returns
/// A 4-dimensional floating point vector with <c><i>value</i></c> in each component.
///
/// @ingroup HLSL
FfxFloat32x4 ffxBroadcast4(FfxFloat32 value)
{
return FfxFloat32x4(value, value, value, value);
}
/// Broadcast a scalar value to a 2-dimensional signed integer vector.
///
/// @param [in] value The value to to broadcast.
///
/// @returns
/// A 2-dimensional signed integer vector with <c><i>value</i></c> in each component.
///
/// @ingroup HLSL
FfxInt32x2 ffxBroadcast2(FfxInt32 value)
{
return FfxInt32x2(value, value);
}
/// Broadcast a scalar value to a 3-dimensional signed integer vector.
///
/// @param [in] value The value to to broadcast.
///
/// @returns
/// A 3-dimensional signed integer vector with <c><i>value</i></c> in each component.
///
/// @ingroup HLSL
FfxUInt32x3 ffxBroadcast3(FfxInt32 value)
{
return FfxUInt32x3(value, value, value);
}
/// Broadcast a scalar value to a 4-dimensional signed integer vector.
///
/// @param [in] value The value to to broadcast.
///
/// @returns
/// A 4-dimensional signed integer vector with <c><i>value</i></c> in each component.
///
/// @ingroup HLSL
FfxInt32x4 ffxBroadcast4(FfxInt32 value)
{
return FfxInt32x4(value, value, value, value);
}
/// Broadcast a scalar value to a 2-dimensional unsigned integer vector.
///
/// @param [in] value The value to to broadcast.
///
/// @returns
/// A 2-dimensional unsigned integer vector with <c><i>value</i></c> in each component.
///
/// @ingroup HLSL
FfxUInt32x2 ffxBroadcast2(FfxUInt32 value)
{
return FfxUInt32x2(value, value);
}
/// Broadcast a scalar value to a 3-dimensional unsigned integer vector.
///
/// @param [in] value The value to to broadcast.
///
/// @returns
/// A 3-dimensional unsigned integer vector with <c><i>value</i></c> in each component.
///
/// @ingroup HLSL
FfxUInt32x3 ffxBroadcast3(FfxUInt32 value)
{
return FfxUInt32x3(value, value, value);
}
/// Broadcast a scalar value to a 4-dimensional unsigned integer vector.
///
/// @param [in] value The value to to broadcast.
///
/// @returns
/// A 4-dimensional unsigned integer vector with <c><i>value</i></c> in each component.
///
/// @ingroup HLSL
FfxUInt32x4 ffxBroadcast4(FfxUInt32 value)
{
return FfxUInt32x4(value, value, value, value);
}
FfxUInt32 bitfieldExtract(FfxUInt32 src, FfxUInt32 off, FfxUInt32 bits)
{
FfxUInt32 mask = (1u << bits) - 1;
return (src >> off) & mask;
}
FfxUInt32 bitfieldInsert(FfxUInt32 src, FfxUInt32 ins, FfxUInt32 mask)
{
return (ins & mask) | (src & (~mask));
}
FfxUInt32 bitfieldInsertMask(FfxUInt32 src, FfxUInt32 ins, FfxUInt32 bits)
{
FfxUInt32 mask = (1u << bits) - 1;
return (ins & mask) | (src & (~mask));
}
/// Interprets the bit pattern of x as an unsigned integer.
///
/// @param [in] value The input value.
///
/// @returns
/// The input interpreted as an unsigned integer.
///
/// @ingroup HLSL
FfxUInt32 ffxAsUInt32(FfxFloat32 x)
{
return asuint(x);
}
/// Interprets the bit pattern of x as an unsigned integer.
///
/// @param [in] value The input value.
///
/// @returns
/// The input interpreted as an unsigned integer.
///
/// @ingroup HLSL
FfxUInt32x2 ffxAsUInt32(FfxFloat32x2 x)
{
return asuint(x);
}
/// Interprets the bit pattern of x as an unsigned integer.
///
/// @param [in] value The input value.
///
/// @returns
/// The input interpreted as an unsigned integer.
///
/// @ingroup HLSL
FfxUInt32x3 ffxAsUInt32(FfxFloat32x3 x)
{
return asuint(x);
}
/// Interprets the bit pattern of x as an unsigned integer.
///
/// @param [in] value The input value.
///
/// @returns
/// The input interpreted as an unsigned integer.
///
/// @ingroup HLSL
FfxUInt32x4 ffxAsUInt32(FfxFloat32x4 x)
{
return asuint(x);
}
/// Interprets the bit pattern of x as a floating-point number.
///
/// @param [in] value The input value.
///
/// @returns
/// The input interpreted as a floating-point number.
///
/// @ingroup HLSL
FfxFloat32 ffxAsFloat(FfxUInt32 x)
{
return asfloat(x);
}
/// Interprets the bit pattern of x as a floating-point number.
///
/// @param [in] value The input value.
///
/// @returns
/// The input interpreted as a floating-point number.
///
/// @ingroup HLSL
FfxFloat32x2 ffxAsFloat(FfxUInt32x2 x)
{
return asfloat(x);
}
/// Interprets the bit pattern of x as a floating-point number.
///
/// @param [in] value The input value.
///
/// @returns
/// The input interpreted as a floating-point number.
///
/// @ingroup HLSL
FfxFloat32x3 ffxAsFloat(FfxUInt32x3 x)
{
return asfloat(x);
}
/// Interprets the bit pattern of x as a floating-point number.
///
/// @param [in] value The input value.
///
/// @returns
/// The input interpreted as a floating-point number.
///
/// @ingroup HLSL
FfxFloat32x4 ffxAsFloat(FfxUInt32x4 x)
{
return asfloat(x);
}
/// Compute the linear interopation between two values.
///
/// Implemented by calling the HLSL <c><i>mix</i></c> instrinsic function. Implements the
/// following math:
///
/// (1 - t) * x + t * y
///
/// @param [in] x The first value to lerp between.
/// @param [in] y The second value to lerp between.
/// @param [in] t The value to determine how much of <c><i>x</i></c> and how much of <c><i>y</i></c>.
///
/// @returns
/// A linearly interpolated value between <c><i>x</i></c> and <c><i>y</i></c> according to <c><i>t</i></c>.
///
/// @ingroup HLSL
FfxFloat32 ffxLerp(FfxFloat32 x, FfxFloat32 y, FfxFloat32 t)
{
return lerp(x, y, t);
}
/// Compute the linear interopation between two values.
///
/// Implemented by calling the HLSL <c><i>mix</i></c> instrinsic function. Implements the
/// following math:
///
/// (1 - t) * x + t * y
///
/// @param [in] x The first value to lerp between.
/// @param [in] y The second value to lerp between.
/// @param [in] t The value to determine how much of <c><i>x</i></c> and how much of <c><i>y</i></c>.
///
/// @returns
/// A linearly interpolated value between <c><i>x</i></c> and <c><i>y</i></c> according to <c><i>t</i></c>.
///
/// @ingroup HLSL
FfxFloat32x2 ffxLerp(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32 t)
{
return lerp(x, y, t);
}
/// Compute the linear interopation between two values.
///
/// Implemented by calling the HLSL <c><i>mix</i></c> instrinsic function. Implements the
/// following math:
///
/// (1 - t) * x + t * y
///
/// @param [in] x The first value to lerp between.
/// @param [in] y The second value to lerp between.
/// @param [in] t The value to determine how much of <c><i>x</i></c> and how much of <c><i>y</i></c>.
///
/// @returns
/// A linearly interpolated value between <c><i>x</i></c> and <c><i>y</i></c> according to <c><i>t</i></c>.
///
/// @ingroup HLSL
FfxFloat32x2 ffxLerp(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 t)
{
return lerp(x, y, t);
}
/// Compute the linear interopation between two values.
///
/// Implemented by calling the HLSL <c><i>mix</i></c> instrinsic function. Implements the
/// following math:
///
/// (1 - t) * x + t * y
///
/// @param [in] x The first value to lerp between.
/// @param [in] y The second value to lerp between.
/// @param [in] t The value to determine how much of <c><i>x</i></c> and how much of <c><i>y</i></c>.
///
/// @returns
/// A linearly interpolated value between <c><i>x</i></c> and <c><i>y</i></c> according to <c><i>t</i></c>.
///
/// @ingroup HLSL
FfxFloat32x3 ffxLerp(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32 t)
{
return lerp(x, y, t);
}
/// Compute the linear interopation between two values.
///
/// Implemented by calling the HLSL <c><i>mix</i></c> instrinsic function. Implements the
/// following math:
///
/// (1 - t) * x + t * y
///
/// @param [in] x The first value to lerp between.
/// @param [in] y The second value to lerp between.
/// @param [in] t The value to determine how much of <c><i>x</i></c> and how much of <c><i>y</i></c>.
///
/// @returns
/// A linearly interpolated value between <c><i>x</i></c> and <c><i>y</i></c> according to <c><i>t</i></c>.
///
/// @ingroup HLSL
FfxFloat32x3 ffxLerp(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 t)
{
return lerp(x, y, t);
}
/// Compute the linear interopation between two values.
///
/// Implemented by calling the HLSL <c><i>mix</i></c> instrinsic function. Implements the
/// following math:
///
/// (1 - t) * x + t * y
///
/// @param [in] x The first value to lerp between.
/// @param [in] y The second value to lerp between.
/// @param [in] t The value to determine how much of <c><i>x</i></c> and how much of <c><i>y</i></c>.
///
/// @returns
/// A linearly interpolated value between <c><i>x</i></c> and <c><i>y</i></c> according to <c><i>t</i></c>.
///
/// @ingroup HLSL
FfxFloat32x4 ffxLerp(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32 t)
{
return lerp(x, y, t);
}
/// Compute the linear interopation between two values.
///
/// Implemented by calling the HLSL <c><i>mix</i></c> instrinsic function. Implements the
/// following math:
///
/// (1 - t) * x + t * y
///
/// @param [in] x The first value to lerp between.
/// @param [in] y The second value to lerp between.
/// @param [in] t The value to determine how much of <c><i>x</i></c> and how much of <c><i>y</i></c>.
///
/// @returns
/// A linearly interpolated value between <c><i>x</i></c> and <c><i>y</i></c> according to <c><i>t</i></c>.
///
/// @ingroup HLSL
FfxFloat32x4 ffxLerp(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 t)
{
return lerp(x, y, t);
}
/// Clamp a value to a [0..1] range.
///
/// @param [in] x The value to clamp to [0..1] range.
///
/// @returns
/// The clamped version of <c><i>x</i></c>.
///
/// @ingroup HLSL
FfxFloat32 ffxSaturate(FfxFloat32 x)
{
return saturate(x);
}
/// Clamp a value to a [0..1] range.
///
/// @param [in] x The value to clamp to [0..1] range.
///
/// @returns
/// The clamped version of <c><i>x</i></c>.
///
/// @ingroup HLSL
FfxFloat32x2 ffxSaturate(FfxFloat32x2 x)
{
return saturate(x);
}
/// Clamp a value to a [0..1] range.
///
/// @param [in] x The value to clamp to [0..1] range.
///
/// @returns
/// The clamped version of <c><i>x</i></c>.
///
/// @ingroup HLSL
FfxFloat32x3 ffxSaturate(FfxFloat32x3 x)
{
return saturate(x);
}
/// Clamp a value to a [0..1] range.
///
/// @param [in] x The value to clamp to [0..1] range.
///
/// @returns
/// The clamped version of <c><i>x</i></c>.
///
/// @ingroup HLSL
FfxFloat32x4 ffxSaturate(FfxFloat32x4 x)
{
return saturate(x);
}
/// Compute the factional part of a decimal value.
///
/// This function calculates <c><i>x - floor(x)</i></c>. Where <c><i>floor</i></c> is the intrinsic HLSL function.
///
/// NOTE: This function should compile down to a single <c><i>V_MAX3_F32</i></c> operation on GCN/RDNA hardware. It is
/// worth further noting that this function is intentionally distinct from the HLSL <c><i>frac</i></c> intrinsic
/// function.
///
/// @param [in] x The value to compute the fractional part from.
///
/// @returns
/// The fractional part of <c><i>x</i></c>.
///
/// @ingroup HLSL
FfxFloat32 ffxFract(FfxFloat32 x)
{
return x - floor(x);
}
/// Compute the factional part of a decimal value.
///
/// This function calculates <c><i>x - floor(x)</i></c>. Where <c><i>floor</i></c> is the intrinsic HLSL function.
///
/// NOTE: This function should compile down to a single <c><i>V_MAX3_F32</i></c> operation on GCN/RDNA hardware. It is
/// worth further noting that this function is intentionally distinct from the HLSL <c><i>frac</i></c> intrinsic
/// function.
///
/// @param [in] x The value to compute the fractional part from.
///
/// @returns
/// The fractional part of <c><i>x</i></c>.
///
/// @ingroup HLSL
FfxFloat32x2 ffxFract(FfxFloat32x2 x)
{
return x - floor(x);
}
/// Compute the factional part of a decimal value.
///
/// This function calculates <c><i>x - floor(x)</i></c>. Where <c><i>floor</i></c> is the intrinsic HLSL function.
///
/// NOTE: This function should compile down to a single <c><i>V_MAX3_F32</i></c> operation on GCN/RDNA hardware. It is
/// worth further noting that this function is intentionally distinct from the HLSL <c><i>frac</i></c> intrinsic
/// function.
///
/// @param [in] x The value to compute the fractional part from.
///
/// @returns
/// The fractional part of <c><i>x</i></c>.
///
/// @ingroup HLSL
FfxFloat32x3 ffxFract(FfxFloat32x3 x)
{
return x - floor(x);
}
/// Compute the factional part of a decimal value.
///
/// This function calculates <c><i>x - floor(x)</i></c>. Where <c><i>floor</i></c> is the intrinsic HLSL function.
///
/// NOTE: This function should compile down to a single <c><i>V_MAX3_F32</i></c> operation on GCN/RDNA hardware. It is
/// worth further noting that this function is intentionally distinct from the HLSL <c><i>frac</i></c> intrinsic
/// function.
///
/// @param [in] x The value to compute the fractional part from.
///
/// @returns
/// The fractional part of <c><i>x</i></c>.
///
/// @ingroup HLSL
FfxFloat32x4 ffxFract(FfxFloat32x4 x)
{
return x - floor(x);
}
/// Compute the maximum of three values.
///
/// NOTE: This function should compile down to a single <c><i>V_MAX3_F32</i></c> operation on GCN/RDNA hardware.
///
/// @param [in] x The first value to include in the max calculation.
/// @param [in] y The second value to include in the max calcuation.
/// @param [in] z The third value to include in the max calcuation.
///
/// @returns
/// The maximum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxFloat32 ffxMax3(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z)
{
return max(x, max(y, z));
}
/// Compute the maximum of three values.
///
/// NOTE: This function should compile down to a single <c><i>V_MAX3_F32</i></c> operation on GCN/RDNA hardware.
///
/// @param [in] x The first value to include in the max calculation.
/// @param [in] y The second value to include in the max calcuation.
/// @param [in] z The third value to include in the max calcuation.
///
/// @returns
/// The maximum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxFloat32x2 ffxMax3(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z)
{
return max(x, max(y, z));
}
/// Compute the maximum of three values.
///
/// NOTE: This function should compile down to a single <c><i>V_MAX3_F32</i></c> operation on GCN/RDNA hardware.
///
/// @param [in] x The first value to include in the max calculation.
/// @param [in] y The second value to include in the max calcuation.
/// @param [in] z The third value to include in the max calcuation.
///
/// @returns
/// The maximum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxFloat32x3 ffxMax3(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z)
{
return max(x, max(y, z));
}
/// Compute the maximum of three values.
///
/// NOTE: This function should compile down to a single <c><i>V_MAX3_F32</i></c> operation on GCN/RDNA hardware.
///
/// @param [in] x The first value to include in the max calculation.
/// @param [in] y The second value to include in the max calcuation.
/// @param [in] z The third value to include in the max calcuation.
///
/// @returns
/// The maximum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxFloat32x4 ffxMax3(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z)
{
return max(x, max(y, z));
}
/// Compute the maximum of three values.
///
/// NOTE: This function should compile down to a single <c><i>V_MAX3_F32</i></c> operation on GCN/RDNA hardware.
///
/// @param [in] x The first value to include in the max calculation.
/// @param [in] y The second value to include in the max calcuation.
/// @param [in] z The third value to include in the max calcuation.
///
/// @returns
/// The maximum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxUInt32 ffxMax3(FfxUInt32 x, FfxUInt32 y, FfxUInt32 z)
{
return max(x, max(y, z));
}
/// Compute the maximum of three values.
///
/// NOTE: This function should compile down to a single <c><i>V_MAX3_F32</i></c> operation on GCN/RDNA hardware.
///
/// @param [in] x The first value to include in the max calculation.
/// @param [in] y The second value to include in the max calcuation.
/// @param [in] z The third value to include in the max calcuation.
///
/// @returns
/// The maximum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxUInt32x2 ffxMax3(FfxUInt32x2 x, FfxUInt32x2 y, FfxUInt32x2 z)
{
return max(x, max(y, z));
}
/// Compute the maximum of three values.
///
/// NOTE: This function should compile down to a single <c><i>V_MAX3_F32</i></c> operation on GCN/RDNA hardware.
///
/// @param [in] x The first value to include in the max calculation.
/// @param [in] y The second value to include in the max calcuation.
/// @param [in] z The third value to include in the max calcuation.
///
/// @returns
/// The maximum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxUInt32x3 ffxMax3(FfxUInt32x3 x, FfxUInt32x3 y, FfxUInt32x3 z)
{
return max(x, max(y, z));
}
/// Compute the maximum of three values.
///
/// NOTE: This function should compile down to a single <c><i>V_MAX3_F32</i></c> operation on GCN/RDNA hardware.
///
/// @param [in] x The first value to include in the max calculation.
/// @param [in] y The second value to include in the max calcuation.
/// @param [in] z The third value to include in the max calcuation.
///
/// @returns
/// The maximum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxUInt32x4 ffxMax3(FfxUInt32x4 x, FfxUInt32x4 y, FfxUInt32x4 z)
{
return max(x, max(y, z));
}
/// Compute the median of three values.
///
/// NOTE: This function should compile down to a single <c><i>V_MED3_F32</i></c> operation on GCN/RDNA hardware.
///
/// @param [in] x The first value to include in the median calculation.
/// @param [in] y The second value to include in the median calcuation.
/// @param [in] z The third value to include in the median calcuation.
///
/// @returns
/// The median value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxFloat32 ffxMed3(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z)
{
return max(min(x, y), min(max(x, y), z));
}
/// Compute the median of three values.
///
/// NOTE: This function should compile down to a single <c><i>V_MED3_F32</i></c> operation on GCN/RDNA hardware.
///
/// @param [in] x The first value to include in the median calculation.
/// @param [in] y The second value to include in the median calcuation.
/// @param [in] z The third value to include in the median calcuation.
///
/// @returns
/// The median value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxFloat32x2 ffxMed3(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z)
{
return max(min(x, y), min(max(x, y), z));
}
/// Compute the median of three values.
///
/// NOTE: This function should compile down to a single <c><i>V_MED3_F32</i></c> operation on GCN/RDNA hardware.
///
/// @param [in] x The first value to include in the median calculation.
/// @param [in] y The second value to include in the median calcuation.
/// @param [in] z The third value to include in the median calcuation.
///
/// @returns
/// The median value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxFloat32x3 ffxMed3(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z)
{
return max(min(x, y), min(max(x, y), z));
}
/// Compute the median of three values.
///
/// NOTE: This function should compile down to a single <c><i>V_MED3_F32</i></c> operation on GCN/RDNA hardware.
///
/// @param [in] x The first value to include in the median calculation.
/// @param [in] y The second value to include in the median calcuation.
/// @param [in] z The third value to include in the median calcuation.
///
/// @returns
/// The median value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxFloat32x4 ffxMed3(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z)
{
return max(min(x, y), min(max(x, y), z));
}
/// Compute the median of three values.
///
/// NOTE: This function should compile down to a single <c><i>V_MED3_F32</i></c> operation on GCN/RDNA hardware.
///
/// @param [in] x The first value to include in the median calculation.
/// @param [in] y The second value to include in the median calcuation.
/// @param [in] z The third value to include in the median calcuation.
///
/// @returns
/// The median value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxInt32 ffxMed3(FfxInt32 x, FfxInt32 y, FfxInt32 z)
{
return max(min(x, y), min(max(x, y), z));
// return min(max(min(y, z), x), max(y, z));
// return max(max(x, y), z) == x ? max(y, z) : (max(max(x, y), z) == y ? max(x, z) : max(x, y));
}
/// Compute the median of three values.
///
/// NOTE: This function should compile down to a single <c><i>V_MED3_F32</i></c> operation on GCN/RDNA hardware.
///
/// @param [in] x The first value to include in the median calculation.
/// @param [in] y The second value to include in the median calcuation.
/// @param [in] z The third value to include in the median calcuation.
///
/// @returns
/// The median value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxInt32x2 ffxMed3(FfxInt32x2 x, FfxInt32x2 y, FfxInt32x2 z)
{
return max(min(x, y), min(max(x, y), z));
// return min(max(min(y, z), x), max(y, z));
// return max(max(x, y), z) == x ? max(y, z) : (max(max(x, y), z) == y ? max(x, z) : max(x, y));
}
/// Compute the median of three values.
///
/// NOTE: This function should compile down to a single <c><i>V_MED3_F32</i></c> operation on GCN/RDNA hardware.
///
/// @param [in] x The first value to include in the median calculation.
/// @param [in] y The second value to include in the median calcuation.
/// @param [in] z The third value to include in the median calcuation.
///
/// @returns
/// The median value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxInt32x3 ffxMed3(FfxInt32x3 x, FfxInt32x3 y, FfxInt32x3 z)
{
return max(min(x, y), min(max(x, y), z));
}
/// Compute the median of three values.
///
/// NOTE: This function should compile down to a single <c><i>V_MED3_I32</i></c> operation on GCN/RDNA hardware.
///
/// @param [in] x The first value to include in the median calculation.
/// @param [in] y The second value to include in the median calcuation.
/// @param [in] z The third value to include in the median calcuation.
///
/// @returns
/// The median value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxInt32x4 ffxMed3(FfxInt32x4 x, FfxInt32x4 y, FfxInt32x4 z)
{
return max(min(x, y), min(max(x, y), z));
}
/// Compute the minimum of three values.
///
/// NOTE: This function should compile down to a single <c><i>V_MIN3_I32</i></c> operation on GCN/RDNA hardware.
///
/// @param [in] x The first value to include in the min calculation.
/// @param [in] y The second value to include in the min calcuation.
/// @param [in] z The third value to include in the min calcuation.
///
/// @returns
/// The minimum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxFloat32 ffxMin3(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z)
{
return min(x, min(y, z));
}
/// Compute the minimum of three values.
///
/// NOTE: This function should compile down to a single <c><i>V_MIN3_I32</i></c> operation on GCN/RDNA hardware.
///
/// @param [in] x The first value to include in the min calculation.
/// @param [in] y The second value to include in the min calcuation.
/// @param [in] z The third value to include in the min calcuation.
///
/// @returns
/// The minimum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxFloat32x2 ffxMin3(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z)
{
return min(x, min(y, z));
}
/// Compute the minimum of three values.
///
/// NOTE: This function should compile down to a single <c><i>V_MIN3_I32</c></i> operation on GCN/RDNA hardware.
///
/// @param [in] x The first value to include in the min calculation.
/// @param [in] y The second value to include in the min calcuation.
/// @param [in] z The third value to include in the min calcuation.
///
/// @returns
/// The minimum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxFloat32x3 ffxMin3(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z)
{
return min(x, min(y, z));
}
/// Compute the minimum of three values.
///
/// NOTE: This function should compile down to a single <c><i>V_MIN3_F32</c></i> operation on GCN/RDNA hardware.
///
/// @param [in] x The first value to include in the min calculation.
/// @param [in] y The second value to include in the min calcuation.
/// @param [in] z The third value to include in the min calcuation.
///
/// @returns
/// The minimum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxFloat32x4 ffxMin3(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z)
{
return min(x, min(y, z));
}
/// Compute the minimum of three values.
///
/// NOTE: This function should compile down to a single <c><i>V_MIN3_F32</c></i> operation on GCN/RDNA hardware.
///
/// @param [in] x The first value to include in the min calculation.
/// @param [in] y The second value to include in the min calcuation.
/// @param [in] z The third value to include in the min calcuation.
///
/// @returns
/// The minimum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxUInt32 ffxMin3(FfxUInt32 x, FfxUInt32 y, FfxUInt32 z)
{
return min(x, min(y, z));
}
/// Compute the minimum of three values.
///
/// NOTE: This function should compile down to a single <c><i>V_MIN3_F32</c></i> operation on GCN/RDNA hardware.
///
/// @param [in] x The first value to include in the min calculation.
/// @param [in] y The second value to include in the min calcuation.
/// @param [in] z The third value to include in the min calcuation.
///
/// @returns
/// The minimum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxUInt32x2 ffxMin3(FfxUInt32x2 x, FfxUInt32x2 y, FfxUInt32x2 z)
{
return min(x, min(y, z));
}
/// Compute the minimum of three values.
///
/// NOTE: This function should compile down to a single <c><i>V_MIN3_F32</c></i> operation on GCN/RDNA hardware.
///
/// @param [in] x The first value to include in the min calculation.
/// @param [in] y The second value to include in the min calcuation.
/// @param [in] z The third value to include in the min calcuation.
///
/// @returns
/// The minimum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxUInt32x3 ffxMin3(FfxUInt32x3 x, FfxUInt32x3 y, FfxUInt32x3 z)
{
return min(x, min(y, z));
}
/// Compute the minimum of three values.
///
/// NOTE: This function should compile down to a single <c><i>V_MIN3_F32</c></i> operation on GCN/RDNA hardware.
///
/// @param [in] x The first value to include in the min calculation.
/// @param [in] y The second value to include in the min calcuation.
/// @param [in] z The third value to include in the min calcuation.
///
/// @returns
/// The minimum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
///
/// @ingroup HLSL
FfxUInt32x4 ffxMin3(FfxUInt32x4 x, FfxUInt32x4 y, FfxUInt32x4 z)
{
return min(x, min(y, z));
}
FfxUInt32 AShrSU1(FfxUInt32 a, FfxUInt32 b)
{
return FfxUInt32(FfxInt32(a) >> FfxInt32(b));
}
//==============================================================================================================================
// HLSL HALF
//==============================================================================================================================
#if FFX_HALF
//==============================================================================================================================
// Need to use manual unpack to get optimal execution (don't use packed types in buffers directly).
// Unpack requires this pattern: https://gpuopen.com/first-steps-implementing-fp16/
FFX_MIN16_F2 ffxUint32ToFloat16x2(FfxUInt32 x)
{
FfxFloat32x2 t = f16tof32(FfxUInt32x2(x & 0xFFFF, x >> 16));
return FFX_MIN16_F2(t);
}
FFX_MIN16_F4 ffxUint32x2ToFloat16x4(FfxUInt32x2 x)
{
return FFX_MIN16_F4(ffxUint32ToFloat16x2(x.x), ffxUint32ToFloat16x2(x.y));
}
FFX_MIN16_U2 ffxUint32ToUint16x2(FfxUInt32 x)
{
FfxUInt32x2 t = FfxUInt32x2(x & 0xFFFF, x >> 16);
return FFX_MIN16_U2(t);
}
FFX_MIN16_U4 ffxUint32x2ToUint16x4(FfxUInt32x2 x)
{
return FFX_MIN16_U4(ffxUint32ToUint16x2(x.x), ffxUint32ToUint16x2(x.y));
}
#define FFX_UINT32_TO_FLOAT16X2(x) ffxUint32ToFloat16x2(FfxUInt32(x))
#define FFX_UINT32X2_TO_FLOAT16X4(x) ffxUint32x2ToFloat16x4(FfxUInt32x2(x))
#define FFX_UINT32_TO_UINT16X2(x) ffxUint32ToUint16x2(FfxUInt32(x))
#define FFX_UINT32X2_TO_UINT16X4(x) ffxUint32x2ToUint16x4(FfxUInt32x2(x))
//------------------------------------------------------------------------------------------------------------------------------
FfxUInt32 FFX_MIN16_F2ToUint32(FFX_MIN16_F2 x)
{
return f32tof16(x.x) + (f32tof16(x.y) << 16);
}
FfxUInt32x2 FFX_MIN16_F4ToUint32x2(FFX_MIN16_F4 x)
{
return FfxUInt32x2(FFX_MIN16_F2ToUint32(x.xy), FFX_MIN16_F2ToUint32(x.zw));
}
FfxUInt32 FFX_MIN16_U2ToUint32(FFX_MIN16_U2 x)
{
return FfxUInt32(x.x) + (FfxUInt32(x.y) << 16);
}
FfxUInt32x2 FFX_MIN16_U4ToUint32x2(FFX_MIN16_U4 x)
{
return FfxUInt32x2(FFX_MIN16_U2ToUint32(x.xy), FFX_MIN16_U2ToUint32(x.zw));
}
#define FFX_FLOAT16X2_TO_UINT32(x) FFX_MIN16_F2ToUint32(FFX_MIN16_F2(x))
#define FFX_FLOAT16X4_TO_UINT32X2(x) FFX_MIN16_F4ToUint32x2(FFX_MIN16_F4(x))
#define FFX_UINT16X2_TO_UINT32(x) FFX_MIN16_U2ToUint32(FFX_MIN16_U2(x))
#define FFX_UINT16X4_TO_UINT32X2(x) FFX_MIN16_U4ToUint32x2(FFX_MIN16_U4(x))
#if defined(FFX_HLSL_6_2) && !defined(FFX_NO_16_BIT_CAST)
#define FFX_TO_UINT16(x) asuint16(x)
#define FFX_TO_UINT16X2(x) asuint16(x)
#define FFX_TO_UINT16X3(x) asuint16(x)
#define FFX_TO_UINT16X4(x) asuint16(x)
#else
#define FFX_TO_UINT16(a) FFX_MIN16_U(f32tof16(FfxFloat32(a)))
#define FFX_TO_UINT16X2(a) FFX_MIN16_U2(FFX_TO_UINT16((a).x), FFX_TO_UINT16((a).y))
#define FFX_TO_UINT16X3(a) FFX_MIN16_U3(FFX_TO_UINT16((a).x), FFX_TO_UINT16((a).y), FFX_TO_UINT16((a).z))
#define FFX_TO_UINT16X4(a) FFX_MIN16_U4(FFX_TO_UINT16((a).x), FFX_TO_UINT16((a).y), FFX_TO_UINT16((a).z), FFX_TO_UINT16((a).w))
#endif // #if defined(FFX_HLSL_6_2) && !defined(FFX_NO_16_BIT_CAST)
#if defined(FFX_HLSL_6_2) && !defined(FFX_NO_16_BIT_CAST)
#define FFX_TO_FLOAT16(x) asfloat16(x)
#define FFX_TO_FLOAT16X2(x) asfloat16(x)
#define FFX_TO_FLOAT16X3(x) asfloat16(x)
#define FFX_TO_FLOAT16X4(x) asfloat16(x)
#else
#define FFX_TO_FLOAT16(a) FFX_MIN16_F(f16tof32(FfxUInt32(a)))
#define FFX_TO_FLOAT16X2(a) FFX_MIN16_F2(FFX_TO_FLOAT16((a).x), FFX_TO_FLOAT16((a).y))
#define FFX_TO_FLOAT16X3(a) FFX_MIN16_F3(FFX_TO_FLOAT16((a).x), FFX_TO_FLOAT16((a).y), FFX_TO_FLOAT16((a).z))
#define FFX_TO_FLOAT16X4(a) FFX_MIN16_F4(FFX_TO_FLOAT16((a).x), FFX_TO_FLOAT16((a).y), FFX_TO_FLOAT16((a).z), FFX_TO_FLOAT16((a).w))
#endif // #if defined(FFX_HLSL_6_2) && !defined(FFX_NO_16_BIT_CAST)
//==============================================================================================================================
#define FFX_BROADCAST_FLOAT16(a) FFX_MIN16_F(a)
#define FFX_BROADCAST_FLOAT16X2(a) FFX_MIN16_F(a)
#define FFX_BROADCAST_FLOAT16X3(a) FFX_MIN16_F(a)
#define FFX_BROADCAST_FLOAT16X4(a) FFX_MIN16_F(a)
//------------------------------------------------------------------------------------------------------------------------------
#define FFX_BROADCAST_INT16(a) FFX_MIN16_I(a)
#define FFX_BROADCAST_INT16X2(a) FFX_MIN16_I(a)
#define FFX_BROADCAST_INT16X3(a) FFX_MIN16_I(a)
#define FFX_BROADCAST_INT16X4(a) FFX_MIN16_I(a)
//------------------------------------------------------------------------------------------------------------------------------
#define FFX_BROADCAST_UINT16(a) FFX_MIN16_U(a)
#define FFX_BROADCAST_UINT16X2(a) FFX_MIN16_U(a)
#define FFX_BROADCAST_UINT16X3(a) FFX_MIN16_U(a)
#define FFX_BROADCAST_UINT16X4(a) FFX_MIN16_U(a)
//==============================================================================================================================
FFX_MIN16_U ffxAbsHalf(FFX_MIN16_U a)
{
return FFX_MIN16_U(abs(FFX_MIN16_I(a)));
}
FFX_MIN16_U2 ffxAbsHalf(FFX_MIN16_U2 a)
{
return FFX_MIN16_U2(abs(FFX_MIN16_I2(a)));
}
FFX_MIN16_U3 ffxAbsHalf(FFX_MIN16_U3 a)
{
return FFX_MIN16_U3(abs(FFX_MIN16_I3(a)));
}
FFX_MIN16_U4 ffxAbsHalf(FFX_MIN16_U4 a)
{
return FFX_MIN16_U4(abs(FFX_MIN16_I4(a)));
}
//------------------------------------------------------------------------------------------------------------------------------
FFX_MIN16_F ffxClampHalf(FFX_MIN16_F x, FFX_MIN16_F n, FFX_MIN16_F m)
{
return max(n, min(x, m));
}
FFX_MIN16_F2 ffxClampHalf(FFX_MIN16_F2 x, FFX_MIN16_F2 n, FFX_MIN16_F2 m)
{
return max(n, min(x, m));
}
FFX_MIN16_F3 ffxClampHalf(FFX_MIN16_F3 x, FFX_MIN16_F3 n, FFX_MIN16_F3 m)
{
return max(n, min(x, m));
}
FFX_MIN16_F4 ffxClampHalf(FFX_MIN16_F4 x, FFX_MIN16_F4 n, FFX_MIN16_F4 m)
{
return max(n, min(x, m));
}
//------------------------------------------------------------------------------------------------------------------------------
// V_FRACT_F16 (note DX frac() is different).
FFX_MIN16_F ffxFract(FFX_MIN16_F x)
{
return x - floor(x);
}
FFX_MIN16_F2 ffxFract(FFX_MIN16_F2 x)
{
return x - floor(x);
}
FFX_MIN16_F3 ffxFract(FFX_MIN16_F3 x)
{
return x - floor(x);
}
FFX_MIN16_F4 ffxFract(FFX_MIN16_F4 x)
{
return x - floor(x);
}
//------------------------------------------------------------------------------------------------------------------------------
FFX_MIN16_F ffxLerp(FFX_MIN16_F x, FFX_MIN16_F y, FFX_MIN16_F a)
{
return lerp(x, y, a);
}
FFX_MIN16_F2 ffxLerp(FFX_MIN16_F2 x, FFX_MIN16_F2 y, FFX_MIN16_F a)
{
return lerp(x, y, a);
}
FFX_MIN16_F2 ffxLerp(FFX_MIN16_F2 x, FFX_MIN16_F2 y, FFX_MIN16_F2 a)
{
return lerp(x, y, a);
}
FFX_MIN16_F3 ffxLerp(FFX_MIN16_F3 x, FFX_MIN16_F3 y, FFX_MIN16_F a)
{
return lerp(x, y, a);
}
FFX_MIN16_F3 ffxLerp(FFX_MIN16_F3 x, FFX_MIN16_F3 y, FFX_MIN16_F3 a)
{
return lerp(x, y, a);
}
FFX_MIN16_F4 ffxLerp(FFX_MIN16_F4 x, FFX_MIN16_F4 y, FFX_MIN16_F a)
{
return lerp(x, y, a);
}
FFX_MIN16_F4 ffxLerp(FFX_MIN16_F4 x, FFX_MIN16_F4 y, FFX_MIN16_F4 a)
{
return lerp(x, y, a);
}
//------------------------------------------------------------------------------------------------------------------------------
FFX_MIN16_F ffxMax3Half(FFX_MIN16_F x, FFX_MIN16_F y, FFX_MIN16_F z)
{
return max(x, max(y, z));
}
FFX_MIN16_F2 ffxMax3Half(FFX_MIN16_F2 x, FFX_MIN16_F2 y, FFX_MIN16_F2 z)
{
return max(x, max(y, z));
}
FFX_MIN16_F3 ffxMax3Half(FFX_MIN16_F3 x, FFX_MIN16_F3 y, FFX_MIN16_F3 z)
{
return max(x, max(y, z));
}
FFX_MIN16_F4 ffxMax3Half(FFX_MIN16_F4 x, FFX_MIN16_F4 y, FFX_MIN16_F4 z)
{
return max(x, max(y, z));
}
//------------------------------------------------------------------------------------------------------------------------------
FFX_MIN16_F ffxMin3Half(FFX_MIN16_F x, FFX_MIN16_F y, FFX_MIN16_F z)
{
return min(x, min(y, z));
}
FFX_MIN16_F2 ffxMin3Half(FFX_MIN16_F2 x, FFX_MIN16_F2 y, FFX_MIN16_F2 z)
{
return min(x, min(y, z));
}
FFX_MIN16_F3 ffxMin3Half(FFX_MIN16_F3 x, FFX_MIN16_F3 y, FFX_MIN16_F3 z)
{
return min(x, min(y, z));
}
FFX_MIN16_F4 ffxMin3Half(FFX_MIN16_F4 x, FFX_MIN16_F4 y, FFX_MIN16_F4 z)
{
return min(x, min(y, z));
}
//------------------------------------------------------------------------------------------------------------------------------
FFX_MIN16_F ffxMed3Half(FFX_MIN16_F x, FFX_MIN16_F y, FFX_MIN16_F z)
{
return max(min(x, y), min(max(x, y), z));
}
FFX_MIN16_F2 ffxMed3Half(FFX_MIN16_F2 x, FFX_MIN16_F2 y, FFX_MIN16_F2 z)
{
return max(min(x, y), min(max(x, y), z));
}
FFX_MIN16_F3 ffxMed3Half(FFX_MIN16_F3 x, FFX_MIN16_F3 y, FFX_MIN16_F3 z)
{
return max(min(x, y), min(max(x, y), z));
}
FFX_MIN16_F4 ffxMed3Half(FFX_MIN16_F4 x, FFX_MIN16_F4 y, FFX_MIN16_F4 z)
{
return max(min(x, y), min(max(x, y), z));
}
//------------------------------------------------------------------------------------------------------------------------------
FFX_MIN16_I ffxMed3Half(FFX_MIN16_I x, FFX_MIN16_I y, FFX_MIN16_I z)
{
return max(min(x, y), min(max(x, y), z));
}
FFX_MIN16_I2 ffxMed3Half(FFX_MIN16_I2 x, FFX_MIN16_I2 y, FFX_MIN16_I2 z)
{
return max(min(x, y), min(max(x, y), z));
}
FFX_MIN16_I3 ffxMed3Half(FFX_MIN16_I3 x, FFX_MIN16_I3 y, FFX_MIN16_I3 z)
{
return max(min(x, y), min(max(x, y), z));
}
FFX_MIN16_I4 ffxMed3Half(FFX_MIN16_I4 x, FFX_MIN16_I4 y, FFX_MIN16_I4 z)
{
return max(min(x, y), min(max(x, y), z));
}
//------------------------------------------------------------------------------------------------------------------------------
FFX_MIN16_F ffxReciprocalHalf(FFX_MIN16_F x)
{
return rcp(x);
}
FFX_MIN16_F2 ffxReciprocalHalf(FFX_MIN16_F2 x)
{
return rcp(x);
}
FFX_MIN16_F3 ffxReciprocalHalf(FFX_MIN16_F3 x)
{
return rcp(x);
}
FFX_MIN16_F4 ffxReciprocalHalf(FFX_MIN16_F4 x)
{
return rcp(x);
}
//------------------------------------------------------------------------------------------------------------------------------
FFX_MIN16_F ffxReciprocalSquareRootHalf(FFX_MIN16_F x)
{
return rsqrt(x);
}
FFX_MIN16_F2 ffxReciprocalSquareRootHalf(FFX_MIN16_F2 x)
{
return rsqrt(x);
}
FFX_MIN16_F3 ffxReciprocalSquareRootHalf(FFX_MIN16_F3 x)
{
return rsqrt(x);
}
FFX_MIN16_F4 ffxReciprocalSquareRootHalf(FFX_MIN16_F4 x)
{
return rsqrt(x);
}
//------------------------------------------------------------------------------------------------------------------------------
FFX_MIN16_F ffxSaturate(FFX_MIN16_F x)
{
return saturate(x);
}
FFX_MIN16_F2 ffxSaturate(FFX_MIN16_F2 x)
{
return saturate(x);
}
FFX_MIN16_F3 ffxSaturate(FFX_MIN16_F3 x)
{
return saturate(x);
}
FFX_MIN16_F4 ffxSaturate(FFX_MIN16_F4 x)
{
return saturate(x);
}
//------------------------------------------------------------------------------------------------------------------------------
FFX_MIN16_U ffxBitShiftRightHalf(FFX_MIN16_U a, FFX_MIN16_U b)
{
return FFX_MIN16_U(FFX_MIN16_I(a) >> FFX_MIN16_I(b));
}
FFX_MIN16_U2 ffxBitShiftRightHalf(FFX_MIN16_U2 a, FFX_MIN16_U2 b)
{
return FFX_MIN16_U2(FFX_MIN16_I2(a) >> FFX_MIN16_I2(b));
}
FFX_MIN16_U3 ffxBitShiftRightHalf(FFX_MIN16_U3 a, FFX_MIN16_U3 b)
{
return FFX_MIN16_U3(FFX_MIN16_I3(a) >> FFX_MIN16_I3(b));
}
FFX_MIN16_U4 ffxBitShiftRightHalf(FFX_MIN16_U4 a, FFX_MIN16_U4 b)
{
return FFX_MIN16_U4(FFX_MIN16_I4(a) >> FFX_MIN16_I4(b));
}
#endif // FFX_HALF
//==============================================================================================================================
// HLSL WAVE
//==============================================================================================================================
#if defined(FFX_WAVE)
// Where 'x' must be a compile time literal.
FfxFloat32 AWaveXorF1(FfxFloat32 v, FfxUInt32 x)
{
return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x);
}
FfxFloat32x2 AWaveXorF2(FfxFloat32x2 v, FfxUInt32 x)
{
return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x);
}
FfxFloat32x3 AWaveXorF3(FfxFloat32x3 v, FfxUInt32 x)
{
return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x);
}
FfxFloat32x4 AWaveXorF4(FfxFloat32x4 v, FfxUInt32 x)
{
return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x);
}
FfxUInt32 AWaveXorU1(FfxUInt32 v, FfxUInt32 x)
{
return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x);
}
FfxUInt32x2 AWaveXorU1(FfxUInt32x2 v, FfxUInt32 x)
{
return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x);
}
FfxUInt32x3 AWaveXorU1(FfxUInt32x3 v, FfxUInt32 x)
{
return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x);
}
FfxUInt32x4 AWaveXorU1(FfxUInt32x4 v, FfxUInt32 x)
{
return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x);
}
#if FFX_HALF
FfxFloat16x2 ffxWaveXorFloat16x2(FfxFloat16x2 v, FfxUInt32 x)
{
return FFX_UINT32_TO_FLOAT16X2(WaveReadLaneAt(FFX_FLOAT16X2_TO_UINT32(v), WaveGetLaneIndex() ^ x));
}
FfxFloat16x4 ffxWaveXorFloat16x4(FfxFloat16x4 v, FfxUInt32 x)
{
return FFX_UINT32X2_TO_FLOAT16X4(WaveReadLaneAt(FFX_FLOAT16X4_TO_UINT32X2(v), WaveGetLaneIndex() ^ x));
}
FfxUInt16x2 ffxWaveXorUint16x2(FfxUInt16x2 v, FfxUInt32 x)
{
return FFX_UINT32_TO_UINT16X2(WaveReadLaneAt(FFX_UINT16X2_TO_UINT32(v), WaveGetLaneIndex() ^ x));
}
FfxUInt16x4 ffxWaveXorUint16x4(FfxUInt16x4 v, FfxUInt32 x)
{
return AW4_FFX_UINT32(WaveReadLaneAt(FFX_UINT32_AW4(v), WaveGetLaneIndex() ^ x));
}
#endif // FFX_HALF
#endif // #if defined(FFX_WAVE)