godot/thirdparty/thekla_atlas/nvmath/nvmath.h

// This code is in the public domain -- castanyo@yahoo.es

#pragma once
#ifndef NV_MATH_H
#define NV_MATH_H

#include "nvcore/nvcore.h"
#include "nvcore/Debug.h"   // nvDebugCheck
#include "nvcore/Utils.h"   // max, clamp

#include <math.h>

#if NV_OS_WIN32 || NV_OS_XBOX || NV_OS_DURANGO
#include <float.h>  // finite, isnan
#endif

// -- GODOT start --
//#if NV_CPU_X86 || NV_CPU_X86_64
//    //#include <intrin.h>
//    #include <xmmintrin.h>
//#endif
// -- GODOT end --


// Function linkage
// NVMATH_API and NVMATH_CLASS are the decorations for this library's exported
// functions and classes:
//   - NVMATH_SHARED non-zero and NVMATH_EXPORTS defined: DLL export macros.
//   - NVMATH_SHARED non-zero, NVMATH_EXPORTS not defined: DLL import macros.
//   - NVMATH_SHARED zero or undefined: both expand to nothing.
// DLL_EXPORT, DLL_EXPORT_CLASS and DLL_IMPORT are not defined in this header.
// NOTE(review): the import branch uses DLL_IMPORT for NVMATH_CLASS while the
// export branch uses DLL_EXPORT_CLASS -- confirm this asymmetry is intended.
#if NVMATH_SHARED
#ifdef NVMATH_EXPORTS
#define NVMATH_API DLL_EXPORT
#define NVMATH_CLASS DLL_EXPORT_CLASS
#else
#define NVMATH_API DLL_IMPORT
#define NVMATH_CLASS DLL_IMPORT
#endif
#else // NVMATH_SHARED
#define NVMATH_API
#define NVMATH_CLASS
#endif // NVMATH_SHARED

// Set some reasonable defaults.
// Unless NV_USE_ALTIVEC was already defined before this header is included,
// it takes the value of NV_CPU_PPC.
#ifndef NV_USE_ALTIVEC
#   define NV_USE_ALTIVEC NV_CPU_PPC
//#   define NV_USE_ALTIVEC defined(__VEC__)
#endif

// Select the SSE level the math code may assume:
//   0 = no SSE, 1 = SSE, 2 = SSE2.
// A value defined before this header is included takes precedence over the
// detection below.
#ifndef NV_USE_SSE
#   if NV_CPU_X86_64
        // x64 always supports at least SSE2
#       define NV_USE_SSE 2
#   elif NV_CC_MSVC && defined(_M_IX86_FP)
        // Also on x86 with the /arch:SSE flag in MSVC.
#       define NV_USE_SSE _M_IX86_FP       // 1=SSE, 2=SSE2
#   elif defined(__SSE2__)
        // SSE2 must be tested before SSE: a compiler that defines __SSE2__
        // also defines __SSE__, so the opposite order could never report 2.
#       define NV_USE_SSE 2
#   elif defined(__SSE__)
#       define NV_USE_SSE 1
#   else
        // Otherwise we assume no SSE.
#       define NV_USE_SSE 0
#   endif
#endif


// NV_USE_ALTIVEC and NV_USE_SSE are mutually exclusive: fail the build if both are enabled. (NV_USE_SIMD is not set in this header.)
#if NV_USE_ALTIVEC && NV_USE_SSE
#	error "Cannot enable both altivec and sse!"
#endif


// -- GODOT start --
#if NV_USE_SSE
    //#include <intrin.h>
    #include <xmmintrin.h>
#endif
// -- GODOT end --


// Single-precision PI. Only defined here when no PI macro exists yet, so a
// definition from another header wins.
#ifndef PI
#define PI                  float(3.1415926535897932384626433833)
#endif

// Default tolerance of nv::equal() and nv::isZero() below.
#define NV_EPSILON          (0.0001f)
// NOTE(review): presumably a looser tolerance for comparing normal vectors;
// it is not used anywhere in this header -- confirm against callers.
#define NV_NORMAL_EPSILON   (0.001f)

/*
#define SQ(r)               ((r)*(r))

#define SIGN_BITMASK        0x80000000

/// Integer representation of a floating-point value.
#define IR(x)               ((uint32 &)(x))

/// Absolute integer representation of a floating-point value
#define AIR(x)              (IR(x) & 0x7fffffff)

/// Floating-point representation of an integer value.
#define FR(x)               ((float&)(x))

/// Integer-based comparison of a floating point value.
/// Don't use it blindly, it can be faster or slower than the FPU comparison, depends on the context.
#define IS_NEGATIVE_FLOAT(x) (IR(x)&SIGN_BITMASK)
*/

// Checked double-precision square root: nvDebugCheck verifies that the
// argument is not negative, then the call is forwarded to sqrt().
extern "C" inline double sqrt_assert(const double f)
{
    nvDebugCheck(f >= 0.0f);
    const double root = sqrt(f);
    return root;
}

// Checked single-precision square root: nvDebugCheck verifies that the
// argument is not negative, then the call is forwarded to sqrtf().
inline float sqrtf_assert(const float f)
{
    nvDebugCheck(f >= 0.0f);
    const float root = sqrtf(f);
    return root;
}

// Checked double-precision arc cosine: nvDebugCheck verifies that the
// argument lies in [-1, 1], then the call is forwarded to acos().
extern "C" inline double acos_assert(const double f)
{
    nvDebugCheck(f >= -1.0f && f <= 1.0f);
    const double angle = acos(f);
    return angle;
}

// Checked single-precision arc cosine: nvDebugCheck verifies that the
// argument lies in [-1, 1], then the call is forwarded to acosf().
inline float acosf_assert(const float f)
{
    nvDebugCheck(f >= -1.0f && f <= 1.0f);
    const float angle = acosf(f);
    return angle;
}

// Checked double-precision arc sine: nvDebugCheck verifies that the
// argument lies in [-1, 1], then the call is forwarded to asin().
extern "C" inline double asin_assert(const double f)
{
    nvDebugCheck(f >= -1.0f && f <= 1.0f);
    const double angle = asin(f);
    return angle;
}

// Checked single-precision arc sine: nvDebugCheck verifies that the
// argument lies in [-1, 1], then the call is forwarded to asinf().
inline float asinf_assert(const float f)
{
    nvDebugCheck(f >= -1.0f && f <= 1.0f);
    const float angle = asinf(f);
    return angle;
}

// Replace default functions with asserting ones.
// From this point on, every use of sqrt/sqrtf/acos/acosf/asin/asinf in code
// that includes this header is rewritten by the preprocessor to the matching
// *_assert wrapper defined above. The wrappers are defined before these
// macros, so their own calls still reach the real library functions.
// The redirection is enabled for non-MSVC compilers and for MSVC with
// _MSC_VER below 1700 only.
#if !NV_CC_MSVC || (NV_CC_MSVC && (_MSC_VER < 1700))    // IC: Apparently this was causing problems in Visual Studio 2012. See Issue 194: https://code.google.com/p/nvidia-texture-tools/issues/detail?id=194
#define sqrt sqrt_assert
#define sqrtf sqrtf_assert
#define acos acos_assert
#define acosf acosf_assert
#define asin asin_assert
#define asinf asinf_assert
#endif

#if NV_CC_MSVC
// Base-2 logarithm, computed as logf(x) / logf(2). nvCheck verifies that x is
// not negative. Only compiled when NV_CC_MSVC is set.
NV_FORCEINLINE float log2f(float x)
{
    nvCheck(x >= 0);
    const float naturalLog = logf(x);
    const float naturalLogOfTwo = logf(2.0f);
    return naturalLog / naturalLogOfTwo;
}
// Two raised to the power x, computed with powf(). Only compiled when
// NV_CC_MSVC is set.
NV_FORCEINLINE float exp2f(float x)
{
    const float base = 2.0f;
    return powf(base, x);
}
#endif

namespace nv
{
    // Converts an angle from degrees to radians (degree * PI / 180).
    inline float toRadian(float degree)
    {
        return degree * (PI / 180.0f);
    }
    // Converts an angle from radians to degrees (radian * 180 / PI).
    inline float toDegree(float radian)
    {
        return radian * (180.0f / PI);
    }

    // Robust floating point comparisons:
    // http://realtimecollisiondetection.net/blog/?p=89
    //
    // Returns true when |f0 - f1| does not exceed epsilon scaled by the
    // largest of 1, |f0| and |f1|, so the tolerance is absolute for small
    // values and relative for large ones.
    inline bool equal(const float f0, const float f1, const float epsilon = NV_EPSILON)
    {
        const float difference = fabs(f0-f1);
        const float scale = max3(1.0f, fabsf(f0), fabsf(f1));
        return difference <= epsilon * scale;
    }

    // Returns true when the magnitude of f is at most epsilon.
    inline bool isZero(const float f, const float epsilon = NV_EPSILON)
    {
        const float magnitude = fabs(f);
        return magnitude <= epsilon;
    }

    // Returns the platform's finiteness test for f:
    //   - Win32 / Xbox / Durango: _finite() from <float.h>.
    //   - Darwin / FreeBSD / OpenBSD / Orbis / Linux: isfinite().
    // Any other platform stops the build with an #error.
    inline bool isFinite(const float f)
    {
#if NV_OS_WIN32 || NV_OS_XBOX || NV_OS_DURANGO
        return _finite(f) != 0;
#elif NV_OS_DARWIN || NV_OS_FREEBSD || NV_OS_OPENBSD || NV_OS_ORBIS || NV_OS_LINUX
        return isfinite(f);
#else
#   error "isFinite not supported"
#endif
        //return std::isfinite (f);
        //return finite (f);
    }

    // Returns the platform's NaN test for f:
    //   - Win32 / Xbox / Durango: _isnan() from <float.h>.
    //   - Darwin / FreeBSD / OpenBSD / Orbis / Linux: isnan().
    // Any other platform stops the build with an #error.
    inline bool isNan(const float f)
    {
#if NV_OS_WIN32 || NV_OS_XBOX || NV_OS_DURANGO
        return _isnan(f) != 0;
#elif NV_OS_DARWIN || NV_OS_FREEBSD || NV_OS_OPENBSD || NV_OS_ORBIS || NV_OS_LINUX
        return isnan(f);
#else
#   error "isNan not supported"
#endif
    }

    // Integer base-2 logarithm: counts how many times i can be shifted right
    // by one bit before it becomes zero. Both 0 and 1 yield 0.
    inline uint log2(uint32 i)
    {
        uint32 shifts = 0;
        for (i >>= 1; i != 0; i >>= 1) {
            ++shifts;
        }
        return shifts;
    }

    // 64-bit variant of the integer base-2 logarithm: counts how many times i
    // can be shifted right by one bit before it becomes zero. Both 0 and 1
    // yield 0. The count is narrowed with U32().
    inline uint log2(uint64 i)
    {
        uint64 shifts = 0;
        for (i >>= 1; i != 0; i >>= 1) {
            ++shifts;
        }
        return U32(shifts);
    }

    // Linear interpolation between f0 (at t = 0) and f1 (at t = 1), evaluated
    // as f0 * (1 - t) + f1 * t. t is not clamped.
    inline float lerp(float f0, float f1, float t)
    {
        return f0 * (1.0f - t) + f1 * t;
    }

    // Returns the argument multiplied by itself (float and int overloads).
    inline float square(float f)
    {
        const float product = f * f;
        return product;
    }
    inline int square(int i)
    {
        const int product = i * i;
        return product;
    }

    // Returns the argument raised to the third power (float and int overloads).
    inline float cube(float f)
    {
        const float squared = f * f;
        return squared * f;
    }
    inline int cube(int i)
    {
        const int squared = i * i;
        return squared * i;
    }

    // Fractional part of f, measured from floor(f): the result is never
    // negative for finite input (e.g. -0.25 maps to 0.75).
    inline float frac(float f)
    {
        const float wholePart = floor(f);
        return f - wholePart;
    }

    // Rounds to the nearest integer value by taking floorf(f + 0.5), so exact
    // halves move towards positive infinity (-1.5 becomes -1).
    inline float floatRound(float f)
    {
        const float shifted = f + 0.5f;
        return floorf(shifted);
    }

    // Eliminates negative zeros from a float array.
    inline void floatCleanup(float * fp, int n)
    {
        for (int i = 0; i < n; i++) {
            //nvDebugCheck(isFinite(fp[i]));
            union { float f; uint32 i; } x = { fp[i] };
            if (x.i == 0x80000000) fp[i] = 0.0f;
        }
    }

    // Clamps f to the [0, 1] range using clamp().
    inline float saturate(float f)
    {
        const float lowerBound = 0.0f;
        const float upperBound = 1.0f;
        return clamp(f, lowerBound, upperBound);
    }

    // Maps x linearly so that edge0 becomes 0 and edge1 becomes 1, then
    // saturates the result. No special handling when edge0 equals edge1.
    inline float linearstep(float edge0, float edge1, float x)
    {
        const float offset = x - edge0;
        const float span = edge1 - edge0;
        return saturate(offset / span);
    }

    // Smooth interpolation between edge0 and edge1: the linearstep() result t
    // is fed through the cubic t*t*(3 - 2*t).
    inline float smoothstep(float edge0, float edge1, float x)
    {
        const float t = linearstep(edge0, edge1, x);
        return t*t*(3 - 2*t);
    }

    // Returns 1 for positive input, -1 for negative input and 0 otherwise
    // (both zeros and NaN compare neither greater nor less than zero).
    inline int sign(float a)
    {
        const int isPositive = (a > 0) ? 1 : 0;
        const int isNegative = (a < 0) ? 1 : 0;
        return isPositive - isNegative;
    }

    // Overlays a float with its raw bits and with a bitfield view split into a
    // 23-bit mantissa, an 8-bit biased exponent and a 1-bit sign flag.
    // The declaration order of the bitfields is flipped when NV_BIG_ENDIAN is
    // set. Assumes unsigned int and float are both 32 bits wide -- TODO confirm
    // for every supported target.
    union Float754 {
        unsigned int raw;       // All 32 bits as one integer.
        float value;            // The same storage viewed as a float.
        struct {
        #if NV_BIG_ENDIAN
            unsigned int negative:1;
            unsigned int biasedexponent:8;
            unsigned int mantissa:23;
        #else
            unsigned int mantissa:23;
            unsigned int biasedexponent:8;
            unsigned int negative:1;
        #endif
        } field;
    };

    // Return the exponent of x ~ Floor(Log2(x))
    inline int floatExponent(float x)
    {
        Float754 f;
        f.value = x;
        return (f.field.biasedexponent - 127);
    }


    // FloatRGB9E5
    // 32-bit packed value with three 9-bit fields (xm, ym, zm) and one 5-bit
    // field (e), accessible either as the whole word v or field by field.
    // Presumably xm/ym/zm are per-channel mantissas sharing the exponent e --
    // confirm against the code that encodes/decodes this format.
    // The declaration order of the bitfields is flipped when NV_BIG_ENDIAN is set.
    union Float3SE {
        uint32 v;       // All fields as one 32-bit word.
        struct {
        #if NV_BIG_ENDIAN
            uint32 e : 5;
            uint32 zm : 9;
            uint32 ym : 9;
            uint32 xm : 9;
        #else
            uint32 xm : 9;
            uint32 ym : 9;
            uint32 zm : 9;
            uint32 e : 5;
        #endif
        };
    };

    // FloatR11G11B10
    // 32-bit packed value holding three small fields pairs: x and y each use a
    // 6-bit mantissa (xm, ym) plus a 5-bit exponent (xe, ye), z uses a 5-bit
    // mantissa (zm) plus a 5-bit exponent (ze). Accessible either as the whole
    // word v or field by field.
    // The declaration order of the bitfields is flipped when NV_BIG_ENDIAN is set.
    union Float3PK {
        uint32 v;       // All fields as one 32-bit word.
        struct {
        #if NV_BIG_ENDIAN
            uint32 ze : 5;
            uint32 zm : 5;
            uint32 ye : 5;
            uint32 ym : 6;
            uint32 xe : 5;
            uint32 xm : 6;
        #else
            uint32 xm : 6;
            uint32 xe : 5;
            uint32 ym : 6;
            uint32 ye : 5;
            uint32 zm : 5;
            uint32 ze : 5;
        #endif
        };
    };


} // nv

#endif // NV_MATH_H
Import thekla_atlas As requested by reduz, an import of thekla_atlas into thirdparty/ 2017-12-08 14:05:47 +00:00			`// This code is in the public domain -- castanyo@yahoo.es`

			`#pragma once`
			`#ifndef NV_MATH_H`
			`#define NV_MATH_H`

			`#include "nvcore/nvcore.h"`
			`#include "nvcore/Debug.h" // nvDebugCheck`
			`#include "nvcore/Utils.h" // max, clamp`

			`#include <math.h>`

			`#if NV_OS_WIN32 || NV_OS_XBOX || NV_OS_DURANGO`
			`#include <float.h> // finite, isnan`
			`#endif`

thekla: Fix build on x86 systems without SSE support Fixes #14709. Same as https://github.com/Thekla/thekla_atlas/pull/11, but adding comments until it's merged upstream. 2017-12-15 19:35:18 +00:00			`// -- GODOT start --`
			`//#if NV_CPU_X86 || NV_CPU_X86_64`
			`// //#include <intrin.h>`
			`// #include <xmmintrin.h>`
			`//#endif`
			`// -- GODOT end --`
Import thekla_atlas As requested by reduz, an import of thekla_atlas into thirdparty/ 2017-12-08 14:05:47 +00:00


			`// Function linkage`
			`#if NVMATH_SHARED`
			`#ifdef NVMATH_EXPORTS`
			`#define NVMATH_API DLL_EXPORT`
			`#define NVMATH_CLASS DLL_EXPORT_CLASS`
			`#else`
			`#define NVMATH_API DLL_IMPORT`
			`#define NVMATH_CLASS DLL_IMPORT`
			`#endif`
			`#else // NVMATH_SHARED`
			`#define NVMATH_API`
			`#define NVMATH_CLASS`
			`#endif // NVMATH_SHARED`

			`// Set some reasonable defaults.`
			`#ifndef NV_USE_ALTIVEC`
			`# define NV_USE_ALTIVEC NV_CPU_PPC`
			`//# define NV_USE_ALTIVEC defined(__VEC__)`
			`#endif`

			`#ifndef NV_USE_SSE`
			`# if NV_CPU_X86_64`
			`// x64 always supports at least SSE2`
			`# define NV_USE_SSE 2`
			`# elif NV_CC_MSVC && defined(_M_IX86_FP)`
			`// Also on x86 with the /arch:SSE flag in MSVC.`
			`# define NV_USE_SSE _M_IX86_FP // 1=SSE, 2=SS2`
			`# elif defined(__SSE__)`
			`# define NV_USE_SSE 1`
			`# elif defined(__SSE2__)`
			`# define NV_USE_SSE 2`
			`# else`
			`// Otherwise we assume no SSE.`
			`# define NV_USE_SSE 0`
			`# endif`
			`#endif`


			`// Internally set NV_USE_SIMD when either altivec or sse is available.`
			`#if NV_USE_ALTIVEC && NV_USE_SSE`
			`# error "Cannot enable both altivec and sse!"`
			`#endif`


thekla: Fix build on x86 systems without SSE support Fixes #14709. Same as https://github.com/Thekla/thekla_atlas/pull/11, but adding comments until it's merged upstream. 2017-12-15 19:35:18 +00:00			`// -- GODOT start --`
			`#if NV_USE_SSE`
			`//#include <intrin.h>`
			`#include <xmmintrin.h>`
			`#endif`
			`// -- GODOT end --`

Import thekla_atlas As requested by reduz, an import of thekla_atlas into thirdparty/ 2017-12-08 14:05:47 +00:00
			`#ifndef PI`
			`#define PI float(3.1415926535897932384626433833)`
			`#endif`

			`#define NV_EPSILON (0.0001f)`
			`#define NV_NORMAL_EPSILON (0.001f)`

			`/*`
			`#define SQ(r) ((r)*(r))`

			`#define SIGN_BITMASK 0x80000000`

			`/// Integer representation of a floating-point value.`
			`#define IR(x) ((uint32 &)(x))`

			`/// Absolute integer representation of a floating-point value`
			`#define AIR(x) (IR(x) & 0x7fffffff)`

			`/// Floating-point representation of an integer value.`
			`#define FR(x) ((float&)(x))`

			`/// Integer-based comparison of a floating point value.`
			`/// Don't use it blindly, it can be faster or slower than the FPU comparison, depends on the context.`
			`#define IS_NEGATIVE_FLOAT(x) (IR(x)&SIGN_BITMASK)`
			`*/`

			`extern "C" inline double sqrt_assert(const double f)`
			`{`
			`nvDebugCheck(f >= 0.0f);`
			`return sqrt(f);`
			`}`

			`inline float sqrtf_assert(const float f)`
			`{`
			`nvDebugCheck(f >= 0.0f);`
			`return sqrtf(f);`
			`}`

			`extern "C" inline double acos_assert(const double f)`
			`{`
			`nvDebugCheck(f >= -1.0f && f <= 1.0f);`
			`return acos(f);`
			`}`

			`inline float acosf_assert(const float f)`
			`{`
			`nvDebugCheck(f >= -1.0f && f <= 1.0f);`
			`return acosf(f);`
			`}`

			`extern "C" inline double asin_assert(const double f)`
			`{`
			`nvDebugCheck(f >= -1.0f && f <= 1.0f);`
			`return asin(f);`
			`}`

			`inline float asinf_assert(const float f)`
			`{`
			`nvDebugCheck(f >= -1.0f && f <= 1.0f);`
			`return asinf(f);`
			`}`

			`// Replace default functions with asserting ones.`
			`#if !NV_CC_MSVC || (NV_CC_MSVC && (_MSC_VER < 1700)) // IC: Apparently this was causing problems in Visual Studio 2012. See Issue 194: https://code.google.com/p/nvidia-texture-tools/issues/detail?id=194`
			`#define sqrt sqrt_assert`
			`#define sqrtf sqrtf_assert`
			`#define acos acos_assert`
			`#define acosf acosf_assert`
			`#define asin asin_assert`
			`#define asinf asinf_assert`
			`#endif`

			`#if NV_CC_MSVC`
			`NV_FORCEINLINE float log2f(float x)`
			`{`
			`nvCheck(x >= 0);`
			`return logf(x) / logf(2.0f);`
			`}`
			`NV_FORCEINLINE float exp2f(float x)`
			`{`
			`return powf(2.0f, x);`
			`}`
			`#endif`

			`namespace nv`
			`{`
			`inline float toRadian(float degree) { return degree * (PI / 180.0f); }`
			`inline float toDegree(float radian) { return radian * (180.0f / PI); }`

			`// Robust floating point comparisons:`
			`// http://realtimecollisiondetection.net/blog/?p=89`
			`inline bool equal(const float f0, const float f1, const float epsilon = NV_EPSILON)`
			`{`
			`//return fabs(f0-f1) <= epsilon;`
			`return fabs(f0-f1) <= epsilon * max3(1.0f, fabsf(f0), fabsf(f1));`
			`}`

			`inline bool isZero(const float f, const float epsilon = NV_EPSILON)`
			`{`
			`return fabs(f) <= epsilon;`
			`}`

			`inline bool isFinite(const float f)`
			`{`
			`#if NV_OS_WIN32 || NV_OS_XBOX || NV_OS_DURANGO`
			`return _finite(f) != 0;`
Do not use deprecated stuff that only glibc now implements (cherry picked from commit 687b95b41fb178382cecac7c7f5b1a078503431e) 2018-07-03 16:34:09 +00:00			`#elif NV_OS_DARWIN || NV_OS_FREEBSD || NV_OS_OPENBSD || NV_OS_ORBIS || NV_OS_LINUX`
Import thekla_atlas As requested by reduz, an import of thekla_atlas into thirdparty/ 2017-12-08 14:05:47 +00:00			`return isfinite(f);`
			`#else`
			`# error "isFinite not supported"`
			`#endif`
			`//return std::isfinite (f);`
			`//return finite (f);`
			`}`

			`inline bool isNan(const float f)`
			`{`
			`#if NV_OS_WIN32 || NV_OS_XBOX || NV_OS_DURANGO`
			`return _isnan(f) != 0;`
Do not use deprecated stuff that only glibc now implements (cherry picked from commit 687b95b41fb178382cecac7c7f5b1a078503431e) 2018-07-03 16:34:09 +00:00			`#elif NV_OS_DARWIN || NV_OS_FREEBSD || NV_OS_OPENBSD || NV_OS_ORBIS || NV_OS_LINUX`
Import thekla_atlas As requested by reduz, an import of thekla_atlas into thirdparty/ 2017-12-08 14:05:47 +00:00			`return isnan(f);`
			`#else`
			`# error "isNan not supported"`
			`#endif`
			`}`

			`inline uint log2(uint32 i)`
			`{`
			`uint32 value = 0;`
			`while( i >>= 1 ) value++;`
			`return value;`
			`}`

			`inline uint log2(uint64 i)`
			`{`
			`uint64 value = 0;`
			`while (i >>= 1) value++;`
			`return U32(value);`
			`}`

			`inline float lerp(float f0, float f1, float t)`
			`{`
			`const float s = 1.0f - t;`
			`return f0 * s + f1 * t;`
			`}`

			`inline float square(float f) { return f * f; }`
			`inline int square(int i) { return i * i; }`

			`inline float cube(float f) { return f * f * f; }`
			`inline int cube(int i) { return i * i * i; }`

			`inline float frac(float f)`
			`{`
			`return f - floor(f);`
			`}`

			`inline float floatRound(float f)`
			`{`
			`return floorf(f + 0.5f);`
			`}`

			`// Eliminates negative zeros from a float array.`
			`inline void floatCleanup(float * fp, int n)`
			`{`
			`for (int i = 0; i < n; i++) {`
			`//nvDebugCheck(isFinite(fp[i]));`
			`union { float f; uint32 i; } x = { fp[i] };`
			`if (x.i == 0x80000000) fp[i] = 0.0f;`
			`}`
			`}`

			`inline float saturate(float f) {`
			`return clamp(f, 0.0f, 1.0f);`
			`}`

			`inline float linearstep(float edge0, float edge1, float x) {`
			`// Scale, bias and saturate x to 0..1 range`
			`return saturate((x - edge0) / (edge1 - edge0));`
			`}`

			`inline float smoothstep(float edge0, float edge1, float x) {`
			`x = linearstep(edge0, edge1, x);`

			`// Evaluate polynomial`
			`return x*x*(3 - 2*x);`
			`}`

			`inline int sign(float a)`
			`{`
			`return (a > 0) - (a < 0);`
			`//if (a > 0.0f) return 1;`
			`//if (a < 0.0f) return -1;`
			`//return 0;`
			`}`

			`union Float754 {`
			`unsigned int raw;`
			`float value;`
			`struct {`
			`#if NV_BIG_ENDIAN`
			`unsigned int negative:1;`
			`unsigned int biasedexponent:8;`
			`unsigned int mantissa:23;`
			`#else`
			`unsigned int mantissa:23;`
			`unsigned int biasedexponent:8;`
			`unsigned int negative:1;`
			`#endif`
			`} field;`
			`};`

			`// Return the exponent of x ~ Floor(Log2(x))`
			`inline int floatExponent(float x)`
			`{`
			`Float754 f;`
			`f.value = x;`
			`return (f.field.biasedexponent - 127);`
			`}`


			`// FloatRGB9E5`
			`union Float3SE {`
			`uint32 v;`
			`struct {`
			`#if NV_BIG_ENDIAN`
			`uint32 e : 5;`
			`uint32 zm : 9;`
			`uint32 ym : 9;`
			`uint32 xm : 9;`
			`#else`
			`uint32 xm : 9;`
			`uint32 ym : 9;`
			`uint32 zm : 9;`
			`uint32 e : 5;`
			`#endif`
			`};`
			`};`

			`// FloatR11G11B10`
			`union Float3PK {`
			`uint32 v;`
			`struct {`
			`#if NV_BIG_ENDIAN`
			`uint32 ze : 5;`
			`uint32 zm : 5;`
			`uint32 ye : 5;`
			`uint32 ym : 6;`
			`uint32 xe : 5;`
			`uint32 xm : 6;`
			`#else`
			`uint32 xm : 6;`
			`uint32 xe : 5;`
			`uint32 ym : 6;`
			`uint32 ye : 5;`
			`uint32 zm : 5;`
			`uint32 ze : 5;`
			`#endif`
			`};`
			`};`


			`} // nv`

			`#endif // NV_MATH_H`