Detect the ISA extension when building Embree and adjust its build configuration to match
Embree has specialized implementations for various ISA extensions to improve performance. Its code uses preprocessor definitions to detect which extension to use. Typically, its code is compiled multiple times with different extensions enabled, and at runtime it detects which one should be used. Godot's build system doesn't do this; it simply compiles Embree once, for the base SSE2 instruction set, and uses that. However, the build system doesn't properly guarantee that Embree is actually built for SSE2 only. If Godot is compiled for a newer instruction set (such as by using `-march=x86-64-v3` with GCC/Clang, or `/arch:AVX2` with MSVC), Embree will end up with mixed code paths, and compilation will fail. (Additionally, Godot's copy of the Embree source code omits files that are not used by SSE2 builds but are needed for AVX builds, which causes more build errors.) This commit fixes the compilation issues by finding the highest Embree ISA extension target that's compatible with the compiler flags set by the user, and adjusting the build settings accordingly.
This commit is contained in:
parent
08a49205b9
commit
62e46cde92
|
@ -1,5 +1,8 @@
|
|||
#!/usr/bin/env python
|
||||
|
||||
import re
|
||||
import subprocess
|
||||
|
||||
Import("env")
|
||||
Import("env_modules")
|
||||
|
||||
|
@ -59,25 +62,60 @@ if env["builtin_embree"]:
|
|||
"kernels/bvh/bvh_intersector_hybrid4_bvh4.cpp",
|
||||
]
|
||||
|
||||
embree_avx_src = [
|
||||
"kernels/geometry/primitive8.cpp",
|
||||
"kernels/bvh/bvh_intersector1_bvh8.cpp",
|
||||
"kernels/bvh/bvh_intersector_hybrid4_bvh8.cpp",
|
||||
"kernels/bvh/bvh_intersector_hybrid8_bvh4.cpp",
|
||||
"kernels/bvh/bvh_intersector_hybrid8_bvh8.cpp",
|
||||
]
|
||||
|
||||
embree_avx512_src = [
|
||||
"kernels/bvh/bvh_intersector_hybrid16_bvh4.cpp",
|
||||
"kernels/bvh/bvh_intersector_hybrid16_bvh8.cpp",
|
||||
]
|
||||
|
||||
if env.msvc:
|
||||
flags = env_raycast.subst("$CCFLAGS $CXXFLAGS")
|
||||
m = re.search(r"/arch:(AVX512|AVX2|AVX)", flags)
|
||||
if m is not None:
|
||||
isa = m.group(1).lower()
|
||||
else:
|
||||
isa = "sse2"
|
||||
else:
|
||||
env_isa_test = env_raycast.Clone()
|
||||
env_isa_test.Append(CCFLAGS=["-E", "-dM", "-x c++"])
|
||||
command = env_isa_test.subst(env_isa_test["CXXCOM"], source=File('-'), target=File('-'))
|
||||
defines = subprocess.check_output(command, input='', encoding='utf-8', shell=True)
|
||||
|
||||
def is_defined(name: str) -> bool:
    """Return True if the macro *name* has a ``#define`` line in ``defines``.

    ``defines`` is the text captured from running the compiler with
    ``-E -dM`` just before this function is defined.

    Args:
        name: Exact macro identifier to look for (e.g. ``"__AVX2__"``).

    Returns:
        True when a line of the form ``#define <name>`` is present, where
        the name is followed by whitespace or the end of the text.
    """
    # Escape the name so it is matched literally rather than interpreted as
    # regex syntax; the trailing group prevents prefix matches such as
    # ``__AVX__`` matching inside ``__AVX2__``.
    pattern = fr"^#define\s{re.escape(name)}(?:\s|\Z)"
    return re.search(pattern, defines, flags=re.MULTILINE) is not None
|
||||
|
||||
if is_defined("__F16C__") and is_defined("__AVX2__") and is_defined("__FMA__") and is_defined("__LZCNT__") and is_defined("__BMI__") and is_defined("__BMI2__"):
|
||||
if is_defined("__AVX512F__") and is_defined("__AVX512DQ__") and is_defined("__AVX512CD__") and is_defined("__AVX512BW__") and is_defined("__AVX512VL__"):
|
||||
# Embree also enables AVX2 support when targeting AVX512.
|
||||
isa = "avx512"
|
||||
else:
|
||||
isa = "avx2"
|
||||
elif is_defined("__AVX__"):
|
||||
isa = "avx"
|
||||
elif is_defined("__SSE4_1__") and is_defined("__SSE4_2__"):
|
||||
isa = "sse42"
|
||||
else:
|
||||
isa = "sse2"
|
||||
|
||||
if isa not in ["sse2", "sse42"]:
|
||||
embree_src += embree_avx_src
|
||||
|
||||
if isa == "avx512":
|
||||
embree_src += embree_avx512_src
|
||||
|
||||
thirdparty_sources = [thirdparty_dir + file for file in embree_src]
|
||||
|
||||
env_raycast.Prepend(CPPPATH=[thirdparty_dir, thirdparty_dir + "include"])
|
||||
env_raycast.Append(CPPDEFINES=["EMBREE_TARGET_SSE2", "EMBREE_LOWEST_ISA", "TASKING_INTERNAL"])
|
||||
env_raycast.AppendUnique(CPPDEFINES=["NDEBUG"]) # No assert() even in debug builds.
|
||||
|
||||
if not env.msvc:
|
||||
if env["arch"] in ["x86_64", "x86_32"]:
|
||||
env_raycast.Append(CCFLAGS=["-msse2", "-mxsave"])
|
||||
|
||||
if env["platform"] == "windows":
|
||||
env_raycast.Append(CCFLAGS=["-mstackrealign"])
|
||||
|
||||
if env["platform"] == "windows":
|
||||
if env.msvc:
|
||||
env.Append(LINKFLAGS=["psapi.lib"])
|
||||
else:
|
||||
env.Append(LIBS=["psapi"])
|
||||
|
||||
if env.msvc: # Disable bogus warning about intentional struct padding.
|
||||
env_raycast.Append(CCFLAGS=["/wd4324"])
|
||||
|
||||
|
@ -85,10 +123,72 @@ if env["builtin_embree"]:
|
|||
env_thirdparty.force_optimization_on_debug()
|
||||
env_thirdparty.disable_warnings()
|
||||
env_thirdparty.add_source_files(thirdparty_obj, thirdparty_sources)
|
||||
env_thirdparty.Append(CPPDEFINES=["EMBREE_LOWEST_ISA", "TASKING_INTERNAL"])
|
||||
|
||||
if env["arch"] != "x86_64" or env.msvc:
|
||||
# Embree needs those, it will automatically use SSE2NEON in ARM
|
||||
env_thirdparty.Append(CPPDEFINES=["__SSE2__", "__SSE__"])
|
||||
# These defines are used for MSVC (to signal SSE support) and for ARM (to enable use of NEON in Embree code).
|
||||
sse2_defines = ["__SSE__", "__SSE2__"]
|
||||
sse42_defines = ["__SSE4_1__", "__SSE4_2__"]
|
||||
if env.msvc:
|
||||
sse42_defines = sse2_defines + ["__SSE3__", "__SSSE3__"] + sse42_defines
|
||||
avx_defines = ["__AVX__", "__BMI__", "__BMI2__", "__LZCNT__"] + sse42_defines
|
||||
avx2_defines = ["__AVX2__"] + avx_defines
|
||||
|
||||
if not env.msvc:
|
||||
# To avoid issues when a certain ISA is partially enabled (for example, when using `-mavx512f` but not `-mavx512vl`),
|
||||
# explicitly disable ISAs higher than the target one.
|
||||
no_avx512_flags = ["-mno-avx512f", "-mno-avx512dq", "-mno-avx512cd", "-mno-avx512bw", "-mno-avx512vl"]
|
||||
no_avx2_flags = ["-mno-f16c", "-mno-avx2", "-mno-fma", "-mno-lzcnt", "-mno-bmi", "-mno-bmi2"]
|
||||
no_avx_flags = ["-mno-avx"]
|
||||
no_sse42_flags = ["-mno-sse4.2"]
|
||||
|
||||
sse2_flags = no_sse42_flags + no_avx_flags + no_avx2_flags + no_avx512_flags
|
||||
sse42_flags = no_avx_flags + no_avx2_flags + no_avx512_flags
|
||||
avx_flags = no_avx2_flags + no_avx512_flags
|
||||
avx2_flags = no_avx512_flags
|
||||
|
||||
arm = env["arch"] in ["arm32", "arm64"]
|
||||
if isa == "sse2":
|
||||
env_thirdparty.Append(CPPDEFINES=["EMBREE_TARGET_SSE2"])
|
||||
|
||||
if env.msvc or arm:
|
||||
env_thirdparty.Append(CPPDEFINES=sse2_defines)
|
||||
else:
|
||||
env_thirdparty.Append(CCFLAGS=sse2_flags)
|
||||
|
||||
if env["platform"] == "windows" and not env.msvc and env["arch"] == "x86_32":
|
||||
env_thirdparty.Append(CCFLAGS=["-mstackrealign"])
|
||||
elif isa == "sse42":
|
||||
env_thirdparty.Append(CPPDEFINES=["EMBREE_TARGET_SSE42"])
|
||||
|
||||
if env.msvc or arm:
|
||||
env_thirdparty.Append(CPPDEFINES=sse42_defines)
|
||||
else:
|
||||
env_thirdparty.Append(CCFLAGS=sse42_flags)
|
||||
|
||||
if env["platform"] == "windows" and not env.msvc and env["arch"] == "x86_32":
|
||||
env_thirdparty.Append(CCFLAGS=["-mstackrealign"])
|
||||
elif isa == "avx":
|
||||
env_thirdparty.Append(CPPDEFINES=["EMBREE_TARGET_AVX"])
|
||||
|
||||
if arm:
|
||||
env_thirdparty.Append(CPPDEFINES=avx_defines)
|
||||
elif not env.msvc:
|
||||
env_thirdparty.Append(CCFLAGS=avx_flags)
|
||||
elif isa == "avx2":
|
||||
env_thirdparty.Append(CPPDEFINES=["EMBREE_TARGET_AVX2"])
|
||||
|
||||
if arm:
|
||||
env_thirdparty.Append(CPPDEFINES=avx2_defines)
|
||||
elif not env.msvc:
|
||||
env_thirdparty.Append(CCFLAGS=avx2_flags)
|
||||
elif isa == "avx512":
|
||||
env_thirdparty.Append(CPPDEFINES=["EMBREE_TARGET_AVX512"])
|
||||
|
||||
if env["platform"] == "windows":
|
||||
if env.msvc:
|
||||
env.Append(LINKFLAGS=["psapi.lib"])
|
||||
else:
|
||||
env.Append(LIBS=["psapi"])
|
||||
|
||||
if env["platform"] == "web":
|
||||
env_thirdparty.Append(CXXFLAGS=["-msimd128"])
|
||||
|
|
Loading…
Reference in New Issue