Detect the ISA extension when building Embree and adjust its build configuration to match

Embree has specialized implementations for various ISA extensions to
improve performance. Its code uses preprocessor definitions to detect
which extension to use. Typically, its code is compiled multiple times,
with different extensions enabled, and, at runtime, it detects which one
should be used.

Godot's build system doesn't do this; it simply compiles Embree once,
for the base SSE2 instruction set, and uses that. However, it doesn't
guarantee that Embree is actually built for that instruction set. If Godot is compiled for
a newer instruction set (such as by using `-march=x86-64-v3` with
GCC/Clang, or `/arch:AVX2` with MSVC), Embree will end up with mixed
code paths, and compilation will fail. (Additionally, Godot's copy of
the Embree source code omits files that are not used by SSE2 builds, but
are needed for AVX builds, which causes more build errors.)

This commit fixes the compilation issues by finding the highest Embree
ISA extension target that's compatible with the compiler flags set
by the user, and adjusting the build settings accordingly.
This commit is contained in:
Joaquim Monteiro 2024-08-23 10:42:46 +01:00
parent 08a49205b9
commit 62e46cde92
No known key found for this signature in database
GPG Key ID: D22C1EE6990BF1B3
1 changed files with 117 additions and 17 deletions

View File

@ -1,5 +1,8 @@
#!/usr/bin/env python
import re
import subprocess
Import("env")
Import("env_modules")
@ -59,25 +62,60 @@ if env["builtin_embree"]:
"kernels/bvh/bvh_intersector_hybrid4_bvh4.cpp",
]
# Extra Embree sources that are appended to `embree_src` only when the detected
# `isa` is neither "sse2" nor "sse42" (see the `isa not in [...]` check further down).
embree_avx_src = [
"kernels/geometry/primitive8.cpp",
"kernels/bvh/bvh_intersector1_bvh8.cpp",
"kernels/bvh/bvh_intersector_hybrid4_bvh8.cpp",
"kernels/bvh/bvh_intersector_hybrid8_bvh4.cpp",
"kernels/bvh/bvh_intersector_hybrid8_bvh8.cpp",
]
# Extra Embree sources that are appended in addition to the list above
# only when `isa == "avx512"`.
embree_avx512_src = [
"kernels/bvh/bvh_intersector_hybrid16_bvh4.cpp",
"kernels/bvh/bvh_intersector_hybrid16_bvh8.cpp",
]
# Determine `isa`, the Embree ISA target matching the compiler flags in use.
# MSVC: look for an `/arch:` option in the expanded C and C++ flags;
# when none is present, fall back to "sse2".
if env.msvc:
flags = env_raycast.subst("$CCFLAGS $CXXFLAGS")
# AVX512 and AVX2 are listed before AVX so the longer names are tried first.
m = re.search(r"/arch:(AVX512|AVX2|AVX)", flags)
if m is not None:
# Lowercased to match the "avx" / "avx2" / "avx512" strings compared against later.
isa = m.group(1).lower()
else:
isa = "sse2"
else:
# Other compilers: expand the C++ compile command with `-E -dM -x c++` added
# (with GCC/Clang this dumps the preprocessor's macro definitions), run it on
# empty input and keep the output as text in `defines`.
env_isa_test = env_raycast.Clone()
env_isa_test.Append(CCFLAGS=["-E", "-dM", "-x c++"])
# NOTE(review): "-" is used as both source and target — presumably stdin/stdout; confirm for all supported toolchains.
command = env_isa_test.subst(env_isa_test["CXXCOM"], source=File('-'), target=File('-'))
# check_output raises subprocess.CalledProcessError if the command exits with a non-zero status.
defines = subprocess.check_output(command, input='', encoding='utf-8', shell=True)
def is_defined(name: str) -> bool:
    """Return whether the captured macro dump in ``defines`` defines ``name``.

    A macro counts as defined when some line of ``defines`` starts with
    ``#define``, one whitespace character and ``name``, followed by either
    whitespace or the end of the text.

    ``name`` is escaped before being placed in the pattern, so it is always
    matched literally even if it contains regex metacharacters.
    """
    pattern = rf"^#define\s{re.escape(name)}(?:\s|\Z)"
    return re.search(pattern, defines, flags=re.MULTILINE) is not None
# Classify the target from the predefined macros, checking the highest ISA first.
# "avx2" requires F16C, AVX2, FMA, LZCNT, BMI and BMI2 to all be defined;
# "avx512" additionally requires the AVX512 F, DQ, CD, BW and VL macros.
if is_defined("__F16C__") and is_defined("__AVX2__") and is_defined("__FMA__") and is_defined("__LZCNT__") and is_defined("__BMI__") and is_defined("__BMI2__"):
if is_defined("__AVX512F__") and is_defined("__AVX512DQ__") and is_defined("__AVX512CD__") and is_defined("__AVX512BW__") and is_defined("__AVX512VL__"):
# Embree also enables AVX2 support when targeting AVX512.
isa = "avx512"
else:
isa = "avx2"
elif is_defined("__AVX__"):
isa = "avx"
# "sse42" requires both the SSE4.1 and the SSE4.2 macros.
elif is_defined("__SSE4_1__") and is_defined("__SSE4_2__"):
isa = "sse42"
else:
# Nothing newer was detected: use the baseline target.
isa = "sse2"
# "avx", "avx2" and "avx512" all pull in the sources from `embree_avx_src`.
if isa not in ["sse2", "sse42"]:
embree_src += embree_avx_src
# "avx512" additionally pulls in the sources from `embree_avx512_src`.
if isa == "avx512":
embree_src += embree_avx512_src
# Prefix every selected source path with the third-party directory.
thirdparty_sources = [thirdparty_dir + file for file in embree_src]
# Put the bundled Embree directory and its `include` directory first on the include path.
env_raycast.Prepend(CPPPATH=[thirdparty_dir, thirdparty_dir + "include"])
# NOTE(review): this hard-codes EMBREE_TARGET_SSE2 regardless of the detected `isa`;
# SOURCE looks like a rendered diff with old and new lines interleaved — confirm whether this line still applies.
env_raycast.Append(CPPDEFINES=["EMBREE_TARGET_SSE2", "EMBREE_LOWEST_ISA", "TASKING_INTERNAL"])
env_raycast.AppendUnique(CPPDEFINES=["NDEBUG"]) # No assert() even in debug builds.
# Non-MSVC compilers on x86 targets get -msse2 and -mxsave; Windows targets also get -mstackrealign.
if not env.msvc:
if env["arch"] in ["x86_64", "x86_32"]:
env_raycast.Append(CCFLAGS=["-msse2", "-mxsave"])
if env["platform"] == "windows":
env_raycast.Append(CCFLAGS=["-mstackrealign"])
# Windows builds link against psapi: as a linker flag with MSVC, as a library otherwise.
if env["platform"] == "windows":
if env.msvc:
env.Append(LINKFLAGS=["psapi.lib"])
else:
env.Append(LIBS=["psapi"])
if env.msvc: # Disable bogus warning about intentional struct padding.
env_raycast.Append(CCFLAGS=["/wd4324"])
@ -85,10 +123,72 @@ if env["builtin_embree"]:
# Configure the environment used for the bundled Embree sources and register them for compilation.
env_thirdparty.force_optimization_on_debug()
env_thirdparty.disable_warnings()
env_thirdparty.add_source_files(thirdparty_obj, thirdparty_sources)
env_thirdparty.Append(CPPDEFINES=["EMBREE_LOWEST_ISA", "TASKING_INTERNAL"])
# NOTE(review): the unconditional SSE defines below overlap with the per-ISA `sse2_defines` handling further down;
# SOURCE looks like a rendered diff with old and new lines interleaved — confirm which version applies.
if env["arch"] != "x86_64" or env.msvc:
# Embree needs those, it will automatically use SSE2NEON in ARM
env_thirdparty.Append(CPPDEFINES=["__SSE2__", "__SSE__"])
# These defines are used for MSVC (to signal SSE support) and for ARM (to enable use of NEON in Embree code).
sse2_defines = ["__SSE__", "__SSE2__"]
sse42_defines = ["__SSE4_1__", "__SSE4_2__"]
# With MSVC the SSE4.2 list is expanded to also carry the SSE, SSE2, SSE3 and SSSE3 macros.
if env.msvc:
sse42_defines = sse2_defines + ["__SSE3__", "__SSSE3__"] + sse42_defines
# Each higher level's define list includes the list of the level below it.
avx_defines = ["__AVX__", "__BMI__", "__BMI2__", "__LZCNT__"] + sse42_defines
avx2_defines = ["__AVX2__"] + avx_defines
if not env.msvc:
# To avoid issues when a certain ISA is partially enabled (for example, when using `-mavx512f` but not `-mavx512vl`),
# explicitly disable ISAs higher than the target one.
no_avx512_flags = ["-mno-avx512f", "-mno-avx512dq", "-mno-avx512cd", "-mno-avx512bw", "-mno-avx512vl"]
no_avx2_flags = ["-mno-f16c", "-mno-avx2", "-mno-fma", "-mno-lzcnt", "-mno-bmi", "-mno-bmi2"]
no_avx_flags = ["-mno-avx"]
no_sse42_flags = ["-mno-sse4.2"]
# `<isa>_flags` is the concatenation of the disable lists of every level above that target.
sse2_flags = no_sse42_flags + no_avx_flags + no_avx2_flags + no_avx512_flags
sse42_flags = no_avx_flags + no_avx2_flags + no_avx512_flags
avx_flags = no_avx2_flags + no_avx512_flags
avx2_flags = no_avx512_flags
# Apply the settings for the detected `isa` to the Embree build environment.
# Every branch adds the matching EMBREE_TARGET_* define; MSVC and ARM builds receive
# feature macros as defines, other compilers receive the `-mno-*` flag lists instead.
arm = env["arch"] in ["arm32", "arm64"]
if isa == "sse2":
env_thirdparty.Append(CPPDEFINES=["EMBREE_TARGET_SSE2"])
if env.msvc or arm:
env_thirdparty.Append(CPPDEFINES=sse2_defines)
else:
env_thirdparty.Append(CCFLAGS=sse2_flags)
# 32-bit x86 Windows builds with a non-MSVC compiler also get -mstackrealign.
if env["platform"] == "windows" and not env.msvc and env["arch"] == "x86_32":
env_thirdparty.Append(CCFLAGS=["-mstackrealign"])
elif isa == "sse42":
env_thirdparty.Append(CPPDEFINES=["EMBREE_TARGET_SSE42"])
if env.msvc or arm:
env_thirdparty.Append(CPPDEFINES=sse42_defines)
else:
env_thirdparty.Append(CCFLAGS=sse42_flags)
# Same -mstackrealign handling as in the "sse2" branch.
if env["platform"] == "windows" and not env.msvc and env["arch"] == "x86_32":
env_thirdparty.Append(CCFLAGS=["-mstackrealign"])
elif isa == "avx":
env_thirdparty.Append(CPPDEFINES=["EMBREE_TARGET_AVX"])
# From here on MSVC gets no extra defines or flags; only ARM (defines) and other compilers (flags) do.
if arm:
env_thirdparty.Append(CPPDEFINES=avx_defines)
elif not env.msvc:
env_thirdparty.Append(CCFLAGS=avx_flags)
elif isa == "avx2":
env_thirdparty.Append(CPPDEFINES=["EMBREE_TARGET_AVX2"])
if arm:
env_thirdparty.Append(CPPDEFINES=avx2_defines)
elif not env.msvc:
env_thirdparty.Append(CCFLAGS=avx2_flags)
elif isa == "avx512":
# The highest target: only the target define is added, no extra defines or disable flags.
env_thirdparty.Append(CPPDEFINES=["EMBREE_TARGET_AVX512"])
# Windows builds link against psapi: as a linker flag with MSVC, as a library otherwise.
if env["platform"] == "windows":
if env.msvc:
env.Append(LINKFLAGS=["psapi.lib"])
else:
env.Append(LIBS=["psapi"])
# Web builds compile the Embree sources with -msimd128.
if env["platform"] == "web":
env_thirdparty.Append(CXXFLAGS=["-msimd128"])