From 62e46cde926915611e1c2e52c6d4c963492f537e Mon Sep 17 00:00:00 2001 From: Joaquim Monteiro Date: Fri, 23 Aug 2024 10:42:46 +0100 Subject: [PATCH] Detect the ISA extension when building Embree and adjust its build configuration to match Embree has specialized implementations for various ISA extensions to improve performance. Its code uses preprocessor definitions to detect which extension to use. Typically, its code is compiled multiple times, with different extensions enabled, and, at runtime, it detects which one should be used. Godot's build system doesn't do this; it simply compiles Embree once, for the base SSE2 instruction set, and uses that. However, it doesn't guarantee that the compiler flags in effect actually match that SSE2 configuration. If Godot is compiled for a newer instruction set (such as by using `-march=x86-64-v3` with GCC/Clang, or `/arch:AVX2` with MSVC), Embree will end up with mixed code paths, and compilation will fail. (Additionally, Godot's copy of the Embree source code omits files that are not used by SSE2 builds, but are needed for AVX builds, which causes more build errors.) This commit fixes the compilation issues by finding the highest Embree ISA extension target that's compatible with the compiler flags set by the user, and adjusting the build settings accordingly. 
--- modules/raycast/SCsub | 134 ++++++++++++++++++++++++++++++++++++------ 1 file changed, 117 insertions(+), 17 deletions(-) diff --git a/modules/raycast/SCsub b/modules/raycast/SCsub index f3a8e307630..38debe43997 100644 --- a/modules/raycast/SCsub +++ b/modules/raycast/SCsub @@ -1,5 +1,8 @@ #!/usr/bin/env python +import re +import subprocess + Import("env") Import("env_modules") @@ -59,25 +62,60 @@ if env["builtin_embree"]: "kernels/bvh/bvh_intersector_hybrid4_bvh4.cpp", ] + embree_avx_src = [ + "kernels/geometry/primitive8.cpp", + "kernels/bvh/bvh_intersector1_bvh8.cpp", + "kernels/bvh/bvh_intersector_hybrid4_bvh8.cpp", + "kernels/bvh/bvh_intersector_hybrid8_bvh4.cpp", + "kernels/bvh/bvh_intersector_hybrid8_bvh8.cpp", + ] + + embree_avx512_src = [ + "kernels/bvh/bvh_intersector_hybrid16_bvh4.cpp", + "kernels/bvh/bvh_intersector_hybrid16_bvh8.cpp", + ] + + if env.msvc: + flags = env_raycast.subst("$CCFLAGS $CXXFLAGS") + m = re.search(r"/arch:(AVX512|AVX2|AVX)", flags) + if m is not None: + isa = m.group(1).lower() + else: + isa = "sse2" + else: + env_isa_test = env_raycast.Clone() + env_isa_test.Append(CCFLAGS=["-E", "-dM", "-x c++"]) + command = env_isa_test.subst(env_isa_test["CXXCOM"], source=File('-'), target=File('-')) + defines = subprocess.check_output(command, input='', encoding='utf-8', shell=True) + + def is_defined(name: str) -> bool: + m = re.search(fr"^#define\s{name}(?:\s|\Z)", defines, flags=re.MULTILINE) + return m is not None + + if is_defined("__F16C__") and is_defined("__AVX2__") and is_defined("__FMA__") and is_defined("__LZCNT__") and is_defined("__BMI__") and is_defined("__BMI2__"): + if is_defined("__AVX512F__") and is_defined("__AVX512DQ__") and is_defined("__AVX512CD__") and is_defined("__AVX512BW__") and is_defined("__AVX512VL__"): + # Embree also enables AVX2 support when targeting AVX512. 
+ isa = "avx512" + else: + isa = "avx2" + elif is_defined("__AVX__"): + isa = "avx" + elif is_defined("__SSE4_1__") and is_defined("__SSE4_2__"): + isa = "sse42" + else: + isa = "sse2" + + if isa not in ["sse2", "sse42"]: + embree_src += embree_avx_src + + if isa == "avx512": + embree_src += embree_avx512_src + thirdparty_sources = [thirdparty_dir + file for file in embree_src] env_raycast.Prepend(CPPPATH=[thirdparty_dir, thirdparty_dir + "include"]) - env_raycast.Append(CPPDEFINES=["EMBREE_TARGET_SSE2", "EMBREE_LOWEST_ISA", "TASKING_INTERNAL"]) env_raycast.AppendUnique(CPPDEFINES=["NDEBUG"]) # No assert() even in debug builds. - if not env.msvc: - if env["arch"] in ["x86_64", "x86_32"]: - env_raycast.Append(CCFLAGS=["-msse2", "-mxsave"]) - - if env["platform"] == "windows": - env_raycast.Append(CCFLAGS=["-mstackrealign"]) - - if env["platform"] == "windows": - if env.msvc: - env.Append(LINKFLAGS=["psapi.lib"]) - else: - env.Append(LIBS=["psapi"]) - if env.msvc: # Disable bogus warning about intentional struct padding. env_raycast.Append(CCFLAGS=["/wd4324"]) @@ -85,10 +123,72 @@ if env["builtin_embree"]: env_thirdparty.force_optimization_on_debug() env_thirdparty.disable_warnings() env_thirdparty.add_source_files(thirdparty_obj, thirdparty_sources) + env_thirdparty.Append(CPPDEFINES=["EMBREE_LOWEST_ISA", "TASKING_INTERNAL"]) - if env["arch"] != "x86_64" or env.msvc: - # Embree needs those, it will automatically use SSE2NEON in ARM - env_thirdparty.Append(CPPDEFINES=["__SSE2__", "__SSE__"]) + # These defines are used for MSVC (to signal SSE support) and for ARM (to enable use of NEON in Embree code). 
+ sse2_defines = ["__SSE__", "__SSE2__"] + sse42_defines = ["__SSE4_1__", "__SSE4_2__"] + if env.msvc: + sse42_defines = sse2_defines + ["__SSE3__", "__SSSE3__"] + sse42_defines + avx_defines = ["__AVX__", "__BMI__", "__BMI2__", "__LZCNT__"] + sse42_defines + avx2_defines = ["__AVX2__"] + avx_defines + + if not env.msvc: + # To avoid issues when a certain ISA is partially enabled (for example, when using `-mavx512f` but not `-mavx512vl`), + # explicitly disable ISAs higher than the target one. + no_avx512_flags = ["-mno-avx512f", "-mno-avx512dq", "-mno-avx512cd", "-mno-avx512bw", "-mno-avx512vl"] + no_avx2_flags = ["-mno-f16c", "-mno-avx2", "-mno-fma", "-mno-lzcnt", "-mno-bmi", "-mno-bmi2"] + no_avx_flags = ["-mno-avx"] + no_sse42_flags = ["-mno-sse4.2"] + + sse2_flags = no_sse42_flags + no_avx_flags + no_avx2_flags + no_avx512_flags + sse42_flags = no_avx_flags + no_avx2_flags + no_avx512_flags + avx_flags = no_avx2_flags + no_avx512_flags + avx2_flags = no_avx512_flags + + arm = env["arch"] in ["arm32", "arm64"] + if isa == "sse2": + env_thirdparty.Append(CPPDEFINES=["EMBREE_TARGET_SSE2"]) + + if env.msvc or arm: + env_thirdparty.Append(CPPDEFINES=sse2_defines) + else: + env_thirdparty.Append(CCFLAGS=sse2_flags) + + if env["platform"] == "windows" and not env.msvc and env["arch"] == "x86_32": + env_thirdparty.Append(CCFLAGS=["-mstackrealign"]) + elif isa == "sse42": + env_thirdparty.Append(CPPDEFINES=["EMBREE_TARGET_SSE42"]) + + if env.msvc or arm: + env_thirdparty.Append(CPPDEFINES=sse42_defines) + else: + env_thirdparty.Append(CCFLAGS=sse42_flags) + + if env["platform"] == "windows" and not env.msvc and env["arch"] == "x86_32": + env_thirdparty.Append(CCFLAGS=["-mstackrealign"]) + elif isa == "avx": + env_thirdparty.Append(CPPDEFINES=["EMBREE_TARGET_AVX"]) + + if arm: + env_thirdparty.Append(CPPDEFINES=avx_defines) + elif not env.msvc: + env_thirdparty.Append(CCFLAGS=avx_flags) + elif isa == "avx2": + env_thirdparty.Append(CPPDEFINES=["EMBREE_TARGET_AVX2"]) + + 
if arm: + env_thirdparty.Append(CPPDEFINES=avx2_defines) + elif not env.msvc: + env_thirdparty.Append(CCFLAGS=avx2_flags) + elif isa == "avx512": + env_thirdparty.Append(CPPDEFINES=["EMBREE_TARGET_AVX512"]) + + if env["platform"] == "windows": + if env.msvc: + env.Append(LINKFLAGS=["psapi.lib"]) + else: + env.Append(LIBS=["psapi"]) if env["platform"] == "web": env_thirdparty.Append(CXXFLAGS=["-msimd128"])