// SPDX-License-Identifier: Apache-2.0 // ---------------------------------------------------------------------------- // Copyright 2020-2022 Arm Limited // // Licensed under the Apache License, Version 2.0 (the "License"); you may not // use this file except in compliance with the License. You may obtain a copy // of the License at: // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the // License for the specific language governing permissions and limitations // under the License. // ---------------------------------------------------------------------------- /** * @brief Platform-specific function implementations. * * This module contains functions for querying the host extended ISA support. */ // Include before the defines below to pick up any auto-setup based on compiler // built-in config, if not being set explicitly by the build system #include "astcenc_internal.h" #if (ASTCENC_SSE > 0) || (ASTCENC_AVX > 0) || \ (ASTCENC_POPCNT > 0) || (ASTCENC_F16C > 0) static bool g_init { false }; /** Does this CPU support SSE 4.1? Set to -1 if not yet initialized. */ static bool g_cpu_has_sse41 { false }; /** Does this CPU support AVX2? Set to -1 if not yet initialized. */ static bool g_cpu_has_avx2 { false }; /** Does this CPU support POPCNT? Set to -1 if not yet initialized. */ static bool g_cpu_has_popcnt { false }; /** Does this CPU support F16C? Set to -1 if not yet initialized. */ static bool g_cpu_has_f16c { false }; /* ============================================================================ Platform code for Visual Studio ============================================================================ */ #if !defined(__clang__) && defined(_MSC_VER) #define WIN32_LEAN_AND_MEAN #include <windows.h> #include <intrin.h> /** * @brief Detect platform CPU ISA support and update global trackers. */ static void detect_cpu_isa() { int data[4]; __cpuid(data, 0); int num_id = data[0]; if (num_id >= 1) { __cpuidex(data, 1, 0); // SSE41 = Bank 1, ECX, bit 19 g_cpu_has_sse41 = data[2] & (1 << 19) ? true : false; // POPCNT = Bank 1, ECX, bit 23 g_cpu_has_popcnt = data[2] & (1 << 23) ? true : false; // F16C = Bank 1, ECX, bit 29 g_cpu_has_f16c = data[2] & (1 << 29) ? true : false; } if (num_id >= 7) { __cpuidex(data, 7, 0); // AVX2 = Bank 7, EBX, bit 5 g_cpu_has_avx2 = data[1] & (1 << 5) ? true : false; } // Ensure state bits are updated before init flag is updated MemoryBarrier(); g_init = true; } /* ============================================================================ Platform code for GCC and Clang ============================================================================ */ #else #include <cpuid.h> /** * @brief Detect platform CPU ISA support and update global trackers. */ static void detect_cpu_isa() { unsigned int data[4]; if (__get_cpuid_count(1, 0, &data[0], &data[1], &data[2], &data[3])) { // SSE41 = Bank 1, ECX, bit 19 g_cpu_has_sse41 = data[2] & (1 << 19) ? true : false; // POPCNT = Bank 1, ECX, bit 23 g_cpu_has_popcnt = data[2] & (1 << 23) ? true : false; // F16C = Bank 1, ECX, bit 29 g_cpu_has_f16c = data[2] & (1 << 29) ? true : false; } g_cpu_has_avx2 = 0; if (__get_cpuid_count(7, 0, &data[0], &data[1], &data[2], &data[3])) { // AVX2 = Bank 7, EBX, bit 5 g_cpu_has_avx2 = data[1] & (1 << 5) ? true : false; } // Ensure state bits are updated before init flag is updated __sync_synchronize(); g_init = true; } #endif /* See header for documentation. */ bool cpu_supports_popcnt() { if (!g_init) { detect_cpu_isa(); } return g_cpu_has_popcnt; } /* See header for documentation. */ bool cpu_supports_f16c() { if (!g_init) { detect_cpu_isa(); } return g_cpu_has_f16c; } /* See header for documentation. */ bool cpu_supports_sse41() { if (!g_init) { detect_cpu_isa(); } return g_cpu_has_sse41; } /* See header for documentation. */ bool cpu_supports_avx2() { if (!g_init) { detect_cpu_isa(); } return g_cpu_has_avx2; } #endif