From 0211b011749a10fb280c67531855ab8d04214371 Mon Sep 17 00:00:00 2001 From: Peter Steinbach Date: Mon, 30 Apr 2018 09:28:44 +0200 Subject: [PATCH 01/17] fixed include statement in tests --- include/detail/definitions.hpp | 4 ++-- single_include/compass.hpp | 5 ++++- tests/test_bitview.cpp | 2 +- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/include/detail/definitions.hpp b/include/detail/definitions.hpp index d313a4c..56ba956 100644 --- a/include/detail/definitions.hpp +++ b/include/detail/definitions.hpp @@ -1,8 +1,6 @@ #ifndef COMPASS_DEFINITIONS_H #define COMPASS_DEFINITIONS_H -#include "ct/preprocessor_impl.hpp" - namespace compass { namespace compiletime { @@ -26,4 +24,6 @@ namespace compass { }; +//#include "ct/preprocessor_impl.hpp" + #endif /* DEFINITIONS_H */ diff --git a/single_include/compass.hpp b/single_include/compass.hpp index cdc2bba..f08568e 100644 --- a/single_include/compass.hpp +++ b/single_include/compass.hpp @@ -47,6 +47,8 @@ namespace compass { }; + + #endif namespace compass { @@ -261,6 +263,7 @@ namespace compass { #ifndef COMPASS_X86_CPUID_COMMON_H #define COMPASS_X86_CPUID_COMMON_H +#ifndef COMPASS_CT_COMP_MSVC #include "cpuid.h" namespace compass { @@ -299,7 +302,7 @@ namespace compass { }; }; - +#endif #endif #ifndef COMPASS_RT_X86_LLVM_CPUID_H_ #define COMPASS_RT_X86_LLVM_CPUID_H_ diff --git a/tests/test_bitview.cpp b/tests/test_bitview.cpp index 43d8407..908e804 100644 --- a/tests/test_bitview.cpp +++ b/tests/test_bitview.cpp @@ -1,6 +1,6 @@ #include "catch.hpp" -#include "detail/bit_view.hpp" +#include "compass.hpp" #include From 127ed5e0d2fc0c549db8e4bf8ab9c5d3001a32e2 Mon Sep 17 00:00:00 2001 From: Peter Steinbach Date: Mon, 30 Apr 2018 09:50:11 +0200 Subject: [PATCH 02/17] created minimal working version on Linux and Mac --- cmake/FindSSE.cmake | 104 ++++++++++++++++++++++++++++++++ tests/CMakeLists.txt | 10 ++++ tests/build_machine.hpp.in | 85 ++++++++++++++++++++++++++ tests/test_build_machine.cpp | 113 
+++++++++++++++++++++++++++++++++++ 4 files changed, 312 insertions(+) create mode 100644 cmake/FindSSE.cmake create mode 100644 tests/build_machine.hpp.in create mode 100644 tests/test_build_machine.cpp diff --git a/cmake/FindSSE.cmake b/cmake/FindSSE.cmake new file mode 100644 index 0000000..6ece876 --- /dev/null +++ b/cmake/FindSSE.cmake @@ -0,0 +1,104 @@ +# Check if SSE instructions are available on the machine where +# the project is compiled. + +IF(CMAKE_SYSTEM_NAME MATCHES "Linux") + EXEC_PROGRAM(cat ARGS "/proc/cpuinfo" OUTPUT_VARIABLE CPUINFO) + + STRING(REGEX REPLACE "^.*(sse2).*$" "\\1" SSE_THERE ${CPUINFO}) + STRING(COMPARE EQUAL "sse2" "${SSE_THERE}" SSE2_TRUE) + IF (SSE2_TRUE) + set(SSE2_FOUND true CACHE BOOL "SSE2 available on host") + ELSE (SSE2_TRUE) + set(SSE2_FOUND false CACHE BOOL "SSE2 available on host") + ENDIF (SSE2_TRUE) + + # /proc/cpuinfo apparently omits sse3 :( + STRING(REGEX REPLACE "^.*[^s](sse3).*$" "\\1" SSE_THERE ${CPUINFO}) + STRING(COMPARE EQUAL "sse3" "${SSE_THERE}" SSE3_TRUE) + IF (NOT SSE3_TRUE) + STRING(REGEX REPLACE "^.*(T2300).*$" "\\1" SSE_THERE ${CPUINFO}) + STRING(COMPARE EQUAL "T2300" "${SSE_THERE}" SSE3_TRUE) + ENDIF (NOT SSE3_TRUE) + + STRING(REGEX REPLACE "^.*(ssse3).*$" "\\1" SSE_THERE ${CPUINFO}) + STRING(COMPARE EQUAL "ssse3" "${SSE_THERE}" SSSE3_TRUE) + IF (SSE3_TRUE OR SSSE3_TRUE) + set(SSE3_FOUND true CACHE BOOL "SSE3 available on host") + ELSE (SSE3_TRUE OR SSSE3_TRUE) + set(SSE3_FOUND false CACHE BOOL "SSE3 available on host") + ENDIF (SSE3_TRUE OR SSSE3_TRUE) + IF (SSSE3_TRUE) + set(SSSE3_FOUND true CACHE BOOL "SSSE3 available on host") + ELSE (SSSE3_TRUE) + set(SSSE3_FOUND false CACHE BOOL "SSSE3 available on host") + ENDIF (SSSE3_TRUE) + + STRING(REGEX REPLACE "^.*(sse4_1).*$" "\\1" SSE_THERE ${CPUINFO}) + STRING(COMPARE EQUAL "sse4_1" "${SSE_THERE}" SSE41_TRUE) + IF (SSE41_TRUE) + set(SSE4_1_FOUND true CACHE BOOL "SSE4.1 available on host") + ELSE (SSE41_TRUE) + set(SSE4_1_FOUND false CACHE BOOL "SSE4.1 
available on host") + ENDIF (SSE41_TRUE) +ELSEIF(CMAKE_SYSTEM_NAME MATCHES "Darwin") + EXEC_PROGRAM("/usr/sbin/sysctl -n machdep.cpu.features" OUTPUT_VARIABLE + CPUINFO) + + STRING(REGEX REPLACE "^.*[^S](SSE2).*$" "\\1" SSE_THERE ${CPUINFO}) + STRING(COMPARE EQUAL "SSE2" "${SSE_THERE}" SSE2_TRUE) + IF (SSE2_TRUE) + set(SSE2_FOUND true CACHE BOOL "SSE2 available on host") + ELSE (SSE2_TRUE) + set(SSE2_FOUND false CACHE BOOL "SSE2 available on host") + ENDIF (SSE2_TRUE) + + STRING(REGEX REPLACE "^.*[^S](SSE3).*$" "\\1" SSE_THERE ${CPUINFO}) + STRING(COMPARE EQUAL "SSE3" "${SSE_THERE}" SSE3_TRUE) + IF (SSE3_TRUE) + set(SSE3_FOUND true CACHE BOOL "SSE3 available on host") + ELSE (SSE3_TRUE) + set(SSE3_FOUND false CACHE BOOL "SSE3 available on host") + ENDIF (SSE3_TRUE) + + STRING(REGEX REPLACE "^.*(SSSE3).*$" "\\1" SSE_THERE ${CPUINFO}) + STRING(COMPARE EQUAL "SSSE3" "${SSE_THERE}" SSSE3_TRUE) + IF (SSSE3_TRUE) + set(SSSE3_FOUND true CACHE BOOL "SSSE3 available on host") + ELSE (SSSE3_TRUE) + set(SSSE3_FOUND false CACHE BOOL "SSSE3 available on host") + ENDIF (SSSE3_TRUE) + + STRING(REGEX REPLACE "^.*(SSE4.1).*$" "\\1" SSE_THERE ${CPUINFO}) + STRING(COMPARE EQUAL "SSE4.1" "${SSE_THERE}" SSE41_TRUE) + IF (SSE41_TRUE) + set(SSE4_1_FOUND true CACHE BOOL "SSE4.1 available on host") + ELSE (SSE41_TRUE) + set(SSE4_1_FOUND false CACHE BOOL "SSE4.1 available on host") + ENDIF (SSE41_TRUE) +ELSEIF(CMAKE_SYSTEM_NAME MATCHES "Windows") + # TODO + set(SSE2_FOUND true CACHE BOOL "SSE2 available on host") + set(SSE3_FOUND false CACHE BOOL "SSE3 available on host") + set(SSSE3_FOUND false CACHE BOOL "SSSE3 available on host") + set(SSE4_1_FOUND false CACHE BOOL "SSE4.1 available on host") +ELSE(CMAKE_SYSTEM_NAME MATCHES "Linux") + set(SSE2_FOUND true CACHE BOOL "SSE2 available on host") + set(SSE3_FOUND false CACHE BOOL "SSE3 available on host") + set(SSSE3_FOUND false CACHE BOOL "SSSE3 available on host") + set(SSE4_1_FOUND false CACHE BOOL "SSE4.1 available on host") 
+ENDIF(CMAKE_SYSTEM_NAME MATCHES "Linux") + +if(NOT SSE2_FOUND) + MESSAGE(STATUS "Could not find hardware support for SSE2 on this machine.") +endif(NOT SSE2_FOUND) +if(NOT SSE3_FOUND) + MESSAGE(STATUS "Could not find hardware support for SSE3 on this machine.") +endif(NOT SSE3_FOUND) +if(NOT SSSE3_FOUND) + MESSAGE(STATUS "Could not find hardware support for SSSE3 on this machine.") +endif(NOT SSSE3_FOUND) +if(NOT SSE4_1_FOUND) + MESSAGE(STATUS "Could not find hardware support for SSE4.1 on this machine.") +endif(NOT SSE4_1_FOUND) + +mark_as_advanced(SSE2_FOUND SSE3_FOUND SSSE3_FOUND SSE4_1_FOUND) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index c1a135a..dbfd5a6 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -77,3 +77,13 @@ if(${CURRENT_HOSTNAME} MATCHES ".*(falcon2|talisker|schorle|scicomp-pc-3|recover endif() endif() + + +list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake) +find_package(SSE) + +message(STATUS "${PROJECT_SOURCE_DIR}/tests/build_machine.hpp.in --> ${CMAKE_CURRENT_BINARY_DIR}/build_machine.hpp") +configure_file(${PROJECT_SOURCE_DIR}/tests/build_machine.hpp.in ${CMAKE_CURRENT_BINARY_DIR}/build_machine.hpp) + +add_executable(test_build_machine test_build_machine.cpp $) +target_include_directories(test_build_machine PRIVATE ${CATCH2_HEADER_PATH} ${COMPASS_INCLUDE_BUILD_DIR} ${CMAKE_CURRENT_BINARY_DIR}) diff --git a/tests/build_machine.hpp.in b/tests/build_machine.hpp.in new file mode 100644 index 0000000..41f828f --- /dev/null +++ b/tests/build_machine.hpp.in @@ -0,0 +1,85 @@ +#ifndef _COMPASS_FIXTURES_H_ +#define _COMPASS_FIXTURES_H_ + +#include +#include + + +struct host_reference +{ + + std::string hostname ; + + std::string expected_vendor ; + std::string expected_brand ; + std::string expected_device_name ; + + bool expected_has_sse ; + bool expected_has_sse2 ; + bool expected_has_sse3 ; + bool expected_has_sse4 ; + bool expected_has_avx ; + bool expected_has_avx2 ; + std::uint32_t expected_L1d_linesize ; + 
std::uint32_t expected_L2_linesize ; + std::uint32_t expected_L3_linesize ; + + std::uint32_t expected_L1d_size_kB ; + std::uint32_t expected_L2_size_kB ; + std::uint32_t expected_L3_size_kB ; + + std::uint32_t expected_ncores ; + std::uint32_t expected_nphyscores ; + + + host_reference(): + hostname(""), + expected_vendor (""), + expected_brand (""), + expected_device_name (""), + expected_has_sse (false), + expected_has_sse2(false), + expected_has_sse3(false), + expected_has_sse4(false), + expected_has_avx (false), + expected_has_avx2(false), + expected_L1d_linesize(0) , + expected_L2_linesize(0) , + expected_L3_linesize(0) , + expected_L1d_size_kB(0) , + expected_L2_size_kB(0), + expected_L3_size_kB(0), + expected_ncores (0), + expected_nphyscores (0) + { + + expected_vendor = "intel"; + expected_brand = "Intel(R) Core(TM) i7-3520M CPU @ 2.90GHz"; + expected_device_name = "i7-3520M"; + + expected_has_sse2= @SSE2_FOUND@ ; + if(expected_has_sse2) + expected_has_sse = true ; + + expected_has_sse3= @SSE3_FOUND@ ; + expected_has_sse4= @SSE4_1_FOUND@ ; + + expected_has_avx = false ; + expected_has_avx2= false ; + + //check by `cat /sys/devices/system/cpu/cpu0/cache/index0/coherency_line_size` + expected_L1d_linesize= 64 ; + expected_L2_linesize= 64 ; + expected_L3_linesize= 64 ; + //check by `cat /sys/devices/system/cpu/cpu0/cache/index0/size` + expected_L1d_size_kB= 32 ; + expected_L2_size_kB= 256; + expected_L3_size_kB= 4096 ; + + expected_ncores = 4; + expected_nphyscores = 2; + } + +}; + +#endif /* _COMPASS_FIXTURES_H_ */ diff --git a/tests/test_build_machine.cpp b/tests/test_build_machine.cpp new file mode 100644 index 0000000..531bf14 --- /dev/null +++ b/tests/test_build_machine.cpp @@ -0,0 +1,113 @@ +#include "catch.hpp" +#include "build_machine.hpp" + +#include "compass.hpp" + +#include +#include +#include +#include + +TEST_CASE_METHOD( host_reference, "machine_specific" ){ + + SECTION( "vendor_right" ){ + + auto value = compass::runtime::vendor(); + + 
REQUIRE(value.size()!=0u); + + std::transform(value.begin(), value.end(), + value.begin(), + ::tolower); + + REQUIRE(value.find(expected_vendor)!=std::string::npos); + + } + + + SECTION( "brand_right" ){ + + auto value = compass::runtime::brand(); + + REQUIRE(value.empty()!=true); + REQUIRE_THAT(value, Catch::Matchers::Contains(expected_brand) ); + + } + + SECTION( "device_name_right" ){ + + auto value = compass::runtime::device_name(); + + REQUIRE(value.empty()!=true); + REQUIRE_THAT(value, Catch::Matchers::Contains(expected_device_name) ); + + } + + SECTION( "ncores_right" ){ + + auto value = compass::runtime::threads(); + + REQUIRE(value!=0); + REQUIRE(value==expected_ncores); + } + + // SECTION( "physical_cores_right" ){ + + // auto value = compass::runtime::physical_threads(); + + // REQUIRE(value!=0); + // REQUIRE(value==expected_nphyscores); + // } + + + SECTION( "has_sse_right" ){ + + auto value = compass::runtime::has(compass::feature::sse()); + + REQUIRE(value==expected_has_sse); + + } + + SECTION( "has_sse2_right" ){ + + auto value = compass::runtime::has(compass::feature::sse2()); + + REQUIRE(value==expected_has_sse2); + + } + + SECTION( "has_sse3_right" ){ + + auto value = compass::runtime::has(compass::feature::sse3()); + + REQUIRE(value==expected_has_sse3); + + } + + SECTION( "has_sse4_right" ){ + + auto value = compass::runtime::has(compass::feature::sse4()); + + REQUIRE(value==expected_has_sse4); + + } + + SECTION( "has_avx_right" ){ + + auto value = compass::runtime::has(compass::feature::avx()); + + REQUIRE(value==expected_has_avx); + + } + + + SECTION( "has_avx2_right" ){ + + auto value = compass::runtime::has(compass::feature::avx2()); + + REQUIRE(value==expected_has_avx2); + + } + + +} From e1a7a05563c6981b2aab718f7e80c785d4d7e76a Mon Sep 17 00:00:00 2001 From: Peter Steinbach Date: Mon, 30 Apr 2018 11:05:04 +0200 Subject: [PATCH 03/17] added extracting CPU features from OS for Linux and macOS --- cmake/FindCPU_FEATURES.cmake | 229 
+++++++++++++++++++++++++++++++++++ tests/CMakeLists.txt | 3 +- tests/build_machine.hpp.in | 36 +++--- tests/test_build_machine.cpp | 12 +- 4 files changed, 256 insertions(+), 24 deletions(-) create mode 100644 cmake/FindCPU_FEATURES.cmake diff --git a/cmake/FindCPU_FEATURES.cmake b/cmake/FindCPU_FEATURES.cmake new file mode 100644 index 0000000..d3d2312 --- /dev/null +++ b/cmake/FindCPU_FEATURES.cmake @@ -0,0 +1,229 @@ +# Check if SSE instructions are available on the machine where +# the project is compiled. + +IF(CMAKE_SYSTEM_NAME MATCHES "Linux") + EXEC_PROGRAM(cat ARGS "/proc/cpuinfo" OUTPUT_VARIABLE CPUINFO) + + ##FIND VENDOR + STRING(FIND ${CPUINFO} "vendor" VENDOR_TITLE_OFFSET) + STRING(SUBSTRING ${CPUINFO} ${VENDOR_TITLE_OFFSET} 50 GUESSED_VENDOR_LINE) + STRING(REGEX REPLACE "^vendor.*: ([a-zA-Z]+)\n.*" "\\1" VENDOR_TITLE ${GUESSED_VENDOR_LINE}) + message(STATUS "vendor found: ${VENDOR_TITLE}") + set(CPU_VENDOR "${VENDOR_TITLE}" CACHE STRING "cpu vendor") + + ##FIND MODEL NAME + STRING(FIND ${CPUINFO} "model name" MODEL_NAME_OFFSET) + STRING(SUBSTRING ${CPUINFO} ${MODEL_NAME_OFFSET} 100 GUESSED_MODEL_NAME_LINE) + STRING(FIND ${GUESSED_MODEL_NAME_LINE} "\n" MODEL_NAME_RETURN_OFFSET) + STRING(SUBSTRING ${GUESSED_MODEL_NAME_LINE} 0 ${MODEL_NAME_RETURN_OFFSET} GUESSED_MODEL_NAME_LINE) + + STRING(REGEX REPLACE "^model name.*: ([a-zA-Z]+.*)$" "\\1" MODEL_NAME ${GUESSED_MODEL_NAME_LINE}) + message(STATUS "model name found: ${MODEL_NAME}") + set(CPU_MODEL_NAME "${MODEL_NAME}" CACHE STRING "cpu model name") + + ##FIND INSTRUCTIONS + STRING(REGEX REPLACE "^.*(sse) .*$" "\\1" SSE_THERE ${CPUINFO}) + STRING(COMPARE EQUAL "sse" "${SSE_THERE}" SSE_TRUE) + IF (SSE_TRUE) + set(SSE_FOUND true CACHE BOOL "SSE available on host") + ELSE (SSE_TRUE) + set(SSE_FOUND false CACHE BOOL "SSE available on host") + ENDIF (SSE_TRUE) + + STRING(REGEX REPLACE "^.*(sse2) .*$" "\\1" SSE_THERE ${CPUINFO}) + STRING(COMPARE EQUAL "sse2" "${SSE_THERE}" SSE2_TRUE) + IF (SSE2_TRUE) + 
set(SSE2_FOUND true CACHE BOOL "SSE2 available on host") + ELSE (SSE2_TRUE) + set(SSE2_FOUND false CACHE BOOL "SSE2 available on host") + ENDIF (SSE2_TRUE) + + # /proc/cpuinfo apparently omits sse3 :( + STRING(REGEX REPLACE "^.*[^s](sse3) .*$" "\\1" SSE_THERE ${CPUINFO}) + STRING(COMPARE EQUAL "sse3" "${SSE_THERE}" SSE3_TRUE) + IF (NOT SSE3_TRUE) + STRING(REGEX REPLACE "^.*(T2300).*$" "\\1" SSE_THERE ${CPUINFO}) + STRING(COMPARE EQUAL "T2300" "${SSE_THERE}" SSE3_TRUE) + ENDIF (NOT SSE3_TRUE) + + STRING(REGEX REPLACE "^.* (ssse3) .*$" "\\1" SSE_THERE ${CPUINFO}) + STRING(COMPARE EQUAL "ssse3" "${SSE_THERE}" SSSE3_TRUE) + IF (SSE3_TRUE OR SSSE3_TRUE) + set(SSE3_FOUND true CACHE BOOL "SSE3 available on host") + ELSE (SSE3_TRUE OR SSSE3_TRUE) + set(SSE3_FOUND false CACHE BOOL "SSE3 available on host") + ENDIF (SSE3_TRUE OR SSSE3_TRUE) + IF (SSSE3_TRUE) + set(SSSE3_FOUND true CACHE BOOL "SSSE3 available on host") + ELSE (SSSE3_TRUE) + set(SSSE3_FOUND false CACHE BOOL "SSSE3 available on host") + ENDIF (SSSE3_TRUE) + + STRING(REGEX REPLACE "^.* (sse4_1) .*$" "\\1" SSE_THERE ${CPUINFO}) + STRING(COMPARE EQUAL "sse4_1" "${SSE_THERE}" SSE41_TRUE) + IF (SSE41_TRUE) + set(SSE4_1_FOUND true CACHE BOOL "SSE4.1 available on host") + ELSE (SSE41_TRUE) + set(SSE4_1_FOUND false CACHE BOOL "SSE4.1 available on host") + ENDIF (SSE41_TRUE) + + STRING(REGEX REPLACE "^.* (sse4_2) .*$" "\\1" SSE_THERE ${CPUINFO}) + STRING(COMPARE EQUAL "sse4_2" "${SSE_THERE}" SSE42_TRUE) + IF (SSE42_TRUE) + set(SSE4_2_FOUND true CACHE BOOL "SSE4.2 available on host") + ELSE (SSE42_TRUE) + set(SSE4_2_FOUND false CACHE BOOL "SSE4.2 available on host") + ENDIF (SSE42_TRUE) + + if("${CPUINFO}" MATCHES ".*avx .*") + set(AVX_FOUND true CACHE BOOL "AVX available on host") + else() + set(AVX_FOUND false CACHE BOOL "AVX available on host") + endif() + + if("${CPUINFO}" MATCHES ".*avx2 .*") + set(AVX2_FOUND true CACHE BOOL "AVX2 available on host") + else() + set(AVX2_FOUND false CACHE BOOL "AVX2 available on 
host") + endif() + +# + if(EXISTS "/sys/devices/system/cpu/cpu0/cache/index2/size") + EXEC_PROGRAM(cat ARGS "/sys/devices/system/cpu/cpu0/cache/index2/size" OUTPUT_VARIABLE L2_SIZE_KB_STRING) + + if(${L2_SIZE_KB_STRING} MATCHES ".*[K|k]") + string(REGEX REPLACE "[K|k]" "" L2_SIZE_KB ${L2_SIZE_KB_STRING}) + set(CPU_L2_SIZE_KB "${L2_SIZE_KB}" CACHE STRING "cpu L2 cache size in kB") + else() + if(${L2_SIZE_KB_STRING} MATCHES ".*[M|m]") + string(REGEX REPLACE "[M|m]" "000" L2_SIZE_KB ${L2_SIZE_KB_STRING}) + set(CPU_L2_SIZE_KB "${L2_SIZE_KB}" CACHE STRING "cpu L2 cache size in kB") + else() + message(WARNING "unable to find unit prefix (K|M) in /sys/devices/system/cpu/cpu0/cache/index2/size:${L2_SIZE_KB_STRING} (assuming it's contents as expressed in kB, crossing fingers)") + set(CPU_L2_SIZE_KB "${L2_SIZE_KB_STRING}" CACHE STRING "cpu L2 cache size in kB") + endif() + + endif() + + else() + message("unable to find /sys/devices/system/cpu/cpu0/cache/index2/size") + set(CPU_L2_SIZE_KB "0" CACHE STRING "cpu L2 cache size in kB") + endif() + + +ELSEIF(CMAKE_SYSTEM_NAME MATCHES "Darwin") + EXEC_PROGRAM("/usr/sbin/sysctl -n machdep.cpu.features" OUTPUT_VARIABLE + CPUINFO) + + EXEC_PROGRAM("/usr/sbin/sysctl -n machdep.cpu.vendor" OUTPUT_VARIABLE + VENDOR_TITLE) + + EXEC_PROGRAM("/usr/sbin/sysctl -n machdep.cpu.brand_string" OUTPUT_VARIABLE + MODEL_NAME) + + EXEC_PROGRAM("/usr/sbin/sysctl -n machdep.cpu.cache.size" OUTPUT_VARIABLE + CPU_L2_SIZE_KB) + + set(CPU_VENDOR "${VENDOR_TITLE}" CACHE STRING "cpu vendor") + set(CPU_MODEL_NAME "${MODEL_NAME}" CACHE STRING "cpu model name") + + STRING(REGEX REPLACE "^.*[^S](SSE) .*$" "\\1" SSE_THERE ${CPUINFO}) + STRING(COMPARE EQUAL "SSE" "${SSE_THERE}" SSE_TRUE) + IF (SSE_TRUE) + set(SSE_FOUND true CACHE BOOL "SSE2 available on host") + ELSE (SSE_TRUE) + set(SSE_FOUND false CACHE BOOL "SSE2 available on host") + ENDIF (SSE_TRUE) + + STRING(REGEX REPLACE "^.*[^S](SSE2) .*$" "\\1" SSE_THERE ${CPUINFO}) + STRING(COMPARE EQUAL "SSE2" 
"${SSE_THERE}" SSE2_TRUE) + IF (SSE2_TRUE) + set(SSE2_FOUND true CACHE BOOL "SSE2 available on host") + ELSE (SSE2_TRUE) + set(SSE2_FOUND false CACHE BOOL "SSE2 available on host") + ENDIF (SSE2_TRUE) + + STRING(REGEX REPLACE "^.*[^S](SSE2).*$" "\\1" SSE_THERE ${CPUINFO}) + STRING(COMPARE EQUAL "SSE2" "${SSE_THERE}" SSE2_TRUE) + IF (SSE2_TRUE) + set(SSE2_FOUND true CACHE BOOL "SSE2 available on host") + ELSE (SSE2_TRUE) + set(SSE2_FOUND false CACHE BOOL "SSE2 available on host") + ENDIF (SSE2_TRUE) + + STRING(REGEX REPLACE "^.*[^S](SSE3).*$" "\\1" SSE_THERE ${CPUINFO}) + STRING(COMPARE EQUAL "SSE3" "${SSE_THERE}" SSE3_TRUE) + IF (SSE3_TRUE) + set(SSE3_FOUND true CACHE BOOL "SSE3 available on host") + ELSE (SSE3_TRUE) + set(SSE3_FOUND false CACHE BOOL "SSE3 available on host") + ENDIF (SSE3_TRUE) + + STRING(REGEX REPLACE "^.*(SSSE3).*$" "\\1" SSE_THERE ${CPUINFO}) + STRING(COMPARE EQUAL "SSSE3" "${SSE_THERE}" SSSE3_TRUE) + IF (SSSE3_TRUE) + set(SSSE3_FOUND true CACHE BOOL "SSSE3 available on host") + ELSE (SSSE3_TRUE) + set(SSSE3_FOUND false CACHE BOOL "SSSE3 available on host") + ENDIF (SSSE3_TRUE) + + STRING(REGEX REPLACE "^.*(SSE4.1).*$" "\\1" SSE_THERE ${CPUINFO}) + STRING(COMPARE EQUAL "SSE4.1" "${SSE_THERE}" SSE41_TRUE) + IF (SSE41_TRUE) + set(SSE4_1_FOUND true CACHE BOOL "SSE4.1 available on host") + ELSE (SSE41_TRUE) + set(SSE4_1_FOUND false CACHE BOOL "SSE4.1 available on host") + ENDIF (SSE41_TRUE) + + if("${CPUINFO}" MATCHES ".*AVX .*") + set(AVX_FOUND true CACHE BOOL "AVX available on host") + else() + set(AVX_FOUND false CACHE BOOL "AVX available on host") + endif() + + if("${CPUINFO}" MATCHES ".*AVX2 .*") + set(AVX2_FOUND true CACHE BOOL "AVX2 available on host") + else() + set(AVX2_FOUND false CACHE BOOL "AVX2 available on host") + endif() + +ELSEIF(CMAKE_SYSTEM_NAME MATCHES "Windows") + # TODO + set(SSE2_FOUND true CACHE BOOL "SSE2 available on host") + set(SSE3_FOUND false CACHE BOOL "SSE3 available on host") + set(SSSE3_FOUND false CACHE BOOL 
"SSSE3 available on host") + set(SSE4_1_FOUND false CACHE BOOL "SSE4.1 available on host") +ELSE(CMAKE_SYSTEM_NAME MATCHES "Linux") + set(SSE2_FOUND true CACHE BOOL "SSE2 available on host") + set(SSE3_FOUND false CACHE BOOL "SSE3 available on host") + set(SSSE3_FOUND false CACHE BOOL "SSSE3 available on host") + set(SSE4_1_FOUND false CACHE BOOL "SSE4.1 available on host") +ENDIF(CMAKE_SYSTEM_NAME MATCHES "Linux") + +if(NOT SSE_FOUND) + MESSAGE(STATUS "Could not find hardware support for SSE on this machine.") +endif(NOT SSE_FOUND) +if(NOT SSE2_FOUND) + MESSAGE(STATUS "Could not find hardware support for SSE2 on this machine.") +endif(NOT SSE2_FOUND) +if(NOT SSE3_FOUND) + MESSAGE(STATUS "Could not find hardware support for SSE3 on this machine.") +endif(NOT SSE3_FOUND) +if(NOT SSSE3_FOUND) + MESSAGE(STATUS "Could not find hardware support for SSSE3 on this machine.") +endif(NOT SSSE3_FOUND) +if(NOT SSE4_1_FOUND) + MESSAGE(STATUS "Could not find hardware support for SSE4.1 on this machine.") +endif(NOT SSE4_1_FOUND) +if(NOT SSE4_2_FOUND) + MESSAGE(STATUS "Could not find hardware support for SSE4.2 on this machine.") +endif(NOT SSE4_2_FOUND) +if(NOT AVX_FOUND) + MESSAGE(STATUS "Could not find hardware support for AVX on this machine.") +endif(NOT AVX_FOUND) +if(NOT AVX2_FOUND) + MESSAGE(STATUS "Could not find hardware support for AVX2 on this machine.") +endif(NOT AVX2_FOUND) + + +mark_as_advanced(SSE_FOUND SSE2_FOUND SSE3_FOUND SSSE3_FOUND SSE4_1_FOUND SSE4_2_FOUND AVX_FOUND AVX2_FOUND CPU_VENDOR CPU_MODEL_NAME CPU_L2_SIZE_KB) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index dbfd5a6..08d6955 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -80,9 +80,8 @@ endif() list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake) -find_package(SSE) +find_package(CPU_FEATURES) -message(STATUS "${PROJECT_SOURCE_DIR}/tests/build_machine.hpp.in --> ${CMAKE_CURRENT_BINARY_DIR}/build_machine.hpp") 
configure_file(${PROJECT_SOURCE_DIR}/tests/build_machine.hpp.in ${CMAKE_CURRENT_BINARY_DIR}/build_machine.hpp) add_executable(test_build_machine test_build_machine.cpp $) diff --git a/tests/build_machine.hpp.in b/tests/build_machine.hpp.in index 41f828f..9032751 100644 --- a/tests/build_machine.hpp.in +++ b/tests/build_machine.hpp.in @@ -3,6 +3,7 @@ #include #include +#include struct host_reference @@ -49,35 +50,32 @@ struct host_reference expected_L1d_size_kB(0) , expected_L2_size_kB(0), expected_L3_size_kB(0), - expected_ncores (0), + expected_ncores (std::thread::hardware_concurrency()), expected_nphyscores (0) { - expected_vendor = "intel"; - expected_brand = "Intel(R) Core(TM) i7-3520M CPU @ 2.90GHz"; - expected_device_name = "i7-3520M"; + expected_vendor = "@CPU_VENDOR@"; + expected_brand = "@CPU_MODEL_NAME@"; + expected_device_name = ""; + expected_has_sse= @SSE_FOUND@ ; expected_has_sse2= @SSE2_FOUND@ ; - if(expected_has_sse2) - expected_has_sse = true ; - expected_has_sse3= @SSE3_FOUND@ ; expected_has_sse4= @SSE4_1_FOUND@ ; - expected_has_avx = false ; - expected_has_avx2= false ; + expected_has_avx = @AVX_FOUND@ ; + expected_has_avx2= @AVX2_FOUND@ ; //check by `cat /sys/devices/system/cpu/cpu0/cache/index0/coherency_line_size` - expected_L1d_linesize= 64 ; - expected_L2_linesize= 64 ; - expected_L3_linesize= 64 ; - //check by `cat /sys/devices/system/cpu/cpu0/cache/index0/size` - expected_L1d_size_kB= 32 ; - expected_L2_size_kB= 256; - expected_L3_size_kB= 4096 ; - - expected_ncores = 4; - expected_nphyscores = 2; + // expected_L1d_linesize= 64 ; + // expected_L2_linesize= 64 ; + // expected_L3_linesize= 64 ; + // //check by `cat /sys/devices/system/cpu/cpu0/cache/index0/size` + // expected_L1d_size_kB= 32 ; + expected_L2_size_kB= @CPU_L2_SIZE_KB@; + // expected_L3_size_kB= 4096 ; /sys/devices/system/cpu/cpu0/cache/index3/size + + // expected_nphyscores = 2; } }; diff --git a/tests/test_build_machine.cpp b/tests/test_build_machine.cpp index 
531bf14..02aee98 100644 --- a/tests/test_build_machine.cpp +++ b/tests/test_build_machine.cpp @@ -16,9 +16,9 @@ TEST_CASE_METHOD( host_reference, "machine_specific" ){ REQUIRE(value.size()!=0u); - std::transform(value.begin(), value.end(), - value.begin(), - ::tolower); + // std::transform(value.begin(), value.end(), + // value.begin(), + // ::tolower); REQUIRE(value.find(expected_vendor)!=std::string::npos); @@ -109,5 +109,11 @@ TEST_CASE_METHOD( host_reference, "machine_specific" ){ } + SECTION( "correct_l2_cache_size" ){ + auto value = compass::runtime::size::cache::level(2); + + REQUIRE(value >> 10 ==expected_L2_size_kB); + + } } From 32cdf6279101c89a685cefc9608a8974cb891d66 Mon Sep 17 00:00:00 2001 From: Peter Steinbach Date: Mon, 30 Apr 2018 11:10:25 +0200 Subject: [PATCH 04/17] fixed funny macOS details in naming AVX = AVX1.0 in sysctl output --- cmake/FindCPU_FEATURES.cmake | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/cmake/FindCPU_FEATURES.cmake b/cmake/FindCPU_FEATURES.cmake index d3d2312..a1631fc 100644 --- a/cmake/FindCPU_FEATURES.cmake +++ b/cmake/FindCPU_FEATURES.cmake @@ -115,6 +115,9 @@ ELSEIF(CMAKE_SYSTEM_NAME MATCHES "Darwin") EXEC_PROGRAM("/usr/sbin/sysctl -n machdep.cpu.features" OUTPUT_VARIABLE CPUINFO) + EXEC_PROGRAM("/usr/sbin/sysctl -n machdep.cpu.leaf7_features" OUTPUT_VARIABLE + LEAF7_CPUINFO) + EXEC_PROGRAM("/usr/sbin/sysctl -n machdep.cpu.vendor" OUTPUT_VARIABLE VENDOR_TITLE) @@ -175,7 +178,15 @@ ELSEIF(CMAKE_SYSTEM_NAME MATCHES "Darwin") set(SSE4_1_FOUND false CACHE BOOL "SSE4.1 available on host") ENDIF (SSE41_TRUE) - if("${CPUINFO}" MATCHES ".*AVX .*") + STRING(REGEX REPLACE "^.*(SSE4.2).*$" "\\1" SSE_THERE ${CPUINFO}) + STRING(COMPARE EQUAL "SSE4.2" "${SSE_THERE}" SSE42_TRUE) + IF (SSE42_TRUE) + set(SSE4_2_FOUND true CACHE BOOL "SSE4.2 available on host") + ELSE (SSE42_TRUE) + set(SSE4_2_FOUND false CACHE BOOL "SSE4.2 available on host") + ENDIF (SSE42_TRUE) + + if("${CPUINFO}" MATCHES 
".*AVX.*") set(AVX_FOUND true CACHE BOOL "AVX available on host") else() set(AVX_FOUND false CACHE BOOL "AVX available on host") @@ -184,7 +195,11 @@ ELSEIF(CMAKE_SYSTEM_NAME MATCHES "Darwin") if("${CPUINFO}" MATCHES ".*AVX2 .*") set(AVX2_FOUND true CACHE BOOL "AVX2 available on host") else() - set(AVX2_FOUND false CACHE BOOL "AVX2 available on host") + if("${LEAF7_CPUINFO}" MATCHES ".*AVX2 .*") + set(AVX2_FOUND true CACHE BOOL "AVX2 available on host") + else() + set(AVX2_FOUND false CACHE BOOL "AVX2 available on host") + endif() endif() ELSEIF(CMAKE_SYSTEM_NAME MATCHES "Windows") From c18c1a4631b581bddbe7cc16a964438eab00c77a Mon Sep 17 00:00:00 2001 From: Peter Steinbach Date: Mon, 30 Apr 2018 11:12:06 +0200 Subject: [PATCH 05/17] added build_machine test to unit test suite --- CTestLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/CTestLists.txt b/CTestLists.txt index 242e7dc..7e203ea 100644 --- a/CTestLists.txt +++ b/CTestLists.txt @@ -17,3 +17,4 @@ endif() add_test(NAME compass_impl COMMAND test_compass_impl) add_test(NAME bitview COMMAND test_bitview) +add_test(NAME build_machine COMMAND test_build_machine) From c71a00a8d45d5db2fa1c0d2cb94cd15b7f8549b1 Mon Sep 17 00:00:00 2001 From: Peter Steinbach Date: Mon, 30 Apr 2018 11:15:22 +0200 Subject: [PATCH 06/17] added debugging help for on-build-time unit tests --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index 426f556..a9058d6 100644 --- a/.travis.yml +++ b/.travis.yml @@ -58,6 +58,7 @@ before_install: script: - mkdir -p build && cd build - cmake .. + - cat /proc/cpuinfo && cat tests/build_machine.hpp - make VERBOSE=1 - ctest -V -j - cd .. 
\ No newline at end of file From 02330244d993a70af354fb5a3ddf2c6244070fc7 Mon Sep 17 00:00:00 2001 From: Peter Steinbach Date: Mon, 30 Apr 2018 12:44:45 +0200 Subject: [PATCH 07/17] removed non portable cat of /proc/cpuinfo --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index a9058d6..7106b07 100644 --- a/.travis.yml +++ b/.travis.yml @@ -58,7 +58,7 @@ before_install: script: - mkdir -p build && cd build - cmake .. - - cat /proc/cpuinfo && cat tests/build_machine.hpp + - cat tests/build_machine.hpp - make VERBOSE=1 - ctest -V -j - cd .. \ No newline at end of file From 70e9a40949c722fea65f6fc8cc7620a27f6c9a5a Mon Sep 17 00:00:00 2001 From: Peter Steinbach Date: Mon, 30 Apr 2018 12:50:59 +0200 Subject: [PATCH 08/17] added clang++ to CLI --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 7106b07..5ca52b5 100644 --- a/.travis.yml +++ b/.travis.yml @@ -50,7 +50,7 @@ matrix: - os: osx osx_image: xcode9 env: - - MATRIX_EVAL="CC=clang && CXX=clang" + - MATRIX_EVAL="CC=clang && CXX=clang++" before_install: - eval "${MATRIX_EVAL}" From 680b68e21267f3ebc5aca3b30a7bc693471a4ad4 Mon Sep 17 00:00:00 2001 From: Peter Steinbach Date: Mon, 30 Apr 2018 13:29:51 +0200 Subject: [PATCH 09/17] added minimal hardware name and cache size detection during build --- cmake/FindCPU_FEATURES.cmake | 109 ++++++++++++++++++++++++----------- 1 file changed, 75 insertions(+), 34 deletions(-) diff --git a/cmake/FindCPU_FEATURES.cmake b/cmake/FindCPU_FEATURES.cmake index a1631fc..0efc978 100644 --- a/cmake/FindCPU_FEATURES.cmake +++ b/cmake/FindCPU_FEATURES.cmake @@ -1,5 +1,27 @@ -# Check if SSE instructions are available on the machine where -# the project is compiled. 
+ +##extract a single line that contains the given pattern (starting from that patter) +function(extract_line LINESTART_PATTERN MULTILINE_STRING OUTPUT_VARIABLE) + +STRING(FIND ${MULTILINE_STRING} "${PATTERN}" MATCH_OFFSET) +STRING(SUBSTRING ${MATCH_OFFSET} 0 ${MATCH_OFFSET} MULTILINE_NO_START) +STRING(FIND ${MULTILINE_NO_START} "\n" MULTILINE_RETURN_OFFSET) +STRING(SUBSTRING ${MULTILINE_NO_START} 0 ${MULTILINE_RETURN_OFFSET} MATCHING_LINE) +set(OUTPUT_VARIABLE ${MATCHING_LINE} PARENT_SCOPE) + +endfunction(extract_line) + +##extract system information from wmic.exe on Windows +function(wmic_get ATTRIBUTENAME RESULTVALUE) +EXEC_PROGRAM("wmic.exe" ARGS "cpu get ${ATTRIBUTENAME}" OUTPUT_VARIABLE WMIC_OUTPUT) +string(REGEX REPLACE " [ ]+" ";" WMIC_OUTPUT_CLEANED ${WMIC_OUTPUT}) +set(WMIC_OUTPUT_LIST "${WMIC_OUTPUT_CLEANED};") +list(GET WMIC_OUTPUT_LIST 1 VALUEOFINTEREST) + +string(STRIP "${VALUEOFINTEREST}" VALUEOFINTEREST) +set(${RESULTVALUE} ${VALUEOFINTEREST} PARENT_SCOPE) +message(STATUS "[wmic_get] RESULT = ${VALUEOFINTEREST}") + +endfunction(wmic_get) IF(CMAKE_SYSTEM_NAME MATCHES "Linux") EXEC_PROGRAM(cat ARGS "/proc/cpuinfo" OUTPUT_VARIABLE CPUINFO) @@ -203,42 +225,61 @@ ELSEIF(CMAKE_SYSTEM_NAME MATCHES "Darwin") endif() ELSEIF(CMAKE_SYSTEM_NAME MATCHES "Windows") - # TODO - set(SSE2_FOUND true CACHE BOOL "SSE2 available on host") - set(SSE3_FOUND false CACHE BOOL "SSE3 available on host") - set(SSSE3_FOUND false CACHE BOOL "SSSE3 available on host") - set(SSE4_1_FOUND false CACHE BOOL "SSE4.1 available on host") -ELSE(CMAKE_SYSTEM_NAME MATCHES "Linux") - set(SSE2_FOUND true CACHE BOOL "SSE2 available on host") + + + wmic_get("Name" MODEL_NAME) + string(STRIP ${MODEL_NAME} MODEL_NAME) + set(CPU_MODEL_NAME "${MODEL_NAME}" CACHE STRING "cpu model name") + + wmic_get("Manufacturer" MODEL_VENDOR) + set(CPU_VENDOR "${MODEL_VENDOR}" CACHE STRING "cpu model vendor") + + wmic_get("L2CacheSize" L2_CACHE_SIZE) + set(CPU_L2_SIZE_KB "${L2_CACHE_SIZE}" CACHE STRING "cpu L2 
cache size in kB") + + set(SSE_FOUND false CACHE BOOL "SSE available on host") + set(SSE2_FOUND false CACHE BOOL "SSE2 available on host") set(SSE3_FOUND false CACHE BOOL "SSE3 available on host") set(SSSE3_FOUND false CACHE BOOL "SSSE3 available on host") set(SSE4_1_FOUND false CACHE BOOL "SSE4.1 available on host") + set(SSE4_2_FOUND false CACHE BOOL "SSE4.2 available on host") + set(AVX_FOUND false CACHE BOOL "AVX available on host") + set(AVX2_FOUND false CACHE BOOL "AVX2 available on host") +#ELSE(CMAKE_SYSTEM_NAME MATCHES "Linux") +# set(SSE2_FOUND true CACHE BOOL "SSE2 available on host") +# set(SSE3_FOUND false CACHE BOOL "SSE3 available on host") +# set(SSSE3_FOUND false CACHE BOOL "SSSE3 available on host") +# set(SSE4_1_FOUND false CACHE BOOL "SSE4.1 available on host") ENDIF(CMAKE_SYSTEM_NAME MATCHES "Linux") -if(NOT SSE_FOUND) - MESSAGE(STATUS "Could not find hardware support for SSE on this machine.") -endif(NOT SSE_FOUND) -if(NOT SSE2_FOUND) - MESSAGE(STATUS "Could not find hardware support for SSE2 on this machine.") -endif(NOT SSE2_FOUND) -if(NOT SSE3_FOUND) - MESSAGE(STATUS "Could not find hardware support for SSE3 on this machine.") -endif(NOT SSE3_FOUND) -if(NOT SSSE3_FOUND) - MESSAGE(STATUS "Could not find hardware support for SSSE3 on this machine.") -endif(NOT SSSE3_FOUND) -if(NOT SSE4_1_FOUND) - MESSAGE(STATUS "Could not find hardware support for SSE4.1 on this machine.") -endif(NOT SSE4_1_FOUND) -if(NOT SSE4_2_FOUND) - MESSAGE(STATUS "Could not find hardware support for SSE4.2 on this machine.") -endif(NOT SSE4_2_FOUND) -if(NOT AVX_FOUND) - MESSAGE(STATUS "Could not find hardware support for AVX on this machine.") -endif(NOT AVX_FOUND) -if(NOT AVX2_FOUND) - MESSAGE(STATUS "Could not find hardware support for AVX2 on this machine.") -endif(NOT AVX2_FOUND) - +message(STATUS "found hardware: ${CPU_VENDOR} ${CPU_MODEL_NAME}") +set(FOUND_FEATURES) + +if(SSE_FOUND) + list(APPEND FOUND_FEATURES "sse") +endif(SSE_FOUND) +if(SSE2_FOUND) + 
list(APPEND FOUND_FEATURES "sse2") +endif(SSE2_FOUND) +if( SSE3_FOUND) +list(APPEND FOUND_FEATURES "sse3") +endif( SSE3_FOUND) +if( SSSE3_FOUND) +list(APPEND FOUND_FEATURES "ssse3") +endif( SSSE3_FOUND) +if( SSE4_1_FOUND) +list(APPEND FOUND_FEATURES "sse4.1") +endif( SSE4_1_FOUND) +if( SSE4_2_FOUND) +list(APPEND FOUND_FEATURES "sse4.2") +endif( SSE4_2_FOUND) +if( AVX_FOUND) +list(APPEND FOUND_FEATURES "avx") +endif( AVX_FOUND) +if( AVX2_FOUND) +list(APPEND FOUND_FEATURES "avx2") +endif( AVX2_FOUND) + +message(STATUS "found hardware features: ${FOUND_FEATURES}") mark_as_advanced(SSE_FOUND SSE2_FOUND SSE3_FOUND SSSE3_FOUND SSE4_1_FOUND SSE4_2_FOUND AVX_FOUND AVX2_FOUND CPU_VENDOR CPU_MODEL_NAME CPU_L2_SIZE_KB) From 2e289b88b4917f645a19ed97413841a4ef17ac96 Mon Sep 17 00:00:00 2001 From: Peter Steinbach Date: Mon, 30 Apr 2018 13:30:26 +0200 Subject: [PATCH 10/17] commented messaging out --- cmake/FindCPU_FEATURES.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/FindCPU_FEATURES.cmake b/cmake/FindCPU_FEATURES.cmake index 0efc978..7709c04 100644 --- a/cmake/FindCPU_FEATURES.cmake +++ b/cmake/FindCPU_FEATURES.cmake @@ -19,7 +19,7 @@ list(GET WMIC_OUTPUT_LIST 1 VALUEOFINTEREST) string(STRIP "${VALUEOFINTEREST}" VALUEOFINTEREST) set(${RESULTVALUE} ${VALUEOFINTEREST} PARENT_SCOPE) -message(STATUS "[wmic_get] RESULT = ${VALUEOFINTEREST}") +#message(STATUS "[wmic_get] RESULT = ${VALUEOFINTEREST}") endfunction(wmic_get) From 58768a49255e03ec4c1678dc127dcce2a2882ae7 Mon Sep 17 00:00:00 2001 From: Peter Steinbach Date: Mon, 30 Apr 2018 14:28:18 +0200 Subject: [PATCH 11/17] added VC based detection of hardware --- .gitignore | 8 +- cmake/FindCPU_FEATURES.cmake | 59 ++- cmake/FindSSE.cmake | 104 ----- cmake/OptimizeForArchitecture.cmake | 563 ++++++++++++++++++++++++++++ 4 files changed, 623 insertions(+), 111 deletions(-) delete mode 100644 cmake/FindSSE.cmake create mode 100644 cmake/OptimizeForArchitecture.cmake diff --git a/.gitignore b/.gitignore 
index bdc5af0..4fc12ad 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,8 @@ *~ -build +build/* +debug/* +*sln +*.vcxproj* +x64 +x32 +cmake \ No newline at end of file diff --git a/cmake/FindCPU_FEATURES.cmake b/cmake/FindCPU_FEATURES.cmake index 7709c04..a2d88eb 100644 --- a/cmake/FindCPU_FEATURES.cmake +++ b/cmake/FindCPU_FEATURES.cmake @@ -226,6 +226,9 @@ ELSEIF(CMAKE_SYSTEM_NAME MATCHES "Darwin") ELSEIF(CMAKE_SYSTEM_NAME MATCHES "Windows") + #as an alternative to wmic, use + #get_filename_component(_vendor_id "[HKEY_LOCAL_MACHINE\\Hardware\\Description\\System\\CentralProcessor\\0;VendorIdentifier]" NAME CACHE) + #get_filename_component(_cpu_id "[HKEY_LOCAL_MACHINE\\Hardware\\Description\\System\\CentralProcessor\\0;Identifier]" NAME CACHE) wmic_get("Name" MODEL_NAME) string(STRIP ${MODEL_NAME} MODEL_NAME) @@ -235,9 +238,57 @@ ELSEIF(CMAKE_SYSTEM_NAME MATCHES "Windows") set(CPU_VENDOR "${MODEL_VENDOR}" CACHE STRING "cpu model vendor") wmic_get("L2CacheSize" L2_CACHE_SIZE) + wmic_get("NumberOfCores" CPU_NPHYS_CORES) + + #dirty hack that I need to validate with other machines in Win7 + if(${CPU_NPHYS_CORES} GREATER 1) + math(EXPR L2_CACHE_SIZE "${L2_CACHE_SIZE}/${CPU_NPHYS_CORES}") + endif() + set(CPU_L2_SIZE_KB "${L2_CACHE_SIZE}" CACHE STRING "cpu L2 cache size in kB") + + #thanks to the wonderful VC project (https://github.com/VcDevel/Vc) + include (OptimizeForArchitecture) + + OFA_AutodetectHostArchitecture() + OFA_HandleX86Options() + message("++ _available_vector_units_list = ${_available_vector_units_list}") - set(SSE_FOUND false CACHE BOOL "SSE available on host") + list(FIND _available_vector_units_list "sse" SSE_INDEX) + list(FIND _available_vector_units_list "sse2" SSE2_INDEX) + list(FIND _available_vector_units_list "sse3" SSE3_INDEX) + list(FIND _available_vector_units_list "ssse3" SSSE3_INDEX) + list(FIND _available_vector_units_list "sse4.1" SSE4_1_INDEX) + list(FIND _available_vector_units_list "sse4.2" SSE4_2_INDEX) + list(FIND 
_available_vector_units_list "avx" AVX_INDEX) + list(FIND _available_vector_units_list "avx2" AVX2_INDEX) + + if(${SSE_INDEX} GREATER -1) + set(SSE_FOUND true CACHE BOOL "SSE available on host") + endif() + if(${SSE2_INDEX} GREATER -1) + set(SSE2_FOUND true CACHE BOOL "SSE2 available on host") + endif() + if(${SSE3_INDEX} GREATER -1) + set(SSE3_FOUND true CACHE BOOL "SSE3 available on host") + endif() + if(${SSSE3_INDEX} GREATER -1) + set(SSSE3_FOUND true CACHE BOOL "SSSE3 available on host") + endif() + if(${SSE4_1_INDEX} GREATER -1) + set(SSE4_1_FOUND true CACHE BOOL "SSE4.1 available on host") + endif() + if(${SSE4_2_INDEX} GREATER -1) + set(SSE4_2_FOUND true CACHE BOOL "SSE4.2 available on host") + endif() + if(${AVX_INDEX} GREATER -1) + set(AVX_FOUND true CACHE BOOL "AVX available on host") + endif() + if(${AVX2_INDEX} GREATER -1) + set(AVX2_FOUND true CACHE BOOL "AVX2 available on host") + endif() + + set(SSE_FOUND true CACHE BOOL "SSE available on host") set(SSE2_FOUND false CACHE BOOL "SSE2 available on host") set(SSE3_FOUND false CACHE BOOL "SSE3 available on host") set(SSSE3_FOUND false CACHE BOOL "SSSE3 available on host") @@ -245,11 +296,7 @@ ELSEIF(CMAKE_SYSTEM_NAME MATCHES "Windows") set(SSE4_2_FOUND false CACHE BOOL "SSE4.2 available on host") set(AVX_FOUND false CACHE BOOL "AVX available on host") set(AVX2_FOUND false CACHE BOOL "AVX2 available on host") -#ELSE(CMAKE_SYSTEM_NAME MATCHES "Linux") -# set(SSE2_FOUND true CACHE BOOL "SSE2 available on host") -# set(SSE3_FOUND false CACHE BOOL "SSE3 available on host") -# set(SSSE3_FOUND false CACHE BOOL "SSSE3 available on host") -# set(SSE4_1_FOUND false CACHE BOOL "SSE4.1 available on host") + ENDIF(CMAKE_SYSTEM_NAME MATCHES "Linux") message(STATUS "found hardware: ${CPU_VENDOR} ${CPU_MODEL_NAME}") diff --git a/cmake/FindSSE.cmake b/cmake/FindSSE.cmake deleted file mode 100644 index 6ece876..0000000 --- a/cmake/FindSSE.cmake +++ /dev/null @@ -1,104 +0,0 @@ -# Check if SSE instructions are available on 
the machine where -# the project is compiled. - -IF(CMAKE_SYSTEM_NAME MATCHES "Linux") - EXEC_PROGRAM(cat ARGS "/proc/cpuinfo" OUTPUT_VARIABLE CPUINFO) - - STRING(REGEX REPLACE "^.*(sse2).*$" "\\1" SSE_THERE ${CPUINFO}) - STRING(COMPARE EQUAL "sse2" "${SSE_THERE}" SSE2_TRUE) - IF (SSE2_TRUE) - set(SSE2_FOUND true CACHE BOOL "SSE2 available on host") - ELSE (SSE2_TRUE) - set(SSE2_FOUND false CACHE BOOL "SSE2 available on host") - ENDIF (SSE2_TRUE) - - # /proc/cpuinfo apparently omits sse3 :( - STRING(REGEX REPLACE "^.*[^s](sse3).*$" "\\1" SSE_THERE ${CPUINFO}) - STRING(COMPARE EQUAL "sse3" "${SSE_THERE}" SSE3_TRUE) - IF (NOT SSE3_TRUE) - STRING(REGEX REPLACE "^.*(T2300).*$" "\\1" SSE_THERE ${CPUINFO}) - STRING(COMPARE EQUAL "T2300" "${SSE_THERE}" SSE3_TRUE) - ENDIF (NOT SSE3_TRUE) - - STRING(REGEX REPLACE "^.*(ssse3).*$" "\\1" SSE_THERE ${CPUINFO}) - STRING(COMPARE EQUAL "ssse3" "${SSE_THERE}" SSSE3_TRUE) - IF (SSE3_TRUE OR SSSE3_TRUE) - set(SSE3_FOUND true CACHE BOOL "SSE3 available on host") - ELSE (SSE3_TRUE OR SSSE3_TRUE) - set(SSE3_FOUND false CACHE BOOL "SSE3 available on host") - ENDIF (SSE3_TRUE OR SSSE3_TRUE) - IF (SSSE3_TRUE) - set(SSSE3_FOUND true CACHE BOOL "SSSE3 available on host") - ELSE (SSSE3_TRUE) - set(SSSE3_FOUND false CACHE BOOL "SSSE3 available on host") - ENDIF (SSSE3_TRUE) - - STRING(REGEX REPLACE "^.*(sse4_1).*$" "\\1" SSE_THERE ${CPUINFO}) - STRING(COMPARE EQUAL "sse4_1" "${SSE_THERE}" SSE41_TRUE) - IF (SSE41_TRUE) - set(SSE4_1_FOUND true CACHE BOOL "SSE4.1 available on host") - ELSE (SSE41_TRUE) - set(SSE4_1_FOUND false CACHE BOOL "SSE4.1 available on host") - ENDIF (SSE41_TRUE) -ELSEIF(CMAKE_SYSTEM_NAME MATCHES "Darwin") - EXEC_PROGRAM("/usr/sbin/sysctl -n machdep.cpu.features" OUTPUT_VARIABLE - CPUINFO) - - STRING(REGEX REPLACE "^.*[^S](SSE2).*$" "\\1" SSE_THERE ${CPUINFO}) - STRING(COMPARE EQUAL "SSE2" "${SSE_THERE}" SSE2_TRUE) - IF (SSE2_TRUE) - set(SSE2_FOUND true CACHE BOOL "SSE2 available on host") - ELSE (SSE2_TRUE) - 
set(SSE2_FOUND false CACHE BOOL "SSE2 available on host") - ENDIF (SSE2_TRUE) - - STRING(REGEX REPLACE "^.*[^S](SSE3).*$" "\\1" SSE_THERE ${CPUINFO}) - STRING(COMPARE EQUAL "SSE3" "${SSE_THERE}" SSE3_TRUE) - IF (SSE3_TRUE) - set(SSE3_FOUND true CACHE BOOL "SSE3 available on host") - ELSE (SSE3_TRUE) - set(SSE3_FOUND false CACHE BOOL "SSE3 available on host") - ENDIF (SSE3_TRUE) - - STRING(REGEX REPLACE "^.*(SSSE3).*$" "\\1" SSE_THERE ${CPUINFO}) - STRING(COMPARE EQUAL "SSSE3" "${SSE_THERE}" SSSE3_TRUE) - IF (SSSE3_TRUE) - set(SSSE3_FOUND true CACHE BOOL "SSSE3 available on host") - ELSE (SSSE3_TRUE) - set(SSSE3_FOUND false CACHE BOOL "SSSE3 available on host") - ENDIF (SSSE3_TRUE) - - STRING(REGEX REPLACE "^.*(SSE4.1).*$" "\\1" SSE_THERE ${CPUINFO}) - STRING(COMPARE EQUAL "SSE4.1" "${SSE_THERE}" SSE41_TRUE) - IF (SSE41_TRUE) - set(SSE4_1_FOUND true CACHE BOOL "SSE4.1 available on host") - ELSE (SSE41_TRUE) - set(SSE4_1_FOUND false CACHE BOOL "SSE4.1 available on host") - ENDIF (SSE41_TRUE) -ELSEIF(CMAKE_SYSTEM_NAME MATCHES "Windows") - # TODO - set(SSE2_FOUND true CACHE BOOL "SSE2 available on host") - set(SSE3_FOUND false CACHE BOOL "SSE3 available on host") - set(SSSE3_FOUND false CACHE BOOL "SSSE3 available on host") - set(SSE4_1_FOUND false CACHE BOOL "SSE4.1 available on host") -ELSE(CMAKE_SYSTEM_NAME MATCHES "Linux") - set(SSE2_FOUND true CACHE BOOL "SSE2 available on host") - set(SSE3_FOUND false CACHE BOOL "SSE3 available on host") - set(SSSE3_FOUND false CACHE BOOL "SSSE3 available on host") - set(SSE4_1_FOUND false CACHE BOOL "SSE4.1 available on host") -ENDIF(CMAKE_SYSTEM_NAME MATCHES "Linux") - -if(NOT SSE2_FOUND) - MESSAGE(STATUS "Could not find hardware support for SSE2 on this machine.") -endif(NOT SSE2_FOUND) -if(NOT SSE3_FOUND) - MESSAGE(STATUS "Could not find hardware support for SSE3 on this machine.") -endif(NOT SSE3_FOUND) -if(NOT SSSE3_FOUND) - MESSAGE(STATUS "Could not find hardware support for SSSE3 on this machine.") -endif(NOT SSSE3_FOUND) 
-if(NOT SSE4_1_FOUND) - MESSAGE(STATUS "Could not find hardware support for SSE4.1 on this machine.") -endif(NOT SSE4_1_FOUND) - -mark_as_advanced(SSE2_FOUND SSE3_FOUND SSSE3_FOUND SSE4_1_FOUND) diff --git a/cmake/OptimizeForArchitecture.cmake b/cmake/OptimizeForArchitecture.cmake new file mode 100644 index 0000000..a5acf5e --- /dev/null +++ b/cmake/OptimizeForArchitecture.cmake @@ -0,0 +1,563 @@ +## Determine the host CPU feature set and determine the best set of compiler +## flags to enable all supported SIMD relevant features. Alternatively, the +## target CPU can be explicitly selected (for generating more generic binaries +## or for targeting a different system). +## Compilers provide e.g. the -march=native flag to achieve a similar result. +## This fails to address the need for building for a different microarchitecture +## than the current host. +## The script tries to deduce all settings from the model and family numbers of +## the CPU instead of reading the CPUID flags from e.g. /proc/cpuinfo. This makes +## the detection more independent from the CPUID code in the kernel (e.g. avx2 is +# not listed on older kernels). +# +# Usage: +# OptimizeForArchitecture() +# If either of Vc_SSE_INTRINSICS_BROKEN, Vc_AVX_INTRINSICS_BROKEN, +# Vc_AVX2_INTRINSICS_BROKEN is defined and set, the OptimizeForArchitecture +# macro will consequently disable the relevant features via compiler flags. + +#============================================================================= +# Copyright 2010-2016 Matthias Kretz +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. 
+# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# * Neither the names of contributing organizations nor the +# names of its contributors may be used to endorse or promote products +# derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS ``AS IS'' +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR +# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+#============================================================================= + +get_filename_component(_currentDir "${CMAKE_CURRENT_LIST_FILE}" PATH) +#include("${_currentDir}/AddCompilerFlag.cmake") +include(CheckIncludeFileCXX) + +macro(_my_find _list _value _ret) + list(FIND ${_list} "${_value}" _found) + if(_found EQUAL -1) + set(${_ret} FALSE) + else(_found EQUAL -1) + set(${_ret} TRUE) + endif(_found EQUAL -1) +endmacro(_my_find) + +macro(OFA_AutodetectX86) + set(_vendor_id) + set(_cpu_family) + set(_cpu_model) + if(CMAKE_SYSTEM_NAME STREQUAL "Linux") + file(READ "/proc/cpuinfo" _cpuinfo) + string(REGEX REPLACE ".*vendor_id[ \t]*:[ \t]+([a-zA-Z0-9_-]+).*" "\\1" _vendor_id "${_cpuinfo}") + string(REGEX REPLACE ".*cpu family[ \t]*:[ \t]+([a-zA-Z0-9_-]+).*" "\\1" _cpu_family "${_cpuinfo}") + string(REGEX REPLACE ".*model[ \t]*:[ \t]+([a-zA-Z0-9_-]+).*" "\\1" _cpu_model "${_cpuinfo}") + string(REGEX REPLACE ".*flags[ \t]*:[ \t]+([^\n]+).*" "\\1" _cpu_flags "${_cpuinfo}") + elseif(CMAKE_SYSTEM_NAME STREQUAL "Darwin") + exec_program("/usr/sbin/sysctl -n machdep.cpu.vendor machdep.cpu.model machdep.cpu.family machdep.cpu.features" OUTPUT_VARIABLE _sysctl_output_string) + string(REPLACE "\n" ";" _sysctl_output ${_sysctl_output_string}) + list(GET _sysctl_output 0 _vendor_id) + list(GET _sysctl_output 1 _cpu_model) + list(GET _sysctl_output 2 _cpu_family) + list(GET _sysctl_output 3 _cpu_flags) + + string(TOLOWER "${_cpu_flags}" _cpu_flags) + string(REPLACE "." 
"_" _cpu_flags "${_cpu_flags}") + elseif(CMAKE_SYSTEM_NAME STREQUAL "Windows") + get_filename_component(_vendor_id "[HKEY_LOCAL_MACHINE\\Hardware\\Description\\System\\CentralProcessor\\0;VendorIdentifier]" NAME CACHE) + get_filename_component(_cpu_id "[HKEY_LOCAL_MACHINE\\Hardware\\Description\\System\\CentralProcessor\\0;Identifier]" NAME CACHE) + mark_as_advanced(_vendor_id _cpu_id) + string(REGEX REPLACE ".* Family ([0-9]+) .*" "\\1" _cpu_family "${_cpu_id}") + string(REGEX REPLACE ".* Model ([0-9]+) .*" "\\1" _cpu_model "${_cpu_id}") + endif(CMAKE_SYSTEM_NAME STREQUAL "Linux") + if(_vendor_id STREQUAL "GenuineIntel") + if(_cpu_family EQUAL 6) + # taken from the Intel ORM + # http://www.intel.com/content/www/us/en/processors/architectures-software-developer-manuals.html + # CPUID Signature Values of Of Recent Intel Microarchitectures + # 4E 5E | Skylake microarchitecture + # 3D 47 56 | Broadwell microarchitecture + # 3C 45 46 3F | Haswell microarchitecture + # 3A 3E | Ivy Bridge microarchitecture + # 2A 2D | Sandy Bridge microarchitecture + # 25 2C 2F | Intel microarchitecture Westmere + # 1A 1E 1F 2E | Intel microarchitecture Nehalem + # 17 1D | Enhanced Intel Core microarchitecture + # 0F | Intel Core microarchitecture + # + # Intel SDM Vol. 3C 35-1 / December 2016: + # 57 | Xeon Phi 3200, 5200, 7200 [Knights Landing] + # 85 | Future Xeon Phi + # 8E 9E | 7th gen. Core [Kaby Lake] + # 55 | Future Xeon [Skylake w/ AVX512] + # 4E 5E | 6th gen. Core / E3 v5 [Skylake w/o AVX512] + # 56 | Xeon D-1500 [Broadwell] + # 4F | Xeon E5 v4, E7 v4, i7-69xx [Broadwell] + # 47 | 5th gen. Core / Xeon E3 v4 [Broadwell] + # 3D | M-5xxx / 5th gen. [Broadwell] + # 3F | Xeon E5 v3, E7 v3, i7-59xx [Haswell-E] + # 3C 45 46 | 4th gen. Core, Xeon E3 v3 [Haswell] + # 3E | Xeon E5 v2, E7 v2, i7-49xx [Ivy Bridge-E] + # 3A | 3rd gen. Core, Xeon E3 v2 [Ivy Bridge] + # 2D | Xeon E5, i7-39xx [Sandy Bridge] + # 2F | Xeon E7 + # 2A | Xeon E3, 2nd gen. 
Core [Sandy Bridge] + # 2E | Xeon 7500, 6500 series + # 25 2C | Xeon 3600, 5600 series, Core i7, i5 and i3 + # + # Values from the Intel SDE: + # 5C | Goldmont + # 5A | Silvermont + # 57 | Knights Landing + # 66 | Cannonlake + # 55 | Skylake Server + # 4E | Skylake Client + # 3C | Broadwell (likely a bug in the SDE) + # 3C | Haswell + if(_cpu_model EQUAL 87) # 57 + set(TARGET_ARCHITECTURE "knl") # Knights Landing + elseif(_cpu_model EQUAL 92) + set(TARGET_ARCHITECTURE "goldmont") + elseif(_cpu_model EQUAL 90 OR _cpu_model EQUAL 76) + set(TARGET_ARCHITECTURE "silvermont") + elseif(_cpu_model EQUAL 102) + set(TARGET_ARCHITECTURE "cannonlake") + elseif(_cpu_model EQUAL 142 OR _cpu_model EQUAL 158) # 8E, 9E + set(TARGET_ARCHITECTURE "kaby-lake") + elseif(_cpu_model EQUAL 85) # 55 + set(TARGET_ARCHITECTURE "skylake-avx512") + elseif(_cpu_model EQUAL 78 OR _cpu_model EQUAL 94) # 4E, 5E + set(TARGET_ARCHITECTURE "skylake") + elseif(_cpu_model EQUAL 61 OR _cpu_model EQUAL 71 OR _cpu_model EQUAL 79 OR _cpu_model EQUAL 86) # 3D, 47, 4F, 56 + set(TARGET_ARCHITECTURE "broadwell") + elseif(_cpu_model EQUAL 60 OR _cpu_model EQUAL 69 OR _cpu_model EQUAL 70 OR _cpu_model EQUAL 63) + set(TARGET_ARCHITECTURE "haswell") + elseif(_cpu_model EQUAL 58 OR _cpu_model EQUAL 62) + set(TARGET_ARCHITECTURE "ivy-bridge") + elseif(_cpu_model EQUAL 42 OR _cpu_model EQUAL 45) + set(TARGET_ARCHITECTURE "sandy-bridge") + elseif(_cpu_model EQUAL 37 OR _cpu_model EQUAL 44 OR _cpu_model EQUAL 47) + set(TARGET_ARCHITECTURE "westmere") + elseif(_cpu_model EQUAL 26 OR _cpu_model EQUAL 30 OR _cpu_model EQUAL 31 OR _cpu_model EQUAL 46) + set(TARGET_ARCHITECTURE "nehalem") + elseif(_cpu_model EQUAL 23 OR _cpu_model EQUAL 29) + set(TARGET_ARCHITECTURE "penryn") + elseif(_cpu_model EQUAL 15) + set(TARGET_ARCHITECTURE "merom") + elseif(_cpu_model EQUAL 28) + set(TARGET_ARCHITECTURE "atom") + elseif(_cpu_model EQUAL 14) + set(TARGET_ARCHITECTURE "core") + elseif(_cpu_model LESS 14) + message(WARNING "Your CPU 
(family ${_cpu_family}, model ${_cpu_model}) is not known. Auto-detection of optimization flags failed and will use the generic CPU settings with SSE2.") + set(TARGET_ARCHITECTURE "generic") + else() + message(WARNING "Your CPU (family ${_cpu_family}, model ${_cpu_model}) is not known. Auto-detection of optimization flags failed and will use the 65nm Core 2 CPU settings.") + set(TARGET_ARCHITECTURE "merom") + endif() + elseif(_cpu_family EQUAL 7) # Itanium (not supported) + message(WARNING "Your CPU (Itanium: family ${_cpu_family}, model ${_cpu_model}) is not supported by OptimizeForArchitecture.cmake.") + elseif(_cpu_family EQUAL 15) # NetBurst + list(APPEND _available_vector_units_list "sse" "sse2") + if(_cpu_model GREATER 2) # Not sure whether this must be 3 or even 4 instead + list(APPEND _available_vector_units_list "sse" "sse2" "sse3") + endif(_cpu_model GREATER 2) + endif(_cpu_family EQUAL 6) + elseif(_vendor_id STREQUAL "AuthenticAMD") + if(_cpu_family EQUAL 23) + set(TARGET_ARCHITECTURE "zen") + elseif(_cpu_family EQUAL 22) # 16h + set(TARGET_ARCHITECTURE "AMD 16h") + elseif(_cpu_family EQUAL 21) # 15h + if(_cpu_model LESS 2) + set(TARGET_ARCHITECTURE "bulldozer") + else() + set(TARGET_ARCHITECTURE "piledriver") + endif() + elseif(_cpu_family EQUAL 20) # 14h + set(TARGET_ARCHITECTURE "AMD 14h") + elseif(_cpu_family EQUAL 18) # 12h + elseif(_cpu_family EQUAL 16) # 10h + set(TARGET_ARCHITECTURE "barcelona") + elseif(_cpu_family EQUAL 15) + set(TARGET_ARCHITECTURE "k8") + if(_cpu_model GREATER 64) # I don't know the right number to put here. 
This is just a guess from the hardware I have access to + set(TARGET_ARCHITECTURE "k8-sse3") + endif(_cpu_model GREATER 64) + endif() + endif(_vendor_id STREQUAL "GenuineIntel") +endmacro() + +macro(OFA_AutodetectArm) + message(WARNING "Architecture auto-detection for CMAKE_SYSTEM_PROCESSOR '${CMAKE_SYSTEM_PROCESSOR}' is not supported by OptimizeForArchitecture.cmake") +endmacro() + +macro(OFA_AutodetectHostArchitecture) + set(TARGET_ARCHITECTURE "generic") + set(Vc_ARCHITECTURE_FLAGS) + if("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "(x86|AMD64)") + OFA_AutodetectX86() + elseif("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "(arm|aarch32|aarch64)") + OFA_AutodetectArm() + else() + message(FATAL_ERROR "OptimizeForArchitecture.cmake does not implement support for CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}") + endif() +endmacro() + +macro(OFA_HandleX86Options) + set(_march_flag_list) + set(_available_vector_units_list) + macro(_nehalem) + list(APPEND _march_flag_list "nehalem") + list(APPEND _march_flag_list "corei7") + list(APPEND _march_flag_list "core2") + list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3" "sse4.1" "sse4.2") + endmacro() + macro(_westmere) + list(APPEND _march_flag_list "westmere") + _nehalem() + endmacro() + macro(_sandybridge) + list(APPEND _march_flag_list "sandybridge") + list(APPEND _march_flag_list "corei7-avx") + _westmere() + list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3" "sse4.1" "sse4.2" "avx") + endmacro() + macro(_ivybridge) + list(APPEND _march_flag_list "ivybridge") + list(APPEND _march_flag_list "core-avx-i") + _sandybridge() + list(APPEND _available_vector_units_list "rdrnd" "f16c") + endmacro() + macro(_haswell) + list(APPEND _march_flag_list "haswell") + list(APPEND _march_flag_list "core-avx2") + _ivybridge() + list(APPEND _available_vector_units_list "avx2" "fma" "bmi" "bmi2") + endmacro() + macro(_broadwell) + list(APPEND _march_flag_list "broadwell") + _haswell() + endmacro() + macro(_skylake) + 
list(APPEND _march_flag_list "skylake") + _broadwell() + endmacro() + macro(_skylake_avx512) + list(APPEND _march_flag_list "skylake-avx512") + _skylake() + list(APPEND _available_vector_units_list "avx512f" "avx512cd" "avx512dq" "avx512bw" "avx512vl") + endmacro() + macro(_cannonlake) + list(APPEND _march_flag_list "cannonlake") + _skylake_avx512() + list(APPEND _available_vector_units_list "avx512ifma" "avx512vbmi") + endmacro() + macro(_knightslanding) + list(APPEND _march_flag_list "knl") + _broadwell() + list(APPEND _available_vector_units_list "avx512f" "avx512pf" "avx512er" "avx512cd") + endmacro() + macro(_silvermont) + list(APPEND _march_flag_list "silvermont") + _westmere() + list(APPEND _available_vector_units_list "rdrnd") + endmacro() + macro(_goldmont) + list(APPEND _march_flag_list "goldmont") + _silvermont() + endmacro() + + if(TARGET_ARCHITECTURE STREQUAL "core") + list(APPEND _march_flag_list "core2") + list(APPEND _available_vector_units_list "sse" "sse2" "sse3") + elseif(TARGET_ARCHITECTURE STREQUAL "merom") + list(APPEND _march_flag_list "merom") + list(APPEND _march_flag_list "core2") + list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3") + elseif(TARGET_ARCHITECTURE STREQUAL "penryn") + list(APPEND _march_flag_list "penryn") + list(APPEND _march_flag_list "core2") + list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3") + message(STATUS "Sadly the Penryn architecture exists in variants with SSE4.1 and without SSE4.1.") + if(_cpu_flags MATCHES "sse4_1") + message(STATUS "SSE4.1: enabled (auto-detected from this computer's CPU flags)") + list(APPEND _available_vector_units_list "sse4.1") + else() + message(STATUS "SSE4.1: disabled (auto-detected from this computer's CPU flags)") + endif() + elseif(TARGET_ARCHITECTURE STREQUAL "knl") + _knightslanding() + elseif(TARGET_ARCHITECTURE STREQUAL "cannonlake") + _cannonlake() + elseif(TARGET_ARCHITECTURE STREQUAL "kaby-lake") + _skylake() + elseif(TARGET_ARCHITECTURE 
STREQUAL "skylake-xeon" OR TARGET_ARCHITECTURE STREQUAL "skylake-avx512") + _skylake_avx512() + elseif(TARGET_ARCHITECTURE STREQUAL "skylake") + _skylake() + elseif(TARGET_ARCHITECTURE STREQUAL "broadwell") + _broadwell() + elseif(TARGET_ARCHITECTURE STREQUAL "haswell") + _haswell() + elseif(TARGET_ARCHITECTURE STREQUAL "ivy-bridge") + _ivybridge() + elseif(TARGET_ARCHITECTURE STREQUAL "sandy-bridge") + _sandybridge() + elseif(TARGET_ARCHITECTURE STREQUAL "westmere") + _westmere() + elseif(TARGET_ARCHITECTURE STREQUAL "nehalem") + _nehalem() + elseif(TARGET_ARCHITECTURE STREQUAL "goldmont") + _goldmont() + elseif(TARGET_ARCHITECTURE STREQUAL "silvermont") + _silvermont() + elseif(TARGET_ARCHITECTURE STREQUAL "atom") + list(APPEND _march_flag_list "atom") + list(APPEND _march_flag_list "core2") + list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3") + elseif(TARGET_ARCHITECTURE STREQUAL "k8") + list(APPEND _march_flag_list "k8") + list(APPEND _available_vector_units_list "sse" "sse2") + elseif(TARGET_ARCHITECTURE STREQUAL "k8-sse3") + list(APPEND _march_flag_list "k8-sse3") + list(APPEND _march_flag_list "k8") + list(APPEND _available_vector_units_list "sse" "sse2" "sse3") + elseif(TARGET_ARCHITECTURE STREQUAL "AMD 16h") + list(APPEND _march_flag_list "btver2") + list(APPEND _march_flag_list "btver1") + list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3" "sse4a" "sse4.1" "sse4.2" "avx" "f16c") + elseif(TARGET_ARCHITECTURE STREQUAL "AMD 14h") + list(APPEND _march_flag_list "btver1") + list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3" "sse4a") + elseif(TARGET_ARCHITECTURE STREQUAL "zen") + list(APPEND _march_flag_list "znver1") + _skylake() + list(APPEND _available_vector_units_list "sse4a") + elseif(TARGET_ARCHITECTURE STREQUAL "piledriver") + list(APPEND _march_flag_list "bdver2") + list(APPEND _march_flag_list "bdver1") + list(APPEND _march_flag_list "bulldozer") + list(APPEND _march_flag_list "barcelona") + 
list(APPEND _march_flag_list "core2") + list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3" "sse4a" "sse4.1" "sse4.2" "avx" "xop" "fma4" "fma" "f16c") + elseif(TARGET_ARCHITECTURE STREQUAL "interlagos") + list(APPEND _march_flag_list "bdver1") + list(APPEND _march_flag_list "bulldozer") + list(APPEND _march_flag_list "barcelona") + list(APPEND _march_flag_list "core2") + list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3" "sse4a" "sse4.1" "sse4.2" "avx" "xop" "fma4") + elseif(TARGET_ARCHITECTURE STREQUAL "bulldozer") + list(APPEND _march_flag_list "bdver1") + list(APPEND _march_flag_list "bulldozer") + list(APPEND _march_flag_list "barcelona") + list(APPEND _march_flag_list "core2") + list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "ssse3" "sse4a" "sse4.1" "sse4.2" "avx" "xop" "fma4") + elseif(TARGET_ARCHITECTURE STREQUAL "barcelona") + list(APPEND _march_flag_list "barcelona") + list(APPEND _march_flag_list "core2") + list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "sse4a") + elseif(TARGET_ARCHITECTURE STREQUAL "istanbul") + list(APPEND _march_flag_list "barcelona") + list(APPEND _march_flag_list "core2") + list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "sse4a") + elseif(TARGET_ARCHITECTURE STREQUAL "magny-cours") + list(APPEND _march_flag_list "barcelona") + list(APPEND _march_flag_list "core2") + list(APPEND _available_vector_units_list "sse" "sse2" "sse3" "sse4a") + elseif(TARGET_ARCHITECTURE STREQUAL "generic") + list(APPEND _march_flag_list "generic") + elseif(TARGET_ARCHITECTURE STREQUAL "none") + # add this clause to remove it from the else clause + else(TARGET_ARCHITECTURE STREQUAL "core") + message(FATAL_ERROR "Unknown target architecture: \"${TARGET_ARCHITECTURE}\". 
Please set TARGET_ARCHITECTURE to a supported value.") + endif(TARGET_ARCHITECTURE STREQUAL "core") + + if(NOT TARGET_ARCHITECTURE STREQUAL "none") + set(_disable_vector_unit_list) + set(_enable_vector_unit_list) + if(DEFINED Vc_AVX_INTRINSICS_BROKEN AND Vc_AVX_INTRINSICS_BROKEN) + message(STATUS "AVX disabled because of old/broken toolchain") + set(_avx_broken true) + set(_avx2_broken true) + set(_fma4_broken true) + set(_xop_broken true) + else() + set(_avx_broken false) + if(DEFINED Vc_FMA4_INTRINSICS_BROKEN AND Vc_FMA4_INTRINSICS_BROKEN) + message(STATUS "FMA4 disabled because of old/broken toolchain") + set(_fma4_broken true) + else() + set(_fma4_broken false) + endif() + if(DEFINED Vc_XOP_INTRINSICS_BROKEN AND Vc_XOP_INTRINSICS_BROKEN) + message(STATUS "XOP disabled because of old/broken toolchain") + set(_xop_broken true) + else() + set(_xop_broken false) + endif() + if(DEFINED Vc_AVX2_INTRINSICS_BROKEN AND Vc_AVX2_INTRINSICS_BROKEN) + message(STATUS "AVX2 disabled because of old/broken toolchain") + set(_avx2_broken true) + else() + set(_avx2_broken false) + endif() + endif() + + macro(_enable_or_disable _name _flag _documentation _broken) + if(_broken) + set(_found false) + else() + _my_find(_available_vector_units_list "${_flag}" _found) + endif() + set(USE_${_name} ${_found} CACHE BOOL "${documentation}" ${_force}) + mark_as_advanced(USE_${_name}) + if(USE_${_name}) + list(APPEND _enable_vector_unit_list "${_flag}") + else() + list(APPEND _disable_vector_unit_list "${_flag}") + endif() + endmacro() + _enable_or_disable(SSE2 "sse2" "Use SSE2. If SSE2 instructions are not enabled the SSE implementation will be disabled." false) + _enable_or_disable(SSE3 "sse3" "Use SSE3. If SSE3 instructions are not enabled they will be emulated." false) + _enable_or_disable(SSSE3 "ssse3" "Use SSSE3. If SSSE3 instructions are not enabled they will be emulated." false) + _enable_or_disable(SSE4_1 "sse4.1" "Use SSE4.1. 
If SSE4.1 instructions are not enabled they will be emulated." false) + _enable_or_disable(SSE4_2 "sse4.2" "Use SSE4.2. If SSE4.2 instructions are not enabled they will be emulated." false) + _enable_or_disable(SSE4a "sse4a" "Use SSE4a. If SSE4a instructions are not enabled they will be emulated." false) + _enable_or_disable(AVX "avx" "Use AVX. This will all floating-point vector sizes relative to SSE." _avx_broken) + _enable_or_disable(FMA "fma" "Use FMA." _avx_broken) + _enable_or_disable(BMI2 "bmi2" "Use BMI2." _avx_broken) + _enable_or_disable(AVX2 "avx2" "Use AVX2. This will double all of the vector sizes relative to SSE." _avx2_broken) + _enable_or_disable(XOP "xop" "Use XOP." _xop_broken) + _enable_or_disable(FMA4 "fma4" "Use FMA4." _fma4_broken) + _enable_or_disable(AVX512F "avx512f" "Use AVX512F. This will double all floating-point vector sizes relative to AVX2." false) + _enable_or_disable(AVX512VL "avx512vl" "Use AVX512VL. This enables 128- and 256-bit vector length instructions with EVEX coding (improved write-masking & more vector registers)." _avx2_broken) + _enable_or_disable(AVX512PF "avx512pf" "Use AVX512PF. This enables prefetch instructions for gathers and scatters." false) + _enable_or_disable(AVX512ER "avx512er" "Use AVX512ER. This enables exponential and reciprocal instructions." false) + _enable_or_disable(AVX512CD "avx512cd" "Use AVX512CD." false) + _enable_or_disable(AVX512DQ "avx512dq" "Use AVX512DQ." false) + _enable_or_disable(AVX512BW "avx512bw" "Use AVX512BW." false) + _enable_or_disable(AVX512IFMA "avx512ifma" "Use AVX512IFMA." false) + _enable_or_disable(AVX512VBMI "avx512vbmi" "Use AVX512VBMI." 
false) + +# if(MSVC) +# # MSVC on 32 bit can select /arch:SSE2 (since 2010 also /arch:AVX) +# # MSVC on 64 bit cannot select anything (should have changed with MSVC 2010) +# _my_find(_enable_vector_unit_list "avx2" _found) +# if(_found) +# AddCompilerFlag("/arch:AVX2" CXX_FLAGS Vc_ARCHITECTURE_FLAGS CXX_RESULT _found) +# endif() +# if(NOT _found) +# _my_find(_enable_vector_unit_list "avx" _found) +# if(_found) +# AddCompilerFlag("/arch:AVX" CXX_FLAGS Vc_ARCHITECTURE_FLAGS CXX_RESULT _found) +# endif() +# endif() +# if(NOT _found) +# _my_find(_enable_vector_unit_list "sse2" _found) +# if(_found) +# AddCompilerFlag("/arch:SSE2" CXX_FLAGS Vc_ARCHITECTURE_FLAGS) +# endif() +# endif() +# foreach(_flag ${_enable_vector_unit_list}) +# string(TOUPPER "${_flag}" _flag) +# string(REPLACE "." "_" _flag "__${_flag}__") +# add_definitions("-D${_flag}") +# endforeach(_flag) +# elseif(CMAKE_CXX_COMPILER MATCHES "/(icpc|icc)$") # ICC (on Linux) +# set(OFA_map_knl "-xMIC-AVX512") +# set(OFA_map_cannonlake "-xCORE-AVX512") +# set(OFA_map_skylake-avx512 "-xCORE-AVX512") +# set(OFA_map_skylake "-xCORE-AVX2") +# set(OFA_map_broadwell "-xCORE-AVX2") +# set(OFA_map_haswell "-xCORE-AVX2") +# set(OFA_map_ivybridge "-xCORE-AVX-I") +# set(OFA_map_sandybridge "-xAVX") +# set(OFA_map_westmere "-xSSE4.2") +# set(OFA_map_nehalem "-xSSE4.2") +# set(OFA_map_penryn "-xSSSE3") +# set(OFA_map_merom "-xSSSE3") +# set(OFA_map_core2 "-xSSE3") +# set(_ok FALSE) +# foreach(arch ${_march_flag_list}) +# if(DEFINED OFA_map_${arch}) +# AddCompilerFlag(${OFA_map_${arch}} CXX_FLAGS Vc_ARCHITECTURE_FLAGS CXX_RESULT _ok) +# if(_ok) +# break() +# endif() +# endif() +# endforeach() +# if(NOT _ok) +# # This is the Intel compiler, so SSE2 is a very reasonable baseline. 
+# message(STATUS "Did not recognize the requested architecture flag, falling back to SSE2") +# AddCompilerFlag("-xSSE2" CXX_FLAGS Vc_ARCHITECTURE_FLAGS) +# endif() +# else() # not MSVC and not ICC => GCC, Clang, Open64 +# foreach(_flag ${_march_flag_list}) +# AddCompilerFlag("-march=${_flag}" CXX_RESULT _good CXX_FLAGS Vc_ARCHITECTURE_FLAGS) +# if(_good) +# break() +# endif(_good) +# endforeach(_flag) +# foreach(_flag ${_enable_vector_unit_list}) +# AddCompilerFlag("-m${_flag}" CXX_RESULT _result) +# if(_result) +# set(_header FALSE) +# if(_flag STREQUAL "sse3") +# set(_header "pmmintrin.h") +# elseif(_flag STREQUAL "ssse3") +# set(_header "tmmintrin.h") +# elseif(_flag STREQUAL "sse4.1") +# set(_header "smmintrin.h") +# elseif(_flag STREQUAL "sse4.2") +# set(_header "smmintrin.h") +# elseif(_flag STREQUAL "sse4a") +# set(_header "ammintrin.h") +# elseif(_flag STREQUAL "avx") +# set(_header "immintrin.h") +# elseif(_flag STREQUAL "avx2") +# set(_header "immintrin.h") +# elseif(_flag STREQUAL "fma4") +# set(_header "x86intrin.h") +# elseif(_flag STREQUAL "xop") +# set(_header "x86intrin.h") +# endif() +# set(_resultVar "HAVE_${_header}") +# string(REPLACE "." "_" _resultVar "${_resultVar}") +# if(_header) +# CHECK_INCLUDE_FILE_CXX("${_header}" ${_resultVar} "-m${_flag}") +# if(NOT ${_resultVar}) +# set(_useVar "USE_${_flag}") +# string(TOUPPER "${_useVar}" _useVar) +# string(REPLACE "." 
"_" _useVar "${_useVar}") +# message(STATUS "disabling ${_useVar} because ${_header} is missing") +# set(${_useVar} FALSE) +# list(APPEND _disable_vector_unit_list "${_flag}") +# endif() +# endif() +# if(NOT _header OR ${_resultVar}) +# list(APPEND Vc_ARCHITECTURE_FLAGS "-m${_flag}") +# endif() +# endif() +# endforeach(_flag) +# foreach(_flag ${_disable_vector_unit_list}) +# AddCompilerFlag("-mno-${_flag}" CXX_FLAGS Vc_ARCHITECTURE_FLAGS) +# endforeach(_flag) +# endif() + endif() +endmacro() +# \ No newline at end of file From 16223faa1e14a6581769b348fde43813bfa0ed82 Mon Sep 17 00:00:00 2001 From: Peter Steinbach Date: Mon, 30 Apr 2018 14:28:45 +0200 Subject: [PATCH 12/17] removed cmake folder --- .gitignore | 1 - 1 file changed, 1 deletion(-) diff --git a/.gitignore b/.gitignore index 4fc12ad..99dbb19 100644 --- a/.gitignore +++ b/.gitignore @@ -5,4 +5,3 @@ debug/* *.vcxproj* x64 x32 -cmake \ No newline at end of file From c1f0d58831626f2667079c3b66d0b8aab84b29af Mon Sep 17 00:00:00 2001 From: Peter Steinbach Date: Mon, 30 Apr 2018 14:32:27 +0200 Subject: [PATCH 13/17] removed println --- cmake/FindCPU_FEATURES.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmake/FindCPU_FEATURES.cmake b/cmake/FindCPU_FEATURES.cmake index a2d88eb..e67efc6 100644 --- a/cmake/FindCPU_FEATURES.cmake +++ b/cmake/FindCPU_FEATURES.cmake @@ -252,7 +252,7 @@ ELSEIF(CMAKE_SYSTEM_NAME MATCHES "Windows") OFA_AutodetectHostArchitecture() OFA_HandleX86Options() - message("++ _available_vector_units_list = ${_available_vector_units_list}") + list(FIND _available_vector_units_list "sse" SSE_INDEX) list(FIND _available_vector_units_list "sse2" SSE2_INDEX) @@ -296,7 +296,7 @@ ELSEIF(CMAKE_SYSTEM_NAME MATCHES "Windows") set(SSE4_2_FOUND false CACHE BOOL "SSE4.2 available on host") set(AVX_FOUND false CACHE BOOL "AVX available on host") set(AVX2_FOUND false CACHE BOOL "AVX2 available on host") - + ENDIF(CMAKE_SYSTEM_NAME MATCHES "Linux") message(STATUS "found hardware: 
${CPU_VENDOR} ${CPU_MODEL_NAME}") From 919a574d88a1bad59c5395ca7632fe716219ced5 Mon Sep 17 00:00:00 2001 From: Peter Steinbach Date: Mon, 30 Apr 2018 14:41:08 +0200 Subject: [PATCH 14/17] added safe-guard in case L2 cache size not available --- cmake/FindCPU_FEATURES.cmake | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/cmake/FindCPU_FEATURES.cmake b/cmake/FindCPU_FEATURES.cmake index e67efc6..95b0535 100644 --- a/cmake/FindCPU_FEATURES.cmake +++ b/cmake/FindCPU_FEATURES.cmake @@ -1,4 +1,3 @@ - ##extract a single line that contains the given pattern (starting from that patter) function(extract_line LINESTART_PATTERN MULTILINE_STRING OUTPUT_VARIABLE) @@ -226,10 +225,10 @@ ELSEIF(CMAKE_SYSTEM_NAME MATCHES "Darwin") ELSEIF(CMAKE_SYSTEM_NAME MATCHES "Windows") - #as an alternative to wmic, use + #as an alternative to wmic, use #get_filename_component(_vendor_id "[HKEY_LOCAL_MACHINE\\Hardware\\Description\\System\\CentralProcessor\\0;VendorIdentifier]" NAME CACHE) #get_filename_component(_cpu_id "[HKEY_LOCAL_MACHINE\\Hardware\\Description\\System\\CentralProcessor\\0;Identifier]" NAME CACHE) - + wmic_get("Name" MODEL_NAME) string(STRIP ${MODEL_NAME} MODEL_NAME) set(CPU_MODEL_NAME "${MODEL_NAME}" CACHE STRING "cpu model name") @@ -241,19 +240,25 @@ ELSEIF(CMAKE_SYSTEM_NAME MATCHES "Windows") wmic_get("NumberOfCores" CPU_NPHYS_CORES) #dirty hack that I need to validate with other machines in Win7 - if(${CPU_NPHYS_CORES} GREATER 1) - math(EXPR L2_CACHE_SIZE "${L2_CACHE_SIZE}/${CPU_NPHYS_CORES}") + if(NOT "${L2_CACHE_SIZE}" STR_EQUAL "") + if(${CPU_NPHYS_CORES} GREATER 1) + math(EXPR L2_CACHE_SIZE "${L2_CACHE_SIZE}/${CPU_NPHYS_CORES}") + endif() + set(CPU_L2_SIZE_KB "${L2_CACHE_SIZE}" CACHE STRING "cpu L2 cache size in kB") + else() + message("unable to decipher L2 cache size") endif() - - set(CPU_L2_SIZE_KB "${L2_CACHE_SIZE}" CACHE STRING "cpu L2 cache size in kB") + set(CPU_L2_SIZE_KB "0" CACHE STRING "cpu L2 cache size in 
kB") + set(CPU_VENDOR "" CACHE STRING "cpu model vendor") + set(CPU_MODEL_NAME "" CACHE STRING "cpu model name") #thanks to the wonderful VC project (https://github.com/VcDevel/Vc) include (OptimizeForArchitecture) - + OFA_AutodetectHostArchitecture() OFA_HandleX86Options() - - + + list(FIND _available_vector_units_list "sse" SSE_INDEX) list(FIND _available_vector_units_list "sse2" SSE2_INDEX) list(FIND _available_vector_units_list "sse3" SSE3_INDEX) From a4427def36463b987b527fbfa90602fb430d6110 Mon Sep 17 00:00:00 2001 From: Peter Steinbach Date: Mon, 30 Apr 2018 14:41:22 +0200 Subject: [PATCH 15/17] calling wmic from CLI to provide reference data --- appveyor.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/appveyor.yml b/appveyor.yml index fe6f9fc..b86830f 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -15,6 +15,7 @@ install: [] build_script: - IF "%APPVEYOR_BUILD_WORKER_IMAGE%" == "Visual Studio 2015" ( SET GEN="Visual Studio 14 2015") ELSE (SET GEN="Visual Studio 15 2017") + - wmic.exe cpu get - cmake . -G%GEN% # -DCMAKE_CXX_FLAGS="%additional_flags%" - cmake --build . 
--config Release From b0e196ad803ccebc27f911f996991ac4ded0f3e9 Mon Sep 17 00:00:00 2001 From: Peter Steinbach Date: Mon, 30 Apr 2018 14:46:06 +0200 Subject: [PATCH 16/17] fixed cmake syntax error --- cmake/FindCPU_FEATURES.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/FindCPU_FEATURES.cmake b/cmake/FindCPU_FEATURES.cmake index 95b0535..92b6445 100644 --- a/cmake/FindCPU_FEATURES.cmake +++ b/cmake/FindCPU_FEATURES.cmake @@ -240,7 +240,7 @@ ELSEIF(CMAKE_SYSTEM_NAME MATCHES "Windows") wmic_get("NumberOfCores" CPU_NPHYS_CORES) #dirty hack that I need to validate with other machines in Win7 - if(NOT "${L2_CACHE_SIZE}" STR_EQUAL "") + if(NOT "${L2_CACHE_SIZE}" STREQUAL "") if(${CPU_NPHYS_CORES} GREATER 1) math(EXPR L2_CACHE_SIZE "${L2_CACHE_SIZE}/${CPU_NPHYS_CORES}") endif() From 3be9d77cb4055fa77e3d7d4a3fe30cf32c58034f Mon Sep 17 00:00:00 2001 From: Peter Steinbach Date: Mon, 30 Apr 2018 14:57:51 +0200 Subject: [PATCH 17/17] weakened the requirement on finding the cache size as it's not so easy to find in VMs (e.g. used by appveyor) --- tests/test_build_machine.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/test_build_machine.cpp b/tests/test_build_machine.cpp index 02aee98..d12607b 100644 --- a/tests/test_build_machine.cpp +++ b/tests/test_build_machine.cpp @@ -113,7 +113,10 @@ TEST_CASE_METHOD( host_reference, "machine_specific" ){ auto value = compass::runtime::size::cache::level(2); - REQUIRE(value >> 10 ==expected_L2_size_kB); + REQUIRE(value > 0); + if(expected_L2_size_kB){ + REQUIRE(value >> 10 ==expected_L2_size_kB); + } } }