diff --git a/cmake/DaemonArchitecture.cmake b/cmake/DaemonArchitecture.cmake index 662343ee7a..aa1406776f 100644 --- a/cmake/DaemonArchitecture.cmake +++ b/cmake/DaemonArchitecture.cmake @@ -68,6 +68,17 @@ message(STATUS "Detected architecture: ${ARCH}") add_definitions(-D${ARCH_DEFINE}) +if (USE_CPU_INTRINSICS) + add_definitions(-DDAEMON_CPU_INTRINSICS_${ARCH}=1) + + set(amd64_PARENT "i686") + set(arm64_PARENT "armhf") + + if (${ARCH}_PARENT) + add_definitions(-DDAEMON_CPU_INTRINSICS_${${ARCH}_PARENT}=1) + endif() +endif() + # This string can be modified without breaking compatibility. # Quotes cannot be part of the define as support for them is not reliable. # See: https://cmake.org/cmake/help/latest/prop_dir/COMPILE_DEFINITIONS.html diff --git a/cmake/DaemonFlags.cmake b/cmake/DaemonFlags.cmake index 6d5be42cde..ead5d2decc 100644 --- a/cmake/DaemonFlags.cmake +++ b/cmake/DaemonFlags.cmake @@ -29,6 +29,12 @@ include(CheckCXXCompilerFlag) add_definitions(-DDAEMON_BUILD_${CMAKE_BUILD_TYPE}) +option(USE_CPU_INTRINSICS "Enable custom CPU asm and intrinsics code" ON) + +if (USE_CPU_INTRINSICS) + add_definitions(-DDAEMON_USE_CPU_INTRINSICS=1) +endif() + # Set flag without checking, optional argument specifies build type macro(set_c_flag FLAG) if (${ARGC} GREATER 1) diff --git a/src/common/Compiler.h b/src/common/Compiler.h index c3649c5a7a..d5f3d46fcf 100644 --- a/src/common/Compiler.h +++ b/src/common/Compiler.h @@ -84,6 +84,7 @@ int CountTrailingZeroes(unsigned long long x); // Raise an exception and break in the debugger #if defined(DAEMON_ARCH_i686) || defined(DAEMON_ARCH_amd64) + // BREAKPOINT() is gated on the architecture only: this asm is used even when DAEMON_USE_CPU_INTRINSICS is not set. 
#define BREAKPOINT() __asm__ __volatile__("int $3\n\t") #elif defined(DAEMON_ARCH_nacl) // TODO: find how to implement breakpoint on NaCl diff --git a/src/common/Platform.h b/src/common/Platform.h index 9ccef8df7e..91e97f462f 100644 --- a/src/common/Platform.h +++ b/src/common/Platform.h @@ -63,15 +63,38 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define __x86_64__ 1 #endif -// SSE support -#if defined(__x86_64__) || defined(__SSE__) || _M_IX86_FP >= 1 -#include -#if defined(__x86_64__) || defined(__SSE2__) || _M_IX86_FP >= 2 -#include -#define idx86_sse 2 -#else -#define idx86_sse 1 +/* The definition name syntax is: DAEMON_CPU_INTRINSICS_<architecture>[_extension] + +Examples: + +- DAEMON_CPU_INTRINSICS_i686: i686 specific code, including asm code. +- DAEMON_CPU_INTRINSICS_i686_sse: i686 SSE specific code. +- DAEMON_CPU_INTRINSICS_i686_sse2: i686 SSE2 specific code. + +If a platform inherits a feature from a parent platform, the parent +platform name is used. For example on amd64, the definition enabling +SSE code is DAEMON_CPU_INTRINSICS_i686_sse, enabling SSE code on both +i686 with SSE and amd64. + +The definition for the CPU itself is automatically set by CMake. */ + +#if defined(DAEMON_USE_CPU_INTRINSICS) + // Set CPU extension definitions. 
+ #if defined(__x86_64__) || defined(__SSE2__) || _M_IX86_FP >= 2 /* amd64 always provides SSE2; MSVC x64 defines neither __SSE2__ nor _M_IX86_FP, so test __x86_64__ explicitly. */ + #define DAEMON_CPU_INTRINSICS_i686_sse2 + #endif + + #if defined(__x86_64__) || defined(__SSE__) || _M_IX86_FP >= 1 /* Same reasoning as for SSE2: amd64 always provides SSE. */ + #define DAEMON_CPU_INTRINSICS_i686_sse + #endif #endif + +#if defined(DAEMON_CPU_INTRINSICS_i686_sse) + #include <xmmintrin.h> +#endif + +#if defined(DAEMON_CPU_INTRINSICS_i686_sse2) + #include <emmintrin.h> #endif // VM Prefixes diff --git a/src/engine/qcommon/q_math.cpp b/src/engine/qcommon/q_math.cpp index 4d166b199b..3e41022c11 100644 --- a/src/engine/qcommon/q_math.cpp +++ b/src/engine/qcommon/q_math.cpp @@ -740,7 +740,7 @@ void SetPlaneSignbits( cplane_t *out ) int BoxOnPlaneSide( const vec3_t emins, const vec3_t emaxs, const cplane_t *p ) { -#if idx86_sse +#if defined(DAEMON_CPU_INTRINSICS_i686_sse) auto mins = sseLoadVec3Unsafe( emins ); auto maxs = sseLoadVec3Unsafe( emaxs ); auto normal = sseLoadVec3Unsafe( p->normal ); @@ -1799,7 +1799,7 @@ void MatrixSetupShear( matrix_t m, vec_t x, vec_t y ) void MatrixMultiply( const matrix_t a, const matrix_t b, matrix_t out ) { -#if idx86_sse +#if defined(DAEMON_CPU_INTRINSICS_i686_sse) //#error MatrixMultiply int i; __m128 _t0, _t1, _t2, _t3, _t4, _t5, _t6, _t7; @@ -3288,7 +3288,8 @@ void QuatTransformVectorInverse( const quat_t q, const vec3_t in, vec3_t out ) VectorAdd( out, tmp2, out ); } -#if !idx86_sse +// The SSE variants are inline functions defined in q_shared.h. +#if !defined(DAEMON_CPU_INTRINSICS_i686_sse) // create an identity transform void TransInit( transform_t *t ) { diff --git a/src/engine/qcommon/q_shared.h b/src/engine/qcommon/q_shared.h index 095af5442a..cd4eb8afc1 100644 --- a/src/engine/qcommon/q_shared.h +++ b/src/engine/qcommon/q_shared.h @@ -248,7 +248,7 @@ void Com_Free_Aligned( void *ptr ); // floats (quat: 4, scale: 1, translation: 3), which is very // convenient for SSE and GLSL, which operate on 4-dimensional // float vectors. 
-#if idx86_sse +#if defined(DAEMON_CPU_INTRINSICS_i686_sse) // Here we have a union of scalar struct and sse struct, transform_u and the // scalar struct must match transform_t so we have to use anonymous structs. // We disable compiler warnings when using -Wpedantic for this specific case. @@ -375,7 +375,7 @@ extern const quat_t quatIdentity; float y; // compute approximate inverse square root -#if defined( idx86_sse ) +#if defined(DAEMON_CPU_INTRINSICS_i686_sse) // SSE rsqrt relative error bound: 3.7 * 10^-4 _mm_store_ss( &y, _mm_rsqrt_ss( _mm_load_ss( &number ) ) ); #elif idppc @@ -774,7 +774,7 @@ inline float DotProduct( const vec3_t x, const vec3_t y ) //============================================= // combining Transformations -#if idx86_sse +#if defined(DAEMON_CPU_INTRINSICS_i686_sse) /* swizzles for _mm_shuffle_ps instruction */ #define SWZ_XXXX 0x00 #define SWZ_YXXX 0x01 @@ -1293,6 +1293,7 @@ inline float DotProduct( const vec3_t x, const vec3_t y ) t->sseRot = sseQuatNormalize( t->sseRot ); } #else + // The non-SSE variants are defined in q_math.cpp. void TransInit( transform_t *t ); void TransCopy( const transform_t *in, transform_t *out );