Skip to content

Commit

Permalink
cmake: add USE_CPU_INTRINSICS CMake option and rewrite the definitions
Browse files Browse the repository at this point in the history
This introduces the USE_CPU_INTRINSICS CMake option. It is enabled by
default.

Disabling it is meant to disable custom asm code and the usage of intrinsic
functions for the target platform in the Dæmon code base. It may also be
used by games built with the Dæmon common code base.

It is not meant to disable asm or intrinsics usage in third-party libraries.

It is not meant to prevent the compiler from using such intrinsics in its
optimization passes.

It is not meant to disable the compiler flags we set to tell the compiler
to try to use such intrinsics in its optimization passes. For this, one
should disable USE_CPU_RECOMMENDED_FEATURES instead.

For obvious reasons, the asm code in the BREAKPOINT() implementation is not
meant to be disabled by USE_CPU_INTRINSICS.

The macro syntax is: DAEMON_CPU_INTRINSICS_(architecture)[_extension]

Examples:

- DAEMON_CPU_INTRINSICS_i686: i686 specific code, including asm code.
- DAEMON_CPU_INTRINSICS_i686_sse: i686 SSE specific code.
- DAEMON_CPU_INTRINSICS_i686_sse2: i686 SSE2 specific code.

If a platform inherits a feature from a parent platform, the parent
platform name is used. For example on amd64, the definition enabling
SSE code is DAEMON_CPU_INTRINSICS_i686_sse, enabling SSE code on both
i686 with SSE and amd64 platforms. On amd64, both DAEMON_CPU_INTRINSICS_amd64
and DAEMON_CPU_INTRINSICS_i686 are defined.
  • Loading branch information
illwieckz committed May 15, 2024
1 parent 540b80e commit 9622187
Show file tree
Hide file tree
Showing 6 changed files with 57 additions and 14 deletions.
11 changes: 11 additions & 0 deletions cmake/DaemonArchitecture.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,17 @@ message(STATUS "Detected architecture: ${ARCH}")

add_definitions(-D${ARCH_DEFINE})

# When custom asm and intrinsics code is enabled, advertise the detected
# architecture to the code base as DAEMON_CPU_INTRINSICS_<arch>, together
# with the architecture it inherits from, if one is declared below.
if (USE_CPU_INTRINSICS)
	# Parent table: <arch>_PARENT names the architecture <arch> derives from.
	set(amd64_PARENT "i686")
	set(arm64_PARENT "armhf")

	# Definition for the detected architecture itself.
	add_definitions(-DDAEMON_CPU_INTRINSICS_${ARCH}=1)

	# Definition for the parent architecture; the test is false when no
	# ${ARCH}_PARENT variable holds a non-false value.
	if (${ARCH}_PARENT)
		add_definitions(-DDAEMON_CPU_INTRINSICS_${${ARCH}_PARENT}=1)
	endif()
endif()

# This string can be modified without breaking compatibility.
# Quotes cannot be part of the define as support for them is not reliable.
# See: https://cmake.org/cmake/help/latest/prop_dir/COMPILE_DEFINITIONS.html
Expand Down
6 changes: 6 additions & 0 deletions cmake/DaemonFlags.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,12 @@ include(CheckCXXCompilerFlag)

add_definitions(-DDAEMON_BUILD_${CMAKE_BUILD_TYPE})

# USE_CPU_INTRINSICS: user-facing switch for the custom CPU asm and
# intrinsics code. It is ON unless the user overrides it.
option(USE_CPU_INTRINSICS "Enable custom CPU asm and intrinsics code" ON)

# Expose the choice to the sources as DAEMON_USE_CPU_INTRINSICS=1;
# the macro is left undefined when the option is OFF.
if (USE_CPU_INTRINSICS)
	add_definitions(-DDAEMON_USE_CPU_INTRINSICS=1)
endif()

# Set flag without checking, optional argument specifies build type
macro(set_c_flag FLAG)
if (${ARGC} GREATER 1)
Expand Down
1 change: 1 addition & 0 deletions src/common/Compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ int CountTrailingZeroes(unsigned long long x);

// Raise an exception and break in the debugger
#if defined(DAEMON_ARCH_i686) || defined(DAEMON_ARCH_amd64)
// Always run this asm code even if DAEMON_USE_CPU_INTRINSICS is not set.
#define BREAKPOINT() __asm__ __volatile__("int $3\n\t")
#elif defined(DAEMON_ARCH_nacl)
// TODO: find how to implement breakpoint on NaCl
Expand Down
39 changes: 31 additions & 8 deletions src/common/Platform.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,15 +63,38 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define __x86_64__ 1
#endif

// SSE support
#if defined(__x86_64__) || defined(__SSE__) || _M_IX86_FP >= 1
#include <xmmintrin.h>
#if defined(__x86_64__) || defined(__SSE2__) || _M_IX86_FP >= 2
#include <emmintrin.h>
#define idx86_sse 2
#else
#define idx86_sse 1
/* The definition name syntax is: DAEMON_CPU_INTRINSICS_<architecture>[_extension]
Examples:
- DAEMON_CPU_INTRINSICS_i686: i686 specific code, including asm code.
- DAEMON_CPU_INTRINSICS_i686_sse: i686 SSE specific code.
- DAEMON_CPU_INTRINSICS_i686_sse2: i686 SSE2 specific code.
If a platform inherits a feature from a parent platform, the parent
platform name is used. For example on amd64, the definition enabling
SSE code is DAEMON_CPU_INTRINSICS_i686_sse, enabling SSE code on both
i686 with SSE and amd64.
The definitions for the CPU itself are automatically set by CMake. */

// Derive the CPU extension definitions from the compiler's predefined macros.
// Nothing is defined (and no intrinsics header is included) unless the build
// system enabled DAEMON_USE_CPU_INTRINSICS.
#if defined(DAEMON_USE_CPU_INTRINSICS)
// SSE2 is available when:
// - the target is amd64, where SSE and SSE2 are part of the base architecture
//   (MSVC defines neither __SSE2__ nor _M_IX86_FP when targeting x64, so the
//   __x86_64__ test is required there; NOTE(review): this relies on
//   __x86_64__ being defined earlier in this header for MSVC x64 — confirm),
// - GCC/Clang report it through __SSE2__,
// - MSVC reports it for 32-bit x86 through _M_IX86_FP >= 2.
#if defined(__x86_64__) || defined(__SSE2__) \
	|| (defined(_M_IX86_FP) && _M_IX86_FP >= 2)
#define DAEMON_CPU_INTRINSICS_i686_sse2 1
#endif

// Same detection for SSE, with _M_IX86_FP >= 1 on 32-bit MSVC.
#if defined(__x86_64__) || defined(__SSE__) \
	|| (defined(_M_IX86_FP) && _M_IX86_FP >= 1)
#define DAEMON_CPU_INTRINSICS_i686_sse 1
#endif
#endif // DAEMON_USE_CPU_INTRINSICS

// SSE intrinsics.
#if defined(DAEMON_CPU_INTRINSICS_i686_sse)
#include <xmmintrin.h>
#endif

// SSE2 intrinsics.
#if defined(DAEMON_CPU_INTRINSICS_i686_sse2)
#include <emmintrin.h>
#endif

// VM Prefixes
Expand Down
7 changes: 4 additions & 3 deletions src/engine/qcommon/q_math.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -740,7 +740,7 @@ void SetPlaneSignbits( cplane_t *out )

int BoxOnPlaneSide( const vec3_t emins, const vec3_t emaxs, const cplane_t *p )
{
#if idx86_sse
#if defined(DAEMON_CPU_INTRINSICS_i686_sse)
auto mins = sseLoadVec3Unsafe( emins );
auto maxs = sseLoadVec3Unsafe( emaxs );
auto normal = sseLoadVec3Unsafe( p->normal );
Expand Down Expand Up @@ -1799,7 +1799,7 @@ void MatrixSetupShear( matrix_t m, vec_t x, vec_t y )

void MatrixMultiply( const matrix_t a, const matrix_t b, matrix_t out )
{
#if idx86_sse
#if defined(DAEMON_CPU_INTRINSICS_i686_sse)
//#error MatrixMultiply
int i;
__m128 _t0, _t1, _t2, _t3, _t4, _t5, _t6, _t7;
Expand Down Expand Up @@ -3288,7 +3288,8 @@ void QuatTransformVectorInverse( const quat_t q, const vec3_t in, vec3_t out )
VectorAdd( out, tmp2, out );
}

#if !idx86_sse
// The SSE variants are inline functions in q_shared.h file.
#if !defined(DAEMON_CPU_INTRINSICS_i686_sse)
// create an identity transform
void TransInit( transform_t *t )
{
Expand Down
7 changes: 4 additions & 3 deletions src/engine/qcommon/q_shared.h
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,7 @@ void Com_Free_Aligned( void *ptr );
// floats (quat: 4, scale: 1, translation: 3), which is very
// convenient for SSE and GLSL, which operate on 4-dimensional
// float vectors.
#if idx86_sse
#if defined(DAEMON_CPU_INTRINSICS_i686_sse)
// Here we have a union of scalar struct and sse struct, transform_u and the
// scalar struct must match transform_t so we have to use anonymous structs.
// We disable compiler warnings when using -Wpedantic for this specific case.
Expand Down Expand Up @@ -375,7 +375,7 @@ extern const quat_t quatIdentity;
float y;

// compute approximate inverse square root
#if defined( idx86_sse )
#if defined(DAEMON_CPU_INTRINSICS_i686_sse)
// SSE rsqrt relative error bound: 3.7 * 10^-4
_mm_store_ss( &y, _mm_rsqrt_ss( _mm_load_ss( &number ) ) );
#elif idppc
Expand Down Expand Up @@ -774,7 +774,7 @@ inline float DotProduct( const vec3_t x, const vec3_t y )
//=============================================
// combining Transformations

#if idx86_sse
#if defined(DAEMON_CPU_INTRINSICS_i686_sse)
/* swizzles for _mm_shuffle_ps instruction */
#define SWZ_XXXX 0x00
#define SWZ_YXXX 0x01
Expand Down Expand Up @@ -1293,6 +1293,7 @@ inline float DotProduct( const vec3_t x, const vec3_t y )
t->sseRot = sseQuatNormalize( t->sseRot );
}
#else
// The non-SSE variants are in q_math.cpp file.
void TransInit( transform_t *t );
void TransCopy( const transform_t *in, transform_t *out );

Expand Down

0 comments on commit 9622187

Please sign in to comment.