Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

seqlock: Enforce C11 Atomics #19

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion seqlock/Makefile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
all:
gcc -o tests -std=gnu11 -Wall -O2 seqlock.c tests.c
gcc -o tests -std=gnu11 -Wall -Wextra -O2 seqlock.c tests.c

clean:
rm -f tests
49 changes: 27 additions & 22 deletions seqlock/seqlock.c
Original file line number Diff line number Diff line change
@@ -1,15 +1,17 @@
#include <assert.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "seqlock.h"

#define SEQLOCK_WRITER 1U

#if defined(__i386__) || defined(__x86_64__)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Change to #if !defined(__aarch64) for generic C11 Atomics path.

#define spin_wait() __builtin_ia32_pause()
#define spin_wait() atomic_thread_fence(memory_order_seq_cst)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You have to confirm the generated code is identical to the intended instruction sequence.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be generating

dmb     ish

with Apple clang version 14.0.3 (clang-1403.0.22.14.1).

I would figure what's the output in gcc x86

Copy link
Author

@25077667 25077667 Aug 27, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In gcc 13.2.1 with x86-64, both

  1. using __builtin_ia32_pause()
  2. using atomic_thread_fence(memory_order_seq_cst)

are generating

data16 cs nopw 0x0(%rax,%rax,1)
nop
xchg   %ax,%ax

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In LLVM clang version 15.0.7 with x86-64, adding an extra pause when __builtin_ia32_pause is used.

    125 0000000000001190 <seqlock_acquire_rd>:
    126     1190:       50                      push   %rax
    127     1191:       0f ae f0                mfence
    128     1194:       8b 07                   mov    (%rdi),%eax
    129     1196:       a8 01                   test   $0x1,%al
    130     1198:       75 08                   jne    11a2 <seqlock_acquire_rd+0x12>
    131     119a:       a8 01                   test   $0x1,%al
    132     119c:       75 0f                   jne    11ad <seqlock_acquire_rd+0x1d>
    133     119e:       59                      pop    %rcx
    134     119f:       c3                      ret
    135     11a0:       f3 90                   pause
    136     11a2:       0f ae f0                mfence
    137     11a5:       8b 07                   mov    (%rdi),%eax
    138     11a7:       a8 01                   test   $0x1,%al
    139     11a9:       75 f5                   jne    11a0 <seqlock_acquire_rd+0x10>
    140     11ab:       eb ed                   jmp    119a <seqlock_acquire_rd+0xa>
    141     11ad:       48 8d 3d 6f 0e 00 00    lea    0xe6f(%rip),%rdi        # 2023 <_IO_stdin_used+0x23>
    142     11b4:       48 8d 35 82 0e 00 00    lea    0xe82(%rip),%rsi        # 203d <_IO_stdin_used+0x3d>
    143     11bb:       48 8d 0d 85 0e 00 00    lea    0xe85(%rip),%rcx        # 2047 <_IO_stdin_used+0x47>
    144     11c2:       ba 40 00 00 00          mov    $0x40,%edx
    145     11c7:       e8 84 fe ff ff          call   1050 <__assert_fail@plt>
    146     11cc:       0f 1f 40 00             nopl   0x0(%rax)

#elif defined(__aarch64__)
#define spin_wait() __asm__ __volatile__("isb\n")
#else
Expand All @@ -32,9 +34,9 @@ static inline int wfe(void)
static inline uint32_t ldx(const uint8_t *var, int mm)
{
uint32_t old;
if (mm == __ATOMIC_ACQUIRE)
if (mm == memory_order_acquire)
__asm volatile("ldaxrb %w0, [%1]" : "=&r"(old) : "r"(var) : "memory");
else if (mm == __ATOMIC_RELAXED)
else if (mm == memory_order_relaxed)
__asm volatile("ldxrb %w0, [%1]" : "=&r"(old) : "r"(var) : "memory");
else
abort();
Expand All @@ -43,7 +45,7 @@ static inline uint32_t ldx(const uint8_t *var, int mm)
#else /* generic */
#define SEVL() (void) 0
#define WFE() 1
#define LDX(a, b) __atomic_load_n((a), (b))
#define LDX(a, b) atomic_load_explicit((a), (b))
#endif

#define UNLIKELY(x) __builtin_expect(!!(x), 0)
Expand All @@ -57,7 +59,8 @@ static inline seqlock_t wait_for_no_writer(const seqlock_t *sync, int mo)
{
seqlock_t l;
SEVL(); /* Do SEVL early to avoid excessive loop alignment (NOPs) */
if (UNLIKELY(((l = __atomic_load_n(sync, mo)) & SEQLOCK_WRITER) != 0)) {
if (UNLIKELY(((l = atomic_load_explicit(sync, mo)) & SEQLOCK_WRITER) !=
0)) {
while (WFE() && ((l = LDX(sync, mo)) & SEQLOCK_WRITER) != 0)
spin_wait();
}
Expand All @@ -69,35 +72,35 @@ seqlock_t seqlock_acquire_rd(const seqlock_t *sync)
{
/* Wait for any present writer to go away */
/* B: Synchronize with A */
return wait_for_no_writer(sync, __ATOMIC_ACQUIRE);
return wait_for_no_writer(sync, memory_order_acquire);
}

bool seqlock_release_rd(const seqlock_t *sync, seqlock_t prv)
{
/* Enforce Load/Load order as if synchronizing with a store-release or
* fence-release in another thread.
*/
__atomic_thread_fence(__ATOMIC_ACQUIRE);
atomic_thread_fence(memory_order_acquire);
/* Test if sync remains unchanged => success */
return __atomic_load_n(sync, __ATOMIC_RELAXED) == prv;
return atomic_load_explicit(sync, memory_order_relaxed) == prv;
}

void seqlock_acquire_wr(seqlock_t *sync)
{
seqlock_t l;
do {
/* Wait for any present writer to go away */
l = wait_for_no_writer(sync, __ATOMIC_RELAXED);
l = wait_for_no_writer(sync, memory_order_relaxed);
/* Attempt to increment, setting writer flag */
} while (
/* C: Synchronize with A */
!__atomic_compare_exchange_n(sync, &l, l + SEQLOCK_WRITER,
/*weak=*/true, __ATOMIC_ACQUIRE,
__ATOMIC_RELAXED));
!atomic_compare_exchange_strong_explicit(
sync, (uint32_t *) &l, l + SEQLOCK_WRITER, memory_order_acquire,
memory_order_relaxed));
/* Enforce Store/Store order as if synchronizing with a load-acquire or
* fence-acquire in another thread.
*/
__atomic_thread_fence(__ATOMIC_RELEASE);
atomic_thread_fence(memory_order_release);
}

void seqlock_release_wr(seqlock_t *sync)
Expand All @@ -110,17 +113,19 @@ void seqlock_release_wr(seqlock_t *sync)

/* Increment, clearing writer flag */
/* A: Synchronize with B and C */
__atomic_store_n(sync, cur + 1, __ATOMIC_RELEASE);
atomic_store_explicit(sync, cur + SEQLOCK_WRITER, memory_order_release);
}

#define ATOMIC_COPY(_d, _s, _sz, _type) \
({ \
_type val = __atomic_load_n((const _type *) (_s), __ATOMIC_RELAXED); \
_s += sizeof(_type); \
__atomic_store_n((_type *) (_d), val, __ATOMIC_RELAXED); \
_d += sizeof(_type); \
_sz -= sizeof(_type); \
})
#define ATOMIC_COPY(_d, _s, _sz, _type) \
do { \
const _Atomic _type *src_atomic = (_Atomic const _type *) (_s); \
_type val = atomic_load_explicit(src_atomic, memory_order_relaxed); \
_s += sizeof(_type); \
_Atomic _type *dst_atomic = (_Atomic _type *) (_d); \
atomic_store_explicit(dst_atomic, val, memory_order_relaxed); \
_d += sizeof(_type); \
_sz -= sizeof(_type); \
} while (0)

static inline void atomic_memcpy(char *dst, const char *src, size_t sz)
{
Expand Down
3 changes: 2 additions & 1 deletion seqlock/seqlock.h
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
#pragma once

#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

typedef uint32_t seqlock_t;
typedef _Atomic(uint32_t) seqlock_t;

/* Initialise a seqlock aka reader/writer synchronization */
void seqlock_init(seqlock_t *sync);
Expand Down