Skip to content

Commit

Permalink
Merge pull request #1161 from etsal/etsal/lib-cond-break
Browse files Browse the repository at this point in the history
lib/sdt_alloc: replace bpf_for with can_loop
  • Loading branch information
etsal authored Jan 7, 2025
2 parents 78237c3 + 63b6be9 commit ce715ba
Show file tree
Hide file tree
Showing 2 changed files with 145 additions and 24 deletions.
89 changes: 65 additions & 24 deletions lib/sdt_alloc.bpf.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,17 @@ struct {

struct sdt_alloc_stack __arena *prealloc_stack;

/*
* Necessary for cond_break/can_loop's semantics. According to kernel commit
* 011832b, the loop counter variable must be seen as imprecise and bounded
* by the verifier. Initializing it from a constant (e.g., i = 0;), then,
* makes it precise and prevents may_goto from helping with converging the
* loop. For these loops we must initialize the loop counter from a variable
* whose value the verifier cannot reason about when checking the program, so
* that the loop counter's value is imprecise.
*/
static __u64 zero = 0;

/*
* XXX Hack to get the verifier to find the arena for sdt_exit_task.
* As of 6.12-rc5, The verifier associates arenas with programs by
Expand Down Expand Up @@ -174,7 +185,17 @@ int sdt_alloc_stack(struct sdt_alloc_stack __arena *stack)
static SDT_TASK_FN_ATTRS
int sdt_alloc_attempt(struct sdt_alloc_stack __arena *stack)
{
bpf_repeat(SDT_TASK_ALLOC_ATTEMPTS) {
int i;

/*
* Use can_loop to help with verification. The can_loop macro was
* introduced in kernel commit ba39486 and wraps around the may_goto
* instruction that helps with verifiying for loops. Using may_goto
* embeds a switch into the loop that ensures it is considered
* terminable by the verifier by adding a hidden switch to the loop
* and counting down with every iteration.
*/
for (i = zero; i < SDT_TASK_ALLOC_ATTEMPTS && can_loop; i++) {
if (sdt_alloc_stack(stack) == 0)
return 0;
}
Expand Down Expand Up @@ -348,6 +369,34 @@ int sdt_set_idx_state(sdt_desc_t *desc, __u64 pos, bool state)
return 0;
}

static __noinline
int sdt_mark_nodes_avail(sdt_desc_t *lv_desc[SDT_TASK_LEVELS], __u64 lv_pos[SDT_TASK_LEVELS])
{
sdt_desc_t *desc;
__u64 u, level;
int ret;

for (u = zero; u < SDT_TASK_LEVELS && can_loop; u++) {
level = SDT_TASK_LEVELS - 1 - u;

/* Only propagate upwards if we are the parent's only free chunk. */
desc = lv_desc[level];

/* Failed calls return unlocked. */
ret = sdt_set_idx_state(desc, lv_pos[level], false);
if (unlikely(ret != 0))
return ret;

cast_kern(desc);

desc->nr_free += 1;
if (desc->nr_free > 1)
return 0;
}

return 0;
}

__hidden
void sdt_free_idx(struct sdt_allocator *alloc, __u64 idx)
{
Expand All @@ -357,7 +406,7 @@ void sdt_free_idx(struct sdt_allocator *alloc, __u64 idx)
struct sdt_chunk __arena *chunk;
sdt_desc_t *desc;
struct sdt_data __arena *data;
__u64 u, level, shift, pos;
__u64 level, shift, pos;
__u64 lv_pos[SDT_TASK_LEVELS];
int ret;
int i;
Expand All @@ -373,7 +422,13 @@ void sdt_free_idx(struct sdt_allocator *alloc, __u64 idx)
return;
}

bpf_for(level, 0, SDT_TASK_LEVELS) {
/* To appease the verifier. */
for (level = zero; level < SDT_TASK_LEVELS && can_loop; level++) {
lv_desc[level] = NULL;
lv_pos[level] = 0;
}

for (level = zero; level < SDT_TASK_LEVELS && can_loop; level++) {
shift = (SDT_TASK_LEVELS - 1 - level) * SDT_TASK_ENTS_PER_PAGE_SHIFT;
pos = (idx >> shift) & mask;

Expand Down Expand Up @@ -414,29 +469,15 @@ void sdt_free_idx(struct sdt_allocator *alloc, __u64 idx)
};

/* Zero out one word at a time. */
bpf_for(i, 0, alloc->pool.elem_size / 8) {
for (i = zero; i < alloc->pool.elem_size / 8 && can_loop; i++) {
data->payload[i] = 0;
}
}

bpf_for(u, 0, SDT_TASK_LEVELS) {
level = SDT_TASK_LEVELS - 1 - u;

/* Only propagate upwards if we are the parent's only free chunk. */
desc = lv_desc[level];

/* Failed calls return unlocked. */
ret = sdt_set_idx_state(desc, lv_pos[level], false);
if (unlikely(ret != 0)) {
bpf_spin_unlock(&sdt_lock);
return;
}

cast_kern(desc);

desc->nr_free += 1;
if (desc->nr_free > 1)
break;
ret = sdt_mark_nodes_avail(lv_desc, lv_pos);
if (unlikely(ret != 0)) {
bpf_spin_unlock(&sdt_lock);
return;
}

sdt_stats.active_allocs -= 1;
Expand Down Expand Up @@ -465,7 +506,7 @@ sdt_desc_t * sdt_find_empty(sdt_desc_t *desc,
__u64 idx = 0;
int ret;

bpf_for(level, 0, SDT_TASK_LEVELS) {
for (level = zero; level < SDT_TASK_LEVELS && can_loop; level++) {
pos = sdt_chunk_find_empty(desc);

/* Something has gone terribly wrong. */
Expand Down Expand Up @@ -499,7 +540,7 @@ sdt_desc_t * sdt_find_empty(sdt_desc_t *desc,
}
}

bpf_for(u, 0, SDT_TASK_LEVELS) {
for (u = zero; u < SDT_TASK_LEVELS && can_loop; u++) {
level = SDT_TASK_LEVELS - 1 - u;
tmp = lv_desc[level];

Expand Down
80 changes: 80 additions & 0 deletions scheds/include/scx/bpf_arena_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,86 @@ void __arena* bpf_arena_alloc_pages(void *map, void __arena *addr, __u32 page_cn
int node_id, __u64 flags) __ksym __weak;
void bpf_arena_free_pages(void *map, void __arena *ptr, __u32 page_cnt) __ksym __weak;

/*
* Note that cond_break can only be portably used in the body of a breakable
* construct, whereas can_loop can be used anywhere.
*/
#ifdef __BPF_FEATURE_MAY_GOTO
#define can_loop \
({ __label__ l_break, l_continue; \
bool ret = true; \
asm volatile goto("may_goto %l[l_break]" \
:::: l_break); \
goto l_continue; \
l_break: ret = false; \
l_continue:; \
ret; \
})

#define cond_break \
({ __label__ l_break, l_continue; \
asm volatile goto("may_goto %l[l_break]" \
:::: l_break); \
goto l_continue; \
l_break: break; \
l_continue:; \
})
#else
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#define can_loop \
({ __label__ l_break, l_continue; \
bool ret = true; \
asm volatile goto("1:.byte 0xe5; \
.byte 0; \
.long ((%l[l_break] - 1b - 8) / 8) & 0xffff; \
.short 0" \
:::: l_break); \
goto l_continue; \
l_break: ret = false; \
l_continue:; \
ret; \
})

#define cond_break \
({ __label__ l_break, l_continue; \
asm volatile goto("1:.byte 0xe5; \
.byte 0; \
.long ((%l[l_break] - 1b - 8) / 8) & 0xffff; \
.short 0" \
:::: l_break); \
goto l_continue; \
l_break: break; \
l_continue:; \
})
#else
#define can_loop \
({ __label__ l_break, l_continue; \
bool ret = true; \
asm volatile goto("1:.byte 0xe5; \
.byte 0; \
.long (((%l[l_break] - 1b - 8) / 8) & 0xffff) << 16; \
.short 0" \
:::: l_break); \
goto l_continue; \
l_break: ret = false; \
l_continue:; \
ret; \
})

#define cond_break \
({ __label__ l_break, l_continue; \
asm volatile goto("1:.byte 0xe5; \
.byte 0; \
.long (((%l[l_break] - 1b - 8) / 8) & 0xffff) << 16; \
.short 0" \
:::: l_break); \
goto l_continue; \
l_break: break; \
l_continue:; \
})
#endif
#endif

#else /* when compiled as user space code */

/* Provide the definition of PAGE_SIZE. */
Expand Down

0 comments on commit ce715ba

Please sign in to comment.