Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add netstacklat tool #125

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions netstacklat/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
netstacklat
13 changes: 13 additions & 0 deletions netstacklat/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)

# Names of the userspace program and the BPF object built in this directory.
# NOTE(review): these variables are presumably consumed by $(LIB_DIR)/common.mk,
# which is not part of this view — confirm their exact meaning there.
USER_TARGETS := netstacklat
BPF_TARGETS := netstacklat.bpf
BPF_SKEL_OBJ := netstacklat.bpf.o

# Local headers appended to the extra dependency list, and the math library
# (-lm) appended to the libraries passed to the linker.
EXTRA_DEPS += netstacklat.h bits.bpf.h maps.bpf.h
LDLIBS += -lm

LIB_DIR = ../lib

# Pull in the shared build rules from the repository's lib directory.
include $(LIB_DIR)/common.mk

36 changes: 36 additions & 0 deletions netstacklat/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Netstacklat - Monitor latency within the network stack
Netstacklat is a simple tool for monitoring latency within the Linux
network stack for ingress traffic. The tool relies on the kernel time
stamping received packets (`SOF_TIMESTAMPING_RX_SOFTWARE`),
specifically setting `sk_buff->tstamp`, and then reports when packets
arrive at various hooks relative to this timestamp, i.e. the latency
between the packet being timestamped and the packet reaching the
hook.

The tool is based on the following bpftrace script from Jesper
Dangaard Brouer:
```console
sudo bpftrace -e '
kfunc:tcp_v4_do_rcv,
kfunc:tcp_data_queue,
kfunc:udp_queue_rcv_one_skb
{
$tai_offset=37000000000;
$now=nsecs(tai)-$tai_offset; @cnt[probe]=count(); @total[probe]=count();
$ts=args->skb->tstamp; $delta=$now-(uint64)$ts;
@hist_ns[probe]=hist($delta);
@stats[probe]=stats($delta);
//printf("now:%llu - ts:%llu = delta:%llu\n", $now, $ts, $delta);
}
interval:s:10 {time("\n%H:%M:%S\n");
print(@cnt); clear(@cnt);
print(@total);
print(@stats);
print(@hist_ns);
}'
```

The eBPF part of the tool (`netstacklat.bpf.c`) is designed to be
compatible with
[ebpf_exporter](https://github.com/cloudflare/ebpf_exporter), so that
the data can easily be exported to Prometheus.
29 changes: 29 additions & 0 deletions netstacklat/bits.bpf.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
/* From https://github.com/iovisor/bcc/blob/v0.25.0/libbpf-tools/bits.bpf.h*/

#ifndef __BITS_BPF_H
#define __BITS_BPF_H

/*
 * Integer base-2 logarithm of a 32-bit value, rounded down.
 *
 * Narrows the position of the highest set bit in steps of 16, 8, 4 and 2 bits:
 * each step checks whether anything is set above the step's boundary, shifts
 * the value down by the step size if so, and records the step in the result.
 * What is left after the last shift is 0..3, whose top bit supplies the final
 * result bit.
 *
 * Returns 0 for both v == 0 and v == 1.
 */
static __always_inline u64 log2(u32 v)
{
	u32 result, step;

	result = (v > 0xFFFF) << 4;
	v >>= result;

	step = (v > 0xFF) << 3;
	v >>= step;
	result |= step;

	step = (v > 0xF) << 2;
	v >>= step;
	result |= step;

	step = (v > 0x3) << 1;
	v >>= step;
	result |= step;

	result |= (v >> 1);

	return result;
}

/*
 * Integer base-2 logarithm of a 64-bit value, rounded down.
 *
 * Delegates to the 32-bit log2(): if any of the upper 32 bits are set the
 * answer is log2() of the upper half plus 32, otherwise it is log2() of the
 * value itself (which then fits in 32 bits).
 */
static __always_inline u64 log2l(u64 v)
{
	u32 upper = v >> 32;

	if (!upper)
		return log2(v);

	return log2(upper) + 32;
}

#endif /* __BITS_BPF_H */
84 changes: 84 additions & 0 deletions netstacklat/maps.bpf.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
/* SPDX-License-Identifier: MIT */
/* From https://github.com/cloudflare/ebpf_exporter/blob/99d2752f9e0a095b57f53e5da6856ad143b0e443/examples/maps.bpf.h */

#include "bits.bpf.h"

/*
 * lookup_or_zero_init_key(map, key, into) - fetch the value slot for key,
 * creating it with a zero value if it does not exist yet.
 *
 * Assigns the result of bpf_map_lookup_elem() to `into`. If that lookup
 * fails, a zero u64 is inserted with BPF_NOEXIST and the lookup is retried.
 * If the retry fails as well, the macro executes `return 0`, i.e. it returns
 * from the function it is expanded in.
 *
 * The expansion declares a local `u64 zero` and is not wrapped in its own
 * block, so it can be used at most once per scope and only inside a function
 * for which `return 0` is valid.
 */
#define lookup_or_zero_init_key(map, key, into) \
u64 zero = 0; \
\
into = bpf_map_lookup_elem(map, key); \
if (!into) { \
bpf_map_update_elem(map, key, &zero, BPF_NOEXIST); \
into = bpf_map_lookup_elem(map, key); \
if (!into) { \
return 0; \
} \
}

/*
 * increment_variant(map, key, increment, variant) - complete function body
 * shared by the increment_map*() helpers.
 *
 * Declares `u64 *count`, points it at the (possibly freshly created) slot for
 * key via lookup_or_zero_init_key(), executes the `variant` statement and then
 * returns *count from the enclosing function. The `increment` parameter is not
 * referenced by the macro body itself; callers use it inside `variant`.
 *
 * Because lookup_or_zero_init_key() may `return 0`, the enclosing function
 * returns 0 when the slot could neither be found nor created.
 */
#define increment_variant(map, key, increment, variant) \
u64 *count; \
\
lookup_or_zero_init_key(map, key, count); \
\
variant; \
\
return *count;

/*
 * Add `increment` to the u64 value stored under `key` in `map`, using the
 * __sync_fetch_and_add() builtin, creating the entry with value zero first if
 * it does not exist.
 *
 * Returns the slot's value as re-read after the addition (converted from u64
 * to the int return type), or 0 if the slot could not be found or created.
 */
static inline int increment_map(void *map, void *key, u64 increment)
{
increment_variant(map, key, increment, __sync_fetch_and_add(count, increment));
}

/*
 * Same as increment_map(), but the addition is a plain `*count += increment`
 * rather than a __sync_fetch_and_add() call.
 *
 * Returns the slot's value after the addition (converted from u64 to the int
 * return type), or 0 if the slot could not be found or created.
 */
static inline int increment_map_nosync(void *map, void *key, u64 increment)
{
increment_variant(map, key, increment, *count += increment);
}

// Arrays are always preallocated, so this only fails if the key is missing
//
// read_array_ptr(map, key, into): assigns bpf_map_lookup_elem(map, key) to
// `into`; if the lookup returns NULL the macro executes `return 0`, i.e. it
// returns from the function it is expanded in.
#define read_array_ptr(map, key, into) \
into = bpf_map_lookup_elem(map, key); \
if (!into) { \
return 0; \
}

/*
 * _increment_histogram(map, key, increment, max_bucket, increment_fn) - count
 * one observation in the bucket already selected in key.bucket.
 *
 * Steps:
 *  1. Clamp key.bucket to max_bucket.
 *  2. Add 1 to that bucket through increment_fn(map, &key, 1).
 *  3. If increment > 0, set key.bucket to max_bucket + 1 and add `increment`
 *     there, so that extra slot accumulates the sum of the recorded values.
 *
 * `key` must be an lvalue struct with a `bucket` member; it is modified.
 * The slot max_bucket + 1 must be a valid key for `map`. The `map` and
 * `increment` arguments are expanded more than once. The expansion is several
 * statements that are not wrapped in a block, so it must not be used as the
 * body of an unbraced if/else/loop.
 */
#define _increment_histogram(map, key, increment, max_bucket, increment_fn) \
if (key.bucket > max_bucket) { \
key.bucket = max_bucket; \
} \
\
increment_fn(map, &key, 1); \
\
if (increment > 0) { \
key.bucket = max_bucket + 1; \
increment_fn(map, &key, increment); \
}

/*
 * _increment_ex2_histogram(map, key, increment, max_bucket, increment_fn) -
 * record `increment` in a power-of-two histogram.
 *
 * Selects bucket log2l(increment), clamps it to max_bucket, and hands over to
 * _increment_histogram() for the actual counter updates. The clamp here
 * repeats the one _increment_histogram() performs as its first step.
 *
 * Note that log2l() yields 0 for both 0 and 1, so those two values share
 * bucket 0.
 */
#define _increment_ex2_histogram(map, key, increment, max_bucket, increment_fn) \
key.bucket = log2l(increment); \
\
if (key.bucket > max_bucket) { \
key.bucket = max_bucket; \
} \
\
_increment_histogram(map, key, increment, max_bucket, increment_fn);

/*
 * Power-of-two histogram update whose counter additions go through
 * increment_map(), i.e. __sync_fetch_and_add().
 */
#define increment_exp2_histogram(map, key, increment, max_bucket) \
_increment_ex2_histogram(map, key, increment, max_bucket, increment_map)

/*
 * Power-of-two histogram update whose counter additions go through
 * increment_map_nosync(), i.e. a plain `+=`.
 */
#define increment_exp2_histogram_nosync(map, key, increment, max_bucket) \
_increment_ex2_histogram(map, key, increment, max_bucket, increment_map_nosync)

/*
 * _increment_exp2zero_histogram(map, key, increment, max_bucket, increment_fn)
 * - power-of-two histogram with a dedicated bucket for zero.
 *
 * An increment of 0 goes to bucket 0; any other value goes to bucket
 * log2l(increment) + 1. The counter updates (including clamping the bucket to
 * max_bucket) are done by _increment_histogram().
 */
#define _increment_exp2zero_histogram(map, key, increment, max_bucket, increment_fn) \
if (increment == 0) { \
key.bucket = 0; \
} else { \
key.bucket = log2l(increment) + 1; \
} \
\
_increment_histogram(map, key, increment, max_bucket, increment_fn);

/*
 * Zero-aware power-of-two histogram update whose counter additions go through
 * increment_map(), i.e. __sync_fetch_and_add().
 */
#define increment_exp2zero_histogram(map, key, increment, max_bucket) \
_increment_exp2zero_histogram(map, key, increment, max_bucket, increment_map)

/*
 * Zero-aware power-of-two histogram update whose counter additions go through
 * increment_map_nosync(), i.e. a plain `+=`.
 */
#define increment_exp2zero_histogram_nosync(map, key, increment, max_bucket) \
_increment_exp2zero_histogram(map, key, increment, max_bucket, increment_map_nosync)
96 changes: 96 additions & 0 deletions netstacklat/netstacklat.bpf.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
#include "vmlinux_local.h"
#include <linux/bpf.h>

#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_core_read.h>

#include "netstacklat.h"
#include "maps.bpf.h"

/* License string placed in the "license" section of the BPF object. */
char LICENSE[] SEC("license") = "GPL";

/*
 * Offset (37 * NS_PER_S) that record_current_netstacklat() subtracts from
 * bpf_ktime_get_tai_ns() before comparing the result with skb->tstamp.
 * NOTE(review): NS_PER_S is defined outside this view (presumably nanoseconds
 * per second, matching the 37000000000 used in the README's bpftrace script),
 * and 37 s is presumably the current TAI-UTC offset — confirm both.
 */
static volatile const u64 TAI_OFFSET = (37UL * NS_PER_S);

/*
 * Helpers in maps.bpf.h require any histogram key to be a struct with a bucket
 * member: the histogram macros write the selected slot to key.bucket and pass
 * &key to the map helpers. The struct consists of that single u32, matching
 * the u32 key type of the histogram maps declared in this file.
 */
struct hist_key {
u32 bucket;
};

/*
 * Latency histogram for the tcp_v4_do_rcv hook: a per-CPU array map with
 * HIST_NBINS u64 slots, indexed by a u32 key. hook_to_histmap() returns this
 * map for NETSTACK_HOOK_TCP_V4_DO_RCV.
 */
struct {
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
__uint(max_entries, HIST_NBINS);
__type(key, u32);
__type(value, u64);
} tcp_v4_do_rcv_hist SEC(".maps");

/*
 * Latency histogram for the tcp_data_queue hook: a per-CPU array map with
 * HIST_NBINS u64 slots, indexed by a u32 key. hook_to_histmap() returns this
 * map for NETSTACKLAT_HOOK_TCP_DATA_QUEUE.
 */
struct {
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
__uint(max_entries, HIST_NBINS);
__type(key, u32);
__type(value, u64);
} tcp_data_queue_hist SEC(".maps");

/*
 * Latency histogram for the udp_queue_rcv_one_skb hook: a per-CPU array map
 * with HIST_NBINS u64 slots, indexed by a u32 key. hook_to_histmap() returns
 * this map for NETSTACKLAT_HOOK_UDP_QUEUE_RCV_ONE.
 */
struct {
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
__uint(max_entries, HIST_NBINS);
__type(key, u32);
__type(value, u64);
} udp_queue_rcv_hist SEC(".maps");

/*
 * Map a hook identifier to the histogram map that collects its latencies.
 *
 * Returns a pointer to the matching map definition, or NULL when the hook is
 * not one of the three hooks handled in this file.
 */
static void *hook_to_histmap(enum netstacklat_hook hook)
{
	if (hook == NETSTACK_HOOK_TCP_V4_DO_RCV)
		return &tcp_v4_do_rcv_hist;

	if (hook == NETSTACKLAT_HOOK_TCP_DATA_QUEUE)
		return &tcp_data_queue_hist;

	if (hook == NETSTACKLAT_HOOK_UDP_QUEUE_RCV_ONE)
		return &udp_queue_rcv_hist;

	return NULL;
}

static void record_current_netstacklat(struct sk_buff *skb,
enum netstacklat_hook hook)
{
ktime_t delta_ns, skb_tstamp;
struct hist_key key;

if (!skb)
return;

skb_tstamp = BPF_CORE_READ(skb, tstamp);
if (skb_tstamp == 0)
return;

delta_ns = bpf_ktime_get_tai_ns() - TAI_OFFSET - skb_tstamp;
if (delta_ns < 0)
return;

increment_exp2_histogram_nosync(hook_to_histmap(hook), key, delta_ns,
HIST_MAX_LATENCY_SLOT);
}

/*
 * fentry program for tcp_v4_do_rcv (per the section name): records the
 * latency of skb in the NETSTACK_HOOK_TCP_V4_DO_RCV histogram. The sk argument
 * is not used. Always returns 0.
 */
SEC("fentry/tcp_v4_do_rcv")
int BPF_PROG(netstacklat_tcp_v4_do_rcv, struct sock *sk, struct sk_buff *skb)
{
record_current_netstacklat(skb, NETSTACK_HOOK_TCP_V4_DO_RCV);
return 0;
}

/*
 * fentry program for tcp_data_queue (per the section name): records the
 * latency of skb in the NETSTACKLAT_HOOK_TCP_DATA_QUEUE histogram. The sk
 * argument is not used. Always returns 0.
 */
SEC("fentry/tcp_data_queue")
int BPF_PROG(netstacklat_tcp_data_queue, struct sock *sk, struct sk_buff *skb)
{
record_current_netstacklat(skb, NETSTACKLAT_HOOK_TCP_DATA_QUEUE);
return 0;
}

/*
 * fentry program for udp_queue_rcv_one_skb (per the section name): records
 * the latency of skb in the NETSTACKLAT_HOOK_UDP_QUEUE_RCV_ONE histogram. The
 * sk argument is not used. Always returns 0.
 */
SEC("fentry/udp_queue_rcv_one_skb")
int BPF_PROG(netstacklat_udp_queue_rcv, struct sock *sk, struct sk_buff *skb)
{
record_current_netstacklat(skb, NETSTACKLAT_HOOK_UDP_QUEUE_RCV_ONE);
return 0;
}
Loading