Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add sample usage for BPF_PROG_TYPE_NETFILTER #98

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3,016 changes: 2,588 additions & 428 deletions headers/linux/bpf.h

Large diffs are not rendered by default.

238 changes: 176 additions & 62 deletions headers/vmlinux/vmlinux_net.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@ typedef unsigned int sk_buff_data_t;
typedef unsigned char *sk_buff_data_t;
#endif
*/
/* Singly linked list node: it carries nothing but the link to its successor. */
struct llist_node {
	struct llist_node	*next;	/* following node in the chain */
};

struct sk_buff {
union {
Expand All @@ -29,6 +32,7 @@ struct sk_buff {
};
struct rb_node rbnode;
struct list_head list;
struct llist_node ll_node;
};
union {
struct sock *sk;
Expand All @@ -45,6 +49,7 @@ struct sk_buff {
void (*destructor)(struct sk_buff *);
};
struct list_head tcp_tsorted_anchor;
long unsigned int _sk_redir;
};
long unsigned int _nfct;
unsigned int len;
Expand All @@ -59,73 +64,152 @@ struct sk_buff {
__u8 peeked: 1;
__u8 head_frag: 1;
__u8 pfmemalloc: 1;
__u8 pp_recycle: 1;
__u8 active_extensions;
__u32 headers_start[0];
__u8 __pkt_type_offset[0];
__u8 pkt_type: 3;
__u8 ignore_df: 1;
__u8 nf_trace: 1;
__u8 ip_summed: 2;
__u8 ooo_okay: 1;
__u8 l4_hash: 1;
__u8 sw_hash: 1;
__u8 wifi_acked_valid: 1;
__u8 wifi_acked: 1;
__u8 no_fcs: 1;
__u8 encapsulation: 1;
__u8 encap_hdr_csum: 1;
__u8 csum_valid: 1;
__u8 __pkt_vlan_present_offset[0];
__u8 vlan_present: 1;
__u8 csum_complete_sw: 1;
__u8 csum_level: 2;
__u8 csum_not_inet: 1;
__u8 dst_pending_confirm: 1;
__u8 ndisc_nodetype: 2;
__u8 ipvs_property: 1;
__u8 inner_protocol_type: 1;
__u8 remcsum_offload: 1;
__u8 offload_fwd_mark: 1;
__u8 offload_l3_fwd_mark: 1;
__u8 tc_skip_classify: 1;
__u8 tc_at_ingress: 1;
__u8 redirected: 1;
__u8 from_ingress: 1;
__u8 decrypted: 1;
__u16 tc_index;
union {
__wsum csum;
struct {
__u16 csum_start;
__u16 csum_offset;
__u8 __pkt_type_offset[0];
__u8 pkt_type: 3;
__u8 ignore_df: 1;
__u8 dst_pending_confirm: 1;
__u8 ip_summed: 2;
__u8 ooo_okay: 1;
__u8 __mono_tc_offset[0];
__u8 mono_delivery_time: 1;
__u8 tc_at_ingress: 1;
__u8 tc_skip_classify: 1;
__u8 remcsum_offload: 1;
__u8 csum_complete_sw: 1;
__u8 csum_level: 2;
__u8 inner_protocol_type: 1;
__u8 l4_hash: 1;
__u8 sw_hash: 1;
__u8 wifi_acked_valid: 1;
__u8 wifi_acked: 1;
__u8 no_fcs: 1;
__u8 encapsulation: 1;
__u8 encap_hdr_csum: 1;
__u8 csum_valid: 1;
__u8 ndisc_nodetype: 2;
__u8 ipvs_property: 1;
__u8 nf_trace: 1;
__u8 redirected: 1;
__u8 from_ingress: 1;
__u8 nf_skip_egress: 1;
__u8 slow_gro: 1;
__u8 csum_not_inet: 1;
__u16 tc_index;
u16 alloc_cpu;
union {
__wsum csum;
struct {
__u16 csum_start;
__u16 csum_offset;
};
};
__u32 priority;
int skb_iif;
__u32 hash;
union {
u32 vlan_all;
struct {
__be16 vlan_proto;
__u16 vlan_tci;
};
};
union {
unsigned int napi_id;
unsigned int sender_cpu;
};
__u32 secmark;
union {
__u32 mark;
__u32 reserved_tailroom;
};
union {
__be16 inner_protocol;
__u8 inner_ipproto;
};
__u16 inner_transport_header;
__u16 inner_network_header;
__u16 inner_mac_header;
__be16 protocol;
__u16 transport_header;
__u16 network_header;
__u16 mac_header;
};
struct {
__u8 __pkt_type_offset[0];
__u8 pkt_type: 3;
__u8 ignore_df: 1;
__u8 dst_pending_confirm: 1;
__u8 ip_summed: 2;
__u8 ooo_okay: 1;
__u8 __mono_tc_offset[0];
__u8 mono_delivery_time: 1;
__u8 tc_at_ingress: 1;
__u8 tc_skip_classify: 1;
__u8 remcsum_offload: 1;
__u8 csum_complete_sw: 1;
__u8 csum_level: 2;
__u8 inner_protocol_type: 1;
__u8 l4_hash: 1;
__u8 sw_hash: 1;
__u8 wifi_acked_valid: 1;
__u8 wifi_acked: 1;
__u8 no_fcs: 1;
__u8 encapsulation: 1;
__u8 encap_hdr_csum: 1;
__u8 csum_valid: 1;
__u8 ndisc_nodetype: 2;
__u8 ipvs_property: 1;
__u8 nf_trace: 1;
__u8 redirected: 1;
__u8 from_ingress: 1;
__u8 nf_skip_egress: 1;
__u8 slow_gro: 1;
__u8 csum_not_inet: 1;
__u16 tc_index;
u16 alloc_cpu;
union {
__wsum csum;
struct {
__u16 csum_start;
__u16 csum_offset;
};
};
__u32 priority;
int skb_iif;
__u32 hash;
union {
u32 vlan_all;
struct {
__be16 vlan_proto;
__u16 vlan_tci;
};
};
union {
unsigned int napi_id;
unsigned int sender_cpu;
};
__u32 secmark;
union {
__u32 mark;
__u32 reserved_tailroom;
};
union {
__be16 inner_protocol;
__u8 inner_ipproto;
};
__u16 inner_transport_header;
__u16 inner_network_header;
__u16 inner_mac_header;
__be16 protocol;
__u16 transport_header;
__u16 network_header;
__u16 mac_header;
} headers;
};
__u32 priority;
int skb_iif;
__u32 hash;
__be16 vlan_proto;
__u16 vlan_tci;
union {
unsigned int napi_id;
unsigned int sender_cpu;
};
__u32 secmark;
union {
__u32 mark;
__u32 reserved_tailroom;
};
union {
__be16 inner_protocol;
__u8 inner_ipproto;
};
__u16 inner_transport_header;
__u16 inner_network_header;
__u16 inner_mac_header;
__be16 protocol;
__u16 transport_header;
__u16 network_header;
__u16 mac_header;
__u32 headers_end[0];
sk_buff_data_t tail;
sk_buff_data_t end;
unsigned char *head;
Expand All @@ -135,4 +219,34 @@ struct sk_buff {
struct skb_ext *extensions;
};



/*
 * IPv4 header layout.
 *
 * The address pair is exposed twice through a union: once as the plain
 * members saddr/daddr and once as the named group "addrs". Both spellings
 * alias the same bytes.
 *
 * NOTE(review): ihl is declared ahead of version inside the leading byte;
 * that ordering presumably targets a little-endian bitfield layout — confirm
 * before building for a big-endian target.
 */
struct iphdr {
	__u8	ihl: 4;
	__u8	version: 4;	/* IP version; 4 for IPv4 */
	__u8	tos;
	__be16	tot_len;
	__be16	id;
	__be16	frag_off;
	__u8	ttl;
	__u8	protocol;
	__sum16	check;
	union {
		/* direct access: iph->saddr / iph->daddr */
		struct {
			__be32	saddr;
			__be32	daddr;
		};
		/* same storage, addressed as one group: iph->addrs */
		struct {
			__be32	saddr;
			__be32	daddr;
		} addrs;
	};
};

/*
 * Context object a "netfilter" BPF program receives as its only argument.
 * It bundles the hook state with the packet being evaluated.
 */
struct bpf_nf_ctx {
	const struct nf_hook_state	*state;	/* read-only hook state */
	struct sk_buff			*skb;	/* packet under inspection */
};


#endif /* __VMLINUX_NET_H__ */
3 changes: 3 additions & 0 deletions headers/vmlinux/vmlinux_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,7 @@ typedef __u64 u64;

typedef s64 ktime_t;

typedef u32 uint32_t;


#endif /* __VMLINUX_TYPES_H__ */
2 changes: 1 addition & 1 deletion lib/libbpf
Submodule libbpf updated 99 files
+1 −0 .gitattributes
+3 −0 .github/PULL_REQUEST_TEMPLATE.md
+3 −2 .github/actions/build-selftests/action.yml
+22 −4 .github/actions/build-selftests/build_selftests.sh
+2 −0 .github/actions/build-selftests/helpers.sh
+86,704 −77,624 .github/actions/build-selftests/vmlinux.h
+2 −2 .github/actions/setup/action.yml
+23 −13 .github/actions/vmtest/action.yml
+14 −4 .github/workflows/build.yml
+52 −0 .github/workflows/codeql.yml
+3 −2 .github/workflows/coverity.yml
+19 −0 .github/workflows/lint.yml
+1 −1 .github/workflows/pahole.yml
+4 −4 .github/workflows/test.yml
+0 −14 .lgtm.yml
+1 −1 BPF-CHECKPOINT-COMMIT
+1 −1 CHECKPOINT-COMMIT
+8 −4 README.md
+281 −0 SYNC.md
+ assets/libbpf-logo-compact-darkbg.png
+ assets/libbpf-logo-compact-mono.png
+ assets/libbpf-logo-compact.png
+ assets/libbpf-logo-sideways-darkbg.png
+ assets/libbpf-logo-sideways-mono.png
+ assets/libbpf-logo-sideways.png
+ assets/libbpf-logo-sparse-darkbg.png
+ assets/libbpf-logo-sparse-mono.png
+ assets/libbpf-logo-sparse.png
+0 −0 ci/diffs/.keep
+70 −0 ci/diffs/0001-s390-define-RUNTIME_DISCARD_EXIT-to-fix-link-error-w.patch
+37 −0 ci/diffs/0001-selftests-bpf-Check-whether-to-run-selftest.patch
+46 −0 ci/diffs/0001-selftests-bpf-Select-CONFIG_FUNCTION_ERROR_INJECTION.patch
+68 −0 ci/diffs/0001-tracing-fprobe-Initialize-ret-valiable-to-fix-smatch.patch
+83 −0 ci/diffs/0001-veth-take-into-account-peer-device-for-NETDEV_XDP_AC.patch
+0 −35 ci/diffs/001-fix-oob-write-in-test_verifier.diff
+16 −11 ci/managers/debian.sh
+0 −107 ci/rootfs/mkrootfs_arch.sh
+0 −52 ci/rootfs/mkrootfs_debian.sh
+0 −61 ci/rootfs/mkrootfs_tweak.sh
+0 −107 ci/rootfs/s390x-self-hosted-builder/README.md
+0 −50 ci/rootfs/s390x-self-hosted-builder/actions-runner-libbpf.Dockerfile
+0 −24 ci/rootfs/s390x-self-hosted-builder/actions-runner-libbpf.service
+0 −40 ci/rootfs/s390x-self-hosted-builder/fs/usr/bin/actions-runner
+0 −35 ci/rootfs/s390x-self-hosted-builder/fs/usr/bin/entrypoint
+0 −11 ci/rootfs/s390x-self-hosted-builder/qemu-user-static.service
+0 −5 ci/vmtest/configs/ALLOWLIST-5.5.0
+4 −115 ci/vmtest/configs/DENYLIST-5.5.0
+4 −6 ci/vmtest/configs/DENYLIST-latest
+3 −67 ci/vmtest/configs/DENYLIST-latest.s390x
+2 −0 ci/vmtest/helpers.sh
+27 −9 ci/vmtest/run_selftests.sh
+20 −8 docs/index.rst
+3 −3 docs/libbpf_naming_convention.rst
+228 −0 docs/libbpf_overview.rst
+203 −0 docs/program_types.rst
+718 −280 include/uapi/linux/bpf.h
+120 −0 include/uapi/linux/fcntl.h
+2 −0 include/uapi/linux/if_link.h
+9 −0 include/uapi/linux/if_xdp.h
+63 −0 include/uapi/linux/netdev.h
+43 −0 include/uapi/linux/openat2.h
+55 −5 include/uapi/linux/perf_event.h
+4 −3 scripts/build-fuzzers.sh
+15 −10 scripts/sync-kernel.sh
+9 −5 src/Makefile
+202 −54 src/bpf.c
+227 −23 src/bpf.h
+2 −2 src/bpf_core_read.h
+3 −1 src/bpf_gen_internal.h
+186 −41 src/bpf_helper_defs.h
+120 −7 src/bpf_helpers.h
+398 −37 src/bpf_tracing.h
+213 −89 src/btf.c
+202 −63 src/btf_dump.c
+440 −0 src/elf.c
+32 −30 src/gen_loader.c
+9 −9 src/hashmap.c
+57 −44 src/hashmap.h
+1,289 −503 src/libbpf.c
+353 −22 src/libbpf.h
+34 −1 src/libbpf.map
+16 −0 src/libbpf_common.h
+12 −4 src/libbpf_errno.c
+22 −0 src/libbpf_internal.h
+89 −1 src/libbpf_probes.c
+1 −1 src/libbpf_version.h
+15 −10 src/linker.c
+120 −11 src/netlink.c
+2 −2 src/nlattr.c
+12 −0 src/nlattr.h
+1 −4 src/relo_core.c
+292 −7 src/ringbuf.c
+23 −0 src/skel_internal.h
+9 −9 src/strset.c
+6 −3 src/usdt.bpf.h
+226 −145 src/usdt.c
+333 −0 src/zip.c
+47 −0 src/zip.h
+0 −1 travis-ci
9 changes: 9 additions & 0 deletions netfilter-bpf/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)

USER_TARGETS := netfilter_ip4_blocklist
BPF_TARGETS := netfilter_ip4_blocklist.bpf


LIB_DIR = ../lib

include $(LIB_DIR)/common.mk
20 changes: 20 additions & 0 deletions netfilter-bpf/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Introduction

BPF_PROG_TYPE_NETFILTER was introduced in Linux 6.4. With a 6.4 or newer kernel, a BPF program can attach to netfilter hooks and handle packets in a similar way to iptables/nftables. As of 6.5.0, no BPF kfunc has been implemented yet for DNAT/SNAT, so the only thing a BPF program can do is decide whether or not to DROP the packet.

* netfilter_ip4_blocklist.c/netfilter_ip4_blocklist.bpf.c

This sample code implements a simple IPv4 blocklist.
The BPF program drops a packet if its destination IP address matches an entry in the map of type BPF_MAP_TYPE_LPM_TRIE.
The userspace code loads the BPF program, attaches it to netfilter's FORWARD/OUTPUT hooks, and then writes IP patterns into the BPF map.


# TODO

This sample hard-codes the IP addresses to be blocked, just for demonstration.
It would be better to split the userspace program into two parts:
* init program
Loads the BPF program and pins the program and its map somewhere under /sys/fs/bpf
* interactive program
Adds/deletes/queries blocklist entries via the BPF map pinned under /sys/fs/bpf

63 changes: 63 additions & 0 deletions netfilter-bpf/netfilter_ip4_blocklist.bpf.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
// SPDX-License-Identifier: GPL-2.0

#include "vmlinux_local.h"
#include "linux/bpf.h"
#include <bpf/bpf_helpers.h>


/*
 * Verdicts returned by the netfilter program in this file: NF_DROP when the
 * packet matched the blocklist, NF_ACCEPT in every other case.
 * NOTE(review): the values presumably mirror the kernel's netfilter verdict
 * codes — confirm against the uapi linux/netfilter.h.
 */
#define NF_DROP 0
#define NF_ACCEPT 1

/*
 * Externally provided functions: both are declared __ksym, so no definition
 * lives in this file.
 *
 * bpf_dynptr_from_skb: takes an skb, a flags word and an uninitialised
 * dynptr; the program in this file treats a non-zero return as failure.
 *
 * bpf_dynptr_slice: takes a dynptr, an offset, and a caller-supplied buffer
 * with its size; the program in this file treats a NULL return as failure.
 *
 * NOTE(review): presumably the slice call returns a pointer to buffer__sz
 * bytes at offset and may copy into buffer when needed — confirm against the
 * kernel kfunc documentation, as is the meaning of flags.
 */
int bpf_dynptr_from_skb(struct sk_buff *skb,
__u64 flags, struct bpf_dynptr *ptr__uninit) __ksym;
void *bpf_dynptr_slice(const struct bpf_dynptr *ptr,
uint32_t offset, void *buffer, uint32_t buffer__sz) __ksym;


/*
 * Key type of ipv4_lpm_map: a prefix length followed by the address bits.
 * Lookups in this file use prefixlen 32 and copy iphdr->daddr into data
 * unchanged.
 */
struct ipv4_lpm_key {
	__u32	prefixlen;	/* number of leading bits of data to match */
	__u32	data;		/* IPv4 address, same representation as iphdr->daddr */
};

/*
 * Blocklist storage: an LPM trie keyed by struct ipv4_lpm_key with a __u32
 * value per entry, created with BPF_F_NO_PREALLOC and capped at 200 entries.
 * The program in this file only reads it (bpf_map_lookup_elem).
 */
struct {
__uint(type, BPF_MAP_TYPE_LPM_TRIE);
__type(key, struct ipv4_lpm_key);
__type(value, __u32);
__uint(map_flags, BPF_F_NO_PREALLOC);
__uint(max_entries, 200);
} ipv4_lpm_map SEC(".maps");


/*
 * Netfilter hook program: drop IPv4 packets whose destination address has a
 * match in ipv4_lpm_map and accept everything else.
 *
 * @ctx: hook context; only ctx->skb is used.
 *
 * Returns NF_DROP on a blocklist hit. Returns NF_ACCEPT otherwise, including
 * for packets that are too short, whose header cannot be read, or that are
 * not IPv4.
 */
SEC("netfilter")
int netfilter_ip4block(struct bpf_nf_ctx *ctx)
{
	/* Locals are kept in reverse x-mas tree order (kernel networking style). */
	struct sk_buff *skb = ctx->skb;
	struct iphdr *p, iph = {};
	struct ipv4_lpm_key key;
	struct bpf_dynptr ptr;
	__u32 *pvalue;

	/*
	 * Bail out if the packet cannot hold a full IPv4 header or the dynptr
	 * cannot be set up. A packet of exactly sizeof(iph) bytes is still
	 * inspected, so header-only packets do not bypass the blocklist.
	 */
	if (skb->len < sizeof(iph) || bpf_dynptr_from_skb(skb, 0, &ptr))
		return NF_ACCEPT;

	/* iph is the caller-side buffer handed to the kfunc for the header bytes */
	p = bpf_dynptr_slice(&ptr, 0, &iph, sizeof(iph));
	if (!p)
		return NF_ACCEPT;

	/* ip4 only */
	if (p->version != 4)
		return NF_ACCEPT;

	/* search p->daddr in trie */
	key.prefixlen = 32;
	key.data = p->daddr;
	pvalue = bpf_map_lookup_elem(&ipv4_lpm_map, &key);
	if (pvalue) {
		/* cat /sys/kernel/debug/tracing/trace_pipe */
		bpf_printk("rule matched with %u...\n", *pvalue);
		return NF_DROP;
	}

	return NF_ACCEPT;
}

char _license[] SEC("license") = "GPL";

Loading