From eff94154cc1a55c399ce71199bfbc1662ddc0f16 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Fri, 2 Jul 2021 16:06:16 +0200 Subject: samples/bpf: xdp_redirect_cpu_user: Cpumap qsize set larger default Experience from production shows queue size of 192 is too small, as this caused packet drops during cpumap-enqueue on RX-CPU. This can be diagnosed with xdp_monitor sample program. This bpftrace program was used to diagnose the problem in more detail: bpftrace -e ' tracepoint:xdp:xdp_cpumap_kthread { @deq_bulk = lhist(args->processed,0,10,1); @drop_net = lhist(args->drops,0,10,1) } tracepoint:xdp:xdp_cpumap_enqueue { @enq_bulk = lhist(args->processed,0,10,1); @enq_drops = lhist(args->drops,0,10,1); }' Watch out for the @enq_drops counter. The @drop_net counter can happen when netstack gets invalid packets, so don't despair it can be natural, and that counter will likely disappear in newer kernels as it was a source of confusion (look at netstat info for reason of the netstack @drop_net counters). The production system was configured with CPU power-saving C6 state. Learn more in this blogpost[1]. And wakeup latency in usec for the states are: # grep -H . /sys/devices/system/cpu/cpu0/cpuidle/*/latency /sys/devices/system/cpu/cpu0/cpuidle/state0/latency:0 /sys/devices/system/cpu/cpu0/cpuidle/state1/latency:2 /sys/devices/system/cpu/cpu0/cpuidle/state2/latency:10 /sys/devices/system/cpu/cpu0/cpuidle/state3/latency:133 Deepest state take 133 usec to wakeup from (133/10^6). The link speed is 25Gbit/s ((25*10^9/8) in bytes/sec). How many bytes can arrive with in 133 usec at this speed: (25*10^9/8)*(133/10^6) = 415625 bytes. With MTU size packets this is 275 packets, and with minimum Ethernet (incl intergap overhead) 84 bytes it is 4948 packets. Clearly default queue size is too small. Setting default cpumap queue to 2048 as worst-case (small packet) at 10Gbit/s is 1979 packets with 133 usec wakeup time, +64 packet before kthread wakeup call (due to xdp_do_flush) worst-case 2043 packets. Thus, if a packet burst on RX-CPU will enqueue packets to a remote cpumap CPU that is in deep-sleep state it can overrun the cpumap queue. The production system was also configured to avoid deep-sleep via: tuned-adm profile network-latency [1] https://jeremyeder.com/2013/08/30/oh-did-you-expect-the-cpu/ Signed-off-by: Jesper Dangaard Brouer Signed-off-by: Alexei Starovoitov Acked-by: Song Liu Link: https://lore.kernel.org/bpf/162523477604.786243.13372630844944530891.stgit@firesoul --- samples/bpf/xdp_redirect_cpu_user.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) (limited to 'samples') diff --git a/samples/bpf/xdp_redirect_cpu_user.c b/samples/bpf/xdp_redirect_cpu_user.c index 576411612523..d3ecdc18b9c1 100644 --- a/samples/bpf/xdp_redirect_cpu_user.c +++ b/samples/bpf/xdp_redirect_cpu_user.c @@ -792,13 +792,23 @@ int main(int argc, char **argv) n_cpus = get_nprocs_conf(); - /* Notice: choosing he queue size is very important with the - * ixgbe driver, because it's driver page recycling trick is - * dependend on pages being returned quickly. The number of - * out-standing packets in the system must be less-than 2x - * RX-ring size. + /* Notice: Choosing the queue size is very important when CPU is + * configured with power-saving states. + * + * If deepest state take 133 usec to wakeup from (133/10^6). When link + * speed is 10Gbit/s ((10*10^9/8) in bytes/sec). How many bytes can + * arrive with in 133 usec at this speed: (10*10^9/8)*(133/10^6) = + * 166250 bytes. With MTU size packets this is 110 packets, and with + * minimum Ethernet (MAC-preamble + intergap) 84 bytes is 1979 packets. + * + * Setting default cpumap queue to 2048 as worst-case (small packet) + * should be +64 packet due kthread wakeup call (due to xdp_do_flush) + * worst-case is 2043 packets. + * + * Sysadm can configured system to avoid deep-sleep via: + * tuned-adm profile network-latency */ - qsize = 128+64; + qsize = 2048; snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); prog_load_attr.file = filename; -- cgit v1.2.3-59-g8ed1b From 7d07006f05922b95518be403f08ef8437b67aa32 Mon Sep 17 00:00:00 2001 From: Juhee Kang Date: Tue, 27 Jul 2021 04:10:55 +0000 Subject: samples: bpf: Fix tracex7 error raised on the missing argument The current behavior of 'tracex7' doesn't consist with other bpf samples tracex{1..6}. Other samples do not require any argument to run with, but tracex7 should be run with btrfs device argument. (it should be executed with test_override_return.sh) Currently, tracex7 doesn't have any description about how to run this program and raises an unexpected error. And this result might be confusing since users might not have a hunch about how to run this program. // Current behavior # ./tracex7 sh: 1: Syntax error: word unexpected (expecting ")") // Fixed behavior # ./tracex7 ERROR: Run with the btrfs device argument! In order to fix this error, this commit adds logic to report a message and exit when running this program with a missing argument. Additionally in test_override_return.sh, there is a problem with multiple directory(tmpmnt) creation. So in this commit adds a line with removing the directory with every execution. Signed-off-by: Juhee Kang Signed-off-by: Andrii Nakryiko Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20210727041056.23455-1-claudiajkang@gmail.com --- samples/bpf/test_override_return.sh | 1 + samples/bpf/tracex7_user.c | 5 +++++ 2 files changed, 6 insertions(+) (limited to 'samples') diff --git a/samples/bpf/test_override_return.sh b/samples/bpf/test_override_return.sh index e68b9ee6814b..35db26f736b9 100755 --- a/samples/bpf/test_override_return.sh +++ b/samples/bpf/test_override_return.sh @@ -1,5 +1,6 @@ #!/bin/bash +rm -r tmpmnt rm -f testfile.img dd if=/dev/zero of=testfile.img bs=1M seek=1000 count=1 DEVICE=$(losetup --show -f testfile.img) diff --git a/samples/bpf/tracex7_user.c b/samples/bpf/tracex7_user.c index fdcd6580dd73..8be7ce18d3ba 100644 --- a/samples/bpf/tracex7_user.c +++ b/samples/bpf/tracex7_user.c @@ -14,6 +14,11 @@ int main(int argc, char **argv) int ret = 0; FILE *f; + if (!argv[1]) { + fprintf(stderr, "ERROR: Run with the btrfs device argument!\n"); + return 0; + } + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); obj = bpf_object__open_file(filename, NULL); if (libbpf_get_error(obj)) { -- cgit v1.2.3-59-g8ed1b From 05e9b4f60d31a03815a699be24226bf26b7021f9 Mon Sep 17 00:00:00 2001 From: Juhee Kang Date: Tue, 27 Jul 2021 04:10:56 +0000 Subject: samples: bpf: Add the omitted xdp samples to .gitignore There are recently added xdp samples (xdp_redirect_map_multi and xdpsock_ctrl_proc) which are not managed by .gitignore. This commit adds these files to .gitignore. Signed-off-by: Juhee Kang Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210727041056.23455-2-claudiajkang@gmail.com --- samples/bpf/.gitignore | 2 ++ 1 file changed, 2 insertions(+) (limited to 'samples') diff --git a/samples/bpf/.gitignore b/samples/bpf/.gitignore index 0b9548ea8477..fcba217f0ae2 100644 --- a/samples/bpf/.gitignore +++ b/samples/bpf/.gitignore @@ -45,11 +45,13 @@ xdp_monitor xdp_redirect xdp_redirect_cpu xdp_redirect_map +xdp_redirect_map_multi xdp_router_ipv4 xdp_rxq_info xdp_sample_pkts xdp_tx_iptunnel xdpsock +xdpsock_ctrl_proc xsk_fwd testfile.img hbm_out.log -- cgit v1.2.3-59-g8ed1b From 34ad6d9d8c27293e1895b448af7d6cf5d351ad8d Mon Sep 17 00:00:00 2001 From: Matthew Cover Date: Fri, 30 Jul 2021 17:56:32 -0700 Subject: bpf, samples: Add missing mprog-disable to xdp_redirect_cpu's optstring Commit ce4dade7f12a ("samples/bpf: xdp_redirect_cpu: Load a eBPF program on cpumap") added the following option, but missed adding it to optstring: - mprog-disable: disable loading XDP program on cpumap entries Fix it and add the missing option character. Fixes: ce4dade7f12a ("samples/bpf: xdp_redirect_cpu: Load a eBPF program on cpumap") Signed-off-by: Matthew Cover Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20210731005632.13228-1-matthew.cover@stackpath.com --- samples/bpf/xdp_redirect_cpu_user.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'samples') diff --git a/samples/bpf/xdp_redirect_cpu_user.c b/samples/bpf/xdp_redirect_cpu_user.c index d3ecdc18b9c1..9e225c96b77e 100644 --- a/samples/bpf/xdp_redirect_cpu_user.c +++ b/samples/bpf/xdp_redirect_cpu_user.c @@ -841,7 +841,7 @@ int main(int argc, char **argv) memset(cpu, 0, n_cpus * sizeof(int)); /* Parse commands line args */ - while ((opt = getopt_long(argc, argv, "hSd:s:p:q:c:xzFf:e:r:m:", + while ((opt = getopt_long(argc, argv, "hSd:s:p:q:c:xzFf:e:r:m:n", long_options, &longindex)) != -1) { switch (opt) { case 'd': -- cgit v1.2.3-59-g8ed1b From 29f24c43cbe09b83162776a370848d5a782dc3b7 Mon Sep 17 00:00:00 2001 From: Niklas Söderlund Date: Fri, 6 Aug 2021 14:28:54 +0200 Subject: samples/bpf: xdpsock: Make the sample more useful outside the tree MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The xdpsock sample application is a useful base for experiment's around AF_XDP sockets. Compiling the sample outside of the kernel tree is made harder then it has to be as the sample includes two headers and that are not installed by 'make install_header' nor are usually part of distributions kernel headers. The first header asm/barrier.h is not used and can just be dropped. The second linux/compiler.h are only needed for the decorator __force and are only used in ip_fast_csum(), csum_fold() and csum_tcpudp_nofold(). These functions are copied verbatim from include/asm-generic/checksum.h and lib/checksum.c. While it's fine to copy and use these functions in the sample application the decorator brings no value and can be dropped together with the include. With this change it's trivial to compile the xdpsock sample outside the kernel tree from xdpsock_user.c and xdpsock.h. $ gcc -o xdpsock xdpsock_user.c -lbpf -lpthread Signed-off-by: Niklas Söderlund Signed-off-by: Simon Horman Signed-off-by: Andrii Nakryiko Reviewed-by: Louis Peens Link: https://lore.kernel.org/bpf/20210806122855.26115-2-simon.horman@corigine.com --- samples/bpf/xdpsock_user.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) (limited to 'samples') diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c index 33d0bdebbed8..7c56a7a784e1 100644 --- a/samples/bpf/xdpsock_user.c +++ b/samples/bpf/xdpsock_user.c @@ -1,12 +1,10 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright(c) 2017 - 2018 Intel Corporation. */ -#include #include #include #include #include -#include #include #include #include @@ -663,7 +661,7 @@ __sum16 ip_fast_csum(const void *iph, unsigned int ihl); */ __sum16 ip_fast_csum(const void *iph, unsigned int ihl) { - return (__force __sum16)~do_csum(iph, ihl * 4); + return (__sum16)~do_csum(iph, ihl * 4); } /* @@ -673,11 +671,11 @@ __sum16 ip_fast_csum(const void *iph, unsigned int ihl) */ static inline __sum16 csum_fold(__wsum csum) { - u32 sum = (__force u32)csum; + u32 sum = (u32)csum; sum = (sum & 0xffff) + (sum >> 16); sum = (sum & 0xffff) + (sum >> 16); - return (__force __sum16)~sum; + return (__sum16)~sum; } /* @@ -703,16 +701,16 @@ __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __wsum sum) { - unsigned long long s = (__force u32)sum; + unsigned long long s = (u32)sum; - s += (__force u32)saddr; - s += (__force u32)daddr; + s += (u32)saddr; + s += (u32)daddr; #ifdef __BIG_ENDIAN__ s += proto + len; #else s += (proto + len) << 8; #endif - return (__force __wsum)from64to32(s); + return (__wsum)from64to32(s); } /* -- cgit v1.2.3-59-g8ed1b From f4700a62c27161e364f66fdce527e8b04083c444 Mon Sep 17 00:00:00 2001 From: Niklas Söderlund Date: Fri, 6 Aug 2021 14:28:55 +0200 Subject: samples/bpf: xdpsock: Remove forward declaration of ip_fast_csum() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a forward declaration of ip_fast_csum() just before its implementation, remove the unneeded forward declaration. While at it mark the implementation as static inline. Signed-off-by: Niklas Söderlund Signed-off-by: Simon Horman Signed-off-by: Andrii Nakryiko Reviewed-by: Louis Peens Link: https://lore.kernel.org/bpf/20210806122855.26115-3-simon.horman@corigine.com --- samples/bpf/xdpsock_user.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'samples') diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c index 7c56a7a784e1..49d7a6ad7e39 100644 --- a/samples/bpf/xdpsock_user.c +++ b/samples/bpf/xdpsock_user.c @@ -651,15 +651,13 @@ out: return result; } -__sum16 ip_fast_csum(const void *iph, unsigned int ihl); - /* * This is a version of ip_compute_csum() optimized for IP headers, * which always checksum on 4 octet boundaries. * This function code has been taken from * Linux kernel lib/checksum.c */ -__sum16 ip_fast_csum(const void *iph, unsigned int ihl) +static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl) { return (__sum16)~do_csum(iph, ihl * 4); } -- cgit v1.2.3-59-g8ed1b From d692a637b4c5151a064f1eabd404944b31e28336 Mon Sep 17 00:00:00 2001 From: Muhammad Falak R Wani Date: Mon, 9 Aug 2021 12:30:46 +0530 Subject: samples, bpf: Add an explict comment to handle nested vlan tagging. A codeblock for handling nested vlan trips newbies into thinking it as duplicate code. Explicitly add a comment to clarify. Signed-off-by: Muhammad Falak R Wani Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20210809070046.32142-1-falakreyaz@gmail.com --- samples/bpf/xdp1_kern.c | 2 ++ samples/bpf/xdp2_kern.c | 2 ++ 2 files changed, 4 insertions(+) (limited to 'samples') diff --git a/samples/bpf/xdp1_kern.c b/samples/bpf/xdp1_kern.c index 34b64394ed9c..f0c5d95084de 100644 --- a/samples/bpf/xdp1_kern.c +++ b/samples/bpf/xdp1_kern.c @@ -57,6 +57,7 @@ int xdp_prog1(struct xdp_md *ctx) h_proto = eth->h_proto; + /* Handle VLAN tagged packet */ if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) { struct vlan_hdr *vhdr; @@ -66,6 +67,7 @@ int xdp_prog1(struct xdp_md *ctx) return rc; h_proto = vhdr->h_vlan_encapsulated_proto; } + /* Handle double VLAN tagged packet */ if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) { struct vlan_hdr *vhdr; diff --git a/samples/bpf/xdp2_kern.c b/samples/bpf/xdp2_kern.c index c787f4b49646..d8a64ab077b0 100644 --- a/samples/bpf/xdp2_kern.c +++ b/samples/bpf/xdp2_kern.c @@ -73,6 +73,7 @@ int xdp_prog1(struct xdp_md *ctx) h_proto = eth->h_proto; + /* Handle VLAN tagged packet */ if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) { struct vlan_hdr *vhdr; @@ -82,6 +83,7 @@ int xdp_prog1(struct xdp_md *ctx) return rc; h_proto = vhdr->h_vlan_encapsulated_proto; } + /* Handle double VLAN tagged packet */ if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) { struct vlan_hdr *vhdr; -- cgit v1.2.3-59-g8ed1b From d1bf7c4d5deae6685a42463f4d29418fd2515d05 Mon Sep 17 00:00:00 2001 From: Muhammad Falak R Wani Date: Sun, 15 Aug 2021 12:20:13 +0530 Subject: samples/bpf: Define MAX_ENTRIES instead of a magic number in offwaketime Define MAX_ENTRIES instead of using 10000 as a magic number in various places. Signed-off-by: Muhammad Falak R Wani Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210815065013.15411-1-falakreyaz@gmail.com --- samples/bpf/offwaketime_kern.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'samples') diff --git a/samples/bpf/offwaketime_kern.c b/samples/bpf/offwaketime_kern.c index 14b792915a9c..4866afd054da 100644 --- a/samples/bpf/offwaketime_kern.c +++ b/samples/bpf/offwaketime_kern.c @@ -20,6 +20,7 @@ }) #define MINBLOCK_US 1 +#define MAX_ENTRIES 10000 struct key_t { char waker[TASK_COMM_LEN]; @@ -32,14 +33,14 @@ struct { __uint(type, BPF_MAP_TYPE_HASH); __type(key, struct key_t); __type(value, u64); - __uint(max_entries, 10000); + __uint(max_entries, MAX_ENTRIES); } counts SEC(".maps"); struct { __uint(type, BPF_MAP_TYPE_HASH); __type(key, u32); __type(value, u64); - __uint(max_entries, 10000); + __uint(max_entries, MAX_ENTRIES); } start SEC(".maps"); struct wokeby_t { @@ -51,14 +52,14 @@ struct { __uint(type, BPF_MAP_TYPE_HASH); __type(key, u32); __type(value, struct wokeby_t); - __uint(max_entries, 10000); + __uint(max_entries, MAX_ENTRIES); } wokeby SEC(".maps"); struct { __uint(type, BPF_MAP_TYPE_STACK_TRACE); __uint(key_size, sizeof(u32)); __uint(value_size, PERF_MAX_STACK_DEPTH * sizeof(u64)); - __uint(max_entries, 10000); + __uint(max_entries, MAX_ENTRIES); } stackmap SEC(".maps"); #define STACKID_FLAGS (0 | BPF_F_FAST_STACK_CMP) -- cgit v1.2.3-59-g8ed1b From 7caeabd726f22e6a6c44c434574fb489986e5baa Mon Sep 17 00:00:00 2001 From: Juhee Kang Date: Fri, 13 Aug 2021 00:08:12 +0900 Subject: samples: pktgen: pass the environment variable of normal user to sudo All pktgen samples can use the environment variable instead of option parameters(eg. $DEV is able to use instead of '-i' option). This is results of running sample as root and user: // running as root # DEV=eth0 DEST_IP=10.1.0.1 DST_MAC=00:11:22:33:44:55 ./pktgen_sample01_simple.sh -v -n 1 Running... ctrl^C to stop // running as normal user $ DEV=eth0 DEST_IP=10.1.0.1 DST_MAC=00:11:22:33:44:55 ./pktgen_sample01_simple.sh -v -n 1 [...] ERROR: Please specify output device This results show the sample doesn't work properly when the sample runs as normal user. Because the sample is restarted by the function (root_check_run_with_sudo) to run with sudo. In this process, the environment variable of normal user doesn't propagate to sudo. It can be solved by using "-E"(--preserve-env) option of "sudo", which preserve normal user's existing environment variables. So this commit adds "-E" option in the function (root_check_run_with_sudo). Signed-off-by: Juhee Kang Signed-off-by: David S. Miller --- samples/pktgen/functions.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'samples') diff --git a/samples/pktgen/functions.sh b/samples/pktgen/functions.sh index a335393157eb..933194257a24 100644 --- a/samples/pktgen/functions.sh +++ b/samples/pktgen/functions.sh @@ -123,7 +123,7 @@ function root_check_run_with_sudo() { if [ "$EUID" -ne 0 ]; then if [ -x $0 ]; then # Directly executable use sudo info "Not root, running with sudo" - sudo "$0" "$@" + sudo -E "$0" "$@" exit $? fi err 4 "cannot perform sudo run of $0" -- cgit v1.2.3-59-g8ed1b From 0f0c4f1b72e090b23131700bb155944cc28b2a7b Mon Sep 17 00:00:00 2001 From: Juhee Kang Date: Fri, 13 Aug 2021 00:08:13 +0900 Subject: samples: pktgen: add missing IPv6 option to pktgen scripts Currently, "sample04" and "sample05" are not working properly when running with an IPv6 option("-6"). The commit 0f06a6787e05 ("samples: Add an IPv6 "-6" option to the pktgen scripts") has omitted the addition of this option at "sample04" and "sample05". In order to support IPv6 option, this commit adds logic related to IPv6 option. Fixes: 0f06a6787e05 ("samples: Add an IPv6 "-6" option to the pktgen scripts") Signed-off-by: Juhee Kang Signed-off-by: David S. Miller --- samples/pktgen/pktgen_sample04_many_flows.sh | 12 +++++++----- samples/pktgen/pktgen_sample05_flow_per_thread.sh | 12 +++++++----- 2 files changed, 14 insertions(+), 10 deletions(-) (limited to 'samples') diff --git a/samples/pktgen/pktgen_sample04_many_flows.sh b/samples/pktgen/pktgen_sample04_many_flows.sh index 56c5f5af350f..cff51f861506 100755 --- a/samples/pktgen/pktgen_sample04_many_flows.sh +++ b/samples/pktgen/pktgen_sample04_many_flows.sh @@ -13,13 +13,15 @@ root_check_run_with_sudo "$@" # Parameter parsing via include source ${basedir}/parameters.sh # Set some default params, if they didn't get set -[ -z "$DEST_IP" ] && DEST_IP="198.18.0.42" +if [ -z "$DEST_IP" ]; then + [ -z "$IP6" ] && DEST_IP="198.18.0.42" || DEST_IP="FD00::1" +fi [ -z "$DST_MAC" ] && DST_MAC="90:e2:ba:ff:ff:ff" [ -z "$CLONE_SKB" ] && CLONE_SKB="0" [ -z "$COUNT" ] && COUNT="0" # Zero means indefinitely if [ -n "$DEST_IP" ]; then - validate_addr $DEST_IP - read -r DST_MIN DST_MAX <<< $(parse_addr $DEST_IP) + validate_addr${IP6} $DEST_IP + read -r DST_MIN DST_MAX <<< $(parse_addr${IP6} $DEST_IP) fi if [ -n "$DST_PORT" ]; then read -r UDP_DST_MIN UDP_DST_MAX <<< $(parse_ports $DST_PORT) @@ -62,8 +64,8 @@ for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do # Single destination pg_set $dev "dst_mac $DST_MAC" - pg_set $dev "dst_min $DST_MIN" - pg_set $dev "dst_max $DST_MAX" + pg_set $dev "dst${IP6}_min $DST_MIN" + pg_set $dev "dst${IP6}_max $DST_MAX" if [ -n "$DST_PORT" ]; then # Single destination port or random port range diff --git a/samples/pktgen/pktgen_sample05_flow_per_thread.sh b/samples/pktgen/pktgen_sample05_flow_per_thread.sh index 6e0effabca59..3578d0aa4ac5 100755 --- a/samples/pktgen/pktgen_sample05_flow_per_thread.sh +++ b/samples/pktgen/pktgen_sample05_flow_per_thread.sh @@ -17,14 +17,16 @@ root_check_run_with_sudo "$@" # Parameter parsing via include source ${basedir}/parameters.sh # Set some default params, if they didn't get set -[ -z "$DEST_IP" ] && DEST_IP="198.18.0.42" +if [ -z "$DEST_IP" ]; then + [ -z "$IP6" ] && DEST_IP="198.18.0.42" || DEST_IP="FD00::1" +fi [ -z "$DST_MAC" ] && DST_MAC="90:e2:ba:ff:ff:ff" [ -z "$CLONE_SKB" ] && CLONE_SKB="0" [ -z "$BURST" ] && BURST=32 [ -z "$COUNT" ] && COUNT="0" # Zero means indefinitely if [ -n "$DEST_IP" ]; then - validate_addr $DEST_IP - read -r DST_MIN DST_MAX <<< $(parse_addr $DEST_IP) + validate_addr${IP6} $DEST_IP + read -r DST_MIN DST_MAX <<< $(parse_addr${IP6} $DEST_IP) fi if [ -n "$DST_PORT" ]; then read -r UDP_DST_MIN UDP_DST_MAX <<< $(parse_ports $DST_PORT) @@ -52,8 +54,8 @@ for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do # Single destination pg_set $dev "dst_mac $DST_MAC" - pg_set $dev "dst_min $DST_MIN" - pg_set $dev "dst_max $DST_MAX" + pg_set $dev "dst${IP6}_min $DST_MIN" + pg_set $dev "dst${IP6}_max $DST_MAX" if [ -n "$DST_PORT" ]; then # Single destination port or random port range -- cgit v1.2.3-59-g8ed1b From 50b796e645a5d217fd9d8648ec594241e6f1dd57 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Sat, 21 Aug 2021 05:49:49 +0530 Subject: samples: bpf: Fix a couple of warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit cookie_uid_helper_example.c: In function ‘main’: cookie_uid_helper_example.c:178:69: warning: ‘ -j ACCEPT’ directive writing 10 bytes into a region of size between 8 and 58 [-Wformat-overflow=] 178 | sprintf(rules, "iptables -A OUTPUT -m bpf --object-pinned %s -j ACCEPT", | ^~~~~~~~~~ /home/kkd/src/linux/samples/bpf/cookie_uid_helper_example.c:178:9: note: ‘sprintf’ output between 53 and 103 bytes into a destination of size 100 178 | sprintf(rules, "iptables -A OUTPUT -m bpf --object-pinned %s -j ACCEPT", | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 179 | file); | ~~~~~ Fix by using snprintf and a sufficiently sized buffer. tracex4_user.c:35:15: warning: ‘write’ reading 12 bytes from a region of size 11 [-Wstringop-overread] 35 | key = write(1, "\e[1;1H\e[2J", 12); /* clear screen */ | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~ Use size as 11. Signed-off-by: Kumar Kartikeya Dwivedi Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20210821002010.845777-2-memxor@gmail.com --- samples/bpf/cookie_uid_helper_example.c | 11 ++++++++--- samples/bpf/tracex4_user.c | 2 +- 2 files changed, 9 insertions(+), 4 deletions(-) (limited to 'samples') diff --git a/samples/bpf/cookie_uid_helper_example.c b/samples/bpf/cookie_uid_helper_example.c index cc3bce8d3aac..54958802c032 100644 --- a/samples/bpf/cookie_uid_helper_example.c +++ b/samples/bpf/cookie_uid_helper_example.c @@ -167,7 +167,7 @@ static void prog_load(void) static void prog_attach_iptables(char *file) { int ret; - char rules[100]; + char rules[256]; if (bpf_obj_pin(prog_fd, file)) error(1, errno, "bpf_obj_pin"); @@ -175,8 +175,13 @@ static void prog_attach_iptables(char *file) printf("file path too long: %s\n", file); exit(1); } - sprintf(rules, "iptables -A OUTPUT -m bpf --object-pinned %s -j ACCEPT", - file); + ret = snprintf(rules, sizeof(rules), + "iptables -A OUTPUT -m bpf --object-pinned %s -j ACCEPT", + file); + if (ret < 0 || ret >= sizeof(rules)) { + printf("error constructing iptables command\n"); + exit(1); + } ret = system(rules); if (ret < 0) { printf("iptables rule update failed: %d/n", WEXITSTATUS(ret)); diff --git a/samples/bpf/tracex4_user.c b/samples/bpf/tracex4_user.c index cea399424bca..566e6440e8c2 100644 --- a/samples/bpf/tracex4_user.c +++ b/samples/bpf/tracex4_user.c @@ -32,7 +32,7 @@ static void print_old_objects(int fd) __u64 key, next_key; struct pair v; - key = write(1, "\e[1;1H\e[2J", 12); /* clear screen */ + key = write(1, "\e[1;1H\e[2J", 11); /* clear screen */ key = -1; while (bpf_map_get_next_key(fd, &key, &next_key) == 0) { -- cgit v1.2.3-59-g8ed1b From 156f886cf69715265f7b65cb4153bce8f8570326 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Sat, 21 Aug 2021 05:49:51 +0530 Subject: samples: bpf: Add basic infrastructure for XDP samples This file implements some common helpers to consolidate differences in features and functionality between the various XDP samples and give them a consistent look, feel, and reporting capabilities. This commit only adds support for receive statistics, which does not rely on any tracepoint, but on the XDP program installed on the device by each XDP redirect sample. Some of the key features are: * A concise output format accompanied by helpful text explaining its fields. * An elaborate output format building upon the concise one, and folding out details in case of errors and staying out of view otherwise. * Printing driver names for devices redirecting packets. * Getting mac address for interface. * Printing summarized total statistics for the entire session. * Ability to dynamically switch between concise and verbose mode, using SIGQUIT (Ctrl + \). In later patches, the support will be extended for each tracepoint with its own custom output in concise and verbose mode. Signed-off-by: Kumar Kartikeya Dwivedi Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20210821002010.845777-4-memxor@gmail.com --- samples/bpf/xdp_sample_shared.h | 17 + samples/bpf/xdp_sample_user.c | 838 ++++++++++++++++++++++++++++++++++++++++ samples/bpf/xdp_sample_user.h | 60 +++ 3 files changed, 915 insertions(+) create mode 100644 samples/bpf/xdp_sample_shared.h create mode 100644 samples/bpf/xdp_sample_user.c create mode 100644 samples/bpf/xdp_sample_user.h (limited to 'samples') diff --git a/samples/bpf/xdp_sample_shared.h b/samples/bpf/xdp_sample_shared.h new file mode 100644 index 000000000000..8a7669a5d563 --- /dev/null +++ b/samples/bpf/xdp_sample_shared.h @@ -0,0 +1,17 @@ +// SPDX-License-Identifier: GPL-2.0-only +#ifndef _XDP_SAMPLE_SHARED_H +#define _XDP_SAMPLE_SHARED_H + +struct datarec { + size_t processed; + size_t dropped; + size_t issue; + union { + size_t xdp_pass; + size_t info; + }; + size_t xdp_drop; + size_t xdp_redirect; +} __attribute__((aligned(64))); + +#endif diff --git a/samples/bpf/xdp_sample_user.c b/samples/bpf/xdp_sample_user.c new file mode 100644 index 000000000000..073aa3424e4b --- /dev/null +++ b/samples/bpf/xdp_sample_user.c @@ -0,0 +1,838 @@ +// SPDX-License-Identifier: GPL-2.0-only +#define _GNU_SOURCE + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "bpf_util.h" +#include "xdp_sample_user.h" + +#define __sample_print(fmt, cond, ...) \ + ({ \ + if (cond) \ + printf(fmt, ##__VA_ARGS__); \ + }) + +#define print_always(fmt, ...) __sample_print(fmt, 1, ##__VA_ARGS__) +#define print_default(fmt, ...) \ + __sample_print(fmt, sample_log_level & LL_DEFAULT, ##__VA_ARGS__) +#define __print_err(err, fmt, ...) \ + ({ \ + __sample_print(fmt, err > 0 || sample_log_level & LL_DEFAULT, \ + ##__VA_ARGS__); \ + sample_err_exp = sample_err_exp ? true : err > 0; \ + }) +#define print_err(err, fmt, ...) __print_err(err, fmt, ##__VA_ARGS__) + +#define __COLUMN(x) "%'10" x " %-13s" +#define FMT_COLUMNf __COLUMN(".0f") +#define FMT_COLUMNd __COLUMN("d") +#define FMT_COLUMNl __COLUMN("llu") +#define RX(rx) rx, "rx/s" +#define PPS(pps) pps, "pkt/s" +#define DROP(drop) drop, "drop/s" +#define ERR(err) err, "error/s" +#define HITS(hits) hits, "hit/s" +#define XMIT(xmit) xmit, "xmit/s" +#define PASS(pass) pass, "pass/s" +#define REDIR(redir) redir, "redir/s" +#define NANOSEC_PER_SEC 1000000000 /* 10^9 */ + +#define XDP_UNKNOWN (XDP_REDIRECT + 1) +#define XDP_ACTION_MAX (XDP_UNKNOWN + 1) +#define XDP_REDIRECT_ERR_MAX 7 + +enum map_type { + MAP_RX, + NUM_MAP, +}; + +enum log_level { + LL_DEFAULT = 1U << 0, + LL_SIMPLE = 1U << 1, + LL_DEBUG = 1U << 2, +}; + +struct record { + __u64 timestamp; + struct datarec total; + struct datarec *cpu; +}; + +struct map_entry { + struct hlist_node node; + __u64 pair; + struct record val; +}; + +struct stats_record { + struct record rx_cnt; +}; + +struct sample_output { + struct { + __u64 rx; + } totals; + struct { + __u64 pps; + __u64 drop; + __u64 err; + } rx_cnt; +}; + +struct xdp_desc { + int ifindex; + __u32 prog_id; + int flags; +} sample_xdp_progs[32]; + +struct datarec *sample_mmap[NUM_MAP]; +struct bpf_map *sample_map[NUM_MAP]; +size_t sample_map_count[NUM_MAP]; +enum log_level sample_log_level; +struct sample_output sample_out; +unsigned long sample_interval; +bool sample_err_exp; +int sample_xdp_cnt; +int sample_n_cpus; +int sample_sig_fd; +int sample_mask; + +static __u64 gettime(void) +{ + struct timespec t; + int res; + + res = clock_gettime(CLOCK_MONOTONIC, &t); + if (res < 0) { + fprintf(stderr, "Error with gettimeofday! (%i)\n", res); + return UINT64_MAX; + } + return (__u64)t.tv_sec * NANOSEC_PER_SEC + t.tv_nsec; +} + +static void sample_print_help(int mask) +{ + printf("Output format description\n\n" + "By default, redirect success statistics are disabled, use -s to enable.\n" + "The terse output mode is default, verbose mode can be activated using -v\n" + "Use SIGQUIT (Ctrl + \\) to switch the mode dynamically at runtime\n\n" + "Terse mode displays at most the following fields:\n" + " rx/s Number of packets received per second\n" + " redir/s Number of packets successfully redirected per second\n" + " err,drop/s Aggregated count of errors per second (including dropped packets)\n" + " xmit/s Number of packets transmitted on the output device per second\n\n" + "Output description for verbose mode:\n" + " FIELD DESCRIPTION\n"); + + if (mask & SAMPLE_RX_CNT) { + printf(" receive\t\tDisplays the number of packets received & errors encountered\n" + " \t\t\tWhenever an error or packet drop occurs, details of per CPU error\n" + " \t\t\tand drop statistics will be expanded inline in terse mode.\n" + " \t\t\t\tpkt/s - Packets received per second\n" + " \t\t\t\tdrop/s - Packets dropped per second\n" + " \t\t\t\terror/s - Errors encountered per second\n\n"); + } +} + +void sample_usage(char *argv[], const struct option *long_options, + const char *doc, int mask, bool error) +{ + int i; + + if (!error) + sample_print_help(mask); + + printf("\n%s\nOption for %s:\n", doc, argv[0]); + for (i = 0; long_options[i].name != 0; i++) { + printf(" --%-15s", long_options[i].name); + if (long_options[i].flag != NULL) + printf(" flag (internal value: %d)", + *long_options[i].flag); + else + printf("\t short-option: -%c", long_options[i].val); + printf("\n"); + } + printf("\n"); +} + +static struct datarec *alloc_record_per_cpu(void) +{ + unsigned int nr_cpus = libbpf_num_possible_cpus(); + struct datarec *array; + + array = calloc(nr_cpus, sizeof(*array)); + if (!array) { + fprintf(stderr, "Failed to allocate memory (nr_cpus: %u)\n", + nr_cpus); + return NULL; + } + return array; +} + +static int map_entry_init(struct map_entry *e, __u64 pair) +{ + e->pair = pair; + INIT_HLIST_NODE(&e->node); + e->val.timestamp = gettime(); + e->val.cpu = alloc_record_per_cpu(); + if (!e->val.cpu) + return -ENOMEM; + return 0; +} + +static void map_collect_percpu(struct datarec *values, struct record *rec) +{ + /* For percpu maps, userspace gets a value per possible CPU */ + unsigned int nr_cpus = libbpf_num_possible_cpus(); + __u64 sum_xdp_redirect = 0; + __u64 sum_processed = 0; + __u64 sum_xdp_pass = 0; + __u64 sum_xdp_drop = 0; + __u64 sum_dropped = 0; + __u64 sum_issue = 0; + int i; + + /* Get time as close as possible to reading map contents */ + rec->timestamp = gettime(); + + /* Record and sum values from each CPU */ + for (i = 0; i < nr_cpus; i++) { + rec->cpu[i].processed = READ_ONCE(values[i].processed); + rec->cpu[i].dropped = READ_ONCE(values[i].dropped); + rec->cpu[i].issue = READ_ONCE(values[i].issue); + rec->cpu[i].xdp_pass = READ_ONCE(values[i].xdp_pass); + rec->cpu[i].xdp_drop = READ_ONCE(values[i].xdp_drop); + rec->cpu[i].xdp_redirect = READ_ONCE(values[i].xdp_redirect); + + sum_processed += rec->cpu[i].processed; + sum_dropped += rec->cpu[i].dropped; + sum_issue += rec->cpu[i].issue; + sum_xdp_pass += rec->cpu[i].xdp_pass; + sum_xdp_drop += rec->cpu[i].xdp_drop; + sum_xdp_redirect += rec->cpu[i].xdp_redirect; + } + + rec->total.processed = sum_processed; + rec->total.dropped = sum_dropped; + rec->total.issue = sum_issue; + rec->total.xdp_pass = sum_xdp_pass; + rec->total.xdp_drop = sum_xdp_drop; + rec->total.xdp_redirect = sum_xdp_redirect; +} + +static struct stats_record *alloc_stats_record(void) +{ + struct stats_record *rec; + int i; + + rec = calloc(1, sizeof(*rec) + sample_n_cpus * sizeof(struct record)); + if (!rec) { + fprintf(stderr, "Failed to allocate memory\n"); + return NULL; + } + + if (sample_mask & SAMPLE_RX_CNT) { + rec->rx_cnt.cpu = alloc_record_per_cpu(); + if (!rec->rx_cnt.cpu) { + fprintf(stderr, + "Failed to allocate rx_cnt per-CPU array\n"); + goto end_rec; + } + } + + return rec; +end_rec: + free(rec); + return NULL; +} + +static void free_stats_record(struct stats_record *r) +{ + struct hlist_node *tmp; + struct map_entry *e; + int i; + + free(r->rx_cnt.cpu); + free(r); +} + +static double calc_period(struct record *r, struct record *p) +{ + double period_ = 0; + __u64 period = 0; + + period = r->timestamp - p->timestamp; + if (period > 0) + period_ = ((double)period / NANOSEC_PER_SEC); + + return period_; +} + +static double sample_round(double val) +{ + if (val - floor(val) < 0.5) + return floor(val); + return ceil(val); +} + +static __u64 calc_pps(struct datarec *r, struct datarec *p, double period_) +{ + __u64 packets = 0; + __u64 pps = 0; + + if (period_ > 0) { + packets = r->processed - p->processed; + pps = sample_round(packets / period_); + } + return pps; +} + +static __u64 calc_drop_pps(struct datarec *r, struct datarec *p, double period_) +{ + __u64 packets = 0; + __u64 pps = 0; + + if (period_ > 0) { + packets = r->dropped - p->dropped; + pps = sample_round(packets / period_); + } + return pps; +} + +static __u64 calc_errs_pps(struct datarec *r, struct datarec *p, double period_) +{ + __u64 packets = 0; + __u64 pps = 0; + + if (period_ > 0) { + packets = r->issue - p->issue; + pps = sample_round(packets / period_); + } + return pps; +} + +static __u64 calc_info_pps(struct datarec *r, struct datarec *p, double period_) +{ + __u64 packets = 0; + __u64 pps = 0; + + if (period_ > 0) { + packets = r->info - p->info; + pps = sample_round(packets / period_); + } + return pps; +} + +static void calc_xdp_pps(struct datarec *r, struct datarec *p, double *xdp_pass, + double *xdp_drop, double *xdp_redirect, double period_) +{ + *xdp_pass = 0, *xdp_drop = 0, *xdp_redirect = 0; + if (period_ > 0) { + *xdp_redirect = (r->xdp_redirect - p->xdp_redirect) / period_; + *xdp_pass = (r->xdp_pass - p->xdp_pass) / period_; + *xdp_drop = (r->xdp_drop - p->xdp_drop) / period_; + } +} + +static void stats_get_rx_cnt(struct stats_record *stats_rec, + struct stats_record *stats_prev, + unsigned int nr_cpus, struct sample_output *out) +{ + struct record *rec, *prev; + double t, pps, drop, err; + int i; + + rec = &stats_rec->rx_cnt; + prev = &stats_prev->rx_cnt; + t = calc_period(rec, prev); + + for (i = 0; i < nr_cpus; i++) { + struct datarec *r = &rec->cpu[i]; + struct datarec *p = &prev->cpu[i]; + char str[64]; + + pps = calc_pps(r, p, t); + drop = calc_drop_pps(r, p, t); + err = calc_errs_pps(r, p, t); + if (!pps && !drop && !err) + continue; + + snprintf(str, sizeof(str), "cpu:%d", i); + print_default(" %-18s " FMT_COLUMNf FMT_COLUMNf FMT_COLUMNf + "\n", + str, PPS(pps), DROP(drop), ERR(err)); + } + + if (out) { + pps = calc_pps(&rec->total, &prev->total, t); + drop = calc_drop_pps(&rec->total, &prev->total, t); + err = calc_errs_pps(&rec->total, &prev->total, t); + + out->rx_cnt.pps = pps; + out->rx_cnt.drop = drop; + out->rx_cnt.err = err; + out->totals.rx += pps; + out->totals.drop += drop; + out->totals.err += err; + } +} + + +static void stats_print(const char *prefix, int mask, struct stats_record *r, + struct stats_record *p, struct sample_output *out) +{ + int nr_cpus = libbpf_num_possible_cpus(); + const char *str; + + print_always("%-23s", prefix ?: "Summary"); + if (mask & SAMPLE_RX_CNT) + print_always(FMT_COLUMNl, RX(out->totals.rx)); + printf("\n"); + + if (mask & SAMPLE_RX_CNT) { + str = (sample_log_level & LL_DEFAULT) && out->rx_cnt.pps ? + "receive total" : + "receive"; + print_err((out->rx_cnt.err || out->rx_cnt.drop), + " %-20s " FMT_COLUMNl FMT_COLUMNl FMT_COLUMNl "\n", + str, PPS(out->rx_cnt.pps), DROP(out->rx_cnt.drop), + ERR(out->rx_cnt.err)); + + stats_get_rx_cnt(r, p, nr_cpus, NULL); + } + + if (sample_log_level & LL_DEFAULT || + ((sample_log_level & LL_SIMPLE) && sample_err_exp)) { + sample_err_exp = false; + printf("\n"); + } +} + +int sample_setup_maps(struct bpf_map **maps) +{ + sample_n_cpus = libbpf_num_possible_cpus(); + + for (int i = 0; i < NUM_MAP; i++) { + sample_map[i] = maps[i]; + + switch (i) { + case MAP_RX: + sample_map_count[i] = sample_n_cpus; + break; + default: + return -EINVAL; + } + if (bpf_map__resize(sample_map[i], sample_map_count[i]) < 0) + return -errno; + } + return 0; +} + +static int sample_setup_maps_mappings(void) +{ + for (int i = 0; i < NUM_MAP; i++) { + size_t size = sample_map_count[i] * sizeof(struct datarec); + + sample_mmap[i] = mmap(NULL, size, PROT_READ | PROT_WRITE, + MAP_SHARED, bpf_map__fd(sample_map[i]), 0); + if (sample_mmap[i] == MAP_FAILED) + return -errno; + } + return 0; +} + +int __sample_init(int mask) +{ + sigset_t st; + + sigemptyset(&st); + sigaddset(&st, SIGQUIT); + sigaddset(&st, SIGINT); + sigaddset(&st, SIGTERM); + + if (sigprocmask(SIG_BLOCK, &st, NULL) < 0) + return -errno; + + sample_sig_fd = signalfd(-1, &st, SFD_CLOEXEC | SFD_NONBLOCK); + if (sample_sig_fd < 0) + return -errno; + + sample_mask = mask; + + return sample_setup_maps_mappings(); +} + +static int __sample_remove_xdp(int ifindex, __u32 prog_id, int xdp_flags) +{ + __u32 cur_prog_id = 0; + int ret; + + if (prog_id) { + ret = bpf_get_link_xdp_id(ifindex, &cur_prog_id, xdp_flags); + if (ret < 0) + return -errno; + + if (prog_id != cur_prog_id) { + print_always( + "Program on ifindex %d does not match installed " + "program, skipping unload\n", + ifindex); + return -ENOENT; + } + } + + return bpf_set_link_xdp_fd(ifindex, -1, xdp_flags); +} + +int sample_install_xdp(struct bpf_program *xdp_prog, int ifindex, bool generic, + bool force) +{ + int ret, xdp_flags = 0; + __u32 prog_id = 0; + + if (sample_xdp_cnt == 32) { + fprintf(stderr, + "Total limit for installed XDP programs in a sample reached\n"); + return -ENOTSUP; + } + + xdp_flags |= !force ? XDP_FLAGS_UPDATE_IF_NOEXIST : 0; + xdp_flags |= generic ? XDP_FLAGS_SKB_MODE : XDP_FLAGS_DRV_MODE; + ret = bpf_set_link_xdp_fd(ifindex, bpf_program__fd(xdp_prog), + xdp_flags); + if (ret < 0) { + ret = -errno; + fprintf(stderr, + "Failed to install program \"%s\" on ifindex %d, mode = %s, " + "force = %s: %s\n", + bpf_program__name(xdp_prog), ifindex, + generic ? "skb" : "native", force ? "true" : "false", + strerror(-ret)); + return ret; + } + + ret = bpf_get_link_xdp_id(ifindex, &prog_id, xdp_flags); + if (ret < 0) { + ret = -errno; + fprintf(stderr, + "Failed to get XDP program id for ifindex %d, removing program: %s\n", + ifindex, strerror(errno)); + __sample_remove_xdp(ifindex, 0, xdp_flags); + return ret; + } + sample_xdp_progs[sample_xdp_cnt++] = + (struct xdp_desc){ ifindex, prog_id, xdp_flags }; + + return 0; +} + +static void sample_summary_print(void) +{ + double period = sample_out.rx_cnt.pps; + + if (sample_out.totals.rx) { + double pkts = sample_out.totals.rx; + + print_always(" Packets received : %'-10llu\n", + sample_out.totals.rx); + print_always(" Average packets/s : %'-10.0f\n", + sample_round(pkts / period)); + } +} + +void sample_exit(int status) +{ + size_t size; + + for (int i = 0; i < NUM_MAP; i++) { + size = sample_map_count[i] * sizeof(**sample_mmap); + munmap(sample_mmap[i], size); + } + while (sample_xdp_cnt--) { + int i = sample_xdp_cnt, ifindex, xdp_flags; + __u32 prog_id; + + prog_id = sample_xdp_progs[i].prog_id; + ifindex = sample_xdp_progs[i].ifindex; + xdp_flags = sample_xdp_progs[i].flags; + + __sample_remove_xdp(ifindex, prog_id, xdp_flags); + } + sample_summary_print(); + close(sample_sig_fd); + exit(status); +} + +static int sample_stats_collect(struct stats_record *rec) +{ + int i; + + if (sample_mask & SAMPLE_RX_CNT) + map_collect_percpu(sample_mmap[MAP_RX], &rec->rx_cnt); + + return 0; +} + +static void sample_summary_update(struct sample_output *out, int interval) +{ + sample_out.totals.rx += out->totals.rx; + sample_out.rx_cnt.pps += interval; +} + +static void sample_stats_print(int mask, struct stats_record *cur, + struct stats_record *prev, char *prog_name, + int interval) +{ + struct sample_output out = {}; + + if (mask & SAMPLE_RX_CNT) + stats_get_rx_cnt(cur, prev, 0, &out); + sample_summary_update(&out, interval); + + stats_print(prog_name, mask, cur, prev, &out); +} + +void sample_switch_mode(void) +{ + sample_log_level ^= LL_DEBUG - 1; +} + +static int sample_signal_cb(void) +{ + struct signalfd_siginfo si; + int r; + + r = read(sample_sig_fd, &si, sizeof(si)); + if (r < 0) + return -errno; + + switch (si.ssi_signo) { + case SIGQUIT: + sample_switch_mode(); + printf("\n"); + break; + default: + printf("\n"); + return 1; + } + + return 0; +} + +/* Pointer swap trick */ +static void swap(struct stats_record **a, struct stats_record **b) +{ + struct stats_record *tmp; + + tmp = *a; + *a = *b; + *b = tmp; +} + +static int sample_timer_cb(int timerfd, struct stats_record **rec, + struct stats_record **prev, int interval) +{ + char line[64] = "Summary"; + int ret; + __u64 t; + + ret = read(timerfd, &t, sizeof(t)); + if (ret < 0) + return -errno; + + swap(prev, rec); + ret = sample_stats_collect(*rec); + if (ret < 0) + return ret; + + if (sample_xdp_cnt == 2) { + char fi[IFNAMSIZ]; + char to[IFNAMSIZ]; + const char *f, *t; + + f = t = NULL; + if (if_indextoname(sample_xdp_progs[0].ifindex, fi)) + f = fi; + if (if_indextoname(sample_xdp_progs[1].ifindex, to)) + t = to; + + snprintf(line, sizeof(line), "%s->%s", f ?: "?", t ?: "?"); + } + + sample_stats_print(sample_mask, *rec, *prev, line, interval); + return 0; +} + +int sample_run(int interval, void (*post_cb)(void *), void *ctx) +{ + struct timespec ts = { interval, 0 }; + struct itimerspec its = { ts, ts }; + struct stats_record *rec, *prev; + struct pollfd pfd[2] = {}; + int timerfd, ret; + + if (!interval) { + fprintf(stderr, "Incorrect interval 0\n"); + return -EINVAL; + } + sample_interval = interval; + /* Pretty print numbers */ + setlocale(LC_NUMERIC, "en_US.UTF-8"); + + timerfd = timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC | TFD_NONBLOCK); + if (timerfd < 0) + return -errno; + timerfd_settime(timerfd, 0, &its, NULL); + + pfd[0].fd = sample_sig_fd; + pfd[0].events = POLLIN; + + pfd[1].fd = timerfd; + pfd[1].events = POLLIN; + + ret = -ENOMEM; + rec = alloc_stats_record(); + if (!rec) + goto end; + prev = alloc_stats_record(); + if (!prev) + goto end_rec; + + ret = sample_stats_collect(rec); + if (ret < 0) + goto end_rec_prev; + + for (;;) { + ret = poll(pfd, 2, -1); + if (ret < 0) { + if (errno == EINTR) + continue; + else + break; + } + + if (pfd[0].revents & POLLIN) + ret = sample_signal_cb(); + else if (pfd[1].revents & POLLIN) + ret = sample_timer_cb(timerfd, &rec, &prev, interval); + + if (ret) + break; + + if (post_cb) + post_cb(ctx); + } + +end_rec_prev: + free_stats_record(prev); +end_rec: + free_stats_record(rec); +end: + close(timerfd); + + return ret; +} + +const char *get_driver_name(int ifindex) +{ + struct ethtool_drvinfo drv = {}; + char ifname[IF_NAMESIZE]; + static char drvname[32]; + struct ifreq ifr = {}; + int fd, r = 0; + + fd = socket(AF_INET, SOCK_DGRAM, 0); + if (fd < 0) + return "[error]"; + + if (!if_indextoname(ifindex, ifname)) + goto end; + + drv.cmd = ETHTOOL_GDRVINFO; + safe_strncpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name)); + ifr.ifr_data = (void *)&drv; + + r = ioctl(fd, SIOCETHTOOL, &ifr); + if (r) + goto end; + + safe_strncpy(drvname, drv.driver, sizeof(drvname)); + + close(fd); + return drvname; + +end: + r = errno; + close(fd); + return r == EOPNOTSUPP ? "loopback" : "[error]"; +} + +int get_mac_addr(int ifindex, void *mac_addr) +{ + char ifname[IF_NAMESIZE]; + struct ifreq ifr = {}; + int fd, r; + + fd = socket(AF_INET, SOCK_DGRAM, 0); + if (fd < 0) + return -errno; + + if (!if_indextoname(ifindex, ifname)) { + r = -errno; + goto end; + } + + safe_strncpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name)); + + r = ioctl(fd, SIOCGIFHWADDR, &ifr); + if (r) { + r = -errno; + goto end; + } + + memcpy(mac_addr, ifr.ifr_hwaddr.sa_data, 6 * sizeof(char)); + +end: + close(fd); + return r; +} + +__attribute__((constructor)) static void sample_ctor(void) +{ + if (libbpf_set_strict_mode(LIBBPF_STRICT_ALL) < 0) { + fprintf(stderr, "Failed to set libbpf strict mode: %s\n", + strerror(errno)); + /* Just exit, nothing to cleanup right now */ + exit(EXIT_FAIL_BPF); + } +} diff --git a/samples/bpf/xdp_sample_user.h b/samples/bpf/xdp_sample_user.h new file mode 100644 index 000000000000..d630998df547 --- /dev/null +++ b/samples/bpf/xdp_sample_user.h @@ -0,0 +1,60 @@ +// SPDX-License-Identifier: GPL-2.0-only +#ifndef XDP_SAMPLE_USER_H +#define XDP_SAMPLE_USER_H + +#include +#include + +#include "xdp_sample_shared.h" + +enum stats_mask { + _SAMPLE_REDIRECT_MAP = 1U << 0, + SAMPLE_RX_CNT = 1U << 1, +}; + +/* Exit return codes */ +#define EXIT_OK 0 +#define EXIT_FAIL 1 +#define EXIT_FAIL_OPTION 2 +#define EXIT_FAIL_XDP 3 +#define EXIT_FAIL_BPF 4 +#define EXIT_FAIL_MEM 5 + +int sample_setup_maps(struct bpf_map **maps); +int __sample_init(int mask); +void sample_exit(int status); +int sample_run(int interval, void (*post_cb)(void *), void *ctx); + +void sample_switch_mode(void); +int sample_install_xdp(struct bpf_program *xdp_prog, int ifindex, bool generic, + bool force); +void sample_usage(char *argv[], const struct option *long_options, + const char *doc, int mask, bool error); + +const char *get_driver_name(int ifindex); +int get_mac_addr(int ifindex, void *mac_addr); + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wstringop-truncation" +__attribute__((unused)) +static inline char *safe_strncpy(char *dst, const char *src, size_t size) +{ + if (!size) + return dst; + strncpy(dst, src, size - 1); + dst[size - 1] = '\0'; + return dst; +} +#pragma GCC diagnostic pop + +#define DEFINE_SAMPLE_INIT(name) \ + static int sample_init(struct name *skel, int mask) \ + { \ + int ret; \ + ret = __sample_init(mask); \ + if (ret < 0) \ + return ret; \ + return 0; \ + } + +#endif -- cgit v1.2.3-59-g8ed1b From 323140389405e5d5d2020b2e3e04863d12cf3e32 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Sat, 21 Aug 2021 05:49:52 +0530 Subject: samples: bpf: Add BPF support for redirect tracepoint This adds the shared BPF file that will be used going forward for sharing tracepoint programs among XDP redirect samples. Since vmlinux.h conflicts with tools/include for READ_ONCE/WRITE_ONCE and ARRAY_SIZE, they are copied in to xdp_sample.bpf.h along with other helpers that will be required. Signed-off-by: Kumar Kartikeya Dwivedi Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20210821002010.845777-5-memxor@gmail.com --- samples/bpf/xdp_sample.bpf.c | 112 ++++++++++++++++++++++++++++++++++ samples/bpf/xdp_sample.bpf.h | 141 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 253 insertions(+) create mode 100644 samples/bpf/xdp_sample.bpf.c create mode 100644 samples/bpf/xdp_sample.bpf.h (limited to 'samples') diff --git a/samples/bpf/xdp_sample.bpf.c b/samples/bpf/xdp_sample.bpf.c new file mode 100644 index 000000000000..e22f2a97a988 --- /dev/null +++ b/samples/bpf/xdp_sample.bpf.c @@ -0,0 +1,112 @@ +// SPDX-License-Identifier: GPL-2.0 +/* GPLv2, Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc. */ +#include "xdp_sample.bpf.h" + +#include +#include +#include + +array_map rx_cnt SEC(".maps"); +array_map redir_err_cnt SEC(".maps"); + +const volatile int nr_cpus = 0; + +/* These can be set before loading so that redundant comparisons can be DCE'd by + * the verifier, and only actual matches are tried after loading tp_btf program. + * This allows sample to filter tracepoint stats based on net_device. + */ +const volatile int from_match[32] = {}; +const volatile int to_match[32] = {}; + +/* Find if b is part of set a, but if a is empty set then evaluate to true */ +#define IN_SET(a, b) \ + ({ \ + bool __res = !(a)[0]; \ + for (int i = 0; i < ARRAY_SIZE(a) && (a)[i]; i++) { \ + __res = (a)[i] == (b); \ + if (__res) \ + break; \ + } \ + __res; \ + }) + +static __always_inline __u32 xdp_get_err_key(int err) +{ + switch (err) { + case 0: + return 0; + case -EINVAL: + return 2; + case -ENETDOWN: + return 3; + case -EMSGSIZE: + return 4; + case -EOPNOTSUPP: + return 5; + case -ENOSPC: + return 6; + default: + return 1; + } +} + +static __always_inline int xdp_redirect_collect_stat(int from, int err) +{ + u32 cpu = bpf_get_smp_processor_id(); + u32 key = XDP_REDIRECT_ERROR; + struct datarec *rec; + u32 idx; + + if (!IN_SET(from_match, from)) + return 0; + + key = xdp_get_err_key(err); + + idx = key * nr_cpus + cpu; + rec = bpf_map_lookup_elem(&redir_err_cnt, &idx); + if (!rec) + return 0; + if (key) + NO_TEAR_INC(rec->dropped); + else + NO_TEAR_INC(rec->processed); + return 0; /* Indicate event was filtered (no further processing)*/ + /* + * Returning 1 here would allow e.g. a perf-record tracepoint + * to see and record these events, but it doesn't work well + * in-practice as stopping perf-record also unload this + * bpf_prog. Plus, there is additional overhead of doing so. + */ +} + +SEC("tp_btf/xdp_redirect_err") +int BPF_PROG(tp_xdp_redirect_err, const struct net_device *dev, + const struct bpf_prog *xdp, const void *tgt, int err, + const struct bpf_map *map, u32 index) +{ + return xdp_redirect_collect_stat(dev->ifindex, err); +} + +SEC("tp_btf/xdp_redirect_map_err") +int BPF_PROG(tp_xdp_redirect_map_err, const struct net_device *dev, + const struct bpf_prog *xdp, const void *tgt, int err, + const struct bpf_map *map, u32 index) +{ + return xdp_redirect_collect_stat(dev->ifindex, err); +} + +SEC("tp_btf/xdp_redirect") +int BPF_PROG(tp_xdp_redirect, const struct net_device *dev, + const struct bpf_prog *xdp, const void *tgt, int err, + const struct bpf_map *map, u32 index) +{ + return xdp_redirect_collect_stat(dev->ifindex, err); +} + +SEC("tp_btf/xdp_redirect_map") +int BPF_PROG(tp_xdp_redirect_map, const struct net_device *dev, + const struct bpf_prog *xdp, const void *tgt, int err, + const struct bpf_map *map, u32 index) +{ + return xdp_redirect_collect_stat(dev->ifindex, err); +} diff --git a/samples/bpf/xdp_sample.bpf.h b/samples/bpf/xdp_sample.bpf.h new file mode 100644 index 000000000000..25b1dbe9b37b --- /dev/null +++ b/samples/bpf/xdp_sample.bpf.h @@ -0,0 +1,141 @@ +// SPDX-License-Identifier: GPL-2.0 +#ifndef _XDP_SAMPLE_BPF_H +#define _XDP_SAMPLE_BPF_H + +#include "vmlinux.h" +#include +#include +#include + +#include "xdp_sample_shared.h" + +#define ETH_ALEN 6 +#define ETH_P_802_3_MIN 0x0600 +#define ETH_P_8021Q 0x8100 +#define ETH_P_8021AD 0x88A8 +#define ETH_P_IP 0x0800 +#define ETH_P_IPV6 0x86DD +#define ETH_P_ARP 0x0806 +#define IPPROTO_ICMPV6 58 + +#define EINVAL 22 +#define ENETDOWN 100 +#define EMSGSIZE 90 +#define EOPNOTSUPP 95 +#define ENOSPC 28 + +typedef struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(map_flags, BPF_F_MMAPABLE); + __type(key, unsigned int); + __type(value, struct datarec); +} array_map; + +extern array_map rx_cnt; +extern const volatile int nr_cpus; + +enum { + XDP_REDIRECT_SUCCESS = 0, + XDP_REDIRECT_ERROR = 1 +}; + +static __always_inline void swap_src_dst_mac(void *data) +{ + unsigned short *p = data; + unsigned short dst[3]; + + dst[0] = p[0]; + dst[1] = p[1]; + dst[2] = p[2]; + p[0] = p[3]; + p[1] = p[4]; + p[2] = p[5]; + p[3] = dst[0]; + p[4] = dst[1]; + p[5] = dst[2]; +} + +#if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \ + __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#define bpf_ntohs(x) __builtin_bswap16(x) +#define bpf_htons(x) __builtin_bswap16(x) +#elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && \ + __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +#define bpf_ntohs(x) (x) +#define bpf_htons(x) (x) +#else +# error "Endianness detection needs to be set up for your compiler?!" +#endif + +/* + * Note: including linux/compiler.h or linux/kernel.h for the macros below + * conflicts with vmlinux.h include in BPF files, so we define them here. + * + * Following functions are taken from kernel sources and + * break aliasing rules in their original form. + * + * While kernel is compiled with -fno-strict-aliasing, + * perf uses -Wstrict-aliasing=3 which makes build fail + * under gcc 4.4. + * + * Using extra __may_alias__ type to allow aliasing + * in this case. + */ +typedef __u8 __attribute__((__may_alias__)) __u8_alias_t; +typedef __u16 __attribute__((__may_alias__)) __u16_alias_t; +typedef __u32 __attribute__((__may_alias__)) __u32_alias_t; +typedef __u64 __attribute__((__may_alias__)) __u64_alias_t; + +static __always_inline void __read_once_size(const volatile void *p, void *res, int size) +{ + switch (size) { + case 1: *(__u8_alias_t *) res = *(volatile __u8_alias_t *) p; break; + case 2: *(__u16_alias_t *) res = *(volatile __u16_alias_t *) p; break; + case 4: *(__u32_alias_t *) res = *(volatile __u32_alias_t *) p; break; + case 8: *(__u64_alias_t *) res = *(volatile __u64_alias_t *) p; break; + default: + asm volatile ("" : : : "memory"); + __builtin_memcpy((void *)res, (const void *)p, size); + asm volatile ("" : : : "memory"); + } +} + +static __always_inline void __write_once_size(volatile void *p, void *res, int size) +{ + switch (size) { + case 1: *(volatile __u8_alias_t *) p = *(__u8_alias_t *) res; break; + case 2: *(volatile __u16_alias_t *) p = *(__u16_alias_t *) res; break; + case 4: *(volatile __u32_alias_t *) p = *(__u32_alias_t *) res; break; + case 8: *(volatile __u64_alias_t *) p = *(__u64_alias_t *) res; break; + default: + asm volatile ("" : : : "memory"); + __builtin_memcpy((void *)p, (const void *)res, size); + asm volatile ("" : : : "memory"); + } +} + +#define READ_ONCE(x) \ +({ \ + union { typeof(x) __val; char __c[1]; } __u = \ + { .__c = { 0 } }; \ + __read_once_size(&(x), __u.__c, sizeof(x)); \ + __u.__val; \ +}) + +#define WRITE_ONCE(x, val) \ +({ \ + union { typeof(x) __val; char __c[1]; } __u = \ + { .__val = (val) }; \ + __write_once_size(&(x), __u.__c, sizeof(x)); \ + __u.__val; \ +}) + +/* Add a value using relaxed read and relaxed write. Less expensive than + * fetch_add when there is no write concurrency. + */ +#define NO_TEAR_ADD(x, val) WRITE_ONCE((x), READ_ONCE(x) + (val)) +#define NO_TEAR_INC(x) NO_TEAR_ADD((x), 1) + +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) + +#endif -- cgit v1.2.3-59-g8ed1b From 1d930fd2cdbf5e156c32c73ea7f3d5b12bdc41d7 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Sat, 21 Aug 2021 05:49:53 +0530 Subject: samples: bpf: Add redirect tracepoint statistics support This implements per-errno reporting (for the ones we explicitly recognize), adds some help output, and implements the stats retrieval and printing functions. Signed-off-by: Kumar Kartikeya Dwivedi Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20210821002010.845777-6-memxor@gmail.com --- samples/bpf/xdp_sample_user.c | 194 ++++++++++++++++++++++++++++++++++++++++++ samples/bpf/xdp_sample_user.h | 21 +++++ 2 files changed, 215 insertions(+) (limited to 'samples') diff --git a/samples/bpf/xdp_sample_user.c b/samples/bpf/xdp_sample_user.c index 073aa3424e4b..c34592566825 100644 --- a/samples/bpf/xdp_sample_user.c +++ b/samples/bpf/xdp_sample_user.c @@ -73,6 +73,7 @@ enum map_type { MAP_RX, + MAP_REDIRECT_ERR, NUM_MAP, }; @@ -96,17 +97,24 @@ struct map_entry { struct stats_record { struct record rx_cnt; + struct record redir_err[XDP_REDIRECT_ERR_MAX]; }; struct sample_output { struct { __u64 rx; + __u64 redir; + __u64 err; } totals; struct { __u64 pps; __u64 drop; __u64 err; } rx_cnt; + struct { + __u64 suc; + __u64 err; + } redir_cnt; }; struct xdp_desc { @@ -127,6 +135,27 @@ int sample_n_cpus; int sample_sig_fd; int sample_mask; +static const char *xdp_redirect_err_names[XDP_REDIRECT_ERR_MAX] = { + /* Key=1 keeps unknown errors */ + "Success", + "Unknown", + "EINVAL", + "ENETDOWN", + "EMSGSIZE", + "EOPNOTSUPP", + "ENOSPC", +}; + +/* Keyed from Unknown */ +static const char *xdp_redirect_err_help[XDP_REDIRECT_ERR_MAX - 1] = { + "Unknown error", + "Invalid redirection", + "Device being redirected to is down", + "Packet length too large for device", + "Operation not supported", + "No space in ptr_ring of cpumap kthread", +}; + static __u64 gettime(void) { struct timespec t; @@ -162,6 +191,21 @@ static void sample_print_help(int mask) " \t\t\t\tdrop/s - Packets dropped per second\n" " \t\t\t\terror/s - Errors encountered per second\n\n"); } + if (mask & (SAMPLE_REDIRECT_CNT | SAMPLE_REDIRECT_ERR_CNT)) { + printf(" redirect\t\tDisplays the number of packets successfully redirected\n" + " \t\t\tErrors encountered are expanded under redirect_err field\n" + " \t\t\tNote that passing -s to enable it has a per packet overhead\n" + " \t\t\t\tredir/s - Packets redirected successfully per second\n\n" + " redirect_err\t\tDisplays the number of packets that failed redirection\n" + " \t\t\tThe errno is expanded under this field with per CPU count\n" + " \t\t\tThe recognized errors are:\n"); + + for (int i = 2; i < XDP_REDIRECT_ERR_MAX; i++) + printf("\t\t\t %s: %s\n", xdp_redirect_err_names[i], + xdp_redirect_err_help[i - 1]); + + printf(" \n\t\t\t\terror/s - Packets that failed redirection per second\n\n"); + } } void sample_usage(char *argv[], const struct option *long_options, @@ -269,8 +313,25 @@ static struct stats_record *alloc_stats_record(void) goto end_rec; } } + if (sample_mask & (SAMPLE_REDIRECT_CNT | SAMPLE_REDIRECT_ERR_CNT)) { + for (i = 0; i < XDP_REDIRECT_ERR_MAX; i++) { + rec->redir_err[i].cpu = alloc_record_per_cpu(); + if (!rec->redir_err[i].cpu) { + fprintf(stderr, + "Failed to allocate redir_err per-CPU array for " + "\"%s\" case\n", + xdp_redirect_err_names[i]); + while (i--) + free(rec->redir_err[i].cpu); + goto end_rx_cnt; + } + } + } return rec; + +end_rx_cnt: + free(rec->rx_cnt.cpu); end_rec: free(rec); return NULL; @@ -282,6 +343,8 @@ static void free_stats_record(struct stats_record *r) struct map_entry *e; int i; + for (i = 0; i < XDP_REDIRECT_ERR_MAX; i++) + free(r->redir_err[i].cpu); free(r->rx_cnt.cpu); free(r); } @@ -407,6 +470,87 @@ static void stats_get_rx_cnt(struct stats_record *stats_rec, } } +static void stats_get_redirect_cnt(struct stats_record *stats_rec, + struct stats_record *stats_prev, + unsigned int nr_cpus, + struct sample_output *out) +{ + struct record *rec, *prev; + double t, pps; + int i; + + rec = &stats_rec->redir_err[0]; + prev = &stats_prev->redir_err[0]; + t = calc_period(rec, prev); + for (i = 0; i < nr_cpus; i++) { + struct datarec *r = &rec->cpu[i]; + struct datarec *p = &prev->cpu[i]; + char str[64]; + + pps = calc_pps(r, p, t); + if (!pps) + continue; + + snprintf(str, sizeof(str), "cpu:%d", i); + print_default(" %-18s " FMT_COLUMNf "\n", str, REDIR(pps)); + } + + if (out) { + pps = calc_pps(&rec->total, &prev->total, t); + out->redir_cnt.suc = pps; + out->totals.redir += pps; + } +} + +static void stats_get_redirect_err_cnt(struct stats_record *stats_rec, + struct stats_record *stats_prev, + unsigned int nr_cpus, + struct sample_output *out) +{ + struct record *rec, *prev; + double t, drop, sum = 0; + int rec_i, i; + + for (rec_i = 1; rec_i < XDP_REDIRECT_ERR_MAX; rec_i++) { + char str[64]; + + rec = &stats_rec->redir_err[rec_i]; + prev = &stats_prev->redir_err[rec_i]; + t = calc_period(rec, prev); + + drop = calc_drop_pps(&rec->total, &prev->total, t); + if (drop > 0 && !out) { + snprintf(str, sizeof(str), + sample_log_level & LL_DEFAULT ? "%s total" : + "%s", + xdp_redirect_err_names[rec_i]); + print_err(drop, " %-18s " FMT_COLUMNf "\n", str, + ERR(drop)); + } + + for (i = 0; i < nr_cpus; i++) { + struct datarec *r = &rec->cpu[i]; + struct datarec *p = &prev->cpu[i]; + double drop; + + drop = calc_drop_pps(r, p, t); + if (!drop) + continue; + + snprintf(str, sizeof(str), "cpu:%d", i); + print_default(" %-16s" FMT_COLUMNf "\n", str, + ERR(drop)); + } + + sum += drop; + } + + if (out) { + out->redir_cnt.err = sum; + out->totals.err += sum; + } +} + static void stats_print(const char *prefix, int mask, struct stats_record *r, struct stats_record *p, struct sample_output *out) @@ -417,6 +561,8 @@ static void stats_print(const char *prefix, int mask, struct stats_record *r, print_always("%-23s", prefix ?: "Summary"); if (mask & SAMPLE_RX_CNT) print_always(FMT_COLUMNl, RX(out->totals.rx)); + if (mask & SAMPLE_REDIRECT_CNT) + print_always(FMT_COLUMNl, REDIR(out->totals.redir)); printf("\n"); if (mask & SAMPLE_RX_CNT) { @@ -431,6 +577,24 @@ static void stats_print(const char *prefix, int mask, struct stats_record *r, stats_get_rx_cnt(r, p, nr_cpus, NULL); } + if (mask & SAMPLE_REDIRECT_CNT) { + str = out->redir_cnt.suc ? "redirect total" : "redirect"; + print_default(" %-20s " FMT_COLUMNl "\n", str, + REDIR(out->redir_cnt.suc)); + + stats_get_redirect_cnt(r, p, nr_cpus, NULL); + } + + if (mask & SAMPLE_REDIRECT_ERR_CNT) { + str = (sample_log_level & LL_DEFAULT) && out->redir_cnt.err ? + "redirect_err total" : + "redirect_err"; + print_err(out->redir_cnt.err, " %-20s " FMT_COLUMNl "\n", str, + ERR(out->redir_cnt.err)); + + stats_get_redirect_err_cnt(r, p, nr_cpus, NULL); + } + if (sample_log_level & LL_DEFAULT || ((sample_log_level & LL_SIMPLE) && sample_err_exp)) { sample_err_exp = false; @@ -449,6 +613,10 @@ int sample_setup_maps(struct bpf_map **maps) case MAP_RX: sample_map_count[i] = sample_n_cpus; break; + case MAP_REDIRECT_ERR: + sample_map_count[i] = + XDP_REDIRECT_ERR_MAX * sample_n_cpus; + break; default: return -EINVAL; } @@ -568,6 +736,17 @@ static void sample_summary_print(void) print_always(" Average packets/s : %'-10.0f\n", sample_round(pkts / period)); } + if (sample_out.totals.redir) { + double pkts = sample_out.totals.redir; + + print_always(" Packets redirected : %'-10llu\n", + sample_out.totals.redir); + print_always(" Average redir/s : %'-10.0f\n", + sample_round(pkts / period)); + } + if (sample_out.totals.err) + print_always(" Errors recorded : %'-10llu\n", + sample_out.totals.err); } void sample_exit(int status) @@ -600,12 +779,23 @@ static int sample_stats_collect(struct stats_record *rec) if (sample_mask & SAMPLE_RX_CNT) map_collect_percpu(sample_mmap[MAP_RX], &rec->rx_cnt); + if (sample_mask & SAMPLE_REDIRECT_CNT) + map_collect_percpu(sample_mmap[MAP_REDIRECT_ERR], &rec->redir_err[0]); + + if (sample_mask & SAMPLE_REDIRECT_ERR_CNT) { + for (i = 1; i < XDP_REDIRECT_ERR_MAX; i++) + map_collect_percpu(&sample_mmap[MAP_REDIRECT_ERR][i * sample_n_cpus], + &rec->redir_err[i]); + } + return 0; } static void sample_summary_update(struct sample_output *out, int interval) { sample_out.totals.rx += out->totals.rx; + sample_out.totals.redir += out->totals.redir; + sample_out.totals.err += out->totals.err; sample_out.rx_cnt.pps += interval; } @@ -617,6 +807,10 @@ static void sample_stats_print(int mask, struct stats_record *cur, if (mask & SAMPLE_RX_CNT) stats_get_rx_cnt(cur, prev, 0, &out); + if (mask & SAMPLE_REDIRECT_CNT) + stats_get_redirect_cnt(cur, prev, 0, &out); + if (mask & SAMPLE_REDIRECT_ERR_CNT) + stats_get_redirect_err_cnt(cur, prev, 0, &out); sample_summary_update(&out, interval); stats_print(prog_name, mask, cur, prev, &out); diff --git a/samples/bpf/xdp_sample_user.h b/samples/bpf/xdp_sample_user.h index d630998df547..1935a0e2f85b 100644 --- a/samples/bpf/xdp_sample_user.h +++ b/samples/bpf/xdp_sample_user.h @@ -10,6 +10,10 @@ enum stats_mask { _SAMPLE_REDIRECT_MAP = 1U << 0, SAMPLE_RX_CNT = 1U << 1, + SAMPLE_REDIRECT_ERR_CNT = 1U << 2, + SAMPLE_REDIRECT_CNT = 1U << 7, + SAMPLE_REDIRECT_MAP_CNT = SAMPLE_REDIRECT_CNT | _SAMPLE_REDIRECT_MAP, + SAMPLE_REDIRECT_ERR_MAP_CNT = SAMPLE_REDIRECT_ERR_CNT | _SAMPLE_REDIRECT_MAP, }; /* Exit return codes */ @@ -47,6 +51,15 @@ static inline char *safe_strncpy(char *dst, const char *src, size_t size) } #pragma GCC diagnostic pop +#define __attach_tp(name) \ + ({ \ + if (!bpf_program__is_tracing(skel->progs.name)) \ + return -EINVAL; \ + skel->links.name = bpf_program__attach(skel->progs.name); \ + if (!skel->links.name) \ + return -errno; \ + }) + #define DEFINE_SAMPLE_INIT(name) \ static int sample_init(struct name *skel, int mask) \ { \ @@ -54,6 +67,14 @@ static inline char *safe_strncpy(char *dst, const char *src, size_t size) ret = __sample_init(mask); \ if (ret < 0) \ return ret; \ + if (mask & SAMPLE_REDIRECT_MAP_CNT) \ + __attach_tp(tp_xdp_redirect_map); \ + if (mask & SAMPLE_REDIRECT_CNT) \ + __attach_tp(tp_xdp_redirect); \ + if (mask & SAMPLE_REDIRECT_ERR_MAP_CNT) \ + __attach_tp(tp_xdp_redirect_map_err); \ + if (mask & SAMPLE_REDIRECT_ERR_CNT) \ + __attach_tp(tp_xdp_redirect_err); \ return 0; \ } -- cgit v1.2.3-59-g8ed1b From 451588764e2f3e3ab197b23c7958f750707e2a24 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Sat, 21 Aug 2021 05:49:54 +0530 Subject: samples: bpf: Add BPF support for xdp_exception tracepoint This would allow us to store stats for each XDP action, including their per-CPU counts. Consolidating this here allows all redirect samples to detect xdp_exception events. Signed-off-by: Kumar Kartikeya Dwivedi Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20210821002010.845777-7-memxor@gmail.com --- samples/bpf/xdp_sample.bpf.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'samples') diff --git a/samples/bpf/xdp_sample.bpf.c b/samples/bpf/xdp_sample.bpf.c index e22f2a97a988..53ab5a972405 100644 --- a/samples/bpf/xdp_sample.bpf.c +++ b/samples/bpf/xdp_sample.bpf.c @@ -8,6 +8,7 @@ array_map rx_cnt SEC(".maps"); array_map redir_err_cnt SEC(".maps"); +array_map exception_cnt SEC(".maps"); const volatile int nr_cpus = 0; @@ -110,3 +111,29 @@ int BPF_PROG(tp_xdp_redirect_map, const struct net_device *dev, { return xdp_redirect_collect_stat(dev->ifindex, err); } + +SEC("tp_btf/xdp_exception") +int BPF_PROG(tp_xdp_exception, const struct net_device *dev, + const struct bpf_prog *xdp, u32 act) +{ + u32 cpu = bpf_get_smp_processor_id(); + struct datarec *rec; + u32 key = act, idx; + + if (!IN_SET(from_match, dev->ifindex)) + return 0; + if (!IN_SET(to_match, dev->ifindex)) + return 0; + + if (key > XDP_REDIRECT) + key = XDP_REDIRECT + 1; + + idx = key * nr_cpus + cpu; + rec = bpf_map_lookup_elem(&exception_cnt, &idx); + if (!rec) + return 0; + NO_TEAR_INC(rec->dropped); + + return 0; +} + -- cgit v1.2.3-59-g8ed1b From 82c450803a917da6edb34a0a769d0b5a0b10990c Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Sat, 21 Aug 2021 05:49:55 +0530 Subject: samples: bpf: Add xdp_exception tracepoint statistics support This implements the retrieval and printing, as well the help output. Signed-off-by: Kumar Kartikeya Dwivedi Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20210821002010.845777-8-memxor@gmail.com --- samples/bpf/xdp_sample_user.c | 113 ++++++++++++++++++++++++++++++++++++++++++ samples/bpf/xdp_sample_user.h | 3 ++ 2 files changed, 116 insertions(+) (limited to 'samples') diff --git a/samples/bpf/xdp_sample_user.c b/samples/bpf/xdp_sample_user.c index c34592566825..52a30fd1f2a3 100644 --- a/samples/bpf/xdp_sample_user.c +++ b/samples/bpf/xdp_sample_user.c @@ -74,6 +74,7 @@ enum map_type { MAP_RX, MAP_REDIRECT_ERR, + MAP_EXCEPTION, NUM_MAP, }; @@ -98,6 +99,7 @@ struct map_entry { struct stats_record { struct record rx_cnt; struct record redir_err[XDP_REDIRECT_ERR_MAX]; + struct record exception[XDP_ACTION_MAX]; }; struct sample_output { @@ -115,6 +117,9 @@ struct sample_output { __u64 suc; __u64 err; } redir_cnt; + struct { + __u64 hits; + } except_cnt; }; struct xdp_desc { @@ -156,6 +161,15 @@ static const char *xdp_redirect_err_help[XDP_REDIRECT_ERR_MAX - 1] = { "No space in ptr_ring of cpumap kthread", }; +static const char *xdp_action_names[XDP_ACTION_MAX] = { + [XDP_ABORTED] = "XDP_ABORTED", + [XDP_DROP] = "XDP_DROP", + [XDP_PASS] = "XDP_PASS", + [XDP_TX] = "XDP_TX", + [XDP_REDIRECT] = "XDP_REDIRECT", + [XDP_UNKNOWN] = "XDP_UNKNOWN", +}; + static __u64 gettime(void) { struct timespec t; @@ -169,6 +183,13 @@ static __u64 gettime(void) return (__u64)t.tv_sec * NANOSEC_PER_SEC + t.tv_nsec; } +static const char *action2str(int action) +{ + if (action < XDP_ACTION_MAX) + return xdp_action_names[action]; + return NULL; +} + static void sample_print_help(int mask) { printf("Output format description\n\n" @@ -206,6 +227,15 @@ static void sample_print_help(int mask) printf(" \n\t\t\t\terror/s - Packets that failed redirection per second\n\n"); } + + if (mask & SAMPLE_EXCEPTION_CNT) { + printf(" xdp_exception\t\tDisplays xdp_exception tracepoint events\n" + " \t\t\tThis can occur due to internal driver errors, unrecognized\n" + " \t\t\tXDP actions and due to explicit user trigger by use of XDP_ABORTED\n" + " \t\t\tEach action is expanded below this field with its count\n" + " \t\t\t\thit/s - Number of times the tracepoint was hit per second\n\n"); + } + } void sample_usage(char *argv[], const struct option *long_options, @@ -327,9 +357,26 @@ static struct stats_record *alloc_stats_record(void) } } } + if (sample_mask & SAMPLE_EXCEPTION_CNT) { + for (i = 0; i < XDP_ACTION_MAX; i++) { + rec->exception[i].cpu = alloc_record_per_cpu(); + if (!rec->exception[i].cpu) { + fprintf(stderr, + "Failed to allocate exception per-CPU array for " + "\"%s\" case\n", + action2str(i)); + while (i--) + free(rec->exception[i].cpu); + goto end_redir; + } + } + } return rec; +end_redir: + for (i = 0; i < XDP_REDIRECT_ERR_MAX; i++) + free(rec->redir_err[i].cpu); end_rx_cnt: free(rec->rx_cnt.cpu); end_rec: @@ -343,6 +390,8 @@ static void free_stats_record(struct stats_record *r) struct map_entry *e; int i; + for (i = 0; i < XDP_ACTION_MAX; i++) + free(r->exception[i].cpu); for (i = 0; i < XDP_REDIRECT_ERR_MAX; i++) free(r->redir_err[i].cpu); free(r->rx_cnt.cpu); @@ -551,6 +600,50 @@ static void stats_get_redirect_err_cnt(struct stats_record *stats_rec, } } +static void stats_get_exception_cnt(struct stats_record *stats_rec, + struct stats_record *stats_prev, + unsigned int nr_cpus, + struct sample_output *out) +{ + double t, drop, sum = 0; + struct record *rec, *prev; + int rec_i, i; + + for (rec_i = 0; rec_i < XDP_ACTION_MAX; rec_i++) { + rec = &stats_rec->exception[rec_i]; + prev = &stats_prev->exception[rec_i]; + t = calc_period(rec, prev); + + drop = calc_drop_pps(&rec->total, &prev->total, t); + /* Fold out errors after heading */ + sum += drop; + + if (drop > 0 && !out) { + print_always(" %-18s " FMT_COLUMNf "\n", + action2str(rec_i), ERR(drop)); + + for (i = 0; i < nr_cpus; i++) { + struct datarec *r = &rec->cpu[i]; + struct datarec *p = &prev->cpu[i]; + char str[64]; + double drop; + + drop = calc_drop_pps(r, p, t); + if (!drop) + continue; + + snprintf(str, sizeof(str), "cpu:%d", i); + print_default(" %-16s" FMT_COLUMNf "\n", + str, ERR(drop)); + } + } + } + + if (out) { + out->except_cnt.hits = sum; + out->totals.err += sum; + } +} static void stats_print(const char *prefix, int mask, struct stats_record *r, struct stats_record *p, struct sample_output *out) @@ -595,6 +688,16 @@ static void stats_print(const char *prefix, int mask, struct stats_record *r, stats_get_redirect_err_cnt(r, p, nr_cpus, NULL); } + if (mask & SAMPLE_EXCEPTION_CNT) { + str = out->except_cnt.hits ? "xdp_exception total" : + "xdp_exception"; + + print_err(out->except_cnt.hits, " %-20s " FMT_COLUMNl "\n", str, + HITS(out->except_cnt.hits)); + + stats_get_exception_cnt(r, p, nr_cpus, NULL); + } + if (sample_log_level & LL_DEFAULT || ((sample_log_level & LL_SIMPLE) && sample_err_exp)) { sample_err_exp = false; @@ -617,6 +720,9 @@ int sample_setup_maps(struct bpf_map **maps) sample_map_count[i] = XDP_REDIRECT_ERR_MAX * sample_n_cpus; break; + case MAP_EXCEPTION: + sample_map_count[i] = XDP_ACTION_MAX * sample_n_cpus; + break; default: return -EINVAL; } @@ -788,6 +894,11 @@ static int sample_stats_collect(struct stats_record *rec) &rec->redir_err[i]); } + if (sample_mask & SAMPLE_EXCEPTION_CNT) + for (i = 0; i < XDP_ACTION_MAX; i++) + map_collect_percpu(&sample_mmap[MAP_EXCEPTION][i * sample_n_cpus], + &rec->exception[i]); + return 0; } @@ -811,6 +922,8 @@ static void sample_stats_print(int mask, struct stats_record *cur, stats_get_redirect_cnt(cur, prev, 0, &out); if (mask & SAMPLE_REDIRECT_ERR_CNT) stats_get_redirect_err_cnt(cur, prev, 0, &out); + if (mask & SAMPLE_EXCEPTION_CNT) + stats_get_exception_cnt(cur, prev, 0, &out); sample_summary_update(&out, interval); stats_print(prog_name, mask, cur, prev, &out); diff --git a/samples/bpf/xdp_sample_user.h b/samples/bpf/xdp_sample_user.h index 1935a0e2f85b..aa28e4bdd628 100644 --- a/samples/bpf/xdp_sample_user.h +++ b/samples/bpf/xdp_sample_user.h @@ -11,6 +11,7 @@ enum stats_mask { _SAMPLE_REDIRECT_MAP = 1U << 0, SAMPLE_RX_CNT = 1U << 1, SAMPLE_REDIRECT_ERR_CNT = 1U << 2, + SAMPLE_EXCEPTION_CNT = 1U << 5, SAMPLE_REDIRECT_CNT = 1U << 7, SAMPLE_REDIRECT_MAP_CNT = SAMPLE_REDIRECT_CNT | _SAMPLE_REDIRECT_MAP, SAMPLE_REDIRECT_ERR_MAP_CNT = SAMPLE_REDIRECT_ERR_CNT | _SAMPLE_REDIRECT_MAP, @@ -75,6 +76,8 @@ static inline char *safe_strncpy(char *dst, const char *src, size_t size) __attach_tp(tp_xdp_redirect_map_err); \ if (mask & SAMPLE_REDIRECT_ERR_CNT) \ __attach_tp(tp_xdp_redirect_err); \ + if (mask & SAMPLE_EXCEPTION_CNT) \ + __attach_tp(tp_xdp_exception); \ return 0; \ } -- cgit v1.2.3-59-g8ed1b From 0cf3c2fc4b1afbd8d9c376754af34c1d2bd56de7 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Sat, 21 Aug 2021 05:49:56 +0530 Subject: samples: bpf: Add BPF support for cpumap tracepoints These are invoked in two places, when the XDP frame or SKB (for generic XDP) enqueued to the ptr_ring (cpumap_enqueue) and when kthread processes the frame after invoking the CPUMAP program for it (returning stats for the batch). We use cpumap_map_id to filter on the map_id as a way to avoid printing incorrect stats for parallel sessions of xdp_redirect_cpu. Signed-off-by: Kumar Kartikeya Dwivedi Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20210821002010.845777-9-memxor@gmail.com --- samples/bpf/xdp_sample.bpf.c | 58 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 57 insertions(+), 1 deletion(-) (limited to 'samples') diff --git a/samples/bpf/xdp_sample.bpf.c b/samples/bpf/xdp_sample.bpf.c index 53ab5a972405..f01a5529751c 100644 --- a/samples/bpf/xdp_sample.bpf.c +++ b/samples/bpf/xdp_sample.bpf.c @@ -8,6 +8,8 @@ array_map rx_cnt SEC(".maps"); array_map redir_err_cnt SEC(".maps"); +array_map cpumap_enqueue_cnt SEC(".maps"); +array_map cpumap_kthread_cnt SEC(".maps"); array_map exception_cnt SEC(".maps"); const volatile int nr_cpus = 0; @@ -19,6 +21,8 @@ const volatile int nr_cpus = 0; const volatile int from_match[32] = {}; const volatile int to_match[32] = {}; +int cpumap_map_id = 0; + /* Find if b is part of set a, but if a is empty set then evaluate to true */ #define IN_SET(a, b) \ ({ \ @@ -112,6 +116,59 @@ int BPF_PROG(tp_xdp_redirect_map, const struct net_device *dev, return xdp_redirect_collect_stat(dev->ifindex, err); } +SEC("tp_btf/xdp_cpumap_enqueue") +int BPF_PROG(tp_xdp_cpumap_enqueue, int map_id, unsigned int processed, + unsigned int drops, int to_cpu) +{ + u32 cpu = bpf_get_smp_processor_id(); + struct datarec *rec; + u32 idx; + + if (cpumap_map_id && cpumap_map_id != map_id) + return 0; + + idx = to_cpu * nr_cpus + cpu; + rec = bpf_map_lookup_elem(&cpumap_enqueue_cnt, &idx); + if (!rec) + return 0; + NO_TEAR_ADD(rec->processed, processed); + NO_TEAR_ADD(rec->dropped, drops); + /* Record bulk events, then userspace can calc average bulk size */ + if (processed > 0) + NO_TEAR_INC(rec->issue); + /* Inception: It's possible to detect overload situations, via + * this tracepoint. This can be used for creating a feedback + * loop to XDP, which can take appropriate actions to mitigate + * this overload situation. + */ + return 0; +} + +SEC("tp_btf/xdp_cpumap_kthread") +int BPF_PROG(tp_xdp_cpumap_kthread, int map_id, unsigned int processed, + unsigned int drops, int sched, struct xdp_cpumap_stats *xdp_stats) +{ + struct datarec *rec; + u32 cpu; + + if (cpumap_map_id && cpumap_map_id != map_id) + return 0; + + cpu = bpf_get_smp_processor_id(); + rec = bpf_map_lookup_elem(&cpumap_kthread_cnt, &cpu); + if (!rec) + return 0; + NO_TEAR_ADD(rec->processed, processed); + NO_TEAR_ADD(rec->dropped, drops); + NO_TEAR_ADD(rec->xdp_pass, xdp_stats->pass); + NO_TEAR_ADD(rec->xdp_drop, xdp_stats->drop); + NO_TEAR_ADD(rec->xdp_redirect, xdp_stats->redirect); + /* Count times kthread yielded CPU via schedule call */ + if (sched) + NO_TEAR_INC(rec->issue); + return 0; +} + SEC("tp_btf/xdp_exception") int BPF_PROG(tp_xdp_exception, const struct net_device *dev, const struct bpf_prog *xdp, u32 act) @@ -136,4 +193,3 @@ int BPF_PROG(tp_xdp_exception, const struct net_device *dev, return 0; } - -- cgit v1.2.3-59-g8ed1b From d771e217506adcfbfb08c693fb9332ee4859d61d Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Sat, 21 Aug 2021 05:49:57 +0530 Subject: samples: bpf: Add cpumap tracepoint statistics support This consolidates retrieval and printing into the XDP sample helper. For the kthread stats, it expands xdp_stats separately with its own per-CPU stats. For cpumap enqueue, we display FROM->TO stats also with its per-CPU stats. The help out explains in detail the various aspects of the output. Signed-off-by: Kumar Kartikeya Dwivedi Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20210821002010.845777-10-memxor@gmail.com --- samples/bpf/xdp_sample_user.c | 219 +++++++++++++++++++++++++++++++++++++++++- samples/bpf/xdp_sample_user.h | 6 ++ 2 files changed, 224 insertions(+), 1 deletion(-) (limited to 'samples') diff --git a/samples/bpf/xdp_sample_user.c b/samples/bpf/xdp_sample_user.c index 52a30fd1f2a3..e2692dee1dbb 100644 --- a/samples/bpf/xdp_sample_user.c +++ b/samples/bpf/xdp_sample_user.c @@ -74,6 +74,8 @@ enum map_type { MAP_RX, MAP_REDIRECT_ERR, + MAP_CPUMAP_ENQUEUE, + MAP_CPUMAP_KTHREAD, MAP_EXCEPTION, NUM_MAP, }; @@ -99,13 +101,16 @@ struct map_entry { struct stats_record { struct record rx_cnt; struct record redir_err[XDP_REDIRECT_ERR_MAX]; + struct record kthread; struct record exception[XDP_ACTION_MAX]; + struct record enq[]; }; struct sample_output { struct { __u64 rx; __u64 redir; + __u64 drop; __u64 err; } totals; struct { @@ -228,6 +233,30 @@ static void sample_print_help(int mask) printf(" \n\t\t\t\terror/s - Packets that failed redirection per second\n\n"); } + if (mask & SAMPLE_CPUMAP_ENQUEUE_CNT) { + printf(" enqueue to cpu N\tDisplays the number of packets enqueued to bulk queue of CPU N\n" + " \t\t\tExpands to cpu:FROM->N to display enqueue stats for each CPU enqueuing to CPU N\n" + " \t\t\tReceived packets can be associated with the CPU redirect program is enqueuing \n" + " \t\t\tpackets to.\n" + " \t\t\t\tpkt/s - Packets enqueued per second from other CPU to CPU N\n" + " \t\t\t\tdrop/s - Packets dropped when trying to enqueue to CPU N\n" + " \t\t\t\tbulk-avg - Average number of packets processed for each event\n\n"); + } + + if (mask & SAMPLE_CPUMAP_KTHREAD_CNT) { + printf(" kthread\t\tDisplays the number of packets processed in CPUMAP kthread for each CPU\n" + " \t\t\tPackets consumed from ptr_ring in kthread, and its xdp_stats (after calling \n" + " \t\t\tCPUMAP bpf prog) are expanded below this. xdp_stats are expanded as a total and\n" + " \t\t\tthen per-CPU to associate it to each CPU's pinned CPUMAP kthread.\n" + " \t\t\t\tpkt/s - Packets consumed per second from ptr_ring\n" + " \t\t\t\tdrop/s - Packets dropped per second in kthread\n" + " \t\t\t\tsched - Number of times kthread called schedule()\n\n" + " \t\t\txdp_stats (also expands to per-CPU counts)\n" + " \t\t\t\tpass/s - XDP_PASS count for CPUMAP program execution\n" + " \t\t\t\tdrop/s - XDP_DROP count for CPUMAP program execution\n" + " \t\t\t\tredir/s - XDP_REDIRECT count for CPUMAP program execution\n\n"); + } + if (mask & SAMPLE_EXCEPTION_CNT) { printf(" xdp_exception\t\tDisplays xdp_exception tracepoint events\n" " \t\t\tThis can occur due to internal driver errors, unrecognized\n" @@ -357,6 +386,14 @@ static struct stats_record *alloc_stats_record(void) } } } + if (sample_mask & SAMPLE_CPUMAP_KTHREAD_CNT) { + rec->kthread.cpu = alloc_record_per_cpu(); + if (!rec->kthread.cpu) { + fprintf(stderr, + "Failed to allocate kthread per-CPU array\n"); + goto end_redir; + } + } if (sample_mask & SAMPLE_EXCEPTION_CNT) { for (i = 0; i < XDP_ACTION_MAX; i++) { rec->exception[i].cpu = alloc_record_per_cpu(); @@ -367,13 +404,32 @@ static struct stats_record *alloc_stats_record(void) action2str(i)); while (i--) free(rec->exception[i].cpu); - goto end_redir; + goto end_kthread; + } + } + } + if (sample_mask & SAMPLE_CPUMAP_ENQUEUE_CNT) { + for (i = 0; i < sample_n_cpus; i++) { + rec->enq[i].cpu = alloc_record_per_cpu(); + if (!rec->enq[i].cpu) { + fprintf(stderr, + "Failed to allocate enqueue per-CPU array for " + "CPU %d\n", + i); + while (i--) + free(rec->enq[i].cpu); + goto end_exception; } } } return rec; +end_exception: + for (i = 0; i < XDP_ACTION_MAX; i++) + free(rec->exception[i].cpu); +end_kthread: + free(rec->kthread.cpu); end_redir: for (i = 0; i < XDP_REDIRECT_ERR_MAX; i++) free(rec->redir_err[i].cpu); @@ -390,8 +446,11 @@ static void free_stats_record(struct stats_record *r) struct map_entry *e; int i; + for (i = 0; i < sample_n_cpus; i++) + free(r->enq[i].cpu); for (i = 0; i < XDP_ACTION_MAX; i++) free(r->exception[i].cpu); + free(r->kthread.cpu); for (i = 0; i < XDP_REDIRECT_ERR_MAX; i++) free(r->redir_err[i].cpu); free(r->rx_cnt.cpu); @@ -519,6 +578,137 @@ static void stats_get_rx_cnt(struct stats_record *stats_rec, } } +static void stats_get_cpumap_enqueue(struct stats_record *stats_rec, + struct stats_record *stats_prev, + unsigned int nr_cpus) +{ + struct record *rec, *prev; + double t, pps, drop, err; + int i, to_cpu; + + /* cpumap enqueue stats */ + for (to_cpu = 0; to_cpu < sample_n_cpus; to_cpu++) { + rec = &stats_rec->enq[to_cpu]; + prev = &stats_prev->enq[to_cpu]; + t = calc_period(rec, prev); + + pps = calc_pps(&rec->total, &prev->total, t); + drop = calc_drop_pps(&rec->total, &prev->total, t); + err = calc_errs_pps(&rec->total, &prev->total, t); + + if (pps > 0 || drop > 0) { + char str[64]; + + snprintf(str, sizeof(str), "enqueue to cpu %d", to_cpu); + + if (err > 0) + err = pps / err; /* calc average bulk size */ + + print_err(drop, + " %-20s " FMT_COLUMNf FMT_COLUMNf __COLUMN( + ".2f") "\n", + str, PPS(pps), DROP(drop), err, "bulk-avg"); + } + + for (i = 0; i < nr_cpus; i++) { + struct datarec *r = &rec->cpu[i]; + struct datarec *p = &prev->cpu[i]; + char str[64]; + + pps = calc_pps(r, p, t); + drop = calc_drop_pps(r, p, t); + err = calc_errs_pps(r, p, t); + if (!pps && !drop && !err) + continue; + + snprintf(str, sizeof(str), "cpu:%d->%d", i, to_cpu); + if (err > 0) + err = pps / err; /* calc average bulk size */ + print_default( + " %-18s " FMT_COLUMNf FMT_COLUMNf __COLUMN( + ".2f") "\n", + str, PPS(pps), DROP(drop), err, "bulk-avg"); + } + } +} + +static void stats_get_cpumap_remote(struct stats_record *stats_rec, + struct stats_record *stats_prev, + unsigned int nr_cpus) +{ + double xdp_pass, xdp_drop, xdp_redirect; + struct record *rec, *prev; + double t; + int i; + + rec = &stats_rec->kthread; + prev = &stats_prev->kthread; + t = calc_period(rec, prev); + + calc_xdp_pps(&rec->total, &prev->total, &xdp_pass, &xdp_drop, + &xdp_redirect, t); + if (xdp_pass || xdp_drop || xdp_redirect) { + print_err(xdp_drop, + " %-18s " FMT_COLUMNf FMT_COLUMNf FMT_COLUMNf "\n", + "xdp_stats", PASS(xdp_pass), DROP(xdp_drop), + REDIR(xdp_redirect)); + } + + for (i = 0; i < nr_cpus; i++) { + struct datarec *r = &rec->cpu[i]; + struct datarec *p = &prev->cpu[i]; + char str[64]; + + calc_xdp_pps(r, p, &xdp_pass, &xdp_drop, &xdp_redirect, t); + if (!xdp_pass && !xdp_drop && !xdp_redirect) + continue; + + snprintf(str, sizeof(str), "cpu:%d", i); + print_default(" %-16s " FMT_COLUMNf FMT_COLUMNf FMT_COLUMNf + "\n", + str, PASS(xdp_pass), DROP(xdp_drop), + REDIR(xdp_redirect)); + } +} + +static void stats_get_cpumap_kthread(struct stats_record *stats_rec, + struct stats_record *stats_prev, + unsigned int nr_cpus) +{ + struct record *rec, *prev; + double t, pps, drop, err; + int i; + + rec = &stats_rec->kthread; + prev = &stats_prev->kthread; + t = calc_period(rec, prev); + + pps = calc_pps(&rec->total, &prev->total, t); + drop = calc_drop_pps(&rec->total, &prev->total, t); + err = calc_errs_pps(&rec->total, &prev->total, t); + + print_err(drop, " %-20s " FMT_COLUMNf FMT_COLUMNf FMT_COLUMNf "\n", + pps ? "kthread total" : "kthread", PPS(pps), DROP(drop), err, + "sched"); + + for (i = 0; i < nr_cpus; i++) { + struct datarec *r = &rec->cpu[i]; + struct datarec *p = &prev->cpu[i]; + char str[64]; + + pps = calc_pps(r, p, t); + drop = calc_drop_pps(r, p, t); + err = calc_errs_pps(r, p, t); + if (!pps && !drop && !err) + continue; + + snprintf(str, sizeof(str), "cpu:%d", i); + print_default(" %-18s " FMT_COLUMNf FMT_COLUMNf FMT_COLUMNf + "\n", + str, PPS(pps), DROP(drop), err, "sched"); + } +} + static void stats_get_redirect_cnt(struct stats_record *stats_rec, struct stats_record *stats_prev, unsigned int nr_cpus, @@ -656,6 +846,9 @@ static void stats_print(const char *prefix, int mask, struct stats_record *r, print_always(FMT_COLUMNl, RX(out->totals.rx)); if (mask & SAMPLE_REDIRECT_CNT) print_always(FMT_COLUMNl, REDIR(out->totals.redir)); + printf(FMT_COLUMNl, + out->totals.err + out->totals.drop + out->totals.drop_xmit, + "err,drop/s"); printf("\n"); if (mask & SAMPLE_RX_CNT) { @@ -670,6 +863,14 @@ static void stats_print(const char *prefix, int mask, struct stats_record *r, stats_get_rx_cnt(r, p, nr_cpus, NULL); } + if (mask & SAMPLE_CPUMAP_ENQUEUE_CNT) + stats_get_cpumap_enqueue(r, p, nr_cpus); + + if (mask & SAMPLE_CPUMAP_KTHREAD_CNT) { + stats_get_cpumap_kthread(r, p, nr_cpus); + stats_get_cpumap_remote(r, p, nr_cpus); + } + if (mask & SAMPLE_REDIRECT_CNT) { str = out->redir_cnt.suc ? "redirect total" : "redirect"; print_default(" %-20s " FMT_COLUMNl "\n", str, @@ -714,6 +915,7 @@ int sample_setup_maps(struct bpf_map **maps) switch (i) { case MAP_RX: + case MAP_CPUMAP_KTHREAD: sample_map_count[i] = sample_n_cpus; break; case MAP_REDIRECT_ERR: @@ -722,6 +924,8 @@ int sample_setup_maps(struct bpf_map **maps) break; case MAP_EXCEPTION: sample_map_count[i] = XDP_ACTION_MAX * sample_n_cpus; + case MAP_CPUMAP_ENQUEUE: + sample_map_count[i] = sample_n_cpus * sample_n_cpus; break; default: return -EINVAL; @@ -850,6 +1054,9 @@ static void sample_summary_print(void) print_always(" Average redir/s : %'-10.0f\n", sample_round(pkts / period)); } + if (sample_out.totals.drop) + print_always(" Rx dropped : %'-10llu\n", + sample_out.totals.drop); if (sample_out.totals.err) print_always(" Errors recorded : %'-10llu\n", sample_out.totals.err); @@ -894,6 +1101,15 @@ static int sample_stats_collect(struct stats_record *rec) &rec->redir_err[i]); } + if (sample_mask & SAMPLE_CPUMAP_ENQUEUE_CNT) + for (i = 0; i < sample_n_cpus; i++) + map_collect_percpu(&sample_mmap[MAP_CPUMAP_ENQUEUE][i * sample_n_cpus], + &rec->enq[i]); + + if (sample_mask & SAMPLE_CPUMAP_KTHREAD_CNT) + map_collect_percpu(sample_mmap[MAP_CPUMAP_KTHREAD], + &rec->kthread); + if (sample_mask & SAMPLE_EXCEPTION_CNT) for (i = 0; i < XDP_ACTION_MAX; i++) map_collect_percpu(&sample_mmap[MAP_EXCEPTION][i * sample_n_cpus], @@ -906,6 +1122,7 @@ static void sample_summary_update(struct sample_output *out, int interval) { sample_out.totals.rx += out->totals.rx; sample_out.totals.redir += out->totals.redir; + sample_out.totals.drop += out->totals.drop; sample_out.totals.err += out->totals.err; sample_out.rx_cnt.pps += interval; } diff --git a/samples/bpf/xdp_sample_user.h b/samples/bpf/xdp_sample_user.h index aa28e4bdd628..203732615fee 100644 --- a/samples/bpf/xdp_sample_user.h +++ b/samples/bpf/xdp_sample_user.h @@ -11,6 +11,8 @@ enum stats_mask { _SAMPLE_REDIRECT_MAP = 1U << 0, SAMPLE_RX_CNT = 1U << 1, SAMPLE_REDIRECT_ERR_CNT = 1U << 2, + SAMPLE_CPUMAP_ENQUEUE_CNT = 1U << 3, + SAMPLE_CPUMAP_KTHREAD_CNT = 1U << 4, SAMPLE_EXCEPTION_CNT = 1U << 5, SAMPLE_REDIRECT_CNT = 1U << 7, SAMPLE_REDIRECT_MAP_CNT = SAMPLE_REDIRECT_CNT | _SAMPLE_REDIRECT_MAP, @@ -76,6 +78,10 @@ static inline char *safe_strncpy(char *dst, const char *src, size_t size) __attach_tp(tp_xdp_redirect_map_err); \ if (mask & SAMPLE_REDIRECT_ERR_CNT) \ __attach_tp(tp_xdp_redirect_err); \ + if (mask & SAMPLE_CPUMAP_ENQUEUE_CNT) \ + __attach_tp(tp_xdp_cpumap_enqueue); \ + if (mask & SAMPLE_CPUMAP_KTHREAD_CNT) \ + __attach_tp(tp_xdp_cpumap_kthread); \ if (mask & SAMPLE_EXCEPTION_CNT) \ __attach_tp(tp_xdp_exception); \ return 0; \ -- cgit v1.2.3-59-g8ed1b From 5f116212f4018fc9aa7a2a828b27aab540b8e5fa Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Sat, 21 Aug 2021 05:49:58 +0530 Subject: samples: bpf: Add BPF support for devmap_xmit tracepoint This adds support for the devmap_xmit tracepoint, and its multi device variant that can be used to obtain streams for each individual net_device to net_device redirection. This is useful for decomposing total xmit stats in xdp_monitor. Signed-off-by: Kumar Kartikeya Dwivedi Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20210821002010.845777-11-memxor@gmail.com --- samples/bpf/xdp_sample.bpf.c | 71 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) (limited to 'samples') diff --git a/samples/bpf/xdp_sample.bpf.c b/samples/bpf/xdp_sample.bpf.c index f01a5529751c..0eb7e1dcae22 100644 --- a/samples/bpf/xdp_sample.bpf.c +++ b/samples/bpf/xdp_sample.bpf.c @@ -11,6 +11,14 @@ array_map redir_err_cnt SEC(".maps"); array_map cpumap_enqueue_cnt SEC(".maps"); array_map cpumap_kthread_cnt SEC(".maps"); array_map exception_cnt SEC(".maps"); +array_map devmap_xmit_cnt SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_HASH); + __uint(max_entries, 32 * 32); + __type(key, u64); + __type(value, struct datarec); +} devmap_xmit_cnt_multi SEC(".maps"); const volatile int nr_cpus = 0; @@ -193,3 +201,66 @@ int BPF_PROG(tp_xdp_exception, const struct net_device *dev, return 0; } + +SEC("tp_btf/xdp_devmap_xmit") +int BPF_PROG(tp_xdp_devmap_xmit, const struct net_device *from_dev, + const struct net_device *to_dev, int sent, int drops, int err) +{ + struct datarec *rec; + int idx_in, idx_out; + u32 cpu; + + idx_in = from_dev->ifindex; + idx_out = to_dev->ifindex; + + if (!IN_SET(from_match, idx_in)) + return 0; + if (!IN_SET(to_match, idx_out)) + return 0; + + cpu = bpf_get_smp_processor_id(); + rec = bpf_map_lookup_elem(&devmap_xmit_cnt, &cpu); + if (!rec) + return 0; + NO_TEAR_ADD(rec->processed, sent); + NO_TEAR_ADD(rec->dropped, drops); + /* Record bulk events, then userspace can calc average bulk size */ + NO_TEAR_INC(rec->info); + /* Record error cases, where no frame were sent */ + /* Catch API error of drv ndo_xdp_xmit sent more than count */ + if (err || drops < 0) + NO_TEAR_INC(rec->issue); + return 0; +} + +SEC("tp_btf/xdp_devmap_xmit") +int BPF_PROG(tp_xdp_devmap_xmit_multi, const struct net_device *from_dev, + const struct net_device *to_dev, int sent, int drops, int err) +{ + struct datarec empty = {}; + struct datarec *rec; + int idx_in, idx_out; + u64 idx; + + idx_in = from_dev->ifindex; + idx_out = to_dev->ifindex; + idx = idx_in; + idx = idx << 32 | idx_out; + + if (!IN_SET(from_match, idx_in)) + return 0; + if (!IN_SET(to_match, idx_out)) + return 0; + + bpf_map_update_elem(&devmap_xmit_cnt_multi, &idx, &empty, BPF_NOEXIST); + rec = bpf_map_lookup_elem(&devmap_xmit_cnt_multi, &idx); + if (!rec) + return 0; + + NO_TEAR_ADD(rec->processed, sent); + NO_TEAR_ADD(rec->dropped, drops); + NO_TEAR_INC(rec->info); + if (err || drops < 0) + NO_TEAR_INC(rec->issue); + return 0; +} -- cgit v1.2.3-59-g8ed1b From af93d58c27b6ac4154f1651f47be2a159f8ce30f Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Sat, 21 Aug 2021 05:49:59 +0530 Subject: samples: bpf: Add devmap_xmit tracepoint statistics support This adds support for retrieval and printing for devmap_xmit total and mutli mode tracepoint. For multi mode, we keep a hash map entry for each redirection stream, such that we can dynamically add and remove entries on output. The from_match and to_match will be set by individual samples when setting up the XDP program on these devices. The multi mode tracepoint is also handy for xdp_redirect_map_multi, where up to 32 devices can be specified. Also add samples_init_pre_load macro to finally set up the resized maps and mmap them in place for low overhead stats retrieval. Signed-off-by: Kumar Kartikeya Dwivedi Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20210821002010.845777-12-memxor@gmail.com --- samples/bpf/xdp_sample_user.c | 317 +++++++++++++++++++++++++++++++++++++++++- samples/bpf/xdp_sample_user.h | 17 +++ 2 files changed, 331 insertions(+), 3 deletions(-) (limited to 'samples') diff --git a/samples/bpf/xdp_sample_user.c b/samples/bpf/xdp_sample_user.c index e2692dee1dbb..eb484c15492d 100644 --- a/samples/bpf/xdp_sample_user.c +++ b/samples/bpf/xdp_sample_user.c @@ -77,6 +77,8 @@ enum map_type { MAP_CPUMAP_ENQUEUE, MAP_CPUMAP_KTHREAD, MAP_EXCEPTION, + MAP_DEVMAP_XMIT, + MAP_DEVMAP_XMIT_MULTI, NUM_MAP, }; @@ -103,6 +105,8 @@ struct stats_record { struct record redir_err[XDP_REDIRECT_ERR_MAX]; struct record kthread; struct record exception[XDP_ACTION_MAX]; + struct record devmap_xmit; + DECLARE_HASHTABLE(xmit_map, 5); struct record enq[]; }; @@ -111,7 +115,9 @@ struct sample_output { __u64 rx; __u64 redir; __u64 drop; + __u64 drop_xmit; __u64 err; + __u64 xmit; } totals; struct { __u64 pps; @@ -125,6 +131,12 @@ struct sample_output { struct { __u64 hits; } except_cnt; + struct { + __u64 pps; + __u64 drop; + __u64 err; + double bavg; + } xmit_cnt; }; struct xdp_desc { @@ -265,6 +277,16 @@ static void sample_print_help(int mask) " \t\t\t\thit/s - Number of times the tracepoint was hit per second\n\n"); } + if (mask & SAMPLE_DEVMAP_XMIT_CNT) { + printf(" devmap_xmit\t\tDisplays devmap_xmit tracepoint events\n" + " \t\t\tThis tracepoint is invoked for successful transmissions on output\n" + " \t\t\tdevice but these statistics are not available for generic XDP mode,\n" + " \t\t\thence they will be omitted from the output when using SKB mode\n" + " \t\t\t\txmit/s - Number of packets that were transmitted per second\n" + " \t\t\t\tdrop/s - Number of packets that failed transmissions per second\n" + " \t\t\t\tdrv_err/s - Number of internal driver errors per second\n" + " \t\t\t\tbulk-avg - Average number of packets processed for each event\n\n"); + } } void sample_usage(char *argv[], const struct option *long_options, @@ -353,6 +375,74 @@ static void map_collect_percpu(struct datarec *values, struct record *rec) rec->total.xdp_redirect = sum_xdp_redirect; } +static int map_collect_percpu_devmap(int map_fd, struct stats_record *rec) +{ + unsigned int nr_cpus = bpf_num_possible_cpus(); + __u32 batch, count = 32; + struct datarec *values; + bool init = false; + __u64 *keys; + int i, ret; + + keys = calloc(count, sizeof(__u64)); + if (!keys) + return -ENOMEM; + values = calloc(count * nr_cpus, sizeof(struct datarec)); + if (!values) { + free(keys); + return -ENOMEM; + } + + for (;;) { + bool exit = false; + + ret = bpf_map_lookup_batch(map_fd, init ? &batch : NULL, &batch, + keys, values, &count, NULL); + if (ret < 0 && errno != ENOENT) + break; + if (errno == ENOENT) + exit = true; + + init = true; + for (i = 0; i < count; i++) { + struct map_entry *e, *x = NULL; + __u64 pair = keys[i]; + struct datarec *arr; + + arr = &values[i * nr_cpus]; + hash_for_each_possible(rec->xmit_map, e, node, pair) { + if (e->pair == pair) { + x = e; + break; + } + } + if (!x) { + x = calloc(1, sizeof(*x)); + if (!x) + goto cleanup; + if (map_entry_init(x, pair) < 0) { + free(x); + goto cleanup; + } + hash_add(rec->xmit_map, &x->node, pair); + } + map_collect_percpu(arr, &x->val); + } + + if (exit) + break; + count = 32; + } + + free(values); + free(keys); + return 0; +cleanup: + free(values); + free(keys); + return -ENOMEM; +} + static struct stats_record *alloc_stats_record(void) { struct stats_record *rec; @@ -408,6 +498,16 @@ static struct stats_record *alloc_stats_record(void) } } } + if (sample_mask & SAMPLE_DEVMAP_XMIT_CNT) { + rec->devmap_xmit.cpu = alloc_record_per_cpu(); + if (!rec->devmap_xmit.cpu) { + fprintf(stderr, + "Failed to allocate devmap_xmit per-CPU array\n"); + goto end_exception; + } + } + if (sample_mask & SAMPLE_DEVMAP_XMIT_CNT_MULTI) + hash_init(rec->xmit_map); if (sample_mask & SAMPLE_CPUMAP_ENQUEUE_CNT) { for (i = 0; i < sample_n_cpus; i++) { rec->enq[i].cpu = alloc_record_per_cpu(); @@ -418,13 +518,15 @@ static struct stats_record *alloc_stats_record(void) i); while (i--) free(rec->enq[i].cpu); - goto end_exception; + goto end_devmap_xmit; } } } return rec; +end_devmap_xmit: + free(rec->devmap_xmit.cpu); end_exception: for (i = 0; i < XDP_ACTION_MAX; i++) free(rec->exception[i].cpu); @@ -448,6 +550,12 @@ static void free_stats_record(struct stats_record *r) for (i = 0; i < sample_n_cpus; i++) free(r->enq[i].cpu); + hash_for_each_safe(r->xmit_map, i, tmp, e, node) { + hash_del(&e->node); + free(e->val.cpu); + free(e); + } + free(r->devmap_xmit.cpu); for (i = 0; i < XDP_ACTION_MAX; i++) free(r->exception[i].cpu); free(r->kthread.cpu); @@ -835,6 +943,160 @@ static void stats_get_exception_cnt(struct stats_record *stats_rec, } } +static void stats_get_devmap_xmit(struct stats_record *stats_rec, + struct stats_record *stats_prev, + unsigned int nr_cpus, + struct sample_output *out) +{ + double pps, drop, info, err; + struct record *rec, *prev; + double t; + int i; + + rec = &stats_rec->devmap_xmit; + prev = &stats_prev->devmap_xmit; + t = calc_period(rec, prev); + for (i = 0; i < nr_cpus; i++) { + struct datarec *r = &rec->cpu[i]; + struct datarec *p = &prev->cpu[i]; + char str[64]; + + pps = calc_pps(r, p, t); + drop = calc_drop_pps(r, p, t); + err = calc_errs_pps(r, p, t); + + if (!pps && !drop && !err) + continue; + + snprintf(str, sizeof(str), "cpu:%d", i); + info = calc_info_pps(r, p, t); + if (info > 0) + info = (pps + drop) / info; /* calc avg bulk */ + print_default(" %-18s" FMT_COLUMNf FMT_COLUMNf FMT_COLUMNf + __COLUMN(".2f") "\n", + str, XMIT(pps), DROP(drop), err, "drv_err/s", + info, "bulk-avg"); + } + if (out) { + pps = calc_pps(&rec->total, &prev->total, t); + drop = calc_drop_pps(&rec->total, &prev->total, t); + info = calc_info_pps(&rec->total, &prev->total, t); + if (info > 0) + info = (pps + drop) / info; /* calc avg bulk */ + err = calc_errs_pps(&rec->total, &prev->total, t); + + out->xmit_cnt.pps = pps; + out->xmit_cnt.drop = drop; + out->xmit_cnt.bavg = info; + out->xmit_cnt.err = err; + out->totals.xmit += pps; + out->totals.drop_xmit += drop; + out->totals.err += err; + } +} + +static void stats_get_devmap_xmit_multi(struct stats_record *stats_rec, + struct stats_record *stats_prev, + unsigned int nr_cpus, + struct sample_output *out, + bool xmit_total) +{ + double pps, drop, info, err; + struct map_entry *entry; + struct record *r, *p; + double t; + int bkt; + + hash_for_each(stats_rec->xmit_map, bkt, entry, node) { + struct map_entry *e, *x = NULL; + char ifname_from[IFNAMSIZ]; + char ifname_to[IFNAMSIZ]; + const char *fstr, *tstr; + unsigned long prev_time; + struct record beg = {}; + __u32 from_idx, to_idx; + char str[128]; + __u64 pair; + int i; + + prev_time = sample_interval * NANOSEC_PER_SEC; + + pair = entry->pair; + from_idx = pair >> 32; + to_idx = pair & 0xFFFFFFFF; + + r = &entry->val; + beg.timestamp = r->timestamp - prev_time; + + /* Find matching entry from stats_prev map */ + hash_for_each_possible(stats_prev->xmit_map, e, node, pair) { + if (e->pair == pair) { + x = e; + break; + } + } + if (x) + p = &x->val; + else + p = &beg; + t = calc_period(r, p); + pps = calc_pps(&r->total, &p->total, t); + drop = calc_drop_pps(&r->total, &p->total, t); + info = calc_info_pps(&r->total, &p->total, t); + if (info > 0) + info = (pps + drop) / info; /* calc avg bulk */ + err = calc_errs_pps(&r->total, &p->total, t); + + if (out) { + /* We are responsible for filling out totals */ + out->totals.xmit += pps; + out->totals.drop_xmit += drop; + out->totals.err += err; + continue; + } + + fstr = tstr = NULL; + if (if_indextoname(from_idx, ifname_from)) + fstr = ifname_from; + if (if_indextoname(to_idx, ifname_to)) + tstr = ifname_to; + + snprintf(str, sizeof(str), "xmit %s->%s", fstr ?: "?", + tstr ?: "?"); + /* Skip idle streams of redirection */ + if (pps || drop || err) { + print_err(drop, + " %-20s " FMT_COLUMNf FMT_COLUMNf FMT_COLUMNf + __COLUMN(".2f") "\n", str, XMIT(pps), DROP(drop), + err, "drv_err/s", info, "bulk-avg"); + } + + for (i = 0; i < nr_cpus; i++) { + struct datarec *rc = &r->cpu[i]; + struct datarec *pc, p_beg = {}; + char str[64]; + + pc = p == &beg ? &p_beg : &p->cpu[i]; + + pps = calc_pps(rc, pc, t); + drop = calc_drop_pps(rc, pc, t); + err = calc_errs_pps(rc, pc, t); + + if (!pps && !drop && !err) + continue; + + snprintf(str, sizeof(str), "cpu:%d", i); + info = calc_info_pps(rc, pc, t); + if (info > 0) + info = (pps + drop) / info; /* calc avg bulk */ + + print_default(" %-18s" FMT_COLUMNf FMT_COLUMNf FMT_COLUMNf + __COLUMN(".2f") "\n", str, XMIT(pps), + DROP(drop), err, "drv_err/s", info, "bulk-avg"); + } + } +} + static void stats_print(const char *prefix, int mask, struct stats_record *r, struct stats_record *p, struct sample_output *out) { @@ -849,6 +1111,9 @@ static void stats_print(const char *prefix, int mask, struct stats_record *r, printf(FMT_COLUMNl, out->totals.err + out->totals.drop + out->totals.drop_xmit, "err,drop/s"); + if (mask & SAMPLE_DEVMAP_XMIT_CNT || + mask & SAMPLE_DEVMAP_XMIT_CNT_MULTI) + printf(FMT_COLUMNl, XMIT(out->totals.xmit)); printf("\n"); if (mask & SAMPLE_RX_CNT) { @@ -899,6 +1164,25 @@ static void stats_print(const char *prefix, int mask, struct stats_record *r, stats_get_exception_cnt(r, p, nr_cpus, NULL); } + if (mask & SAMPLE_DEVMAP_XMIT_CNT) { + str = (sample_log_level & LL_DEFAULT) && out->xmit_cnt.pps ? + "devmap_xmit total" : + "devmap_xmit"; + + print_err(out->xmit_cnt.err || out->xmit_cnt.drop, + " %-20s " FMT_COLUMNl FMT_COLUMNl FMT_COLUMNl + __COLUMN(".2f") "\n", + str, XMIT(out->xmit_cnt.pps), + DROP(out->xmit_cnt.drop), out->xmit_cnt.err, + "drv_err/s", out->xmit_cnt.bavg, "bulk-avg"); + + stats_get_devmap_xmit(r, p, nr_cpus, NULL); + } + + if (mask & SAMPLE_DEVMAP_XMIT_CNT_MULTI) + stats_get_devmap_xmit_multi(r, p, nr_cpus, NULL, + mask & SAMPLE_DEVMAP_XMIT_CNT); + if (sample_log_level & LL_DEFAULT || ((sample_log_level & LL_SIMPLE) && sample_err_exp)) { sample_err_exp = false; @@ -910,12 +1194,13 @@ int sample_setup_maps(struct bpf_map **maps) { sample_n_cpus = libbpf_num_possible_cpus(); - for (int i = 0; i < NUM_MAP; i++) { + for (int i = 0; i < MAP_DEVMAP_XMIT_MULTI; i++) { sample_map[i] = maps[i]; switch (i) { case MAP_RX: case MAP_CPUMAP_KTHREAD: + case MAP_DEVMAP_XMIT: sample_map_count[i] = sample_n_cpus; break; case MAP_REDIRECT_ERR: @@ -933,12 +1218,13 @@ int sample_setup_maps(struct bpf_map **maps) if (bpf_map__resize(sample_map[i], sample_map_count[i]) < 0) return -errno; } + sample_map[MAP_DEVMAP_XMIT_MULTI] = maps[MAP_DEVMAP_XMIT_MULTI]; return 0; } static int sample_setup_maps_mappings(void) { - for (int i = 0; i < NUM_MAP; i++) { + for (int i = 0; i < MAP_DEVMAP_XMIT_MULTI; i++) { size_t size = sample_map_count[i] * sizeof(struct datarec); sample_mmap[i] = mmap(NULL, size, PROT_READ | PROT_WRITE, @@ -1057,9 +1343,20 @@ static void sample_summary_print(void) if (sample_out.totals.drop) print_always(" Rx dropped : %'-10llu\n", sample_out.totals.drop); + if (sample_out.totals.drop_xmit) + print_always(" Tx dropped : %'-10llu\n", + sample_out.totals.drop_xmit); if (sample_out.totals.err) print_always(" Errors recorded : %'-10llu\n", sample_out.totals.err); + if (sample_out.totals.xmit) { + double pkts = sample_out.totals.xmit; + + print_always(" Packets transmitted : %'-10llu\n", + sample_out.totals.xmit); + print_always(" Average transmit/s : %'-10.0f\n", + sample_round(pkts / period)); + } } void sample_exit(int status) @@ -1115,6 +1412,13 @@ static int sample_stats_collect(struct stats_record *rec) map_collect_percpu(&sample_mmap[MAP_EXCEPTION][i * sample_n_cpus], &rec->exception[i]); + if (sample_mask & SAMPLE_DEVMAP_XMIT_CNT) + map_collect_percpu(sample_mmap[MAP_DEVMAP_XMIT], &rec->devmap_xmit); + + if (sample_mask & SAMPLE_DEVMAP_XMIT_CNT_MULTI) { + if (map_collect_percpu_devmap(bpf_map__fd(sample_map[MAP_DEVMAP_XMIT_MULTI]), rec) < 0) + return -EINVAL; + } return 0; } @@ -1123,7 +1427,9 @@ static void sample_summary_update(struct sample_output *out, int interval) sample_out.totals.rx += out->totals.rx; sample_out.totals.redir += out->totals.redir; sample_out.totals.drop += out->totals.drop; + sample_out.totals.drop_xmit += out->totals.drop_xmit; sample_out.totals.err += out->totals.err; + sample_out.totals.xmit += out->totals.xmit; sample_out.rx_cnt.pps += interval; } @@ -1141,6 +1447,11 @@ static void sample_stats_print(int mask, struct stats_record *cur, stats_get_redirect_err_cnt(cur, prev, 0, &out); if (mask & SAMPLE_EXCEPTION_CNT) stats_get_exception_cnt(cur, prev, 0, &out); + if (mask & SAMPLE_DEVMAP_XMIT_CNT) + stats_get_devmap_xmit(cur, prev, 0, &out); + else if (mask & SAMPLE_DEVMAP_XMIT_CNT_MULTI) + stats_get_devmap_xmit_multi(cur, prev, 0, &out, + mask & SAMPLE_DEVMAP_XMIT_CNT); sample_summary_update(&out, interval); stats_print(prog_name, mask, cur, prev, &out); diff --git a/samples/bpf/xdp_sample_user.h b/samples/bpf/xdp_sample_user.h index 203732615fee..3a678986cce2 100644 --- a/samples/bpf/xdp_sample_user.h +++ b/samples/bpf/xdp_sample_user.h @@ -14,9 +14,11 @@ enum stats_mask { SAMPLE_CPUMAP_ENQUEUE_CNT = 1U << 3, SAMPLE_CPUMAP_KTHREAD_CNT = 1U << 4, SAMPLE_EXCEPTION_CNT = 1U << 5, + SAMPLE_DEVMAP_XMIT_CNT = 1U << 6, SAMPLE_REDIRECT_CNT = 1U << 7, SAMPLE_REDIRECT_MAP_CNT = SAMPLE_REDIRECT_CNT | _SAMPLE_REDIRECT_MAP, SAMPLE_REDIRECT_ERR_MAP_CNT = SAMPLE_REDIRECT_ERR_CNT | _SAMPLE_REDIRECT_MAP, + SAMPLE_DEVMAP_XMIT_CNT_MULTI = 1U << 8, }; /* Exit return codes */ @@ -63,6 +65,17 @@ static inline char *safe_strncpy(char *dst, const char *src, size_t size) return -errno; \ }) +#define sample_init_pre_load(skel) \ + ({ \ + skel->rodata->nr_cpus = libbpf_num_possible_cpus(); \ + sample_setup_maps((struct bpf_map *[]){ \ + skel->maps.rx_cnt, skel->maps.redir_err_cnt, \ + skel->maps.cpumap_enqueue_cnt, \ + skel->maps.cpumap_kthread_cnt, \ + skel->maps.exception_cnt, skel->maps.devmap_xmit_cnt, \ + skel->maps.devmap_xmit_cnt_multi }); \ + }) + #define DEFINE_SAMPLE_INIT(name) \ static int sample_init(struct name *skel, int mask) \ { \ @@ -84,6 +97,10 @@ static inline char *safe_strncpy(char *dst, const char *src, size_t size) __attach_tp(tp_xdp_cpumap_kthread); \ if (mask & SAMPLE_EXCEPTION_CNT) \ __attach_tp(tp_xdp_exception); \ + if (mask & SAMPLE_DEVMAP_XMIT_CNT) \ + __attach_tp(tp_xdp_devmap_xmit); \ + if (mask & SAMPLE_DEVMAP_XMIT_CNT_MULTI) \ + __attach_tp(tp_xdp_devmap_xmit_multi); \ return 0; \ } -- cgit v1.2.3-59-g8ed1b From 384b6b3bbf0d3b60ca118459a91b7b8ce1dcd6bd Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Sat, 21 Aug 2021 05:50:00 +0530 Subject: samples: bpf: Add vmlinux.h generation support Also, take this opportunity to depend on in-tree bpftool, so that we can use static linking support in subsequent commits for XDP samples BPF helper object. Signed-off-by: Kumar Kartikeya Dwivedi Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20210821002010.845777-13-memxor@gmail.com --- samples/bpf/Makefile | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'samples') diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 036998d11ded..ff1932e16bc5 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -276,6 +276,11 @@ $(LIBBPF): FORCE $(MAKE) -C $(dir $@) RM='rm -rf' EXTRA_CFLAGS="$(TPROGS_CFLAGS)" \ LDFLAGS=$(TPROGS_LDFLAGS) srctree=$(BPF_SAMPLES_PATH)/../../ O= +BPFTOOLDIR := $(TOOLS_PATH)/bpf/bpftool +BPFTOOL := $(BPFTOOLDIR)/bpftool +$(BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) + $(MAKE) -C $(BPFTOOLDIR) srctree=$(BPF_SAMPLES_PATH)/../../ + $(obj)/syscall_nrs.h: $(obj)/syscall_nrs.s FORCE $(call filechk,offsets,__SYSCALL_NRS_H__) @@ -313,6 +318,26 @@ $(obj)/hbm_edt_kern.o: $(src)/hbm.h $(src)/hbm_kern.h -include $(BPF_SAMPLES_PATH)/Makefile.target +VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux) \ + $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux) \ + ../../../../vmlinux \ + /sys/kernel/btf/vmlinux \ + /boot/vmlinux-$(shell uname -r) +VMLINUX_BTF ?= $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS)))) + +ifeq ($(VMLINUX_BTF),) +$(error Cannot find a vmlinux for VMLINUX_BTF at any of "$(VMLINUX_BTF_PATHS)") +endif + +$(obj)/vmlinux.h: $(VMLINUX_BTF) $(BPFTOOL) +ifeq ($(VMLINUX_H),) + $(Q)$(BPFTOOL) btf dump file $(VMLINUX_BTF) format c > $@ +else + $(Q)cp "$(VMLINUX_H)" $@ +endif + +clean-files += vmlinux.h + # asm/sysreg.h - inline assembly used by it is incompatible with llvm. # But, there is no easy way to fix it, so just exclude it since it is # useless for BPF samples. -- cgit v1.2.3-59-g8ed1b From 3f19956010d26906e84baec4cd9c48bd8808de96 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Sat, 21 Aug 2021 05:50:01 +0530 Subject: samples: bpf: Convert xdp_monitor_kern.o to XDP samples helper We already moved all the functionality it provided in XDP samples helper userspace and kernel BPF object, so just delete the unneeded code. We also add generation of BPF skeleton and compilation using clang -target bpf for files ending with .bpf.c suffix (to denote that they use vmlinux.h). Signed-off-by: Kumar Kartikeya Dwivedi Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20210821002010.845777-14-memxor@gmail.com --- samples/bpf/Makefile | 42 ++++++- samples/bpf/xdp_monitor.bpf.c | 8 ++ samples/bpf/xdp_monitor_kern.c | 257 ----------------------------------------- 3 files changed, 49 insertions(+), 258 deletions(-) create mode 100644 samples/bpf/xdp_monitor.bpf.c delete mode 100644 samples/bpf/xdp_monitor_kern.c (limited to 'samples') diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index ff1932e16bc5..0d7086a2a393 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -164,7 +164,6 @@ always-y += xdp_redirect_kern.o always-y += xdp_redirect_map_kern.o always-y += xdp_redirect_map_multi_kern.o always-y += xdp_redirect_cpu_kern.o -always-y += xdp_monitor_kern.o always-y += xdp_rxq_info_kern.o always-y += xdp2skb_meta_kern.o always-y += syscall_tp_kern.o @@ -338,6 +337,47 @@ endif clean-files += vmlinux.h +# Get Clang's default includes on this system, as opposed to those seen by +# '-target bpf'. This fixes "missing" files on some architectures/distros, +# such as asm/byteorder.h, asm/socket.h, asm/sockios.h, sys/cdefs.h etc. +# +# Use '-idirafter': Don't interfere with include mechanics except where the +# build would have failed anyways. +define get_sys_includes +$(shell $(1) -v -E - &1 \ + | sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }') \ +$(shell $(1) -dM -E - $@ + # asm/sysreg.h - inline assembly used by it is incompatible with llvm. # But, there is no easy way to fix it, so just exclude it since it is # useless for BPF samples. diff --git a/samples/bpf/xdp_monitor.bpf.c b/samples/bpf/xdp_monitor.bpf.c new file mode 100644 index 000000000000..cfb41e2205f4 --- /dev/null +++ b/samples/bpf/xdp_monitor.bpf.c @@ -0,0 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2017-2018 Jesper Dangaard Brouer, Red Hat Inc. + * + * XDP monitor tool, based on tracepoints + */ +#include "xdp_sample.bpf.h" + +char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/xdp_monitor_kern.c b/samples/bpf/xdp_monitor_kern.c deleted file mode 100644 index 5c955b812c47..000000000000 --- a/samples/bpf/xdp_monitor_kern.c +++ /dev/null @@ -1,257 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 - * Copyright(c) 2017-2018 Jesper Dangaard Brouer, Red Hat Inc. - * - * XDP monitor tool, based on tracepoints - */ -#include -#include - -struct { - __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); - __type(key, u32); - __type(value, u64); - __uint(max_entries, 2); - /* TODO: have entries for all possible errno's */ -} redirect_err_cnt SEC(".maps"); - -#define XDP_UNKNOWN XDP_REDIRECT + 1 -struct { - __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); - __type(key, u32); - __type(value, u64); - __uint(max_entries, XDP_UNKNOWN + 1); -} exception_cnt SEC(".maps"); - -/* Tracepoint format: /sys/kernel/debug/tracing/events/xdp/xdp_redirect/format - * Code in: kernel/include/trace/events/xdp.h - */ -struct xdp_redirect_ctx { - u64 __pad; // First 8 bytes are not accessible by bpf code - int prog_id; // offset:8; size:4; signed:1; - u32 act; // offset:12 size:4; signed:0; - int ifindex; // offset:16 size:4; signed:1; - int err; // offset:20 size:4; signed:1; - int to_ifindex; // offset:24 size:4; signed:1; - u32 map_id; // offset:28 size:4; signed:0; - int map_index; // offset:32 size:4; signed:1; -}; // offset:36 - -enum { - XDP_REDIRECT_SUCCESS = 0, - XDP_REDIRECT_ERROR = 1 -}; - -static __always_inline -int xdp_redirect_collect_stat(struct xdp_redirect_ctx *ctx) -{ - u32 key = XDP_REDIRECT_ERROR; - int err = ctx->err; - u64 *cnt; - - if (!err) - key = XDP_REDIRECT_SUCCESS; - - cnt = bpf_map_lookup_elem(&redirect_err_cnt, &key); - if (!cnt) - return 1; - *cnt += 1; - - return 0; /* Indicate event was filtered (no further processing)*/ - /* - * Returning 1 here would allow e.g. a perf-record tracepoint - * to see and record these events, but it doesn't work well - * in-practice as stopping perf-record also unload this - * bpf_prog. Plus, there is additional overhead of doing so. - */ -} - -SEC("tracepoint/xdp/xdp_redirect_err") -int trace_xdp_redirect_err(struct xdp_redirect_ctx *ctx) -{ - return xdp_redirect_collect_stat(ctx); -} - - -SEC("tracepoint/xdp/xdp_redirect_map_err") -int trace_xdp_redirect_map_err(struct xdp_redirect_ctx *ctx) -{ - return xdp_redirect_collect_stat(ctx); -} - -/* Likely unloaded when prog starts */ -SEC("tracepoint/xdp/xdp_redirect") -int trace_xdp_redirect(struct xdp_redirect_ctx *ctx) -{ - return xdp_redirect_collect_stat(ctx); -} - -/* Likely unloaded when prog starts */ -SEC("tracepoint/xdp/xdp_redirect_map") -int trace_xdp_redirect_map(struct xdp_redirect_ctx *ctx) -{ - return xdp_redirect_collect_stat(ctx); -} - -/* Tracepoint format: /sys/kernel/debug/tracing/events/xdp/xdp_exception/format - * Code in: kernel/include/trace/events/xdp.h - */ -struct xdp_exception_ctx { - u64 __pad; // First 8 bytes are not accessible by bpf code - int prog_id; // offset:8; size:4; signed:1; - u32 act; // offset:12; size:4; signed:0; - int ifindex; // offset:16; size:4; signed:1; -}; - -SEC("tracepoint/xdp/xdp_exception") -int trace_xdp_exception(struct xdp_exception_ctx *ctx) -{ - u64 *cnt; - u32 key; - - key = ctx->act; - if (key > XDP_REDIRECT) - key = XDP_UNKNOWN; - - cnt = bpf_map_lookup_elem(&exception_cnt, &key); - if (!cnt) - return 1; - *cnt += 1; - - return 0; -} - -/* Common stats data record shared with _user.c */ -struct datarec { - u64 processed; - u64 dropped; - u64 info; - u64 err; -}; -#define MAX_CPUS 64 - -struct { - __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); - __type(key, u32); - __type(value, struct datarec); - __uint(max_entries, MAX_CPUS); -} cpumap_enqueue_cnt SEC(".maps"); - -struct { - __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); - __type(key, u32); - __type(value, struct datarec); - __uint(max_entries, 1); -} cpumap_kthread_cnt SEC(".maps"); - -/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_cpumap_enqueue/format - * Code in: kernel/include/trace/events/xdp.h - */ -struct cpumap_enqueue_ctx { - u64 __pad; // First 8 bytes are not accessible by bpf code - int map_id; // offset:8; size:4; signed:1; - u32 act; // offset:12; size:4; signed:0; - int cpu; // offset:16; size:4; signed:1; - unsigned int drops; // offset:20; size:4; signed:0; - unsigned int processed; // offset:24; size:4; signed:0; - int to_cpu; // offset:28; size:4; signed:1; -}; - -SEC("tracepoint/xdp/xdp_cpumap_enqueue") -int trace_xdp_cpumap_enqueue(struct cpumap_enqueue_ctx *ctx) -{ - u32 to_cpu = ctx->to_cpu; - struct datarec *rec; - - if (to_cpu >= MAX_CPUS) - return 1; - - rec = bpf_map_lookup_elem(&cpumap_enqueue_cnt, &to_cpu); - if (!rec) - return 0; - rec->processed += ctx->processed; - rec->dropped += ctx->drops; - - /* Record bulk events, then userspace can calc average bulk size */ - if (ctx->processed > 0) - rec->info += 1; - - return 0; -} - -/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_cpumap_kthread/format - * Code in: kernel/include/trace/events/xdp.h - */ -struct cpumap_kthread_ctx { - u64 __pad; // First 8 bytes are not accessible by bpf code - int map_id; // offset:8; size:4; signed:1; - u32 act; // offset:12; size:4; signed:0; - int cpu; // offset:16; size:4; signed:1; - unsigned int drops; // offset:20; size:4; signed:0; - unsigned int processed; // offset:24; size:4; signed:0; - int sched; // offset:28; size:4; signed:1; -}; - -SEC("tracepoint/xdp/xdp_cpumap_kthread") -int trace_xdp_cpumap_kthread(struct cpumap_kthread_ctx *ctx) -{ - struct datarec *rec; - u32 key = 0; - - rec = bpf_map_lookup_elem(&cpumap_kthread_cnt, &key); - if (!rec) - return 0; - rec->processed += ctx->processed; - rec->dropped += ctx->drops; - - /* Count times kthread yielded CPU via schedule call */ - if (ctx->sched) - rec->info++; - - return 0; -} - -struct { - __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); - __type(key, u32); - __type(value, struct datarec); - __uint(max_entries, 1); -} devmap_xmit_cnt SEC(".maps"); - -/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_devmap_xmit/format - * Code in: kernel/include/trace/events/xdp.h - */ -struct devmap_xmit_ctx { - u64 __pad; // First 8 bytes are not accessible by bpf code - int from_ifindex; // offset:8; size:4; signed:1; - u32 act; // offset:12; size:4; signed:0; - int to_ifindex; // offset:16; size:4; signed:1; - int drops; // offset:20; size:4; signed:1; - int sent; // offset:24; size:4; signed:1; - int err; // offset:28; size:4; signed:1; -}; - -SEC("tracepoint/xdp/xdp_devmap_xmit") -int trace_xdp_devmap_xmit(struct devmap_xmit_ctx *ctx) -{ - struct datarec *rec; - u32 key = 0; - - rec = bpf_map_lookup_elem(&devmap_xmit_cnt, &key); - if (!rec) - return 0; - rec->processed += ctx->sent; - rec->dropped += ctx->drops; - - /* Record bulk events, then userspace can calc average bulk size */ - rec->info += 1; - - /* Record error cases, where no frame were sent */ - if (ctx->err) - rec->err++; - - /* Catch API error of drv ndo_xdp_xmit sent more than count */ - if (ctx->drops < 0) - rec->err++; - - return 1; -} -- cgit v1.2.3-59-g8ed1b From 6e1051a54e3100df59dae01c24ff4a6d6027e303 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Sat, 21 Aug 2021 05:50:02 +0530 Subject: samples: bpf: Convert xdp_monitor to XDP samples helper Use the libbpf skeleton facility and other utilities provided by XDP samples helper. A lot of the code in xdp_monitor and xdp_redirect_cpu has been moved to the xdp_sample_user.o helper, so we remove the duplicate functions here that are no longer needed. Thanks to BPF skeleton, we no longer depend on order of tracepoints to uninstall them on startup. Instead, the sample mask is used to install the needed tracepoints. Signed-off-by: Kumar Kartikeya Dwivedi Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20210821002010.845777-15-memxor@gmail.com --- samples/bpf/Makefile | 9 +- samples/bpf/Makefile.target | 11 + samples/bpf/xdp_monitor_user.c | 798 ++++------------------------------------- 3 files changed, 83 insertions(+), 735 deletions(-) (limited to 'samples') diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 0d7086a2a393..479778439f5e 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -43,7 +43,6 @@ tprogs-y += xdp_redirect tprogs-y += xdp_redirect_map tprogs-y += xdp_redirect_map_multi tprogs-y += xdp_redirect_cpu -tprogs-y += xdp_monitor tprogs-y += xdp_rxq_info tprogs-y += syscall_tp tprogs-y += cpustat @@ -57,11 +56,14 @@ tprogs-y += xdp_sample_pkts tprogs-y += ibumad tprogs-y += hbm +tprogs-y += xdp_monitor + # Libbpf dependencies LIBBPF = $(TOOLS_PATH)/lib/bpf/libbpf.a CGROUP_HELPERS := ../../tools/testing/selftests/bpf/cgroup_helpers.o TRACE_HELPERS := ../../tools/testing/selftests/bpf/trace_helpers.o +XDP_SAMPLE := xdp_sample_user.o fds_example-objs := fds_example.o sockex1-objs := sockex1_user.o @@ -102,7 +104,6 @@ xdp_redirect-objs := xdp_redirect_user.o xdp_redirect_map-objs := xdp_redirect_map_user.o xdp_redirect_map_multi-objs := xdp_redirect_map_multi_user.o xdp_redirect_cpu-objs := xdp_redirect_cpu_user.o -xdp_monitor-objs := xdp_monitor_user.o xdp_rxq_info-objs := xdp_rxq_info_user.o syscall_tp-objs := syscall_tp_user.o cpustat-objs := cpustat_user.o @@ -116,6 +117,8 @@ xdp_sample_pkts-objs := xdp_sample_pkts_user.o ibumad-objs := ibumad_user.o hbm-objs := hbm.o $(CGROUP_HELPERS) +xdp_monitor-objs := xdp_monitor_user.o $(XDP_SAMPLE) + # Tell kbuild to always build the programs always-y := $(tprogs-y) always-y += sockex1_kern.o @@ -310,6 +313,8 @@ verify_target_bpf: verify_cmds $(BPF_SAMPLES_PATH)/*.c: verify_target_bpf $(LIBBPF) $(src)/*.c: verify_target_bpf $(LIBBPF) +$(obj)/xdp_monitor_user.o: $(obj)/xdp_monitor.skel.h + $(obj)/tracex5_kern.o: $(obj)/syscall_nrs.h $(obj)/hbm_out_kern.o: $(src)/hbm.h $(src)/hbm_kern.h $(obj)/hbm.o: $(src)/hbm.h diff --git a/samples/bpf/Makefile.target b/samples/bpf/Makefile.target index 7621f55e2947..5a368affa038 100644 --- a/samples/bpf/Makefile.target +++ b/samples/bpf/Makefile.target @@ -73,3 +73,14 @@ quiet_cmd_tprog-cobjs = CC $@ cmd_tprog-cobjs = $(CC) $(tprogc_flags) -c -o $@ $< $(tprog-cobjs): $(obj)/%.o: $(src)/%.c FORCE $(call if_changed_dep,tprog-cobjs) + +# Override includes for xdp_sample_user.o because $(srctree)/usr/include in +# TPROGS_CFLAGS causes conflicts +XDP_SAMPLE_CFLAGS += -Wall -O2 -lm \ + -I./tools/include \ + -I./tools/include/uapi \ + -I./tools/lib \ + -I./tools/testing/selftests/bpf +$(obj)/xdp_sample_user.o: $(src)/xdp_sample_user.c \ + $(src)/xdp_sample_user.h $(src)/xdp_sample_shared.h + $(CC) $(XDP_SAMPLE_CFLAGS) -c -o $@ $< diff --git a/samples/bpf/xdp_monitor_user.c b/samples/bpf/xdp_monitor_user.c index 49ebc49aefc3..fb9391a5ec62 100644 --- a/samples/bpf/xdp_monitor_user.c +++ b/samples/bpf/xdp_monitor_user.c @@ -1,15 +1,12 @@ -/* SPDX-License-Identifier: GPL-2.0 - * Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc. - */ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc. */ static const char *__doc__= - "XDP monitor tool, based on tracepoints\n" -; +"XDP monitor tool, based on tracepoints\n"; static const char *__doc_err_only__= - " NOTICE: Only tracking XDP redirect errors\n" - " Enable TX success stats via '--stats'\n" - " (which comes with a per packet processing overhead)\n" -; +" NOTICE: Only tracking XDP redirect errors\n" +" Enable redirect success stats via '-s/--stats'\n" +" (which comes with a per packet processing overhead)\n"; #include #include @@ -20,768 +17,103 @@ static const char *__doc_err_only__= #include #include #include - #include #include #include #include - #include #include #include #include "bpf_util.h" +#include "xdp_sample_user.h" +#include "xdp_monitor.skel.h" -enum map_type { - REDIRECT_ERR_CNT, - EXCEPTION_CNT, - CPUMAP_ENQUEUE_CNT, - CPUMAP_KTHREAD_CNT, - DEVMAP_XMIT_CNT, -}; +static int mask = SAMPLE_REDIRECT_ERR_CNT | SAMPLE_CPUMAP_ENQUEUE_CNT | + SAMPLE_CPUMAP_KTHREAD_CNT | SAMPLE_EXCEPTION_CNT | + SAMPLE_DEVMAP_XMIT_CNT | SAMPLE_DEVMAP_XMIT_CNT_MULTI; -static const char *const map_type_strings[] = { - [REDIRECT_ERR_CNT] = "redirect_err_cnt", - [EXCEPTION_CNT] = "exception_cnt", - [CPUMAP_ENQUEUE_CNT] = "cpumap_enqueue_cnt", - [CPUMAP_KTHREAD_CNT] = "cpumap_kthread_cnt", - [DEVMAP_XMIT_CNT] = "devmap_xmit_cnt", -}; - -#define NUM_MAP 5 -#define NUM_TP 8 - -static int tp_cnt; -static int map_cnt; -static int verbose = 1; -static bool debug = false; -struct bpf_map *map_data[NUM_MAP] = {}; -struct bpf_link *tp_links[NUM_TP] = {}; -struct bpf_object *obj; +DEFINE_SAMPLE_INIT(xdp_monitor); static const struct option long_options[] = { - {"help", no_argument, NULL, 'h' }, - {"debug", no_argument, NULL, 'D' }, - {"stats", no_argument, NULL, 'S' }, - {"sec", required_argument, NULL, 's' }, - {0, 0, NULL, 0 } -}; - -static void int_exit(int sig) -{ - /* Detach tracepoints */ - while (tp_cnt) - bpf_link__destroy(tp_links[--tp_cnt]); - - bpf_object__close(obj); - exit(0); -} - -/* C standard specifies two constants, EXIT_SUCCESS(0) and EXIT_FAILURE(1) */ -#define EXIT_FAIL_MEM 5 - -static void usage(char *argv[]) -{ - int i; - printf("\nDOCUMENTATION:\n%s\n", __doc__); - printf("\n"); - printf(" Usage: %s (options-see-below)\n", - argv[0]); - printf(" Listing options:\n"); - for (i = 0; long_options[i].name != 0; i++) { - printf(" --%-15s", long_options[i].name); - if (long_options[i].flag != NULL) - printf(" flag (internal value:%d)", - *long_options[i].flag); - else - printf("short-option: -%c", - long_options[i].val); - printf("\n"); - } - printf("\n"); -} - -#define NANOSEC_PER_SEC 1000000000 /* 10^9 */ -static __u64 gettime(void) -{ - struct timespec t; - int res; - - res = clock_gettime(CLOCK_MONOTONIC, &t); - if (res < 0) { - fprintf(stderr, "Error with gettimeofday! (%i)\n", res); - exit(EXIT_FAILURE); - } - return (__u64) t.tv_sec * NANOSEC_PER_SEC + t.tv_nsec; -} - -enum { - REDIR_SUCCESS = 0, - REDIR_ERROR = 1, -}; -#define REDIR_RES_MAX 2 -static const char *redir_names[REDIR_RES_MAX] = { - [REDIR_SUCCESS] = "Success", - [REDIR_ERROR] = "Error", -}; -static const char *err2str(int err) -{ - if (err < REDIR_RES_MAX) - return redir_names[err]; - return NULL; -} -/* enum xdp_action */ -#define XDP_UNKNOWN XDP_REDIRECT + 1 -#define XDP_ACTION_MAX (XDP_UNKNOWN + 1) -static const char *xdp_action_names[XDP_ACTION_MAX] = { - [XDP_ABORTED] = "XDP_ABORTED", - [XDP_DROP] = "XDP_DROP", - [XDP_PASS] = "XDP_PASS", - [XDP_TX] = "XDP_TX", - [XDP_REDIRECT] = "XDP_REDIRECT", - [XDP_UNKNOWN] = "XDP_UNKNOWN", -}; -static const char *action2str(int action) -{ - if (action < XDP_ACTION_MAX) - return xdp_action_names[action]; - return NULL; -} - -/* Common stats data record shared with _kern.c */ -struct datarec { - __u64 processed; - __u64 dropped; - __u64 info; - __u64 err; -}; -#define MAX_CPUS 64 - -/* Userspace structs for collection of stats from maps */ -struct record { - __u64 timestamp; - struct datarec total; - struct datarec *cpu; + { "help", no_argument, NULL, 'h' }, + { "stats", no_argument, NULL, 's' }, + { "interval", required_argument, NULL, 'i' }, + { "verbose", no_argument, NULL, 'v' }, + {} }; -struct u64rec { - __u64 processed; -}; -struct record_u64 { - /* record for _kern side __u64 values */ - __u64 timestamp; - struct u64rec total; - struct u64rec *cpu; -}; - -struct stats_record { - struct record_u64 xdp_redirect[REDIR_RES_MAX]; - struct record_u64 xdp_exception[XDP_ACTION_MAX]; - struct record xdp_cpumap_kthread; - struct record xdp_cpumap_enqueue[MAX_CPUS]; - struct record xdp_devmap_xmit; -}; - -static bool map_collect_record(int fd, __u32 key, struct record *rec) -{ - /* For percpu maps, userspace gets a value per possible CPU */ - unsigned int nr_cpus = bpf_num_possible_cpus(); - struct datarec values[nr_cpus]; - __u64 sum_processed = 0; - __u64 sum_dropped = 0; - __u64 sum_info = 0; - __u64 sum_err = 0; - int i; - - if ((bpf_map_lookup_elem(fd, &key, values)) != 0) { - fprintf(stderr, - "ERR: bpf_map_lookup_elem failed key:0x%X\n", key); - return false; - } - /* Get time as close as possible to reading map contents */ - rec->timestamp = gettime(); - - /* Record and sum values from each CPU */ - for (i = 0; i < nr_cpus; i++) { - rec->cpu[i].processed = values[i].processed; - sum_processed += values[i].processed; - rec->cpu[i].dropped = values[i].dropped; - sum_dropped += values[i].dropped; - rec->cpu[i].info = values[i].info; - sum_info += values[i].info; - rec->cpu[i].err = values[i].err; - sum_err += values[i].err; - } - rec->total.processed = sum_processed; - rec->total.dropped = sum_dropped; - rec->total.info = sum_info; - rec->total.err = sum_err; - return true; -} - -static bool map_collect_record_u64(int fd, __u32 key, struct record_u64 *rec) -{ - /* For percpu maps, userspace gets a value per possible CPU */ - unsigned int nr_cpus = bpf_num_possible_cpus(); - struct u64rec values[nr_cpus]; - __u64 sum_total = 0; - int i; - - if ((bpf_map_lookup_elem(fd, &key, values)) != 0) { - fprintf(stderr, - "ERR: bpf_map_lookup_elem failed key:0x%X\n", key); - return false; - } - /* Get time as close as possible to reading map contents */ - rec->timestamp = gettime(); - - /* Record and sum values from each CPU */ - for (i = 0; i < nr_cpus; i++) { - rec->cpu[i].processed = values[i].processed; - sum_total += values[i].processed; - } - rec->total.processed = sum_total; - return true; -} - -static double calc_period(struct record *r, struct record *p) -{ - double period_ = 0; - __u64 period = 0; - - period = r->timestamp - p->timestamp; - if (period > 0) - period_ = ((double) period / NANOSEC_PER_SEC); - - return period_; -} - -static double calc_period_u64(struct record_u64 *r, struct record_u64 *p) -{ - double period_ = 0; - __u64 period = 0; - - period = r->timestamp - p->timestamp; - if (period > 0) - period_ = ((double) period / NANOSEC_PER_SEC); - - return period_; -} - -static double calc_pps(struct datarec *r, struct datarec *p, double period) -{ - __u64 packets = 0; - double pps = 0; - - if (period > 0) { - packets = r->processed - p->processed; - pps = packets / period; - } - return pps; -} - -static double calc_pps_u64(struct u64rec *r, struct u64rec *p, double period) -{ - __u64 packets = 0; - double pps = 0; - - if (period > 0) { - packets = r->processed - p->processed; - pps = packets / period; - } - return pps; -} - -static double calc_drop(struct datarec *r, struct datarec *p, double period) -{ - __u64 packets = 0; - double pps = 0; - - if (period > 0) { - packets = r->dropped - p->dropped; - pps = packets / period; - } - return pps; -} - -static double calc_info(struct datarec *r, struct datarec *p, double period) -{ - __u64 packets = 0; - double pps = 0; - - if (period > 0) { - packets = r->info - p->info; - pps = packets / period; - } - return pps; -} - -static double calc_err(struct datarec *r, struct datarec *p, double period) -{ - __u64 packets = 0; - double pps = 0; - - if (period > 0) { - packets = r->err - p->err; - pps = packets / period; - } - return pps; -} - -static void stats_print(struct stats_record *stats_rec, - struct stats_record *stats_prev, - bool err_only) -{ - unsigned int nr_cpus = bpf_num_possible_cpus(); - int rec_i = 0, i, to_cpu; - double t = 0, pps = 0; - - /* Header */ - printf("%-15s %-7s %-12s %-12s %-9s\n", - "XDP-event", "CPU:to", "pps", "drop-pps", "extra-info"); - - /* tracepoint: xdp:xdp_redirect_* */ - if (err_only) - rec_i = REDIR_ERROR; - - for (; rec_i < REDIR_RES_MAX; rec_i++) { - struct record_u64 *rec, *prev; - char *fmt1 = "%-15s %-7d %'-12.0f %'-12.0f %s\n"; - char *fmt2 = "%-15s %-7s %'-12.0f %'-12.0f %s\n"; - - rec = &stats_rec->xdp_redirect[rec_i]; - prev = &stats_prev->xdp_redirect[rec_i]; - t = calc_period_u64(rec, prev); - - for (i = 0; i < nr_cpus; i++) { - struct u64rec *r = &rec->cpu[i]; - struct u64rec *p = &prev->cpu[i]; - - pps = calc_pps_u64(r, p, t); - if (pps > 0) - printf(fmt1, "XDP_REDIRECT", i, - rec_i ? 0.0: pps, rec_i ? pps : 0.0, - err2str(rec_i)); - } - pps = calc_pps_u64(&rec->total, &prev->total, t); - printf(fmt2, "XDP_REDIRECT", "total", - rec_i ? 0.0: pps, rec_i ? pps : 0.0, err2str(rec_i)); - } - - /* tracepoint: xdp:xdp_exception */ - for (rec_i = 0; rec_i < XDP_ACTION_MAX; rec_i++) { - struct record_u64 *rec, *prev; - char *fmt1 = "%-15s %-7d %'-12.0f %'-12.0f %s\n"; - char *fmt2 = "%-15s %-7s %'-12.0f %'-12.0f %s\n"; - - rec = &stats_rec->xdp_exception[rec_i]; - prev = &stats_prev->xdp_exception[rec_i]; - t = calc_period_u64(rec, prev); - - for (i = 0; i < nr_cpus; i++) { - struct u64rec *r = &rec->cpu[i]; - struct u64rec *p = &prev->cpu[i]; - - pps = calc_pps_u64(r, p, t); - if (pps > 0) - printf(fmt1, "Exception", i, - 0.0, pps, action2str(rec_i)); - } - pps = calc_pps_u64(&rec->total, &prev->total, t); - if (pps > 0) - printf(fmt2, "Exception", "total", - 0.0, pps, action2str(rec_i)); - } - - /* cpumap enqueue stats */ - for (to_cpu = 0; to_cpu < MAX_CPUS; to_cpu++) { - char *fmt1 = "%-15s %3d:%-3d %'-12.0f %'-12.0f %'-10.2f %s\n"; - char *fmt2 = "%-15s %3s:%-3d %'-12.0f %'-12.0f %'-10.2f %s\n"; - struct record *rec, *prev; - char *info_str = ""; - double drop, info; - - rec = &stats_rec->xdp_cpumap_enqueue[to_cpu]; - prev = &stats_prev->xdp_cpumap_enqueue[to_cpu]; - t = calc_period(rec, prev); - for (i = 0; i < nr_cpus; i++) { - struct datarec *r = &rec->cpu[i]; - struct datarec *p = &prev->cpu[i]; - - pps = calc_pps(r, p, t); - drop = calc_drop(r, p, t); - info = calc_info(r, p, t); - if (info > 0) { - info_str = "bulk-average"; - info = pps / info; /* calc average bulk size */ - } - if (pps > 0) - printf(fmt1, "cpumap-enqueue", - i, to_cpu, pps, drop, info, info_str); - } - pps = calc_pps(&rec->total, &prev->total, t); - if (pps > 0) { - drop = calc_drop(&rec->total, &prev->total, t); - info = calc_info(&rec->total, &prev->total, t); - if (info > 0) { - info_str = "bulk-average"; - info = pps / info; /* calc average bulk size */ - } - printf(fmt2, "cpumap-enqueue", - "sum", to_cpu, pps, drop, info, info_str); - } - } - - /* cpumap kthread stats */ - { - char *fmt1 = "%-15s %-7d %'-12.0f %'-12.0f %'-10.0f %s\n"; - char *fmt2 = "%-15s %-7s %'-12.0f %'-12.0f %'-10.0f %s\n"; - struct record *rec, *prev; - double drop, info; - char *i_str = ""; - - rec = &stats_rec->xdp_cpumap_kthread; - prev = &stats_prev->xdp_cpumap_kthread; - t = calc_period(rec, prev); - for (i = 0; i < nr_cpus; i++) { - struct datarec *r = &rec->cpu[i]; - struct datarec *p = &prev->cpu[i]; - - pps = calc_pps(r, p, t); - drop = calc_drop(r, p, t); - info = calc_info(r, p, t); - if (info > 0) - i_str = "sched"; - if (pps > 0 || drop > 0) - printf(fmt1, "cpumap-kthread", - i, pps, drop, info, i_str); - } - pps = calc_pps(&rec->total, &prev->total, t); - drop = calc_drop(&rec->total, &prev->total, t); - info = calc_info(&rec->total, &prev->total, t); - if (info > 0) - i_str = "sched-sum"; - printf(fmt2, "cpumap-kthread", "total", pps, drop, info, i_str); - } - - /* devmap ndo_xdp_xmit stats */ - { - char *fmt1 = "%-15s %-7d %'-12.0f %'-12.0f %'-10.2f %s %s\n"; - char *fmt2 = "%-15s %-7s %'-12.0f %'-12.0f %'-10.2f %s %s\n"; - struct record *rec, *prev; - double drop, info, err; - char *i_str = ""; - char *err_str = ""; - - rec = &stats_rec->xdp_devmap_xmit; - prev = &stats_prev->xdp_devmap_xmit; - t = calc_period(rec, prev); - for (i = 0; i < nr_cpus; i++) { - struct datarec *r = &rec->cpu[i]; - struct datarec *p = &prev->cpu[i]; - - pps = calc_pps(r, p, t); - drop = calc_drop(r, p, t); - info = calc_info(r, p, t); - err = calc_err(r, p, t); - if (info > 0) { - i_str = "bulk-average"; - info = (pps+drop) / info; /* calc avg bulk */ - } - if (err > 0) - err_str = "drv-err"; - if (pps > 0 || drop > 0) - printf(fmt1, "devmap-xmit", - i, pps, drop, info, i_str, err_str); - } - pps = calc_pps(&rec->total, &prev->total, t); - drop = calc_drop(&rec->total, &prev->total, t); - info = calc_info(&rec->total, &prev->total, t); - err = calc_err(&rec->total, &prev->total, t); - if (info > 0) { - i_str = "bulk-average"; - info = (pps+drop) / info; /* calc avg bulk */ - } - if (err > 0) - err_str = "drv-err"; - printf(fmt2, "devmap-xmit", "total", pps, drop, - info, i_str, err_str); - } - - printf("\n"); -} - -static bool stats_collect(struct stats_record *rec) -{ - int fd; - int i; - - /* TODO: Detect if someone unloaded the perf event_fd's, as - * this can happen by someone running perf-record -e - */ - - fd = bpf_map__fd(map_data[REDIRECT_ERR_CNT]); - for (i = 0; i < REDIR_RES_MAX; i++) - map_collect_record_u64(fd, i, &rec->xdp_redirect[i]); - - fd = bpf_map__fd(map_data[EXCEPTION_CNT]); - for (i = 0; i < XDP_ACTION_MAX; i++) { - map_collect_record_u64(fd, i, &rec->xdp_exception[i]); - } - - fd = bpf_map__fd(map_data[CPUMAP_ENQUEUE_CNT]); - for (i = 0; i < MAX_CPUS; i++) - map_collect_record(fd, i, &rec->xdp_cpumap_enqueue[i]); - - fd = bpf_map__fd(map_data[CPUMAP_KTHREAD_CNT]); - map_collect_record(fd, 0, &rec->xdp_cpumap_kthread); - - fd = bpf_map__fd(map_data[DEVMAP_XMIT_CNT]); - map_collect_record(fd, 0, &rec->xdp_devmap_xmit); - - return true; -} - -static void *alloc_rec_per_cpu(int record_size) -{ - unsigned int nr_cpus = bpf_num_possible_cpus(); - void *array; - - array = calloc(nr_cpus, record_size); - if (!array) { - fprintf(stderr, "Mem alloc error (nr_cpus:%u)\n", nr_cpus); - exit(EXIT_FAIL_MEM); - } - return array; -} - -static struct stats_record *alloc_stats_record(void) -{ - struct stats_record *rec; - int rec_sz; - int i; - - /* Alloc main stats_record structure */ - rec = calloc(1, sizeof(*rec)); - if (!rec) { - fprintf(stderr, "Mem alloc error\n"); - exit(EXIT_FAIL_MEM); - } - - /* Alloc stats stored per CPU for each record */ - rec_sz = sizeof(struct u64rec); - for (i = 0; i < REDIR_RES_MAX; i++) - rec->xdp_redirect[i].cpu = alloc_rec_per_cpu(rec_sz); - - for (i = 0; i < XDP_ACTION_MAX; i++) - rec->xdp_exception[i].cpu = alloc_rec_per_cpu(rec_sz); - - rec_sz = sizeof(struct datarec); - rec->xdp_cpumap_kthread.cpu = alloc_rec_per_cpu(rec_sz); - rec->xdp_devmap_xmit.cpu = alloc_rec_per_cpu(rec_sz); - - for (i = 0; i < MAX_CPUS; i++) - rec->xdp_cpumap_enqueue[i].cpu = alloc_rec_per_cpu(rec_sz); - - return rec; -} - -static void free_stats_record(struct stats_record *r) -{ - int i; - - for (i = 0; i < REDIR_RES_MAX; i++) - free(r->xdp_redirect[i].cpu); - - for (i = 0; i < XDP_ACTION_MAX; i++) - free(r->xdp_exception[i].cpu); - - free(r->xdp_cpumap_kthread.cpu); - free(r->xdp_devmap_xmit.cpu); - - for (i = 0; i < MAX_CPUS; i++) - free(r->xdp_cpumap_enqueue[i].cpu); - - free(r); -} - -/* Pointer swap trick */ -static inline void swap(struct stats_record **a, struct stats_record **b) -{ - struct stats_record *tmp; - - tmp = *a; - *a = *b; - *b = tmp; -} - -static void stats_poll(int interval, bool err_only) -{ - struct stats_record *rec, *prev; - - rec = alloc_stats_record(); - prev = alloc_stats_record(); - stats_collect(rec); - - if (err_only) - printf("\n%s\n", __doc_err_only__); - - /* Trick to pretty printf with thousands separators use %' */ - setlocale(LC_NUMERIC, "en_US"); - - /* Header */ - if (verbose) - printf("\n%s", __doc__); - - /* TODO Need more advanced stats on error types */ - if (verbose) { - printf(" - Stats map0: %s\n", bpf_map__name(map_data[0])); - printf(" - Stats map1: %s\n", bpf_map__name(map_data[1])); - printf("\n"); - } - fflush(stdout); - - while (1) { - swap(&prev, &rec); - stats_collect(rec); - stats_print(rec, prev, err_only); - fflush(stdout); - sleep(interval); - } - - free_stats_record(rec); - free_stats_record(prev); -} - -static void print_bpf_prog_info(void) -{ - struct bpf_program *prog; - struct bpf_map *map; - int i = 0; - - /* Prog info */ - printf("Loaded BPF prog have %d bpf program(s)\n", tp_cnt); - bpf_object__for_each_program(prog, obj) { - printf(" - prog_fd[%d] = fd(%d)\n", i, bpf_program__fd(prog)); - i++; - } - - i = 0; - /* Maps info */ - printf("Loaded BPF prog have %d map(s)\n", map_cnt); - bpf_object__for_each_map(map, obj) { - const char *name = bpf_map__name(map); - int fd = bpf_map__fd(map); - - printf(" - map_data[%d] = fd(%d) name:%s\n", i, fd, name); - i++; - } - - /* Event info */ - printf("Searching for (max:%d) event file descriptor(s)\n", tp_cnt); - for (i = 0; i < tp_cnt; i++) { - int fd = bpf_link__fd(tp_links[i]); - - if (fd != -1) - printf(" - event_fd[%d] = fd(%d)\n", i, fd); - } -} int main(int argc, char **argv) { - struct bpf_program *prog; - int longindex = 0, opt; - int ret = EXIT_FAILURE; - enum map_type type; - char filename[256]; - - /* Default settings: */ + unsigned long interval = 2; + int ret = EXIT_FAIL_OPTION; + struct xdp_monitor *skel; bool errors_only = true; - int interval = 2; + int longindex = 0, opt; + bool error = true; /* Parse commands line args */ - while ((opt = getopt_long(argc, argv, "hDSs:", + while ((opt = getopt_long(argc, argv, "si:vh", long_options, &longindex)) != -1) { switch (opt) { - case 'D': - debug = true; - break; - case 'S': + case 's': errors_only = false; + mask |= SAMPLE_REDIRECT_CNT; break; - case 's': - interval = atoi(optarg); + case 'i': + interval = strtoul(optarg, NULL, 0); + break; + case 'v': + sample_switch_mode(); break; case 'h': + error = false; default: - usage(argv); + sample_usage(argv, long_options, __doc__, mask, error); return ret; } } - snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); - - /* Remove tracepoint program when program is interrupted or killed */ - signal(SIGINT, int_exit); - signal(SIGTERM, int_exit); - - obj = bpf_object__open_file(filename, NULL); - if (libbpf_get_error(obj)) { - printf("ERROR: opening BPF object file failed\n"); - obj = NULL; - goto cleanup; - } - - /* load BPF program */ - if (bpf_object__load(obj)) { - printf("ERROR: loading BPF object file failed\n"); - goto cleanup; + skel = xdp_monitor__open(); + if (!skel) { + fprintf(stderr, "Failed to xdp_monitor__open: %s\n", + strerror(errno)); + ret = EXIT_FAIL_BPF; + goto end; } - for (type = 0; type < NUM_MAP; type++) { - map_data[type] = - bpf_object__find_map_by_name(obj, map_type_strings[type]); - - if (libbpf_get_error(map_data[type])) { - printf("ERROR: finding a map in obj file failed\n"); - goto cleanup; - } - map_cnt++; + ret = sample_init_pre_load(skel); + if (ret < 0) { + fprintf(stderr, "Failed to sample_init_pre_load: %s\n", strerror(-ret)); + ret = EXIT_FAIL_BPF; + goto end_destroy; } - bpf_object__for_each_program(prog, obj) { - tp_links[tp_cnt] = bpf_program__attach(prog); - if (libbpf_get_error(tp_links[tp_cnt])) { - printf("ERROR: bpf_program__attach failed\n"); - tp_links[tp_cnt] = NULL; - goto cleanup; - } - tp_cnt++; + ret = xdp_monitor__load(skel); + if (ret < 0) { + fprintf(stderr, "Failed to xdp_monitor__load: %s\n", strerror(errno)); + ret = EXIT_FAIL_BPF; + goto end_destroy; } - if (debug) { - print_bpf_prog_info(); + ret = sample_init(skel, mask); + if (ret < 0) { + fprintf(stderr, "Failed to initialize sample: %s\n", strerror(-ret)); + ret = EXIT_FAIL_BPF; + goto end_destroy; } - /* Unload/stop tracepoint event by closing bpf_link's */ - if (errors_only) { - /* The bpf_link[i] depend on the order of - * the functions was defined in _kern.c - */ - bpf_link__destroy(tp_links[2]); /* tracepoint/xdp/xdp_redirect */ - tp_links[2] = NULL; + if (errors_only) + printf("%s", __doc_err_only__); - bpf_link__destroy(tp_links[3]); /* tracepoint/xdp/xdp_redirect_map */ - tp_links[3] = NULL; + ret = sample_run(interval, NULL, NULL); + if (ret < 0) { + fprintf(stderr, "Failed during sample run: %s\n", strerror(-ret)); + ret = EXIT_FAIL; + goto end_destroy; } - - stats_poll(interval, errors_only); - - ret = EXIT_SUCCESS; - -cleanup: - /* Detach tracepoints */ - while (tp_cnt) - bpf_link__destroy(tp_links[--tp_cnt]); - - bpf_object__close(obj); - return ret; + ret = EXIT_OK; +end_destroy: + xdp_monitor__destroy(skel); +end: + sample_exit(ret); } -- cgit v1.2.3-59-g8ed1b From 66fc4ca85d910bdeecf019c3999bc2df7c80b726 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Sat, 21 Aug 2021 05:50:03 +0530 Subject: samples: bpf: Convert xdp_redirect_kern.o to XDP samples helper We moved swap_src_dst_mac to xdp_sample.bpf.h to be shared with other potential users, so drop it while moving code to the new file. Also, consistently use SEC("xdp") naming instead. Signed-off-by: Kumar Kartikeya Dwivedi Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20210821002010.845777-16-memxor@gmail.com --- samples/bpf/Makefile | 5 ++- samples/bpf/xdp_redirect.bpf.c | 49 ++++++++++++++++++++++ samples/bpf/xdp_redirect_kern.c | 90 ----------------------------------------- 3 files changed, 52 insertions(+), 92 deletions(-) create mode 100644 samples/bpf/xdp_redirect.bpf.c delete mode 100644 samples/bpf/xdp_redirect_kern.c (limited to 'samples') diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 479778439f5e..0b94a6acb348 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -163,7 +163,6 @@ always-y += tcp_clamp_kern.o always-y += tcp_basertt_kern.o always-y += tcp_tos_reflect_kern.o always-y += tcp_dumpstats_kern.o -always-y += xdp_redirect_kern.o always-y += xdp_redirect_map_kern.o always-y += xdp_redirect_map_multi_kern.o always-y += xdp_redirect_cpu_kern.o @@ -356,6 +355,7 @@ endef CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG)) +$(obj)/xdp_redirect.bpf.o: $(obj)/xdp_sample.bpf.o $(obj)/xdp_monitor.bpf.o: $(obj)/xdp_sample.bpf.o $(obj)/%.bpf.o: $(src)/%.bpf.c $(obj)/vmlinux.h $(src)/xdp_sample.bpf.h $(src)/xdp_sample_shared.h @@ -366,9 +366,10 @@ $(obj)/%.bpf.o: $(src)/%.bpf.c $(obj)/vmlinux.h $(src)/xdp_sample.bpf.h $(src)/x -I$(srctree)/tools/lib $(CLANG_SYS_INCLUDES) \ -c $(filter %.bpf.c,$^) -o $@ -LINKED_SKELS := xdp_monitor.skel.h +LINKED_SKELS := xdp_redirect.skel.h xdp_monitor.skel.h clean-files += $(LINKED_SKELS) +xdp_redirect.skel.h-deps := xdp_redirect.bpf.o xdp_sample.bpf.o xdp_monitor.skel.h-deps := xdp_monitor.bpf.o xdp_sample.bpf.o LINKED_BPF_SRCS := $(patsubst %.bpf.o,%.bpf.c,$(foreach skel,$(LINKED_SKELS),$($(skel)-deps))) diff --git a/samples/bpf/xdp_redirect.bpf.c b/samples/bpf/xdp_redirect.bpf.c new file mode 100644 index 000000000000..7c02bacfe96b --- /dev/null +++ b/samples/bpf/xdp_redirect.bpf.c @@ -0,0 +1,49 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2016 John Fastabend + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#include "vmlinux.h" +#include "xdp_sample.bpf.h" +#include "xdp_sample_shared.h" + +const volatile int ifindex_out; + +SEC("xdp") +int xdp_redirect_prog(struct xdp_md *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + u32 key = bpf_get_smp_processor_id(); + struct ethhdr *eth = data; + struct datarec *rec; + u64 nh_off; + + nh_off = sizeof(*eth); + if (data + nh_off > data_end) + return XDP_DROP; + + rec = bpf_map_lookup_elem(&rx_cnt, &key); + if (!rec) + return XDP_PASS; + NO_TEAR_INC(rec->processed); + + swap_src_dst_mac(data); + return bpf_redirect(ifindex_out, 0); +} + +/* Redirect require an XDP bpf_prog loaded on the TX device */ +SEC("xdp") +int xdp_redirect_dummy_prog(struct xdp_md *ctx) +{ + return XDP_PASS; +} + +char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/xdp_redirect_kern.c b/samples/bpf/xdp_redirect_kern.c deleted file mode 100644 index d26ec3aa215e..000000000000 --- a/samples/bpf/xdp_redirect_kern.c +++ /dev/null @@ -1,90 +0,0 @@ -/* Copyright (c) 2016 John Fastabend - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - */ -#define KBUILD_MODNAME "foo" -#include -#include -#include -#include -#include -#include -#include -#include - -struct { - __uint(type, BPF_MAP_TYPE_ARRAY); - __type(key, int); - __type(value, int); - __uint(max_entries, 1); -} tx_port SEC(".maps"); - -/* Count RX packets, as XDP bpf_prog doesn't get direct TX-success - * feedback. Redirect TX errors can be caught via a tracepoint. - */ -struct { - __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); - __type(key, u32); - __type(value, long); - __uint(max_entries, 1); -} rxcnt SEC(".maps"); - -static void swap_src_dst_mac(void *data) -{ - unsigned short *p = data; - unsigned short dst[3]; - - dst[0] = p[0]; - dst[1] = p[1]; - dst[2] = p[2]; - p[0] = p[3]; - p[1] = p[4]; - p[2] = p[5]; - p[3] = dst[0]; - p[4] = dst[1]; - p[5] = dst[2]; -} - -SEC("xdp_redirect") -int xdp_redirect_prog(struct xdp_md *ctx) -{ - void *data_end = (void *)(long)ctx->data_end; - void *data = (void *)(long)ctx->data; - struct ethhdr *eth = data; - int rc = XDP_DROP; - int *ifindex, port = 0; - long *value; - u32 key = 0; - u64 nh_off; - - nh_off = sizeof(*eth); - if (data + nh_off > data_end) - return rc; - - ifindex = bpf_map_lookup_elem(&tx_port, &port); - if (!ifindex) - return rc; - - value = bpf_map_lookup_elem(&rxcnt, &key); - if (value) - *value += 1; - - swap_src_dst_mac(data); - return bpf_redirect(*ifindex, 0); -} - -/* Redirect require an XDP bpf_prog loaded on the TX device */ -SEC("xdp_redirect_dummy") -int xdp_redirect_dummy_prog(struct xdp_md *ctx) -{ - return XDP_PASS; -} - -char _license[] SEC("license") = "GPL"; -- cgit v1.2.3-59-g8ed1b From b926c55d856cbe2593c44e783b31e2cdb0ef6371 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Sat, 21 Aug 2021 05:50:04 +0530 Subject: samples: bpf: Convert xdp_redirect to XDP samples helper Use the libbpf skeleton facility and other utilities provided by XDP samples helper. One important note: The XDP samples helper handles ownership of installed XDP programs on devices, including responding to SIGINT and SIGTERM, so drop the code here and use the helpers we provide going forward for all xdp_redirect* conversions. Signed-off-by: Kumar Kartikeya Dwivedi Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20210821002010.845777-17-memxor@gmail.com --- samples/bpf/Makefile | 5 +- samples/bpf/xdp_redirect_user.c | 270 +++++++++++++++++----------------------- 2 files changed, 116 insertions(+), 159 deletions(-) (limited to 'samples') diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 0b94a6acb348..d05105227ec5 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -39,7 +39,6 @@ tprogs-y += lwt_len_hist tprogs-y += xdp_tx_iptunnel tprogs-y += test_map_in_map tprogs-y += per_socket_stats_example -tprogs-y += xdp_redirect tprogs-y += xdp_redirect_map tprogs-y += xdp_redirect_map_multi tprogs-y += xdp_redirect_cpu @@ -56,6 +55,7 @@ tprogs-y += xdp_sample_pkts tprogs-y += ibumad tprogs-y += hbm +tprogs-y += xdp_redirect tprogs-y += xdp_monitor # Libbpf dependencies @@ -100,7 +100,6 @@ lwt_len_hist-objs := lwt_len_hist_user.o xdp_tx_iptunnel-objs := xdp_tx_iptunnel_user.o test_map_in_map-objs := test_map_in_map_user.o per_socket_stats_example-objs := cookie_uid_helper_example.o -xdp_redirect-objs := xdp_redirect_user.o xdp_redirect_map-objs := xdp_redirect_map_user.o xdp_redirect_map_multi-objs := xdp_redirect_map_multi_user.o xdp_redirect_cpu-objs := xdp_redirect_cpu_user.o @@ -117,6 +116,7 @@ xdp_sample_pkts-objs := xdp_sample_pkts_user.o ibumad-objs := ibumad_user.o hbm-objs := hbm.o $(CGROUP_HELPERS) +xdp_redirect-objs := xdp_redirect_user.o $(XDP_SAMPLE) xdp_monitor-objs := xdp_monitor_user.o $(XDP_SAMPLE) # Tell kbuild to always build the programs @@ -312,6 +312,7 @@ verify_target_bpf: verify_cmds $(BPF_SAMPLES_PATH)/*.c: verify_target_bpf $(LIBBPF) $(src)/*.c: verify_target_bpf $(LIBBPF) +$(obj)/xdp_redirect_user.o: $(obj)/xdp_redirect.skel.h $(obj)/xdp_monitor_user.o: $(obj)/xdp_monitor.skel.h $(obj)/tracex5_kern.o: $(obj)/syscall_nrs.h diff --git a/samples/bpf/xdp_redirect_user.c b/samples/bpf/xdp_redirect_user.c index 93854e135134..7af5b07a7523 100644 --- a/samples/bpf/xdp_redirect_user.c +++ b/samples/bpf/xdp_redirect_user.c @@ -1,6 +1,10 @@ // SPDX-License-Identifier: GPL-2.0-only /* Copyright (c) 2016 John Fastabend */ +static const char *__doc__ = +"XDP redirect tool, using bpf_redirect helper\n" +"Usage: xdp_redirect _IN _OUT\n"; + #include #include #include @@ -13,126 +17,73 @@ #include #include #include +#include #include - -#include "bpf_util.h" #include #include +#include "bpf_util.h" +#include "xdp_sample_user.h" +#include "xdp_redirect.skel.h" -static int ifindex_in; -static int ifindex_out; -static bool ifindex_out_xdp_dummy_attached = true; -static __u32 prog_id; -static __u32 dummy_prog_id; - -static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; -static int rxcnt_map_fd; - -static void int_exit(int sig) -{ - __u32 curr_prog_id = 0; - - if (bpf_get_link_xdp_id(ifindex_in, &curr_prog_id, xdp_flags)) { - printf("bpf_get_link_xdp_id failed\n"); - exit(1); - } - if (prog_id == curr_prog_id) - bpf_set_link_xdp_fd(ifindex_in, -1, xdp_flags); - else if (!curr_prog_id) - printf("couldn't find a prog id on iface IN\n"); - else - printf("program on iface IN changed, not removing\n"); - - if (ifindex_out_xdp_dummy_attached) { - curr_prog_id = 0; - if (bpf_get_link_xdp_id(ifindex_out, &curr_prog_id, - xdp_flags)) { - printf("bpf_get_link_xdp_id failed\n"); - exit(1); - } - if (dummy_prog_id == curr_prog_id) - bpf_set_link_xdp_fd(ifindex_out, -1, xdp_flags); - else if (!curr_prog_id) - printf("couldn't find a prog id on iface OUT\n"); - else - printf("program on iface OUT changed, not removing\n"); - } - exit(0); -} - -static void poll_stats(int interval, int ifindex) -{ - unsigned int nr_cpus = bpf_num_possible_cpus(); - __u64 values[nr_cpus], prev[nr_cpus]; - - memset(prev, 0, sizeof(prev)); - - while (1) { - __u64 sum = 0; - __u32 key = 0; - int i; - - sleep(interval); - assert(bpf_map_lookup_elem(rxcnt_map_fd, &key, values) == 0); - for (i = 0; i < nr_cpus; i++) - sum += (values[i] - prev[i]); - if (sum) - printf("ifindex %i: %10llu pkt/s\n", - ifindex, sum / interval); - memcpy(prev, values, sizeof(values)); - } -} +static int mask = SAMPLE_RX_CNT | SAMPLE_REDIRECT_ERR_CNT | + SAMPLE_EXCEPTION_CNT | SAMPLE_DEVMAP_XMIT_CNT_MULTI; -static void usage(const char *prog) -{ - fprintf(stderr, - "usage: %s [OPTS] _IN _OUT\n\n" - "OPTS:\n" - " -S use skb-mode\n" - " -N enforce native mode\n" - " -F force loading prog\n", - prog); -} +DEFINE_SAMPLE_INIT(xdp_redirect); +static const struct option long_options[] = { + {"help", no_argument, NULL, 'h' }, + {"skb-mode", no_argument, NULL, 'S' }, + {"force", no_argument, NULL, 'F' }, + {"stats", no_argument, NULL, 's' }, + {"interval", required_argument, NULL, 'i' }, + {"verbose", no_argument, NULL, 'v' }, + {} +}; int main(int argc, char **argv) { - struct bpf_prog_load_attr prog_load_attr = { - .prog_type = BPF_PROG_TYPE_XDP, - }; - struct bpf_program *prog, *dummy_prog; - int prog_fd, tx_port_map_fd, opt; - struct bpf_prog_info info = {}; - __u32 info_len = sizeof(info); - const char *optstr = "FSN"; - struct bpf_object *obj; - char filename[256]; - int dummy_prog_fd; - int ret, key = 0; - - while ((opt = getopt(argc, argv, optstr)) != -1) { + int ifindex_in, ifindex_out, opt; + char str[2 * IF_NAMESIZE + 1]; + char ifname_out[IF_NAMESIZE]; + char ifname_in[IF_NAMESIZE]; + int ret = EXIT_FAIL_OPTION; + unsigned long interval = 2; + struct xdp_redirect *skel; + bool generic = false; + bool force = false; + bool error = true; + + while ((opt = getopt_long(argc, argv, "hSFi:vs", + long_options, NULL)) != -1) { switch (opt) { case 'S': - xdp_flags |= XDP_FLAGS_SKB_MODE; - break; - case 'N': - /* default, set below */ + generic = true; + mask &= ~(SAMPLE_DEVMAP_XMIT_CNT | + SAMPLE_DEVMAP_XMIT_CNT_MULTI); break; case 'F': - xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST; + force = true; + break; + case 'i': + interval = strtoul(optarg, NULL, 0); + break; + case 'v': + sample_switch_mode(); + break; + case 's': + mask |= SAMPLE_REDIRECT_CNT; break; + case 'h': + error = false; default: - usage(basename(argv[0])); - return 1; + sample_usage(argv, long_options, __doc__, mask, error); + return ret; } } - if (!(xdp_flags & XDP_FLAGS_SKB_MODE)) - xdp_flags |= XDP_FLAGS_DRV_MODE; - - if (optind + 2 != argc) { - printf("usage: %s _IN _OUT\n", argv[0]); - return 1; + if (argc <= optind + 1) { + sample_usage(argv, long_options, __doc__, mask, true); + return ret; } ifindex_in = if_nametoindex(argv[optind]); @@ -143,75 +94,80 @@ int main(int argc, char **argv) if (!ifindex_out) ifindex_out = strtoul(argv[optind + 1], NULL, 0); - printf("input: %d output: %d\n", ifindex_in, ifindex_out); - - snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); - prog_load_attr.file = filename; - - if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd)) - return 1; - - prog = bpf_program__next(NULL, obj); - dummy_prog = bpf_program__next(prog, obj); - if (!prog || !dummy_prog) { - printf("finding a prog in obj file failed\n"); - return 1; + if (!ifindex_in || !ifindex_out) { + fprintf(stderr, "Bad interface index or name\n"); + sample_usage(argv, long_options, __doc__, mask, true); + goto end; } - /* bpf_prog_load_xattr gives us the pointer to first prog's fd, - * so we're missing only the fd for dummy prog - */ - dummy_prog_fd = bpf_program__fd(dummy_prog); - if (prog_fd < 0 || dummy_prog_fd < 0) { - printf("bpf_prog_load_xattr: %s\n", strerror(errno)); - return 1; + + skel = xdp_redirect__open(); + if (!skel) { + fprintf(stderr, "Failed to xdp_redirect__open: %s\n", strerror(errno)); + ret = EXIT_FAIL_BPF; + goto end; } - tx_port_map_fd = bpf_object__find_map_fd_by_name(obj, "tx_port"); - rxcnt_map_fd = bpf_object__find_map_fd_by_name(obj, "rxcnt"); - if (tx_port_map_fd < 0 || rxcnt_map_fd < 0) { - printf("bpf_object__find_map_fd_by_name failed\n"); - return 1; + ret = sample_init_pre_load(skel); + if (ret < 0) { + fprintf(stderr, "Failed to sample_init_pre_load: %s\n", strerror(-ret)); + ret = EXIT_FAIL_BPF; + goto end_destroy; } - if (bpf_set_link_xdp_fd(ifindex_in, prog_fd, xdp_flags) < 0) { - printf("ERROR: link set xdp fd failed on %d\n", ifindex_in); - return 1; + skel->rodata->from_match[0] = ifindex_in; + skel->rodata->to_match[0] = ifindex_out; + skel->rodata->ifindex_out = ifindex_out; + + ret = xdp_redirect__load(skel); + if (ret < 0) { + fprintf(stderr, "Failed to xdp_redirect__load: %s\n", strerror(errno)); + ret = EXIT_FAIL_BPF; + goto end_destroy; } - ret = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); - if (ret) { - printf("can't get prog info - %s\n", strerror(errno)); - return ret; + ret = sample_init(skel, mask); + if (ret < 0) { + fprintf(stderr, "Failed to initialize sample: %s\n", strerror(-ret)); + ret = EXIT_FAIL; + goto end_destroy; } - prog_id = info.id; + + ret = EXIT_FAIL_XDP; + if (sample_install_xdp(skel->progs.xdp_redirect_prog, ifindex_in, + generic, force) < 0) + goto end_destroy; /* Loading dummy XDP prog on out-device */ - if (bpf_set_link_xdp_fd(ifindex_out, dummy_prog_fd, - (xdp_flags | XDP_FLAGS_UPDATE_IF_NOEXIST)) < 0) { - printf("WARN: link set xdp fd failed on %d\n", ifindex_out); - ifindex_out_xdp_dummy_attached = false; + sample_install_xdp(skel->progs.xdp_redirect_dummy_prog, ifindex_out, + generic, force); + + ret = EXIT_FAIL; + if (!if_indextoname(ifindex_in, ifname_in)) { + fprintf(stderr, "Failed to if_indextoname for %d: %s\n", ifindex_in, + strerror(errno)); + goto end_destroy; } - memset(&info, 0, sizeof(info)); - ret = bpf_obj_get_info_by_fd(dummy_prog_fd, &info, &info_len); - if (ret) { - printf("can't get prog info - %s\n", strerror(errno)); - return ret; + if (!if_indextoname(ifindex_out, ifname_out)) { + fprintf(stderr, "Failed to if_indextoname for %d: %s\n", ifindex_out, + strerror(errno)); + goto end_destroy; } - dummy_prog_id = info.id; - signal(SIGINT, int_exit); - signal(SIGTERM, int_exit); + safe_strncpy(str, get_driver_name(ifindex_in), sizeof(str)); + printf("Redirecting from %s (ifindex %d; driver %s) to %s (ifindex %d; driver %s)\n", + ifname_in, ifindex_in, str, ifname_out, ifindex_out, get_driver_name(ifindex_out)); + snprintf(str, sizeof(str), "%s->%s", ifname_in, ifname_out); - /* bpf redirect port */ - ret = bpf_map_update_elem(tx_port_map_fd, &key, &ifindex_out, 0); - if (ret) { - perror("bpf_update_elem"); - goto out; + ret = sample_run(interval, NULL, NULL); + if (ret < 0) { + fprintf(stderr, "Failed during sample run: %s\n", strerror(-ret)); + ret = EXIT_FAIL; + goto end_destroy; } - - poll_stats(2, ifindex_out); - -out: - return ret; + ret = EXIT_OK; +end_destroy: + xdp_redirect__destroy(skel); +end: + sample_exit(ret); } -- cgit v1.2.3-59-g8ed1b From 79ccf4529ee67510fdad8ecdfcf37528d353a36c Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Sat, 21 Aug 2021 05:50:05 +0530 Subject: samples: bpf: Convert xdp_redirect_cpu_kern.o to XDP samples helper Similar to xdp_monitor_kern, a lot of these BPF programs have been reimplemented properly consolidating missing features from other XDP samples. Hence, drop the unneeded code and rename to .bpf.c suffix. Signed-off-by: Kumar Kartikeya Dwivedi Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20210821002010.845777-18-memxor@gmail.com --- samples/bpf/Makefile | 5 +- samples/bpf/xdp_redirect_cpu.bpf.c | 541 ++++++++++++++++++++++++++ samples/bpf/xdp_redirect_cpu_kern.c | 730 ------------------------------------ 3 files changed, 544 insertions(+), 732 deletions(-) create mode 100644 samples/bpf/xdp_redirect_cpu.bpf.c delete mode 100644 samples/bpf/xdp_redirect_cpu_kern.c (limited to 'samples') diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index d05105227ec5..231cdbc773a7 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -165,7 +165,6 @@ always-y += tcp_tos_reflect_kern.o always-y += tcp_dumpstats_kern.o always-y += xdp_redirect_map_kern.o always-y += xdp_redirect_map_multi_kern.o -always-y += xdp_redirect_cpu_kern.o always-y += xdp_rxq_info_kern.o always-y += xdp2skb_meta_kern.o always-y += syscall_tp_kern.o @@ -356,6 +355,7 @@ endef CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG)) +$(obj)/xdp_redirect_cpu.bpf.o: $(obj)/xdp_sample.bpf.o $(obj)/xdp_redirect.bpf.o: $(obj)/xdp_sample.bpf.o $(obj)/xdp_monitor.bpf.o: $(obj)/xdp_sample.bpf.o @@ -367,9 +367,10 @@ $(obj)/%.bpf.o: $(src)/%.bpf.c $(obj)/vmlinux.h $(src)/xdp_sample.bpf.h $(src)/x -I$(srctree)/tools/lib $(CLANG_SYS_INCLUDES) \ -c $(filter %.bpf.c,$^) -o $@ -LINKED_SKELS := xdp_redirect.skel.h xdp_monitor.skel.h +LINKED_SKELS := xdp_redirect_cpu.skel.h xdp_redirect.skel.h xdp_monitor.skel.h clean-files += $(LINKED_SKELS) +xdp_redirect_cpu.skel.h-deps := xdp_redirect_cpu.bpf.o xdp_sample.bpf.o xdp_redirect.skel.h-deps := xdp_redirect.bpf.o xdp_sample.bpf.o xdp_monitor.skel.h-deps := xdp_monitor.bpf.o xdp_sample.bpf.o diff --git a/samples/bpf/xdp_redirect_cpu.bpf.c b/samples/bpf/xdp_redirect_cpu.bpf.c new file mode 100644 index 000000000000..f10fe3cf25f6 --- /dev/null +++ b/samples/bpf/xdp_redirect_cpu.bpf.c @@ -0,0 +1,541 @@ +/* XDP redirect to CPUs via cpumap (BPF_MAP_TYPE_CPUMAP) + * + * GPLv2, Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc. + */ +#include "vmlinux.h" +#include "xdp_sample.bpf.h" +#include "xdp_sample_shared.h" +#include "hash_func01.h" + +/* Special map type that can XDP_REDIRECT frames to another CPU */ +struct { + __uint(type, BPF_MAP_TYPE_CPUMAP); + __uint(key_size, sizeof(u32)); + __uint(value_size, sizeof(struct bpf_cpumap_val)); +} cpu_map SEC(".maps"); + +/* Set of maps controlling available CPU, and for iterating through + * selectable redirect CPUs. + */ +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, u32); + __type(value, u32); +} cpus_available SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, u32); + __type(value, u32); + __uint(max_entries, 1); +} cpus_count SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __type(key, u32); + __type(value, u32); + __uint(max_entries, 1); +} cpus_iterator SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_DEVMAP); + __uint(key_size, sizeof(int)); + __uint(value_size, sizeof(struct bpf_devmap_val)); + __uint(max_entries, 1); +} tx_port SEC(".maps"); + +char tx_mac_addr[ETH_ALEN]; + +/* Helper parse functions */ + +static __always_inline +bool parse_eth(struct ethhdr *eth, void *data_end, + u16 *eth_proto, u64 *l3_offset) +{ + u16 eth_type; + u64 offset; + + offset = sizeof(*eth); + if ((void *)eth + offset > data_end) + return false; + + eth_type = eth->h_proto; + + /* Skip non 802.3 Ethertypes */ + if (__builtin_expect(bpf_ntohs(eth_type) < ETH_P_802_3_MIN, 0)) + return false; + + /* Handle VLAN tagged packet */ + if (eth_type == bpf_htons(ETH_P_8021Q) || + eth_type == bpf_htons(ETH_P_8021AD)) { + struct vlan_hdr *vlan_hdr; + + vlan_hdr = (void *)eth + offset; + offset += sizeof(*vlan_hdr); + if ((void *)eth + offset > data_end) + return false; + eth_type = vlan_hdr->h_vlan_encapsulated_proto; + } + /* Handle double VLAN tagged packet */ + if (eth_type == bpf_htons(ETH_P_8021Q) || + eth_type == bpf_htons(ETH_P_8021AD)) { + struct vlan_hdr *vlan_hdr; + + vlan_hdr = (void *)eth + offset; + offset += sizeof(*vlan_hdr); + if ((void *)eth + offset > data_end) + return false; + eth_type = vlan_hdr->h_vlan_encapsulated_proto; + } + + *eth_proto = bpf_ntohs(eth_type); + *l3_offset = offset; + return true; +} + +static __always_inline +u16 get_dest_port_ipv4_udp(struct xdp_md *ctx, u64 nh_off) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct iphdr *iph = data + nh_off; + struct udphdr *udph; + u16 dport; + + if (iph + 1 > data_end) + return 0; + if (!(iph->protocol == IPPROTO_UDP)) + return 0; + + udph = (void *)(iph + 1); + if (udph + 1 > data_end) + return 0; + + dport = bpf_ntohs(udph->dest); + return dport; +} + +static __always_inline +int get_proto_ipv4(struct xdp_md *ctx, u64 nh_off) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct iphdr *iph = data + nh_off; + + if (iph + 1 > data_end) + return 0; + return iph->protocol; +} + +static __always_inline +int get_proto_ipv6(struct xdp_md *ctx, u64 nh_off) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct ipv6hdr *ip6h = data + nh_off; + + if (ip6h + 1 > data_end) + return 0; + return ip6h->nexthdr; +} + +SEC("xdp") +int xdp_prognum0_no_touch(struct xdp_md *ctx) +{ + u32 key = bpf_get_smp_processor_id(); + struct datarec *rec; + u32 *cpu_selected; + u32 cpu_dest = 0; + u32 key0 = 0; + + /* Only use first entry in cpus_available */ + cpu_selected = bpf_map_lookup_elem(&cpus_available, &key0); + if (!cpu_selected) + return XDP_ABORTED; + cpu_dest = *cpu_selected; + + rec = bpf_map_lookup_elem(&rx_cnt, &key); + if (!rec) + return XDP_PASS; + NO_TEAR_INC(rec->processed); + + if (cpu_dest >= nr_cpus) { + NO_TEAR_INC(rec->issue); + return XDP_ABORTED; + } + return bpf_redirect_map(&cpu_map, cpu_dest, 0); +} + +SEC("xdp") +int xdp_prognum1_touch_data(struct xdp_md *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + u32 key = bpf_get_smp_processor_id(); + struct ethhdr *eth = data; + struct datarec *rec; + u32 *cpu_selected; + u32 cpu_dest = 0; + u32 key0 = 0; + u16 eth_type; + + /* Only use first entry in cpus_available */ + cpu_selected = bpf_map_lookup_elem(&cpus_available, &key0); + if (!cpu_selected) + return XDP_ABORTED; + cpu_dest = *cpu_selected; + + /* Validate packet length is minimum Eth header size */ + if (eth + 1 > data_end) + return XDP_ABORTED; + + rec = bpf_map_lookup_elem(&rx_cnt, &key); + if (!rec) + return XDP_PASS; + NO_TEAR_INC(rec->processed); + + /* Read packet data, and use it (drop non 802.3 Ethertypes) */ + eth_type = eth->h_proto; + if (bpf_ntohs(eth_type) < ETH_P_802_3_MIN) { + NO_TEAR_INC(rec->dropped); + return XDP_DROP; + } + + if (cpu_dest >= nr_cpus) { + NO_TEAR_INC(rec->issue); + return XDP_ABORTED; + } + return bpf_redirect_map(&cpu_map, cpu_dest, 0); +} + +SEC("xdp") +int xdp_prognum2_round_robin(struct xdp_md *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + u32 key = bpf_get_smp_processor_id(); + struct datarec *rec; + u32 cpu_dest = 0; + u32 key0 = 0; + + u32 *cpu_selected; + u32 *cpu_iterator; + u32 *cpu_max; + u32 cpu_idx; + + cpu_max = bpf_map_lookup_elem(&cpus_count, &key0); + if (!cpu_max) + return XDP_ABORTED; + + cpu_iterator = bpf_map_lookup_elem(&cpus_iterator, &key0); + if (!cpu_iterator) + return XDP_ABORTED; + cpu_idx = *cpu_iterator; + + *cpu_iterator += 1; + if (*cpu_iterator == *cpu_max) + *cpu_iterator = 0; + + cpu_selected = bpf_map_lookup_elem(&cpus_available, &cpu_idx); + if (!cpu_selected) + return XDP_ABORTED; + cpu_dest = *cpu_selected; + + rec = bpf_map_lookup_elem(&rx_cnt, &key); + if (!rec) + return XDP_PASS; + NO_TEAR_INC(rec->processed); + + if (cpu_dest >= nr_cpus) { + NO_TEAR_INC(rec->issue); + return XDP_ABORTED; + } + return bpf_redirect_map(&cpu_map, cpu_dest, 0); +} + +SEC("xdp") +int xdp_prognum3_proto_separate(struct xdp_md *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + u32 key = bpf_get_smp_processor_id(); + struct ethhdr *eth = data; + u8 ip_proto = IPPROTO_UDP; + struct datarec *rec; + u16 eth_proto = 0; + u64 l3_offset = 0; + u32 cpu_dest = 0; + u32 *cpu_lookup; + u32 cpu_idx = 0; + + rec = bpf_map_lookup_elem(&rx_cnt, &key); + if (!rec) + return XDP_PASS; + NO_TEAR_INC(rec->processed); + + if (!(parse_eth(eth, data_end, ð_proto, &l3_offset))) + return XDP_PASS; /* Just skip */ + + /* Extract L4 protocol */ + switch (eth_proto) { + case ETH_P_IP: + ip_proto = get_proto_ipv4(ctx, l3_offset); + break; + case ETH_P_IPV6: + ip_proto = get_proto_ipv6(ctx, l3_offset); + break; + case ETH_P_ARP: + cpu_idx = 0; /* ARP packet handled on separate CPU */ + break; + default: + cpu_idx = 0; + } + + /* Choose CPU based on L4 protocol */ + switch (ip_proto) { + case IPPROTO_ICMP: + case IPPROTO_ICMPV6: + cpu_idx = 2; + break; + case IPPROTO_TCP: + cpu_idx = 0; + break; + case IPPROTO_UDP: + cpu_idx = 1; + break; + default: + cpu_idx = 0; + } + + cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx); + if (!cpu_lookup) + return XDP_ABORTED; + cpu_dest = *cpu_lookup; + + if (cpu_dest >= nr_cpus) { + NO_TEAR_INC(rec->issue); + return XDP_ABORTED; + } + return bpf_redirect_map(&cpu_map, cpu_dest, 0); +} + +SEC("xdp") +int xdp_prognum4_ddos_filter_pktgen(struct xdp_md *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + u32 key = bpf_get_smp_processor_id(); + struct ethhdr *eth = data; + u8 ip_proto = IPPROTO_UDP; + struct datarec *rec; + u16 eth_proto = 0; + u64 l3_offset = 0; + u32 cpu_dest = 0; + u32 *cpu_lookup; + u32 cpu_idx = 0; + u16 dest_port; + + rec = bpf_map_lookup_elem(&rx_cnt, &key); + if (!rec) + return XDP_PASS; + NO_TEAR_INC(rec->processed); + + if (!(parse_eth(eth, data_end, ð_proto, &l3_offset))) + return XDP_PASS; /* Just skip */ + + /* Extract L4 protocol */ + switch (eth_proto) { + case ETH_P_IP: + ip_proto = get_proto_ipv4(ctx, l3_offset); + break; + case ETH_P_IPV6: + ip_proto = get_proto_ipv6(ctx, l3_offset); + break; + case ETH_P_ARP: + cpu_idx = 0; /* ARP packet handled on separate CPU */ + break; + default: + cpu_idx = 0; + } + + /* Choose CPU based on L4 protocol */ + switch (ip_proto) { + case IPPROTO_ICMP: + case IPPROTO_ICMPV6: + cpu_idx = 2; + break; + case IPPROTO_TCP: + cpu_idx = 0; + break; + case IPPROTO_UDP: + cpu_idx = 1; + /* DDoS filter UDP port 9 (pktgen) */ + dest_port = get_dest_port_ipv4_udp(ctx, l3_offset); + if (dest_port == 9) { + NO_TEAR_INC(rec->dropped); + return XDP_DROP; + } + break; + default: + cpu_idx = 0; + } + + cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx); + if (!cpu_lookup) + return XDP_ABORTED; + cpu_dest = *cpu_lookup; + + if (cpu_dest >= nr_cpus) { + NO_TEAR_INC(rec->issue); + return XDP_ABORTED; + } + return bpf_redirect_map(&cpu_map, cpu_dest, 0); +} + +/* Hashing initval */ +#define INITVAL 15485863 + +static __always_inline +u32 get_ipv4_hash_ip_pair(struct xdp_md *ctx, u64 nh_off) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct iphdr *iph = data + nh_off; + u32 cpu_hash; + + if (iph + 1 > data_end) + return 0; + + cpu_hash = iph->saddr + iph->daddr; + cpu_hash = SuperFastHash((char *)&cpu_hash, 4, INITVAL + iph->protocol); + + return cpu_hash; +} + +static __always_inline +u32 get_ipv6_hash_ip_pair(struct xdp_md *ctx, u64 nh_off) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct ipv6hdr *ip6h = data + nh_off; + u32 cpu_hash; + + if (ip6h + 1 > data_end) + return 0; + + cpu_hash = ip6h->saddr.in6_u.u6_addr32[0] + ip6h->daddr.in6_u.u6_addr32[0]; + cpu_hash += ip6h->saddr.in6_u.u6_addr32[1] + ip6h->daddr.in6_u.u6_addr32[1]; + cpu_hash += ip6h->saddr.in6_u.u6_addr32[2] + ip6h->daddr.in6_u.u6_addr32[2]; + cpu_hash += ip6h->saddr.in6_u.u6_addr32[3] + ip6h->daddr.in6_u.u6_addr32[3]; + cpu_hash = SuperFastHash((char *)&cpu_hash, 4, INITVAL + ip6h->nexthdr); + + return cpu_hash; +} + +/* Load-Balance traffic based on hashing IP-addrs + L4-proto. The + * hashing scheme is symmetric, meaning swapping IP src/dest still hit + * same CPU. + */ +SEC("xdp") +int xdp_prognum5_lb_hash_ip_pairs(struct xdp_md *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + u32 key = bpf_get_smp_processor_id(); + struct ethhdr *eth = data; + struct datarec *rec; + u16 eth_proto = 0; + u64 l3_offset = 0; + u32 cpu_dest = 0; + u32 cpu_idx = 0; + u32 *cpu_lookup; + u32 key0 = 0; + u32 *cpu_max; + u32 cpu_hash; + + rec = bpf_map_lookup_elem(&rx_cnt, &key); + if (!rec) + return XDP_PASS; + NO_TEAR_INC(rec->processed); + + cpu_max = bpf_map_lookup_elem(&cpus_count, &key0); + if (!cpu_max) + return XDP_ABORTED; + + if (!(parse_eth(eth, data_end, ð_proto, &l3_offset))) + return XDP_PASS; /* Just skip */ + + /* Hash for IPv4 and IPv6 */ + switch (eth_proto) { + case ETH_P_IP: + cpu_hash = get_ipv4_hash_ip_pair(ctx, l3_offset); + break; + case ETH_P_IPV6: + cpu_hash = get_ipv6_hash_ip_pair(ctx, l3_offset); + break; + case ETH_P_ARP: /* ARP packet handled on CPU idx 0 */ + default: + cpu_hash = 0; + } + + /* Choose CPU based on hash */ + cpu_idx = cpu_hash % *cpu_max; + + cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx); + if (!cpu_lookup) + return XDP_ABORTED; + cpu_dest = *cpu_lookup; + + if (cpu_dest >= nr_cpus) { + NO_TEAR_INC(rec->issue); + return XDP_ABORTED; + } + return bpf_redirect_map(&cpu_map, cpu_dest, 0); +} + +SEC("xdp_cpumap/redirect") +int xdp_redirect_cpu_devmap(struct xdp_md *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct ethhdr *eth = data; + u64 nh_off; + + nh_off = sizeof(*eth); + if (data + nh_off > data_end) + return XDP_DROP; + + swap_src_dst_mac(data); + return bpf_redirect_map(&tx_port, 0, 0); +} + +SEC("xdp_cpumap/pass") +int xdp_redirect_cpu_pass(struct xdp_md *ctx) +{ + return XDP_PASS; +} + +SEC("xdp_cpumap/drop") +int xdp_redirect_cpu_drop(struct xdp_md *ctx) +{ + return XDP_DROP; +} + +SEC("xdp_devmap/egress") +int xdp_redirect_egress_prog(struct xdp_md *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct ethhdr *eth = data; + u64 nh_off; + + nh_off = sizeof(*eth); + if (data + nh_off > data_end) + return XDP_DROP; + + __builtin_memcpy(eth->h_source, (const char *)tx_mac_addr, ETH_ALEN); + + return XDP_PASS; +} + +char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/xdp_redirect_cpu_kern.c b/samples/bpf/xdp_redirect_cpu_kern.c deleted file mode 100644 index 8255025dea97..000000000000 --- a/samples/bpf/xdp_redirect_cpu_kern.c +++ /dev/null @@ -1,730 +0,0 @@ -/* XDP redirect to CPUs via cpumap (BPF_MAP_TYPE_CPUMAP) - * - * GPLv2, Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc. - */ -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include "hash_func01.h" - -#define MAX_CPUS NR_CPUS - -/* Special map type that can XDP_REDIRECT frames to another CPU */ -struct { - __uint(type, BPF_MAP_TYPE_CPUMAP); - __uint(key_size, sizeof(u32)); - __uint(value_size, sizeof(struct bpf_cpumap_val)); - __uint(max_entries, MAX_CPUS); -} cpu_map SEC(".maps"); - -/* Common stats data record to keep userspace more simple */ -struct datarec { - __u64 processed; - __u64 dropped; - __u64 issue; - __u64 xdp_pass; - __u64 xdp_drop; - __u64 xdp_redirect; -}; - -/* Count RX packets, as XDP bpf_prog doesn't get direct TX-success - * feedback. Redirect TX errors can be caught via a tracepoint. - */ -struct { - __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); - __type(key, u32); - __type(value, struct datarec); - __uint(max_entries, 1); -} rx_cnt SEC(".maps"); - -/* Used by trace point */ -struct { - __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); - __type(key, u32); - __type(value, struct datarec); - __uint(max_entries, 2); - /* TODO: have entries for all possible errno's */ -} redirect_err_cnt SEC(".maps"); - -/* Used by trace point */ -struct { - __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); - __type(key, u32); - __type(value, struct datarec); - __uint(max_entries, MAX_CPUS); -} cpumap_enqueue_cnt SEC(".maps"); - -/* Used by trace point */ -struct { - __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); - __type(key, u32); - __type(value, struct datarec); - __uint(max_entries, 1); -} cpumap_kthread_cnt SEC(".maps"); - -/* Set of maps controlling available CPU, and for iterating through - * selectable redirect CPUs. - */ -struct { - __uint(type, BPF_MAP_TYPE_ARRAY); - __type(key, u32); - __type(value, u32); - __uint(max_entries, MAX_CPUS); -} cpus_available SEC(".maps"); -struct { - __uint(type, BPF_MAP_TYPE_ARRAY); - __type(key, u32); - __type(value, u32); - __uint(max_entries, 1); -} cpus_count SEC(".maps"); -struct { - __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); - __type(key, u32); - __type(value, u32); - __uint(max_entries, 1); -} cpus_iterator SEC(".maps"); - -/* Used by trace point */ -struct { - __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); - __type(key, u32); - __type(value, struct datarec); - __uint(max_entries, 1); -} exception_cnt SEC(".maps"); - -/* Helper parse functions */ - -/* Parse Ethernet layer 2, extract network layer 3 offset and protocol - * - * Returns false on error and non-supported ether-type - */ -struct vlan_hdr { - __be16 h_vlan_TCI; - __be16 h_vlan_encapsulated_proto; -}; - -static __always_inline -bool parse_eth(struct ethhdr *eth, void *data_end, - u16 *eth_proto, u64 *l3_offset) -{ - u16 eth_type; - u64 offset; - - offset = sizeof(*eth); - if ((void *)eth + offset > data_end) - return false; - - eth_type = eth->h_proto; - - /* Skip non 802.3 Ethertypes */ - if (unlikely(ntohs(eth_type) < ETH_P_802_3_MIN)) - return false; - - /* Handle VLAN tagged packet */ - if (eth_type == htons(ETH_P_8021Q) || eth_type == htons(ETH_P_8021AD)) { - struct vlan_hdr *vlan_hdr; - - vlan_hdr = (void *)eth + offset; - offset += sizeof(*vlan_hdr); - if ((void *)eth + offset > data_end) - return false; - eth_type = vlan_hdr->h_vlan_encapsulated_proto; - } - /* Handle double VLAN tagged packet */ - if (eth_type == htons(ETH_P_8021Q) || eth_type == htons(ETH_P_8021AD)) { - struct vlan_hdr *vlan_hdr; - - vlan_hdr = (void *)eth + offset; - offset += sizeof(*vlan_hdr); - if ((void *)eth + offset > data_end) - return false; - eth_type = vlan_hdr->h_vlan_encapsulated_proto; - } - - *eth_proto = ntohs(eth_type); - *l3_offset = offset; - return true; -} - -static __always_inline -u16 get_dest_port_ipv4_udp(struct xdp_md *ctx, u64 nh_off) -{ - void *data_end = (void *)(long)ctx->data_end; - void *data = (void *)(long)ctx->data; - struct iphdr *iph = data + nh_off; - struct udphdr *udph; - u16 dport; - - if (iph + 1 > data_end) - return 0; - if (!(iph->protocol == IPPROTO_UDP)) - return 0; - - udph = (void *)(iph + 1); - if (udph + 1 > data_end) - return 0; - - dport = ntohs(udph->dest); - return dport; -} - -static __always_inline -int get_proto_ipv4(struct xdp_md *ctx, u64 nh_off) -{ - void *data_end = (void *)(long)ctx->data_end; - void *data = (void *)(long)ctx->data; - struct iphdr *iph = data + nh_off; - - if (iph + 1 > data_end) - return 0; - return iph->protocol; -} - -static __always_inline -int get_proto_ipv6(struct xdp_md *ctx, u64 nh_off) -{ - void *data_end = (void *)(long)ctx->data_end; - void *data = (void *)(long)ctx->data; - struct ipv6hdr *ip6h = data + nh_off; - - if (ip6h + 1 > data_end) - return 0; - return ip6h->nexthdr; -} - -SEC("xdp_cpu_map0") -int xdp_prognum0_no_touch(struct xdp_md *ctx) -{ - void *data_end = (void *)(long)ctx->data_end; - void *data = (void *)(long)ctx->data; - struct datarec *rec; - u32 *cpu_selected; - u32 cpu_dest; - u32 key = 0; - - /* Only use first entry in cpus_available */ - cpu_selected = bpf_map_lookup_elem(&cpus_available, &key); - if (!cpu_selected) - return XDP_ABORTED; - cpu_dest = *cpu_selected; - - /* Count RX packet in map */ - rec = bpf_map_lookup_elem(&rx_cnt, &key); - if (!rec) - return XDP_ABORTED; - rec->processed++; - - if (cpu_dest >= MAX_CPUS) { - rec->issue++; - return XDP_ABORTED; - } - - return bpf_redirect_map(&cpu_map, cpu_dest, 0); -} - -SEC("xdp_cpu_map1_touch_data") -int xdp_prognum1_touch_data(struct xdp_md *ctx) -{ - void *data_end = (void *)(long)ctx->data_end; - void *data = (void *)(long)ctx->data; - struct ethhdr *eth = data; - struct datarec *rec; - u32 *cpu_selected; - u32 cpu_dest; - u16 eth_type; - u32 key = 0; - - /* Only use first entry in cpus_available */ - cpu_selected = bpf_map_lookup_elem(&cpus_available, &key); - if (!cpu_selected) - return XDP_ABORTED; - cpu_dest = *cpu_selected; - - /* Validate packet length is minimum Eth header size */ - if (eth + 1 > data_end) - return XDP_ABORTED; - - /* Count RX packet in map */ - rec = bpf_map_lookup_elem(&rx_cnt, &key); - if (!rec) - return XDP_ABORTED; - rec->processed++; - - /* Read packet data, and use it (drop non 802.3 Ethertypes) */ - eth_type = eth->h_proto; - if (ntohs(eth_type) < ETH_P_802_3_MIN) { - rec->dropped++; - return XDP_DROP; - } - - if (cpu_dest >= MAX_CPUS) { - rec->issue++; - return XDP_ABORTED; - } - - return bpf_redirect_map(&cpu_map, cpu_dest, 0); -} - -SEC("xdp_cpu_map2_round_robin") -int xdp_prognum2_round_robin(struct xdp_md *ctx) -{ - void *data_end = (void *)(long)ctx->data_end; - void *data = (void *)(long)ctx->data; - struct ethhdr *eth = data; - struct datarec *rec; - u32 cpu_dest; - u32 *cpu_lookup; - u32 key0 = 0; - - u32 *cpu_selected; - u32 *cpu_iterator; - u32 *cpu_max; - u32 cpu_idx; - - cpu_max = bpf_map_lookup_elem(&cpus_count, &key0); - if (!cpu_max) - return XDP_ABORTED; - - cpu_iterator = bpf_map_lookup_elem(&cpus_iterator, &key0); - if (!cpu_iterator) - return XDP_ABORTED; - cpu_idx = *cpu_iterator; - - *cpu_iterator += 1; - if (*cpu_iterator == *cpu_max) - *cpu_iterator = 0; - - cpu_selected = bpf_map_lookup_elem(&cpus_available, &cpu_idx); - if (!cpu_selected) - return XDP_ABORTED; - cpu_dest = *cpu_selected; - - /* Count RX packet in map */ - rec = bpf_map_lookup_elem(&rx_cnt, &key0); - if (!rec) - return XDP_ABORTED; - rec->processed++; - - if (cpu_dest >= MAX_CPUS) { - rec->issue++; - return XDP_ABORTED; - } - - return bpf_redirect_map(&cpu_map, cpu_dest, 0); -} - -SEC("xdp_cpu_map3_proto_separate") -int xdp_prognum3_proto_separate(struct xdp_md *ctx) -{ - void *data_end = (void *)(long)ctx->data_end; - void *data = (void *)(long)ctx->data; - struct ethhdr *eth = data; - u8 ip_proto = IPPROTO_UDP; - struct datarec *rec; - u16 eth_proto = 0; - u64 l3_offset = 0; - u32 cpu_dest = 0; - u32 cpu_idx = 0; - u32 *cpu_lookup; - u32 key = 0; - - /* Count RX packet in map */ - rec = bpf_map_lookup_elem(&rx_cnt, &key); - if (!rec) - return XDP_ABORTED; - rec->processed++; - - if (!(parse_eth(eth, data_end, ð_proto, &l3_offset))) - return XDP_PASS; /* Just skip */ - - /* Extract L4 protocol */ - switch (eth_proto) { - case ETH_P_IP: - ip_proto = get_proto_ipv4(ctx, l3_offset); - break; - case ETH_P_IPV6: - ip_proto = get_proto_ipv6(ctx, l3_offset); - break; - case ETH_P_ARP: - cpu_idx = 0; /* ARP packet handled on separate CPU */ - break; - default: - cpu_idx = 0; - } - - /* Choose CPU based on L4 protocol */ - switch (ip_proto) { - case IPPROTO_ICMP: - case IPPROTO_ICMPV6: - cpu_idx = 2; - break; - case IPPROTO_TCP: - cpu_idx = 0; - break; - case IPPROTO_UDP: - cpu_idx = 1; - break; - default: - cpu_idx = 0; - } - - cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx); - if (!cpu_lookup) - return XDP_ABORTED; - cpu_dest = *cpu_lookup; - - if (cpu_dest >= MAX_CPUS) { - rec->issue++; - return XDP_ABORTED; - } - - return bpf_redirect_map(&cpu_map, cpu_dest, 0); -} - -SEC("xdp_cpu_map4_ddos_filter_pktgen") -int xdp_prognum4_ddos_filter_pktgen(struct xdp_md *ctx) -{ - void *data_end = (void *)(long)ctx->data_end; - void *data = (void *)(long)ctx->data; - struct ethhdr *eth = data; - u8 ip_proto = IPPROTO_UDP; - struct datarec *rec; - u16 eth_proto = 0; - u64 l3_offset = 0; - u32 cpu_dest = 0; - u32 cpu_idx = 0; - u16 dest_port; - u32 *cpu_lookup; - u32 key = 0; - - /* Count RX packet in map */ - rec = bpf_map_lookup_elem(&rx_cnt, &key); - if (!rec) - return XDP_ABORTED; - rec->processed++; - - if (!(parse_eth(eth, data_end, ð_proto, &l3_offset))) - return XDP_PASS; /* Just skip */ - - /* Extract L4 protocol */ - switch (eth_proto) { - case ETH_P_IP: - ip_proto = get_proto_ipv4(ctx, l3_offset); - break; - case ETH_P_IPV6: - ip_proto = get_proto_ipv6(ctx, l3_offset); - break; - case ETH_P_ARP: - cpu_idx = 0; /* ARP packet handled on separate CPU */ - break; - default: - cpu_idx = 0; - } - - /* Choose CPU based on L4 protocol */ - switch (ip_proto) { - case IPPROTO_ICMP: - case IPPROTO_ICMPV6: - cpu_idx = 2; - break; - case IPPROTO_TCP: - cpu_idx = 0; - break; - case IPPROTO_UDP: - cpu_idx = 1; - /* DDoS filter UDP port 9 (pktgen) */ - dest_port = get_dest_port_ipv4_udp(ctx, l3_offset); - if (dest_port == 9) { - if (rec) - rec->dropped++; - return XDP_DROP; - } - break; - default: - cpu_idx = 0; - } - - cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx); - if (!cpu_lookup) - return XDP_ABORTED; - cpu_dest = *cpu_lookup; - - if (cpu_dest >= MAX_CPUS) { - rec->issue++; - return XDP_ABORTED; - } - - return bpf_redirect_map(&cpu_map, cpu_dest, 0); -} - -/* Hashing initval */ -#define INITVAL 15485863 - -static __always_inline -u32 get_ipv4_hash_ip_pair(struct xdp_md *ctx, u64 nh_off) -{ - void *data_end = (void *)(long)ctx->data_end; - void *data = (void *)(long)ctx->data; - struct iphdr *iph = data + nh_off; - u32 cpu_hash; - - if (iph + 1 > data_end) - return 0; - - cpu_hash = iph->saddr + iph->daddr; - cpu_hash = SuperFastHash((char *)&cpu_hash, 4, INITVAL + iph->protocol); - - return cpu_hash; -} - -static __always_inline -u32 get_ipv6_hash_ip_pair(struct xdp_md *ctx, u64 nh_off) -{ - void *data_end = (void *)(long)ctx->data_end; - void *data = (void *)(long)ctx->data; - struct ipv6hdr *ip6h = data + nh_off; - u32 cpu_hash; - - if (ip6h + 1 > data_end) - return 0; - - cpu_hash = ip6h->saddr.s6_addr32[0] + ip6h->daddr.s6_addr32[0]; - cpu_hash += ip6h->saddr.s6_addr32[1] + ip6h->daddr.s6_addr32[1]; - cpu_hash += ip6h->saddr.s6_addr32[2] + ip6h->daddr.s6_addr32[2]; - cpu_hash += ip6h->saddr.s6_addr32[3] + ip6h->daddr.s6_addr32[3]; - cpu_hash = SuperFastHash((char *)&cpu_hash, 4, INITVAL + ip6h->nexthdr); - - return cpu_hash; -} - -/* Load-Balance traffic based on hashing IP-addrs + L4-proto. The - * hashing scheme is symmetric, meaning swapping IP src/dest still hit - * same CPU. - */ -SEC("xdp_cpu_map5_lb_hash_ip_pairs") -int xdp_prognum5_lb_hash_ip_pairs(struct xdp_md *ctx) -{ - void *data_end = (void *)(long)ctx->data_end; - void *data = (void *)(long)ctx->data; - struct ethhdr *eth = data; - u8 ip_proto = IPPROTO_UDP; - struct datarec *rec; - u16 eth_proto = 0; - u64 l3_offset = 0; - u32 cpu_dest = 0; - u32 cpu_idx = 0; - u32 *cpu_lookup; - u32 *cpu_max; - u32 cpu_hash; - u32 key = 0; - - /* Count RX packet in map */ - rec = bpf_map_lookup_elem(&rx_cnt, &key); - if (!rec) - return XDP_ABORTED; - rec->processed++; - - cpu_max = bpf_map_lookup_elem(&cpus_count, &key); - if (!cpu_max) - return XDP_ABORTED; - - if (!(parse_eth(eth, data_end, ð_proto, &l3_offset))) - return XDP_PASS; /* Just skip */ - - /* Hash for IPv4 and IPv6 */ - switch (eth_proto) { - case ETH_P_IP: - cpu_hash = get_ipv4_hash_ip_pair(ctx, l3_offset); - break; - case ETH_P_IPV6: - cpu_hash = get_ipv6_hash_ip_pair(ctx, l3_offset); - break; - case ETH_P_ARP: /* ARP packet handled on CPU idx 0 */ - default: - cpu_hash = 0; - } - - /* Choose CPU based on hash */ - cpu_idx = cpu_hash % *cpu_max; - - cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx); - if (!cpu_lookup) - return XDP_ABORTED; - cpu_dest = *cpu_lookup; - - if (cpu_dest >= MAX_CPUS) { - rec->issue++; - return XDP_ABORTED; - } - - return bpf_redirect_map(&cpu_map, cpu_dest, 0); -} - -char _license[] SEC("license") = "GPL"; - -/*** Trace point code ***/ - -/* Tracepoint format: /sys/kernel/debug/tracing/events/xdp/xdp_redirect/format - * Code in: kernel/include/trace/events/xdp.h - */ -struct xdp_redirect_ctx { - u64 __pad; // First 8 bytes are not accessible by bpf code - int prog_id; // offset:8; size:4; signed:1; - u32 act; // offset:12 size:4; signed:0; - int ifindex; // offset:16 size:4; signed:1; - int err; // offset:20 size:4; signed:1; - int to_ifindex; // offset:24 size:4; signed:1; - u32 map_id; // offset:28 size:4; signed:0; - int map_index; // offset:32 size:4; signed:1; -}; // offset:36 - -enum { - XDP_REDIRECT_SUCCESS = 0, - XDP_REDIRECT_ERROR = 1 -}; - -static __always_inline -int xdp_redirect_collect_stat(struct xdp_redirect_ctx *ctx) -{ - u32 key = XDP_REDIRECT_ERROR; - struct datarec *rec; - int err = ctx->err; - - if (!err) - key = XDP_REDIRECT_SUCCESS; - - rec = bpf_map_lookup_elem(&redirect_err_cnt, &key); - if (!rec) - return 0; - rec->dropped += 1; - - return 0; /* Indicate event was filtered (no further processing)*/ - /* - * Returning 1 here would allow e.g. a perf-record tracepoint - * to see and record these events, but it doesn't work well - * in-practice as stopping perf-record also unload this - * bpf_prog. Plus, there is additional overhead of doing so. - */ -} - -SEC("tracepoint/xdp/xdp_redirect_err") -int trace_xdp_redirect_err(struct xdp_redirect_ctx *ctx) -{ - return xdp_redirect_collect_stat(ctx); -} - -SEC("tracepoint/xdp/xdp_redirect_map_err") -int trace_xdp_redirect_map_err(struct xdp_redirect_ctx *ctx) -{ - return xdp_redirect_collect_stat(ctx); -} - -/* Tracepoint format: /sys/kernel/debug/tracing/events/xdp/xdp_exception/format - * Code in: kernel/include/trace/events/xdp.h - */ -struct xdp_exception_ctx { - u64 __pad; // First 8 bytes are not accessible by bpf code - int prog_id; // offset:8; size:4; signed:1; - u32 act; // offset:12; size:4; signed:0; - int ifindex; // offset:16; size:4; signed:1; -}; - -SEC("tracepoint/xdp/xdp_exception") -int trace_xdp_exception(struct xdp_exception_ctx *ctx) -{ - struct datarec *rec; - u32 key = 0; - - rec = bpf_map_lookup_elem(&exception_cnt, &key); - if (!rec) - return 1; - rec->dropped += 1; - - return 0; -} - -/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_cpumap_enqueue/format - * Code in: kernel/include/trace/events/xdp.h - */ -struct cpumap_enqueue_ctx { - u64 __pad; // First 8 bytes are not accessible by bpf code - int map_id; // offset:8; size:4; signed:1; - u32 act; // offset:12; size:4; signed:0; - int cpu; // offset:16; size:4; signed:1; - unsigned int drops; // offset:20; size:4; signed:0; - unsigned int processed; // offset:24; size:4; signed:0; - int to_cpu; // offset:28; size:4; signed:1; -}; - -SEC("tracepoint/xdp/xdp_cpumap_enqueue") -int trace_xdp_cpumap_enqueue(struct cpumap_enqueue_ctx *ctx) -{ - u32 to_cpu = ctx->to_cpu; - struct datarec *rec; - - if (to_cpu >= MAX_CPUS) - return 1; - - rec = bpf_map_lookup_elem(&cpumap_enqueue_cnt, &to_cpu); - if (!rec) - return 0; - rec->processed += ctx->processed; - rec->dropped += ctx->drops; - - /* Record bulk events, then userspace can calc average bulk size */ - if (ctx->processed > 0) - rec->issue += 1; - - /* Inception: It's possible to detect overload situations, via - * this tracepoint. This can be used for creating a feedback - * loop to XDP, which can take appropriate actions to mitigate - * this overload situation. - */ - return 0; -} - -/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_cpumap_kthread/format - * Code in: kernel/include/trace/events/xdp.h - */ -struct cpumap_kthread_ctx { - u64 __pad; // First 8 bytes are not accessible - int map_id; // offset:8; size:4; signed:1; - u32 act; // offset:12; size:4; signed:0; - int cpu; // offset:16; size:4; signed:1; - unsigned int drops; // offset:20; size:4; signed:0; - unsigned int processed; // offset:24; size:4; signed:0; - int sched; // offset:28; size:4; signed:1; - unsigned int xdp_pass; // offset:32; size:4; signed:0; - unsigned int xdp_drop; // offset:36; size:4; signed:0; - unsigned int xdp_redirect; // offset:40; size:4; signed:0; -}; - -SEC("tracepoint/xdp/xdp_cpumap_kthread") -int trace_xdp_cpumap_kthread(struct cpumap_kthread_ctx *ctx) -{ - struct datarec *rec; - u32 key = 0; - - rec = bpf_map_lookup_elem(&cpumap_kthread_cnt, &key); - if (!rec) - return 0; - rec->processed += ctx->processed; - rec->dropped += ctx->drops; - rec->xdp_pass += ctx->xdp_pass; - rec->xdp_drop += ctx->xdp_drop; - rec->xdp_redirect += ctx->xdp_redirect; - - /* Count times kthread yielded CPU via schedule call */ - if (ctx->sched) - rec->issue++; - - return 0; -} -- cgit v1.2.3-59-g8ed1b From e531a220cc59282a3f371608f1a6fa960416e231 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Sat, 21 Aug 2021 05:50:06 +0530 Subject: samples: bpf: Convert xdp_redirect_cpu to XDP samples helper Use the libbpf skeleton facility and other utilities provided by XDP samples helper. Similar to xdp_monitor, xdp_redirect_cpu was quite featureful except a few minor omissions (e.g. redirect errno reporting). All of these have been moved to XDP samples helper, hence drop the unneeded code and convert to usage of helpers provided by it. One of the important changes here is dropping of mprog-disable option, as we make that the default. Also, we support built-in programs for some common actions on the packet when it reaches kthread (pass, drop, redirect to device). If the user still needs to install a custom program, they can still supply a BPF object, however the program should be suitably tagged with SEC("xdp_cpumap") annotation so that the expected attach type is correct when updating our cpumap map element. Signed-off-by: Kumar Kartikeya Dwivedi Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20210821002010.845777-19-memxor@gmail.com --- samples/bpf/Makefile | 5 +- samples/bpf/xdp_redirect_cpu_user.c | 1105 +++++++++++------------------------ 2 files changed, 343 insertions(+), 767 deletions(-) (limited to 'samples') diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 231cdbc773a7..43d3e52a8659 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -41,7 +41,6 @@ tprogs-y += test_map_in_map tprogs-y += per_socket_stats_example tprogs-y += xdp_redirect_map tprogs-y += xdp_redirect_map_multi -tprogs-y += xdp_redirect_cpu tprogs-y += xdp_rxq_info tprogs-y += syscall_tp tprogs-y += cpustat @@ -55,6 +54,7 @@ tprogs-y += xdp_sample_pkts tprogs-y += ibumad tprogs-y += hbm +tprogs-y += xdp_redirect_cpu tprogs-y += xdp_redirect tprogs-y += xdp_monitor @@ -102,7 +102,6 @@ test_map_in_map-objs := test_map_in_map_user.o per_socket_stats_example-objs := cookie_uid_helper_example.o xdp_redirect_map-objs := xdp_redirect_map_user.o xdp_redirect_map_multi-objs := xdp_redirect_map_multi_user.o -xdp_redirect_cpu-objs := xdp_redirect_cpu_user.o xdp_rxq_info-objs := xdp_rxq_info_user.o syscall_tp-objs := syscall_tp_user.o cpustat-objs := cpustat_user.o @@ -116,6 +115,7 @@ xdp_sample_pkts-objs := xdp_sample_pkts_user.o ibumad-objs := ibumad_user.o hbm-objs := hbm.o $(CGROUP_HELPERS) +xdp_redirect_cpu-objs := xdp_redirect_cpu_user.o $(XDP_SAMPLE) xdp_redirect-objs := xdp_redirect_user.o $(XDP_SAMPLE) xdp_monitor-objs := xdp_monitor_user.o $(XDP_SAMPLE) @@ -311,6 +311,7 @@ verify_target_bpf: verify_cmds $(BPF_SAMPLES_PATH)/*.c: verify_target_bpf $(LIBBPF) $(src)/*.c: verify_target_bpf $(LIBBPF) +$(obj)/xdp_redirect_cpu_user.o: $(obj)/xdp_redirect_cpu.skel.h $(obj)/xdp_redirect_user.o: $(obj)/xdp_redirect.skel.h $(obj)/xdp_monitor_user.o: $(obj)/xdp_monitor.skel.h diff --git a/samples/bpf/xdp_redirect_cpu_user.c b/samples/bpf/xdp_redirect_cpu_user.c index 9e225c96b77e..631700aef69c 100644 --- a/samples/bpf/xdp_redirect_cpu_user.c +++ b/samples/bpf/xdp_redirect_cpu_user.c @@ -2,7 +2,16 @@ /* Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc. */ static const char *__doc__ = - " XDP redirect with a CPU-map type \"BPF_MAP_TYPE_CPUMAP\""; +"XDP CPU redirect tool, using BPF_MAP_TYPE_CPUMAP\n" +"Usage: xdp_redirect_cpu -d -c 0 ... -c N\n" +"Valid specification for CPUMAP BPF program:\n" +" --mprog-name/-e pass (use built-in XDP_PASS program)\n" +" --mprog-name/-e drop (use built-in XDP_DROP program)\n" +" --redirect-device/-r (use built-in DEVMAP redirect program)\n" +" Custom CPUMAP BPF program:\n" +" --mprog-filename/-f --mprog-name/-e \n" +" Optionally, also pass --redirect-map/-m and --redirect-device/-r together\n" +" to configure DEVMAP in BPF object \n"; #include #include @@ -18,558 +27,62 @@ static const char *__doc__ = #include #include #include - #include #include - -/* How many xdp_progs are defined in _kern.c */ -#define MAX_PROG 6 - #include #include - #include "bpf_util.h" +#include "xdp_sample_user.h" +#include "xdp_redirect_cpu.skel.h" -static int ifindex = -1; -static char ifname_buf[IF_NAMESIZE]; -static char *ifname; -static __u32 prog_id; - -static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; -static int n_cpus; - -enum map_type { - CPU_MAP, - RX_CNT, - REDIRECT_ERR_CNT, - CPUMAP_ENQUEUE_CNT, - CPUMAP_KTHREAD_CNT, - CPUS_AVAILABLE, - CPUS_COUNT, - CPUS_ITERATOR, - EXCEPTION_CNT, -}; +static int map_fd; +static int avail_fd; +static int count_fd; -static const char *const map_type_strings[] = { - [CPU_MAP] = "cpu_map", - [RX_CNT] = "rx_cnt", - [REDIRECT_ERR_CNT] = "redirect_err_cnt", - [CPUMAP_ENQUEUE_CNT] = "cpumap_enqueue_cnt", - [CPUMAP_KTHREAD_CNT] = "cpumap_kthread_cnt", - [CPUS_AVAILABLE] = "cpus_available", - [CPUS_COUNT] = "cpus_count", - [CPUS_ITERATOR] = "cpus_iterator", - [EXCEPTION_CNT] = "exception_cnt", -}; +static int mask = SAMPLE_RX_CNT | SAMPLE_REDIRECT_ERR_MAP_CNT | + SAMPLE_CPUMAP_ENQUEUE_CNT | SAMPLE_CPUMAP_KTHREAD_CNT | + SAMPLE_EXCEPTION_CNT; -#define NUM_TP 5 -#define NUM_MAP 9 -struct bpf_link *tp_links[NUM_TP] = {}; -static int map_fds[NUM_MAP]; -static int tp_cnt = 0; - -/* Exit return codes */ -#define EXIT_OK 0 -#define EXIT_FAIL 1 -#define EXIT_FAIL_OPTION 2 -#define EXIT_FAIL_XDP 3 -#define EXIT_FAIL_BPF 4 -#define EXIT_FAIL_MEM 5 +DEFINE_SAMPLE_INIT(xdp_redirect_cpu); static const struct option long_options[] = { - {"help", no_argument, NULL, 'h' }, - {"dev", required_argument, NULL, 'd' }, - {"skb-mode", no_argument, NULL, 'S' }, - {"sec", required_argument, NULL, 's' }, - {"progname", required_argument, NULL, 'p' }, - {"qsize", required_argument, NULL, 'q' }, - {"cpu", required_argument, NULL, 'c' }, - {"stress-mode", no_argument, NULL, 'x' }, - {"no-separators", no_argument, NULL, 'z' }, - {"force", no_argument, NULL, 'F' }, - {"mprog-disable", no_argument, NULL, 'n' }, - {"mprog-name", required_argument, NULL, 'e' }, - {"mprog-filename", required_argument, NULL, 'f' }, - {"redirect-device", required_argument, NULL, 'r' }, - {"redirect-map", required_argument, NULL, 'm' }, - {0, 0, NULL, 0 } + { "help", no_argument, NULL, 'h' }, + { "dev", required_argument, NULL, 'd' }, + { "skb-mode", no_argument, NULL, 'S' }, + { "progname", required_argument, NULL, 'p' }, + { "qsize", required_argument, NULL, 'q' }, + { "cpu", required_argument, NULL, 'c' }, + { "stress-mode", no_argument, NULL, 'x' }, + { "force", no_argument, NULL, 'F' }, + { "interval", required_argument, NULL, 'i' }, + { "verbose", no_argument, NULL, 'v' }, + { "stats", no_argument, NULL, 's' }, + { "mprog-name", required_argument, NULL, 'e' }, + { "mprog-filename", required_argument, NULL, 'f' }, + { "redirect-device", required_argument, NULL, 'r' }, + { "redirect-map", required_argument, NULL, 'm' }, + {} }; -static void int_exit(int sig) -{ - __u32 curr_prog_id = 0; - - if (ifindex > -1) { - if (bpf_get_link_xdp_id(ifindex, &curr_prog_id, xdp_flags)) { - printf("bpf_get_link_xdp_id failed\n"); - exit(EXIT_FAIL); - } - if (prog_id == curr_prog_id) { - fprintf(stderr, - "Interrupted: Removing XDP program on ifindex:%d device:%s\n", - ifindex, ifname); - bpf_set_link_xdp_fd(ifindex, -1, xdp_flags); - } else if (!curr_prog_id) { - printf("couldn't find a prog id on a given iface\n"); - } else { - printf("program on interface changed, not removing\n"); - } - } - /* Detach tracepoints */ - while (tp_cnt) - bpf_link__destroy(tp_links[--tp_cnt]); - - exit(EXIT_OK); -} - static void print_avail_progs(struct bpf_object *obj) { struct bpf_program *pos; + printf(" Programs to be used for -p/--progname:\n"); bpf_object__for_each_program(pos, obj) { - if (bpf_program__is_xdp(pos)) - printf(" %s\n", bpf_program__section_name(pos)); - } -} - -static void usage(char *argv[], struct bpf_object *obj) -{ - int i; - - printf("\nDOCUMENTATION:\n%s\n", __doc__); - printf("\n"); - printf(" Usage: %s (options-see-below)\n", argv[0]); - printf(" Listing options:\n"); - for (i = 0; long_options[i].name != 0; i++) { - printf(" --%-12s", long_options[i].name); - if (long_options[i].flag != NULL) - printf(" flag (internal value:%d)", - *long_options[i].flag); - else - printf(" short-option: -%c", - long_options[i].val); - printf("\n"); - } - printf("\n Programs to be used for --progname:\n"); - print_avail_progs(obj); - printf("\n"); -} - -/* gettime returns the current time of day in nanoseconds. - * Cost: clock_gettime (ns) => 26ns (CLOCK_MONOTONIC) - * clock_gettime (ns) => 9ns (CLOCK_MONOTONIC_COARSE) - */ -#define NANOSEC_PER_SEC 1000000000 /* 10^9 */ -static __u64 gettime(void) -{ - struct timespec t; - int res; - - res = clock_gettime(CLOCK_MONOTONIC, &t); - if (res < 0) { - fprintf(stderr, "Error with gettimeofday! (%i)\n", res); - exit(EXIT_FAIL); - } - return (__u64) t.tv_sec * NANOSEC_PER_SEC + t.tv_nsec; -} - -/* Common stats data record shared with _kern.c */ -struct datarec { - __u64 processed; - __u64 dropped; - __u64 issue; - __u64 xdp_pass; - __u64 xdp_drop; - __u64 xdp_redirect; -}; -struct record { - __u64 timestamp; - struct datarec total; - struct datarec *cpu; -}; -struct stats_record { - struct record rx_cnt; - struct record redir_err; - struct record kthread; - struct record exception; - struct record enq[]; -}; - -static bool map_collect_percpu(int fd, __u32 key, struct record *rec) -{ - /* For percpu maps, userspace gets a value per possible CPU */ - unsigned int nr_cpus = bpf_num_possible_cpus(); - struct datarec values[nr_cpus]; - __u64 sum_xdp_redirect = 0; - __u64 sum_xdp_pass = 0; - __u64 sum_xdp_drop = 0; - __u64 sum_processed = 0; - __u64 sum_dropped = 0; - __u64 sum_issue = 0; - int i; - - if ((bpf_map_lookup_elem(fd, &key, values)) != 0) { - fprintf(stderr, - "ERR: bpf_map_lookup_elem failed key:0x%X\n", key); - return false; - } - /* Get time as close as possible to reading map contents */ - rec->timestamp = gettime(); - - /* Record and sum values from each CPU */ - for (i = 0; i < nr_cpus; i++) { - rec->cpu[i].processed = values[i].processed; - sum_processed += values[i].processed; - rec->cpu[i].dropped = values[i].dropped; - sum_dropped += values[i].dropped; - rec->cpu[i].issue = values[i].issue; - sum_issue += values[i].issue; - rec->cpu[i].xdp_pass = values[i].xdp_pass; - sum_xdp_pass += values[i].xdp_pass; - rec->cpu[i].xdp_drop = values[i].xdp_drop; - sum_xdp_drop += values[i].xdp_drop; - rec->cpu[i].xdp_redirect = values[i].xdp_redirect; - sum_xdp_redirect += values[i].xdp_redirect; - } - rec->total.processed = sum_processed; - rec->total.dropped = sum_dropped; - rec->total.issue = sum_issue; - rec->total.xdp_pass = sum_xdp_pass; - rec->total.xdp_drop = sum_xdp_drop; - rec->total.xdp_redirect = sum_xdp_redirect; - return true; -} - -static struct datarec *alloc_record_per_cpu(void) -{ - unsigned int nr_cpus = bpf_num_possible_cpus(); - struct datarec *array; - - array = calloc(nr_cpus, sizeof(struct datarec)); - if (!array) { - fprintf(stderr, "Mem alloc error (nr_cpus:%u)\n", nr_cpus); - exit(EXIT_FAIL_MEM); - } - return array; -} - -static struct stats_record *alloc_stats_record(void) -{ - struct stats_record *rec; - int i, size; - - size = sizeof(*rec) + n_cpus * sizeof(struct record); - rec = malloc(size); - if (!rec) { - fprintf(stderr, "Mem alloc error\n"); - exit(EXIT_FAIL_MEM); - } - memset(rec, 0, size); - rec->rx_cnt.cpu = alloc_record_per_cpu(); - rec->redir_err.cpu = alloc_record_per_cpu(); - rec->kthread.cpu = alloc_record_per_cpu(); - rec->exception.cpu = alloc_record_per_cpu(); - for (i = 0; i < n_cpus; i++) - rec->enq[i].cpu = alloc_record_per_cpu(); - - return rec; -} - -static void free_stats_record(struct stats_record *r) -{ - int i; - - for (i = 0; i < n_cpus; i++) - free(r->enq[i].cpu); - free(r->exception.cpu); - free(r->kthread.cpu); - free(r->redir_err.cpu); - free(r->rx_cnt.cpu); - free(r); -} - -static double calc_period(struct record *r, struct record *p) -{ - double period_ = 0; - __u64 period = 0; - - period = r->timestamp - p->timestamp; - if (period > 0) - period_ = ((double) period / NANOSEC_PER_SEC); - - return period_; -} - -static __u64 calc_pps(struct datarec *r, struct datarec *p, double period_) -{ - __u64 packets = 0; - __u64 pps = 0; - - if (period_ > 0) { - packets = r->processed - p->processed; - pps = packets / period_; - } - return pps; -} - -static __u64 calc_drop_pps(struct datarec *r, struct datarec *p, double period_) -{ - __u64 packets = 0; - __u64 pps = 0; - - if (period_ > 0) { - packets = r->dropped - p->dropped; - pps = packets / period_; - } - return pps; -} - -static __u64 calc_errs_pps(struct datarec *r, - struct datarec *p, double period_) -{ - __u64 packets = 0; - __u64 pps = 0; - - if (period_ > 0) { - packets = r->issue - p->issue; - pps = packets / period_; - } - return pps; -} - -static void calc_xdp_pps(struct datarec *r, struct datarec *p, - double *xdp_pass, double *xdp_drop, - double *xdp_redirect, double period_) -{ - *xdp_pass = 0, *xdp_drop = 0, *xdp_redirect = 0; - if (period_ > 0) { - *xdp_redirect = (r->xdp_redirect - p->xdp_redirect) / period_; - *xdp_pass = (r->xdp_pass - p->xdp_pass) / period_; - *xdp_drop = (r->xdp_drop - p->xdp_drop) / period_; - } -} - -static void stats_print(struct stats_record *stats_rec, - struct stats_record *stats_prev, - char *prog_name, char *mprog_name, int mprog_fd) -{ - unsigned int nr_cpus = bpf_num_possible_cpus(); - double pps = 0, drop = 0, err = 0; - bool mprog_enabled = false; - struct record *rec, *prev; - int to_cpu; - double t; - int i; - - if (mprog_fd > 0) - mprog_enabled = true; - - /* Header */ - printf("Running XDP/eBPF prog_name:%s\n", prog_name); - printf("%-15s %-7s %-14s %-11s %-9s\n", - "XDP-cpumap", "CPU:to", "pps", "drop-pps", "extra-info"); - - /* XDP rx_cnt */ - { - char *fmt_rx = "%-15s %-7d %'-14.0f %'-11.0f %'-10.0f %s\n"; - char *fm2_rx = "%-15s %-7s %'-14.0f %'-11.0f\n"; - char *errstr = ""; - - rec = &stats_rec->rx_cnt; - prev = &stats_prev->rx_cnt; - t = calc_period(rec, prev); - for (i = 0; i < nr_cpus; i++) { - struct datarec *r = &rec->cpu[i]; - struct datarec *p = &prev->cpu[i]; - - pps = calc_pps(r, p, t); - drop = calc_drop_pps(r, p, t); - err = calc_errs_pps(r, p, t); - if (err > 0) - errstr = "cpu-dest/err"; - if (pps > 0) - printf(fmt_rx, "XDP-RX", - i, pps, drop, err, errstr); - } - pps = calc_pps(&rec->total, &prev->total, t); - drop = calc_drop_pps(&rec->total, &prev->total, t); - err = calc_errs_pps(&rec->total, &prev->total, t); - printf(fm2_rx, "XDP-RX", "total", pps, drop); - } - - /* cpumap enqueue stats */ - for (to_cpu = 0; to_cpu < n_cpus; to_cpu++) { - char *fmt = "%-15s %3d:%-3d %'-14.0f %'-11.0f %'-10.2f %s\n"; - char *fm2 = "%-15s %3s:%-3d %'-14.0f %'-11.0f %'-10.2f %s\n"; - char *errstr = ""; - - rec = &stats_rec->enq[to_cpu]; - prev = &stats_prev->enq[to_cpu]; - t = calc_period(rec, prev); - for (i = 0; i < nr_cpus; i++) { - struct datarec *r = &rec->cpu[i]; - struct datarec *p = &prev->cpu[i]; - - pps = calc_pps(r, p, t); - drop = calc_drop_pps(r, p, t); - err = calc_errs_pps(r, p, t); - if (err > 0) { - errstr = "bulk-average"; - err = pps / err; /* calc average bulk size */ - } - if (pps > 0) - printf(fmt, "cpumap-enqueue", - i, to_cpu, pps, drop, err, errstr); - } - pps = calc_pps(&rec->total, &prev->total, t); - if (pps > 0) { - drop = calc_drop_pps(&rec->total, &prev->total, t); - err = calc_errs_pps(&rec->total, &prev->total, t); - if (err > 0) { - errstr = "bulk-average"; - err = pps / err; /* calc average bulk size */ - } - printf(fm2, "cpumap-enqueue", - "sum", to_cpu, pps, drop, err, errstr); - } - } - - /* cpumap kthread stats */ - { - char *fmt_k = "%-15s %-7d %'-14.0f %'-11.0f %'-10.0f %s\n"; - char *fm2_k = "%-15s %-7s %'-14.0f %'-11.0f %'-10.0f %s\n"; - char *e_str = ""; - - rec = &stats_rec->kthread; - prev = &stats_prev->kthread; - t = calc_period(rec, prev); - for (i = 0; i < nr_cpus; i++) { - struct datarec *r = &rec->cpu[i]; - struct datarec *p = &prev->cpu[i]; - - pps = calc_pps(r, p, t); - drop = calc_drop_pps(r, p, t); - err = calc_errs_pps(r, p, t); - if (err > 0) - e_str = "sched"; - if (pps > 0) - printf(fmt_k, "cpumap_kthread", - i, pps, drop, err, e_str); - } - pps = calc_pps(&rec->total, &prev->total, t); - drop = calc_drop_pps(&rec->total, &prev->total, t); - err = calc_errs_pps(&rec->total, &prev->total, t); - if (err > 0) - e_str = "sched-sum"; - printf(fm2_k, "cpumap_kthread", "total", pps, drop, err, e_str); - } - - /* XDP redirect err tracepoints (very unlikely) */ - { - char *fmt_err = "%-15s %-7d %'-14.0f %'-11.0f\n"; - char *fm2_err = "%-15s %-7s %'-14.0f %'-11.0f\n"; - - rec = &stats_rec->redir_err; - prev = &stats_prev->redir_err; - t = calc_period(rec, prev); - for (i = 0; i < nr_cpus; i++) { - struct datarec *r = &rec->cpu[i]; - struct datarec *p = &prev->cpu[i]; - - pps = calc_pps(r, p, t); - drop = calc_drop_pps(r, p, t); - if (pps > 0) - printf(fmt_err, "redirect_err", i, pps, drop); + if (bpf_program__is_xdp(pos)) { + if (!strncmp(bpf_program__name(pos), "xdp_prognum", + sizeof("xdp_prognum") - 1)) + printf(" %s\n", bpf_program__name(pos)); } - pps = calc_pps(&rec->total, &prev->total, t); - drop = calc_drop_pps(&rec->total, &prev->total, t); - printf(fm2_err, "redirect_err", "total", pps, drop); } - - /* XDP general exception tracepoints */ - { - char *fmt_err = "%-15s %-7d %'-14.0f %'-11.0f\n"; - char *fm2_err = "%-15s %-7s %'-14.0f %'-11.0f\n"; - - rec = &stats_rec->exception; - prev = &stats_prev->exception; - t = calc_period(rec, prev); - for (i = 0; i < nr_cpus; i++) { - struct datarec *r = &rec->cpu[i]; - struct datarec *p = &prev->cpu[i]; - - pps = calc_pps(r, p, t); - drop = calc_drop_pps(r, p, t); - if (pps > 0) - printf(fmt_err, "xdp_exception", i, pps, drop); - } - pps = calc_pps(&rec->total, &prev->total, t); - drop = calc_drop_pps(&rec->total, &prev->total, t); - printf(fm2_err, "xdp_exception", "total", pps, drop); - } - - /* CPUMAP attached XDP program that runs on remote/destination CPU */ - if (mprog_enabled) { - char *fmt_k = "%-15s %-7d %'-14.0f %'-11.0f %'-10.0f\n"; - char *fm2_k = "%-15s %-7s %'-14.0f %'-11.0f %'-10.0f\n"; - double xdp_pass, xdp_drop, xdp_redirect; - - printf("\n2nd remote XDP/eBPF prog_name: %s\n", mprog_name); - printf("%-15s %-7s %-14s %-11s %-9s\n", - "XDP-cpumap", "CPU:to", "xdp-pass", "xdp-drop", "xdp-redir"); - - rec = &stats_rec->kthread; - prev = &stats_prev->kthread; - t = calc_period(rec, prev); - for (i = 0; i < nr_cpus; i++) { - struct datarec *r = &rec->cpu[i]; - struct datarec *p = &prev->cpu[i]; - - calc_xdp_pps(r, p, &xdp_pass, &xdp_drop, - &xdp_redirect, t); - if (xdp_pass > 0 || xdp_drop > 0 || xdp_redirect > 0) - printf(fmt_k, "xdp-in-kthread", i, xdp_pass, xdp_drop, - xdp_redirect); - } - calc_xdp_pps(&rec->total, &prev->total, &xdp_pass, &xdp_drop, - &xdp_redirect, t); - printf(fm2_k, "xdp-in-kthread", "total", xdp_pass, xdp_drop, xdp_redirect); - } - - printf("\n"); - fflush(stdout); -} - -static void stats_collect(struct stats_record *rec) -{ - int fd, i; - - fd = map_fds[RX_CNT]; - map_collect_percpu(fd, 0, &rec->rx_cnt); - - fd = map_fds[REDIRECT_ERR_CNT]; - map_collect_percpu(fd, 1, &rec->redir_err); - - fd = map_fds[CPUMAP_ENQUEUE_CNT]; - for (i = 0; i < n_cpus; i++) - map_collect_percpu(fd, i, &rec->enq[i]); - - fd = map_fds[CPUMAP_KTHREAD_CNT]; - map_collect_percpu(fd, 0, &rec->kthread); - - fd = map_fds[EXCEPTION_CNT]; - map_collect_percpu(fd, 0, &rec->exception); } - -/* Pointer swap trick */ -static inline void swap(struct stats_record **a, struct stats_record **b) +static void usage(char *argv[], const struct option *long_options, + const char *doc, int mask, bool error, struct bpf_object *obj) { - struct stats_record *tmp; - - tmp = *a; - *a = *b; - *b = tmp; + sample_usage(argv, long_options, doc, mask, error); + print_avail_progs(obj); } static int create_cpu_entry(__u32 cpu, struct bpf_cpumap_val *value, @@ -582,39 +95,41 @@ static int create_cpu_entry(__u32 cpu, struct bpf_cpumap_val *value, /* Add a CPU entry to cpumap, as this allocate a cpu entry in * the kernel for the cpu. */ - ret = bpf_map_update_elem(map_fds[CPU_MAP], &cpu, value, 0); - if (ret) { - fprintf(stderr, "Create CPU entry failed (err:%d)\n", ret); - exit(EXIT_FAIL_BPF); + ret = bpf_map_update_elem(map_fd, &cpu, value, 0); + if (ret < 0) { + fprintf(stderr, "Create CPU entry failed: %s\n", strerror(errno)); + return ret; } /* Inform bpf_prog's that a new CPU is available to select * from via some control maps. */ - ret = bpf_map_update_elem(map_fds[CPUS_AVAILABLE], &avail_idx, &cpu, 0); - if (ret) { - fprintf(stderr, "Add to avail CPUs failed\n"); - exit(EXIT_FAIL_BPF); + ret = bpf_map_update_elem(avail_fd, &avail_idx, &cpu, 0); + if (ret < 0) { + fprintf(stderr, "Add to avail CPUs failed: %s\n", strerror(errno)); + return ret; } /* When not replacing/updating existing entry, bump the count */ - ret = bpf_map_lookup_elem(map_fds[CPUS_COUNT], &key, &curr_cpus_count); - if (ret) { - fprintf(stderr, "Failed reading curr cpus_count\n"); - exit(EXIT_FAIL_BPF); + ret = bpf_map_lookup_elem(count_fd, &key, &curr_cpus_count); + if (ret < 0) { + fprintf(stderr, "Failed reading curr cpus_count: %s\n", + strerror(errno)); + return ret; } if (new) { curr_cpus_count++; - ret = bpf_map_update_elem(map_fds[CPUS_COUNT], &key, + ret = bpf_map_update_elem(count_fd, &key, &curr_cpus_count, 0); - if (ret) { - fprintf(stderr, "Failed write curr cpus_count\n"); - exit(EXIT_FAIL_BPF); + if (ret < 0) { + fprintf(stderr, "Failed write curr cpus_count: %s\n", + strerror(errno)); + return ret; } } - /* map_fd[7] = cpus_iterator */ - printf("%s CPU:%u as idx:%u qsize:%d prog_fd: %d (cpus_count:%u)\n", - new ? "Add-new":"Replace", cpu, avail_idx, + + printf("%s CPU: %u as idx: %u qsize: %d cpumap_prog_fd: %d (cpus_count: %u)\n", + new ? "Add new" : "Replace", cpu, avail_idx, value->qsize, value->bpf_prog.fd, curr_cpus_count); return 0; @@ -623,24 +138,29 @@ static int create_cpu_entry(__u32 cpu, struct bpf_cpumap_val *value, /* CPUs are zero-indexed. Thus, add a special sentinel default value * in map cpus_available to mark CPU index'es not configured */ -static void mark_cpus_unavailable(void) +static int mark_cpus_unavailable(void) { - __u32 invalid_cpu = n_cpus; - int ret, i; + int ret, i, n_cpus = libbpf_num_possible_cpus(); + __u32 invalid_cpu; for (i = 0; i < n_cpus; i++) { - ret = bpf_map_update_elem(map_fds[CPUS_AVAILABLE], &i, + ret = bpf_map_update_elem(avail_fd, &i, &invalid_cpu, 0); - if (ret) { - fprintf(stderr, "Failed marking CPU unavailable\n"); - exit(EXIT_FAIL_BPF); + if (ret < 0) { + fprintf(stderr, "Failed marking CPU unavailable: %s\n", + strerror(errno)); + return ret; } } + + return 0; } /* Stress cpumap management code by concurrently changing underlying cpumap */ -static void stress_cpumap(struct bpf_cpumap_val *value) +static void stress_cpumap(void *ctx) { + struct bpf_cpumap_val *value = ctx; + /* Changing qsize will cause kernel to free and alloc a new * bpf_cpu_map_entry, with an associated/complicated tear-down * procedure. @@ -653,144 +173,163 @@ static void stress_cpumap(struct bpf_cpumap_val *value) create_cpu_entry(1, value, 0, false); } -static void stats_poll(int interval, bool use_separators, char *prog_name, - char *mprog_name, struct bpf_cpumap_val *value, - bool stress_mode) -{ - struct stats_record *record, *prev; - int mprog_fd; - - record = alloc_stats_record(); - prev = alloc_stats_record(); - stats_collect(record); - - /* Trick to pretty printf with thousands separators use %' */ - if (use_separators) - setlocale(LC_NUMERIC, "en_US"); - - while (1) { - swap(&prev, &record); - mprog_fd = value->bpf_prog.fd; - stats_collect(record); - stats_print(record, prev, prog_name, mprog_name, mprog_fd); - sleep(interval); - if (stress_mode) - stress_cpumap(value); - } - - free_stats_record(record); - free_stats_record(prev); -} - -static int init_tracepoints(struct bpf_object *obj) +static int set_cpumap_prog(struct xdp_redirect_cpu *skel, + const char *redir_interface, const char *redir_map, + const char *mprog_filename, const char *mprog_name) { - struct bpf_program *prog; - - bpf_object__for_each_program(prog, obj) { - if (bpf_program__is_tracepoint(prog) != true) - continue; - - tp_links[tp_cnt] = bpf_program__attach(prog); - if (libbpf_get_error(tp_links[tp_cnt])) { - tp_links[tp_cnt] = NULL; - return -EINVAL; + if (mprog_filename) { + struct bpf_program *prog; + struct bpf_object *obj; + int ret; + + if (!mprog_name) { + fprintf(stderr, "BPF program not specified for file %s\n", + mprog_filename); + goto end; + } + if ((redir_interface && !redir_map) || (!redir_interface && redir_map)) { + fprintf(stderr, "--redirect-%s specified but --redirect-%s not specified\n", + redir_interface ? "device" : "map", redir_interface ? "map" : "device"); + goto end; } - tp_cnt++; - } - - return 0; -} - -static int init_map_fds(struct bpf_object *obj) -{ - enum map_type type; - - for (type = 0; type < NUM_MAP; type++) { - map_fds[type] = - bpf_object__find_map_fd_by_name(obj, - map_type_strings[type]); - - if (map_fds[type] < 0) - return -ENOENT; - } - - return 0; -} -static int load_cpumap_prog(char *file_name, char *prog_name, - char *redir_interface, char *redir_map) -{ - struct bpf_prog_load_attr prog_load_attr = { - .prog_type = BPF_PROG_TYPE_XDP, - .expected_attach_type = BPF_XDP_CPUMAP, - .file = file_name, - }; - struct bpf_program *prog; - struct bpf_object *obj; - int fd; + /* Custom BPF program */ + obj = bpf_object__open_file(mprog_filename, NULL); + if (!obj) { + ret = -errno; + fprintf(stderr, "Failed to bpf_prog_load_xattr: %s\n", + strerror(errno)); + return ret; + } - if (bpf_prog_load_xattr(&prog_load_attr, &obj, &fd)) - return -1; + ret = bpf_object__load(obj); + if (ret < 0) { + ret = -errno; + fprintf(stderr, "Failed to bpf_object__load: %s\n", + strerror(errno)); + return ret; + } - if (fd < 0) { - fprintf(stderr, "ERR: bpf_prog_load_xattr: %s\n", - strerror(errno)); - return fd; - } + if (redir_map) { + int err, redir_map_fd, ifindex_out, key = 0; - if (redir_interface && redir_map) { - int err, map_fd, ifindex_out, key = 0; + redir_map_fd = bpf_object__find_map_fd_by_name(obj, redir_map); + if (redir_map_fd < 0) { + fprintf(stderr, "Failed to bpf_object__find_map_fd_by_name: %s\n", + strerror(errno)); + return redir_map_fd; + } - map_fd = bpf_object__find_map_fd_by_name(obj, redir_map); - if (map_fd < 0) - return map_fd; + ifindex_out = if_nametoindex(redir_interface); + if (!ifindex_out) + ifindex_out = strtoul(redir_interface, NULL, 0); + if (!ifindex_out) { + fprintf(stderr, "Bad interface name or index\n"); + return -EINVAL; + } - ifindex_out = if_nametoindex(redir_interface); - if (!ifindex_out) - return -1; + err = bpf_map_update_elem(redir_map_fd, &key, &ifindex_out, 0); + if (err < 0) + return err; + } - err = bpf_map_update_elem(map_fd, &key, &ifindex_out, 0); - if (err < 0) - return err; - } + prog = bpf_object__find_program_by_name(obj, mprog_name); + if (!prog) { + ret = -errno; + fprintf(stderr, "Failed to bpf_object__find_program_by_name: %s\n", + strerror(errno)); + return ret; + } - prog = bpf_object__find_program_by_title(obj, prog_name); - if (!prog) { - fprintf(stderr, "bpf_object__find_program_by_title failed\n"); - return EXIT_FAIL; + return bpf_program__fd(prog); + } else { + if (mprog_name) { + if (redir_interface || redir_map) { + fprintf(stderr, "Need to specify --mprog-filename/-f\n"); + goto end; + } + if (!strcmp(mprog_name, "pass") || !strcmp(mprog_name, "drop")) { + /* Use built-in pass/drop programs */ + return *mprog_name == 'p' ? bpf_program__fd(skel->progs.xdp_redirect_cpu_pass) + : bpf_program__fd(skel->progs.xdp_redirect_cpu_drop); + } else { + fprintf(stderr, "Unknown name \"%s\" for built-in BPF program\n", + mprog_name); + goto end; + } + } else { + if (redir_map) { + fprintf(stderr, "Need to specify --mprog-filename, --mprog-name and" + " --redirect-device with --redirect-map\n"); + goto end; + } + if (redir_interface) { + /* Use built-in devmap redirect */ + struct bpf_devmap_val val = {}; + int ifindex_out, err; + __u32 key = 0; + + if (!redir_interface) + return 0; + + ifindex_out = if_nametoindex(redir_interface); + if (!ifindex_out) + ifindex_out = strtoul(redir_interface, NULL, 0); + if (!ifindex_out) { + fprintf(stderr, "Bad interface name or index\n"); + return -EINVAL; + } + + if (get_mac_addr(ifindex_out, skel->bss->tx_mac_addr) < 0) { + printf("Get interface %d mac failed\n", ifindex_out); + return -EINVAL; + } + + val.ifindex = ifindex_out; + val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_redirect_egress_prog); + err = bpf_map_update_elem(bpf_map__fd(skel->maps.tx_port), &key, &val, 0); + if (err < 0) + return -errno; + + return bpf_program__fd(skel->progs.xdp_redirect_cpu_devmap); + } + } } - return bpf_program__fd(prog); + /* Disabled */ + return 0; +end: + fprintf(stderr, "Invalid options for CPUMAP BPF program\n"); + return -EINVAL; } int main(int argc, char **argv) { - char *prog_name = "xdp_cpu_map5_lb_hash_ip_pairs"; - char *mprog_filename = "xdp_redirect_kern.o"; - char *redir_interface = NULL, *redir_map = NULL; - char *mprog_name = "xdp_redirect_dummy"; - bool mprog_disable = false; - struct bpf_prog_load_attr prog_load_attr = { - .prog_type = BPF_PROG_TYPE_UNSPEC, - }; - struct bpf_prog_info info = {}; - __u32 info_len = sizeof(info); + const char *redir_interface = NULL, *redir_map = NULL; + const char *mprog_filename = NULL, *mprog_name = NULL; + struct xdp_redirect_cpu *skel; + struct bpf_map_info info = {}; + char ifname_buf[IF_NAMESIZE]; struct bpf_cpumap_val value; - bool use_separators = true; + __u32 infosz = sizeof(info); + int ret = EXIT_FAIL_OPTION; + unsigned long interval = 2; bool stress_mode = false; struct bpf_program *prog; - struct bpf_object *obj; - int err = EXIT_FAIL; - char filename[256]; + const char *prog_name; + bool generic = false; + bool force = false; int added_cpus = 0; + bool error = true; int longindex = 0; - int interval = 2; int add_cpu = -1; - int opt, prog_fd; - int *cpu, i; + int ifindex = -1; + int *cpu, i, opt; + char *ifname; __u32 qsize; + int n_cpus; - n_cpus = get_nprocs_conf(); + n_cpus = libbpf_num_possible_cpus(); /* Notice: Choosing the queue size is very important when CPU is * configured with power-saving states. @@ -810,73 +349,87 @@ int main(int argc, char **argv) */ qsize = 2048; - snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); - prog_load_attr.file = filename; - - if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd)) - return err; - - if (prog_fd < 0) { - fprintf(stderr, "ERR: bpf_prog_load_xattr: %s\n", + skel = xdp_redirect_cpu__open(); + if (!skel) { + fprintf(stderr, "Failed to xdp_redirect_cpu__open: %s\n", strerror(errno)); - return err; + ret = EXIT_FAIL_BPF; + goto end; + } + + ret = sample_init_pre_load(skel); + if (ret < 0) { + fprintf(stderr, "Failed to sample_init_pre_load: %s\n", strerror(-ret)); + ret = EXIT_FAIL_BPF; + goto end_destroy; } - if (init_tracepoints(obj) < 0) { - fprintf(stderr, "ERR: bpf_program__attach failed\n"); - return err; + if (bpf_map__set_max_entries(skel->maps.cpu_map, n_cpus) < 0) { + fprintf(stderr, "Failed to set max entries for cpu_map map: %s", + strerror(errno)); + ret = EXIT_FAIL_BPF; + goto end_destroy; } - if (init_map_fds(obj) < 0) { - fprintf(stderr, "bpf_object__find_map_fd_by_name failed\n"); - return err; + if (bpf_map__set_max_entries(skel->maps.cpus_available, n_cpus) < 0) { + fprintf(stderr, "Failed to set max entries for cpus_available map: %s", + strerror(errno)); + ret = EXIT_FAIL_BPF; + goto end_destroy; } - mark_cpus_unavailable(); - cpu = malloc(n_cpus * sizeof(int)); + cpu = calloc(n_cpus, sizeof(int)); if (!cpu) { - fprintf(stderr, "failed to allocate cpu array\n"); - return err; + fprintf(stderr, "Failed to allocate cpu array\n"); + goto end_destroy; } - memset(cpu, 0, n_cpus * sizeof(int)); - /* Parse commands line args */ - while ((opt = getopt_long(argc, argv, "hSd:s:p:q:c:xzFf:e:r:m:n", + prog = skel->progs.xdp_prognum5_lb_hash_ip_pairs; + while ((opt = getopt_long(argc, argv, "d:si:Sxp:f:e:r:m:c:q:Fvh", long_options, &longindex)) != -1) { switch (opt) { case 'd': if (strlen(optarg) >= IF_NAMESIZE) { - fprintf(stderr, "ERR: --dev name too long\n"); - goto error; + fprintf(stderr, "-d/--dev name too long\n"); + goto end_cpu; } ifname = (char *)&ifname_buf; - strncpy(ifname, optarg, IF_NAMESIZE); + safe_strncpy(ifname, optarg, sizeof(ifname)); ifindex = if_nametoindex(ifname); - if (ifindex == 0) { - fprintf(stderr, - "ERR: --dev name unknown err(%d):%s\n", + if (!ifindex) + ifindex = strtoul(optarg, NULL, 0); + if (!ifindex) { + fprintf(stderr, "Bad interface index or name (%d): %s\n", errno, strerror(errno)); - goto error; + usage(argv, long_options, __doc__, mask, true, skel->obj); + goto end_cpu; } break; case 's': - interval = atoi(optarg); + mask |= SAMPLE_REDIRECT_MAP_CNT; + break; + case 'i': + interval = strtoul(optarg, NULL, 0); break; case 'S': - xdp_flags |= XDP_FLAGS_SKB_MODE; + generic = true; break; case 'x': stress_mode = true; break; - case 'z': - use_separators = false; - break; case 'p': /* Selecting eBPF prog to load */ prog_name = optarg; - break; - case 'n': - mprog_disable = true; + prog = bpf_object__find_program_by_name(skel->obj, + prog_name); + if (!prog) { + fprintf(stderr, + "Failed to find program %s specified by" + " option -p/--progname\n", + prog_name); + print_avail_progs(skel->obj); + goto end_cpu; + } break; case 'f': mprog_filename = optarg; @@ -886,6 +439,7 @@ int main(int argc, char **argv) break; case 'r': redir_interface = optarg; + mask |= SAMPLE_DEVMAP_XMIT_CNT_MULTI; break; case 'm': redir_map = optarg; @@ -897,91 +451,112 @@ int main(int argc, char **argv) fprintf(stderr, "--cpu nr too large for cpumap err(%d):%s\n", errno, strerror(errno)); - goto error; + goto end_cpu; } cpu[added_cpus++] = add_cpu; break; case 'q': - qsize = atoi(optarg); + qsize = strtoul(optarg, NULL, 0); break; case 'F': - xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST; + force = true; + break; + case 'v': + sample_switch_mode(); break; case 'h': - error: + error = false; default: - free(cpu); - usage(argv, obj); - return EXIT_FAIL_OPTION; + usage(argv, long_options, __doc__, mask, error, skel->obj); + goto end_cpu; } } - if (!(xdp_flags & XDP_FLAGS_SKB_MODE)) - xdp_flags |= XDP_FLAGS_DRV_MODE; - - /* Required option */ + ret = EXIT_FAIL_OPTION; if (ifindex == -1) { - fprintf(stderr, "ERR: required option --dev missing\n"); - usage(argv, obj); - err = EXIT_FAIL_OPTION; - goto out; + fprintf(stderr, "Required option --dev missing\n"); + usage(argv, long_options, __doc__, mask, true, skel->obj); + goto end_cpu; } - /* Required option */ + if (add_cpu == -1) { - fprintf(stderr, "ERR: required option --cpu missing\n"); - fprintf(stderr, " Specify multiple --cpu option to add more\n"); - usage(argv, obj); - err = EXIT_FAIL_OPTION; - goto out; + fprintf(stderr, "Required option --cpu missing\n" + "Specify multiple --cpu option to add more\n"); + usage(argv, long_options, __doc__, mask, true, skel->obj); + goto end_cpu; } - value.bpf_prog.fd = 0; - if (!mprog_disable) - value.bpf_prog.fd = load_cpumap_prog(mprog_filename, mprog_name, - redir_interface, redir_map); - if (value.bpf_prog.fd < 0) { - err = value.bpf_prog.fd; - goto out; + skel->rodata->from_match[0] = ifindex; + if (redir_interface) + skel->rodata->to_match[0] = if_nametoindex(redir_interface); + + ret = xdp_redirect_cpu__load(skel); + if (ret < 0) { + fprintf(stderr, "Failed to xdp_redirect_cpu__load: %s\n", + strerror(errno)); + goto end_cpu; } - value.qsize = qsize; - for (i = 0; i < added_cpus; i++) - create_cpu_entry(cpu[i], &value, i, true); + ret = bpf_obj_get_info_by_fd(bpf_map__fd(skel->maps.cpu_map), &info, &infosz); + if (ret < 0) { + fprintf(stderr, "Failed bpf_obj_get_info_by_fd for cpumap: %s\n", + strerror(errno)); + goto end_cpu; + } - /* Remove XDP program when program is interrupted or killed */ - signal(SIGINT, int_exit); - signal(SIGTERM, int_exit); + skel->bss->cpumap_map_id = info.id; - prog = bpf_object__find_program_by_title(obj, prog_name); - if (!prog) { - fprintf(stderr, "bpf_object__find_program_by_title failed\n"); - goto out; + map_fd = bpf_map__fd(skel->maps.cpu_map); + avail_fd = bpf_map__fd(skel->maps.cpus_available); + count_fd = bpf_map__fd(skel->maps.cpus_count); + + ret = mark_cpus_unavailable(); + if (ret < 0) { + fprintf(stderr, "Unable to mark CPUs as unavailable\n"); + goto end_cpu; } - prog_fd = bpf_program__fd(prog); - if (prog_fd < 0) { - fprintf(stderr, "bpf_program__fd failed\n"); - goto out; + ret = sample_init(skel, mask); + if (ret < 0) { + fprintf(stderr, "Failed to initialize sample: %s\n", strerror(-ret)); + ret = EXIT_FAIL; + goto end_cpu; } - if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) { - fprintf(stderr, "link set xdp fd failed\n"); - err = EXIT_FAIL_XDP; - goto out; + value.bpf_prog.fd = set_cpumap_prog(skel, redir_interface, redir_map, + mprog_filename, mprog_name); + if (value.bpf_prog.fd < 0) { + fprintf(stderr, "Failed to set CPUMAP BPF program: %s\n", + strerror(-value.bpf_prog.fd)); + usage(argv, long_options, __doc__, mask, true, skel->obj); + ret = EXIT_FAIL_BPF; + goto end_cpu; } + value.qsize = qsize; - err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); - if (err) { - printf("can't get prog info - %s\n", strerror(errno)); - goto out; + for (i = 0; i < added_cpus; i++) { + if (create_cpu_entry(cpu[i], &value, i, true) < 0) { + fprintf(stderr, "Cannot proceed, exiting\n"); + usage(argv, long_options, __doc__, mask, true, skel->obj); + goto end_cpu; + } } - prog_id = info.id; - stats_poll(interval, use_separators, prog_name, mprog_name, - &value, stress_mode); + ret = EXIT_FAIL_XDP; + if (sample_install_xdp(prog, ifindex, generic, force) < 0) + goto end_cpu; - err = EXIT_OK; -out: + ret = sample_run(interval, stress_mode ? stress_cpumap : NULL, &value); + if (ret < 0) { + fprintf(stderr, "Failed during sample run: %s\n", strerror(-ret)); + ret = EXIT_FAIL; + goto end_cpu; + } + ret = EXIT_OK; +end_cpu: free(cpu); - return err; +end_destroy: + xdp_redirect_cpu__destroy(skel); +end: + sample_exit(ret); } -- cgit v1.2.3-59-g8ed1b From 54af769db92a47be8a9d23a4434dbd343b36f216 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Sat, 21 Aug 2021 05:50:07 +0530 Subject: samples: bpf: Convert xdp_redirect_map_kern.o to XDP samples helper Also update it to use consistent SEC("xdp") and SEC("xdp_devmap") naming, and use global variable instead of BPF map for copying the mac address. Signed-off-by: Kumar Kartikeya Dwivedi Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20210821002010.845777-20-memxor@gmail.com --- samples/bpf/Makefile | 6 +- samples/bpf/xdp_redirect_map.bpf.c | 95 ++++++++++++++++++++++++ samples/bpf/xdp_redirect_map_kern.c | 142 ------------------------------------ 3 files changed, 99 insertions(+), 144 deletions(-) create mode 100644 samples/bpf/xdp_redirect_map.bpf.c delete mode 100644 samples/bpf/xdp_redirect_map_kern.c (limited to 'samples') diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 43d3e52a8659..8faef4bcead4 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -163,7 +163,6 @@ always-y += tcp_clamp_kern.o always-y += tcp_basertt_kern.o always-y += tcp_tos_reflect_kern.o always-y += tcp_dumpstats_kern.o -always-y += xdp_redirect_map_kern.o always-y += xdp_redirect_map_multi_kern.o always-y += xdp_rxq_info_kern.o always-y += xdp2skb_meta_kern.o @@ -357,6 +356,7 @@ endef CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG)) $(obj)/xdp_redirect_cpu.bpf.o: $(obj)/xdp_sample.bpf.o +$(obj)/xdp_redirect_map.bpf.o: $(obj)/xdp_sample.bpf.o $(obj)/xdp_redirect.bpf.o: $(obj)/xdp_sample.bpf.o $(obj)/xdp_monitor.bpf.o: $(obj)/xdp_sample.bpf.o @@ -368,10 +368,12 @@ $(obj)/%.bpf.o: $(src)/%.bpf.c $(obj)/vmlinux.h $(src)/xdp_sample.bpf.h $(src)/x -I$(srctree)/tools/lib $(CLANG_SYS_INCLUDES) \ -c $(filter %.bpf.c,$^) -o $@ -LINKED_SKELS := xdp_redirect_cpu.skel.h xdp_redirect.skel.h xdp_monitor.skel.h +LINKED_SKELS := xdp_redirect_cpu.skel.h xdp_redirect_map.skel.h \ + xdp_redirect.skel.h xdp_monitor.skel.h clean-files += $(LINKED_SKELS) xdp_redirect_cpu.skel.h-deps := xdp_redirect_cpu.bpf.o xdp_sample.bpf.o +xdp_redirect_map.skel.h-deps := xdp_redirect_map.bpf.o xdp_sample.bpf.o xdp_redirect.skel.h-deps := xdp_redirect.bpf.o xdp_sample.bpf.o xdp_monitor.skel.h-deps := xdp_monitor.bpf.o xdp_sample.bpf.o diff --git a/samples/bpf/xdp_redirect_map.bpf.c b/samples/bpf/xdp_redirect_map.bpf.c new file mode 100644 index 000000000000..59efd656e1b2 --- /dev/null +++ b/samples/bpf/xdp_redirect_map.bpf.c @@ -0,0 +1,95 @@ +/* Copyright (c) 2017 Covalent IO, Inc. http://covalent.io + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#define KBUILD_MODNAME "foo" + +#include "vmlinux.h" +#include "xdp_sample.bpf.h" +#include "xdp_sample_shared.h" + +/* The 2nd xdp prog on egress does not support skb mode, so we define two + * maps, tx_port_general and tx_port_native. + */ +struct { + __uint(type, BPF_MAP_TYPE_DEVMAP); + __uint(key_size, sizeof(int)); + __uint(value_size, sizeof(int)); + __uint(max_entries, 1); +} tx_port_general SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_DEVMAP); + __uint(key_size, sizeof(int)); + __uint(value_size, sizeof(struct bpf_devmap_val)); + __uint(max_entries, 1); +} tx_port_native SEC(".maps"); + +/* store egress interface mac address */ +const volatile char tx_mac_addr[ETH_ALEN]; + +static __always_inline int xdp_redirect_map(struct xdp_md *ctx, void *redirect_map) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + u32 key = bpf_get_smp_processor_id(); + struct ethhdr *eth = data; + struct datarec *rec; + u64 nh_off; + + nh_off = sizeof(*eth); + if (data + nh_off > data_end) + return XDP_DROP; + + rec = bpf_map_lookup_elem(&rx_cnt, &key); + if (!rec) + return XDP_PASS; + NO_TEAR_INC(rec->processed); + swap_src_dst_mac(data); + return bpf_redirect_map(redirect_map, 0, 0); +} + +SEC("xdp") +int xdp_redirect_map_general(struct xdp_md *ctx) +{ + return xdp_redirect_map(ctx, &tx_port_general); +} + +SEC("xdp") +int xdp_redirect_map_native(struct xdp_md *ctx) +{ + return xdp_redirect_map(ctx, &tx_port_native); +} + +SEC("xdp_devmap/egress") +int xdp_redirect_map_egress(struct xdp_md *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct ethhdr *eth = data; + u64 nh_off; + + nh_off = sizeof(*eth); + if (data + nh_off > data_end) + return XDP_DROP; + + __builtin_memcpy(eth->h_source, (const char *)tx_mac_addr, ETH_ALEN); + + return XDP_PASS; +} + +/* Redirect require an XDP bpf_prog loaded on the TX device */ +SEC("xdp") +int xdp_redirect_dummy_prog(struct xdp_md *ctx) +{ + return XDP_PASS; +} + +char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/xdp_redirect_map_kern.c b/samples/bpf/xdp_redirect_map_kern.c deleted file mode 100644 index a92b8e567bdd..000000000000 --- a/samples/bpf/xdp_redirect_map_kern.c +++ /dev/null @@ -1,142 +0,0 @@ -/* Copyright (c) 2017 Covalent IO, Inc. http://covalent.io - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - */ -#define KBUILD_MODNAME "foo" -#include -#include -#include -#include -#include -#include -#include -#include - -/* The 2nd xdp prog on egress does not support skb mode, so we define two - * maps, tx_port_general and tx_port_native. - */ -struct { - __uint(type, BPF_MAP_TYPE_DEVMAP); - __uint(key_size, sizeof(int)); - __uint(value_size, sizeof(int)); - __uint(max_entries, 100); -} tx_port_general SEC(".maps"); - -struct { - __uint(type, BPF_MAP_TYPE_DEVMAP); - __uint(key_size, sizeof(int)); - __uint(value_size, sizeof(struct bpf_devmap_val)); - __uint(max_entries, 100); -} tx_port_native SEC(".maps"); - -/* Count RX packets, as XDP bpf_prog doesn't get direct TX-success - * feedback. Redirect TX errors can be caught via a tracepoint. - */ -struct { - __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); - __type(key, u32); - __type(value, long); - __uint(max_entries, 1); -} rxcnt SEC(".maps"); - -/* map to store egress interface mac address */ -struct { - __uint(type, BPF_MAP_TYPE_ARRAY); - __type(key, u32); - __type(value, __be64); - __uint(max_entries, 1); -} tx_mac SEC(".maps"); - -static void swap_src_dst_mac(void *data) -{ - unsigned short *p = data; - unsigned short dst[3]; - - dst[0] = p[0]; - dst[1] = p[1]; - dst[2] = p[2]; - p[0] = p[3]; - p[1] = p[4]; - p[2] = p[5]; - p[3] = dst[0]; - p[4] = dst[1]; - p[5] = dst[2]; -} - -static __always_inline int xdp_redirect_map(struct xdp_md *ctx, void *redirect_map) -{ - void *data_end = (void *)(long)ctx->data_end; - void *data = (void *)(long)ctx->data; - struct ethhdr *eth = data; - int rc = XDP_DROP; - long *value; - u32 key = 0; - u64 nh_off; - int vport; - - nh_off = sizeof(*eth); - if (data + nh_off > data_end) - return rc; - - /* constant virtual port */ - vport = 0; - - /* count packet in global counter */ - value = bpf_map_lookup_elem(&rxcnt, &key); - if (value) - *value += 1; - - swap_src_dst_mac(data); - - /* send packet out physical port */ - return bpf_redirect_map(redirect_map, vport, 0); -} - -SEC("xdp_redirect_general") -int xdp_redirect_map_general(struct xdp_md *ctx) -{ - return xdp_redirect_map(ctx, &tx_port_general); -} - -SEC("xdp_redirect_native") -int xdp_redirect_map_native(struct xdp_md *ctx) -{ - return xdp_redirect_map(ctx, &tx_port_native); -} - -SEC("xdp_devmap/map_prog") -int xdp_redirect_map_egress(struct xdp_md *ctx) -{ - void *data_end = (void *)(long)ctx->data_end; - void *data = (void *)(long)ctx->data; - struct ethhdr *eth = data; - __be64 *mac; - u32 key = 0; - u64 nh_off; - - nh_off = sizeof(*eth); - if (data + nh_off > data_end) - return XDP_DROP; - - mac = bpf_map_lookup_elem(&tx_mac, &key); - if (mac) - __builtin_memcpy(eth->h_source, mac, ETH_ALEN); - - return XDP_PASS; -} - -/* Redirect require an XDP bpf_prog loaded on the TX device */ -SEC("xdp_redirect_dummy") -int xdp_redirect_dummy_prog(struct xdp_md *ctx) -{ - return XDP_PASS; -} - -char _license[] SEC("license") = "GPL"; -- cgit v1.2.3-59-g8ed1b From bbe65865aa05fdbd20e37bbd3b2c95a0e9e24416 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Sat, 21 Aug 2021 05:50:08 +0530 Subject: samples: bpf: Convert xdp_redirect_map to XDP samples helper Use the libbpf skeleton facility and other utilities provided by XDP samples helper. Since get_mac_addr is already provided by XDP samples helper, we drop it. Also convert to XDP samples helper similar to prior samples to minimize duplication of code. Signed-off-by: Kumar Kartikeya Dwivedi Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20210821002010.845777-21-memxor@gmail.com --- samples/bpf/Makefile | 5 +- samples/bpf/xdp_redirect_map_user.c | 385 +++++++++++++++--------------------- 2 files changed, 161 insertions(+), 229 deletions(-) (limited to 'samples') diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 8faef4bcead4..6decc8f9bcc2 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -39,7 +39,6 @@ tprogs-y += lwt_len_hist tprogs-y += xdp_tx_iptunnel tprogs-y += test_map_in_map tprogs-y += per_socket_stats_example -tprogs-y += xdp_redirect_map tprogs-y += xdp_redirect_map_multi tprogs-y += xdp_rxq_info tprogs-y += syscall_tp @@ -55,6 +54,7 @@ tprogs-y += ibumad tprogs-y += hbm tprogs-y += xdp_redirect_cpu +tprogs-y += xdp_redirect_map tprogs-y += xdp_redirect tprogs-y += xdp_monitor @@ -100,7 +100,6 @@ lwt_len_hist-objs := lwt_len_hist_user.o xdp_tx_iptunnel-objs := xdp_tx_iptunnel_user.o test_map_in_map-objs := test_map_in_map_user.o per_socket_stats_example-objs := cookie_uid_helper_example.o -xdp_redirect_map-objs := xdp_redirect_map_user.o xdp_redirect_map_multi-objs := xdp_redirect_map_multi_user.o xdp_rxq_info-objs := xdp_rxq_info_user.o syscall_tp-objs := syscall_tp_user.o @@ -116,6 +115,7 @@ ibumad-objs := ibumad_user.o hbm-objs := hbm.o $(CGROUP_HELPERS) xdp_redirect_cpu-objs := xdp_redirect_cpu_user.o $(XDP_SAMPLE) +xdp_redirect_map-objs := xdp_redirect_map_user.o $(XDP_SAMPLE) xdp_redirect-objs := xdp_redirect_user.o $(XDP_SAMPLE) xdp_monitor-objs := xdp_monitor_user.o $(XDP_SAMPLE) @@ -311,6 +311,7 @@ $(BPF_SAMPLES_PATH)/*.c: verify_target_bpf $(LIBBPF) $(src)/*.c: verify_target_bpf $(LIBBPF) $(obj)/xdp_redirect_cpu_user.o: $(obj)/xdp_redirect_cpu.skel.h +$(obj)/xdp_redirect_map_user.o: $(obj)/xdp_redirect_map.skel.h $(obj)/xdp_redirect_user.o: $(obj)/xdp_redirect.skel.h $(obj)/xdp_monitor_user.o: $(obj)/xdp_monitor.skel.h diff --git a/samples/bpf/xdp_redirect_map_user.c b/samples/bpf/xdp_redirect_map_user.c index 0e8192688dfc..b6e4fc849577 100644 --- a/samples/bpf/xdp_redirect_map_user.c +++ b/samples/bpf/xdp_redirect_map_user.c @@ -1,6 +1,10 @@ // SPDX-License-Identifier: GPL-2.0-only /* Copyright (c) 2017 Covalent IO, Inc. http://covalent.io */ +static const char *__doc__ = +"XDP redirect tool, using BPF_MAP_TYPE_DEVMAP\n" +"Usage: xdp_redirect_map _IN _OUT\n"; + #include #include #include @@ -13,165 +17,83 @@ #include #include #include -#include -#include -#include -#include -#include - -#include "bpf_util.h" +#include #include #include +#include "bpf_util.h" +#include "xdp_sample_user.h" +#include "xdp_redirect_map.skel.h" -static int ifindex_in; -static int ifindex_out; -static bool ifindex_out_xdp_dummy_attached = true; -static bool xdp_devmap_attached; -static __u32 prog_id; -static __u32 dummy_prog_id; - -static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; -static int rxcnt_map_fd; - -static void int_exit(int sig) -{ - __u32 curr_prog_id = 0; - - if (bpf_get_link_xdp_id(ifindex_in, &curr_prog_id, xdp_flags)) { - printf("bpf_get_link_xdp_id failed\n"); - exit(1); - } - if (prog_id == curr_prog_id) - bpf_set_link_xdp_fd(ifindex_in, -1, xdp_flags); - else if (!curr_prog_id) - printf("couldn't find a prog id on iface IN\n"); - else - printf("program on iface IN changed, not removing\n"); - - if (ifindex_out_xdp_dummy_attached) { - curr_prog_id = 0; - if (bpf_get_link_xdp_id(ifindex_out, &curr_prog_id, - xdp_flags)) { - printf("bpf_get_link_xdp_id failed\n"); - exit(1); - } - if (dummy_prog_id == curr_prog_id) - bpf_set_link_xdp_fd(ifindex_out, -1, xdp_flags); - else if (!curr_prog_id) - printf("couldn't find a prog id on iface OUT\n"); - else - printf("program on iface OUT changed, not removing\n"); - } - exit(0); -} - -static void poll_stats(int interval, int ifindex) -{ - unsigned int nr_cpus = bpf_num_possible_cpus(); - __u64 values[nr_cpus], prev[nr_cpus]; - - memset(prev, 0, sizeof(prev)); - - while (1) { - __u64 sum = 0; - __u32 key = 0; - int i; - - sleep(interval); - assert(bpf_map_lookup_elem(rxcnt_map_fd, &key, values) == 0); - for (i = 0; i < nr_cpus; i++) - sum += (values[i] - prev[i]); - if (sum) - printf("ifindex %i: %10llu pkt/s\n", - ifindex, sum / interval); - memcpy(prev, values, sizeof(values)); - } -} - -static int get_mac_addr(unsigned int ifindex_out, void *mac_addr) -{ - char ifname[IF_NAMESIZE]; - struct ifreq ifr; - int fd, ret = -1; - - fd = socket(AF_INET, SOCK_DGRAM, 0); - if (fd < 0) - return ret; - - if (!if_indextoname(ifindex_out, ifname)) - goto err_out; - - strcpy(ifr.ifr_name, ifname); - - if (ioctl(fd, SIOCGIFHWADDR, &ifr) != 0) - goto err_out; - - memcpy(mac_addr, ifr.ifr_hwaddr.sa_data, 6 * sizeof(char)); - ret = 0; +static int mask = SAMPLE_RX_CNT | SAMPLE_REDIRECT_ERR_MAP_CNT | + SAMPLE_EXCEPTION_CNT | SAMPLE_DEVMAP_XMIT_CNT_MULTI; -err_out: - close(fd); - return ret; -} +DEFINE_SAMPLE_INIT(xdp_redirect_map); -static void usage(const char *prog) -{ - fprintf(stderr, - "usage: %s [OPTS] _IN _OUT\n\n" - "OPTS:\n" - " -S use skb-mode\n" - " -N enforce native mode\n" - " -F force loading prog\n" - " -X load xdp program on egress\n", - prog); -} +static const struct option long_options[] = { + { "help", no_argument, NULL, 'h' }, + { "skb-mode", no_argument, NULL, 'S' }, + { "force", no_argument, NULL, 'F' }, + { "load-egress", no_argument, NULL, 'X' }, + { "stats", no_argument, NULL, 's' }, + { "interval", required_argument, NULL, 'i' }, + { "verbose", no_argument, NULL, 'v' }, + {} +}; int main(int argc, char **argv) { - struct bpf_prog_load_attr prog_load_attr = { - .prog_type = BPF_PROG_TYPE_UNSPEC, - }; - struct bpf_program *prog, *dummy_prog, *devmap_prog; - int prog_fd, dummy_prog_fd, devmap_prog_fd = 0; - int tx_port_map_fd, tx_mac_map_fd; - struct bpf_devmap_val devmap_val; - struct bpf_prog_info info = {}; - __u32 info_len = sizeof(info); - const char *optstr = "FSNX"; - struct bpf_object *obj; - int ret, opt, key = 0; - char filename[256]; - - while ((opt = getopt(argc, argv, optstr)) != -1) { + struct bpf_devmap_val devmap_val = {}; + bool xdp_devmap_attached = false; + struct xdp_redirect_map *skel; + char str[2 * IF_NAMESIZE + 1]; + char ifname_out[IF_NAMESIZE]; + struct bpf_map *tx_port_map; + char ifname_in[IF_NAMESIZE]; + int ifindex_in, ifindex_out; + unsigned long interval = 2; + int ret = EXIT_FAIL_OPTION; + struct bpf_program *prog; + bool generic = false; + bool force = false; + bool tried = false; + bool error = true; + int opt, key = 0; + + while ((opt = getopt_long(argc, argv, "hSFXi:vs", + long_options, NULL)) != -1) { switch (opt) { case 'S': - xdp_flags |= XDP_FLAGS_SKB_MODE; - break; - case 'N': - /* default, set below */ + generic = true; + /* devmap_xmit tracepoint not available */ + mask &= ~(SAMPLE_DEVMAP_XMIT_CNT | + SAMPLE_DEVMAP_XMIT_CNT_MULTI); break; case 'F': - xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST; + force = true; break; case 'X': xdp_devmap_attached = true; break; + case 'i': + interval = strtoul(optarg, NULL, 0); + break; + case 'v': + sample_switch_mode(); + break; + case 's': + mask |= SAMPLE_REDIRECT_MAP_CNT; + break; + case 'h': + error = false; default: - usage(basename(argv[0])); - return 1; + sample_usage(argv, long_options, __doc__, mask, error); + return ret; } } - if (!(xdp_flags & XDP_FLAGS_SKB_MODE)) { - xdp_flags |= XDP_FLAGS_DRV_MODE; - } else if (xdp_devmap_attached) { - printf("Load xdp program on egress with SKB mode not supported yet\n"); - return 1; - } - - if (optind == argc) { - printf("usage: %s _IN _OUT\n", argv[0]); - return 1; + if (argc <= optind + 1) { + sample_usage(argv, long_options, __doc__, mask, true); + goto end; } ifindex_in = if_nametoindex(argv[optind]); @@ -182,107 +104,116 @@ int main(int argc, char **argv) if (!ifindex_out) ifindex_out = strtoul(argv[optind + 1], NULL, 0); - printf("input: %d output: %d\n", ifindex_in, ifindex_out); - - snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); - prog_load_attr.file = filename; - - if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd)) - return 1; - - if (xdp_flags & XDP_FLAGS_SKB_MODE) { - prog = bpf_object__find_program_by_name(obj, "xdp_redirect_map_general"); - tx_port_map_fd = bpf_object__find_map_fd_by_name(obj, "tx_port_general"); - } else { - prog = bpf_object__find_program_by_name(obj, "xdp_redirect_map_native"); - tx_port_map_fd = bpf_object__find_map_fd_by_name(obj, "tx_port_native"); - } - dummy_prog = bpf_object__find_program_by_name(obj, "xdp_redirect_dummy_prog"); - if (!prog || dummy_prog < 0 || tx_port_map_fd < 0) { - printf("finding prog/dummy_prog/tx_port_map in obj file failed\n"); - goto out; - } - prog_fd = bpf_program__fd(prog); - dummy_prog_fd = bpf_program__fd(dummy_prog); - if (prog_fd < 0 || dummy_prog_fd < 0 || tx_port_map_fd < 0) { - printf("bpf_prog_load_xattr: %s\n", strerror(errno)); - return 1; - } - - tx_mac_map_fd = bpf_object__find_map_fd_by_name(obj, "tx_mac"); - rxcnt_map_fd = bpf_object__find_map_fd_by_name(obj, "rxcnt"); - if (tx_mac_map_fd < 0 || rxcnt_map_fd < 0) { - printf("bpf_object__find_map_fd_by_name failed\n"); - return 1; + if (!ifindex_in || !ifindex_out) { + fprintf(stderr, "Bad interface index or name\n"); + sample_usage(argv, long_options, __doc__, mask, true); + goto end; } - if (bpf_set_link_xdp_fd(ifindex_in, prog_fd, xdp_flags) < 0) { - printf("ERROR: link set xdp fd failed on %d\n", ifindex_in); - return 1; + skel = xdp_redirect_map__open(); + if (!skel) { + fprintf(stderr, "Failed to xdp_redirect_map__open: %s\n", + strerror(errno)); + ret = EXIT_FAIL_BPF; + goto end; } - ret = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); - if (ret) { - printf("can't get prog info - %s\n", strerror(errno)); - return ret; + ret = sample_init_pre_load(skel); + if (ret < 0) { + fprintf(stderr, "Failed to sample_init_pre_load: %s\n", strerror(-ret)); + ret = EXIT_FAIL_BPF; + goto end_destroy; } - prog_id = info.id; - - /* Loading dummy XDP prog on out-device */ - if (bpf_set_link_xdp_fd(ifindex_out, dummy_prog_fd, - (xdp_flags | XDP_FLAGS_UPDATE_IF_NOEXIST)) < 0) { - printf("WARN: link set xdp fd failed on %d\n", ifindex_out); - ifindex_out_xdp_dummy_attached = false; - } - - memset(&info, 0, sizeof(info)); - ret = bpf_obj_get_info_by_fd(dummy_prog_fd, &info, &info_len); - if (ret) { - printf("can't get prog info - %s\n", strerror(errno)); - return ret; - } - dummy_prog_id = info.id; /* Load 2nd xdp prog on egress. */ if (xdp_devmap_attached) { - unsigned char mac_addr[6]; - - devmap_prog = bpf_object__find_program_by_name(obj, "xdp_redirect_map_egress"); - if (!devmap_prog) { - printf("finding devmap_prog in obj file failed\n"); - goto out; - } - devmap_prog_fd = bpf_program__fd(devmap_prog); - if (devmap_prog_fd < 0) { - printf("finding devmap_prog fd failed\n"); - goto out; - } - - if (get_mac_addr(ifindex_out, mac_addr) < 0) { - printf("get interface %d mac failed\n", ifindex_out); - goto out; + ret = get_mac_addr(ifindex_out, skel->rodata->tx_mac_addr); + if (ret < 0) { + fprintf(stderr, "Failed to get interface %d mac address: %s\n", + ifindex_out, strerror(-ret)); + ret = EXIT_FAIL; + goto end_destroy; } + } - ret = bpf_map_update_elem(tx_mac_map_fd, &key, mac_addr, 0); - if (ret) { - perror("bpf_update_elem tx_mac_map_fd"); - goto out; + skel->rodata->from_match[0] = ifindex_in; + skel->rodata->to_match[0] = ifindex_out; + + ret = xdp_redirect_map__load(skel); + if (ret < 0) { + fprintf(stderr, "Failed to xdp_redirect_map__load: %s\n", + strerror(errno)); + ret = EXIT_FAIL_BPF; + goto end_destroy; + } + + ret = sample_init(skel, mask); + if (ret < 0) { + fprintf(stderr, "Failed to initialize sample: %s\n", strerror(-ret)); + ret = EXIT_FAIL; + goto end_destroy; + } + + prog = skel->progs.xdp_redirect_map_native; + tx_port_map = skel->maps.tx_port_native; +restart: + if (sample_install_xdp(prog, ifindex_in, generic, force) < 0) { + /* First try with struct bpf_devmap_val as value for generic + * mode, then fallback to sizeof(int) for older kernels. + */ + fprintf(stderr, + "Trying fallback to sizeof(int) as value_size for devmap in generic mode\n"); + if (generic && !tried) { + prog = skel->progs.xdp_redirect_map_general; + tx_port_map = skel->maps.tx_port_general; + tried = true; + goto restart; } + ret = EXIT_FAIL_XDP; + goto end_destroy; } - signal(SIGINT, int_exit); - signal(SIGTERM, int_exit); + /* Loading dummy XDP prog on out-device */ + sample_install_xdp(skel->progs.xdp_redirect_dummy_prog, ifindex_out, generic, force); devmap_val.ifindex = ifindex_out; - devmap_val.bpf_prog.fd = devmap_prog_fd; - ret = bpf_map_update_elem(tx_port_map_fd, &key, &devmap_val, 0); - if (ret) { - perror("bpf_update_elem"); - goto out; - } - - poll_stats(2, ifindex_out); - -out: - return 0; + if (xdp_devmap_attached) + devmap_val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_redirect_map_egress); + ret = bpf_map_update_elem(bpf_map__fd(tx_port_map), &key, &devmap_val, 0); + if (ret < 0) { + fprintf(stderr, "Failed to update devmap value: %s\n", + strerror(errno)); + ret = EXIT_FAIL_BPF; + goto end_destroy; + } + + ret = EXIT_FAIL; + if (!if_indextoname(ifindex_in, ifname_in)) { + fprintf(stderr, "Failed to if_indextoname for %d: %s\n", ifindex_in, + strerror(errno)); + goto end_destroy; + } + + if (!if_indextoname(ifindex_out, ifname_out)) { + fprintf(stderr, "Failed to if_indextoname for %d: %s\n", ifindex_out, + strerror(errno)); + goto end_destroy; + } + + safe_strncpy(str, get_driver_name(ifindex_in), sizeof(str)); + printf("Redirecting from %s (ifindex %d; driver %s) to %s (ifindex %d; driver %s)\n", + ifname_in, ifindex_in, str, ifname_out, ifindex_out, get_driver_name(ifindex_out)); + snprintf(str, sizeof(str), "%s->%s", ifname_in, ifname_out); + + ret = sample_run(interval, NULL, NULL); + if (ret < 0) { + fprintf(stderr, "Failed during sample run: %s\n", strerror(-ret)); + ret = EXIT_FAIL; + goto end_destroy; + } + ret = EXIT_OK; +end_destroy: + xdp_redirect_map__destroy(skel); +end: + sample_exit(ret); } -- cgit v1.2.3-59-g8ed1b From a29b3ca17ee69e3e5182f1ed29be6b6ec306c149 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Sat, 21 Aug 2021 05:50:09 +0530 Subject: samples: bpf: Convert xdp_redirect_map_multi_kern.o to XDP samples helper One of the notable changes is using a BPF_MAP_TYPE_HASH instead of array map to store mac addresses of devices, as the resizing behavior was based on max_ifindex, which unecessarily maximized the capacity of map beyond what was needed. Signed-off-by: Kumar Kartikeya Dwivedi Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20210821002010.845777-22-memxor@gmail.com --- samples/bpf/Makefile | 7 +-- samples/bpf/xdp_redirect_map_multi.bpf.c | 82 ++++++++++++++++++++++++++++ samples/bpf/xdp_redirect_map_multi_kern.c | 88 ------------------------------- 3 files changed, 86 insertions(+), 91 deletions(-) create mode 100644 samples/bpf/xdp_redirect_map_multi.bpf.c delete mode 100644 samples/bpf/xdp_redirect_map_multi_kern.c (limited to 'samples') diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 6decc8f9bcc2..2b3d9e39c4f3 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -163,7 +163,6 @@ always-y += tcp_clamp_kern.o always-y += tcp_basertt_kern.o always-y += tcp_tos_reflect_kern.o always-y += tcp_dumpstats_kern.o -always-y += xdp_redirect_map_multi_kern.o always-y += xdp_rxq_info_kern.o always-y += xdp2skb_meta_kern.o always-y += syscall_tp_kern.o @@ -357,6 +356,7 @@ endef CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG)) $(obj)/xdp_redirect_cpu.bpf.o: $(obj)/xdp_sample.bpf.o +$(obj)/xdp_redirect_map_multi.bpf.o: $(obj)/xdp_sample.bpf.o $(obj)/xdp_redirect_map.bpf.o: $(obj)/xdp_sample.bpf.o $(obj)/xdp_redirect.bpf.o: $(obj)/xdp_sample.bpf.o $(obj)/xdp_monitor.bpf.o: $(obj)/xdp_sample.bpf.o @@ -369,11 +369,12 @@ $(obj)/%.bpf.o: $(src)/%.bpf.c $(obj)/vmlinux.h $(src)/xdp_sample.bpf.h $(src)/x -I$(srctree)/tools/lib $(CLANG_SYS_INCLUDES) \ -c $(filter %.bpf.c,$^) -o $@ -LINKED_SKELS := xdp_redirect_cpu.skel.h xdp_redirect_map.skel.h \ - xdp_redirect.skel.h xdp_monitor.skel.h +LINKED_SKELS := xdp_redirect_cpu.skel.h xdp_redirect_map_multi.skel.h \ + xdp_redirect_map.skel.h xdp_redirect.skel.h xdp_monitor.skel.h clean-files += $(LINKED_SKELS) xdp_redirect_cpu.skel.h-deps := xdp_redirect_cpu.bpf.o xdp_sample.bpf.o +xdp_redirect_map_multi.skel.h-deps := xdp_redirect_map_multi.bpf.o xdp_sample.bpf.o xdp_redirect_map.skel.h-deps := xdp_redirect_map.bpf.o xdp_sample.bpf.o xdp_redirect.skel.h-deps := xdp_redirect.bpf.o xdp_sample.bpf.o xdp_monitor.skel.h-deps := xdp_monitor.bpf.o xdp_sample.bpf.o diff --git a/samples/bpf/xdp_redirect_map_multi.bpf.c b/samples/bpf/xdp_redirect_map_multi.bpf.c new file mode 100644 index 000000000000..8f59d430cb64 --- /dev/null +++ b/samples/bpf/xdp_redirect_map_multi.bpf.c @@ -0,0 +1,82 @@ +// SPDX-License-Identifier: GPL-2.0 +#define KBUILD_MODNAME "foo" + +#include "vmlinux.h" +#include "xdp_sample.bpf.h" +#include "xdp_sample_shared.h" + +enum { + BPF_F_BROADCAST = (1ULL << 3), + BPF_F_EXCLUDE_INGRESS = (1ULL << 4), +}; + +struct { + __uint(type, BPF_MAP_TYPE_DEVMAP_HASH); + __uint(key_size, sizeof(int)); + __uint(value_size, sizeof(int)); + __uint(max_entries, 32); +} forward_map_general SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_DEVMAP_HASH); + __uint(key_size, sizeof(int)); + __uint(value_size, sizeof(struct bpf_devmap_val)); + __uint(max_entries, 32); +} forward_map_native SEC(".maps"); + +/* map to store egress interfaces mac addresses */ +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, u32); + __type(value, __be64); + __uint(max_entries, 32); +} mac_map SEC(".maps"); + +static int xdp_redirect_map(struct xdp_md *ctx, void *forward_map) +{ + u32 key = bpf_get_smp_processor_id(); + struct datarec *rec; + + rec = bpf_map_lookup_elem(&rx_cnt, &key); + if (!rec) + return XDP_PASS; + NO_TEAR_INC(rec->processed); + + return bpf_redirect_map(forward_map, 0, + BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS); +} + +SEC("xdp") +int xdp_redirect_map_general(struct xdp_md *ctx) +{ + return xdp_redirect_map(ctx, &forward_map_general); +} + +SEC("xdp") +int xdp_redirect_map_native(struct xdp_md *ctx) +{ + return xdp_redirect_map(ctx, &forward_map_native); +} + +SEC("xdp_devmap/egress") +int xdp_devmap_prog(struct xdp_md *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + u32 key = ctx->egress_ifindex; + struct ethhdr *eth = data; + __be64 *mac; + u64 nh_off; + + nh_off = sizeof(*eth); + if (data + nh_off > data_end) + return XDP_DROP; + + mac = bpf_map_lookup_elem(&mac_map, &key); + if (mac) + __builtin_memcpy(eth->h_source, mac, ETH_ALEN); + + return XDP_PASS; +} + +char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/xdp_redirect_map_multi_kern.c b/samples/bpf/xdp_redirect_map_multi_kern.c deleted file mode 100644 index 71aa23d1cb2b..000000000000 --- a/samples/bpf/xdp_redirect_map_multi_kern.c +++ /dev/null @@ -1,88 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#define KBUILD_MODNAME "foo" -#include -#include -#include -#include -#include -#include - -struct { - __uint(type, BPF_MAP_TYPE_DEVMAP_HASH); - __uint(key_size, sizeof(int)); - __uint(value_size, sizeof(int)); - __uint(max_entries, 32); -} forward_map_general SEC(".maps"); - -struct { - __uint(type, BPF_MAP_TYPE_DEVMAP_HASH); - __uint(key_size, sizeof(int)); - __uint(value_size, sizeof(struct bpf_devmap_val)); - __uint(max_entries, 32); -} forward_map_native SEC(".maps"); - -struct { - __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); - __type(key, u32); - __type(value, long); - __uint(max_entries, 1); -} rxcnt SEC(".maps"); - -/* map to store egress interfaces mac addresses, set the - * max_entries to 1 and extend it in user sapce prog. - */ -struct { - __uint(type, BPF_MAP_TYPE_ARRAY); - __type(key, u32); - __type(value, __be64); - __uint(max_entries, 1); -} mac_map SEC(".maps"); - -static int xdp_redirect_map(struct xdp_md *ctx, void *forward_map) -{ - long *value; - u32 key = 0; - - /* count packet in global counter */ - value = bpf_map_lookup_elem(&rxcnt, &key); - if (value) - *value += 1; - - return bpf_redirect_map(forward_map, key, - BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS); -} - -SEC("xdp_redirect_general") -int xdp_redirect_map_general(struct xdp_md *ctx) -{ - return xdp_redirect_map(ctx, &forward_map_general); -} - -SEC("xdp_redirect_native") -int xdp_redirect_map_native(struct xdp_md *ctx) -{ - return xdp_redirect_map(ctx, &forward_map_native); -} - -SEC("xdp_devmap/map_prog") -int xdp_devmap_prog(struct xdp_md *ctx) -{ - void *data_end = (void *)(long)ctx->data_end; - void *data = (void *)(long)ctx->data; - u32 key = ctx->egress_ifindex; - struct ethhdr *eth = data; - __be64 *mac; - u64 nh_off; - - nh_off = sizeof(*eth); - if (data + nh_off > data_end) - return XDP_DROP; - - mac = bpf_map_lookup_elem(&mac_map, &key); - if (mac) - __builtin_memcpy(eth->h_source, mac, ETH_ALEN); - - return XDP_PASS; -} - -char _license[] SEC("license") = "GPL"; -- cgit v1.2.3-59-g8ed1b From 594a116b2aa1985dbb5318c2be39b64b74ebff84 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Sat, 21 Aug 2021 05:50:10 +0530 Subject: samples: bpf: Convert xdp_redirect_map_multi to XDP samples helper Use the libbpf skeleton facility and other utilities provided by XDP samples helper. Also adapt to change of type of mac address map, so that no resizing is required. Add a new flag for sample mask that skips priting the from_device->to_device heading for each line, as xdp_redirect_map_multi may have two devices but the flow of data may be bidirectional, so the output would be confusing. Signed-off-by: Kumar Kartikeya Dwivedi Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20210821002010.845777-23-memxor@gmail.com --- samples/bpf/Makefile | 5 +- samples/bpf/xdp_redirect_map_multi_user.c | 345 ++++++++++++------------------ samples/bpf/xdp_sample_user.c | 2 +- samples/bpf/xdp_sample_user.h | 21 +- 4 files changed, 153 insertions(+), 220 deletions(-) (limited to 'samples') diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 2b3d9e39c4f3..4dc20be5fb96 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -39,7 +39,6 @@ tprogs-y += lwt_len_hist tprogs-y += xdp_tx_iptunnel tprogs-y += test_map_in_map tprogs-y += per_socket_stats_example -tprogs-y += xdp_redirect_map_multi tprogs-y += xdp_rxq_info tprogs-y += syscall_tp tprogs-y += cpustat @@ -54,6 +53,7 @@ tprogs-y += ibumad tprogs-y += hbm tprogs-y += xdp_redirect_cpu +tprogs-y += xdp_redirect_map_multi tprogs-y += xdp_redirect_map tprogs-y += xdp_redirect tprogs-y += xdp_monitor @@ -100,7 +100,6 @@ lwt_len_hist-objs := lwt_len_hist_user.o xdp_tx_iptunnel-objs := xdp_tx_iptunnel_user.o test_map_in_map-objs := test_map_in_map_user.o per_socket_stats_example-objs := cookie_uid_helper_example.o -xdp_redirect_map_multi-objs := xdp_redirect_map_multi_user.o xdp_rxq_info-objs := xdp_rxq_info_user.o syscall_tp-objs := syscall_tp_user.o cpustat-objs := cpustat_user.o @@ -114,6 +113,7 @@ xdp_sample_pkts-objs := xdp_sample_pkts_user.o ibumad-objs := ibumad_user.o hbm-objs := hbm.o $(CGROUP_HELPERS) +xdp_redirect_map_multi-objs := xdp_redirect_map_multi_user.o $(XDP_SAMPLE) xdp_redirect_cpu-objs := xdp_redirect_cpu_user.o $(XDP_SAMPLE) xdp_redirect_map-objs := xdp_redirect_map_user.o $(XDP_SAMPLE) xdp_redirect-objs := xdp_redirect_user.o $(XDP_SAMPLE) @@ -310,6 +310,7 @@ $(BPF_SAMPLES_PATH)/*.c: verify_target_bpf $(LIBBPF) $(src)/*.c: verify_target_bpf $(LIBBPF) $(obj)/xdp_redirect_cpu_user.o: $(obj)/xdp_redirect_cpu.skel.h +$(obj)/xdp_redirect_map_multi_user.o: $(obj)/xdp_redirect_map_multi.skel.h $(obj)/xdp_redirect_map_user.o: $(obj)/xdp_redirect_map.skel.h $(obj)/xdp_redirect_user.o: $(obj)/xdp_redirect.skel.h $(obj)/xdp_monitor_user.o: $(obj)/xdp_monitor.skel.h diff --git a/samples/bpf/xdp_redirect_map_multi_user.c b/samples/bpf/xdp_redirect_map_multi_user.c index 84cdbbed20b7..315314716121 100644 --- a/samples/bpf/xdp_redirect_map_multi_user.c +++ b/samples/bpf/xdp_redirect_map_multi_user.c @@ -1,7 +1,12 @@ // SPDX-License-Identifier: GPL-2.0 +static const char *__doc__ = +"XDP multi redirect tool, using BPF_MAP_TYPE_DEVMAP and BPF_F_BROADCAST flag for bpf_redirect_map\n" +"Usage: xdp_redirect_map_multi ... \n"; + #include #include #include +#include #include #include #include @@ -15,106 +20,54 @@ #include #include #include - -#include "bpf_util.h" +#include #include #include +#include "bpf_util.h" +#include "xdp_sample_user.h" +#include "xdp_redirect_map_multi.skel.h" #define MAX_IFACE_NUM 32 - -static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; static int ifaces[MAX_IFACE_NUM] = {}; -static int rxcnt_map_fd; - -static void int_exit(int sig) -{ - __u32 prog_id = 0; - int i; - - for (i = 0; ifaces[i] > 0; i++) { - if (bpf_get_link_xdp_id(ifaces[i], &prog_id, xdp_flags)) { - printf("bpf_get_link_xdp_id failed\n"); - exit(1); - } - if (prog_id) - bpf_set_link_xdp_fd(ifaces[i], -1, xdp_flags); - } - - exit(0); -} - -static void poll_stats(int interval) -{ - unsigned int nr_cpus = bpf_num_possible_cpus(); - __u64 values[nr_cpus], prev[nr_cpus]; - - memset(prev, 0, sizeof(prev)); - - while (1) { - __u64 sum = 0; - __u32 key = 0; - int i; - sleep(interval); - assert(bpf_map_lookup_elem(rxcnt_map_fd, &key, values) == 0); - for (i = 0; i < nr_cpus; i++) - sum += (values[i] - prev[i]); - if (sum) - printf("Forwarding %10llu pkt/s\n", sum / interval); - memcpy(prev, values, sizeof(values)); - } -} - -static int get_mac_addr(unsigned int ifindex, void *mac_addr) -{ - char ifname[IF_NAMESIZE]; - struct ifreq ifr; - int fd, ret = -1; - - fd = socket(AF_INET, SOCK_DGRAM, 0); - if (fd < 0) - return ret; - - if (!if_indextoname(ifindex, ifname)) - goto err_out; - - strcpy(ifr.ifr_name, ifname); +static int mask = SAMPLE_RX_CNT | SAMPLE_REDIRECT_ERR_MAP_CNT | + SAMPLE_EXCEPTION_CNT | SAMPLE_DEVMAP_XMIT_CNT | + SAMPLE_DEVMAP_XMIT_CNT_MULTI | SAMPLE_SKIP_HEADING; - if (ioctl(fd, SIOCGIFHWADDR, &ifr) != 0) - goto err_out; +DEFINE_SAMPLE_INIT(xdp_redirect_map_multi); - memcpy(mac_addr, ifr.ifr_hwaddr.sa_data, 6 * sizeof(char)); - ret = 0; +static const struct option long_options[] = { + { "help", no_argument, NULL, 'h' }, + { "skb-mode", no_argument, NULL, 'S' }, + { "force", no_argument, NULL, 'F' }, + { "load-egress", no_argument, NULL, 'X' }, + { "stats", no_argument, NULL, 's' }, + { "interval", required_argument, NULL, 'i' }, + { "verbose", no_argument, NULL, 'v' }, + {} +}; -err_out: - close(fd); - return ret; -} - -static int update_mac_map(struct bpf_object *obj) +static int update_mac_map(struct bpf_map *map) { - int i, ret = -1, mac_map_fd; + int mac_map_fd = bpf_map__fd(map); unsigned char mac_addr[6]; unsigned int ifindex; - - mac_map_fd = bpf_object__find_map_fd_by_name(obj, "mac_map"); - if (mac_map_fd < 0) { - printf("find mac map fd failed\n"); - return ret; - } + int i, ret = -1; for (i = 0; ifaces[i] > 0; i++) { ifindex = ifaces[i]; ret = get_mac_addr(ifindex, mac_addr); if (ret < 0) { - printf("get interface %d mac failed\n", ifindex); + fprintf(stderr, "get interface %d mac failed\n", + ifindex); return ret; } ret = bpf_map_update_elem(mac_map_fd, &ifindex, mac_addr, 0); - if (ret) { - perror("bpf_update_elem mac_map_fd"); + if (ret < 0) { + fprintf(stderr, "Failed to update mac address for ifindex %d\n", + ifindex); return ret; } } @@ -122,181 +75,159 @@ static int update_mac_map(struct bpf_object *obj) return 0; } -static void usage(const char *prog) -{ - fprintf(stderr, - "usage: %s [OPTS] ...\n" - "OPTS:\n" - " -S use skb-mode\n" - " -N enforce native mode\n" - " -F force loading prog\n" - " -X load xdp program on egress\n", - prog); -} - int main(int argc, char **argv) { - int i, ret, opt, forward_map_fd, max_ifindex = 0; - struct bpf_program *ingress_prog, *egress_prog; - int ingress_prog_fd, egress_prog_fd = 0; - struct bpf_devmap_val devmap_val; - bool attach_egress_prog = false; + struct bpf_devmap_val devmap_val = {}; + struct xdp_redirect_map_multi *skel; + struct bpf_program *ingress_prog; + bool xdp_devmap_attached = false; + struct bpf_map *forward_map; + int ret = EXIT_FAIL_OPTION; + unsigned long interval = 2; char ifname[IF_NAMESIZE]; - struct bpf_map *mac_map; - struct bpf_object *obj; unsigned int ifindex; - char filename[256]; - - while ((opt = getopt(argc, argv, "SNFX")) != -1) { + bool generic = false; + bool force = false; + bool tried = false; + bool error = true; + int i, opt; + + while ((opt = getopt_long(argc, argv, "hSFXi:vs", + long_options, NULL)) != -1) { switch (opt) { case 'S': - xdp_flags |= XDP_FLAGS_SKB_MODE; - break; - case 'N': - /* default, set below */ + generic = true; + /* devmap_xmit tracepoint not available */ + mask &= ~(SAMPLE_DEVMAP_XMIT_CNT | + SAMPLE_DEVMAP_XMIT_CNT_MULTI); break; case 'F': - xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST; + force = true; break; case 'X': - attach_egress_prog = true; + xdp_devmap_attached = true; + break; + case 'i': + interval = strtoul(optarg, NULL, 0); + break; + case 'v': + sample_switch_mode(); break; + case 's': + mask |= SAMPLE_REDIRECT_MAP_CNT; + break; + case 'h': + error = false; default: - usage(basename(argv[0])); - return 1; + sample_usage(argv, long_options, __doc__, mask, error); + return ret; } } - if (!(xdp_flags & XDP_FLAGS_SKB_MODE)) { - xdp_flags |= XDP_FLAGS_DRV_MODE; - } else if (attach_egress_prog) { - printf("Load xdp program on egress with SKB mode not supported yet\n"); - return 1; + if (argc <= optind + 1) { + sample_usage(argv, long_options, __doc__, mask, error); + return ret; } - if (optind == argc) { - printf("usage: %s ...\n", argv[0]); - return 1; + skel = xdp_redirect_map_multi__open(); + if (!skel) { + fprintf(stderr, "Failed to xdp_redirect_map_multi__open: %s\n", + strerror(errno)); + ret = EXIT_FAIL_BPF; + goto end; } - printf("Get interfaces"); + ret = sample_init_pre_load(skel); + if (ret < 0) { + fprintf(stderr, "Failed to sample_init_pre_load: %s\n", strerror(-ret)); + ret = EXIT_FAIL_BPF; + goto end_destroy; + } + + ret = EXIT_FAIL_OPTION; for (i = 0; i < MAX_IFACE_NUM && argv[optind + i]; i++) { ifaces[i] = if_nametoindex(argv[optind + i]); if (!ifaces[i]) ifaces[i] = strtoul(argv[optind + i], NULL, 0); if (!if_indextoname(ifaces[i], ifname)) { - perror("Invalid interface name or i"); - return 1; + fprintf(stderr, "Bad interface index or name\n"); + sample_usage(argv, long_options, __doc__, mask, true); + goto end_destroy; } - /* Find the largest index number */ - if (ifaces[i] > max_ifindex) - max_ifindex = ifaces[i]; - - printf(" %d", ifaces[i]); - } - printf("\n"); - - snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); - - obj = bpf_object__open(filename); - if (libbpf_get_error(obj)) { - printf("ERROR: opening BPF object file failed\n"); - obj = NULL; - goto err_out; + skel->rodata->from_match[i] = ifaces[i]; + skel->rodata->to_match[i] = ifaces[i]; } - /* Reset the map size to max ifindex + 1 */ - if (attach_egress_prog) { - mac_map = bpf_object__find_map_by_name(obj, "mac_map"); - ret = bpf_map__resize(mac_map, max_ifindex + 1); - if (ret < 0) { - printf("ERROR: reset mac map size failed\n"); - goto err_out; - } + ret = xdp_redirect_map_multi__load(skel); + if (ret < 0) { + fprintf(stderr, "Failed to xdp_redirect_map_multi__load: %s\n", + strerror(errno)); + ret = EXIT_FAIL_BPF; + goto end_destroy; } - /* load BPF program */ - if (bpf_object__load(obj)) { - printf("ERROR: loading BPF object file failed\n"); - goto err_out; - } - - if (xdp_flags & XDP_FLAGS_SKB_MODE) { - ingress_prog = bpf_object__find_program_by_name(obj, "xdp_redirect_map_general"); - forward_map_fd = bpf_object__find_map_fd_by_name(obj, "forward_map_general"); - } else { - ingress_prog = bpf_object__find_program_by_name(obj, "xdp_redirect_map_native"); - forward_map_fd = bpf_object__find_map_fd_by_name(obj, "forward_map_native"); - } - if (!ingress_prog || forward_map_fd < 0) { - printf("finding ingress_prog/forward_map in obj file failed\n"); - goto err_out; - } - - ingress_prog_fd = bpf_program__fd(ingress_prog); - if (ingress_prog_fd < 0) { - printf("find ingress_prog fd failed\n"); - goto err_out; - } - - rxcnt_map_fd = bpf_object__find_map_fd_by_name(obj, "rxcnt"); - if (rxcnt_map_fd < 0) { - printf("bpf_object__find_map_fd_by_name failed\n"); - goto err_out; - } - - if (attach_egress_prog) { + if (xdp_devmap_attached) { /* Update mac_map with all egress interfaces' mac addr */ - if (update_mac_map(obj) < 0) { - printf("Error: update mac map failed"); - goto err_out; + if (update_mac_map(skel->maps.mac_map) < 0) { + fprintf(stderr, "Updating mac address failed\n"); + ret = EXIT_FAIL; + goto end_destroy; } + } - /* Find egress prog fd */ - egress_prog = bpf_object__find_program_by_name(obj, "xdp_devmap_prog"); - if (!egress_prog) { - printf("finding egress_prog in obj file failed\n"); - goto err_out; - } - egress_prog_fd = bpf_program__fd(egress_prog); - if (egress_prog_fd < 0) { - printf("find egress_prog fd failed\n"); - goto err_out; - } + ret = sample_init(skel, mask); + if (ret < 0) { + fprintf(stderr, "Failed to initialize sample: %s\n", strerror(-ret)); + ret = EXIT_FAIL; + goto end_destroy; } - /* Remove attached program when program is interrupted or killed */ - signal(SIGINT, int_exit); - signal(SIGTERM, int_exit); + ingress_prog = skel->progs.xdp_redirect_map_native; + forward_map = skel->maps.forward_map_native; - /* Init forward multicast groups */ for (i = 0; ifaces[i] > 0; i++) { ifindex = ifaces[i]; + ret = EXIT_FAIL_XDP; +restart: /* bind prog_fd to each interface */ - ret = bpf_set_link_xdp_fd(ifindex, ingress_prog_fd, xdp_flags); - if (ret) { - printf("Set xdp fd failed on %d\n", ifindex); - goto err_out; + if (sample_install_xdp(ingress_prog, ifindex, generic, force) < 0) { + if (generic && !tried) { + fprintf(stderr, + "Trying fallback to sizeof(int) as value_size for devmap in generic mode\n"); + ingress_prog = skel->progs.xdp_redirect_map_general; + forward_map = skel->maps.forward_map_general; + tried = true; + goto restart; + } + goto end_destroy; } /* Add all the interfaces to forward group and attach - * egress devmap programe if exist + * egress devmap program if exist */ devmap_val.ifindex = ifindex; - devmap_val.bpf_prog.fd = egress_prog_fd; - ret = bpf_map_update_elem(forward_map_fd, &ifindex, &devmap_val, 0); - if (ret) { - perror("bpf_map_update_elem forward_map"); - goto err_out; + if (xdp_devmap_attached) + devmap_val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_devmap_prog); + ret = bpf_map_update_elem(bpf_map__fd(forward_map), &ifindex, &devmap_val, 0); + if (ret < 0) { + fprintf(stderr, "Failed to update devmap value: %s\n", + strerror(errno)); + ret = EXIT_FAIL_BPF; + goto end_destroy; } } - poll_stats(2); - - return 0; - -err_out: - return 1; + ret = sample_run(interval, NULL, NULL); + if (ret < 0) { + fprintf(stderr, "Failed during sample run: %s\n", strerror(-ret)); + ret = EXIT_FAIL; + goto end_destroy; + } + ret = EXIT_OK; +end_destroy: + xdp_redirect_map_multi__destroy(skel); +end: + sample_exit(ret); } diff --git a/samples/bpf/xdp_sample_user.c b/samples/bpf/xdp_sample_user.c index eb484c15492d..b32d82178199 100644 --- a/samples/bpf/xdp_sample_user.c +++ b/samples/bpf/xdp_sample_user.c @@ -1510,7 +1510,7 @@ static int sample_timer_cb(int timerfd, struct stats_record **rec, if (ret < 0) return ret; - if (sample_xdp_cnt == 2) { + if (sample_xdp_cnt == 2 && !(sample_mask & SAMPLE_SKIP_HEADING)) { char fi[IFNAMSIZ]; char to[IFNAMSIZ]; const char *f, *t; diff --git a/samples/bpf/xdp_sample_user.h b/samples/bpf/xdp_sample_user.h index 3a678986cce2..d97465ff8c62 100644 --- a/samples/bpf/xdp_sample_user.h +++ b/samples/bpf/xdp_sample_user.h @@ -8,17 +8,18 @@ #include "xdp_sample_shared.h" enum stats_mask { - _SAMPLE_REDIRECT_MAP = 1U << 0, - SAMPLE_RX_CNT = 1U << 1, - SAMPLE_REDIRECT_ERR_CNT = 1U << 2, - SAMPLE_CPUMAP_ENQUEUE_CNT = 1U << 3, - SAMPLE_CPUMAP_KTHREAD_CNT = 1U << 4, - SAMPLE_EXCEPTION_CNT = 1U << 5, - SAMPLE_DEVMAP_XMIT_CNT = 1U << 6, - SAMPLE_REDIRECT_CNT = 1U << 7, - SAMPLE_REDIRECT_MAP_CNT = SAMPLE_REDIRECT_CNT | _SAMPLE_REDIRECT_MAP, - SAMPLE_REDIRECT_ERR_MAP_CNT = SAMPLE_REDIRECT_ERR_CNT | _SAMPLE_REDIRECT_MAP, + _SAMPLE_REDIRECT_MAP = 1U << 0, + SAMPLE_RX_CNT = 1U << 1, + SAMPLE_REDIRECT_ERR_CNT = 1U << 2, + SAMPLE_CPUMAP_ENQUEUE_CNT = 1U << 3, + SAMPLE_CPUMAP_KTHREAD_CNT = 1U << 4, + SAMPLE_EXCEPTION_CNT = 1U << 5, + SAMPLE_DEVMAP_XMIT_CNT = 1U << 6, + SAMPLE_REDIRECT_CNT = 1U << 7, + SAMPLE_REDIRECT_MAP_CNT = SAMPLE_REDIRECT_CNT | _SAMPLE_REDIRECT_MAP, + SAMPLE_REDIRECT_ERR_MAP_CNT = SAMPLE_REDIRECT_ERR_CNT | _SAMPLE_REDIRECT_MAP, SAMPLE_DEVMAP_XMIT_CNT_MULTI = 1U << 8, + SAMPLE_SKIP_HEADING = 1U << 9, }; /* Exit return codes */ -- cgit v1.2.3-59-g8ed1b From c0e9422c4e6ca9abd4bd6e1598400c7231eb600b Mon Sep 17 00:00:00 2001 From: Juhee Kang Date: Wed, 25 Aug 2021 19:57:15 +0900 Subject: samples: pktgen: fix to print when terminated normally Currently, most pktgen samples print the execution result when the program is terminated normally. However, sample03 doesn't work appropriately. This is results of samples: # DEV=eth0 DEST_IP=10.1.0.1 DST_MAC=00:11:22:33:44:55 ./pktgen_sample04_many_flows.sh -n 1 Running... ctrl^C to stop Device: eth0@0 Result: OK: 19(c5+d13) usec, 1 (60byte,0frags) 51762pps 24Mb/sec (24845760bps) errors: 0 # DEV=eth0 DEST_IP=10.1.0.1 DST_MAC=00:11:22:33:44:55 ./pktgen_sample03_burst_single_flow.sh -n 1 Running... ctrl^C to stop The reason why it doesn't print the execution result when the program is terminated usually is that sample03 doesn't call the function which prints the result, unlike other samples. So, this commit solves this issue by calling the function before termination. Also, this commit changes control_c function to print_result to maintain consistency with other samples. Signed-off-by: Juhee Kang Signed-off-by: David S. Miller --- samples/pktgen/pktgen_sample03_burst_single_flow.sh | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'samples') diff --git a/samples/pktgen/pktgen_sample03_burst_single_flow.sh b/samples/pktgen/pktgen_sample03_burst_single_flow.sh index ab87de440277..8bf2fdffba16 100755 --- a/samples/pktgen/pktgen_sample03_burst_single_flow.sh +++ b/samples/pktgen/pktgen_sample03_burst_single_flow.sh @@ -85,7 +85,7 @@ for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do done # Run if user hits control-c -function control_c() { +function print_result() { # Print results for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do dev=${DEV}@${thread} @@ -94,11 +94,13 @@ function control_c() { done } # trap keyboard interrupt (Ctrl-C) -trap control_c SIGINT +trap true SIGINT if [ -z "$APPEND" ]; then echo "Running... ctrl^C to stop" >&2 pg_ctrl "start" + + print_result else echo "Append mode: config done. Do more or use 'pg_ctrl start' to run" fi -- cgit v1.2.3-59-g8ed1b From 6c882bdc4bcd63e164f05738e7677b8a62fc0ec1 Mon Sep 17 00:00:00 2001 From: Juhee Kang Date: Wed, 25 Aug 2021 19:57:16 +0900 Subject: samples: pktgen: add trap SIGINT for printing execution result All pktgen samples can send indefinitely num messages per thread by setting the count option to 0(-n 0). If running sample with setting count 0 and press Ctrl-C to stop this program, the program prints the result of the execution so far. Currently, the samples besides sample{3...5} don't work properly. Because Ctrl-C stops the script, not just pktgen. This is results of samples: # DEV=eth0 DEST_IP=10.1.0.1 DST_MAC=00:11:22:33:44:55 ./pktgen_sample04_many_flows.sh -n 0 Running... ctrl^C to stop ^CDevice: eth0@0 Result: OK: 569657(c569538+d118) usec, 84650 (60byte,0frags) 148597pps 71Mb/sec (71326560bps) errors: 0 # DEV=eth0 DEST_IP=10.1.0.1 DST_MAC=00:11:22:33:44:55 ./pktgen_sample01_simple.sh -n 0 Running... ctrl^C to stop ^C In order to solve this, this commit adds trap SIGINT. Also, this commit changes control_c function to print_result to maintain consistency with other samples. Signed-off-by: Juhee Kang Signed-off-by: David S. Miller --- .../pktgen/pktgen_bench_xmit_mode_netif_receive.sh | 19 +++++++++++++------ samples/pktgen/pktgen_bench_xmit_mode_queue_xmit.sh | 19 +++++++++++++------ samples/pktgen/pktgen_sample01_simple.sh | 13 ++++++++++--- samples/pktgen/pktgen_sample02_multiqueue.sh | 19 +++++++++++++------ .../pktgen_sample06_numa_awared_queue_irq_affinity.sh | 19 +++++++++++++------ 5 files changed, 62 insertions(+), 27 deletions(-) (limited to 'samples') diff --git a/samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh b/samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh index 30a610b541ad..99ec0688b044 100755 --- a/samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh +++ b/samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh @@ -89,14 +89,21 @@ for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do pg_set $dev "burst $BURST" done +# Run if user hits control-c +function print_result() { + # Print results + for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do + dev=${DEV}@${thread} + echo "Device: $dev" + cat /proc/net/pktgen/$dev | grep -A2 "Result:" + done +} +# trap keyboard interrupt (Ctrl-C) +trap true SIGINT + # start_run echo "Running... ctrl^C to stop" >&2 pg_ctrl "start" echo "Done" >&2 -# Print results -for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do - dev=${DEV}@${thread} - echo "Device: $dev" - cat /proc/net/pktgen/$dev | grep -A2 "Result:" -done +print_result diff --git a/samples/pktgen/pktgen_bench_xmit_mode_queue_xmit.sh b/samples/pktgen/pktgen_bench_xmit_mode_queue_xmit.sh index a6195bd77532..04b0dd0c36d6 100755 --- a/samples/pktgen/pktgen_bench_xmit_mode_queue_xmit.sh +++ b/samples/pktgen/pktgen_bench_xmit_mode_queue_xmit.sh @@ -69,14 +69,21 @@ for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do pg_set $dev "xmit_mode queue_xmit" done +# Run if user hits control-c +function print_result { + # Print results + for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do + dev=${DEV}@${thread} + echo "Device: $dev" + cat /proc/net/pktgen/$dev | grep -A2 "Result:" + done +} +# trap keyboard interrupt (Ctrl-C) +trap true SIGINT + # start_run echo "Running... ctrl^C to stop" >&2 pg_ctrl "start" echo "Done" >&2 -# Print results -for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do - dev=${DEV}@${thread} - echo "Device: $dev" - cat /proc/net/pktgen/$dev | grep -A2 "Result:" -done +print_result diff --git a/samples/pktgen/pktgen_sample01_simple.sh b/samples/pktgen/pktgen_sample01_simple.sh index 246cfe02bb82..09a92ea963f9 100755 --- a/samples/pktgen/pktgen_sample01_simple.sh +++ b/samples/pktgen/pktgen_sample01_simple.sh @@ -79,15 +79,22 @@ pg_set $DEV "flag UDPSRC_RND" pg_set $DEV "udp_src_min $UDP_SRC_MIN" pg_set $DEV "udp_src_max $UDP_SRC_MAX" +# Run if user hits control-c +function print_result() { + # Print results + echo "Result device: $DEV" + cat /proc/net/pktgen/$DEV +} +# trap keyboard interrupt (Ctrl-C) +trap true SIGINT + if [ -z "$APPEND" ]; then # start_run echo "Running... ctrl^C to stop" >&2 pg_ctrl "start" echo "Done" >&2 - # Print results - echo "Result device: $DEV" - cat /proc/net/pktgen/$DEV + print_result else echo "Append mode: config done. Do more or use 'pg_ctrl start' to run" fi \ No newline at end of file diff --git a/samples/pktgen/pktgen_sample02_multiqueue.sh b/samples/pktgen/pktgen_sample02_multiqueue.sh index c6af3d9d5171..7fa41c84c32f 100755 --- a/samples/pktgen/pktgen_sample02_multiqueue.sh +++ b/samples/pktgen/pktgen_sample02_multiqueue.sh @@ -83,18 +83,25 @@ for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do pg_set $dev "udp_src_max $UDP_SRC_MAX" done -if [ -z "$APPEND" ]; then - # start_run - echo "Running... ctrl^C to stop" >&2 - pg_ctrl "start" - echo "Done" >&2 - +# Run if user hits control-c +function print_result() { # Print results for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do dev=${DEV}@${thread} echo "Device: $dev" cat /proc/net/pktgen/$dev | grep -A2 "Result:" done +} +# trap keyboard interrupt (Ctrl-C) +trap true SIGINT + +if [ -z "$APPEND" ]; then + # start_run + echo "Running... ctrl^C to stop" >&2 + pg_ctrl "start" + echo "Done" >&2 + + print_result else echo "Append mode: config done. Do more or use 'pg_ctrl start' to run" fi diff --git a/samples/pktgen/pktgen_sample06_numa_awared_queue_irq_affinity.sh b/samples/pktgen/pktgen_sample06_numa_awared_queue_irq_affinity.sh index 7c27923083a6..264cc5db9c49 100755 --- a/samples/pktgen/pktgen_sample06_numa_awared_queue_irq_affinity.sh +++ b/samples/pktgen/pktgen_sample06_numa_awared_queue_irq_affinity.sh @@ -100,12 +100,8 @@ for ((i = 0; i < $THREADS; i++)); do pg_set $dev "udp_src_max $UDP_SRC_MAX" done -# start_run -if [ -z "$APPEND" ]; then - echo "Running... ctrl^C to stop" >&2 - pg_ctrl "start" - echo "Done" >&2 - +# Run if user hits control-c +function print_result() { # Print results for ((i = 0; i < $THREADS; i++)); do thread=${cpu_array[$((i+F_THREAD))]} @@ -113,6 +109,17 @@ if [ -z "$APPEND" ]; then echo "Device: $dev" cat /proc/net/pktgen/$dev | grep -A2 "Result:" done +} +# trap keyboard interrupt (Ctrl-C) +trap true SIGINT + +# start_run +if [ -z "$APPEND" ]; then + echo "Running... ctrl^C to stop" >&2 + pg_ctrl "start" + echo "Done" >&2 + + print_result else echo "Append mode: config done. Do more or use 'pg_ctrl start' to run" fi -- cgit v1.2.3-59-g8ed1b From 48b2e71c2e53263ebbb6798bbf208e191937e691 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Thu, 26 Aug 2021 17:39:10 +0530 Subject: samples: bpf: Fix uninitialized variable in xdp_redirect_cpu While at it, also improve help output when CPU number is greater than possible. Fixes: e531a220cc59 ("samples: bpf: Convert xdp_redirect_cpu to XDP samples helper") Signed-off-by: Kumar Kartikeya Dwivedi Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20210826120910.454081-1-memxor@gmail.com --- samples/bpf/xdp_redirect_cpu_user.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'samples') diff --git a/samples/bpf/xdp_redirect_cpu_user.c b/samples/bpf/xdp_redirect_cpu_user.c index 631700aef69c..6e25fba64c72 100644 --- a/samples/bpf/xdp_redirect_cpu_user.c +++ b/samples/bpf/xdp_redirect_cpu_user.c @@ -141,7 +141,7 @@ static int create_cpu_entry(__u32 cpu, struct bpf_cpumap_val *value, static int mark_cpus_unavailable(void) { int ret, i, n_cpus = libbpf_num_possible_cpus(); - __u32 invalid_cpu; + __u32 invalid_cpu = n_cpus; for (i = 0; i < n_cpus; i++) { ret = bpf_map_update_elem(avail_fd, &i, @@ -449,8 +449,9 @@ int main(int argc, char **argv) add_cpu = strtoul(optarg, NULL, 0); if (add_cpu >= n_cpus) { fprintf(stderr, - "--cpu nr too large for cpumap err(%d):%s\n", + "--cpu nr too large for cpumap err (%d):%s\n", errno, strerror(errno)); + usage(argv, long_options, __doc__, mask, true, skel->obj); goto end_cpu; } cpu[added_cpus++] = add_cpu; -- cgit v1.2.3-59-g8ed1b