From 6ea848b5ced53cbfd5677ff176c2aca10fd61bf4 Mon Sep 17 00:00:00 2001 From: Jiong Wang Date: Sat, 26 Jan 2019 12:26:13 -0500 Subject: selftests: bpf: functional and min/max reasoning unit tests for JMP32 This patch adds unit tests for new JMP32 instructions. This patch also added the new BPF_JMP32_REG and BPF_JMP32_IMM macros to samples/bpf/bpf_insn.h so that JMP32 insn builders are available to tests under 'samples' directory. Reviewed-by: Jakub Kicinski Signed-off-by: Jiong Wang Signed-off-by: Alexei Starovoitov --- samples/bpf/bpf_insn.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'samples') diff --git a/samples/bpf/bpf_insn.h b/samples/bpf/bpf_insn.h index 20dc5cefec84..544237980582 100644 --- a/samples/bpf/bpf_insn.h +++ b/samples/bpf/bpf_insn.h @@ -164,6 +164,16 @@ struct bpf_insn; .off = OFF, \ .imm = 0 }) +/* Like BPF_JMP_REG, but with 32-bit wide operands for comparison. */ + +#define BPF_JMP32_REG(OP, DST, SRC, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_JMP32 | BPF_OP(OP) | BPF_X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = 0 }) + /* Conditional jumps against immediates, if (dst_reg 'op' imm32) goto pc + off16 */ #define BPF_JMP_IMM(OP, DST, IMM, OFF) \ @@ -174,6 +184,16 @@ struct bpf_insn; .off = OFF, \ .imm = IMM }) +/* Like BPF_JMP_IMM, but with 32-bit wide operands for comparison. */ + +#define BPF_JMP32_IMM(OP, DST, IMM, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_JMP32 | BPF_OP(OP) | BPF_K, \ + .dst_reg = DST, \ + .src_reg = 0, \ + .off = OFF, \ + .imm = IMM }) + /* Raw code statement block */ #define BPF_RAW_INSN(CODE, DST, SRC, OFF, IMM) \ -- cgit v1.2.3-59-g8ed1b From 7313798b144c5b945dc47502a233cabb7022cb70 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Fri, 1 Feb 2019 22:42:24 +0100 Subject: samples/bpf: xdp_redirect_cpu have not need for read_trace_pipe The sample xdp_redirect_cpu is not using helper bpf_trace_printk. Thus it makes no sense that the --debug option us reading from /sys/kernel/debug/tracing/trace_pipe via read_trace_pipe. Simply remove it. Signed-off-by: Jesper Dangaard Brouer Acked-by: John Fastabend Signed-off-by: Daniel Borkmann --- samples/bpf/xdp_redirect_cpu_user.c | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'samples') diff --git a/samples/bpf/xdp_redirect_cpu_user.c b/samples/bpf/xdp_redirect_cpu_user.c index 2d23054aaccf..f141e752ca0a 100644 --- a/samples/bpf/xdp_redirect_cpu_user.c +++ b/samples/bpf/xdp_redirect_cpu_user.c @@ -51,7 +51,6 @@ static const struct option long_options[] = { {"help", no_argument, NULL, 'h' }, {"dev", required_argument, NULL, 'd' }, {"skb-mode", no_argument, NULL, 'S' }, - {"debug", no_argument, NULL, 'D' }, {"sec", required_argument, NULL, 's' }, {"prognum", required_argument, NULL, 'p' }, {"qsize", required_argument, NULL, 'q' }, @@ -563,7 +562,6 @@ int main(int argc, char **argv) bool use_separators = true; bool stress_mode = false; char filename[256]; - bool debug = false; int added_cpus = 0; int longindex = 0; int interval = 2; @@ -624,9 +622,6 @@ int main(int argc, char **argv) case 'S': xdp_flags |= XDP_FLAGS_SKB_MODE; break; - case 'D': - debug = true; - break; case 'x': stress_mode = true; break; @@ -688,11 +683,6 @@ int main(int argc, char **argv) return EXIT_FAIL_XDP; } - if (debug) { - printf("Debug-mode reading trace pipe (fix #define DEBUG)\n"); - read_trace_pipe(); - } - stats_poll(interval, use_separators, prog_num, stress_mode); return EXIT_OK; } -- cgit v1.2.3-59-g8ed1b From bbaf6029c49ccf2dc93d9564af58a20d125947a1 Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Fri, 1 Feb 2019 22:42:25 +0100 Subject: samples/bpf: Convert XDP samples to libbpf usage Some of XDP samples that are attaching the bpf program to the interface via libbpf's bpf_set_link_xdp_fd are still using the bpf_load.c for loading and manipulating the ebpf program and maps. Convert them to do this through libbpf usage and remove bpf_load from the picture. While at it remove what looks like debug leftover in xdp_redirect_map_user.c In xdp_redirect_cpu, change the way that the program to be loaded onto interface is chosen - user now needs to pass the program's section name instead of the relative number. In case of typo print out the section names to choose from. Signed-off-by: Maciej Fijalkowski Reviewed-by: Jakub Kicinski Acked-by: Jesper Dangaard Brouer Signed-off-by: Daniel Borkmann --- samples/bpf/Makefile | 8 +- samples/bpf/xdp_redirect_cpu_user.c | 145 +++++++++++++++++++++++++----------- samples/bpf/xdp_redirect_map_user.c | 47 ++++++++---- samples/bpf/xdp_redirect_user.c | 44 ++++++++--- samples/bpf/xdp_router_ipv4_user.c | 75 +++++++++++++------ samples/bpf/xdp_tx_iptunnel_user.c | 37 ++++++--- 6 files changed, 253 insertions(+), 103 deletions(-) (limited to 'samples') diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index db1a91dfa702..a0ef7eddd0b3 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -87,18 +87,18 @@ test_cgrp2_sock2-objs := bpf_load.o test_cgrp2_sock2.o xdp1-objs := xdp1_user.o # reuse xdp1 source intentionally xdp2-objs := xdp1_user.o -xdp_router_ipv4-objs := bpf_load.o xdp_router_ipv4_user.o +xdp_router_ipv4-objs := xdp_router_ipv4_user.o test_current_task_under_cgroup-objs := bpf_load.o $(CGROUP_HELPERS) \ test_current_task_under_cgroup_user.o trace_event-objs := bpf_load.o trace_event_user.o $(TRACE_HELPERS) sampleip-objs := bpf_load.o sampleip_user.o $(TRACE_HELPERS) tc_l2_redirect-objs := bpf_load.o tc_l2_redirect_user.o lwt_len_hist-objs := bpf_load.o lwt_len_hist_user.o -xdp_tx_iptunnel-objs := bpf_load.o xdp_tx_iptunnel_user.o +xdp_tx_iptunnel-objs := xdp_tx_iptunnel_user.o test_map_in_map-objs := bpf_load.o test_map_in_map_user.o per_socket_stats_example-objs := cookie_uid_helper_example.o -xdp_redirect-objs := bpf_load.o xdp_redirect_user.o -xdp_redirect_map-objs := bpf_load.o xdp_redirect_map_user.o +xdp_redirect-objs := xdp_redirect_user.o +xdp_redirect_map-objs := xdp_redirect_map_user.o xdp_redirect_cpu-objs := bpf_load.o xdp_redirect_cpu_user.o xdp_monitor-objs := bpf_load.o xdp_monitor_user.o xdp_rxq_info-objs := xdp_rxq_info_user.o diff --git a/samples/bpf/xdp_redirect_cpu_user.c b/samples/bpf/xdp_redirect_cpu_user.c index f141e752ca0a..8645ddc2da0e 100644 --- a/samples/bpf/xdp_redirect_cpu_user.c +++ b/samples/bpf/xdp_redirect_cpu_user.c @@ -24,12 +24,8 @@ static const char *__doc__ = /* How many xdp_progs are defined in _kern.c */ #define MAX_PROG 6 -/* Wanted to get rid of bpf_load.h and fake-"libbpf.h" (and instead - * use bpf/libbpf.h), but cannot as (currently) needed for XDP - * attaching to a device via bpf_set_link_xdp_fd() - */ #include -#include "bpf_load.h" +#include "bpf/libbpf.h" #include "bpf_util.h" @@ -38,6 +34,15 @@ static char ifname_buf[IF_NAMESIZE]; static char *ifname; static __u32 xdp_flags; +static int cpu_map_fd; +static int rx_cnt_map_fd; +static int redirect_err_cnt_map_fd; +static int cpumap_enqueue_cnt_map_fd; +static int cpumap_kthread_cnt_map_fd; +static int cpus_available_map_fd; +static int cpus_count_map_fd; +static int cpus_iterator_map_fd; +static int exception_cnt_map_fd; /* Exit return codes */ #define EXIT_OK 0 @@ -52,7 +57,7 @@ static const struct option long_options[] = { {"dev", required_argument, NULL, 'd' }, {"skb-mode", no_argument, NULL, 'S' }, {"sec", required_argument, NULL, 's' }, - {"prognum", required_argument, NULL, 'p' }, + {"progname", required_argument, NULL, 'p' }, {"qsize", required_argument, NULL, 'q' }, {"cpu", required_argument, NULL, 'c' }, {"stress-mode", no_argument, NULL, 'x' }, @@ -70,7 +75,17 @@ static void int_exit(int sig) exit(EXIT_OK); } -static void usage(char *argv[]) +static void print_avail_progs(struct bpf_object *obj) +{ + struct bpf_program *pos; + + bpf_object__for_each_program(pos, obj) { + if (bpf_program__is_xdp(pos)) + printf(" %s\n", bpf_program__title(pos, false)); + } +} + +static void usage(char *argv[], struct bpf_object *obj) { int i; @@ -88,6 +103,8 @@ static void usage(char *argv[]) long_options[i].val); printf("\n"); } + printf("\n Programs to be used for --progname:\n"); + print_avail_progs(obj); printf("\n"); } @@ -262,7 +279,7 @@ static __u64 calc_errs_pps(struct datarec *r, static void stats_print(struct stats_record *stats_rec, struct stats_record *stats_prev, - int prog_num) + char *prog_name) { unsigned int nr_cpus = bpf_num_possible_cpus(); double pps = 0, drop = 0, err = 0; @@ -272,7 +289,7 @@ static void stats_print(struct stats_record *stats_rec, int i; /* Header */ - printf("Running XDP/eBPF prog_num:%d\n", prog_num); + printf("Running XDP/eBPF prog_name:%s\n", prog_name); printf("%-15s %-7s %-14s %-11s %-9s\n", "XDP-cpumap", "CPU:to", "pps", "drop-pps", "extra-info"); @@ -423,20 +440,20 @@ static void stats_collect(struct stats_record *rec) { int fd, i; - fd = map_fd[1]; /* map: rx_cnt */ + fd = rx_cnt_map_fd; map_collect_percpu(fd, 0, &rec->rx_cnt); - fd = map_fd[2]; /* map: redirect_err_cnt */ + fd = redirect_err_cnt_map_fd; map_collect_percpu(fd, 1, &rec->redir_err); - fd = map_fd[3]; /* map: cpumap_enqueue_cnt */ + fd = cpumap_enqueue_cnt_map_fd; for (i = 0; i < MAX_CPUS; i++) map_collect_percpu(fd, i, &rec->enq[i]); - fd = map_fd[4]; /* map: cpumap_kthread_cnt */ + fd = cpumap_kthread_cnt_map_fd; map_collect_percpu(fd, 0, &rec->kthread); - fd = map_fd[8]; /* map: exception_cnt */ + fd = exception_cnt_map_fd; map_collect_percpu(fd, 0, &rec->exception); } @@ -461,7 +478,7 @@ static int create_cpu_entry(__u32 cpu, __u32 queue_size, /* Add a CPU entry to cpumap, as this allocate a cpu entry in * the kernel for the cpu. */ - ret = bpf_map_update_elem(map_fd[0], &cpu, &queue_size, 0); + ret = bpf_map_update_elem(cpu_map_fd, &cpu, &queue_size, 0); if (ret) { fprintf(stderr, "Create CPU entry failed (err:%d)\n", ret); exit(EXIT_FAIL_BPF); @@ -470,23 +487,22 @@ static int create_cpu_entry(__u32 cpu, __u32 queue_size, /* Inform bpf_prog's that a new CPU is available to select * from via some control maps. */ - /* map_fd[5] = cpus_available */ - ret = bpf_map_update_elem(map_fd[5], &avail_idx, &cpu, 0); + ret = bpf_map_update_elem(cpus_available_map_fd, &avail_idx, &cpu, 0); if (ret) { fprintf(stderr, "Add to avail CPUs failed\n"); exit(EXIT_FAIL_BPF); } /* When not replacing/updating existing entry, bump the count */ - /* map_fd[6] = cpus_count */ - ret = bpf_map_lookup_elem(map_fd[6], &key, &curr_cpus_count); + ret = bpf_map_lookup_elem(cpus_count_map_fd, &key, &curr_cpus_count); if (ret) { fprintf(stderr, "Failed reading curr cpus_count\n"); exit(EXIT_FAIL_BPF); } if (new) { curr_cpus_count++; - ret = bpf_map_update_elem(map_fd[6], &key, &curr_cpus_count, 0); + ret = bpf_map_update_elem(cpus_count_map_fd, &key, + &curr_cpus_count, 0); if (ret) { fprintf(stderr, "Failed write curr cpus_count\n"); exit(EXIT_FAIL_BPF); @@ -509,8 +525,8 @@ static void mark_cpus_unavailable(void) int ret, i; for (i = 0; i < MAX_CPUS; i++) { - /* map_fd[5] = cpus_available */ - ret = bpf_map_update_elem(map_fd[5], &i, &invalid_cpu, 0); + ret = bpf_map_update_elem(cpus_available_map_fd, &i, + &invalid_cpu, 0); if (ret) { fprintf(stderr, "Failed marking CPU unavailable\n"); exit(EXIT_FAIL_BPF); @@ -530,7 +546,7 @@ static void stress_cpumap(void) create_cpu_entry(1, 16000, 0, false); } -static void stats_poll(int interval, bool use_separators, int prog_num, +static void stats_poll(int interval, bool use_separators, char *prog_name, bool stress_mode) { struct stats_record *record, *prev; @@ -546,7 +562,7 @@ static void stats_poll(int interval, bool use_separators, int prog_num, while (1) { swap(&prev, &record); stats_collect(record); - stats_print(record, prev, prog_num); + stats_print(record, prev, prog_name); sleep(interval); if (stress_mode) stress_cpumap(); @@ -556,17 +572,51 @@ static void stats_poll(int interval, bool use_separators, int prog_num, free_stats_record(prev); } +static int init_map_fds(struct bpf_object *obj) +{ + cpu_map_fd = bpf_object__find_map_fd_by_name(obj, "cpu_map"); + rx_cnt_map_fd = bpf_object__find_map_fd_by_name(obj, "rx_cnt"); + redirect_err_cnt_map_fd = + bpf_object__find_map_fd_by_name(obj, "redirect_err_cnt"); + cpumap_enqueue_cnt_map_fd = + bpf_object__find_map_fd_by_name(obj, "cpumap_enqueue_cnt"); + cpumap_kthread_cnt_map_fd = + bpf_object__find_map_fd_by_name(obj, "cpumap_kthread_cnt"); + cpus_available_map_fd = + bpf_object__find_map_fd_by_name(obj, "cpus_available"); + cpus_count_map_fd = bpf_object__find_map_fd_by_name(obj, "cpus_count"); + cpus_iterator_map_fd = + bpf_object__find_map_fd_by_name(obj, "cpus_iterator"); + exception_cnt_map_fd = + bpf_object__find_map_fd_by_name(obj, "exception_cnt"); + + if (cpu_map_fd < 0 || rx_cnt_map_fd < 0 || + redirect_err_cnt_map_fd < 0 || cpumap_enqueue_cnt_map_fd < 0 || + cpumap_kthread_cnt_map_fd < 0 || cpus_available_map_fd < 0 || + cpus_count_map_fd < 0 || cpus_iterator_map_fd < 0 || + exception_cnt_map_fd < 0) + return -ENOENT; + + return 0; +} + int main(int argc, char **argv) { struct rlimit r = {10 * 1024 * 1024, RLIM_INFINITY}; + char *prog_name = "xdp_cpu_map5_lb_hash_ip_pairs"; + struct bpf_prog_load_attr prog_load_attr = { + .prog_type = BPF_PROG_TYPE_UNSPEC, + }; bool use_separators = true; bool stress_mode = false; + struct bpf_program *prog; + struct bpf_object *obj; char filename[256]; int added_cpus = 0; int longindex = 0; int interval = 2; - int prog_num = 5; int add_cpu = -1; + int prog_fd; __u32 qsize; int opt; @@ -579,22 +629,25 @@ int main(int argc, char **argv) qsize = 128+64; snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + prog_load_attr.file = filename; if (setrlimit(RLIMIT_MEMLOCK, &r)) { perror("setrlimit(RLIMIT_MEMLOCK)"); return 1; } - if (load_bpf_file(filename)) { - fprintf(stderr, "ERR in load_bpf_file(): %s", bpf_log_buf); + if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd)) return EXIT_FAIL; - } - if (!prog_fd[0]) { - fprintf(stderr, "ERR: load_bpf_file: %s\n", strerror(errno)); + if (prog_fd < 0) { + fprintf(stderr, "ERR: bpf_prog_load_xattr: %s\n", + strerror(errno)); + return EXIT_FAIL; + } + if (init_map_fds(obj) < 0) { + fprintf(stderr, "bpf_object__find_map_fd_by_name failed\n"); return EXIT_FAIL; } - mark_cpus_unavailable(); /* Parse commands line args */ @@ -630,13 +683,7 @@ int main(int argc, char **argv) break; case 'p': /* Selecting eBPF prog to load */ - prog_num = atoi(optarg); - if (prog_num < 0 || prog_num >= MAX_PROG) { - fprintf(stderr, - "--prognum too large err(%d):%s\n", - errno, strerror(errno)); - goto error; - } + prog_name = optarg; break; case 'c': /* Add multiple CPUs */ @@ -656,21 +703,21 @@ int main(int argc, char **argv) case 'h': error: default: - usage(argv); + usage(argv, obj); return EXIT_FAIL_OPTION; } } /* Required option */ if (ifindex == -1) { fprintf(stderr, "ERR: required option --dev missing\n"); - usage(argv); + usage(argv, obj); return EXIT_FAIL_OPTION; } /* Required option */ if (add_cpu == -1) { fprintf(stderr, "ERR: required option --cpu missing\n"); fprintf(stderr, " Specify multiple --cpu option to add more\n"); - usage(argv); + usage(argv, obj); return EXIT_FAIL_OPTION; } @@ -678,11 +725,23 @@ int main(int argc, char **argv) signal(SIGINT, int_exit); signal(SIGTERM, int_exit); - if (bpf_set_link_xdp_fd(ifindex, prog_fd[prog_num], xdp_flags) < 0) { + prog = bpf_object__find_program_by_title(obj, prog_name); + if (!prog) { + fprintf(stderr, "bpf_object__find_program_by_title failed\n"); + return EXIT_FAIL; + } + + prog_fd = bpf_program__fd(prog); + if (prog_fd < 0) { + fprintf(stderr, "bpf_program__fd failed\n"); + return EXIT_FAIL; + } + + if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) { fprintf(stderr, "link set xdp fd failed\n"); return EXIT_FAIL_XDP; } - stats_poll(interval, use_separators, prog_num, stress_mode); + stats_poll(interval, use_separators, prog_name, stress_mode); return EXIT_OK; } diff --git a/samples/bpf/xdp_redirect_map_user.c b/samples/bpf/xdp_redirect_map_user.c index 4445e76854b5..60d46eea225b 100644 --- a/samples/bpf/xdp_redirect_map_user.c +++ b/samples/bpf/xdp_redirect_map_user.c @@ -22,15 +22,16 @@ #include #include -#include "bpf_load.h" #include "bpf_util.h" #include +#include "bpf/libbpf.h" static int ifindex_in; static int ifindex_out; static bool ifindex_out_xdp_dummy_attached = true; static __u32 xdp_flags; +static int rxcnt_map_fd; static void int_exit(int sig) { @@ -53,7 +54,7 @@ static void poll_stats(int interval, int ifindex) int i; sleep(interval); - assert(bpf_map_lookup_elem(map_fd[1], &key, values) == 0); + assert(bpf_map_lookup_elem(rxcnt_map_fd, &key, values) == 0); for (i = 0; i < nr_cpus; i++) sum += (values[i] - prev[i]); if (sum) @@ -76,9 +77,16 @@ static void usage(const char *prog) int main(int argc, char **argv) { struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; + struct bpf_prog_load_attr prog_load_attr = { + .prog_type = BPF_PROG_TYPE_XDP, + }; + struct bpf_program *prog, *dummy_prog; + int prog_fd, dummy_prog_fd; const char *optstr = "SN"; - char filename[256]; + struct bpf_object *obj; int ret, opt, key = 0; + char filename[256]; + int tx_port_map_fd; while ((opt = getopt(argc, argv, optstr)) != -1) { switch (opt) { @@ -109,24 +117,40 @@ int main(int argc, char **argv) printf("input: %d output: %d\n", ifindex_in, ifindex_out); snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + prog_load_attr.file = filename; + + if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd)) + return 1; - if (load_bpf_file(filename)) { - printf("%s", bpf_log_buf); + prog = bpf_program__next(NULL, obj); + dummy_prog = bpf_program__next(prog, obj); + if (!prog || !dummy_prog) { + printf("finding a prog in obj file failed\n"); + return 1; + } + /* bpf_prog_load_xattr gives us the pointer to first prog's fd, + * so we're missing only the fd for dummy prog + */ + dummy_prog_fd = bpf_program__fd(dummy_prog); + if (prog_fd < 0 || dummy_prog_fd < 0) { + printf("bpf_prog_load_xattr: %s\n", strerror(errno)); return 1; } - if (!prog_fd[0]) { - printf("load_bpf_file: %s\n", strerror(errno)); + tx_port_map_fd = bpf_object__find_map_fd_by_name(obj, "tx_port"); + rxcnt_map_fd = bpf_object__find_map_fd_by_name(obj, "rxcnt"); + if (tx_port_map_fd < 0 || rxcnt_map_fd < 0) { + printf("bpf_object__find_map_fd_by_name failed\n"); return 1; } - if (bpf_set_link_xdp_fd(ifindex_in, prog_fd[0], xdp_flags) < 0) { + if (bpf_set_link_xdp_fd(ifindex_in, prog_fd, xdp_flags) < 0) { printf("ERROR: link set xdp fd failed on %d\n", ifindex_in); return 1; } /* Loading dummy XDP prog on out-device */ - if (bpf_set_link_xdp_fd(ifindex_out, prog_fd[1], + if (bpf_set_link_xdp_fd(ifindex_out, dummy_prog_fd, (xdp_flags | XDP_FLAGS_UPDATE_IF_NOEXIST)) < 0) { printf("WARN: link set xdp fd failed on %d\n", ifindex_out); ifindex_out_xdp_dummy_attached = false; @@ -135,11 +159,8 @@ int main(int argc, char **argv) signal(SIGINT, int_exit); signal(SIGTERM, int_exit); - printf("map[0] (vports) = %i, map[1] (map) = %i, map[2] (count) = %i\n", - map_fd[0], map_fd[1], map_fd[2]); - /* populate virtual to physical port map */ - ret = bpf_map_update_elem(map_fd[0], &key, &ifindex_out, 0); + ret = bpf_map_update_elem(tx_port_map_fd, &key, &ifindex_out, 0); if (ret) { perror("bpf_update_elem"); goto out; diff --git a/samples/bpf/xdp_redirect_user.c b/samples/bpf/xdp_redirect_user.c index 81a69e36cb78..93404820df68 100644 --- a/samples/bpf/xdp_redirect_user.c +++ b/samples/bpf/xdp_redirect_user.c @@ -22,15 +22,16 @@ #include #include -#include "bpf_load.h" #include "bpf_util.h" #include +#include "bpf/libbpf.h" static int ifindex_in; static int ifindex_out; static bool ifindex_out_xdp_dummy_attached = true; static __u32 xdp_flags; +static int rxcnt_map_fd; static void int_exit(int sig) { @@ -53,7 +54,7 @@ static void poll_stats(int interval, int ifindex) int i; sleep(interval); - assert(bpf_map_lookup_elem(map_fd[1], &key, values) == 0); + assert(bpf_map_lookup_elem(rxcnt_map_fd, &key, values) == 0); for (i = 0; i < nr_cpus; i++) sum += (values[i] - prev[i]); if (sum) @@ -77,9 +78,16 @@ static void usage(const char *prog) int main(int argc, char **argv) { struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; + struct bpf_prog_load_attr prog_load_attr = { + .prog_type = BPF_PROG_TYPE_XDP, + }; + struct bpf_program *prog, *dummy_prog; + int prog_fd, tx_port_map_fd, opt; const char *optstr = "SN"; + struct bpf_object *obj; char filename[256]; - int ret, opt, key = 0; + int dummy_prog_fd; + int ret, key = 0; while ((opt = getopt(argc, argv, optstr)) != -1) { switch (opt) { @@ -110,24 +118,40 @@ int main(int argc, char **argv) printf("input: %d output: %d\n", ifindex_in, ifindex_out); snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + prog_load_attr.file = filename; - if (load_bpf_file(filename)) { - printf("%s", bpf_log_buf); + if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd)) + return 1; + + prog = bpf_program__next(NULL, obj); + dummy_prog = bpf_program__next(prog, obj); + if (!prog || !dummy_prog) { + printf("finding a prog in obj file failed\n"); + return 1; + } + /* bpf_prog_load_xattr gives us the pointer to first prog's fd, + * so we're missing only the fd for dummy prog + */ + dummy_prog_fd = bpf_program__fd(dummy_prog); + if (prog_fd < 0 || dummy_prog_fd < 0) { + printf("bpf_prog_load_xattr: %s\n", strerror(errno)); return 1; } - if (!prog_fd[0]) { - printf("load_bpf_file: %s\n", strerror(errno)); + tx_port_map_fd = bpf_object__find_map_fd_by_name(obj, "tx_port"); + rxcnt_map_fd = bpf_object__find_map_fd_by_name(obj, "rxcnt"); + if (tx_port_map_fd < 0 || rxcnt_map_fd < 0) { + printf("bpf_object__find_map_fd_by_name failed\n"); return 1; } - if (bpf_set_link_xdp_fd(ifindex_in, prog_fd[0], xdp_flags) < 0) { + if (bpf_set_link_xdp_fd(ifindex_in, prog_fd, xdp_flags) < 0) { printf("ERROR: link set xdp fd failed on %d\n", ifindex_in); return 1; } /* Loading dummy XDP prog on out-device */ - if (bpf_set_link_xdp_fd(ifindex_out, prog_fd[1], + if (bpf_set_link_xdp_fd(ifindex_out, dummy_prog_fd, (xdp_flags | XDP_FLAGS_UPDATE_IF_NOEXIST)) < 0) { printf("WARN: link set xdp fd failed on %d\n", ifindex_out); ifindex_out_xdp_dummy_attached = false; @@ -137,7 +161,7 @@ int main(int argc, char **argv) signal(SIGTERM, int_exit); /* bpf redirect port */ - ret = bpf_map_update_elem(map_fd[0], &key, &ifindex_out, 0); + ret = bpf_map_update_elem(tx_port_map_fd, &key, &ifindex_out, 0); if (ret) { perror("bpf_update_elem"); goto out; diff --git a/samples/bpf/xdp_router_ipv4_user.c b/samples/bpf/xdp_router_ipv4_user.c index b2b4dfa776c8..cea2306f5ab7 100644 --- a/samples/bpf/xdp_router_ipv4_user.c +++ b/samples/bpf/xdp_router_ipv4_user.c @@ -15,7 +15,6 @@ #include #include #include -#include "bpf_load.h" #include #include #include @@ -25,11 +24,17 @@ #include #include #include "bpf_util.h" +#include "bpf/libbpf.h" int sock, sock_arp, flags = 0; static int total_ifindex; int *ifindex_list; char buf[8192]; +static int lpm_map_fd; +static int rxcnt_map_fd; +static int arp_table_map_fd; +static int exact_match_map_fd; +static int tx_port_map_fd; static int get_route_table(int rtm_family); static void int_exit(int sig) @@ -186,7 +191,8 @@ static void read_route(struct nlmsghdr *nh, int nll) bpf_set_link_xdp_fd(ifindex_list[i], -1, flags); exit(0); } - assert(bpf_map_update_elem(map_fd[4], &route.iface, &route.iface, 0) == 0); + assert(bpf_map_update_elem(tx_port_map_fd, + &route.iface, &route.iface, 0) == 0); if (rtm_family == AF_INET) { struct trie_value { __u8 prefix[4]; @@ -207,11 +213,16 @@ static void read_route(struct nlmsghdr *nh, int nll) direct_entry.arp.dst = 0; if (route.dst_len == 32) { if (nh->nlmsg_type == RTM_DELROUTE) { - assert(bpf_map_delete_elem(map_fd[3], &route.dst) == 0); + assert(bpf_map_delete_elem(exact_match_map_fd, + &route.dst) == 0); } else { - if (bpf_map_lookup_elem(map_fd[2], &route.dst, &direct_entry.arp.mac) == 0) + if (bpf_map_lookup_elem(arp_table_map_fd, + &route.dst, + &direct_entry.arp.mac) == 0) direct_entry.arp.dst = route.dst; - assert(bpf_map_update_elem(map_fd[3], &route.dst, &direct_entry, 0) == 0); + assert(bpf_map_update_elem(exact_match_map_fd, + &route.dst, + &direct_entry, 0) == 0); } } for (i = 0; i < 4; i++) @@ -225,7 +236,7 @@ static void read_route(struct nlmsghdr *nh, int nll) route.gw, route.dst_len, route.metric, route.iface_name); - if (bpf_map_lookup_elem(map_fd[0], prefix_key, + if (bpf_map_lookup_elem(lpm_map_fd, prefix_key, prefix_value) < 0) { for (i = 0; i < 4; i++) prefix_value->prefix[i] = prefix_key->data[i]; @@ -234,7 +245,7 @@ static void read_route(struct nlmsghdr *nh, int nll) prefix_value->gw = route.gw; prefix_value->metric = route.metric; - assert(bpf_map_update_elem(map_fd[0], + assert(bpf_map_update_elem(lpm_map_fd, prefix_key, prefix_value, 0 ) == 0); @@ -247,7 +258,7 @@ static void read_route(struct nlmsghdr *nh, int nll) prefix_key->data[2], prefix_key->data[3], prefix_key->prefixlen); - assert(bpf_map_delete_elem(map_fd[0], + assert(bpf_map_delete_elem(lpm_map_fd, prefix_key ) == 0); /* Rereading the route table to check if @@ -275,8 +286,7 @@ static void read_route(struct nlmsghdr *nh, int nll) prefix_value->ifindex = route.iface; prefix_value->gw = route.gw; prefix_value->metric = route.metric; - assert(bpf_map_update_elem( - map_fd[0], + assert(bpf_map_update_elem(lpm_map_fd, prefix_key, prefix_value, 0) == 0); @@ -401,7 +411,8 @@ static void read_arp(struct nlmsghdr *nh, int nll) arp_entry.mac = atol(mac); printf("%x\t\t%llx\n", arp_entry.dst, arp_entry.mac); if (ndm_family == AF_INET) { - if (bpf_map_lookup_elem(map_fd[3], &arp_entry.dst, + if (bpf_map_lookup_elem(exact_match_map_fd, + &arp_entry.dst, &direct_entry) == 0) { if (nh->nlmsg_type == RTM_DELNEIGH) { direct_entry.arp.dst = 0; @@ -410,16 +421,17 @@ static void read_arp(struct nlmsghdr *nh, int nll) direct_entry.arp.dst = arp_entry.dst; direct_entry.arp.mac = arp_entry.mac; } - assert(bpf_map_update_elem(map_fd[3], + assert(bpf_map_update_elem(exact_match_map_fd, &arp_entry.dst, &direct_entry, 0 ) == 0); memset(&direct_entry, 0, sizeof(direct_entry)); } if (nh->nlmsg_type == RTM_DELNEIGH) { - assert(bpf_map_delete_elem(map_fd[2], &arp_entry.dst) == 0); + assert(bpf_map_delete_elem(arp_table_map_fd, + &arp_entry.dst) == 0); } else if (nh->nlmsg_type == RTM_NEWNEIGH) { - assert(bpf_map_update_elem(map_fd[2], + assert(bpf_map_update_elem(arp_table_map_fd, &arp_entry.dst, &arp_entry.mac, 0 ) == 0); @@ -553,7 +565,8 @@ static int monitor_route(void) for (key = 0; key < nr_keys; key++) { __u64 sum = 0; - assert(bpf_map_lookup_elem(map_fd[1], &key, values) == 0); + assert(bpf_map_lookup_elem(rxcnt_map_fd, + &key, values) == 0); for (i = 0; i < nr_cpus; i++) sum += (values[i] - prev[key][i]); if (sum) @@ -596,11 +609,18 @@ cleanup: int main(int ac, char **argv) { + struct bpf_prog_load_attr prog_load_attr = { + .prog_type = BPF_PROG_TYPE_XDP, + }; + struct bpf_object *obj; char filename[256]; char **ifname_list; + int prog_fd; int i = 1; snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + prog_load_attr.file = filename; + if (ac < 2) { printf("usage: %s [-S] Interface name list\n", argv[0]); return 1; @@ -614,15 +634,28 @@ int main(int ac, char **argv) total_ifindex = ac - 1; ifname_list = (argv + 1); } - if (load_bpf_file(filename)) { - printf("%s", bpf_log_buf); + + if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd)) return 1; - } + printf("\n**************loading bpf file*********************\n\n\n"); - if (!prog_fd[0]) { - printf("load_bpf_file: %s\n", strerror(errno)); + if (!prog_fd) { + printf("bpf_prog_load_xattr: %s\n", strerror(errno)); return 1; } + + lpm_map_fd = bpf_object__find_map_fd_by_name(obj, "lpm_map"); + rxcnt_map_fd = bpf_object__find_map_fd_by_name(obj, "rxcnt"); + arp_table_map_fd = bpf_object__find_map_fd_by_name(obj, "arp_table"); + exact_match_map_fd = bpf_object__find_map_fd_by_name(obj, + "exact_match"); + tx_port_map_fd = bpf_object__find_map_fd_by_name(obj, "tx_port"); + if (lpm_map_fd < 0 || rxcnt_map_fd < 0 || arp_table_map_fd < 0 || + exact_match_map_fd < 0 || tx_port_map_fd < 0) { + printf("bpf_object__find_map_fd_by_name failed\n"); + return 1; + } + ifindex_list = (int *)malloc(total_ifindex * sizeof(int *)); for (i = 0; i < total_ifindex; i++) { ifindex_list[i] = if_nametoindex(ifname_list[i]); @@ -633,7 +666,7 @@ int main(int ac, char **argv) } } for (i = 0; i < total_ifindex; i++) { - if (bpf_set_link_xdp_fd(ifindex_list[i], prog_fd[0], flags) < 0) { + if (bpf_set_link_xdp_fd(ifindex_list[i], prog_fd, flags) < 0) { printf("link set xdp fd failed\n"); int recovery_index = i; diff --git a/samples/bpf/xdp_tx_iptunnel_user.c b/samples/bpf/xdp_tx_iptunnel_user.c index a4ccc33adac0..5093d8220da5 100644 --- a/samples/bpf/xdp_tx_iptunnel_user.c +++ b/samples/bpf/xdp_tx_iptunnel_user.c @@ -17,7 +17,7 @@ #include #include #include -#include "bpf_load.h" +#include "bpf/libbpf.h" #include #include "bpf_util.h" #include "xdp_tx_iptunnel_common.h" @@ -26,6 +26,7 @@ static int ifindex = -1; static __u32 xdp_flags = 0; +static int rxcnt_map_fd; static void int_exit(int sig) { @@ -53,7 +54,8 @@ static void poll_stats(unsigned int kill_after_s) for (proto = 0; proto < nr_protos; proto++) { __u64 sum = 0; - assert(bpf_map_lookup_elem(map_fd[0], &proto, values) == 0); + assert(bpf_map_lookup_elem(rxcnt_map_fd, &proto, + values) == 0); for (i = 0; i < nr_cpus; i++) sum += (values[i] - prev[proto][i]); @@ -138,15 +140,19 @@ static int parse_ports(const char *port_str, int *min_port, int *max_port) int main(int argc, char **argv) { + struct bpf_prog_load_attr prog_load_attr = { + .prog_type = BPF_PROG_TYPE_XDP, + }; + struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; + int min_port = 0, max_port = 0, vip2tnl_map_fd; + const char *optstr = "i:a:p:s:d:m:T:P:SNh"; unsigned char opt_flags[256] = {}; unsigned int kill_after_s = 0; - const char *optstr = "i:a:p:s:d:m:T:P:SNh"; - int min_port = 0, max_port = 0; struct iptnl_info tnl = {}; - struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; + struct bpf_object *obj; struct vip vip = {}; char filename[256]; - int opt; + int opt, prog_fd; int i; tnl.family = AF_UNSPEC; @@ -232,29 +238,36 @@ int main(int argc, char **argv) } snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + prog_load_attr.file = filename; - if (load_bpf_file(filename)) { - printf("%s", bpf_log_buf); + if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd)) return 1; - } - if (!prog_fd[0]) { + if (!prog_fd) { printf("load_bpf_file: %s\n", strerror(errno)); return 1; } + rxcnt_map_fd = bpf_object__find_map_fd_by_name(obj, "rxcnt"); + vip2tnl_map_fd = bpf_object__find_map_fd_by_name(obj, "vip2tnl"); + if (vip2tnl_map_fd < 0 || rxcnt_map_fd < 0) { + printf("bpf_object__find_map_fd_by_name failed\n"); + return 1; + } + signal(SIGINT, int_exit); signal(SIGTERM, int_exit); while (min_port <= max_port) { vip.dport = htons(min_port++); - if (bpf_map_update_elem(map_fd[1], &vip, &tnl, BPF_NOEXIST)) { + if (bpf_map_update_elem(vip2tnl_map_fd, &vip, &tnl, + BPF_NOEXIST)) { perror("bpf_map_update_elem(&vip2tnl)"); return 1; } } - if (bpf_set_link_xdp_fd(ifindex, prog_fd[0], xdp_flags) < 0) { + if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) { printf("link set xdp fd failed\n"); return 1; } -- cgit v1.2.3-59-g8ed1b From 6a5457618f62147aeac706d26ff28e0e57a324e3 Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Fri, 1 Feb 2019 22:42:26 +0100 Subject: samples/bpf: Extend RLIMIT_MEMLOCK for xdp_{sample_pkts, router_ipv4} There is a common problem with xdp samples that happens when user wants to run a particular sample and some bpf program is already loaded. The default 64kb RLIMIT_MEMLOCK resource limit will cause a following error (assuming that xdp sample that is failing was converted to libbpf usage): libbpf: Error in bpf_object__probe_name():Operation not permitted(1). Couldn't load basic 'r0 = 0' BPF program. libbpf: failed to load object './xdp_sample_pkts_kern.o' Fix it in xdp_sample_pkts and xdp_router_ipv4 by setting RLIMIT_MEMLOCK to RLIM_INFINITY. Signed-off-by: Maciej Fijalkowski Reviewed-by: Jakub Kicinski Acked-by: Jesper Dangaard Brouer Acked-by: John Fastabend Signed-off-by: Daniel Borkmann --- samples/bpf/xdp_router_ipv4_user.c | 7 +++++++ samples/bpf/xdp_sample_pkts_user.c | 7 +++++++ 2 files changed, 14 insertions(+) (limited to 'samples') diff --git a/samples/bpf/xdp_router_ipv4_user.c b/samples/bpf/xdp_router_ipv4_user.c index cea2306f5ab7..c63c6beec7d6 100644 --- a/samples/bpf/xdp_router_ipv4_user.c +++ b/samples/bpf/xdp_router_ipv4_user.c @@ -25,6 +25,7 @@ #include #include "bpf_util.h" #include "bpf/libbpf.h" +#include int sock, sock_arp, flags = 0; static int total_ifindex; @@ -609,6 +610,7 @@ cleanup: int main(int ac, char **argv) { + struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; struct bpf_prog_load_attr prog_load_attr = { .prog_type = BPF_PROG_TYPE_XDP, }; @@ -635,6 +637,11 @@ int main(int ac, char **argv) ifname_list = (argv + 1); } + if (setrlimit(RLIMIT_MEMLOCK, &r)) { + perror("setrlimit(RLIMIT_MEMLOCK)"); + return 1; + } + if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd)) return 1; diff --git a/samples/bpf/xdp_sample_pkts_user.c b/samples/bpf/xdp_sample_pkts_user.c index 8dd87c1eb560..5f5828ee0761 100644 --- a/samples/bpf/xdp_sample_pkts_user.c +++ b/samples/bpf/xdp_sample_pkts_user.c @@ -12,6 +12,7 @@ #include #include #include +#include #include "perf-sys.h" #include "trace_helpers.h" @@ -99,6 +100,7 @@ static void sig_handler(int signo) int main(int argc, char **argv) { + struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; struct bpf_prog_load_attr prog_load_attr = { .prog_type = BPF_PROG_TYPE_XDP, }; @@ -114,6 +116,11 @@ int main(int argc, char **argv) return 1; } + if (setrlimit(RLIMIT_MEMLOCK, &r)) { + perror("setrlimit(RLIMIT_MEMLOCK)"); + return 1; + } + numcpus = get_nprocs(); if (numcpus > MAX_CPUS) numcpus = MAX_CPUS; -- cgit v1.2.3-59-g8ed1b From 743e568c15860d4061202f73214c106a5bb0890b Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Fri, 1 Feb 2019 22:42:28 +0100 Subject: samples/bpf: Add a "force" flag to XDP samples Make xdp samples consistent with iproute2 behavior and set the XDP_FLAGS_UPDATE_IF_NOEXIST by default when setting the xdp program on interface. Provide an option for user to force the program loading, which as a result will not include the mentioned flag in bpf_set_link_xdp_fd call. Signed-off-by: Maciej Fijalkowski Reviewed-by: Jakub Kicinski Acked-by: John Fastabend Signed-off-by: Daniel Borkmann --- samples/bpf/xdp1_user.c | 10 +++++--- samples/bpf/xdp_adjust_tail_user.c | 8 ++++-- samples/bpf/xdp_redirect_cpu_user.c | 8 ++++-- samples/bpf/xdp_redirect_map_user.c | 10 +++++--- samples/bpf/xdp_redirect_user.c | 10 +++++--- samples/bpf/xdp_router_ipv4_user.c | 50 +++++++++++++++++++++++++++---------- samples/bpf/xdp_rxq_info_user.c | 8 ++++-- samples/bpf/xdp_sample_pkts_user.c | 40 +++++++++++++++++++++++------ samples/bpf/xdp_tx_iptunnel_user.c | 8 ++++-- samples/bpf/xdpsock_user.c | 7 ++++-- 10 files changed, 119 insertions(+), 40 deletions(-) (limited to 'samples') diff --git a/samples/bpf/xdp1_user.c b/samples/bpf/xdp1_user.c index 8bfda95c77ad..505bce207165 100644 --- a/samples/bpf/xdp1_user.c +++ b/samples/bpf/xdp1_user.c @@ -22,7 +22,7 @@ #include "bpf/libbpf.h" static int ifindex; -static __u32 xdp_flags; +static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; static void int_exit(int sig) { @@ -63,7 +63,8 @@ static void usage(const char *prog) "usage: %s [OPTS] IFACE\n\n" "OPTS:\n" " -S use skb-mode\n" - " -N enforce native mode\n", + " -N enforce native mode\n" + " -F force loading prog\n", prog); } @@ -73,7 +74,7 @@ int main(int argc, char **argv) struct bpf_prog_load_attr prog_load_attr = { .prog_type = BPF_PROG_TYPE_XDP, }; - const char *optstr = "SN"; + const char *optstr = "FSN"; int prog_fd, map_fd, opt; struct bpf_object *obj; struct bpf_map *map; @@ -87,6 +88,9 @@ int main(int argc, char **argv) case 'N': xdp_flags |= XDP_FLAGS_DRV_MODE; break; + case 'F': + xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST; + break; default: usage(basename(argv[0])); return 1; diff --git a/samples/bpf/xdp_adjust_tail_user.c b/samples/bpf/xdp_adjust_tail_user.c index 3042ce37dae8..049bddf7778b 100644 --- a/samples/bpf/xdp_adjust_tail_user.c +++ b/samples/bpf/xdp_adjust_tail_user.c @@ -24,7 +24,7 @@ #define STATS_INTERVAL_S 2U static int ifindex = -1; -static __u32 xdp_flags; +static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; static void int_exit(int sig) { @@ -60,6 +60,7 @@ static void usage(const char *cmd) printf(" -T Default: 0 (forever)\n"); printf(" -S use skb-mode\n"); printf(" -N enforce native mode\n"); + printf(" -F force loading prog\n"); printf(" -h Display this help\n"); } @@ -70,8 +71,8 @@ int main(int argc, char **argv) .prog_type = BPF_PROG_TYPE_XDP, }; unsigned char opt_flags[256] = {}; + const char *optstr = "i:T:SNFh"; unsigned int kill_after_s = 0; - const char *optstr = "i:T:SNh"; int i, prog_fd, map_fd, opt; struct bpf_object *obj; struct bpf_map *map; @@ -96,6 +97,9 @@ int main(int argc, char **argv) case 'N': xdp_flags |= XDP_FLAGS_DRV_MODE; break; + case 'F': + xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST; + break; default: usage(argv[0]); return 1; diff --git a/samples/bpf/xdp_redirect_cpu_user.c b/samples/bpf/xdp_redirect_cpu_user.c index 8645ddc2da0e..0224afb55845 100644 --- a/samples/bpf/xdp_redirect_cpu_user.c +++ b/samples/bpf/xdp_redirect_cpu_user.c @@ -33,7 +33,7 @@ static int ifindex = -1; static char ifname_buf[IF_NAMESIZE]; static char *ifname; -static __u32 xdp_flags; +static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; static int cpu_map_fd; static int rx_cnt_map_fd; static int redirect_err_cnt_map_fd; @@ -62,6 +62,7 @@ static const struct option long_options[] = { {"cpu", required_argument, NULL, 'c' }, {"stress-mode", no_argument, NULL, 'x' }, {"no-separators", no_argument, NULL, 'z' }, + {"force", no_argument, NULL, 'F' }, {0, 0, NULL, 0 } }; @@ -651,7 +652,7 @@ int main(int argc, char **argv) mark_cpus_unavailable(); /* Parse commands line args */ - while ((opt = getopt_long(argc, argv, "hSd:", + while ((opt = getopt_long(argc, argv, "hSd:s:p:q:c:xzF", long_options, &longindex)) != -1) { switch (opt) { case 'd': @@ -700,6 +701,9 @@ int main(int argc, char **argv) case 'q': qsize = atoi(optarg); break; + case 'F': + xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST; + break; case 'h': error: default: diff --git a/samples/bpf/xdp_redirect_map_user.c b/samples/bpf/xdp_redirect_map_user.c index 60d46eea225b..470e1a7e8810 100644 --- a/samples/bpf/xdp_redirect_map_user.c +++ b/samples/bpf/xdp_redirect_map_user.c @@ -30,7 +30,7 @@ static int ifindex_in; static int ifindex_out; static bool ifindex_out_xdp_dummy_attached = true; -static __u32 xdp_flags; +static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; static int rxcnt_map_fd; static void int_exit(int sig) @@ -70,7 +70,8 @@ static void usage(const char *prog) "usage: %s [OPTS] IFINDEX_IN IFINDEX_OUT\n\n" "OPTS:\n" " -S use skb-mode\n" - " -N enforce native mode\n", + " -N enforce native mode\n" + " -F force loading prog\n", prog); } @@ -82,7 +83,7 @@ int main(int argc, char **argv) }; struct bpf_program *prog, *dummy_prog; int prog_fd, dummy_prog_fd; - const char *optstr = "SN"; + const char *optstr = "FSN"; struct bpf_object *obj; int ret, opt, key = 0; char filename[256]; @@ -96,6 +97,9 @@ int main(int argc, char **argv) case 'N': xdp_flags |= XDP_FLAGS_DRV_MODE; break; + case 'F': + xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST; + break; default: usage(basename(argv[0])); return 1; diff --git a/samples/bpf/xdp_redirect_user.c b/samples/bpf/xdp_redirect_user.c index 93404820df68..be6058cda97c 100644 --- a/samples/bpf/xdp_redirect_user.c +++ b/samples/bpf/xdp_redirect_user.c @@ -30,7 +30,7 @@ static int ifindex_in; static int ifindex_out; static bool ifindex_out_xdp_dummy_attached = true; -static __u32 xdp_flags; +static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; static int rxcnt_map_fd; static void int_exit(int sig) @@ -70,7 +70,8 @@ static void usage(const char *prog) "usage: %s [OPTS] IFINDEX_IN IFINDEX_OUT\n\n" "OPTS:\n" " -S use skb-mode\n" - " -N enforce native mode\n", + " -N enforce native mode\n" + " -F force loading prog\n", prog); } @@ -83,7 +84,7 @@ int main(int argc, char **argv) }; struct bpf_program *prog, *dummy_prog; int prog_fd, tx_port_map_fd, opt; - const char *optstr = "SN"; + const char *optstr = "FSN"; struct bpf_object *obj; char filename[256]; int dummy_prog_fd; @@ -97,6 +98,9 @@ int main(int argc, char **argv) case 'N': xdp_flags |= XDP_FLAGS_DRV_MODE; break; + case 'F': + xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST; + break; default: usage(basename(argv[0])); return 1; diff --git a/samples/bpf/xdp_router_ipv4_user.c b/samples/bpf/xdp_router_ipv4_user.c index c63c6beec7d6..208d6a996478 100644 --- a/samples/bpf/xdp_router_ipv4_user.c +++ b/samples/bpf/xdp_router_ipv4_user.c @@ -26,8 +26,9 @@ #include "bpf_util.h" #include "bpf/libbpf.h" #include +#include -int sock, sock_arp, flags = 0; +int sock, sock_arp, flags = XDP_FLAGS_UPDATE_IF_NOEXIST; static int total_ifindex; int *ifindex_list; char buf[8192]; @@ -608,33 +609,56 @@ cleanup: return ret; } +static void usage(const char *prog) +{ + fprintf(stderr, + "%s: %s [OPTS] interface name list\n\n" + "OPTS:\n" + " -S use skb-mode\n" + " -F force loading prog\n", + __func__, prog); +} + int main(int ac, char **argv) { struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; struct bpf_prog_load_attr prog_load_attr = { .prog_type = BPF_PROG_TYPE_XDP, }; + const char *optstr = "SF"; struct bpf_object *obj; char filename[256]; char **ifname_list; - int prog_fd; + int prog_fd, opt; int i = 1; snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); prog_load_attr.file = filename; - if (ac < 2) { - printf("usage: %s [-S] Interface name list\n", argv[0]); - return 1; + total_ifindex = ac - 1; + ifname_list = (argv + 1); + + while ((opt = getopt(ac, argv, optstr)) != -1) { + switch (opt) { + case 'S': + flags |= XDP_FLAGS_SKB_MODE; + total_ifindex--; + ifname_list++; + break; + case 'F': + flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST; + total_ifindex--; + ifname_list++; + break; + default: + usage(basename(argv[0])); + return 1; + } } - if (!strcmp(argv[1], "-S")) { - flags = XDP_FLAGS_SKB_MODE; - total_ifindex = ac - 2; - ifname_list = (argv + 2); - } else { - flags = 0; - total_ifindex = ac - 1; - ifname_list = (argv + 1); + + if (optind == ac) { + usage(basename(argv[0])); + return 1; } if (setrlimit(RLIMIT_MEMLOCK, &r)) { diff --git a/samples/bpf/xdp_rxq_info_user.c b/samples/bpf/xdp_rxq_info_user.c index ef26f882f92f..e7a98c2a440f 100644 --- a/samples/bpf/xdp_rxq_info_user.c +++ b/samples/bpf/xdp_rxq_info_user.c @@ -30,7 +30,7 @@ static int ifindex = -1; static char ifname_buf[IF_NAMESIZE]; static char *ifname; -static __u32 xdp_flags; +static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; static struct bpf_map *stats_global_map; static struct bpf_map *rx_queue_index_map; @@ -52,6 +52,7 @@ static const struct option long_options[] = { {"action", required_argument, NULL, 'a' }, {"readmem", no_argument, NULL, 'r' }, {"swapmac", no_argument, NULL, 'm' }, + {"force", no_argument, NULL, 'F' }, {0, 0, NULL, 0 } }; @@ -487,7 +488,7 @@ int main(int argc, char **argv) } /* Parse commands line args */ - while ((opt = getopt_long(argc, argv, "hSd:", + while ((opt = getopt_long(argc, argv, "FhSrmzd:s:a:", long_options, &longindex)) != -1) { switch (opt) { case 'd': @@ -524,6 +525,9 @@ int main(int argc, char **argv) case 'm': cfg_options |= SWAP_MAC; break; + case 'F': + xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST; + break; case 'h': error: default: diff --git a/samples/bpf/xdp_sample_pkts_user.c b/samples/bpf/xdp_sample_pkts_user.c index 5f5828ee0761..62f34827c775 100644 --- a/samples/bpf/xdp_sample_pkts_user.c +++ b/samples/bpf/xdp_sample_pkts_user.c @@ -13,6 +13,8 @@ #include #include #include +#include +#include #include "perf-sys.h" #include "trace_helpers.h" @@ -21,12 +23,13 @@ static int pmu_fds[MAX_CPUS], if_idx; static struct perf_event_mmap_page *headers[MAX_CPUS]; static char *if_name; +static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; static int do_attach(int idx, int fd, const char *name) { int err; - err = bpf_set_link_xdp_fd(idx, fd, 0); + err = bpf_set_link_xdp_fd(idx, fd, xdp_flags); if (err < 0) printf("ERROR: failed to attach program to %s\n", name); @@ -98,21 +101,42 @@ static void sig_handler(int signo) exit(0); } +static void usage(const char *prog) +{ + fprintf(stderr, + "%s: %s [OPTS] \n\n" + "OPTS:\n" + " -F force loading prog\n", + __func__, prog); +} + int main(int argc, char **argv) { struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; struct bpf_prog_load_attr prog_load_attr = { .prog_type = BPF_PROG_TYPE_XDP, }; + const char *optstr = "F"; + int prog_fd, map_fd, opt; struct bpf_object *obj; struct bpf_map *map; - int prog_fd, map_fd; char filename[256]; int ret, err, i; int numcpus; - if (argc < 2) { - printf("Usage: %s \n", argv[0]); + while ((opt = getopt(argc, argv, optstr)) != -1) { + switch (opt) { + case 'F': + xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST; + break; + default: + usage(basename(argv[0])); + return 1; + } + } + + if (optind == argc) { + usage(basename(argv[0])); return 1; } @@ -143,16 +167,16 @@ int main(int argc, char **argv) } map_fd = bpf_map__fd(map); - if_idx = if_nametoindex(argv[1]); + if_idx = if_nametoindex(argv[optind]); if (!if_idx) - if_idx = strtoul(argv[1], NULL, 0); + if_idx = strtoul(argv[optind], NULL, 0); if (!if_idx) { fprintf(stderr, "Invalid ifname\n"); return 1; } - if_name = argv[1]; - err = do_attach(if_idx, prog_fd, argv[1]); + if_name = argv[optind]; + err = do_attach(if_idx, prog_fd, if_name); if (err) return err; diff --git a/samples/bpf/xdp_tx_iptunnel_user.c b/samples/bpf/xdp_tx_iptunnel_user.c index 5093d8220da5..e3de60930d27 100644 --- a/samples/bpf/xdp_tx_iptunnel_user.c +++ b/samples/bpf/xdp_tx_iptunnel_user.c @@ -25,7 +25,7 @@ #define STATS_INTERVAL_S 2U static int ifindex = -1; -static __u32 xdp_flags = 0; +static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; static int rxcnt_map_fd; static void int_exit(int sig) @@ -83,6 +83,7 @@ static void usage(const char *cmd) printf(" -P Default is TCP\n"); printf(" -S use skb-mode\n"); printf(" -N enforce native mode\n"); + printf(" -F Force loading the XDP prog\n"); printf(" -h Display this help\n"); } @@ -145,7 +146,7 @@ int main(int argc, char **argv) }; struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; int min_port = 0, max_port = 0, vip2tnl_map_fd; - const char *optstr = "i:a:p:s:d:m:T:P:SNh"; + const char *optstr = "i:a:p:s:d:m:T:P:FSNh"; unsigned char opt_flags[256] = {}; unsigned int kill_after_s = 0; struct iptnl_info tnl = {}; @@ -217,6 +218,9 @@ int main(int argc, char **argv) case 'N': xdp_flags |= XDP_FLAGS_DRV_MODE; break; + case 'F': + xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST; + break; default: usage(argv[0]); return 1; diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c index 57ecadc58403..188723784768 100644 --- a/samples/bpf/xdpsock_user.c +++ b/samples/bpf/xdpsock_user.c @@ -68,7 +68,7 @@ enum benchmark_type { }; static enum benchmark_type opt_bench = BENCH_RXDROP; -static u32 opt_xdp_flags; +static u32 opt_xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; static const char *opt_if = ""; static int opt_ifindex; static int opt_queue; @@ -682,7 +682,7 @@ static void parse_command_line(int argc, char **argv) opterr = 0; for (;;) { - c = getopt_long(argc, argv, "rtli:q:psSNn:cz", long_options, + c = getopt_long(argc, argv, "Frtli:q:psSNn:cz", long_options, &option_index); if (c == -1) break; @@ -725,6 +725,9 @@ static void parse_command_line(int argc, char **argv) case 'c': opt_xdp_bind_flags |= XDP_COPY; break; + case 'F': + opt_xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST; + break; default: usage(basename(argv[0])); } -- cgit v1.2.3-59-g8ed1b From 3b7a8ec2dec3e408288dbc80b8aef25df20ba119 Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Fri, 1 Feb 2019 22:42:30 +0100 Subject: samples/bpf: Check the prog id before exiting Check the program id within the signal handler on polling xdp samples that were previously converted to libbpf usage. Avoid the situation of unloading the program that was not attached by sample that is exiting. Handle also the case where bpf_get_link_xdp_id didn't exit with an error but the xdp program was not found on an interface. Reported-by: Michal Papaj Reported-by: Jakub Spizewski Signed-off-by: Maciej Fijalkowski Reviewed-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- samples/bpf/xdp1_user.c | 24 ++++++++++++++++- samples/bpf/xdp_adjust_tail_user.c | 30 +++++++++++++++++++--- samples/bpf/xdp_redirect_cpu_user.c | 35 ++++++++++++++++++++----- samples/bpf/xdp_redirect_map_user.c | 49 ++++++++++++++++++++++++++++++++--- samples/bpf/xdp_redirect_user.c | 49 ++++++++++++++++++++++++++++++++--- samples/bpf/xdp_router_ipv4_user.c | 51 ++++++++++++++++++++++++------------- samples/bpf/xdp_rxq_info_user.c | 33 ++++++++++++++++++++---- samples/bpf/xdp_sample_pkts_user.c | 34 +++++++++++++++++++++---- samples/bpf/xdp_tx_iptunnel_user.c | 28 +++++++++++++++++--- samples/bpf/xdpsock_user.c | 23 ++++++++++++++++- 10 files changed, 308 insertions(+), 48 deletions(-) (limited to 'samples') diff --git a/samples/bpf/xdp1_user.c b/samples/bpf/xdp1_user.c index 505bce207165..6a64e93365e1 100644 --- a/samples/bpf/xdp1_user.c +++ b/samples/bpf/xdp1_user.c @@ -23,10 +23,22 @@ static int ifindex; static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; +static __u32 prog_id; static void int_exit(int sig) { - bpf_set_link_xdp_fd(ifindex, -1, xdp_flags); + __u32 curr_prog_id = 0; + + if (bpf_get_link_xdp_id(ifindex, &curr_prog_id, xdp_flags)) { + printf("bpf_get_link_xdp_id failed\n"); + exit(1); + } + if (prog_id == curr_prog_id) + bpf_set_link_xdp_fd(ifindex, -1, xdp_flags); + else if (!curr_prog_id) + printf("couldn't find a prog id on a given interface\n"); + else + printf("program on interface changed, not removing\n"); exit(0); } @@ -74,11 +86,14 @@ int main(int argc, char **argv) struct bpf_prog_load_attr prog_load_attr = { .prog_type = BPF_PROG_TYPE_XDP, }; + struct bpf_prog_info info = {}; + __u32 info_len = sizeof(info); const char *optstr = "FSN"; int prog_fd, map_fd, opt; struct bpf_object *obj; struct bpf_map *map; char filename[256]; + int err; while ((opt = getopt(argc, argv, optstr)) != -1) { switch (opt) { @@ -139,6 +154,13 @@ int main(int argc, char **argv) return 1; } + err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + if (err) { + printf("can't get prog info - %s\n", strerror(errno)); + return err; + } + prog_id = info.id; + poll_stats(map_fd, 2); return 0; diff --git a/samples/bpf/xdp_adjust_tail_user.c b/samples/bpf/xdp_adjust_tail_user.c index 049bddf7778b..07e1b9269e49 100644 --- a/samples/bpf/xdp_adjust_tail_user.c +++ b/samples/bpf/xdp_adjust_tail_user.c @@ -25,11 +25,24 @@ static int ifindex = -1; static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; +static __u32 prog_id; static void int_exit(int sig) { - if (ifindex > -1) - bpf_set_link_xdp_fd(ifindex, -1, xdp_flags); + __u32 curr_prog_id = 0; + + if (ifindex > -1) { + if (bpf_get_link_xdp_id(ifindex, &curr_prog_id, xdp_flags)) { + printf("bpf_get_link_xdp_id failed\n"); + exit(1); + } + if (prog_id == curr_prog_id) + bpf_set_link_xdp_fd(ifindex, -1, xdp_flags); + else if (!curr_prog_id) + printf("couldn't find a prog id on a given iface\n"); + else + printf("program on interface changed, not removing\n"); + } exit(0); } @@ -72,11 +85,14 @@ int main(int argc, char **argv) }; unsigned char opt_flags[256] = {}; const char *optstr = "i:T:SNFh"; + struct bpf_prog_info info = {}; + __u32 info_len = sizeof(info); unsigned int kill_after_s = 0; int i, prog_fd, map_fd, opt; struct bpf_object *obj; struct bpf_map *map; char filename[256]; + int err; for (i = 0; i < strlen(optstr); i++) if (optstr[i] != 'h' && 'a' <= optstr[i] && optstr[i] <= 'z') @@ -146,9 +162,15 @@ int main(int argc, char **argv) return 1; } - poll_stats(map_fd, kill_after_s); + err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + if (err) { + printf("can't get prog info - %s\n", strerror(errno)); + return 1; + } + prog_id = info.id; - bpf_set_link_xdp_fd(ifindex, -1, xdp_flags); + poll_stats(map_fd, kill_after_s); + int_exit(0); return 0; } diff --git a/samples/bpf/xdp_redirect_cpu_user.c b/samples/bpf/xdp_redirect_cpu_user.c index 0224afb55845..586b294d72d3 100644 --- a/samples/bpf/xdp_redirect_cpu_user.c +++ b/samples/bpf/xdp_redirect_cpu_user.c @@ -32,6 +32,7 @@ static const char *__doc__ = static int ifindex = -1; static char ifname_buf[IF_NAMESIZE]; static char *ifname; +static __u32 prog_id; static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; static int cpu_map_fd; @@ -68,11 +69,24 @@ static const struct option long_options[] = { static void int_exit(int sig) { - fprintf(stderr, - "Interrupted: Removing XDP program on ifindex:%d device:%s\n", - ifindex, ifname); - if (ifindex > -1) - bpf_set_link_xdp_fd(ifindex, -1, xdp_flags); + __u32 curr_prog_id = 0; + + if (ifindex > -1) { + if (bpf_get_link_xdp_id(ifindex, &curr_prog_id, xdp_flags)) { + printf("bpf_get_link_xdp_id failed\n"); + exit(EXIT_FAIL); + } + if (prog_id == curr_prog_id) { + fprintf(stderr, + "Interrupted: Removing XDP program on ifindex:%d device:%s\n", + ifindex, ifname); + bpf_set_link_xdp_fd(ifindex, -1, xdp_flags); + } else if (!curr_prog_id) { + printf("couldn't find a prog id on a given iface\n"); + } else { + printf("program on interface changed, not removing\n"); + } + } exit(EXIT_OK); } @@ -608,6 +622,8 @@ int main(int argc, char **argv) struct bpf_prog_load_attr prog_load_attr = { .prog_type = BPF_PROG_TYPE_UNSPEC, }; + struct bpf_prog_info info = {}; + __u32 info_len = sizeof(info); bool use_separators = true; bool stress_mode = false; struct bpf_program *prog; @@ -617,9 +633,9 @@ int main(int argc, char **argv) int longindex = 0; int interval = 2; int add_cpu = -1; + int opt, err; int prog_fd; __u32 qsize; - int opt; /* Notice: choosing he queue size is very important with the * ixgbe driver, because it's driver page recycling trick is @@ -746,6 +762,13 @@ int main(int argc, char **argv) return EXIT_FAIL_XDP; } + err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + if (err) { + printf("can't get prog info - %s\n", strerror(errno)); + return err; + } + prog_id = info.id; + stats_poll(interval, use_separators, prog_name, stress_mode); return EXIT_OK; } diff --git a/samples/bpf/xdp_redirect_map_user.c b/samples/bpf/xdp_redirect_map_user.c index 470e1a7e8810..327226be5a06 100644 --- a/samples/bpf/xdp_redirect_map_user.c +++ b/samples/bpf/xdp_redirect_map_user.c @@ -29,15 +29,41 @@ static int ifindex_in; static int ifindex_out; static bool ifindex_out_xdp_dummy_attached = true; +static __u32 prog_id; +static __u32 dummy_prog_id; static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; static int rxcnt_map_fd; static void int_exit(int sig) { - bpf_set_link_xdp_fd(ifindex_in, -1, xdp_flags); - if (ifindex_out_xdp_dummy_attached) - bpf_set_link_xdp_fd(ifindex_out, -1, xdp_flags); + __u32 curr_prog_id = 0; + + if (bpf_get_link_xdp_id(ifindex_in, &curr_prog_id, xdp_flags)) { + printf("bpf_get_link_xdp_id failed\n"); + exit(1); + } + if (prog_id == curr_prog_id) + bpf_set_link_xdp_fd(ifindex_in, -1, xdp_flags); + else if (!curr_prog_id) + printf("couldn't find a prog id on iface IN\n"); + else + printf("program on iface IN changed, not removing\n"); + + if (ifindex_out_xdp_dummy_attached) { + curr_prog_id = 0; + if (bpf_get_link_xdp_id(ifindex_out, &curr_prog_id, + xdp_flags)) { + printf("bpf_get_link_xdp_id failed\n"); + exit(1); + } + if (prog_id == curr_prog_id) + bpf_set_link_xdp_fd(ifindex_out, -1, xdp_flags); + else if (!curr_prog_id) + printf("couldn't find a prog id on iface OUT\n"); + else + printf("program on iface OUT changed, not removing\n"); + } exit(0); } @@ -82,6 +108,8 @@ int main(int argc, char **argv) .prog_type = BPF_PROG_TYPE_XDP, }; struct bpf_program *prog, *dummy_prog; + struct bpf_prog_info info = {}; + __u32 info_len = sizeof(info); int prog_fd, dummy_prog_fd; const char *optstr = "FSN"; struct bpf_object *obj; @@ -153,6 +181,13 @@ int main(int argc, char **argv) return 1; } + ret = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + if (ret) { + printf("can't get prog info - %s\n", strerror(errno)); + return ret; + } + prog_id = info.id; + /* Loading dummy XDP prog on out-device */ if (bpf_set_link_xdp_fd(ifindex_out, dummy_prog_fd, (xdp_flags | XDP_FLAGS_UPDATE_IF_NOEXIST)) < 0) { @@ -160,6 +195,14 @@ int main(int argc, char **argv) ifindex_out_xdp_dummy_attached = false; } + memset(&info, 0, sizeof(info)); + ret = bpf_obj_get_info_by_fd(dummy_prog_fd, &info, &info_len); + if (ret) { + printf("can't get prog info - %s\n", strerror(errno)); + return ret; + } + dummy_prog_id = info.id; + signal(SIGINT, int_exit); signal(SIGTERM, int_exit); diff --git a/samples/bpf/xdp_redirect_user.c b/samples/bpf/xdp_redirect_user.c index be6058cda97c..a5d8ad3129ed 100644 --- a/samples/bpf/xdp_redirect_user.c +++ b/samples/bpf/xdp_redirect_user.c @@ -29,15 +29,41 @@ static int ifindex_in; static int ifindex_out; static bool ifindex_out_xdp_dummy_attached = true; +static __u32 prog_id; +static __u32 dummy_prog_id; static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; static int rxcnt_map_fd; static void int_exit(int sig) { - bpf_set_link_xdp_fd(ifindex_in, -1, xdp_flags); - if (ifindex_out_xdp_dummy_attached) - bpf_set_link_xdp_fd(ifindex_out, -1, xdp_flags); + __u32 curr_prog_id = 0; + + if (bpf_get_link_xdp_id(ifindex_in, &curr_prog_id, xdp_flags)) { + printf("bpf_get_link_xdp_id failed\n"); + exit(1); + } + if (prog_id == curr_prog_id) + bpf_set_link_xdp_fd(ifindex_in, -1, xdp_flags); + else if (!curr_prog_id) + printf("couldn't find a prog id on iface IN\n"); + else + printf("program on iface IN changed, not removing\n"); + + if (ifindex_out_xdp_dummy_attached) { + curr_prog_id = 0; + if (bpf_get_link_xdp_id(ifindex_out, &curr_prog_id, + xdp_flags)) { + printf("bpf_get_link_xdp_id failed\n"); + exit(1); + } + if (prog_id == curr_prog_id) + bpf_set_link_xdp_fd(ifindex_out, -1, xdp_flags); + else if (!curr_prog_id) + printf("couldn't find a prog id on iface OUT\n"); + else + printf("program on iface OUT changed, not removing\n"); + } exit(0); } @@ -84,6 +110,8 @@ int main(int argc, char **argv) }; struct bpf_program *prog, *dummy_prog; int prog_fd, tx_port_map_fd, opt; + struct bpf_prog_info info = {}; + __u32 info_len = sizeof(info); const char *optstr = "FSN"; struct bpf_object *obj; char filename[256]; @@ -154,6 +182,13 @@ int main(int argc, char **argv) return 1; } + ret = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + if (ret) { + printf("can't get prog info - %s\n", strerror(errno)); + return ret; + } + prog_id = info.id; + /* Loading dummy XDP prog on out-device */ if (bpf_set_link_xdp_fd(ifindex_out, dummy_prog_fd, (xdp_flags | XDP_FLAGS_UPDATE_IF_NOEXIST)) < 0) { @@ -161,6 +196,14 @@ int main(int argc, char **argv) ifindex_out_xdp_dummy_attached = false; } + memset(&info, 0, sizeof(info)); + ret = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + if (ret) { + printf("can't get prog info - %s\n", strerror(errno)); + return ret; + } + dummy_prog_id = info.id; + signal(SIGINT, int_exit); signal(SIGTERM, int_exit); diff --git a/samples/bpf/xdp_router_ipv4_user.c b/samples/bpf/xdp_router_ipv4_user.c index 208d6a996478..79fe7bc26ab4 100644 --- a/samples/bpf/xdp_router_ipv4_user.c +++ b/samples/bpf/xdp_router_ipv4_user.c @@ -30,7 +30,8 @@ int sock, sock_arp, flags = XDP_FLAGS_UPDATE_IF_NOEXIST; static int total_ifindex; -int *ifindex_list; +static int *ifindex_list; +static __u32 *prog_id_list; char buf[8192]; static int lpm_map_fd; static int rxcnt_map_fd; @@ -41,23 +42,34 @@ static int tx_port_map_fd; static int get_route_table(int rtm_family); static void int_exit(int sig) { + __u32 prog_id = 0; int i = 0; - for (i = 0; i < total_ifindex; i++) - bpf_set_link_xdp_fd(ifindex_list[i], -1, flags); + for (i = 0; i < total_ifindex; i++) { + if (bpf_get_link_xdp_id(ifindex_list[i], &prog_id, flags)) { + printf("bpf_get_link_xdp_id on iface %d failed\n", + ifindex_list[i]); + exit(1); + } + if (prog_id_list[i] == prog_id) + bpf_set_link_xdp_fd(ifindex_list[i], -1, flags); + else if (!prog_id) + printf("couldn't find a prog id on iface %d\n", + ifindex_list[i]); + else + printf("program on iface %d changed, not removing\n", + ifindex_list[i]); + prog_id = 0; + } exit(0); } static void close_and_exit(int sig) { - int i = 0; - close(sock); close(sock_arp); - for (i = 0; i < total_ifindex; i++) - bpf_set_link_xdp_fd(ifindex_list[i], -1, flags); - exit(0); + int_exit(0); } /* Get the mac address of the interface given interface name */ @@ -186,13 +198,8 @@ static void read_route(struct nlmsghdr *nh, int nll) route.iface_name = alloca(sizeof(char *) * IFNAMSIZ); route.iface_name = if_indextoname(route.iface, route.iface_name); route.mac = getmac(route.iface_name); - if (route.mac == -1) { - int i = 0; - - for (i = 0; i < total_ifindex; i++) - bpf_set_link_xdp_fd(ifindex_list[i], -1, flags); - exit(0); - } + if (route.mac == -1) + int_exit(0); assert(bpf_map_update_elem(tx_port_map_fd, &route.iface, &route.iface, 0) == 0); if (rtm_family == AF_INET) { @@ -625,12 +632,14 @@ int main(int ac, char **argv) struct bpf_prog_load_attr prog_load_attr = { .prog_type = BPF_PROG_TYPE_XDP, }; + struct bpf_prog_info info = {}; + __u32 info_len = sizeof(info); const char *optstr = "SF"; struct bpf_object *obj; char filename[256]; char **ifname_list; int prog_fd, opt; - int i = 1; + int err, i = 1; snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); prog_load_attr.file = filename; @@ -687,7 +696,7 @@ int main(int ac, char **argv) return 1; } - ifindex_list = (int *)malloc(total_ifindex * sizeof(int *)); + ifindex_list = (int *)calloc(total_ifindex, sizeof(int *)); for (i = 0; i < total_ifindex; i++) { ifindex_list[i] = if_nametoindex(ifname_list[i]); if (!ifindex_list[i]) { @@ -696,6 +705,7 @@ int main(int ac, char **argv) return 1; } } + prog_id_list = (__u32 *)calloc(total_ifindex, sizeof(__u32 *)); for (i = 0; i < total_ifindex; i++) { if (bpf_set_link_xdp_fd(ifindex_list[i], prog_fd, flags) < 0) { printf("link set xdp fd failed\n"); @@ -706,6 +716,13 @@ int main(int ac, char **argv) return 1; } + err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + if (err) { + printf("can't get prog info - %s\n", strerror(errno)); + return err; + } + prog_id_list[i] = info.id; + memset(&info, 0, sizeof(info)); printf("Attached to %d\n", ifindex_list[i]); } signal(SIGINT, int_exit); diff --git a/samples/bpf/xdp_rxq_info_user.c b/samples/bpf/xdp_rxq_info_user.c index e7a98c2a440f..1210f3b170f0 100644 --- a/samples/bpf/xdp_rxq_info_user.c +++ b/samples/bpf/xdp_rxq_info_user.c @@ -29,6 +29,7 @@ static const char *__doc__ = " XDP RX-queue info extract example\n\n" static int ifindex = -1; static char ifname_buf[IF_NAMESIZE]; static char *ifname; +static __u32 prog_id; static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; @@ -58,11 +59,24 @@ static const struct option long_options[] = { static void int_exit(int sig) { - fprintf(stderr, - "Interrupted: Removing XDP program on ifindex:%d device:%s\n", - ifindex, ifname); - if (ifindex > -1) - bpf_set_link_xdp_fd(ifindex, -1, xdp_flags); + __u32 curr_prog_id = 0; + + if (ifindex > -1) { + if (bpf_get_link_xdp_id(ifindex, &curr_prog_id, xdp_flags)) { + printf("bpf_get_link_xdp_id failed\n"); + exit(EXIT_FAIL); + } + if (prog_id == curr_prog_id) { + fprintf(stderr, + "Interrupted: Removing XDP program on ifindex:%d device:%s\n", + ifindex, ifname); + bpf_set_link_xdp_fd(ifindex, -1, xdp_flags); + } else if (!curr_prog_id) { + printf("couldn't find a prog id on a given iface\n"); + } else { + printf("program on interface changed, not removing\n"); + } + } exit(EXIT_OK); } @@ -447,6 +461,8 @@ int main(int argc, char **argv) struct bpf_prog_load_attr prog_load_attr = { .prog_type = BPF_PROG_TYPE_XDP, }; + struct bpf_prog_info info = {}; + __u32 info_len = sizeof(info); int prog_fd, map_fd, opt, err; bool use_separators = true; struct config cfg = { 0 }; @@ -580,6 +596,13 @@ int main(int argc, char **argv) return EXIT_FAIL_XDP; } + err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + if (err) { + printf("can't get prog info - %s\n", strerror(errno)); + return err; + } + prog_id = info.id; + stats_poll(interval, action, cfg_options); return EXIT_OK; } diff --git a/samples/bpf/xdp_sample_pkts_user.c b/samples/bpf/xdp_sample_pkts_user.c index 62f34827c775..dc66345a929a 100644 --- a/samples/bpf/xdp_sample_pkts_user.c +++ b/samples/bpf/xdp_sample_pkts_user.c @@ -24,25 +24,49 @@ static int pmu_fds[MAX_CPUS], if_idx; static struct perf_event_mmap_page *headers[MAX_CPUS]; static char *if_name; static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; +static __u32 prog_id; static int do_attach(int idx, int fd, const char *name) { + struct bpf_prog_info info = {}; + __u32 info_len = sizeof(info); int err; err = bpf_set_link_xdp_fd(idx, fd, xdp_flags); - if (err < 0) + if (err < 0) { printf("ERROR: failed to attach program to %s\n", name); + return err; + } + + err = bpf_obj_get_info_by_fd(fd, &info, &info_len); + if (err) { + printf("can't get prog info - %s\n", strerror(errno)); + return err; + } + prog_id = info.id; return err; } static int do_detach(int idx, const char *name) { - int err; + __u32 curr_prog_id = 0; + int err = 0; - err = bpf_set_link_xdp_fd(idx, -1, 0); - if (err < 0) - printf("ERROR: failed to detach program from %s\n", name); + err = bpf_get_link_xdp_id(idx, &curr_prog_id, 0); + if (err) { + printf("bpf_get_link_xdp_id failed\n"); + return err; + } + if (prog_id == curr_prog_id) { + err = bpf_set_link_xdp_fd(idx, -1, 0); + if (err < 0) + printf("ERROR: failed to detach prog from %s\n", name); + } else if (!curr_prog_id) { + printf("couldn't find a prog id on a %s\n", name); + } else { + printf("program on interface changed, not removing\n"); + } return err; } diff --git a/samples/bpf/xdp_tx_iptunnel_user.c b/samples/bpf/xdp_tx_iptunnel_user.c index e3de60930d27..4a1511eb7812 100644 --- a/samples/bpf/xdp_tx_iptunnel_user.c +++ b/samples/bpf/xdp_tx_iptunnel_user.c @@ -27,11 +27,24 @@ static int ifindex = -1; static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; static int rxcnt_map_fd; +static __u32 prog_id; static void int_exit(int sig) { - if (ifindex > -1) - bpf_set_link_xdp_fd(ifindex, -1, xdp_flags); + __u32 curr_prog_id = 0; + + if (ifindex > -1) { + if (bpf_get_link_xdp_id(ifindex, &curr_prog_id, xdp_flags)) { + printf("bpf_get_link_xdp_id failed\n"); + exit(1); + } + if (prog_id == curr_prog_id) + bpf_set_link_xdp_fd(ifindex, -1, xdp_flags); + else if (!curr_prog_id) + printf("couldn't find a prog id on a given iface\n"); + else + printf("program on interface changed, not removing\n"); + } exit(0); } @@ -148,13 +161,15 @@ int main(int argc, char **argv) int min_port = 0, max_port = 0, vip2tnl_map_fd; const char *optstr = "i:a:p:s:d:m:T:P:FSNh"; unsigned char opt_flags[256] = {}; + struct bpf_prog_info info = {}; + __u32 info_len = sizeof(info); unsigned int kill_after_s = 0; struct iptnl_info tnl = {}; struct bpf_object *obj; struct vip vip = {}; char filename[256]; int opt, prog_fd; - int i; + int i, err; tnl.family = AF_UNSPEC; vip.protocol = IPPROTO_TCP; @@ -276,6 +291,13 @@ int main(int argc, char **argv) return 1; } + err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + if (err) { + printf("can't get prog info - %s\n", strerror(errno)); + return err; + } + prog_id = info.id; + poll_stats(kill_after_s); bpf_set_link_xdp_fd(ifindex, -1, xdp_flags); diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c index 188723784768..f73055e0191f 100644 --- a/samples/bpf/xdpsock_user.c +++ b/samples/bpf/xdpsock_user.c @@ -76,6 +76,7 @@ static int opt_poll; static int opt_shared_packet_buffer; static int opt_interval = 1; static u32 opt_xdp_bind_flags; +static __u32 prog_id; struct xdp_umem_uqueue { u32 cached_prod; @@ -631,9 +632,20 @@ static void *poller(void *arg) static void int_exit(int sig) { + __u32 curr_prog_id = 0; + (void)sig; dump_stats(); - bpf_set_link_xdp_fd(opt_ifindex, -1, opt_xdp_flags); + if (bpf_get_link_xdp_id(opt_ifindex, &curr_prog_id, opt_xdp_flags)) { + printf("bpf_get_link_xdp_id failed\n"); + exit(EXIT_FAILURE); + } + if (prog_id == curr_prog_id) + bpf_set_link_xdp_fd(opt_ifindex, -1, opt_xdp_flags); + else if (!curr_prog_id) + printf("couldn't find a prog id on a given interface\n"); + else + printf("program on interface changed, not removing\n"); exit(EXIT_SUCCESS); } @@ -907,6 +919,8 @@ int main(int argc, char **argv) .prog_type = BPF_PROG_TYPE_XDP, }; int prog_fd, qidconf_map, xsks_map; + struct bpf_prog_info info = {}; + __u32 info_len = sizeof(info); struct bpf_object *obj; char xdp_filename[256]; struct bpf_map *map; @@ -953,6 +967,13 @@ int main(int argc, char **argv) exit(EXIT_FAILURE); } + ret = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + if (ret) { + printf("can't get prog info - %s\n", strerror(errno)); + return 1; + } + prog_id = info.id; + ret = bpf_map_update_elem(qidconf_map, &key, &opt_queue, 0); if (ret) { fprintf(stderr, "ERROR: bpf_map_update_elem qidconf\n"); -- cgit v1.2.3-59-g8ed1b From 915654fd718c2366871b19f8c6687e61909db911 Mon Sep 17 00:00:00 2001 From: Toke Høiland-Jørgensen Date: Thu, 21 Feb 2019 17:05:39 +0100 Subject: samples/bpf: Fix dummy program unloading for xdp_redirect samples MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The xdp_redirect and xdp_redirect_map sample programs both load a dummy program onto the egress interfaces. However, the unload code checks these programs against the wrong id number, and thus refuses to unload them. Fix the comparison to avoid this. Fixes: 3b7a8ec2dec3 ("samples/bpf: Check the prog id before exiting") Signed-off-by: Toke Høiland-Jørgensen Acked-by: Maciej Fijalkowski Acked-by: Martin KaFai Lau Signed-off-by: Daniel Borkmann --- samples/bpf/xdp_redirect_map_user.c | 2 +- samples/bpf/xdp_redirect_user.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'samples') diff --git a/samples/bpf/xdp_redirect_map_user.c b/samples/bpf/xdp_redirect_map_user.c index 327226be5a06..1dbe7fd3a1a8 100644 --- a/samples/bpf/xdp_redirect_map_user.c +++ b/samples/bpf/xdp_redirect_map_user.c @@ -57,7 +57,7 @@ static void int_exit(int sig) printf("bpf_get_link_xdp_id failed\n"); exit(1); } - if (prog_id == curr_prog_id) + if (dummy_prog_id == curr_prog_id) bpf_set_link_xdp_fd(ifindex_out, -1, xdp_flags); else if (!curr_prog_id) printf("couldn't find a prog id on iface OUT\n"); diff --git a/samples/bpf/xdp_redirect_user.c b/samples/bpf/xdp_redirect_user.c index a5d8ad3129ed..e9054c0269ff 100644 --- a/samples/bpf/xdp_redirect_user.c +++ b/samples/bpf/xdp_redirect_user.c @@ -57,7 +57,7 @@ static void int_exit(int sig) printf("bpf_get_link_xdp_id failed\n"); exit(1); } - if (prog_id == curr_prog_id) + if (dummy_prog_id == curr_prog_id) bpf_set_link_xdp_fd(ifindex_out, -1, xdp_flags); else if (!curr_prog_id) printf("couldn't find a prog id on iface OUT\n"); -- cgit v1.2.3-59-g8ed1b From 248c7f9c0e215fcfd847bd3a41cf0160a2359e1a Mon Sep 17 00:00:00 2001 From: Magnus Karlsson Date: Thu, 21 Feb 2019 10:21:27 +0100 Subject: samples/bpf: convert xdpsock to use libbpf for AF_XDP access MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit converts the xdpsock sample application to use the AF_XDP functions present in libbpf. This cuts down the size of it by nearly 300 lines of code. The default ring sizes plus the batch size has been increased and the size of the umem area has decreased. This so that the sample application will provide higher throughput. Note also that the shared umem code has been removed from the sample as this is not supported by libbpf at this point in time. Tested-by: Björn Töpel Signed-off-by: Magnus Karlsson Signed-off-by: Daniel Borkmann --- samples/bpf/Makefile | 1 - samples/bpf/xdpsock.h | 11 - samples/bpf/xdpsock_kern.c | 56 --- samples/bpf/xdpsock_user.c | 841 ++++++++++++++------------------------------- 4 files changed, 261 insertions(+), 648 deletions(-) delete mode 100644 samples/bpf/xdpsock.h delete mode 100644 samples/bpf/xdpsock_kern.c (limited to 'samples') diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index a0ef7eddd0b3..a333e258f319 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -163,7 +163,6 @@ always += xdp2skb_meta_kern.o always += syscall_tp_kern.o always += cpustat_kern.o always += xdp_adjust_tail_kern.o -always += xdpsock_kern.o always += xdp_fwd_kern.o always += task_fd_query_kern.o always += xdp_sample_pkts_kern.o diff --git a/samples/bpf/xdpsock.h b/samples/bpf/xdpsock.h deleted file mode 100644 index 533ab81adfa1..000000000000 --- a/samples/bpf/xdpsock.h +++ /dev/null @@ -1,11 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef XDPSOCK_H_ -#define XDPSOCK_H_ - -/* Power-of-2 number of sockets */ -#define MAX_SOCKS 4 - -/* Round-robin receive */ -#define RR_LB 0 - -#endif /* XDPSOCK_H_ */ diff --git a/samples/bpf/xdpsock_kern.c b/samples/bpf/xdpsock_kern.c deleted file mode 100644 index b8ccd0802b3f..000000000000 --- a/samples/bpf/xdpsock_kern.c +++ /dev/null @@ -1,56 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#define KBUILD_MODNAME "foo" -#include -#include "bpf_helpers.h" - -#include "xdpsock.h" - -struct bpf_map_def SEC("maps") qidconf_map = { - .type = BPF_MAP_TYPE_ARRAY, - .key_size = sizeof(int), - .value_size = sizeof(int), - .max_entries = 1, -}; - -struct bpf_map_def SEC("maps") xsks_map = { - .type = BPF_MAP_TYPE_XSKMAP, - .key_size = sizeof(int), - .value_size = sizeof(int), - .max_entries = MAX_SOCKS, -}; - -struct bpf_map_def SEC("maps") rr_map = { - .type = BPF_MAP_TYPE_PERCPU_ARRAY, - .key_size = sizeof(int), - .value_size = sizeof(unsigned int), - .max_entries = 1, -}; - -SEC("xdp_sock") -int xdp_sock_prog(struct xdp_md *ctx) -{ - int *qidconf, key = 0, idx; - unsigned int *rr; - - qidconf = bpf_map_lookup_elem(&qidconf_map, &key); - if (!qidconf) - return XDP_ABORTED; - - if (*qidconf != ctx->rx_queue_index) - return XDP_PASS; - -#if RR_LB /* NB! RR_LB is configured in xdpsock.h */ - rr = bpf_map_lookup_elem(&rr_map, &key); - if (!rr) - return XDP_ABORTED; - - *rr = (*rr + 1) & (MAX_SOCKS - 1); - idx = *rr; -#else - idx = 0; -#endif - - return bpf_redirect_map(&xsks_map, idx, 0); -} - -char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c index f73055e0191f..9c76d6d43deb 100644 --- a/samples/bpf/xdpsock_user.c +++ b/samples/bpf/xdpsock_user.c @@ -1,37 +1,36 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright(c) 2017 - 2018 Intel Corporation. */ -#include +#include #include #include #include #include +#include #include #include #include +#include +#include #include +#include +#include #include #include #include #include #include -#include +#include #include #include -#include +#include #include #include -#include -#include -#include -#include #include "bpf/libbpf.h" -#include "bpf_util.h" +#include "bpf/xsk.h" #include -#include "xdpsock.h" - #ifndef SOL_XDP #define SOL_XDP 283 #endif @@ -44,17 +43,11 @@ #define PF_XDP AF_XDP #endif -#define NUM_FRAMES 131072 -#define FRAME_HEADROOM 0 -#define FRAME_SHIFT 11 -#define FRAME_SIZE 2048 -#define NUM_DESCS 1024 -#define BATCH_SIZE 16 - -#define FQ_NUM_DESCS 1024 -#define CQ_NUM_DESCS 1024 +#define NUM_FRAMES (4 * 1024) +#define BATCH_SIZE 64 #define DEBUG_HEXDUMP 0 +#define MAX_SOCKS 8 typedef __u64 u64; typedef __u32 u32; @@ -73,54 +66,31 @@ static const char *opt_if = ""; static int opt_ifindex; static int opt_queue; static int opt_poll; -static int opt_shared_packet_buffer; static int opt_interval = 1; static u32 opt_xdp_bind_flags; static __u32 prog_id; -struct xdp_umem_uqueue { - u32 cached_prod; - u32 cached_cons; - u32 mask; - u32 size; - u32 *producer; - u32 *consumer; - u64 *ring; - void *map; +struct xsk_umem_info { + struct xsk_ring_prod fq; + struct xsk_ring_cons cq; + struct xsk_umem *umem; + void *buffer; }; -struct xdp_umem { - char *frames; - struct xdp_umem_uqueue fq; - struct xdp_umem_uqueue cq; - int fd; -}; - -struct xdp_uqueue { - u32 cached_prod; - u32 cached_cons; - u32 mask; - u32 size; - u32 *producer; - u32 *consumer; - struct xdp_desc *ring; - void *map; -}; - -struct xdpsock { - struct xdp_uqueue rx; - struct xdp_uqueue tx; - int sfd; - struct xdp_umem *umem; - u32 outstanding_tx; +struct xsk_socket_info { + struct xsk_ring_cons rx; + struct xsk_ring_prod tx; + struct xsk_umem_info *umem; + struct xsk_socket *xsk; unsigned long rx_npkts; unsigned long tx_npkts; unsigned long prev_rx_npkts; unsigned long prev_tx_npkts; + u32 outstanding_tx; }; static int num_socks; -struct xdpsock *xsks[MAX_SOCKS]; +struct xsk_socket_info *xsks[MAX_SOCKS]; static unsigned long get_nsecs(void) { @@ -130,225 +100,124 @@ static unsigned long get_nsecs(void) return ts.tv_sec * 1000000000UL + ts.tv_nsec; } -static void dump_stats(void); - -#define lassert(expr) \ - do { \ - if (!(expr)) { \ - fprintf(stderr, "%s:%s:%i: Assertion failed: " \ - #expr ": errno: %d/\"%s\"\n", \ - __FILE__, __func__, __LINE__, \ - errno, strerror(errno)); \ - dump_stats(); \ - exit(EXIT_FAILURE); \ - } \ - } while (0) - -#define barrier() __asm__ __volatile__("": : :"memory") -#ifdef __aarch64__ -#define u_smp_rmb() __asm__ __volatile__("dmb ishld": : :"memory") -#define u_smp_wmb() __asm__ __volatile__("dmb ishst": : :"memory") -#else -#define u_smp_rmb() barrier() -#define u_smp_wmb() barrier() -#endif -#define likely(x) __builtin_expect(!!(x), 1) -#define unlikely(x) __builtin_expect(!!(x), 0) - -static const char pkt_data[] = - "\x3c\xfd\xfe\x9e\x7f\x71\xec\xb1\xd7\x98\x3a\xc0\x08\x00\x45\x00" - "\x00\x2e\x00\x00\x00\x00\x40\x11\x88\x97\x05\x08\x07\x08\xc8\x14" - "\x1e\x04\x10\x92\x10\x92\x00\x1a\x6d\xa3\x34\x33\x1f\x69\x40\x6b" - "\x54\x59\xb6\x14\x2d\x11\x44\xbf\xaf\xd9\xbe\xaa"; - -static inline u32 umem_nb_free(struct xdp_umem_uqueue *q, u32 nb) -{ - u32 free_entries = q->cached_cons - q->cached_prod; - - if (free_entries >= nb) - return free_entries; - - /* Refresh the local tail pointer */ - q->cached_cons = *q->consumer + q->size; - - return q->cached_cons - q->cached_prod; -} - -static inline u32 xq_nb_free(struct xdp_uqueue *q, u32 ndescs) +static void print_benchmark(bool running) { - u32 free_entries = q->cached_cons - q->cached_prod; + const char *bench_str = "INVALID"; - if (free_entries >= ndescs) - return free_entries; + if (opt_bench == BENCH_RXDROP) + bench_str = "rxdrop"; + else if (opt_bench == BENCH_TXONLY) + bench_str = "txonly"; + else if (opt_bench == BENCH_L2FWD) + bench_str = "l2fwd"; - /* Refresh the local tail pointer */ - q->cached_cons = *q->consumer + q->size; - return q->cached_cons - q->cached_prod; -} + printf("%s:%d %s ", opt_if, opt_queue, bench_str); + if (opt_xdp_flags & XDP_FLAGS_SKB_MODE) + printf("xdp-skb "); + else if (opt_xdp_flags & XDP_FLAGS_DRV_MODE) + printf("xdp-drv "); + else + printf(" "); -static inline u32 umem_nb_avail(struct xdp_umem_uqueue *q, u32 nb) -{ - u32 entries = q->cached_prod - q->cached_cons; + if (opt_poll) + printf("poll() "); - if (entries == 0) { - q->cached_prod = *q->producer; - entries = q->cached_prod - q->cached_cons; + if (running) { + printf("running..."); + fflush(stdout); } - - return (entries > nb) ? nb : entries; } -static inline u32 xq_nb_avail(struct xdp_uqueue *q, u32 ndescs) +static void dump_stats(void) { - u32 entries = q->cached_prod - q->cached_cons; + unsigned long now = get_nsecs(); + long dt = now - prev_time; + int i; - if (entries == 0) { - q->cached_prod = *q->producer; - entries = q->cached_prod - q->cached_cons; - } + prev_time = now; - return (entries > ndescs) ? ndescs : entries; -} + for (i = 0; i < num_socks && xsks[i]; i++) { + char *fmt = "%-15s %'-11.0f %'-11lu\n"; + double rx_pps, tx_pps; -static inline int umem_fill_to_kernel_ex(struct xdp_umem_uqueue *fq, - struct xdp_desc *d, - size_t nb) -{ - u32 i; + rx_pps = (xsks[i]->rx_npkts - xsks[i]->prev_rx_npkts) * + 1000000000. / dt; + tx_pps = (xsks[i]->tx_npkts - xsks[i]->prev_tx_npkts) * + 1000000000. / dt; - if (umem_nb_free(fq, nb) < nb) - return -ENOSPC; + printf("\n sock%d@", i); + print_benchmark(false); + printf("\n"); - for (i = 0; i < nb; i++) { - u32 idx = fq->cached_prod++ & fq->mask; + printf("%-15s %-11s %-11s %-11.2f\n", "", "pps", "pkts", + dt / 1000000000.); + printf(fmt, "rx", rx_pps, xsks[i]->rx_npkts); + printf(fmt, "tx", tx_pps, xsks[i]->tx_npkts); - fq->ring[idx] = d[i].addr; + xsks[i]->prev_rx_npkts = xsks[i]->rx_npkts; + xsks[i]->prev_tx_npkts = xsks[i]->tx_npkts; } - - u_smp_wmb(); - - *fq->producer = fq->cached_prod; - - return 0; } -static inline int umem_fill_to_kernel(struct xdp_umem_uqueue *fq, u64 *d, - size_t nb) +static void *poller(void *arg) { - u32 i; - - if (umem_nb_free(fq, nb) < nb) - return -ENOSPC; - - for (i = 0; i < nb; i++) { - u32 idx = fq->cached_prod++ & fq->mask; - - fq->ring[idx] = d[i]; + (void)arg; + for (;;) { + sleep(opt_interval); + dump_stats(); } - u_smp_wmb(); - - *fq->producer = fq->cached_prod; - - return 0; + return NULL; } -static inline size_t umem_complete_from_kernel(struct xdp_umem_uqueue *cq, - u64 *d, size_t nb) +static void remove_xdp_program(void) { - u32 idx, i, entries = umem_nb_avail(cq, nb); - - u_smp_rmb(); - - for (i = 0; i < entries; i++) { - idx = cq->cached_cons++ & cq->mask; - d[i] = cq->ring[idx]; - } - - if (entries > 0) { - u_smp_wmb(); + __u32 curr_prog_id = 0; - *cq->consumer = cq->cached_cons; + if (bpf_get_link_xdp_id(opt_ifindex, &curr_prog_id, opt_xdp_flags)) { + printf("bpf_get_link_xdp_id failed\n"); + exit(EXIT_FAILURE); } - - return entries; -} - -static inline void *xq_get_data(struct xdpsock *xsk, u64 addr) -{ - return &xsk->umem->frames[addr]; + if (prog_id == curr_prog_id) + bpf_set_link_xdp_fd(opt_ifindex, -1, opt_xdp_flags); + else if (!curr_prog_id) + printf("couldn't find a prog id on a given interface\n"); + else + printf("program on interface changed, not removing\n"); } -static inline int xq_enq(struct xdp_uqueue *uq, - const struct xdp_desc *descs, - unsigned int ndescs) +static void int_exit(int sig) { - struct xdp_desc *r = uq->ring; - unsigned int i; + struct xsk_umem *umem = xsks[0]->umem->umem; - if (xq_nb_free(uq, ndescs) < ndescs) - return -ENOSPC; - - for (i = 0; i < ndescs; i++) { - u32 idx = uq->cached_prod++ & uq->mask; - - r[idx].addr = descs[i].addr; - r[idx].len = descs[i].len; - } + (void)sig; - u_smp_wmb(); + dump_stats(); + xsk_socket__delete(xsks[0]->xsk); + (void)xsk_umem__delete(umem); + remove_xdp_program(); - *uq->producer = uq->cached_prod; - return 0; + exit(EXIT_SUCCESS); } -static inline int xq_enq_tx_only(struct xdp_uqueue *uq, - unsigned int id, unsigned int ndescs) +static void __exit_with_error(int error, const char *file, const char *func, + int line) { - struct xdp_desc *r = uq->ring; - unsigned int i; - - if (xq_nb_free(uq, ndescs) < ndescs) - return -ENOSPC; - - for (i = 0; i < ndescs; i++) { - u32 idx = uq->cached_prod++ & uq->mask; - - r[idx].addr = (id + i) << FRAME_SHIFT; - r[idx].len = sizeof(pkt_data) - 1; - } - - u_smp_wmb(); - - *uq->producer = uq->cached_prod; - return 0; + fprintf(stderr, "%s:%s:%i: errno: %d/\"%s\"\n", file, func, + line, error, strerror(error)); + dump_stats(); + remove_xdp_program(); + exit(EXIT_FAILURE); } -static inline int xq_deq(struct xdp_uqueue *uq, - struct xdp_desc *descs, - int ndescs) -{ - struct xdp_desc *r = uq->ring; - unsigned int idx; - int i, entries; - - entries = xq_nb_avail(uq, ndescs); - - u_smp_rmb(); - - for (i = 0; i < entries; i++) { - idx = uq->cached_cons++ & uq->mask; - descs[i] = r[idx]; - } - - if (entries > 0) { - u_smp_wmb(); +#define exit_with_error(error) __exit_with_error(error, __FILE__, __func__, \ + __LINE__) - *uq->consumer = uq->cached_cons; - } - - return entries; -} +static const char pkt_data[] = + "\x3c\xfd\xfe\x9e\x7f\x71\xec\xb1\xd7\x98\x3a\xc0\x08\x00\x45\x00" + "\x00\x2e\x00\x00\x00\x00\x40\x11\x88\x97\x05\x08\x07\x08\xc8\x14" + "\x1e\x04\x10\x92\x10\x92\x00\x1a\x6d\xa3\x34\x33\x1f\x69\x40\x6b" + "\x54\x59\xb6\x14\x2d\x11\x44\xbf\xaf\xd9\xbe\xaa"; static void swap_mac_addresses(void *data) { @@ -397,258 +266,74 @@ static void hex_dump(void *pkt, size_t length, u64 addr) printf("\n"); } -static size_t gen_eth_frame(char *frame) +static size_t gen_eth_frame(struct xsk_umem_info *umem, u64 addr) { - memcpy(frame, pkt_data, sizeof(pkt_data) - 1); + memcpy(xsk_umem__get_data(umem->buffer, addr), pkt_data, + sizeof(pkt_data) - 1); return sizeof(pkt_data) - 1; } -static struct xdp_umem *xdp_umem_configure(int sfd) +static struct xsk_umem_info *xsk_configure_umem(void *buffer, u64 size) { - int fq_size = FQ_NUM_DESCS, cq_size = CQ_NUM_DESCS; - struct xdp_mmap_offsets off; - struct xdp_umem_reg mr; - struct xdp_umem *umem; - socklen_t optlen; - void *bufs; + struct xsk_umem_info *umem; + int ret; umem = calloc(1, sizeof(*umem)); - lassert(umem); - - lassert(posix_memalign(&bufs, getpagesize(), /* PAGE_SIZE aligned */ - NUM_FRAMES * FRAME_SIZE) == 0); - - mr.addr = (__u64)bufs; - mr.len = NUM_FRAMES * FRAME_SIZE; - mr.chunk_size = FRAME_SIZE; - mr.headroom = FRAME_HEADROOM; - - lassert(setsockopt(sfd, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr)) == 0); - lassert(setsockopt(sfd, SOL_XDP, XDP_UMEM_FILL_RING, &fq_size, - sizeof(int)) == 0); - lassert(setsockopt(sfd, SOL_XDP, XDP_UMEM_COMPLETION_RING, &cq_size, - sizeof(int)) == 0); - - optlen = sizeof(off); - lassert(getsockopt(sfd, SOL_XDP, XDP_MMAP_OFFSETS, &off, - &optlen) == 0); - - umem->fq.map = mmap(0, off.fr.desc + - FQ_NUM_DESCS * sizeof(u64), - PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_POPULATE, sfd, - XDP_UMEM_PGOFF_FILL_RING); - lassert(umem->fq.map != MAP_FAILED); - - umem->fq.mask = FQ_NUM_DESCS - 1; - umem->fq.size = FQ_NUM_DESCS; - umem->fq.producer = umem->fq.map + off.fr.producer; - umem->fq.consumer = umem->fq.map + off.fr.consumer; - umem->fq.ring = umem->fq.map + off.fr.desc; - umem->fq.cached_cons = FQ_NUM_DESCS; - - umem->cq.map = mmap(0, off.cr.desc + - CQ_NUM_DESCS * sizeof(u64), - PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_POPULATE, sfd, - XDP_UMEM_PGOFF_COMPLETION_RING); - lassert(umem->cq.map != MAP_FAILED); - - umem->cq.mask = CQ_NUM_DESCS - 1; - umem->cq.size = CQ_NUM_DESCS; - umem->cq.producer = umem->cq.map + off.cr.producer; - umem->cq.consumer = umem->cq.map + off.cr.consumer; - umem->cq.ring = umem->cq.map + off.cr.desc; - - umem->frames = bufs; - umem->fd = sfd; + if (!umem) + exit_with_error(errno); - if (opt_bench == BENCH_TXONLY) { - int i; - - for (i = 0; i < NUM_FRAMES * FRAME_SIZE; i += FRAME_SIZE) - (void)gen_eth_frame(&umem->frames[i]); - } + ret = xsk_umem__create(&umem->umem, buffer, size, &umem->fq, &umem->cq, + NULL); + if (ret) + exit_with_error(-ret); + umem->buffer = buffer; return umem; } -static struct xdpsock *xsk_configure(struct xdp_umem *umem) +static struct xsk_socket_info *xsk_configure_socket(struct xsk_umem_info *umem) { - struct sockaddr_xdp sxdp = {}; - struct xdp_mmap_offsets off; - int sfd, ndescs = NUM_DESCS; - struct xdpsock *xsk; - bool shared = true; - socklen_t optlen; - u64 i; - - sfd = socket(PF_XDP, SOCK_RAW, 0); - lassert(sfd >= 0); + struct xsk_socket_config cfg; + struct xsk_socket_info *xsk; + int ret; + u32 idx; + int i; xsk = calloc(1, sizeof(*xsk)); - lassert(xsk); - - xsk->sfd = sfd; - xsk->outstanding_tx = 0; - - if (!umem) { - shared = false; - xsk->umem = xdp_umem_configure(sfd); - } else { - xsk->umem = umem; - } - - lassert(setsockopt(sfd, SOL_XDP, XDP_RX_RING, - &ndescs, sizeof(int)) == 0); - lassert(setsockopt(sfd, SOL_XDP, XDP_TX_RING, - &ndescs, sizeof(int)) == 0); - optlen = sizeof(off); - lassert(getsockopt(sfd, SOL_XDP, XDP_MMAP_OFFSETS, &off, - &optlen) == 0); - - /* Rx */ - xsk->rx.map = mmap(NULL, - off.rx.desc + - NUM_DESCS * sizeof(struct xdp_desc), - PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_POPULATE, sfd, - XDP_PGOFF_RX_RING); - lassert(xsk->rx.map != MAP_FAILED); - - if (!shared) { - for (i = 0; i < NUM_DESCS * FRAME_SIZE; i += FRAME_SIZE) - lassert(umem_fill_to_kernel(&xsk->umem->fq, &i, 1) - == 0); - } - - /* Tx */ - xsk->tx.map = mmap(NULL, - off.tx.desc + - NUM_DESCS * sizeof(struct xdp_desc), - PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_POPULATE, sfd, - XDP_PGOFF_TX_RING); - lassert(xsk->tx.map != MAP_FAILED); - - xsk->rx.mask = NUM_DESCS - 1; - xsk->rx.size = NUM_DESCS; - xsk->rx.producer = xsk->rx.map + off.rx.producer; - xsk->rx.consumer = xsk->rx.map + off.rx.consumer; - xsk->rx.ring = xsk->rx.map + off.rx.desc; - - xsk->tx.mask = NUM_DESCS - 1; - xsk->tx.size = NUM_DESCS; - xsk->tx.producer = xsk->tx.map + off.tx.producer; - xsk->tx.consumer = xsk->tx.map + off.tx.consumer; - xsk->tx.ring = xsk->tx.map + off.tx.desc; - xsk->tx.cached_cons = NUM_DESCS; - - sxdp.sxdp_family = PF_XDP; - sxdp.sxdp_ifindex = opt_ifindex; - sxdp.sxdp_queue_id = opt_queue; - - if (shared) { - sxdp.sxdp_flags = XDP_SHARED_UMEM; - sxdp.sxdp_shared_umem_fd = umem->fd; - } else { - sxdp.sxdp_flags = opt_xdp_bind_flags; - } - - lassert(bind(sfd, (struct sockaddr *)&sxdp, sizeof(sxdp)) == 0); + if (!xsk) + exit_with_error(errno); + + xsk->umem = umem; + cfg.rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS; + cfg.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS; + cfg.libbpf_flags = 0; + cfg.xdp_flags = opt_xdp_flags; + cfg.bind_flags = opt_xdp_bind_flags; + ret = xsk_socket__create(&xsk->xsk, opt_if, opt_queue, umem->umem, + &xsk->rx, &xsk->tx, &cfg); + if (ret) + exit_with_error(-ret); + + ret = bpf_get_link_xdp_id(opt_ifindex, &prog_id, opt_xdp_flags); + if (ret) + exit_with_error(-ret); + + ret = xsk_ring_prod__reserve(&xsk->umem->fq, + XSK_RING_PROD__DEFAULT_NUM_DESCS, + &idx); + if (ret != XSK_RING_PROD__DEFAULT_NUM_DESCS) + exit_with_error(-ret); + for (i = 0; + i < XSK_RING_PROD__DEFAULT_NUM_DESCS * + XSK_UMEM__DEFAULT_FRAME_SIZE; + i += XSK_UMEM__DEFAULT_FRAME_SIZE) + *xsk_ring_prod__fill_addr(&xsk->umem->fq, idx++) = i; + xsk_ring_prod__submit(&xsk->umem->fq, + XSK_RING_PROD__DEFAULT_NUM_DESCS); return xsk; } -static void print_benchmark(bool running) -{ - const char *bench_str = "INVALID"; - - if (opt_bench == BENCH_RXDROP) - bench_str = "rxdrop"; - else if (opt_bench == BENCH_TXONLY) - bench_str = "txonly"; - else if (opt_bench == BENCH_L2FWD) - bench_str = "l2fwd"; - - printf("%s:%d %s ", opt_if, opt_queue, bench_str); - if (opt_xdp_flags & XDP_FLAGS_SKB_MODE) - printf("xdp-skb "); - else if (opt_xdp_flags & XDP_FLAGS_DRV_MODE) - printf("xdp-drv "); - else - printf(" "); - - if (opt_poll) - printf("poll() "); - - if (running) { - printf("running..."); - fflush(stdout); - } -} - -static void dump_stats(void) -{ - unsigned long now = get_nsecs(); - long dt = now - prev_time; - int i; - - prev_time = now; - - for (i = 0; i < num_socks && xsks[i]; i++) { - char *fmt = "%-15s %'-11.0f %'-11lu\n"; - double rx_pps, tx_pps; - - rx_pps = (xsks[i]->rx_npkts - xsks[i]->prev_rx_npkts) * - 1000000000. / dt; - tx_pps = (xsks[i]->tx_npkts - xsks[i]->prev_tx_npkts) * - 1000000000. / dt; - - printf("\n sock%d@", i); - print_benchmark(false); - printf("\n"); - - printf("%-15s %-11s %-11s %-11.2f\n", "", "pps", "pkts", - dt / 1000000000.); - printf(fmt, "rx", rx_pps, xsks[i]->rx_npkts); - printf(fmt, "tx", tx_pps, xsks[i]->tx_npkts); - - xsks[i]->prev_rx_npkts = xsks[i]->rx_npkts; - xsks[i]->prev_tx_npkts = xsks[i]->tx_npkts; - } -} - -static void *poller(void *arg) -{ - (void)arg; - for (;;) { - sleep(opt_interval); - dump_stats(); - } - - return NULL; -} - -static void int_exit(int sig) -{ - __u32 curr_prog_id = 0; - - (void)sig; - dump_stats(); - if (bpf_get_link_xdp_id(opt_ifindex, &curr_prog_id, opt_xdp_flags)) { - printf("bpf_get_link_xdp_id failed\n"); - exit(EXIT_FAILURE); - } - if (prog_id == curr_prog_id) - bpf_set_link_xdp_fd(opt_ifindex, -1, opt_xdp_flags); - else if (!curr_prog_id) - printf("couldn't find a prog id on a given interface\n"); - else - printf("program on interface changed, not removing\n"); - exit(EXIT_SUCCESS); -} - static struct option long_options[] = { {"rxdrop", no_argument, 0, 'r'}, {"txonly", no_argument, 0, 't'}, @@ -656,7 +341,6 @@ static struct option long_options[] = { {"interface", required_argument, 0, 'i'}, {"queue", required_argument, 0, 'q'}, {"poll", no_argument, 0, 'p'}, - {"shared-buffer", no_argument, 0, 's'}, {"xdp-skb", no_argument, 0, 'S'}, {"xdp-native", no_argument, 0, 'N'}, {"interval", required_argument, 0, 'n'}, @@ -676,7 +360,6 @@ static void usage(const char *prog) " -i, --interface=n Run on interface n\n" " -q, --queue=n Use queue n (default 0)\n" " -p, --poll Use poll syscall\n" - " -s, --shared-buffer Use shared packet buffer\n" " -S, --xdp-skb=n Use XDP skb-mod\n" " -N, --xdp-native=n Enfore XDP native mode\n" " -n, --interval=n Specify statistics update interval (default 1 sec).\n" @@ -715,9 +398,6 @@ static void parse_command_line(int argc, char **argv) case 'q': opt_queue = atoi(optarg); break; - case 's': - opt_shared_packet_buffer = 1; - break; case 'p': opt_poll = 1; break; @@ -751,75 +431,104 @@ static void parse_command_line(int argc, char **argv) opt_if); usage(basename(argv[0])); } + } -static void kick_tx(int fd) +static void kick_tx(struct xsk_socket_info *xsk) { int ret; - ret = sendto(fd, NULL, 0, MSG_DONTWAIT, NULL, 0); + ret = sendto(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0); if (ret >= 0 || errno == ENOBUFS || errno == EAGAIN || errno == EBUSY) return; - lassert(0); + exit_with_error(errno); } -static inline void complete_tx_l2fwd(struct xdpsock *xsk) +static inline void complete_tx_l2fwd(struct xsk_socket_info *xsk) { - u64 descs[BATCH_SIZE]; + u32 idx_cq, idx_fq; unsigned int rcvd; size_t ndescs; if (!xsk->outstanding_tx) return; - kick_tx(xsk->sfd); + kick_tx(xsk); ndescs = (xsk->outstanding_tx > BATCH_SIZE) ? BATCH_SIZE : - xsk->outstanding_tx; + xsk->outstanding_tx; /* re-add completed Tx buffers */ - rcvd = umem_complete_from_kernel(&xsk->umem->cq, descs, ndescs); + rcvd = xsk_ring_cons__peek(&xsk->umem->cq, ndescs, &idx_cq); if (rcvd > 0) { - umem_fill_to_kernel(&xsk->umem->fq, descs, rcvd); + unsigned int i; + int ret; + + ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq); + while (ret != rcvd) { + if (ret < 0) + exit_with_error(-ret); + ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, + &idx_fq); + } + for (i = 0; i < rcvd; i++) + *xsk_ring_prod__fill_addr(&xsk->umem->fq, idx_fq++) = + *xsk_ring_cons__comp_addr(&xsk->umem->cq, + idx_cq++); + + xsk_ring_prod__submit(&xsk->umem->fq, rcvd); + xsk_ring_cons__release(&xsk->umem->cq, rcvd); xsk->outstanding_tx -= rcvd; xsk->tx_npkts += rcvd; } } -static inline void complete_tx_only(struct xdpsock *xsk) +static inline void complete_tx_only(struct xsk_socket_info *xsk) { - u64 descs[BATCH_SIZE]; unsigned int rcvd; + u32 idx; if (!xsk->outstanding_tx) return; - kick_tx(xsk->sfd); + kick_tx(xsk); - rcvd = umem_complete_from_kernel(&xsk->umem->cq, descs, BATCH_SIZE); + rcvd = xsk_ring_cons__peek(&xsk->umem->cq, BATCH_SIZE, &idx); if (rcvd > 0) { + xsk_ring_cons__release(&xsk->umem->cq, rcvd); xsk->outstanding_tx -= rcvd; xsk->tx_npkts += rcvd; } } -static void rx_drop(struct xdpsock *xsk) +static void rx_drop(struct xsk_socket_info *xsk) { - struct xdp_desc descs[BATCH_SIZE]; unsigned int rcvd, i; + u32 idx_rx, idx_fq = 0; + int ret; - rcvd = xq_deq(&xsk->rx, descs, BATCH_SIZE); + rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE, &idx_rx); if (!rcvd) return; + ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq); + while (ret != rcvd) { + if (ret < 0) + exit_with_error(-ret); + ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq); + } + for (i = 0; i < rcvd; i++) { - char *pkt = xq_get_data(xsk, descs[i].addr); + u64 addr = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx)->addr; + u32 len = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++)->len; + char *pkt = xsk_umem__get_data(xsk->umem->buffer, addr); - hex_dump(pkt, descs[i].len, descs[i].addr); + hex_dump(pkt, len, addr); + *xsk_ring_prod__fill_addr(&xsk->umem->fq, idx_fq++) = addr; } + xsk_ring_prod__submit(&xsk->umem->fq, rcvd); + xsk_ring_cons__release(&xsk->rx, rcvd); xsk->rx_npkts += rcvd; - - umem_fill_to_kernel_ex(&xsk->umem->fq, descs, rcvd); } static void rx_drop_all(void) @@ -830,7 +539,7 @@ static void rx_drop_all(void) memset(fds, 0, sizeof(fds)); for (i = 0; i < num_socks; i++) { - fds[i].fd = xsks[i]->sfd; + fds[i].fd = xsk_socket__fd(xsks[i]->xsk); fds[i].events = POLLIN; timeout = 1000; /* 1sn */ } @@ -847,14 +556,14 @@ static void rx_drop_all(void) } } -static void tx_only(struct xdpsock *xsk) +static void tx_only(struct xsk_socket_info *xsk) { int timeout, ret, nfds = 1; struct pollfd fds[nfds + 1]; - unsigned int idx = 0; + u32 idx, frame_nb = 0; memset(fds, 0, sizeof(fds)); - fds[0].fd = xsk->sfd; + fds[0].fd = xsk_socket__fd(xsk->xsk); fds[0].events = POLLOUT; timeout = 1000; /* 1sn */ @@ -864,50 +573,73 @@ static void tx_only(struct xdpsock *xsk) if (ret <= 0) continue; - if (fds[0].fd != xsk->sfd || - !(fds[0].revents & POLLOUT)) + if (!(fds[0].revents & POLLOUT)) continue; } - if (xq_nb_free(&xsk->tx, BATCH_SIZE) >= BATCH_SIZE) { - lassert(xq_enq_tx_only(&xsk->tx, idx, BATCH_SIZE) == 0); + if (xsk_ring_prod__reserve(&xsk->tx, BATCH_SIZE, &idx) == + BATCH_SIZE) { + unsigned int i; + for (i = 0; i < BATCH_SIZE; i++) { + xsk_ring_prod__tx_desc(&xsk->tx, idx + i)->addr + = (frame_nb + i) << + XSK_UMEM__DEFAULT_FRAME_SHIFT; + xsk_ring_prod__tx_desc(&xsk->tx, idx + i)->len = + sizeof(pkt_data) - 1; + } + + xsk_ring_prod__submit(&xsk->tx, BATCH_SIZE); xsk->outstanding_tx += BATCH_SIZE; - idx += BATCH_SIZE; - idx %= NUM_FRAMES; + frame_nb += BATCH_SIZE; + frame_nb %= NUM_FRAMES; } complete_tx_only(xsk); } } -static void l2fwd(struct xdpsock *xsk) +static void l2fwd(struct xsk_socket_info *xsk) { for (;;) { - struct xdp_desc descs[BATCH_SIZE]; unsigned int rcvd, i; + u32 idx_rx, idx_tx = 0; int ret; for (;;) { complete_tx_l2fwd(xsk); - rcvd = xq_deq(&xsk->rx, descs, BATCH_SIZE); + rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE, + &idx_rx); if (rcvd > 0) break; } + ret = xsk_ring_prod__reserve(&xsk->tx, rcvd, &idx_tx); + while (ret != rcvd) { + if (ret < 0) + exit_with_error(-ret); + ret = xsk_ring_prod__reserve(&xsk->tx, rcvd, &idx_tx); + } + for (i = 0; i < rcvd; i++) { - char *pkt = xq_get_data(xsk, descs[i].addr); + u64 addr = xsk_ring_cons__rx_desc(&xsk->rx, + idx_rx)->addr; + u32 len = xsk_ring_cons__rx_desc(&xsk->rx, + idx_rx++)->len; + char *pkt = xsk_umem__get_data(xsk->umem->buffer, addr); swap_mac_addresses(pkt); - hex_dump(pkt, descs[i].len, descs[i].addr); + hex_dump(pkt, len, addr); + xsk_ring_prod__tx_desc(&xsk->tx, idx_tx)->addr = addr; + xsk_ring_prod__tx_desc(&xsk->tx, idx_tx++)->len = len; } - xsk->rx_npkts += rcvd; + xsk_ring_prod__submit(&xsk->tx, rcvd); + xsk_ring_cons__release(&xsk->rx, rcvd); - ret = xq_enq(&xsk->tx, descs, rcvd); - lassert(ret == 0); + xsk->rx_npkts += rcvd; xsk->outstanding_tx += rcvd; } } @@ -915,17 +647,10 @@ static void l2fwd(struct xdpsock *xsk) int main(int argc, char **argv) { struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; - struct bpf_prog_load_attr prog_load_attr = { - .prog_type = BPF_PROG_TYPE_XDP, - }; - int prog_fd, qidconf_map, xsks_map; - struct bpf_prog_info info = {}; - __u32 info_len = sizeof(info); - struct bpf_object *obj; - char xdp_filename[256]; - struct bpf_map *map; - int i, ret, key = 0; + struct xsk_umem_info *umem; pthread_t pt; + void *bufs; + int ret; parse_command_line(argc, argv); @@ -935,67 +660,22 @@ int main(int argc, char **argv) exit(EXIT_FAILURE); } - snprintf(xdp_filename, sizeof(xdp_filename), "%s_kern.o", argv[0]); - prog_load_attr.file = xdp_filename; - - if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd)) - exit(EXIT_FAILURE); - if (prog_fd < 0) { - fprintf(stderr, "ERROR: no program found: %s\n", - strerror(prog_fd)); - exit(EXIT_FAILURE); - } - - map = bpf_object__find_map_by_name(obj, "qidconf_map"); - qidconf_map = bpf_map__fd(map); - if (qidconf_map < 0) { - fprintf(stderr, "ERROR: no qidconf map found: %s\n", - strerror(qidconf_map)); - exit(EXIT_FAILURE); - } - - map = bpf_object__find_map_by_name(obj, "xsks_map"); - xsks_map = bpf_map__fd(map); - if (xsks_map < 0) { - fprintf(stderr, "ERROR: no xsks map found: %s\n", - strerror(xsks_map)); - exit(EXIT_FAILURE); - } - - if (bpf_set_link_xdp_fd(opt_ifindex, prog_fd, opt_xdp_flags) < 0) { - fprintf(stderr, "ERROR: link set xdp fd failed\n"); - exit(EXIT_FAILURE); - } - - ret = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); - if (ret) { - printf("can't get prog info - %s\n", strerror(errno)); - return 1; - } - prog_id = info.id; + ret = posix_memalign(&bufs, getpagesize(), /* PAGE_SIZE aligned */ + NUM_FRAMES * XSK_UMEM__DEFAULT_FRAME_SIZE); + if (ret) + exit_with_error(ret); - ret = bpf_map_update_elem(qidconf_map, &key, &opt_queue, 0); - if (ret) { - fprintf(stderr, "ERROR: bpf_map_update_elem qidconf\n"); - exit(EXIT_FAILURE); - } + /* Create sockets... */ + umem = xsk_configure_umem(bufs, + NUM_FRAMES * XSK_UMEM__DEFAULT_FRAME_SIZE); + xsks[num_socks++] = xsk_configure_socket(umem); - /* Create sockets... */ - xsks[num_socks++] = xsk_configure(NULL); - -#if RR_LB - for (i = 0; i < MAX_SOCKS - 1; i++) - xsks[num_socks++] = xsk_configure(xsks[0]->umem); -#endif + if (opt_bench == BENCH_TXONLY) { + int i; - /* ...and insert them into the map. */ - for (i = 0; i < num_socks; i++) { - key = i; - ret = bpf_map_update_elem(xsks_map, &key, &xsks[i]->sfd, 0); - if (ret) { - fprintf(stderr, "ERROR: bpf_map_update_elem %d\n", i); - exit(EXIT_FAILURE); - } + for (i = 0; i < NUM_FRAMES * XSK_UMEM__DEFAULT_FRAME_SIZE; + i += XSK_UMEM__DEFAULT_FRAME_SIZE) + (void)gen_eth_frame(umem, i); } signal(SIGINT, int_exit); @@ -1005,7 +685,8 @@ int main(int argc, char **argv) setlocale(LC_ALL, ""); ret = pthread_create(&pt, NULL, poller, NULL); - lassert(ret == 0); + if (ret) + exit_with_error(ret); prev_time = get_nsecs(); -- cgit v1.2.3-59-g8ed1b From d2e614cb0795d935aee879e47aab231247274f13 Mon Sep 17 00:00:00 2001 From: "Daniel T. Lee" Date: Wed, 27 Feb 2019 02:52:26 -0500 Subject: samples: bpf: fix: broken sample regarding removed function Currently, running sample "task_fd_query" and "tracex3" occurs the following error. On kernel v5.0-rc* this sample will be unavailable due to the removal of function 'blk_start_request' at commit "a1ce35f". (function removed, as "Single Queue IO scheduler" no longer exists) $ sudo ./task_fd_query failed to create kprobe 'blk_start_request' error 'No such file or directory' This commit will change the function 'blk_start_request' to 'blk_mq_start_request' to fix the broken sample. Signed-off-by: Daniel T. Lee Signed-off-by: Daniel Borkmann --- samples/bpf/task_fd_query_kern.c | 2 +- samples/bpf/task_fd_query_user.c | 2 +- samples/bpf/tracex3_kern.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'samples') diff --git a/samples/bpf/task_fd_query_kern.c b/samples/bpf/task_fd_query_kern.c index f4b0a9ea674d..fb56fc2a3e5d 100644 --- a/samples/bpf/task_fd_query_kern.c +++ b/samples/bpf/task_fd_query_kern.c @@ -4,7 +4,7 @@ #include #include "bpf_helpers.h" -SEC("kprobe/blk_start_request") +SEC("kprobe/blk_mq_start_request") int bpf_prog1(struct pt_regs *ctx) { return 0; diff --git a/samples/bpf/task_fd_query_user.c b/samples/bpf/task_fd_query_user.c index 8381d792f138..aff2b4ae914e 100644 --- a/samples/bpf/task_fd_query_user.c +++ b/samples/bpf/task_fd_query_user.c @@ -311,7 +311,7 @@ int main(int argc, char **argv) } /* test two functions in the corresponding *_kern.c file */ - CHECK_AND_RET(test_debug_fs_kprobe(0, "blk_start_request", + CHECK_AND_RET(test_debug_fs_kprobe(0, "blk_mq_start_request", BPF_FD_TYPE_KPROBE)); CHECK_AND_RET(test_debug_fs_kprobe(1, "blk_account_io_completion", BPF_FD_TYPE_KRETPROBE)); diff --git a/samples/bpf/tracex3_kern.c b/samples/bpf/tracex3_kern.c index 9974c3d7c18b..ea1d4c19c132 100644 --- a/samples/bpf/tracex3_kern.c +++ b/samples/bpf/tracex3_kern.c @@ -20,7 +20,7 @@ struct bpf_map_def SEC("maps") my_map = { /* kprobe is NOT a stable ABI. If kernel internals change this bpf+kprobe * example will no longer be meaningful */ -SEC("kprobe/blk_start_request") +SEC("kprobe/blk_mq_start_request") int bpf_prog1(struct pt_regs *ctx) { long rq = PT_REGS_PARM1(ctx); -- cgit v1.2.3-59-g8ed1b From 5c3cf87d477a461274452cb46f7654c5b6ae6294 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 27 Feb 2019 19:04:10 -0800 Subject: samples: bpf: force IPv4 in ping ping localhost may default of IPv6 on modern systems, but samples are trying to only parse IPv4. Force IPv4. samples/bpf/tracex1_user.c doesn't interpret the packet so we don't care which IP version will be used there. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Acked-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann --- samples/bpf/sock_example.c | 2 +- samples/bpf/sockex1_user.c | 2 +- samples/bpf/sockex2_user.c | 2 +- samples/bpf/sockex3_user.c | 2 +- samples/bpf/tracex2_user.c | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) (limited to 'samples') diff --git a/samples/bpf/sock_example.c b/samples/bpf/sock_example.c index 60ec467c78ab..00aae1d33fca 100644 --- a/samples/bpf/sock_example.c +++ b/samples/bpf/sock_example.c @@ -99,7 +99,7 @@ int main(void) { FILE *f; - f = popen("ping -c5 localhost", "r"); + f = popen("ping -4 -c5 localhost", "r"); (void)f; return test_sock(); diff --git a/samples/bpf/sockex1_user.c b/samples/bpf/sockex1_user.c index 93ec01c56104..be8ba5686924 100644 --- a/samples/bpf/sockex1_user.c +++ b/samples/bpf/sockex1_user.c @@ -26,7 +26,7 @@ int main(int ac, char **argv) assert(setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, prog_fd, sizeof(prog_fd[0])) == 0); - f = popen("ping -c5 localhost", "r"); + f = popen("ping -4 -c5 localhost", "r"); (void) f; for (i = 0; i < 5; i++) { diff --git a/samples/bpf/sockex2_user.c b/samples/bpf/sockex2_user.c index 1d5c6e9a6d27..125ee6efc913 100644 --- a/samples/bpf/sockex2_user.c +++ b/samples/bpf/sockex2_user.c @@ -34,7 +34,7 @@ int main(int ac, char **argv) assert(setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, prog_fd, sizeof(prog_fd[0])) == 0); - f = popen("ping -c5 localhost", "r"); + f = popen("ping -4 -c5 localhost", "r"); (void) f; for (i = 0; i < 5; i++) { diff --git a/samples/bpf/sockex3_user.c b/samples/bpf/sockex3_user.c index 9d02e0404719..bbb1cd0666a9 100644 --- a/samples/bpf/sockex3_user.c +++ b/samples/bpf/sockex3_user.c @@ -58,7 +58,7 @@ int main(int argc, char **argv) sizeof(__u32)) == 0); if (argc > 1) - f = popen("ping -c5 localhost", "r"); + f = popen("ping -4 -c5 localhost", "r"); else f = popen("netperf -l 4 localhost", "r"); (void) f; diff --git a/samples/bpf/tracex2_user.c b/samples/bpf/tracex2_user.c index 1a81e6a5c2ea..c9544a4ce61a 100644 --- a/samples/bpf/tracex2_user.c +++ b/samples/bpf/tracex2_user.c @@ -131,7 +131,7 @@ int main(int ac, char **argv) signal(SIGTERM, int_exit); /* start 'ping' in the background to have some kfree_skb events */ - f = popen("ping -c5 localhost", "r"); + f = popen("ping -4 -c5 localhost", "r"); (void) f; /* start 'dd' in the background to have plenty of 'write' syscalls */ -- cgit v1.2.3-59-g8ed1b From ea9b6362018358a46008a8af339178469a4efe13 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 27 Feb 2019 19:04:11 -0800 Subject: samples: bpf: remove load_sock_ops in favour of bpftool bpftool can do all the things load_sock_ops used to do, and more. Point users to bpftool instead of maintaining this sample utility. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Acked-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann --- samples/bpf/.gitignore | 1 - samples/bpf/Makefile | 2 - samples/bpf/load_sock_ops.c | 97 -------------------------------------- samples/bpf/tcp_basertt_kern.c | 2 +- samples/bpf/tcp_bpf.readme | 14 +++--- samples/bpf/tcp_bufs_kern.c | 2 +- samples/bpf/tcp_clamp_kern.c | 2 +- samples/bpf/tcp_cong_kern.c | 2 +- samples/bpf/tcp_iw_kern.c | 2 +- samples/bpf/tcp_rwnd_kern.c | 2 +- samples/bpf/tcp_synrto_kern.c | 2 +- samples/bpf/tcp_tos_reflect_kern.c | 2 +- 12 files changed, 16 insertions(+), 114 deletions(-) delete mode 100644 samples/bpf/load_sock_ops.c (limited to 'samples') diff --git a/samples/bpf/.gitignore b/samples/bpf/.gitignore index 8ae4940025f8..dbb817dbacfc 100644 --- a/samples/bpf/.gitignore +++ b/samples/bpf/.gitignore @@ -1,7 +1,6 @@ cpustat fds_example lathist -load_sock_ops lwt_len_hist map_perf_test offwaketime diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index a333e258f319..4dd98100678e 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -40,7 +40,6 @@ hostprogs-y += lwt_len_hist hostprogs-y += xdp_tx_iptunnel hostprogs-y += test_map_in_map hostprogs-y += per_socket_stats_example -hostprogs-y += load_sock_ops hostprogs-y += xdp_redirect hostprogs-y += xdp_redirect_map hostprogs-y += xdp_redirect_cpu @@ -71,7 +70,6 @@ tracex4-objs := bpf_load.o tracex4_user.o tracex5-objs := bpf_load.o tracex5_user.o tracex6-objs := bpf_load.o tracex6_user.o tracex7-objs := bpf_load.o tracex7_user.o -load_sock_ops-objs := bpf_load.o load_sock_ops.o test_probe_write_user-objs := bpf_load.o test_probe_write_user_user.o trace_output-objs := bpf_load.o trace_output_user.o $(TRACE_HELPERS) lathist-objs := bpf_load.o lathist_user.o diff --git a/samples/bpf/load_sock_ops.c b/samples/bpf/load_sock_ops.c deleted file mode 100644 index 8ecb41ea0c03..000000000000 --- a/samples/bpf/load_sock_ops.c +++ /dev/null @@ -1,97 +0,0 @@ -/* Copyright (c) 2017 Facebook - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - */ -#include -#include -#include -#include -#include -#include "bpf_load.h" -#include -#include -#include -#include - -static void usage(char *pname) -{ - printf("USAGE:\n %s [-l] \n", pname); - printf("\tLoad and attach a sock_ops program to the specified " - "cgroup\n"); - printf("\tIf \"-l\" is used, the program will continue to run\n"); - printf("\tprinting the BPF log buffer\n"); - printf("\tIf the specified filename does not end in \".o\", it\n"); - printf("\tappends \"_kern.o\" to the name\n"); - printf("\n"); - printf(" %s -r \n", pname); - printf("\tDetaches the currently attached sock_ops program\n"); - printf("\tfrom the specified cgroup\n"); - printf("\n"); - exit(1); -} - -int main(int argc, char **argv) -{ - int logFlag = 0; - int error = 0; - char *cg_path; - char fn[500]; - char *prog; - int cg_fd; - - if (argc < 3) - usage(argv[0]); - - if (!strcmp(argv[1], "-r")) { - cg_path = argv[2]; - cg_fd = open(cg_path, O_DIRECTORY, O_RDONLY); - error = bpf_prog_detach(cg_fd, BPF_CGROUP_SOCK_OPS); - if (error) { - printf("ERROR: bpf_prog_detach: %d (%s)\n", - error, strerror(errno)); - return 2; - } - return 0; - } else if (!strcmp(argv[1], "-h")) { - usage(argv[0]); - } else if (!strcmp(argv[1], "-l")) { - logFlag = 1; - if (argc < 4) - usage(argv[0]); - } - - prog = argv[argc - 1]; - cg_path = argv[argc - 2]; - if (strlen(prog) > 480) { - fprintf(stderr, "ERROR: program name too long (> 480 chars)\n"); - return 3; - } - cg_fd = open(cg_path, O_DIRECTORY, O_RDONLY); - - if (!strcmp(prog + strlen(prog)-2, ".o")) - strcpy(fn, prog); - else - sprintf(fn, "%s_kern.o", prog); - if (logFlag) - printf("loading bpf file:%s\n", fn); - if (load_bpf_file(fn)) { - printf("ERROR: load_bpf_file failed for: %s\n", fn); - printf("%s", bpf_log_buf); - return 4; - } - if (logFlag) - printf("TCP BPF Loaded %s\n", fn); - - error = bpf_prog_attach(prog_fd[0], cg_fd, BPF_CGROUP_SOCK_OPS, 0); - if (error) { - printf("ERROR: bpf_prog_attach: %d (%s)\n", - error, strerror(errno)); - return 5; - } else if (logFlag) { - read_trace_pipe(); - } - - return error; -} diff --git a/samples/bpf/tcp_basertt_kern.c b/samples/bpf/tcp_basertt_kern.c index 4bf4fc597db9..6ef1625e8b2c 100644 --- a/samples/bpf/tcp_basertt_kern.c +++ b/samples/bpf/tcp_basertt_kern.c @@ -7,7 +7,7 @@ * BPF program to set base_rtt to 80us when host is running TCP-NV and * both hosts are in the same datacenter (as determined by IPv6 prefix). * - * Use load_sock_ops to load this BPF program. + * Use "bpftool cgroup attach $cg sock_ops $prog" to load this BPF program. */ #include diff --git a/samples/bpf/tcp_bpf.readme b/samples/bpf/tcp_bpf.readme index 831fb601e3c9..fee746621aec 100644 --- a/samples/bpf/tcp_bpf.readme +++ b/samples/bpf/tcp_bpf.readme @@ -8,14 +8,16 @@ a cgroupv2 and attach a bash shell to the group. bash echo $$ >> /tmp/cgroupv2/foo/cgroup.procs -Anything that runs under this shell belongs to the foo cgroupv2 To load +Anything that runs under this shell belongs to the foo cgroupv2. To load (attach) one of the tcp_*_kern.o programs: - ./load_sock_ops -l /tmp/cgroupv2/foo tcp_basertt_kern.o + bpftool prog load tcp_basertt_kern.o /sys/fs/bpf/tcp_prog + bpftool cgroup attach /tmp/cgroupv2/foo sock_ops pinned /sys/fs/bpf/tcp_prog + bpftool prog tracelog -If the "-l" flag is used, the load_sock_ops program will continue to run -printing the BPF log buffer. The tcp_*_kern.o programs use special print -functions to print logging information (if enabled by the ifdef). +"bpftool prog tracelog" will continue to run printing the BPF log buffer. +The tcp_*_kern.o programs use special print functions to print logging +information (if enabled by the ifdef). If using netperf/netserver to create traffic, you need to run them under the cgroupv2 to which the BPF programs are attached (i.e. under bash shell @@ -23,4 +25,4 @@ attached to the cgroupv2). To remove (unattach) a socket_ops BPF program from a cgroupv2: - ./load_sock_ops -r /tmp/cgroupv2/foo + bpftool cgroup attach /tmp/cgroupv2/foo sock_ops pinned /sys/fs/bpf/tcp_prog diff --git a/samples/bpf/tcp_bufs_kern.c b/samples/bpf/tcp_bufs_kern.c index 0566b7fa38a1..e03e204739fa 100644 --- a/samples/bpf/tcp_bufs_kern.c +++ b/samples/bpf/tcp_bufs_kern.c @@ -9,7 +9,7 @@ * doing appropriate checks that indicate the hosts are far enough * away (i.e. large RTT). * - * Use load_sock_ops to load this BPF program. + * Use "bpftool cgroup attach $cg sock_ops $prog" to load this BPF program. */ #include diff --git a/samples/bpf/tcp_clamp_kern.c b/samples/bpf/tcp_clamp_kern.c index f4225c9d2c0c..a0dc2d254aca 100644 --- a/samples/bpf/tcp_clamp_kern.c +++ b/samples/bpf/tcp_clamp_kern.c @@ -9,7 +9,7 @@ * the same datacenter. For his example, we assume they are within the same * datacenter when the first 5.5 bytes of their IPv6 addresses are the same. * - * Use load_sock_ops to load this BPF program. + * Use "bpftool cgroup attach $cg sock_ops $prog" to load this BPF program. */ #include diff --git a/samples/bpf/tcp_cong_kern.c b/samples/bpf/tcp_cong_kern.c index ad0f1ba8206a..4fd3ca979a06 100644 --- a/samples/bpf/tcp_cong_kern.c +++ b/samples/bpf/tcp_cong_kern.c @@ -7,7 +7,7 @@ * BPF program to set congestion control to dctcp when both hosts are * in the same datacenter (as deteremined by IPv6 prefix). * - * Use load_sock_ops to load this BPF program. + * Use "bpftool cgroup attach $cg sock_ops $prog" to load this BPF program. */ #include diff --git a/samples/bpf/tcp_iw_kern.c b/samples/bpf/tcp_iw_kern.c index 4ca5ecc9f580..9b139ec69560 100644 --- a/samples/bpf/tcp_iw_kern.c +++ b/samples/bpf/tcp_iw_kern.c @@ -9,7 +9,7 @@ * would usually be done after doing appropriate checks that indicate * the hosts are far enough away (i.e. large RTT). * - * Use load_sock_ops to load this BPF program. + * Use "bpftool cgroup attach $cg sock_ops $prog" to load this BPF program. */ #include diff --git a/samples/bpf/tcp_rwnd_kern.c b/samples/bpf/tcp_rwnd_kern.c index 09ff65b40b31..cc71ee96e044 100644 --- a/samples/bpf/tcp_rwnd_kern.c +++ b/samples/bpf/tcp_rwnd_kern.c @@ -8,7 +8,7 @@ * and the first 5.5 bytes of the IPv6 addresses are not the same (in this * example that means both hosts are not the same datacenter). * - * Use load_sock_ops to load this BPF program. + * Use "bpftool cgroup attach $cg sock_ops $prog" to load this BPF program. */ #include diff --git a/samples/bpf/tcp_synrto_kern.c b/samples/bpf/tcp_synrto_kern.c index 232bb242823e..ca87ed34f896 100644 --- a/samples/bpf/tcp_synrto_kern.c +++ b/samples/bpf/tcp_synrto_kern.c @@ -8,7 +8,7 @@ * and the first 5.5 bytes of the IPv6 addresses are the same (in this example * that means both hosts are in the same datacenter). * - * Use load_sock_ops to load this BPF program. + * Use "bpftool cgroup attach $cg sock_ops $prog" to load this BPF program. */ #include diff --git a/samples/bpf/tcp_tos_reflect_kern.c b/samples/bpf/tcp_tos_reflect_kern.c index d51dab19eca6..de788be6f862 100644 --- a/samples/bpf/tcp_tos_reflect_kern.c +++ b/samples/bpf/tcp_tos_reflect_kern.c @@ -4,7 +4,7 @@ * * BPF program to automatically reflect TOS option from received syn packet * - * Use load_sock_ops to load this BPF program. + * Use "bpftool cgroup attach $cg sock_ops $prog" to load this BPF program. */ #include -- cgit v1.2.3-59-g8ed1b From 1a9b268c90286cae99051353cb7dfb53ffd82676 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 27 Feb 2019 19:04:13 -0800 Subject: samples: bpf: use libbpf where easy Some samples don't really need the magic of bpf_load, switch them to libbpf. v2: - specify program types. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Acked-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann --- samples/bpf/Makefile | 6 +++--- samples/bpf/fds_example.c | 10 +++++++--- samples/bpf/sockex1_user.c | 23 +++++++++++++---------- samples/bpf/sockex2_user.c | 21 ++++++++++++--------- 4 files changed, 35 insertions(+), 25 deletions(-) (limited to 'samples') diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 4dd98100678e..0c62ac39c697 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -59,9 +59,9 @@ LIBBPF = $(TOOLS_PATH)/lib/bpf/libbpf.a CGROUP_HELPERS := ../../tools/testing/selftests/bpf/cgroup_helpers.o TRACE_HELPERS := ../../tools/testing/selftests/bpf/trace_helpers.o -fds_example-objs := bpf_load.o fds_example.o -sockex1-objs := bpf_load.o sockex1_user.o -sockex2-objs := bpf_load.o sockex2_user.o +fds_example-objs := fds_example.o +sockex1-objs := sockex1_user.o +sockex2-objs := sockex2_user.o sockex3-objs := bpf_load.o sockex3_user.o tracex1-objs := bpf_load.o tracex1_user.o tracex2-objs := bpf_load.o tracex2_user.o diff --git a/samples/bpf/fds_example.c b/samples/bpf/fds_example.c index 9854854f05d1..e51eb060244e 100644 --- a/samples/bpf/fds_example.c +++ b/samples/bpf/fds_example.c @@ -14,8 +14,8 @@ #include +#include "bpf/libbpf.h" #include "bpf_insn.h" -#include "bpf_load.h" #include "sock_example.h" #define BPF_F_PIN (1 << 0) @@ -57,10 +57,14 @@ static int bpf_prog_create(const char *object) BPF_EXIT_INSN(), }; size_t insns_cnt = sizeof(insns) / sizeof(struct bpf_insn); + char bpf_log_buf[BPF_LOG_BUF_SIZE]; + struct bpf_object *obj; + int prog_fd; if (object) { - assert(!load_bpf_file((char *)object)); - return prog_fd[0]; + assert(!bpf_prog_load(object, BPF_PROG_TYPE_UNSPEC, + &obj, &prog_fd)); + return prog_fd; } else { return bpf_load_program(BPF_PROG_TYPE_SOCKET_FILTER, insns, insns_cnt, "GPL", 0, diff --git a/samples/bpf/sockex1_user.c b/samples/bpf/sockex1_user.c index be8ba5686924..7f90796ae15a 100644 --- a/samples/bpf/sockex1_user.c +++ b/samples/bpf/sockex1_user.c @@ -3,28 +3,31 @@ #include #include #include -#include "bpf_load.h" +#include "bpf/libbpf.h" #include "sock_example.h" #include #include int main(int ac, char **argv) { + struct bpf_object *obj; + int map_fd, prog_fd; char filename[256]; - FILE *f; int i, sock; + FILE *f; snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); - if (load_bpf_file(filename)) { - printf("%s", bpf_log_buf); + if (bpf_prog_load(filename, BPF_PROG_TYPE_SOCKET_FILTER, + &obj, &prog_fd)) return 1; - } + + map_fd = bpf_object__find_map_fd_by_name(obj, "my_map"); sock = open_raw_sock("lo"); - assert(setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, prog_fd, - sizeof(prog_fd[0])) == 0); + assert(setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, &prog_fd, + sizeof(prog_fd)) == 0); f = popen("ping -4 -c5 localhost", "r"); (void) f; @@ -34,13 +37,13 @@ int main(int ac, char **argv) int key; key = IPPROTO_TCP; - assert(bpf_map_lookup_elem(map_fd[0], &key, &tcp_cnt) == 0); + assert(bpf_map_lookup_elem(map_fd, &key, &tcp_cnt) == 0); key = IPPROTO_UDP; - assert(bpf_map_lookup_elem(map_fd[0], &key, &udp_cnt) == 0); + assert(bpf_map_lookup_elem(map_fd, &key, &udp_cnt) == 0); key = IPPROTO_ICMP; - assert(bpf_map_lookup_elem(map_fd[0], &key, &icmp_cnt) == 0); + assert(bpf_map_lookup_elem(map_fd, &key, &icmp_cnt) == 0); printf("TCP %lld UDP %lld ICMP %lld bytes\n", tcp_cnt, udp_cnt, icmp_cnt); diff --git a/samples/bpf/sockex2_user.c b/samples/bpf/sockex2_user.c index 125ee6efc913..bc257333ad92 100644 --- a/samples/bpf/sockex2_user.c +++ b/samples/bpf/sockex2_user.c @@ -3,7 +3,7 @@ #include #include #include -#include "bpf_load.h" +#include "bpf/libbpf.h" #include "sock_example.h" #include #include @@ -17,22 +17,25 @@ struct pair { int main(int ac, char **argv) { struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; + struct bpf_object *obj; + int map_fd, prog_fd; char filename[256]; - FILE *f; int i, sock; + FILE *f; snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); setrlimit(RLIMIT_MEMLOCK, &r); - if (load_bpf_file(filename)) { - printf("%s", bpf_log_buf); + if (bpf_prog_load(filename, BPF_PROG_TYPE_SOCKET_FILTER, + &obj, &prog_fd)) return 1; - } + + map_fd = bpf_object__find_map_fd_by_name(obj, "hash_map"); sock = open_raw_sock("lo"); - assert(setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, prog_fd, - sizeof(prog_fd[0])) == 0); + assert(setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, &prog_fd, + sizeof(prog_fd)) == 0); f = popen("ping -4 -c5 localhost", "r"); (void) f; @@ -41,8 +44,8 @@ int main(int ac, char **argv) int key = 0, next_key; struct pair value; - while (bpf_map_get_next_key(map_fd[0], &key, &next_key) == 0) { - bpf_map_lookup_elem(map_fd[0], &next_key, &value); + while (bpf_map_get_next_key(map_fd, &key, &next_key) == 0) { + bpf_map_lookup_elem(map_fd, &next_key, &value); printf("ip %s bytes %lld packets %lld\n", inet_ntoa((struct in_addr){htonl(next_key)}), value.bytes, value.packets); -- cgit v1.2.3-59-g8ed1b From b74e21ab7d438117a10d2d331bdfc275fcab2970 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Thu, 28 Feb 2019 22:19:41 -0800 Subject: samples/bpf: silence compiler warning for xdpsock_user.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Compiling xdpsock_user.c with 4.8.5, I hit the following compilation warning: HOSTCC samples/bpf/xdpsock_user.o /data/users/yhs/work/net-next/samples/bpf/xdpsock_user.c: In function ‘main’: /data/users/yhs/work/net-next/samples/bpf/xdpsock_user.c:449:6: warning: ‘idx_cq’ may be used unini tialized in this function [-Wmaybe-uninitialized] u32 idx_cq, idx_fq; ^ /data/users/yhs/work/net-next/samples/bpf/xdpsock_user.c:606:7: warning: ‘idx_rx’ may be used unini tialized in this function [-Wmaybe-uninitialized] u32 idx_rx, idx_tx = 0; ^ /data/users/yhs/work/net-next/samples/bpf/xdpsock_user.c:506:6: warning: ‘idx_rx’ may be used unini tialized in this function [-Wmaybe-uninitialized] u32 idx_rx, idx_fq = 0; As an example, the code pattern looks like: u32 idx_cq; ... ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq); if (ret) { ... } ... idx_fq ... The compiler warns since it does not know whether &idx_fq is assigned or not inside the library function xsk_ring_prod__reserve(). Let us assign an initial value 0 to such auto variables to silence compiler warning. Fixes: 248c7f9c0e21 ("samples/bpf: convert xdpsock to use libbpf for AF_XDP access") Signed-off-by: Yonghong Song Acked-by: Jonathan Lemon Acked-by: Song Liu Signed-off-by: Daniel Borkmann --- samples/bpf/xdpsock_user.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'samples') diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c index 9c76d6d43deb..d08ee1ab7bb4 100644 --- a/samples/bpf/xdpsock_user.c +++ b/samples/bpf/xdpsock_user.c @@ -446,7 +446,7 @@ static void kick_tx(struct xsk_socket_info *xsk) static inline void complete_tx_l2fwd(struct xsk_socket_info *xsk) { - u32 idx_cq, idx_fq; + u32 idx_cq = 0, idx_fq = 0; unsigned int rcvd; size_t ndescs; @@ -503,7 +503,7 @@ static inline void complete_tx_only(struct xsk_socket_info *xsk) static void rx_drop(struct xsk_socket_info *xsk) { unsigned int rcvd, i; - u32 idx_rx, idx_fq = 0; + u32 idx_rx = 0, idx_fq = 0; int ret; rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE, &idx_rx); @@ -603,7 +603,7 @@ static void l2fwd(struct xsk_socket_info *xsk) { for (;;) { unsigned int rcvd, i; - u32 idx_rx, idx_tx = 0; + u32 idx_rx = 0, idx_tx = 0; int ret; for (;;) { -- cgit v1.2.3-59-g8ed1b From 187d0738ff351f725a58be3d606d3a7fc8db8aed Mon Sep 17 00:00:00 2001 From: brakmo Date: Fri, 1 Mar 2019 12:38:48 -0800 Subject: bpf: Sample HBM BPF program to limit egress bw A cgroup skb BPF program to limit cgroup output bandwidth. It uses a modified virtual token bucket queue to limit average egress bandwidth. The implementation uses credits instead of tokens. Negative credits imply that queueing would have happened (this is a virtual queue, so no queueing is done by it. However, queueing may occur at the actual qdisc (which is not used for rate limiting). This implementation uses 3 thresholds, one to start marking packets and the other two to drop packets: CREDIT - <--------------------------|------------------------> + | | | 0 | Large pkt | | drop thresh | Small pkt drop Mark threshold thresh The effect of marking depends on the type of packet: a) If the packet is ECN enabled, then the packet is ECN ce marked. The current mark threshold is tuned for DCTCP. c) Else, it is dropped if it is a large packet. If the credit is below the drop threshold, the packet is dropped. Note that dropping a packet through the BPF program does not trigger CWR (Congestion Window Reduction) in TCP packets. A future patch will add support for triggering CWR. This BPF program actually uses 2 drop thresholds, one threshold for larger packets (>= 120 bytes) and another for smaller packets. This protects smaller packets such as SYNs, ACKs, etc. The default bandwidth limit is set at 1Gbps but this can be changed by a user program through a shared BPF map. In addition, by default this BPF program does not limit connections using loopback. This behavior can be overwritten by the user program. There is also an option to calculate some statistics, such as percent of packets marked or dropped, which the user program can access. A latter patch provides such a program (hbm.c) Signed-off-by: Lawrence Brakmo Signed-off-by: Alexei Starovoitov --- samples/bpf/Makefile | 2 + samples/bpf/hbm.h | 31 +++++++++ samples/bpf/hbm_kern.h | 137 +++++++++++++++++++++++++++++++++++++++ samples/bpf/hbm_out_kern.c | 157 +++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 327 insertions(+) create mode 100644 samples/bpf/hbm.h create mode 100644 samples/bpf/hbm_kern.h create mode 100644 samples/bpf/hbm_out_kern.c (limited to 'samples') diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 0c62ac39c697..e1bdc96486f6 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -164,6 +164,7 @@ always += xdp_adjust_tail_kern.o always += xdp_fwd_kern.o always += task_fd_query_kern.o always += xdp_sample_pkts_kern.o +always += hbm_out_kern.o KBUILD_HOSTCFLAGS += -I$(objtree)/usr/include KBUILD_HOSTCFLAGS += -I$(srctree)/tools/lib/ @@ -263,6 +264,7 @@ $(BPF_SAMPLES_PATH)/*.c: verify_target_bpf $(LIBBPF) $(src)/*.c: verify_target_bpf $(LIBBPF) $(obj)/tracex5_kern.o: $(obj)/syscall_nrs.h +$(obj)/hbm_out_kern.o: $(src)/hbm.h $(src)/hbm_kern.h # asm/sysreg.h - inline assembly used by it is incompatible with llvm. # But, there is no easy way to fix it, so just exclude it since it is diff --git a/samples/bpf/hbm.h b/samples/bpf/hbm.h new file mode 100644 index 000000000000..518e8147d084 --- /dev/null +++ b/samples/bpf/hbm.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0 + * + * Copyright (c) 2019 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Include file for Host Bandwidth Management (HBM) programs + */ +struct hbm_vqueue { + struct bpf_spin_lock lock; + /* 4 byte hole */ + unsigned long long lasttime; /* In ns */ + int credit; /* In bytes */ + unsigned int rate; /* In bytes per NS << 20 */ +}; + +struct hbm_queue_stats { + unsigned long rate; /* in Mbps*/ + unsigned long stats:1, /* get HBM stats (marked, dropped,..) */ + loopback:1; /* also limit flows using loopback */ + unsigned long long pkts_marked; + unsigned long long bytes_marked; + unsigned long long pkts_dropped; + unsigned long long bytes_dropped; + unsigned long long pkts_total; + unsigned long long bytes_total; + unsigned long long firstPacketTime; + unsigned long long lastPacketTime; +}; diff --git a/samples/bpf/hbm_kern.h b/samples/bpf/hbm_kern.h new file mode 100644 index 000000000000..c5635d924193 --- /dev/null +++ b/samples/bpf/hbm_kern.h @@ -0,0 +1,137 @@ +/* SPDX-License-Identifier: GPL-2.0 + * + * Copyright (c) 2019 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Include file for sample Host Bandwidth Manager (HBM) BPF programs + */ +#define KBUILD_MODNAME "foo" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "bpf_endian.h" +#include "bpf_helpers.h" +#include "hbm.h" + +#define DROP_PKT 0 +#define ALLOW_PKT 1 +#define TCP_ECN_OK 1 + +#define HBM_DEBUG 0 // Set to 1 to enable debugging +#if HBM_DEBUG +#define bpf_printk(fmt, ...) \ +({ \ + char ____fmt[] = fmt; \ + bpf_trace_printk(____fmt, sizeof(____fmt), \ + ##__VA_ARGS__); \ +}) +#else +#define bpf_printk(fmt, ...) +#endif + +#define INITIAL_CREDIT_PACKETS 100 +#define MAX_BYTES_PER_PACKET 1500 +#define MARK_THRESH (40 * MAX_BYTES_PER_PACKET) +#define DROP_THRESH (80 * 5 * MAX_BYTES_PER_PACKET) +#define LARGE_PKT_DROP_THRESH (DROP_THRESH - (15 * MAX_BYTES_PER_PACKET)) +#define MARK_REGION_SIZE (LARGE_PKT_DROP_THRESH - MARK_THRESH) +#define LARGE_PKT_THRESH 120 +#define MAX_CREDIT (100 * MAX_BYTES_PER_PACKET) +#define INIT_CREDIT (INITIAL_CREDIT_PACKETS * MAX_BYTES_PER_PACKET) + +// rate in bytes per ns << 20 +#define CREDIT_PER_NS(delta, rate) ((((u64)(delta)) * (rate)) >> 20) + +struct bpf_map_def SEC("maps") queue_state = { + .type = BPF_MAP_TYPE_CGROUP_STORAGE, + .key_size = sizeof(struct bpf_cgroup_storage_key), + .value_size = sizeof(struct hbm_vqueue), +}; +BPF_ANNOTATE_KV_PAIR(queue_state, struct bpf_cgroup_storage_key, + struct hbm_vqueue); + +struct bpf_map_def SEC("maps") queue_stats = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(u32), + .value_size = sizeof(struct hbm_queue_stats), + .max_entries = 1, +}; +BPF_ANNOTATE_KV_PAIR(queue_stats, int, struct hbm_queue_stats); + +struct hbm_pkt_info { + bool is_ip; + bool is_tcp; + short ecn; +}; + +static __always_inline void hbm_get_pkt_info(struct __sk_buff *skb, + struct hbm_pkt_info *pkti) +{ + struct iphdr iph; + struct ipv6hdr *ip6h; + + bpf_skb_load_bytes(skb, 0, &iph, 12); + if (iph.version == 6) { + ip6h = (struct ipv6hdr *)&iph; + pkti->is_ip = true; + pkti->is_tcp = (ip6h->nexthdr == 6); + pkti->ecn = (ip6h->flow_lbl[0] >> 4) & INET_ECN_MASK; + } else if (iph.version == 4) { + pkti->is_ip = true; + pkti->is_tcp = (iph.protocol == 6); + pkti->ecn = iph.tos & INET_ECN_MASK; + } else { + pkti->is_ip = false; + pkti->is_tcp = false; + pkti->ecn = 0; + } +} + +static __always_inline void hbm_init_vqueue(struct hbm_vqueue *qdp, int rate) +{ + bpf_printk("Initializing queue_state, rate:%d\n", rate * 128); + qdp->lasttime = bpf_ktime_get_ns(); + qdp->credit = INIT_CREDIT; + qdp->rate = rate * 128; +} + +static __always_inline void hbm_update_stats(struct hbm_queue_stats *qsp, + int len, + unsigned long long curtime, + bool congestion_flag, + bool drop_flag) +{ + if (qsp != NULL) { + // Following is needed for work conserving + __sync_add_and_fetch(&(qsp->bytes_total), len); + if (qsp->stats) { + // Optionally update statistics + if (qsp->firstPacketTime == 0) + qsp->firstPacketTime = curtime; + qsp->lastPacketTime = curtime; + __sync_add_and_fetch(&(qsp->pkts_total), 1); + if (congestion_flag || drop_flag) { + __sync_add_and_fetch(&(qsp->pkts_marked), 1); + __sync_add_and_fetch(&(qsp->bytes_marked), len); + } + if (drop_flag) { + __sync_add_and_fetch(&(qsp->pkts_dropped), 1); + __sync_add_and_fetch(&(qsp->bytes_dropped), + len); + } + } + } +} diff --git a/samples/bpf/hbm_out_kern.c b/samples/bpf/hbm_out_kern.c new file mode 100644 index 000000000000..f806863d0b79 --- /dev/null +++ b/samples/bpf/hbm_out_kern.c @@ -0,0 +1,157 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2019 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Sample Host Bandwidth Manager (HBM) BPF program. + * + * A cgroup skb BPF egress program to limit cgroup output bandwidth. + * It uses a modified virtual token bucket queue to limit average + * egress bandwidth. The implementation uses credits instead of tokens. + * Negative credits imply that queueing would have happened (this is + * a virtual queue, so no queueing is done by it. However, queueing may + * occur at the actual qdisc (which is not used for rate limiting). + * + * This implementation uses 3 thresholds, one to start marking packets and + * the other two to drop packets: + * CREDIT + * - <--------------------------|------------------------> + + * | | | 0 + * | Large pkt | + * | drop thresh | + * Small pkt drop Mark threshold + * thresh + * + * The effect of marking depends on the type of packet: + * a) If the packet is ECN enabled and it is a TCP packet, then the packet + * is ECN marked. + * b) If the packet is a TCP packet, then we probabilistically call tcp_cwr + * to reduce the congestion window. The current implementation uses a linear + * distribution (0% probability at marking threshold, 100% probability + * at drop threshold). + * c) If the packet is not a TCP packet, then it is dropped. + * + * If the credit is below the drop threshold, the packet is dropped. If it + * is a TCP packet, then it also calls tcp_cwr since packets dropped by + * by a cgroup skb BPF program do not automatically trigger a call to + * tcp_cwr in the current kernel code. + * + * This BPF program actually uses 2 drop thresholds, one threshold + * for larger packets (>= 120 bytes) and another for smaller packets. This + * protects smaller packets such as SYNs, ACKs, etc. + * + * The default bandwidth limit is set at 1Gbps but this can be changed by + * a user program through a shared BPF map. In addition, by default this BPF + * program does not limit connections using loopback. This behavior can be + * overwritten by the user program. There is also an option to calculate + * some statistics, such as percent of packets marked or dropped, which + * the user program can access. + * + * A latter patch provides such a program (hbm.c) + */ + +#include "hbm_kern.h" + +SEC("cgroup_skb/egress") +int _hbm_out_cg(struct __sk_buff *skb) +{ + struct hbm_pkt_info pkti; + int len = skb->len; + unsigned int queue_index = 0; + unsigned long long curtime; + int credit; + signed long long delta = 0, zero = 0; + int max_credit = MAX_CREDIT; + bool congestion_flag = false; + bool drop_flag = false; + bool cwr_flag = false; + struct hbm_vqueue *qdp; + struct hbm_queue_stats *qsp = NULL; + int rv = ALLOW_PKT; + + qsp = bpf_map_lookup_elem(&queue_stats, &queue_index); + if (qsp != NULL && !qsp->loopback && (skb->ifindex == 1)) + return ALLOW_PKT; + + hbm_get_pkt_info(skb, &pkti); + + // We may want to account for the length of headers in len + // calculation, like ETH header + overhead, specially if it + // is a gso packet. But I am not doing it right now. + + qdp = bpf_get_local_storage(&queue_state, 0); + if (!qdp) + return ALLOW_PKT; + else if (qdp->lasttime == 0) + hbm_init_vqueue(qdp, 1024); + + curtime = bpf_ktime_get_ns(); + + // Begin critical section + bpf_spin_lock(&qdp->lock); + credit = qdp->credit; + delta = curtime - qdp->lasttime; + /* delta < 0 implies that another process with a curtime greater + * than ours beat us to the critical section and already added + * the new credit, so we should not add it ourselves + */ + if (delta > 0) { + qdp->lasttime = curtime; + credit += CREDIT_PER_NS(delta, qdp->rate); + if (credit > MAX_CREDIT) + credit = MAX_CREDIT; + } + credit -= len; + qdp->credit = credit; + bpf_spin_unlock(&qdp->lock); + // End critical section + + // Check if we should update rate + if (qsp != NULL && (qsp->rate * 128) != qdp->rate) { + qdp->rate = qsp->rate * 128; + bpf_printk("Updating rate: %d (1sec:%llu bits)\n", + (int)qdp->rate, + CREDIT_PER_NS(1000000000, qdp->rate) * 8); + } + + // Set flags (drop, congestion, cwr) + // Dropping => we are congested, so ignore congestion flag + if (credit < -DROP_THRESH || + (len > LARGE_PKT_THRESH && + credit < -LARGE_PKT_DROP_THRESH)) { + // Very congested, set drop flag + drop_flag = true; + } else if (credit < 0) { + // Congested, set congestion flag + if (pkti.ecn) { + if (credit < -MARK_THRESH) + congestion_flag = true; + else + congestion_flag = false; + } else { + congestion_flag = true; + } + } + + if (congestion_flag) { + if (!bpf_skb_ecn_set_ce(skb)) { + if (len > LARGE_PKT_THRESH) { + // Problem if too many small packets? + drop_flag = true; + } + } + } + + if (drop_flag) + rv = DROP_PKT; + + hbm_update_stats(qsp, len, curtime, congestion_flag, drop_flag); + + if (rv == DROP_PKT) + __sync_add_and_fetch(&(qdp->credit), len); + + return rv; +} +char _license[] SEC("license") = "GPL"; -- cgit v1.2.3-59-g8ed1b From a1270fe95b74eb3195b107c494ed1f11b932a278 Mon Sep 17 00:00:00 2001 From: brakmo Date: Fri, 1 Mar 2019 12:38:49 -0800 Subject: bpf: User program for testing HBM The program nrm creates a cgroup and attaches a BPF program to the cgroup for testing HBM (Host Bandwidth Manager) for egress traffic. One still needs to create network traffic. This can be done through netesto, netperf or iperf3. A follow-up patch contains a script to create traffic. USAGE: hbm [-d] [-l] [-n ] [-r ] [-s] [-t ] [-w] [-h] [prog] Where: -d Print BPF trace debug buffer -l Also limit flows doing loopback -n <#> To create cgroup "/hbm#" and attach prog. Default is /nrm1 This is convenient when testing HBM in more than 1 cgroup -r Rate limit in Mbps -s Get HBM stats (marked, dropped, etc.) -t