From 8912fd6a61d7474ea9b43be93f136034d28868d5 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 6 Aug 2020 12:42:56 +0100 Subject: net: hns3: fix spelling mistake "could'nt" -> "couldn't" There is a spelling mistake in a dev_err message. Fix it. Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index 9162856de1b1..e972138a14ad 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -1728,7 +1728,7 @@ static int hclgevf_reset_wait(struct hclgevf_dev *hdev) /* hardware completion status should be available by this time */ if (ret) { dev_err(&hdev->pdev->dev, - "could'nt get reset done status from h/w, timeout!\n"); + "couldn't get reset done status from h/w, timeout!\n"); return ret; } -- cgit v1.2.3-59-g8ed1b From 6bcaf41f9613278cd5897fc80ab93033bda8efaa Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 4 Aug 2020 17:47:57 -0700 Subject: selftests/bpf: Prevent runqslower from racing on building bpftool runqslower's Makefile is building/installing bpftool into $(OUTPUT)/sbin/bpftool, which coincides with $(DEFAULT_BPFTOOL). In practice this means that often when building selftests from scratch (after `make clean`), selftests are racing with runqslower to simultaneously build bpftool and one of the two processes fail due to file being busy. Prevent this race by explicitly order-depending on $(BPFTOOL_DEFAULT). Fixes: a2c9652f751e ("selftests: Refactor build to remove tools/lib/bpf from include path") Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20200805004757.2960750-1-andriin@fb.com --- tools/testing/selftests/bpf/Makefile | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index e7a8cf83ba48..48425f9251b5 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -142,7 +142,9 @@ VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux) \ /boot/vmlinux-$(shell uname -r) VMLINUX_BTF ?= $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS)))) -$(OUTPUT)/runqslower: $(BPFOBJ) +DEFAULT_BPFTOOL := $(SCRATCH_DIR)/sbin/bpftool + +$(OUTPUT)/runqslower: $(BPFOBJ) | $(DEFAULT_BPFTOOL) $(Q)$(MAKE) $(submake_extras) -C $(TOOLSDIR)/bpf/runqslower \ OUTPUT=$(SCRATCH_DIR)/ VMLINUX_BTF=$(VMLINUX_BTF) \ BPFOBJ=$(BPFOBJ) BPF_INCLUDE=$(INCLUDE_DIR) && \ @@ -164,7 +166,6 @@ $(OUTPUT)/test_netcnt: cgroup_helpers.c $(OUTPUT)/test_sock_fields: cgroup_helpers.c $(OUTPUT)/test_sysctl: cgroup_helpers.c -DEFAULT_BPFTOOL := $(SCRATCH_DIR)/sbin/bpftool BPFTOOL ?= $(DEFAULT_BPFTOOL) $(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) \ $(BPFOBJ) | $(BUILD_DIR)/bpftool -- cgit v1.2.3-59-g8ed1b From 5e7b30205cef80f6bb922e61834437ca7bff5837 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 4 Aug 2020 22:50:56 -0700 Subject: bpf: Change uapi for bpf iterator map elements Commit a5cbe05a6673 ("bpf: Implement bpf iterator for map elements") added bpf iterator support for map elements. The map element bpf iterator requires info to identify a particular map. In the above commit, the attr->link_create.target_fd is used to carry map_fd and an enum bpf_iter_link_info is added to uapi to specify the target_fd actually representing a map_fd: enum bpf_iter_link_info { BPF_ITER_LINK_UNSPEC = 0, BPF_ITER_LINK_MAP_FD = 1, MAX_BPF_ITER_LINK_INFO, }; This is an extensible approach as we can grow enumerator for pid, cgroup_id, etc. and we can unionize target_fd for pid, cgroup_id, etc. But in the future, there are chances that more complex customization may happen, e.g., for tasks, it could be filtered based on both cgroup_id and user_id. This patch changed the uapi to have fields __aligned_u64 iter_info; __u32 iter_info_len; for additional iter_info for link_create. The iter_info is defined as union bpf_iter_link_info { struct { __u32 map_fd; } map; }; So future extension for additional customization will be easier. The bpf_iter_link_info will be passed to target callback to validate and generic bpf_iter framework does not need to deal it any more. Note that map_fd = 0 will be considered invalid and -EBADF will be returned to user space. Fixes: a5cbe05a6673 ("bpf: Implement bpf iterator for map elements") Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20200805055056.1457463-1-yhs@fb.com --- include/linux/bpf.h | 10 ++++---- include/uapi/linux/bpf.h | 15 ++++++------ kernel/bpf/bpf_iter.c | 58 +++++++++++++++++++++++------------------------ kernel/bpf/map_iter.c | 37 +++++++++++++++++++++++------- kernel/bpf/syscall.c | 2 +- net/core/bpf_sk_storage.c | 37 +++++++++++++++++++++++------- 6 files changed, 102 insertions(+), 57 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index cef4ef0d2b4e..55f694b63164 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1214,15 +1214,17 @@ struct bpf_iter_aux_info { struct bpf_map *map; }; -typedef int (*bpf_iter_check_target_t)(struct bpf_prog *prog, - struct bpf_iter_aux_info *aux); +typedef int (*bpf_iter_attach_target_t)(struct bpf_prog *prog, + union bpf_iter_link_info *linfo, + struct bpf_iter_aux_info *aux); +typedef void (*bpf_iter_detach_target_t)(struct bpf_iter_aux_info *aux); #define BPF_ITER_CTX_ARG_MAX 2 struct bpf_iter_reg { const char *target; - bpf_iter_check_target_t check_target; + bpf_iter_attach_target_t attach_target; + bpf_iter_detach_target_t detach_target; u32 ctx_arg_info_size; - enum bpf_iter_link_info req_linfo; struct bpf_ctx_arg_aux ctx_arg_info[BPF_ITER_CTX_ARG_MAX]; const struct bpf_iter_seq_info *seq_info; }; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index b134e679e9db..0480f893facd 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -81,6 +81,12 @@ struct bpf_cgroup_storage_key { __u32 attach_type; /* program attach type */ }; +union bpf_iter_link_info { + struct { + __u32 map_fd; + } map; +}; + /* BPF syscall commands, see bpf(2) man-page for details. */ enum bpf_cmd { BPF_MAP_CREATE, @@ -249,13 +255,6 @@ enum bpf_link_type { MAX_BPF_LINK_TYPE, }; -enum bpf_iter_link_info { - BPF_ITER_LINK_UNSPEC = 0, - BPF_ITER_LINK_MAP_FD = 1, - - MAX_BPF_ITER_LINK_INFO, -}; - /* cgroup-bpf attach flags used in BPF_PROG_ATTACH command * * NONE(default): No further bpf programs allowed in the subtree. @@ -623,6 +622,8 @@ union bpf_attr { }; __u32 attach_type; /* attach type */ __u32 flags; /* extra flags */ + __aligned_u64 iter_info; /* extra bpf_iter_link_info */ + __u32 iter_info_len; /* iter_info length */ } link_create; struct { /* struct used by BPF_LINK_UPDATE command */ diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c index 363b9cafc2d8..b6715964b685 100644 --- a/kernel/bpf/bpf_iter.c +++ b/kernel/bpf/bpf_iter.c @@ -338,8 +338,8 @@ static void bpf_iter_link_release(struct bpf_link *link) struct bpf_iter_link *iter_link = container_of(link, struct bpf_iter_link, link); - if (iter_link->aux.map) - bpf_map_put_with_uref(iter_link->aux.map); + if (iter_link->tinfo->reg_info->detach_target) + iter_link->tinfo->reg_info->detach_target(&iter_link->aux); } static void bpf_iter_link_dealloc(struct bpf_link *link) @@ -390,15 +390,35 @@ bool bpf_link_is_iter(struct bpf_link *link) int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) { + union bpf_iter_link_info __user *ulinfo; struct bpf_link_primer link_primer; struct bpf_iter_target_info *tinfo; - struct bpf_iter_aux_info aux = {}; + union bpf_iter_link_info linfo; struct bpf_iter_link *link; - u32 prog_btf_id, target_fd; + u32 prog_btf_id, linfo_len; bool existed = false; - struct bpf_map *map; int err; + if (attr->link_create.target_fd || attr->link_create.flags) + return -EINVAL; + + memset(&linfo, 0, sizeof(union bpf_iter_link_info)); + + ulinfo = u64_to_user_ptr(attr->link_create.iter_info); + linfo_len = attr->link_create.iter_info_len; + if (!ulinfo ^ !linfo_len) + return -EINVAL; + + if (ulinfo) { + err = bpf_check_uarg_tail_zero(ulinfo, sizeof(linfo), + linfo_len); + if (err) + return err; + linfo_len = min_t(u32, linfo_len, sizeof(linfo)); + if (copy_from_user(&linfo, ulinfo, linfo_len)) + return -EFAULT; + } + prog_btf_id = prog->aux->attach_btf_id; mutex_lock(&targets_mutex); list_for_each_entry(tinfo, &targets, list) { @@ -411,13 +431,6 @@ int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) if (!existed) return -ENOENT; - /* Make sure user supplied flags are target expected. */ - target_fd = attr->link_create.target_fd; - if (attr->link_create.flags != tinfo->reg_info->req_linfo) - return -EINVAL; - if (!attr->link_create.flags && target_fd) - return -EINVAL; - link = kzalloc(sizeof(*link), GFP_USER | __GFP_NOWARN); if (!link) return -ENOMEM; @@ -431,28 +444,15 @@ int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) return err; } - if (tinfo->reg_info->req_linfo == BPF_ITER_LINK_MAP_FD) { - map = bpf_map_get_with_uref(target_fd); - if (IS_ERR(map)) { - err = PTR_ERR(map); - goto cleanup_link; - } - - aux.map = map; - err = tinfo->reg_info->check_target(prog, &aux); + if (tinfo->reg_info->attach_target) { + err = tinfo->reg_info->attach_target(prog, &linfo, &link->aux); if (err) { - bpf_map_put_with_uref(map); - goto cleanup_link; + bpf_link_cleanup(&link_primer); + return err; } - - link->aux.map = map; } return bpf_link_settle(&link_primer); - -cleanup_link: - bpf_link_cleanup(&link_primer); - return err; } static void init_seq_meta(struct bpf_iter_priv_data *priv_data, diff --git a/kernel/bpf/map_iter.c b/kernel/bpf/map_iter.c index fbe1f557cb88..af86048e5afd 100644 --- a/kernel/bpf/map_iter.c +++ b/kernel/bpf/map_iter.c @@ -98,12 +98,21 @@ static struct bpf_iter_reg bpf_map_reg_info = { .seq_info = &bpf_map_seq_info, }; -static int bpf_iter_check_map(struct bpf_prog *prog, - struct bpf_iter_aux_info *aux) +static int bpf_iter_attach_map(struct bpf_prog *prog, + union bpf_iter_link_info *linfo, + struct bpf_iter_aux_info *aux) { u32 key_acc_size, value_acc_size, key_size, value_size; - struct bpf_map *map = aux->map; + struct bpf_map *map; bool is_percpu = false; + int err = -EINVAL; + + if (!linfo->map.map_fd) + return -EBADF; + + map = bpf_map_get_with_uref(linfo->map.map_fd); + if (IS_ERR(map)) + return PTR_ERR(map); if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH || @@ -112,7 +121,7 @@ static int bpf_iter_check_map(struct bpf_prog *prog, else if (map->map_type != BPF_MAP_TYPE_HASH && map->map_type != BPF_MAP_TYPE_LRU_HASH && map->map_type != BPF_MAP_TYPE_ARRAY) - return -EINVAL; + goto put_map; key_acc_size = prog->aux->max_rdonly_access; value_acc_size = prog->aux->max_rdwr_access; @@ -122,10 +131,22 @@ static int bpf_iter_check_map(struct bpf_prog *prog, else value_size = round_up(map->value_size, 8) * num_possible_cpus(); - if (key_acc_size > key_size || value_acc_size > value_size) - return -EACCES; + if (key_acc_size > key_size || value_acc_size > value_size) { + err = -EACCES; + goto put_map; + } + aux->map = map; return 0; + +put_map: + bpf_map_put_with_uref(map); + return err; +} + +static void bpf_iter_detach_map(struct bpf_iter_aux_info *aux) +{ + bpf_map_put_with_uref(aux->map); } DEFINE_BPF_ITER_FUNC(bpf_map_elem, struct bpf_iter_meta *meta, @@ -133,8 +154,8 @@ DEFINE_BPF_ITER_FUNC(bpf_map_elem, struct bpf_iter_meta *meta, static const struct bpf_iter_reg bpf_map_elem_reg_info = { .target = "bpf_map_elem", - .check_target = bpf_iter_check_map, - .req_linfo = BPF_ITER_LINK_MAP_FD, + .attach_target = bpf_iter_attach_map, + .detach_target = bpf_iter_detach_map, .ctx_arg_info_size = 2, .ctx_arg_info = { { offsetof(struct bpf_iter__bpf_map_elem, key), diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 2f343ce15747..86299a292214 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -3883,7 +3883,7 @@ static int tracing_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog * return -EINVAL; } -#define BPF_LINK_CREATE_LAST_FIELD link_create.flags +#define BPF_LINK_CREATE_LAST_FIELD link_create.iter_info_len static int link_create(union bpf_attr *attr) { enum bpf_prog_type ptype; diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c index d3377c90a291..b988f48153a4 100644 --- a/net/core/bpf_sk_storage.c +++ b/net/core/bpf_sk_storage.c @@ -1384,18 +1384,39 @@ static int bpf_iter_init_sk_storage_map(void *priv_data, return 0; } -static int bpf_iter_check_map(struct bpf_prog *prog, - struct bpf_iter_aux_info *aux) +static int bpf_iter_attach_map(struct bpf_prog *prog, + union bpf_iter_link_info *linfo, + struct bpf_iter_aux_info *aux) { - struct bpf_map *map = aux->map; + struct bpf_map *map; + int err = -EINVAL; + + if (!linfo->map.map_fd) + return -EBADF; + + map = bpf_map_get_with_uref(linfo->map.map_fd); + if (IS_ERR(map)) + return PTR_ERR(map); if (map->map_type != BPF_MAP_TYPE_SK_STORAGE) - return -EINVAL; + goto put_map; - if (prog->aux->max_rdonly_access > map->value_size) - return -EACCES; + if (prog->aux->max_rdonly_access > map->value_size) { + err = -EACCES; + goto put_map; + } + aux->map = map; return 0; + +put_map: + bpf_map_put_with_uref(map); + return err; +} + +static void bpf_iter_detach_map(struct bpf_iter_aux_info *aux) +{ + bpf_map_put_with_uref(aux->map); } static const struct seq_operations bpf_sk_storage_map_seq_ops = { @@ -1414,8 +1435,8 @@ static const struct bpf_iter_seq_info iter_seq_info = { static struct bpf_iter_reg bpf_sk_storage_map_reg_info = { .target = "bpf_sk_storage_map", - .check_target = bpf_iter_check_map, - .req_linfo = BPF_ITER_LINK_MAP_FD, + .attach_target = bpf_iter_attach_map, + .detach_target = bpf_iter_detach_map, .ctx_arg_info_size = 2, .ctx_arg_info = { { offsetof(struct bpf_iter__bpf_sk_storage_map, sk), -- cgit v1.2.3-59-g8ed1b From 74fc097de327b37e8fe3ff580ce7ffaa7c1740dd Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 4 Aug 2020 22:50:58 -0700 Subject: tools/bpf: Support new uapi for map element bpf iterator Previous commit adjusted kernel uapi for map element bpf iterator. This patch adjusted libbpf API due to uapi change. bpftool and bpf_iter selftests are also changed accordingly. Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20200805055058.1457623-1-yhs@fb.com --- tools/bpf/bpftool/iter.c | 9 +++-- tools/include/uapi/linux/bpf.h | 15 +++++---- tools/lib/bpf/bpf.c | 3 ++ tools/lib/bpf/bpf.h | 5 ++- tools/lib/bpf/libbpf.c | 6 ++-- tools/lib/bpf/libbpf.h | 5 +-- tools/testing/selftests/bpf/prog_tests/bpf_iter.c | 40 ++++++++++++++++++----- 7 files changed, 58 insertions(+), 25 deletions(-) diff --git a/tools/bpf/bpftool/iter.c b/tools/bpf/bpftool/iter.c index c9dba7543dba..3b1aad7535dd 100644 --- a/tools/bpf/bpftool/iter.c +++ b/tools/bpf/bpftool/iter.c @@ -11,6 +11,7 @@ static int do_pin(int argc, char **argv) { DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, iter_opts); + union bpf_iter_link_info linfo; const char *objfile, *path; struct bpf_program *prog; struct bpf_object *obj; @@ -36,6 +37,11 @@ static int do_pin(int argc, char **argv) map_fd = map_parse_fd(&argc, &argv); if (map_fd < 0) return -1; + + memset(&linfo, 0, sizeof(linfo)); + linfo.map.map_fd = map_fd; + iter_opts.link_info = &linfo; + iter_opts.link_info_len = sizeof(linfo); } } @@ -57,9 +63,6 @@ static int do_pin(int argc, char **argv) goto close_obj; } - if (map_fd >= 0) - iter_opts.map_fd = map_fd; - link = bpf_program__attach_iter(prog, &iter_opts); if (IS_ERR(link)) { err = PTR_ERR(link); diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index b134e679e9db..0480f893facd 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -81,6 +81,12 @@ struct bpf_cgroup_storage_key { __u32 attach_type; /* program attach type */ }; +union bpf_iter_link_info { + struct { + __u32 map_fd; + } map; +}; + /* BPF syscall commands, see bpf(2) man-page for details. */ enum bpf_cmd { BPF_MAP_CREATE, @@ -249,13 +255,6 @@ enum bpf_link_type { MAX_BPF_LINK_TYPE, }; -enum bpf_iter_link_info { - BPF_ITER_LINK_UNSPEC = 0, - BPF_ITER_LINK_MAP_FD = 1, - - MAX_BPF_ITER_LINK_INFO, -}; - /* cgroup-bpf attach flags used in BPF_PROG_ATTACH command * * NONE(default): No further bpf programs allowed in the subtree. @@ -623,6 +622,8 @@ union bpf_attr { }; __u32 attach_type; /* attach type */ __u32 flags; /* extra flags */ + __aligned_u64 iter_info; /* extra bpf_iter_link_info */ + __u32 iter_info_len; /* iter_info length */ } link_create; struct { /* struct used by BPF_LINK_UPDATE command */ diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index eab14c97c15d..0750681057c2 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -599,6 +599,9 @@ int bpf_link_create(int prog_fd, int target_fd, attr.link_create.target_fd = target_fd; attr.link_create.attach_type = attach_type; attr.link_create.flags = OPTS_GET(opts, flags, 0); + attr.link_create.iter_info = + ptr_to_u64(OPTS_GET(opts, iter_info, (void *)0)); + attr.link_create.iter_info_len = OPTS_GET(opts, iter_info_len, 0); return sys_bpf(BPF_LINK_CREATE, &attr, sizeof(attr)); } diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 28855fd5b5f4..015d13f25fcc 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -168,11 +168,14 @@ LIBBPF_API int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type); LIBBPF_API int bpf_prog_detach2(int prog_fd, int attachable_fd, enum bpf_attach_type type); +union bpf_iter_link_info; /* defined in up-to-date linux/bpf.h */ struct bpf_link_create_opts { size_t sz; /* size of this struct for forward/backward compatibility */ __u32 flags; + union bpf_iter_link_info *iter_info; + __u32 iter_info_len; }; -#define bpf_link_create_opts__last_field flags +#define bpf_link_create_opts__last_field iter_info_len LIBBPF_API int bpf_link_create(int prog_fd, int target_fd, enum bpf_attach_type attach_type, diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 7be04e45d29c..0a06124f7999 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -8306,10 +8306,8 @@ bpf_program__attach_iter(struct bpf_program *prog, if (!OPTS_VALID(opts, bpf_iter_attach_opts)) return ERR_PTR(-EINVAL); - if (OPTS_HAS(opts, map_fd)) { - target_fd = opts->map_fd; - link_create_opts.flags = BPF_ITER_LINK_MAP_FD; - } + link_create_opts.iter_info = OPTS_GET(opts, link_info, (void *)0); + link_create_opts.iter_info_len = OPTS_GET(opts, link_info_len, 0); prog_fd = bpf_program__fd(prog); if (prog_fd < 0) { diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 3ed1399bfbbc..5ecb4069a9f0 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -267,9 +267,10 @@ LIBBPF_API struct bpf_link *bpf_map__attach_struct_ops(struct bpf_map *map); struct bpf_iter_attach_opts { size_t sz; /* size of this struct for forward/backward compatibility */ - __u32 map_fd; + union bpf_iter_link_info *link_info; + __u32 link_info_len; }; -#define bpf_iter_attach_opts__last_field map_fd +#define bpf_iter_attach_opts__last_field link_info_len LIBBPF_API struct bpf_link * bpf_program__attach_iter(struct bpf_program *prog, diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c index 4ffefdc1130f..7375d9a6d242 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c @@ -468,6 +468,7 @@ static void test_bpf_hash_map(void) DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); struct bpf_iter_bpf_hash_map *skel; int err, i, len, map_fd, iter_fd; + union bpf_iter_link_info linfo; __u64 val, expected_val = 0; struct bpf_link *link; struct key_t { @@ -490,13 +491,16 @@ static void test_bpf_hash_map(void) goto out; /* iterator with hashmap2 and hashmap3 should fail */ - opts.map_fd = bpf_map__fd(skel->maps.hashmap2); + memset(&linfo, 0, sizeof(linfo)); + linfo.map.map_fd = bpf_map__fd(skel->maps.hashmap2); + opts.link_info = &linfo; + opts.link_info_len = sizeof(linfo); link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts); if (CHECK(!IS_ERR(link), "attach_iter", "attach_iter for hashmap2 unexpected succeeded\n")) goto out; - opts.map_fd = bpf_map__fd(skel->maps.hashmap3); + linfo.map.map_fd = bpf_map__fd(skel->maps.hashmap3); link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts); if (CHECK(!IS_ERR(link), "attach_iter", "attach_iter for hashmap3 unexpected succeeded\n")) @@ -519,7 +523,7 @@ static void test_bpf_hash_map(void) goto out; } - opts.map_fd = map_fd; + linfo.map.map_fd = map_fd; link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts); if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n")) goto out; @@ -562,6 +566,7 @@ static void test_bpf_percpu_hash_map(void) DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); struct bpf_iter_bpf_percpu_hash_map *skel; int err, i, j, len, map_fd, iter_fd; + union bpf_iter_link_info linfo; __u32 expected_val = 0; struct bpf_link *link; struct key_t { @@ -606,7 +611,10 @@ static void test_bpf_percpu_hash_map(void) goto out; } - opts.map_fd = map_fd; + memset(&linfo, 0, sizeof(linfo)); + linfo.map.map_fd = map_fd; + opts.link_info = &linfo; + opts.link_info_len = sizeof(linfo); link = bpf_program__attach_iter(skel->progs.dump_bpf_percpu_hash_map, &opts); if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n")) goto out; @@ -649,6 +657,7 @@ static void test_bpf_array_map(void) DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); __u32 expected_key = 0, res_first_key; struct bpf_iter_bpf_array_map *skel; + union bpf_iter_link_info linfo; int err, i, map_fd, iter_fd; struct bpf_link *link; char buf[64] = {}; @@ -673,7 +682,10 @@ static void test_bpf_array_map(void) goto out; } - opts.map_fd = map_fd; + memset(&linfo, 0, sizeof(linfo)); + linfo.map.map_fd = map_fd; + opts.link_info = &linfo; + opts.link_info_len = sizeof(linfo); link = bpf_program__attach_iter(skel->progs.dump_bpf_array_map, &opts); if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n")) goto out; @@ -730,6 +742,7 @@ static void test_bpf_percpu_array_map(void) DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); struct bpf_iter_bpf_percpu_array_map *skel; __u32 expected_key = 0, expected_val = 0; + union bpf_iter_link_info linfo; int err, i, j, map_fd, iter_fd; struct bpf_link *link; char buf[64]; @@ -765,7 +778,10 @@ static void test_bpf_percpu_array_map(void) goto out; } - opts.map_fd = map_fd; + memset(&linfo, 0, sizeof(linfo)); + linfo.map.map_fd = map_fd; + opts.link_info = &linfo; + opts.link_info_len = sizeof(linfo); link = bpf_program__attach_iter(skel->progs.dump_bpf_percpu_array_map, &opts); if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n")) goto out; @@ -803,6 +819,7 @@ static void test_bpf_sk_storage_map(void) DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); int err, i, len, map_fd, iter_fd, num_sockets; struct bpf_iter_bpf_sk_storage_map *skel; + union bpf_iter_link_info linfo; int sock_fd[3] = {-1, -1, -1}; __u32 val, expected_val = 0; struct bpf_link *link; @@ -829,7 +846,10 @@ static void test_bpf_sk_storage_map(void) goto out; } - opts.map_fd = map_fd; + memset(&linfo, 0, sizeof(linfo)); + linfo.map.map_fd = map_fd; + opts.link_info = &linfo; + opts.link_info_len = sizeof(linfo); link = bpf_program__attach_iter(skel->progs.dump_bpf_sk_storage_map, &opts); if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n")) goto out; @@ -871,6 +891,7 @@ static void test_rdonly_buf_out_of_bound(void) { DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); struct bpf_iter_test_kern5 *skel; + union bpf_iter_link_info linfo; struct bpf_link *link; skel = bpf_iter_test_kern5__open_and_load(); @@ -878,7 +899,10 @@ static void test_rdonly_buf_out_of_bound(void) "skeleton open_and_load failed\n")) return; - opts.map_fd = bpf_map__fd(skel->maps.hashmap1); + memset(&linfo, 0, sizeof(linfo)); + linfo.map.map_fd = bpf_map__fd(skel->maps.hashmap1); + opts.link_info = &linfo; + opts.link_info_len = sizeof(linfo); link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts); if (CHECK(!IS_ERR(link), "attach_iter", "unexpected success\n")) bpf_link__destroy(link); -- cgit v1.2.3-59-g8ed1b From 932ac54a3e59335a847f7682b5124a788ab3c798 Mon Sep 17 00:00:00 2001 From: "Daniel T. Lee" Date: Thu, 6 Aug 2020 07:33:59 +0900 Subject: libbf: Fix uninitialized pointer at btf__parse_raw() Recently, from commit 94a1fedd63ed ("libbpf: Add btf__parse_raw() and generic btf__parse() APIs"), new API has been added to libbpf that allows to parse BTF from raw data file (btf__parse_raw()). The commit derives build failure of samples/bpf due to improper access of uninitialized pointer at btf_parse_raw(). btf.c: In function btf__parse_raw: btf.c:625:28: error: btf may be used uninitialized in this function 625 | return err ? ERR_PTR(err) : btf; | ~~~~~~~~~~~~~~~~~~~^~~~~ This commit fixes the build failure of samples/bpf by adding code of initializing btf pointer as NULL. Fixes: 94a1fedd63ed ("libbpf: Add btf__parse_raw() and generic btf__parse() APIs") Signed-off-by: Daniel T. Lee Signed-off-by: Alexei Starovoitov Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20200805223359.32109-1-danieltimlee@gmail.com --- tools/lib/bpf/btf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 856b09a04563..4843e44916f7 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -564,8 +564,8 @@ done: struct btf *btf__parse_raw(const char *path) { + struct btf *btf = NULL; void *data = NULL; - struct btf *btf; FILE *f = NULL; __u16 magic; int err = 0; -- cgit v1.2.3-59-g8ed1b From d48556f45608921fa07cb882f9dd15a8a1203427 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Thu, 6 Aug 2020 08:52:25 -0700 Subject: bpf: Add missing return to resolve_btfids int sets_patch(struct object *obj) doesn't have a 'return 0' at the end. Fixes: fbbb68de80a4 ("bpf: Add resolve_btfids tool to resolve BTF IDs in ELF object") Signed-off-by: Stanislav Fomichev Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200806155225.637202-1-sdf@google.com --- tools/bpf/resolve_btfids/main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c index 52d883325a23..4d9ecb975862 100644 --- a/tools/bpf/resolve_btfids/main.c +++ b/tools/bpf/resolve_btfids/main.c @@ -566,6 +566,7 @@ static int sets_patch(struct object *obj) next = rb_next(next); } + return 0; } static int symbols_patch(struct object *obj) -- cgit v1.2.3-59-g8ed1b From 0d360d64b01231cdb36e1936de889f308fd9317f Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Thu, 6 Aug 2020 11:26:12 -0700 Subject: bpf: Remove inline from bpf_do_trace_printk I get the following error during compilation on my side: kernel/trace/bpf_trace.c: In function 'bpf_do_trace_printk': kernel/trace/bpf_trace.c:386:34: error: function 'bpf_do_trace_printk' can never be inlined because it uses variable argument lists static inline __printf(1, 0) int bpf_do_trace_printk(const char *fmt, ...) ^ Fixes: ac5a72ea5c89 ("bpf: Use dedicated bpf_trace_printk event instead of trace_printk()") Signed-off-by: Stanislav Fomichev Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200806182612.1390883-1-sdf@google.com --- kernel/trace/bpf_trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index cb91ef902cc4..a8d4f253ed77 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -383,7 +383,7 @@ static DEFINE_RAW_SPINLOCK(trace_printk_lock); #define BPF_TRACE_PRINTK_SIZE 1024 -static inline __printf(1, 0) int bpf_do_trace_printk(const char *fmt, ...) +static __printf(1, 0) int bpf_do_trace_printk(const char *fmt, ...) { static char buf[BPF_TRACE_PRINTK_SIZE]; unsigned long flags; -- cgit v1.2.3-59-g8ed1b From 6fc5916cc256b8e92cc7ad3b34cc4d2a7efa1d5c Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Thu, 6 Aug 2020 20:39:59 +0200 Subject: selftests: bpf: Switch off timeout Several bpf tests are interrupted by the default timeout of 45 seconds added by commit 852c8cbf34d3 ("selftests/kselftest/runner.sh: Add 45 second timeout per test"). In my case it was test_progs, test_tunnel.sh, test_lwt_ip_encap.sh and test_xdping.sh. There's not much value in having a timeout for bpf tests, switch it off. Signed-off-by: Jiri Benc Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/7a9198ed10917f4ecab4a3dd74bcda1200791efd.1596739059.git.jbenc@redhat.com --- tools/testing/selftests/bpf/settings | 1 + 1 file changed, 1 insertion(+) create mode 100644 tools/testing/selftests/bpf/settings diff --git a/tools/testing/selftests/bpf/settings b/tools/testing/selftests/bpf/settings new file mode 100644 index 000000000000..e7b9417537fb --- /dev/null +++ b/tools/testing/selftests/bpf/settings @@ -0,0 +1 @@ +timeout=0 -- cgit v1.2.3-59-g8ed1b From 929e54a989680c6f134b02293732030b897475dc Mon Sep 17 00:00:00 2001 From: Jianlin Lv Date: Thu, 6 Aug 2020 18:42:24 +0800 Subject: bpf: Fix compilation warning of selftests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Clang compiler version: 12.0.0 The following warning appears during the selftests/bpf compilation: prog_tests/send_signal.c:51:3: warning: ignoring return value of ‘write’, declared with attribute warn_unused_result [-Wunused-result] 51 | write(pipe_c2p[1], buf, 1); | ^~~~~~~~~~~~~~~~~~~~~~~~~~ prog_tests/send_signal.c:54:3: warning: ignoring return value of ‘read’, declared with attribute warn_unused_result [-Wunused-result] 54 | read(pipe_p2c[0], buf, 1); | ^~~~~~~~~~~~~~~~~~~~~~~~~ ...... prog_tests/stacktrace_build_id_nmi.c:13:2: warning: ignoring return value of ‘fscanf’,declared with attribute warn_unused_result [-Wunused-resul] 13 | fscanf(f, "%llu", &sample_freq); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ test_tcpnotify_user.c:133:2: warning:ignoring return value of ‘system’, declared with attribute warn_unused_result [-Wunused-result] 133 | system(test_script); | ^~~~~~~~~~~~~~~~~~~ test_tcpnotify_user.c:138:2: warning:ignoring return value of ‘system’, declared with attribute warn_unused_result [-Wunused-result] 138 | system(test_script); | ^~~~~~~~~~~~~~~~~~~ test_tcpnotify_user.c:143:2: warning:ignoring return value of ‘system’, declared with attribute warn_unused_result [-Wunused-result] 143 | system(test_script); | ^~~~~~~~~~~~~~~~~~~ Add code that fix compilation warning about ignoring return value and handles any errors; Check return value of library`s API make the code more secure. Signed-off-by: Jianlin Lv Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200806104224.95306-1-Jianlin.Lv@arm.com --- tools/testing/selftests/bpf/prog_tests/send_signal.c | 18 ++++++++---------- .../selftests/bpf/prog_tests/stacktrace_build_id_nmi.c | 4 +++- tools/testing/selftests/bpf/test_tcpnotify_user.c | 13 ++++++++++--- 3 files changed, 21 insertions(+), 14 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/send_signal.c b/tools/testing/selftests/bpf/prog_tests/send_signal.c index 504abb7bfb95..7043e6ded0e6 100644 --- a/tools/testing/selftests/bpf/prog_tests/send_signal.c +++ b/tools/testing/selftests/bpf/prog_tests/send_signal.c @@ -48,21 +48,19 @@ static void test_send_signal_common(struct perf_event_attr *attr, close(pipe_p2c[1]); /* close write */ /* notify parent signal handler is installed */ - write(pipe_c2p[1], buf, 1); + CHECK(write(pipe_c2p[1], buf, 1) != 1, "pipe_write", "err %d\n", -errno); /* make sure parent enabled bpf program to send_signal */ - read(pipe_p2c[0], buf, 1); + CHECK(read(pipe_p2c[0], buf, 1) != 1, "pipe_read", "err %d\n", -errno); /* wait a little for signal handler */ sleep(1); - if (sigusr1_received) - write(pipe_c2p[1], "2", 1); - else - write(pipe_c2p[1], "0", 1); + buf[0] = sigusr1_received ? '2' : '0'; + CHECK(write(pipe_c2p[1], buf, 1) != 1, "pipe_write", "err %d\n", -errno); /* wait for parent notification and exit */ - read(pipe_p2c[0], buf, 1); + CHECK(read(pipe_p2c[0], buf, 1) != 1, "pipe_read", "err %d\n", -errno); close(pipe_c2p[1]); close(pipe_p2c[0]); @@ -99,7 +97,7 @@ static void test_send_signal_common(struct perf_event_attr *attr, } /* wait until child signal handler installed */ - read(pipe_c2p[0], buf, 1); + CHECK(read(pipe_c2p[0], buf, 1) != 1, "pipe_read", "err %d\n", -errno); /* trigger the bpf send_signal */ skel->bss->pid = pid; @@ -107,7 +105,7 @@ static void test_send_signal_common(struct perf_event_attr *attr, skel->bss->signal_thread = signal_thread; /* notify child that bpf program can send_signal now */ - write(pipe_p2c[1], buf, 1); + CHECK(write(pipe_p2c[1], buf, 1) != 1, "pipe_write", "err %d\n", -errno); /* wait for result */ err = read(pipe_c2p[0], buf, 1); @@ -121,7 +119,7 @@ static void test_send_signal_common(struct perf_event_attr *attr, CHECK(buf[0] != '2', test_name, "incorrect result\n"); /* notify child safe to exit */ - write(pipe_p2c[1], buf, 1); + CHECK(write(pipe_p2c[1], buf, 1) != 1, "pipe_write", "err %d\n", -errno); disable_pmu: close(pmu_fd); diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c index f002e3090d92..11a769e18f5d 100644 --- a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c +++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c @@ -6,11 +6,13 @@ static __u64 read_perf_max_sample_freq(void) { __u64 sample_freq = 5000; /* fallback to 5000 on error */ FILE *f; + __u32 duration = 0; f = fopen("/proc/sys/kernel/perf_event_max_sample_rate", "r"); if (f == NULL) return sample_freq; - fscanf(f, "%llu", &sample_freq); + CHECK(fscanf(f, "%llu", &sample_freq) != 1, "Get max sample rate", + "return default value: 5000,err %d\n", -errno); fclose(f); return sample_freq; } diff --git a/tools/testing/selftests/bpf/test_tcpnotify_user.c b/tools/testing/selftests/bpf/test_tcpnotify_user.c index 8549b31716ab..73da7fe8c152 100644 --- a/tools/testing/selftests/bpf/test_tcpnotify_user.c +++ b/tools/testing/selftests/bpf/test_tcpnotify_user.c @@ -124,17 +124,24 @@ int main(int argc, char **argv) sprintf(test_script, "iptables -A INPUT -p tcp --dport %d -j DROP", TESTPORT); - system(test_script); + if (system(test_script)) { + printf("FAILED: execute command: %s, err %d\n", test_script, -errno); + goto err; + } sprintf(test_script, "nc 127.0.0.1 %d < /etc/passwd > /dev/null 2>&1 ", TESTPORT); - system(test_script); + if (system(test_script)) + printf("execute command: %s, err %d\n", test_script, -errno); sprintf(test_script, "iptables -D INPUT -p tcp --dport %d -j DROP", TESTPORT); - system(test_script); + if (system(test_script)) { + printf("FAILED: execute command: %s, err %d\n", test_script, -errno); + goto err; + } rv = bpf_map_lookup_elem(bpf_map__fd(global_map), &key, &g); if (rv != 0) { -- cgit v1.2.3-59-g8ed1b From c7ca03c216acb14466a713fedf1b9f2c24994ef2 Mon Sep 17 00:00:00 2001 From: Xie He Date: Wed, 5 Aug 2020 18:50:40 -0700 Subject: drivers/net/wan/lapbether: Added needed_headroom and a skb->len check 1. Added a skb->len check This driver expects upper layers to include a pseudo header of 1 byte when passing down a skb for transmission. This driver will read this 1-byte header. This patch added a skb->len check before reading the header to make sure the header exists. 2. Changed to use needed_headroom instead of hard_header_len to request necessary headroom to be allocated In net/packet/af_packet.c, the function packet_snd first reserves a headroom of length (dev->hard_header_len + dev->needed_headroom). Then if the socket is a SOCK_DGRAM socket, it calls dev_hard_header, which calls dev->header_ops->create, to create the link layer header. If the socket is a SOCK_RAW socket, it "un-reserves" a headroom of length (dev->hard_header_len), and assumes the user to provide the appropriate link layer header. So according to the logic of af_packet.c, dev->hard_header_len should be the length of the header that would be created by dev->header_ops->create. However, this driver doesn't provide dev->header_ops, so logically dev->hard_header_len should be 0. So we should use dev->needed_headroom instead of dev->hard_header_len to request necessary headroom to be allocated. This change fixes kernel panic when this driver is used with AF_PACKET SOCK_RAW sockets. Call stack when panic: [ 168.399197] skbuff: skb_under_panic: text:ffffffff819d95fb len:20 put:14 head:ffff8882704c0a00 data:ffff8882704c09fd tail:0x11 end:0xc0 dev:veth0 ... [ 168.399255] Call Trace: [ 168.399259] skb_push.cold+0x14/0x24 [ 168.399262] eth_header+0x2b/0xc0 [ 168.399267] lapbeth_data_transmit+0x9a/0xb0 [lapbether] [ 168.399275] lapb_data_transmit+0x22/0x2c [lapb] [ 168.399277] lapb_transmit_buffer+0x71/0xb0 [lapb] [ 168.399279] lapb_kick+0xe3/0x1c0 [lapb] [ 168.399281] lapb_data_request+0x76/0xc0 [lapb] [ 168.399283] lapbeth_xmit+0x56/0x90 [lapbether] [ 168.399286] dev_hard_start_xmit+0x91/0x1f0 [ 168.399289] ? irq_init_percpu_irqstack+0xc0/0x100 [ 168.399291] __dev_queue_xmit+0x721/0x8e0 [ 168.399295] ? packet_parse_headers.isra.0+0xd2/0x110 [ 168.399297] dev_queue_xmit+0x10/0x20 [ 168.399298] packet_sendmsg+0xbf0/0x19b0 ...... Cc: Willem de Bruijn Cc: Martin Schiller Cc: Brian Norris Signed-off-by: Xie He Acked-by: Willem de Bruijn Signed-off-by: David S. Miller --- drivers/net/wan/lapbether.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/net/wan/lapbether.c b/drivers/net/wan/lapbether.c index b2868433718f..1ea15f2123ed 100644 --- a/drivers/net/wan/lapbether.c +++ b/drivers/net/wan/lapbether.c @@ -157,6 +157,12 @@ static netdev_tx_t lapbeth_xmit(struct sk_buff *skb, if (!netif_running(dev)) goto drop; + /* There should be a pseudo header of 1 byte added by upper layers. + * Check to make sure it is there before reading it. + */ + if (skb->len < 1) + goto drop; + switch (skb->data[0]) { case X25_IFACE_DATA: break; @@ -305,6 +311,7 @@ static void lapbeth_setup(struct net_device *dev) dev->netdev_ops = &lapbeth_netdev_ops; dev->needs_free_netdev = true; dev->type = ARPHRD_X25; + dev->hard_header_len = 0; dev->mtu = 1000; dev->addr_len = 0; } @@ -331,7 +338,8 @@ static int lapbeth_new_device(struct net_device *dev) * then this driver prepends a length field of 2 bytes, * then the underlying Ethernet device prepends its own header. */ - ndev->hard_header_len = -1 + 3 + 2 + dev->hard_header_len; + ndev->needed_headroom = -1 + 3 + 2 + dev->hard_header_len + + dev->needed_headroom; lapbeth = netdev_priv(ndev); lapbeth->axdev = ndev; -- cgit v1.2.3-59-g8ed1b From 7fb20f9e901e6df2f7dc032d88da5abff4117d37 Mon Sep 17 00:00:00 2001 From: Alan Maguire Date: Fri, 7 Aug 2020 12:50:29 +0100 Subject: bpf, doc: Remove references to warning message when using bpf_trace_printk() The BPF helper bpf_trace_printk() no longer uses trace_printk(); it is now triggers a dedicated trace event. Hence the described warning is no longer present, so remove the discussion of it as it may confuse people. Fixes: ac5a72ea5c89 ("bpf: Use dedicated bpf_trace_printk event instead of trace_printk()") Signed-off-by: Alan Maguire Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/1596801029-32395-1-git-send-email-alan.maguire@oracle.com --- Documentation/bpf/bpf_design_QA.rst | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/Documentation/bpf/bpf_design_QA.rst b/Documentation/bpf/bpf_design_QA.rst index 12a246fcf6cb..2df7b067ab93 100644 --- a/Documentation/bpf/bpf_design_QA.rst +++ b/Documentation/bpf/bpf_design_QA.rst @@ -246,17 +246,6 @@ program is loaded the kernel will print warning message, so this helper is only useful for experiments and prototypes. Tracing BPF programs are root only. -Q: bpf_trace_printk() helper warning ------------------------------------- -Q: When bpf_trace_printk() helper is used the kernel prints nasty -warning message. Why is that? - -A: This is done to nudge program authors into better interfaces when -programs need to pass data to user space. Like bpf_perf_event_output() -can be used to efficiently stream data via perf ring buffer. -BPF maps can be used for asynchronous data sharing between kernel -and user space. bpf_trace_printk() should only be used for debugging. - Q: New functionality via kernel modules? ---------------------------------------- Q: Can BPF functionality such as new program or map types, new -- cgit v1.2.3-59-g8ed1b From d5ca590525cfbd87ca307dcf498a566e2e7c1767 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 6 Aug 2020 20:30:57 -0700 Subject: selftests/bpf: Fix silent Makefile output 99aacebecb75 ("selftests: do not use .ONESHELL") removed .ONESHELL, which changes how Makefile "silences" multi-command target recipes. selftests/bpf's Makefile relied (a somewhat unknowingly) on .ONESHELL behavior of silencing all commands within the recipe if the first command contains @ symbol. Removing .ONESHELL exposed this hack. This patch fixes the issue by explicitly silencing each command with $(Q). Also explicitly define fallback rule for building *.o from *.c, instead of relying on non-silent inherited rule. This was causing a non-silent output for bench.o object file. Fixes: 92f7440ecc93 ("selftests/bpf: More succinct Makefile output") Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200807033058.848677-1-andriin@fb.com --- tools/testing/selftests/bpf/Makefile | 48 +++++++++++++++++++----------------- 1 file changed, 26 insertions(+), 22 deletions(-) diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 48425f9251b5..a83b5827532f 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -102,7 +102,7 @@ endif OVERRIDE_TARGETS := 1 override define CLEAN $(call msg,CLEAN) - $(RM) -r $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(EXTRA_CLEAN) + $(Q)$(RM) -r $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(EXTRA_CLEAN) endef include ../lib.mk @@ -123,17 +123,21 @@ $(notdir $(TEST_GEN_PROGS) \ $(TEST_GEN_PROGS_EXTENDED) \ $(TEST_CUSTOM_PROGS)): %: $(OUTPUT)/% ; +$(OUTPUT)/%.o: %.c + $(call msg,CC,,$@) + $(Q)$(CC) $(CFLAGS) -c $(filter %.c,$^) $(LDLIBS) -o $@ + $(OUTPUT)/%:%.c $(call msg,BINARY,,$@) - $(LINK.c) $^ $(LDLIBS) -o $@ + $(Q)$(LINK.c) $^ $(LDLIBS) -o $@ $(OUTPUT)/urandom_read: urandom_read.c $(call msg,BINARY,,$@) - $(CC) $(LDFLAGS) -o $@ $< $(LDLIBS) -Wl,--build-id + $(Q)$(CC) $(LDFLAGS) -o $@ $< $(LDLIBS) -Wl,--build-id $(OUTPUT)/test_stub.o: test_stub.c $(BPFOBJ) $(call msg,CC,,$@) - $(CC) -c $(CFLAGS) -o $@ $< + $(Q)$(CC) -c $(CFLAGS) -o $@ $< VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux) \ $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux) \ @@ -181,15 +185,15 @@ $(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \ $(BUILD_DIR)/libbpf $(BUILD_DIR)/bpftool $(BUILD_DIR)/resolve_btfids $(INCLUDE_DIR): $(call msg,MKDIR,,$@) - mkdir -p $@ + $(Q)mkdir -p $@ $(INCLUDE_DIR)/vmlinux.h: $(VMLINUX_BTF) | $(BPFTOOL) $(INCLUDE_DIR) ifeq ($(VMLINUX_H),) $(call msg,GEN,,$@) - $(BPFTOOL) btf dump file $(VMLINUX_BTF) format c > $@ + $(Q)$(BPFTOOL) btf dump file $(VMLINUX_BTF) format c > $@ else $(call msg,CP,,$@) - cp "$(VMLINUX_H)" $@ + $(Q)cp "$(VMLINUX_H)" $@ endif $(RESOLVE_BTFIDS): $(BPFOBJ) | $(BUILD_DIR)/resolve_btfids \ @@ -238,28 +242,28 @@ $(OUTPUT)/flow_dissector_load.o: flow_dissector_load.h # $4 - LDFLAGS define CLANG_BPF_BUILD_RULE $(call msg,CLNG-LLC,$(TRUNNER_BINARY),$2) - ($(CLANG) $3 -O2 -target bpf -emit-llvm \ + $(Q)($(CLANG) $3 -O2 -target bpf -emit-llvm \ -c $1 -o - || echo "BPF obj compilation failed") | \ $(LLC) -mattr=dwarfris -march=bpf -mcpu=v3 $4 -filetype=obj -o $2 endef # Similar to CLANG_BPF_BUILD_RULE, but with disabled alu32 define CLANG_NOALU32_BPF_BUILD_RULE $(call msg,CLNG-LLC,$(TRUNNER_BINARY),$2) - ($(CLANG) $3 -O2 -target bpf -emit-llvm \ + $(Q)($(CLANG) $3 -O2 -target bpf -emit-llvm \ -c $1 -o - || echo "BPF obj compilation failed") | \ $(LLC) -march=bpf -mcpu=v2 $4 -filetype=obj -o $2 endef # Similar to CLANG_BPF_BUILD_RULE, but using native Clang and bpf LLC define CLANG_NATIVE_BPF_BUILD_RULE $(call msg,CLNG-BPF,$(TRUNNER_BINARY),$2) - ($(CLANG) $3 -O2 -emit-llvm \ + $(Q)($(CLANG) $3 -O2 -emit-llvm \ -c $1 -o - || echo "BPF obj compilation failed") | \ $(LLC) -march=bpf -mcpu=v3 $4 -filetype=obj -o $2 endef # Build BPF object using GCC define GCC_BPF_BUILD_RULE $(call msg,GCC-BPF,$(TRUNNER_BINARY),$2) - $(BPF_GCC) $3 $4 -O2 -c $1 -o $2 + $(Q)$(BPF_GCC) $3 $4 -O2 -c $1 -o $2 endef SKEL_BLACKLIST := btf__% test_pinning_invalid.c test_sk_assign.c @@ -301,7 +305,7 @@ ifeq ($($(TRUNNER_OUTPUT)-dir),) $(TRUNNER_OUTPUT)-dir := y $(TRUNNER_OUTPUT): $$(call msg,MKDIR,,$$@) - mkdir -p $$@ + $(Q)mkdir -p $$@ endif # ensure we set up BPF objects generation rule just once for a given @@ -321,7 +325,7 @@ $(TRUNNER_BPF_SKELS): $(TRUNNER_OUTPUT)/%.skel.h: \ $(TRUNNER_OUTPUT)/%.o \ | $(BPFTOOL) $(TRUNNER_OUTPUT) $$(call msg,GEN-SKEL,$(TRUNNER_BINARY),$$@) - $$(BPFTOOL) gen skeleton $$< > $$@ + $(Q)$$(BPFTOOL) gen skeleton $$< > $$@ endif # ensure we set up tests.h header generation rule just once @@ -345,7 +349,7 @@ $(TRUNNER_TEST_OBJS): $(TRUNNER_OUTPUT)/%.test.o: \ $(TRUNNER_BPF_SKELS) \ $$(BPFOBJ) | $(TRUNNER_OUTPUT) $$(call msg,TEST-OBJ,$(TRUNNER_BINARY),$$@) - cd $$(@D) && $$(CC) -I. $$(CFLAGS) -c $(CURDIR)/$$< $$(LDLIBS) -o $$(@F) + $(Q)cd $$(@D) && $$(CC) -I. $$(CFLAGS) -c $(CURDIR)/$$< $$(LDLIBS) -o $$(@F) $(TRUNNER_EXTRA_OBJS): $(TRUNNER_OUTPUT)/%.o: \ %.c \ @@ -353,13 +357,13 @@ $(TRUNNER_EXTRA_OBJS): $(TRUNNER_OUTPUT)/%.o: \ $(TRUNNER_TESTS_HDR) \ $$(BPFOBJ) | $(TRUNNER_OUTPUT) $$(call msg,EXT-OBJ,$(TRUNNER_BINARY),$$@) - $$(CC) $$(CFLAGS) -c $$< $$(LDLIBS) -o $$@ + $(Q)$$(CC) $$(CFLAGS) -c $$< $$(LDLIBS) -o $$@ # only copy extra resources if in flavored build $(TRUNNER_BINARY)-extras: $(TRUNNER_EXTRA_FILES) | $(TRUNNER_OUTPUT) ifneq ($2,) $$(call msg,EXT-COPY,$(TRUNNER_BINARY),$(TRUNNER_EXTRA_FILES)) - cp -a $$^ $(TRUNNER_OUTPUT)/ + $(Q)cp -a $$^ $(TRUNNER_OUTPUT)/ endif $(OUTPUT)/$(TRUNNER_BINARY): $(TRUNNER_TEST_OBJS) \ @@ -367,8 +371,8 @@ $(OUTPUT)/$(TRUNNER_BINARY): $(TRUNNER_TEST_OBJS) \ $(RESOLVE_BTFIDS) \ | $(TRUNNER_BINARY)-extras $$(call msg,BINARY,,$$@) - $$(CC) $$(CFLAGS) $$(filter %.a %.o,$$^) $$(LDLIBS) -o $$@ - $(RESOLVE_BTFIDS) --no-fail --btf btf_data.o $$@ + $(Q)$$(CC) $$(CFLAGS) $$(filter %.a %.o,$$^) $$(LDLIBS) -o $$@ + $(Q)$(RESOLVE_BTFIDS) --no-fail --btf btf_data.o $$@ endef @@ -421,17 +425,17 @@ verifier/tests.h: verifier/*.c ) > verifier/tests.h) $(OUTPUT)/test_verifier: test_verifier.c verifier/tests.h $(BPFOBJ) | $(OUTPUT) $(call msg,BINARY,,$@) - $(CC) $(CFLAGS) $(filter %.a %.o %.c,$^) $(LDLIBS) -o $@ + $(Q)$(CC) $(CFLAGS) $(filter %.a %.o %.c,$^) $(LDLIBS) -o $@ # Make sure we are able to include and link libbpf against c++. $(OUTPUT)/test_cpp: test_cpp.cpp $(OUTPUT)/test_core_extern.skel.h $(BPFOBJ) $(call msg,CXX,,$@) - $(CXX) $(CFLAGS) $^ $(LDLIBS) -o $@ + $(Q)$(CXX) $(CFLAGS) $^ $(LDLIBS) -o $@ # Benchmark runner $(OUTPUT)/bench_%.o: benchs/bench_%.c bench.h $(call msg,CC,,$@) - $(CC) $(CFLAGS) -c $(filter %.c,$^) $(LDLIBS) -o $@ + $(Q)$(CC) $(CFLAGS) -c $(filter %.c,$^) $(LDLIBS) -o $@ $(OUTPUT)/bench_rename.o: $(OUTPUT)/test_overhead.skel.h $(OUTPUT)/bench_trigger.o: $(OUTPUT)/trigger_bench.skel.h $(OUTPUT)/bench_ringbufs.o: $(OUTPUT)/ringbuf_bench.skel.h \ @@ -444,7 +448,7 @@ $(OUTPUT)/bench: $(OUTPUT)/bench.o $(OUTPUT)/testing_helpers.o \ $(OUTPUT)/bench_trigger.o \ $(OUTPUT)/bench_ringbufs.o $(call msg,BINARY,,$@) - $(CC) $(LDFLAGS) -o $@ $(filter %.a %.o,$^) $(LDLIBS) + $(Q)$(CC) $(LDFLAGS) -o $@ $(filter %.a %.o,$^) $(LDLIBS) EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(SCRATCH_DIR) \ prog_tests/tests.h map_tests/tests.h verifier/tests.h \ -- cgit v1.2.3-59-g8ed1b From b8c1a3090741f349322ad855d2b66d6e9752a60d Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 6 Aug 2020 20:31:41 -0700 Subject: bpf: Delete repeated words in comments Drop repeated words in kernel/bpf/: {has, the} Signed-off-by: Randy Dunlap Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200807033141.10437-1-rdunlap@infradead.org --- kernel/bpf/core.c | 2 +- kernel/bpf/verifier.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index bde93344164d..ed0b3578867c 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1966,7 +1966,7 @@ void bpf_prog_array_delete_safe(struct bpf_prog_array *array, * @index: the index of the program to replace * * Skips over dummy programs, by not counting them, when calculating - * the the position of the program to replace. + * the position of the program to replace. * * Return: * * 0 - Success diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index b6ccfce3bf4c..ef938f17b944 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -8294,7 +8294,7 @@ static bool stacksafe(struct bpf_func_state *old, if (old->stack[spi].slot_type[i % BPF_REG_SIZE] != cur->stack[spi].slot_type[i % BPF_REG_SIZE]) /* Ex: old explored (safe) state has STACK_SPILL in - * this stack slot, but current has has STACK_MISC -> + * this stack slot, but current has STACK_MISC -> * this verifier states are not equivalent, * return false to continue verification of this path */ -- cgit v1.2.3-59-g8ed1b From b9b40ee4db6cb186341b97bca4f0d7aa2a042a66 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 7 Aug 2020 09:36:32 +0200 Subject: r8152: Use MAC address from correct device tree node Query the USB device's device tree node when looking for a MAC address. The struct device embedded into the struct net_device does not have a device tree node attached at all. The reason why this went unnoticed is because the system where this was tested was one of the few development units that had its OTP programmed, as opposed to production systems where the MAC address is stored in a separate EEPROM and is passed via device tree by the firmware. Reported-by: EJ Hsu Fixes: acb6d3771a03 ("r8152: Use MAC address from device tree if available") Signed-off-by: Thierry Reding Reviewed-by: EJ Hsu Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 7d39f998535d..2b02fefd094d 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -1504,7 +1504,7 @@ static int determine_ethernet_addr(struct r8152 *tp, struct sockaddr *sa) sa->sa_family = dev->type; - ret = eth_platform_get_mac_address(&dev->dev, sa->sa_data); + ret = eth_platform_get_mac_address(&tp->udev->dev, sa->sa_data); if (ret < 0) { if (tp->version == RTL_VER_01) { ret = pla_ocp_read(tp, PLA_IDR, 8, sa->sa_data); -- cgit v1.2.3-59-g8ed1b From 158b47a65aa6b1e70424c3c9c0eda8577b831ea8 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 7 Aug 2020 11:32:04 +0200 Subject: selftests: mptcp: fix dependecies Since commit df62f2ec3df6 ("selftests/mptcp: add diag interface tests") the MPTCP selftests relies on the MPTCP diag interface which is enabled by a specific kconfig knob: be sure to include it. Fixes: df62f2ec3df6 ("selftests/mptcp: add diag interface tests") Signed-off-by: Paolo Abeni Reviewed-by: Matthieu Baerts Signed-off-by: David S. Miller --- tools/testing/selftests/net/mptcp/config | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/testing/selftests/net/mptcp/config b/tools/testing/selftests/net/mptcp/config index 2499824d9e1c..8df5cb8f71ff 100644 --- a/tools/testing/selftests/net/mptcp/config +++ b/tools/testing/selftests/net/mptcp/config @@ -1,4 +1,6 @@ CONFIG_MPTCP=y CONFIG_MPTCP_IPV6=y +CONFIG_INET_DIAG=m +CONFIG_INET_MPTCP_DIAG=m CONFIG_VETH=y CONFIG_NET_SCH_NETEM=m -- cgit v1.2.3-59-g8ed1b From 6bdb6211a61eaf91a22e9160b16795ee6c40f90d Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 7 Aug 2020 18:31:00 +0200 Subject: mptcp: more stable diag self-tests During diag self-tests we introduce long wait in the mptcp test program to give the script enough time to access the sockets dump. Such wait is introduced after shutting down one sockets end. Since commit 43b54c6ee382 ("mptcp: Use full MPTCP-level disconnect state machine") if both sides shutdown the socket is correctly transitioned into CLOSED status. As a side effect some sockets are not dumped via the diag interface, because the socket state (CLOSED) does not match the default filter, and this cause self-tests instability. Address the issue moving the above mentioned wait before shutting down the socket. Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/68 Fixes: df62f2ec3df6 ("selftests/mptcp: add diag interface tests") Tested-and-acked-by: Matthieu Baerts Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- tools/testing/selftests/net/mptcp/mptcp_connect.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c index cad6f73a5fd0..090620c3e10c 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.c +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c @@ -406,10 +406,11 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd) /* ... but we still receive. * Close our write side, ev. give some time - * for address notification + * for address notification and/or checking + * the current status */ - if (cfg_join) - usleep(400000); + if (cfg_wait) + usleep(cfg_wait); shutdown(peerfd, SHUT_WR); } else { if (errno == EINTR) @@ -427,7 +428,7 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd) } /* leave some time for late join/announce */ - if (cfg_wait) + if (cfg_join) usleep(cfg_wait); close(peerfd); -- cgit v1.2.3-59-g8ed1b From 7ee2492635d862fac39bc5ef234ffd3d8e9f833b Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 7 Aug 2020 19:03:53 +0200 Subject: mptcp: fix warn at shutdown time for unaccepted msk sockets With commit b93df08ccda3 ("mptcp: explicitly track the fully established status"), the status of unaccepted mptcp closed in mptcp_sock_destruct() changes from TCP_SYN_RECV to TCP_ESTABLISHED. As a result mptcp_sock_destruct() does not perform the proper cleanup and inet_sock_destruct() will later emit a warn. Address the issue updating the condition tested in mptcp_sock_destruct(). Also update the related comment. Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/66 Reported-and-tested-by: Christoph Paasch Fixes: b93df08ccda3 ("mptcp: explicitly track the fully established status") Signed-off-by: Paolo Abeni Reviewed-by: Matthieu Baerts Signed-off-by: David S. Miller --- net/mptcp/subflow.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 96f4f2fe50ad..e8cac2655c82 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -423,12 +423,12 @@ static void mptcp_sock_destruct(struct sock *sk) * also remove the mptcp socket, via * sock_put(ctx->conn). * - * Problem is that the mptcp socket will not be in - * SYN_RECV state and doesn't have SOCK_DEAD flag. + * Problem is that the mptcp socket will be in + * ESTABLISHED state and will not have the SOCK_DEAD flag. * Both result in warnings from inet_sock_destruct. */ - if (sk->sk_state == TCP_SYN_RECV) { + if (sk->sk_state == TCP_ESTABLISHED) { sk->sk_state = TCP_CLOSE; WARN_ON_ONCE(sk->sk_socket); sock_orphan(sk); -- cgit v1.2.3-59-g8ed1b From 1c3b63f155f637594268cd1add8335461691b314 Mon Sep 17 00:00:00 2001 From: Rouven Czerwinski Date: Thu, 6 Aug 2020 08:49:06 +0200 Subject: net/tls: allow MSG_CMSG_COMPAT in sendmsg MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Trying to use ktls on a system with 32-bit userspace and 64-bit kernel results in a EOPNOTSUPP message during sendmsg: setsockopt(3, SOL_TLS, TLS_TX, …, 40) = 0 sendmsg(3, …, msg_flags=0}, 0) = -1 EOPNOTSUPP (Operation not supported) The tls_sw implementation does strict flag checking and does not allow the MSG_CMSG_COMPAT flag, which is set if the message comes in through the compat syscall. This patch adds MSG_CMSG_COMPAT to the flag check to allow the usage of the TLS SW implementation on systems using the compat syscall path. Note that the same check is present in the sendmsg path for the TLS device implementation, however the flag hasn't been added there for lack of testing hardware. Signed-off-by: Rouven Czerwinski Signed-off-by: David S. Miller --- net/tls/tls_sw.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 710bd44eaa49..9a3d9fedd7aa 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -935,7 +935,8 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) int ret = 0; int pending; - if (msg->msg_flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL)) + if (msg->msg_flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL | + MSG_CMSG_COMPAT)) return -EOPNOTSUPP; mutex_lock(&tls_ctx->tx_lock); -- cgit v1.2.3-59-g8ed1b From d02cbc46136105cf86f84ac355e16f04696f538d Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 6 Aug 2020 17:37:53 +0200 Subject: net: phy: fix memory leak in device-create error path A recent commit introduced a late error path in phy_device_create() which fails to release the device name allocated by dev_set_name(). Fixes: 13d0ab6750b2 ("net: phy: check return code when requesting PHY driver module") Cc: Heiner Kallweit Signed-off-by: Johan Hovold Signed-off-by: David S. Miller --- drivers/net/phy/phy_device.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 1b9523595839..57d44648c8dd 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -615,7 +615,9 @@ struct phy_device *phy_device_create(struct mii_bus *bus, int addr, u32 phy_id, if (c45_ids) dev->c45_ids = *c45_ids; dev->irq = bus->irq[addr]; + dev_set_name(&mdiodev->dev, PHY_ID_FMT, bus->id, addr); + device_initialize(&mdiodev->dev); dev->state = PHY_DOWN; @@ -649,10 +651,8 @@ struct phy_device *phy_device_create(struct mii_bus *bus, int addr, u32 phy_id, ret = phy_request_driver_module(dev, phy_id); } - if (!ret) { - device_initialize(&mdiodev->dev); - } else { - kfree(dev); + if (ret) { + put_device(&mdiodev->dev); dev = ERR_PTR(ret); } -- cgit v1.2.3-59-g8ed1b From 6b07edebe6d31529346e553a7b309bc5251da712 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Thu, 6 Aug 2020 19:52:24 +0800 Subject: net: Use helper function fdput() Use helper function fdput() to fput() the file iff FDPUT_FPUT is set. Signed-off-by: Miaohe Lin Signed-off-by: David S. Miller --- net/socket.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/net/socket.c b/net/socket.c index aff52e81653c..3c3d6abe4c1e 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1804,8 +1804,7 @@ int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr, ret = __sys_accept4_file(f.file, 0, upeer_sockaddr, upeer_addrlen, flags, rlimit(RLIMIT_NOFILE)); - if (f.flags) - fput(f.file); + fdput(f); } return ret; @@ -1868,8 +1867,7 @@ int __sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen) ret = move_addr_to_kernel(uservaddr, addrlen, &address); if (!ret) ret = __sys_connect_file(f.file, &address, addrlen, 0); - if (f.flags) - fput(f.file); + fdput(f); } return ret; -- cgit v1.2.3-59-g8ed1b From ce787a5a074a86f76f5d3fd804fa78e01bfb9e89 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Thu, 6 Aug 2020 19:53:16 +0800 Subject: net: Set fput_needed iff FDPUT_FPUT is set We should fput() file iff FDPUT_FPUT is set. So we should set fput_needed accordingly. Fixes: 00e188ef6a7e ("sockfd_lookup_light(): switch to fdget^W^Waway from fget_light") Signed-off-by: Miaohe Lin Signed-off-by: David S. Miller --- net/socket.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/socket.c b/net/socket.c index 3c3d6abe4c1e..e08415b4f939 100644 --- a/net/socket.c +++ b/net/socket.c @@ -500,7 +500,7 @@ static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed) if (f.file) { sock = sock_from_file(f.file, err); if (likely(sock)) { - *fput_needed = f.flags; + *fput_needed = f.flags & FDPUT_FPUT; return sock; } fdput(f); -- cgit v1.2.3-59-g8ed1b From 47260ba937822cd1a57ee8b520b8789bdda5964e Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Thu, 6 Aug 2020 19:54:19 +0800 Subject: net: Remove meaningless jump label out_fs The out_fs jump label has nothing to do but goto out. Signed-off-by: Miaohe Lin Signed-off-by: David S. Miller --- net/socket.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/socket.c b/net/socket.c index e08415b4f939..c61f036d24f5 100644 --- a/net/socket.c +++ b/net/socket.c @@ -3063,7 +3063,7 @@ static int __init sock_init(void) err = register_filesystem(&sock_fs_type); if (err) - goto out_fs; + goto out; sock_mnt = kern_mount(&sock_fs_type); if (IS_ERR(sock_mnt)) { err = PTR_ERR(sock_mnt); @@ -3086,7 +3086,6 @@ out: out_mount: unregister_filesystem(&sock_fs_type); -out_fs: goto out; } -- cgit v1.2.3-59-g8ed1b From 11f920d2aa9d4c2c2d53ccd79f4b5512f2169623 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Thu, 6 Aug 2020 19:57:18 +0800 Subject: net: Use helper function ip_is_fragment() Use helper function ip_is_fragment() to check ip fragment. Signed-off-by: Miaohe Lin Signed-off-by: David S. Miller --- net/core/skbuff.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 2828f6d5ba89..7e2e502ef519 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4853,7 +4853,7 @@ static int skb_checksum_setup_ipv4(struct sk_buff *skb, bool recalculate) if (err < 0) goto out; - if (ip_hdr(skb)->frag_off & htons(IP_OFFSET | IP_MF)) + if (ip_is_fragment(ip_hdr(skb))) fragment = true; off = ip_hdrlen(skb); -- cgit v1.2.3-59-g8ed1b From 7c7ab580db49cc7befe5f4b91bb1920cd6b07575 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Sat, 8 Aug 2020 16:23:10 +0800 Subject: net: Convert to use the fallthrough macro Convert the uses of fallthrough comments to fallthrough macro. Signed-off-by: Miaohe Lin Signed-off-by: David S. Miller --- net/socket.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/socket.c b/net/socket.c index c61f036d24f5..f4d5998bdcba 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1325,7 +1325,7 @@ int sock_wake_async(struct socket_wq *wq, int how, int band) case SOCK_WAKE_SPACE: if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags)) break; - /* fall through */ + fallthrough; case SOCK_WAKE_IO: call_kill: kill_fasync(&wq->fasync_list, SIGIO, band); @@ -3158,13 +3158,13 @@ static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32) if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32)) return -ENOMEM; buf_size += rule_cnt * sizeof(u32); - /* fall through */ + fallthrough; case ETHTOOL_GRXRINGS: case ETHTOOL_GRXCLSRLCNT: case ETHTOOL_GRXCLSRULE: case ETHTOOL_SRXCLSRLINS: convert_out = true; - /* fall through */ + fallthrough; case ETHTOOL_SRXCLSRLDEL: buf_size += sizeof(struct ethtool_rxnfc); convert_in = true; -- cgit v1.2.3-59-g8ed1b From 519a8a6cf91dda095be2d36216fc4ebc525270a1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 10 Aug 2020 18:42:14 +0200 Subject: net: Revert "net: optimize the sockptr_t for unified kernel/user address spaces" This reverts commits 6d04fe15f78acdf8e32329e208552e226f7a8ae6 and a31edb2059ed4e498f9aa8230c734b59d0ad797a. It turns out the idea to share a single pointer for both kernel and user space address causes various kinds of problems. So use the slightly less optimal version that uses an extra bit, but which is guaranteed to be safe everywhere. Fixes: 6d04fe15f78a ("net: optimize the sockptr_t for unified kernel/user address spaces") Reported-by: Eric Dumazet Reported-by: John Stultz Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- include/linux/sockptr.h | 26 ++------------------------ net/ipv4/bpfilter/sockopt.c | 14 ++++++-------- net/socket.c | 6 +----- 3 files changed, 9 insertions(+), 37 deletions(-) diff --git a/include/linux/sockptr.h b/include/linux/sockptr.h index 96840def9d69..ea193414298b 100644 --- a/include/linux/sockptr.h +++ b/include/linux/sockptr.h @@ -8,26 +8,9 @@ #ifndef _LINUX_SOCKPTR_H #define _LINUX_SOCKPTR_H -#include #include #include -#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE -typedef union { - void *kernel; - void __user *user; -} sockptr_t; - -static inline bool sockptr_is_kernel(sockptr_t sockptr) -{ - return (unsigned long)sockptr.kernel >= TASK_SIZE; -} - -static inline sockptr_t KERNEL_SOCKPTR(void *p) -{ - return (sockptr_t) { .kernel = p }; -} -#else /* CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE */ typedef struct { union { void *kernel; @@ -45,15 +28,10 @@ static inline sockptr_t KERNEL_SOCKPTR(void *p) { return (sockptr_t) { .kernel = p, .is_kernel = true }; } -#endif /* CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE */ -static inline int __must_check init_user_sockptr(sockptr_t *sp, void __user *p, - size_t size) +static inline sockptr_t USER_SOCKPTR(void __user *p) { - if (!access_ok(p, size)) - return -EFAULT; - *sp = (sockptr_t) { .user = p }; - return 0; + return (sockptr_t) { .user = p }; } static inline bool sockptr_is_null(sockptr_t sockptr) diff --git a/net/ipv4/bpfilter/sockopt.c b/net/ipv4/bpfilter/sockopt.c index 545b2640f019..1b34cb9a7708 100644 --- a/net/ipv4/bpfilter/sockopt.c +++ b/net/ipv4/bpfilter/sockopt.c @@ -57,18 +57,16 @@ int bpfilter_ip_set_sockopt(struct sock *sk, int optname, sockptr_t optval, return bpfilter_mbox_request(sk, optname, optval, optlen, true); } -int bpfilter_ip_get_sockopt(struct sock *sk, int optname, - char __user *user_optval, int __user *optlen) +int bpfilter_ip_get_sockopt(struct sock *sk, int optname, char __user *optval, + int __user *optlen) { - sockptr_t optval; - int err, len; + int len; if (get_user(len, optlen)) return -EFAULT; - err = init_user_sockptr(&optval, user_optval, len); - if (err) - return err; - return bpfilter_mbox_request(sk, optname, optval, len, false); + + return bpfilter_mbox_request(sk, optname, USER_SOCKPTR(optval), len, + false); } static int __init bpfilter_sockopt_init(void) diff --git a/net/socket.c b/net/socket.c index f4d5998bdcba..dbbe8ea7d395 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2095,7 +2095,7 @@ static bool sock_use_custom_sol_socket(const struct socket *sock) int __sys_setsockopt(int fd, int level, int optname, char __user *user_optval, int optlen) { - sockptr_t optval; + sockptr_t optval = USER_SOCKPTR(user_optval); char *kernel_optval = NULL; int err, fput_needed; struct socket *sock; @@ -2103,10 +2103,6 @@ int __sys_setsockopt(int fd, int level, int optname, char __user *user_optval, if (optlen < 0) return -EINVAL; - err = init_user_sockptr(&optval, user_optval, optlen); - if (err) - return err; - sock = sockfd_lookup_light(fd, &err, &fput_needed); if (!sock) return err; -- cgit v1.2.3-59-g8ed1b From 8a7f280f29a80f6e0798f5d6e07c5dd8726620fe Mon Sep 17 00:00:00 2001 From: Ronak Doshi Date: Mon, 10 Aug 2020 09:55:55 -0700 Subject: vmxnet3: use correct tcp hdr length when packet is encapsulated Commit dacce2be3312 ("vmxnet3: add geneve and vxlan tunnel offload support") added support for encapsulation offload. However, while calculating tcp hdr length, it does not take into account if the packet is encapsulated or not. This patch fixes this issue by using correct reference for inner tcp header. Fixes: dacce2be3312 ("vmxnet3: add geneve and vxlan tunnel offload support") Signed-off-by: Ronak Doshi Acked-by: Guolin Yang Signed-off-by: David S. Miller --- drivers/net/vmxnet3/vmxnet3_drv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index ca395f9679d0..2818015324b8 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -886,7 +886,8 @@ vmxnet3_parse_hdr(struct sk_buff *skb, struct vmxnet3_tx_queue *tq, switch (protocol) { case IPPROTO_TCP: - ctx->l4_hdr_size = tcp_hdrlen(skb); + ctx->l4_hdr_size = skb->encapsulation ? inner_tcp_hdrlen(skb) : + tcp_hdrlen(skb); break; case IPPROTO_UDP: ctx->l4_hdr_size = sizeof(struct udphdr); -- cgit v1.2.3-59-g8ed1b From 56e287b3daa20a95e0756e016362f2e96158e1a3 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 10 Aug 2020 10:32:04 -0700 Subject: nfp: update maintainer I'm not doing much work on the NFP driver any more. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- MAINTAINERS | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index d2784b502da0..83ea07711518 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11948,7 +11948,8 @@ F: include/uapi/linux/netrom.h F: net/netrom/ NETRONOME ETHERNET DRIVERS -M: Jakub Kicinski +M: Simon Horman +R: Jakub Kicinski L: oss-drivers@netronome.com S: Maintained F: drivers/net/ethernet/netronome/ -- cgit v1.2.3-59-g8ed1b From f19008e676366c44e9241af57f331b6c6edf9552 Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Mon, 10 Aug 2020 13:38:39 -0400 Subject: tcp: correct read of TFO keys on big endian systems When TFO keys are read back on big endian systems either via the global sysctl interface or via getsockopt() using TCP_FASTOPEN_KEY, the values don't match what was written. For example, on s390x: # echo "1-2-3-4" > /proc/sys/net/ipv4/tcp_fastopen_key # cat /proc/sys/net/ipv4/tcp_fastopen_key 02000000-01000000-04000000-03000000 Instead of: # cat /proc/sys/net/ipv4/tcp_fastopen_key 00000001-00000002-00000003-00000004 Fix this by converting to the correct endianness on read. This was reported by Colin Ian King when running the 'tcp_fastopen_backup_key' net selftest on s390x, which depends on the read value matching what was written. I've confirmed that the test now passes on big and little endian systems. Signed-off-by: Jason Baron Fixes: 438ac88009bc ("net: fastopen: robustness and endianness fixes for SipHash") Cc: Ard Biesheuvel Cc: Eric Dumazet Reported-and-tested-by: Colin Ian King Signed-off-by: David S. Miller --- include/net/tcp.h | 2 ++ net/ipv4/sysctl_net_ipv4.c | 16 ++++------------ net/ipv4/tcp.c | 16 ++++------------ net/ipv4/tcp_fastopen.c | 23 +++++++++++++++++++++++ 4 files changed, 33 insertions(+), 24 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index dbf5c791a6eb..eab6c7510b5b 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1672,6 +1672,8 @@ void tcp_fastopen_destroy_cipher(struct sock *sk); void tcp_fastopen_ctx_destroy(struct net *net); int tcp_fastopen_reset_cipher(struct net *net, struct sock *sk, void *primary_key, void *backup_key); +int tcp_fastopen_get_cipher(struct net *net, struct inet_connection_sock *icsk, + u64 *key); void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb); struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb, struct request_sock *req, diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 5653e3b011bf..54023a46db04 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -301,24 +301,16 @@ static int proc_tcp_fastopen_key(struct ctl_table *table, int write, struct ctl_table tbl = { .maxlen = ((TCP_FASTOPEN_KEY_LENGTH * 2 * TCP_FASTOPEN_KEY_MAX) + (TCP_FASTOPEN_KEY_MAX * 5)) }; - struct tcp_fastopen_context *ctx; - u32 user_key[TCP_FASTOPEN_KEY_MAX * 4]; - __le32 key[TCP_FASTOPEN_KEY_MAX * 4]; + u32 user_key[TCP_FASTOPEN_KEY_BUF_LENGTH / sizeof(u32)]; + __le32 key[TCP_FASTOPEN_KEY_BUF_LENGTH / sizeof(__le32)]; char *backup_data; - int ret, i = 0, off = 0, n_keys = 0; + int ret, i = 0, off = 0, n_keys; tbl.data = kmalloc(tbl.maxlen, GFP_KERNEL); if (!tbl.data) return -ENOMEM; - rcu_read_lock(); - ctx = rcu_dereference(net->ipv4.tcp_fastopen_ctx); - if (ctx) { - n_keys = tcp_fastopen_context_len(ctx); - memcpy(&key[0], &ctx->key[0], TCP_FASTOPEN_KEY_LENGTH * n_keys); - } - rcu_read_unlock(); - + n_keys = tcp_fastopen_get_cipher(net, NULL, (u64 *)key); if (!n_keys) { memset(&key[0], 0, TCP_FASTOPEN_KEY_LENGTH); n_keys = 1; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index c06d2bfd2ec4..31f3b858db81 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3685,22 +3685,14 @@ static int do_tcp_getsockopt(struct sock *sk, int level, return 0; case TCP_FASTOPEN_KEY: { - __u8 key[TCP_FASTOPEN_KEY_BUF_LENGTH]; - struct tcp_fastopen_context *ctx; - unsigned int key_len = 0; + u64 key[TCP_FASTOPEN_KEY_BUF_LENGTH / sizeof(u64)]; + unsigned int key_len; if (get_user(len, optlen)) return -EFAULT; - rcu_read_lock(); - ctx = rcu_dereference(icsk->icsk_accept_queue.fastopenq.ctx); - if (ctx) { - key_len = tcp_fastopen_context_len(ctx) * - TCP_FASTOPEN_KEY_LENGTH; - memcpy(&key[0], &ctx->key[0], key_len); - } - rcu_read_unlock(); - + key_len = tcp_fastopen_get_cipher(net, icsk, key) * + TCP_FASTOPEN_KEY_LENGTH; len = min_t(unsigned int, len, key_len); if (put_user(len, optlen)) return -EFAULT; diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 19ad9586c720..1bb85821f1e6 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -108,6 +108,29 @@ out: return err; } +int tcp_fastopen_get_cipher(struct net *net, struct inet_connection_sock *icsk, + u64 *key) +{ + struct tcp_fastopen_context *ctx; + int n_keys = 0, i; + + rcu_read_lock(); + if (icsk) + ctx = rcu_dereference(icsk->icsk_accept_queue.fastopenq.ctx); + else + ctx = rcu_dereference(net->ipv4.tcp_fastopen_ctx); + if (ctx) { + n_keys = tcp_fastopen_context_len(ctx); + for (i = 0; i < n_keys; i++) { + put_unaligned_le64(ctx->key[i].key[0], key + (i * 2)); + put_unaligned_le64(ctx->key[i].key[1], key + (i * 2) + 1); + } + } + rcu_read_unlock(); + + return n_keys; +} + static bool __tcp_fastopen_cookie_gen_cipher(struct request_sock *req, struct sk_buff *syn, const siphash_key_t *key, -- cgit v1.2.3-59-g8ed1b From 444da3f52407d74c9aa12187ac6b01f76ee47d62 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 10 Aug 2020 11:21:11 -0700 Subject: bitfield.h: don't compile-time validate _val in FIELD_FIT When ur_load_imm_any() is inlined into jeq_imm(), it's possible for the compiler to deduce a case where _val can only have the value of -1 at compile time. Specifically, /* struct bpf_insn: _s32 imm */ u64 imm = insn->imm; /* sign extend */ if (imm >> 32) { /* non-zero only if insn->imm is negative */ /* inlined from ur_load_imm_any */ u32 __imm = imm >> 32; /* therefore, always 0xffffffff */ if (__builtin_constant_p(__imm) && __imm > 255) compiletime_assert_XXX() This can result in tripping a BUILD_BUG_ON() in __BF_FIELD_CHECK() that checks that a given value is representable in one byte (interpreted as unsigned). FIELD_FIT() should return true or false at runtime for whether a value can fit for not. Don't break the build over a value that's too large for the mask. We'd prefer to keep the inlining and compiler optimizations though we know this case will always return false. Cc: stable@vger.kernel.org Fixes: 1697599ee301a ("bitfield.h: add FIELD_FIT() helper") Link: https://lore.kernel.org/kernel-hardening/CAK7LNASvb0UDJ0U5wkYYRzTAdnEs64HjXpEUL7d=V0CXiAXcNw@mail.gmail.com/ Reported-by: Masahiro Yamada Debugged-by: Sami Tolvanen Signed-off-by: Jakub Kicinski Signed-off-by: Nick Desaulniers Signed-off-by: David S. Miller --- include/linux/bitfield.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/bitfield.h b/include/linux/bitfield.h index 48ea093ff04c..4e035aca6f7e 100644 --- a/include/linux/bitfield.h +++ b/include/linux/bitfield.h @@ -77,7 +77,7 @@ */ #define FIELD_FIT(_mask, _val) \ ({ \ - __BF_FIELD_CHECK(_mask, 0ULL, _val, "FIELD_FIT: "); \ + __BF_FIELD_CHECK(_mask, 0ULL, 0ULL, "FIELD_FIT: "); \ !((((typeof(_mask))_val) << __bf_shf(_mask)) & ~(_mask)); \ }) -- cgit v1.2.3-59-g8ed1b From b06c19d9f827f6743122795570bfc0c72db482b0 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Mon, 10 Aug 2020 17:02:58 -0700 Subject: net/tls: Fix kmap usage When MSG_OOB is specified to tls_device_sendpage() the mapped page is never unmapped. Hold off mapping the page until after the flags are checked and the page is actually needed. Fixes: e8f69799810c ("net/tls: Add generic NIC offload infrastructure") Signed-off-by: Ira Weiny Reviewed-by: Jakub Kicinski Signed-off-by: David S. Miller --- net/tls/tls_device.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index 18fa6067bb7f..b74e2741f74f 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -561,7 +561,7 @@ int tls_device_sendpage(struct sock *sk, struct page *page, { struct tls_context *tls_ctx = tls_get_ctx(sk); struct iov_iter msg_iter; - char *kaddr = kmap(page); + char *kaddr; struct kvec iov; int rc; @@ -576,6 +576,7 @@ int tls_device_sendpage(struct sock *sk, struct page *page, goto out; } + kaddr = kmap(page); iov.iov_base = kaddr + offset; iov.iov_len = size; iov_iter_kvec(&msg_iter, WRITE, &iov, 1, size); -- cgit v1.2.3-59-g8ed1b From c79f428d6f14e754503fa7a3145d62039ccd05c9 Mon Sep 17 00:00:00 2001 From: Xie He Date: Sat, 8 Aug 2020 19:35:48 -0700 Subject: drivers/net/wan/x25_asy: Added needed_headroom and a skb->len check 1. Added a skb->len check This driver expects upper layers to include a pseudo header of 1 byte when passing down a skb for transmission. This driver will read this 1-byte header. This patch added a skb->len check before reading the header to make sure the header exists. 2. Added needed_headroom When this driver transmits data, first this driver will remove a pseudo header of 1 byte, then the lapb module will prepend the LAPB header of 2 or 3 bytes. So the value of needed_headroom in this driver should be 3 - 1. Cc: Willem de Bruijn Cc: Martin Schiller Signed-off-by: Xie He Acked-by: Willem de Bruijn Signed-off-by: David S. Miller --- drivers/net/wan/x25_asy.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/net/wan/x25_asy.c b/drivers/net/wan/x25_asy.c index 84640a0c13f3..de7984463595 100644 --- a/drivers/net/wan/x25_asy.c +++ b/drivers/net/wan/x25_asy.c @@ -307,6 +307,14 @@ static netdev_tx_t x25_asy_xmit(struct sk_buff *skb, return NETDEV_TX_OK; } + /* There should be a pseudo header of 1 byte added by upper layers. + * Check to make sure it is there before reading it. + */ + if (skb->len < 1) { + kfree_skb(skb); + return NETDEV_TX_OK; + } + switch (skb->data[0]) { case X25_IFACE_DATA: break; @@ -752,6 +760,12 @@ static void x25_asy_setup(struct net_device *dev) dev->type = ARPHRD_X25; dev->tx_queue_len = 10; + /* When transmitting data: + * first this driver removes a pseudo header of 1 byte, + * then the lapb module prepends an LAPB header of at most 3 bytes. + */ + dev->needed_headroom = 3 - 1; + /* New-style flags. */ dev->flags = IFF_NOARP; } -- cgit v1.2.3-59-g8ed1b From 1dab5877e8ebb7a00fbf0e7d797c869aa37830b9 Mon Sep 17 00:00:00 2001 From: Luo bin Date: Sun, 9 Aug 2020 11:53:49 +0800 Subject: hinic: fix strncpy output truncated compile warnings fix the compile warnings of 'strncpy' output truncated before terminating nul copying N bytes from a string of the same length Signed-off-by: Luo bin Reported-by: kernel test robot Signed-off-by: David S. Miller --- drivers/net/ethernet/huawei/hinic/hinic_devlink.c | 32 +++++++++-------------- drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h | 2 -- 2 files changed, 12 insertions(+), 22 deletions(-) diff --git a/drivers/net/ethernet/huawei/hinic/hinic_devlink.c b/drivers/net/ethernet/huawei/hinic/hinic_devlink.c index c6adc776f3c8..16bda7381ba0 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_devlink.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_devlink.c @@ -334,19 +334,14 @@ void hinic_devlink_unregister(struct hinic_devlink_priv *priv) static int chip_fault_show(struct devlink_fmsg *fmsg, struct hinic_fault_event *event) { - char fault_level[FAULT_TYPE_MAX][FAULT_SHOW_STR_LEN + 1] = { - "fatal", "reset", "flr", "general", "suggestion"}; - char level_str[FAULT_SHOW_STR_LEN + 1] = {0}; - u8 level; + const char * const level_str[FAULT_LEVEL_MAX + 1] = { + "fatal", "reset", "flr", "general", "suggestion", "Unknown"}; + u8 fault_level; int err; - level = event->event.chip.err_level; - if (level < FAULT_LEVEL_MAX) - strncpy(level_str, fault_level[level], strlen(fault_level[level])); - else - strncpy(level_str, "Unknown", strlen("Unknown")); - - if (level == FAULT_LEVEL_SERIOUS_FLR) { + fault_level = (event->event.chip.err_level < FAULT_LEVEL_MAX) ? + event->event.chip.err_level : FAULT_LEVEL_MAX; + if (fault_level == FAULT_LEVEL_SERIOUS_FLR) { err = devlink_fmsg_u32_pair_put(fmsg, "Function level err func_id", (u32)event->event.chip.func_id); if (err) @@ -361,7 +356,7 @@ static int chip_fault_show(struct devlink_fmsg *fmsg, if (err) return err; - err = devlink_fmsg_string_pair_put(fmsg, "err_level", level_str); + err = devlink_fmsg_string_pair_put(fmsg, "err_level", level_str[fault_level]); if (err) return err; @@ -381,18 +376,15 @@ static int chip_fault_show(struct devlink_fmsg *fmsg, static int fault_report_show(struct devlink_fmsg *fmsg, struct hinic_fault_event *event) { - char fault_type[FAULT_TYPE_MAX][FAULT_SHOW_STR_LEN + 1] = { + const char * const type_str[FAULT_TYPE_MAX + 1] = { "chip", "ucode", "mem rd timeout", "mem wr timeout", - "reg rd timeout", "reg wr timeout", "phy fault"}; - char type_str[FAULT_SHOW_STR_LEN + 1] = {0}; + "reg rd timeout", "reg wr timeout", "phy fault", "Unknown"}; + u8 fault_type; int err; - if (event->type < FAULT_TYPE_MAX) - strncpy(type_str, fault_type[event->type], strlen(fault_type[event->type])); - else - strncpy(type_str, "Unknown", strlen("Unknown")); + fault_type = (event->type < FAULT_TYPE_MAX) ? event->type : FAULT_TYPE_MAX; - err = devlink_fmsg_string_pair_put(fmsg, "Fault type", type_str); + err = devlink_fmsg_string_pair_put(fmsg, "Fault type", type_str[fault_type]); if (err) return err; diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h index dc6e645f2689..701eb81e09a7 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h @@ -504,8 +504,6 @@ enum hinic_fault_type { FAULT_TYPE_MAX, }; -#define FAULT_SHOW_STR_LEN 16 - enum hinic_fault_err_level { FAULT_LEVEL_FATAL, FAULT_LEVEL_SERIOUS_RESET, -- cgit v1.2.3-59-g8ed1b From 26896f01467a28651f7a536143fe5ac8449d4041 Mon Sep 17 00:00:00 2001 From: Qingyu Li Date: Mon, 10 Aug 2020 09:51:00 +0800 Subject: net/nfc/rawsock.c: add CAP_NET_RAW check. When creating a raw AF_NFC socket, CAP_NET_RAW needs to be checked first. Signed-off-by: Qingyu Li Signed-off-by: David S. Miller --- net/nfc/rawsock.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/nfc/rawsock.c b/net/nfc/rawsock.c index b2061b6746ea..955c195ae14b 100644 --- a/net/nfc/rawsock.c +++ b/net/nfc/rawsock.c @@ -328,10 +328,13 @@ static int rawsock_create(struct net *net, struct socket *sock, if ((sock->type != SOCK_SEQPACKET) && (sock->type != SOCK_RAW)) return -ESOCKTNOSUPPORT; - if (sock->type == SOCK_RAW) + if (sock->type == SOCK_RAW) { + if (!capable(CAP_NET_RAW)) + return -EPERM; sock->ops = &rawsock_raw_ops; - else + } else { sock->ops = &rawsock_ops; + } sk = sk_alloc(net, PF_NFC, GFP_ATOMIC, nfc_proto->proto, kern); if (!sk) -- cgit v1.2.3-59-g8ed1b From e71642009cbd4a874c3af71a4b16b39499f58658 Mon Sep 17 00:00:00 2001 From: Xu Wang Date: Mon, 10 Aug 2020 02:38:07 +0000 Subject: ionic_lif: Use devm_kcalloc() in ionic_qcq_alloc() A multiplication for the size determination of a memory allocation indicated that an array data structure should be processed. Thus use the corresponding function "devm_kcalloc". Signed-off-by: Xu Wang Acked-by: Shannon Nelson Signed-off-by: David S. Miller --- drivers/net/ethernet/pensando/ionic/ionic_lif.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index 1944bf5264db..26988ad7ec97 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -412,7 +412,7 @@ static int ionic_qcq_alloc(struct ionic_lif *lif, unsigned int type, new->flags = flags; - new->q.info = devm_kzalloc(dev, sizeof(*new->q.info) * num_descs, + new->q.info = devm_kcalloc(dev, num_descs, sizeof(*new->q.info), GFP_KERNEL); if (!new->q.info) { netdev_err(lif->netdev, "Cannot allocate queue info\n"); @@ -462,7 +462,7 @@ static int ionic_qcq_alloc(struct ionic_lif *lif, unsigned int type, new->intr.index = IONIC_INTR_INDEX_NOT_ASSIGNED; } - new->cq.info = devm_kzalloc(dev, sizeof(*new->cq.info) * num_descs, + new->cq.info = devm_kcalloc(dev, num_descs, sizeof(*new->cq.info), GFP_KERNEL); if (!new->cq.info) { netdev_err(lif->netdev, "Cannot allocate completion queue info\n"); -- cgit v1.2.3-59-g8ed1b From 50caa777a3a24d7027748e96265728ce748b41ef Mon Sep 17 00:00:00 2001 From: Wang Hai Date: Mon, 10 Aug 2020 10:57:05 +0800 Subject: net: qcom/emac: add missed clk_disable_unprepare in error path of emac_clks_phase1_init Fix the missing clk_disable_unprepare() before return from emac_clks_phase1_init() in the error handling case. Fixes: b9b17debc69d ("net: emac: emac gigabit ethernet controller driver") Reported-by: Hulk Robot Signed-off-by: Wang Hai Acked-by: Timur Tabi Signed-off-by: David S. Miller --- drivers/net/ethernet/qualcomm/emac/emac.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/qualcomm/emac/emac.c b/drivers/net/ethernet/qualcomm/emac/emac.c index 20b1b43a0e39..1166b98d8bb2 100644 --- a/drivers/net/ethernet/qualcomm/emac/emac.c +++ b/drivers/net/ethernet/qualcomm/emac/emac.c @@ -474,13 +474,24 @@ static int emac_clks_phase1_init(struct platform_device *pdev, ret = clk_prepare_enable(adpt->clk[EMAC_CLK_CFG_AHB]); if (ret) - return ret; + goto disable_clk_axi; ret = clk_set_rate(adpt->clk[EMAC_CLK_HIGH_SPEED], 19200000); if (ret) - return ret; + goto disable_clk_cfg_ahb; + + ret = clk_prepare_enable(adpt->clk[EMAC_CLK_HIGH_SPEED]); + if (ret) + goto disable_clk_cfg_ahb; - return clk_prepare_enable(adpt->clk[EMAC_CLK_HIGH_SPEED]); + return 0; + +disable_clk_cfg_ahb: + clk_disable_unprepare(adpt->clk[EMAC_CLK_CFG_AHB]); +disable_clk_axi: + clk_disable_unprepare(adpt->clk[EMAC_CLK_AXI]); + + return ret; } /* Enable clocks; needs emac_clks_phase1_init to be called before */ -- cgit v1.2.3-59-g8ed1b From 0f5907af39137f8183ed536aaa00f322d7365130 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Mon, 10 Aug 2020 08:16:58 -0400 Subject: net: Fix potential memory leak in proto_register() If we failed to assign proto idx, we free the twsk_slab_name but forget to free the twsk_slab. Add a helper function tw_prot_cleanup() to free these together and also use this helper function in proto_unregister(). Fixes: b45ce32135d1 ("sock: fix potential memory leak in proto_register()") Signed-off-by: Miaohe Lin Signed-off-by: David S. Miller --- net/core/sock.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/net/core/sock.c b/net/core/sock.c index 49cd5ffe673e..c9083ad44ea1 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -3406,6 +3406,16 @@ static void sock_inuse_add(struct net *net, int val) } #endif +static void tw_prot_cleanup(struct timewait_sock_ops *twsk_prot) +{ + if (!twsk_prot) + return; + kfree(twsk_prot->twsk_slab_name); + twsk_prot->twsk_slab_name = NULL; + kmem_cache_destroy(twsk_prot->twsk_slab); + twsk_prot->twsk_slab = NULL; +} + static void req_prot_cleanup(struct request_sock_ops *rsk_prot) { if (!rsk_prot) @@ -3476,7 +3486,7 @@ int proto_register(struct proto *prot, int alloc_slab) prot->slab_flags, NULL); if (prot->twsk_prot->twsk_slab == NULL) - goto out_free_timewait_sock_slab_name; + goto out_free_timewait_sock_slab; } } @@ -3484,15 +3494,15 @@ int proto_register(struct proto *prot, int alloc_slab) ret = assign_proto_idx(prot); if (ret) { mutex_unlock(&proto_list_mutex); - goto out_free_timewait_sock_slab_name; + goto out_free_timewait_sock_slab; } list_add(&prot->node, &proto_list); mutex_unlock(&proto_list_mutex); return ret; -out_free_timewait_sock_slab_name: +out_free_timewait_sock_slab: if (alloc_slab && prot->twsk_prot) - kfree(prot->twsk_prot->twsk_slab_name); + tw_prot_cleanup(prot->twsk_prot); out_free_request_sock_slab: if (alloc_slab) { req_prot_cleanup(prot->rsk_prot); @@ -3516,12 +3526,7 @@ void proto_unregister(struct proto *prot) prot->slab = NULL; req_prot_cleanup(prot->rsk_prot); - - if (prot->twsk_prot != NULL && prot->twsk_prot->twsk_slab != NULL) { - kmem_cache_destroy(prot->twsk_prot->twsk_slab); - kfree(prot->twsk_prot->twsk_slab_name); - prot->twsk_prot->twsk_slab = NULL; - } + tw_prot_cleanup(prot->twsk_prot); } EXPORT_SYMBOL(proto_unregister); -- cgit v1.2.3-59-g8ed1b From 1b8ef1423dbfd34de2439a2db457b84480b7c8a8 Mon Sep 17 00:00:00 2001 From: Marek Behún Date: Mon, 10 Aug 2020 17:01:58 +0200 Subject: net: phy: marvell10g: fix null pointer dereference MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit c3e302edca24 ("net: phy: marvell10g: fix temperature sensor on 2110") added a check for PHY ID via phydev->drv->phy_id in a function which is called by devres at a time when phydev->drv is already set to null by phy_remove function. This null pointer dereference can be triggered via SFP subsystem with a SFP module containing this Marvell PHY. When the SFP interface is put down, the SFP subsystem removes the PHY. Fixes: c3e302edca24 ("net: phy: marvell10g: fix temperature sensor on 2110") Signed-off-by: Marek Behún Cc: Maxime Chevallier Cc: Andrew Lunn Cc: Baruch Siach Cc: Russell King Signed-off-by: David S. Miller --- drivers/net/phy/marvell10g.c | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/drivers/net/phy/marvell10g.c b/drivers/net/phy/marvell10g.c index a7610eb55f30..1901ba277413 100644 --- a/drivers/net/phy/marvell10g.c +++ b/drivers/net/phy/marvell10g.c @@ -208,13 +208,6 @@ static int mv3310_hwmon_config(struct phy_device *phydev, bool enable) MV_V2_TEMP_CTRL_MASK, val); } -static void mv3310_hwmon_disable(void *data) -{ - struct phy_device *phydev = data; - - mv3310_hwmon_config(phydev, false); -} - static int mv3310_hwmon_probe(struct phy_device *phydev) { struct device *dev = &phydev->mdio.dev; @@ -238,10 +231,6 @@ static int mv3310_hwmon_probe(struct phy_device *phydev) if (ret) return ret; - ret = devm_add_action_or_reset(dev, mv3310_hwmon_disable, phydev); - if (ret) - return ret; - priv->hwmon_dev = devm_hwmon_device_register_with_info(dev, priv->hwmon_name, phydev, &mv3310_hwmon_chip_info, NULL); @@ -426,6 +415,11 @@ static int mv3310_probe(struct phy_device *phydev) return phy_sfp_probe(phydev, &mv3310_sfp_ops); } +static void mv3310_remove(struct phy_device *phydev) +{ + mv3310_hwmon_config(phydev, false); +} + static int mv3310_suspend(struct phy_device *phydev) { return mv3310_power_down(phydev); @@ -784,6 +778,7 @@ static struct phy_driver mv3310_drivers[] = { .read_status = mv3310_read_status, .get_tunable = mv3310_get_tunable, .set_tunable = mv3310_set_tunable, + .remove = mv3310_remove, }, { .phy_id = MARVELL_PHY_ID_88E2110, @@ -798,6 +793,7 @@ static struct phy_driver mv3310_drivers[] = { .read_status = mv3310_read_status, .get_tunable = mv3310_get_tunable, .set_tunable = mv3310_set_tunable, + .remove = mv3310_remove, }, }; -- cgit v1.2.3-59-g8ed1b From 62ffc589abb176821662efc4525ee4ac0b9c3894 Mon Sep 17 00:00:00 2001 From: Tim Froidcoeur Date: Tue, 11 Aug 2020 20:33:23 +0200 Subject: net: refactor bind_bucket fastreuse into helper Refactor the fastreuse update code in inet_csk_get_port into a small helper function that can be called from other places. Acked-by: Matthieu Baerts Signed-off-by: Tim Froidcoeur Signed-off-by: David S. Miller --- include/net/inet_connection_sock.h | 4 ++ net/ipv4/inet_connection_sock.c | 97 +++++++++++++++++++++----------------- 2 files changed, 57 insertions(+), 44 deletions(-) diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 1e209ce7d1bd..aa8893c68c50 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -304,6 +304,10 @@ void inet_csk_listen_stop(struct sock *sk); void inet_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr); +/* update the fast reuse flag when adding a socket */ +void inet_csk_update_fastreuse(struct inet_bind_bucket *tb, + struct sock *sk); + struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu); #define TCP_PINGPONG_THRESH 3 diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index d1a3913eebe0..b457dd2d6c75 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -296,6 +296,57 @@ static inline int sk_reuseport_match(struct inet_bind_bucket *tb, ipv6_only_sock(sk), true, false); } +void inet_csk_update_fastreuse(struct inet_bind_bucket *tb, + struct sock *sk) +{ + kuid_t uid = sock_i_uid(sk); + bool reuse = sk->sk_reuse && sk->sk_state != TCP_LISTEN; + + if (hlist_empty(&tb->owners)) { + tb->fastreuse = reuse; + if (sk->sk_reuseport) { + tb->fastreuseport = FASTREUSEPORT_ANY; + tb->fastuid = uid; + tb->fast_rcv_saddr = sk->sk_rcv_saddr; + tb->fast_ipv6_only = ipv6_only_sock(sk); + tb->fast_sk_family = sk->sk_family; +#if IS_ENABLED(CONFIG_IPV6) + tb->fast_v6_rcv_saddr = sk->sk_v6_rcv_saddr; +#endif + } else { + tb->fastreuseport = 0; + } + } else { + if (!reuse) + tb->fastreuse = 0; + if (sk->sk_reuseport) { + /* We didn't match or we don't have fastreuseport set on + * the tb, but we have sk_reuseport set on this socket + * and we know that there are no bind conflicts with + * this socket in this tb, so reset our tb's reuseport + * settings so that any subsequent sockets that match + * our current socket will be put on the fast path. + * + * If we reset we need to set FASTREUSEPORT_STRICT so we + * do extra checking for all subsequent sk_reuseport + * socks. + */ + if (!sk_reuseport_match(tb, sk)) { + tb->fastreuseport = FASTREUSEPORT_STRICT; + tb->fastuid = uid; + tb->fast_rcv_saddr = sk->sk_rcv_saddr; + tb->fast_ipv6_only = ipv6_only_sock(sk); + tb->fast_sk_family = sk->sk_family; +#if IS_ENABLED(CONFIG_IPV6) + tb->fast_v6_rcv_saddr = sk->sk_v6_rcv_saddr; +#endif + } + } else { + tb->fastreuseport = 0; + } + } +} + /* Obtain a reference to a local port for the given sock, * if snum is zero it means select any available local port. * We try to allocate an odd port (and leave even ports for connect()) @@ -308,7 +359,6 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum) struct inet_bind_hashbucket *head; struct net *net = sock_net(sk); struct inet_bind_bucket *tb = NULL; - kuid_t uid = sock_i_uid(sk); int l3mdev; l3mdev = inet_sk_bound_l3mdev(sk); @@ -345,49 +395,8 @@ tb_found: goto fail_unlock; } success: - if (hlist_empty(&tb->owners)) { - tb->fastreuse = reuse; - if (sk->sk_reuseport) { - tb->fastreuseport = FASTREUSEPORT_ANY; - tb->fastuid = uid; - tb->fast_rcv_saddr = sk->sk_rcv_saddr; - tb->fast_ipv6_only = ipv6_only_sock(sk); - tb->fast_sk_family = sk->sk_family; -#if IS_ENABLED(CONFIG_IPV6) - tb->fast_v6_rcv_saddr = sk->sk_v6_rcv_saddr; -#endif - } else { - tb->fastreuseport = 0; - } - } else { - if (!reuse) - tb->fastreuse = 0; - if (sk->sk_reuseport) { - /* We didn't match or we don't have fastreuseport set on - * the tb, but we have sk_reuseport set on this socket - * and we know that there are no bind conflicts with - * this socket in this tb, so reset our tb's reuseport - * settings so that any subsequent sockets that match - * our current socket will be put on the fast path. - * - * If we reset we need to set FASTREUSEPORT_STRICT so we - * do extra checking for all subsequent sk_reuseport - * socks. - */ - if (!sk_reuseport_match(tb, sk)) { - tb->fastreuseport = FASTREUSEPORT_STRICT; - tb->fastuid = uid; - tb->fast_rcv_saddr = sk->sk_rcv_saddr; - tb->fast_ipv6_only = ipv6_only_sock(sk); - tb->fast_sk_family = sk->sk_family; -#if IS_ENABLED(CONFIG_IPV6) - tb->fast_v6_rcv_saddr = sk->sk_v6_rcv_saddr; -#endif - } - } else { - tb->fastreuseport = 0; - } - } + inet_csk_update_fastreuse(tb, sk); + if (!inet_csk(sk)->icsk_bind_hash) inet_bind_hash(sk, tb, port); WARN_ON(inet_csk(sk)->icsk_bind_hash != tb); -- cgit v1.2.3-59-g8ed1b From d76f3351cea2d927fdf70dd7c06898235035e84e Mon Sep 17 00:00:00 2001 From: Tim Froidcoeur Date: Tue, 11 Aug 2020 20:33:24 +0200 Subject: net: initialize fastreuse on inet_inherit_port In the case of TPROXY, bind_conflict optimizations for SO_REUSEADDR or SO_REUSEPORT are broken, possibly resulting in O(n) instead of O(1) bind behaviour or in the incorrect reuse of a bind. the kernel keeps track for each bind_bucket if all sockets in the bind_bucket support SO_REUSEADDR or SO_REUSEPORT in two fastreuse flags. These flags allow skipping the costly bind_conflict check when possible (meaning when all sockets have the proper SO_REUSE option). For every socket added to a bind_bucket, these flags need to be updated. As soon as a socket that does not support reuse is added, the flag is set to false and will never go back to true, unless the bind_bucket is deleted. Note that there is no mechanism to re-evaluate these flags when a socket is removed (this might make sense when removing a socket that would not allow reuse; this leaves room for a future patch). For this optimization to work, it is mandatory that these flags are properly initialized and updated. When a child socket is created from a listen socket in __inet_inherit_port, the TPROXY case could create a new bind bucket without properly initializing these flags, thus preventing the optimization to work. Alternatively, a socket not allowing reuse could be added to an existing bind bucket without updating the flags, causing bind_conflict to never be called as it should. Call inet_csk_update_fastreuse when __inet_inherit_port decides to create a new bind_bucket or use a different bind_bucket than the one of the listen socket. Fixes: 093d282321da ("tproxy: fix hash locking issue when using port redirection in __inet_inherit_port()") Acked-by: Matthieu Baerts Signed-off-by: Tim Froidcoeur Signed-off-by: David S. Miller --- net/ipv4/inet_hashtables.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 4eb4cd8d20dd..239e54474b65 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -163,6 +163,7 @@ int __inet_inherit_port(const struct sock *sk, struct sock *child) return -ENOMEM; } } + inet_csk_update_fastreuse(tb, child); } inet_bind_hash(child, tb, port); spin_unlock(&head->lock); -- cgit v1.2.3-59-g8ed1b From 41077c99026643c633a1f79376d369fffc757e06 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Wed, 12 Aug 2020 10:32:49 +0100 Subject: sfc: fix ef100 design-param checking The handling of the RXQ/TXQ size granularity design-params had two problems: it had a 64-bit divide that didn't build on 32-bit platforms, and it could divide by zero if the NIC supplied 0 as the value of the design-param. Fix both by checking for 0 and for a granularity bigger than our min-size; if the granularity <= EFX_MIN_DMAQ_SIZE then it fits in 32 bits, so we can cast it to u32 for the divide. Reported-by: kernel test robot Signed-off-by: Edward Cree Reviewed-by: Guenter Roeck Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/ef100_nic.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/sfc/ef100_nic.c b/drivers/net/ethernet/sfc/ef100_nic.c index 36598d0542ed..206d70f9d95b 100644 --- a/drivers/net/ethernet/sfc/ef100_nic.c +++ b/drivers/net/ethernet/sfc/ef100_nic.c @@ -979,7 +979,8 @@ static int ef100_process_design_param(struct efx_nic *efx, * EFX_MIN_DMAQ_SIZE is divisible by GRANULARITY. * This is very unlikely to fail. */ - if (EFX_MIN_DMAQ_SIZE % reader->value) { + if (!reader->value || reader->value > EFX_MIN_DMAQ_SIZE || + EFX_MIN_DMAQ_SIZE % (u32)reader->value) { netif_err(efx, probe, efx->net_dev, "%s size granularity is %llu, can't guarantee safety\n", reader->type == ESE_EF100_DP_GZ_RXQ_SIZE_GRANULARITY ? "RXQ" : "TXQ", -- cgit v1.2.3-59-g8ed1b From 1980c05844830a44708c98c96d600833aa3fae08 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Wed, 12 Aug 2020 14:56:02 +0200 Subject: vsock: fix potential null pointer dereference in vsock_poll() syzbot reported this issue where in the vsock_poll() we find the socket state at TCP_ESTABLISHED, but 'transport' is null: general protection fault, probably for non-canonical address 0xdffffc0000000012: 0000 [#1] PREEMPT SMP KASAN KASAN: null-ptr-deref in range [0x0000000000000090-0x0000000000000097] CPU: 0 PID: 8227 Comm: syz-executor.2 Not tainted 5.8.0-rc7-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:vsock_poll+0x75a/0x8e0 net/vmw_vsock/af_vsock.c:1038 Call Trace: sock_poll+0x159/0x460 net/socket.c:1266 vfs_poll include/linux/poll.h:90 [inline] do_pollfd fs/select.c:869 [inline] do_poll fs/select.c:917 [inline] do_sys_poll+0x607/0xd40 fs/select.c:1011 __do_sys_poll fs/select.c:1069 [inline] __se_sys_poll fs/select.c:1057 [inline] __x64_sys_poll+0x18c/0x440 fs/select.c:1057 do_syscall_64+0x60/0xe0 arch/x86/entry/common.c:384 entry_SYSCALL_64_after_hwframe+0x44/0xa9 This issue can happen if the TCP_ESTABLISHED state is set after we read the vsk->transport in the vsock_poll(). We could put barriers to synchronize, but this can only happen during connection setup, so we can simply check that 'transport' is valid. Fixes: c0cfa2d8a788 ("vsock: add multi-transports support") Reported-and-tested-by: syzbot+a61bac2fcc1a7c6623fe@syzkaller.appspotmail.com Signed-off-by: Stefano Garzarella Reviewed-by: Jorgen Hansen Signed-off-by: David S. Miller --- net/vmw_vsock/af_vsock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 27bbcfad9c17..9e93bc201cc0 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1032,7 +1032,7 @@ static __poll_t vsock_poll(struct file *file, struct socket *sock, } /* Connected sockets that can produce data can be written. */ - if (sk->sk_state == TCP_ESTABLISHED) { + if (transport && sk->sk_state == TCP_ESTABLISHED) { if (!(sk->sk_shutdown & SEND_SHUTDOWN)) { bool space_avail_now = false; int ret = transport->notify_poll_out( -- cgit v1.2.3-59-g8ed1b From 06a7a37be55e29961c9ba2abec4d07c8e0e21861 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 12 Aug 2020 21:08:53 +0200 Subject: ipv4: tunnel: fix compilation on ARCH=um With certain configurations, a 64-bit ARCH=um errors out here with an unknown csum_ipv6_magic() function. Include the right header file to always have it. Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- net/ipv4/ip_tunnel_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 75c6013ff9a4..5cb30b7aa752 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -38,6 +38,7 @@ #include #include #include +#include const struct ip_tunnel_encap_ops __rcu * iptun_encaps[MAX_IPTUN_ENCAP_OPS] __read_mostly; -- cgit v1.2.3-59-g8ed1b From 592d751c1e174df5ff219946908b005eb48934b3 Mon Sep 17 00:00:00 2001 From: Jonathan McDowell Date: Wed, 12 Aug 2020 20:37:01 +0100 Subject: net: stmmac: dwmac1000: provide multicast filter fallback If we don't have a hardware multicast filter available then instead of silently failing to listen for the requested ethernet broadcast addresses fall back to receiving all multicast packets, in a similar fashion to other drivers with no multicast filter. Cc: stable@vger.kernel.org Signed-off-by: Jonathan McDowell Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c index efc6ec1b8027..fc8759f146c7 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c @@ -164,6 +164,9 @@ static void dwmac1000_set_filter(struct mac_device_info *hw, value = GMAC_FRAME_FILTER_PR | GMAC_FRAME_FILTER_PCF; } else if (dev->flags & IFF_ALLMULTI) { value = GMAC_FRAME_FILTER_PM; /* pass all multi */ + } else if (!netdev_mc_empty(dev) && (mcbitslog2 == 0)) { + /* Fall back to all multicast if we've no filter */ + value = GMAC_FRAME_FILTER_PM; } else if (!netdev_mc_empty(dev)) { struct netdev_hw_addr *ha; -- cgit v1.2.3-59-g8ed1b From df43dd526e6609769ae513a81443c7aa727c8ca3 Mon Sep 17 00:00:00 2001 From: Jonathan McDowell Date: Wed, 12 Aug 2020 20:37:23 +0100 Subject: net: ethernet: stmmac: Disable hardware multicast filter The IPQ806x does not appear to have a functional multicast ethernet address filter. This was observed as a failure to correctly receive IPv6 packets on a LAN to the all stations address. Checking the vendor driver shows that it does not attempt to enable the multicast filter and instead falls back to receiving all multicast packets, internally setting ALLMULTI. Use the new fallback support in the dwmac1000 driver to correctly achieve the same with the mainline IPQ806x driver. Confirmed to fix IPv6 functionality on an RB3011 router. Cc: stable@vger.kernel.org Signed-off-by: Jonathan McDowell Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c index 02102c781a8c..bf3250e0e59c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c @@ -351,6 +351,7 @@ static int ipq806x_gmac_probe(struct platform_device *pdev) plat_dat->has_gmac = true; plat_dat->bsp_priv = gmac; plat_dat->fix_mac_speed = ipq806x_gmac_fix_mac_speed; + plat_dat->multicast_filter_bins = 0; err = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res); if (err) -- cgit v1.2.3-59-g8ed1b From 2e0d8fef5f76bce0887c73b824d9e625a08e7406 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 11 Aug 2020 18:34:40 -0700 Subject: net: accept an empty mask in /sys/class/net/*/queues/rx-*/rps_cpus MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We must accept an empty mask in store_rps_map(), or we are not able to disable RPS on a queue. Fixes: 07bbecb34106 ("net: Restrict receive packets queuing to housekeeping CPUs") Signed-off-by: Eric Dumazet Reported-by: Maciej Żenczykowski Cc: Alex Belits Cc: Nitesh Narayan Lal Cc: Peter Zijlstra (Intel) Reviewed-by: Maciej Żenczykowski Acked-by: Peter Zijlstra (Intel) Acked-by: Nitesh Narayan Lal Signed-off-by: David S. Miller --- net/core/net-sysfs.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 9de33b594ff2..efec66fa78b7 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -757,11 +757,13 @@ static ssize_t store_rps_map(struct netdev_rx_queue *queue, return err; } - hk_flags = HK_FLAG_DOMAIN | HK_FLAG_WQ; - cpumask_and(mask, mask, housekeeping_cpumask(hk_flags)); - if (cpumask_empty(mask)) { - free_cpumask_var(mask); - return -EINVAL; + if (!cpumask_empty(mask)) { + hk_flags = HK_FLAG_DOMAIN | HK_FLAG_WQ; + cpumask_and(mask, mask, housekeeping_cpumask(hk_flags)); + if (cpumask_empty(mask)) { + free_cpumask_var(mask); + return -EINVAL; + } } map = kzalloc(max_t(unsigned int, -- cgit v1.2.3-59-g8ed1b From 9643609423c7667fb748cc3ccff023d761d0ac90 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 12 Aug 2020 13:26:37 -0700 Subject: Revert "ipv4: tunnel: fix compilation on ARCH=um" This reverts commit 06a7a37be55e29961c9ba2abec4d07c8e0e21861. The bug was already fixed, this added a dup include. Reported-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/ip_tunnel_core.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 5cb30b7aa752..75c6013ff9a4 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -38,7 +38,6 @@ #include #include #include -#include const struct ip_tunnel_encap_ops __rcu * iptun_encaps[MAX_IPTUN_ENCAP_OPS] __read_mostly; -- cgit v1.2.3-59-g8ed1b From 94c7eb54c4b8e81618ec79f414fe1ca5767f9720 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 13 Aug 2020 10:06:43 -0700 Subject: random32: add a tracepoint for prandom_u32() There has been some heat around prandom_u32() lately, and some people were wondering if there was a simple way to determine how often it was used, before considering making it maybe 10 times more expensive. This tracepoint exports the generated pseudo random value. Tested: perf list | grep prandom_u32 random:prandom_u32 [Tracepoint event] perf record -a [-g] [-C1] -e random:prandom_u32 sleep 1 [ perf record: Woken up 0 times to write data ] [ perf record: Captured and wrote 259.748 MB perf.data (924087 samples) ] perf report --nochildren ... 97.67% ksoftirqd/1 [kernel.vmlinux] [k] prandom_u32 | ---prandom_u32 prandom_u32 | |--48.86%--tcp_v4_syn_recv_sock | tcp_check_req | tcp_v4_rcv | ... --48.81%--tcp_conn_request tcp_v4_conn_request tcp_rcv_state_process ... perf script Signed-off-by: Eric Dumazet Cc: Willy Tarreau Cc: Sedat Dilek Tested-by: Sedat Dilek Signed-off-by: David S. Miller --- include/trace/events/random.h | 17 +++++++++++++++++ lib/random32.c | 2 ++ 2 files changed, 19 insertions(+) diff --git a/include/trace/events/random.h b/include/trace/events/random.h index 32c10a515e2d..9570a10cb949 100644 --- a/include/trace/events/random.h +++ b/include/trace/events/random.h @@ -307,6 +307,23 @@ TRACE_EVENT(urandom_read, __entry->pool_left, __entry->input_left) ); +TRACE_EVENT(prandom_u32, + + TP_PROTO(unsigned int ret), + + TP_ARGS(ret), + + TP_STRUCT__entry( + __field( unsigned int, ret) + ), + + TP_fast_assign( + __entry->ret = ret; + ), + + TP_printk("ret=%u" , __entry->ret) +); + #endif /* _TRACE_RANDOM_H */ /* This part must be outside protection */ diff --git a/lib/random32.c b/lib/random32.c index 3d749abb9e80..932345323af0 100644 --- a/lib/random32.c +++ b/lib/random32.c @@ -39,6 +39,7 @@ #include #include #include +#include #ifdef CONFIG_RANDOM32_SELFTEST static void __init prandom_state_selftest(void); @@ -82,6 +83,7 @@ u32 prandom_u32(void) u32 res; res = prandom_u32_state(state); + trace_prandom_u32(res); put_cpu_var(net_rand_state); return res; -- cgit v1.2.3-59-g8ed1b From 88fd1cb80daa20af063bce81e1fad14e945a8dc4 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Thu, 13 Aug 2020 21:45:25 +0206 Subject: af_packet: TPACKET_V3: fix fill status rwlock imbalance After @blk_fill_in_prog_lock is acquired there is an early out vnet situation that can occur. In that case, the rwlock needs to be released. Also, since @blk_fill_in_prog_lock is only acquired when @tp_version is exactly TPACKET_V3, only release it on that exact condition as well. And finally, add sparse annotation so that it is clearer that prb_fill_curr_block() and prb_clear_blk_fill_status() are acquiring and releasing @blk_fill_in_prog_lock, respectively. sparse is still unable to understand the balance, but the warnings are now on a higher level that make more sense. Fixes: 632ca50f2cbd ("af_packet: TPACKET_V3: replace busy-wait loop") Signed-off-by: John Ogness Reported-by: kernel test robot Signed-off-by: David S. Miller --- net/packet/af_packet.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 0b8160d1a6e0..479c257ded73 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -941,6 +941,7 @@ static int prb_queue_frozen(struct tpacket_kbdq_core *pkc) } static void prb_clear_blk_fill_status(struct packet_ring_buffer *rb) + __releases(&pkc->blk_fill_in_prog_lock) { struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(rb); @@ -989,6 +990,7 @@ static void prb_fill_curr_block(char *curr, struct tpacket_kbdq_core *pkc, struct tpacket_block_desc *pbd, unsigned int len) + __acquires(&pkc->blk_fill_in_prog_lock) { struct tpacket3_hdr *ppd; @@ -2286,8 +2288,11 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, if (do_vnet && virtio_net_hdr_from_skb(skb, h.raw + macoff - sizeof(struct virtio_net_hdr), - vio_le(), true, 0)) + vio_le(), true, 0)) { + if (po->tp_version == TPACKET_V3) + prb_clear_blk_fill_status(&po->rx_ring); goto drop_n_account; + } if (po->tp_version <= TPACKET_V2) { packet_increment_rx_head(po, &po->rx_ring); @@ -2393,7 +2398,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, __clear_bit(slot_id, po->rx_ring.rx_owner_map); spin_unlock(&sk->sk_receive_queue.lock); sk->sk_data_ready(sk); - } else { + } else if (po->tp_version == TPACKET_V3) { prb_clear_blk_fill_status(&po->rx_ring); } -- cgit v1.2.3-59-g8ed1b From 1f3a090b9033f69de380c03db3ea1a1015c850cf Mon Sep 17 00:00:00 2001 From: Tonghao Zhang Date: Wed, 12 Aug 2020 17:56:39 +0800 Subject: net: openvswitch: introduce common code for flushing flows MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To avoid some issues, for example RCU usage warning and double free, we should flush the flows under ovs_lock. This patch refactors table_instance_destroy and introduces table_instance_flow_flush which can be invoked by __dp_destroy or ovs_flow_tbl_flush. Fixes: 50b0e61b32ee ("net: openvswitch: fix possible memleak on destroy flow-table") Reported-by: Johan Knöös Reported-at: https://mail.openvswitch.org/pipermail/ovs-discuss/2020-August/050489.html Signed-off-by: Tonghao Zhang Reviewed-by: Cong Wang Signed-off-by: David S. Miller --- net/openvswitch/datapath.c | 10 +++++++++- net/openvswitch/flow_table.c | 35 +++++++++++++++-------------------- net/openvswitch/flow_table.h | 3 +++ 3 files changed, 27 insertions(+), 21 deletions(-) diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 42f8cc70bb2c..6e47ef7ef036 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -1756,6 +1756,7 @@ err: /* Called with ovs_mutex. */ static void __dp_destroy(struct datapath *dp) { + struct flow_table *table = &dp->table; int i; for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) { @@ -1774,7 +1775,14 @@ static void __dp_destroy(struct datapath *dp) */ ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL)); - /* RCU destroy the flow table */ + /* Flush sw_flow in the tables. RCU cb only releases resource + * such as dp, ports and tables. That may avoid some issues + * such as RCU usage warning. + */ + table_instance_flow_flush(table, ovsl_dereference(table->ti), + ovsl_dereference(table->ufid_ti)); + + /* RCU destroy the ports, meters and flow tables. */ call_rcu(&dp->rcu, destroy_dp_rcu); } diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index 8c12675cbb67..e2235849a57e 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -473,19 +473,15 @@ static void table_instance_flow_free(struct flow_table *table, flow_mask_remove(table, flow->mask); } -static void table_instance_destroy(struct flow_table *table, - struct table_instance *ti, - struct table_instance *ufid_ti, - bool deferred) +/* Must be called with OVS mutex held. */ +void table_instance_flow_flush(struct flow_table *table, + struct table_instance *ti, + struct table_instance *ufid_ti) { int i; - if (!ti) - return; - - BUG_ON(!ufid_ti); if (ti->keep_flows) - goto skip_flows; + return; for (i = 0; i < ti->n_buckets; i++) { struct sw_flow *flow; @@ -497,18 +493,16 @@ static void table_instance_destroy(struct flow_table *table, table_instance_flow_free(table, ti, ufid_ti, flow, false); - ovs_flow_free(flow, deferred); + ovs_flow_free(flow, true); } } +} -skip_flows: - if (deferred) { - call_rcu(&ti->rcu, flow_tbl_destroy_rcu_cb); - call_rcu(&ufid_ti->rcu, flow_tbl_destroy_rcu_cb); - } else { - __table_instance_destroy(ti); - __table_instance_destroy(ufid_ti); - } +static void table_instance_destroy(struct table_instance *ti, + struct table_instance *ufid_ti) +{ + call_rcu(&ti->rcu, flow_tbl_destroy_rcu_cb); + call_rcu(&ufid_ti->rcu, flow_tbl_destroy_rcu_cb); } /* No need for locking this function is called from RCU callback or @@ -523,7 +517,7 @@ void ovs_flow_tbl_destroy(struct flow_table *table) call_rcu(&mc->rcu, mask_cache_rcu_cb); call_rcu(&ma->rcu, mask_array_rcu_cb); - table_instance_destroy(table, ti, ufid_ti, false); + table_instance_destroy(ti, ufid_ti); } struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *ti, @@ -641,7 +635,8 @@ int ovs_flow_tbl_flush(struct flow_table *flow_table) flow_table->count = 0; flow_table->ufid_count = 0; - table_instance_destroy(flow_table, old_ti, old_ufid_ti, true); + table_instance_flow_flush(flow_table, old_ti, old_ufid_ti); + table_instance_destroy(old_ti, old_ufid_ti); return 0; err_free_ti: diff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h index 74ce48fecba9..6e7d4ac59353 100644 --- a/net/openvswitch/flow_table.h +++ b/net/openvswitch/flow_table.h @@ -105,5 +105,8 @@ void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src, bool full, const struct sw_flow_mask *mask); void ovs_flow_masks_rebalance(struct flow_table *table); +void table_instance_flow_flush(struct flow_table *table, + struct table_instance *ti, + struct table_instance *ufid_ti); #endif /* flow_table.h */ -- cgit v1.2.3-59-g8ed1b