diff options
Diffstat (limited to 'net/core')
-rw-r--r-- | net/core/dev.c | 78 | ||||
-rw-r--r-- | net/core/filter.c | 32 | ||||
-rw-r--r-- | net/core/flow_dissector.c | 10 | ||||
-rw-r--r-- | net/core/gro_cells.c | 1 | ||||
-rw-r--r-- | net/core/neighbour.c | 7 | ||||
-rw-r--r-- | net/core/netpoll.c | 3 | ||||
-rw-r--r-- | net/core/rtnetlink.c | 5 | ||||
-rw-r--r-- | net/core/skbuff.c | 7 | ||||
-rw-r--r-- | net/core/skmsg.c | 3 | ||||
-rw-r--r-- | net/core/sock.c | 1 | ||||
-rw-r--r-- | net/core/sysctl_net_core.c | 20 |
11 files changed, 109 insertions, 58 deletions
diff --git a/net/core/dev.c b/net/core/dev.c index 77d43ae2a7bb..722d50dbf8a4 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2175,6 +2175,20 @@ static bool remove_xps_queue_cpu(struct net_device *dev, return active; } +static void reset_xps_maps(struct net_device *dev, + struct xps_dev_maps *dev_maps, + bool is_rxqs_map) +{ + if (is_rxqs_map) { + static_key_slow_dec_cpuslocked(&xps_rxqs_needed); + RCU_INIT_POINTER(dev->xps_rxqs_map, NULL); + } else { + RCU_INIT_POINTER(dev->xps_cpus_map, NULL); + } + static_key_slow_dec_cpuslocked(&xps_needed); + kfree_rcu(dev_maps, rcu); +} + static void clean_xps_maps(struct net_device *dev, const unsigned long *mask, struct xps_dev_maps *dev_maps, unsigned int nr_ids, u16 offset, u16 count, bool is_rxqs_map) @@ -2186,18 +2200,15 @@ static void clean_xps_maps(struct net_device *dev, const unsigned long *mask, j < nr_ids;) active |= remove_xps_queue_cpu(dev, dev_maps, j, offset, count); - if (!active) { - if (is_rxqs_map) { - RCU_INIT_POINTER(dev->xps_rxqs_map, NULL); - } else { - RCU_INIT_POINTER(dev->xps_cpus_map, NULL); + if (!active) + reset_xps_maps(dev, dev_maps, is_rxqs_map); - for (i = offset + (count - 1); count--; i--) - netdev_queue_numa_node_write( - netdev_get_tx_queue(dev, i), - NUMA_NO_NODE); + if (!is_rxqs_map) { + for (i = offset + (count - 1); count--; i--) { + netdev_queue_numa_node_write( + netdev_get_tx_queue(dev, i), + NUMA_NO_NODE); } - kfree_rcu(dev_maps, rcu); } } @@ -2234,10 +2245,6 @@ static void netif_reset_xps_queues(struct net_device *dev, u16 offset, false); out_no_maps: - if (static_key_enabled(&xps_rxqs_needed)) - static_key_slow_dec_cpuslocked(&xps_rxqs_needed); - - static_key_slow_dec_cpuslocked(&xps_needed); mutex_unlock(&xps_map_mutex); cpus_read_unlock(); } @@ -2355,9 +2362,12 @@ int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask, if (!new_dev_maps) goto out_no_new_maps; - static_key_slow_inc_cpuslocked(&xps_needed); - if (is_rxqs_map) - static_key_slow_inc_cpuslocked(&xps_rxqs_needed); + if (!dev_maps) { + /* Increment static keys at most once per type */ + static_key_slow_inc_cpuslocked(&xps_needed); + if (is_rxqs_map) + static_key_slow_inc_cpuslocked(&xps_rxqs_needed); + } for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids), j < nr_ids;) { @@ -2455,13 +2465,8 @@ out_no_new_maps: } /* free map if not active */ - if (!active) { - if (is_rxqs_map) - RCU_INIT_POINTER(dev->xps_rxqs_map, NULL); - else - RCU_INIT_POINTER(dev->xps_cpus_map, NULL); - kfree_rcu(dev_maps, rcu); - } + if (!active) + reset_xps_maps(dev, dev_maps, is_rxqs_map); out_no_maps: mutex_unlock(&xps_map_mutex); @@ -3272,7 +3277,7 @@ struct sk_buff *dev_hard_start_xmit(struct sk_buff *first, struct net_device *de } skb = next; - if (netif_xmit_stopped(txq) && skb) { + if (netif_tx_queue_stopped(txq) && skb) { rc = NETDEV_TX_BUSY; break; } @@ -5009,7 +5014,7 @@ static void __netif_receive_skb_list_core(struct list_head *head, bool pfmemallo struct net_device *orig_dev = skb->dev; struct packet_type *pt_prev = NULL; - list_del(&skb->list); + skb_list_del_init(skb); __netif_receive_skb_core(skb, pfmemalloc, &pt_prev); if (!pt_prev) continue; @@ -5165,7 +5170,7 @@ static void netif_receive_skb_list_internal(struct list_head *head) INIT_LIST_HEAD(&sublist); list_for_each_entry_safe(skb, next, head, list) { net_timestamp_check(netdev_tstamp_prequeue, skb); - list_del(&skb->list); + skb_list_del_init(skb); if (!skb_defer_rx_timestamp(skb)) list_add_tail(&skb->list, &sublist); } @@ -5176,7 +5181,7 @@ static void netif_receive_skb_list_internal(struct list_head *head) rcu_read_lock(); list_for_each_entry_safe(skb, next, head, list) { xdp_prog = rcu_dereference(skb->dev->xdp_prog); - list_del(&skb->list); + skb_list_del_init(skb); if (do_xdp_generic(xdp_prog, skb) == XDP_PASS) list_add_tail(&skb->list, &sublist); } @@ -5195,7 +5200,7 @@ static void netif_receive_skb_list_internal(struct list_head *head) if (cpu >= 0) { /* Will be handled, remove from list */ - list_del(&skb->list); + skb_list_del_init(skb); enqueue_to_backlog(skb, cpu, &rflow->last_qtail); } } @@ -5655,6 +5660,10 @@ static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb) skb->vlan_tci = 0; skb->dev = napi->dev; skb->skb_iif = 0; + + /* eth_type_trans() assumes pkt_type is PACKET_HOST */ + skb->pkt_type = PACKET_HOST; + skb->encapsulation = 0; skb_shinfo(skb)->gso_type = 0; skb->truesize = SKB_TRUESIZE(skb_end_offset(skb)); @@ -5966,11 +5975,14 @@ bool napi_complete_done(struct napi_struct *n, int work_done) if (work_done) timeout = n->dev->gro_flush_timeout; + /* When the NAPI instance uses a timeout and keeps postponing + * it, we need to bound somehow the time packets are kept in + * the GRO layer + */ + napi_gro_flush(n, !!timeout); if (timeout) hrtimer_start(&n->timer, ns_to_ktime(timeout), HRTIMER_MODE_REL_PINNED); - else - napi_gro_flush(n, false); } if (unlikely(!list_empty(&n->poll_list))) { /* If n->poll_list is not empty, we need to mask irqs */ @@ -6197,8 +6209,8 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi, napi->skb = NULL; napi->poll = poll; if (weight > NAPI_POLL_WEIGHT) - pr_err_once("netif_napi_add() called with weight %d on device %s\n", - weight, dev->name); + netdev_err_once(dev, "%s() called with weight %d\n", __func__, + weight); napi->weight = weight; list_add(&napi->dev_list, &dev->napi_list); napi->dev = dev; diff --git a/net/core/filter.c b/net/core/filter.c index e521c5ebc7d1..8d2c629501e2 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4852,18 +4852,17 @@ static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple, } else { struct in6_addr *src6 = (struct in6_addr *)&tuple->ipv6.saddr; struct in6_addr *dst6 = (struct in6_addr *)&tuple->ipv6.daddr; - u16 hnum = ntohs(tuple->ipv6.dport); int sdif = inet6_sdif(skb); if (proto == IPPROTO_TCP) sk = __inet6_lookup(net, &tcp_hashinfo, skb, 0, src6, tuple->ipv6.sport, - dst6, hnum, + dst6, ntohs(tuple->ipv6.dport), dif, sdif, &refcounted); else if (likely(ipv6_bpf_stub)) sk = ipv6_bpf_stub->udp6_lib_lookup(net, src6, tuple->ipv6.sport, - dst6, hnum, + dst6, tuple->ipv6.dport, dif, sdif, &udp_table, skb); #endif @@ -4891,22 +4890,23 @@ bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len, struct net *net; family = len == sizeof(tuple->ipv4) ? AF_INET : AF_INET6; - if (unlikely(family == AF_UNSPEC || netns_id > U32_MAX || flags)) + if (unlikely(family == AF_UNSPEC || flags || + !((s32)netns_id < 0 || netns_id <= S32_MAX))) goto out; if (skb->dev) caller_net = dev_net(skb->dev); else caller_net = sock_net(skb->sk); - if (netns_id) { + if ((s32)netns_id < 0) { + net = caller_net; + sk = sk_lookup(net, tuple, skb, family, proto); + } else { net = get_net_ns_by_id(caller_net, netns_id); if (unlikely(!net)) goto out; sk = sk_lookup(net, tuple, skb, family, proto); put_net(net); - } else { - net = caller_net; - sk = sk_lookup(net, tuple, skb, family, proto); } if (sk) @@ -5436,8 +5436,8 @@ static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type if (size != size_default) return false; break; - case bpf_ctx_range(struct __sk_buff, flow_keys): - if (size != sizeof(struct bpf_flow_keys *)) + case bpf_ctx_range_ptr(struct __sk_buff, flow_keys): + if (size != sizeof(__u64)) return false; break; default: @@ -5465,7 +5465,7 @@ static bool sk_filter_is_valid_access(int off, int size, case bpf_ctx_range(struct __sk_buff, data): case bpf_ctx_range(struct __sk_buff, data_meta): case bpf_ctx_range(struct __sk_buff, data_end): - case bpf_ctx_range(struct __sk_buff, flow_keys): + case bpf_ctx_range_ptr(struct __sk_buff, flow_keys): case bpf_ctx_range_till(struct __sk_buff, family, local_port): return false; } @@ -5490,7 +5490,7 @@ static bool cg_skb_is_valid_access(int off, int size, switch (off) { case bpf_ctx_range(struct __sk_buff, tc_classid): case bpf_ctx_range(struct __sk_buff, data_meta): - case bpf_ctx_range(struct __sk_buff, flow_keys): + case bpf_ctx_range_ptr(struct __sk_buff, flow_keys): return false; case bpf_ctx_range(struct __sk_buff, data): case bpf_ctx_range(struct __sk_buff, data_end): @@ -5531,7 +5531,7 @@ static bool lwt_is_valid_access(int off, int size, case bpf_ctx_range(struct __sk_buff, tc_classid): case bpf_ctx_range_till(struct __sk_buff, family, local_port): case bpf_ctx_range(struct __sk_buff, data_meta): - case bpf_ctx_range(struct __sk_buff, flow_keys): + case bpf_ctx_range_ptr(struct __sk_buff, flow_keys): return false; } @@ -5757,7 +5757,7 @@ static bool tc_cls_act_is_valid_access(int off, int size, case bpf_ctx_range(struct __sk_buff, data_end): info->reg_type = PTR_TO_PACKET_END; break; - case bpf_ctx_range(struct __sk_buff, flow_keys): + case bpf_ctx_range_ptr(struct __sk_buff, flow_keys): case bpf_ctx_range_till(struct __sk_buff, family, local_port): return false; } @@ -5959,7 +5959,7 @@ static bool sk_skb_is_valid_access(int off, int size, switch (off) { case bpf_ctx_range(struct __sk_buff, tc_classid): case bpf_ctx_range(struct __sk_buff, data_meta): - case bpf_ctx_range(struct __sk_buff, flow_keys): + case bpf_ctx_range_ptr(struct __sk_buff, flow_keys): return false; } @@ -6040,7 +6040,7 @@ static bool flow_dissector_is_valid_access(int off, int size, case bpf_ctx_range(struct __sk_buff, data_end): info->reg_type = PTR_TO_PACKET_END; break; - case bpf_ctx_range(struct __sk_buff, flow_keys): + case bpf_ctx_range_ptr(struct __sk_buff, flow_keys): info->reg_type = PTR_TO_FLOW_KEYS; break; case bpf_ctx_range(struct __sk_buff, tc_classid): diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 676f3ad629f9..af68207ee56c 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -783,6 +783,7 @@ bool __skb_flow_dissect(const struct sk_buff *skb, /* Pass parameters to the BPF program */ cb->qdisc_cb.flow_keys = &flow_keys; flow_keys.nhoff = nhoff; + flow_keys.thoff = nhoff; bpf_compute_data_pointers((struct sk_buff *)skb); result = BPF_PROG_RUN(attached, skb); @@ -790,9 +791,12 @@ bool __skb_flow_dissect(const struct sk_buff *skb, /* Restore state */ memcpy(cb, &cb_saved, sizeof(cb_saved)); + flow_keys.nhoff = clamp_t(u16, flow_keys.nhoff, 0, skb->len); + flow_keys.thoff = clamp_t(u16, flow_keys.thoff, + flow_keys.nhoff, skb->len); + __skb_flow_bpf_to_target(&flow_keys, flow_dissector, target_container); - key_control->thoff = min_t(u16, key_control->thoff, skb->len); rcu_read_unlock(); return result == BPF_OK; } @@ -1166,8 +1170,8 @@ ip_proto_again: break; } - if (dissector_uses_key(flow_dissector, - FLOW_DISSECTOR_KEY_PORTS)) { + if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS) && + !(key_control->flags & FLOW_DIS_IS_FRAGMENT)) { key_ports = skb_flow_dissector_target(flow_dissector, FLOW_DISSECTOR_KEY_PORTS, target_container); diff --git a/net/core/gro_cells.c b/net/core/gro_cells.c index 4b54e5f107c6..acf45ddbe924 100644 --- a/net/core/gro_cells.c +++ b/net/core/gro_cells.c @@ -84,6 +84,7 @@ void gro_cells_destroy(struct gro_cells *gcells) for_each_possible_cpu(i) { struct gro_cell *cell = per_cpu_ptr(gcells->cells, i); + napi_disable(&cell->napi); netif_napi_del(&cell->napi); __skb_queue_purge(&cell->napi_skbs); } diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 41954e42a2de..5fa32c064baf 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -2494,11 +2494,16 @@ static int neigh_valid_dump_req(const struct nlmsghdr *nlh, ndm = nlmsg_data(nlh); if (ndm->ndm_pad1 || ndm->ndm_pad2 || ndm->ndm_ifindex || - ndm->ndm_state || ndm->ndm_flags || ndm->ndm_type) { + ndm->ndm_state || ndm->ndm_type) { NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor dump request"); return -EINVAL; } + if (ndm->ndm_flags & ~NTF_PROXY) { + NL_SET_ERR_MSG(extack, "Invalid flags in header for neighbor dump request"); + return -EINVAL; + } + err = nlmsg_parse_strict(nlh, sizeof(struct ndmsg), tb, NDA_MAX, NULL, extack); } else { diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 5da9552b186b..2b9fdbc43205 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -717,7 +717,8 @@ int netpoll_setup(struct netpoll *np) read_lock_bh(&idev->lock); list_for_each_entry(ifp, &idev->addr_list, if_list) { - if (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL) + if (!!(ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL) != + !!(ipv6_addr_type(&np->remote_ip.in6) & IPV6_ADDR_LINKLOCAL)) continue; np->local_ip.in6 = ifp->addr; err = 0; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index e01274bd5e3e..7819f7804eeb 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3367,7 +3367,7 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb) cb->seq = 0; } ret = dumpit(skb, cb); - if (ret < 0) + if (ret) break; } cb->family = idx; @@ -3800,6 +3800,9 @@ int ndo_dflt_fdb_dump(struct sk_buff *skb, { int err; + if (dev->type != ARPHRD_ETHER) + return -EINVAL; + netif_addr_lock_bh(dev); err = nlmsg_populate_fdb(skb, cb, dev, idx, &dev->uc); if (err) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 946de0e24c87..a8217e221e19 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4854,6 +4854,11 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet) nf_reset(skb); nf_reset_trace(skb); +#ifdef CONFIG_NET_SWITCHDEV + skb->offload_fwd_mark = 0; + skb->offload_mr_fwd_mark = 0; +#endif + if (!xnet) return; @@ -4944,6 +4949,8 @@ static unsigned int skb_gso_mac_seglen(const struct sk_buff *skb) * * This is a helper to do that correctly considering GSO_BY_FRAGS. * + * @skb: GSO skb + * * @seg_len: The segmented length (from skb_gso_*_seglen). In the * GSO_BY_FRAGS case this will be [header sizes + GSO_BY_FRAGS]. * diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 56a99d0c9aa0..b7dbb3c976cd 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -94,6 +94,9 @@ int sk_msg_clone(struct sock *sk, struct sk_msg *dst, struct sk_msg *src, } while (len) { + if (sk_msg_full(dst)) + return -ENOSPC; + sge_len = sge->length - off; sge_off = sge->offset + off; if (sge_len > len) diff --git a/net/core/sock.c b/net/core/sock.c index 6fcc4bc07d19..080a880a1761 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -3279,6 +3279,7 @@ int sock_load_diag_module(int family, int protocol) #ifdef CONFIG_INET if (family == AF_INET && + protocol != IPPROTO_RAW && !rcu_access_pointer(inet_protos[protocol])) return -ENOENT; #endif diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 37b4667128a3..d67ec17f2cc8 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -28,6 +28,8 @@ static int two __maybe_unused = 2; static int min_sndbuf = SOCK_MIN_SNDBUF; static int min_rcvbuf = SOCK_MIN_RCVBUF; static int max_skb_frags = MAX_SKB_FRAGS; +static long long_one __maybe_unused = 1; +static long long_max __maybe_unused = LONG_MAX; static int net_msg_warn; /* Unused, but still a sysctl */ @@ -289,6 +291,17 @@ proc_dointvec_minmax_bpf_restricted(struct ctl_table *table, int write, return proc_dointvec_minmax(table, write, buffer, lenp, ppos); } + +static int +proc_dolongvec_minmax_bpf_restricted(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + return proc_doulongvec_minmax(table, write, buffer, lenp, ppos); +} #endif static struct ctl_table net_core_table[] = { @@ -398,10 +411,11 @@ static struct ctl_table net_core_table[] = { { .procname = "bpf_jit_limit", .data = &bpf_jit_limit, - .maxlen = sizeof(int), + .maxlen = sizeof(long), .mode = 0600, - .proc_handler = proc_dointvec_minmax_bpf_restricted, - .extra1 = &one, + .proc_handler = proc_dolongvec_minmax_bpf_restricted, + .extra1 = &long_one, + .extra2 = &long_max, }, #endif { |