Diffstat (limited to 'net')
-rw-r--r--net/Kconfig27
-rw-r--r--net/appletalk/aarp.c2
-rw-r--r--net/bluetooth/hci_core.c7
-rw-r--r--net/bluetooth/hci_sock.c4
-rw-r--r--net/caif/caif_dev.c13
-rw-r--r--net/caif/caif_usb.c14
-rw-r--r--net/caif/cfcnfg.c16
-rw-r--r--net/caif/cfserl.c5
-rw-r--r--net/can/isotp.c49
-rw-r--r--net/compat.c2
-rw-r--r--net/core/dev.c29
-rw-r--r--net/core/devlink.c4
-rw-r--r--net/core/fib_rules.c2
-rw-r--r--net/core/filter.c1
-rw-r--r--net/core/neighbour.c1
-rw-r--r--net/core/page_pool.c12
-rw-r--r--net/core/rtnetlink.c4
-rw-r--r--net/core/sock.c24
-rw-r--r--net/dsa/master.c5
-rw-r--r--net/dsa/slave.c12
-rw-r--r--net/dsa/tag_8021q.c2
-rw-r--r--net/ethtool/eeprom.c2
-rw-r--r--net/ethtool/stats.c2
-rw-r--r--net/hsr/hsr_device.c2
-rw-r--r--net/hsr/hsr_forward.c30
-rw-r--r--net/hsr/hsr_forward.h8
-rw-r--r--net/hsr/hsr_main.h4
-rw-r--r--net/hsr/hsr_slave.c11
-rw-r--r--net/ieee802154/nl-mac.c10
-rw-r--r--net/ieee802154/nl-phy.c4
-rw-r--r--net/ieee802154/nl802154.c9
-rw-r--r--net/ipv4/bpf_tcp_ca.c2
-rw-r--r--net/ipv4/cipso_ipv4.c1
-rw-r--r--net/ipv4/devinet.c2
-rw-r--r--net/ipv4/ipconfig.c13
-rw-r--r--net/ipv6/addrconf.c2
-rw-r--r--net/ipv6/mcast.c3
-rw-r--r--net/ipv6/reassembly.c4
-rw-r--r--net/ipv6/route.c8
-rw-r--r--net/ipv6/sit.c3
-rw-r--r--net/mac80211/ieee80211_i.h36
-rw-r--r--net/mac80211/iface.c11
-rw-r--r--net/mac80211/key.c7
-rw-r--r--net/mac80211/key.h2
-rw-r--r--net/mac80211/rx.c150
-rw-r--r--net/mac80211/sta_info.c6
-rw-r--r--net/mac80211/sta_info.h33
-rw-r--r--net/mac80211/wpa.c13
-rw-r--r--net/mptcp/options.c3
-rw-r--r--net/mptcp/pm_netlink.c8
-rw-r--r--net/mptcp/protocol.c36
-rw-r--r--net/mptcp/protocol.h3
-rw-r--r--net/mptcp/sockopt.c4
-rw-r--r--net/mptcp/subflow.c80
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c2
-rw-r--r--net/netfilter/nf_conntrack_proto.c2
-rw-r--r--net/netfilter/nf_flow_table_core.c3
-rw-r--r--net/netfilter/nf_flow_table_offload.c7
-rw-r--r--net/netfilter/nf_tables_api.c84
-rw-r--r--net/netfilter/nfnetlink_cthelper.c8
-rw-r--r--net/netfilter/nft_ct.c2
-rw-r--r--net/netfilter/nft_set_pipapo.c4
-rw-r--r--net/netfilter/nft_set_pipapo.h2
-rw-r--r--net/netfilter/nft_set_pipapo_avx2.c3
-rw-r--r--net/netlink/af_netlink.c6
-rw-r--r--net/nfc/llcp_sock.c2
-rw-r--r--net/nfc/nci/core.c1
-rw-r--r--net/nfc/nci/hci.c5
-rw-r--r--net/nfc/rawsock.c2
-rw-r--r--net/openvswitch/meter.c8
-rw-r--r--net/packet/af_packet.c10
-rw-r--r--net/rds/connection.c23
-rw-r--r--net/rds/tcp.c4
-rw-r--r--net/rds/tcp.h3
-rw-r--r--net/rds/tcp_listen.c6
-rw-r--r--net/sched/act_ct.c10
-rw-r--r--net/sched/cls_api.c2
-rw-r--r--net/sched/sch_dsmark.c3
-rw-r--r--net/sched/sch_fq_pie.c19
-rw-r--r--net/sched/sch_generic.c50
-rw-r--r--net/sched/sch_htb.c8
-rw-r--r--net/sctp/socket.c1
-rw-r--r--net/sctp/sysctl.c2
-rw-r--r--net/smc/smc_ism.c26
-rw-r--r--net/tipc/core.c6
-rw-r--r--net/tipc/core.h10
-rw-r--r--net/tipc/discover.c4
-rw-r--r--net/tipc/link.c5
-rw-r--r--net/tipc/link.h1
-rw-r--r--net/tipc/msg.c9
-rw-r--r--net/tipc/net.c15
-rw-r--r--net/tipc/node.c12
-rw-r--r--net/tipc/socket.c5
-rw-r--r--net/tipc/udp_media.c2
-rw-r--r--net/tls/tls_device.c60
-rw-r--r--net/tls/tls_device_fallback.c7
-rw-r--r--net/tls/tls_main.c1
-rw-r--r--net/tls/tls_sw.c11
-rw-r--r--net/wireless/util.c7
-rw-r--r--net/x25/af_x25.c2
100 files changed, 818 insertions, 394 deletions
diff --git a/net/Kconfig b/net/Kconfig
index f5ee7c65e6b4..c7392c449b25 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -302,21 +302,6 @@ config BQL
select DQL
default y
-config BPF_JIT
- bool "enable BPF Just In Time compiler"
- depends on HAVE_CBPF_JIT || HAVE_EBPF_JIT
- depends on MODULES
- help
- Berkeley Packet Filter filtering capabilities are normally handled
- by an interpreter. This option allows kernel to generate a native
- code when filter is loaded in memory. This should speedup
- packet sniffing (libpcap/tcpdump).
-
- Note, admin should enable this feature changing:
- /proc/sys/net/core/bpf_jit_enable
- /proc/sys/net/core/bpf_jit_harden (optional)
- /proc/sys/net/core/bpf_jit_kallsyms (optional)
-
config BPF_STREAM_PARSER
bool "enable BPF STREAM_PARSER"
depends on INET
@@ -470,15 +455,3 @@ config ETHTOOL_NETLINK
e.g. notification messages.
endif # if NET
-
-# Used by archs to tell that they support BPF JIT compiler plus which flavour.
-# Only one of the two can be selected for a specific arch since eBPF JIT supersedes
-# the cBPF JIT.
-
-# Classic BPF JIT (cBPF)
-config HAVE_CBPF_JIT
- bool
-
-# Extended BPF JIT (eBPF)
-config HAVE_EBPF_JIT
- bool
diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c
index be18af481d7d..c7236daa2415 100644
--- a/net/appletalk/aarp.c
+++ b/net/appletalk/aarp.c
@@ -768,7 +768,7 @@ static int aarp_rcv(struct sk_buff *skb, struct net_device *dev,
if (a && a->status & ATIF_PROBE) {
a->status |= ATIF_PROBE_FAIL;
/*
- * we do not respond to probe or request packets for
+ * we do not respond to probe or request packets of
* this address while we are probing this address
*/
goto unlock;
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index fd12f1652bdf..7d71d104fdfd 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -1610,8 +1610,13 @@ setup_failed:
} else {
/* Init failed, cleanup */
flush_work(&hdev->tx_work);
- flush_work(&hdev->cmd_work);
+
+ /* Since hci_rx_work() is possible to awake new cmd_work
+ * it should be flushed first to avoid unexpected call of
+ * hci_cmd_work()
+ */
flush_work(&hdev->rx_work);
+ flush_work(&hdev->cmd_work);
skb_queue_purge(&hdev->cmd_q);
skb_queue_purge(&hdev->rx_q);
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 251b9128f530..eed0dd066e12 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -762,7 +762,7 @@ void hci_sock_dev_event(struct hci_dev *hdev, int event)
/* Detach sockets from device */
read_lock(&hci_sk_list.lock);
sk_for_each(sk, &hci_sk_list.head) {
- bh_lock_sock_nested(sk);
+ lock_sock(sk);
if (hci_pi(sk)->hdev == hdev) {
hci_pi(sk)->hdev = NULL;
sk->sk_err = EPIPE;
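Review bot: `lock_sock(sk)` can sleep, but it is now called inside the `read_lock(&hci_sk_list.lock)` section opened two lines above it, which is atomic context; the matching `release_sock(sk)` in the next hunk sits in the same section. The `bh_lock_sock_nested()` it replaces was a spinlock and was safe there. If the sleeping lock is needed to serialise against socket users of `hdev`, the walk over `hci_sk_list.head` has to be restructured so the socket is locked outside the rwlock, for example by taking a reference with `sock_hold(sk)` and dropping `read_lock` before locking. As written this will presumably trip the sleeping-in-atomic check under CONFIG_DEBUG_ATOMIC_SLEEP.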
@@ -771,7 +771,7 @@ void hci_sock_dev_event(struct hci_dev *hdev, int event)
hci_dev_put(hdev);
}
- bh_unlock_sock(sk);
+ release_sock(sk);
}
read_unlock(&hci_sk_list.lock);
}
diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c
index c10e5a55758d..440139706130 100644
--- a/net/caif/caif_dev.c
+++ b/net/caif/caif_dev.c
@@ -308,7 +308,7 @@ static void dev_flowctrl(struct net_device *dev, int on)
caifd_put(caifd);
}
-void caif_enroll_dev(struct net_device *dev, struct caif_dev_common *caifdev,
+int caif_enroll_dev(struct net_device *dev, struct caif_dev_common *caifdev,
struct cflayer *link_support, int head_room,
struct cflayer **layer,
int (**rcv_func)(struct sk_buff *, struct net_device *,
@@ -319,11 +319,12 @@ void caif_enroll_dev(struct net_device *dev, struct caif_dev_common *caifdev,
enum cfcnfg_phy_preference pref;
struct cfcnfg *cfg = get_cfcnfg(dev_net(dev));
struct caif_device_entry_list *caifdevs;
+ int res;
caifdevs = caif_device_list(dev_net(dev));
caifd = caif_device_alloc(dev);
if (!caifd)
- return;
+ return -ENOMEM;
*layer = &caifd->layer;
spin_lock_init(&caifd->flow_lock);
@@ -344,7 +345,7 @@ void caif_enroll_dev(struct net_device *dev, struct caif_dev_common *caifdev,
strlcpy(caifd->layer.name, dev->name,
sizeof(caifd->layer.name));
caifd->layer.transmit = transmit;
- cfcnfg_add_phy_layer(cfg,
+ res = cfcnfg_add_phy_layer(cfg,
dev,
&caifd->layer,
pref,
@@ -354,6 +355,7 @@ void caif_enroll_dev(struct net_device *dev, struct caif_dev_common *caifdev,
mutex_unlock(&caifdevs->lock);
if (rcv_func)
*rcv_func = receive;
+ return res;
}
EXPORT_SYMBOL(caif_enroll_dev);
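Review bot: When `cfcnfg_add_phy_layer()` fails, `caif_enroll_dev()` returns `res`, but nothing in the visible hunks undoes the earlier `caif_device_alloc(dev)` or clears `*layer`. The caller therefore gets an error together with a `layer` pointer into a `caifd` that appears to stay allocated, while the callers changed in this commit release only `link_support`. It is worth confirming where `caifd` is released on this path. The ownership contract should be documented above the function, e.g. `/* Returns 0 or a negative errno; on error the caller still owns link_support. */`. The function body starts outside this hunk.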
@@ -368,6 +370,7 @@ static int caif_device_notify(struct notifier_block *me, unsigned long what,
struct cflayer *layer, *link_support;
int head_room = 0;
struct caif_device_entry_list *caifdevs;
+ int res;
cfg = get_cfcnfg(dev_net(dev));
caifdevs = caif_device_list(dev_net(dev));
@@ -393,8 +396,10 @@ static int caif_device_notify(struct notifier_block *me, unsigned long what,
break;
}
}
- caif_enroll_dev(dev, caifdev, link_support, head_room,
+ res = caif_enroll_dev(dev, caifdev, link_support, head_room,
&layer, NULL);
+ if (res)
+ cfserl_release(link_support);
caifdev->flowctrl = dev_flowctrl;
break;
diff --git a/net/caif/caif_usb.c b/net/caif/caif_usb.c
index a0116b9503d9..b02e1292f7f1 100644
--- a/net/caif/caif_usb.c
+++ b/net/caif/caif_usb.c
@@ -115,6 +115,11 @@ static struct cflayer *cfusbl_create(int phyid, u8 ethaddr[ETH_ALEN],
return (struct cflayer *) this;
}
+static void cfusbl_release(struct cflayer *layer)
+{
+ kfree(layer);
+}
+
static struct packet_type caif_usb_type __read_mostly = {
.type = cpu_to_be16(ETH_P_802_EX1),
};
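Review bot: `cfusbl_release()` passes the `struct cflayer *` straight to `kfree()`, which is only correct while the layer sits at offset 0 of the object allocated in `cfusbl_create()`; the `return (struct cflayer *) this;` cast above relies on the same layout. The struct definition is not in the hunk, so this is presumably true today. A one-line comment such as `/* layer is the first member of struct cfusbl, so this frees the whole object */` would stop a later reordering of the struct from turning this into an invalid free. The same applies to `cfserl_release()` added in net/caif/cfserl.c.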
@@ -127,6 +132,7 @@ static int cfusbl_device_notify(struct notifier_block *me, unsigned long what,
struct cflayer *layer, *link_support;
struct usbnet *usbnet;
struct usb_device *usbdev;
+ int res;
/* Check whether we have a NCM device, and find its VID/PID. */
if (!(dev->dev.parent && dev->dev.parent->driver &&
@@ -169,8 +175,11 @@ static int cfusbl_device_notify(struct notifier_block *me, unsigned long what,
if (dev->num_tx_queues > 1)
pr_warn("USB device uses more than one tx queue\n");
- caif_enroll_dev(dev, &common, link_support, CFUSB_MAX_HEADLEN,
+ res = caif_enroll_dev(dev, &common, link_support, CFUSB_MAX_HEADLEN,
&layer, &caif_usb_type.func);
+ if (res)
+ goto err;
+
if (!pack_added)
dev_add_pack(&caif_usb_type);
pack_added = true;
@@ -178,6 +187,9 @@ static int cfusbl_device_notify(struct notifier_block *me, unsigned long what,
strlcpy(layer->name, dev->name, sizeof(layer->name));
return 0;
+err:
+ cfusbl_release(link_support);
+ return res;
}
static struct notifier_block caif_device_notifier = {
diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c
index 399239a14420..cac30e676ac9 100644
--- a/net/caif/cfcnfg.c
+++ b/net/caif/cfcnfg.c
@@ -450,7 +450,7 @@ unlock:
rcu_read_unlock();
}
-void
+int
cfcnfg_add_phy_layer(struct cfcnfg *cnfg,
struct net_device *dev, struct cflayer *phy_layer,
enum cfcnfg_phy_preference pref,
@@ -459,7 +459,7 @@ cfcnfg_add_phy_layer(struct cfcnfg *cnfg,
{
struct cflayer *frml;
struct cfcnfg_phyinfo *phyinfo = NULL;
- int i;
+ int i, res = 0;
u8 phyid;
mutex_lock(&cnfg->lock);
@@ -473,12 +473,15 @@ cfcnfg_add_phy_layer(struct cfcnfg *cnfg,
goto got_phyid;
}
pr_warn("Too many CAIF Link Layers (max 6)\n");
+ res = -EEXIST;
goto out;
got_phyid:
phyinfo = kzalloc(sizeof(struct cfcnfg_phyinfo), GFP_ATOMIC);
- if (!phyinfo)
+ if (!phyinfo) {
+ res = -ENOMEM;
goto out_err;
+ }
phy_layer->id = phyid;
phyinfo->pref = pref;
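Review bot: The table-full path sets `res = -EEXIST`, but the condition logged on the line above it is exhaustion of the available phy ids, not a duplicate entry. `res = -ENOSPC;` would describe that more accurately to callers that log or propagate the code. The two allocation failures in this function already use `-ENOMEM`, so a distinct code here keeps "no memory" and "no free slot" separate.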
@@ -492,8 +495,10 @@ got_phyid:
frml = cffrml_create(phyid, fcs);
- if (!frml)
+ if (!frml) {
+ res = -ENOMEM;
goto out_err;
+ }
phyinfo->frm_layer = frml;
layer_set_up(frml, cnfg->mux);
@@ -511,11 +516,12 @@ got_phyid:
list_add_rcu(&phyinfo->node, &cnfg->phys);
out:
mutex_unlock(&cnfg->lock);
- return;
+ return res;
out_err:
kfree(phyinfo);
mutex_unlock(&cnfg->lock);
+ return res;
}
EXPORT_SYMBOL(cfcnfg_add_phy_layer);
diff --git a/net/caif/cfserl.c b/net/caif/cfserl.c
index e11725a4bb0e..40cd57ad0a0f 100644
--- a/net/caif/cfserl.c
+++ b/net/caif/cfserl.c
@@ -31,6 +31,11 @@ static int cfserl_transmit(struct cflayer *layr, struct cfpkt *pkt);
static void cfserl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
int phyid);
+void cfserl_release(struct cflayer *layer)
+{
+ kfree(layer);
+}
+
struct cflayer *cfserl_create(int instance, bool use_stx)
{
struct cfserl *this = kzalloc(sizeof(struct cfserl), GFP_ATOMIC);
diff --git a/net/can/isotp.c b/net/can/isotp.c
index 9f94ad3caee9..253b24417c8e 100644
--- a/net/can/isotp.c
+++ b/net/can/isotp.c
@@ -1062,27 +1062,31 @@ static int isotp_bind(struct socket *sock, struct sockaddr *uaddr, int len)
if (len < ISOTP_MIN_NAMELEN)
return -EINVAL;
+ if (addr->can_addr.tp.tx_id & (CAN_ERR_FLAG | CAN_RTR_FLAG))
+ return -EADDRNOTAVAIL;
+
+ if (!addr->can_ifindex)
+ return -ENODEV;
+
+ lock_sock(sk);
+
/* do not register frame reception for functional addressing */
if (so->opt.flags & CAN_ISOTP_SF_BROADCAST)
do_rx_reg = 0;
/* do not validate rx address for functional addressing */
if (do_rx_reg) {
- if (addr->can_addr.tp.rx_id == addr->can_addr.tp.tx_id)
- return -EADDRNOTAVAIL;
+ if (addr->can_addr.tp.rx_id == addr->can_addr.tp.tx_id) {
+ err = -EADDRNOTAVAIL;
+ goto out;
+ }
- if (addr->can_addr.tp.rx_id & (CAN_ERR_FLAG | CAN_RTR_FLAG))
- return -EADDRNOTAVAIL;
+ if (addr->can_addr.tp.rx_id & (CAN_ERR_FLAG | CAN_RTR_FLAG)) {
+ err = -EADDRNOTAVAIL;
+ goto out;
+ }
}
- if (addr->can_addr.tp.tx_id & (CAN_ERR_FLAG | CAN_RTR_FLAG))
- return -EADDRNOTAVAIL;
-
- if (!addr->can_ifindex)
- return -ENODEV;
-
- lock_sock(sk);
-
if (so->bound && addr->can_ifindex == so->ifindex &&
addr->can_addr.tp.rx_id == so->rxid &&
addr->can_addr.tp.tx_id == so->txid)
@@ -1164,16 +1168,13 @@ static int isotp_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
return ISOTP_MIN_NAMELEN;
}
-static int isotp_setsockopt(struct socket *sock, int level, int optname,
+static int isotp_setsockopt_locked(struct socket *sock, int level, int optname,
sockptr_t optval, unsigned int optlen)
{
struct sock *sk = sock->sk;
struct isotp_sock *so = isotp_sk(sk);
int ret = 0;
- if (level != SOL_CAN_ISOTP)
- return -EINVAL;
-
if (so->bound)
return -EISCONN;
@@ -1248,6 +1249,22 @@ static int isotp_setsockopt(struct socket *sock, int level, int optname,
return ret;
}
+static int isotp_setsockopt(struct socket *sock, int level, int optname,
+ sockptr_t optval, unsigned int optlen)
+
+{
+ struct sock *sk = sock->sk;
+ int ret;
+
+ if (level != SOL_CAN_ISOTP)
+ return -EINVAL;
+
+ lock_sock(sk);
+ ret = isotp_setsockopt_locked(sock, level, optname, optval, optlen);
+ release_sock(sk);
+ return ret;
+}
+
static int isotp_getsockopt(struct socket *sock, int level, int optname,
char __user *optval, int __user *optlen)
{
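Review bot: `isotp_setsockopt_locked()` has no comment stating that it must be called with the socket lock held, and the new wrapper has a stray blank line between its parameter list and the opening brace. Suggest `/* caller must hold lock_sock(sk) */` above the helper, or `sock_owned_by_me(sk);` as its first statement so lockdep enforces the requirement. The helper also still takes `level`, which it no longer reads now that the `SOL_CAN_ISOTP` check lives in the wrapper.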
diff --git a/net/compat.c b/net/compat.c
index ddd15af3a283..210fc3b4d0d8 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -177,7 +177,7 @@ int cmsghdr_from_user_compat_to_kern(struct msghdr *kmsg, struct sock *sk,
if (kcmlen > stackbuf_size)
kcmsg_base = kcmsg = sock_kmalloc(sk, kcmlen, GFP_KERNEL);
if (kcmsg == NULL)
- return -ENOBUFS;
+ return -ENOMEM;
/* Now copy them over neatly. */
memset(kcmsg, 0, kcmlen);
diff --git a/net/core/dev.c b/net/core/dev.c
index 222b1d322c96..ef8cf7619baf 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3853,7 +3853,8 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
if (q->flags & TCQ_F_NOLOCK) {
rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK;
- qdisc_run(q);
+ if (likely(!netif_xmit_frozen_or_stopped(txq)))
+ qdisc_run(q);
if (unlikely(to_free))
kfree_skb_list(to_free);
@@ -5025,25 +5026,43 @@ static __latent_entropy void net_tx_action(struct softirq_action *h)
sd->output_queue_tailp = &sd->output_queue;
local_irq_enable();
+ rcu_read_lock();
+
while (head) {
struct Qdisc *q = head;
spinlock_t *root_lock = NULL;
head = head->next_sched;
- if (!(q->flags & TCQ_F_NOLOCK)) {
- root_lock = qdisc_lock(q);
- spin_lock(root_lock);
- }
/* We need to make sure head->next_sched is read
* before clearing __QDISC_STATE_SCHED
*/
smp_mb__before_atomic();
+
+ if (!(q->flags & TCQ_F_NOLOCK)) {
+ root_lock = qdisc_lock(q);
+ spin_lock(root_lock);
+ } else if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED,
+ &q->state))) {
+ /* There is a synchronize_net() between
+ * STATE_DEACTIVATED flag being set and
+ * qdisc_reset()/some_qdisc_is_busy() in
+ * dev_deactivate(), so we can safely bail out
+ * early here to avoid data race between
+ * qdisc_deactivate() and some_qdisc_is_busy()
+ * for lockless qdisc.
+ */
+ clear_bit(__QDISC_STATE_SCHED, &q->state);
+ continue;
+ }
+
clear_bit(__QDISC_STATE_SCHED, &q->state);
qdisc_run(q);
if (root_lock)
spin_unlock(root_lock);
}
+
+ rcu_read_unlock();
}
xfrm_dev_backlog(sd);
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 4eb969518ee0..051432ea4f69 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -705,7 +705,6 @@ static int devlink_nl_port_attrs_put(struct sk_buff *msg,
case DEVLINK_PORT_FLAVOUR_PHYSICAL:
case DEVLINK_PORT_FLAVOUR_CPU:
case DEVLINK_PORT_FLAVOUR_DSA:
- case DEVLINK_PORT_FLAVOUR_VIRTUAL:
if (nla_put_u32(msg, DEVLINK_ATTR_PORT_NUMBER,
attrs->phys.port_number))
return -EMSGSIZE;
@@ -8631,7 +8630,6 @@ static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port,
switch (attrs->flavour) {
case DEVLINK_PORT_FLAVOUR_PHYSICAL:
- case DEVLINK_PORT_FLAVOUR_VIRTUAL:
if (!attrs->split)
n = snprintf(name, len, "p%u", attrs->phys.port_number);
else
@@ -8679,6 +8677,8 @@ static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port,
n = snprintf(name, len, "pf%usf%u", attrs->pci_sf.pf,
attrs->pci_sf.sf);
break;
+ case DEVLINK_PORT_FLAVOUR_VIRTUAL:
+ return -EOPNOTSUPP;
}
if (n >= len)
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index cd80ffed6d26..a9f937975080 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -1168,7 +1168,7 @@ static void notify_rule_change(int event, struct fib_rule *rule,
{
struct net *net;
struct sk_buff *skb;
- int err = -ENOBUFS;
+ int err = -ENOMEM;
net = ops->fro_net;
skb = nlmsg_new(fib_rule_nlmsg_size(ops, rule), GFP_KERNEL);
diff --git a/net/core/filter.c b/net/core/filter.c
index cae56d08a670..65ab4e21c087 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3784,6 +3784,7 @@ static inline int __bpf_skb_change_head(struct sk_buff *skb, u32 head_room,
__skb_push(skb, head_room);
memset(skb->data, 0, head_room);
skb_reset_mac_header(skb);
+ skb_reset_mac_len(skb);
}
return ret;
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 98f20efbfadf..bf774575ad71 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -238,6 +238,7 @@ static int neigh_forced_gc(struct neigh_table *tbl)
write_lock(&n->lock);
if ((n->nud_state == NUD_FAILED) ||
+ (n->nud_state == NUD_NOARP) ||
(tbl->is_multicast &&
tbl->is_multicast(n->primary_key)) ||
time_after(tref, n->updated))
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index 9ec1aa9640ad..3c4c4c7a0402 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -174,8 +174,10 @@ static void page_pool_dma_sync_for_device(struct page_pool *pool,
struct page *page,
unsigned int dma_sync_size)
{
+ dma_addr_t dma_addr = page_pool_get_dma_addr(page);
+
dma_sync_size = min(dma_sync_size, pool->p.max_len);
- dma_sync_single_range_for_device(pool->p.dev, page->dma_addr,
+ dma_sync_single_range_for_device(pool->p.dev, dma_addr,
pool->p.offset, dma_sync_size,
pool->p.dma_dir);
}
@@ -195,7 +197,7 @@ static bool page_pool_dma_map(struct page_pool *pool, struct page *page)
if (dma_mapping_error(pool->p.dev, dma))
return false;
- page->dma_addr = dma;
+ page_pool_set_dma_addr(page, dma);
if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV)
page_pool_dma_sync_for_device(pool, page, pool->p.max_len);
@@ -331,13 +333,13 @@ void page_pool_release_page(struct page_pool *pool, struct page *page)
*/
goto skip_dma_unmap;
- dma = page->dma_addr;
+ dma = page_pool_get_dma_addr(page);
- /* When page is unmapped, it cannot be returned our pool */
+ /* When page is unmapped, it cannot be returned to our pool */
dma_unmap_page_attrs(pool->p.dev, dma,
PAGE_SIZE << pool->p.order, pool->p.dma_dir,
DMA_ATTR_SKIP_CPU_SYNC);
- page->dma_addr = 0;
+ page_pool_set_dma_addr(page, 0);
skip_dma_unmap:
/* This may be the last page returned, releasing the pool, so
* it is not safe to reference pool afterwards.
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 714d5fa38546..3e84279c4123 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -4842,8 +4842,10 @@ static int rtnl_bridge_notify(struct net_device *dev)
if (err < 0)
goto errout;
- if (!skb->len)
+ if (!skb->len) {
+ err = -EINVAL;
goto errout;
+ }
rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC);
return 0;
diff --git a/net/core/sock.c b/net/core/sock.c
index c761c4a0b66b..946888afef88 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -815,10 +815,18 @@ void sock_set_rcvbuf(struct sock *sk, int val)
}
EXPORT_SYMBOL(sock_set_rcvbuf);
+static void __sock_set_mark(struct sock *sk, u32 val)
+{
+ if (val != sk->sk_mark) {
+ sk->sk_mark = val;
+ sk_dst_reset(sk);
+ }
+}
+
void sock_set_mark(struct sock *sk, u32 val)
{
lock_sock(sk);
- sk->sk_mark = val;
+ __sock_set_mark(sk, val);
release_sock(sk);
}
EXPORT_SYMBOL(sock_set_mark);
@@ -1126,10 +1134,10 @@ set_sndbuf:
case SO_MARK:
if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
ret = -EPERM;
- } else if (val != sk->sk_mark) {
- sk->sk_mark = val;
- sk_dst_reset(sk);
+ break;
}
+
+ __sock_set_mark(sk, val);
break;
case SO_RXQ_OVFL:
@@ -2132,10 +2140,10 @@ void skb_orphan_partial(struct sk_buff *skb)
if (skb_is_tcp_pure_ack(skb))
return;
- if (can_skb_orphan_partial(skb))
- skb_set_owner_sk_safe(skb, skb->sk);
- else
- skb_orphan(skb);
+ if (can_skb_orphan_partial(skb) && skb_set_owner_sk_safe(skb, skb->sk))
+ return;
+
+ skb_orphan(skb);
}
EXPORT_SYMBOL(skb_orphan_partial);
diff --git a/net/dsa/master.c b/net/dsa/master.c
index 052a977914a6..63adbc21a735 100644
--- a/net/dsa/master.c
+++ b/net/dsa/master.c
@@ -147,8 +147,7 @@ static void dsa_master_get_strings(struct net_device *dev, uint32_t stringset,
struct dsa_switch *ds = cpu_dp->ds;
int port = cpu_dp->index;
int len = ETH_GSTRING_LEN;
- int mcount = 0, count;
- unsigned int i;
+ int mcount = 0, count, i;
uint8_t pfx[4];
uint8_t *ndata;
@@ -178,6 +177,8 @@ static void dsa_master_get_strings(struct net_device *dev, uint32_t stringset,
*/
ds->ops->get_strings(ds, port, stringset, ndata);
count = ds->ops->get_sset_count(ds, port, stringset);
+ if (count < 0)
+ return;
for (i = 0; i < count; i++) {
memmove(ndata + (i * len + sizeof(pfx)),
ndata + i * len, len - sizeof(pfx));
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 8c0f3c6ab365..d4756b920108 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -776,13 +776,15 @@ static int dsa_slave_get_sset_count(struct net_device *dev, int sset)
struct dsa_switch *ds = dp->ds;
if (sset == ETH_SS_STATS) {
- int count;
+ int count = 0;
- count = 4;
- if (ds->ops->get_sset_count)
- count += ds->ops->get_sset_count(ds, dp->index, sset);
+ if (ds->ops->get_sset_count) {
+ count = ds->ops->get_sset_count(ds, dp->index, sset);
+ if (count < 0)
+ return count;
+ }
- return count;
+ return count + 4;
} else if (sset == ETH_SS_TEST) {
return net_selftest_get_count();
}
diff --git a/net/dsa/tag_8021q.c b/net/dsa/tag_8021q.c
index 008c1ec6e20c..122ad5833fb1 100644
--- a/net/dsa/tag_8021q.c
+++ b/net/dsa/tag_8021q.c
@@ -64,7 +64,7 @@
#define DSA_8021Q_SUBVLAN_HI_SHIFT 9
#define DSA_8021Q_SUBVLAN_HI_MASK GENMASK(9, 9)
#define DSA_8021Q_SUBVLAN_LO_SHIFT 4
-#define DSA_8021Q_SUBVLAN_LO_MASK GENMASK(4, 3)
+#define DSA_8021Q_SUBVLAN_LO_MASK GENMASK(5, 4)
#define DSA_8021Q_SUBVLAN_HI(x) (((x) & GENMASK(2, 2)) >> 2)
#define DSA_8021Q_SUBVLAN_LO(x) ((x) & GENMASK(1, 0))
#define DSA_8021Q_SUBVLAN(x) \
diff --git a/net/ethtool/eeprom.c b/net/ethtool/eeprom.c
index 2a6733a6449a..5d38e90895ac 100644
--- a/net/ethtool/eeprom.c
+++ b/net/ethtool/eeprom.c
@@ -95,7 +95,7 @@ static int get_module_eeprom_by_page(struct net_device *dev,
if (dev->sfp_bus)
return sfp_get_module_eeprom_by_page(dev->sfp_bus, page_data, extack);
- if (ops->get_module_info)
+ if (ops->get_module_eeprom_by_page)
return ops->get_module_eeprom_by_page(dev, page_data, extack);
return -EOPNOTSUPP;
diff --git a/net/ethtool/stats.c b/net/ethtool/stats.c
index b7642dc96d50..ec07f5765e03 100644
--- a/net/ethtool/stats.c
+++ b/net/ethtool/stats.c
@@ -119,7 +119,7 @@ static int stats_prepare_data(const struct ethnl_req_info *req_base,
*/
memset(&data->phy_stats, 0xff, sizeof(data->phy_stats));
memset(&data->mac_stats, 0xff, sizeof(data->mac_stats));
- memset(&data->ctrl_stats, 0xff, sizeof(data->mac_stats));
+ memset(&data->ctrl_stats, 0xff, sizeof(data->ctrl_stats));
memset(&data->rmon_stats, 0xff, sizeof(data->rmon_stats));
if (test_bit(ETHTOOL_STATS_ETH_PHY, req_info->stat_mask) &&
diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c
index bfcdc75fc01e..26c32407f029 100644
--- a/net/hsr/hsr_device.c
+++ b/net/hsr/hsr_device.c
@@ -218,6 +218,7 @@ static netdev_tx_t hsr_dev_xmit(struct sk_buff *skb, struct net_device *dev)
if (master) {
skb->dev = master->dev;
skb_reset_mac_header(skb);
+ skb_reset_mac_len(skb);
hsr_forward_skb(skb, master);
} else {
atomic_long_inc(&dev->tx_dropped);
@@ -259,6 +260,7 @@ static struct sk_buff *hsr_init_skb(struct hsr_port *master)
goto out;
skb_reset_mac_header(skb);
+ skb_reset_mac_len(skb);
skb_reset_network_header(skb);
skb_reset_transport_header(skb);
diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c
index 6852e9bccf5b..ceb8afb2a62f 100644
--- a/net/hsr/hsr_forward.c
+++ b/net/hsr/hsr_forward.c
@@ -474,8 +474,8 @@ static void handle_std_frame(struct sk_buff *skb,
}
}
-void hsr_fill_frame_info(__be16 proto, struct sk_buff *skb,
- struct hsr_frame_info *frame)
+int hsr_fill_frame_info(__be16 proto, struct sk_buff *skb,
+ struct hsr_frame_info *frame)
{
struct hsr_port *port = frame->port_rcv;
struct hsr_priv *hsr = port->hsr;
@@ -483,20 +483,26 @@ void hsr_fill_frame_info(__be16 proto, struct sk_buff *skb,
/* HSRv0 supervisory frames double as a tag so treat them as tagged. */
if ((!hsr->prot_version && proto == htons(ETH_P_PRP)) ||
proto == htons(ETH_P_HSR)) {
+ /* Check if skb contains hsr_ethhdr */
+ if (skb->mac_len < sizeof(struct hsr_ethhdr))
+ return -EINVAL;
+
/* HSR tagged frame :- Data or Supervision */
frame->skb_std = NULL;
frame->skb_prp = NULL;
frame->skb_hsr = skb;
frame->sequence_nr = hsr_get_skb_sequence_nr(skb);
- return;
+ return 0;
}
/* Standard frame or PRP from master port */
handle_std_frame(skb, frame);
+
+ return 0;
}
-void prp_fill_frame_info(__be16 proto, struct sk_buff *skb,
- struct hsr_frame_info *frame)
+int prp_fill_frame_info(__be16 proto, struct sk_buff *skb,
+ struct hsr_frame_info *frame)
{
/* Supervision frame */
struct prp_rct *rct = skb_get_PRP_rct(skb);
@@ -507,9 +513,11 @@ void prp_fill_frame_info(__be16 proto, struct sk_buff *skb,
frame->skb_std = NULL;
frame->skb_prp = skb;
frame->sequence_nr = prp_get_skb_sequence_nr(rct);
- return;
+ return 0;
}
handle_std_frame(skb, frame);
+
+ return 0;
}
static int fill_frame_info(struct hsr_frame_info *frame,
@@ -519,9 +527,10 @@ static int fill_frame_info(struct hsr_frame_info *frame,
struct hsr_vlan_ethhdr *vlan_hdr;
struct ethhdr *ethhdr;
__be16 proto;
+ int ret;
- /* Check if skb contains hsr_ethhdr */
- if (skb->mac_len < sizeof(struct hsr_ethhdr))
+ /* Check if skb contains ethhdr */
+ if (skb->mac_len < sizeof(struct ethhdr))
return -EINVAL;
memset(frame, 0, sizeof(*frame));
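Review bot: The relaxed entry check guarantees only `sizeof(struct ethhdr)` bytes of MAC header, yet this function declares `vlan_hdr` and, outside the hunk, presumably reads the encapsulated protocol through it for 802.1Q frames. If so, a tagged frame with `mac_len == ETH_HLEN` passes this check and the VLAN read goes beyond the validated header, so that branch needs its own length check before the read. The `sizeof(struct hsr_ethhdr)` test added in `hsr_fill_frame_info()` likewise does not account for a VLAN tag in front of the HSR tag. The function body continues outside the hunk.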
@@ -548,7 +557,10 @@ static int fill_frame_info(struct hsr_frame_info *frame,
frame->is_from_san = false;
frame->port_rcv = port;
- hsr->proto_ops->fill_frame_info(proto, skb, frame);
+ ret = hsr->proto_ops->fill_frame_info(proto, skb, frame);
+ if (ret)
+ return ret;
+
check_local_dest(port->hsr, skb, frame);
return 0;
diff --git a/net/hsr/hsr_forward.h b/net/hsr/hsr_forward.h
index b6acaafa83fc..206636750b30 100644
--- a/net/hsr/hsr_forward.h
+++ b/net/hsr/hsr_forward.h
@@ -24,8 +24,8 @@ struct sk_buff *prp_get_untagged_frame(struct hsr_frame_info *frame,
struct hsr_port *port);
bool prp_drop_frame(struct hsr_frame_info *frame, struct hsr_port *port);
bool hsr_drop_frame(struct hsr_frame_info *frame, struct hsr_port *port);
-void prp_fill_frame_info(__be16 proto, struct sk_buff *skb,
- struct hsr_frame_info *frame);
-void hsr_fill_frame_info(__be16 proto, struct sk_buff *skb,
- struct hsr_frame_info *frame);
+int prp_fill_frame_info(__be16 proto, struct sk_buff *skb,
+ struct hsr_frame_info *frame);
+int hsr_fill_frame_info(__be16 proto, struct sk_buff *skb,
+ struct hsr_frame_info *frame);
#endif /* __HSR_FORWARD_H */
diff --git a/net/hsr/hsr_main.h b/net/hsr/hsr_main.h
index 8f264672b70b..53d1f7a82463 100644
--- a/net/hsr/hsr_main.h
+++ b/net/hsr/hsr_main.h
@@ -186,8 +186,8 @@ struct hsr_proto_ops {
struct hsr_port *port);
struct sk_buff * (*create_tagged_frame)(struct hsr_frame_info *frame,
struct hsr_port *port);
- void (*fill_frame_info)(__be16 proto, struct sk_buff *skb,
- struct hsr_frame_info *frame);
+ int (*fill_frame_info)(__be16 proto, struct sk_buff *skb,
+ struct hsr_frame_info *frame);
bool (*invalid_dan_ingress_frame)(__be16 protocol);
void (*update_san_info)(struct hsr_node *node, bool is_sup);
};
diff --git a/net/hsr/hsr_slave.c b/net/hsr/hsr_slave.c
index c5227d42faf5..b70e6bbf6021 100644
--- a/net/hsr/hsr_slave.c
+++ b/net/hsr/hsr_slave.c
@@ -60,12 +60,11 @@ static rx_handler_result_t hsr_handle_frame(struct sk_buff **pskb)
goto finish_pass;
skb_push(skb, ETH_HLEN);
-
- if (skb_mac_header(skb) != skb->data) {
- WARN_ONCE(1, "%s:%d: Malformed frame at source port %s)\n",
- __func__, __LINE__, port->dev->name);
- goto finish_consume;
- }
+ skb_reset_mac_header(skb);
+ if ((!hsr->prot_version && protocol == htons(ETH_P_PRP)) ||
+ protocol == htons(ETH_P_HSR))
+ skb_set_network_header(skb, ETH_HLEN + HSR_HLEN);
+ skb_reset_mac_len(skb);
hsr_forward_skb(skb, port);
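Review bot: `skb_set_network_header(skb, ETH_HLEN + HSR_HLEN)` followed by `skb_reset_mac_len(skb)` sets `mac_len` to the header size the code expects rather than to what the frame actually holds. The `skb->mac_len < sizeof(struct hsr_ethhdr)` test added in hsr_forward.c therefore passes for any frame carrying an HSR ethertype, including one shorter than the tag. Nothing in this hunk verifies that those bytes are present. Unless the length is validated before `skb_push()`, a guard such as `if (!pskb_may_pull(skb, ETH_HLEN + HSR_HLEN)) goto finish_consume;` (or whichever drop label remains) is needed ahead of the header setup for the mac_len test to be meaningful.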
diff --git a/net/ieee802154/nl-mac.c b/net/ieee802154/nl-mac.c
index 0c1b0770c59e..29bf97640166 100644
--- a/net/ieee802154/nl-mac.c
+++ b/net/ieee802154/nl-mac.c
@@ -680,8 +680,10 @@ int ieee802154_llsec_getparams(struct sk_buff *skb, struct genl_info *info)
nla_put_u8(msg, IEEE802154_ATTR_LLSEC_SECLEVEL, params.out_level) ||
nla_put_u32(msg, IEEE802154_ATTR_LLSEC_FRAME_COUNTER,
be32_to_cpu(params.frame_counter)) ||
- ieee802154_llsec_fill_key_id(msg, &params.out_key))
+ ieee802154_llsec_fill_key_id(msg, &params.out_key)) {
+ rc = -ENOBUFS;
goto out_free;
+ }
dev_put(dev);
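Review bot: This `nla_put*()` failure path sets `rc = -ENOBUFS`, while the equivalent path fixed in net/ieee802154/nl-phy.c in the same commit uses `-EMSGSIZE`. `llsec_iter_devkeys()` further down this file also returns `-EMSGSIZE` when a fill fails. Using `rc = -EMSGSIZE;` here would give the same error for the same attribute-does-not-fit condition across the three sites.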
@@ -1184,7 +1186,7 @@ static int llsec_iter_devkeys(struct llsec_dump_data *data)
{
struct ieee802154_llsec_device *dpos;
struct ieee802154_llsec_device_key *kpos;
- int rc = 0, idx = 0, idx2;
+ int idx = 0, idx2;
list_for_each_entry(dpos, &data->table->devices, list) {
if (idx++ < data->s_idx)
@@ -1200,7 +1202,7 @@ static int llsec_iter_devkeys(struct llsec_dump_data *data)
data->nlmsg_seq,
dpos->hwaddr, kpos,
data->dev)) {
- return rc = -EMSGSIZE;
+ return -EMSGSIZE;
}
data->s_idx2++;
@@ -1209,7 +1211,7 @@ static int llsec_iter_devkeys(struct llsec_dump_data *data)
data->s_idx++;
}
- return rc;
+ return 0;
}
int ieee802154_llsec_dump_devkeys(struct sk_buff *skb,
diff --git a/net/ieee802154/nl-phy.c b/net/ieee802154/nl-phy.c
index 2cdc7e63fe17..88215b5c93aa 100644
--- a/net/ieee802154/nl-phy.c
+++ b/net/ieee802154/nl-phy.c
@@ -241,8 +241,10 @@ int ieee802154_add_iface(struct sk_buff *skb, struct genl_info *info)
}
if (nla_put_string(msg, IEEE802154_ATTR_PHY_NAME, wpan_phy_name(phy)) ||
- nla_put_string(msg, IEEE802154_ATTR_DEV_NAME, dev->name))
+ nla_put_string(msg, IEEE802154_ATTR_DEV_NAME, dev->name)) {
+ rc = -EMSGSIZE;
goto nla_put_failure;
+ }
dev_put(dev);
wpan_phy_put(phy);
diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c
index 05f6bd89a7dd..0cf2374c143b 100644
--- a/net/ieee802154/nl802154.c
+++ b/net/ieee802154/nl802154.c
@@ -1298,19 +1298,20 @@ ieee802154_llsec_parse_dev_addr(struct nlattr *nla,
if (!nla || nla_parse_nested_deprecated(attrs, NL802154_DEV_ADDR_ATTR_MAX, nla, nl802154_dev_addr_policy, NULL))
return -EINVAL;
- if (!attrs[NL802154_DEV_ADDR_ATTR_PAN_ID] ||
- !attrs[NL802154_DEV_ADDR_ATTR_MODE] ||
- !(attrs[NL802154_DEV_ADDR_ATTR_SHORT] ||
- attrs[NL802154_DEV_ADDR_ATTR_EXTENDED]))
+ if (!attrs[NL802154_DEV_ADDR_ATTR_PAN_ID] || !attrs[NL802154_DEV_ADDR_ATTR_MODE])
return -EINVAL;
addr->pan_id = nla_get_le16(attrs[NL802154_DEV_ADDR_ATTR_PAN_ID]);
addr->mode = nla_get_u32(attrs[NL802154_DEV_ADDR_ATTR_MODE]);
switch (addr->mode) {
case NL802154_DEV_ADDR_SHORT:
+ if (!attrs[NL802154_DEV_ADDR_ATTR_SHORT])
+ return -EINVAL;
addr->short_addr = nla_get_le16(attrs[NL802154_DEV_ADDR_ATTR_SHORT]);
break;
case NL802154_DEV_ADDR_EXTENDED:
+ if (!attrs[NL802154_DEV_ADDR_ATTR_EXTENDED])
+ return -EINVAL;
addr->extended_addr = nla_get_le64(attrs[NL802154_DEV_ADDR_ATTR_EXTENDED]);
break;
default:
diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c
index dff4f0eb96b0..9e41eff4a685 100644
--- a/net/ipv4/bpf_tcp_ca.c
+++ b/net/ipv4/bpf_tcp_ca.c
@@ -185,6 +185,7 @@ BTF_ID(func, tcp_reno_cong_avoid)
BTF_ID(func, tcp_reno_undo_cwnd)
BTF_ID(func, tcp_slow_start)
BTF_ID(func, tcp_cong_avoid_ai)
+#ifdef CONFIG_X86
#ifdef CONFIG_DYNAMIC_FTRACE
#if IS_BUILTIN(CONFIG_TCP_CONG_CUBIC)
BTF_ID(func, cubictcp_init)
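Review bot: The new `#ifdef CONFIG_X86` guard, closed in the next hunk after `bbr_set_state`, has no comment explaining why these congestion-control `BTF_ID` entries are limited to one architecture. A one-line comment above the guard stating the constraint would tell a reader when it can be lifted. Since the guard sits directly on top of `#ifdef CONFIG_DYNAMIC_FTRACE`, the two could also be folded into `#if defined(CONFIG_X86) && defined(CONFIG_DYNAMIC_FTRACE)` to avoid another nesting level.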
@@ -213,6 +214,7 @@ BTF_ID(func, bbr_min_tso_segs)
BTF_ID(func, bbr_set_state)
#endif
#endif /* CONFIG_DYNAMIC_FTRACE */
+#endif /* CONFIG_X86 */
BTF_SET_END(bpf_tcp_ca_kfunc_ids)
static bool bpf_tcp_ca_check_kfunc_call(u32 kfunc_btf_id)
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index bfaf327e9d12..e0480c6cebaa 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -472,6 +472,7 @@ void cipso_v4_doi_free(struct cipso_v4_doi *doi_def)
kfree(doi_def->map.std->lvl.local);
kfree(doi_def->map.std->cat.cipso);
kfree(doi_def->map.std->cat.local);
+ kfree(doi_def->map.std);
break;
}
kfree(doi_def);
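Review bot: The added `kfree(doi_def->map.std)` fixes the leak, but the four `kfree()` calls above it still dereference `doi_def->map.std` without a NULL check. If any caller can reach `cipso_v4_doi_free()` with the type already set and `map.std` not yet allocated, for example on an allocation-failure path, that is a NULL dereference. The callers are outside this hunk, so this is inferred. A guard at the top of the case would cover it: `if (!doi_def->map.std) break;`. The start of the function and of the switch lie outside the hunk.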
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 2e35f68da40a..1c6429c353a9 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1989,7 +1989,7 @@ static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla,
return -EAFNOSUPPORT;
if (nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0)
- BUG();
+ return -EINVAL;
if (tb[IFLA_INET_CONF]) {
nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
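Review bot: The replacement for `BUG()` returns a fixed `-EINVAL` and discards the error code from `nla_parse_nested_deprecated()`; the `inet6_set_link_af` hunk in net/ipv6/addrconf.c does the same. Propagating the parser's return value keeps the errno reported to the netlink caller accurate, e.g. `err = nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla, NULL, NULL);` followed by `if (err < 0) return err;`. Whether `inet_set_link_af` already declares a local `err` is not visible here (the IPv6 function does), and both function bodies continue outside their hunks.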
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index bc2f6ca97152..816d8aad5a68 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -886,7 +886,7 @@ static void __init ic_bootp_send_if(struct ic_device *d, unsigned long jiffies_d
/*
- * Copy BOOTP-supplied string if not already set.
+ * Copy BOOTP-supplied string
*/
static int __init ic_bootp_string(char *dest, char *src, int len, int max)
{
@@ -935,12 +935,15 @@ static void __init ic_do_bootp_ext(u8 *ext)
}
break;
case 12: /* Host name */
- ic_bootp_string(utsname()->nodename, ext+1, *ext,
- __NEW_UTS_LEN);
- ic_host_name_set = 1;
+ if (!ic_host_name_set) {
+ ic_bootp_string(utsname()->nodename, ext+1, *ext,
+ __NEW_UTS_LEN);
+ ic_host_name_set = 1;
+ }
break;
case 15: /* Domain name (DNS) */
- ic_bootp_string(ic_domain, ext+1, *ext, sizeof(ic_domain));
+ if (!ic_domain[0])
+ ic_bootp_string(ic_domain, ext+1, *ext, sizeof(ic_domain));
break;
case 17: /* Root path */
if (!root_server_path[0])
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index b0ef65eb9bd2..701eb82acd1c 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -5827,7 +5827,7 @@ static int inet6_set_link_af(struct net_device *dev, const struct nlattr *nla,
return -EAFNOSUPPORT;
if (nla_parse_nested_deprecated(tb, IFLA_INET6_MAX, nla, NULL, NULL) < 0)
- BUG();
+ return -EINVAL;
if (tb[IFLA_INET6_TOKEN]) {
err = inet6_set_iftoken(idev, nla_data(tb[IFLA_INET6_TOKEN]),
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 0d59efb6b49e..d36ef9d25e73 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1745,10 +1745,7 @@ static struct sk_buff *mld_newpack(struct inet6_dev *idev, unsigned int mtu)
IPV6_TLV_PADN, 0 };
/* we assume size > sizeof(ra) here */
- /* limit our allocations to order-0 page */
- size = min_t(int, size, SKB_MAX_ORDER(0, 0));
skb = sock_alloc_send_skb(sk, size, 1, &err);
-
if (!skb)
return NULL;
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 47a0dc46cbdb..28e44782c94d 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -343,7 +343,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
hdr = ipv6_hdr(skb);
fhdr = (struct frag_hdr *)skb_transport_header(skb);
- if (!(fhdr->frag_off & htons(0xFFF9))) {
+ if (!(fhdr->frag_off & htons(IP6_OFFSET | IP6_MF))) {
/* It is not a fragmented frame */
skb->transport_header += sizeof(struct frag_hdr);
__IP6_INC_STATS(net,
@@ -351,6 +351,8 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
IP6CB(skb)->nhoff = (u8 *)fhdr - skb_network_header(skb);
IP6CB(skb)->flags |= IP6SKB_FRAGMENTED;
+ IP6CB(skb)->frag_max_size = ntohs(hdr->payload_len) +
+ sizeof(struct ipv6hdr);
return 1;
}
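Review bot: The new `frag_max_size` assignment stores `ntohs(hdr->payload_len) + sizeof(struct ipv6hdr)` with no range check, and `payload_len` comes straight from the received header. The sum can reach 65535 + 40. The declaration of `frag_max_size` is not part of this diff, so if it is a 16-bit field the value silently truncates for payload lengths above 65495 and a later consumer would see a far smaller maximum than the real packet size. Please confirm the field width, and if it is 16-bit either clamp the value or skip the assignment for oversized packets. `ipv6_frag_rcv` starts and ends outside the hunk.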
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index a22822bdbf39..d417e514bd52 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -3673,11 +3673,11 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
if (nh) {
if (rt->fib6_src.plen) {
NL_SET_ERR_MSG(extack, "Nexthops can not be used with source routing");
- goto out;
+ goto out_free;
}
if (!nexthop_get(nh)) {
NL_SET_ERR_MSG(extack, "Nexthop has been deleted");
- goto out;
+ goto out_free;
}
rt->nh = nh;
fib6_nh = nexthop_fib6_nh(rt->nh);
@@ -3714,6 +3714,10 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
out:
fib6_info_release(rt);
return ERR_PTR(err);
+out_free:
+ ip_fib_metrics_put(rt->fib6_metrics);
+ kfree(rt);
+ return ERR_PTR(err);
}
int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags,
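Review bot: The new `out_free:` label has no comment saying why the two nexthop failure paths must bypass `fib6_info_release()` at `out:`, so a later cleanup could fold them back together. Both `goto out_free` sites in the previous hunk run before `rt->nh = nh` is assigned, which suggests the normal release path is not safe on a route whose nexthop state is not yet set up; that is inferred, as the release function is not shown. I would add `/* rt->nh not set up yet: free directly instead of fib6_info_release() */` above the label. The open-coded teardown (`ip_fib_metrics_put()` plus `kfree()`) also has to stay in step with whatever the allocator sets up, which lies outside this hunk.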
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index aa98294a3ad3..f7c8110ece5f 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -271,6 +271,9 @@ static struct ip_tunnel *ipip6_tunnel_locate(struct net *net,
if (ipip6_tunnel_create(dev) < 0)
goto failed_free;
+ if (!parms->name[0])
+ strcpy(parms->name, dev->name);
+
return nt;
failed_free:
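Review bot: The added `strcpy(parms->name, dev->name)` is an unbounded copy into a caller-supplied structure. It is safe only if both buffers have the same size, which the hunk does not show. A bounded copy makes that explicit and survives a future change to either field: `strscpy(parms->name, dev->name, sizeof(parms->name));` (assuming `name` is an array member, as the `parms->name[0]` test suggests). `ipip6_tunnel_locate` begins outside the hunk.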
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 8fcbaa1eedf3..214404a558fb 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -50,12 +50,6 @@ struct ieee80211_local;
#define IEEE80211_ENCRYPT_HEADROOM 8
#define IEEE80211_ENCRYPT_TAILROOM 18
-/* IEEE 802.11 (Ch. 9.5 Defragmentation) requires support for concurrent
- * reception of at least three fragmented frames. This limit can be increased
- * by changing this define, at the cost of slower frame reassembly and
- * increased memory use (about 2 kB of RAM per entry). */
-#define IEEE80211_FRAGMENT_MAX 4
-
/* power level hasn't been configured (or set to automatic) */
#define IEEE80211_UNSET_POWER_LEVEL INT_MIN
@@ -88,18 +82,6 @@ extern const u8 ieee80211_ac_to_qos_mask[IEEE80211_NUM_ACS];
#define IEEE80211_MAX_NAN_INSTANCE_ID 255
-struct ieee80211_fragment_entry {
- struct sk_buff_head skb_list;
- unsigned long first_frag_time;
- u16 seq;
- u16 extra_len;
- u16 last_frag;
- u8 rx_queue;
- bool check_sequential_pn; /* needed for CCMP/GCMP */
- u8 last_pn[6]; /* PN of the last fragment if CCMP was used */
-};
-
-
struct ieee80211_bss {
u32 device_ts_beacon, device_ts_presp;
@@ -241,8 +223,15 @@ struct ieee80211_rx_data {
*/
int security_idx;
- u32 tkip_iv32;
- u16 tkip_iv16;
+ union {
+ struct {
+ u32 iv32;
+ u16 iv16;
+ } tkip;
+ struct {
+ u8 pn[IEEE80211_CCMP_PN_LEN];
+ } ccm_gcm;
+ };
};
struct ieee80211_csa_settings {
@@ -902,9 +891,7 @@ struct ieee80211_sub_if_data {
char name[IFNAMSIZ];
- /* Fragment table for host-based reassembly */
- struct ieee80211_fragment_entry fragments[IEEE80211_FRAGMENT_MAX];
- unsigned int fragment_next;
+ struct ieee80211_fragment_cache frags;
/* TID bitmap for NoAck policy */
u16 noack_map;
@@ -2320,4 +2307,7 @@ u32 ieee80211_calc_expected_tx_airtime(struct ieee80211_hw *hw,
#define debug_noinline
#endif
+void ieee80211_init_frag_cache(struct ieee80211_fragment_cache *cache);
+void ieee80211_destroy_frag_cache(struct ieee80211_fragment_cache *cache);
+
#endif /* IEEE80211_I_H */
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 7032a2b59249..2e2f73a4aa73 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -8,7 +8,7 @@
* Copyright 2008, Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright (c) 2016 Intel Deutschland GmbH
- * Copyright (C) 2018-2020 Intel Corporation
+ * Copyright (C) 2018-2021 Intel Corporation
*/
#include <linux/slab.h>
#include <linux/kernel.h>
@@ -677,16 +677,12 @@ static void ieee80211_set_multicast_list(struct net_device *dev)
*/
static void ieee80211_teardown_sdata(struct ieee80211_sub_if_data *sdata)
{
- int i;
-
/* free extra data */
ieee80211_free_keys(sdata, false);
ieee80211_debugfs_remove_netdev(sdata);
- for (i = 0; i < IEEE80211_FRAGMENT_MAX; i++)
- __skb_queue_purge(&sdata->fragments[i].skb_list);
- sdata->fragment_next = 0;
+ ieee80211_destroy_frag_cache(&sdata->frags);
if (ieee80211_vif_is_mesh(&sdata->vif))
ieee80211_mesh_teardown_sdata(sdata);
@@ -1930,8 +1926,7 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
sdata->wdev.wiphy = local->hw.wiphy;
sdata->local = local;
- for (i = 0; i < IEEE80211_FRAGMENT_MAX; i++)
- skb_queue_head_init(&sdata->fragments[i].skb_list);
+ ieee80211_init_frag_cache(&sdata->frags);
INIT_LIST_HEAD(&sdata->key_list);
diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index 56c068cb49c4..f695fc80088b 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -799,6 +799,7 @@ int ieee80211_key_link(struct ieee80211_key *key,
struct ieee80211_sub_if_data *sdata,
struct sta_info *sta)
{
+ static atomic_t key_color = ATOMIC_INIT(0);
struct ieee80211_key *old_key;
int idx = key->conf.keyidx;
bool pairwise = key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE;
@@ -850,6 +851,12 @@ int ieee80211_key_link(struct ieee80211_key *key,
key->sdata = sdata;
key->sta = sta;
+ /*
+ * Assign a unique ID to every key so we can easily prevent mixed
+ * key and fragment cache attacks.
+ */
+ key->color = atomic_inc_return(&key_color);
+
increment_tailroom_need_count(sdata);
ret = ieee80211_key_replace(sdata, sta, pairwise, old_key, key);
diff --git a/net/mac80211/key.h b/net/mac80211/key.h
index 7ad72e9b4991..1e326c89d721 100644
--- a/net/mac80211/key.h
+++ b/net/mac80211/key.h
@@ -128,6 +128,8 @@ struct ieee80211_key {
} debugfs;
#endif
+ unsigned int color;
+
/*
* key config, must be last because it contains key
* material as variable length member
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 62047e93e217..1bb43edd47b6 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -6,7 +6,7 @@
* Copyright 2007-2010 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright(c) 2015 - 2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2020 Intel Corporation
+ * Copyright (C) 2018-2021 Intel Corporation
*/
#include <linux/jiffies.h>
@@ -2123,19 +2123,34 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
return result;
}
+void ieee80211_init_frag_cache(struct ieee80211_fragment_cache *cache)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(cache->entries); i++)
+ skb_queue_head_init(&cache->entries[i].skb_list);
+}
+
+void ieee80211_destroy_frag_cache(struct ieee80211_fragment_cache *cache)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(cache->entries); i++)
+ __skb_queue_purge(&cache->entries[i].skb_list);
+}
+
static inline struct ieee80211_fragment_entry *
-ieee80211_reassemble_add(struct ieee80211_sub_if_data *sdata,
+ieee80211_reassemble_add(struct ieee80211_fragment_cache *cache,
unsigned int frag, unsigned int seq, int rx_queue,
struct sk_buff **skb)
{
struct ieee80211_fragment_entry *entry;
- entry = &sdata->fragments[sdata->fragment_next++];
- if (sdata->fragment_next >= IEEE80211_FRAGMENT_MAX)
- sdata->fragment_next = 0;
+ entry = &cache->entries[cache->next++];
+ if (cache->next >= IEEE80211_FRAGMENT_MAX)
+ cache->next = 0;
- if (!skb_queue_empty(&entry->skb_list))
- __skb_queue_purge(&entry->skb_list);
+ __skb_queue_purge(&entry->skb_list);
__skb_queue_tail(&entry->skb_list, *skb); /* no need for locking */
*skb = NULL;
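Review bot: The new init/destroy helpers bound their loops with `ARRAY_SIZE(cache->entries)`, while `ieee80211_reassemble_add` here and `ieee80211_reassemble_find` in the following hunks still use `IEEE80211_FRAGMENT_MAX`. They are equal today, but mixing the two invites an out-of-bounds index if the array and the macro ever diverge. I would use the array-derived bound throughout, e.g. `if (cache->next >= ARRAY_SIZE(cache->entries))`. The loop counters in both helpers are `int i` compared against an unsigned `ARRAY_SIZE()`; `unsigned int i` avoids the signed/unsigned comparison. The rest of `ieee80211_reassemble_add` lies outside the hunk.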
@@ -2150,14 +2165,14 @@ ieee80211_reassemble_add(struct ieee80211_sub_if_data *sdata,
}
static inline struct ieee80211_fragment_entry *
-ieee80211_reassemble_find(struct ieee80211_sub_if_data *sdata,
+ieee80211_reassemble_find(struct ieee80211_fragment_cache *cache,
unsigned int frag, unsigned int seq,
int rx_queue, struct ieee80211_hdr *hdr)
{
struct ieee80211_fragment_entry *entry;
int i, idx;
- idx = sdata->fragment_next;
+ idx = cache->next;
for (i = 0; i < IEEE80211_FRAGMENT_MAX; i++) {
struct ieee80211_hdr *f_hdr;
struct sk_buff *f_skb;
@@ -2166,7 +2181,7 @@ ieee80211_reassemble_find(struct ieee80211_sub_if_data *sdata,
if (idx < 0)
idx = IEEE80211_FRAGMENT_MAX - 1;
- entry = &sdata->fragments[idx];
+ entry = &cache->entries[idx];
if (skb_queue_empty(&entry->skb_list) || entry->seq != seq ||
entry->rx_queue != rx_queue ||
entry->last_frag + 1 != frag)
@@ -2194,15 +2209,27 @@ ieee80211_reassemble_find(struct ieee80211_sub_if_data *sdata,
return NULL;
}
+static bool requires_sequential_pn(struct ieee80211_rx_data *rx, __le16 fc)
+{
+ return rx->key &&
+ (rx->key->conf.cipher == WLAN_CIPHER_SUITE_CCMP ||
+ rx->key->conf.cipher == WLAN_CIPHER_SUITE_CCMP_256 ||
+ rx->key->conf.cipher == WLAN_CIPHER_SUITE_GCMP ||
+ rx->key->conf.cipher == WLAN_CIPHER_SUITE_GCMP_256) &&
+ ieee80211_has_protected(fc);
+}
+
static ieee80211_rx_result debug_noinline
ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
{
+ struct ieee80211_fragment_cache *cache = &rx->sdata->frags;
struct ieee80211_hdr *hdr;
u16 sc;
__le16 fc;
unsigned int frag, seq;
struct ieee80211_fragment_entry *entry;
struct sk_buff *skb;
+ struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb);
hdr = (struct ieee80211_hdr *)rx->skb->data;
fc = hdr->frame_control;
@@ -2218,6 +2245,9 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
goto out_no_led;
}
+ if (rx->sta)
+ cache = &rx->sta->frags;
+
if (likely(!ieee80211_has_morefrags(fc) && frag == 0))
goto out;
@@ -2236,20 +2266,17 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
if (frag == 0) {
/* This is the first fragment of a new frame. */
- entry = ieee80211_reassemble_add(rx->sdata, frag, seq,
+ entry = ieee80211_reassemble_add(cache, frag, seq,
rx->seqno_idx, &(rx->skb));
- if (rx->key &&
- (rx->key->conf.cipher == WLAN_CIPHER_SUITE_CCMP ||
- rx->key->conf.cipher == WLAN_CIPHER_SUITE_CCMP_256 ||
- rx->key->conf.cipher == WLAN_CIPHER_SUITE_GCMP ||
- rx->key->conf.cipher == WLAN_CIPHER_SUITE_GCMP_256) &&
- ieee80211_has_protected(fc)) {
+ if (requires_sequential_pn(rx, fc)) {
int queue = rx->security_idx;
/* Store CCMP/GCMP PN so that we can verify that the
* next fragment has a sequential PN value.
*/
entry->check_sequential_pn = true;
+ entry->is_protected = true;
+ entry->key_color = rx->key->color;
memcpy(entry->last_pn,
rx->key->u.ccmp.rx_pn[queue],
IEEE80211_CCMP_PN_LEN);
@@ -2261,6 +2288,11 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
sizeof(rx->key->u.gcmp.rx_pn[queue]));
BUILD_BUG_ON(IEEE80211_CCMP_PN_LEN !=
IEEE80211_GCMP_PN_LEN);
+ } else if (rx->key &&
+ (ieee80211_has_protected(fc) ||
+ (status->flag & RX_FLAG_DECRYPTED))) {
+ entry->is_protected = true;
+ entry->key_color = rx->key->color;
}
return RX_QUEUED;
}
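Review bot: In this hunk and the one before it, `entry->is_protected` and `entry->key_color` are only ever set, never cleared. A first fragment that takes neither branch leaves both fields as they were on the recycled cache entry. Unless `ieee80211_reassemble_add()` resets them (its field-initialisation part is outside this diff), an unprotected frame that reuses a slot last used by a protected one would inherit `is_protected` and have its later fragments dropped by the checks in the next hunk. Either confirm the reset exists there, or add a final `else { entry->is_protected = false; }` here.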
@@ -2268,7 +2300,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
/* This is a fragment for a frame that should already be pending in
* fragment cache. Add this fragment to the end of the pending entry.
*/
- entry = ieee80211_reassemble_find(rx->sdata, frag, seq,
+ entry = ieee80211_reassemble_find(cache, frag, seq,
rx->seqno_idx, hdr);
if (!entry) {
I802_DEBUG_INC(rx->local->rx_handlers_drop_defrag);
@@ -2283,25 +2315,39 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
if (entry->check_sequential_pn) {
int i;
u8 pn[IEEE80211_CCMP_PN_LEN], *rpn;
- int queue;
- if (!rx->key ||
- (rx->key->conf.cipher != WLAN_CIPHER_SUITE_CCMP &&
- rx->key->conf.cipher != WLAN_CIPHER_SUITE_CCMP_256 &&
- rx->key->conf.cipher != WLAN_CIPHER_SUITE_GCMP &&
- rx->key->conf.cipher != WLAN_CIPHER_SUITE_GCMP_256))
+ if (!requires_sequential_pn(rx, fc))
+ return RX_DROP_UNUSABLE;
+
+ /* Prevent mixed key and fragment cache attacks */
+ if (entry->key_color != rx->key->color)
return RX_DROP_UNUSABLE;
+
memcpy(pn, entry->last_pn, IEEE80211_CCMP_PN_LEN);
for (i = IEEE80211_CCMP_PN_LEN - 1; i >= 0; i--) {
pn[i]++;
if (pn[i])
break;
}
- queue = rx->security_idx;
- rpn = rx->key->u.ccmp.rx_pn[queue];
+
+ rpn = rx->ccm_gcm.pn;
if (memcmp(pn, rpn, IEEE80211_CCMP_PN_LEN))
return RX_DROP_UNUSABLE;
memcpy(entry->last_pn, pn, IEEE80211_CCMP_PN_LEN);
+ } else if (entry->is_protected &&
+ (!rx->key ||
+ (!ieee80211_has_protected(fc) &&
+ !(status->flag & RX_FLAG_DECRYPTED)) ||
+ rx->key->color != entry->key_color)) {
+ /* Drop this as a mixed key or fragment cache attack, even
+ * if for TKIP Michael MIC should protect us, and WEP is a
+ * lost cause anyway.
+ */
+ return RX_DROP_UNUSABLE;
+ } else if (entry->is_protected && rx->key &&
+ entry->key_color != rx->key->color &&
+ (status->flag & RX_FLAG_DECRYPTED)) {
+ return RX_DROP_UNUSABLE;
}
skb_pull(rx->skb, ieee80211_hdrlen(fc));
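Review bot: The last `else if` added in this hunk (`entry->is_protected && rx->key && entry->key_color != rx->key->color && (status->flag & RX_FLAG_DECRYPTED)`) can never be taken. The branch before it already drops whenever `entry->is_protected` is set and `rx->key->color != entry->key_color`, regardless of the decrypted flag, so every input that satisfies the last condition has already matched. Dead code in a security check makes the policy harder to audit; I would delete the final branch. If it was meant to cover a case where `is_protected` is not set, the condition needs rewriting and a comment. `ieee80211_rx_h_defragment` continues outside the hunk.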
@@ -2494,13 +2540,13 @@ static bool ieee80211_frame_allowed(struct ieee80211_rx_data *rx, __le16 fc)
struct ethhdr *ehdr = (struct ethhdr *) rx->skb->data;
/*
- * Allow EAPOL frames to us/the PAE group address regardless
- * of whether the frame was encrypted or not.
+ * Allow EAPOL frames to us/the PAE group address regardless of
+ * whether the frame was encrypted or not, and always disallow
+ * all other destination addresses for them.
*/
- if (ehdr->h_proto == rx->sdata->control_port_protocol &&
- (ether_addr_equal(ehdr->h_dest, rx->sdata->vif.addr) ||
- ether_addr_equal(ehdr->h_dest, pae_group_addr)))
- return true;
+ if (unlikely(ehdr->h_proto == rx->sdata->control_port_protocol))
+ return ether_addr_equal(ehdr->h_dest, rx->sdata->vif.addr) ||
+ ether_addr_equal(ehdr->h_dest, pae_group_addr);
if (ieee80211_802_1x_port_control(rx) ||
ieee80211_drop_unencrypted(rx, fc))
@@ -2525,8 +2571,28 @@ static void ieee80211_deliver_skb_to_local_stack(struct sk_buff *skb,
cfg80211_rx_control_port(dev, skb, noencrypt);
dev_kfree_skb(skb);
} else {
+ struct ethhdr *ehdr = (void *)skb_mac_header(skb);
+
memset(skb->cb, 0, sizeof(skb->cb));
+ /*
+ * 802.1X over 802.11 requires that the authenticator address
+ * be used for EAPOL frames. However, 802.1X allows the use of
+ * the PAE group address instead. If the interface is part of
+ * a bridge and we pass the frame with the PAE group address,
+ * then the bridge will forward it to the network (even if the
+ * client was not associated yet), which isn't supposed to
+ * happen.
+ * To avoid that, rewrite the destination address to our own
+ * address, so that the authenticator (e.g. hostapd) will see
+ * the frame, but bridge won't forward it anywhere else. Note
+ * that due to earlier filtering, the only other address can
+ * be the PAE group address.
+ */
+ if (unlikely(skb->protocol == sdata->control_port_protocol &&
+ !ether_addr_equal(ehdr->h_dest, sdata->vif.addr)))
+ ether_addr_copy(ehdr->h_dest, sdata->vif.addr);
+
/* deliver to local stack */
if (rx->list)
list_add_tail(&skb->list, rx->list);
@@ -2566,6 +2632,7 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx)
if ((sdata->vif.type == NL80211_IFTYPE_AP ||
sdata->vif.type == NL80211_IFTYPE_AP_VLAN) &&
!(sdata->flags & IEEE80211_SDATA_DONT_BRIDGE_PACKETS) &&
+ ehdr->h_proto != rx->sdata->control_port_protocol &&
(sdata->vif.type != NL80211_IFTYPE_AP_VLAN || !sdata->u.vlan.sta)) {
if (is_multicast_ether_addr(ehdr->h_dest) &&
ieee80211_vif_get_num_mcast_if(sdata) != 0) {
@@ -2675,7 +2742,7 @@ __ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx, u8 data_offset)
if (ieee80211_data_to_8023_exthdr(skb, &ethhdr,
rx->sdata->vif.addr,
rx->sdata->vif.type,
- data_offset))
+ data_offset, true))
return RX_DROP_UNUSABLE;
ieee80211_amsdu_to_8023s(skb, &frame_list, dev->dev_addr,
@@ -2732,6 +2799,23 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx)
if (is_multicast_ether_addr(hdr->addr1))
return RX_DROP_UNUSABLE;
+ if (rx->key) {
+ /*
+ * We should not receive A-MSDUs on pre-HT connections,
+ * and HT connections cannot use old ciphers. Thus drop
+ * them, as in those cases we couldn't even have SPP
+ * A-MSDUs or such.
+ */
+ switch (rx->key->conf.cipher) {
+ case WLAN_CIPHER_SUITE_WEP40:
+ case WLAN_CIPHER_SUITE_WEP104:
+ case WLAN_CIPHER_SUITE_TKIP:
+ return RX_DROP_UNUSABLE;
+ default:
+ break;
+ }
+ }
+
return __ieee80211_rx_h_amsdu(rx, 0);
}
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index ec6973ee88ef..f2fb69da9b6e 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -4,7 +4,7 @@
* Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright (C) 2015 - 2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2020 Intel Corporation
+ * Copyright (C) 2018-2021 Intel Corporation
*/
#include <linux/module.h>
@@ -392,6 +392,8 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
u64_stats_init(&sta->rx_stats.syncp);
+ ieee80211_init_frag_cache(&sta->frags);
+
sta->sta_state = IEEE80211_STA_NONE;
/* Mark TID as unreserved */
@@ -1102,6 +1104,8 @@ static void __sta_info_destroy_part2(struct sta_info *sta)
ieee80211_sta_debugfs_remove(sta);
+ ieee80211_destroy_frag_cache(&sta->frags);
+
cleanup_single_sta(sta);
}
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 78b9d0c7cc58..0333072ebd98 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -3,7 +3,7 @@
* Copyright 2002-2005, Devicescape Software, Inc.
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright(c) 2015-2017 Intel Deutschland GmbH
- * Copyright(c) 2020 Intel Corporation
+ * Copyright(c) 2020-2021 Intel Corporation
*/
#ifndef STA_INFO_H
@@ -439,6 +439,34 @@ struct ieee80211_sta_rx_stats {
};
/*
+ * IEEE 802.11-2016 (10.6 "Defragmentation") recommends support for "concurrent
+ * reception of at least one MSDU per access category per associated STA"
+ * on APs, or "at least one MSDU per access category" on other interface types.
+ *
+ * This limit can be increased by changing this define, at the cost of slower
+ * frame reassembly and increased memory use while fragments are pending.
+ */
+#define IEEE80211_FRAGMENT_MAX 4
+
+struct ieee80211_fragment_entry {
+ struct sk_buff_head skb_list;
+ unsigned long first_frag_time;
+ u16 seq;
+ u16 extra_len;
+ u16 last_frag;
+ u8 rx_queue;
+ u8 check_sequential_pn:1, /* needed for CCMP/GCMP */
+ is_protected:1;
+ u8 last_pn[6]; /* PN of the last fragment if CCMP was used */
+ unsigned int key_color;
+};
+
+struct ieee80211_fragment_cache {
+ struct ieee80211_fragment_entry entries[IEEE80211_FRAGMENT_MAX];
+ unsigned int next;
+};
+
+/*
* The bandwidth threshold below which the per-station CoDel parameters will be
* scaled to be more lenient (to prevent starvation of slow stations). This
* value will be scaled by the number of active stations when it is being
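Review bot: In the relocated `struct ieee80211_fragment_entry`, `last_pn` is still declared as `u8 last_pn[6]`, while every copy into and out of it in rx.c is sized with `IEEE80211_CCMP_PN_LEN`. Declaring it as `u8 last_pn[IEEE80211_CCMP_PN_LEN];` ties the buffer to the length used by the `memcpy()`/`memcmp()` calls, provided the constant is visible from this header, which the hunk does not show. The trailing comment could also read `/* PN of the last fragment if CCMP/GCMP was used */`, and the new `is_protected` and `key_color` fields deserve a short comment each saying they bind the cached fragments to the key that protected the first one.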
@@ -531,6 +559,7 @@ struct ieee80211_sta_rx_stats {
* @status_stats.last_ack_signal: last ACK signal
* @status_stats.ack_signal_filled: last ACK signal validity
* @status_stats.avg_ack_signal: average ACK signal
+ * @frags: fragment cache
*/
struct sta_info {
/* General information, mostly static */
@@ -639,6 +668,8 @@ struct sta_info {
struct cfg80211_chan_def tdls_chandef;
+ struct ieee80211_fragment_cache frags;
+
/* keep last! */
struct ieee80211_sta sta;
};
diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index 91bf32af55e9..bca47fad5a16 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -3,6 +3,7 @@
* Copyright 2002-2004, Instant802 Networks, Inc.
* Copyright 2008, Jouni Malinen <j@w1.fi>
* Copyright (C) 2016-2017 Intel Deutschland GmbH
+ * Copyright (C) 2020-2021 Intel Corporation
*/
#include <linux/netdevice.h>
@@ -167,8 +168,8 @@ ieee80211_rx_h_michael_mic_verify(struct ieee80211_rx_data *rx)
update_iv:
/* update IV in key information to be able to detect replays */
- rx->key->u.tkip.rx[rx->security_idx].iv32 = rx->tkip_iv32;
- rx->key->u.tkip.rx[rx->security_idx].iv16 = rx->tkip_iv16;
+ rx->key->u.tkip.rx[rx->security_idx].iv32 = rx->tkip.iv32;
+ rx->key->u.tkip.rx[rx->security_idx].iv16 = rx->tkip.iv16;
return RX_CONTINUE;
@@ -294,8 +295,8 @@ ieee80211_crypto_tkip_decrypt(struct ieee80211_rx_data *rx)
key, skb->data + hdrlen,
skb->len - hdrlen, rx->sta->sta.addr,
hdr->addr1, hwaccel, rx->security_idx,
- &rx->tkip_iv32,
- &rx->tkip_iv16);
+ &rx->tkip.iv32,
+ &rx->tkip.iv16);
if (res != TKIP_DECRYPT_OK)
return RX_DROP_UNUSABLE;
@@ -553,6 +554,8 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx,
}
memcpy(key->u.ccmp.rx_pn[queue], pn, IEEE80211_CCMP_PN_LEN);
+ if (unlikely(ieee80211_is_frag(hdr)))
+ memcpy(rx->ccm_gcm.pn, pn, IEEE80211_CCMP_PN_LEN);
}
/* Remove CCMP header and MIC */
@@ -781,6 +784,8 @@ ieee80211_crypto_gcmp_decrypt(struct ieee80211_rx_data *rx)
}
memcpy(key->u.gcmp.rx_pn[queue], pn, IEEE80211_GCMP_PN_LEN);
+ if (unlikely(ieee80211_is_frag(hdr)))
+ memcpy(rx->ccm_gcm.pn, pn, IEEE80211_CCMP_PN_LEN);
}
/* Remove GCMP header and MIC */
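Review bot: The `memcpy()` added to `ieee80211_crypto_gcmp_decrypt` sizes the copy with `IEEE80211_CCMP_PN_LEN`, while the line directly above it uses `IEEE80211_GCMP_PN_LEN` for the same `pn` buffer. The two are forced equal by the `BUILD_BUG_ON` in rx.c, so this is not a bug, but using the destination's own size keeps the copy correct by construction: `memcpy(rx->ccm_gcm.pn, pn, sizeof(rx->ccm_gcm.pn));`. The condition guarding the enclosing block is outside the hunk, so whether this store happens for every fragment that later reaches the PN comparison in rx.c cannot be confirmed from here.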
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 99fc21406168..6b825fb3fa83 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -130,7 +130,6 @@ static void mptcp_parse_option(const struct sk_buff *skb,
memcpy(mp_opt->hmac, ptr, MPTCPOPT_HMAC_LEN);
pr_debug("MP_JOIN hmac");
} else {
- pr_warn("MP_JOIN bad option size");
mp_opt->mp_join = 0;
}
break;
@@ -1024,7 +1023,7 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ADDADDR);
} else {
mptcp_pm_add_addr_echoed(msk, &mp_opt.addr);
- mptcp_pm_del_add_timer(msk, &mp_opt.addr);
+ mptcp_pm_del_add_timer(msk, &mp_opt.addr, true);
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ECHOADD);
}
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 6ba040897738..2469e06a3a9d 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -346,18 +346,18 @@ out:
struct mptcp_pm_add_entry *
mptcp_pm_del_add_timer(struct mptcp_sock *msk,
- struct mptcp_addr_info *addr)
+ struct mptcp_addr_info *addr, bool check_id)
{
struct mptcp_pm_add_entry *entry;
struct sock *sk = (struct sock *)msk;
spin_lock_bh(&msk->pm.lock);
entry = mptcp_lookup_anno_list_by_saddr(msk, addr);
- if (entry)
+ if (entry && (!check_id || entry->addr.id == addr->id))
entry->retrans_times = ADD_ADDR_RETRANS_MAX;
spin_unlock_bh(&msk->pm.lock);
- if (entry)
+ if (entry && (!check_id || entry->addr.id == addr->id))
sk_stop_timer_sync(sk, &entry->add_timer);
return entry;
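Review bot: The condition `entry && (!check_id || entry->addr.id == addr->id)` is written out twice, and the second evaluation reads `entry->addr.id` after `spin_unlock_bh(&msk->pm.lock)`. Evaluating it once under the lock removes the duplication and the unlocked read: `stop = entry && (!check_id || entry->addr.id == addr->id);` inside the critical section, then `if (stop)` for both the retransmit update and `sk_stop_timer_sync()`. The function also still returns `entry` when the id did not match. The visible caller that passes `true` ignores the return value, but a comment on the function should say that a non-NULL return does not mean the timer was stopped.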
@@ -1064,7 +1064,7 @@ static bool remove_anno_list_by_saddr(struct mptcp_sock *msk,
{
struct mptcp_pm_add_entry *entry;
- entry = mptcp_pm_del_add_timer(msk, addr);
+ entry = mptcp_pm_del_add_timer(msk, addr, false);
if (entry) {
list_del(&entry->list);
kfree(entry);
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 29a2d690d8d5..5edc686faff1 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -879,12 +879,18 @@ static bool mptcp_skb_can_collapse_to(u64 write_seq,
!mpext->frozen;
}
+/* we can append data to the given data frag if:
+ * - there is space available in the backing page_frag
+ * - the data frag tail matches the current page_frag free offset
+ * - the data frag end sequence number matches the current write seq
+ */
static bool mptcp_frag_can_collapse_to(const struct mptcp_sock *msk,
const struct page_frag *pfrag,
const struct mptcp_data_frag *df)
{
return df && pfrag->page == df->page &&
pfrag->size - pfrag->offset > 0 &&
+ pfrag->offset == (df->offset + df->data_len) &&
df->data_seq + df->data_len == msk->write_seq;
}
@@ -941,6 +947,10 @@ static void __mptcp_update_wmem(struct sock *sk)
{
struct mptcp_sock *msk = mptcp_sk(sk);
+#ifdef CONFIG_LOCKDEP
+ WARN_ON_ONCE(!lockdep_is_held(&sk->sk_lock.slock));
+#endif
+
if (!msk->wmem_reserved)
return;
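Review bot: The lock assertion added to `__mptcp_update_wmem`, and again to `__mptcp_clean_una_wakeup` in the next hunk, open-codes `#ifdef CONFIG_LOCKDEP` around `WARN_ON_ONCE(!lockdep_is_held(...))`. The lockdep helpers already compile away when lockdep is disabled, so the `#ifdef` block can be replaced by a single line: `lockdep_assert_held_once(&sk->sk_lock.slock);` if that variant is available in this tree, otherwise `lockdep_assert_held(&sk->sk_lock.slock);`. That keeps the function bodies free of preprocessor conditionals.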
@@ -1079,10 +1089,20 @@ out:
static void __mptcp_clean_una_wakeup(struct sock *sk)
{
+#ifdef CONFIG_LOCKDEP
+ WARN_ON_ONCE(!lockdep_is_held(&sk->sk_lock.slock));
+#endif
__mptcp_clean_una(sk);
mptcp_write_space(sk);
}
+static void mptcp_clean_una_wakeup(struct sock *sk)
+{
+ mptcp_data_lock(sk);
+ __mptcp_clean_una_wakeup(sk);
+ mptcp_data_unlock(sk);
+}
+
static void mptcp_enter_memory_pressure(struct sock *sk)
{
struct mptcp_subflow_context *subflow;
@@ -2293,7 +2313,7 @@ static void __mptcp_retrans(struct sock *sk)
struct sock *ssk;
int ret;
- __mptcp_clean_una_wakeup(sk);
+ mptcp_clean_una_wakeup(sk);
dfrag = mptcp_rtx_head(sk);
if (!dfrag) {
if (mptcp_data_fin_enabled(msk)) {
@@ -2418,13 +2438,12 @@ static int __mptcp_init_sock(struct sock *sk)
timer_setup(&msk->sk.icsk_retransmit_timer, mptcp_retransmit_timer, 0);
timer_setup(&sk->sk_timer, mptcp_timeout_timer, 0);
- tcp_assign_congestion_control(sk);
-
return 0;
}
static int mptcp_init_sock(struct sock *sk)
{
+ struct inet_connection_sock *icsk = inet_csk(sk);
struct net *net = sock_net(sk);
int ret;
@@ -2442,6 +2461,16 @@ static int mptcp_init_sock(struct sock *sk)
if (ret)
return ret;
+ /* fetch the ca name; do it outside __mptcp_init_sock(), so that clone will
+ * propagate the correct value
+ */
+ tcp_assign_congestion_control(sk);
+ strcpy(mptcp_sk(sk)->ca_name, icsk->icsk_ca_ops->name);
+
+ /* no need to keep a reference to the ops, the name will suffice */
+ tcp_cleanup_congestion_control(sk);
+ icsk->icsk_ca_ops = NULL;
+
sk_sockets_allocated_inc(sk);
sk->sk_rcvbuf = sock_net(sk)->ipv4.sysctl_tcp_rmem[1];
sk->sk_sndbuf = sock_net(sk)->ipv4.sysctl_tcp_wmem[1];
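Review bot: The added `strcpy(mptcp_sk(sk)->ca_name, icsk->icsk_ca_ops->name)` is an unbounded copy into the new `ca_name[TCP_CA_NAME_MAX]` field. The `strcpy(msk->ca_name, name)` in the net/mptcp/sockopt.c hunk does the same with a name that originates from a setsockopt call. Both are safe only if every source is NUL-terminated within `TCP_CA_NAME_MAX` bytes, which these hunks do not show. A bounded copy makes that independent of the callers: `strscpy(mptcp_sk(sk)->ca_name, icsk->icsk_ca_ops->name, sizeof(mptcp_sk(sk)->ca_name));` and likewise `strscpy(msk->ca_name, name, sizeof(msk->ca_name));`.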
@@ -2616,7 +2645,6 @@ static void __mptcp_destroy_sock(struct sock *sk)
sk_stream_kill_queues(sk);
xfrm_sk_free_policy(sk);
- tcp_cleanup_congestion_control(sk);
sk_refcnt_debug_release(sk);
mptcp_dispose_initial_subflow(msk);
sock_put(sk);
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index edc0128730df..0c6f99c67345 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -258,6 +258,7 @@ struct mptcp_sock {
} rcvq_space;
u32 setsockopt_seq;
+ char ca_name[TCP_CA_NAME_MAX];
};
#define mptcp_lock_sock(___sk, cb) do { \
@@ -671,7 +672,7 @@ void mptcp_pm_free_anno_list(struct mptcp_sock *msk);
bool mptcp_pm_sport_in_anno_list(struct mptcp_sock *msk, const struct sock *sk);
struct mptcp_pm_add_entry *
mptcp_pm_del_add_timer(struct mptcp_sock *msk,
- struct mptcp_addr_info *addr);
+ struct mptcp_addr_info *addr, bool check_id);
struct mptcp_pm_add_entry *
mptcp_lookup_anno_list_by_saddr(struct mptcp_sock *msk,
struct mptcp_addr_info *addr);
diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index 00d941b66c1e..a79798189599 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -547,7 +547,7 @@ static int mptcp_setsockopt_sol_tcp_congestion(struct mptcp_sock *msk, sockptr_t
}
if (ret == 0)
- tcp_set_congestion_control(sk, name, false, cap_net_admin);
+ strcpy(msk->ca_name, name);
release_sock(sk);
return ret;
@@ -705,7 +705,7 @@ static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk)
sock_valbool_flag(ssk, SOCK_DBG, sock_flag(sk, SOCK_DBG));
if (inet_csk(sk)->icsk_ca_ops != inet_csk(ssk)->icsk_ca_ops)
- tcp_set_congestion_control(ssk, inet_csk(sk)->icsk_ca_ops->name, false, true);
+ tcp_set_congestion_control(ssk, msk->ca_name, false, true);
}
static void __mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk)
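Review bot: The guard `inet_csk(sk)->icsk_ca_ops != inet_csk(ssk)->icsk_ca_ops` is kept, but the net/mptcp/protocol.c hunk in this same diff sets `icsk->icsk_ca_ops = NULL` on the MPTCP socket in `mptcp_init_sock()`. Assuming `sk` here is that MPTCP-level socket, the comparison is now against NULL and is true for any subflow that has a congestion-control module attached, so `tcp_set_congestion_control()` runs on every sync. Comparing what is actually being propagated would restore the intent, for example checking `msk->ca_name` against the subflow's current `icsk_ca_ops->name` with `strcmp()`, or dropping the guard and saying so in a comment. The start of `sync_socket_options` is outside the hunk.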
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index a5ede357cfbc..ef3d037f984a 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -630,21 +630,20 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
/* if the sk is MP_CAPABLE, we try to fetch the client key */
if (subflow_req->mp_capable) {
- if (TCP_SKB_CB(skb)->seq != subflow_req->ssn_offset + 1) {
- /* here we can receive and accept an in-window,
- * out-of-order pkt, which will not carry the MP_CAPABLE
- * opt even on mptcp enabled paths
- */
- goto create_msk;
- }
-
+ /* we can receive and accept an in-window, out-of-order pkt,
+ * which may not carry the MP_CAPABLE opt even on mptcp enabled
+ * paths: always try to extract the peer key, and fallback
+ * for packets missing it.
+ * Even OoO DSS packets coming legitly after dropped or
+ * reordered MPC will cause fallback, but we don't have other
+ * options.
+ */
mptcp_get_options(skb, &mp_opt);
if (!mp_opt.mp_capable) {
fallback = true;
goto create_child;
}
-create_msk:
new_msk = mptcp_sk_clone(listener->conn, &mp_opt, req);
if (!new_msk)
fallback = true;
@@ -867,7 +866,6 @@ static enum mapping_status get_mapping_status(struct sock *ssk,
data_len = mpext->data_len;
if (data_len == 0) {
- pr_err("Infinite mapping not handled");
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_INFINITEMAPRX);
return MAPPING_INVALID;
}
@@ -1013,21 +1011,11 @@ static bool subflow_check_data_avail(struct sock *ssk)
status = get_mapping_status(ssk, msk);
trace_subflow_check_data_avail(status, skb_peek(&ssk->sk_receive_queue));
- if (status == MAPPING_INVALID) {
- ssk->sk_err = EBADMSG;
- goto fatal;
- }
- if (status == MAPPING_DUMMY) {
- __mptcp_do_fallback(msk);
- skb = skb_peek(&ssk->sk_receive_queue);
- subflow->map_valid = 1;
- subflow->map_seq = READ_ONCE(msk->ack_seq);
- subflow->map_data_len = skb->len;
- subflow->map_subflow_seq = tcp_sk(ssk)->copied_seq -
- subflow->ssn_offset;
- subflow->data_avail = MPTCP_SUBFLOW_DATA_AVAIL;
- return true;
- }
+ if (unlikely(status == MAPPING_INVALID))
+ goto fallback;
+
+ if (unlikely(status == MAPPING_DUMMY))
+ goto fallback;
if (status != MAPPING_OK)
goto no_data;
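Review bot: The two new checks take the same branch and can be written as one, `if (unlikely(status == MAPPING_INVALID || status == MAPPING_DUMMY)) goto fallback;`. Keeping them separate suggests the cases are handled differently, which is no longer true after this change. A short comment that the fatal-versus-fallback decision now lives at the `fallback:` label would help the reader, since the label is in a later hunk.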
@@ -1040,10 +1028,8 @@ static bool subflow_check_data_avail(struct sock *ssk)
* MP_CAPABLE-based mapping
*/
if (unlikely(!READ_ONCE(msk->can_ack))) {
- if (!subflow->mpc_map) {
- ssk->sk_err = EBADMSG;
- goto fatal;
- }
+ if (!subflow->mpc_map)
+ goto fallback;
WRITE_ONCE(msk->remote_key, subflow->remote_key);
WRITE_ONCE(msk->ack_seq, subflow->map_seq);
WRITE_ONCE(msk->can_ack, true);
@@ -1071,17 +1057,31 @@ static bool subflow_check_data_avail(struct sock *ssk)
no_data:
subflow_sched_work_if_closed(msk, ssk);
return false;
-fatal:
- /* fatal protocol error, close the socket */
- /* This barrier is coupled with smp_rmb() in tcp_poll() */
- smp_wmb();
- ssk->sk_error_report(ssk);
- tcp_set_state(ssk, TCP_CLOSE);
- subflow->reset_transient = 0;
- subflow->reset_reason = MPTCP_RST_EMPTCP;
- tcp_send_active_reset(ssk, GFP_ATOMIC);
- subflow->data_avail = 0;
- return false;
+
+fallback:
+ /* RFC 8684 section 3.7. */
+ if (subflow->mp_join || subflow->fully_established) {
+ /* fatal protocol error, close the socket.
+ * subflow_error_report() will introduce the appropriate barriers
+ */
+ ssk->sk_err = EBADMSG;
+ ssk->sk_error_report(ssk);
+ tcp_set_state(ssk, TCP_CLOSE);
+ subflow->reset_transient = 0;
+ subflow->reset_reason = MPTCP_RST_EMPTCP;
+ tcp_send_active_reset(ssk, GFP_ATOMIC);
+ subflow->data_avail = 0;
+ return false;
+ }
+
+ __mptcp_do_fallback(msk);
+ skb = skb_peek(&ssk->sk_receive_queue);
+ subflow->map_valid = 1;
+ subflow->map_seq = READ_ONCE(msk->ack_seq);
+ subflow->map_data_len = skb->len;
+ subflow->map_subflow_seq = tcp_sk(ssk)->copied_seq - subflow->ssn_offset;
+ subflow->data_avail = MPTCP_SUBFLOW_DATA_AVAIL;
+ return true;
}
bool mptcp_subflow_data_available(struct sock *sk)
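Review bot: In the non-fatal part of `fallback:` the result of `skb_peek()` is dereferenced (`skb->len`) with no NULL check, and the label is now reached from three sites rather than only the MAPPING_DUMMY case. The callers probably run only after a packet was found on the receive queue, but that code is outside this hunk. Either a guard such as `if (WARN_ON_ONCE(!skb)) goto no_data;` or a comment stating the invariant would make this safe against later changes. The comment in the fatal branch names `subflow_error_report()` while the code calls `ssk->sk_error_report(ssk)`; it should say that the callback is expected to be that function.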
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index d45dbcba8b49..c25097092a06 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -1367,7 +1367,7 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u,
ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
svc->port = u->port;
svc->fwmark = u->fwmark;
- svc->flags = u->flags;
+ svc->flags = u->flags & ~IP_VS_SVC_F_HASHED;
svc->timeout = u->timeout * HZ;
svc->netmask = u->netmask;
svc->ipvs = ipvs;
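Review bot: The mask strips one kernel-internal bit from the user-supplied `u->flags` but still accepts every other bit as given. If further internal or undefined flag bits exist (not visible in this hunk), an allow-list of the user-settable flags would be more robust than clearing a single bit. At minimum the line deserves a comment such as `/* IP_VS_SVC_F_HASHED is internal state set when the service is hashed; never take it from userspace */`. The function body starts and ends outside the hunk.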
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index 89e5bac384d7..dc9ca12b0489 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -664,7 +664,7 @@ int nf_conntrack_proto_init(void)
#if IS_ENABLED(CONFIG_IPV6)
cleanup_sockopt:
- nf_unregister_sockopt(&so_getorigdst6);
+ nf_unregister_sockopt(&so_getorigdst);
#endif
return ret;
}
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index 39c02d1aeedf..1d02650dd715 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -306,8 +306,7 @@ void flow_offload_refresh(struct nf_flowtable *flow_table,
{
flow->timeout = nf_flowtable_time_stamp + NF_FLOW_TIMEOUT;
- if (likely(!nf_flowtable_hw_offload(flow_table) ||
- !test_and_clear_bit(NF_FLOW_HW_REFRESH, &flow->flags)))
+ if (likely(!nf_flowtable_hw_offload(flow_table)))
return;
nf_flow_offload_add(flow_table, flow);
diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
index 2af7bdb38407..528b2f172684 100644
--- a/net/netfilter/nf_flow_table_offload.c
+++ b/net/netfilter/nf_flow_table_offload.c
@@ -902,10 +902,11 @@ static void flow_offload_work_add(struct flow_offload_work *offload)
err = flow_offload_rule_add(offload, flow_rule);
if (err < 0)
- set_bit(NF_FLOW_HW_REFRESH, &offload->flow->flags);
- else
- set_bit(IPS_HW_OFFLOAD_BIT, &offload->flow->ct->status);
+ goto out;
+
+ set_bit(IPS_HW_OFFLOAD_BIT, &offload->flow->ct->status);
+out:
nf_flow_offload_destroy(flow_rule);
}
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index d63d2d8f769c..72bc759179ef 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -736,7 +736,8 @@ static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net,
goto nla_put_failure;
if (nla_put_string(skb, NFTA_TABLE_NAME, table->name) ||
- nla_put_be32(skb, NFTA_TABLE_FLAGS, htonl(table->flags)) ||
+ nla_put_be32(skb, NFTA_TABLE_FLAGS,
+ htonl(table->flags & NFT_TABLE_F_MASK)) ||
nla_put_be32(skb, NFTA_TABLE_USE, htonl(table->use)) ||
nla_put_be64(skb, NFTA_TABLE_HANDLE, cpu_to_be64(table->handle),
NFTA_TABLE_PAD))
@@ -947,20 +948,22 @@ err_register_hooks:
static void nf_tables_table_disable(struct net *net, struct nft_table *table)
{
+ table->flags &= ~NFT_TABLE_F_DORMANT;
nft_table_disable(net, table, 0);
+ table->flags |= NFT_TABLE_F_DORMANT;
}
-enum {
- NFT_TABLE_STATE_UNCHANGED = 0,
- NFT_TABLE_STATE_DORMANT,
- NFT_TABLE_STATE_WAKEUP
-};
+#define __NFT_TABLE_F_INTERNAL (NFT_TABLE_F_MASK + 1)
+#define __NFT_TABLE_F_WAS_DORMANT (__NFT_TABLE_F_INTERNAL << 0)
+#define __NFT_TABLE_F_WAS_AWAKEN (__NFT_TABLE_F_INTERNAL << 1)
+#define __NFT_TABLE_F_UPDATE (__NFT_TABLE_F_WAS_DORMANT | \
+ __NFT_TABLE_F_WAS_AWAKEN)
static int nf_tables_updtable(struct nft_ctx *ctx)
{
struct nft_trans *trans;
u32 flags;
- int ret = 0;
+ int ret;
if (!ctx->nla[NFTA_TABLE_FLAGS])
return 0;
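Review bot: `__NFT_TABLE_F_INTERNAL` is derived as `NFT_TABLE_F_MASK + 1`, which yields a free single bit only while the mask is a contiguous run of low bits. That assumption is not stated anywhere in the hunk; a comment above the defines, or a build-time check, would stop a future flag addition from silently colliding with the internal bits. Because the internal bits now live in the same `table->flags` word as the user-visible flags, every path that reports or compares `table->flags` has to mask with `NFT_TABLE_F_MASK`, as the dump path earlier in this diff does. `nf_tables_table_disable()` also needs a comment explaining why it clears and then re-sets `NFT_TABLE_F_DORMANT` around `nft_table_disable()`.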
@@ -985,21 +988,27 @@ static int nf_tables_updtable(struct nft_ctx *ctx)
if ((flags & NFT_TABLE_F_DORMANT) &&
!(ctx->table->flags & NFT_TABLE_F_DORMANT)) {
- nft_trans_table_state(trans) = NFT_TABLE_STATE_DORMANT;
+ ctx->table->flags |= NFT_TABLE_F_DORMANT;
+ if (!(ctx->table->flags & __NFT_TABLE_F_UPDATE))
+ ctx->table->flags |= __NFT_TABLE_F_WAS_AWAKEN;
} else if (!(flags & NFT_TABLE_F_DORMANT) &&
ctx->table->flags & NFT_TABLE_F_DORMANT) {
- ret = nf_tables_table_enable(ctx->net, ctx->table);
- if (ret >= 0)
- nft_trans_table_state(trans) = NFT_TABLE_STATE_WAKEUP;
+ ctx->table->flags &= ~NFT_TABLE_F_DORMANT;
+ if (!(ctx->table->flags & __NFT_TABLE_F_UPDATE)) {
+ ret = nf_tables_table_enable(ctx->net, ctx->table);
+ if (ret < 0)
+ goto err_register_hooks;
+
+ ctx->table->flags |= __NFT_TABLE_F_WAS_DORMANT;
+ }
}
- if (ret < 0)
- goto err;
- nft_trans_table_flags(trans) = flags;
nft_trans_table_update(trans) = true;
nft_trans_commit_list_add_tail(ctx->net, trans);
+
return 0;
-err:
+
+err_register_hooks:
nft_trans_destroy(trans);
return ret;
}
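Review bot: When `nf_tables_table_enable()` fails, the `goto err_register_hooks` path leaves `NFT_TABLE_F_DORMANT` cleared. The flag is dropped just before the call and the error label only destroys the transaction, so the table ends up marked active although its hooks were not registered. Restoring the flag in the error path, `ctx->table->flags |= NFT_TABLE_F_DORMANT;` placed before `nft_trans_destroy(trans);`, keeps the state consistent. The start of the function is outside this hunk.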
@@ -1905,7 +1914,7 @@ static int nft_chain_parse_netdev(struct net *net,
static int nft_chain_parse_hook(struct net *net,
const struct nlattr * const nla[],
struct nft_chain_hook *hook, u8 family,
- bool autoload)
+ struct netlink_ext_ack *extack, bool autoload)
{
struct nftables_pernet *nft_net = nft_pernet(net);
struct nlattr *ha[NFTA_HOOK_MAX + 1];
@@ -1935,8 +1944,10 @@ static int nft_chain_parse_hook(struct net *net,
if (nla[NFTA_CHAIN_TYPE]) {
type = nf_tables_chain_type_lookup(net, nla[NFTA_CHAIN_TYPE],
family, autoload);
- if (IS_ERR(type))
+ if (IS_ERR(type)) {
+ NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_TYPE]);
return PTR_ERR(type);
+ }
}
if (hook->num >= NFT_MAX_HOOKS || !(type->hook_mask & (1 << hook->num)))
return -EOPNOTSUPP;
@@ -1945,8 +1956,11 @@ static int nft_chain_parse_hook(struct net *net,
hook->priority <= NF_IP_PRI_CONNTRACK)
return -EOPNOTSUPP;
- if (!try_module_get(type->owner))
+ if (!try_module_get(type->owner)) {
+ if (nla[NFTA_CHAIN_TYPE])
+ NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_TYPE]);
return -ENOENT;
+ }
hook->type = type;
@@ -2057,7 +2071,8 @@ static int nft_chain_add(struct nft_table *table, struct nft_chain *chain)
static u64 chain_id;
static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
- u8 policy, u32 flags)
+ u8 policy, u32 flags,
+ struct netlink_ext_ack *extack)
{
const struct nlattr * const *nla = ctx->nla;
struct nft_table *table = ctx->table;
@@ -2079,7 +2094,8 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
if (flags & NFT_CHAIN_BINDING)
return -EOPNOTSUPP;
- err = nft_chain_parse_hook(net, nla, &hook, family, true);
+ err = nft_chain_parse_hook(net, nla, &hook, family, extack,
+ true);
if (err < 0)
return err;
@@ -2234,7 +2250,7 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
return -EEXIST;
}
err = nft_chain_parse_hook(ctx->net, nla, &hook, ctx->family,
- false);
+ extack, false);
if (err < 0)
return err;
@@ -2447,7 +2463,7 @@ static int nf_tables_newchain(struct sk_buff *skb, const struct nfnl_info *info,
extack);
}
- return nf_tables_addchain(&ctx, family, genmask, policy, flags);
+ return nf_tables_addchain(&ctx, family, genmask, policy, flags, extack);
}
static int nf_tables_delchain(struct sk_buff *skb, const struct nfnl_info *info,
@@ -3328,8 +3344,10 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
if (n == NFT_RULE_MAXEXPRS)
goto err1;
err = nf_tables_expr_parse(&ctx, tmp, &expr_info[n]);
- if (err < 0)
+ if (err < 0) {
+ NL_SET_BAD_ATTR(extack, tmp);
goto err1;
+ }
size += expr_info[n].ops->size;
n++;
}
@@ -8547,10 +8565,14 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
switch (trans->msg_type) {
case NFT_MSG_NEWTABLE:
if (nft_trans_table_update(trans)) {
- if (nft_trans_table_state(trans) == NFT_TABLE_STATE_DORMANT)
+ if (!(trans->ctx.table->flags & __NFT_TABLE_F_UPDATE)) {
+ nft_trans_destroy(trans);
+ break;
+ }
+ if (trans->ctx.table->flags & NFT_TABLE_F_DORMANT)
nf_tables_table_disable(net, trans->ctx.table);
- trans->ctx.table->flags = nft_trans_table_flags(trans);
+ trans->ctx.table->flags &= ~__NFT_TABLE_F_UPDATE;
} else {
nft_clear(net, trans->ctx.table);
}
@@ -8768,9 +8790,17 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
switch (trans->msg_type) {
case NFT_MSG_NEWTABLE:
if (nft_trans_table_update(trans)) {
- if (nft_trans_table_state(trans) == NFT_TABLE_STATE_WAKEUP)
+ if (!(trans->ctx.table->flags & __NFT_TABLE_F_UPDATE)) {
+ nft_trans_destroy(trans);
+ break;
+ }
+ if (trans->ctx.table->flags & __NFT_TABLE_F_WAS_DORMANT) {
nf_tables_table_disable(net, trans->ctx.table);
-
+ trans->ctx.table->flags |= NFT_TABLE_F_DORMANT;
+ } else if (trans->ctx.table->flags & __NFT_TABLE_F_WAS_AWAKEN) {
+ trans->ctx.table->flags &= ~NFT_TABLE_F_DORMANT;
+ }
+ trans->ctx.table->flags &= ~__NFT_TABLE_F_UPDATE;
nft_trans_destroy(trans);
} else {
list_del_rcu(&trans->ctx.table->list);
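Review bot: The `trans->ctx.table->flags |= NFT_TABLE_F_DORMANT;` after `nf_tables_table_disable()` is redundant, because that helper, as changed earlier in this diff, already sets the flag before returning. The block that checks `__NFT_TABLE_F_UPDATE`, destroys the transaction and breaks is duplicated between the commit and abort paths. A one-line comment on it, such as `/* a previous transaction in this batch already settled the flags */`, would explain why a second update of the same table is skipped. The enclosing function starts and ends outside the hunk.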
diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c
index 322ac5dd5402..752b10cae524 100644
--- a/net/netfilter/nfnetlink_cthelper.c
+++ b/net/netfilter/nfnetlink_cthelper.c
@@ -380,10 +380,14 @@ static int
nfnl_cthelper_update(const struct nlattr * const tb[],
struct nf_conntrack_helper *helper)
{
+ u32 size;
int ret;
- if (tb[NFCTH_PRIV_DATA_LEN])
- return -EBUSY;
+ if (tb[NFCTH_PRIV_DATA_LEN]) {
+ size = ntohl(nla_get_be32(tb[NFCTH_PRIV_DATA_LEN]));
+ if (size != helper->data_len)
+ return -EBUSY;
+ }
if (tb[NFCTH_POLICY]) {
ret = nfnl_cthelper_update_policy(helper, tb[NFCTH_POLICY]);
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 0592a9456084..337e22d8b40b 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -1217,7 +1217,7 @@ static void nft_ct_expect_obj_eval(struct nft_object *obj,
struct nf_conn *ct;
ct = nf_ct_get(pkt->skb, &ctinfo);
- if (!ct || ctinfo == IP_CT_UNTRACKED) {
+ if (!ct || nf_ct_is_confirmed(ct) || nf_ct_is_template(ct)) {
regs->verdict.code = NFT_BREAK;
return;
}
diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c
index 528a2d7ca991..dce866d93fee 100644
--- a/net/netfilter/nft_set_pipapo.c
+++ b/net/netfilter/nft_set_pipapo.c
@@ -408,8 +408,8 @@ int pipapo_refill(unsigned long *map, int len, int rules, unsigned long *dst,
*
* Return: true on match, false otherwise.
*/
-static bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set,
- const u32 *key, const struct nft_set_ext **ext)
+bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set,
+ const u32 *key, const struct nft_set_ext **ext)
{
struct nft_pipapo *priv = nft_set_priv(set);
unsigned long *res_map, *fill_map;
diff --git a/net/netfilter/nft_set_pipapo.h b/net/netfilter/nft_set_pipapo.h
index 25a75591583e..d84afb8fa79a 100644
--- a/net/netfilter/nft_set_pipapo.h
+++ b/net/netfilter/nft_set_pipapo.h
@@ -178,6 +178,8 @@ struct nft_pipapo_elem {
int pipapo_refill(unsigned long *map, int len, int rules, unsigned long *dst,
union nft_pipapo_map_bucket *mt, bool match_only);
+bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set,
+ const u32 *key, const struct nft_set_ext **ext);
/**
* pipapo_and_field_buckets_4bit() - Intersect 4-bit buckets
diff --git a/net/netfilter/nft_set_pipapo_avx2.c b/net/netfilter/nft_set_pipapo_avx2.c
index d65ae0e23028..eabdb8d552ee 100644
--- a/net/netfilter/nft_set_pipapo_avx2.c
+++ b/net/netfilter/nft_set_pipapo_avx2.c
@@ -1131,6 +1131,9 @@ bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set,
bool map_index;
int i, ret = 0;
+ if (unlikely(!irq_fpu_usable()))
+ return nft_pipapo_lookup(net, set, key, ext);
+
m = rcu_dereference(priv->match);
/* This also protects access to all data related to scratch maps */
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 3a62f97acf39..6133e412b948 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -461,11 +461,13 @@ void netlink_table_ungrab(void)
static inline void
netlink_lock_table(void)
{
+ unsigned long flags;
+
/* read_lock() synchronizes us to netlink_table_grab */
- read_lock(&nl_table_lock);
+ read_lock_irqsave(&nl_table_lock, flags);
atomic_inc(&nl_table_users);
- read_unlock(&nl_table_lock);
+ read_unlock_irqrestore(&nl_table_lock, flags);
}
static inline void
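Review bot: The comment above the lock still says `read_lock()` although the call is now `read_lock_irqsave()`, and nothing in the hunk says why interrupts must be disabled here. I would update it to something like `/* read_lock_irqsave() synchronizes us to netlink_table_grab */` and add the reason. The write side in `netlink_table_grab()` is outside the hunk; it is worth confirming that it also takes `nl_table_lock` with interrupts disabled, otherwise the two sides are inconsistent.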
diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c
index 53dbe733f998..6cfd30fc0798 100644
--- a/net/nfc/llcp_sock.c
+++ b/net/nfc/llcp_sock.c
@@ -110,6 +110,7 @@ static int llcp_sock_bind(struct socket *sock, struct sockaddr *addr, int alen)
if (!llcp_sock->service_name) {
nfc_llcp_local_put(llcp_sock->local);
llcp_sock->local = NULL;
+ llcp_sock->dev = NULL;
ret = -ENOMEM;
goto put_dev;
}
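Review bot: The added `llcp_sock->dev = NULL;` is repeated in both error branches (this hunk and the next), alongside the already duplicated `llcp_sock->local = NULL;`. Resetting the socket's pointers once under a shared error label that runs before `put_dev` would prevent a future third branch from leaving a stale `dev` pointer after the device reference is dropped. The `put_dev` label and the code that takes the reference are outside the hunk, so the exact placement has to be checked there.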
@@ -119,6 +120,7 @@ static int llcp_sock_bind(struct socket *sock, struct sockaddr *addr, int alen)
llcp_sock->local = NULL;
kfree(llcp_sock->service_name);
llcp_sock->service_name = NULL;
+ llcp_sock->dev = NULL;
ret = -EADDRINUSE;
goto put_dev;
}
diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c
index 9a585332ea84..da7fe9db1b00 100644
--- a/net/nfc/nci/core.c
+++ b/net/nfc/nci/core.c
@@ -1191,6 +1191,7 @@ EXPORT_SYMBOL(nci_allocate_device);
void nci_free_device(struct nci_dev *ndev)
{
nfc_free_device(ndev->nfc_dev);
+ nci_hci_deallocate(ndev);
kfree(ndev);
}
EXPORT_SYMBOL(nci_free_device);
diff --git a/net/nfc/nci/hci.c b/net/nfc/nci/hci.c
index 6b275a387a92..96865142104f 100644
--- a/net/nfc/nci/hci.c
+++ b/net/nfc/nci/hci.c
@@ -792,3 +792,8 @@ struct nci_hci_dev *nci_hci_allocate(struct nci_dev *ndev)
return hdev;
}
+
+void nci_hci_deallocate(struct nci_dev *ndev)
+{
+ kfree(ndev->hci_dev);
+}
diff --git a/net/nfc/rawsock.c b/net/nfc/rawsock.c
index 9c7eb8455ba8..5f1d438a0a23 100644
--- a/net/nfc/rawsock.c
+++ b/net/nfc/rawsock.c
@@ -329,7 +329,7 @@ static int rawsock_create(struct net *net, struct socket *sock,
return -ESOCKTNOSUPPORT;
if (sock->type == SOCK_RAW) {
- if (!capable(CAP_NET_RAW))
+ if (!ns_capable(net->user_ns, CAP_NET_RAW))
return -EPERM;
sock->ops = &rawsock_raw_ops;
} else {
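Review bot: With `ns_capable(net->user_ns, CAP_NET_RAW)` a process that holds CAP_NET_RAW only inside its own user namespace can now create raw NFC sockets. Whether that is safe depends on NFC devices being scoped to the network namespace, which this hunk does not show. If they are host-global hardware, the relaxed check widens access to the host's device. A comment recording why the namespace-relative check is sufficient here would let a later reader verify the reasoning. The function continues outside the hunk.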
diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c
index 96b524ceabca..896b8f5bc885 100644
--- a/net/openvswitch/meter.c
+++ b/net/openvswitch/meter.c
@@ -611,6 +611,14 @@ bool ovs_meter_execute(struct datapath *dp, struct sk_buff *skb,
spin_lock(&meter->lock);
long_delta_ms = (now_ms - meter->used); /* ms */
+ if (long_delta_ms < 0) {
+ /* This condition means that we have several threads fighting
+ * for a meter lock, and the one who received the packets a
+ * bit later wins. Assuming that all racing threads received
+ * packets at the same time to avoid overflow.
+ */
+ long_delta_ms = 0;
+ }
/* Make sure delta_ms will not be too large, so that bucket will not
* wrap around below.
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index ba96db1880ea..ae906eb4b269 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -422,7 +422,8 @@ static __u32 tpacket_get_timestamp(struct sk_buff *skb, struct timespec64 *ts,
ktime_to_timespec64_cond(shhwtstamps->hwtstamp, ts))
return TP_STATUS_TS_RAW_HARDWARE;
- if (ktime_to_timespec64_cond(skb->tstamp, ts))
+ if ((flags & SOF_TIMESTAMPING_SOFTWARE) &&
+ ktime_to_timespec64_cond(skb->tstamp, ts))
return TP_STATUS_TS_SOFTWARE;
return 0;
@@ -2340,7 +2341,12 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
skb_copy_bits(skb, 0, h.raw + macoff, snaplen);
- if (!(ts_status = tpacket_get_timestamp(skb, &ts, po->tp_tstamp)))
+ /* Always timestamp; prefer an existing software timestamp taken
+ * closer to the time of capture.
+ */
+ ts_status = tpacket_get_timestamp(skb, &ts,
+ po->tp_tstamp | SOF_TIMESTAMPING_SOFTWARE);
+ if (!ts_status)
ktime_get_real_ts64(&ts);
status |= ts_status;
diff --git a/net/rds/connection.c b/net/rds/connection.c
index f2fcab182095..a3bc4b54d491 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -240,12 +240,23 @@ static struct rds_connection *__rds_conn_create(struct net *net,
if (loop_trans) {
rds_trans_put(loop_trans);
conn->c_loopback = 1;
- if (is_outgoing && trans->t_prefer_loopback) {
- /* "outgoing" connection - and the transport
- * says it wants the connection handled by the
- * loopback transport. This is what TCP does.
- */
- trans = &rds_loop_transport;
+ if (trans->t_prefer_loopback) {
+ if (likely(is_outgoing)) {
+ /* "outgoing" connection to local address.
+ * Protocol says it wants the connection
+ * handled by the loopback transport.
+ * This is what TCP does.
+ */
+ trans = &rds_loop_transport;
+ } else {
+ /* No transport currently in use
+ * should end up here, but if it
+ * does, reset/destroy the connection.
+ */
+ kmem_cache_free(rds_conn_slab, conn);
+ conn = ERR_PTR(-EOPNOTSUPP);
+ goto out;
+ }
}
}
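Review bot: The new `else` branch releases only the connection object with `kmem_cache_free(rds_conn_slab, conn)` before `goto out`. Anything the function attached to `conn` before this point, such as a separately allocated path array or congestion maps, would leak on this path; those allocations are outside the hunk, so this needs checking against the earlier error exits in the function. If the earlier exits free more than the slab object, this branch should mirror them. The branch is described as unreachable for current transports, which makes a leak here easy to miss in testing.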
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index 43db0eca911f..abf19c0e3ba0 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -313,8 +313,8 @@ out:
}
#endif
-static int rds_tcp_laddr_check(struct net *net, const struct in6_addr *addr,
- __u32 scope_id)
+int rds_tcp_laddr_check(struct net *net, const struct in6_addr *addr,
+ __u32 scope_id)
{
struct net_device *dev = NULL;
#if IS_ENABLED(CONFIG_IPV6)
diff --git a/net/rds/tcp.h b/net/rds/tcp.h
index bad9cf49d565..dc8d745d6857 100644
--- a/net/rds/tcp.h
+++ b/net/rds/tcp.h
@@ -59,7 +59,8 @@ u32 rds_tcp_snd_una(struct rds_tcp_connection *tc);
u64 rds_tcp_map_seq(struct rds_tcp_connection *tc, u32 seq);
extern struct rds_transport rds_tcp_transport;
void rds_tcp_accept_work(struct sock *sk);
-
+int rds_tcp_laddr_check(struct net *net, const struct in6_addr *addr,
+ __u32 scope_id);
/* tcp_connect.c */
int rds_tcp_conn_path_connect(struct rds_conn_path *cp);
void rds_tcp_conn_path_shutdown(struct rds_conn_path *conn);
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c
index 101cf14215a0..09cadd556d1e 100644
--- a/net/rds/tcp_listen.c
+++ b/net/rds/tcp_listen.c
@@ -167,6 +167,12 @@ int rds_tcp_accept_one(struct socket *sock)
}
#endif
+ if (!rds_tcp_laddr_check(sock_net(sock->sk), peer_addr, dev_if)) {
+ /* local address connection is only allowed via loopback */
+ ret = -EOPNOTSUPP;
+ goto out;
+ }
+
conn = rds_conn_create(sock_net(sock->sk),
my_addr, peer_addr,
&rds_tcp_transport, 0, GFP_KERNEL, dev_if);
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index ec7a1c438df9..18edd9ad1410 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -984,7 +984,7 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
*/
cached = tcf_ct_skb_nfct_cached(net, skb, p->zone, force);
if (!cached) {
- if (!commit && tcf_ct_flow_table_lookup(p, skb, family)) {
+ if (tcf_ct_flow_table_lookup(p, skb, family)) {
skip_add = true;
goto do_nat;
}
@@ -1022,10 +1022,11 @@ do_nat:
* even if the connection is already confirmed.
*/
nf_conntrack_confirm(skb);
- } else if (!skip_add) {
- tcf_ct_flow_table_process_conn(p->ct_ft, ct, ctinfo);
}
+ if (!skip_add)
+ tcf_ct_flow_table_process_conn(p->ct_ft, ct, ctinfo);
+
out_push:
skb_push_rcsum(skb, nh_ofs);
@@ -1202,9 +1203,6 @@ static int tcf_ct_fill_params(struct net *net,
sizeof(p->zone));
}
- if (p->zone == NF_CT_DEFAULT_ZONE_ID)
- return 0;
-
nf_ct_zone_init(&zone, p->zone, NF_CT_DEFAULT_ZONE_DIR, 0);
tmpl = nf_ct_tmpl_alloc(net, &zone, GFP_KERNEL);
if (!tmpl) {
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 40fbea626dfd..279f9e2a2319 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -1624,7 +1624,7 @@ int tcf_classify_ingress(struct sk_buff *skb,
/* If we missed on some chain */
if (ret == TC_ACT_UNSPEC && last_executed_chain) {
- ext = skb_ext_add(skb, TC_SKB_EXT);
+ ext = tc_skb_ext_alloc(skb);
if (WARN_ON_ONCE(!ext))
return TC_ACT_SHOT;
ext->chain = last_executed_chain;
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index cd2748e2d4a2..d320bcfb2da2 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -407,7 +407,8 @@ static void dsmark_reset(struct Qdisc *sch)
struct dsmark_qdisc_data *p = qdisc_priv(sch);
pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p);
- qdisc_reset(p->q);
+ if (p->q)
+ qdisc_reset(p->q);
sch->qstats.backlog = 0;
sch->q.qlen = 0;
}
diff --git a/net/sched/sch_fq_pie.c b/net/sched/sch_fq_pie.c
index 949163fe68af..cac684952edc 100644
--- a/net/sched/sch_fq_pie.c
+++ b/net/sched/sch_fq_pie.c
@@ -138,8 +138,15 @@ static int fq_pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
/* Classifies packet into corresponding flow */
idx = fq_pie_classify(skb, sch, &ret);
- sel_flow = &q->flows[idx];
+ if (idx == 0) {
+ if (ret & __NET_XMIT_BYPASS)
+ qdisc_qstats_drop(sch);
+ __qdisc_drop(skb, to_free);
+ return ret;
+ }
+ idx--;
+ sel_flow = &q->flows[idx];
/* Checks whether adding a new packet would exceed memory limit */
get_pie_cb(skb)->mem_usage = skb->truesize;
memory_limited = q->memory_usage > q->memory_limit + skb->truesize;
@@ -297,9 +304,9 @@ static int fq_pie_change(struct Qdisc *sch, struct nlattr *opt,
goto flow_error;
}
q->flows_cnt = nla_get_u32(tb[TCA_FQ_PIE_FLOWS]);
- if (!q->flows_cnt || q->flows_cnt >= 65536) {
+ if (!q->flows_cnt || q->flows_cnt > 65536) {
NL_SET_ERR_MSG_MOD(extack,
- "Number of flows must range in [1..65535]");
+ "Number of flows must range in [1..65536]");
goto flow_error;
}
}
@@ -367,7 +374,7 @@ static void fq_pie_timer(struct timer_list *t)
struct fq_pie_sched_data *q = from_timer(q, t, adapt_timer);
struct Qdisc *sch = q->sch;
spinlock_t *root_lock; /* to lock qdisc for probability calculations */
- u16 idx;
+ u32 idx;
root_lock = qdisc_lock(qdisc_root_sleeping(sch));
spin_lock(root_lock);
@@ -388,7 +395,7 @@ static int fq_pie_init(struct Qdisc *sch, struct nlattr *opt,
{
struct fq_pie_sched_data *q = qdisc_priv(sch);
int err;
- u16 idx;
+ u32 idx;
pie_params_init(&q->p_params);
sch->limit = 10 * 1024;
@@ -500,7 +507,7 @@ static int fq_pie_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
static void fq_pie_reset(struct Qdisc *sch)
{
struct fq_pie_sched_data *q = qdisc_priv(sch);
- u16 idx;
+ u32 idx;
INIT_LIST_HEAD(&q->new_flows);
INIT_LIST_HEAD(&q->old_flows);
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 44991ea726fc..fc8b56bcabf3 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -35,6 +35,25 @@
const struct Qdisc_ops *default_qdisc_ops = &pfifo_fast_ops;
EXPORT_SYMBOL(default_qdisc_ops);
+static void qdisc_maybe_clear_missed(struct Qdisc *q,
+ const struct netdev_queue *txq)
+{
+ clear_bit(__QDISC_STATE_MISSED, &q->state);
+
+ /* Make sure the below netif_xmit_frozen_or_stopped()
+ * checking happens after clearing STATE_MISSED.
+ */
+ smp_mb__after_atomic();
+
+ /* Checking netif_xmit_frozen_or_stopped() again to
+ * make sure STATE_MISSED is set if the STATE_MISSED
+ * set by netif_tx_wake_queue()'s rescheduling of
+ * net_tx_action() is cleared by the above clear_bit().
+ */
+ if (!netif_xmit_frozen_or_stopped(txq))
+ set_bit(__QDISC_STATE_MISSED, &q->state);
+}
+
/* Main transmission queue. */
/* Modifications to data participating in scheduling must be protected with
@@ -74,6 +93,7 @@ static inline struct sk_buff *__skb_dequeue_bad_txq(struct Qdisc *q)
}
} else {
skb = SKB_XOFF_MAGIC;
+ qdisc_maybe_clear_missed(q, txq);
}
}
@@ -242,6 +262,7 @@ static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate,
}
} else {
skb = NULL;
+ qdisc_maybe_clear_missed(q, txq);
}
if (lock)
spin_unlock(lock);
@@ -251,8 +272,10 @@ validate:
*validate = true;
if ((q->flags & TCQ_F_ONETXQUEUE) &&
- netif_xmit_frozen_or_stopped(txq))
+ netif_xmit_frozen_or_stopped(txq)) {
+ qdisc_maybe_clear_missed(q, txq);
return skb;
+ }
skb = qdisc_dequeue_skb_bad_txq(q);
if (unlikely(skb)) {
@@ -311,6 +334,8 @@ bool sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
HARD_TX_LOCK(dev, txq, smp_processor_id());
if (!netif_xmit_frozen_or_stopped(txq))
skb = dev_hard_start_xmit(skb, dev, txq, &ret);
+ else
+ qdisc_maybe_clear_missed(q, txq);
HARD_TX_UNLOCK(dev, txq);
} else {
@@ -640,8 +665,10 @@ static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc)
{
struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
struct sk_buff *skb = NULL;
+ bool need_retry = true;
int band;
+retry:
for (band = 0; band < PFIFO_FAST_BANDS && !skb; band++) {
struct skb_array *q = band2list(priv, band);
@@ -652,6 +679,23 @@ static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc)
}
if (likely(skb)) {
qdisc_update_stats_at_dequeue(qdisc, skb);
+ } else if (need_retry &&
+ test_bit(__QDISC_STATE_MISSED, &qdisc->state)) {
+ /* Delay clearing the STATE_MISSED here to reduce
+ * the overhead of the second spin_trylock() in
+ * qdisc_run_begin() and __netif_schedule() calling
+ * in qdisc_run_end().
+ */
+ clear_bit(__QDISC_STATE_MISSED, &qdisc->state);
+
+ /* Make sure dequeuing happens after clearing
+ * STATE_MISSED.
+ */
+ smp_mb__after_atomic();
+
+ need_retry = false;
+
+ goto retry;
} else {
WRITE_ONCE(qdisc->empty, true);
}
@@ -1158,8 +1202,10 @@ static void dev_reset_queue(struct net_device *dev,
qdisc_reset(qdisc);
spin_unlock_bh(qdisc_lock(qdisc));
- if (nolock)
+ if (nolock) {
+ clear_bit(__QDISC_STATE_MISSED, &qdisc->state);
spin_unlock_bh(&qdisc->seqlock);
+ }
}
static bool some_qdisc_is_busy(struct net_device *dev)
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 081c11d5717c..8827987ba903 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -1488,7 +1488,8 @@ static void htb_parent_to_leaf_offload(struct Qdisc *sch,
struct Qdisc *old_q;
/* One ref for cl->leaf.q, the other for dev_queue->qdisc. */
- qdisc_refcount_inc(new_q);
+ if (new_q)
+ qdisc_refcount_inc(new_q);
old_q = htb_graft_helper(dev_queue, new_q);
WARN_ON(!(old_q->flags & TCQ_F_BUILTIN));
}
@@ -1675,10 +1676,9 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg,
cl->parent->common.classid,
NULL);
if (q->offload) {
- if (new_q) {
+ if (new_q)
htb_set_lockdep_class_child(new_q);
- htb_parent_to_leaf_offload(sch, dev_queue, new_q);
- }
+ htb_parent_to_leaf_offload(sch, dev_queue, new_q);
}
}
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 40f9f6c4a0a1..a79d193ff872 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -4473,6 +4473,7 @@ static int sctp_setsockopt_encap_port(struct sock *sk,
transports)
t->encap_port = encap_port;
+ asoc->encap_port = encap_port;
return 0;
}
diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
index e92df779af73..55871b277f47 100644
--- a/net/sctp/sysctl.c
+++ b/net/sctp/sysctl.c
@@ -307,7 +307,7 @@ static struct ctl_table sctp_net_table[] = {
.data = &init_net.sctp.encap_port,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
.extra2 = &udp_port_max,
},
diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c
index 9c6e95882553..967712ba52a0 100644
--- a/net/smc/smc_ism.c
+++ b/net/smc/smc_ism.c
@@ -402,6 +402,14 @@ struct smcd_dev *smcd_alloc_dev(struct device *parent, const char *name,
return NULL;
}
+ smcd->event_wq = alloc_ordered_workqueue("ism_evt_wq-%s)",
+ WQ_MEM_RECLAIM, name);
+ if (!smcd->event_wq) {
+ kfree(smcd->conn);
+ kfree(smcd);
+ return NULL;
+ }
+
smcd->dev.parent = parent;
smcd->dev.release = smcd_release;
device_initialize(&smcd->dev);
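Review bot: The workqueue name format `"ism_evt_wq-%s)"` carries a stray closing parenthesis, which this move keeps. Since the line is being touched anyway, `"ism_evt_wq-%s"` would be the obvious correction, unless something depends on the existing name. Moving the allocation ahead of `device_initialize()` is what keeps the plain `kfree()` error path valid; a short comment saying so would stop the block from drifting back below it.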
@@ -415,19 +423,14 @@ struct smcd_dev *smcd_alloc_dev(struct device *parent, const char *name,
INIT_LIST_HEAD(&smcd->vlan);
INIT_LIST_HEAD(&smcd->lgr_list);
init_waitqueue_head(&smcd->lgrs_deleted);
- smcd->event_wq = alloc_ordered_workqueue("ism_evt_wq-%s)",
- WQ_MEM_RECLAIM, name);
- if (!smcd->event_wq) {
- kfree(smcd->conn);
- kfree(smcd);
- return NULL;
- }
return smcd;
}
EXPORT_SYMBOL_GPL(smcd_alloc_dev);
int smcd_register_dev(struct smcd_dev *smcd)
{
+ int rc;
+
mutex_lock(&smcd_dev_list.mutex);
if (list_empty(&smcd_dev_list.list)) {
u8 *system_eid = NULL;
@@ -447,7 +450,14 @@ int smcd_register_dev(struct smcd_dev *smcd)
dev_name(&smcd->dev), smcd->pnetid,
smcd->pnetid_by_user ? " (user defined)" : "");
- return device_add(&smcd->dev);
+ rc = device_add(&smcd->dev);
+ if (rc) {
+ mutex_lock(&smcd_dev_list.mutex);
+ list_del(&smcd->list);
+ mutex_unlock(&smcd_dev_list.mutex);
+ }
+
+ return rc;
}
EXPORT_SYMBOL_GPL(smcd_register_dev);
diff --git a/net/tipc/core.c b/net/tipc/core.c
index 5cc1f0307215..3f4542e0f065 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -60,7 +60,7 @@ static int __net_init tipc_init_net(struct net *net)
tn->trial_addr = 0;
tn->addr_trial_end = 0;
tn->capabilities = TIPC_NODE_CAPABILITIES;
- INIT_WORK(&tn->final_work.work, tipc_net_finalize_work);
+ INIT_WORK(&tn->work, tipc_net_finalize_work);
memset(tn->node_id, 0, sizeof(tn->node_id));
memset(tn->node_id_string, 0, sizeof(tn->node_id_string));
tn->mon_threshold = TIPC_DEF_MON_THRESHOLD;
@@ -110,7 +110,7 @@ static void __net_exit tipc_exit_net(struct net *net)
tipc_detach_loopback(net);
/* Make sure the tipc_net_finalize_work() finished */
- cancel_work_sync(&tn->final_work.work);
+ cancel_work_sync(&tn->work);
tipc_net_stop(net);
tipc_bcast_stop(net);
@@ -119,6 +119,8 @@ static void __net_exit tipc_exit_net(struct net *net)
#ifdef CONFIG_TIPC_CRYPTO
tipc_crypto_stop(&tipc_net(net)->crypto_tx);
#endif
+ while (atomic_read(&tn->wq_count))
+ cond_resched();
}
static void __net_exit tipc_pernet_pre_exit(struct net *net)
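Review bot: `while (atomic_read(&tn->wq_count)) cond_resched();` is an unbounded poll in the namespace exit path. If any scheduled work item fails to decrement `wq_count`, namespace teardown spins forever with no diagnostic. The increments and decrements are outside this hunk, so the pairing cannot be checked here. A completion or a flush of the work items would make the wait explicit. If the poll stays, it needs a comment naming what increments and decrements the counter and why it must reach zero before exit.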
diff --git a/net/tipc/core.h b/net/tipc/core.h
index 03de7b213f55..0a3f7a70a50a 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -91,12 +91,6 @@ extern unsigned int tipc_net_id __read_mostly;
extern int sysctl_tipc_rmem[3] __read_mostly;
extern int sysctl_tipc_named_timeout __read_mostly;
-struct tipc_net_work {
- struct work_struct work;
- struct net *net;
- u32 addr;
-};
-
struct tipc_net {
u8 node_id[NODE_ID_LEN];
u32 node_addr;
@@ -148,7 +142,9 @@ struct tipc_net {
struct tipc_crypto *crypto_tx;
#endif
/* Work item for net finalize */
- struct tipc_net_work final_work;
+ struct work_struct work;
+ /* The numbers of work queues in schedule */
+ atomic_t wq_count;
};
static inline struct tipc_net *tipc_net(struct net *net)
diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index 5380f605b851..da69e1abf68f 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -168,7 +168,7 @@ static bool tipc_disc_addr_trial_msg(struct tipc_discoverer *d,
/* Apply trial address if we just left trial period */
if (!trial && !self) {
- tipc_sched_net_finalize(net, tn->trial_addr);
+ schedule_work(&tn->work);
msg_set_prevnode(buf_msg(d->skb), tn->trial_addr);
msg_set_type(buf_msg(d->skb), DSC_REQ_MSG);
}
@@ -308,7 +308,7 @@ static void tipc_disc_timeout(struct timer_list *t)
if (!time_before(jiffies, tn->addr_trial_end) && !tipc_own_addr(net)) {
mod_timer(&d->timer, jiffies + TIPC_DISC_INIT);
spin_unlock_bh(&d->lock);
- tipc_sched_net_finalize(net, tn->trial_addr);
+ schedule_work(&tn->work);
return;
}
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 115109259430..c44b4bfaaee6 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -372,6 +372,11 @@ char tipc_link_plane(struct tipc_link *l)
return l->net_plane;
}
+struct net *tipc_link_net(struct tipc_link *l)
+{
+ return l->net;
+}
+
void tipc_link_update_caps(struct tipc_link *l, u16 capabilities)
{
l->peer_caps = capabilities;
diff --git a/net/tipc/link.h b/net/tipc/link.h
index fc07232c9a12..a16f401fdabd 100644
--- a/net/tipc/link.h
+++ b/net/tipc/link.h
@@ -156,4 +156,5 @@ int tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr,
int tipc_link_bc_nack_rcv(struct tipc_link *l, struct sk_buff *skb,
struct sk_buff_head *xmitq);
bool tipc_link_too_silent(struct tipc_link *l);
+struct net *tipc_link_net(struct tipc_link *l);
#endif
diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index 3f0a25345a7c..ce6ab54822d8 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -149,18 +149,13 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf)
if (unlikely(head))
goto err;
*buf = NULL;
+ if (skb_has_frag_list(frag) && __skb_linearize(frag))
+ goto err;
frag = skb_unshare(frag, GFP_ATOMIC);
if (unlikely(!frag))
goto err;
head = *headbuf = frag;
TIPC_SKB_CB(head)->tail = NULL;
- if (skb_is_nonlinear(head)) {
- skb_walk_frags(head, tail) {
- TIPC_SKB_CB(head)->tail = tail;
- }
- } else {
- skb_frag_list_init(head);
- }
return 0;
}
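Review bot: The new `goto err` after `__skb_linearize(frag)` is reached after `*buf = NULL;`, so if the err label releases the buffer through `*buf` (the label is outside this hunk), a failed linearize leaves `frag` unreferenced and it is leaked. __skb_linearize() does not free the skb on failure, whereas skb_unshare() does, so the assignment belongs between the two calls: the `if (skb_has_frag_list(frag) && __skb_linearize(frag)) goto err;` check first, then `*buf = NULL;`, then `frag = skb_unshare(frag, GFP_ATOMIC);`. tipc_buf_append() starts and ends outside the hunk, so the err path should be checked against this ordering.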
diff --git a/net/tipc/net.c b/net/tipc/net.c
index a130195af188..0e95572e56b4 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -41,6 +41,7 @@
#include "socket.h"
#include "node.h"
#include "bcast.h"
+#include "link.h"
#include "netlink.h"
#include "monitor.h"
@@ -142,19 +143,9 @@ static void tipc_net_finalize(struct net *net, u32 addr)
void tipc_net_finalize_work(struct work_struct *work)
{
- struct tipc_net_work *fwork;
+ struct tipc_net *tn = container_of(work, struct tipc_net, work);
- fwork = container_of(work, struct tipc_net_work, work);
- tipc_net_finalize(fwork->net, fwork->addr);
-}
-
-void tipc_sched_net_finalize(struct net *net, u32 addr)
-{
- struct tipc_net *tn = tipc_net(net);
-
- tn->final_work.net = net;
- tn->final_work.addr = addr;
- schedule_work(&tn->final_work.work);
+ tipc_net_finalize(tipc_link_net(tn->bcl), tn->trial_addr);
}
void tipc_net_stop(struct net *net)
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 8217905348f4..81af92954c6c 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -423,18 +423,18 @@ static void tipc_node_write_unlock(struct tipc_node *n)
write_unlock_bh(&n->lock);
if (flags & TIPC_NOTIFY_NODE_DOWN)
- tipc_publ_notify(net, publ_list, n->addr, n->capabilities);
+ tipc_publ_notify(net, publ_list, sk.node, n->capabilities);
if (flags & TIPC_NOTIFY_NODE_UP)
- tipc_named_node_up(net, n->addr, n->capabilities);
+ tipc_named_node_up(net, sk.node, n->capabilities);
if (flags & TIPC_NOTIFY_LINK_UP) {
- tipc_mon_peer_up(net, n->addr, bearer_id);
- tipc_nametbl_publish(net, &ua, &sk, n->link_id);
+ tipc_mon_peer_up(net, sk.node, bearer_id);
+ tipc_nametbl_publish(net, &ua, &sk, sk.ref);
}
if (flags & TIPC_NOTIFY_LINK_DOWN) {
- tipc_mon_peer_down(net, n->addr, bearer_id);
- tipc_nametbl_withdraw(net, &ua, &sk, n->link_id);
+ tipc_mon_peer_down(net, sk.node, bearer_id);
+ tipc_nametbl_withdraw(net, &ua, &sk, sk.ref);
}
}
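Review bot: `n->capabilities` is still read after `write_unlock_bh(&n->lock)` in the tipc_publ_notify() and tipc_named_node_up() calls, while the other node fields used here were switched to the local `sk` copy. If capabilities can change once the lock is dropped, it would be consistent to snapshot it next to `sk` before unlocking and pass the local value instead, for example `caps = n->capabilities;` with `caps` declared in the type of that field. The function starts outside the hunk, where `sk` is presumably filled in under the lock.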
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 58935cd0d068..53af72824c9c 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -1262,7 +1262,10 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
spin_lock_bh(&inputq->lock);
if (skb_peek(arrvq) == skb) {
skb_queue_splice_tail_init(&tmpq, inputq);
- __skb_dequeue(arrvq);
+ /* Decrease the skb's refcnt as increasing in the
+ * function tipc_skb_peek
+ */
+ kfree_skb(__skb_dequeue(arrvq));
}
spin_unlock_bh(&inputq->lock);
__skb_queue_purge(&tmpq);
diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index e556d2cdc064..c2bb818704c8 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -814,6 +814,7 @@ static void cleanup_bearer(struct work_struct *work)
kfree_rcu(rcast, rcu);
}
+ atomic_dec(&tipc_net(sock_net(ub->ubsock->sk))->wq_count);
dst_cache_destroy(&ub->rcast.dst_cache);
udp_tunnel_sock_release(ub->ubsock);
synchronize_net();
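Review bot: The `atomic_dec(&...->wq_count)` in cleanup_bearer() runs before `dst_cache_destroy()` and `udp_tunnel_sock_release(ub->ubsock)`, so the wait loop added to tipc_exit_net() can see zero and let the namespace teardown continue while this work item is still using the kernel socket. That leaves a window for a use-after-free on the socket or its netns. Reading the pointer first and decrementing last closes it: `struct tipc_net *tn = tipc_net(sock_net(ub->ubsock->sk));` at the top, and `atomic_dec(&tn->wq_count);` after the socket release and synchronize_net(). The function body continues outside the hunk, so the decrement should go after its last use of `ub`.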
@@ -834,6 +835,7 @@ static void tipc_udp_disable(struct tipc_bearer *b)
RCU_INIT_POINTER(ub->bearer, NULL);
/* sock_release need to be done outside of rtnl lock */
+ atomic_inc(&tipc_net(sock_net(ub->ubsock->sk))->wq_count);
INIT_WORK(&ub->work, cleanup_bearer);
schedule_work(&ub->work);
}
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index 76a6f8c2eec4..bd9f1567aa39 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -50,6 +50,7 @@ static void tls_device_gc_task(struct work_struct *work);
static DECLARE_WORK(tls_device_gc_work, tls_device_gc_task);
static LIST_HEAD(tls_device_gc_list);
static LIST_HEAD(tls_device_list);
+static LIST_HEAD(tls_device_down_list);
static DEFINE_SPINLOCK(tls_device_lock);
static void tls_device_free_ctx(struct tls_context *ctx)
@@ -680,15 +681,13 @@ static void tls_device_resync_rx(struct tls_context *tls_ctx,
struct tls_offload_context_rx *rx_ctx = tls_offload_ctx_rx(tls_ctx);
struct net_device *netdev;
- if (WARN_ON(test_and_set_bit(TLS_RX_SYNC_RUNNING, &tls_ctx->flags)))
- return;
-
trace_tls_device_rx_resync_send(sk, seq, rcd_sn, rx_ctx->resync_type);
+ rcu_read_lock();
netdev = READ_ONCE(tls_ctx->netdev);
if (netdev)
netdev->tlsdev_ops->tls_dev_resync(netdev, sk, seq, rcd_sn,
TLS_OFFLOAD_CTX_DIR_RX);
- clear_bit_unlock(TLS_RX_SYNC_RUNNING, &tls_ctx->flags);
+ rcu_read_unlock();
TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSRXDEVICERESYNC);
}
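Review bot: `tls_dev_resync` is now called between rcu_read_lock() and rcu_read_unlock(), so the driver callback must not sleep, and nothing in this hunk states that. A short comment above the call, e.g. `/* called under rcu_read_lock(): tls_dev_resync must not sleep */`, would document the constraint for driver authors. The removed TLS_RX_SYNC_RUNNING test also warned on concurrent resync calls; if any driver relied on that serialisation, the comment should say whether it still holds.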
@@ -761,6 +760,8 @@ void tls_device_rx_resync_new_rec(struct sock *sk, u32 rcd_len, u32 seq)
if (tls_ctx->rx_conf != TLS_HW)
return;
+ if (unlikely(test_bit(TLS_RX_DEV_DEGRADED, &tls_ctx->flags)))
+ return;
prot = &tls_ctx->prot_info;
rx_ctx = tls_offload_ctx_rx(tls_ctx);
@@ -963,6 +964,17 @@ int tls_device_decrypted(struct sock *sk, struct tls_context *tls_ctx,
ctx->sw.decrypted |= is_decrypted;
+ if (unlikely(test_bit(TLS_RX_DEV_DEGRADED, &tls_ctx->flags))) {
+ if (likely(is_encrypted || is_decrypted))
+ return 0;
+
+ /* After tls_device_down disables the offload, the next SKB will
+ * likely have initial fragments decrypted, and final ones not
+ * decrypted. We need to reencrypt that single SKB.
+ */
+ return tls_device_reencrypt(sk, skb);
+ }
+
/* Return immediately if the record is either entirely plaintext or
* entirely ciphertext. Otherwise handle reencrypt partially decrypted
* record.
@@ -1292,6 +1304,26 @@ static int tls_device_down(struct net_device *netdev)
spin_unlock_irqrestore(&tls_device_lock, flags);
list_for_each_entry_safe(ctx, tmp, &list, list) {
+ /* Stop offloaded TX and switch to the fallback.
+ * tls_is_sk_tx_device_offloaded will return false.
+ */
+ WRITE_ONCE(ctx->sk->sk_validate_xmit_skb, tls_validate_xmit_skb_sw);
+
+ /* Stop the RX and TX resync.
+ * tls_dev_resync must not be called after tls_dev_del.
+ */
+ WRITE_ONCE(ctx->netdev, NULL);
+
+ /* Start skipping the RX resync logic completely. */
+ set_bit(TLS_RX_DEV_DEGRADED, &ctx->flags);
+
+ /* Sync with inflight packets. After this point:
+ * TX: no non-encrypted packets will be passed to the driver.
+ * RX: resync requests from the driver will be ignored.
+ */
+ synchronize_net();
+
+ /* Release the offload context on the driver side. */
if (ctx->tx_conf == TLS_HW)
netdev->tlsdev_ops->tls_dev_del(netdev, ctx,
TLS_OFFLOAD_CTX_DIR_TX);
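Review bot: `synchronize_net()` sits inside the list_for_each_entry_safe loop, so device-down waits one full grace period per offloaded context, apparently with device_offload_lock held for write (the `up_write` is visible in the next hunk). Nothing before the barrier depends on the previous context's barrier, so the three stores could run for every context in a first pass, followed by one synchronize_net() and then the existing tls_dev_del pass. This assumes contexts cannot leave the local list between the two passes, which should be confirmed against the part of the function outside this hunk. The loop opens here and closes in the following hunk.

```c
	list_for_each_entry(ctx, &list, list) {
		/* … unchanged: comments on the three stores … */
		WRITE_ONCE(ctx->sk->sk_validate_xmit_skb, tls_validate_xmit_skb_sw);
		WRITE_ONCE(ctx->netdev, NULL);
		set_bit(TLS_RX_DEV_DEGRADED, &ctx->flags);
	}

	/* One grace period covers every context switched above. */
	synchronize_net();

	list_for_each_entry_safe(ctx, tmp, &list, list) {
		/* … unchanged: tls_dev_del for TX and RX, dev_put, list_move_tail … */
```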
@@ -1299,15 +1331,21 @@ static int tls_device_down(struct net_device *netdev)
!test_bit(TLS_RX_DEV_CLOSED, &ctx->flags))
netdev->tlsdev_ops->tls_dev_del(netdev, ctx,
TLS_OFFLOAD_CTX_DIR_RX);
- WRITE_ONCE(ctx->netdev, NULL);
- smp_mb__before_atomic(); /* pairs with test_and_set_bit() */
- while (test_bit(TLS_RX_SYNC_RUNNING, &ctx->flags))
- usleep_range(10, 200);
+
dev_put(netdev);
- list_del_init(&ctx->list);
- if (refcount_dec_and_test(&ctx->refcount))
- tls_device_free_ctx(ctx);
+ /* Move the context to a separate list for two reasons:
+ * 1. When the context is deallocated, list_del is called.
+ * 2. It's no longer an offloaded context, so we don't want to
+ * run offload-specific code on this context.
+ */
+ spin_lock_irqsave(&tls_device_lock, flags);
+ list_move_tail(&ctx->list, &tls_device_down_list);
+ spin_unlock_irqrestore(&tls_device_lock, flags);
+
+ /* Device contexts for RX and TX will be freed in on sk_destruct
+ * by tls_device_free_ctx. rx_conf and tx_conf stay in TLS_HW.
+ */
}
up_write(&device_offload_lock);
diff --git a/net/tls/tls_device_fallback.c b/net/tls/tls_device_fallback.c
index cacf040872c7..e40bedd112b6 100644
--- a/net/tls/tls_device_fallback.c
+++ b/net/tls/tls_device_fallback.c
@@ -431,6 +431,13 @@ struct sk_buff *tls_validate_xmit_skb(struct sock *sk,
}
EXPORT_SYMBOL_GPL(tls_validate_xmit_skb);
+struct sk_buff *tls_validate_xmit_skb_sw(struct sock *sk,
+ struct net_device *dev,
+ struct sk_buff *skb)
+{
+ return tls_sw_fallback(sk, skb);
+}
+
struct sk_buff *tls_encrypt_skb(struct sk_buff *skb)
{
return tls_sw_fallback(skb->sk, skb);
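Review bot: The new tls_validate_xmit_skb_sw() has no comment, and its `dev` parameter is unused, which reads like an oversight next to tls_validate_xmit_skb() just above it. A one-line header would explain the intent, e.g. `/* Unconditional software fallback, installed by tls_device_down(); dev is ignored because the offload device is gone. */`. The installation point is the `WRITE_ONCE(ctx->sk->sk_validate_xmit_skb, ...)` in the tls_device.c hunk of this diff.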
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index 47b7c5334c34..fde56ff49163 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -636,6 +636,7 @@ struct tls_context *tls_ctx_create(struct sock *sk)
mutex_init(&ctx->tx_lock);
rcu_assign_pointer(icsk->icsk_ulp_data, ctx);
ctx->sk_proto = READ_ONCE(sk->sk_prot);
+ ctx->sk = sk;
return ctx;
}
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 1dcb34dfd56b..694de024d0ee 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -37,6 +37,7 @@
#include <linux/sched/signal.h>
#include <linux/module.h>
+#include <linux/splice.h>
#include <crypto/aead.h>
#include <net/strparser.h>
@@ -1281,7 +1282,7 @@ int tls_sw_sendpage(struct sock *sk, struct page *page,
}
static struct sk_buff *tls_wait_data(struct sock *sk, struct sk_psock *psock,
- int flags, long timeo, int *err)
+ bool nonblock, long timeo, int *err)
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
@@ -1306,7 +1307,7 @@ static struct sk_buff *tls_wait_data(struct sock *sk, struct sk_psock *psock,
if (sock_flag(sk, SOCK_DONE))
return NULL;
- if ((flags & MSG_DONTWAIT) || !timeo) {
+ if (nonblock || !timeo) {
*err = -EAGAIN;
return NULL;
}
@@ -1786,7 +1787,7 @@ int tls_sw_recvmsg(struct sock *sk,
bool async_capable;
bool async = false;
- skb = tls_wait_data(sk, psock, flags, timeo, &err);
+ skb = tls_wait_data(sk, psock, flags & MSG_DONTWAIT, timeo, &err);
if (!skb) {
if (psock) {
int ret = sk_msg_recvmsg(sk, psock, msg, len,
@@ -1990,9 +1991,9 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos,
lock_sock(sk);
- timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
+ timeo = sock_rcvtimeo(sk, flags & SPLICE_F_NONBLOCK);
- skb = tls_wait_data(sk, NULL, flags, timeo, &err);
+ skb = tls_wait_data(sk, NULL, flags & SPLICE_F_NONBLOCK, timeo, &err);
if (!skb)
goto splice_read_end;
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 382c5262d997..7ec021a610ae 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -542,7 +542,7 @@ EXPORT_SYMBOL(ieee80211_get_mesh_hdrlen);
int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr,
const u8 *addr, enum nl80211_iftype iftype,
- u8 data_offset)
+ u8 data_offset, bool is_amsdu)
{
struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
struct {
@@ -629,7 +629,7 @@ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr,
skb_copy_bits(skb, hdrlen, &payload, sizeof(payload));
tmp.h_proto = payload.proto;
- if (likely((ether_addr_equal(payload.hdr, rfc1042_header) &&
+ if (likely((!is_amsdu && ether_addr_equal(payload.hdr, rfc1042_header) &&
tmp.h_proto != htons(ETH_P_AARP) &&
tmp.h_proto != htons(ETH_P_IPX)) ||
ether_addr_equal(payload.hdr, bridge_tunnel_header)))
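Review bot: As written, `!is_amsdu` guards only the `rfc1042_header` comparison; the `bridge_tunnel_header` comparison on the other side of the `||` is still evaluated for A-MSDU callers. If that asymmetry is intended, a comment above the condition should say why header stripping stays enabled for the bridge-tunnel case; if not, the guard needs to cover both alternatives. The new `is_amsdu` parameter is also undocumented here, so the function's kernel-doc (outside this diff) should gain a line for it.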
@@ -771,6 +771,9 @@ void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list,
remaining = skb->len - offset;
if (subframe_len > remaining)
goto purge;
+ /* mitigate A-MSDU aggregation injection attacks */
+ if (ether_addr_equal(eth.h_dest, rfc1042_header))
+ goto purge;
offset += sizeof(struct ethhdr);
last = remaining <= subframe_len + padding;
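Review bot: The comment `/* mitigate A-MSDU aggregation injection attacks */` does not say why a destination address equal to `rfc1042_header` is treated as hostile. It would help to state that a subframe whose h_dest matches the LLC/SNAP header bytes suggests an ordinary frame being parsed as an A-MSDU (presumably the case this check targets; confirm against the commit message), and that the frame is rejected via `purge`. The drop is also silent in this hunk; if the surrounding code keeps drop statistics, counting it would let operators notice such frames.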
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index 44d6566dd23e..1816899499ce 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -536,7 +536,7 @@ static int x25_create(struct net *net, struct socket *sock, int protocol,
if (protocol)
goto out;
- rc = -ENOBUFS;
+ rc = -ENOMEM;
if ((sk = x25_alloc_socket(net, kern)) == NULL)
goto out;