Diffstat (limited to 'drivers/net/tun.c')
-rw-r--r--	drivers/net/tun.c	174
1 file changed, 118 insertions(+), 56 deletions(-)
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index fed85447701a..7a3ab3427369 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -268,12 +268,17 @@ static void tun_napi_init(struct tun_struct *tun, struct tun_file *tfile,
 	tfile->napi_enabled = napi_en;
 	tfile->napi_frags_enabled = napi_en && napi_frags;
 	if (napi_en) {
-		netif_tx_napi_add(tun->dev, &tfile->napi, tun_napi_poll,
-				  NAPI_POLL_WEIGHT);
+		netif_napi_add_tx(tun->dev, &tfile->napi, tun_napi_poll);
 		napi_enable(&tfile->napi);
 	}
 }
 
+static void tun_napi_enable(struct tun_file *tfile)
+{
+	if (tfile->napi_enabled)
+		napi_enable(&tfile->napi);
+}
+
 static void tun_napi_disable(struct tun_file *tfile)
 {
 	if (tfile->napi_enabled)
@@ -635,7 +640,8 @@ static void __tun_detach(struct tun_file *tfile, bool clean)
 	tun = rtnl_dereference(tfile->tun);
 
 	if (tun && clean) {
-		tun_napi_disable(tfile);
+		if (!tfile->detached)
+			tun_napi_disable(tfile);
 		tun_napi_del(tfile);
 	}
 
@@ -654,8 +660,10 @@ static void __tun_detach(struct tun_file *tfile, bool clean)
 		if (clean) {
 			RCU_INIT_POINTER(tfile->tun, NULL);
 			sock_put(&tfile->sk);
-		} else
+		} else {
 			tun_disable_queue(tun, tfile);
+			tun_napi_disable(tfile);
+		}
 
 		synchronize_net();
 		tun_flow_delete_by_queue(tun, tun->numqueues + 1);
@@ -728,6 +736,7 @@ static void tun_detach_all(struct net_device *dev)
 		sock_put(&tfile->sk);
 	}
 	list_for_each_entry_safe(tfile, tmp, &tun->disabled, next) {
+		tun_napi_del(tfile);
 		tun_enable_queue(tfile);
 		tun_queue_purge(tfile);
 		xdp_rxq_info_unreg(&tfile->xdp_rxq);
@@ -808,6 +817,7 @@ static int tun_attach(struct tun_struct *tun, struct file *file,
 
 	if (tfile->detached) {
 		tun_enable_queue(tfile);
+		tun_napi_enable(tfile);
 	} else {
 		sock_hold(&tfile->sk);
 		tun_napi_init(tun, tfile, napi, napi_frags);
@@ -1058,6 +1068,7 @@ static unsigned int run_ebpf_filter(struct tun_struct *tun,
 static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct tun_struct *tun = netdev_priv(dev);
+	enum skb_drop_reason drop_reason;
 	int txq = skb->queue_mapping;
 	struct netdev_queue *queue;
 	struct tun_file *tfile;
@@ -1067,8 +1078,10 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
 	tfile = rcu_dereference(tun->tfiles[txq]);
 
 	/* Drop packet if interface is not attached */
-	if (!tfile)
+	if (!tfile) {
+		drop_reason = SKB_DROP_REASON_DEV_READY;
 		goto drop;
+	}
 
 	if (!rcu_dereference(tun->steering_prog))
 		tun_automq_xmit(tun, skb);
@@ -1078,19 +1091,32 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
 	/* Drop if the filter does not like it.
 	 * This is a noop if the filter is disabled.
 	 * Filter can be enabled only for the TAP devices.
 	 */
-	if (!check_filter(&tun->txflt, skb))
+	if (!check_filter(&tun->txflt, skb)) {
+		drop_reason = SKB_DROP_REASON_TAP_TXFILTER;
 		goto drop;
+	}
 
 	if (tfile->socket.sk->sk_filter &&
-	    sk_filter(tfile->socket.sk, skb))
+	    sk_filter(tfile->socket.sk, skb)) {
+		drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
 		goto drop;
+	}
 
 	len = run_ebpf_filter(tun, skb, len);
-	if (len == 0 || pskb_trim(skb, len))
+	if (len == 0) {
+		drop_reason = SKB_DROP_REASON_TAP_FILTER;
+		goto drop;
+	}
+
+	if (pskb_trim(skb, len)) {
+		drop_reason = SKB_DROP_REASON_NOMEM;
 		goto drop;
+	}
 
-	if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC)))
+	if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC))) {
+		drop_reason = SKB_DROP_REASON_SKB_UCOPY_FAULT;
 		goto drop;
+	}
 
 	skb_tx_timestamp(skb);
@@ -1101,12 +1127,14 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	nf_reset_ct(skb);
 
-	if (ptr_ring_produce(&tfile->tx_ring, skb))
+	if (ptr_ring_produce(&tfile->tx_ring, skb)) {
+		drop_reason = SKB_DROP_REASON_FULL_RING;
 		goto drop;
+	}
 
 	/* NETIF_F_LLTX requires to do our own update of trans_start */
 	queue = netdev_get_tx_queue(dev, txq);
-	queue->trans_start = jiffies;
+	txq_trans_cond_update(queue);
 
 	/* Notify and wake up reader process */
 	if (tfile->flags & TUN_FASYNC)
@@ -1117,9 +1145,9 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
 	return NETDEV_TX_OK;
 
 drop:
-	atomic_long_inc(&dev->tx_dropped);
+	dev_core_stats_tx_dropped_inc(dev);
 	skb_tx_error(skb);
-	kfree_skb(skb);
+	kfree_skb_reason(skb, drop_reason);
 	rcu_read_unlock();
 	return NET_XMIT_DROP;
 }
@@ -1273,7 +1301,7 @@ resample:
 		void *frame = tun_xdp_to_ptr(xdp);
 
 		if (__ptr_ring_produce(&tfile->tx_ring, frame)) {
-			atomic_long_inc(&dev->tx_dropped);
+			dev_core_stats_tx_dropped_inc(dev);
 			break;
 		}
 		nxmit++;
@@ -1431,7 +1459,8 @@ static struct sk_buff *tun_napi_alloc_frags(struct tun_file *tfile,
 	int err;
 	int i;
 
-	if (it->nr_segs > MAX_SKB_FRAGS + 1)
+	if (it->nr_segs > MAX_SKB_FRAGS + 1 ||
+	    len > (ETH_MAX_MTU - NET_SKB_PAD - NET_IP_ALIGN))
 		return ERR_PTR(-EMSGSIZE);
 
 	local_bh_disable();
@@ -1608,7 +1637,7 @@ static int tun_xdp_act(struct tun_struct *tun, struct bpf_prog *xdp_prog,
 		trace_xdp_exception(tun->dev, xdp_prog, act);
 		fallthrough;
 	case XDP_DROP:
-		atomic_long_inc(&tun->dev->rx_dropped);
+		dev_core_stats_rx_dropped_inc(tun->dev);
 		break;
 	}
 
@@ -1717,6 +1746,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
 	u32 rxhash = 0;
 	int skb_xdp = 1;
 	bool frags = tun_napi_frags_enabled(tfile);
+	enum skb_drop_reason drop_reason;
 
 	if (!(tun->flags & IFF_NO_PI)) {
 		if (len < sizeof(pi))
@@ -1778,7 +1808,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
 		 */
 		skb = tun_build_skb(tun, tfile, from, &gso, len, &skb_xdp);
 		if (IS_ERR(skb)) {
-			atomic_long_inc(&tun->dev->rx_dropped);
+			dev_core_stats_rx_dropped_inc(tun->dev);
 			return PTR_ERR(skb);
 		}
 		if (!skb)
@@ -1807,7 +1837,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
 		if (IS_ERR(skb)) {
 			if (PTR_ERR(skb) != -EAGAIN)
-				atomic_long_inc(&tun->dev->rx_dropped);
+				dev_core_stats_rx_dropped_inc(tun->dev);
 			if (frags)
 				mutex_unlock(&tfile->napi_mutex);
 			return PTR_ERR(skb);
 		}
@@ -1820,9 +1850,10 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
 
 	if (err) {
 		err = -EFAULT;
+		drop_reason = SKB_DROP_REASON_SKB_UCOPY_FAULT;
drop:
-		atomic_long_inc(&tun->dev->rx_dropped);
-		kfree_skb(skb);
+		dev_core_stats_rx_dropped_inc(tun->dev);
+		kfree_skb_reason(skb, drop_reason);
 		if (frags) {
 			tfile->napi.skb = NULL;
 			mutex_unlock(&tfile->napi_mutex);
@@ -1856,7 +1887,7 @@ drop:
 			pi.proto = htons(ETH_P_IPV6);
 			break;
 		default:
-			atomic_long_inc(&tun->dev->rx_dropped);
+			dev_core_stats_rx_dropped_inc(tun->dev);
 			kfree_skb(skb);
 			return -EINVAL;
 		}
@@ -1869,6 +1900,7 @@ drop:
 	case IFF_TAP:
 		if (frags && !pskb_may_pull(skb, ETH_HLEN)) {
 			err = -ENOMEM;
+			drop_reason = SKB_DROP_REASON_HDR_TRUNC;
 			goto drop;
 		}
 		skb->protocol = eth_type_trans(skb, tun->dev);
@@ -1922,6 +1954,7 @@ drop:
 	if (unlikely(!(tun->dev->flags & IFF_UP))) {
 		err = -EIO;
 		rcu_read_unlock();
+		drop_reason = SKB_DROP_REASON_DEV_READY;
 		goto drop;
 	}
 
@@ -1934,17 +1967,25 @@ drop:
 					  skb_headlen(skb));
 
 		if (unlikely(headlen > skb_headlen(skb))) {
-			atomic_long_inc(&tun->dev->rx_dropped);
+			WARN_ON_ONCE(1);
+			err = -ENOMEM;
+			dev_core_stats_rx_dropped_inc(tun->dev);
+napi_busy:
 			napi_free_frags(&tfile->napi);
 			rcu_read_unlock();
 			mutex_unlock(&tfile->napi_mutex);
-			WARN_ON(1);
-			return -ENOMEM;
+			return err;
 		}
 
-		local_bh_disable();
-		napi_gro_frags(&tfile->napi);
-		local_bh_enable();
+		if (likely(napi_schedule_prep(&tfile->napi))) {
+			local_bh_disable();
+			napi_gro_frags(&tfile->napi);
+			napi_complete(&tfile->napi);
+			local_bh_enable();
+		} else {
+			err = -EBUSY;
+			goto napi_busy;
+		}
 		mutex_unlock(&tfile->napi_mutex);
 	} else if (tfile->napi_enabled) {
 		struct sk_buff_head *queue = &tfile->sk.sk_write_queue;
@@ -1962,7 +2003,7 @@ drop:
 	} else if (!IS_ENABLED(CONFIG_4KSTACKS)) {
 		tun_rx_batched(tun, tfile, skb, more);
 	} else {
-		netif_rx_ni(skb);
+		netif_rx(skb);
 	}
 	rcu_read_unlock();
 
@@ -2388,9 +2429,10 @@ static int tun_xdp_one(struct tun_struct *tun,
 	struct virtio_net_hdr *gso = &hdr->gso;
 	struct bpf_prog *xdp_prog;
 	struct sk_buff *skb = NULL;
+	struct sk_buff_head *queue;
 	u32 rxhash = 0, act;
 	int buflen = hdr->buflen;
-	int err = 0;
+	int ret = 0;
 	bool skb_xdp = false;
 	struct page *page;
 
@@ -2405,13 +2447,13 @@ static int tun_xdp_one(struct tun_struct *tun,
 		xdp_set_data_meta_invalid(xdp);
 
 		act = bpf_prog_run_xdp(xdp_prog, xdp);
-		err = tun_xdp_act(tun, xdp_prog, xdp, act);
-		if (err < 0) {
+		ret = tun_xdp_act(tun, xdp_prog, xdp, act);
+		if (ret < 0) {
 			put_page(virt_to_head_page(xdp->data));
-			return err;
+			return ret;
 		}
 
-		switch (err) {
+		switch (ret) {
 		case XDP_REDIRECT:
 			*flush = true;
 			fallthrough;
@@ -2435,7 +2477,7 @@ static int tun_xdp_one(struct tun_struct *tun,
build:
 	skb = build_skb(xdp->data_hard_start, buflen);
 	if (!skb) {
-		err = -ENOMEM;
+		ret = -ENOMEM;
 		goto out;
 	}
 
@@ -2445,7 +2487,7 @@ build:
 	if (virtio_net_hdr_to_skb(skb, gso, tun_is_little_endian(tun))) {
 		atomic_long_inc(&tun->rx_frame_errors);
 		kfree_skb(skb);
-		err = -EINVAL;
+		ret = -EINVAL;
 		goto out;
 	}
 
@@ -2455,16 +2497,27 @@ build:
 	skb_record_rx_queue(skb, tfile->queue_index);
 
 	if (skb_xdp) {
-		err = do_xdp_generic(xdp_prog, skb);
-		if (err != XDP_PASS)
+		ret = do_xdp_generic(xdp_prog, skb);
+		if (ret != XDP_PASS) {
+			ret = 0;
 			goto out;
+		}
 	}
 
 	if (!rcu_dereference(tun->steering_prog) && tun->numqueues > 1 &&
 	    !tfile->detached)
 		rxhash = __skb_get_hash_symmetric(skb);
 
-	netif_receive_skb(skb);
+	if (tfile->napi_enabled) {
+		queue = &tfile->sk.sk_write_queue;
+		spin_lock(&queue->lock);
+		__skb_queue_tail(queue, skb);
+		spin_unlock(&queue->lock);
+		ret = 1;
+	} else {
+		netif_receive_skb(skb);
+		ret = 0;
+	}
 
 	/* No need to disable preemption here since this function is
 	 * always called with bh disabled
@@ -2475,7 +2528,7 @@ build:
 		tun_flow_update(tun, rxhash, tfile);
 
out:
-	return err;
+	return ret;
 }
 
 static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
@@ -2489,10 +2542,11 @@ static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
 	if (!tun)
 		return -EBADFD;
 
-	if (ctl && (ctl->type == TUN_MSG_PTR)) {
+	if (m->msg_controllen == sizeof(struct tun_msg_ctl) &&
+	    ctl && ctl->type == TUN_MSG_PTR) {
 		struct tun_page tpage;
 		int n = ctl->num;
-		int flush = 0;
+		int flush = 0, queued = 0;
 
 		memset(&tpage, 0, sizeof(tpage));
 
@@ -2501,12 +2555,17 @@ static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
 
 		for (i = 0; i < n; i++) {
 			xdp = &((struct xdp_buff *)ctl->ptr)[i];
-			tun_xdp_one(tun, tfile, xdp, &flush, &tpage);
+			ret = tun_xdp_one(tun, tfile, xdp, &flush, &tpage);
+			if (ret > 0)
+				queued += ret;
 		}
 
 		if (flush)
 			xdp_do_flush();
 
+		if (tfile->napi_enabled && queued > 0)
+			napi_schedule(&tfile->napi);
+
 		rcu_read_unlock();
 		local_bh_enable();
 
@@ -2614,7 +2673,7 @@ static ssize_t tun_flags_show(struct device *dev, struct device_attribute *attr,
 			      char *buf)
 {
 	struct tun_struct *tun = netdev_priv(to_net_dev(dev));
-	return sprintf(buf, "0x%x\n", tun_flags(tun));
+	return sysfs_emit(buf, "0x%x\n", tun_flags(tun));
 }
 
 static ssize_t owner_show(struct device *dev, struct device_attribute *attr,
@@ -2622,9 +2681,9 @@ static ssize_t owner_show(struct device *dev, struct device_attribute *attr,
 {
 	struct tun_struct *tun = netdev_priv(to_net_dev(dev));
 	return uid_valid(tun->owner)?
-		sprintf(buf, "%u\n",
-			from_kuid_munged(current_user_ns(), tun->owner)):
-		sprintf(buf, "-1\n");
+		sysfs_emit(buf, "%u\n",
+			   from_kuid_munged(current_user_ns(), tun->owner)) :
+		sysfs_emit(buf, "-1\n");
 }
 
 static ssize_t group_show(struct device *dev, struct device_attribute *attr,
@@ -2632,9 +2691,9 @@ static ssize_t group_show(struct device *dev, struct device_attribute *attr,
 {
 	struct tun_struct *tun = netdev_priv(to_net_dev(dev));
 	return gid_valid(tun->group) ?
-		sprintf(buf, "%u\n",
-			from_kgid_munged(current_user_ns(), tun->group)):
-		sprintf(buf, "-1\n");
+		sysfs_emit(buf, "%u\n",
+			   from_kgid_munged(current_user_ns(), tun->group)) :
+		sysfs_emit(buf, "-1\n");
 }
 
 static DEVICE_ATTR_RO(tun_flags);
@@ -2778,7 +2837,10 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
 		rcu_assign_pointer(tfile->tun, tun);
 	}
 
-	netif_carrier_on(tun->dev);
+	if (ifr->ifr_flags & IFF_NO_CARRIER)
+		netif_carrier_off(tun->dev);
+	else
+		netif_carrier_on(tun->dev);
 
 	/* Make sure persistent devices do not get stuck in
 	 * xoff state.
@@ -3006,8 +3068,8 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
 		 * This is needed because we never checked for invalid flags on
 		 * TUNSETIFF.
 		 */
-		return put_user(IFF_TUN | IFF_TAP | TUN_FEATURES,
-				(unsigned int __user*)argp);
+		return put_user(IFF_TUN | IFF_TAP | IFF_NO_CARRIER |
+				TUN_FEATURES, (unsigned int __user*)argp);
 	} else if (cmd == TUNSETQUEUE) {
 		return tun_set_queue(file, &ifr);
 	} else if (cmd == SIOCGSKNS) {
@@ -3490,15 +3552,15 @@ static void tun_get_drvinfo(struct net_device *dev,
 			    struct ethtool_drvinfo *info)
 {
 	struct tun_struct *tun = netdev_priv(dev);
-	strlcpy(info->driver, DRV_NAME, sizeof(info->driver));
-	strlcpy(info->version, DRV_VERSION, sizeof(info->version));
+	strscpy(info->driver, DRV_NAME, sizeof(info->driver));
+	strscpy(info->version, DRV_VERSION, sizeof(info->version));
 
 	switch (tun->flags & TUN_TYPE_MASK) {
 	case IFF_TUN:
-		strlcpy(info->bus_info, "tun", sizeof(info->bus_info));
+		strscpy(info->bus_info, "tun", sizeof(info->bus_info));
 		break;
 	case IFF_TAP:
-		strlcpy(info->bus_info, "tap", sizeof(info->bus_info));
+		strscpy(info->bus_info, "tap", sizeof(info->bus_info));
 		break;
 	}
 }
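
Usage note (not part of the patch): the tun_set_iff() and TUNGETFEATURES hunks above add an IFF_NO_CARRIER flag so that a TUNSETIFF caller can create the interface with the carrier initially off. The sketch below illustrates one way a user-space program might use it; the interface name "tap-demo" is an arbitrary example, and the fallback #define is an assumption for systems whose <linux/if_tun.h> predates this change.

/* Sketch: create a tap device whose carrier starts off (needs CAP_NET_ADMIN). */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/if.h>
#include <linux/if_tun.h>

#ifndef IFF_NO_CARRIER
#define IFF_NO_CARRIER 0x0040	/* assumed value; take it from the updated UAPI header */
#endif

int main(void)
{
	struct ifreq ifr;
	int fd = open("/dev/net/tun", O_RDWR);

	if (fd < 0) {
		perror("open /dev/net/tun");
		return 1;
	}

	memset(&ifr, 0, sizeof(ifr));
	ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_NO_CARRIER;
	strncpy(ifr.ifr_name, "tap-demo", IFNAMSIZ - 1);

	if (ioctl(fd, TUNSETIFF, &ifr) < 0) {
		perror("TUNSETIFF");
		close(fd);
		return 1;
	}

	/* The kernel called netif_carrier_off(), so the link stays NO-CARRIER
	 * until carrier is enabled later (e.g. via TUNSETCARRIER). */
	printf("created %s with carrier off\n", ifr.ifr_name);
	pause();	/* keep the device alive */
	return 0;
}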
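
Note on the sysfs_emit() conversions: tun_flags_show(), owner_show() and group_show() keep their output formats (a hex flags word, and a uid/gid or -1), so existing consumers of the per-device attributes under /sys/class/net/<iface>/ are unaffected. Below is a minimal illustrative reader, not part of the patch; the device name "tap-demo" is assumed and error handling is abbreviated.

/* Sketch: print the tun sysfs attributes whose show() handlers now use sysfs_emit(). */
#include <stdio.h>

static void dump_attr(const char *dev, const char *attr)
{
	char path[256], buf[64];
	FILE *f;

	snprintf(path, sizeof(path), "/sys/class/net/%s/%s", dev, attr);
	f = fopen(path, "r");
	if (!f) {
		perror(path);
		return;
	}
	if (fgets(buf, sizeof(buf), f))
		printf("%s: %s", attr, buf);	/* the attribute value already ends with '\n' */
	fclose(f);
}

int main(void)
{
	const char *attrs[] = { "tun_flags", "owner", "group" };
	unsigned int i;

	for (i = 0; i < sizeof(attrs) / sizeof(attrs[0]); i++)
		dump_attr("tap-demo", attrs[i]);
	return 0;
}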