about | summary | refs | log | tree | commit | diff | stats
path: root/drivers/net/tun.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/net/tun.c')
-rw-r--r-- drivers/net/tun.c | 174
1 file changed, 118 insertions, 56 deletions
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index fed85447701a..7a3ab3427369 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -268,12 +268,17 @@ static void tun_napi_init(struct tun_struct *tun, struct tun_file *tfile,
tfile->napi_enabled = napi_en;
tfile->napi_frags_enabled = napi_en && napi_frags;
if (napi_en) {
- netif_tx_napi_add(tun->dev, &tfile->napi, tun_napi_poll,
- NAPI_POLL_WEIGHT);
+ netif_napi_add_tx(tun->dev, &tfile->napi, tun_napi_poll);
napi_enable(&tfile->napi);
}
}
+static void tun_napi_enable(struct tun_file *tfile)
+{
+ if (tfile->napi_enabled)
+ napi_enable(&tfile->napi);
+}
+
static void tun_napi_disable(struct tun_file *tfile)
{
if (tfile->napi_enabled)
@@ -635,7 +640,8 @@ static void __tun_detach(struct tun_file *tfile, bool clean)
tun = rtnl_dereference(tfile->tun);
if (tun && clean) {
- tun_napi_disable(tfile);
+ if (!tfile->detached)
+ tun_napi_disable(tfile);
tun_napi_del(tfile);
}
@@ -654,8 +660,10 @@ static void __tun_detach(struct tun_file *tfile, bool clean)
if (clean) {
RCU_INIT_POINTER(tfile->tun, NULL);
sock_put(&tfile->sk);
- } else
+ } else {
tun_disable_queue(tun, tfile);
+ tun_napi_disable(tfile);
+ }
synchronize_net();
tun_flow_delete_by_queue(tun, tun->numqueues + 1);
@@ -728,6 +736,7 @@ static void tun_detach_all(struct net_device *dev)
sock_put(&tfile->sk);
}
list_for_each_entry_safe(tfile, tmp, &tun->disabled, next) {
+ tun_napi_del(tfile);
tun_enable_queue(tfile);
tun_queue_purge(tfile);
xdp_rxq_info_unreg(&tfile->xdp_rxq);
@@ -808,6 +817,7 @@ static int tun_attach(struct tun_struct *tun, struct file *file,
if (tfile->detached) {
tun_enable_queue(tfile);
+ tun_napi_enable(tfile);
} else {
sock_hold(&tfile->sk);
tun_napi_init(tun, tfile, napi, napi_frags);
@@ -1058,6 +1068,7 @@ static unsigned int run_ebpf_filter(struct tun_struct *tun,
static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct tun_struct *tun = netdev_priv(dev);
+ enum skb_drop_reason drop_reason;
int txq = skb->queue_mapping;
struct netdev_queue *queue;
struct tun_file *tfile;
@@ -1067,8 +1078,10 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
tfile = rcu_dereference(tun->tfiles[txq]);
/* Drop packet if interface is not attached */
- if (!tfile)
+ if (!tfile) {
+ drop_reason = SKB_DROP_REASON_DEV_READY;
goto drop;
+ }
if (!rcu_dereference(tun->steering_prog))
tun_automq_xmit(tun, skb);
@@ -1078,19 +1091,32 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
/* Drop if the filter does not like it.
* This is a noop if the filter is disabled.
* Filter can be enabled only for the TAP devices. */
- if (!check_filter(&tun->txflt, skb))
+ if (!check_filter(&tun->txflt, skb)) {
+ drop_reason = SKB_DROP_REASON_TAP_TXFILTER;
goto drop;
+ }
if (tfile->socket.sk->sk_filter &&
- sk_filter(tfile->socket.sk, skb))
+ sk_filter(tfile->socket.sk, skb)) {
+ drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
goto drop;
+ }
len = run_ebpf_filter(tun, skb, len);
- if (len == 0 || pskb_trim(skb, len))
+ if (len == 0) {
+ drop_reason = SKB_DROP_REASON_TAP_FILTER;
+ goto drop;
+ }
+
+ if (pskb_trim(skb, len)) {
+ drop_reason = SKB_DROP_REASON_NOMEM;
goto drop;
+ }
- if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC)))
+ if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC))) {
+ drop_reason = SKB_DROP_REASON_SKB_UCOPY_FAULT;
goto drop;
+ }
skb_tx_timestamp(skb);
@@ -1101,12 +1127,14 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
nf_reset_ct(skb);
- if (ptr_ring_produce(&tfile->tx_ring, skb))
+ if (ptr_ring_produce(&tfile->tx_ring, skb)) {
+ drop_reason = SKB_DROP_REASON_FULL_RING;
goto drop;
+ }
/* NETIF_F_LLTX requires to do our own update of trans_start */
queue = netdev_get_tx_queue(dev, txq);
- queue->trans_start = jiffies;
+ txq_trans_cond_update(queue);
/* Notify and wake up reader process */
if (tfile->flags & TUN_FASYNC)
@@ -1117,9 +1145,9 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
return NETDEV_TX_OK;
drop:
- atomic_long_inc(&dev->tx_dropped);
+ dev_core_stats_tx_dropped_inc(dev);
skb_tx_error(skb);
- kfree_skb(skb);
+ kfree_skb_reason(skb, drop_reason);
rcu_read_unlock();
return NET_XMIT_DROP;
}
@@ -1273,7 +1301,7 @@ resample:
void *frame = tun_xdp_to_ptr(xdp);
if (__ptr_ring_produce(&tfile->tx_ring, frame)) {
- atomic_long_inc(&dev->tx_dropped);
+ dev_core_stats_tx_dropped_inc(dev);
break;
}
nxmit++;
@@ -1431,7 +1459,8 @@ static struct sk_buff *tun_napi_alloc_frags(struct tun_file *tfile,
int err;
int i;
- if (it->nr_segs > MAX_SKB_FRAGS + 1)
+ if (it->nr_segs > MAX_SKB_FRAGS + 1 ||
+ len > (ETH_MAX_MTU - NET_SKB_PAD - NET_IP_ALIGN))
return ERR_PTR(-EMSGSIZE);
local_bh_disable();
@@ -1608,7 +1637,7 @@ static int tun_xdp_act(struct tun_struct *tun, struct bpf_prog *xdp_prog,
trace_xdp_exception(tun->dev, xdp_prog, act);
fallthrough;
case XDP_DROP:
- atomic_long_inc(&tun->dev->rx_dropped);
+ dev_core_stats_rx_dropped_inc(tun->dev);
break;
}
@@ -1717,6 +1746,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
u32 rxhash = 0;
int skb_xdp = 1;
bool frags = tun_napi_frags_enabled(tfile);
+ enum skb_drop_reason drop_reason;
if (!(tun->flags & IFF_NO_PI)) {
if (len < sizeof(pi))
@@ -1778,7 +1808,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
*/
skb = tun_build_skb(tun, tfile, from, &gso, len, &skb_xdp);
if (IS_ERR(skb)) {
- atomic_long_inc(&tun->dev->rx_dropped);
+ dev_core_stats_rx_dropped_inc(tun->dev);
return PTR_ERR(skb);
}
if (!skb)
@@ -1807,7 +1837,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
if (IS_ERR(skb)) {
if (PTR_ERR(skb) != -EAGAIN)
- atomic_long_inc(&tun->dev->rx_dropped);
+ dev_core_stats_rx_dropped_inc(tun->dev);
if (frags)
mutex_unlock(&tfile->napi_mutex);
return PTR_ERR(skb);
@@ -1820,9 +1850,10 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
if (err) {
err = -EFAULT;
+ drop_reason = SKB_DROP_REASON_SKB_UCOPY_FAULT;
drop:
- atomic_long_inc(&tun->dev->rx_dropped);
- kfree_skb(skb);
+ dev_core_stats_rx_dropped_inc(tun->dev);
+ kfree_skb_reason(skb, drop_reason);
if (frags) {
tfile->napi.skb = NULL;
mutex_unlock(&tfile->napi_mutex);
@@ -1856,7 +1887,7 @@ drop:
pi.proto = htons(ETH_P_IPV6);
break;
default:
- atomic_long_inc(&tun->dev->rx_dropped);
+ dev_core_stats_rx_dropped_inc(tun->dev);
kfree_skb(skb);
return -EINVAL;
}
@@ -1869,6 +1900,7 @@ drop:
case IFF_TAP:
if (frags && !pskb_may_pull(skb, ETH_HLEN)) {
err = -ENOMEM;
+ drop_reason = SKB_DROP_REASON_HDR_TRUNC;
goto drop;
}
skb->protocol = eth_type_trans(skb, tun->dev);
@@ -1922,6 +1954,7 @@ drop:
if (unlikely(!(tun->dev->flags & IFF_UP))) {
err = -EIO;
rcu_read_unlock();
+ drop_reason = SKB_DROP_REASON_DEV_READY;
goto drop;
}
@@ -1934,17 +1967,25 @@ drop:
skb_headlen(skb));
if (unlikely(headlen > skb_headlen(skb))) {
- atomic_long_inc(&tun->dev->rx_dropped);
+ WARN_ON_ONCE(1);
+ err = -ENOMEM;
+ dev_core_stats_rx_dropped_inc(tun->dev);
+napi_busy:
napi_free_frags(&tfile->napi);
rcu_read_unlock();
mutex_unlock(&tfile->napi_mutex);
- WARN_ON(1);
- return -ENOMEM;
+ return err;
}
- local_bh_disable();
- napi_gro_frags(&tfile->napi);
- local_bh_enable();
+ if (likely(napi_schedule_prep(&tfile->napi))) {
+ local_bh_disable();
+ napi_gro_frags(&tfile->napi);
+ napi_complete(&tfile->napi);
+ local_bh_enable();
+ } else {
+ err = -EBUSY;
+ goto napi_busy;
+ }
mutex_unlock(&tfile->napi_mutex);
} else if (tfile->napi_enabled) {
struct sk_buff_head *queue = &tfile->sk.sk_write_queue;
@@ -1962,7 +2003,7 @@ drop:
} else if (!IS_ENABLED(CONFIG_4KSTACKS)) {
tun_rx_batched(tun, tfile, skb, more);
} else {
- netif_rx_ni(skb);
+ netif_rx(skb);
}
rcu_read_unlock();
@@ -2388,9 +2429,10 @@ static int tun_xdp_one(struct tun_struct *tun,
struct virtio_net_hdr *gso = &hdr->gso;
struct bpf_prog *xdp_prog;
struct sk_buff *skb = NULL;
+ struct sk_buff_head *queue;
u32 rxhash = 0, act;
int buflen = hdr->buflen;
- int err = 0;
+ int ret = 0;
bool skb_xdp = false;
struct page *page;
@@ -2405,13 +2447,13 @@ static int tun_xdp_one(struct tun_struct *tun,
xdp_set_data_meta_invalid(xdp);
act = bpf_prog_run_xdp(xdp_prog, xdp);
- err = tun_xdp_act(tun, xdp_prog, xdp, act);
- if (err < 0) {
+ ret = tun_xdp_act(tun, xdp_prog, xdp, act);
+ if (ret < 0) {
put_page(virt_to_head_page(xdp->data));
- return err;
+ return ret;
}
- switch (err) {
+ switch (ret) {
case XDP_REDIRECT:
*flush = true;
fallthrough;
@@ -2435,7 +2477,7 @@ static int tun_xdp_one(struct tun_struct *tun,
build:
skb = build_skb(xdp->data_hard_start, buflen);
if (!skb) {
- err = -ENOMEM;
+ ret = -ENOMEM;
goto out;
}
@@ -2445,7 +2487,7 @@ build:
if (virtio_net_hdr_to_skb(skb, gso, tun_is_little_endian(tun))) {
atomic_long_inc(&tun->rx_frame_errors);
kfree_skb(skb);
- err = -EINVAL;
+ ret = -EINVAL;
goto out;
}
@@ -2455,16 +2497,27 @@ build:
skb_record_rx_queue(skb, tfile->queue_index);
if (skb_xdp) {
- err = do_xdp_generic(xdp_prog, skb);
- if (err != XDP_PASS)
+ ret = do_xdp_generic(xdp_prog, skb);
+ if (ret != XDP_PASS) {
+ ret = 0;
goto out;
+ }
}
if (!rcu_dereference(tun->steering_prog) && tun->numqueues > 1 &&
!tfile->detached)
rxhash = __skb_get_hash_symmetric(skb);
- netif_receive_skb(skb);
+ if (tfile->napi_enabled) {
+ queue = &tfile->sk.sk_write_queue;
+ spin_lock(&queue->lock);
+ __skb_queue_tail(queue, skb);
+ spin_unlock(&queue->lock);
+ ret = 1;
+ } else {
+ netif_receive_skb(skb);
+ ret = 0;
+ }
/* No need to disable preemption here since this function is
* always called with bh disabled
@@ -2475,7 +2528,7 @@ build:
tun_flow_update(tun, rxhash, tfile);
out:
- return err;
+ return ret;
}
static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
@@ -2489,10 +2542,11 @@ static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
if (!tun)
return -EBADFD;
- if (ctl && (ctl->type == TUN_MSG_PTR)) {
+ if (m->msg_controllen == sizeof(struct tun_msg_ctl) &&
+ ctl && ctl->type == TUN_MSG_PTR) {
struct tun_page tpage;
int n = ctl->num;
- int flush = 0;
+ int flush = 0, queued = 0;
memset(&tpage, 0, sizeof(tpage));
@@ -2501,12 +2555,17 @@ static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
for (i = 0; i < n; i++) {
xdp = &((struct xdp_buff *)ctl->ptr)[i];
- tun_xdp_one(tun, tfile, xdp, &flush, &tpage);
+ ret = tun_xdp_one(tun, tfile, xdp, &flush, &tpage);
+ if (ret > 0)
+ queued += ret;
}
if (flush)
xdp_do_flush();
+ if (tfile->napi_enabled && queued > 0)
+ napi_schedule(&tfile->napi);
+
rcu_read_unlock();
local_bh_enable();
@@ -2614,7 +2673,7 @@ static ssize_t tun_flags_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
struct tun_struct *tun = netdev_priv(to_net_dev(dev));
- return sprintf(buf, "0x%x\n", tun_flags(tun));
+ return sysfs_emit(buf, "0x%x\n", tun_flags(tun));
}
static ssize_t owner_show(struct device *dev, struct device_attribute *attr,
@@ -2622,9 +2681,9 @@ static ssize_t owner_show(struct device *dev, struct device_attribute *attr,
{
struct tun_struct *tun = netdev_priv(to_net_dev(dev));
return uid_valid(tun->owner)?
- sprintf(buf, "%u\n",
- from_kuid_munged(current_user_ns(), tun->owner)):
- sprintf(buf, "-1\n");
+ sysfs_emit(buf, "%u\n",
+ from_kuid_munged(current_user_ns(), tun->owner)) :
+ sysfs_emit(buf, "-1\n");
}
static ssize_t group_show(struct device *dev, struct device_attribute *attr,
@@ -2632,9 +2691,9 @@ static ssize_t group_show(struct device *dev, struct device_attribute *attr,
{
struct tun_struct *tun = netdev_priv(to_net_dev(dev));
return gid_valid(tun->group) ?
- sprintf(buf, "%u\n",
- from_kgid_munged(current_user_ns(), tun->group)):
- sprintf(buf, "-1\n");
+ sysfs_emit(buf, "%u\n",
+ from_kgid_munged(current_user_ns(), tun->group)) :
+ sysfs_emit(buf, "-1\n");
}
static DEVICE_ATTR_RO(tun_flags);
@@ -2778,7 +2837,10 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
rcu_assign_pointer(tfile->tun, tun);
}
- netif_carrier_on(tun->dev);
+ if (ifr->ifr_flags & IFF_NO_CARRIER)
+ netif_carrier_off(tun->dev);
+ else
+ netif_carrier_on(tun->dev);
/* Make sure persistent devices do not get stuck in
* xoff state.
@@ -3006,8 +3068,8 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
* This is needed because we never checked for invalid flags on
* TUNSETIFF.
*/
- return put_user(IFF_TUN | IFF_TAP | TUN_FEATURES,
- (unsigned int __user*)argp);
+ return put_user(IFF_TUN | IFF_TAP | IFF_NO_CARRIER |
+ TUN_FEATURES, (unsigned int __user*)argp);
} else if (cmd == TUNSETQUEUE) {
return tun_set_queue(file, &ifr);
} else if (cmd == SIOCGSKNS) {
@@ -3490,15 +3552,15 @@ static void tun_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info
{
struct tun_struct *tun = netdev_priv(dev);
- strlcpy(info->driver, DRV_NAME, sizeof(info->driver));
- strlcpy(info->version, DRV_VERSION, sizeof(info->version));
+ strscpy(info->driver, DRV_NAME, sizeof(info->driver));
+ strscpy(info->version, DRV_VERSION, sizeof(info->version));
switch (tun->flags & TUN_TYPE_MASK) {
case IFF_TUN:
- strlcpy(info->bus_info, "tun", sizeof(info->bus_info));
+ strscpy(info->bus_info, "tun", sizeof(info->bus_info));
break;
case IFF_TAP:
- strlcpy(info->bus_info, "tap", sizeof(info->bus_info));
+ strscpy(info->bus_info, "tap", sizeof(info->bus_info));
break;
}
}