From 94a997637c5b562fa0ca44fca1d2cd02ec08236f Mon Sep 17 00:00:00 2001 From: Björn Töpel Date: Wed, 4 Sep 2019 13:49:10 +0200 Subject: xsk: avoid store-tearing when assigning queues MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use WRITE_ONCE when doing the store of tx, rx, fq, and cq, to avoid potential store-tearing. These members are read outside of the control mutex in the mmap implementation. Acked-by: Jonathan Lemon Fixes: 37b076933a8e ("xsk: add missing write- and data-dependency barrier") Signed-off-by: Björn Töpel Signed-off-by: Daniel Borkmann --- net/xdp/xsk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 187fd157fcff..271d8d3fb11e 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -434,7 +434,7 @@ static int xsk_init_queue(u32 entries, struct xsk_queue **queue, /* Make sure queue is ready before it can be seen by others */ smp_wmb(); - *queue = q; + WRITE_ONCE(*queue, q); return 0; } -- cgit v1.2.3-59-g8ed1b From 9764f4b301c3e7eb3b75eec85b73cad449cdbb0d Mon Sep 17 00:00:00 2001 From: Björn Töpel Date: Wed, 4 Sep 2019 13:49:11 +0200 Subject: xsk: avoid store-tearing when assigning umem MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The umem member of struct xdp_sock is read outside of the control mutex, in the mmap implementation, and needs a WRITE_ONCE to avoid potential store-tearing. Acked-by: Jonathan Lemon Fixes: 423f38329d26 ("xsk: add umem fill queue support and mmap") Signed-off-by: Björn Töpel Signed-off-by: Daniel Borkmann --- net/xdp/xsk.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 271d8d3fb11e..8c9056f06989 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -644,7 +644,7 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len) } xdp_get_umem(umem_xs->umem); - xs->umem = umem_xs->umem; + WRITE_ONCE(xs->umem, umem_xs->umem); sockfd_put(sock); } else if (!xs->umem || !xdp_umem_validate_queues(xs->umem)) { err = -EINVAL; @@ -751,7 +751,7 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname, /* Make sure umem is ready before it can be seen by others */ smp_wmb(); - xs->umem = umem; + WRITE_ONCE(xs->umem, umem); mutex_unlock(&xs->mutex); return 0; } -- cgit v1.2.3-59-g8ed1b From 42fddcc7c64b723a867c7b2f5f7505e244212f13 Mon Sep 17 00:00:00 2001 From: Björn Töpel Date: Wed, 4 Sep 2019 13:49:12 +0200 Subject: xsk: use state member for socket synchronization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Prior the state variable was introduced by Ilya, the dev member was used to determine whether the socket was bound or not. However, when dev was read, proper SMP barriers and READ_ONCE were missing. In order to address the missing barriers and READ_ONCE, we start using the state variable as a point of synchronization. The state member read/write is paired with proper SMP barriers, and from this follows that the members described above does not need READ_ONCE if used in conjunction with state check. In all syscalls and the xsk_rcv path we check if state is XSK_BOUND. If that is the case we do a SMP read barrier, and this implies that the dev, umem and all rings are correctly setup. Note that no READ_ONCE are needed for these variable if used when state is XSK_BOUND (plus the read barrier). To summarize: The members struct xdp_sock members dev, queue_id, umem, fq, cq, tx, rx, and state were read lock-less, with incorrect barriers and missing {READ, WRITE}_ONCE. Now, umem, fq, cq, tx, rx, and state are read lock-less. When these members are updated, WRITE_ONCE is used. When read, READ_ONCE are only used when read outside the control mutex (e.g. mmap) or, not synchronized with the state member (XSK_BOUND plus smp_rmb()) Note that dev and queue_id do not need a WRITE_ONCE or READ_ONCE, due to the introduce state synchronization (XSK_BOUND plus smp_rmb()). Introducing the state check also fixes a race, found by syzcaller, in xsk_poll() where umem could be accessed when stale. Suggested-by: Hillf Danton Reported-by: syzbot+c82697e3043781e08802@syzkaller.appspotmail.com Fixes: 77cd0d7b3f25 ("xsk: add support for need_wakeup flag in AF_XDP rings") Signed-off-by: Björn Töpel Acked-by: Jonathan Lemon Signed-off-by: Daniel Borkmann --- net/xdp/xsk.c | 54 +++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 39 insertions(+), 15 deletions(-) diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 8c9056f06989..c2f1af3b6a7c 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -186,10 +186,23 @@ static int __xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len) return err; } +static bool xsk_is_bound(struct xdp_sock *xs) +{ + if (READ_ONCE(xs->state) == XSK_BOUND) { + /* Matches smp_wmb() in bind(). */ + smp_rmb(); + return true; + } + return false; +} + int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp) { u32 len; + if (!xsk_is_bound(xs)) + return -EINVAL; + if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index) return -EINVAL; @@ -387,7 +400,7 @@ static int xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len) struct sock *sk = sock->sk; struct xdp_sock *xs = xdp_sk(sk); - if (unlikely(!xs->dev)) + if (unlikely(!xsk_is_bound(xs))) return -ENXIO; if (unlikely(!(xs->dev->flags & IFF_UP))) return -ENETDOWN; @@ -403,10 +416,15 @@ static unsigned int xsk_poll(struct file *file, struct socket *sock, struct poll_table_struct *wait) { unsigned int mask = datagram_poll(file, sock, wait); - struct sock *sk = sock->sk; - struct xdp_sock *xs = xdp_sk(sk); - struct net_device *dev = xs->dev; - struct xdp_umem *umem = xs->umem; + struct xdp_sock *xs = xdp_sk(sock->sk); + struct net_device *dev; + struct xdp_umem *umem; + + if (unlikely(!xsk_is_bound(xs))) + return mask; + + dev = xs->dev; + umem = xs->umem; if (umem->need_wakeup) dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id, @@ -442,10 +460,9 @@ static void xsk_unbind_dev(struct xdp_sock *xs) { struct net_device *dev = xs->dev; - if (!dev || xs->state != XSK_BOUND) + if (xs->state != XSK_BOUND) return; - - xs->state = XSK_UNBOUND; + WRITE_ONCE(xs->state, XSK_UNBOUND); /* Wait for driver to stop using the xdp socket. */ xdp_del_sk_umem(xs->umem, xs); @@ -520,7 +537,9 @@ static int xsk_release(struct socket *sock) local_bh_enable(); xsk_delete_from_maps(xs); + mutex_lock(&xs->mutex); xsk_unbind_dev(xs); + mutex_unlock(&xs->mutex); xskq_destroy(xs->rx); xskq_destroy(xs->tx); @@ -632,12 +651,12 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len) } umem_xs = xdp_sk(sock->sk); - if (!umem_xs->umem) { - /* No umem to inherit. */ + if (!xsk_is_bound(umem_xs)) { err = -EBADF; sockfd_put(sock); goto out_unlock; - } else if (umem_xs->dev != dev || umem_xs->queue_id != qid) { + } + if (umem_xs->dev != dev || umem_xs->queue_id != qid) { err = -EINVAL; sockfd_put(sock); goto out_unlock; @@ -671,10 +690,15 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len) xdp_add_sk_umem(xs->umem, xs); out_unlock: - if (err) + if (err) { dev_put(dev); - else - xs->state = XSK_BOUND; + } else { + /* Matches smp_rmb() in bind() for shared umem + * sockets, and xsk_is_bound(). + */ + smp_wmb(); + WRITE_ONCE(xs->state, XSK_BOUND); + } out_release: mutex_unlock(&xs->mutex); rtnl_unlock(); @@ -927,7 +951,7 @@ static int xsk_mmap(struct file *file, struct socket *sock, unsigned long pfn; struct page *qpg; - if (xs->state != XSK_READY) + if (READ_ONCE(xs->state) != XSK_READY) return -EBUSY; if (offset == XDP_PGOFF_RX_RING) { -- cgit v1.2.3-59-g8ed1b From 25dc18ff9b583a76fff00c7d9d14a40524653c45 Mon Sep 17 00:00:00 2001 From: Björn Töpel Date: Wed, 4 Sep 2019 13:49:13 +0200 Subject: xsk: lock the control mutex in sock_diag interface MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When accessing the members of an XDP socket, the control mutex should be held. This commit fixes that. Acked-by: Jonathan Lemon Fixes: a36b38aa2af6 ("xsk: add sock_diag interface for AF_XDP") Signed-off-by: Björn Töpel Signed-off-by: Daniel Borkmann --- net/xdp/xsk_diag.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/xdp/xsk_diag.c b/net/xdp/xsk_diag.c index 9986a759fe06..f59791ba43a0 100644 --- a/net/xdp/xsk_diag.c +++ b/net/xdp/xsk_diag.c @@ -97,6 +97,7 @@ static int xsk_diag_fill(struct sock *sk, struct sk_buff *nlskb, msg->xdiag_ino = sk_ino; sock_diag_save_cookie(sk, msg->xdiag_cookie); + mutex_lock(&xs->mutex); if ((req->xdiag_show & XDP_SHOW_INFO) && xsk_diag_put_info(xs, nlskb)) goto out_nlmsg_trim; @@ -117,10 +118,12 @@ static int xsk_diag_fill(struct sock *sk, struct sk_buff *nlskb, sock_diag_put_meminfo(sk, nlskb, XDP_DIAG_MEMINFO)) goto out_nlmsg_trim; + mutex_unlock(&xs->mutex); nlmsg_end(nlskb, nlh); return 0; out_nlmsg_trim: + mutex_unlock(&xs->mutex); nlmsg_cancel(nlskb, nlh); return -EMSGSIZE; } -- cgit v1.2.3-59-g8ed1b