From 86b259f6f8880237899fbf4f940303b3987dffa9 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 9 Aug 2022 10:55:43 -0700 Subject: tls: rx: device: bound the frag walk We can't do skb_walk_frags() on the input skbs, because the input skbs is really just a pointer to the tcp read queue. We need to bound the "is decrypted" check by the amount of data in the message. Note that the walk in tls_device_reencrypt() is after a CoW so the skb there is safe to walk. Actually in the current implementation it can't have frags at all, but whatever, maybe one day it will. Reported-by: Tariq Toukan Fixes: 84c61fe1a75b ("tls: rx: do not use the standard strparser") Tested-by: Ran Rozenstein Link: https://lore.kernel.org/r/20220809175544.354343-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/tls/tls_device.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'net/tls') diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index e3e6cf75aa03..6ed41474bdf8 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -984,11 +984,17 @@ int tls_device_decrypted(struct sock *sk, struct tls_context *tls_ctx) int is_decrypted = skb->decrypted; int is_encrypted = !is_decrypted; struct sk_buff *skb_iter; + int left; + left = rxm->full_len - skb->len; /* Check if all the data is decrypted already */ - skb_walk_frags(skb, skb_iter) { + skb_iter = skb_shinfo(skb)->frag_list; + while (skb_iter && left > 0) { is_decrypted &= skb_iter->decrypted; is_encrypted &= !skb_iter->decrypted; + + left -= skb_iter->len; + skb_iter = skb_iter->next; } trace_tls_device_decrypted(sk, tcp_sk(sk)->copied_seq - rxm->full_len, -- cgit v1.2.3-59-g8ed1b From d800a7b3577bfb783481b02865d8775a760212a7 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 9 Aug 2022 10:55:44 -0700 Subject: tls: rx: device: don't try to copy too much on detach Another device offload bug, we use the length of the output skb as an indication of how much data to copy. But that skb is sized to offset + record length, and we start from offset. So we end up double-counting the offset which leads to skb_copy_bits() returning -EFAULT. Reported-by: Tariq Toukan Fixes: 84c61fe1a75b ("tls: rx: do not use the standard strparser") Tested-by: Ran Rozenstein Link: https://lore.kernel.org/r/20220809175544.354343-2-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/tls/tls_strp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/tls') diff --git a/net/tls/tls_strp.c b/net/tls/tls_strp.c index f0b7c9122fba..9b79e334dbd9 100644 --- a/net/tls/tls_strp.c +++ b/net/tls/tls_strp.c @@ -41,7 +41,7 @@ static struct sk_buff *tls_strp_msg_make_copy(struct tls_strparser *strp) struct sk_buff *skb; int i, err, offset; - skb = alloc_skb_with_frags(0, strp->anchor->len, TLS_PAGE_ORDER, + skb = alloc_skb_with_frags(0, strp->stm.full_len, TLS_PAGE_ORDER, &err, strp->sk->sk_allocation); if (!skb) return NULL; -- cgit v1.2.3-59-g8ed1b From 94ce3b64c62d4b628cf85cd0d9a370aca8f7e43a Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Wed, 10 Aug 2022 11:16:02 +0300 Subject: net/tls: Use RCU API to access tls_ctx->netdev Currently, tls_device_down synchronizes with tls_device_resync_rx using RCU, however, the pointer to netdev is stored using WRITE_ONCE and loaded using READ_ONCE. Although such approach is technically correct (rcu_dereference is essentially a READ_ONCE, and rcu_assign_pointer uses WRITE_ONCE to store NULL), using special RCU helpers for pointers is more valid, as it includes additional checks and might change the implementation transparently to the callers. Mark the netdev pointer as __rcu and use the correct RCU helpers to access it. For non-concurrent access pass the right conditions that guarantee safe access (locks taken, refcount value). Also use the correct helper in mlx5e, where even READ_ONCE was missing. The transition to RCU exposes existing issues, fixed by this commit: 1. bond_tls_device_xmit could read netdev twice, and it could become NULL the second time, after the NULL check passed. 2. Drivers shouldn't stop processing the last packet if tls_device_down just set netdev to NULL, before tls_dev_del was called. This prevents a possible packet drop when transitioning to the fallback software mode. Fixes: 89df6a810470 ("net/bonding: Implement TLS TX device offload") Fixes: c55dcdd435aa ("net/tls: Fix use-after-free after the TLS device goes down and up") Signed-off-by: Maxim Mikityanskiy Link: https://lore.kernel.org/r/20220810081602.1435800-1-maximmi@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/bonding/bond_main.c | 10 ++++-- .../chelsio/inline_crypto/ch_ktls/chcr_ktls.c | 8 ++++- .../ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c | 8 ++++- include/net/tls.h | 2 +- net/tls/tls_device.c | 38 +++++++++++++++++----- net/tls/tls_device_fallback.c | 3 +- 6 files changed, 54 insertions(+), 15 deletions(-) (limited to 'net/tls') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index ab7fdbbc2530..50e60843020c 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -5338,8 +5338,14 @@ static struct net_device *bond_sk_get_lower_dev(struct net_device *dev, static netdev_tx_t bond_tls_device_xmit(struct bonding *bond, struct sk_buff *skb, struct net_device *dev) { - if (likely(bond_get_slave_by_dev(bond, tls_get_ctx(skb->sk)->netdev))) - return bond_dev_queue_xmit(bond, skb, tls_get_ctx(skb->sk)->netdev); + struct net_device *tls_netdev = rcu_dereference(tls_get_ctx(skb->sk)->netdev); + + /* tls_netdev might become NULL, even if tls_is_sk_tx_device_offloaded + * was true, if tls_device_down is running in parallel, but it's OK, + * because bond_get_slave_by_dev has a NULL check. + */ + if (likely(bond_get_slave_by_dev(bond, tls_netdev))) + return bond_dev_queue_xmit(bond, skb, tls_netdev); return bond_tx_drop(dev, skb); } #endif diff --git a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c index bfee0e4e54b1..da9973b711f4 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c @@ -1932,6 +1932,7 @@ static int chcr_ktls_xmit(struct sk_buff *skb, struct net_device *dev) int data_len, qidx, ret = 0, mss; struct tls_record_info *record; struct chcr_ktls_info *tx_info; + struct net_device *tls_netdev; struct tls_context *tls_ctx; struct sge_eth_txq *q; struct adapter *adap; @@ -1945,7 +1946,12 @@ static int chcr_ktls_xmit(struct sk_buff *skb, struct net_device *dev) mss = skb_is_gso(skb) ? skb_shinfo(skb)->gso_size : data_len; tls_ctx = tls_get_ctx(skb->sk); - if (unlikely(tls_ctx->netdev != dev)) + tls_netdev = rcu_dereference_bh(tls_ctx->netdev); + /* Don't quit on NULL: if tls_device_down is running in parallel, + * netdev might become NULL, even if tls_is_sk_tx_device_offloaded was + * true. Rather continue processing this packet. + */ + if (unlikely(tls_netdev && tls_netdev != dev)) goto out; tx_ctx = chcr_get_ktls_tx_context(tls_ctx); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c index 6b6c7044b64a..0aef69527226 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c @@ -808,6 +808,7 @@ bool mlx5e_ktls_handle_tx_skb(struct net_device *netdev, struct mlx5e_txqsq *sq, { struct mlx5e_ktls_offload_context_tx *priv_tx; struct mlx5e_sq_stats *stats = sq->stats; + struct net_device *tls_netdev; struct tls_context *tls_ctx; int datalen; u32 seq; @@ -819,7 +820,12 @@ bool mlx5e_ktls_handle_tx_skb(struct net_device *netdev, struct mlx5e_txqsq *sq, mlx5e_tx_mpwqe_ensure_complete(sq); tls_ctx = tls_get_ctx(skb->sk); - if (WARN_ON_ONCE(tls_ctx->netdev != netdev)) + tls_netdev = rcu_dereference_bh(tls_ctx->netdev); + /* Don't WARN on NULL: if tls_device_down is running in parallel, + * netdev might become NULL, even if tls_is_sk_tx_device_offloaded was + * true. Rather continue processing this packet. + */ + if (WARN_ON_ONCE(tls_netdev && tls_netdev != netdev)) goto err_out; priv_tx = mlx5e_get_ktls_tx_priv_ctx(tls_ctx); diff --git a/include/net/tls.h b/include/net/tls.h index b75b5727abdb..cb205f9d9473 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -237,7 +237,7 @@ struct tls_context { void *priv_ctx_tx; void *priv_ctx_rx; - struct net_device *netdev; + struct net_device __rcu *netdev; /* rw cache line */ struct cipher_context tx; diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index 6ed41474bdf8..0f983e5f7dde 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -71,7 +71,13 @@ static void tls_device_tx_del_task(struct work_struct *work) struct tls_offload_context_tx *offload_ctx = container_of(work, struct tls_offload_context_tx, destruct_work); struct tls_context *ctx = offload_ctx->ctx; - struct net_device *netdev = ctx->netdev; + struct net_device *netdev; + + /* Safe, because this is the destroy flow, refcount is 0, so + * tls_device_down can't store this field in parallel. + */ + netdev = rcu_dereference_protected(ctx->netdev, + !refcount_read(&ctx->refcount)); netdev->tlsdev_ops->tls_dev_del(netdev, ctx, TLS_OFFLOAD_CTX_DIR_TX); dev_put(netdev); @@ -81,6 +87,7 @@ static void tls_device_tx_del_task(struct work_struct *work) static void tls_device_queue_ctx_destruction(struct tls_context *ctx) { + struct net_device *netdev; unsigned long flags; bool async_cleanup; @@ -91,7 +98,14 @@ static void tls_device_queue_ctx_destruction(struct tls_context *ctx) } list_del(&ctx->list); /* Remove from tls_device_list / tls_device_down_list */ - async_cleanup = ctx->netdev && ctx->tx_conf == TLS_HW; + + /* Safe, because this is the destroy flow, refcount is 0, so + * tls_device_down can't store this field in parallel. + */ + netdev = rcu_dereference_protected(ctx->netdev, + !refcount_read(&ctx->refcount)); + + async_cleanup = netdev && ctx->tx_conf == TLS_HW; if (async_cleanup) { struct tls_offload_context_tx *offload_ctx = tls_offload_ctx_tx(ctx); @@ -229,7 +243,8 @@ static void tls_device_resync_tx(struct sock *sk, struct tls_context *tls_ctx, trace_tls_device_tx_resync_send(sk, seq, rcd_sn); down_read(&device_offload_lock); - netdev = tls_ctx->netdev; + netdev = rcu_dereference_protected(tls_ctx->netdev, + lockdep_is_held(&device_offload_lock)); if (netdev) err = netdev->tlsdev_ops->tls_dev_resync(netdev, sk, seq, rcd_sn, @@ -710,7 +725,7 @@ static void tls_device_resync_rx(struct tls_context *tls_ctx, trace_tls_device_rx_resync_send(sk, seq, rcd_sn, rx_ctx->resync_type); rcu_read_lock(); - netdev = READ_ONCE(tls_ctx->netdev); + netdev = rcu_dereference(tls_ctx->netdev); if (netdev) netdev->tlsdev_ops->tls_dev_resync(netdev, sk, seq, rcd_sn, TLS_OFFLOAD_CTX_DIR_RX); @@ -1035,7 +1050,7 @@ static void tls_device_attach(struct tls_context *ctx, struct sock *sk, if (sk->sk_destruct != tls_device_sk_destruct) { refcount_set(&ctx->refcount, 1); dev_hold(netdev); - ctx->netdev = netdev; + RCU_INIT_POINTER(ctx->netdev, netdev); spin_lock_irq(&tls_device_lock); list_add_tail(&ctx->list, &tls_device_list); spin_unlock_irq(&tls_device_lock); @@ -1306,7 +1321,8 @@ void tls_device_offload_cleanup_rx(struct sock *sk) struct net_device *netdev; down_read(&device_offload_lock); - netdev = tls_ctx->netdev; + netdev = rcu_dereference_protected(tls_ctx->netdev, + lockdep_is_held(&device_offload_lock)); if (!netdev) goto out; @@ -1315,7 +1331,7 @@ void tls_device_offload_cleanup_rx(struct sock *sk) if (tls_ctx->tx_conf != TLS_HW) { dev_put(netdev); - tls_ctx->netdev = NULL; + rcu_assign_pointer(tls_ctx->netdev, NULL); } else { set_bit(TLS_RX_DEV_CLOSED, &tls_ctx->flags); } @@ -1335,7 +1351,11 @@ static int tls_device_down(struct net_device *netdev) spin_lock_irqsave(&tls_device_lock, flags); list_for_each_entry_safe(ctx, tmp, &tls_device_list, list) { - if (ctx->netdev != netdev || + struct net_device *ctx_netdev = + rcu_dereference_protected(ctx->netdev, + lockdep_is_held(&device_offload_lock)); + + if (ctx_netdev != netdev || !refcount_inc_not_zero(&ctx->refcount)) continue; @@ -1352,7 +1372,7 @@ static int tls_device_down(struct net_device *netdev) /* Stop the RX and TX resync. * tls_dev_resync must not be called after tls_dev_del. */ - WRITE_ONCE(ctx->netdev, NULL); + rcu_assign_pointer(ctx->netdev, NULL); /* Start skipping the RX resync logic completely. */ set_bit(TLS_RX_DEV_DEGRADED, &ctx->flags); diff --git a/net/tls/tls_device_fallback.c b/net/tls/tls_device_fallback.c index 618cee704217..7dfc8023e0f1 100644 --- a/net/tls/tls_device_fallback.c +++ b/net/tls/tls_device_fallback.c @@ -426,7 +426,8 @@ struct sk_buff *tls_validate_xmit_skb(struct sock *sk, struct net_device *dev, struct sk_buff *skb) { - if (dev == tls_get_ctx(sk)->netdev || netif_is_bond_master(dev)) + if (dev == rcu_dereference_bh(tls_get_ctx(sk)->netdev) || + netif_is_bond_master(dev)) return skb; return tls_sw_fallback(sk, skb); -- cgit v1.2.3-59-g8ed1b