From f0433eea468810aebd61d0b9d095e9acd6bea2ed Mon Sep 17 00:00:00 2001
From: Jakub Kicinski
Date: Sat, 12 Apr 2025 16:30:11 -0700
Subject: net: don't mix device locking in dev_close_many() calls

Lockdep found the following dependency:

  &dev_instance_lock_key#3 -->
     &rdev->wiphy.mtx -->
        &net->xdp.lock -->
           &xs->mutex -->
              &dev_instance_lock_key#3

The first dependency is the problem. wiphy mutex should be outside
the instance locks. The problem happens in notifiers (as always)
for CLOSE. We only hold the instance lock for ops locked devices
during CLOSE, and WiFi netdevs are not ops locked. Unfortunately,
when we dev_close_many() during netns dismantle we may be holding
the instance lock of _another_ netdev when issuing a CLOSE for
a WiFi device.

Lockdep's "Possible unsafe locking scenario" only prints 3 locks
and we have 4, plus I think we'd need 3 CPUs, like this:

       CPU0                      CPU1                           CPU2
       ----                      ----                           ----
  lock(&xs->mutex);
                        lock(&dev_instance_lock_key#3);
                                                          lock(&rdev->wiphy.mtx);
                                                          lock(&net->xdp.lock);
                                                          lock(&xs->mutex);
                        lock(&rdev->wiphy.mtx);
  lock(&dev_instance_lock_key#3);

Though, I don't think that's possible as CPU1 and CPU2 would
be under rtnl_lock. Even if we have per-netns rtnl_lock and
wiphy can span network namespaces - CPU0 and CPU1 must be
in the same netns to see dev_instance_lock, so CPU0 can't
be installing a socket as CPU1 is tearing the netns down.

Regardless, our expected lock ordering is that wiphy lock
is taken before instance locks, so let's fix this.

Go over the ops locked and non-locked devices separately.
Note that calling dev_close_many() on an empty list is perfectly
fine. All processing (including RCU syncs) is conditional
on the list not being empty, already.
Fixes: 7e4d784f5810 ("net: hold netdev instance lock during rtnetlink operations")
Reported-by: syzbot+6f588c78bf765b62b450@syzkaller.appspotmail.com
Acked-by: Stanislav Fomichev
Link: https://patch.msgid.link/20250412233011.309762-1-kuba@kernel.org
Signed-off-by: Jakub Kicinski
---
 net/core/dev.c | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

(limited to 'net/core/dev.c')

diff --git a/net/core/dev.c b/net/core/dev.c
index 75e104322ad5..5fcbc66d865e 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -11932,15 +11932,24 @@ void unregister_netdevice_many_notify(struct list_head *head,
 		BUG_ON(dev->reg_state != NETREG_REGISTERED);
 	}
 
-	/* If device is running, close it first. */
+	/* If device is running, close it first. Start with ops locked... */
 	list_for_each_entry(dev, head, unreg_list) {
-		list_add_tail(&dev->close_list, &close_head);
-		netdev_lock_ops(dev);
+		if (netdev_need_ops_lock(dev)) {
+			list_add_tail(&dev->close_list, &close_head);
+			netdev_lock(dev);
+		}
+	}
+	dev_close_many(&close_head, true);
+	/* ... now unlock them and go over the rest. */
+	list_for_each_entry(dev, head, unreg_list) {
+		if (netdev_need_ops_lock(dev))
+			netdev_unlock(dev);
+		else
+			list_add_tail(&dev->close_list, &close_head);
 	}
 	dev_close_many(&close_head, true);
 
 	list_for_each_entry(dev, head, unreg_list) {
-		netdev_unlock_ops(dev);
 		/* And unlink it from device chain. */
 		unlist_netdevice(dev);
 		netdev_lock(dev);
-- 
cgit v1.2.3-59-g8ed1b

From 4798cfa2097f0833d54d8f5ce20ef14631917839 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski
Date: Tue, 15 Apr 2025 08:15:52 -0700
Subject: net: don't try to ops lock uninitialized devs

We need to be careful when operating on dev while in rtnl_create_link().
Some devices (vxlan) initialize netdev_ops only later, in ->newlink.
Avoid using netdev_lock_ops(): the device isn't registered, so we
cannot legally call its ops or generate any notifications for it.
netdev_ops_assert_locked_or_invisible() is safe to use; it checks
registration status first.

Reported-by: syzbot+de1c7d68a10e3f123bdd@syzkaller.appspotmail.com
Fixes: 04efcee6ef8d ("net: hold instance lock during NETDEV_CHANGE")
Acked-by: Stanislav Fomichev
Reviewed-by: Kuniyuki Iwashima
Link: https://patch.msgid.link/20250415151552.768373-1-kuba@kernel.org
Signed-off-by: Jakub Kicinski
---
 net/core/dev.c       | 2 ++
 net/core/rtnetlink.c | 5 +----
 2 files changed, 3 insertions(+), 4 deletions(-)

(limited to 'net/core/dev.c')

diff --git a/net/core/dev.c b/net/core/dev.c
index 5fcbc66d865e..1be7cb73a602 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1520,6 +1520,8 @@ EXPORT_SYMBOL(netdev_features_change);
 
 void netif_state_change(struct net_device *dev)
 {
+	netdev_ops_assert_locked_or_invisible(dev);
+
 	if (dev->flags & IFF_UP) {
 		struct netdev_notifier_change_info change_info = {
 			.info.dev = dev,
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 39a5b72e861f..c5a7f41982a5 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -3676,11 +3676,8 @@ struct net_device *rtnl_create_link(struct net *net, const char *ifname,
 				nla_len(tb[IFLA_BROADCAST]));
 	if (tb[IFLA_TXQLEN])
 		dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]);
-	if (tb[IFLA_OPERSTATE]) {
-		netdev_lock_ops(dev);
+	if (tb[IFLA_OPERSTATE])
 		set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE]));
-		netdev_unlock_ops(dev);
-	}
 	if (tb[IFLA_LINKMODE])
 		dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]);
 	if (tb[IFLA_GROUP])
-- 
cgit v1.2.3-59-g8ed1b