aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorTaehee Yoo <ap420073@gmail.com>2020-09-25 18:13:29 +0000
committerDavid S. Miller <davem@davemloft.net>2020-09-28 15:00:15 -0700
commit1fc70edb7d7b5ce1ae32b0cf90183f4879ad421a (patch)
tree1b876520c6009ad68b0eb4664a87eb6878be125e /net
parentnet: core: introduce struct netdev_nested_priv for nested interface infrastructure (diff)
downloadlinux-dev-1fc70edb7d7b5ce1ae32b0cf90183f4879ad421a.tar.xz
linux-dev-1fc70edb7d7b5ce1ae32b0cf90183f4879ad421a.zip
net: core: add nested_level variable in net_device
This patch is to add a new variable 'nested_level' into the net_device structure. This variable will be used as a parameter of spin_lock_nested() of dev->addr_list_lock. netif_addr_lock() can be called recursively so spin_lock_nested() is used instead of spin_lock() and dev->lower_level is used as a parameter of spin_lock_nested(). But, dev->lower_level value can be updated while it is being used. So, lockdep would warn a possible deadlock scenario. When a stacked interface is deleted, netif_{uc | mc}_sync() is called recursively. So, spin_lock_nested() is called recursively too. At this moment, the dev->lower_level variable is used as a parameter of it. dev->lower_level value is updated when interfaces are being unlinked/linked immediately. Thus, After unlinking, dev->lower_level shouldn't be a parameter of spin_lock_nested(). A (macvlan) | B (vlan) | C (bridge) | D (macvlan) | E (vlan) | F (bridge) A->lower_level : 6 B->lower_level : 5 C->lower_level : 4 D->lower_level : 3 E->lower_level : 2 F->lower_level : 1 When an interface 'A' is removed, it releases resources. At this moment, netif_addr_lock() would be called. Then, netdev_upper_dev_unlink() is called recursively. Then dev->lower_level is updated. There is no problem. But, when the bridge module is removed, 'C' and 'F' interfaces are removed at once. If 'F' is removed first, a lower_level value is like below. A->lower_level : 5 B->lower_level : 4 C->lower_level : 3 D->lower_level : 2 E->lower_level : 1 F->lower_level : 1 Then, 'C' is removed. at this moment, netif_addr_lock() is called recursively. The ordering is like this. C(3)->D(2)->E(1)->F(1) At this moment, the lower_level value of 'E' and 'F' are the same. So, lockdep warns a possible deadlock scenario. In order to avoid this problem, a new variable 'nested_level' is added. This value is the same as dev->lower_level - 1. But this value is updated in rtnl_unlock(). So, this variable can be used as a parameter of spin_lock_nested() safely in the rtnl context. Test commands: ip link add br0 type bridge vlan_filtering 1 ip link add vlan1 link br0 type vlan id 10 ip link add macvlan2 link vlan1 type macvlan ip link add br3 type bridge vlan_filtering 1 ip link set macvlan2 master br3 ip link add vlan4 link br3 type vlan id 10 ip link add macvlan5 link vlan4 type macvlan ip link add br6 type bridge vlan_filtering 1 ip link set macvlan5 master br6 ip link add vlan7 link br6 type vlan id 10 ip link add macvlan8 link vlan7 type macvlan ip link set br0 up ip link set vlan1 up ip link set macvlan2 up ip link set br3 up ip link set vlan4 up ip link set macvlan5 up ip link set br6 up ip link set vlan7 up ip link set macvlan8 up modprobe -rv bridge Splat looks like: [ 36.057436][ T744] WARNING: possible recursive locking detected [ 36.058848][ T744] 5.9.0-rc6+ #728 Not tainted [ 36.059959][ T744] -------------------------------------------- [ 36.061391][ T744] ip/744 is trying to acquire lock: [ 36.062590][ T744] ffff8c4767509280 (&vlan_netdev_addr_lock_key){+...}-{2:2}, at: dev_set_rx_mode+0x19/0x30 [ 36.064922][ T744] [ 36.064922][ T744] but task is already holding lock: [ 36.066626][ T744] ffff8c4767769280 (&vlan_netdev_addr_lock_key){+...}-{2:2}, at: dev_uc_add+0x1e/0x60 [ 36.068851][ T744] [ 36.068851][ T744] other info that might help us debug this: [ 36.070731][ T744] Possible unsafe locking scenario: [ 36.070731][ T744] [ 36.072497][ T744] CPU0 [ 36.073238][ T744] ---- [ 36.074007][ T744] lock(&vlan_netdev_addr_lock_key); [ 36.075290][ T744] lock(&vlan_netdev_addr_lock_key); [ 36.076590][ T744] [ 36.076590][ T744] *** DEADLOCK *** [ 36.076590][ T744] [ 36.078515][ T744] May be due to missing lock nesting notation [ 36.078515][ T744] [ 36.080491][ T744] 3 locks held by ip/744: [ 36.081471][ T744] #0: ffffffff98571df0 (rtnl_mutex){+.+.}-{3:3}, at: rtnetlink_rcv_msg+0x236/0x490 [ 36.083614][ T744] #1: ffff8c4767769280 (&vlan_netdev_addr_lock_key){+...}-{2:2}, at: dev_uc_add+0x1e/0x60 [ 36.085942][ T744] #2: ffff8c476c8da280 (&bridge_netdev_addr_lock_key/4){+...}-{2:2}, at: dev_uc_sync+0x39/0x80 [ 36.088400][ T744] [ 36.088400][ T744] stack backtrace: [ 36.089772][ T744] CPU: 6 PID: 744 Comm: ip Not tainted 5.9.0-rc6+ #728 [ 36.091364][ T744] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014 [ 36.093630][ T744] Call Trace: [ 36.094416][ T744] dump_stack+0x77/0x9b [ 36.095385][ T744] __lock_acquire+0xbc3/0x1f40 [ 36.096522][ T744] lock_acquire+0xb4/0x3b0 [ 36.097540][ T744] ? dev_set_rx_mode+0x19/0x30 [ 36.098657][ T744] ? rtmsg_ifinfo+0x1f/0x30 [ 36.099711][ T744] ? __dev_notify_flags+0xa5/0xf0 [ 36.100874][ T744] ? rtnl_is_locked+0x11/0x20 [ 36.101967][ T744] ? __dev_set_promiscuity+0x7b/0x1a0 [ 36.103230][ T744] _raw_spin_lock_bh+0x38/0x70 [ 36.104348][ T744] ? dev_set_rx_mode+0x19/0x30 [ 36.105461][ T744] dev_set_rx_mode+0x19/0x30 [ 36.106532][ T744] dev_set_promiscuity+0x36/0x50 [ 36.107692][ T744] __dev_set_promiscuity+0x123/0x1a0 [ 36.108929][ T744] dev_set_promiscuity+0x1e/0x50 [ 36.110093][ T744] br_port_set_promisc+0x1f/0x40 [bridge] [ 36.111415][ T744] br_manage_promisc+0x8b/0xe0 [bridge] [ 36.112728][ T744] __dev_set_promiscuity+0x123/0x1a0 [ 36.113967][ T744] ? __hw_addr_sync_one+0x23/0x50 [ 36.115135][ T744] __dev_set_rx_mode+0x68/0x90 [ 36.116249][ T744] dev_uc_sync+0x70/0x80 [ 36.117244][ T744] dev_uc_add+0x50/0x60 [ 36.118223][ T744] macvlan_open+0x18e/0x1f0 [macvlan] [ 36.119470][ T744] __dev_open+0xd6/0x170 [ 36.120470][ T744] __dev_change_flags+0x181/0x1d0 [ 36.121644][ T744] dev_change_flags+0x23/0x60 [ 36.122741][ T744] do_setlink+0x30a/0x11e0 [ 36.123778][ T744] ? __lock_acquire+0x92c/0x1f40 [ 36.124929][ T744] ? __nla_validate_parse.part.6+0x45/0x8e0 [ 36.126309][ T744] ? __lock_acquire+0x92c/0x1f40 [ 36.127457][ T744] __rtnl_newlink+0x546/0x8e0 [ 36.128560][ T744] ? lock_acquire+0xb4/0x3b0 [ 36.129623][ T744] ? deactivate_slab.isra.85+0x6a1/0x850 [ 36.130946][ T744] ? __lock_acquire+0x92c/0x1f40 [ 36.132102][ T744] ? lock_acquire+0xb4/0x3b0 [ 36.133176][ T744] ? is_bpf_text_address+0x5/0xe0 [ 36.134364][ T744] ? rtnl_newlink+0x2e/0x70 [ 36.135445][ T744] ? rcu_read_lock_sched_held+0x32/0x60 [ 36.136771][ T744] ? kmem_cache_alloc_trace+0x2d8/0x380 [ 36.138070][ T744] ? rtnl_newlink+0x2e/0x70 [ 36.139164][ T744] rtnl_newlink+0x47/0x70 [ ... ] Fixes: 845e0ebb4408 ("net: change addr_list_lock back to static key") Signed-off-by: Taehee Yoo <ap420073@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r--net/core/dev.c85
-rw-r--r--net/core/dev_addr_lists.c12
2 files changed, 78 insertions, 19 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index a4a1fa806c5c..4906b44af850 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -7104,6 +7104,7 @@ static bool __netdev_has_upper_dev(struct net_device *dev,
struct net_device *upper_dev)
{
struct netdev_nested_priv priv = {
+ .flags = 0,
.data = (void *)upper_dev,
};
@@ -7385,9 +7386,19 @@ static int __netdev_update_upper_level(struct net_device *dev,
}
static int __netdev_update_lower_level(struct net_device *dev,
- struct netdev_nested_priv *__unused)
+ struct netdev_nested_priv *priv)
{
dev->lower_level = __netdev_lower_depth(dev) + 1;
+
+#ifdef CONFIG_LOCKDEP
+ if (!priv)
+ return 0;
+
+ if (priv->flags & NESTED_SYNC_IMM)
+ dev->nested_level = dev->lower_level - 1;
+ if (priv->flags & NESTED_SYNC_TODO)
+ net_unlink_todo(dev);
+#endif
return 0;
}
@@ -7665,6 +7676,7 @@ static void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev,
static int __netdev_upper_dev_link(struct net_device *dev,
struct net_device *upper_dev, bool master,
void *upper_priv, void *upper_info,
+ struct netdev_nested_priv *priv,
struct netlink_ext_ack *extack)
{
struct netdev_notifier_changeupper_info changeupper_info = {
@@ -7721,9 +7733,9 @@ static int __netdev_upper_dev_link(struct net_device *dev,
__netdev_update_upper_level(dev, NULL);
__netdev_walk_all_lower_dev(dev, __netdev_update_upper_level, NULL);
- __netdev_update_lower_level(upper_dev, NULL);
+ __netdev_update_lower_level(upper_dev, priv);
__netdev_walk_all_upper_dev(upper_dev, __netdev_update_lower_level,
- NULL);
+ priv);
return 0;
@@ -7748,8 +7760,13 @@ int netdev_upper_dev_link(struct net_device *dev,
struct net_device *upper_dev,
struct netlink_ext_ack *extack)
{
+ struct netdev_nested_priv priv = {
+ .flags = NESTED_SYNC_IMM | NESTED_SYNC_TODO,
+ .data = NULL,
+ };
+
return __netdev_upper_dev_link(dev, upper_dev, false,
- NULL, NULL, extack);
+ NULL, NULL, &priv, extack);
}
EXPORT_SYMBOL(netdev_upper_dev_link);
@@ -7772,13 +7789,19 @@ int netdev_master_upper_dev_link(struct net_device *dev,
void *upper_priv, void *upper_info,
struct netlink_ext_ack *extack)
{
+ struct netdev_nested_priv priv = {
+ .flags = NESTED_SYNC_IMM | NESTED_SYNC_TODO,
+ .data = NULL,
+ };
+
return __netdev_upper_dev_link(dev, upper_dev, true,
- upper_priv, upper_info, extack);
+ upper_priv, upper_info, &priv, extack);
}
EXPORT_SYMBOL(netdev_master_upper_dev_link);
static void __netdev_upper_dev_unlink(struct net_device *dev,
- struct net_device *upper_dev)
+ struct net_device *upper_dev,
+ struct netdev_nested_priv *priv)
{
struct netdev_notifier_changeupper_info changeupper_info = {
.info = {
@@ -7803,9 +7826,9 @@ static void __netdev_upper_dev_unlink(struct net_device *dev,
__netdev_update_upper_level(dev, NULL);
__netdev_walk_all_lower_dev(dev, __netdev_update_upper_level, NULL);
- __netdev_update_lower_level(upper_dev, NULL);
+ __netdev_update_lower_level(upper_dev, priv);
__netdev_walk_all_upper_dev(upper_dev, __netdev_update_lower_level,
- NULL);
+ priv);
}
/**
@@ -7819,7 +7842,12 @@ static void __netdev_upper_dev_unlink(struct net_device *dev,
void netdev_upper_dev_unlink(struct net_device *dev,
struct net_device *upper_dev)
{
- __netdev_upper_dev_unlink(dev, upper_dev);
+ struct netdev_nested_priv priv = {
+ .flags = NESTED_SYNC_TODO,
+ .data = NULL,
+ };
+
+ __netdev_upper_dev_unlink(dev, upper_dev, &priv);
}
EXPORT_SYMBOL(netdev_upper_dev_unlink);
@@ -7855,6 +7883,10 @@ int netdev_adjacent_change_prepare(struct net_device *old_dev,
struct net_device *dev,
struct netlink_ext_ack *extack)
{
+ struct netdev_nested_priv priv = {
+ .flags = 0,
+ .data = NULL,
+ };
int err;
if (!new_dev)
@@ -7862,8 +7894,8 @@ int netdev_adjacent_change_prepare(struct net_device *old_dev,
if (old_dev && new_dev != old_dev)
netdev_adjacent_dev_disable(dev, old_dev);
-
- err = netdev_upper_dev_link(new_dev, dev, extack);
+ err = __netdev_upper_dev_link(new_dev, dev, false, NULL, NULL, &priv,
+ extack);
if (err) {
if (old_dev && new_dev != old_dev)
netdev_adjacent_dev_enable(dev, old_dev);
@@ -7878,6 +7910,11 @@ void netdev_adjacent_change_commit(struct net_device *old_dev,
struct net_device *new_dev,
struct net_device *dev)
{
+ struct netdev_nested_priv priv = {
+ .flags = NESTED_SYNC_IMM | NESTED_SYNC_TODO,
+ .data = NULL,
+ };
+
if (!new_dev || !old_dev)
return;
@@ -7885,7 +7922,7 @@ void netdev_adjacent_change_commit(struct net_device *old_dev,
return;
netdev_adjacent_dev_enable(dev, old_dev);
- netdev_upper_dev_unlink(old_dev, dev);
+ __netdev_upper_dev_unlink(old_dev, dev, &priv);
}
EXPORT_SYMBOL(netdev_adjacent_change_commit);
@@ -7893,13 +7930,18 @@ void netdev_adjacent_change_abort(struct net_device *old_dev,
struct net_device *new_dev,
struct net_device *dev)
{
+ struct netdev_nested_priv priv = {
+ .flags = 0,
+ .data = NULL,
+ };
+
if (!new_dev)
return;
if (old_dev && new_dev != old_dev)
netdev_adjacent_dev_enable(dev, old_dev);
- netdev_upper_dev_unlink(new_dev, dev);
+ __netdev_upper_dev_unlink(new_dev, dev, &priv);
}
EXPORT_SYMBOL(netdev_adjacent_change_abort);
@@ -10083,6 +10125,19 @@ static void netdev_wait_allrefs(struct net_device *dev)
void netdev_run_todo(void)
{
struct list_head list;
+#ifdef CONFIG_LOCKDEP
+ struct list_head unlink_list;
+
+ list_replace_init(&net_unlink_list, &unlink_list);
+
+ while (!list_empty(&unlink_list)) {
+ struct net_device *dev = list_first_entry(&unlink_list,
+ struct net_device,
+ unlink_list);
+ list_del(&dev->unlink_list);
+ dev->nested_level = dev->lower_level - 1;
+ }
+#endif
/* Snapshot list, allow later requests */
list_replace_init(&net_todo_list, &list);
@@ -10295,6 +10350,10 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
dev->gso_max_segs = GSO_MAX_SEGS;
dev->upper_level = 1;
dev->lower_level = 1;
+#ifdef CONFIG_LOCKDEP
+ dev->nested_level = 0;
+ INIT_LIST_HEAD(&dev->unlink_list);
+#endif
INIT_LIST_HEAD(&dev->napi_list);
INIT_LIST_HEAD(&dev->unreg_list);
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index 54cd568e7c2f..fa1c37ec40c9 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c
@@ -637,7 +637,7 @@ int dev_uc_sync(struct net_device *to, struct net_device *from)
if (to->addr_len != from->addr_len)
return -EINVAL;
- netif_addr_lock_nested(to);
+ netif_addr_lock(to);
err = __hw_addr_sync(&to->uc, &from->uc, to->addr_len);
if (!err)
__dev_set_rx_mode(to);
@@ -667,7 +667,7 @@ int dev_uc_sync_multiple(struct net_device *to, struct net_device *from)
if (to->addr_len != from->addr_len)
return -EINVAL;
- netif_addr_lock_nested(to);
+ netif_addr_lock(to);
err = __hw_addr_sync_multiple(&to->uc, &from->uc, to->addr_len);
if (!err)
__dev_set_rx_mode(to);
@@ -700,7 +700,7 @@ void dev_uc_unsync(struct net_device *to, struct net_device *from)
* larger.
*/
netif_addr_lock_bh(from);
- netif_addr_lock_nested(to);
+ netif_addr_lock(to);
__hw_addr_unsync(&to->uc, &from->uc, to->addr_len);
__dev_set_rx_mode(to);
netif_addr_unlock(to);
@@ -867,7 +867,7 @@ int dev_mc_sync(struct net_device *to, struct net_device *from)
if (to->addr_len != from->addr_len)
return -EINVAL;
- netif_addr_lock_nested(to);
+ netif_addr_lock(to);
err = __hw_addr_sync(&to->mc, &from->mc, to->addr_len);
if (!err)
__dev_set_rx_mode(to);
@@ -897,7 +897,7 @@ int dev_mc_sync_multiple(struct net_device *to, struct net_device *from)
if (to->addr_len != from->addr_len)
return -EINVAL;
- netif_addr_lock_nested(to);
+ netif_addr_lock(to);
err = __hw_addr_sync_multiple(&to->mc, &from->mc, to->addr_len);
if (!err)
__dev_set_rx_mode(to);
@@ -922,7 +922,7 @@ void dev_mc_unsync(struct net_device *to, struct net_device *from)
/* See the above comments inside dev_uc_unsync(). */
netif_addr_lock_bh(from);
- netif_addr_lock_nested(to);
+ netif_addr_lock(to);
__hw_addr_unsync(&to->mc, &from->mc, to->addr_len);
__dev_set_rx_mode(to);
netif_addr_unlock(to);