aboutsummaryrefslogtreecommitdiffstats
path: root/include/linux
diff options
context:
space:
mode:
author: David S. Miller <davem@davemloft.net> 2019-10-24 14:53:49 -0700
committer: David S. Miller <davem@davemloft.net> 2019-10-24 14:53:49 -0700
commit: 65921376425fc9c8b7ce647e1f7989f7cdf5dd70 (patch)
tree: e2889962a59cc29fcec292788ee6936c87723c51 /include/linux
parent: keys: Fix memory leak in copy_net_ns (diff)
parent: virt_wifi: fix refcnt leak in module exit routine (diff)
Merge branch 'net-fix-nested-device-bugs'
Taehee Yoo says:

====================
net: fix nested device bugs

This patchset fixes several bugs that are related to the nesting device infrastructure.
The current nesting infrastructure code doesn't limit the depth level of devices, and nested devices could be handled recursively. In that case, it needs a huge amount of memory and a stack overflow could occur.
The device types below have the same bug:
VLAN, BONDING, TEAM, MACSEC, MACVLAN, IPVLAN, and VXLAN.
But I couldn't test all interface types, so there could be more device types that have similar problems.
Maybe the qmi_wwan.c code could have the same problem.
So, I would appreciate it if someone tested qmi_wwan.c and other modules.

Test commands:
    ip link add dummy0 type dummy
    ip link add vlan1 link dummy0 type vlan id 1

    for i in {2..100}
    do
        let A=$i-1
        ip link add name vlan$i link vlan$A type vlan id $i
    done
    ip link del dummy0

The 1st patch actually fixes the root cause.
It adds new common variables {upper/lower}_level that represent the depth level. The upper_level variable is the depth of upper devices. The lower_level variable is the depth of lower devices.

            [U][L]          [U][L]
    vlan1    1  5    vlan4   1  4
    vlan2    2  4    vlan5   2  3
    vlan3    3  3      |
      |                |
      +----------------+
      |
    vlan6    4  2
    dummy0   5  1

After this patch, the nesting infrastructure code uses these variables to check the depth level.

The 2nd patch fixes a Qdisc lockdep related problem.
Before this patch, devices use a static lockdep map. So, if devices of the same type are nested, lockdep will warn about a recursive situation.
These patches make these devices use a dynamic lockdep key instead of a static lock or subclass.

The 3rd patch fixes an unexpected unsetting of the IFF_BONDING bit.
In a nested bonding interface scenario, a bonding interface could lose its IFF_BONDING flag. This should not happen.
This patch adds a condition before unsetting IFF_BONDING.

The 4th patch fixes a nested locking problem in the bonding interface.
The bonding interface has its own lock and this uses a static lock.
Bonding interfaces could be nested and they use the same lockdep key.
As a result, a spurious lockdep warning occurs.

The 5th patch fixes a nested locking problem in the team interface.
The team interface has its own lock and this uses a static lock.
Team interfaces could be nested and they use the same lockdep key.
As a result, a spurious lockdep warning occurs.

The 6th patch fixes a refcnt leak in the macsec module.
When the macsec module is unloaded, refcnt leaks occur.
But actually, holding that refcnt is unnecessary.
So this patch just removes this code.

The 7th patch adds an ignore flag to the adjacent structure.
In order to exchange an adjacent node safely, an ignore flag is needed.

The 8th patch makes vxlan add an adjacent link to limit the depth level.
A vxlan interface could set its lower interface and these lower interfaces are handled recursively.
So, if the depth of lower interfaces is too deep, a stack overflow could happen.

The 9th patch removes unnecessary variables and a callback.
After the 1st patch, the subclass callback and variables are unnecessary.
This patch just removes these variables and the callback.

The 10th patch fixes refcnt leaks in the virt_wifi module.
Like every nested interface, the upper interface should be deleted before the lower interface is deleted.
In order to fix this, a notifier routine is added in this patch.

v4 -> v5 :
 - Update log messages
 - Move variables position, 1st patch
 - Fix iterator routine, 1st patch
 - Add generic lockdep key code, which replaces 2, 4, 5, 6, 7 patches.
 - Log message update, 10th patch
 - Fix wrong error value in error path of __init routine, 10th patch
 - Hold module refcnt when interface is created, 10th patch
v3 -> v4 :
 - Add new 12th patch to fix refcnt leaks in the virt_wifi module
 - Fix wrong usage of netdev_upper_dev_link() in the vxlan.c
 - Preserve reverse christmas tree variable ordering in the vxlan.c
 - Add missing static keyword in the dev.c
 - Expose netdev_adjacent_change_{prepare/commit/abort} instead of netdev_adjacent_dev_{enable/disable}
v2 -> v3 :
 - Modify nesting infrastructure code to use an iterator instead of recursion.
v1 -> v2 :
 - Make the 3rd patch not add a new priv_flag.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include/linux')
-rw-r--r-- include/linux/if_macvlan.h | 1
-rw-r--r-- include/linux/if_team.h | 1
-rw-r--r-- include/linux/if_vlan.h | 11
-rw-r--r-- include/linux/netdevice.h | 61
4 files changed, 28 insertions, 46 deletions
diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h
index 2e55e4cdbd8a..a367ead4bf4b 100644
--- a/include/linux/if_macvlan.h
+++ b/include/linux/if_macvlan.h
@@ -29,7 +29,6 @@ struct macvlan_dev {
netdev_features_t set_features;
enum macvlan_mode mode;
u16 flags;
- int nest_level;
unsigned int macaddr_count;
#ifdef CONFIG_NET_POLL_CONTROLLER
struct netpoll *netpoll;
diff --git a/include/linux/if_team.h b/include/linux/if_team.h
index 06faa066496f..ec7e4bd07f82 100644
--- a/include/linux/if_team.h
+++ b/include/linux/if_team.h
@@ -223,6 +223,7 @@ struct team {
atomic_t count_pending;
struct delayed_work dw;
} mcast_rejoin;
+ struct lock_class_key team_lock_key;
long mode_priv[TEAM_MODE_PRIV_LONGS];
};
diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index 244278d5c222..b05e855f1ddd 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -182,7 +182,6 @@ struct vlan_dev_priv {
#ifdef CONFIG_NET_POLL_CONTROLLER
struct netpoll *netpoll;
#endif
- unsigned int nest_level;
};
static inline struct vlan_dev_priv *vlan_dev_priv(const struct net_device *dev)
@@ -221,11 +220,6 @@ extern void vlan_vids_del_by_dev(struct net_device *dev,
extern bool vlan_uses_dev(const struct net_device *dev);
-static inline int vlan_get_encap_level(struct net_device *dev)
-{
- BUG_ON(!is_vlan_dev(dev));
- return vlan_dev_priv(dev)->nest_level;
-}
#else
static inline struct net_device *
__vlan_find_dev_deep_rcu(struct net_device *real_dev,
@@ -295,11 +289,6 @@ static inline bool vlan_uses_dev(const struct net_device *dev)
{
return false;
}
-static inline int vlan_get_encap_level(struct net_device *dev)
-{
- BUG();
- return 0;
-}
#endif
/**
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 9eda1c31d1f7..c20f190b4c18 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -925,6 +925,7 @@ struct dev_ifalias {
struct devlink;
struct tlsdev_ops;
+
/*
* This structure defines the management hooks for network devices.
* The following hooks can be defined; unless noted otherwise, they are
@@ -1421,7 +1422,6 @@ struct net_device_ops {
void (*ndo_dfwd_del_station)(struct net_device *pdev,
void *priv);
- int (*ndo_get_lock_subclass)(struct net_device *dev);
int (*ndo_set_tx_maxrate)(struct net_device *dev,
int queue_index,
u32 maxrate);
@@ -1649,6 +1649,8 @@ enum netdev_priv_flags {
* @perm_addr: Permanent hw address
* @addr_assign_type: Hw address assignment type
* @addr_len: Hardware address length
+ * @upper_level: Maximum depth level of upper devices.
+ * @lower_level: Maximum depth level of lower devices.
* @neigh_priv_len: Used in neigh_alloc()
* @dev_id: Used to differentiate devices that share
* the same link layer address
@@ -1758,9 +1760,13 @@ enum netdev_priv_flags {
* @phydev: Physical device may attach itself
* for hardware timestamping
* @sfp_bus: attached &struct sfp_bus structure.
- *
- * @qdisc_tx_busylock: lockdep class annotating Qdisc->busylock spinlock
- * @qdisc_running_key: lockdep class annotating Qdisc->running seqcount
+ * @qdisc_tx_busylock_key: lockdep class annotating Qdisc->busylock
+ * spinlock
+ * @qdisc_running_key: lockdep class annotating Qdisc->running seqcount
+ * @qdisc_xmit_lock_key: lockdep class annotating
+ * netdev_queue->_xmit_lock spinlock
+ * @addr_list_lock_key: lockdep class annotating
+ * net_device->addr_list_lock spinlock
*
* @proto_down: protocol port state information can be sent to the
* switch driver and used to set the phys state of the
@@ -1875,6 +1881,8 @@ struct net_device {
unsigned char perm_addr[MAX_ADDR_LEN];
unsigned char addr_assign_type;
unsigned char addr_len;
+ unsigned char upper_level;
+ unsigned char lower_level;
unsigned short neigh_priv_len;
unsigned short dev_id;
unsigned short dev_port;
@@ -2045,8 +2053,10 @@ struct net_device {
#endif
struct phy_device *phydev;
struct sfp_bus *sfp_bus;
- struct lock_class_key *qdisc_tx_busylock;
- struct lock_class_key *qdisc_running_key;
+ struct lock_class_key qdisc_tx_busylock_key;
+ struct lock_class_key qdisc_running_key;
+ struct lock_class_key qdisc_xmit_lock_key;
+ struct lock_class_key addr_list_lock_key;
bool proto_down;
unsigned wol_enabled:1;
};
@@ -2124,23 +2134,6 @@ static inline void netdev_for_each_tx_queue(struct net_device *dev,
f(dev, &dev->_tx[i], arg);
}
-#define netdev_lockdep_set_classes(dev) \
-{ \
- static struct lock_class_key qdisc_tx_busylock_key; \
- static struct lock_class_key qdisc_running_key; \
- static struct lock_class_key qdisc_xmit_lock_key; \
- static struct lock_class_key dev_addr_list_lock_key; \
- unsigned int i; \
- \
- (dev)->qdisc_tx_busylock = &qdisc_tx_busylock_key; \
- (dev)->qdisc_running_key = &qdisc_running_key; \
- lockdep_set_class(&(dev)->addr_list_lock, \
- &dev_addr_list_lock_key); \
- for (i = 0; i < (dev)->num_tx_queues; i++) \
- lockdep_set_class(&(dev)->_tx[i]._xmit_lock, \
- &qdisc_xmit_lock_key); \
-}
-
u16 netdev_pick_tx(struct net_device *dev, struct sk_buff *skb,
struct net_device *sb_dev);
struct netdev_queue *netdev_core_pick_tx(struct net_device *dev,
@@ -3139,6 +3132,7 @@ static inline void netif_stop_queue(struct net_device *dev)
}
void netif_tx_stop_all_queues(struct net_device *dev);
+void netdev_update_lockdep_key(struct net_device *dev);
static inline bool netif_tx_queue_stopped(const struct netdev_queue *dev_queue)
{
@@ -4056,16 +4050,6 @@ static inline void netif_addr_lock(struct net_device *dev)
spin_lock(&dev->addr_list_lock);
}
-static inline void netif_addr_lock_nested(struct net_device *dev)
-{
- int subclass = SINGLE_DEPTH_NESTING;
-
- if (dev->netdev_ops->ndo_get_lock_subclass)
- subclass = dev->netdev_ops->ndo_get_lock_subclass(dev);
-
- spin_lock_nested(&dev->addr_list_lock, subclass);
-}
-
static inline void netif_addr_lock_bh(struct net_device *dev)
{
spin_lock_bh(&dev->addr_list_lock);
@@ -4329,6 +4313,16 @@ int netdev_master_upper_dev_link(struct net_device *dev,
struct netlink_ext_ack *extack);
void netdev_upper_dev_unlink(struct net_device *dev,
struct net_device *upper_dev);
+int netdev_adjacent_change_prepare(struct net_device *old_dev,
+ struct net_device *new_dev,
+ struct net_device *dev,
+ struct netlink_ext_ack *extack);
+void netdev_adjacent_change_commit(struct net_device *old_dev,
+ struct net_device *new_dev,
+ struct net_device *dev);
+void netdev_adjacent_change_abort(struct net_device *old_dev,
+ struct net_device *new_dev,
+ struct net_device *dev);
void netdev_adjacent_rename_links(struct net_device *dev, char *oldname);
void *netdev_lower_dev_get_private(struct net_device *dev,
struct net_device *lower_dev);
@@ -4340,7 +4334,6 @@ void netdev_lower_state_changed(struct net_device *lower_dev,
extern u8 netdev_rss_key[NETDEV_RSS_KEY_LEN] __read_mostly;
void netdev_rss_key_fill(void *buffer, size_t len);
-int dev_get_nest_level(struct net_device *dev);
int skb_checksum_help(struct sk_buff *skb);
int skb_crc32c_csum_help(struct sk_buff *skb);
int skb_csum_hwoffload_help(struct sk_buff *skb,