aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2013-08-29 16:19:50 -0400
committerDavid S. Miller <davem@davemloft.net>2013-08-29 16:19:50 -0400
commitd7ef9b04a219c1c8ad053e9b4321283b5af12938 (patch)
treeed95aed89dea6c836e57ad069737082693fee156
parentMerge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/jkirsher/net-next (diff)
parentbonding: pr_debug instead of pr_warn in bond_arp_send_all (diff)
downloadlinux-dev-d7ef9b04a219c1c8ad053e9b4321283b5af12938.tar.xz
linux-dev-d7ef9b04a219c1c8ad053e9b4321283b5af12938.zip
Merge branch 'bond_vlan'
Veaceslav Falico says: ==================== bonding: remove vlan special handling v1: Per Jiri's advice, remove the exported netdev_upper struct to keep it inside dev.c only, and instead implement a macro to iterate over the list and return only net_device *. v2: Jiri noted that we need to see every upper device, but not only the first level. Modify the netdev_upper logic to include a list of lower devices and for both upper/lower lists every device would see both its first-level devices and every other devices that is lower/upper of it. Also, convert some annoying spamming warnings to pr_debug in bond_arp_send_all. v3: move renaming part completely to patch 1 (did I forget to git add before commiting?) and address Jiri's input about comments/style of patch 2. v4: as Vlad found spotted, bond_arp_send_all() won't work in a config where we have a device with ip on top of our upper vlan. It fails to send packets because we don't tag the packet, while the device on top of vlan will emit tagged packets through this vlan. Fix this by first searching for all upper vlans, and for each vlan - for the devs on top of it. If we find the dev - then tag the packet with the underling's vlan_id, otherwise just search the old way - for all devices on top of bonding. Also, move the version changes under "---" so they won't get into the commit message, if/when applied. The aim of this patchset is to remove bondings' own vlan handling as much as possible and replace it with the netdev upper device functionality. The upper device functionality is extended to include also lower devices and to have, for each device, a full view of every lower and upper device, but not only the first-level ones. This might permit in the future to avoid, for any grouping/teaming/upper/lower devices, to maintain its own lists of slaves/vlans/etc. This is achieved by adding a helper function to upper dev list handling - netdev_upper_get_next_dev(dev, iter), which returns the next device after the list_head **iter, and sets *iter to the next list_head *. This patchset also adds netdev_for_each_upper_dev(dev, upper, iter), which iterates through the whole dev->upper_dev_list, setting upper to the net_device. The only special treatment of vlans remains in rlb code. This patchset solves several issues with bonding, simplifies it overall, RCUify further and exports upper list functions for any other users which might also want to get rid of its own vlan_lists or slaves. I'm testing it continuously currently, no issues found, will update on anything. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--drivers/net/bonding/bond_alb.c75
-rw-r--r--drivers/net/bonding/bond_alb.h2
-rw-r--r--drivers/net/bonding/bond_main.c272
-rw-r--r--drivers/net/bonding/bonding.h21
-rw-r--r--include/linux/netdevice.h11
-rw-r--r--net/core/dev.c345
6 files changed, 416 insertions, 310 deletions
diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
index 3a5db7b1df68..018235263596 100644
--- a/drivers/net/bonding/bond_alb.c
+++ b/drivers/net/bonding/bond_alb.c
@@ -971,58 +971,62 @@ static void rlb_clear_vlan(struct bonding *bond, unsigned short vlan_id)
/*********************** tlb/rlb shared functions *********************/
-static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[])
+static void alb_send_lp_vid(struct slave *slave, u8 mac_addr[],
+ u16 vid)
{
- struct bonding *bond = bond_get_bond_by_slave(slave);
struct learning_pkt pkt;
+ struct sk_buff *skb;
int size = sizeof(struct learning_pkt);
- int i;
+ char *data;
memset(&pkt, 0, size);
memcpy(pkt.mac_dst, mac_addr, ETH_ALEN);
memcpy(pkt.mac_src, mac_addr, ETH_ALEN);
pkt.type = cpu_to_be16(ETH_P_LOOP);
- for (i = 0; i < MAX_LP_BURST; i++) {
- struct sk_buff *skb;
- char *data;
+ skb = dev_alloc_skb(size);
+ if (!skb)
+ return;
+
+ data = skb_put(skb, size);
+ memcpy(data, &pkt, size);
- skb = dev_alloc_skb(size);
+ skb_reset_mac_header(skb);
+ skb->network_header = skb->mac_header + ETH_HLEN;
+ skb->protocol = pkt.type;
+ skb->priority = TC_PRIO_CONTROL;
+ skb->dev = slave->dev;
+
+ if (vid) {
+ skb = vlan_put_tag(skb, htons(ETH_P_8021Q), vid);
if (!skb) {
+ pr_err("%s: Error: failed to insert VLAN tag\n",
+ slave->bond->dev->name);
return;
}
+ }
- data = skb_put(skb, size);
- memcpy(data, &pkt, size);
-
- skb_reset_mac_header(skb);
- skb->network_header = skb->mac_header + ETH_HLEN;
- skb->protocol = pkt.type;
- skb->priority = TC_PRIO_CONTROL;
- skb->dev = slave->dev;
-
- if (bond_vlan_used(bond)) {
- struct vlan_entry *vlan;
+ dev_queue_xmit(skb);
+}
- vlan = bond_next_vlan(bond,
- bond->alb_info.current_alb_vlan);
- bond->alb_info.current_alb_vlan = vlan;
- if (!vlan) {
- kfree_skb(skb);
- continue;
- }
+static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[])
+{
+ struct bonding *bond = bond_get_bond_by_slave(slave);
+ struct net_device *upper;
+ struct list_head *iter;
- skb = vlan_put_tag(skb, htons(ETH_P_8021Q), vlan->vlan_id);
- if (!skb) {
- pr_err("%s: Error: failed to insert VLAN tag\n",
- bond->dev->name);
- continue;
- }
- }
+ /* send untagged */
+ alb_send_lp_vid(slave, mac_addr, 0);
- dev_queue_xmit(skb);
+ /* loop through vlans and send one packet for each */
+ rcu_read_lock();
+ netdev_for_each_upper_dev_rcu(bond->dev, upper, iter) {
+ if (upper->priv_flags & IFF_802_1Q_VLAN)
+ alb_send_lp_vid(slave, mac_addr,
+ vlan_dev_vlan_id(upper));
}
+ rcu_read_unlock();
}
static int alb_set_slave_mac_addr(struct slave *slave, u8 addr[])
@@ -1759,11 +1763,6 @@ int bond_alb_set_mac_address(struct net_device *bond_dev, void *addr)
void bond_alb_clear_vlan(struct bonding *bond, unsigned short vlan_id)
{
- if (bond->alb_info.current_alb_vlan &&
- (bond->alb_info.current_alb_vlan->vlan_id == vlan_id)) {
- bond->alb_info.current_alb_vlan = NULL;
- }
-
if (bond->alb_info.rlb_enabled) {
rlb_clear_vlan(bond, vlan_id);
}
diff --git a/drivers/net/bonding/bond_alb.h b/drivers/net/bonding/bond_alb.h
index e7a5b8b37ea3..e02c9c558c41 100644
--- a/drivers/net/bonding/bond_alb.h
+++ b/drivers/net/bonding/bond_alb.h
@@ -53,7 +53,6 @@ struct slave;
#define TLB_NULL_INDEX 0xffffffff
-#define MAX_LP_BURST 3
/* rlb defs */
#define RLB_HASH_TABLE_SIZE 256
@@ -170,7 +169,6 @@ struct alb_bond_info {
* rx traffic should be
* rebalanced
*/
- struct vlan_entry *current_alb_vlan;
};
int bond_alb_initialize(struct bonding *bond, int rlb_enabled);
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 7407e65f5d96..c50679f0a6b6 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -283,116 +283,6 @@ const char *bond_mode_name(int mode)
/*---------------------------------- VLAN -----------------------------------*/
/**
- * bond_add_vlan - add a new vlan id on bond
- * @bond: bond that got the notification
- * @vlan_id: the vlan id to add
- *
- * Returns -ENOMEM if allocation failed.
- */
-static int bond_add_vlan(struct bonding *bond, unsigned short vlan_id)
-{
- struct vlan_entry *vlan;
-
- pr_debug("bond: %s, vlan id %d\n",
- (bond ? bond->dev->name : "None"), vlan_id);
-
- vlan = kzalloc(sizeof(struct vlan_entry), GFP_KERNEL);
- if (!vlan)
- return -ENOMEM;
-
- INIT_LIST_HEAD(&vlan->vlan_list);
- vlan->vlan_id = vlan_id;
-
- write_lock_bh(&bond->lock);
-
- list_add_tail(&vlan->vlan_list, &bond->vlan_list);
-
- write_unlock_bh(&bond->lock);
-
- pr_debug("added VLAN ID %d on bond %s\n", vlan_id, bond->dev->name);
-
- return 0;
-}
-
-/**
- * bond_del_vlan - delete a vlan id from bond
- * @bond: bond that got the notification
- * @vlan_id: the vlan id to delete
- *
- * returns -ENODEV if @vlan_id was not found in @bond.
- */
-static int bond_del_vlan(struct bonding *bond, unsigned short vlan_id)
-{
- struct vlan_entry *vlan;
- int res = -ENODEV;
-
- pr_debug("bond: %s, vlan id %d\n", bond->dev->name, vlan_id);
-
- block_netpoll_tx();
- write_lock_bh(&bond->lock);
-
- list_for_each_entry(vlan, &bond->vlan_list, vlan_list) {
- if (vlan->vlan_id == vlan_id) {
- list_del(&vlan->vlan_list);
-
- if (bond_is_lb(bond))
- bond_alb_clear_vlan(bond, vlan_id);
-
- pr_debug("removed VLAN ID %d from bond %s\n",
- vlan_id, bond->dev->name);
-
- kfree(vlan);
-
- res = 0;
- goto out;
- }
- }
-
- pr_debug("couldn't find VLAN ID %d in bond %s\n",
- vlan_id, bond->dev->name);
-
-out:
- write_unlock_bh(&bond->lock);
- unblock_netpoll_tx();
- return res;
-}
-
-/**
- * bond_next_vlan - safely skip to the next item in the vlans list.
- * @bond: the bond we're working on
- * @curr: item we're advancing from
- *
- * Returns %NULL if list is empty, bond->next_vlan if @curr is %NULL,
- * or @curr->next otherwise (even if it is @curr itself again).
- *
- * Caller must hold bond->lock
- */
-struct vlan_entry *bond_next_vlan(struct bonding *bond, struct vlan_entry *curr)
-{
- struct vlan_entry *next, *last;
-
- if (list_empty(&bond->vlan_list))
- return NULL;
-
- if (!curr) {
- next = list_entry(bond->vlan_list.next,
- struct vlan_entry, vlan_list);
- } else {
- last = list_entry(bond->vlan_list.prev,
- struct vlan_entry, vlan_list);
- if (last == curr) {
- next = list_entry(bond->vlan_list.next,
- struct vlan_entry, vlan_list);
- } else {
- next = list_entry(curr->vlan_list.next,
- struct vlan_entry, vlan_list);
- }
- }
-
- return next;
-}
-
-/**
* bond_dev_queue_xmit - Prepare skb for xmit.
*
* @bond: bond device that got this skb for tx.
@@ -451,13 +341,6 @@ static int bond_vlan_rx_add_vid(struct net_device *bond_dev,
goto unwind;
}
- res = bond_add_vlan(bond, vid);
- if (res) {
- pr_err("%s: Error: Failed to add vlan id %d\n",
- bond_dev->name, vid);
- goto unwind;
- }
-
return 0;
unwind:
@@ -478,17 +361,12 @@ static int bond_vlan_rx_kill_vid(struct net_device *bond_dev,
{
struct bonding *bond = netdev_priv(bond_dev);
struct slave *slave;
- int res;
bond_for_each_slave(bond, slave)
vlan_vid_del(slave->dev, proto, vid);
- res = bond_del_vlan(bond, vid);
- if (res) {
- pr_err("%s: Error: Failed to remove vlan id %d\n",
- bond_dev->name, vid);
- return res;
- }
+ if (bond_is_lb(bond))
+ bond_alb_clear_vlan(bond, vid);
return 0;
}
@@ -1954,7 +1832,7 @@ static int __bond_release_one(struct net_device *bond_dev,
bond_set_carrier(bond);
eth_hw_addr_random(bond_dev);
- if (bond_vlan_used(bond)) {
+ if (vlan_uses_dev(bond_dev)) {
pr_warning("%s: Warning: clearing HW address of %s while it still has VLANs.\n",
bond_dev->name, bond_dev->name);
pr_warning("%s: When re-adding slaves, make sure the bond's HW address matches its VLANs'.\n",
@@ -2392,24 +2270,25 @@ re_arm:
}
}
-static int bond_has_this_ip(struct bonding *bond, __be32 ip)
+static bool bond_has_this_ip(struct bonding *bond, __be32 ip)
{
- struct vlan_entry *vlan;
- struct net_device *vlan_dev;
+ struct net_device *upper;
+ struct list_head *iter;
+ bool ret = false;
if (ip == bond_confirm_addr(bond->dev, 0, ip))
- return 1;
+ return true;
- list_for_each_entry(vlan, &bond->vlan_list, vlan_list) {
- rcu_read_lock();
- vlan_dev = __vlan_find_dev_deep(bond->dev, htons(ETH_P_8021Q),
- vlan->vlan_id);
- rcu_read_unlock();
- if (vlan_dev && ip == bond_confirm_addr(vlan_dev, 0, ip))
- return 1;
+ rcu_read_lock();
+ netdev_for_each_upper_dev_rcu(bond->dev, upper, iter) {
+ if (ip == bond_confirm_addr(upper, 0, ip)) {
+ ret = true;
+ break;
+ }
}
+ rcu_read_unlock();
- return 0;
+ return ret;
}
/*
@@ -2444,81 +2323,79 @@ static void bond_arp_send(struct net_device *slave_dev, int arp_op, __be32 dest_
static void bond_arp_send_all(struct bonding *bond, struct slave *slave)
{
- int i, vlan_id;
- __be32 *targets = bond->params.arp_targets;
- struct vlan_entry *vlan;
- struct net_device *vlan_dev = NULL;
+ struct net_device *upper, *vlan_upper;
+ struct list_head *iter, *vlan_iter;
struct rtable *rt;
+ __be32 *targets = bond->params.arp_targets, addr;
+ int i, vlan_id;
- for (i = 0; (i < BOND_MAX_ARP_TARGETS); i++) {
- __be32 addr;
- if (!targets[i])
- break;
+ for (i = 0; i < BOND_MAX_ARP_TARGETS && targets[i]; i++) {
pr_debug("basa: target %pI4\n", &targets[i]);
- if (!bond_vlan_used(bond)) {
- pr_debug("basa: empty vlan: arp_send\n");
- addr = bond_confirm_addr(bond->dev, targets[i], 0);
- bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i],
- addr, 0);
- continue;
- }
- /*
- * If VLANs are configured, we do a route lookup to
- * determine which VLAN interface would be used, so we
- * can tag the ARP with the proper VLAN tag.
- */
+ /* Find out through which dev should the packet go */
rt = ip_route_output(dev_net(bond->dev), targets[i], 0,
RTO_ONLINK, 0);
if (IS_ERR(rt)) {
- if (net_ratelimit()) {
- pr_warning("%s: no route to arp_ip_target %pI4\n",
- bond->dev->name, &targets[i]);
- }
+ pr_debug("%s: no route to arp_ip_target %pI4\n",
+ bond->dev->name, &targets[i]);
continue;
}
- /*
- * This target is not on a VLAN
+ vlan_id = 0;
+
+ /* bond device itself */
+ if (rt->dst.dev == bond->dev)
+ goto found;
+
+ rcu_read_lock();
+ /* first we search only for vlan devices. for every vlan
+ * found we verify its upper dev list, searching for the
+ * rt->dst.dev. If found we save the tag of the vlan and
+ * proceed to send the packet.
+ *
+ * TODO: QinQ?
*/
- if (rt->dst.dev == bond->dev) {
- ip_rt_put(rt);
- pr_debug("basa: rtdev == bond->dev: arp_send\n");
- addr = bond_confirm_addr(bond->dev, targets[i], 0);
- bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i],
- addr, 0);
- continue;
+ netdev_for_each_upper_dev_rcu(bond->dev, vlan_upper, vlan_iter) {
+ if (!is_vlan_dev(vlan_upper))
+ continue;
+ netdev_for_each_upper_dev_rcu(vlan_upper, upper, iter) {
+ if (upper == rt->dst.dev) {
+ vlan_id = vlan_dev_vlan_id(vlan_upper);
+ rcu_read_unlock();
+ goto found;
+ }
+ }
}
- vlan_id = 0;
- list_for_each_entry(vlan, &bond->vlan_list, vlan_list) {
- rcu_read_lock();
- vlan_dev = __vlan_find_dev_deep(bond->dev,
- htons(ETH_P_8021Q),
- vlan->vlan_id);
- rcu_read_unlock();
- if (vlan_dev == rt->dst.dev) {
- vlan_id = vlan->vlan_id;
- pr_debug("basa: vlan match on %s %d\n",
- vlan_dev->name, vlan_id);
- break;
+ /* if the device we're looking for is not on top of any of
+ * our upper vlans, then just search for any dev that
+ * matches, and in case it's a vlan - save the id
+ */
+ netdev_for_each_upper_dev_rcu(bond->dev, upper, iter) {
+ if (upper == rt->dst.dev) {
+ /* if it's a vlan - get its VID */
+ if (is_vlan_dev(upper))
+ vlan_id = vlan_dev_vlan_id(upper);
+
+ rcu_read_unlock();
+ goto found;
}
}
+ rcu_read_unlock();
- if (vlan_id && vlan_dev) {
- ip_rt_put(rt);
- addr = bond_confirm_addr(vlan_dev, targets[i], 0);
- bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i],
- addr, vlan_id);
- continue;
- }
+ /* Not our device - skip */
+ pr_debug("%s: no path to arp_ip_target %pI4 via rt.dev %s\n",
+ bond->dev->name, &targets[i],
+ rt->dst.dev ? rt->dst.dev->name : "NULL");
- if (net_ratelimit()) {
- pr_warning("%s: no path to arp_ip_target %pI4 via rt.dev %s\n",
- bond->dev->name, &targets[i],
- rt->dst.dev ? rt->dst.dev->name : "NULL");
- }
ip_rt_put(rt);
+ continue;
+
+found:
+ addr = bond_confirm_addr(rt->dst.dev, targets[i], 0);
+ ip_rt_put(rt);
+ bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i],
+ addr, vlan_id);
}
}
@@ -4142,7 +4019,6 @@ static void bond_setup(struct net_device *bond_dev)
/* Initialize pointers */
bond->dev = bond_dev;
- INIT_LIST_HEAD(&bond->vlan_list);
/* Initialize the device entry points */
ether_setup(bond_dev);
@@ -4195,7 +4071,6 @@ static void bond_uninit(struct net_device *bond_dev)
{
struct bonding *bond = netdev_priv(bond_dev);
struct slave *slave, *tmp_slave;
- struct vlan_entry *vlan, *tmp;
bond_netpoll_cleanup(bond_dev);
@@ -4207,11 +4082,6 @@ static void bond_uninit(struct net_device *bond_dev)
list_del(&bond->bond_list);
bond_debug_unregister(bond);
-
- list_for_each_entry_safe(vlan, tmp, &bond->vlan_list, vlan_list) {
- list_del(&vlan->vlan_list);
- kfree(vlan);
- }
}
/*------------------------- Module initialization ---------------------------*/
diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
index 4bf52d5f637e..4abc925823e1 100644
--- a/drivers/net/bonding/bonding.h
+++ b/drivers/net/bonding/bonding.h
@@ -185,11 +185,6 @@ struct bond_parm_tbl {
#define BOND_MAX_MODENAME_LEN 20
-struct vlan_entry {
- struct list_head vlan_list;
- unsigned short vlan_id;
-};
-
struct slave {
struct net_device *dev; /* first - useful for panic debug */
struct list_head list;
@@ -254,7 +249,6 @@ struct bonding {
struct ad_bond_info ad_info;
struct alb_bond_info alb_info;
struct bond_params params;
- struct list_head vlan_list;
struct workqueue_struct *wq;
struct delayed_work mii_work;
struct delayed_work arp_work;
@@ -267,9 +261,22 @@ struct bonding {
#endif /* CONFIG_DEBUG_FS */
};
+/* if we hold rtnl_lock() - call vlan_uses_dev() */
static inline bool bond_vlan_used(struct bonding *bond)
{
- return !list_empty(&bond->vlan_list);
+ struct net_device *upper;
+ struct list_head *iter;
+
+ rcu_read_lock();
+ netdev_for_each_upper_dev_rcu(bond->dev, upper, iter) {
+ if (upper->priv_flags & IFF_802_1Q_VLAN) {
+ rcu_read_unlock();
+ return true;
+ }
+ }
+ rcu_read_unlock();
+
+ return false;
}
#define bond_slave_get_rcu(dev) \
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 077363dcd860..3ad49b833eab 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1125,6 +1125,7 @@ struct net_device {
struct list_head napi_list;
struct list_head unreg_list;
struct list_head upper_dev_list; /* List of upper devices */
+ struct list_head lower_dev_list;
/* currently active device features */
@@ -2767,6 +2768,16 @@ extern int bpf_jit_enable;
extern bool netdev_has_upper_dev(struct net_device *dev,
struct net_device *upper_dev);
extern bool netdev_has_any_upper_dev(struct net_device *dev);
+extern struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev,
+ struct list_head **iter);
+
+/* iterate through upper list, must be called under RCU read lock */
+#define netdev_for_each_upper_dev_rcu(dev, upper, iter) \
+ for (iter = &(dev)->upper_dev_list, \
+ upper = netdev_upper_get_next_dev_rcu(dev, &(iter)); \
+ upper; \
+ upper = netdev_upper_get_next_dev_rcu(dev, &(iter)))
+
extern struct net_device *netdev_master_upper_dev_get(struct net_device *dev);
extern struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev);
extern int netdev_upper_dev_link(struct net_device *dev,
diff --git a/net/core/dev.c b/net/core/dev.c
index 1ed2b66a10a6..6fbb0c90849b 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4367,57 +4367,48 @@ softnet_break:
goto out;
}
-struct netdev_upper {
+struct netdev_adjacent {
struct net_device *dev;
+
+ /* upper master flag, there can only be one master device per list */
bool master;
+
+ /* indicates that this dev is our first-level lower/upper device */
+ bool neighbour;
+
+ /* counter for the number of times this device was added to us */
+ u16 ref_nr;
+
struct list_head list;
struct rcu_head rcu;
- struct list_head search_list;
};
-static void __append_search_uppers(struct list_head *search_list,
- struct net_device *dev)
+static struct netdev_adjacent *__netdev_find_adj(struct net_device *dev,
+ struct net_device *adj_dev,
+ bool upper)
{
- struct netdev_upper *upper;
+ struct netdev_adjacent *adj;
+ struct list_head *dev_list;
- list_for_each_entry(upper, &dev->upper_dev_list, list) {
- /* check if this upper is not already in search list */
- if (list_empty(&upper->search_list))
- list_add_tail(&upper->search_list, search_list);
+ dev_list = upper ? &dev->upper_dev_list : &dev->lower_dev_list;
+
+ list_for_each_entry(adj, dev_list, list) {
+ if (adj->dev == adj_dev)
+ return adj;
}
+ return NULL;
}
-static bool __netdev_search_upper_dev(struct net_device *dev,
- struct net_device *upper_dev)
+static inline struct netdev_adjacent *__netdev_find_upper(struct net_device *dev,
+ struct net_device *udev)
{
- LIST_HEAD(search_list);
- struct netdev_upper *upper;
- struct netdev_upper *tmp;
- bool ret = false;
-
- __append_search_uppers(&search_list, dev);
- list_for_each_entry(upper, &search_list, search_list) {
- if (upper->dev == upper_dev) {
- ret = true;
- break;
- }
- __append_search_uppers(&search_list, upper->dev);
- }
- list_for_each_entry_safe(upper, tmp, &search_list, search_list)
- INIT_LIST_HEAD(&upper->search_list);
- return ret;
+ return __netdev_find_adj(dev, udev, true);
}
-static struct netdev_upper *__netdev_find_upper(struct net_device *dev,
- struct net_device *upper_dev)
+static inline struct netdev_adjacent *__netdev_find_lower(struct net_device *dev,
+ struct net_device *ldev)
{
- struct netdev_upper *upper;
-
- list_for_each_entry(upper, &dev->upper_dev_list, list) {
- if (upper->dev == upper_dev)
- return upper;
- }
- return NULL;
+ return __netdev_find_adj(dev, ldev, false);
}
/**
@@ -4462,7 +4453,7 @@ EXPORT_SYMBOL(netdev_has_any_upper_dev);
*/
struct net_device *netdev_master_upper_dev_get(struct net_device *dev)
{
- struct netdev_upper *upper;
+ struct netdev_adjacent *upper;
ASSERT_RTNL();
@@ -4470,13 +4461,38 @@ struct net_device *netdev_master_upper_dev_get(struct net_device *dev)
return NULL;
upper = list_first_entry(&dev->upper_dev_list,
- struct netdev_upper, list);
+ struct netdev_adjacent, list);
if (likely(upper->master))
return upper->dev;
return NULL;
}
EXPORT_SYMBOL(netdev_master_upper_dev_get);
+/* netdev_upper_get_next_dev_rcu - Get the next dev from upper list
+ * @dev: device
+ * @iter: list_head ** of the current position
+ *
+ * Gets the next device from the dev's upper list, starting from iter
+ * position. The caller must hold RCU read lock.
+ */
+struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev,
+ struct list_head **iter)
+{
+ struct netdev_adjacent *upper;
+
+ WARN_ON_ONCE(!rcu_read_lock_held());
+
+ upper = list_entry_rcu((*iter)->next, struct netdev_adjacent, list);
+
+ if (&upper->list == &dev->upper_dev_list)
+ return NULL;
+
+ *iter = &upper->list;
+
+ return upper->dev;
+}
+EXPORT_SYMBOL(netdev_upper_get_next_dev_rcu);
+
/**
* netdev_master_upper_dev_get_rcu - Get master upper device
* @dev: device
@@ -4486,20 +4502,158 @@ EXPORT_SYMBOL(netdev_master_upper_dev_get);
*/
struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev)
{
- struct netdev_upper *upper;
+ struct netdev_adjacent *upper;
upper = list_first_or_null_rcu(&dev->upper_dev_list,
- struct netdev_upper, list);
+ struct netdev_adjacent, list);
if (upper && likely(upper->master))
return upper->dev;
return NULL;
}
EXPORT_SYMBOL(netdev_master_upper_dev_get_rcu);
+static int __netdev_adjacent_dev_insert(struct net_device *dev,
+ struct net_device *adj_dev,
+ bool neighbour, bool master,
+ bool upper)
+{
+ struct netdev_adjacent *adj;
+
+ adj = __netdev_find_adj(dev, adj_dev, upper);
+
+ if (adj) {
+ BUG_ON(neighbour);
+ adj->ref_nr++;
+ return 0;
+ }
+
+ adj = kmalloc(sizeof(*adj), GFP_KERNEL);
+ if (!adj)
+ return -ENOMEM;
+
+ adj->dev = adj_dev;
+ adj->master = master;
+ adj->neighbour = neighbour;
+ adj->ref_nr = 1;
+
+ dev_hold(adj_dev);
+ pr_debug("dev_hold for %s, because of %s link added from %s to %s\n",
+ adj_dev->name, upper ? "upper" : "lower", dev->name,
+ adj_dev->name);
+
+ if (!upper) {
+ list_add_tail_rcu(&adj->list, &dev->lower_dev_list);
+ return 0;
+ }
+
+ /* Ensure that master upper link is always the first item in list. */
+ if (master)
+ list_add_rcu(&adj->list, &dev->upper_dev_list);
+ else
+ list_add_tail_rcu(&adj->list, &dev->upper_dev_list);
+
+ return 0;
+}
+
+static inline int __netdev_upper_dev_insert(struct net_device *dev,
+ struct net_device *udev,
+ bool master, bool neighbour)
+{
+ return __netdev_adjacent_dev_insert(dev, udev, neighbour, master,
+ true);
+}
+
+static inline int __netdev_lower_dev_insert(struct net_device *dev,
+ struct net_device *ldev,
+ bool neighbour)
+{
+ return __netdev_adjacent_dev_insert(dev, ldev, neighbour, false,
+ false);
+}
+
+void __netdev_adjacent_dev_remove(struct net_device *dev,
+ struct net_device *adj_dev, bool upper)
+{
+ struct netdev_adjacent *adj;
+
+ if (upper)
+ adj = __netdev_find_upper(dev, adj_dev);
+ else
+ adj = __netdev_find_lower(dev, adj_dev);
+
+ if (!adj)
+ BUG();
+
+ if (adj->ref_nr > 1) {
+ adj->ref_nr--;
+ return;
+ }
+
+ list_del_rcu(&adj->list);
+ pr_debug("dev_put for %s, because of %s link removed from %s to %s\n",
+ adj_dev->name, upper ? "upper" : "lower", dev->name,
+ adj_dev->name);
+ dev_put(adj_dev);
+ kfree_rcu(adj, rcu);
+}
+
+static inline void __netdev_upper_dev_remove(struct net_device *dev,
+ struct net_device *udev)
+{
+ return __netdev_adjacent_dev_remove(dev, udev, true);
+}
+
+static inline void __netdev_lower_dev_remove(struct net_device *dev,
+ struct net_device *ldev)
+{
+ return __netdev_adjacent_dev_remove(dev, ldev, false);
+}
+
+int __netdev_adjacent_dev_insert_link(struct net_device *dev,
+ struct net_device *upper_dev,
+ bool master, bool neighbour)
+{
+ int ret;
+
+ ret = __netdev_upper_dev_insert(dev, upper_dev, master, neighbour);
+ if (ret)
+ return ret;
+
+ ret = __netdev_lower_dev_insert(upper_dev, dev, neighbour);
+ if (ret) {
+ __netdev_upper_dev_remove(dev, upper_dev);
+ return ret;
+ }
+
+ return 0;
+}
+
+static inline int __netdev_adjacent_dev_link(struct net_device *dev,
+ struct net_device *udev)
+{
+ return __netdev_adjacent_dev_insert_link(dev, udev, false, false);
+}
+
+static inline int __netdev_adjacent_dev_link_neighbour(struct net_device *dev,
+ struct net_device *udev,
+ bool master)
+{
+ return __netdev_adjacent_dev_insert_link(dev, udev, master, true);
+}
+
+void __netdev_adjacent_dev_unlink(struct net_device *dev,
+ struct net_device *upper_dev)
+{
+ __netdev_upper_dev_remove(dev, upper_dev);
+ __netdev_lower_dev_remove(upper_dev, dev);
+}
+
+
static int __netdev_upper_dev_link(struct net_device *dev,
struct net_device *upper_dev, bool master)
{
- struct netdev_upper *upper;
+ struct netdev_adjacent *i, *j, *to_i, *to_j;
+ int ret = 0;
ASSERT_RTNL();
@@ -4507,7 +4661,7 @@ static int __netdev_upper_dev_link(struct net_device *dev,
return -EBUSY;
/* To prevent loops, check if dev is not upper device to upper_dev. */
- if (__netdev_search_upper_dev(upper_dev, dev))
+ if (__netdev_find_upper(upper_dev, dev))
return -EBUSY;
if (__netdev_find_upper(dev, upper_dev))
@@ -4516,22 +4670,76 @@ static int __netdev_upper_dev_link(struct net_device *dev,
if (master && netdev_master_upper_dev_get(dev))
return -EBUSY;
- upper = kmalloc(sizeof(*upper), GFP_KERNEL);
- if (!upper)
- return -ENOMEM;
+ ret = __netdev_adjacent_dev_link_neighbour(dev, upper_dev, master);
+ if (ret)
+ return ret;
- upper->dev = upper_dev;
- upper->master = master;
- INIT_LIST_HEAD(&upper->search_list);
+ /* Now that we linked these devs, make all the upper_dev's
+ * upper_dev_list visible to every dev's lower_dev_list and vice
+ * versa, and don't forget the devices itself. All of these
+ * links are non-neighbours.
+ */
+ list_for_each_entry(i, &upper_dev->upper_dev_list, list) {
+ list_for_each_entry(j, &dev->lower_dev_list, list) {
+ ret = __netdev_adjacent_dev_link(i->dev, j->dev);
+ if (ret)
+ goto rollback_mesh;
+ }
+ }
+
+ /* add dev to every upper_dev's upper device */
+ list_for_each_entry(i, &upper_dev->upper_dev_list, list) {
+ ret = __netdev_adjacent_dev_link(dev, i->dev);
+ if (ret)
+ goto rollback_upper_mesh;
+ }
+
+ /* add upper_dev to every dev's lower device */
+ list_for_each_entry(i, &dev->lower_dev_list, list) {
+ ret = __netdev_adjacent_dev_link(i->dev, upper_dev);
+ if (ret)
+ goto rollback_lower_mesh;
+ }
- /* Ensure that master upper link is always the first item in list. */
- if (master)
- list_add_rcu(&upper->list, &dev->upper_dev_list);
- else
- list_add_tail_rcu(&upper->list, &dev->upper_dev_list);
- dev_hold(upper_dev);
call_netdevice_notifiers(NETDEV_CHANGEUPPER, dev);
return 0;
+
+rollback_lower_mesh:
+ to_i = i;
+ list_for_each_entry(i, &dev->lower_dev_list, list) {
+ if (i == to_i)
+ break;
+ __netdev_adjacent_dev_unlink(i->dev, upper_dev);
+ }
+
+ i = NULL;
+
+rollback_upper_mesh:
+ to_i = i;
+ list_for_each_entry(i, &upper_dev->upper_dev_list, list) {
+ if (i == to_i)
+ break;
+ __netdev_adjacent_dev_unlink(dev, i->dev);
+ }
+
+ i = j = NULL;
+
+rollback_mesh:
+ to_i = i;
+ to_j = j;
+ list_for_each_entry(i, &dev->lower_dev_list, list) {
+ list_for_each_entry(j, &upper_dev->upper_dev_list, list) {
+ if (i == to_i && j == to_j)
+ break;
+ __netdev_adjacent_dev_unlink(i->dev, j->dev);
+ }
+ if (i == to_i)
+ break;
+ }
+
+ __netdev_adjacent_dev_unlink(dev, upper_dev);
+
+ return ret;
}
/**
@@ -4580,16 +4788,28 @@ EXPORT_SYMBOL(netdev_master_upper_dev_link);
void netdev_upper_dev_unlink(struct net_device *dev,
struct net_device *upper_dev)
{
- struct netdev_upper *upper;
-
+ struct netdev_adjacent *i, *j;
ASSERT_RTNL();
- upper = __netdev_find_upper(dev, upper_dev);
- if (!upper)
- return;
- list_del_rcu(&upper->list);
- dev_put(upper_dev);
- kfree_rcu(upper, rcu);
+ __netdev_adjacent_dev_unlink(dev, upper_dev);
+
+ /* Here is the tricky part. We must remove all dev's lower
+ * devices from all upper_dev's upper devices and vice
+ * versa, to maintain the graph relationship.
+ */
+ list_for_each_entry(i, &dev->lower_dev_list, list)
+ list_for_each_entry(j, &upper_dev->upper_dev_list, list)
+ __netdev_adjacent_dev_unlink(i->dev, j->dev);
+
+ /* remove also the devices itself from lower/upper device
+ * list
+ */
+ list_for_each_entry(i, &dev->lower_dev_list, list)
+ __netdev_adjacent_dev_unlink(i->dev, upper_dev);
+
+ list_for_each_entry(i, &upper_dev->upper_dev_list, list)
+ __netdev_adjacent_dev_unlink(dev, i->dev);
+
call_netdevice_notifiers(NETDEV_CHANGEUPPER, dev);
}
EXPORT_SYMBOL(netdev_upper_dev_unlink);
@@ -5850,6 +6070,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
INIT_LIST_HEAD(&dev->unreg_list);
INIT_LIST_HEAD(&dev->link_watch_list);
INIT_LIST_HEAD(&dev->upper_dev_list);
+ INIT_LIST_HEAD(&dev->lower_dev_list);
dev->priv_flags = IFF_XMIT_DST_RELEASE;
setup(dev);