aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
author: Ido Schimmel <idosch@mellanox.com> 2018-01-09 16:40:25 +0200
committer: David S. Miller <davem@davemloft.net> 2018-01-10 15:14:44 -0500
commit: d7dedee184e775f77d321cfa1c660a7680cf6588 (patch)
tree: 7d59d50b661562ccf2cfad936be59b37f7f69b99 /net
parent: vhost_net: batch used ring update in rx (diff)
download: linux-dev-d7dedee184e775f77d321cfa1c660a7680cf6588.tar.xz
download: linux-dev-d7dedee184e775f77d321cfa1c660a7680cf6588.zip
ipv6: Calculate hash thresholds for IPv6 nexthops
Before we convert IPv6 to use hash-threshold instead of modulo-N, we first need each nexthop to store its region boundary in the hash function's output space. The boundary is calculated by dividing the output space equally between the active nexthops, that is, nexthops that are neither dead nor linkdown. The boundaries are rebalanced whenever a nexthop is added to or removed from a multipath route, and whenever a nexthop becomes active or inactive.
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Acked-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r-- net/ipv6/ip6_fib.c | 8
-rw-r--r-- net/ipv6/route.c | 96
2 files changed, 98 insertions, 6 deletions
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index b5f19703fca6..e31118f417b4 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -796,12 +796,6 @@ insert_above:
return ln;
}
-static bool rt6_qualify_for_ecmp(struct rt6_info *rt)
-{
- return (rt->rt6i_flags & (RTF_GATEWAY|RTF_ADDRCONF|RTF_DYNAMIC)) ==
- RTF_GATEWAY;
-}
-
static void fib6_copy_metrics(u32 *mp, const struct mx6_config *mxc)
{
int i;
@@ -991,6 +985,7 @@ next_iter:
rt6i_nsiblings++;
}
BUG_ON(rt6i_nsiblings != rt->rt6i_nsiblings);
+ rt6_multipath_rebalance(temp_sibling);
}
/*
@@ -1672,6 +1667,7 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
sibling->rt6i_nsiblings--;
rt->rt6i_nsiblings = 0;
list_del_init(&rt->rt6i_siblings);
+ rt6_multipath_rebalance(next_sibling);
}
/* Adjust walkers */
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 1054b059747f..ced2c9bed10b 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -3481,6 +3481,99 @@ struct arg_netdev_event {
};
};
+static struct rt6_info *rt6_multipath_first_sibling(const struct rt6_info *rt)
+{
+ struct rt6_info *iter;
+ struct fib6_node *fn;
+
+ fn = rcu_dereference_protected(rt->rt6i_node,
+ lockdep_is_held(&rt->rt6i_table->tb6_lock));
+ iter = rcu_dereference_protected(fn->leaf,
+ lockdep_is_held(&rt->rt6i_table->tb6_lock));
+ while (iter) {
+ if (iter->rt6i_metric == rt->rt6i_metric &&
+ rt6_qualify_for_ecmp(iter))
+ return iter;
+ iter = rcu_dereference_protected(iter->rt6_next,
+ lockdep_is_held(&rt->rt6i_table->tb6_lock));
+ }
+
+ return NULL;
+}
+
+static bool rt6_is_dead(const struct rt6_info *rt)
+{
+ if (rt->rt6i_nh_flags & RTNH_F_DEAD ||
+ (rt->rt6i_nh_flags & RTNH_F_LINKDOWN &&
+ rt->rt6i_idev->cnf.ignore_routes_with_linkdown))
+ return true;
+
+ return false;
+}
+
+static int rt6_multipath_total_weight(const struct rt6_info *rt)
+{
+ struct rt6_info *iter;
+ int total = 0;
+
+ if (!rt6_is_dead(rt))
+ total++;
+
+ list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings) {
+ if (!rt6_is_dead(iter))
+ total++;
+ }
+
+ return total;
+}
+
+static void rt6_upper_bound_set(struct rt6_info *rt, int *weight, int total)
+{
+ int upper_bound = -1;
+
+ if (!rt6_is_dead(rt)) {
+ (*weight)++;
+ upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31,
+ total) - 1;
+ }
+ atomic_set(&rt->rt6i_nh_upper_bound, upper_bound);
+}
+
+static void rt6_multipath_upper_bound_set(struct rt6_info *rt, int total)
+{
+ struct rt6_info *iter;
+ int weight = 0;
+
+ rt6_upper_bound_set(rt, &weight, total);
+
+ list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
+ rt6_upper_bound_set(iter, &weight, total);
+}
+
+void rt6_multipath_rebalance(struct rt6_info *rt)
+{
+ struct rt6_info *first;
+ int total;
+
+ /* In case the entire multipath route was marked for flushing,
+ * then there is no need to rebalance upon the removal of every
+ * sibling route.
+ */
+ if (!rt->rt6i_nsiblings || rt->should_flush)
+ return;
+
+ /* During lookup routes are evaluated in order, so we need to
+ * make sure upper bounds are assigned from the first sibling
+ * onwards.
+ */
+ first = rt6_multipath_first_sibling(rt);
+ if (WARN_ON_ONCE(!first))
+ return;
+
+ total = rt6_multipath_total_weight(first);
+ rt6_multipath_upper_bound_set(first, total);
+}
+
static int fib6_ifup(struct rt6_info *rt, void *p_arg)
{
const struct arg_netdev_event *arg = p_arg;
@@ -3489,6 +3582,7 @@ static int fib6_ifup(struct rt6_info *rt, void *p_arg)
if (rt != net->ipv6.ip6_null_entry && rt->dst.dev == arg->dev) {
rt->rt6i_nh_flags &= ~arg->nh_flags;
fib6_update_sernum_upto_root(dev_net(rt->dst.dev), rt);
+ rt6_multipath_rebalance(rt);
}
return 0;
@@ -3588,6 +3682,7 @@ static int fib6_ifdown(struct rt6_info *rt, void *p_arg)
rt6_multipath_nh_flags_set(rt, dev, RTNH_F_DEAD |
RTNH_F_LINKDOWN);
fib6_update_sernum(rt);
+ rt6_multipath_rebalance(rt);
}
return -2;
case NETDEV_CHANGE:
@@ -3595,6 +3690,7 @@ static int fib6_ifdown(struct rt6_info *rt, void *p_arg)
rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST))
break;
rt->rt6i_nh_flags |= RTNH_F_LINKDOWN;
+ rt6_multipath_rebalance(rt);
break;
}