aboutsummaryrefslogtreecommitdiffstatshomepage
diff options
context:
space:
mode:
authorJakub Kicinski <kuba@kernel.org>2025-03-03 15:04:14 -0800
committerJakub Kicinski <kuba@kernel.org>2025-03-03 15:04:15 -0800
commit3424291dd242c4fe9f5cff236f73beef7b5c4909 (patch)
treef599cddc06e3f8b3a1163722e36a64a9b423e0e6
parentselftests: net: report output format as TAP 13 in Python tests (diff)
parentipv4: fib: Convert RTM_NEWROUTE and RTM_DELROUTE to per-netns RTNL. (diff)
downloadwireguard-linux-3424291dd242c4fe9f5cff236f73beef7b5c4909.tar.xz
wireguard-linux-3424291dd242c4fe9f5cff236f73beef7b5c4909.zip
Merge branch 'ipv4-fib-convert-rtm_newroute-and-rtm_delroute-to-per-netns-rtnl'
Kuniyuki Iwashima says: ==================== ipv4: fib: Convert RTM_NEWROUTE and RTM_DELROUTE to per-netns RTNL. Patch 1 is misc cleanup. Patch 2 ~ 8 converts two fib_info hash tables to per-netns. Patch 9 ~ 12 converts rtnl_lock() to rtnl_net_lcok(). v2: https://lore.kernel.org/20250226192556.21633-1-kuniyu@amazon.com v1: https://lore.kernel.org/20250225182250.74650-1-kuniyu@amazon.com ==================== Link: https://patch.msgid.link/20250228042328.96624-1-kuniyu@amazon.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
-rw-r--r--include/net/ip_fib.h2
-rw-r--r--include/net/netns/ipv4.h3
-rw-r--r--net/ipv4/fib_frontend.c74
-rw-r--r--net/ipv4/fib_semantics.c206
-rw-r--r--net/ipv4/fib_trie.c22
5 files changed, 159 insertions, 148 deletions
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index a113c11ab56b..e3864b74e92a 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -162,6 +162,8 @@ struct fib_info {
struct fib_nh fib_nh[] __counted_by(fib_nhs);
};
+int __net_init fib4_semantics_init(struct net *net);
+void __net_exit fib4_semantics_exit(struct net *net);
#ifdef CONFIG_IP_MULTIPLE_TABLES
struct fib_rule;
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 45ac125e8aeb..650b2dc9199f 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -111,6 +111,9 @@ struct netns_ipv4 {
#endif
struct hlist_head *fib_table_hash;
struct sock *fibnl;
+ struct hlist_head *fib_info_hash;
+ unsigned int fib_info_hash_bits;
+ unsigned int fib_info_cnt;
struct sock *mc_autojoin_sk;
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 272e42d81323..6de77415b5b3 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -553,18 +553,16 @@ static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt,
const struct in_ifaddr *ifa;
struct in_device *in_dev;
- in_dev = __in_dev_get_rtnl(dev);
+ in_dev = __in_dev_get_rtnl_net(dev);
if (!in_dev)
return -ENODEV;
*colon = ':';
- rcu_read_lock();
- in_dev_for_each_ifa_rcu(ifa, in_dev) {
+ in_dev_for_each_ifa_rtnl_net(net, ifa, in_dev) {
if (strcmp(ifa->ifa_label, devname) == 0)
break;
}
- rcu_read_unlock();
if (!ifa)
return -ENODEV;
@@ -635,7 +633,7 @@ int ip_rt_ioctl(struct net *net, unsigned int cmd, struct rtentry *rt)
if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
return -EPERM;
- rtnl_lock();
+ rtnl_net_lock(net);
err = rtentry_to_fib_config(net, cmd, rt, &cfg);
if (err == 0) {
struct fib_table *tb;
@@ -659,7 +657,7 @@ int ip_rt_ioctl(struct net *net, unsigned int cmd, struct rtentry *rt)
/* allocated by rtentry_to_fib_config() */
kfree(cfg.fc_mx);
}
- rtnl_unlock();
+ rtnl_net_unlock(net);
return err;
}
return -EINVAL;
@@ -837,19 +835,33 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
}
}
+ if (cfg->fc_dst_len > 32) {
+ NL_SET_ERR_MSG(extack, "Invalid prefix length");
+ err = -EINVAL;
+ goto errout;
+ }
+
+ if (cfg->fc_dst_len < 32 && (ntohl(cfg->fc_dst) << cfg->fc_dst_len)) {
+ NL_SET_ERR_MSG(extack, "Invalid prefix for given prefix length");
+ err = -EINVAL;
+ goto errout;
+ }
+
if (cfg->fc_nh_id) {
if (cfg->fc_oif || cfg->fc_gw_family ||
cfg->fc_encap || cfg->fc_mp) {
NL_SET_ERR_MSG(extack,
"Nexthop specification and nexthop id are mutually exclusive");
- return -EINVAL;
+ err = -EINVAL;
+ goto errout;
}
}
if (has_gw && has_via) {
NL_SET_ERR_MSG(extack,
"Nexthop configuration can not contain both GATEWAY and VIA");
- return -EINVAL;
+ err = -EINVAL;
+ goto errout;
}
if (!cfg->fc_table)
@@ -872,20 +884,24 @@ static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
if (err < 0)
goto errout;
+ rtnl_net_lock(net);
+
if (cfg.fc_nh_id && !nexthop_find_by_id(net, cfg.fc_nh_id)) {
NL_SET_ERR_MSG(extack, "Nexthop id does not exist");
err = -EINVAL;
- goto errout;
+ goto unlock;
}
tb = fib_get_table(net, cfg.fc_table);
if (!tb) {
NL_SET_ERR_MSG(extack, "FIB table does not exist");
err = -ESRCH;
- goto errout;
+ goto unlock;
}
err = fib_table_delete(net, tb, &cfg, extack);
+unlock:
+ rtnl_net_unlock(net);
errout:
return err;
}
@@ -902,15 +918,20 @@ static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
if (err < 0)
goto errout;
+ rtnl_net_lock(net);
+
tb = fib_new_table(net, cfg.fc_table);
if (!tb) {
err = -ENOBUFS;
- goto errout;
+ goto unlock;
}
err = fib_table_insert(net, tb, &cfg, extack);
if (!err && cfg.fc_type == RTN_LOCAL)
net->ipv4.fib_has_custom_local_routes = true;
+
+unlock:
+ rtnl_net_unlock(net);
errout:
return err;
}
@@ -1450,7 +1471,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event,
fib_sync_up(dev, RTNH_F_DEAD);
#endif
atomic_inc(&net->ipv4.dev_addr_genid);
- rt_cache_flush(dev_net(dev));
+ rt_cache_flush(net);
break;
case NETDEV_DOWN:
fib_del_ifaddr(ifa, NULL);
@@ -1461,7 +1482,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event,
*/
fib_disable_ip(dev, event, true);
} else {
- rt_cache_flush(dev_net(dev));
+ rt_cache_flush(net);
}
break;
}
@@ -1575,7 +1596,7 @@ static void ip_fib_net_exit(struct net *net)
{
int i;
- ASSERT_RTNL();
+ ASSERT_RTNL_NET(net);
#ifdef CONFIG_IP_MULTIPLE_TABLES
RCU_INIT_POINTER(net->ipv4.fib_main, NULL);
RCU_INIT_POINTER(net->ipv4.fib_default, NULL);
@@ -1615,9 +1636,15 @@ static int __net_init fib_net_init(struct net *net)
error = ip_fib_net_init(net);
if (error < 0)
goto out;
+
+ error = fib4_semantics_init(net);
+ if (error)
+ goto out_semantics;
+
error = nl_fib_lookup_init(net);
if (error < 0)
goto out_nlfl;
+
error = fib_proc_init(net);
if (error < 0)
goto out_proc;
@@ -1627,9 +1654,11 @@ out:
out_proc:
nl_fib_lookup_exit(net);
out_nlfl:
- rtnl_lock();
+ fib4_semantics_exit(net);
+out_semantics:
+ rtnl_net_lock(net);
ip_fib_net_exit(net);
- rtnl_unlock();
+ rtnl_net_unlock(net);
goto out;
}
@@ -1644,10 +1673,15 @@ static void __net_exit fib_net_exit_batch(struct list_head *net_list)
struct net *net;
rtnl_lock();
- list_for_each_entry(net, net_list, exit_list)
+ list_for_each_entry(net, net_list, exit_list) {
+ __rtnl_net_lock(net);
ip_fib_net_exit(net);
-
+ __rtnl_net_unlock(net);
+ }
rtnl_unlock();
+
+ list_for_each_entry(net, net_list, exit_list)
+ fib4_semantics_exit(net);
}
static struct pernet_operations fib_net_ops = {
@@ -1658,9 +1692,9 @@ static struct pernet_operations fib_net_ops = {
static const struct rtnl_msg_handler fib_rtnl_msg_handlers[] __initconst = {
{.protocol = PF_INET, .msgtype = RTM_NEWROUTE,
- .doit = inet_rtm_newroute},
+ .doit = inet_rtm_newroute, .flags = RTNL_FLAG_DOIT_PERNET},
{.protocol = PF_INET, .msgtype = RTM_DELROUTE,
- .doit = inet_rtm_delroute},
+ .doit = inet_rtm_delroute, .flags = RTNL_FLAG_DOIT_PERNET},
{.protocol = PF_INET, .msgtype = RTM_GETROUTE, .dumpit = inet_dump_fib,
.flags = RTNL_FLAG_DUMP_UNLOCKED | RTNL_FLAG_DUMP_SPLIT_NLM_DONE},
};
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index d2cee5c314f5..f68bb9e34c34 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -50,12 +50,6 @@
#include "fib_lookup.h"
-static struct hlist_head *fib_info_hash;
-static struct hlist_head *fib_info_laddrhash;
-static unsigned int fib_info_hash_size;
-static unsigned int fib_info_hash_bits;
-static unsigned int fib_info_cnt;
-
/* for_nexthops and change_nexthops only used when nexthop object
* is not set in a fib_info. The logic within can reference fib_nh.
*/
@@ -258,8 +252,7 @@ void fib_release_info(struct fib_info *fi)
ASSERT_RTNL();
if (fi && refcount_dec_and_test(&fi->fib_treeref)) {
hlist_del(&fi->fib_hash);
-
- fib_info_cnt--;
+ fi->fib_net->ipv4.fib_info_cnt--;
if (fi->fib_prefsrc)
hlist_del(&fi->fib_lhash);
@@ -335,11 +328,12 @@ static unsigned int fib_info_hashfn_1(int init_val, u8 protocol, u8 scope,
static unsigned int fib_info_hashfn_result(const struct net *net,
unsigned int val)
{
- return hash_32(val ^ net_hash_mix(net), fib_info_hash_bits);
+ return hash_32(val ^ net_hash_mix(net), net->ipv4.fib_info_hash_bits);
}
-static inline unsigned int fib_info_hashfn(struct fib_info *fi)
+static struct hlist_head *fib_info_hash_bucket(struct fib_info *fi)
{
+ struct net *net = fi->fib_net;
unsigned int val;
val = fib_info_hashfn_1(fi->fib_nhs, fi->fib_protocol,
@@ -354,7 +348,70 @@ static inline unsigned int fib_info_hashfn(struct fib_info *fi)
} endfor_nexthops(fi)
}
- return fib_info_hashfn_result(fi->fib_net, val);
+ return &net->ipv4.fib_info_hash[fib_info_hashfn_result(net, val)];
+}
+
+static struct hlist_head *fib_info_laddrhash_bucket(const struct net *net,
+ __be32 val)
+{
+ unsigned int hash_bits = net->ipv4.fib_info_hash_bits;
+ u32 slot;
+
+ slot = hash_32(net_hash_mix(net) ^ (__force u32)val, hash_bits);
+
+ return &net->ipv4.fib_info_hash[(1 << hash_bits) + slot];
+}
+
+static struct hlist_head *fib_info_hash_alloc(unsigned int hash_bits)
+{
+ /* The second half is used for prefsrc */
+ return kvcalloc((1 << hash_bits) * 2, sizeof(struct hlist_head *),
+ GFP_KERNEL);
+}
+
+static void fib_info_hash_free(struct hlist_head *head)
+{
+ kvfree(head);
+}
+
+static void fib_info_hash_grow(struct net *net)
+{
+ unsigned int old_size = 1 << net->ipv4.fib_info_hash_bits;
+ struct hlist_head *new_info_hash, *old_info_hash;
+ unsigned int i;
+
+ if (net->ipv4.fib_info_cnt < old_size)
+ return;
+
+ new_info_hash = fib_info_hash_alloc(net->ipv4.fib_info_hash_bits + 1);
+ if (!new_info_hash)
+ return;
+
+ old_info_hash = net->ipv4.fib_info_hash;
+ net->ipv4.fib_info_hash = new_info_hash;
+ net->ipv4.fib_info_hash_bits += 1;
+
+ for (i = 0; i < old_size; i++) {
+ struct hlist_head *head = &old_info_hash[i];
+ struct hlist_node *n;
+ struct fib_info *fi;
+
+ hlist_for_each_entry_safe(fi, n, head, fib_hash)
+ hlist_add_head(&fi->fib_hash, fib_info_hash_bucket(fi));
+ }
+
+ for (i = 0; i < old_size; i++) {
+ struct hlist_head *lhead = &old_info_hash[old_size + i];
+ struct hlist_node *n;
+ struct fib_info *fi;
+
+ hlist_for_each_entry_safe(fi, n, lhead, fib_lhash)
+ hlist_add_head(&fi->fib_lhash,
+ fib_info_laddrhash_bucket(fi->fib_net,
+ fi->fib_prefsrc));
+ }
+
+ fib_info_hash_free(old_info_hash);
}
/* no metrics, only nexthop id */
@@ -370,13 +427,12 @@ static struct fib_info *fib_find_info_nh(struct net *net,
(__force u32)cfg->fc_prefsrc,
cfg->fc_priority);
hash = fib_info_hashfn_result(net, hash);
- head = &fib_info_hash[hash];
+ head = &net->ipv4.fib_info_hash[hash];
hlist_for_each_entry(fi, head, fib_hash) {
- if (!net_eq(fi->fib_net, net))
- continue;
if (!fi->nh || fi->nh->id != cfg->fc_nh_id)
continue;
+
if (cfg->fc_protocol == fi->fib_protocol &&
cfg->fc_scope == fi->fib_scope &&
cfg->fc_prefsrc == fi->fib_prefsrc &&
@@ -392,18 +448,13 @@ static struct fib_info *fib_find_info_nh(struct net *net,
static struct fib_info *fib_find_info(struct fib_info *nfi)
{
- struct hlist_head *head;
+ struct hlist_head *head = fib_info_hash_bucket(nfi);
struct fib_info *fi;
- unsigned int hash;
-
- hash = fib_info_hashfn(nfi);
- head = &fib_info_hash[hash];
hlist_for_each_entry(fi, head, fib_hash) {
- if (!net_eq(fi->fib_net, nfi->fib_net))
- continue;
if (fi->fib_nhs != nfi->fib_nhs)
continue;
+
if (nfi->fib_protocol == fi->fib_protocol &&
nfi->fib_scope == fi->fib_scope &&
nfi->fib_prefsrc == fi->fib_prefsrc &&
@@ -1239,64 +1290,6 @@ int fib_check_nh(struct net *net, struct fib_nh *nh, u32 table, u8 scope,
return err;
}
-static struct hlist_head *
-fib_info_laddrhash_bucket(const struct net *net, __be32 val)
-{
- u32 slot = hash_32(net_hash_mix(net) ^ (__force u32)val,
- fib_info_hash_bits);
-
- return &fib_info_laddrhash[slot];
-}
-
-static void fib_info_hash_move(struct hlist_head *new_info_hash,
- struct hlist_head *new_laddrhash,
- unsigned int new_size)
-{
- struct hlist_head *old_info_hash, *old_laddrhash;
- unsigned int old_size = fib_info_hash_size;
- unsigned int i;
-
- ASSERT_RTNL();
- old_info_hash = fib_info_hash;
- old_laddrhash = fib_info_laddrhash;
- fib_info_hash_size = new_size;
- fib_info_hash_bits = ilog2(new_size);
-
- for (i = 0; i < old_size; i++) {
- struct hlist_head *head = &fib_info_hash[i];
- struct hlist_node *n;
- struct fib_info *fi;
-
- hlist_for_each_entry_safe(fi, n, head, fib_hash) {
- struct hlist_head *dest;
- unsigned int new_hash;
-
- new_hash = fib_info_hashfn(fi);
- dest = &new_info_hash[new_hash];
- hlist_add_head(&fi->fib_hash, dest);
- }
- }
- fib_info_hash = new_info_hash;
-
- fib_info_laddrhash = new_laddrhash;
- for (i = 0; i < old_size; i++) {
- struct hlist_head *lhead = &old_laddrhash[i];
- struct hlist_node *n;
- struct fib_info *fi;
-
- hlist_for_each_entry_safe(fi, n, lhead, fib_lhash) {
- struct hlist_head *ldest;
-
- ldest = fib_info_laddrhash_bucket(fi->fib_net,
- fi->fib_prefsrc);
- hlist_add_head(&fi->fib_lhash, ldest);
- }
- }
-
- kvfree(old_info_hash);
- kvfree(old_laddrhash);
-}
-
__be32 fib_info_update_nhc_saddr(struct net *net, struct fib_nh_common *nhc,
unsigned char scope)
{
@@ -1409,32 +1402,14 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
}
#endif
- err = -ENOBUFS;
-
- if (fib_info_cnt >= fib_info_hash_size) {
- unsigned int new_size = fib_info_hash_size << 1;
- struct hlist_head *new_info_hash;
- struct hlist_head *new_laddrhash;
- size_t bytes;
-
- if (!new_size)
- new_size = 16;
- bytes = (size_t)new_size * sizeof(struct hlist_head *);
- new_info_hash = kvzalloc(bytes, GFP_KERNEL);
- new_laddrhash = kvzalloc(bytes, GFP_KERNEL);
- if (!new_info_hash || !new_laddrhash) {
- kvfree(new_info_hash);
- kvfree(new_laddrhash);
- } else {
- fib_info_hash_move(new_info_hash, new_laddrhash, new_size);
- }
- if (!fib_info_hash_size)
- goto failure;
- }
+ fib_info_hash_grow(net);
fi = kzalloc(struct_size(fi, fib_nh, nhs), GFP_KERNEL);
- if (!fi)
+ if (!fi) {
+ err = -ENOBUFS;
goto failure;
+ }
+
fi->fib_metrics = ip_fib_metrics_init(cfg->fc_mx, cfg->fc_mx_len, extack);
if (IS_ERR(fi->fib_metrics)) {
err = PTR_ERR(fi->fib_metrics);
@@ -1571,9 +1546,9 @@ link_it:
refcount_set(&fi->fib_treeref, 1);
refcount_set(&fi->fib_clntref, 1);
- fib_info_cnt++;
- hlist_add_head(&fi->fib_hash,
- &fib_info_hash[fib_info_hashfn(fi)]);
+ net->ipv4.fib_info_cnt++;
+ hlist_add_head(&fi->fib_hash, fib_info_hash_bucket(fi));
+
if (fi->fib_prefsrc) {
struct hlist_head *head;
@@ -1855,7 +1830,7 @@ int fib_sync_down_addr(struct net_device *dev, __be32 local)
struct fib_info *fi;
int ret = 0;
- if (!fib_info_laddrhash || local == 0)
+ if (!local)
return 0;
head = fib_info_laddrhash_bucket(net, local);
@@ -2257,3 +2232,22 @@ check_saddr:
fl4->saddr = inet_select_addr(l3mdev, 0, RT_SCOPE_LINK);
}
}
+
+int __net_init fib4_semantics_init(struct net *net)
+{
+ unsigned int hash_bits = 4;
+
+ net->ipv4.fib_info_hash = fib_info_hash_alloc(hash_bits);
+ if (!net->ipv4.fib_info_hash)
+ return -ENOMEM;
+
+ net->ipv4.fib_info_hash_bits = hash_bits;
+ net->ipv4.fib_info_cnt = 0;
+
+ return 0;
+}
+
+void __net_exit fib4_semantics_exit(struct net *net)
+{
+ fib_info_hash_free(net->ipv4.fib_info_hash);
+}
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index d6411ac81096..59a6f0a9638f 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1187,22 +1187,6 @@ static int fib_insert_alias(struct trie *t, struct key_vector *tp,
return 0;
}
-static bool fib_valid_key_len(u32 key, u8 plen, struct netlink_ext_ack *extack)
-{
- if (plen > KEYLENGTH) {
- NL_SET_ERR_MSG(extack, "Invalid prefix length");
- return false;
- }
-
- if ((plen < KEYLENGTH) && (key << plen)) {
- NL_SET_ERR_MSG(extack,
- "Invalid prefix for given prefix length");
- return false;
- }
-
- return true;
-}
-
static void fib_remove_alias(struct trie *t, struct key_vector *tp,
struct key_vector *l, struct fib_alias *old);
@@ -1223,9 +1207,6 @@ int fib_table_insert(struct net *net, struct fib_table *tb,
key = ntohl(cfg->fc_dst);
- if (!fib_valid_key_len(key, plen, extack))
- return -EINVAL;
-
pr_debug("Insert table=%u %08x/%d\n", tb->tb_id, key, plen);
fi = fib_create_info(cfg, extack);
@@ -1717,9 +1698,6 @@ int fib_table_delete(struct net *net, struct fib_table *tb,
key = ntohl(cfg->fc_dst);
- if (!fib_valid_key_len(key, plen, extack))
- return -EINVAL;
-
l = fib_find_node(t, &tp, key);
if (!l)
return -ESRCH;