From b75ed8b1aa9c3a99702159c3be8b0c1d54972ae5 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 27 Mar 2019 20:53:55 -0700 Subject: ipv4: Rename fib_nh entries Rename fib_nh entries that will be moved to a fib_nh_common struct. Specifically, the device, oif, gateway, flags, scope, lwtstate, nh_weight and nh_upper_bound are common with all nexthop definitions. In the process shorten fib_nh_lwtstate to fib_nh_lws to avoid really long lines. Rename only; no functional change intended. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/trace/events/fib.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/fib.h b/include/trace/events/fib.h index 6271bab63bfb..61ea7a24c8e5 100644 --- a/include/trace/events/fib.h +++ b/include/trace/events/fib.h @@ -63,13 +63,16 @@ TRACE_EVENT(fib_table_lookup, } if (nh) { + struct net_device *dev; + p32 = (__be32 *) __entry->saddr; *p32 = nh->nh_saddr; p32 = (__be32 *) __entry->gw; - *p32 = nh->nh_gw; + *p32 = nh->fib_nh_gw4; - __assign_str(name, nh->nh_dev ? nh->nh_dev->name : "-"); + dev = nh->fib_nh_dev; + __assign_str(name, dev ? dev->name : "-"); } else { p32 = (__be32 *) __entry->saddr; *p32 = 0; -- cgit v1.2.3-59-g8ed1b From ad1601ae0260551f85691ca1ac814773fdcec239 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 27 Mar 2019 20:53:56 -0700 Subject: ipv6: Rename fib6_nh entries Rename fib6_nh entries that will be moved to a fib_nh_common struct. Specifically, the device, gateway, flags, and lwtstate are common with all nexthop definitions. In some places new temporary variables are declared or local variables renamed to maintain line lengths. Rename only; no functional change intended. Signed-off-by: David Ahern Signed-off-by: David S. 
Miller --- .../net/ethernet/mellanox/mlxsw/spectrum_router.c | 34 ++-- include/net/ip6_fib.h | 16 +- include/net/ip6_route.h | 8 +- include/trace/events/fib6.h | 6 +- net/core/filter.c | 6 +- net/ipv6/addrconf.c | 2 +- net/ipv6/ip6_fib.c | 4 +- net/ipv6/ndisc.c | 8 +- net/ipv6/route.c | 181 +++++++++++---------- 9 files changed, 138 insertions(+), 127 deletions(-) (limited to 'include/trace') diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index a18e1ae1c2f6..0ba9daa05a52 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -2873,12 +2873,13 @@ mlxsw_sp_nexthop6_group_cmp(const struct mlxsw_sp_nexthop_group *nh_grp, return false; list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) { + struct fib6_nh *fib6_nh = &mlxsw_sp_rt6->rt->fib6_nh; struct in6_addr *gw; int ifindex, weight; - ifindex = mlxsw_sp_rt6->rt->fib6_nh.nh_dev->ifindex; - weight = mlxsw_sp_rt6->rt->fib6_nh.nh_weight; - gw = &mlxsw_sp_rt6->rt->fib6_nh.nh_gw; + ifindex = fib6_nh->fib_nh_dev->ifindex; + weight = fib6_nh->fib_nh_weight; + gw = &fib6_nh->fib_nh_gw6; if (!mlxsw_sp_nexthop6_group_has_nexthop(nh_grp, gw, ifindex, weight)) return false; @@ -2944,7 +2945,7 @@ mlxsw_sp_nexthop6_group_hash(struct mlxsw_sp_fib6_entry *fib6_entry, u32 seed) struct net_device *dev; list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) { - dev = mlxsw_sp_rt6->rt->fib6_nh.nh_dev; + dev = mlxsw_sp_rt6->rt->fib6_nh.fib_nh_dev; val ^= dev->ifindex; } @@ -3946,9 +3947,9 @@ mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp, struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i]; struct fib6_info *rt = mlxsw_sp_rt6->rt; - if (nh->rif && nh->rif->dev == rt->fib6_nh.nh_dev && + if (nh->rif && nh->rif->dev == rt->fib6_nh.fib_nh_dev && ipv6_addr_equal((const struct in6_addr *) &nh->gw_addr, - &rt->fib6_nh.nh_gw)) + &rt->fib6_nh.fib_nh_gw6)) return nh; 
continue; } @@ -4008,19 +4009,20 @@ mlxsw_sp_fib6_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry) if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL || fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE) { list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6, - list)->rt->fib6_nh.nh_flags |= RTNH_F_OFFLOAD; + list)->rt->fib6_nh.fib_nh_flags |= RTNH_F_OFFLOAD; return; } list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) { struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group; + struct fib6_nh *fib6_nh = &mlxsw_sp_rt6->rt->fib6_nh; struct mlxsw_sp_nexthop *nh; nh = mlxsw_sp_rt6_nexthop(nh_grp, mlxsw_sp_rt6); if (nh && nh->offloaded) - mlxsw_sp_rt6->rt->fib6_nh.nh_flags |= RTNH_F_OFFLOAD; + fib6_nh->fib_nh_flags |= RTNH_F_OFFLOAD; else - mlxsw_sp_rt6->rt->fib6_nh.nh_flags &= ~RTNH_F_OFFLOAD; + fib6_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD; } } @@ -4035,7 +4037,7 @@ mlxsw_sp_fib6_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry) list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) { struct fib6_info *rt = mlxsw_sp_rt6->rt; - rt->fib6_nh.nh_flags &= ~RTNH_F_OFFLOAD; + rt->fib6_nh.fib_nh_flags &= ~RTNH_F_OFFLOAD; } } @@ -4972,8 +4974,8 @@ static bool mlxsw_sp_nexthop6_ipip_type(const struct mlxsw_sp *mlxsw_sp, const struct fib6_info *rt, enum mlxsw_sp_ipip_type *ret) { - return rt->fib6_nh.nh_dev && - mlxsw_sp_netdev_ipip_type(mlxsw_sp, rt->fib6_nh.nh_dev, ret); + return rt->fib6_nh.fib_nh_dev && + mlxsw_sp_netdev_ipip_type(mlxsw_sp, rt->fib6_nh.fib_nh_dev, ret); } static int mlxsw_sp_nexthop6_type_init(struct mlxsw_sp *mlxsw_sp, @@ -4983,7 +4985,7 @@ static int mlxsw_sp_nexthop6_type_init(struct mlxsw_sp *mlxsw_sp, { const struct mlxsw_sp_ipip_ops *ipip_ops; struct mlxsw_sp_ipip_entry *ipip_entry; - struct net_device *dev = rt->fib6_nh.nh_dev; + struct net_device *dev = rt->fib6_nh.fib_nh_dev; struct mlxsw_sp_rif *rif; int err; @@ -5026,11 +5028,11 @@ static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp, struct 
mlxsw_sp_nexthop *nh, const struct fib6_info *rt) { - struct net_device *dev = rt->fib6_nh.nh_dev; + struct net_device *dev = rt->fib6_nh.fib_nh_dev; nh->nh_grp = nh_grp; - nh->nh_weight = rt->fib6_nh.nh_weight; - memcpy(&nh->gw_addr, &rt->fib6_nh.nh_gw, sizeof(nh->gw_addr)); + nh->nh_weight = rt->fib6_nh.fib_nh_weight; + memcpy(&nh->gw_addr, &rt->fib6_nh.fib_nh_gw6, sizeof(nh->gw_addr)); mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh); list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list); diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 3b04b318cf13..aff8570725c8 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -125,14 +125,14 @@ struct rt6_exception { #define FIB6_MAX_DEPTH 5 struct fib6_nh { - struct in6_addr nh_gw; + struct in6_addr fib_nh_gw6; bool fib_nh_has_gw; - struct net_device *nh_dev; - struct lwtunnel_state *nh_lwtstate; + struct net_device *fib_nh_dev; + struct lwtunnel_state *fib_nh_lws; - unsigned int nh_flags; - atomic_t nh_upper_bound; - int nh_weight; + unsigned int fib_nh_flags; + atomic_t fib_nh_upper_bound; + int fib_nh_weight; }; struct fib6_info { @@ -442,7 +442,7 @@ void rt6_get_prefsrc(const struct rt6_info *rt, struct in6_addr *addr) static inline struct net_device *fib6_info_nh_dev(const struct fib6_info *f6i) { - return f6i->fib6_nh.nh_dev; + return f6i->fib6_nh.fib_nh_dev; } int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh, @@ -453,7 +453,7 @@ void fib6_nh_release(struct fib6_nh *fib6_nh); static inline struct lwtunnel_state *fib6_info_nh_lwt(const struct fib6_info *f6i) { - return f6i->fib6_nh.nh_lwtstate; + return f6i->fib6_nh.fib_nh_lws; } void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info, diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 95cd8a2f6284..342180a7285c 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -274,9 +274,11 @@ static inline struct in6_addr *rt6_nexthop(struct rt6_info *rt, static inline bool 
rt6_duplicate_nexthop(struct fib6_info *a, struct fib6_info *b) { - return a->fib6_nh.nh_dev == b->fib6_nh.nh_dev && - ipv6_addr_equal(&a->fib6_nh.nh_gw, &b->fib6_nh.nh_gw) && - !lwtunnel_cmp_encap(a->fib6_nh.nh_lwtstate, b->fib6_nh.nh_lwtstate); + struct fib6_nh *nha = &a->fib6_nh, *nhb = &b->fib6_nh; + + return nha->fib_nh_dev == nhb->fib_nh_dev && + ipv6_addr_equal(&nha->fib_nh_gw6, &nhb->fib_nh_gw6) && + !lwtunnel_cmp_encap(nha->fib_nh_lws, nhb->fib_nh_lws); } static inline unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst) diff --git a/include/trace/events/fib6.h b/include/trace/events/fib6.h index b088b54d699c..6d05ebdd669c 100644 --- a/include/trace/events/fib6.h +++ b/include/trace/events/fib6.h @@ -62,8 +62,8 @@ TRACE_EVENT(fib6_table_lookup, __entry->dport = 0; } - if (f6i->fib6_nh.nh_dev) { - __assign_str(name, f6i->fib6_nh.nh_dev); + if (f6i->fib6_nh.fib_nh_dev) { + __assign_str(name, f6i->fib6_nh.fib_nh_dev); } else { __assign_str(name, "-"); } @@ -75,7 +75,7 @@ TRACE_EVENT(fib6_table_lookup, } else if (f6i) { in6 = (struct in6_addr *)__entry->gw; - *in6 = f6i->fib6_nh.nh_gw; + *in6 = f6i->fib6_nh.fib_nh_gw6; } ), diff --git a/net/core/filter.c b/net/core/filter.c index 79d319c636ea..887ab073a0ea 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4748,13 +4748,13 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params, return BPF_FIB_LKUP_RET_FRAG_NEEDED; } - if (f6i->fib6_nh.nh_lwtstate) + if (f6i->fib6_nh.fib_nh_lws) return BPF_FIB_LKUP_RET_UNSUPP_LWT; if (f6i->fib6_nh.fib_nh_has_gw) - *dst = f6i->fib6_nh.nh_gw; + *dst = f6i->fib6_nh.fib_nh_gw6; - dev = f6i->fib6_nh.nh_dev; + dev = f6i->fib6_nh.fib_nh_dev; params->rt_metric = f6i->fib6_metric; /* xdp and cls_bpf programs are run in RCU-bh so rcu_read_lock_bh is diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index c5ac08fc6cc9..2e8d1d2d8d3d 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2419,7 +2419,7 @@ static struct fib6_info 
*addrconf_get_prefix_route(const struct in6_addr *pfx, goto out; for_each_fib6_node_rt_rcu(fn) { - if (rt->fib6_nh.nh_dev->ifindex != dev->ifindex) + if (rt->fib6_nh.fib_nh_dev->ifindex != dev->ifindex) continue; if (no_gw && rt->fib6_nh.fib_nh_has_gw) continue; diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 91ce84ecdb57..8c00609a1513 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -2306,12 +2306,12 @@ static int ipv6_route_seq_show(struct seq_file *seq, void *v) #endif if (rt->fib6_nh.fib_nh_has_gw) { flags |= RTF_GATEWAY; - seq_printf(seq, "%pi6", &rt->fib6_nh.nh_gw); + seq_printf(seq, "%pi6", &rt->fib6_nh.fib_nh_gw6); } else { seq_puts(seq, "00000000000000000000000000000000"); } - dev = rt->fib6_nh.nh_dev; + dev = rt->fib6_nh.fib_nh_dev; seq_printf(seq, " %08x %08x %08x %08x %8s\n", rt->fib6_metric, atomic_read(&rt->fib6_ref), 0, flags, dev ? dev->name : ""); diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 659ecf4e4b3c..66c8b294e02b 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1276,8 +1276,8 @@ static void ndisc_router_discovery(struct sk_buff *skb) rt = rt6_get_dflt_router(net, &ipv6_hdr(skb)->saddr, skb->dev); if (rt) { - neigh = ip6_neigh_lookup(&rt->fib6_nh.nh_gw, - rt->fib6_nh.nh_dev, NULL, + neigh = ip6_neigh_lookup(&rt->fib6_nh.fib_nh_gw6, + rt->fib6_nh.fib_nh_dev, NULL, &ipv6_hdr(skb)->saddr); if (!neigh) { ND_PRINTK(0, err, @@ -1306,8 +1306,8 @@ static void ndisc_router_discovery(struct sk_buff *skb) return; } - neigh = ip6_neigh_lookup(&rt->fib6_nh.nh_gw, - rt->fib6_nh.nh_dev, NULL, + neigh = ip6_neigh_lookup(&rt->fib6_nh.fib_nh_gw6, + rt->fib6_nh.fib_nh_dev, NULL, &ipv6_hdr(skb)->saddr); if (!neigh) { ND_PRINTK(0, err, diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 681c7184e157..e4c2f8e43405 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -441,14 +441,14 @@ struct fib6_info *fib6_multipath_select(const struct net *net, if (!fl6->mp_hash) fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL); 
- if (fl6->mp_hash <= atomic_read(&match->fib6_nh.nh_upper_bound)) + if (fl6->mp_hash <= atomic_read(&match->fib6_nh.fib_nh_upper_bound)) return match; list_for_each_entry_safe(sibling, next_sibling, &match->fib6_siblings, fib6_siblings) { int nh_upper_bound; - nh_upper_bound = atomic_read(&sibling->fib6_nh.nh_upper_bound); + nh_upper_bound = atomic_read(&sibling->fib6_nh.fib_nh_upper_bound); if (fl6->mp_hash > nh_upper_bound) continue; if (rt6_score_route(sibling, oif, strict) < 0) @@ -473,13 +473,13 @@ static inline struct fib6_info *rt6_device_match(struct net *net, struct fib6_info *sprt; if (!oif && ipv6_addr_any(saddr) && - !(rt->fib6_nh.nh_flags & RTNH_F_DEAD)) + !(rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD)) return rt; for (sprt = rt; sprt; sprt = rcu_dereference(sprt->fib6_next)) { - const struct net_device *dev = sprt->fib6_nh.nh_dev; + const struct net_device *dev = sprt->fib6_nh.fib_nh_dev; - if (sprt->fib6_nh.nh_flags & RTNH_F_DEAD) + if (sprt->fib6_nh.fib_nh_flags & RTNH_F_DEAD) continue; if (oif) { @@ -495,7 +495,7 @@ static inline struct fib6_info *rt6_device_match(struct net *net, if (oif && flags & RT6_LOOKUP_F_IFACE) return net->ipv6.fib6_null_entry; - return rt->fib6_nh.nh_flags & RTNH_F_DEAD ? net->ipv6.fib6_null_entry : rt; + return rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD ? 
net->ipv6.fib6_null_entry : rt; } #ifdef CONFIG_IPV6_ROUTER_PREF @@ -536,8 +536,8 @@ static void rt6_probe(struct fib6_info *rt) if (!rt || !rt->fib6_nh.fib_nh_has_gw) return; - nh_gw = &rt->fib6_nh.nh_gw; - dev = rt->fib6_nh.nh_dev; + nh_gw = &rt->fib6_nh.fib_nh_gw6; + dev = rt->fib6_nh.fib_nh_dev; rcu_read_lock_bh(); idev = __in6_dev_get(dev); neigh = __ipv6_neigh_lookup_noref(dev, nh_gw); @@ -582,7 +582,7 @@ static inline void rt6_probe(struct fib6_info *rt) */ static inline int rt6_check_dev(struct fib6_info *rt, int oif) { - const struct net_device *dev = rt->fib6_nh.nh_dev; + const struct net_device *dev = rt->fib6_nh.fib_nh_dev; if (!oif || dev->ifindex == oif) return 2; @@ -599,8 +599,8 @@ static inline enum rt6_nud_state rt6_check_neigh(struct fib6_info *rt) return RT6_NUD_SUCCEED; rcu_read_lock_bh(); - neigh = __ipv6_neigh_lookup_noref(rt->fib6_nh.nh_dev, - &rt->fib6_nh.nh_gw); + neigh = __ipv6_neigh_lookup_noref(rt->fib6_nh.fib_nh_dev, + &rt->fib6_nh.fib_nh_gw6); if (neigh) { read_lock(&neigh->lock); if (neigh->nud_state & NUD_VALID) @@ -646,11 +646,11 @@ static struct fib6_info *find_match(struct fib6_info *rt, int oif, int strict, int m; bool match_do_rr = false; - if (rt->fib6_nh.nh_flags & RTNH_F_DEAD) + if (rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD) goto out; - if (ip6_ignore_linkdown(rt->fib6_nh.nh_dev) && - rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN && + if (ip6_ignore_linkdown(rt->fib6_nh.fib_nh_dev) && + rt->fib6_nh.fib_nh_flags & RTNH_F_LINKDOWN && !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE)) goto out; @@ -855,7 +855,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, /* called with rcu_lock held */ static struct net_device *ip6_rt_get_dev_rcu(struct fib6_info *rt) { - struct net_device *dev = rt->fib6_nh.nh_dev; + struct net_device *dev = rt->fib6_nh.fib_nh_dev; if (rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) { /* for copies of local routes, dst->dev needs to be the @@ -949,8 +949,8 @@ static void ip6_rt_init_dst(struct rt6_info *rt, 
struct fib6_info *ort) rt->dst.input = ip6_forward; } - if (ort->fib6_nh.nh_lwtstate) { - rt->dst.lwtstate = lwtstate_get(ort->fib6_nh.nh_lwtstate); + if (ort->fib6_nh.fib_nh_lws) { + rt->dst.lwtstate = lwtstate_get(ort->fib6_nh.fib_nh_lws); lwtunnel_set_redirect(&rt->dst); } @@ -976,7 +976,7 @@ static void ip6_rt_copy_init(struct rt6_info *rt, struct fib6_info *ort) rt->rt6i_idev = dev ? in6_dev_get(dev) : NULL; rt->rt6i_flags = ort->fib6_flags; if (ort->fib6_nh.fib_nh_has_gw) { - rt->rt6i_gateway = ort->fib6_nh.nh_gw; + rt->rt6i_gateway = ort->fib6_nh.fib_nh_gw6; rt->rt6i_flags |= RTF_GATEWAY; } rt6_set_from(rt, ort); @@ -1023,7 +1023,7 @@ static bool ip6_hold_safe(struct net *net, struct rt6_info **prt) static struct rt6_info *ip6_create_rt_rcu(struct fib6_info *rt) { unsigned short flags = fib6_info_dst_flags(rt); - struct net_device *dev = rt->fib6_nh.nh_dev; + struct net_device *dev = rt->fib6_nh.fib_nh_dev; struct rt6_info *nrt; if (!fib6_info_hold_safe(rt)) @@ -1407,7 +1407,7 @@ static unsigned int fib6_mtu(const struct fib6_info *rt) mtu = min_t(unsigned int, mtu, IP6_MAX_MTU); - return mtu - lwtunnel_headroom(rt->fib6_nh.nh_lwtstate, mtu); + return mtu - lwtunnel_headroom(rt->fib6_nh.fib_nh_lws, mtu); } static int rt6_insert_exception(struct rt6_info *nrt, @@ -2424,7 +2424,7 @@ static struct rt6_info *__ip6_route_redirect(struct net *net, fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); restart: for_each_fib6_node_rt_rcu(fn) { - if (rt->fib6_nh.nh_flags & RTNH_F_DEAD) + if (rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD) continue; if (fib6_check_expired(rt)) continue; @@ -2432,14 +2432,14 @@ restart: break; if (!rt->fib6_nh.fib_nh_has_gw) continue; - if (fl6->flowi6_oif != rt->fib6_nh.nh_dev->ifindex) + if (fl6->flowi6_oif != rt->fib6_nh.fib_nh_dev->ifindex) continue; /* rt_cache's gateway might be different from its 'parent' * in the case of an ip redirect. * So we keep searching in the exception table if the gateway * is different. 
*/ - if (!ipv6_addr_equal(&rdfl->gateway, &rt->fib6_nh.nh_gw)) { + if (!ipv6_addr_equal(&rdfl->gateway, &rt->fib6_nh.fib_nh_gw6)) { rt_cache = rt6_find_cached_rt(rt, &fl6->daddr, &fl6->saddr); @@ -2929,7 +2929,7 @@ int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh, goto out; } - fib6_nh->nh_flags |= RTNH_F_ONLINK; + fib6_nh->fib_nh_flags |= RTNH_F_ONLINK; } if (cfg->fc_encap) { @@ -2941,10 +2941,10 @@ int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh, if (err) goto out; - fib6_nh->nh_lwtstate = lwtstate_get(lwtstate); + fib6_nh->fib_nh_lws = lwtstate_get(lwtstate); } - fib6_nh->nh_weight = 1; + fib6_nh->fib_nh_weight = 1; /* We cannot add true routes via loopback here, * they would result in kernel looping; promote them to reject routes @@ -2973,7 +2973,7 @@ int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh, if (err) goto out; - fib6_nh->nh_gw = cfg->fc_gateway; + fib6_nh->fib_nh_gw6 = cfg->fc_gateway; fib6_nh->fib_nh_has_gw = 1; } @@ -2995,18 +2995,18 @@ int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh, if (!(cfg->fc_flags & (RTF_LOCAL | RTF_ANYCAST)) && !netif_carrier_ok(dev)) - fib6_nh->nh_flags |= RTNH_F_LINKDOWN; + fib6_nh->fib_nh_flags |= RTNH_F_LINKDOWN; set_dev: - fib6_nh->nh_dev = dev; + fib6_nh->fib_nh_dev = dev; err = 0; out: if (idev) in6_dev_put(idev); if (err) { - lwtstate_put(fib6_nh->nh_lwtstate); - fib6_nh->nh_lwtstate = NULL; + lwtstate_put(fib6_nh->fib_nh_lws); + fib6_nh->fib_nh_lws = NULL; if (dev) dev_put(dev); } @@ -3016,10 +3016,10 @@ out: void fib6_nh_release(struct fib6_nh *fib6_nh) { - lwtstate_put(fib6_nh->nh_lwtstate); + lwtstate_put(fib6_nh->fib_nh_lws); - if (fib6_nh->nh_dev) - dev_put(fib6_nh->nh_dev); + if (fib6_nh->fib_nh_dev) + dev_put(fib6_nh->fib_nh_dev); } static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg, @@ -3129,7 +3129,7 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg, * they would result in kernel looping; promote them to reject routes */ 
addr_type = ipv6_addr_type(&cfg->fc_dst); - if (fib6_is_reject(cfg->fc_flags, rt->fib6_nh.nh_dev, addr_type)) + if (fib6_is_reject(cfg->fc_flags, rt->fib6_nh.fib_nh_dev, addr_type)) rt->fib6_flags = RTF_REJECT | RTF_NONEXTHOP; if (!ipv6_addr_any(&cfg->fc_prefsrc)) { @@ -3287,6 +3287,8 @@ static int ip6_route_del(struct fib6_config *cfg, if (fn) { for_each_fib6_node_rt_rcu(fn) { + struct fib6_nh *nh; + if (cfg->fc_flags & RTF_CACHE) { int rc; @@ -3301,12 +3303,14 @@ static int ip6_route_del(struct fib6_config *cfg, } continue; } + + nh = &rt->fib6_nh; if (cfg->fc_ifindex && - (!rt->fib6_nh.nh_dev || - rt->fib6_nh.nh_dev->ifindex != cfg->fc_ifindex)) + (!nh->fib_nh_dev || + nh->fib_nh_dev->ifindex != cfg->fc_ifindex)) continue; if (cfg->fc_flags & RTF_GATEWAY && - !ipv6_addr_equal(&cfg->fc_gateway, &rt->fib6_nh.nh_gw)) + !ipv6_addr_equal(&cfg->fc_gateway, &nh->fib_nh_gw6)) continue; if (cfg->fc_metric && cfg->fc_metric != rt->fib6_metric) continue; @@ -3477,12 +3481,12 @@ static struct fib6_info *rt6_get_route_info(struct net *net, goto out; for_each_fib6_node_rt_rcu(fn) { - if (rt->fib6_nh.nh_dev->ifindex != ifindex) + if (rt->fib6_nh.fib_nh_dev->ifindex != ifindex) continue; if (!(rt->fib6_flags & RTF_ROUTEINFO) || !rt->fib6_nh.fib_nh_has_gw) continue; - if (!ipv6_addr_equal(&rt->fib6_nh.nh_gw, gwaddr)) + if (!ipv6_addr_equal(&rt->fib6_nh.fib_nh_gw6, gwaddr)) continue; if (!fib6_info_hold_safe(rt)) continue; @@ -3540,9 +3544,11 @@ struct fib6_info *rt6_get_dflt_router(struct net *net, rcu_read_lock(); for_each_fib6_node_rt_rcu(&table->tb6_root) { - if (dev == rt->fib6_nh.nh_dev && + struct fib6_nh *nh = &rt->fib6_nh; + + if (dev == nh->fib_nh_dev && ((rt->fib6_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) && - ipv6_addr_equal(&rt->fib6_nh.nh_gw, addr)) + ipv6_addr_equal(&nh->fib_nh_gw6, addr)) break; } if (rt && !fib6_info_hold_safe(rt)) @@ -3779,7 +3785,7 @@ static int fib6_remove_prefsrc(struct fib6_info *rt, void *arg) struct net *net = 
((struct arg_dev_net_ip *)arg)->net; struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr; - if (((void *)rt->fib6_nh.nh_dev == dev || !dev) && + if (((void *)rt->fib6_nh.fib_nh_dev == dev || !dev) && rt != net->ipv6.fib6_null_entry && ipv6_addr_equal(addr, &rt->fib6_prefsrc.addr)) { spin_lock_bh(&rt6_exception_lock); @@ -3810,7 +3816,7 @@ static int fib6_clean_tohost(struct fib6_info *rt, void *arg) if (((rt->fib6_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) && rt->fib6_nh.fib_nh_has_gw && - ipv6_addr_equal(gateway, &rt->fib6_nh.nh_gw)) { + ipv6_addr_equal(gateway, &rt->fib6_nh.fib_nh_gw6)) { return -1; } @@ -3858,9 +3864,9 @@ static struct fib6_info *rt6_multipath_first_sibling(const struct fib6_info *rt) static bool rt6_is_dead(const struct fib6_info *rt) { - if (rt->fib6_nh.nh_flags & RTNH_F_DEAD || - (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN && - ip6_ignore_linkdown(rt->fib6_nh.nh_dev))) + if (rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD || + (rt->fib6_nh.fib_nh_flags & RTNH_F_LINKDOWN && + ip6_ignore_linkdown(rt->fib6_nh.fib_nh_dev))) return true; return false; @@ -3872,11 +3878,11 @@ static int rt6_multipath_total_weight(const struct fib6_info *rt) int total = 0; if (!rt6_is_dead(rt)) - total += rt->fib6_nh.nh_weight; + total += rt->fib6_nh.fib_nh_weight; list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) { if (!rt6_is_dead(iter)) - total += iter->fib6_nh.nh_weight; + total += iter->fib6_nh.fib_nh_weight; } return total; @@ -3887,11 +3893,11 @@ static void rt6_upper_bound_set(struct fib6_info *rt, int *weight, int total) int upper_bound = -1; if (!rt6_is_dead(rt)) { - *weight += rt->fib6_nh.nh_weight; + *weight += rt->fib6_nh.fib_nh_weight; upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31, total) - 1; } - atomic_set(&rt->fib6_nh.nh_upper_bound, upper_bound); + atomic_set(&rt->fib6_nh.fib_nh_upper_bound, upper_bound); } static void rt6_multipath_upper_bound_set(struct fib6_info *rt, int total) @@ -3934,8 +3940,9 @@ static int fib6_ifup(struct 
fib6_info *rt, void *p_arg) const struct arg_netdev_event *arg = p_arg; struct net *net = dev_net(arg->dev); - if (rt != net->ipv6.fib6_null_entry && rt->fib6_nh.nh_dev == arg->dev) { - rt->fib6_nh.nh_flags &= ~arg->nh_flags; + if (rt != net->ipv6.fib6_null_entry && + rt->fib6_nh.fib_nh_dev == arg->dev) { + rt->fib6_nh.fib_nh_flags &= ~arg->nh_flags; fib6_update_sernum_upto_root(net, rt); rt6_multipath_rebalance(rt); } @@ -3963,10 +3970,10 @@ static bool rt6_multipath_uses_dev(const struct fib6_info *rt, { struct fib6_info *iter; - if (rt->fib6_nh.nh_dev == dev) + if (rt->fib6_nh.fib_nh_dev == dev) return true; list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) - if (iter->fib6_nh.nh_dev == dev) + if (iter->fib6_nh.fib_nh_dev == dev) return true; return false; @@ -3987,12 +3994,12 @@ static unsigned int rt6_multipath_dead_count(const struct fib6_info *rt, struct fib6_info *iter; unsigned int dead = 0; - if (rt->fib6_nh.nh_dev == down_dev || - rt->fib6_nh.nh_flags & RTNH_F_DEAD) + if (rt->fib6_nh.fib_nh_dev == down_dev || + rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD) dead++; list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) - if (iter->fib6_nh.nh_dev == down_dev || - iter->fib6_nh.nh_flags & RTNH_F_DEAD) + if (iter->fib6_nh.fib_nh_dev == down_dev || + iter->fib6_nh.fib_nh_flags & RTNH_F_DEAD) dead++; return dead; @@ -4004,11 +4011,11 @@ static void rt6_multipath_nh_flags_set(struct fib6_info *rt, { struct fib6_info *iter; - if (rt->fib6_nh.nh_dev == dev) - rt->fib6_nh.nh_flags |= nh_flags; + if (rt->fib6_nh.fib_nh_dev == dev) + rt->fib6_nh.fib_nh_flags |= nh_flags; list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) - if (iter->fib6_nh.nh_dev == dev) - iter->fib6_nh.nh_flags |= nh_flags; + if (iter->fib6_nh.fib_nh_dev == dev) + iter->fib6_nh.fib_nh_flags |= nh_flags; } /* called with write lock held for table with rt */ @@ -4023,12 +4030,12 @@ static int fib6_ifdown(struct fib6_info *rt, void *p_arg) switch (arg->event) { case 
NETDEV_UNREGISTER: - return rt->fib6_nh.nh_dev == dev ? -1 : 0; + return rt->fib6_nh.fib_nh_dev == dev ? -1 : 0; case NETDEV_DOWN: if (rt->should_flush) return -1; if (!rt->fib6_nsiblings) - return rt->fib6_nh.nh_dev == dev ? -1 : 0; + return rt->fib6_nh.fib_nh_dev == dev ? -1 : 0; if (rt6_multipath_uses_dev(rt, dev)) { unsigned int count; @@ -4044,10 +4051,10 @@ static int fib6_ifdown(struct fib6_info *rt, void *p_arg) } return -2; case NETDEV_CHANGE: - if (rt->fib6_nh.nh_dev != dev || + if (rt->fib6_nh.fib_nh_dev != dev || rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) break; - rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN; + rt->fib6_nh.fib_nh_flags |= RTNH_F_LINKDOWN; rt6_multipath_rebalance(rt); break; } @@ -4103,7 +4110,7 @@ static int rt6_mtu_change_route(struct fib6_info *rt, void *p_arg) Since RFC 1981 doesn't include administrative MTU increase update PMTU increase is a MUST. (i.e. jumbo frame) */ - if (rt->fib6_nh.nh_dev == arg->dev && + if (rt->fib6_nh.fib_nh_dev == arg->dev && !fib6_metric_locked(rt, RTAX_MTU)) { u32 mtu = rt->fib6_pmtu; @@ -4394,7 +4401,7 @@ static int ip6_route_multipath_add(struct fib6_config *cfg, goto cleanup; } - rt->fib6_nh.nh_weight = rtnh->rtnh_hops + 1; + rt->fib6_nh.fib_nh_weight = rtnh->rtnh_hops + 1; err = ip6_route_info_append(info->nl_net, &rt6_nh_list, rt, &r_cfg); @@ -4561,7 +4568,7 @@ static size_t rt6_nlmsg_size(struct fib6_info *rt) nexthop_len = nla_total_size(0) /* RTA_MULTIPATH */ + NLA_ALIGN(sizeof(struct rtnexthop)) + nla_total_size(16) /* RTA_GATEWAY */ - + lwtunnel_get_encap_size(rt->fib6_nh.nh_lwtstate); + + lwtunnel_get_encap_size(rt->fib6_nh.fib_nh_lws); nexthop_len *= rt->fib6_nsiblings; } @@ -4579,41 +4586,41 @@ static size_t rt6_nlmsg_size(struct fib6_info *rt) + nla_total_size(sizeof(struct rta_cacheinfo)) + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */ + nla_total_size(1) /* RTA_PREF */ - + lwtunnel_get_encap_size(rt->fib6_nh.nh_lwtstate) + + lwtunnel_get_encap_size(rt->fib6_nh.fib_nh_lws) + nexthop_len; 
} static int rt6_nexthop_info(struct sk_buff *skb, const struct fib6_nh *fib6_nh, unsigned int *flags, bool skip_oif) { - if (fib6_nh->nh_flags & RTNH_F_DEAD) + if (fib6_nh->fib_nh_flags & RTNH_F_DEAD) *flags |= RTNH_F_DEAD; - if (fib6_nh->nh_flags & RTNH_F_LINKDOWN) { + if (fib6_nh->fib_nh_flags & RTNH_F_LINKDOWN) { *flags |= RTNH_F_LINKDOWN; rcu_read_lock(); - if (ip6_ignore_linkdown(fib6_nh->nh_dev)) + if (ip6_ignore_linkdown(fib6_nh->fib_nh_dev)) *flags |= RTNH_F_DEAD; rcu_read_unlock(); } if (fib6_nh->fib_nh_has_gw) { - if (nla_put_in6_addr(skb, RTA_GATEWAY, &fib6_nh->nh_gw) < 0) + if (nla_put_in6_addr(skb, RTA_GATEWAY, &fib6_nh->fib_nh_gw6) < 0) goto nla_put_failure; } - *flags |= (fib6_nh->nh_flags & RTNH_F_ONLINK); - if (fib6_nh->nh_flags & RTNH_F_OFFLOAD) + *flags |= (fib6_nh->fib_nh_flags & RTNH_F_ONLINK); + if (fib6_nh->fib_nh_flags & RTNH_F_OFFLOAD) *flags |= RTNH_F_OFFLOAD; /* not needed for multipath encoding b/c it has a rtnexthop struct */ - if (!skip_oif && fib6_nh->nh_dev && - nla_put_u32(skb, RTA_OIF, fib6_nh->nh_dev->ifindex)) + if (!skip_oif && fib6_nh->fib_nh_dev && + nla_put_u32(skb, RTA_OIF, fib6_nh->fib_nh_dev->ifindex)) goto nla_put_failure; - if (fib6_nh->nh_lwtstate && - lwtunnel_fill_encap(skb, fib6_nh->nh_lwtstate) < 0) + if (fib6_nh->fib_nh_lws && + lwtunnel_fill_encap(skb, fib6_nh->fib_nh_lws) < 0) goto nla_put_failure; return 0; @@ -4625,7 +4632,7 @@ nla_put_failure: /* add multipath next hop */ static int rt6_add_nexthop(struct sk_buff *skb, const struct fib6_nh *fib6_nh) { - const struct net_device *dev = fib6_nh->nh_dev; + const struct net_device *dev = fib6_nh->fib_nh_dev; struct rtnexthop *rtnh; unsigned int flags = 0; @@ -4633,7 +4640,7 @@ static int rt6_add_nexthop(struct sk_buff *skb, const struct fib6_nh *fib6_nh) if (!rtnh) goto nla_put_failure; - rtnh->rtnh_hops = fib6_nh->nh_weight - 1; + rtnh->rtnh_hops = fib6_nh->fib_nh_weight - 1; rtnh->rtnh_ifindex = dev ? 
dev->ifindex : 0; if (rt6_nexthop_info(skb, fib6_nh, &flags, true) < 0) @@ -4805,7 +4812,7 @@ nla_put_failure: static bool fib6_info_uses_dev(const struct fib6_info *f6i, const struct net_device *dev) { - if (f6i->fib6_nh.nh_dev == dev) + if (f6i->fib6_nh.fib_nh_dev == dev) return true; if (f6i->fib6_nsiblings) { @@ -4813,7 +4820,7 @@ static bool fib6_info_uses_dev(const struct fib6_info *f6i, list_for_each_entry_safe(sibling, next_sibling, &f6i->fib6_siblings, fib6_siblings) { - if (sibling->fib6_nh.nh_dev == dev) + if (sibling->fib6_nh.fib_nh_dev == dev) return true; } } @@ -5098,7 +5105,7 @@ static int ip6_route_dev_notify(struct notifier_block *this, return NOTIFY_OK; if (event == NETDEV_REGISTER) { - net->ipv6.fib6_null_entry->fib6_nh.nh_dev = dev; + net->ipv6.fib6_null_entry->fib6_nh.fib_nh_dev = dev; net->ipv6.ip6_null_entry->dst.dev = dev; net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev); #ifdef CONFIG_IPV6_MULTIPLE_TABLES @@ -5433,7 +5440,7 @@ void __init ip6_route_init_special_entries(void) /* Registering of the loopback is done before this portion of code, * the loopback reference in rt6_info will not be taken, do it * manually for init_net */ - init_net.ipv6.fib6_null_entry->fib6_nh.nh_dev = init_net.loopback_dev; + init_net.ipv6.fib6_null_entry->fib6_nh.fib_nh_dev = init_net.loopback_dev; init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev; init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); #ifdef CONFIG_IPV6_MULTIPLE_TABLES -- cgit v1.2.3-59-g8ed1b From a4e76ba6b4994773fbe7a4eed8228e47862ac8a3 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Sun, 31 Mar 2019 06:49:41 +0000 Subject: mlxsw: spectrum_acl: Rename rehash_dis trace The name of the trace is no longer correct, since there is no disable of rehash done. So name it "rehash_rollback_failed". Signed-off-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c | 4 ++-- include/trace/events/mlxsw.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/trace') diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c index 2f0b61b87c99..e993159e8e4c 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c @@ -1399,8 +1399,8 @@ mlxsw_sp_acl_tcam_vregion_migrate(struct mlxsw_sp *mlxsw_sp, err2 = mlxsw_sp_acl_tcam_vchunk_migrate_all(mlxsw_sp, vregion, ctx, credits); if (err2) { - trace_mlxsw_sp_acl_tcam_vregion_rehash_dis(mlxsw_sp, - vregion); + trace_mlxsw_sp_acl_tcam_vregion_rehash_rollback_failed(mlxsw_sp, + vregion); dev_err(mlxsw_sp->bus_info->dev, "Failed to rollback during vregion migration fail\n"); /* Let the rollback to be continued later on. */ } diff --git a/include/trace/events/mlxsw.h b/include/trace/events/mlxsw.h index 6a4cfaef33a2..19a25ed323a5 100644 --- a/include/trace/events/mlxsw.h +++ b/include/trace/events/mlxsw.h @@ -93,7 +93,7 @@ TRACE_EVENT(mlxsw_sp_acl_tcam_vregion_migrate_end, __entry->mlxsw_sp, __entry->vregion) ); -TRACE_EVENT(mlxsw_sp_acl_tcam_vregion_rehash_dis, +TRACE_EVENT(mlxsw_sp_acl_tcam_vregion_rehash_rollback_failed, TP_PROTO(const struct mlxsw_sp *mlxsw_sp, const struct mlxsw_sp_acl_tcam_vregion *vregion), -- cgit v1.2.3-59-g8ed1b From 0af7e7c128eb33f2dc16ed088ced00675785d628 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 2 Apr 2019 14:11:54 -0700 Subject: ipv4: Update fib_table_lookup tracepoint to take common nexthop Update fib_table_lookup tracepoint to take a fib_nh_common struct and dump the v6 gateway address if the nexthop uses it. Over the years saddr has not proven useful and the output of the tracepoint produces very long lines. Since saddr is not part of fib_nh_common, drop it. 
If it needs to be added later, fib_nh which contains saddr can be obtained from a fib_nh_common via container_of. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/trace/events/fib.h | 45 ++++++++++++++++++++++++++------------------- net/ipv4/fib_trie.c | 2 +- 2 files changed, 27 insertions(+), 20 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/fib.h b/include/trace/events/fib.h index 61ea7a24c8e5..7f83b6eafc5c 100644 --- a/include/trace/events/fib.h +++ b/include/trace/events/fib.h @@ -13,9 +13,9 @@ TRACE_EVENT(fib_table_lookup, TP_PROTO(u32 tb_id, const struct flowi4 *flp, - const struct fib_nh *nh, int err), + const struct fib_nh_common *nhc, int err), - TP_ARGS(tb_id, flp, nh, err), + TP_ARGS(tb_id, flp, nhc, err), TP_STRUCT__entry( __field( u32, tb_id ) @@ -28,14 +28,17 @@ TRACE_EVENT(fib_table_lookup, __field( __u8, flags ) __array( __u8, src, 4 ) __array( __u8, dst, 4 ) - __array( __u8, gw, 4 ) - __array( __u8, saddr, 4 ) + __array( __u8, gw4, 4 ) + __array( __u8, gw6, 16 ) __field( u16, sport ) __field( u16, dport ) __dynamic_array(char, name, IFNAMSIZ ) ), TP_fast_assign( + struct in6_addr in6_zero = {}; + struct net_device *dev; + struct in6_addr *in6; __be32 *p32; __entry->tb_id = tb_id; @@ -62,33 +65,37 @@ TRACE_EVENT(fib_table_lookup, __entry->dport = 0; } - if (nh) { - struct net_device *dev; + dev = nhc ? nhc->nhc_dev : NULL; + __assign_str(name, dev ? dev->name : "-"); - p32 = (__be32 *) __entry->saddr; - *p32 = nh->nh_saddr; + if (nhc) { + if (nhc->nhc_family == AF_INET) { + p32 = (__be32 *) __entry->gw4; + *p32 = nhc->nhc_gw.ipv4; - p32 = (__be32 *) __entry->gw; - *p32 = nh->fib_nh_gw4; + in6 = (struct in6_addr *)__entry->gw6; + *in6 = in6_zero; + } else if (nhc->nhc_family == AF_INET6) { + p32 = (__be32 *) __entry->gw4; + *p32 = 0; - dev = nh->fib_nh_dev; - __assign_str(name, dev ? 
dev->name : "-"); + in6 = (struct in6_addr *)__entry->gw6; + *in6 = nhc->nhc_gw.ipv6; + } } else { - p32 = (__be32 *) __entry->saddr; + p32 = (__be32 *) __entry->gw4; *p32 = 0; - p32 = (__be32 *) __entry->gw; - *p32 = 0; - - __assign_str(name, "-"); + in6 = (struct in6_addr *)__entry->gw6; + *in6 = in6_zero; } ), - TP_printk("table %u oif %d iif %d proto %u %pI4/%u -> %pI4/%u tos %d scope %d flags %x ==> dev %s gw %pI4 src %pI4 err %d", + TP_printk("table %u oif %d iif %d proto %u %pI4/%u -> %pI4/%u tos %d scope %d flags %x ==> dev %s gw %pI4/%pI6c err %d", __entry->tb_id, __entry->oif, __entry->iif, __entry->proto, __entry->src, __entry->sport, __entry->dst, __entry->dport, __entry->tos, __entry->scope, __entry->flags, - __get_str(name), __entry->gw, __entry->saddr, __entry->err) + __get_str(name), __entry->gw4, __entry->gw6, __entry->err) ); #endif /* _TRACE_FIB_H */ diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 1e3b492690f9..13b3327206f9 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1498,7 +1498,7 @@ found: #ifdef CONFIG_IP_FIB_TRIE_STATS this_cpu_inc(stats->semantic_match_passed); #endif - trace_fib_table_lookup(tb->tb_id, flp, nh, err); + trace_fib_table_lookup(tb->tb_id, flp, &nh->nh_common, err); return err; } -- cgit v1.2.3-59-g8ed1b From bdf004677107e3b847c5db09c9fbf8edefa24996 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Fri, 5 Apr 2019 16:30:26 -0700 Subject: net: Replace nhc_has_gw with nhc_gw_family Allow the gateway in a fib_nh_common to be from a different address family than the outer fib{6}_nh. To that end, replace nhc_has_gw with nhc_gw_family and update users of nhc_has_gw to check nhc_gw_family. Now nhc_family is used to know if the nh_common is part of a fib_nh or fib6_nh (used for container_of to get to route family specific data), and nhc_gw_family represents the address family for the gateway. Signed-off-by: David Ahern Reviewed-by: Ido Schimmel Signed-off-by: David S. 
Miller --- .../net/ethernet/mellanox/mlxsw/spectrum_router.c | 4 ++-- include/net/ip6_route.h | 2 +- include/net/ip_fib.h | 7 +++--- include/trace/events/fib.h | 4 ++-- net/core/filter.c | 4 ++-- net/ipv4/fib_semantics.c | 25 ++++++++++------------ net/ipv4/route.c | 5 +++-- net/ipv6/addrconf.c | 2 +- net/ipv6/ip6_fib.c | 2 +- net/ipv6/route.c | 18 ++++++++-------- 10 files changed, 35 insertions(+), 38 deletions(-) (limited to 'include/trace') diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 0ba9daa05a52..772aa78cab51 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -4915,7 +4915,7 @@ static void mlxsw_sp_rt6_destroy(struct mlxsw_sp_rt6 *mlxsw_sp_rt6) static bool mlxsw_sp_fib6_rt_can_mp(const struct fib6_info *rt) { /* RTF_CACHE routes are ignored */ - return !(rt->fib6_flags & RTF_ADDRCONF) && rt->fib6_nh.fib_nh_has_gw; + return !(rt->fib6_flags & RTF_ADDRCONF) && rt->fib6_nh.fib_nh_gw_family; } static struct fib6_info * @@ -5055,7 +5055,7 @@ static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp *mlxsw_sp, static bool mlxsw_sp_rt6_is_gateway(const struct mlxsw_sp *mlxsw_sp, const struct fib6_info *rt) { - return rt->fib6_nh.fib_nh_has_gw || + return rt->fib6_nh.fib_nh_gw_family || mlxsw_sp_nexthop6_ipip_type(mlxsw_sp, rt, NULL); } diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 342180a7285c..5909fc421305 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -69,7 +69,7 @@ static inline bool rt6_need_strict(const struct in6_addr *daddr) static inline bool rt6_qualify_for_ecmp(const struct fib6_info *f6i) { return !(f6i->fib6_flags & (RTF_ADDRCONF|RTF_DYNAMIC)) && - f6i->fib6_nh.fib_nh_has_gw; + f6i->fib6_nh.fib_nh_gw_family; } void ip6_route_input(struct sk_buff *skb); diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 3ce07841dc3b..c68a40435ee0 100644 --- 
a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -83,8 +83,8 @@ struct fib_nh_common { struct lwtunnel_state *nhc_lwtstate; unsigned char nhc_scope; u8 nhc_family; - u8 nhc_has_gw:1, - unused:7; + u8 nhc_gw_family; + union { __be32 ipv4; struct in6_addr ipv6; @@ -112,8 +112,7 @@ struct fib_nh { #define fib_nh_flags nh_common.nhc_flags #define fib_nh_lws nh_common.nhc_lwtstate #define fib_nh_scope nh_common.nhc_scope -#define fib_nh_family nh_common.nhc_family -#define fib_nh_has_gw nh_common.nhc_has_gw +#define fib_nh_gw_family nh_common.nhc_gw_family #define fib_nh_gw4 nh_common.nhc_gw.ipv4 #define fib_nh_gw6 nh_common.nhc_gw.ipv6 #define fib_nh_weight nh_common.nhc_weight diff --git a/include/trace/events/fib.h b/include/trace/events/fib.h index 7f83b6eafc5c..6f2a4dc35e37 100644 --- a/include/trace/events/fib.h +++ b/include/trace/events/fib.h @@ -69,13 +69,13 @@ TRACE_EVENT(fib_table_lookup, __assign_str(name, dev ? dev->name : "-"); if (nhc) { - if (nhc->nhc_family == AF_INET) { + if (nhc->nhc_gw_family == AF_INET) { p32 = (__be32 *) __entry->gw4; *p32 = nhc->nhc_gw.ipv4; in6 = (struct in6_addr *)__entry->gw6; *in6 = in6_zero; - } else if (nhc->nhc_family == AF_INET6) { + } else if (nhc->nhc_gw_family == AF_INET6) { p32 = (__be32 *) __entry->gw4; *p32 = 0; diff --git a/net/core/filter.c b/net/core/filter.c index 26d9cd785ae2..abd5b6ce031a 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4639,7 +4639,7 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params, return BPF_FIB_LKUP_RET_UNSUPP_LWT; dev = nhc->nhc_dev; - if (nhc->nhc_has_gw) + if (nhc->nhc_gw_family) params->ipv4_dst = nhc->nhc_gw.ipv4; params->rt_metric = res.fi->fib_priority; @@ -4752,7 +4752,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params, if (f6i->fib6_nh.fib_nh_lws) return BPF_FIB_LKUP_RET_UNSUPP_LWT; - if (f6i->fib6_nh.fib_nh_has_gw) + if (f6i->fib6_nh.fib_nh_gw_family) *dst = f6i->fib6_nh.fib_nh_gw6; dev = 
f6i->fib6_nh.fib_nh_dev; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 8e0cb1687a74..e11f78c6373f 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -513,7 +513,7 @@ int fib_nh_init(struct net *net, struct fib_nh *nh, nh->fib_nh_oif = cfg->fc_oif; if (cfg->fc_gw) { nh->fib_nh_gw4 = cfg->fc_gw; - nh->fib_nh_has_gw = 1; + nh->fib_nh_gw_family = AF_INET; } nh->fib_nh_flags = cfg->fc_flags; @@ -1238,7 +1238,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg, "Route with host scope can not have multiple nexthops"); goto err_inval; } - if (nh->fib_nh_gw4) { + if (nh->fib_nh_gw_family) { NL_SET_ERR_MSG(extack, "Route with host scope can not have a gateway"); goto err_inval; @@ -1341,18 +1341,15 @@ int fib_nexthop_info(struct sk_buff *skb, const struct fib_nh_common *nhc, rcu_read_unlock(); } - if (nhc->nhc_has_gw) { - switch (nhc->nhc_family) { - case AF_INET: - if (nla_put_in_addr(skb, RTA_GATEWAY, nhc->nhc_gw.ipv4)) - goto nla_put_failure; - break; - case AF_INET6: - if (nla_put_in6_addr(skb, RTA_GATEWAY, - &nhc->nhc_gw.ipv6) < 0) - goto nla_put_failure; - break; - } + switch (nhc->nhc_gw_family) { + case AF_INET: + if (nla_put_in_addr(skb, RTA_GATEWAY, nhc->nhc_gw.ipv4)) + goto nla_put_failure; + break; + case AF_INET6: + if (nla_put_in6_addr(skb, RTA_GATEWAY, &nhc->nhc_gw.ipv6) < 0) + goto nla_put_failure; + break; } *flags |= (nhc->nhc_flags & RTNH_F_ONLINK); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index f3f2adf630d4..e7338e421796 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1734,8 +1734,9 @@ static int __mkroute_input(struct sk_buff *skb, do_cache = res->fi && !itag; if (out_dev == in_dev && err && IN_DEV_TX_REDIRECTS(out_dev) && skb->protocol == htons(ETH_P_IP)) { - __be32 gw = nhc->nhc_family == AF_INET ? nhc->nhc_gw.ipv4 : 0; + __be32 gw; + gw = nhc->nhc_gw_family == AF_INET ? 
nhc->nhc_gw.ipv4 : 0; if (IN_DEV_SHARED_MEDIA(out_dev) || inet_addr_onlink(out_dev, saddr, gw)) IPCB(skb)->flags |= IPSKB_DOREDIRECT; @@ -2284,7 +2285,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res, } else { if (unlikely(fl4->flowi4_flags & FLOWI_FLAG_KNOWN_NH && - !(nhc->nhc_has_gw && + !(nhc->nhc_gw_family && nhc->nhc_scope == RT_SCOPE_LINK))) { do_cache = false; goto add; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 2e8d1d2d8d3d..340a0f06f974 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2421,7 +2421,7 @@ static struct fib6_info *addrconf_get_prefix_route(const struct in6_addr *pfx, for_each_fib6_node_rt_rcu(fn) { if (rt->fib6_nh.fib_nh_dev->ifindex != dev->ifindex) continue; - if (no_gw && rt->fib6_nh.fib_nh_has_gw) + if (no_gw && rt->fib6_nh.fib_nh_gw_family) continue; if ((rt->fib6_flags & flags) != flags) continue; diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 8c00609a1513..46f54a5bb1f0 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -2304,7 +2304,7 @@ static int ipv6_route_seq_show(struct seq_file *seq, void *v) #else seq_puts(seq, "00000000000000000000000000000000 00 "); #endif - if (rt->fib6_nh.fib_nh_has_gw) { + if (rt->fib6_nh.fib_nh_gw_family) { flags |= RTF_GATEWAY; seq_printf(seq, "%pi6", &rt->fib6_nh.fib_nh_gw6); } else { diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 6e89151693d0..69f92d2b780e 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -533,7 +533,7 @@ static void rt6_probe(struct fib6_info *rt) * Router Reachability Probe MUST be rate-limited * to no more than one per minute. 
*/ - if (!rt || !rt->fib6_nh.fib_nh_has_gw) + if (!rt || !rt->fib6_nh.fib_nh_gw_family) return; nh_gw = &rt->fib6_nh.fib_nh_gw6; @@ -595,7 +595,7 @@ static inline enum rt6_nud_state rt6_check_neigh(struct fib6_info *rt) struct neighbour *neigh; if (rt->fib6_flags & RTF_NONEXTHOP || - !rt->fib6_nh.fib_nh_has_gw) + !rt->fib6_nh.fib_nh_gw_family) return RT6_NUD_SUCCEED; rcu_read_lock_bh(); @@ -769,7 +769,7 @@ static struct fib6_info *rt6_select(struct net *net, struct fib6_node *fn, static bool rt6_is_gw_or_nonexthop(const struct fib6_info *rt) { - return (rt->fib6_flags & RTF_NONEXTHOP) || rt->fib6_nh.fib_nh_has_gw; + return (rt->fib6_flags & RTF_NONEXTHOP) || rt->fib6_nh.fib_nh_gw_family; } #ifdef CONFIG_IPV6_ROUTE_INFO @@ -975,7 +975,7 @@ static void ip6_rt_copy_init(struct rt6_info *rt, struct fib6_info *ort) rt->rt6i_dst = ort->fib6_dst; rt->rt6i_idev = dev ? in6_dev_get(dev) : NULL; rt->rt6i_flags = ort->fib6_flags; - if (ort->fib6_nh.fib_nh_has_gw) { + if (ort->fib6_nh.fib_nh_gw_family) { rt->rt6i_gateway = ort->fib6_nh.fib_nh_gw6; rt->rt6i_flags |= RTF_GATEWAY; } @@ -1860,7 +1860,7 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, rcu_read_unlock(); return rt; } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) && - !f6i->fib6_nh.fib_nh_has_gw)) { + !f6i->fib6_nh.fib_nh_gw_family)) { /* Create a RTF_CACHE clone which will not be * owned by the fib6 tree. 
It is for the special case where * the daddr in the skb during the neighbor look-up is different @@ -2430,7 +2430,7 @@ restart: continue; if (rt->fib6_flags & RTF_REJECT) break; - if (!rt->fib6_nh.fib_nh_has_gw) + if (!rt->fib6_nh.fib_nh_gw_family) continue; if (fl6->flowi6_oif != rt->fib6_nh.fib_nh_dev->ifindex) continue; @@ -2964,7 +2964,7 @@ int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh, goto out; fib6_nh->fib_nh_gw6 = cfg->fc_gateway; - fib6_nh->fib_nh_has_gw = 1; + fib6_nh->fib_nh_gw_family = AF_INET6; } err = -ENODEV; @@ -3476,7 +3476,7 @@ static struct fib6_info *rt6_get_route_info(struct net *net, if (rt->fib6_nh.fib_nh_dev->ifindex != ifindex) continue; if (!(rt->fib6_flags & RTF_ROUTEINFO) || - !rt->fib6_nh.fib_nh_has_gw) + !rt->fib6_nh.fib_nh_gw_family) continue; if (!ipv6_addr_equal(&rt->fib6_nh.fib_nh_gw6, gwaddr)) continue; @@ -3807,7 +3807,7 @@ static int fib6_clean_tohost(struct fib6_info *rt, void *arg) struct in6_addr *gateway = (struct in6_addr *)arg; if (((rt->fib6_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) && - rt->fib6_nh.fib_nh_has_gw && + rt->fib6_nh.fib_nh_gw_family && ipv6_addr_equal(gateway, &rt->fib6_nh.fib_nh_gw6)) { return -1; } -- cgit v1.2.3-59-g8ed1b From 8ff2e5b26cb84b1b0f502c0b7a3c62e4c4d86acc Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 16 Apr 2019 14:36:09 -0700 Subject: ipv6: Pass fib6_result to fib6_table_lookup tracepoint Change fib6_table_lookup tracepoint to take the fib6_result and use the fib6_info and fib6_nh from it. Signed-off-by: David Ahern Signed-off-by: David S. 
Miller --- include/trace/events/fib6.h | 16 ++++++++-------- net/ipv6/route.c | 6 +++--- 2 files changed, 11 insertions(+), 11 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/fib6.h b/include/trace/events/fib6.h index 6d05ebdd669c..70e252d926ea 100644 --- a/include/trace/events/fib6.h +++ b/include/trace/events/fib6.h @@ -12,10 +12,10 @@ TRACE_EVENT(fib6_table_lookup, - TP_PROTO(const struct net *net, const struct fib6_info *f6i, + TP_PROTO(const struct net *net, const struct fib6_result *res, struct fib6_table *table, const struct flowi6 *flp), - TP_ARGS(net, f6i, table, flp), + TP_ARGS(net, res, table, flp), TP_STRUCT__entry( __field( u32, tb_id ) @@ -39,7 +39,7 @@ TRACE_EVENT(fib6_table_lookup, struct in6_addr *in6; __entry->tb_id = table->tb6_id; - __entry->err = ip6_rt_type_to_error(f6i->fib6_type); + __entry->err = ip6_rt_type_to_error(res->f6i->fib6_type); __entry->oif = flp->flowi6_oif; __entry->iif = flp->flowi6_iif; __entry->tos = ip6_tclass(flp->flowlabel); @@ -62,20 +62,20 @@ TRACE_EVENT(fib6_table_lookup, __entry->dport = 0; } - if (f6i->fib6_nh.fib_nh_dev) { - __assign_str(name, f6i->fib6_nh.fib_nh_dev); + if (res->nh && res->nh->fib_nh_dev) { + __assign_str(name, res->nh->fib_nh_dev); } else { __assign_str(name, "-"); } - if (f6i == net->ipv6.fib6_null_entry) { + if (res->f6i == net->ipv6.fib6_null_entry) { struct in6_addr in6_zero = {}; in6 = (struct in6_addr *)__entry->gw; *in6 = in6_zero; - } else if (f6i) { + } else if (res->nh) { in6 = (struct in6_addr *)__entry->gw; - *in6 = f6i->fib6_nh.fib_nh_gw6; + *in6 = res->nh->fib_nh_gw6; } ), diff --git a/net/ipv6/route.c b/net/ipv6/route.c index a466e2e478e8..405e0784d13b 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1130,7 +1130,7 @@ restart: } out: - trace_fib6_table_lookup(net, res.f6i, table, fl6); + trace_fib6_table_lookup(net, &res, table, fl6); rcu_read_unlock(); @@ -1865,7 +1865,7 @@ redo_rt6_select: } } - trace_fib6_table_lookup(net, res.f6i, table, fl6); 
+ trace_fib6_table_lookup(net, &res, table, fl6); return res.f6i; } @@ -2538,7 +2538,7 @@ out: rcu_read_unlock(); - trace_fib6_table_lookup(net, rt, table, fl6); + trace_fib6_table_lookup(net, &res, table, fl6); return ret; }; -- cgit v1.2.3-59-g8ed1b From 7d21fec90438941b44b699ae73673d2f8a3a9d76 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 16 Apr 2019 14:36:11 -0700 Subject: ipv6: Add fib6_type and fib6_flags to fib6_result Add the fib6_flags and fib6_type to fib6_result. Update the lookup helpers to set them and update post fib lookup users to use the version from the result. This allows nexthop objects to have blackhole nexthop. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 2 ++ include/trace/events/fib6.h | 2 +- net/core/filter.c | 26 +++++++++---------- net/ipv6/route.c | 61 +++++++++++++++++++++++++++------------------ 4 files changed, 52 insertions(+), 39 deletions(-) (limited to 'include/trace') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index cb3277cd1413..6b7557b71c8c 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -193,6 +193,8 @@ struct rt6_info { struct fib6_result { struct fib6_nh *nh; struct fib6_info *f6i; + u32 fib6_flags; + u8 fib6_type; }; #define for_each_fib6_node_rt_rcu(fn) \ diff --git a/include/trace/events/fib6.h b/include/trace/events/fib6.h index 70e252d926ea..c6abdcc77c12 100644 --- a/include/trace/events/fib6.h +++ b/include/trace/events/fib6.h @@ -39,7 +39,7 @@ TRACE_EVENT(fib6_table_lookup, struct in6_addr *in6; __entry->tb_id = table->tb6_id; - __entry->err = ip6_rt_type_to_error(res->f6i->fib6_type); + __entry->err = ip6_rt_type_to_error(res->fib6_type); __entry->oif = flp->flowi6_oif; __entry->iif = flp->flowi6_iif; __entry->tos = ip6_tclass(flp->flowlabel); diff --git a/net/core/filter.c b/net/core/filter.c index d17347cbeb1e..1644a16afcec 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4741,21 +4741,19 @@ static int 
bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params, res.f6i == net->ipv6.fib6_null_entry)) return BPF_FIB_LKUP_RET_NOT_FWDED; - if (unlikely(res.f6i->fib6_flags & RTF_REJECT)) { - switch (res.f6i->fib6_type) { - case RTN_BLACKHOLE: - return BPF_FIB_LKUP_RET_BLACKHOLE; - case RTN_UNREACHABLE: - return BPF_FIB_LKUP_RET_UNREACHABLE; - case RTN_PROHIBIT: - return BPF_FIB_LKUP_RET_PROHIBIT; - default: - return BPF_FIB_LKUP_RET_NOT_FWDED; - } - } - - if (res.f6i->fib6_type != RTN_UNICAST) + switch (res.fib6_type) { + /* only unicast is forwarded */ + case RTN_UNICAST: + break; + case RTN_BLACKHOLE: + return BPF_FIB_LKUP_RET_BLACKHOLE; + case RTN_UNREACHABLE: + return BPF_FIB_LKUP_RET_UNREACHABLE; + case RTN_PROHIBIT: + return BPF_FIB_LKUP_RET_PROHIBIT; + default: return BPF_FIB_LKUP_RET_NOT_FWDED; + } ipv6_stub->fib6_select_path(net, &res, &fl6, fl6.flowi6_oif, fl6.flowi6_oif != 0, NULL, strict); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 5a1e1176c33c..e8c73b7782cd 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -500,31 +500,33 @@ static void rt6_device_match(struct net *net, struct fib6_result *res, if (!oif && ipv6_addr_any(saddr)) { nh = &f6i->fib6_nh; - if (!(nh->fib_nh_flags & RTNH_F_DEAD)) { - res->nh = nh; - return; - } + if (!(nh->fib_nh_flags & RTNH_F_DEAD)) + goto out; } for (spf6i = f6i; spf6i; spf6i = rcu_dereference(spf6i->fib6_next)) { nh = &spf6i->fib6_nh; if (__rt6_device_match(net, nh, saddr, oif, flags)) { res->f6i = spf6i; - res->nh = nh; + goto out; } } if (oif && flags & RT6_LOOKUP_F_IFACE) { res->f6i = net->ipv6.fib6_null_entry; - res->nh = &res->f6i->fib6_nh; - return; + nh = &res->f6i->fib6_nh; + goto out; } - res->nh = &f6i->fib6_nh; - if (res->nh->fib_nh_flags & RTNH_F_DEAD) { + nh = &f6i->fib6_nh; + if (nh->fib_nh_flags & RTNH_F_DEAD) { res->f6i = net->ipv6.fib6_null_entry; - res->nh = &res->f6i->fib6_nh; + nh = &res->f6i->fib6_nh; } +out: + res->nh = nh; + res->fib6_type = res->f6i->fib6_type; + 
res->fib6_flags = res->f6i->fib6_flags; } #ifdef CONFIG_IPV6_ROUTER_PREF @@ -719,6 +721,8 @@ static void __find_rr_leaf(struct fib6_info *f6i_start, if (find_match(nh, f6i->fib6_flags, oif, strict, mpri, do_rr)) { res->f6i = f6i; res->nh = nh; + res->fib6_flags = f6i->fib6_flags; + res->fib6_type = f6i->fib6_type; } } } @@ -796,6 +800,8 @@ out: if (!res->f6i) { res->f6i = net->ipv6.fib6_null_entry; res->nh = &res->f6i->fib6_nh; + res->fib6_flags = res->f6i->fib6_flags; + res->fib6_type = res->f6i->fib6_type; } } @@ -889,15 +895,14 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, static struct net_device *ip6_rt_get_dev_rcu(const struct fib6_result *res) { struct net_device *dev = res->nh->fib_nh_dev; - const struct fib6_info *f6i = res->f6i; - if (f6i->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) { + if (res->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) { /* for copies of local routes, dst->dev needs to be the * device if it is a master device, the master device if * device is enslaved, and the loopback as the default */ if (netif_is_l3_slave(dev) && - !rt6_need_strict(&f6i->fib6_dst.addr)) + !rt6_need_strict(&res->f6i->fib6_dst.addr)) dev = l3mdev_master_dev_rcu(dev); else if (!netif_is_l3_master(dev)) dev = dev_net(dev)->loopback_dev; @@ -943,11 +948,11 @@ static unsigned short fib6_info_dst_flags(struct fib6_info *rt) return flags; } -static void ip6_rt_init_dst_reject(struct rt6_info *rt, struct fib6_info *ort) +static void ip6_rt_init_dst_reject(struct rt6_info *rt, u8 fib6_type) { - rt->dst.error = ip6_rt_type_to_error(ort->fib6_type); + rt->dst.error = ip6_rt_type_to_error(fib6_type); - switch (ort->fib6_type) { + switch (fib6_type) { case RTN_BLACKHOLE: rt->dst.output = dst_discard_out; rt->dst.input = dst_discard; @@ -967,19 +972,19 @@ static void ip6_rt_init_dst_reject(struct rt6_info *rt, struct fib6_info *ort) static void ip6_rt_init_dst(struct rt6_info *rt, const struct fib6_result *res) { - struct fib6_info *ort = res->f6i; + struct fib6_info *f6i 
= res->f6i; - if (ort->fib6_flags & RTF_REJECT) { - ip6_rt_init_dst_reject(rt, ort); + if (res->fib6_flags & RTF_REJECT) { + ip6_rt_init_dst_reject(rt, res->fib6_type); return; } rt->dst.error = 0; rt->dst.output = ip6_output; - if (ort->fib6_type == RTN_LOCAL || ort->fib6_type == RTN_ANYCAST) { + if (res->fib6_type == RTN_LOCAL || res->fib6_type == RTN_ANYCAST) { rt->dst.input = ip6_input; - } else if (ipv6_addr_type(&ort->fib6_dst.addr) & IPV6_ADDR_MULTICAST) { + } else if (ipv6_addr_type(&f6i->fib6_dst.addr) & IPV6_ADDR_MULTICAST) { rt->dst.input = ip6_mc_input; } else { rt->dst.input = ip6_forward; @@ -1012,7 +1017,7 @@ static void ip6_rt_copy_init(struct rt6_info *rt, const struct fib6_result *res) rt->rt6i_dst = f6i->fib6_dst; rt->rt6i_idev = dev ? in6_dev_get(dev) : NULL; - rt->rt6i_flags = f6i->fib6_flags; + rt->rt6i_flags = res->fib6_flags; if (nh->fib_nh_gw_family) { rt->rt6i_gateway = nh->fib_nh_gw6; rt->rt6i_flags |= RTF_GATEWAY; @@ -2365,6 +2370,9 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, return; } res.nh = &res.f6i->fib6_nh; + res.fib6_flags = res.f6i->fib6_flags; + res.fib6_type = res.f6i->fib6_type; + nrt6 = ip6_rt_cache_alloc(&res, daddr, saddr); if (nrt6) { rt6_do_update_pmtu(nrt6, mtu); @@ -2530,10 +2538,13 @@ restart: res.f6i = rt; res.nh = &rt->fib6_nh; out: - if (ret) + if (ret) { ip6_hold_safe(net, &ret); - else + } else { + res.fib6_flags = res.f6i->fib6_flags; + res.fib6_type = res.f6i->fib6_type; ret = ip6_create_rt_rcu(&res); + } rcu_read_unlock(); @@ -3491,6 +3502,8 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu rcu_read_unlock(); res.nh = &res.f6i->fib6_nh; + res.fib6_flags = res.f6i->fib6_flags; + res.fib6_type = res.f6i->fib6_type; nrt = ip6_rt_cache_alloc(&res, &msg->dest, NULL); if (!nrt) goto out; -- cgit v1.2.3-59-g8ed1b From 9df1c28bb75217b244257152ab7d788bb2a386d0 Mon Sep 17 00:00:00 2001 From: Matt Mullins Date: Fri, 26 Apr 2019 11:49:47 -0700 Subject: 
bpf: add writable context for raw tracepoints This is an opt-in interface that allows a tracepoint to provide a safe buffer that can be written from a BPF_PROG_TYPE_RAW_TRACEPOINT program. The size of the buffer must be a compile-time constant, and is checked before allowing a BPF program to attach to a tracepoint that uses this feature. The pointer to this buffer will be the first argument of tracepoints that opt in; the pointer is valid and can be bpf_probe_read() by both BPF_PROG_TYPE_RAW_TRACEPOINT and BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE programs that attach to such a tracepoint, but the buffer to which it points may only be written by the latter. Signed-off-by: Matt Mullins Acked-by: Yonghong Song Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 2 ++ include/linux/bpf_types.h | 1 + include/linux/tracepoint-defs.h | 1 + include/trace/bpf_probe.h | 27 +++++++++++++++++++++++++-- include/uapi/linux/bpf.h | 1 + kernel/bpf/syscall.c | 8 ++++++-- kernel/bpf/verifier.c | 31 +++++++++++++++++++++++++++++++ kernel/trace/bpf_trace.c | 24 ++++++++++++++++++++++++ 8 files changed, 91 insertions(+), 4 deletions(-) (limited to 'include/trace') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index f15432d90728..cd6341eabd74 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -272,6 +272,7 @@ enum bpf_reg_type { PTR_TO_SOCK_COMMON_OR_NULL, /* reg points to sock_common or NULL */ PTR_TO_TCP_SOCK, /* reg points to struct tcp_sock */ PTR_TO_TCP_SOCK_OR_NULL, /* reg points to struct tcp_sock or NULL */ + PTR_TO_TP_BUFFER, /* reg points to a writable raw tp's buffer */ }; /* The information passed from prog-specific *_is_valid_access @@ -361,6 +362,7 @@ struct bpf_prog_aux { u32 used_map_cnt; u32 max_ctx_offset; u32 max_pkt_offset; + u32 max_tp_access; u32 stack_depth; u32 id; u32 func_cnt; /* used by non-func prog as the number of func progs */ diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index d26991a16894..a10d37bce364 100644 
--- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -25,6 +25,7 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe) BPF_PROG_TYPE(BPF_PROG_TYPE_TRACEPOINT, tracepoint) BPF_PROG_TYPE(BPF_PROG_TYPE_PERF_EVENT, perf_event) BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT, raw_tracepoint) +BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, raw_tracepoint_writable) #endif #ifdef CONFIG_CGROUP_BPF BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_DEVICE, cg_dev) diff --git a/include/linux/tracepoint-defs.h b/include/linux/tracepoint-defs.h index 49ba9cde7e4b..b29950a19205 100644 --- a/include/linux/tracepoint-defs.h +++ b/include/linux/tracepoint-defs.h @@ -45,6 +45,7 @@ struct bpf_raw_event_map { struct tracepoint *tp; void *bpf_func; u32 num_args; + u32 writable_size; } __aligned(32); #endif diff --git a/include/trace/bpf_probe.h b/include/trace/bpf_probe.h index 505dae0bed80..d6e556c0a085 100644 --- a/include/trace/bpf_probe.h +++ b/include/trace/bpf_probe.h @@ -69,8 +69,7 @@ __bpf_trace_##call(void *__data, proto) \ * to make sure that if the tracepoint handling changes, the * bpf probe will fail to compile unless it too is updated. */ -#undef DEFINE_EVENT -#define DEFINE_EVENT(template, call, proto, args) \ +#define __DEFINE_EVENT(template, call, proto, args, size) \ static inline void bpf_test_probe_##call(void) \ { \ check_trace_callback_type_##call(__bpf_trace_##template); \ @@ -81,12 +80,36 @@ __bpf_trace_tp_map_##call = { \ .tp = &__tracepoint_##call, \ .bpf_func = (void *)__bpf_trace_##template, \ .num_args = COUNT_ARGS(args), \ + .writable_size = size, \ }; +#define FIRST(x, ...) x + +#undef DEFINE_EVENT_WRITABLE +#define DEFINE_EVENT_WRITABLE(template, call, proto, args, size) \ +static inline void bpf_test_buffer_##call(void) \ +{ \ + /* BUILD_BUG_ON() is ignored if the code is completely eliminated, but \ + * BUILD_BUG_ON_ZERO() uses a different mechanism that is not \ + * dead-code-eliminated. 
\ + */ \ + FIRST(proto); \ + (void)BUILD_BUG_ON_ZERO(size != sizeof(*FIRST(args))); \ +} \ +__DEFINE_EVENT(template, call, PARAMS(proto), PARAMS(args), size) + +#undef DEFINE_EVENT +#define DEFINE_EVENT(template, call, proto, args) \ + __DEFINE_EVENT(template, call, PARAMS(proto), PARAMS(args), 0) #undef DEFINE_EVENT_PRINT #define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args)) #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) + +#undef DEFINE_EVENT_WRITABLE +#undef __DEFINE_EVENT +#undef FIRST + #endif /* CONFIG_BPF_EVENTS */ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index eaf2d3284248..f7fa7a34a62d 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -168,6 +168,7 @@ enum bpf_prog_type { BPF_PROG_TYPE_SK_REUSEPORT, BPF_PROG_TYPE_FLOW_DISSECTOR, BPF_PROG_TYPE_CGROUP_SYSCTL, + BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, }; enum bpf_attach_type { diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index b0de49598341..ae141e745f92 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1789,12 +1789,16 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr) } raw_tp->btp = btp; - prog = bpf_prog_get_type(attr->raw_tracepoint.prog_fd, - BPF_PROG_TYPE_RAW_TRACEPOINT); + prog = bpf_prog_get(attr->raw_tracepoint.prog_fd); if (IS_ERR(prog)) { err = PTR_ERR(prog); goto out_free_tp; } + if (prog->type != BPF_PROG_TYPE_RAW_TRACEPOINT && + prog->type != BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE) { + err = -EINVAL; + goto out_put_prog; + } err = bpf_probe_register(raw_tp->btp, prog); if (err) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 423f242a5efb..2ef442c62c0e 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -405,6 +405,7 @@ static const char * const reg_type_str[] = { [PTR_TO_SOCK_COMMON_OR_NULL] = "sock_common_or_null", [PTR_TO_TCP_SOCK] = "tcp_sock", [PTR_TO_TCP_SOCK_OR_NULL] = "tcp_sock_or_null", + [PTR_TO_TP_BUFFER] = 
"tp_buffer", }; static char slot_type_char[] = { @@ -1993,6 +1994,32 @@ static int check_ctx_reg(struct bpf_verifier_env *env, return 0; } +static int check_tp_buffer_access(struct bpf_verifier_env *env, + const struct bpf_reg_state *reg, + int regno, int off, int size) +{ + if (off < 0) { + verbose(env, + "R%d invalid tracepoint buffer access: off=%d, size=%d", + regno, off, size); + return -EACCES; + } + if (!tnum_is_const(reg->var_off) || reg->var_off.value) { + char tn_buf[48]; + + tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); + verbose(env, + "R%d invalid variable buffer offset: off=%d, var_off=%s", + regno, off, tn_buf); + return -EACCES; + } + if (off + size > env->prog->aux->max_tp_access) + env->prog->aux->max_tp_access = off + size; + + return 0; +} + + /* truncate register to smaller size (in bytes) * must be called with size < BPF_REG_SIZE */ @@ -2137,6 +2164,10 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn err = check_sock_access(env, insn_idx, regno, off, size, t); if (!err && value_regno >= 0) mark_reg_unknown(env, regs, value_regno); + } else if (reg->type == PTR_TO_TP_BUFFER) { + err = check_tp_buffer_access(env, reg, regno, off, size); + if (!err && t == BPF_READ && value_regno >= 0) + mark_reg_unknown(env, regs, value_regno); } else { verbose(env, "R%d invalid mem access '%s'\n", regno, reg_type_str[reg->type]); diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 91800be0c8eb..8607aba1d882 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -915,6 +915,27 @@ const struct bpf_verifier_ops raw_tracepoint_verifier_ops = { const struct bpf_prog_ops raw_tracepoint_prog_ops = { }; +static bool raw_tp_writable_prog_is_valid_access(int off, int size, + enum bpf_access_type type, + const struct bpf_prog *prog, + struct bpf_insn_access_aux *info) +{ + if (off == 0) { + if (size != sizeof(u64) || type != BPF_READ) + return false; + info->reg_type = PTR_TO_TP_BUFFER; + } + return 
raw_tp_prog_is_valid_access(off, size, type, prog, info); +} + +const struct bpf_verifier_ops raw_tracepoint_writable_verifier_ops = { + .get_func_proto = raw_tp_prog_func_proto, + .is_valid_access = raw_tp_writable_prog_is_valid_access, +}; + +const struct bpf_prog_ops raw_tracepoint_writable_prog_ops = { +}; + static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type, const struct bpf_prog *prog, struct bpf_insn_access_aux *info) @@ -1204,6 +1225,9 @@ static int __bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog * if (prog->aux->max_ctx_offset > btp->num_args * sizeof(u64)) return -EINVAL; + if (prog->aux->max_tp_access > btp->writable_size) + return -EINVAL; + return tracepoint_probe_register(tp, (void *)btp->bpf_func, prog); } -- cgit v1.2.3-59-g8ed1b From ea106722c76f08002b69a6983ed84dc18958ba48 Mon Sep 17 00:00:00 2001 From: Matt Mullins Date: Fri, 26 Apr 2019 11:49:48 -0700 Subject: nbd: trace sending nbd requests This adds a tracepoint that can both observe the nbd request being sent to the server, as well as modify that request, e.g., setting a flag in the request that will cause the server to collect detailed tracing data. The struct request * being handled is included to permit correlation with the block tracepoints.
Signed-off-by: Matt Mullins Reviewed-by: Josef Bacik Signed-off-by: Alexei Starovoitov --- MAINTAINERS | 1 + drivers/block/nbd.c | 5 +++++ include/trace/events/nbd.h | 56 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 62 insertions(+) create mode 100644 include/trace/events/nbd.h (limited to 'include/trace') diff --git a/MAINTAINERS b/MAINTAINERS index 72dfb80e8721..025c6d27789e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10741,6 +10741,7 @@ L: linux-block@vger.kernel.org L: nbd@other.debian.org F: Documentation/blockdev/nbd.txt F: drivers/block/nbd.c +F: include/trace/events/nbd.h F: include/uapi/linux/nbd.h NETWORK DROP MONITOR diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 92b8aafb8bb4..24cc10d1f0b4 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -44,6 +44,9 @@ #include #include +#define CREATE_TRACE_POINTS +#include + static DEFINE_IDR(nbd_index_idr); static DEFINE_MUTEX(nbd_index_mutex); static int nbd_total_devices = 0; @@ -526,6 +529,8 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) handle = nbd_cmd_handle(cmd); memcpy(request.handle, &handle, sizeof(handle)); + trace_nbd_send_request(&request, nbd->index, blk_mq_rq_from_pdu(cmd)); + dev_dbg(nbd_to_dev(nbd), "request %p: sending control (%s@%llu,%uB)\n", req, nbdcmd_to_ascii(type), (unsigned long long)blk_rq_pos(req) << 9, blk_rq_bytes(req)); diff --git a/include/trace/events/nbd.h b/include/trace/events/nbd.h new file mode 100644 index 000000000000..5928255ed02e --- /dev/null +++ b/include/trace/events/nbd.h @@ -0,0 +1,56 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM nbd + +#if !defined(_TRACE_NBD_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_NBD_H + +#include + +DECLARE_EVENT_CLASS(nbd_send_request, + + TP_PROTO(struct nbd_request *nbd_request, int index, + struct request *rq), + + TP_ARGS(nbd_request, index, rq), + + TP_STRUCT__entry( + __field(struct nbd_request *, 
nbd_request) + __field(u64, dev_index) + __field(struct request *, request) + ), + + TP_fast_assign( + __entry->nbd_request = 0; + __entry->dev_index = index; + __entry->request = rq; + ), + + TP_printk("nbd%lld: request %p", __entry->dev_index, __entry->request) +); + +#ifdef DEFINE_EVENT_WRITABLE +#undef NBD_DEFINE_EVENT +#define NBD_DEFINE_EVENT(template, call, proto, args, size) \ + DEFINE_EVENT_WRITABLE(template, call, PARAMS(proto), \ + PARAMS(args), size) +#else +#undef NBD_DEFINE_EVENT +#define NBD_DEFINE_EVENT(template, call, proto, args, size) \ + DEFINE_EVENT(template, call, PARAMS(proto), PARAMS(args)) +#endif + +NBD_DEFINE_EVENT(nbd_send_request, nbd_send_request, + + TP_PROTO(struct nbd_request *nbd_request, int index, + struct request *rq), + + TP_ARGS(nbd_request, index, rq), + + sizeof(struct nbd_request) +); + +#endif + +/* This part must be outside protection */ +#include -- cgit v1.2.3-59-g8ed1b From 2abd2de712cd891321a06b0890a85aef1e506cb5 Mon Sep 17 00:00:00 2001 From: Andrew Hall Date: Fri, 26 Apr 2019 11:49:49 -0700 Subject: nbd: add tracepoints for send/receive timing This adds four tracepoints to nbd, enabling separate tracing of payload and header sending/receipt. In the send path for headers that have already been sent, we also explicitly initialize the handle so it can be referenced by the later tracepoint. 
Signed-off-by: Andrew Hall Signed-off-by: Matt Mullins Reviewed-by: Josef Bacik Signed-off-by: Alexei Starovoitov --- drivers/block/nbd.c | 8 ++++++++ include/trace/events/nbd.h | 51 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+) (limited to 'include/trace') diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 24cc10d1f0b4..3e6c3d5dadc8 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -513,6 +513,10 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) if (sent) { if (sent >= sizeof(request)) { skip = sent - sizeof(request); + + /* initialize handle for tracing purposes */ + handle = nbd_cmd_handle(cmd); + goto send_pages; } iov_iter_advance(&from, sent); @@ -536,6 +540,7 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) (unsigned long long)blk_rq_pos(req) << 9, blk_rq_bytes(req)); result = sock_xmit(nbd, index, 1, &from, (type == NBD_CMD_WRITE) ? MSG_MORE : 0, &sent); + trace_nbd_header_sent(req, handle); if (result <= 0) { if (was_interrupted(result)) { /* If we havne't sent anything we can just return BUSY, @@ -608,6 +613,7 @@ send_pages: bio = next; } out: + trace_nbd_payload_sent(req, handle); nsock->pending = NULL; nsock->sent = 0; return 0; @@ -655,6 +661,7 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index) tag, req); return ERR_PTR(-ENOENT); } + trace_nbd_header_received(req, handle); cmd = blk_mq_rq_to_pdu(req); mutex_lock(&cmd->lock); @@ -708,6 +715,7 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index) } } out: + trace_nbd_payload_received(req, handle); mutex_unlock(&cmd->lock); return ret ? 
ERR_PTR(ret) : cmd; } diff --git a/include/trace/events/nbd.h b/include/trace/events/nbd.h index 5928255ed02e..9849956f34d8 100644 --- a/include/trace/events/nbd.h +++ b/include/trace/events/nbd.h @@ -7,6 +7,57 @@ #include +DECLARE_EVENT_CLASS(nbd_transport_event, + + TP_PROTO(struct request *req, u64 handle), + + TP_ARGS(req, handle), + + TP_STRUCT__entry( + __field(struct request *, req) + __field(u64, handle) + ), + + TP_fast_assign( + __entry->req = req; + __entry->handle = handle; + ), + + TP_printk( + "nbd transport event: request %p, handle 0x%016llx", + __entry->req, + __entry->handle + ) +); + +DEFINE_EVENT(nbd_transport_event, nbd_header_sent, + + TP_PROTO(struct request *req, u64 handle), + + TP_ARGS(req, handle) +); + +DEFINE_EVENT(nbd_transport_event, nbd_payload_sent, + + TP_PROTO(struct request *req, u64 handle), + + TP_ARGS(req, handle) +); + +DEFINE_EVENT(nbd_transport_event, nbd_header_received, + + TP_PROTO(struct request *req, u64 handle), + + TP_ARGS(req, handle) +); + +DEFINE_EVENT(nbd_transport_event, nbd_payload_received, + + TP_PROTO(struct request *req, u64 handle), + + TP_ARGS(req, handle) +); + DECLARE_EVENT_CLASS(nbd_send_request, TP_PROTO(struct nbd_request *nbd_request, int index, -- cgit v1.2.3-59-g8ed1b From e950e843367d7990b9d7ea964e3c33876d477c4b Mon Sep 17 00:00:00 2001 From: Matt Mullins Date: Fri, 26 Apr 2019 11:49:51 -0700 Subject: selftests: bpf: test writable buffers in raw tps This tests that: * a BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE cannot be attached if it uses either: * a variable offset to the tracepoint buffer, or * an offset beyond the size of the tracepoint buffer * a tracer can modify the buffer provided when attached to a writable tracepoint in bpf_prog_test_run Signed-off-by: Matt Mullins Acked-by: Yonghong Song Signed-off-by: Alexei Starovoitov --- include/trace/events/bpf_test_run.h | 50 ++++++++++++++ net/bpf/test_run.c | 4 ++ .../raw_tp_writable_reject_nbd_invalid.c | 42 ++++++++++++ 
.../bpf/prog_tests/raw_tp_writable_test_run.c | 80 ++++++++++++++++++++++ .../selftests/bpf/verifier/raw_tp_writable.c | 34 +++++++++ 5 files changed, 210 insertions(+) create mode 100644 include/trace/events/bpf_test_run.h create mode 100644 tools/testing/selftests/bpf/prog_tests/raw_tp_writable_reject_nbd_invalid.c create mode 100644 tools/testing/selftests/bpf/prog_tests/raw_tp_writable_test_run.c create mode 100644 tools/testing/selftests/bpf/verifier/raw_tp_writable.c (limited to 'include/trace') diff --git a/include/trace/events/bpf_test_run.h b/include/trace/events/bpf_test_run.h new file mode 100644 index 000000000000..265447e3f71a --- /dev/null +++ b/include/trace/events/bpf_test_run.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM bpf_test_run + +#if !defined(_TRACE_BPF_TEST_RUN_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_BPF_TEST_RUN_H + +#include + +DECLARE_EVENT_CLASS(bpf_test_finish, + + TP_PROTO(int *err), + + TP_ARGS(err), + + TP_STRUCT__entry( + __field(int, err) + ), + + TP_fast_assign( + __entry->err = *err; + ), + + TP_printk("bpf_test_finish with err=%d", __entry->err) +); + +#ifdef DEFINE_EVENT_WRITABLE +#undef BPF_TEST_RUN_DEFINE_EVENT +#define BPF_TEST_RUN_DEFINE_EVENT(template, call, proto, args, size) \ + DEFINE_EVENT_WRITABLE(template, call, PARAMS(proto), \ + PARAMS(args), size) +#else +#undef BPF_TEST_RUN_DEFINE_EVENT +#define BPF_TEST_RUN_DEFINE_EVENT(template, call, proto, args, size) \ + DEFINE_EVENT(template, call, PARAMS(proto), PARAMS(args)) +#endif + +BPF_TEST_RUN_DEFINE_EVENT(bpf_test_finish, bpf_test_finish, + + TP_PROTO(int *err), + + TP_ARGS(err), + + sizeof(int) +); + +#endif + +/* This part must be outside protection */ +#include diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 8606e5aef0b6..6c4694ae4241 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -13,6 +13,9 @@ #include #include +#define CREATE_TRACE_POINTS +#include + static int 
bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, u32 *retval, u32 *time) { @@ -100,6 +103,7 @@ static int bpf_test_finish(const union bpf_attr *kattr, if (err != -ENOSPC) err = 0; out: + trace_bpf_test_finish(&err); return err; } diff --git a/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_reject_nbd_invalid.c b/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_reject_nbd_invalid.c new file mode 100644 index 000000000000..9807336a3016 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_reject_nbd_invalid.c @@ -0,0 +1,42 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include + +void test_raw_tp_writable_reject_nbd_invalid(void) +{ + __u32 duration = 0; + char error[4096]; + int bpf_fd = -1, tp_fd = -1; + + const struct bpf_insn program[] = { + /* r6 is our tp buffer */ + BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0), + /* one byte beyond the end of the nbd_request struct */ + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_6, + sizeof(struct nbd_request)), + BPF_EXIT_INSN(), + }; + + struct bpf_load_program_attr load_attr = { + .prog_type = BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, + .license = "GPL v2", + .insns = program, + .insns_cnt = sizeof(program) / sizeof(struct bpf_insn), + .log_level = 2, + }; + + bpf_fd = bpf_load_program_xattr(&load_attr, error, sizeof(error)); + if (CHECK(bpf_fd < 0, "bpf_raw_tracepoint_writable load", + "failed: %d errno %d\n", bpf_fd, errno)) + return; + + tp_fd = bpf_raw_tracepoint_open("nbd_send_request", bpf_fd); + if (CHECK(tp_fd >= 0, "bpf_raw_tracepoint_writable open", + "erroneously succeeded\n")) + goto out_bpffd; + + close(tp_fd); +out_bpffd: + close(bpf_fd); +} diff --git a/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_test_run.c b/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_test_run.c new file mode 100644 index 000000000000..5c45424cac5f --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_test_run.c @@ -0,0 +1,80 @@ +// SPDX-License-Identifier: 
GPL-2.0 + +#include +#include + +void test_raw_tp_writable_test_run(void) +{ + __u32 duration = 0; + char error[4096]; + + const struct bpf_insn trace_program[] = { + BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, 0), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + + struct bpf_load_program_attr load_attr = { + .prog_type = BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, + .license = "GPL v2", + .insns = trace_program, + .insns_cnt = sizeof(trace_program) / sizeof(struct bpf_insn), + .log_level = 2, + }; + + int bpf_fd = bpf_load_program_xattr(&load_attr, error, sizeof(error)); + if (CHECK(bpf_fd < 0, "bpf_raw_tracepoint_writable loaded", + "failed: %d errno %d\n", bpf_fd, errno)) + return; + + const struct bpf_insn skb_program[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + + struct bpf_load_program_attr skb_load_attr = { + .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, + .license = "GPL v2", + .insns = skb_program, + .insns_cnt = sizeof(skb_program) / sizeof(struct bpf_insn), + }; + + int filter_fd = + bpf_load_program_xattr(&skb_load_attr, error, sizeof(error)); + if (CHECK(filter_fd < 0, "test_program_loaded", "failed: %d errno %d\n", + filter_fd, errno)) + goto out_bpffd; + + int tp_fd = bpf_raw_tracepoint_open("bpf_test_finish", bpf_fd); + if (CHECK(tp_fd < 0, "bpf_raw_tracepoint_writable opened", + "failed: %d errno %d\n", tp_fd, errno)) + goto out_filterfd; + + char test_skb[128] = { + 0, + }; + + __u32 prog_ret; + int err = bpf_prog_test_run(filter_fd, 1, test_skb, sizeof(test_skb), 0, + 0, &prog_ret, 0); + CHECK(err != 42, "test_run", + "tracepoint did not modify return value\n"); + CHECK(prog_ret != 0, "test_run_ret", + "socket_filter did not return 0\n"); + + close(tp_fd); + + err = bpf_prog_test_run(filter_fd, 1, test_skb, sizeof(test_skb), 0, 0, + &prog_ret, 0); + CHECK(err != 0, "test_run_notrace", + "test_run failed with %d errno %d\n", err, errno); + 
CHECK(prog_ret != 0, "test_run_ret_notrace", + "socket_filter did not return 0\n"); + +out_filterfd: + close(filter_fd); +out_bpffd: + close(bpf_fd); +} diff --git a/tools/testing/selftests/bpf/verifier/raw_tp_writable.c b/tools/testing/selftests/bpf/verifier/raw_tp_writable.c new file mode 100644 index 000000000000..95b5d70a1dc1 --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/raw_tp_writable.c @@ -0,0 +1,34 @@ +{ + "raw_tracepoint_writable: reject variable offset", + .insns = { + /* r6 is our tp buffer */ + BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0), + + BPF_LD_MAP_FD(BPF_REG_1, 0), + /* move the key (== 0) to r10-8 */ + BPF_MOV32_IMM(BPF_REG_0, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0), + /* lookup in the map */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + + /* exit clean if null */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + + /* shift the buffer pointer to a variable location */ + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 0), + BPF_ALU64_REG(BPF_ADD, BPF_REG_6, BPF_REG_0), + /* clobber whatever's there */ + BPF_MOV64_IMM(BPF_REG_7, 4242), + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_7, 0), + + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 1, }, + .prog_type = BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, + .errstr = "R6 invalid variable buffer offset: off=0, var_off=(0x0; 0xffffffff)", +}, -- cgit v1.2.3-59-g8ed1b From 141b6b2ad75d92770240de3af98d55c41ce7cd18 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Wed, 1 May 2019 19:56:59 -0700 Subject: net: add a generic tracepoint for TX queue timeout Although devlink health report does a nice job on reporting TX timeout and other NIC errors, unfortunately it requires drivers to support it but currently only mlx5 has implemented it. Before other drivers could catch up, it is useful to have a generic tracepoint to monitor this kind of TX timeout. 
We have been suffering TX timeouts with different drivers, so we plan to start monitoring them with rasdaemon, which just needs a new tracepoint. Sample output: ksoftirqd/1-16 [001] ..s2 144.043173: net_dev_xmit_timeout: dev=ens3 driver=e1000 queue=0 Cc: Eran Ben Elisha Cc: Jiri Pirko Signed-off-by: Cong Wang Acked-by: Jiri Pirko Reviewed-by: Eran Ben Elisha Signed-off-by: David S. Miller --- include/trace/events/net.h | 23 +++++++++++++++++++++++ net/sched/sch_generic.c | 2 ++ 2 files changed, 25 insertions(+) (limited to 'include/trace') diff --git a/include/trace/events/net.h b/include/trace/events/net.h index 1efd7d9b25fe..2399073c3afc 100644 --- a/include/trace/events/net.h +++ b/include/trace/events/net.h @@ -95,6 +95,29 @@ TRACE_EVENT(net_dev_xmit, __get_str(name), __entry->skbaddr, __entry->len, __entry->rc) ); +TRACE_EVENT(net_dev_xmit_timeout, + + TP_PROTO(struct net_device *dev, + int queue_index), + + TP_ARGS(dev, queue_index), + + TP_STRUCT__entry( + __string( name, dev->name ) + __string( driver, netdev_drivername(dev)) + __field( int, queue_index ) + ), + + TP_fast_assign( + __assign_str(name, dev->name); + __assign_str(driver, netdev_drivername(dev)); + __entry->queue_index = queue_index; + ), + + TP_printk("dev=%s driver=%s queue=%d", + __get_str(name), __get_str(driver), __entry->queue_index) +); + DECLARE_EVENT_CLASS(net_dev_template, TP_PROTO(struct sk_buff *skb), diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 848aab3693bd..cce1e9ee85af 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -32,6 +32,7 @@ #include #include #include +#include #include /* Qdisc to use by default */ @@ -441,6 +442,7 @@ static void dev_watchdog(struct timer_list *t) } if (some_queue_timedout) { + trace_net_dev_xmit_timeout(dev, i); WARN_ONCE(1, KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit queue %u timed out\n", dev->name, netdev_drivername(dev), i); dev->netdev_ops->ndo_tx_timeout(dev); -- cgit v1.2.3-59-g8ed1b