diff options
Diffstat (limited to 'net/sched')
67 files changed, 1284 insertions, 1430 deletions
diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 32563cef85bf..9b31a10cc639 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -195,7 +195,7 @@ static int offload_action_init(struct flow_offload_action *fl_action, if (act->ops->offload_act_setup) { spin_lock_bh(&act->tcfa_lock); err = act->ops->offload_act_setup(act, fl_action, NULL, - false); + false, extack); spin_unlock_bh(&act->tcfa_lock); return err; } @@ -271,10 +271,10 @@ static int tcf_action_offload_add_ex(struct tc_action *action, if (err) goto fl_err; - err = tc_setup_action(&fl_action->action, actions); + err = tc_setup_action(&fl_action->action, actions, extack); if (err) { NL_SET_ERR_MSG_MOD(extack, - "Failed to setup tc actions for offload\n"); + "Failed to setup tc actions for offload"); goto fl_err; } @@ -588,7 +588,8 @@ static int tcf_idr_release_unsafe(struct tc_action *p) } static int tcf_del_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb, - const struct tc_action_ops *ops) + const struct tc_action_ops *ops, + struct netlink_ext_ack *extack) { struct nlattr *nest; int n_i = 0; @@ -604,20 +605,25 @@ static int tcf_del_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb, if (nla_put_string(skb, TCA_KIND, ops->kind)) goto nla_put_failure; + ret = 0; mutex_lock(&idrinfo->lock); idr_for_each_entry_ul(idr, p, tmp, id) { if (IS_ERR(p)) continue; ret = tcf_idr_release_unsafe(p); - if (ret == ACT_P_DELETED) { + if (ret == ACT_P_DELETED) module_put(ops->owner); - n_i++; - } else if (ret < 0) { - mutex_unlock(&idrinfo->lock); - goto nla_put_failure; - } + else if (ret < 0) + break; + n_i++; } mutex_unlock(&idrinfo->lock); + if (ret < 0) { + if (n_i) + NL_SET_ERR_MSG(extack, "Unable to flush all TC actions"); + else + goto nla_put_failure; + } ret = nla_put_u32(skb, TCA_FCNT, n_i); if (ret) @@ -638,7 +644,7 @@ int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb, struct tcf_idrinfo *idrinfo = tn->idrinfo; if (type == RTM_DELACTION) { - return tcf_del_walker(idrinfo, skb, ops); + return tcf_del_walker(idrinfo, skb, ops, extack); } else if (type == RTM_GETACTION) { return tcf_dump_walker(idrinfo, skb, cb); } else { @@ -670,6 +676,31 @@ int tcf_idr_search(struct tc_action_net *tn, struct tc_action **a, u32 index) } EXPORT_SYMBOL(tcf_idr_search); +static int __tcf_generic_walker(struct net *net, struct sk_buff *skb, + struct netlink_callback *cb, int type, + const struct tc_action_ops *ops, + struct netlink_ext_ack *extack) +{ + struct tc_action_net *tn = net_generic(net, ops->net_id); + + if (unlikely(ops->walk)) + return ops->walk(net, skb, cb, type, ops, extack); + + return tcf_generic_walker(tn, skb, cb, type, ops, extack); +} + +static int __tcf_idr_search(struct net *net, + const struct tc_action_ops *ops, + struct tc_action **a, u32 index) +{ + struct tc_action_net *tn = net_generic(net, ops->net_id); + + if (unlikely(ops->lookup)) + return ops->lookup(net, a, index); + + return tcf_idr_search(tn, a, index); +} + static int tcf_idr_delete_index(struct tcf_idrinfo *idrinfo, u32 index) { struct tc_action *p; @@ -920,7 +951,7 @@ int tcf_register_action(struct tc_action_ops *act, struct tc_action_ops *a; int ret; - if (!act->act || !act->dump || !act->init || !act->walk || !act->lookup) + if (!act->act || !act->dump || !act->init) return -EINVAL; /* We have to register pernet ops before making the action ops visible, @@ -1037,6 +1068,7 @@ int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions, restart_act_graph: for (i = 0; i < nr_actions; i++) { const struct tc_action *a = actions[i]; + int repeat_ttl; if (jmp_prgcnt > 0) { jmp_prgcnt -= 1; @@ -1045,11 +1077,17 @@ restart_act_graph: if (tc_act_skip_sw(a->tcfa_flags)) continue; + + repeat_ttl = 32; repeat: ret = a->ops->act(skb, a, res); - if (ret == TC_ACT_REPEAT) - goto repeat; /* we need a ttl - JHS */ - + if (unlikely(ret == TC_ACT_REPEAT)) { + if (--repeat_ttl != 0) + goto repeat; + /* suspicious opcode, stop pipeline */ + net_warn_ratelimited("TC_ACT_REPEAT abuse ?\n"); + return TC_ACT_OK; + } if (TC_ACT_EXT_CMP(ret, TC_ACT_JUMP)) { jmp_prgcnt = ret & TCA_ACT_MAX_PRIO_MASK; if (!jmp_prgcnt || (jmp_prgcnt > nr_actions)) { @@ -1439,6 +1477,8 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla, continue; if (skip_sw != tc_act_skip_sw(act->tcfa_flags) || skip_hw != tc_act_skip_hw(act->tcfa_flags)) { + NL_SET_ERR_MSG(extack, + "Mismatch between action and filter offload flags"); err = -EINVAL; goto err; } @@ -1623,7 +1663,7 @@ static struct tc_action *tcf_action_get_1(struct net *net, struct nlattr *nla, goto err_out; } err = -ENOENT; - if (ops->lookup(net, &a, index) == 0) { + if (__tcf_idr_search(net, ops, &a, index) == 0) { NL_SET_ERR_MSG(extack, "TC action with specified index not found"); goto err_mod; } @@ -1688,7 +1728,7 @@ static int tca_action_flush(struct net *net, struct nlattr *nla, goto out_module_put; } - err = ops->walk(net, skb, &dcb, RTM_DELACTION, ops, extack); + err = __tcf_generic_walker(net, skb, &dcb, RTM_DELACTION, ops, extack); if (err <= 0) { nla_nest_cancel(skb, nest); goto out_module_put; @@ -2106,7 +2146,7 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) if (nest == NULL) goto out_module_put; - ret = a_o->walk(net, skb, cb, RTM_GETACTION, a_o, NULL); + ret = __tcf_generic_walker(net, skb, cb, RTM_GETACTION, a_o, NULL); if (ret < 0) goto out_module_put; diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index a77d8908e737..b79eee44e24e 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -29,7 +29,6 @@ struct tcf_bpf_cfg { bool is_ebpf; }; -static unsigned int bpf_net_id; static struct tc_action_ops act_bpf_ops; static int tcf_bpf_act(struct sk_buff *skb, const struct tc_action *act, @@ -53,6 +52,8 @@ static int tcf_bpf_act(struct sk_buff *skb, const struct tc_action *act, bpf_compute_data_pointers(skb); filter_res = bpf_prog_run(filter, skb); } + if (unlikely(!skb->tstamp && skb->mono_delivery_time)) + skb->mono_delivery_time = 0; if (skb_sk_is_prefetched(skb) && filter_res != TC_ACT_OK) skb_orphan(skb); @@ -278,7 +279,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { - struct tc_action_net *tn = net_generic(net, bpf_net_id); + struct tc_action_net *tn = net_generic(net, act_bpf_ops.net_id); bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_ACT_BPF_MAX + 1]; struct tcf_chain *goto_ch = NULL; @@ -332,7 +333,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla, is_bpf = tb[TCA_ACT_BPF_OPS_LEN] && tb[TCA_ACT_BPF_OPS]; is_ebpf = tb[TCA_ACT_BPF_FD]; - if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf)) { + if (is_bpf == is_ebpf) { ret = -EINVAL; goto put_chain; } @@ -388,23 +389,6 @@ static void tcf_bpf_cleanup(struct tc_action *act) tcf_bpf_cfg_cleanup(&tmp); } -static int tcf_bpf_walker(struct net *net, struct sk_buff *skb, - struct netlink_callback *cb, int type, - const struct tc_action_ops *ops, - struct netlink_ext_ack *extack) -{ - struct tc_action_net *tn = net_generic(net, bpf_net_id); - - return tcf_generic_walker(tn, skb, cb, type, ops, extack); -} - -static int tcf_bpf_search(struct net *net, struct tc_action **a, u32 index) -{ - struct tc_action_net *tn = net_generic(net, bpf_net_id); - - return tcf_idr_search(tn, a, index); -} - static struct tc_action_ops act_bpf_ops __read_mostly = { .kind = "bpf", .id = TCA_ID_BPF, @@ -413,27 +397,25 @@ static struct tc_action_ops act_bpf_ops __read_mostly = { .dump = tcf_bpf_dump, .cleanup = tcf_bpf_cleanup, .init = tcf_bpf_init, - .walk = tcf_bpf_walker, - .lookup = tcf_bpf_search, .size = sizeof(struct tcf_bpf), }; static __net_init int bpf_init_net(struct net *net) { - struct tc_action_net *tn = net_generic(net, bpf_net_id); + struct tc_action_net *tn = net_generic(net, act_bpf_ops.net_id); return tc_action_net_init(net, tn, &act_bpf_ops); } static void __net_exit bpf_exit_net(struct list_head *net_list) { - tc_action_net_exit(net_list, bpf_net_id); + tc_action_net_exit(net_list, act_bpf_ops.net_id); } static struct pernet_operations bpf_net_ops = { .init = bpf_init_net, .exit_batch = bpf_exit_net, - .id = &bpf_net_id, + .id = &act_bpf_ops.net_id, .size = sizeof(struct tc_action_net), }; diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c index 09e2aafc8943..66b143bb04ac 100644 --- a/net/sched/act_connmark.c +++ b/net/sched/act_connmark.c @@ -25,7 +25,6 @@ #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/nf_conntrack_zones.h> -static unsigned int connmark_net_id; static struct tc_action_ops act_connmark_ops; static int tcf_connmark_act(struct sk_buff *skb, const struct tc_action *a, @@ -99,7 +98,7 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { - struct tc_action_net *tn = net_generic(net, connmark_net_id); + struct tc_action_net *tn = net_generic(net, act_connmark_ops.net_id); struct nlattr *tb[TCA_CONNMARK_MAX + 1]; bool bind = flags & TCA_ACT_FLAGS_BIND; struct tcf_chain *goto_ch = NULL; @@ -200,23 +199,6 @@ nla_put_failure: return -1; } -static int tcf_connmark_walker(struct net *net, struct sk_buff *skb, - struct netlink_callback *cb, int type, - const struct tc_action_ops *ops, - struct netlink_ext_ack *extack) -{ - struct tc_action_net *tn = net_generic(net, connmark_net_id); - - return tcf_generic_walker(tn, skb, cb, type, ops, extack); -} - -static int tcf_connmark_search(struct net *net, struct tc_action **a, u32 index) -{ - struct tc_action_net *tn = net_generic(net, connmark_net_id); - - return tcf_idr_search(tn, a, index); -} - static struct tc_action_ops act_connmark_ops = { .kind = "connmark", .id = TCA_ID_CONNMARK, @@ -224,27 +206,25 @@ static struct tc_action_ops act_connmark_ops = { .act = tcf_connmark_act, .dump = tcf_connmark_dump, .init = tcf_connmark_init, - .walk = tcf_connmark_walker, - .lookup = tcf_connmark_search, .size = sizeof(struct tcf_connmark_info), }; static __net_init int connmark_init_net(struct net *net) { - struct tc_action_net *tn = net_generic(net, connmark_net_id); + struct tc_action_net *tn = net_generic(net, act_connmark_ops.net_id); return tc_action_net_init(net, tn, &act_connmark_ops); } static void __net_exit connmark_exit_net(struct list_head *net_list) { - tc_action_net_exit(net_list, connmark_net_id); + tc_action_net_exit(net_list, act_connmark_ops.net_id); } static struct pernet_operations connmark_net_ops = { .init = connmark_init_net, .exit_batch = connmark_exit_net, - .id = &connmark_net_id, + .id = &act_connmark_ops.net_id, .size = sizeof(struct tc_action_net), }; diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index e0f515b774ca..1366adf9b909 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -37,7 +37,6 @@ static const struct nla_policy csum_policy[TCA_CSUM_MAX + 1] = { [TCA_CSUM_PARMS] = { .len = sizeof(struct tc_csum), }, }; -static unsigned int csum_net_id; static struct tc_action_ops act_csum_ops; static int tcf_csum_init(struct net *net, struct nlattr *nla, @@ -45,7 +44,7 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { - struct tc_action_net *tn = net_generic(net, csum_net_id); + struct tc_action_net *tn = net_generic(net, act_csum_ops.net_id); bool bind = flags & TCA_ACT_FLAGS_BIND; struct tcf_csum_params *params_new; struct nlattr *tb[TCA_CSUM_MAX + 1]; @@ -673,30 +672,14 @@ static void tcf_csum_cleanup(struct tc_action *a) kfree_rcu(params, rcu); } -static int tcf_csum_walker(struct net *net, struct sk_buff *skb, - struct netlink_callback *cb, int type, - const struct tc_action_ops *ops, - struct netlink_ext_ack *extack) -{ - struct tc_action_net *tn = net_generic(net, csum_net_id); - - return tcf_generic_walker(tn, skb, cb, type, ops, extack); -} - -static int tcf_csum_search(struct net *net, struct tc_action **a, u32 index) -{ - struct tc_action_net *tn = net_generic(net, csum_net_id); - - return tcf_idr_search(tn, a, index); -} - static size_t tcf_csum_get_fill_size(const struct tc_action *act) { return nla_total_size(sizeof(struct tc_csum)); } static int tcf_csum_offload_act_setup(struct tc_action *act, void *entry_data, - u32 *index_inc, bool bind) + u32 *index_inc, bool bind, + struct netlink_ext_ack *extack) { if (bind) { struct flow_action_entry *entry = entry_data; @@ -721,8 +704,6 @@ static struct tc_action_ops act_csum_ops = { .dump = tcf_csum_dump, .init = tcf_csum_init, .cleanup = tcf_csum_cleanup, - .walk = tcf_csum_walker, - .lookup = tcf_csum_search, .get_fill_size = tcf_csum_get_fill_size, .offload_act_setup = tcf_csum_offload_act_setup, .size = sizeof(struct tcf_csum), @@ -730,20 +711,20 @@ static struct tc_action_ops act_csum_ops = { static __net_init int csum_init_net(struct net *net) { - struct tc_action_net *tn = net_generic(net, csum_net_id); + struct tc_action_net *tn = net_generic(net, act_csum_ops.net_id); return tc_action_net_init(net, tn, &act_csum_ops); } static void __net_exit csum_exit_net(struct list_head *net_list) { - tc_action_net_exit(net_list, csum_net_id); + tc_action_net_exit(net_list, act_csum_ops.net_id); } static struct pernet_operations csum_net_ops = { .init = csum_init_net, .exit_batch = csum_exit_net, - .id = &csum_net_id, + .id = &act_csum_ops.net_id, .size = sizeof(struct tc_action_net), }; diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index f99247fc6468..b38d91d6b249 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -57,12 +57,6 @@ static const struct rhashtable_params zones_params = { .automatic_shrinking = true, }; -static struct nf_ct_ext_type act_ct_extend __read_mostly = { - .len = sizeof(struct nf_conn_act_ct_ext), - .align = __alignof__(struct nf_conn_act_ct_ext), - .id = NF_CT_EXT_ACT_CT, -}; - static struct flow_action_entry * tcf_ct_flow_table_flow_action_get_next(struct flow_action *flow_action) { @@ -283,7 +277,7 @@ static struct nf_flowtable_type flowtable_ct = { .owner = THIS_MODULE, }; -static int tcf_ct_flow_table_get(struct tcf_ct_params *params) +static int tcf_ct_flow_table_get(struct net *net, struct tcf_ct_params *params) { struct tcf_ct_flow_table *ct_ft; int err = -ENOMEM; @@ -309,6 +303,7 @@ static int tcf_ct_flow_table_get(struct tcf_ct_params *params) err = nf_flow_table_init(&ct_ft->nf_ft); if (err) goto err_init; + write_pnet(&ct_ft->nf_ft.net, net); __module_get(THIS_MODULE); out_unlock: @@ -361,6 +356,13 @@ static void tcf_ct_flow_table_put(struct tcf_ct_params *params) } } +static void tcf_ct_flow_tc_ifidx(struct flow_offload *entry, + struct nf_conn_act_ct_ext *act_ct_ext, u8 dir) +{ + entry->tuplehash[dir].tuple.xmit_type = FLOW_OFFLOAD_XMIT_TC; + entry->tuplehash[dir].tuple.tc.iifidx = act_ct_ext->ifindex[dir]; +} + static void tcf_ct_flow_table_add(struct tcf_ct_flow_table *ct_ft, struct nf_conn *ct, bool tcp) @@ -385,10 +387,8 @@ static void tcf_ct_flow_table_add(struct tcf_ct_flow_table *ct_ft, act_ct_ext = nf_conn_act_ct_ext_find(ct); if (act_ct_ext) { - entry->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.iifidx = - act_ct_ext->ifindex[IP_CT_DIR_ORIGINAL]; - entry->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.iifidx = - act_ct_ext->ifindex[IP_CT_DIR_REPLY]; + tcf_ct_flow_tc_ifidx(entry, act_ct_ext, FLOW_OFFLOAD_DIR_ORIGINAL); + tcf_ct_flow_tc_ifidx(entry, act_ct_ext, FLOW_OFFLOAD_DIR_REPLY); } err = flow_offload_add(&ct_ft->nf_ft, entry); @@ -421,6 +421,19 @@ static void tcf_ct_flow_table_process_conn(struct tcf_ct_flow_table *ct_ft, break; case IPPROTO_UDP: break; +#ifdef CONFIG_NF_CT_PROTO_GRE + case IPPROTO_GRE: { + struct nf_conntrack_tuple *tuple; + + if (ct->status & IPS_NAT_MASK) + return; + tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; + /* No support for GRE v1 */ + if (tuple->src.u.gre.key || tuple->dst.u.gre.key) + return; + break; + } +#endif default: return; } @@ -440,6 +453,8 @@ tcf_ct_flow_table_fill_tuple_ipv4(struct sk_buff *skb, struct flow_ports *ports; unsigned int thoff; struct iphdr *iph; + size_t hdrsize; + u8 ipproto; if (!pskb_network_may_pull(skb, sizeof(*iph))) return false; @@ -451,29 +466,54 @@ tcf_ct_flow_table_fill_tuple_ipv4(struct sk_buff *skb, unlikely(thoff != sizeof(struct iphdr))) return false; - if (iph->protocol != IPPROTO_TCP && - iph->protocol != IPPROTO_UDP) + ipproto = iph->protocol; + switch (ipproto) { + case IPPROTO_TCP: + hdrsize = sizeof(struct tcphdr); + break; + case IPPROTO_UDP: + hdrsize = sizeof(*ports); + break; +#ifdef CONFIG_NF_CT_PROTO_GRE + case IPPROTO_GRE: + hdrsize = sizeof(struct gre_base_hdr); + break; +#endif + default: return false; + } if (iph->ttl <= 1) return false; - if (!pskb_network_may_pull(skb, iph->protocol == IPPROTO_TCP ? - thoff + sizeof(struct tcphdr) : - thoff + sizeof(*ports))) + if (!pskb_network_may_pull(skb, thoff + hdrsize)) return false; - iph = ip_hdr(skb); - if (iph->protocol == IPPROTO_TCP) + switch (ipproto) { + case IPPROTO_TCP: *tcph = (void *)(skb_network_header(skb) + thoff); + fallthrough; + case IPPROTO_UDP: + ports = (struct flow_ports *)(skb_network_header(skb) + thoff); + tuple->src_port = ports->source; + tuple->dst_port = ports->dest; + break; + case IPPROTO_GRE: { + struct gre_base_hdr *greh; + + greh = (struct gre_base_hdr *)(skb_network_header(skb) + thoff); + if ((greh->flags & GRE_VERSION) != GRE_VERSION_0) + return false; + break; + } + } + + iph = ip_hdr(skb); - ports = (struct flow_ports *)(skb_network_header(skb) + thoff); tuple->src_v4.s_addr = iph->saddr; tuple->dst_v4.s_addr = iph->daddr; - tuple->src_port = ports->source; - tuple->dst_port = ports->dest; tuple->l3proto = AF_INET; - tuple->l4proto = iph->protocol; + tuple->l4proto = ipproto; return true; } @@ -486,36 +526,63 @@ tcf_ct_flow_table_fill_tuple_ipv6(struct sk_buff *skb, struct flow_ports *ports; struct ipv6hdr *ip6h; unsigned int thoff; + size_t hdrsize; + u8 nexthdr; if (!pskb_network_may_pull(skb, sizeof(*ip6h))) return false; ip6h = ipv6_hdr(skb); + thoff = sizeof(*ip6h); - if (ip6h->nexthdr != IPPROTO_TCP && - ip6h->nexthdr != IPPROTO_UDP) + nexthdr = ip6h->nexthdr; + switch (nexthdr) { + case IPPROTO_TCP: + hdrsize = sizeof(struct tcphdr); + break; + case IPPROTO_UDP: + hdrsize = sizeof(*ports); + break; +#ifdef CONFIG_NF_CT_PROTO_GRE + case IPPROTO_GRE: + hdrsize = sizeof(struct gre_base_hdr); + break; +#endif + default: return false; + } if (ip6h->hop_limit <= 1) return false; - thoff = sizeof(*ip6h); - if (!pskb_network_may_pull(skb, ip6h->nexthdr == IPPROTO_TCP ? - thoff + sizeof(struct tcphdr) : - thoff + sizeof(*ports))) + if (!pskb_network_may_pull(skb, thoff + hdrsize)) return false; - ip6h = ipv6_hdr(skb); - if (ip6h->nexthdr == IPPROTO_TCP) + switch (nexthdr) { + case IPPROTO_TCP: *tcph = (void *)(skb_network_header(skb) + thoff); + fallthrough; + case IPPROTO_UDP: + ports = (struct flow_ports *)(skb_network_header(skb) + thoff); + tuple->src_port = ports->source; + tuple->dst_port = ports->dest; + break; + case IPPROTO_GRE: { + struct gre_base_hdr *greh; + + greh = (struct gre_base_hdr *)(skb_network_header(skb) + thoff); + if ((greh->flags & GRE_VERSION) != GRE_VERSION_0) + return false; + break; + } + } + + ip6h = ipv6_hdr(skb); - ports = (struct flow_ports *)(skb_network_header(skb) + thoff); tuple->src_v6 = ip6h->saddr; tuple->dst_v6 = ip6h->daddr; - tuple->src_port = ports->source; - tuple->dst_port = ports->dest; tuple->l3proto = AF_INET6; - tuple->l4proto = ip6h->nexthdr; + tuple->l4proto = nexthdr; return true; } @@ -533,11 +600,6 @@ static bool tcf_ct_flow_table_lookup(struct tcf_ct_params *p, struct nf_conn *ct; u8 dir; - /* Previously seen or loopback */ - ct = nf_ct_get(skb, &ctinfo); - if ((ct && !nf_ct_is_template(ct)) || ctinfo == IP_CT_UNTRACKED) - return false; - switch (family) { case NFPROTO_IPV4: if (!tcf_ct_flow_table_fill_tuple_ipv4(skb, &tuple, &tcph)) @@ -587,7 +649,6 @@ static void tcf_ct_flow_tables_uninit(void) } static struct tc_action_ops act_ct_ops; -static unsigned int ct_net_id; struct tc_ct_action_net { struct tc_action_net tn; /* Must be first */ @@ -605,22 +666,25 @@ static bool tcf_ct_skb_nfct_cached(struct net *net, struct sk_buff *skb, if (!ct) return false; if (!net_eq(net, read_pnet(&ct->ct_net))) - return false; + goto drop_ct; if (nf_ct_zone(ct)->id != zone_id) - return false; + goto drop_ct; /* Force conntrack entry direction. */ if (force && CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) { if (nf_ct_is_confirmed(ct)) nf_ct_kill(ct); - nf_ct_put(ct); - nf_ct_set(skb, NULL, IP_CT_UNTRACKED); - - return false; + goto drop_ct; } return true; + +drop_ct: + nf_ct_put(ct); + nf_ct_set(skb, NULL, IP_CT_UNTRACKED); + + return false; } /* Trim the skb to the length specified by the IP/IPv6 header, @@ -632,7 +696,6 @@ static bool tcf_ct_skb_nfct_cached(struct net *net, struct sk_buff *skb, static int tcf_ct_skb_network_trim(struct sk_buff *skb, int family) { unsigned int len; - int err; switch (family) { case NFPROTO_IPV4: @@ -646,9 +709,7 @@ static int tcf_ct_skb_network_trim(struct sk_buff *skb, int family) len = skb->len; } - err = pskb_trim_rcsum(skb, len); - - return err; + return pskb_trim_rcsum(skb, len); } static u8 tcf_ct_skb_nf_family(struct sk_buff *skb) @@ -1190,7 +1251,7 @@ static int tcf_ct_fill_params(struct net *net, struct nlattr **tb, struct netlink_ext_ack *extack) { - struct tc_ct_action_net *tn = net_generic(net, ct_net_id); + struct tc_ct_action_net *tn = net_generic(net, act_ct_ops.net_id); struct nf_conntrack_zone zone; struct nf_conn *tmpl; int err; @@ -1265,7 +1326,7 @@ static int tcf_ct_init(struct net *net, struct nlattr *nla, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { - struct tc_action_net *tn = net_generic(net, ct_net_id); + struct tc_action_net *tn = net_generic(net, act_ct_ops.net_id); bool bind = flags & TCA_ACT_FLAGS_BIND; struct tcf_ct_params *params = NULL; struct nlattr *tb[TCA_CT_MAX + 1]; @@ -1327,9 +1388,9 @@ static int tcf_ct_init(struct net *net, struct nlattr *nla, if (err) goto cleanup; - err = tcf_ct_flow_table_get(params); + err = tcf_ct_flow_table_get(net, params); if (err) - goto cleanup; + goto cleanup_params; spin_lock_bh(&c->tcf_lock); goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); @@ -1344,6 +1405,9 @@ static int tcf_ct_init(struct net *net, struct nlattr *nla, return res; +cleanup_params: + if (params->tmpl) + nf_ct_put(params->tmpl); cleanup: if (goto_ch) tcf_chain_put_by_act(goto_ch); @@ -1493,23 +1557,6 @@ nla_put_failure: return -1; } -static int tcf_ct_walker(struct net *net, struct sk_buff *skb, - struct netlink_callback *cb, int type, - const struct tc_action_ops *ops, - struct netlink_ext_ack *extack) -{ - struct tc_action_net *tn = net_generic(net, ct_net_id); - - return tcf_generic_walker(tn, skb, cb, type, ops, extack); -} - -static int tcf_ct_search(struct net *net, struct tc_action **a, u32 index) -{ - struct tc_action_net *tn = net_generic(net, ct_net_id); - - return tcf_idr_search(tn, a, index); -} - static void tcf_stats_update(struct tc_action *a, u64 bytes, u64 packets, u64 drops, u64 lastuse, bool hw) { @@ -1520,7 +1567,8 @@ static void tcf_stats_update(struct tc_action *a, u64 bytes, u64 packets, } static int tcf_ct_offload_act_setup(struct tc_action *act, void *entry_data, - u32 *index_inc, bool bind) + u32 *index_inc, bool bind, + struct netlink_ext_ack *extack) { if (bind) { struct flow_action_entry *entry = entry_data; @@ -1547,8 +1595,6 @@ static struct tc_action_ops act_ct_ops = { .dump = tcf_ct_dump, .init = tcf_ct_init, .cleanup = tcf_ct_cleanup, - .walk = tcf_ct_walker, - .lookup = tcf_ct_search, .stats_update = tcf_stats_update, .offload_act_setup = tcf_ct_offload_act_setup, .size = sizeof(struct tcf_ct), @@ -1557,7 +1603,7 @@ static struct tc_action_ops act_ct_ops = { static __net_init int ct_init_net(struct net *net) { unsigned int n_bits = sizeof_field(struct tcf_ct_params, labels) * 8; - struct tc_ct_action_net *tn = net_generic(net, ct_net_id); + struct tc_ct_action_net *tn = net_generic(net, act_ct_ops.net_id); if (nf_connlabels_get(net, n_bits - 1)) { tn->labels = false; @@ -1575,20 +1621,20 @@ static void __net_exit ct_exit_net(struct list_head *net_list) rtnl_lock(); list_for_each_entry(net, net_list, exit_list) { - struct tc_ct_action_net *tn = net_generic(net, ct_net_id); + struct tc_ct_action_net *tn = net_generic(net, act_ct_ops.net_id); if (tn->labels) nf_connlabels_put(net); } rtnl_unlock(); - tc_action_net_exit(net_list, ct_net_id); + tc_action_net_exit(net_list, act_ct_ops.net_id); } static struct pernet_operations ct_net_ops = { .init = ct_init_net, .exit_batch = ct_exit_net, - .id = &ct_net_id, + .id = &act_ct_ops.net_id, .size = sizeof(struct tc_ct_action_net), }; @@ -1608,16 +1654,10 @@ static int __init ct_init_module(void) if (err) goto err_register; - err = nf_ct_extend_register(&act_ct_extend); - if (err) - goto err_register_extend; - static_branch_inc(&tcf_frag_xmit_count); return 0; -err_register_extend: - tcf_unregister_action(&act_ct_ops, &ct_net_ops); err_register: tcf_ct_flow_tables_uninit(); err_tbl_init: @@ -1628,7 +1668,6 @@ err_tbl_init: static void __exit ct_cleanup_module(void) { static_branch_dec(&tcf_frag_xmit_count); - nf_ct_extend_unregister(&act_ct_extend); tcf_unregister_action(&act_ct_ops, &ct_net_ops); tcf_ct_flow_tables_uninit(); destroy_workqueue(act_ct_wq); diff --git a/net/sched/act_ctinfo.c b/net/sched/act_ctinfo.c index 0281e45987a4..d4102f0a9abd 100644 --- a/net/sched/act_ctinfo.c +++ b/net/sched/act_ctinfo.c @@ -25,7 +25,6 @@ #include <net/netfilter/nf_conntrack_zones.h> static struct tc_action_ops act_ctinfo_ops; -static unsigned int ctinfo_net_id; static void tcf_ctinfo_dscp_set(struct nf_conn *ct, struct tcf_ctinfo *ca, struct tcf_ctinfo_params *cp, @@ -157,7 +156,7 @@ static int tcf_ctinfo_init(struct net *net, struct nlattr *nla, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { - struct tc_action_net *tn = net_generic(net, ctinfo_net_id); + struct tc_action_net *tn = net_generic(net, act_ctinfo_ops.net_id); bool bind = flags & TCA_ACT_FLAGS_BIND; u32 dscpmask = 0, dscpstatemask, index; struct nlattr *tb[TCA_CTINFO_MAX + 1]; @@ -342,23 +341,6 @@ nla_put_failure: return -1; } -static int tcf_ctinfo_walker(struct net *net, struct sk_buff *skb, - struct netlink_callback *cb, int type, - const struct tc_action_ops *ops, - struct netlink_ext_ack *extack) -{ - struct tc_action_net *tn = net_generic(net, ctinfo_net_id); - - return tcf_generic_walker(tn, skb, cb, type, ops, extack); -} - -static int tcf_ctinfo_search(struct net *net, struct tc_action **a, u32 index) -{ - struct tc_action_net *tn = net_generic(net, ctinfo_net_id); - - return tcf_idr_search(tn, a, index); -} - static void tcf_ctinfo_cleanup(struct tc_action *a) { struct tcf_ctinfo *ci = to_ctinfo(a); @@ -377,27 +359,25 @@ static struct tc_action_ops act_ctinfo_ops = { .dump = tcf_ctinfo_dump, .init = tcf_ctinfo_init, .cleanup= tcf_ctinfo_cleanup, - .walk = tcf_ctinfo_walker, - .lookup = tcf_ctinfo_search, .size = sizeof(struct tcf_ctinfo), }; static __net_init int ctinfo_init_net(struct net *net) { - struct tc_action_net *tn = net_generic(net, ctinfo_net_id); + struct tc_action_net *tn = net_generic(net, act_ctinfo_ops.net_id); return tc_action_net_init(net, tn, &act_ctinfo_ops); } static void __net_exit ctinfo_exit_net(struct list_head *net_list) { - tc_action_net_exit(net_list, ctinfo_net_id); + tc_action_net_exit(net_list, act_ctinfo_ops.net_id); } static struct pernet_operations ctinfo_net_ops = { .init = ctinfo_init_net, .exit_batch = ctinfo_exit_net, - .id = &ctinfo_net_id, + .id = &act_ctinfo_ops.net_id, .size = sizeof(struct tc_action_net), }; diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index bde6a6c01e64..62d682b96b88 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -19,14 +19,13 @@ #include <linux/tc_act/tc_gact.h> #include <net/tc_act/tc_gact.h> -static unsigned int gact_net_id; static struct tc_action_ops act_gact_ops; #ifdef CONFIG_GACT_PROB static int gact_net_rand(struct tcf_gact *gact) { smp_rmb(); /* coupled with smp_wmb() in tcf_gact_init() */ - if (prandom_u32() % gact->tcfg_pval) + if (prandom_u32_max(gact->tcfg_pval)) return gact->tcf_action; return gact->tcfg_paction; } @@ -55,7 +54,7 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { - struct tc_action_net *tn = net_generic(net, gact_net_id); + struct tc_action_net *tn = net_generic(net, act_gact_ops.net_id); bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_GACT_MAX + 1]; struct tcf_chain *goto_ch = NULL; @@ -222,23 +221,6 @@ nla_put_failure: return -1; } -static int tcf_gact_walker(struct net *net, struct sk_buff *skb, - struct netlink_callback *cb, int type, - const struct tc_action_ops *ops, - struct netlink_ext_ack *extack) -{ - struct tc_action_net *tn = net_generic(net, gact_net_id); - - return tcf_generic_walker(tn, skb, cb, type, ops, extack); -} - -static int tcf_gact_search(struct net *net, struct tc_action **a, u32 index) -{ - struct tc_action_net *tn = net_generic(net, gact_net_id); - - return tcf_idr_search(tn, a, index); -} - static size_t tcf_gact_get_fill_size(const struct tc_action *act) { size_t sz = nla_total_size(sizeof(struct tc_gact)); /* TCA_GACT_PARMS */ @@ -253,7 +235,8 @@ static size_t tcf_gact_get_fill_size(const struct tc_action *act) } static int tcf_gact_offload_act_setup(struct tc_action *act, void *entry_data, - u32 *index_inc, bool bind) + u32 *index_inc, bool bind, + struct netlink_ext_ack *extack) { if (bind) { struct flow_action_entry *entry = entry_data; @@ -267,7 +250,17 @@ static int tcf_gact_offload_act_setup(struct tc_action *act, void *entry_data, } else if (is_tcf_gact_goto_chain(act)) { entry->id = FLOW_ACTION_GOTO; entry->chain_index = tcf_gact_goto_chain_index(act); + } else if (is_tcf_gact_continue(act)) { + NL_SET_ERR_MSG_MOD(extack, "Offload of \"continue\" action is not supported"); + return -EOPNOTSUPP; + } else if (is_tcf_gact_reclassify(act)) { + NL_SET_ERR_MSG_MOD(extack, "Offload of \"reclassify\" action is not supported"); + return -EOPNOTSUPP; + } else if (is_tcf_gact_pipe(act)) { + NL_SET_ERR_MSG_MOD(extack, "Offload of \"pipe\" action is not supported"); + return -EOPNOTSUPP; } else { + NL_SET_ERR_MSG_MOD(extack, "Unsupported generic action offload"); return -EOPNOTSUPP; } *index_inc = 1; @@ -297,8 +290,6 @@ static struct tc_action_ops act_gact_ops = { .stats_update = tcf_gact_stats_update, .dump = tcf_gact_dump, .init = tcf_gact_init, - .walk = tcf_gact_walker, - .lookup = tcf_gact_search, .get_fill_size = tcf_gact_get_fill_size, .offload_act_setup = tcf_gact_offload_act_setup, .size = sizeof(struct tcf_gact), @@ -306,20 +297,20 @@ static struct tc_action_ops act_gact_ops = { static __net_init int gact_init_net(struct net *net) { - struct tc_action_net *tn = net_generic(net, gact_net_id); + struct tc_action_net *tn = net_generic(net, act_gact_ops.net_id); return tc_action_net_init(net, tn, &act_gact_ops); } static void __net_exit gact_exit_net(struct list_head *net_list) { - tc_action_net_exit(net_list, gact_net_id); + tc_action_net_exit(net_list, act_gact_ops.net_id); } static struct pernet_operations gact_net_ops = { .init = gact_init_net, .exit_batch = gact_exit_net, - .id = &gact_net_id, + .id = &act_gact_ops.net_id, .size = sizeof(struct tc_action_net), }; diff --git a/net/sched/act_gate.c b/net/sched/act_gate.c index d56e73843a4b..3049878e7315 100644 --- a/net/sched/act_gate.c +++ b/net/sched/act_gate.c @@ -15,7 +15,6 @@ #include <net/pkt_cls.h> #include <net/tc_act/tc_gate.h> -static unsigned int gate_net_id; static struct tc_action_ops act_gate_ops; static ktime_t gate_get_time(struct tcf_gate *gact) @@ -298,7 +297,7 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { - struct tc_action_net *tn = net_generic(net, gate_net_id); + struct tc_action_net *tn = net_generic(net, act_gate_ops.net_id); enum tk_offsets tk_offset = TK_OFFS_TAI; bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_GATE_MAX + 1]; @@ -565,16 +564,6 @@ nla_put_failure: return -1; } -static int tcf_gate_walker(struct net *net, struct sk_buff *skb, - struct netlink_callback *cb, int type, - const struct tc_action_ops *ops, - struct netlink_ext_ack *extack) -{ - struct tc_action_net *tn = net_generic(net, gate_net_id); - - return tcf_generic_walker(tn, skb, cb, type, ops, extack); -} - static void tcf_gate_stats_update(struct tc_action *a, u64 bytes, u64 packets, u64 drops, u64 lastuse, bool hw) { @@ -585,13 +574,6 @@ static void tcf_gate_stats_update(struct tc_action *a, u64 bytes, u64 packets, tm->lastuse = max_t(u64, tm->lastuse, lastuse); } -static int tcf_gate_search(struct net *net, struct tc_action **a, u32 index) -{ - struct tc_action_net *tn = net_generic(net, gate_net_id); - - return tcf_idr_search(tn, a, index); -} - static size_t tcf_gate_get_fill_size(const struct tc_action *act) { return nla_total_size(sizeof(struct tc_gate)); @@ -619,7 +601,8 @@ static int tcf_gate_get_entries(struct flow_action_entry *entry, } static int tcf_gate_offload_act_setup(struct tc_action *act, void *entry_data, - u32 *index_inc, bool bind) + u32 *index_inc, bool bind, + struct netlink_ext_ack *extack) { int err; @@ -653,30 +636,28 @@ static struct tc_action_ops act_gate_ops = { .dump = tcf_gate_dump, .init = tcf_gate_init, .cleanup = tcf_gate_cleanup, - .walk = tcf_gate_walker, .stats_update = tcf_gate_stats_update, .get_fill_size = tcf_gate_get_fill_size, - .lookup = tcf_gate_search, .offload_act_setup = tcf_gate_offload_act_setup, .size = sizeof(struct tcf_gate), }; static __net_init int gate_init_net(struct net *net) { - struct tc_action_net *tn = net_generic(net, gate_net_id); + struct tc_action_net *tn = net_generic(net, act_gate_ops.net_id); return tc_action_net_init(net, tn, &act_gate_ops); } static void __net_exit gate_exit_net(struct list_head *net_list) { - tc_action_net_exit(net_list, gate_net_id); + tc_action_net_exit(net_list, act_gate_ops.net_id); } static struct pernet_operations gate_net_ops = { .init = gate_init_net, .exit_batch = gate_exit_net, - .id = &gate_net_id, + .id = &act_gate_ops.net_id, .size = sizeof(struct tc_action_net), }; diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index 41ba55e60b1b..41d63b33461d 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -30,7 +30,6 @@ #include <linux/etherdevice.h> #include <net/ife.h> -static unsigned int ife_net_id; static int max_metacnt = IFE_META_MAX + 1; static struct tc_action_ops act_ife_ops; @@ -482,7 +481,7 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { - struct tc_action_net *tn = net_generic(net, ife_net_id); + struct tc_action_net *tn = net_generic(net, act_ife_ops.net_id); bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_IFE_MAX + 1]; struct nlattr *tb2[IFE_META_MAX + 1]; @@ -878,23 +877,6 @@ static int tcf_ife_act(struct sk_buff *skb, const struct tc_action *a, return tcf_ife_decode(skb, a, res); } -static int tcf_ife_walker(struct net *net, struct sk_buff *skb, - struct netlink_callback *cb, int type, - const struct tc_action_ops *ops, - struct netlink_ext_ack *extack) -{ - struct tc_action_net *tn = net_generic(net, ife_net_id); - - return tcf_generic_walker(tn, skb, cb, type, ops, extack); -} - -static int tcf_ife_search(struct net *net, struct tc_action **a, u32 index) -{ - struct tc_action_net *tn = net_generic(net, ife_net_id); - - return tcf_idr_search(tn, a, index); -} - static struct tc_action_ops act_ife_ops = { .kind = "ife", .id = TCA_ID_IFE, @@ -903,27 +885,25 @@ static struct tc_action_ops act_ife_ops = { .dump = tcf_ife_dump, .cleanup = tcf_ife_cleanup, .init = tcf_ife_init, - .walk = tcf_ife_walker, - .lookup = tcf_ife_search, .size = sizeof(struct tcf_ife_info), }; static __net_init int ife_init_net(struct net *net) { - struct tc_action_net *tn = net_generic(net, ife_net_id); + struct tc_action_net *tn = net_generic(net, act_ife_ops.net_id); return tc_action_net_init(net, tn, &act_ife_ops); } static void __net_exit ife_exit_net(struct list_head *net_list) { - tc_action_net_exit(net_list, ife_net_id); + tc_action_net_exit(net_list, act_ife_ops.net_id); } static struct pernet_operations ife_net_ops = { .init = ife_init_net, .exit_batch = ife_exit_net, - .id = &ife_net_id, + .id = &act_ife_ops.net_id, .size = sizeof(struct tc_action_net), }; diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 2f3d507c24a1..1625e1037416 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -24,10 +24,7 @@ #include <linux/netfilter_ipv4/ip_tables.h> -static unsigned int ipt_net_id; static struct tc_action_ops act_ipt_ops; - -static unsigned int xt_net_id; static struct tc_action_ops act_xt_ops; static int ipt_init_target(struct net *net, struct xt_entry_target *t, @@ -206,8 +203,8 @@ static int tcf_ipt_init(struct net *net, struct nlattr *nla, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { - return __tcf_ipt_init(net, ipt_net_id, nla, est, a, &act_ipt_ops, - tp, flags); + return __tcf_ipt_init(net, act_ipt_ops.net_id, nla, est, + a, &act_ipt_ops, tp, flags); } static int tcf_xt_init(struct net *net, struct nlattr *nla, @@ -215,8 +212,8 @@ static int tcf_xt_init(struct net *net, struct nlattr *nla, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { - return __tcf_ipt_init(net, xt_net_id, nla, est, a, &act_xt_ops, - tp, flags); + return __tcf_ipt_init(net, act_xt_ops.net_id, nla, est, + a, &act_xt_ops, tp, flags); } static int tcf_ipt_act(struct sk_buff *skb, const struct tc_action *a, @@ -316,23 +313,6 @@ nla_put_failure: return -1; } -static int tcf_ipt_walker(struct net *net, struct sk_buff *skb, - struct netlink_callback *cb, int type, - const struct tc_action_ops *ops, - struct netlink_ext_ack *extack) -{ - struct tc_action_net *tn = net_generic(net, ipt_net_id); - - return tcf_generic_walker(tn, skb, cb, type, ops, extack); -} - -static int tcf_ipt_search(struct net *net, struct tc_action **a, u32 index) -{ - struct tc_action_net *tn = net_generic(net, ipt_net_id); - - return tcf_idr_search(tn, a, index); -} - static struct tc_action_ops act_ipt_ops = { .kind = "ipt", .id = TCA_ID_IPT, @@ -341,47 +321,28 @@ static struct tc_action_ops act_ipt_ops = { .dump = tcf_ipt_dump, .cleanup = tcf_ipt_release, .init = tcf_ipt_init, - .walk = tcf_ipt_walker, - .lookup = tcf_ipt_search, .size = sizeof(struct tcf_ipt), }; static __net_init int ipt_init_net(struct net *net) { - struct tc_action_net *tn = net_generic(net, ipt_net_id); + struct tc_action_net *tn = net_generic(net, act_ipt_ops.net_id); return tc_action_net_init(net, tn, &act_ipt_ops); } static void __net_exit ipt_exit_net(struct list_head *net_list) { - tc_action_net_exit(net_list, ipt_net_id); + tc_action_net_exit(net_list, act_ipt_ops.net_id); } static struct pernet_operations ipt_net_ops = { .init = ipt_init_net, .exit_batch = ipt_exit_net, - .id = &ipt_net_id, + .id = &act_ipt_ops.net_id, .size = sizeof(struct tc_action_net), }; -static int tcf_xt_walker(struct net *net, struct sk_buff *skb, - struct netlink_callback *cb, int type, - const struct tc_action_ops *ops, - struct netlink_ext_ack *extack) -{ - struct tc_action_net *tn = net_generic(net, xt_net_id); - - return tcf_generic_walker(tn, skb, cb, type, ops, extack); -} - -static int tcf_xt_search(struct net *net, struct tc_action **a, u32 index) -{ - struct tc_action_net *tn = net_generic(net, xt_net_id); - - return tcf_idr_search(tn, a, index); -} - static struct tc_action_ops act_xt_ops = { .kind = "xt", .id = TCA_ID_XT, @@ -390,27 +351,25 @@ static struct tc_action_ops act_xt_ops = { .dump = tcf_ipt_dump, .cleanup = tcf_ipt_release, .init = tcf_xt_init, - .walk = tcf_xt_walker, - .lookup = tcf_xt_search, .size = sizeof(struct tcf_ipt), }; static __net_init int xt_init_net(struct net *net) { - struct tc_action_net *tn = net_generic(net, xt_net_id); + struct tc_action_net *tn = net_generic(net, act_xt_ops.net_id); return tc_action_net_init(net, tn, &act_xt_ops); } static void __net_exit xt_exit_net(struct list_head *net_list) { - tc_action_net_exit(net_list, xt_net_id); + tc_action_net_exit(net_list, act_xt_ops.net_id); } static struct pernet_operations xt_net_ops = { .init = xt_init_net, .exit_batch = xt_exit_net, - .id = &xt_net_id, + .id = &act_xt_ops.net_id, .size = sizeof(struct tc_action_net), }; diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 39acd1d18609..b8ad6ae282c0 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -79,14 +79,13 @@ static void tcf_mirred_release(struct tc_action *a) /* last reference to action, no need to lock */ dev = rcu_dereference_protected(m->tcfm_dev, 1); - dev_put_track(dev, &m->tcfm_dev_tracker); + netdev_put(dev, &m->tcfm_dev_tracker); } static const struct nla_policy mirred_policy[TCA_MIRRED_MAX + 1] = { [TCA_MIRRED_PARMS] = { .len = sizeof(struct tc_mirred) }, }; -static unsigned int mirred_net_id; static struct tc_action_ops act_mirred_ops; static int tcf_mirred_init(struct net *net, struct nlattr *nla, @@ -94,7 +93,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { - struct tc_action_net *tn = net_generic(net, mirred_net_id); + struct tc_action_net *tn = net_generic(net, act_mirred_ops.net_id); bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_MIRRED_MAX + 1]; struct tcf_chain *goto_ch = NULL; @@ -181,7 +180,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, mac_header_xmit = dev_is_mac_header_xmit(ndev); odev = rcu_replace_pointer(m->tcfm_dev, ndev, lockdep_is_held(&m->tcf_lock)); - dev_put_track(odev, &m->tcfm_dev_tracker); + netdev_put(odev, &m->tcfm_dev_tracker); netdev_tracker_alloc(ndev, &m->tcfm_dev_tracker, GFP_ATOMIC); m->tcfm_mac_header_xmit = mac_header_xmit; } @@ -306,8 +305,7 @@ static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a, /* let's the caller reinsert the packet, if possible */ if (use_reinsert) { - res->ingress = want_ingress; - err = tcf_mirred_forward(res->ingress, skb); + err = tcf_mirred_forward(want_ingress, skb); if (err) tcf_action_inc_overlimit_qstats(&m->common); __this_cpu_dec(mirred_rec_level); @@ -373,23 +371,6 @@ nla_put_failure: return -1; } -static int tcf_mirred_walker(struct net *net, struct sk_buff *skb, - struct netlink_callback *cb, int type, - const struct tc_action_ops *ops, - struct netlink_ext_ack *extack) -{ - struct tc_action_net *tn = net_generic(net, mirred_net_id); - - return tcf_generic_walker(tn, skb, cb, type, ops, extack); -} - -static int tcf_mirred_search(struct net *net, struct tc_action **a, u32 index) -{ - struct tc_action_net *tn = net_generic(net, mirred_net_id); - - return tcf_idr_search(tn, a, index); -} - static int mirred_device_event(struct notifier_block *unused, unsigned long event, void *ptr) { @@ -402,7 +383,7 @@ static int mirred_device_event(struct notifier_block *unused, list_for_each_entry(m, &mirred_list, tcfm_list) { spin_lock_bh(&m->tcf_lock); if (tcf_mirred_dev_dereference(m) == dev) { - dev_put_track(dev, &m->tcfm_dev_tracker); + netdev_put(dev, &m->tcfm_dev_tracker); /* Note : no rcu grace period necessary, as * net_device are already rcu protected. */ @@ -460,7 +441,8 @@ static void tcf_offload_mirred_get_dev(struct flow_action_entry *entry, } static int tcf_mirred_offload_act_setup(struct tc_action *act, void *entry_data, - u32 *index_inc, bool bind) + u32 *index_inc, bool bind, + struct netlink_ext_ack *extack) { if (bind) { struct flow_action_entry *entry = entry_data; @@ -478,6 +460,7 @@ static int tcf_mirred_offload_act_setup(struct tc_action *act, void *entry_data, entry->id = FLOW_ACTION_MIRRED_INGRESS; tcf_offload_mirred_get_dev(entry, act); } else { + NL_SET_ERR_MSG_MOD(extack, "Unsupported mirred offload"); return -EOPNOTSUPP; } *index_inc = 1; @@ -508,8 +491,6 @@ static struct tc_action_ops act_mirred_ops = { .dump = tcf_mirred_dump, .cleanup = tcf_mirred_release, .init = tcf_mirred_init, - .walk = tcf_mirred_walker, - .lookup = tcf_mirred_search, .get_fill_size = tcf_mirred_get_fill_size, .offload_act_setup = tcf_mirred_offload_act_setup, .size = sizeof(struct tcf_mirred), @@ -518,20 +499,20 @@ static struct tc_action_ops act_mirred_ops = { static __net_init int mirred_init_net(struct net *net) { - struct tc_action_net *tn = net_generic(net, mirred_net_id); + struct tc_action_net *tn = net_generic(net, act_mirred_ops.net_id); return tc_action_net_init(net, tn, &act_mirred_ops); } static void __net_exit mirred_exit_net(struct list_head *net_list) { - tc_action_net_exit(net_list, mirred_net_id); + tc_action_net_exit(net_list, act_mirred_ops.net_id); } static struct pernet_operations mirred_net_ops = { .init = mirred_init_net, .exit_batch = mirred_exit_net, - .id = &mirred_net_id, + .id = &act_mirred_ops.net_id, .size = sizeof(struct tc_action_net), }; diff --git a/net/sched/act_mpls.c b/net/sched/act_mpls.c index b9ff3459fdab..8ad25cc8ccd5 100644 --- a/net/sched/act_mpls.c +++ b/net/sched/act_mpls.c @@ -15,7 +15,6 @@ #include <net/pkt_cls.h> #include <net/tc_act/tc_mpls.h> -static unsigned int mpls_net_id; static struct tc_action_ops act_mpls_ops; #define ACT_MPLS_TTL_DEFAULT 255 @@ -155,7 +154,7 @@ static int tcf_mpls_init(struct net *net, struct nlattr *nla, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { - struct tc_action_net *tn = net_generic(net, mpls_net_id); + struct tc_action_net *tn = net_generic(net, act_mpls_ops.net_id); bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_MPLS_MAX + 1]; struct tcf_chain *goto_ch = NULL; @@ -367,25 +366,9 @@ nla_put_failure: return -EMSGSIZE; } -static int tcf_mpls_walker(struct net *net, struct sk_buff *skb, - struct netlink_callback *cb, int type, - const struct tc_action_ops *ops, - struct netlink_ext_ack *extack) -{ - struct tc_action_net *tn = net_generic(net, mpls_net_id); - - return tcf_generic_walker(tn, skb, cb, type, ops, extack); -} - -static int tcf_mpls_search(struct net *net, struct tc_action **a, u32 index) -{ - struct tc_action_net *tn = net_generic(net, mpls_net_id); - - return tcf_idr_search(tn, a, index); -} - static int tcf_mpls_offload_act_setup(struct tc_action *act, void *entry_data, - u32 *index_inc, bool bind) + u32 *index_inc, bool bind, + struct netlink_ext_ack *extack) { if (bind) { struct flow_action_entry *entry = entry_data; @@ -410,7 +393,14 @@ static int tcf_mpls_offload_act_setup(struct tc_action *act, void *entry_data, entry->mpls_mangle.bos = tcf_mpls_bos(act); entry->mpls_mangle.ttl = tcf_mpls_ttl(act); break; + case TCA_MPLS_ACT_DEC_TTL: + NL_SET_ERR_MSG_MOD(extack, "Offload not supported when \"dec_ttl\" option is used"); + return -EOPNOTSUPP; + case TCA_MPLS_ACT_MAC_PUSH: + NL_SET_ERR_MSG_MOD(extack, "Offload not supported when \"mac_push\" option is used"); + return -EOPNOTSUPP; default: + NL_SET_ERR_MSG_MOD(extack, "Unsupported MPLS mode offload"); return -EOPNOTSUPP; } *index_inc = 1; @@ -443,28 +433,26 @@ static struct tc_action_ops act_mpls_ops = { .dump = tcf_mpls_dump, .init = tcf_mpls_init, .cleanup = tcf_mpls_cleanup, - .walk = tcf_mpls_walker, - .lookup = tcf_mpls_search, .offload_act_setup = tcf_mpls_offload_act_setup, .size = sizeof(struct tcf_mpls), }; static __net_init int mpls_init_net(struct net *net) { - struct tc_action_net *tn = net_generic(net, mpls_net_id); + struct tc_action_net *tn = net_generic(net, act_mpls_ops.net_id); return tc_action_net_init(net, tn, &act_mpls_ops); } static void __net_exit mpls_exit_net(struct list_head *net_list) { - tc_action_net_exit(net_list, mpls_net_id); + tc_action_net_exit(net_list, act_mpls_ops.net_id); } static struct pernet_operations mpls_net_ops = { .init = mpls_init_net, .exit_batch = mpls_exit_net, - .id = &mpls_net_id, + .id = &act_mpls_ops.net_id, .size = sizeof(struct tc_action_net), }; diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index 2a39b3729e84..9265145f1040 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -26,7 +26,6 @@ #include <net/udp.h> -static unsigned int nat_net_id; static struct tc_action_ops act_nat_ops; static const struct nla_policy nat_policy[TCA_NAT_MAX + 1] = { @@ -37,7 +36,7 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { - struct tc_action_net *tn = net_generic(net, nat_net_id); + struct tc_action_net *tn = net_generic(net, act_nat_ops.net_id); bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_NAT_MAX + 1]; struct tcf_chain *goto_ch = NULL; @@ -289,23 +288,6 @@ nla_put_failure: return -1; } -static int tcf_nat_walker(struct net *net, struct sk_buff *skb, - struct netlink_callback *cb, int type, - const struct tc_action_ops *ops, - struct netlink_ext_ack *extack) -{ - struct tc_action_net *tn = net_generic(net, nat_net_id); - - return tcf_generic_walker(tn, skb, cb, type, ops, extack); -} - -static int tcf_nat_search(struct net *net, struct tc_action **a, u32 index) -{ - struct tc_action_net *tn = net_generic(net, nat_net_id); - - return tcf_idr_search(tn, a, index); -} - static struct tc_action_ops act_nat_ops = { .kind = "nat", .id = TCA_ID_NAT, @@ -313,27 +295,25 @@ static struct tc_action_ops act_nat_ops = { .act = tcf_nat_act, .dump = tcf_nat_dump, .init = tcf_nat_init, - .walk = tcf_nat_walker, - .lookup = tcf_nat_search, .size = sizeof(struct tcf_nat), }; static __net_init int nat_init_net(struct net *net) { - struct tc_action_net *tn = net_generic(net, nat_net_id); + struct tc_action_net *tn = net_generic(net, act_nat_ops.net_id); return tc_action_net_init(net, tn, &act_nat_ops); } static void __net_exit nat_exit_net(struct list_head *net_list) { - tc_action_net_exit(net_list, nat_net_id); + tc_action_net_exit(net_list, act_nat_ops.net_id); } static struct pernet_operations nat_net_ops = { .init = nat_init_net, .exit_batch = nat_exit_net, - .id = &nat_net_id, + .id = &act_nat_ops.net_id, .size = sizeof(struct tc_action_net), }; diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 31fcd279c177..94ed5857ce67 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -21,7 +21,6 @@ #include <uapi/linux/tc_act/tc_pedit.h> #include <net/pkt_cls.h> -static unsigned int pedit_net_id; static struct tc_action_ops act_pedit_ops; static const struct nla_policy pedit_policy[TCA_PEDIT_MAX + 1] = { @@ -139,7 +138,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { - struct tc_action_net *tn = net_generic(net, pedit_net_id); + struct tc_action_net *tn = net_generic(net, act_pedit_ops.net_id); bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_PEDIT_MAX + 1]; struct tcf_chain *goto_ch = NULL; @@ -149,7 +148,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, struct nlattr *pattr; struct tcf_pedit *p; int ret = 0, err; - int ksize; + int i, ksize; u32 index; if (!nla) { @@ -228,6 +227,22 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, p->tcfp_nkeys = parm->nkeys; } memcpy(p->tcfp_keys, parm->keys, ksize); + p->tcfp_off_max_hint = 0; + for (i = 0; i < p->tcfp_nkeys; ++i) { + u32 cur = p->tcfp_keys[i].off; + + /* sanitize the shift value for any later use */ + p->tcfp_keys[i].shift = min_t(size_t, BITS_PER_TYPE(int) - 1, + p->tcfp_keys[i].shift); + + /* The AT option can read a single byte, we can bound the actual + * value with uchar max. + */ + cur += (0xff & p->tcfp_keys[i].offmask) >> p->tcfp_keys[i].shift; + + /* Each key touches 4 bytes starting from the computed offset */ + p->tcfp_off_max_hint = max(p->tcfp_off_max_hint, cur + 4); + } p->tcfp_flags = parm->flags; goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); @@ -308,13 +323,18 @@ static int tcf_pedit_act(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { struct tcf_pedit *p = to_pedit(a); + u32 max_offset; int i; - if (skb_unclone(skb, GFP_ATOMIC)) - return p->tcf_action; - spin_lock(&p->tcf_lock); + max_offset = (skb_transport_header_was_set(skb) ? + skb_transport_offset(skb) : + skb_network_offset(skb)) + + p->tcfp_off_max_hint; + if (skb_ensure_writable(skb, min(skb->len, max_offset))) + goto unlock; + tcf_lastuse_update(&p->tcf_tm); if (p->tcfp_nkeys > 0) { @@ -403,6 +423,7 @@ bad: p->tcf_qstats.overlimits++; done: bstats_update(&p->tcf_bstats, skb); +unlock: spin_unlock(&p->tcf_lock); return p->tcf_action; } @@ -470,25 +491,9 @@ nla_put_failure: return -1; } -static int tcf_pedit_walker(struct net *net, struct sk_buff *skb, - struct netlink_callback *cb, int type, - const struct tc_action_ops *ops, - struct netlink_ext_ack *extack) -{ - struct tc_action_net *tn = net_generic(net, pedit_net_id); - - return tcf_generic_walker(tn, skb, cb, type, ops, extack); -} - -static int tcf_pedit_search(struct net *net, struct tc_action **a, u32 index) -{ - struct tc_action_net *tn = net_generic(net, pedit_net_id); - - return tcf_idr_search(tn, a, index); -} - static int tcf_pedit_offload_act_setup(struct tc_action *act, void *entry_data, - u32 *index_inc, bool bind) + u32 *index_inc, bool bind, + struct netlink_ext_ack *extack) { if (bind) { struct flow_action_entry *entry = entry_data; @@ -503,6 +508,7 @@ static int tcf_pedit_offload_act_setup(struct tc_action *act, void *entry_data, entry->id = FLOW_ACTION_ADD; break; default: + NL_SET_ERR_MSG_MOD(extack, "Unsupported pedit command offload"); return -EOPNOTSUPP; } entry->mangle.htype = tcf_pedit_htype(act, k); @@ -529,28 +535,26 @@ static struct tc_action_ops act_pedit_ops = { .dump = tcf_pedit_dump, .cleanup = tcf_pedit_cleanup, .init = tcf_pedit_init, - .walk = tcf_pedit_walker, - .lookup = tcf_pedit_search, .offload_act_setup = tcf_pedit_offload_act_setup, .size = sizeof(struct tcf_pedit), }; static __net_init int pedit_init_net(struct net *net) { - struct tc_action_net *tn = net_generic(net, pedit_net_id); + struct tc_action_net *tn = net_generic(net, act_pedit_ops.net_id); return tc_action_net_init(net, tn, &act_pedit_ops); } static void __net_exit pedit_exit_net(struct list_head *net_list) { - tc_action_net_exit(net_list, pedit_net_id); + tc_action_net_exit(net_list, act_pedit_ops.net_id); } static struct pernet_operations pedit_net_ops = { .init = pedit_init_net, .exit_batch = pedit_exit_net, - .id = &pedit_net_id, + .id = &act_pedit_ops.net_id, .size = sizeof(struct tc_action_net), }; diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 0923aa2b8f8a..0adb26e366a7 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -22,19 +22,8 @@ /* Each policer is serialized by its individual spinlock */ -static unsigned int police_net_id; static struct tc_action_ops act_police_ops; -static int tcf_police_walker(struct net *net, struct sk_buff *skb, - struct netlink_callback *cb, int type, - const struct tc_action_ops *ops, - struct netlink_ext_ack *extack) -{ - struct tc_action_net *tn = net_generic(net, police_net_id); - - return tcf_generic_walker(tn, skb, cb, type, ops, extack); -} - static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = { [TCA_POLICE_RATE] = { .len = TC_RTAB_SIZE }, [TCA_POLICE_PEAKRATE] = { .len = TC_RTAB_SIZE }, @@ -58,7 +47,7 @@ static int tcf_police_init(struct net *net, struct nlattr *nla, struct tc_police *parm; struct tcf_police *police; struct qdisc_rate_table *R_tab = NULL, *P_tab = NULL; - struct tc_action_net *tn = net_generic(net, police_net_id); + struct tc_action_net *tn = net_generic(net, act_police_ops.net_id); struct tcf_police_params *new; bool exists = false; u32 index; @@ -239,6 +228,20 @@ release_idr: return err; } +static bool tcf_police_mtu_check(struct sk_buff *skb, u32 limit) +{ + u32 len; + + if (skb_is_gso(skb)) + return skb_gso_validate_mac_len(skb, limit); + + len = qdisc_pkt_len(skb); + if (skb_at_tc_ingress(skb)) + len += skb->mac_len; + + return len <= limit; +} + static int tcf_police_act(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { @@ -261,7 +264,7 @@ static int tcf_police_act(struct sk_buff *skb, const struct tc_action *a, goto inc_overlimits; } - if (qdisc_pkt_len(skb) <= p->tcfp_mtu) { + if (tcf_police_mtu_check(skb, p->tcfp_mtu)) { if (!p->rate_present && !p->pps_present) { ret = p->tcfp_result; goto end; @@ -398,27 +401,78 @@ nla_put_failure: return -1; } -static int tcf_police_search(struct net *net, struct tc_action **a, u32 index) +static int tcf_police_act_to_flow_act(int tc_act, u32 *extval, + struct netlink_ext_ack *extack) { - struct tc_action_net *tn = net_generic(net, police_net_id); + int act_id = -EOPNOTSUPP; + + if (!TC_ACT_EXT_OPCODE(tc_act)) { + if (tc_act == TC_ACT_OK) + act_id = FLOW_ACTION_ACCEPT; + else if (tc_act == TC_ACT_SHOT) + act_id = FLOW_ACTION_DROP; + else if (tc_act == TC_ACT_PIPE) + act_id = FLOW_ACTION_PIPE; + else if (tc_act == TC_ACT_RECLASSIFY) + NL_SET_ERR_MSG_MOD(extack, "Offload not supported when conform/exceed action is \"reclassify\""); + else + NL_SET_ERR_MSG_MOD(extack, "Unsupported conform/exceed action offload"); + } else if (TC_ACT_EXT_CMP(tc_act, TC_ACT_GOTO_CHAIN)) { + act_id = FLOW_ACTION_GOTO; + *extval = tc_act & TC_ACT_EXT_VAL_MASK; + } else if (TC_ACT_EXT_CMP(tc_act, TC_ACT_JUMP)) { + act_id = FLOW_ACTION_JUMP; + *extval = tc_act & TC_ACT_EXT_VAL_MASK; + } else if (tc_act == TC_ACT_UNSPEC) { + act_id = FLOW_ACTION_CONTINUE; + } else { + NL_SET_ERR_MSG_MOD(extack, "Unsupported conform/exceed action offload"); + } - return tcf_idr_search(tn, a, index); + return act_id; } static int tcf_police_offload_act_setup(struct tc_action *act, void *entry_data, - u32 *index_inc, bool bind) + u32 *index_inc, bool bind, + struct netlink_ext_ack *extack) { if (bind) { struct flow_action_entry *entry = entry_data; + struct tcf_police *police = to_police(act); + struct tcf_police_params *p; + int act_id; + + p = rcu_dereference_protected(police->params, + lockdep_is_held(&police->tcf_lock)); entry->id = FLOW_ACTION_POLICE; entry->police.burst = tcf_police_burst(act); entry->police.rate_bytes_ps = tcf_police_rate_bytes_ps(act); + entry->police.peakrate_bytes_ps = tcf_police_peakrate_bytes_ps(act); + entry->police.avrate = tcf_police_tcfp_ewma_rate(act); + entry->police.overhead = tcf_police_rate_overhead(act); entry->police.burst_pkt = tcf_police_burst_pkt(act); entry->police.rate_pkt_ps = tcf_police_rate_pkt_ps(act); entry->police.mtu = tcf_police_tcfp_mtu(act); + + act_id = tcf_police_act_to_flow_act(police->tcf_action, + &entry->police.exceed.extval, + extack); + if (act_id < 0) + return act_id; + + entry->police.exceed.act_id = act_id; + + act_id = tcf_police_act_to_flow_act(p->tcfp_result, + &entry->police.notexceed.extval, + extack); + if (act_id < 0) + return act_id; + + entry->police.notexceed.act_id = act_id; + *index_inc = 1; } else { struct flow_offload_action *fl_action = entry_data; @@ -441,8 +495,6 @@ static struct tc_action_ops act_police_ops = { .act = tcf_police_act, .dump = tcf_police_dump, .init = tcf_police_init, - .walk = tcf_police_walker, - .lookup = tcf_police_search, .cleanup = tcf_police_cleanup, .offload_act_setup = tcf_police_offload_act_setup, .size = sizeof(struct tcf_police), @@ -450,20 +502,20 @@ static struct tc_action_ops act_police_ops = { static __net_init int police_init_net(struct net *net) { - struct tc_action_net *tn = net_generic(net, police_net_id); + struct tc_action_net *tn = net_generic(net, act_police_ops.net_id); return tc_action_net_init(net, tn, &act_police_ops); } static void __net_exit police_exit_net(struct list_head *net_list) { - tc_action_net_exit(net_list, police_net_id); + tc_action_net_exit(net_list, act_police_ops.net_id); } static struct pernet_operations police_net_ops = { .init = police_init_net, .exit_batch = police_exit_net, - .id = &police_net_id, + .id = &act_police_ops.net_id, .size = sizeof(struct tc_action_net), }; diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c index 9a22cdda6bbd..7a25477f5d99 100644 --- a/net/sched/act_sample.c +++ b/net/sched/act_sample.c @@ -23,7 +23,6 @@ #include <linux/if_arp.h> -static unsigned int sample_net_id; static struct tc_action_ops act_sample_ops; static const struct nla_policy sample_policy[TCA_SAMPLE_MAX + 1] = { @@ -38,7 +37,7 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { - struct tc_action_net *tn = net_generic(net, sample_net_id); + struct tc_action_net *tn = net_generic(net, act_sample_ops.net_id); bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_SAMPLE_MAX + 1]; struct psample_group *psample_group; @@ -169,7 +168,7 @@ static int tcf_sample_act(struct sk_buff *skb, const struct tc_action *a, psample_group = rcu_dereference_bh(s->psample_group); /* randomly sample packets according to rate */ - if (psample_group && (prandom_u32() % s->rate == 0)) { + if (psample_group && (prandom_u32_max(s->rate) == 0)) { if (!skb_at_tc_ingress(skb)) { md.in_ifindex = skb->skb_iif; md.out_ifindex = skb->dev->ifindex; @@ -241,23 +240,6 @@ nla_put_failure: return -1; } -static int tcf_sample_walker(struct net *net, struct sk_buff *skb, - struct netlink_callback *cb, int type, - const struct tc_action_ops *ops, - struct netlink_ext_ack *extack) -{ - struct tc_action_net *tn = net_generic(net, sample_net_id); - - return tcf_generic_walker(tn, skb, cb, type, ops, extack); -} - -static int tcf_sample_search(struct net *net, struct tc_action **a, u32 index) -{ - struct tc_action_net *tn = net_generic(net, sample_net_id); - - return tcf_idr_search(tn, a, index); -} - static void tcf_psample_group_put(void *priv) { struct psample_group *group = priv; @@ -291,7 +273,8 @@ static void tcf_offload_sample_get_group(struct flow_action_entry *entry, } static int tcf_sample_offload_act_setup(struct tc_action *act, void *entry_data, - u32 *index_inc, bool bind) + u32 *index_inc, bool bind, + struct netlink_ext_ack *extack) { if (bind) { struct flow_action_entry *entry = entry_data; @@ -320,8 +303,6 @@ static struct tc_action_ops act_sample_ops = { .dump = tcf_sample_dump, .init = tcf_sample_init, .cleanup = tcf_sample_cleanup, - .walk = tcf_sample_walker, - .lookup = tcf_sample_search, .get_psample_group = tcf_sample_get_group, .offload_act_setup = tcf_sample_offload_act_setup, .size = sizeof(struct tcf_sample), @@ -329,20 +310,20 @@ static struct tc_action_ops act_sample_ops = { static __net_init int sample_init_net(struct net *net) { - struct tc_action_net *tn = net_generic(net, sample_net_id); + struct tc_action_net *tn = net_generic(net, act_sample_ops.net_id); return tc_action_net_init(net, tn, &act_sample_ops); } static void __net_exit sample_exit_net(struct list_head *net_list) { - tc_action_net_exit(net_list, sample_net_id); + tc_action_net_exit(net_list, act_sample_ops.net_id); } static struct pernet_operations sample_net_ops = { .init = sample_init_net, .exit_batch = sample_exit_net, - .id = &sample_net_id, + .id = &act_sample_ops.net_id, .size = sizeof(struct tc_action_net), }; diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index 8c1d60bde93e..18d376135461 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -18,7 +18,6 @@ #include <linux/tc_act/tc_defact.h> #include <net/tc_act/tc_defact.h> -static unsigned int simp_net_id; static struct tc_action_ops act_simp_ops; #define SIMP_MAX_DATA 32 @@ -89,7 +88,7 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { - struct tc_action_net *tn = net_generic(net, simp_net_id); + struct tc_action_net *tn = net_generic(net, act_simp_ops.net_id); bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_DEF_MAX + 1]; struct tcf_chain *goto_ch = NULL; @@ -198,23 +197,6 @@ nla_put_failure: return -1; } -static int tcf_simp_walker(struct net *net, struct sk_buff *skb, - struct netlink_callback *cb, int type, - const struct tc_action_ops *ops, - struct netlink_ext_ack *extack) -{ - struct tc_action_net *tn = net_generic(net, simp_net_id); - - return tcf_generic_walker(tn, skb, cb, type, ops, extack); -} - -static int tcf_simp_search(struct net *net, struct tc_action **a, u32 index) -{ - struct tc_action_net *tn = net_generic(net, simp_net_id); - - return tcf_idr_search(tn, a, index); -} - static struct tc_action_ops act_simp_ops = { .kind = "simple", .id = TCA_ID_SIMP, @@ -223,27 +205,25 @@ static struct tc_action_ops act_simp_ops = { .dump = tcf_simp_dump, .cleanup = tcf_simp_release, .init = tcf_simp_init, - .walk = tcf_simp_walker, - .lookup = tcf_simp_search, .size = sizeof(struct tcf_defact), }; static __net_init int simp_init_net(struct net *net) { - struct tc_action_net *tn = net_generic(net, simp_net_id); + struct tc_action_net *tn = net_generic(net, act_simp_ops.net_id); return tc_action_net_init(net, tn, &act_simp_ops); } static void __net_exit simp_exit_net(struct list_head *net_list) { - tc_action_net_exit(net_list, simp_net_id); + tc_action_net_exit(net_list, act_simp_ops.net_id); } static struct pernet_operations simp_net_ops = { .init = simp_init_net, .exit_batch = simp_exit_net, - .id = &simp_net_id, + .id = &act_simp_ops.net_id, .size = sizeof(struct tc_action_net), }; diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index ceba11b198bb..7f598784fd30 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -20,9 +20,22 @@ #include <linux/tc_act/tc_skbedit.h> #include <net/tc_act/tc_skbedit.h> -static unsigned int skbedit_net_id; static struct tc_action_ops act_skbedit_ops; +static u16 tcf_skbedit_hash(struct tcf_skbedit_params *params, + struct sk_buff *skb) +{ + u16 queue_mapping = params->queue_mapping; + + if (params->flags & SKBEDIT_F_TXQ_SKBHASH) { + u32 hash = skb_get_hash(skb); + + queue_mapping += hash % params->mapping_mod; + } + + return netdev_cap_txqueue(skb->dev, queue_mapping); +} + static int tcf_skbedit_act(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { @@ -58,8 +71,12 @@ static int tcf_skbedit_act(struct sk_buff *skb, const struct tc_action *a, } } if (params->flags & SKBEDIT_F_QUEUE_MAPPING && - skb->dev->real_num_tx_queues > params->queue_mapping) - skb_set_queue_mapping(skb, params->queue_mapping); + skb->dev->real_num_tx_queues > params->queue_mapping) { +#ifdef CONFIG_NET_EGRESS + netdev_xmit_skip_txqueue(true); +#endif + skb_set_queue_mapping(skb, tcf_skbedit_hash(params, skb)); + } if (params->flags & SKBEDIT_F_MARK) { skb->mark &= ~params->mask; skb->mark |= params->mark & params->mask; @@ -92,6 +109,7 @@ static const struct nla_policy skbedit_policy[TCA_SKBEDIT_MAX + 1] = { [TCA_SKBEDIT_PTYPE] = { .len = sizeof(u16) }, [TCA_SKBEDIT_MASK] = { .len = sizeof(u32) }, [TCA_SKBEDIT_FLAGS] = { .len = sizeof(u64) }, + [TCA_SKBEDIT_QUEUE_MAPPING_MAX] = { .len = sizeof(u16) }, }; static int tcf_skbedit_init(struct net *net, struct nlattr *nla, @@ -99,7 +117,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, struct tcf_proto *tp, u32 act_flags, struct netlink_ext_ack *extack) { - struct tc_action_net *tn = net_generic(net, skbedit_net_id); + struct tc_action_net *tn = net_generic(net, act_skbedit_ops.net_id); bool bind = act_flags & TCA_ACT_FLAGS_BIND; struct tcf_skbedit_params *params_new; struct nlattr *tb[TCA_SKBEDIT_MAX + 1]; @@ -108,6 +126,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, struct tcf_skbedit *d; u32 flags = 0, *priority = NULL, *mark = NULL, *mask = NULL; u16 *queue_mapping = NULL, *ptype = NULL; + u16 mapping_mod = 1; bool exists = false; int ret = 0, err; u32 index; @@ -153,6 +172,25 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, if (tb[TCA_SKBEDIT_FLAGS] != NULL) { u64 *pure_flags = nla_data(tb[TCA_SKBEDIT_FLAGS]); + if (*pure_flags & SKBEDIT_F_TXQ_SKBHASH) { + u16 *queue_mapping_max; + + if (!tb[TCA_SKBEDIT_QUEUE_MAPPING] || + !tb[TCA_SKBEDIT_QUEUE_MAPPING_MAX]) { + NL_SET_ERR_MSG_MOD(extack, "Missing required range of queue_mapping."); + return -EINVAL; + } + + queue_mapping_max = + nla_data(tb[TCA_SKBEDIT_QUEUE_MAPPING_MAX]); + if (*queue_mapping_max < *queue_mapping) { + NL_SET_ERR_MSG_MOD(extack, "The range of queue_mapping is invalid, max < min."); + return -EINVAL; + } + + mapping_mod = *queue_mapping_max - *queue_mapping + 1; + flags |= SKBEDIT_F_TXQ_SKBHASH; + } if (*pure_flags & SKBEDIT_F_INHERITDSFIELD) flags |= SKBEDIT_F_INHERITDSFIELD; } @@ -204,8 +242,10 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, params_new->flags = flags; if (flags & SKBEDIT_F_PRIORITY) params_new->priority = *priority; - if (flags & SKBEDIT_F_QUEUE_MAPPING) + if (flags & SKBEDIT_F_QUEUE_MAPPING) { params_new->queue_mapping = *queue_mapping; + params_new->mapping_mod = mapping_mod; + } if (flags & SKBEDIT_F_MARK) params_new->mark = *mark; if (flags & SKBEDIT_F_PTYPE) @@ -272,6 +312,13 @@ static int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a, goto nla_put_failure; if (params->flags & SKBEDIT_F_INHERITDSFIELD) pure_flags |= SKBEDIT_F_INHERITDSFIELD; + if (params->flags & SKBEDIT_F_TXQ_SKBHASH) { + if (nla_put_u16(skb, TCA_SKBEDIT_QUEUE_MAPPING_MAX, + params->queue_mapping + params->mapping_mod - 1)) + goto nla_put_failure; + + pure_flags |= SKBEDIT_F_TXQ_SKBHASH; + } if (pure_flags != 0 && nla_put(skb, TCA_SKBEDIT_FLAGS, sizeof(pure_flags), &pure_flags)) goto nla_put_failure; @@ -299,28 +346,12 @@ static void tcf_skbedit_cleanup(struct tc_action *a) kfree_rcu(params, rcu); } -static int tcf_skbedit_walker(struct net *net, struct sk_buff *skb, - struct netlink_callback *cb, int type, - const struct tc_action_ops *ops, - struct netlink_ext_ack *extack) -{ - struct tc_action_net *tn = net_generic(net, skbedit_net_id); - - return tcf_generic_walker(tn, skb, cb, type, ops, extack); -} - -static int tcf_skbedit_search(struct net *net, struct tc_action **a, u32 index) -{ - struct tc_action_net *tn = net_generic(net, skbedit_net_id); - - return tcf_idr_search(tn, a, index); -} - static size_t tcf_skbedit_get_fill_size(const struct tc_action *act) { return nla_total_size(sizeof(struct tc_skbedit)) + nla_total_size(sizeof(u32)) /* TCA_SKBEDIT_PRIORITY */ + nla_total_size(sizeof(u16)) /* TCA_SKBEDIT_QUEUE_MAPPING */ + + nla_total_size(sizeof(u16)) /* TCA_SKBEDIT_QUEUE_MAPPING_MAX */ + nla_total_size(sizeof(u32)) /* TCA_SKBEDIT_MARK */ + nla_total_size(sizeof(u16)) /* TCA_SKBEDIT_PTYPE */ + nla_total_size(sizeof(u32)) /* TCA_SKBEDIT_MASK */ @@ -328,7 +359,8 @@ static size_t tcf_skbedit_get_fill_size(const struct tc_action *act) } static int tcf_skbedit_offload_act_setup(struct tc_action *act, void *entry_data, - u32 *index_inc, bool bind) + u32 *index_inc, bool bind, + struct netlink_ext_ack *extack) { if (bind) { struct flow_action_entry *entry = entry_data; @@ -342,7 +374,14 @@ static int tcf_skbedit_offload_act_setup(struct tc_action *act, void *entry_data } else if (is_tcf_skbedit_priority(act)) { entry->id = FLOW_ACTION_PRIORITY; entry->priority = tcf_skbedit_priority(act); + } else if (is_tcf_skbedit_queue_mapping(act)) { + NL_SET_ERR_MSG_MOD(extack, "Offload not supported when \"queue_mapping\" option is used"); + return -EOPNOTSUPP; + } else if (is_tcf_skbedit_inheritdsfield(act)) { + NL_SET_ERR_MSG_MOD(extack, "Offload not supported when \"inheritdsfield\" option is used"); + return -EOPNOTSUPP; } else { + NL_SET_ERR_MSG_MOD(extack, "Unsupported skbedit option offload"); return -EOPNOTSUPP; } *index_inc = 1; @@ -371,29 +410,27 @@ static struct tc_action_ops act_skbedit_ops = { .dump = tcf_skbedit_dump, .init = tcf_skbedit_init, .cleanup = tcf_skbedit_cleanup, - .walk = tcf_skbedit_walker, .get_fill_size = tcf_skbedit_get_fill_size, - .lookup = tcf_skbedit_search, .offload_act_setup = tcf_skbedit_offload_act_setup, .size = sizeof(struct tcf_skbedit), }; static __net_init int skbedit_init_net(struct net *net) { - struct tc_action_net *tn = net_generic(net, skbedit_net_id); + struct tc_action_net *tn = net_generic(net, act_skbedit_ops.net_id); return tc_action_net_init(net, tn, &act_skbedit_ops); } static void __net_exit skbedit_exit_net(struct list_head *net_list) { - tc_action_net_exit(net_list, skbedit_net_id); + tc_action_net_exit(net_list, act_skbedit_ops.net_id); } static struct pernet_operations skbedit_net_ops = { .init = skbedit_init_net, .exit_batch = skbedit_exit_net, - .id = &skbedit_net_id, + .id = &act_skbedit_ops.net_id, .size = sizeof(struct tc_action_net), }; diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c index 2083612d8780..d98758a63934 100644 --- a/net/sched/act_skbmod.c +++ b/net/sched/act_skbmod.c @@ -19,7 +19,6 @@ #include <linux/tc_act/tc_skbmod.h> #include <net/tc_act/tc_skbmod.h> -static unsigned int skbmod_net_id; static struct tc_action_ops act_skbmod_ops; static int tcf_skbmod_act(struct sk_buff *skb, const struct tc_action *a, @@ -103,7 +102,7 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { - struct tc_action_net *tn = net_generic(net, skbmod_net_id); + struct tc_action_net *tn = net_generic(net, act_skbmod_ops.net_id); bool ovr = flags & TCA_ACT_FLAGS_REPLACE; bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_SKBMOD_MAX + 1]; @@ -276,23 +275,6 @@ nla_put_failure: return -1; } -static int tcf_skbmod_walker(struct net *net, struct sk_buff *skb, - struct netlink_callback *cb, int type, - const struct tc_action_ops *ops, - struct netlink_ext_ack *extack) -{ - struct tc_action_net *tn = net_generic(net, skbmod_net_id); - - return tcf_generic_walker(tn, skb, cb, type, ops, extack); -} - -static int tcf_skbmod_search(struct net *net, struct tc_action **a, u32 index) -{ - struct tc_action_net *tn = net_generic(net, skbmod_net_id); - - return tcf_idr_search(tn, a, index); -} - static struct tc_action_ops act_skbmod_ops = { .kind = "skbmod", .id = TCA_ACT_SKBMOD, @@ -301,27 +283,25 @@ static struct tc_action_ops act_skbmod_ops = { .dump = tcf_skbmod_dump, .init = tcf_skbmod_init, .cleanup = tcf_skbmod_cleanup, - .walk = tcf_skbmod_walker, - .lookup = tcf_skbmod_search, .size = sizeof(struct tcf_skbmod), }; static __net_init int skbmod_init_net(struct net *net) { - struct tc_action_net *tn = net_generic(net, skbmod_net_id); + struct tc_action_net *tn = net_generic(net, act_skbmod_ops.net_id); return tc_action_net_init(net, tn, &act_skbmod_ops); } static void __net_exit skbmod_exit_net(struct list_head *net_list) { - tc_action_net_exit(net_list, skbmod_net_id); + tc_action_net_exit(net_list, act_skbmod_ops.net_id); } static struct pernet_operations skbmod_net_ops = { .init = skbmod_init_net, .exit_batch = skbmod_exit_net, - .id = &skbmod_net_id, + .id = &act_skbmod_ops.net_id, .size = sizeof(struct tc_action_net), }; diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index 23aba03d26a8..2691a3d8e451 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -20,7 +20,6 @@ #include <linux/tc_act/tc_tunnel_key.h> #include <net/tc_act/tc_tunnel_key.h> -static unsigned int tunnel_key_net_id; static struct tc_action_ops act_tunnel_key_ops; static int tunnel_key_act(struct sk_buff *skb, const struct tc_action *a, @@ -358,7 +357,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, struct tcf_proto *tp, u32 act_flags, struct netlink_ext_ack *extack) { - struct tc_action_net *tn = net_generic(net, tunnel_key_net_id); + struct tc_action_net *tn = net_generic(net, act_tunnel_key_ops.net_id); bool bind = act_flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_TUNNEL_KEY_MAX + 1]; struct tcf_tunnel_key_params *params_new; @@ -770,23 +769,6 @@ nla_put_failure: return -1; } -static int tunnel_key_walker(struct net *net, struct sk_buff *skb, - struct netlink_callback *cb, int type, - const struct tc_action_ops *ops, - struct netlink_ext_ack *extack) -{ - struct tc_action_net *tn = net_generic(net, tunnel_key_net_id); - - return tcf_generic_walker(tn, skb, cb, type, ops, extack); -} - -static int tunnel_key_search(struct net *net, struct tc_action **a, u32 index) -{ - struct tc_action_net *tn = net_generic(net, tunnel_key_net_id); - - return tcf_idr_search(tn, a, index); -} - static void tcf_tunnel_encap_put_tunnel(void *priv) { struct ip_tunnel_info *tunnel = priv; @@ -808,7 +790,8 @@ static int tcf_tunnel_encap_get_tunnel(struct flow_action_entry *entry, static int tcf_tunnel_key_offload_act_setup(struct tc_action *act, void *entry_data, u32 *index_inc, - bool bind) + bool bind, + struct netlink_ext_ack *extack) { int err; @@ -823,6 +806,7 @@ static int tcf_tunnel_key_offload_act_setup(struct tc_action *act, } else if (is_tcf_tunnel_release(act)) { entry->id = FLOW_ACTION_TUNNEL_DECAP; } else { + NL_SET_ERR_MSG_MOD(extack, "Unsupported tunnel key mode offload"); return -EOPNOTSUPP; } *index_inc = 1; @@ -848,28 +832,26 @@ static struct tc_action_ops act_tunnel_key_ops = { .dump = tunnel_key_dump, .init = tunnel_key_init, .cleanup = tunnel_key_release, - .walk = tunnel_key_walker, - .lookup = tunnel_key_search, .offload_act_setup = tcf_tunnel_key_offload_act_setup, .size = sizeof(struct tcf_tunnel_key), }; static __net_init int tunnel_key_init_net(struct net *net) { - struct tc_action_net *tn = net_generic(net, tunnel_key_net_id); + struct tc_action_net *tn = net_generic(net, act_tunnel_key_ops.net_id); return tc_action_net_init(net, tn, &act_tunnel_key_ops); } static void __net_exit tunnel_key_exit_net(struct list_head *net_list) { - tc_action_net_exit(net_list, tunnel_key_net_id); + tc_action_net_exit(net_list, act_tunnel_key_ops.net_id); } static struct pernet_operations tunnel_key_net_ops = { .init = tunnel_key_init_net, .exit_batch = tunnel_key_exit_net, - .id = &tunnel_key_net_id, + .id = &act_tunnel_key_ops.net_id, .size = sizeof(struct tc_action_net), }; diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c index 756e2dcde1cd..7b24e898a3e6 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -16,7 +16,6 @@ #include <linux/tc_act/tc_vlan.h> #include <net/tc_act/tc_vlan.h> -static unsigned int vlan_net_id; static struct tc_action_ops act_vlan_ops; static int tcf_vlan_act(struct sk_buff *skb, const struct tc_action *a, @@ -117,7 +116,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { - struct tc_action_net *tn = net_generic(net, vlan_net_id); + struct tc_action_net *tn = net_generic(net, act_vlan_ops.net_id); bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_VLAN_MAX + 1]; struct tcf_chain *goto_ch = NULL; @@ -333,16 +332,6 @@ nla_put_failure: return -1; } -static int tcf_vlan_walker(struct net *net, struct sk_buff *skb, - struct netlink_callback *cb, int type, - const struct tc_action_ops *ops, - struct netlink_ext_ack *extack) -{ - struct tc_action_net *tn = net_generic(net, vlan_net_id); - - return tcf_generic_walker(tn, skb, cb, type, ops, extack); -} - static void tcf_vlan_stats_update(struct tc_action *a, u64 bytes, u64 packets, u64 drops, u64 lastuse, bool hw) { @@ -353,13 +342,6 @@ static void tcf_vlan_stats_update(struct tc_action *a, u64 bytes, u64 packets, tm->lastuse = max_t(u64, tm->lastuse, lastuse); } -static int tcf_vlan_search(struct net *net, struct tc_action **a, u32 index) -{ - struct tc_action_net *tn = net_generic(net, vlan_net_id); - - return tcf_idr_search(tn, a, index); -} - static size_t tcf_vlan_get_fill_size(const struct tc_action *act) { return nla_total_size(sizeof(struct tc_vlan)) @@ -369,7 +351,8 @@ static size_t tcf_vlan_get_fill_size(const struct tc_action *act) } static int tcf_vlan_offload_act_setup(struct tc_action *act, void *entry_data, - u32 *index_inc, bool bind) + u32 *index_inc, bool bind, + struct netlink_ext_ack *extack) { if (bind) { struct flow_action_entry *entry = entry_data; @@ -390,7 +373,15 @@ static int tcf_vlan_offload_act_setup(struct tc_action *act, void *entry_data, entry->vlan.proto = tcf_vlan_push_proto(act); entry->vlan.prio = tcf_vlan_push_prio(act); break; + case TCA_VLAN_ACT_POP_ETH: + entry->id = FLOW_ACTION_VLAN_POP_ETH; + break; + case TCA_VLAN_ACT_PUSH_ETH: + entry->id = FLOW_ACTION_VLAN_PUSH_ETH; + tcf_vlan_push_eth(entry->vlan_push_eth.src, entry->vlan_push_eth.dst, act); + break; default: + NL_SET_ERR_MSG_MOD(extack, "Unsupported vlan action mode offload"); return -EOPNOTSUPP; } *index_inc = 1; @@ -407,6 +398,12 @@ static int tcf_vlan_offload_act_setup(struct tc_action *act, void *entry_data, case TCA_VLAN_ACT_MODIFY: fl_action->id = FLOW_ACTION_VLAN_MANGLE; break; + case TCA_VLAN_ACT_POP_ETH: + fl_action->id = FLOW_ACTION_VLAN_POP_ETH; + break; + case TCA_VLAN_ACT_PUSH_ETH: + fl_action->id = FLOW_ACTION_VLAN_PUSH_ETH; + break; default: return -EOPNOTSUPP; } @@ -423,30 +420,28 @@ static struct tc_action_ops act_vlan_ops = { .dump = tcf_vlan_dump, .init = tcf_vlan_init, .cleanup = tcf_vlan_cleanup, - .walk = tcf_vlan_walker, .stats_update = tcf_vlan_stats_update, .get_fill_size = tcf_vlan_get_fill_size, - .lookup = tcf_vlan_search, .offload_act_setup = tcf_vlan_offload_act_setup, .size = sizeof(struct tcf_vlan), }; static __net_init int vlan_init_net(struct net *net) { - struct tc_action_net *tn = net_generic(net, vlan_net_id); + struct tc_action_net *tn = net_generic(net, act_vlan_ops.net_id); return tc_action_net_init(net, tn, &act_vlan_ops); } static void __net_exit vlan_exit_net(struct list_head *net_list) { - tc_action_net_exit(net_list, vlan_net_id); + tc_action_net_exit(net_list, act_vlan_ops.net_id); } static struct pernet_operations vlan_net_ops = { .init = vlan_init_net, .exit_batch = vlan_exit_net, - .id = &vlan_net_id, + .id = &act_vlan_ops.net_id, .size = sizeof(struct tc_action_net), }; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index d4e27c679123..50566db45949 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -49,6 +49,23 @@ static LIST_HEAD(tcf_proto_base); /* Protects list of registered TC modules. It is pure SMP lock. */ static DEFINE_RWLOCK(cls_mod_lock); +#ifdef CONFIG_NET_CLS_ACT +DEFINE_STATIC_KEY_FALSE(tc_skb_ext_tc); +EXPORT_SYMBOL(tc_skb_ext_tc); + +void tc_skb_ext_tc_enable(void) +{ + static_branch_inc(&tc_skb_ext_tc); +} +EXPORT_SYMBOL(tc_skb_ext_tc_enable); + +void tc_skb_ext_tc_disable(void) +{ + static_branch_dec(&tc_skb_ext_tc); +} +EXPORT_SYMBOL(tc_skb_ext_tc_disable); +#endif + static u32 destroy_obj_hashfn(const struct tcf_proto *tp) { return jhash_3words(tp->chain->index, tp->prio, @@ -177,7 +194,7 @@ EXPORT_SYMBOL(register_tcf_proto_ops); static struct workqueue_struct *tc_filter_wq; -int unregister_tcf_proto_ops(struct tcf_proto_ops *ops) +void unregister_tcf_proto_ops(struct tcf_proto_ops *ops) { struct tcf_proto_ops *t; int rc = -ENOENT; @@ -197,7 +214,8 @@ int unregister_tcf_proto_ops(struct tcf_proto_ops *ops) } } write_unlock(&cls_mod_lock); - return rc; + + WARN(rc, "unregister tc filter kind(%s) failed %d\n", ops->kind, rc); } EXPORT_SYMBOL(unregister_tcf_proto_ops); @@ -1044,7 +1062,7 @@ static int __tcf_qdisc_find(struct net *net, struct Qdisc **q, /* Find qdisc */ if (!*parent) { - *q = dev->qdisc; + *q = rcu_dereference(dev->qdisc); *parent = (*q)->handle; } else { *q = qdisc_lookup_rcu(dev, TC_H_MAJ(*parent)); @@ -1615,19 +1633,21 @@ int tcf_classify(struct sk_buff *skb, ret = __tcf_classify(skb, tp, orig_tp, res, compat_mode, &last_executed_chain); - /* If we missed on some chain */ - if (ret == TC_ACT_UNSPEC && last_executed_chain) { - struct tc_skb_cb *cb = tc_skb_cb(skb); + if (tc_skb_ext_tc_enabled()) { + /* If we missed on some chain */ + if (ret == TC_ACT_UNSPEC && last_executed_chain) { + struct tc_skb_cb *cb = tc_skb_cb(skb); - ext = tc_skb_ext_alloc(skb); - if (WARN_ON_ONCE(!ext)) - return TC_ACT_SHOT; - ext->chain = last_executed_chain; - ext->mru = cb->mru; - ext->post_ct = cb->post_ct; - ext->post_ct_snat = cb->post_ct_snat; - ext->post_ct_dnat = cb->post_ct_dnat; - ext->zone = cb->zone; + ext = tc_skb_ext_alloc(skb); + if (WARN_ON_ONCE(!ext)) + return TC_ACT_SHOT; + ext->chain = last_executed_chain; + ext->mru = cb->mru; + ext->post_ct = cb->post_ct; + ext->post_ct_snat = cb->post_ct_snat; + ext->post_ct_dnat = cb->post_ct_dnat; + ext->zone = cb->zone; + } } return ret; @@ -1653,10 +1673,10 @@ static int tcf_chain_tp_insert(struct tcf_chain *chain, if (chain->flushing) return -EAGAIN; + RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain, chain_info)); if (*chain_info->pprev == chain->filter_chain) tcf_chain0_head_change(chain, tp); tcf_proto_get(tp); - RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain, chain_info)); rcu_assign_pointer(*chain_info->pprev, tp); return 0; @@ -1945,9 +1965,9 @@ static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n, bool prio_allocate; u32 parent; u32 chain_index; - struct Qdisc *q = NULL; + struct Qdisc *q; struct tcf_chain_info chain_info; - struct tcf_chain *chain = NULL; + struct tcf_chain *chain; struct tcf_block *block; struct tcf_proto *tp; unsigned long cl; @@ -1957,9 +1977,6 @@ static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n, bool rtnl_held = false; u32 flags; - if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) - return -EPERM; - replay: tp_created = 0; @@ -1976,6 +1993,8 @@ replay: tp = NULL; cl = 0; block = NULL; + q = NULL; + chain = NULL; flags = 0; if (prio == 0) { @@ -2115,6 +2134,7 @@ replay: } if (chain->tmplt_ops && chain->tmplt_ops != tp->ops) { + tfilter_put(tp, fh); NL_SET_ERR_MSG(extack, "Chain template is set to a different filter kind"); err = -EINVAL; goto errout; @@ -2186,9 +2206,6 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n, int err; bool rtnl_held = false; - if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) - return -EPERM; - err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack); if (err < 0) @@ -2585,7 +2602,7 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) parent = tcm->tcm_parent; if (!parent) - q = dev->qdisc; + q = rtnl_dereference(dev->qdisc); else q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent)); if (!q) @@ -2798,17 +2815,14 @@ static int tc_ctl_chain(struct sk_buff *skb, struct nlmsghdr *n, struct tcmsg *t; u32 parent; u32 chain_index; - struct Qdisc *q = NULL; - struct tcf_chain *chain = NULL; + struct Qdisc *q; + struct tcf_chain *chain; struct tcf_block *block; unsigned long cl; int err; - if (n->nlmsg_type != RTM_GETCHAIN && - !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) - return -EPERM; - replay: + q = NULL; err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack); if (err < 0) @@ -2959,7 +2973,7 @@ static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb) return skb->len; if (!tcm->tcm_parent) - q = dev->qdisc; + q = rtnl_dereference(dev->qdisc); else q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent)); @@ -3491,22 +3505,27 @@ EXPORT_SYMBOL(tc_cleanup_offload_action); static int tc_setup_offload_act(struct tc_action *act, struct flow_action_entry *entry, - u32 *index_inc) + u32 *index_inc, + struct netlink_ext_ack *extack) { #ifdef CONFIG_NET_CLS_ACT - if (act->ops->offload_act_setup) - return act->ops->offload_act_setup(act, entry, index_inc, true); - else + if (act->ops->offload_act_setup) { + return act->ops->offload_act_setup(act, entry, index_inc, true, + extack); + } else { + NL_SET_ERR_MSG(extack, "Action does not support offload"); return -EOPNOTSUPP; + } #else return 0; #endif } int tc_setup_action(struct flow_action *flow_action, - struct tc_action *actions[]) + struct tc_action *actions[], + struct netlink_ext_ack *extack) { - int i, j, index, err = 0; + int i, j, k, index, err = 0; struct tc_action *act; BUILD_BUG_ON(TCA_ACT_HW_STATS_ANY != FLOW_ACTION_HW_STATS_ANY); @@ -3526,14 +3545,18 @@ int tc_setup_action(struct flow_action *flow_action, if (err) goto err_out_locked; - entry->hw_stats = tc_act_hw_stats(act->hw_stats); - entry->hw_index = act->tcfa_index; index = 0; - err = tc_setup_offload_act(act, entry, &index); - if (!err) - j += index; - else + err = tc_setup_offload_act(act, entry, &index, extack); + if (err) goto err_out_locked; + + for (k = 0; k < index ; k++) { + entry[k].hw_stats = tc_act_hw_stats(act->hw_stats); + entry[k].hw_index = act->tcfa_index; + } + + j += index; + spin_unlock_bh(&act->tcfa_lock); } @@ -3548,13 +3571,14 @@ err_out_locked: } int tc_setup_offload_action(struct flow_action *flow_action, - const struct tcf_exts *exts) + const struct tcf_exts *exts, + struct netlink_ext_ack *extack) { #ifdef CONFIG_NET_CLS_ACT if (!exts) return 0; - return tc_setup_action(flow_action, exts->actions); + return tc_setup_action(flow_action, exts->actions, extack); #else return 0; #endif @@ -3606,9 +3630,6 @@ int tcf_qevent_init(struct tcf_qevent *qe, struct Qdisc *sch, if (err) return err; - if (!block_index) - return 0; - qe->info.binder_type = binder_type; qe->info.chain_head_change = tcf_chain_head_change_dflt; qe->info.chain_head_change_priv = &qe->filter_chain; diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index 8158fc9ee1ab..d229ce99e554 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -251,15 +251,8 @@ static void basic_walk(struct tcf_proto *tp, struct tcf_walker *arg, struct basic_filter *f; list_for_each_entry(f, &head->flist, link) { - if (arg->count < arg->skip) - goto skip; - - if (arg->fn(tp, f, arg) < 0) { - arg->stop = 1; + if (!tc_cls_stats_dump(tp, arg, f)) break; - } -skip: - arg->count++; } } @@ -268,12 +261,7 @@ static void basic_bind_class(void *fh, u32 classid, unsigned long cl, void *q, { struct basic_filter *f = fh; - if (f && f->res.classid == classid) { - if (cl) - __tcf_bind_filter(q, &f->res, base); - else - __tcf_unbind_filter(q, &f->res); - } + tc_cls_bind_class(classid, cl, q, &f->res, base); } static int basic_dump(struct net *net, struct tcf_proto *tp, void *fh, diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index df19a847829e..bc317b3eac12 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -102,6 +102,8 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp, bpf_compute_data_pointers(skb); filter_res = bpf_prog_run(prog->filter, skb); } + if (unlikely(!skb->tstamp && skb->mono_delivery_time)) + skb->mono_delivery_time = 0; if (prog->exts_integrated) { res->class = 0; @@ -633,12 +635,7 @@ static void cls_bpf_bind_class(void *fh, u32 classid, unsigned long cl, { struct cls_bpf_prog *prog = fh; - if (prog && prog->res.classid == classid) { - if (cl) - __tcf_bind_filter(q, &prog->res, base); - else - __tcf_unbind_filter(q, &prog->res); - } + tc_cls_bind_class(classid, cl, q, &prog->res, base); } static void cls_bpf_walk(struct tcf_proto *tp, struct tcf_walker *arg, @@ -648,14 +645,8 @@ static void cls_bpf_walk(struct tcf_proto *tp, struct tcf_walker *arg, struct cls_bpf_prog *prog; list_for_each_entry(prog, &head->plist, link) { - if (arg->count < arg->skip) - goto skip; - if (arg->fn(tp, prog, arg) < 0) { - arg->stop = 1; + if (!tc_cls_stats_dump(tp, arg, prog)) break; - } -skip: - arg->count++; } } diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 972303aa8edd..014cd3de7b5d 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -683,14 +683,8 @@ static void flow_walk(struct tcf_proto *tp, struct tcf_walker *arg, struct flow_filter *f; list_for_each_entry(f, &head->filters, list) { - if (arg->count < arg->skip) - goto skip; - if (arg->fn(tp, f, arg) < 0) { - arg->stop = 1; + if (!tc_cls_stats_dump(tp, arg, f)) break; - } -skip: - arg->count++; } } diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 1a9b1f140f9e..25bc57ee6ea1 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -16,6 +16,7 @@ #include <linux/in6.h> #include <linux/ip.h> #include <linux/mpls.h> +#include <linux/ppp_defs.h> #include <net/sch_generic.h> #include <net/pkt_cls.h> @@ -25,6 +26,7 @@ #include <net/geneve.h> #include <net/vxlan.h> #include <net/erspan.h> +#include <net/gtp.h> #include <net/dst.h> #include <net/dst_metadata.h> @@ -62,15 +64,12 @@ struct fl_flow_key { struct flow_dissector_key_ip ip; struct flow_dissector_key_ip enc_ip; struct flow_dissector_key_enc_opts enc_opts; - union { - struct flow_dissector_key_ports tp; - struct { - struct flow_dissector_key_ports tp_min; - struct flow_dissector_key_ports tp_max; - }; - } tp_range; + struct flow_dissector_key_ports_range tp_range; struct flow_dissector_key_ct ct; struct flow_dissector_key_hash hash; + struct flow_dissector_key_num_of_vlans num_of_vlans; + struct flow_dissector_key_pppoe pppoe; + struct flow_dissector_key_l2tpv3 l2tpv3; } __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */ struct fl_flow_mask_range { @@ -463,14 +462,12 @@ static int fl_hw_replace_filter(struct tcf_proto *tp, cls_flower.rule->match.key = &f->mkey; cls_flower.classid = f->res.classid; - err = tc_setup_offload_action(&cls_flower.rule->action, &f->exts); + err = tc_setup_offload_action(&cls_flower.rule->action, &f->exts, + cls_flower.common.extack); if (err) { kfree(cls_flower.rule); - if (skip_sw) { - NL_SET_ERR_MSG_MOD(extack, "Failed to setup flow action"); - return err; - } - return 0; + + return skip_sw ? err : 0; } err = tc_setup_cb_add(block, tp, TC_SETUP_CLSFLOWER, &cls_flower, @@ -713,6 +710,10 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = { [TCA_FLOWER_FLAGS] = { .type = NLA_U32 }, [TCA_FLOWER_KEY_HASH] = { .type = NLA_U32 }, [TCA_FLOWER_KEY_HASH_MASK] = { .type = NLA_U32 }, + [TCA_FLOWER_KEY_NUM_OF_VLANS] = { .type = NLA_U8 }, + [TCA_FLOWER_KEY_PPPOE_SID] = { .type = NLA_U16 }, + [TCA_FLOWER_KEY_PPP_PROTO] = { .type = NLA_U16 }, + [TCA_FLOWER_KEY_L2TPV3_SID] = { .type = NLA_U32 }, }; @@ -723,6 +724,7 @@ enc_opts_policy[TCA_FLOWER_KEY_ENC_OPTS_MAX + 1] = { [TCA_FLOWER_KEY_ENC_OPTS_GENEVE] = { .type = NLA_NESTED }, [TCA_FLOWER_KEY_ENC_OPTS_VXLAN] = { .type = NLA_NESTED }, [TCA_FLOWER_KEY_ENC_OPTS_ERSPAN] = { .type = NLA_NESTED }, + [TCA_FLOWER_KEY_ENC_OPTS_GTP] = { .type = NLA_NESTED }, }; static const struct nla_policy @@ -747,6 +749,12 @@ erspan_opt_policy[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_MAX + 1] = { }; static const struct nla_policy +gtp_opt_policy[TCA_FLOWER_KEY_ENC_OPT_GTP_MAX + 1] = { + [TCA_FLOWER_KEY_ENC_OPT_GTP_PDU_TYPE] = { .type = NLA_U8 }, + [TCA_FLOWER_KEY_ENC_OPT_GTP_QFI] = { .type = NLA_U8 }, +}; + +static const struct nla_policy mpls_stack_entry_policy[TCA_FLOWER_KEY_MPLS_OPT_LSE_MAX + 1] = { [TCA_FLOWER_KEY_MPLS_OPT_LSE_DEPTH] = { .type = NLA_U8 }, [TCA_FLOWER_KEY_MPLS_OPT_LSE_TTL] = { .type = NLA_U8 }, @@ -1005,6 +1013,7 @@ static int fl_set_key_mpls(struct nlattr **tb, static void fl_set_key_vlan(struct nlattr **tb, __be16 ethertype, int vlan_id_key, int vlan_prio_key, + int vlan_next_eth_type_key, struct flow_dissector_key_vlan *key_val, struct flow_dissector_key_vlan *key_mask) { @@ -1021,8 +1030,59 @@ static void fl_set_key_vlan(struct nlattr **tb, VLAN_PRIORITY_MASK; key_mask->vlan_priority = VLAN_PRIORITY_MASK; } - key_val->vlan_tpid = ethertype; - key_mask->vlan_tpid = cpu_to_be16(~0); + if (ethertype) { + key_val->vlan_tpid = ethertype; + key_mask->vlan_tpid = cpu_to_be16(~0); + } + if (tb[vlan_next_eth_type_key]) { + key_val->vlan_eth_type = + nla_get_be16(tb[vlan_next_eth_type_key]); + key_mask->vlan_eth_type = cpu_to_be16(~0); + } +} + +static void fl_set_key_pppoe(struct nlattr **tb, + struct flow_dissector_key_pppoe *key_val, + struct flow_dissector_key_pppoe *key_mask, + struct fl_flow_key *key, + struct fl_flow_key *mask) +{ + /* key_val::type must be set to ETH_P_PPP_SES + * because ETH_P_PPP_SES was stored in basic.n_proto + * which might get overwritten by ppp_proto + * or might be set to 0, the role of key_val::type + * is simmilar to vlan_key::tpid + */ + key_val->type = htons(ETH_P_PPP_SES); + key_mask->type = cpu_to_be16(~0); + + if (tb[TCA_FLOWER_KEY_PPPOE_SID]) { + key_val->session_id = + nla_get_be16(tb[TCA_FLOWER_KEY_PPPOE_SID]); + key_mask->session_id = cpu_to_be16(~0); + } + if (tb[TCA_FLOWER_KEY_PPP_PROTO]) { + key_val->ppp_proto = + nla_get_be16(tb[TCA_FLOWER_KEY_PPP_PROTO]); + key_mask->ppp_proto = cpu_to_be16(~0); + + if (key_val->ppp_proto == htons(PPP_IP)) { + key->basic.n_proto = htons(ETH_P_IP); + mask->basic.n_proto = cpu_to_be16(~0); + } else if (key_val->ppp_proto == htons(PPP_IPV6)) { + key->basic.n_proto = htons(ETH_P_IPV6); + mask->basic.n_proto = cpu_to_be16(~0); + } else if (key_val->ppp_proto == htons(PPP_MPLS_UC)) { + key->basic.n_proto = htons(ETH_P_MPLS_UC); + mask->basic.n_proto = cpu_to_be16(~0); + } else if (key_val->ppp_proto == htons(PPP_MPLS_MC)) { + key->basic.n_proto = htons(ETH_P_MPLS_MC); + mask->basic.n_proto = cpu_to_be16(~0); + } + } else { + key->basic.n_proto = 0; + mask->basic.n_proto = cpu_to_be16(0); + } } static void fl_set_key_flag(u32 flower_key, u32 flower_mask, @@ -1262,6 +1322,49 @@ static int fl_set_erspan_opt(const struct nlattr *nla, struct fl_flow_key *key, return sizeof(*md); } +static int fl_set_gtp_opt(const struct nlattr *nla, struct fl_flow_key *key, + int depth, int option_len, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[TCA_FLOWER_KEY_ENC_OPT_GTP_MAX + 1]; + struct gtp_pdu_session_info *sinfo; + u8 len = key->enc_opts.len; + int err; + + sinfo = (struct gtp_pdu_session_info *)&key->enc_opts.data[len]; + memset(sinfo, 0xff, option_len); + + if (!depth) + return sizeof(*sinfo); + + if (nla_type(nla) != TCA_FLOWER_KEY_ENC_OPTS_GTP) { + NL_SET_ERR_MSG_MOD(extack, "Non-gtp option type for mask"); + return -EINVAL; + } + + err = nla_parse_nested(tb, TCA_FLOWER_KEY_ENC_OPT_GTP_MAX, nla, + gtp_opt_policy, extack); + if (err < 0) + return err; + + if (!option_len && + (!tb[TCA_FLOWER_KEY_ENC_OPT_GTP_PDU_TYPE] || + !tb[TCA_FLOWER_KEY_ENC_OPT_GTP_QFI])) { + NL_SET_ERR_MSG_MOD(extack, + "Missing tunnel key gtp option pdu type or qfi"); + return -EINVAL; + } + + if (tb[TCA_FLOWER_KEY_ENC_OPT_GTP_PDU_TYPE]) + sinfo->pdu_type = + nla_get_u8(tb[TCA_FLOWER_KEY_ENC_OPT_GTP_PDU_TYPE]); + + if (tb[TCA_FLOWER_KEY_ENC_OPT_GTP_QFI]) + sinfo->qfi = nla_get_u8(tb[TCA_FLOWER_KEY_ENC_OPT_GTP_QFI]); + + return sizeof(*sinfo); +} + static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key, struct fl_flow_key *mask, struct netlink_ext_ack *extack) @@ -1386,6 +1489,38 @@ static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key, return -EINVAL; } break; + case TCA_FLOWER_KEY_ENC_OPTS_GTP: + if (key->enc_opts.dst_opt_type) { + NL_SET_ERR_MSG_MOD(extack, + "Duplicate type for gtp options"); + return -EINVAL; + } + option_len = 0; + key->enc_opts.dst_opt_type = TUNNEL_GTP_OPT; + option_len = fl_set_gtp_opt(nla_opt_key, key, + key_depth, option_len, + extack); + if (option_len < 0) + return option_len; + + key->enc_opts.len += option_len; + /* At the same time we need to parse through the mask + * in order to verify exact and mask attribute lengths. + */ + mask->enc_opts.dst_opt_type = TUNNEL_GTP_OPT; + option_len = fl_set_gtp_opt(nla_opt_msk, mask, + msk_depth, option_len, + extack); + if (option_len < 0) + return option_len; + + mask->enc_opts.len += option_len; + if (key->enc_opts.len != mask->enc_opts.len) { + NL_SET_ERR_MSG_MOD(extack, + "Key and mask miss aligned"); + return -EINVAL; + } + break; default: NL_SET_ERR_MSG(extack, "Unknown tunnel option type"); return -EINVAL; @@ -1492,6 +1627,26 @@ static int fl_set_key_ct(struct nlattr **tb, return 0; } +static bool is_vlan_key(struct nlattr *tb, __be16 *ethertype, + struct fl_flow_key *key, struct fl_flow_key *mask, + int vthresh) +{ + const bool good_num_of_vlans = key->num_of_vlans.num_of_vlans > vthresh; + + if (!tb) { + *ethertype = 0; + return good_num_of_vlans; + } + + *ethertype = nla_get_be16(tb); + if (good_num_of_vlans || eth_type_vlan(*ethertype)) + return true; + + key->basic.n_proto = *ethertype; + mask->basic.n_proto = cpu_to_be16(~0); + return false; +} + static int fl_set_key(struct net *net, struct nlattr **tb, struct fl_flow_key *key, struct fl_flow_key *mask, struct netlink_ext_ack *extack) @@ -1513,38 +1668,36 @@ static int fl_set_key(struct net *net, struct nlattr **tb, fl_set_key_val(tb, key->eth.src, TCA_FLOWER_KEY_ETH_SRC, mask->eth.src, TCA_FLOWER_KEY_ETH_SRC_MASK, sizeof(key->eth.src)); - - if (tb[TCA_FLOWER_KEY_ETH_TYPE]) { - ethertype = nla_get_be16(tb[TCA_FLOWER_KEY_ETH_TYPE]); - - if (eth_type_vlan(ethertype)) { - fl_set_key_vlan(tb, ethertype, TCA_FLOWER_KEY_VLAN_ID, - TCA_FLOWER_KEY_VLAN_PRIO, &key->vlan, - &mask->vlan); - - if (tb[TCA_FLOWER_KEY_VLAN_ETH_TYPE]) { - ethertype = nla_get_be16(tb[TCA_FLOWER_KEY_VLAN_ETH_TYPE]); - if (eth_type_vlan(ethertype)) { - fl_set_key_vlan(tb, ethertype, - TCA_FLOWER_KEY_CVLAN_ID, - TCA_FLOWER_KEY_CVLAN_PRIO, - &key->cvlan, &mask->cvlan); - fl_set_key_val(tb, &key->basic.n_proto, - TCA_FLOWER_KEY_CVLAN_ETH_TYPE, - &mask->basic.n_proto, - TCA_FLOWER_UNSPEC, - sizeof(key->basic.n_proto)); - } else { - key->basic.n_proto = ethertype; - mask->basic.n_proto = cpu_to_be16(~0); - } - } - } else { - key->basic.n_proto = ethertype; - mask->basic.n_proto = cpu_to_be16(~0); + fl_set_key_val(tb, &key->num_of_vlans, + TCA_FLOWER_KEY_NUM_OF_VLANS, + &mask->num_of_vlans, + TCA_FLOWER_UNSPEC, + sizeof(key->num_of_vlans)); + + if (is_vlan_key(tb[TCA_FLOWER_KEY_ETH_TYPE], ðertype, key, mask, 0)) { + fl_set_key_vlan(tb, ethertype, TCA_FLOWER_KEY_VLAN_ID, + TCA_FLOWER_KEY_VLAN_PRIO, + TCA_FLOWER_KEY_VLAN_ETH_TYPE, + &key->vlan, &mask->vlan); + + if (is_vlan_key(tb[TCA_FLOWER_KEY_VLAN_ETH_TYPE], + ðertype, key, mask, 1)) { + fl_set_key_vlan(tb, ethertype, + TCA_FLOWER_KEY_CVLAN_ID, + TCA_FLOWER_KEY_CVLAN_PRIO, + TCA_FLOWER_KEY_CVLAN_ETH_TYPE, + &key->cvlan, &mask->cvlan); + fl_set_key_val(tb, &key->basic.n_proto, + TCA_FLOWER_KEY_CVLAN_ETH_TYPE, + &mask->basic.n_proto, + TCA_FLOWER_UNSPEC, + sizeof(key->basic.n_proto)); } } + if (key->basic.n_proto == htons(ETH_P_PPP_SES)) + fl_set_key_pppoe(tb, &key->pppoe, &mask->pppoe, key, mask); + if (key->basic.n_proto == htons(ETH_P_IP) || key->basic.n_proto == htons(ETH_P_IPV6)) { fl_set_key_val(tb, &key->basic.ip_proto, TCA_FLOWER_KEY_IP_PROTO, @@ -1639,6 +1792,11 @@ static int fl_set_key(struct net *net, struct nlattr **tb, fl_set_key_val(tb, key->arp.tha, TCA_FLOWER_KEY_ARP_THA, mask->arp.tha, TCA_FLOWER_KEY_ARP_THA_MASK, sizeof(key->arp.tha)); + } else if (key->basic.ip_proto == IPPROTO_L2TP) { + fl_set_key_val(tb, &key->l2tpv3.session_id, + TCA_FLOWER_KEY_L2TPV3_SID, + &mask->l2tpv3.session_id, TCA_FLOWER_UNSPEC, + sizeof(key->l2tpv3.session_id)); } if (key->basic.ip_proto == IPPROTO_TCP || @@ -1815,6 +1973,12 @@ static void fl_init_dissector(struct flow_dissector *dissector, FLOW_DISSECTOR_KEY_CT, ct); FL_KEY_SET_IF_MASKED(mask, keys, cnt, FLOW_DISSECTOR_KEY_HASH, hash); + FL_KEY_SET_IF_MASKED(mask, keys, cnt, + FLOW_DISSECTOR_KEY_NUM_OF_VLANS, num_of_vlans); + FL_KEY_SET_IF_MASKED(mask, keys, cnt, + FLOW_DISSECTOR_KEY_PPPOE, pppoe); + FL_KEY_SET_IF_MASKED(mask, keys, cnt, + FLOW_DISSECTOR_KEY_L2TPV3, l2tpv3); skb_flow_dissector_init(dissector, keys, cnt); } @@ -2271,11 +2435,11 @@ static int fl_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb, cls_flower.rule->match.mask = &f->mask->key; cls_flower.rule->match.key = &f->mkey; - err = tc_setup_offload_action(&cls_flower.rule->action, &f->exts); + err = tc_setup_offload_action(&cls_flower.rule->action, &f->exts, + cls_flower.common.extack); if (err) { kfree(cls_flower.rule); if (tc_skip_sw(f->flags)) { - NL_SET_ERR_MSG_MOD(extack, "Failed to setup flow action"); __fl_put(f); return err; } @@ -2761,6 +2925,34 @@ nla_put_failure: return -EMSGSIZE; } +static int fl_dump_key_gtp_opt(struct sk_buff *skb, + struct flow_dissector_key_enc_opts *enc_opts) + +{ + struct gtp_pdu_session_info *session_info; + struct nlattr *nest; + + nest = nla_nest_start_noflag(skb, TCA_FLOWER_KEY_ENC_OPTS_GTP); + if (!nest) + goto nla_put_failure; + + session_info = (struct gtp_pdu_session_info *)&enc_opts->data[0]; + + if (nla_put_u8(skb, TCA_FLOWER_KEY_ENC_OPT_GTP_PDU_TYPE, + session_info->pdu_type)) + goto nla_put_failure; + + if (nla_put_u8(skb, TCA_FLOWER_KEY_ENC_OPT_GTP_QFI, session_info->qfi)) + goto nla_put_failure; + + nla_nest_end(skb, nest); + return 0; + +nla_put_failure: + nla_nest_cancel(skb, nest); + return -EMSGSIZE; +} + static int fl_dump_key_ct(struct sk_buff *skb, struct flow_dissector_key_ct *key, struct flow_dissector_key_ct *mask) @@ -2824,6 +3016,11 @@ static int fl_dump_key_options(struct sk_buff *skb, int enc_opt_type, if (err) goto nla_put_failure; break; + case TUNNEL_GTP_OPT: + err = fl_dump_key_gtp_opt(skb, enc_opts); + if (err) + goto nla_put_failure; + break; default: goto nla_put_failure; } @@ -2870,6 +3067,11 @@ static int fl_dump_key(struct sk_buff *skb, struct net *net, sizeof(key->basic.n_proto))) goto nla_put_failure; + if (mask->num_of_vlans.num_of_vlans) { + if (nla_put_u8(skb, TCA_FLOWER_KEY_NUM_OF_VLANS, key->num_of_vlans.num_of_vlans)) + goto nla_put_failure; + } + if (fl_dump_key_mpls(skb, &key->mpls, &mask->mpls)) goto nla_put_failure; @@ -2886,13 +3088,13 @@ static int fl_dump_key(struct sk_buff *skb, struct net *net, goto nla_put_failure; if (mask->basic.n_proto) { - if (mask->cvlan.vlan_tpid) { + if (mask->cvlan.vlan_eth_type) { if (nla_put_be16(skb, TCA_FLOWER_KEY_CVLAN_ETH_TYPE, key->basic.n_proto)) goto nla_put_failure; - } else if (mask->vlan.vlan_tpid) { + } else if (mask->vlan.vlan_eth_type) { if (nla_put_be16(skb, TCA_FLOWER_KEY_VLAN_ETH_TYPE, - key->basic.n_proto)) + key->vlan.vlan_eth_type)) goto nla_put_failure; } } @@ -2905,6 +3107,17 @@ static int fl_dump_key(struct sk_buff *skb, struct net *net, fl_dump_key_ip(skb, false, &key->ip, &mask->ip))) goto nla_put_failure; + if (mask->pppoe.session_id) { + if (nla_put_be16(skb, TCA_FLOWER_KEY_PPPOE_SID, + key->pppoe.session_id)) + goto nla_put_failure; + } + if (mask->basic.n_proto && mask->pppoe.ppp_proto) { + if (nla_put_be16(skb, TCA_FLOWER_KEY_PPP_PROTO, + key->pppoe.ppp_proto)) + goto nla_put_failure; + } + if (key->control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS && (fl_dump_key_val(skb, &key->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC, &mask->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC_MASK, @@ -2992,6 +3205,13 @@ static int fl_dump_key(struct sk_buff *skb, struct net *net, mask->arp.tha, TCA_FLOWER_KEY_ARP_THA_MASK, sizeof(key->arp.tha)))) goto nla_put_failure; + else if (key->basic.ip_proto == IPPROTO_L2TP && + fl_dump_key_val(skb, &key->l2tpv3.session_id, + TCA_FLOWER_KEY_L2TPV3_SID, + &mask->l2tpv3.session_id, + TCA_FLOWER_UNSPEC, + sizeof(key->l2tpv3.session_id))) + goto nla_put_failure; if ((key->basic.ip_proto == IPPROTO_TCP || key->basic.ip_proto == IPPROTO_UDP || @@ -3185,12 +3405,7 @@ static void fl_bind_class(void *fh, u32 classid, unsigned long cl, void *q, { struct cls_fl_filter *f = fh; - if (f && f->res.classid == classid) { - if (cl) - __tcf_bind_filter(q, &f->res, base); - else - __tcf_unbind_filter(q, &f->res); - } + tc_cls_bind_class(classid, cl, q, &f->res, base); } static bool fl_delete_empty(struct tcf_proto *tp) diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index 8654b0ce997c..a32351da968c 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -358,15 +358,8 @@ static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg, for (f = rtnl_dereference(head->ht[h]); f; f = rtnl_dereference(f->next)) { - if (arg->count < arg->skip) { - arg->count++; - continue; - } - if (arg->fn(tp, f, arg) < 0) { - arg->stop = 1; + if (!tc_cls_stats_dump(tp, arg, f)) return; - } - arg->count++; } } } @@ -423,12 +416,7 @@ static void fw_bind_class(void *fh, u32 classid, unsigned long cl, void *q, { struct fw_filter *f = fh; - if (f && f->res.classid == classid) { - if (cl) - __tcf_bind_filter(q, &f->res, base); - else - __tcf_unbind_filter(q, &f->res); - } + tc_cls_bind_class(classid, cl, q, &f->res, base); } static struct tcf_proto_ops cls_fw_ops __read_mostly = { diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c index ca5670fd5228..39a5d9c170de 100644 --- a/net/sched/cls_matchall.c +++ b/net/sched/cls_matchall.c @@ -97,16 +97,13 @@ static int mall_replace_hw_filter(struct tcf_proto *tp, cls_mall.command = TC_CLSMATCHALL_REPLACE; cls_mall.cookie = cookie; - err = tc_setup_offload_action(&cls_mall.rule->action, &head->exts); + err = tc_setup_offload_action(&cls_mall.rule->action, &head->exts, + cls_mall.common.extack); if (err) { kfree(cls_mall.rule); mall_destroy_hw_filter(tp, head, cookie, NULL); - if (skip_sw) - NL_SET_ERR_MSG_MOD(extack, "Failed to setup flow action"); - else - err = 0; - return err; + return skip_sw ? err : 0; } err = tc_setup_cb_add(block, tp, TC_SETUP_CLSMATCHALL, &cls_mall, @@ -302,14 +299,12 @@ static int mall_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb, TC_CLSMATCHALL_REPLACE : TC_CLSMATCHALL_DESTROY; cls_mall.cookie = (unsigned long)head; - err = tc_setup_offload_action(&cls_mall.rule->action, &head->exts); + err = tc_setup_offload_action(&cls_mall.rule->action, &head->exts, + cls_mall.common.extack); if (err) { kfree(cls_mall.rule); - if (add && tc_skip_sw(head->flags)) { - NL_SET_ERR_MSG_MOD(extack, "Failed to setup flow action"); - return err; - } - return 0; + + return add && tc_skip_sw(head->flags) ? err : 0; } err = tc_setup_cb_reoffload(block, tp, add, cb, TC_SETUP_CLSMATCHALL, @@ -318,10 +313,7 @@ static int mall_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb, tc_cleanup_offload_action(&cls_mall.rule->action); kfree(cls_mall.rule); - if (err) - return err; - - return 0; + return err; } static void mall_stats_hw_filter(struct tcf_proto *tp, @@ -402,12 +394,7 @@ static void mall_bind_class(void *fh, u32 classid, unsigned long cl, void *q, { struct cls_mall_head *head = fh; - if (head && head->res.classid == classid) { - if (cl) - __tcf_bind_filter(q, &head->res, base); - else - __tcf_unbind_filter(q, &head->res); - } + tc_cls_bind_class(classid, cl, q, &head->res, base); } static struct tcf_proto_ops cls_mall_ops __read_mostly = { diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index a35ab8c27866..9e43b929d4ca 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -424,6 +424,11 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp, return -EINVAL; } + if (!nhandle) { + NL_SET_ERR_MSG(extack, "Replacing with handle of 0 is invalid"); + return -EINVAL; + } + h1 = to_hash(nhandle); b = rtnl_dereference(head->table[h1]); if (!b) { @@ -477,8 +482,13 @@ static int route4_change(struct net *net, struct sk_buff *in_skb, int err; bool new = true; + if (!handle) { + NL_SET_ERR_MSG(extack, "Creating with handle of 0 is invalid"); + return -EINVAL; + } + if (opt == NULL) - return handle ? -EINVAL : 0; + return -EINVAL; err = nla_parse_nested_deprecated(tb, TCA_ROUTE4_MAX, opt, route4_policy, NULL); @@ -486,7 +496,7 @@ static int route4_change(struct net *net, struct sk_buff *in_skb, return err; fold = *arg; - if (fold && handle && fold->handle != handle) + if (fold && fold->handle != handle) return -EINVAL; err = -ENOBUFS; @@ -526,7 +536,7 @@ static int route4_change(struct net *net, struct sk_buff *in_skb, rcu_assign_pointer(f->next, f1); rcu_assign_pointer(*fp, f); - if (fold && fold->handle && f->handle != fold->handle) { + if (fold) { th = to_hash(fold->handle); h = from_hash(fold->handle >> 16); b = rtnl_dereference(head->table[th]); @@ -577,15 +587,8 @@ static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg, for (f = rtnl_dereference(b->ht[h1]); f; f = rtnl_dereference(f->next)) { - if (arg->count < arg->skip) { - arg->count++; - continue; - } - if (arg->fn(tp, f, arg) < 0) { - arg->stop = 1; + if (!tc_cls_stats_dump(tp, arg, f)) return; - } - arg->count++; } } } @@ -646,12 +649,7 @@ static void route4_bind_class(void *fh, u32 classid, unsigned long cl, void *q, { struct route4_filter *f = fh; - if (f && f->res.classid == classid) { - if (cl) - __tcf_bind_filter(q, &f->res, base); - else - __tcf_unbind_filter(q, &f->res); - } + tc_cls_bind_class(classid, cl, q, &f->res, base); } static struct tcf_proto_ops cls_route4_ops __read_mostly = { diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h index 5cd9d6b143c4..b00a7dbd0587 100644 --- a/net/sched/cls_rsvp.h +++ b/net/sched/cls_rsvp.h @@ -671,15 +671,8 @@ static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg, for (f = rtnl_dereference(s->ht[h1]); f; f = rtnl_dereference(f->next)) { - if (arg->count < arg->skip) { - arg->count++; - continue; - } - if (arg->fn(tp, f, arg) < 0) { - arg->stop = 1; + if (!tc_cls_stats_dump(tp, arg, f)) return; - } - arg->count++; } } } @@ -740,12 +733,7 @@ static void rsvp_bind_class(void *fh, u32 classid, unsigned long cl, void *q, { struct rsvp_filter *f = fh; - if (f && f->res.classid == classid) { - if (cl) - __tcf_bind_filter(q, &f->res, base); - else - __tcf_unbind_filter(q, &f->res); - } + tc_cls_bind_class(classid, cl, q, &f->res, base); } static struct tcf_proto_ops RSVP_OPS __read_mostly = { diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index 742c7d49a958..1c9eeb98d826 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -566,13 +566,8 @@ static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker, for (i = 0; i < p->hash; i++) { if (!p->perfect[i].res.class) continue; - if (walker->count >= walker->skip) { - if (walker->fn(tp, p->perfect + i, walker) < 0) { - walker->stop = 1; - return; - } - } - walker->count++; + if (!tc_cls_stats_dump(tp, walker, p->perfect + i)) + return; } } if (!p->h) @@ -580,13 +575,8 @@ static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker, for (i = 0; i < p->hash; i++) { for (f = rtnl_dereference(p->h[i]); f; f = next) { next = rtnl_dereference(f->next); - if (walker->count >= walker->skip) { - if (walker->fn(tp, &f->result, walker) < 0) { - walker->stop = 1; - return; - } - } - walker->count++; + if (!tc_cls_stats_dump(tp, walker, &f->result)) + return; } } } @@ -701,12 +691,7 @@ static void tcindex_bind_class(void *fh, u32 classid, unsigned long cl, { struct tcindex_filter_result *r = fh; - if (r && r->res.classid == classid) { - if (cl) - __tcf_bind_filter(q, &r->res, base); - else - __tcf_unbind_filter(q, &r->res); - } + tc_cls_bind_class(classid, cl, q, &r->res, base); } static struct tcf_proto_ops cls_tcindex_ops __read_mostly = { diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index cf5649292ee0..34d25f7a0687 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -386,14 +386,19 @@ static int u32_init(struct tcf_proto *tp) return 0; } -static int u32_destroy_key(struct tc_u_knode *n, bool free_pf) +static void __u32_destroy_key(struct tc_u_knode *n) { struct tc_u_hnode *ht = rtnl_dereference(n->ht_down); tcf_exts_destroy(&n->exts); - tcf_exts_put_net(&n->exts); if (ht && --ht->refcnt == 0) kfree(ht); + kfree(n); +} + +static void u32_destroy_key(struct tc_u_knode *n, bool free_pf) +{ + tcf_exts_put_net(&n->exts); #ifdef CONFIG_CLS_U32_PERF if (free_pf) free_percpu(n->pf); @@ -402,8 +407,7 @@ static int u32_destroy_key(struct tc_u_knode *n, bool free_pf) if (free_pf) free_percpu(n->pcpu_success); #endif - kfree(n); - return 0; + __u32_destroy_key(n); } /* u32_delete_key_rcu should be called when free'ing a copied @@ -811,10 +815,6 @@ static struct tc_u_knode *u32_init_knode(struct net *net, struct tcf_proto *tp, new->flags = n->flags; RCU_INIT_POINTER(new->ht_down, ht); - /* bump reference count as long as we hold pointer to structure */ - if (ht) - ht->refcnt++; - #ifdef CONFIG_CLS_U32_PERF /* Statistics may be incremented by readers during update * so we must keep them in tact. When the node is later destroyed @@ -836,6 +836,10 @@ static struct tc_u_knode *u32_init_knode(struct net *net, struct tcf_proto *tp, return NULL; } + /* bump reference count as long as we hold pointer to structure */ + if (ht) + ht->refcnt++; + return new; } @@ -900,13 +904,13 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, extack); if (err) { - u32_destroy_key(new, false); + __u32_destroy_key(new); return err; } err = u32_replace_hw_knode(tp, new, flags, extack); if (err) { - u32_destroy_key(new, false); + __u32_destroy_key(new); return err; } @@ -1036,7 +1040,11 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, } #endif - memcpy(&n->sel, s, sel_size); + unsafe_memcpy(&n->sel, s, sel_size, + /* A composite flex-array structure destination, + * which was correctly sized with struct_size(), + * bounds-checked against nla_len(), and allocated + * above. */); RCU_INIT_POINTER(n->ht_up, ht); n->handle = handle; n->fshift = s->hmask ? ffs(ntohl(s->hmask)) - 1 : 0; @@ -1121,26 +1129,16 @@ static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg, ht = rtnl_dereference(ht->next)) { if (ht->prio != tp->prio) continue; - if (arg->count >= arg->skip) { - if (arg->fn(tp, ht, arg) < 0) { - arg->stop = 1; - return; - } - } - arg->count++; + + if (!tc_cls_stats_dump(tp, arg, ht)) + return; + for (h = 0; h <= ht->divisor; h++) { for (n = rtnl_dereference(ht->ht[h]); n; n = rtnl_dereference(n->next)) { - if (arg->count < arg->skip) { - arg->count++; - continue; - } - if (arg->fn(tp, n, arg) < 0) { - arg->stop = 1; + if (!tc_cls_stats_dump(tp, arg, n)) return; - } - arg->count++; } } } @@ -1252,12 +1250,7 @@ static void u32_bind_class(void *fh, u32 classid, unsigned long cl, void *q, { struct tc_u_knode *n = fh; - if (n && n->res.classid == classid) { - if (cl) - __tcf_bind_filter(q, &n->res, base); - else - __tcf_unbind_filter(q, &n->res); - } + tc_cls_bind_class(classid, cl, q, &n->res, base); } static int u32_dump(struct net *net, struct tcf_proto *tp, void *fh, diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index 0a04468b7314..49bae3d5006b 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -311,12 +311,15 @@ META_COLLECTOR(int_sk_bound_if) META_COLLECTOR(var_sk_bound_if) { + int bound_dev_if; + if (skip_nonlocal(skb)) { *err = -1; return; } - if (skb->sk->sk_bound_dev_if == 0) { + bound_dev_if = READ_ONCE(skb->sk->sk_bound_dev_if); + if (bound_dev_if == 0) { dst->value = (unsigned long) "any"; dst->len = 3; } else { @@ -324,7 +327,7 @@ META_COLLECTOR(var_sk_bound_if) rcu_read_lock(); dev = dev_get_by_index_rcu(sock_net(skb->sk), - skb->sk->sk_bound_dev_if); + bound_dev_if); *err = var_dev(dev, dst); rcu_read_unlock(); } diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index c9c6f49f9c28..4a27dfb1ba0f 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -171,7 +171,7 @@ out_einval: } EXPORT_SYMBOL(register_qdisc); -int unregister_qdisc(struct Qdisc_ops *qops) +void unregister_qdisc(struct Qdisc_ops *qops) { struct Qdisc_ops *q, **qp; int err = -ENOENT; @@ -186,7 +186,8 @@ int unregister_qdisc(struct Qdisc_ops *qops) err = 0; } write_unlock(&qdisc_mod_lock); - return err; + + WARN(err, "unregister qdisc(%s) failed\n", qops->id); } EXPORT_SYMBOL(unregister_qdisc); @@ -194,7 +195,7 @@ EXPORT_SYMBOL(unregister_qdisc); void qdisc_get_default(char *name, size_t len) { read_lock(&qdisc_mod_lock); - strlcpy(name, default_qdisc_ops->id, len); + strscpy(name, default_qdisc_ops->id, len); read_unlock(&qdisc_mod_lock); } @@ -301,7 +302,7 @@ struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle) if (!handle) return NULL; - q = qdisc_match_from_root(dev->qdisc, handle); + q = qdisc_match_from_root(rtnl_dereference(dev->qdisc), handle); if (q) goto out; @@ -320,7 +321,7 @@ struct Qdisc *qdisc_lookup_rcu(struct net_device *dev, u32 handle) if (!handle) return NULL; - q = qdisc_match_from_root(dev->qdisc, handle); + q = qdisc_match_from_root(rcu_dereference(dev->qdisc), handle); if (q) goto out; @@ -867,6 +868,23 @@ void qdisc_offload_graft_helper(struct net_device *dev, struct Qdisc *sch, } EXPORT_SYMBOL(qdisc_offload_graft_helper); +void qdisc_offload_query_caps(struct net_device *dev, + enum tc_setup_type type, + void *caps, size_t caps_len) +{ + const struct net_device_ops *ops = dev->netdev_ops; + struct tc_query_caps_base base = { + .type = type, + .caps = caps, + }; + + memset(caps, 0, caps_len); + + if (ops->ndo_setup_tc) + ops->ndo_setup_tc(dev, TC_QUERY_CAPS, &base); +} +EXPORT_SYMBOL(qdisc_offload_query_caps); + static void qdisc_offload_graft_root(struct net_device *dev, struct Qdisc *new, struct Qdisc *old, struct netlink_ext_ack *extack) @@ -1062,7 +1080,7 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, qdisc_offload_graft_root(dev, new, old, extack); - if (new && new->ops->attach) + if (new && new->ops->attach && !ingress) goto skip; for (i = 0; i < num_q; i++) { @@ -1081,11 +1099,12 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, skip: if (!ingress) { - notify_and_destroy(net, skb, n, classid, - dev->qdisc, new); + old = rtnl_dereference(dev->qdisc); if (new && !new->ops->attach) qdisc_refcount_inc(new); - dev->qdisc = new ? : &noop_qdisc; + rcu_assign_pointer(dev->qdisc, new ? : &noop_qdisc); + + notify_and_destroy(net, skb, n, classid, old, new); if (new && new->ops->attach) new->ops->attach(new); @@ -1163,7 +1182,7 @@ static int qdisc_block_indexes_set(struct Qdisc *sch, struct nlattr **tca, static struct Qdisc *qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, - struct Qdisc *p, u32 parent, u32 handle, + u32 parent, u32 handle, struct nlattr **tca, int *errp, struct netlink_ext_ack *extack) { @@ -1204,7 +1223,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev, err = -ENOENT; if (!ops) { - NL_SET_ERR_MSG(extack, "Specified qdisc not found"); + NL_SET_ERR_MSG(extack, "Specified qdisc kind is unknown"); goto err_out; } @@ -1292,7 +1311,7 @@ err_out5: if (ops->destroy) ops->destroy(sch); err_out3: - dev_put_track(dev, &sch->dev_tracker); + netdev_put(dev, &sch->dev_tracker); qdisc_free(sch); err_out2: module_put(ops->owner); @@ -1424,10 +1443,6 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, struct Qdisc *p = NULL; int err; - if ((n->nlmsg_type != RTM_GETQDISC) && - !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) - return -EPERM; - err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy, extack); if (err < 0) @@ -1451,7 +1466,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, q = dev_ingress_queue(dev)->qdisc_sleeping; } } else { - q = dev->qdisc; + q = rtnl_dereference(dev->qdisc); } if (!q) { NL_SET_ERR_MSG(extack, "Cannot find specified qdisc on specified device"); @@ -1508,9 +1523,6 @@ static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, struct Qdisc *q, *p; int err; - if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) - return -EPERM; - replay: /* Reinit, just in case something touches this. */ err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX, @@ -1540,7 +1552,7 @@ replay: q = dev_ingress_queue(dev)->qdisc_sleeping; } } else { - q = dev->qdisc; + q = rtnl_dereference(dev->qdisc); } /* It may be default qdisc, ignore it */ @@ -1640,7 +1652,7 @@ create_n_graft: } if (clid == TC_H_INGRESS) { if (dev_ingress_queue(dev)) { - q = qdisc_create(dev, dev_ingress_queue(dev), p, + q = qdisc_create(dev, dev_ingress_queue(dev), tcm->tcm_parent, tcm->tcm_parent, tca, &err, extack); } else { @@ -1657,7 +1669,7 @@ create_n_graft: else dev_queue = netdev_get_tx_queue(dev, 0); - q = qdisc_create(dev, dev_queue, p, + q = qdisc_create(dev, dev_queue, tcm->tcm_parent, tcm->tcm_handle, tca, &err, extack); } @@ -1762,7 +1774,8 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) s_q_idx = 0; q_idx = 0; - if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx, + if (tc_dump_qdisc_root(rtnl_dereference(dev->qdisc), + skb, cb, &q_idx, s_q_idx, true, tca[TCA_DUMP_INVISIBLE]) < 0) goto done; @@ -1903,7 +1916,7 @@ static int tcf_node_bind(struct tcf_proto *tp, void *n, struct tcf_walker *arg) { struct tcf_bind_args *a = (void *)arg; - if (tp->ops->bind_class) { + if (n && tp->ops->bind_class) { struct Qdisc *q = tcf_block_q(tp->chain->block); sch_tree_lock(q); @@ -1991,10 +2004,6 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, u32 qid; int err; - if ((n->nlmsg_type != RTM_GETTCLASS) && - !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) - return -EPERM; - err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy, extack); if (err < 0) @@ -2033,7 +2042,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, } else if (qid1) { qid = qid1; } else if (qid == 0) - qid = dev->qdisc->handle; + qid = rtnl_dereference(dev->qdisc)->handle; /* Now qid is genuine qdisc handle consistent * both with parent and child. @@ -2044,7 +2053,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, portid = TC_H_MAKE(qid, portid); } else { if (qid == 0) - qid = dev->qdisc->handle; + qid = rtnl_dereference(dev->qdisc)->handle; } /* OK. Locate qdisc */ @@ -2205,7 +2214,8 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) s_t = cb->args[0]; t = 0; - if (tc_dump_tclass_root(dev->qdisc, skb, tcm, cb, &t, s_t, true) < 0) + if (tc_dump_tclass_root(rtnl_dereference(dev->qdisc), + skb, tcm, cb, &t, s_t, true) < 0) goto done; dev_queue = dev_ingress_queue(dev); diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index 4c8e994cf0a5..f52255fea652 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -354,12 +354,8 @@ static void atm_tc_walk(struct Qdisc *sch, struct qdisc_walker *walker) if (walker->stop) return; list_for_each_entry(flow, &p->flows, list) { - if (walker->count >= walker->skip && - walker->fn(sch, (unsigned long)flow, walker) < 0) { - walker->stop = 1; + if (!tc_qdisc_stats_dump(sch, (unsigned long)flow, walker)) break; - } - walker->count++; } } @@ -577,7 +573,6 @@ static void atm_tc_reset(struct Qdisc *sch) pr_debug("atm_tc_reset(sch %p,[qdisc %p])\n", sch, p); list_for_each_entry(flow, &p->flows, list) qdisc_reset(flow->q); - sch->q.qlen = 0; } static void atm_tc_destroy(struct Qdisc *sch) diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index a43a58a73d09..3ed0c3342189 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -573,7 +573,7 @@ static bool cobalt_should_drop(struct cobalt_vars *vars, /* Simple BLUE implementation. Lack of ECN is deliberate. */ if (vars->p_drop) - drop |= (prandom_u32() < vars->p_drop); + drop |= (get_random_u32() < vars->p_drop); /* Overload the drop_next field as an activity timeout */ if (!vars->count) @@ -2092,11 +2092,11 @@ retry: WARN_ON(host_load > CAKE_QUEUES); - /* The shifted prandom_u32() is a way to apply dithering to - * avoid accumulating roundoff errors + /* The get_random_u16() is a way to apply dithering to avoid + * accumulating roundoff errors */ flow->deficit += (b->flow_quantum * quantum_div[host_load] + - (prandom_u32() >> 16)) >> 16; + get_random_u16()) >> 16; list_move_tail(&flow->flowchain, &b->old_flows); goto retry; @@ -2224,8 +2224,12 @@ retry: static void cake_reset(struct Qdisc *sch) { + struct cake_sched_data *q = qdisc_priv(sch); u32 c; + if (!q->tins) + return; + for (c = 0; c < CAKE_MAX_TINS; c++) cake_clear_tin(sch, c); } @@ -2569,9 +2573,6 @@ static int cake_change(struct Qdisc *sch, struct nlattr *opt, struct nlattr *tb[TCA_CAKE_MAX + 1]; int err; - if (!opt) - return -EINVAL; - err = nla_parse_nested_deprecated(tb, TCA_CAKE_MAX, opt, cake_policy, extack); if (err < 0) @@ -3064,16 +3065,13 @@ static void cake_walk(struct Qdisc *sch, struct qdisc_walker *arg) struct cake_tin_data *b = &q->tins[q->tin_order[i]]; for (j = 0; j < CAKE_QUEUES; j++) { - if (list_empty(&b->flows[j].flowchain) || - arg->count < arg->skip) { + if (list_empty(&b->flows[j].flowchain)) { arg->count++; continue; } - if (arg->fn(sch, i * CAKE_QUEUES + j + 1, arg) < 0) { - arg->stop = 1; + if (!tc_qdisc_stats_dump(sch, i * CAKE_QUEUES + j + 1, + arg)) break; - } - arg->count++; } } } diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 02d9f0dfe356..6568e17c4c63 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -149,7 +149,6 @@ struct cbq_sched_data { psched_time_t now; /* Cached timestamp */ unsigned int pmask; - struct hrtimer delay_timer; struct qdisc_watchdog watchdog; /* Watchdog timer, started when CBQ has backlog, but cannot @@ -441,81 +440,6 @@ static void cbq_overlimit(struct cbq_class *cl) } } -static psched_tdiff_t cbq_undelay_prio(struct cbq_sched_data *q, int prio, - psched_time_t now) -{ - struct cbq_class *cl; - struct cbq_class *cl_prev = q->active[prio]; - psched_time_t sched = now; - - if (cl_prev == NULL) - return 0; - - do { - cl = cl_prev->next_alive; - if (now - cl->penalized > 0) { - cl_prev->next_alive = cl->next_alive; - cl->next_alive = NULL; - cl->cpriority = cl->priority; - cl->delayed = 0; - cbq_activate_class(cl); - - if (cl == q->active[prio]) { - q->active[prio] = cl_prev; - if (cl == q->active[prio]) { - q->active[prio] = NULL; - return 0; - } - } - - cl = cl_prev->next_alive; - } else if (sched - cl->penalized > 0) - sched = cl->penalized; - } while ((cl_prev = cl) != q->active[prio]); - - return sched - now; -} - -static enum hrtimer_restart cbq_undelay(struct hrtimer *timer) -{ - struct cbq_sched_data *q = container_of(timer, struct cbq_sched_data, - delay_timer); - struct Qdisc *sch = q->watchdog.qdisc; - psched_time_t now; - psched_tdiff_t delay = 0; - unsigned int pmask; - - now = psched_get_time(); - - pmask = q->pmask; - q->pmask = 0; - - while (pmask) { - int prio = ffz(~pmask); - psched_tdiff_t tmp; - - pmask &= ~(1<<prio); - - tmp = cbq_undelay_prio(q, prio, now); - if (tmp > 0) { - q->pmask |= 1<<prio; - if (tmp < delay || delay == 0) - delay = tmp; - } - } - - if (delay) { - ktime_t time; - - time = 0; - time = ktime_add_ns(time, PSCHED_TICKS2NS(now + delay)); - hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS_PINNED); - } - - __netif_schedule(qdisc_root(sch)); - return HRTIMER_NORESTART; -} - /* * It is mission critical procedure. * @@ -1034,7 +958,6 @@ cbq_reset(struct Qdisc *sch) q->tx_class = NULL; q->tx_borrowed = NULL; qdisc_watchdog_cancel(&q->watchdog); - hrtimer_cancel(&q->delay_timer); q->toplevel = TC_CBQ_MAXLEVEL; q->now = psched_get_time(); @@ -1052,11 +975,10 @@ cbq_reset(struct Qdisc *sch) cl->cpriority = cl->priority; } } - sch->q.qlen = 0; } -static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss) +static void cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss) { if (lss->change & TCF_CBQ_LSS_FLAGS) { cl->share = (lss->flags & TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent; @@ -1074,7 +996,6 @@ static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss) } if (lss->change & TCF_CBQ_LSS_OFFTIME) cl->offtime = lss->offtime; - return 0; } static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl) @@ -1162,8 +1083,6 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt, int err; qdisc_watchdog_init(&q->watchdog, sch); - hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED); - q->delay_timer.function = cbq_undelay; err = cbq_opt_parse(tb, opt, extack); if (err < 0) @@ -1757,15 +1676,8 @@ static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg) for (h = 0; h < q->clhash.hashsize; h++) { hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) { - if (arg->count < arg->skip) { - arg->count++; - continue; - } - if (arg->fn(sch, (unsigned long)cl, arg) < 0) { - arg->stop = 1; + if (!tc_qdisc_stats_dump(sch, (unsigned long)cl, arg)) return; - } - arg->count++; } } } diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c index 459cc240eda9..cac870eb7897 100644 --- a/net/sched/sch_cbs.c +++ b/net/sched/sch_cbs.c @@ -520,13 +520,7 @@ static unsigned long cbs_find(struct Qdisc *sch, u32 classid) static void cbs_walk(struct Qdisc *sch, struct qdisc_walker *walker) { if (!walker->stop) { - if (walker->count >= walker->skip) { - if (walker->fn(sch, 1, walker) < 0) { - walker->stop = 1; - return; - } - } - walker->count++; + tc_qdisc_stats_dump(sch, 1, walker); } } diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c index 2adbd945bf15..3ac3e5c80b6f 100644 --- a/net/sched/sch_choke.c +++ b/net/sched/sch_choke.c @@ -60,7 +60,6 @@ struct choke_sched_data { u32 forced_drop; /* Forced drops, qavg > max_thresh */ u32 forced_mark; /* Forced marks, qavg > max_thresh */ u32 pdrop; /* Drops due to queue limits */ - u32 other; /* Drops due to drop() calls */ u32 matched; /* Drops to flow match */ } stats; @@ -315,8 +314,6 @@ static void choke_reset(struct Qdisc *sch) rtnl_qdisc_drop(skb, sch); } - sch->q.qlen = 0; - sch->qstats.backlog = 0; if (q->tab) memset(q->tab, 0, (q->tab_mask + 1) * sizeof(struct sk_buff *)); q->head = q->tail = 0; @@ -466,7 +463,6 @@ static int choke_dump_stats(struct Qdisc *sch, struct gnet_dump *d) .early = q->stats.prob_drop + q->stats.forced_drop, .marked = q->stats.prob_mark + q->stats.forced_mark, .pdrop = q->stats.pdrop, - .other = q->stats.other, .matched = q->stats.matched, }; diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c index 30169b3adbbb..d7a4874543de 100644 --- a/net/sched/sch_codel.c +++ b/net/sched/sch_codel.c @@ -138,9 +138,6 @@ static int codel_change(struct Qdisc *sch, struct nlattr *opt, unsigned int qlen, dropped = 0; int err; - if (!opt) - return -EINVAL; - err = nla_parse_nested_deprecated(tb, TCA_CODEL_MAX, opt, codel_policy, NULL); if (err < 0) diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index 18e4f7a0b291..e35a4e90f4e6 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -284,15 +284,8 @@ static void drr_walk(struct Qdisc *sch, struct qdisc_walker *arg) for (i = 0; i < q->clhash.hashsize; i++) { hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) { - if (arg->count < arg->skip) { - arg->count++; - continue; - } - if (arg->fn(sch, (unsigned long)cl, arg) < 0) { - arg->stop = 1; + if (!tc_qdisc_stats_dump(sch, (unsigned long)cl, arg)) return; - } - arg->count++; } } } @@ -441,8 +434,6 @@ static void drr_reset_qdisc(struct Qdisc *sch) qdisc_reset(cl->qdisc); } } - sch->qstats.backlog = 0; - sch->q.qlen = 0; } static void drr_destroy_qdisc(struct Qdisc *sch) diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index 4c100d105269..401ffaf87d62 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -176,16 +176,12 @@ static void dsmark_walk(struct Qdisc *sch, struct qdisc_walker *walker) return; for (i = 0; i < p->indices; i++) { - if (p->mv[i].mask == 0xff && !p->mv[i].value) - goto ignore; - if (walker->count >= walker->skip) { - if (walker->fn(sch, i + 1, walker) < 0) { - walker->stop = 1; - break; - } + if (p->mv[i].mask == 0xff && !p->mv[i].value) { + walker->count++; + continue; } -ignore: - walker->count++; + if (!tc_qdisc_stats_dump(sch, i + 1, walker)) + break; } } @@ -409,8 +405,6 @@ static void dsmark_reset(struct Qdisc *sch) pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p); if (p->q) qdisc_reset(p->q); - sch->qstats.backlog = 0; - sch->q.qlen = 0; } static void dsmark_destroy(struct Qdisc *sch) diff --git a/net/sched/sch_etf.c b/net/sched/sch_etf.c index c48f91075b5c..61d1f0e32cf3 100644 --- a/net/sched/sch_etf.c +++ b/net/sched/sch_etf.c @@ -323,9 +323,6 @@ static int etf_enable_offload(struct net_device *dev, struct etf_sched_data *q, struct tc_etf_qopt_offload etf = { }; int err; - if (q->offload) - return 0; - if (!ops->ndo_setup_tc) { NL_SET_ERR_MSG(extack, "Specified device does not support ETF offload"); return -EOPNOTSUPP; @@ -445,9 +442,6 @@ static void etf_reset(struct Qdisc *sch) timesortedlist_clear(sch); __qdisc_reset_queue(&sch->q); - sch->qstats.backlog = 0; - sch->q.qlen = 0; - q->last = 0; } diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c index d73393493553..b10efeaf0629 100644 --- a/net/sched/sch_ets.c +++ b/net/sched/sch_ets.c @@ -341,15 +341,8 @@ static void ets_qdisc_walk(struct Qdisc *sch, struct qdisc_walker *arg) return; for (i = 0; i < q->nbands; i++) { - if (arg->count < arg->skip) { - arg->count++; - continue; - } - if (arg->fn(sch, i + 1, arg) < 0) { - arg->stop = 1; + if (!tc_qdisc_stats_dump(sch, i + 1, arg)) break; - } - arg->count++; } } @@ -594,11 +587,6 @@ static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt, unsigned int i; int err; - if (!opt) { - NL_SET_ERR_MSG(extack, "ETS options are required for this operation"); - return -EINVAL; - } - err = nla_parse_nested(tb, TCA_ETS_MAX, opt, ets_policy, extack); if (err < 0) return err; @@ -727,8 +715,6 @@ static void ets_qdisc_reset(struct Qdisc *sch) } for (band = 0; band < q->nbands; band++) qdisc_reset(q->classes[band].qdisc); - sch->qstats.backlog = 0; - sch->q.qlen = 0; } static void ets_qdisc_destroy(struct Qdisc *sch) diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index 2fb76fc0cc31..48d14fb90ba0 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -808,9 +808,6 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt, unsigned drop_len = 0; u32 fq_log; - if (!opt) - return -EINVAL; - err = nla_parse_nested_deprecated(tb, TCA_FQ_MAX, opt, fq_policy, NULL); if (err < 0) diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index 839e1235db05..8c4fee063436 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -347,8 +347,6 @@ static void fq_codel_reset(struct Qdisc *sch) codel_vars_init(&flow->cvars); } memset(q->backlogs, 0, q->flows_cnt * sizeof(u32)); - sch->q.qlen = 0; - sch->qstats.backlog = 0; q->memory_usage = 0; } @@ -374,9 +372,6 @@ static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt, u32 quantum = 0; int err; - if (!opt) - return -EINVAL; - err = nla_parse_nested_deprecated(tb, TCA_FQ_CODEL_MAX, opt, fq_codel_policy, NULL); if (err < 0) @@ -687,16 +682,12 @@ static void fq_codel_walk(struct Qdisc *sch, struct qdisc_walker *arg) return; for (i = 0; i < q->flows_cnt; i++) { - if (list_empty(&q->flows[i].flowchain) || - arg->count < arg->skip) { + if (list_empty(&q->flows[i].flowchain)) { arg->count++; continue; } - if (arg->fn(sch, i + 1, arg) < 0) { - arg->stop = 1; + if (!tc_qdisc_stats_dump(sch, i + 1, arg)) break; - } - arg->count++; } } diff --git a/net/sched/sch_fq_pie.c b/net/sched/sch_fq_pie.c index d6aba6edd16e..6980796d435d 100644 --- a/net/sched/sch_fq_pie.c +++ b/net/sched/sch_fq_pie.c @@ -283,9 +283,6 @@ static int fq_pie_change(struct Qdisc *sch, struct nlattr *opt, unsigned int num_dropped = 0; int err; - if (!opt) - return -EINVAL; - err = nla_parse_nested(tb, TCA_FQ_PIE_MAX, opt, fq_pie_policy, extack); if (err < 0) return err; @@ -521,9 +518,6 @@ static void fq_pie_reset(struct Qdisc *sch) INIT_LIST_HEAD(&flow->flowchain); pie_vars_init(&flow->vars); } - - sch->q.qlen = 0; - sch->qstats.backlog = 0; } static void fq_pie_destroy(struct Qdisc *sch) diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index b07bd1c7330f..a9aadc4e6858 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -409,7 +409,7 @@ static inline bool qdisc_restart(struct Qdisc *q, int *packets) void __qdisc_run(struct Qdisc *q) { - int quota = dev_tx_weight; + int quota = READ_ONCE(dev_tx_weight); int packets; while (qdisc_restart(q, &packets)) { @@ -427,14 +427,10 @@ void __qdisc_run(struct Qdisc *q) unsigned long dev_trans_start(struct net_device *dev) { - unsigned long val, res; + unsigned long res = READ_ONCE(netdev_get_tx_queue(dev, 0)->trans_start); + unsigned long val; unsigned int i; - if (is_vlan_dev(dev)) - dev = vlan_dev_real_dev(dev); - else if (netif_is_macvlan(dev)) - dev = macvlan_dev_real_dev(dev); - res = READ_ONCE(netdev_get_tx_queue(dev, 0)->trans_start); for (i = 1; i < dev->num_tx_queues; i++) { val = READ_ONCE(netdev_get_tx_queue(dev, i)->trans_start); if (val && time_after(val, res)) @@ -541,7 +537,7 @@ static void dev_watchdog(struct timer_list *t) spin_unlock(&dev->tx_global_lock); if (release) - dev_put_track(dev, &dev->watchdog_dev_tracker); + netdev_put(dev, &dev->watchdog_dev_tracker); } void __netdev_watchdog_up(struct net_device *dev) @@ -551,7 +547,8 @@ void __netdev_watchdog_up(struct net_device *dev) dev->watchdog_timeo = 5*HZ; if (!mod_timer(&dev->watchdog_timer, round_jiffies(jiffies + dev->watchdog_timeo))) - dev_hold_track(dev, &dev->watchdog_dev_tracker, GFP_ATOMIC); + netdev_hold(dev, &dev->watchdog_dev_tracker, + GFP_ATOMIC); } } EXPORT_SYMBOL_GPL(__netdev_watchdog_up); @@ -565,7 +562,7 @@ static void dev_watchdog_down(struct net_device *dev) { netif_tx_lock_bh(dev); if (del_timer(&dev->watchdog_timer)) - dev_put_track(dev, &dev->watchdog_dev_tracker); + netdev_put(dev, &dev->watchdog_dev_tracker); netif_tx_unlock_bh(dev); } @@ -944,7 +941,6 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, goto errout; __skb_queue_head_init(&sch->gso_skb); __skb_queue_head_init(&sch->skb_bad_txq); - qdisc_skb_head_init(&sch->q); gnet_stats_basic_sync_init(&sch->bstats); spin_lock_init(&sch->q.lock); @@ -975,7 +971,7 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, sch->enqueue = ops->enqueue; sch->dequeue = ops->dequeue; sch->dev_queue = dev_queue; - dev_hold_track(dev, &sch->dev_tracker, GFP_KERNEL); + netdev_hold(dev, &sch->dev_tracker, GFP_KERNEL); refcount_set(&sch->refcnt, 1); return sch; @@ -1019,22 +1015,14 @@ EXPORT_SYMBOL(qdisc_create_dflt); void qdisc_reset(struct Qdisc *qdisc) { const struct Qdisc_ops *ops = qdisc->ops; - struct sk_buff *skb, *tmp; trace_qdisc_reset(qdisc); if (ops->reset) ops->reset(qdisc); - skb_queue_walk_safe(&qdisc->gso_skb, skb, tmp) { - __skb_unlink(skb, &qdisc->gso_skb); - kfree_skb_list(skb); - } - - skb_queue_walk_safe(&qdisc->skb_bad_txq, skb, tmp) { - __skb_unlink(skb, &qdisc->skb_bad_txq); - kfree_skb_list(skb); - } + __skb_queue_purge(&qdisc->gso_skb); + __skb_queue_purge(&qdisc->skb_bad_txq); qdisc->q.qlen = 0; qdisc->qstats.backlog = 0; @@ -1075,7 +1063,7 @@ static void qdisc_destroy(struct Qdisc *qdisc) ops->destroy(qdisc); module_put(ops->owner); - dev_put_track(qdisc_dev(qdisc), &qdisc->dev_tracker); + netdev_put(qdisc_dev(qdisc), &qdisc->dev_tracker); trace_qdisc_destroy(qdisc); @@ -1133,6 +1121,21 @@ struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue, } EXPORT_SYMBOL(dev_graft_qdisc); +static void shutdown_scheduler_queue(struct net_device *dev, + struct netdev_queue *dev_queue, + void *_qdisc_default) +{ + struct Qdisc *qdisc = dev_queue->qdisc_sleeping; + struct Qdisc *qdisc_default = _qdisc_default; + + if (qdisc) { + rcu_assign_pointer(dev_queue->qdisc, qdisc_default); + dev_queue->qdisc_sleeping = qdisc_default; + + qdisc_put(qdisc); + } +} + static void attach_one_default_qdisc(struct net_device *dev, struct netdev_queue *dev_queue, void *_unused) @@ -1164,30 +1167,34 @@ static void attach_default_qdiscs(struct net_device *dev) if (!netif_is_multiqueue(dev) || dev->priv_flags & IFF_NO_QUEUE) { netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL); - dev->qdisc = txq->qdisc_sleeping; - qdisc_refcount_inc(dev->qdisc); + qdisc = txq->qdisc_sleeping; + rcu_assign_pointer(dev->qdisc, qdisc); + qdisc_refcount_inc(qdisc); } else { qdisc = qdisc_create_dflt(txq, &mq_qdisc_ops, TC_H_ROOT, NULL); if (qdisc) { - dev->qdisc = qdisc; + rcu_assign_pointer(dev->qdisc, qdisc); qdisc->ops->attach(qdisc); } } + qdisc = rtnl_dereference(dev->qdisc); /* Detect default qdisc setup/init failed and fallback to "noqueue" */ - if (dev->qdisc == &noop_qdisc) { + if (qdisc == &noop_qdisc) { netdev_warn(dev, "default qdisc (%s) fail, fallback to %s\n", default_qdisc_ops->id, noqueue_qdisc_ops.id); + netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc); dev->priv_flags |= IFF_NO_QUEUE; netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL); - dev->qdisc = txq->qdisc_sleeping; - qdisc_refcount_inc(dev->qdisc); + qdisc = txq->qdisc_sleeping; + rcu_assign_pointer(dev->qdisc, qdisc); + qdisc_refcount_inc(qdisc); dev->priv_flags ^= IFF_NO_QUEUE; } #ifdef CONFIG_NET_SCHED - if (dev->qdisc != &noop_qdisc) - qdisc_hash_add(dev->qdisc, false); + if (qdisc != &noop_qdisc) + qdisc_hash_add(qdisc, false); #endif } @@ -1217,7 +1224,7 @@ void dev_activate(struct net_device *dev) * and noqueue_qdisc for virtual interfaces */ - if (dev->qdisc == &noop_qdisc) + if (rtnl_dereference(dev->qdisc) == &noop_qdisc) attach_default_qdiscs(dev); if (!netif_carrier_ok(dev)) @@ -1383,7 +1390,7 @@ static int qdisc_change_tx_queue_len(struct net_device *dev, void dev_qdisc_change_real_num_tx(struct net_device *dev, unsigned int new_real_tx) { - struct Qdisc *qdisc = dev->qdisc; + struct Qdisc *qdisc = rtnl_dereference(dev->qdisc); if (qdisc->ops->change_real_num_tx) qdisc->ops->change_real_num_tx(qdisc, new_real_tx); @@ -1447,7 +1454,7 @@ static void dev_init_scheduler_queue(struct net_device *dev, void dev_init_scheduler(struct net_device *dev) { - dev->qdisc = &noop_qdisc; + rcu_assign_pointer(dev->qdisc, &noop_qdisc); netdev_for_each_tx_queue(dev, dev_init_scheduler_queue, &noop_qdisc); if (dev_ingress_queue(dev)) dev_init_scheduler_queue(dev, dev_ingress_queue(dev), &noop_qdisc); @@ -1455,28 +1462,13 @@ void dev_init_scheduler(struct net_device *dev) timer_setup(&dev->watchdog_timer, dev_watchdog, 0); } -static void shutdown_scheduler_queue(struct net_device *dev, - struct netdev_queue *dev_queue, - void *_qdisc_default) -{ - struct Qdisc *qdisc = dev_queue->qdisc_sleeping; - struct Qdisc *qdisc_default = _qdisc_default; - - if (qdisc) { - rcu_assign_pointer(dev_queue->qdisc, qdisc_default); - dev_queue->qdisc_sleeping = qdisc_default; - - qdisc_put(qdisc); - } -} - void dev_shutdown(struct net_device *dev) { netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc); if (dev_ingress_queue(dev)) shutdown_scheduler_queue(dev, dev_ingress_queue(dev), &noop_qdisc); - qdisc_put(dev->qdisc); - dev->qdisc = &noop_qdisc; + qdisc_put(rtnl_dereference(dev->qdisc)); + rcu_assign_pointer(dev->qdisc, &noop_qdisc); WARN_ON(timer_pending(&dev->watchdog_timer)); } @@ -1529,6 +1521,7 @@ void psched_ratecfg_precompute(struct psched_ratecfg *r, { memset(r, 0, sizeof(*r)); r->overhead = conf->overhead; + r->mpu = conf->mpu; r->rate_bytes_ps = max_t(u64, conf->rate, rate64); r->linklayer = (conf->linklayer & TC_LINKLAYER_MASK); psched_ratecfg_precompute__(r->rate_bytes_ps, &r->mult, &r->shift); diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index 1073c76d05c4..a661b062cca8 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -648,9 +648,6 @@ static int gred_change(struct Qdisc *sch, struct nlattr *opt, u32 max_P; struct gred_sched_data *prealloc; - if (opt == NULL) - return -EINVAL; - err = nla_parse_nested_deprecated(tb, TCA_GRED_MAX, opt, gred_policy, extack); if (err < 0) @@ -829,7 +826,6 @@ static int gred_dump(struct Qdisc *sch, struct sk_buff *skb) opt.Wlog = q->parms.Wlog; opt.Plog = q->parms.Plog; opt.Scell_log = q->parms.Scell_log; - opt.other = q->stats.other; opt.early = q->stats.prob_drop; opt.forced = q->stats.forced_drop; opt.pdrop = q->stats.pdrop; @@ -895,8 +891,6 @@ append_opt: goto nla_put_failure; if (nla_put_u32(skb, TCA_GRED_VQ_STAT_PDROP, q->stats.pdrop)) goto nla_put_failure; - if (nla_put_u32(skb, TCA_GRED_VQ_STAT_OTHER, q->stats.other)) - goto nla_put_failure; nla_nest_end(skb, vq); } @@ -914,10 +908,9 @@ static void gred_destroy(struct Qdisc *sch) struct gred_sched *table = qdisc_priv(sch); int i; - for (i = 0; i < table->DPs; i++) { - if (table->tab[i]) - gred_destroy_vq(table->tab[i]); - } + for (i = 0; i < table->DPs; i++) + gred_destroy_vq(table->tab[i]); + gred_offload(sch, TC_GRED_DESTROY); kfree(table->opt); } diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index d3979a6000e7..70b0c5873d32 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1349,15 +1349,8 @@ hfsc_walk(struct Qdisc *sch, struct qdisc_walker *arg) for (i = 0; i < q->clhash.hashsize; i++) { hlist_for_each_entry(cl, &q->clhash.hash[i], cl_common.hnode) { - if (arg->count < arg->skip) { - arg->count++; - continue; - } - if (arg->fn(sch, (unsigned long)cl, arg) < 0) { - arg->stop = 1; + if (!tc_qdisc_stats_dump(sch, (unsigned long)cl, arg)) return; - } - arg->count++; } } } @@ -1430,7 +1423,7 @@ hfsc_change_qdisc(struct Qdisc *sch, struct nlattr *opt, struct hfsc_sched *q = qdisc_priv(sch); struct tc_hfsc_qopt *qopt; - if (opt == NULL || nla_len(opt) < sizeof(*qopt)) + if (nla_len(opt) < sizeof(*qopt)) return -EINVAL; qopt = nla_data(opt); @@ -1484,8 +1477,6 @@ hfsc_reset_qdisc(struct Qdisc *sch) } q->eligible = RB_ROOT; qdisc_watchdog_cancel(&q->watchdog); - sch->qstats.backlog = 0; - sch->q.qlen = 0; } static void diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c index 420ede875322..d26cd436cbe3 100644 --- a/net/sched/sch_hhf.c +++ b/net/sched/sch_hhf.c @@ -516,9 +516,6 @@ static int hhf_change(struct Qdisc *sch, struct nlattr *opt, u32 new_quantum = q->quantum; u32 new_hhf_non_hh_weight = q->hhf_non_hh_weight; - if (!opt) - return -EINVAL; - err = nla_parse_nested_deprecated(tb, TCA_HHF_MAX, opt, hhf_policy, NULL); if (err < 0) diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 9267922ea9c3..e5b4bbf3ce3d 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1008,8 +1008,6 @@ static void htb_reset(struct Qdisc *sch) } qdisc_watchdog_cancel(&q->watchdog); __qdisc_reset_queue(&q->direct_queue); - sch->q.qlen = 0; - sch->qstats.backlog = 0; memset(q->hlevel, 0, sizeof(q->hlevel)); memset(q->row_mask, 0, sizeof(q->row_mask)); } @@ -1104,9 +1102,7 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt, err = qdisc_class_hash_init(&q->clhash); if (err < 0) - goto err_free_direct_qdiscs; - - qdisc_skb_head_init(&q->direct_queue); + return err; if (tb[TCA_HTB_DIRECT_QLEN]) q->direct_qlen = nla_get_u32(tb[TCA_HTB_DIRECT_QLEN]); @@ -1127,8 +1123,7 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt, qdisc = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops, TC_H_MAKE(sch->handle, 0), extack); if (!qdisc) { - err = -ENOMEM; - goto err_free_qdiscs; + return -ENOMEM; } htb_set_lockdep_class_child(qdisc); @@ -1146,7 +1141,7 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt, }; err = htb_offload(dev, &offload_opt); if (err) - goto err_free_qdiscs; + return err; /* Defer this assignment, so that htb_destroy skips offload-related * parts (especially calling ndo_setup_tc) on errors. @@ -1154,22 +1149,6 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt, q->offload = true; return 0; - -err_free_qdiscs: - for (ntx = 0; ntx < q->num_direct_qdiscs && q->direct_qdiscs[ntx]; - ntx++) - qdisc_put(q->direct_qdiscs[ntx]); - - qdisc_class_hash_destroy(&q->clhash); - /* Prevent use-after-free and double-free when htb_destroy gets called. - */ - q->clhash.hash = NULL; - q->clhash.hashsize = 0; - -err_free_direct_qdiscs: - kfree(q->direct_qdiscs); - q->direct_qdiscs = NULL; - return err; } static void htb_attach_offload(struct Qdisc *sch) @@ -1692,13 +1671,12 @@ static void htb_destroy(struct Qdisc *sch) qdisc_class_hash_destroy(&q->clhash); __qdisc_reset_queue(&q->direct_queue); - if (!q->offload) - return; - - offload_opt = (struct tc_htb_qopt_offload) { - .command = TC_HTB_DESTROY, - }; - htb_offload(dev, &offload_opt); + if (q->offload) { + offload_opt = (struct tc_htb_qopt_offload) { + .command = TC_HTB_DESTROY, + }; + htb_offload(dev, &offload_opt); + } if (!q->direct_qdiscs) return; @@ -1810,6 +1788,26 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, if (!hopt->rate.rate || !hopt->ceil.rate) goto failure; + if (q->offload) { + /* Options not supported by the offload. */ + if (hopt->rate.overhead || hopt->ceil.overhead) { + NL_SET_ERR_MSG(extack, "HTB offload doesn't support the overhead parameter"); + goto failure; + } + if (hopt->rate.mpu || hopt->ceil.mpu) { + NL_SET_ERR_MSG(extack, "HTB offload doesn't support the mpu parameter"); + goto failure; + } + if (hopt->quantum) { + NL_SET_ERR_MSG(extack, "HTB offload doesn't support the quantum parameter"); + goto failure; + } + if (hopt->prio) { + NL_SET_ERR_MSG(extack, "HTB offload doesn't support the prio parameter"); + goto failure; + } + } + /* Keeping backward compatible with rate_table based iproute2 tc */ if (hopt->rate.linklayer == TC_LINKLAYER_UNAWARE) qdisc_put_rtab(qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB], @@ -2121,15 +2119,8 @@ static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg) for (i = 0; i < q->clhash.hashsize; i++) { hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) { - if (arg->count < arg->skip) { - arg->count++; - continue; - } - if (arg->fn(sch, (unsigned long)cl, arg) < 0) { - arg->stop = 1; + if (!tc_qdisc_stats_dump(sch, (unsigned long)cl, arg)) return; - } - arg->count++; } } } diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c index 83d2e54bf303..d0bc660d7401 100644 --- a/net/sched/sch_mq.c +++ b/net/sched/sch_mq.c @@ -247,11 +247,8 @@ static void mq_walk(struct Qdisc *sch, struct qdisc_walker *arg) arg->count = arg->skip; for (ntx = arg->skip; ntx < dev->num_tx_queues; ntx++) { - if (arg->fn(sch, ntx + 1, arg) < 0) { - arg->stop = 1; + if (!tc_qdisc_stats_dump(sch, ntx + 1, arg)) break; - } - arg->count++; } } diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index b29f3453c6ea..4c68abaa289b 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -558,11 +558,8 @@ static void mqprio_walk(struct Qdisc *sch, struct qdisc_walker *arg) /* Walk hierarchy with a virtual class per tc */ arg->count = arg->skip; for (ntx = arg->skip; ntx < netdev_get_num_tc(dev); ntx++) { - if (arg->fn(sch, ntx + TC_H_MIN_PRIORITY, arg) < 0) { - arg->stop = 1; + if (!tc_qdisc_stats_dump(sch, ntx + TC_H_MIN_PRIORITY, arg)) return; - } - arg->count++; } /* Pad the values and skip over unused traffic classes */ diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c index cd8ab90c4765..75c9c860182b 100644 --- a/net/sched/sch_multiq.c +++ b/net/sched/sch_multiq.c @@ -152,7 +152,6 @@ multiq_reset(struct Qdisc *sch) for (band = 0; band < q->bands; band++) qdisc_reset(q->queues[band]); - sch->q.qlen = 0; q->curband = 0; } @@ -354,15 +353,8 @@ static void multiq_walk(struct Qdisc *sch, struct qdisc_walker *arg) return; for (band = 0; band < q->bands; band++) { - if (arg->count < arg->skip) { - arg->count++; - continue; - } - if (arg->fn(sch, band + 1, arg) < 0) { - arg->stop = 1; + if (!tc_qdisc_stats_dump(sch, band + 1, arg)) break; - } - arg->count++; } } diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index ed4ccef5d6a8..fb00ac40ecb7 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -171,7 +171,7 @@ static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb) static void init_crandom(struct crndstate *state, unsigned long rho) { state->rho = rho; - state->last = prandom_u32(); + state->last = get_random_u32(); } /* get_crandom - correlated random number generator @@ -184,9 +184,9 @@ static u32 get_crandom(struct crndstate *state) unsigned long answer; if (!state || state->rho == 0) /* no correlation */ - return prandom_u32(); + return get_random_u32(); - value = prandom_u32(); + value = get_random_u32(); rho = (u64)state->rho + 1; answer = (value * ((1ull<<32) - rho) + state->last * rho) >> 32; state->last = answer; @@ -200,7 +200,7 @@ static u32 get_crandom(struct crndstate *state) static bool loss_4state(struct netem_sched_data *q) { struct clgstate *clg = &q->clg; - u32 rnd = prandom_u32(); + u32 rnd = get_random_u32(); /* * Makes a comparison between rnd and the transition @@ -268,15 +268,15 @@ static bool loss_gilb_ell(struct netem_sched_data *q) switch (clg->state) { case GOOD_STATE: - if (prandom_u32() < clg->a1) + if (get_random_u32() < clg->a1) clg->state = BAD_STATE; - if (prandom_u32() < clg->a4) + if (get_random_u32() < clg->a4) return true; break; case BAD_STATE: - if (prandom_u32() < clg->a2) + if (get_random_u32() < clg->a2) clg->state = GOOD_STATE; - if (prandom_u32() > clg->a3) + if (get_random_u32() > clg->a3) return true; } @@ -513,8 +513,8 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, goto finish_segs; } - skb->data[prandom_u32() % skb_headlen(skb)] ^= - 1<<(prandom_u32() % 8); + skb->data[prandom_u32_max(skb_headlen(skb))] ^= + 1<<prandom_u32_max(8); } if (unlikely(sch->q.qlen >= sch->limit)) { @@ -632,7 +632,7 @@ static void get_slot_next(struct netem_sched_data *q, u64 now) if (!q->slot_dist) next_delay = q->slot_config.min_delay + - (prandom_u32() * + (get_random_u32() * (q->slot_config.max_delay - q->slot_config.min_delay) >> 32); else @@ -961,9 +961,6 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt, int old_loss_model = CLG_RANDOM; int ret; - if (opt == NULL) - return -EINVAL; - qopt = nla_data(opt); ret = parse_attr(tb, TCA_NETEM_MAX, opt, netem_policy, sizeof(*qopt)); if (ret < 0) @@ -1146,9 +1143,9 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb) struct tc_netem_rate rate; struct tc_netem_slot slot; - qopt.latency = min_t(psched_tdiff_t, PSCHED_NS2TICKS(q->latency), + qopt.latency = min_t(psched_time_t, PSCHED_NS2TICKS(q->latency), UINT_MAX); - qopt.jitter = min_t(psched_tdiff_t, PSCHED_NS2TICKS(q->jitter), + qopt.jitter = min_t(psched_time_t, PSCHED_NS2TICKS(q->jitter), UINT_MAX); qopt.limit = q->limit; qopt.loss = q->loss; @@ -1254,12 +1251,8 @@ static unsigned long netem_find(struct Qdisc *sch, u32 classid) static void netem_walk(struct Qdisc *sch, struct qdisc_walker *walker) { if (!walker->stop) { - if (walker->count >= walker->skip) - if (walker->fn(sch, 1, walker) < 0) { - walker->stop = 1; - return; - } - walker->count++; + if (!tc_qdisc_stats_dump(sch, 1, walker)) + return; } } diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c index 5a457ff61acd..265c238047a4 100644 --- a/net/sched/sch_pie.c +++ b/net/sched/sch_pie.c @@ -72,7 +72,7 @@ bool pie_drop_early(struct Qdisc *sch, struct pie_params *params, if (vars->accu_prob >= (MAX_PROB / 2) * 17) return true; - prandom_bytes(&rnd, 8); + get_random_bytes(&rnd, 8); if ((rnd >> BITS_PER_BYTE) < local_prob) { vars->accu_prob = 0; return true; @@ -143,9 +143,6 @@ static int pie_change(struct Qdisc *sch, struct nlattr *opt, unsigned int qlen, dropped = 0; int err; - if (!opt) - return -EINVAL; - err = nla_parse_nested_deprecated(tb, TCA_PIE_MAX, opt, pie_policy, NULL); if (err < 0) diff --git a/net/sched/sch_plug.c b/net/sched/sch_plug.c index cbc2ebca4548..ea8c4a7174bb 100644 --- a/net/sched/sch_plug.c +++ b/net/sched/sch_plug.c @@ -161,9 +161,6 @@ static int plug_change(struct Qdisc *sch, struct nlattr *opt, struct plug_sched_data *q = qdisc_priv(sch); struct tc_plug_qopt *msg; - if (opt == NULL) - return -EINVAL; - msg = nla_data(opt); if (nla_len(opt) < sizeof(*msg)) return -EINVAL; diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index 3b8d7197c06b..fdc5ef52c3ee 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -135,8 +135,6 @@ prio_reset(struct Qdisc *sch) for (prio = 0; prio < q->bands; prio++) qdisc_reset(q->queues[prio]); - sch->qstats.backlog = 0; - sch->q.qlen = 0; } static int prio_offload(struct Qdisc *sch, struct tc_prio_qopt *qopt) @@ -187,7 +185,7 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt, return -EINVAL; qopt = nla_data(opt); - if (qopt->bands > TCQ_PRIO_BANDS || qopt->bands < 2) + if (qopt->bands > TCQ_PRIO_BANDS || qopt->bands < TCQ_MIN_PRIO_BANDS) return -EINVAL; for (i = 0; i <= TC_PRIO_MAX; i++) { @@ -378,15 +376,8 @@ static void prio_walk(struct Qdisc *sch, struct qdisc_walker *arg) return; for (prio = 0; prio < q->bands; prio++) { - if (arg->count < arg->skip) { - arg->count++; - continue; - } - if (arg->fn(sch, prio + 1, arg) < 0) { - arg->stop = 1; + if (!tc_qdisc_stats_dump(sch, prio + 1, arg)) break; - } - arg->count++; } } diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index d4ce58c90f9f..cf5ebe43b3b4 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -659,15 +659,8 @@ static void qfq_walk(struct Qdisc *sch, struct qdisc_walker *arg) for (i = 0; i < q->clhash.hashsize; i++) { hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) { - if (arg->count < arg->skip) { - arg->count++; - continue; - } - if (arg->fn(sch, (unsigned long)cl, arg) < 0) { - arg->stop = 1; + if (!tc_qdisc_stats_dump(sch, (unsigned long)cl, arg)) return; - } - arg->count++; } } } @@ -1458,8 +1451,6 @@ static void qfq_reset_qdisc(struct Qdisc *sch) qdisc_reset(cl->qdisc); } } - sch->qstats.backlog = 0; - sch->q.qlen = 0; } static void qfq_destroy_qdisc(struct Qdisc *sch) diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 40adf1f07a82..98129324e157 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -72,6 +72,7 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch, { struct red_sched_data *q = qdisc_priv(sch); struct Qdisc *child = q->qdisc; + unsigned int len; int ret; q->vars.qavg = red_calc_qavg(&q->parms, @@ -126,9 +127,10 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch, break; } + len = qdisc_pkt_len(skb); ret = qdisc_enqueue(skb, child, to_free); if (likely(ret == NET_XMIT_SUCCESS)) { - qdisc_qstats_backlog_inc(sch, skb); + sch->qstats.backlog += len; sch->q.qlen++; } else if (net_xmit_drop_count(ret)) { q->stats.pdrop++; @@ -176,8 +178,6 @@ static void red_reset(struct Qdisc *sch) struct red_sched_data *q = qdisc_priv(sch); qdisc_reset(q->qdisc); - sch->qstats.backlog = 0; - sch->q.qlen = 0; red_restart(&q->vars); } @@ -370,9 +370,6 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt, struct nlattr *tb[TCA_RED_MAX + 1]; int err; - if (!opt) - return -EINVAL; - err = nla_parse_nested_deprecated(tb, TCA_RED_MAX, opt, red_policy, extack); if (err < 0) @@ -463,7 +460,6 @@ static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d) } st.early = q->stats.prob_drop + q->stats.forced_drop; st.pdrop = q->stats.pdrop; - st.other = q->stats.other; st.marked = q->stats.prob_mark + q->stats.forced_mark; return gnet_stats_copy_app(d, &st, sizeof(st)); @@ -522,12 +518,7 @@ static unsigned long red_find(struct Qdisc *sch, u32 classid) static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker) { if (!walker->stop) { - if (walker->count >= walker->skip) - if (walker->fn(sch, 1, walker) < 0) { - walker->stop = 1; - return; - } - walker->count++; + tc_qdisc_stats_dump(sch, 1, walker); } } diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c index 3d061a13d7ed..1871a1c0224d 100644 --- a/net/sched/sch_sfb.c +++ b/net/sched/sch_sfb.c @@ -135,15 +135,15 @@ static void increment_one_qlen(u32 sfbhash, u32 slot, struct sfb_sched_data *q) } } -static void increment_qlen(const struct sk_buff *skb, struct sfb_sched_data *q) +static void increment_qlen(const struct sfb_skb_cb *cb, struct sfb_sched_data *q) { u32 sfbhash; - sfbhash = sfb_hash(skb, 0); + sfbhash = cb->hashes[0]; if (sfbhash) increment_one_qlen(sfbhash, 0, q); - sfbhash = sfb_hash(skb, 1); + sfbhash = cb->hashes[1]; if (sfbhash) increment_one_qlen(sfbhash, 1, q); } @@ -281,8 +281,10 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch, { struct sfb_sched_data *q = qdisc_priv(sch); + unsigned int len = qdisc_pkt_len(skb); struct Qdisc *child = q->qdisc; struct tcf_proto *fl; + struct sfb_skb_cb cb; int i; u32 p_min = ~0; u32 minqlen = ~0; @@ -377,7 +379,7 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch, goto enqueue; } - r = prandom_u32() & SFB_MAX_PROB; + r = get_random_u16() & SFB_MAX_PROB; if (unlikely(r < p_min)) { if (unlikely(p_min > SFB_MAX_PROB / 2)) { @@ -399,11 +401,12 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch, } enqueue: + memcpy(&cb, sfb_skb_cb(skb), sizeof(cb)); ret = qdisc_enqueue(skb, child, to_free); if (likely(ret == NET_XMIT_SUCCESS)) { - qdisc_qstats_backlog_inc(sch, skb); + sch->qstats.backlog += len; sch->q.qlen++; - increment_qlen(skb, q); + increment_qlen(&cb, q); } else if (net_xmit_drop_count(ret)) { q->stats.childdrop++; qdisc_qstats_drop(sch); @@ -452,9 +455,8 @@ static void sfb_reset(struct Qdisc *sch) { struct sfb_sched_data *q = qdisc_priv(sch); - qdisc_reset(q->qdisc); - sch->qstats.backlog = 0; - sch->q.qlen = 0; + if (likely(q->qdisc)) + qdisc_reset(q->qdisc); q->slot = 0; q->double_buffering = false; sfb_zero_all_buckets(q); @@ -658,12 +660,7 @@ static int sfb_delete(struct Qdisc *sch, unsigned long cl, static void sfb_walk(struct Qdisc *sch, struct qdisc_walker *walker) { if (!walker->stop) { - if (walker->count >= walker->skip) - if (walker->fn(sch, 1, walker) < 0) { - walker->stop = 1; - return; - } - walker->count++; + tc_qdisc_stats_dump(sch, 1, walker); } } diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index f8e569f79f13..abd436307d6a 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -888,16 +888,12 @@ static void sfq_walk(struct Qdisc *sch, struct qdisc_walker *arg) return; for (i = 0; i < q->divisor; i++) { - if (q->ht[i] == SFQ_EMPTY_SLOT || - arg->count < arg->skip) { + if (q->ht[i] == SFQ_EMPTY_SLOT) { arg->count++; continue; } - if (arg->fn(sch, i + 1, arg) < 0) { - arg->stop = 1; + if (!tc_qdisc_stats_dump(sch, i + 1, arg)) break; - } - arg->count++; } } diff --git a/net/sched/sch_skbprio.c b/net/sched/sch_skbprio.c index 7a5e4c454715..5df2dacb7b1a 100644 --- a/net/sched/sch_skbprio.c +++ b/net/sched/sch_skbprio.c @@ -213,9 +213,6 @@ static void skbprio_reset(struct Qdisc *sch) struct skbprio_sched_data *q = qdisc_priv(sch); int prio; - sch->qstats.backlog = 0; - sch->q.qlen = 0; - for (prio = 0; prio < SKBPRIO_MAX_PRIORITY; prio++) __skb_queue_purge(&q->qdiscs[prio]); @@ -268,15 +265,8 @@ static void skbprio_walk(struct Qdisc *sch, struct qdisc_walker *arg) return; for (i = 0; i < SKBPRIO_MAX_PRIORITY; i++) { - if (arg->count < arg->skip) { - arg->count++; - continue; - } - if (arg->fn(sch, i + 1, arg) < 0) { - arg->stop = 1; + if (!tc_qdisc_stats_dump(sch, i + 1, arg)) break; - } - arg->count++; } } diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index 377f896bdedc..570389f6cdd7 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -18,6 +18,7 @@ #include <linux/module.h> #include <linux/spinlock.h> #include <linux/rcupdate.h> +#include <linux/time.h> #include <net/netlink.h> #include <net/pkt_sched.h> #include <net/pkt_cls.h> @@ -26,7 +27,6 @@ #include <net/tcp.h> static LIST_HEAD(taprio_list); -static DEFINE_SPINLOCK(taprio_list_lock); #define TAPRIO_ALL_GATES_OPEN -1 @@ -66,6 +66,7 @@ struct taprio_sched { u32 flags; enum tk_offsets tk_offset; int clockid; + bool offloaded; atomic64_t picos_per_byte; /* Using picoseconds because for 10Gbps+ * speeds it's sub-nanoseconds per byte */ @@ -77,8 +78,8 @@ struct taprio_sched { struct sched_gate_list __rcu *admin_sched; struct hrtimer advance_timer; struct list_head taprio_list; - struct sk_buff *(*dequeue)(struct Qdisc *sch); - struct sk_buff *(*peek)(struct Qdisc *sch); + u32 max_frm_len[TC_MAX_QUEUE]; /* for the fast path */ + u32 max_sdu[TC_MAX_QUEUE]; /* for dump and offloading */ u32 txtime_delay; }; @@ -176,7 +177,7 @@ static ktime_t get_interval_end_time(struct sched_gate_list *sched, static int length_to_duration(struct taprio_sched *q, int len) { - return div_u64(len * atomic64_read(&q->picos_per_byte), 1000); + return div_u64(len * atomic64_read(&q->picos_per_byte), PSEC_PER_NSEC); } /* Returns the entry corresponding to next available interval. If @@ -416,8 +417,12 @@ static int taprio_enqueue_one(struct sk_buff *skb, struct Qdisc *sch, struct Qdisc *child, struct sk_buff **to_free) { struct taprio_sched *q = qdisc_priv(sch); + struct net_device *dev = qdisc_dev(sch); + int prio = skb->priority; + u8 tc; - if (skb->sk && sock_flag(skb->sk, SOCK_TXTIME)) { + /* sk_flags are only safe to use on full sockets. */ + if (skb->sk && sk_fullsock(skb->sk) && sock_flag(skb->sk, SOCK_TXTIME)) { if (!is_valid_interval(skb, sch)) return qdisc_drop(skb, sch, to_free); } else if (TXTIME_ASSIST_IS_ENABLED(q->flags)) { @@ -426,12 +431,20 @@ static int taprio_enqueue_one(struct sk_buff *skb, struct Qdisc *sch, return qdisc_drop(skb, sch, to_free); } + /* Devices with full offload are expected to honor this in hardware */ + tc = netdev_get_prio_tc_map(dev, prio); + if (skb->len > q->max_frm_len[tc]) + return qdisc_drop(skb, sch, to_free); + qdisc_qstats_backlog_inc(sch, skb); sch->q.qlen++; return qdisc_enqueue(skb, child, to_free); } +/* Will not be called in the full offload case, since the TX queues are + * attached to the Qdisc created using qdisc_create_dflt() + */ static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { @@ -439,11 +452,6 @@ static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct Qdisc *child; int queue; - if (unlikely(FULL_OFFLOAD_IS_ENABLED(q->flags))) { - WARN_ONCE(1, "Trying to enqueue skb into the root of a taprio qdisc configured with full offload\n"); - return qdisc_drop(skb, sch, to_free); - } - queue = skb_get_queue_mapping(skb); child = q->qdiscs[queue]; @@ -452,10 +460,10 @@ static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch, /* Large packets might not be transmitted when the transmission duration * exceeds any configured interval. Therefore, segment the skb into - * smaller chunks. Skip it for the full offload case, as the driver - * and/or the hardware is expected to handle this. + * smaller chunks. Drivers with full offload are expected to handle + * this in hardware. */ - if (skb_is_gso(skb) && !FULL_OFFLOAD_IS_ENABLED(q->flags)) { + if (skb_is_gso(skb)) { unsigned int slen = 0, numsegs = 0, len = qdisc_pkt_len(skb); netdev_features_t features = netif_skb_features(skb); struct sk_buff *segs, *nskb; @@ -489,7 +497,10 @@ static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch, return taprio_enqueue_one(skb, sch, child, to_free); } -static struct sk_buff *taprio_peek_soft(struct Qdisc *sch) +/* Will not be called in the full offload case, since the TX queues are + * attached to the Qdisc created using qdisc_create_dflt() + */ +static struct sk_buff *taprio_peek(struct Qdisc *sch) { struct taprio_sched *q = qdisc_priv(sch); struct net_device *dev = qdisc_dev(sch); @@ -533,28 +544,17 @@ static struct sk_buff *taprio_peek_soft(struct Qdisc *sch) return NULL; } -static struct sk_buff *taprio_peek_offload(struct Qdisc *sch) -{ - WARN_ONCE(1, "Trying to peek into the root of a taprio qdisc configured with full offload\n"); - - return NULL; -} - -static struct sk_buff *taprio_peek(struct Qdisc *sch) -{ - struct taprio_sched *q = qdisc_priv(sch); - - return q->peek(sch); -} - static void taprio_set_budget(struct taprio_sched *q, struct sched_entry *entry) { atomic_set(&entry->budget, - div64_u64((u64)entry->interval * 1000, + div64_u64((u64)entry->interval * PSEC_PER_NSEC, atomic64_read(&q->picos_per_byte))); } -static struct sk_buff *taprio_dequeue_soft(struct Qdisc *sch) +/* Will not be called in the full offload case, since the TX queues are + * attached to the Qdisc created using qdisc_create_dflt() + */ +static struct sk_buff *taprio_dequeue(struct Qdisc *sch) { struct taprio_sched *q = qdisc_priv(sch); struct net_device *dev = qdisc_dev(sch); @@ -642,20 +642,6 @@ done: return skb; } -static struct sk_buff *taprio_dequeue_offload(struct Qdisc *sch) -{ - WARN_ONCE(1, "Trying to dequeue from the root of a taprio qdisc configured with full offload\n"); - - return NULL; -} - -static struct sk_buff *taprio_dequeue(struct Qdisc *sch) -{ - struct taprio_sched *q = qdisc_priv(sch); - - return q->dequeue(sch); -} - static bool should_restart_cycle(const struct sched_gate_list *oper, const struct sched_entry *entry) { @@ -778,6 +764,11 @@ static const struct nla_policy entry_policy[TCA_TAPRIO_SCHED_ENTRY_MAX + 1] = { [TCA_TAPRIO_SCHED_ENTRY_INTERVAL] = { .type = NLA_U32 }, }; +static const struct nla_policy taprio_tc_policy[TCA_TAPRIO_TC_ENTRY_MAX + 1] = { + [TCA_TAPRIO_TC_ENTRY_INDEX] = { .type = NLA_U32 }, + [TCA_TAPRIO_TC_ENTRY_MAX_SDU] = { .type = NLA_U32 }, +}; + static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = { [TCA_TAPRIO_ATTR_PRIOMAP] = { .len = sizeof(struct tc_mqprio_qopt) @@ -790,6 +781,7 @@ static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = { [TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION] = { .type = NLA_S64 }, [TCA_TAPRIO_ATTR_FLAGS] = { .type = NLA_U32 }, [TCA_TAPRIO_ATTR_TXTIME_DELAY] = { .type = NLA_U32 }, + [TCA_TAPRIO_ATTR_TC_ENTRY] = { .type = NLA_NESTED }, }; static int fill_sched_entry(struct taprio_sched *q, struct nlattr **tb, @@ -1096,27 +1088,20 @@ static int taprio_dev_notifier(struct notifier_block *nb, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); - struct net_device *qdev; struct taprio_sched *q; - bool found = false; ASSERT_RTNL(); if (event != NETDEV_UP && event != NETDEV_CHANGE) return NOTIFY_DONE; - spin_lock(&taprio_list_lock); list_for_each_entry(q, &taprio_list, taprio_list) { - qdev = qdisc_dev(q->root); - if (qdev == dev) { - found = true; - break; - } - } - spin_unlock(&taprio_list_lock); + if (dev != qdisc_dev(q->root)) + continue; - if (found) taprio_set_picos_per_byte(dev, q); + break; + } return NOTIFY_DONE; } @@ -1191,16 +1176,10 @@ static void taprio_offload_config_changed(struct taprio_sched *q) { struct sched_gate_list *oper, *admin; - spin_lock(&q->current_entry_lock); - - oper = rcu_dereference_protected(q->oper_sched, - lockdep_is_held(&q->current_entry_lock)); - admin = rcu_dereference_protected(q->admin_sched, - lockdep_is_held(&q->current_entry_lock)); + oper = rtnl_dereference(q->oper_sched); + admin = rtnl_dereference(q->admin_sched); switch_schedules(q, &admin, &oper); - - spin_unlock(&q->current_entry_lock); } static u32 tc_map_to_queue_mask(struct net_device *dev, u32 tc_mask) @@ -1253,7 +1232,8 @@ static int taprio_enable_offload(struct net_device *dev, { const struct net_device_ops *ops = dev->netdev_ops; struct tc_taprio_qopt_offload *offload; - int err = 0; + struct tc_taprio_caps caps; + int tc, err = 0; if (!ops->ndo_setup_tc) { NL_SET_ERR_MSG(extack, @@ -1261,6 +1241,19 @@ static int taprio_enable_offload(struct net_device *dev, return -EOPNOTSUPP; } + qdisc_offload_query_caps(dev, TC_SETUP_QDISC_TAPRIO, + &caps, sizeof(caps)); + + if (!caps.supports_queue_max_sdu) { + for (tc = 0; tc < TC_MAX_QUEUE; tc++) { + if (q->max_sdu[tc]) { + NL_SET_ERR_MSG_MOD(extack, + "Device does not handle queueMaxSDU"); + return -EOPNOTSUPP; + } + } + } + offload = taprio_offload_alloc(sched->num_entries); if (!offload) { NL_SET_ERR_MSG(extack, @@ -1270,6 +1263,9 @@ static int taprio_enable_offload(struct net_device *dev, offload->enable = 1; taprio_sched_to_offload(dev, sched, offload); + for (tc = 0; tc < TC_MAX_QUEUE; tc++) + offload->max_sdu[tc] = q->max_sdu[tc]; + err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TAPRIO, offload); if (err < 0) { NL_SET_ERR_MSG(extack, @@ -1277,6 +1273,8 @@ static int taprio_enable_offload(struct net_device *dev, goto done; } + q->offloaded = true; + done: taprio_offload_free(offload); @@ -1291,12 +1289,9 @@ static int taprio_disable_offload(struct net_device *dev, struct tc_taprio_qopt_offload *offload; int err; - if (!FULL_OFFLOAD_IS_ENABLED(q->flags)) + if (!q->offloaded) return 0; - if (!ops->ndo_setup_tc) - return -EOPNOTSUPP; - offload = taprio_offload_alloc(0); if (!offload) { NL_SET_ERR_MSG(extack, @@ -1312,6 +1307,8 @@ static int taprio_disable_offload(struct net_device *dev, goto out; } + q->offloaded = false; + out: taprio_offload_free(offload); @@ -1403,6 +1400,89 @@ out: return err; } +static int taprio_parse_tc_entry(struct Qdisc *sch, + struct nlattr *opt, + u32 max_sdu[TC_QOPT_MAX_QUEUE], + unsigned long *seen_tcs, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[TCA_TAPRIO_TC_ENTRY_MAX + 1] = { }; + struct net_device *dev = qdisc_dev(sch); + u32 val = 0; + int err, tc; + + err = nla_parse_nested(tb, TCA_TAPRIO_TC_ENTRY_MAX, opt, + taprio_tc_policy, extack); + if (err < 0) + return err; + + if (!tb[TCA_TAPRIO_TC_ENTRY_INDEX]) { + NL_SET_ERR_MSG_MOD(extack, "TC entry index missing"); + return -EINVAL; + } + + tc = nla_get_u32(tb[TCA_TAPRIO_TC_ENTRY_INDEX]); + if (tc >= TC_QOPT_MAX_QUEUE) { + NL_SET_ERR_MSG_MOD(extack, "TC entry index out of range"); + return -ERANGE; + } + + if (*seen_tcs & BIT(tc)) { + NL_SET_ERR_MSG_MOD(extack, "Duplicate TC entry"); + return -EINVAL; + } + + *seen_tcs |= BIT(tc); + + if (tb[TCA_TAPRIO_TC_ENTRY_MAX_SDU]) + val = nla_get_u32(tb[TCA_TAPRIO_TC_ENTRY_MAX_SDU]); + + if (val > dev->max_mtu) { + NL_SET_ERR_MSG_MOD(extack, "TC max SDU exceeds device max MTU"); + return -ERANGE; + } + + max_sdu[tc] = val; + + return 0; +} + +static int taprio_parse_tc_entries(struct Qdisc *sch, + struct nlattr *opt, + struct netlink_ext_ack *extack) +{ + struct taprio_sched *q = qdisc_priv(sch); + struct net_device *dev = qdisc_dev(sch); + u32 max_sdu[TC_QOPT_MAX_QUEUE]; + unsigned long seen_tcs = 0; + struct nlattr *n; + int tc, rem; + int err = 0; + + for (tc = 0; tc < TC_QOPT_MAX_QUEUE; tc++) + max_sdu[tc] = q->max_sdu[tc]; + + nla_for_each_nested(n, opt, rem) { + if (nla_type(n) != TCA_TAPRIO_ATTR_TC_ENTRY) + continue; + + err = taprio_parse_tc_entry(sch, n, max_sdu, &seen_tcs, extack); + if (err) + goto out; + } + + for (tc = 0; tc < TC_QOPT_MAX_QUEUE; tc++) { + q->max_sdu[tc] = max_sdu[tc]; + if (max_sdu[tc]) + q->max_frm_len[tc] = max_sdu[tc] + dev->hard_header_len; + else + q->max_frm_len[tc] = U32_MAX; /* never oversized */ + } + +out: + return err; +} + static int taprio_mqprio_cmp(const struct net_device *dev, const struct tc_mqprio_qopt *mqprio) { @@ -1481,6 +1561,10 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt, if (err < 0) return err; + err = taprio_parse_tc_entries(sch, opt, extack); + if (err) + return err; + new_admin = kzalloc(sizeof(*new_admin), GFP_KERNEL); if (!new_admin) { NL_SET_ERR_MSG(extack, "Not enough memory for a new schedule"); @@ -1488,10 +1572,8 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt, } INIT_LIST_HEAD(&new_admin->entries); - rcu_read_lock(); - oper = rcu_dereference(q->oper_sched); - admin = rcu_dereference(q->admin_sched); - rcu_read_unlock(); + oper = rtnl_dereference(q->oper_sched); + admin = rtnl_dereference(q->admin_sched); /* no changes - no new mqprio settings */ if (!taprio_mqprio_cmp(dev, mqprio)) @@ -1561,17 +1643,6 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt, q->advance_timer.function = advance_sched; } - if (FULL_OFFLOAD_IS_ENABLED(q->flags)) { - q->dequeue = taprio_dequeue_offload; - q->peek = taprio_peek_offload; - } else { - /* Be sure to always keep the function pointers - * in a consistent state. - */ - q->dequeue = taprio_dequeue_soft; - q->peek = taprio_peek_soft; - } - err = taprio_get_start_time(sch, new_admin, &start); if (err < 0) { NL_SET_ERR_MSG(extack, "Internal error: failed get start time"); @@ -1634,19 +1705,16 @@ static void taprio_reset(struct Qdisc *sch) if (q->qdiscs[i]) qdisc_reset(q->qdiscs[i]); } - sch->qstats.backlog = 0; - sch->q.qlen = 0; } static void taprio_destroy(struct Qdisc *sch) { struct taprio_sched *q = qdisc_priv(sch); struct net_device *dev = qdisc_dev(sch); + struct sched_gate_list *oper, *admin; unsigned int i; - spin_lock(&taprio_list_lock); list_del(&q->taprio_list); - spin_unlock(&taprio_list_lock); /* Note that taprio_reset() might not be called if an error * happens in qdisc_create(), after taprio_init() has been called. @@ -1665,11 +1733,14 @@ static void taprio_destroy(struct Qdisc *sch) netdev_reset_tc(dev); - if (q->oper_sched) - call_rcu(&q->oper_sched->rcu, taprio_free_sched_cb); + oper = rtnl_dereference(q->oper_sched); + admin = rtnl_dereference(q->admin_sched); - if (q->admin_sched) - call_rcu(&q->admin_sched->rcu, taprio_free_sched_cb); + if (oper) + call_rcu(&oper->rcu, taprio_free_sched_cb); + + if (admin) + call_rcu(&admin->rcu, taprio_free_sched_cb); } static int taprio_init(struct Qdisc *sch, struct nlattr *opt, @@ -1684,9 +1755,6 @@ static int taprio_init(struct Qdisc *sch, struct nlattr *opt, hrtimer_init(&q->advance_timer, CLOCK_TAI, HRTIMER_MODE_ABS); q->advance_timer.function = advance_sched; - q->dequeue = taprio_dequeue_soft; - q->peek = taprio_peek_soft; - q->root = sch; /* We only support static clockids. Use an invalid value as default @@ -1695,15 +1763,17 @@ static int taprio_init(struct Qdisc *sch, struct nlattr *opt, q->clockid = -1; q->flags = TAPRIO_FLAGS_INVALID; - spin_lock(&taprio_list_lock); list_add(&q->taprio_list, &taprio_list); - spin_unlock(&taprio_list_lock); - if (sch->parent != TC_H_ROOT) + if (sch->parent != TC_H_ROOT) { + NL_SET_ERR_MSG_MOD(extack, "Can only be attached as root qdisc"); return -EOPNOTSUPP; + } - if (!netif_is_multiqueue(dev)) + if (!netif_is_multiqueue(dev)) { + NL_SET_ERR_MSG_MOD(extack, "Multi-queue device is required"); return -EOPNOTSUPP; + } /* pre-allocate qdisc, attachment can't fail */ q->qdiscs = kcalloc(dev->num_tx_queues, @@ -1875,6 +1945,33 @@ error_nest: return -1; } +static int taprio_dump_tc_entries(struct taprio_sched *q, struct sk_buff *skb) +{ + struct nlattr *n; + int tc; + + for (tc = 0; tc < TC_MAX_QUEUE; tc++) { + n = nla_nest_start(skb, TCA_TAPRIO_ATTR_TC_ENTRY); + if (!n) + return -EMSGSIZE; + + if (nla_put_u32(skb, TCA_TAPRIO_TC_ENTRY_INDEX, tc)) + goto nla_put_failure; + + if (nla_put_u32(skb, TCA_TAPRIO_TC_ENTRY_MAX_SDU, + q->max_sdu[tc])) + goto nla_put_failure; + + nla_nest_end(skb, n); + } + + return 0; + +nla_put_failure: + nla_nest_cancel(skb, n); + return -EMSGSIZE; +} + static int taprio_dump(struct Qdisc *sch, struct sk_buff *skb) { struct taprio_sched *q = qdisc_priv(sch); @@ -1884,9 +1981,8 @@ static int taprio_dump(struct Qdisc *sch, struct sk_buff *skb) struct nlattr *nest, *sched_nest; unsigned int i; - rcu_read_lock(); - oper = rcu_dereference(q->oper_sched); - admin = rcu_dereference(q->admin_sched); + oper = rtnl_dereference(q->oper_sched); + admin = rtnl_dereference(q->admin_sched); opt.num_tc = netdev_get_num_tc(dev); memcpy(opt.prio_tc_map, dev->prio_tc_map, sizeof(opt.prio_tc_map)); @@ -1914,6 +2010,9 @@ static int taprio_dump(struct Qdisc *sch, struct sk_buff *skb) nla_put_u32(skb, TCA_TAPRIO_ATTR_TXTIME_DELAY, q->txtime_delay)) goto options_error; + if (taprio_dump_tc_entries(q, skb)) + goto options_error; + if (oper && dump_schedule(skb, oper)) goto options_error; @@ -1930,8 +2029,6 @@ static int taprio_dump(struct Qdisc *sch, struct sk_buff *skb) nla_nest_end(skb, sched_nest); done: - rcu_read_unlock(); - return nla_nest_end(skb, nest); admin_error: @@ -1941,7 +2038,6 @@ options_error: nla_nest_cancel(skb, nest); start_error: - rcu_read_unlock(); return -ENOSPC; } @@ -2000,11 +2096,8 @@ static void taprio_walk(struct Qdisc *sch, struct qdisc_walker *arg) arg->count = arg->skip; for (ntx = arg->skip; ntx < dev->num_tx_queues; ntx++) { - if (arg->fn(sch, ntx + 1, arg) < 0) { - arg->stop = 1; + if (!tc_qdisc_stats_dump(sch, ntx + 1, arg)) break; - } - arg->count++; } } diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 72102277449e..277ad11f4d61 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -330,8 +330,6 @@ static void tbf_reset(struct Qdisc *sch) struct tbf_sched_data *q = qdisc_priv(sch); qdisc_reset(q->qdisc); - sch->qstats.backlog = 0; - sch->q.qlen = 0; q->t_c = ktime_get_ns(); q->tokens = q->buffer; q->ptokens = q->mtu; @@ -356,6 +354,7 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt, struct nlattr *tb[TCA_TBF_MAX + 1]; struct tc_tbf_qopt *qopt; struct Qdisc *child = NULL; + struct Qdisc *old = NULL; struct psched_ratecfg rate; struct psched_ratecfg peak; u64 max_size; @@ -447,7 +446,7 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt, sch_tree_lock(sch); if (child) { qdisc_tree_flush_backlog(q->qdisc); - qdisc_put(q->qdisc); + old = q->qdisc; q->qdisc = child; } q->limit = qopt->limit; @@ -467,6 +466,7 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt, memcpy(&q->peak, &peak, sizeof(struct psched_ratecfg)); sch_tree_unlock(sch); + qdisc_put(old); err = 0; tbf_offload_change(sch); @@ -580,12 +580,7 @@ static unsigned long tbf_find(struct Qdisc *sch, u32 classid) static void tbf_walk(struct Qdisc *sch, struct qdisc_walker *walker) { if (!walker->stop) { - if (walker->count >= walker->skip) - if (walker->fn(sch, 1, walker) < 0) { - walker->stop = 1; - return; - } - walker->count++; + tc_qdisc_stats_dump(sch, 1, walker); } } diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index 6af6b95bdb67..16f9238aa51d 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -124,7 +124,6 @@ teql_reset(struct Qdisc *sch) struct teql_sched_data *dat = qdisc_priv(sch); skb_queue_purge(&dat->q); - sch->q.qlen = 0; } static void @@ -492,7 +491,7 @@ static int __init teql_init(void) master = netdev_priv(dev); - strlcpy(master->qops.id, dev->name, IFNAMSIZ); + strscpy(master->qops.id, dev->name, IFNAMSIZ); err = register_qdisc(&master->qops); if (err) { |