diff options
Diffstat (limited to 'net/sched')
-rw-r--r-- | net/sched/act_api.c | 2 | ||||
-rw-r--r-- | net/sched/act_ct.c | 9 | ||||
-rw-r--r-- | net/sched/act_ipt.c | 70 | ||||
-rw-r--r-- | net/sched/act_pedit.c | 53 | ||||
-rw-r--r-- | net/sched/act_police.c | 11 | ||||
-rw-r--r-- | net/sched/cls_api.c | 15 | ||||
-rw-r--r-- | net/sched/cls_flower.c | 145 | ||||
-rw-r--r-- | net/sched/cls_fw.c | 10 | ||||
-rw-r--r-- | net/sched/cls_u32.c | 18 | ||||
-rw-r--r-- | net/sched/sch_api.c | 88 | ||||
-rw-r--r-- | net/sched/sch_cake.c | 1 | ||||
-rw-r--r-- | net/sched/sch_fq_pie.c | 10 | ||||
-rw-r--r-- | net/sched/sch_generic.c | 44 | ||||
-rw-r--r-- | net/sched/sch_htb.c | 7 | ||||
-rw-r--r-- | net/sched/sch_ingress.c | 16 | ||||
-rw-r--r-- | net/sched/sch_mq.c | 8 | ||||
-rw-r--r-- | net/sched/sch_mqprio.c | 8 | ||||
-rw-r--r-- | net/sched/sch_netem.c | 62 | ||||
-rw-r--r-- | net/sched/sch_pie.c | 5 | ||||
-rw-r--r-- | net/sched/sch_qfq.c | 18 | ||||
-rw-r--r-- | net/sched/sch_red.c | 5 | ||||
-rw-r--r-- | net/sched/sch_sfq.c | 5 | ||||
-rw-r--r-- | net/sched/sch_taprio.c | 97 | ||||
-rw-r--r-- | net/sched/sch_tbf.c | 1 | ||||
-rw-r--r-- | net/sched/sch_teql.c | 2 |
25 files changed, 561 insertions, 149 deletions
diff --git a/net/sched/act_api.c b/net/sched/act_api.c index f7887f42d542..9d3f26bf0440 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -1320,7 +1320,7 @@ struct tc_action_ops *tc_action_load_ops(struct nlattr *nla, bool police, return ERR_PTR(err); } } else { - if (strlcpy(act_name, "police", IFNAMSIZ) >= IFNAMSIZ) { + if (strscpy(act_name, "police", IFNAMSIZ) < 0) { NL_SET_ERR_MSG(extack, "TC action name too long"); return ERR_PTR(-EINVAL); } diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index 9cc0bc7c71ed..abc71a06d634 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -610,6 +610,7 @@ static bool tcf_ct_flow_table_lookup(struct tcf_ct_params *p, struct flow_offload_tuple tuple = {}; enum ip_conntrack_info ctinfo; struct tcphdr *tcph = NULL; + bool force_refresh = false; struct flow_offload *flow; struct nf_conn *ct; u8 dir; @@ -647,6 +648,7 @@ static bool tcf_ct_flow_table_lookup(struct tcf_ct_params *p, * established state, then don't refresh. */ return false; + force_refresh = true; } if (tcph && (unlikely(tcph->fin || tcph->rst))) { @@ -660,7 +662,12 @@ static bool tcf_ct_flow_table_lookup(struct tcf_ct_params *p, else ctinfo = IP_CT_ESTABLISHED_REPLY; - flow_offload_refresh(nf_ft, flow); + flow_offload_refresh(nf_ft, flow, force_refresh); + if (!test_bit(IPS_ASSURED_BIT, &ct->status)) { + /* Process this flow in SW to allow promoting to ASSURED */ + return false; + } + nf_conntrack_get(&ct->ct_general); nf_ct_set(skb, ct, ctinfo); if (nf_ft->flags & NF_FLOWTABLE_COUNTER) diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 5d96ffebd40f..598d6e299152 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -21,6 +21,7 @@ #include <linux/tc_act/tc_ipt.h> #include <net/tc_act/tc_ipt.h> #include <net/tc_wrapper.h> +#include <net/ip.h> #include <linux/netfilter_ipv4/ip_tables.h> @@ -48,7 +49,7 @@ static int ipt_init_target(struct net *net, struct xt_entry_target *t, par.entryinfo = &e; par.target = target; par.targinfo = t->data; - par.hook_mask = hook; + par.hook_mask = 1 << hook; par.family = NFPROTO_IPV4; ret = xt_check_target(&par, t->u.target_size - sizeof(*t), 0, false); @@ -85,7 +86,8 @@ static void tcf_ipt_release(struct tc_action *a) static const struct nla_policy ipt_policy[TCA_IPT_MAX + 1] = { [TCA_IPT_TABLE] = { .type = NLA_STRING, .len = IFNAMSIZ }, - [TCA_IPT_HOOK] = { .type = NLA_U32 }, + [TCA_IPT_HOOK] = NLA_POLICY_RANGE(NLA_U32, NF_INET_PRE_ROUTING, + NF_INET_NUMHOOKS), [TCA_IPT_INDEX] = { .type = NLA_U32 }, [TCA_IPT_TARG] = { .len = sizeof(struct xt_entry_target) }, }; @@ -158,15 +160,27 @@ static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla, return -EEXIST; } } + + err = -EINVAL; hook = nla_get_u32(tb[TCA_IPT_HOOK]); + switch (hook) { + case NF_INET_PRE_ROUTING: + break; + case NF_INET_POST_ROUTING: + break; + default: + goto err1; + } + + if (tb[TCA_IPT_TABLE]) { + /* mangle only for now */ + if (nla_strcmp(tb[TCA_IPT_TABLE], "mangle")) + goto err1; + } - err = -ENOMEM; - tname = kmalloc(IFNAMSIZ, GFP_KERNEL); + tname = kstrdup("mangle", GFP_KERNEL); if (unlikely(!tname)) goto err1; - if (tb[TCA_IPT_TABLE] == NULL || - nla_strscpy(tname, tb[TCA_IPT_TABLE], IFNAMSIZ) >= IFNAMSIZ) - strcpy(tname, "mangle"); t = kmemdup(td, td->u.target_size, GFP_KERNEL); if (unlikely(!t)) @@ -217,10 +231,31 @@ static int tcf_xt_init(struct net *net, struct nlattr *nla, a, &act_xt_ops, tp, flags); } +static bool tcf_ipt_act_check(struct sk_buff *skb) +{ + const struct iphdr *iph; + unsigned int nhoff, len; + + if (!pskb_may_pull(skb, sizeof(struct iphdr))) + return false; + + nhoff = skb_network_offset(skb); + iph = ip_hdr(skb); + if (iph->ihl < 5 || iph->version != 4) + return false; + + len = skb_ip_totlen(skb); + if (skb->len < nhoff + len || len < (iph->ihl * 4u)) + return false; + + return pskb_may_pull(skb, iph->ihl * 4u); +} + TC_INDIRECT_SCOPE int tcf_ipt_act(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { + char saved_cb[sizeof_field(struct sk_buff, cb)]; int ret = 0, result = 0; struct tcf_ipt *ipt = to_ipt(a); struct xt_action_param par; @@ -231,9 +266,24 @@ TC_INDIRECT_SCOPE int tcf_ipt_act(struct sk_buff *skb, .pf = NFPROTO_IPV4, }; + if (skb_protocol(skb, false) != htons(ETH_P_IP)) + return TC_ACT_UNSPEC; + if (skb_unclone(skb, GFP_ATOMIC)) return TC_ACT_UNSPEC; + if (!tcf_ipt_act_check(skb)) + return TC_ACT_UNSPEC; + + if (state.hook == NF_INET_POST_ROUTING) { + if (!skb_dst(skb)) + return TC_ACT_UNSPEC; + + state.out = skb->dev; + } + + memcpy(saved_cb, skb->cb, sizeof(saved_cb)); + spin_lock(&ipt->tcf_lock); tcf_lastuse_update(&ipt->tcf_tm); @@ -246,6 +296,9 @@ TC_INDIRECT_SCOPE int tcf_ipt_act(struct sk_buff *skb, par.state = &state; par.target = ipt->tcfi_t->u.kernel.target; par.targinfo = ipt->tcfi_t->data; + + memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); + ret = par.target->target(skb, &par); switch (ret) { @@ -266,6 +319,9 @@ TC_INDIRECT_SCOPE int tcf_ipt_act(struct sk_buff *skb, break; } spin_unlock(&ipt->tcf_lock); + + memcpy(skb->cb, saved_cb, sizeof(skb->cb)); + return result; } diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index fc945c7e4123..1ef8fcfa9997 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -13,7 +13,10 @@ #include <linux/rtnetlink.h> #include <linux/module.h> #include <linux/init.h> +#include <linux/ip.h> +#include <linux/ipv6.h> #include <linux/slab.h> +#include <net/ipv6.h> #include <net/netlink.h> #include <net/pkt_sched.h> #include <linux/tc_act/tc_pedit.h> @@ -26,6 +29,7 @@ static struct tc_action_ops act_pedit_ops; static const struct nla_policy pedit_policy[TCA_PEDIT_MAX + 1] = { [TCA_PEDIT_PARMS] = { .len = sizeof(struct tc_pedit) }, + [TCA_PEDIT_PARMS_EX] = { .len = sizeof(struct tc_pedit) }, [TCA_PEDIT_KEYS_EX] = { .type = NLA_NESTED }, }; @@ -242,14 +246,12 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, nparms->tcfp_flags = parm->flags; nparms->tcfp_nkeys = parm->nkeys; - nparms->tcfp_keys = kmalloc(ksize, GFP_KERNEL); + nparms->tcfp_keys = kmemdup(parm->keys, ksize, GFP_KERNEL); if (!nparms->tcfp_keys) { ret = -ENOMEM; goto put_chain; } - memcpy(nparms->tcfp_keys, parm->keys, ksize); - for (i = 0; i < nparms->tcfp_nkeys; ++i) { u32 offmask = nparms->tcfp_keys[i].offmask; u32 cur = nparms->tcfp_keys[i].off; @@ -327,28 +329,58 @@ static bool offset_valid(struct sk_buff *skb, int offset) return true; } -static void pedit_skb_hdr_offset(struct sk_buff *skb, +static int pedit_l4_skb_offset(struct sk_buff *skb, int *hoffset, const int header_type) +{ + const int noff = skb_network_offset(skb); + int ret = -EINVAL; + struct iphdr _iph; + + switch (skb->protocol) { + case htons(ETH_P_IP): { + const struct iphdr *iph = skb_header_pointer(skb, noff, sizeof(_iph), &_iph); + + if (!iph) + goto out; + *hoffset = noff + iph->ihl * 4; + ret = 0; + break; + } + case htons(ETH_P_IPV6): + ret = ipv6_find_hdr(skb, hoffset, header_type, NULL, NULL) == header_type ? 0 : -EINVAL; + break; + } +out: + return ret; +} + +static int pedit_skb_hdr_offset(struct sk_buff *skb, enum pedit_header_type htype, int *hoffset) { + int ret = -EINVAL; /* 'htype' is validated in the netlink parsing */ switch (htype) { case TCA_PEDIT_KEY_EX_HDR_TYPE_ETH: - if (skb_mac_header_was_set(skb)) + if (skb_mac_header_was_set(skb)) { *hoffset = skb_mac_offset(skb); + ret = 0; + } break; case TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK: case TCA_PEDIT_KEY_EX_HDR_TYPE_IP4: case TCA_PEDIT_KEY_EX_HDR_TYPE_IP6: *hoffset = skb_network_offset(skb); + ret = 0; break; case TCA_PEDIT_KEY_EX_HDR_TYPE_TCP: + ret = pedit_l4_skb_offset(skb, hoffset, IPPROTO_TCP); + break; case TCA_PEDIT_KEY_EX_HDR_TYPE_UDP: - if (skb_transport_header_was_set(skb)) - *hoffset = skb_transport_offset(skb); + ret = pedit_l4_skb_offset(skb, hoffset, IPPROTO_UDP); break; default: break; } + return ret; } TC_INDIRECT_SCOPE int tcf_pedit_act(struct sk_buff *skb, @@ -384,6 +416,7 @@ TC_INDIRECT_SCOPE int tcf_pedit_act(struct sk_buff *skb, int hoffset = 0; u32 *ptr, hdata; u32 val; + int rc; if (tkey_ex) { htype = tkey_ex->htype; @@ -392,7 +425,11 @@ TC_INDIRECT_SCOPE int tcf_pedit_act(struct sk_buff *skb, tkey_ex++; } - pedit_skb_hdr_offset(skb, htype, &hoffset); + rc = pedit_skb_hdr_offset(skb, htype, &hoffset); + if (rc) { + pr_info_ratelimited("tc action pedit unable to extract header offset for header type (0x%x)\n", htype); + goto bad; + } if (tkey->offmask) { u8 *d, _d; diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 227cba58ce9f..f3121c5a85e9 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -16,6 +16,7 @@ #include <linux/init.h> #include <linux/slab.h> #include <net/act_api.h> +#include <net/gso.h> #include <net/netlink.h> #include <net/pkt_cls.h> #include <net/tc_act/tc_police.h> @@ -357,23 +358,23 @@ static int tcf_police_dump(struct sk_buff *skb, struct tc_action *a, opt.burst = PSCHED_NS2TICKS(p->tcfp_burst); if (p->rate_present) { psched_ratecfg_getrate(&opt.rate, &p->rate); - if ((police->params->rate.rate_bytes_ps >= (1ULL << 32)) && + if ((p->rate.rate_bytes_ps >= (1ULL << 32)) && nla_put_u64_64bit(skb, TCA_POLICE_RATE64, - police->params->rate.rate_bytes_ps, + p->rate.rate_bytes_ps, TCA_POLICE_PAD)) goto nla_put_failure; } if (p->peak_present) { psched_ratecfg_getrate(&opt.peakrate, &p->peak); - if ((police->params->peak.rate_bytes_ps >= (1ULL << 32)) && + if ((p->peak.rate_bytes_ps >= (1ULL << 32)) && nla_put_u64_64bit(skb, TCA_POLICE_PEAKRATE64, - police->params->peak.rate_bytes_ps, + p->peak.rate_bytes_ps, TCA_POLICE_PAD)) goto nla_put_failure; } if (p->pps_present) { if (nla_put_u64_64bit(skb, TCA_POLICE_PKTRATE64, - police->params->ppsrate.rate_pkts_ps, + p->ppsrate.rate_pkts_ps, TCA_POLICE_PAD)) goto nla_put_failure; if (nla_put_u64_64bit(skb, TCA_POLICE_PKTBURST64, diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 2621550bfddc..a193cc7b3241 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -43,8 +43,6 @@ #include <net/flow_offload.h> #include <net/tc_wrapper.h> -extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1]; - /* The list of all installed classifier types */ static LIST_HEAD(tcf_proto_base); @@ -659,8 +657,8 @@ static void __tcf_chain_put(struct tcf_chain *chain, bool by_act, { struct tcf_block *block = chain->block; const struct tcf_proto_ops *tmplt_ops; + unsigned int refcnt, non_act_refcnt; bool free_block = false; - unsigned int refcnt; void *tmplt_priv; mutex_lock(&block->lock); @@ -680,13 +678,15 @@ static void __tcf_chain_put(struct tcf_chain *chain, bool by_act, * save these to temporary variables. */ refcnt = --chain->refcnt; + non_act_refcnt = refcnt - chain->action_refcnt; tmplt_ops = chain->tmplt_ops; tmplt_priv = chain->tmplt_priv; - /* The last dropped non-action reference will trigger notification. */ - if (refcnt - chain->action_refcnt == 0 && !by_act) { - tc_chain_notify_delete(tmplt_ops, tmplt_priv, chain->index, - block, NULL, 0, 0, false); + if (non_act_refcnt == chain->explicitly_created && !by_act) { + if (non_act_refcnt == 0) + tc_chain_notify_delete(tmplt_ops, tmplt_priv, + chain->index, block, NULL, 0, 0, + false); /* Last reference to chain, no need to lock. */ chain->flushing = false; } @@ -2952,6 +2952,7 @@ static int tc_chain_tmplt_add(struct tcf_chain *chain, struct net *net, return PTR_ERR(ops); if (!ops->tmplt_create || !ops->tmplt_destroy || !ops->tmplt_dump) { NL_SET_ERR_MSG(extack, "Chain templates are not supported with specified classifier"); + module_put(ops->owner); return -EOPNOTSUPP; } diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 9dbc43388e57..f2b0bc4142fe 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -11,6 +11,7 @@ #include <linux/rhashtable.h> #include <linux/workqueue.h> #include <linux/refcount.h> +#include <linux/bitfield.h> #include <linux/if_ether.h> #include <linux/in6.h> @@ -71,6 +72,7 @@ struct fl_flow_key { struct flow_dissector_key_num_of_vlans num_of_vlans; struct flow_dissector_key_pppoe pppoe; struct flow_dissector_key_l2tpv3 l2tpv3; + struct flow_dissector_key_cfm cfm; } __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */ struct fl_flow_mask_range { @@ -120,6 +122,7 @@ struct cls_fl_filter { u32 handle; u32 flags; u32 in_hw_count; + u8 needs_tc_skb_ext:1; struct rcu_work rwork; struct net_device *hw_dev; /* Flower classifier is unlocked, which means that its reference counter @@ -415,6 +418,8 @@ static struct cls_fl_head *fl_head_dereference(struct tcf_proto *tp) static void __fl_destroy_filter(struct cls_fl_filter *f) { + if (f->needs_tc_skb_ext) + tc_skb_ext_tc_disable(); tcf_exts_destroy(&f->exts); tcf_exts_put_net(&f->exts); kfree(f); @@ -615,7 +620,8 @@ static void *fl_get(struct tcf_proto *tp, u32 handle) } static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = { - [TCA_FLOWER_UNSPEC] = { .type = NLA_UNSPEC }, + [TCA_FLOWER_UNSPEC] = { .strict_start_type = + TCA_FLOWER_L2_MISS }, [TCA_FLOWER_CLASSID] = { .type = NLA_U32 }, [TCA_FLOWER_INDEV] = { .type = NLA_STRING, .len = IFNAMSIZ }, @@ -720,7 +726,8 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = { [TCA_FLOWER_KEY_PPPOE_SID] = { .type = NLA_U16 }, [TCA_FLOWER_KEY_PPP_PROTO] = { .type = NLA_U16 }, [TCA_FLOWER_KEY_L2TPV3_SID] = { .type = NLA_U32 }, - + [TCA_FLOWER_L2_MISS] = NLA_POLICY_MAX(NLA_U8, 1), + [TCA_FLOWER_KEY_CFM] = { .type = NLA_NESTED }, }; static const struct nla_policy @@ -769,6 +776,12 @@ mpls_stack_entry_policy[TCA_FLOWER_KEY_MPLS_OPT_LSE_MAX + 1] = { [TCA_FLOWER_KEY_MPLS_OPT_LSE_LABEL] = { .type = NLA_U32 }, }; +static const struct nla_policy cfm_opt_policy[TCA_FLOWER_KEY_CFM_OPT_MAX] = { + [TCA_FLOWER_KEY_CFM_MD_LEVEL] = NLA_POLICY_MAX(NLA_U8, + FLOW_DIS_CFM_MDL_MAX), + [TCA_FLOWER_KEY_CFM_OPCODE] = { .type = NLA_U8 }, +}; + static void fl_set_key_val(struct nlattr **tb, void *val, int val_type, void *mask, int mask_type, int len) @@ -799,6 +812,16 @@ static int fl_set_key_port_range(struct nlattr **tb, struct fl_flow_key *key, TCA_FLOWER_KEY_PORT_SRC_MAX, &mask->tp_range.tp_max.src, TCA_FLOWER_UNSPEC, sizeof(key->tp_range.tp_max.src)); + if (mask->tp_range.tp_min.dst != mask->tp_range.tp_max.dst) { + NL_SET_ERR_MSG(extack, + "Both min and max destination ports must be specified"); + return -EINVAL; + } + if (mask->tp_range.tp_min.src != mask->tp_range.tp_max.src) { + NL_SET_ERR_MSG(extack, + "Both min and max source ports must be specified"); + return -EINVAL; + } if (mask->tp_range.tp_min.dst && mask->tp_range.tp_max.dst && ntohs(key->tp_range.tp_max.dst) <= ntohs(key->tp_range.tp_min.dst)) { @@ -1153,6 +1176,9 @@ static int fl_set_geneve_opt(const struct nlattr *nla, struct fl_flow_key *key, if (option_len > sizeof(struct geneve_opt)) data_len = option_len - sizeof(struct geneve_opt); + if (key->enc_opts.len > FLOW_DIS_TUN_OPTS_MAX - 4) + return -ERANGE; + opt = (struct geneve_opt *)&key->enc_opts.data[key->enc_opts.len]; memset(opt, 0xff, option_len); opt->length = data_len / 4; @@ -1653,6 +1679,53 @@ static bool is_vlan_key(struct nlattr *tb, __be16 *ethertype, return false; } +static void fl_set_key_cfm_md_level(struct nlattr **tb, + struct fl_flow_key *key, + struct fl_flow_key *mask, + struct netlink_ext_ack *extack) +{ + u8 level; + + if (!tb[TCA_FLOWER_KEY_CFM_MD_LEVEL]) + return; + + level = nla_get_u8(tb[TCA_FLOWER_KEY_CFM_MD_LEVEL]); + key->cfm.mdl_ver = FIELD_PREP(FLOW_DIS_CFM_MDL_MASK, level); + mask->cfm.mdl_ver = FLOW_DIS_CFM_MDL_MASK; +} + +static void fl_set_key_cfm_opcode(struct nlattr **tb, + struct fl_flow_key *key, + struct fl_flow_key *mask, + struct netlink_ext_ack *extack) +{ + fl_set_key_val(tb, &key->cfm.opcode, TCA_FLOWER_KEY_CFM_OPCODE, + &mask->cfm.opcode, TCA_FLOWER_UNSPEC, + sizeof(key->cfm.opcode)); +} + +static int fl_set_key_cfm(struct nlattr **tb, + struct fl_flow_key *key, + struct fl_flow_key *mask, + struct netlink_ext_ack *extack) +{ + struct nlattr *nla_cfm_opt[TCA_FLOWER_KEY_CFM_OPT_MAX]; + int err; + + if (!tb[TCA_FLOWER_KEY_CFM]) + return 0; + + err = nla_parse_nested(nla_cfm_opt, TCA_FLOWER_KEY_CFM_OPT_MAX, + tb[TCA_FLOWER_KEY_CFM], cfm_opt_policy, extack); + if (err < 0) + return err; + + fl_set_key_cfm_opcode(nla_cfm_opt, key, mask, extack); + fl_set_key_cfm_md_level(nla_cfm_opt, key, mask, extack); + + return 0; +} + static int fl_set_key(struct net *net, struct nlattr **tb, struct fl_flow_key *key, struct fl_flow_key *mask, struct netlink_ext_ack *extack) @@ -1668,6 +1741,10 @@ static int fl_set_key(struct net *net, struct nlattr **tb, mask->meta.ingress_ifindex = 0xffffffff; } + fl_set_key_val(tb, &key->meta.l2_miss, TCA_FLOWER_L2_MISS, + &mask->meta.l2_miss, TCA_FLOWER_UNSPEC, + sizeof(key->meta.l2_miss)); + fl_set_key_val(tb, key->eth.dst, TCA_FLOWER_KEY_ETH_DST, mask->eth.dst, TCA_FLOWER_KEY_ETH_DST_MASK, sizeof(key->eth.dst)); @@ -1803,6 +1880,10 @@ static int fl_set_key(struct net *net, struct nlattr **tb, TCA_FLOWER_KEY_L2TPV3_SID, &mask->l2tpv3.session_id, TCA_FLOWER_UNSPEC, sizeof(key->l2tpv3.session_id)); + } else if (key->basic.n_proto == htons(ETH_P_CFM)) { + ret = fl_set_key_cfm(tb, key, mask, extack); + if (ret) + return ret; } if (key->basic.ip_proto == IPPROTO_TCP || @@ -1985,6 +2066,8 @@ static void fl_init_dissector(struct flow_dissector *dissector, FLOW_DISSECTOR_KEY_PPPOE, pppoe); FL_KEY_SET_IF_MASKED(mask, keys, cnt, FLOW_DISSECTOR_KEY_L2TPV3, l2tpv3); + FL_KEY_SET_IF_MASKED(mask, keys, cnt, + FLOW_DISSECTOR_KEY_CFM, cfm); skb_flow_dissector_init(dissector, keys, cnt); } @@ -2085,6 +2168,11 @@ errout_cleanup: return ret; } +static bool fl_needs_tc_skb_ext(const struct fl_flow_key *mask) +{ + return mask->meta.l2_miss; +} + static int fl_set_parms(struct net *net, struct tcf_proto *tp, struct cls_fl_filter *f, struct fl_flow_mask *mask, unsigned long base, struct nlattr **tb, @@ -2121,6 +2209,14 @@ static int fl_set_parms(struct net *net, struct tcf_proto *tp, return -EINVAL; } + /* Enable tc skb extension if filter matches on data extracted from + * this extension. + */ + if (fl_needs_tc_skb_ext(&mask->key)) { + f->needs_tc_skb_ext = 1; + tc_skb_ext_tc_enable(); + } + return 0; } @@ -3005,6 +3101,43 @@ nla_put_failure: return -EMSGSIZE; } +static int fl_dump_key_cfm(struct sk_buff *skb, + struct flow_dissector_key_cfm *key, + struct flow_dissector_key_cfm *mask) +{ + struct nlattr *opts; + int err; + u8 mdl; + + if (!memchr_inv(mask, 0, sizeof(*mask))) + return 0; + + opts = nla_nest_start(skb, TCA_FLOWER_KEY_CFM); + if (!opts) + return -EMSGSIZE; + + if (FIELD_GET(FLOW_DIS_CFM_MDL_MASK, mask->mdl_ver)) { + mdl = FIELD_GET(FLOW_DIS_CFM_MDL_MASK, key->mdl_ver); + err = nla_put_u8(skb, TCA_FLOWER_KEY_CFM_MD_LEVEL, mdl); + if (err) + goto err_cfm_opts; + } + + if (mask->opcode) { + err = nla_put_u8(skb, TCA_FLOWER_KEY_CFM_OPCODE, key->opcode); + if (err) + goto err_cfm_opts; + } + + nla_nest_end(skb, opts); + + return 0; + +err_cfm_opts: + nla_nest_cancel(skb, opts); + return err; +} + static int fl_dump_key_options(struct sk_buff *skb, int enc_opt_type, struct flow_dissector_key_enc_opts *enc_opts) { @@ -3074,6 +3207,11 @@ static int fl_dump_key(struct sk_buff *skb, struct net *net, goto nla_put_failure; } + if (fl_dump_key_val(skb, &key->meta.l2_miss, + TCA_FLOWER_L2_MISS, &mask->meta.l2_miss, + TCA_FLOWER_UNSPEC, sizeof(key->meta.l2_miss))) + goto nla_put_failure; + if (fl_dump_key_val(skb, key->eth.dst, TCA_FLOWER_KEY_ETH_DST, mask->eth.dst, TCA_FLOWER_KEY_ETH_DST_MASK, sizeof(key->eth.dst)) || @@ -3287,6 +3425,9 @@ static int fl_dump_key(struct sk_buff *skb, struct net *net, sizeof(key->hash.hash))) goto nla_put_failure; + if (fl_dump_key_cfm(skb, &key->cfm, &mask->cfm)) + goto nla_put_failure; + return 0; nla_put_failure: diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index ae9439a6c56c..8641f8059317 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -212,11 +212,6 @@ static int fw_set_parms(struct net *net, struct tcf_proto *tp, if (err < 0) return err; - if (tb[TCA_FW_CLASSID]) { - f->res.classid = nla_get_u32(tb[TCA_FW_CLASSID]); - tcf_bind_filter(tp, &f->res, base); - } - if (tb[TCA_FW_INDEV]) { int ret; ret = tcf_change_indev(net, tb[TCA_FW_INDEV], extack); @@ -233,6 +228,11 @@ static int fw_set_parms(struct net *net, struct tcf_proto *tp, } else if (head->mask != 0xFFFFFFFF) return err; + if (tb[TCA_FW_CLASSID]) { + f->res.classid = nla_get_u32(tb[TCA_FW_CLASSID]); + tcf_bind_filter(tp, &f->res, base); + } + return 0; } diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 4e2e269f121f..d15d50de7980 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -718,13 +718,19 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp, struct nlattr *est, u32 flags, u32 fl_flags, struct netlink_ext_ack *extack) { - int err; + int err, ifindex = -1; err = tcf_exts_validate_ex(net, tp, tb, est, &n->exts, flags, fl_flags, extack); if (err < 0) return err; + if (tb[TCA_U32_INDEV]) { + ifindex = tcf_change_indev(net, tb[TCA_U32_INDEV], extack); + if (ifindex < 0) + return -EINVAL; + } + if (tb[TCA_U32_LINK]) { u32 handle = nla_get_u32(tb[TCA_U32_LINK]); struct tc_u_hnode *ht_down = NULL, *ht_old; @@ -759,13 +765,9 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp, tcf_bind_filter(tp, &n->res, base); } - if (tb[TCA_U32_INDEV]) { - int ret; - ret = tcf_change_indev(net, tb[TCA_U32_INDEV], extack); - if (ret < 0) - return -EINVAL; - n->ifindex = ret; - } + if (ifindex >= 0) + n->ifindex = ifindex; + return 0; } diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index fdb8f429333d..aa6b1fe65151 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -309,7 +309,7 @@ struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle) if (dev_ingress_queue(dev)) q = qdisc_match_from_root( - dev_ingress_queue(dev)->qdisc_sleeping, + rtnl_dereference(dev_ingress_queue(dev)->qdisc_sleeping), handle); out: return q; @@ -328,7 +328,8 @@ struct Qdisc *qdisc_lookup_rcu(struct net_device *dev, u32 handle) nq = dev_ingress_queue_rcu(dev); if (nq) - q = qdisc_match_from_root(nq->qdisc_sleeping, handle); + q = qdisc_match_from_root(rcu_dereference(nq->qdisc_sleeping), + handle); out: return q; } @@ -634,8 +635,13 @@ EXPORT_SYMBOL(qdisc_watchdog_init); void qdisc_watchdog_schedule_range_ns(struct qdisc_watchdog *wd, u64 expires, u64 delta_ns) { - if (test_bit(__QDISC_STATE_DEACTIVATED, - &qdisc_root_sleeping(wd->qdisc)->state)) + bool deactivated; + + rcu_read_lock(); + deactivated = test_bit(__QDISC_STATE_DEACTIVATED, + &qdisc_root_sleeping(wd->qdisc)->state); + rcu_read_unlock(); + if (deactivated) return; if (hrtimer_is_queued(&wd->timer)) { @@ -1073,17 +1079,29 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, if (parent == NULL) { unsigned int i, num_q, ingress; + struct netdev_queue *dev_queue; ingress = 0; num_q = dev->num_tx_queues; if ((q && q->flags & TCQ_F_INGRESS) || (new && new->flags & TCQ_F_INGRESS)) { - num_q = 1; ingress = 1; - if (!dev_ingress_queue(dev)) { + dev_queue = dev_ingress_queue(dev); + if (!dev_queue) { NL_SET_ERR_MSG(extack, "Device does not have an ingress queue"); return -ENOENT; } + + q = rtnl_dereference(dev_queue->qdisc_sleeping); + + /* This is the counterpart of that qdisc_refcount_inc_nz() call in + * __tcf_qdisc_find() for filter requests. + */ + if (!qdisc_refcount_dec_if_one(q)) { + NL_SET_ERR_MSG(extack, + "Current ingress or clsact Qdisc has ongoing filter requests"); + return -EBUSY; + } } if (dev->flags & IFF_UP) @@ -1094,18 +1112,26 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, if (new && new->ops->attach && !ingress) goto skip; - for (i = 0; i < num_q; i++) { - struct netdev_queue *dev_queue = dev_ingress_queue(dev); - - if (!ingress) + if (!ingress) { + for (i = 0; i < num_q; i++) { dev_queue = netdev_get_tx_queue(dev, i); + old = dev_graft_qdisc(dev_queue, new); - old = dev_graft_qdisc(dev_queue, new); - if (new && i > 0) - qdisc_refcount_inc(new); - - if (!ingress) + if (new && i > 0) + qdisc_refcount_inc(new); qdisc_put(old); + } + } else { + old = dev_graft_qdisc(dev_queue, NULL); + + /* {ingress,clsact}_destroy() @old before grafting @new to avoid + * unprotected concurrent accesses to net_device::miniq_{in,e}gress + * pointer(s) in mini_qdisc_pair_swap(). + */ + qdisc_notify(net, skb, n, classid, old, new, extack); + qdisc_destroy(old); + + dev_graft_qdisc(dev_queue, new); } skip: @@ -1119,8 +1145,6 @@ skip: if (new && new->ops->attach) new->ops->attach(new); - } else { - notify_and_destroy(net, skb, n, classid, old, new, extack); } if (dev->flags & IFF_UP) @@ -1252,7 +1276,12 @@ static struct Qdisc *qdisc_create(struct net_device *dev, sch->parent = parent; if (handle == TC_H_INGRESS) { - sch->flags |= TCQ_F_INGRESS; + if (!(sch->flags & TCQ_F_INGRESS)) { + NL_SET_ERR_MSG(extack, + "Specified parent ID is reserved for ingress and clsact Qdiscs"); + err = -EINVAL; + goto err_out3; + } handle = TC_H_MAKE(TC_H_INGRESS, 0); } else { if (handle == 0) { @@ -1473,7 +1502,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, } q = qdisc_leaf(p, clid); } else if (dev_ingress_queue(dev)) { - q = dev_ingress_queue(dev)->qdisc_sleeping; + q = rtnl_dereference(dev_ingress_queue(dev)->qdisc_sleeping); } } else { q = rtnl_dereference(dev->qdisc); @@ -1559,7 +1588,7 @@ replay: } q = qdisc_leaf(p, clid); } else if (dev_ingress_queue_create(dev)) { - q = dev_ingress_queue(dev)->qdisc_sleeping; + q = rtnl_dereference(dev_ingress_queue(dev)->qdisc_sleeping); } } else { q = rtnl_dereference(dev->qdisc); @@ -1591,11 +1620,20 @@ replay: NL_SET_ERR_MSG(extack, "Invalid qdisc name"); return -EINVAL; } + if (q->flags & TCQ_F_INGRESS) { + NL_SET_ERR_MSG(extack, + "Cannot regraft ingress or clsact Qdiscs"); + return -EINVAL; + } if (q == p || (p && check_loop(q, p, 0))) { NL_SET_ERR_MSG(extack, "Qdisc parent/child loop detected"); return -ELOOP; } + if (clid == TC_H_INGRESS) { + NL_SET_ERR_MSG(extack, "Ingress cannot graft directly"); + return -EINVAL; + } qdisc_refcount_inc(q); goto graft; } else { @@ -1791,8 +1829,8 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) dev_queue = dev_ingress_queue(dev); if (dev_queue && - tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb, - &q_idx, s_q_idx, false, + tc_dump_qdisc_root(rtnl_dereference(dev_queue->qdisc_sleeping), + skb, cb, &q_idx, s_q_idx, false, tca[TCA_DUMP_INVISIBLE]) < 0) goto done; @@ -2235,8 +2273,8 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) dev_queue = dev_ingress_queue(dev); if (dev_queue && - tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb, - &t, s_t, false) < 0) + tc_dump_tclass_root(rtnl_dereference(dev_queue->qdisc_sleeping), + skb, tcm, cb, &t, s_t, false) < 0) goto done; done: @@ -2288,7 +2326,9 @@ static struct pernet_operations psched_net_ops = { .exit = psched_net_exit, }; +#if IS_ENABLED(CONFIG_RETPOLINE) DEFINE_STATIC_KEY_FALSE(tc_skip_wrapper); +#endif static int __init pktsched_init(void) { diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index 891e007d5c0b..9cff99558694 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -65,6 +65,7 @@ #include <linux/reciprocal_div.h> #include <net/netlink.h> #include <linux/if_vlan.h> +#include <net/gso.h> #include <net/pkt_sched.h> #include <net/pkt_cls.h> #include <net/tcp.h> diff --git a/net/sched/sch_fq_pie.c b/net/sched/sch_fq_pie.c index 6980796d435d..591d87d5e5c0 100644 --- a/net/sched/sch_fq_pie.c +++ b/net/sched/sch_fq_pie.c @@ -201,6 +201,11 @@ out: return NET_XMIT_CN; } +static struct netlink_range_validation fq_pie_q_range = { + .min = 1, + .max = 1 << 20, +}; + static const struct nla_policy fq_pie_policy[TCA_FQ_PIE_MAX + 1] = { [TCA_FQ_PIE_LIMIT] = {.type = NLA_U32}, [TCA_FQ_PIE_FLOWS] = {.type = NLA_U32}, @@ -208,7 +213,8 @@ static const struct nla_policy fq_pie_policy[TCA_FQ_PIE_MAX + 1] = { [TCA_FQ_PIE_TUPDATE] = {.type = NLA_U32}, [TCA_FQ_PIE_ALPHA] = {.type = NLA_U32}, [TCA_FQ_PIE_BETA] = {.type = NLA_U32}, - [TCA_FQ_PIE_QUANTUM] = {.type = NLA_U32}, + [TCA_FQ_PIE_QUANTUM] = + NLA_POLICY_FULL_RANGE(NLA_U32, &fq_pie_q_range), [TCA_FQ_PIE_MEMORY_LIMIT] = {.type = NLA_U32}, [TCA_FQ_PIE_ECN_PROB] = {.type = NLA_U32}, [TCA_FQ_PIE_ECN] = {.type = NLA_U32}, @@ -373,6 +379,7 @@ static void fq_pie_timer(struct timer_list *t) spinlock_t *root_lock; /* to lock qdisc for probability calculations */ u32 idx; + rcu_read_lock(); root_lock = qdisc_lock(qdisc_root_sleeping(sch)); spin_lock(root_lock); @@ -385,6 +392,7 @@ static void fq_pie_timer(struct timer_list *t) mod_timer(&q->adapt_timer, jiffies + q->p_params.tupdate); spin_unlock(root_lock); + rcu_read_unlock(); } static int fq_pie_init(struct Qdisc *sch, struct nlattr *opt, diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 37e41f972f69..5d7e23f4cc0e 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -648,7 +648,7 @@ struct Qdisc_ops noop_qdisc_ops __read_mostly = { static struct netdev_queue noop_netdev_queue = { RCU_POINTER_INITIALIZER(qdisc, &noop_qdisc), - .qdisc_sleeping = &noop_qdisc, + RCU_POINTER_INITIALIZER(qdisc_sleeping, &noop_qdisc), }; struct Qdisc noop_qdisc = { @@ -1046,7 +1046,7 @@ static void qdisc_free_cb(struct rcu_head *head) qdisc_free(q); } -static void qdisc_destroy(struct Qdisc *qdisc) +static void __qdisc_destroy(struct Qdisc *qdisc) { const struct Qdisc_ops *ops = qdisc->ops; @@ -1070,6 +1070,14 @@ static void qdisc_destroy(struct Qdisc *qdisc) call_rcu(&qdisc->rcu, qdisc_free_cb); } +void qdisc_destroy(struct Qdisc *qdisc) +{ + if (qdisc->flags & TCQ_F_BUILTIN) + return; + + __qdisc_destroy(qdisc); +} + void qdisc_put(struct Qdisc *qdisc) { if (!qdisc) @@ -1079,7 +1087,7 @@ void qdisc_put(struct Qdisc *qdisc) !refcount_dec_and_test(&qdisc->refcnt)) return; - qdisc_destroy(qdisc); + __qdisc_destroy(qdisc); } EXPORT_SYMBOL(qdisc_put); @@ -1094,7 +1102,7 @@ void qdisc_put_unlocked(struct Qdisc *qdisc) !refcount_dec_and_rtnl_lock(&qdisc->refcnt)) return; - qdisc_destroy(qdisc); + __qdisc_destroy(qdisc); rtnl_unlock(); } EXPORT_SYMBOL(qdisc_put_unlocked); @@ -1103,7 +1111,7 @@ EXPORT_SYMBOL(qdisc_put_unlocked); struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue, struct Qdisc *qdisc) { - struct Qdisc *oqdisc = dev_queue->qdisc_sleeping; + struct Qdisc *oqdisc = rtnl_dereference(dev_queue->qdisc_sleeping); spinlock_t *root_lock; root_lock = qdisc_lock(oqdisc); @@ -1112,7 +1120,7 @@ struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue, /* ... and graft new one */ if (qdisc == NULL) qdisc = &noop_qdisc; - dev_queue->qdisc_sleeping = qdisc; + rcu_assign_pointer(dev_queue->qdisc_sleeping, qdisc); rcu_assign_pointer(dev_queue->qdisc, &noop_qdisc); spin_unlock_bh(root_lock); @@ -1125,12 +1133,12 @@ static void shutdown_scheduler_queue(struct net_device *dev, struct netdev_queue *dev_queue, void *_qdisc_default) { - struct Qdisc *qdisc = dev_queue->qdisc_sleeping; + struct Qdisc *qdisc = rtnl_dereference(dev_queue->qdisc_sleeping); struct Qdisc *qdisc_default = _qdisc_default; if (qdisc) { rcu_assign_pointer(dev_queue->qdisc, qdisc_default); - dev_queue->qdisc_sleeping = qdisc_default; + rcu_assign_pointer(dev_queue->qdisc_sleeping, qdisc_default); qdisc_put(qdisc); } @@ -1154,7 +1162,7 @@ static void attach_one_default_qdisc(struct net_device *dev, if (!netif_is_multiqueue(dev)) qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; - dev_queue->qdisc_sleeping = qdisc; + rcu_assign_pointer(dev_queue->qdisc_sleeping, qdisc); } static void attach_default_qdiscs(struct net_device *dev) @@ -1167,7 +1175,7 @@ static void attach_default_qdiscs(struct net_device *dev) if (!netif_is_multiqueue(dev) || dev->priv_flags & IFF_NO_QUEUE) { netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL); - qdisc = txq->qdisc_sleeping; + qdisc = rtnl_dereference(txq->qdisc_sleeping); rcu_assign_pointer(dev->qdisc, qdisc); qdisc_refcount_inc(qdisc); } else { @@ -1186,7 +1194,7 @@ static void attach_default_qdiscs(struct net_device *dev) netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc); dev->priv_flags |= IFF_NO_QUEUE; netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL); - qdisc = txq->qdisc_sleeping; + qdisc = rtnl_dereference(txq->qdisc_sleeping); rcu_assign_pointer(dev->qdisc, qdisc); qdisc_refcount_inc(qdisc); dev->priv_flags ^= IFF_NO_QUEUE; @@ -1202,7 +1210,7 @@ static void transition_one_qdisc(struct net_device *dev, struct netdev_queue *dev_queue, void *_need_watchdog) { - struct Qdisc *new_qdisc = dev_queue->qdisc_sleeping; + struct Qdisc *new_qdisc = rtnl_dereference(dev_queue->qdisc_sleeping); int *need_watchdog_p = _need_watchdog; if (!(new_qdisc->flags & TCQ_F_BUILTIN)) @@ -1272,7 +1280,7 @@ static void dev_reset_queue(struct net_device *dev, struct Qdisc *qdisc; bool nolock; - qdisc = dev_queue->qdisc_sleeping; + qdisc = rtnl_dereference(dev_queue->qdisc_sleeping); if (!qdisc) return; @@ -1303,7 +1311,7 @@ static bool some_qdisc_is_busy(struct net_device *dev) int val; dev_queue = netdev_get_tx_queue(dev, i); - q = dev_queue->qdisc_sleeping; + q = rtnl_dereference(dev_queue->qdisc_sleeping); root_lock = qdisc_lock(q); spin_lock_bh(root_lock); @@ -1379,7 +1387,7 @@ EXPORT_SYMBOL(dev_deactivate); static int qdisc_change_tx_queue_len(struct net_device *dev, struct netdev_queue *dev_queue) { - struct Qdisc *qdisc = dev_queue->qdisc_sleeping; + struct Qdisc *qdisc = rtnl_dereference(dev_queue->qdisc_sleeping); const struct Qdisc_ops *ops = qdisc->ops; if (ops->change_tx_queue_len) @@ -1404,7 +1412,7 @@ void mq_change_real_num_tx(struct Qdisc *sch, unsigned int new_real_tx) unsigned int i; for (i = new_real_tx; i < dev->real_num_tx_queues; i++) { - qdisc = netdev_get_tx_queue(dev, i)->qdisc_sleeping; + qdisc = rtnl_dereference(netdev_get_tx_queue(dev, i)->qdisc_sleeping); /* Only update the default qdiscs we created, * qdiscs with handles are always hashed. */ @@ -1412,7 +1420,7 @@ void mq_change_real_num_tx(struct Qdisc *sch, unsigned int new_real_tx) qdisc_hash_del(qdisc); } for (i = dev->real_num_tx_queues; i < new_real_tx; i++) { - qdisc = netdev_get_tx_queue(dev, i)->qdisc_sleeping; + qdisc = rtnl_dereference(netdev_get_tx_queue(dev, i)->qdisc_sleeping); if (qdisc != &noop_qdisc && !qdisc->handle) qdisc_hash_add(qdisc, false); } @@ -1449,7 +1457,7 @@ static void dev_init_scheduler_queue(struct net_device *dev, struct Qdisc *qdisc = _qdisc; rcu_assign_pointer(dev_queue->qdisc, qdisc); - dev_queue->qdisc_sleeping = qdisc; + rcu_assign_pointer(dev_queue->qdisc_sleeping, qdisc); } void dev_init_scheduler(struct net_device *dev) diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 8aef7dd9fb88..325c29041c7d 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1814,10 +1814,6 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, NL_SET_ERR_MSG(extack, "HTB offload doesn't support the quantum parameter"); goto failure; } - if (hopt->prio) { - NL_SET_ERR_MSG(extack, "HTB offload doesn't support the prio parameter"); - goto failure; - } } /* Keeping backward compatible with rate_table based iproute2 tc */ @@ -1913,6 +1909,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, TC_HTB_CLASSID_ROOT, .rate = max_t(u64, hopt->rate.rate, rate64), .ceil = max_t(u64, hopt->ceil.rate, ceil64), + .prio = hopt->prio, .extack = extack, }; err = htb_offload(dev, &offload_opt); @@ -1933,6 +1930,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, TC_H_MIN(parent->common.classid), .rate = max_t(u64, hopt->rate.rate, rate64), .ceil = max_t(u64, hopt->ceil.rate, ceil64), + .prio = hopt->prio, .extack = extack, }; err = htb_offload(dev, &offload_opt); @@ -2018,6 +2016,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, .classid = cl->common.classid, .rate = max_t(u64, hopt->rate.rate, rate64), .ceil = max_t(u64, hopt->ceil.rate, ceil64), + .prio = hopt->prio, .extack = extack, }; err = htb_offload(dev, &offload_opt); diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index 84838128b9c5..e43a45499372 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c @@ -80,6 +80,9 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt, struct net_device *dev = qdisc_dev(sch); int err; + if (sch->parent != TC_H_INGRESS) + return -EOPNOTSUPP; + net_inc_ingress_queue(); mini_qdisc_pair_init(&q->miniqp, sch, &dev->miniq_ingress); @@ -101,6 +104,9 @@ static void ingress_destroy(struct Qdisc *sch) { struct ingress_sched_data *q = qdisc_priv(sch); + if (sch->parent != TC_H_INGRESS) + return; + tcf_block_put_ext(q->block, sch, &q->block_info); net_dec_ingress_queue(); } @@ -134,7 +140,7 @@ static struct Qdisc_ops ingress_qdisc_ops __read_mostly = { .cl_ops = &ingress_class_ops, .id = "ingress", .priv_size = sizeof(struct ingress_sched_data), - .static_flags = TCQ_F_CPUSTATS, + .static_flags = TCQ_F_INGRESS | TCQ_F_CPUSTATS, .init = ingress_init, .destroy = ingress_destroy, .dump = ingress_dump, @@ -219,6 +225,9 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt, struct net_device *dev = qdisc_dev(sch); int err; + if (sch->parent != TC_H_CLSACT) + return -EOPNOTSUPP; + net_inc_ingress_queue(); net_inc_egress_queue(); @@ -248,6 +257,9 @@ static void clsact_destroy(struct Qdisc *sch) { struct clsact_sched_data *q = qdisc_priv(sch); + if (sch->parent != TC_H_CLSACT) + return; + tcf_block_put_ext(q->egress_block, sch, &q->egress_block_info); tcf_block_put_ext(q->ingress_block, sch, &q->ingress_block_info); @@ -269,7 +281,7 @@ static struct Qdisc_ops clsact_qdisc_ops __read_mostly = { .cl_ops = &clsact_class_ops, .id = "clsact", .priv_size = sizeof(struct clsact_sched_data), - .static_flags = TCQ_F_CPUSTATS, + .static_flags = TCQ_F_INGRESS | TCQ_F_CPUSTATS, .init = clsact_init, .destroy = clsact_destroy, .dump = ingress_dump, diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c index d0bc660d7401..c860119a8f09 100644 --- a/net/sched/sch_mq.c +++ b/net/sched/sch_mq.c @@ -141,7 +141,7 @@ static int mq_dump(struct Qdisc *sch, struct sk_buff *skb) * qdisc totals are added at end. */ for (ntx = 0; ntx < dev->num_tx_queues; ntx++) { - qdisc = netdev_get_tx_queue(dev, ntx)->qdisc_sleeping; + qdisc = rtnl_dereference(netdev_get_tx_queue(dev, ntx)->qdisc_sleeping); spin_lock_bh(qdisc_lock(qdisc)); gnet_stats_add_basic(&sch->bstats, qdisc->cpu_bstats, @@ -202,7 +202,7 @@ static struct Qdisc *mq_leaf(struct Qdisc *sch, unsigned long cl) { struct netdev_queue *dev_queue = mq_queue_get(sch, cl); - return dev_queue->qdisc_sleeping; + return rtnl_dereference(dev_queue->qdisc_sleeping); } static unsigned long mq_find(struct Qdisc *sch, u32 classid) @@ -221,7 +221,7 @@ static int mq_dump_class(struct Qdisc *sch, unsigned long cl, tcm->tcm_parent = TC_H_ROOT; tcm->tcm_handle |= TC_H_MIN(cl); - tcm->tcm_info = dev_queue->qdisc_sleeping->handle; + tcm->tcm_info = rtnl_dereference(dev_queue->qdisc_sleeping)->handle; return 0; } @@ -230,7 +230,7 @@ static int mq_dump_class_stats(struct Qdisc *sch, unsigned long cl, { struct netdev_queue *dev_queue = mq_queue_get(sch, cl); - sch = dev_queue->qdisc_sleeping; + sch = rtnl_dereference(dev_queue->qdisc_sleeping); if (gnet_stats_copy_basic(d, sch->cpu_bstats, &sch->bstats, true) < 0 || qdisc_qstats_copy(d, sch) < 0) return -1; diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index dc5a0ff50b14..ab69ff7577fc 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -557,7 +557,7 @@ static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb) * qdisc totals are added at end. */ for (ntx = 0; ntx < dev->num_tx_queues; ntx++) { - qdisc = netdev_get_tx_queue(dev, ntx)->qdisc_sleeping; + qdisc = rtnl_dereference(netdev_get_tx_queue(dev, ntx)->qdisc_sleeping); spin_lock_bh(qdisc_lock(qdisc)); gnet_stats_add_basic(&sch->bstats, qdisc->cpu_bstats, @@ -604,7 +604,7 @@ static struct Qdisc *mqprio_leaf(struct Qdisc *sch, unsigned long cl) if (!dev_queue) return NULL; - return dev_queue->qdisc_sleeping; + return rtnl_dereference(dev_queue->qdisc_sleeping); } static unsigned long mqprio_find(struct Qdisc *sch, u32 classid) @@ -637,7 +637,7 @@ static int mqprio_dump_class(struct Qdisc *sch, unsigned long cl, tcm->tcm_parent = (tc < 0) ? 0 : TC_H_MAKE(TC_H_MAJ(sch->handle), TC_H_MIN(tc + TC_H_MIN_PRIORITY)); - tcm->tcm_info = dev_queue->qdisc_sleeping->handle; + tcm->tcm_info = rtnl_dereference(dev_queue->qdisc_sleeping)->handle; } else { tcm->tcm_parent = TC_H_ROOT; tcm->tcm_info = 0; @@ -693,7 +693,7 @@ static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl, } else { struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl); - sch = dev_queue->qdisc_sleeping; + sch = rtnl_dereference(dev_queue->qdisc_sleeping); if (gnet_stats_copy_basic(d, sch->cpu_bstats, &sch->bstats, true) < 0 || qdisc_qstats_copy(d, sch) < 0) diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 6ef3021e1169..38d9aa0cd30e 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -21,6 +21,7 @@ #include <linux/reciprocal_div.h> #include <linux/rbtree.h> +#include <net/gso.h> #include <net/netlink.h> #include <net/pkt_sched.h> #include <net/inet_ecn.h> @@ -773,12 +774,10 @@ static void dist_free(struct disttable *d) * signed 16 bit values. */ -static int get_dist_table(struct Qdisc *sch, struct disttable **tbl, - const struct nlattr *attr) +static int get_dist_table(struct disttable **tbl, const struct nlattr *attr) { size_t n = nla_len(attr)/sizeof(__s16); const __s16 *data = nla_data(attr); - spinlock_t *root_lock; struct disttable *d; int i; @@ -793,13 +792,7 @@ static int get_dist_table(struct Qdisc *sch, struct disttable **tbl, for (i = 0; i < n; i++) d->table[i] = data[i]; - root_lock = qdisc_root_sleeping_lock(sch); - - spin_lock_bh(root_lock); - swap(*tbl, d); - spin_unlock_bh(root_lock); - - dist_free(d); + *tbl = d; return 0; } @@ -956,6 +949,8 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt, { struct netem_sched_data *q = qdisc_priv(sch); struct nlattr *tb[TCA_NETEM_MAX + 1]; + struct disttable *delay_dist = NULL; + struct disttable *slot_dist = NULL; struct tc_netem_qopt *qopt; struct clgstate old_clg; int old_loss_model = CLG_RANDOM; @@ -966,6 +961,19 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt, if (ret < 0) return ret; + if (tb[TCA_NETEM_DELAY_DIST]) { + ret = get_dist_table(&delay_dist, tb[TCA_NETEM_DELAY_DIST]); + if (ret) + goto table_free; + } + + if (tb[TCA_NETEM_SLOT_DIST]) { + ret = get_dist_table(&slot_dist, tb[TCA_NETEM_SLOT_DIST]); + if (ret) + goto table_free; + } + + sch_tree_lock(sch); /* backup q->clg and q->loss_model */ old_clg = q->clg; old_loss_model = q->loss_model; @@ -974,26 +982,17 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt, ret = get_loss_clg(q, tb[TCA_NETEM_LOSS]); if (ret) { q->loss_model = old_loss_model; - return ret; + q->clg = old_clg; + goto unlock; } } else { q->loss_model = CLG_RANDOM; } - if (tb[TCA_NETEM_DELAY_DIST]) { - ret = get_dist_table(sch, &q->delay_dist, - tb[TCA_NETEM_DELAY_DIST]); - if (ret) - goto get_table_failure; - } - - if (tb[TCA_NETEM_SLOT_DIST]) { - ret = get_dist_table(sch, &q->slot_dist, - tb[TCA_NETEM_SLOT_DIST]); - if (ret) - goto get_table_failure; - } - + if (delay_dist) + swap(q->delay_dist, delay_dist); + if (slot_dist) + swap(q->slot_dist, slot_dist); sch->limit = qopt->limit; q->latency = PSCHED_TICKS2NS(qopt->latency); @@ -1041,15 +1040,12 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt, /* capping jitter to the range acceptable by tabledist() */ q->jitter = min_t(s64, abs(q->jitter), INT_MAX); - return ret; +unlock: + sch_tree_unlock(sch); -get_table_failure: - /* recover clg and loss_model, in case of - * q->clg and q->loss_model were modified - * in get_loss_clg() - */ - q->clg = old_clg; - q->loss_model = old_loss_model; +table_free: + dist_free(delay_dist); + dist_free(slot_dist); return ret; } diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c index 2152a56d73f8..2da6250ec346 100644 --- a/net/sched/sch_pie.c +++ b/net/sched/sch_pie.c @@ -421,8 +421,10 @@ static void pie_timer(struct timer_list *t) { struct pie_sched_data *q = from_timer(q, t, adapt_timer); struct Qdisc *sch = q->sch; - spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch)); + spinlock_t *root_lock; + rcu_read_lock(); + root_lock = qdisc_lock(qdisc_root_sleeping(sch)); spin_lock(root_lock); pie_calculate_probability(&q->params, &q->vars, sch->qstats.backlog); @@ -430,6 +432,7 @@ static void pie_timer(struct timer_list *t) if (q->params.tupdate) mod_timer(&q->adapt_timer, jiffies + q->params.tupdate); spin_unlock(root_lock); + rcu_read_unlock(); } static int pie_init(struct Qdisc *sch, struct nlattr *opt, diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index dfd9a99e6257..befaf74b33ca 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -381,8 +381,13 @@ static int qfq_change_agg(struct Qdisc *sch, struct qfq_class *cl, u32 weight, u32 lmax) { struct qfq_sched *q = qdisc_priv(sch); - struct qfq_aggregate *new_agg = qfq_find_agg(q, lmax, weight); + struct qfq_aggregate *new_agg; + /* 'lmax' can range from [QFQ_MIN_LMAX, pktlen + stab overhead] */ + if (lmax > QFQ_MAX_LMAX) + return -EINVAL; + + new_agg = qfq_find_agg(q, lmax, weight); if (new_agg == NULL) { /* create new aggregate */ new_agg = kzalloc(sizeof(*new_agg), GFP_ATOMIC); if (new_agg == NULL) @@ -423,10 +428,17 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, else weight = 1; - if (tb[TCA_QFQ_LMAX]) + if (tb[TCA_QFQ_LMAX]) { lmax = nla_get_u32(tb[TCA_QFQ_LMAX]); - else + } else { + /* MTU size is user controlled */ lmax = psched_mtu(qdisc_dev(sch)); + if (lmax < QFQ_MIN_LMAX || lmax > QFQ_MAX_LMAX) { + NL_SET_ERR_MSG_MOD(extack, + "MTU size out of bounds for qfq"); + return -EINVAL; + } + } inv_w = ONE_FP / weight; weight = ONE_FP / inv_w; diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 98129324e157..16277b6a0238 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -321,12 +321,15 @@ static inline void red_adaptative_timer(struct timer_list *t) { struct red_sched_data *q = from_timer(q, t, adapt_timer); struct Qdisc *sch = q->sch; - spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch)); + spinlock_t *root_lock; + rcu_read_lock(); + root_lock = qdisc_lock(qdisc_root_sleeping(sch)); spin_lock(root_lock); red_adaptative_algo(&q->parms, &q->vars); mod_timer(&q->adapt_timer, jiffies + HZ/2); spin_unlock(root_lock); + rcu_read_unlock(); } static int red_init(struct Qdisc *sch, struct nlattr *opt, diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index abd436307d6a..66dcb18638fe 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -606,10 +606,12 @@ static void sfq_perturbation(struct timer_list *t) { struct sfq_sched_data *q = from_timer(q, t, perturb_timer); struct Qdisc *sch = q->sch; - spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch)); + spinlock_t *root_lock; siphash_key_t nkey; get_random_bytes(&nkey, sizeof(nkey)); + rcu_read_lock(); + root_lock = qdisc_lock(qdisc_root_sleeping(sch)); spin_lock(root_lock); q->perturbation = nkey; if (!q->filter_list && q->tail) @@ -618,6 +620,7 @@ static void sfq_perturbation(struct timer_list *t) if (q->perturb_period) mod_timer(&q->perturb_timer, jiffies + q->perturb_period); + rcu_read_unlock(); } static int sfq_change(struct Qdisc *sch, struct nlattr *opt) diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index 76db9a10ef50..717ae51d94a0 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -20,6 +20,7 @@ #include <linux/spinlock.h> #include <linux/rcupdate.h> #include <linux/time.h> +#include <net/gso.h> #include <net/netlink.h> #include <net/pkt_sched.h> #include <net/pkt_cls.h> @@ -27,6 +28,8 @@ #include <net/sock.h> #include <net/tcp.h> +#define TAPRIO_STAT_NOT_SET (~0ULL) + #include "sch_mqprio_lib.h" static LIST_HEAD(taprio_list); @@ -797,6 +800,9 @@ static struct sk_buff *taprio_dequeue_tc_priority(struct Qdisc *sch, taprio_next_tc_txq(dev, tc, &q->cur_txq[tc]); + if (q->cur_txq[tc] >= dev->num_tx_queues) + q->cur_txq[tc] = first_txq; + if (skb) return skb; } while (q->cur_txq[tc] != first_txq); @@ -1524,7 +1530,7 @@ static int taprio_enable_offload(struct net_device *dev, "Not enough memory for enabling offload mode"); return -ENOMEM; } - offload->enable = 1; + offload->cmd = TAPRIO_CMD_REPLACE; offload->extack = extack; mqprio_qopt_reconstruct(dev, &offload->mqprio.qopt); offload->mqprio.extack = extack; @@ -1572,7 +1578,7 @@ static int taprio_disable_offload(struct net_device *dev, "Not enough memory to disable offload mode"); return -ENOMEM; } - offload->enable = 0; + offload->cmd = TAPRIO_CMD_DESTROY; err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TAPRIO, offload); if (err < 0) { @@ -2289,6 +2295,72 @@ nla_put_failure: return -EMSGSIZE; } +static int taprio_put_stat(struct sk_buff *skb, u64 val, u16 attrtype) +{ + if (val == TAPRIO_STAT_NOT_SET) + return 0; + if (nla_put_u64_64bit(skb, attrtype, val, TCA_TAPRIO_OFFLOAD_STATS_PAD)) + return -EMSGSIZE; + return 0; +} + +static int taprio_dump_xstats(struct Qdisc *sch, struct gnet_dump *d, + struct tc_taprio_qopt_offload *offload, + struct tc_taprio_qopt_stats *stats) +{ + struct net_device *dev = qdisc_dev(sch); + const struct net_device_ops *ops; + struct sk_buff *skb = d->skb; + struct nlattr *xstats; + int err; + + ops = qdisc_dev(sch)->netdev_ops; + + /* FIXME I could use qdisc_offload_dump_helper(), but that messes + * with sch->flags depending on whether the device reports taprio + * stats, and I'm not sure whether that's a good idea, considering + * that stats are optional to the offload itself + */ + if (!ops->ndo_setup_tc) + return 0; + + memset(stats, 0xff, sizeof(*stats)); + + err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TAPRIO, offload); + if (err == -EOPNOTSUPP) + return 0; + if (err) + return err; + + xstats = nla_nest_start(skb, TCA_STATS_APP); + if (!xstats) + goto err; + + if (taprio_put_stat(skb, stats->window_drops, + TCA_TAPRIO_OFFLOAD_STATS_WINDOW_DROPS) || + taprio_put_stat(skb, stats->tx_overruns, + TCA_TAPRIO_OFFLOAD_STATS_TX_OVERRUNS)) + goto err_cancel; + + nla_nest_end(skb, xstats); + + return 0; + +err_cancel: + nla_nest_cancel(skb, xstats); +err: + return -EMSGSIZE; +} + +static int taprio_dump_stats(struct Qdisc *sch, struct gnet_dump *d) +{ + struct tc_taprio_qopt_offload offload = { + .cmd = TAPRIO_CMD_STATS, + }; + + return taprio_dump_xstats(sch, d, &offload, &offload.stats); +} + static int taprio_dump(struct Qdisc *sch, struct sk_buff *skb) { struct taprio_sched *q = qdisc_priv(sch); @@ -2358,7 +2430,7 @@ static struct Qdisc *taprio_leaf(struct Qdisc *sch, unsigned long cl) if (!dev_queue) return NULL; - return dev_queue->qdisc_sleeping; + return rtnl_dereference(dev_queue->qdisc_sleeping); } static unsigned long taprio_find(struct Qdisc *sch, u32 classid) @@ -2377,7 +2449,7 @@ static int taprio_dump_class(struct Qdisc *sch, unsigned long cl, tcm->tcm_parent = TC_H_ROOT; tcm->tcm_handle |= TC_H_MIN(cl); - tcm->tcm_info = dev_queue->qdisc_sleeping->handle; + tcm->tcm_info = rtnl_dereference(dev_queue->qdisc_sleeping)->handle; return 0; } @@ -2388,12 +2460,20 @@ static int taprio_dump_class_stats(struct Qdisc *sch, unsigned long cl, __acquires(d->lock) { struct netdev_queue *dev_queue = taprio_queue_get(sch, cl); + struct tc_taprio_qopt_offload offload = { + .cmd = TAPRIO_CMD_QUEUE_STATS, + .queue_stats = { + .queue = cl - 1, + }, + }; + struct Qdisc *child; - sch = dev_queue->qdisc_sleeping; - if (gnet_stats_copy_basic(d, NULL, &sch->bstats, true) < 0 || - qdisc_qstats_copy(d, sch) < 0) + child = rtnl_dereference(dev_queue->qdisc_sleeping); + if (gnet_stats_copy_basic(d, NULL, &child->bstats, true) < 0 || + qdisc_qstats_copy(d, child) < 0) return -1; - return 0; + + return taprio_dump_xstats(sch, d, &offload, &offload.queue_stats.stats); } static void taprio_walk(struct Qdisc *sch, struct qdisc_walker *arg) @@ -2440,6 +2520,7 @@ static struct Qdisc_ops taprio_qdisc_ops __read_mostly = { .dequeue = taprio_dequeue, .enqueue = taprio_enqueue, .dump = taprio_dump, + .dump_stats = taprio_dump_stats, .owner = THIS_MODULE, }; diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 277ad11f4d61..17d2d00ddb18 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -13,6 +13,7 @@ #include <linux/string.h> #include <linux/errno.h> #include <linux/skbuff.h> +#include <net/gso.h> #include <net/netlink.h> #include <net/sch_generic.h> #include <net/pkt_cls.h> diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index 16f9238aa51d..7721239c185f 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -297,7 +297,7 @@ restart: struct net_device *slave = qdisc_dev(q); struct netdev_queue *slave_txq = netdev_get_tx_queue(slave, 0); - if (slave_txq->qdisc_sleeping != q) + if (rcu_access_pointer(slave_txq->qdisc_sleeping) != q) continue; if (netif_xmit_stopped(netdev_get_tx_queue(slave, subq)) || !netif_running(slave)) { |