aboutsummaryrefslogtreecommitdiffstats
path: root/net/sched
diff options
context:
space:
mode:
Diffstat (limited to 'net/sched')
-rw-r--r--net/sched/Kconfig14
-rw-r--r--net/sched/Makefile1
-rw-r--r--net/sched/act_api.c56
-rw-r--r--net/sched/act_csum.c30
-rw-r--r--net/sched/act_ife.c117
-rw-r--r--net/sched/act_mirred.c23
-rw-r--r--net/sched/act_pedit.c220
-rw-r--r--net/sched/act_sample.c276
-rw-r--r--net/sched/cls_api.c190
-rw-r--r--net/sched/cls_bpf.c13
-rw-r--r--net/sched/cls_flow.c2
-rw-r--r--net/sched/cls_flower.c102
-rw-r--r--net/sched/cls_matchall.c37
-rw-r--r--net/sched/cls_u32.c11
-rw-r--r--net/sched/sch_api.c39
-rw-r--r--net/sched/sch_atm.c1
-rw-r--r--net/sched/sch_cbq.c1
-rw-r--r--net/sched/sch_choke.c1
-rw-r--r--net/sched/sch_dsmark.c1
-rw-r--r--net/sched/sch_fq_codel.c7
-rw-r--r--net/sched/sch_generic.c2
-rw-r--r--net/sched/sch_hhf.c8
-rw-r--r--net/sched/sch_htb.c1
-rw-r--r--net/sched/sch_ingress.c1
-rw-r--r--net/sched/sch_mq.c10
-rw-r--r--net/sched/sch_mqprio.c19
-rw-r--r--net/sched/sch_multiq.c2
-rw-r--r--net/sched/sch_netem.c2
-rw-r--r--net/sched/sch_prio.c2
-rw-r--r--net/sched/sch_sfb.c1
-rw-r--r--net/sched/sch_sfq.c4
-rw-r--r--net/sched/sch_teql.c5
32 files changed, 912 insertions, 287 deletions
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 87956a768d1b..403790cce7d2 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -650,6 +650,18 @@ config NET_ACT_MIRRED
To compile this code as a module, choose M here: the
module will be called act_mirred.
+config NET_ACT_SAMPLE
+ tristate "Traffic Sampling"
+ depends on NET_CLS_ACT
+ select PSAMPLE
+ ---help---
+ Say Y here to allow packet sampling tc action. The packet sample
+ action consists of statistically choosing packets and sampling
+ them using the psample module.
+
+ To compile this code as a module, choose M here: the
+ module will be called act_sample.
+
config NET_ACT_IPT
tristate "IPtables targets"
depends on NET_CLS_ACT && NETFILTER && IP_NF_IPTABLES
@@ -707,6 +719,7 @@ config NET_ACT_SKBEDIT
config NET_ACT_CSUM
tristate "Checksum Updating"
depends on NET_CLS_ACT && INET
+ select LIBCRC32C
---help---
Say Y here to update some common checksum after some direct
packet alterations.
@@ -763,6 +776,7 @@ config NET_ACT_SKBMOD
config NET_ACT_IFE
tristate "Inter-FE action based on IETF ForCES InterFE LFB"
depends on NET_CLS_ACT
+ select NET_IFE
---help---
Say Y here to allow for sourcing and terminating metadata
For details refer to netdev01 paper:
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 4bdda3634e0b..7b915d226de7 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -10,6 +10,7 @@ obj-$(CONFIG_NET_CLS_ACT) += act_api.o
obj-$(CONFIG_NET_ACT_POLICE) += act_police.o
obj-$(CONFIG_NET_ACT_GACT) += act_gact.o
obj-$(CONFIG_NET_ACT_MIRRED) += act_mirred.o
+obj-$(CONFIG_NET_ACT_SAMPLE) += act_sample.o
obj-$(CONFIG_NET_ACT_IPT) += act_ipt.o
obj-$(CONFIG_NET_ACT_NAT) += act_nat.o
obj-$(CONFIG_NET_ACT_PEDIT) += act_pedit.o
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index e10456ef6f7a..f219ff325ed4 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -24,6 +24,7 @@
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/sch_generic.h>
+#include <net/pkt_cls.h>
#include <net/act_api.h>
#include <net/netlink.h>
@@ -33,6 +34,12 @@ static void free_tcf(struct rcu_head *head)
free_percpu(p->cpu_bstats);
free_percpu(p->cpu_qstats);
+
+ if (p->act_cookie) {
+ kfree(p->act_cookie->data);
+ kfree(p->act_cookie);
+ }
+
kfree(p);
}
@@ -426,11 +433,9 @@ int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions,
{
int ret = -1, i;
- if (skb->tc_verd & TC_NCLS) {
- skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
- ret = TC_ACT_OK;
- goto exec_done;
- }
+ if (skb_skip_tc_classify(skb))
+ return TC_ACT_OK;
+
for (i = 0; i < nr_actions; i++) {
const struct tc_action *a = actions[i];
@@ -439,9 +444,8 @@ repeat:
if (ret == TC_ACT_REPEAT)
goto repeat; /* we need a ttl - JHS */
if (ret != TC_ACT_PIPE)
- goto exec_done;
+ break;
}
-exec_done:
return ret;
}
EXPORT_SYMBOL(tcf_action_exec);
@@ -478,6 +482,12 @@ tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
goto nla_put_failure;
if (tcf_action_copy_stats(skb, a, 0))
goto nla_put_failure;
+ if (a->act_cookie) {
+ if (nla_put(skb, TCA_ACT_COOKIE, a->act_cookie->len,
+ a->act_cookie->data))
+ goto nla_put_failure;
+ }
+
nest = nla_nest_start(skb, TCA_OPTIONS);
if (nest == NULL)
goto nla_put_failure;
@@ -519,6 +529,22 @@ errout:
return err;
}
+/* Duplicate the user supplied TCA_ACT_COOKIE attribute and attach it to @a.
+ *
+ * The new cookie is built in a local and only published once both
+ * allocations succeeded, so a failure never leaves a->act_cookie pointing at
+ * freed memory (free_tcf() dereferences that pointer when the action is
+ * released).  A cookie that is already attached to the action is freed once
+ * its replacement is ready instead of being overwritten and leaked.
+ *
+ * Returns 0 on success or -ENOMEM if an allocation fails; on failure
+ * a->act_cookie is left untouched.
+ */
+static int nla_memdup_cookie(struct tc_action *a, struct nlattr **tb)
+{
+	typeof(a->act_cookie) cookie;
+
+	cookie = kzalloc(sizeof(*cookie), GFP_KERNEL);
+	if (!cookie)
+		return -ENOMEM;
+
+	cookie->data = nla_memdup(tb[TCA_ACT_COOKIE], GFP_KERNEL);
+	if (!cookie->data) {
+		kfree(cookie);
+		return -ENOMEM;
+	}
+	cookie->len = nla_len(tb[TCA_ACT_COOKIE]);
+
+	/* Drop a previously attached cookie before publishing the new one. */
+	if (a->act_cookie) {
+		kfree(a->act_cookie->data);
+		kfree(a->act_cookie);
+	}
+	a->act_cookie = cookie;
+
+	return 0;
+}
+
struct tc_action *tcf_action_init_1(struct net *net, struct nlattr *nla,
struct nlattr *est, char *name, int ovr,
int bind)
@@ -578,6 +604,22 @@ struct tc_action *tcf_action_init_1(struct net *net, struct nlattr *nla,
if (err < 0)
goto err_mod;
+ if (tb[TCA_ACT_COOKIE]) {
+ int cklen = nla_len(tb[TCA_ACT_COOKIE]);
+
+ if (cklen > TC_COOKIE_MAX_SIZE) {
+ err = -EINVAL;
+ tcf_hash_release(a, bind);
+ goto err_mod;
+ }
+
+ err = nla_memdup_cookie(a, tb);
+ if (err < 0) {
+ tcf_hash_release(a, bind);
+ goto err_mod;
+ }
+ }
+
/* module count goes up only when brand new policy is created
* if it exists and is only bound to in a_o->init() then
* ACT_P_CREATED is not returned (a zero is).
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index a0edd80a44db..e978ccd4402c 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -30,6 +30,7 @@
#include <net/tcp.h>
#include <net/udp.h>
#include <net/ip6_checksum.h>
+#include <net/sctp/checksum.h>
#include <net/act_api.h>
@@ -322,6 +323,25 @@ ignore_obscure_skb:
return 1;
}
+/* Recompute the SCTP checksum of @skb.
+ *
+ * @ihl: length of the IP header in bytes (offset of the SCTP header from the
+ *       network header)
+ * @ipl: IP length forwarded to tcf_csum_skb_nextlayer()
+ *
+ * Returns 1 when the packet was handled (GSO SCTP packets are left as they
+ * are) and 0 when the SCTP header could not be obtained.
+ */
+static int tcf_csum_sctp(struct sk_buff *skb, unsigned int ihl,
+			 unsigned int ipl)
+{
+	unsigned int sctp_off;
+	struct sctphdr *hdr;
+
+	if (skb_is_gso(skb) && (skb_shinfo(skb)->gso_type & SKB_GSO_SCTP))
+		return 1;
+
+	hdr = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*hdr));
+	if (hdr == NULL)
+		return 0;
+
+	sctp_off = skb_network_offset(skb) + ihl;
+	hdr->checksum = sctp_compute_cksum(skb, sctp_off);
+	skb->ip_summed = CHECKSUM_NONE;
+
+	return 1;
+}
+
static int tcf_csum_ipv4(struct sk_buff *skb, u32 update_flags)
{
const struct iphdr *iph;
@@ -365,6 +385,11 @@ static int tcf_csum_ipv4(struct sk_buff *skb, u32 update_flags)
ntohs(iph->tot_len), 1))
goto fail;
break;
+ case IPPROTO_SCTP:
+ if ((update_flags & TCA_CSUM_UPDATE_FLAG_SCTP) &&
+ !tcf_csum_sctp(skb, iph->ihl * 4, ntohs(iph->tot_len)))
+ goto fail;
+ break;
}
if (update_flags & TCA_CSUM_UPDATE_FLAG_IPV4HDR) {
@@ -481,6 +506,11 @@ static int tcf_csum_ipv6(struct sk_buff *skb, u32 update_flags)
pl + sizeof(*ip6h), 1))
goto fail;
goto done;
+ case IPPROTO_SCTP:
+ if ((update_flags & TCA_CSUM_UPDATE_FLAG_SCTP) &&
+ !tcf_csum_sctp(skb, hl, pl + sizeof(*ip6h)))
+ goto fail;
+ goto done;
default:
goto ignore_skb;
}
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index 80b848d3f096..71e7ff22f7c9 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -32,6 +32,7 @@
#include <uapi/linux/tc_act/tc_ife.h>
#include <net/tc_act/tc_ife.h>
#include <linux/etherdevice.h>
+#include <net/ife.h>
#define IFE_TAB_MASK 15
@@ -46,23 +47,6 @@ static const struct nla_policy ife_policy[TCA_IFE_MAX + 1] = {
[TCA_IFE_TYPE] = { .type = NLA_U16},
};
-/* Caller takes care of presenting data in network order
-*/
-int ife_tlv_meta_encode(void *skbdata, u16 attrtype, u16 dlen, const void *dval)
-{
- u32 *tlv = (u32 *)(skbdata);
- u16 totlen = nla_total_size(dlen); /*alignment + hdr */
- char *dptr = (char *)tlv + NLA_HDRLEN;
- u32 htlv = attrtype << 16 | (dlen + NLA_HDRLEN);
-
- *tlv = htonl(htlv);
- memset(dptr, 0, totlen - NLA_HDRLEN);
- memcpy(dptr, dval, dlen);
-
- return totlen;
-}
-EXPORT_SYMBOL_GPL(ife_tlv_meta_encode);
-
int ife_encode_meta_u16(u16 metaval, void *skbdata, struct tcf_meta_info *mi)
{
u16 edata = 0;
@@ -637,69 +621,59 @@ int find_decode_metaid(struct sk_buff *skb, struct tcf_ife_info *ife,
return 0;
}
-struct ifeheadr {
- __be16 metalen;
- u8 tlv_data[];
-};
-
-struct meta_tlvhdr {
- __be16 type;
- __be16 len;
-};
-
static int tcf_ife_decode(struct sk_buff *skb, const struct tc_action *a,
struct tcf_result *res)
{
struct tcf_ife_info *ife = to_ife(a);
int action = ife->tcf_action;
- struct ifeheadr *ifehdr = (struct ifeheadr *)skb->data;
- int ifehdrln = (int)ifehdr->metalen;
- struct meta_tlvhdr *tlv = (struct meta_tlvhdr *)(ifehdr->tlv_data);
+ u8 *ifehdr_end;
+ u8 *tlv_data;
+ u16 metalen;
spin_lock(&ife->tcf_lock);
bstats_update(&ife->tcf_bstats, skb);
tcf_lastuse_update(&ife->tcf_tm);
spin_unlock(&ife->tcf_lock);
- ifehdrln = ntohs(ifehdrln);
- if (unlikely(!pskb_may_pull(skb, ifehdrln))) {
+ if (skb_at_tc_ingress(skb))
+ skb_push(skb, skb->dev->hard_header_len);
+
+ tlv_data = ife_decode(skb, &metalen);
+ if (unlikely(!tlv_data)) {
spin_lock(&ife->tcf_lock);
ife->tcf_qstats.drops++;
spin_unlock(&ife->tcf_lock);
return TC_ACT_SHOT;
}
- skb_set_mac_header(skb, ifehdrln);
- __skb_pull(skb, ifehdrln);
- skb->protocol = eth_type_trans(skb, skb->dev);
- ifehdrln -= IFE_METAHDRLEN;
-
- while (ifehdrln > 0) {
- u8 *tlvdata = (u8 *)tlv;
- u16 mtype = tlv->type;
- u16 mlen = tlv->len;
- u16 alen;
+ ifehdr_end = tlv_data + metalen;
+ for (; tlv_data < ifehdr_end; tlv_data = ife_tlv_meta_next(tlv_data)) {
+ u8 *curr_data;
+ u16 mtype;
+ u16 dlen;
- mtype = ntohs(mtype);
- mlen = ntohs(mlen);
- alen = NLA_ALIGN(mlen);
+ curr_data = ife_tlv_meta_decode(tlv_data, &mtype, &dlen, NULL);
- if (find_decode_metaid(skb, ife, mtype, (mlen - NLA_HDRLEN),
- (void *)(tlvdata + NLA_HDRLEN))) {
+ if (find_decode_metaid(skb, ife, mtype, dlen, curr_data)) {
/* abuse overlimits to count when we receive metadata
* but dont have an ops for it
*/
- pr_info_ratelimited("Unknown metaid %d alnlen %d\n",
- mtype, mlen);
+ pr_info_ratelimited("Unknown metaid %d dlen %d\n",
+ mtype, dlen);
ife->tcf_qstats.overlimits++;
}
+ }
- tlvdata += alen;
- ifehdrln -= alen;
- tlv = (struct meta_tlvhdr *)tlvdata;
+ if (WARN_ON(tlv_data != ifehdr_end)) {
+ spin_lock(&ife->tcf_lock);
+ ife->tcf_qstats.drops++;
+ spin_unlock(&ife->tcf_lock);
+ return TC_ACT_SHOT;
}
+ skb->protocol = eth_type_trans(skb, skb->dev);
skb_reset_network_header(skb);
+
return action;
}
@@ -727,7 +701,6 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a,
struct tcf_ife_info *ife = to_ife(a);
int action = ife->tcf_action;
struct ethhdr *oethh; /* outer ether header */
- struct ethhdr *iethh; /* inner eth header */
struct tcf_meta_info *e;
/*
OUTERHDR:TOTMETALEN:{TLVHDR:Metadatum:TLVHDR..}:ORIGDATA
@@ -735,13 +708,13 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a,
*/
u16 metalen = ife_get_sz(skb, ife);
int hdrm = metalen + skb->dev->hard_header_len + IFE_METAHDRLEN;
- unsigned int skboff = skb->dev->hard_header_len;
- u32 at = G_TC_AT(skb->tc_verd);
+ unsigned int skboff = 0;
int new_len = skb->len + hdrm;
bool exceed_mtu = false;
- int err;
+ void *ife_meta;
+ int err = 0;
- if (at & AT_EGRESS) {
+ if (!skb_at_tc_ingress(skb)) {
if (new_len > skb->dev->mtu)
exceed_mtu = true;
}
@@ -766,27 +739,10 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a,
return TC_ACT_SHOT;
}
- err = skb_cow_head(skb, hdrm);
- if (unlikely(err)) {
- ife->tcf_qstats.drops++;
- spin_unlock(&ife->tcf_lock);
- return TC_ACT_SHOT;
- }
-
- if (!(at & AT_EGRESS))
+ if (skb_at_tc_ingress(skb))
skb_push(skb, skb->dev->hard_header_len);
- iethh = (struct ethhdr *)skb->data;
- __skb_push(skb, hdrm);
- memcpy(skb->data, iethh, skb->mac_len);
- skb_reset_mac_header(skb);
- oethh = eth_hdr(skb);
-
- /*total metadata length */
- metalen += IFE_METAHDRLEN;
- metalen = htons(metalen);
- memcpy((skb->data + skboff), &metalen, IFE_METAHDRLEN);
- skboff += IFE_METAHDRLEN;
+ ife_meta = ife_encode(skb, metalen);
/* XXX: we dont have a clever way of telling encode to
* not repeat some of the computations that are done by
@@ -794,7 +750,7 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a,
*/
list_for_each_entry(e, &ife->metalist, metalist) {
if (e->ops->encode) {
- err = e->ops->encode(skb, (void *)(skb->data + skboff),
+ err = e->ops->encode(skb, (void *)(ife_meta + skboff),
e);
}
if (err < 0) {
@@ -805,18 +761,15 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a,
}
skboff += err;
}
+ oethh = (struct ethhdr *)skb->data;
if (!is_zero_ether_addr(ife->eth_src))
ether_addr_copy(oethh->h_source, ife->eth_src);
- else
- ether_addr_copy(oethh->h_source, iethh->h_source);
if (!is_zero_ether_addr(ife->eth_dst))
ether_addr_copy(oethh->h_dest, ife->eth_dst);
- else
- ether_addr_copy(oethh->h_dest, iethh->h_dest);
oethh->h_proto = htons(ife->eth_type);
- if (!(at & AT_EGRESS))
+ if (skb_at_tc_ingress(skb))
skb_pull(skb, skb->dev->hard_header_len);
spin_unlock(&ife->tcf_lock);
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 2d9fa6e0a1b4..af49c7dca860 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -28,8 +28,6 @@
#include <linux/tc_act/tc_mirred.h>
#include <net/tc_act/tc_mirred.h>
-#include <linux/if_arp.h>
-
#define MIRRED_TAB_MASK 7
static LIST_HEAD(mirred_list);
static DEFINE_SPINLOCK(mirred_list_lock);
@@ -39,15 +37,15 @@ static bool tcf_mirred_is_act_redirect(int action)
return action == TCA_EGRESS_REDIR || action == TCA_INGRESS_REDIR;
}
-static u32 tcf_mirred_act_direction(int action)
+static bool tcf_mirred_act_wants_ingress(int action)
{
switch (action) {
case TCA_EGRESS_REDIR:
case TCA_EGRESS_MIRROR:
- return AT_EGRESS;
+ return false;
case TCA_INGRESS_REDIR:
case TCA_INGRESS_MIRROR:
- return AT_INGRESS;
+ return true;
default:
BUG();
}
@@ -170,7 +168,6 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a,
int retval, err = 0;
int m_eaction;
int mac_len;
- u32 at;
tcf_lastuse_update(&m->tcf_tm);
bstats_cpu_update(this_cpu_ptr(m->common.cpu_bstats), skb);
@@ -191,7 +188,6 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a,
goto out;
}
- at = G_TC_AT(skb->tc_verd);
skb2 = skb_clone(skb, GFP_ATOMIC);
if (!skb2)
goto out;
@@ -200,8 +196,9 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a,
* and devices expect a mac header on xmit, then mac push/pull is
* needed.
*/
- if (at != tcf_mirred_act_direction(m_eaction) && m_mac_header_xmit) {
- if (at & AT_EGRESS) {
+ if (skb_at_tc_ingress(skb) != tcf_mirred_act_wants_ingress(m_eaction) &&
+ m_mac_header_xmit) {
+ if (!skb_at_tc_ingress(skb)) {
/* caught at egress, act ingress: pull mac */
mac_len = skb_network_header(skb) - skb_mac_header(skb);
skb_pull_rcsum(skb2, mac_len);
@@ -212,12 +209,14 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a,
}
/* mirror is always swallowed */
- if (tcf_mirred_is_act_redirect(m_eaction))
- skb2->tc_verd = SET_TC_FROM(skb2->tc_verd, at);
+ if (tcf_mirred_is_act_redirect(m_eaction)) {
+ skb2->tc_redirected = 1;
+ skb2->tc_from_ingress = skb2->tc_at_ingress;
+ }
skb2->skb_iif = skb->dev->ifindex;
skb2->dev = dev;
- if (tcf_mirred_act_direction(m_eaction) & AT_EGRESS)
+ if (!tcf_mirred_act_wants_ingress(m_eaction))
err = dev_queue_xmit(skb2);
else
err = netif_receive_skb(skb2);
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index b27c4daec88f..c1310472f620 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -22,6 +22,7 @@
#include <net/pkt_sched.h>
#include <linux/tc_act/tc_pedit.h>
#include <net/tc_act/tc_pedit.h>
+#include <uapi/linux/tc_act/tc_pedit.h>
#define PEDIT_TAB_MASK 15
@@ -30,18 +31,117 @@ static struct tc_action_ops act_pedit_ops;
static const struct nla_policy pedit_policy[TCA_PEDIT_MAX + 1] = {
[TCA_PEDIT_PARMS] = { .len = sizeof(struct tc_pedit) },
+ [TCA_PEDIT_KEYS_EX] = { .type = NLA_NESTED },
};
+/* Attribute policy for one nested TCA_PEDIT_KEY_EX entry; handed to
+ * nla_parse_nested() by tcf_pedit_keys_ex_parse().  Both the header type and
+ * the command are declared as NLA_U16.
+ */
+static const struct nla_policy pedit_key_ex_policy[TCA_PEDIT_KEY_EX_MAX + 1] = {
+	[TCA_PEDIT_KEY_EX_HTYPE] = { .type = NLA_U16 },
+	[TCA_PEDIT_KEY_EX_CMD] = { .type = NLA_U16 },
+};
+
+/* Parse the nested TCA_PEDIT_KEYS_EX attribute into an array of @n extended
+ * keys.
+ *
+ * @nla: the TCA_PEDIT_KEYS_EX attribute, may be NULL
+ * @n:   number of keys the caller expects
+ *
+ * Returns NULL when there is nothing to parse (@nla is NULL or @n is 0), a
+ * kcalloc()ed array of @n entries on success (the caller owns it and must
+ * kfree() it), or an ERR_PTR():
+ *   -ENOMEM  the array could not be allocated
+ *   -EINVAL  the number of nested entries differs from @n, an entry has the
+ *            wrong type, a mandatory attribute is missing or a value is out
+ *            of range
+ *   other    error returned by nla_parse_nested()
+ */
+static struct tcf_pedit_key_ex *tcf_pedit_keys_ex_parse(struct nlattr *nla,
+							u8 n)
+{
+	struct tcf_pedit_key_ex *keys_ex;
+	struct tcf_pedit_key_ex *k;
+	const struct nlattr *ka;
+	int err = -EINVAL;
+	int rem;
+
+	if (!nla || !n)
+		return NULL;
+
+	keys_ex = kcalloc(n, sizeof(*k), GFP_KERNEL);
+	if (!keys_ex)
+		return ERR_PTR(-ENOMEM);
+
+	k = keys_ex;
+
+	nla_for_each_nested(ka, nla, rem) {
+		struct nlattr *tb[TCA_PEDIT_KEY_EX_MAX + 1];
+
+		/* More nested entries than keys announced by the caller. */
+		if (!n) {
+			err = -EINVAL;
+			goto err_out;
+		}
+		n--;
+
+		if (nla_type(ka) != TCA_PEDIT_KEY_EX) {
+			err = -EINVAL;
+			goto err_out;
+		}
+
+		err = nla_parse_nested(tb, TCA_PEDIT_KEY_EX_MAX, ka,
+				       pedit_key_ex_policy);
+		if (err)
+			goto err_out;
+
+		if (!tb[TCA_PEDIT_KEY_EX_HTYPE] ||
+		    !tb[TCA_PEDIT_KEY_EX_CMD]) {
+			err = -EINVAL;
+			goto err_out;
+		}
+
+		k->htype = nla_get_u16(tb[TCA_PEDIT_KEY_EX_HTYPE]);
+		k->cmd = nla_get_u16(tb[TCA_PEDIT_KEY_EX_CMD]);
+
+		if (k->htype > TCA_PEDIT_HDR_TYPE_MAX ||
+		    k->cmd > TCA_PEDIT_CMD_MAX) {
+			err = -EINVAL;
+			goto err_out;
+		}
+
+		k++;
+	}
+
+	/* Fewer nested entries than keys.  err still holds 0 from the last
+	 * successful nla_parse_nested(), so it must be set explicitly;
+	 * otherwise ERR_PTR(0), i.e. NULL, would be returned and the caller
+	 * would treat the request as having no extended keys at all.
+	 */
+	if (n) {
+		err = -EINVAL;
+		goto err_out;
+	}
+
+	return keys_ex;
+
+err_out:
+	kfree(keys_ex);
+	return ERR_PTR(err);
+}
+
+/* Dump @n extended pedit keys as a TCA_PEDIT_KEYS_EX nest, one
+ * TCA_PEDIT_KEY_EX nest (header type + command) per key.
+ *
+ * Returns 0 on success and -EINVAL when the message has no room left; in
+ * that case everything this function added to @skb is trimmed again.
+ */
+static int tcf_pedit_key_ex_dump(struct sk_buff *skb,
+				 struct tcf_pedit_key_ex *keys_ex, int n)
+{
+	struct nlattr *keys_start = nla_nest_start(skb, TCA_PEDIT_KEYS_EX);
+
+	/* Nothing was added yet, so there is nothing to trim. */
+	if (!keys_start)
+		return -EINVAL;
+
+	for (; n > 0; n--) {
+		struct nlattr *key_start;
+
+		key_start = nla_nest_start(skb, TCA_PEDIT_KEY_EX);
+		if (!key_start)
+			goto nla_failure;
+
+		if (nla_put_u16(skb, TCA_PEDIT_KEY_EX_HTYPE, keys_ex->htype) ||
+		    nla_put_u16(skb, TCA_PEDIT_KEY_EX_CMD, keys_ex->cmd))
+			goto nla_failure;
+
+		nla_nest_end(skb, key_start);
+
+		keys_ex++;
+	}
+
+	nla_nest_end(skb, keys_start);
+
+	return 0;
+
+nla_failure:
+	/* Roll the message back to where the outer nest started. */
+	nlmsg_trim(skb, keys_start);
+	return -EINVAL;
+}
+
static int tcf_pedit_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
int ovr, int bind)
{
struct tc_action_net *tn = net_generic(net, pedit_net_id);
struct nlattr *tb[TCA_PEDIT_MAX + 1];
+ struct nlattr *pattr;
struct tc_pedit *parm;
int ret = 0, err;
struct tcf_pedit *p;
struct tc_pedit_key *keys = NULL;
+ struct tcf_pedit_key_ex *keys_ex;
int ksize;
if (nla == NULL)
@@ -51,13 +151,21 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
if (err < 0)
return err;
- if (tb[TCA_PEDIT_PARMS] == NULL)
+ pattr = tb[TCA_PEDIT_PARMS];
+ if (!pattr)
+ pattr = tb[TCA_PEDIT_PARMS_EX];
+ if (!pattr)
return -EINVAL;
- parm = nla_data(tb[TCA_PEDIT_PARMS]);
+
+ parm = nla_data(pattr);
ksize = parm->nkeys * sizeof(struct tc_pedit_key);
- if (nla_len(tb[TCA_PEDIT_PARMS]) < sizeof(*parm) + ksize)
+ if (nla_len(pattr) < sizeof(*parm) + ksize)
return -EINVAL;
+ keys_ex = tcf_pedit_keys_ex_parse(tb[TCA_PEDIT_KEYS_EX], parm->nkeys);
+ if (IS_ERR(keys_ex))
+ return PTR_ERR(keys_ex);
+
if (!tcf_hash_check(tn, parm->index, a, bind)) {
if (!parm->nkeys)
return -EINVAL;
@@ -69,6 +177,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
keys = kmalloc(ksize, GFP_KERNEL);
if (keys == NULL) {
tcf_hash_cleanup(*a, est);
+ kfree(keys_ex);
return -ENOMEM;
}
ret = ACT_P_CREATED;
@@ -81,8 +190,10 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
p = to_pedit(*a);
if (p->tcfp_nkeys && p->tcfp_nkeys != parm->nkeys) {
keys = kmalloc(ksize, GFP_KERNEL);
- if (keys == NULL)
+ if (!keys) {
+ kfree(keys_ex);
return -ENOMEM;
+ }
}
}
@@ -95,6 +206,10 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
p->tcfp_nkeys = parm->nkeys;
}
memcpy(p->tcfp_keys, parm->keys, ksize);
+
+ kfree(p->tcfp_keys_ex);
+ p->tcfp_keys_ex = keys_ex;
+
spin_unlock_bh(&p->tcf_lock);
if (ret == ACT_P_CREATED)
tcf_hash_insert(tn, *a);
@@ -106,6 +221,7 @@ static void tcf_pedit_cleanup(struct tc_action *a, int bind)
struct tcf_pedit *p = to_pedit(a);
struct tc_pedit_key *keys = p->tcfp_keys;
kfree(keys);
+ kfree(p->tcfp_keys_ex);
}
static bool offset_valid(struct sk_buff *skb, int offset)
@@ -119,38 +235,88 @@ static bool offset_valid(struct sk_buff *skb, int offset)
return true;
}
+/* Look up the offset of the header selected by @htype inside @skb.
+ *
+ * On success the offset is stored in *@hoffset and 0 is returned.  -EINVAL
+ * is returned, and *@hoffset left untouched, when @htype is not handled or
+ * when the mac/transport header it refers to has not been set on @skb.
+ */
+static int pedit_skb_hdr_offset(struct sk_buff *skb,
+				enum pedit_header_type htype, int *hoffset)
+{
+	switch (htype) {
+	case TCA_PEDIT_KEY_EX_HDR_TYPE_ETH:
+		if (!skb_mac_header_was_set(skb))
+			return -EINVAL;
+		*hoffset = skb_mac_offset(skb);
+		return 0;
+	case TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK:
+	case TCA_PEDIT_KEY_EX_HDR_TYPE_IP4:
+	case TCA_PEDIT_KEY_EX_HDR_TYPE_IP6:
+		*hoffset = skb_network_offset(skb);
+		return 0;
+	case TCA_PEDIT_KEY_EX_HDR_TYPE_TCP:
+	case TCA_PEDIT_KEY_EX_HDR_TYPE_UDP:
+		if (!skb_transport_header_was_set(skb))
+			return -EINVAL;
+		*hoffset = skb_transport_offset(skb);
+		return 0;
+	default:
+		return -EINVAL;
+	}
+}
+
static int tcf_pedit(struct sk_buff *skb, const struct tc_action *a,
struct tcf_result *res)
{
struct tcf_pedit *p = to_pedit(a);
int i;
- unsigned int off;
if (skb_unclone(skb, GFP_ATOMIC))
return p->tcf_action;
- off = skb_network_offset(skb);
-
spin_lock(&p->tcf_lock);
tcf_lastuse_update(&p->tcf_tm);
if (p->tcfp_nkeys > 0) {
struct tc_pedit_key *tkey = p->tcfp_keys;
+ struct tcf_pedit_key_ex *tkey_ex = p->tcfp_keys_ex;
+ enum pedit_header_type htype = TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK;
+ enum pedit_cmd cmd = TCA_PEDIT_KEY_EX_CMD_SET;
for (i = p->tcfp_nkeys; i > 0; i--, tkey++) {
u32 *ptr, _data;
int offset = tkey->off;
+ int hoffset;
+ u32 val;
+ int rc;
+
+ if (tkey_ex) {
+ htype = tkey_ex->htype;
+ cmd = tkey_ex->cmd;
+
+ tkey_ex++;
+ }
+
+ rc = pedit_skb_hdr_offset(skb, htype, &hoffset);
+ if (rc) {
+ pr_info("tc filter pedit bad header type specified (0x%x)\n",
+ htype);
+ goto bad;
+ }
if (tkey->offmask) {
char *d, _d;
- if (!offset_valid(skb, off + tkey->at)) {
+ if (!offset_valid(skb, hoffset + tkey->at)) {
pr_info("tc filter pedit 'at' offset %d out of bounds\n",
- off + tkey->at);
+ hoffset + tkey->at);
goto bad;
}
- d = skb_header_pointer(skb, off + tkey->at, 1,
+ d = skb_header_pointer(skb, hoffset + tkey->at, 1,
&_d);
if (!d)
goto bad;
@@ -163,19 +329,32 @@ static int tcf_pedit(struct sk_buff *skb, const struct tc_action *a,
goto bad;
}
- if (!offset_valid(skb, off + offset)) {
+ if (!offset_valid(skb, hoffset + offset)) {
pr_info("tc filter pedit offset %d out of bounds\n",
- offset);
+ hoffset + offset);
goto bad;
}
- ptr = skb_header_pointer(skb, off + offset, 4, &_data);
+ ptr = skb_header_pointer(skb, hoffset + offset, 4, &_data);
if (!ptr)
goto bad;
/* just do it, baby */
- *ptr = ((*ptr & tkey->mask) ^ tkey->val);
+ switch (cmd) {
+ case TCA_PEDIT_KEY_EX_CMD_SET:
+ val = tkey->val;
+ break;
+ case TCA_PEDIT_KEY_EX_CMD_ADD:
+ val = (*ptr + tkey->val) & ~tkey->mask;
+ break;
+ default:
+ pr_info("tc filter pedit bad command (%d)\n",
+ cmd);
+ goto bad;
+ }
+
+ *ptr = ((*ptr & tkey->mask) ^ val);
if (ptr == &_data)
- skb_store_bits(skb, off + offset, ptr, 4);
+ skb_store_bits(skb, hoffset + offset, ptr, 4);
}
goto done;
@@ -215,8 +394,15 @@ static int tcf_pedit_dump(struct sk_buff *skb, struct tc_action *a,
opt->refcnt = p->tcf_refcnt - ref;
opt->bindcnt = p->tcf_bindcnt - bind;
- if (nla_put(skb, TCA_PEDIT_PARMS, s, opt))
- goto nla_put_failure;
+ if (p->tcfp_keys_ex) {
+ tcf_pedit_key_ex_dump(skb, p->tcfp_keys_ex, p->tcfp_nkeys);
+
+ if (nla_put(skb, TCA_PEDIT_PARMS_EX, s, opt))
+ goto nla_put_failure;
+ } else {
+ if (nla_put(skb, TCA_PEDIT_PARMS, s, opt))
+ goto nla_put_failure;
+ }
tcf_tm_dump(&t, &p->tcf_tm);
if (nla_put_64bit(skb, TCA_PEDIT_TM, sizeof(t), &t, TCA_PEDIT_PAD))
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
new file mode 100644
index 000000000000..0b8217b4763f
--- /dev/null
+++ b/net/sched/act_sample.c
@@ -0,0 +1,276 @@
+/*
+ * net/sched/act_sample.c - Packet sampling tc action
+ * Copyright (c) 2017 Yotam Gigi <yotamg@mellanox.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/gfp.h>
+#include <net/net_namespace.h>
+#include <net/netlink.h>
+#include <net/pkt_sched.h>
+#include <linux/tc_act/tc_sample.h>
+#include <net/tc_act/tc_sample.h>
+#include <net/psample.h>
+
+#include <linux/if_arp.h>
+
+#define SAMPLE_TAB_MASK 7
+static unsigned int sample_net_id;
+static struct tc_action_ops act_sample_ops;
+
+/* Attribute policy handed to nla_parse_nested() in tcf_sample_init(): the
+ * parameter block is described by its length, the rate, truncation size and
+ * psample group number are declared as NLA_U32.
+ */
+static const struct nla_policy sample_policy[TCA_SAMPLE_MAX + 1] = {
+	[TCA_SAMPLE_PARMS] = { .len = sizeof(struct tc_sample) },
+	[TCA_SAMPLE_RATE] = { .type = NLA_U32 },
+	[TCA_SAMPLE_TRUNC_SIZE] = { .type = NLA_U32 },
+	[TCA_SAMPLE_PSAMPLE_GROUP] = { .type = NLA_U32 },
+};
+
+/* Create a sample action or update an existing one from netlink attributes.
+ *
+ * TCA_SAMPLE_PARMS, TCA_SAMPLE_RATE and TCA_SAMPLE_PSAMPLE_GROUP are
+ * mandatory; TCA_SAMPLE_TRUNC_SIZE is optional and enables truncation.
+ *
+ * Returns ACT_P_CREATED when a new action was created, 0 when an existing
+ * action was bound to or updated, or a negative errno:
+ *   -EINVAL  missing/invalid attributes, including a sampling rate of 0
+ *   -EEXIST  the action exists and @ovr is not set
+ *   -ENOMEM  psample_group_get() failed
+ *   other    error from nla_parse_nested() or tcf_hash_create()
+ */
+static int tcf_sample_init(struct net *net, struct nlattr *nla,
+			   struct nlattr *est, struct tc_action **a, int ovr,
+			   int bind)
+{
+	struct tc_action_net *tn = net_generic(net, sample_net_id);
+	struct nlattr *tb[TCA_SAMPLE_MAX + 1];
+	struct psample_group *psample_group;
+	struct tc_sample *parm;
+	struct tcf_sample *s;
+	bool exists = false;
+	u32 rate;
+	int ret;
+
+	if (!nla)
+		return -EINVAL;
+	ret = nla_parse_nested(tb, TCA_SAMPLE_MAX, nla, sample_policy);
+	if (ret < 0)
+		return ret;
+	if (!tb[TCA_SAMPLE_PARMS] || !tb[TCA_SAMPLE_RATE] ||
+	    !tb[TCA_SAMPLE_PSAMPLE_GROUP])
+		return -EINVAL;
+
+	/* tcf_sample_act() computes prandom_u32() % rate for every packet, so
+	 * a rate of 0 would divide by zero in the traffic path.  Reject it
+	 * here, before any action is looked up or created, so that no
+	 * cleanup is needed.
+	 */
+	rate = nla_get_u32(tb[TCA_SAMPLE_RATE]);
+	if (!rate)
+		return -EINVAL;
+
+	parm = nla_data(tb[TCA_SAMPLE_PARMS]);
+
+	exists = tcf_hash_check(tn, parm->index, a, bind);
+	if (exists && bind)
+		return 0;
+
+	if (!exists) {
+		ret = tcf_hash_create(tn, parm->index, est, a,
+				      &act_sample_ops, bind, false);
+		if (ret)
+			return ret;
+		ret = ACT_P_CREATED;
+	} else {
+		tcf_hash_release(*a, bind);
+		if (!ovr)
+			return -EEXIST;
+	}
+	s = to_sample(*a);
+
+	s->tcf_action = parm->action;
+	s->rate = rate;
+	s->psample_group_num = nla_get_u32(tb[TCA_SAMPLE_PSAMPLE_GROUP]);
+	psample_group = psample_group_get(net, s->psample_group_num);
+	if (!psample_group) {
+		if (ret == ACT_P_CREATED)
+			tcf_hash_release(*a, bind);
+		return -ENOMEM;
+	}
+	/* NOTE(review): when an existing action is overwritten the previous
+	 * group pointer is replaced here without a psample_group_put() -
+	 * confirm whether that reference is dropped elsewhere.
+	 */
+	RCU_INIT_POINTER(s->psample_group, psample_group);
+
+	if (tb[TCA_SAMPLE_TRUNC_SIZE]) {
+		s->truncate = true;
+		s->trunc_size = nla_get_u32(tb[TCA_SAMPLE_TRUNC_SIZE]);
+	}
+
+	if (ret == ACT_P_CREATED)
+		tcf_hash_insert(tn, *a);
+	return ret;
+}
+
+/* RCU callback queued by tcf_sample_cleanup(): clears the action's psample
+ * group pointer and drops the group with psample_group_put().
+ */
+static void tcf_sample_cleanup_rcu(struct rcu_head *rcu)
+{
+	struct tcf_sample *sample = container_of(rcu, struct tcf_sample, rcu);
+	struct psample_group *group =
+		rcu_dereference_protected(sample->psample_group, 1);
+
+	RCU_INIT_POINTER(sample->psample_group, NULL);
+	psample_group_put(group);
+}
+
+/* .cleanup callback: defer the psample group release to
+ * tcf_sample_cleanup_rcu() via call_rcu().  @bind is not used.
+ */
+static void tcf_sample_cleanup(struct tc_action *a, int bind)
+{
+	call_rcu(&to_sample(a)->rcu, tcf_sample_cleanup_rcu);
+}
+
+/* Tell tcf_sample_act() whether it may push the mac header back on @dev.
+ * Returns false for the tunnel, SIT, IPGRE, VOID and NONE device types
+ * listed below and true for every other type.
+ */
+static bool tcf_sample_dev_ok_push(struct net_device *dev)
+{
+	bool excluded = dev->type == ARPHRD_TUNNEL ||
+			dev->type == ARPHRD_TUNNEL6 ||
+			dev->type == ARPHRD_SIT ||
+			dev->type == ARPHRD_IPGRE ||
+			dev->type == ARPHRD_VOID ||
+			dev->type == ARPHRD_NONE;
+
+	return !excluded;
+}
+
+/* .act callback: update the action's statistics and, for a random subset of
+ * packets selected by s->rate, hand the packet to psample_sample_packet().
+ * Always returns the configured tcf_action verdict; @res is not used.
+ */
+static int tcf_sample_act(struct sk_buff *skb, const struct tc_action *a,
+			  struct tcf_result *res)
+{
+	struct tcf_sample *s = to_sample(a);
+	struct psample_group *group;
+	int verdict;
+	int size;
+	int iif;
+	int oif;
+
+	tcf_lastuse_update(&s->tcf_tm);
+	bstats_cpu_update(this_cpu_ptr(s->common.cpu_bstats), skb);
+	verdict = READ_ONCE(s->tcf_action);
+
+	rcu_read_lock();
+	group = rcu_dereference(s->psample_group);
+
+	/* Sample only when a group is attached and the random draw hits.
+	 * NOTE(review): a zero s->rate would divide by zero here - confirm
+	 * the rate is validated at configuration time.
+	 */
+	if (!group || prandom_u32() % s->rate != 0)
+		goto unlock;
+
+	if (skb_at_tc_ingress(skb)) {
+		iif = skb->dev->ifindex;
+		oif = 0;
+	} else {
+		iif = skb->skb_iif;
+		oif = skb->dev->ifindex;
+	}
+
+	/* on ingress, the mac header gets popped, so push it back */
+	if (skb_at_tc_ingress(skb) && tcf_sample_dev_ok_push(skb->dev))
+		skb_push(skb, skb->mac_len);
+
+	if (s->truncate)
+		size = s->trunc_size;
+	else
+		size = skb->len;
+	psample_sample_packet(group, skb, size, iif, oif, s->rate);
+
+	/* undo the push so the skb looks the same to later processing */
+	if (skb_at_tc_ingress(skb) && tcf_sample_dev_ok_push(skb->dev))
+		skb_pull(skb, skb->mac_len);
+
+unlock:
+	rcu_read_unlock();
+	return verdict;
+}
+
+/* .dump callback: write the action's parameters, timestamps, rate, optional
+ * truncation size and psample group number into @skb.
+ *
+ * Returns skb->len on success.  If any attribute does not fit, the message
+ * is trimmed back to where this function started and -1 is returned.
+ */
+static int tcf_sample_dump(struct sk_buff *skb, struct tc_action *a,
+			   int bind, int ref)
+{
+	unsigned char *tail = skb_tail_pointer(skb);
+	struct tcf_sample *s = to_sample(a);
+	struct tc_sample opt = {
+		.index = s->tcf_index,
+		.action = s->tcf_action,
+		.refcnt = s->tcf_refcnt - ref,
+		.bindcnt = s->tcf_bindcnt - bind,
+	};
+	struct tcf_t tm;
+
+	if (nla_put(skb, TCA_SAMPLE_PARMS, sizeof(opt), &opt))
+		goto nla_put_failure;
+
+	tcf_tm_dump(&tm, &s->tcf_tm);
+
+	/* Short-circuit evaluation keeps the attributes in their order. */
+	if (nla_put_64bit(skb, TCA_SAMPLE_TM, sizeof(tm), &tm,
+			  TCA_SAMPLE_PAD) ||
+	    nla_put_u32(skb, TCA_SAMPLE_RATE, s->rate) ||
+	    (s->truncate &&
+	     nla_put_u32(skb, TCA_SAMPLE_TRUNC_SIZE, s->trunc_size)) ||
+	    nla_put_u32(skb, TCA_SAMPLE_PSAMPLE_GROUP, s->psample_group_num))
+		goto nla_put_failure;
+
+	return skb->len;
+
+nla_put_failure:
+	nlmsg_trim(skb, tail);
+	return -1;
+}
+
+/* .walk callback: run tcf_generic_walker() on this namespace's sample
+ * action table.
+ */
+static int tcf_sample_walker(struct net *net, struct sk_buff *skb,
+			     struct netlink_callback *cb, int type,
+			     const struct tc_action_ops *ops)
+{
+	return tcf_generic_walker(net_generic(net, sample_net_id), skb, cb,
+				  type, ops);
+}
+
+/* .lookup callback: search this namespace's sample action table for @index
+ * with tcf_hash_search().
+ */
+static int tcf_sample_search(struct net *net, struct tc_action **a, u32 index)
+{
+	return tcf_hash_search(net_generic(net, sample_net_id), a, index);
+}
+
+/* Operations table of the "sample" action.  It is registered and
+ * unregistered by sample_init_module()/sample_cleanup_module() and passed to
+ * tcf_hash_create() when an action instance is created.
+ */
+static struct tc_action_ops act_sample_ops = {
+	.kind = "sample",
+	.type = TCA_ACT_SAMPLE,
+	.owner = THIS_MODULE,
+	.act = tcf_sample_act,
+	.dump = tcf_sample_dump,
+	.init = tcf_sample_init,
+	.cleanup = tcf_sample_cleanup,
+	.walk = tcf_sample_walker,
+	.lookup = tcf_sample_search,
+	.size = sizeof(struct tcf_sample),
+};
+
+/* Per-namespace init: set up the sample action table of @net with
+ * SAMPLE_TAB_MASK as the table mask.
+ */
+static __net_init int sample_init_net(struct net *net)
+{
+	return tc_action_net_init(net_generic(net, sample_net_id),
+				  &act_sample_ops, SAMPLE_TAB_MASK);
+}
+
+/* Per-namespace exit: tear down the sample action table of @net. */
+static void __net_exit sample_exit_net(struct net *net)
+{
+	tc_action_net_exit(net_generic(net, sample_net_id));
+}
+
+/* Per-network-namespace hooks for the sample action.  sample_net_id is the
+ * id the other functions in this file pass to net_generic() to reach the
+ * namespace's struct tc_action_net.
+ */
+static struct pernet_operations sample_net_ops = {
+	.init = sample_init_net,
+	.exit = sample_exit_net,
+	.id = &sample_net_id,
+	.size = sizeof(struct tc_action_net),
+};
+
+/* Module entry point: register the sample action together with its
+ * per-namespace operations and return the registration result.
+ */
+static int __init sample_init_module(void)
+{
+	int err;
+
+	err = tcf_register_action(&act_sample_ops, &sample_net_ops);
+	return err;
+}
+
+/* Module exit point: unregister what sample_init_module() registered. */
+static void __exit sample_cleanup_module(void)
+{
+	tcf_unregister_action(&act_sample_ops, &sample_net_ops);
+}
+
+module_init(sample_init_module);
+module_exit(sample_cleanup_module);
+
+MODULE_AUTHOR("Yotam Gigi <yotamg@mellanox.com>");
+MODULE_DESCRIPTION("Packet sampling action");
+MODULE_LICENSE("GPL v2");
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 1ecdf809b5fa..732f7cae459d 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -19,6 +19,7 @@
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
+#include <linux/err.h>
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/kmod.h>
@@ -38,14 +39,14 @@ static DEFINE_RWLOCK(cls_mod_lock);
/* Find classifier type by string name */
-static const struct tcf_proto_ops *tcf_proto_lookup_ops(struct nlattr *kind)
+static const struct tcf_proto_ops *tcf_proto_lookup_ops(const char *kind)
{
const struct tcf_proto_ops *t, *res = NULL;
if (kind) {
read_lock(&cls_mod_lock);
list_for_each_entry(t, &tcf_proto_base, head) {
- if (nla_strcmp(kind, t->kind) == 0) {
+ if (strcmp(kind, t->kind) == 0) {
if (try_module_get(t->owner))
res = t;
break;
@@ -127,6 +128,77 @@ static inline u32 tcf_auto_prio(struct tcf_proto *tp)
return first;
}
+/* Allocate and initialise a new tcf_proto for classifier type @kind.
+ *
+ * @kind:     classifier name; it is compared with strcmp() and formatted
+ *            with "%s", so it must be a NUL-terminated string.
+ *            NOTE(review): the caller passes nla_data(tca[TCA_KIND])
+ *            directly - confirm the attribute is guaranteed to be
+ *            NUL-terminated and bounded before it gets here.
+ * @protocol: stored in tp->protocol.
+ * @prio:     stored in tp->prio.
+ * @parent:   stored in tp->classid.
+ * @q:        stored in tp->q.
+ *
+ * Returns the new tcf_proto on success, otherwise an ERR_PTR():
+ *   -ENOBUFS  the tcf_proto could not be allocated;
+ *   -ENOENT   no classifier of that kind is registered (and, with
+ *             CONFIG_MODULES, requesting cls_<kind> did not provide one);
+ *   -EAGAIN   the classifier became available after request_module();
+ *             RTNL was dropped meanwhile, so the request must be replayed;
+ *   other     the error returned by ops->init().
+ */
+static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol,
+					  u32 prio, u32 parent, struct Qdisc *q)
+{
+	struct tcf_proto *tp;
+	int err;
+
+	tp = kzalloc(sizeof(*tp), GFP_KERNEL);
+	if (!tp)
+		return ERR_PTR(-ENOBUFS);
+
+	err = -ENOENT;
+	tp->ops = tcf_proto_lookup_ops(kind);
+	if (!tp->ops) {
+#ifdef CONFIG_MODULES
+		rtnl_unlock();
+		request_module("cls_%s", kind);
+		rtnl_lock();
+		tp->ops = tcf_proto_lookup_ops(kind);
+		/* We dropped the RTNL semaphore in order to perform
+		 * the module load. So, even if we succeeded in loading
+		 * the module we have to replay the request. We indicate
+		 * this using -EAGAIN. The reference taken by the lookup
+		 * is dropped again because this tcf_proto is discarded.
+		 */
+		if (tp->ops) {
+			module_put(tp->ops->owner);
+			err = -EAGAIN;
+		}
+#endif
+		/* Must stay outside the #ifdef: without CONFIG_MODULES an
+		 * unknown kind would otherwise fall through with
+		 * tp->ops == NULL and be dereferenced below.
+		 */
+		goto errout;
+	}
+	tp->classify = tp->ops->classify;
+	tp->protocol = protocol;
+	tp->prio = prio;
+	tp->classid = parent;
+	tp->q = q;
+
+	err = tp->ops->init(tp);
+	if (err) {
+		/* Drop the module reference taken by the successful lookup. */
+		module_put(tp->ops->owner);
+		goto errout;
+	}
+	return tp;
+
+errout:
+	kfree(tp);
+	return ERR_PTR(err);
+}
+
+/* Ask the classifier to destroy @tp. If ops->destroy() reports success,
+ * the module reference is dropped, @tp is freed via kfree_rcu() and true
+ * is returned; otherwise @tp is left untouched and false is returned.
+ * @force is passed through to ops->destroy() unchanged.
+ */
+static bool tcf_proto_destroy(struct tcf_proto *tp, bool force)
+{
+	if (!tp->ops->destroy(tp, force))
+		return false;
+
+	module_put(tp->ops->owner);
+	kfree_rcu(tp, rcu);
+	return true;
+}
+
+/* Tear down a whole filter chain: repeatedly unlink the first tcf_proto
+ * from *fl (making its successor the new head) and force-destroy it,
+ * until the chain is empty.
+ */
+void tcf_destroy_chain(struct tcf_proto __rcu **fl)
+{
+	struct tcf_proto *head;
+
+	for (;;) {
+		head = rtnl_dereference(*fl);
+		if (!head)
+			break;
+		RCU_INIT_POINTER(*fl, head->next);
+		tcf_proto_destroy(head, true);
+	}
+}
+EXPORT_SYMBOL(tcf_destroy_chain);
+
/* Add/change/delete/get a filter node */
static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n)
@@ -142,8 +214,8 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n)
struct Qdisc *q;
struct tcf_proto __rcu **back;
struct tcf_proto __rcu **chain;
+ struct tcf_proto *next;
struct tcf_proto *tp;
- const struct tcf_proto_ops *tp_ops;
const struct Qdisc_class_ops *cops;
unsigned long cl;
unsigned long fh;
@@ -222,9 +294,10 @@ replay:
/* And the last stroke */
chain = cops->tcf_chain(q, cl);
- err = -EINVAL;
- if (chain == NULL)
+ if (chain == NULL) {
+ err = -EINVAL;
goto errout;
+ }
if (n->nlmsg_type == RTM_DELTFILTER && prio == 0) {
tfilter_notify_chain(net, skb, n, chain, RTM_DELTFILTER);
tcf_destroy_chain(chain);
@@ -239,10 +312,13 @@ replay:
if (tp->prio >= prio) {
if (tp->prio == prio) {
if (!nprio ||
- (tp->protocol != protocol && protocol))
+ (tp->protocol != protocol && protocol)) {
+ err = -EINVAL;
goto errout;
- } else
+ }
+ } else {
tp = NULL;
+ }
break;
}
}
@@ -250,109 +326,69 @@ replay:
if (tp == NULL) {
/* Proto-tcf does not exist, create new one */
- if (tca[TCA_KIND] == NULL || !protocol)
+ if (tca[TCA_KIND] == NULL || !protocol) {
+ err = -EINVAL;
goto errout;
+ }
- err = -ENOENT;
if (n->nlmsg_type != RTM_NEWTFILTER ||
- !(n->nlmsg_flags & NLM_F_CREATE))
+ !(n->nlmsg_flags & NLM_F_CREATE)) {
+ err = -ENOENT;
goto errout;
+ }
+ if (!nprio)
+ nprio = TC_H_MAJ(tcf_auto_prio(rtnl_dereference(*back)));
- /* Create new proto tcf */
-
- err = -ENOBUFS;
- tp = kzalloc(sizeof(*tp), GFP_KERNEL);
- if (tp == NULL)
- goto errout;
- err = -ENOENT;
- tp_ops = tcf_proto_lookup_ops(tca[TCA_KIND]);
- if (tp_ops == NULL) {
-#ifdef CONFIG_MODULES
- struct nlattr *kind = tca[TCA_KIND];
- char name[IFNAMSIZ];
-
- if (kind != NULL &&
- nla_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) {
- rtnl_unlock();
- request_module("cls_%s", name);
- rtnl_lock();
- tp_ops = tcf_proto_lookup_ops(kind);
- /* We dropped the RTNL semaphore in order to
- * perform the module load. So, even if we
- * succeeded in loading the module we have to
- * replay the request. We indicate this using
- * -EAGAIN.
- */
- if (tp_ops != NULL) {
- module_put(tp_ops->owner);
- err = -EAGAIN;
- }
- }
-#endif
- kfree(tp);
- goto errout;
- }
- tp->ops = tp_ops;
- tp->protocol = protocol;
- tp->prio = nprio ? :
- TC_H_MAJ(tcf_auto_prio(rtnl_dereference(*back)));
- tp->q = q;
- tp->classify = tp_ops->classify;
- tp->classid = parent;
-
- err = tp_ops->init(tp);
- if (err != 0) {
- module_put(tp_ops->owner);
- kfree(tp);
+ tp = tcf_proto_create(nla_data(tca[TCA_KIND]),
+ protocol, nprio, parent, q);
+ if (IS_ERR(tp)) {
+ err = PTR_ERR(tp);
goto errout;
}
-
tp_created = 1;
-
- } else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind))
+ } else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
+ err = -EINVAL;
goto errout;
+ }
fh = tp->ops->get(tp, t->tcm_handle);
if (fh == 0) {
if (n->nlmsg_type == RTM_DELTFILTER && t->tcm_handle == 0) {
- struct tcf_proto *next = rtnl_dereference(tp->next);
-
+ next = rtnl_dereference(tp->next);
RCU_INIT_POINTER(*back, next);
-
tfilter_notify(net, skb, n, tp, fh,
RTM_DELTFILTER, false);
- tcf_destroy(tp, true);
+ tcf_proto_destroy(tp, true);
err = 0;
goto errout;
}
- err = -ENOENT;
if (n->nlmsg_type != RTM_NEWTFILTER ||
- !(n->nlmsg_flags & NLM_F_CREATE))
+ !(n->nlmsg_flags & NLM_F_CREATE)) {
+ err = -ENOENT;
goto errout;
+ }
} else {
switch (n->nlmsg_type) {
case RTM_NEWTFILTER:
- err = -EEXIST;
if (n->nlmsg_flags & NLM_F_EXCL) {
if (tp_created)
- tcf_destroy(tp, true);
+ tcf_proto_destroy(tp, true);
+ err = -EEXIST;
goto errout;
}
break;
case RTM_DELTFILTER:
err = tp->ops->delete(tp, fh);
- if (err == 0) {
- struct tcf_proto *next = rtnl_dereference(tp->next);
-
- tfilter_notify(net, skb, n, tp,
- t->tcm_handle,
- RTM_DELTFILTER, false);
- if (tcf_destroy(tp, false))
- RCU_INIT_POINTER(*back, next);
- }
+ if (err)
+ goto errout;
+ next = rtnl_dereference(tp->next);
+ tfilter_notify(net, skb, n, tp, t->tcm_handle,
+ RTM_DELTFILTER, false);
+ if (tcf_proto_destroy(tp, false))
+ RCU_INIT_POINTER(*back, next);
goto errout;
case RTM_GETTFILTER:
err = tfilter_notify(net, skb, n, tp, fh,
@@ -374,7 +410,7 @@ replay:
tfilter_notify(net, skb, n, tp, fh, RTM_NEWTFILTER, false);
} else {
if (tp_created)
- tcf_destroy(tp, true);
+ tcf_proto_destroy(tp, true);
}
errout:
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index d9c97018317d..80f688436dd7 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -148,6 +148,7 @@ static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog,
struct net_device *dev = tp->q->dev_queue->dev;
struct tc_cls_bpf_offload bpf_offload = {};
struct tc_to_netdev offload;
+ int err;
offload.type = TC_SETUP_CLSBPF;
offload.cls_bpf = &bpf_offload;
@@ -159,8 +160,13 @@ static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog,
bpf_offload.exts_integrated = prog->exts_integrated;
bpf_offload.gen_flags = prog->gen_flags;
- return dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle,
- tp->protocol, &offload);
+ err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle,
+ tp->protocol, &offload);
+
+ if (!err && (cmd == TC_CLSBPF_ADD || cmd == TC_CLSBPF_REPLACE))
+ prog->gen_flags |= TCA_CLS_FLAGS_IN_HW;
+
+ return err;
}
static int cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog,
@@ -511,6 +517,9 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
return ret;
}
+ if (!tc_in_hw(prog->gen_flags))
+ prog->gen_flags |= TCA_CLS_FLAGS_NOT_IN_HW;
+
if (oldprog) {
list_replace_rcu(&oldprog->link, &prog->link);
tcf_unbind_filter(tp, &oldprog->res);
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 6575aba87630..3d6b9286c203 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -129,7 +129,7 @@ static u32 flow_get_mark(const struct sk_buff *skb)
static u32 flow_get_nfct(const struct sk_buff *skb)
{
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
- return addr_fold(skb->nfct);
+ return addr_fold(skb_nfct(skb));
#else
return 0;
#endif
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 5752789acc13..9d0c99d2e9fb 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -40,6 +40,7 @@ struct fl_flow_key {
};
struct flow_dissector_key_ports tp;
struct flow_dissector_key_icmp icmp;
+ struct flow_dissector_key_arp arp;
struct flow_dissector_key_keyid enc_key_id;
union {
struct flow_dissector_key_ipv4_addrs enc_ipv4;
@@ -133,6 +134,14 @@ static void fl_clear_masked_range(struct fl_flow_key *key,
memset(fl_key_get_start(key, mask), 0, fl_mask_range(mask));
}
+/* Look up a filter in head->ht by masked key. The hash key is the part
+ * of @mkey starting at fl_key_get_start() for the head's mask; returns
+ * whatever rhashtable_lookup_fast() returns for it.
+ */
+static struct cls_fl_filter *fl_lookup(struct cls_fl_head *head,
+				       struct fl_flow_key *mkey)
+{
+	void *key_start = fl_key_get_start(mkey, &head->mask);
+
+	return rhashtable_lookup_fast(&head->ht, key_start, head->ht_params);
+}
+
static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct tcf_result *res)
{
@@ -180,9 +189,7 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
fl_set_masked_key(&skb_mkey, &skb_key, &head->mask);
- f = rhashtable_lookup_fast(&head->ht,
- fl_key_get_start(&skb_mkey, &head->mask),
- head->ht_params);
+ f = fl_lookup(head, &skb_mkey);
if (f && !tc_skip_sw(f->flags)) {
*res = f->res;
return tcf_exts_exec(skb, &f->exts, res);
@@ -222,6 +229,7 @@ static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f)
return;
offload.command = TC_CLSFLOWER_DESTROY;
+ offload.prio = tp->prio;
offload.cookie = (unsigned long)f;
tc->type = TC_SETUP_CLSFLOWER;
@@ -253,6 +261,7 @@ static int fl_hw_replace_filter(struct tcf_proto *tp,
}
offload.command = TC_CLSFLOWER_REPLACE;
+ offload.prio = tp->prio;
offload.cookie = (unsigned long)f;
offload.dissector = dissector;
offload.mask = mask;
@@ -264,6 +273,8 @@ static int fl_hw_replace_filter(struct tcf_proto *tp,
err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol,
tc);
+ if (!err)
+ f->flags |= TCA_CLS_FLAGS_IN_HW;
if (tc_skip_sw(f->flags))
return err;
@@ -280,6 +291,7 @@ static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f)
return;
offload.command = TC_CLSFLOWER_STATS;
+ offload.prio = tp->prio;
offload.cookie = (unsigned long)f;
offload.exts = &f->exts;
@@ -401,6 +413,16 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
[TCA_FLOWER_KEY_ICMPV6_TYPE_MASK] = { .type = NLA_U8 },
[TCA_FLOWER_KEY_ICMPV6_CODE] = { .type = NLA_U8 },
[TCA_FLOWER_KEY_ICMPV6_CODE_MASK] = { .type = NLA_U8 },
+ [TCA_FLOWER_KEY_ARP_SIP] = { .type = NLA_U32 },
+ [TCA_FLOWER_KEY_ARP_SIP_MASK] = { .type = NLA_U32 },
+ [TCA_FLOWER_KEY_ARP_TIP] = { .type = NLA_U32 },
+ [TCA_FLOWER_KEY_ARP_TIP_MASK] = { .type = NLA_U32 },
+ [TCA_FLOWER_KEY_ARP_OP] = { .type = NLA_U8 },
+ [TCA_FLOWER_KEY_ARP_OP_MASK] = { .type = NLA_U8 },
+ [TCA_FLOWER_KEY_ARP_SHA] = { .len = ETH_ALEN },
+ [TCA_FLOWER_KEY_ARP_SHA_MASK] = { .len = ETH_ALEN },
+ [TCA_FLOWER_KEY_ARP_THA] = { .len = ETH_ALEN },
+ [TCA_FLOWER_KEY_ARP_THA_MASK] = { .len = ETH_ALEN },
};
static void fl_set_key_val(struct nlattr **tb,
@@ -572,6 +594,23 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
&mask->icmp.code,
TCA_FLOWER_KEY_ICMPV6_CODE_MASK,
sizeof(key->icmp.code));
+ } else if (key->basic.n_proto == htons(ETH_P_ARP) ||
+ key->basic.n_proto == htons(ETH_P_RARP)) {
+ fl_set_key_val(tb, &key->arp.sip, TCA_FLOWER_KEY_ARP_SIP,
+ &mask->arp.sip, TCA_FLOWER_KEY_ARP_SIP_MASK,
+ sizeof(key->arp.sip));
+ fl_set_key_val(tb, &key->arp.tip, TCA_FLOWER_KEY_ARP_TIP,
+ &mask->arp.tip, TCA_FLOWER_KEY_ARP_TIP_MASK,
+ sizeof(key->arp.tip));
+ fl_set_key_val(tb, &key->arp.op, TCA_FLOWER_KEY_ARP_OP,
+ &mask->arp.op, TCA_FLOWER_KEY_ARP_OP_MASK,
+ sizeof(key->arp.op));
+ fl_set_key_val(tb, key->arp.sha, TCA_FLOWER_KEY_ARP_SHA,
+ mask->arp.sha, TCA_FLOWER_KEY_ARP_SHA_MASK,
+ sizeof(key->arp.sha));
+ fl_set_key_val(tb, key->arp.tha, TCA_FLOWER_KEY_ARP_THA,
+ mask->arp.tha, TCA_FLOWER_KEY_ARP_THA_MASK,
+ sizeof(key->arp.tha));
}
if (tb[TCA_FLOWER_KEY_ENC_IPV4_SRC] ||
@@ -689,6 +728,8 @@ static void fl_init_dissector(struct cls_fl_head *head,
FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
FLOW_DISSECTOR_KEY_ICMP, icmp);
FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+ FLOW_DISSECTOR_KEY_ARP, arp);
+ FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
FLOW_DISSECTOR_KEY_VLAN, vlan);
FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
FLOW_DISSECTOR_KEY_ENC_KEYID, enc_key_id);
@@ -796,23 +837,31 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
struct cls_fl_head *head = rtnl_dereference(tp->root);
struct cls_fl_filter *fold = (struct cls_fl_filter *) *arg;
struct cls_fl_filter *fnew;
- struct nlattr *tb[TCA_FLOWER_MAX + 1];
+ struct nlattr **tb;
struct fl_flow_mask mask = {};
int err;
if (!tca[TCA_OPTIONS])
return -EINVAL;
+ tb = kcalloc(TCA_FLOWER_MAX + 1, sizeof(struct nlattr *), GFP_KERNEL);
+ if (!tb)
+ return -ENOBUFS;
+
err = nla_parse_nested(tb, TCA_FLOWER_MAX, tca[TCA_OPTIONS], fl_policy);
if (err < 0)
- return err;
+ goto errout_tb;
- if (fold && handle && fold->handle != handle)
- return -EINVAL;
+ if (fold && handle && fold->handle != handle) {
+ err = -EINVAL;
+ goto errout_tb;
+ }
fnew = kzalloc(sizeof(*fnew), GFP_KERNEL);
- if (!fnew)
- return -ENOBUFS;
+ if (!fnew) {
+ err = -ENOBUFS;
+ goto errout_tb;
+ }
err = tcf_exts_init(&fnew->exts, TCA_FLOWER_ACT, 0);
if (err < 0)
@@ -845,6 +894,11 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
goto errout;
if (!tc_skip_sw(fnew->flags)) {
+ if (!fold && fl_lookup(head, &fnew->mkey)) {
+ err = -EEXIST;
+ goto errout;
+ }
+
err = rhashtable_insert_fast(&head->ht, &fnew->ht_node,
head->ht_params);
if (err)
@@ -860,6 +914,9 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
goto errout;
}
+ if (!tc_in_hw(fnew->flags))
+ fnew->flags |= TCA_CLS_FLAGS_NOT_IN_HW;
+
if (fold) {
if (!tc_skip_sw(fold->flags))
rhashtable_remove_fast(&head->ht, &fold->ht_node,
@@ -878,11 +935,14 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
list_add_tail_rcu(&fnew->list, &head->filters);
}
+ kfree(tb);
return 0;
errout:
tcf_exts_destroy(&fnew->exts);
kfree(fnew);
+errout_tb:
+ kfree(tb);
return err;
}
@@ -1112,6 +1172,27 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
TCA_FLOWER_KEY_ICMPV6_CODE_MASK,
sizeof(key->icmp.code))))
goto nla_put_failure;
+ else if ((key->basic.n_proto == htons(ETH_P_ARP) ||
+ key->basic.n_proto == htons(ETH_P_RARP)) &&
+ (fl_dump_key_val(skb, &key->arp.sip,
+ TCA_FLOWER_KEY_ARP_SIP, &mask->arp.sip,
+ TCA_FLOWER_KEY_ARP_SIP_MASK,
+ sizeof(key->arp.sip)) ||
+ fl_dump_key_val(skb, &key->arp.tip,
+ TCA_FLOWER_KEY_ARP_TIP, &mask->arp.tip,
+ TCA_FLOWER_KEY_ARP_TIP_MASK,
+ sizeof(key->arp.tip)) ||
+ fl_dump_key_val(skb, &key->arp.op,
+ TCA_FLOWER_KEY_ARP_OP, &mask->arp.op,
+ TCA_FLOWER_KEY_ARP_OP_MASK,
+ sizeof(key->arp.op)) ||
+ fl_dump_key_val(skb, key->arp.sha, TCA_FLOWER_KEY_ARP_SHA,
+ mask->arp.sha, TCA_FLOWER_KEY_ARP_SHA_MASK,
+ sizeof(key->arp.sha)) ||
+ fl_dump_key_val(skb, key->arp.tha, TCA_FLOWER_KEY_ARP_THA,
+ mask->arp.tha, TCA_FLOWER_KEY_ARP_THA_MASK,
+ sizeof(key->arp.tha))))
+ goto nla_put_failure;
if (key->enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS &&
(fl_dump_key_val(skb, &key->enc_ipv4.src,
@@ -1153,7 +1234,8 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
if (fl_dump_key_flags(skb, key->control.flags, mask->control.flags))
goto nla_put_failure;
- nla_put_u32(skb, TCA_FLOWER_FLAGS, f->flags);
+ if (f->flags && nla_put_u32(skb, TCA_FLOWER_FLAGS, f->flags))
+ goto nla_put_failure;
if (tcf_exts_dump(skb, &f->exts))
goto nla_put_failure;
diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index b12bc2abea93..224eb2c14346 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -56,6 +56,7 @@ static int mall_replace_hw_filter(struct tcf_proto *tp,
struct net_device *dev = tp->q->dev_queue->dev;
struct tc_to_netdev offload;
struct tc_cls_matchall_offload mall_offload = {0};
+ int err;
offload.type = TC_SETUP_MATCHALL;
offload.cls_mall = &mall_offload;
@@ -63,8 +64,12 @@ static int mall_replace_hw_filter(struct tcf_proto *tp,
offload.cls_mall->exts = &head->exts;
offload.cls_mall->cookie = cookie;
- return dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol,
- &offload);
+ err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol,
+ &offload);
+ if (!err)
+ head->flags |= TCA_CLS_FLAGS_IN_HW;
+
+ return err;
}
static void mall_destroy_hw_filter(struct tcf_proto *tp,
@@ -118,10 +123,12 @@ static int mall_set_parms(struct net *net, struct tcf_proto *tp,
struct tcf_exts e;
int err;
- tcf_exts_init(&e, TCA_MATCHALL_ACT, 0);
+ err = tcf_exts_init(&e, TCA_MATCHALL_ACT, 0);
+ if (err)
+ return err;
err = tcf_exts_validate(net, tp, tb, est, &e, ovr);
if (err < 0)
- return err;
+ goto errout;
if (tb[TCA_MATCHALL_CLASSID]) {
head->res.classid = nla_get_u32(tb[TCA_MATCHALL_CLASSID]);
@@ -131,6 +138,9 @@ static int mall_set_parms(struct net *net, struct tcf_proto *tp,
tcf_exts_change(tp, &head->exts, &e);
return 0;
+errout:
+ tcf_exts_destroy(&e);
+ return err;
}
static int mall_change(struct net *net, struct sk_buff *in_skb,
@@ -166,7 +176,9 @@ static int mall_change(struct net *net, struct sk_buff *in_skb,
if (!new)
return -ENOBUFS;
- tcf_exts_init(&new->exts, TCA_MATCHALL_ACT, 0);
+ err = tcf_exts_init(&new->exts, TCA_MATCHALL_ACT, 0);
+ if (err)
+ goto err_exts_init;
if (!handle)
handle = 1;
@@ -175,25 +187,31 @@ static int mall_change(struct net *net, struct sk_buff *in_skb,
err = mall_set_parms(net, tp, new, base, tb, tca[TCA_RATE], ovr);
if (err)
- goto errout;
+ goto err_set_parms;
if (tc_should_offload(dev, tp, flags)) {
err = mall_replace_hw_filter(tp, new, (unsigned long) new);
if (err) {
if (tc_skip_sw(flags))
- goto errout;
+ goto err_replace_hw_filter;
else
err = 0;
}
}
+ if (!tc_in_hw(new->flags))
+ new->flags |= TCA_CLS_FLAGS_NOT_IN_HW;
+
*arg = (unsigned long) head;
rcu_assign_pointer(tp->root, new);
if (head)
call_rcu(&head->rcu, mall_destroy_rcu);
return 0;
-errout:
+err_replace_hw_filter:
+err_set_parms:
+ tcf_exts_destroy(&new->exts);
+err_exts_init:
kfree(new);
return err;
}
@@ -234,6 +252,9 @@ static int mall_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
nla_put_u32(skb, TCA_MATCHALL_CLASSID, head->res.classid))
goto nla_put_failure;
+ if (head->flags && nla_put_u32(skb, TCA_MATCHALL_FLAGS, head->flags))
+ goto nla_put_failure;
+
if (tcf_exts_dump(skb, &head->exts))
goto nla_put_failure;
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index ae83c3aec308..4dbe0c680fe6 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -334,7 +334,6 @@ static int u32_init(struct tcf_proto *tp)
if (root_ht == NULL)
return -ENOBUFS;
- root_ht->divisor = 0;
root_ht->refcnt++;
root_ht->handle = tp_c ? gen_new_htid(tp_c) : 0x80000000;
root_ht->prio = tp->prio;
@@ -524,6 +523,10 @@ static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n,
err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle,
tp->protocol, &offload);
+
+ if (!err)
+ n->flags |= TCA_CLS_FLAGS_IN_HW;
+
if (tc_skip_sw(flags))
return err;
@@ -896,6 +899,9 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
return err;
}
+ if (!tc_in_hw(new->flags))
+ new->flags |= TCA_CLS_FLAGS_NOT_IN_HW;
+
u32_replace_knode(tp, tp_c, new);
tcf_unbind_filter(tp, &n->res);
call_rcu(&n->rcu, u32_delete_key_rcu);
@@ -1015,6 +1021,9 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
if (err)
goto errhw;
+ if (!tc_in_hw(n->flags))
+ n->flags |= TCA_CLS_FLAGS_NOT_IN_HW;
+
ins = &ht->ht[TC_U32_HASH(handle)];
for (pins = rtnl_dereference(*ins); pins;
ins = &pins->next, pins = rtnl_dereference(*ins))
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index d7b93429f0cc..bcf49cd22786 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -440,7 +440,6 @@ void qdisc_put_rtab(struct qdisc_rate_table *tab)
EXPORT_SYMBOL(qdisc_put_rtab);
static LIST_HEAD(qdisc_stab_list);
-static DEFINE_SPINLOCK(qdisc_stab_lock);
static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = {
[TCA_STAB_BASE] = { .len = sizeof(struct tc_sizespec) },
@@ -474,20 +473,15 @@ static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt)
if (tsize != s->tsize || (!tab && tsize > 0))
return ERR_PTR(-EINVAL);
- spin_lock(&qdisc_stab_lock);
-
list_for_each_entry(stab, &qdisc_stab_list, list) {
if (memcmp(&stab->szopts, s, sizeof(*s)))
continue;
if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16)))
continue;
stab->refcnt++;
- spin_unlock(&qdisc_stab_lock);
return stab;
}
- spin_unlock(&qdisc_stab_lock);
-
stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL);
if (!stab)
return ERR_PTR(-ENOMEM);
@@ -497,9 +491,7 @@ static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt)
if (tsize > 0)
memcpy(stab->data, tab, tsize * sizeof(u16));
- spin_lock(&qdisc_stab_lock);
list_add_tail(&stab->list, &qdisc_stab_list);
- spin_unlock(&qdisc_stab_lock);
return stab;
}
@@ -514,14 +506,10 @@ void qdisc_put_stab(struct qdisc_size_table *tab)
if (!tab)
return;
- spin_lock(&qdisc_stab_lock);
-
if (--tab->refcnt == 0) {
list_del(&tab->list);
call_rcu_bh(&tab->rcu, stab_kfree_rcu);
}
-
- spin_unlock(&qdisc_stab_lock);
}
EXPORT_SYMBOL(qdisc_put_stab);
@@ -1019,6 +1007,8 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
return sch;
}
+ /* ops->init() failed, we call ->destroy() like qdisc_create_dflt() */
+ ops->destroy(sch);
err_out3:
dev_put(dev);
kfree((char *) sch - sch->padded);
@@ -1861,6 +1851,7 @@ int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp,
{
__be16 protocol = tc_skb_protocol(skb);
#ifdef CONFIG_NET_CLS_ACT
+ const int max_reclassify_loop = 4;
const struct tcf_proto *old_tp = tp;
int limit = 0;
@@ -1885,7 +1876,7 @@ reclassify:
return TC_ACT_UNSPEC; /* signal: continue lookup */
#ifdef CONFIG_NET_CLS_ACT
reset:
- if (unlikely(limit++ >= MAX_REC_LOOP)) {
+ if (unlikely(limit++ >= max_reclassify_loop)) {
net_notice_ratelimited("%s: reclassify loop, rule prio %u, protocol %02x\n",
tp->q->ops->id, tp->prio & 0xffff,
ntohs(tp->protocol));
@@ -1899,28 +1890,6 @@ reset:
}
EXPORT_SYMBOL(tc_classify);
-bool tcf_destroy(struct tcf_proto *tp, bool force)
-{
- if (tp->ops->destroy(tp, force)) {
- module_put(tp->ops->owner);
- kfree_rcu(tp, rcu);
- return true;
- }
-
- return false;
-}
-
-void tcf_destroy_chain(struct tcf_proto __rcu **fl)
-{
- struct tcf_proto *tp;
-
- while ((tp = rtnl_dereference(*fl)) != NULL) {
- RCU_INIT_POINTER(*fl, tp->next);
- tcf_destroy(tp, true);
- }
-}
-EXPORT_SYMBOL(tcf_destroy_chain);
-
#ifdef CONFIG_PROC_FS
static int psched_show(struct seq_file *seq, void *v)
{
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index 481e4f12aeb4..2209c2ddacbf 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -15,6 +15,7 @@
#include <linux/file.h> /* for fput */
#include <net/netlink.h>
#include <net/pkt_sched.h>
+#include <net/pkt_cls.h>
/*
* The ATM queuing discipline provides a framework for invoking classifiers
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index f1207582cbf3..d6ca18dc04c3 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -19,6 +19,7 @@
#include <linux/skbuff.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
+#include <net/pkt_cls.h>
/* Class-Based Queueing (CBQ) algorithm.
diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
index 3b6d5bd69101..3b86a97bc67c 100644
--- a/net/sched/sch_choke.c
+++ b/net/sched/sch_choke.c
@@ -16,6 +16,7 @@
#include <linux/skbuff.h>
#include <linux/vmalloc.h>
#include <net/pkt_sched.h>
+#include <net/pkt_cls.h>
#include <net/inet_ecn.h>
#include <net/red.h>
#include <net/flow_dissector.h>
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 1308bbf460f7..802ac7c2e5e8 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -13,6 +13,7 @@
#include <linux/rtnetlink.h>
#include <linux/bitops.h>
#include <net/pkt_sched.h>
+#include <net/pkt_cls.h>
#include <net/dsfield.h>
#include <net/inet_ecn.h>
#include <asm/byteorder.h>
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index a5ea0e9b6be4..9f3a884d1590 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -23,6 +23,7 @@
#include <linux/vmalloc.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
+#include <net/pkt_cls.h>
#include <net/codel.h>
#include <net/codel_impl.h>
#include <net/codel_qdisc.h>
@@ -57,7 +58,6 @@ struct fq_codel_sched_data {
struct fq_codel_flow *flows; /* Flows table [flows_cnt] */
u32 *backlogs; /* backlog table [flows_cnt] */
u32 flows_cnt; /* number of flows */
- u32 perturbation; /* hash perturbation */
u32 quantum; /* psched_mtu(qdisc_dev(sch)); */
u32 drop_batch_size;
u32 memory_limit;
@@ -75,9 +75,7 @@ struct fq_codel_sched_data {
static unsigned int fq_codel_hash(const struct fq_codel_sched_data *q,
struct sk_buff *skb)
{
- u32 hash = skb_get_hash_perturb(skb, q->perturbation);
-
- return reciprocal_scale(hash, q->flows_cnt);
+ return reciprocal_scale(skb_get_hash(skb), q->flows_cnt);
}
static unsigned int fq_codel_classify(struct sk_buff *skb, struct Qdisc *sch,
@@ -482,7 +480,6 @@ static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt)
q->memory_limit = 32 << 20; /* 32 MBytes */
q->drop_batch_size = 64;
q->quantum = psched_mtu(qdisc_dev(sch));
- q->perturbation = prandom_u32();
INIT_LIST_HEAD(&q->new_flows);
INIT_LIST_HEAD(&q->old_flows);
codel_params_init(&q->cparams);
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 6eb9c8e88519..b052b27a984e 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -247,7 +247,7 @@ static inline int qdisc_restart(struct Qdisc *q, int *packets)
void __qdisc_run(struct Qdisc *q)
{
- int quota = weight_p;
+ int quota = dev_tx_weight;
int packets;
while (qdisc_restart(q, &packets)) {
diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c
index e3d0458af17b..2fae8b5f1b80 100644
--- a/net/sched/sch_hhf.c
+++ b/net/sched/sch_hhf.c
@@ -627,7 +627,9 @@ static int hhf_init(struct Qdisc *sch, struct nlattr *opt)
q->hhf_arrays[i] = hhf_zalloc(HHF_ARRAYS_LEN *
sizeof(u32));
if (!q->hhf_arrays[i]) {
- hhf_destroy(sch);
+ /* Note: hhf_destroy() will be called
+ * by our caller.
+ */
return -ENOMEM;
}
}
@@ -638,7 +640,9 @@ static int hhf_init(struct Qdisc *sch, struct nlattr *opt)
q->hhf_valid_bits[i] = hhf_zalloc(HHF_ARRAYS_LEN /
BITS_PER_BYTE);
if (!q->hhf_valid_bits[i]) {
- hhf_destroy(sch);
+ /* Note: hhf_destroy() will be called
+ * by our caller.
+ */
return -ENOMEM;
}
}
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 760f39e7caee..4cd5fb134bc9 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -40,6 +40,7 @@
#include <net/netlink.h>
#include <net/sch_generic.h>
#include <net/pkt_sched.h>
+#include <net/pkt_cls.h>
/* HTB algorithm.
Author: devik@cdi.cz
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index 8fe6999b642a..3bab5f66c392 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -16,6 +16,7 @@
#include <net/netlink.h>
#include <net/pkt_sched.h>
+#include <net/pkt_cls.h>
static struct Qdisc *ingress_leaf(struct Qdisc *sch, unsigned long arg)
{
diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c
index 2bc8d7f8df16..20b7f1646f69 100644
--- a/net/sched/sch_mq.c
+++ b/net/sched/sch_mq.c
@@ -52,7 +52,7 @@ static int mq_init(struct Qdisc *sch, struct nlattr *opt)
/* pre-allocate qdiscs, attachment can't fail */
priv->qdiscs = kcalloc(dev->num_tx_queues, sizeof(priv->qdiscs[0]),
GFP_KERNEL);
- if (priv->qdiscs == NULL)
+ if (!priv->qdiscs)
return -ENOMEM;
for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
@@ -60,18 +60,14 @@ static int mq_init(struct Qdisc *sch, struct nlattr *opt)
qdisc = qdisc_create_dflt(dev_queue, get_default_qdisc_ops(dev, ntx),
TC_H_MAKE(TC_H_MAJ(sch->handle),
TC_H_MIN(ntx + 1)));
- if (qdisc == NULL)
- goto err;
+ if (!qdisc)
+ return -ENOMEM;
priv->qdiscs[ntx] = qdisc;
qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
}
sch->flags |= TCQ_F_MQROOT;
return 0;
-
-err:
- mq_destroy(sch);
- return -ENOMEM;
}
static void mq_attach(struct Qdisc *sch)
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index b5c502c78143..922683418e53 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -118,10 +118,8 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
/* pre-allocate qdisc, attachment can't fail */
priv->qdiscs = kcalloc(dev->num_tx_queues, sizeof(priv->qdiscs[0]),
GFP_KERNEL);
- if (priv->qdiscs == NULL) {
- err = -ENOMEM;
- goto err;
- }
+ if (!priv->qdiscs)
+ return -ENOMEM;
for (i = 0; i < dev->num_tx_queues; i++) {
dev_queue = netdev_get_tx_queue(dev, i);
@@ -129,10 +127,9 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
get_default_qdisc_ops(dev, i),
TC_H_MAKE(TC_H_MAJ(sch->handle),
TC_H_MIN(i + 1)));
- if (qdisc == NULL) {
- err = -ENOMEM;
- goto err;
- }
+ if (!qdisc)
+ return -ENOMEM;
+
priv->qdiscs[i] = qdisc;
qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
}
@@ -148,7 +145,7 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
priv->hw_owned = 1;
err = dev->netdev_ops->ndo_setup_tc(dev, sch->handle, 0, &tc);
if (err)
- goto err;
+ return err;
} else {
netdev_set_num_tc(dev, qopt->num_tc);
for (i = 0; i < qopt->num_tc; i++)
@@ -162,10 +159,6 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
sch->flags |= TCQ_F_MQROOT;
return 0;
-
-err:
- mqprio_destroy(sch);
- return err;
}
static void mqprio_attach(struct Qdisc *sch)
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index 9ffbb025b37e..e7839a0d0eaa 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -25,7 +25,7 @@
#include <linux/skbuff.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
-
+#include <net/pkt_cls.h>
struct multiq_sched_data {
u16 bands;
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index bcfadfdea8e0..c8bb62a1e744 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -626,7 +626,7 @@ deliver:
* If it's at ingress let's pretend the delay is
* from the network (tstamp will be updated).
*/
- if (G_TC_FROM(skb->tc_verd) & AT_INGRESS)
+ if (skb->tc_redirected && skb->tc_from_ingress)
skb->tstamp = 0;
#endif
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 8f575899adfa..d4d7db267b6e 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -20,7 +20,7 @@
#include <linux/skbuff.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
-
+#include <net/pkt_cls.h>
struct prio_sched_data {
int bands;
diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c
index 20a350bd1b1d..fe6963d21519 100644
--- a/net/sched/sch_sfb.c
+++ b/net/sched/sch_sfb.c
@@ -25,6 +25,7 @@
#include <linux/jhash.h>
#include <net/ip.h>
#include <net/pkt_sched.h>
+#include <net/pkt_cls.h>
#include <net/inet_ecn.h>
/*
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 7f195ed4d568..42e8c8615e65 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -23,6 +23,7 @@
#include <linux/vmalloc.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
+#include <net/pkt_cls.h>
#include <net/red.h>
@@ -742,9 +743,10 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
q->ht = sfq_alloc(sizeof(q->ht[0]) * q->divisor);
q->slots = sfq_alloc(sizeof(q->slots[0]) * q->maxflows);
if (!q->ht || !q->slots) {
- sfq_destroy(sch);
+ /* Note: sfq_destroy() will be called by our caller */
return -ENOMEM;
}
+
for (i = 0; i < q->divisor; i++)
q->ht[i] = SFQ_EMPTY_SLOT;
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index b0196366d58d..9fe6b427afed 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -401,8 +401,8 @@ static int teql_master_close(struct net_device *dev)
return 0;
}
-static struct rtnl_link_stats64 *teql_master_stats64(struct net_device *dev,
- struct rtnl_link_stats64 *stats)
+static void teql_master_stats64(struct net_device *dev,
+ struct rtnl_link_stats64 *stats)
{
struct teql_master *m = netdev_priv(dev);
@@ -410,7 +410,6 @@ static struct rtnl_link_stats64 *teql_master_stats64(struct net_device *dev,
stats->tx_bytes = m->tx_bytes;
stats->tx_errors = m->tx_errors;
stats->tx_dropped = m->tx_dropped;
- return stats;
}
static int teql_master_mtu(struct net_device *dev, int new_mtu)