From 24ea591d2201c3257d666466e8fac50a6cf3c52f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 6 Jul 2015 05:18:03 -0700 Subject: net: sched: extend percpu stats helpers qdisc_bstats_update_cpu() and other helpers were added to support percpu stats for qdisc. We want to add percpu stats for tc action, so this patch add common helpers. qdisc_bstats_update_cpu() is renamed to qdisc_bstats_cpu_update() qdisc_qstats_drop_cpu() is renamed to qdisc_qstats_cpu_drop() Signed-off-by: Eric Dumazet Cc: Alexei Starovoitov Acked-by: Jamal Hadi Salim Acked-by: John Fastabend Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- net/core/dev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 6778a9999d52..e0d270143fc7 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3646,7 +3646,7 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb, qdisc_skb_cb(skb)->pkt_len = skb->len; skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS); - qdisc_bstats_update_cpu(cl->q, skb); + qdisc_bstats_cpu_update(cl->q, skb); switch (tc_classify(skb, cl, &cl_res)) { case TC_ACT_OK: @@ -3654,7 +3654,7 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb, skb->tc_index = TC_H_MIN(cl_res.classid); break; case TC_ACT_SHOT: - qdisc_qstats_drop_cpu(cl->q); + qdisc_qstats_cpu_drop(cl->q); case TC_ACT_STOLEN: case TC_ACT_QUEUED: kfree_skb(skb); -- cgit v1.3-6-gb490 From d746d707a8b1421a4ba46b497cb5d59e20161645 Mon Sep 17 00:00:00 2001 From: Anuradha Karuppiah Date: Tue, 14 Jul 2015 13:43:19 -0700 Subject: net core: Add protodown support. This patch introduces the proto_down flag that can be used by user space applications to notify switch drivers that errors have been detected on the device. The switch driver can react to protodown notification by doing a phys down on the associated switch port. Signed-off-by: Anuradha Karuppiah Signed-off-by: Andy Gospodarek Signed-off-by: Roopa Prabhu Signed-off-by: Wilson Kok Signed-off-by: David S. Miller --- include/linux/netdevice.h | 14 ++++++++++++++ net/core/dev.c | 20 ++++++++++++++++++++ net/core/net-sysfs.c | 14 ++++++++++++++ 3 files changed, 48 insertions(+) (limited to 'net/core/dev.c') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e20979dfd6a9..45cfd797eb77 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1041,6 +1041,12 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev, * TX queue. * int (*ndo_get_iflink)(const struct net_device *dev); * Called to get the iflink value of this device. + * void (*ndo_change_proto_down)(struct net_device *dev, + * bool proto_down); + * This function is used to pass protocol port error state information + * to the switch driver. The switch driver can react to the proto_down + * by doing a phys down on the associated switch port. + * */ struct net_device_ops { int (*ndo_init)(struct net_device *dev); @@ -1211,6 +1217,8 @@ struct net_device_ops { int queue_index, u32 maxrate); int (*ndo_get_iflink)(const struct net_device *dev); + int (*ndo_change_proto_down)(struct net_device *dev, + bool proto_down); }; /** @@ -1502,6 +1510,10 @@ enum netdev_priv_flags { * * @qdisc_tx_busylock: XXX: need comments on this one * + * @proto_down: protocol port state information can be sent to the + * switch driver and used to set the phys state of the + * switch port. + * * FIXME: cleanup struct net_device such that network protocol info * moves out. */ @@ -1762,6 +1774,7 @@ struct net_device { #endif struct phy_device *phydev; struct lock_class_key *qdisc_tx_busylock; + bool proto_down; }; #define to_net_dev(d) container_of(d, struct net_device, dev) @@ -2982,6 +2995,7 @@ int dev_get_phys_port_id(struct net_device *dev, struct netdev_phys_item_id *ppid); int dev_get_phys_port_name(struct net_device *dev, char *name, size_t len); +int dev_change_proto_down(struct net_device *dev, bool proto_down); struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev); struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq, int *ret); diff --git a/net/core/dev.c b/net/core/dev.c index 69445a33ace6..8810b6bbebfe 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6074,6 +6074,26 @@ int dev_get_phys_port_name(struct net_device *dev, } EXPORT_SYMBOL(dev_get_phys_port_name); +/** + * dev_change_proto_down - update protocol port state information + * @dev: device + * @proto_down: new value + * + * This info can be used by switch drivers to set the phys state of the + * port. + */ +int dev_change_proto_down(struct net_device *dev, bool proto_down) +{ + const struct net_device_ops *ops = dev->netdev_ops; + + if (!ops->ndo_change_proto_down) + return -EOPNOTSUPP; + if (!netif_device_present(dev)) + return -ENODEV; + return ops->ndo_change_proto_down(dev, proto_down); +} +EXPORT_SYMBOL(dev_change_proto_down); + /** * dev_new_index - allocate an ifindex * @net: the applicable net namespace diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 18b34d771ed4..194c1d03b2b3 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -404,6 +404,19 @@ static ssize_t group_store(struct device *dev, struct device_attribute *attr, NETDEVICE_SHOW(group, fmt_dec); static DEVICE_ATTR(netdev_group, S_IRUGO | S_IWUSR, group_show, group_store); +static int change_proto_down(struct net_device *dev, unsigned long proto_down) +{ + return dev_change_proto_down(dev, (bool) proto_down); +} + +static ssize_t proto_down_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + return netdev_store(dev, attr, buf, len, change_proto_down); +} +NETDEVICE_SHOW_RW(proto_down, fmt_dec); + static ssize_t phys_port_id_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -501,6 +514,7 @@ static struct attribute *net_class_attrs[] = { &dev_attr_phys_port_id.attr, &dev_attr_phys_port_name.attr, &dev_attr_phys_switch_id.attr, + &dev_attr_proto_down.attr, NULL, }; ATTRIBUTE_GROUPS(net_class); -- cgit v1.3-6-gb490 From 0c4f691ff6791e55ac831666df0b49b1679c56e4 Mon Sep 17 00:00:00 2001 From: Scott Feldman Date: Sat, 18 Jul 2015 18:24:48 -0700 Subject: net: don't reforward packets already forwarded by offload device Just before queuing skb for xmit on port, check if skb has been marked by switchdev port driver as already fordwarded by device. If so, drop skb. A non-zero skb->offload_fwd_mark field is set by the switchdev port driver/device on ingress to indicate the skb has already been forwarded by the device to egress ports with matching dev->skb_mark. The switchdev port driver would assign a non-zero dev->offload_skb_mark for each device port netdev during registration, for example. Signed-off-by: Scott Feldman Acked-by: Jiri Pirko Acked-by: Roopa Prabhu Acked-by: Nicolas Dichtel Signed-off-by: David S. Miller --- include/linux/netdevice.h | 6 ++++++ include/linux/skbuff.h | 9 ++++++++- net/core/dev.c | 10 ++++++++++ 3 files changed, 24 insertions(+), 1 deletion(-) (limited to 'net/core/dev.c') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 45cfd797eb77..8364f29e08be 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1456,6 +1456,8 @@ enum netdev_priv_flags { * * @xps_maps: XXX: need comments on this one * + * @offload_fwd_mark: Offload device fwding mark + * * @trans_start: Time (in jiffies) of last Tx * @watchdog_timeo: Represents the timeout that is used by * the watchdog ( see dev_watchdog() ) @@ -1697,6 +1699,10 @@ struct net_device { struct xps_dev_maps __rcu *xps_maps; #endif +#ifdef CONFIG_NET_SWITCHDEV + u32 offload_fwd_mark; +#endif + /* These may be needed for future network-power-down code. */ /* diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index d6cdd6e87d53..af7a09650fa2 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -506,6 +506,7 @@ static inline u32 skb_mstamp_us_delta(const struct skb_mstamp *t1, * @no_fcs: Request NIC to treat last 4 bytes as Ethernet FCS * @napi_id: id of the NAPI struct this skb came from * @secmark: security marking + * @offload_fwd_mark: fwding offload mark * @mark: Generic packet mark * @vlan_proto: vlan encapsulation protocol * @vlan_tci: vlan tag control information @@ -650,9 +651,15 @@ struct sk_buff { unsigned int sender_cpu; }; #endif + union { #ifdef CONFIG_NETWORK_SECMARK - __u32 secmark; + __u32 secmark; +#endif +#ifdef CONFIG_NET_SWITCHDEV + __u32 offload_fwd_mark; #endif + }; + union { __u32 mark; __u32 reserved_tailroom; diff --git a/net/core/dev.c b/net/core/dev.c index 8810b6bbebfe..2ee15afb412d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3061,6 +3061,16 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv) else skb_dst_force(skb); +#ifdef CONFIG_NET_SWITCHDEV + /* Don't forward if offload device already forwarded */ + if (skb->offload_fwd_mark && + skb->offload_fwd_mark == dev->offload_fwd_mark) { + consume_skb(skb); + rc = NET_XMIT_SUCCESS; + goto out; + } +#endif + txq = netdev_pick_tx(dev, skb, accel_priv); q = rcu_dereference_bh(txq->qdisc); -- cgit v1.3-6-gb490 From f38a9eb1f77b296ff07e000823884a0f64d67b2a Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 21 Jul 2015 10:43:56 +0200 Subject: dst: Metadata destinations Introduces a new dst_metadata which enables to carry per packet metadata between forwarding and processing elements via the skb->dst pointer. The structure is set up to be a union. Thus, each separate type of metadata requires its own dst instance. If demand arises to carry multiple types of metadata concurrently, metadata dst entries can be made stackable. The metadata dst entry is refcnt'ed as expected for now but a non reference counted use is possible if the reference is forced before queueing the skb. In order to allow allocating dsts with variable length, the existing dst_alloc() is split into a dst_alloc() and dst_init() function. The existing dst_init() function to initialize the subsystem is being renamed to dst_subsys_init() to make it clear what is what. The check before ip_route_input() is changed to ignore metadata dsts and drop the dst inside the routing function thus allowing to interpret metadata in a later commit. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/net/dst.h | 6 +++- include/net/dst_metadata.h | 32 ++++++++++++++++++ net/core/dev.c | 2 +- net/core/dst.c | 84 ++++++++++++++++++++++++++++++++++++++-------- net/ipv4/ip_input.c | 3 +- net/ipv4/route.c | 2 ++ 6 files changed, 112 insertions(+), 17 deletions(-) create mode 100644 include/net/dst_metadata.h (limited to 'net/core/dev.c') diff --git a/include/net/dst.h b/include/net/dst.h index 2bc73f8a00a9..2578811cef51 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -57,6 +57,7 @@ struct dst_entry { #define DST_FAKE_RTABLE 0x0040 #define DST_XFRM_TUNNEL 0x0080 #define DST_XFRM_QUEUE 0x0100 +#define DST_METADATA 0x0200 unsigned short pending_confirm; @@ -356,6 +357,9 @@ static inline int dst_discard(struct sk_buff *skb) } void *dst_alloc(struct dst_ops *ops, struct net_device *dev, int initial_ref, int initial_obsolete, unsigned short flags); +void dst_init(struct dst_entry *dst, struct dst_ops *ops, + struct net_device *dev, int initial_ref, int initial_obsolete, + unsigned short flags); void __dst_free(struct dst_entry *dst); struct dst_entry *dst_destroy(struct dst_entry *dst); @@ -457,7 +461,7 @@ static inline struct dst_entry *dst_check(struct dst_entry *dst, u32 cookie) return dst; } -void dst_init(void); +void dst_subsys_init(void); /* Flags for xfrm_lookup flags argument. */ enum { diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h new file mode 100644 index 000000000000..4f7694f3c7d0 --- /dev/null +++ b/include/net/dst_metadata.h @@ -0,0 +1,32 @@ +#ifndef __NET_DST_METADATA_H +#define __NET_DST_METADATA_H 1 + +#include +#include +#include + +struct metadata_dst { + struct dst_entry dst; + size_t opts_len; +}; + +static inline struct metadata_dst *skb_metadata_dst(struct sk_buff *skb) +{ + struct metadata_dst *md_dst = (struct metadata_dst *) skb_dst(skb); + + if (md_dst && md_dst->dst.flags & DST_METADATA) + return md_dst; + + return NULL; +} + +static inline bool skb_valid_dst(const struct sk_buff *skb) +{ + struct dst_entry *dst = skb_dst(skb); + + return dst && !(dst->flags & DST_METADATA); +} + +struct metadata_dst *metadata_dst_alloc(u8 optslen, gfp_t flags); + +#endif /* __NET_DST_METADATA_H */ diff --git a/net/core/dev.c b/net/core/dev.c index 2ee15afb412d..cb52cba30ae8 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -7669,7 +7669,7 @@ static int __init net_dev_init(void) open_softirq(NET_RX_SOFTIRQ, net_rx_action); hotcpu_notifier(dev_cpu_callback, 0); - dst_init(); + dst_subsys_init(); rc = 0; out: return rc; diff --git a/net/core/dst.c b/net/core/dst.c index e956ce6d1378..917364f0d0be 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -22,6 +22,7 @@ #include #include +#include /* * Theory of operations: @@ -158,19 +159,10 @@ const u32 dst_default_metrics[RTAX_MAX + 1] = { [RTAX_MAX] = 0xdeadbeef, }; - -void *dst_alloc(struct dst_ops *ops, struct net_device *dev, - int initial_ref, int initial_obsolete, unsigned short flags) +void dst_init(struct dst_entry *dst, struct dst_ops *ops, + struct net_device *dev, int initial_ref, int initial_obsolete, + unsigned short flags) { - struct dst_entry *dst; - - if (ops->gc && dst_entries_get_fast(ops) > ops->gc_thresh) { - if (ops->gc(ops)) - return NULL; - } - dst = kmem_cache_alloc(ops->kmem_cachep, GFP_ATOMIC); - if (!dst) - return NULL; dst->child = NULL; dst->dev = dev; if (dev) @@ -200,6 +192,25 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev, dst->next = NULL; if (!(flags & DST_NOCOUNT)) dst_entries_add(ops, 1); +} +EXPORT_SYMBOL(dst_init); + +void *dst_alloc(struct dst_ops *ops, struct net_device *dev, + int initial_ref, int initial_obsolete, unsigned short flags) +{ + struct dst_entry *dst; + + if (ops->gc && dst_entries_get_fast(ops) > ops->gc_thresh) { + if (ops->gc(ops)) + return NULL; + } + + dst = kmem_cache_alloc(ops->kmem_cachep, GFP_ATOMIC); + if (!dst) + return NULL; + + dst_init(dst, ops, dev, initial_ref, initial_obsolete, flags); + return dst; } EXPORT_SYMBOL(dst_alloc); @@ -248,7 +259,11 @@ again: dst->ops->destroy(dst); if (dst->dev) dev_put(dst->dev); - kmem_cache_free(dst->ops->kmem_cachep, dst); + + if (dst->flags & DST_METADATA) + kfree(dst); + else + kmem_cache_free(dst->ops->kmem_cachep, dst); dst = child; if (dst) { @@ -327,6 +342,47 @@ void __dst_destroy_metrics_generic(struct dst_entry *dst, unsigned long old) } EXPORT_SYMBOL(__dst_destroy_metrics_generic); +static struct dst_ops md_dst_ops = { + .family = AF_UNSPEC, +}; + +static int dst_md_discard_sk(struct sock *sk, struct sk_buff *skb) +{ + WARN_ONCE(1, "Attempting to call output on metadata dst\n"); + kfree_skb(skb); + return 0; +} + +static int dst_md_discard(struct sk_buff *skb) +{ + WARN_ONCE(1, "Attempting to call input on metadata dst\n"); + kfree_skb(skb); + return 0; +} + +struct metadata_dst *metadata_dst_alloc(u8 optslen, gfp_t flags) +{ + struct metadata_dst *md_dst; + struct dst_entry *dst; + + md_dst = kmalloc(sizeof(*md_dst) + optslen, flags); + if (!md_dst) + return ERR_PTR(-ENOMEM); + + dst = &md_dst->dst; + dst_init(dst, &md_dst_ops, NULL, 1, DST_OBSOLETE_NONE, + DST_METADATA | DST_NOCACHE | DST_NOCOUNT); + + dst->input = dst_md_discard; + dst->output = dst_md_discard_sk; + + memset(dst + 1, 0, sizeof(*md_dst) + optslen - sizeof(*dst)); + md_dst->opts_len = optslen; + + return md_dst; +} +EXPORT_SYMBOL_GPL(metadata_dst_alloc); + /* Dirty hack. We did it in 2.2 (in __dst_free), * we have _very_ good reasons not to repeat * this mistake in 2.3, but we have no choice @@ -391,7 +447,7 @@ static struct notifier_block dst_dev_notifier = { .priority = -10, /* must be called after other network notifiers */ }; -void __init dst_init(void) +void __init dst_subsys_init(void) { register_netdevice_notifier(&dst_dev_notifier); } diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 2db4c8773c1b..f4fc8a77aaa7 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -146,6 +146,7 @@ #include #include #include +#include /* * Process Router Attention IP option (RFC 2113) @@ -331,7 +332,7 @@ static int ip_rcv_finish(struct sock *sk, struct sk_buff *skb) * Initialise the virtual path cache for the packet. It describes * how the packet travels inside Linux networking. */ - if (!skb_dst(skb)) { + if (!skb_valid_dst(skb)) { int err = ip_route_input_noref(skb, iph->daddr, iph->saddr, iph->tos, skb->dev); if (unlikely(err)) { diff --git a/net/ipv4/route.c b/net/ipv4/route.c index cd3157c464e6..4c8e84e75871 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1690,6 +1690,8 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, by fib_lookup. */ + skb_dst_drop(skb); + if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr)) goto martian_source; -- cgit v1.3-6-gb490 From b469139e81ca8265fb4797c007f8d3338f4191a5 Mon Sep 17 00:00:00 2001 From: "subashab@codeaurora.org" Date: Fri, 24 Jul 2015 03:03:29 +0000 Subject: dev: Spelling fix in comments Fix the following typo - unchainged -> unchanged Signed-off-by: Subash Abhinov Kasiviswanathan Signed-off-by: David S. Miller --- net/core/dev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index cb52cba30ae8..4870c3556a5a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4995,7 +4995,7 @@ EXPORT_SYMBOL(netdev_all_upper_get_next_dev_rcu); * Gets the next netdev_adjacent->private from the dev's lower neighbour * list, starting from iter position. The caller must hold either hold the * RTNL lock or its own locking that guarantees that the neighbour lower - * list will remain unchainged. + * list will remain unchanged. */ void *netdev_lower_get_next_private(struct net_device *dev, struct list_head **iter) @@ -5050,7 +5050,7 @@ EXPORT_SYMBOL(netdev_lower_get_next_private_rcu); * Gets the next netdev_adjacent from the dev's lower neighbour * list, starting from iter position. The caller must hold RTNL lock or * its own locking that guarantees that the neighbour lower - * list will remain unchainged. + * list will remain unchanged. */ void *netdev_lower_get_next(struct net_device *dev, struct list_head **iter) { -- cgit v1.3-6-gb490 From 906470c19da771e638e7c8e16e16c31995b139cc Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Tue, 18 Aug 2015 10:30:48 +0200 Subject: net: warn if drivers set tx_queue_len = 0 Due to the introduction of IFF_NO_QUEUE, there is a better way for drivers to indicate that no qdisc should be attached by default. Though, the old convention can't be dropped since ignoring that setting would break drivers still using it. Instead, add a warning so out-of-tree driver maintainers get a chance to adjust their code before we finally get rid of any special handling of tx_queue_len == 0. Signed-off-by: Phil Sutter Signed-off-by: David S. Miller --- net/core/dev.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 4870c3556a5a..b1f3f4844e60 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6997,6 +6997,9 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM; setup(dev); + if (!dev->tx_queue_len) + printk(KERN_WARNING "%s uses DEPRECATED zero tx_queue_len - convert driver to use IFF_NO_QUEUE instead.\n", name); + dev->num_tx_queues = txqs; dev->real_num_tx_queues = txqs; if (netif_alloc_netdev_queues(dev)) -- cgit v1.3-6-gb490 From 3b3ae880266d148bf73a573a766bc9b78c08d805 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 26 Aug 2015 23:00:06 +0200 Subject: net: sched: consolidate tc_classify{,_compat} For classifiers getting invoked via tc_classify(), we always need an extra function call into tc_classify_compat(), as both are being exported as symbols and tc_classify() itself doesn't do much except handling of reclassifications when tp->classify() returned with TC_ACT_RECLASSIFY. CBQ and ATM are the only qdiscs that directly call into tc_classify_compat(), all others use tc_classify(). When tc actions are being configured out in the kernel, tc_classify() effectively does nothing besides delegating. We could spare this layer and consolidate both functions. pktgen on single CPU constantly pushing skbs directly into the netif_receive_skb() path with a dummy classifier on ingress qdisc attached, improves slightly from 22.3Mpps to 23.1Mpps. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/net/pkt_sched.h | 4 +--- net/core/dev.c | 2 +- net/sched/sch_api.c | 55 ++++++++++++++++++++++-------------------------- net/sched/sch_atm.c | 2 +- net/sched/sch_cbq.c | 2 +- net/sched/sch_choke.c | 2 +- net/sched/sch_drr.c | 2 +- net/sched/sch_dsmark.c | 2 +- net/sched/sch_fq_codel.c | 2 +- net/sched/sch_hfsc.c | 2 +- net/sched/sch_htb.c | 2 +- net/sched/sch_multiq.c | 2 +- net/sched/sch_prio.c | 2 +- net/sched/sch_qfq.c | 2 +- net/sched/sch_sfb.c | 2 +- net/sched/sch_sfq.c | 2 +- 16 files changed, 40 insertions(+), 47 deletions(-) (limited to 'net/core/dev.c') diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index 2342bf12cb78..401038d2f9b8 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -110,10 +110,8 @@ static inline void qdisc_run(struct Qdisc *q) __qdisc_run(q); } -int tc_classify_compat(struct sk_buff *skb, const struct tcf_proto *tp, - struct tcf_result *res); int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp, - struct tcf_result *res); + struct tcf_result *res, bool compat_mode); static inline __be16 tc_skb_protocol(const struct sk_buff *skb) { diff --git a/net/core/dev.c b/net/core/dev.c index b1f3f4844e60..7bb24f1879b8 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3657,7 +3657,7 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb, skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS); qdisc_bstats_cpu_update(cl->q, skb); - switch (tc_classify(skb, cl, &cl_res)) { + switch (tc_classify(skb, cl, &cl_res, false)) { case TC_ACT_OK: case TC_ACT_RECLASSIFY: skb->tc_index = TC_H_MIN(cl_res.classid); diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index f06aa01d60fd..59c227f26b56 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1806,51 +1806,46 @@ done: * to this qdisc, (optionally) tests for protocol and asks * specific classifiers. */ -int tc_classify_compat(struct sk_buff *skb, const struct tcf_proto *tp, - struct tcf_result *res) +int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp, + struct tcf_result *res, bool compat_mode) { __be16 protocol = tc_skb_protocol(skb); - int err; +#ifdef CONFIG_NET_CLS_ACT + const struct tcf_proto *old_tp = tp; + int limit = 0; +reclassify: +#endif for (; tp; tp = rcu_dereference_bh(tp->next)) { + int err; + if (tp->protocol != protocol && tp->protocol != htons(ETH_P_ALL)) continue; - err = tp->classify(skb, tp, res); + err = tp->classify(skb, tp, res); +#ifdef CONFIG_NET_CLS_ACT + if (unlikely(err == TC_ACT_RECLASSIFY && + !compat_mode)) + goto reset; +#endif if (err >= 0) return err; } - return -1; -} -EXPORT_SYMBOL(tc_classify_compat); -int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp, - struct tcf_result *res) -{ - int err = 0; -#ifdef CONFIG_NET_CLS_ACT - const struct tcf_proto *otp = tp; - int limit = 0; -reclassify: -#endif - - err = tc_classify_compat(skb, tp, res); + return -1; #ifdef CONFIG_NET_CLS_ACT - if (err == TC_ACT_RECLASSIFY) { - tp = otp; - - if (unlikely(limit++ >= MAX_REC_LOOP)) { - net_notice_ratelimited("%s: packet reclassify loop rule prio %u protocol %02x\n", - tp->q->ops->id, - tp->prio & 0xffff, - ntohs(tp->protocol)); - return TC_ACT_SHOT; - } - goto reclassify; +reset: + if (unlikely(limit++ >= MAX_REC_LOOP)) { + net_notice_ratelimited("%s: reclassify loop, rule prio %u, " + "protocol %02x\n", tp->q->ops->id, + tp->prio & 0xffff, ntohs(tp->protocol)); + return TC_ACT_SHOT; } + + tp = old_tp; + goto reclassify; #endif - return err; } EXPORT_SYMBOL(tc_classify); diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index e3e2cc5fd068..1911af3ca7c0 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -375,7 +375,7 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch) list_for_each_entry(flow, &p->flows, list) { fl = rcu_dereference_bh(flow->filter_list); if (fl) { - result = tc_classify_compat(skb, fl, &res); + result = tc_classify(skb, fl, &res, true); if (result < 0) continue; flow = (struct atm_flow_data *)res.class; diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index beeb75f80fdb..c538d9e4a8f6 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -240,7 +240,7 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) /* * Step 2+n. Apply classifier. */ - result = tc_classify_compat(skb, fl, &res); + result = tc_classify(skb, fl, &res, true); if (!fl || result < 0) goto fallback; diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c index 6a783afe4960..665bde07916b 100644 --- a/net/sched/sch_choke.c +++ b/net/sched/sch_choke.c @@ -201,7 +201,7 @@ static bool choke_classify(struct sk_buff *skb, int result; fl = rcu_dereference_bh(q->filter_list); - result = tc_classify(skb, fl, &res); + result = tc_classify(skb, fl, &res, false); if (result >= 0) { #ifdef CONFIG_NET_CLS_ACT switch (result) { diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index 338706092c27..f26bdea875c1 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -331,7 +331,7 @@ static struct drr_class *drr_classify(struct sk_buff *skb, struct Qdisc *sch, *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; fl = rcu_dereference_bh(q->filter_list); - result = tc_classify(skb, fl, &res); + result = tc_classify(skb, fl, &res, false); if (result >= 0) { #ifdef CONFIG_NET_CLS_ACT switch (result) { diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index 66700a6116aa..c4d45fd8c551 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -230,7 +230,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch) else { struct tcf_result res; struct tcf_proto *fl = rcu_dereference_bh(p->filter_list); - int result = tc_classify(skb, fl, &res); + int result = tc_classify(skb, fl, &res, false); pr_debug("result %d class 0x%04x\n", result, res.classid); diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index a9ba030435a2..4c834e93dafb 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -92,7 +92,7 @@ static unsigned int fq_codel_classify(struct sk_buff *skb, struct Qdisc *sch, return fq_codel_hash(q, skb) + 1; *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; - result = tc_classify(skb, filter, &res); + result = tc_classify(skb, filter, &res, false); if (result >= 0) { #ifdef CONFIG_NET_CLS_ACT switch (result) { diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index e6c7416d0332..b7ebe2c87586 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1165,7 +1165,7 @@ hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; head = &q->root; tcf = rcu_dereference_bh(q->root.filter_list); - while (tcf && (result = tc_classify(skb, tcf, &res)) >= 0) { + while (tcf && (result = tc_classify(skb, tcf, &res, false)) >= 0) { #ifdef CONFIG_NET_CLS_ACT switch (result) { case TC_ACT_QUEUED: diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index cf4b0f865d1b..15ccd7f8fb2a 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -229,7 +229,7 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, } *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; - while (tcf && (result = tc_classify(skb, tcf, &res)) >= 0) { + while (tcf && (result = tc_classify(skb, tcf, &res, false)) >= 0) { #ifdef CONFIG_NET_CLS_ACT switch (result) { case TC_ACT_QUEUED: diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c index 42dd218871e0..4e904ca0af9d 100644 --- a/net/sched/sch_multiq.c +++ b/net/sched/sch_multiq.c @@ -46,7 +46,7 @@ multiq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) int err; *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; - err = tc_classify(skb, fl, &res); + err = tc_classify(skb, fl, &res, false); #ifdef CONFIG_NET_CLS_ACT switch (err) { case TC_ACT_STOLEN: diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index 8e5cd34aaa74..ba6487f2741f 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -42,7 +42,7 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; if (TC_H_MAJ(skb->priority) != sch->handle) { fl = rcu_dereference_bh(q->filter_list); - err = tc_classify(skb, fl, &res); + err = tc_classify(skb, fl, &res, false); #ifdef CONFIG_NET_CLS_ACT switch (err) { case TC_ACT_STOLEN: diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index ffaeea63d473..3dc3a6e56052 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -717,7 +717,7 @@ static struct qfq_class *qfq_classify(struct sk_buff *skb, struct Qdisc *sch, *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; fl = rcu_dereference_bh(q->filter_list); - result = tc_classify(skb, fl, &res); + result = tc_classify(skb, fl, &res, false); if (result >= 0) { #ifdef CONFIG_NET_CLS_ACT switch (result) { diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c index dcdff5c769a1..5bbb6332ec57 100644 --- a/net/sched/sch_sfb.c +++ b/net/sched/sch_sfb.c @@ -258,7 +258,7 @@ static bool sfb_classify(struct sk_buff *skb, struct tcf_proto *fl, struct tcf_result res; int result; - result = tc_classify(skb, fl, &res); + result = tc_classify(skb, fl, &res, false); if (result >= 0) { #ifdef CONFIG_NET_CLS_ACT switch (result) { diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 52f75a5473e1..3abab534eb5c 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -179,7 +179,7 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch, return sfq_hash(q, skb) + 1; *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; - result = tc_classify(skb, fl, &res); + result = tc_classify(skb, fl, &res, false); if (result >= 0) { #ifdef CONFIG_NET_CLS_ACT switch (result) { -- cgit v1.3-6-gb490 From 0e4ead9d7b3655d76371604abb9b0dcc4e79bb7d Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 27 Aug 2015 09:31:18 +0200 Subject: net: introduce change upper device notifier change info Add info that is passed along with NETDEV_CHANGEUPPER event. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/netdevice.h | 7 +++++++ net/core/dev.c | 16 ++++++++++++++-- 2 files changed, 21 insertions(+), 2 deletions(-) (limited to 'net/core/dev.c') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 6abe0d6f1e1d..39f30daac483 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2127,6 +2127,13 @@ struct netdev_notifier_change_info { unsigned int flags_changed; }; +struct netdev_notifier_changeupper_info { + struct netdev_notifier_info info; /* must be first */ + struct net_device *upper_dev; /* new upper dev */ + bool master; /* is upper dev master */ + bool linking; /* is the nofication for link or unlink */ +}; + static inline void netdev_notifier_info_init(struct netdev_notifier_info *info, struct net_device *dev) { diff --git a/net/core/dev.c b/net/core/dev.c index 7bb24f1879b8..a8e6cf4298d3 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5311,6 +5311,7 @@ static int __netdev_upper_dev_link(struct net_device *dev, struct net_device *upper_dev, bool master, void *private) { + struct netdev_notifier_changeupper_info changeupper_info; struct netdev_adjacent *i, *j, *to_i, *to_j; int ret = 0; @@ -5329,6 +5330,10 @@ static int __netdev_upper_dev_link(struct net_device *dev, if (master && netdev_master_upper_dev_get(dev)) return -EBUSY; + changeupper_info.upper_dev = upper_dev; + changeupper_info.master = master; + changeupper_info.linking = true; + ret = __netdev_adjacent_dev_link_neighbour(dev, upper_dev, private, master); if (ret) @@ -5367,7 +5372,8 @@ static int __netdev_upper_dev_link(struct net_device *dev, goto rollback_lower_mesh; } - call_netdevice_notifiers(NETDEV_CHANGEUPPER, dev); + call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev, + &changeupper_info.info); return 0; rollback_lower_mesh: @@ -5462,9 +5468,14 @@ EXPORT_SYMBOL(netdev_master_upper_dev_link_private); void netdev_upper_dev_unlink(struct net_device *dev, struct net_device *upper_dev) { + struct netdev_notifier_changeupper_info changeupper_info; struct netdev_adjacent *i, *j; ASSERT_RTNL(); + changeupper_info.upper_dev = upper_dev; + changeupper_info.master = netdev_master_upper_dev_get(dev) == upper_dev; + changeupper_info.linking = false; + __netdev_adjacent_dev_unlink_neighbour(dev, upper_dev); /* Here is the tricky part. We must remove all dev's lower @@ -5484,7 +5495,8 @@ void netdev_upper_dev_unlink(struct net_device *dev, list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) __netdev_adjacent_dev_unlink(dev, i->dev); - call_netdevice_notifiers(NETDEV_CHANGEUPPER, dev); + call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev, + &changeupper_info.info); } EXPORT_SYMBOL(netdev_upper_dev_unlink); -- cgit v1.3-6-gb490 From f84bb1eac0275283ccd76455e20f926e186ea8c8 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Thu, 27 Aug 2015 21:21:36 +0200 Subject: net: fix IFF_NO_QUEUE for drivers using alloc_netdev Printing a warning in alloc_netdev_mqs() if tx_queue_len is zero and IFF_NO_QUEUE not set is not appropriate since drivers may use one of the alloc_netdev* macros instead of alloc_etherdev*, thereby not intentionally leaving tx_queue_len uninitialized. Instead check here if tx_queue_len is zero and set IFF_NO_QUEUE, so the value of tx_queue_len can be ignored in net/sched_generic.c. Fixes: 906470c ("net: warn if drivers set tx_queue_len = 0") Signed-off-by: Phil Sutter Signed-off-by: David S. Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index a8e6cf4298d3..877c84834d81 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -7010,7 +7010,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, setup(dev); if (!dev->tx_queue_len) - printk(KERN_WARNING "%s uses DEPRECATED zero tx_queue_len - convert driver to use IFF_NO_QUEUE instead.\n", name); + dev->priv_flags |= IFF_NO_QUEUE; dev->num_tx_queues = txqs; dev->real_num_tx_queues = txqs; -- cgit v1.3-6-gb490