aboutsummaryrefslogtreecommitdiffstats
path: root/net/core
diff options
context:
space:
mode:
Diffstat (limited to 'net/core')
-rw-r--r--net/core/dev.c251
-rw-r--r--net/core/dst.c39
-rw-r--r--net/core/ethtool.c2
-rw-r--r--net/core/filter.c6
-rw-r--r--net/core/neighbour.c13
-rw-r--r--net/core/net-sysfs.c17
-rw-r--r--net/core/pktgen.c234
-rw-r--r--net/core/rtnetlink.c43
-rw-r--r--net/core/skbuff.c4
9 files changed, 398 insertions, 211 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index b6d0bf875a8e..f46ee357ff2e 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -132,6 +132,7 @@
#include <trace/events/skb.h>
#include <linux/pci.h>
#include <linux/inetdevice.h>
+#include <linux/cpu_rmap.h>
#include "net-sysfs.h"
@@ -1286,7 +1287,7 @@ static int __dev_close(struct net_device *dev)
return __dev_close_many(&single);
}
-int dev_close_many(struct list_head *head)
+static int dev_close_many(struct list_head *head)
{
struct net_device *dev, *tmp;
LIST_HEAD(tmp_list);
@@ -1594,6 +1595,48 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
rcu_read_unlock();
}
+/* netif_setup_tc - Handle tc mappings on real_num_tx_queues change
+ * @dev: Network device
+ * @txq: number of queues available
+ *
+ * If real_num_tx_queues is changed the tc mappings may no longer be
+ * valid. To resolve this verify the tc mapping remains valid and if
+ * not NULL the mapping. With no priorities mapping to this
+ * offset/count pair it will no longer be used. In the worst case TC0
+ * is invalid nothing can be done so disable priority mappings. If is
+ * expected that drivers will fix this mapping if they can before
+ * calling netif_set_real_num_tx_queues.
+ */
+static void netif_setup_tc(struct net_device *dev, unsigned int txq)
+{
+ int i;
+ struct netdev_tc_txq *tc = &dev->tc_to_txq[0];
+
+ /* If TC0 is invalidated disable TC mapping */
+ if (tc->offset + tc->count > txq) {
+ pr_warning("Number of in use tx queues changed "
+ "invalidating tc mappings. Priority "
+ "traffic classification disabled!\n");
+ dev->num_tc = 0;
+ return;
+ }
+
+ /* Invalidated prio to tc mappings set to TC0 */
+ for (i = 1; i < TC_BITMASK + 1; i++) {
+ int q = netdev_get_prio_tc_map(dev, i);
+
+ tc = &dev->tc_to_txq[q];
+ if (tc->offset + tc->count > txq) {
+ pr_warning("Number of in use tx queues "
+ "changed. Priority %i to tc "
+ "mapping %i is no longer valid "
+ "setting map to 0\n",
+ i, q);
+ netdev_set_prio_tc_map(dev, i, 0);
+ }
+ }
+}
+
/*
* Routine to help set real_num_tx_queues. To avoid skbs mapped to queues
* greater then real_num_tx_queues stale skbs on the qdisc must be flushed.
@@ -1613,6 +1656,9 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
if (rc)
return rc;
+ if (dev->num_tc)
+ netif_setup_tc(dev, txq);
+
if (txq < dev->real_num_tx_queues)
qdisc_reset_all_tx_gt(dev, txq);
}
@@ -1812,7 +1858,7 @@ EXPORT_SYMBOL(skb_checksum_help);
* It may return NULL if the skb requires no segmentation. This is
* only possible when GSO is used for verifying header integrity.
*/
-struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
+struct sk_buff *skb_gso_segment(struct sk_buff *skb, u32 features)
{
struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
struct packet_type *ptype;
@@ -2000,7 +2046,7 @@ static bool can_checksum_protocol(unsigned long features, __be16 protocol)
protocol == htons(ETH_P_FCOE)));
}
-static int harmonize_features(struct sk_buff *skb, __be16 protocol, int features)
+static u32 harmonize_features(struct sk_buff *skb, __be16 protocol, u32 features)
{
if (!can_checksum_protocol(features, protocol)) {
features &= ~NETIF_F_ALL_CSUM;
@@ -2012,10 +2058,10 @@ static int harmonize_features(struct sk_buff *skb, __be16 protocol, int features
return features;
}
-int netif_skb_features(struct sk_buff *skb)
+u32 netif_skb_features(struct sk_buff *skb)
{
__be16 protocol = skb->protocol;
- int features = skb->dev->features;
+ u32 features = skb->dev->features;
if (protocol == htons(ETH_P_8021Q)) {
struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
@@ -2060,7 +2106,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
int rc = NETDEV_TX_OK;
if (likely(!skb->next)) {
- int features;
+ u32 features;
/*
* If device doesnt need skb->dst, release it right now while
@@ -2162,6 +2208,8 @@ u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb,
unsigned int num_tx_queues)
{
u32 hash;
+ u16 qoffset = 0;
+ u16 qcount = num_tx_queues;
if (skb_rx_queue_recorded(skb)) {
hash = skb_get_rx_queue(skb);
@@ -2170,13 +2218,19 @@ u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb,
return hash;
}
+ if (dev->num_tc) {
+ u8 tc = netdev_get_prio_tc_map(dev, skb->priority);
+ qoffset = dev->tc_to_txq[tc].offset;
+ qcount = dev->tc_to_txq[tc].count;
+ }
+
if (skb->sk && skb->sk->sk_hash)
hash = skb->sk->sk_hash;
else
hash = (__force u16) skb->protocol ^ skb->rxhash;
hash = jhash_1word(hash, hashrnd);
- return (u16) (((u64) hash * num_tx_queues) >> 32);
+ return (u16) (((u64) hash * qcount) >> 32) + qoffset;
}
EXPORT_SYMBOL(__skb_tx_hash);
@@ -2273,15 +2327,18 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
struct netdev_queue *txq)
{
spinlock_t *root_lock = qdisc_lock(q);
- bool contended = qdisc_is_running(q);
+ bool contended;
int rc;
+ qdisc_skb_cb(skb)->pkt_len = skb->len;
+ qdisc_calculate_pkt_len(skb, q);
/*
* Heuristic to force contended enqueues to serialize on a
* separate lock before trying to get qdisc main lock.
* This permits __QDISC_STATE_RUNNING owner to get the lock more often
* and dequeue packets faster.
*/
+ contended = qdisc_is_running(q);
if (unlikely(contended))
spin_lock(&q->busylock);
@@ -2299,7 +2356,6 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE))
skb_dst_force(skb);
- qdisc_skb_cb(skb)->pkt_len = skb->len;
qdisc_bstats_update(q, skb);
if (sch_direct_xmit(skb, q, dev, txq, root_lock)) {
@@ -2314,7 +2370,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
rc = NET_XMIT_SUCCESS;
} else {
skb_dst_force(skb);
- rc = qdisc_enqueue_root(skb, q);
+ rc = q->enqueue(skb, q) & NET_XMIT_MASK;
if (qdisc_run_begin(q)) {
if (unlikely(contended)) {
spin_unlock(&q->busylock);
@@ -2533,6 +2589,53 @@ EXPORT_SYMBOL(__skb_get_rxhash);
struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly;
EXPORT_SYMBOL(rps_sock_flow_table);
+static struct rps_dev_flow *
+set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
+ struct rps_dev_flow *rflow, u16 next_cpu)
+{
+ u16 tcpu;
+
+ tcpu = rflow->cpu = next_cpu;
+ if (tcpu != RPS_NO_CPU) {
+#ifdef CONFIG_RFS_ACCEL
+ struct netdev_rx_queue *rxqueue;
+ struct rps_dev_flow_table *flow_table;
+ struct rps_dev_flow *old_rflow;
+ u32 flow_id;
+ u16 rxq_index;
+ int rc;
+
+ /* Should we steer this flow to a different hardware queue? */
+ if (!skb_rx_queue_recorded(skb) || !dev->rx_cpu_rmap)
+ goto out;
+ rxq_index = cpu_rmap_lookup_index(dev->rx_cpu_rmap, next_cpu);
+ if (rxq_index == skb_get_rx_queue(skb))
+ goto out;
+
+ rxqueue = dev->_rx + rxq_index;
+ flow_table = rcu_dereference(rxqueue->rps_flow_table);
+ if (!flow_table)
+ goto out;
+ flow_id = skb->rxhash & flow_table->mask;
+ rc = dev->netdev_ops->ndo_rx_flow_steer(dev, skb,
+ rxq_index, flow_id);
+ if (rc < 0)
+ goto out;
+ old_rflow = rflow;
+ rflow = &flow_table->flows[flow_id];
+ rflow->cpu = next_cpu;
+ rflow->filter = rc;
+ if (old_rflow->filter == rflow->filter)
+ old_rflow->filter = RPS_NO_FILTER;
+ out:
+#endif
+ rflow->last_qtail =
+ per_cpu(softnet_data, tcpu).input_queue_head;
+ }
+
+ return rflow;
+}
+
/*
* get_rps_cpu is called from netif_receive_skb and returns the target
* CPU from the RPS map of the receiving queue for a given skb.
@@ -2604,12 +2707,9 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
if (unlikely(tcpu != next_cpu) &&
(tcpu == RPS_NO_CPU || !cpu_online(tcpu) ||
((int)(per_cpu(softnet_data, tcpu).input_queue_head -
- rflow->last_qtail)) >= 0)) {
- tcpu = rflow->cpu = next_cpu;
- if (tcpu != RPS_NO_CPU)
- rflow->last_qtail = per_cpu(softnet_data,
- tcpu).input_queue_head;
- }
+ rflow->last_qtail)) >= 0))
+ rflow = set_rps_cpu(dev, skb, rflow, next_cpu);
+
if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) {
*rflowp = rflow;
cpu = tcpu;
@@ -2630,6 +2730,46 @@ done:
return cpu;
}
+#ifdef CONFIG_RFS_ACCEL
+
+/**
+ * rps_may_expire_flow - check whether an RFS hardware filter may be removed
+ * @dev: Device on which the filter was set
+ * @rxq_index: RX queue index
+ * @flow_id: Flow ID passed to ndo_rx_flow_steer()
+ * @filter_id: Filter ID returned by ndo_rx_flow_steer()
+ *
+ * Drivers that implement ndo_rx_flow_steer() should periodically call
+ * this function for each installed filter and remove the filters for
+ * which it returns %true.
+ */
+bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index,
+ u32 flow_id, u16 filter_id)
+{
+ struct netdev_rx_queue *rxqueue = dev->_rx + rxq_index;
+ struct rps_dev_flow_table *flow_table;
+ struct rps_dev_flow *rflow;
+ bool expire = true;
+ int cpu;
+
+ rcu_read_lock();
+ flow_table = rcu_dereference(rxqueue->rps_flow_table);
+ if (flow_table && flow_id <= flow_table->mask) {
+ rflow = &flow_table->flows[flow_id];
+ cpu = ACCESS_ONCE(rflow->cpu);
+ if (rflow->filter == filter_id && cpu != RPS_NO_CPU &&
+ ((int)(per_cpu(softnet_data, cpu).input_queue_head -
+ rflow->last_qtail) <
+ (int)(10 * flow_table->mask)))
+ expire = false;
+ }
+ rcu_read_unlock();
+ return expire;
+}
+EXPORT_SYMBOL(rps_may_expire_flow);
+
+#endif /* CONFIG_RFS_ACCEL */
+
/* Called from hardirq (IPI) context */
static void rps_trigger_softirq(void *data)
{
@@ -3914,12 +4054,15 @@ void *dev_seq_start(struct seq_file *seq, loff_t *pos)
void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
- struct net_device *dev = (v == SEQ_START_TOKEN) ?
- first_net_device(seq_file_net(seq)) :
- next_net_device((struct net_device *)v);
+ struct net_device *dev = v;
+
+ if (v == SEQ_START_TOKEN)
+ dev = first_net_device_rcu(seq_file_net(seq));
+ else
+ dev = next_net_device_rcu(dev);
++*pos;
- return rcu_dereference(dev);
+ return dev;
}
void dev_seq_stop(struct seq_file *seq, void *v)
@@ -4576,6 +4719,17 @@ int dev_set_mtu(struct net_device *dev, int new_mtu)
EXPORT_SYMBOL(dev_set_mtu);
/**
+ * dev_set_group - Change group this device belongs to
+ * @dev: device
+ * @new_group: group this device should belong to
+ */
+void dev_set_group(struct net_device *dev, int new_group)
+{
+ dev->group = new_group;
+}
+EXPORT_SYMBOL(dev_set_group);
+
+/**
* dev_set_mac_address - Change Media Access Control Address
* @dev: device
* @sa: new address
@@ -5065,41 +5219,49 @@ static void rollback_registered(struct net_device *dev)
rollback_registered_many(&single);
}
-unsigned long netdev_fix_features(unsigned long features, const char *name)
+u32 netdev_fix_features(struct net_device *dev, u32 features)
{
+ /* Fix illegal checksum combinations */
+ if ((features & NETIF_F_HW_CSUM) &&
+ (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
+ netdev_info(dev, "mixed HW and IP checksum settings.\n");
+ features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
+ }
+
+ if ((features & NETIF_F_NO_CSUM) &&
+ (features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
+ netdev_info(dev, "mixed no checksumming and other settings.\n");
+ features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM);
+ }
+
/* Fix illegal SG+CSUM combinations. */
if ((features & NETIF_F_SG) &&
!(features & NETIF_F_ALL_CSUM)) {
- if (name)
- printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no "
- "checksum feature.\n", name);
+ netdev_info(dev,
+ "Dropping NETIF_F_SG since no checksum feature.\n");
features &= ~NETIF_F_SG;
}
/* TSO requires that SG is present as well. */
if ((features & NETIF_F_TSO) && !(features & NETIF_F_SG)) {
- if (name)
- printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no "
- "SG feature.\n", name);
+ netdev_info(dev, "Dropping NETIF_F_TSO since no SG feature.\n");
features &= ~NETIF_F_TSO;
}
+ /* UFO needs SG and checksumming */
if (features & NETIF_F_UFO) {
/* maybe split UFO into V4 and V6? */
if (!((features & NETIF_F_GEN_CSUM) ||
(features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))
== (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
- if (name)
- printk(KERN_ERR "%s: Dropping NETIF_F_UFO "
- "since no checksum offload features.\n",
- name);
+ netdev_info(dev,
+ "Dropping NETIF_F_UFO since no checksum offload features.\n");
features &= ~NETIF_F_UFO;
}
if (!(features & NETIF_F_SG)) {
- if (name)
- printk(KERN_ERR "%s: Dropping NETIF_F_UFO "
- "since no NETIF_F_SG feature.\n", name);
+ netdev_info(dev,
+ "Dropping NETIF_F_UFO since no NETIF_F_SG feature.\n");
features &= ~NETIF_F_UFO;
}
}
@@ -5242,22 +5404,7 @@ int register_netdevice(struct net_device *dev)
if (dev->iflink == -1)
dev->iflink = dev->ifindex;
- /* Fix illegal checksum combinations */
- if ((dev->features & NETIF_F_HW_CSUM) &&
- (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
- printk(KERN_NOTICE "%s: mixed HW and IP checksum settings.\n",
- dev->name);
- dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
- }
-
- if ((dev->features & NETIF_F_NO_CSUM) &&
- (dev->features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
- printk(KERN_NOTICE "%s: mixed no checksumming and other settings.\n",
- dev->name);
- dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM);
- }
-
- dev->features = netdev_fix_features(dev->features, dev->name);
+ dev->features = netdev_fix_features(dev, dev->features);
/* Enable software GSO if SG is supported. */
if (dev->features & NETIF_F_SG)
@@ -5682,6 +5829,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
dev->priv_flags = IFF_XMIT_DST_RELEASE;
setup(dev);
strcpy(dev->name, name);
+ dev->group = INIT_NETDEV_GROUP;
return dev;
free_pcpu:
@@ -5992,8 +6140,7 @@ static int dev_cpu_callback(struct notifier_block *nfb,
* @one to the master device with current feature set @all. Will not
* enable anything that is off in @mask. Returns the new feature set.
*/
-unsigned long netdev_increment_features(unsigned long all, unsigned long one,
- unsigned long mask)
+u32 netdev_increment_features(u32 all, u32 one, u32 mask)
{
/* If device needs checksumming, downgrade to it. */
if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM))
diff --git a/net/core/dst.c b/net/core/dst.c
index b99c7c7ffce2..c1674fde827d 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -164,6 +164,8 @@ int dst_discard(struct sk_buff *skb)
}
EXPORT_SYMBOL(dst_discard);
+const u32 dst_default_metrics[RTAX_MAX];
+
void *dst_alloc(struct dst_ops *ops)
{
struct dst_entry *dst;
@@ -180,6 +182,7 @@ void *dst_alloc(struct dst_ops *ops)
dst->lastuse = jiffies;
dst->path = dst;
dst->input = dst->output = dst_discard;
+ dst_init_metrics(dst, dst_default_metrics, true);
#if RT_CACHE_DEBUG >= 2
atomic_inc(&dst_total);
#endif
@@ -282,6 +285,42 @@ void dst_release(struct dst_entry *dst)
}
EXPORT_SYMBOL(dst_release);
+u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old)
+{
+ u32 *p = kmalloc(sizeof(u32) * RTAX_MAX, GFP_ATOMIC);
+
+ if (p) {
+ u32 *old_p = __DST_METRICS_PTR(old);
+ unsigned long prev, new;
+
+ memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
+
+ new = (unsigned long) p;
+ prev = cmpxchg(&dst->_metrics, old, new);
+
+ if (prev != old) {
+ kfree(p);
+ p = __DST_METRICS_PTR(prev);
+ if (prev & DST_METRICS_READ_ONLY)
+ p = NULL;
+ }
+ }
+ return p;
+}
+EXPORT_SYMBOL(dst_cow_metrics_generic);
+
+/* Caller asserts that dst_metrics_read_only(dst) is false. */
+void __dst_destroy_metrics_generic(struct dst_entry *dst, unsigned long old)
+{
+ unsigned long prev, new;
+
+ new = (unsigned long) dst_default_metrics;
+ prev = cmpxchg(&dst->_metrics, old, new);
+ if (prev == old)
+ kfree(__DST_METRICS_PTR(old));
+}
+EXPORT_SYMBOL(__dst_destroy_metrics_generic);
+
/**
* skb_dst_set_noref - sets skb dst, without a reference
* @skb: buffer
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index ff2302910b5e..5984ee0c7136 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -1458,7 +1458,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
void __user *useraddr = ifr->ifr_data;
u32 ethcmd;
int rc;
- unsigned long old_features;
+ u32 old_features;
if (!dev || !netif_device_present(dev))
return -ENODEV;
diff --git a/net/core/filter.c b/net/core/filter.c
index afc58374ca96..232b1873bb28 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -142,14 +142,14 @@ int sk_filter(struct sock *sk, struct sk_buff *skb)
if (err)
return err;
- rcu_read_lock_bh();
- filter = rcu_dereference_bh(sk->sk_filter);
+ rcu_read_lock();
+ filter = rcu_dereference(sk->sk_filter);
if (filter) {
unsigned int pkt_len = sk_run_filter(skb, filter->insns);
err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM;
}
- rcu_read_unlock_bh();
+ rcu_read_unlock();
return err;
}
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 60a902913429..799f06e03a22 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -316,7 +316,7 @@ static struct neigh_hash_table *neigh_hash_alloc(unsigned int entries)
{
size_t size = entries * sizeof(struct neighbour *);
struct neigh_hash_table *ret;
- struct neighbour **buckets;
+ struct neighbour __rcu **buckets;
ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
if (!ret)
@@ -324,14 +324,14 @@ static struct neigh_hash_table *neigh_hash_alloc(unsigned int entries)
if (size <= PAGE_SIZE)
buckets = kzalloc(size, GFP_ATOMIC);
else
- buckets = (struct neighbour **)
+ buckets = (struct neighbour __rcu **)
__get_free_pages(GFP_ATOMIC | __GFP_ZERO,
get_order(size));
if (!buckets) {
kfree(ret);
return NULL;
}
- rcu_assign_pointer(ret->hash_buckets, buckets);
+ ret->hash_buckets = buckets;
ret->hash_mask = entries - 1;
get_random_bytes(&ret->hash_rnd, sizeof(ret->hash_rnd));
return ret;
@@ -343,7 +343,7 @@ static void neigh_hash_free_rcu(struct rcu_head *head)
struct neigh_hash_table,
rcu);
size_t size = (nht->hash_mask + 1) * sizeof(struct neighbour *);
- struct neighbour **buckets = nht->hash_buckets;
+ struct neighbour __rcu **buckets = nht->hash_buckets;
if (size <= PAGE_SIZE)
kfree(buckets);
@@ -1540,7 +1540,7 @@ void neigh_table_init_no_netlink(struct neigh_table *tbl)
panic("cannot create neighbour proc dir entry");
#endif
- tbl->nht = neigh_hash_alloc(8);
+ RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(8));
phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);
@@ -1602,7 +1602,8 @@ int neigh_table_clear(struct neigh_table *tbl)
}
write_unlock(&neigh_tbl_lock);
- call_rcu(&tbl->nht->rcu, neigh_hash_free_rcu);
+ call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
+ neigh_hash_free_rcu);
tbl->nht = NULL;
kfree(tbl->phash_buckets);
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index e23c01be5a5b..2e4a393dfc3b 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -99,7 +99,7 @@ NETDEVICE_SHOW(addr_assign_type, fmt_dec);
NETDEVICE_SHOW(addr_len, fmt_dec);
NETDEVICE_SHOW(iflink, fmt_dec);
NETDEVICE_SHOW(ifindex, fmt_dec);
-NETDEVICE_SHOW(features, fmt_long_hex);
+NETDEVICE_SHOW(features, fmt_hex);
NETDEVICE_SHOW(type, fmt_dec);
NETDEVICE_SHOW(link_mode, fmt_dec);
@@ -295,6 +295,20 @@ static ssize_t show_ifalias(struct device *dev,
return ret;
}
+NETDEVICE_SHOW(group, fmt_dec);
+
+static int change_group(struct net_device *net, unsigned long new_group)
+{
+ dev_set_group(net, (int) new_group);
+ return 0;
+}
+
+static ssize_t store_group(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t len)
+{
+ return netdev_store(dev, attr, buf, len, change_group);
+}
+
static struct device_attribute net_class_attributes[] = {
__ATTR(addr_assign_type, S_IRUGO, show_addr_assign_type, NULL),
__ATTR(addr_len, S_IRUGO, show_addr_len, NULL),
@@ -316,6 +330,7 @@ static struct device_attribute net_class_attributes[] = {
__ATTR(flags, S_IRUGO | S_IWUSR, show_flags, store_flags),
__ATTR(tx_queue_len, S_IRUGO | S_IWUSR, show_tx_queue_len,
store_tx_queue_len),
+ __ATTR(group, S_IRUGO | S_IWUSR, show_group, store_group),
{}
};
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index a9e7fc4c461f..d73b77adb676 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -251,6 +251,7 @@ struct pktgen_dev {
int max_pkt_size; /* = ETH_ZLEN; */
int pkt_overhead; /* overhead for MPLS, VLANs, IPSEC etc */
int nfrags;
+ struct page *page;
u64 delay; /* nano-seconds */
__u64 count; /* Default No packets to send */
@@ -1134,6 +1135,10 @@ static ssize_t pktgen_if_write(struct file *file,
if (node_possible(value)) {
pkt_dev->node = value;
sprintf(pg_result, "OK: node=%d", pkt_dev->node);
+ if (pkt_dev->page) {
+ put_page(pkt_dev->page);
+ pkt_dev->page = NULL;
+ }
}
else
sprintf(pg_result, "ERROR: node not possible");
@@ -2605,6 +2610,90 @@ static inline __be16 build_tci(unsigned int id, unsigned int cfi,
return htons(id | (cfi << 12) | (prio << 13));
}
+static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb,
+ int datalen)
+{
+ struct timeval timestamp;
+ struct pktgen_hdr *pgh;
+
+ pgh = (struct pktgen_hdr *)skb_put(skb, sizeof(*pgh));
+ datalen -= sizeof(*pgh);
+
+ if (pkt_dev->nfrags <= 0) {
+ pgh = (struct pktgen_hdr *)skb_put(skb, datalen);
+ memset(pgh + 1, 0, datalen);
+ } else {
+ int frags = pkt_dev->nfrags;
+ int i, len;
+
+
+ if (frags > MAX_SKB_FRAGS)
+ frags = MAX_SKB_FRAGS;
+ len = datalen - frags * PAGE_SIZE;
+ if (len > 0) {
+ memset(skb_put(skb, len), 0, len);
+ datalen = frags * PAGE_SIZE;
+ }
+
+ i = 0;
+ while (datalen > 0) {
+ if (unlikely(!pkt_dev->page)) {
+ int node = numa_node_id();
+
+ if (pkt_dev->node >= 0 && (pkt_dev->flags & F_NODE))
+ node = pkt_dev->node;
+ pkt_dev->page = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0);
+ if (!pkt_dev->page)
+ break;
+ }
+ skb_shinfo(skb)->frags[i].page = pkt_dev->page;
+ get_page(pkt_dev->page);
+ skb_shinfo(skb)->frags[i].page_offset = 0;
+ skb_shinfo(skb)->frags[i].size =
+ (datalen < PAGE_SIZE ? datalen : PAGE_SIZE);
+ datalen -= skb_shinfo(skb)->frags[i].size;
+ skb->len += skb_shinfo(skb)->frags[i].size;
+ skb->data_len += skb_shinfo(skb)->frags[i].size;
+ i++;
+ skb_shinfo(skb)->nr_frags = i;
+ }
+
+ while (i < frags) {
+ int rem;
+
+ if (i == 0)
+ break;
+
+ rem = skb_shinfo(skb)->frags[i - 1].size / 2;
+ if (rem == 0)
+ break;
+
+ skb_shinfo(skb)->frags[i - 1].size -= rem;
+
+ skb_shinfo(skb)->frags[i] =
+ skb_shinfo(skb)->frags[i - 1];
+ get_page(skb_shinfo(skb)->frags[i].page);
+ skb_shinfo(skb)->frags[i].page =
+ skb_shinfo(skb)->frags[i - 1].page;
+ skb_shinfo(skb)->frags[i].page_offset +=
+ skb_shinfo(skb)->frags[i - 1].size;
+ skb_shinfo(skb)->frags[i].size = rem;
+ i++;
+ skb_shinfo(skb)->nr_frags = i;
+ }
+ }
+
+ /* Stamp the time, and sequence number,
+ * convert them to network byte order
+ */
+ pgh->pgh_magic = htonl(PKTGEN_MAGIC);
+ pgh->seq_num = htonl(pkt_dev->seq_num);
+
+ do_gettimeofday(&timestamp);
+ pgh->tv_sec = htonl(timestamp.tv_sec);
+ pgh->tv_usec = htonl(timestamp.tv_usec);
+}
+
static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
struct pktgen_dev *pkt_dev)
{
@@ -2613,7 +2702,6 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
struct udphdr *udph;
int datalen, iplen;
struct iphdr *iph;
- struct pktgen_hdr *pgh = NULL;
__be16 protocol = htons(ETH_P_IP);
__be32 *mpls;
__be16 *vlan_tci = NULL; /* Encapsulates priority and VLAN ID */
@@ -2729,76 +2817,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
pkt_dev->pkt_overhead);
skb->dev = odev;
skb->pkt_type = PACKET_HOST;
-
- if (pkt_dev->nfrags <= 0) {
- pgh = (struct pktgen_hdr *)skb_put(skb, datalen);
- memset(pgh + 1, 0, datalen - sizeof(struct pktgen_hdr));
- } else {
- int frags = pkt_dev->nfrags;
- int i, len;
-
- pgh = (struct pktgen_hdr *)(((char *)(udph)) + 8);
-
- if (frags > MAX_SKB_FRAGS)
- frags = MAX_SKB_FRAGS;
- if (datalen > frags * PAGE_SIZE) {
- len = datalen - frags * PAGE_SIZE;
- memset(skb_put(skb, len), 0, len);
- datalen = frags * PAGE_SIZE;
- }
-
- i = 0;
- while (datalen > 0) {
- struct page *page = alloc_pages(GFP_KERNEL | __GFP_ZERO, 0);
- skb_shinfo(skb)->frags[i].page = page;
- skb_shinfo(skb)->frags[i].page_offset = 0;
- skb_shinfo(skb)->frags[i].size =
- (datalen < PAGE_SIZE ? datalen : PAGE_SIZE);
- datalen -= skb_shinfo(skb)->frags[i].size;
- skb->len += skb_shinfo(skb)->frags[i].size;
- skb->data_len += skb_shinfo(skb)->frags[i].size;
- i++;
- skb_shinfo(skb)->nr_frags = i;
- }
-
- while (i < frags) {
- int rem;
-
- if (i == 0)
- break;
-
- rem = skb_shinfo(skb)->frags[i - 1].size / 2;
- if (rem == 0)
- break;
-
- skb_shinfo(skb)->frags[i - 1].size -= rem;
-
- skb_shinfo(skb)->frags[i] =
- skb_shinfo(skb)->frags[i - 1];
- get_page(skb_shinfo(skb)->frags[i].page);
- skb_shinfo(skb)->frags[i].page =
- skb_shinfo(skb)->frags[i - 1].page;
- skb_shinfo(skb)->frags[i].page_offset +=
- skb_shinfo(skb)->frags[i - 1].size;
- skb_shinfo(skb)->frags[i].size = rem;
- i++;
- skb_shinfo(skb)->nr_frags = i;
- }
- }
-
- /* Stamp the time, and sequence number,
- * convert them to network byte order
- */
- if (pgh) {
- struct timeval timestamp;
-
- pgh->pgh_magic = htonl(PKTGEN_MAGIC);
- pgh->seq_num = htonl(pkt_dev->seq_num);
-
- do_gettimeofday(&timestamp);
- pgh->tv_sec = htonl(timestamp.tv_sec);
- pgh->tv_usec = htonl(timestamp.tv_usec);
- }
+ pktgen_finalize_skb(pkt_dev, skb, datalen);
#ifdef CONFIG_XFRM
if (!process_ipsec(pkt_dev, skb, protocol))
@@ -2980,7 +2999,6 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
struct udphdr *udph;
int datalen;
struct ipv6hdr *iph;
- struct pktgen_hdr *pgh = NULL;
__be16 protocol = htons(ETH_P_IPV6);
__be32 *mpls;
__be16 *vlan_tci = NULL; /* Encapsulates priority and VLAN ID */
@@ -3083,75 +3101,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
skb->dev = odev;
skb->pkt_type = PACKET_HOST;
- if (pkt_dev->nfrags <= 0)
- pgh = (struct pktgen_hdr *)skb_put(skb, datalen);
- else {
- int frags = pkt_dev->nfrags;
- int i;
-
- pgh = (struct pktgen_hdr *)(((char *)(udph)) + 8);
-
- if (frags > MAX_SKB_FRAGS)
- frags = MAX_SKB_FRAGS;
- if (datalen > frags * PAGE_SIZE) {
- skb_put(skb, datalen - frags * PAGE_SIZE);
- datalen = frags * PAGE_SIZE;
- }
-
- i = 0;
- while (datalen > 0) {
- struct page *page = alloc_pages(GFP_KERNEL, 0);
- skb_shinfo(skb)->frags[i].page = page;
- skb_shinfo(skb)->frags[i].page_offset = 0;
- skb_shinfo(skb)->frags[i].size =
- (datalen < PAGE_SIZE ? datalen : PAGE_SIZE);
- datalen -= skb_shinfo(skb)->frags[i].size;
- skb->len += skb_shinfo(skb)->frags[i].size;
- skb->data_len += skb_shinfo(skb)->frags[i].size;
- i++;
- skb_shinfo(skb)->nr_frags = i;
- }
-
- while (i < frags) {
- int rem;
-
- if (i == 0)
- break;
-
- rem = skb_shinfo(skb)->frags[i - 1].size / 2;
- if (rem == 0)
- break;
-
- skb_shinfo(skb)->frags[i - 1].size -= rem;
-
- skb_shinfo(skb)->frags[i] =
- skb_shinfo(skb)->frags[i - 1];
- get_page(skb_shinfo(skb)->frags[i].page);
- skb_shinfo(skb)->frags[i].page =
- skb_shinfo(skb)->frags[i - 1].page;
- skb_shinfo(skb)->frags[i].page_offset +=
- skb_shinfo(skb)->frags[i - 1].size;
- skb_shinfo(skb)->frags[i].size = rem;
- i++;
- skb_shinfo(skb)->nr_frags = i;
- }
- }
-
- /* Stamp the time, and sequence number,
- * convert them to network byte order
- * should we update cloned packets too ?
- */
- if (pgh) {
- struct timeval timestamp;
-
- pgh->pgh_magic = htonl(PKTGEN_MAGIC);
- pgh->seq_num = htonl(pkt_dev->seq_num);
-
- do_gettimeofday(&timestamp);
- pgh->tv_sec = htonl(timestamp.tv_sec);
- pgh->tv_usec = htonl(timestamp.tv_usec);
- }
- /* pkt_dev->seq_num++; FF: you really mean this? */
+ pktgen_finalize_skb(pkt_dev, skb, datalen);
return skb;
}
@@ -3884,6 +3834,8 @@ static int pktgen_remove_device(struct pktgen_thread *t,
free_SAs(pkt_dev);
#endif
vfree(pkt_dev->flows);
+ if (pkt_dev->page)
+ put_page(pkt_dev->page);
kfree(pkt_dev);
return 0;
}
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 2d65c6bb24c1..da0fe457c858 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -868,6 +868,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
netif_running(dev) ? dev->operstate : IF_OPER_DOWN);
NLA_PUT_U8(skb, IFLA_LINKMODE, dev->link_mode);
NLA_PUT_U32(skb, IFLA_MTU, dev->mtu);
+ NLA_PUT_U32(skb, IFLA_GROUP, dev->group);
if (dev->ifindex != dev->iflink)
NLA_PUT_U32(skb, IFLA_LINK, dev->iflink);
@@ -1264,6 +1265,11 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
modified = 1;
}
+ if (tb[IFLA_GROUP]) {
+ dev_set_group(dev, nla_get_u32(tb[IFLA_GROUP]));
+ modified = 1;
+ }
+
/*
* Interface selected by interface index but interface
* name provided implies that a name change has been
@@ -1541,6 +1547,8 @@ struct net_device *rtnl_create_link(struct net *src_net, struct net *net,
set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE]));
if (tb[IFLA_LINKMODE])
dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]);
+ if (tb[IFLA_GROUP])
+ dev_set_group(dev, nla_get_u32(tb[IFLA_GROUP]));
return dev;
@@ -1551,6 +1559,24 @@ err:
}
EXPORT_SYMBOL(rtnl_create_link);
+static int rtnl_group_changelink(struct net *net, int group,
+ struct ifinfomsg *ifm,
+ struct nlattr **tb)
+{
+ struct net_device *dev;
+ int err;
+
+ for_each_netdev(net, dev) {
+ if (dev->group == group) {
+ err = do_setlink(dev, ifm, tb, NULL, 0);
+ if (err < 0)
+ return err;
+ }
+ }
+
+ return 0;
+}
+
static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
struct net *net = sock_net(skb->sk);
@@ -1578,10 +1604,12 @@ replay:
ifm = nlmsg_data(nlh);
if (ifm->ifi_index > 0)
dev = __dev_get_by_index(net, ifm->ifi_index);
- else if (ifname[0])
- dev = __dev_get_by_name(net, ifname);
- else
- dev = NULL;
+ else {
+ if (ifname[0])
+ dev = __dev_get_by_name(net, ifname);
+ else
+ dev = NULL;
+ }
err = validate_linkmsg(dev, tb);
if (err < 0)
@@ -1645,8 +1673,13 @@ replay:
return do_setlink(dev, ifm, tb, ifname, modified);
}
- if (!(nlh->nlmsg_flags & NLM_F_CREATE))
+ if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
+ if (ifm->ifi_index == 0 && tb[IFLA_GROUP])
+ return rtnl_group_changelink(net,
+ nla_get_u32(tb[IFLA_GROUP]),
+ ifm, tb);
return -ENODEV;
+ }
if (ifm->ifi_index)
return -EOPNOTSUPP;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index d883dcc78b6b..14cf560b4a3e 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -2498,7 +2498,7 @@ EXPORT_SYMBOL_GPL(skb_pull_rcsum);
* a pointer to the first in a list of new skbs for the segments.
* In case of error it returns ERR_PTR(err).
*/
-struct sk_buff *skb_segment(struct sk_buff *skb, int features)
+struct sk_buff *skb_segment(struct sk_buff *skb, u32 features)
{
struct sk_buff *segs = NULL;
struct sk_buff *tail = NULL;
@@ -2508,7 +2508,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features)
unsigned int offset = doffset;
unsigned int headroom;
unsigned int len;
- int sg = features & NETIF_F_SG;
+ int sg = !!(features & NETIF_F_SG);
int nfrags = skb_shinfo(skb)->nr_frags;
int err = -ENOMEM;
int i = 0;