aboutsummaryrefslogtreecommitdiffstats
path: root/net/core
diff options
context:
space:
mode:
Diffstat (limited to 'net/core')
-rw-r--r--net/core/Makefile4
-rw-r--r--net/core/dev.c871
-rw-r--r--net/core/dev_mcast.c46
-rw-r--r--net/core/dst.c202
-rw-r--r--net/core/ethtool.c351
-rw-r--r--net/core/fib_rules.c30
-rw-r--r--net/core/neighbour.c97
-rw-r--r--net/core/net-sysfs.c42
-rw-r--r--net/core/net_namespace.c317
-rw-r--r--net/core/netpoll.c87
-rw-r--r--net/core/pktgen.c155
-rw-r--r--net/core/rtnetlink.c179
-rw-r--r--net/core/scm.c9
-rw-r--r--net/core/skbuff.c18
-rw-r--r--net/core/sock.c18
15 files changed, 1630 insertions, 796 deletions
diff --git a/net/core/Makefile b/net/core/Makefile
index 4751613e1b59..b1332f6d0042 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -3,7 +3,7 @@
#
obj-y := sock.o request_sock.o skbuff.o iovec.o datagram.o stream.o scm.o \
- gen_stats.o gen_estimator.o
+ gen_stats.o gen_estimator.o net_namespace.o
obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
@@ -11,7 +11,7 @@ obj-y += dev.o ethtool.o dev_mcast.o dst.o netevent.o \
neighbour.o rtnetlink.o utils.o link_watch.o filter.o
obj-$(CONFIG_XFRM) += flow.o
-obj-$(CONFIG_SYSFS) += net-sysfs.o
+obj-y += net-sysfs.o
obj-$(CONFIG_NET_PKTGEN) += pktgen.o
obj-$(CONFIG_NETPOLL) += netpoll.o
obj-$(CONFIG_NET_DMA) += user_dma.o
diff --git a/net/core/dev.c b/net/core/dev.c
index a76021c71207..1e169a541ce7 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -92,6 +92,7 @@
#include <linux/etherdevice.h>
#include <linux/notifier.h>
#include <linux/skbuff.h>
+#include <net/net_namespace.h>
#include <net/sock.h>
#include <linux/rtnetlink.h>
#include <linux/proc_fs.h>
@@ -189,25 +190,50 @@ static struct net_dma net_dma = {
* unregister_netdevice(), which must be called with the rtnl
* semaphore held.
*/
-LIST_HEAD(dev_base_head);
DEFINE_RWLOCK(dev_base_lock);
-EXPORT_SYMBOL(dev_base_head);
EXPORT_SYMBOL(dev_base_lock);
#define NETDEV_HASHBITS 8
-static struct hlist_head dev_name_head[1<<NETDEV_HASHBITS];
-static struct hlist_head dev_index_head[1<<NETDEV_HASHBITS];
+#define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS)
-static inline struct hlist_head *dev_name_hash(const char *name)
+static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
{
unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
- return &dev_name_head[hash & ((1<<NETDEV_HASHBITS)-1)];
+ return &net->dev_name_head[hash & ((1 << NETDEV_HASHBITS) - 1)];
}
-static inline struct hlist_head *dev_index_hash(int ifindex)
+static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
{
- return &dev_index_head[ifindex & ((1<<NETDEV_HASHBITS)-1)];
+ return &net->dev_index_head[ifindex & ((1 << NETDEV_HASHBITS) - 1)];
+}
+
+/* Device list insertion */
+static int list_netdevice(struct net_device *dev)
+{
+ struct net *net = dev->nd_net;
+
+ ASSERT_RTNL();
+
+ write_lock_bh(&dev_base_lock);
+ list_add_tail(&dev->dev_list, &net->dev_base_head);
+ hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
+ hlist_add_head(&dev->index_hlist, dev_index_hash(net, dev->ifindex));
+ write_unlock_bh(&dev_base_lock);
+ return 0;
+}
+
+/* Device list removal */
+static void unlist_netdevice(struct net_device *dev)
+{
+ ASSERT_RTNL();
+
+ /* Unlink dev from the device chain */
+ write_lock_bh(&dev_base_lock);
+ list_del(&dev->dev_list);
+ hlist_del(&dev->name_hlist);
+ hlist_del(&dev->index_hlist);
+ write_unlock_bh(&dev_base_lock);
}
/*
@@ -220,17 +246,12 @@ static RAW_NOTIFIER_HEAD(netdev_chain);
* Device drivers call our routines to queue packets here. We empty the
* queue in the local softnet handler.
*/
-DEFINE_PER_CPU(struct softnet_data, softnet_data) = { NULL };
-#ifdef CONFIG_SYSFS
-extern int netdev_sysfs_init(void);
-extern int netdev_register_sysfs(struct net_device *);
-extern void netdev_unregister_sysfs(struct net_device *);
-#else
-#define netdev_sysfs_init() (0)
-#define netdev_register_sysfs(dev) (0)
-#define netdev_unregister_sysfs(dev) do { } while(0)
-#endif
+DEFINE_PER_CPU(struct softnet_data, softnet_data);
+
+extern int netdev_kobject_init(void);
+extern int netdev_register_kobject(struct net_device *);
+extern void netdev_unregister_kobject(struct net_device *);
#ifdef CONFIG_DEBUG_LOCK_ALLOC
/*
@@ -490,7 +511,7 @@ unsigned long netdev_boot_base(const char *prefix, int unit)
* If device already registered then return base of 1
* to indicate not to probe for this interface
*/
- if (__dev_get_by_name(name))
+ if (__dev_get_by_name(&init_net, name))
return 1;
for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
@@ -545,11 +566,11 @@ __setup("netdev=", netdev_boot_setup);
* careful with locks.
*/
-struct net_device *__dev_get_by_name(const char *name)
+struct net_device *__dev_get_by_name(struct net *net, const char *name)
{
struct hlist_node *p;
- hlist_for_each(p, dev_name_hash(name)) {
+ hlist_for_each(p, dev_name_hash(net, name)) {
struct net_device *dev
= hlist_entry(p, struct net_device, name_hlist);
if (!strncmp(dev->name, name, IFNAMSIZ))
@@ -569,12 +590,12 @@ struct net_device *__dev_get_by_name(const char *name)
* matching device is found.
*/
-struct net_device *dev_get_by_name(const char *name)
+struct net_device *dev_get_by_name(struct net *net, const char *name)
{
struct net_device *dev;
read_lock(&dev_base_lock);
- dev = __dev_get_by_name(name);
+ dev = __dev_get_by_name(net, name);
if (dev)
dev_hold(dev);
read_unlock(&dev_base_lock);
@@ -592,11 +613,11 @@ struct net_device *dev_get_by_name(const char *name)
* or @dev_base_lock.
*/
-struct net_device *__dev_get_by_index(int ifindex)
+struct net_device *__dev_get_by_index(struct net *net, int ifindex)
{
struct hlist_node *p;
- hlist_for_each(p, dev_index_hash(ifindex)) {
+ hlist_for_each(p, dev_index_hash(net, ifindex)) {
struct net_device *dev
= hlist_entry(p, struct net_device, index_hlist);
if (dev->ifindex == ifindex)
@@ -616,12 +637,12 @@ struct net_device *__dev_get_by_index(int ifindex)
* dev_put to indicate they have finished with it.
*/
-struct net_device *dev_get_by_index(int ifindex)
+struct net_device *dev_get_by_index(struct net *net, int ifindex)
{
struct net_device *dev;
read_lock(&dev_base_lock);
- dev = __dev_get_by_index(ifindex);
+ dev = __dev_get_by_index(net, ifindex);
if (dev)
dev_hold(dev);
read_unlock(&dev_base_lock);
@@ -642,13 +663,13 @@ struct net_device *dev_get_by_index(int ifindex)
* If the API was consistent this would be __dev_get_by_hwaddr
*/
-struct net_device *dev_getbyhwaddr(unsigned short type, char *ha)
+struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, char *ha)
{
struct net_device *dev;
ASSERT_RTNL();
- for_each_netdev(dev)
+ for_each_netdev(&init_net, dev)
if (dev->type == type &&
!memcmp(dev->dev_addr, ha, dev->addr_len))
return dev;
@@ -658,12 +679,12 @@ struct net_device *dev_getbyhwaddr(unsigned short type, char *ha)
EXPORT_SYMBOL(dev_getbyhwaddr);
-struct net_device *__dev_getfirstbyhwtype(unsigned short type)
+struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type)
{
struct net_device *dev;
ASSERT_RTNL();
- for_each_netdev(dev)
+ for_each_netdev(net, dev)
if (dev->type == type)
return dev;
@@ -672,12 +693,12 @@ struct net_device *__dev_getfirstbyhwtype(unsigned short type)
EXPORT_SYMBOL(__dev_getfirstbyhwtype);
-struct net_device *dev_getfirstbyhwtype(unsigned short type)
+struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
{
struct net_device *dev;
rtnl_lock();
- dev = __dev_getfirstbyhwtype(type);
+ dev = __dev_getfirstbyhwtype(net, type);
if (dev)
dev_hold(dev);
rtnl_unlock();
@@ -697,13 +718,13 @@ EXPORT_SYMBOL(dev_getfirstbyhwtype);
* dev_put to indicate they have finished with it.
*/
-struct net_device * dev_get_by_flags(unsigned short if_flags, unsigned short mask)
+struct net_device * dev_get_by_flags(struct net *net, unsigned short if_flags, unsigned short mask)
{
struct net_device *dev, *ret;
ret = NULL;
read_lock(&dev_base_lock);
- for_each_netdev(dev) {
+ for_each_netdev(net, dev) {
if (((dev->flags ^ if_flags) & mask) == 0) {
dev_hold(dev);
ret = dev;
@@ -740,9 +761,10 @@ int dev_valid_name(const char *name)
}
/**
- * dev_alloc_name - allocate a name for a device
- * @dev: device
+ * __dev_alloc_name - allocate a name for a device
+ * @net: network namespace to allocate the device name in
* @name: name format string
+ * @buf: scratch buffer and result name string
*
* Passed a format string - eg "lt%d" it will try and find a suitable
* id. It scans list of devices to build up a free map, then chooses
@@ -753,13 +775,12 @@ int dev_valid_name(const char *name)
* Returns the number of the unit assigned or a negative errno code.
*/
-int dev_alloc_name(struct net_device *dev, const char *name)
+static int __dev_alloc_name(struct net *net, const char *name, char *buf)
{
int i = 0;
- char buf[IFNAMSIZ];
const char *p;
const int max_netdevices = 8*PAGE_SIZE;
- long *inuse;
+ unsigned long *inuse;
struct net_device *d;
p = strnchr(name, IFNAMSIZ-1, '%');
@@ -773,18 +794,18 @@ int dev_alloc_name(struct net_device *dev, const char *name)
return -EINVAL;
/* Use one page as a bit array of possible slots */
- inuse = (long *) get_zeroed_page(GFP_ATOMIC);
+ inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC);
if (!inuse)
return -ENOMEM;
- for_each_netdev(d) {
+ for_each_netdev(net, d) {
if (!sscanf(d->name, name, &i))
continue;
if (i < 0 || i >= max_netdevices)
continue;
/* avoid cases where sscanf is not exact inverse of printf */
- snprintf(buf, sizeof(buf), name, i);
+ snprintf(buf, IFNAMSIZ, name, i);
if (!strncmp(buf, d->name, IFNAMSIZ))
set_bit(i, inuse);
}
@@ -793,11 +814,9 @@ int dev_alloc_name(struct net_device *dev, const char *name)
free_page((unsigned long) inuse);
}
- snprintf(buf, sizeof(buf), name, i);
- if (!__dev_get_by_name(buf)) {
- strlcpy(dev->name, buf, IFNAMSIZ);
+ snprintf(buf, IFNAMSIZ, name, i);
+ if (!__dev_get_by_name(net, buf))
return i;
- }
/* It is possible to run out of possible slots
* when the name is long and there isn't enough space left
@@ -806,6 +825,34 @@ int dev_alloc_name(struct net_device *dev, const char *name)
return -ENFILE;
}
+/**
+ * dev_alloc_name - allocate a name for a device
+ * @dev: device
+ * @name: name format string
+ *
+ * Passed a format string - eg "lt%d" it will try and find a suitable
+ * id. It scans list of devices to build up a free map, then chooses
+ * the first empty slot. The caller must hold the dev_base or rtnl lock
+ * while allocating the name and adding the device in order to avoid
+ * duplicates.
+ * Limited to bits_per_byte * page size devices (ie 32K on most platforms).
+ * Returns the number of the unit assigned or a negative errno code.
+ */
+
+int dev_alloc_name(struct net_device *dev, const char *name)
+{
+ char buf[IFNAMSIZ];
+ struct net *net;
+ int ret;
+
+ BUG_ON(!dev->nd_net);
+ net = dev->nd_net;
+ ret = __dev_alloc_name(net, name, buf);
+ if (ret >= 0)
+ strlcpy(dev->name, buf, IFNAMSIZ);
+ return ret;
+}
+
/**
* dev_change_name - change name of a device
@@ -820,9 +867,12 @@ int dev_change_name(struct net_device *dev, char *newname)
char oldname[IFNAMSIZ];
int err = 0;
int ret;
+ struct net *net;
ASSERT_RTNL();
+ BUG_ON(!dev->nd_net);
+ net = dev->nd_net;
if (dev->flags & IFF_UP)
return -EBUSY;
@@ -837,7 +887,7 @@ int dev_change_name(struct net_device *dev, char *newname)
return err;
strcpy(newname, dev->name);
}
- else if (__dev_get_by_name(newname))
+ else if (__dev_get_by_name(net, newname))
return -EEXIST;
else
strlcpy(dev->name, newname, IFNAMSIZ);
@@ -847,10 +897,10 @@ rollback:
write_lock_bh(&dev_base_lock);
hlist_del(&dev->name_hlist);
- hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name));
+ hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
write_unlock_bh(&dev_base_lock);
- ret = raw_notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev);
+ ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
ret = notifier_to_errno(ret);
if (ret) {
@@ -876,7 +926,7 @@ rollback:
*/
void netdev_features_change(struct net_device *dev)
{
- raw_notifier_call_chain(&netdev_chain, NETDEV_FEAT_CHANGE, dev);
+ call_netdevice_notifiers(NETDEV_FEAT_CHANGE, dev);
}
EXPORT_SYMBOL(netdev_features_change);
@@ -891,8 +941,7 @@ EXPORT_SYMBOL(netdev_features_change);
void netdev_state_change(struct net_device *dev)
{
if (dev->flags & IFF_UP) {
- raw_notifier_call_chain(&netdev_chain,
- NETDEV_CHANGE, dev);
+ call_netdevice_notifiers(NETDEV_CHANGE, dev);
rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
}
}
@@ -906,26 +955,18 @@ void netdev_state_change(struct net_device *dev)
* available in this kernel then it becomes a nop.
*/
-void dev_load(const char *name)
+void dev_load(struct net *net, const char *name)
{
struct net_device *dev;
read_lock(&dev_base_lock);
- dev = __dev_get_by_name(name);
+ dev = __dev_get_by_name(net, name);
read_unlock(&dev_base_lock);
if (!dev && capable(CAP_SYS_MODULE))
request_module("%s", name);
}
-static int default_rebuild_header(struct sk_buff *skb)
-{
- printk(KERN_DEBUG "%s: default_rebuild_header called -- BUG!\n",
- skb->dev ? skb->dev->name : "NULL!!!");
- kfree_skb(skb);
- return 1;
-}
-
/**
* dev_open - prepare an interface for use.
* @dev: device to open
@@ -988,7 +1029,7 @@ int dev_open(struct net_device *dev)
/*
* ... and announce new interface.
*/
- raw_notifier_call_chain(&netdev_chain, NETDEV_UP, dev);
+ call_netdevice_notifiers(NETDEV_UP, dev);
}
return ret;
}
@@ -1004,6 +1045,8 @@ int dev_open(struct net_device *dev)
*/
int dev_close(struct net_device *dev)
{
+ might_sleep();
+
if (!(dev->flags & IFF_UP))
return 0;
@@ -1011,23 +1054,19 @@ int dev_close(struct net_device *dev)
* Tell people we are going down, so that they can
* prepare to death, when device is still operating.
*/
- raw_notifier_call_chain(&netdev_chain, NETDEV_GOING_DOWN, dev);
+ call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);
dev_deactivate(dev);
clear_bit(__LINK_STATE_START, &dev->state);
/* Synchronize to scheduled poll. We cannot touch poll list,
- * it can be even on different cpu. So just clear netif_running(),
- * and wait when poll really will happen. Actually, the best place
- * for this is inside dev->stop() after device stopped its irq
- * engine, but this requires more changes in devices. */
-
+ * it can be even on different cpu. So just clear netif_running().
+ *
+ * dev->stop() will invoke napi_disable() on all of it's
+ * napi_struct instances on this device.
+ */
smp_mb__after_clear_bit(); /* Commit netif_running(). */
- while (test_bit(__LINK_STATE_RX_SCHED, &dev->state)) {
- /* No hurry. */
- msleep(1);
- }
/*
* Call the device specific close. This cannot fail.
@@ -1048,12 +1087,14 @@ int dev_close(struct net_device *dev)
/*
* Tell people we are down
*/
- raw_notifier_call_chain(&netdev_chain, NETDEV_DOWN, dev);
+ call_netdevice_notifiers(NETDEV_DOWN, dev);
return 0;
}
+static int dev_boot_phase = 1;
+
/*
* Device change register/unregister. These are not inline or static
* as we export them to the world.
@@ -1077,23 +1118,27 @@ int register_netdevice_notifier(struct notifier_block *nb)
{
struct net_device *dev;
struct net_device *last;
+ struct net *net;
int err;
rtnl_lock();
err = raw_notifier_chain_register(&netdev_chain, nb);
if (err)
goto unlock;
+ if (dev_boot_phase)
+ goto unlock;
+ for_each_net(net) {
+ for_each_netdev(net, dev) {
+ err = nb->notifier_call(nb, NETDEV_REGISTER, dev);
+ err = notifier_to_errno(err);
+ if (err)
+ goto rollback;
+
+ if (!(dev->flags & IFF_UP))
+ continue;
- for_each_netdev(dev) {
- err = nb->notifier_call(nb, NETDEV_REGISTER, dev);
- err = notifier_to_errno(err);
- if (err)
- goto rollback;
-
- if (!(dev->flags & IFF_UP))
- continue;
-
- nb->notifier_call(nb, NETDEV_UP, dev);
+ nb->notifier_call(nb, NETDEV_UP, dev);
+ }
}
unlock:
@@ -1102,15 +1147,17 @@ unlock:
rollback:
last = dev;
- for_each_netdev(dev) {
- if (dev == last)
- break;
+ for_each_net(net) {
+ for_each_netdev(net, dev) {
+ if (dev == last)
+ break;
- if (dev->flags & IFF_UP) {
- nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
- nb->notifier_call(nb, NETDEV_DOWN, dev);
+ if (dev->flags & IFF_UP) {
+ nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
+ nb->notifier_call(nb, NETDEV_DOWN, dev);
+ }
+ nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
}
- nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
}
goto unlock;
}
@@ -1144,9 +1191,9 @@ int unregister_netdevice_notifier(struct notifier_block *nb)
* are as for raw_notifier_call_chain().
*/
-int call_netdevice_notifiers(unsigned long val, void *v)
+int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
{
- return raw_notifier_call_chain(&netdev_chain, val, v);
+ return raw_notifier_call_chain(&netdev_chain, val, dev);
}
/* When > 0 there are consumers of rx skb time stamps */
@@ -1233,21 +1280,21 @@ void __netif_schedule(struct net_device *dev)
}
EXPORT_SYMBOL(__netif_schedule);
-void __netif_rx_schedule(struct net_device *dev)
+void dev_kfree_skb_irq(struct sk_buff *skb)
{
- unsigned long flags;
+ if (atomic_dec_and_test(&skb->users)) {
+ struct softnet_data *sd;
+ unsigned long flags;
- local_irq_save(flags);
- dev_hold(dev);
- list_add_tail(&dev->poll_list, &__get_cpu_var(softnet_data).poll_list);
- if (dev->quota < 0)
- dev->quota += dev->weight;
- else
- dev->quota = dev->weight;
- __raise_softirq_irqoff(NET_RX_SOFTIRQ);
- local_irq_restore(flags);
+ local_irq_save(flags);
+ sd = &__get_cpu_var(softnet_data);
+ skb->next = sd->completion_queue;
+ sd->completion_queue = skb;
+ raise_softirq_irqoff(NET_TX_SOFTIRQ);
+ local_irq_restore(flags);
+ }
}
-EXPORT_SYMBOL(__netif_rx_schedule);
+EXPORT_SYMBOL(dev_kfree_skb_irq);
void dev_kfree_skb_any(struct sk_buff *skb)
{
@@ -1259,7 +1306,12 @@ void dev_kfree_skb_any(struct sk_buff *skb)
EXPORT_SYMBOL(dev_kfree_skb_any);
-/* Hot-plugging. */
+/**
+ * netif_device_detach - mark device as removed
+ * @dev: network device
+ *
+ * Mark device as removed from system and therefore no longer available.
+ */
void netif_device_detach(struct net_device *dev)
{
if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
@@ -1269,6 +1321,12 @@ void netif_device_detach(struct net_device *dev)
}
EXPORT_SYMBOL(netif_device_detach);
+/**
+ * netif_device_attach - mark device as attached
+ * @dev: network device
+ *
+ * Mark device as attached from system and restart if needed.
+ */
void netif_device_attach(struct net_device *dev)
{
if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
@@ -1501,18 +1559,6 @@ out_kfree_skb:
return 0;
}
-#define HARD_TX_LOCK(dev, cpu) { \
- if ((dev->features & NETIF_F_LLTX) == 0) { \
- netif_tx_lock(dev); \
- } \
-}
-
-#define HARD_TX_UNLOCK(dev) { \
- if ((dev->features & NETIF_F_LLTX) == 0) { \
- netif_tx_unlock(dev); \
- } \
-}
-
/**
* dev_queue_xmit - transmit a buffer
* @skb: buffer to transmit
@@ -1730,7 +1776,7 @@ enqueue:
return NET_RX_SUCCESS;
}
- netif_rx_schedule(&queue->backlog_dev);
+ napi_schedule(&queue->backlog);
goto enqueue;
}
@@ -1771,6 +1817,7 @@ static inline struct net_device *skb_bond(struct sk_buff *skb)
return dev;
}
+
static void net_tx_action(struct softirq_action *h)
{
struct softnet_data *sd = &__get_cpu_var(softnet_data);
@@ -1927,7 +1974,7 @@ int netif_receive_skb(struct sk_buff *skb)
__be16 type;
/* if we've gotten here through NAPI, check netpoll */
- if (skb->dev->poll && netpoll_rx(skb))
+ if (netpoll_receive_skb(skb))
return NET_RX_DROP;
if (!skb->tstamp.tv64)
@@ -2017,22 +2064,25 @@ out:
return ret;
}
-static int process_backlog(struct net_device *backlog_dev, int *budget)
+static int process_backlog(struct napi_struct *napi, int quota)
{
int work = 0;
- int quota = min(backlog_dev->quota, *budget);
struct softnet_data *queue = &__get_cpu_var(softnet_data);
unsigned long start_time = jiffies;
- backlog_dev->weight = weight_p;
- for (;;) {
+ napi->weight = weight_p;
+ do {
struct sk_buff *skb;
struct net_device *dev;
local_irq_disable();
skb = __skb_dequeue(&queue->input_pkt_queue);
- if (!skb)
- goto job_done;
+ if (!skb) {
+ __napi_complete(napi);
+ local_irq_enable();
+ break;
+ }
+
local_irq_enable();
dev = skb->dev;
@@ -2040,67 +2090,86 @@ static int process_backlog(struct net_device *backlog_dev, int *budget)
netif_receive_skb(skb);
dev_put(dev);
+ } while (++work < quota && jiffies == start_time);
- work++;
-
- if (work >= quota || jiffies - start_time > 1)
- break;
-
- }
-
- backlog_dev->quota -= work;
- *budget -= work;
- return -1;
-
-job_done:
- backlog_dev->quota -= work;
- *budget -= work;
+ return work;
+}
- list_del(&backlog_dev->poll_list);
- smp_mb__before_clear_bit();
- netif_poll_enable(backlog_dev);
+/**
+ * __napi_schedule - schedule for receive
+ * @napi: entry to schedule
+ *
+ * The entry's receive function will be scheduled to run
+ */
+void fastcall __napi_schedule(struct napi_struct *n)
+{
+ unsigned long flags;
- local_irq_enable();
- return 0;
+ local_irq_save(flags);
+ list_add_tail(&n->poll_list, &__get_cpu_var(softnet_data).poll_list);
+ __raise_softirq_irqoff(NET_RX_SOFTIRQ);
+ local_irq_restore(flags);
}
+EXPORT_SYMBOL(__napi_schedule);
+
static void net_rx_action(struct softirq_action *h)
{
- struct softnet_data *queue = &__get_cpu_var(softnet_data);
+ struct list_head *list = &__get_cpu_var(softnet_data).poll_list;
unsigned long start_time = jiffies;
int budget = netdev_budget;
void *have;
local_irq_disable();
- while (!list_empty(&queue->poll_list)) {
- struct net_device *dev;
+ while (!list_empty(list)) {
+ struct napi_struct *n;
+ int work, weight;
- if (budget <= 0 || jiffies - start_time > 1)
+ /* If softirq window is exhuasted then punt.
+ *
+ * Note that this is a slight policy change from the
+ * previous NAPI code, which would allow up to 2
+ * jiffies to pass before breaking out. The test
+ * used to be "jiffies - start_time > 1".
+ */
+ if (unlikely(budget <= 0 || jiffies != start_time))
goto softnet_break;
local_irq_enable();
- dev = list_entry(queue->poll_list.next,
- struct net_device, poll_list);
- have = netpoll_poll_lock(dev);
+ /* Even though interrupts have been re-enabled, this
+ * access is safe because interrupts can only add new
+ * entries to the tail of this list, and only ->poll()
+ * calls can remove this head entry from the list.
+ */
+ n = list_entry(list->next, struct napi_struct, poll_list);
- if (dev->quota <= 0 || dev->poll(dev, &budget)) {
- netpoll_poll_unlock(have);
- local_irq_disable();
- list_move_tail(&dev->poll_list, &queue->poll_list);
- if (dev->quota < 0)
- dev->quota += dev->weight;
- else
- dev->quota = dev->weight;
- } else {
- netpoll_poll_unlock(have);
- dev_put(dev);
- local_irq_disable();
- }
+ have = netpoll_poll_lock(n);
+
+ weight = n->weight;
+
+ work = n->poll(n, weight);
+
+ WARN_ON_ONCE(work > weight);
+
+ budget -= work;
+
+ local_irq_disable();
+
+ /* Drivers must not modify the NAPI state if they
+ * consume the entire weight. In such cases this code
+ * still "owns" the NAPI instance and therefore can
+ * move the instance around on the list at-will.
+ */
+ if (unlikely(work == weight))
+ list_move_tail(&n->poll_list, list);
+
+ netpoll_poll_unlock(have);
}
out:
local_irq_enable();
+
#ifdef CONFIG_NET_DMA
/*
* There may not be any more sk_buffs coming right now, so push
@@ -2115,6 +2184,7 @@ out:
}
}
#endif
+
return;
softnet_break:
@@ -2154,7 +2224,7 @@ int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
* match. --pb
*/
-static int dev_ifname(struct ifreq __user *arg)
+static int dev_ifname(struct net *net, struct ifreq __user *arg)
{
struct net_device *dev;
struct ifreq ifr;
@@ -2167,7 +2237,7 @@ static int dev_ifname(struct ifreq __user *arg)
return -EFAULT;
read_lock(&dev_base_lock);
- dev = __dev_get_by_index(ifr.ifr_ifindex);
+ dev = __dev_get_by_index(net, ifr.ifr_ifindex);
if (!dev) {
read_unlock(&dev_base_lock);
return -ENODEV;
@@ -2187,7 +2257,7 @@ static int dev_ifname(struct ifreq __user *arg)
* Thus we will need a 'compatibility mode'.
*/
-static int dev_ifconf(char __user *arg)
+static int dev_ifconf(struct net *net, char __user *arg)
{
struct ifconf ifc;
struct net_device *dev;
@@ -2211,7 +2281,7 @@ static int dev_ifconf(char __user *arg)
*/
total = 0;
- for_each_netdev(dev) {
+ for_each_netdev(net, dev) {
for (i = 0; i < NPROTO; i++) {
if (gifconf_list[i]) {
int done;
@@ -2245,6 +2315,7 @@ static int dev_ifconf(char __user *arg)
*/
void *dev_seq_start(struct seq_file *seq, loff_t *pos)
{
+ struct net *net = seq->private;
loff_t off;
struct net_device *dev;
@@ -2253,7 +2324,7 @@ void *dev_seq_start(struct seq_file *seq, loff_t *pos)
return SEQ_START_TOKEN;
off = 1;
- for_each_netdev(dev)
+ for_each_netdev(net, dev)
if (off++ == *pos)
return dev;
@@ -2262,9 +2333,10 @@ void *dev_seq_start(struct seq_file *seq, loff_t *pos)
void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
+ struct net *net = seq->private;
++*pos;
return v == SEQ_START_TOKEN ?
- first_net_device() : next_net_device((struct net_device *)v);
+ first_net_device(net) : next_net_device((struct net_device *)v);
}
void dev_seq_stop(struct seq_file *seq, void *v)
@@ -2360,7 +2432,26 @@ static const struct seq_operations dev_seq_ops = {
static int dev_seq_open(struct inode *inode, struct file *file)
{
- return seq_open(file, &dev_seq_ops);
+ struct seq_file *seq;
+ int res;
+ res = seq_open(file, &dev_seq_ops);
+ if (!res) {
+ seq = file->private_data;
+ seq->private = get_proc_net(inode);
+ if (!seq->private) {
+ seq_release(inode, file);
+ res = -ENXIO;
+ }
+ }
+ return res;
+}
+
+static int dev_seq_release(struct inode *inode, struct file *file)
+{
+ struct seq_file *seq = file->private_data;
+ struct net *net = seq->private;
+ put_net(net);
+ return seq_release(inode, file);
}
static const struct file_operations dev_seq_fops = {
@@ -2368,7 +2459,7 @@ static const struct file_operations dev_seq_fops = {
.open = dev_seq_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = seq_release,
+ .release = dev_seq_release,
};
static const struct seq_operations softnet_seq_ops = {
@@ -2520,30 +2611,49 @@ static const struct file_operations ptype_seq_fops = {
};
-static int __init dev_proc_init(void)
+static int __net_init dev_proc_net_init(struct net *net)
{
int rc = -ENOMEM;
- if (!proc_net_fops_create("dev", S_IRUGO, &dev_seq_fops))
+ if (!proc_net_fops_create(net, "dev", S_IRUGO, &dev_seq_fops))
goto out;
- if (!proc_net_fops_create("softnet_stat", S_IRUGO, &softnet_seq_fops))
+ if (!proc_net_fops_create(net, "softnet_stat", S_IRUGO, &softnet_seq_fops))
goto out_dev;
- if (!proc_net_fops_create("ptype", S_IRUGO, &ptype_seq_fops))
- goto out_dev2;
-
- if (wext_proc_init())
+ if (!proc_net_fops_create(net, "ptype", S_IRUGO, &ptype_seq_fops))
goto out_softnet;
+
+ if (wext_proc_init(net))
+ goto out_ptype;
rc = 0;
out:
return rc;
+out_ptype:
+ proc_net_remove(net, "ptype");
out_softnet:
- proc_net_remove("ptype");
-out_dev2:
- proc_net_remove("softnet_stat");
+ proc_net_remove(net, "softnet_stat");
out_dev:
- proc_net_remove("dev");
+ proc_net_remove(net, "dev");
goto out;
}
+
+static void __net_exit dev_proc_net_exit(struct net *net)
+{
+ wext_proc_exit(net);
+
+ proc_net_remove(net, "ptype");
+ proc_net_remove(net, "softnet_stat");
+ proc_net_remove(net, "dev");
+}
+
+static struct pernet_operations __net_initdata dev_proc_ops = {
+ .init = dev_proc_net_init,
+ .exit = dev_proc_net_exit,
+};
+
+static int __init dev_proc_init(void)
+{
+ return register_pernet_subsys(&dev_proc_ops);
+}
#else
#define dev_proc_init() 0
#endif /* CONFIG_PROC_FS */
@@ -2906,8 +3016,7 @@ int dev_change_flags(struct net_device *dev, unsigned flags)
if (dev->flags & IFF_UP &&
((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
IFF_VOLATILE)))
- raw_notifier_call_chain(&netdev_chain,
- NETDEV_CHANGE, dev);
+ call_netdevice_notifiers(NETDEV_CHANGE, dev);
if ((flags ^ dev->gflags) & IFF_PROMISC) {
int inc = (flags & IFF_PROMISC) ? +1 : -1;
@@ -2953,8 +3062,7 @@ int dev_set_mtu(struct net_device *dev, int new_mtu)
else
dev->mtu = new_mtu;
if (!err && dev->flags & IFF_UP)
- raw_notifier_call_chain(&netdev_chain,
- NETDEV_CHANGEMTU, dev);
+ call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
return err;
}
@@ -2970,18 +3078,17 @@ int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
return -ENODEV;
err = dev->set_mac_address(dev, sa);
if (!err)
- raw_notifier_call_chain(&netdev_chain,
- NETDEV_CHANGEADDR, dev);
+ call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
return err;
}
/*
- * Perform the SIOCxIFxxx calls.
+ * Perform the SIOCxIFxxx calls, inside read_lock(dev_base_lock)
*/
-static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd)
+static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd)
{
int err;
- struct net_device *dev = __dev_get_by_name(ifr->ifr_name);
+ struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
if (!dev)
return -ENODEV;
@@ -2991,25 +3098,15 @@ static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd)
ifr->ifr_flags = dev_get_flags(dev);
return 0;
- case SIOCSIFFLAGS: /* Set interface flags */
- return dev_change_flags(dev, ifr->ifr_flags);
-
case SIOCGIFMETRIC: /* Get the metric on the interface
(currently unused) */
ifr->ifr_metric = 0;
return 0;
- case SIOCSIFMETRIC: /* Set the metric on the interface
- (currently unused) */
- return -EOPNOTSUPP;
-
case SIOCGIFMTU: /* Get the MTU of a device */
ifr->ifr_mtu = dev->mtu;
return 0;
- case SIOCSIFMTU: /* Set the MTU of a device */
- return dev_set_mtu(dev, ifr->ifr_mtu);
-
case SIOCGIFHWADDR:
if (!dev->addr_len)
memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
@@ -3019,17 +3116,9 @@ static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd)
ifr->ifr_hwaddr.sa_family = dev->type;
return 0;
- case SIOCSIFHWADDR:
- return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
-
- case SIOCSIFHWBROADCAST:
- if (ifr->ifr_hwaddr.sa_family != dev->type)
- return -EINVAL;
- memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
- min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
- raw_notifier_call_chain(&netdev_chain,
- NETDEV_CHANGEADDR, dev);
- return 0;
+ case SIOCGIFSLAVE:
+ err = -EINVAL;
+ break;
case SIOCGIFMAP:
ifr->ifr_map.mem_start = dev->mem_start;
@@ -3040,6 +3129,59 @@ static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd)
ifr->ifr_map.port = dev->if_port;
return 0;
+ case SIOCGIFINDEX:
+ ifr->ifr_ifindex = dev->ifindex;
+ return 0;
+
+ case SIOCGIFTXQLEN:
+ ifr->ifr_qlen = dev->tx_queue_len;
+ return 0;
+
+ default:
+ /* dev_ioctl() should ensure this case
+ * is never reached
+ */
+ WARN_ON(1);
+ err = -EINVAL;
+ break;
+
+ }
+ return err;
+}
+
+/*
+ * Perform the SIOCxIFxxx calls, inside rtnl_lock()
+ */
+static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
+{
+ int err;
+ struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
+
+ if (!dev)
+ return -ENODEV;
+
+ switch (cmd) {
+ case SIOCSIFFLAGS: /* Set interface flags */
+ return dev_change_flags(dev, ifr->ifr_flags);
+
+ case SIOCSIFMETRIC: /* Set the metric on the interface
+ (currently unused) */
+ return -EOPNOTSUPP;
+
+ case SIOCSIFMTU: /* Set the MTU of a device */
+ return dev_set_mtu(dev, ifr->ifr_mtu);
+
+ case SIOCSIFHWADDR:
+ return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
+
+ case SIOCSIFHWBROADCAST:
+ if (ifr->ifr_hwaddr.sa_family != dev->type)
+ return -EINVAL;
+ memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
+ min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
+ call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
+ return 0;
+
case SIOCSIFMAP:
if (dev->set_config) {
if (!netif_device_present(dev))
@@ -3066,14 +3208,6 @@ static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd)
return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data,
dev->addr_len, 1);
- case SIOCGIFINDEX:
- ifr->ifr_ifindex = dev->ifindex;
- return 0;
-
- case SIOCGIFTXQLEN:
- ifr->ifr_qlen = dev->tx_queue_len;
- return 0;
-
case SIOCSIFTXQLEN:
if (ifr->ifr_qlen < 0)
return -EINVAL;
@@ -3134,7 +3268,7 @@ static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd)
* positive or a negative errno code on error.
*/
-int dev_ioctl(unsigned int cmd, void __user *arg)
+int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
{
struct ifreq ifr;
int ret;
@@ -3147,12 +3281,12 @@ int dev_ioctl(unsigned int cmd, void __user *arg)
if (cmd == SIOCGIFCONF) {
rtnl_lock();
- ret = dev_ifconf((char __user *) arg);
+ ret = dev_ifconf(net, (char __user *) arg);
rtnl_unlock();
return ret;
}
if (cmd == SIOCGIFNAME)
- return dev_ifname((struct ifreq __user *)arg);
+ return dev_ifname(net, (struct ifreq __user *)arg);
if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
return -EFAULT;
@@ -3182,9 +3316,9 @@ int dev_ioctl(unsigned int cmd, void __user *arg)
case SIOCGIFMAP:
case SIOCGIFINDEX:
case SIOCGIFTXQLEN:
- dev_load(ifr.ifr_name);
+ dev_load(net, ifr.ifr_name);
read_lock(&dev_base_lock);
- ret = dev_ifsioc(&ifr, cmd);
+ ret = dev_ifsioc_locked(net, &ifr, cmd);
read_unlock(&dev_base_lock);
if (!ret) {
if (colon)
@@ -3196,9 +3330,9 @@ int dev_ioctl(unsigned int cmd, void __user *arg)
return ret;
case SIOCETHTOOL:
- dev_load(ifr.ifr_name);
+ dev_load(net, ifr.ifr_name);
rtnl_lock();
- ret = dev_ethtool(&ifr);
+ ret = dev_ethtool(net, &ifr);
rtnl_unlock();
if (!ret) {
if (colon)
@@ -3220,9 +3354,9 @@ int dev_ioctl(unsigned int cmd, void __user *arg)
case SIOCSIFNAME:
if (!capable(CAP_NET_ADMIN))
return -EPERM;
- dev_load(ifr.ifr_name);
+ dev_load(net, ifr.ifr_name);
rtnl_lock();
- ret = dev_ifsioc(&ifr, cmd);
+ ret = dev_ifsioc(net, &ifr, cmd);
rtnl_unlock();
if (!ret) {
if (colon)
@@ -3261,9 +3395,9 @@ int dev_ioctl(unsigned int cmd, void __user *arg)
/* fall through */
case SIOCBONDSLAVEINFOQUERY:
case SIOCBONDINFOQUERY:
- dev_load(ifr.ifr_name);
+ dev_load(net, ifr.ifr_name);
rtnl_lock();
- ret = dev_ifsioc(&ifr, cmd);
+ ret = dev_ifsioc(net, &ifr, cmd);
rtnl_unlock();
return ret;
@@ -3283,9 +3417,9 @@ int dev_ioctl(unsigned int cmd, void __user *arg)
if (cmd == SIOCWANDEV ||
(cmd >= SIOCDEVPRIVATE &&
cmd <= SIOCDEVPRIVATE + 15)) {
- dev_load(ifr.ifr_name);
+ dev_load(net, ifr.ifr_name);
rtnl_lock();
- ret = dev_ifsioc(&ifr, cmd);
+ ret = dev_ifsioc(net, &ifr, cmd);
rtnl_unlock();
if (!ret && copy_to_user(arg, &ifr,
sizeof(struct ifreq)))
@@ -3294,7 +3428,7 @@ int dev_ioctl(unsigned int cmd, void __user *arg)
}
/* Take care of Wireless Extensions */
if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
- return wext_handle_ioctl(&ifr, cmd, arg);
+ return wext_handle_ioctl(net, &ifr, cmd, arg);
return -EINVAL;
}
}
@@ -3307,19 +3441,17 @@ int dev_ioctl(unsigned int cmd, void __user *arg)
* number. The caller must hold the rtnl semaphore or the
* dev_base_lock to be sure it remains unique.
*/
-static int dev_new_index(void)
+static int dev_new_index(struct net *net)
{
static int ifindex;
for (;;) {
if (++ifindex <= 0)
ifindex = 1;
- if (!__dev_get_by_index(ifindex))
+ if (!__dev_get_by_index(net, ifindex))
return ifindex;
}
}
-static int dev_boot_phase = 1;
-
/* Delayed registration/unregisteration */
static DEFINE_SPINLOCK(net_todo_list_lock);
static struct list_head net_todo_list = LIST_HEAD_INIT(net_todo_list);
@@ -3353,6 +3485,7 @@ int register_netdevice(struct net_device *dev)
struct hlist_head *head;
struct hlist_node *p;
int ret;
+ struct net *net;
BUG_ON(dev_boot_phase);
ASSERT_RTNL();
@@ -3361,6 +3494,8 @@ int register_netdevice(struct net_device *dev)
/* When net_device's are persistent, this will be fatal. */
BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
+ BUG_ON(!dev->nd_net);
+ net = dev->nd_net;
spin_lock_init(&dev->queue_lock);
spin_lock_init(&dev->_xmit_lock);
@@ -3385,12 +3520,12 @@ int register_netdevice(struct net_device *dev)
goto err_uninit;
}
- dev->ifindex = dev_new_index();
+ dev->ifindex = dev_new_index(net);
if (dev->iflink == -1)
dev->iflink = dev->ifindex;
/* Check for existence of name */
- head = dev_name_hash(dev->name);
+ head = dev_name_hash(net, dev->name);
hlist_for_each(p, head) {
struct net_device *d
= hlist_entry(p, struct net_device, name_hlist);
@@ -3446,15 +3581,7 @@ int register_netdevice(struct net_device *dev)
}
}
- /*
- * nil rebuild_header routine,
- * that should be never called and used as just bug trap.
- */
-
- if (!dev->rebuild_header)
- dev->rebuild_header = default_rebuild_header;
-
- ret = netdev_register_sysfs(dev);
+ ret = netdev_register_kobject(dev);
if (ret)
goto err_uninit;
dev->reg_state = NETREG_REGISTERED;
@@ -3467,15 +3594,11 @@ int register_netdevice(struct net_device *dev)
set_bit(__LINK_STATE_PRESENT, &dev->state);
dev_init_scheduler(dev);
- write_lock_bh(&dev_base_lock);
- list_add_tail(&dev->dev_list, &dev_base_head);
- hlist_add_head(&dev->name_hlist, head);
- hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex));
dev_hold(dev);
- write_unlock_bh(&dev_base_lock);
+ list_netdevice(dev);
/* Notify protocols, that a new device appeared. */
- ret = raw_notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev);
+ ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
ret = notifier_to_errno(ret);
if (ret)
unregister_netdevice(dev);
@@ -3546,8 +3669,7 @@ static void netdev_wait_allrefs(struct net_device *dev)
rtnl_lock();
/* Rebroadcast unregister notification */
- raw_notifier_call_chain(&netdev_chain,
- NETDEV_UNREGISTER, dev);
+ call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
&dev->state)) {
@@ -3692,6 +3814,7 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
dev = (struct net_device *)
(((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
dev->padded = (char *)dev - (char *)p;
+ dev->nd_net = &init_net;
if (sizeof_priv) {
dev->priv = ((char *)dev +
@@ -3704,6 +3827,7 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
dev->egress_subqueue_count = queue_count;
dev->get_stats = internal_stats;
+ netpoll_netdev_init(dev);
setup(dev);
strcpy(dev->name, name);
return dev;
@@ -3720,7 +3844,6 @@ EXPORT_SYMBOL(alloc_netdev_mq);
*/
void free_netdev(struct net_device *dev)
{
-#ifdef CONFIG_SYSFS
/* Compatibility with error handling in drivers */
if (dev->reg_state == NETREG_UNINITIALIZED) {
kfree((char *)dev - dev->padded);
@@ -3732,9 +3855,6 @@ void free_netdev(struct net_device *dev)
/* will free via device release */
put_device(&dev->dev);
-#else
- kfree((char *)dev - dev->padded);
-#endif
}
/* Synchronize with packet receive processing. */
@@ -3773,15 +3893,10 @@ void unregister_netdevice(struct net_device *dev)
BUG_ON(dev->reg_state != NETREG_REGISTERED);
/* If device is running, close it first. */
- if (dev->flags & IFF_UP)
- dev_close(dev);
+ dev_close(dev);
/* And unlink it from device chain. */
- write_lock_bh(&dev_base_lock);
- list_del(&dev->dev_list);
- hlist_del(&dev->name_hlist);
- hlist_del(&dev->index_hlist);
- write_unlock_bh(&dev_base_lock);
+ unlist_netdevice(dev);
dev->reg_state = NETREG_UNREGISTERING;
@@ -3794,7 +3909,7 @@ void unregister_netdevice(struct net_device *dev)
/* Notify protocols, that we are about to destroy
this device. They should clean all the things.
*/
- raw_notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev);
+ call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
/*
* Flush the unicast and multicast chains
@@ -3807,8 +3922,8 @@ void unregister_netdevice(struct net_device *dev)
/* Notifier chain MUST detach us from master device. */
BUG_TRAP(!dev->master);
- /* Remove entries from sysfs */
- netdev_unregister_sysfs(dev);
+ /* Remove entries from kobject tree */
+ netdev_unregister_kobject(dev);
/* Finish processing unregister after unlock */
net_set_todo(dev);
@@ -3839,6 +3954,121 @@ void unregister_netdev(struct net_device *dev)
EXPORT_SYMBOL(unregister_netdev);
+/**
+ * dev_change_net_namespace - move device to different nethost namespace
+ * @dev: device
+ * @net: network namespace
+ * @pat: If not NULL name pattern to try if the current device name
+ * is already taken in the destination network namespace.
+ *
+ * This function shuts down a device interface and moves it
+ * to a new network namespace. On success 0 is returned, on
+ * a failure a netagive errno code is returned.
+ *
+ * Callers must hold the rtnl semaphore.
+ */
+
+int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
+{
+ char buf[IFNAMSIZ];
+ const char *destname;
+ int err;
+
+ ASSERT_RTNL();
+
+ /* Don't allow namespace local devices to be moved. */
+ err = -EINVAL;
+ if (dev->features & NETIF_F_NETNS_LOCAL)
+ goto out;
+
+ /* Ensure the device has been registrered */
+ err = -EINVAL;
+ if (dev->reg_state != NETREG_REGISTERED)
+ goto out;
+
+ /* Get out if there is nothing todo */
+ err = 0;
+ if (dev->nd_net == net)
+ goto out;
+
+ /* Pick the destination device name, and ensure
+ * we can use it in the destination network namespace.
+ */
+ err = -EEXIST;
+ destname = dev->name;
+ if (__dev_get_by_name(net, destname)) {
+ /* We get here if we can't use the current device name */
+ if (!pat)
+ goto out;
+ if (!dev_valid_name(pat))
+ goto out;
+ if (strchr(pat, '%')) {
+ if (__dev_alloc_name(net, pat, buf) < 0)
+ goto out;
+ destname = buf;
+ } else
+ destname = pat;
+ if (__dev_get_by_name(net, destname))
+ goto out;
+ }
+
+ /*
+ * And now a mini version of register_netdevice unregister_netdevice.
+ */
+
+ /* If device is running close it first. */
+ dev_close(dev);
+
+ /* And unlink it from device chain */
+ err = -ENODEV;
+ unlist_netdevice(dev);
+
+ synchronize_net();
+
+ /* Shutdown queueing discipline. */
+ dev_shutdown(dev);
+
+ /* Notify protocols, that we are about to destroy
+ this device. They should clean all the things.
+ */
+ call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
+
+ /*
+ * Flush the unicast and multicast chains
+ */
+ dev_addr_discard(dev);
+
+ /* Actually switch the network namespace */
+ dev->nd_net = net;
+
+ /* Assign the new device name */
+ if (destname != dev->name)
+ strcpy(dev->name, destname);
+
+ /* If there is an ifindex conflict assign a new one */
+ if (__dev_get_by_index(net, dev->ifindex)) {
+ int iflink = (dev->iflink == dev->ifindex);
+ dev->ifindex = dev_new_index(net);
+ if (iflink)
+ dev->iflink = dev->ifindex;
+ }
+
+ /* Fixup kobjects */
+ err = device_rename(&dev->dev, dev->name);
+ WARN_ON(err);
+
+ /* Add the device back in the hashes */
+ list_netdevice(dev);
+
+ /* Notify protocols, that a new device appeared. */
+ call_netdevice_notifiers(NETDEV_REGISTER, dev);
+
+ synchronize_net();
+ err = 0;
+out:
+ return err;
+}
+
static int dev_cpu_callback(struct notifier_block *nfb,
unsigned long action,
void *ocpu)
@@ -4032,6 +4262,82 @@ int netdev_compute_features(unsigned long all, unsigned long one)
}
EXPORT_SYMBOL(netdev_compute_features);
+static struct hlist_head *netdev_create_hash(void)
+{
+ int i;
+ struct hlist_head *hash;
+
+ hash = kmalloc(sizeof(*hash) * NETDEV_HASHENTRIES, GFP_KERNEL);
+ if (hash != NULL)
+ for (i = 0; i < NETDEV_HASHENTRIES; i++)
+ INIT_HLIST_HEAD(&hash[i]);
+
+ return hash;
+}
+
+/* Initialize per network namespace state */
+static int __net_init netdev_init(struct net *net)
+{
+ INIT_LIST_HEAD(&net->dev_base_head);
+ rwlock_init(&dev_base_lock);
+
+ net->dev_name_head = netdev_create_hash();
+ if (net->dev_name_head == NULL)
+ goto err_name;
+
+ net->dev_index_head = netdev_create_hash();
+ if (net->dev_index_head == NULL)
+ goto err_idx;
+
+ return 0;
+
+err_idx:
+ kfree(net->dev_name_head);
+err_name:
+ return -ENOMEM;
+}
+
+static void __net_exit netdev_exit(struct net *net)
+{
+ kfree(net->dev_name_head);
+ kfree(net->dev_index_head);
+}
+
+static struct pernet_operations __net_initdata netdev_net_ops = {
+ .init = netdev_init,
+ .exit = netdev_exit,
+};
+
+static void __net_exit default_device_exit(struct net *net)
+{
+ struct net_device *dev, *next;
+ /*
+ * Push all migratable of the network devices back to the
+ * initial network namespace
+ */
+ rtnl_lock();
+ for_each_netdev_safe(net, dev, next) {
+ int err;
+
+ /* Ignore unmoveable devices (i.e. loopback) */
+ if (dev->features & NETIF_F_NETNS_LOCAL)
+ continue;
+
+ /* Push remaing network devices to init_net */
+ err = dev_change_net_namespace(dev, &init_net, "dev%d");
+ if (err) {
+ printk(KERN_WARNING "%s: failed to move %s to init_net: %d\n",
+ __func__, dev->name, err);
+ unregister_netdevice(dev);
+ }
+ }
+ rtnl_unlock();
+}
+
+static struct pernet_operations __net_initdata default_device_ops = {
+ .exit = default_device_exit,
+};
+
/*
* Initialize the DEV module. At boot time this walks the device list and
* unhooks any devices that fail to initialise (normally hardware not
@@ -4052,18 +4358,18 @@ static int __init net_dev_init(void)
if (dev_proc_init())
goto out;
- if (netdev_sysfs_init())
+ if (netdev_kobject_init())
goto out;
INIT_LIST_HEAD(&ptype_all);
for (i = 0; i < 16; i++)
INIT_LIST_HEAD(&ptype_base[i]);
- for (i = 0; i < ARRAY_SIZE(dev_name_head); i++)
- INIT_HLIST_HEAD(&dev_name_head[i]);
+ if (register_pernet_subsys(&netdev_net_ops))
+ goto out;
- for (i = 0; i < ARRAY_SIZE(dev_index_head); i++)
- INIT_HLIST_HEAD(&dev_index_head[i]);
+ if (register_pernet_device(&default_device_ops))
+ goto out;
/*
* Initialise the packet receive queues.
@@ -4076,10 +4382,9 @@ static int __init net_dev_init(void)
skb_queue_head_init(&queue->input_pkt_queue);
queue->completion_queue = NULL;
INIT_LIST_HEAD(&queue->poll_list);
- set_bit(__LINK_STATE_START, &queue->backlog_dev.state);
- queue->backlog_dev.weight = weight_p;
- queue->backlog_dev.poll = process_backlog;
- atomic_set(&queue->backlog_dev.refcnt, 1);
+
+ queue->backlog.poll = process_backlog;
+ queue->backlog.weight = weight_p;
}
netdev_dma_register();
diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c
index 20330c572610..15241cf48af8 100644
--- a/net/core/dev_mcast.c
+++ b/net/core/dev_mcast.c
@@ -41,6 +41,7 @@
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/init.h>
+#include <net/net_namespace.h>
#include <net/ip.h>
#include <net/route.h>
#include <linux/skbuff.h>
@@ -186,11 +187,12 @@ EXPORT_SYMBOL(dev_mc_unsync);
#ifdef CONFIG_PROC_FS
static void *dev_mc_seq_start(struct seq_file *seq, loff_t *pos)
{
+ struct net *net = seq->private;
struct net_device *dev;
loff_t off = 0;
read_lock(&dev_base_lock);
- for_each_netdev(dev) {
+ for_each_netdev(net, dev) {
if (off++ == *pos)
return dev;
}
@@ -239,7 +241,26 @@ static const struct seq_operations dev_mc_seq_ops = {
static int dev_mc_seq_open(struct inode *inode, struct file *file)
{
- return seq_open(file, &dev_mc_seq_ops);
+ struct seq_file *seq;
+ int res;
+ res = seq_open(file, &dev_mc_seq_ops);
+ if (!res) {
+ seq = file->private_data;
+ seq->private = get_proc_net(inode);
+ if (!seq->private) {
+ seq_release(inode, file);
+ res = -ENXIO;
+ }
+ }
+ return res;
+}
+
+static int dev_mc_seq_release(struct inode *inode, struct file *file)
+{
+ struct seq_file *seq = file->private_data;
+ struct net *net = seq->private;
+ put_net(net);
+ return seq_release(inode, file);
}
static const struct file_operations dev_mc_seq_fops = {
@@ -247,14 +268,31 @@ static const struct file_operations dev_mc_seq_fops = {
.open = dev_mc_seq_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = seq_release,
+ .release = dev_mc_seq_release,
};
#endif
+static int __net_init dev_mc_net_init(struct net *net)
+{
+ if (!proc_net_fops_create(net, "dev_mcast", 0, &dev_mc_seq_fops))
+ return -ENOMEM;
+ return 0;
+}
+
+static void __net_exit dev_mc_net_exit(struct net *net)
+{
+ proc_net_remove(net, "dev_mcast");
+}
+
+static struct pernet_operations __net_initdata dev_mc_net_ops = {
+ .init = dev_mc_net_init,
+ .exit = dev_mc_net_exit,
+};
+
void __init dev_mcast_init(void)
{
- proc_net_fops_create("dev_mcast", 0, &dev_mc_seq_fops);
+ register_pernet_subsys(&dev_mc_net_ops);
}
EXPORT_SYMBOL(dev_mc_add);
diff --git a/net/core/dst.c b/net/core/dst.c
index c6a05879d58c..16958e64e577 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -9,59 +9,84 @@
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/kernel.h>
+#include <linux/workqueue.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/string.h>
#include <linux/types.h>
+#include <net/net_namespace.h>
+#include <net/net_namespace.h>
#include <net/dst.h>
-/* Locking strategy:
- * 1) Garbage collection state of dead destination cache
- * entries is protected by dst_lock.
- * 2) GC is run only from BH context, and is the only remover
- * of entries.
- * 3) Entries are added to the garbage list from both BH
- * and non-BH context, so local BH disabling is needed.
- * 4) All operations modify state, so a spinlock is used.
+/*
+ * Theory of operations:
+ * 1) We use a list, protected by a spinlock, to add
+ * new entries from both BH and non-BH context.
+ * 2) In order to keep spinlock held for a small delay,
+ * we use a second list where are stored long lived
+ * entries, that are handled by the garbage collect thread
+ * fired by a workqueue.
+ * 3) This list is guarded by a mutex,
+ * so that the gc_task and dst_dev_event() can be synchronized.
*/
-static struct dst_entry *dst_garbage_list;
#if RT_CACHE_DEBUG >= 2
static atomic_t dst_total = ATOMIC_INIT(0);
#endif
-static DEFINE_SPINLOCK(dst_lock);
-static unsigned long dst_gc_timer_expires;
-static unsigned long dst_gc_timer_inc = DST_GC_MAX;
-static void dst_run_gc(unsigned long);
+/*
+ * We want to keep lock & list close together
+ * to dirty as few cache lines as possible in __dst_free().
+ * As this is not a very strong hint, we dont force an alignment on SMP.
+ */
+static struct {
+ spinlock_t lock;
+ struct dst_entry *list;
+ unsigned long timer_inc;
+ unsigned long timer_expires;
+} dst_garbage = {
+ .lock = __SPIN_LOCK_UNLOCKED(dst_garbage.lock),
+ .timer_inc = DST_GC_MAX,
+};
+static void dst_gc_task(struct work_struct *work);
static void ___dst_free(struct dst_entry * dst);
-static DEFINE_TIMER(dst_gc_timer, dst_run_gc, DST_GC_MIN, 0);
+static DECLARE_DELAYED_WORK(dst_gc_work, dst_gc_task);
-static void dst_run_gc(unsigned long dummy)
+static DEFINE_MUTEX(dst_gc_mutex);
+/*
+ * long lived entries are maintained in this list, guarded by dst_gc_mutex
+ */
+static struct dst_entry *dst_busy_list;
+
+static void dst_gc_task(struct work_struct *work)
{
int delayed = 0;
- int work_performed;
- struct dst_entry * dst, **dstp;
+ int work_performed = 0;
+ unsigned long expires = ~0L;
+ struct dst_entry *dst, *next, head;
+ struct dst_entry *last = &head;
+#if RT_CACHE_DEBUG >= 2
+ ktime_t time_start = ktime_get();
+ struct timespec elapsed;
+#endif
- if (!spin_trylock(&dst_lock)) {
- mod_timer(&dst_gc_timer, jiffies + HZ/10);
- return;
- }
+ mutex_lock(&dst_gc_mutex);
+ next = dst_busy_list;
- del_timer(&dst_gc_timer);
- dstp = &dst_garbage_list;
- work_performed = 0;
- while ((dst = *dstp) != NULL) {
- if (atomic_read(&dst->__refcnt)) {
- dstp = &dst->next;
+loop:
+ while ((dst = next) != NULL) {
+ next = dst->next;
+ prefetch(&next->next);
+ if (likely(atomic_read(&dst->__refcnt))) {
+ last->next = dst;
+ last = dst;
delayed++;
continue;
}
- *dstp = dst->next;
- work_performed = 1;
+ work_performed++;
dst = dst_destroy(dst);
if (dst) {
@@ -77,38 +102,56 @@ static void dst_run_gc(unsigned long dummy)
continue;
___dst_free(dst);
- dst->next = *dstp;
- *dstp = dst;
- dstp = &dst->next;
+ dst->next = next;
+ next = dst;
}
}
- if (!dst_garbage_list) {
- dst_gc_timer_inc = DST_GC_MAX;
- goto out;
+
+ spin_lock_bh(&dst_garbage.lock);
+ next = dst_garbage.list;
+ if (next) {
+ dst_garbage.list = NULL;
+ spin_unlock_bh(&dst_garbage.lock);
+ goto loop;
}
- if (!work_performed) {
- if ((dst_gc_timer_expires += dst_gc_timer_inc) > DST_GC_MAX)
- dst_gc_timer_expires = DST_GC_MAX;
- dst_gc_timer_inc += DST_GC_INC;
- } else {
- dst_gc_timer_inc = DST_GC_INC;
- dst_gc_timer_expires = DST_GC_MIN;
+ last->next = NULL;
+ dst_busy_list = head.next;
+ if (!dst_busy_list)
+ dst_garbage.timer_inc = DST_GC_MAX;
+ else {
+ /*
+ * if we freed less than 1/10 of delayed entries,
+ * we can sleep longer.
+ */
+ if (work_performed <= delayed/10) {
+ dst_garbage.timer_expires += dst_garbage.timer_inc;
+ if (dst_garbage.timer_expires > DST_GC_MAX)
+ dst_garbage.timer_expires = DST_GC_MAX;
+ dst_garbage.timer_inc += DST_GC_INC;
+ } else {
+ dst_garbage.timer_inc = DST_GC_INC;
+ dst_garbage.timer_expires = DST_GC_MIN;
+ }
+ expires = dst_garbage.timer_expires;
+ /*
+ * if the next desired timer is more than 4 seconds in the future
+ * then round the timer to whole seconds
+ */
+ if (expires > 4*HZ)
+ expires = round_jiffies_relative(expires);
+ schedule_delayed_work(&dst_gc_work, expires);
}
+
+ spin_unlock_bh(&dst_garbage.lock);
+ mutex_unlock(&dst_gc_mutex);
#if RT_CACHE_DEBUG >= 2
- printk("dst_total: %d/%d %ld\n",
- atomic_read(&dst_total), delayed, dst_gc_timer_expires);
+ elapsed = ktime_to_timespec(ktime_sub(ktime_get(), time_start));
+ printk(KERN_DEBUG "dst_total: %d delayed: %d work_perf: %d"
+ " expires: %lu elapsed: %lu us\n",
+ atomic_read(&dst_total), delayed, work_performed,
+ expires,
+ elapsed.tv_sec * USEC_PER_SEC + elapsed.tv_nsec / NSEC_PER_USEC);
#endif
- /* if the next desired timer is more than 4 seconds in the future
- * then round the timer to whole seconds
- */
- if (dst_gc_timer_expires > 4*HZ)
- mod_timer(&dst_gc_timer,
- round_jiffies(jiffies + dst_gc_timer_expires));
- else
- mod_timer(&dst_gc_timer, jiffies + dst_gc_timer_expires);
-
-out:
- spin_unlock(&dst_lock);
}
static int dst_discard(struct sk_buff *skb)
@@ -153,16 +196,16 @@ static void ___dst_free(struct dst_entry * dst)
void __dst_free(struct dst_entry * dst)
{
- spin_lock_bh(&dst_lock);
+ spin_lock_bh(&dst_garbage.lock);
___dst_free(dst);
- dst->next = dst_garbage_list;
- dst_garbage_list = dst;
- if (dst_gc_timer_inc > DST_GC_INC) {
- dst_gc_timer_inc = DST_GC_INC;
- dst_gc_timer_expires = DST_GC_MIN;
- mod_timer(&dst_gc_timer, jiffies + dst_gc_timer_expires);
+ dst->next = dst_garbage.list;
+ dst_garbage.list = dst;
+ if (dst_garbage.timer_inc > DST_GC_INC) {
+ dst_garbage.timer_inc = DST_GC_INC;
+ dst_garbage.timer_expires = DST_GC_MIN;
+ schedule_delayed_work(&dst_gc_work, dst_garbage.timer_expires);
}
- spin_unlock_bh(&dst_lock);
+ spin_unlock_bh(&dst_garbage.lock);
}
struct dst_entry *dst_destroy(struct dst_entry * dst)
@@ -236,13 +279,13 @@ static inline void dst_ifdown(struct dst_entry *dst, struct net_device *dev,
if (!unregister) {
dst->input = dst->output = dst_discard;
} else {
- dst->dev = &loopback_dev;
- dev_hold(&loopback_dev);
+ dst->dev = init_net.loopback_dev;
+ dev_hold(dst->dev);
dev_put(dev);
if (dst->neighbour && dst->neighbour->dev == dev) {
- dst->neighbour->dev = &loopback_dev;
+ dst->neighbour->dev = init_net.loopback_dev;
dev_put(dev);
- dev_hold(&loopback_dev);
+ dev_hold(dst->neighbour->dev);
}
}
}
@@ -250,16 +293,33 @@ static inline void dst_ifdown(struct dst_entry *dst, struct net_device *dev,
static int dst_dev_event(struct notifier_block *this, unsigned long event, void *ptr)
{
struct net_device *dev = ptr;
- struct dst_entry *dst;
+ struct dst_entry *dst, *last = NULL;
+
+ if (dev->nd_net != &init_net)
+ return NOTIFY_DONE;
switch (event) {
case NETDEV_UNREGISTER:
case NETDEV_DOWN:
- spin_lock_bh(&dst_lock);
- for (dst = dst_garbage_list; dst; dst = dst->next) {
+ mutex_lock(&dst_gc_mutex);
+ for (dst = dst_busy_list; dst; dst = dst->next) {
+ last = dst;
+ dst_ifdown(dst, dev, event != NETDEV_DOWN);
+ }
+
+ spin_lock_bh(&dst_garbage.lock);
+ dst = dst_garbage.list;
+ dst_garbage.list = NULL;
+ spin_unlock_bh(&dst_garbage.lock);
+
+ if (last)
+ last->next = dst;
+ else
+ dst_busy_list = dst;
+ for (; dst; dst = dst->next) {
dst_ifdown(dst, dev, event != NETDEV_DOWN);
}
- spin_unlock_bh(&dst_lock);
+ mutex_unlock(&dst_gc_mutex);
break;
}
return NOTIFY_DONE;
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index c5e059352d43..1163eb2256d0 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -109,6 +109,32 @@ int ethtool_op_set_ufo(struct net_device *dev, u32 data)
return 0;
}
+/* the following list of flags are the same as their associated
+ * NETIF_F_xxx values in include/linux/netdevice.h
+ */
+static const u32 flags_dup_features =
+ ETH_FLAG_LRO;
+
+u32 ethtool_op_get_flags(struct net_device *dev)
+{
+ /* in the future, this function will probably contain additional
+ * handling for flags which are not so easily handled
+ * by a simple masking operation
+ */
+
+ return dev->features & flags_dup_features;
+}
+
+int ethtool_op_set_flags(struct net_device *dev, u32 data)
+{
+ if (data & ETH_FLAG_LRO)
+ dev->features |= NETIF_F_LRO;
+ else
+ dev->features &= ~NETIF_F_LRO;
+
+ return 0;
+}
+
/* Handlers for each ethtool command */
static int ethtool_get_settings(struct net_device *dev, void __user *useraddr)
@@ -153,10 +179,26 @@ static int ethtool_get_drvinfo(struct net_device *dev, void __user *useraddr)
info.cmd = ETHTOOL_GDRVINFO;
ops->get_drvinfo(dev, &info);
- if (ops->self_test_count)
- info.testinfo_len = ops->self_test_count(dev);
- if (ops->get_stats_count)
- info.n_stats = ops->get_stats_count(dev);
+ if (ops->get_sset_count) {
+ int rc;
+
+ rc = ops->get_sset_count(dev, ETH_SS_TEST);
+ if (rc >= 0)
+ info.testinfo_len = rc;
+ rc = ops->get_sset_count(dev, ETH_SS_STATS);
+ if (rc >= 0)
+ info.n_stats = rc;
+ rc = ops->get_sset_count(dev, ETH_SS_PRIV_FLAGS);
+ if (rc >= 0)
+ info.n_priv_flags = rc;
+ } else {
+ /* code path for obsolete hooks */
+
+ if (ops->self_test_count)
+ info.testinfo_len = ops->self_test_count(dev);
+ if (ops->get_stats_count)
+ info.n_stats = ops->get_stats_count(dev);
+ }
if (ops->get_regs_len)
info.regdump_len = ops->get_regs_len(dev);
if (ops->get_eeprom_len)
@@ -230,34 +272,6 @@ static int ethtool_set_wol(struct net_device *dev, char __user *useraddr)
return dev->ethtool_ops->set_wol(dev, &wol);
}
-static int ethtool_get_msglevel(struct net_device *dev, char __user *useraddr)
-{
- struct ethtool_value edata = { ETHTOOL_GMSGLVL };
-
- if (!dev->ethtool_ops->get_msglevel)
- return -EOPNOTSUPP;
-
- edata.data = dev->ethtool_ops->get_msglevel(dev);
-
- if (copy_to_user(useraddr, &edata, sizeof(edata)))
- return -EFAULT;
- return 0;
-}
-
-static int ethtool_set_msglevel(struct net_device *dev, char __user *useraddr)
-{
- struct ethtool_value edata;
-
- if (!dev->ethtool_ops->set_msglevel)
- return -EOPNOTSUPP;
-
- if (copy_from_user(&edata, useraddr, sizeof(edata)))
- return -EFAULT;
-
- dev->ethtool_ops->set_msglevel(dev, edata.data);
- return 0;
-}
-
static int ethtool_nway_reset(struct net_device *dev)
{
if (!dev->ethtool_ops->nway_reset)
@@ -266,20 +280,6 @@ static int ethtool_nway_reset(struct net_device *dev)
return dev->ethtool_ops->nway_reset(dev);
}
-static int ethtool_get_link(struct net_device *dev, void __user *useraddr)
-{
- struct ethtool_value edata = { ETHTOOL_GLINK };
-
- if (!dev->ethtool_ops->get_link)
- return -EOPNOTSUPP;
-
- edata.data = dev->ethtool_ops->get_link(dev);
-
- if (copy_to_user(useraddr, &edata, sizeof(edata)))
- return -EFAULT;
- return 0;
-}
-
static int ethtool_get_eeprom(struct net_device *dev, void __user *useraddr)
{
struct ethtool_eeprom eeprom;
@@ -447,48 +447,6 @@ static int ethtool_set_pauseparam(struct net_device *dev, void __user *useraddr)
return dev->ethtool_ops->set_pauseparam(dev, &pauseparam);
}
-static int ethtool_get_rx_csum(struct net_device *dev, char __user *useraddr)
-{
- struct ethtool_value edata = { ETHTOOL_GRXCSUM };
-
- if (!dev->ethtool_ops->get_rx_csum)
- return -EOPNOTSUPP;
-
- edata.data = dev->ethtool_ops->get_rx_csum(dev);
-
- if (copy_to_user(useraddr, &edata, sizeof(edata)))
- return -EFAULT;
- return 0;
-}
-
-static int ethtool_set_rx_csum(struct net_device *dev, char __user *useraddr)
-{
- struct ethtool_value edata;
-
- if (!dev->ethtool_ops->set_rx_csum)
- return -EOPNOTSUPP;
-
- if (copy_from_user(&edata, useraddr, sizeof(edata)))
- return -EFAULT;
-
- dev->ethtool_ops->set_rx_csum(dev, edata.data);
- return 0;
-}
-
-static int ethtool_get_tx_csum(struct net_device *dev, char __user *useraddr)
-{
- struct ethtool_value edata = { ETHTOOL_GTXCSUM };
-
- if (!dev->ethtool_ops->get_tx_csum)
- return -EOPNOTSUPP;
-
- edata.data = dev->ethtool_ops->get_tx_csum(dev);
-
- if (copy_to_user(useraddr, &edata, sizeof(edata)))
- return -EFAULT;
- return 0;
-}
-
static int __ethtool_set_sg(struct net_device *dev, u32 data)
{
int err;
@@ -527,20 +485,6 @@ static int ethtool_set_tx_csum(struct net_device *dev, char __user *useraddr)
return dev->ethtool_ops->set_tx_csum(dev, edata.data);
}
-static int ethtool_get_sg(struct net_device *dev, char __user *useraddr)
-{
- struct ethtool_value edata = { ETHTOOL_GSG };
-
- if (!dev->ethtool_ops->get_sg)
- return -EOPNOTSUPP;
-
- edata.data = dev->ethtool_ops->get_sg(dev);
-
- if (copy_to_user(useraddr, &edata, sizeof(edata)))
- return -EFAULT;
- return 0;
-}
-
static int ethtool_set_sg(struct net_device *dev, char __user *useraddr)
{
struct ethtool_value edata;
@@ -558,20 +502,6 @@ static int ethtool_set_sg(struct net_device *dev, char __user *useraddr)
return __ethtool_set_sg(dev, edata.data);
}
-static int ethtool_get_tso(struct net_device *dev, char __user *useraddr)
-{
- struct ethtool_value edata = { ETHTOOL_GTSO };
-
- if (!dev->ethtool_ops->get_tso)
- return -EOPNOTSUPP;
-
- edata.data = dev->ethtool_ops->get_tso(dev);
-
- if (copy_to_user(useraddr, &edata, sizeof(edata)))
- return -EFAULT;
- return 0;
-}
-
static int ethtool_set_tso(struct net_device *dev, char __user *useraddr)
{
struct ethtool_value edata;
@@ -588,18 +518,6 @@ static int ethtool_set_tso(struct net_device *dev, char __user *useraddr)
return dev->ethtool_ops->set_tso(dev, edata.data);
}
-static int ethtool_get_ufo(struct net_device *dev, char __user *useraddr)
-{
- struct ethtool_value edata = { ETHTOOL_GUFO };
-
- if (!dev->ethtool_ops->get_ufo)
- return -EOPNOTSUPP;
- edata.data = dev->ethtool_ops->get_ufo(dev);
- if (copy_to_user(useraddr, &edata, sizeof(edata)))
- return -EFAULT;
- return 0;
-}
-
static int ethtool_set_ufo(struct net_device *dev, char __user *useraddr)
{
struct ethtool_value edata;
@@ -643,16 +561,27 @@ static int ethtool_self_test(struct net_device *dev, char __user *useraddr)
struct ethtool_test test;
const struct ethtool_ops *ops = dev->ethtool_ops;
u64 *data;
- int ret;
+ int ret, test_len;
- if (!ops->self_test || !ops->self_test_count)
+ if (!ops->self_test)
return -EOPNOTSUPP;
+ if (!ops->get_sset_count && !ops->self_test_count)
+ return -EOPNOTSUPP;
+
+ if (ops->get_sset_count)
+ test_len = ops->get_sset_count(dev, ETH_SS_TEST);
+ else
+ /* code path for obsolete hook */
+ test_len = ops->self_test_count(dev);
+ if (test_len < 0)
+ return test_len;
+ WARN_ON(test_len == 0);
if (copy_from_user(&test, useraddr, sizeof(test)))
return -EFAULT;
- test.len = ops->self_test_count(dev);
- data = kmalloc(test.len * sizeof(u64), GFP_USER);
+ test.len = test_len;
+ data = kmalloc(test_len * sizeof(u64), GFP_USER);
if (!data)
return -ENOMEM;
@@ -684,19 +613,29 @@ static int ethtool_get_strings(struct net_device *dev, void __user *useraddr)
if (copy_from_user(&gstrings, useraddr, sizeof(gstrings)))
return -EFAULT;
- switch (gstrings.string_set) {
- case ETH_SS_TEST:
- if (!ops->self_test_count)
- return -EOPNOTSUPP;
- gstrings.len = ops->self_test_count(dev);
- break;
- case ETH_SS_STATS:
- if (!ops->get_stats_count)
- return -EOPNOTSUPP;
- gstrings.len = ops->get_stats_count(dev);
- break;
- default:
- return -EINVAL;
+ if (ops->get_sset_count) {
+ ret = ops->get_sset_count(dev, gstrings.string_set);
+ if (ret < 0)
+ return ret;
+
+ gstrings.len = ret;
+ } else {
+ /* code path for obsolete hooks */
+
+ switch (gstrings.string_set) {
+ case ETH_SS_TEST:
+ if (!ops->self_test_count)
+ return -EOPNOTSUPP;
+ gstrings.len = ops->self_test_count(dev);
+ break;
+ case ETH_SS_STATS:
+ if (!ops->get_stats_count)
+ return -EOPNOTSUPP;
+ gstrings.len = ops->get_stats_count(dev);
+ break;
+ default:
+ return -EINVAL;
+ }
}
data = kmalloc(gstrings.len * ETH_GSTRING_LEN, GFP_USER);
@@ -736,16 +675,27 @@ static int ethtool_get_stats(struct net_device *dev, void __user *useraddr)
struct ethtool_stats stats;
const struct ethtool_ops *ops = dev->ethtool_ops;
u64 *data;
- int ret;
+ int ret, n_stats;
- if (!ops->get_ethtool_stats || !ops->get_stats_count)
+ if (!ops->get_ethtool_stats)
+ return -EOPNOTSUPP;
+ if (!ops->get_sset_count && !ops->get_stats_count)
return -EOPNOTSUPP;
+ if (ops->get_sset_count)
+ n_stats = ops->get_sset_count(dev, ETH_SS_STATS);
+ else
+ /* code path for obsolete hook */
+ n_stats = ops->get_stats_count(dev);
+ if (n_stats < 0)
+ return n_stats;
+ WARN_ON(n_stats == 0);
+
if (copy_from_user(&stats, useraddr, sizeof(stats)))
return -EFAULT;
- stats.n_stats = ops->get_stats_count(dev);
- data = kmalloc(stats.n_stats * sizeof(u64), GFP_USER);
+ stats.n_stats = n_stats;
+ data = kmalloc(n_stats * sizeof(u64), GFP_USER);
if (!data)
return -ENOMEM;
@@ -783,11 +733,55 @@ static int ethtool_get_perm_addr(struct net_device *dev, void __user *useraddr)
return 0;
}
+static int ethtool_get_value(struct net_device *dev, char __user *useraddr,
+ u32 cmd, u32 (*actor)(struct net_device *))
+{
+ struct ethtool_value edata = { cmd };
+
+ if (!actor)
+ return -EOPNOTSUPP;
+
+ edata.data = actor(dev);
+
+ if (copy_to_user(useraddr, &edata, sizeof(edata)))
+ return -EFAULT;
+ return 0;
+}
+
+static int ethtool_set_value_void(struct net_device *dev, char __user *useraddr,
+ void (*actor)(struct net_device *, u32))
+{
+ struct ethtool_value edata;
+
+ if (!actor)
+ return -EOPNOTSUPP;
+
+ if (copy_from_user(&edata, useraddr, sizeof(edata)))
+ return -EFAULT;
+
+ actor(dev, edata.data);
+ return 0;
+}
+
+static int ethtool_set_value(struct net_device *dev, char __user *useraddr,
+ int (*actor)(struct net_device *, u32))
+{
+ struct ethtool_value edata;
+
+ if (!actor)
+ return -EOPNOTSUPP;
+
+ if (copy_from_user(&edata, useraddr, sizeof(edata)))
+ return -EFAULT;
+
+ return actor(dev, edata.data);
+}
+
/* The main entry point in this file. Called from net/core/dev.c */
-int dev_ethtool(struct ifreq *ifr)
+int dev_ethtool(struct net *net, struct ifreq *ifr)
{
- struct net_device *dev = __dev_get_by_name(ifr->ifr_name);
+ struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
void __user *useraddr = ifr->ifr_data;
u32 ethcmd;
int rc;
@@ -817,6 +811,8 @@ int dev_ethtool(struct ifreq *ifr)
case ETHTOOL_GPERMADDR:
case ETHTOOL_GUFO:
case ETHTOOL_GGSO:
+ case ETHTOOL_GFLAGS:
+ case ETHTOOL_GPFLAGS:
break;
default:
if (!capable(CAP_NET_ADMIN))
@@ -849,16 +845,19 @@ int dev_ethtool(struct ifreq *ifr)
rc = ethtool_set_wol(dev, useraddr);
break;
case ETHTOOL_GMSGLVL:
- rc = ethtool_get_msglevel(dev, useraddr);
+ rc = ethtool_get_value(dev, useraddr, ethcmd,
+ dev->ethtool_ops->get_msglevel);
break;
case ETHTOOL_SMSGLVL:
- rc = ethtool_set_msglevel(dev, useraddr);
+ rc = ethtool_set_value_void(dev, useraddr,
+ dev->ethtool_ops->set_msglevel);
break;
case ETHTOOL_NWAY_RST:
rc = ethtool_nway_reset(dev);
break;
case ETHTOOL_GLINK:
- rc = ethtool_get_link(dev, useraddr);
+ rc = ethtool_get_value(dev, useraddr, ethcmd,
+ dev->ethtool_ops->get_link);
break;
case ETHTOOL_GEEPROM:
rc = ethtool_get_eeprom(dev, useraddr);
@@ -885,25 +884,36 @@ int dev_ethtool(struct ifreq *ifr)
rc = ethtool_set_pauseparam(dev, useraddr);
break;
case ETHTOOL_GRXCSUM:
- rc = ethtool_get_rx_csum(dev, useraddr);
+ rc = ethtool_get_value(dev, useraddr, ethcmd,
+ dev->ethtool_ops->get_rx_csum);
break;
case ETHTOOL_SRXCSUM:
- rc = ethtool_set_rx_csum(dev, useraddr);
+ rc = ethtool_set_value(dev, useraddr,
+ dev->ethtool_ops->set_rx_csum);
break;
case ETHTOOL_GTXCSUM:
- rc = ethtool_get_tx_csum(dev, useraddr);
+ rc = ethtool_get_value(dev, useraddr, ethcmd,
+ (dev->ethtool_ops->get_tx_csum ?
+ dev->ethtool_ops->get_tx_csum :
+ ethtool_op_get_tx_csum));
break;
case ETHTOOL_STXCSUM:
rc = ethtool_set_tx_csum(dev, useraddr);
break;
case ETHTOOL_GSG:
- rc = ethtool_get_sg(dev, useraddr);
+ rc = ethtool_get_value(dev, useraddr, ethcmd,
+ (dev->ethtool_ops->get_sg ?
+ dev->ethtool_ops->get_sg :
+ ethtool_op_get_sg));
break;
case ETHTOOL_SSG:
rc = ethtool_set_sg(dev, useraddr);
break;
case ETHTOOL_GTSO:
- rc = ethtool_get_tso(dev, useraddr);
+ rc = ethtool_get_value(dev, useraddr, ethcmd,
+ (dev->ethtool_ops->get_tso ?
+ dev->ethtool_ops->get_tso :
+ ethtool_op_get_tso));
break;
case ETHTOOL_STSO:
rc = ethtool_set_tso(dev, useraddr);
@@ -924,7 +934,10 @@ int dev_ethtool(struct ifreq *ifr)
rc = ethtool_get_perm_addr(dev, useraddr);
break;
case ETHTOOL_GUFO:
- rc = ethtool_get_ufo(dev, useraddr);
+ rc = ethtool_get_value(dev, useraddr, ethcmd,
+ (dev->ethtool_ops->get_ufo ?
+ dev->ethtool_ops->get_ufo :
+ ethtool_op_get_ufo));
break;
case ETHTOOL_SUFO:
rc = ethtool_set_ufo(dev, useraddr);
@@ -935,6 +948,22 @@ int dev_ethtool(struct ifreq *ifr)
case ETHTOOL_SGSO:
rc = ethtool_set_gso(dev, useraddr);
break;
+ case ETHTOOL_GFLAGS:
+ rc = ethtool_get_value(dev, useraddr, ethcmd,
+ dev->ethtool_ops->get_flags);
+ break;
+ case ETHTOOL_SFLAGS:
+ rc = ethtool_set_value(dev, useraddr,
+ dev->ethtool_ops->set_flags);
+ break;
+ case ETHTOOL_GPFLAGS:
+ rc = ethtool_get_value(dev, useraddr, ethcmd,
+ dev->ethtool_ops->get_priv_flags);
+ break;
+ case ETHTOOL_SPFLAGS:
+ rc = ethtool_set_value(dev, useraddr,
+ dev->ethtool_ops->set_priv_flags);
+ break;
default:
rc = -EOPNOTSUPP;
}
@@ -959,3 +988,5 @@ EXPORT_SYMBOL(ethtool_op_set_tx_hw_csum);
EXPORT_SYMBOL(ethtool_op_set_tx_ipv6_csum);
EXPORT_SYMBOL(ethtool_op_set_ufo);
EXPORT_SYMBOL(ethtool_op_get_ufo);
+EXPORT_SYMBOL(ethtool_op_set_flags);
+EXPORT_SYMBOL(ethtool_op_get_flags);
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 8c5474e16683..13de6f53f098 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -11,6 +11,8 @@
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/list.h>
+#include <net/net_namespace.h>
+#include <net/sock.h>
#include <net/fib_rules.h>
static LIST_HEAD(rules_ops);
@@ -82,7 +84,7 @@ static void cleanup_ops(struct fib_rules_ops *ops)
{
struct fib_rule *rule, *tmp;
- list_for_each_entry_safe(rule, tmp, ops->rules_list, list) {
+ list_for_each_entry_safe(rule, tmp, &ops->rules_list, list) {
list_del_rcu(&rule->list);
fib_rule_put(rule);
}
@@ -137,7 +139,7 @@ int fib_rules_lookup(struct fib_rules_ops *ops, struct flowi *fl,
rcu_read_lock();
- list_for_each_entry_rcu(rule, ops->rules_list, list) {
+ list_for_each_entry_rcu(rule, &ops->rules_list, list) {
jumped:
if (!fib_rule_match(rule, ops, fl, flags))
continue;
@@ -197,6 +199,7 @@ errout:
static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
{
+ struct net *net = skb->sk->sk_net;
struct fib_rule_hdr *frh = nlmsg_data(nlh);
struct fib_rules_ops *ops = NULL;
struct fib_rule *rule, *r, *last = NULL;
@@ -234,7 +237,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
rule->ifindex = -1;
nla_strlcpy(rule->ifname, tb[FRA_IFNAME], IFNAMSIZ);
- dev = __dev_get_by_name(rule->ifname);
+ dev = __dev_get_by_name(net, rule->ifname);
if (dev)
rule->ifindex = dev->ifindex;
}
@@ -268,7 +271,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
if (rule->target <= rule->pref)
goto errout_free;
- list_for_each_entry(r, ops->rules_list, list) {
+ list_for_each_entry(r, &ops->rules_list, list) {
if (r->pref == rule->target) {
rule->ctarget = r;
break;
@@ -284,7 +287,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
if (err < 0)
goto errout_free;
- list_for_each_entry(r, ops->rules_list, list) {
+ list_for_each_entry(r, &ops->rules_list, list) {
if (r->pref > rule->pref)
break;
last = r;
@@ -297,7 +300,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
* There are unresolved goto rules in the list, check if
* any of them are pointing to this new rule.
*/
- list_for_each_entry(r, ops->rules_list, list) {
+ list_for_each_entry(r, &ops->rules_list, list) {
if (r->action == FR_ACT_GOTO &&
r->target == rule->pref) {
BUG_ON(r->ctarget != NULL);
@@ -317,7 +320,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
if (last)
list_add_rcu(&rule->list, &last->list);
else
- list_add_rcu(&rule->list, ops->rules_list);
+ list_add_rcu(&rule->list, &ops->rules_list);
notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).pid);
flush_route_cache(ops);
@@ -356,7 +359,7 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
if (err < 0)
goto errout;
- list_for_each_entry(rule, ops->rules_list, list) {
+ list_for_each_entry(rule, &ops->rules_list, list) {
if (frh->action && (frh->action != rule->action))
continue;
@@ -399,7 +402,7 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
* actually been added.
*/
if (ops->nr_goto_rules > 0) {
- list_for_each_entry(tmp, ops->rules_list, list) {
+ list_for_each_entry(tmp, &ops->rules_list, list) {
if (tmp->ctarget == rule) {
rcu_assign_pointer(tmp->ctarget, NULL);
ops->unresolved_rules++;
@@ -495,7 +498,7 @@ static int dump_rules(struct sk_buff *skb, struct netlink_callback *cb,
int idx = 0;
struct fib_rule *rule;
- list_for_each_entry(rule, ops->rules_list, list) {
+ list_for_each_entry(rule, &ops->rules_list, list) {
if (idx < cb->args[1])
goto skip;
@@ -596,18 +599,21 @@ static int fib_rules_event(struct notifier_block *this, unsigned long event,
struct net_device *dev = ptr;
struct fib_rules_ops *ops;
+ if (dev->nd_net != &init_net)
+ return NOTIFY_DONE;
+
ASSERT_RTNL();
rcu_read_lock();
switch (event) {
case NETDEV_REGISTER:
list_for_each_entry(ops, &rules_ops, list)
- attach_rules(ops->rules_list, dev);
+ attach_rules(&ops->rules_list, dev);
break;
case NETDEV_UNREGISTER:
list_for_each_entry(ops, &rules_ops, list)
- detach_rules(ops->rules_list, dev);
+ detach_rules(&ops->rules_list, dev);
break;
}
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index f7de8f24d8dd..c52df858d0be 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -25,6 +25,7 @@
#include <linux/sysctl.h>
#endif
#include <linux/times.h>
+#include <net/net_namespace.h>
#include <net/neighbour.h>
#include <net/dst.h>
#include <net/sock.h>
@@ -55,9 +56,8 @@
#define PNEIGH_HASHMASK 0xF
static void neigh_timer_handler(unsigned long arg);
-#ifdef CONFIG_ARPD
-static void neigh_app_notify(struct neighbour *n);
-#endif
+static void __neigh_notify(struct neighbour *n, int type, int flags);
+static void neigh_update_notify(struct neighbour *neigh);
static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);
void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev);
@@ -105,6 +105,15 @@ static int neigh_blackhole(struct sk_buff *skb)
return -ENETDOWN;
}
+static void neigh_cleanup_and_release(struct neighbour *neigh)
+{
+ if (neigh->parms->neigh_cleanup)
+ neigh->parms->neigh_cleanup(neigh);
+
+ __neigh_notify(neigh, RTM_DELNEIGH, 0);
+ neigh_release(neigh);
+}
+
/*
* It is random distribution in the interval (1/2)*base...(3/2)*base.
* It corresponds to default IPv6 settings and is not overridable,
@@ -141,9 +150,7 @@ static int neigh_forced_gc(struct neigh_table *tbl)
n->dead = 1;
shrunk = 1;
write_unlock(&n->lock);
- if (n->parms->neigh_cleanup)
- n->parms->neigh_cleanup(n);
- neigh_release(n);
+ neigh_cleanup_and_release(n);
continue;
}
write_unlock(&n->lock);
@@ -214,9 +221,7 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
NEIGH_PRINTK2("neigh %p is stray.\n", n);
}
write_unlock(&n->lock);
- if (n->parms->neigh_cleanup)
- n->parms->neigh_cleanup(n);
- neigh_release(n);
+ neigh_cleanup_and_release(n);
}
}
}
@@ -677,9 +682,7 @@ static void neigh_periodic_timer(unsigned long arg)
*np = n->next;
n->dead = 1;
write_unlock(&n->lock);
- if (n->parms->neigh_cleanup)
- n->parms->neigh_cleanup(n);
- neigh_release(n);
+ neigh_cleanup_and_release(n);
continue;
}
write_unlock(&n->lock);
@@ -828,13 +831,10 @@ static void neigh_timer_handler(unsigned long arg)
out:
write_unlock(&neigh->lock);
}
+
if (notify)
- call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
+ neigh_update_notify(neigh);
-#ifdef CONFIG_ARPD
- if (notify && neigh->parms->app_probes)
- neigh_app_notify(neigh);
-#endif
neigh_release(neigh);
}
@@ -897,8 +897,8 @@ out_unlock_bh:
static void neigh_update_hhs(struct neighbour *neigh)
{
struct hh_cache *hh;
- void (*update)(struct hh_cache*, struct net_device*, unsigned char *) =
- neigh->dev->header_cache_update;
+ void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
+ = neigh->dev->header_ops->cache_update;
if (update) {
for (hh = neigh->hh; hh; hh = hh->hh_next) {
@@ -1063,11 +1063,8 @@ out:
write_unlock_bh(&neigh->lock);
if (notify)
- call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
-#ifdef CONFIG_ARPD
- if (notify && neigh->parms->app_probes)
- neigh_app_notify(neigh);
-#endif
+ neigh_update_notify(neigh);
+
return err;
}
@@ -1098,7 +1095,8 @@ static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst,
hh->hh_type = protocol;
atomic_set(&hh->hh_refcnt, 0);
hh->hh_next = NULL;
- if (dev->hard_header_cache(n, hh)) {
+
+ if (dev->header_ops->cache(n, hh)) {
kfree(hh);
hh = NULL;
} else {
@@ -1128,10 +1126,9 @@ int neigh_compat_output(struct sk_buff *skb)
__skb_pull(skb, skb_network_offset(skb));
- if (dev->hard_header &&
- dev->hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL,
- skb->len) < 0 &&
- dev->rebuild_header(skb))
+ if (dev_hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL,
+ skb->len) < 0 &&
+ dev->header_ops->rebuild(skb))
return 0;
return dev_queue_xmit(skb);
@@ -1153,17 +1150,17 @@ int neigh_resolve_output(struct sk_buff *skb)
if (!neigh_event_send(neigh, skb)) {
int err;
struct net_device *dev = neigh->dev;
- if (dev->hard_header_cache && !dst->hh) {
+ if (dev->header_ops->cache && !dst->hh) {
write_lock_bh(&neigh->lock);
if (!dst->hh)
neigh_hh_init(neigh, dst, dst->ops->protocol);
- err = dev->hard_header(skb, dev, ntohs(skb->protocol),
- neigh->ha, NULL, skb->len);
+ err = dev_hard_header(skb, dev, ntohs(skb->protocol),
+ neigh->ha, NULL, skb->len);
write_unlock_bh(&neigh->lock);
} else {
read_lock_bh(&neigh->lock);
- err = dev->hard_header(skb, dev, ntohs(skb->protocol),
- neigh->ha, NULL, skb->len);
+ err = dev_hard_header(skb, dev, ntohs(skb->protocol),
+ neigh->ha, NULL, skb->len);
read_unlock_bh(&neigh->lock);
}
if (err >= 0)
@@ -1194,8 +1191,8 @@ int neigh_connected_output(struct sk_buff *skb)
__skb_pull(skb, skb_network_offset(skb));
read_lock_bh(&neigh->lock);
- err = dev->hard_header(skb, dev, ntohs(skb->protocol),
- neigh->ha, NULL, skb->len);
+ err = dev_hard_header(skb, dev, ntohs(skb->protocol),
+ neigh->ha, NULL, skb->len);
read_unlock_bh(&neigh->lock);
if (err >= 0)
err = neigh->ops->queue_xmit(skb);
@@ -1354,7 +1351,7 @@ void neigh_table_init_no_netlink(struct neigh_table *tbl)
panic("cannot create neighbour cache statistics");
#ifdef CONFIG_PROC_FS
- tbl->pde = create_proc_entry(tbl->id, 0, proc_net_stat);
+ tbl->pde = create_proc_entry(tbl->id, 0, init_net.proc_net_stat);
if (!tbl->pde)
panic("cannot create neighbour proc dir entry");
tbl->pde->proc_fops = &neigh_stat_seq_fops;
@@ -1444,6 +1441,7 @@ int neigh_table_clear(struct neigh_table *tbl)
static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
+ struct net *net = skb->sk->sk_net;
struct ndmsg *ndm;
struct nlattr *dst_attr;
struct neigh_table *tbl;
@@ -1459,7 +1457,7 @@ static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
ndm = nlmsg_data(nlh);
if (ndm->ndm_ifindex) {
- dev = dev_get_by_index(ndm->ndm_ifindex);
+ dev = dev_get_by_index(net, ndm->ndm_ifindex);
if (dev == NULL) {
err = -ENODEV;
goto out;
@@ -1509,6 +1507,7 @@ out:
static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
+ struct net *net = skb->sk->sk_net;
struct ndmsg *ndm;
struct nlattr *tb[NDA_MAX+1];
struct neigh_table *tbl;
@@ -1525,7 +1524,7 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
ndm = nlmsg_data(nlh);
if (ndm->ndm_ifindex) {
- dev = dev_get_by_index(ndm->ndm_ifindex);
+ dev = dev_get_by_index(net, ndm->ndm_ifindex);
if (dev == NULL) {
err = -ENODEV;
goto out;
@@ -2000,6 +1999,11 @@ nla_put_failure:
return -EMSGSIZE;
}
+static void neigh_update_notify(struct neighbour *neigh)
+{
+ call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
+ __neigh_notify(neigh, RTM_NEWNEIGH, 0);
+}
static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
struct netlink_callback *cb)
@@ -2095,11 +2099,8 @@ void __neigh_for_each_release(struct neigh_table *tbl,
} else
np = &n->next;
write_unlock(&n->lock);
- if (release) {
- if (n->parms->neigh_cleanup)
- n->parms->neigh_cleanup(n);
- neigh_release(n);
- }
+ if (release)
+ neigh_cleanup_and_release(n);
}
}
}
@@ -2422,7 +2423,6 @@ static const struct file_operations neigh_stat_seq_fops = {
#endif /* CONFIG_PROC_FS */
-#ifdef CONFIG_ARPD
static inline size_t neigh_nlmsg_size(void)
{
return NLMSG_ALIGN(sizeof(struct ndmsg))
@@ -2454,16 +2454,11 @@ errout:
rtnl_set_sk_err(RTNLGRP_NEIGH, err);
}
+#ifdef CONFIG_ARPD
void neigh_app_ns(struct neighbour *n)
{
__neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST);
}
-
-static void neigh_app_notify(struct neighbour *n)
-{
- __neigh_notify(n, RTM_NEWNEIGH, 0);
-}
-
#endif /* CONFIG_ARPD */
#ifdef CONFIG_SYSCTL
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 5c19b0646d7a..6628e457ddc0 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -18,6 +18,7 @@
#include <linux/wireless.h>
#include <net/iw_handler.h>
+#ifdef CONFIG_SYSFS
static const char fmt_hex[] = "%#x\n";
static const char fmt_long_hex[] = "%#lx\n";
static const char fmt_dec[] = "%d\n";
@@ -216,20 +217,6 @@ static ssize_t store_tx_queue_len(struct device *dev,
return netdev_store(dev, attr, buf, len, change_tx_queue_len);
}
-NETDEVICE_SHOW(weight, fmt_dec);
-
-static int change_weight(struct net_device *net, unsigned long new_weight)
-{
- net->weight = new_weight;
- return 0;
-}
-
-static ssize_t store_weight(struct device *dev, struct device_attribute *attr,
- const char *buf, size_t len)
-{
- return netdev_store(dev, attr, buf, len, change_weight);
-}
-
static struct device_attribute net_class_attributes[] = {
__ATTR(addr_len, S_IRUGO, show_addr_len, NULL),
__ATTR(iflink, S_IRUGO, show_iflink, NULL),
@@ -246,7 +233,6 @@ static struct device_attribute net_class_attributes[] = {
__ATTR(flags, S_IRUGO | S_IWUSR, show_flags, store_flags),
__ATTR(tx_queue_len, S_IRUGO | S_IWUSR, show_tx_queue_len,
store_tx_queue_len),
- __ATTR(weight, S_IRUGO | S_IWUSR, show_weight, store_weight),
{}
};
@@ -407,29 +393,25 @@ static struct attribute_group wireless_group = {
};
#endif
+#endif /* CONFIG_SYSFS */
+
#ifdef CONFIG_HOTPLUG
-static int netdev_uevent(struct device *d, char **envp,
- int num_envp, char *buf, int size)
+static int netdev_uevent(struct device *d, struct kobj_uevent_env *env)
{
struct net_device *dev = to_net_dev(d);
- int retval, len = 0, i = 0;
+ int retval;
/* pass interface to uevent. */
- retval = add_uevent_var(envp, num_envp, &i,
- buf, size, &len,
- "INTERFACE=%s", dev->name);
+ retval = add_uevent_var(env, "INTERFACE=%s", dev->name);
if (retval)
goto exit;
/* pass ifindex to uevent.
* ifindex is useful as it won't change (interface name may change)
* and is what RtNetlink uses natively. */
- retval = add_uevent_var(envp, num_envp, &i,
- buf, size, &len,
- "IFINDEX=%d", dev->ifindex);
+ retval = add_uevent_var(env, "IFINDEX=%d", dev->ifindex);
exit:
- envp[i] = NULL;
return retval;
}
#endif
@@ -450,7 +432,9 @@ static void netdev_release(struct device *d)
static struct class net_class = {
.name = "net",
.dev_release = netdev_release,
+#ifdef CONFIG_SYSFS
.dev_attrs = net_class_attributes,
+#endif /* CONFIG_SYSFS */
#ifdef CONFIG_HOTPLUG
.dev_uevent = netdev_uevent,
#endif
@@ -459,7 +443,7 @@ static struct class net_class = {
/* Delete sysfs entries but hold kobject reference until after all
* netdev references are gone.
*/
-void netdev_unregister_sysfs(struct net_device * net)
+void netdev_unregister_kobject(struct net_device * net)
{
struct device *dev = &(net->dev);
@@ -468,7 +452,7 @@ void netdev_unregister_sysfs(struct net_device * net)
}
/* Create sysfs entries for network device. */
-int netdev_register_sysfs(struct net_device *net)
+int netdev_register_kobject(struct net_device *net)
{
struct device *dev = &(net->dev);
struct attribute_group **groups = net->sysfs_groups;
@@ -481,6 +465,7 @@ int netdev_register_sysfs(struct net_device *net)
BUILD_BUG_ON(BUS_ID_SIZE < IFNAMSIZ);
strlcpy(dev->bus_id, net->name, BUS_ID_SIZE);
+#ifdef CONFIG_SYSFS
if (net->get_stats)
*groups++ = &netstat_group;
@@ -488,11 +473,12 @@ int netdev_register_sysfs(struct net_device *net)
if (net->wireless_handlers && net->wireless_handlers->get_wireless_stats)
*groups++ = &wireless_group;
#endif
+#endif /* CONFIG_SYSFS */
return device_add(dev);
}
-int netdev_sysfs_init(void)
+int netdev_kobject_init(void)
{
return class_register(&net_class);
}
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
new file mode 100644
index 000000000000..6f71db8c4428
--- /dev/null
+++ b/net/core/net_namespace.c
@@ -0,0 +1,317 @@
+#include <linux/workqueue.h>
+#include <linux/rtnetlink.h>
+#include <linux/cache.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/delay.h>
+#include <linux/sched.h>
+#include <net/net_namespace.h>
+
+/*
+ * Our network namespace constructor/destructor lists
+ */
+
+static LIST_HEAD(pernet_list);
+static struct list_head *first_device = &pernet_list;
+static DEFINE_MUTEX(net_mutex);
+
+LIST_HEAD(net_namespace_list);
+
+static struct kmem_cache *net_cachep;
+
+struct net init_net;
+EXPORT_SYMBOL_GPL(init_net);
+
+static struct net *net_alloc(void)
+{
+ return kmem_cache_zalloc(net_cachep, GFP_KERNEL);
+}
+
+static void net_free(struct net *net)
+{
+ if (!net)
+ return;
+
+ if (unlikely(atomic_read(&net->use_count) != 0)) {
+ printk(KERN_EMERG "network namespace not free! Usage: %d\n",
+ atomic_read(&net->use_count));
+ return;
+ }
+
+ kmem_cache_free(net_cachep, net);
+}
+
+static void cleanup_net(struct work_struct *work)
+{
+ struct pernet_operations *ops;
+ struct net *net;
+
+ net = container_of(work, struct net, work);
+
+ mutex_lock(&net_mutex);
+
+ /* Don't let anyone else find us. */
+ rtnl_lock();
+ list_del(&net->list);
+ rtnl_unlock();
+
+ /* Run all of the network namespace exit methods */
+ list_for_each_entry_reverse(ops, &pernet_list, list) {
+ if (ops->exit)
+ ops->exit(net);
+ }
+
+ mutex_unlock(&net_mutex);
+
+ /* Ensure there are no outstanding rcu callbacks using this
+ * network namespace.
+ */
+ rcu_barrier();
+
+ /* Finally it is safe to free my network namespace structure */
+ net_free(net);
+}
+
+
+void __put_net(struct net *net)
+{
+ /* Cleanup the network namespace in process context */
+ INIT_WORK(&net->work, cleanup_net);
+ schedule_work(&net->work);
+}
+EXPORT_SYMBOL_GPL(__put_net);
+
+/*
+ * setup_net runs the initializers for the network namespace object.
+ */
+static int setup_net(struct net *net)
+{
+ /* Must be called with net_mutex held */
+ struct pernet_operations *ops;
+ int error;
+
+ atomic_set(&net->count, 1);
+ atomic_set(&net->use_count, 0);
+
+ error = 0;
+ list_for_each_entry(ops, &pernet_list, list) {
+ if (ops->init) {
+ error = ops->init(net);
+ if (error < 0)
+ goto out_undo;
+ }
+ }
+out:
+ return error;
+
+out_undo:
+ /* Walk through the list backwards calling the exit functions
+ * for the pernet modules whose init functions did not fail.
+ */
+ list_for_each_entry_continue_reverse(ops, &pernet_list, list) {
+ if (ops->exit)
+ ops->exit(net);
+ }
+ goto out;
+}
+
+struct net *copy_net_ns(unsigned long flags, struct net *old_net)
+{
+ struct net *new_net = NULL;
+ int err;
+
+ get_net(old_net);
+
+ if (!(flags & CLONE_NEWNET))
+ return old_net;
+
+#ifndef CONFIG_NET_NS
+ return ERR_PTR(-EINVAL);
+#endif
+
+ err = -ENOMEM;
+ new_net = net_alloc();
+ if (!new_net)
+ goto out;
+
+ mutex_lock(&net_mutex);
+ err = setup_net(new_net);
+ if (err)
+ goto out_unlock;
+
+ rtnl_lock();
+ list_add_tail(&new_net->list, &net_namespace_list);
+ rtnl_unlock();
+
+
+out_unlock:
+ mutex_unlock(&net_mutex);
+out:
+ put_net(old_net);
+ if (err) {
+ net_free(new_net);
+ new_net = ERR_PTR(err);
+ }
+ return new_net;
+}
+
+static int __init net_ns_init(void)
+{
+ int err;
+
+ printk(KERN_INFO "net_namespace: %zd bytes\n", sizeof(struct net));
+ net_cachep = kmem_cache_create("net_namespace", sizeof(struct net),
+ SMP_CACHE_BYTES,
+ SLAB_PANIC, NULL);
+ mutex_lock(&net_mutex);
+ err = setup_net(&init_net);
+
+ rtnl_lock();
+ list_add_tail(&init_net.list, &net_namespace_list);
+ rtnl_unlock();
+
+ mutex_unlock(&net_mutex);
+ if (err)
+ panic("Could not setup the initial network namespace");
+
+ return 0;
+}
+
+pure_initcall(net_ns_init);
+
+static int register_pernet_operations(struct list_head *list,
+ struct pernet_operations *ops)
+{
+ struct net *net, *undo_net;
+ int error;
+
+ error = 0;
+ list_add_tail(&ops->list, list);
+ for_each_net(net) {
+ if (ops->init) {
+ error = ops->init(net);
+ if (error)
+ goto out_undo;
+ }
+ }
+out:
+ return error;
+
+out_undo:
+ /* If I have an error cleanup all namespaces I initialized */
+ list_del(&ops->list);
+ for_each_net(undo_net) {
+ if (undo_net == net)
+ goto undone;
+ if (ops->exit)
+ ops->exit(undo_net);
+ }
+undone:
+ goto out;
+}
+
+static void unregister_pernet_operations(struct pernet_operations *ops)
+{
+ struct net *net;
+
+ list_del(&ops->list);
+ for_each_net(net)
+ if (ops->exit)
+ ops->exit(net);
+}
+
+/**
+ * register_pernet_subsys - register a network namespace subsystem
+ * @ops: pernet operations structure for the subsystem
+ *
+ * Register a subsystem which has init and exit functions
+ * that are called when network namespaces are created and
+ * destroyed respectively.
+ *
+ * When registered all network namespace init functions are
+ * called for every existing network namespace. Allowing kernel
+ * modules to have a race free view of the set of network namespaces.
+ *
+ * When a new network namespace is created all of the init
+ * methods are called in the order in which they were registered.
+ *
+ * When a network namespace is destroyed all of the exit methods
+ * are called in the reverse of the order with which they were
+ * registered.
+ */
+int register_pernet_subsys(struct pernet_operations *ops)
+{
+ int error;
+ mutex_lock(&net_mutex);
+ error = register_pernet_operations(first_device, ops);
+ mutex_unlock(&net_mutex);
+ return error;
+}
+EXPORT_SYMBOL_GPL(register_pernet_subsys);
+
+/**
+ * unregister_pernet_subsys - unregister a network namespace subsystem
+ * @ops: pernet operations structure to manipulate
+ *
+ * Remove the pernet operations structure from the list to be
+ * used when network namespaces are created or destoryed. In
+ * addition run the exit method for all existing network
+ * namespaces.
+ */
+void unregister_pernet_subsys(struct pernet_operations *module)
+{
+ mutex_lock(&net_mutex);
+ unregister_pernet_operations(module);
+ mutex_unlock(&net_mutex);
+}
+EXPORT_SYMBOL_GPL(unregister_pernet_subsys);
+
+/**
+ * register_pernet_device - register a network namespace device
+ * @ops: pernet operations structure for the subsystem
+ *
+ * Register a device which has init and exit functions
+ * that are called when network namespaces are created and
+ * destroyed respectively.
+ *
+ * When registered all network namespace init functions are
+ * called for every existing network namespace. Allowing kernel
+ * modules to have a race free view of the set of network namespaces.
+ *
+ * When a new network namespace is created all of the init
+ * methods are called in the order in which they were registered.
+ *
+ * When a network namespace is destroyed all of the exit methods
+ * are called in the reverse of the order with which they were
+ * registered.
+ */
+int register_pernet_device(struct pernet_operations *ops)
+{
+ int error;
+ mutex_lock(&net_mutex);
+ error = register_pernet_operations(&pernet_list, ops);
+ if (!error && (first_device == &pernet_list))
+ first_device = &ops->list;
+ mutex_unlock(&net_mutex);
+ return error;
+}
+EXPORT_SYMBOL_GPL(register_pernet_device);
+
+/**
+ * unregister_pernet_device - unregister a network namespace netdevice
+ * @ops: pernet operations structure to manipulate
+ *
+ * Remove the pernet operations structure from the list to be
+ * used when network namespaces are created or destoryed. In
+ * addition run the exit method for all existing network
+ * namespaces.
+ */
+void unregister_pernet_device(struct pernet_operations *ops)
+{
+ mutex_lock(&net_mutex);
+ if (&ops->list == first_device)
+ first_device = first_device->next;
+ unregister_pernet_operations(ops);
+ mutex_unlock(&net_mutex);
+}
+EXPORT_SYMBOL_GPL(unregister_pernet_device);
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index de1b26aa5720..95daba624967 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -119,19 +119,22 @@ static __sum16 checksum_udp(struct sk_buff *skb, struct udphdr *uh,
static void poll_napi(struct netpoll *np)
{
struct netpoll_info *npinfo = np->dev->npinfo;
+ struct napi_struct *napi;
int budget = 16;
- if (test_bit(__LINK_STATE_RX_SCHED, &np->dev->state) &&
- npinfo->poll_owner != smp_processor_id() &&
- spin_trylock(&npinfo->poll_lock)) {
- npinfo->rx_flags |= NETPOLL_RX_DROP;
- atomic_inc(&trapped);
+ list_for_each_entry(napi, &np->dev->napi_list, dev_list) {
+ if (test_bit(NAPI_STATE_SCHED, &napi->state) &&
+ napi->poll_owner != smp_processor_id() &&
+ spin_trylock(&napi->poll_lock)) {
+ npinfo->rx_flags |= NETPOLL_RX_DROP;
+ atomic_inc(&trapped);
- np->dev->poll(np->dev, &budget);
+ napi->poll(napi, budget);
- atomic_dec(&trapped);
- npinfo->rx_flags &= ~NETPOLL_RX_DROP;
- spin_unlock(&npinfo->poll_lock);
+ atomic_dec(&trapped);
+ npinfo->rx_flags &= ~NETPOLL_RX_DROP;
+ spin_unlock(&napi->poll_lock);
+ }
}
}
@@ -157,7 +160,7 @@ void netpoll_poll(struct netpoll *np)
/* Process pending work on NIC */
np->dev->poll_controller(np->dev);
- if (np->dev->poll)
+ if (!list_empty(&np->dev->napi_list))
poll_napi(np);
service_arp_queue(np->dev->npinfo);
@@ -233,6 +236,17 @@ repeat:
return skb;
}
+static int netpoll_owner_active(struct net_device *dev)
+{
+ struct napi_struct *napi;
+
+ list_for_each_entry(napi, &dev->napi_list, dev_list) {
+ if (napi->poll_owner == smp_processor_id())
+ return 1;
+ }
+ return 0;
+}
+
static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
{
int status = NETDEV_TX_BUSY;
@@ -246,8 +260,7 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
}
/* don't get messages out of order, and no recursion */
- if (skb_queue_len(&npinfo->txq) == 0 &&
- npinfo->poll_owner != smp_processor_id()) {
+ if (skb_queue_len(&npinfo->txq) == 0 && !netpoll_owner_active(dev)) {
unsigned long flags;
local_irq_save(flags);
@@ -402,11 +415,9 @@ static void arp_reply(struct sk_buff *skb)
send_skb->protocol = htons(ETH_P_ARP);
/* Fill the device header for the ARP frame */
-
- if (np->dev->hard_header &&
- np->dev->hard_header(send_skb, skb->dev, ptype,
- sha, np->local_mac,
- send_skb->len) < 0) {
+ if (dev_hard_header(send_skb, skb->dev, ptype,
+ sha, np->local_mac,
+ send_skb->len) < 0) {
kfree_skb(send_skb);
return;
}
@@ -519,6 +530,23 @@ out:
return 0;
}
+void netpoll_print_options(struct netpoll *np)
+{
+ DECLARE_MAC_BUF(mac);
+ printk(KERN_INFO "%s: local port %d\n",
+ np->name, np->local_port);
+ printk(KERN_INFO "%s: local IP %d.%d.%d.%d\n",
+ np->name, HIPQUAD(np->local_ip));
+ printk(KERN_INFO "%s: interface %s\n",
+ np->name, np->dev_name);
+ printk(KERN_INFO "%s: remote port %d\n",
+ np->name, np->remote_port);
+ printk(KERN_INFO "%s: remote IP %d.%d.%d.%d\n",
+ np->name, HIPQUAD(np->remote_ip));
+ printk(KERN_INFO "%s: remote ethernet address %s\n",
+ np->name, print_mac(mac, np->remote_mac));
+}
+
int netpoll_parse_options(struct netpoll *np, char *opt)
{
char *cur=opt, *delim;
@@ -531,7 +559,6 @@ int netpoll_parse_options(struct netpoll *np, char *opt)
cur = delim;
}
cur++;
- printk(KERN_INFO "%s: local port %d\n", np->name, np->local_port);
if (*cur != '/') {
if ((delim = strchr(cur, '/')) == NULL)
@@ -539,9 +566,6 @@ int netpoll_parse_options(struct netpoll *np, char *opt)
*delim = 0;
np->local_ip = ntohl(in_aton(cur));
cur = delim;
-
- printk(KERN_INFO "%s: local IP %d.%d.%d.%d\n",
- np->name, HIPQUAD(np->local_ip));
}
cur++;
@@ -555,8 +579,6 @@ int netpoll_parse_options(struct netpoll *np, char *opt)
}
cur++;
- printk(KERN_INFO "%s: interface %s\n", np->name, np->dev_name);
-
if (*cur != '@') {
/* dst port */
if ((delim = strchr(cur, '@')) == NULL)
@@ -566,7 +588,6 @@ int netpoll_parse_options(struct netpoll *np, char *opt)
cur = delim;
}
cur++;
- printk(KERN_INFO "%s: remote port %d\n", np->name, np->remote_port);
/* dst ip */
if ((delim = strchr(cur, '/')) == NULL)
@@ -575,9 +596,6 @@ int netpoll_parse_options(struct netpoll *np, char *opt)
np->remote_ip = ntohl(in_aton(cur));
cur = delim + 1;
- printk(KERN_INFO "%s: remote IP %d.%d.%d.%d\n",
- np->name, HIPQUAD(np->remote_ip));
-
if (*cur != 0) {
/* MAC address */
if ((delim = strchr(cur, ':')) == NULL)
@@ -608,15 +626,7 @@ int netpoll_parse_options(struct netpoll *np, char *opt)
np->remote_mac[5] = simple_strtol(cur, NULL, 16);
}
- printk(KERN_INFO "%s: remote ethernet address "
- "%02x:%02x:%02x:%02x:%02x:%02x\n",
- np->name,
- np->remote_mac[0],
- np->remote_mac[1],
- np->remote_mac[2],
- np->remote_mac[3],
- np->remote_mac[4],
- np->remote_mac[5]);
+ netpoll_print_options(np);
return 0;
@@ -635,7 +645,7 @@ int netpoll_setup(struct netpoll *np)
int err;
if (np->dev_name)
- ndev = dev_get_by_name(np->dev_name);
+ ndev = dev_get_by_name(&init_net, np->dev_name);
if (!ndev) {
printk(KERN_ERR "%s: %s doesn't exist, aborting.\n",
np->name, np->dev_name);
@@ -652,8 +662,6 @@ int netpoll_setup(struct netpoll *np)
npinfo->rx_flags = 0;
npinfo->rx_np = NULL;
- spin_lock_init(&npinfo->poll_lock);
- npinfo->poll_owner = -1;
spin_lock_init(&npinfo->rx_lock);
skb_queue_head_init(&npinfo->arp_tx);
@@ -820,6 +828,7 @@ void netpoll_set_trap(int trap)
EXPORT_SYMBOL(netpoll_set_trap);
EXPORT_SYMBOL(netpoll_trap);
+EXPORT_SYMBOL(netpoll_print_options);
EXPORT_SYMBOL(netpoll_parse_options);
EXPORT_SYMBOL(netpoll_setup);
EXPORT_SYMBOL(netpoll_cleanup);
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 803d0c8826af..2100c734b102 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -152,6 +152,7 @@
#include <linux/wait.h>
#include <linux/etherdevice.h>
#include <linux/kthread.h>
+#include <net/net_namespace.h>
#include <net/checksum.h>
#include <net/ipv6.h>
#include <net/addrconf.h>
@@ -167,7 +168,7 @@
#include <asm/div64.h> /* do_div */
#include <asm/timex.h>
-#define VERSION "pktgen v2.68: Packet Generator for packet performance testing.\n"
+#define VERSION "pktgen v2.69: Packet Generator for packet performance testing.\n"
/* The buckets are exponential in 'width' */
#define LAT_BUCKETS_MAX 32
@@ -189,6 +190,7 @@
#define F_SVID_RND (1<<10) /* Random SVLAN ID */
#define F_FLOW_SEQ (1<<11) /* Sequential flows */
#define F_IPSEC_ON (1<<12) /* ipsec on for flows */
+#define F_QUEUE_MAP_RND (1<<13) /* queue map Random */
/* Thread control flag bits */
#define T_TERMINATE (1<<0)
@@ -331,6 +333,7 @@ struct pktgen_dev {
__be32 cur_daddr;
__u16 cur_udp_dst;
__u16 cur_udp_src;
+ __u16 cur_queue_map;
__u32 cur_pkt_size;
__u8 hh[14];
@@ -358,6 +361,10 @@ struct pktgen_dev {
unsigned lflow; /* Flow length (config) */
unsigned nflows; /* accumulated flows (stats) */
unsigned curfl; /* current sequenced flow (state)*/
+
+ u16 queue_map_min;
+ u16 queue_map_max;
+
#ifdef CONFIG_XFRM
__u8 ipsmode; /* IPSEC mode (config) */
__u8 ipsproto; /* IPSEC type (config) */
@@ -378,7 +385,6 @@ struct pktgen_thread {
struct list_head th_list;
struct task_struct *tsk;
char result[512];
- u32 max_before_softirq; /* We'll call do_softirq to prevent starvation. */
/* Field for thread to receive "posted" events terminate, stop ifs etc. */
@@ -593,11 +599,11 @@ static const struct file_operations pktgen_fops = {
static int pktgen_if_show(struct seq_file *seq, void *v)
{
- int i;
struct pktgen_dev *pkt_dev = seq->private;
__u64 sa;
__u64 stopped;
__u64 now = getCurUs();
+ DECLARE_MAC_BUF(mac);
seq_printf(seq,
"Params: count %llu min_pkt_size: %u max_pkt_size: %u\n",
@@ -613,6 +619,11 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
seq_printf(seq, " flows: %u flowlen: %u\n", pkt_dev->cflows,
pkt_dev->lflow);
+ seq_printf(seq,
+ " queue_map_min: %u queue_map_max: %u\n",
+ pkt_dev->queue_map_min,
+ pkt_dev->queue_map_max);
+
if (pkt_dev->flags & F_IPV6) {
char b1[128], b2[128], b3[128];
fmt_ip6(b1, pkt_dev->in6_saddr.s6_addr);
@@ -637,19 +648,12 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
seq_puts(seq, " src_mac: ");
- if (is_zero_ether_addr(pkt_dev->src_mac))
- for (i = 0; i < 6; i++)
- seq_printf(seq, "%02X%s", pkt_dev->odev->dev_addr[i],
- i == 5 ? " " : ":");
- else
- for (i = 0; i < 6; i++)
- seq_printf(seq, "%02X%s", pkt_dev->src_mac[i],
- i == 5 ? " " : ":");
+ seq_printf(seq, "%s ",
+ print_mac(mac, is_zero_ether_addr(pkt_dev->src_mac) ?
+ pkt_dev->odev->dev_addr : pkt_dev->src_mac));
seq_printf(seq, "dst_mac: ");
- for (i = 0; i < 6; i++)
- seq_printf(seq, "%02X%s", pkt_dev->dst_mac[i],
- i == 5 ? "\n" : ":");
+ seq_printf(seq, "%s\n", print_mac(mac, pkt_dev->dst_mac));
seq_printf(seq,
" udp_src_min: %d udp_src_max: %d udp_dst_min: %d udp_dst_max: %d\n",
@@ -709,6 +713,9 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
if (pkt_dev->flags & F_MPLS_RND)
seq_printf(seq, "MPLS_RND ");
+ if (pkt_dev->flags & F_QUEUE_MAP_RND)
+ seq_printf(seq, "QUEUE_MAP_RND ");
+
if (pkt_dev->cflows) {
if (pkt_dev->flags & F_FLOW_SEQ)
seq_printf(seq, "FLOW_SEQ "); /*in sequence flows*/
@@ -764,6 +771,8 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
seq_printf(seq, " cur_udp_dst: %d cur_udp_src: %d\n",
pkt_dev->cur_udp_dst, pkt_dev->cur_udp_src);
+ seq_printf(seq, " cur_queue_map: %u\n", pkt_dev->cur_queue_map);
+
seq_printf(seq, " flows: %u\n", pkt_dev->nflows);
if (pkt_dev->result[0])
@@ -1215,6 +1224,11 @@ static ssize_t pktgen_if_write(struct file *file,
else if (strcmp(f, "FLOW_SEQ") == 0)
pkt_dev->flags |= F_FLOW_SEQ;
+ else if (strcmp(f, "QUEUE_MAP_RND") == 0)
+ pkt_dev->flags |= F_QUEUE_MAP_RND;
+
+ else if (strcmp(f, "!QUEUE_MAP_RND") == 0)
+ pkt_dev->flags &= ~F_QUEUE_MAP_RND;
#ifdef CONFIG_XFRM
else if (strcmp(f, "IPSEC") == 0)
pkt_dev->flags |= F_IPSEC_ON;
@@ -1526,16 +1540,40 @@ static ssize_t pktgen_if_write(struct file *file,
return count;
}
+ if (!strcmp(name, "queue_map_min")) {
+ len = num_arg(&user_buffer[i], 5, &value);
+ if (len < 0) {
+ return len;
+ }
+ i += len;
+ pkt_dev->queue_map_min = value;
+ sprintf(pg_result, "OK: queue_map_min=%u", pkt_dev->queue_map_min);
+ return count;
+ }
+
+ if (!strcmp(name, "queue_map_max")) {
+ len = num_arg(&user_buffer[i], 5, &value);
+ if (len < 0) {
+ return len;
+ }
+ i += len;
+ pkt_dev->queue_map_max = value;
+ sprintf(pg_result, "OK: queue_map_max=%u", pkt_dev->queue_map_max);
+ return count;
+ }
+
if (!strcmp(name, "mpls")) {
- unsigned n, offset;
+ unsigned n, cnt;
+
len = get_labels(&user_buffer[i], pkt_dev);
- if (len < 0) { return len; }
+ if (len < 0)
+ return len;
i += len;
- offset = sprintf(pg_result, "OK: mpls=");
+ cnt = sprintf(pg_result, "OK: mpls=");
for (n = 0; n < pkt_dev->nr_labels; n++)
- offset += sprintf(pg_result + offset,
- "%08x%s", ntohl(pkt_dev->labels[n]),
- n == pkt_dev->nr_labels-1 ? "" : ",");
+ cnt += sprintf(pg_result + cnt,
+ "%08x%s", ntohl(pkt_dev->labels[n]),
+ n == pkt_dev->nr_labels-1 ? "" : ",");
if (pkt_dev->nr_labels && pkt_dev->vlan_id != 0xffff) {
pkt_dev->vlan_id = 0xffff; /* turn off VLAN/SVLAN */
@@ -1718,9 +1756,6 @@ static int pktgen_thread_show(struct seq_file *seq, void *v)
BUG_ON(!t);
- seq_printf(seq, "Name: %s max_before_softirq: %d\n",
- t->tsk->comm, t->max_before_softirq);
-
seq_printf(seq, "Running: ");
if_lock(t);
@@ -1753,7 +1788,6 @@ static ssize_t pktgen_thread_write(struct file *file,
int i = 0, max, len, ret;
char name[40];
char *pg_result;
- unsigned long value = 0;
if (count < 1) {
// sprintf(pg_result, "Wrong command format");
@@ -1827,12 +1861,8 @@ static ssize_t pktgen_thread_write(struct file *file,
}
if (!strcmp(name, "max_before_softirq")) {
- len = num_arg(&user_buffer[i], 10, &value);
- mutex_lock(&pktgen_thread_lock);
- t->max_before_softirq = value;
- mutex_unlock(&pktgen_thread_lock);
+ sprintf(pg_result, "OK: Note! max_before_softirq is obsoleted -- Do not use");
ret = count;
- sprintf(pg_result, "OK: max_before_softirq=%lu", value);
goto out;
}
@@ -1940,6 +1970,9 @@ static int pktgen_device_event(struct notifier_block *unused,
{
struct net_device *dev = ptr;
+ if (dev->nd_net != &init_net)
+ return NOTIFY_DONE;
+
/* It is OK that we do not hold the group lock right now,
* as we run under the RTNL lock.
*/
@@ -1970,7 +2003,7 @@ static int pktgen_setup_dev(struct pktgen_dev *pkt_dev, const char *ifname)
pkt_dev->odev = NULL;
}
- odev = dev_get_by_name(ifname);
+ odev = dev_get_by_name(&init_net, ifname);
if (!odev) {
printk(KERN_ERR "pktgen: no such netdevice: \"%s\"\n", ifname);
return -ENODEV;
@@ -2111,7 +2144,6 @@ static void spin(struct pktgen_dev *pkt_dev, __u64 spin_until_us)
if (spin_until_us - now > jiffies_to_usecs(1) + 1)
schedule_timeout_interruptible(1);
else if (spin_until_us - now > 100) {
- do_softirq();
if (!pkt_dev->running)
return;
if (need_resched())
@@ -2387,6 +2419,20 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
pkt_dev->cur_pkt_size = t;
}
+ if (pkt_dev->queue_map_min < pkt_dev->queue_map_max) {
+ __u16 t;
+ if (pkt_dev->flags & F_QUEUE_MAP_RND) {
+ t = random32() %
+ (pkt_dev->queue_map_max - pkt_dev->queue_map_min + 1)
+ + pkt_dev->queue_map_min;
+ } else {
+ t = pkt_dev->cur_queue_map + 1;
+ if (t > pkt_dev->queue_map_max)
+ t = pkt_dev->queue_map_min;
+ }
+ pkt_dev->cur_queue_map = t;
+ }
+
pkt_dev->flows[flow].count++;
}
@@ -2557,6 +2603,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
skb->network_header = skb->tail;
skb->transport_header = skb->network_header + sizeof(struct iphdr);
skb_put(skb, sizeof(struct iphdr) + sizeof(struct udphdr));
+ skb->queue_mapping = pkt_dev->cur_queue_map;
iph = ip_hdr(skb);
udph = udp_hdr(skb);
@@ -2686,6 +2733,7 @@ static unsigned int scan_ip6(const char *s, char ip[16])
unsigned int prefixlen = 0;
unsigned int suffixlen = 0;
__be32 tmp;
+ char *pos;
for (i = 0; i < 16; i++)
ip[i] = 0;
@@ -2700,12 +2748,9 @@ static unsigned int scan_ip6(const char *s, char ip[16])
}
s++;
}
- {
- char *tmp;
- u = simple_strtoul(s, &tmp, 16);
- i = tmp - s;
- }
+ u = simple_strtoul(s, &pos, 16);
+ i = pos - s;
if (!i)
return 0;
if (prefixlen == 12 && s[i] == '.') {
@@ -2733,11 +2778,9 @@ static unsigned int scan_ip6(const char *s, char ip[16])
len++;
} else if (suffixlen != 0)
break;
- {
- char *tmp;
- u = simple_strtol(s, &tmp, 16);
- i = tmp - s;
- }
+
+ u = simple_strtol(s, &pos, 16);
+ i = pos - s;
if (!i) {
if (*s)
len--;
@@ -2898,6 +2941,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
skb->network_header = skb->tail;
skb->transport_header = skb->network_header + sizeof(struct ipv6hdr);
skb_put(skb, sizeof(struct ipv6hdr) + sizeof(struct udphdr));
+ skb->queue_mapping = pkt_dev->cur_queue_map;
iph = ipv6_hdr(skb);
udph = udp_hdr(skb);
@@ -3465,8 +3509,6 @@ static int pktgen_thread_worker(void *arg)
struct pktgen_thread *t = arg;
struct pktgen_dev *pkt_dev = NULL;
int cpu = t->cpu;
- u32 max_before_softirq;
- u32 tx_since_softirq = 0;
BUG_ON(smp_processor_id() != cpu);
@@ -3474,8 +3516,6 @@ static int pktgen_thread_worker(void *arg)
pr_debug("pktgen: starting pktgen/%d: pid=%d\n", cpu, current->pid);
- max_before_softirq = t->max_before_softirq;
-
set_current_state(TASK_INTERRUPTIBLE);
set_freezable();
@@ -3494,24 +3534,9 @@ static int pktgen_thread_worker(void *arg)
__set_current_state(TASK_RUNNING);
- if (pkt_dev) {
-
+ if (pkt_dev)
pktgen_xmit(pkt_dev);
- /*
- * We like to stay RUNNING but must also give
- * others fair share.
- */
-
- tx_since_softirq += pkt_dev->last_ok;
-
- if (tx_since_softirq > max_before_softirq) {
- if (local_softirq_pending())
- do_softirq();
- tx_since_softirq = 0;
- }
- }
-
if (t->control & T_STOP) {
pktgen_stop(t);
t->control &= ~(T_STOP);
@@ -3778,7 +3803,7 @@ static int __init pg_init(void)
printk(KERN_INFO "%s", version);
- pg_proc_dir = proc_mkdir(PG_PROC_DIR, proc_net);
+ pg_proc_dir = proc_mkdir(PG_PROC_DIR, init_net.proc_net);
if (!pg_proc_dir)
return -ENODEV;
pg_proc_dir->owner = THIS_MODULE;
@@ -3787,7 +3812,7 @@ static int __init pg_init(void)
if (pe == NULL) {
printk(KERN_ERR "pktgen: ERROR: cannot create %s "
"procfs entry.\n", PGCTRL);
- proc_net_remove(PG_PROC_DIR);
+ proc_net_remove(&init_net, PG_PROC_DIR);
return -EINVAL;
}
@@ -3811,7 +3836,7 @@ static int __init pg_init(void)
"all threads\n");
unregister_netdevice_notifier(&pktgen_notifier_block);
remove_proc_entry(PGCTRL, pg_proc_dir);
- proc_net_remove(PG_PROC_DIR);
+ proc_net_remove(&init_net, PG_PROC_DIR);
return -ENODEV;
}
@@ -3838,7 +3863,7 @@ static void __exit pg_cleanup(void)
/* Clean up proc file system */
remove_proc_entry(PGCTRL, pg_proc_dir);
- proc_net_remove(PG_PROC_DIR);
+ proc_net_remove(&init_net, PG_PROC_DIR);
}
module_init(pg_init);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 4756d5857abf..1072d16696c3 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -35,6 +35,7 @@
#include <linux/security.h>
#include <linux/mutex.h>
#include <linux/if_addr.h>
+#include <linux/nsproxy.h>
#include <asm/uaccess.h>
#include <asm/system.h>
@@ -74,8 +75,6 @@ void __rtnl_unlock(void)
void rtnl_unlock(void)
{
mutex_unlock(&rtnl_mutex);
- if (rtnl && rtnl->sk_receive_queue.qlen)
- rtnl->sk_data_ready(rtnl, 0);
netdev_run_todo();
}
@@ -306,10 +305,13 @@ EXPORT_SYMBOL_GPL(rtnl_link_register);
void __rtnl_link_unregister(struct rtnl_link_ops *ops)
{
struct net_device *dev, *n;
+ struct net *net;
- for_each_netdev_safe(dev, n) {
- if (dev->rtnl_link_ops == ops)
- ops->dellink(dev);
+ for_each_net(net) {
+ for_each_netdev_safe(net, dev, n) {
+ if (dev->rtnl_link_ops == ops)
+ ops->dellink(dev);
+ }
}
list_del(&ops->list);
}
@@ -634,7 +636,6 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
NLA_PUT_STRING(skb, IFLA_IFNAME, dev->name);
NLA_PUT_U32(skb, IFLA_TXQLEN, dev->tx_queue_len);
- NLA_PUT_U32(skb, IFLA_WEIGHT, dev->weight);
NLA_PUT_U8(skb, IFLA_OPERSTATE,
netif_running(dev) ? dev->operstate : IF_OPER_DOWN);
NLA_PUT_U8(skb, IFLA_LINKMODE, dev->link_mode);
@@ -694,12 +695,13 @@ nla_put_failure:
static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
{
+ struct net *net = skb->sk->sk_net;
int idx;
int s_idx = cb->args[0];
struct net_device *dev;
idx = 0;
- for_each_netdev(dev) {
+ for_each_netdev(net, dev) {
if (idx < s_idx)
goto cont;
if (rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK,
@@ -714,7 +716,7 @@ cont:
return skb->len;
}
-static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
+const struct nla_policy ifla_policy[IFLA_MAX+1] = {
[IFLA_IFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ-1 },
[IFLA_ADDRESS] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
[IFLA_BROADCAST] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
@@ -724,6 +726,7 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
[IFLA_WEIGHT] = { .type = NLA_U32 },
[IFLA_OPERSTATE] = { .type = NLA_U8 },
[IFLA_LINKMODE] = { .type = NLA_U8 },
+ [IFLA_NET_NS_PID] = { .type = NLA_U32 },
};
static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
@@ -731,12 +734,45 @@ static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
[IFLA_INFO_DATA] = { .type = NLA_NESTED },
};
+static struct net *get_net_ns_by_pid(pid_t pid)
+{
+ struct task_struct *tsk;
+ struct net *net;
+
+ /* Lookup the network namespace */
+ net = ERR_PTR(-ESRCH);
+ rcu_read_lock();
+ tsk = find_task_by_pid(pid);
+ if (tsk) {
+ task_lock(tsk);
+ if (tsk->nsproxy)
+ net = get_net(tsk->nsproxy->net_ns);
+ task_unlock(tsk);
+ }
+ rcu_read_unlock();
+ return net;
+}
+
static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
struct nlattr **tb, char *ifname, int modified)
{
int send_addr_notify = 0;
int err;
+ if (tb[IFLA_NET_NS_PID]) {
+ struct net *net;
+ net = get_net_ns_by_pid(nla_get_u32(tb[IFLA_NET_NS_PID]));
+ if (IS_ERR(net)) {
+ err = PTR_ERR(net);
+ goto errout;
+ }
+ err = dev_change_net_namespace(dev, net, ifname);
+ put_net(net);
+ if (err)
+ goto errout;
+ modified = 1;
+ }
+
if (tb[IFLA_MAP]) {
struct rtnl_link_ifmap *u_map;
struct ifmap k_map;
@@ -834,9 +870,6 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
if (tb[IFLA_TXQLEN])
dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]);
- if (tb[IFLA_WEIGHT])
- dev->weight = nla_get_u32(tb[IFLA_WEIGHT]);
-
if (tb[IFLA_OPERSTATE])
set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE]));
@@ -862,6 +895,7 @@ errout:
static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
+ struct net *net = skb->sk->sk_net;
struct ifinfomsg *ifm;
struct net_device *dev;
int err;
@@ -880,9 +914,9 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
err = -EINVAL;
ifm = nlmsg_data(nlh);
if (ifm->ifi_index > 0)
- dev = dev_get_by_index(ifm->ifi_index);
+ dev = dev_get_by_index(net, ifm->ifi_index);
else if (tb[IFLA_IFNAME])
- dev = dev_get_by_name(ifname);
+ dev = dev_get_by_name(net, ifname);
else
goto errout;
@@ -908,6 +942,7 @@ errout:
static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
+ struct net *net = skb->sk->sk_net;
const struct rtnl_link_ops *ops;
struct net_device *dev;
struct ifinfomsg *ifm;
@@ -924,9 +959,9 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
ifm = nlmsg_data(nlh);
if (ifm->ifi_index > 0)
- dev = __dev_get_by_index(ifm->ifi_index);
+ dev = __dev_get_by_index(net, ifm->ifi_index);
else if (tb[IFLA_IFNAME])
- dev = __dev_get_by_name(ifname);
+ dev = __dev_get_by_name(net, ifname);
else
return -EINVAL;
@@ -941,8 +976,52 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
return 0;
}
+struct net_device *rtnl_create_link(struct net *net, char *ifname,
+ const struct rtnl_link_ops *ops, struct nlattr *tb[])
+{
+ int err;
+ struct net_device *dev;
+
+ err = -ENOMEM;
+ dev = alloc_netdev(ops->priv_size, ifname, ops->setup);
+ if (!dev)
+ goto err;
+
+ if (strchr(dev->name, '%')) {
+ err = dev_alloc_name(dev, dev->name);
+ if (err < 0)
+ goto err_free;
+ }
+
+ dev->nd_net = net;
+ dev->rtnl_link_ops = ops;
+
+ if (tb[IFLA_MTU])
+ dev->mtu = nla_get_u32(tb[IFLA_MTU]);
+ if (tb[IFLA_ADDRESS])
+ memcpy(dev->dev_addr, nla_data(tb[IFLA_ADDRESS]),
+ nla_len(tb[IFLA_ADDRESS]));
+ if (tb[IFLA_BROADCAST])
+ memcpy(dev->broadcast, nla_data(tb[IFLA_BROADCAST]),
+ nla_len(tb[IFLA_BROADCAST]));
+ if (tb[IFLA_TXQLEN])
+ dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]);
+ if (tb[IFLA_OPERSTATE])
+ set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE]));
+ if (tb[IFLA_LINKMODE])
+ dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]);
+
+ return dev;
+
+err_free:
+ free_netdev(dev);
+err:
+ return ERR_PTR(err);
+}
+
static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
+ struct net *net = skb->sk->sk_net;
const struct rtnl_link_ops *ops;
struct net_device *dev;
struct ifinfomsg *ifm;
@@ -966,9 +1045,9 @@ replay:
ifm = nlmsg_data(nlh);
if (ifm->ifi_index > 0)
- dev = __dev_get_by_index(ifm->ifi_index);
+ dev = __dev_get_by_index(net, ifm->ifi_index);
else if (ifname[0])
- dev = __dev_get_by_name(ifname);
+ dev = __dev_get_by_name(net, ifname);
else
dev = NULL;
@@ -1053,40 +1132,17 @@ replay:
if (!ifname[0])
snprintf(ifname, IFNAMSIZ, "%s%%d", ops->kind);
- dev = alloc_netdev(ops->priv_size, ifname, ops->setup);
- if (!dev)
- return -ENOMEM;
-
- if (strchr(dev->name, '%')) {
- err = dev_alloc_name(dev, dev->name);
- if (err < 0)
- goto err_free;
- }
- dev->rtnl_link_ops = ops;
-
- if (tb[IFLA_MTU])
- dev->mtu = nla_get_u32(tb[IFLA_MTU]);
- if (tb[IFLA_ADDRESS])
- memcpy(dev->dev_addr, nla_data(tb[IFLA_ADDRESS]),
- nla_len(tb[IFLA_ADDRESS]));
- if (tb[IFLA_BROADCAST])
- memcpy(dev->broadcast, nla_data(tb[IFLA_BROADCAST]),
- nla_len(tb[IFLA_BROADCAST]));
- if (tb[IFLA_TXQLEN])
- dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]);
- if (tb[IFLA_WEIGHT])
- dev->weight = nla_get_u32(tb[IFLA_WEIGHT]);
- if (tb[IFLA_OPERSTATE])
- set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE]));
- if (tb[IFLA_LINKMODE])
- dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]);
-
- if (ops->newlink)
+
+ dev = rtnl_create_link(net, ifname, ops, tb);
+
+ if (IS_ERR(dev))
+ err = PTR_ERR(dev);
+ else if (ops->newlink)
err = ops->newlink(dev, tb, data);
else
err = register_netdevice(dev);
-err_free:
- if (err < 0)
+
+ if (err < 0 && !IS_ERR(dev))
free_netdev(dev);
return err;
}
@@ -1094,6 +1150,7 @@ err_free:
static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
{
+ struct net *net = skb->sk->sk_net;
struct ifinfomsg *ifm;
struct nlattr *tb[IFLA_MAX+1];
struct net_device *dev = NULL;
@@ -1106,7 +1163,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
ifm = nlmsg_data(nlh);
if (ifm->ifi_index > 0) {
- dev = dev_get_by_index(ifm->ifi_index);
+ dev = dev_get_by_index(net, ifm->ifi_index);
if (dev == NULL)
return -ENODEV;
} else
@@ -1255,22 +1312,20 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
return doit(skb, nlh, (void *)&rta_buf[0]);
}
-static void rtnetlink_rcv(struct sock *sk, int len)
+static void rtnetlink_rcv(struct sk_buff *skb)
{
- unsigned int qlen = 0;
-
- do {
- mutex_lock(&rtnl_mutex);
- netlink_run_queue(sk, &qlen, &rtnetlink_rcv_msg);
- mutex_unlock(&rtnl_mutex);
-
- netdev_run_todo();
- } while (qlen);
+ rtnl_lock();
+ netlink_rcv_skb(skb, &rtnetlink_rcv_msg);
+ rtnl_unlock();
}
static int rtnetlink_event(struct notifier_block *this, unsigned long event, void *ptr)
{
struct net_device *dev = ptr;
+
+ if (dev->nd_net != &init_net)
+ return NOTIFY_DONE;
+
switch (event) {
case NETDEV_UNREGISTER:
rtmsg_ifinfo(RTM_DELLINK, dev, ~0U);
@@ -1308,8 +1363,8 @@ void __init rtnetlink_init(void)
if (!rta_buf)
panic("rtnetlink_init: cannot allocate rta_buf\n");
- rtnl = netlink_kernel_create(NETLINK_ROUTE, RTNLGRP_MAX, rtnetlink_rcv,
- &rtnl_mutex, THIS_MODULE);
+ rtnl = netlink_kernel_create(&init_net, NETLINK_ROUTE, RTNLGRP_MAX,
+ rtnetlink_rcv, &rtnl_mutex, THIS_MODULE);
if (rtnl == NULL)
panic("rtnetlink_init: cannot initialize rtnetlink\n");
netlink_set_nonroot(NETLINK_ROUTE, NL_NONROOT_RECV);
@@ -1335,3 +1390,5 @@ EXPORT_SYMBOL(rtnl_unlock);
EXPORT_SYMBOL(rtnl_unicast);
EXPORT_SYMBOL(rtnl_notify);
EXPORT_SYMBOL(rtnl_set_sk_err);
+EXPORT_SYMBOL(rtnl_create_link);
+EXPORT_SYMBOL(ifla_policy);
diff --git a/net/core/scm.c b/net/core/scm.c
index 44c4ec2c8769..530bee8d9ed9 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -167,7 +167,8 @@ error:
int put_cmsg(struct msghdr * msg, int level, int type, int len, void *data)
{
- struct cmsghdr __user *cm = (struct cmsghdr __user *)msg->msg_control;
+ struct cmsghdr __user *cm
+ = (__force struct cmsghdr __user *)msg->msg_control;
struct cmsghdr cmhdr;
int cmlen = CMSG_LEN(len);
int err;
@@ -202,7 +203,8 @@ out:
void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm)
{
- struct cmsghdr __user *cm = (struct cmsghdr __user*)msg->msg_control;
+ struct cmsghdr __user *cm
+ = (__force struct cmsghdr __user*)msg->msg_control;
int fdmax = 0;
int fdnum = scm->fp->count;
@@ -222,7 +224,8 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm)
if (fdnum < fdmax)
fdmax = fdnum;
- for (i=0, cmfptr=(int __user *)CMSG_DATA(cm); i<fdmax; i++, cmfptr++)
+ for (i=0, cmfptr=(__force int __user *)CMSG_DATA(cm); i<fdmax;
+ i++, cmfptr++)
{
int new_fd;
err = security_file_receive(fp[i]);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 35021eb3ed07..944189d96323 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -472,6 +472,9 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
#ifdef CONFIG_INET
new->sp = secpath_get(old->sp);
#endif
+ new->csum_start = old->csum_start;
+ new->csum_offset = old->csum_offset;
+ new->ip_summed = old->ip_summed;
new->transport_header = old->transport_header;
new->network_header = old->network_header;
new->mac_header = old->mac_header;
@@ -545,8 +548,6 @@ struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask)
skb_reserve(n, headerlen);
/* Set the tail pointer and length */
skb_put(n, skb->len);
- n->csum = skb->csum;
- n->ip_summed = skb->ip_summed;
if (skb_copy_bits(skb, -headerlen, n->head, headerlen + skb->len))
BUG();
@@ -589,8 +590,6 @@ struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask)
skb_put(n, skb_headlen(skb));
/* Copy the bytes */
skb_copy_from_linear_data(skb, n->data, n->len);
- n->csum = skb->csum;
- n->ip_summed = skb->ip_summed;
n->truesize += skb->data_len;
n->data_len = skb->data_len;
@@ -686,6 +685,7 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
skb->transport_header += off;
skb->network_header += off;
skb->mac_header += off;
+ skb->csum_start += off;
skb->cloned = 0;
skb->hdr_len = 0;
skb->nohdr = 0;
@@ -734,9 +734,6 @@ struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom)
*
* You must pass %GFP_ATOMIC as the allocation priority if this function
* is called from an interrupt.
- *
- * BUG ALERT: ip_summed is not copied. Why does this work? Is it used
- * only by netfilter in the cases when checksum is recalculated? --ANK
*/
struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
int newheadroom, int newtailroom,
@@ -749,7 +746,7 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
gfp_mask);
int oldheadroom = skb_headroom(skb);
int head_copy_len, head_copy_off;
- int off = 0;
+ int off;
if (!n)
return NULL;
@@ -773,12 +770,13 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
copy_skb_header(n, skb);
-#ifdef NET_SKBUFF_DATA_USES_OFFSET
off = newheadroom - oldheadroom;
-#endif
+ n->csum_start += off;
+#ifdef NET_SKBUFF_DATA_USES_OFFSET
n->transport_header += off;
n->network_header += off;
n->mac_header += off;
+#endif
return n;
}
diff --git a/net/core/sock.c b/net/core/sock.c
index 190de61cd648..4ed9b507c1e7 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -119,6 +119,7 @@
#include <linux/netdevice.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
+#include <net/net_namespace.h>
#include <net/request_sock.h>
#include <net/sock.h>
#include <net/xfrm.h>
@@ -366,6 +367,7 @@ static int sock_bindtodevice(struct sock *sk, char __user *optval, int optlen)
{
int ret = -ENOPROTOOPT;
#ifdef CONFIG_NETDEVICES
+ struct net *net = sk->sk_net;
char devname[IFNAMSIZ];
int index;
@@ -394,7 +396,7 @@ static int sock_bindtodevice(struct sock *sk, char __user *optval, int optlen)
if (devname[0] == '\0') {
index = 0;
} else {
- struct net_device *dev = dev_get_by_name(devname);
+ struct net_device *dev = dev_get_by_name(net, devname);
ret = -ENODEV;
if (!dev)
@@ -872,7 +874,7 @@ static inline void sock_lock_init(struct sock *sk)
* @prot: struct proto associated with this new sock instance
* @zero_it: if we should zero the newly allocated sock
*/
-struct sock *sk_alloc(int family, gfp_t priority,
+struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
struct proto *prot, int zero_it)
{
struct sock *sk = NULL;
@@ -893,6 +895,7 @@ struct sock *sk_alloc(int family, gfp_t priority,
*/
sk->sk_prot = sk->sk_prot_creator = prot;
sock_lock_init(sk);
+ sk->sk_net = get_net(net);
}
if (security_sk_alloc(sk, family, priority))
@@ -932,6 +935,7 @@ void sk_free(struct sock *sk)
__FUNCTION__, atomic_read(&sk->sk_omem_alloc));
security_sk_free(sk);
+ put_net(sk->sk_net);
if (sk->sk_prot_creator->slab != NULL)
kmem_cache_free(sk->sk_prot_creator->slab, sk);
else
@@ -941,7 +945,7 @@ void sk_free(struct sock *sk)
struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
{
- struct sock *newsk = sk_alloc(sk->sk_family, priority, sk->sk_prot, 0);
+ struct sock *newsk = sk_alloc(sk->sk_net, sk->sk_family, priority, sk->sk_prot, 0);
if (newsk != NULL) {
struct sk_filter *filter;
@@ -1585,9 +1589,9 @@ void fastcall lock_sock_nested(struct sock *sk, int subclass)
{
might_sleep();
spin_lock_bh(&sk->sk_lock.slock);
- if (sk->sk_lock.owner)
+ if (sk->sk_lock.owned)
__lock_sock(sk);
- sk->sk_lock.owner = (void *)1;
+ sk->sk_lock.owned = 1;
spin_unlock(&sk->sk_lock.slock);
/*
* The sk_lock has mutex_lock() semantics here:
@@ -1608,7 +1612,7 @@ void fastcall release_sock(struct sock *sk)
spin_lock_bh(&sk->sk_lock.slock);
if (sk->sk_backlog.tail)
__release_sock(sk);
- sk->sk_lock.owner = NULL;
+ sk->sk_lock.owned = 0;
if (waitqueue_active(&sk->sk_lock.wq))
wake_up(&sk->sk_lock.wq);
spin_unlock_bh(&sk->sk_lock.slock);
@@ -1973,7 +1977,7 @@ static const struct file_operations proto_seq_fops = {
static int __init proto_init(void)
{
/* register /proc/net/protocols */
- return proc_net_fops_create("protocols", S_IRUGO, &proto_seq_fops) == NULL ? -ENOBUFS : 0;
+ return proc_net_fops_create(&init_net, "protocols", S_IRUGO, &proto_seq_fops) == NULL ? -ENOBUFS : 0;
}
subsys_initcall(proto_init);