aboutsummaryrefslogtreecommitdiffstats
path: root/net/core
diff options
context:
space:
mode:
Diffstat (limited to 'net/core')
-rw-r--r--net/core/dev.c32
-rw-r--r--net/core/drop_monitor.c2
-rw-r--r--net/core/filter.c21
-rw-r--r--net/core/flow_dissector.c85
-rw-r--r--net/core/net-sysfs.c20
-rw-r--r--net/core/netpoll.c3
-rw-r--r--net/core/pktgen.c111
-rw-r--r--net/core/ptp_classifier.c64
-rw-r--r--net/core/request_sock.c43
-rw-r--r--net/core/rtnetlink.c101
-rw-r--r--net/core/timestamping.c57
11 files changed, 320 insertions, 219 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index 7990984ca364..138ab897de7d 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1085,6 +1085,7 @@ static int dev_get_valid_name(struct net *net,
*/
int dev_change_name(struct net_device *dev, const char *newname)
{
+ unsigned char old_assign_type;
char oldname[IFNAMSIZ];
int err = 0;
int ret;
@@ -1112,10 +1113,14 @@ int dev_change_name(struct net_device *dev, const char *newname)
return err;
}
+ old_assign_type = dev->name_assign_type;
+ dev->name_assign_type = NET_NAME_RENAMED;
+
rollback:
ret = device_rename(&dev->dev, dev->name);
if (ret) {
memcpy(dev->name, oldname, IFNAMSIZ);
+ dev->name_assign_type = old_assign_type;
write_seqcount_end(&devnet_rename_seq);
return ret;
}
@@ -1144,6 +1149,8 @@ rollback:
write_seqcount_begin(&devnet_rename_seq);
memcpy(dev->name, oldname, IFNAMSIZ);
memcpy(oldname, newname, IFNAMSIZ);
+ dev->name_assign_type = old_assign_type;
+ old_assign_type = NET_NAME_RENAMED;
goto rollback;
} else {
pr_err("%s: name change rollback failed: %d\n",
@@ -2745,8 +2752,8 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
/*
* Heuristic to force contended enqueues to serialize on a
* separate lock before trying to get qdisc main lock.
- * This permits __QDISC_STATE_RUNNING owner to get the lock more often
- * and dequeue packets faster.
+ * This permits __QDISC___STATE_RUNNING owner to get the lock more
+ * often and dequeue packets faster.
*/
contended = qdisc_is_running(q);
if (unlikely(contended))
@@ -5438,13 +5445,9 @@ int __dev_change_flags(struct net_device *dev, unsigned int flags)
*/
ret = 0;
- if ((old_flags ^ flags) & IFF_UP) { /* Bit is different ? */
+ if ((old_flags ^ flags) & IFF_UP)
ret = ((old_flags & IFF_UP) ? __dev_close : __dev_open)(dev);
- if (!ret)
- dev_set_rx_mode(dev);
- }
-
if ((flags ^ dev->gflags) & IFF_PROMISC) {
int inc = (flags & IFF_PROMISC) ? 1 : -1;
unsigned int old_flags = dev->flags;
@@ -6444,17 +6447,19 @@ void netdev_freemem(struct net_device *dev)
/**
* alloc_netdev_mqs - allocate network device
- * @sizeof_priv: size of private data to allocate space for
- * @name: device name format string
- * @setup: callback to initialize device
- * @txqs: the number of TX subqueues to allocate
- * @rxqs: the number of RX subqueues to allocate
+ * @sizeof_priv: size of private data to allocate space for
+ * @name: device name format string
+ * @name_assign_type: origin of device name
+ * @setup: callback to initialize device
+ * @txqs: the number of TX subqueues to allocate
+ * @rxqs: the number of RX subqueues to allocate
*
* Allocates a struct net_device with private data area for driver use
* and performs basic initialization. Also allocates subqueue structs
* for each queue on the device.
*/
struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
+ unsigned char name_assign_type,
void (*setup)(struct net_device *),
unsigned int txqs, unsigned int rxqs)
{
@@ -6533,6 +6538,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
#endif
strcpy(dev->name, name);
+ dev->name_assign_type = name_assign_type;
dev->group = INIT_NETDEV_GROUP;
if (!dev->ethtool_ops)
dev->ethtool_ops = &default_ethtool_ops;
@@ -7101,7 +7107,7 @@ static void __net_exit default_device_exit_batch(struct list_head *net_list)
rtnl_lock_unregistering(net_list);
list_for_each_entry(net, net_list, exit_list) {
for_each_netdev_reverse(net, dev) {
- if (dev->rtnl_link_ops)
+ if (dev->rtnl_link_ops && dev->rtnl_link_ops->dellink)
dev->rtnl_link_ops->dellink(dev, &dev_kill_list);
else
unregister_netdevice_queue(dev, &dev_kill_list);
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index e70301eb7a4a..50f9a9db5792 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -289,10 +289,8 @@ static int net_dm_cmd_trace(struct sk_buff *skb,
switch (info->genlhdr->cmd) {
case NET_DM_CMD_START:
return set_all_monitor_traces(TRACE_ON);
- break;
case NET_DM_CMD_STOP:
return set_all_monitor_traces(TRACE_OFF);
- break;
}
return -ENOTSUPP;
diff --git a/net/core/filter.c b/net/core/filter.c
index 1dbf6462f766..b90ae7fb3b89 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -84,15 +84,6 @@ void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, uns
return NULL;
}
-static inline void *load_pointer(const struct sk_buff *skb, int k,
- unsigned int size, void *buffer)
-{
- if (k >= 0)
- return skb_header_pointer(skb, k, size, buffer);
-
- return bpf_internal_load_pointer_neg_helper(skb, k, size);
-}
-
/**
* sk_filter - run a packet through a socket filter
* @sk: sock associated with &sk_buff
@@ -537,7 +528,7 @@ load_word:
* BPF_R0 - 8/16/32-bit skb data converted to cpu endianness
*/
- ptr = load_pointer((struct sk_buff *) (unsigned long) CTX, off, 4, &tmp);
+ ptr = bpf_load_pointer((struct sk_buff *) (unsigned long) CTX, off, 4, &tmp);
if (likely(ptr != NULL)) {
BPF_R0 = get_unaligned_be32(ptr);
CONT;
@@ -547,7 +538,7 @@ load_word:
LD_ABS_H: /* BPF_R0 = ntohs(*(u16 *) (skb->data + imm32)) */
off = IMM;
load_half:
- ptr = load_pointer((struct sk_buff *) (unsigned long) CTX, off, 2, &tmp);
+ ptr = bpf_load_pointer((struct sk_buff *) (unsigned long) CTX, off, 2, &tmp);
if (likely(ptr != NULL)) {
BPF_R0 = get_unaligned_be16(ptr);
CONT;
@@ -557,7 +548,7 @@ load_half:
LD_ABS_B: /* BPF_R0 = *(u8 *) (skb->data + imm32) */
off = IMM;
load_byte:
- ptr = load_pointer((struct sk_buff *) (unsigned long) CTX, off, 1, &tmp);
+ ptr = bpf_load_pointer((struct sk_buff *) (unsigned long) CTX, off, 1, &tmp);
if (likely(ptr != NULL)) {
BPF_R0 = *(u8 *)ptr;
CONT;
@@ -1094,7 +1085,7 @@ err:
* a cell if not previously written, and we check all branches to be sure
* a malicious user doesn't try to abuse us.
*/
-static int check_load_and_stores(struct sock_filter *filter, int flen)
+static int check_load_and_stores(const struct sock_filter *filter, int flen)
{
u16 *masks, memvalid = 0; /* One bit per cell, 16 cells */
int pc, ret = 0;
@@ -1227,7 +1218,7 @@ static bool chk_code_allowed(u16 code_to_probe)
*
* Returns 0 if the rule set is legal or -EINVAL if not.
*/
-int sk_chk_filter(struct sock_filter *filter, unsigned int flen)
+int sk_chk_filter(const struct sock_filter *filter, unsigned int flen)
{
bool anc_found;
int pc;
@@ -1237,7 +1228,7 @@ int sk_chk_filter(struct sock_filter *filter, unsigned int flen)
/* Check the filter code now */
for (pc = 0; pc < flen; pc++) {
- struct sock_filter *ftest = &filter[pc];
+ const struct sock_filter *ftest = &filter[pc];
/* May we actually operate on this code? */
if (!chk_code_allowed(ftest->code))
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 107ed12a5323..5f362c1d0332 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -80,6 +80,8 @@ ip:
case htons(ETH_P_IPV6): {
const struct ipv6hdr *iph;
struct ipv6hdr _iph;
+ __be32 flow_label;
+
ipv6:
iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph);
if (!iph)
@@ -89,6 +91,21 @@ ipv6:
flow->src = (__force __be32)ipv6_addr_hash(&iph->saddr);
flow->dst = (__force __be32)ipv6_addr_hash(&iph->daddr);
nhoff += sizeof(struct ipv6hdr);
+
+ flow_label = ip6_flowlabel(iph);
+ if (flow_label) {
+ /* Awesome, IPv6 packet has a flow label so we can
+ * use that to represent the ports without any
+ * further dissection.
+ */
+ flow->n_proto = proto;
+ flow->ip_proto = ip_proto;
+ flow->ports = flow_label;
+ flow->thoff = (u16)nhoff;
+
+ return true;
+ }
+
break;
}
case htons(ETH_P_8021AD):
@@ -175,6 +192,7 @@ ipv6:
break;
}
+ flow->n_proto = proto;
flow->ip_proto = ip_proto;
flow->ports = skb_flow_get_ports(skb, nhoff, ip_proto);
flow->thoff = (u16) nhoff;
@@ -195,12 +213,33 @@ static __always_inline u32 __flow_hash_3words(u32 a, u32 b, u32 c)
return jhash_3words(a, b, c, hashrnd);
}
-static __always_inline u32 __flow_hash_1word(u32 a)
+static inline u32 __flow_hash_from_keys(struct flow_keys *keys)
{
- __flow_hash_secret_init();
- return jhash_1word(a, hashrnd);
+ u32 hash;
+
+ /* get a consistent hash (same value on both flow directions) */
+ if (((__force u32)keys->dst < (__force u32)keys->src) ||
+ (((__force u32)keys->dst == (__force u32)keys->src) &&
+ ((__force u16)keys->port16[1] < (__force u16)keys->port16[0]))) {
+ swap(keys->dst, keys->src);
+ swap(keys->port16[0], keys->port16[1]);
+ }
+
+ hash = __flow_hash_3words((__force u32)keys->dst,
+ (__force u32)keys->src,
+ (__force u32)keys->ports);
+ if (!hash)
+ hash = 1;
+
+ return hash;
}
+u32 flow_hash_from_keys(struct flow_keys *keys)
+{
+ return __flow_hash_from_keys(keys);
+}
+EXPORT_SYMBOL(flow_hash_from_keys);
+
/*
* __skb_get_hash: calculate a flow hash based on src/dst addresses
* and src/dst port numbers. Sets hash in skb to non-zero hash value
@@ -210,7 +249,6 @@ static __always_inline u32 __flow_hash_1word(u32 a)
void __skb_get_hash(struct sk_buff *skb)
{
struct flow_keys keys;
- u32 hash;
if (!skb_flow_dissect(skb, &keys))
return;
@@ -218,21 +256,9 @@ void __skb_get_hash(struct sk_buff *skb)
if (keys.ports)
skb->l4_hash = 1;
- /* get a consistent hash (same value on both flow directions) */
- if (((__force u32)keys.dst < (__force u32)keys.src) ||
- (((__force u32)keys.dst == (__force u32)keys.src) &&
- ((__force u16)keys.port16[1] < (__force u16)keys.port16[0]))) {
- swap(keys.dst, keys.src);
- swap(keys.port16[0], keys.port16[1]);
- }
-
- hash = __flow_hash_3words((__force u32)keys.dst,
- (__force u32)keys.src,
- (__force u32)keys.ports);
- if (!hash)
- hash = 1;
+ skb->sw_hash = 1;
- skb->hash = hash;
+ skb->hash = __flow_hash_from_keys(&keys);
}
EXPORT_SYMBOL(__skb_get_hash);
@@ -240,7 +266,7 @@ EXPORT_SYMBOL(__skb_get_hash);
* Returns a Tx hash based on the given packet descriptor a Tx queues' number
* to be used as a distribution range.
*/
-u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb,
+u16 __skb_tx_hash(const struct net_device *dev, struct sk_buff *skb,
unsigned int num_tx_queues)
{
u32 hash;
@@ -260,13 +286,7 @@ u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb,
qcount = dev->tc_to_txq[tc].count;
}
- if (skb->sk && skb->sk->sk_hash)
- hash = skb->sk->sk_hash;
- else
- hash = (__force u16) skb->protocol;
- hash = __flow_hash_1word(hash);
-
- return (u16) (((u64) hash * qcount) >> 32) + qoffset;
+ return (u16) (((u64)skb_get_hash(skb) * qcount) >> 32) + qoffset;
}
EXPORT_SYMBOL(__skb_tx_hash);
@@ -338,17 +358,10 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
if (map) {
if (map->len == 1)
queue_index = map->queues[0];
- else {
- u32 hash;
- if (skb->sk && skb->sk->sk_hash)
- hash = skb->sk->sk_hash;
- else
- hash = (__force u16) skb->protocol ^
- skb->hash;
- hash = __flow_hash_1word(hash);
+ else
queue_index = map->queues[
- ((u64)hash * map->len) >> 32];
- }
+ ((u64)skb_get_hash(skb) * map->len) >> 32];
+
if (unlikely(queue_index >= dev->real_num_tx_queues))
queue_index = -1;
}
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 1cac29ebb05b..7752f2ad49a5 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -112,6 +112,25 @@ NETDEVICE_SHOW_RO(ifindex, fmt_dec);
NETDEVICE_SHOW_RO(type, fmt_dec);
NETDEVICE_SHOW_RO(link_mode, fmt_dec);
+static ssize_t format_name_assign_type(const struct net_device *net, char *buf)
+{
+ return sprintf(buf, fmt_dec, net->name_assign_type);
+}
+
+static ssize_t name_assign_type_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct net_device *net = to_net_dev(dev);
+ ssize_t ret = -EINVAL;
+
+ if (net->name_assign_type != NET_NAME_UNKNOWN)
+ ret = netdev_show(dev, attr, buf, format_name_assign_type);
+
+ return ret;
+}
+static DEVICE_ATTR_RO(name_assign_type);
+
/* use same locking rules as GIFHWADDR ioctl's */
static ssize_t address_show(struct device *dev, struct device_attribute *attr,
char *buf)
@@ -387,6 +406,7 @@ static struct attribute *net_class_attrs[] = {
&dev_attr_dev_port.attr,
&dev_attr_iflink.attr,
&dev_attr_ifindex.attr,
+ &dev_attr_name_assign_type.attr,
&dev_attr_addr_assign_type.attr,
&dev_attr_addr_len.attr,
&dev_attr_link_mode.attr,
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index e33937fb32a0..907fb5e36c02 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -822,7 +822,8 @@ void __netpoll_cleanup(struct netpoll *np)
RCU_INIT_POINTER(np->dev->npinfo, NULL);
call_rcu_bh(&npinfo->rcu, rcu_cleanup_netpoll_info);
- }
+ } else
+ RCU_INIT_POINTER(np->dev->npinfo, NULL);
}
EXPORT_SYMBOL_GPL(__netpoll_cleanup);
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index fc17a9d309ac..8b849ddfef2e 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -69,8 +69,9 @@
* for running devices in the if_list and sends packets until count is 0 it
* also the thread checks the thread->control which is used for inter-process
* communication. controlling process "posts" operations to the threads this
- * way. The if_lock should be possible to remove when add/rem_device is merged
- * into this too.
+ * way.
+ * The if_list is RCU protected, and the if_lock remains to protect updating
+ * of if_list, from "add_device" as it invoked from userspace (via proc write).
*
* By design there should only be *one* "controlling" process. In practice
* multiple write accesses gives unpredictable result. Understood by "write"
@@ -208,7 +209,7 @@
#define T_REMDEVALL (1<<2) /* Remove all devs */
#define T_REMDEV (1<<3) /* Remove one dev */
-/* If lock -- can be removed after some work */
+/* If lock -- protects updating of if_list */
#define if_lock(t) spin_lock(&(t->if_lock));
#define if_unlock(t) spin_unlock(&(t->if_lock));
@@ -241,6 +242,7 @@ struct pktgen_dev {
struct proc_dir_entry *entry; /* proc file */
struct pktgen_thread *pg_thread;/* the owner */
struct list_head list; /* chaining in the thread's run-queue */
+ struct rcu_head rcu; /* freed by RCU */
int running; /* if false, the test will stop */
@@ -802,7 +804,6 @@ static int strn_len(const char __user * user_buffer, unsigned int maxlen)
case '\t':
case ' ':
goto done_str;
- break;
default:
break;
}
@@ -1737,14 +1738,14 @@ static int pktgen_thread_show(struct seq_file *seq, void *v)
seq_puts(seq, "Running: ");
- if_lock(t);
- list_for_each_entry(pkt_dev, &t->if_list, list)
+ rcu_read_lock();
+ list_for_each_entry_rcu(pkt_dev, &t->if_list, list)
if (pkt_dev->running)
seq_printf(seq, "%s ", pkt_dev->odevname);
seq_puts(seq, "\nStopped: ");
- list_for_each_entry(pkt_dev, &t->if_list, list)
+ list_for_each_entry_rcu(pkt_dev, &t->if_list, list)
if (!pkt_dev->running)
seq_printf(seq, "%s ", pkt_dev->odevname);
@@ -1753,7 +1754,7 @@ static int pktgen_thread_show(struct seq_file *seq, void *v)
else
seq_puts(seq, "\nResult: NA\n");
- if_unlock(t);
+ rcu_read_unlock();
return 0;
}
@@ -1878,10 +1879,8 @@ static struct pktgen_dev *__pktgen_NN_threads(const struct pktgen_net *pn,
pkt_dev = pktgen_find_dev(t, ifname, exact);
if (pkt_dev) {
if (remove) {
- if_lock(t);
pkt_dev->removal_mark = 1;
t->control |= T_REMDEV;
- if_unlock(t);
}
break;
}
@@ -1931,7 +1930,8 @@ static void pktgen_change_name(const struct pktgen_net *pn, struct net_device *d
list_for_each_entry(t, &pn->pktgen_threads, th_list) {
struct pktgen_dev *pkt_dev;
- list_for_each_entry(pkt_dev, &t->if_list, list) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(pkt_dev, &t->if_list, list) {
if (pkt_dev->odev != dev)
continue;
@@ -1946,6 +1946,7 @@ static void pktgen_change_name(const struct pktgen_net *pn, struct net_device *d
dev->name);
break;
}
+ rcu_read_unlock();
}
}
@@ -2997,8 +2998,8 @@ static void pktgen_run(struct pktgen_thread *t)
func_enter();
- if_lock(t);
- list_for_each_entry(pkt_dev, &t->if_list, list) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(pkt_dev, &t->if_list, list) {
/*
* setup odev and create initial packet.
@@ -3007,18 +3008,18 @@ static void pktgen_run(struct pktgen_thread *t)
if (pkt_dev->odev) {
pktgen_clear_counters(pkt_dev);
- pkt_dev->running = 1; /* Cranke yeself! */
pkt_dev->skb = NULL;
pkt_dev->started_at = pkt_dev->next_tx = ktime_get();
set_pkt_overhead(pkt_dev);
strcpy(pkt_dev->result, "Starting");
+ pkt_dev->running = 1; /* Cranke yeself! */
started++;
} else
strcpy(pkt_dev->result, "Error starting");
}
- if_unlock(t);
+ rcu_read_unlock();
if (started)
t->control &= ~(T_STOP);
}
@@ -3041,27 +3042,25 @@ static int thread_is_running(const struct pktgen_thread *t)
{
const struct pktgen_dev *pkt_dev;
- list_for_each_entry(pkt_dev, &t->if_list, list)
- if (pkt_dev->running)
+ rcu_read_lock();
+ list_for_each_entry_rcu(pkt_dev, &t->if_list, list)
+ if (pkt_dev->running) {
+ rcu_read_unlock();
return 1;
+ }
+ rcu_read_unlock();
return 0;
}
static int pktgen_wait_thread_run(struct pktgen_thread *t)
{
- if_lock(t);
-
while (thread_is_running(t)) {
- if_unlock(t);
-
msleep_interruptible(100);
if (signal_pending(current))
goto signal;
- if_lock(t);
}
- if_unlock(t);
return 1;
signal:
return 0;
@@ -3166,10 +3165,10 @@ static int pktgen_stop_device(struct pktgen_dev *pkt_dev)
return -EINVAL;
}
+ pkt_dev->running = 0;
kfree_skb(pkt_dev->skb);
pkt_dev->skb = NULL;
pkt_dev->stopped_at = ktime_get();
- pkt_dev->running = 0;
show_results(pkt_dev, nr_frags);
@@ -3180,9 +3179,8 @@ static struct pktgen_dev *next_to_run(struct pktgen_thread *t)
{
struct pktgen_dev *pkt_dev, *best = NULL;
- if_lock(t);
-
- list_for_each_entry(pkt_dev, &t->if_list, list) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(pkt_dev, &t->if_list, list) {
if (!pkt_dev->running)
continue;
if (best == NULL)
@@ -3190,7 +3188,8 @@ static struct pktgen_dev *next_to_run(struct pktgen_thread *t)
else if (ktime_compare(pkt_dev->next_tx, best->next_tx) < 0)
best = pkt_dev;
}
- if_unlock(t);
+ rcu_read_unlock();
+
return best;
}
@@ -3200,13 +3199,13 @@ static void pktgen_stop(struct pktgen_thread *t)
func_enter();
- if_lock(t);
+ rcu_read_lock();
- list_for_each_entry(pkt_dev, &t->if_list, list) {
+ list_for_each_entry_rcu(pkt_dev, &t->if_list, list) {
pktgen_stop_device(pkt_dev);
}
- if_unlock(t);
+ rcu_read_unlock();
}
/*
@@ -3220,8 +3219,6 @@ static void pktgen_rem_one_if(struct pktgen_thread *t)
func_enter();
- if_lock(t);
-
list_for_each_safe(q, n, &t->if_list) {
cur = list_entry(q, struct pktgen_dev, list);
@@ -3235,8 +3232,6 @@ static void pktgen_rem_one_if(struct pktgen_thread *t)
break;
}
-
- if_unlock(t);
}
static void pktgen_rem_all_ifs(struct pktgen_thread *t)
@@ -3248,8 +3243,6 @@ static void pktgen_rem_all_ifs(struct pktgen_thread *t)
/* Remove all devices, free mem */
- if_lock(t);
-
list_for_each_safe(q, n, &t->if_list) {
cur = list_entry(q, struct pktgen_dev, list);
@@ -3258,8 +3251,6 @@ static void pktgen_rem_all_ifs(struct pktgen_thread *t)
pktgen_remove_device(t, cur);
}
-
- if_unlock(t);
}
static void pktgen_rem_thread(struct pktgen_thread *t)
@@ -3407,10 +3398,10 @@ static int pktgen_thread_worker(void *arg)
pr_debug("starting pktgen/%d: pid=%d\n", cpu, task_pid_nr(current));
- set_current_state(TASK_INTERRUPTIBLE);
-
set_freezable();
+ __set_current_state(TASK_RUNNING);
+
while (!kthread_should_stop()) {
pkt_dev = next_to_run(t);
@@ -3424,8 +3415,6 @@ static int pktgen_thread_worker(void *arg)
continue;
}
- __set_current_state(TASK_RUNNING);
-
if (likely(pkt_dev)) {
pktgen_xmit(pkt_dev);
@@ -3456,9 +3445,8 @@ static int pktgen_thread_worker(void *arg)
}
try_to_freeze();
-
- set_current_state(TASK_INTERRUPTIBLE);
}
+ set_current_state(TASK_INTERRUPTIBLE);
pr_debug("%s stopping all device\n", t->tsk->comm);
pktgen_stop(t);
@@ -3485,8 +3473,8 @@ static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread *t,
struct pktgen_dev *p, *pkt_dev = NULL;
size_t len = strlen(ifname);
- if_lock(t);
- list_for_each_entry(p, &t->if_list, list)
+ rcu_read_lock();
+ list_for_each_entry_rcu(p, &t->if_list, list)
if (strncmp(p->odevname, ifname, len) == 0) {
if (p->odevname[len]) {
if (exact || p->odevname[len] != '@')
@@ -3496,7 +3484,7 @@ static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread *t,
break;
}
- if_unlock(t);
+ rcu_read_unlock();
pr_debug("find_dev(%s) returning %p\n", ifname, pkt_dev);
return pkt_dev;
}
@@ -3510,6 +3498,12 @@ static int add_dev_to_thread(struct pktgen_thread *t,
{
int rv = 0;
+ /* This function cannot be called concurrently, as its called
+ * under pktgen_thread_lock mutex, but it can run from
+ * userspace on another CPU than the kthread. The if_lock()
+ * is used here to sync with concurrent instances of
+ * _rem_dev_from_if_list() invoked via kthread, which is also
+ * updating the if_list */
if_lock(t);
if (pkt_dev->pg_thread) {
@@ -3518,9 +3512,9 @@ static int add_dev_to_thread(struct pktgen_thread *t,
goto out;
}
- list_add(&pkt_dev->list, &t->if_list);
- pkt_dev->pg_thread = t;
pkt_dev->running = 0;
+ pkt_dev->pg_thread = t;
+ list_add_rcu(&pkt_dev->list, &t->if_list);
out:
if_unlock(t);
@@ -3675,11 +3669,13 @@ static void _rem_dev_from_if_list(struct pktgen_thread *t,
struct list_head *q, *n;
struct pktgen_dev *p;
+ if_lock(t);
list_for_each_safe(q, n, &t->if_list) {
p = list_entry(q, struct pktgen_dev, list);
if (p == pkt_dev)
- list_del(&p->list);
+ list_del_rcu(&p->list);
}
+ if_unlock(t);
}
static int pktgen_remove_device(struct pktgen_thread *t,
@@ -3699,20 +3695,22 @@ static int pktgen_remove_device(struct pktgen_thread *t,
pkt_dev->odev = NULL;
}
- /* And update the thread if_list */
-
- _rem_dev_from_if_list(t, pkt_dev);
-
+ /* Remove proc before if_list entry, because add_device uses
+ * list to determine if interface already exist, avoid race
+ * with proc_create_data() */
if (pkt_dev->entry)
proc_remove(pkt_dev->entry);
+ /* And update the thread if_list */
+ _rem_dev_from_if_list(t, pkt_dev);
+
#ifdef CONFIG_XFRM
free_SAs(pkt_dev);
#endif
vfree(pkt_dev->flows);
if (pkt_dev->page)
put_page(pkt_dev->page);
- kfree(pkt_dev);
+ kfree_rcu(pkt_dev, rcu);
return 0;
}
@@ -3812,6 +3810,7 @@ static void __exit pg_cleanup(void)
{
unregister_netdevice_notifier(&pktgen_notifier_block);
unregister_pernet_subsys(&pg_net_ops);
+ /* Don't need rcu_barrier() due to use of kfree_rcu() */
}
module_init(pg_init);
diff --git a/net/core/ptp_classifier.c b/net/core/ptp_classifier.c
index d3027a73fd4b..12ab7b4be609 100644
--- a/net/core/ptp_classifier.c
+++ b/net/core/ptp_classifier.c
@@ -52,14 +52,43 @@
* test_8021q:
* jneq #0x8100, test_ieee1588 ; ETH_P_8021Q ?
* ldh [16] ; load inner type
- * jneq #0x88f7, drop_ieee1588 ; ETH_P_1588 ?
+ * jneq #0x88f7, test_8021q_ipv4 ; ETH_P_1588 ?
* ldb [18] ; load payload
* and #0x8 ; as we don't have ports here, test
* jneq #0x0, drop_ieee1588 ; for PTP_GEN_BIT and drop these
* ldh [18] ; reload payload
* and #0xf ; mask PTP_CLASS_VMASK
- * or #0x40 ; PTP_CLASS_V2_VLAN
+ * or #0x70 ; PTP_CLASS_VLAN|PTP_CLASS_L2
+ * ret a ; return PTP class
+ *
+ * ; PTP over UDP over IPv4 over 802.1Q over Ethernet
+ * test_8021q_ipv4:
+ * jneq #0x800, test_8021q_ipv6 ; ETH_P_IP ?
+ * ldb [27] ; load proto
+ * jneq #17, drop_8021q_ipv4 ; IPPROTO_UDP ?
+ * ldh [24] ; load frag offset field
+ * jset #0x1fff, drop_8021q_ipv4; don't allow fragments
+ * ldxb 4*([18]&0xf) ; load IP header len
+ * ldh [x + 20] ; load UDP dst port
+ * jneq #319, drop_8021q_ipv4 ; is port PTP_EV_PORT ?
+ * ldh [x + 26] ; load payload
+ * and #0xf ; mask PTP_CLASS_VMASK
+ * or #0x50 ; PTP_CLASS_VLAN|PTP_CLASS_IPV4
+ * ret a ; return PTP class
+ * drop_8021q_ipv4: ret #0x0 ; PTP_CLASS_NONE
+ *
+ * ; PTP over UDP over IPv6 over 802.1Q over Ethernet
+ * test_8021q_ipv6:
+ * jneq #0x86dd, drop_8021q_ipv6 ; ETH_P_IPV6 ?
+ * ldb [24] ; load proto
+ * jneq #17, drop_8021q_ipv6 ; IPPROTO_UDP ?
+ * ldh [60] ; load UDP dst port
+ * jneq #319, drop_8021q_ipv6 ; is port PTP_EV_PORT ?
+ * ldh [66] ; load payload
+ * and #0xf ; mask PTP_CLASS_VMASK
+ * or #0x60 ; PTP_CLASS_VLAN|PTP_CLASS_IPV6
* ret a ; return PTP class
+ * drop_8021q_ipv6: ret #0x0 ; PTP_CLASS_NONE
*
* ; PTP over Ethernet
* test_ieee1588:
@@ -113,16 +142,39 @@ void __init ptp_classifier_init(void)
{ 0x44, 0, 0, 0x00000020 },
{ 0x16, 0, 0, 0x00000000 },
{ 0x06, 0, 0, 0x00000000 },
- { 0x15, 0, 9, 0x00008100 },
+ { 0x15, 0, 32, 0x00008100 },
{ 0x28, 0, 0, 0x00000010 },
- { 0x15, 0, 15, 0x000088f7 },
+ { 0x15, 0, 7, 0x000088f7 },
{ 0x30, 0, 0, 0x00000012 },
{ 0x54, 0, 0, 0x00000008 },
- { 0x15, 0, 12, 0x00000000 },
+ { 0x15, 0, 35, 0x00000000 },
{ 0x28, 0, 0, 0x00000012 },
{ 0x54, 0, 0, 0x0000000f },
- { 0x44, 0, 0, 0x00000040 },
+ { 0x44, 0, 0, 0x00000070 },
+ { 0x16, 0, 0, 0x00000000 },
+ { 0x15, 0, 12, 0x00000800 },
+ { 0x30, 0, 0, 0x0000001b },
+ { 0x15, 0, 9, 0x00000011 },
+ { 0x28, 0, 0, 0x00000018 },
+ { 0x45, 7, 0, 0x00001fff },
+ { 0xb1, 0, 0, 0x00000012 },
+ { 0x48, 0, 0, 0x00000014 },
+ { 0x15, 0, 4, 0x0000013f },
+ { 0x48, 0, 0, 0x0000001a },
+ { 0x54, 0, 0, 0x0000000f },
+ { 0x44, 0, 0, 0x00000050 },
+ { 0x16, 0, 0, 0x00000000 },
+ { 0x06, 0, 0, 0x00000000 },
+ { 0x15, 0, 8, 0x000086dd },
+ { 0x30, 0, 0, 0x00000018 },
+ { 0x15, 0, 6, 0x00000011 },
+ { 0x28, 0, 0, 0x0000003c },
+ { 0x15, 0, 4, 0x0000013f },
+ { 0x28, 0, 0, 0x00000042 },
+ { 0x54, 0, 0, 0x0000000f },
+ { 0x44, 0, 0, 0x00000060 },
{ 0x16, 0, 0, 0x00000000 },
+ { 0x06, 0, 0, 0x00000000 },
{ 0x15, 0, 7, 0x000088f7 },
{ 0x30, 0, 0, 0x0000000e },
{ 0x54, 0, 0, 0x00000008 },
diff --git a/net/core/request_sock.c b/net/core/request_sock.c
index 467f326126e0..04db318e6218 100644
--- a/net/core/request_sock.c
+++ b/net/core/request_sock.c
@@ -41,27 +41,27 @@ int reqsk_queue_alloc(struct request_sock_queue *queue,
unsigned int nr_table_entries)
{
size_t lopt_size = sizeof(struct listen_sock);
- struct listen_sock *lopt;
+ struct listen_sock *lopt = NULL;
nr_table_entries = min_t(u32, nr_table_entries, sysctl_max_syn_backlog);
nr_table_entries = max_t(u32, nr_table_entries, 8);
nr_table_entries = roundup_pow_of_two(nr_table_entries + 1);
lopt_size += nr_table_entries * sizeof(struct request_sock *);
- if (lopt_size > PAGE_SIZE)
+
+ if (lopt_size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER))
+ lopt = kzalloc(lopt_size, GFP_KERNEL |
+ __GFP_NOWARN |
+ __GFP_NORETRY);
+ if (!lopt)
lopt = vzalloc(lopt_size);
- else
- lopt = kzalloc(lopt_size, GFP_KERNEL);
- if (lopt == NULL)
+ if (!lopt)
return -ENOMEM;
- for (lopt->max_qlen_log = 3;
- (1 << lopt->max_qlen_log) < nr_table_entries;
- lopt->max_qlen_log++);
-
get_random_bytes(&lopt->hash_rnd, sizeof(lopt->hash_rnd));
rwlock_init(&queue->syn_wait_lock);
queue->rskq_accept_head = NULL;
lopt->nr_table_entries = nr_table_entries;
+ lopt->max_qlen_log = ilog2(nr_table_entries);
write_lock_bh(&queue->syn_wait_lock);
queue->listen_opt = lopt;
@@ -72,22 +72,8 @@ int reqsk_queue_alloc(struct request_sock_queue *queue,
void __reqsk_queue_destroy(struct request_sock_queue *queue)
{
- struct listen_sock *lopt;
- size_t lopt_size;
-
- /*
- * this is an error recovery path only
- * no locking needed and the lopt is not NULL
- */
-
- lopt = queue->listen_opt;
- lopt_size = sizeof(struct listen_sock) +
- lopt->nr_table_entries * sizeof(struct request_sock *);
-
- if (lopt_size > PAGE_SIZE)
- vfree(lopt);
- else
- kfree(lopt);
+ /* This is an error recovery path only, no locking needed */
+ kvfree(queue->listen_opt);
}
static inline struct listen_sock *reqsk_queue_yank_listen_sk(
@@ -107,8 +93,6 @@ void reqsk_queue_destroy(struct request_sock_queue *queue)
{
/* make all the listen_opt local to us */
struct listen_sock *lopt = reqsk_queue_yank_listen_sk(queue);
- size_t lopt_size = sizeof(struct listen_sock) +
- lopt->nr_table_entries * sizeof(struct request_sock *);
if (lopt->qlen != 0) {
unsigned int i;
@@ -125,10 +109,7 @@ void reqsk_queue_destroy(struct request_sock_queue *queue)
}
WARN_ON(lopt->qlen != 0);
- if (lopt_size > PAGE_SIZE)
- vfree(lopt);
- else
- kfree(lopt);
+ kvfree(lopt);
}
/*
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 1063996f8317..e9918020dbc9 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -299,7 +299,12 @@ int __rtnl_link_register(struct rtnl_link_ops *ops)
if (rtnl_link_ops_get(ops->kind))
return -EEXIST;
- if (!ops->dellink)
+ /* The check for setup is here because if ops
+ * does not have that filled up, it is not possible
+ * to use the ops for creating device. So do not
+ * fill up dellink as well. That disables rtnl_dellink.
+ */
+ if (ops->setup && !ops->dellink)
ops->dellink = unregister_netdevice_queue;
list_add_tail(&ops->list, &link_ops);
@@ -1777,7 +1782,7 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh)
return -ENODEV;
ops = dev->rtnl_link_ops;
- if (!ops)
+ if (!ops || !ops->dellink)
return -EOPNOTSUPP;
ops->dellink(dev, &list_kill);
@@ -1805,7 +1810,8 @@ int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm)
EXPORT_SYMBOL(rtnl_configure_link);
struct net_device *rtnl_create_link(struct net *net,
- char *ifname, const struct rtnl_link_ops *ops, struct nlattr *tb[])
+ char *ifname, unsigned char name_assign_type,
+ const struct rtnl_link_ops *ops, struct nlattr *tb[])
{
int err;
struct net_device *dev;
@@ -1823,8 +1829,8 @@ struct net_device *rtnl_create_link(struct net *net,
num_rx_queues = ops->get_num_rx_queues();
err = -ENOMEM;
- dev = alloc_netdev_mqs(ops->priv_size, ifname, ops->setup,
- num_tx_queues, num_rx_queues);
+ dev = alloc_netdev_mqs(ops->priv_size, ifname, name_assign_type,
+ ops->setup, num_tx_queues, num_rx_queues);
if (!dev)
goto err;
@@ -1889,6 +1895,7 @@ static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh)
char ifname[IFNAMSIZ];
struct nlattr *tb[IFLA_MAX+1];
struct nlattr *linkinfo[IFLA_INFO_MAX+1];
+ unsigned char name_assign_type = NET_NAME_USER;
int err;
#ifdef CONFIG_MODULES
@@ -2038,14 +2045,19 @@ replay:
return -EOPNOTSUPP;
}
- if (!ifname[0])
+ if (!ops->setup)
+ return -EOPNOTSUPP;
+
+ if (!ifname[0]) {
snprintf(ifname, IFNAMSIZ, "%s%%d", ops->kind);
+ name_assign_type = NET_NAME_ENUM;
+ }
dest_net = rtnl_link_get_net(net, tb);
if (IS_ERR(dest_net))
return PTR_ERR(dest_net);
- dev = rtnl_create_link(dest_net, ifname, ops, tb);
+ dev = rtnl_create_link(dest_net, ifname, name_assign_type, ops, tb);
if (IS_ERR(dev)) {
err = PTR_ERR(dev);
goto out;
@@ -2509,6 +2521,7 @@ skip:
int ndo_dflt_fdb_dump(struct sk_buff *skb,
struct netlink_callback *cb,
struct net_device *dev,
+ struct net_device *filter_dev,
int idx)
{
int err;
@@ -2526,28 +2539,72 @@ EXPORT_SYMBOL(ndo_dflt_fdb_dump);
static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
- int idx = 0;
- struct net *net = sock_net(skb->sk);
struct net_device *dev;
+ struct nlattr *tb[IFLA_MAX+1];
+ struct net_device *bdev = NULL;
+ struct net_device *br_dev = NULL;
+ const struct net_device_ops *ops = NULL;
+ const struct net_device_ops *cops = NULL;
+ struct ifinfomsg *ifm = nlmsg_data(cb->nlh);
+ struct net *net = sock_net(skb->sk);
+ int brport_idx = 0;
+ int br_idx = 0;
+ int idx = 0;
- rcu_read_lock();
- for_each_netdev_rcu(net, dev) {
- if (dev->priv_flags & IFF_BRIDGE_PORT) {
- struct net_device *br_dev;
- const struct net_device_ops *ops;
+ if (nlmsg_parse(cb->nlh, sizeof(struct ifinfomsg), tb, IFLA_MAX,
+ ifla_policy) == 0) {
+ if (tb[IFLA_MASTER])
+ br_idx = nla_get_u32(tb[IFLA_MASTER]);
+ }
- br_dev = netdev_master_upper_dev_get(dev);
- ops = br_dev->netdev_ops;
- if (ops->ndo_fdb_dump)
- idx = ops->ndo_fdb_dump(skb, cb, dev, idx);
+ brport_idx = ifm->ifi_index;
+
+ if (br_idx) {
+ br_dev = __dev_get_by_index(net, br_idx);
+ if (!br_dev)
+ return -ENODEV;
+
+ ops = br_dev->netdev_ops;
+ bdev = br_dev;
+ }
+
+ for_each_netdev(net, dev) {
+ if (brport_idx && (dev->ifindex != brport_idx))
+ continue;
+
+ if (!br_idx) { /* user did not specify a specific bridge */
+ if (dev->priv_flags & IFF_BRIDGE_PORT) {
+ br_dev = netdev_master_upper_dev_get(dev);
+ cops = br_dev->netdev_ops;
+ }
+
+ bdev = dev;
+ } else {
+ if (dev != br_dev &&
+ !(dev->priv_flags & IFF_BRIDGE_PORT))
+ continue;
+
+ if (br_dev != netdev_master_upper_dev_get(dev) &&
+ !(dev->priv_flags & IFF_EBRIDGE))
+ continue;
+
+ bdev = br_dev;
+ cops = ops;
+ }
+
+ if (dev->priv_flags & IFF_BRIDGE_PORT) {
+ if (cops && cops->ndo_fdb_dump)
+ idx = cops->ndo_fdb_dump(skb, cb, br_dev, dev,
+ idx);
}
+ idx = ndo_dflt_fdb_dump(skb, cb, dev, NULL, idx);
if (dev->netdev_ops->ndo_fdb_dump)
- idx = dev->netdev_ops->ndo_fdb_dump(skb, cb, dev, idx);
- else
- idx = ndo_dflt_fdb_dump(skb, cb, dev, idx);
+ idx = dev->netdev_ops->ndo_fdb_dump(skb, cb, bdev, dev,
+ idx);
+
+ cops = NULL;
}
- rcu_read_unlock();
cb->args[0] = idx;
return skb->len;
diff --git a/net/core/timestamping.c b/net/core/timestamping.c
index 6521dfd8b7c8..a8770391ea5b 100644
--- a/net/core/timestamping.c
+++ b/net/core/timestamping.c
@@ -43,31 +43,22 @@ void skb_clone_tx_timestamp(struct sk_buff *skb)
return;
type = classify(skb);
+ if (type == PTP_CLASS_NONE)
+ return;
+
+ phydev = skb->dev->phydev;
+ if (likely(phydev->drv->txtstamp)) {
+ if (!atomic_inc_not_zero(&sk->sk_refcnt))
+ return;
- switch (type) {
- case PTP_CLASS_V1_IPV4:
- case PTP_CLASS_V1_IPV6:
- case PTP_CLASS_V2_IPV4:
- case PTP_CLASS_V2_IPV6:
- case PTP_CLASS_V2_L2:
- case PTP_CLASS_V2_VLAN:
- phydev = skb->dev->phydev;
- if (likely(phydev->drv->txtstamp)) {
- if (!atomic_inc_not_zero(&sk->sk_refcnt))
- return;
-
- clone = skb_clone(skb, GFP_ATOMIC);
- if (!clone) {
- sock_put(sk);
- return;
- }
-
- clone->sk = sk;
- phydev->drv->txtstamp(phydev, clone, type);
+ clone = skb_clone(skb, GFP_ATOMIC);
+ if (!clone) {
+ sock_put(sk);
+ return;
}
- break;
- default:
- break;
+
+ clone->sk = sk;
+ phydev->drv->txtstamp(phydev, clone, type);
}
}
EXPORT_SYMBOL_GPL(skb_clone_tx_timestamp);
@@ -114,20 +105,12 @@ bool skb_defer_rx_timestamp(struct sk_buff *skb)
__skb_pull(skb, ETH_HLEN);
- switch (type) {
- case PTP_CLASS_V1_IPV4:
- case PTP_CLASS_V1_IPV6:
- case PTP_CLASS_V2_IPV4:
- case PTP_CLASS_V2_IPV6:
- case PTP_CLASS_V2_L2:
- case PTP_CLASS_V2_VLAN:
- phydev = skb->dev->phydev;
- if (likely(phydev->drv->rxtstamp))
- return phydev->drv->rxtstamp(phydev, skb, type);
- break;
- default:
- break;
- }
+ if (type == PTP_CLASS_NONE)
+ return false;
+
+ phydev = skb->dev->phydev;
+ if (likely(phydev->drv->rxtstamp))
+ return phydev->drv->rxtstamp(phydev, skb, type);
return false;
}