aboutsummaryrefslogtreecommitdiffstats
path: root/net/openvswitch/flow_table.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2014-06-12 14:27:40 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2014-06-12 14:27:40 -0700
commitf9da455b93f6ba076935b4ef4589f61e529ae046 (patch)
tree3c4e69ce1ba1d6bf65915b97a76ca2172105b278 /net/openvswitch/flow_table.c
parentMerge tag 'dm-3.16-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm (diff)
parentrtnetlink: fix userspace API breakage for iproute2 < v3.9.0 (diff)
downloadlinux-dev-f9da455b93f6ba076935b4ef4589f61e529ae046.tar.xz
linux-dev-f9da455b93f6ba076935b4ef4589f61e529ae046.zip
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller: 1) Seccomp BPF filters can now be JIT'd, from Alexei Starovoitov. 2) Multiqueue support in xen-netback and xen-netfront, from Andrew J Benniston. 3) Allow tweaking of aggregation settings in cdc_ncm driver, from Bjørn Mork. 4) BPF now has a "random" opcode, from Chema Gonzalez. 5) Add more BPF documentation and improve test framework, from Daniel Borkmann. 6) Support TCP fastopen over ipv6, from Daniel Lee. 7) Add software TSO helper functions and use them to support software TSO in mvneta and mv643xx_eth drivers. From Ezequiel Garcia. 8) Support software TSO in fec driver too, from Nimrod Andy. 9) Add Broadcom SYSTEMPORT driver, from Florian Fainelli. 10) Handle broadcasts more gracefully over macvlan when there are large numbers of interfaces configured, from Herbert Xu. 11) Allow more control over fwmark used for non-socket based responses, from Lorenzo Colitti. 12) Do TCP congestion window limiting based upon measurements, from Neal Cardwell. 13) Support busy polling in SCTP, from Neal Horman. 14) Allow RSS key to be configured via ethtool, from Venkata Duvvuru. 15) Bridge promisc mode handling improvements from Vlad Yasevich. 16) Don't use inetpeer entries to implement ID generation any more, it performs poorly, from Eric Dumazet. * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1522 commits) rtnetlink: fix userspace API breakage for iproute2 < v3.9.0 tcp: fixing TLP's FIN recovery net: fec: Add software TSO support net: fec: Add Scatter/gather support net: fec: Increase buffer descriptor entry number net: fec: Factorize feature setting net: fec: Enable IP header hardware checksum net: fec: Factorize the .xmit transmit function bridge: fix compile error when compiling without IPv6 support bridge: fix smatch warning / potential null pointer dereference via-rhine: fix full-duplex with autoneg disable bnx2x: Enlarge the dorq threshold for VFs bnx2x: Check for UNDI in uncommon branch bnx2x: Fix 1G-baseT link bnx2x: Fix link for KR with swapped polarity lane sctp: Fix sk_ack_backlog wrap-around problem net/core: Add VF link state control policy net/fsl: xgmac_mdio is dependent on OF_MDIO net/fsl: Make xgmac_mdio read error message useful net_sched: drr: warn when qdisc is not work conserving ...
Diffstat (limited to 'net/openvswitch/flow_table.c')
-rw-r--r--net/openvswitch/flow_table.c121
1 files changed, 70 insertions, 51 deletions
diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
index 3c268b3d71c3..574c3abc9b30 100644
--- a/net/openvswitch/flow_table.c
+++ b/net/openvswitch/flow_table.c
@@ -48,6 +48,7 @@
#define REHASH_INTERVAL (10 * 60 * HZ)
static struct kmem_cache *flow_cache;
+struct kmem_cache *flow_stats_cache __read_mostly;
static u16 range_n_bytes(const struct sw_flow_key_range *range)
{
@@ -57,8 +58,10 @@ static u16 range_n_bytes(const struct sw_flow_key_range *range)
void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src,
const struct sw_flow_mask *mask)
{
- const long *m = (long *)((u8 *)&mask->key + mask->range.start);
- const long *s = (long *)((u8 *)src + mask->range.start);
+ const long *m = (const long *)((const u8 *)&mask->key +
+ mask->range.start);
+ const long *s = (const long *)((const u8 *)src +
+ mask->range.start);
long *d = (long *)((u8 *)dst + mask->range.start);
int i;
@@ -70,10 +73,11 @@ void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src,
*d++ = *s++ & *m++;
}
-struct sw_flow *ovs_flow_alloc(bool percpu_stats)
+struct sw_flow *ovs_flow_alloc(void)
{
struct sw_flow *flow;
- int cpu;
+ struct flow_stats *stats;
+ int node;
flow = kmem_cache_alloc(flow_cache, GFP_KERNEL);
if (!flow)
@@ -81,27 +85,22 @@ struct sw_flow *ovs_flow_alloc(bool percpu_stats)
flow->sf_acts = NULL;
flow->mask = NULL;
+ flow->stats_last_writer = NUMA_NO_NODE;
- flow->stats.is_percpu = percpu_stats;
+ /* Initialize the default stat node. */
+ stats = kmem_cache_alloc_node(flow_stats_cache,
+ GFP_KERNEL | __GFP_ZERO, 0);
+ if (!stats)
+ goto err;
- if (!percpu_stats) {
- flow->stats.stat = kzalloc(sizeof(*flow->stats.stat), GFP_KERNEL);
- if (!flow->stats.stat)
- goto err;
+ spin_lock_init(&stats->lock);
- spin_lock_init(&flow->stats.stat->lock);
- } else {
- flow->stats.cpu_stats = alloc_percpu(struct flow_stats);
- if (!flow->stats.cpu_stats)
- goto err;
+ RCU_INIT_POINTER(flow->stats[0], stats);
- for_each_possible_cpu(cpu) {
- struct flow_stats *cpu_stats;
+ for_each_node(node)
+ if (node != 0)
+ RCU_INIT_POINTER(flow->stats[node], NULL);
- cpu_stats = per_cpu_ptr(flow->stats.cpu_stats, cpu);
- spin_lock_init(&cpu_stats->lock);
- }
- }
return flow;
err:
kmem_cache_free(flow_cache, flow);
@@ -138,11 +137,13 @@ static struct flex_array *alloc_buckets(unsigned int n_buckets)
static void flow_free(struct sw_flow *flow)
{
- kfree((struct sf_flow_acts __force *)flow->sf_acts);
- if (flow->stats.is_percpu)
- free_percpu(flow->stats.cpu_stats);
- else
- kfree(flow->stats.stat);
+ int node;
+
+ kfree((struct sw_flow_actions __force *)flow->sf_acts);
+ for_each_node(node)
+ if (flow->stats[node])
+ kmem_cache_free(flow_stats_cache,
+ (struct flow_stats __force *)flow->stats[node]);
kmem_cache_free(flow_cache, flow);
}
@@ -158,25 +159,6 @@ void ovs_flow_free(struct sw_flow *flow, bool deferred)
if (!flow)
return;
- if (flow->mask) {
- struct sw_flow_mask *mask = flow->mask;
-
- /* ovs-lock is required to protect mask-refcount and
- * mask list.
- */
- ASSERT_OVSL();
- BUG_ON(!mask->ref_count);
- mask->ref_count--;
-
- if (!mask->ref_count) {
- list_del_rcu(&mask->list);
- if (deferred)
- kfree_rcu(mask, rcu);
- else
- kfree(mask);
- }
- }
-
if (deferred)
call_rcu(&flow->rcu, rcu_free_flow_callback);
else
@@ -375,7 +357,7 @@ int ovs_flow_tbl_flush(struct flow_table *flow_table)
static u32 flow_hash(const struct sw_flow_key *key, int key_start,
int key_end)
{
- u32 *hash_key = (u32 *)((u8 *)key + key_start);
+ const u32 *hash_key = (const u32 *)((const u8 *)key + key_start);
int hash_u32s = (key_end - key_start) >> 2;
/* Make sure number of hash bytes are multiple of u32. */
@@ -397,8 +379,8 @@ static bool cmp_key(const struct sw_flow_key *key1,
const struct sw_flow_key *key2,
int key_start, int key_end)
{
- const long *cp1 = (long *)((u8 *)key1 + key_start);
- const long *cp2 = (long *)((u8 *)key2 + key_start);
+ const long *cp1 = (const long *)((const u8 *)key1 + key_start);
+ const long *cp2 = (const long *)((const u8 *)key2 + key_start);
long diffs = 0;
int i;
@@ -490,6 +472,25 @@ static struct table_instance *table_instance_expand(struct table_instance *ti)
return table_instance_rehash(ti, ti->n_buckets * 2);
}
+/* Remove 'mask' from the mask list, if it is not needed any more. */
+static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask)
+{
+ if (mask) {
+ /* ovs-lock is required to protect mask-refcount and
+ * mask list.
+ */
+ ASSERT_OVSL();
+ BUG_ON(!mask->ref_count);
+ mask->ref_count--;
+
+ if (!mask->ref_count) {
+ list_del_rcu(&mask->list);
+ kfree_rcu(mask, rcu);
+ }
+ }
+}
+
+/* Must be called with OVS mutex held. */
void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow)
{
struct table_instance *ti = ovsl_dereference(table->ti);
@@ -497,6 +498,11 @@ void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow)
BUG_ON(table->count == 0);
hlist_del_rcu(&flow->hash_node[ti->node_ver]);
table->count--;
+
+ /* RCU delete the mask. 'flow->mask' is not NULLed, as it should be
+ * accessible as long as the RCU read lock is held.
+ */
+ flow_mask_remove(table, flow->mask);
}
static struct sw_flow_mask *mask_alloc(void)
@@ -513,8 +519,8 @@ static struct sw_flow_mask *mask_alloc(void)
static bool mask_equal(const struct sw_flow_mask *a,
const struct sw_flow_mask *b)
{
- u8 *a_ = (u8 *)&a->key + a->range.start;
- u8 *b_ = (u8 *)&b->key + b->range.start;
+ const u8 *a_ = (const u8 *)&a->key + a->range.start;
+ const u8 *b_ = (const u8 *)&b->key + b->range.start;
return (a->range.end == b->range.end)
&& (a->range.start == b->range.start)
@@ -559,6 +565,7 @@ static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow,
return 0;
}
+/* Must be called with OVS mutex held. */
int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow,
struct sw_flow_mask *mask)
{
@@ -597,16 +604,28 @@ int ovs_flow_init(void)
BUILD_BUG_ON(__alignof__(struct sw_flow_key) % __alignof__(long));
BUILD_BUG_ON(sizeof(struct sw_flow_key) % sizeof(long));
- flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow), 0,
- 0, NULL);
+ flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow)
+ + (num_possible_nodes()
+ * sizeof(struct flow_stats *)),
+ 0, 0, NULL);
if (flow_cache == NULL)
return -ENOMEM;
+ flow_stats_cache
+ = kmem_cache_create("sw_flow_stats", sizeof(struct flow_stats),
+ 0, SLAB_HWCACHE_ALIGN, NULL);
+ if (flow_stats_cache == NULL) {
+ kmem_cache_destroy(flow_cache);
+ flow_cache = NULL;
+ return -ENOMEM;
+ }
+
return 0;
}
/* Uninitializes the flow module. */
void ovs_flow_exit(void)
{
+ kmem_cache_destroy(flow_stats_cache);
kmem_cache_destroy(flow_cache);
}