aboutsummaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
author: Eric Dumazet <dada1@cosmosbay.com> 2007-11-05 23:38:39 -0800
committer: David S. Miller <davem@sunset.davemloft.net> 2007-11-07 04:08:57 -0800
commit: 286ab3d46058840d68e5d7d52e316c1f7e98c59f (patch)
tree: 1d70e7895c49d2b148e026aa047efe186697fff9 /include
parent: [PPP]: L2TP: Fix oops in transmit and receive paths (diff)
downloadlinux-dev-286ab3d46058840d68e5d7d52e316c1f7e98c59f.tar.xz
linux-dev-286ab3d46058840d68e5d7d52e316c1f7e98c59f.zip
[NET]: Define infrastructure to keep 'inuse' changes in an efficient SMP/NUMA way.
"struct proto" currently uses an array stats[NR_CPUS] to track changes to the number of 'inuse' sockets per protocol. If NR_CPUS is big, this means we use a big memory area for this. Moreover, all of this memory area is located on a single node on NUMA machines, increasing memory pressure on the boot node. In this patch, I tried to: - Keep a fast !CONFIG_SMP implementation. - Keep a fast CONFIG_SMP implementation for often-used protocols (tcp, udp, raw, ...). - Introduce a NUMA-efficient implementation. Some helper macros are defined in include/net/sock.h. These macros take CONFIG_SMP into account. If a "struct proto" is declared without using the DEFINE_PROTO_INUSE / REF_PROTO_INUSE macros, it will automatically use a default implementation, using a dynamically allocated percpu zone. This default implementation will be NUMA efficient, but might use 32/64 bytes per possible cpu because of the current alloc_percpu() implementation. However, it should still be better than the previous implementation based on the stats[NR_CPUS] field. When a "struct proto" is changed to use the new macros, we use a single static "int" percpu variable, lowering the memory and cpu costs while still preserving NUMA efficiency. Signed-off-by: Eric Dumazet <dada1@cosmosbay.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include')
-rw-r--r--include/net/sock.h63
1 files changed, 57 insertions, 6 deletions
diff --git a/include/net/sock.h b/include/net/sock.h
index 20de3fa7ae40..5504fb9fa88a 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -560,6 +560,14 @@ struct proto {
void (*unhash)(struct sock *sk);
int (*get_port)(struct sock *sk, unsigned short snum);
+#ifdef CONFIG_SMP
+ /* Keeping track of sockets in use */
+ void (*inuse_add)(struct proto *prot, int inc);
+ int (*inuse_getval)(const struct proto *prot);
+ int *inuse_ptr;
+#else
+ int inuse;
+#endif
/* Memory pressure */
void (*enter_memory_pressure)(void);
atomic_t *memory_allocated; /* Current allocated memory. */
@@ -592,12 +600,38 @@ struct proto {
#ifdef SOCK_REFCNT_DEBUG
atomic_t socks;
#endif
- struct {
- int inuse;
- u8 __pad[SMP_CACHE_BYTES - sizeof(int)];
- } stats[NR_CPUS];
};
+/*
+ * DEFINE_PROTO_INUSE(NAME) emits a static percpu int NAME_inuse plus add/getval
+ * helpers (getval sums over all possible cpus); REF_PROTO_INUSE(NAME) sets the
+ * .inuse_add and .inuse_getval initializers. No allocated counter: one less
+ * dereference. Empty if !CONFIG_SMP. May change if dynamic percpu vars get fast.
+ */
+#ifdef CONFIG_SMP
+# define DEFINE_PROTO_INUSE(NAME) \
+static DEFINE_PER_CPU(int, NAME##_inuse); \
+static void NAME##_inuse_add(struct proto *prot, int inc) \
+{ \
+ __get_cpu_var(NAME##_inuse) += inc; \
+} \
+ \
+static int NAME##_inuse_getval(const struct proto *prot)\
+{ \
+ int res = 0, cpu; \
+ \
+ for_each_possible_cpu(cpu) \
+ res += per_cpu(NAME##_inuse, cpu); \
+ return res; \
+}
+# define REF_PROTO_INUSE(NAME) \
+ .inuse_add = NAME##_inuse_add, \
+ .inuse_getval = NAME##_inuse_getval,
+#else
+# define DEFINE_PROTO_INUSE(NAME)
+# define REF_PROTO_INUSE(NAME)
+#endif
+
extern int proto_register(struct proto *prot, int alloc_slab);
extern void proto_unregister(struct proto *prot);
@@ -629,12 +663,29 @@ static inline void sk_refcnt_debug_release(const struct sock *sk)
/* Called with local bh disabled */
static __inline__ void sock_prot_inc_use(struct proto *prot)
{
- prot->stats[smp_processor_id()].inuse++;
+#ifdef CONFIG_SMP
+ prot->inuse_add(prot, 1); /* SMP: add 1 through the proto's inuse_add hook */
+#else
+ prot->inuse++; /* !SMP: plain int counter embedded in struct proto */
+#endif
}
static __inline__ void sock_prot_dec_use(struct proto *prot)
{
- prot->stats[smp_processor_id()].inuse--;
+#ifdef CONFIG_SMP
+ prot->inuse_add(prot, -1); /* SMP: add -1 through the proto's inuse_add hook */
+#else
+ prot->inuse--; /* !SMP: plain int counter embedded in struct proto */
+#endif
+}
+
+static __inline__ int sock_prot_inuse(struct proto *proto)
+{
+#ifdef CONFIG_SMP
+ return proto->inuse_getval(proto); /* SMP: value comes from the proto's inuse_getval hook */
+#else
+ return proto->inuse; /* !SMP: read the embedded counter directly */
+#endif
}
/* With per-bucket locks this operation is not-atomic, so that