aboutsummaryrefslogtreecommitdiffstats
path: root/ipc/util.c
diff options
context:
space:
mode:
authorGuillaume Knispel <guillaume.knispel@supersonicimagine.com>2017-09-08 16:17:55 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2017-09-08 18:26:51 -0700
commit0cfb6aee70bddbef6ec796b255f588ce0e126766 (patch)
treef83b1e24d55b1aef178169f1d747d404d665be83 /ipc/util.c
parentipc/sem: play nicer with large nsops allocations (diff)
downloadlinux-dev-0cfb6aee70bddbef6ec796b255f588ce0e126766.tar.xz
linux-dev-0cfb6aee70bddbef6ec796b255f588ce0e126766.zip
ipc: optimize semget/shmget/msgget for lots of keys
ipc_findkey() used to scan all objects to look for the wanted key. This is slow when using a high number of keys. This change adds an rhashtable of kern_ipc_perm objects in ipc_ids, so that one lookup cease to be O(n). This change gives a 865% improvement of benchmark reaim.jobs_per_min on a 56 threads Intel(R) Xeon(R) CPU E5-2695 v3 @ 2.30GHz with 256G memory [1] Other (more micro) benchmark results, by the author: On an i5 laptop, the following loop executed right after a reboot took, without and with this change: for (int i = 0, k=0x424242; i < KEYS; ++i) semget(k++, 1, IPC_CREAT | 0600); total total max single max single KEYS without with call without call with 1 3.5 4.9 µs 3.5 4.9 10 7.6 8.6 µs 3.7 4.7 32 16.2 15.9 µs 4.3 5.3 100 72.9 41.8 µs 3.7 4.7 1000 5,630.0 502.0 µs * * 10000 1,340,000.0 7,240.0 µs * * 31900 17,600,000.0 22,200.0 µs * * *: unreliable measure: high variance The duration for a lookup-only usage was obtained by the same loop once the keys are present: total total max single max single KEYS without with call without call with 1 2.1 2.5 µs 2.1 2.5 10 4.5 4.8 µs 2.2 2.3 32 13.0 10.8 µs 2.3 2.8 100 82.9 25.1 µs * 2.3 1000 5,780.0 217.0 µs * * 10000 1,470,000.0 2,520.0 µs * * 31900 17,400,000.0 7,810.0 µs * * Finally, executing each semget() in a new process gave, when still summing only the durations of these syscalls: creation: total total KEYS without with 1 3.7 5.0 µs 10 32.9 36.7 µs 32 125.0 109.0 µs 100 523.0 353.0 µs 1000 20,300.0 3,280.0 µs 10000 2,470,000.0 46,700.0 µs 31900 27,800,000.0 219,000.0 µs lookup-only: total total KEYS without with 1 2.5 2.7 µs 10 25.4 24.4 µs 32 106.0 72.6 µs 100 591.0 352.0 µs 1000 22,400.0 2,250.0 µs 10000 2,510,000.0 25,700.0 µs 31900 28,200,000.0 115,000.0 µs [1] http://lkml.kernel.org/r/20170814060507.GE23258@yexl-desktop Link: http://lkml.kernel.org/r/20170815194954.ck32ta2z35yuzpwp@debix Signed-off-by: Guillaume Knispel <guillaume.knispel@supersonicimagine.com> Reviewed-by: Marc Pardo <marc.pardo@supersonicimagine.com> Cc: Davidlohr Bueso <dave@stgolabs.net> Cc: Kees Cook <keescook@chromium.org> Cc: Manfred Spraul <manfred@colorfullife.com> Cc: Alexey Dobriyan <adobriyan@gmail.com> Cc: "Eric W. Biederman" <ebiederm@xmission.com> Cc: "Peter Zijlstra (Intel)" <peterz@infradead.org> Cc: Ingo Molnar <mingo@kernel.org> Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Cc: Serge Hallyn <serge@hallyn.com> Cc: Andrey Vagin <avagin@openvz.org> Cc: Guillaume Knispel <guillaume.knispel@supersonicimagine.com> Cc: Marc Pardo <marc.pardo@supersonicimagine.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'ipc/util.c')
-rw-r--r--ipc/util.c100
1 files changed, 76 insertions, 24 deletions
diff --git a/ipc/util.c b/ipc/util.c
index 069bb22c9f64..78755873cc5b 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -83,27 +83,46 @@ struct ipc_proc_iface {
*/
static int __init ipc_init(void)
{
- sem_init();
- msg_init();
+ int err_sem, err_msg;
+
+ err_sem = sem_init();
+ WARN(err_sem, "ipc: sysv sem_init failed: %d\n", err_sem);
+ err_msg = msg_init();
+ WARN(err_msg, "ipc: sysv msg_init failed: %d\n", err_msg);
shm_init();
- return 0;
+
+ return err_msg ? err_msg : err_sem;
}
device_initcall(ipc_init);
+static const struct rhashtable_params ipc_kht_params = {
+ .head_offset = offsetof(struct kern_ipc_perm, khtnode),
+ .key_offset = offsetof(struct kern_ipc_perm, key),
+ .key_len = FIELD_SIZEOF(struct kern_ipc_perm, key),
+ .locks_mul = 1,
+ .automatic_shrinking = true,
+};
+
/**
* ipc_init_ids - initialise ipc identifiers
* @ids: ipc identifier set
*
* Set up the sequence range to use for the ipc identifier range (limited
- * below IPCMNI) then initialise the ids idr.
+ * below IPCMNI) then initialise the keys hashtable and ids idr.
*/
-void ipc_init_ids(struct ipc_ids *ids)
+int ipc_init_ids(struct ipc_ids *ids)
{
+ int err;
ids->in_use = 0;
ids->seq = 0;
ids->next_id = -1;
init_rwsem(&ids->rwsem);
+ err = rhashtable_init(&ids->key_ht, &ipc_kht_params);
+ if (err)
+ return err;
idr_init(&ids->ipcs_idr);
+ ids->tables_initialized = true;
+ return 0;
}
#ifdef CONFIG_PROC_FS
@@ -147,28 +166,20 @@ void __init ipc_init_proc_interface(const char *path, const char *header,
* Returns the locked pointer to the ipc structure if found or NULL
* otherwise. If key is found ipc points to the owning ipc structure
*
- * Called with ipc_ids.rwsem held.
+ * Called with writer ipc_ids.rwsem held.
*/
static struct kern_ipc_perm *ipc_findkey(struct ipc_ids *ids, key_t key)
{
- struct kern_ipc_perm *ipc;
- int next_id;
- int total;
-
- for (total = 0, next_id = 0; total < ids->in_use; next_id++) {
- ipc = idr_find(&ids->ipcs_idr, next_id);
-
- if (ipc == NULL)
- continue;
+ struct kern_ipc_perm *ipcp = NULL;
- if (ipc->key != key) {
- total++;
- continue;
- }
+ if (likely(ids->tables_initialized))
+ ipcp = rhashtable_lookup_fast(&ids->key_ht, &key,
+ ipc_kht_params);
+ if (ipcp) {
rcu_read_lock();
- ipc_lock_object(ipc);
- return ipc;
+ ipc_lock_object(ipcp);
+ return ipcp;
}
return NULL;
@@ -221,13 +232,13 @@ int ipc_addid(struct ipc_ids *ids, struct kern_ipc_perm *new, int size)
{
kuid_t euid;
kgid_t egid;
- int id;
+ int id, err;
int next_id = ids->next_id;
if (size > IPCMNI)
size = IPCMNI;
- if (ids->in_use >= size)
+ if (!ids->tables_initialized || ids->in_use >= size)
return -ENOSPC;
idr_preload(GFP_KERNEL);
@@ -246,6 +257,15 @@ int ipc_addid(struct ipc_ids *ids, struct kern_ipc_perm *new, int size)
(next_id < 0) ? 0 : ipcid_to_idx(next_id), 0,
GFP_NOWAIT);
idr_preload_end();
+
+ if (id >= 0 && new->key != IPC_PRIVATE) {
+ err = rhashtable_insert_fast(&ids->key_ht, &new->khtnode,
+ ipc_kht_params);
+ if (err < 0) {
+ idr_remove(&ids->ipcs_idr, id);
+ id = err;
+ }
+ }
if (id < 0) {
spin_unlock(&new->lock);
rcu_read_unlock();
@@ -377,6 +397,20 @@ static int ipcget_public(struct ipc_namespace *ns, struct ipc_ids *ids,
return err;
}
+/**
+ * ipc_kht_remove - remove an ipc from the key hashtable
+ * @ids: ipc identifier set
+ * @ipcp: ipc perm structure containing the key to remove
+ *
+ * ipc_ids.rwsem (as a writer) and the spinlock for this ID are held
+ * before this function is called, and remain locked on the exit.
+ */
+static void ipc_kht_remove(struct ipc_ids *ids, struct kern_ipc_perm *ipcp)
+{
+ if (ipcp->key != IPC_PRIVATE)
+ rhashtable_remove_fast(&ids->key_ht, &ipcp->khtnode,
+ ipc_kht_params);
+}
/**
* ipc_rmid - remove an ipc identifier
@@ -391,10 +425,25 @@ void ipc_rmid(struct ipc_ids *ids, struct kern_ipc_perm *ipcp)
int lid = ipcid_to_idx(ipcp->id);
idr_remove(&ids->ipcs_idr, lid);
+ ipc_kht_remove(ids, ipcp);
ids->in_use--;
ipcp->deleted = true;
}
+/**
+ * ipc_set_key_private - switch the key of an existing ipc to IPC_PRIVATE
+ * @ids: ipc identifier set
+ * @ipcp: ipc perm structure containing the key to modify
+ *
+ * ipc_ids.rwsem (as a writer) and the spinlock for this ID are held
+ * before this function is called, and remain locked on the exit.
+ */
+void ipc_set_key_private(struct ipc_ids *ids, struct kern_ipc_perm *ipcp)
+{
+ ipc_kht_remove(ids, ipcp);
+ ipcp->key = IPC_PRIVATE;
+}
+
int ipc_rcu_getref(struct kern_ipc_perm *ptr)
{
return refcount_inc_not_zero(&ptr->refcount);
@@ -485,7 +534,7 @@ void ipc64_perm_to_ipc_perm(struct ipc64_perm *in, struct ipc_perm *out)
}
/**
- * ipc_obtain_object
+ * ipc_obtain_object_idr
* @ids: ipc identifier set
* @id: ipc id to look for
*
@@ -499,6 +548,9 @@ struct kern_ipc_perm *ipc_obtain_object_idr(struct ipc_ids *ids, int id)
struct kern_ipc_perm *out;
int lid = ipcid_to_idx(id);
+ if (unlikely(!ids->tables_initialized))
+ return ERR_PTR(-EINVAL);
+
out = idr_find(&ids->ipcs_idr, lid);
if (!out)
return ERR_PTR(-EINVAL);