aboutsummaryrefslogtreecommitdiffstats
path: root/include/linux/cpumask.h
diff options
context:
space:
mode:
authorWaiman Long <longman@redhat.com>2017-01-30 12:57:43 -0500
committerThomas Gleixner <tglx@linutronix.de>2017-02-04 08:54:46 +0100
commit668802c25729a8e3423015c33c05f1c3be3858e9 (patch)
tree86bcd7436b39f4a4273bdace08a0b86d14e8196c /include/linux/cpumask.h
parentMerge branch 'fortglx/4.11/time' of https://git.linaro.org/people/john.stultz/linux into timers/core (diff)
downloadlinux-dev-668802c25729a8e3423015c33c05f1c3be3858e9.tar.xz
linux-dev-668802c25729a8e3423015c33c05f1c3be3858e9.zip
tick/broadcast: Reduce lock cacheline contention
It was observed that on an Intel x86 system without the ARAT (Always running APIC timer) feature and with fairly large number of CPUs as well as CPUs coming in and out of intel_idle frequently, the lock contention on the tick_broadcast_lock can become significant. To reduce contention, the lock is put into its own cacheline and all the cpumask_var_t variables are put into the __read_mostly section. Running the SP benchmark of the NAS Parallel Benchmarks on a 4-socket 16-core 32-thread Nehalam system, the performance number improved from 3353.94 Mop/s to 3469.31 Mop/s when this patch was applied on a 4.9.6 kernel. This is a 3.4% improvement. Signed-off-by: Waiman Long <longman@redhat.com> Cc: "Peter Zijlstra (Intel)" <peterz@infradead.org> Cc: Andrew Morton <akpm@linux-foundation.org> Link: http://lkml.kernel.org/r/1485799063-20857-1-git-send-email-longman@redhat.com Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Diffstat (limited to 'include/linux/cpumask.h')
-rw-r--r--include/linux/cpumask.h7
1 files changed, 6 insertions, 1 deletions
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index c717f5ea88cb..23c1a6d09ec5 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -649,11 +649,15 @@ static inline size_t cpumask_size(void)
* used. Please use this_cpu_cpumask_var_t in those cases. The direct use
* of this_cpu_ptr() or this_cpu_read() will lead to failures when the
* other type of cpumask_var_t implementation is configured.
+ *
+ * Please also note that __cpumask_var_read_mostly can be used to declare
+ * a cpumask_var_t variable itself (not its content) as read mostly.
*/
#ifdef CONFIG_CPUMASK_OFFSTACK
typedef struct cpumask *cpumask_var_t;
-#define this_cpu_cpumask_var_ptr(x) this_cpu_read(x)
+#define this_cpu_cpumask_var_ptr(x) this_cpu_read(x)
+#define __cpumask_var_read_mostly __read_mostly
bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node);
bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags);
@@ -667,6 +671,7 @@ void free_bootmem_cpumask_var(cpumask_var_t mask);
typedef struct cpumask cpumask_var_t[1];
#define this_cpu_cpumask_var_ptr(x) this_cpu_ptr(x)
+#define __cpumask_var_read_mostly
static inline bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags)
{