From b9ec40af0e18fb7d02106be148036c2ea490fdf9 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <cl@linux.com>
Date: Mon, 28 Feb 2011 11:02:24 +0100
Subject: percpu, x86: Add arch-specific this_cpu_cmpxchg_double() support

Support this_cpu_cmpxchg_double() using the cmpxchg16b and cmpxchg8b
instructions.

-tj: s/percpu_cmpxchg16b/percpu_cmpxchg16b_double/ for consistency and
     other cosmetic changes.

Signed-off-by: Christoph Lameter <cl@linux.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 arch/x86/include/asm/percpu.h | 48 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 48 insertions(+)

diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 3788f4649db4..260ac7af1fdc 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -451,6 +451,26 @@ do {									\
 #define irqsafe_cpu_cmpxchg_4(pcp, oval, nval)	percpu_cmpxchg_op(pcp, oval, nval)
 #endif /* !CONFIG_M386 */
 
+#ifdef CONFIG_X86_CMPXCHG64
+#define percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2)			\
+({									\
+	char __ret;							\
+	typeof(o1) __o1 = o1;						\
+	typeof(o1) __n1 = n1;						\
+	typeof(o2) __o2 = o2;						\
+	typeof(o2) __n2 = n2;						\
+	typeof(o2) __dummy = n2;					\
+	asm volatile("cmpxchg8b "__percpu_arg(1)"\n\tsetz %0\n\t"	\
+		    : "=a"(__ret), "=m" (pcp1), "=d"(__dummy)		\
+		    : "b"(__n1), "c"(__n2), "a"(__o1), "d"(__o2));	\
+	__ret;								\
+})
+
+#define __this_cpu_cmpxchg_double_4(pcp1, pcp2, o1, o2, n1, n2)		percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2)
+#define this_cpu_cmpxchg_double_4(pcp1, pcp2, o1, o2, n1, n2)		percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2)
+#define irqsafe_cpu_cmpxchg_double_4(pcp1, pcp2, o1, o2, n1, n2)	percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2)
+#endif /* CONFIG_X86_CMPXCHG64 */
+
 /*
  * Per cpu atomic 64 bit operations are only available under 64 bit.
  * 32 bit must fall back to generic operations.
@@ -480,6 +500,34 @@ do {									\
 #define irqsafe_cpu_xor_8(pcp, val)	percpu_to_op("xor", (pcp), val)
 #define irqsafe_cpu_xchg_8(pcp, nval)	percpu_xchg_op(pcp, nval)
 #define irqsafe_cpu_cmpxchg_8(pcp, oval, nval)	percpu_cmpxchg_op(pcp, oval, nval)
+
+/*
+ * Pretty complex macro to generate cmpxchg16 instruction. The instruction
+ * is not supported on early AMD64 processors so we must be able to emulate
+ * it in software. The address used in the cmpxchg16 instruction must be
+ * aligned to a 16 byte boundary.
+ */
+#define percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2)			\
+({									\
+	char __ret;							\
+	typeof(o1) __o1 = o1;						\
+	typeof(o1) __n1 = n1;						\
+	typeof(o2) __o2 = o2;						\
+	typeof(o2) __n2 = n2;						\
+	typeof(o2) __dummy;						\
+	alternative_io("call this_cpu_cmpxchg16b_emu\n\t" P6_NOP4,	\
+		       "cmpxchg16b %%gs:(%%rsi)\n\tsetz %0\n\t",	\
+		       X86_FEATURE_CX16,				\
+		       ASM_OUTPUT2("=a"(__ret), "=d"(__dummy)),		\
+		       "S" (&pcp1), "b"(__n1), "c"(__n2),		\
+		       "a"(__o1), "d"(__o2));				\
+	__ret;								\
+})
+
+#define __this_cpu_cmpxchg_double_8(pcp1, pcp2, o1, o2, n1, n2)		percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2)
+#define this_cpu_cmpxchg_double_8(pcp1, pcp2, o1, o2, n1, n2)		percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2)
+#define irqsafe_cpu_cmpxchg_double_8(pcp1, pcp2, o1, o2, n1, n2)	percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2)
+
 #endif
 
 /* This is not atomic against other CPUs -- CPU preemption needs to be off */
-- 
cgit v1.2.3-59-g8ed1b
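
For context, callers never use these size-suffixed macros directly; they go
through the generic this_cpu_cmpxchg_double() interface, which this patch
backs with cmpxchg8b/cmpxchg16b on x86. Below is a minimal usage sketch, not
part of the patch: the names demo_pair and demo_update are invented for the
example, and it assumes the generic this_cpu_cmpxchg_double() of this kernel
era, which returns non-zero when both words were replaced.

#include <linux/percpu.h>
#include <linux/compiler.h>

/*
 * Hypothetical example only. The two per-cpu words must be adjacent and
 * the pair aligned to twice the word size (16 bytes on x86-64) so that
 * cmpxchg16b accepts the address.
 */
struct demo_pair {
	void *ptr;		/* first word of the pair */
	unsigned long gen;	/* second word, directly after the first */
} __aligned(2 * sizeof(void *));

static DEFINE_PER_CPU(struct demo_pair, demo_pair);

static int demo_update(void *old_ptr, unsigned long old_gen,
		       void *new_ptr, unsigned long new_gen)
{
	/*
	 * Replace both words atomically iff neither changed since they
	 * were read; returns non-zero on success, 0 if another update
	 * (e.g. an interrupt on this CPU) won the race, in which case
	 * the caller would typically re-read the pair and retry.
	 */
	return this_cpu_cmpxchg_double(demo_pair.ptr, demo_pair.gen,
				       old_ptr, old_gen,
				       new_ptr, new_gen);
}

On processors without cmpxchg16b, the alternative_io() in the 64-bit macro
routes the same call through this_cpu_cmpxchg16b_emu, so callers need no
CPU feature check of their own.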