aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/include/asm/uaccess_64.h
diff options
context:
space:
mode:
authorFenghua Yu <fenghua.yu@intel.com>2012-05-24 18:19:45 -0700
committerH. Peter Anvin <hpa@linux.intel.com>2012-06-29 15:33:34 -0700
commit954e482bde20b0e208fd4d34ef26e10afd194600 (patch)
treea98702fdde1d47b31851cee7b81849dec8b91f09 /arch/x86/include/asm/uaccess_64.h
parentx86/alternatives: Use atomic_xchg() instead atomic_dec_and_test() for stop_machine_text_poke() (diff)
downloadlinux-dev-954e482bde20b0e208fd4d34ef26e10afd194600.tar.xz
linux-dev-954e482bde20b0e208fd4d34ef26e10afd194600.zip
x86/copy_user_generic: Optimize copy_user_generic with CPU erms feature
According to Intel 64 and IA-32 SDM and Optimization Reference Manual, beginning with Ivybridge, REG string operation using MOVSB and STOSB can provide both flexible and high-performance REG string operations in cases like memory copy. Enhancement availability is indicated by CPUID.7.0.EBX[9] (Enhanced REP MOVSB/ STOSB). If CPU erms feature is detected, patch copy_user_generic with enhanced fast string version of copy_user_generic. A few new macros are defined to reduce duplicate code in ALTERNATIVE and ALTERNATIVE_2. Signed-off-by: Fenghua Yu <fenghua.yu@intel.com> Link: http://lkml.kernel.org/r/1337908785-14015-1-git-send-email-fenghua.yu@intel.com Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Diffstat (limited to 'arch/x86/include/asm/uaccess_64.h')
-rw-r--r--arch/x86/include/asm/uaccess_64.h11
1 files changed, 10 insertions, 1 deletions
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index 8e796fbbf9c6..d8def8b3dba0 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -17,6 +17,8 @@
/* Handles exceptions in both to and from, but doesn't do access_ok */
__must_check unsigned long
+copy_user_enhanced_fast_string(void *to, const void *from, unsigned len);
+__must_check unsigned long
copy_user_generic_string(void *to, const void *from, unsigned len);
__must_check unsigned long
copy_user_generic_unrolled(void *to, const void *from, unsigned len);
@@ -26,9 +28,16 @@ copy_user_generic(void *to, const void *from, unsigned len)
{
unsigned ret;
- alternative_call(copy_user_generic_unrolled,
+ /*
+ * If CPU has ERMS feature, use copy_user_enhanced_fast_string.
+ * Otherwise, if CPU has rep_good feature, use copy_user_generic_string.
+ * Otherwise, use copy_user_generic_unrolled.
+ */
+ alternative_call_2(copy_user_generic_unrolled,
copy_user_generic_string,
X86_FEATURE_REP_GOOD,
+ copy_user_enhanced_fast_string,
+ X86_FEATURE_ERMS,
ASM_OUTPUT2("=a" (ret), "=D" (to), "=S" (from),
"=d" (len)),
"1" (to), "2" (from), "3" (len)