[PATCH] x86_64: Remove optimization for B stepping AMD K8

B-stepping parts were the first shipping Opterons. memcpy/memset/copy_page/clear_page had specially optimized versions for them. These CPUs are really old and in the minority now, and the difference from the generic versions (using rep microcode) is not that big anyway. So just remove them.

TODO: figure out optimized versions for Intel Netburst-based EM64T.

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
author: Andi Kleen <ak@suse.de> 2005-11-05 17:25:54 +0100
committer: Linus Torvalds <torvalds@g5.osdl.org> 2005-11-14 19:55:17 -0800
commit: a5b250a428aabc619ace872f8220a7d0b8f7d557 (patch)
tree: 11cabf07982ae37f94bc929f9a605cbbd20e35ab /arch/x86_64/lib/memset.S
parent: [PATCH] x86_64: Reduce number of retries for reset through keyboard controller (diff)
download: linux-dev-a5b250a428aabc619ace872f8220a7d0b8f7d557.tar.xz
linux-dev-a5b250a428aabc619ace872f8220a7d0b8f7d557.zip
1 files changed, 0 insertions, 94 deletions
diff --git a/arch/x86_64/lib/memset.S b/arch/x86_64/lib/memset.S
index 4b4c40638640..2aa48f24ed1e 100644
--- a/arch/x86_64/lib/memset.S
+++ b/arch/x86_64/lib/memset.S
@@ -13,98 +13,6 @@
 	.p2align 4
 memset:	
 __memset:
-	movq %rdi,%r10
-	movq %rdx,%r11
-
-	/* expand byte value  */
-	movzbl %sil,%ecx
-	movabs $0x0101010101010101,%rax
-	mul    %rcx		/* with rax, clobbers rdx */
-
-	/* align dst */
-	movl  %edi,%r9d		
-	andl  $7,%r9d	
-	jnz  .Lbad_alignment
-.Lafter_bad_alignment:
-	
-	movl %r11d,%ecx
-	shrl $6,%ecx
-	jz	 .Lhandle_tail
-
-	.p2align 4
-.Lloop_64:	
-	decl   %ecx
-	movq  %rax,(%rdi) 
-	movq  %rax,8(%rdi) 
-	movq  %rax,16(%rdi) 
-	movq  %rax,24(%rdi) 
-	movq  %rax,32(%rdi) 
-	movq  %rax,40(%rdi) 
-	movq  %rax,48(%rdi) 
-	movq  %rax,56(%rdi) 
-	leaq  64(%rdi),%rdi
-	jnz    .Lloop_64
-
-	/* Handle tail in loops. The loops should be faster than hard
-	   to predict jump tables. */ 
-	.p2align 4	   
-.Lhandle_tail:
-	movl	%r11d,%ecx
-	andl    $63&(~7),%ecx
-	jz 		.Lhandle_7
-	shrl	$3,%ecx
-	.p2align 4
-.Lloop_8:
-	decl   %ecx
-	movq  %rax,(%rdi)
-	leaq  8(%rdi),%rdi
-	jnz    .Lloop_8
-
-.Lhandle_7:
-	movl	%r11d,%ecx
-	andl	$7,%ecx
-	jz      .Lende
-	.p2align 4
-.Lloop_1:
-	decl    %ecx
-	movb 	%al,(%rdi)
-	leaq	1(%rdi),%rdi
-	jnz     .Lloop_1
-	
-.Lende:	
-	movq	%r10,%rax
-	ret
-
-.Lbad_alignment:
-	cmpq $7,%r11
-	jbe	.Lhandle_7
-	movq %rax,(%rdi)	/* unaligned store */
-	movq $8,%r8			
-	subq %r9,%r8 
-	addq %r8,%rdi
-	subq %r8,%r11
-	jmp .Lafter_bad_alignment
-
-	/* C stepping K8 run faster using the string instructions.
-	   It is also a lot simpler. Use this when possible */
-
-#include <asm/cpufeature.h>	
-		
-	.section .altinstructions,"a"
-	.align 8
-	.quad  memset
-	.quad  memset_c
-	.byte  X86_FEATURE_K8_C
-	.byte  memset_c_end-memset_c
-	.byte  memset_c_end-memset_c
-	.previous
-
-	.section .altinstr_replacement,"ax"
- /* rdi	destination
-  * rsi value
-  * rdx count
-  */			
-memset_c:	
 	movq %rdi,%r9
 	movl %edx,%r8d
 	andl $7,%r8d		
@@ -121,5 +29,3 @@ memset_c:
 	stosb
 	movq %r9,%rax
 	ret
-memset_c_end:
-	.previous
author	Andi Kleen <ak@suse.de>	2005-11-05 17:25:54 +0100
committer	Linus Torvalds <torvalds@g5.osdl.org>	2005-11-14 19:55:17 -0800
commit	a5b250a428aabc619ace872f8220a7d0b8f7d557 (patch)
tree	11cabf07982ae37f94bc929f9a605cbbd20e35ab /arch/x86_64/lib/memset.S
parent	[PATCH] x86_64: Reduce number of retries for reset through keyboard controller (diff)
download	linux-dev-a5b250a428aabc619ace872f8220a7d0b8f7d557.tar.xz linux-dev-a5b250a428aabc619ace872f8220a7d0b8f7d557.zip