/*
 * Copyright 2002, 2003 Andi Kleen, SuSE Labs.
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License. See the file COPYING in the main directory of this archive
 * for more details. No warranty for anything given at all.
 */
#include <linux/linkage.h>
#include <asm/errno.h>
#include <asm/asm.h>

/*
 * Checksum copy with exception handling.
 * On an exception the function returns 0; the wrappers report -EFAULT
 * and take care of zeroing the destination.
 *
 * Input
 * rdi	source
 * rsi	destination
 * edx	len (32bit)
 *
 * Output
 * eax	32-bit sum, folded from 64 bits; undefined in case of exception.
 *
 * Wrappers need to take care of valid exception sum and zeroing.
 * They also should align source or destination to 8 bytes.
 */

	.macro source
10:
	_ASM_EXTABLE_UA(10b, .Lfault)
	.endm

	.macro dest
20:
	_ASM_EXTABLE_UA(20b, .Lfault)
	.endm

SYM_FUNC_START(csum_partial_copy_generic)
	subq  $5*8, %rsp
	movq  %rbx, 0*8(%rsp)
	movq  %r12, 1*8(%rsp)
	movq  %r14, 2*8(%rsp)
	movq  %r13, 3*8(%rsp)
	movq  %r15, 4*8(%rsp)

	movl  $-1, %eax			/* seed the sum so a successful copy never returns 0 */
	xorl  %r9d, %r9d		/* r9 stays zero; used to fold the final carry back in */
	movl  %edx, %ecx
	cmpl  $8, %ecx
	jb    .Lshort

	testb $7, %sil			/* destination 8-byte aligned? */
	jne   .Lunaligned
.Laligned:
	movl  %ecx, %r12d

	shrq  $6, %r12
	jz    .Lhandle_tail		/* < 64 */

	clc

	/* main loop. clear in 64 byte blocks */
	/* r9: zero, r8: temp2, rbx: temp1, rax: sum, rcx: saved length */
	/* r11: temp3, rdx: temp4, r12: loopcnt */
	/* r10: temp5, r15: temp6, r14: temp7, r13: temp8 */
	.p2align 4
.Lloop:
	source
	movq  (%rdi), %rbx
	source
	movq  8(%rdi), %r8
	source
	movq  16(%rdi), %r11
	source
	movq  24(%rdi), %rdx

	source
	movq  32(%rdi), %r10
	source
	movq  40(%rdi), %r15
	source
	movq  48(%rdi), %r14
	source
	movq  56(%rdi), %r13

30:
	/*
	 * No _ASM_EXTABLE_UA; this is used for intentional prefetch on a
	 * potentially unmapped kernel address.
	 */
	_ASM_EXTABLE(30b, 2f)
	prefetcht0 5*64(%rdi)
2:
	adcq  %rbx, %rax
	adcq  %r8, %rax
	adcq  %r11, %rax
	adcq  %rdx, %rax
	adcq  %r10, %rax
	adcq  %r15, %rax
	adcq  %r14, %rax
	adcq  %r13, %rax

	decl  %r12d

	dest
	movq  %rbx, (%rsi)
	dest
	movq  %r8, 8(%rsi)
	dest
	movq  %r11, 16(%rsi)
	dest
	movq  %rdx, 24(%rsi)

	dest
	movq  %r10, 32(%rsi)
	dest
	movq  %r15, 40(%rsi)
	dest
	movq  %r14, 48(%rsi)
	dest
	movq  %r13, 56(%rsi)

	leaq  64(%rdi), %rdi
	leaq  64(%rsi), %rsi

	jnz   .Lloop

	adcq  %r9, %rax			/* fold the final carry into the sum */

	/* do last up to 56 bytes */
.Lhandle_tail:
	/* ecx: count, rcx.63: the end result needs to be rol8 */
	movq  %rcx, %r10
	andl  $63, %ecx
	shrl  $3, %ecx
	jz    .Lfold
	clc
	.p2align 4
.Lloop_8:
	source
	movq  (%rdi), %rbx
	adcq  %rbx, %rax
	decl  %ecx
	dest
	movq  %rbx, (%rsi)
	leaq  8(%rsi), %rsi		/* preserve carry */
	leaq  8(%rdi), %rdi
	jnz   .Lloop_8
	adcq  %r9, %rax			/* add in carry */

.Lfold:
	/* reduce checksum to 32bits */
	movl  %eax, %ebx
	shrq  $32, %rax
	addl  %ebx, %eax
	adcl  %r9d, %eax

	/* do last up to 6 bytes */
.Lhandle_7:
	movl  %r10d, %ecx
	andl  $7, %ecx
.L1:					/* .Lshort rejoins the common path here */
	shrl  $1, %ecx
	jz    .Lhandle_1
	movl  $2, %edx
	xorl  %ebx, %ebx
	clc
	.p2align 4
.Lloop_1:
	source
	movw  (%rdi), %bx
	adcl  %ebx, %eax
	decl  %ecx
	dest
	movw  %bx, (%rsi)
	leaq  2(%rdi), %rdi
	leaq  2(%rsi), %rsi
	jnz   .Lloop_1
	adcl  %r9d, %eax		/* add in carry */

	/* handle last odd byte */
.Lhandle_1:
	testb $1, %r10b
	jz    .Lende
	xorl  %ebx, %ebx
	source
	movb  (%rdi), %bl
	dest
	movb  %bl, (%rsi)
	addl  %ebx, %eax
	adcl  %r9d, %eax		/* carry */

.Lende:
	testq %r10, %r10		/* bit 63 set in .Lodd: result needs rol8 */
	js    .Lwas_odd
.Lout:
	movq  0*8(%rsp), %rbx
	movq  1*8(%rsp), %r12
	movq  2*8(%rsp), %r14
	movq  3*8(%rsp), %r13
	movq  4*8(%rsp), %r15
	addq  $5*8, %rsp
	RET

.Lshort:
	movl  %ecx, %r10d
	jmp   .L1

.Lunaligned:
	/* align the destination to 8 bytes: first a byte, then a word, then a dword */
	xorl  %ebx, %ebx
	testb $1, %sil
	jne   .Lodd
1:	testb $2, %sil
	je    2f
	source
	movw  (%rdi), %bx
	dest
	movw  %bx, (%rsi)
	leaq  2(%rdi), %rdi
	subq  $2, %rcx
	leaq  2(%rsi), %rsi
	addq  %rbx, %rax
2:	testb $4, %sil
	je    .Laligned
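	/*
	 * Destination is 4-byte but not yet 8-byte aligned (bits 0 and 1
	 * were cleared above): copy one dword so the main loop runs with
	 * an 8-byte aligned destination.
	 */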
	source
	movl  (%rdi), %ebx
	dest
	movl  %ebx, (%rsi)
	leaq  4(%rdi), %rdi
	subq  $4, %rcx
	leaq  4(%rsi), %rsi
	addq  %rbx, %rax
	jmp   .Laligned

.Lodd:
	source
	movb  (%rdi), %bl
	dest
	movb  %bl, (%rsi)
	leaq  1(%rdi), %rdi
	leaq  1(%rsi), %rsi
	/* decrement, set MSB */
	leaq  -1(%rcx, %rcx), %rcx
	rorq  $1, %rcx
	shll  $8, %ebx
	addq  %rbx, %rax
	jmp   1b

.Lwas_odd:
	roll  $8, %eax
	jmp   .Lout

	/* Exception: just return 0 */
.Lfault:
	xorl  %eax, %eax
	jmp   .Lout
SYM_FUNC_END(csum_partial_copy_generic)
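
/*
 * A minimal C sketch of how a wrapper might consume the 32-bit partial sum
 * returned in eax, assuming the register interface documented above
 * (rdi = src, rsi = dst, edx = len) corresponds to a prototype like
 * __wsum csum_partial_copy_generic(const void *src, void *dst, int len).
 * The helper fold16() is hypothetical; in the kernel the same 32->16 bit
 * folding is provided by csum_fold().  A return value of 0 can only come
 * from the .Lfault path, because the sum is seeded with -1 and
 * one's-complement addition never collapses a non-zero running sum to zero.
 *
 *	static inline unsigned short fold16(unsigned int sum)
 *	{
 *		sum = (sum & 0xffff) + (sum >> 16);	// fold 32 -> 17 bits
 *		sum += sum >> 16;			// fold the carry back in
 *		return ~sum & 0xffff;			// one's complement
 *	}
 *
 *	unsigned int sum = csum_partial_copy_generic(src, dst, len);
 *	if (!sum)
 *		return -EFAULT;				// fault during the copy
 *	unsigned short csum = fold16(sum);		// final Internet checksum
 */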