about | summary | refs | log | tree | commit | diff | stats
path: root/arch/ppc/lib
diff options
context:
space:
mode:
author Paul Mackerras <paulus@samba.org> 2008-06-09 14:01:46 +1000
committer Paul Mackerras <paulus@samba.org> 2008-06-10 21:40:22 +1000
commit 917f0af9e5a9ceecf9e72537fabb501254ba321d (patch)
tree 1ef207755c6d83ce4af93ef2b5e4645eebd65886 /arch/ppc/lib
parent powerpc: Improve (in|out)_[bl]eXX() asm code (diff)
download linux-dev-917f0af9e5a9ceecf9e72537fabb501254ba321d.tar.xz
linux-dev-917f0af9e5a9ceecf9e72537fabb501254ba321d.zip
powerpc: Remove arch/ppc and include/asm-ppc
All the maintained platforms are now in arch/powerpc, so the old
arch/ppc stuff can now go away.

Acked-by: Adrian Bunk <bunk@kernel.org>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Becky Bruce <becky.bruce@freescale.com>
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>
Acked-by: Grant Likely <grant.likely@secretlab.ca>
Acked-by: Jochen Friedrich <jochen@scram.de>
Acked-by: John Linn <john.linn@xilinx.com>
Acked-by: Jon Loeliger <jdl@freescale.com>
Acked-by: Josh Boyer <jwboyer@linux.vnet.ibm.com>
Acked-by: Kumar Gala <galak@kernel.crashing.org>
Acked-by: Olof Johansson <olof@lixom.net>
Acked-by: Peter Korsgaard <jacmet@sunsite.dk>
Acked-by: Scott Wood <scottwood@freescale.com>
Acked-by: Sean MacLennan <smaclennan@pikatech.com>
Acked-by: Segher Boessenkool <segher@kernel.crashing.org>
Acked-by: Stefan Roese <sr@denx.de>
Acked-by: Stephen Neuendorffer <stephen.neuendorffer@xilinx.com>
Acked-by: Wolfgang Denk <wd@denx.de>
Signed-off-by: Paul Mackerras <paulus@samba.org>
Diffstat (limited to 'arch/ppc/lib')
-rw-r--r-- arch/ppc/lib/Makefile 5
-rw-r--r-- arch/ppc/lib/checksum.S 225
-rw-r--r-- arch/ppc/lib/div64.S 58
-rw-r--r-- arch/ppc/lib/locks.c 189
-rw-r--r-- arch/ppc/lib/string.S 732
5 files changed, 0 insertions, 1209 deletions
diff --git a/arch/ppc/lib/Makefile b/arch/ppc/lib/Makefile
deleted file mode 100644
index 095e661e79dd..000000000000
--- a/arch/ppc/lib/Makefile
+++ /dev/null
@@ -1,5 +0,0 @@
-#
-# Makefile for ppc-specific library files..
-#
-
-obj-y := checksum.o string.o div64.o
diff --git a/arch/ppc/lib/checksum.S b/arch/ppc/lib/checksum.S
deleted file mode 100644
index 7874e8a80455..000000000000
--- a/arch/ppc/lib/checksum.S
+++ /dev/null
@@ -1,225 +0,0 @@
-/*
- * This file contains assembly-language implementations
- * of IP-style 1's complement checksum routines.
- *
- * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- * Severely hacked about by Paul Mackerras (paulus@cs.anu.edu.au).
- */
-
-#include <linux/sys.h>
-#include <asm/processor.h>
-#include <asm/errno.h>
-#include <asm/ppc_asm.h>
-
- .text
-
-/*
- * ip_fast_csum(buf, len) -- Optimized for IP header
- * len is in words and is always >= 5.
- */
-_GLOBAL(ip_fast_csum)
- lwz r0,0(r3)
- lwzu r5,4(r3)
- addic. r4,r4,-2
- addc r0,r0,r5
- mtctr r4
- blelr-
-1: lwzu r4,4(r3)
- adde r0,r0,r4
- bdnz 1b
- addze r0,r0 /* add in final carry */
- rlwinm r3,r0,16,0,31 /* fold two halves together */
- add r3,r0,r3
- not r3,r3
- srwi r3,r3,16
- blr
-
-/*
- * Compute checksum of TCP or UDP pseudo-header:
- * csum_tcpudp_magic(saddr, daddr, len, proto, sum)
- */
-_GLOBAL(csum_tcpudp_magic)
- rlwimi r5,r6,16,0,15 /* put proto in upper half of len */
- addc r0,r3,r4 /* add 4 32-bit words together */
- adde r0,r0,r5
- adde r0,r0,r7
- addze r0,r0 /* add in final carry */
- rlwinm r3,r0,16,0,31 /* fold two halves together */
- add r3,r0,r3
- not r3,r3
- srwi r3,r3,16
- blr
-
-/*
- * computes the checksum of a memory block at buff, length len,
- * and adds in "sum" (32-bit)
- *
- * csum_partial(buff, len, sum)
- */
-_GLOBAL(csum_partial)
- addic r0,r5,0
- subi r3,r3,4
- srwi. r6,r4,2
- beq 3f /* if we're doing < 4 bytes */
- andi. r5,r3,2 /* Align buffer to longword boundary */
- beq+ 1f
- lhz r5,4(r3) /* do 2 bytes to get aligned */
- addi r3,r3,2
- subi r4,r4,2
- addc r0,r0,r5
- srwi. r6,r4,2 /* # words to do */
- beq 3f
-1: mtctr r6
-2: lwzu r5,4(r3) /* the bdnz has zero overhead, so it should */
- adde r0,r0,r5 /* be unnecessary to unroll this loop */
- bdnz 2b
- andi. r4,r4,3
-3: cmpwi 0,r4,2
- blt+ 4f
- lhz r5,4(r3)
- addi r3,r3,2
- subi r4,r4,2
- adde r0,r0,r5
-4: cmpwi 0,r4,1
- bne+ 5f
- lbz r5,4(r3)
- slwi r5,r5,8 /* Upper byte of word */
- adde r0,r0,r5
-5: addze r3,r0 /* add in final carry */
- blr
-
-/*
- * Computes the checksum of a memory block at src, length len,
- * and adds in "sum" (32-bit), while copying the block to dst.
- * If an access exception occurs on src or dst, it stores -EFAULT
- * to *src_err or *dst_err respectively, and (for an error on
- * src) zeroes the rest of dst.
- *
- * csum_partial_copy_generic(src, dst, len, sum, src_err, dst_err)
- */
-_GLOBAL(csum_partial_copy_generic)
- addic r0,r6,0
- subi r3,r3,4
- subi r4,r4,4
- srwi. r6,r5,2
- beq 3f /* if we're doing < 4 bytes */
- andi. r9,r4,2 /* Align dst to longword boundary */
- beq+ 1f
-81: lhz r6,4(r3) /* do 2 bytes to get aligned */
- addi r3,r3,2
- subi r5,r5,2
-91: sth r6,4(r4)
- addi r4,r4,2
- addc r0,r0,r6
- srwi. r6,r5,2 /* # words to do */
- beq 3f
-1: srwi. r6,r5,4 /* # groups of 4 words to do */
- beq 10f
- mtctr r6
-71: lwz r6,4(r3)
-72: lwz r9,8(r3)
-73: lwz r10,12(r3)
-74: lwzu r11,16(r3)
- adde r0,r0,r6
-75: stw r6,4(r4)
- adde r0,r0,r9
-76: stw r9,8(r4)
- adde r0,r0,r10
-77: stw r10,12(r4)
- adde r0,r0,r11
-78: stwu r11,16(r4)
- bdnz 71b
-10: rlwinm. r6,r5,30,30,31 /* # words left to do */
- beq 13f
- mtctr r6
-82: lwzu r9,4(r3)
-92: stwu r9,4(r4)
- adde r0,r0,r9
- bdnz 82b
-13: andi. r5,r5,3
-3: cmpwi 0,r5,2
- blt+ 4f
-83: lhz r6,4(r3)
- addi r3,r3,2
- subi r5,r5,2
-93: sth r6,4(r4)
- addi r4,r4,2
- adde r0,r0,r6
-4: cmpwi 0,r5,1
- bne+ 5f
-84: lbz r6,4(r3)
-94: stb r6,4(r4)
- slwi r6,r6,8 /* Upper byte of word */
- adde r0,r0,r6
-5: addze r3,r0 /* add in final carry */
- blr
-
-/* These shouldn't go in the fixup section, since that would
- cause the ex_table addresses to get out of order. */
-
-src_error_4:
- mfctr r6 /* update # bytes remaining from ctr */
- rlwimi r5,r6,4,0,27
- b 79f
-src_error_1:
- li r6,0
- subi r5,r5,2
-95: sth r6,4(r4)
- addi r4,r4,2
-79: srwi. r6,r5,2
- beq 3f
- mtctr r6
-src_error_2:
- li r6,0
-96: stwu r6,4(r4)
- bdnz 96b
-3: andi. r5,r5,3
- beq src_error
-src_error_3:
- li r6,0
- mtctr r5
- addi r4,r4,3
-97: stbu r6,1(r4)
- bdnz 97b
-src_error:
- cmpwi 0,r7,0
- beq 1f
- li r6,-EFAULT
- stw r6,0(r7)
-1: addze r3,r0
- blr
-
-dst_error:
- cmpwi 0,r8,0
- beq 1f
- li r6,-EFAULT
- stw r6,0(r8)
-1: addze r3,r0
- blr
-
-.section __ex_table,"a"
- .long 81b,src_error_1
- .long 91b,dst_error
- .long 71b,src_error_4
- .long 72b,src_error_4
- .long 73b,src_error_4
- .long 74b,src_error_4
- .long 75b,dst_error
- .long 76b,dst_error
- .long 77b,dst_error
- .long 78b,dst_error
- .long 82b,src_error_2
- .long 92b,dst_error
- .long 83b,src_error_3
- .long 93b,dst_error
- .long 84b,src_error_3
- .long 94b,dst_error
- .long 95b,dst_error
- .long 96b,dst_error
- .long 97b,dst_error
diff --git a/arch/ppc/lib/div64.S b/arch/ppc/lib/div64.S
deleted file mode 100644
index 3527569e9926..000000000000
--- a/arch/ppc/lib/div64.S
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * Divide a 64-bit unsigned number by a 32-bit unsigned number.
- * This routine assumes that the top 32 bits of the dividend are
- * non-zero to start with.
- * On entry, r3 points to the dividend, which gets overwritten with
- * the 64-bit quotient, and r4 contains the divisor.
- * On exit, r3 contains the remainder.
- *
- * Copyright (C) 2002 Paul Mackerras, IBM Corp.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-#include <asm/ppc_asm.h>
-#include <asm/processor.h>
-
-_GLOBAL(__div64_32)
- lwz r5,0(r3) # get the dividend into r5/r6
- lwz r6,4(r3)
- cmplw r5,r4
- li r7,0
- li r8,0
- blt 1f
- divwu r7,r5,r4 # if dividend.hi >= divisor,
- mullw r0,r7,r4 # quotient.hi = dividend.hi / divisor
- subf. r5,r0,r5 # dividend.hi %= divisor
- beq 3f
-1: mr r11,r5 # here dividend.hi != 0
- andis. r0,r5,0xc000
- bne 2f
- cntlzw r0,r5 # we are shifting the dividend right
- li r10,-1 # to make it < 2^32, and shifting
- srw r10,r10,r0 # the divisor right the same amount,
- add r9,r4,r10 # rounding up (so the estimate cannot
- andc r11,r6,r10 # ever be too large, only too small)
- andc r9,r9,r10
- or r11,r5,r11
- rotlw r9,r9,r0
- rotlw r11,r11,r0
- divwu r11,r11,r9 # then we divide the shifted quantities
-2: mullw r10,r11,r4 # to get an estimate of the quotient,
- mulhwu r9,r11,r4 # multiply the estimate by the divisor,
- subfc r6,r10,r6 # take the product from the dividend,
- add r8,r8,r11 # and add the estimate to the accumulated
- subfe. r5,r9,r5 # quotient
- bne 1b
-3: cmplw r6,r4
- blt 4f
- divwu r0,r6,r4 # perform the remaining 32-bit division
- mullw r10,r0,r4 # and get the remainder
- add r8,r8,r0
- subf r6,r10,r6
-4: stw r7,0(r3) # return the quotient in *r3
- stw r8,4(r3)
- mr r3,r6 # return the remainder in r3
- blr
diff --git a/arch/ppc/lib/locks.c b/arch/ppc/lib/locks.c
deleted file mode 100644
index ea4aee6b20e6..000000000000
--- a/arch/ppc/lib/locks.c
+++ /dev/null
@@ -1,189 +0,0 @@
-/*
- * Locks for smp ppc
- *
- * Written by Cort Dougan (cort@cs.nmt.edu)
- */
-
-#include <linux/sched.h>
-#include <linux/spinlock.h>
-#include <linux/module.h>
-#include <asm/ppc_asm.h>
-#include <asm/smp.h>
-
-#ifdef CONFIG_DEBUG_SPINLOCK
-
-#undef INIT_STUCK
-#define INIT_STUCK 200000000 /*0xffffffff*/
-
-/*
- * Try to acquire a spinlock.
- * Only does the stwcx. if the load returned 0 - the Programming
- * Environments Manual suggests not doing unnecessary stcwx.'s
- * since they may inhibit forward progress by other CPUs in getting
- * a lock.
- */
-static inline unsigned long __spin_trylock(volatile unsigned long *lock)
-{
- unsigned long ret;
-
- __asm__ __volatile__ ("\n\
-1: lwarx %0,0,%1\n\
- cmpwi 0,%0,0\n\
- bne 2f\n"
- PPC405_ERR77(0,%1)
-" stwcx. %2,0,%1\n\
- bne- 1b\n\
- isync\n\
-2:"
- : "=&r"(ret)
- : "r"(lock), "r"(1)
- : "cr0", "memory");
-
- return ret;
-}
-
-void _raw_spin_lock(spinlock_t *lock)
-{
- int cpu = smp_processor_id();
- unsigned int stuck = INIT_STUCK;
- while (__spin_trylock(&lock->lock)) {
- while ((unsigned volatile long)lock->lock != 0) {
- if (!--stuck) {
- printk("_spin_lock(%p) CPU#%d NIP %p"
- " holder: cpu %ld pc %08lX\n",
- lock, cpu, __builtin_return_address(0),
- lock->owner_cpu,lock->owner_pc);
- stuck = INIT_STUCK;
- /* steal the lock */
- /*xchg_u32((void *)&lock->lock,0);*/
- }
- }
- }
- lock->owner_pc = (unsigned long)__builtin_return_address(0);
- lock->owner_cpu = cpu;
-}
-EXPORT_SYMBOL(_raw_spin_lock);
-
-int _raw_spin_trylock(spinlock_t *lock)
-{
- if (__spin_trylock(&lock->lock))
- return 0;
- lock->owner_cpu = smp_processor_id();
- lock->owner_pc = (unsigned long)__builtin_return_address(0);
- return 1;
-}
-EXPORT_SYMBOL(_raw_spin_trylock);
-
-void _raw_spin_unlock(spinlock_t *lp)
-{
- if ( !lp->lock )
- printk("_spin_unlock(%p): no lock cpu %d curr PC %p %s/%d\n",
- lp, smp_processor_id(), __builtin_return_address(0),
- current->comm, current->pid);
- if ( lp->owner_cpu != smp_processor_id() )
- printk("_spin_unlock(%p): cpu %d trying clear of cpu %d pc %lx val %lx\n",
- lp, smp_processor_id(), (int)lp->owner_cpu,
- lp->owner_pc,lp->lock);
- lp->owner_pc = lp->owner_cpu = 0;
- wmb();
- lp->lock = 0;
-}
-EXPORT_SYMBOL(_raw_spin_unlock);
-
-/*
- * For rwlocks, zero is unlocked, -1 is write-locked,
- * positive is read-locked.
- */
-static __inline__ int __read_trylock(rwlock_t *rw)
-{
- signed int tmp;
-
- __asm__ __volatile__(
-"2: lwarx %0,0,%1 # __read_trylock\n\
- addic. %0,%0,1\n\
- ble- 1f\n"
- PPC405_ERR77(0,%1)
-" stwcx. %0,0,%1\n\
- bne- 2b\n\
- isync\n\
-1:"
- : "=&r"(tmp)
- : "r"(&rw->lock)
- : "cr0", "memory");
-
- return tmp;
-}
-
-int _raw_read_trylock(rwlock_t *rw)
-{
- return __read_trylock(rw) > 0;
-}
-EXPORT_SYMBOL(_raw_read_trylock);
-
-void _raw_read_lock(rwlock_t *rw)
-{
- unsigned int stuck;
-
- while (__read_trylock(rw) <= 0) {
- stuck = INIT_STUCK;
- while (!read_can_lock(rw)) {
- if (--stuck == 0) {
- printk("_read_lock(%p) CPU#%d lock %d\n",
- rw, raw_smp_processor_id(), rw->lock);
- stuck = INIT_STUCK;
- }
- }
- }
-}
-EXPORT_SYMBOL(_raw_read_lock);
-
-void _raw_read_unlock(rwlock_t *rw)
-{
- if ( rw->lock == 0 )
- printk("_read_unlock(): %s/%d (nip %08lX) lock %d\n",
- current->comm,current->pid,current->thread.regs->nip,
- rw->lock);
- wmb();
- atomic_dec((atomic_t *) &(rw)->lock);
-}
-EXPORT_SYMBOL(_raw_read_unlock);
-
-void _raw_write_lock(rwlock_t *rw)
-{
- unsigned int stuck;
-
- while (cmpxchg(&rw->lock, 0, -1) != 0) {
- stuck = INIT_STUCK;
- while (!write_can_lock(rw)) {
- if (--stuck == 0) {
- printk("write_lock(%p) CPU#%d lock %d)\n",
- rw, raw_smp_processor_id(), rw->lock);
- stuck = INIT_STUCK;
- }
- }
- }
- wmb();
-}
-EXPORT_SYMBOL(_raw_write_lock);
-
-int _raw_write_trylock(rwlock_t *rw)
-{
- if (cmpxchg(&rw->lock, 0, -1) != 0)
- return 0;
- wmb();
- return 1;
-}
-EXPORT_SYMBOL(_raw_write_trylock);
-
-void _raw_write_unlock(rwlock_t *rw)
-{
- if (rw->lock >= 0)
- printk("_write_lock(): %s/%d (nip %08lX) lock %d\n",
- current->comm,current->pid,current->thread.regs->nip,
- rw->lock);
- wmb();
- rw->lock = 0;
-}
-EXPORT_SYMBOL(_raw_write_unlock);
-
-#endif
diff --git a/arch/ppc/lib/string.S b/arch/ppc/lib/string.S
deleted file mode 100644
index 927253bfc826..000000000000
--- a/arch/ppc/lib/string.S
+++ /dev/null
@@ -1,732 +0,0 @@
-/*
- * String handling functions for PowerPC.
- *
- * Copyright (C) 1996 Paul Mackerras.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-#include <asm/processor.h>
-#include <asm/cache.h>
-#include <asm/errno.h>
-#include <asm/ppc_asm.h>
-
-#define COPY_16_BYTES \
- lwz r7,4(r4); \
- lwz r8,8(r4); \
- lwz r9,12(r4); \
- lwzu r10,16(r4); \
- stw r7,4(r6); \
- stw r8,8(r6); \
- stw r9,12(r6); \
- stwu r10,16(r6)
-
-#define COPY_16_BYTES_WITHEX(n) \
-8 ## n ## 0: \
- lwz r7,4(r4); \
-8 ## n ## 1: \
- lwz r8,8(r4); \
-8 ## n ## 2: \
- lwz r9,12(r4); \
-8 ## n ## 3: \
- lwzu r10,16(r4); \
-8 ## n ## 4: \
- stw r7,4(r6); \
-8 ## n ## 5: \
- stw r8,8(r6); \
-8 ## n ## 6: \
- stw r9,12(r6); \
-8 ## n ## 7: \
- stwu r10,16(r6)
-
-#define COPY_16_BYTES_EXCODE(n) \
-9 ## n ## 0: \
- addi r5,r5,-(16 * n); \
- b 104f; \
-9 ## n ## 1: \
- addi r5,r5,-(16 * n); \
- b 105f; \
-.section __ex_table,"a"; \
- .align 2; \
- .long 8 ## n ## 0b,9 ## n ## 0b; \
- .long 8 ## n ## 1b,9 ## n ## 0b; \
- .long 8 ## n ## 2b,9 ## n ## 0b; \
- .long 8 ## n ## 3b,9 ## n ## 0b; \
- .long 8 ## n ## 4b,9 ## n ## 1b; \
- .long 8 ## n ## 5b,9 ## n ## 1b; \
- .long 8 ## n ## 6b,9 ## n ## 1b; \
- .long 8 ## n ## 7b,9 ## n ## 1b; \
- .text
-
- .text
- .stabs "arch/ppc/lib/",N_SO,0,0,0f
- .stabs "string.S",N_SO,0,0,0f
-
-CACHELINE_BYTES = L1_CACHE_BYTES
-LG_CACHELINE_BYTES = L1_CACHE_SHIFT
-CACHELINE_MASK = (L1_CACHE_BYTES-1)
-
-_GLOBAL(strcpy)
- addi r5,r3,-1
- addi r4,r4,-1
-1: lbzu r0,1(r4)
- cmpwi 0,r0,0
- stbu r0,1(r5)
- bne 1b
- blr
-
-/* This clears out any unused part of the destination buffer,
- just as the libc version does. -- paulus */
-_GLOBAL(strncpy)
- cmpwi 0,r5,0
- beqlr
- mtctr r5
- addi r6,r3,-1
- addi r4,r4,-1
-1: lbzu r0,1(r4)
- cmpwi 0,r0,0
- stbu r0,1(r6)
- bdnzf 2,1b /* dec ctr, branch if ctr != 0 && !cr0.eq */
- bnelr /* if we didn't hit a null char, we're done */
- mfctr r5
- cmpwi 0,r5,0 /* any space left in destination buffer? */
- beqlr /* we know r0 == 0 here */
-2: stbu r0,1(r6) /* clear it out if so */
- bdnz 2b
- blr
-
-_GLOBAL(strcat)
- addi r5,r3,-1
- addi r4,r4,-1
-1: lbzu r0,1(r5)
- cmpwi 0,r0,0
- bne 1b
- addi r5,r5,-1
-1: lbzu r0,1(r4)
- cmpwi 0,r0,0
- stbu r0,1(r5)
- bne 1b
- blr
-
-_GLOBAL(strcmp)
- addi r5,r3,-1
- addi r4,r4,-1
-1: lbzu r3,1(r5)
- cmpwi 1,r3,0
- lbzu r0,1(r4)
- subf. r3,r0,r3
- beqlr 1
- beq 1b
- blr
-
-_GLOBAL(strncmp)
- PPC_LCMPI r5,0
- beqlr
- mtctr r5
- addi r5,r3,-1
- addi r4,r4,-1
-1: lbzu r3,1(r5)
- cmpwi 1,r3,0
- lbzu r0,1(r4)
- subf. r3,r0,r3
- beqlr 1
- bdnzt eq,1b
- blr
-
-_GLOBAL(strlen)
- addi r4,r3,-1
-1: lbzu r0,1(r4)
- cmpwi 0,r0,0
- bne 1b
- subf r3,r3,r4
- blr
-
-/*
- * Use dcbz on the complete cache lines in the destination
- * to set them to zero. This requires that the destination
- * area is cacheable. -- paulus
- */
-_GLOBAL(cacheable_memzero)
- mr r5,r4
- li r4,0
- addi r6,r3,-4
- cmplwi 0,r5,4
- blt 7f
- stwu r4,4(r6)
- beqlr
- andi. r0,r6,3
- add r5,r0,r5
- subf r6,r0,r6
- clrlwi r7,r6,32-LG_CACHELINE_BYTES
- add r8,r7,r5
- srwi r9,r8,LG_CACHELINE_BYTES
- addic. r9,r9,-1 /* total number of complete cachelines */
- ble 2f
- xori r0,r7,CACHELINE_MASK & ~3
- srwi. r0,r0,2
- beq 3f
- mtctr r0
-4: stwu r4,4(r6)
- bdnz 4b
-3: mtctr r9
- li r7,4
-#if !defined(CONFIG_8xx)
-10: dcbz r7,r6
-#else
-10: stw r4, 4(r6)
- stw r4, 8(r6)
- stw r4, 12(r6)
- stw r4, 16(r6)
-#if CACHE_LINE_SIZE >= 32
- stw r4, 20(r6)
- stw r4, 24(r6)
- stw r4, 28(r6)
- stw r4, 32(r6)
-#endif /* CACHE_LINE_SIZE */
-#endif
- addi r6,r6,CACHELINE_BYTES
- bdnz 10b
- clrlwi r5,r8,32-LG_CACHELINE_BYTES
- addi r5,r5,4
-2: srwi r0,r5,2
- mtctr r0
- bdz 6f
-1: stwu r4,4(r6)
- bdnz 1b
-6: andi. r5,r5,3
-7: cmpwi 0,r5,0
- beqlr
- mtctr r5
- addi r6,r6,3
-8: stbu r4,1(r6)
- bdnz 8b
- blr
-
-_GLOBAL(memset)
- rlwimi r4,r4,8,16,23
- rlwimi r4,r4,16,0,15
- addi r6,r3,-4
- cmplwi 0,r5,4
- blt 7f
- stwu r4,4(r6)
- beqlr
- andi. r0,r6,3
- add r5,r0,r5
- subf r6,r0,r6
- srwi r0,r5,2
- mtctr r0
- bdz 6f
-1: stwu r4,4(r6)
- bdnz 1b
-6: andi. r5,r5,3
-7: cmpwi 0,r5,0
- beqlr
- mtctr r5
- addi r6,r6,3
-8: stbu r4,1(r6)
- bdnz 8b
- blr
-
-/*
- * This version uses dcbz on the complete cache lines in the
- * destination area to reduce memory traffic. This requires that
- * the destination area is cacheable.
- * We only use this version if the source and dest don't overlap.
- * -- paulus.
- */
-_GLOBAL(cacheable_memcpy)
- add r7,r3,r5 /* test if the src & dst overlap */
- add r8,r4,r5
- cmplw 0,r4,r7
- cmplw 1,r3,r8
- crand 0,0,4 /* cr0.lt &= cr1.lt */
- blt memcpy /* if regions overlap */
-
- addi r4,r4,-4
- addi r6,r3,-4
- neg r0,r3
- andi. r0,r0,CACHELINE_MASK /* # bytes to start of cache line */
- beq 58f
-
- cmplw 0,r5,r0 /* is this more than total to do? */
- blt 63f /* if not much to do */
- andi. r8,r0,3 /* get it word-aligned first */
- subf r5,r0,r5
- mtctr r8
- beq+ 61f
-70: lbz r9,4(r4) /* do some bytes */
- stb r9,4(r6)
- addi r4,r4,1
- addi r6,r6,1
- bdnz 70b
-61: srwi. r0,r0,2
- mtctr r0
- beq 58f
-72: lwzu r9,4(r4) /* do some words */
- stwu r9,4(r6)
- bdnz 72b
-
-58: srwi. r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
- clrlwi r5,r5,32-LG_CACHELINE_BYTES
- li r11,4
- mtctr r0
- beq 63f
-53:
-#if !defined(CONFIG_8xx)
- dcbz r11,r6
-#endif
- COPY_16_BYTES
-#if L1_CACHE_BYTES >= 32
- COPY_16_BYTES
-#if L1_CACHE_BYTES >= 64
- COPY_16_BYTES
- COPY_16_BYTES
-#if L1_CACHE_BYTES >= 128
- COPY_16_BYTES
- COPY_16_BYTES
- COPY_16_BYTES
- COPY_16_BYTES
-#endif
-#endif
-#endif
- bdnz 53b
-
-63: srwi. r0,r5,2
- mtctr r0
- beq 64f
-30: lwzu r0,4(r4)
- stwu r0,4(r6)
- bdnz 30b
-
-64: andi. r0,r5,3
- mtctr r0
- beq+ 65f
-40: lbz r0,4(r4)
- stb r0,4(r6)
- addi r4,r4,1
- addi r6,r6,1
- bdnz 40b
-65: blr
-
-_GLOBAL(memmove)
- cmplw 0,r3,r4
- bgt backwards_memcpy
- /* fall through */
-
-_GLOBAL(memcpy)
- srwi. r7,r5,3
- addi r6,r3,-4
- addi r4,r4,-4
- beq 2f /* if less than 8 bytes to do */
- andi. r0,r6,3 /* get dest word aligned */
- mtctr r7
- bne 5f
-1: lwz r7,4(r4)
- lwzu r8,8(r4)
- stw r7,4(r6)
- stwu r8,8(r6)
- bdnz 1b
- andi. r5,r5,7
-2: cmplwi 0,r5,4
- blt 3f
- lwzu r0,4(r4)
- addi r5,r5,-4
- stwu r0,4(r6)
-3: cmpwi 0,r5,0
- beqlr
- mtctr r5
- addi r4,r4,3
- addi r6,r6,3
-4: lbzu r0,1(r4)
- stbu r0,1(r6)
- bdnz 4b
- blr
-5: subfic r0,r0,4
- mtctr r0
-6: lbz r7,4(r4)
- addi r4,r4,1
- stb r7,4(r6)
- addi r6,r6,1
- bdnz 6b
- subf r5,r0,r5
- rlwinm. r7,r5,32-3,3,31
- beq 2b
- mtctr r7
- b 1b
-
-_GLOBAL(backwards_memcpy)
- rlwinm. r7,r5,32-3,3,31 /* r7 = r5 >> 3 */
- add r6,r3,r5
- add r4,r4,r5
- beq 2f
- andi. r0,r6,3
- mtctr r7
- bne 5f
-1: lwz r7,-4(r4)
- lwzu r8,-8(r4)
- stw r7,-4(r6)
- stwu r8,-8(r6)
- bdnz 1b
- andi. r5,r5,7
-2: cmplwi 0,r5,4
- blt 3f
- lwzu r0,-4(r4)
- subi r5,r5,4
- stwu r0,-4(r6)
-3: cmpwi 0,r5,0
- beqlr
- mtctr r5
-4: lbzu r0,-1(r4)
- stbu r0,-1(r6)
- bdnz 4b
- blr
-5: mtctr r0
-6: lbzu r7,-1(r4)
- stbu r7,-1(r6)
- bdnz 6b
- subf r5,r0,r5
- rlwinm. r7,r5,32-3,3,31
- beq 2b
- mtctr r7
- b 1b
-
-_GLOBAL(memcmp)
- cmpwi 0,r5,0
- ble- 2f
- mtctr r5
- addi r6,r3,-1
- addi r4,r4,-1
-1: lbzu r3,1(r6)
- lbzu r0,1(r4)
- subf. r3,r0,r3
- bdnzt 2,1b
- blr
-2: li r3,0
- blr
-
-_GLOBAL(memchr)
- cmpwi 0,r5,0
- ble- 2f
- mtctr r5
- addi r3,r3,-1
-1: lbzu r0,1(r3)
- cmpw 0,r0,r4
- bdnzf 2,1b
- beqlr
-2: li r3,0
- blr
-
-_GLOBAL(__copy_tofrom_user)
- addi r4,r4,-4
- addi r6,r3,-4
- neg r0,r3
- andi. r0,r0,CACHELINE_MASK /* # bytes to start of cache line */
- beq 58f
-
- cmplw 0,r5,r0 /* is this more than total to do? */
- blt 63f /* if not much to do */
- andi. r8,r0,3 /* get it word-aligned first */
- mtctr r8
- beq+ 61f
-70: lbz r9,4(r4) /* do some bytes */
-71: stb r9,4(r6)
- addi r4,r4,1
- addi r6,r6,1
- bdnz 70b
-61: subf r5,r0,r5
- srwi. r0,r0,2
- mtctr r0
- beq 58f
-72: lwzu r9,4(r4) /* do some words */
-73: stwu r9,4(r6)
- bdnz 72b
-
- .section __ex_table,"a"
- .align 2
- .long 70b,100f
- .long 71b,101f
- .long 72b,102f
- .long 73b,103f
- .text
-
-58: srwi. r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
- clrlwi r5,r5,32-LG_CACHELINE_BYTES
- li r11,4
- beq 63f
-
-#ifdef CONFIG_8xx
- /* Don't use prefetch on 8xx */
- mtctr r0
- li r0,0
-53: COPY_16_BYTES_WITHEX(0)
- bdnz 53b
-
-#else /* not CONFIG_8xx */
- /* Here we decide how far ahead to prefetch the source */
- li r3,4
- cmpwi r0,1
- li r7,0
- ble 114f
- li r7,1
-#if MAX_COPY_PREFETCH > 1
- /* Heuristically, for large transfers we prefetch
- MAX_COPY_PREFETCH cachelines ahead. For small transfers
- we prefetch 1 cacheline ahead. */
- cmpwi r0,MAX_COPY_PREFETCH
- ble 112f
- li r7,MAX_COPY_PREFETCH
-112: mtctr r7
-111: dcbt r3,r4
- addi r3,r3,CACHELINE_BYTES
- bdnz 111b
-#else
- dcbt r3,r4
- addi r3,r3,CACHELINE_BYTES
-#endif /* MAX_COPY_PREFETCH > 1 */
-
-114: subf r8,r7,r0
- mr r0,r7
- mtctr r8
-
-53: dcbt r3,r4
-54: dcbz r11,r6
- .section __ex_table,"a"
- .align 2
- .long 54b,105f
- .text
-/* the main body of the cacheline loop */
- COPY_16_BYTES_WITHEX(0)
-#if L1_CACHE_BYTES >= 32
- COPY_16_BYTES_WITHEX(1)
-#if L1_CACHE_BYTES >= 64
- COPY_16_BYTES_WITHEX(2)
- COPY_16_BYTES_WITHEX(3)
-#if L1_CACHE_BYTES >= 128
- COPY_16_BYTES_WITHEX(4)
- COPY_16_BYTES_WITHEX(5)
- COPY_16_BYTES_WITHEX(6)
- COPY_16_BYTES_WITHEX(7)
-#endif
-#endif
-#endif
- bdnz 53b
- cmpwi r0,0
- li r3,4
- li r7,0
- bne 114b
-#endif /* CONFIG_8xx */
-
-63: srwi. r0,r5,2
- mtctr r0
- beq 64f
-30: lwzu r0,4(r4)
-31: stwu r0,4(r6)
- bdnz 30b
-
-64: andi. r0,r5,3
- mtctr r0
- beq+ 65f
-40: lbz r0,4(r4)
-41: stb r0,4(r6)
- addi r4,r4,1
- addi r6,r6,1
- bdnz 40b
-65: li r3,0
- blr
-
-/* read fault, initial single-byte copy */
-100: li r9,0
- b 90f
-/* write fault, initial single-byte copy */
-101: li r9,1
-90: subf r5,r8,r5
- li r3,0
- b 99f
-/* read fault, initial word copy */
-102: li r9,0
- b 91f
-/* write fault, initial word copy */
-103: li r9,1
-91: li r3,2
- b 99f
-
-/*
- * this stuff handles faults in the cacheline loop and branches to either
- * 104f (if in read part) or 105f (if in write part), after updating r5
- */
- COPY_16_BYTES_EXCODE(0)
-#if L1_CACHE_BYTES >= 32
- COPY_16_BYTES_EXCODE(1)
-#if L1_CACHE_BYTES >= 64
- COPY_16_BYTES_EXCODE(2)
- COPY_16_BYTES_EXCODE(3)
-#if L1_CACHE_BYTES >= 128
- COPY_16_BYTES_EXCODE(4)
- COPY_16_BYTES_EXCODE(5)
- COPY_16_BYTES_EXCODE(6)
- COPY_16_BYTES_EXCODE(7)
-#endif
-#endif
-#endif
-
-/* read fault in cacheline loop */
-104: li r9,0
- b 92f
-/* fault on dcbz (effectively a write fault) */
-/* or write fault in cacheline loop */
-105: li r9,1
-92: li r3,LG_CACHELINE_BYTES
- mfctr r8
- add r0,r0,r8
- b 106f
-/* read fault in final word loop */
-108: li r9,0
- b 93f
-/* write fault in final word loop */
-109: li r9,1
-93: andi. r5,r5,3
- li r3,2
- b 99f
-/* read fault in final byte loop */
-110: li r9,0
- b 94f
-/* write fault in final byte loop */
-111: li r9,1
-94: li r5,0
- li r3,0
-/*
- * At this stage the number of bytes not copied is
- * r5 + (ctr << r3), and r9 is 0 for read or 1 for write.
- */
-99: mfctr r0
-106: slw r3,r0,r3
- add. r3,r3,r5
- beq 120f /* shouldn't happen */
- cmpwi 0,r9,0
- bne 120f
-/* for a read fault, first try to continue the copy one byte at a time */
- mtctr r3
-130: lbz r0,4(r4)
-131: stb r0,4(r6)
- addi r4,r4,1
- addi r6,r6,1
- bdnz 130b
-/* then clear out the destination: r3 bytes starting at 4(r6) */
-132: mfctr r3
- srwi. r0,r3,2
- li r9,0
- mtctr r0
- beq 113f
-112: stwu r9,4(r6)
- bdnz 112b
-113: andi. r0,r3,3
- mtctr r0
- beq 120f
-114: stb r9,4(r6)
- addi r6,r6,1
- bdnz 114b
-120: blr
-
- .section __ex_table,"a"
- .align 2
- .long 30b,108b
- .long 31b,109b
- .long 40b,110b
- .long 41b,111b
- .long 130b,132b
- .long 131b,120b
- .long 112b,120b
- .long 114b,120b
- .text
-
-_GLOBAL(__clear_user)
- addi r6,r3,-4
- li r3,0
- li r5,0
- cmplwi 0,r4,4
- blt 7f
- /* clear a single word */
-11: stwu r5,4(r6)
- beqlr
- /* clear word sized chunks */
- andi. r0,r6,3
- add r4,r0,r4
- subf r6,r0,r6
- srwi r0,r4,2
- andi. r4,r4,3
- mtctr r0
- bdz 7f
-1: stwu r5,4(r6)
- bdnz 1b
- /* clear byte sized chunks */
-7: cmpwi 0,r4,0
- beqlr
- mtctr r4
- addi r6,r6,3
-8: stbu r5,1(r6)
- bdnz 8b
- blr
-90: mr r3,r4
- blr
-91: mfctr r3
- slwi r3,r3,2
- add r3,r3,r4
- blr
-92: mfctr r3
- blr
-
- .section __ex_table,"a"
- .align 2
- .long 11b,90b
- .long 1b,91b
- .long 8b,92b
- .text
-
-_GLOBAL(__strncpy_from_user)
- addi r6,r3,-1
- addi r4,r4,-1
- cmpwi 0,r5,0
- beq 2f
- mtctr r5
-1: lbzu r0,1(r4)
- cmpwi 0,r0,0
- stbu r0,1(r6)
- bdnzf 2,1b /* dec ctr, branch if ctr != 0 && !cr0.eq */
- beq 3f
-2: addi r6,r6,1
-3: subf r3,r3,r6
- blr
-99: li r3,-EFAULT
- blr
-
- .section __ex_table,"a"
- .align 2
- .long 1b,99b
- .text
-
-/* r3 = str, r4 = len (> 0), r5 = top (highest addr) */
-_GLOBAL(__strnlen_user)
- addi r7,r3,-1
- subf r6,r7,r5 /* top+1 - str */
- cmplw 0,r4,r6
- bge 0f
- mr r6,r4
-0: mtctr r6 /* ctr = min(len, top+1 - str) */
-1: lbzu r0,1(r7) /* get next byte */
- cmpwi 0,r0,0
- bdnzf 2,1b /* loop if --ctr != 0 && byte != 0 */
- addi r7,r7,1
- subf r3,r3,r7 /* number of bytes we have looked at */
- beqlr /* return if we found a 0 byte */
- cmpw 0,r3,r4 /* did we look at all len bytes? */
- blt 99f /* if not, must have hit top */
- addi r3,r4,1 /* return len + 1 to indicate no null found */
- blr
-99: li r3,0 /* bad address, return 0 */
- blr
-
- .section __ex_table,"a"
- .align 2
- .long 1b,99b