aboutsummaryrefslogtreecommitdiffstats
path: root/arch/sh64/lib/page_copy.S
diff options
context:
space:
mode:
Diffstat (limited to 'arch/sh64/lib/page_copy.S')
-rw-r--r--arch/sh64/lib/page_copy.S91
1 files changed, 0 insertions, 91 deletions
diff --git a/arch/sh64/lib/page_copy.S b/arch/sh64/lib/page_copy.S
deleted file mode 100644
index e159c3cd2582..000000000000
--- a/arch/sh64/lib/page_copy.S
+++ /dev/null
@@ -1,91 +0,0 @@
-/*
- Copyright 2003 Richard Curnow, SuperH (UK) Ltd.
-
- This file is subject to the terms and conditions of the GNU General Public
- License. See the file "COPYING" in the main directory of this archive
- for more details.
-
- Tight version of memcpy for the case of just copying a page.
- Prefetch strategy empirically optimised against RTL simulations
- of SH5-101 cut2 eval chip with Cayman board DDR memory.
-
- Parameters:
- r2 : source effective address (start of page)
- r3 : destination effective address (start of page)
-
- Always copies 4096 bytes.
-
- Points to review.
- * Currently the prefetch is 4 lines ahead and the alloco is 2 lines ahead.
- It seems like the prefetch needs to be at least 4 lines ahead to get
- the data into the cache in time, and the allocos contend with outstanding
- prefetches for the same cache set, so it's better to have the numbers
- different.
- */
-
- .section .text..SHmedia32,"ax"
- .little
-
- .balign 8
- .global sh64_page_copy
-sh64_page_copy:
-
- /* Copy 4096 bytes worth of data from r2 to r3.
- Do prefetches 4 lines ahead (currently compiled out under #if 0).
- Do alloco 2 lines ahead */
-
- pta 1f, tr1 ! tr1 -> label 1: top of the copy loop
- pta 2f, tr2 ! tr2 -> label 2: only referenced by the disabled prefetch skip
- pta 3f, tr3 ! tr3 -> label 3: target used to skip the alloco
- ptabs r18, tr0 ! tr0 = address held in r18; used by the final blink to return
-
-#if 0
- /* TAKum03020 */
- ld.q r2, 0x00, r63
- ld.q r2, 0x20, r63
- ld.q r2, 0x40, r63
- ld.q r2, 0x60, r63
-#endif
- alloco r3, 0x00 ! allocate destination line 0 ahead of the loop
- synco ! TAKum03020
- alloco r3, 0x20 ! allocate destination line 1 (lines are 32 bytes apart)
- synco ! TAKum03020
-
- movi 3968, r6 ! 3968 = 4096 - 4*32
- add r3, r6, r6 ! r6 = dst + 3968: start of the last 4 destination lines
- addi r6, 64, r7 ! r7 = dst + 4032: start of the last 2 destination lines
- addi r7, 64, r8 ! r8 = dst + 4096: end of the destination page
- sub r2, r3, r60 ! r60 = src - dst, so r3 + r60 addresses the source
- addi r60, 8, r61 ! r61 = r60 + 8: source offset for the 2nd quadword
- addi r61, 8, r62 ! r62 = r60 + 16: source offset for the 3rd quadword
- addi r62, 8, r23 ! r23 = r60 + 24: source offset for the 4th quadword
- addi r60, 0x80, r22 ! r22 = r60 + 128: source 4 lines ahead (disabled prefetch only)
-
-/* Minimal code size. The extra branches inside the loop don't cost much
- because they overlap with the time spent waiting for prefetches to
- complete. */
-1:
-#if 0
- /* TAKum03020 */
- bge/u r3, r6, tr2 ! skip prefetch for last 4 lines
- ldx.q r3, r22, r63 ! prefetch 4 lines hence
-#endif
-2:
- bge/u r3, r7, tr3 ! skip alloco for last 2 lines
- alloco r3, 0x40 ! alloc destination line 2 lines ahead
- synco ! TAKum03020
-3:
- ldx.q r3, r60, r36 ! load source quadword at line offset 0
- ldx.q r3, r61, r37 ! load source quadword at line offset 8
- ldx.q r3, r62, r38 ! load source quadword at line offset 16
- ldx.q r3, r23, r39 ! load source quadword at line offset 24
- st.q r3, 0, r36 ! store the four quadwords to the destination line
- st.q r3, 8, r37
- st.q r3, 16, r38
- st.q r3, 24, r39
- addi r3, 32, r3 ! advance destination pointer by one 32-byte line
- bgt/l r8, r3, tr1 ! loop while r3 is still below the end of the page (r8)
-
- blink tr0, r63 ! return
-
-