summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--arch/powerpc/lib/copypage_64.S198
1 files changed, 93 insertions, 105 deletions
diff --git a/arch/powerpc/lib/copypage_64.S b/arch/powerpc/lib/copypage_64.S
index f9837f44ac0..75f3267fdc3 100644
--- a/arch/powerpc/lib/copypage_64.S
+++ b/arch/powerpc/lib/copypage_64.S
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2002 Paul Mackerras, IBM Corp.
+ * Copyright (C) 2008 Mark Nelson, IBM Corp.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -8,112 +8,100 @@
*/
#include <asm/processor.h>
#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+
+ .section ".toc","aw"
+PPC64_CACHES:
+ .tc ppc64_caches[TC],ppc64_caches
+ .section ".text"
+
_GLOBAL(copy_4K_page)
- std r31,-8(1)
- std r30,-16(1)
- std r29,-24(1)
- std r28,-32(1)
- std r27,-40(1)
- std r26,-48(1)
- std r25,-56(1)
- std r24,-64(1)
- std r23,-72(1)
- std r22,-80(1)
- std r21,-88(1)
- std r20,-96(1)
- li r5,4096/32 - 1
+ li r5,4096 /* 4K page size */
+BEGIN_FTR_SECTION
+ ld r10,PPC64_CACHES@toc(r2)
+ lwz r11,DCACHEL1LOGLINESIZE(r10) /* log2 of cache line size */
+ lwz r12,DCACHEL1LINESIZE(r10) /* get cache line size */
+ li r9,0
+ srd r8,r5,r11
+
+ mtctr r8
+setup:
+ dcbt r9,r4
+ dcbz r9,r3
+ add r9,r9,r12
+ bdnz setup
+END_FTR_SECTION_IFSET(CPU_FTR_CP_USE_DCBTZ)
addi r3,r3,-8
- li r12,5
-0: addi r5,r5,-24
- mtctr r12
- ld r22,640(4)
- ld r21,512(4)
- ld r20,384(4)
- ld r11,256(4)
- ld r9,128(4)
- ld r7,0(4)
- ld r25,648(4)
- ld r24,520(4)
- ld r23,392(4)
- ld r10,264(4)
- ld r8,136(4)
- ldu r6,8(4)
- cmpwi r5,24
-1: std r22,648(3)
- std r21,520(3)
- std r20,392(3)
- std r11,264(3)
- std r9,136(3)
- std r7,8(3)
- ld r28,648(4)
- ld r27,520(4)
- ld r26,392(4)
- ld r31,264(4)
- ld r30,136(4)
- ld r29,8(4)
- std r25,656(3)
- std r24,528(3)
- std r23,400(3)
- std r10,272(3)
- std r8,144(3)
- std r6,16(3)
- ld r22,656(4)
- ld r21,528(4)
- ld r20,400(4)
- ld r11,272(4)
- ld r9,144(4)
- ld r7,16(4)
- std r28,664(3)
- std r27,536(3)
- std r26,408(3)
- std r31,280(3)
- std r30,152(3)
- stdu r29,24(3)
- ld r25,664(4)
- ld r24,536(4)
- ld r23,408(4)
- ld r10,280(4)
- ld r8,152(4)
- ldu r6,24(4)
+ srdi r8,r5,7 /* page is copied in 128 byte strides */
+ addi r8,r8,-1 /* one stride copied outside loop */
+
+ mtctr r8
+
+ ld r5,0(r4)
+ ld r6,8(r4)
+ ld r7,16(r4)
+ ldu r8,24(r4)
+1: std r5,8(r3)
+ ld r9,8(r4)
+ std r6,16(r3)
+ ld r10,16(r4)
+ std r7,24(r3)
+ ld r11,24(r4)
+ std r8,32(r3)
+ ld r12,32(r4)
+ std r9,40(r3)
+ ld r5,40(r4)
+ std r10,48(r3)
+ ld r6,48(r4)
+ std r11,56(r3)
+ ld r7,56(r4)
+ std r12,64(r3)
+ ld r8,64(r4)
+ std r5,72(r3)
+ ld r9,72(r4)
+ std r6,80(r3)
+ ld r10,80(r4)
+ std r7,88(r3)
+ ld r11,88(r4)
+ std r8,96(r3)
+ ld r12,96(r4)
+ std r9,104(r3)
+ ld r5,104(r4)
+ std r10,112(r3)
+ ld r6,112(r4)
+ std r11,120(r3)
+ ld r7,120(r4)
+ stdu r12,128(r3)
+ ldu r8,128(r4)
bdnz 1b
- std r22,648(3)
- std r21,520(3)
- std r20,392(3)
- std r11,264(3)
- std r9,136(3)
- std r7,8(3)
- addi r4,r4,640
- addi r3,r3,648
- bge 0b
- mtctr r5
- ld r7,0(4)
- ld r8,8(4)
- ldu r9,16(4)
-3: ld r10,8(4)
- std r7,8(3)
- ld r7,16(4)
- std r8,16(3)
- ld r8,24(4)
- std r9,24(3)
- ldu r9,32(4)
- stdu r10,32(3)
- bdnz 3b
-4: ld r10,8(4)
- std r7,8(3)
- std r8,16(3)
- std r9,24(3)
- std r10,32(3)
-9: ld r20,-96(1)
- ld r21,-88(1)
- ld r22,-80(1)
- ld r23,-72(1)
- ld r24,-64(1)
- ld r25,-56(1)
- ld r26,-48(1)
- ld r27,-40(1)
- ld r28,-32(1)
- ld r29,-24(1)
- ld r30,-16(1)
- ld r31,-8(1)
+
+ std r5,8(r3)
+ ld r9,8(r4)
+ std r6,16(r3)
+ ld r10,16(r4)
+ std r7,24(r3)
+ ld r11,24(r4)
+ std r8,32(r3)
+ ld r12,32(r4)
+ std r9,40(r3)
+ ld r5,40(r4)
+ std r10,48(r3)
+ ld r6,48(r4)
+ std r11,56(r3)
+ ld r7,56(r4)
+ std r12,64(r3)
+ ld r8,64(r4)
+ std r5,72(r3)
+ ld r9,72(r4)
+ std r6,80(r3)
+ ld r10,80(r4)
+ std r7,88(r3)
+ ld r11,88(r4)
+ std r8,96(r3)
+ ld r12,96(r4)
+ std r9,104(r3)
+ std r10,112(r3)
+ std r11,120(r3)
+ std r12,128(r3)
blr