Patchwork powerpc: Update 64bit __copy_tofrom_user() using CPU_FTR_UNALIGNED_LD_STD

login
register
mail settings
Submitter Mark Nelson
Date Nov. 11, 2008, 10:53 a.m.
Message ID <200811112153.34133.markn@au1.ibm.com>
Download mbox | patch
Permalink /patch/8104/
State Accepted
Commit a4e22f02f5b6518c1484faea1f88d81802b9feac
Delegated to: Paul Mackerras
Headers show

Comments

Mark Nelson - Nov. 11, 2008, 10:53 a.m.
In exactly the same way that we updated memcpy() with new feature sections
in commit 25d6e2d7c58ddc4a3b614fc5381591c0cfe66556 we do the same thing
here for __copy_tofrom_user(). Once again, this is purely a performance
tweak for Cell and Power6 - it has no effect on any of the other 64-bit
powerpc chips.

We can make these same changes to __copy_tofrom_user() because the basic
copy algorithm is the same as in memcpy() - this version just has all the
exception handling logic needed when copying to or from userspace as well
as a special case for copying whole 4K pages that are page aligned.

CPU_FTR_UNALIGNED_LD_STD was added in commit
4ec577a28980a0790df3c3dfe9c81f6e2222acfb.

We also make the same simple one-line change from cmpldi r1,... to cmpldi
cr1,... for consistency.

Signed-off-by: Mark Nelson <markn@au1.ibm.com>
---
 arch/powerpc/lib/copyuser_64.S |   17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

Patch

Index: upstream/arch/powerpc/lib/copyuser_64.S
===================================================================
--- upstream.orig/arch/powerpc/lib/copyuser_64.S
+++ upstream/arch/powerpc/lib/copyuser_64.S
@@ -26,11 +26,24 @@  _GLOBAL(__copy_tofrom_user)
 	andi.	r6,r6,7
 	PPC_MTOCRF	0x01,r5
 	blt	cr1,.Lshort_copy
+/* Below we want to nop out the bne if we're on a CPU that has the
+ * CPU_FTR_UNALIGNED_LD_STD bit set and the CPU_FTR_CP_USE_DCBTZ bit
+ * cleared.
+ * At the time of writing the only CPU that has this combination of bits
+ * set is Power6.
+ */
+BEGIN_FTR_SECTION
+	nop
+FTR_SECTION_ELSE
 	bne	.Ldst_unaligned
+ALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \
+		    CPU_FTR_UNALIGNED_LD_STD)
 .Ldst_aligned:
-	andi.	r0,r4,7
 	addi	r3,r3,-16
+BEGIN_FTR_SECTION
+	andi.	r0,r4,7
 	bne	.Lsrc_unaligned
+END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
 	srdi	r7,r5,4
 20:	ld	r9,0(r4)
 	addi	r4,r4,-8
@@ -138,7 +151,7 @@  _GLOBAL(__copy_tofrom_user)
 	PPC_MTOCRF	0x01,r6		/* put #bytes to 8B bdry into cr7 */
 	subf	r5,r6,r5
 	li	r7,0
-	cmpldi	r1,r5,16
+	cmpldi	cr1,r5,16
 	bf	cr7*4+3,1f
 35:	lbz	r0,0(r4)
 81:	stb	r0,0(r3)