
powerpc: POWER7 optimised copy_to_user/copy_from_user using VMX

Message ID: 1324263652.30454.13.camel@pasglop (mailing list archive)
State: Not Applicable

Commit Message

Benjamin Herrenschmidt Dec. 19, 2011, 3 a.m. UTC
On Thu, 2011-12-08 at 17:11 +1100, Anton Blanchard wrote:
> Implement a POWER7 optimised copy_to_user/copy_from_user using VMX.
> For large aligned copies this new loop is over 10% faster, and for
> large unaligned copies it is over 200% faster.

Breaks the !CONFIG_ALTIVEC build and pops some WARNs with preempt & lockdep;
this seems to fix them:

Comments

Benjamin Herrenschmidt Dec. 19, 2011, 3:19 a.m. UTC | #1
On Mon, 2011-12-19 at 14:00 +1100, Benjamin Herrenschmidt wrote:
> On Thu, 2011-12-08 at 17:11 +1100, Anton Blanchard wrote:
> > Implement a POWER7 optimised copy_to_user/copy_from_user using VMX.
> > For large aligned copies this new loop is over 10% faster, and for
> > large unaligned copies it is over 200% faster.
> 
> Breaks the !CONFIG_ALTIVEC build and pops some WARNs with preempt & lockdep;
> this seems to fix them:

And it's actually missing another ifdef for !CONFIG_ALTIVEC; I'll fix it
locally and put a fixed version in -next.

Cheers,
Ben.
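For context, the preempt/lockdep WARNs come from calling
enable_kernel_altivec() while preemption is still enabled; since
pagefault_disable() also bumps the preempt count, doing it first closes
that window. A minimal C sketch of the fixed ordering follows -- the
wrapper name is illustrative, the kernel calls are real, and the header
locations reflect kernels of this vintage:

#include <linux/hardirq.h>	/* in_interrupt() */
#include <linux/uaccess.h>	/* pagefault_disable() */
#include <asm/system.h>		/* enable_kernel_altivec(), pre-3.4 location */

/* Illustrative stand-in for the fixed enter_vmx_copy() below. */
int enter_vmx_copy_sketch(void)
{
	if (in_interrupt())
		return 0;

	/*
	 * pagefault_disable() increments the preempt count, so it acts
	 * as preempt_disable() too. Doing it before
	 * enable_kernel_altivec() means we cannot be scheduled away,
	 * and lose the VMX state we just claimed, in between.
	 */
	pagefault_disable();
	enable_kernel_altivec();

	return 1;
}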

Patch

diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index b90b3e7..7735a2c 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -17,13 +17,14 @@  obj-$(CONFIG_HAS_IOMEM)	+= devres.o
 obj-$(CONFIG_PPC64)	+= copypage_64.o copyuser_64.o \
 			   memcpy_64.o usercopy_64.o mem_64.o string.o \
 			   checksum_wrappers_64.o hweight_64.o \
-			   copyuser_power7.o copyuser_power7_vmx.o
+			   copyuser_power7.o
 obj-$(CONFIG_XMON)	+= sstep.o ldstfp.o
 obj-$(CONFIG_KPROBES)	+= sstep.o ldstfp.o
 obj-$(CONFIG_HAVE_HW_BREAKPOINT)	+= sstep.o ldstfp.o
 
 ifeq ($(CONFIG_PPC64),y)
 obj-$(CONFIG_SMP)	+= locks.o
+obj-$(CONFIG_ALTIVEC)	+= copyuser_power7_vmx.o
 endif
 
 obj-$(CONFIG_PPC_LIB_RHEAP) += rheap.o
diff --git a/arch/powerpc/lib/copyuser_power7.S b/arch/powerpc/lib/copyuser_power7.S
index 4395939..9a21b08 100644
--- a/arch/powerpc/lib/copyuser_power7.S
+++ b/arch/powerpc/lib/copyuser_power7.S
@@ -85,6 +85,7 @@ 
 
 
 _GLOBAL(__copy_tofrom_user_power7)
+#ifdef CONFIG_ALTIVEC
 	cmpldi	r5,16
 	cmpldi	cr1,r5,4096
 
@@ -94,6 +95,15 @@  _GLOBAL(__copy_tofrom_user_power7)
 
 	blt	.Lshort_copy
 	bgt	cr1,.Lvmx_copy
+#else
+	cmpldi	r5,16
+
+	std	r3,48(r1)
+	std	r4,56(r1)
+	std	r5,64(r1)
+
+	blt	.Lshort_copy
+#endif
 
 .Lnonvmx_copy:
 	/* Get the source 8B aligned */
@@ -273,6 +283,7 @@  err1;	stb	r0,0(r3)
 	addi	r1,r1,STACKFRAMESIZE
 	b	.Lnonvmx_copy
 
+#ifdef CONFIG_ALTIVEC
 .Lvmx_copy:
 	mflr	r0
 	std	r0,16(r1)
@@ -667,3 +678,4 @@  err3;	stb	r0,0(r3)
 
 15:	addi	r1,r1,STACKFRAMESIZE
 	b	.exit_vmx_copy		/* tail call optimise */
+#endif /* CONFIG_ALTIVEC */
diff --git a/arch/powerpc/lib/copyuser_power7_vmx.c b/arch/powerpc/lib/copyuser_power7_vmx.c
index c37b949..6e1efad 100644
--- a/arch/powerpc/lib/copyuser_power7_vmx.c
+++ b/arch/powerpc/lib/copyuser_power7_vmx.c
@@ -26,10 +26,16 @@  int enter_vmx_copy(void)
 	if (in_interrupt())
 		return 0;
 
-	enable_kernel_altivec();
-
+	/* This acts as preempt_disable() as well, which we need
+	 * before calling enable_kernel_altivec(). We must also
+	 * disable page faults, as they can call schedule() and thus
+	 * make us lose the VMX context. So on page faults we just
+	 * fail, which causes a fallback to the normal non-VMX copy.
+	 */
 	pagefault_disable();
 
+	enable_kernel_altivec();
+
 	return 1;
 }
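
For reference, the overall flow in copyuser_power7.S can be sketched in
C roughly as follows. enter_vmx_copy() and exit_vmx_copy() are the real
helpers (the assembly tail calls .exit_vmx_copy above); vmx_copy() and
nonvmx_copy() are hypothetical stand-ins for the assembly copy loops,
and the real code also falls back mid-copy when a VMX access faults:

/* Hypothetical C rendering of the copy path, not the actual code. */
static unsigned long copy_large(void *to, const void *from,
				unsigned long n)
{
	if (enter_vmx_copy()) {
		/*
		 * Page faults are disabled while VMX is live, so the
		 * VMX loop must bail out on any fault rather than
		 * waiting for the fault to be serviced.
		 */
		n = vmx_copy(to, from, n);
		exit_vmx_copy();	/* pagefault_enable() */
		return n;
	}

	/* In interrupt context: take the plain integer copy path. */
	return nonvmx_copy(to, from, n);
}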