powerpc: Fix bogus cache flushing on all 40x and BookE processors v2

Submitted by Benjamin Herrenschmidt on Dec. 4, 2008, 6:12 a.m.

Details

Message ID 20081204061341.C45CBDDDF6@ozlabs.org
State Accepted, archived
Commit 8309ce7280536b07716026ff588acbcc0ee1a546
Headers show

Commit Message

Benjamin Herrenschmidt Dec. 4, 2008, 6:12 a.m.
We were missing the CPU_FTR_NOEXECUTE bit in our cputable for all
these processors. The result is that update_mmu_cache() would flush
the cache for all pages mapped to userspace which is totally
unnecessary on those processors since we already handle flushing
on execute in the page fault path.

This should provide a nice speed up ;-)

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---

v2: This version fixes the E500 definition and moves CPU_FTR_NOEXECUTE
to a feature bit in the low 32 bits, so it also works on 32-bit processors.

 arch/powerpc/include/asm/cputable.h |   15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

Comments

Josh Boyer Dec. 4, 2008, 12:33 p.m.
On Thu, 04 Dec 2008 17:12:59 +1100
Benjamin Herrenschmidt <benh@kernel.crashing.org> wrote:

> We were missing the CPU_FTR_NOEXECUTE bit in our cputable for all
> these processors. The result is that update_mmu_cache() would flush
> the cache for all pages mapped to userspace which is totally
> unnecessary on those processors since we already handle flushing
> on execute in the page fault path.
> 
> This should provide a nice speed up ;-)

Did you test it this time?  If so, how and what were the results?

josh
Benjamin Herrenschmidt Dec. 4, 2008, 10:02 p.m.
On Thu, 2008-12-04 at 07:33 -0500, Josh Boyer wrote:
> On Thu, 04 Dec 2008 17:12:59 +1100
> Benjamin Herrenschmidt <benh@kernel.crashing.org> wrote:
> 
> > We were missing the CPU_FTR_NOEXECUTE bit in our cputable for all
> > these processors. The result is that update_mmu_cache() would flush
> > the cache for all pages mapped to userspace which is totally
> > unnecessary on those processors since we already handle flushing
> > on execute in the page fault path.
> > 
> > This should provide a nice speed up ;-)
> 
> Did you test it this time?  If so, how and what were the results?

Yes, I verified I no longer had PG_arch_1 all over my PCI GART pages with
DRI enabled :-)

I didn't actually benchmark anything.

Cheers,
Ben.

Patch hide | download patch | download mbox

--- linux-work.orig/arch/powerpc/include/asm/cputable.h	2008-12-01 16:55:53.000000000 +1100
+++ linux-work/arch/powerpc/include/asm/cputable.h	2008-12-04 15:03:47.000000000 +1100
@@ -163,6 +163,7 @@  extern const char *powerpc_base_platform
 #define CPU_FTR_SPE			ASM_CONST(0x0000000002000000)
 #define CPU_FTR_NEED_PAIRED_STWCX	ASM_CONST(0x0000000004000000)
 #define CPU_FTR_LWSYNC			ASM_CONST(0x0000000008000000)
+#define CPU_FTR_NOEXECUTE		ASM_CONST(0x0000000010000000)
 
 /*
  * Add the 64-bit processor unique features in the top half of the word;
@@ -177,7 +178,6 @@  extern const char *powerpc_base_platform
 #define CPU_FTR_SLB			LONG_ASM_CONST(0x0000000100000000)
 #define CPU_FTR_16M_PAGE		LONG_ASM_CONST(0x0000000200000000)
 #define CPU_FTR_TLBIEL			LONG_ASM_CONST(0x0000000400000000)
-#define CPU_FTR_NOEXECUTE		LONG_ASM_CONST(0x0000000800000000)
 #define CPU_FTR_IABR			LONG_ASM_CONST(0x0000002000000000)
 #define CPU_FTR_MMCRA			LONG_ASM_CONST(0x0000004000000000)
 #define CPU_FTR_CTRL			LONG_ASM_CONST(0x0000008000000000)
@@ -366,19 +366,20 @@  extern const char *powerpc_base_platform
 #define CPU_FTRS_CLASSIC32	(CPU_FTR_COMMON | \
 	    CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE)
 #define CPU_FTRS_8XX	(CPU_FTR_USE_TB)
-#define CPU_FTRS_40X	(CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN)
-#define CPU_FTRS_44X	(CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN)
+#define CPU_FTRS_40X	(CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE)
+#define CPU_FTRS_44X	(CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE)
 #define CPU_FTRS_E200	(CPU_FTR_USE_TB | CPU_FTR_SPE_COMP | \
 	    CPU_FTR_NODSISRALIGN | CPU_FTR_COHERENT_ICACHE | \
-	    CPU_FTR_UNIFIED_ID_CACHE)
+	    CPU_FTR_UNIFIED_ID_CACHE | CPU_FTR_NOEXECUTE)
 #define CPU_FTRS_E500	(CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | \
-	    CPU_FTR_SPE_COMP | CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_NODSISRALIGN)
+	    CPU_FTR_SPE_COMP | CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_NODSISRALIGN | \
+	    CPU_FTR_NOEXECUTE)
 #define CPU_FTRS_E500_2	(CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | \
 	    CPU_FTR_SPE_COMP | CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_BIG_PHYS | \
-	    CPU_FTR_NODSISRALIGN)
+	    CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE)
 #define CPU_FTRS_E500MC	(CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | \
 	    CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_BIG_PHYS | CPU_FTR_NODSISRALIGN | \
-	    CPU_FTR_L2CSR | CPU_FTR_LWSYNC)
+	    CPU_FTR_L2CSR | CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE)
 #define CPU_FTRS_GENERIC_32	(CPU_FTR_COMMON | CPU_FTR_NODSISRALIGN)
 
 /* 64-bit CPUs */