Message ID | 1236957383-20240-1-git-send-email-galak@kernel.crashing.org (mailing list archive) |
---|---|
State | Superseded, archived |
Headers | show |
On Fri, 2009-03-13 at 10:16 -0500, Kumar Gala wrote: > From: David Jander <david.jander@protonic.nl> > > Complete workaround for DTLB errata in e300c2/c3/c4 processors. > > Due to the bug, the hardware-implemented LRU algorithm always goes to way > 1 of the TLB. This fix implements the proposed software workaround in > form of a LRW table for choosing the TLB-way. > > Signed-off-by: David Jander <david@protonic.nl> > Signed-off-by: Kumar Gala <galak@kernel.crashing.org> > --- > > Added cpu feature support.. need to check with Ben if we should use a MMU feature instead Yes, we should... Cheers, Ben. > - k > > arch/powerpc/include/asm/cputable.h | 7 +++- > arch/powerpc/kernel/cputable.c | 4 +- > arch/powerpc/kernel/head_32.S | 61 +++++++++++++++++++++++++++++++++++ > 3 files changed, 69 insertions(+), 3 deletions(-) > > diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h > index fca1611..42e3145 100644 > --- a/arch/powerpc/include/asm/cputable.h > +++ b/arch/powerpc/include/asm/cputable.h > @@ -152,6 +152,7 @@ extern const char *powerpc_base_platform; > #define CPU_FTR_NAP_DISABLE_L2_PR ASM_CONST(0x0000000000002000) > #define CPU_FTR_DUAL_PLL_750FX ASM_CONST(0x0000000000004000) > #define CPU_FTR_NO_DPM ASM_CONST(0x0000000000008000) > +#define CPU_FTR_NEED_DTLB_SW_LRU ASM_CONST(0x0000000000010000) > #define CPU_FTR_NEED_COHERENT ASM_CONST(0x0000000000020000) > #define CPU_FTR_NO_BTIC ASM_CONST(0x0000000000040000) > #define CPU_FTR_NODSISRALIGN ASM_CONST(0x0000000000100000) > @@ -356,7 +357,11 @@ extern const char *powerpc_base_platform; > CPU_FTR_COMMON) > #define CPU_FTRS_E300C2 (CPU_FTR_MAYBE_CAN_DOZE | \ > CPU_FTR_USE_TB | CPU_FTR_MAYBE_CAN_NAP | \ > - CPU_FTR_COMMON | CPU_FTR_FPU_UNAVAILABLE) > + CPU_FTR_COMMON | CPU_FTR_FPU_UNAVAILABLE | \ > + CPU_FTR_NEED_DTLB_SW_LRU) > +#define CPU_FTRS_E300C3 (CPU_FTR_MAYBE_CAN_DOZE | \ > + CPU_FTR_USE_TB | CPU_FTR_MAYBE_CAN_NAP | \ > + CPU_FTR_COMMON | CPU_FTR_NEED_DTLB_SW_LRU) > #define 
CPU_FTRS_CLASSIC32 (CPU_FTR_COMMON | CPU_FTR_USE_TB) > #define CPU_FTRS_8XX (CPU_FTR_USE_TB) > #define CPU_FTRS_40X (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE) > diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c > index ccea243..039452c 100644 > --- a/arch/powerpc/kernel/cputable.c > +++ b/arch/powerpc/kernel/cputable.c > @@ -1101,7 +1101,7 @@ static struct cpu_spec __initdata cpu_specs[] = { > .pvr_mask = 0x7fff0000, > .pvr_value = 0x00850000, > .cpu_name = "e300c3", > - .cpu_features = CPU_FTRS_E300, > + .cpu_features = CPU_FTRS_E300C3, > .cpu_user_features = COMMON_USER, > .mmu_features = MMU_FTR_USE_HIGH_BATS, > .icache_bsize = 32, > @@ -1116,7 +1116,7 @@ static struct cpu_spec __initdata cpu_specs[] = { > .pvr_mask = 0x7fff0000, > .pvr_value = 0x00860000, > .cpu_name = "e300c4", > - .cpu_features = CPU_FTRS_E300, > + .cpu_features = CPU_FTRS_E300C3, > .cpu_user_features = COMMON_USER, > .mmu_features = MMU_FTR_USE_HIGH_BATS, > .icache_bsize = 32, > diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S > index f8c2e6b..eecae0d 100644 > --- a/arch/powerpc/kernel/head_32.S > +++ b/arch/powerpc/kernel/head_32.S > @@ -554,6 +554,10 @@ DataLoadTLBMiss: > * r2: ptr to linux-style pte > * r3: scratch > */ > +BEGIN_FTR_SECTION > + b TlbWo /* Code for TLB-errata workaround doesn't fit here */ > +END_FTR_SECTION_IFSET(CPU_FTR_NEED_DTLB_SW_LRU) > +RFTlbWo: > mfctr r0 > /* Get PTE (linux-style) and check access */ > mfspr r3,SPRN_DMISS > @@ -626,6 +630,31 @@ DataStoreTLBMiss: > * r2: ptr to linux-style pte > * r3: scratch > */ > +BEGIN_FTR_SECTION > +/* MPC512x: workaround for errata in die M36P and earlier: > + * Implement LRW for TLB way. > + */ > + mfspr r3,SPRN_DMISS > + rlwinm r3,r3,19,25,29 /* Get Address bits 19:15 */ > + lis r2,lrw@ha /* Search index in lrw[] */ > + addi r2,r2,lrw@l > + tophys(r2,r2) > + lwzx r1,r3,r2 /* Get item from lrw[] */ > + cmpwi 0,r1,0 /* Was it way 0 last time? 
*/ > + beq- 0,113f /* Then goto 113: */ > + > + mfspr r1,SPRN_SRR1 > + rlwinm r1,r1,0,15,13 /* Mask out SRR1[WAY] */ > + mtspr SPRN_SRR1,r1 > + > + li r0,0 > + stwx r0,r3,r2 /* Make lrw[] entry 0 */ > + b 114f > +113: > + li r0,1 > + stwx r0,r3,r2 /* Make lrw[] entry 1 */ > +114: > +END_FTR_SECTION_IFSET(CPU_FTR_NEED_DTLB_SW_LRU) > mfctr r0 > /* Get PTE (linux-style) and check access */ > mfspr r3,SPRN_DMISS > @@ -813,6 +842,32 @@ giveup_altivec: > blr > #endif /* CONFIG_ALTIVEC */ > > +TlbWo: > +/* MPC512x: workaround for errata in die M36P and earlier: > + * Implement LRW for TLB way. > + */ > + mfspr r3,SPRN_DMISS > + rlwinm r3,r3,19,25,29 /* Get Address bits 19:15 */ > + lis r2,lrw@ha /* Search index in lrw[] */ > + addi r2,r2,lrw@l > + tophys(r2,r2) > + lwzx r1,r3,r2 /* Get item from lrw[] */ > + cmpwi 0,r1,0 /* Was it way 0 last time? */ > + beq- 0,113f /* Then goto 113: */ > + > + mfspr r1,SPRN_SRR1 > + rlwinm r1,r1,0,15,13 /* Mask out SRR1[WAY] */ > + mtspr SPRN_SRR1,r1 > + > + li r0,0 > + stwx r0,r3,r2 /* Make lrw[] entry 0 */ > + b 114f > +113: > + li r0,1 > + stwx r0,r3,r2 /* Make lrw[] entry 1 */ > +114: > + b RFTlbWo > + > /* > * This code is jumped to from the startup code to copy > * the kernel image to physical address PHYSICAL_START. > @@ -1328,6 +1383,12 @@ intercept_table: > .long 0, 0, 0, 0, 0, 0, 0, 0 > .long 0, 0, 0, 0, 0, 0, 0, 0 > > +lrw: > + .long 0, 0, 0, 0, 0, 0, 0, 0 > + .long 0, 0, 0, 0, 0, 0, 0, 0 > + .long 0, 0, 0, 0, 0, 0, 0, 0 > + .long 0, 0, 0, 0, 0, 0, 0, 0 > + > /* Room for two PTE pointers, usually the kernel and current user pointers > * to their respective root page table. > */
> +BEGIN_FTR_SECTION > + b TlbWo /* Code for TLB-errata workaround doesn't fit here */ > +END_FTR_SECTION_IFSET(CPU_FTR_NEED_DTLB_SW_LRU) > +RFTlbWo: Can you use nicer label names ? :-) Also, that's a lot of code for such a hot path... Cheers, Ben.
On Mar 13, 2009, at 4:26 PM, Benjamin Herrenschmidt wrote: >> +BEGIN_FTR_SECTION >> + b TlbWo /* Code for TLB-errata workaround doesn't fit >> here */ >> +END_FTR_SECTION_IFSET(CPU_FTR_NEED_DTLB_SW_LRU) >> +RFTlbWo: > > Can you use nicer label names ? :-) > > Also, that's a lot of code for such a hot path... > > Cheers, > Ben. The code needs reworking. However, we are doing SW LRU, not sure how we reduce this in the hot path. - k
On Mar 13, 2009, at 5:06 PM, Kumar Gala wrote: > > On Mar 13, 2009, at 4:26 PM, Benjamin Herrenschmidt wrote: > >>> +BEGIN_FTR_SECTION >>> + b TlbWo /* Code for TLB-errata workaround doesn't fit >>> here */ >>> +END_FTR_SECTION_IFSET(CPU_FTR_NEED_DTLB_SW_LRU) >>> +RFTlbWo: >> >> Can you use nicer label names ? :-) >> >> Also, that's a lot of code for such a hot path... >> >> Cheers, >> Ben. > > The code needs reworking. However, we are doing SW LRU, not sure > how we reduce this in the hot path. Ben, David, Here's my attempt at reworking the code to use an SPRG, remove branches, optimize it down, etc. I haven't validated that this is even correct. It should be easy to replace the m{f,t}spr SPRG w/lwz/ stw if we want to keep the LRU state in memory instead. Ben, do you think we can optimize this further with some random LRU selection? mtspr SPRN_RPA,r1 mfspr r2,SPRN_SRR1 /* Need to restore CR0 */ mtcrf 0x80,r2 #if 1 li r0,1 mfspr r1,SPRN_SPRG4 /* could replace w/lwz r1,sw_way_lru@l(0) */ rlwinm r3,r3,19,25,29 /* Get Address bits 19:15 */ slw r0,r0,r3 xor r1,r0,r1 srw r0,r1,r3 mtspr SPRN_SPRG4,r1 /* could replace w/stw r1,sw_way_lru@l(0) */ rlwimi r2,r0,31-14,14,14 #endif tlbld r3 rfi
diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index fca1611..42e3145 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -152,6 +152,7 @@ extern const char *powerpc_base_platform; #define CPU_FTR_NAP_DISABLE_L2_PR ASM_CONST(0x0000000000002000) #define CPU_FTR_DUAL_PLL_750FX ASM_CONST(0x0000000000004000) #define CPU_FTR_NO_DPM ASM_CONST(0x0000000000008000) +#define CPU_FTR_NEED_DTLB_SW_LRU ASM_CONST(0x0000000000010000) #define CPU_FTR_NEED_COHERENT ASM_CONST(0x0000000000020000) #define CPU_FTR_NO_BTIC ASM_CONST(0x0000000000040000) #define CPU_FTR_NODSISRALIGN ASM_CONST(0x0000000000100000) @@ -356,7 +357,11 @@ extern const char *powerpc_base_platform; CPU_FTR_COMMON) #define CPU_FTRS_E300C2 (CPU_FTR_MAYBE_CAN_DOZE | \ CPU_FTR_USE_TB | CPU_FTR_MAYBE_CAN_NAP | \ - CPU_FTR_COMMON | CPU_FTR_FPU_UNAVAILABLE) + CPU_FTR_COMMON | CPU_FTR_FPU_UNAVAILABLE | \ + CPU_FTR_NEED_DTLB_SW_LRU) +#define CPU_FTRS_E300C3 (CPU_FTR_MAYBE_CAN_DOZE | \ + CPU_FTR_USE_TB | CPU_FTR_MAYBE_CAN_NAP | \ + CPU_FTR_COMMON | CPU_FTR_NEED_DTLB_SW_LRU) #define CPU_FTRS_CLASSIC32 (CPU_FTR_COMMON | CPU_FTR_USE_TB) #define CPU_FTRS_8XX (CPU_FTR_USE_TB) #define CPU_FTRS_40X (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE) diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index ccea243..039452c 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -1101,7 +1101,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .pvr_mask = 0x7fff0000, .pvr_value = 0x00850000, .cpu_name = "e300c3", - .cpu_features = CPU_FTRS_E300, + .cpu_features = CPU_FTRS_E300C3, .cpu_user_features = COMMON_USER, .mmu_features = MMU_FTR_USE_HIGH_BATS, .icache_bsize = 32, @@ -1116,7 +1116,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .pvr_mask = 0x7fff0000, .pvr_value = 0x00860000, .cpu_name = "e300c4", - .cpu_features = CPU_FTRS_E300, + .cpu_features = CPU_FTRS_E300C3, 
.cpu_user_features = COMMON_USER, .mmu_features = MMU_FTR_USE_HIGH_BATS, .icache_bsize = 32, diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S index f8c2e6b..eecae0d 100644 --- a/arch/powerpc/kernel/head_32.S +++ b/arch/powerpc/kernel/head_32.S @@ -554,6 +554,10 @@ DataLoadTLBMiss: * r2: ptr to linux-style pte * r3: scratch */ +BEGIN_FTR_SECTION + b TlbWo /* Code for TLB-errata workaround doesn't fit here */ +END_FTR_SECTION_IFSET(CPU_FTR_NEED_DTLB_SW_LRU) +RFTlbWo: mfctr r0 /* Get PTE (linux-style) and check access */ mfspr r3,SPRN_DMISS @@ -626,6 +630,31 @@ DataStoreTLBMiss: * r2: ptr to linux-style pte * r3: scratch */ +BEGIN_FTR_SECTION +/* MPC512x: workaround for errata in die M36P and earlier: + * Implement LRW for TLB way. + */ + mfspr r3,SPRN_DMISS + rlwinm r3,r3,19,25,29 /* Get Address bits 19:15 */ + lis r2,lrw@ha /* Search index in lrw[] */ + addi r2,r2,lrw@l + tophys(r2,r2) + lwzx r1,r3,r2 /* Get item from lrw[] */ + cmpwi 0,r1,0 /* Was it way 0 last time? */ + beq- 0,113f /* Then goto 113: */ + + mfspr r1,SPRN_SRR1 + rlwinm r1,r1,0,15,13 /* Mask out SRR1[WAY] */ + mtspr SPRN_SRR1,r1 + + li r0,0 + stwx r0,r3,r2 /* Make lrw[] entry 0 */ + b 114f +113: + li r0,1 + stwx r0,r3,r2 /* Make lrw[] entry 1 */ +114: +END_FTR_SECTION_IFSET(CPU_FTR_NEED_DTLB_SW_LRU) mfctr r0 /* Get PTE (linux-style) and check access */ mfspr r3,SPRN_DMISS @@ -813,6 +842,32 @@ giveup_altivec: blr #endif /* CONFIG_ALTIVEC */ +TlbWo: +/* MPC512x: workaround for errata in die M36P and earlier: + * Implement LRW for TLB way. + */ + mfspr r3,SPRN_DMISS + rlwinm r3,r3,19,25,29 /* Get Address bits 19:15 */ + lis r2,lrw@ha /* Search index in lrw[] */ + addi r2,r2,lrw@l + tophys(r2,r2) + lwzx r1,r3,r2 /* Get item from lrw[] */ + cmpwi 0,r1,0 /* Was it way 0 last time? 
*/ + beq- 0,113f /* Then goto 113: */ + + mfspr r1,SPRN_SRR1 + rlwinm r1,r1,0,15,13 /* Mask out SRR1[WAY] */ + mtspr SPRN_SRR1,r1 + + li r0,0 + stwx r0,r3,r2 /* Make lrw[] entry 0 */ + b 114f +113: + li r0,1 + stwx r0,r3,r2 /* Make lrw[] entry 1 */ +114: + b RFTlbWo + /* * This code is jumped to from the startup code to copy * the kernel image to physical address PHYSICAL_START. @@ -1328,6 +1383,12 @@ intercept_table: .long 0, 0, 0, 0, 0, 0, 0, 0 .long 0, 0, 0, 0, 0, 0, 0, 0 +lrw: + .long 0, 0, 0, 0, 0, 0, 0, 0 + .long 0, 0, 0, 0, 0, 0, 0, 0 + .long 0, 0, 0, 0, 0, 0, 0, 0 + .long 0, 0, 0, 0, 0, 0, 0, 0 + /* Room for two PTE pointers, usually the kernel and current user pointers * to their respective root page table. */