Message ID | 200903161521.04979.david.jander@protonic.nl (mailing list archive) |
---|---|
State | Superseded, archived |
Headers | show |
Oops, OK, I think I just missed your proposal, Kumar. Anyway, I'll post my benchmark results here:

1.- mplayer -nosound -benchmark testfile.mpeg (a DVD-mpeg2 file):
    No fix at all: VC: 30.5s VO: 53.4s Sys:1.95s Total: 85.8s
    First fix (force writes to way 0): VC: 24.3s VO: 40.6s Sys:1.95s Total: 66.9s
    Second fix (implementing lrw): VC: 23.1s VO: 31.5s Sys:1.03s Total: 55.6s
    Third fix (patch v4, lrw in SPRG6): VC: 21.055s VO: 28.289s Sys:0.972s Total: 50.316s

2.- prboom -timedemo doombench1 (where doombench1.lmp is a prerecorded demo):
    No fix at all: 14.1 fps
    First fix (force writes to way 0): 16.7 fps
    Second fix (implementing lrw): 18.1 fps
    Third fix (patch v4, lrw in SPRG6): 19.9 fps

3.- Synthetic and pathological memcpy() benchmark:
    No fix at all: 26 MByte/s
    First fix (force writes to way 0): 160 MByte/s
    Second fix (implementing lrw): 163 MByte/s
    Third fix (patch v4, lrw in SPRG6): 180 MByte/s

Best regards,
diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S index 0f4fac5..6cc0cd3 100644 --- a/arch/powerpc/kernel/head_32.S +++ b/arch/powerpc/kernel/head_32.S @@ -540,9 +540,13 @@ DataLoadTLBMiss: * r2: ptr to linux-style pte * r3: scratch */ + mfspr r3,SPRN_DMISS +#ifdef CONFIG_PPC_MPC512x + b e300_read_tlb_fix /* Code for TLB-errata workaround doesn't fit here */ +e300_read_tlb_fix_ret: +#endif mfctr r0 /* Get PTE (linux-style) and check access */ - mfspr r3,SPRN_DMISS lis r1,PAGE_OFFSET@h /* check if kernel address */ cmplw 0,r1,r3
Complete workaround for DTLB errata in MPC5121e processors of die M36P and older (all currently existing versions). Due to the bug, the hardware-implemented LRU algorithm always goes to way 1 of the TLB. This fix implements the proposed software workaround in the form of an LRW table encoded in 32 bits of SPRG6 for choosing the TLB-way. Signed-off-by: David Jander <david@protonic.nl> --- mfspr r2,SPRN_SPRG3 @@ -612,9 +616,32 @@ DataStoreTLBMiss: * r2: ptr to linux-style pte * r3: scratch */ + mfspr r3,SPRN_DMISS +#ifdef CONFIG_PPC_MPC512x +/* MPC512x: workaround for errata in die M36P and earlier: + * Implement LRW for TLB way. + */ + rlwinm r0,r3,17,27,31 /* Get Address bits 19:15 */ + li r1,1 + slw r0,r1,r0 /* Make bitmask */ + mfspr r2,SPRN_SPRG6 /* Get lrw table */ + and. r1,r2,r0 /* Check entry in lrw */ + beq- 0,113f /* 0? Then goto 113: */ + + mfspr r1,SPRN_SRR1 + rlwinm r1,r1,0,15,13 /* Mask out SRR1[WAY] */ + mtspr SPRN_SRR1,r1 + + andc r2,r2,r0 + mtspr SPRN_SPRG6,r2 + b 114f +113: + or r2,r2,r0 + mtspr SPRN_SPRG6,r2 +114: +#endif mfctr r0 /* Get PTE (linux-style) and check access */ - mfspr r3,SPRN_DMISS lis r1,PAGE_OFFSET@h /* check if kernel address */ cmplw 0,r1,r3 mfspr r2,SPRN_SPRG3 @@ -688,6 +715,29 @@ DataStoreTLBMiss: .globl mol_trampoline .set mol_trampoline, i0x2f00 +#ifdef CONFIG_PPC_MPC512x +e300_read_tlb_fix: + rlwinm r0,r3,17,27,31 /* Get Address bits 19:15 */ + li r1,1 + slw r0,r1,r0 /* Make bitmask */ + mfspr r2,SPRN_SPRG6 /* Get lrw table */ + and. r1,r2,r0 /* Check entry in lrw */ + beq- 0,113f /* 0? Then goto 113: */ + + mfspr r1,SPRN_SRR1 + rlwinm r1,r1,0,15,13 /* Mask out SRR1[WAY] */ + mtspr SPRN_SRR1,r1 + + andc r2,r2,r0 + mtspr SPRN_SPRG6,r2 + b 114f +113: + or r2,r2,r0 + mtspr SPRN_SPRG6,r2 +114: + b e300_read_tlb_fix_ret +#endif + . = 0x3000 AltiVecUnavailable: