Message ID | 1424436345-37924-4-git-send-email-pbonzini@redhat.com |
---|---|
State | New |
Headers | show |
On 20.02.15 13:45, Paolo Bonzini wrote: > PowerPC TCG flushes the TLB on every IR/DR change, which basically > means on every user<->kernel context switch. Encode IR/DR in the > MMU index. > > This brings the number of TLB flushes down from ~900000 to ~50000 > for starting up the Debian installer, which is in line with x86 > and gives a ~10% performance improvement. > > Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> > --- > target-ppc/cpu.h | 7 ++----- > target-ppc/excp_helper.c | 3 --- > target-ppc/helper_regs.h | 11 ++++++----- > 3 files changed, 8 insertions(+), 13 deletions(-) > > diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h > index aae33a9..610d884 100644 > --- a/target-ppc/cpu.h > +++ b/target-ppc/cpu.h > @@ -943,7 +943,8 @@ struct ppc_segment_page_sizes { > > /*****************************************************************************/ > /* The whole PowerPC CPU context */ > -#define NB_MMU_MODES 3 > +#define NB_MMU_MODES 12 > +#define MMU_USER_IDX 3 /* PR=IR=DR=1 */ > > #define PPC_CPU_OPCODES_LEN 0x40 > #define PPC_CPU_INDIRECT_OPCODES_LEN 0x20 > @@ -1252,10 +1253,6 @@ static inline CPUPPCState *cpu_init(const char *cpu_model) > #define cpu_list ppc_cpu_list > > /* MMU modes definitions */ > -#define MMU_MODE0_SUFFIX _user > -#define MMU_MODE1_SUFFIX _kernel > -#define MMU_MODE2_SUFFIX _hypv > -#define MMU_USER_IDX 0 > static inline int cpu_mmu_index (CPUPPCState *env) > { > return env->mmu_idx; > diff --git a/target-ppc/excp_helper.c b/target-ppc/excp_helper.c > index b803475..f608701 100644 > --- a/target-ppc/excp_helper.c > +++ b/target-ppc/excp_helper.c > @@ -623,9 +623,6 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp) > > if (env->spr[SPR_LPCR] & LPCR_AIL) { > new_msr |= (1 << MSR_IR) | (1 << MSR_DR); > - } else if (msr & ((1 << MSR_IR) | (1 << MSR_DR))) { > - /* If we disactivated any translation, flush TLBs */ > - tlb_flush(cs, 1); > } > > #ifdef TARGET_PPC64 > diff --git a/target-ppc/helper_regs.h 
b/target-ppc/helper_regs.h > index 271fddf..23b8ded 100644 > --- a/target-ppc/helper_regs.h > +++ b/target-ppc/helper_regs.h > @@ -41,12 +41,15 @@ static inline void hreg_swap_gpr_tgpr(CPUPPCState *env) > > static inline void hreg_compute_mem_idx(CPUPPCState *env) > { > + int high; > + > /* Precompute MMU index */ > if (msr_pr == 0 && msr_hv != 0) { > - env->mmu_idx = 2; > + high = 2; Could you instead do something like uint32_t mmu_idx = 0; mmu_idx |= MMU_IDX_HV; > } else { > - env->mmu_idx = 1 - msr_pr; > + high = 1 - msr_pr; mmu_idx |= (msr_pr & 1) ? MMU_IDX_PR : 0; > } > + env->mmu_idx = (high << 2) | (msr_ir << 1) | msr_dr; mmu_idx |= (msr_ir & 1) ? MMU_IDX_IR : 0; mmu_idx |= (msr_dr & 1) ? MMU_IDX_DR : 0; env->mmu_idx = mmu_idx; and check whether the compiler is smart enough to optimize this out considering that it's all constants? Also please double-check that 440 still works. That was the target that gave me the most headaches on DR/IR switching so far. Otherwise looks simple and clean to me :). Alex > } > > static inline void hreg_compute_hflags(CPUPPCState *env) > @@ -56,7 +59,7 @@ static inline void hreg_compute_hflags(CPUPPCState *env) > /* We 'forget' FE0 & FE1: we'll never generate imprecise exceptions */ > hflags_mask = (1 << MSR_VR) | (1 << MSR_AP) | (1 << MSR_SA) | > (1 << MSR_PR) | (1 << MSR_FP) | (1 << MSR_SE) | (1 << MSR_BE) | > - (1 << MSR_LE) | (1 << MSR_VSX); > + (1 << MSR_LE) | (1 << MSR_VSX) | (1 << MSR_IR) | (1 << MSR_DR); > hflags_mask |= (1ULL << MSR_CM) | (1ULL << MSR_SF) | MSR_HVB; > hreg_compute_mem_idx(env); > env->hflags = env->msr & hflags_mask; > @@ -82,8 +85,6 @@ static inline int hreg_store_msr(CPUPPCState *env, target_ulong value, > } > if (((value >> MSR_IR) & 1) != msr_ir || > ((value >> MSR_DR) & 1) != msr_dr) { > - /* Flush all tlb when changing translation mode */ > - tlb_flush(cs, 1); > excp = POWERPC_EXCP_NONE; > cs->interrupt_request |= CPU_INTERRUPT_EXITTB; > } >
On 20/02/2015 14:00, Alexander Graf wrote: > Also please double-check that 440 still works. That was the target that > gave me the most headaches on DR/IR switching so far. The ppc-virtexml507-linux-2_6_34.tgz image works for me. Paolo > Otherwise looks simple and clean to me :). > > > Alex > >> } >> >> static inline void hreg_compute_hflags(CPUPPCState *env) >> @@ -56,7 +59,7 @@ static inline void hreg_compute_hflags(CPUPPCState *env) >> /* We 'forget' FE0 & FE1: we'll never generate imprecise exceptions */ >> hflags_mask = (1 << MSR_VR) | (1 << MSR_AP) | (1 << MSR_SA) | >> (1 << MSR_PR) | (1 << MSR_FP) | (1 << MSR_SE) | (1 << MSR_BE) | >> - (1 << MSR_LE) | (1 << MSR_VSX); >> + (1 << MSR_LE) | (1 << MSR_VSX) | (1 << MSR_IR) | (1 << MSR_DR); >> hflags_mask |= (1ULL << MSR_CM) | (1ULL << MSR_SF) | MSR_HVB; >> hreg_compute_mem_idx(env); >> env->hflags = env->msr & hflags_mask; >> @@ -82,8 +85,6 @@ static inline int hreg_store_msr(CPUPPCState *env, target_ulong value, >> } >> if (((value >> MSR_IR) & 1) != msr_ir || >> ((value >> MSR_DR) & 1) != msr_dr) { >> - /* Flush all tlb when changing translation mode */ >> - tlb_flush(cs, 1); >> excp = POWERPC_EXCP_NONE; >> cs->interrupt_request |= CPU_INTERRUPT_EXITTB; >> } >> > >
diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h index aae33a9..610d884 100644 --- a/target-ppc/cpu.h +++ b/target-ppc/cpu.h @@ -943,7 +943,8 @@ struct ppc_segment_page_sizes { /*****************************************************************************/ /* The whole PowerPC CPU context */ -#define NB_MMU_MODES 3 +#define NB_MMU_MODES 12 +#define MMU_USER_IDX 3 /* PR=IR=DR=1 */ #define PPC_CPU_OPCODES_LEN 0x40 #define PPC_CPU_INDIRECT_OPCODES_LEN 0x20 @@ -1252,10 +1253,6 @@ static inline CPUPPCState *cpu_init(const char *cpu_model) #define cpu_list ppc_cpu_list /* MMU modes definitions */ -#define MMU_MODE0_SUFFIX _user -#define MMU_MODE1_SUFFIX _kernel -#define MMU_MODE2_SUFFIX _hypv -#define MMU_USER_IDX 0 static inline int cpu_mmu_index (CPUPPCState *env) { return env->mmu_idx; diff --git a/target-ppc/excp_helper.c b/target-ppc/excp_helper.c index b803475..f608701 100644 --- a/target-ppc/excp_helper.c +++ b/target-ppc/excp_helper.c @@ -623,9 +623,6 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp) if (env->spr[SPR_LPCR] & LPCR_AIL) { new_msr |= (1 << MSR_IR) | (1 << MSR_DR); - } else if (msr & ((1 << MSR_IR) | (1 << MSR_DR))) { - /* If we disactivated any translation, flush TLBs */ - tlb_flush(cs, 1); } #ifdef TARGET_PPC64 diff --git a/target-ppc/helper_regs.h b/target-ppc/helper_regs.h index 271fddf..23b8ded 100644 --- a/target-ppc/helper_regs.h +++ b/target-ppc/helper_regs.h @@ -41,12 +41,15 @@ static inline void hreg_swap_gpr_tgpr(CPUPPCState *env) static inline void hreg_compute_mem_idx(CPUPPCState *env) { + int high; + /* Precompute MMU index */ if (msr_pr == 0 && msr_hv != 0) { - env->mmu_idx = 2; + high = 2; } else { - env->mmu_idx = 1 - msr_pr; + high = 1 - msr_pr; } + env->mmu_idx = (high << 2) | (msr_ir << 1) | msr_dr; } static inline void hreg_compute_hflags(CPUPPCState *env) @@ -56,7 +59,7 @@ static inline void hreg_compute_hflags(CPUPPCState *env) /* We 'forget' FE0 & FE1: we'll never generate imprecise exceptions */ 
hflags_mask = (1 << MSR_VR) | (1 << MSR_AP) | (1 << MSR_SA) | (1 << MSR_PR) | (1 << MSR_FP) | (1 << MSR_SE) | (1 << MSR_BE) | - (1 << MSR_LE) | (1 << MSR_VSX); + (1 << MSR_LE) | (1 << MSR_VSX) | (1 << MSR_IR) | (1 << MSR_DR); hflags_mask |= (1ULL << MSR_CM) | (1ULL << MSR_SF) | MSR_HVB; hreg_compute_mem_idx(env); env->hflags = env->msr & hflags_mask; @@ -82,8 +85,6 @@ static inline int hreg_store_msr(CPUPPCState *env, target_ulong value, } if (((value >> MSR_IR) & 1) != msr_ir || ((value >> MSR_DR) & 1) != msr_dr) { - /* Flush all tlb when changing translation mode */ - tlb_flush(cs, 1); excp = POWERPC_EXCP_NONE; cs->interrupt_request |= CPU_INTERRUPT_EXITTB; }
PowerPC TCG flushes the TLB on every IR/DR change, which basically means on every user<->kernel context switch. Encode IR/DR in the MMU index. This brings the number of TLB flushes down from ~900000 to ~50000 for starting up the Debian installer, which is in line with x86 and gives a ~10% performance improvement. Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> --- target-ppc/cpu.h | 7 ++----- target-ppc/excp_helper.c | 3 --- target-ppc/helper_regs.h | 11 ++++++----- 3 files changed, 8 insertions(+), 13 deletions(-)