diff mbox series

[v2,13/22] target/openrisc: Fix cpu_mmu_index

Message ID 20180618184046.6270-14-richard.henderson@linaro.org
State New
Headers show
Series target/openrisc improvements | expand

Commit Message

Richard Henderson June 18, 2018, 6:40 p.m. UTC
The code in cpu_mmu_index does not properly honor SR_DME.
This bug has workarounds elsewhere in that we flush the
tlb more often than necessary, on the state changes that
should be reflected in a change of mmu_index.

Fixing this means that we can respect the mmu_index that
is given to tlb_flush.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/openrisc/cpu.h              | 23 +++++++++++++--------
 target/openrisc/interrupt.c        |  4 ----
 target/openrisc/interrupt_helper.c | 15 +++-----------
 target/openrisc/mmu.c              | 33 +++++++++++++++++++++++++++---
 target/openrisc/sys_helper.c       |  4 ----
 target/openrisc/translate.c        |  2 +-
 6 files changed, 49 insertions(+), 32 deletions(-)

Comments

Stafford Horne June 24, 2018, 3:44 a.m. UTC | #1
On Tue, Jun 19, 2018 at 3:41 AM Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> The code in cpu_mmu_index does not properly honor SR_DME.
> This bug has workarounds elsewhere in that we flush the
> tlb more often than necessary, on the state changes that
> should be reflected in a change of mmu_index.
>
> Fixing this means that we can respect the mmu_index that
> is given to tlb_flush.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  target/openrisc/cpu.h              | 23 +++++++++++++--------
>  target/openrisc/interrupt.c        |  4 ----
>  target/openrisc/interrupt_helper.c | 15 +++-----------
>  target/openrisc/mmu.c              | 33 +++++++++++++++++++++++++++---
>  target/openrisc/sys_helper.c       |  4 ----
>  target/openrisc/translate.c        |  2 +-
>  6 files changed, 49 insertions(+), 32 deletions(-)


Hello,

I am trying to test these patches running a linux kernel.

For some reason this is causing a strange failure with SMP but not
single core, I see an OpenRISC target pointer is making its way into
the tb_jmp_cache.  I don't think this is right and I am trying to
figure out why this happens and why this patch triggers it.

When bisecting to this commit I get:

[New Thread 0x7fffe9f11700 (LWP 4210)]

[    0.000000] Compiled-in FDT at (ptrval)
[    0.000000] Linux version
4.18.0-rc1-simple-smp-00006-gd5d0782e3db9-dirty
(shorne@lianli.shorne-pla.net) (gcc version 9.0.0 20180426
(experimental) (GCC)) #1013 SMP Sat Jun 23 17:11:42 JST 2018
[    0.000000] CPU: OpenRISC-0 (revision 0) @20 MHz
[    0.000000] -- dcache disabled
[    0.000000] -- icache disabled
[    0.000000] -- dmmu:   64 entries, 1 way(s)
[    0.000000] -- immu:   64 entries, 1 way(s)
[    0.000000] -- additional features:
[    0.000000] -- power management
[    0.000000] -- PIC
[    0.000000] -- timer
[    0.000000] setup_memory: Memory: 0x0-0x2000000
[    0.000000] Setting up paging and PTEs.
[    0.000000] map_ram: Memory: 0x0-0x2000000
[    0.000000] itlb_miss_handler (ptrval)
[    0.000000] dtlb_miss_handler (ptrval)
[    0.000000] OpenRISC Linux -- http://openrisc.io
[    0.000000] percpu: Embedded 6 pages/cpu @(ptrval) s18880 r8192 d22080 u49152
[    0.000000] Built 1 zonelists, mobility grouping off.  Total pages: 4080
[    0.000000] Kernel command line: earlycon
[    0.000000] earlycon: ns16550a0 at MMIO 0x90000000 (options '115200')
[    0.000000] bootconsole [ns16550a0] enabled
[    0.000000] Dentry cache hash table entries: 4096 (order: 1, 16384 bytes)
[    0.000000] Inode-cache hash table entries: 2048 (order: 0, 8192 bytes)
[    0.000000] Sorting __ex_table...
[    0.000000] Memory: 22336K/32768K available (3309K kernel code, 96K
rwdata, 736K rodata, 5898K init, 91K bss, 10432K reserved, 0K
cma-reserved)
[    0.000000] mem_init_done ...........................................
[    0.000000] Hierarchical RCU implementation.
[    0.000000] NR_IRQS: 32, nr_irqs: 32, preallocated irqs: 0
[    0.000000] clocksource: openrisc_timer: mask: 0xffffffff
max_cycles: 0xffffffff, max_idle_ns: 95563022313 ns
[    0.000000] 40.00 BogoMIPS (lpj=200000)
[    0.000000] pid_max: default: 32768 minimum: 301
[    0.000000] Mount-cache hash table entries: 2048 (order: 0, 8192 bytes)
[    0.000000] Mountpoint-cache hash table entries: 2048 (order: 0, 8192 bytes)


(gdb) bt
#0  0x00005555556d3e59 in tb_lookup__cpu_state (cf_mask=0,
flags=<synthetic pointer>, cs_base=<synthetic pointer>, pc=<synthetic
pointer>, cpu=0x555555f81300)
    at /home/shorne/work/openrisc/qemu/include/exec/tb-lookup.h:31
#1  0x00005555556d3e59 in tb_find (cf_mask=0, tb_exit=0,
last_tb=0x7fffe223ff00 <code_gen_buffer+2358995>, cpu=0x555555f81300)
at /home/shorne/work/openrisc/qemu/accel/tcg/cpu-exec.c:390
#2  0x00005555556d3e59 in cpu_exec (cpu=cpu@entry=0x555555f81300) at
/home/shorne/work/openrisc/qemu/accel/tcg/cpu-exec.c:735
#3  0x00005555556a0d2b in tcg_cpu_exec (cpu=cpu@entry=0x555555f81300)
at /home/shorne/work/openrisc/qemu/cpus.c:1362
#4  0x00005555556a238e in qemu_tcg_rr_cpu_thread_fn (arg=<optimized
out>) at /home/shorne/work/openrisc/qemu/cpus.c:1461
#5  0x0000555555886005 in qemu_thread_start (args=0x555555f93ef0) at
/home/shorne/work/openrisc/qemu/util/qemu-thread-posix.c:507
#6  0x00007ffff2a18564 in start_thread () at /lib64/libpthread.so.0
#7  0x00007ffff274c31f in clone () at /lib64/libc.so.6
(gdb) l
26          uint32_t hash;
27
28          cpu_get_tb_cpu_state(env, pc, cs_base, flags);
29          hash = tb_jmp_cache_hash_func(*pc);
30          tb = atomic_rcu_read(&cpu->tb_jmp_cache[hash]);
31          if (likely(tb &&
32                     tb->pc == *pc &&
33                     tb->cs_base == *cs_base &&
34                     tb->flags == *flags &&
35                     tb->trace_vcpu_dstate == *cpu->trace_dstate &&
(gdb) p tb
$1 = (TranslationBlock *) 0xc03c90a8


To reproduce I am running qemu with:
  qemu-system-or1k -cpu or1200 -M or1k-sim -kernel
or1k-linux-4.18-rc1-smp -serial stdio -nographic -monitor none -smp
cpus=2 -m 128

Kernel (need to gunzip):
  SMP - http://shorne.noip.me/downloads/or1k-linux-4.18-rc1-smp.gz
  Single - http://shorne.noip.me/downloads/or1k-linux-4.18-rc1.gz

I will continue to investigate, I just figured out SMP triggers it so
maybe that will uncover something more.

Sorry, if this mail gets clobbered I am using the gmail web interface.

-Stafford
Stafford Horne June 26, 2018, 10:07 p.m. UTC | #2
Hello,

I think I found out something.

in: target/openrisc/sys_helper.c:92

When we write to `env->tlb.dtlb[idx].tr`  in helper_mtspr():
  93          case TO_SPR(1, 640) ... TO_SPR(1, 640 + TLB_SIZE - 1):
/* DTLBW0TR 0-127 */
  94              idx = spr - TO_SPR(1, 640);
  95              env->tlb.dtlb[idx].tr = rb;


Somehow we are overlapping with `cpu->tb_jmp_cache`,  these are both
pointing to the same spot in memory.

(gdb) p &cs->tb_jmp_cache[3014]
$9 = (struct TranslationBlock **) 0x55555608b300
(gdb) p &env->tlb.dtlb[idx].tr
$10 = (uint32_t *) 0x55555608b304


I can't see why yet, but it should be something simple.  Still looking.

-Stafford
On Sun, Jun 24, 2018 at 12:44 PM Stafford Horne <shorne@gmail.com> wrote:
>
> On Tue, Jun 19, 2018 at 3:41 AM Richard Henderson
> <richard.henderson@linaro.org> wrote:
> >
> > The code in cpu_mmu_index does not properly honor SR_DME.
> > This bug has workarounds elsewhere in that we flush the
> > tlb more often than necessary, on the state changes that
> > should be reflected in a change of mmu_index.
> >
> > Fixing this means that we can respect the mmu_index that
> > is given to tlb_flush.
> >
> > Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> > ---
> >  target/openrisc/cpu.h              | 23 +++++++++++++--------
> >  target/openrisc/interrupt.c        |  4 ----
> >  target/openrisc/interrupt_helper.c | 15 +++-----------
> >  target/openrisc/mmu.c              | 33 +++++++++++++++++++++++++++---
> >  target/openrisc/sys_helper.c       |  4 ----
> >  target/openrisc/translate.c        |  2 +-
> >  6 files changed, 49 insertions(+), 32 deletions(-)
>
>
> Hello,
>
> I am trying to test these patches running a linux kernel.
>
> For some reason this is causing a strange failure with SMP but not
> single core, I see an OpenRISC target pointer is making its way into
> the tb_jmp_cache.  I don't think this is right and I am trying to
> figure out why this happens and why this patch triggers it.
>
> When bisecting to this commit I get:
>
> [New Thread 0x7fffe9f11700 (LWP 4210)]
>
> [    0.000000] Compiled-in FDT at (ptrval)
> [    0.000000] Linux version
> 4.18.0-rc1-simple-smp-00006-gd5d0782e3db9-dirty
> (shorne@lianli.shorne-pla.net) (gcc version 9.0.0 20180426
> (experimental) (GCC)) #1013 SMP Sat Jun 23 17:11:42 JST 2018
> [    0.000000] CPU: OpenRISC-0 (revision 0) @20 MHz
> [    0.000000] -- dcache disabled
> [    0.000000] -- icache disabled
> [    0.000000] -- dmmu:   64 entries, 1 way(s)
> [    0.000000] -- immu:   64 entries, 1 way(s)
> [    0.000000] -- additional features:
> [    0.000000] -- power management
> [    0.000000] -- PIC
> [    0.000000] -- timer
> [    0.000000] setup_memory: Memory: 0x0-0x2000000
> [    0.000000] Setting up paging and PTEs.
> [    0.000000] map_ram: Memory: 0x0-0x2000000
> [    0.000000] itlb_miss_handler (ptrval)
> [    0.000000] dtlb_miss_handler (ptrval)
> [    0.000000] OpenRISC Linux -- http://openrisc.io
> [    0.000000] percpu: Embedded 6 pages/cpu @(ptrval) s18880 r8192 d22080 u49152
> [    0.000000] Built 1 zonelists, mobility grouping off.  Total pages: 4080
> [    0.000000] Kernel command line: earlycon
> [    0.000000] earlycon: ns16550a0 at MMIO 0x90000000 (options '115200')
> [    0.000000] bootconsole [ns16550a0] enabled
> [    0.000000] Dentry cache hash table entries: 4096 (order: 1, 16384 bytes)
> [    0.000000] Inode-cache hash table entries: 2048 (order: 0, 8192 bytes)
> [    0.000000] Sorting __ex_table...
> [    0.000000] Memory: 22336K/32768K available (3309K kernel code, 96K
> rwdata, 736K rodata, 5898K init, 91K bss, 10432K reserved, 0K
> cma-reserved)
> [    0.000000] mem_init_done ...........................................
> [    0.000000] Hierarchical RCU implementation.
> [    0.000000] NR_IRQS: 32, nr_irqs: 32, preallocated irqs: 0
> [    0.000000] clocksource: openrisc_timer: mask: 0xffffffff
> max_cycles: 0xffffffff, max_idle_ns: 95563022313 ns
> [    0.000000] 40.00 BogoMIPS (lpj=200000)
> [    0.000000] pid_max: default: 32768 minimum: 301
> [    0.000000] Mount-cache hash table entries: 2048 (order: 0, 8192 bytes)
> [    0.000000] Mountpoint-cache hash table entries: 2048 (order: 0, 8192 bytes)
>
>
> (gdb) bt
> #0  0x00005555556d3e59 in tb_lookup__cpu_state (cf_mask=0,
> flags=<synthetic pointer>, cs_base=<synthetic pointer>, pc=<synthetic
> pointer>, cpu=0x555555f81300)
>     at /home/shorne/work/openrisc/qemu/include/exec/tb-lookup.h:31
> #1  0x00005555556d3e59 in tb_find (cf_mask=0, tb_exit=0,
> last_tb=0x7fffe223ff00 <code_gen_buffer+2358995>, cpu=0x555555f81300)
> at /home/shorne/work/openrisc/qemu/accel/tcg/cpu-exec.c:390
> #2  0x00005555556d3e59 in cpu_exec (cpu=cpu@entry=0x555555f81300) at
> /home/shorne/work/openrisc/qemu/accel/tcg/cpu-exec.c:735
> #3  0x00005555556a0d2b in tcg_cpu_exec (cpu=cpu@entry=0x555555f81300)
> at /home/shorne/work/openrisc/qemu/cpus.c:1362
> #4  0x00005555556a238e in qemu_tcg_rr_cpu_thread_fn (arg=<optimized
> out>) at /home/shorne/work/openrisc/qemu/cpus.c:1461
> #5  0x0000555555886005 in qemu_thread_start (args=0x555555f93ef0) at
> /home/shorne/work/openrisc/qemu/util/qemu-thread-posix.c:507
> #6  0x00007ffff2a18564 in start_thread () at /lib64/libpthread.so.0
> #7  0x00007ffff274c31f in clone () at /lib64/libc.so.6
> (gdb) l
> 26          uint32_t hash;
> 27
> 28          cpu_get_tb_cpu_state(env, pc, cs_base, flags);
> 29          hash = tb_jmp_cache_hash_func(*pc);
> 30          tb = atomic_rcu_read(&cpu->tb_jmp_cache[hash]);
> 31          if (likely(tb &&
> 32                     tb->pc == *pc &&
> 33                     tb->cs_base == *cs_base &&
> 34                     tb->flags == *flags &&
> 35                     tb->trace_vcpu_dstate == *cpu->trace_dstate &&
> (gdb) p tb
> $1 = (TranslationBlock *) 0xc03c90a8
>
>
> To reproduce I am running qemu with:
>   qemu-system-or1k -cpu or1200 -M or1k-sim -kernel
> or1k-linux-4.18-rc1-smp -serial stdio -nographic -monitor none -smp
> cpus=2 -m 128
>
> Kernel (need to gunzip):
>   SMP - http://shorne.noip.me/downloads/or1k-linux-4.18-rc1-smp.gz
>   Single - http://shorne.noip.me/downloads/or1k-linux-4.18-rc1.gz
>
> I will continue to investigate, I just figured out SMP triggers it so
> maybe that will uncover something more.
>
> Sorry, if this mail gets clobbered I am using the gmail web interface.
>
> -Stafford
Richard Henderson June 26, 2018, 10:26 p.m. UTC | #3
On 06/26/2018 03:07 PM, Stafford Horne wrote:
> Hello,
> 
> I think I found out something.
> 
> in: target/openrisc/sys_helper.c:92
> 
> When we write to `env->tlb.dtlb[idx].tr`  in helper_mtspr():
>   93          case TO_SPR(1, 640) ... TO_SPR(1, 640 + TLB_SIZE - 1):
> /* DTLBW0TR 0-127 */
>   94              idx = spr - TO_SPR(1, 640);
>   95              env->tlb.dtlb[idx].tr = rb;
> 
> 
> Somehow we are overlapping with `cpu->tb_jmp_cache`,  these are both
> pointing to the same spot in memory.
> 
> (gdb) p &cs->tb_jmp_cache[3014]
> $9 = (struct TranslationBlock **) 0x55555608b300
> (gdb) p &env->tlb.dtlb[idx].tr
> $10 = (uint32_t *) 0x55555608b304

That is definitely weird.  How about

(gdb) p openrisc_env_get_cpu(env)
$1 = xxxx
(gdb) p &$1->parent_obj
(gdb) p &$1->env
(gdb) p cs->env_ptr

There should be 4096 entries in tb_jmp_cache, so there should
be no way that overlaps.  I can only imagine either CS or ENV
is incorrect somehow.  How that would be, I don't know...


r~
Stafford Horne June 27, 2018, 12:59 p.m. UTC | #4
On Tue, Jun 26, 2018 at 03:26:01PM -0700, Richard Henderson wrote:
> On 06/26/2018 03:07 PM, Stafford Horne wrote:
> > Hello,
> > 
> > I think I found out something.
> > 
> > in: target/openrisc/sys_helper.c:92
> > 
> > When we write to `env->tlb.dtlb[idx].tr`  in helper_mtspr():
> >   93          case TO_SPR(1, 640) ... TO_SPR(1, 640 + TLB_SIZE - 1):
> > /* DTLBW0TR 0-127 */
> >   94              idx = spr - TO_SPR(1, 640);
> >   95              env->tlb.dtlb[idx].tr = rb;
> > 
> > 
> > Somehow we are overlapping with `cpu->tb_jmp_cache`,  these are both
> > pointing to the same spot in memory.
> > 
> > (gdb) p &cs->tb_jmp_cache[3014]
> > $9 = (struct TranslationBlock **) 0x55555608b300
> > (gdb) p &env->tlb.dtlb[idx].tr
> > $10 = (uint32_t *) 0x55555608b304
> 
> That is definitely weird.  How about
> 
> (gdb) p openrisc_env_get_cpu(env)
> $1 = xxxx
> (gdb) p &$1->parent_obj
> (gdb) p &$1->env
> (gdb) p cs->env_ptr
> 
> There should be 4096 entries in tb_jmp_cache, so there should
> be no way that overlaps.  I can only imagine either CS or ENV
> is incorrect somehow.  How that would be, I don't know...

Nothing looks strange there... but this does... :)

(gdb) p &cs->tb_jmp_cache[3014]
$56 = (struct TranslationBlock **) 0x55555606c570
(gdb) p &env->tlb.dtlb[idx].tr
$57 = (uint32_t *) 0x55555606c574
(gdb) p &env->tlb.dtlb[idx].mr
$58 = (uint32_t *) 0x55555606c570
(gdb) p idx
$59 = -1502

The index is negative... this patch should fix that.

@@ -78,6 +78,7 @@ void HELPER(mtspr)(CPUOpenRISCState *env, target_ulong spr,
target_ulong rb)
     case TO_SPR(0, 1024) ... TO_SPR(0, 1024 + (16 * 32)): /* Shadow GPRs */
         idx = (spr - 1024);
         env->shadow_gpr[idx / 32][idx % 32] = rb;
+        break;
 
     case TO_SPR(1, 512) ... TO_SPR(1, 512 + TLB_SIZE - 1): /* DTLBW0MR 0-127 */
         idx = spr - TO_SPR(1, 512);

-Stafford
Richard Henderson June 27, 2018, 1:50 p.m. UTC | #5
On 06/27/2018 05:59 AM, Stafford Horne wrote:
> The index is negative... this patch should fix that.
> 
> @@ -78,6 +78,7 @@ void HELPER(mtspr)(CPUOpenRISCState *env, target_ulong spr,
> target_ulong rb)
>      case TO_SPR(0, 1024) ... TO_SPR(0, 1024 + (16 * 32)): /* Shadow GPRs */
>          idx = (spr - 1024);
>          env->shadow_gpr[idx / 32][idx % 32] = rb;
> +        break;
>  
>      case TO_SPR(1, 512) ... TO_SPR(1, 512 + TLB_SIZE - 1): /* DTLBW0MR 0-127 */

OMG.  That's embarrasing...


r~
Stafford Horne June 27, 2018, 11:08 p.m. UTC | #6
On Wed, Jun 27, 2018 at 06:50:18AM -0700, Richard Henderson wrote:
> On 06/27/2018 05:59 AM, Stafford Horne wrote:
> > The index is negative... this patch should fix that.
> > 
> > @@ -78,6 +78,7 @@ void HELPER(mtspr)(CPUOpenRISCState *env, target_ulong spr,
> > target_ulong rb)
> >      case TO_SPR(0, 1024) ... TO_SPR(0, 1024 + (16 * 32)): /* Shadow GPRs */
> >          idx = (spr - 1024);
> >          env->shadow_gpr[idx / 32][idx % 32] = rb;
> > +        break;
> >  
> >      case TO_SPR(1, 512) ... TO_SPR(1, 512 + TLB_SIZE - 1): /* DTLBW0MR 0-127 */
> 
> OMG.  That's embarrasing...

Yes, I thought so too, it's my bug.  I am little surprised it didn't cause
issues before.

I am still getting failures on SMP, this time the kernel is jumping to some
unknown address, maybe an itlb issue, I will continue to debug.  Bisecting it is
exposing some other issues (the mmu handlers were not getting init'd during one
point in time).

-Stafford
Richard Henderson June 28, 2018, 1:36 a.m. UTC | #7
On 06/27/2018 04:08 PM, Stafford Horne wrote:
> I am still getting failures on SMP, this time the kernel is jumping to some
> unknown address, maybe an itlb issue, I will continue to debug.  Bisecting it is
> exposing some other issues (the mmu handlers were not getting init'd during one
> point in time).

Fixed, I think.  I traced it down to the l.mtspr rewrite.


r~
Stafford Horne June 28, 2018, 9:27 p.m. UTC | #8
On Wed, Jun 27, 2018 at 06:36:20PM -0700, Richard Henderson wrote:
> On 06/27/2018 04:08 PM, Stafford Horne wrote:
> > I am still getting failures on SMP, this time the kernel is jumping to some
> > unknown address, maybe an itlb issue, I will continue to debug.  Bisecting it is
> > exposing some other issues (the mmu handlers were not getting init'd during one
> > point in time).
> 
> Fixed, I think.  I traced it down to the l.mtspr rewrite.

Thanks, the new series works for me.

-Stafford
diff mbox series

Patch

diff --git a/target/openrisc/cpu.h b/target/openrisc/cpu.h
index 947ca00d8d..c48802ad8f 100644
--- a/target/openrisc/cpu.h
+++ b/target/openrisc/cpu.h
@@ -384,9 +384,12 @@  void cpu_openrisc_count_stop(OpenRISCCPU *cpu);
 
 #include "exec/cpu-all.h"
 
-#define TB_FLAGS_DFLAG 1
-#define TB_FLAGS_R0_0  2
+#define TB_FLAGS_SM    SR_SM
+#define TB_FLAGS_DME   SR_DME
+#define TB_FLAGS_IME   SR_IME
 #define TB_FLAGS_OVE   SR_OVE
+#define TB_FLAGS_DFLAG 2      /* reuse SR_TEE */
+#define TB_FLAGS_R0_0  4      /* reuse SR_IEE */
 
 static inline uint32_t cpu_get_gpr(const CPUOpenRISCState *env, int i)
 {
@@ -404,17 +407,21 @@  static inline void cpu_get_tb_cpu_state(CPUOpenRISCState *env,
 {
     *pc = env->pc;
     *cs_base = 0;
-    *flags = (env->dflag
-              | (cpu_get_gpr(env, 0) == 0 ? TB_FLAGS_R0_0 : 0)
-              | (env->sr & SR_OVE));
+    *flags = (env->dflag ? TB_FLAGS_DFLAG : 0)
+           | (cpu_get_gpr(env, 0) ? 0 : TB_FLAGS_R0_0)
+           | (env->sr & (SR_SM | SR_DME | SR_IME | SR_OVE));
 }
 
 static inline int cpu_mmu_index(CPUOpenRISCState *env, bool ifetch)
 {
-    if (!(env->sr & SR_IME)) {
-        return MMU_NOMMU_IDX;
+    int ret = MMU_NOMMU_IDX;  /* mmu is disabled */
+
+    if (env->sr & (ifetch ? SR_IME : SR_DME)) {
+        /* The mmu is enabled; test supervisor state.  */
+        ret = env->sr & SR_SM ? MMU_SUPERVISOR_IDX : MMU_USER_IDX;
     }
-    return (env->sr & SR_SM) == 0 ? MMU_USER_IDX : MMU_SUPERVISOR_IDX;
+
+    return ret;
 }
 
 static inline uint32_t cpu_get_sr(const CPUOpenRISCState *env)
diff --git a/target/openrisc/interrupt.c b/target/openrisc/interrupt.c
index d9cb363fea..e28042856a 100644
--- a/target/openrisc/interrupt.c
+++ b/target/openrisc/interrupt.c
@@ -50,10 +50,6 @@  void openrisc_cpu_do_interrupt(CPUState *cs)
         env->eear = env->pc;
     }
 
-    /* For machine-state changed between user-mode and supervisor mode,
-       we need flush TLB when we enter&exit EXCP.  */
-    tlb_flush(cs);
-
     env->esr = cpu_get_sr(env);
     env->sr &= ~SR_DME;
     env->sr &= ~SR_IME;
diff --git a/target/openrisc/interrupt_helper.c b/target/openrisc/interrupt_helper.c
index a2e9003969..9c5489f5f7 100644
--- a/target/openrisc/interrupt_helper.c
+++ b/target/openrisc/interrupt_helper.c
@@ -25,16 +25,7 @@ 
 
 void HELPER(rfe)(CPUOpenRISCState *env)
 {
-    OpenRISCCPU *cpu = openrisc_env_get_cpu(env);
-#ifndef CONFIG_USER_ONLY
-    int need_flush_tlb = (cpu->env.sr & (SR_SM | SR_IME | SR_DME)) ^
-                         (cpu->env.esr & (SR_SM | SR_IME | SR_DME));
-    if (need_flush_tlb) {
-        CPUState *cs = CPU(cpu);
-        tlb_flush(cs);
-    }
-#endif
-    cpu->env.pc = cpu->env.epcr;
-    cpu->env.lock_addr = -1;
-    cpu_set_sr(&cpu->env, cpu->env.esr);
+    env->pc = env->epcr;
+    env->lock_addr = -1;
+    cpu_set_sr(env, env->esr);
 }
diff --git a/target/openrisc/mmu.c b/target/openrisc/mmu.c
index 856969a7f2..b293b64e98 100644
--- a/target/openrisc/mmu.c
+++ b/target/openrisc/mmu.c
@@ -246,9 +246,36 @@  hwaddr openrisc_cpu_get_phys_page_debug(CPUState *cs, vaddr addr)
 void tlb_fill(CPUState *cs, target_ulong addr, int size,
               MMUAccessType access_type, int mmu_idx, uintptr_t retaddr)
 {
-    int ret = openrisc_cpu_handle_mmu_fault(cs, addr, size,
-                                            access_type, mmu_idx);
-    if (ret) {
+    OpenRISCCPU *cpu = OPENRISC_CPU(cs);
+    int ret, prot = 0;
+    hwaddr physical = 0;
+
+    if (mmu_idx == MMU_NOMMU_IDX) {
+        ret = get_phys_nommu(&physical, &prot, addr);
+    } else {
+        bool super = mmu_idx == MMU_SUPERVISOR_IDX;
+        if (access_type == MMU_INST_FETCH) {
+            ret = get_phys_code(cpu, &physical, &prot, addr, 2, super);
+        } else {
+            ret = get_phys_data(cpu, &physical, &prot, addr,
+                                access_type == MMU_DATA_STORE, super);
+        }
+    }
+
+    if (ret == TLBRET_MATCH) {
+        tlb_set_page(cs, addr & TARGET_PAGE_MASK,
+                     physical & TARGET_PAGE_MASK, prot,
+                     mmu_idx, TARGET_PAGE_SIZE);
+    } else if (ret < 0) {
+        int rw;
+        if (access_type == MMU_INST_FETCH) {
+            rw = 2;
+        } else if (access_type == MMU_DATA_STORE) {
+            rw = 1;
+        } else {
+            rw = 0;
+        }
+        cpu_openrisc_raise_mmu_exception(cpu, addr, rw, ret);
         /* Raise Exception.  */
         cpu_loop_exit_restore(cs, retaddr);
     }
diff --git a/target/openrisc/sys_helper.c b/target/openrisc/sys_helper.c
index e00aaa332e..0a74c9522f 100644
--- a/target/openrisc/sys_helper.c
+++ b/target/openrisc/sys_helper.c
@@ -56,10 +56,6 @@  void HELPER(mtspr)(CPUOpenRISCState *env, target_ulong spr, target_ulong rb)
         break;
 
     case TO_SPR(0, 17): /* SR */
-        if ((env->sr & (SR_IME | SR_DME | SR_SM)) ^
-            (rb & (SR_IME | SR_DME | SR_SM))) {
-            tlb_flush(cs);
-        }
         cpu_set_sr(env, rb);
         break;
 
diff --git a/target/openrisc/translate.c b/target/openrisc/translate.c
index f19f0d257b..60c6e19f4b 100644
--- a/target/openrisc/translate.c
+++ b/target/openrisc/translate.c
@@ -59,7 +59,7 @@  static inline bool is_user(DisasContext *dc)
 #ifdef CONFIG_USER_ONLY
     return true;
 #else
-    return dc->mem_idx == MMU_USER_IDX;
+    return !(dc->tb_flags & TB_FLAGS_SM);
 #endif
 }