
[v1,6/6] mem: change tcg code to rcu style

Message ID 1369793469-30883-7-git-send-email-qemulist@gmail.com
State New

Commit Message

pingfan liu May 29, 2013, 2:11 a.m. UTC
From: Liu Ping Fan <pingfank@linux.vnet.ibm.com>

When adopting RCU style, for TCG code, we need to fix two kinds of paths:
 -tlb_set_page(), which will cache translation info.
 -the instruction emulation path

Signed-off-by: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
----
Not sure about the TCG code, so I took helper_st as the example.
---
 cputlb.c                        |   25 +++++++++++++++++-
 exec.c                          |    2 +-
 include/exec/softmmu_template.h |   53 +++++++++++++++++++++++++++++++-------
 3 files changed, 68 insertions(+), 12 deletions(-)
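
As a rough sketch of the read-side pattern both paths end up using (illustrative only, not part of the submitted patch; names such as dispatch_table_lock and cur_phys_sections are taken from earlier patches in this series): take the RCU read lock, snapshot the dispatch tables under the seqlock, and retry if a writer raced with the reads.

    /* Sketch only: read a consistent snapshot of the dispatch tables. */
    unsigned int start;
    MemoryRegionSection *sections;

    rcu_read_lock();
    do {
        start = seqlock_read_begin(&dispatch_table_lock);
        sections = cur_phys_sections;   /* plus any other published pointers */
    } while (seqlock_read_check(&dispatch_table_lock, start));
    /* ... look up / fill the TLB entry using 'sections' ... */
    rcu_read_unlock();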

Comments

Paolo Bonzini May 29, 2013, 7:22 a.m. UTC | #1
On 29/05/2013 04:11, Liu Ping Fan wrote:
> From: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
> 
> When adopting RCU style, for TCG code, we need to fix two kinds of paths:
>  -tlb_set_page(), which will cache translation info.
>  -the instruction emulation path
> 
> Signed-off-by: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
> ----
> Not sure about the TCG code, so I took helper_st as the example.
> ---
>  cputlb.c                        |   25 +++++++++++++++++-
>  exec.c                          |    2 +-
>  include/exec/softmmu_template.h |   53 +++++++++++++++++++++++++++++++-------
>  3 files changed, 68 insertions(+), 12 deletions(-)
> 
> diff --git a/cputlb.c b/cputlb.c
> index 86666c8..6b55c70 100644
> --- a/cputlb.c
> +++ b/cputlb.c
> @@ -249,6 +249,10 @@ void tlb_set_page(CPUArchState *env, target_ulong vaddr,
>      uintptr_t addend;
>      CPUTLBEntry *te;
>      hwaddr iotlb, xlat, sz;
> +    unsigned long start;
> +    Node *map_base;
> +    MemoryRegionSection *sections_base;
> +    PhysPageEntry *roots_base;
>  
>      assert(size >= TARGET_PAGE_SIZE);
>      if (size != TARGET_PAGE_SIZE) {
> @@ -256,8 +260,18 @@ void tlb_set_page(CPUArchState *env, target_ulong vaddr,
>      }
>  
>      sz = size;
> +
> +reload:
> +    rcu_read_lock();

It's really the whole of cpu_exec that has to be protected by a RCU
critical section.
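
A minimal sketch of that idea (illustrative only; the exact hook point in the TCG execution loop is not part of this patch):

    /* Hypothetical placement: one read-side critical section around the
     * whole TCG execution slice, instead of around each memory access. */
    rcu_read_lock();
    ret = cpu_exec(env);
    rcu_read_unlock();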

> +    start = seqlock_read_begin(&dispatch_table_lock);
> +    map_base = cur_map_nodes;
> +    sections_base = cur_phys_sections;
> +    roots_base = cur_roots;

Why the seqlock_read_check at the end and not here?

Remember that this code is running under the BQL, so there is no need to
protect the TLB flushes otherwise.  There is also no need to do anything
as long as you ref the regions that are entered in the map, and unref
them when you destroy the map.  Those refs will protect TCG's TLB too.

In the end, all of TCG's execution consists of a large RCU critical section.
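
In other words, something along these lines when the dispatch map is built and torn down (the snippet is illustrative; nb_sections and the exact hook points are assumptions, not an existing API):

    /* When a MemoryRegionSection is entered into the next map: */
    memory_region_ref(section->mr);

    /* When the old map is released (e.g. from release_dispatch_map()),
     * once no reader can see it any more: */
    for (i = 0; i < nb_sections; i++) {
        memory_region_unref(sections[i].mr);
    }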

>      section = address_space_translate(&address_space_memory, paddr, &xlat, &sz,
> -                                      false);
> +                                      false,
> +                                      map_base,
> +                                      sections_base,
> +                                      roots_base);
>      assert(sz >= TARGET_PAGE_SIZE);
>  
>  #if defined(DEBUG_TLB)
> @@ -282,6 +296,9 @@ void tlb_set_page(CPUArchState *env, target_ulong vaddr,
>  
>      index = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
>      env->iotlb[mmu_idx][index] = iotlb - vaddr;
> +    /* Serialized with the reader, so we only need to worry about a tlb_flush
> +     * in parallel.  If there is a conflict, just reload the TLB entry until right.
> +     */
>      te = &env->tlb_table[mmu_idx][index];
>      te->addend = addend - vaddr;
>      if (prot & PAGE_READ) {
> @@ -309,6 +326,12 @@ void tlb_set_page(CPUArchState *env, target_ulong vaddr,
>      } else {
>          te->addr_write = -1;
>      }
> +
> +    if (unlikely(seqlock_read_check(&dispatch_table_lock, start))) {
> +        rcu_read_unlock();
> +        goto reload;
> +    }
> +    rcu_read_unlock();
>  }
>  
>  /* NOTE: this function can trigger an exception */
> diff --git a/exec.c b/exec.c
> index 9a5c67f..d4ca101 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -1820,7 +1820,6 @@ static void core_commit(MemoryListener *listener)
>      cur_phys_sections = next_phys_sections;
>      cur_alloc_info = next_alloc_info;
>      cur_roots = next_roots;
> -    seqlock_write_unlock(&dispatch_table_lock);
>  
>      /* since each CPU stores ram addresses in its TLB cache, we must
>         reset the modified entries */
> @@ -1828,6 +1827,7 @@ static void core_commit(MemoryListener *listener)
>      for (env = first_cpu; env != NULL; env = env->next_cpu) {
>          tlb_flush(env, 1);
>      }
> +    seqlock_write_unlock(&dispatch_table_lock);
>  
>      /* Fix me, will changed to call_rcu */
>      release_dispatch_map(info);
> diff --git a/include/exec/softmmu_template.h b/include/exec/softmmu_template.h
> index 8584902..7fa631e 100644
> --- a/include/exec/softmmu_template.h
> +++ b/include/exec/softmmu_template.h

There shouldn't be any need to change this.

> @@ -196,13 +196,16 @@ static void glue(glue(slow_st, SUFFIX), MMUSUFFIX)(CPUArchState *env,
>                                                     int mmu_idx,
>                                                     uintptr_t retaddr);
>  
> +/* Caller holds the RCU read lock; this function will release it. */
>  static inline void glue(io_write, SUFFIX)(CPUArchState *env,
>                                            hwaddr physaddr,
>                                            DATA_TYPE val,
>                                            target_ulong addr,
> -                                          uintptr_t retaddr)
> +                                          uintptr_t retaddr,
> +                                          MemoryRegionSection *mrs_base)
>  {
> -    MemoryRegion *mr = iotlb_to_region(physaddr);
> +    subpage_t *subpg;
> +    MemoryRegion *mr = mrs_base[physaddr & ~TARGET_PAGE_MASK].mr;
>  
>      physaddr = (physaddr & TARGET_PAGE_MASK) + addr;
>      if (mr != &io_mem_rom && mr != &io_mem_notdirty && !can_do_io(env)) {
> @@ -211,6 +214,13 @@ static inline void glue(io_write, SUFFIX)(CPUArchState *env,
>  
>      env->mem_io_vaddr = addr;
>      env->mem_io_pc = retaddr;
> +    if (!mr->terminates) {
> +        subpg = container_of(mr, subpage_t, mmio);
> +        idx = SUBPAGE_IDX(addr);
> +        mr = mrs_base[subpg->sections[idx]].mr;
> +    }
> +    memory_region_ref(mr);
> +    rcu_read_unlock();
>      io_mem_write(mr, physaddr, val, 1 << SHIFT);
>  }
>  
> @@ -222,17 +232,28 @@ void glue(glue(helper_st, SUFFIX), MMUSUFFIX)(CPUArchState *env,
>      target_ulong tlb_addr;
>      uintptr_t retaddr;
>      int index;
> +    unsigned int start;
> +    MemoryRegionSection *mrs_base;
> +    uintptr_t addend;
>  
>      index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
>   redo:
> -    tlb_addr = env->tlb_table[mmu_idx][index].addr_write;
> +    rcu_read_lock();
> +    do {
> +        start = seqlock_read_begin(&dispatch_table_lock);
> +        tlb_addr = env->tlb_table[mmu_idx][index].addr_write;
> +        ioaddr = env->iotlb[mmu_idx][index];
> +        addend = env->tlb_table[mmu_idx][index].addend;
> +        mrs_base =  cur_phys_sections;
> +    } while (seqlock_read_check(&dispatch_table_lock, start));
> +
>      if ((addr & TARGET_PAGE_MASK) == (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
>          if (tlb_addr & ~TARGET_PAGE_MASK) {
>              /* IO access */
>              if ((addr & (DATA_SIZE - 1)) != 0)
>                  goto do_unaligned_access;
>              retaddr = GETPC_EXT();
> -            ioaddr = env->iotlb[mmu_idx][index];
> +            /* will rcu_read_unlock() inside */
>              glue(io_write, SUFFIX)(env, ioaddr, val, addr, retaddr);
>          } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
>          do_unaligned_access:
> @@ -240,22 +261,23 @@ void glue(glue(helper_st, SUFFIX), MMUSUFFIX)(CPUArchState *env,
>  #ifdef ALIGNED_ONLY
>              do_unaligned_access(env, addr, 1, mmu_idx, retaddr);
>  #endif
> +            rcu_read_unlock();
>              glue(glue(slow_st, SUFFIX), MMUSUFFIX)(env, addr, val,
>                                                     mmu_idx, retaddr);
>          } else {
>              /* aligned/unaligned access in the same page */
> -            uintptr_t addend;
>  #ifdef ALIGNED_ONLY
>              if ((addr & (DATA_SIZE - 1)) != 0) {
>                  retaddr = GETPC_EXT();
>                  do_unaligned_access(env, addr, 1, mmu_idx, retaddr);
>              }
>  #endif
> -            addend = env->tlb_table[mmu_idx][index].addend;
>              glue(glue(st, SUFFIX), _raw)((uint8_t *)(intptr_t)
>                                           (addr + addend), val);
> +            rcu_read_unlock();
>          }
>      } else {
> +        rcu_read_unlock();
>          /* the page is not in the TLB : fill it */
>          retaddr = GETPC_EXT();
>  #ifdef ALIGNED_ONLY
> @@ -277,17 +299,25 @@ static void glue(glue(slow_st, SUFFIX), MMUSUFFIX)(CPUArchState *env,
>      hwaddr ioaddr;
>      target_ulong tlb_addr;
>      int index, i;
> +    uintptr_t addend;
>  
>      index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
>   redo:
> -    tlb_addr = env->tlb_table[mmu_idx][index].addr_write;
> +    rcu_read_lock();
> +    do {
> +        start = seqlock_read_begin(&dispatch_table_lock);
> +        tlb_addr = env->tlb_table[mmu_idx][index].addr_write;
> +        ioaddr = env->iotlb[mmu_idx][index];
> +        addend = env->tlb_table[mmu_idx][index].addend;
> +        mrs_base =  cur_phys_sections;
> +    } while (seqlock_read_check(&dispatch_table_lock, start));
> +
>      if ((addr & TARGET_PAGE_MASK) == (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
>          if (tlb_addr & ~TARGET_PAGE_MASK) {
>              /* IO access */
>              if ((addr & (DATA_SIZE - 1)) != 0)
>                  goto do_unaligned_access;
> -            ioaddr = env->iotlb[mmu_idx][index];
> -            glue(io_write, SUFFIX)(env, ioaddr, val, addr, retaddr);
> +            glue(io_write, SUFFIX)(env, ioaddr, val, addr, retaddr, mrs_base);
>          } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
>          do_unaligned_access:
>              /* XXX: not efficient, but simple */
> @@ -295,10 +325,12 @@ static void glue(glue(slow_st, SUFFIX), MMUSUFFIX)(CPUArchState *env,
>               * previous page from the TLB cache.  */
>              for(i = DATA_SIZE - 1; i >= 0; i--) {
>  #ifdef TARGET_WORDS_BIGENDIAN
> +                rcu_read_unlock();
>                  glue(slow_stb, MMUSUFFIX)(env, addr + i,
>                                            val >> (((DATA_SIZE - 1) * 8) - (i * 8)),
>                                            mmu_idx, retaddr);
>  #else
> +                rcu_read_unlock();
>                  glue(slow_stb, MMUSUFFIX)(env, addr + i,
>                                            val >> (i * 8),
>                                            mmu_idx, retaddr);
> @@ -306,11 +338,12 @@ static void glue(glue(slow_st, SUFFIX), MMUSUFFIX)(CPUArchState *env,
>              }
>          } else {
>              /* aligned/unaligned access in the same page */
> -            uintptr_t addend = env->tlb_table[mmu_idx][index].addend;
>              glue(glue(st, SUFFIX), _raw)((uint8_t *)(intptr_t)
>                                           (addr + addend), val);
> +            rcu_read_unlock();
>          }
>      } else {
> +        rcu_read_unlock();
>          /* the page is not in the TLB : fill it */
>          tlb_fill(env, addr, 1, mmu_idx, retaddr);
>          goto redo;
>
pingfan liu May 29, 2013, 9 a.m. UTC | #2
On Wed, May 29, 2013 at 3:22 PM, Paolo Bonzini <pbonzini@redhat.com> wrote:
> On 29/05/2013 04:11, Liu Ping Fan wrote:
>> From: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
>>
>> When adopting RCU style, for TCG code, we need to fix two kinds of paths:
>>  -tlb_set_page(), which will cache translation info.
>>  -the instruction emulation path
>>
>> Signed-off-by: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
>> ----
>> Not sure about the TCG code, so I took helper_st as the example.
>> ---
>>  cputlb.c                        |   25 +++++++++++++++++-
>>  exec.c                          |    2 +-
>>  include/exec/softmmu_template.h |   53 +++++++++++++++++++++++++++++++-------
>>  3 files changed, 68 insertions(+), 12 deletions(-)
>>
>> diff --git a/cputlb.c b/cputlb.c
>> index 86666c8..6b55c70 100644
>> --- a/cputlb.c
>> +++ b/cputlb.c
>> @@ -249,6 +249,10 @@ void tlb_set_page(CPUArchState *env, target_ulong vaddr,
>>      uintptr_t addend;
>>      CPUTLBEntry *te;
>>      hwaddr iotlb, xlat, sz;
>> +    unsigned long start;
>> +    Node *map_base;
>> +    MemoryRegionSection *sections_base;
>> +    PhysPageEntry *roots_base;
>>
>>      assert(size >= TARGET_PAGE_SIZE);
>>      if (size != TARGET_PAGE_SIZE) {
>> @@ -256,8 +260,18 @@ void tlb_set_page(CPUArchState *env, target_ulong vaddr,
>>      }
>>
>>      sz = size;
>> +
>> +reload:
>> +    rcu_read_lock();
>
> It's really the whole of cpu_exec that has to be protected by a RCU
> critical section.
>
Ok.
>> +    start = seqlock_read_begin(&dispatch_table_lock);
>> +    map_base = cur_map_nodes;
>> +    sections_base = cur_phys_sections;
>> +    roots_base = cur_roots;
>
> Why the seqlock_read_check at the end and not here?
>
In case tlb_flush() empties the TLB entry in parallel and we record an incorrect index.

> Remember that this code is running under the BQL, so there is no need to
> protect the TLB flushes otherwise.  There is also no need to do anything

Do you mean that cpu_exec() cannot run outside the BQL like the KVM path does?  If
that is true, then no changes are needed for the TCG code, right?

> as long as you ref the regions that are entered in the map, and unref
> them when you destroy the map.  Those refs will protect TCG's TLB too.
>
My worry is whether the TLB entry still holds the right index into the right
phys_sections if the memory topology changes between tlb_set_page() and the
emulation of the instruction.

> In the end, all of TCG's execution consists of a large RCU critical section.
>
Even stronger than that, since it runs with BQL protection?

Regards,
Pingfan

[...]
Paolo Bonzini May 29, 2013, 9:03 a.m. UTC | #3
On 29/05/2013 11:00, liu ping fan wrote:
>> Remember that this code is running under the BQL, so there is no need to
>> protect the TLB flushes otherwise.  There is also no need to do anything
> 
> Do you mean that cpu_exec() cannot run outside the BQL like the KVM path does?  If
> that is true, then no changes are needed for the TCG code, right?

Indeed.  When/if a fine-grained lock is introduced, it will protect TLB
accesses vs. TLB flushes.  For now, the BQL will do.
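
Purely as an illustration of what such a fine-grained lock might look like later (no tlb_lock exists in this series; today the BQL covers both sides):

    /* Hypothetical per-CPU lock serializing TLB fills and lookups vs. flushes. */
    qemu_mutex_lock(&env->tlb_lock);
    tlb_flush(env, 1);
    qemu_mutex_unlock(&env->tlb_lock);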

Making TCG multi-threaded is a wholly different problem, and a superset
of what's needed for KVM.

(Xen instead is almost the same, only the mapcache has to be protected
by its own mutex).

>> as long as you ref the regions that are entered in the map, and unref
>> them when you destroy the map.  Those refs will protect TCG's TLB too.
>>
> My worry is whether the TLB entry still holds the right index into the right
> phys_sections if the memory topology changes between tlb_set_page() and the
> emulation of the instruction.

That's not possible because of BQL protection.

>> In the end, all of TCG's execution consists of a large RCU critical section.
>
> Even stronger than that, since it runs with BQL protection?

Yes.

Paolo

Patch

diff --git a/cputlb.c b/cputlb.c
index 86666c8..6b55c70 100644
--- a/cputlb.c
+++ b/cputlb.c
@@ -249,6 +249,10 @@  void tlb_set_page(CPUArchState *env, target_ulong vaddr,
     uintptr_t addend;
     CPUTLBEntry *te;
     hwaddr iotlb, xlat, sz;
+    unsigned long start;
+    Node *map_base;
+    MemoryRegionSection *sections_base;
+    PhysPageEntry *roots_base;
 
     assert(size >= TARGET_PAGE_SIZE);
     if (size != TARGET_PAGE_SIZE) {
@@ -256,8 +260,18 @@  void tlb_set_page(CPUArchState *env, target_ulong vaddr,
     }
 
     sz = size;
+
+reload:
+    rcu_read_lock();
+    start = seqlock_read_begin(&dispatch_table_lock);
+    map_base = cur_map_nodes;
+    sections_base = cur_phys_sections;
+    roots_base = cur_roots;
     section = address_space_translate(&address_space_memory, paddr, &xlat, &sz,
-                                      false);
+                                      false,
+                                      map_base,
+                                      sections_base,
+                                      roots_base);
     assert(sz >= TARGET_PAGE_SIZE);
 
 #if defined(DEBUG_TLB)
@@ -282,6 +296,9 @@  void tlb_set_page(CPUArchState *env, target_ulong vaddr,
 
     index = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
     env->iotlb[mmu_idx][index] = iotlb - vaddr;
+    /* Serialized with the reader, so we only need to worry about a tlb_flush
+     * in parallel.  If there is a conflict, just reload the TLB entry until right.
+     */
     te = &env->tlb_table[mmu_idx][index];
     te->addend = addend - vaddr;
     if (prot & PAGE_READ) {
@@ -309,6 +326,12 @@  void tlb_set_page(CPUArchState *env, target_ulong vaddr,
     } else {
         te->addr_write = -1;
     }
+
+    if (unlikely(seqlock_read_check(&dispatch_table_lock, start))) {
+        rcu_read_unlock();
+        goto reload;
+    }
+    rcu_read_unlock();
 }
 
 /* NOTE: this function can trigger an exception */
diff --git a/exec.c b/exec.c
index 9a5c67f..d4ca101 100644
--- a/exec.c
+++ b/exec.c
@@ -1820,7 +1820,6 @@  static void core_commit(MemoryListener *listener)
     cur_phys_sections = next_phys_sections;
     cur_alloc_info = next_alloc_info;
     cur_roots = next_roots;
-    seqlock_write_unlock(&dispatch_table_lock);
 
     /* since each CPU stores ram addresses in its TLB cache, we must
        reset the modified entries */
@@ -1828,6 +1827,7 @@  static void core_commit(MemoryListener *listener)
     for (env = first_cpu; env != NULL; env = env->next_cpu) {
         tlb_flush(env, 1);
     }
+    seqlock_write_unlock(&dispatch_table_lock);
 
     /* Fix me, will changed to call_rcu */
     release_dispatch_map(info);
diff --git a/include/exec/softmmu_template.h b/include/exec/softmmu_template.h
index 8584902..7fa631e 100644
--- a/include/exec/softmmu_template.h
+++ b/include/exec/softmmu_template.h
@@ -196,13 +196,16 @@  static void glue(glue(slow_st, SUFFIX), MMUSUFFIX)(CPUArchState *env,
                                                    int mmu_idx,
                                                    uintptr_t retaddr);
 
+/* Caller holds the RCU read lock; this function will release it. */
 static inline void glue(io_write, SUFFIX)(CPUArchState *env,
                                           hwaddr physaddr,
                                           DATA_TYPE val,
                                           target_ulong addr,
-                                          uintptr_t retaddr)
+                                          uintptr_t retaddr,
+                                          MemoryRegionSection *mrs_base)
 {
-    MemoryRegion *mr = iotlb_to_region(physaddr);
+    subpage_t *subpg;
+    MemoryRegion *mr = mrs_base[physaddr & ~TARGET_PAGE_MASK].mr;
 
     physaddr = (physaddr & TARGET_PAGE_MASK) + addr;
     if (mr != &io_mem_rom && mr != &io_mem_notdirty && !can_do_io(env)) {
@@ -211,6 +214,13 @@  static inline void glue(io_write, SUFFIX)(CPUArchState *env,
 
     env->mem_io_vaddr = addr;
     env->mem_io_pc = retaddr;
+    if (!mr->terminates) {
+        subpg = container_of(mr, subpage_t, mmio);
+        idx = SUBPAGE_IDX(addr);
+        mr = mrs_base[subpg->sections[idx]].mr;
+    }
+    memory_region_ref(mr);
+    rcu_read_unlock();
     io_mem_write(mr, physaddr, val, 1 << SHIFT);
 }
 
@@ -222,17 +232,28 @@  void glue(glue(helper_st, SUFFIX), MMUSUFFIX)(CPUArchState *env,
     target_ulong tlb_addr;
     uintptr_t retaddr;
     int index;
+    unsigned int start;
+    MemoryRegionSection *mrs_base;
+    uintptr_t addend;
 
     index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
  redo:
-    tlb_addr = env->tlb_table[mmu_idx][index].addr_write;
+    rcu_read_lock();
+    do {
+        start = seqlock_read_begin(&dispatch_table_lock);
+        tlb_addr = env->tlb_table[mmu_idx][index].addr_write;
+        ioaddr = env->iotlb[mmu_idx][index];
+        addend = env->tlb_table[mmu_idx][index].addend;
+        mrs_base =  cur_phys_sections;
+    } while (seqlock_read_check(&dispatch_table_lock, start));
+
     if ((addr & TARGET_PAGE_MASK) == (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
         if (tlb_addr & ~TARGET_PAGE_MASK) {
             /* IO access */
             if ((addr & (DATA_SIZE - 1)) != 0)
                 goto do_unaligned_access;
             retaddr = GETPC_EXT();
-            ioaddr = env->iotlb[mmu_idx][index];
+            /* will rcu_read_unlock() inside */
             glue(io_write, SUFFIX)(env, ioaddr, val, addr, retaddr);
         } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
         do_unaligned_access:
@@ -240,22 +261,23 @@  void glue(glue(helper_st, SUFFIX), MMUSUFFIX)(CPUArchState *env,
 #ifdef ALIGNED_ONLY
             do_unaligned_access(env, addr, 1, mmu_idx, retaddr);
 #endif
+            rcu_read_unlock();
             glue(glue(slow_st, SUFFIX), MMUSUFFIX)(env, addr, val,
                                                    mmu_idx, retaddr);
         } else {
             /* aligned/unaligned access in the same page */
-            uintptr_t addend;
 #ifdef ALIGNED_ONLY
             if ((addr & (DATA_SIZE - 1)) != 0) {
                 retaddr = GETPC_EXT();
                 do_unaligned_access(env, addr, 1, mmu_idx, retaddr);
             }
 #endif
-            addend = env->tlb_table[mmu_idx][index].addend;
             glue(glue(st, SUFFIX), _raw)((uint8_t *)(intptr_t)
                                          (addr + addend), val);
+            rcu_read_unlock();
         }
     } else {
+        rcu_read_unlock();
         /* the page is not in the TLB : fill it */
         retaddr = GETPC_EXT();
 #ifdef ALIGNED_ONLY
@@ -277,17 +299,25 @@  static void glue(glue(slow_st, SUFFIX), MMUSUFFIX)(CPUArchState *env,
     hwaddr ioaddr;
     target_ulong tlb_addr;
     int index, i;
+    uintptr_t addend;
 
     index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
  redo:
-    tlb_addr = env->tlb_table[mmu_idx][index].addr_write;
+    rcu_read_lock();
+    do {
+        start = seqlock_read_begin(&dispatch_table_lock);
+        tlb_addr = env->tlb_table[mmu_idx][index].addr_write;
+        ioaddr = env->iotlb[mmu_idx][index];
+        addend = env->tlb_table[mmu_idx][index].addend;
+        mrs_base =  cur_phys_sections;
+    } while (seqlock_read_check(&dispatch_table_lock, start));
+
     if ((addr & TARGET_PAGE_MASK) == (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
         if (tlb_addr & ~TARGET_PAGE_MASK) {
             /* IO access */
             if ((addr & (DATA_SIZE - 1)) != 0)
                 goto do_unaligned_access;
-            ioaddr = env->iotlb[mmu_idx][index];
-            glue(io_write, SUFFIX)(env, ioaddr, val, addr, retaddr);
+            glue(io_write, SUFFIX)(env, ioaddr, val, addr, retaddr, mrs_base);
         } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
         do_unaligned_access:
             /* XXX: not efficient, but simple */
@@ -295,10 +325,12 @@  static void glue(glue(slow_st, SUFFIX), MMUSUFFIX)(CPUArchState *env,
              * previous page from the TLB cache.  */
             for(i = DATA_SIZE - 1; i >= 0; i--) {
 #ifdef TARGET_WORDS_BIGENDIAN
+                rcu_read_unlock();
                 glue(slow_stb, MMUSUFFIX)(env, addr + i,
                                           val >> (((DATA_SIZE - 1) * 8) - (i * 8)),
                                           mmu_idx, retaddr);
 #else
+                rcu_read_unlock();
                 glue(slow_stb, MMUSUFFIX)(env, addr + i,
                                           val >> (i * 8),
                                           mmu_idx, retaddr);
@@ -306,11 +338,12 @@  static void glue(glue(slow_st, SUFFIX), MMUSUFFIX)(CPUArchState *env,
             }
         } else {
             /* aligned/unaligned access in the same page */
-            uintptr_t addend = env->tlb_table[mmu_idx][index].addend;
             glue(glue(st, SUFFIX), _raw)((uint8_t *)(intptr_t)
                                          (addr + addend), val);
+            rcu_read_unlock();
         }
     } else {
+        rcu_read_unlock();
         /* the page is not in the TLB : fill it */
         tlb_fill(env, addr, 1, mmu_idx, retaddr);
         goto redo;