diff mbox

[v1,5/6] mem: make dispatch path satify rcu style

Message ID 1369793469-30883-6-git-send-email-qemulist@gmail.com
State New
Headers show

Commit Message

pingfan liu May 29, 2013, 2:11 a.m. UTC
From: Liu Ping Fan <pingfank@linux.vnet.ibm.com>

Using seqlock to load dispatch context atomic. The dispatch
context consist of: cur_map_nodes, cur_sections, cur_roots.

Also during the dispatch, we should get the terminal, and dup
MemoryRegionSection. So after rcu unlock, the cur dispatch
context can be dropped safely.

Signed-off-by: Liu Ping Fan <pingfank@linux.vnet.ibm.com>

--------
The tcg code related to tlb_set_page() should be fixed too.
But it is a little complicated, separating it for easing review.
---
 exec.c |  239 ++++++++++++++++++++++++++++++++++++++++-----------------------
 1 files changed, 152 insertions(+), 87 deletions(-)

Comments

Paolo Bonzini May 29, 2013, 7:06 a.m. UTC | #1
Il 29/05/2013 04:11, Liu Ping Fan ha scritto:
> From: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
> 
> Using seqlock to load dispatch context atomic. The dispatch
> context consist of: cur_map_nodes, cur_sections, cur_roots.
> 
> Also during the dispatch, we should get the terminal, and dup
> MemoryRegionSection. So after rcu unlock, the cur dispatch
> context can be dropped safely.
> 
> Signed-off-by: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
> 
> --------
> The tcg code related to tlb_set_page() should be fixed too.
> But it is a little complicated, separating it for easing review.
> ---
>  exec.c |  239 ++++++++++++++++++++++++++++++++++++++++-----------------------
>  1 files changed, 152 insertions(+), 87 deletions(-)
> 
> diff --git a/exec.c b/exec.c
> index 315960d..9a5c67f 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -112,6 +112,8 @@ static Node *next_map_nodes;
>  static PhysPageEntry *next_roots;
>  static AllocInfo *next_alloc_info;
>  
> +QemuSeqLock dispatch_table_lock;
> +
>  #define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)
>  
>  static void io_mem_init(void);
> @@ -199,31 +201,24 @@ static void phys_page_set(AddressSpaceDispatch *d,
>                          P_L2_LEVELS - 1);
>  }
>  
> -static MemoryRegionSection *phys_page_find(AddressSpaceDispatch *d, hwaddr index, bool cur)
> +static MemoryRegionSection *phys_page_find(AddressSpaceDispatch *d,
> +            hwaddr index,
> +            Node *map_base,
> +            MemoryRegionSection *sections_base,
> +            PhysPageEntry *root_base)
>  {
> -    PhysPageEntry lp;
> +    PhysPageEntry lp = root_base[d->idx];
>      PhysPageEntry *p;
>      int i;
> -    Node *map_nodes;
> -    MemoryRegionSection *sections;
>  
> -    if (likely(cur)) {
> -        map_nodes = cur_map_nodes;
> -        sections = cur_phys_sections;
> -        lp = cur_roots[d->idx];
> -    } else {
> -        map_nodes = next_map_nodes;
> -        sections = next_phys_sections;
> -        lp = next_roots[d->idx];
> -    }
>      for (i = P_L2_LEVELS - 1; i >= 0 && !lp.is_leaf; i--) {
>          if (lp.ptr == PHYS_MAP_NODE_NIL) {
> -            return &sections[phys_section_unassigned];
> +            return &sections_base[phys_section_unassigned];
>          }
> -        p = map_nodes[lp.ptr];
> +        p = map_base[lp.ptr];
>          lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
>      }
> -    return &sections[lp.ptr];
> +    return &sections_base[lp.ptr];
>  }
>  
>  bool memory_region_is_unassigned(MemoryRegion *mr)
> @@ -233,21 +228,28 @@ bool memory_region_is_unassigned(MemoryRegion *mr)
>  }
>  
>  static MemoryRegionSection *address_space_lookup_region(AddressSpace *as,
> -                                                        hwaddr addr)
> +        hwaddr addr, Node *map_base, MemoryRegionSection *sections_base,
> +        PhysPageEntry *root_base)
> +
>  {
> -    return phys_page_find(as->dispatch, addr >> TARGET_PAGE_BITS, true);
> +    return phys_page_find(as->dispatch, addr >> TARGET_PAGE_BITS,
> +                                    map_base, sections_base, root_base);
>  }
>  
>  MemoryRegionSection *address_space_translate(AddressSpace *as, hwaddr addr,
>                                               hwaddr *xlat, hwaddr *plen,
> -                                             bool is_write)
> +                                             bool is_write,
> +                                             Node *map_base,
> +                                             MemoryRegionSection *sections_base,
> +                                             PhysPageEntry *root_base)

These two functions are always called on the "cur" map.  Only
phys_page_find can be called (from register_subpage) on the "next" map.

Paolo

>  {
>      IOMMUTLBEntry iotlb;
> -    MemoryRegionSection *section, *base = cur_phys_sections;
> +    MemoryRegionSection *section;
>      hwaddr len = *plen;
>  
>      for (;;) {
> -        section = address_space_lookup_region(as, addr);
> +        section = address_space_lookup_region(as, addr, map_base,
> +                        sections_base, root_base);
>  
>          /* Compute offset within MemoryRegionSection */
>          addr -= section->offset_within_address_space;
> @@ -265,7 +267,7 @@ MemoryRegionSection *address_space_translate(AddressSpace *as, hwaddr addr,
>                  | (addr & iotlb.addr_mask));
>          len = MIN(len, (addr | iotlb.addr_mask) - addr + 1);
>          if (!(iotlb.perm & (1 << is_write))) {
> -            section = &base[phys_section_unassigned];
> +            section = &sections_base[phys_section_unassigned];
>              break;
>          }
>  
> @@ -276,6 +278,42 @@ MemoryRegionSection *address_space_translate(AddressSpace *as, hwaddr addr,
>      *xlat = addr;
>      return section;
>  }
> +
> +/* caller should hold rcu read lock */
> +MemoryRegionSection address_space_get_terminal(AddressSpace *as, hwaddr addr,
> +                                             hwaddr *xlat, hwaddr *plen,
> +                                             bool is_write)
> +{
> +    MemoryRegionSection *mrs, *ret, *sections_base;
> +    Node *map_base;
> +    subpage_t *mmio;
> +    unsigned int idx, start;
> +    PhysPageEntry *root_base;
> +
> +    /* Walk through all of the AddressSpace in consistent */
> +    do {
> +        start = seqlock_read_begin(&dispatch_table_lock);
> +        map_base = cur_map_nodes;
> +        sections_base = cur_phys_sections;
> +        /* get all AddressSpaceDispatch root */
> +        root_base = cur_roots;
> +    } while (seqlock_read_check(&dispatch_table_lock, start);
> +
> +    mrs = address_space_translate(as, addr, xlat, plen, is_write,
> +                    map_base, sections_base, root_base);
> +    if (!mrs->mr->terminates) {
> +        mmio = container_of(mrs->mr, subpage_t, mmio);
> +        idx = SUBPAGE_IDX(addr);
> +        ret = &sections_base[mmio->sub_section[idx]];
> +        *xlat += mmio->base;
> +        *xlat -= mrs->offset_within_address_space;
> +        *xlat += mrs->offset_within_region;
> +    } else {
> +        ret = mrs;
> +    }
> +
> +    return *ret;
> +}
>  #endif
>  
>  void cpu_exec_init_all(void)
> @@ -1777,13 +1815,12 @@ static void core_commit(MemoryListener *listener)
>      info->sections = cur_phys_sections;
>      info->roots = cur_roots;
>  
> -    /* Fix me, in future, will be protected by wr seqlock when in parellel
> -     * with reader
> -     */
> +    seqlock_write_lock(&dispatch_table_lock);
>      cur_map_nodes = next_map_nodes;
>      cur_phys_sections = next_phys_sections;
>      cur_alloc_info = next_alloc_info;
>      cur_roots = next_roots;
> +    seqlock_write_unlock(&dispatch_table_lock);
>  
>      /* since each CPU stores ram addresses in its TLB cache, we must
>         reset the modified entries */
> @@ -1890,8 +1927,13 @@ void address_space_destroy_dispatch(AddressSpace *as)
>  
>  static void memory_map_init(void)
>  {
> +    QemuMutex *mutex;
> +
>      qemu_mutex_init(&id_lock);
>      address_space_id_map = g_malloc0(MAX_IDX/BITS_PER_BYTE);
> +    mutex = g_malloc0(sizeof(QemuMutex));
> +    qemu_mutex_init(mutex);
> +    seqlock_init(&dispatch_table_lock, mutex);
>      system_memory = g_malloc(sizeof(*system_memory));
>      memory_region_init(system_memory, "system", INT64_MAX);
>      address_space_init(&address_space_memory, system_memory, "memory");
> @@ -2001,59 +2043,68 @@ bool address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
>      uint8_t *ptr;
>      uint64_t val;
>      hwaddr addr1;
> -    MemoryRegionSection *section;
> +    MemoryRegionSection section;
>      bool error = false;
>  
>      while (len > 0) {
>          l = len;
> -        section = address_space_translate(as, addr, &addr1, &l, is_write);
> +        rcu_read_lock();
> +        section = address_space_get_terminal(as, addr, &addr1, &l, is_write);
>  
>          if (is_write) {
> -            if (!memory_access_is_direct(section->mr, is_write)) {
> +            if (!memory_access_is_direct(section.mr, is_write)) {
>                  l = memory_access_size(l, addr1);
>                  /* XXX: could force cpu_single_env to NULL to avoid
>                     potential bugs */
> +                memory_region_ref(section.mr);
> +                rcu_read_unlock();
>                  if (l == 4) {
>                      /* 32 bit write access */
>                      val = ldl_p(buf);
> -                    error |= io_mem_write(section->mr, addr1, val, 4);
> +                    error |= io_mem_write(section.mr, addr1, val, 4);
>                  } else if (l == 2) {
>                      /* 16 bit write access */
>                      val = lduw_p(buf);
> -                    error |= io_mem_write(section->mr, addr1, val, 2);
> +                    error |= io_mem_write(section.mr, addr1, val, 2);
>                  } else {
>                      /* 8 bit write access */
>                      val = ldub_p(buf);
> -                    error |= io_mem_write(section->mr, addr1, val, 1);
> +                    error |= io_mem_write(section.mr, addr1, val, 1);
>                  }
> +                memory_region_unref(section.mr);
>              } else {
> -                addr1 += memory_region_get_ram_addr(section->mr);
> +                addr1 += memory_region_get_ram_addr(section.mr);
>                  /* RAM case */
>                  ptr = qemu_get_ram_ptr(addr1);
>                  memcpy(ptr, buf, l);
>                  invalidate_and_set_dirty(addr1, l);
> +                rcu_read_unlock();
>              }
>          } else {
> -            if (!memory_access_is_direct(section->mr, is_write)) {
> +            if (!memory_access_is_direct(section.mr, is_write)) {
>                  /* I/O case */
> +                memory_region_ref(section.mr);
> +                rcu_read_unlock();
>                  l = memory_access_size(l, addr1);
>                  if (l == 4) {
>                      /* 32 bit read access */
> -                    error |= io_mem_read(section->mr, addr1, &val, 4);
> +                    error |= io_mem_read(section.mr, addr1, &val, 4);
>                      stl_p(buf, val);
>                  } else if (l == 2) {
>                      /* 16 bit read access */
> -                    error |= io_mem_read(section->mr, addr1, &val, 2);
> +                    error |= io_mem_read(section.mr, addr1, &val, 2);
>                      stw_p(buf, val);
>                  } else {
>                      /* 8 bit read access */
> -                    error |= io_mem_read(section->mr, addr1, &val, 1);
> +                    error |= io_mem_read(section.mr, addr1, &val, 1);
>                      stb_p(buf, val);
>                  }
> +                memory_region_unref(section.mr);
>              } else {
>                  /* RAM case */
> -                ptr = qemu_get_ram_ptr(section->mr->ram_addr + addr1);
> +                ptr = qemu_get_ram_ptr(section.mr->ram_addr + addr1);
>                  memcpy(buf, ptr, l);
> +                rcu_read_unlock();
>              }
>          }
>          len -= l;
> @@ -2089,18 +2140,19 @@ void cpu_physical_memory_write_rom(hwaddr addr,
>      hwaddr l;
>      uint8_t *ptr;
>      hwaddr addr1;
> -    MemoryRegionSection *section;
> +    MemoryRegionSection section;
>  
> +    rcu_read_lock();
>      while (len > 0) {
>          l = len;
> -        section = address_space_translate(&address_space_memory,
> +        section = address_space_get_terminal(&address_space_memory,
>                                            addr, &addr1, &l, true);
>  
> -        if (!(memory_region_is_ram(section->mr) ||
> -              memory_region_is_romd(section->mr))) {
> +        if (!(memory_region_is_ram(section.mr) ||
> +              memory_region_is_romd(section.mr))) {
>              /* do nothing */
>          } else {
> -            addr1 += memory_region_get_ram_addr(section->mr);
> +            addr1 += memory_region_get_ram_addr(section.mr);
>              /* ROM/RAM case */
>              ptr = qemu_get_ram_ptr(addr1);
>              memcpy(ptr, buf, l);
> @@ -2110,6 +2162,7 @@ void cpu_physical_memory_write_rom(hwaddr addr,
>          buf += l;
>          addr += l;
>      }
> +    rcu_read_unlock();
>  }
>  
>  typedef struct {
> @@ -2161,15 +2214,15 @@ static void cpu_notify_map_clients(void)
>  
>  bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
>  {
> -    MemoryRegionSection *section;
> +    MemoryRegionSection section;
>      hwaddr l, xlat;
>  
>      while (len > 0) {
>          l = len;
> -        section = address_space_translate(as, addr, &xlat, &l, is_write);
> -        if (!memory_access_is_direct(section->mr, is_write)) {
> +        section = address_space_get_terminal(as, addr, &xlat, &l, is_write);
> +        if (!memory_access_is_direct(section.mr, is_write)) {
>              l = memory_access_size(l, addr);
> -            if (!memory_region_access_valid(section->mr, xlat, l, is_write)) {
> +            if (!memory_region_access_valid(section.mr, xlat, l, is_write)) {
>                  return false;
>              }
>          }
> @@ -2195,14 +2248,14 @@ void *address_space_map(AddressSpace *as,
>      hwaddr len = *plen;
>      hwaddr todo = 0;
>      hwaddr l, xlat;
> -    MemoryRegionSection *section;
> +    MemoryRegionSection section;
>      ram_addr_t raddr = RAM_ADDR_MAX;
>      ram_addr_t rlen;
>      void *ret;
>  
>      while (len > 0) {
>          l = len;
> -        section = address_space_translate(as, addr, &xlat, &l, is_write);
> +        section = address_space_get_terminal(as, addr, &xlat, &l, is_write);
>  
>          if (!memory_access_is_direct(section->mr, is_write)) {
>              if (todo || bounce.buffer) {
> @@ -2211,20 +2264,20 @@ void *address_space_map(AddressSpace *as,
>              bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
>              bounce.addr = addr;
>              bounce.len = l;
> -            bounce.mr = section->mr;
> +            bounce.mr = section.mr;
>              if (!is_write) {
>                  address_space_read(as, addr, bounce.buffer, l);
>              }
>  
>              *plen = l;
> -            memory_region_ref(section->mr);
> +            memory_region_ref(section.mr);
>              return bounce.buffer;
>          }
>          if (!todo) {
> -            raddr = memory_region_get_ram_addr(section->mr) + xlat;
> -            memory_region_ref(section->mr);
> +            raddr = memory_region_get_ram_addr(section.mr) + xlat;
> +            memory_region_ref(section.mr);
>          } else {
> -            if (memory_region_get_ram_addr(section->mr) + xlat != raddr + todo) {
> +            if (memory_region_get_ram_addr(section.mr) + xlat != raddr + todo) {
>                  break;
>              }
>          }
> @@ -2297,15 +2350,17 @@ static inline uint32_t ldl_phys_internal(hwaddr addr,
>  {
>      uint8_t *ptr;
>      uint64_t val;
> -    MemoryRegionSection *section;
> +    MemoryRegionSection section;
>      hwaddr l = 4;
>      hwaddr addr1;
>  
> -    section = address_space_translate(&address_space_memory, addr, &addr1, &l,
> -                                      false);
> +    rcu_read_lock();
> +    section = address_space_get_terminal(&address_space_memory, addr, &addr1,
> +                                    &l, false);
> +    rcu_read_unlock();
>      if (l < 4 || !memory_access_is_direct(section->mr, false)) {
>          /* I/O case */
> -        io_mem_read(section->mr, addr1, &val, 4);
> +        io_mem_read(section.mr, addr1, &val, 4);
>  #if defined(TARGET_WORDS_BIGENDIAN)
>          if (endian == DEVICE_LITTLE_ENDIAN) {
>              val = bswap32(val);
> @@ -2360,8 +2415,10 @@ static inline uint64_t ldq_phys_internal(hwaddr addr,
>      hwaddr l = 8;
>      hwaddr addr1;
>  
> -    section = address_space_translate(&address_space_memory, addr, &addr1, &l,
> -                                      false);
> +    rcu_read_lock();
> +    section = address_space_get_terminal(&address_space_memory, addr, &addr1,
> +                                    &l, false);
> +    rcu_read_unlock();
>      if (l < 8 || !memory_access_is_direct(section->mr, false)) {
>          /* I/O case */
>          io_mem_read(section->mr, addr1, &val, 8);
> @@ -2423,15 +2480,17 @@ static inline uint32_t lduw_phys_internal(hwaddr addr,
>  {
>      uint8_t *ptr;
>      uint64_t val;
> -    MemoryRegionSection *section;
> +    MemoryRegionSection section;
>      hwaddr l = 2;
>      hwaddr addr1;
>  
> -    section = address_space_translate(&address_space_memory, addr, &addr1, &l,
> -                                      false);
> -    if (l < 2 || !memory_access_is_direct(section->mr, false)) {
> +    rcu_read_lock();
> +    section = address_space_get_terminal(&address_space_memory, addr, &addr1,
> +                                    &l, false);
> +    rcu_read_unlock();
> +    if (l < 2 || !memory_access_is_direct(section.mr, false)) {
>          /* I/O case */
> -        io_mem_read(section->mr, addr1, &val, 2);
> +        io_mem_read(section.mr, addr1, &val, 2);
>  #if defined(TARGET_WORDS_BIGENDIAN)
>          if (endian == DEVICE_LITTLE_ENDIAN) {
>              val = bswap16(val);
> @@ -2443,7 +2502,7 @@ static inline uint32_t lduw_phys_internal(hwaddr addr,
>  #endif
>      } else {
>          /* RAM case */
> -        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
> +        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section.mr)
>                                  & TARGET_PAGE_MASK)
>                                 + addr1);
>          switch (endian) {
> @@ -2482,16 +2541,18 @@ uint32_t lduw_be_phys(hwaddr addr)
>  void stl_phys_notdirty(hwaddr addr, uint32_t val)
>  {
>      uint8_t *ptr;
> -    MemoryRegionSection *section;
> +    MemoryRegionSection section;
>      hwaddr l = 4;
>      hwaddr addr1;
>  
> -    section = address_space_translate(&address_space_memory, addr, &addr1, &l,
> -                                      true);
> -    if (l < 4 || !memory_access_is_direct(section->mr, true)) {
> -        io_mem_write(section->mr, addr1, val, 4);
> +    rcu_read_lock();
> +    section = address_space_get_terminal(&address_space_memory, addr, &addr1,
> +                                    &l, true);
> +    rcu_read_unlock();
> +    if (l < 4 || !memory_access_is_direct(section.mr, true)) {
> +        io_mem_write(section.mr, addr1, val, 4);
>      } else {
> -        addr1 += memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK;
> +        addr1 += memory_region_get_ram_addr(section.mr) & TARGET_PAGE_MASK;
>          ptr = qemu_get_ram_ptr(addr1);
>          stl_p(ptr, val);
>  
> @@ -2512,13 +2573,15 @@ static inline void stl_phys_internal(hwaddr addr, uint32_t val,
>                                       enum device_endian endian)
>  {
>      uint8_t *ptr;
> -    MemoryRegionSection *section;
> +    MemoryRegionSection section;
>      hwaddr l = 4;
>      hwaddr addr1;
>  
> -    section = address_space_translate(&address_space_memory, addr, &addr1, &l,
> -                                      true);
> -    if (l < 4 || !memory_access_is_direct(section->mr, true)) {
> +    rcu_read_lock();
> +    section = address_space_get_terminal(&address_space_memory, addr, &addr1,
> +                                    &l, true);
> +    rcu_read_unlock();
> +    if (l < 4 || !memory_access_is_direct(section.mr, true)) {
>  #if defined(TARGET_WORDS_BIGENDIAN)
>          if (endian == DEVICE_LITTLE_ENDIAN) {
>              val = bswap32(val);
> @@ -2528,10 +2591,10 @@ static inline void stl_phys_internal(hwaddr addr, uint32_t val,
>              val = bswap32(val);
>          }
>  #endif
> -        io_mem_write(section->mr, addr1, val, 4);
> +        io_mem_write(section.mr, addr1, val, 4);
>      } else {
>          /* RAM case */
> -        addr1 += memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK;
> +        addr1 += memory_region_get_ram_addr(section.mr) & TARGET_PAGE_MASK;
>          ptr = qemu_get_ram_ptr(addr1);
>          switch (endian) {
>          case DEVICE_LITTLE_ENDIAN:
> @@ -2575,13 +2638,13 @@ static inline void stw_phys_internal(hwaddr addr, uint32_t val,
>                                       enum device_endian endian)
>  {
>      uint8_t *ptr;
> -    MemoryRegionSection *section;
> +    MemoryRegionSection section;
>      hwaddr l = 2;
>      hwaddr addr1;
>  
> -    section = address_space_translate(&address_space_memory, addr, &addr1, &l,
> -                                      true);
> -    if (l < 2 || !memory_access_is_direct(section->mr, true)) {
> +    section = address_space_get_terminal(&address_space_memory, addr, &addr1,
> +                                    &l, true);
> +    if (l < 2 || !memory_access_is_direct(section.mr, true)) {
>  #if defined(TARGET_WORDS_BIGENDIAN)
>          if (endian == DEVICE_LITTLE_ENDIAN) {
>              val = bswap16(val);
> @@ -2591,10 +2654,10 @@ static inline void stw_phys_internal(hwaddr addr, uint32_t val,
>              val = bswap16(val);
>          }
>  #endif
> -        io_mem_write(section->mr, addr1, val, 2);
> +        io_mem_write(section.mr, addr1, val, 2);
>      } else {
>          /* RAM case */
> -        addr1 += memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK;
> +        addr1 += memory_region_get_ram_addr(section.mr) & TARGET_PAGE_MASK;
>          ptr = qemu_get_ram_ptr(addr1);
>          switch (endian) {
>          case DEVICE_LITTLE_ENDIAN:
> @@ -2696,13 +2759,15 @@ bool virtio_is_big_endian(void)
>  #ifndef CONFIG_USER_ONLY
>  bool cpu_physical_memory_is_io(hwaddr phys_addr)
>  {
> -    MemoryRegionSection *section;
> +    MemoryRegionSection section;
>      hwaddr l = 1;
>  
> -    section = address_space_translate(&address_space_memory,
> +    rcu_read_lock();
> +    section = address_space_get_terminal(&address_space_memory,
>                                        phys_addr, &phys_addr, &l, false);
> +    rcu_read_unlock();
>  
> -    return !(memory_region_is_ram(section->mr) ||
> -             memory_region_is_romd(section->mr));
> +    return !(memory_region_is_ram(section.mr) ||
> +             memory_region_is_romd(section.mr));
>  }
>  #endif
>
Paolo Bonzini May 29, 2013, 7:15 a.m. UTC | #2
Il 29/05/2013 04:11, Liu Ping Fan ha scritto:
> From: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
> 
> Using seqlock to load dispatch context atomic. The dispatch
> context consist of: cur_map_nodes, cur_sections, cur_roots.
> 
> Also during the dispatch, we should get the terminal, and dup
> MemoryRegionSection. So after rcu unlock, the cur dispatch
> context can be dropped safely.
> 
> Signed-off-by: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
> 
> --------
> The tcg code related to tlb_set_page() should be fixed too.
> But it is a little complicated, separating it for easing review.
> ---
>  exec.c |  239 ++++++++++++++++++++++++++++++++++++++++-----------------------
>  1 files changed, 152 insertions(+), 87 deletions(-)
> 
> diff --git a/exec.c b/exec.c
> index 315960d..9a5c67f 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -112,6 +112,8 @@ static Node *next_map_nodes;
>  static PhysPageEntry *next_roots;
>  static AllocInfo *next_alloc_info;
>  
> +QemuSeqLock dispatch_table_lock;

You didn't include the seqlock implementation.

> +
>  #define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)
>  
>  static void io_mem_init(void);
> @@ -199,31 +201,24 @@ static void phys_page_set(AddressSpaceDispatch *d,
>                          P_L2_LEVELS - 1);
>  }
>  
> -static MemoryRegionSection *phys_page_find(AddressSpaceDispatch *d, hwaddr index, bool cur)
> +static MemoryRegionSection *phys_page_find(AddressSpaceDispatch *d,
> +            hwaddr index,
> +            Node *map_base,
> +            MemoryRegionSection *sections_base,
> +            PhysPageEntry *root_base)
>  {
> -    PhysPageEntry lp;
> +    PhysPageEntry lp = root_base[d->idx];
>      PhysPageEntry *p;
>      int i;
> -    Node *map_nodes;
> -    MemoryRegionSection *sections;
>  
> -    if (likely(cur)) {
> -        map_nodes = cur_map_nodes;
> -        sections = cur_phys_sections;
> -        lp = cur_roots[d->idx];
> -    } else {
> -        map_nodes = next_map_nodes;
> -        sections = next_phys_sections;
> -        lp = next_roots[d->idx];
> -    }
>      for (i = P_L2_LEVELS - 1; i >= 0 && !lp.is_leaf; i--) {
>          if (lp.ptr == PHYS_MAP_NODE_NIL) {
> -            return &sections[phys_section_unassigned];
> +            return &sections_base[phys_section_unassigned];
>          }
> -        p = map_nodes[lp.ptr];
> +        p = map_base[lp.ptr];
>          lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
>      }
> -    return &sections[lp.ptr];
> +    return &sections_base[lp.ptr];
>  }
>  
>  bool memory_region_is_unassigned(MemoryRegion *mr)
> @@ -233,21 +228,28 @@ bool memory_region_is_unassigned(MemoryRegion *mr)
>  }
>  
>  static MemoryRegionSection *address_space_lookup_region(AddressSpace *as,
> -                                                        hwaddr addr)
> +        hwaddr addr, Node *map_base, MemoryRegionSection *sections_base,
> +        PhysPageEntry *root_base)
> +
>  {
> -    return phys_page_find(as->dispatch, addr >> TARGET_PAGE_BITS, true);
> +    return phys_page_find(as->dispatch, addr >> TARGET_PAGE_BITS,
> +                                    map_base, sections_base, root_base);
>  }
>  
>  MemoryRegionSection *address_space_translate(AddressSpace *as, hwaddr addr,
>                                               hwaddr *xlat, hwaddr *plen,
> -                                             bool is_write)
> +                                             bool is_write,
> +                                             Node *map_base,
> +                                             MemoryRegionSection *sections_base,
> +                                             PhysPageEntry *root_base)
>  {
>      IOMMUTLBEntry iotlb;
> -    MemoryRegionSection *section, *base = cur_phys_sections;
> +    MemoryRegionSection *section;
>      hwaddr len = *plen;
>  
>      for (;;) {
> -        section = address_space_lookup_region(as, addr);
> +        section = address_space_lookup_region(as, addr, map_base,
> +                        sections_base, root_base);
>  
>          /* Compute offset within MemoryRegionSection */
>          addr -= section->offset_within_address_space;
> @@ -265,7 +267,7 @@ MemoryRegionSection *address_space_translate(AddressSpace *as, hwaddr addr,
>                  | (addr & iotlb.addr_mask));
>          len = MIN(len, (addr | iotlb.addr_mask) - addr + 1);
>          if (!(iotlb.perm & (1 << is_write))) {
> -            section = &base[phys_section_unassigned];
> +            section = &sections_base[phys_section_unassigned];
>              break;
>          }
>  
> @@ -276,6 +278,42 @@ MemoryRegionSection *address_space_translate(AddressSpace *as, hwaddr addr,
>      *xlat = addr;
>      return section;
>  }
> +
> +/* caller should hold rcu read lock */
> +MemoryRegionSection address_space_get_terminal(AddressSpace *as, hwaddr addr,
> +                                             hwaddr *xlat, hwaddr *plen,
> +                                             bool is_write)
> +{
> +    MemoryRegionSection *mrs, *ret, *sections_base;
> +    Node *map_base;
> +    subpage_t *mmio;
> +    unsigned int idx, start;
> +    PhysPageEntry *root_base;
> +
> +    /* Walk through all of the AddressSpace in consistent */
> +    do {
> +        start = seqlock_read_begin(&dispatch_table_lock);
> +        map_base = cur_map_nodes;
> +        sections_base = cur_phys_sections;
> +        /* get all AddressSpaceDispatch root */
> +        root_base = cur_roots;
> +    } while (seqlock_read_check(&dispatch_table_lock, start);
> +
> +    mrs = address_space_translate(as, addr, xlat, plen, is_write,
> +                    map_base, sections_base, root_base);
> +    if (!mrs->mr->terminates) {

Do you want to test mrs->mr->subpage instead?  Very similar code is
already in address_space_lookup_region, and should be there.

address_space_lookup_region should ref the region, and the callers
should unref it.  This way, only address_space_lookup_region needs to
host the RCU critical section.

Adding the ref/unref should be a separate patch.  Even before, we should
change address_space_translate and address_space_translate_for_iotlb to
return a MemoryRegion instead of a MemoryRegionSection.

> +        mmio = container_of(mrs->mr, subpage_t, mmio);
> +        idx = SUBPAGE_IDX(addr);
> +        ret = &sections_base[mmio->sub_section[idx]];
> +        *xlat += mmio->base;
> +        *xlat -= mrs->offset_within_address_space;
> +        *xlat += mrs->offset_within_region;
> +    } else {
> +        ret = mrs;
> +    }
> +
> +    return *ret;
> +}
>  #endif
>  
>  void cpu_exec_init_all(void)
> @@ -1777,13 +1815,12 @@ static void core_commit(MemoryListener *listener)
>      info->sections = cur_phys_sections;
>      info->roots = cur_roots;
>  
> -    /* Fix me, in future, will be protected by wr seqlock when in parellel
> -     * with reader
> -     */
> +    seqlock_write_lock(&dispatch_table_lock);
>      cur_map_nodes = next_map_nodes;
>      cur_phys_sections = next_phys_sections;
>      cur_alloc_info = next_alloc_info;
>      cur_roots = next_roots;
> +    seqlock_write_unlock(&dispatch_table_lock);
>  
>      /* since each CPU stores ram addresses in its TLB cache, we must
>         reset the modified entries */
> @@ -1890,8 +1927,13 @@ void address_space_destroy_dispatch(AddressSpace *as)
>  
>  static void memory_map_init(void)
>  {
> +    QemuMutex *mutex;
> +
>      qemu_mutex_init(&id_lock);
>      address_space_id_map = g_malloc0(MAX_IDX/BITS_PER_BYTE);
> +    mutex = g_malloc0(sizeof(QemuMutex));
> +    qemu_mutex_init(mutex);

No need for a separate mutex, we can use the BQL.

> +    seqlock_init(&dispatch_table_lock, mutex);
>      system_memory = g_malloc(sizeof(*system_memory));
>      memory_region_init(system_memory, "system", INT64_MAX);
>      address_space_init(&address_space_memory, system_memory, "memory");
> @@ -2001,59 +2043,68 @@ bool address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
>      uint8_t *ptr;
>      uint64_t val;
>      hwaddr addr1;
> -    MemoryRegionSection *section;
> +    MemoryRegionSection section;
>      bool error = false;
>  
>      while (len > 0) {
>          l = len;
> -        section = address_space_translate(as, addr, &addr1, &l, is_write);
> +        rcu_read_lock();
> +        section = address_space_get_terminal(as, addr, &addr1, &l, is_write);
>  
>          if (is_write) {
> -            if (!memory_access_is_direct(section->mr, is_write)) {
> +            if (!memory_access_is_direct(section.mr, is_write)) {
>                  l = memory_access_size(l, addr1);
>                  /* XXX: could force cpu_single_env to NULL to avoid
>                     potential bugs */
> +                memory_region_ref(section.mr);
> +                rcu_read_unlock();
>                  if (l == 4) {
>                      /* 32 bit write access */
>                      val = ldl_p(buf);
> -                    error |= io_mem_write(section->mr, addr1, val, 4);
> +                    error |= io_mem_write(section.mr, addr1, val, 4);
>                  } else if (l == 2) {
>                      /* 16 bit write access */
>                      val = lduw_p(buf);
> -                    error |= io_mem_write(section->mr, addr1, val, 2);
> +                    error |= io_mem_write(section.mr, addr1, val, 2);
>                  } else {
>                      /* 8 bit write access */
>                      val = ldub_p(buf);
> -                    error |= io_mem_write(section->mr, addr1, val, 1);
> +                    error |= io_mem_write(section.mr, addr1, val, 1);
>                  }
> +                memory_region_unref(section.mr);
>              } else {
> -                addr1 += memory_region_get_ram_addr(section->mr);
> +                addr1 += memory_region_get_ram_addr(section.mr);
>                  /* RAM case */
>                  ptr = qemu_get_ram_ptr(addr1);
>                  memcpy(ptr, buf, l);
>                  invalidate_and_set_dirty(addr1, l);
> +                rcu_read_unlock();
>              }
>          } else {
> -            if (!memory_access_is_direct(section->mr, is_write)) {
> +            if (!memory_access_is_direct(section.mr, is_write)) {
>                  /* I/O case */
> +                memory_region_ref(section.mr);
> +                rcu_read_unlock();
>                  l = memory_access_size(l, addr1);
>                  if (l == 4) {
>                      /* 32 bit read access */
> -                    error |= io_mem_read(section->mr, addr1, &val, 4);
> +                    error |= io_mem_read(section.mr, addr1, &val, 4);
>                      stl_p(buf, val);
>                  } else if (l == 2) {
>                      /* 16 bit read access */
> -                    error |= io_mem_read(section->mr, addr1, &val, 2);
> +                    error |= io_mem_read(section.mr, addr1, &val, 2);
>                      stw_p(buf, val);
>                  } else {
>                      /* 8 bit read access */
> -                    error |= io_mem_read(section->mr, addr1, &val, 1);
> +                    error |= io_mem_read(section.mr, addr1, &val, 1);
>                      stb_p(buf, val);
>                  }
> +                memory_region_unref(section.mr);
>              } else {
>                  /* RAM case */
> -                ptr = qemu_get_ram_ptr(section->mr->ram_addr + addr1);
> +                ptr = qemu_get_ram_ptr(section.mr->ram_addr + addr1);
>                  memcpy(buf, ptr, l);
> +                rcu_read_unlock();
>              }
>          }
>          len -= l;
> @@ -2089,18 +2140,19 @@ void cpu_physical_memory_write_rom(hwaddr addr,
>      hwaddr l;
>      uint8_t *ptr;
>      hwaddr addr1;
> -    MemoryRegionSection *section;
> +    MemoryRegionSection section;
>  
> +    rcu_read_lock();
>      while (len > 0) {
>          l = len;
> -        section = address_space_translate(&address_space_memory,
> +        section = address_space_get_terminal(&address_space_memory,
>                                            addr, &addr1, &l, true);
>  
> -        if (!(memory_region_is_ram(section->mr) ||
> -              memory_region_is_romd(section->mr))) {
> +        if (!(memory_region_is_ram(section.mr) ||
> +              memory_region_is_romd(section.mr))) {
>              /* do nothing */
>          } else {
> -            addr1 += memory_region_get_ram_addr(section->mr);
> +            addr1 += memory_region_get_ram_addr(section.mr);
>              /* ROM/RAM case */
>              ptr = qemu_get_ram_ptr(addr1);
>              memcpy(ptr, buf, l);
> @@ -2110,6 +2162,7 @@ void cpu_physical_memory_write_rom(hwaddr addr,
>          buf += l;
>          addr += l;
>      }
> +    rcu_read_unlock();
>  }
>  
>  typedef struct {
> @@ -2161,15 +2214,15 @@ static void cpu_notify_map_clients(void)
>  
>  bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
>  {
> -    MemoryRegionSection *section;
> +    MemoryRegionSection section;
>      hwaddr l, xlat;
>  
>      while (len > 0) {
>          l = len;
> -        section = address_space_translate(as, addr, &xlat, &l, is_write);
> -        if (!memory_access_is_direct(section->mr, is_write)) {
> +        section = address_space_get_terminal(as, addr, &xlat, &l, is_write);
> +        if (!memory_access_is_direct(section.mr, is_write)) {
>              l = memory_access_size(l, addr);
> -            if (!memory_region_access_valid(section->mr, xlat, l, is_write)) {
> +            if (!memory_region_access_valid(section.mr, xlat, l, is_write)) {
>                  return false;
>              }
>          }
> @@ -2195,14 +2248,14 @@ void *address_space_map(AddressSpace *as,
>      hwaddr len = *plen;
>      hwaddr todo = 0;
>      hwaddr l, xlat;
> -    MemoryRegionSection *section;
> +    MemoryRegionSection section;
>      ram_addr_t raddr = RAM_ADDR_MAX;
>      ram_addr_t rlen;
>      void *ret;
>  
>      while (len > 0) {
>          l = len;
> -        section = address_space_translate(as, addr, &xlat, &l, is_write);
> +        section = address_space_get_terminal(as, addr, &xlat, &l, is_write);
>  
>          if (!memory_access_is_direct(section->mr, is_write)) {
>              if (todo || bounce.buffer) {
> @@ -2211,20 +2264,20 @@ void *address_space_map(AddressSpace *as,
>              bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
>              bounce.addr = addr;
>              bounce.len = l;
> -            bounce.mr = section->mr;
> +            bounce.mr = section.mr;
>              if (!is_write) {
>                  address_space_read(as, addr, bounce.buffer, l);
>              }
>  
>              *plen = l;
> -            memory_region_ref(section->mr);
> +            memory_region_ref(section.mr);
>              return bounce.buffer;
>          }
>          if (!todo) {
> -            raddr = memory_region_get_ram_addr(section->mr) + xlat;
> -            memory_region_ref(section->mr);
> +            raddr = memory_region_get_ram_addr(section.mr) + xlat;
> +            memory_region_ref(section.mr);
>          } else {
> -            if (memory_region_get_ram_addr(section->mr) + xlat != raddr + todo) {
> +            if (memory_region_get_ram_addr(section.mr) + xlat != raddr + todo) {
>                  break;
>              }
>          }
> @@ -2297,15 +2350,17 @@ static inline uint32_t ldl_phys_internal(hwaddr addr,
>  {
>      uint8_t *ptr;
>      uint64_t val;
> -    MemoryRegionSection *section;
> +    MemoryRegionSection section;
>      hwaddr l = 4;
>      hwaddr addr1;
>  
> -    section = address_space_translate(&address_space_memory, addr, &addr1, &l,
> -                                      false);
> +    rcu_read_lock();
> +    section = address_space_get_terminal(&address_space_memory, addr, &addr1,
> +                                    &l, false);
> +    rcu_read_unlock();
>      if (l < 4 || !memory_access_is_direct(section->mr, false)) {
>          /* I/O case */
> -        io_mem_read(section->mr, addr1, &val, 4);
> +        io_mem_read(section.mr, addr1, &val, 4);
>  #if defined(TARGET_WORDS_BIGENDIAN)
>          if (endian == DEVICE_LITTLE_ENDIAN) {
>              val = bswap32(val);
> @@ -2360,8 +2415,10 @@ static inline uint64_t ldq_phys_internal(hwaddr addr,
>      hwaddr l = 8;
>      hwaddr addr1;
>  
> -    section = address_space_translate(&address_space_memory, addr, &addr1, &l,
> -                                      false);
> +    rcu_read_lock();
> +    section = address_space_get_terminal(&address_space_memory, addr, &addr1,
> +                                    &l, false);
> +    rcu_read_unlock();
>      if (l < 8 || !memory_access_is_direct(section->mr, false)) {
>          /* I/O case */
>          io_mem_read(section->mr, addr1, &val, 8);
> @@ -2423,15 +2480,17 @@ static inline uint32_t lduw_phys_internal(hwaddr addr,
>  {
>      uint8_t *ptr;
>      uint64_t val;
> -    MemoryRegionSection *section;
> +    MemoryRegionSection section;
>      hwaddr l = 2;
>      hwaddr addr1;
>  
> -    section = address_space_translate(&address_space_memory, addr, &addr1, &l,
> -                                      false);
> -    if (l < 2 || !memory_access_is_direct(section->mr, false)) {
> +    rcu_read_lock();
> +    section = address_space_get_terminal(&address_space_memory, addr, &addr1,
> +                                    &l, false);
> +    rcu_read_unlock();
> +    if (l < 2 || !memory_access_is_direct(section.mr, false)) {
>          /* I/O case */
> -        io_mem_read(section->mr, addr1, &val, 2);
> +        io_mem_read(section.mr, addr1, &val, 2);
>  #if defined(TARGET_WORDS_BIGENDIAN)
>          if (endian == DEVICE_LITTLE_ENDIAN) {
>              val = bswap16(val);
> @@ -2443,7 +2502,7 @@ static inline uint32_t lduw_phys_internal(hwaddr addr,
>  #endif
>      } else {
>          /* RAM case */
> -        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
> +        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section.mr)
>                                  & TARGET_PAGE_MASK)
>                                 + addr1);
>          switch (endian) {
> @@ -2482,16 +2541,18 @@ uint32_t lduw_be_phys(hwaddr addr)
>  void stl_phys_notdirty(hwaddr addr, uint32_t val)
>  {
>      uint8_t *ptr;
> -    MemoryRegionSection *section;
> +    MemoryRegionSection section;
>      hwaddr l = 4;
>      hwaddr addr1;
>  
> -    section = address_space_translate(&address_space_memory, addr, &addr1, &l,
> -                                      true);
> -    if (l < 4 || !memory_access_is_direct(section->mr, true)) {
> -        io_mem_write(section->mr, addr1, val, 4);
> +    rcu_read_lock();
> +    section = address_space_get_terminal(&address_space_memory, addr, &addr1,
> +                                    &l, true);
> +    rcu_read_unlock();
> +    if (l < 4 || !memory_access_is_direct(section.mr, true)) {
> +        io_mem_write(section.mr, addr1, val, 4);
>      } else {
> -        addr1 += memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK;
> +        addr1 += memory_region_get_ram_addr(section.mr) & TARGET_PAGE_MASK;
>          ptr = qemu_get_ram_ptr(addr1);
>          stl_p(ptr, val);
>  
> @@ -2512,13 +2573,15 @@ static inline void stl_phys_internal(hwaddr addr, uint32_t val,
>                                       enum device_endian endian)
>  {
>      uint8_t *ptr;
> -    MemoryRegionSection *section;
> +    MemoryRegionSection section;
>      hwaddr l = 4;
>      hwaddr addr1;
>  
> -    section = address_space_translate(&address_space_memory, addr, &addr1, &l,
> -                                      true);
> -    if (l < 4 || !memory_access_is_direct(section->mr, true)) {
> +    rcu_read_lock();
> +    section = address_space_get_terminal(&address_space_memory, addr, &addr1,
> +                                    &l, true);
> +    rcu_read_unlock();
> +    if (l < 4 || !memory_access_is_direct(section.mr, true)) {
>  #if defined(TARGET_WORDS_BIGENDIAN)
>          if (endian == DEVICE_LITTLE_ENDIAN) {
>              val = bswap32(val);
> @@ -2528,10 +2591,10 @@ static inline void stl_phys_internal(hwaddr addr, uint32_t val,
>              val = bswap32(val);
>          }
>  #endif
> -        io_mem_write(section->mr, addr1, val, 4);
> +        io_mem_write(section.mr, addr1, val, 4);
>      } else {
>          /* RAM case */
> -        addr1 += memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK;
> +        addr1 += memory_region_get_ram_addr(section.mr) & TARGET_PAGE_MASK;
>          ptr = qemu_get_ram_ptr(addr1);
>          switch (endian) {
>          case DEVICE_LITTLE_ENDIAN:
> @@ -2575,13 +2638,13 @@ static inline void stw_phys_internal(hwaddr addr, uint32_t val,
>                                       enum device_endian endian)
>  {
>      uint8_t *ptr;
> -    MemoryRegionSection *section;
> +    MemoryRegionSection section;
>      hwaddr l = 2;
>      hwaddr addr1;
>  
> -    section = address_space_translate(&address_space_memory, addr, &addr1, &l,
> -                                      true);
> -    if (l < 2 || !memory_access_is_direct(section->mr, true)) {
> +    section = address_space_get_terminal(&address_space_memory, addr, &addr1,
> +                                    &l, true);
> +    if (l < 2 || !memory_access_is_direct(section.mr, true)) {
>  #if defined(TARGET_WORDS_BIGENDIAN)
>          if (endian == DEVICE_LITTLE_ENDIAN) {
>              val = bswap16(val);
> @@ -2591,10 +2654,10 @@ static inline void stw_phys_internal(hwaddr addr, uint32_t val,
>              val = bswap16(val);
>          }
>  #endif
> -        io_mem_write(section->mr, addr1, val, 2);
> +        io_mem_write(section.mr, addr1, val, 2);
>      } else {
>          /* RAM case */
> -        addr1 += memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK;
> +        addr1 += memory_region_get_ram_addr(section.mr) & TARGET_PAGE_MASK;
>          ptr = qemu_get_ram_ptr(addr1);
>          switch (endian) {
>          case DEVICE_LITTLE_ENDIAN:
> @@ -2696,13 +2759,15 @@ bool virtio_is_big_endian(void)
>  #ifndef CONFIG_USER_ONLY
>  bool cpu_physical_memory_is_io(hwaddr phys_addr)
>  {
> -    MemoryRegionSection *section;
> +    MemoryRegionSection section;
>      hwaddr l = 1;
>  
> -    section = address_space_translate(&address_space_memory,
> +    rcu_read_lock();
> +    section = address_space_get_terminal(&address_space_memory,
>                                        phys_addr, &phys_addr, &l, false);
> +    rcu_read_unlock();
>  
> -    return !(memory_region_is_ram(section->mr) ||
> -             memory_region_is_romd(section->mr));
> +    return !(memory_region_is_ram(section.mr) ||
> +             memory_region_is_romd(section.mr));
>  }
>  #endif
>
diff mbox

Patch

diff --git a/exec.c b/exec.c
index 315960d..9a5c67f 100644
--- a/exec.c
+++ b/exec.c
@@ -112,6 +112,8 @@  static Node *next_map_nodes;
 static PhysPageEntry *next_roots;
 static AllocInfo *next_alloc_info;
 
+QemuSeqLock dispatch_table_lock;
+
 #define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)
 
 static void io_mem_init(void);
@@ -199,31 +201,24 @@  static void phys_page_set(AddressSpaceDispatch *d,
                         P_L2_LEVELS - 1);
 }
 
-static MemoryRegionSection *phys_page_find(AddressSpaceDispatch *d, hwaddr index, bool cur)
+static MemoryRegionSection *phys_page_find(AddressSpaceDispatch *d,
+            hwaddr index,
+            Node *map_base,
+            MemoryRegionSection *sections_base,
+            PhysPageEntry *root_base)
 {
-    PhysPageEntry lp;
+    PhysPageEntry lp = root_base[d->idx];
     PhysPageEntry *p;
     int i;
-    Node *map_nodes;
-    MemoryRegionSection *sections;
 
-    if (likely(cur)) {
-        map_nodes = cur_map_nodes;
-        sections = cur_phys_sections;
-        lp = cur_roots[d->idx];
-    } else {
-        map_nodes = next_map_nodes;
-        sections = next_phys_sections;
-        lp = next_roots[d->idx];
-    }
     for (i = P_L2_LEVELS - 1; i >= 0 && !lp.is_leaf; i--) {
         if (lp.ptr == PHYS_MAP_NODE_NIL) {
-            return &sections[phys_section_unassigned];
+            return &sections_base[phys_section_unassigned];
         }
-        p = map_nodes[lp.ptr];
+        p = map_base[lp.ptr];
         lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
     }
-    return &sections[lp.ptr];
+    return &sections_base[lp.ptr];
 }
 
 bool memory_region_is_unassigned(MemoryRegion *mr)
@@ -233,21 +228,28 @@  bool memory_region_is_unassigned(MemoryRegion *mr)
 }
 
 static MemoryRegionSection *address_space_lookup_region(AddressSpace *as,
-                                                        hwaddr addr)
+        hwaddr addr, Node *map_base, MemoryRegionSection *sections_base,
+        PhysPageEntry *root_base)
+
 {
-    return phys_page_find(as->dispatch, addr >> TARGET_PAGE_BITS, true);
+    return phys_page_find(as->dispatch, addr >> TARGET_PAGE_BITS,
+                                    map_base, sections_base, root_base);
 }
 
 MemoryRegionSection *address_space_translate(AddressSpace *as, hwaddr addr,
                                              hwaddr *xlat, hwaddr *plen,
-                                             bool is_write)
+                                             bool is_write,
+                                             Node *map_base,
+                                             MemoryRegionSection *sections_base,
+                                             PhysPageEntry *root_base)
 {
     IOMMUTLBEntry iotlb;
-    MemoryRegionSection *section, *base = cur_phys_sections;
+    MemoryRegionSection *section;
     hwaddr len = *plen;
 
     for (;;) {
-        section = address_space_lookup_region(as, addr);
+        section = address_space_lookup_region(as, addr, map_base,
+                        sections_base, root_base);
 
         /* Compute offset within MemoryRegionSection */
         addr -= section->offset_within_address_space;
@@ -265,7 +267,7 @@  MemoryRegionSection *address_space_translate(AddressSpace *as, hwaddr addr,
                 | (addr & iotlb.addr_mask));
         len = MIN(len, (addr | iotlb.addr_mask) - addr + 1);
         if (!(iotlb.perm & (1 << is_write))) {
-            section = &base[phys_section_unassigned];
+            section = &sections_base[phys_section_unassigned];
             break;
         }
 
@@ -276,6 +278,42 @@  MemoryRegionSection *address_space_translate(AddressSpace *as, hwaddr addr,
     *xlat = addr;
     return section;
 }
+
+/* caller should hold rcu read lock */
+MemoryRegionSection address_space_get_terminal(AddressSpace *as, hwaddr addr,
+                                             hwaddr *xlat, hwaddr *plen,
+                                             bool is_write)
+{
+    MemoryRegionSection *mrs, *ret, *sections_base;
+    Node *map_base;
+    subpage_t *mmio;
+    unsigned int idx, start;
+    PhysPageEntry *root_base;
+
+    /* Walk through all of the AddressSpace in consistent */
+    do {
+        start = seqlock_read_begin(&dispatch_table_lock);
+        map_base = cur_map_nodes;
+        sections_base = cur_phys_sections;
+        /* get all AddressSpaceDispatch root */
+        root_base = cur_roots;
+    } while (seqlock_read_check(&dispatch_table_lock, start);
+
+    mrs = address_space_translate(as, addr, xlat, plen, is_write,
+                    map_base, sections_base, root_base);
+    if (!mrs->mr->terminates) {
+        mmio = container_of(mrs->mr, subpage_t, mmio);
+        idx = SUBPAGE_IDX(addr);
+        ret = &sections_base[mmio->sub_section[idx]];
+        *xlat += mmio->base;
+        *xlat -= mrs->offset_within_address_space;
+        *xlat += mrs->offset_within_region;
+    } else {
+        ret = mrs;
+    }
+
+    return *ret;
+}
 #endif
 
 void cpu_exec_init_all(void)
@@ -1777,13 +1815,12 @@  static void core_commit(MemoryListener *listener)
     info->sections = cur_phys_sections;
     info->roots = cur_roots;
 
-    /* Fix me, in future, will be protected by wr seqlock when in parellel
-     * with reader
-     */
+    seqlock_write_lock(&dispatch_table_lock);
     cur_map_nodes = next_map_nodes;
     cur_phys_sections = next_phys_sections;
     cur_alloc_info = next_alloc_info;
     cur_roots = next_roots;
+    seqlock_write_unlock(&dispatch_table_lock);
 
     /* since each CPU stores ram addresses in its TLB cache, we must
        reset the modified entries */
@@ -1890,8 +1927,13 @@  void address_space_destroy_dispatch(AddressSpace *as)
 
 static void memory_map_init(void)
 {
+    QemuMutex *mutex;
+
     qemu_mutex_init(&id_lock);
     address_space_id_map = g_malloc0(MAX_IDX/BITS_PER_BYTE);
+    mutex = g_malloc0(sizeof(QemuMutex));
+    qemu_mutex_init(mutex);
+    seqlock_init(&dispatch_table_lock, mutex);
     system_memory = g_malloc(sizeof(*system_memory));
     memory_region_init(system_memory, "system", INT64_MAX);
     address_space_init(&address_space_memory, system_memory, "memory");
@@ -2001,59 +2043,68 @@  bool address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
     uint8_t *ptr;
     uint64_t val;
     hwaddr addr1;
-    MemoryRegionSection *section;
+    MemoryRegionSection section;
     bool error = false;
 
     while (len > 0) {
         l = len;
-        section = address_space_translate(as, addr, &addr1, &l, is_write);
+        rcu_read_lock();
+        section = address_space_get_terminal(as, addr, &addr1, &l, is_write);
 
         if (is_write) {
-            if (!memory_access_is_direct(section->mr, is_write)) {
+            if (!memory_access_is_direct(section.mr, is_write)) {
                 l = memory_access_size(l, addr1);
                 /* XXX: could force cpu_single_env to NULL to avoid
                    potential bugs */
+                memory_region_ref(section.mr);
+                rcu_read_unlock();
                 if (l == 4) {
                     /* 32 bit write access */
                     val = ldl_p(buf);
-                    error |= io_mem_write(section->mr, addr1, val, 4);
+                    error |= io_mem_write(section.mr, addr1, val, 4);
                 } else if (l == 2) {
                     /* 16 bit write access */
                     val = lduw_p(buf);
-                    error |= io_mem_write(section->mr, addr1, val, 2);
+                    error |= io_mem_write(section.mr, addr1, val, 2);
                 } else {
                     /* 8 bit write access */
                     val = ldub_p(buf);
-                    error |= io_mem_write(section->mr, addr1, val, 1);
+                    error |= io_mem_write(section.mr, addr1, val, 1);
                 }
+                memory_region_unref(section.mr);
             } else {
-                addr1 += memory_region_get_ram_addr(section->mr);
+                addr1 += memory_region_get_ram_addr(section.mr);
                 /* RAM case */
                 ptr = qemu_get_ram_ptr(addr1);
                 memcpy(ptr, buf, l);
                 invalidate_and_set_dirty(addr1, l);
+                rcu_read_unlock();
             }
         } else {
-            if (!memory_access_is_direct(section->mr, is_write)) {
+            if (!memory_access_is_direct(section.mr, is_write)) {
                 /* I/O case */
+                memory_region_ref(section.mr);
+                rcu_read_unlock();
                 l = memory_access_size(l, addr1);
                 if (l == 4) {
                     /* 32 bit read access */
-                    error |= io_mem_read(section->mr, addr1, &val, 4);
+                    error |= io_mem_read(section.mr, addr1, &val, 4);
                     stl_p(buf, val);
                 } else if (l == 2) {
                     /* 16 bit read access */
-                    error |= io_mem_read(section->mr, addr1, &val, 2);
+                    error |= io_mem_read(section.mr, addr1, &val, 2);
                     stw_p(buf, val);
                 } else {
                     /* 8 bit read access */
-                    error |= io_mem_read(section->mr, addr1, &val, 1);
+                    error |= io_mem_read(section.mr, addr1, &val, 1);
                     stb_p(buf, val);
                 }
+                memory_region_unref(section.mr);
             } else {
                 /* RAM case */
-                ptr = qemu_get_ram_ptr(section->mr->ram_addr + addr1);
+                ptr = qemu_get_ram_ptr(section.mr->ram_addr + addr1);
                 memcpy(buf, ptr, l);
+                rcu_read_unlock();
             }
         }
         len -= l;
@@ -2089,18 +2140,19 @@  void cpu_physical_memory_write_rom(hwaddr addr,
     hwaddr l;
     uint8_t *ptr;
     hwaddr addr1;
-    MemoryRegionSection *section;
+    MemoryRegionSection section;
 
+    rcu_read_lock();
     while (len > 0) {
         l = len;
-        section = address_space_translate(&address_space_memory,
+        section = address_space_get_terminal(&address_space_memory,
                                           addr, &addr1, &l, true);
 
-        if (!(memory_region_is_ram(section->mr) ||
-              memory_region_is_romd(section->mr))) {
+        if (!(memory_region_is_ram(section.mr) ||
+              memory_region_is_romd(section.mr))) {
             /* do nothing */
         } else {
-            addr1 += memory_region_get_ram_addr(section->mr);
+            addr1 += memory_region_get_ram_addr(section.mr);
             /* ROM/RAM case */
             ptr = qemu_get_ram_ptr(addr1);
             memcpy(ptr, buf, l);
@@ -2110,6 +2162,7 @@  void cpu_physical_memory_write_rom(hwaddr addr,
         buf += l;
         addr += l;
     }
+    rcu_read_unlock();
 }
 
 typedef struct {
@@ -2161,15 +2214,15 @@  static void cpu_notify_map_clients(void)
 
 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
 {
-    MemoryRegionSection *section;
+    MemoryRegionSection section;
     hwaddr l, xlat;
 
     while (len > 0) {
         l = len;
-        section = address_space_translate(as, addr, &xlat, &l, is_write);
-        if (!memory_access_is_direct(section->mr, is_write)) {
+        section = address_space_get_terminal(as, addr, &xlat, &l, is_write);
+        if (!memory_access_is_direct(section.mr, is_write)) {
             l = memory_access_size(l, addr);
-            if (!memory_region_access_valid(section->mr, xlat, l, is_write)) {
+            if (!memory_region_access_valid(section.mr, xlat, l, is_write)) {
                 return false;
             }
         }
@@ -2195,14 +2248,14 @@  void *address_space_map(AddressSpace *as,
     hwaddr len = *plen;
     hwaddr todo = 0;
     hwaddr l, xlat;
-    MemoryRegionSection *section;
+    MemoryRegionSection section;
     ram_addr_t raddr = RAM_ADDR_MAX;
     ram_addr_t rlen;
     void *ret;
 
     while (len > 0) {
         l = len;
-        section = address_space_translate(as, addr, &xlat, &l, is_write);
+        section = address_space_get_terminal(as, addr, &xlat, &l, is_write);
 
         if (!memory_access_is_direct(section->mr, is_write)) {
             if (todo || bounce.buffer) {
@@ -2211,20 +2264,20 @@  void *address_space_map(AddressSpace *as,
             bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
             bounce.addr = addr;
             bounce.len = l;
-            bounce.mr = section->mr;
+            bounce.mr = section.mr;
             if (!is_write) {
                 address_space_read(as, addr, bounce.buffer, l);
             }
 
             *plen = l;
-            memory_region_ref(section->mr);
+            memory_region_ref(section.mr);
             return bounce.buffer;
         }
         if (!todo) {
-            raddr = memory_region_get_ram_addr(section->mr) + xlat;
-            memory_region_ref(section->mr);
+            raddr = memory_region_get_ram_addr(section.mr) + xlat;
+            memory_region_ref(section.mr);
         } else {
-            if (memory_region_get_ram_addr(section->mr) + xlat != raddr + todo) {
+            if (memory_region_get_ram_addr(section.mr) + xlat != raddr + todo) {
                 break;
             }
         }
@@ -2297,15 +2350,17 @@  static inline uint32_t ldl_phys_internal(hwaddr addr,
 {
     uint8_t *ptr;
     uint64_t val;
-    MemoryRegionSection *section;
+    MemoryRegionSection section;
     hwaddr l = 4;
     hwaddr addr1;
 
-    section = address_space_translate(&address_space_memory, addr, &addr1, &l,
-                                      false);
+    rcu_read_lock();
+    section = address_space_get_terminal(&address_space_memory, addr, &addr1,
+                                    &l, false);
+    rcu_read_unlock();
     if (l < 4 || !memory_access_is_direct(section->mr, false)) {
         /* I/O case */
-        io_mem_read(section->mr, addr1, &val, 4);
+        io_mem_read(section.mr, addr1, &val, 4);
 #if defined(TARGET_WORDS_BIGENDIAN)
         if (endian == DEVICE_LITTLE_ENDIAN) {
             val = bswap32(val);
@@ -2360,8 +2415,10 @@  static inline uint64_t ldq_phys_internal(hwaddr addr,
     hwaddr l = 8;
     hwaddr addr1;
 
-    section = address_space_translate(&address_space_memory, addr, &addr1, &l,
-                                      false);
+    rcu_read_lock();
+    section = address_space_get_terminal(&address_space_memory, addr, &addr1,
+                                    &l, false);
+    rcu_read_unlock();
     if (l < 8 || !memory_access_is_direct(section->mr, false)) {
         /* I/O case */
         io_mem_read(section->mr, addr1, &val, 8);
@@ -2423,15 +2480,17 @@  static inline uint32_t lduw_phys_internal(hwaddr addr,
 {
     uint8_t *ptr;
     uint64_t val;
-    MemoryRegionSection *section;
+    MemoryRegionSection section;
     hwaddr l = 2;
     hwaddr addr1;
 
-    section = address_space_translate(&address_space_memory, addr, &addr1, &l,
-                                      false);
-    if (l < 2 || !memory_access_is_direct(section->mr, false)) {
+    rcu_read_lock();
+    section = address_space_get_terminal(&address_space_memory, addr, &addr1,
+                                    &l, false);
+    rcu_read_unlock();
+    if (l < 2 || !memory_access_is_direct(section.mr, false)) {
         /* I/O case */
-        io_mem_read(section->mr, addr1, &val, 2);
+        io_mem_read(section.mr, addr1, &val, 2);
 #if defined(TARGET_WORDS_BIGENDIAN)
         if (endian == DEVICE_LITTLE_ENDIAN) {
             val = bswap16(val);
@@ -2443,7 +2502,7 @@  static inline uint32_t lduw_phys_internal(hwaddr addr,
 #endif
     } else {
         /* RAM case */
-        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
+        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section.mr)
                                 & TARGET_PAGE_MASK)
                                + addr1);
         switch (endian) {
@@ -2482,16 +2541,18 @@  uint32_t lduw_be_phys(hwaddr addr)
 void stl_phys_notdirty(hwaddr addr, uint32_t val)
 {
     uint8_t *ptr;
-    MemoryRegionSection *section;
+    MemoryRegionSection section;
     hwaddr l = 4;
     hwaddr addr1;
 
-    section = address_space_translate(&address_space_memory, addr, &addr1, &l,
-                                      true);
-    if (l < 4 || !memory_access_is_direct(section->mr, true)) {
-        io_mem_write(section->mr, addr1, val, 4);
+    rcu_read_lock();
+    section = address_space_get_terminal(&address_space_memory, addr, &addr1,
+                                    &l, true);
+    rcu_read_unlock();
+    if (l < 4 || !memory_access_is_direct(section.mr, true)) {
+        io_mem_write(section.mr, addr1, val, 4);
     } else {
-        addr1 += memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK;
+        addr1 += memory_region_get_ram_addr(section.mr) & TARGET_PAGE_MASK;
         ptr = qemu_get_ram_ptr(addr1);
         stl_p(ptr, val);
 
@@ -2512,13 +2573,15 @@  static inline void stl_phys_internal(hwaddr addr, uint32_t val,
                                      enum device_endian endian)
 {
     uint8_t *ptr;
-    MemoryRegionSection *section;
+    MemoryRegionSection section;
     hwaddr l = 4;
     hwaddr addr1;
 
-    section = address_space_translate(&address_space_memory, addr, &addr1, &l,
-                                      true);
-    if (l < 4 || !memory_access_is_direct(section->mr, true)) {
+    rcu_read_lock();
+    section = address_space_get_terminal(&address_space_memory, addr, &addr1,
+                                    &l, true);
+    rcu_read_unlock();
+    if (l < 4 || !memory_access_is_direct(section.mr, true)) {
 #if defined(TARGET_WORDS_BIGENDIAN)
         if (endian == DEVICE_LITTLE_ENDIAN) {
             val = bswap32(val);
@@ -2528,10 +2591,10 @@  static inline void stl_phys_internal(hwaddr addr, uint32_t val,
             val = bswap32(val);
         }
 #endif
-        io_mem_write(section->mr, addr1, val, 4);
+        io_mem_write(section.mr, addr1, val, 4);
     } else {
         /* RAM case */
-        addr1 += memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK;
+        addr1 += memory_region_get_ram_addr(section.mr) & TARGET_PAGE_MASK;
         ptr = qemu_get_ram_ptr(addr1);
         switch (endian) {
         case DEVICE_LITTLE_ENDIAN:
@@ -2575,13 +2638,13 @@  static inline void stw_phys_internal(hwaddr addr, uint32_t val,
                                      enum device_endian endian)
 {
     uint8_t *ptr;
-    MemoryRegionSection *section;
+    MemoryRegionSection section;
     hwaddr l = 2;
     hwaddr addr1;
 
-    section = address_space_translate(&address_space_memory, addr, &addr1, &l,
-                                      true);
-    if (l < 2 || !memory_access_is_direct(section->mr, true)) {
+    section = address_space_get_terminal(&address_space_memory, addr, &addr1,
+                                    &l, true);
+    if (l < 2 || !memory_access_is_direct(section.mr, true)) {
 #if defined(TARGET_WORDS_BIGENDIAN)
         if (endian == DEVICE_LITTLE_ENDIAN) {
             val = bswap16(val);
@@ -2591,10 +2654,10 @@  static inline void stw_phys_internal(hwaddr addr, uint32_t val,
             val = bswap16(val);
         }
 #endif
-        io_mem_write(section->mr, addr1, val, 2);
+        io_mem_write(section.mr, addr1, val, 2);
     } else {
         /* RAM case */
-        addr1 += memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK;
+        addr1 += memory_region_get_ram_addr(section.mr) & TARGET_PAGE_MASK;
         ptr = qemu_get_ram_ptr(addr1);
         switch (endian) {
         case DEVICE_LITTLE_ENDIAN:
@@ -2696,13 +2759,15 @@  bool virtio_is_big_endian(void)
 #ifndef CONFIG_USER_ONLY
 bool cpu_physical_memory_is_io(hwaddr phys_addr)
 {
-    MemoryRegionSection *section;
+    MemoryRegionSection section;
     hwaddr l = 1;
 
-    section = address_space_translate(&address_space_memory,
+    rcu_read_lock();
+    section = address_space_get_terminal(&address_space_memory,
                                       phys_addr, &phys_addr, &l, false);
+    rcu_read_unlock();
 
-    return !(memory_region_is_ram(section->mr) ||
-             memory_region_is_romd(section->mr));
+    return !(memory_region_is_ram(section.mr) ||
+             memory_region_is_romd(section.mr));
 }
 #endif