
[09/15] memory: prepare flatview and radix-tree for rcu style access

Message ID 1344407156-25562-10-git-send-email-qemulist@gmail.com
State: New

Commit Message

pingfan liu Aug. 8, 2012, 6:25 a.m. UTC
From: Liu Ping Fan <pingfank@linux.vnet.ibm.com>

The flatview and radix tree are both reached through a single pointer,
so swapping that pointer makes updates to them appear atomic.

A MemoryRegion referenced by a radix-tree leaf or by the flatview is
reclaimed only once the previous PhysMap is no longer in use.

Signed-off-by: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
---
 exec.c      |  303 +++++++++++++++++++++++++++++++++++++++-------------------
 hw/vhost.c  |    2 +-
 hw/xen_pt.c |    2 +-
 kvm-all.c   |    2 +-
 memory.c    |   92 ++++++++++++++-----
 memory.h    |    9 ++-
 vl.c        |    1 +
 xen-all.c   |    2 +-
 8 files changed, 286 insertions(+), 127 deletions(-)

Comments

Avi Kivity Aug. 8, 2012, 9:41 a.m. UTC | #1
On 08/08/2012 09:25 AM, Liu Ping Fan wrote:
> From: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
> 
> The flatview and radix tree are both reached through a single pointer,
> so swapping that pointer makes updates to them appear atomic.
> 
> A MemoryRegion referenced by a radix-tree leaf or by the flatview is
> reclaimed only once the previous PhysMap is no longer in use.
> 

IMO this cleverness should come much later.  Let's first take care of
dropping the big qemu lock, then make switching memory maps more efficient.

The initial paths could look like:

  lookup:
     take mem_map_lock
     lookup
     take ref
     drop mem_map_lock

  update:
     take mem_map_lock (in core_begin)
     do updates
     drop mem_map_lock

Later we can replace mem_map_lock with either a rwlock or (real) rcu.
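
In C, the lookup side could be as small as this (a rough sketch against
the functions in this series; lookup_section() is an illustrative name,
not code from the patch):

  MemoryRegionSection lookup_section(target_phys_addr_t index)
  {
      MemoryRegionSection section;

      qemu_mutex_lock(&mem_map_lock);
      section = *phys_page_find(index);  /* copy while the map is stable */
      memory_region_get(section.mr);     /* pin the MR past the unlock */
      qemu_mutex_unlock(&mem_map_lock);

      /* caller releases with memory_region_put(section.mr) */
      return section;
  }

with core_begin() taking mem_map_lock and core_commit() dropping it, so
an update simply excludes all lookups while it runs.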


>  
>  #if !defined(CONFIG_USER_ONLY)
>  
> -static void phys_map_node_reserve(unsigned nodes)
> +static void phys_map_node_reserve(PhysMap *map, unsigned nodes)
>  {
> -    if (phys_map_nodes_nb + nodes > phys_map_nodes_nb_alloc) {
> +    if (map->phys_map_nodes_nb + nodes > map->phys_map_nodes_nb_alloc) {
>          typedef PhysPageEntry Node[L2_SIZE];
> -        phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc * 2, 16);
> -        phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc,
> -                                      phys_map_nodes_nb + nodes);
> -        phys_map_nodes = g_renew(Node, phys_map_nodes,
> -                                 phys_map_nodes_nb_alloc);
> +        map->phys_map_nodes_nb_alloc = MAX(map->phys_map_nodes_nb_alloc * 2,
> +                                                                        16);
> +        map->phys_map_nodes_nb_alloc = MAX(map->phys_map_nodes_nb_alloc,
> +                                      map->phys_map_nodes_nb + nodes);
> +        map->phys_map_nodes = g_renew(Node, map->phys_map_nodes,
> +                                 map->phys_map_nodes_nb_alloc);
>      }
>  }

Please have a patch that just adds the map parameter to all these
functions.  This makes the later patch, that adds the copy, easier to read.

> +
> +void cur_map_update(PhysMap *next)
> +{
> +    qemu_mutex_lock(&cur_map_lock);
> +    physmap_put(cur_map);
> +    cur_map = next;
> +    smp_mb();
> +    qemu_mutex_unlock(&cur_map_lock);
> +}

IMO this can be mem_map_lock.

If we take my previous suggestion:

  lookup:
     take mem_map_lock
     lookup
     take ref
     drop mem_map_lock

  update:
     take mem_map_lock (in core_begin)
     do updates
     drop mem_map_lock

And update it to


  update:
     prepare next_map (in core_begin)
     do updates
     take mem_map_lock (in core_commit)
     switch maps
     drop mem_map_lock
     free old map
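
In other words, roughly (a sketch reusing the cur_map/next_map names
from this patch; preparation of next_map is omitted):

  static void core_commit(MemoryListener *listener)
  {
      PhysMap *old;

      /* ... flush TLBs as today ... */

      qemu_mutex_lock(&mem_map_lock);
      old = cur_map;
      cur_map = next_map;            /* lookups now see the new map */
      qemu_mutex_unlock(&mem_map_lock);

      /* safe even though lookups may still use old sections: they
       * copied them out under the lock, see the note below */
      phys_map_release(old);
  }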


Note the lookup path copies the MemoryRegionSection instead of
referencing it.  Thus we can destroy the old map without worrying; the
only pointers will point to MemoryRegions, which will be protected by
the refcounts on their Objects.

This can be easily switched to rcu:

  update:
     prepare next_map (in core_begin)
     do updates
     switch maps - rcu_assign_pointer
     call_rcu(free old map) (or synchronize_rcu; free old maps)

Again, this should be done after the simplistic patch that enables
parallel lookup but keeps just one map.
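
With Linux-kernel-style RCU primitives (illustration only: QEMU has no
such API today, and physmap_free_rcu()/the rcu_head member are made-up
names for the sketch), the two sides would look roughly like:

  /* read side */
  MemoryRegionSection lookup_section_rcu(target_phys_addr_t index)
  {
      MemoryRegionSection section;
      PhysMap *map;

      rcu_read_lock();
      map = rcu_dereference(cur_map);
      section = *phys_page_find_internal(map, index);  /* copy, as before */
      memory_region_get(section.mr);
      rcu_read_unlock();
      return section;
  }

  /* update side, at the end of core_commit */
  rcu_assign_pointer(cur_map, next_map);
  call_rcu(&old_map->rcu_head, physmap_free_rcu);
  /* or: synchronize_rcu(); phys_map_release(old_map); */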
Blue Swirl Aug. 8, 2012, 7:23 p.m. UTC | #2
On Wed, Aug 8, 2012 at 6:25 AM, Liu Ping Fan <qemulist@gmail.com> wrote:
> From: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
>
> The flatview and radix tree are both reached through a single pointer,
> so swapping that pointer makes updates to them appear atomic.
>
> A MemoryRegion referenced by a radix-tree leaf or by the flatview is
> reclaimed only once the previous PhysMap is no longer in use.
>
> Signed-off-by: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
> ---
>  exec.c      |  303 +++++++++++++++++++++++++++++++++++++++-------------------
>  hw/vhost.c  |    2 +-
>  hw/xen_pt.c |    2 +-
>  kvm-all.c   |    2 +-
>  memory.c    |   92 ++++++++++++++-----
>  memory.h    |    9 ++-
>  vl.c        |    1 +
>  xen-all.c   |    2 +-
>  8 files changed, 286 insertions(+), 127 deletions(-)
>
> diff --git a/exec.c b/exec.c
> index 01b91b0..97addb9 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -24,6 +24,7 @@
>  #include <sys/mman.h>
>  #endif
>
> +#include "qemu/atomic.h"
>  #include "qemu-common.h"
>  #include "cpu.h"
>  #include "tcg.h"
> @@ -35,6 +36,8 @@
>  #include "qemu-timer.h"
>  #include "memory.h"
>  #include "exec-memory.h"
> +#include "qemu-thread.h"
> +#include "qemu/reclaimer.h"
>  #if defined(CONFIG_USER_ONLY)
>  #include <qemu.h>
>  #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
> @@ -184,25 +187,17 @@ static void *l1_map[V_L1_SIZE];
>
>  #if !defined(CONFIG_USER_ONLY)
>
> -static MemoryRegionSection *phys_sections;
> -static unsigned phys_sections_nb, phys_sections_nb_alloc;
>  static uint16_t phys_section_unassigned;
>  static uint16_t phys_section_notdirty;
>  static uint16_t phys_section_rom;
>  static uint16_t phys_section_watch;
>
> -
> -/* Simple allocator for PhysPageEntry nodes */
> -static PhysPageEntry (*phys_map_nodes)[L2_SIZE];
> -static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc;
> -
>  #define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)
>
> -/* This is a multi-level map on the physical address space.
> -   The bottom level has pointers to MemoryRegionSections.  */
> -static PhysPageEntry phys_map = { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
> -
> +static QemuMutex cur_map_lock;
> +static PhysMap *cur_map;
>  QemuMutex mem_map_lock;
> +static PhysMap *next_map;
>
>  static void io_mem_init(void);
>  static void memory_map_init(void);
> @@ -383,41 +378,38 @@ static inline PageDesc *page_find(tb_page_addr_t index)
>
>  #if !defined(CONFIG_USER_ONLY)
>
> -static void phys_map_node_reserve(unsigned nodes)
> +static void phys_map_node_reserve(PhysMap *map, unsigned nodes)
>  {
> -    if (phys_map_nodes_nb + nodes > phys_map_nodes_nb_alloc) {
> +    if (map->phys_map_nodes_nb + nodes > map->phys_map_nodes_nb_alloc) {
>          typedef PhysPageEntry Node[L2_SIZE];
> -        phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc * 2, 16);
> -        phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc,
> -                                      phys_map_nodes_nb + nodes);
> -        phys_map_nodes = g_renew(Node, phys_map_nodes,
> -                                 phys_map_nodes_nb_alloc);
> +        map->phys_map_nodes_nb_alloc = MAX(map->phys_map_nodes_nb_alloc * 2,
> +                                                                        16);
> +        map->phys_map_nodes_nb_alloc = MAX(map->phys_map_nodes_nb_alloc,
> +                                      map->phys_map_nodes_nb + nodes);
> +        map->phys_map_nodes = g_renew(Node, map->phys_map_nodes,
> +                                 map->phys_map_nodes_nb_alloc);
>      }
>  }
>
> -static uint16_t phys_map_node_alloc(void)
> +static uint16_t phys_map_node_alloc(PhysMap *map)
>  {
>      unsigned i;
>      uint16_t ret;
>
> -    ret = phys_map_nodes_nb++;
> +    ret = map->phys_map_nodes_nb++;
>      assert(ret != PHYS_MAP_NODE_NIL);
> -    assert(ret != phys_map_nodes_nb_alloc);
> +    assert(ret != map->phys_map_nodes_nb_alloc);
>      for (i = 0; i < L2_SIZE; ++i) {
> -        phys_map_nodes[ret][i].is_leaf = 0;
> -        phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
> +        map->phys_map_nodes[ret][i].is_leaf = 0;
> +        map->phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
>      }
>      return ret;
>  }
>
> -static void phys_map_nodes_reset(void)
> -{
> -    phys_map_nodes_nb = 0;
> -}
> -
> -
> -static void phys_page_set_level(PhysPageEntry *lp, target_phys_addr_t *index,
> -                                target_phys_addr_t *nb, uint16_t leaf,
> +static void phys_page_set_level(PhysMap *map, PhysPageEntry *lp,
> +                                target_phys_addr_t *index,
> +                                target_phys_addr_t *nb,
> +                                uint16_t leaf,
>                                  int level)
>  {
>      PhysPageEntry *p;
> @@ -425,8 +417,8 @@ static void phys_page_set_level(PhysPageEntry *lp, target_phys_addr_t *index,
>      target_phys_addr_t step = (target_phys_addr_t)1 << (level * L2_BITS);
>
>      if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) {
> -        lp->ptr = phys_map_node_alloc();
> -        p = phys_map_nodes[lp->ptr];
> +        lp->ptr = phys_map_node_alloc(map);
> +        p = map->phys_map_nodes[lp->ptr];
>          if (level == 0) {
>              for (i = 0; i < L2_SIZE; i++) {
>                  p[i].is_leaf = 1;
> @@ -434,7 +426,7 @@ static void phys_page_set_level(PhysPageEntry *lp, target_phys_addr_t *index,
>              }
>          }
>      } else {
> -        p = phys_map_nodes[lp->ptr];
> +        p = map->phys_map_nodes[lp->ptr];
>      }
>      lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)];
>
> @@ -445,24 +437,27 @@ static void phys_page_set_level(PhysPageEntry *lp, target_phys_addr_t *index,
>              *index += step;
>              *nb -= step;
>          } else {
> -            phys_page_set_level(lp, index, nb, leaf, level - 1);
> +            phys_page_set_level(map, lp, index, nb, leaf, level - 1);
>          }
>          ++lp;
>      }
>  }
>
> -static void phys_page_set(target_phys_addr_t index, target_phys_addr_t nb,
> -                          uint16_t leaf)
> +static void phys_page_set(PhysMap *map, target_phys_addr_t index,
> +                            target_phys_addr_t nb,
> +                            uint16_t leaf)
>  {
>      /* Wildly overreserve - it doesn't matter much. */
> -    phys_map_node_reserve(3 * P_L2_LEVELS);
> +    phys_map_node_reserve(map, 3 * P_L2_LEVELS);
>
> -    phys_page_set_level(&phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
> +    /* update in new tree*/
> +    phys_page_set_level(map, &map->root, &index, &nb, leaf, P_L2_LEVELS - 1);
>  }
>
> -MemoryRegionSection *phys_page_find(target_phys_addr_t index)
> +static MemoryRegionSection *phys_page_find_internal(PhysMap *map,
> +                           target_phys_addr_t index)
>  {
> -    PhysPageEntry lp = phys_map;
> +    PhysPageEntry lp = map->root;
>      PhysPageEntry *p;
>      int i;
>      uint16_t s_index = phys_section_unassigned;
> @@ -471,13 +466,79 @@ MemoryRegionSection *phys_page_find(target_phys_addr_t index)
>          if (lp.ptr == PHYS_MAP_NODE_NIL) {
>              goto not_found;
>          }
> -        p = phys_map_nodes[lp.ptr];
> +        p = map->phys_map_nodes[lp.ptr];
>          lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
>      }
>
>      s_index = lp.ptr;
>  not_found:
> -    return &phys_sections[s_index];
> +    return &map->phys_sections[s_index];
> +}
> +
> +MemoryRegionSection *phys_page_find(target_phys_addr_t index)
> +{
> +    return phys_page_find_internal(cur_map, index);
> +}
> +
> +void physmap_get(PhysMap *map)
> +{
> +    atomic_inc(&map->ref);
> +}
> +
> +/* Untill rcu read side finished, do this reclaim */

Until

> +static ChunkHead physmap_reclaimer_list = { .lh_first = NULL };

Please insert a blank line here.

> +void physmap_reclaimer_enqueue(void *opaque, ReleaseHandler *release)
> +{
> +    reclaimer_enqueue(&physmap_reclaimer_list, opaque, release);
> +}
> +
> +static void destroy_all_mappings(PhysMap *map);

Prototypes belong to the top of the file.

> +static void phys_map_release(PhysMap *map)
> +{
> +    /* emulate for rcu reclaimer for mr */
> +    reclaimer_worker(&physmap_reclaimer_list);
> +
> +    destroy_all_mappings(map);
> +    g_free(map->phys_map_nodes);
> +    g_free(map->phys_sections);
> +    g_free(map->views[0].ranges);
> +    g_free(map->views[1].ranges);
> +    g_free(map);
> +}
> +
> +void physmap_put(PhysMap *map)
> +{
> +    if (atomic_dec_and_test(&map->ref)) {
> +        phys_map_release(map);
> +    }
> +}
> +
> +void cur_map_update(PhysMap *next)
> +{
> +    qemu_mutex_lock(&cur_map_lock);
> +    physmap_put(cur_map);
> +    cur_map = next;
> +    smp_mb();
> +    qemu_mutex_unlock(&cur_map_lock);
> +}
> +
> +PhysMap *cur_map_get(void)
> +{
> +    PhysMap *ret;
> +
> +    qemu_mutex_lock(&cur_map_lock);
> +    ret = cur_map;
> +    physmap_get(ret);
> +    smp_mb();
> +    qemu_mutex_unlock(&cur_map_lock);
> +    return ret;
> +}
> +
> +PhysMap *alloc_next_map(void)
> +{
> +    PhysMap *next = g_malloc0(sizeof(PhysMap));
> +    atomic_set(&next->ref, 1);
> +    return next;
>  }
>
>  bool memory_region_is_unassigned(MemoryRegion *mr)
> @@ -632,6 +693,7 @@ void cpu_exec_init_all(void)
>      memory_map_init();
>      io_mem_init();
>      qemu_mutex_init(&mem_map_lock);
> +    qemu_mutex_init(&cur_map_lock);
>  #endif
>  }
>
> @@ -2161,17 +2223,18 @@ int page_unprotect(target_ulong address, uintptr_t pc, void *puc)
>
>  #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
>  typedef struct subpage_t {
> +    PhysMap *map;
>      MemoryRegion iomem;
>      target_phys_addr_t base;
>      uint16_t sub_section[TARGET_PAGE_SIZE];
>  } subpage_t;
>
> -static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
> -                             uint16_t section);
> -static subpage_t *subpage_init(target_phys_addr_t base);
> -static void destroy_page_desc(uint16_t section_index)
> +static int subpage_register(PhysMap *map, subpage_t *mmio, uint32_t start,
> +                            uint32_t end, uint16_t section);
> +static subpage_t *subpage_init(PhysMap *map, target_phys_addr_t base);
> +static void destroy_page_desc(PhysMap *map, uint16_t section_index)
>  {
> -    MemoryRegionSection *section = &phys_sections[section_index];
> +    MemoryRegionSection *section = &map->phys_sections[section_index];
>      MemoryRegion *mr = section->mr;
>
>      if (mr->subpage) {
> @@ -2181,7 +2244,7 @@ static void destroy_page_desc(uint16_t section_index)
>      }
>  }
>
> -static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
> +static void destroy_l2_mapping(PhysMap *map, PhysPageEntry *lp, unsigned level)
>  {
>      unsigned i;
>      PhysPageEntry *p;
> @@ -2190,38 +2253,34 @@ static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
>          return;
>      }
>
> -    p = phys_map_nodes[lp->ptr];
> +    p = map->phys_map_nodes[lp->ptr];
>      for (i = 0; i < L2_SIZE; ++i) {
>          if (!p[i].is_leaf) {
> -            destroy_l2_mapping(&p[i], level - 1);
> +            destroy_l2_mapping(map, &p[i], level - 1);
>          } else {
> -            destroy_page_desc(p[i].ptr);
> +            destroy_page_desc(map, p[i].ptr);
>          }
>      }
>      lp->is_leaf = 0;
>      lp->ptr = PHYS_MAP_NODE_NIL;
>  }
>
> -static void destroy_all_mappings(void)
> +static void destroy_all_mappings(PhysMap *map)
>  {
> -    destroy_l2_mapping(&phys_map, P_L2_LEVELS - 1);
> -    phys_map_nodes_reset();
> -}
> +    PhysPageEntry *root = &map->root;
>
> -static uint16_t phys_section_add(MemoryRegionSection *section)
> -{
> -    if (phys_sections_nb == phys_sections_nb_alloc) {
> -        phys_sections_nb_alloc = MAX(phys_sections_nb_alloc * 2, 16);
> -        phys_sections = g_renew(MemoryRegionSection, phys_sections,
> -                                phys_sections_nb_alloc);
> -    }
> -    phys_sections[phys_sections_nb] = *section;
> -    return phys_sections_nb++;
> +    destroy_l2_mapping(map, root, P_L2_LEVELS - 1);
>  }
>
> -static void phys_sections_clear(void)
> +static uint16_t phys_section_add(PhysMap *map, MemoryRegionSection *section)
>  {
> -    phys_sections_nb = 0;
> +    if (map->phys_sections_nb == map->phys_sections_nb_alloc) {
> +        map->phys_sections_nb_alloc = MAX(map->phys_sections_nb_alloc * 2, 16);
> +        map->phys_sections = g_renew(MemoryRegionSection, map->phys_sections,
> +                                map->phys_sections_nb_alloc);
> +    }
> +    map->phys_sections[map->phys_sections_nb] = *section;
> +    return map->phys_sections_nb++;
>  }
>
>  /* register physical memory.
> @@ -2232,12 +2291,13 @@ static void phys_sections_clear(void)
>     start_addr and region_offset are rounded down to a page boundary
>     before calculating this offset.  This should not be a problem unless
>     the low bits of start_addr and region_offset differ.  */
> -static void register_subpage(MemoryRegionSection *section)
> +static void register_subpage(PhysMap *map, MemoryRegionSection *section)
>  {
>      subpage_t *subpage;
>      target_phys_addr_t base = section->offset_within_address_space
>          & TARGET_PAGE_MASK;
> -    MemoryRegionSection *existing = phys_page_find(base >> TARGET_PAGE_BITS);
> +    MemoryRegionSection *existing = phys_page_find_internal(map,
> +                                            base >> TARGET_PAGE_BITS);
>      MemoryRegionSection subsection = {
>          .offset_within_address_space = base,
>          .size = TARGET_PAGE_SIZE,
> @@ -2247,30 +2307,30 @@ static void register_subpage(MemoryRegionSection *section)
>      assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
>
>      if (!(existing->mr->subpage)) {
> -        subpage = subpage_init(base);
> +        subpage = subpage_init(map, base);
>          subsection.mr = &subpage->iomem;
> -        phys_page_set(base >> TARGET_PAGE_BITS, 1,
> -                      phys_section_add(&subsection));
> +        phys_page_set(map, base >> TARGET_PAGE_BITS, 1,
> +                      phys_section_add(map, &subsection));
>      } else {
>          subpage = container_of(existing->mr, subpage_t, iomem);
>      }
>      start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
>      end = start + section->size;
> -    subpage_register(subpage, start, end, phys_section_add(section));
> +    subpage_register(map, subpage, start, end, phys_section_add(map, section));
>  }
>
>
> -static void register_multipage(MemoryRegionSection *section)
> +static void register_multipage(PhysMap *map, MemoryRegionSection *section)
>  {
>      target_phys_addr_t start_addr = section->offset_within_address_space;
>      ram_addr_t size = section->size;
>      target_phys_addr_t addr;
> -    uint16_t section_index = phys_section_add(section);
> +    uint16_t section_index = phys_section_add(map, section);
>
>      assert(size);
>
>      addr = start_addr;
> -    phys_page_set(addr >> TARGET_PAGE_BITS, size >> TARGET_PAGE_BITS,
> +    phys_page_set(map, addr >> TARGET_PAGE_BITS, size >> TARGET_PAGE_BITS,
>                    section_index);
>  }
>
> @@ -2278,13 +2338,14 @@ void cpu_register_physical_memory_log(MemoryRegionSection *section,
>                                        bool readonly)
>  {
>      MemoryRegionSection now = *section, remain = *section;
> +    PhysMap *map = next_map;
>
>      if ((now.offset_within_address_space & ~TARGET_PAGE_MASK)
>          || (now.size < TARGET_PAGE_SIZE)) {
>          now.size = MIN(TARGET_PAGE_ALIGN(now.offset_within_address_space)
>                         - now.offset_within_address_space,
>                         now.size);
> -        register_subpage(&now);
> +        register_subpage(map, &now);
>          remain.size -= now.size;
>          remain.offset_within_address_space += now.size;
>          remain.offset_within_region += now.size;
> @@ -2292,14 +2353,14 @@ void cpu_register_physical_memory_log(MemoryRegionSection *section,
>      now = remain;
>      now.size &= TARGET_PAGE_MASK;
>      if (now.size) {
> -        register_multipage(&now);
> +        register_multipage(map, &now);
>          remain.size -= now.size;
>          remain.offset_within_address_space += now.size;
>          remain.offset_within_region += now.size;
>      }
>      now = remain;
>      if (now.size) {
> -        register_subpage(&now);
> +        register_subpage(map, &now);
>      }
>  }
>
> @@ -3001,7 +3062,7 @@ static uint64_t subpage_read(void *opaque, target_phys_addr_t addr,
>             mmio, len, addr, idx);
>  #endif
>
> -    section = &phys_sections[mmio->sub_section[idx]];
> +    section = &mmio->map->phys_sections[mmio->sub_section[idx]];
>      addr += mmio->base;
>      addr -= section->offset_within_address_space;
>      addr += section->offset_within_region;
> @@ -3020,7 +3081,7 @@ static void subpage_write(void *opaque, target_phys_addr_t addr,
>             __func__, mmio, len, addr, idx, value);
>  #endif
>
> -    section = &phys_sections[mmio->sub_section[idx]];
> +    section = &mmio->map->phys_sections[mmio->sub_section[idx]];
>      addr += mmio->base;
>      addr -= section->offset_within_address_space;
>      addr += section->offset_within_region;
> @@ -3065,8 +3126,8 @@ static const MemoryRegionOps subpage_ram_ops = {
>      .endianness = DEVICE_NATIVE_ENDIAN,
>  };
>
> -static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
> -                             uint16_t section)
> +static int subpage_register(PhysMap *map, subpage_t *mmio, uint32_t start,
> +                              uint32_t end, uint16_t section)
>  {
>      int idx, eidx;
>
> @@ -3078,10 +3139,10 @@ static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
>      printf("%s: %p start %08x end %08x idx %08x eidx %08x mem %ld\n", __func__,
>             mmio, start, end, idx, eidx, memory);
>  #endif
> -    if (memory_region_is_ram(phys_sections[section].mr)) {
> -        MemoryRegionSection new_section = phys_sections[section];
> +    if (memory_region_is_ram(map->phys_sections[section].mr)) {
> +        MemoryRegionSection new_section = map->phys_sections[section];
>          new_section.mr = &io_mem_subpage_ram;
> -        section = phys_section_add(&new_section);
> +        section = phys_section_add(map, &new_section);
>      }
>      for (; idx <= eidx; idx++) {
>          mmio->sub_section[idx] = section;
> @@ -3090,12 +3151,13 @@ static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
>      return 0;
>  }
>
> -static subpage_t *subpage_init(target_phys_addr_t base)
> +static subpage_t *subpage_init(PhysMap *map, target_phys_addr_t base)
>  {
>      subpage_t *mmio;
>
>      mmio = g_malloc0(sizeof(subpage_t));
>
> +    mmio->map = map;
>      mmio->base = base;
>      memory_region_init_io(&mmio->iomem, &subpage_ops, mmio,
>                            "subpage", TARGET_PAGE_SIZE);
> @@ -3104,12 +3166,12 @@ static subpage_t *subpage_init(target_phys_addr_t base)
>      printf("%s: %p base " TARGET_FMT_plx " len %08x %d\n", __func__,
>             mmio, base, TARGET_PAGE_SIZE, subpage_memory);
>  #endif
> -    subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, phys_section_unassigned);
> +    subpage_register(map, mmio, 0, TARGET_PAGE_SIZE-1, phys_section_unassigned);
>
>      return mmio;
>  }
>
> -static uint16_t dummy_section(MemoryRegion *mr)
> +static uint16_t dummy_section(PhysMap *map, MemoryRegion *mr)
>  {
>      MemoryRegionSection section = {
>          .mr = mr,
> @@ -3118,7 +3180,7 @@ static uint16_t dummy_section(MemoryRegion *mr)
>          .size = UINT64_MAX,
>      };
>
> -    return phys_section_add(&section);
> +    return phys_section_add(map, &section);
>  }
>
>  MemoryRegion *iotlb_to_region(target_phys_addr_t index)
> @@ -3140,15 +3202,32 @@ static void io_mem_init(void)
>                            "watch", UINT64_MAX);
>  }
>
> -static void core_begin(MemoryListener *listener)
> +#if 0
> +static void physmap_init(void)
> +{
> +    FlatView v = { .ranges = NULL,
> +                             .nr = 0,
> +                             .nr_allocated = 0,
> +    };
> +
> +    init_map.views[0] = v;
> +    init_map.views[1] = v;
> +    cur_map =  &init_map;
> +}
> +#endif

Please delete.

> +
> +static void core_begin(MemoryListener *listener, PhysMap *new_map)
>  {
> -    destroy_all_mappings();
> -    phys_sections_clear();
> -    phys_map.ptr = PHYS_MAP_NODE_NIL;
> -    phys_section_unassigned = dummy_section(&io_mem_unassigned);
> -    phys_section_notdirty = dummy_section(&io_mem_notdirty);
> -    phys_section_rom = dummy_section(&io_mem_rom);
> -    phys_section_watch = dummy_section(&io_mem_watch);
> +
> +    new_map->root.ptr = PHYS_MAP_NODE_NIL;
> +    new_map->root.is_leaf = 0;
> +
> +    /* In all the map, these sections have the same index */
> +    phys_section_unassigned = dummy_section(new_map, &io_mem_unassigned);
> +    phys_section_notdirty = dummy_section(new_map, &io_mem_notdirty);
> +    phys_section_rom = dummy_section(new_map, &io_mem_rom);
> +    phys_section_watch = dummy_section(new_map, &io_mem_watch);
> +    next_map = new_map;
>  }
>
>  static void core_commit(MemoryListener *listener)
> @@ -3161,6 +3240,16 @@ static void core_commit(MemoryListener *listener)
>      for(env = first_cpu; env != NULL; env = env->next_cpu) {
>          tlb_flush(env, 1);
>      }
> +
> +/* move into high layer
> +    qemu_mutex_lock(&cur_map_lock);
> +    if (cur_map != NULL) {
> +        physmap_put(cur_map);
> +    }
> +    cur_map = next_map;
> +    smp_mb();
> +    qemu_mutex_unlock(&cur_map_lock);
> +*/

Also commented out code should be deleted.

>  }
>
>  static void core_region_add(MemoryListener *listener,
> @@ -3217,7 +3306,7 @@ static void core_eventfd_del(MemoryListener *listener,
>  {
>  }
>
> -static void io_begin(MemoryListener *listener)
> +static void io_begin(MemoryListener *listener, PhysMap *next)
>  {
>  }
>
> @@ -3329,6 +3418,20 @@ static void memory_map_init(void)
>      memory_listener_register(&io_memory_listener, system_io);
>  }
>
> +void physmap_init(void)
> +{
> +    FlatView v = { .ranges = NULL, .nr = 0, .nr_allocated = 0,
> +                           };
> +    PhysMap *init_map = g_malloc0(sizeof(PhysMap));
> +
> +    atomic_set(&init_map->ref, 1);
> +    init_map->root.ptr = PHYS_MAP_NODE_NIL;
> +    init_map->root.is_leaf = 0;
> +    init_map->views[0] = v;
> +    init_map->views[1] = v;
> +    cur_map = init_map;
> +}
> +
>  MemoryRegion *get_system_memory(void)
>  {
>      return system_memory;
> @@ -3391,6 +3494,7 @@ void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
>      uint32_t val;
>      target_phys_addr_t page;
>      MemoryRegionSection *section;
> +    PhysMap *cur = cur_map_get();
>
>      while (len > 0) {
>          page = addr & TARGET_PAGE_MASK;
> @@ -3472,6 +3576,7 @@ void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
>          buf += l;
>          addr += l;
>      }
> +    physmap_put(cur);
>  }
>
>  /* used for ROM loading : can write in RAM and ROM */
> diff --git a/hw/vhost.c b/hw/vhost.c
> index 43664e7..df58345 100644
> --- a/hw/vhost.c
> +++ b/hw/vhost.c
> @@ -438,7 +438,7 @@ static bool vhost_section(MemoryRegionSection *section)
>          && memory_region_is_ram(section->mr);
>  }
>
> -static void vhost_begin(MemoryListener *listener)
> +static void vhost_begin(MemoryListener *listener, PhysMap *next)
>  {
>  }
>
> diff --git a/hw/xen_pt.c b/hw/xen_pt.c
> index 3b6d186..fba8586 100644
> --- a/hw/xen_pt.c
> +++ b/hw/xen_pt.c
> @@ -597,7 +597,7 @@ static void xen_pt_region_update(XenPCIPassthroughState *s,
>      }
>  }
>
> -static void xen_pt_begin(MemoryListener *l)
> +static void xen_pt_begin(MemoryListener *l, PhysMap *next)
>  {
>  }
>
> diff --git a/kvm-all.c b/kvm-all.c
> index f8e4328..bc42cab 100644
> --- a/kvm-all.c
> +++ b/kvm-all.c
> @@ -693,7 +693,7 @@ static void kvm_set_phys_mem(MemoryRegionSection *section, bool add)
>      }
>  }
>
> -static void kvm_begin(MemoryListener *listener)
> +static void kvm_begin(MemoryListener *listener, PhysMap *next)
>  {
>  }
>
> diff --git a/memory.c b/memory.c
> index c7f2cfd..54cdc7f 100644
> --- a/memory.c
> +++ b/memory.c
> @@ -20,6 +20,7 @@
>  #include "kvm.h"
>  #include <assert.h>
>  #include "hw/qdev.h"
> +#include "qemu-thread.h"
>
>  #define WANT_EXEC_OBSOLETE
>  #include "exec-obsolete.h"
> @@ -192,7 +193,7 @@ typedef struct AddressSpaceOps AddressSpaceOps;
>  /* A system address space - I/O, memory, etc. */
>  struct AddressSpace {
>      MemoryRegion *root;
> -    FlatView current_map;
> +    int view_id;
>      int ioeventfd_nb;
>      MemoryRegionIoeventfd *ioeventfds;
>  };
> @@ -232,11 +233,6 @@ static void flatview_insert(FlatView *view, unsigned pos, FlatRange *range)
>      ++view->nr;
>  }
>
> -static void flatview_destroy(FlatView *view)
> -{
> -    g_free(view->ranges);
> -}
> -
>  static bool can_merge(FlatRange *r1, FlatRange *r2)
>  {
>      return int128_eq(addrrange_end(r1->addr), r2->addr.start)
> @@ -594,8 +590,10 @@ static void address_space_update_ioeventfds(AddressSpace *as)
>      MemoryRegionIoeventfd *ioeventfds = NULL;
>      AddrRange tmp;
>      unsigned i;
> +    PhysMap *map = cur_map_get();
> +    FlatView *view = &map->views[as->view_id];
>
> -    FOR_EACH_FLAT_RANGE(fr, &as->current_map) {
> +    FOR_EACH_FLAT_RANGE(fr, view) {
>          for (i = 0; i < fr->mr->ioeventfd_nb; ++i) {
>              tmp = addrrange_shift(fr->mr->ioeventfds[i].addr,
>                                    int128_sub(fr->addr.start,
> @@ -616,6 +614,7 @@ static void address_space_update_ioeventfds(AddressSpace *as)
>      g_free(as->ioeventfds);
>      as->ioeventfds = ioeventfds;
>      as->ioeventfd_nb = ioeventfd_nb;
> +    physmap_put(map);
>  }
>
>  static void address_space_update_topology_pass(AddressSpace *as,
> @@ -681,21 +680,23 @@ static void address_space_update_topology_pass(AddressSpace *as,
>  }
>
>
> -static void address_space_update_topology(AddressSpace *as)
> +static void address_space_update_topology(AddressSpace *as, PhysMap *prev,
> +                                            PhysMap *next)
>  {
> -    FlatView old_view = as->current_map;
> +    FlatView old_view = prev->views[as->view_id];
>      FlatView new_view = generate_memory_topology(as->root);
>
>      address_space_update_topology_pass(as, old_view, new_view, false);
>      address_space_update_topology_pass(as, old_view, new_view, true);
> +    next->views[as->view_id] = new_view;
>
> -    as->current_map = new_view;
> -    flatview_destroy(&old_view);
>      address_space_update_ioeventfds(as);
>  }
>
>  static void memory_region_update_topology(MemoryRegion *mr)
>  {
> +    PhysMap *prev, *next;
> +
>      if (memory_region_transaction_depth) {
>          memory_region_update_pending |= !mr || mr->enabled;
>          return;
> @@ -705,16 +706,20 @@ static void memory_region_update_topology(MemoryRegion *mr)
>          return;
>      }
>
> -    MEMORY_LISTENER_CALL_GLOBAL(begin, Forward);
> +     prev = cur_map_get();
> +    /* allocate PhysMap next here */
> +    next = alloc_next_map();
> +    MEMORY_LISTENER_CALL_GLOBAL(begin, Forward, next);
>
>      if (address_space_memory.root) {
> -        address_space_update_topology(&address_space_memory);
> +        address_space_update_topology(&address_space_memory, prev, next);
>      }
>      if (address_space_io.root) {
> -        address_space_update_topology(&address_space_io);
> +        address_space_update_topology(&address_space_io, prev, next);
>      }
>
>      MEMORY_LISTENER_CALL_GLOBAL(commit, Forward);
> +    cur_map_update(next);
>
>      memory_region_update_pending = false;
>  }
> @@ -1071,7 +1076,7 @@ void memory_region_put(MemoryRegion *mr)
>
>      if (atomic_dec_and_test(&mr->ref)) {
>          /* to fix, using call_rcu( ,release) */
> -        mr->life_ops->put(mr);
> +        physmap_reclaimer_enqueue(mr, (ReleaseHandler *)mr->life_ops->put);
>      }
>  }
>
> @@ -1147,13 +1152,18 @@ void memory_region_set_dirty(MemoryRegion *mr, target_phys_addr_t addr,
>  void memory_region_sync_dirty_bitmap(MemoryRegion *mr)
>  {
>      FlatRange *fr;
> +    FlatView *fview;
> +    PhysMap *map;
>
> -    FOR_EACH_FLAT_RANGE(fr, &address_space_memory.current_map) {
> +    map = cur_map_get();
> +    fview = &map->views[address_space_memory.view_id];
> +    FOR_EACH_FLAT_RANGE(fr, fview) {
>          if (fr->mr == mr) {
>              MEMORY_LISTENER_UPDATE_REGION(fr, &address_space_memory,
>                                            Forward, log_sync);
>          }
>      }
> +    physmap_put(map);
>  }
>
>  void memory_region_set_readonly(MemoryRegion *mr, bool readonly)
> @@ -1201,8 +1211,12 @@ static void memory_region_update_coalesced_range(MemoryRegion *mr)
>      FlatRange *fr;
>      CoalescedMemoryRange *cmr;
>      AddrRange tmp;
> +    FlatView *fview;
> +    PhysMap *map;
>
> -    FOR_EACH_FLAT_RANGE(fr, &address_space_memory.current_map) {
> +    map = cur_map_get();
> +    fview = &map->views[address_space_memory.view_id];
> +    FOR_EACH_FLAT_RANGE(fr, fview) {
>          if (fr->mr == mr) {
>              qemu_unregister_coalesced_mmio(int128_get64(fr->addr.start),
>                                             int128_get64(fr->addr.size));
> @@ -1219,6 +1233,7 @@ static void memory_region_update_coalesced_range(MemoryRegion *mr)
>              }
>          }
>      }
> +    physmap_put(map);
>  }
>
>  void memory_region_set_coalescing(MemoryRegion *mr)
> @@ -1458,29 +1473,49 @@ static int cmp_flatrange_addr(const void *addr_, const void *fr_)
>      return 0;
>  }
>
> -static FlatRange *address_space_lookup(AddressSpace *as, AddrRange addr)
> +static FlatRange *address_space_lookup(FlatView *view, AddrRange addr)
>  {
> -    return bsearch(&addr, as->current_map.ranges, as->current_map.nr,
> +    return bsearch(&addr, view->ranges, view->nr,
>                     sizeof(FlatRange), cmp_flatrange_addr);
>  }
>
> +/* dec the ref, which inc by memory_region_find*/
> +void memory_region_section_put(MemoryRegionSection *mrs)
> +{
> +    if (mrs->mr != NULL) {
> +        memory_region_put(mrs->mr);
> +    }
> +}
> +
> +/* inc mr's ref. Caller need dec mr's ref */
>  MemoryRegionSection memory_region_find(MemoryRegion *address_space,
>                                         target_phys_addr_t addr, uint64_t size)
>  {
> +    PhysMap *map;
>      AddressSpace *as = memory_region_to_address_space(address_space);
>      AddrRange range = addrrange_make(int128_make64(addr),
>                                       int128_make64(size));
> -    FlatRange *fr = address_space_lookup(as, range);
> +    FlatView *fview;
> +
> +    map = cur_map_get();
> +
> +    fview = &map->views[as->view_id];
> +    FlatRange *fr = address_space_lookup(fview, range);
>      MemoryRegionSection ret = { .mr = NULL, .size = 0 };
>
>      if (!fr) {
> +        physmap_put(map);
>          return ret;
>      }
>
> -    while (fr > as->current_map.ranges
> +    while (fr > fview->ranges
>             && addrrange_intersects(fr[-1].addr, range)) {
>          --fr;
>      }
> +    /* To fix, the caller must in rcu, or we must inc fr->mr->ref here
> +     */
> +    memory_region_get(fr->mr);
> +    physmap_put(map);
>
>      ret.mr = fr->mr;
>      range = addrrange_intersection(range, fr->addr);
> @@ -1497,10 +1532,13 @@ void memory_global_sync_dirty_bitmap(MemoryRegion *address_space)
>  {
>      AddressSpace *as = memory_region_to_address_space(address_space);
>      FlatRange *fr;
> +    PhysMap *map = cur_map_get();
> +    FlatView *view = &map->views[as->view_id];
>
> -    FOR_EACH_FLAT_RANGE(fr, &as->current_map) {
> +    FOR_EACH_FLAT_RANGE(fr, view) {
>          MEMORY_LISTENER_UPDATE_REGION(fr, as, Forward, log_sync);
>      }
> +    physmap_put(map);
>  }
>
>  void memory_global_dirty_log_start(void)
> @@ -1519,6 +1557,8 @@ static void listener_add_address_space(MemoryListener *listener,
>                                         AddressSpace *as)
>  {
>      FlatRange *fr;
> +    PhysMap *map;
> +    FlatView *view;
>
>      if (listener->address_space_filter
>          && listener->address_space_filter != as->root) {
> @@ -1528,7 +1568,10 @@ static void listener_add_address_space(MemoryListener *listener,
>      if (global_dirty_log) {
>          listener->log_global_start(listener);
>      }
> -    FOR_EACH_FLAT_RANGE(fr, &as->current_map) {
> +
> +    map = cur_map_get();
> +    view = &map->views[as->view_id];
> +    FOR_EACH_FLAT_RANGE(fr, view) {
>          MemoryRegionSection section = {
>              .mr = fr->mr,
>              .address_space = as->root,
> @@ -1539,6 +1582,7 @@ static void listener_add_address_space(MemoryListener *listener,
>          };
>          listener->region_add(listener, &section);
>      }
> +    physmap_put(map);
>  }
>
>  void memory_listener_register(MemoryListener *listener, MemoryRegion *filter)
> @@ -1570,12 +1614,14 @@ void memory_listener_unregister(MemoryListener *listener)
>  void set_system_memory_map(MemoryRegion *mr)
>  {
>      address_space_memory.root = mr;
> +    address_space_memory.view_id = 0;
>      memory_region_update_topology(NULL);
>  }
>
>  void set_system_io_map(MemoryRegion *mr)
>  {
>      address_space_io.root = mr;
> +    address_space_io.view_id = 1;
>      memory_region_update_topology(NULL);
>  }
>
> diff --git a/memory.h b/memory.h
> index 357edd8..18442d4 100644
> --- a/memory.h
> +++ b/memory.h
> @@ -256,7 +256,7 @@ typedef struct MemoryListener MemoryListener;
>   * Use with memory_listener_register() and memory_listener_unregister().
>   */
>  struct MemoryListener {
> -    void (*begin)(MemoryListener *listener);
> +    void (*begin)(MemoryListener *listener, PhysMap *next);
>      void (*commit)(MemoryListener *listener);
>      void (*region_add)(MemoryListener *listener, MemoryRegionSection *section);
>      void (*region_del)(MemoryListener *listener, MemoryRegionSection *section);
> @@ -829,6 +829,13 @@ void mtree_info(fprintf_function mon_printf, void *f);
>
>  void memory_region_get(MemoryRegion *mr);
>  void memory_region_put(MemoryRegion *mr);
> +void physmap_reclaimer_enqueue(void *opaque, ReleaseHandler *release);
> +void physmap_get(PhysMap *map);
> +void physmap_put(PhysMap *map);
> +PhysMap *cur_map_get(void);
> +PhysMap *alloc_next_map(void);
> +void cur_map_update(PhysMap *next);
> +void physmap_init(void);
>  #endif
>
>  #endif
> diff --git a/vl.c b/vl.c
> index 1329c30..12af523 100644
> --- a/vl.c
> +++ b/vl.c
> @@ -3346,6 +3346,7 @@ int main(int argc, char **argv, char **envp)
>      if (ram_size == 0) {
>          ram_size = DEFAULT_RAM_SIZE * 1024 * 1024;
>      }
> +    physmap_init();
>
>      configure_accelerator();
>
> diff --git a/xen-all.c b/xen-all.c
> index 59f2323..41d82fd 100644
> --- a/xen-all.c
> +++ b/xen-all.c
> @@ -452,7 +452,7 @@ static void xen_set_memory(struct MemoryListener *listener,
>      }
>  }
>
> -static void xen_begin(MemoryListener *listener)
> +static void xen_begin(MemoryListener *listener, PhysMap *next)
>  {
>  }
>
> --
> 1.7.4.4
>
pingfan liu Aug. 9, 2012, 7:29 a.m. UTC | #3
On Thu, Aug 9, 2012 at 3:23 AM, Blue Swirl <blauwirbel@gmail.com> wrote:
> On Wed, Aug 8, 2012 at 6:25 AM, Liu Ping Fan <qemulist@gmail.com> wrote:
>> From: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
>>
>> The flatview and radix tree are both reached through a single pointer,
>> so swapping that pointer makes updates to them appear atomic.
>>
>> A MemoryRegion referenced by a radix-tree leaf or by the flatview is
>> reclaimed only once the previous PhysMap is no longer in use.
>>
>> Signed-off-by: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
>> ---
>>  exec.c      |  303 +++++++++++++++++++++++++++++++++++++++-------------------
>>  hw/vhost.c  |    2 +-
>>  hw/xen_pt.c |    2 +-
>>  kvm-all.c   |    2 +-
>>  memory.c    |   92 ++++++++++++++-----
>>  memory.h    |    9 ++-
>>  vl.c        |    1 +
>>  xen-all.c   |    2 +-
>>  8 files changed, 286 insertions(+), 127 deletions(-)
>>
>> diff --git a/exec.c b/exec.c
>> index 01b91b0..97addb9 100644
>> --- a/exec.c
>> +++ b/exec.c
>> @@ -24,6 +24,7 @@
>>  #include <sys/mman.h>
>>  #endif
>>
>> +#include "qemu/atomic.h"
>>  #include "qemu-common.h"
>>  #include "cpu.h"
>>  #include "tcg.h"
>> @@ -35,6 +36,8 @@
>>  #include "qemu-timer.h"
>>  #include "memory.h"
>>  #include "exec-memory.h"
>> +#include "qemu-thread.h"
>> +#include "qemu/reclaimer.h"
>>  #if defined(CONFIG_USER_ONLY)
>>  #include <qemu.h>
>>  #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
>> @@ -184,25 +187,17 @@ static void *l1_map[V_L1_SIZE];
>>
>>  #if !defined(CONFIG_USER_ONLY)
>>
>> -static MemoryRegionSection *phys_sections;
>> -static unsigned phys_sections_nb, phys_sections_nb_alloc;
>>  static uint16_t phys_section_unassigned;
>>  static uint16_t phys_section_notdirty;
>>  static uint16_t phys_section_rom;
>>  static uint16_t phys_section_watch;
>>
>> -
>> -/* Simple allocator for PhysPageEntry nodes */
>> -static PhysPageEntry (*phys_map_nodes)[L2_SIZE];
>> -static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc;
>> -
>>  #define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)
>>
>> -/* This is a multi-level map on the physical address space.
>> -   The bottom level has pointers to MemoryRegionSections.  */
>> -static PhysPageEntry phys_map = { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
>> -
>> +static QemuMutex cur_map_lock;
>> +static PhysMap *cur_map;
>>  QemuMutex mem_map_lock;
>> +static PhysMap *next_map;
>>
>>  static void io_mem_init(void);
>>  static void memory_map_init(void);
>> @@ -383,41 +378,38 @@ static inline PageDesc *page_find(tb_page_addr_t index)
>>
>>  #if !defined(CONFIG_USER_ONLY)
>>
>> -static void phys_map_node_reserve(unsigned nodes)
>> +static void phys_map_node_reserve(PhysMap *map, unsigned nodes)
>>  {
>> -    if (phys_map_nodes_nb + nodes > phys_map_nodes_nb_alloc) {
>> +    if (map->phys_map_nodes_nb + nodes > map->phys_map_nodes_nb_alloc) {
>>          typedef PhysPageEntry Node[L2_SIZE];
>> -        phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc * 2, 16);
>> -        phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc,
>> -                                      phys_map_nodes_nb + nodes);
>> -        phys_map_nodes = g_renew(Node, phys_map_nodes,
>> -                                 phys_map_nodes_nb_alloc);
>> +        map->phys_map_nodes_nb_alloc = MAX(map->phys_map_nodes_nb_alloc * 2,
>> +                                                                        16);
>> +        map->phys_map_nodes_nb_alloc = MAX(map->phys_map_nodes_nb_alloc,
>> +                                      map->phys_map_nodes_nb + nodes);
>> +        map->phys_map_nodes = g_renew(Node, map->phys_map_nodes,
>> +                                 map->phys_map_nodes_nb_alloc);
>>      }
>>  }
>>
>> -static uint16_t phys_map_node_alloc(void)
>> +static uint16_t phys_map_node_alloc(PhysMap *map)
>>  {
>>      unsigned i;
>>      uint16_t ret;
>>
>> -    ret = phys_map_nodes_nb++;
>> +    ret = map->phys_map_nodes_nb++;
>>      assert(ret != PHYS_MAP_NODE_NIL);
>> -    assert(ret != phys_map_nodes_nb_alloc);
>> +    assert(ret != map->phys_map_nodes_nb_alloc);
>>      for (i = 0; i < L2_SIZE; ++i) {
>> -        phys_map_nodes[ret][i].is_leaf = 0;
>> -        phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
>> +        map->phys_map_nodes[ret][i].is_leaf = 0;
>> +        map->phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
>>      }
>>      return ret;
>>  }
>>
>> -static void phys_map_nodes_reset(void)
>> -{
>> -    phys_map_nodes_nb = 0;
>> -}
>> -
>> -
>> -static void phys_page_set_level(PhysPageEntry *lp, target_phys_addr_t *index,
>> -                                target_phys_addr_t *nb, uint16_t leaf,
>> +static void phys_page_set_level(PhysMap *map, PhysPageEntry *lp,
>> +                                target_phys_addr_t *index,
>> +                                target_phys_addr_t *nb,
>> +                                uint16_t leaf,
>>                                  int level)
>>  {
>>      PhysPageEntry *p;
>> @@ -425,8 +417,8 @@ static void phys_page_set_level(PhysPageEntry *lp, target_phys_addr_t *index,
>>      target_phys_addr_t step = (target_phys_addr_t)1 << (level * L2_BITS);
>>
>>      if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) {
>> -        lp->ptr = phys_map_node_alloc();
>> -        p = phys_map_nodes[lp->ptr];
>> +        lp->ptr = phys_map_node_alloc(map);
>> +        p = map->phys_map_nodes[lp->ptr];
>>          if (level == 0) {
>>              for (i = 0; i < L2_SIZE; i++) {
>>                  p[i].is_leaf = 1;
>> @@ -434,7 +426,7 @@ static void phys_page_set_level(PhysPageEntry *lp, target_phys_addr_t *index,
>>              }
>>          }
>>      } else {
>> -        p = phys_map_nodes[lp->ptr];
>> +        p = map->phys_map_nodes[lp->ptr];
>>      }
>>      lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)];
>>
>> @@ -445,24 +437,27 @@ static void phys_page_set_level(PhysPageEntry *lp, target_phys_addr_t *index,
>>              *index += step;
>>              *nb -= step;
>>          } else {
>> -            phys_page_set_level(lp, index, nb, leaf, level - 1);
>> +            phys_page_set_level(map, lp, index, nb, leaf, level - 1);
>>          }
>>          ++lp;
>>      }
>>  }
>>
>> -static void phys_page_set(target_phys_addr_t index, target_phys_addr_t nb,
>> -                          uint16_t leaf)
>> +static void phys_page_set(PhysMap *map, target_phys_addr_t index,
>> +                            target_phys_addr_t nb,
>> +                            uint16_t leaf)
>>  {
>>      /* Wildly overreserve - it doesn't matter much. */
>> -    phys_map_node_reserve(3 * P_L2_LEVELS);
>> +    phys_map_node_reserve(map, 3 * P_L2_LEVELS);
>>
>> -    phys_page_set_level(&phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
>> +    /* update in new tree*/
>> +    phys_page_set_level(map, &map->root, &index, &nb, leaf, P_L2_LEVELS - 1);
>>  }
>>
>> -MemoryRegionSection *phys_page_find(target_phys_addr_t index)
>> +static MemoryRegionSection *phys_page_find_internal(PhysMap *map,
>> +                           target_phys_addr_t index)
>>  {
>> -    PhysPageEntry lp = phys_map;
>> +    PhysPageEntry lp = map->root;
>>      PhysPageEntry *p;
>>      int i;
>>      uint16_t s_index = phys_section_unassigned;
>> @@ -471,13 +466,79 @@ MemoryRegionSection *phys_page_find(target_phys_addr_t index)
>>          if (lp.ptr == PHYS_MAP_NODE_NIL) {
>>              goto not_found;
>>          }
>> -        p = phys_map_nodes[lp.ptr];
>> +        p = map->phys_map_nodes[lp.ptr];
>>          lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
>>      }
>>
>>      s_index = lp.ptr;
>>  not_found:
>> -    return &phys_sections[s_index];
>> +    return &map->phys_sections[s_index];
>> +}
>> +
>> +MemoryRegionSection *phys_page_find(target_phys_addr_t index)
>> +{
>> +    return phys_page_find_internal(cur_map, index);
>> +}
>> +
>> +void physmap_get(PhysMap *map)
>> +{
>> +    atomic_inc(&map->ref);
>> +}
>> +
>> +/* Untill rcu read side finished, do this reclaim */
>
> Until
>
adopted

>> +static ChunkHead physmap_reclaimer_list = { .lh_first = NULL };
>
> Please insert a blank line here.
>
adopted

>> +void physmap_reclaimer_enqueue(void *opaque, ReleaseHandler *release)
>> +{
>> +    reclaimer_enqueue(&physmap_reclaimer_list, opaque, release);
>> +}
>> +
>> +static void destroy_all_mappings(PhysMap *map);
>
> Prototypes belong to the top of the file.
>
adopted

>> +static void phys_map_release(PhysMap *map)
>> +{
>> +    /* emulate for rcu reclaimer for mr */
>> +    reclaimer_worker(&physmap_reclaimer_list);
>> +
>> +    destroy_all_mappings(map);
>> +    g_free(map->phys_map_nodes);
>> +    g_free(map->phys_sections);
>> +    g_free(map->views[0].ranges);
>> +    g_free(map->views[1].ranges);
>> +    g_free(map);
>> +}
>> +
>> +void physmap_put(PhysMap *map)
>> +{
>> +    if (atomic_dec_and_test(&map->ref)) {
>> +        phys_map_release(map);
>> +    }
>> +}
>> +
>> +void cur_map_update(PhysMap *next)
>> +{
>> +    qemu_mutex_lock(&cur_map_lock);
>> +    physmap_put(cur_map);
>> +    cur_map = next;
>> +    smp_mb();
>> +    qemu_mutex_unlock(&cur_map_lock);
>> +}
>> +
>> +PhysMap *cur_map_get(void)
>> +{
>> +    PhysMap *ret;
>> +
>> +    qemu_mutex_lock(&cur_map_lock);
>> +    ret = cur_map;
>> +    physmap_get(ret);
>> +    smp_mb();
>> +    qemu_mutex_unlock(&cur_map_lock);
>> +    return ret;
>> +}
>> +
>> +PhysMap *alloc_next_map(void)
>> +{
>> +    PhysMap *next = g_malloc0(sizeof(PhysMap));
>> +    atomic_set(&next->ref, 1);
>> +    return next;
>>  }
>>
>>  bool memory_region_is_unassigned(MemoryRegion *mr)
>> @@ -632,6 +693,7 @@ void cpu_exec_init_all(void)
>>      memory_map_init();
>>      io_mem_init();
>>      qemu_mutex_init(&mem_map_lock);
>> +    qemu_mutex_init(&cur_map_lock);
>>  #endif
>>  }
>>
>> @@ -2161,17 +2223,18 @@ int page_unprotect(target_ulong address, uintptr_t pc, void *puc)
>>
>>  #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
>>  typedef struct subpage_t {
>> +    PhysMap *map;
>>      MemoryRegion iomem;
>>      target_phys_addr_t base;
>>      uint16_t sub_section[TARGET_PAGE_SIZE];
>>  } subpage_t;
>>
>> -static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
>> -                             uint16_t section);
>> -static subpage_t *subpage_init(target_phys_addr_t base);
>> -static void destroy_page_desc(uint16_t section_index)
>> +static int subpage_register(PhysMap *map, subpage_t *mmio, uint32_t start,
>> +                            uint32_t end, uint16_t section);
>> +static subpage_t *subpage_init(PhysMap *map, target_phys_addr_t base);
>> +static void destroy_page_desc(PhysMap *map, uint16_t section_index)
>>  {
>> -    MemoryRegionSection *section = &phys_sections[section_index];
>> +    MemoryRegionSection *section = &map->phys_sections[section_index];
>>      MemoryRegion *mr = section->mr;
>>
>>      if (mr->subpage) {
>> @@ -2181,7 +2244,7 @@ static void destroy_page_desc(uint16_t section_index)
>>      }
>>  }
>>
>> -static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
>> +static void destroy_l2_mapping(PhysMap *map, PhysPageEntry *lp, unsigned level)
>>  {
>>      unsigned i;
>>      PhysPageEntry *p;
>> @@ -2190,38 +2253,34 @@ static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
>>          return;
>>      }
>>
>> -    p = phys_map_nodes[lp->ptr];
>> +    p = map->phys_map_nodes[lp->ptr];
>>      for (i = 0; i < L2_SIZE; ++i) {
>>          if (!p[i].is_leaf) {
>> -            destroy_l2_mapping(&p[i], level - 1);
>> +            destroy_l2_mapping(map, &p[i], level - 1);
>>          } else {
>> -            destroy_page_desc(p[i].ptr);
>> +            destroy_page_desc(map, p[i].ptr);
>>          }
>>      }
>>      lp->is_leaf = 0;
>>      lp->ptr = PHYS_MAP_NODE_NIL;
>>  }
>>
>> -static void destroy_all_mappings(void)
>> +static void destroy_all_mappings(PhysMap *map)
>>  {
>> -    destroy_l2_mapping(&phys_map, P_L2_LEVELS - 1);
>> -    phys_map_nodes_reset();
>> -}
>> +    PhysPageEntry *root = &map->root;
>>
>> -static uint16_t phys_section_add(MemoryRegionSection *section)
>> -{
>> -    if (phys_sections_nb == phys_sections_nb_alloc) {
>> -        phys_sections_nb_alloc = MAX(phys_sections_nb_alloc * 2, 16);
>> -        phys_sections = g_renew(MemoryRegionSection, phys_sections,
>> -                                phys_sections_nb_alloc);
>> -    }
>> -    phys_sections[phys_sections_nb] = *section;
>> -    return phys_sections_nb++;
>> +    destroy_l2_mapping(map, root, P_L2_LEVELS - 1);
>>  }
>>
>> -static void phys_sections_clear(void)
>> +static uint16_t phys_section_add(PhysMap *map, MemoryRegionSection *section)
>>  {
>> -    phys_sections_nb = 0;
>> +    if (map->phys_sections_nb == map->phys_sections_nb_alloc) {
>> +        map->phys_sections_nb_alloc = MAX(map->phys_sections_nb_alloc * 2, 16);
>> +        map->phys_sections = g_renew(MemoryRegionSection, map->phys_sections,
>> +                                map->phys_sections_nb_alloc);
>> +    }
>> +    map->phys_sections[map->phys_sections_nb] = *section;
>> +    return map->phys_sections_nb++;
>>  }
>>
>>  /* register physical memory.
>> @@ -2232,12 +2291,13 @@ static void phys_sections_clear(void)
>>     start_addr and region_offset are rounded down to a page boundary
>>     before calculating this offset.  This should not be a problem unless
>>     the low bits of start_addr and region_offset differ.  */
>> -static void register_subpage(MemoryRegionSection *section)
>> +static void register_subpage(PhysMap *map, MemoryRegionSection *section)
>>  {
>>      subpage_t *subpage;
>>      target_phys_addr_t base = section->offset_within_address_space
>>          & TARGET_PAGE_MASK;
>> -    MemoryRegionSection *existing = phys_page_find(base >> TARGET_PAGE_BITS);
>> +    MemoryRegionSection *existing = phys_page_find_internal(map,
>> +                                            base >> TARGET_PAGE_BITS);
>>      MemoryRegionSection subsection = {
>>          .offset_within_address_space = base,
>>          .size = TARGET_PAGE_SIZE,
>> @@ -2247,30 +2307,30 @@ static void register_subpage(MemoryRegionSection *section)
>>      assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
>>
>>      if (!(existing->mr->subpage)) {
>> -        subpage = subpage_init(base);
>> +        subpage = subpage_init(map, base);
>>          subsection.mr = &subpage->iomem;
>> -        phys_page_set(base >> TARGET_PAGE_BITS, 1,
>> -                      phys_section_add(&subsection));
>> +        phys_page_set(map, base >> TARGET_PAGE_BITS, 1,
>> +                      phys_section_add(map, &subsection));
>>      } else {
>>          subpage = container_of(existing->mr, subpage_t, iomem);
>>      }
>>      start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
>>      end = start + section->size;
>> -    subpage_register(subpage, start, end, phys_section_add(section));
>> +    subpage_register(map, subpage, start, end, phys_section_add(map, section));
>>  }
>>
>>
>> -static void register_multipage(MemoryRegionSection *section)
>> +static void register_multipage(PhysMap *map, MemoryRegionSection *section)
>>  {
>>      target_phys_addr_t start_addr = section->offset_within_address_space;
>>      ram_addr_t size = section->size;
>>      target_phys_addr_t addr;
>> -    uint16_t section_index = phys_section_add(section);
>> +    uint16_t section_index = phys_section_add(map, section);
>>
>>      assert(size);
>>
>>      addr = start_addr;
>> -    phys_page_set(addr >> TARGET_PAGE_BITS, size >> TARGET_PAGE_BITS,
>> +    phys_page_set(map, addr >> TARGET_PAGE_BITS, size >> TARGET_PAGE_BITS,
>>                    section_index);
>>  }
>>
>> @@ -2278,13 +2338,14 @@ void cpu_register_physical_memory_log(MemoryRegionSection *section,
>>                                        bool readonly)
>>  {
>>      MemoryRegionSection now = *section, remain = *section;
>> +    PhysMap *map = next_map;
>>
>>      if ((now.offset_within_address_space & ~TARGET_PAGE_MASK)
>>          || (now.size < TARGET_PAGE_SIZE)) {
>>          now.size = MIN(TARGET_PAGE_ALIGN(now.offset_within_address_space)
>>                         - now.offset_within_address_space,
>>                         now.size);
>> -        register_subpage(&now);
>> +        register_subpage(map, &now);
>>          remain.size -= now.size;
>>          remain.offset_within_address_space += now.size;
>>          remain.offset_within_region += now.size;
>> @@ -2292,14 +2353,14 @@ void cpu_register_physical_memory_log(MemoryRegionSection *section,
>>      now = remain;
>>      now.size &= TARGET_PAGE_MASK;
>>      if (now.size) {
>> -        register_multipage(&now);
>> +        register_multipage(map, &now);
>>          remain.size -= now.size;
>>          remain.offset_within_address_space += now.size;
>>          remain.offset_within_region += now.size;
>>      }
>>      now = remain;
>>      if (now.size) {
>> -        register_subpage(&now);
>> +        register_subpage(map, &now);
>>      }
>>  }
>>
>> @@ -3001,7 +3062,7 @@ static uint64_t subpage_read(void *opaque, target_phys_addr_t addr,
>>             mmio, len, addr, idx);
>>  #endif
>>
>> -    section = &phys_sections[mmio->sub_section[idx]];
>> +    section = &mmio->map->phys_sections[mmio->sub_section[idx]];
>>      addr += mmio->base;
>>      addr -= section->offset_within_address_space;
>>      addr += section->offset_within_region;
>> @@ -3020,7 +3081,7 @@ static void subpage_write(void *opaque, target_phys_addr_t addr,
>>             __func__, mmio, len, addr, idx, value);
>>  #endif
>>
>> -    section = &phys_sections[mmio->sub_section[idx]];
>> +    section = &mmio->map->phys_sections[mmio->sub_section[idx]];
>>      addr += mmio->base;
>>      addr -= section->offset_within_address_space;
>>      addr += section->offset_within_region;
>> @@ -3065,8 +3126,8 @@ static const MemoryRegionOps subpage_ram_ops = {
>>      .endianness = DEVICE_NATIVE_ENDIAN,
>>  };
>>
>> -static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
>> -                             uint16_t section)
>> +static int subpage_register(PhysMap *map, subpage_t *mmio, uint32_t start,
>> +                              uint32_t end, uint16_t section)
>>  {
>>      int idx, eidx;
>>
>> @@ -3078,10 +3139,10 @@ static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
>>      printf("%s: %p start %08x end %08x idx %08x eidx %08x mem %ld\n", __func__,
>>             mmio, start, end, idx, eidx, memory);
>>  #endif
>> -    if (memory_region_is_ram(phys_sections[section].mr)) {
>> -        MemoryRegionSection new_section = phys_sections[section];
>> +    if (memory_region_is_ram(map->phys_sections[section].mr)) {
>> +        MemoryRegionSection new_section = map->phys_sections[section];
>>          new_section.mr = &io_mem_subpage_ram;
>> -        section = phys_section_add(&new_section);
>> +        section = phys_section_add(map, &new_section);
>>      }
>>      for (; idx <= eidx; idx++) {
>>          mmio->sub_section[idx] = section;
>> @@ -3090,12 +3151,13 @@ static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
>>      return 0;
>>  }
>>
>> -static subpage_t *subpage_init(target_phys_addr_t base)
>> +static subpage_t *subpage_init(PhysMap *map, target_phys_addr_t base)
>>  {
>>      subpage_t *mmio;
>>
>>      mmio = g_malloc0(sizeof(subpage_t));
>>
>> +    mmio->map = map;
>>      mmio->base = base;
>>      memory_region_init_io(&mmio->iomem, &subpage_ops, mmio,
>>                            "subpage", TARGET_PAGE_SIZE);
>> @@ -3104,12 +3166,12 @@ static subpage_t *subpage_init(target_phys_addr_t base)
>>      printf("%s: %p base " TARGET_FMT_plx " len %08x %d\n", __func__,
>>             mmio, base, TARGET_PAGE_SIZE, subpage_memory);
>>  #endif
>> -    subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, phys_section_unassigned);
>> +    subpage_register(map, mmio, 0, TARGET_PAGE_SIZE-1, phys_section_unassigned);
>>
>>      return mmio;
>>  }
>>
>> -static uint16_t dummy_section(MemoryRegion *mr)
>> +static uint16_t dummy_section(PhysMap *map, MemoryRegion *mr)
>>  {
>>      MemoryRegionSection section = {
>>          .mr = mr,
>> @@ -3118,7 +3180,7 @@ static uint16_t dummy_section(MemoryRegion *mr)
>>          .size = UINT64_MAX,
>>      };
>>
>> -    return phys_section_add(&section);
>> +    return phys_section_add(map, &section);
>>  }
>>
>>  MemoryRegion *iotlb_to_region(target_phys_addr_t index)
>> @@ -3140,15 +3202,32 @@ static void io_mem_init(void)
>>                            "watch", UINT64_MAX);
>>  }
>>
>> -static void core_begin(MemoryListener *listener)
>> +#if 0
>> +static void physmap_init(void)
>> +{
>> +    FlatView v = { .ranges = NULL,
>> +                             .nr = 0,
>> +                             .nr_allocated = 0,
>> +    };
>> +
>> +    init_map.views[0] = v;
>> +    init_map.views[1] = v;
>> +    cur_map =  &init_map;
>> +}
>> +#endif
>
> Please delete.
>
adopted

Thanks and regards,
pingfan
>> +
>> +static void core_begin(MemoryListener *listener, PhysMap *new_map)
>>  {
>> -    destroy_all_mappings();
>> -    phys_sections_clear();
>> -    phys_map.ptr = PHYS_MAP_NODE_NIL;
>> -    phys_section_unassigned = dummy_section(&io_mem_unassigned);
>> -    phys_section_notdirty = dummy_section(&io_mem_notdirty);
>> -    phys_section_rom = dummy_section(&io_mem_rom);
>> -    phys_section_watch = dummy_section(&io_mem_watch);
>> +
>> +    new_map->root.ptr = PHYS_MAP_NODE_NIL;
>> +    new_map->root.is_leaf = 0;
>> +
>> +    /* In all maps, these sections have the same index */
>> +    phys_section_unassigned = dummy_section(new_map, &io_mem_unassigned);
>> +    phys_section_notdirty = dummy_section(new_map, &io_mem_notdirty);
>> +    phys_section_rom = dummy_section(new_map, &io_mem_rom);
>> +    phys_section_watch = dummy_section(new_map, &io_mem_watch);
>> +    next_map = new_map;
>>  }
>>
>>  static void core_commit(MemoryListener *listener)
>> @@ -3161,6 +3240,16 @@ static void core_commit(MemoryListener *listener)
>>      for(env = first_cpu; env != NULL; env = env->next_cpu) {
>>          tlb_flush(env, 1);
>>      }
>> +
>> +/* move into high layer
>> +    qemu_mutex_lock(&cur_map_lock);
>> +    if (cur_map != NULL) {
>> +        physmap_put(cur_map);
>> +    }
>> +    cur_map = next_map;
>> +    smp_mb();
>> +    qemu_mutex_unlock(&cur_map_lock);
>> +*/
>
> Also, commented-out code should be deleted.
>
>>  }
>>
>>  static void core_region_add(MemoryListener *listener,
>> @@ -3217,7 +3306,7 @@ static void core_eventfd_del(MemoryListener *listener,
>>  {
>>  }
>>
>> -static void io_begin(MemoryListener *listener)
>> +static void io_begin(MemoryListener *listener, PhysMap *next)
>>  {
>>  }
>>
>> @@ -3329,6 +3418,20 @@ static void memory_map_init(void)
>>      memory_listener_register(&io_memory_listener, system_io);
>>  }
>>
>> +void physmap_init(void)
>> +{
>> +    FlatView v = { .ranges = NULL, .nr = 0, .nr_allocated = 0,
>> +    };
>> +    PhysMap *init_map = g_malloc0(sizeof(PhysMap));
>> +
>> +    atomic_set(&init_map->ref, 1);
>> +    init_map->root.ptr = PHYS_MAP_NODE_NIL;
>> +    init_map->root.is_leaf = 0;
>> +    init_map->views[0] = v;
>> +    init_map->views[1] = v;
>> +    cur_map = init_map;
>> +}
>> +
>>  MemoryRegion *get_system_memory(void)
>>  {
>>      return system_memory;
>> @@ -3391,6 +3494,7 @@ void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
>>      uint32_t val;
>>      target_phys_addr_t page;
>>      MemoryRegionSection *section;
>> +    PhysMap *cur = cur_map_get();
>>
>>      while (len > 0) {
>>          page = addr & TARGET_PAGE_MASK;
>> @@ -3472,6 +3576,7 @@ void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
>>          buf += l;
>>          addr += l;
>>      }
>> +    physmap_put(cur);
>>  }
>>
>>  /* used for ROM loading : can write in RAM and ROM */
>> diff --git a/hw/vhost.c b/hw/vhost.c
>> index 43664e7..df58345 100644
>> --- a/hw/vhost.c
>> +++ b/hw/vhost.c
>> @@ -438,7 +438,7 @@ static bool vhost_section(MemoryRegionSection *section)
>>          && memory_region_is_ram(section->mr);
>>  }
>>
>> -static void vhost_begin(MemoryListener *listener)
>> +static void vhost_begin(MemoryListener *listener, PhysMap *next)
>>  {
>>  }
>>
>> diff --git a/hw/xen_pt.c b/hw/xen_pt.c
>> index 3b6d186..fba8586 100644
>> --- a/hw/xen_pt.c
>> +++ b/hw/xen_pt.c
>> @@ -597,7 +597,7 @@ static void xen_pt_region_update(XenPCIPassthroughState *s,
>>      }
>>  }
>>
>> -static void xen_pt_begin(MemoryListener *l)
>> +static void xen_pt_begin(MemoryListener *l, PhysMap *next)
>>  {
>>  }
>>
>> diff --git a/kvm-all.c b/kvm-all.c
>> index f8e4328..bc42cab 100644
>> --- a/kvm-all.c
>> +++ b/kvm-all.c
>> @@ -693,7 +693,7 @@ static void kvm_set_phys_mem(MemoryRegionSection *section, bool add)
>>      }
>>  }
>>
>> -static void kvm_begin(MemoryListener *listener)
>> +static void kvm_begin(MemoryListener *listener, PhysMap *next)
>>  {
>>  }
>>
>> diff --git a/memory.c b/memory.c
>> index c7f2cfd..54cdc7f 100644
>> --- a/memory.c
>> +++ b/memory.c
>> @@ -20,6 +20,7 @@
>>  #include "kvm.h"
>>  #include <assert.h>
>>  #include "hw/qdev.h"
>> +#include "qemu-thread.h"
>>
>>  #define WANT_EXEC_OBSOLETE
>>  #include "exec-obsolete.h"
>> @@ -192,7 +193,7 @@ typedef struct AddressSpaceOps AddressSpaceOps;
>>  /* A system address space - I/O, memory, etc. */
>>  struct AddressSpace {
>>      MemoryRegion *root;
>> -    FlatView current_map;
>> +    int view_id;
>>      int ioeventfd_nb;
>>      MemoryRegionIoeventfd *ioeventfds;
>>  };
>> @@ -232,11 +233,6 @@ static void flatview_insert(FlatView *view, unsigned pos, FlatRange *range)
>>      ++view->nr;
>>  }
>>
>> -static void flatview_destroy(FlatView *view)
>> -{
>> -    g_free(view->ranges);
>> -}
>> -
>>  static bool can_merge(FlatRange *r1, FlatRange *r2)
>>  {
>>      return int128_eq(addrrange_end(r1->addr), r2->addr.start)
>> @@ -594,8 +590,10 @@ static void address_space_update_ioeventfds(AddressSpace *as)
>>      MemoryRegionIoeventfd *ioeventfds = NULL;
>>      AddrRange tmp;
>>      unsigned i;
>> +    PhysMap *map = cur_map_get();
>> +    FlatView *view = &map->views[as->view_id];
>>
>> -    FOR_EACH_FLAT_RANGE(fr, &as->current_map) {
>> +    FOR_EACH_FLAT_RANGE(fr, view) {
>>          for (i = 0; i < fr->mr->ioeventfd_nb; ++i) {
>>              tmp = addrrange_shift(fr->mr->ioeventfds[i].addr,
>>                                    int128_sub(fr->addr.start,
>> @@ -616,6 +614,7 @@ static void address_space_update_ioeventfds(AddressSpace *as)
>>      g_free(as->ioeventfds);
>>      as->ioeventfds = ioeventfds;
>>      as->ioeventfd_nb = ioeventfd_nb;
>> +    physmap_put(map);
>>  }
>>
>>  static void address_space_update_topology_pass(AddressSpace *as,
>> @@ -681,21 +680,23 @@ static void address_space_update_topology_pass(AddressSpace *as,
>>  }
>>
>>
>> -static void address_space_update_topology(AddressSpace *as)
>> +static void address_space_update_topology(AddressSpace *as, PhysMap *prev,
>> +                                            PhysMap *next)
>>  {
>> -    FlatView old_view = as->current_map;
>> +    FlatView old_view = prev->views[as->view_id];
>>      FlatView new_view = generate_memory_topology(as->root);
>>
>>      address_space_update_topology_pass(as, old_view, new_view, false);
>>      address_space_update_topology_pass(as, old_view, new_view, true);
>> +    next->views[as->view_id] = new_view;
>>
>> -    as->current_map = new_view;
>> -    flatview_destroy(&old_view);
>>      address_space_update_ioeventfds(as);
>>  }
>>
>>  static void memory_region_update_topology(MemoryRegion *mr)
>>  {
>> +    PhysMap *prev, *next;
>> +
>>      if (memory_region_transaction_depth) {
>>          memory_region_update_pending |= !mr || mr->enabled;
>>          return;
>> @@ -705,16 +706,20 @@ static void memory_region_update_topology(MemoryRegion *mr)
>>          return;
>>      }
>>
>> -    MEMORY_LISTENER_CALL_GLOBAL(begin, Forward);
>> +    prev = cur_map_get();
>> +    /* allocate PhysMap next here */
>> +    next = alloc_next_map();
>> +    MEMORY_LISTENER_CALL_GLOBAL(begin, Forward, next);
>>
>>      if (address_space_memory.root) {
>> -        address_space_update_topology(&address_space_memory);
>> +        address_space_update_topology(&address_space_memory, prev, next);
>>      }
>>      if (address_space_io.root) {
>> -        address_space_update_topology(&address_space_io);
>> +        address_space_update_topology(&address_space_io, prev, next);
>>      }
>>
>>      MEMORY_LISTENER_CALL_GLOBAL(commit, Forward);
>> +    cur_map_update(next);
>>
>>      memory_region_update_pending = false;
>>  }
>> @@ -1071,7 +1076,7 @@ void memory_region_put(MemoryRegion *mr)
>>
>>      if (atomic_dec_and_test(&mr->ref)) {
>>          /* to fix, using call_rcu( ,release) */
>> -        mr->life_ops->put(mr);
>> +        physmap_reclaimer_enqueue(mr, (ReleaseHandler *)mr->life_ops->put);
>>      }
>>  }
>>
>> @@ -1147,13 +1152,18 @@ void memory_region_set_dirty(MemoryRegion *mr, target_phys_addr_t addr,
>>  void memory_region_sync_dirty_bitmap(MemoryRegion *mr)
>>  {
>>      FlatRange *fr;
>> +    FlatView *fview;
>> +    PhysMap *map;
>>
>> -    FOR_EACH_FLAT_RANGE(fr, &address_space_memory.current_map) {
>> +    map = cur_map_get();
>> +    fview = &map->views[address_space_memory.view_id];
>> +    FOR_EACH_FLAT_RANGE(fr, fview) {
>>          if (fr->mr == mr) {
>>              MEMORY_LISTENER_UPDATE_REGION(fr, &address_space_memory,
>>                                            Forward, log_sync);
>>          }
>>      }
>> +    physmap_put(map);
>>  }
>>
>>  void memory_region_set_readonly(MemoryRegion *mr, bool readonly)
>> @@ -1201,8 +1211,12 @@ static void memory_region_update_coalesced_range(MemoryRegion *mr)
>>      FlatRange *fr;
>>      CoalescedMemoryRange *cmr;
>>      AddrRange tmp;
>> +    FlatView *fview;
>> +    PhysMap *map;
>>
>> -    FOR_EACH_FLAT_RANGE(fr, &address_space_memory.current_map) {
>> +    map = cur_map_get();
>> +    fview = &map->views[address_space_memory.view_id];
>> +    FOR_EACH_FLAT_RANGE(fr, fview) {
>>          if (fr->mr == mr) {
>>              qemu_unregister_coalesced_mmio(int128_get64(fr->addr.start),
>>                                             int128_get64(fr->addr.size));
>> @@ -1219,6 +1233,7 @@ static void memory_region_update_coalesced_range(MemoryRegion *mr)
>>              }
>>          }
>>      }
>> +    physmap_put(map);
>>  }
>>
>>  void memory_region_set_coalescing(MemoryRegion *mr)
>> @@ -1458,29 +1473,49 @@ static int cmp_flatrange_addr(const void *addr_, const void *fr_)
>>      return 0;
>>  }
>>
>> -static FlatRange *address_space_lookup(AddressSpace *as, AddrRange addr)
>> +static FlatRange *address_space_lookup(FlatView *view, AddrRange addr)
>>  {
>> -    return bsearch(&addr, as->current_map.ranges, as->current_map.nr,
>> +    return bsearch(&addr, view->ranges, view->nr,
>>                     sizeof(FlatRange), cmp_flatrange_addr);
>>  }
>>
>> +/* dec the ref, which was inc'ed by memory_region_find */
>> +void memory_region_section_put(MemoryRegionSection *mrs)
>> +{
>> +    if (mrs->mr != NULL) {
>> +        memory_region_put(mrs->mr);
>> +    }
>> +}
>> +
>> +/* inc mr's ref. Caller needs to dec mr's ref */
>>  MemoryRegionSection memory_region_find(MemoryRegion *address_space,
>>                                         target_phys_addr_t addr, uint64_t size)
>>  {
>> +    PhysMap *map;
>>      AddressSpace *as = memory_region_to_address_space(address_space);
>>      AddrRange range = addrrange_make(int128_make64(addr),
>>                                       int128_make64(size));
>> -    FlatRange *fr = address_space_lookup(as, range);
>> +    FlatView *fview;
>> +
>> +    map = cur_map_get();
>> +
>> +    fview = &map->views[as->view_id];
>> +    FlatRange *fr = address_space_lookup(fview, range);
>>      MemoryRegionSection ret = { .mr = NULL, .size = 0 };
>>
>>      if (!fr) {
>> +        physmap_put(map);
>>          return ret;
>>      }
>>
>> -    while (fr > as->current_map.ranges
>> +    while (fr > fview->ranges
>>             && addrrange_intersects(fr[-1].addr, range)) {
>>          --fr;
>>      }
>> +    /* To fix: the caller must be in an rcu read section, or we must inc
>> +     * fr->mr->ref here */
>> +    memory_region_get(fr->mr);
>> +    physmap_put(map);
>>
>>      ret.mr = fr->mr;
>>      range = addrrange_intersection(range, fr->addr);
>> @@ -1497,10 +1532,13 @@ void memory_global_sync_dirty_bitmap(MemoryRegion *address_space)
>>  {
>>      AddressSpace *as = memory_region_to_address_space(address_space);
>>      FlatRange *fr;
>> +    PhysMap *map = cur_map_get();
>> +    FlatView *view = &map->views[as->view_id];
>>
>> -    FOR_EACH_FLAT_RANGE(fr, &as->current_map) {
>> +    FOR_EACH_FLAT_RANGE(fr, view) {
>>          MEMORY_LISTENER_UPDATE_REGION(fr, as, Forward, log_sync);
>>      }
>> +    physmap_put(map);
>>  }
>>
>>  void memory_global_dirty_log_start(void)
>> @@ -1519,6 +1557,8 @@ static void listener_add_address_space(MemoryListener *listener,
>>                                         AddressSpace *as)
>>  {
>>      FlatRange *fr;
>> +    PhysMap *map;
>> +    FlatView *view;
>>
>>      if (listener->address_space_filter
>>          && listener->address_space_filter != as->root) {
>> @@ -1528,7 +1568,10 @@ static void listener_add_address_space(MemoryListener *listener,
>>      if (global_dirty_log) {
>>          listener->log_global_start(listener);
>>      }
>> -    FOR_EACH_FLAT_RANGE(fr, &as->current_map) {
>> +
>> +    map = cur_map_get();
>> +    view = &map->views[as->view_id];
>> +    FOR_EACH_FLAT_RANGE(fr, view) {
>>          MemoryRegionSection section = {
>>              .mr = fr->mr,
>>              .address_space = as->root,
>> @@ -1539,6 +1582,7 @@ static void listener_add_address_space(MemoryListener *listener,
>>          };
>>          listener->region_add(listener, &section);
>>      }
>> +    physmap_put(map);
>>  }
>>
>>  void memory_listener_register(MemoryListener *listener, MemoryRegion *filter)
>> @@ -1570,12 +1614,14 @@ void memory_listener_unregister(MemoryListener *listener)
>>  void set_system_memory_map(MemoryRegion *mr)
>>  {
>>      address_space_memory.root = mr;
>> +    address_space_memory.view_id = 0;
>>      memory_region_update_topology(NULL);
>>  }
>>
>>  void set_system_io_map(MemoryRegion *mr)
>>  {
>>      address_space_io.root = mr;
>> +    address_space_io.view_id = 1;
>>      memory_region_update_topology(NULL);
>>  }
>>
>> diff --git a/memory.h b/memory.h
>> index 357edd8..18442d4 100644
>> --- a/memory.h
>> +++ b/memory.h
>> @@ -256,7 +256,7 @@ typedef struct MemoryListener MemoryListener;
>>   * Use with memory_listener_register() and memory_listener_unregister().
>>   */
>>  struct MemoryListener {
>> -    void (*begin)(MemoryListener *listener);
>> +    void (*begin)(MemoryListener *listener, PhysMap *next);
>>      void (*commit)(MemoryListener *listener);
>>      void (*region_add)(MemoryListener *listener, MemoryRegionSection *section);
>>      void (*region_del)(MemoryListener *listener, MemoryRegionSection *section);
>> @@ -829,6 +829,13 @@ void mtree_info(fprintf_function mon_printf, void *f);
>>
>>  void memory_region_get(MemoryRegion *mr);
>>  void memory_region_put(MemoryRegion *mr);
>> +void physmap_reclaimer_enqueue(void *opaque, ReleaseHandler *release);
>> +void physmap_get(PhysMap *map);
>> +void physmap_put(PhysMap *map);
>> +PhysMap *cur_map_get(void);
>> +PhysMap *alloc_next_map(void);
>> +void cur_map_update(PhysMap *next);
>> +void physmap_init(void);
>>  #endif
>>
>>  #endif
>> diff --git a/vl.c b/vl.c
>> index 1329c30..12af523 100644
>> --- a/vl.c
>> +++ b/vl.c
>> @@ -3346,6 +3346,7 @@ int main(int argc, char **argv, char **envp)
>>      if (ram_size == 0) {
>>          ram_size = DEFAULT_RAM_SIZE * 1024 * 1024;
>>      }
>> +    physmap_init();
>>
>>      configure_accelerator();
>>
>> diff --git a/xen-all.c b/xen-all.c
>> index 59f2323..41d82fd 100644
>> --- a/xen-all.c
>> +++ b/xen-all.c
>> @@ -452,7 +452,7 @@ static void xen_set_memory(struct MemoryListener *listener,
>>      }
>>  }
>>
>> -static void xen_begin(MemoryListener *listener)
>> +static void xen_begin(MemoryListener *listener, PhysMap *next)
>>  {
>>  }
>>
>> --
>> 1.7.4.4
>>
pingfan liu Aug. 11, 2012, 1:58 a.m. UTC | #4
On Wed, Aug 8, 2012 at 5:41 PM, Avi Kivity <avi@redhat.com> wrote:
> On 08/08/2012 09:25 AM, Liu Ping Fan wrote:
>> From: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
>>
>> Flatview and radix view are all under the protection of pointer.
>> And this make sure the change of them seem to be atomic!
>>
>> The mr accessed by radix-tree leaf or flatview will be reclaimed
>> after the prev PhysMap not in use any longer
>>
>
> IMO this cleverness should come much later.  Let's first take care of
> dropping the big qemu lock, then make switching memory maps more efficient.
>
> The initial paths could look like:
>
>   lookup:
>      take mem_map_lock
>      lookup
>      take ref
>      drop mem_map_lock
>
>   update:
>      take mem_map_lock (in core_begin)
>      do updates
>      drop mem_map_lock
>
> Later we can replace mem_map_lock with either a rwlock or (real) rcu.
>
>
>>
>>  #if !defined(CONFIG_USER_ONLY)
>>
>> -static void phys_map_node_reserve(unsigned nodes)
>> +static void phys_map_node_reserve(PhysMap *map, unsigned nodes)
>>  {
>> -    if (phys_map_nodes_nb + nodes > phys_map_nodes_nb_alloc) {
>> +    if (map->phys_map_nodes_nb + nodes > map->phys_map_nodes_nb_alloc) {
>>          typedef PhysPageEntry Node[L2_SIZE];
>> -        phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc * 2, 16);
>> -        phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc,
>> -                                      phys_map_nodes_nb + nodes);
>> -        phys_map_nodes = g_renew(Node, phys_map_nodes,
>> -                                 phys_map_nodes_nb_alloc);
>> +        map->phys_map_nodes_nb_alloc = MAX(map->phys_map_nodes_nb_alloc * 2,
>> +                                                                        16);
>> +        map->phys_map_nodes_nb_alloc = MAX(map->phys_map_nodes_nb_alloc,
>> +                                      map->phys_map_nodes_nb + nodes);
>> +        map->phys_map_nodes = g_renew(Node, map->phys_map_nodes,
>> +                                 map->phys_map_nodes_nb_alloc);
>>      }
>>  }
>
> Please have a patch that just adds the map parameter to all these
> functions.  This makes the later patch, that adds the copy, easier to read.
>
>> +
>> +void cur_map_update(PhysMap *next)
>> +{
>> +    qemu_mutex_lock(&cur_map_lock);
>> +    physmap_put(cur_map);
>> +    cur_map = next;
>> +    smp_mb();
>> +    qemu_mutex_unlock(&cur_map_lock);
>> +}
>
> IMO this can be mem_map_lock.
>
> If we take my previous suggestion:
>
>   lookup:
>      take mem_map_lock
>      lookup
>      take ref
>      drop mem_map_lock
>
>   update:
>      take mem_map_lock (in core_begin)
>      do updates
>      drop mem_map_lock
>
> And update it to
>
>
>   update:
>      prepare next_map (in core_begin)
>      do updates
>      take mem_map_lock (in core_commit)
>      switch maps
>      drop mem_map_lock
>      free old map
>
>
> Note the lookup path copies the MemoryRegionSection instead of
> referencing it.  Thus we can destroy the old map without worrying; the
> only pointers will point to MemoryRegions, which will be protected by
> the refcounts on their Objects.
>
Just found there may be a leak here. If mrs points to a subpage, then the
subpage_t could be freed by the destroy while still in use.
To avoid that situation, we can walk down the chain to pin ourselves on the
Object-based mr, but then we must expose the address conversion in
subpage_read() right here. Right?
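
To make the hazard concrete, here is a minimal sketch against this patch's
types; the function is illustrative, not code from the series:

    /* Assume the lookup path copied the MemoryRegionSection and the old
     * PhysMap has since been dropped with physmap_put(). */
    static uint64_t read_through_copy(MemoryRegionSection mrs,
                                      target_phys_addr_t addr)
    {
        if (mrs.mr->subpage) {
            subpage_t *sub = container_of(mrs.mr, subpage_t, iomem);
            unsigned idx = SUBPAGE_IDX(addr);

            /* sub->map still points at the old PhysMap; once
             * phys_map_release() has run, this dereference is a
             * use-after-free even though mrs itself is a private copy. */
            mrs = sub->map->phys_sections[sub->sub_section[idx]];
        }
        /* ... dispatch the access through mrs.mr as usual ... */
        return 0;
    }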

Regards,
pingfan

> This can be easily switched to rcu:
>
>   update:
>      prepare next_map (in core_begin)
>      do updates
>      switch maps - rcu_assign_pointer
>      call_rcu(free old map) (or synchronize_rcu; free old maps)
>
> Again, this should be done after the simplistic patch that enables
> parallel lookup but keeps just one map.
>
>
>
> --
> error compiling committee.c: too many arguments to function
pingfan liu Aug. 11, 2012, 10:06 a.m. UTC | #5
On Sat, Aug 11, 2012 at 9:58 AM, liu ping fan <qemulist@gmail.com> wrote:
> On Wed, Aug 8, 2012 at 5:41 PM, Avi Kivity <avi@redhat.com> wrote:
>> On 08/08/2012 09:25 AM, Liu Ping Fan wrote:
>>> From: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
>>>
>>> Flatview and radix view are all under the protection of pointer.
>>> And this make sure the change of them seem to be atomic!
>>>
>>> The mr accessed by radix-tree leaf or flatview will be reclaimed
>>> after the prev PhysMap not in use any longer
>>>
>>
>> IMO this cleverness should come much later.  Let's first take care of
>> dropping the big qemu lock, then make switching memory maps more efficient.
>>
>> The initial paths could look like:
>>
>>   lookup:
>>      take mem_map_lock
>>      lookup
>>      take ref
>>      drop mem_map_lock
>>
>>   update:
>>      take mem_map_lock (in core_begin)
>>      do updates
>>      drop mem_map_lock
>>
>> Later we can replace mem_map_lock with either a rwlock or (real) rcu.
>>
>>
>>>
>>>  #if !defined(CONFIG_USER_ONLY)
>>>
>>> -static void phys_map_node_reserve(unsigned nodes)
>>> +static void phys_map_node_reserve(PhysMap *map, unsigned nodes)
>>>  {
>>> -    if (phys_map_nodes_nb + nodes > phys_map_nodes_nb_alloc) {
>>> +    if (map->phys_map_nodes_nb + nodes > map->phys_map_nodes_nb_alloc) {
>>>          typedef PhysPageEntry Node[L2_SIZE];
>>> -        phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc * 2, 16);
>>> -        phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc,
>>> -                                      phys_map_nodes_nb + nodes);
>>> -        phys_map_nodes = g_renew(Node, phys_map_nodes,
>>> -                                 phys_map_nodes_nb_alloc);
>>> +        map->phys_map_nodes_nb_alloc = MAX(map->phys_map_nodes_nb_alloc * 2,
>>> +                                                                        16);
>>> +        map->phys_map_nodes_nb_alloc = MAX(map->phys_map_nodes_nb_alloc,
>>> +                                      map->phys_map_nodes_nb + nodes);
>>> +        map->phys_map_nodes = g_renew(Node, map->phys_map_nodes,
>>> +                                 map->phys_map_nodes_nb_alloc);
>>>      }
>>>  }
>>
>> Please have a patch that just adds the map parameter to all these
>> functions.  This makes the later patch, that adds the copy, easier to read.
>>
>>> +
>>> +void cur_map_update(PhysMap *next)
>>> +{
>>> +    qemu_mutex_lock(&cur_map_lock);
>>> +    physmap_put(cur_map);
>>> +    cur_map = next;
>>> +    smp_mb();
>>> +    qemu_mutex_unlock(&cur_map_lock);
>>> +}
>>
>> IMO this can be mem_map_lock.
>>
>> If we take my previous suggestion:
>>
>>   lookup:
>>      take mem_map_lock
>>      lookup
>>      take ref
>>      drop mem_map_lock
>>
>>   update:
>>      take mem_map_lock (in core_begin)
>>      do updates
>>      drop mem_map_lock
>>
>> And update it to
>>
>>
>>   update:
>>      prepare next_map (in core_begin)
>>      do updates
>>      take mem_map_lock (in core_commit)
>>      switch maps
>>      drop mem_map_lock
>>      free old map
>>
>>
>> Note the lookup path copies the MemoryRegionSection instead of
>> referencing it.  Thus we can destroy the old map without worrying; the
>> only pointers will point to MemoryRegions, which will be protected by
>> the refcounts on their Objects.
>>
> Just found there may be a leak here. If mrs points to a subpage, then the
> subpage_t could be freed by the destroy while still in use.
> To avoid that situation, we can walk down the chain to pin ourselves on the
> Object-based mr, but then we must expose the address conversion in
> subpage_read() right here. Right?
>
Oh, having just read the code logic, I think walking down the chain is
enough. And subpage_read/write() gets bypassed, so there is no need to fold
in the addr translation.
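
In code, the walk-down would be roughly the following (a sketch;
pin_terminal_mr() is my name, nothing in the patch):

    /* Resolve a subpage container down to the terminal, Object-backed
     * MemoryRegion and pin that one, so the old PhysMap and its subpage_t
     * can be destroyed safely while the access is in flight. */
    static MemoryRegion *pin_terminal_mr(MemoryRegionSection *mrs,
                                         target_phys_addr_t addr)
    {
        MemoryRegion *mr = mrs->mr;

        if (mr->subpage) {
            subpage_t *sub = container_of(mr, subpage_t, iomem);
            unsigned idx = SUBPAGE_IDX(addr);

            mr = sub->map->phys_sections[sub->sub_section[idx]].mr;
        }
        memory_region_get(mr);    /* the ref lives on the mr's Object */
        return mr;
    }

The caller would then drop the ref with memory_region_put() once the access
completes.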

Regards,
pingfan

> Regards,
> pingfan
>
>> This can be easily switched to rcu:
>>
>>   update:
>>      prepare next_map (in core_begin)
>>      do updates
>>      switch maps - rcu_assign_pointer
>>      call_rcu(free old map) (or synchronize_rcu; free old maps)
>>
>> Again, this should be done after the simplistic patch that enables
>> parallel lookup but keeps just one map.
>>
>>
>>
>> --
>> error compiling committee.c: too many arguments to function
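
For reference, the lock-based lookup/update paths proposed in the review
could look roughly like this in C. This is only a sketch: mem_map_lock,
cur_map, phys_page_find(), memory_region_get()/memory_region_put() and
physmap_put() are names from this series, while the two functions themselves
are illustrative.

    /* lookup: resolve under the lock, copy the section, pin the region */
    MemoryRegionSection locked_phys_page_lookup(target_phys_addr_t index)
    {
        MemoryRegionSection mrs;

        qemu_mutex_lock(&mem_map_lock);
        mrs = *phys_page_find(index); /* copy, do not keep the pointer */
        memory_region_get(mrs.mr);    /* caller drops with memory_region_put() */
        qemu_mutex_unlock(&mem_map_lock);
        return mrs;
    }

    /* update: build the next map outside the lock, switch under it */
    void locked_map_switch(PhysMap *next)
    {
        PhysMap *old;

        qemu_mutex_lock(&mem_map_lock);
        old = cur_map;
        cur_map = next;               /* rcu: rcu_assign_pointer(cur_map, next) */
        qemu_mutex_unlock(&mem_map_lock);
        physmap_put(old);             /* rcu: call_rcu() the release instead */
    }

With real RCU the lookup side loses the lock entirely: readers bracket the
copy with rcu_read_lock()/rcu_read_unlock(), and the updater publishes the
new map with rcu_assign_pointer() and reclaims the old one via call_rcu()
(or synchronize_rcu() followed by a plain free), exactly as sketched in the
review above.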

Patch

diff --git a/exec.c b/exec.c
index 01b91b0..97addb9 100644
--- a/exec.c
+++ b/exec.c
@@ -24,6 +24,7 @@ 
 #include <sys/mman.h>
 #endif
 
+#include "qemu/atomic.h"
 #include "qemu-common.h"
 #include "cpu.h"
 #include "tcg.h"
@@ -35,6 +36,8 @@ 
 #include "qemu-timer.h"
 #include "memory.h"
 #include "exec-memory.h"
+#include "qemu-thread.h"
+#include "qemu/reclaimer.h"
 #if defined(CONFIG_USER_ONLY)
 #include <qemu.h>
 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
@@ -184,25 +187,17 @@  static void *l1_map[V_L1_SIZE];
 
 #if !defined(CONFIG_USER_ONLY)
 
-static MemoryRegionSection *phys_sections;
-static unsigned phys_sections_nb, phys_sections_nb_alloc;
 static uint16_t phys_section_unassigned;
 static uint16_t phys_section_notdirty;
 static uint16_t phys_section_rom;
 static uint16_t phys_section_watch;
 
-
-/* Simple allocator for PhysPageEntry nodes */
-static PhysPageEntry (*phys_map_nodes)[L2_SIZE];
-static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc;
-
 #define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)
 
-/* This is a multi-level map on the physical address space.
-   The bottom level has pointers to MemoryRegionSections.  */
-static PhysPageEntry phys_map = { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
-
+static QemuMutex cur_map_lock;
+static PhysMap *cur_map;
 QemuMutex mem_map_lock;
+static PhysMap *next_map;
 
 static void io_mem_init(void);
 static void memory_map_init(void);
@@ -383,41 +378,38 @@  static inline PageDesc *page_find(tb_page_addr_t index)
 
 #if !defined(CONFIG_USER_ONLY)
 
-static void phys_map_node_reserve(unsigned nodes)
+static void phys_map_node_reserve(PhysMap *map, unsigned nodes)
 {
-    if (phys_map_nodes_nb + nodes > phys_map_nodes_nb_alloc) {
+    if (map->phys_map_nodes_nb + nodes > map->phys_map_nodes_nb_alloc) {
         typedef PhysPageEntry Node[L2_SIZE];
-        phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc * 2, 16);
-        phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc,
-                                      phys_map_nodes_nb + nodes);
-        phys_map_nodes = g_renew(Node, phys_map_nodes,
-                                 phys_map_nodes_nb_alloc);
+        map->phys_map_nodes_nb_alloc = MAX(map->phys_map_nodes_nb_alloc * 2,
+                                                                        16);
+        map->phys_map_nodes_nb_alloc = MAX(map->phys_map_nodes_nb_alloc,
+                                      map->phys_map_nodes_nb + nodes);
+        map->phys_map_nodes = g_renew(Node, map->phys_map_nodes,
+                                 map->phys_map_nodes_nb_alloc);
     }
 }
 
-static uint16_t phys_map_node_alloc(void)
+static uint16_t phys_map_node_alloc(PhysMap *map)
 {
     unsigned i;
     uint16_t ret;
 
-    ret = phys_map_nodes_nb++;
+    ret = map->phys_map_nodes_nb++;
     assert(ret != PHYS_MAP_NODE_NIL);
-    assert(ret != phys_map_nodes_nb_alloc);
+    assert(ret != map->phys_map_nodes_nb_alloc);
     for (i = 0; i < L2_SIZE; ++i) {
-        phys_map_nodes[ret][i].is_leaf = 0;
-        phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
+        map->phys_map_nodes[ret][i].is_leaf = 0;
+        map->phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
     }
     return ret;
 }
 
-static void phys_map_nodes_reset(void)
-{
-    phys_map_nodes_nb = 0;
-}
-
-
-static void phys_page_set_level(PhysPageEntry *lp, target_phys_addr_t *index,
-                                target_phys_addr_t *nb, uint16_t leaf,
+static void phys_page_set_level(PhysMap *map, PhysPageEntry *lp,
+                                target_phys_addr_t *index,
+                                target_phys_addr_t *nb,
+                                uint16_t leaf,
                                 int level)
 {
     PhysPageEntry *p;
@@ -425,8 +417,8 @@  static void phys_page_set_level(PhysPageEntry *lp, target_phys_addr_t *index,
     target_phys_addr_t step = (target_phys_addr_t)1 << (level * L2_BITS);
 
     if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) {
-        lp->ptr = phys_map_node_alloc();
-        p = phys_map_nodes[lp->ptr];
+        lp->ptr = phys_map_node_alloc(map);
+        p = map->phys_map_nodes[lp->ptr];
         if (level == 0) {
             for (i = 0; i < L2_SIZE; i++) {
                 p[i].is_leaf = 1;
@@ -434,7 +426,7 @@  static void phys_page_set_level(PhysPageEntry *lp, target_phys_addr_t *index,
             }
         }
     } else {
-        p = phys_map_nodes[lp->ptr];
+        p = map->phys_map_nodes[lp->ptr];
     }
     lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)];
 
@@ -445,24 +437,27 @@  static void phys_page_set_level(PhysPageEntry *lp, target_phys_addr_t *index,
             *index += step;
             *nb -= step;
         } else {
-            phys_page_set_level(lp, index, nb, leaf, level - 1);
+            phys_page_set_level(map, lp, index, nb, leaf, level - 1);
         }
         ++lp;
     }
 }
 
-static void phys_page_set(target_phys_addr_t index, target_phys_addr_t nb,
-                          uint16_t leaf)
+static void phys_page_set(PhysMap *map, target_phys_addr_t index,
+                            target_phys_addr_t nb,
+                            uint16_t leaf)
 {
     /* Wildly overreserve - it doesn't matter much. */
-    phys_map_node_reserve(3 * P_L2_LEVELS);
+    phys_map_node_reserve(map, 3 * P_L2_LEVELS);
 
-    phys_page_set_level(&phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
+    /* update in the new tree */
+    phys_page_set_level(map, &map->root, &index, &nb, leaf, P_L2_LEVELS - 1);
 }
 
-MemoryRegionSection *phys_page_find(target_phys_addr_t index)
+static MemoryRegionSection *phys_page_find_internal(PhysMap *map,
+                           target_phys_addr_t index)
 {
-    PhysPageEntry lp = phys_map;
+    PhysPageEntry lp = map->root;
     PhysPageEntry *p;
     int i;
     uint16_t s_index = phys_section_unassigned;
@@ -471,13 +466,79 @@  MemoryRegionSection *phys_page_find(target_phys_addr_t index)
         if (lp.ptr == PHYS_MAP_NODE_NIL) {
             goto not_found;
         }
-        p = phys_map_nodes[lp.ptr];
+        p = map->phys_map_nodes[lp.ptr];
         lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
     }
 
     s_index = lp.ptr;
 not_found:
-    return &phys_sections[s_index];
+    return &map->phys_sections[s_index];
+}
+
+MemoryRegionSection *phys_page_find(target_phys_addr_t index)
+{
+    return phys_page_find_internal(cur_map, index);
+}
+
+void physmap_get(PhysMap *map)
+{
+    atomic_inc(&map->ref);
+}
+
+/* Until the rcu read side has finished, do this reclaim */
+static ChunkHead physmap_reclaimer_list = { .lh_first = NULL };
+void physmap_reclaimer_enqueue(void *opaque, ReleaseHandler *release)
+{
+    reclaimer_enqueue(&physmap_reclaimer_list, opaque, release);
+}
+
+static void destroy_all_mappings(PhysMap *map);
+static void phys_map_release(PhysMap *map)
+{
+    /* emulate an rcu reclaimer for mr */
+    reclaimer_worker(&physmap_reclaimer_list);
+
+    destroy_all_mappings(map);
+    g_free(map->phys_map_nodes);
+    g_free(map->phys_sections);
+    g_free(map->views[0].ranges);
+    g_free(map->views[1].ranges);
+    g_free(map);
+}
+
+void physmap_put(PhysMap *map)
+{
+    if (atomic_dec_and_test(&map->ref)) {
+        phys_map_release(map);
+    }
+}
+
+void cur_map_update(PhysMap *next)
+{
+    qemu_mutex_lock(&cur_map_lock);
+    physmap_put(cur_map);
+    cur_map = next;
+    smp_mb();
+    qemu_mutex_unlock(&cur_map_lock);
+}
+
+PhysMap *cur_map_get(void)
+{
+    PhysMap *ret;
+
+    qemu_mutex_lock(&cur_map_lock);
+    ret = cur_map;
+    physmap_get(ret);
+    smp_mb();
+    qemu_mutex_unlock(&cur_map_lock);
+    return ret;
+}
+
+PhysMap *alloc_next_map(void)
+{
+    PhysMap *next = g_malloc0(sizeof(PhysMap));
+    atomic_set(&next->ref, 1);
+    return next;
 }
 
 bool memory_region_is_unassigned(MemoryRegion *mr)
@@ -632,6 +693,7 @@  void cpu_exec_init_all(void)
     memory_map_init();
     io_mem_init();
     qemu_mutex_init(&mem_map_lock);
+    qemu_mutex_init(&cur_map_lock);
 #endif
 }
 
@@ -2161,17 +2223,18 @@  int page_unprotect(target_ulong address, uintptr_t pc, void *puc)
 
 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
 typedef struct subpage_t {
+    PhysMap *map;
     MemoryRegion iomem;
     target_phys_addr_t base;
     uint16_t sub_section[TARGET_PAGE_SIZE];
 } subpage_t;
 
-static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
-                             uint16_t section);
-static subpage_t *subpage_init(target_phys_addr_t base);
-static void destroy_page_desc(uint16_t section_index)
+static int subpage_register(PhysMap *map, subpage_t *mmio, uint32_t start,
+                            uint32_t end, uint16_t section);
+static subpage_t *subpage_init(PhysMap *map, target_phys_addr_t base);
+static void destroy_page_desc(PhysMap *map, uint16_t section_index)
 {
-    MemoryRegionSection *section = &phys_sections[section_index];
+    MemoryRegionSection *section = &map->phys_sections[section_index];
     MemoryRegion *mr = section->mr;
 
     if (mr->subpage) {
@@ -2181,7 +2244,7 @@  static void destroy_page_desc(uint16_t section_index)
     }
 }
 
-static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
+static void destroy_l2_mapping(PhysMap *map, PhysPageEntry *lp, unsigned level)
 {
     unsigned i;
     PhysPageEntry *p;
@@ -2190,38 +2253,34 @@  static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
         return;
     }
 
-    p = phys_map_nodes[lp->ptr];
+    p = map->phys_map_nodes[lp->ptr];
     for (i = 0; i < L2_SIZE; ++i) {
         if (!p[i].is_leaf) {
-            destroy_l2_mapping(&p[i], level - 1);
+            destroy_l2_mapping(map, &p[i], level - 1);
         } else {
-            destroy_page_desc(p[i].ptr);
+            destroy_page_desc(map, p[i].ptr);
         }
     }
     lp->is_leaf = 0;
     lp->ptr = PHYS_MAP_NODE_NIL;
 }
 
-static void destroy_all_mappings(void)
+static void destroy_all_mappings(PhysMap *map)
 {
-    destroy_l2_mapping(&phys_map, P_L2_LEVELS - 1);
-    phys_map_nodes_reset();
-}
+    PhysPageEntry *root = &map->root;
 
-static uint16_t phys_section_add(MemoryRegionSection *section)
-{
-    if (phys_sections_nb == phys_sections_nb_alloc) {
-        phys_sections_nb_alloc = MAX(phys_sections_nb_alloc * 2, 16);
-        phys_sections = g_renew(MemoryRegionSection, phys_sections,
-                                phys_sections_nb_alloc);
-    }
-    phys_sections[phys_sections_nb] = *section;
-    return phys_sections_nb++;
+    destroy_l2_mapping(map, root, P_L2_LEVELS - 1);
 }
 
-static void phys_sections_clear(void)
+static uint16_t phys_section_add(PhysMap *map, MemoryRegionSection *section)
 {
-    phys_sections_nb = 0;
+    if (map->phys_sections_nb == map->phys_sections_nb_alloc) {
+        map->phys_sections_nb_alloc = MAX(map->phys_sections_nb_alloc * 2, 16);
+        map->phys_sections = g_renew(MemoryRegionSection, map->phys_sections,
+                                map->phys_sections_nb_alloc);
+    }
+    map->phys_sections[map->phys_sections_nb] = *section;
+    return map->phys_sections_nb++;
 }
 
 /* register physical memory.
@@ -2232,12 +2291,13 @@  static void phys_sections_clear(void)
    start_addr and region_offset are rounded down to a page boundary
    before calculating this offset.  This should not be a problem unless
    the low bits of start_addr and region_offset differ.  */
-static void register_subpage(MemoryRegionSection *section)
+static void register_subpage(PhysMap *map, MemoryRegionSection *section)
 {
     subpage_t *subpage;
     target_phys_addr_t base = section->offset_within_address_space
         & TARGET_PAGE_MASK;
-    MemoryRegionSection *existing = phys_page_find(base >> TARGET_PAGE_BITS);
+    MemoryRegionSection *existing = phys_page_find_internal(map,
+                                            base >> TARGET_PAGE_BITS);
     MemoryRegionSection subsection = {
         .offset_within_address_space = base,
         .size = TARGET_PAGE_SIZE,
@@ -2247,30 +2307,30 @@  static void register_subpage(MemoryRegionSection *section)
     assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
 
     if (!(existing->mr->subpage)) {
-        subpage = subpage_init(base);
+        subpage = subpage_init(map, base);
         subsection.mr = &subpage->iomem;
-        phys_page_set(base >> TARGET_PAGE_BITS, 1,
-                      phys_section_add(&subsection));
+        phys_page_set(map, base >> TARGET_PAGE_BITS, 1,
+                      phys_section_add(map, &subsection));
     } else {
         subpage = container_of(existing->mr, subpage_t, iomem);
     }
     start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
     end = start + section->size;
-    subpage_register(subpage, start, end, phys_section_add(section));
+    subpage_register(map, subpage, start, end, phys_section_add(map, section));
 }
 
 
-static void register_multipage(MemoryRegionSection *section)
+static void register_multipage(PhysMap *map, MemoryRegionSection *section)
 {
     target_phys_addr_t start_addr = section->offset_within_address_space;
     ram_addr_t size = section->size;
     target_phys_addr_t addr;
-    uint16_t section_index = phys_section_add(section);
+    uint16_t section_index = phys_section_add(map, section);
 
     assert(size);
 
     addr = start_addr;
-    phys_page_set(addr >> TARGET_PAGE_BITS, size >> TARGET_PAGE_BITS,
+    phys_page_set(map, addr >> TARGET_PAGE_BITS, size >> TARGET_PAGE_BITS,
                   section_index);
 }
 
@@ -2278,13 +2338,14 @@  void cpu_register_physical_memory_log(MemoryRegionSection *section,
                                       bool readonly)
 {
     MemoryRegionSection now = *section, remain = *section;
+    PhysMap *map = next_map;
 
     if ((now.offset_within_address_space & ~TARGET_PAGE_MASK)
         || (now.size < TARGET_PAGE_SIZE)) {
         now.size = MIN(TARGET_PAGE_ALIGN(now.offset_within_address_space)
                        - now.offset_within_address_space,
                        now.size);
-        register_subpage(&now);
+        register_subpage(map, &now);
         remain.size -= now.size;
         remain.offset_within_address_space += now.size;
         remain.offset_within_region += now.size;
@@ -2292,14 +2353,14 @@  void cpu_register_physical_memory_log(MemoryRegionSection *section,
     now = remain;
     now.size &= TARGET_PAGE_MASK;
     if (now.size) {
-        register_multipage(&now);
+        register_multipage(map, &now);
         remain.size -= now.size;
         remain.offset_within_address_space += now.size;
         remain.offset_within_region += now.size;
     }
     now = remain;
     if (now.size) {
-        register_subpage(&now);
+        register_subpage(map, &now);
     }
 }
 
@@ -3001,7 +3062,7 @@  static uint64_t subpage_read(void *opaque, target_phys_addr_t addr,
            mmio, len, addr, idx);
 #endif
 
-    section = &phys_sections[mmio->sub_section[idx]];
+    section = &mmio->map->phys_sections[mmio->sub_section[idx]];
     addr += mmio->base;
     addr -= section->offset_within_address_space;
     addr += section->offset_within_region;
@@ -3020,7 +3081,7 @@  static void subpage_write(void *opaque, target_phys_addr_t addr,
            __func__, mmio, len, addr, idx, value);
 #endif
 
-    section = &phys_sections[mmio->sub_section[idx]];
+    section = &mmio->map->phys_sections[mmio->sub_section[idx]];
     addr += mmio->base;
     addr -= section->offset_within_address_space;
     addr += section->offset_within_region;
@@ -3065,8 +3126,8 @@  static const MemoryRegionOps subpage_ram_ops = {
     .endianness = DEVICE_NATIVE_ENDIAN,
 };
 
-static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
-                             uint16_t section)
+static int subpage_register(PhysMap *map, subpage_t *mmio, uint32_t start,
+                              uint32_t end, uint16_t section)
 {
     int idx, eidx;
 
@@ -3078,10 +3139,10 @@  static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
     printf("%s: %p start %08x end %08x idx %08x eidx %08x mem %ld\n", __func__,
            mmio, start, end, idx, eidx, memory);
 #endif
-    if (memory_region_is_ram(phys_sections[section].mr)) {
-        MemoryRegionSection new_section = phys_sections[section];
+    if (memory_region_is_ram(map->phys_sections[section].mr)) {
+        MemoryRegionSection new_section = map->phys_sections[section];
         new_section.mr = &io_mem_subpage_ram;
-        section = phys_section_add(&new_section);
+        section = phys_section_add(map, &new_section);
     }
     for (; idx <= eidx; idx++) {
         mmio->sub_section[idx] = section;
@@ -3090,12 +3151,13 @@  static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
     return 0;
 }
 
-static subpage_t *subpage_init(target_phys_addr_t base)
+static subpage_t *subpage_init(PhysMap *map, target_phys_addr_t base)
 {
     subpage_t *mmio;
 
     mmio = g_malloc0(sizeof(subpage_t));
 
+    mmio->map = map;
     mmio->base = base;
     memory_region_init_io(&mmio->iomem, &subpage_ops, mmio,
                           "subpage", TARGET_PAGE_SIZE);
@@ -3104,12 +3166,12 @@  static subpage_t *subpage_init(target_phys_addr_t base)
     printf("%s: %p base " TARGET_FMT_plx " len %08x %d\n", __func__,
            mmio, base, TARGET_PAGE_SIZE, subpage_memory);
 #endif
-    subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, phys_section_unassigned);
+    subpage_register(map, mmio, 0, TARGET_PAGE_SIZE-1, phys_section_unassigned);
 
     return mmio;
 }
 
-static uint16_t dummy_section(MemoryRegion *mr)
+static uint16_t dummy_section(PhysMap *map, MemoryRegion *mr)
 {
     MemoryRegionSection section = {
         .mr = mr,
@@ -3118,7 +3180,7 @@  static uint16_t dummy_section(MemoryRegion *mr)
         .size = UINT64_MAX,
     };
 
-    return phys_section_add(&section);
+    return phys_section_add(map, &section);
 }
 
 MemoryRegion *iotlb_to_region(target_phys_addr_t index)
@@ -3140,15 +3202,32 @@  static void io_mem_init(void)
                           "watch", UINT64_MAX);
 }
 
-static void core_begin(MemoryListener *listener)
+#if 0
+static void physmap_init(void)
+{
+    FlatView v = { .ranges = NULL,
+                             .nr = 0,
+                             .nr_allocated = 0,
+    };
+
+    init_map.views[0] = v;
+    init_map.views[1] = v;
+    cur_map =  &init_map;
+}
+#endif
+
+static void core_begin(MemoryListener *listener, PhysMap *new_map)
 {
-    destroy_all_mappings();
-    phys_sections_clear();
-    phys_map.ptr = PHYS_MAP_NODE_NIL;
-    phys_section_unassigned = dummy_section(&io_mem_unassigned);
-    phys_section_notdirty = dummy_section(&io_mem_notdirty);
-    phys_section_rom = dummy_section(&io_mem_rom);
-    phys_section_watch = dummy_section(&io_mem_watch);
+
+    new_map->root.ptr = PHYS_MAP_NODE_NIL;
+    new_map->root.is_leaf = 0;
+
+    /* In all maps, these sections have the same index */
+    phys_section_unassigned = dummy_section(new_map, &io_mem_unassigned);
+    phys_section_notdirty = dummy_section(new_map, &io_mem_notdirty);
+    phys_section_rom = dummy_section(new_map, &io_mem_rom);
+    phys_section_watch = dummy_section(new_map, &io_mem_watch);
+    next_map = new_map;
 }
 
 static void core_commit(MemoryListener *listener)
@@ -3161,6 +3240,16 @@  static void core_commit(MemoryListener *listener)
     for(env = first_cpu; env != NULL; env = env->next_cpu) {
         tlb_flush(env, 1);
     }
+
+/* move into high layer
+    qemu_mutex_lock(&cur_map_lock);
+    if (cur_map != NULL) {
+        physmap_put(cur_map);
+    }
+    cur_map = next_map;
+    smp_mb();
+    qemu_mutex_unlock(&cur_map_lock);
+*/
 }
 
 static void core_region_add(MemoryListener *listener,
@@ -3217,7 +3306,7 @@  static void core_eventfd_del(MemoryListener *listener,
 {
 }
 
-static void io_begin(MemoryListener *listener)
+static void io_begin(MemoryListener *listener, PhysMap *next)
 {
 }
 
@@ -3329,6 +3418,20 @@  static void memory_map_init(void)
     memory_listener_register(&io_memory_listener, system_io);
 }
 
+void physmap_init(void)
+{
+    FlatView v = { .ranges = NULL, .nr = 0, .nr_allocated = 0,
+    };
+    PhysMap *init_map = g_malloc0(sizeof(PhysMap));
+
+    atomic_set(&init_map->ref, 1);
+    init_map->root.ptr = PHYS_MAP_NODE_NIL;
+    init_map->root.is_leaf = 0;
+    init_map->views[0] = v;
+    init_map->views[1] = v;
+    cur_map = init_map;
+}
+
 MemoryRegion *get_system_memory(void)
 {
     return system_memory;
@@ -3391,6 +3494,7 @@  void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
     uint32_t val;
     target_phys_addr_t page;
     MemoryRegionSection *section;
+    PhysMap *cur = cur_map_get();
 
     while (len > 0) {
         page = addr & TARGET_PAGE_MASK;
@@ -3472,6 +3576,7 @@  void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
         buf += l;
         addr += l;
     }
+    physmap_put(cur);
 }
 
 /* used for ROM loading : can write in RAM and ROM */
diff --git a/hw/vhost.c b/hw/vhost.c
index 43664e7..df58345 100644
--- a/hw/vhost.c
+++ b/hw/vhost.c
@@ -438,7 +438,7 @@  static bool vhost_section(MemoryRegionSection *section)
         && memory_region_is_ram(section->mr);
 }
 
-static void vhost_begin(MemoryListener *listener)
+static void vhost_begin(MemoryListener *listener, PhysMap *next)
 {
 }
 
diff --git a/hw/xen_pt.c b/hw/xen_pt.c
index 3b6d186..fba8586 100644
--- a/hw/xen_pt.c
+++ b/hw/xen_pt.c
@@ -597,7 +597,7 @@  static void xen_pt_region_update(XenPCIPassthroughState *s,
     }
 }
 
-static void xen_pt_begin(MemoryListener *l)
+static void xen_pt_begin(MemoryListener *l, PhysMap *next)
 {
 }
 
diff --git a/kvm-all.c b/kvm-all.c
index f8e4328..bc42cab 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -693,7 +693,7 @@  static void kvm_set_phys_mem(MemoryRegionSection *section, bool add)
     }
 }
 
-static void kvm_begin(MemoryListener *listener)
+static void kvm_begin(MemoryListener *listener, PhysMap *next)
 {
 }
 
diff --git a/memory.c b/memory.c
index c7f2cfd..54cdc7f 100644
--- a/memory.c
+++ b/memory.c
@@ -20,6 +20,7 @@ 
 #include "kvm.h"
 #include <assert.h>
 #include "hw/qdev.h"
+#include "qemu-thread.h"
 
 #define WANT_EXEC_OBSOLETE
 #include "exec-obsolete.h"
@@ -192,7 +193,7 @@  typedef struct AddressSpaceOps AddressSpaceOps;
 /* A system address space - I/O, memory, etc. */
 struct AddressSpace {
     MemoryRegion *root;
-    FlatView current_map;
+    int view_id;
     int ioeventfd_nb;
     MemoryRegionIoeventfd *ioeventfds;
 };
@@ -232,11 +233,6 @@  static void flatview_insert(FlatView *view, unsigned pos, FlatRange *range)
     ++view->nr;
 }
 
-static void flatview_destroy(FlatView *view)
-{
-    g_free(view->ranges);
-}
-
 static bool can_merge(FlatRange *r1, FlatRange *r2)
 {
     return int128_eq(addrrange_end(r1->addr), r2->addr.start)
@@ -594,8 +590,10 @@  static void address_space_update_ioeventfds(AddressSpace *as)
     MemoryRegionIoeventfd *ioeventfds = NULL;
     AddrRange tmp;
     unsigned i;
+    PhysMap *map = cur_map_get();
+    FlatView *view = &map->views[as->view_id];
 
-    FOR_EACH_FLAT_RANGE(fr, &as->current_map) {
+    FOR_EACH_FLAT_RANGE(fr, view) {
         for (i = 0; i < fr->mr->ioeventfd_nb; ++i) {
             tmp = addrrange_shift(fr->mr->ioeventfds[i].addr,
                                   int128_sub(fr->addr.start,
@@ -616,6 +614,7 @@  static void address_space_update_ioeventfds(AddressSpace *as)
     g_free(as->ioeventfds);
     as->ioeventfds = ioeventfds;
     as->ioeventfd_nb = ioeventfd_nb;
+    physmap_put(map);
 }
 
 static void address_space_update_topology_pass(AddressSpace *as,
@@ -681,21 +680,23 @@  static void address_space_update_topology_pass(AddressSpace *as,
 }
 
 
-static void address_space_update_topology(AddressSpace *as)
+static void address_space_update_topology(AddressSpace *as, PhysMap *prev,
+                                            PhysMap *next)
 {
-    FlatView old_view = as->current_map;
+    FlatView old_view = prev->views[as->view_id];
     FlatView new_view = generate_memory_topology(as->root);
 
     address_space_update_topology_pass(as, old_view, new_view, false);
     address_space_update_topology_pass(as, old_view, new_view, true);
+    next->views[as->view_id] = new_view;
 
-    as->current_map = new_view;
-    flatview_destroy(&old_view);
     address_space_update_ioeventfds(as);
 }
 
 static void memory_region_update_topology(MemoryRegion *mr)
 {
+    PhysMap *prev, *next;
+
     if (memory_region_transaction_depth) {
         memory_region_update_pending |= !mr || mr->enabled;
         return;
@@ -705,16 +706,20 @@  static void memory_region_update_topology(MemoryRegion *mr)
         return;
     }
 
-    MEMORY_LISTENER_CALL_GLOBAL(begin, Forward);
+    prev = cur_map_get();
+    /* allocate PhysMap next here */
+    next = alloc_next_map();
+    MEMORY_LISTENER_CALL_GLOBAL(begin, Forward, next);
 
     if (address_space_memory.root) {
-        address_space_update_topology(&address_space_memory);
+        address_space_update_topology(&address_space_memory, prev, next);
     }
     if (address_space_io.root) {
-        address_space_update_topology(&address_space_io);
+        address_space_update_topology(&address_space_io, prev, next);
     }
 
     MEMORY_LISTENER_CALL_GLOBAL(commit, Forward);
+    cur_map_update(next);
 
     memory_region_update_pending = false;
 }
@@ -1071,7 +1076,7 @@  void memory_region_put(MemoryRegion *mr)
 
     if (atomic_dec_and_test(&mr->ref)) {
         /* to fix, using call_rcu( ,release) */
-        mr->life_ops->put(mr);
+        physmap_reclaimer_enqueue(mr, (ReleaseHandler *)mr->life_ops->put);
     }
 }
 
@@ -1147,13 +1152,18 @@  void memory_region_set_dirty(MemoryRegion *mr, target_phys_addr_t addr,
 void memory_region_sync_dirty_bitmap(MemoryRegion *mr)
 {
     FlatRange *fr;
+    FlatView *fview;
+    PhysMap *map;
 
-    FOR_EACH_FLAT_RANGE(fr, &address_space_memory.current_map) {
+    map = cur_map_get();
+    fview = &map->views[address_space_memory.view_id];
+    FOR_EACH_FLAT_RANGE(fr, fview) {
         if (fr->mr == mr) {
             MEMORY_LISTENER_UPDATE_REGION(fr, &address_space_memory,
                                           Forward, log_sync);
         }
     }
+    physmap_put(map);
 }
 
 void memory_region_set_readonly(MemoryRegion *mr, bool readonly)
@@ -1201,8 +1211,12 @@  static void memory_region_update_coalesced_range(MemoryRegion *mr)
     FlatRange *fr;
     CoalescedMemoryRange *cmr;
     AddrRange tmp;
+    FlatView *fview;
+    PhysMap *map;
 
-    FOR_EACH_FLAT_RANGE(fr, &address_space_memory.current_map) {
+    map = cur_map_get();
+    fview = &map->views[address_space_memory.view_id];
+    FOR_EACH_FLAT_RANGE(fr, fview) {
         if (fr->mr == mr) {
             qemu_unregister_coalesced_mmio(int128_get64(fr->addr.start),
                                            int128_get64(fr->addr.size));
@@ -1219,6 +1233,7 @@  static void memory_region_update_coalesced_range(MemoryRegion *mr)
             }
         }
     }
+    physmap_put(map);
 }
 
 void memory_region_set_coalescing(MemoryRegion *mr)
@@ -1458,29 +1473,49 @@  static int cmp_flatrange_addr(const void *addr_, const void *fr_)
     return 0;
 }
 
-static FlatRange *address_space_lookup(AddressSpace *as, AddrRange addr)
+static FlatRange *address_space_lookup(FlatView *view, AddrRange addr)
 {
-    return bsearch(&addr, as->current_map.ranges, as->current_map.nr,
+    return bsearch(&addr, view->ranges, view->nr,
                    sizeof(FlatRange), cmp_flatrange_addr);
 }
 
+/* dec the ref, which was inc'ed by memory_region_find */
+void memory_region_section_put(MemoryRegionSection *mrs)
+{
+    if (mrs->mr != NULL) {
+        memory_region_put(mrs->mr);
+    }
+}
+
+/* inc mr's ref. Caller needs to dec mr's ref */
 MemoryRegionSection memory_region_find(MemoryRegion *address_space,
                                        target_phys_addr_t addr, uint64_t size)
 {
+    PhysMap *map;
     AddressSpace *as = memory_region_to_address_space(address_space);
     AddrRange range = addrrange_make(int128_make64(addr),
                                      int128_make64(size));
-    FlatRange *fr = address_space_lookup(as, range);
+    FlatView *fview;
+
+    map = cur_map_get();
+
+    fview = &map->views[as->view_id];
+    FlatRange *fr = address_space_lookup(fview, range);
     MemoryRegionSection ret = { .mr = NULL, .size = 0 };
 
     if (!fr) {
+        physmap_put(map);
         return ret;
     }
 
-    while (fr > as->current_map.ranges
+    while (fr > fview->ranges
            && addrrange_intersects(fr[-1].addr, range)) {
         --fr;
     }
+    /* To fix: the caller must be in an rcu read section, or we must inc
+     * fr->mr->ref here */
+    memory_region_get(fr->mr);
+    physmap_put(map);
 
     ret.mr = fr->mr;
     range = addrrange_intersection(range, fr->addr);
@@ -1497,10 +1532,13 @@  void memory_global_sync_dirty_bitmap(MemoryRegion *address_space)
 {
     AddressSpace *as = memory_region_to_address_space(address_space);
     FlatRange *fr;
+    PhysMap *map = cur_map_get();
+    FlatView *view = &map->views[as->view_id];
 
-    FOR_EACH_FLAT_RANGE(fr, &as->current_map) {
+    FOR_EACH_FLAT_RANGE(fr, view) {
         MEMORY_LISTENER_UPDATE_REGION(fr, as, Forward, log_sync);
     }
+    physmap_put(map);
 }
 
 void memory_global_dirty_log_start(void)
@@ -1519,6 +1557,8 @@  static void listener_add_address_space(MemoryListener *listener,
                                        AddressSpace *as)
 {
     FlatRange *fr;
+    PhysMap *map;
+    FlatView *view;
 
     if (listener->address_space_filter
         && listener->address_space_filter != as->root) {
@@ -1528,7 +1568,10 @@  static void listener_add_address_space(MemoryListener *listener,
     if (global_dirty_log) {
         listener->log_global_start(listener);
     }
-    FOR_EACH_FLAT_RANGE(fr, &as->current_map) {
+
+    map = cur_map_get();
+    view = &map->views[as->view_id];
+    FOR_EACH_FLAT_RANGE(fr, view) {
         MemoryRegionSection section = {
             .mr = fr->mr,
             .address_space = as->root,
@@ -1539,6 +1582,7 @@  static void listener_add_address_space(MemoryListener *listener,
         };
         listener->region_add(listener, &section);
     }
+    physmap_put(map);
 }
 
 void memory_listener_register(MemoryListener *listener, MemoryRegion *filter)
@@ -1570,12 +1614,14 @@  void memory_listener_unregister(MemoryListener *listener)
 void set_system_memory_map(MemoryRegion *mr)
 {
     address_space_memory.root = mr;
+    address_space_memory.view_id = 0;
     memory_region_update_topology(NULL);
 }
 
 void set_system_io_map(MemoryRegion *mr)
 {
     address_space_io.root = mr;
+    address_space_io.view_id = 1;
     memory_region_update_topology(NULL);
 }
 
diff --git a/memory.h b/memory.h
index 357edd8..18442d4 100644
--- a/memory.h
+++ b/memory.h
@@ -256,7 +256,7 @@  typedef struct MemoryListener MemoryListener;
  * Use with memory_listener_register() and memory_listener_unregister().
  */
 struct MemoryListener {
-    void (*begin)(MemoryListener *listener);
+    void (*begin)(MemoryListener *listener, PhysMap *next);
     void (*commit)(MemoryListener *listener);
     void (*region_add)(MemoryListener *listener, MemoryRegionSection *section);
     void (*region_del)(MemoryListener *listener, MemoryRegionSection *section);
@@ -829,6 +829,13 @@  void mtree_info(fprintf_function mon_printf, void *f);
 
 void memory_region_get(MemoryRegion *mr);
 void memory_region_put(MemoryRegion *mr);
+void physmap_reclaimer_enqueue(void *opaque, ReleaseHandler *release);
+void physmap_get(PhysMap *map);
+void physmap_put(PhysMap *map);
+PhysMap *cur_map_get(void);
+PhysMap *alloc_next_map(void);
+void cur_map_update(PhysMap *next);
+void physmap_init(void);
 #endif
 
 #endif
diff --git a/vl.c b/vl.c
index 1329c30..12af523 100644
--- a/vl.c
+++ b/vl.c
@@ -3346,6 +3346,7 @@  int main(int argc, char **argv, char **envp)
     if (ram_size == 0) {
         ram_size = DEFAULT_RAM_SIZE * 1024 * 1024;
     }
+    physmap_init();
 
     configure_accelerator();
 
diff --git a/xen-all.c b/xen-all.c
index 59f2323..41d82fd 100644
--- a/xen-all.c
+++ b/xen-all.c
@@ -452,7 +452,7 @@  static void xen_set_memory(struct MemoryListener *listener,
     }
 }
 
-static void xen_begin(MemoryListener *listener)
+static void xen_begin(MemoryListener *listener, PhysMap *next)
 {
 }