Patchwork [RFC,v1,19/22] memory: per-AddressSpace dispatch

login
register
mail settings
Submitter Avi Kivity
Date Oct. 3, 2012, 4:04 p.m.
Message ID <1349280245-16341-20-git-send-email-avi@redhat.com>
Download mbox | patch
Permalink /patch/188853/
State New
Headers show

Comments

Avi Kivity - Oct. 3, 2012, 4:04 p.m.
Currently we use a global radix tree to dispatch memory access.  This only
works with a single address space; to support multiple address spaces we
make the radix tree a member of AddressSpace (via an intermediate structure
AddressSpaceDispatch to avoid exposing too many internals).

A side effect is that address_space_io also gains a dispatch table.  When
we remove all the pre-memory-API I/O registrations, we can use that for
dispatching I/O and get rid of the original I/O dispatch.

Signed-off-by: Avi Kivity <avi@redhat.com>
---
 cputlb.c          |   3 +-
 cputlb.h          |   3 +-
 exec.c            | 176 +++++++++++++++++++++++++++++++++---------------------
 memory-internal.h |  22 ++++++-
 memory.c          |   1 +
 memory.h          |  62 +++++++++++++++++++
 6 files changed, 196 insertions(+), 71 deletions(-)
Blue Swirl - Oct. 3, 2012, 8:24 p.m.
On Wed, Oct 3, 2012 at 4:04 PM, Avi Kivity <avi@redhat.com> wrote:
> Currently we use a global radix tree to dispatch memory access.  This only
> works with a single address space; to support multiple address spaces we
> make the radix tree a member of AddressSpace (via an intermediate structure
> AddressSpaceDispatch to avoid exposing too many internals).
>
> A side effect is that address_space_io also gains a dispatch table.  When
> we remove all the pre-memory-API I/O registrations, we can use that for
> dispatching I/O and get rid of the original I/O dispatch.
>
> Signed-off-by: Avi Kivity <avi@redhat.com>
> ---
>  cputlb.c          |   3 +-
>  cputlb.h          |   3 +-
>  exec.c            | 176 +++++++++++++++++++++++++++++++++---------------------
>  memory-internal.h |  22 ++++++-
>  memory.c          |   1 +
>  memory.h          |  62 +++++++++++++++++++
>  6 files changed, 196 insertions(+), 71 deletions(-)
>
> diff --git a/cputlb.c b/cputlb.c
> index 0627f32..9027557 100644
> --- a/cputlb.c
> +++ b/cputlb.c
> @@ -21,6 +21,7 @@
>  #include "cpu.h"
>  #include "exec-all.h"
>  #include "memory.h"
> +#include "exec-memory.h"
>
>  #include "cputlb.h"
>
> @@ -251,7 +252,7 @@ void tlb_set_page(CPUArchState *env, target_ulong vaddr,
>      if (size != TARGET_PAGE_SIZE) {
>          tlb_add_large_page(env, vaddr, size);
>      }
> -    section = phys_page_find(paddr >> TARGET_PAGE_BITS);
> +    section = phys_page_find(address_space_memory.dispatch, paddr >> TARGET_PAGE_BITS);
>  #if defined(DEBUG_TLB)
>      printf("tlb_set_page: vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx
>             " prot=%x idx=%d pd=0x%08lx\n",
> diff --git a/cputlb.h b/cputlb.h
> index 2dc2c96..d537b77 100644
> --- a/cputlb.h
> +++ b/cputlb.h
> @@ -26,7 +26,8 @@ void tlb_unprotect_code_phys(CPUArchState *env, ram_addr_t ram_addr,
>                               target_ulong vaddr);
>  void tlb_reset_dirty_range(CPUTLBEntry *tlb_entry, uintptr_t start,
>                             uintptr_t length);
> -MemoryRegionSection *phys_page_find(target_phys_addr_t index);
> +MemoryRegionSection *phys_page_find(struct AddressSpaceDispatch *d,
> +                                    target_phys_addr_t index);
>  void cpu_tlb_reset_dirty_all(ram_addr_t start1, ram_addr_t length);
>  void tlb_set_dirty(CPUArchState *env, target_ulong vaddr);
>  extern int tlb_flush_count;
> diff --git a/exec.c b/exec.c
> index 7a76efa..42f9ad1 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -187,7 +187,6 @@
>  static void *l1_map[V_L1_SIZE];
>
>  #if !defined(CONFIG_USER_ONLY)
> -typedef struct PhysPageEntry PhysPageEntry;
>
>  static MemoryRegionSection *phys_sections;
>  static unsigned phys_sections_nb, phys_sections_nb_alloc;
> @@ -196,22 +195,12 @@
>  static uint16_t phys_section_rom;
>  static uint16_t phys_section_watch;
>
> -struct PhysPageEntry {
> -    uint16_t is_leaf : 1;
> -     /* index into phys_sections (is_leaf) or phys_map_nodes (!is_leaf) */
> -    uint16_t ptr : 15;
> -};
> -
>  /* Simple allocator for PhysPageEntry nodes */
>  static PhysPageEntry (*phys_map_nodes)[L2_SIZE];
>  static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc;
>
>  #define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)
>
> -/* This is a multi-level map on the physical address space.
> -   The bottom level has pointers to MemoryRegionSections.  */
> -static PhysPageEntry phys_map = { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
> -
>  static void io_mem_init(void);
>  static void memory_map_init(void);
>
> @@ -459,18 +448,19 @@ static void phys_page_set_level(PhysPageEntry *lp, target_phys_addr_t *index,
>      }
>  }
>
> -static void phys_page_set(target_phys_addr_t index, target_phys_addr_t nb,
> +static void phys_page_set(AddressSpaceDispatch *d,
> +                          target_phys_addr_t index, target_phys_addr_t nb,
>                            uint16_t leaf)
>  {
>      /* Wildly overreserve - it doesn't matter much. */
>      phys_map_node_reserve(3 * P_L2_LEVELS);
>
> -    phys_page_set_level(&phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
> +    phys_page_set_level(&d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
>  }
>
> -MemoryRegionSection *phys_page_find(target_phys_addr_t index)
> +MemoryRegionSection *phys_page_find(AddressSpaceDispatch *d, target_phys_addr_t index)
>  {
> -    PhysPageEntry lp = phys_map;
> +    PhysPageEntry lp = d->phys_map;
>      PhysPageEntry *p;
>      int i;
>      uint16_t s_index = phys_section_unassigned;
> @@ -1472,7 +1462,7 @@ void tb_invalidate_phys_addr(target_phys_addr_t addr)
>      ram_addr_t ram_addr;
>      MemoryRegionSection *section;
>
> -    section = phys_page_find(addr >> TARGET_PAGE_BITS);
> +    section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
>      if (!(memory_region_is_ram(section->mr)
>            || (section->mr->rom_device && section->mr->readable))) {
>          return;
> @@ -2218,9 +2208,9 @@ static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
>      lp->ptr = PHYS_MAP_NODE_NIL;
>  }
>
> -static void destroy_all_mappings(void)
> +static void destroy_all_mappings(AddressSpaceDispatch *d)
>  {
> -    destroy_l2_mapping(&phys_map, P_L2_LEVELS - 1);
> +    destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
>      phys_map_nodes_reset();
>  }
>
> @@ -2240,12 +2230,12 @@ static void phys_sections_clear(void)
>      phys_sections_nb = 0;
>  }
>
> -static void register_subpage(MemoryRegionSection *section)
> +static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
>  {
>      subpage_t *subpage;
>      target_phys_addr_t base = section->offset_within_address_space
>          & TARGET_PAGE_MASK;
> -    MemoryRegionSection *existing = phys_page_find(base >> TARGET_PAGE_BITS);
> +    MemoryRegionSection *existing = phys_page_find(d, base >> TARGET_PAGE_BITS);
>      MemoryRegionSection subsection = {
>          .offset_within_address_space = base,
>          .size = TARGET_PAGE_SIZE,
> @@ -2257,7 +2247,7 @@ static void register_subpage(MemoryRegionSection *section)
>      if (!(existing->mr->subpage)) {
>          subpage = subpage_init(base);
>          subsection.mr = &subpage->iomem;
> -        phys_page_set(base >> TARGET_PAGE_BITS, 1,
> +        phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
>                        phys_section_add(&subsection));
>      } else {
>          subpage = container_of(existing->mr, subpage_t, iomem);
> @@ -2268,7 +2258,7 @@ static void register_subpage(MemoryRegionSection *section)
>  }
>
>
> -static void register_multipage(MemoryRegionSection *section)
> +static void register_multipage(AddressSpaceDispatch *d, MemoryRegionSection *section)
>  {
>      target_phys_addr_t start_addr = section->offset_within_address_space;
>      ram_addr_t size = section->size;
> @@ -2278,13 +2268,13 @@ static void register_multipage(MemoryRegionSection *section)
>      assert(size);
>
>      addr = start_addr;
> -    phys_page_set(addr >> TARGET_PAGE_BITS, size >> TARGET_PAGE_BITS,
> +    phys_page_set(d, addr >> TARGET_PAGE_BITS, size >> TARGET_PAGE_BITS,
>                    section_index);
>  }
>
> -void cpu_register_physical_memory_log(MemoryRegionSection *section,
> -                                      bool readonly)
> +static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
>  {
> +    AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
>      MemoryRegionSection now = *section, remain = *section;
>
>      if ((now.offset_within_address_space & ~TARGET_PAGE_MASK)
> @@ -2292,7 +2282,7 @@ void cpu_register_physical_memory_log(MemoryRegionSection *section,
>          now.size = MIN(TARGET_PAGE_ALIGN(now.offset_within_address_space)
>                         - now.offset_within_address_space,
>                         now.size);
> -        register_subpage(&now);
> +        register_subpage(d, &now);
>          remain.size -= now.size;
>          remain.offset_within_address_space += now.size;
>          remain.offset_within_region += now.size;
> @@ -2301,10 +2291,10 @@ void cpu_register_physical_memory_log(MemoryRegionSection *section,
>          now = remain;
>          if (remain.offset_within_region & ~TARGET_PAGE_MASK) {
>              now.size = TARGET_PAGE_SIZE;
> -            register_subpage(&now);
> +            register_subpage(d, &now);
>          } else {
>              now.size &= TARGET_PAGE_MASK;
> -            register_multipage(&now);
> +            register_multipage(d, &now);
>          }
>          remain.size -= now.size;
>          remain.offset_within_address_space += now.size;
> @@ -2312,7 +2302,7 @@ void cpu_register_physical_memory_log(MemoryRegionSection *section,
>      }
>      now = remain;
>      if (now.size) {
> -        register_subpage(&now);
> +        register_subpage(d, &now);
>      }
>  }
>
> @@ -3163,11 +3153,17 @@ static void io_mem_init(void)
>                            "watch", UINT64_MAX);
>  }
>
> +static void mem_begin(MemoryListener *listener)
> +{
> +    AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
> +
> +    destroy_all_mappings(d);
> +    d->phys_map.ptr = PHYS_MAP_NODE_NIL;
> +}
> +
>  static void core_begin(MemoryListener *listener)
>  {
> -    destroy_all_mappings();
>      phys_sections_clear();
> -    phys_map.ptr = PHYS_MAP_NODE_NIL;
>      phys_section_unassigned = dummy_section(&io_mem_unassigned);
>      phys_section_notdirty = dummy_section(&io_mem_notdirty);
>      phys_section_rom = dummy_section(&io_mem_rom);
> @@ -3186,18 +3182,6 @@ static void tcg_commit(MemoryListener *listener)
>      }
>  }
>
> -static void core_region_add(MemoryListener *listener,
> -                            MemoryRegionSection *section)
> -{
> -    cpu_register_physical_memory_log(section, section->readonly);
> -}
> -
> -static void core_region_nop(MemoryListener *listener,
> -                            MemoryRegionSection *section)
> -{
> -    cpu_register_physical_memory_log(section, section->readonly);
> -}
> -
>  static void core_log_global_start(MemoryListener *listener)
>  {
>      cpu_physical_memory_set_dirty_tracking(1);
> @@ -3229,11 +3213,9 @@ static void io_region_del(MemoryListener *listener,
>  static MemoryListener core_memory_listener = {
>      MEMORY_LISTENER_DEFAULT_OPS,
>      .begin = core_begin,
> -    .region_add = core_region_add,
> -    .region_nop = core_region_nop,
>      .log_global_start = core_log_global_start,
>      .log_global_stop = core_log_global_stop,
> -    .priority = 0,
> +    .priority = 1,
>  };
>
>  static MemoryListener io_memory_listener = {
> @@ -3248,6 +3230,22 @@ static void io_region_del(MemoryListener *listener,
>      .commit = tcg_commit,
>  };
>
> +void address_space_init_dispatch(AddressSpace *as)
> +{
> +    AddressSpaceDispatch *d = g_new(AddressSpaceDispatch, 1);
> +
> +    d->phys_map  = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
> +    d->listener = (MemoryListener) {
> +        MEMORY_LISTENER_DEFAULT_OPS,
> +        .begin = mem_begin,
> +        .region_add = mem_add,
> +        .region_nop = mem_add,
> +        .priority = 0,
> +    };
> +    as->dispatch = d;
> +    memory_listener_register(&d->listener, as);
> +}
> +
>  static void memory_map_init(void)
>  {
>      system_memory = g_malloc(sizeof(*system_memory));
> @@ -3319,9 +3317,11 @@ int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
>  }
>
>  #else
> -void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
> -                            int len, int is_write)
> +
> +void address_space_rw(AddressSpace *as, target_phys_addr_t addr, uint8_t *buf,
> +                      int len, bool is_write)

I'd make address_space_* use uint64_t instead of target_phys_addr_t
for the address. It may actually be buggy for 32 bit
target_phys_addr_t  and 64 bit DMA addresses, if such architectures
exist. Maybe memory.c could be made target independent one day.

>  {
> +    AddressSpaceDispatch *d = as->dispatch;
>      int l;
>      uint8_t *ptr;
>      uint32_t val;
> @@ -3333,7 +3333,7 @@ void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
>          l = (page + TARGET_PAGE_SIZE) - addr;
>          if (l > len)
>              l = len;
> -        section = phys_page_find(page >> TARGET_PAGE_BITS);
> +        section = phys_page_find(d, page >> TARGET_PAGE_BITS);
>
>          if (is_write) {
>              if (!memory_region_is_ram(section->mr)) {
> @@ -3410,10 +3410,36 @@ void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
>      }
>  }
>
> +void address_space_write(AddressSpace *as, target_phys_addr_t addr,
> +                         const uint8_t *buf, int len)
> +{
> +    address_space_rw(as, addr, (uint8_t *)buf, len, true);
> +}
> +
> +/**
> + * address_space_read: read from an address space.
> + *
> + * @as: #AddressSpace to be accessed
> + * @addr: address within that address space
> + * @buf: buffer with the data transferred
> + */
> +void address_space_read(AddressSpace *as, target_phys_addr_t addr, uint8_t *buf, int len)
> +{
> +    address_space_rw(as, addr, buf, len, false);
> +}
> +
> +
> +void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
> +                            int len, int is_write)
> +{
> +    return address_space_rw(&address_space_memory, addr, buf, len, is_write);
> +}
> +
>  /* used for ROM loading : can write in RAM and ROM */
>  void cpu_physical_memory_write_rom(target_phys_addr_t addr,
>                                     const uint8_t *buf, int len)
>  {
> +    AddressSpaceDispatch *d = address_space_memory.dispatch;
>      int l;
>      uint8_t *ptr;
>      target_phys_addr_t page;
> @@ -3424,7 +3450,7 @@ void cpu_physical_memory_write_rom(target_phys_addr_t addr,
>          l = (page + TARGET_PAGE_SIZE) - addr;
>          if (l > len)
>              l = len;
> -        section = phys_page_find(page >> TARGET_PAGE_BITS);
> +        section = phys_page_find(d, page >> TARGET_PAGE_BITS);
>
>          if (!(memory_region_is_ram(section->mr) ||
>                memory_region_is_romd(section->mr))) {
> @@ -3504,10 +3530,12 @@ static void cpu_notify_map_clients(void)
>   * Use cpu_register_map_client() to know when retrying the map operation is
>   * likely to succeed.
>   */
> -void *cpu_physical_memory_map(target_phys_addr_t addr,
> -                              target_phys_addr_t *plen,
> -                              int is_write)
> +void *address_space_map(AddressSpace *as,
> +                        target_phys_addr_t addr,
> +                        target_phys_addr_t *plen,
> +                        bool is_write)
>  {
> +    AddressSpaceDispatch *d = as->dispatch;
>      target_phys_addr_t len = *plen;
>      target_phys_addr_t todo = 0;
>      int l;
> @@ -3522,7 +3550,7 @@ void *cpu_physical_memory_map(target_phys_addr_t addr,
>          l = (page + TARGET_PAGE_SIZE) - addr;
>          if (l > len)
>              l = len;
> -        section = phys_page_find(page >> TARGET_PAGE_BITS);
> +        section = phys_page_find(d, page >> TARGET_PAGE_BITS);
>
>          if (!(memory_region_is_ram(section->mr) && !section->readonly)) {
>              if (todo || bounce.buffer) {
> @@ -3532,7 +3560,7 @@ void *cpu_physical_memory_map(target_phys_addr_t addr,
>              bounce.addr = addr;
>              bounce.len = l;
>              if (!is_write) {
> -                cpu_physical_memory_read(addr, bounce.buffer, l);
> +                address_space_read(as, addr, bounce.buffer, l);
>              }
>
>              *plen = l;
> @@ -3553,12 +3581,12 @@ void *cpu_physical_memory_map(target_phys_addr_t addr,
>      return ret;
>  }
>
> -/* Unmaps a memory region previously mapped by cpu_physical_memory_map().
> +/* Unmaps a memory region previously mapped by address_space_map().
>   * Will also mark the memory as dirty if is_write == 1.  access_len gives
>   * the amount of memory that was actually read or written by the caller.
>   */
> -void cpu_physical_memory_unmap(void *buffer, target_phys_addr_t len,
> -                               int is_write, target_phys_addr_t access_len)
> +void address_space_unmap(AddressSpace *as, void *buffer, target_phys_addr_t len,
> +                         int is_write, target_phys_addr_t access_len)
>  {
>      if (buffer != bounce.buffer) {
>          if (is_write) {
> @@ -3585,13 +3613,26 @@ void cpu_physical_memory_unmap(void *buffer, target_phys_addr_t len,
>          return;
>      }
>      if (is_write) {
> -        cpu_physical_memory_write(bounce.addr, bounce.buffer, access_len);
> +        address_space_write(as, bounce.addr, bounce.buffer, access_len);
>      }
>      qemu_vfree(bounce.buffer);
>      bounce.buffer = NULL;
>      cpu_notify_map_clients();
>  }
>
> +void *cpu_physical_memory_map(target_phys_addr_t addr,
> +                              target_phys_addr_t *plen,
> +                              int is_write)
> +{
> +    return address_space_map(&address_space_memory, addr, plen, is_write);
> +}
> +
> +void cpu_physical_memory_unmap(void *buffer, target_phys_addr_t len,
> +                               int is_write, target_phys_addr_t access_len)
> +{
> +    return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
> +}
> +
>  /* warning: addr must be aligned */
>  static inline uint32_t ldl_phys_internal(target_phys_addr_t addr,
>                                           enum device_endian endian)
> @@ -3600,7 +3641,7 @@ static inline uint32_t ldl_phys_internal(target_phys_addr_t addr,
>      uint32_t val;
>      MemoryRegionSection *section;
>
> -    section = phys_page_find(addr >> TARGET_PAGE_BITS);
> +    section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
>
>      if (!(memory_region_is_ram(section->mr) ||
>            memory_region_is_romd(section->mr))) {
> @@ -3659,7 +3700,7 @@ static inline uint64_t ldq_phys_internal(target_phys_addr_t addr,
>      uint64_t val;
>      MemoryRegionSection *section;
>
> -    section = phys_page_find(addr >> TARGET_PAGE_BITS);
> +    section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
>
>      if (!(memory_region_is_ram(section->mr) ||
>            memory_region_is_romd(section->mr))) {
> @@ -3726,7 +3767,7 @@ static inline uint32_t lduw_phys_internal(target_phys_addr_t addr,
>      uint64_t val;
>      MemoryRegionSection *section;
>
> -    section = phys_page_find(addr >> TARGET_PAGE_BITS);
> +    section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
>
>      if (!(memory_region_is_ram(section->mr) ||
>            memory_region_is_romd(section->mr))) {
> @@ -3785,7 +3826,7 @@ void stl_phys_notdirty(target_phys_addr_t addr, uint32_t val)
>      uint8_t *ptr;
>      MemoryRegionSection *section;
>
> -    section = phys_page_find(addr >> TARGET_PAGE_BITS);
> +    section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
>
>      if (!memory_region_is_ram(section->mr) || section->readonly) {
>          addr = memory_region_section_addr(section, addr);
> @@ -3817,7 +3858,7 @@ void stq_phys_notdirty(target_phys_addr_t addr, uint64_t val)
>      uint8_t *ptr;
>      MemoryRegionSection *section;
>
> -    section = phys_page_find(addr >> TARGET_PAGE_BITS);
> +    section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
>
>      if (!memory_region_is_ram(section->mr) || section->readonly) {
>          addr = memory_region_section_addr(section, addr);
> @@ -3846,7 +3887,7 @@ static inline void stl_phys_internal(target_phys_addr_t addr, uint32_t val,
>      uint8_t *ptr;
>      MemoryRegionSection *section;
>
> -    section = phys_page_find(addr >> TARGET_PAGE_BITS);
> +    section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
>
>      if (!memory_region_is_ram(section->mr) || section->readonly) {
>          addr = memory_region_section_addr(section, addr);
> @@ -3919,7 +3960,7 @@ static inline void stw_phys_internal(target_phys_addr_t addr, uint32_t val,
>      uint8_t *ptr;
>      MemoryRegionSection *section;
>
> -    section = phys_page_find(addr >> TARGET_PAGE_BITS);
> +    section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
>
>      if (!memory_region_is_ram(section->mr) || section->readonly) {
>          addr = memory_region_section_addr(section, addr);
> @@ -4161,7 +4202,8 @@ bool cpu_physical_memory_is_io(target_phys_addr_t phys_addr)
>  {
>      MemoryRegionSection *section;
>
> -    section = phys_page_find(phys_addr >> TARGET_PAGE_BITS);
> +    section = phys_page_find(address_space_memory.dispatch,
> +                             phys_addr >> TARGET_PAGE_BITS);
>
>      return !(memory_region_is_ram(section->mr) ||
>               memory_region_is_romd(section->mr));
> diff --git a/memory-internal.h b/memory-internal.h
> index 655f71f..a9d914e 100644
> --- a/memory-internal.h
> +++ b/memory-internal.h
> @@ -21,6 +21,26 @@
>
>  #ifndef CONFIG_USER_ONLY
>
> +typedef struct PhysPageEntry PhysPageEntry;
> +
> +struct PhysPageEntry {
> +    uint16_t is_leaf : 1;
> +     /* index into phys_sections (is_leaf) or phys_map_nodes (!is_leaf) */
> +    uint16_t ptr : 15;
> +};
> +
> +typedef struct AddressSpaceDispatch AddressSpaceDispatch;
> +
> +struct AddressSpaceDispatch {
> +    /* This is a multi-level map on the physical address space.
> +     * The bottom level has pointers to MemoryRegionSections.
> +     */
> +    PhysPageEntry phys_map;
> +    MemoryListener listener;
> +};
> +
> +void address_space_init_dispatch(AddressSpace *as);
> +
>  ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
>                                     MemoryRegion *mr);
>  ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr);
> @@ -29,8 +49,6 @@ void qemu_ram_free_from_ptr(ram_addr_t addr);
>
>  struct MemoryRegion;
>  struct MemoryRegionSection;
> -void cpu_register_physical_memory_log(struct MemoryRegionSection *section,
> -                                      bool readonly);
>
>  void qemu_register_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size);
>  void qemu_unregister_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size);
> diff --git a/memory.c b/memory.c
> index f829d84..28a79ae 100644
> --- a/memory.c
> +++ b/memory.c
> @@ -1550,6 +1550,7 @@ void address_space_init(AddressSpace *as, MemoryRegion *root)
>      QTAILQ_INSERT_TAIL(&address_spaces, as, address_spaces_link);
>      as->name = NULL;
>      memory_region_transaction_commit();
> +    address_space_init_dispatch(as);
>  }
>
>  uint64_t io_mem_read(MemoryRegion *mr, target_phys_addr_t addr, unsigned size)
> diff --git a/memory.h b/memory.h
> index 6115f48..84f7439 100644
> --- a/memory.h
> +++ b/memory.h
> @@ -169,6 +169,7 @@ struct AddressSpace {
>      struct FlatView *current_map;
>      int ioeventfd_nb;
>      struct MemoryRegionIoeventfd *ioeventfds;
> +    struct AddressSpaceDispatch *dispatch;
>      QTAILQ_ENTRY(AddressSpace) address_spaces_link;
>  };
>
> @@ -830,6 +831,67 @@ void mtree_info(fprintf_function mon_printf, void *f);
>   */
>  void address_space_init(AddressSpace *as, MemoryRegion *root);
>
> +/**
> + * address_space_rw: read from or write to an address space.
> + *
> + * @as: #AddressSpace to be accessed
> + * @addr: address within that address space
> + * @buf: buffer with the data transferred
> + * @is_write: indicates the transfer direction
> + */
> +void address_space_rw(AddressSpace *as, target_phys_addr_t addr, uint8_t *buf,
> +                      int len, bool is_write);
> +
> +/**
> + * address_space_write: write to address space.
> + *
> + * @as: #AddressSpace to be accessed
> + * @addr: address within that address space
> + * @buf: buffer with the data transferred
> + */
> +void address_space_write(AddressSpace *as, target_phys_addr_t addr,
> +                         const uint8_t *buf, int len);
> +
> +/**
> + * address_space_read: read from an address space.
> + *
> + * @as: #AddressSpace to be accessed
> + * @addr: address within that address space
> + * @buf: buffer with the data transferred
> + */
> +void address_space_read(AddressSpace *as, target_phys_addr_t addr, uint8_t *buf, int len);
> +
> +/* address_space_map: map a physical memory region into a host virtual address
> + *
> + * May map a subset of the requested range, given by and returned in @plen.
> + * May return %NULL if resources needed to perform the mapping are exhausted.
> + * Use only for reads OR writes - not for read-modify-write operations.
> + * Use cpu_register_map_client() to know when retrying the map operation is
> + * likely to succeed.
> + *
> + * @as: #AddressSpace to be accessed
> + * @addr: address within that address space
> + * @plen: pointer to length of buffer; updated on return
> + * @is_write: indicates the transfer direction
> + */
> +void *address_space_map(AddressSpace *as, target_phys_addr_t addr,
> +                        target_phys_addr_t *plen, bool is_write);
> +
> +/* address_space_unmap: Unmaps a memory region previously mapped by address_space_map()
> + *
> + * Will also mark the memory as dirty if @is_write == %true.  @access_len gives
> + * the amount of memory that was actually read or written by the caller.
> + *
> + * @as: #AddressSpace used
> + * @addr: address within that address space
> + * @len: buffer length as returned by address_space_map()
> + * @access_len: amount of data actually transferred
> + * @is_write: indicates the transfer direction
> + */
> +void address_space_unmap(AddressSpace *as, void *buffer, target_phys_addr_t len,
> +                         int is_write, target_phys_addr_t access_len);
> +
> +
>  #endif
>
>  #endif
> --
> 1.7.12
>
Avi Kivity - Oct. 4, 2012, 6:38 a.m.
On 10/03/2012 10:24 PM, Blue Swirl wrote:
> >
> >  #else
> > -void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
> > -                            int len, int is_write)
> > +
> > +void address_space_rw(AddressSpace *as, target_phys_addr_t addr, uint8_t *buf,
> > +                      int len, bool is_write)
>
> I'd make address_space_* use uint64_t instead of target_phys_addr_t
> for the address. It may actually be buggy for 32 bit
> target_phys_addr_t  and 64 bit DMA addresses, if such architectures
> exist. Maybe memory.c could be made target independent one day.

We can make target_phys_addr_t 64 bit unconditionally.  The fraction of
deployments where both host and guest are 32 bits is dropping, and I
doubt the performance drop is noticeable.

We were also planning to rename target_phys_addr_t to hw_addr.

But both of those are for another patch.  For now, AddressSpace follows
MemoryRegion, it's just a small abstraction on top.
Peter Maydell - Oct. 4, 2012, 8:47 a.m.
On 4 October 2012 07:38, Avi Kivity <avi@redhat.com> wrote:
> On 10/03/2012 10:24 PM, Blue Swirl wrote:
>> >
>> >  #else
>> > -void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
>> > -                            int len, int is_write)
>> > +
>> > +void address_space_rw(AddressSpace *as, target_phys_addr_t addr, uint8_t *buf,
>> > +                      int len, bool is_write)
>>
>> I'd make address_space_* use uint64_t instead of target_phys_addr_t
>> for the address. It may actually be buggy for 32 bit
>> target_phys_addr_t  and 64 bit DMA addresses, if such architectures
>> exist. Maybe memory.c could be made target independent one day.

I thought target_phys_addr_t was supposed to be "widest necessary
address", so an architecture with 64 bit DMA addresses should be
implemented with 64 bit target_phys_addr_t...

> We can make target_phys_addr_t 64 bit unconditionally.  The fraction of
> deployments where both host and guest are 32 bits is dropping, and I
> doubt the performance drop is noticable.

I benchmarked it on ARM when I switched ARM to always 64 bit physaddrs;
it was just about measurable (maybe half a percent or so) but not
significant (we don't use this type in the fast path for memory
accesses, only in the slow/IO path).

I'd favour just moving everything to 64 bits (the remaining
32 bit targets are: cris, lm32, m68k, microblaze, or32, sh4, unicore32,
xtensa). However it does require some review of devices to check that
they're not using target_phys_addr_t when they meant uint32_t [eg
in registers for DMA devices] or relying on 32 bit wraparound.

-- PMM
Avi Kivity - Oct. 4, 2012, 10:15 a.m.
On 10/04/2012 10:47 AM, Peter Maydell wrote:
>>> I'd make address_space_* use uint64_t instead of target_phys_addr_t
>>> for the address. It may actually be buggy for 32 bit
>>> target_phys_addr_t  and 64 bit DMA addresses, if such architectures
>>> exist. Maybe memory.c could be made target independent one day.
> 
> I thought target_phys_addr_t was supposed to be "widest necessary
> address", so an architecture with 64 bit DMA addresses should be
> implemented with 64 bit target_phys_addr_t...

Yes.  Since even before this patchset all DMA went through the memory
API (and before the memory API, through cpu_register_physical_memory),
this case was already broken.

> 
>> We can make target_phys_addr_t 64 bit unconditionally.  The fraction of
>> deployments where both host and guest are 32 bits is dropping, and I
>> doubt the performance drop is noticeable.
> 
> I benchmarked it on ARM when I switched ARM to always 64 bit physaddrs;
> it was just about measurable (maybe half a percent or so) but not
> significant (we don't use this type in the fast path for memory
> accesses, only in the slow/IO path).
> 
> I'd favour just moving everything to 64 bits (the remaining
> 32 bit targets are: cris, lm32, m68k, microblaze, or32, sh4, unicore32,
> xtensa). However it does require some review of devices to check that
> they're not using target_phys_addr_t when they meant uint32_t [eg
> in registers for DMA devices] or relying on 32 bit wraparound.

I posted a patch.  I did no review, the cost/benefit tradeoff is
horrible IMO, especially with me not knowing the details.  If something
breaks, git bisect will save the day.
Peter Maydell - Oct. 4, 2012, 10:29 a.m.
On 4 October 2012 11:15, Avi Kivity <avi@redhat.com> wrote:
> On 10/04/2012 10:47 AM, Peter Maydell wrote:
>> I'd favour just moving everything to 64 bits (the remaining
>> 32 bit targets are: cris, lm32, m68k, microblaze, or32, sh4, unicore32,
>> xtensa). However it does require some review of devices to check that
>> they're not using target_phys_addr_t when they meant uint32_t [eg
>> in registers for DMA devices] or relying on 32 bit wraparound.
>
> I posted a patch.  I did no review, the cost/benefit tradeoff is
> horrible IMO, especially with me not knowing the details.  If something
> breaks, git bisect will save the day.

It might be nice to cc the maintainers of the affected target archs
and suggest that they at least do a quick smoke test :-)

-- PMM
Avi Kivity - Oct. 4, 2012, 10:30 a.m.
On 10/04/2012 12:29 PM, Peter Maydell wrote:
> On 4 October 2012 11:15, Avi Kivity <avi@redhat.com> wrote:
>> On 10/04/2012 10:47 AM, Peter Maydell wrote:
>>> I'd favour just moving everything to 64 bits (the remaining
>>> 32 bit targets are: cris, lm32, m68k, microblaze, or32, sh4, unicore32,
>>> xtensa). However it does require some review of devices to check that
>>> they're not using target_phys_addr_t when they meant uint32_t [eg
>>> in registers for DMA devices] or relying on 32 bit wraparound.
>>
>> I posted a patch.  I did no review, the cost/benefit tradeoff is
>> horrible IMO, especially with me not knowing the details.  If something
>> breaks, git bisect will save the day.
> 
> It might be nice to cc the maintainers of the affected target archs
> and suggest that they at least do a quick smoke test :-)

Yes, will do.
Anthony Liguori - Oct. 4, 2012, 2:13 p.m.
Avi Kivity <avi@redhat.com> writes:

> Currently we use a global radix tree to dispatch memory access.  This only
> works with a single address space; to support multiple address spaces we
> make the radix tree a member of AddressSpace (via an intermediate structure
> AddressSpaceDispatch to avoid exposing too many internals).
>
> A side effect is that address_space_io also gains a dispatch table.  When
> we remove all the pre-memory-API I/O registrations, we can use that for
> dispatching I/O and get rid of the original I/O dispatch.
>
> Signed-off-by: Avi Kivity <avi@redhat.com>
> ---
>  cputlb.c          |   3 +-
>  cputlb.h          |   3 +-
>  exec.c            | 176 +++++++++++++++++++++++++++++++++---------------------
>  memory-internal.h |  22 ++++++-
>  memory.c          |   1 +
>  memory.h          |  62 +++++++++++++++++++
>  6 files changed, 196 insertions(+), 71 deletions(-)
>
> diff --git a/cputlb.c b/cputlb.c
> index 0627f32..9027557 100644
> --- a/cputlb.c
> +++ b/cputlb.c
> @@ -21,6 +21,7 @@
>  #include "cpu.h"
>  #include "exec-all.h"
>  #include "memory.h"
> +#include "exec-memory.h"
>  
>  #include "cputlb.h"
>  
> @@ -251,7 +252,7 @@ void tlb_set_page(CPUArchState *env, target_ulong vaddr,
>      if (size != TARGET_PAGE_SIZE) {
>          tlb_add_large_page(env, vaddr, size);
>      }
> -    section = phys_page_find(paddr >> TARGET_PAGE_BITS);
> +    section = phys_page_find(address_space_memory.dispatch, paddr >> TARGET_PAGE_BITS);
>  #if defined(DEBUG_TLB)
>      printf("tlb_set_page: vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx
>             " prot=%x idx=%d pd=0x%08lx\n",
> diff --git a/cputlb.h b/cputlb.h
> index 2dc2c96..d537b77 100644
> --- a/cputlb.h
> +++ b/cputlb.h
> @@ -26,7 +26,8 @@ void tlb_unprotect_code_phys(CPUArchState *env, ram_addr_t ram_addr,
>                               target_ulong vaddr);
>  void tlb_reset_dirty_range(CPUTLBEntry *tlb_entry, uintptr_t start,
>                             uintptr_t length);
> -MemoryRegionSection *phys_page_find(target_phys_addr_t index);
> +MemoryRegionSection *phys_page_find(struct AddressSpaceDispatch *d,
> +                                    target_phys_addr_t index);
>  void cpu_tlb_reset_dirty_all(ram_addr_t start1, ram_addr_t length);
>  void tlb_set_dirty(CPUArchState *env, target_ulong vaddr);
>  extern int tlb_flush_count;
> diff --git a/exec.c b/exec.c
> index 7a76efa..42f9ad1 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -187,7 +187,6 @@
>  static void *l1_map[V_L1_SIZE];
>  
>  #if !defined(CONFIG_USER_ONLY)
> -typedef struct PhysPageEntry PhysPageEntry;
>  
>  static MemoryRegionSection *phys_sections;
>  static unsigned phys_sections_nb, phys_sections_nb_alloc;
> @@ -196,22 +195,12 @@
>  static uint16_t phys_section_rom;
>  static uint16_t phys_section_watch;
>  
> -struct PhysPageEntry {
> -    uint16_t is_leaf : 1;
> -     /* index into phys_sections (is_leaf) or phys_map_nodes (!is_leaf) */
> -    uint16_t ptr : 15;
> -};
> -
>  /* Simple allocator for PhysPageEntry nodes */
>  static PhysPageEntry (*phys_map_nodes)[L2_SIZE];
>  static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc;
>  
>  #define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)
>  
> -/* This is a multi-level map on the physical address space.
> -   The bottom level has pointers to MemoryRegionSections.  */
> -static PhysPageEntry phys_map = { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
> -
>  static void io_mem_init(void);
>  static void memory_map_init(void);
>  
> @@ -459,18 +448,19 @@ static void phys_page_set_level(PhysPageEntry *lp, target_phys_addr_t *index,
>      }
>  }
>  
> -static void phys_page_set(target_phys_addr_t index, target_phys_addr_t nb,
> +static void phys_page_set(AddressSpaceDispatch *d,
> +                          target_phys_addr_t index, target_phys_addr_t nb,
>                            uint16_t leaf)
>  {
>      /* Wildly overreserve - it doesn't matter much. */
>      phys_map_node_reserve(3 * P_L2_LEVELS);
>  
> -    phys_page_set_level(&phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
> +    phys_page_set_level(&d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
>  }
>  
> -MemoryRegionSection *phys_page_find(target_phys_addr_t index)
> +MemoryRegionSection *phys_page_find(AddressSpaceDispatch *d, target_phys_addr_t index)
>  {
> -    PhysPageEntry lp = phys_map;
> +    PhysPageEntry lp = d->phys_map;
>      PhysPageEntry *p;
>      int i;
>      uint16_t s_index = phys_section_unassigned;
> @@ -1472,7 +1462,7 @@ void tb_invalidate_phys_addr(target_phys_addr_t addr)
>      ram_addr_t ram_addr;
>      MemoryRegionSection *section;
>  
> -    section = phys_page_find(addr >> TARGET_PAGE_BITS);
> +    section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
>      if (!(memory_region_is_ram(section->mr)
>            || (section->mr->rom_device && section->mr->readable))) {
>          return;
> @@ -2218,9 +2208,9 @@ static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
>      lp->ptr = PHYS_MAP_NODE_NIL;
>  }
>  
> -static void destroy_all_mappings(void)
> +static void destroy_all_mappings(AddressSpaceDispatch *d)
>  {
> -    destroy_l2_mapping(&phys_map, P_L2_LEVELS - 1);
> +    destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
>      phys_map_nodes_reset();
>  }
>  
> @@ -2240,12 +2230,12 @@ static void phys_sections_clear(void)
>      phys_sections_nb = 0;
>  }
>  
> -static void register_subpage(MemoryRegionSection *section)
> +static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
>  {
>      subpage_t *subpage;
>      target_phys_addr_t base = section->offset_within_address_space
>          & TARGET_PAGE_MASK;
> -    MemoryRegionSection *existing = phys_page_find(base >> TARGET_PAGE_BITS);
> +    MemoryRegionSection *existing = phys_page_find(d, base >> TARGET_PAGE_BITS);
>      MemoryRegionSection subsection = {
>          .offset_within_address_space = base,
>          .size = TARGET_PAGE_SIZE,
> @@ -2257,7 +2247,7 @@ static void register_subpage(MemoryRegionSection *section)
>      if (!(existing->mr->subpage)) {
>          subpage = subpage_init(base);
>          subsection.mr = &subpage->iomem;
> -        phys_page_set(base >> TARGET_PAGE_BITS, 1,
> +        phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
>                        phys_section_add(&subsection));
>      } else {
>          subpage = container_of(existing->mr, subpage_t, iomem);
> @@ -2268,7 +2258,7 @@ static void register_subpage(MemoryRegionSection *section)
>  }
>  
>  
> -static void register_multipage(MemoryRegionSection *section)
> +static void register_multipage(AddressSpaceDispatch *d, MemoryRegionSection *section)
>  {
>      target_phys_addr_t start_addr = section->offset_within_address_space;
>      ram_addr_t size = section->size;
> @@ -2278,13 +2268,13 @@ static void register_multipage(MemoryRegionSection *section)
>      assert(size);
>  
>      addr = start_addr;
> -    phys_page_set(addr >> TARGET_PAGE_BITS, size >> TARGET_PAGE_BITS,
> +    phys_page_set(d, addr >> TARGET_PAGE_BITS, size >> TARGET_PAGE_BITS,
>                    section_index);
>  }
>  
> -void cpu_register_physical_memory_log(MemoryRegionSection *section,
> -                                      bool readonly)
> +static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
>  {
> +    AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
>      MemoryRegionSection now = *section, remain = *section;
>  
>      if ((now.offset_within_address_space & ~TARGET_PAGE_MASK)
> @@ -2292,7 +2282,7 @@ void cpu_register_physical_memory_log(MemoryRegionSection *section,
>          now.size = MIN(TARGET_PAGE_ALIGN(now.offset_within_address_space)
>                         - now.offset_within_address_space,
>                         now.size);
> -        register_subpage(&now);
> +        register_subpage(d, &now);
>          remain.size -= now.size;
>          remain.offset_within_address_space += now.size;
>          remain.offset_within_region += now.size;
> @@ -2301,10 +2291,10 @@ void cpu_register_physical_memory_log(MemoryRegionSection *section,
>          now = remain;
>          if (remain.offset_within_region & ~TARGET_PAGE_MASK) {
>              now.size = TARGET_PAGE_SIZE;
> -            register_subpage(&now);
> +            register_subpage(d, &now);
>          } else {
>              now.size &= TARGET_PAGE_MASK;
> -            register_multipage(&now);
> +            register_multipage(d, &now);
>          }
>          remain.size -= now.size;
>          remain.offset_within_address_space += now.size;
> @@ -2312,7 +2302,7 @@ void cpu_register_physical_memory_log(MemoryRegionSection *section,
>      }
>      now = remain;
>      if (now.size) {
> -        register_subpage(&now);
> +        register_subpage(d, &now);
>      }
>  }
>  
> @@ -3163,11 +3153,17 @@ static void io_mem_init(void)
>                            "watch", UINT64_MAX);
>  }
>  
> +static void mem_begin(MemoryListener *listener)
> +{
> +    AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
> +
> +    destroy_all_mappings(d);
> +    d->phys_map.ptr = PHYS_MAP_NODE_NIL;
> +}
> +
>  static void core_begin(MemoryListener *listener)
>  {
> -    destroy_all_mappings();
>      phys_sections_clear();
> -    phys_map.ptr = PHYS_MAP_NODE_NIL;
>      phys_section_unassigned = dummy_section(&io_mem_unassigned);
>      phys_section_notdirty = dummy_section(&io_mem_notdirty);
>      phys_section_rom = dummy_section(&io_mem_rom);
> @@ -3186,18 +3182,6 @@ static void tcg_commit(MemoryListener *listener)
>      }
>  }
>  
> -static void core_region_add(MemoryListener *listener,
> -                            MemoryRegionSection *section)
> -{
> -    cpu_register_physical_memory_log(section, section->readonly);
> -}
> -
> -static void core_region_nop(MemoryListener *listener,
> -                            MemoryRegionSection *section)
> -{
> -    cpu_register_physical_memory_log(section, section->readonly);
> -}
> -
>  static void core_log_global_start(MemoryListener *listener)
>  {
>      cpu_physical_memory_set_dirty_tracking(1);
> @@ -3229,11 +3213,9 @@ static void io_region_del(MemoryListener *listener,
>  static MemoryListener core_memory_listener = {
>      MEMORY_LISTENER_DEFAULT_OPS,
>      .begin = core_begin,
> -    .region_add = core_region_add,
> -    .region_nop = core_region_nop,
>      .log_global_start = core_log_global_start,
>      .log_global_stop = core_log_global_stop,
> -    .priority = 0,
> +    .priority = 1,
>  };
>  
>  static MemoryListener io_memory_listener = {
> @@ -3248,6 +3230,22 @@ static void io_region_del(MemoryListener *listener,
>      .commit = tcg_commit,
>  };
>  
> +void address_space_init_dispatch(AddressSpace *as)
> +{
> +    AddressSpaceDispatch *d = g_new(AddressSpaceDispatch, 1);
> +
> +    d->phys_map  = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
> +    d->listener = (MemoryListener) {
> +        MEMORY_LISTENER_DEFAULT_OPS,
> +        .begin = mem_begin,
> +        .region_add = mem_add,
> +        .region_nop = mem_add,
> +        .priority = 0,
> +    };

I see you've become fond of this extension :-)

I'd personally avoid it...  You're typing more than you need to.

BTW, if you make the earlier change I suggested, the
MEMORY_LISTENER_DEFAULT_OPS is no longer necessary.

Regards,

Anthony Liguori

> diff --git a/memory-internal.h b/memory-internal.h
> index 655f71f..a9d914e 100644
> --- a/memory-internal.h
> +++ b/memory-internal.h
> @@ -21,6 +21,26 @@
>  
>  #ifndef CONFIG_USER_ONLY
>  
> +typedef struct PhysPageEntry PhysPageEntry;
> +
> +struct PhysPageEntry {
> +    uint16_t is_leaf : 1;
> +     /* index into phys_sections (is_leaf) or phys_map_nodes (!is_leaf) */
> +    uint16_t ptr : 15;
> +};
> +
> +typedef struct AddressSpaceDispatch AddressSpaceDispatch;
> +
> +struct AddressSpaceDispatch {
> +    /* This is a multi-level map on the physical address space.
> +     * The bottom level has pointers to MemoryRegionSections.
> +     */
> +    PhysPageEntry phys_map;
> +    MemoryListener listener;
> +};
> +
> +void address_space_init_dispatch(AddressSpace *as);
> +
>  ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
>                                     MemoryRegion *mr);
>  ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr);
> @@ -29,8 +49,6 @@ void qemu_ram_free_from_ptr(ram_addr_t addr);
>  
>  struct MemoryRegion;
>  struct MemoryRegionSection;
> -void cpu_register_physical_memory_log(struct MemoryRegionSection *section,
> -                                      bool readonly);
>  
>  void qemu_register_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size);
>  void qemu_unregister_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size);
> diff --git a/memory.c b/memory.c
> index f829d84..28a79ae 100644
> --- a/memory.c
> +++ b/memory.c
> @@ -1550,6 +1550,7 @@ void address_space_init(AddressSpace *as, MemoryRegion *root)
>      QTAILQ_INSERT_TAIL(&address_spaces, as, address_spaces_link);
>      as->name = NULL;
>      memory_region_transaction_commit();
> +    address_space_init_dispatch(as);
>  }
>  
>  uint64_t io_mem_read(MemoryRegion *mr, target_phys_addr_t addr, unsigned size)
> diff --git a/memory.h b/memory.h
> index 6115f48..84f7439 100644
> --- a/memory.h
> +++ b/memory.h
> @@ -169,6 +169,7 @@ struct AddressSpace {
>      struct FlatView *current_map;
>      int ioeventfd_nb;
>      struct MemoryRegionIoeventfd *ioeventfds;
> +    struct AddressSpaceDispatch *dispatch;
>      QTAILQ_ENTRY(AddressSpace) address_spaces_link;
>  };
>  
> @@ -830,6 +831,67 @@ void mtree_info(fprintf_function mon_printf, void *f);
>   */
>  void address_space_init(AddressSpace *as, MemoryRegion *root);
>  
> +/**
> + * address_space_rw: read from or write to an address space.
> + *
> + * @as: #AddressSpace to be accessed
> + * @addr: address within that address space
> + * @buf: buffer with the data transferred
> + * @is_write: indicates the transfer direction
> + */
> +void address_space_rw(AddressSpace *as, target_phys_addr_t addr, uint8_t *buf,
> +                      int len, bool is_write);
> +
> +/**
> + * address_space_write: write to address space.
> + *
> + * @as: #AddressSpace to be accessed
> + * @addr: address within that address space
> + * @buf: buffer with the data transferred
> + */
> +void address_space_write(AddressSpace *as, target_phys_addr_t addr,
> +                         const uint8_t *buf, int len);
> +
> +/**
> + * address_space_read: read from an address space.
> + *
> + * @as: #AddressSpace to be accessed
> + * @addr: address within that address space
> + * @buf: buffer with the data transferred
> + */
> +void address_space_read(AddressSpace *as, target_phys_addr_t addr, uint8_t *buf, int len);
> +
> +/* address_space_map: map a physical memory region into a host virtual address
> + *
> + * May map a subset of the requested range, given by and returned in @plen.
> + * May return %NULL if resources needed to perform the mapping are exhausted.
> + * Use only for reads OR writes - not for read-modify-write operations.
> + * Use cpu_register_map_client() to know when retrying the map operation is
> + * likely to succeed.
> + *
> + * @as: #AddressSpace to be accessed
> + * @addr: address within that address space
> + * @plen: pointer to length of buffer; updated on return
> + * @is_write: indicates the transfer direction
> + */
> +void *address_space_map(AddressSpace *as, target_phys_addr_t addr,
> +                        target_phys_addr_t *plen, bool is_write);
> +
> +/* address_space_unmap: Unmaps a memory region previously mapped by address_space_map()
> + *
> + * Will also mark the memory as dirty if @is_write == %true.  @access_len gives
> + * the amount of memory that was actually read or written by the caller.
> + *
> + * @as: #AddressSpace used
> + * @addr: address within that address space
> + * @len: buffer length as returned by address_space_map()
> + * @access_len: amount of data actually transferred
> + * @is_write: indicates the transfer direction
> + */
> +void address_space_unmap(AddressSpace *as, void *buffer, target_phys_addr_t len,
> +                         int is_write, target_phys_addr_t access_len);
> +
> +
>  #endif
>  
>  #endif
> -- 
> 1.7.12
Avi Kivity - Oct. 4, 2012, 2:43 p.m.
On 10/04/2012 04:13 PM, Anthony Liguori wrote:
>>  
>> +void address_space_init_dispatch(AddressSpace *as)
>> +{
>> +    AddressSpaceDispatch *d = g_new(AddressSpaceDispatch, 1);
>> +
>> +    d->phys_map  = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
>> +    d->listener = (MemoryListener) {
>> +        MEMORY_LISTENER_DEFAULT_OPS,
>> +        .begin = mem_begin,
>> +        .region_add = mem_add,
>> +        .region_nop = mem_add,
>> +        .priority = 0,
>> +    };
> 
> I see you've become fond of this extension :-)

It's not an extension.

> I'd personally avoid it...  You're typing more than you need to.

Don't see why, it seems to me to be a direct way of specifying what you
want.  You can read it literally as "d->phys_map is assigned a
PhysPageEntry structure with ptr = this and is_leaf = that.

The alternatives are:

   memset(&d->phys_map, 0, sizeof(d->phys_map));
   d->phys_map.ptr = PHYS_MAP_NODE_NIL;
   d->phys_map.is_leaf = 0;

or

   PhysPageEntry tmp = { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };

   d->phys_map = tmp;

I find both of these less appealing than the original.
Blue Swirl - Oct. 4, 2012, 5:13 p.m.
On Thu, Oct 4, 2012 at 6:38 AM, Avi Kivity <avi@redhat.com> wrote:
> On 10/03/2012 10:24 PM, Blue Swirl wrote:
>> >
>> >  #else
>> > -void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
>> > -                            int len, int is_write)
>> > +
>> > +void address_space_rw(AddressSpace *as, target_phys_addr_t addr, uint8_t *buf,
>> > +                      int len, bool is_write)
>>
>> I'd make address_space_* use uint64_t instead of target_phys_addr_t
>> for the address. It may actually be buggy for 32 bit
>> target_phys_addr_t  and 64 bit DMA addresses, if such architectures
>> exist. Maybe memory.c could be made target independent one day.
>
> We can make target_phys_addr_t 64 bit unconditionally.  The fraction of
> deployments where both host and guest are 32 bits is dropping, and I
> doubt the performance drop is noticable.

My line of thought was that memory.c would not be tied to physical
addresses, but it would be more general. Then exec.c would specialize
the API to use target_phys_addr_t. Similarly PCI would specialize it
to pcibus_t, PIO to pio_addr_t and DMA to dma_addr_t.

>
> We were also planning to rename target_phys_addr_t to hw_addr.
>
> But both of those are for another patch.  For now, AddressSpace follows
> MemoryRegion, it's just a small abstraction on top.
>
>
> --
> I have a truly marvellous patch that fixes the bug which this
> signature is too narrow to contain.
>
Avi Kivity - Oct. 4, 2012, 5:19 p.m.
On 10/04/2012 07:13 PM, Blue Swirl wrote:
> On Thu, Oct 4, 2012 at 6:38 AM, Avi Kivity <avi@redhat.com> wrote:
>> On 10/03/2012 10:24 PM, Blue Swirl wrote:
>>> >
>>> >  #else
>>> > -void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
>>> > -                            int len, int is_write)
>>> > +
>>> > +void address_space_rw(AddressSpace *as, target_phys_addr_t addr, uint8_t *buf,
>>> > +                      int len, bool is_write)
>>>
>>> I'd make address_space_* use uint64_t instead of target_phys_addr_t
>>> for the address. It may actually be buggy for 32 bit
>>> target_phys_addr_t  and 64 bit DMA addresses, if such architectures
>>> exist. Maybe memory.c could be made target independent one day.
>>
>> We can make target_phys_addr_t 64 bit unconditionally.  The fraction of
>> deployments where both host and guest are 32 bits is dropping, and I
>> doubt the performance drop is noticable.
> 
> My line of thought was that memory.c would not be tied to physical
> addresses, but it would be more general. Then exec.c would specialize
> the API to use target_phys_addr_t. Similarly PCI would specialize it
> to pcibus_t, PIO to pio_addr_t and DMA to dma_addr_t.

The problem is that any transition across the boundaries would then
involve casts (explicit or implicit) with the constant worry of whether
we're truncating or not.  Note we have transitions in both directions,
with the higher layer APIs calling memory APIs, and the memory API
calling them back via MemoryRegionOps or a new MemoryRegionIOMMUOps.

What does this flexibility buy us, compared to a single hw_addr fixed at
64 bits?
Blue Swirl - Oct. 4, 2012, 5:42 p.m.
On Thu, Oct 4, 2012 at 5:19 PM, Avi Kivity <avi@redhat.com> wrote:
> On 10/04/2012 07:13 PM, Blue Swirl wrote:
>> On Thu, Oct 4, 2012 at 6:38 AM, Avi Kivity <avi@redhat.com> wrote:
>>> On 10/03/2012 10:24 PM, Blue Swirl wrote:
>>>> >
>>>> >  #else
>>>> > -void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
>>>> > -                            int len, int is_write)
>>>> > +
>>>> > +void address_space_rw(AddressSpace *as, target_phys_addr_t addr, uint8_t *buf,
>>>> > +                      int len, bool is_write)
>>>>
>>>> I'd make address_space_* use uint64_t instead of target_phys_addr_t
>>>> for the address. It may actually be buggy for 32 bit
>>>> target_phys_addr_t  and 64 bit DMA addresses, if such architectures
>>>> exist. Maybe memory.c could be made target independent one day.
>>>
>>> We can make target_phys_addr_t 64 bit unconditionally.  The fraction of
>>> deployments where both host and guest are 32 bits is dropping, and I
>>> doubt the performance drop is noticable.
>>
>> My line of thought was that memory.c would not be tied to physical
>> addresses, but it would be more general. Then exec.c would specialize
>> the API to use target_phys_addr_t. Similarly PCI would specialize it
>> to pcibus_t, PIO to pio_addr_t and DMA to dma_addr_t.
>
> The problem is that all any transition across the boundaries would then
> involve casts (explicit or implicit) with the constant worry of whether
> we're truncating or not.  Note we have transitions in both directions,
> with the higher layer APIs calling memory APIs, and the memory API
> calling them back via MemoryRegionOps or a new MemoryRegionIOMMUOps.
>
> What does this flexibility buy us, compared to a single hw_addr fixed at
> 64 bits?

They can all be 64 bits, I'm just considering types. Getting rid of
target_phys_addr_t, pcibus_t, pio_addr_t and dma_addr_t (are there
more?) may be also worthwhile.

>
>
> --
> error compiling committee.c: too many arguments to function
Anthony Liguori - Oct. 4, 2012, 7:05 p.m.
Blue Swirl <blauwirbel@gmail.com> writes:

> On Thu, Oct 4, 2012 at 5:19 PM, Avi Kivity <avi@redhat.com> wrote:
>> On 10/04/2012 07:13 PM, Blue Swirl wrote:
>>> On Thu, Oct 4, 2012 at 6:38 AM, Avi Kivity <avi@redhat.com> wrote:
>>>> On 10/03/2012 10:24 PM, Blue Swirl wrote:
>>>>> >
>>>>> >  #else
>>>>> > -void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
>>>>> > -                            int len, int is_write)
>>>>> > +
>>>>> > +void address_space_rw(AddressSpace *as, target_phys_addr_t addr, uint8_t *buf,
>>>>> > +                      int len, bool is_write)
>>>>>
>>>>> I'd make address_space_* use uint64_t instead of target_phys_addr_t
>>>>> for the address. It may actually be buggy for 32 bit
>>>>> target_phys_addr_t  and 64 bit DMA addresses, if such architectures
>>>>> exist. Maybe memory.c could be made target independent one day.
>>>>
>>>> We can make target_phys_addr_t 64 bit unconditionally.  The fraction of
>>>> deployments where both host and guest are 32 bits is dropping, and I
>>>> doubt the performance drop is noticable.
>>>
>>> My line of thought was that memory.c would not be tied to physical
>>> addresses, but it would be more general. Then exec.c would specialize
>>> the API to use target_phys_addr_t. Similarly PCI would specialize it
>>> to pcibus_t, PIO to pio_addr_t and DMA to dma_addr_t.
>>
>> The problem is that all any transition across the boundaries would then
>> involve casts (explicit or implicit) with the constant worry of whether
>> we're truncating or not.  Note we have transitions in both directions,
>> with the higher layer APIs calling memory APIs, and the memory API
>> calling them back via MemoryRegionOps or a new MemoryRegionIOMMUOps.
>>
>> What does this flexibility buy us, compared to a single hw_addr fixed at
>> 64 bits?
>
> They can all be 64 bits, I'm just considering types. Getting rid of
> target_phys_addr_t, pcibus_t, pio_addr_t and dma_addr_t (are there
> more?) may be also worthwhile.

Where this breaks down is devices that are DMA capable but may exist on
multiple busses.

So you either end up with a device-specific type and a layer of casting
or weird acrobatics.

It makes more sense IMHO to just treat bus addresses as a fixed width.

target_phys_addr_t is a bad name.  I'd be in favor of either just using
uint64_t directly or having a generic dma_addr_t.

Regards,

Anthony Liguori

>
>>
>>
>> --
>> error compiling committee.c: too many arguments to function
Blue Swirl - Oct. 4, 2012, 7:15 p.m.
On Thu, Oct 4, 2012 at 7:05 PM, Anthony Liguori <anthony@codemonkey.ws> wrote:
> Blue Swirl <blauwirbel@gmail.com> writes:
>
>> On Thu, Oct 4, 2012 at 5:19 PM, Avi Kivity <avi@redhat.com> wrote:
>>> On 10/04/2012 07:13 PM, Blue Swirl wrote:
>>>> On Thu, Oct 4, 2012 at 6:38 AM, Avi Kivity <avi@redhat.com> wrote:
>>>>> On 10/03/2012 10:24 PM, Blue Swirl wrote:
>>>>>> >
>>>>>> >  #else
>>>>>> > -void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
>>>>>> > -                            int len, int is_write)
>>>>>> > +
>>>>>> > +void address_space_rw(AddressSpace *as, target_phys_addr_t addr, uint8_t *buf,
>>>>>> > +                      int len, bool is_write)
>>>>>>
>>>>>> I'd make address_space_* use uint64_t instead of target_phys_addr_t
>>>>>> for the address. It may actually be buggy for 32 bit
>>>>>> target_phys_addr_t  and 64 bit DMA addresses, if such architectures
>>>>>> exist. Maybe memory.c could be made target independent one day.
>>>>>
>>>>> We can make target_phys_addr_t 64 bit unconditionally.  The fraction of
>>>>> deployments where both host and guest are 32 bits is dropping, and I
>>>>> doubt the performance drop is noticable.
>>>>
>>>> My line of thought was that memory.c would not be tied to physical
>>>> addresses, but it would be more general. Then exec.c would specialize
>>>> the API to use target_phys_addr_t. Similarly PCI would specialize it
>>>> to pcibus_t, PIO to pio_addr_t and DMA to dma_addr_t.
>>>
>>> The problem is that all any transition across the boundaries would then
>>> involve casts (explicit or implicit) with the constant worry of whether
>>> we're truncating or not.  Note we have transitions in both directions,
>>> with the higher layer APIs calling memory APIs, and the memory API
>>> calling them back via MemoryRegionOps or a new MemoryRegionIOMMUOps.
>>>
>>> What does this flexibility buy us, compared to a single hw_addr fixed at
>>> 64 bits?
>>
>> They can all be 64 bits, I'm just considering types. Getting rid of
>> target_phys_addr_t, pcibus_t, pio_addr_t and dma_addr_t (are there
>> more?) may be also worthwhile.
>
> Where this breaks down is devices that are DMA capable but may exist on
> multiple busses.
>
> So you either end up with a device-specific type and a layer of casting
> or weird acrobatics.

OK, less types is better.

>
> It makes more sense IMHO to just treat bus addresses as a fixed width.
>
> target_phys_addr_t is a bad name.  I'd be in favor of either just using
> uint64_t directly or having a generic dma_addr_t.

Simplest would be uint64_t for all, one unified address type may have
type safety benefits.

>
> Regards,
>
> Anthony Liguori
>
>>
>>>
>>>
>>> --
>>> error compiling committee.c: too many arguments to function
Peter Maydell - Oct. 4, 2012, 7:16 p.m.
On 4 October 2012 20:05, Anthony Liguori <anthony@codemonkey.ws> wrote:
> Blue Swirl <blauwirbel@gmail.com> writes:
>> They can all be 64 bits, I'm just considering types. Getting rid of
>> target_phys_addr_t, pcibus_t, pio_addr_t and dma_addr_t (are there
>> more?) may be also worthwhile.
>
> Where this breaks down is devices that are DMA capable but may exist on
> multiple busses.
>
> So you either end up with a device-specific type and a layer of casting
> or weird acrobatics.
>
> It makes more sense IMHO to just treat bus addresses as a fixed with.
>
> target_phys_addr_t is a bad name.  I'd be in favor of either just using
> uint64_t directly or having a generic dma_addr_t.

I agree that we only need one type; I think it's helpful to
have a type name rather than direct use of uint64_t. dma_addr_t
doesn't seem right because most of the usage of it isn't going to
be in DMA related contexts. addr_t ?

-- PMM
Avi Kivity - Oct. 7, 2012, 10:34 a.m.
On 10/04/2012 09:16 PM, Peter Maydell wrote:
> On 4 October 2012 20:05, Anthony Liguori <anthony@codemonkey.ws> wrote:
>> Blue Swirl <blauwirbel@gmail.com> writes:
>>> They can all be 64 bits, I'm just considering types. Getting rid of
>>> target_phys_addr_t, pcibus_t, pio_addr_t and dma_addr_t (are there
>>> more?) may be also worthwhile.
>>
>> Where this breaks down is devices that are DMA capable but may exist on
>> multiple busses.
>>
>> So you either end up with a device-specific type and a layer of casting
>> or weird acrobatics.
>>
>> It makes more sense IMHO to just treat bus addresses as a fixed with.
>>
>> target_phys_addr_t is a bad name.  I'd be in favor of either just using
>> uint64_t directly or having a generic dma_addr_t.
> 
> I agree that we only need one type; I think it's helpful to
> have a type name rather than direct use of uint64_t. dma_addr_t
> doesn't seem right because most of the usage of it isn't going to
> be in DMA related contexts. addr_t ?

*_t is reserved.

Suggestions:

 phys
 Phys
 hwaddr
 hw_addr

there are some variables named 'phys' scattered in the source, so my
current favorite is hwaddr.  Short and to the point.
Anthony Liguori - Oct. 9, 2012, 3:17 p.m.
Avi Kivity <avi@redhat.com> writes:

> On 10/04/2012 04:13 PM, Anthony Liguori wrote:
>>>  
>>> +void address_space_init_dispatch(AddressSpace *as)
>>> +{
>>> +    AddressSpaceDispatch *d = g_new(AddressSpaceDispatch, 1);
>>> +
>>> +    d->phys_map  = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
>>> +    d->listener = (MemoryListener) {
>>> +        MEMORY_LISTENER_DEFAULT_OPS,
>>> +        .begin = mem_begin,
>>> +        .region_add = mem_add,
>>> +        .region_nop = mem_add,
>>> +        .priority = 0,
>>> +    };
>> 
>> I see you've become fond of this extension :-)
>
> It's not an extension.
>
>> I'd personally avoid it...  You're typing more than you need to.
>
> Don't see why, it seems to me to be a direct way of specifying what you
> want.  You can read it literally as "d->phys_map is assigned a
> PhysPageEntry structure with ptr = this and is_leaf = that.
>
> The alternatives are:
>
>    memset(&d->phys_map, 0, sizeof(d->phys_map);
>    d->phys_map.ptr = PHYS_MAP_NODE_NIL;
>    d->phys_map.is_leaf = 0;
>
> or
>
>    PhysPageEntry tmp = { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
>
>    d->phys_map = tmp;
>
> I find both of these less appealing than the original.

I don't really care about the use of struct literals...

You are typing more than you need to.  The bit that I think really
matters is using zero-initialization as the default vs. using a macro.

Regards,

Anthony Liguori

>
> -- 
> error compiling committee.c: too many arguments to function

Patch

diff --git a/cputlb.c b/cputlb.c
index 0627f32..9027557 100644
--- a/cputlb.c
+++ b/cputlb.c
@@ -21,6 +21,7 @@ 
 #include "cpu.h"
 #include "exec-all.h"
 #include "memory.h"
+#include "exec-memory.h"
 
 #include "cputlb.h"
 
@@ -251,7 +252,7 @@  void tlb_set_page(CPUArchState *env, target_ulong vaddr,
     if (size != TARGET_PAGE_SIZE) {
         tlb_add_large_page(env, vaddr, size);
     }
-    section = phys_page_find(paddr >> TARGET_PAGE_BITS);
+    section = phys_page_find(address_space_memory.dispatch, paddr >> TARGET_PAGE_BITS);
 #if defined(DEBUG_TLB)
     printf("tlb_set_page: vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx
            " prot=%x idx=%d pd=0x%08lx\n",
diff --git a/cputlb.h b/cputlb.h
index 2dc2c96..d537b77 100644
--- a/cputlb.h
+++ b/cputlb.h
@@ -26,7 +26,8 @@  void tlb_unprotect_code_phys(CPUArchState *env, ram_addr_t ram_addr,
                              target_ulong vaddr);
 void tlb_reset_dirty_range(CPUTLBEntry *tlb_entry, uintptr_t start,
                            uintptr_t length);
-MemoryRegionSection *phys_page_find(target_phys_addr_t index);
+MemoryRegionSection *phys_page_find(struct AddressSpaceDispatch *d,
+                                    target_phys_addr_t index);
 void cpu_tlb_reset_dirty_all(ram_addr_t start1, ram_addr_t length);
 void tlb_set_dirty(CPUArchState *env, target_ulong vaddr);
 extern int tlb_flush_count;
diff --git a/exec.c b/exec.c
index 7a76efa..42f9ad1 100644
--- a/exec.c
+++ b/exec.c
@@ -187,7 +187,6 @@ 
 static void *l1_map[V_L1_SIZE];
 
 #if !defined(CONFIG_USER_ONLY)
-typedef struct PhysPageEntry PhysPageEntry;
 
 static MemoryRegionSection *phys_sections;
 static unsigned phys_sections_nb, phys_sections_nb_alloc;
@@ -196,22 +195,12 @@ 
 static uint16_t phys_section_rom;
 static uint16_t phys_section_watch;
 
-struct PhysPageEntry {
-    uint16_t is_leaf : 1;
-     /* index into phys_sections (is_leaf) or phys_map_nodes (!is_leaf) */
-    uint16_t ptr : 15;
-};
-
 /* Simple allocator for PhysPageEntry nodes */
 static PhysPageEntry (*phys_map_nodes)[L2_SIZE];
 static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc;
 
 #define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)
 
-/* This is a multi-level map on the physical address space.
-   The bottom level has pointers to MemoryRegionSections.  */
-static PhysPageEntry phys_map = { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
-
 static void io_mem_init(void);
 static void memory_map_init(void);
 
@@ -459,18 +448,19 @@  static void phys_page_set_level(PhysPageEntry *lp, target_phys_addr_t *index,
     }
 }
 
-static void phys_page_set(target_phys_addr_t index, target_phys_addr_t nb,
+static void phys_page_set(AddressSpaceDispatch *d,
+                          target_phys_addr_t index, target_phys_addr_t nb,
                           uint16_t leaf)
 {
     /* Wildly overreserve - it doesn't matter much. */
     phys_map_node_reserve(3 * P_L2_LEVELS);
 
-    phys_page_set_level(&phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
+    phys_page_set_level(&d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
 }
 
-MemoryRegionSection *phys_page_find(target_phys_addr_t index)
+MemoryRegionSection *phys_page_find(AddressSpaceDispatch *d, target_phys_addr_t index)
 {
-    PhysPageEntry lp = phys_map;
+    PhysPageEntry lp = d->phys_map;
     PhysPageEntry *p;
     int i;
     uint16_t s_index = phys_section_unassigned;
@@ -1472,7 +1462,7 @@  void tb_invalidate_phys_addr(target_phys_addr_t addr)
     ram_addr_t ram_addr;
     MemoryRegionSection *section;
 
-    section = phys_page_find(addr >> TARGET_PAGE_BITS);
+    section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
     if (!(memory_region_is_ram(section->mr)
           || (section->mr->rom_device && section->mr->readable))) {
         return;
@@ -2218,9 +2208,9 @@  static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
     lp->ptr = PHYS_MAP_NODE_NIL;
 }
 
-static void destroy_all_mappings(void)
+static void destroy_all_mappings(AddressSpaceDispatch *d)
 {
-    destroy_l2_mapping(&phys_map, P_L2_LEVELS - 1);
+    destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
     phys_map_nodes_reset();
 }
 
@@ -2240,12 +2230,12 @@  static void phys_sections_clear(void)
     phys_sections_nb = 0;
 }
 
-static void register_subpage(MemoryRegionSection *section)
+static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
 {
     subpage_t *subpage;
     target_phys_addr_t base = section->offset_within_address_space
         & TARGET_PAGE_MASK;
-    MemoryRegionSection *existing = phys_page_find(base >> TARGET_PAGE_BITS);
+    MemoryRegionSection *existing = phys_page_find(d, base >> TARGET_PAGE_BITS);
     MemoryRegionSection subsection = {
         .offset_within_address_space = base,
         .size = TARGET_PAGE_SIZE,
@@ -2257,7 +2247,7 @@  static void register_subpage(MemoryRegionSection *section)
     if (!(existing->mr->subpage)) {
         subpage = subpage_init(base);
         subsection.mr = &subpage->iomem;
-        phys_page_set(base >> TARGET_PAGE_BITS, 1,
+        phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
                       phys_section_add(&subsection));
     } else {
         subpage = container_of(existing->mr, subpage_t, iomem);
@@ -2268,7 +2258,7 @@  static void register_subpage(MemoryRegionSection *section)
 }
 
 
-static void register_multipage(MemoryRegionSection *section)
+static void register_multipage(AddressSpaceDispatch *d, MemoryRegionSection *section)
 {
     target_phys_addr_t start_addr = section->offset_within_address_space;
     ram_addr_t size = section->size;
@@ -2278,13 +2268,13 @@  static void register_multipage(MemoryRegionSection *section)
     assert(size);
 
     addr = start_addr;
-    phys_page_set(addr >> TARGET_PAGE_BITS, size >> TARGET_PAGE_BITS,
+    phys_page_set(d, addr >> TARGET_PAGE_BITS, size >> TARGET_PAGE_BITS,
                   section_index);
 }
 
-void cpu_register_physical_memory_log(MemoryRegionSection *section,
-                                      bool readonly)
+static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
 {
+    AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
     MemoryRegionSection now = *section, remain = *section;
 
     if ((now.offset_within_address_space & ~TARGET_PAGE_MASK)
@@ -2292,7 +2282,7 @@  void cpu_register_physical_memory_log(MemoryRegionSection *section,
         now.size = MIN(TARGET_PAGE_ALIGN(now.offset_within_address_space)
                        - now.offset_within_address_space,
                        now.size);
-        register_subpage(&now);
+        register_subpage(d, &now);
         remain.size -= now.size;
         remain.offset_within_address_space += now.size;
         remain.offset_within_region += now.size;
@@ -2301,10 +2291,10 @@  void cpu_register_physical_memory_log(MemoryRegionSection *section,
         now = remain;
         if (remain.offset_within_region & ~TARGET_PAGE_MASK) {
             now.size = TARGET_PAGE_SIZE;
-            register_subpage(&now);
+            register_subpage(d, &now);
         } else {
             now.size &= TARGET_PAGE_MASK;
-            register_multipage(&now);
+            register_multipage(d, &now);
         }
         remain.size -= now.size;
         remain.offset_within_address_space += now.size;
@@ -2312,7 +2302,7 @@  void cpu_register_physical_memory_log(MemoryRegionSection *section,
     }
     now = remain;
     if (now.size) {
-        register_subpage(&now);
+        register_subpage(d, &now);
     }
 }
 
@@ -3163,11 +3153,17 @@  static void io_mem_init(void)
                           "watch", UINT64_MAX);
 }
 
+static void mem_begin(MemoryListener *listener)
+{
+    AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
+
+    destroy_all_mappings(d);
+    d->phys_map.ptr = PHYS_MAP_NODE_NIL;
+}
+
 static void core_begin(MemoryListener *listener)
 {
-    destroy_all_mappings();
     phys_sections_clear();
-    phys_map.ptr = PHYS_MAP_NODE_NIL;
     phys_section_unassigned = dummy_section(&io_mem_unassigned);
     phys_section_notdirty = dummy_section(&io_mem_notdirty);
     phys_section_rom = dummy_section(&io_mem_rom);
@@ -3186,18 +3182,6 @@  static void tcg_commit(MemoryListener *listener)
     }
 }
 
-static void core_region_add(MemoryListener *listener,
-                            MemoryRegionSection *section)
-{
-    cpu_register_physical_memory_log(section, section->readonly);
-}
-
-static void core_region_nop(MemoryListener *listener,
-                            MemoryRegionSection *section)
-{
-    cpu_register_physical_memory_log(section, section->readonly);
-}
-
 static void core_log_global_start(MemoryListener *listener)
 {
     cpu_physical_memory_set_dirty_tracking(1);
@@ -3229,11 +3213,9 @@  static void io_region_del(MemoryListener *listener,
 static MemoryListener core_memory_listener = {
     MEMORY_LISTENER_DEFAULT_OPS,
     .begin = core_begin,
-    .region_add = core_region_add,
-    .region_nop = core_region_nop,
     .log_global_start = core_log_global_start,
     .log_global_stop = core_log_global_stop,
-    .priority = 0,
+    .priority = 1,
 };
 
 static MemoryListener io_memory_listener = {
@@ -3248,6 +3230,22 @@  static void io_region_del(MemoryListener *listener,
     .commit = tcg_commit,
 };
 
+void address_space_init_dispatch(AddressSpace *as)
+{
+    AddressSpaceDispatch *d = g_new(AddressSpaceDispatch, 1);
+
+    d->phys_map  = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
+    d->listener = (MemoryListener) {
+        MEMORY_LISTENER_DEFAULT_OPS,
+        .begin = mem_begin,
+        .region_add = mem_add,
+        .region_nop = mem_add,
+        .priority = 0,
+    };
+    as->dispatch = d;
+    memory_listener_register(&d->listener, as);
+}
+
 static void memory_map_init(void)
 {
     system_memory = g_malloc(sizeof(*system_memory));
@@ -3319,9 +3317,11 @@  int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
 }
 
 #else
-void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
-                            int len, int is_write)
+
+void address_space_rw(AddressSpace *as, target_phys_addr_t addr, uint8_t *buf,
+                      int len, bool is_write)
 {
+    AddressSpaceDispatch *d = as->dispatch;
     int l;
     uint8_t *ptr;
     uint32_t val;
@@ -3333,7 +3333,7 @@  void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
         l = (page + TARGET_PAGE_SIZE) - addr;
         if (l > len)
             l = len;
-        section = phys_page_find(page >> TARGET_PAGE_BITS);
+        section = phys_page_find(d, page >> TARGET_PAGE_BITS);
 
         if (is_write) {
             if (!memory_region_is_ram(section->mr)) {
@@ -3410,10 +3410,36 @@  void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
     }
 }
 
+void address_space_write(AddressSpace *as, target_phys_addr_t addr,
+                         const uint8_t *buf, int len)
+{
+    address_space_rw(as, addr, (uint8_t *)buf, len, true);
+}
+
+/**
+ * address_space_read: read from an address space.
+ *
+ * @as: #AddressSpace to be accessed
+ * @addr: address within that address space
+ * @buf: buffer with the data transferred
+ */
+void address_space_read(AddressSpace *as, target_phys_addr_t addr, uint8_t *buf, int len)
+{
+    address_space_rw(as, addr, buf, len, false);
+}
+
+
+void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
+                            int len, int is_write)
+{
+    return address_space_rw(&address_space_memory, addr, buf, len, is_write);
+}
+
 /* used for ROM loading : can write in RAM and ROM */
 void cpu_physical_memory_write_rom(target_phys_addr_t addr,
                                    const uint8_t *buf, int len)
 {
+    AddressSpaceDispatch *d = address_space_memory.dispatch;
     int l;
     uint8_t *ptr;
     target_phys_addr_t page;
@@ -3424,7 +3450,7 @@  void cpu_physical_memory_write_rom(target_phys_addr_t addr,
         l = (page + TARGET_PAGE_SIZE) - addr;
         if (l > len)
             l = len;
-        section = phys_page_find(page >> TARGET_PAGE_BITS);
+        section = phys_page_find(d, page >> TARGET_PAGE_BITS);
 
         if (!(memory_region_is_ram(section->mr) ||
               memory_region_is_romd(section->mr))) {
@@ -3504,10 +3530,12 @@  static void cpu_notify_map_clients(void)
  * Use cpu_register_map_client() to know when retrying the map operation is
  * likely to succeed.
  */
-void *cpu_physical_memory_map(target_phys_addr_t addr,
-                              target_phys_addr_t *plen,
-                              int is_write)
+void *address_space_map(AddressSpace *as,
+                        target_phys_addr_t addr,
+                        target_phys_addr_t *plen,
+                        bool is_write)
 {
+    AddressSpaceDispatch *d = as->dispatch;
     target_phys_addr_t len = *plen;
     target_phys_addr_t todo = 0;
     int l;
@@ -3522,7 +3550,7 @@  void *cpu_physical_memory_map(target_phys_addr_t addr,
         l = (page + TARGET_PAGE_SIZE) - addr;
         if (l > len)
             l = len;
-        section = phys_page_find(page >> TARGET_PAGE_BITS);
+        section = phys_page_find(d, page >> TARGET_PAGE_BITS);
 
         if (!(memory_region_is_ram(section->mr) && !section->readonly)) {
             if (todo || bounce.buffer) {
@@ -3532,7 +3560,7 @@  void *cpu_physical_memory_map(target_phys_addr_t addr,
             bounce.addr = addr;
             bounce.len = l;
             if (!is_write) {
-                cpu_physical_memory_read(addr, bounce.buffer, l);
+                address_space_read(as, addr, bounce.buffer, l);
             }
 
             *plen = l;
@@ -3553,12 +3581,12 @@  void *cpu_physical_memory_map(target_phys_addr_t addr,
     return ret;
 }
 
-/* Unmaps a memory region previously mapped by cpu_physical_memory_map().
+/* Unmaps a memory region previously mapped by address_space_map().
  * Will also mark the memory as dirty if is_write == 1.  access_len gives
  * the amount of memory that was actually read or written by the caller.
  */
-void cpu_physical_memory_unmap(void *buffer, target_phys_addr_t len,
-                               int is_write, target_phys_addr_t access_len)
+void address_space_unmap(AddressSpace *as, void *buffer, target_phys_addr_t len,
+                         int is_write, target_phys_addr_t access_len)
 {
     if (buffer != bounce.buffer) {
         if (is_write) {
@@ -3585,13 +3613,26 @@  void cpu_physical_memory_unmap(void *buffer, target_phys_addr_t len,
         return;
     }
     if (is_write) {
-        cpu_physical_memory_write(bounce.addr, bounce.buffer, access_len);
+        address_space_write(as, bounce.addr, bounce.buffer, access_len);
     }
     qemu_vfree(bounce.buffer);
     bounce.buffer = NULL;
     cpu_notify_map_clients();
 }
 
+void *cpu_physical_memory_map(target_phys_addr_t addr,
+                              target_phys_addr_t *plen,
+                              int is_write)
+{
+    return address_space_map(&address_space_memory, addr, plen, is_write);
+}
+
+void cpu_physical_memory_unmap(void *buffer, target_phys_addr_t len,
+                               int is_write, target_phys_addr_t access_len)
+{
+    return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
+}
+
 /* warning: addr must be aligned */
 static inline uint32_t ldl_phys_internal(target_phys_addr_t addr,
                                          enum device_endian endian)
@@ -3600,7 +3641,7 @@  static inline uint32_t ldl_phys_internal(target_phys_addr_t addr,
     uint32_t val;
     MemoryRegionSection *section;
 
-    section = phys_page_find(addr >> TARGET_PAGE_BITS);
+    section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
 
     if (!(memory_region_is_ram(section->mr) ||
           memory_region_is_romd(section->mr))) {
@@ -3659,7 +3700,7 @@  static inline uint64_t ldq_phys_internal(target_phys_addr_t addr,
     uint64_t val;
     MemoryRegionSection *section;
 
-    section = phys_page_find(addr >> TARGET_PAGE_BITS);
+    section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
 
     if (!(memory_region_is_ram(section->mr) ||
           memory_region_is_romd(section->mr))) {
@@ -3726,7 +3767,7 @@  static inline uint32_t lduw_phys_internal(target_phys_addr_t addr,
     uint64_t val;
     MemoryRegionSection *section;
 
-    section = phys_page_find(addr >> TARGET_PAGE_BITS);
+    section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
 
     if (!(memory_region_is_ram(section->mr) ||
           memory_region_is_romd(section->mr))) {
@@ -3785,7 +3826,7 @@  void stl_phys_notdirty(target_phys_addr_t addr, uint32_t val)
     uint8_t *ptr;
     MemoryRegionSection *section;
 
-    section = phys_page_find(addr >> TARGET_PAGE_BITS);
+    section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
 
     if (!memory_region_is_ram(section->mr) || section->readonly) {
         addr = memory_region_section_addr(section, addr);
@@ -3817,7 +3858,7 @@  void stq_phys_notdirty(target_phys_addr_t addr, uint64_t val)
     uint8_t *ptr;
     MemoryRegionSection *section;
 
-    section = phys_page_find(addr >> TARGET_PAGE_BITS);
+    section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
 
     if (!memory_region_is_ram(section->mr) || section->readonly) {
         addr = memory_region_section_addr(section, addr);
@@ -3846,7 +3887,7 @@  static inline void stl_phys_internal(target_phys_addr_t addr, uint32_t val,
     uint8_t *ptr;
     MemoryRegionSection *section;
 
-    section = phys_page_find(addr >> TARGET_PAGE_BITS);
+    section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
 
     if (!memory_region_is_ram(section->mr) || section->readonly) {
         addr = memory_region_section_addr(section, addr);
@@ -3919,7 +3960,7 @@  static inline void stw_phys_internal(target_phys_addr_t addr, uint32_t val,
     uint8_t *ptr;
     MemoryRegionSection *section;
 
-    section = phys_page_find(addr >> TARGET_PAGE_BITS);
+    section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
 
     if (!memory_region_is_ram(section->mr) || section->readonly) {
         addr = memory_region_section_addr(section, addr);
@@ -4161,7 +4202,8 @@  bool cpu_physical_memory_is_io(target_phys_addr_t phys_addr)
 {
     MemoryRegionSection *section;
 
-    section = phys_page_find(phys_addr >> TARGET_PAGE_BITS);
+    section = phys_page_find(address_space_memory.dispatch,
+                             phys_addr >> TARGET_PAGE_BITS);
 
     return !(memory_region_is_ram(section->mr) ||
              memory_region_is_romd(section->mr));
diff --git a/memory-internal.h b/memory-internal.h
index 655f71f..a9d914e 100644
--- a/memory-internal.h
+++ b/memory-internal.h
@@ -21,6 +21,26 @@ 
 
 #ifndef CONFIG_USER_ONLY
 
+typedef struct PhysPageEntry PhysPageEntry;
+
+struct PhysPageEntry {
+    uint16_t is_leaf : 1;
+     /* index into phys_sections (is_leaf) or phys_map_nodes (!is_leaf) */
+    uint16_t ptr : 15;
+};
+
+typedef struct AddressSpaceDispatch AddressSpaceDispatch;
+
+struct AddressSpaceDispatch {
+    /* This is a multi-level map on the physical address space.
+     * The bottom level has pointers to MemoryRegionSections.
+     */
+    PhysPageEntry phys_map;
+    MemoryListener listener;
+};
+
+void address_space_init_dispatch(AddressSpace *as);
+
 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
                                    MemoryRegion *mr);
 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr);
@@ -29,8 +49,6 @@  void qemu_ram_free_from_ptr(ram_addr_t addr);
 
 struct MemoryRegion;
 struct MemoryRegionSection;
-void cpu_register_physical_memory_log(struct MemoryRegionSection *section,
-                                      bool readonly);
 
 void qemu_register_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size);
 void qemu_unregister_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size);
diff --git a/memory.c b/memory.c
index f829d84..28a79ae 100644
--- a/memory.c
+++ b/memory.c
@@ -1550,6 +1550,7 @@  void address_space_init(AddressSpace *as, MemoryRegion *root)
     QTAILQ_INSERT_TAIL(&address_spaces, as, address_spaces_link);
     as->name = NULL;
     memory_region_transaction_commit();
+    address_space_init_dispatch(as);
 }
 
 uint64_t io_mem_read(MemoryRegion *mr, target_phys_addr_t addr, unsigned size)
diff --git a/memory.h b/memory.h
index 6115f48..84f7439 100644
--- a/memory.h
+++ b/memory.h
@@ -169,6 +169,7 @@  struct AddressSpace {
     struct FlatView *current_map;
     int ioeventfd_nb;
     struct MemoryRegionIoeventfd *ioeventfds;
+    struct AddressSpaceDispatch *dispatch;
     QTAILQ_ENTRY(AddressSpace) address_spaces_link;
 };
 
@@ -830,6 +831,67 @@  void mtree_info(fprintf_function mon_printf, void *f);
  */
 void address_space_init(AddressSpace *as, MemoryRegion *root);
 
+/**
+ * address_space_rw: read from or write to an address space.
+ *
+ * @as: #AddressSpace to be accessed
+ * @addr: address within that address space
+ * @buf: buffer with the data transferred
+ * @is_write: indicates the transfer direction
+ */
+void address_space_rw(AddressSpace *as, target_phys_addr_t addr, uint8_t *buf,
+                      int len, bool is_write);
+
+/**
+ * address_space_write: write to address space.
+ *
+ * @as: #AddressSpace to be accessed
+ * @addr: address within that address space
+ * @buf: buffer with the data transferred
+ */
+void address_space_write(AddressSpace *as, target_phys_addr_t addr,
+                         const uint8_t *buf, int len);
+
+/**
+ * address_space_read: read from an address space.
+ *
+ * @as: #AddressSpace to be accessed
+ * @addr: address within that address space
+ * @buf: buffer with the data transferred
+ */
+void address_space_read(AddressSpace *as, target_phys_addr_t addr, uint8_t *buf, int len);
+
+/* address_space_map: map a physical memory region into a host virtual address
+ *
+ * May map a subset of the requested range, given by and returned in @plen.
+ * May return %NULL if resources needed to perform the mapping are exhausted.
+ * Use only for reads OR writes - not for read-modify-write operations.
+ * Use cpu_register_map_client() to know when retrying the map operation is
+ * likely to succeed.
+ *
+ * @as: #AddressSpace to be accessed
+ * @addr: address within that address space
+ * @plen: pointer to length of buffer; updated on return
+ * @is_write: indicates the transfer direction
+ */
+void *address_space_map(AddressSpace *as, target_phys_addr_t addr,
+                        target_phys_addr_t *plen, bool is_write);
+
+/* address_space_unmap: Unmaps a memory region previously mapped by address_space_map()
+ *
+ * Will also mark the memory as dirty if @is_write == %true.  @access_len gives
+ * the amount of memory that was actually read or written by the caller.
+ *
+ * @as: #AddressSpace used
+ * @addr: address within that address space
+ * @len: buffer length as returned by address_space_map()
+ * @access_len: amount of data actually transferred
+ * @is_write: indicates the transfer direction
+ */
+void address_space_unmap(AddressSpace *as, void *buffer, target_phys_addr_t len,
+                         int is_write, target_phys_addr_t access_len);
+
+
 #endif
 
 #endif