Patchwork [00/40] Memory-related changes sneak peek for 1.6

login
register
mail settings
Submitter Paolo Bonzini
Date May 7, 2013, 2:16 p.m.
Message ID <1367936209-12109-1-git-send-email-pbonzini@redhat.com>
Download mbox | patch
Permalink /patch/242355/
State New
Headers show

Comments

Paolo Bonzini - May 7, 2013, 2:16 p.m.
Patches 1-10 are small changes, some of them bugfixes for the IOMMU
portion of the patches.

Patches 11-13 introduce three new address_space APIs and clean up
the dispatch logic a bit by removing memory_region_section_addr.

Patches 14-22 introduce IOMMU memory regions, convert sPAPR to the
new API, and finally eliminate DMAContext in favor of AddressSpace.

Patches 24-37 set an owner on all regions owned by hot-unpluggable
devices (and several others when that was easy), and add ref/unref
calls that keep the regions alive even if the owner is hot-unplugged.

Patches 38-40 make memory_region_find safe for usage outside the BQL.

These patches are also at git://github.com/bonzini/qemu.git, branch
iommu.

Please review!


Alexey Kardashevskiy (1):
  memory: give name to every AddressSpace

Avi Kivity (5):
  memory: fix address space initialization/destruction
  memory: limit sections in the radix tree to the actual address space
    size
  memory: iommu support
  vfio: abort if an emulated iommu is used
  pci: use memory core for iommu support

Jan Kiszka (3):
  memory: Replace open-coded memory_region_is_romd
  memory: Rename readable flag to romd_mode
  memory: Introduce address_space_lookup_region

Paolo Bonzini (31):
  memory: assert that PhysPageEntry's ptr does not overflow
  memory: allow memory_region_find() to run on non-root memory regions
  memory: do not duplicate memory_region_destructor_none
  memory: make memory_global_sync_dirty_bitmap take an AddressSpace
  memory: create FlatView for new address spaces
  exec: remove obsolete comment
  memory: add address_space_valid
  memory: add address_space_translate
  spapr: convert TCE API to use an opaque type
  spapr: make IOMMU translation go through IOMMUTLBEntry
  spapr: use memory core for iommu support
  dma: eliminate old-style IOMMU support
  spapr_vio: take care of creating our own AddressSpace/DMAContext
  dma: eliminate DMAContext
  memory: add getter/setter for owner
  memory: add ref/unref
  memory: add ref/unref calls
  pci: set owner for BARs
  sysbus: set owner for MMIO regions
  acpi: add memory_region_set_owner calls
  misc: add memory_region_set_owner calls
  isa/portio: allow setting an owner
  vga: add memory_region_set_owner calls
  pci-assign: add memory_region_set_owner calls
  vfio: add memory_region_set_owner calls
  exec: check MRU in qemu_ram_addr_from_host
  memory: return MemoryRegion from qemu_ram_addr_from_host
  memory: ref/unref memory across address_space_map/unmap
  memory: access FlatView from a local variable
  memory: use a new FlatView pointer on every topology update
  memory: add reference counting to FlatView

 arch_init.c                           |    2 +-
 cputlb.c                              |   20 ++-
 dma-helpers.c                         |  180 +------------------
 exec.c                                |  315 ++++++++++++++++++++++-----------
 hw/acpi/ich9.c                        |    1 +
 hw/acpi/piix4.c                       |    5 +
 hw/block/pflash_cfi01.c               |    6 +-
 hw/block/pflash_cfi02.c               |    2 +-
 hw/char/serial-pci.c                  |    1 +
 hw/core/loader.c                      |    1 +
 hw/core/sysbus.c                      |    2 +
 hw/display/cirrus_vga.c               |   19 ++-
 hw/display/exynos4210_fimd.c          |    6 +
 hw/display/framebuffer.c              |   10 +-
 hw/display/qxl.c                      |    5 +-
 hw/display/vga-isa-mm.c               |    2 +-
 hw/display/vga-isa.c                  |    4 +-
 hw/display/vga-pci.c                  |    5 +-
 hw/display/vga.c                      |   19 ++-
 hw/display/vga_int.h                  |    9 +-
 hw/display/vmware_vga.c               |    4 +-
 hw/dma/pl330.c                        |    8 +-
 hw/i386/kvm/ioapic.c                  |    2 +
 hw/i386/kvm/pci-assign.c              |   11 ++
 hw/i386/kvmvapic.c                    |    1 +
 hw/ide/ahci.c                         |   18 +-
 hw/ide/ahci.h                         |    4 +-
 hw/ide/ich.c                          |    2 +-
 hw/ide/macio.c                        |    4 +-
 hw/isa/apm.c                          |    1 +
 hw/isa/isa-bus.c                      |    2 +
 hw/misc/pc-testdev.c                  |    7 +
 hw/misc/vfio.c                        |   12 ++
 hw/pci/pci.c                          |   53 +++---
 hw/ppc/spapr_iommu.c                  |  108 +++++-------
 hw/ppc/spapr_pci.c                    |   18 ++-
 hw/ppc/spapr_vio.c                    |   13 +-
 hw/scsi/megasas.c                     |    4 +-
 hw/scsi/virtio-scsi.c                 |    2 +-
 hw/scsi/vmw_pvscsi.c                  |    2 +-
 hw/sd/sdhci.c                         |   22 ++--
 hw/usb/hcd-ehci-pci.c                 |    4 +-
 hw/usb/hcd-ehci-sysbus.c              |    2 +-
 hw/usb/hcd-ehci.c                     |   12 +-
 hw/usb/hcd-ehci.h                     |    2 +-
 hw/usb/hcd-ohci.c                     |   30 ++--
 hw/usb/libhw.c                        |    4 +-
 hw/virtio/dataplane/hostmem.c         |    7 +
 hw/virtio/vhost.c                     |    2 +
 hw/virtio/virtio-balloon.c            |    1 +
 hw/xen/xen_pt.c                       |    4 +
 include/exec/cpu-common.h             |    2 +-
 include/exec/cputlb.h                 |   12 +-
 include/exec/ioport.h                 |    3 +
 include/exec/memory.h                 |  186 +++++++++++++++----
 include/hw/pci-host/spapr.h           |    2 +-
 include/hw/pci/pci.h                  |   24 ++--
 include/hw/pci/pci_bus.h              |    5 +-
 include/hw/ppc/spapr.h                |   12 +-
 include/hw/ppc/spapr_vio.h            |   36 +++--
 include/hw/virtio/dataplane/hostmem.h |    1 +
 include/sysemu/dma.h                  |  158 +++++------------
 ioport.c                              |   10 +
 kvm-all.c                             |    2 +
 memory.c                              |  267 ++++++++++++++++++++++------
 target-arm/kvm.c                      |    2 +
 target-i386/kvm.c                     |    4 +-
 target-sparc/mmu_helper.c             |    1 +
 translate-all.c                       |    8 +-
 xen-all.c                             |    2 +
 70 files changed, 983 insertions(+), 734 deletions(-)

From aec410066128aaca6337754c389f0e7c2d47089c Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Tue, 7 May 2013 11:30:23 +0200
Subject: [PATCH 01/40] memory: assert that PhysPageEntry's ptr does not overflow

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 exec.c |    2 ++
 1 files changed, 2 insertions(+), 0 deletions(-)
Peter Maydell - May 7, 2013, 6:51 p.m.
On 7 May 2013 15:16, Paolo Bonzini <pbonzini@redhat.com> wrote:
> Patches 1-10 are small changes, some of them bugfixes for the IOMMU
> portion of the patches.
>
> Patches 11-13 introduce three new address_space APIs and clean up
> the dispatch logic a bit by removing memory_region_section_addr.
>
> Patches 14-22 introduce IOMMU memory regions, convert sPAPR to the
> new API, and finally eliminate DMAContext in favor of AddressSpace.
>
> Patches 24-37 set an owner on all regions owned by hot-unpluggable
> devices (and several others when that was easy), and add ref/unref
> calls that keep the regions alive even if the owner is hot-unplugged.
>
> Patches 38-40 make memory_region_find safe for usage outside the BQL.

I assume you're not going to try to get these all committed
in one lump, by the way? I stopped reviewing after patch 22,
since the BQL related stuff seems to still be under
discussion.

thanks
-- PMM
Paolo Bonzini - May 7, 2013, 8:21 p.m.
Il 07/05/2013 20:51, Peter Maydell ha scritto:
> On 7 May 2013 15:16, Paolo Bonzini <pbonzini@redhat.com> wrote:
>> Patches 1-10 are small changes, some of them bugfixes for the IOMMU
>> portion of the patches.
>>
>> Patches 11-13 introduce three new address_space APIs and clean up
>> the dispatch logic a bit by removing memory_region_section_addr.
>>
>> Patches 14-22 introduce IOMMU memory regions, convert sPAPR to the
>> new API, and finally eliminate DMAContext in favor of AddressSpace.
>>
>> Patches 24-37 set an owner on all regions owned by hot-unpluggable
>> devices (and several others when that was easy), and add ref/unref
>> calls that keep the regions alive even if the owner is hot-unplugged.
>>
>> Patches 38-40 make memory_region_find safe for usage outside the BQL.
> 
> I assume you're not going to try to get these all committed
> in one lump, by the way?

In as big a lump as possible, but not necessarily in one lump.

> I stopped reviewing after patch 22,
> since the BQL related stuff seems to still be under
> discussion.

Note that patch 37 is a fix for an existing bug (disk I/O to a
device-owned RAM area can corrupt QEMU if you unplug the device in
parallel), and it needs all the 13 previous patches unfortunately.

Only the last 3 are strictly for out-of-BQL operation.

Paolo

Patch

diff --git a/exec.c b/exec.c
index 19725db..2e5b89a 100644
--- a/exec.c
+++ b/exec.c
@@ -719,6 +719,8 @@  static void destroy_all_mappings(AddressSpaceDispatch *d)
 
 static uint16_t phys_section_add(MemoryRegionSection *section)
 {
+    assert(phys_sections_nb < TARGET_PAGE_SIZE);
+
     if (phys_sections_nb == phys_sections_nb_alloc) {
         phys_sections_nb_alloc = MAX(phys_sections_nb_alloc * 2, 16);
         phys_sections = g_renew(MemoryRegionSection, phys_sections,
-- 
1.7.1


From 4434e2f8603954905655a79b73f1b9859e20f227 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Tue, 7 May 2013 15:48:28 +0200
Subject: [PATCH 02/40] memory: allow memory_region_find() to run on non-root memory regions

memory_region_find() is similar to registering a MemoryListener and
checking for the MemoryRegionSections that come from a particular
region.  There is no reason for this to be limited to a root memory
region.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/exec/memory.h |   13 +++++++------
 memory.c              |   20 +++++++++++++++-----
 2 files changed, 22 insertions(+), 11 deletions(-)

diff --git a/include/exec/memory.h b/include/exec/memory.h
index 9e88320..efe210b 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -725,17 +725,18 @@  void memory_region_set_alias_offset(MemoryRegion *mr,
  *
  * Returns a #MemoryRegionSection that describes a contiguous overlap.
  * It will have the following characteristics:
- *    .@offset_within_address_space >= @addr
- *    .@offset_within_address_space + .@size <= @addr + @size
  *    .@size = 0 iff no overlap was found
  *    .@mr is non-%NULL iff an overlap was found
  *
- * @address_space: a top-level (i.e. parentless) region that contains
- *       the region to be found
- * @addr: start of the area within @address_space to be searched
+ * If @mr is parent-less,
+ *    .@offset_within_address_space >= @addr
+ *    .@offset_within_address_space + .@size <= @addr + @size
+ *
+ * @mr: a (possibly indirect) parent that contains the region to be found
+ * @addr: start of the area within @as to be searched
  * @size: size of the area to be searched
  */
-MemoryRegionSection memory_region_find(MemoryRegion *address_space,
+MemoryRegionSection memory_region_find(MemoryRegion *mr,
                                        hwaddr addr, uint64_t size);
 
 /**
diff --git a/memory.c b/memory.c
index 75ca281..34bfb13 100644
--- a/memory.c
+++ b/memory.c
@@ -1451,15 +1451,24 @@  static FlatRange *address_space_lookup(AddressSpace *as, AddrRange addr)
                    sizeof(FlatRange), cmp_flatrange_addr);
 }
 
-MemoryRegionSection memory_region_find(MemoryRegion *address_space,
+MemoryRegionSection memory_region_find(MemoryRegion *mr,
                                        hwaddr addr, uint64_t size)
 {
-    AddressSpace *as = memory_region_to_address_space(address_space);
-    AddrRange range = addrrange_make(int128_make64(addr),
-                                     int128_make64(size));
-    FlatRange *fr = address_space_lookup(as, range);
     MemoryRegionSection ret = { .mr = NULL, .size = 0 };
+    MemoryRegion *root;
+    AddressSpace *as;
+    AddrRange range;
+    FlatRange *fr;
+
+    addr += mr->addr;
+    for (root = mr; root->parent; ) {
+        root = root->parent;
+        addr += root->addr;
+    }
 
+    as = memory_region_to_address_space(root);
+    range = addrrange_make(int128_make64(addr), int128_make64(size));
+    fr = address_space_lookup(as, range);
     if (!fr) {
         return ret;
     }
@@ -1470,6 +1479,7 @@  MemoryRegionSection memory_region_find(MemoryRegion *address_space,
     }
 
     ret.mr = fr->mr;
+    ret.address_space = as;
     range = addrrange_intersection(range, fr->addr);
     ret.offset_within_region = fr->offset_in_region;
     ret.offset_within_region += int128_get64(int128_sub(range.start,
-- 
1.7.1


From b3e8ad57d21913dd7a8dfee6578c987095ed6831 Mon Sep 17 00:00:00 2001
From: Jan Kiszka <jan.kiszka@siemens.com>
Date: Mon, 6 May 2013 18:07:05 +0200
Subject: [PATCH 03/40] memory: Replace open-coded memory_region_is_romd

Improves readability.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
---
 translate-all.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/translate-all.c b/translate-all.c
index da93608..0d84b0d 100644
--- a/translate-all.c
+++ b/translate-all.c
@@ -1359,7 +1359,7 @@  void tb_invalidate_phys_addr(hwaddr addr)
     section = phys_page_find(address_space_memory.dispatch,
                              addr >> TARGET_PAGE_BITS);
     if (!(memory_region_is_ram(section->mr)
-          || (section->mr->rom_device && section->mr->readable))) {
+          || memory_region_is_romd(section->mr))) {
         return;
     }
     ram_addr = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
-- 
1.7.1


From 5f2a3cce1385d2abd93a0c8bcbf5e58a02db82c0 Mon Sep 17 00:00:00 2001
From: Jan Kiszka <jan.kiszka@siemens.com>
Date: Mon, 6 May 2013 18:12:06 +0200
Subject: [PATCH 04/40] memory: Rename readable flag to romd_mode

"Readable" is a very unfortunate name for this flag because even a
rom_device region will always be readable from the guest POV. What
differs is the mapping, just like the comments had to explain already.
Also, readable could currently be understood as being a generic region
flag, but it only applies to rom_device regions.

So name the flag and the function to modify it after the original term
"ROMD" which could also be interpreted as "ROM direct", i.e. ROM mode
with direct access. In any case, the scope of the flag is clearer now.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
---
 hw/block/pflash_cfi01.c |    6 +++---
 hw/block/pflash_cfi02.c |    2 +-
 include/exec/memory.h   |   22 +++++++++++-----------
 memory.c                |   30 +++++++++++++++---------------
 4 files changed, 30 insertions(+), 30 deletions(-)

diff --git a/hw/block/pflash_cfi01.c b/hw/block/pflash_cfi01.c
index 3ff20e0..63d7c99 100644
--- a/hw/block/pflash_cfi01.c
+++ b/hw/block/pflash_cfi01.c
@@ -105,7 +105,7 @@  static void pflash_timer (void *opaque)
     DPRINTF("%s: command %02x done\n", __func__, pfl->cmd);
     /* Reset flash */
     pfl->status ^= 0x80;
-    memory_region_rom_device_set_readable(&pfl->mem, true);
+    memory_region_rom_device_set_romd(&pfl->mem, true);
     pfl->wcycle = 0;
     pfl->cmd = 0;
 }
@@ -281,7 +281,7 @@  static void pflash_write(pflash_t *pfl, hwaddr offset,
 
     if (!pfl->wcycle) {
         /* Set the device in I/O access mode */
-        memory_region_rom_device_set_readable(&pfl->mem, false);
+        memory_region_rom_device_set_romd(&pfl->mem, false);
     }
 
     switch (pfl->wcycle) {
@@ -458,7 +458,7 @@  static void pflash_write(pflash_t *pfl, hwaddr offset,
                   "\n", __func__, offset, pfl->wcycle, pfl->cmd, value);
 
  reset_flash:
-    memory_region_rom_device_set_readable(&pfl->mem, true);
+    memory_region_rom_device_set_romd(&pfl->mem, true);
 
     pfl->wcycle = 0;
     pfl->cmd = 0;
diff --git a/hw/block/pflash_cfi02.c b/hw/block/pflash_cfi02.c
index 9a7fa70..5f25246 100644
--- a/hw/block/pflash_cfi02.c
+++ b/hw/block/pflash_cfi02.c
@@ -111,7 +111,7 @@  static void pflash_setup_mappings(pflash_t *pfl)
 
 static void pflash_register_memory(pflash_t *pfl, int rom_mode)
 {
-    memory_region_rom_device_set_readable(&pfl->orig_mem, rom_mode);
+    memory_region_rom_device_set_romd(&pfl->orig_mem, rom_mode);
     pfl->rom_mode = rom_mode;
 }
 
diff --git a/include/exec/memory.h b/include/exec/memory.h
index efe210b..f65acfd 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -126,7 +126,7 @@  struct MemoryRegion {
     ram_addr_t ram_addr;
     bool subpage;
     bool terminates;
-    bool readable;
+    bool romd_mode;
     bool ram;
     bool readonly; /* For RAM regions */
     bool enabled;
@@ -355,16 +355,16 @@  uint64_t memory_region_size(MemoryRegion *mr);
 bool memory_region_is_ram(MemoryRegion *mr);
 
 /**
- * memory_region_is_romd: check whether a memory region is ROMD
+ * memory_region_is_romd: check whether a memory region is in ROMD mode
  *
- * Returns %true is a memory region is ROMD and currently set to allow
+ * Returns %true if a memory region is a ROM device and currently set to allow
  * direct reads.
  *
  * @mr: the memory region being queried
  */
 static inline bool memory_region_is_romd(MemoryRegion *mr)
 {
-    return mr->rom_device && mr->readable;
+    return mr->rom_device && mr->romd_mode;
 }
 
 /**
@@ -502,18 +502,18 @@  void memory_region_reset_dirty(MemoryRegion *mr, hwaddr addr,
 void memory_region_set_readonly(MemoryRegion *mr, bool readonly);
 
 /**
- * memory_region_rom_device_set_readable: enable/disable ROM readability
+ * memory_region_rom_device_set_romd: enable/disable ROMD mode
  *
  * Allows a ROM device (initialized with memory_region_init_rom_device() to
- * to be marked as readable (default) or not readable.  When it is readable,
- * the device is mapped to guest memory.  When not readable, reads are
- * forwarded to the #MemoryRegion.read function.
+ * set to ROMD mode (default) or MMIO mode.  When it is in ROMD mode, the
+ * device is mapped to guest memory and satisfies read access directly.
+ * When in MMIO mode, reads are forwarded to the #MemoryRegion.read function.
+ * Writes are always handled by the #MemoryRegion.write function.
  *
  * @mr: the memory region to be updated
- * @readable: whether reads are satisified directly (%true) or via callbacks
- *            (%false)
+ * @romd_mode: whether the region is in ROMD mode or not
  */
-void memory_region_rom_device_set_readable(MemoryRegion *mr, bool readable);
+void memory_region_rom_device_set_romd(MemoryRegion *mr, bool romd_mode);
 
 /**
  * memory_region_set_coalescing: Enable memory coalescing for the region.
diff --git a/memory.c b/memory.c
index 34bfb13..013464b 100644
--- a/memory.c
+++ b/memory.c
@@ -213,7 +213,7 @@  struct FlatRange {
     hwaddr offset_in_region;
     AddrRange addr;
     uint8_t dirty_log_mask;
-    bool readable;
+    bool romd_mode;
     bool readonly;
 };
 
@@ -236,7 +236,7 @@  static bool flatrange_equal(FlatRange *a, FlatRange *b)
     return a->mr == b->mr
         && addrrange_equal(a->addr, b->addr)
         && a->offset_in_region == b->offset_in_region
-        && a->readable == b->readable
+        && a->romd_mode == b->romd_mode
         && a->readonly == b->readonly;
 }
 
@@ -276,7 +276,7 @@  static bool can_merge(FlatRange *r1, FlatRange *r2)
                                 r1->addr.size),
                      int128_make64(r2->offset_in_region))
         && r1->dirty_log_mask == r2->dirty_log_mask
-        && r1->readable == r2->readable
+        && r1->romd_mode == r2->romd_mode
         && r1->readonly == r2->readonly;
 }
 
@@ -532,7 +532,7 @@  static void render_memory_region(FlatView *view,
             fr.offset_in_region = offset_in_region;
             fr.addr = addrrange_make(base, now);
             fr.dirty_log_mask = mr->dirty_log_mask;
-            fr.readable = mr->readable;
+            fr.romd_mode = mr->romd_mode;
             fr.readonly = readonly;
             flatview_insert(view, i, &fr);
             ++i;
@@ -552,7 +552,7 @@  static void render_memory_region(FlatView *view,
         fr.offset_in_region = offset_in_region;
         fr.addr = addrrange_make(base, remain);
         fr.dirty_log_mask = mr->dirty_log_mask;
-        fr.readable = mr->readable;
+        fr.romd_mode = mr->romd_mode;
         fr.readonly = readonly;
         flatview_insert(view, i, &fr);
     }
@@ -801,7 +801,7 @@  void memory_region_init(MemoryRegion *mr,
     mr->enabled = true;
     mr->terminates = false;
     mr->ram = false;
-    mr->readable = true;
+    mr->romd_mode = true;
     mr->readonly = false;
     mr->rom_device = false;
     mr->destructor = memory_region_destructor_none;
@@ -1121,11 +1121,11 @@  void memory_region_set_readonly(MemoryRegion *mr, bool readonly)
     }
 }
 
-void memory_region_rom_device_set_readable(MemoryRegion *mr, bool readable)
+void memory_region_rom_device_set_romd(MemoryRegion *mr, bool romd_mode)
 {
-    if (mr->readable != readable) {
+    if (mr->romd_mode != romd_mode) {
         memory_region_transaction_begin();
-        mr->readable = readable;
+        mr->romd_mode = romd_mode;
         memory_region_update_pending |= mr->enabled;
         memory_region_transaction_commit();
     }
@@ -1659,9 +1659,9 @@  static void mtree_print_mr(fprintf_function mon_printf, void *f,
                    base + mr->addr
                    + (hwaddr)int128_get64(mr->size) - 1,
                    mr->priority,
-                   mr->readable ? 'R' : '-',
-                   !mr->readonly && !(mr->rom_device && mr->readable) ? 'W'
-                                                                      : '-',
+                   mr->romd_mode ? 'R' : '-',
+                   !mr->readonly && !(mr->rom_device && mr->romd_mode) ? 'W'
+                                                                       : '-',
                    mr->name,
                    mr->alias->name,
                    mr->alias_offset,
@@ -1674,9 +1674,9 @@  static void mtree_print_mr(fprintf_function mon_printf, void *f,
                    base + mr->addr
                    + (hwaddr)int128_get64(mr->size) - 1,
                    mr->priority,
-                   mr->readable ? 'R' : '-',
-                   !mr->readonly && !(mr->rom_device && mr->readable) ? 'W'
-                                                                      : '-',
+                   mr->romd_mode ? 'R' : '-',
+                   !mr->readonly && !(mr->rom_device && mr->romd_mode) ? 'W'
+                                                                       : '-',
                    mr->name);
     }
 
-- 
1.7.1


From 98f505210d420ea66a2c9566657527c2c61e1a38 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 3 May 2013 17:18:36 +0200
Subject: [PATCH 05/40] memory: do not duplicate memory_region_destructor_none

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 memory.c |    6 +-----
 1 files changed, 1 insertions(+), 5 deletions(-)

diff --git a/memory.c b/memory.c
index 013464b..a6e9bc5 100644
--- a/memory.c
+++ b/memory.c
@@ -768,10 +768,6 @@  static void memory_region_destructor_ram_from_ptr(MemoryRegion *mr)
     qemu_ram_free_from_ptr(mr->ram_addr);
 }
 
-static void memory_region_destructor_iomem(MemoryRegion *mr)
-{
-}
-
 static void memory_region_destructor_rom_device(MemoryRegion *mr)
 {
     qemu_ram_free(mr->ram_addr & TARGET_PAGE_MASK);
@@ -929,7 +925,7 @@  void memory_region_init_io(MemoryRegion *mr,
     mr->ops = ops;
     mr->opaque = opaque;
     mr->terminates = true;
-    mr->destructor = memory_region_destructor_iomem;
+    mr->destructor = memory_region_destructor_none;
     mr->ram_addr = ~(ram_addr_t)0;
 }
 
-- 
1.7.1


From 0f9f5c1fc24b61563cd4ba7af812ace0a6cb1152 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 24 Apr 2013 10:46:55 +0200
Subject: [PATCH 06/40] memory: make memory_global_sync_dirty_bitmap take an AddressSpace

Suggested-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch_init.c           |    2 +-
 include/exec/memory.h |    7 +++----
 memory.c              |    3 +--
 3 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/arch_init.c b/arch_init.c
index 49c5dc2..5d32ecf 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -386,7 +386,7 @@  static void migration_bitmap_sync(void)
     }
 
     trace_migration_bitmap_sync_start();
-    memory_global_sync_dirty_bitmap(get_system_memory());
+    address_space_sync_dirty_bitmap(&address_space_memory);
 
     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
         for (addr = 0; addr < block->length; addr += TARGET_PAGE_SIZE) {
diff --git a/include/exec/memory.h b/include/exec/memory.h
index f65acfd..489dc73 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -757,13 +757,12 @@  memory_region_section_addr(MemoryRegionSection *section,
 }
 
 /**
- * memory_global_sync_dirty_bitmap: synchronize the dirty log for all memory
+ * address_space_sync_dirty_bitmap: synchronize the dirty log for all memory
  *
  * Synchronizes the dirty page log for an entire address space.
- * @address_space: a top-level (i.e. parentless) region that contains the
- *       memory being synchronized
+ * @as: the address space that contains the memory being synchronized
  */
-void memory_global_sync_dirty_bitmap(MemoryRegion *address_space);
+void address_space_sync_dirty_bitmap(AddressSpace *as);
 
 /**
  * memory_region_transaction_begin: Start a transaction.
diff --git a/memory.c b/memory.c
index a6e9bc5..7a956a0 100644
--- a/memory.c
+++ b/memory.c
@@ -1486,9 +1486,8 @@  MemoryRegionSection memory_region_find(MemoryRegion *mr,
     return ret;
 }
 
-void memory_global_sync_dirty_bitmap(MemoryRegion *address_space)
+void address_space_sync_dirty_bitmap(AddressSpace *as)
 {
-    AddressSpace *as = memory_region_to_address_space(address_space);
     FlatRange *fr;
 
     FOR_EACH_FLAT_RANGE(fr, as->current_map) {
-- 
1.7.1


From 4f30f3aaad45954884d49aec8989b93899e65afd Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi.kivity@gmail.com>
Date: Tue, 30 Oct 2012 13:47:44 +0200
Subject: [PATCH 07/40] memory: fix address space initialization/destruction

A couple of fields were left uninitialized.  This was not observed earlier
because all address spaces were statically allocated.  Also free allocation
for those fields.

Signed-off-by: Avi Kivity <avi.kivity@gmail.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 memory.c |    3 +++
 1 files changed, 3 insertions(+), 0 deletions(-)

diff --git a/memory.c b/memory.c
index 7a956a0..fc4515b 100644
--- a/memory.c
+++ b/memory.c
@@ -1573,6 +1573,8 @@  void address_space_init(AddressSpace *as, MemoryRegion *root)
     as->root = root;
     as->current_map = g_new(FlatView, 1);
     flatview_init(as->current_map);
+    as->ioeventfd_nb = 0;
+    as->ioeventfds = NULL;
     QTAILQ_INSERT_TAIL(&address_spaces, as, address_spaces_link);
     as->name = NULL;
     memory_region_transaction_commit();
@@ -1589,6 +1591,7 @@  void address_space_destroy(AddressSpace *as)
     address_space_destroy_dispatch(as);
     flatview_destroy(as->current_map);
     g_free(as->current_map);
+    g_free(as->ioeventfds);
 }
 
 uint64_t io_mem_read(MemoryRegion *mr, hwaddr addr, unsigned size)
-- 
1.7.1


From dead812ca6dc44d97024ccda1a9d9217946aa742 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi.kivity@gmail.com>
Date: Tue, 30 Oct 2012 13:47:45 +0200
Subject: [PATCH 08/40] memory: limit sections in the radix tree to the actual address space size

The radix tree is statically sized to fit TARGET_PHYS_ADDR_SPACE_BITS.
If a larger memory region is registered, it will overflow.

Fix by limiting any section in the radix tree to the supported size.

This problem was not observed earlier since artificial regions (containers
and aliases) are eliminated by the memory core, leaving only device regions
which have reasonable sizes.  An IOMMU however cannot be eliminated by the
memory core, and may have an artificial size.

Signed-off-by: Avi Kivity <avi.kivity@gmail.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 exec.c |   15 ++++++++++++++-
 1 files changed, 14 insertions(+), 1 deletions(-)

diff --git a/exec.c b/exec.c
index 2e5b89a..fccecf6 100644
--- a/exec.c
+++ b/exec.c
@@ -777,10 +777,23 @@  static void register_multipage(AddressSpaceDispatch *d, MemoryRegionSection *sec
                   section_index);
 }
 
+static MemoryRegionSection limit(MemoryRegionSection section)
+{
+    unsigned practical_as_bits = MIN(TARGET_PHYS_ADDR_SPACE_BITS, 62);
+    hwaddr as_limit;
+
+    as_limit = (hwaddr)1 << practical_as_bits;
+
+    section.size = MIN(section.offset_within_address_space + section.size, as_limit)
+                   - section.offset_within_address_space;
+
+    return section;
+}
+
 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
 {
     AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
-    MemoryRegionSection now = *section, remain = *section;
+    MemoryRegionSection now = limit(*section), remain = limit(*section);
 
     if ((now.offset_within_address_space & ~TARGET_PAGE_MASK)
         || (now.size < TARGET_PAGE_SIZE)) {
-- 
1.7.1


From 11c523170b4dc49c5010ede5ddaaded64b0ef1fa Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Tue, 16 Apr 2013 15:39:51 +0200
Subject: [PATCH 09/40] memory: create FlatView for new address spaces

A new address space has an empty FlatView.  In order to create it, we
must (a) call memory_region_transaction_commit after the address space is
inserted into the list; (b) force memory_region_transaction_commit to
do something.

This bug was latent so far because all address spaces started empty, including
the PCI address space where the bus master region is initially disabled.
However, the target address space of an IOMMU is usually rooted at
get_system_memory(), which might not be empty at the time the IOMMU is created.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 memory.c |    3 ++-
 1 files changed, 2 insertions(+), 1 deletions(-)

diff --git a/memory.c b/memory.c
index fc4515b..a8929aa 100644
--- a/memory.c
+++ b/memory.c
@@ -1577,8 +1577,9 @@  void address_space_init(AddressSpace *as, MemoryRegion *root)
     as->ioeventfds = NULL;
     QTAILQ_INSERT_TAIL(&address_spaces, as, address_spaces_link);
     as->name = NULL;
-    memory_region_transaction_commit();
     address_space_init_dispatch(as);
+    memory_region_update_pending |= root->enabled;
+    memory_region_transaction_commit();
 }
 
 void address_space_destroy(AddressSpace *as)
-- 
1.7.1


From 714a2bab77e6e5df064071c05d079de0b059434c Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Thu, 11 Apr 2013 15:33:16 +0200
Subject: [PATCH 10/40] exec: remove obsolete comment

See how we call memory_region_section_addr two lines below to
convert a physical address to a base address in the region.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 exec.c |    6 ------
 1 files changed, 0 insertions(+), 6 deletions(-)

diff --git a/exec.c b/exec.c
index fccecf6..1dbd956 100644
--- a/exec.c
+++ b/exec.c
@@ -639,12 +639,6 @@  hwaddr memory_region_section_get_iotlb(CPUArchState *env,
             iotlb |= phys_section_rom;
         }
     } else {
-        /* IO handlers are currently passed a physical address.
-           It would be nice to pass an offset from the base address
-           of that region.  This would avoid having to special case RAM,
-           and avoid full address decoding in every device.
-           We can't use the high bits of pd for this because
-           IO_MEM_ROMD uses these as a ram address.  */
         iotlb = section - phys_sections;
         iotlb += memory_region_section_addr(section, paddr);
     }
-- 
1.7.1


From 6ac25708f0c7a7c725c1c202d771954310ebfbc1 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Thu, 11 Apr 2013 15:40:59 +0200
Subject: [PATCH 11/40] memory: add address_space_valid

Checking whether an address space range is valid is possible in the
old-style IOMMU implementation, but there is no equivalent in the memory
API.  Implement it with a lookup of the dispatch tree.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 dma-helpers.c         |    5 +++++
 exec.c                |   24 ++++++++++++++++++++++++
 include/exec/memory.h |   12 ++++++++++++
 include/sysemu/dma.h  |    3 ++-
 4 files changed, 43 insertions(+), 1 deletions(-)

diff --git a/dma-helpers.c b/dma-helpers.c
index 272632f..2962b69 100644
--- a/dma-helpers.c
+++ b/dma-helpers.c
@@ -298,6 +298,11 @@  bool iommu_dma_memory_valid(DMAContext *dma, dma_addr_t addr, dma_addr_t len,
             plen = len;
         }
 
+        if (!address_space_valid(dma->as, paddr, len,
+                                 dir == DMA_DIRECTION_FROM_DEVICE)) {
+            return false;
+        }
+
         len -= plen;
         addr += plen;
     }
diff --git a/exec.c b/exec.c
index 1dbd956..405de9f 100644
--- a/exec.c
+++ b/exec.c
@@ -2093,6 +2093,30 @@  static void cpu_notify_map_clients(void)
     }
 }
 
+bool address_space_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
+{
+    AddressSpaceDispatch *d = as->dispatch;
+    MemoryRegionSection *section;
+    int l;
+    hwaddr page;
+
+    while (len > 0) {
+        page = addr & TARGET_PAGE_MASK;
+        l = (page + TARGET_PAGE_SIZE) - addr;
+        if (l > len) {
+            l = len;
+        }
+        section = phys_page_find(d, addr >> TARGET_PAGE_BITS);
+        if (section->mr == &io_mem_unassigned) {
+            return false;
+        }
+
+        len -= l;
+        addr += l;
+    }
+    return true;
+}
+
 /* Map a physical memory region into a host virtual address.
  * May map a subset of the requested range, given by and returned in *plen.
  * May return NULL if resources needed to perform the mapping are exhausted.
diff --git a/include/exec/memory.h b/include/exec/memory.h
index 489dc73..c38e974 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -857,6 +857,18 @@  void address_space_write(AddressSpace *as, hwaddr addr,
  */
 void address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len);
 
+/* address_space_valid: check for validity of an address space range
+ *
+ * Check whether access to the given address space range is permitted by
+ * any IOMMU regions that are active for the address space.
+ *
+ * @as: #AddressSpace to be accessed
+ * @addr: address within that address space
+ * @len: pointer to length
+ * @is_write: indicates the transfer direction
+ */
+bool address_space_valid(AddressSpace *as, hwaddr addr, int len, bool is_write);
+
 /* address_space_map: map a physical memory region into a host virtual address
  *
  * May map a subset of the requested range, given by and returned in @plen.
diff --git a/include/sysemu/dma.h b/include/sysemu/dma.h
index a52c93a..2e239dc 100644
--- a/include/sysemu/dma.h
+++ b/include/sysemu/dma.h
@@ -113,7 +113,8 @@  static inline bool dma_memory_valid(DMAContext *dma,
                                     DMADirection dir)
 {
     if (!dma_has_iommu(dma)) {
-        return true;
+        return address_space_valid(dma->as, addr, len,
+                                   dir == DMA_DIRECTION_FROM_DEVICE);
     } else {
         return iommu_dma_memory_valid(dma, addr, len, dir);
     }
-- 
1.7.1


From 709e90cb3f1e1995f93c95ba5353c96dd8a530b2 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Thu, 11 Apr 2013 13:21:06 +0200
Subject: [PATCH 12/40] memory: add address_space_translate

Using phys_page_find to translate an AddressSpace to a MemoryRegionSection
is unwieldy.  It requires passing the page index rather than the address,
and later memory_region_section_addr has to be called.  Replace
memory_region_section_addr with a function that does all of it: call
phys_page_find, compute the offset within the region, and check how
big the current mapping is.  This way, a large flat region can be written
with a single lookup rather than a page at a time.

address_space_translate will also provide a single point where IOMMU
forwarding is implemented.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 cputlb.c              |   20 +++---
 exec.c                |  184 ++++++++++++++++++++++++++----------------------
 include/exec/cputlb.h |   12 ++--
 include/exec/memory.h |   31 ++++-----
 translate-all.c       |    6 +-
 5 files changed, 133 insertions(+), 120 deletions(-)

diff --git a/cputlb.c b/cputlb.c
index aba7e44..1f85da0 100644
--- a/cputlb.c
+++ b/cputlb.c
@@ -248,13 +248,18 @@  void tlb_set_page(CPUArchState *env, target_ulong vaddr,
     target_ulong code_address;
     uintptr_t addend;
     CPUTLBEntry *te;
-    hwaddr iotlb;
+    hwaddr iotlb, xlat, sz;
 
     assert(size >= TARGET_PAGE_SIZE);
     if (size != TARGET_PAGE_SIZE) {
         tlb_add_large_page(env, vaddr, size);
     }
-    section = phys_page_find(address_space_memory.dispatch, paddr >> TARGET_PAGE_BITS);
+
+    sz = size;
+    section = address_space_translate(&address_space_memory, paddr, &xlat, &sz,
+                                      false);
+    assert(sz >= TARGET_PAGE_SIZE);
+
 #if defined(DEBUG_TLB)
     printf("tlb_set_page: vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx
            " prot=%x idx=%d pd=0x%08lx\n",
@@ -269,15 +274,14 @@  void tlb_set_page(CPUArchState *env, target_ulong vaddr,
     }
     if (memory_region_is_ram(section->mr) ||
         memory_region_is_romd(section->mr)) {
-        addend = (uintptr_t)memory_region_get_ram_ptr(section->mr)
-        + memory_region_section_addr(section, paddr);
+        addend = (uintptr_t)memory_region_get_ram_ptr(section->mr) + xlat;
     } else {
         addend = 0;
     }
 
     code_address = address;
-    iotlb = memory_region_section_get_iotlb(env, section, vaddr, paddr, prot,
-                                            &address);
+    iotlb = memory_region_section_get_iotlb(env, section, vaddr, paddr, xlat,
+                                            prot, &address);
 
     index = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
     env->iotlb[mmu_idx][index] = iotlb - vaddr;
@@ -300,9 +304,7 @@  void tlb_set_page(CPUArchState *env, target_ulong vaddr,
             /* Write access calls the I/O callback.  */
             te->addr_write = address | TLB_MMIO;
         } else if (memory_region_is_ram(section->mr)
-                   && !cpu_physical_memory_is_dirty(
-                           section->mr->ram_addr
-                           + memory_region_section_addr(section, paddr))) {
+                   && !cpu_physical_memory_is_dirty(section->mr->ram_addr + xlat)) {
             te->addr_write = address | TLB_NOTDIRTY;
         } else {
             te->addr_write = address;
diff --git a/exec.c b/exec.c
index 405de9f..9709bc4 100644
--- a/exec.c
+++ b/exec.c
@@ -182,24 +182,36 @@  static void phys_page_set(AddressSpaceDispatch *d,
     phys_page_set_level(&d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
 }
 
-MemoryRegionSection *phys_page_find(AddressSpaceDispatch *d, hwaddr index)
+static MemoryRegionSection *phys_page_find(AddressSpaceDispatch *d, hwaddr index)
 {
     PhysPageEntry lp = d->phys_map;
     PhysPageEntry *p;
     int i;
-    uint16_t s_index = phys_section_unassigned;
 
     for (i = P_L2_LEVELS - 1; i >= 0 && !lp.is_leaf; i--) {
         if (lp.ptr == PHYS_MAP_NODE_NIL) {
-            goto not_found;
+            return &phys_sections[phys_section_unassigned];
         }
         p = phys_map_nodes[lp.ptr];
         lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
     }
+    return &phys_sections[lp.ptr];
+}
+
+MemoryRegionSection *address_space_translate(AddressSpace *as, hwaddr addr,
+                                             hwaddr *xlat, hwaddr *plen,
+                                             bool is_write)
+{
+    MemoryRegionSection *section;
+
+    section = phys_page_find(as->dispatch, addr >> TARGET_PAGE_BITS);
+    /* Compute offset with MemoryRegionSection */
+    addr -= section->offset_within_address_space;
+    *plen = MIN(section->size - addr, *plen);
 
-    s_index = lp.ptr;
-not_found:
-    return &phys_sections[s_index];
+    /* Compute offset with MemoryRegion */
+    *xlat = addr + section->offset_within_region;
+    return section;
 }
 
 bool memory_region_is_unassigned(MemoryRegion *mr)
@@ -620,11 +632,11 @@  static int cpu_physical_memory_set_dirty_tracking(int enable)
 }
 
 hwaddr memory_region_section_get_iotlb(CPUArchState *env,
-                                                   MemoryRegionSection *section,
-                                                   target_ulong vaddr,
-                                                   hwaddr paddr,
-                                                   int prot,
-                                                   target_ulong *address)
+                                       MemoryRegionSection *section,
+                                       target_ulong vaddr,
+                                       hwaddr paddr, hwaddr xlat,
+                                       int prot,
+                                       target_ulong *address)
 {
     hwaddr iotlb;
     CPUWatchpoint *wp;
@@ -632,7 +644,7 @@  hwaddr memory_region_section_get_iotlb(CPUArchState *env,
     if (memory_region_is_ram(section->mr)) {
         /* Normal RAM.  */
         iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
-            + memory_region_section_addr(section, paddr);
+            + xlat;
         if (!section->readonly) {
             iotlb |= phys_section_notdirty;
         } else {
@@ -640,7 +652,7 @@  hwaddr memory_region_section_get_iotlb(CPUArchState *env,
         }
     } else {
         iotlb = section - phys_sections;
-        iotlb += memory_region_section_addr(section, paddr);
+        iotlb += xlat;
     }
 
     /* Make accesses to pages with watchpoints go via the
@@ -1903,24 +1915,18 @@  static void invalidate_and_set_dirty(hwaddr addr,
 void address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
                       int len, bool is_write)
 {
-    AddressSpaceDispatch *d = as->dispatch;
-    int l;
+    hwaddr l;
     uint8_t *ptr;
     uint32_t val;
-    hwaddr page;
+    hwaddr addr1;
     MemoryRegionSection *section;
 
     while (len > 0) {
-        page = addr & TARGET_PAGE_MASK;
-        l = (page + TARGET_PAGE_SIZE) - addr;
-        if (l > len)
-            l = len;
-        section = phys_page_find(d, page >> TARGET_PAGE_BITS);
+        l = len;
+        section = address_space_translate(as, addr, &addr1, &l, is_write);
 
         if (is_write) {
             if (!memory_region_is_ram(section->mr)) {
-                hwaddr addr1;
-                addr1 = memory_region_section_addr(section, addr);
                 /* XXX: could force cpu_single_env to NULL to avoid
                    potential bugs */
                 if (l >= 4 && ((addr1 & 3) == 0)) {
@@ -1940,9 +1946,7 @@  void address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
                     l = 1;
                 }
             } else if (!section->readonly) {
-                ram_addr_t addr1;
-                addr1 = memory_region_get_ram_addr(section->mr)
-                    + memory_region_section_addr(section, addr);
+                addr1 += memory_region_get_ram_addr(section->mr);
                 /* RAM case */
                 ptr = qemu_get_ram_ptr(addr1);
                 memcpy(ptr, buf, l);
@@ -1952,9 +1956,7 @@  void address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
         } else {
             if (!(memory_region_is_ram(section->mr) ||
                   memory_region_is_romd(section->mr))) {
-                hwaddr addr1;
                 /* I/O case */
-                addr1 = memory_region_section_addr(section, addr);
                 if (l >= 4 && ((addr1 & 3) == 0)) {
                     /* 32 bit read access */
                     val = io_mem_read(section->mr, addr1, 4);
@@ -1973,9 +1975,7 @@  void address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
                 }
             } else {
                 /* RAM case */
-                ptr = qemu_get_ram_ptr(section->mr->ram_addr
-                                       + memory_region_section_addr(section,
-                                                                    addr));
+                ptr = qemu_get_ram_ptr(section->mr->ram_addr + addr1);
                 memcpy(buf, ptr, l);
                 qemu_put_ram_ptr(ptr);
             }
@@ -2015,26 +2015,21 @@  void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
 void cpu_physical_memory_write_rom(hwaddr addr,
                                    const uint8_t *buf, int len)
 {
-    AddressSpaceDispatch *d = address_space_memory.dispatch;
-    int l;
+    hwaddr l;
     uint8_t *ptr;
-    hwaddr page;
+    hwaddr addr1;
     MemoryRegionSection *section;
 
     while (len > 0) {
-        page = addr & TARGET_PAGE_MASK;
-        l = (page + TARGET_PAGE_SIZE) - addr;
-        if (l > len)
-            l = len;
-        section = phys_page_find(d, page >> TARGET_PAGE_BITS);
+        l = len;
+        section = address_space_translate(&address_space_memory,
+                                          addr, &addr1, &l, true);
 
         if (!(memory_region_is_ram(section->mr) ||
               memory_region_is_romd(section->mr))) {
             /* do nothing */
         } else {
-            unsigned long addr1;
-            addr1 = memory_region_get_ram_addr(section->mr)
-                + memory_region_section_addr(section, addr);
+            addr1 += memory_region_get_ram_addr(section->mr);
             /* ROM/RAM case */
             ptr = qemu_get_ram_ptr(addr1);
             memcpy(ptr, buf, l);
@@ -2095,18 +2090,12 @@  static void cpu_notify_map_clients(void)
 
 bool address_space_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
 {
-    AddressSpaceDispatch *d = as->dispatch;
     MemoryRegionSection *section;
-    int l;
-    hwaddr page;
+    hwaddr l, xlat;
 
     while (len > 0) {
-        page = addr & TARGET_PAGE_MASK;
-        l = (page + TARGET_PAGE_SIZE) - addr;
-        if (l > len) {
-            l = len;
-        }
-        section = phys_page_find(d, addr >> TARGET_PAGE_BITS);
+        l = len;
+        section = address_space_translate(as, addr, &xlat, &l, is_write);
         if (section->mr == &io_mem_unassigned) {
             return false;
         }
@@ -2129,22 +2118,17 @@  void *address_space_map(AddressSpace *as,
                         hwaddr *plen,
                         bool is_write)
 {
-    AddressSpaceDispatch *d = as->dispatch;
     hwaddr len = *plen;
     hwaddr todo = 0;
-    int l;
-    hwaddr page;
+    hwaddr l, xlat;
     MemoryRegionSection *section;
     ram_addr_t raddr = RAM_ADDR_MAX;
     ram_addr_t rlen;
     void *ret;
 
     while (len > 0) {
-        page = addr & TARGET_PAGE_MASK;
-        l = (page + TARGET_PAGE_SIZE) - addr;
-        if (l > len)
-            l = len;
-        section = phys_page_find(d, page >> TARGET_PAGE_BITS);
+        l = len;
+        section = address_space_translate(as, addr, &xlat, &l, is_write);
 
         if (!(memory_region_is_ram(section->mr) && !section->readonly)) {
             if (todo || bounce.buffer) {
@@ -2161,8 +2145,11 @@  void *address_space_map(AddressSpace *as,
             return bounce.buffer;
         }
         if (!todo) {
-            raddr = memory_region_get_ram_addr(section->mr)
-                + memory_region_section_addr(section, addr);
+            raddr = memory_region_get_ram_addr(section->mr) + xlat;
+        } else {
+            if (memory_region_get_ram_addr(section->mr) + xlat != raddr + todo) {
+                break;
+            }
         }
 
         len -= l;
@@ -2228,13 +2215,17 @@  static inline uint32_t ldl_phys_internal(hwaddr addr,
     uint8_t *ptr;
     uint32_t val;
     MemoryRegionSection *section;
+    hwaddr l = 4;
 
-    section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
+    section = address_space_translate(&address_space_memory, addr, &addr, &l,
+                                      false);
+    if (l < 4) {
+        return -1;
+    }
 
     if (!(memory_region_is_ram(section->mr) ||
           memory_region_is_romd(section->mr))) {
         /* I/O case */
-        addr = memory_region_section_addr(section, addr);
         val = io_mem_read(section->mr, addr, 4);
 #if defined(TARGET_WORDS_BIGENDIAN)
         if (endian == DEVICE_LITTLE_ENDIAN) {
@@ -2249,7 +2240,7 @@  static inline uint32_t ldl_phys_internal(hwaddr addr,
         /* RAM case */
         ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
                                 & TARGET_PAGE_MASK)
-                               + memory_region_section_addr(section, addr));
+                               + addr);
         switch (endian) {
         case DEVICE_LITTLE_ENDIAN:
             val = ldl_le_p(ptr);
@@ -2287,13 +2278,17 @@  static inline uint64_t ldq_phys_internal(hwaddr addr,
     uint8_t *ptr;
     uint64_t val;
     MemoryRegionSection *section;
+    hwaddr l = 8;
 
-    section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
+    section = address_space_translate(&address_space_memory, addr, &addr, &l,
+                                      false);
+    if (l < 8) {
+        return -1;
+    }
 
     if (!(memory_region_is_ram(section->mr) ||
           memory_region_is_romd(section->mr))) {
         /* I/O case */
-        addr = memory_region_section_addr(section, addr);
 
         /* XXX This is broken when device endian != cpu endian.
                Fix and add "endian" variable check */
@@ -2308,7 +2303,7 @@  static inline uint64_t ldq_phys_internal(hwaddr addr,
         /* RAM case */
         ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
                                 & TARGET_PAGE_MASK)
-                               + memory_region_section_addr(section, addr));
+                               + addr);
         switch (endian) {
         case DEVICE_LITTLE_ENDIAN:
             val = ldq_le_p(ptr);
@@ -2354,13 +2349,17 @@  static inline uint32_t lduw_phys_internal(hwaddr addr,
     uint8_t *ptr;
     uint64_t val;
     MemoryRegionSection *section;
+    hwaddr l = 2;
 
-    section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
+    section = address_space_translate(&address_space_memory, addr, &addr, &l,
+                                      false);
+    if (l < 2) {
+        return -1;
+    }
 
     if (!(memory_region_is_ram(section->mr) ||
           memory_region_is_romd(section->mr))) {
         /* I/O case */
-        addr = memory_region_section_addr(section, addr);
         val = io_mem_read(section->mr, addr, 2);
 #if defined(TARGET_WORDS_BIGENDIAN)
         if (endian == DEVICE_LITTLE_ENDIAN) {
@@ -2375,7 +2374,7 @@  static inline uint32_t lduw_phys_internal(hwaddr addr,
         /* RAM case */
         ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
                                 & TARGET_PAGE_MASK)
-                               + memory_region_section_addr(section, addr));
+                               + addr);
         switch (endian) {
         case DEVICE_LITTLE_ENDIAN:
             val = lduw_le_p(ptr);
@@ -2413,11 +2412,15 @@  void stl_phys_notdirty(hwaddr addr, uint32_t val)
 {
     uint8_t *ptr;
     MemoryRegionSection *section;
+    hwaddr l = 2;
 
-    section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
+    section = address_space_translate(&address_space_memory, addr, &addr, &l,
+                                      true);
+    if (l < 2) {
+        return;
+    }
 
     if (!memory_region_is_ram(section->mr) || section->readonly) {
-        addr = memory_region_section_addr(section, addr);
         if (memory_region_is_ram(section->mr)) {
             section = &phys_sections[phys_section_rom];
         }
@@ -2425,7 +2428,7 @@  void stl_phys_notdirty(hwaddr addr, uint32_t val)
     } else {
         unsigned long addr1 = (memory_region_get_ram_addr(section->mr)
                                & TARGET_PAGE_MASK)
-            + memory_region_section_addr(section, addr);
+            + addr;
         ptr = qemu_get_ram_ptr(addr1);
         stl_p(ptr, val);
 
@@ -2445,11 +2448,15 @@  void stq_phys_notdirty(hwaddr addr, uint64_t val)
 {
     uint8_t *ptr;
     MemoryRegionSection *section;
+    hwaddr l = 4;
 
-    section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
+    section = address_space_translate(&address_space_memory, addr, &addr, &l,
+                                      true);
+    if (l < 4) {
+        return;
+    }
 
     if (!memory_region_is_ram(section->mr) || section->readonly) {
-        addr = memory_region_section_addr(section, addr);
         if (memory_region_is_ram(section->mr)) {
             section = &phys_sections[phys_section_rom];
         }
@@ -2463,7 +2470,7 @@  void stq_phys_notdirty(hwaddr addr, uint64_t val)
     } else {
         ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
                                 & TARGET_PAGE_MASK)
-                               + memory_region_section_addr(section, addr));
+                               + addr);
         stq_p(ptr, val);
     }
 }
@@ -2474,11 +2481,15 @@  static inline void stl_phys_internal(hwaddr addr, uint32_t val,
 {
     uint8_t *ptr;
     MemoryRegionSection *section;
+    hwaddr l = 8;
 
-    section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
+    section = address_space_translate(&address_space_memory, addr, &addr, &l,
+                                      true);
+    if (l < 8) {
+        return;
+    }
 
     if (!memory_region_is_ram(section->mr) || section->readonly) {
-        addr = memory_region_section_addr(section, addr);
         if (memory_region_is_ram(section->mr)) {
             section = &phys_sections[phys_section_rom];
         }
@@ -2495,7 +2506,7 @@  static inline void stl_phys_internal(hwaddr addr, uint32_t val,
     } else {
         unsigned long addr1;
         addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
-            + memory_region_section_addr(section, addr);
+            + addr;
         /* RAM case */
         ptr = qemu_get_ram_ptr(addr1);
         switch (endian) {
@@ -2541,11 +2552,15 @@  static inline void stw_phys_internal(hwaddr addr, uint32_t val,
 {
     uint8_t *ptr;
     MemoryRegionSection *section;
+    hwaddr l = 4;
 
-    section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
+    section = address_space_translate(&address_space_memory, addr, &addr, &l,
+                                      true);
+    if (l < 4) {
+        return;
+    }
 
     if (!memory_region_is_ram(section->mr) || section->readonly) {
-        addr = memory_region_section_addr(section, addr);
         if (memory_region_is_ram(section->mr)) {
             section = &phys_sections[phys_section_rom];
         }
@@ -2562,7 +2577,7 @@  static inline void stw_phys_internal(hwaddr addr, uint32_t val,
     } else {
         unsigned long addr1;
         addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
-            + memory_region_section_addr(section, addr);
+            + addr;
         /* RAM case */
         ptr = qemu_get_ram_ptr(addr1);
         switch (endian) {
@@ -2666,9 +2681,10 @@  bool virtio_is_big_endian(void)
 bool cpu_physical_memory_is_io(hwaddr phys_addr)
 {
     MemoryRegionSection *section;
+    hwaddr l = 1;
 
-    section = phys_page_find(address_space_memory.dispatch,
-                             phys_addr >> TARGET_PAGE_BITS);
+    section = address_space_translate(&address_space_memory,
+                                      phys_addr, &phys_addr, &l, false);
 
     return !(memory_region_is_ram(section->mr) ||
              memory_region_is_romd(section->mr));
diff --git a/include/exec/cputlb.h b/include/exec/cputlb.h
index 733c885..e821660 100644
--- a/include/exec/cputlb.h
+++ b/include/exec/cputlb.h
@@ -26,8 +26,6 @@  void tlb_unprotect_code_phys(CPUArchState *env, ram_addr_t ram_addr,
                              target_ulong vaddr);
 void tlb_reset_dirty_range(CPUTLBEntry *tlb_entry, uintptr_t start,
                            uintptr_t length);
-MemoryRegionSection *phys_page_find(struct AddressSpaceDispatch *d,
-                                    hwaddr index);
 void cpu_tlb_reset_dirty_all(ram_addr_t start1, ram_addr_t length);
 void tlb_set_dirty(CPUArchState *env, target_ulong vaddr);
 extern int tlb_flush_count;
@@ -35,11 +33,11 @@  extern int tlb_flush_count;
 /* exec.c */
 void tb_flush_jmp_cache(CPUArchState *env, target_ulong addr);
 hwaddr memory_region_section_get_iotlb(CPUArchState *env,
-                                                   MemoryRegionSection *section,
-                                                   target_ulong vaddr,
-                                                   hwaddr paddr,
-                                                   int prot,
-                                                   target_ulong *address);
+                                       MemoryRegionSection *section,
+                                       target_ulong vaddr,
+                                       hwaddr paddr, hwaddr xlat,
+                                       int prot,
+                                       target_ulong *address);
 bool memory_region_is_unassigned(MemoryRegion *mr);
 
 #endif
diff --git a/include/exec/memory.h b/include/exec/memory.h
index c38e974..914f5d4 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -740,23 +740,6 @@  MemoryRegionSection memory_region_find(MemoryRegion *mr,
                                        hwaddr addr, uint64_t size);
 
 /**
- * memory_region_section_addr: get offset within MemoryRegionSection
- *
- * Returns offset within MemoryRegionSection
- *
- * @section: the memory region section being queried
- * @addr: address in address space
- */
-static inline hwaddr
-memory_region_section_addr(MemoryRegionSection *section,
-                           hwaddr addr)
-{
-    addr -= section->offset_within_address_space;
-    addr += section->offset_within_region;
-    return addr;
-}
-
-/**
  * address_space_sync_dirty_bitmap: synchronize the dirty log for all memory
  *
  * Synchronizes the dirty page log for an entire address space.
@@ -857,6 +840,20 @@  void address_space_write(AddressSpace *as, hwaddr addr,
  */
 void address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len);
 
+/* address_space_translate: translate an address range into an address space
+ * into a MemoryRegionSection and an address range into that section
+ *
+ * @as: #AddressSpace to be accessed
+ * @addr: address within that address space
+ * @xlat: pointer to address within the returned memory region section's
+ * #MemoryRegion.
+ * @len: pointer to length
+ * @is_write: indicates the transfer direction
+ */
+MemoryRegionSection *address_space_translate(AddressSpace *as, hwaddr addr,
+                                             hwaddr *xlat, hwaddr *len,
+                                             bool is_write);
+
 /* address_space_valid: check for validity of an address space range
  *
  * Check whether access to the given address space range is permitted by
diff --git a/translate-all.c b/translate-all.c
index 0d84b0d..7a7d537 100644
--- a/translate-all.c
+++ b/translate-all.c
@@ -1355,15 +1355,15 @@  void tb_invalidate_phys_addr(hwaddr addr)
 {
     ram_addr_t ram_addr;
     MemoryRegionSection *section;
+    hwaddr l = 1;
 
-    section = phys_page_find(address_space_memory.dispatch,
-                             addr >> TARGET_PAGE_BITS);
+    section = address_space_translate(&address_space_memory, addr, &addr, &l, false);
     if (!(memory_region_is_ram(section->mr)
           || memory_region_is_romd(section->mr))) {
         return;
     }
     ram_addr = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
-        + memory_region_section_addr(section, addr);
+        + addr;
     tb_invalidate_phys_page_range(ram_addr, ram_addr + 1, 0);
 }
 #endif /* TARGET_HAS_ICE && !defined(CONFIG_USER_ONLY) */
-- 
1.7.1


From 56c78924db451f1a3288c9321f198b16b845333d Mon Sep 17 00:00:00 2001
From: Jan Kiszka <jan.kiszka@siemens.com>
Date: Mon, 6 May 2013 16:48:02 +0200
Subject: [PATCH 13/40] memory: Introduce address_space_lookup_region

This introduces a wrapper for phys_page_find (before we complicate
address_space_translate with IOMMU translation).  This function will
also encapsulate locking and reference counting when we introduce
BQL-free dispatching.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 exec.c |    8 +++++++-
 1 files changed, 7 insertions(+), 1 deletions(-)

diff --git a/exec.c b/exec.c
index 9709bc4..6efad6d 100644
--- a/exec.c
+++ b/exec.c
@@ -198,13 +198,19 @@  static MemoryRegionSection *phys_page_find(AddressSpaceDispatch *d, hwaddr index
     return &phys_sections[lp.ptr];
 }
 
+static MemoryRegionSection *address_space_lookup_region(AddressSpace *as,
+                                                        hwaddr addr)
+{
+    return phys_page_find(as->dispatch, addr >> TARGET_PAGE_BITS);
+}
+
 MemoryRegionSection *address_space_translate(AddressSpace *as, hwaddr addr,
                                              hwaddr *xlat, hwaddr *plen,
                                              bool is_write)
 {
     MemoryRegionSection *section;
 
-    section = phys_page_find(as->dispatch, addr >> TARGET_PAGE_BITS);
+    section = address_space_lookup_region(as, addr);
     /* Compute offset with MemoryRegionSection */
     addr -= section->offset_within_address_space;
     *plen = MIN(section->size - addr, *plen);
-- 
1.7.1


From 09c14c1d787d5319ce0d4dbceb9366937031f51b Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi.kivity@gmail.com>
Date: Tue, 30 Oct 2012 13:47:46 +0200
Subject: [PATCH 14/40] memory: iommu support

Add a new memory region type that translates addresses it is given,
then forwards them to a target address space.  This is similar to
an alias, except that the mapping is more flexible than a linear
translation and truncation, and also less efficient since the
translation happens at runtime.

The implementation uses an AddressSpace mapping the target region to
avoid hierarchical dispatch all the way to the resolved region; only
iommu regions are looked up dynamically.

Signed-off-by: Avi Kivity <avi.kivity@gmail.com>
[Modified to put translation in address_space_translate - Paolo]
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 exec.c                |   35 +++++++++++++++++++++++++++++------
 include/exec/memory.h |   44 ++++++++++++++++++++++++++++++++++++++++++++
 memory.c              |   21 +++++++++++++++++++++
 3 files changed, 94 insertions(+), 6 deletions(-)

diff --git a/exec.c b/exec.c
index 6efad6d..7dab2fa 100644
--- a/exec.c
+++ b/exec.c
@@ -208,15 +208,38 @@  MemoryRegionSection *address_space_translate(AddressSpace *as, hwaddr addr,
                                              hwaddr *xlat, hwaddr *plen,
                                              bool is_write)
 {
+    IOMMUTLBEntry iotlb;
     MemoryRegionSection *section;
+    hwaddr len = *plen;
 
-    section = address_space_lookup_region(as, addr);
-    /* Compute offset with MemoryRegionSection */
-    addr -= section->offset_within_address_space;
-    *plen = MIN(section->size - addr, *plen);
+    for (;;) {
+        section = address_space_lookup_region(as, addr);
+
+        /* Compute offset with MemoryRegionSection */
+        addr -= section->offset_within_address_space;
+        len = MIN(section->size - addr, len);
+
+        /* Compute offset with MemoryRegion */
+        addr += section->offset_within_region;
+
+        if (!section->mr->iommu_ops) {
+            break;
+        }
+
+        iotlb = section->mr->iommu_ops->translate(section->mr, addr);
+        addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
+                | (addr & iotlb.addr_mask));
+        len = MIN(len, (addr | iotlb.addr_mask) - addr + 1);
+        if (!iotlb.perm[is_write]) {
+            section = &phys_sections[phys_section_unassigned];
+            break;
+        }
+
+        as = section->mr->iommu_target_as;
+    }
 
-    /* Compute offset with MemoryRegion */
-    *xlat = addr + section->offset_within_region;
+    *plen = len;
+    *xlat = addr;
     return section;
 }
 
diff --git a/include/exec/memory.h b/include/exec/memory.h
index 914f5d4..e05296b 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -112,12 +112,27 @@  struct MemoryRegionOps {
     const MemoryRegionMmio old_mmio;
 };
 
+typedef struct IOMMUTLBEntry IOMMUTLBEntry;
+typedef struct MemoryRegionIOMMUOps MemoryRegionIOMMUOps;
+
+struct IOMMUTLBEntry {
+    hwaddr translated_addr;
+    hwaddr addr_mask;  /* 0xfff = 4k translation */
+    bool perm[2]; /* permissions, [0] for read, [1] for write */
+};
+
+struct MemoryRegionIOMMUOps {
+    /* Returns a TLB entry that contains a given address. */
+    IOMMUTLBEntry (*translate)(MemoryRegion *iommu, hwaddr addr);
+};
+
 typedef struct CoalescedMemoryRange CoalescedMemoryRange;
 typedef struct MemoryRegionIoeventfd MemoryRegionIoeventfd;
 
 struct MemoryRegion {
     /* All fields are private - violators will be prosecuted */
     const MemoryRegionOps *ops;
+    const MemoryRegionIOMMUOps *iommu_ops;
     void *opaque;
     MemoryRegion *parent;
     Int128 size;
@@ -144,6 +159,7 @@  struct MemoryRegion {
     uint8_t dirty_log_mask;
     unsigned ioeventfd_nb;
     MemoryRegionIoeventfd *ioeventfds;
+    struct AddressSpace *iommu_target_as;
 };
 
 struct MemoryRegionPortio {
@@ -329,6 +345,25 @@  void memory_region_init_rom_device(MemoryRegion *mr,
 void memory_region_init_reservation(MemoryRegion *mr,
                                     const char *name,
                                     uint64_t size);
+
+/**
+ * memory_region_init_iommu: Initialize a memory region that translates addresses
+ *
+ * An IOMMU region translates addresses and forwards accesses to a target address space.
+ *
+ * @mr: the #MemoryRegion to be initialized
+ * @ops: the operations (translate callback) that map addresses into @target_as
+ * @target_as: the #AddressSpace that will be used to satisfy accesses to translated
+ *          addresses
+ * @name: used for debugging; not visible to the user or ABI
+ * @size: size of the region.
+ */
+void memory_region_init_iommu(MemoryRegion *mr,
+                              MemoryRegionIOMMUOps *ops,
+                              AddressSpace *target_as,
+                              const char *name,
+                              uint64_t size);
+
 /**
  * memory_region_destroy: Destroy a memory region and reclaim all resources.
  *
@@ -368,6 +403,15 @@  static inline bool memory_region_is_romd(MemoryRegion *mr)
 }
 
 /**
+ * memory_region_is_iommu: check whether a memory region is an iommu
+ *
+ * Returns %true if a memory region is an iommu.
+ *
+ * @mr: the memory region being queried
+ */
+bool memory_region_is_iommu(MemoryRegion *mr);
+
+/**
  * memory_region_name: get a memory region's name
  *
  * Returns the string that was used to initialize the memory region.
diff --git a/memory.c b/memory.c
index a8929aa..b0d5e33 100644
--- a/memory.c
+++ b/memory.c
@@ -787,6 +787,7 @@  void memory_region_init(MemoryRegion *mr,
                         uint64_t size)
 {
     mr->ops = NULL;
+    mr->iommu_ops = NULL;
     mr->parent = NULL;
     mr->size = int128_make64(size);
     if (size == UINT64_MAX) {
@@ -978,6 +979,21 @@  void memory_region_init_rom_device(MemoryRegion *mr,
     mr->ram_addr = qemu_ram_alloc(size, mr);
 }
 
+void memory_region_init_iommu(MemoryRegion *mr,
+                              MemoryRegionIOMMUOps *ops,
+                              AddressSpace *target_as,
+                              const char *name,
+                              uint64_t size)
+{
+    memory_region_init(mr, name, size);
+    mr->ops = NULL;
+    mr->iommu_ops = ops,
+    mr->opaque = mr;
+    mr->terminates = true;  /* then re-forwards */
+    mr->destructor = memory_region_destructor_none;
+    mr->iommu_target_as = target_as;
+}
+
 static uint64_t invalid_read(void *opaque, hwaddr addr,
                              unsigned size)
 {
@@ -1052,6 +1068,11 @@  bool memory_region_is_rom(MemoryRegion *mr)
     return mr->ram && mr->readonly;
 }
 
+bool memory_region_is_iommu(MemoryRegion *mr)
+{
+    return mr->iommu_ops;
+}
+
 void memory_region_set_log(MemoryRegion *mr, bool log, unsigned client)
 {
     uint8_t mask = 1 << client;
-- 
1.7.1


From 10f19efd55a416806850b6074f26ed4712069f65 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi.kivity@gmail.com>
Date: Tue, 30 Oct 2012 13:47:49 +0200
Subject: [PATCH 15/40] vfio: abort if an emulated iommu is used

vfio doesn't support guest iommus yet, indicate it to the user
by gently depositing a core on their disk.

Signed-off-by: Avi Kivity <avi.kivity@gmail.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 hw/misc/vfio.c |    2 ++
 1 files changed, 2 insertions(+), 0 deletions(-)

diff --git a/hw/misc/vfio.c b/hw/misc/vfio.c
index 693a9ff..178dd11 100644
--- a/hw/misc/vfio.c
+++ b/hw/misc/vfio.c
@@ -1939,6 +1939,8 @@  static void vfio_listener_region_add(MemoryListener *listener,
     void *vaddr;
     int ret;
 
+    assert(!memory_region_is_iommu(section->mr));
+
     if (vfio_listener_skipped_section(section)) {
         DPRINTF("SKIPPING region_add %"HWADDR_PRIx" - %"PRIx64"\n",
                 section->offset_within_address_space,
-- 
1.7.1


From 78c5a629090ef01292e32d74cd2b711c0c6af6ba Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 10 Apr 2013 17:30:48 +0200
Subject: [PATCH 16/40] spapr: convert TCE API to use an opaque type

The TCE table is currently returned as a DMAContext, and non-type-safe
APIs are called later passing back the DMAContext.  Since we want to move
away from DMAContext, use an opaque type instead, and add an accessor
to retrieve the DMAContext from it.

Acked-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 hw/ppc/spapr_iommu.c        |   54 +++++++++++++++++-------------------------
 hw/ppc/spapr_pci.c          |    8 +++---
 hw/ppc/spapr_vio.c          |   13 +++++----
 include/hw/pci-host/spapr.h |    2 +-
 include/hw/ppc/spapr.h      |   12 +++++----
 include/hw/ppc/spapr_vio.h  |    1 +
 6 files changed, 42 insertions(+), 48 deletions(-)

diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
index e1fe941..7a507e0 100644
--- a/hw/ppc/spapr_iommu.c
+++ b/hw/ppc/spapr_iommu.c
@@ -36,8 +36,6 @@  enum sPAPRTCEAccess {
     SPAPR_TCE_RW = 3,
 };
 
-typedef struct sPAPRTCETable sPAPRTCETable;
-
 struct sPAPRTCETable {
     DMAContext dma;
     uint32_t liobn;
@@ -122,7 +120,7 @@  static int spapr_tce_translate(DMAContext *dma,
     return 0;
 }
 
-DMAContext *spapr_tce_new_dma_context(uint32_t liobn, size_t window_size)
+sPAPRTCETable *spapr_tce_new_table(uint32_t liobn, size_t window_size)
 {
     sPAPRTCETable *tcet;
 
@@ -155,43 +153,40 @@  DMAContext *spapr_tce_new_dma_context(uint32_t liobn, size_t window_size)
     }
 
 #ifdef DEBUG_TCE
-    fprintf(stderr, "spapr_iommu: New TCE table, liobn=0x%x, context @ %p, "
-            "table @ %p, fd=%d\n", liobn, &tcet->dma, tcet->table, tcet->fd);
+    fprintf(stderr, "spapr_iommu: New TCE table @ %p, liobn=0x%x, "
+            "table @ %p, fd=%d\n", tcet, liobn, tcet->table, tcet->fd);
 #endif
 
     QLIST_INSERT_HEAD(&spapr_tce_tables, tcet, list);
 
-    return &tcet->dma;
+    return tcet;
 }
 
-void spapr_tce_free(DMAContext *dma)
+void spapr_tce_free(sPAPRTCETable *tcet)
 {
+    QLIST_REMOVE(tcet, list);
 
-    if (dma) {
-        sPAPRTCETable *tcet = DO_UPCAST(sPAPRTCETable, dma, dma);
-
-        QLIST_REMOVE(tcet, list);
-
-        if (!kvm_enabled() ||
-            (kvmppc_remove_spapr_tce(tcet->table, tcet->fd,
-                                     tcet->window_size) != 0)) {
-            g_free(tcet->table);
-        }
-
-        g_free(tcet);
+    if (!kvm_enabled() ||
+        (kvmppc_remove_spapr_tce(tcet->table, tcet->fd,
+                                 tcet->window_size) != 0)) {
+        g_free(tcet->table);
     }
+
+    g_free(tcet);
 }
 
-void spapr_tce_set_bypass(DMAContext *dma, bool bypass)
+DMAContext *spapr_tce_get_dma(sPAPRTCETable *tcet)
 {
-    sPAPRTCETable *tcet = DO_UPCAST(sPAPRTCETable, dma, dma);
+    return &tcet->dma;
+}
 
+void spapr_tce_set_bypass(sPAPRTCETable *tcet, bool bypass)
+{
     tcet->bypass = bypass;
 }
 
-void spapr_tce_reset(DMAContext *dma)
+void spapr_tce_reset(sPAPRTCETable *tcet)
 {
-    sPAPRTCETable *tcet = DO_UPCAST(sPAPRTCETable, dma, dma);
     size_t table_size = (tcet->window_size >> SPAPR_TCE_PAGE_SHIFT)
         * sizeof(sPAPRTCE);
 
@@ -277,17 +272,12 @@  int spapr_dma_dt(void *fdt, int node_off, const char *propname,
 }
 
 int spapr_tcet_dma_dt(void *fdt, int node_off, const char *propname,
-                      DMAContext *iommu)
+                      sPAPRTCETable *tcet)
 {
-    if (!iommu) {
+    if (!tcet) {
         return 0;
     }
 
-    if (iommu->translate == spapr_tce_translate) {
-        sPAPRTCETable *tcet = DO_UPCAST(sPAPRTCETable, dma, iommu);
-        return spapr_dma_dt(fdt, node_off, propname,
-                tcet->liobn, 0, tcet->window_size);
-    }
-
-    return -1;
+    return spapr_dma_dt(fdt, node_off, propname,
+                        tcet->liobn, 0, tcet->window_size);
 }
diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index 62ff323..eb64a8f 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -511,7 +511,7 @@  static DMAContext *spapr_pci_dma_context_fn(PCIBus *bus, void *opaque,
 {
     sPAPRPHBState *phb = opaque;
 
-    return phb->dma;
+    return spapr_tce_get_dma(phb->tcet);
 }
 
 static int spapr_phb_init(SysBusDevice *s)
@@ -646,8 +646,8 @@  static int spapr_phb_init(SysBusDevice *s)
 
     sphb->dma_window_start = 0;
     sphb->dma_window_size = 0x40000000;
-    sphb->dma = spapr_tce_new_dma_context(sphb->dma_liobn, sphb->dma_window_size);
-    if (!sphb->dma) {
+    sphb->tcet = spapr_tce_new_table(sphb->dma_liobn, sphb->dma_window_size);
+    if (!sphb->tcet) {
         fprintf(stderr, "Unable to create TCE table for %s\n", sphb->dtbusname);
         return -1;
     }
@@ -676,7 +676,7 @@  static void spapr_phb_reset(DeviceState *qdev)
     sPAPRPHBState *sphb = SPAPR_PCI_HOST_BRIDGE(s);
 
     /* Reset the IOMMU state */
-    spapr_tce_reset(sphb->dma);
+    spapr_tce_reset(sphb->tcet);
 }
 
 static Property spapr_phb_properties[] = {
diff --git a/hw/ppc/spapr_vio.c b/hw/ppc/spapr_vio.c
index 1405c32..a06ac94 100644
--- a/hw/ppc/spapr_vio.c
+++ b/hw/ppc/spapr_vio.c
@@ -145,7 +145,7 @@  static int vio_make_devnode(VIOsPAPRDevice *dev,
         }
     }
 
-    ret = spapr_tcet_dma_dt(fdt, node_off, "ibm,my-dma-window", dev->dma);
+    ret = spapr_tcet_dma_dt(fdt, node_off, "ibm,my-dma-window", dev->tcet);
     if (ret < 0) {
         return ret;
     }
@@ -319,8 +319,8 @@  int spapr_vio_send_crq(VIOsPAPRDevice *dev, uint8_t *crq)
 
 static void spapr_vio_quiesce_one(VIOsPAPRDevice *dev)
 {
-    if (dev->dma) {
-        spapr_tce_reset(dev->dma);
+    if (dev->tcet) {
+        spapr_tce_reset(dev->tcet);
     }
     free_crq(dev);
 }
@@ -345,12 +345,12 @@  static void rtas_set_tce_bypass(sPAPREnvironment *spapr, uint32_t token,
         return;
     }
 
-    if (!dev->dma) {
+    if (!dev->tcet) {
         rtas_st(rets, 0, -3);
         return;
     }
 
-    spapr_tce_set_bypass(dev->dma, !!enable);
+    spapr_tce_set_bypass(dev->tcet, !!enable);
 
     rtas_st(rets, 0, 0);
 }
@@ -457,7 +457,8 @@  static int spapr_vio_busdev_init(DeviceState *qdev)
 
     if (pc->rtce_window_size) {
         uint32_t liobn = SPAPR_VIO_BASE_LIOBN | dev->reg;
-        dev->dma = spapr_tce_new_dma_context(liobn, pc->rtce_window_size);
+        dev->tcet = spapr_tce_new_table(liobn, pc->rtce_window_size);
+        dev->dma = spapr_tce_get_dma(dev->tcet);
     }
 
     return pc->init(dev);
diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h
index b21080c..653dd40 100644
--- a/include/hw/pci-host/spapr.h
+++ b/include/hw/pci-host/spapr.h
@@ -49,7 +49,7 @@  typedef struct sPAPRPHBState {
     uint32_t dma_liobn;
     uint64_t dma_window_start;
     uint64_t dma_window_size;
-    DMAContext *dma;
+    sPAPRTCETable *tcet;
 
     struct {
         uint32_t irq;
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 864bee9..e8d617b 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -342,17 +342,19 @@  typedef struct sPAPRTCE {
 
 #define RTAS_ERROR_LOG_MAX      2048
 
+typedef struct sPAPRTCETable sPAPRTCETable;
 
 void spapr_iommu_init(void);
 void spapr_events_init(sPAPREnvironment *spapr);
 void spapr_events_fdt_skel(void *fdt, uint32_t epow_irq);
-DMAContext *spapr_tce_new_dma_context(uint32_t liobn, size_t window_size);
-void spapr_tce_free(DMAContext *dma);
-void spapr_tce_reset(DMAContext *dma);
-void spapr_tce_set_bypass(DMAContext *dma, bool bypass);
+sPAPRTCETable *spapr_tce_new_table(uint32_t liobn, size_t window_size);
+DMAContext *spapr_tce_get_dma(sPAPRTCETable *tcet);
+void spapr_tce_free(sPAPRTCETable *tcet);
+void spapr_tce_reset(sPAPRTCETable *tcet);
+void spapr_tce_set_bypass(sPAPRTCETable *tcet, bool bypass);
 int spapr_dma_dt(void *fdt, int node_off, const char *propname,
                  uint32_t liobn, uint64_t window, uint32_t size);
 int spapr_tcet_dma_dt(void *fdt, int node_off, const char *propname,
-                      DMAContext *dma);
+                      sPAPRTCETable *tcet);
 
 #endif /* !defined (__HW_SPAPR_H__) */
diff --git a/include/hw/ppc/spapr_vio.h b/include/hw/ppc/spapr_vio.h
index f98ec0a..56f2821 100644
--- a/include/hw/ppc/spapr_vio.h
+++ b/include/hw/ppc/spapr_vio.h
@@ -63,6 +63,7 @@  struct VIOsPAPRDevice {
     uint32_t irq;
     target_ulong signal_state;
     VIOsPAPR_CRQ crq;
+    sPAPRTCETable *tcet;
     DMAContext *dma;
 };
 
-- 
1.7.1


From ce608ec7588a33519c626b95f713579fe7e44a29 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Tue, 16 Apr 2013 15:05:06 +0200
Subject: [PATCH 17/40] spapr: make IOMMU translation go through IOMMUTLBEntry

The next step is to introduce the translation code that will be used for
IOMMU MemoryRegions, but still do the actual translation in a DMAContext.

Acked-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 hw/ppc/spapr_iommu.c |   57 ++++++++++++++++++++++++++++----------------------
 1 files changed, 32 insertions(+), 25 deletions(-)

diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
index 7a507e0..45fb81d 100644
--- a/hw/ppc/spapr_iommu.c
+++ b/hw/ppc/spapr_iommu.c
@@ -68,15 +68,8 @@  static sPAPRTCETable *spapr_tce_find_by_liobn(uint32_t liobn)
     return NULL;
 }
 
-static int spapr_tce_translate(DMAContext *dma,
-                               dma_addr_t addr,
-                               hwaddr *paddr,
-                               hwaddr *len,
-                               DMADirection dir)
+static IOMMUTLBEntry spapr_tce_translate_iommu(sPAPRTCETable *tcet, hwaddr addr)
 {
-    sPAPRTCETable *tcet = DO_UPCAST(sPAPRTCETable, dma, dma);
-    enum sPAPRTCEAccess access = (dir == DMA_DIRECTION_FROM_DEVICE)
-        ? SPAPR_TCE_WO : SPAPR_TCE_RO;
     uint64_t tce;
 
 #ifdef DEBUG_TCE
@@ -85,9 +78,11 @@  static int spapr_tce_translate(DMAContext *dma,
 #endif
 
     if (tcet->bypass) {
-        *paddr = addr;
-        *len = (hwaddr)-1;
-        return 0;
+        return (IOMMUTLBEntry) {
+            .translated_addr = 0,
+            .addr_mask = ~(hwaddr)0,
+            .perm = { true, true },
+        };
     }
 
     /* Check if we are in bound */
@@ -95,28 +90,40 @@  static int spapr_tce_translate(DMAContext *dma,
 #ifdef DEBUG_TCE
         fprintf(stderr, "spapr_tce_translate out of bounds\n");
 #endif
-        return -EFAULT;
+        return (IOMMUTLBEntry) { .perm = { false, false } };
     }
 
     tce = tcet->table[addr >> SPAPR_TCE_PAGE_SHIFT].tce;
 
-    /* Check TCE */
-    if (!(tce & access)) {
-        return -EPERM;
-    }
+#ifdef DEBUG_TCE
+    fprintf(stderr, " ->  *paddr=0x%llx, *len=0x%llx\n",
+            (tce & ~SPAPR_TCE_PAGE_MASK), SPAPR_TCE_PAGE_MASK + 1);
+#endif
 
-    /* How much til end of page ? */
-    *len = ((~addr) & SPAPR_TCE_PAGE_MASK) + 1;
+    return (IOMMUTLBEntry) {
+        .translated_addr = tce & ~SPAPR_TCE_PAGE_MASK,
+        .addr_mask = SPAPR_TCE_PAGE_MASK,
+        .perm = { [0] = tce & SPAPR_TCE_RO, [1] = tce & SPAPR_TCE_WO },
+    };
+}
 
-    /* Translate */
-    *paddr = (tce & ~SPAPR_TCE_PAGE_MASK) |
-        (addr & SPAPR_TCE_PAGE_MASK);
+static int spapr_tce_translate(DMAContext *dma,
+                               dma_addr_t addr,
+                               hwaddr *paddr,
+                               hwaddr *len,
+                               DMADirection dir)
+ {
+    sPAPRTCETable *tcet = DO_UPCAST(sPAPRTCETable, dma, dma);
+    bool is_write = (dir == DMA_DIRECTION_FROM_DEVICE);
+    IOMMUTLBEntry entry = spapr_tce_translate_iommu(tcet, addr);
 
-#ifdef DEBUG_TCE
-    fprintf(stderr, " ->  *paddr=0x" TARGET_FMT_plx ", *len=0x"
-            TARGET_FMT_plx "\n", *paddr, *len);
-#endif
+    if (!entry.perm[is_write]) {
+        return -EPERM;
+    }
 
+    /* Translate */
+    *paddr = entry.translated_addr | (addr & entry.addr_mask);
+    *len = (addr | entry.addr_mask) - addr + 1;
     return 0;
 }
 
-- 
1.7.1


From 7e23ad2c1a26221ba927e92f97ef2e56308ec0d6 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Thu, 11 Apr 2013 12:35:33 +0200
Subject: [PATCH 18/40] spapr: use memory core for iommu support

Now we can stop using a "translating" DMAContext, but we do not yet modify
the sPAPRTCETable users to get an AddressSpace; they keep using the table
via a DMAContext.

Acked-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 hw/ppc/spapr_iommu.c   |   42 ++++++++++++++++++++----------------------
 include/hw/ppc/spapr.h |    1 +
 2 files changed, 21 insertions(+), 22 deletions(-)

diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
index 45fb81d..75d8370 100644
--- a/hw/ppc/spapr_iommu.c
+++ b/hw/ppc/spapr_iommu.c
@@ -37,12 +37,16 @@  enum sPAPRTCEAccess {
 };
 
 struct sPAPRTCETable {
+    /* temporary until everyone has its own AddressSpace */
     DMAContext dma;
+    AddressSpace as;
+
     uint32_t liobn;
     uint32_t window_size;
     sPAPRTCE *table;
     bool bypass;
     int fd;
+    MemoryRegion iommu;
     QLIST_ENTRY(sPAPRTCETable) list;
 };
 
@@ -68,8 +72,9 @@  static sPAPRTCETable *spapr_tce_find_by_liobn(uint32_t liobn)
     return NULL;
 }
 
-static IOMMUTLBEntry spapr_tce_translate_iommu(sPAPRTCETable *tcet, hwaddr addr)
+static IOMMUTLBEntry spapr_tce_translate_iommu(MemoryRegion *iommu, hwaddr addr)
 {
+    sPAPRTCETable *tcet = container_of(iommu, sPAPRTCETable, iommu);
     uint64_t tce;
 
 #ifdef DEBUG_TCE
@@ -107,25 +112,9 @@  static IOMMUTLBEntry spapr_tce_translate_iommu(sPAPRTCETable *tcet, hwaddr addr)
     };
 }
 
-static int spapr_tce_translate(DMAContext *dma,
-                               dma_addr_t addr,
-                               hwaddr *paddr,
-                               hwaddr *len,
-                               DMADirection dir)
- {
-    sPAPRTCETable *tcet = DO_UPCAST(sPAPRTCETable, dma, dma);
-    bool is_write = (dir == DMA_DIRECTION_FROM_DEVICE);
-    IOMMUTLBEntry entry = spapr_tce_translate_iommu(tcet, addr);
-
-    if (!entry.perm[is_write]) {
-        return -EPERM;
-    }
-
-    /* Translate */
-    *paddr = entry.translated_addr | (addr & entry.addr_mask);
-    *len = (addr | entry.addr_mask) - addr + 1;
-    return 0;
-}
+static MemoryRegionIOMMUOps spapr_iommu_ops = {
+    .translate = spapr_tce_translate_iommu,
+};
 
 sPAPRTCETable *spapr_tce_new_table(uint32_t liobn, size_t window_size)
 {
@@ -142,8 +131,6 @@  sPAPRTCETable *spapr_tce_new_table(uint32_t liobn, size_t window_size)
     }
 
     tcet = g_malloc0(sizeof(*tcet));
-    dma_context_init(&tcet->dma, &address_space_memory, spapr_tce_translate, NULL, NULL);
-
     tcet->liobn = liobn;
     tcet->window_size = window_size;
 
@@ -164,6 +151,12 @@  sPAPRTCETable *spapr_tce_new_table(uint32_t liobn, size_t window_size)
             "table @ %p, fd=%d\n", tcet, liobn, tcet->table, tcet->fd);
 #endif
 
+    memory_region_init_iommu(&tcet->iommu, &spapr_iommu_ops,
+                             &address_space_memory,
+                             "iommu-spapr", INT64_MAX);
+    address_space_init(&tcet->as, &tcet->iommu);
+    dma_context_init(&tcet->dma, &tcet->as, NULL, NULL, NULL);
+
     QLIST_INSERT_HEAD(&spapr_tce_tables, tcet, list);
 
     return tcet;
@@ -187,6 +180,11 @@  DMAContext *spapr_tce_get_dma(sPAPRTCETable *tcet)
     return &tcet->dma;
 }
 
+MemoryRegion *spapr_tce_get_iommu(sPAPRTCETable *tcet)
+{
+    return &tcet->iommu;
+}
+
 void spapr_tce_set_bypass(sPAPRTCETable *tcet, bool bypass)
 {
     tcet->bypass = bypass;
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index e8d617b..142abb7 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -349,6 +349,7 @@  void spapr_events_init(sPAPREnvironment *spapr);
 void spapr_events_fdt_skel(void *fdt, uint32_t epow_irq);
 sPAPRTCETable *spapr_tce_new_table(uint32_t liobn, size_t window_size);
 DMAContext *spapr_tce_get_dma(sPAPRTCETable *tcet);
+MemoryRegion *spapr_tce_get_iommu(sPAPRTCETable *tcet);
 void spapr_tce_free(sPAPRTCETable *tcet);
 void spapr_tce_reset(sPAPRTCETable *tcet);
 void spapr_tce_set_bypass(sPAPRTCETable *tcet, bool bypass);
-- 
1.7.1


From c537f2332d33fffff67af2d7f48af9727adac659 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 10 Apr 2013 17:49:04 +0200
Subject: [PATCH 19/40] dma: eliminate old-style IOMMU support

The translate function in the DMAContext is now always NULL.
Remove every reference to it.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 dma-helpers.c        |  173 ++------------------------------------------------
 exec.c               |    3 +-
 hw/pci/pci.c         |    3 +-
 hw/ppc/spapr_iommu.c |    2 +-
 include/sysemu/dma.h |  118 +++++++++-------------------------
 5 files changed, 42 insertions(+), 257 deletions(-)

diff --git a/dma-helpers.c b/dma-helpers.c
index 2962b69..3e23d47 100644
--- a/dma-helpers.c
+++ b/dma-helpers.c
@@ -14,9 +14,12 @@ 
 
 /* #define DEBUG_IOMMU */
 
-static void do_dma_memory_set(AddressSpace *as,
-                              dma_addr_t addr, uint8_t c, dma_addr_t len)
+int dma_memory_set(DMAContext *dma, dma_addr_t addr, uint8_t c, dma_addr_t len)
 {
+    AddressSpace *as = dma->as;
+
+    dma_barrier(dma, DMA_DIRECTION_FROM_DEVICE);
+
 #define FILLBUF_SIZE 512
     uint8_t fillbuf[FILLBUF_SIZE];
     int l;
@@ -28,16 +31,6 @@  static void do_dma_memory_set(AddressSpace *as,
         len -= l;
         addr += l;
     }
-}
-
-int dma_memory_set(DMAContext *dma, dma_addr_t addr, uint8_t c, dma_addr_t len)
-{
-    dma_barrier(dma, DMA_DIRECTION_FROM_DEVICE);
-
-    if (dma_has_iommu(dma)) {
-        return iommu_dma_memory_set(dma, addr, c, len);
-    }
-    do_dma_memory_set(dma->as, addr, c, len);
 
     return 0;
 }
@@ -278,162 +271,10 @@  void dma_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie,
     bdrv_acct_start(bs, cookie, sg->size, type);
 }
 
-bool iommu_dma_memory_valid(DMAContext *dma, dma_addr_t addr, dma_addr_t len,
-                            DMADirection dir)
-{
-    hwaddr paddr, plen;
-
-#ifdef DEBUG_IOMMU
-    fprintf(stderr, "dma_memory_check context=%p addr=0x" DMA_ADDR_FMT
-            " len=0x" DMA_ADDR_FMT " dir=%d\n", dma, addr, len, dir);
-#endif
-
-    while (len) {
-        if (dma->translate(dma, addr, &paddr, &plen, dir) != 0) {
-            return false;
-        }
-
-        /* The translation might be valid for larger regions. */
-        if (plen > len) {
-            plen = len;
-        }
-
-        if (!address_space_valid(dma->as, paddr, len,
-                                 dir == DMA_DIRECTION_FROM_DEVICE)) {
-            return false;
-        }
-
-        len -= plen;
-        addr += plen;
-    }
-
-    return true;
-}
-
-int iommu_dma_memory_rw(DMAContext *dma, dma_addr_t addr,
-                        void *buf, dma_addr_t len, DMADirection dir)
+void dma_context_init(DMAContext *dma, AddressSpace *as)
 {
-    hwaddr paddr, plen;
-    int err;
-
 #ifdef DEBUG_IOMMU
-    fprintf(stderr, "dma_memory_rw context=%p addr=0x" DMA_ADDR_FMT " len=0x"
-            DMA_ADDR_FMT " dir=%d\n", dma, addr, len, dir);
-#endif
-
-    while (len) {
-        err = dma->translate(dma, addr, &paddr, &plen, dir);
-        if (err) {
-	    /*
-             * In case of failure on reads from the guest, we clean the
-             * destination buffer so that a device that doesn't test
-             * for errors will not expose qemu internal memory.
-	     */
-	    memset(buf, 0, len);
-            return -1;
-        }
-
-        /* The translation might be valid for larger regions. */
-        if (plen > len) {
-            plen = len;
-        }
-
-        address_space_rw(dma->as, paddr, buf, plen, dir == DMA_DIRECTION_FROM_DEVICE);
-
-        len -= plen;
-        addr += plen;
-        buf += plen;
-    }
-
-    return 0;
-}
-
-int iommu_dma_memory_set(DMAContext *dma, dma_addr_t addr, uint8_t c,
-                         dma_addr_t len)
-{
-    hwaddr paddr, plen;
-    int err;
-
-#ifdef DEBUG_IOMMU
-    fprintf(stderr, "dma_memory_set context=%p addr=0x" DMA_ADDR_FMT
-            " len=0x" DMA_ADDR_FMT "\n", dma, addr, len);
-#endif
-
-    while (len) {
-        err = dma->translate(dma, addr, &paddr, &plen,
-                             DMA_DIRECTION_FROM_DEVICE);
-        if (err) {
-            return err;
-        }
-
-        /* The translation might be valid for larger regions. */
-        if (plen > len) {
-            plen = len;
-        }
-
-        do_dma_memory_set(dma->as, paddr, c, plen);
-
-        len -= plen;
-        addr += plen;
-    }
-
-    return 0;
-}
-
-void dma_context_init(DMAContext *dma, AddressSpace *as, DMATranslateFunc translate,
-                      DMAMapFunc map, DMAUnmapFunc unmap)
-{
-#ifdef DEBUG_IOMMU
-    fprintf(stderr, "dma_context_init(%p, %p, %p, %p)\n",
-            dma, translate, map, unmap);
+    fprintf(stderr, "dma_context_init(%p -> %p)\n", dma, as);
 #endif
     dma->as = as;
-    dma->translate = translate;
-    dma->map = map;
-    dma->unmap = unmap;
-}
-
-void *iommu_dma_memory_map(DMAContext *dma, dma_addr_t addr, dma_addr_t *len,
-                           DMADirection dir)
-{
-    int err;
-    hwaddr paddr, plen;
-    void *buf;
-
-    if (dma->map) {
-        return dma->map(dma, addr, len, dir);
-    }
-
-    plen = *len;
-    err = dma->translate(dma, addr, &paddr, &plen, dir);
-    if (err) {
-        return NULL;
-    }
-
-    /*
-     * If this is true, the virtual region is contiguous,
-     * but the translated physical region isn't. We just
-     * clamp *len, much like address_space_map() does.
-     */
-    if (plen < *len) {
-        *len = plen;
-    }
-
-    buf = address_space_map(dma->as, paddr, &plen, dir == DMA_DIRECTION_FROM_DEVICE);
-    *len = plen;
-
-    return buf;
-}
-
-void iommu_dma_memory_unmap(DMAContext *dma, void *buffer, dma_addr_t len,
-                            DMADirection dir, dma_addr_t access_len)
-{
-    if (dma->unmap) {
-        dma->unmap(dma, buffer, len, dir, access_len);
-        return;
-    }
-
-    address_space_unmap(dma->as, buffer, len, dir == DMA_DIRECTION_FROM_DEVICE,
-                        access_len);
-
 }
diff --git a/exec.c b/exec.c
index 7dab2fa..c2e5d95 100644
--- a/exec.c
+++ b/exec.c
@@ -1870,8 +1870,7 @@  static void memory_map_init(void)
     memory_listener_register(&io_memory_listener, &address_space_io);
     memory_listener_register(&tcg_memory_listener, &address_space_memory);
 
-    dma_context_init(&dma_context_memory, &address_space_memory,
-                     NULL, NULL, NULL);
+    dma_context_init(&dma_context_memory, &address_space_memory);
 }
 
 MemoryRegion *get_system_memory(void)
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index d5257ed..16ed118 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -814,8 +814,9 @@  static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
         memory_region_set_enabled(&pci_dev->bus_master_enable_region, false);
         address_space_init(&pci_dev->bus_master_as, &pci_dev->bus_master_enable_region);
         pci_dev->dma = g_new(DMAContext, 1);
-        dma_context_init(pci_dev->dma, &pci_dev->bus_master_as, NULL, NULL, NULL);
+        dma_context_init(pci_dev->dma, &pci_dev->bus_master_as);
     }
+
     pci_dev->devfn = devfn;
     pstrcpy(pci_dev->name, sizeof(pci_dev->name), name);
     pci_dev->irq_state = 0;
diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
index 75d8370..c384322 100644
--- a/hw/ppc/spapr_iommu.c
+++ b/hw/ppc/spapr_iommu.c
@@ -155,7 +155,7 @@  sPAPRTCETable *spapr_tce_new_table(uint32_t liobn, size_t window_size)
                              &address_space_memory,
                              "iommu-spapr", INT64_MAX);
     address_space_init(&tcet->as, &tcet->iommu);
-    dma_context_init(&tcet->dma, &tcet->as, NULL, NULL, NULL);
+    dma_context_init(&tcet->dma, &tcet->as);
 
     QLIST_INSERT_HEAD(&spapr_tce_tables, tcet, list);
 
diff --git a/include/sysemu/dma.h b/include/sysemu/dma.h
index 2e239dc..7521d50 100644
--- a/include/sysemu/dma.h
+++ b/include/sysemu/dma.h
@@ -46,26 +46,8 @@  typedef uint64_t dma_addr_t;
 #define DMA_ADDR_BITS 64
 #define DMA_ADDR_FMT "%" PRIx64
 
-typedef int DMATranslateFunc(DMAContext *dma,
-                             dma_addr_t addr,
-                             hwaddr *paddr,
-                             hwaddr *len,
-                             DMADirection dir);
-typedef void* DMAMapFunc(DMAContext *dma,
-                         dma_addr_t addr,
-                         dma_addr_t *len,
-                         DMADirection dir);
-typedef void DMAUnmapFunc(DMAContext *dma,
-                          void *buffer,
-                          dma_addr_t len,
-                          DMADirection dir,
-                          dma_addr_t access_len);
-
 struct DMAContext {
     AddressSpace *as;
-    DMATranslateFunc *translate;
-    DMAMapFunc *map;
-    DMAUnmapFunc *unmap;
 };
 
 /* A global DMA context corresponding to the address_space_memory
@@ -98,115 +80,78 @@  static inline void dma_barrier(DMAContext *dma, DMADirection dir)
     }
 }
 
-static inline bool dma_has_iommu(DMAContext *dma)
-{
-    return dma && dma->translate;
-}
-
 /* Checks that the given range of addresses is valid for DMA.  This is
  * useful for certain cases, but usually you should just use
  * dma_memory_{read,write}() and check for errors */
-bool iommu_dma_memory_valid(DMAContext *dma, dma_addr_t addr, dma_addr_t len,
-                            DMADirection dir);
 static inline bool dma_memory_valid(DMAContext *dma,
                                     dma_addr_t addr, dma_addr_t len,
                                     DMADirection dir)
 {
-    if (!dma_has_iommu(dma)) {
-        return address_space_valid(dma->as, addr, len,
-                                   dir == DMA_DIRECTION_FROM_DEVICE);
-    } else {
-        return iommu_dma_memory_valid(dma, addr, len, dir);
-    }
+    return address_space_valid(dma->as, addr, len,
+                               dir == DMA_DIRECTION_FROM_DEVICE);
 }
 
-int iommu_dma_memory_rw(DMAContext *dma, dma_addr_t addr,
-                        void *buf, dma_addr_t len, DMADirection dir);
-static inline int dma_memory_rw_relaxed(DMAContext *dma, dma_addr_t addr,
-                                        void *buf, dma_addr_t len,
-                                        DMADirection dir)
+static inline void dma_memory_rw_relaxed(DMAContext *dma, dma_addr_t addr,
+                                         void *buf, dma_addr_t len,
+                                         DMADirection dir)
 {
-    if (!dma_has_iommu(dma)) {
-        /* Fast-path for no IOMMU */
-        address_space_rw(dma->as, addr, buf, len, dir == DMA_DIRECTION_FROM_DEVICE);
-        return 0;
-    } else {
-        return iommu_dma_memory_rw(dma, addr, buf, len, dir);
-    }
+    address_space_rw(dma->as, addr, buf, len, dir == DMA_DIRECTION_FROM_DEVICE);
 }
 
-static inline int dma_memory_read_relaxed(DMAContext *dma, dma_addr_t addr,
-                                          void *buf, dma_addr_t len)
+static inline void dma_memory_read_relaxed(DMAContext *dma, dma_addr_t addr,
+                                           void *buf, dma_addr_t len)
 {
-    return dma_memory_rw_relaxed(dma, addr, buf, len, DMA_DIRECTION_TO_DEVICE);
+    dma_memory_rw_relaxed(dma, addr, buf, len, DMA_DIRECTION_TO_DEVICE);
 }
 
-static inline int dma_memory_write_relaxed(DMAContext *dma, dma_addr_t addr,
-                                           const void *buf, dma_addr_t len)
+static inline void dma_memory_write_relaxed(DMAContext *dma, dma_addr_t addr,
+                                            const void *buf, dma_addr_t len)
 {
-    return dma_memory_rw_relaxed(dma, addr, (void *)buf, len,
+    dma_memory_rw_relaxed(dma, addr, (void *)buf, len,
                                  DMA_DIRECTION_FROM_DEVICE);
 }
 
-static inline int dma_memory_rw(DMAContext *dma, dma_addr_t addr,
-                                void *buf, dma_addr_t len,
-                                DMADirection dir)
+static inline void dma_memory_rw(DMAContext *dma, dma_addr_t addr,
+                                 void *buf, dma_addr_t len,
+                                 DMADirection dir)
 {
     dma_barrier(dma, dir);
 
-    return dma_memory_rw_relaxed(dma, addr, buf, len, dir);
+    dma_memory_rw_relaxed(dma, addr, buf, len, dir);
 }
 
-static inline int dma_memory_read(DMAContext *dma, dma_addr_t addr,
-                                  void *buf, dma_addr_t len)
+static inline void dma_memory_read(DMAContext *dma, dma_addr_t addr,
+                                   void *buf, dma_addr_t len)
 {
-    return dma_memory_rw(dma, addr, buf, len, DMA_DIRECTION_TO_DEVICE);
+    dma_memory_rw(dma, addr, buf, len, DMA_DIRECTION_TO_DEVICE);
 }
 
-static inline int dma_memory_write(DMAContext *dma, dma_addr_t addr,
-                                   const void *buf, dma_addr_t len)
+static inline void dma_memory_write(DMAContext *dma, dma_addr_t addr,
+                                    const void *buf, dma_addr_t len)
 {
-    return dma_memory_rw(dma, addr, (void *)buf, len,
-                         DMA_DIRECTION_FROM_DEVICE);
+    dma_memory_rw(dma, addr, (void *)buf, len, DMA_DIRECTION_FROM_DEVICE);
 }
 
-int iommu_dma_memory_set(DMAContext *dma, dma_addr_t addr, uint8_t c,
-			 dma_addr_t len);
-
 int dma_memory_set(DMAContext *dma, dma_addr_t addr, uint8_t c, dma_addr_t len);
 
-void *iommu_dma_memory_map(DMAContext *dma,
-                           dma_addr_t addr, dma_addr_t *len,
-                           DMADirection dir);
 static inline void *dma_memory_map(DMAContext *dma,
                                    dma_addr_t addr, dma_addr_t *len,
                                    DMADirection dir)
 {
-    if (!dma_has_iommu(dma)) {
-        hwaddr xlen = *len;
-        void *p;
-
-        p = address_space_map(dma->as, addr, &xlen, dir == DMA_DIRECTION_FROM_DEVICE);
-        *len = xlen;
-        return p;
-    } else {
-        return iommu_dma_memory_map(dma, addr, len, dir);
-    }
+    hwaddr xlen = *len;
+    void *p;
+
+    p = address_space_map(dma->as, addr, &xlen, dir == DMA_DIRECTION_FROM_DEVICE);
+    *len = xlen;
+    return p;
 }
 
-void iommu_dma_memory_unmap(DMAContext *dma,
-                            void *buffer, dma_addr_t len,
-                            DMADirection dir, dma_addr_t access_len);
 static inline void dma_memory_unmap(DMAContext *dma,
                                     void *buffer, dma_addr_t len,
                                     DMADirection dir, dma_addr_t access_len)
 {
-    if (!dma_has_iommu(dma)) {
-        address_space_unmap(dma->as, buffer, (hwaddr)len,
-                            dir == DMA_DIRECTION_FROM_DEVICE, access_len);
-    } else {
-        iommu_dma_memory_unmap(dma, buffer, len, dir, access_len);
-    }
+    address_space_unmap(dma->as, buffer, (hwaddr)len,
+                        dir == DMA_DIRECTION_FROM_DEVICE, access_len);
 }
 
 #define DEFINE_LDST_DMA(_lname, _sname, _bits, _end) \
@@ -247,8 +192,7 @@  DEFINE_LDST_DMA(q, q, 64, be);
 
 #undef DEFINE_LDST_DMA
 
-void dma_context_init(DMAContext *dma, AddressSpace *as, DMATranslateFunc translate,
-                      DMAMapFunc map, DMAUnmapFunc unmap);
+void dma_context_init(DMAContext *dma, AddressSpace *as);
 
 struct ScatterGatherEntry {
     dma_addr_t base;
-- 
1.7.1


From 792356993c94cf713d0d6335a9056e9c9937ba2b Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi.kivity@gmail.com>
Date: Tue, 30 Oct 2012 13:47:48 +0200
Subject: [PATCH 20/40] pci: use memory core for iommu support

Use the new IOMMU support in the memory core to implement IOMMU support
in the PCI layer.  The only user, spapr, is also converted, but it still
provides a DMAContext interface until the non-PCI bits switch to
AddressSpace.

Cc: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Avi Kivity <avi.kivity@gmail.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 hw/pci/pci.c             |   53 ++++++++++++++++++++++++++--------------------
 hw/ppc/spapr_pci.c       |   12 +++++++---
 include/hw/pci/pci.h     |    7 ++++-
 include/hw/pci/pci_bus.h |    5 ++-
 4 files changed, 46 insertions(+), 31 deletions(-)

diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 16ed118..3eb397c 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -279,6 +279,16 @@  int pci_find_domain(const PCIBus *bus)
     return -1;
 }
 
+static MemoryRegion *pci_default_iommu(PCIBus *bus, void *opaque, int devfn)
+{
+    /* FIXME: inherit memory region from bus creator */
+    return get_system_memory();
+}
+
+static void pci_default_iommu_dtor(MemoryRegion *mr)
+{
+}
+
 static void pci_bus_init(PCIBus *bus, DeviceState *parent,
                          const char *name,
                          MemoryRegion *address_space_mem,
@@ -289,6 +299,7 @@  static void pci_bus_init(PCIBus *bus, DeviceState *parent,
     bus->devfn_min = devfn_min;
     bus->address_space_mem = address_space_mem;
     bus->address_space_io = address_space_io;
+    pci_setup_iommu(bus, pci_default_iommu, NULL, NULL);
 
     /* host bridge */
     QLIST_INIT(&bus->child);
@@ -801,21 +812,15 @@  static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
                      PCI_SLOT(devfn), PCI_FUNC(devfn), name, bus->devices[devfn]->name);
         return NULL;
     }
+
     pci_dev->bus = bus;
-    if (bus->dma_context_fn) {
-        pci_dev->dma = bus->dma_context_fn(bus, bus->dma_context_opaque, devfn);
-    } else {
-        /* FIXME: Make dma_context_fn use MemoryRegions instead, so this path is
-         * taken unconditionally */
-        /* FIXME: inherit memory region from bus creator */
-        memory_region_init_alias(&pci_dev->bus_master_enable_region, "bus master",
-                                 get_system_memory(), 0,
-                                 memory_region_size(get_system_memory()));
-        memory_region_set_enabled(&pci_dev->bus_master_enable_region, false);
-        address_space_init(&pci_dev->bus_master_as, &pci_dev->bus_master_enable_region);
-        pci_dev->dma = g_new(DMAContext, 1);
-        dma_context_init(pci_dev->dma, &pci_dev->bus_master_as);
-    }
+    pci_dev->iommu = bus->iommu_fn(bus, bus->iommu_opaque, devfn);
+    memory_region_init_alias(&pci_dev->bus_master_enable_region, "bus master",
+                             pci_dev->iommu, 0, memory_region_size(pci_dev->iommu));
+    memory_region_set_enabled(&pci_dev->bus_master_enable_region, false);
+    address_space_init(&pci_dev->bus_master_as, &pci_dev->bus_master_enable_region);
+    pci_dev->dma = g_new(DMAContext, 1);
+    dma_context_init(pci_dev->dma, &pci_dev->bus_master_as);
 
     pci_dev->devfn = devfn;
     pstrcpy(pci_dev->name, sizeof(pci_dev->name), name);
@@ -870,12 +875,12 @@  static void do_pci_unregister_device(PCIDevice *pci_dev)
     pci_dev->bus->devices[pci_dev->devfn] = NULL;
     pci_config_free(pci_dev);
 
-    if (!pci_dev->bus->dma_context_fn) {
-        address_space_destroy(&pci_dev->bus_master_as);
-        memory_region_destroy(&pci_dev->bus_master_enable_region);
-        g_free(pci_dev->dma);
-        pci_dev->dma = NULL;
-    }
+    address_space_destroy(&pci_dev->bus_master_as);
+    memory_region_del_subregion(&pci_dev->bus_master_enable_region, pci_dev->iommu);
+    pci_dev->bus->iommu_dtor_fn(pci_dev->iommu);
+    memory_region_destroy(&pci_dev->bus_master_enable_region);
+    g_free(pci_dev->dma);
+    pci_dev->dma = NULL;
 }
 
 static void pci_unregister_io_regions(PCIDevice *pci_dev)
@@ -2234,10 +2239,12 @@  static void pci_device_class_init(ObjectClass *klass, void *data)
     k->props = pci_props;
 }
 
-void pci_setup_iommu(PCIBus *bus, PCIDMAContextFunc fn, void *opaque)
+void pci_setup_iommu(PCIBus *bus, PCIIOMMUFunc fn, PCIIOMMUDestructorFunc dtor,
+                     void *opaque)
 {
-    bus->dma_context_fn = fn;
-    bus->dma_context_opaque = opaque;
+    bus->iommu_fn = fn;
+    bus->iommu_dtor_fn = dtor ? dtor : pci_default_iommu_dtor;
+    bus->iommu_opaque = opaque;
 }
 
 static const TypeInfo pci_device_type_info = {
diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index eb64a8f..ffbb45e 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -506,12 +506,11 @@  static const MemoryRegionOps spapr_msi_ops = {
 /*
  * PHB PCI device
  */
-static DMAContext *spapr_pci_dma_context_fn(PCIBus *bus, void *opaque,
-                                            int devfn)
+static MemoryRegion *spapr_pci_dma_iommu(PCIBus *bus, void *opaque, int devfn)
 {
     sPAPRPHBState *phb = opaque;
 
-    return spapr_tce_get_dma(phb->tcet);
+    return spapr_tce_get_iommu(phb->tcet);
 }
 
 static int spapr_phb_init(SysBusDevice *s)
@@ -651,7 +655,7 @@  static int spapr_phb_init(SysBusDevice *s)
         fprintf(stderr, "Unable to create TCE table for %s\n", sphb->dtbusname);
         return -1;
     }
-    pci_setup_iommu(bus, spapr_pci_dma_context_fn, sphb);
+    pci_setup_iommu(bus, spapr_pci_dma_iommu, NULL, sphb);
 
     QLIST_INSERT_HEAD(&spapr->phbs, sphb, list);
 
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index 8d075ab..7e7b0f4 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -242,6 +242,7 @@  struct PCIDevice {
     PCIIORegion io_regions[PCI_NUM_REGIONS];
     AddressSpace bus_master_as;
     MemoryRegion bus_master_enable_region;
+    MemoryRegion *iommu;
     DMAContext *dma;
 
     /* do not access the following fields */
@@ -401,9 +402,11 @@  int pci_read_devaddr(Monitor *mon, const char *addr, int *domp, int *busp,
 
 void pci_device_deassert_intx(PCIDevice *dev);
 
-typedef DMAContext *(*PCIDMAContextFunc)(PCIBus *, void *, int);
+typedef MemoryRegion *(*PCIIOMMUFunc)(PCIBus *, void *, int);
+typedef void (*PCIIOMMUDestructorFunc)(MemoryRegion *mr);
 
-void pci_setup_iommu(PCIBus *bus, PCIDMAContextFunc fn, void *opaque);
+void pci_setup_iommu(PCIBus *bus, PCIIOMMUFunc fn, PCIIOMMUDestructorFunc dtor,
+                     void *opaque);
 
 static inline void
 pci_set_byte(uint8_t *config, uint8_t val)
diff --git a/include/hw/pci/pci_bus.h b/include/hw/pci/pci_bus.h
index 6ee443c..fada8f5 100644
--- a/include/hw/pci/pci_bus.h
+++ b/include/hw/pci/pci_bus.h
@@ -10,8 +10,9 @@ 
 
 struct PCIBus {
     BusState qbus;
-    PCIDMAContextFunc dma_context_fn;
-    void *dma_context_opaque;
+    PCIIOMMUFunc iommu_fn;
+    PCIIOMMUDestructorFunc iommu_dtor_fn;
+    void *iommu_opaque;
     uint8_t devfn_min;
     pci_set_irq_fn set_irq;
     pci_map_irq_fn map_irq;
-- 
1.7.1


From bcc2d6705279feb9b3acb3ecd74d288f99b7a280 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Thu, 11 Apr 2013 12:38:50 +0200
Subject: [PATCH 21/40] spapr_vio: take care of creating our own AddressSpace/DMAContext

Fetch the root region from the sPAPRTCETable, and use it to build
an AddressSpace and DMAContext.

Now, everywhere we have a DMAContext we also have access to the
corresponding AddressSpace (either because we create it just before
the DMAContext, or because dma_context_memory's AddressSpace is
trivially address_space_memory).

Acked-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 hw/ppc/spapr_iommu.c       |   11 -----------
 hw/ppc/spapr_vio.c         |    3 ++-
 include/hw/ppc/spapr.h     |    1 -
 include/hw/ppc/spapr_vio.h |   36 +++++++++++++++++++++++-------------
 4 files changed, 25 insertions(+), 26 deletions(-)

diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
index c384322..90469b3 100644
--- a/hw/ppc/spapr_iommu.c
+++ b/hw/ppc/spapr_iommu.c
@@ -37,10 +37,6 @@  enum sPAPRTCEAccess {
 };
 
 struct sPAPRTCETable {
-    /* temporary until everyone has its own AddressSpace */
-    DMAContext dma;
-    AddressSpace as;
-
     uint32_t liobn;
     uint32_t window_size;
     sPAPRTCE *table;
@@ -154,8 +150,6 @@  sPAPRTCETable *spapr_tce_new_table(uint32_t liobn, size_t window_size)
     memory_region_init_iommu(&tcet->iommu, &spapr_iommu_ops,
                              &address_space_memory,
                              "iommu-spapr", INT64_MAX);
-    address_space_init(&tcet->as, &tcet->iommu);
-    dma_context_init(&tcet->dma, &tcet->as);
 
     QLIST_INSERT_HEAD(&spapr_tce_tables, tcet, list);
 
@@ -175,11 +169,6 @@  void spapr_tce_free(sPAPRTCETable *tcet)
     g_free(tcet);
 }
 
-DMAContext *spapr_tce_get_dma(sPAPRTCETable *tcet)
-{
-    return &tcet->dma;
-}
-
 MemoryRegion *spapr_tce_get_iommu(sPAPRTCETable *tcet)
 {
     return &tcet->iommu;
diff --git a/hw/ppc/spapr_vio.c b/hw/ppc/spapr_vio.c
index a06ac94..8d77a36 100644
--- a/hw/ppc/spapr_vio.c
+++ b/hw/ppc/spapr_vio.c
@@ -458,7 +458,8 @@  static int spapr_vio_busdev_init(DeviceState *qdev)
     if (pc->rtce_window_size) {
         uint32_t liobn = SPAPR_VIO_BASE_LIOBN | dev->reg;
         dev->tcet = spapr_tce_new_table(liobn, pc->rtce_window_size);
-        dev->dma = spapr_tce_get_dma(dev->tcet);
+        address_space_init(&dev->as, spapr_tce_get_iommu(dev->tcet));
+        dma_context_init(&dev->dma, &dev->as);
     }
 
     return pc->init(dev);
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 142abb7..a83720e 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -348,7 +348,6 @@  void spapr_iommu_init(void);
 void spapr_events_init(sPAPREnvironment *spapr);
 void spapr_events_fdt_skel(void *fdt, uint32_t epow_irq);
 sPAPRTCETable *spapr_tce_new_table(uint32_t liobn, size_t window_size);
-DMAContext *spapr_tce_get_dma(sPAPRTCETable *tcet);
 MemoryRegion *spapr_tce_get_iommu(sPAPRTCETable *tcet);
 void spapr_tce_free(sPAPRTCETable *tcet);
 void spapr_tce_reset(sPAPRTCETable *tcet);
diff --git a/include/hw/ppc/spapr_vio.h b/include/hw/ppc/spapr_vio.h
index 56f2821..b757f32 100644
--- a/include/hw/ppc/spapr_vio.h
+++ b/include/hw/ppc/spapr_vio.h
@@ -63,8 +63,9 @@  struct VIOsPAPRDevice {
     uint32_t irq;
     target_ulong signal_state;
     VIOsPAPR_CRQ crq;
+    AddressSpace as;
+    DMAContext dma;
     sPAPRTCETable *tcet;
-    DMAContext *dma;
 };
 
 #define DEFINE_SPAPR_PROPERTIES(type, field)           \
@@ -92,35 +93,44 @@  static inline qemu_irq spapr_vio_qirq(VIOsPAPRDevice *dev)
 static inline bool spapr_vio_dma_valid(VIOsPAPRDevice *dev, uint64_t taddr,
                                        uint32_t size, DMADirection dir)
 {
-    return dma_memory_valid(dev->dma, taddr, size, dir);
+    return dma_memory_valid(&dev->dma, taddr, size, dir);
 }
 
 static inline int spapr_vio_dma_read(VIOsPAPRDevice *dev, uint64_t taddr,
                                      void *buf, uint32_t size)
 {
-    return (dma_memory_read(dev->dma, taddr, buf, size) != 0) ?
-        H_DEST_PARM : H_SUCCESS;
+    if (!dma_memory_valid(&dev->dma, taddr, size, DMA_DIRECTION_TO_DEVICE)) {
+        return H_DEST_PARM;
+    }
+    dma_memory_read(&dev->dma, taddr, buf, size);
+    return H_SUCCESS;
 }
 
 static inline int spapr_vio_dma_write(VIOsPAPRDevice *dev, uint64_t taddr,
                                       const void *buf, uint32_t size)
 {
-    return (dma_memory_write(dev->dma, taddr, buf, size) != 0) ?
-        H_DEST_PARM : H_SUCCESS;
+    if (!dma_memory_valid(&dev->dma, taddr, size, DMA_DIRECTION_FROM_DEVICE)) {
+        return H_DEST_PARM;
+    }
+    dma_memory_write(&dev->dma, taddr, buf, size);
+    return H_SUCCESS;
 }
 
 static inline int spapr_vio_dma_set(VIOsPAPRDevice *dev, uint64_t taddr,
                                     uint8_t c, uint32_t size)
 {
-    return (dma_memory_set(dev->dma, taddr, c, size) != 0) ?
-        H_DEST_PARM : H_SUCCESS;
+    if (!dma_memory_valid(&dev->dma, taddr, size, DMA_DIRECTION_FROM_DEVICE)) {
+        return H_DEST_PARM;
+    }
+    dma_memory_set(&dev->dma, taddr, c, size);
+    return H_SUCCESS;
 }
 
-#define vio_stb(_dev, _addr, _val) (stb_dma((_dev)->dma, (_addr), (_val)))
-#define vio_sth(_dev, _addr, _val) (stw_be_dma((_dev)->dma, (_addr), (_val)))
-#define vio_stl(_dev, _addr, _val) (stl_be_dma((_dev)->dma, (_addr), (_val)))
-#define vio_stq(_dev, _addr, _val) (stq_be_dma((_dev)->dma, (_addr), (_val)))
-#define vio_ldq(_dev, _addr) (ldq_be_dma((_dev)->dma, (_addr)))
+#define vio_stb(_dev, _addr, _val) (stb_dma(&(_dev)->dma, (_addr), (_val)))
+#define vio_sth(_dev, _addr, _val) (stw_be_dma(&(_dev)->dma, (_addr), (_val)))
+#define vio_stl(_dev, _addr, _val) (stl_be_dma(&(_dev)->dma, (_addr), (_val)))
+#define vio_stq(_dev, _addr, _val) (stq_be_dma(&(_dev)->dma, (_addr), (_val)))
+#define vio_ldq(_dev, _addr) (ldq_be_dma(&(_dev)->dma, (_addr)))
 
 int spapr_vio_send_crq(VIOsPAPRDevice *dev, uint8_t *crq);
 
-- 
1.7.1


From b87464e18f334cb100999f47ce0f98657bf8ddfb Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 10 Apr 2013 18:15:49 +0200
Subject: [PATCH 22/40] dma: eliminate DMAContext

The DMAContext is now just a pointer to an AddressSpace, and that
AddressSpace is always available wherever a DMAContext is used.  Make
everyone hold the address space directly, and clean up the DMA API to
take an AddressSpace instead.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 dma-helpers.c              |   24 ++++----------
 exec.c                     |    3 --
 hw/dma/pl330.c             |    8 ++--
 hw/ide/ahci.c              |   18 +++++-----
 hw/ide/ahci.h              |    4 +-
 hw/ide/ich.c               |    2 +-
 hw/ide/macio.c             |    4 +-
 hw/pci/pci.c               |    4 --
 hw/ppc/spapr_vio.c         |    1 -
 hw/scsi/megasas.c          |    4 +-
 hw/scsi/virtio-scsi.c      |    2 +-
 hw/scsi/vmw_pvscsi.c       |    2 +-
 hw/sd/sdhci.c              |   22 ++++++------
 hw/usb/hcd-ehci-pci.c      |    4 +-
 hw/usb/hcd-ehci-sysbus.c   |    2 +-
 hw/usb/hcd-ehci.c          |   12 +++---
 hw/usb/hcd-ehci.h          |    2 +-
 hw/usb/hcd-ohci.c          |   30 +++++++++---------
 hw/usb/libhw.c             |    4 +-
 include/hw/pci/pci.h       |   17 +++++-----
 include/hw/ppc/spapr_vio.h |   25 +++++++-------
 include/sysemu/dma.h       |   75 +++++++++++++++++++-------------------------
 22 files changed, 119 insertions(+), 150 deletions(-)

diff --git a/dma-helpers.c b/dma-helpers.c
index 3e23d47..15cabfe 100644
--- a/dma-helpers.c
+++ b/dma-helpers.c
@@ -14,11 +14,9 @@ 
 
 /* #define DEBUG_IOMMU */
 
-int dma_memory_set(DMAContext *dma, dma_addr_t addr, uint8_t c, dma_addr_t len)
+int dma_memory_set(AddressSpace *as, dma_addr_t addr, uint8_t c, dma_addr_t len)
 {
-    AddressSpace *as = dma->as;
-
-    dma_barrier(dma, DMA_DIRECTION_FROM_DEVICE);
+    dma_barrier(as, DMA_DIRECTION_FROM_DEVICE);
 
 #define FILLBUF_SIZE 512
     uint8_t fillbuf[FILLBUF_SIZE];
@@ -35,13 +33,13 @@  int dma_memory_set(DMAContext *dma, dma_addr_t addr, uint8_t c, dma_addr_t len)
     return 0;
 }
 
-void qemu_sglist_init(QEMUSGList *qsg, int alloc_hint, DMAContext *dma)
+void qemu_sglist_init(QEMUSGList *qsg, int alloc_hint, AddressSpace *as)
 {
     qsg->sg = g_malloc(alloc_hint * sizeof(ScatterGatherEntry));
     qsg->nsg = 0;
     qsg->nalloc = alloc_hint;
     qsg->size = 0;
-    qsg->dma = dma;
+    qsg->as = as;
 }
 
 void qemu_sglist_add(QEMUSGList *qsg, dma_addr_t base, dma_addr_t len)
@@ -101,7 +99,7 @@  static void dma_bdrv_unmap(DMAAIOCB *dbs)
     int i;
 
     for (i = 0; i < dbs->iov.niov; ++i) {
-        dma_memory_unmap(dbs->sg->dma, dbs->iov.iov[i].iov_base,
+        dma_memory_unmap(dbs->sg->as, dbs->iov.iov[i].iov_base,
                          dbs->iov.iov[i].iov_len, dbs->dir,
                          dbs->iov.iov[i].iov_len);
     }
@@ -149,7 +147,7 @@  static void dma_bdrv_cb(void *opaque, int ret)
     while (dbs->sg_cur_index < dbs->sg->nsg) {
         cur_addr = dbs->sg->sg[dbs->sg_cur_index].base + dbs->sg_cur_byte;
         cur_len = dbs->sg->sg[dbs->sg_cur_index].len - dbs->sg_cur_byte;
-        mem = dma_memory_map(dbs->sg->dma, cur_addr, &cur_len, dbs->dir);
+        mem = dma_memory_map(dbs->sg->as, cur_addr, &cur_len, dbs->dir);
         if (!mem)
             break;
         qemu_iovec_add(&dbs->iov, mem, cur_len);
@@ -246,7 +244,7 @@  static uint64_t dma_buf_rw(uint8_t *ptr, int32_t len, QEMUSGList *sg,
     while (len > 0) {
         ScatterGatherEntry entry = sg->sg[sg_cur_index++];
         int32_t xfer = MIN(len, entry.len);
-        dma_memory_rw(sg->dma, entry.base, ptr, xfer, dir);
+        dma_memory_rw(sg->as, entry.base, ptr, xfer, dir);
         ptr += xfer;
         len -= xfer;
         resid -= xfer;
@@ -270,11 +268,3 @@  void dma_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie,
 {
     bdrv_acct_start(bs, cookie, sg->size, type);
 }
-
-void dma_context_init(DMAContext *dma, AddressSpace *as)
-{
-#ifdef DEBUG_IOMMU
-    fprintf(stderr, "dma_context_init(%p -> %p)\n", dma, as);
-#endif
-    dma->as = as;
-}
diff --git a/exec.c b/exec.c
index c2e5d95..2b6e5b8 100644
--- a/exec.c
+++ b/exec.c
@@ -64,7 +64,6 @@  static MemoryRegion *system_io;
 
 AddressSpace address_space_io;
 AddressSpace address_space_memory;
-DMAContext dma_context_memory;
 
 MemoryRegion io_mem_ram, io_mem_rom, io_mem_unassigned, io_mem_notdirty;
 static MemoryRegion io_mem_subpage_ram;
@@ -1869,8 +1868,6 @@  static void memory_map_init(void)
     memory_listener_register(&core_memory_listener, &address_space_memory);
     memory_listener_register(&io_memory_listener, &address_space_io);
     memory_listener_register(&tcg_memory_listener, &address_space_memory);
-
-    dma_context_init(&dma_context_memory, &address_space_memory);
 }
 
 MemoryRegion *get_system_memory(void)
diff --git a/hw/dma/pl330.c b/hw/dma/pl330.c
index 8b33138..44f016d 100644
--- a/hw/dma/pl330.c
+++ b/hw/dma/pl330.c
@@ -1074,7 +1074,7 @@  static inline const PL330InsnDesc *pl330_fetch_insn(PL330Chan *ch)
     uint8_t opcode;
     int i;
 
-    dma_memory_read(&dma_context_memory, ch->pc, &opcode, 1);
+    dma_memory_read(&address_space_memory, ch->pc, &opcode, 1);
     for (i = 0; insn_desc[i].size; i++) {
         if ((opcode & insn_desc[i].opmask) == insn_desc[i].opcode) {
             return &insn_desc[i];
@@ -1088,7 +1088,7 @@  static inline void pl330_exec_insn(PL330Chan *ch, const PL330InsnDesc *insn)
     uint8_t buf[PL330_INSN_MAXSIZE];
 
     assert(insn->size <= PL330_INSN_MAXSIZE);
-    dma_memory_read(&dma_context_memory, ch->pc, buf, insn->size);
+    dma_memory_read(&address_space_memory, ch->pc, buf, insn->size);
     insn->exec(ch, buf[0], &buf[1], insn->size - 1);
 }
 
@@ -1153,7 +1153,7 @@  static int pl330_exec_cycle(PL330Chan *channel)
     if (q != NULL && q->len <= pl330_fifo_num_free(&s->fifo)) {
         int len = q->len - (q->addr & (q->len - 1));
 
-        dma_memory_read(&dma_context_memory, q->addr, buf, len);
+        dma_memory_read(&address_space_memory, q->addr, buf, len);
         if (PL330_ERR_DEBUG > 1) {
             DB_PRINT("PL330 read from memory @%08x (size = %08x):\n",
                       q->addr, len);
@@ -1185,7 +1185,7 @@  static int pl330_exec_cycle(PL330Chan *channel)
             fifo_res = pl330_fifo_get(&s->fifo, buf, len, q->tag);
         }
         if (fifo_res == PL330_FIFO_OK || q->z) {
-            dma_memory_write(&dma_context_memory, q->addr, buf, len);
+            dma_memory_write(&address_space_memory, q->addr, buf, len);
             if (PL330_ERR_DEBUG > 1) {
                 DB_PRINT("PL330 read from memory @%08x (size = %08x):\n",
                          q->addr, len);
diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c
index 3405583..8fe75c9 100644
--- a/hw/ide/ahci.c
+++ b/hw/ide/ahci.c
@@ -597,7 +597,7 @@  static void ahci_write_fis_d2h(AHCIDevice *ad, uint8_t *cmd_fis)
     if (!cmd_fis) {
         /* map cmd_fis */
         uint64_t tbl_addr = le64_to_cpu(ad->cur_cmd->tbl_addr);
-        cmd_fis = dma_memory_map(ad->hba->dma, tbl_addr, &cmd_len,
+        cmd_fis = dma_memory_map(ad->hba->as, tbl_addr, &cmd_len,
                                  DMA_DIRECTION_TO_DEVICE);
         cmd_mapped = 1;
     }
@@ -630,7 +630,7 @@  static void ahci_write_fis_d2h(AHCIDevice *ad, uint8_t *cmd_fis)
     ahci_trigger_irq(ad->hba, ad, PORT_IRQ_D2H_REG_FIS);
 
     if (cmd_mapped) {
-        dma_memory_unmap(ad->hba->dma, cmd_fis, cmd_len,
+        dma_memory_unmap(ad->hba->as, cmd_fis, cmd_len,
                          DMA_DIRECTION_TO_DEVICE, cmd_len);
     }
 }
@@ -657,7 +657,7 @@  static int ahci_populate_sglist(AHCIDevice *ad, QEMUSGList *sglist, int offset)
     }
 
     /* map PRDT */
-    if (!(prdt = dma_memory_map(ad->hba->dma, prdt_addr, &prdt_len,
+    if (!(prdt = dma_memory_map(ad->hba->as, prdt_addr, &prdt_len,
                                 DMA_DIRECTION_TO_DEVICE))){
         DPRINTF(ad->port_no, "map failed\n");
         return -1;
@@ -691,7 +691,7 @@  static int ahci_populate_sglist(AHCIDevice *ad, QEMUSGList *sglist, int offset)
             goto out;
         }
 
-        qemu_sglist_init(sglist, (sglist_alloc_hint - off_idx), ad->hba->dma);
+        qemu_sglist_init(sglist, (sglist_alloc_hint - off_idx), ad->hba->as);
         qemu_sglist_add(sglist, le64_to_cpu(tbl[off_idx].addr + off_pos),
                         le32_to_cpu(tbl[off_idx].flags_size) + 1 - off_pos);
 
@@ -703,7 +703,7 @@  static int ahci_populate_sglist(AHCIDevice *ad, QEMUSGList *sglist, int offset)
     }
 
 out:
-    dma_memory_unmap(ad->hba->dma, prdt, prdt_len,
+    dma_memory_unmap(ad->hba->as, prdt, prdt_len,
                      DMA_DIRECTION_TO_DEVICE, prdt_len);
     return r;
 }
@@ -836,7 +836,7 @@  static int handle_cmd(AHCIState *s, int port, int slot)
     tbl_addr = le64_to_cpu(cmd->tbl_addr);
 
     cmd_len = 0x80;
-    cmd_fis = dma_memory_map(s->dma, tbl_addr, &cmd_len,
+    cmd_fis = dma_memory_map(s->as, tbl_addr, &cmd_len,
                              DMA_DIRECTION_FROM_DEVICE);
 
     if (!cmd_fis) {
@@ -963,7 +963,7 @@  static int handle_cmd(AHCIState *s, int port, int slot)
     }
 
 out:
-    dma_memory_unmap(s->dma, cmd_fis, cmd_len, DMA_DIRECTION_FROM_DEVICE,
+    dma_memory_unmap(s->as, cmd_fis, cmd_len, DMA_DIRECTION_FROM_DEVICE,
                      cmd_len);
 
     if (s->dev[port].port.ifs[0].status & (BUSY_STAT|DRQ_STAT)) {
@@ -1145,12 +1145,12 @@  static const IDEDMAOps ahci_dma_ops = {
     .reset = ahci_dma_reset,
 };
 
-void ahci_init(AHCIState *s, DeviceState *qdev, DMAContext *dma, int ports)
+void ahci_init(AHCIState *s, DeviceState *qdev, AddressSpace *as, int ports)
 {
     qemu_irq *irqs;
     int i;
 
-    s->dma = dma;
+    s->as = as;
     s->ports = ports;
     s->dev = g_malloc0(sizeof(AHCIDevice) * ports);
     ahci_reg_init(s);
diff --git a/hw/ide/ahci.h b/hw/ide/ahci.h
index 85f37fe..341a571 100644
--- a/hw/ide/ahci.h
+++ b/hw/ide/ahci.h
@@ -297,7 +297,7 @@  typedef struct AHCIState {
     uint32_t idp_index;     /* Current IDP index */
     int32_t ports;
     qemu_irq irq;
-    DMAContext *dma;
+    AddressSpace *as;
 } AHCIState;
 
 typedef struct AHCIPCIState {
@@ -338,7 +338,7 @@  typedef struct NCQFrame {
     uint8_t reserved10;
 } QEMU_PACKED NCQFrame;
 
-void ahci_init(AHCIState *s, DeviceState *qdev, DMAContext *dma, int ports);
+void ahci_init(AHCIState *s, DeviceState *qdev, AddressSpace *as, int ports);
 void ahci_uninit(AHCIState *s);
 
 void ahci_reset(AHCIState *s);
diff --git a/hw/ide/ich.c b/hw/ide/ich.c
index ed1f1a2..6c0c0c2 100644
--- a/hw/ide/ich.c
+++ b/hw/ide/ich.c
@@ -104,7 +104,7 @@  static int pci_ich9_ahci_init(PCIDevice *dev)
     uint8_t *sata_cap;
     d = DO_UPCAST(struct AHCIPCIState, card, dev);
 
-    ahci_init(&d->ahci, &dev->qdev, pci_dma_context(dev), 6);
+    ahci_init(&d->ahci, &dev->qdev, pci_get_address_space(dev), 6);
 
     pci_config_set_prog_interface(d->card.config, AHCI_PROGMODE_MAJOR_REV_1);
 
diff --git a/hw/ide/macio.c b/hw/ide/macio.c
index 64b2406..a1a411e 100644
--- a/hw/ide/macio.c
+++ b/hw/ide/macio.c
@@ -71,7 +71,7 @@  static void pmac_ide_atapi_transfer_cb(void *opaque, int ret)
     s->io_buffer_size = io->len;
 
     qemu_sglist_init(&s->sg, io->len / MACIO_PAGE_SIZE + 1,
-                     &dma_context_memory);
+                     &address_space_memory);
     qemu_sglist_add(&s->sg, io->addr, io->len);
     io->addr += io->len;
     io->len = 0;
@@ -128,7 +128,7 @@  static void pmac_ide_transfer_cb(void *opaque, int ret)
     s->io_buffer_size = io->len;
 
     qemu_sglist_init(&s->sg, io->len / MACIO_PAGE_SIZE + 1,
-                     &dma_context_memory);
+                     &address_space_memory);
     qemu_sglist_add(&s->sg, io->addr, io->len);
     io->addr += io->len;
     io->len = 0;
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 3eb397c..c10b776 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -819,8 +819,6 @@  static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
                              pci_dev->iommu, 0, memory_region_size(pci_dev->iommu));
     memory_region_set_enabled(&pci_dev->bus_master_enable_region, false);
     address_space_init(&pci_dev->bus_master_as, &pci_dev->bus_master_enable_region);
-    pci_dev->dma = g_new(DMAContext, 1);
-    dma_context_init(pci_dev->dma, &pci_dev->bus_master_as);
 
     pci_dev->devfn = devfn;
     pstrcpy(pci_dev->name, sizeof(pci_dev->name), name);
@@ -879,8 +877,6 @@  static void do_pci_unregister_device(PCIDevice *pci_dev)
     memory_region_del_subregion(&pci_dev->bus_master_enable_region, pci_dev->iommu);
     pci_dev->bus->iommu_dtor_fn(pci_dev->iommu);
     memory_region_destroy(&pci_dev->bus_master_enable_region);
-    g_free(pci_dev->dma);
-    pci_dev->dma = NULL;
 }
 
 static void pci_unregister_io_regions(PCIDevice *pci_dev)
diff --git a/hw/ppc/spapr_vio.c b/hw/ppc/spapr_vio.c
index 8d77a36..5e72f1b 100644
--- a/hw/ppc/spapr_vio.c
+++ b/hw/ppc/spapr_vio.c
@@ -459,7 +459,6 @@  static int spapr_vio_busdev_init(DeviceState *qdev)
         uint32_t liobn = SPAPR_VIO_BASE_LIOBN | dev->reg;
         dev->tcet = spapr_tce_new_table(liobn, pc->rtce_window_size);
         address_space_init(&dev->as, spapr_tce_get_iommu(dev->tcet));
-        dma_context_init(&dev->dma, &dev->as);
     }
 
     return pc->init(dev);
diff --git a/hw/scsi/megasas.c b/hw/scsi/megasas.c
index 4934a81..483c238 100644
--- a/hw/scsi/megasas.c
+++ b/hw/scsi/megasas.c
@@ -232,7 +232,7 @@  static int megasas_map_sgl(MegasasState *s, MegasasCmd *cmd, union mfi_sgl *sgl)
                                          MEGASAS_MAX_SGE);
         return iov_count;
     }
-    qemu_sglist_init(&cmd->qsg, iov_count, pci_dma_context(&s->dev));
+    qemu_sglist_init(&cmd->qsg, iov_count, pci_get_address_space(&s->dev));
     for (i = 0; i < iov_count; i++) {
         dma_addr_t iov_pa, iov_size_p;
 
@@ -628,7 +628,7 @@  static int megasas_map_dcmd(MegasasState *s, MegasasCmd *cmd)
     }
     iov_pa = megasas_sgl_get_addr(cmd, &cmd->frame->dcmd.sgl);
     iov_size = megasas_sgl_get_len(cmd, &cmd->frame->dcmd.sgl);
-    qemu_sglist_init(&cmd->qsg, 1, pci_dma_context(&s->dev));
+    qemu_sglist_init(&cmd->qsg, 1, pci_get_address_space(&s->dev));
     qemu_sglist_add(&cmd->qsg, iov_pa, iov_size);
     cmd->iov_size = iov_size;
     return cmd->iov_size;
diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c
index 08dd3f3..b8a0abf 100644
--- a/hw/scsi/virtio-scsi.c
+++ b/hw/scsi/virtio-scsi.c
@@ -80,7 +80,7 @@  static void virtio_scsi_bad_req(void)
 static void qemu_sgl_init_external(QEMUSGList *qsgl, struct iovec *sg,
                                    hwaddr *addr, int num)
 {
-    qemu_sglist_init(qsgl, num, &dma_context_memory);
+    qemu_sglist_init(qsgl, num, &address_space_memory);
     while (num--) {
         qemu_sglist_add(qsgl, *(addr++), (sg++)->iov_len);
     }
diff --git a/hw/scsi/vmw_pvscsi.c b/hw/scsi/vmw_pvscsi.c
index 48d12f4..eb2270f 100644
--- a/hw/scsi/vmw_pvscsi.c
+++ b/hw/scsi/vmw_pvscsi.c
@@ -617,7 +617,7 @@  pvscsi_build_sglist(PVSCSIState *s, PVSCSIRequest *r)
 {
     PCIDevice *d = PCI_DEVICE(s);
 
-    qemu_sglist_init(&r->sgl, 1, pci_dma_context(d));
+    pci_dma_sglist_init(&r->sgl, d, 1);
     if (r->req.flags & PVSCSI_FLAG_CMD_WITH_SG_LIST) {
         pvscsi_convert_sglist(r);
     } else {
diff --git a/hw/sd/sdhci.c b/hw/sd/sdhci.c
index 91dc9b0..5d9247a 100644
--- a/hw/sd/sdhci.c
+++ b/hw/sd/sdhci.c
@@ -496,7 +496,7 @@  static void sdhci_sdma_transfer_multi_blocks(SDHCIState *s)
                     s->blkcnt--;
                 }
             }
-            dma_memory_write(&dma_context_memory, s->sdmasysad,
+            dma_memory_write(&address_space_memory, s->sdmasysad,
                              &s->fifo_buffer[begin], s->data_count - begin);
             s->sdmasysad += s->data_count - begin;
             if (s->data_count == block_size) {
@@ -518,7 +518,7 @@  static void sdhci_sdma_transfer_multi_blocks(SDHCIState *s)
                 s->data_count = block_size;
                 boundary_count -= block_size - begin;
             }
-            dma_memory_read(&dma_context_memory, s->sdmasysad,
+            dma_memory_read(&address_space_memory, s->sdmasysad,
                             &s->fifo_buffer[begin], s->data_count);
             s->sdmasysad += s->data_count - begin;
             if (s->data_count == block_size) {
@@ -557,10 +557,10 @@  static void sdhci_sdma_transfer_single_block(SDHCIState *s)
         for (n = 0; n < datacnt; n++) {
             s->fifo_buffer[n] = sd_read_data(s->card);
         }
-        dma_memory_write(&dma_context_memory, s->sdmasysad, s->fifo_buffer,
+        dma_memory_write(&address_space_memory, s->sdmasysad, s->fifo_buffer,
                          datacnt);
     } else {
-        dma_memory_read(&dma_context_memory, s->sdmasysad, s->fifo_buffer,
+        dma_memory_read(&address_space_memory, s->sdmasysad, s->fifo_buffer,
                         datacnt);
         for (n = 0; n < datacnt; n++) {
             sd_write_data(s->card, s->fifo_buffer[n]);
@@ -588,7 +588,7 @@  static void get_adma_description(SDHCIState *s, ADMADescr *dscr)
     hwaddr entry_addr = (hwaddr)s->admasysaddr;
     switch (SDHC_DMA_TYPE(s->hostctl)) {
     case SDHC_CTRL_ADMA2_32:
-        dma_memory_read(&dma_context_memory, entry_addr, (uint8_t *)&adma2,
+        dma_memory_read(&address_space_memory, entry_addr, (uint8_t *)&adma2,
                         sizeof(adma2));
         adma2 = le64_to_cpu(adma2);
         /* The spec does not specify endianness of descriptor table.
@@ -600,7 +600,7 @@  static void get_adma_description(SDHCIState *s, ADMADescr *dscr)
         dscr->incr = 8;
         break;
     case SDHC_CTRL_ADMA1_32:
-        dma_memory_read(&dma_context_memory, entry_addr, (uint8_t *)&adma1,
+        dma_memory_read(&address_space_memory, entry_addr, (uint8_t *)&adma1,
                         sizeof(adma1));
         adma1 = le32_to_cpu(adma1);
         dscr->addr = (hwaddr)(adma1 & 0xFFFFF000);
@@ -613,12 +613,12 @@  static void get_adma_description(SDHCIState *s, ADMADescr *dscr)
         }
         break;
     case SDHC_CTRL_ADMA2_64:
-        dma_memory_read(&dma_context_memory, entry_addr,
+        dma_memory_read(&address_space_memory, entry_addr,
                         (uint8_t *)(&dscr->attr), 1);
-        dma_memory_read(&dma_context_memory, entry_addr + 2,
+        dma_memory_read(&address_space_memory, entry_addr + 2,
                         (uint8_t *)(&dscr->length), 2);
         dscr->length = le16_to_cpu(dscr->length);
-        dma_memory_read(&dma_context_memory, entry_addr + 4,
+        dma_memory_read(&address_space_memory, entry_addr + 4,
                         (uint8_t *)(&dscr->addr), 8);
         dscr->attr = le64_to_cpu(dscr->attr);
         dscr->attr &= 0xfffffff8;
@@ -678,7 +678,7 @@  static void sdhci_do_adma(SDHCIState *s)
                         s->data_count = block_size;
                         length -= block_size - begin;
                     }
-                    dma_memory_write(&dma_context_memory, dscr.addr,
+                    dma_memory_write(&address_space_memory, dscr.addr,
                                      &s->fifo_buffer[begin],
                                      s->data_count - begin);
                     dscr.addr += s->data_count - begin;
@@ -702,7 +702,7 @@  static void sdhci_do_adma(SDHCIState *s)
                         s->data_count = block_size;
                         length -= block_size - begin;
                     }
-                    dma_memory_read(&dma_context_memory, dscr.addr,
+                    dma_memory_read(&address_space_memory, dscr.addr,
                                     &s->fifo_buffer[begin], s->data_count);
                     dscr.addr += s->data_count - begin;
                     if (s->data_count == block_size) {
diff --git a/hw/usb/hcd-ehci-pci.c b/hw/usb/hcd-ehci-pci.c
index 0eb7826..f1b5f5d 100644
--- a/hw/usb/hcd-ehci-pci.c
+++ b/hw/usb/hcd-ehci-pci.c
@@ -63,7 +63,7 @@  static int usb_ehci_pci_initfn(PCIDevice *dev)
     s->caps[0x09] = 0x68;        /* EECP */
 
     s->irq = dev->irq[3];
-    s->dma = pci_dma_context(dev);
+    s->as = pci_get_address_space(dev);
 
     s->capsbase = 0x00;
     s->opregbase = 0x20;
@@ -86,7 +86,7 @@  static void usb_ehci_pci_write_config(PCIDevice *dev, uint32_t addr,
         return;
     }
     busmaster = pci_get_word(dev->config + PCI_COMMAND) & PCI_COMMAND_MASTER;
-    i->ehci.dma = busmaster ? pci_dma_context(dev) : NULL;
+    i->ehci.as = busmaster ? pci_get_address_space(dev) : &address_space_memory;
 }
 
 static Property ehci_pci_properties[] = {
diff --git a/hw/usb/hcd-ehci-sysbus.c b/hw/usb/hcd-ehci-sysbus.c
index b68a66a..f9e4fd3 100644
--- a/hw/usb/hcd-ehci-sysbus.c
+++ b/hw/usb/hcd-ehci-sysbus.c
@@ -40,7 +40,7 @@  static int usb_ehci_sysbus_initfn(SysBusDevice *dev)
 
     s->capsbase = sec->capsbase;
     s->opregbase = sec->opregbase;
-    s->dma = &dma_context_memory;
+    s->as = &address_space_memory;
 
     usb_ehci_initfn(s, DEVICE(dev));
     sysbus_init_irq(dev, &s->irq);
diff --git a/hw/usb/hcd-ehci.c b/hw/usb/hcd-ehci.c
index 0d3799d..1ad2159 100644
--- a/hw/usb/hcd-ehci.c
+++ b/hw/usb/hcd-ehci.c
@@ -446,7 +446,7 @@  static inline int get_dwords(EHCIState *ehci, uint32_t addr,
 {
     int i;
 
-    if (!ehci->dma) {
+    if (!ehci->as) {
         ehci_raise_irq(ehci, USBSTS_HSE);
         ehci->usbcmd &= ~USBCMD_RUNSTOP;
         trace_usb_ehci_dma_error();
@@ -454,7 +454,7 @@  static inline int get_dwords(EHCIState *ehci, uint32_t addr,
     }
 
     for (i = 0; i < num; i++, buf++, addr += sizeof(*buf)) {
-        dma_memory_read(ehci->dma, addr, buf, sizeof(*buf));
+        dma_memory_read(ehci->as, addr, buf, sizeof(*buf));
         *buf = le32_to_cpu(*buf);
     }
 
@@ -467,7 +467,7 @@  static inline int put_dwords(EHCIState *ehci, uint32_t addr,
 {
     int i;
 
-    if (!ehci->dma) {
+    if (!ehci->as) {
         ehci_raise_irq(ehci, USBSTS_HSE);
         ehci->usbcmd &= ~USBCMD_RUNSTOP;
         trace_usb_ehci_dma_error();
@@ -476,7 +476,7 @@  static inline int put_dwords(EHCIState *ehci, uint32_t addr,
 
     for (i = 0; i < num; i++, buf++, addr += sizeof(*buf)) {
         uint32_t tmp = cpu_to_le32(*buf);
-        dma_memory_write(ehci->dma, addr, &tmp, sizeof(tmp));
+        dma_memory_write(ehci->as, addr, &tmp, sizeof(tmp));
     }
 
     return num;
@@ -1245,7 +1245,7 @@  static int ehci_init_transfer(EHCIPacket *p)
     cpage  = get_field(p->qtd.token, QTD_TOKEN_CPAGE);
     bytes  = get_field(p->qtd.token, QTD_TOKEN_TBYTES);
     offset = p->qtd.bufptr[0] & ~QTD_BUFPTR_MASK;
-    qemu_sglist_init(&p->sgl, 5, p->queue->ehci->dma);
+    qemu_sglist_init(&p->sgl, 5, p->queue->ehci->as);
 
     while (bytes > 0) {
         if (cpage > 4) {
@@ -1484,7 +1484,7 @@  static int ehci_process_itd(EHCIState *ehci,
                 return -1;
             }
 
-            qemu_sglist_init(&ehci->isgl, 2, ehci->dma);
+            qemu_sglist_init(&ehci->isgl, 2, ehci->as);
             if (off + len > 4096) {
                 /* transfer crosses page border */
                 uint32_t len2 = off + len - 4096;
diff --git a/hw/usb/hcd-ehci.h b/hw/usb/hcd-ehci.h
index e95bb7e..2fcb92f 100644
--- a/hw/usb/hcd-ehci.h
+++ b/hw/usb/hcd-ehci.h
@@ -261,7 +261,7 @@  struct EHCIState {
     USBBus bus;
     qemu_irq irq;
     MemoryRegion mem;
-    DMAContext *dma;
+    AddressSpace *as;
     MemoryRegion mem_caps;
     MemoryRegion mem_opreg;
     MemoryRegion mem_ports;
diff --git a/hw/usb/hcd-ohci.c b/hw/usb/hcd-ohci.c
index 51241cd..5513924 100644
--- a/hw/usb/hcd-ohci.c
+++ b/hw/usb/hcd-ohci.c
@@ -62,7 +62,7 @@  typedef struct {
     USBBus bus;
     qemu_irq irq;
     MemoryRegion mem;
-    DMAContext *dma;
+    AddressSpace *as;
     int num_ports;
     const char *name;
 
@@ -508,7 +508,7 @@  static inline int get_dwords(OHCIState *ohci,
     addr += ohci->localmem_base;
 
     for (i = 0; i < num; i++, buf++, addr += sizeof(*buf)) {
-        dma_memory_read(ohci->dma, addr, buf, sizeof(*buf));
+        dma_memory_read(ohci->as, addr, buf, sizeof(*buf));
         *buf = le32_to_cpu(*buf);
     }
 
@@ -525,7 +525,7 @@  static inline int put_dwords(OHCIState *ohci,
 
     for (i = 0; i < num; i++, buf++, addr += sizeof(*buf)) {
         uint32_t tmp = cpu_to_le32(*buf);
-        dma_memory_write(ohci->dma, addr, &tmp, sizeof(tmp));
+        dma_memory_write(ohci->as, addr, &tmp, sizeof(tmp));
     }
 
     return 1;
@@ -540,7 +540,7 @@  static inline int get_words(OHCIState *ohci,
     addr += ohci->localmem_base;
 
     for (i = 0; i < num; i++, buf++, addr += sizeof(*buf)) {
-        dma_memory_read(ohci->dma, addr, buf, sizeof(*buf));
+        dma_memory_read(ohci->as, addr, buf, sizeof(*buf));
         *buf = le16_to_cpu(*buf);
     }
 
@@ -557,7 +557,7 @@  static inline int put_words(OHCIState *ohci,
 
     for (i = 0; i < num; i++, buf++, addr += sizeof(*buf)) {
         uint16_t tmp = cpu_to_le16(*buf);
-        dma_memory_write(ohci->dma, addr, &tmp, sizeof(tmp));
+        dma_memory_write(ohci->as, addr, &tmp, sizeof(tmp));
     }
 
     return 1;
@@ -585,7 +585,7 @@  static inline int ohci_read_iso_td(OHCIState *ohci,
 static inline int ohci_read_hcca(OHCIState *ohci,
                                  dma_addr_t addr, struct ohci_hcca *hcca)
 {
-    dma_memory_read(ohci->dma, addr + ohci->localmem_base, hcca, sizeof(*hcca));
+    dma_memory_read(ohci->as, addr + ohci->localmem_base, hcca, sizeof(*hcca));
     return 1;
 }
 
@@ -617,7 +617,7 @@  static inline int ohci_put_iso_td(OHCIState *ohci,
 static inline int ohci_put_hcca(OHCIState *ohci,
                                 dma_addr_t addr, struct ohci_hcca *hcca)
 {
-    dma_memory_write(ohci->dma,
+    dma_memory_write(ohci->as,
                      addr + ohci->localmem_base + HCCA_WRITEBACK_OFFSET,
                      (char *)hcca + HCCA_WRITEBACK_OFFSET,
                      HCCA_WRITEBACK_SIZE);
@@ -634,12 +634,12 @@  static void ohci_copy_td(OHCIState *ohci, struct ohci_td *td,
     n = 0x1000 - (ptr & 0xfff);
     if (n > len)
         n = len;
-    dma_memory_rw(ohci->dma, ptr + ohci->localmem_base, buf, n, dir);
+    dma_memory_rw(ohci->as, ptr + ohci->localmem_base, buf, n, dir);
     if (n == len)
         return;
     ptr = td->be & ~0xfffu;
     buf += n;
-    dma_memory_rw(ohci->dma, ptr + ohci->localmem_base, buf, len - n, dir);
+    dma_memory_rw(ohci->as, ptr + ohci->localmem_base, buf, len - n, dir);
 }
 
 /* Read/Write the contents of an ISO TD from/to main memory.  */
@@ -653,12 +653,12 @@  static void ohci_copy_iso_td(OHCIState *ohci,
     n = 0x1000 - (ptr & 0xfff);
     if (n > len)
         n = len;
-    dma_memory_rw(ohci->dma, ptr + ohci->localmem_base, buf, n, dir);
+    dma_memory_rw(ohci->as, ptr + ohci->localmem_base, buf, n, dir);
     if (n == len)
         return;
     ptr = end_addr & ~0xfffu;
     buf += n;
-    dma_memory_rw(ohci->dma, ptr + ohci->localmem_base, buf, len - n, dir);
+    dma_memory_rw(ohci->as, ptr + ohci->localmem_base, buf, len - n, dir);
 }
 
 static void ohci_process_lists(OHCIState *ohci, int completion);
@@ -1788,11 +1788,11 @@  static USBBusOps ohci_bus_ops = {
 static int usb_ohci_init(OHCIState *ohci, DeviceState *dev,
                          int num_ports, dma_addr_t localmem_base,
                          char *masterbus, uint32_t firstport,
-                         DMAContext *dma)
+                         AddressSpace *as)
 {
     int i;
 
-    ohci->dma = dma;
+    ohci->as = as;
 
     if (usb_frame_time == 0) {
 #ifdef OHCI_TIME_WARP
@@ -1859,7 +1859,7 @@  static int usb_ohci_initfn_pci(struct PCIDevice *dev)
 
     if (usb_ohci_init(&ohci->state, &dev->qdev, ohci->num_ports, 0,
                       ohci->masterbus, ohci->firstport,
-                      pci_dma_context(dev)) != 0) {
+                      pci_get_address_space(dev)) != 0) {
         return -1;
     }
     ohci->state.irq = ohci->pci_dev.irq[0];
@@ -1882,7 +1882,7 @@  static int ohci_init_pxa(SysBusDevice *dev)
 
     /* Cannot fail as we pass NULL for masterbus */
     usb_ohci_init(&s->ohci, &dev->qdev, s->num_ports, s->dma_offset, NULL, 0,
-                  &dma_context_memory);
+                  &address_space_memory);
     sysbus_init_irq(dev, &s->ohci.irq);
     sysbus_init_mmio(dev, &s->ohci.mem);
 
diff --git a/hw/usb/libhw.c b/hw/usb/libhw.c
index d2d4b51..8df11c4 100644
--- a/hw/usb/libhw.c
+++ b/hw/usb/libhw.c
@@ -37,7 +37,7 @@  int usb_packet_map(USBPacket *p, QEMUSGList *sgl)
 
         while (len) {
             dma_addr_t xlen = len;
-            mem = dma_memory_map(sgl->dma, base, &xlen, dir);
+            mem = dma_memory_map(sgl->as, base, &xlen, dir);
             if (!mem) {
                 goto err;
             }
@@ -63,7 +63,7 @@  void usb_packet_unmap(USBPacket *p, QEMUSGList *sgl)
     int i;
 
     for (i = 0; i < p->iov.niov; i++) {
-        dma_memory_unmap(sgl->dma, p->iov.iov[i].iov_base,
+        dma_memory_unmap(sgl->as, p->iov.iov[i].iov_base,
                          p->iov.iov[i].iov_len, dir,
                          p->iov.iov[i].iov_len);
     }
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index 7e7b0f4..400e772 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -243,7 +243,6 @@  struct PCIDevice {
     AddressSpace bus_master_as;
     MemoryRegion bus_master_enable_region;
     MemoryRegion *iommu;
-    DMAContext *dma;
 
     /* do not access the following fields */
     PCIConfigReadFunc *config_read;
@@ -642,15 +641,15 @@  static inline uint32_t pci_config_size(const PCIDevice *d)
 }
 
 /* DMA access functions */
-static inline DMAContext *pci_dma_context(PCIDevice *dev)
+static inline AddressSpace *pci_get_address_space(PCIDevice *dev)
 {
-    return dev->dma;
+    return &dev->bus_master_as;
 }
 
 static inline int pci_dma_rw(PCIDevice *dev, dma_addr_t addr,
                              void *buf, dma_addr_t len, DMADirection dir)
 {
-    dma_memory_rw(pci_dma_context(dev), addr, buf, len, dir);
+    dma_memory_rw(pci_get_address_space(dev), addr, buf, len, dir);
     return 0;
 }
 
@@ -670,12 +669,12 @@  static inline int pci_dma_write(PCIDevice *dev, dma_addr_t addr,
     static inline uint##_bits##_t ld##_l##_pci_dma(PCIDevice *dev,      \
                                                    dma_addr_t addr)     \
     {                                                                   \
-        return ld##_l##_dma(pci_dma_context(dev), addr);                \
+        return ld##_l##_dma(pci_get_address_space(dev), addr);          \
     }                                                                   \
     static inline void st##_s##_pci_dma(PCIDevice *dev,                 \
                                         dma_addr_t addr, uint##_bits##_t val) \
     {                                                                   \
-        st##_s##_dma(pci_dma_context(dev), addr, val);                  \
+        st##_s##_dma(pci_get_address_space(dev), addr, val);            \
     }
 
 PCI_DMA_DEFINE_LDST(ub, b, 8);
@@ -693,20 +692,20 @@  static inline void *pci_dma_map(PCIDevice *dev, dma_addr_t addr,
 {
     void *buf;
 
-    buf = dma_memory_map(pci_dma_context(dev), addr, plen, dir);
+    buf = dma_memory_map(pci_get_address_space(dev), addr, plen, dir);
     return buf;
 }
 
 static inline void pci_dma_unmap(PCIDevice *dev, void *buffer, dma_addr_t len,
                                  DMADirection dir, dma_addr_t access_len)
 {
-    dma_memory_unmap(pci_dma_context(dev), buffer, len, dir, access_len);
+    dma_memory_unmap(pci_get_address_space(dev), buffer, len, dir, access_len);
 }
 
 static inline void pci_dma_sglist_init(QEMUSGList *qsg, PCIDevice *dev,
                                        int alloc_hint)
 {
-    qemu_sglist_init(qsg, alloc_hint, pci_dma_context(dev));
+    qemu_sglist_init(qsg, alloc_hint, pci_get_address_space(dev));
 }
 
 extern const VMStateDescription vmstate_pci_device;
diff --git a/include/hw/ppc/spapr_vio.h b/include/hw/ppc/spapr_vio.h
index b757f32..5fe2a7c 100644
--- a/include/hw/ppc/spapr_vio.h
+++ b/include/hw/ppc/spapr_vio.h
@@ -64,7 +64,6 @@  struct VIOsPAPRDevice {
     target_ulong signal_state;
     VIOsPAPR_CRQ crq;
     AddressSpace as;
-    DMAContext dma;
     sPAPRTCETable *tcet;
 };
 
@@ -93,44 +92,44 @@  static inline qemu_irq spapr_vio_qirq(VIOsPAPRDevice *dev)
 static inline bool spapr_vio_dma_valid(VIOsPAPRDevice *dev, uint64_t taddr,
                                        uint32_t size, DMADirection dir)
 {
-    return dma_memory_valid(&dev->dma, taddr, size, dir);
+    return dma_memory_valid(&dev->as, taddr, size, dir);
 }
 
 static inline int spapr_vio_dma_read(VIOsPAPRDevice *dev, uint64_t taddr,
                                      void *buf, uint32_t size)
 {
-    if (!dma_memory_valid(&dev->dma, taddr, size, DMA_DIRECTION_TO_DEVICE)) {
+    if (!dma_memory_valid(&dev->as, taddr, size, DMA_DIRECTION_TO_DEVICE)) {
         return H_DEST_PARM;
     }
-    dma_memory_read(&dev->dma, taddr, buf, size);
+    dma_memory_read(&dev->as, taddr, buf, size);
     return H_SUCCESS;
 }
 
 static inline int spapr_vio_dma_write(VIOsPAPRDevice *dev, uint64_t taddr,
                                       const void *buf, uint32_t size)
 {
-    if (!dma_memory_valid(&dev->dma, taddr, size, DMA_DIRECTION_FROM_DEVICE)) {
+    if (!dma_memory_valid(&dev->as, taddr, size, DMA_DIRECTION_FROM_DEVICE)) {
         return H_DEST_PARM;
     }
-    dma_memory_write(&dev->dma, taddr, buf, size);
+    dma_memory_write(&dev->as, taddr, buf, size);
     return H_SUCCESS;
 }
 
 static inline int spapr_vio_dma_set(VIOsPAPRDevice *dev, uint64_t taddr,
                                     uint8_t c, uint32_t size)
 {
-    if (!dma_memory_valid(&dev->dma, taddr, size, DMA_DIRECTION_FROM_DEVICE)) {
+    if (!dma_memory_valid(&dev->as, taddr, size, DMA_DIRECTION_FROM_DEVICE)) {
         return H_DEST_PARM;
     }
-    dma_memory_set(&dev->dma, taddr, c, size);
+    dma_memory_set(&dev->as, taddr, c, size);
     return H_SUCCESS;
 }
 
-#define vio_stb(_dev, _addr, _val) (stb_dma(&(_dev)->dma, (_addr), (_val)))
-#define vio_sth(_dev, _addr, _val) (stw_be_dma(&(_dev)->dma, (_addr), (_val)))
-#define vio_stl(_dev, _addr, _val) (stl_be_dma(&(_dev)->dma, (_addr), (_val)))
-#define vio_stq(_dev, _addr, _val) (stq_be_dma(&(_dev)->dma, (_addr), (_val)))
-#define vio_ldq(_dev, _addr) (ldq_be_dma(&(_dev)->dma, (_addr)))
+#define vio_stb(_dev, _addr, _val) (stb_dma(&(_dev)->as, (_addr), (_val)))
+#define vio_sth(_dev, _addr, _val) (stw_be_dma(&(_dev)->as, (_addr), (_val)))
+#define vio_stl(_dev, _addr, _val) (stl_be_dma(&(_dev)->as, (_addr), (_val)))
+#define vio_stq(_dev, _addr, _val) (stq_be_dma(&(_dev)->as, (_addr), (_val)))
+#define vio_ldq(_dev, _addr) (ldq_be_dma(&(_dev)->as, (_addr)))
 
 int spapr_vio_send_crq(VIOsPAPRDevice *dev, uint8_t *crq);
 
diff --git a/include/sysemu/dma.h b/include/sysemu/dma.h
index 7521d50..8a58079 100644
--- a/include/sysemu/dma.h
+++ b/include/sysemu/dma.h
@@ -12,11 +12,11 @@ 
 
 #include <stdio.h>
 #include "exec/memory.h"
+#include "exec/address-spaces.h"
 #include "hw/hw.h"
 #include "block/block.h"
 #include "sysemu/kvm.h"
 
-typedef struct DMAContext DMAContext;
 typedef struct ScatterGatherEntry ScatterGatherEntry;
 
 typedef enum {
@@ -29,7 +29,7 @@  struct QEMUSGList {
     int nsg;
     int nalloc;
     size_t size;
-    DMAContext *dma;
+    AddressSpace *as;
 };
 
 #ifndef CONFIG_USER_ONLY
@@ -46,16 +46,7 @@  typedef uint64_t dma_addr_t;
 #define DMA_ADDR_BITS 64
 #define DMA_ADDR_FMT "%" PRIx64
 
-struct DMAContext {
-    AddressSpace *as;
-};
-
-/* A global DMA context corresponding to the address_space_memory
- * AddressSpace, for sysbus devices which do DMA.
- */
-extern DMAContext dma_context_memory;
-
-static inline void dma_barrier(DMAContext *dma, DMADirection dir)
+static inline void dma_barrier(AddressSpace *as, DMADirection dir)
 {
     /*
      * This is called before DMA read and write operations
@@ -83,104 +74,104 @@  static inline void dma_barrier(DMAContext *dma, DMADirection dir)
 /* Checks that the given range of addresses is valid for DMA.  This is
  * useful for certain cases, but usually you should just use
  * dma_memory_{read,write}() and check for errors */
-static inline bool dma_memory_valid(DMAContext *dma,
+static inline bool dma_memory_valid(AddressSpace *as,
                                     dma_addr_t addr, dma_addr_t len,
                                     DMADirection dir)
 {
-    return address_space_valid(dma->as, addr, len,
+    return address_space_valid(as, addr, len,
                                dir == DMA_DIRECTION_FROM_DEVICE);
 }
 
-static inline void dma_memory_rw_relaxed(DMAContext *dma, dma_addr_t addr,
+static inline void dma_memory_rw_relaxed(AddressSpace *as, dma_addr_t addr,
                                          void *buf, dma_addr_t len,
                                          DMADirection dir)
 {
-    address_space_rw(dma->as, addr, buf, len, dir == DMA_DIRECTION_FROM_DEVICE);
+    address_space_rw(as, addr, buf, len, dir == DMA_DIRECTION_FROM_DEVICE);
 }
 
-static inline void dma_memory_read_relaxed(DMAContext *dma, dma_addr_t addr,
+static inline void dma_memory_read_relaxed(AddressSpace *as, dma_addr_t addr,
                                            void *buf, dma_addr_t len)
 {
-    dma_memory_rw_relaxed(dma, addr, buf, len, DMA_DIRECTION_TO_DEVICE);
+    dma_memory_rw_relaxed(as, addr, buf, len, DMA_DIRECTION_TO_DEVICE);
 }
 
-static inline void dma_memory_write_relaxed(DMAContext *dma, dma_addr_t addr,
+static inline void dma_memory_write_relaxed(AddressSpace *as, dma_addr_t addr,
                                             const void *buf, dma_addr_t len)
 {
-    dma_memory_rw_relaxed(dma, addr, (void *)buf, len,
+    dma_memory_rw_relaxed(as, addr, (void *)buf, len,
                                  DMA_DIRECTION_FROM_DEVICE);
 }
 
-static inline void dma_memory_rw(DMAContext *dma, dma_addr_t addr,
+static inline void dma_memory_rw(AddressSpace *as, dma_addr_t addr,
                                  void *buf, dma_addr_t len,
                                  DMADirection dir)
 {
-    dma_barrier(dma, dir);
+    dma_barrier(as, dir);
 
-    dma_memory_rw_relaxed(dma, addr, buf, len, dir);
+    dma_memory_rw_relaxed(as, addr, buf, len, dir);
 }
 
-static inline void dma_memory_read(DMAContext *dma, dma_addr_t addr,
+static inline void dma_memory_read(AddressSpace *as, dma_addr_t addr,
                                    void *buf, dma_addr_t len)
 {
-    dma_memory_rw(dma, addr, buf, len, DMA_DIRECTION_TO_DEVICE);
+    dma_memory_rw(as, addr, buf, len, DMA_DIRECTION_TO_DEVICE);
 }
 
-static inline void dma_memory_write(DMAContext *dma, dma_addr_t addr,
+static inline void dma_memory_write(AddressSpace *as, dma_addr_t addr,
                                     const void *buf, dma_addr_t len)
 {
-    dma_memory_rw(dma, addr, (void *)buf, len, DMA_DIRECTION_FROM_DEVICE);
+    dma_memory_rw(as, addr, (void *)buf, len, DMA_DIRECTION_FROM_DEVICE);
 }
 
-int dma_memory_set(DMAContext *dma, dma_addr_t addr, uint8_t c, dma_addr_t len);
+int dma_memory_set(AddressSpace *as, dma_addr_t addr, uint8_t c, dma_addr_t len);
 
-static inline void *dma_memory_map(DMAContext *dma,
+static inline void *dma_memory_map(AddressSpace *as,
                                    dma_addr_t addr, dma_addr_t *len,
                                    DMADirection dir)
 {
     hwaddr xlen = *len;
     void *p;
 
-    p = address_space_map(dma->as, addr, &xlen, dir == DMA_DIRECTION_FROM_DEVICE);
+    p = address_space_map(as, addr, &xlen, dir == DMA_DIRECTION_FROM_DEVICE);
     *len = xlen;
     return p;
 }
 
-static inline void dma_memory_unmap(DMAContext *dma,
+static inline void dma_memory_unmap(AddressSpace *as,
                                     void *buffer, dma_addr_t len,
                                     DMADirection dir, dma_addr_t access_len)
 {
-    address_space_unmap(dma->as, buffer, (hwaddr)len,
+    address_space_unmap(as, buffer, (hwaddr)len,
                         dir == DMA_DIRECTION_FROM_DEVICE, access_len);
 }
 
 #define DEFINE_LDST_DMA(_lname, _sname, _bits, _end) \
-    static inline uint##_bits##_t ld##_lname##_##_end##_dma(DMAContext *dma, \
+    static inline uint##_bits##_t ld##_lname##_##_end##_dma(AddressSpace *as, \
                                                             dma_addr_t addr) \
     {                                                                   \
         uint##_bits##_t val;                                            \
-        dma_memory_read(dma, addr, &val, (_bits) / 8);                  \
+        dma_memory_read(as, addr, &val, (_bits) / 8);                   \
         return _end##_bits##_to_cpu(val);                               \
     }                                                                   \
-    static inline void st##_sname##_##_end##_dma(DMAContext *dma,       \
+    static inline void st##_sname##_##_end##_dma(AddressSpace *as,      \
                                                  dma_addr_t addr,       \
                                                  uint##_bits##_t val)   \
     {                                                                   \
         val = cpu_to_##_end##_bits(val);                                \
-        dma_memory_write(dma, addr, &val, (_bits) / 8);                 \
+        dma_memory_write(as, addr, &val, (_bits) / 8);                  \
     }
 
-static inline uint8_t ldub_dma(DMAContext *dma, dma_addr_t addr)
+static inline uint8_t ldub_dma(AddressSpace *as, dma_addr_t addr)
 {
     uint8_t val;
 
-    dma_memory_read(dma, addr, &val, 1);
+    dma_memory_read(as, addr, &val, 1);
     return val;
 }
 
-static inline void stb_dma(DMAContext *dma, dma_addr_t addr, uint8_t val)
+static inline void stb_dma(AddressSpace *as, dma_addr_t addr, uint8_t val)
 {
-    dma_memory_write(dma, addr, &val, 1);
+    dma_memory_write(as, addr, &val, 1);
 }
 
 DEFINE_LDST_DMA(uw, w, 16, le);
@@ -192,14 +183,12 @@  DEFINE_LDST_DMA(q, q, 64, be);
 
 #undef DEFINE_LDST_DMA
 
-void dma_context_init(DMAContext *dma, AddressSpace *as);
-
 struct ScatterGatherEntry {
     dma_addr_t base;
     dma_addr_t len;
 };
 
-void qemu_sglist_init(QEMUSGList *qsg, int alloc_hint, DMAContext *dma);
+void qemu_sglist_init(QEMUSGList *qsg, int alloc_hint, AddressSpace *as);
 void qemu_sglist_add(QEMUSGList *qsg, dma_addr_t base, dma_addr_t len);
 void qemu_sglist_destroy(QEMUSGList *qsg);
 #endif
-- 
1.7.1


From d5f91f32b2275e9193dfea002b91a0d0dc78ea27 Mon Sep 17 00:00:00 2001
From: Alexey Kardashevskiy <aik@ozlabs.ru>
Date: Mon, 29 Apr 2013 16:25:51 +0000
Subject: [PATCH 23/40] memory: give name to every AddressSpace

The "info mtree" command in the QEMU console prints only the "memory" and
"I/O" address spaces, although many other AddressSpace structs are
created by PCI and VIO devices. Those devices do not normally have names
and are therefore not present in the "info mtree" output.

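This patch fixes that by making address_space_init() take a name argument,
so that every AddressSpace is given a name when it is initialized.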

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 exec.c                |    6 ++----
 hw/pci/pci.c          |    3 ++-
 hw/ppc/spapr_vio.c    |    2 +-
 include/exec/memory.h |    5 +++--
 memory.c              |    8 +++-----
 5 files changed, 11 insertions(+), 13 deletions(-)

diff --git a/exec.c b/exec.c
index 2b6e5b8..03d6ea7 100644
--- a/exec.c
+++ b/exec.c
@@ -1857,13 +1857,11 @@  static void memory_map_init(void)
 {
     system_memory = g_malloc(sizeof(*system_memory));
     memory_region_init(system_memory, "system", INT64_MAX);
-    address_space_init(&address_space_memory, system_memory);
-    address_space_memory.name = "memory";
+    address_space_init(&address_space_memory, system_memory, "memory");
 
     system_io = g_malloc(sizeof(*system_io));
     memory_region_init(system_io, "io", 65536);
-    address_space_init(&address_space_io, system_io);
-    address_space_io.name = "I/O";
+    address_space_init(&address_space_io, system_io, "I/O");
 
     memory_listener_register(&core_memory_listener, &address_space_memory);
     memory_listener_register(&io_memory_listener, &address_space_io);
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index c10b776..3db7f55 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -818,7 +818,8 @@  static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
     memory_region_init_alias(&pci_dev->bus_master_enable_region, "bus master",
                              pci_dev->iommu, 0, memory_region_size(pci_dev->iommu));
     memory_region_set_enabled(&pci_dev->bus_master_enable_region, false);
-    address_space_init(&pci_dev->bus_master_as, &pci_dev->bus_master_enable_region);
+    address_space_init(&pci_dev->bus_master_as, &pci_dev->bus_master_enable_region,
+                       name);
 
     pci_dev->devfn = devfn;
     pstrcpy(pci_dev->name, sizeof(pci_dev->name), name);
diff --git a/hw/ppc/spapr_vio.c b/hw/ppc/spapr_vio.c
index 5e72f1b..3cfa326 100644
--- a/hw/ppc/spapr_vio.c
+++ b/hw/ppc/spapr_vio.c
@@ -458,7 +458,7 @@  static int spapr_vio_busdev_init(DeviceState *qdev)
     if (pc->rtce_window_size) {
         uint32_t liobn = SPAPR_VIO_BASE_LIOBN | dev->reg;
         dev->tcet = spapr_tce_new_table(liobn, pc->rtce_window_size);
-        address_space_init(&dev->as, spapr_tce_get_iommu(dev->tcet));
+        address_space_init(&dev->as, spapr_tce_get_iommu(dev->tcet), qdev->id);
     }
 
     return pc->init(dev);
diff --git a/include/exec/memory.h b/include/exec/memory.h
index e05296b..06586f3 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -177,7 +177,7 @@  struct MemoryRegionPortio {
  */
 struct AddressSpace {
     /* All fields are private. */
-    const char *name;
+    char *name;
     MemoryRegion *root;
     struct FlatView *current_map;
     int ioeventfd_nb;
@@ -839,8 +839,9 @@  void mtree_info(fprintf_function mon_printf, void *f);
  *
  * @as: an uninitialized #AddressSpace
  * @root: a #MemoryRegion that routes addesses for the address space
+ * @name: an address space name
  */
-void address_space_init(AddressSpace *as, MemoryRegion *root);
+void address_space_init(AddressSpace *as, MemoryRegion *root, const char *name);
 
 
 /**
diff --git a/memory.c b/memory.c
index b0d5e33..00e31c0 100644
--- a/memory.c
+++ b/memory.c
@@ -1588,7 +1588,7 @@  void memory_listener_unregister(MemoryListener *listener)
     QTAILQ_REMOVE(&memory_listeners, listener, link);
 }
 
-void address_space_init(AddressSpace *as, MemoryRegion *root)
+void address_space_init(AddressSpace *as, MemoryRegion *root, const char *name)
 {
     memory_region_transaction_begin();
     as->root = root;
@@ -1597,7 +1597,7 @@  void address_space_init(AddressSpace *as, MemoryRegion *root)
     as->ioeventfd_nb = 0;
     as->ioeventfds = NULL;
     QTAILQ_INSERT_TAIL(&address_spaces, as, address_spaces_link);
-    as->name = NULL;
+    as->name = g_strdup(name?name:"anonymous");
     address_space_init_dispatch(as);
     memory_region_update_pending |= root->enabled;
     memory_region_transaction_commit();
@@ -1612,6 +1612,7 @@  void address_space_destroy(AddressSpace *as)
     QTAILQ_REMOVE(&address_spaces, as, address_spaces_link);
     address_space_destroy_dispatch(as);
     flatview_destroy(as->current_map);
+    g_free(as->name);
     g_free(as->current_map);
     g_free(as->ioeventfds);
 }
@@ -1738,9 +1739,6 @@  void mtree_info(fprintf_function mon_printf, void *f)
     QTAILQ_INIT(&ml_head);
 
     QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) {
-        if (!as->name) {
-            continue;
-        }
         mon_printf(f, "%s\n", as->name);
         mtree_print_mr(mon_printf, f, as->root, 0, 0, &ml_head);
     }
-- 
1.7.1


From 97d7ec17467e17da33c8f5ebb7debfb3f8b508ce Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Tue, 7 May 2013 06:59:09 +0200
Subject: [PATCH 24/40] memory: add getter/setter for owner

Whenever memory regions are accessed outside the BQL, they need to be
preserved against hot-unplug.  MemoryRegions actually do not have their
own reference count; they piggyback on a QOM object, their "owner".
Add two functions to retrieve and specify the owner.

The setter function will affect the owner recursively on a whole tree
of contained regions, but without crossing (a) aliases (b) regions that
are already owned by another device.  This is so that a device can create
a complex tree of regions and a single call to memory_region_set_owner
(perhaps even within a bus-specific function, e.g. pci_register_bar) will
affect the entire tree.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/exec/memory.h |   18 ++++++++++++++++++
 memory.c              |   21 +++++++++++++++++++++
 2 files changed, 39 insertions(+), 0 deletions(-)

diff --git a/include/exec/memory.h b/include/exec/memory.h
index 06586f3..5c20bac 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -135,6 +135,7 @@  struct MemoryRegion {
     const MemoryRegionIOMMUOps *iommu_ops;
     void *opaque;
     MemoryRegion *parent;
+    struct Object *owner;
     Int128 size;
     hwaddr addr;
     void (*destructor)(MemoryRegion *mr);
@@ -374,6 +375,23 @@  void memory_region_init_iommu(MemoryRegion *mr,
 void memory_region_destroy(MemoryRegion *mr);
 
 /**
+ * memory_region_owner: get a memory region's owner.
+ *
+ * @mr: the memory region being queried.
+ */
+struct Object *memory_region_owner(MemoryRegion *mr);
+
+/**
+ * memory_region_set_owner: set the owner for a memory region and all
+ * the unowned regions below it.
+ *
+ * @mr: the memory region being set.
+ * @owner: the object that acts as the owner
+ */
+void memory_region_set_owner(MemoryRegion *mr,
+                             struct Object *owner);
+
+/**
  * memory_region_size: get a memory region's size.
  *
  * @mr: the memory region being queried.
diff --git a/memory.c b/memory.c
index 00e31c0..f7fddb1 100644
--- a/memory.c
+++ b/memory.c
@@ -789,6 +789,7 @@  void memory_region_init(MemoryRegion *mr,
     mr->ops = NULL;
     mr->iommu_ops = NULL;
     mr->parent = NULL;
+    mr->owner = NULL;
     mr->size = int128_make64(size);
     if (size == UINT64_MAX) {
         mr->size = int128_2_64();
@@ -1040,6 +1041,26 @@  void memory_region_destroy(MemoryRegion *mr)
     g_free(mr->ioeventfds);
 }
 
+Object *memory_region_owner(MemoryRegion *mr)
+{
+    return mr->owner;
+}
+
+void memory_region_set_owner(MemoryRegion *mr,
+                             Object *owner)
+{
+    MemoryRegion *child;
+
+    assert(mr->owner == NULL || mr->owner == owner);
+    mr->owner = owner;
+
+    QTAILQ_FOREACH(child, &mr->subregions, subregions_link) {
+        if (child->owner == NULL || child->owner == owner) {
+            memory_region_set_owner(child, owner);
+        }
+    }
+}
+
 uint64_t memory_region_size(MemoryRegion *mr)
 {
     if (int128_eq(mr->size, int128_2_64())) {
-- 
1.7.1


From 6779775be21f7ae52fd5def34d6cab8e840e0ff6 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Tue, 7 May 2013 09:06:00 +0200
Subject: [PATCH 25/40] memory: add ref/unref

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/exec/memory.h |   34 ++++++++++++++++++++++++++++++++++
 memory.c              |   14 ++++++++++++++
 2 files changed, 48 insertions(+), 0 deletions(-)

diff --git a/include/exec/memory.h b/include/exec/memory.h
index 5c20bac..ebac085 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -253,6 +253,40 @@  struct MemoryListener {
 void memory_region_init(MemoryRegion *mr,
                         const char *name,
                         uint64_t size);
+
+/**
+ * memory_region_ref: Add 1 to a memory region's reference count
+ *
+ * Whenever memory regions are accessed outside the BQL, they need to be
+ * preserved against hot-unplug.  MemoryRegions actually do not have their
+ * own reference count; they piggyback on a QOM object, their "owner".
+ * This function adds a reference to the owner.
+ *
+ * All MemoryRegions must have an owner if they can disappear, even if the
+ * device they belong to operates exclusively under the BQL.  This is because
+ * the region could be returned at any time by memory_region_find, and this
+ * is usually under guest control.
+ *
+ * @mr: the #MemoryRegion whose owner is referenced.  If @mr is NULL or
+ *      has no owner, the call is a no-op; each call should be paired
+ *      with a memory_region_unref on the same region.
+ */
+void memory_region_ref(MemoryRegion *mr);
+
+/**
+ * memory_region_unref: Remove 1 from a memory region's reference count
+ *
+ * Whenever memory regions are accessed outside the BQL, they need to be
+ * preserved against hot-unplug.  MemoryRegions actually do not have their
+ * own reference count; they piggyback on a QOM object, their "owner".
+ * This function removes a reference to the owner and possibly destroys it.
+ *
+ * @mr: the #MemoryRegion whose owner is unreferenced.  If @mr is NULL or
+ *      has no owner, the call is a no-op; otherwise the owner may be
+ *      destroyed if this was its last reference.
+ */
+void memory_region_unref(MemoryRegion *mr);
+
 /**
  * memory_region_init_io: Initialize an I/O memory region.
  *
diff --git a/memory.c b/memory.c
index f7fddb1..c7ab19c 100644
--- a/memory.c
+++ b/memory.c
@@ -1061,6 +1061,20 @@  void memory_region_set_owner(MemoryRegion *mr,
     }
 }
 
+void memory_region_ref(MemoryRegion *mr)
+{
+    if (mr && mr->owner) {
+        object_ref(mr->owner);
+    }
+}
+
+void memory_region_unref(MemoryRegion *mr)
+{
+    if (mr && mr->owner) {
+        object_unref(mr->owner);
+    }
+}
+
 uint64_t memory_region_size(MemoryRegion *mr)
 {
     if (int128_eq(mr->size, int128_2_64())) {
-- 
1.7.1


From 124a7d065a876ebd2d2dd6dddb5c9b044806a36a Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon, 6 May 2013 10:46:11 +0200
Subject: [PATCH 26/40] memory: add ref/unref calls

Add ref/unref calls at the following places:

- places where memory regions are stashed by a listener and
  used outside the BQL (including in Xen or KVM).

- memory_region_find callsites

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 exec.c                                |    6 +++++-
 hw/core/loader.c                      |    1 +
 hw/display/exynos4210_fimd.c          |    6 ++++++
 hw/display/framebuffer.c              |   10 ++++++----
 hw/i386/kvm/ioapic.c                  |    2 ++
 hw/i386/kvmvapic.c                    |    1 +
 hw/misc/vfio.c                        |    2 ++
 hw/virtio/dataplane/hostmem.c         |    7 +++++++
 hw/virtio/vhost.c                     |    2 ++
 hw/virtio/virtio-balloon.c            |    1 +
 hw/xen/xen_pt.c                       |    4 ++++
 include/hw/virtio/dataplane/hostmem.h |    1 +
 kvm-all.c                             |    2 ++
 memory.c                              |   16 ++++++++++++++++
 target-arm/kvm.c                      |    2 ++
 target-sparc/mmu_helper.c             |    1 +
 xen-all.c                             |    2 ++
 17 files changed, 61 insertions(+), 5 deletions(-)

diff --git a/exec.c b/exec.c
index 03d6ea7..f163a55 100644
--- a/exec.c
+++ b/exec.c
@@ -761,12 +761,16 @@  static uint16_t phys_section_add(MemoryRegionSection *section)
                                 phys_sections_nb_alloc);
     }
     phys_sections[phys_sections_nb] = *section;
+    memory_region_ref(section->mr);
     return phys_sections_nb++;
 }
 
 static void phys_sections_clear(void)
 {
-    phys_sections_nb = 0;
+    while (phys_sections_nb > 0) {
+        MemoryRegionSection *section = &phys_sections[--phys_sections_nb];
+        memory_region_unref(section->mr);
+    }
 }
 
 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
diff --git a/hw/core/loader.c b/hw/core/loader.c
index 7507914..97e7ba2 100644
--- a/hw/core/loader.c
+++ b/hw/core/loader.c
@@ -727,6 +727,7 @@  int rom_load_all(void)
         addr += rom->romsize;
         section = memory_region_find(get_system_memory(), rom->addr, 1);
         rom->isrom = section.size && memory_region_is_rom(section.mr);
+        memory_region_unref(section.mr);
     }
     qemu_register_reset(rom_reset, NULL);
     roms_loaded = 1;
diff --git a/hw/display/exynos4210_fimd.c b/hw/display/exynos4210_fimd.c
index 6cb5016..afa2e54 100644
--- a/hw/display/exynos4210_fimd.c
+++ b/hw/display/exynos4210_fimd.c
@@ -1126,6 +1126,11 @@  static void fimd_update_memory_section(Exynos4210fimdState *s, unsigned win)
     /* Total number of bytes of virtual screen used by current window */
     w->fb_len = fb_mapped_len = (w->virtpage_width + w->virtpage_offsize) *
             (w->rightbot_y - w->lefttop_y + 1);
+
+    /* TODO: add .exit and unref the region there.  Not needed yet since sysbus
+     * does not support hot-unplug.
+     */
+    memory_region_unref(w->mem_section.mr);
     w->mem_section = memory_region_find(sysbus_address_space(&s->busdev),
             fb_start_addr, w->fb_len);
     assert(w->mem_section.mr);
@@ -1154,6 +1159,7 @@  static void fimd_update_memory_section(Exynos4210fimdState *s, unsigned win)
     return;
 
 error_return:
+    memory_region_unref(w->mem_section.mr);
     w->mem_section.mr = NULL;
     w->mem_section.size = 0;
     w->host_fb_addr = NULL;
diff --git a/hw/display/framebuffer.c b/hw/display/framebuffer.c
index 6be31db..8288b93 100644
--- a/hw/display/framebuffer.c
+++ b/hw/display/framebuffer.c
@@ -54,10 +54,10 @@  void framebuffer_update_display(
     src_len = src_width * rows;
 
     mem_section = memory_region_find(address_space, base, src_len);
+    mem = mem_section.mr;
     if (mem_section.size != src_len || !memory_region_is_ram(mem_section.mr)) {
-        return;
+        goto out;
     }
-    mem = mem_section.mr;
     assert(mem);
     assert(mem_section.offset_within_address_space == base);
 
@@ -67,10 +67,10 @@  void framebuffer_update_display(
        but it's not really worth it as dirty flag tracking will probably
        already have failed above.  */
     if (!src_base)
-        return;
+        goto out;
     if (src_len != src_width * rows) {
         cpu_physical_memory_unmap(src_base, src_len, 0, 0);
-        return;
+        goto out;
     }
     src = src_base;
     dest = surface_data(ds);
@@ -107,4 +107,6 @@  void framebuffer_update_display(
                               DIRTY_MEMORY_VGA);
     *first_row = first;
     *last_row = last;
+out:
+    memory_region_unref(mem);
 }
diff --git a/hw/i386/kvm/ioapic.c b/hw/i386/kvm/ioapic.c
index a3bd519..e3c29da 100644
--- a/hw/i386/kvm/ioapic.c
+++ b/hw/i386/kvm/ioapic.c
@@ -107,6 +107,8 @@  static void kvm_ioapic_put(IOAPICCommonState *s)
         fprintf(stderr, "KVM_GET_IRQCHIP failed: %s\n", strerror(ret));
         abort();
     }
+
+    memory_region_unref(mrs.mr);
 }
 
 static void kvm_ioapic_reset(DeviceState *dev)
diff --git a/hw/i386/kvmvapic.c b/hw/i386/kvmvapic.c
index 655483b..e375c1c 100644
--- a/hw/i386/kvmvapic.c
+++ b/hw/i386/kvmvapic.c
@@ -605,6 +605,7 @@  static void vapic_map_rom_writable(VAPICROMState *s)
                              rom_size);
     memory_region_add_subregion_overlap(as, rom_paddr, &s->rom, 1000);
     s->rom_mapped_writable = true;
+    memory_region_unref(section.mr);
 }
 
 static int vapic_prepare(VAPICROMState *s)
diff --git a/hw/misc/vfio.c b/hw/misc/vfio.c
index 178dd11..0ae6878 100644
--- a/hw/misc/vfio.c
+++ b/hw/misc/vfio.c
@@ -1969,6 +1969,7 @@  static void vfio_listener_region_add(MemoryListener *listener,
     DPRINTF("region_add %"HWADDR_PRIx" - %"HWADDR_PRIx" [%p]\n",
             iova, end - 1, vaddr);
 
+    memory_region_ref(section->mr);
     ret = vfio_dma_map(container, iova, end - iova, vaddr, section->readonly);
     if (ret) {
         error_report("vfio_dma_map(%p, 0x%"HWADDR_PRIx", "
@@ -2010,6 +2011,7 @@  static void vfio_listener_region_del(MemoryListener *listener,
             iova, end - 1);
 
     ret = vfio_dma_unmap(container, iova, end - iova);
+    memory_region_unref(section->mr);
     if (ret) {
         error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", "
                      "0x%"HWADDR_PRIx") = %d (%m)",
diff --git a/hw/virtio/dataplane/hostmem.c b/hw/virtio/dataplane/hostmem.c
index 37292ff..66829bb 100644
--- a/hw/virtio/dataplane/hostmem.c
+++ b/hw/virtio/dataplane/hostmem.c
@@ -64,8 +64,12 @@  out:
 static void hostmem_listener_commit(MemoryListener *listener)
 {
     HostMem *hostmem = container_of(listener, HostMem, listener);
+    int i;
 
     qemu_mutex_lock(&hostmem->current_regions_lock);
+    for (i = 0; i < hostmem->num_current_regions; i++) {
+        memory_region_unref(hostmem->current_regions[i].mr);
+    }
     g_free(hostmem->current_regions);
     hostmem->current_regions = hostmem->new_regions;
     hostmem->num_current_regions = hostmem->num_new_regions;
@@ -92,8 +96,11 @@  static void hostmem_append_new_region(HostMem *hostmem,
         .guest_addr = section->offset_within_address_space,
         .size = section->size,
         .readonly = section->readonly,
+        .mr = section->mr,
     };
     hostmem->num_new_regions++;
+
+    memory_region_ref(section->mr);
 }
 
 static void hostmem_listener_append_region(MemoryListener *listener,
diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index fbabf99..190522b 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -497,6 +497,7 @@  static void vhost_region_add(MemoryListener *listener,
     dev->mem_sections = g_renew(MemoryRegionSection, dev->mem_sections,
                                 dev->n_mem_sections);
     dev->mem_sections[dev->n_mem_sections - 1] = *section;
+    memory_region_ref(section->mr);
     vhost_set_memory(listener, section, true);
 }
 
@@ -512,6 +513,7 @@  static void vhost_region_del(MemoryListener *listener,
     }
 
     vhost_set_memory(listener, section, false);
+    memory_region_unref(section->mr);
     for (i = 0; i < dev->n_mem_sections; ++i) {
         if (dev->mem_sections[i].offset_within_address_space
             == section->offset_within_address_space) {
diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c
index d669756..fac8800 100644
--- a/hw/virtio/virtio-balloon.c
+++ b/hw/virtio/virtio-balloon.c
@@ -205,6 +205,7 @@  static void virtio_balloon_handle_output(VirtIODevice *vdev, VirtQueue *vq)
             addr = section.offset_within_region;
             balloon_page(memory_region_get_ram_ptr(section.mr) + addr,
                          !!(vq == s->dvq));
+            memory_region_unref(section.mr);
         }
 
         virtqueue_push(vq, &elem, offset);
diff --git a/hw/xen/xen_pt.c b/hw/xen/xen_pt.c
index c199818..be1fd52 100644
--- a/hw/xen/xen_pt.c
+++ b/hw/xen/xen_pt.c
@@ -606,6 +606,7 @@  static void xen_pt_region_add(MemoryListener *l, MemoryRegionSection *sec)
     XenPCIPassthroughState *s = container_of(l, XenPCIPassthroughState,
                                              memory_listener);
 
+    memory_region_ref(sec->mr);
     xen_pt_region_update(s, sec, true);
 }
 
@@ -615,6 +616,7 @@  static void xen_pt_region_del(MemoryListener *l, MemoryRegionSection *sec)
                                              memory_listener);
 
     xen_pt_region_update(s, sec, false);
+    memory_region_unref(sec->mr);
 }
 
 static void xen_pt_io_region_add(MemoryListener *l, MemoryRegionSection *sec)
@@ -622,6 +624,7 @@  static void xen_pt_io_region_add(MemoryListener *l, MemoryRegionSection *sec)
     XenPCIPassthroughState *s = container_of(l, XenPCIPassthroughState,
                                              io_listener);
 
+    memory_region_ref(sec->mr);
     xen_pt_region_update(s, sec, true);
 }
 
@@ -631,6 +634,7 @@  static void xen_pt_io_region_del(MemoryListener *l, MemoryRegionSection *sec)
                                              io_listener);
 
     xen_pt_region_update(s, sec, false);
+    memory_region_unref(sec->mr);
 }
 
 static const MemoryListener xen_pt_memory_listener = {
diff --git a/include/hw/virtio/dataplane/hostmem.h b/include/hw/virtio/dataplane/hostmem.h
index b2cf093..2810f4b 100644
--- a/include/hw/virtio/dataplane/hostmem.h
+++ b/include/hw/virtio/dataplane/hostmem.h
@@ -18,6 +18,7 @@ 
 #include "qemu/thread.h"
 
 typedef struct {
+    MemoryRegion *mr;
     void *host_addr;
     hwaddr guest_addr;
     uint64_t size;
diff --git a/kvm-all.c b/kvm-all.c
index 3a31602..1d876aa 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -766,6 +766,7 @@  static void kvm_set_phys_mem(MemoryRegionSection *section, bool add)
 static void kvm_region_add(MemoryListener *listener,
                            MemoryRegionSection *section)
 {
+    memory_region_ref(section->mr);
     kvm_set_phys_mem(section, true);
 }
 
@@ -773,6 +774,7 @@  static void kvm_region_del(MemoryListener *listener,
                            MemoryRegionSection *section)
 {
     kvm_set_phys_mem(section, false);
+    memory_region_unref(section->mr);
 }
 
 static void kvm_log_sync(MemoryListener *listener,
diff --git a/memory.c b/memory.c
index c7ab19c..4b30c3f 100644
--- a/memory.c
+++ b/memory.c
@@ -145,6 +145,7 @@  static bool memory_listener_match(MemoryListener *listener,
         }                                                               \
     } while (0)
 
+/* No need to ref/unref .mr, the FlatRange keeps it alive.  */
 #define MEMORY_LISTENER_UPDATE_REGION(fr, as, dir, callback)            \
     MEMORY_LISTENER_CALL(callback, dir, (&(MemoryRegionSection) {       \
         .mr = (fr)->mr,                                                 \
@@ -260,11 +261,17 @@  static void flatview_insert(FlatView *view, unsigned pos, FlatRange *range)
     memmove(view->ranges + pos + 1, view->ranges + pos,
             (view->nr - pos) * sizeof(FlatRange));
     view->ranges[pos] = *range;
+    memory_region_ref(range->mr);
     ++view->nr;
 }
 
 static void flatview_destroy(FlatView *view)
 {
+    int i;
+
+    for (i = 0; i < view->nr; i++) {
+        memory_region_unref(view->ranges[i].mr);
+    }
     g_free(view->ranges);
 }
 
@@ -763,6 +770,11 @@  static void memory_region_destructor_ram(MemoryRegion *mr)
     qemu_ram_free(mr->ram_addr);
 }
 
+static void memory_region_destructor_alias(MemoryRegion *mr)
+{
+    memory_region_unref(mr->alias);
+}
+
 static void memory_region_destructor_ram_from_ptr(MemoryRegion *mr)
 {
     qemu_ram_free_from_ptr(mr->ram_addr);
@@ -961,6 +973,8 @@  void memory_region_init_alias(MemoryRegion *mr,
                               uint64_t size)
 {
     memory_region_init(mr, name, size);
+    memory_region_ref(orig);
+    mr->destructor = memory_region_destructor_alias;
     mr->alias = orig;
     mr->alias_offset = offset;
 }
@@ -1539,6 +1553,8 @@  MemoryRegionSection memory_region_find(MemoryRegion *mr,
     ret.size = int128_get64(range.size);
     ret.offset_within_address_space = int128_get64(range.start);
     ret.readonly = fr->readonly;
+    memory_region_ref(ret.mr);
+
     return ret;
 }
 
diff --git a/target-arm/kvm.c b/target-arm/kvm.c
index b7bdc03..b9051a4 100644
--- a/target-arm/kvm.c
+++ b/target-arm/kvm.c
@@ -127,6 +127,7 @@  static void kvm_arm_machine_init_done(Notifier *notifier, void *data)
                 abort();
             }
         }
+        memory_region_unref(kd->mr);
         g_free(kd);
     }
 }
@@ -152,6 +153,7 @@  void kvm_arm_register_device(MemoryRegion *mr, uint64_t devid)
     kd->kda.id = devid;
     kd->kda.addr = -1;
     QSLIST_INSERT_HEAD(&kvm_devices_head, kd, entries);
+    memory_region_ref(kd->mr);
 }
 
 typedef struct Reg {
diff --git a/target-sparc/mmu_helper.c b/target-sparc/mmu_helper.c
index a9649ae..3c1ccc2 100644
--- a/target-sparc/mmu_helper.c
+++ b/target-sparc/mmu_helper.c
@@ -845,6 +845,7 @@  hwaddr cpu_get_phys_page_debug(CPUSPARCState *env, target_ulong addr)
         }
     }
     section = memory_region_find(get_system_memory(), phys_addr, 1);
+    memory_region_unref(section.mr);
     if (!section.size) {
         return -1;
     }
diff --git a/xen-all.c b/xen-all.c
index 539a154..2f24ced 100644
--- a/xen-all.c
+++ b/xen-all.c
@@ -459,6 +459,7 @@  static void xen_set_memory(struct MemoryListener *listener,
 static void xen_region_add(MemoryListener *listener,
                            MemoryRegionSection *section)
 {
+    memory_region_ref(section->mr);
     xen_set_memory(listener, section, true);
 }
 
@@ -466,6 +467,7 @@  static void xen_region_del(MemoryListener *listener,
                            MemoryRegionSection *section)
 {
     xen_set_memory(listener, section, false);
+    memory_region_unref(section->mr);
 }
 
 static void xen_sync_dirty_bitmap(XenIOState *state,
-- 
1.7.1


From 6154dafb504ac657c5514d388743b02bb7c4b26a Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Tue, 7 May 2013 06:59:52 +0200
Subject: [PATCH 27/40] pci: set owner for BARs

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 hw/pci/pci.c |    2 ++
 1 files changed, 2 insertions(+), 0 deletions(-)

diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 3db7f55..0daac29 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -919,6 +919,8 @@  void pci_register_bar(PCIDevice *pci_dev, int region_num,
     uint64_t wmask;
     pcibus_t size = memory_region_size(memory);
 
+    memory_region_set_owner(memory, OBJECT(pci_dev));
+
     assert(region_num >= 0);
     assert(region_num < PCI_NUM_REGIONS);
     if (size & (size-1)) {
-- 
1.7.1


From c15017492a7d8c3af69338e04961e8db218d1564 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Tue, 7 May 2013 07:26:41 +0200
Subject: [PATCH 28/40] sysbus: set owner for MMIO regions

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 hw/core/sysbus.c |    2 ++
 1 files changed, 2 insertions(+), 0 deletions(-)

diff --git a/hw/core/sysbus.c b/hw/core/sysbus.c
index 9004d8c..788696b 100644
--- a/hw/core/sysbus.c
+++ b/hw/core/sysbus.c
@@ -115,6 +115,8 @@  void sysbus_init_mmio(SysBusDevice *dev, MemoryRegion *memory)
     n = dev->num_mmio++;
     dev->mmio[n].addr = -1;
     dev->mmio[n].memory = memory;
+
+    memory_region_set_owner(dev->mmio[n].memory, OBJECT(dev));
 }
 
 MemoryRegion *sysbus_mmio_get_region(SysBusDevice *dev, int n)
-- 
1.7.1


From 18d459f8c3ce069878e94fcc2ac979c618d6124d Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Tue, 7 May 2013 08:56:13 +0200
Subject: [PATCH 29/40] acpi: add memory_region_set_owner calls

ACPI regions are added directly to the I/O address space, without
going through BARs.  Thus they need the owner to be set directly.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 hw/acpi/ich9.c  |    1 +
 hw/acpi/piix4.c |    5 +++++
 hw/isa/apm.c    |    1 +
 3 files changed, 7 insertions(+), 0 deletions(-)

diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c
index 4a17f32..0b19864 100644
--- a/hw/acpi/ich9.c
+++ b/hw/acpi/ich9.c
@@ -223,6 +223,7 @@  void ich9_pm_init(PCIDevice *lpc_pci, ICH9LPCPMRegs *pm,
                           8);
     memory_region_add_subregion(&pm->io, ICH9_PMIO_SMI_EN, &pm->io_smi);
 
+    memory_region_set_owner(&pm->io, OBJECT(lpc_pci));
     pm->irq = sci_irq;
     qemu_register_reset(pm_reset, pm);
     pm->powerdown_notifier.notify = pm_powerdown_req;
diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c
index c4af1cc..d097592 100644
--- a/hw/acpi/piix4.c
+++ b/hw/acpi/piix4.c
@@ -433,6 +433,8 @@  static int piix4_pm_initfn(PCIDevice *dev)
     acpi_pm1_cnt_init(&s->ar, &s->io, s->s4_val);
     acpi_gpe_init(&s->ar, GPE_LEN);
 
+    memory_region_set_owner(&s->io, OBJECT(s));
+
     s->powerdown_notifier.notify = piix4_pm_powerdown_req;
     qemu_register_powerdown_notifier(&s->powerdown_notifier);
 
@@ -672,10 +674,12 @@  static void piix4_acpi_system_hot_add_init(MemoryRegion *parent,
 {
     memory_region_init_io(&s->io_gpe, &piix4_gpe_ops, s, "apci-gpe0",
                           GPE_LEN);
+    memory_region_set_owner(&s->io_gpe, OBJECT(s));
     memory_region_add_subregion(parent, GPE_BASE, &s->io_gpe);
 
     memory_region_init_io(&s->io_pci, &piix4_pci_ops, s, "apci-pci-hotplug",
                           PCI_HOTPLUG_SIZE);
+    memory_region_set_owner(&s->io_pci, OBJECT(s));
     memory_region_add_subregion(parent, PCI_HOTPLUG_ADDR,
                                 &s->io_pci);
     pci_bus_hotplug(bus, piix4_device_hotplug, &s->dev.qdev);
@@ -683,6 +687,7 @@  static void piix4_acpi_system_hot_add_init(MemoryRegion *parent,
     qemu_for_each_cpu(piix4_init_cpu_status, &s->gpe_cpu);
     memory_region_init_io(&s->io_cpu, &cpu_hotplug_ops, s, "apci-cpu-hotplug",
                           PIIX4_PROC_LEN);
+    memory_region_set_owner(&s->io_cpu, OBJECT(s));
     memory_region_add_subregion(parent, PIIX4_PROC_BASE, &s->io_cpu);
     s->cpu_added_notifier.notify = piix4_cpu_added_req;
     qemu_register_cpu_added_notifier(&s->cpu_added_notifier);
diff --git a/hw/isa/apm.c b/hw/isa/apm.c
index 5f21d21..376c564 100644
--- a/hw/isa/apm.c
+++ b/hw/isa/apm.c
@@ -97,6 +97,7 @@  void apm_init(PCIDevice *dev, APMState *apm, apm_ctrl_changed_t callback,
 
     /* ioport 0xb2, 0xb3 */
     memory_region_init_io(&apm->io, &apm_ops, apm, "apm-io", 2);
+    memory_region_set_owner(&apm->io, OBJECT(dev));
     memory_region_add_subregion(pci_address_space_io(dev), APM_CNT_IOPORT,
                                 &apm->io);
 }
-- 
1.7.1


From 66ff6cd04763424cf883d3a262bd883bbf3213b9 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Tue, 7 May 2013 08:57:58 +0200
Subject: [PATCH 30/40] misc: add memory_region_set_owner calls

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 hw/char/serial-pci.c |    1 +
 hw/misc/pc-testdev.c |    7 +++++++
 2 files changed, 8 insertions(+), 0 deletions(-)

diff --git a/hw/char/serial-pci.c b/hw/char/serial-pci.c
index 2138e35..6b6106b 100644
--- a/hw/char/serial-pci.c
+++ b/hw/char/serial-pci.c
@@ -106,6 +106,7 @@  static int multi_serial_pci_init(PCIDevice *dev)
         s->irq = pci->irqs[i];
         pci->name[i] = g_strdup_printf("uart #%d", i+1);
         memory_region_init_io(&s->io, &serial_io_ops, s, pci->name[i], 8);
+        memory_region_set_owner(&s->io, OBJECT(pci));
         memory_region_add_subregion(&pci->iobar, 8 * i, &s->io);
     }
     return 0;
diff --git a/hw/misc/pc-testdev.c b/hw/misc/pc-testdev.c
index 32df175..77998d6 100644
--- a/hw/misc/pc-testdev.c
+++ b/hw/misc/pc-testdev.c
@@ -150,12 +150,19 @@  static int init_test_device(ISADevice *isa)
 
     memory_region_init_io(&dev->ioport, &test_ioport_ops, dev,
                           "pc-testdev-ioport", 4);
+    memory_region_set_owner(&dev->ioport, OBJECT(dev));
+
     memory_region_init_io(&dev->flush, &test_flush_ops, dev,
                           "pc-testdev-flush-page", 4);
+    memory_region_set_owner(&dev->flush, OBJECT(dev));
+
     memory_region_init_io(&dev->irq, &test_irq_ops, dev,
                           "pc-testdev-irq-line", 24);
+    memory_region_set_owner(&dev->irq, OBJECT(dev));
+
     memory_region_init_io(&dev->iomem, &test_iomem_ops, dev,
                           "pc-testdev-iomem", IOMEM_LEN);
+    memory_region_set_owner(&dev->iomem, OBJECT(dev));
 
     memory_region_add_subregion(io,  0xe0,       &dev->ioport);
     memory_region_add_subregion(io,  0xe4,       &dev->flush);
-- 
1.7.1


From 5a9cbbf27c157f5857e477281ec7daa4ae09f049 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Tue, 7 May 2013 08:32:40 +0200
Subject: [PATCH 31/40] isa/portio: allow setting an owner

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 hw/isa/isa-bus.c      |    2 ++
 include/exec/ioport.h |    3 +++
 ioport.c              |   10 ++++++++++
 3 files changed, 15 insertions(+), 0 deletions(-)

diff --git a/hw/isa/isa-bus.c b/hw/isa/isa-bus.c
index 7860b17..d263d0f 100644
--- a/hw/isa/isa-bus.c
+++ b/hw/isa/isa-bus.c
@@ -100,6 +100,7 @@  static inline void isa_init_ioport(ISADevice *dev, uint16_t ioport)
 
 void isa_register_ioport(ISADevice *dev, MemoryRegion *io, uint16_t start)
 {
+    memory_region_set_owner(io, OBJECT(dev));
     memory_region_add_subregion(isabus->address_space_io, start, io);
     isa_init_ioport(dev, start);
 }
@@ -116,6 +117,7 @@  void isa_register_portio_list(ISADevice *dev, uint16_t start,
     isa_init_ioport(dev, start);
 
     portio_list_init(piolist, pio_start, opaque, name);
+    portio_list_set_owner(piolist, OBJECT(dev));
     portio_list_add(piolist, isabus->address_space_io, start);
 }
 
diff --git a/include/exec/ioport.h b/include/exec/ioport.h
index fc28350..5fe0d99 100644
--- a/include/exec/ioport.h
+++ b/include/exec/ioport.h
@@ -62,6 +62,7 @@  typedef struct PortioList {
     unsigned nr;
     struct MemoryRegion **regions;
     struct MemoryRegion **aliases;
+    struct Object *owner;
     void *opaque;
     const char *name;
 } PortioList;
@@ -69,6 +70,8 @@  typedef struct PortioList {
 void portio_list_init(PortioList *piolist,
                       const struct MemoryRegionPortio *callbacks,
                       void *opaque, const char *name);
+void portio_list_set_owner(PortioList *piolist,
+                           struct Object *owner);
 void portio_list_destroy(PortioList *piolist);
 void portio_list_add(PortioList *piolist,
                      struct MemoryRegion *address_space,
diff --git a/ioport.c b/ioport.c
index a0ac2a0..1cccd70 100644
--- a/ioport.c
+++ b/ioport.c
@@ -347,6 +347,12 @@  void portio_list_init(PortioList *piolist,
     piolist->address_space = NULL;
     piolist->opaque = opaque;
     piolist->name = name;
+    piolist->owner = NULL;
+}
+
+void portio_list_set_owner(PortioList *piolist, Object *owner)
+{
+    piolist->owner = owner;
 }
 
 void portio_list_destroy(PortioList *piolist)
@@ -386,8 +392,12 @@  static void portio_list_add_1(PortioList *piolist,
      */
     memory_region_init_io(region, ops, piolist->opaque, piolist->name,
                           INT64_MAX);
+    memory_region_set_owner(region, piolist->owner);
+
     memory_region_init_alias(alias, piolist->name,
                              region, start + off_low, off_high - off_low);
+    memory_region_set_owner(alias, piolist->owner);
+
     memory_region_add_subregion(piolist->address_space,
                                 start + off_low, alias);
     piolist->regions[piolist->nr] = region;
-- 
1.7.1


From 440e242741c891464ddc8354772303018ece636d Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Tue, 7 May 2013 08:58:25 +0200
Subject: [PATCH 32/40] vga: add memory_region_set_owner calls

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 hw/display/cirrus_vga.c |   19 ++++++++++++++-----
 hw/display/qxl.c        |    5 +++--
 hw/display/vga-isa-mm.c |    2 +-
 hw/display/vga-isa.c    |    4 ++--
 hw/display/vga-pci.c    |    5 +++--
 hw/display/vga.c        |   19 ++++++++++++++-----
 hw/display/vga_int.h    |    9 +++++----
 hw/display/vmware_vga.c |    4 ++--
 8 files changed, 44 insertions(+), 23 deletions(-)

diff --git a/hw/display/cirrus_vga.c b/hw/display/cirrus_vga.c
index 64bfe2b..ffc4dd4 100644
--- a/hw/display/cirrus_vga.c
+++ b/hw/display/cirrus_vga.c
@@ -2806,12 +2806,14 @@  static const MemoryRegionOps cirrus_vga_io_ops = {
     },
 };
 
-static void cirrus_init_common(CirrusVGAState * s, int device_id, int is_pci,
+static void cirrus_init_common(CirrusVGAState * s, int device_id,
+                               DeviceState *owner,
                                MemoryRegion *system_memory,
                                MemoryRegion *system_io)
 {
     int i;
     static int inited;
+    int is_pci = !!object_dynamic_cast(OBJECT(owner), TYPE_PCI_DEVICE);
 
     if (!inited) {
         inited = 1;
@@ -2843,19 +2845,23 @@  static void cirrus_init_common(CirrusVGAState * s, int device_id, int is_pci,
     /* Register ioport 0x3b0 - 0x3df */
     memory_region_init_io(&s->cirrus_vga_io, &cirrus_vga_io_ops, s,
                           "cirrus-io", 0x30);
+    memory_region_set_owner(&s->cirrus_vga_io, OBJECT(owner));
     memory_region_add_subregion(system_io, 0x3b0, &s->cirrus_vga_io);
 
     memory_region_init(&s->low_mem_container,
                        "cirrus-lowmem-container",
                        0x20000);
+    memory_region_set_owner(&s->low_mem_container, OBJECT(owner));
 
     memory_region_init_io(&s->low_mem, &cirrus_vga_mem_ops, s,
                           "cirrus-low-memory", 0x20000);
+    memory_region_set_owner(&s->low_mem, OBJECT(owner));
     memory_region_add_subregion(&s->low_mem_container, 0, &s->low_mem);
     for (i = 0; i < 2; ++i) {
         static const char *names[] = { "vga.bank0", "vga.bank1" };
         MemoryRegion *bank = &s->cirrus_bank[i];
         memory_region_init_alias(bank, names[i], &s->vga.vram, 0, 0x8000);
+        memory_region_set_owner(bank, OBJECT(owner));
         memory_region_set_enabled(bank, false);
         memory_region_add_subregion_overlap(&s->low_mem_container, i * 0x8000,
                                             bank, 1);
@@ -2870,6 +2876,7 @@  static void cirrus_init_common(CirrusVGAState * s, int device_id, int is_pci,
     memory_region_init_io(&s->cirrus_linear_io, &cirrus_linear_io_ops, s,
                           "cirrus-linear-io", s->vga.vram_size_mb
                                               * 1024 * 1024);
+    memory_region_set_owner(&s->cirrus_linear_io, OBJECT(owner));
     memory_region_set_flush_coalesced(&s->cirrus_linear_io);
 
     /* I/O handler for LFB */
@@ -2878,11 +2885,13 @@  static void cirrus_init_common(CirrusVGAState * s, int device_id, int is_pci,
                           s,
                           "cirrus-bitblt-mmio",
                           0x400000);
+    memory_region_set_owner(&s->cirrus_linear_bitblt_io, OBJECT(owner));
     memory_region_set_flush_coalesced(&s->cirrus_linear_bitblt_io);
 
     /* I/O handler for memory-mapped I/O */
     memory_region_init_io(&s->cirrus_mmio_io, &cirrus_mmio_io_ops, s,
                           "cirrus-mmio", CIRRUS_PNPMMIO_SIZE);
+    memory_region_set_owner(&s->cirrus_mmio_io, OBJECT(owner));
     memory_region_set_flush_coalesced(&s->cirrus_mmio_io);
 
     s->real_vram_size =
@@ -2912,8 +2921,8 @@  static int vga_initfn(ISADevice *dev)
     ISACirrusVGAState *d = ISA_CIRRUS_VGA(dev);
     VGACommonState *s = &d->cirrus_vga.vga;
 
-    vga_common_init(s);
-    cirrus_init_common(&d->cirrus_vga, CIRRUS_ID_CLGD5430, 0,
+    vga_common_init(s, DEVICE(dev));
+    cirrus_init_common(&d->cirrus_vga, CIRRUS_ID_CLGD5430, DEVICE(d),
                        isa_address_space(dev), isa_address_space_io(dev));
     s->con = graphic_console_init(DEVICE(dev), s->hw_ops, s);
     rom_add_vga(VGABIOS_CIRRUS_FILENAME);
@@ -2959,8 +2968,8 @@  static int pci_cirrus_vga_initfn(PCIDevice *dev)
      int16_t device_id = pc->device_id;
 
      /* setup VGA */
-     vga_common_init(&s->vga);
-     cirrus_init_common(s, device_id, 1, pci_address_space(dev),
+     vga_common_init(&s->vga, DEVICE(dev));
+     cirrus_init_common(s, device_id, DEVICE(dev), pci_address_space(dev),
                         pci_address_space_io(dev));
      s->vga.con = graphic_console_init(DEVICE(dev), s->vga.hw_ops, &s->vga);
 
diff --git a/hw/display/qxl.c b/hw/display/qxl.c
index 2d49e9a..fc7bfa9 100644
--- a/hw/display/qxl.c
+++ b/hw/display/qxl.c
@@ -2063,9 +2063,10 @@  static int qxl_init_primary(PCIDevice *dev)
     qxl->id = 0;
     qxl_init_ramsize(qxl);
     vga->vram_size_mb = qxl->vga.vram_size >> 20;
-    vga_common_init(vga);
-    vga_init(vga, pci_address_space(dev), pci_address_space_io(dev), false);
+    vga_common_init(vga, DEVICE(dev));
+    vga_init(vga, DEVICE(dev), pci_address_space(dev), pci_address_space_io(dev), false);
     portio_list_init(qxl_vga_port_list, qxl_vga_portio_list, vga, "vga");
+    portio_list_set_owner(qxl_vga_port_list, OBJECT(dev));
     portio_list_add(qxl_vga_port_list, pci_address_space_io(dev), 0x3b0);
 
     vga->con = graphic_console_init(DEVICE(dev), &qxl_ops, qxl);
diff --git a/hw/display/vga-isa-mm.c b/hw/display/vga-isa-mm.c
index ceeb92f..64c6fc3 100644
--- a/hw/display/vga-isa-mm.c
+++ b/hw/display/vga-isa-mm.c
@@ -132,7 +132,7 @@  int isa_vga_mm_init(hwaddr vram_base,
     s = g_malloc0(sizeof(*s));
 
     s->vga.vram_size_mb = VGA_RAM_SIZE >> 20;
-    vga_common_init(&s->vga);
+    vga_common_init(&s->vga, NULL);
     vga_mm_init(s, vram_base, ctrl_base, it_shift, address_space);
 
     s->vga.con = graphic_console_init(NULL, s->vga.hw_ops, s);
diff --git a/hw/display/vga-isa.c b/hw/display/vga-isa.c
index 9e63b69..f52c104 100644
--- a/hw/display/vga-isa.c
+++ b/hw/display/vga-isa.c
@@ -55,9 +55,9 @@  static int vga_initfn(ISADevice *dev)
     MemoryRegion *vga_io_memory;
     const MemoryRegionPortio *vga_ports, *vbe_ports;
 
-    vga_common_init(s);
+    vga_common_init(s, DEVICE(dev));
     s->legacy_address_space = isa_address_space(dev);
-    vga_io_memory = vga_init_io(s, &vga_ports, &vbe_ports);
+    vga_io_memory = vga_init_io(s, DEVICE(dev), &vga_ports, &vbe_ports);
     isa_register_portio_list(dev, 0x3b0, vga_ports, s, "vga");
     if (vbe_ports) {
         isa_register_portio_list(dev, 0x1ce, vbe_ports, s, "vbe");
diff --git a/hw/display/vga-pci.c b/hw/display/vga-pci.c
index cea8db7..3f860e9 100644
--- a/hw/display/vga-pci.c
+++ b/hw/display/vga-pci.c
@@ -147,8 +147,9 @@  static int pci_std_vga_initfn(PCIDevice *dev)
     VGACommonState *s = &d->vga;
 
     /* vga + console init */
-    vga_common_init(s);
-    vga_init(s, pci_address_space(dev), pci_address_space_io(dev), true);
+    vga_common_init(s, DEVICE(dev));
+    vga_init(s, DEVICE(dev), pci_address_space(dev), pci_address_space_io(dev),
+             true);
 
     s->con = graphic_console_init(DEVICE(dev), s->hw_ops, s);
 
diff --git a/hw/display/vga.c b/hw/display/vga.c
index 21a108d..2d7e37a 100644
--- a/hw/display/vga.c
+++ b/hw/display/vga.c
@@ -199,6 +199,8 @@  static void vga_update_memory_access(VGACommonState *s)
         base += isa_mem_base;
         region = g_malloc(sizeof(*region));
         memory_region_init_alias(region, "vga.chain4", &s->vram, offset, size);
+        memory_region_set_owner(region, memory_region_owner(&s->vram));
+
         memory_region_add_subregion_overlap(s->legacy_address_space, base,
                                             region, 2);
         s->chain4_alias = region;
@@ -2256,7 +2258,7 @@  static const GraphicHwOps vga_ops = {
     .text_update = vga_update_text,
 };
 
-void vga_common_init(VGACommonState *s)
+void vga_common_init(VGACommonState *s, DeviceState *owner)
 {
     int i, j, v, b;
 
@@ -2293,6 +2295,7 @@  void vga_common_init(VGACommonState *s)
 
     s->is_vbe_vmstate = 1;
     memory_region_init_ram(&s->vram, "vga.vram", s->vram_size);
+    memory_region_set_owner(&s->vram, OBJECT(owner));
     vmstate_register_ram_global(&s->vram);
     xen_register_framebuffer(&s->vram);
     s->vram_ptr = memory_region_get_ram_ptr(&s->vram);
@@ -2333,7 +2336,7 @@  static const MemoryRegionPortio vbe_portio_list[] = {
 };
 
 /* Used by both ISA and PCI */
-MemoryRegion *vga_init_io(VGACommonState *s,
+MemoryRegion *vga_init_io(VGACommonState *s, DeviceState *owner,
                           const MemoryRegionPortio **vga_ports,
                           const MemoryRegionPortio **vbe_ports)
 {
@@ -2345,13 +2348,15 @@  MemoryRegion *vga_init_io(VGACommonState *s,
     vga_mem = g_malloc(sizeof(*vga_mem));
     memory_region_init_io(vga_mem, &vga_mem_ops, s,
                           "vga-lowmem", 0x20000);
+    memory_region_set_owner(vga_mem, OBJECT(owner));
     memory_region_set_flush_coalesced(vga_mem);
 
     return vga_mem;
 }
 
-void vga_init(VGACommonState *s, MemoryRegion *address_space,
-              MemoryRegion *address_space_io, bool init_vga_ports)
+void vga_init(VGACommonState *s, DeviceState *owner,
+              MemoryRegion *address_space, MemoryRegion *address_space_io,
+              bool init_vga_ports)
 {
     MemoryRegion *vga_io_memory;
     const MemoryRegionPortio *vga_ports, *vbe_ports;
@@ -2364,7 +2369,7 @@  void vga_init(VGACommonState *s, MemoryRegion *address_space,
 
     s->legacy_address_space = address_space;
 
-    vga_io_memory = vga_init_io(s, &vga_ports, &vbe_ports);
+    vga_io_memory = vga_init_io(s, owner, &vga_ports, &vbe_ports);
     memory_region_add_subregion_overlap(address_space,
                                         isa_mem_base + 0x000a0000,
                                         vga_io_memory,
@@ -2372,10 +2377,12 @@  void vga_init(VGACommonState *s, MemoryRegion *address_space,
     memory_region_set_coalescing(vga_io_memory);
     if (init_vga_ports) {
         portio_list_init(vga_port_list, vga_ports, s, "vga");
+        portio_list_set_owner(vga_port_list, OBJECT(owner));
         portio_list_add(vga_port_list, address_space_io, 0x3b0);
     }
     if (vbe_ports) {
         portio_list_init(vbe_port_list, vbe_ports, s, "vbe");
+        portio_list_set_owner(vbe_port_list, OBJECT(owner));
         portio_list_add(vbe_port_list, address_space_io, 0x1ce);
     }
 }
@@ -2387,6 +2394,8 @@  void vga_init_vbe(VGACommonState *s, MemoryRegion *system_memory)
      */
     memory_region_init_alias(&s->vram_vbe, "vram.vbe",
                              &s->vram, 0, memory_region_size(&s->vram));
+    memory_region_set_owner(&s->vram_vbe, memory_region_owner(&s->vram));
+
     /* XXX: use optimized standard vga accesses */
     memory_region_add_subregion(system_memory,
                                 VBE_DISPI_LFB_PHYSICAL_ADDRESS,
diff --git a/hw/display/vga_int.h b/hw/display/vga_int.h
index 66f9f3c..7fe4967 100644
--- a/hw/display/vga_int.h
+++ b/hw/display/vga_int.h
@@ -177,10 +177,11 @@  static inline int c6_to_8(int v)
     return (v << 2) | (b << 1) | b;
 }
 
-void vga_common_init(VGACommonState *s);
-void vga_init(VGACommonState *s, MemoryRegion *address_space,
-              MemoryRegion *address_space_io, bool init_vga_ports);
-MemoryRegion *vga_init_io(VGACommonState *s,
+void vga_common_init(VGACommonState *s, DeviceState *owner);
+void vga_init(VGACommonState *s, DeviceState *owner,
+              MemoryRegion *address_space, MemoryRegion *address_space_io,
+              bool init_vga_ports);
+MemoryRegion *vga_init_io(VGACommonState *s, DeviceState *owner,
                           const MemoryRegionPortio **vga_ports,
                           const MemoryRegionPortio **vbe_ports);
 void vga_common_reset(VGACommonState *s);
diff --git a/hw/display/vmware_vga.c b/hw/display/vmware_vga.c
index fd3569d..1f94b8e 100644
--- a/hw/display/vmware_vga.c
+++ b/hw/display/vmware_vga.c
@@ -1198,8 +1198,8 @@  static void vmsvga_init(DeviceState *dev, struct vmsvga_state_s *s,
     vmstate_register_ram_global(&s->fifo_ram);
     s->fifo_ptr = memory_region_get_ram_ptr(&s->fifo_ram);
 
-    vga_common_init(&s->vga);
-    vga_init(&s->vga, address_space, io, true);
+    vga_common_init(&s->vga, dev);
+    vga_init(&s->vga, dev, address_space, io, true);
     vmstate_register(NULL, 0, &vmstate_vga_common, &s->vga);
     s->new_depth = 32;
 }
-- 
1.7.1


From ad7acb4678bba543423637c8ac0179b4cc14679c Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Tue, 7 May 2013 08:59:02 +0200
Subject: [PATCH 33/40] pci-assign: add memory_region_set_owner calls

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 hw/i386/kvm/pci-assign.c |   11 +++++++++++
 1 files changed, 11 insertions(+), 0 deletions(-)

diff --git a/hw/i386/kvm/pci-assign.c b/hw/i386/kvm/pci-assign.c
index c1e08ec..80cda9a 100644
--- a/hw/i386/kvm/pci-assign.c
+++ b/hw/i386/kvm/pci-assign.c
@@ -300,6 +300,7 @@  static void assigned_dev_iomem_setup(PCIDevice *pci_dev, int region_num,
     if (e_size > 0) {
         memory_region_init(&region->container, "assigned-dev-container",
                            e_size);
+        memory_region_set_owner(&region->container, OBJECT(pci_dev));
         memory_region_add_subregion(&region->container, 0, &region->real_iomem);
 
         /* deal with MSI-X MMIO page */
@@ -330,9 +331,12 @@  static void assigned_dev_ioport_setup(PCIDevice *pci_dev, int region_num,
 
     region->e_size = size;
     memory_region_init(&region->container, "assigned-dev-container", size);
+    memory_region_set_owner(&region->container, OBJECT(pci_dev));
+
     memory_region_init_io(&region->real_iomem, &assigned_dev_ioport_ops,
                           r_dev->v_addrs + region_num,
                           "assigned-dev-iomem", size);
+    memory_region_set_owner(&region->real_iomem, OBJECT(pci_dev));
     memory_region_add_subregion(&region->container, 0, &region->real_iomem);
 }
 
@@ -482,6 +486,8 @@  static int assigned_dev_register_regions(PCIRegion *io_regions,
                                       &slow_bar_ops, &pci_dev->v_addrs[i],
                                       "assigned-dev-slow-bar",
                                       cur_region->size);
+                memory_region_set_owner(&pci_dev->v_addrs[i].real_iomem,
+                                        OBJECT(pci_dev));
             } else {
                 void *virtbase = pci_dev->v_addrs[i].u.r_virtbase;
                 char name[32];
@@ -490,6 +496,9 @@  static int assigned_dev_register_regions(PCIRegion *io_regions,
                 memory_region_init_ram_ptr(&pci_dev->v_addrs[i].real_iomem,
                                            name, cur_region->size,
                                            virtbase);
+                memory_region_set_owner(&pci_dev->v_addrs[i].real_iomem,
+                                        OBJECT(pci_dev));
+
                 vmstate_register_ram(&pci_dev->v_addrs[i].real_iomem,
                                      &pci_dev->dev.qdev);
             }
@@ -1633,6 +1642,7 @@  static int assigned_dev_register_msix_mmio(AssignedDevice *dev)
 
     memory_region_init_io(&dev->mmio, &assigned_dev_msix_mmio_ops, dev,
                           "assigned-dev-msix", MSIX_PAGE_SIZE);
+    memory_region_set_owner(&dev->mmio, OBJECT(dev));
     return 0;
 }
 
@@ -1898,6 +1908,7 @@  static void assigned_dev_load_option_rom(AssignedDevice *dev)
     snprintf(name, sizeof(name), "%s.rom",
             object_get_typename(OBJECT(dev)));
     memory_region_init_ram(&dev->dev.rom, name, st.st_size);
+    memory_region_set_owner(&dev->dev.rom, OBJECT(dev));
     vmstate_register_ram(&dev->dev.rom, &dev->dev.qdev);
     ptr = memory_region_get_ram_ptr(&dev->dev.rom);
     memset(ptr, 0xff, st.st_size);
-- 
1.7.1


From 2c3d6858c7095704384fd5aadbf8280c64049601 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Tue, 7 May 2013 08:59:11 +0200
Subject: [PATCH 34/40] vfio: add memory_region_set_owner calls

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 hw/misc/vfio.c |    8 ++++++++
 1 files changed, 8 insertions(+), 0 deletions(-)

diff --git a/hw/misc/vfio.c b/hw/misc/vfio.c
index 0ae6878..c4a8853 100644
--- a/hw/misc/vfio.c
+++ b/hw/misc/vfio.c
@@ -1156,6 +1156,7 @@  static void vfio_vga_probe_ati_3c3_quirk(VFIODevice *vdev)
 
     memory_region_init_io(&quirk->mem, &vfio_ati_3c3_quirk, quirk,
                           "vfio-ati-3c3-quirk", 1);
+    memory_region_set_owner(&quirk->mem, OBJECT(vdev));
     memory_region_add_subregion(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].mem, 3,
                                 &quirk->mem);
 
@@ -1247,6 +1248,7 @@  static void vfio_probe_ati_4010_quirk(VFIODevice *vdev, int nr)
 
     memory_region_init_io(&quirk->mem, &vfio_ati_4010_quirk, quirk,
                           "vfio-ati-4010-quirk", 8);
+    memory_region_set_owner(&quirk->mem, OBJECT(vdev));
     memory_region_add_subregion_overlap(&vdev->bars[nr].mem, 0, &quirk->mem, 1);
 
     QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
@@ -1333,6 +1335,7 @@  static void vfio_probe_ati_f10_quirk(VFIODevice *vdev, int nr)
 
     memory_region_init_io(&quirk->mem, &vfio_ati_f10_quirk, quirk,
                           "vfio-ati-f10-quirk", 8);
+    memory_region_set_owner(&quirk->mem, OBJECT(vdev));
     memory_region_add_subregion_overlap(&vdev->bars[nr].mem, 0, &quirk->mem, 1);
 
     QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
@@ -1453,6 +1456,7 @@  static void vfio_vga_probe_nvidia_3d0_quirk(VFIODevice *vdev)
 
     memory_region_init_io(&quirk->mem, &vfio_nvidia_3d0_quirk, quirk,
                           "vfio-nvidia-3d0-quirk", 6);
+    memory_region_set_owner(&quirk->mem, OBJECT(vdev));
     memory_region_add_subregion(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].mem,
                                 0x10, &quirk->mem);
 
@@ -1568,6 +1572,7 @@  static void vfio_probe_nvidia_bar5_window_quirk(VFIODevice *vdev, int nr)
 
     memory_region_init_io(&quirk->mem, &vfio_nvidia_bar5_window_quirk, quirk,
                           "vfio-nvidia-bar5-window-quirk", 16);
+    memory_region_set_owner(&quirk->mem, OBJECT(vdev));
     memory_region_add_subregion_overlap(&vdev->bars[nr].mem, 0, &quirk->mem, 1);
 
     QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
@@ -1647,6 +1652,7 @@  static void vfio_probe_nvidia_bar0_88000_quirk(VFIODevice *vdev, int nr)
     memory_region_init_io(&quirk->mem, &vfio_nvidia_bar0_88000_quirk, quirk,
                           "vfio-nvidia-bar0-88000-quirk",
                           TARGET_PAGE_ALIGN(PCIE_CONFIG_SPACE_SIZE));
+    memory_region_set_owner(&quirk->mem, OBJECT(vdev));
     memory_region_add_subregion_overlap(&vdev->bars[nr].mem,
                                         0x88000 & TARGET_PAGE_MASK,
                                         &quirk->mem, 1);
@@ -1726,6 +1732,7 @@  static void vfio_probe_nvidia_bar0_1800_quirk(VFIODevice *vdev, int nr)
     memory_region_init_io(&quirk->mem, &vfio_nvidia_bar0_1800_quirk, quirk,
                           "vfio-nvidia-bar0-1800-quirk",
                           TARGET_PAGE_ALIGN(PCI_CONFIG_SPACE_SIZE));
+    memory_region_set_owner(&quirk->mem, OBJECT(vdev));
     memory_region_add_subregion_overlap(&vdev->bars[nr].mem,
                                         0x1800 & TARGET_PAGE_MASK,
                                         &quirk->mem, 1);
@@ -2237,6 +2244,7 @@  empty_region:
         memory_region_init(submem, name, 0);
     }
 
+    memory_region_set_owner(submem, memory_region_owner(mem));
     memory_region_add_subregion(mem, offset, submem);
 
     return ret;
-- 
1.7.1


From 06f4696fd1c7978d51c809959fd4ce8877f9b448 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon, 6 May 2013 14:28:39 +0200
Subject: [PATCH 35/40] exec: check MRU in qemu_ram_addr_from_host

This function is not used outside the iothread mutex, so it
can use ram_list.mru_block.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 exec.c |   12 ++++++++++--
 1 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/exec.c b/exec.c
index f163a55..2914936 100644
--- a/exec.c
+++ b/exec.c
@@ -1412,18 +1412,26 @@  int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
         return 0;
     }
 
+    block = ram_list.mru_block;
+    if (block && block->host && host - block->host < block->length) {
+        goto found;
+    }
+
     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
         /* This case append when the block is not mapped. */
         if (block->host == NULL) {
             continue;
         }
         if (host - block->host < block->length) {
-            *ram_addr = block->offset + (host - block->host);
-            return 0;
+            goto found;
         }
     }
 
     return -1;
+
+found:
+    *ram_addr = block->offset + (host - block->host);
+    return 0;
 }
 
 /* Some of the softmmu routines need to translate from a host pointer
-- 
1.7.1


From 8510b1a668c1d2620fc6e3311099585e46a140eb Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon, 6 May 2013 14:36:15 +0200
Subject: [PATCH 36/40] memory: return MemoryRegion from qemu_ram_addr_from_host

It will be needed in the next patch.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 exec.c                    |   35 +++++++++++++++++++++--------------
 include/exec/cpu-common.h |    2 +-
 target-i386/kvm.c         |    4 ++--
 3 files changed, 24 insertions(+), 17 deletions(-)

diff --git a/exec.c b/exec.c
index 2914936..63123e0 100644
--- a/exec.c
+++ b/exec.c
@@ -1295,15 +1295,7 @@  void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
 }
 #endif /* !_WIN32 */
 
-/* Return a host pointer to ram allocated with qemu_ram_alloc.
-   With the exception of the softmmu code in this file, this should
-   only be used for local memory (e.g. video ram) that the device owns,
-   and knows it isn't going to access beyond the end of the block.
-
-   It should not be used for general purpose DMA.
-   Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
- */
-void *qemu_get_ram_ptr(ram_addr_t addr)
+static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
 {
     RAMBlock *block;
 
@@ -1323,6 +1315,21 @@  void *qemu_get_ram_ptr(ram_addr_t addr)
 
 found:
     ram_list.mru_block = block;
+    return block;
+}
+
+/* Return a host pointer to ram allocated with qemu_ram_alloc.
+   With the exception of the softmmu code in this file, this should
+   only be used for local memory (e.g. video ram) that the device owns,
+   and knows it isn't going to access beyond the end of the block.
+
+   It should not be used for general purpose DMA.
+   Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
+ */
+void *qemu_get_ram_ptr(ram_addr_t addr)
+{
+    RAMBlock *block = qemu_get_ram_block(addr);
+
     if (xen_enabled()) {
         /* We need to check if the requested address is in the RAM
          * because we don't want to map the entire memory in QEMU.
@@ -1402,14 +1409,14 @@  void qemu_put_ram_ptr(void *addr)
     trace_qemu_put_ram_ptr(addr);
 }
 
-int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
+MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
 {
     RAMBlock *block;
     uint8_t *host = ptr;
 
     if (xen_enabled()) {
         *ram_addr = xen_ram_addr_from_mapcache(ptr);
-        return 0;
+        return qemu_get_ram_block(*ram_addr)->mr;
     }
 
     block = ram_list.mru_block;
@@ -1427,11 +1434,11 @@  int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
         }
     }
 
-    return -1;
+    return NULL;
 
 found:
     *ram_addr = block->offset + (host - block->host);
-    return 0;
+    return block->mr;
 }
 
 /* Some of the softmmu routines need to translate from a host pointer
@@ -1440,7 +1447,7 @@  ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
 {
     ram_addr_t ram_addr;
 
-    if (qemu_ram_addr_from_host(ptr, &ram_addr)) {
+    if (qemu_ram_addr_from_host(ptr, &ram_addr) == NULL) {
         fprintf(stderr, "Bad ram pointer %p\n", ptr);
         abort();
     }
diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
index 2e5f11f..84dfd3b 100644
--- a/include/exec/cpu-common.h
+++ b/include/exec/cpu-common.h
@@ -53,7 +53,7 @@  void qemu_ram_remap(ram_addr_t addr, ram_addr_t length);
 void *qemu_get_ram_ptr(ram_addr_t addr);
 void qemu_put_ram_ptr(void *addr);
 /* This should not be used by devices.  */
-int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr);
+MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr);
 ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr);
 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev);
 
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index 9ffb6ca..7ba98cd 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -318,7 +318,7 @@  int kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
 
     if ((env->mcg_cap & MCG_SER_P) && addr
         && (code == BUS_MCEERR_AR || code == BUS_MCEERR_AO)) {
-        if (qemu_ram_addr_from_host(addr, &ram_addr) ||
+        if (qemu_ram_addr_from_host(addr, &ram_addr) == NULL ||
             !kvm_physical_memory_addr_from_host(c->kvm_state, addr, &paddr)) {
             fprintf(stderr, "Hardware memory error for memory used by "
                     "QEMU itself instead of guest system!\n");
@@ -350,7 +350,7 @@  int kvm_arch_on_sigbus(int code, void *addr)
         hwaddr paddr;
 
         /* Hope we are lucky for AO MCE */
-        if (qemu_ram_addr_from_host(addr, &ram_addr) ||
+        if (qemu_ram_addr_from_host(addr, &ram_addr) == NULL ||
             !kvm_physical_memory_addr_from_host(CPU(first_cpu)->kvm_state,
                                                 addr, &paddr)) {
             fprintf(stderr, "Hardware memory error for memory used by "
-- 
1.7.1


From 4e00d11fa293223d3f25e3fca51dda536bbea84d Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon, 6 May 2013 14:37:15 +0200
Subject: [PATCH 37/40] memory: ref/unref memory across address_space_map/unmap

The iothread mutex might be released between map and unmap, so the
mapped region might disappear.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 exec.c |   12 +++++++++++-
 1 files changed, 11 insertions(+), 1 deletions(-)

diff --git a/exec.c b/exec.c
index 63123e0..1efcde4 100644
--- a/exec.c
+++ b/exec.c
@@ -2085,6 +2085,7 @@  void cpu_physical_memory_write_rom(hwaddr addr,
 }
 
 typedef struct {
+    MemoryRegion *mr;
     void *buffer;
     hwaddr addr;
     hwaddr len;
@@ -2179,15 +2180,18 @@  void *address_space_map(AddressSpace *as,
             bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
             bounce.addr = addr;
             bounce.len = l;
+            bounce.mr = section->mr;
             if (!is_write) {
                 address_space_read(as, addr, bounce.buffer, l);
             }
 
             *plen = l;
+            memory_region_ref(section->mr);
             return bounce.buffer;
         }
         if (!todo) {
             raddr = memory_region_get_ram_addr(section->mr) + xlat;
+            memory_region_ref(section->mr);
         } else {
             if (memory_region_get_ram_addr(section->mr) + xlat != raddr + todo) {
                 break;
@@ -2212,8 +2216,12 @@  void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
                          int is_write, hwaddr access_len)
 {
     if (buffer != bounce.buffer) {
+        MemoryRegion *mr;
+        ram_addr_t addr1;
+
+        mr = qemu_ram_addr_from_host(buffer, &addr1);
+        assert(mr);
         if (is_write) {
-            ram_addr_t addr1 = qemu_ram_addr_from_host_nofail(buffer);
             while (access_len) {
                 unsigned l;
                 l = TARGET_PAGE_SIZE;
@@ -2227,6 +2235,7 @@  void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
         if (xen_enabled()) {
             xen_invalidate_map_cache_entry(buffer);
         }
+        memory_region_unref(mr);
         return;
     }
     if (is_write) {
@@ -2234,6 +2243,7 @@  void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
     }
     qemu_vfree(bounce.buffer);
     bounce.buffer = NULL;
+    memory_region_unref(bounce.mr);
     cpu_notify_map_clients();
 }
 
-- 
1.7.1


From 22d38d6913e8cbc3c08574c8b71aa6ec7e547ea7 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon, 6 May 2013 10:26:13 +0200
Subject: [PATCH 38/40] memory: access FlatView from a local variable

We will soon require accesses to as->current_map to be placed under
a lock (with reference counting so as to keep the critical section
small).  To simplify this change, always fetch as->current_map into
a local variable and access it through that variable.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 memory.c |   31 +++++++++++++++++++++----------
 1 files changed, 21 insertions(+), 10 deletions(-)

diff --git a/memory.c b/memory.c
index 4b30c3f..75736b9 100644
--- a/memory.c
+++ b/memory.c
@@ -632,13 +632,15 @@  static void address_space_add_del_ioeventfds(AddressSpace *as,
 
 static void address_space_update_ioeventfds(AddressSpace *as)
 {
+    FlatView *view;
     FlatRange *fr;
     unsigned ioeventfd_nb = 0;
     MemoryRegionIoeventfd *ioeventfds = NULL;
     AddrRange tmp;
     unsigned i;
 
-    FOR_EACH_FLAT_RANGE(fr, as->current_map) {
+    view = as->current_map;
+    FOR_EACH_FLAT_RANGE(fr, view) {
         for (i = 0; i < fr->mr->ioeventfd_nb; ++i) {
             tmp = addrrange_shift(fr->mr->ioeventfds[i].addr,
                                   int128_sub(fr->addr.start,
@@ -1169,7 +1171,8 @@  void memory_region_sync_dirty_bitmap(MemoryRegion *mr)
     FlatRange *fr;
 
     QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) {
-        FOR_EACH_FLAT_RANGE(fr, as->current_map) {
+        FlatView *view = as->current_map;
+        FOR_EACH_FLAT_RANGE(fr, view) {
             if (fr->mr == mr) {
                 MEMORY_LISTENER_UPDATE_REGION(fr, as, Forward, log_sync);
             }
@@ -1219,12 +1222,14 @@  void *memory_region_get_ram_ptr(MemoryRegion *mr)
 
 static void memory_region_update_coalesced_range_as(MemoryRegion *mr, AddressSpace *as)
 {
+    FlatView *view;
     FlatRange *fr;
     CoalescedMemoryRange *cmr;
     AddrRange tmp;
     MemoryRegionSection section;
 
-    FOR_EACH_FLAT_RANGE(fr, as->current_map) {
+    view = as->current_map;
+    FOR_EACH_FLAT_RANGE(fr, view) {
         if (fr->mr == mr) {
             section = (MemoryRegionSection) {
                 .address_space = as,
@@ -1511,9 +1516,9 @@  static int cmp_flatrange_addr(const void *addr_, const void *fr_)
     return 0;
 }
 
-static FlatRange *address_space_lookup(AddressSpace *as, AddrRange addr)
+static FlatRange *flatview_lookup(FlatView *view, AddrRange addr)
 {
-    return bsearch(&addr, as->current_map->ranges, as->current_map->nr,
+    return bsearch(&addr, view->ranges, view->nr,
                    sizeof(FlatRange), cmp_flatrange_addr);
 }
 
@@ -1524,6 +1529,7 @@  MemoryRegionSection memory_region_find(MemoryRegion *mr,
     MemoryRegion *root;
     AddressSpace *as;
     AddrRange range;
+    FlatView *view;
     FlatRange *fr;
 
     addr += mr->addr;
@@ -1534,13 +1540,14 @@  MemoryRegionSection memory_region_find(MemoryRegion *mr,
 
     as = memory_region_to_address_space(root);
     range = addrrange_make(int128_make64(addr), int128_make64(size));
-    fr = address_space_lookup(as, range);
+
+    view = as->current_map;
+    fr = flatview_lookup(view, range);
     if (!fr) {
         return ret;
     }
 
-    while (fr > as->current_map->ranges
-           && addrrange_intersects(fr[-1].addr, range)) {
+    while (fr > view->ranges && addrrange_intersects(fr[-1].addr, range)) {
         --fr;
     }
 
@@ -1560,9 +1567,11 @@  MemoryRegionSection memory_region_find(MemoryRegion *mr,
 
 void address_space_sync_dirty_bitmap(AddressSpace *as)
 {
+    FlatView *view;
     FlatRange *fr;
 
-    FOR_EACH_FLAT_RANGE(fr, as->current_map) {
+    view = as->current_map;
+    FOR_EACH_FLAT_RANGE(fr, view) {
         MEMORY_LISTENER_UPDATE_REGION(fr, as, Forward, log_sync);
     }
 }
@@ -1582,6 +1591,7 @@  void memory_global_dirty_log_stop(void)
 static void listener_add_address_space(MemoryListener *listener,
                                        AddressSpace *as)
 {
+    FlatView *view;
     FlatRange *fr;
 
     if (listener->address_space_filter
@@ -1595,7 +1605,8 @@  static void listener_add_address_space(MemoryListener *listener,
         }
     }
 
-    FOR_EACH_FLAT_RANGE(fr, as->current_map) {
+    view = as->current_map;
+    FOR_EACH_FLAT_RANGE(fr, view) {
         MemoryRegionSection section = {
             .mr = fr->mr,
             .address_space = as,
-- 
1.7.1


From eee8881b980d7cc49675611ce5ec90564543f845 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon, 6 May 2013 10:29:07 +0200
Subject: [PATCH 39/40] memory: use a new FlatView pointer on every topology update

This is the first step towards converting as->current_map to
RCU-style updates, where the FlatView updates run concurrently
with uses of an old FlatView.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 memory.c |   34 ++++++++++++++++++----------------
 1 files changed, 18 insertions(+), 16 deletions(-)

diff --git a/memory.c b/memory.c
index 75736b9..52e1fca 100644
--- a/memory.c
+++ b/memory.c
@@ -273,6 +273,7 @@  static void flatview_destroy(FlatView *view)
         memory_region_unref(view->ranges[i].mr);
     }
     g_free(view->ranges);
+    g_free(view);
 }
 
 static bool can_merge(FlatRange *r1, FlatRange *r2)
@@ -566,17 +567,18 @@  static void render_memory_region(FlatView *view,
 }
 
 /* Render a memory topology into a list of disjoint absolute ranges. */
-static FlatView generate_memory_topology(MemoryRegion *mr)
+static FlatView *generate_memory_topology(MemoryRegion *mr)
 {
-    FlatView view;
+    FlatView *view;
 
-    flatview_init(&view);
+    view = g_new(FlatView, 1);
+    flatview_init(view);
 
     if (mr) {
-        render_memory_region(&view, mr, int128_zero(),
+        render_memory_region(view, mr, int128_zero(),
                              addrrange_make(int128_zero(), int128_2_64()), false);
     }
-    flatview_simplify(&view);
+    flatview_simplify(view);
 
     return view;
 }
@@ -664,8 +666,8 @@  static void address_space_update_ioeventfds(AddressSpace *as)
 }
 
 static void address_space_update_topology_pass(AddressSpace *as,
-                                               FlatView old_view,
-                                               FlatView new_view,
+                                               const FlatView *old_view,
+                                               const FlatView *new_view,
                                                bool adding)
 {
     unsigned iold, inew;
@@ -675,14 +677,14 @@  static void address_space_update_topology_pass(AddressSpace *as,
      * Kill ranges in the old map, and instantiate ranges in the new map.
      */
     iold = inew = 0;
-    while (iold < old_view.nr || inew < new_view.nr) {
-        if (iold < old_view.nr) {
-            frold = &old_view.ranges[iold];
+    while (iold < old_view->nr || inew < new_view->nr) {
+        if (iold < old_view->nr) {
+            frold = &old_view->ranges[iold];
         } else {
             frold = NULL;
         }
-        if (inew < new_view.nr) {
-            frnew = &new_view.ranges[inew];
+        if (inew < new_view->nr) {
+            frnew = &new_view->ranges[inew];
         } else {
             frnew = NULL;
         }
@@ -728,14 +730,14 @@  static void address_space_update_topology_pass(AddressSpace *as,
 
 static void address_space_update_topology(AddressSpace *as)
 {
-    FlatView old_view = *as->current_map;
-    FlatView new_view = generate_memory_topology(as->root);
+    FlatView *old_view = as->current_map;
+    FlatView *new_view = generate_memory_topology(as->root);
 
     address_space_update_topology_pass(as, old_view, new_view, false);
     address_space_update_topology_pass(as, old_view, new_view, true);
 
-    *as->current_map = new_view;
-    flatview_destroy(&old_view);
+    as->current_map = new_view;
+    flatview_destroy(old_view);
     address_space_update_ioeventfds(as);
 }
 
-- 
1.7.1


From 752269a023f2e2f7591158ca069b5af7aa822223 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon, 6 May 2013 11:57:21 +0200
Subject: [PATCH 40/40] memory: add reference counting to FlatView

With this change, a FlatView can be used even after a concurrent
update has replaced it.  Because we do not have RCU, we use a
mutex to protect the small critical sections that read/write the
as->current_map pointer.  Accesses to the FlatView can be done
outside the mutex.

If a MemoryRegion will be used after the FlatView is unref-ed (or after
a MemoryListener callback has returned), a reference has to be added to
that MemoryRegion.  For example, memory_region_find adds a reference to
the MemoryRegion that it returns.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 memory.c |   75 +++++++++++++++++++++++++++++++++++++++++++++++++++++++------
 1 files changed, 67 insertions(+), 8 deletions(-)

diff --git a/memory.c b/memory.c
index 52e1fca..105e211 100644
--- a/memory.c
+++ b/memory.c
@@ -26,12 +26,26 @@  static unsigned memory_region_transaction_depth;
 static bool memory_region_update_pending;
 static bool global_dirty_log = false;
 
+/* flat_view_mutex is taken around reading as->current_map; the critical
+ * section is extremely short, so I'm using a single mutex for every AS.
+ * We could also use RCU for the read-side.
+ *
+ * The BQL is taken around transaction commits, hence both locks are taken
+ * while writing to as->current_map (with the BQL taken outside).
+ */
+static QemuMutex flat_view_mutex;
+
 static QTAILQ_HEAD(memory_listeners, MemoryListener) memory_listeners
     = QTAILQ_HEAD_INITIALIZER(memory_listeners);
 
 static QTAILQ_HEAD(, AddressSpace) address_spaces
     = QTAILQ_HEAD_INITIALIZER(address_spaces);
 
+static void memory_init(void)
+{
+    qemu_mutex_init(&flat_view_mutex);
+}
+
 typedef struct AddrRange AddrRange;
 
 /*
@@ -222,6 +236,7 @@  struct FlatRange {
  * order.
  */
 struct FlatView {
+    unsigned ref;
     FlatRange *ranges;
     unsigned nr;
     unsigned nr_allocated;
@@ -243,6 +258,7 @@  static bool flatrange_equal(FlatRange *a, FlatRange *b)
 
 static void flatview_init(FlatView *view)
 {
+    view->ref = 1;
     view->ranges = NULL;
     view->nr = 0;
     view->nr_allocated = 0;
@@ -276,6 +292,18 @@  static void flatview_destroy(FlatView *view)
     g_free(view);
 }
 
+static void flatview_ref(FlatView *view)
+{
+    __sync_fetch_and_add(&view->ref, 1);
+}
+
+static void flatview_unref(FlatView *view)
+{
+    if (__sync_fetch_and_sub(&view->ref, 1) == 1) {
+        flatview_destroy(view);
+    }
+}
+
 static bool can_merge(FlatRange *r1, FlatRange *r2)
 {
     return int128_eq(addrrange_end(r1->addr), r2->addr.start)
@@ -728,16 +756,38 @@  static void address_space_update_topology_pass(AddressSpace *as,
 }
 
 
+static FlatView *address_space_get_flatview(AddressSpace *as)
+{
+    FlatView *view;
+
+    qemu_mutex_lock(&flat_view_mutex);
+    view = as->current_map;
+    flatview_ref(view);
+    qemu_mutex_unlock(&flat_view_mutex);
+    return view;
+}
+
 static void address_space_update_topology(AddressSpace *as)
 {
-    FlatView *old_view = as->current_map;
+    FlatView *old_view = address_space_get_flatview(as);
     FlatView *new_view = generate_memory_topology(as->root);
 
     address_space_update_topology_pass(as, old_view, new_view, false);
     address_space_update_topology_pass(as, old_view, new_view, true);
 
+    qemu_mutex_lock(&flat_view_mutex);
+    flatview_unref(as->current_map);
     as->current_map = new_view;
-    flatview_destroy(old_view);
+    qemu_mutex_unlock(&flat_view_mutex);
+
+    /* Note that all the old MemoryRegions are still alive up to this
+     * point.  This relieves most MemoryListeners from the need to
+     * ref/unref the MemoryRegions they get---unless they use them
+     * outside the iothread mutex, in which case precise reference
+     * counting is necessary.
+     */
+    flatview_unref(old_view);
+
     address_space_update_ioeventfds(as);
 }
 
@@ -1173,12 +1223,13 @@  void memory_region_sync_dirty_bitmap(MemoryRegion *mr)
     FlatRange *fr;
 
     QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) {
-        FlatView *view = as->current_map;
+        FlatView *view = address_space_get_flatview(as);
         FOR_EACH_FLAT_RANGE(fr, view) {
             if (fr->mr == mr) {
                 MEMORY_LISTENER_UPDATE_REGION(fr, as, Forward, log_sync);
             }
         }
+        flatview_unref(view);
     }
 }
 
@@ -1230,7 +1281,7 @@  static void memory_region_update_coalesced_range_as(MemoryRegion *mr, AddressSpa
     AddrRange tmp;
     MemoryRegionSection section;
 
-    view = as->current_map;
+    view = address_space_get_flatview(as);
     FOR_EACH_FLAT_RANGE(fr, view) {
         if (fr->mr == mr) {
             section = (MemoryRegionSection) {
@@ -1256,6 +1307,7 @@  static void memory_region_update_coalesced_range_as(MemoryRegion *mr, AddressSpa
             }
         }
     }
+    flatview_unref(view);
 }
 
 static void memory_region_update_coalesced_range(MemoryRegion *mr)
@@ -1543,7 +1595,7 @@  MemoryRegionSection memory_region_find(MemoryRegion *mr,
     as = memory_region_to_address_space(root);
     range = addrrange_make(int128_make64(addr), int128_make64(size));
 
-    view = as->current_map;
+    view = address_space_get_flatview(as);
     fr = flatview_lookup(view, range);
     if (!fr) {
         return ret;
@@ -1564,6 +1616,7 @@  MemoryRegionSection memory_region_find(MemoryRegion *mr,
     ret.readonly = fr->readonly;
     memory_region_ref(ret.mr);
 
+    flatview_unref(view);
     return ret;
 }
 
@@ -1572,10 +1625,11 @@  void address_space_sync_dirty_bitmap(AddressSpace *as)
     FlatView *view;
     FlatRange *fr;
 
-    view = as->current_map;
+    view = address_space_get_flatview(as);
     FOR_EACH_FLAT_RANGE(fr, view) {
         MEMORY_LISTENER_UPDATE_REGION(fr, as, Forward, log_sync);
     }
+    flatview_unref(view);
 }
 
 void memory_global_dirty_log_start(void)
@@ -1607,7 +1661,7 @@  static void listener_add_address_space(MemoryListener *listener,
         }
     }
 
-    view = as->current_map;
+    view = address_space_get_flatview(as);
     FOR_EACH_FLAT_RANGE(fr, view) {
         MemoryRegionSection section = {
             .mr = fr->mr,
@@ -1621,6 +1675,7 @@  static void listener_add_address_space(MemoryListener *listener,
             listener->region_add(listener, &section);
         }
     }
+    flatview_unref(view);
 }
 
 void memory_listener_register(MemoryListener *listener, AddressSpace *filter)
@@ -1654,6 +1709,10 @@  void memory_listener_unregister(MemoryListener *listener)
 
 void address_space_init(AddressSpace *as, MemoryRegion *root, const char *name)
 {
+    if (QTAILQ_EMPTY(&address_spaces)) {
+        memory_init();
+    }
+
     memory_region_transaction_begin();
     as->root = root;
     as->current_map = g_new(FlatView, 1);
@@ -1675,7 +1734,7 @@  void address_space_destroy(AddressSpace *as)
     memory_region_transaction_commit();
     QTAILQ_REMOVE(&address_spaces, as, address_spaces_link);
     address_space_destroy_dispatch(as);
-    flatview_destroy(as->current_map);
+    flatview_unref(as->current_map);
     g_free(as->name);
     g_free(as->current_map);
     g_free(as->ioeventfds);