diff mbox

[2/7] pci: memory access API and IOMMU support

Message ID 1283119703-9781-1-git-send-email-eduard.munteanu@linux360.ro
State New
Headers show

Commit Message

Eduard - Gabriel Munteanu Aug. 29, 2010, 10:08 p.m. UTC
PCI devices should access memory through pci_memory_*() instead of
cpu_physical_memory_*(). This also provides support for translation and
access checking in case an IOMMU is emulated.

Memory maps are treated as remote IOTLBs (that is, translation caches
belonging to the IOMMU-aware device itself). Clients (devices) must
provide callbacks for map invalidation in case these maps are
persistent beyond the current I/O context, e.g. AIO DMA transfers.

Signed-off-by: Eduard - Gabriel Munteanu <eduard.munteanu@linux360.ro>
---
 hw/pci.c           |  191 +++++++++++++++++++++++++++++++++++++++++++++++++++-
 hw/pci.h           |   69 +++++++++++++++++++
 hw/pci_internals.h |   12 +++
 qemu-common.h      |    1 +
 4 files changed, 272 insertions(+), 1 deletions(-)

Comments

Eduard - Gabriel Munteanu Aug. 29, 2010, 10:11 p.m. UTC | #1
On Mon, Aug 30, 2010 at 01:08:23AM +0300, Eduard - Gabriel Munteanu wrote:
> PCI devices should access memory through pci_memory_*() instead of
> cpu_physical_memory_*(). This also provides support for translation and
> access checking in case an IOMMU is emulated.
> 
> Memory maps are treated as remote IOTLBs (that is, translation caches
> belonging to the IOMMU-aware device itself). Clients (devices) must
> provide callbacks for map invalidation in case these maps are
> persistent beyond the current I/O context, e.g. AIO DMA transfers.
> 
> Signed-off-by: Eduard - Gabriel Munteanu <eduard.munteanu@linux360.ro>
> ---

Please merge this instead of the patch I sent with the series. I wanted
to avoid resubmitting the whole series.


	Eduard
Stefan Weil Sept. 1, 2010, 8:10 p.m. UTC | #2
Please see my comments at the end of this mail.


Am 30.08.2010 00:08, schrieb Eduard - Gabriel Munteanu:
> PCI devices should access memory through pci_memory_*() instead of
> cpu_physical_memory_*(). This also provides support for translation and
> access checking in case an IOMMU is emulated.
>
> Memory maps are treated as remote IOTLBs (that is, translation caches
> belonging to the IOMMU-aware device itself). Clients (devices) must
> provide callbacks for map invalidation in case these maps are
> persistent beyond the current I/O context, e.g. AIO DMA transfers.
>
> Signed-off-by: Eduard - Gabriel Munteanu <eduard.munteanu@linux360.ro>
> ---
> hw/pci.c | 191 +++++++++++++++++++++++++++++++++++++++++++++++++++-
> hw/pci.h | 69 +++++++++++++++++++
> hw/pci_internals.h | 12 +++
> qemu-common.h | 1 +
> 4 files changed, 272 insertions(+), 1 deletions(-)
>
> diff --git a/hw/pci.c b/hw/pci.c
> index 2dc1577..afcb33c 100644
> --- a/hw/pci.c
> +++ b/hw/pci.c
>
> ...
>
> diff --git a/hw/pci.h b/hw/pci.h
> index c551f96..c95863a 100644
> --- a/hw/pci.h
> +++ b/hw/pci.h
> @@ -172,6 +172,8 @@ struct PCIDevice {
> char *romfile;
> ram_addr_t rom_offset;
> uint32_t rom_bar;
> +
> + QLIST_HEAD(, PCIMemoryMap) memory_maps;
> };
>
> PCIDevice *pci_register_device(PCIBus *bus, const char *name,
> @@ -391,4 +393,71 @@ static inline int ranges_overlap(uint64_t first1, 
> uint64_t len1,
> return !(last2 < first1 || last1 < first2);
> }
>
> +/*
> + * Memory I/O and PCI IOMMU definitions.
> + */
> +
> +#define IOMMU_PERM_READ (1 << 0)
> +#define IOMMU_PERM_WRITE (1 << 1)
> +#define IOMMU_PERM_RW (IOMMU_PERM_READ | IOMMU_PERM_WRITE)
> +
> +typedef int PCIInvalidateMapFunc(void *opaque);
> +typedef int PCITranslateFunc(PCIDevice *iommu,
> + PCIDevice *dev,
> + pcibus_t addr,
> + target_phys_addr_t *paddr,
> + target_phys_addr_t *len,
> + unsigned perms);
> +
> +void pci_memory_rw(PCIDevice *dev,
> + pcibus_t addr,
> + uint8_t *buf,
> + pcibus_t len,
> + int is_write);
> +void *pci_memory_map(PCIDevice *dev,
> + PCIInvalidateMapFunc *cb,
> + void *opaque,
> + pcibus_t addr,
> + target_phys_addr_t *len,
> + int is_write);
> +void pci_memory_unmap(PCIDevice *dev,
> + void *buffer,
> + target_phys_addr_t len,
> + int is_write,
> + target_phys_addr_t access_len);
> +void pci_register_iommu(PCIDevice *dev, PCITranslateFunc *translate);
> +void pci_memory_invalidate_range(PCIDevice *dev, pcibus_t addr, 
> pcibus_t len);
> +
> +#define DECLARE_PCI_LD(suffix, size) \
> +uint##size##_t pci_ld##suffix(PCIDevice *dev, pcibus_t addr);
> +
> +#define DECLARE_PCI_ST(suffix, size) \
> +void pci_st##suffix(PCIDevice *dev, pcibus_t addr, uint##size##_t val);
> +
> +DECLARE_PCI_LD(ub, 8)
> +DECLARE_PCI_LD(uw, 16)
> +DECLARE_PCI_LD(l, 32)
> +DECLARE_PCI_LD(q, 64)
> +
> +DECLARE_PCI_ST(b, 8)
> +DECLARE_PCI_ST(w, 16)
> +DECLARE_PCI_ST(l, 32)
> +DECLARE_PCI_ST(q, 64)
> +
> +static inline void pci_memory_read(PCIDevice *dev,
> + pcibus_t addr,
> + uint8_t *buf,
> + pcibus_t len)
> +{
> + pci_memory_rw(dev, addr, buf, len, 0);
> +}
> +
> +static inline void pci_memory_write(PCIDevice *dev,
> + pcibus_t addr,
> + const uint8_t *buf,
> + pcibus_t len)
> +{
> + pci_memory_rw(dev, addr, (uint8_t *) buf, len, 1);
> +}
> +
> #endif

The functions pci_memory_read and pci_memory_write not only read
or write byte data but many different data types which leads to
a lot of type casts in your other patches.

I'd prefer "void *buf" and "const void *buf" in the argument lists.
Then all those type casts could be removed.

Regards
Stefan Weil
Michael S. Tsirkin Sept. 2, 2010, 6 a.m. UTC | #3
On Wed, Sep 01, 2010 at 10:10:30PM +0200, Stefan Weil wrote:
> >+static inline void pci_memory_read(PCIDevice *dev,
> >+ pcibus_t addr,
> >+ uint8_t *buf,
> >+ pcibus_t len)
> >+{
> >+ pci_memory_rw(dev, addr, buf, len, 0);
> >+}
> >+
> >+static inline void pci_memory_write(PCIDevice *dev,
> >+ pcibus_t addr,
> >+ const uint8_t *buf,
> >+ pcibus_t len)
> >+{
> >+ pci_memory_rw(dev, addr, (uint8_t *) buf, len, 1);
> >+}
> >+
> >#endif
> 
> The functions pci_memory_read and pci_memory_write not only read
> or write byte data but many different data types which leads to
> a lot of type casts in your other patches.
> 
> I'd prefer "void *buf" and "const void *buf" in the argument lists.
> Then all those type casts could be removed.
> 
> Regards
> Stefan Weil

Further, I am not sure pcibus_t is a good type to use here.
This also forces use of pci specific types in e.g. ide, or resorting to
casts as this patch does. We probably should use a more generic type
for this.
Eduard - Gabriel Munteanu Sept. 2, 2010, 8:51 a.m. UTC | #4
On Wed, Sep 01, 2010 at 10:10:30PM +0200, Stefan Weil wrote:
> Please see my comments at the end of this mail.
> 
> 
> Am 30.08.2010 00:08, schrieb Eduard - Gabriel Munteanu:
> > PCI devices should access memory through pci_memory_*() instead of
> > cpu_physical_memory_*(). This also provides support for translation and
> > access checking in case an IOMMU is emulated.
> >
> > Memory maps are treated as remote IOTLBs (that is, translation caches
> > belonging to the IOMMU-aware device itself). Clients (devices) must
> > provide callbacks for map invalidation in case these maps are
> > persistent beyond the current I/O context, e.g. AIO DMA transfers.
> >
> > Signed-off-by: Eduard - Gabriel Munteanu <eduard.munteanu@linux360.ro>
> > ---

[snip]

> > +static inline void pci_memory_read(PCIDevice *dev,
> > + pcibus_t addr,
> > + uint8_t *buf,
> > + pcibus_t len)
> > +{
> > + pci_memory_rw(dev, addr, buf, len, 0);
> > +}
> > +
> > +static inline void pci_memory_write(PCIDevice *dev,
> > + pcibus_t addr,
> > + const uint8_t *buf,
> > + pcibus_t len)
> > +{
> > + pci_memory_rw(dev, addr, (uint8_t *) buf, len, 1);
> > +}
> > +
> > #endif
> 
> The functions pci_memory_read and pci_memory_write not only read
> or write byte data but many different data types which leads to
> a lot of type casts in your other patches.
> 
> I'd prefer "void *buf" and "const void *buf" in the argument lists.
> Then all those type casts could be removed.
> 
> Regards
> Stefan Weil

I only followed an approach similar to how cpu_physical_memory_{read,write}()
is defined. I think I should change both cpu_physical_memory_* stuff and
pci_memory_* stuff, not only the latter, if I decide to go on that
approach.


	Eduard
Eduard - Gabriel Munteanu Sept. 2, 2010, 9:08 a.m. UTC | #5
On Thu, Sep 02, 2010 at 09:00:46AM +0300, Michael S. Tsirkin wrote:
> On Wed, Sep 01, 2010 at 10:10:30PM +0200, Stefan Weil wrote:
> > >+static inline void pci_memory_read(PCIDevice *dev,
> > >+ pcibus_t addr,
> > >+ uint8_t *buf,
> > >+ pcibus_t len)
> > >+{
> > >+ pci_memory_rw(dev, addr, buf, len, 0);
> > >+}
> > >+
> > >+static inline void pci_memory_write(PCIDevice *dev,
> > >+ pcibus_t addr,
> > >+ const uint8_t *buf,
> > >+ pcibus_t len)
> > >+{
> > >+ pci_memory_rw(dev, addr, (uint8_t *) buf, len, 1);
> > >+}
> > >+
> > >#endif
> > 
> > The functions pci_memory_read and pci_memory_write not only read
> > or write byte data but many different data types which leads to
> > a lot of type casts in your other patches.
> > 
> > I'd prefer "void *buf" and "const void *buf" in the argument lists.
> > Then all those type casts could be removed.
> > 
> > Regards
> > Stefan Weil
> 
> Further, I am not sure pcibus_t is a good type to use here.
> This also forces use of pci specific types in e.g. ide, or resorting to
> casts as this patch does. We probably should use a more generic type
> for this.

It only forces use of PCI-specific types in the IDE controller, which is
already a PCI device.


	Eduard

> -- 
> MST
Anthony Liguori Sept. 2, 2010, 1:24 p.m. UTC | #6
On 09/02/2010 04:08 AM, Eduard - Gabriel Munteanu wrote:
> On Thu, Sep 02, 2010 at 09:00:46AM +0300, Michael S. Tsirkin wrote:
>    
>> On Wed, Sep 01, 2010 at 10:10:30PM +0200, Stefan Weil wrote:
>>      
>>>> +static inline void pci_memory_read(PCIDevice *dev,
>>>> + pcibus_t addr,
>>>> + uint8_t *buf,
>>>> + pcibus_t len)
>>>> +{
>>>> + pci_memory_rw(dev, addr, buf, len, 0);
>>>> +}
>>>> +
>>>> +static inline void pci_memory_write(PCIDevice *dev,
>>>> + pcibus_t addr,
>>>> + const uint8_t *buf,
>>>> + pcibus_t len)
>>>> +{
>>>> + pci_memory_rw(dev, addr, (uint8_t *) buf, len, 1);
>>>> +}
>>>> +
>>>> #endif
>>>>          
>>> The functions pci_memory_read and pci_memory_write not only read
>>> or write byte data but many different data types which leads to
>>> a lot of type casts in your other patches.
>>>
>>> I'd prefer "void *buf" and "const void *buf" in the argument lists.
>>> Then all those type casts could be removed.
>>>
>>> Regards
>>> Stefan Weil
>>>        
>> Further, I am not sure pcibus_t is a good type to use here.
>> This also forces use of pci specific types in e.g. ide, or resorting to
>> casts as this patch does. We probably should use a more generic type
>> for this.
>>      
> It only forces use of PCI-specific types in the IDE controller, which is
> already a PCI device.
>    

But IDE controllers are not always PCI devices...  This isn't an issue
with your patch, per se, but with how we're modelling the IDE
controller today.  There's no great solution but I think your patch is 
an improvement over what we have today.

I do agree with Stefan though that void * would make a lot more sense.

Regards,

Anthony Liguori

> 	Eduard
>
>    
>> -- 
>> MST
>>      
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>
Stefan Weil Sept. 2, 2010, 4:05 p.m. UTC | #7
Am 02.09.2010 10:51, schrieb Eduard - Gabriel Munteanu:
> On Wed, Sep 01, 2010 at 10:10:30PM +0200, Stefan Weil wrote:
>    
>> Please see my comments at the end of this mail.
>>
>>
>> Am 30.08.2010 00:08, schrieb Eduard - Gabriel Munteanu:
>>      
>>> PCI devices should access memory through pci_memory_*() instead of
>>> cpu_physical_memory_*(). This also provides support for translation and
>>> access checking in case an IOMMU is emulated.
>>>
>>> Memory maps are treated as remote IOTLBs (that is, translation caches
>>> belonging to the IOMMU-aware device itself). Clients (devices) must
>>> provide callbacks for map invalidation in case these maps are
>>> persistent beyond the current I/O context, e.g. AIO DMA transfers.
>>>
>>> Signed-off-by: Eduard - Gabriel Munteanu<eduard.munteanu@linux360.ro>
>>> ---
>>>        
> [snip]
>
>    
>>> +static inline void pci_memory_read(PCIDevice *dev,
>>> + pcibus_t addr,
>>> + uint8_t *buf,
>>> + pcibus_t len)
>>> +{
>>> + pci_memory_rw(dev, addr, buf, len, 0);
>>> +}
>>> +
>>> +static inline void pci_memory_write(PCIDevice *dev,
>>> + pcibus_t addr,
>>> + const uint8_t *buf,
>>> + pcibus_t len)
>>> +{
>>> + pci_memory_rw(dev, addr, (uint8_t *) buf, len, 1);
>>> +}
>>> +
>>> #endif
>>>        
>> The functions pci_memory_read and pci_memory_write not only read
>> or write byte data but many different data types which leads to
>> a lot of type casts in your other patches.
>>
>> I'd prefer "void *buf" and "const void *buf" in the argument lists.
>> Then all those type casts could be removed.
>>
>> Regards
>> Stefan Weil
>>      
> I only followed an approach similar to how cpu_physical_memory_{read,write}()
> is defined. I think I should change both cpu_physical_memory_* stuff and
> pci_memory_* stuff, not only the latter, if I decide to go on that
> approach.
>
>
> 	Eduard
>    


Yes, cpu_physical_memory_read, cpu_physical_memory_write
and cpu_physical_memory_rw should be changed, too.

They also require several type casts today.

But this change can be done in an independent patch.

Stefan
Eduard - Gabriel Munteanu Sept. 2, 2010, 4:14 p.m. UTC | #8
On Thu, Sep 02, 2010 at 06:05:39PM +0200, Stefan Weil wrote:
> Am 02.09.2010 10:51, schrieb Eduard - Gabriel Munteanu:

[snip]

> >> The functions pci_memory_read and pci_memory_write not only read
> >> or write byte data but many different data types which leads to
> >> a lot of type casts in your other patches.
> >>
> >> I'd prefer "void *buf" and "const void *buf" in the argument lists.
> >> Then all those type casts could be removed.
> >>
> >> Regards
> >> Stefan Weil
> >>      
> > I only followed an approach similar to how cpu_physical_memory_{read,write}()
> > is defined. I think I should change both cpu_physical_memory_* stuff and
> > pci_memory_* stuff, not only the latter, if I decide to go on that
> > approach.
> >
> >
> > 	Eduard
> >    
> 
> 
> Yes, cpu_physical_memory_read, cpu_physical_memory_write
> and cpu_physical_memory_rw should be changed, too.
> 
> They also require several type casts today.
> 
> But this change can be done in an independent patch.
> 
> Stefan

Roger, I'm on it. The existing casts could remain there AFAICT, so it's
a pretty simple change.


	Eduard
diff mbox

Patch

diff --git a/hw/pci.c b/hw/pci.c
index 2dc1577..afcb33c 100644
--- a/hw/pci.c
+++ b/hw/pci.c
@@ -158,6 +158,19 @@  static void pci_device_reset(PCIDevice *dev)
     pci_update_mappings(dev);
 }
 
+static int pci_no_translate(PCIDevice *iommu,
+                            PCIDevice *dev,
+                            pcibus_t addr,
+                            target_phys_addr_t *paddr,
+                            target_phys_addr_t *len,
+                            unsigned perms)
+{
+    *paddr = addr;
+    *len = -1;
+
+    return 0;
+}
+
 static void pci_bus_reset(void *opaque)
 {
     PCIBus *bus = opaque;
@@ -220,7 +233,10 @@  void pci_bus_new_inplace(PCIBus *bus, DeviceState *parent,
 {
     qbus_create_inplace(&bus->qbus, &pci_bus_info, parent, name);
     assert(PCI_FUNC(devfn_min) == 0);
-    bus->devfn_min = devfn_min;
+
+    bus->devfn_min  = devfn_min;
+    bus->iommu      = NULL;
+    bus->translate  = pci_no_translate;
 
     /* host bridge */
     QLIST_INIT(&bus->child);
@@ -1789,3 +1805,176 @@  static char *pcibus_get_dev_path(DeviceState *dev)
     return strdup(path);
 }
 
+void pci_register_iommu(PCIDevice *iommu,
+                        PCITranslateFunc *translate)
+{
+    iommu->bus->iommu = iommu;
+    iommu->bus->translate = translate;
+}
+
+void pci_memory_rw(PCIDevice *dev,
+                   pcibus_t addr,
+                   uint8_t *buf,
+                   pcibus_t len,
+                   int is_write)
+{
+    int err;
+    unsigned perms;
+    PCIDevice *iommu = dev->bus->iommu;
+    target_phys_addr_t paddr, plen;
+
+    perms = is_write ? IOMMU_PERM_WRITE : IOMMU_PERM_READ;
+
+    while (len) {
+        err = dev->bus->translate(iommu, dev, addr, &paddr, &plen, perms);
+        if (err || plen == 0) {     /* no progress possible: don't spin */
+            return;
+        }
+
+        /* The translation might be valid for larger regions. */
+        if (plen > len) {
+            plen = len;
+        }
+
+        cpu_physical_memory_rw(paddr, buf, plen, is_write);
+
+        len -= plen;
+        addr += plen;
+        buf += plen;
+    }
+}
+
+static void pci_memory_register_map(PCIDevice *dev,
+                                    pcibus_t addr,
+                                    pcibus_t len,
+                                    target_phys_addr_t paddr,
+                                    PCIInvalidateMapFunc *invalidate,
+                                    void *invalidate_opaque)
+{
+    PCIMemoryMap *map;
+
+    map = qemu_malloc(sizeof(PCIMemoryMap));
+    map->addr               = addr;
+    map->len                = len;
+    map->paddr              = paddr;
+    map->invalidate         = invalidate;
+    map->invalidate_opaque  = invalidate_opaque;
+
+    QLIST_INSERT_HEAD(&dev->memory_maps, map, list);
+}
+
+static void pci_memory_unregister_map(PCIDevice *dev,
+                                      target_phys_addr_t paddr,
+                                      target_phys_addr_t len)
+{
+    PCIMemoryMap *map, *next;
+    /* Entries are freed mid-walk, so the _SAFE iterator is required. */
+    QLIST_FOREACH_SAFE(map, &dev->memory_maps, list, next) {
+        if (map->paddr == paddr && map->len == len) {
+            QLIST_REMOVE(map, list);
+            qemu_free(map);     /* pairs with qemu_malloc() at registration */
+        }
+    }
+}
+
+void pci_memory_invalidate_range(PCIDevice *dev,
+                                 pcibus_t addr,
+                                 pcibus_t len)
+{
+    PCIMemoryMap *map, *next;
+    /* Maps are freed mid-walk, so iterate with the _SAFE variant. */
+    QLIST_FOREACH_SAFE(map, &dev->memory_maps, list, next) {
+        if (ranges_overlap(addr, len, map->addr, map->len)) {
+            map->invalidate(map->invalidate_opaque);
+            QLIST_REMOVE(map, list);
+            qemu_free(map);     /* pairs with qemu_malloc() at registration */
+        }
+    }
+}
+
+void *pci_memory_map(PCIDevice *dev,
+                     PCIInvalidateMapFunc *cb,
+                     void *opaque,
+                     pcibus_t addr,
+                     target_phys_addr_t *len,
+                     int is_write)
+{
+    int err;
+    unsigned perms;
+    void *buffer;
+    PCIDevice *iommu = dev->bus->iommu;
+    target_phys_addr_t paddr, plen;
+
+    perms = is_write ? IOMMU_PERM_WRITE : IOMMU_PERM_READ;
+    plen = *len;
+    err = dev->bus->translate(iommu, dev, addr, &paddr, &plen, perms);
+    if (err) {
+        return NULL;
+    }
+
+    /*
+     * If this is true, the virtual region is contiguous,
+     * but the translated physical region isn't. We just
+     * clamp *len, much like cpu_physical_memory_map() does.
+     */
+    if (plen < *len) {
+        *len = plen;
+    }
+
+    /* Maps act as remote TLBs (for AIO); only track a successful one. */
+    buffer = cpu_physical_memory_map(paddr, len, is_write);
+    if (buffer && cb) {
+        pci_memory_register_map(dev, addr, *len, paddr, cb, opaque);
+    }
+    return buffer;
+}
+
+void pci_memory_unmap(PCIDevice *dev,
+                      void *buffer,
+                      target_phys_addr_t len,
+                      int is_write,
+                      target_phys_addr_t access_len)
+{
+    cpu_physical_memory_unmap(buffer, len, is_write, access_len);
+    pci_memory_unregister_map(dev, (target_phys_addr_t) buffer, len);
+}
+
+#define DEFINE_PCI_LD(suffix, size)                                       \
+uint##size##_t pci_ld##suffix(PCIDevice *dev, pcibus_t addr)              \
+{                                                                         \
+    int err;                                                              \
+    target_phys_addr_t paddr, plen;                                       \
+                                                                          \
+    err = dev->bus->translate(dev->bus->iommu, dev,                       \
+                              addr, &paddr, &plen, IOMMU_PERM_READ);      \
+    if (err || (plen < size / 8)) {                                       \
+        return 0;                                                         \
+    }                                                                     \
+                                                                          \
+    return ld##suffix##_phys(paddr);                                      \
+}
+
+#define DEFINE_PCI_ST(suffix, size)                                       \
+void pci_st##suffix(PCIDevice *dev, pcibus_t addr, uint##size##_t val)    \
+{                                                                         \
+    int err;                                                              \
+    target_phys_addr_t paddr, plen;                                       \
+                                                                          \
+    err = dev->bus->translate(dev->bus->iommu, dev,                       \
+                              addr, &paddr, &plen, IOMMU_PERM_WRITE);     \
+    if (err || (plen < size / 8)) {                                       \
+        return;                                                           \
+    }                                                                     \
+                                                                          \
+    st##suffix##_phys(paddr, val);                                        \
+}
+
+DEFINE_PCI_LD(ub, 8)
+DEFINE_PCI_LD(uw, 16)
+DEFINE_PCI_LD(l, 32)
+DEFINE_PCI_LD(q, 64)
+
+DEFINE_PCI_ST(b, 8)
+DEFINE_PCI_ST(w, 16)
+DEFINE_PCI_ST(l, 32)
+DEFINE_PCI_ST(q, 64)
diff --git a/hw/pci.h b/hw/pci.h
index c551f96..c95863a 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -172,6 +172,8 @@  struct PCIDevice {
     char *romfile;
     ram_addr_t rom_offset;
     uint32_t rom_bar;
+
+    QLIST_HEAD(, PCIMemoryMap) memory_maps;
 };
 
 PCIDevice *pci_register_device(PCIBus *bus, const char *name,
@@ -391,4 +393,71 @@  static inline int ranges_overlap(uint64_t first1, uint64_t len1,
     return !(last2 < first1 || last1 < first2);
 }
 
+/*
+ * Memory I/O and PCI IOMMU definitions.
+ */
+
+#define IOMMU_PERM_READ     (1 << 0)
+#define IOMMU_PERM_WRITE    (1 << 1)
+#define IOMMU_PERM_RW       (IOMMU_PERM_READ | IOMMU_PERM_WRITE)
+
+typedef int PCIInvalidateMapFunc(void *opaque);
+typedef int PCITranslateFunc(PCIDevice *iommu,
+                             PCIDevice *dev,
+                             pcibus_t addr,
+                             target_phys_addr_t *paddr,
+                             target_phys_addr_t *len,
+                             unsigned perms);
+
+void pci_memory_rw(PCIDevice *dev,
+                   pcibus_t addr,
+                   uint8_t *buf,
+                   pcibus_t len,
+                   int is_write);
+void *pci_memory_map(PCIDevice *dev,
+                     PCIInvalidateMapFunc *cb,
+                     void *opaque,
+                     pcibus_t addr,
+                     target_phys_addr_t *len,
+                     int is_write);
+void pci_memory_unmap(PCIDevice *dev,
+                      void *buffer,
+                      target_phys_addr_t len,
+                      int is_write,
+                      target_phys_addr_t access_len);
+void pci_register_iommu(PCIDevice *dev, PCITranslateFunc *translate);
+void pci_memory_invalidate_range(PCIDevice *dev, pcibus_t addr, pcibus_t len);
+
+#define DECLARE_PCI_LD(suffix, size)                                    \
+uint##size##_t pci_ld##suffix(PCIDevice *dev, pcibus_t addr);
+
+#define DECLARE_PCI_ST(suffix, size)                                    \
+void pci_st##suffix(PCIDevice *dev, pcibus_t addr, uint##size##_t val);
+
+DECLARE_PCI_LD(ub, 8)
+DECLARE_PCI_LD(uw, 16)
+DECLARE_PCI_LD(l, 32)
+DECLARE_PCI_LD(q, 64)
+
+DECLARE_PCI_ST(b, 8)
+DECLARE_PCI_ST(w, 16)
+DECLARE_PCI_ST(l, 32)
+DECLARE_PCI_ST(q, 64)
+
+static inline void pci_memory_read(PCIDevice *dev,
+                                   pcibus_t addr,
+                                   uint8_t *buf,
+                                   pcibus_t len)
+{
+    pci_memory_rw(dev, addr, buf, len, 0);
+}
+
+static inline void pci_memory_write(PCIDevice *dev,
+                                    pcibus_t addr,
+                                    const uint8_t *buf,
+                                    pcibus_t len)
+{
+    pci_memory_rw(dev, addr, (uint8_t *) buf, len, 1);
+}
+
 #endif
diff --git a/hw/pci_internals.h b/hw/pci_internals.h
index e3c93a3..fb134b9 100644
--- a/hw/pci_internals.h
+++ b/hw/pci_internals.h
@@ -33,6 +33,9 @@  struct PCIBus {
        Keep a count of the number of devices with raised IRQs.  */
     int nirq;
     int *irq_count;
+
+    PCIDevice                       *iommu;
+    PCITranslateFunc                *translate;
 };
 
 struct PCIBridge {
@@ -44,4 +47,13 @@  struct PCIBridge {
     const char *bus_name;
 };
 
+struct PCIMemoryMap {
+    pcibus_t                        addr;
+    pcibus_t                        len;
+    target_phys_addr_t              paddr;
+    PCIInvalidateMapFunc            *invalidate;
+    void                            *invalidate_opaque;
+    QLIST_ENTRY(PCIMemoryMap)       list;
+};
+
 #endif /* QEMU_PCI_INTERNALS_H */
diff --git a/qemu-common.h b/qemu-common.h
index d735235..8b060e8 100644
--- a/qemu-common.h
+++ b/qemu-common.h
@@ -218,6 +218,7 @@  typedef struct SMBusDevice SMBusDevice;
 typedef struct PCIHostState PCIHostState;
 typedef struct PCIExpressHost PCIExpressHost;
 typedef struct PCIBus PCIBus;
+typedef struct PCIMemoryMap PCIMemoryMap;
 typedef struct PCIDevice PCIDevice;
 typedef struct PCIBridge PCIBridge;
 typedef struct SerialState SerialState;