Patchwork [01/13] Generic DMA memory access interface

login
register
mail settings
Submitter Eduard - Gabriel Munteanu
Date Jan. 29, 2011, 5:40 p.m.
Message ID <7ec6f2018811566a4b207c4f5b8d7b8b7342b786.1296321798.git.eduard.munteanu@linux360.ro>
Download mbox | patch
Permalink /patch/80942/
State New
Headers show

Comments

Eduard - Gabriel Munteanu - Jan. 29, 2011, 5:40 p.m.
This introduces replacements for memory access functions like
cpu_physical_memory_read(). The new interface can handle address
translation and access checking through an IOMMU.

Signed-off-by: Eduard - Gabriel Munteanu <eduard.munteanu@linux360.ro>
---
 Makefile.target |    2 +-
 hw/dma_rw.c     |  124 +++++++++++++++++++++++++++++++++++++++++++
 hw/dma_rw.h     |  157 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 282 insertions(+), 1 deletions(-)
 create mode 100644 hw/dma_rw.c
 create mode 100644 hw/dma_rw.h
Blue Swirl - Feb. 5, 2011, 10:20 a.m.
On Thu, Feb 3, 2011 at 11:32 PM, Eduard - Gabriel Munteanu
<eduard.munteanu@linux360.ro> wrote:
> This introduces replacements for memory access functions like
> cpu_physical_memory_read(). The new interface can handle address
> translation and access checking through an IOMMU.
>
> Signed-off-by: Eduard - Gabriel Munteanu <eduard.munteanu@linux360.ro>
> ---
>  Makefile.target |    2 +-
>  hw/dma_rw.c     |  124 +++++++++++++++++++++++++++++++++++++++++++
>  hw/dma_rw.h     |  157 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 282 insertions(+), 1 deletions(-)
>  create mode 100644 hw/dma_rw.c
>  create mode 100644 hw/dma_rw.h
>
> diff --git a/Makefile.target b/Makefile.target
> index e15b1c4..e5817ab 100644
> --- a/Makefile.target
> +++ b/Makefile.target
> @@ -218,7 +218,7 @@ obj-i386-y += cirrus_vga.o apic.o ioapic.o piix_pci.o
>  obj-i386-y += vmmouse.o vmport.o hpet.o applesmc.o
>  obj-i386-y += device-hotplug.o pci-hotplug.o smbios.o wdt_ib700.o
>  obj-i386-y += debugcon.o multiboot.o
> -obj-i386-y += pc_piix.o
> +obj-i386-y += pc_piix.o dma_rw.o
>  obj-i386-$(CONFIG_SPICE) += qxl.o qxl-logger.o qxl-render.o
>
>  # shared objects
> diff --git a/hw/dma_rw.c b/hw/dma_rw.c
> new file mode 100644
> index 0000000..ef8e7f8
> --- /dev/null
> +++ b/hw/dma_rw.c
> @@ -0,0 +1,124 @@
> +/*
> + * Generic DMA memory access interface.
> + *
> + * Copyright (c) 2011 Eduard - Gabriel Munteanu
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a copy
> + * of this software and associated documentation files (the "Software"), to deal
> + * in the Software without restriction, including without limitation the rights
> + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
> + * copies of the Software, and to permit persons to whom the Software is
> + * furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
> + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
> + * THE SOFTWARE.
> + */
> +
> +#include "dma_rw.h"
> +#include "range.h"
> +
> +static void dma_register_memory_map(DMADevice *dev,
> +                                    dma_addr_t addr,
> +                                    dma_addr_t len,
> +                                    target_phys_addr_t paddr,
> +                                    DMAInvalidateMapFunc *invalidate,
> +                                    void *invalidate_opaque)
> +{
> +    DMAMemoryMap *map;
> +
> +    map = qemu_malloc(sizeof(DMAMemoryMap));
> +    map->addr               = addr;
> +    map->len                = len;
> +    map->paddr              = paddr;
> +    map->invalidate         = invalidate;
> +    map->invalidate_opaque  = invalidate_opaque;
> +
> +    QLIST_INSERT_HEAD(&dev->mmu->memory_maps, map, list);
> +}
> +
> +static void dma_unregister_memory_map(DMADevice *dev,
> +                                      target_phys_addr_t paddr,
> +                                      dma_addr_t len)
> +{
> +    DMAMemoryMap *map;
> +
> +    QLIST_FOREACH(map, &dev->mmu->memory_maps, list) {
> +        if (map->paddr == paddr && map->len == len) {
> +            QLIST_REMOVE(map, list);
> +            free(map);
> +        }
> +    }
> +}
> +
> +void dma_invalidate_memory_range(DMADevice *dev,
> +                                 dma_addr_t addr,
> +                                 dma_addr_t len)
> +{
> +    DMAMemoryMap *map;
> +
> +    QLIST_FOREACH(map, &dev->mmu->memory_maps, list) {
> +        if (ranges_overlap(addr, len, map->addr, map->len)) {
> +            map->invalidate(map->invalidate_opaque);
> +            QLIST_REMOVE(map, list);
> +            free(map);
> +        }
> +    }
> +}
> +
> +void *dma_memory_map(DMADevice *dev,
> +                     DMAInvalidateMapFunc *cb,
> +                     void *opaque,
> +                     dma_addr_t addr,
> +                     dma_addr_t *len,
> +                     int is_write)
> +{
> +    int err;
> +    target_phys_addr_t paddr, plen;
> +
> +    if (!dev || !dev->mmu) {
> +        return cpu_physical_memory_map(addr, len, is_write);
> +    }
> +
> +    plen = *len;
> +    err = dev->mmu->translate(dev, addr, &paddr, &plen, is_write);
> +    if (err) {
> +        return NULL;
> +    }
> +
> +    /*
> +     * If this is true, the virtual region is contiguous,
> +     * but the translated physical region isn't. We just
> +     * clamp *len, much like cpu_physical_memory_map() does.
> +     */
> +    if (plen < *len) {
> +        *len = plen;
> +    }
> +
> +    /* We treat maps as remote TLBs to cope with stuff like AIO. */
> +    if (cb) {
> +        dma_register_memory_map(dev, addr, *len, paddr, cb, opaque);
> +    }
> +
> +    return cpu_physical_memory_map(paddr, len, is_write);
> +}
> +
> +void dma_memory_unmap(DMADevice *dev,
> +                      void *buffer,
> +                      dma_addr_t len,
> +                      int is_write,
> +                      dma_addr_t access_len)
> +{
> +    cpu_physical_memory_unmap(buffer, len, is_write, access_len);
> +    if (dev && dev->mmu) {
> +        dma_unregister_memory_map(dev, (target_phys_addr_t) buffer, len);
> +    }
> +}
> +
> diff --git a/hw/dma_rw.h b/hw/dma_rw.h
> new file mode 100644
> index 0000000..bc93511
> --- /dev/null
> +++ b/hw/dma_rw.h
> @@ -0,0 +1,157 @@
> +#ifndef DMA_RW_H
> +#define DMA_RW_H
> +
> +#include "qemu-common.h"
> +
> +typedef uint64_t dma_addr_t;
> +
> +typedef struct DMAMmu DMAMmu;
> +typedef struct DMADevice DMADevice;
> +typedef struct DMAMemoryMap DMAMemoryMap;
> +
> +typedef int DMATranslateFunc(DMADevice *dev,
> +                             dma_addr_t addr,
> +                             dma_addr_t *paddr,
> +                             dma_addr_t *len,
> +                             int is_write);
> +
> +typedef void DMAInvalidateMapFunc(void *);
> +
> +struct DMAMmu {
> +    DeviceState *iommu;
> +    DMATranslateFunc *translate;
> +    QLIST_HEAD(memory_maps, DMAMemoryMap) memory_maps;
> +};
> +
> +struct DMADevice {
> +    DMAMmu *mmu;
> +};
> +
> +struct DMAMemoryMap {
> +    dma_addr_t              addr;
> +    dma_addr_t              len;
> +    target_phys_addr_t      paddr;
> +    DMAInvalidateMapFunc    *invalidate;
> +    void                    *invalidate_opaque;
> +
> +    QLIST_ENTRY(DMAMemoryMap) list;
> +};
> +
> +static inline void dma_memory_rw(DMADevice *dev,
> +                                 dma_addr_t addr,
> +                                 void *buf,
> +                                 dma_addr_t len,
> +                                 int is_write)
> +{
> +    dma_addr_t paddr, plen;
> +    int err;
> +
> +    /*
> +     * Fast-path non-iommu.
> +     * More importantly, makes it obvious what this function does.
> +     */
> +    if (!dev || !dev->mmu) {
> +        cpu_physical_memory_rw(addr, buf, plen, is_write);
> +        return;
> +    }
> +
> +    while (len) {
> +        err = dev->mmu->translate(dev, addr, &paddr, &plen, is_write);
> +        if (err) {
> +            return;
> +        }
> +
> +        /* The translation might be valid for larger regions. */
> +        if (plen > len) {
> +            plen = len;
> +        }
> +
> +        cpu_physical_memory_rw(paddr, buf, plen, is_write);
> +
> +        len -= plen;
> +        addr += plen;
> +        buf += plen;
> +    }
> +}
> +
> +static inline void dma_memory_read(DMADevice *dev,
> +                                   dma_addr_t addr,
> +                                   void *buf,
> +                                   dma_addr_t len)
> +{
> +    dma_memory_rw(dev, addr, buf, len, 0);
> +}
> +
> +static inline void dma_memory_write(DMADevice *dev,
> +                                    dma_addr_t addr,
> +                                    const void *buf,
> +                                    dma_addr_t len)
> +{
> +    dma_memory_rw(dev, addr, (void *) buf, len, 1);
> +}
> +
> +void *dma_memory_map(DMADevice *dev,
> +                     DMAInvalidateMapFunc *cb,
> +                     void *opaque,
> +                     dma_addr_t addr,
> +                     dma_addr_t *len,
> +                     int is_write);
> +void dma_memory_unmap(DMADevice *dev,
> +                      void *buffer,
> +                      dma_addr_t len,
> +                      int is_write,
> +                      dma_addr_t access_len);
> +
> +
> +void dma_invalidate_memory_range(DMADevice *dev,
> +                                 dma_addr_t addr,
> +                                 dma_addr_t len);
> +
> +
> +#define DEFINE_DMA_LD(suffix, size)                                       \
> +static inline uint##size##_t                                              \
> +dma_ld##suffix(DMADevice *dev, dma_addr_t addr)                           \
> +{                                                                         \
> +    int err;                                                              \
> +    dma_addr_t paddr, plen;                                               \
> +                                                                          \
> +    if (!dev || !dev->mmu) {                                              \
> +        return ld##suffix##_phys(addr);                                   \
> +    }                                                                     \
> +                                                                          \
> +    err = dev->mmu->translate(dev, addr, &paddr, &plen, 0);               \
> +    if (err || (plen < size / 8))                                         \

If the access is unaligned and the translation splits it in two (for
example, because it crosses a page boundary), the access is silently
ignored, which can't be correct.

Do we have such cases? If yes, should this be handled by the caller
instead (maybe not)?
Michael S. Tsirkin - Feb. 6, 2011, 11:13 a.m.
On Fri, Feb 04, 2011 at 01:32:55AM +0200, Eduard - Gabriel Munteanu wrote:
> This introduces replacements for memory access functions like
> cpu_physical_memory_read(). The new interface can handle address
> translation and access checking through an IOMMU.
> 
> Signed-off-by: Eduard - Gabriel Munteanu <eduard.munteanu@linux360.ro>
> ---
>  Makefile.target |    2 +-
>  hw/dma_rw.c     |  124 +++++++++++++++++++++++++++++++++++++++++++
>  hw/dma_rw.h     |  157 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 282 insertions(+), 1 deletions(-)
>  create mode 100644 hw/dma_rw.c
>  create mode 100644 hw/dma_rw.h
> 
> diff --git a/Makefile.target b/Makefile.target
> index e15b1c4..e5817ab 100644
> --- a/Makefile.target
> +++ b/Makefile.target
> @@ -218,7 +218,7 @@ obj-i386-y += cirrus_vga.o apic.o ioapic.o piix_pci.o
>  obj-i386-y += vmmouse.o vmport.o hpet.o applesmc.o
>  obj-i386-y += device-hotplug.o pci-hotplug.o smbios.o wdt_ib700.o
>  obj-i386-y += debugcon.o multiboot.o
> -obj-i386-y += pc_piix.o
> +obj-i386-y += pc_piix.o dma_rw.o
>  obj-i386-$(CONFIG_SPICE) += qxl.o qxl-logger.o qxl-render.o
>  
>  # shared objects
> diff --git a/hw/dma_rw.c b/hw/dma_rw.c
> new file mode 100644
> index 0000000..ef8e7f8
> --- /dev/null
> +++ b/hw/dma_rw.c
> @@ -0,0 +1,124 @@
> +/*
> + * Generic DMA memory access interface.
> + *
> + * Copyright (c) 2011 Eduard - Gabriel Munteanu
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a copy
> + * of this software and associated documentation files (the "Software"), to deal
> + * in the Software without restriction, including without limitation the rights
> + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
> + * copies of the Software, and to permit persons to whom the Software is
> + * furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
> + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
> + * THE SOFTWARE.
> + */
> +
> +#include "dma_rw.h"
> +#include "range.h"
> +
> +static void dma_register_memory_map(DMADevice *dev,
> +                                    dma_addr_t addr,
> +                                    dma_addr_t len,
> +                                    target_phys_addr_t paddr,
> +                                    DMAInvalidateMapFunc *invalidate,
> +                                    void *invalidate_opaque)
> +{
> +    DMAMemoryMap *map;
> +
> +    map = qemu_malloc(sizeof(DMAMemoryMap));
> +    map->addr               = addr;
> +    map->len                = len;
> +    map->paddr              = paddr;
> +    map->invalidate         = invalidate;
> +    map->invalidate_opaque  = invalidate_opaque;
> +
> +    QLIST_INSERT_HEAD(&dev->mmu->memory_maps, map, list);
> +}
> +
> +static void dma_unregister_memory_map(DMADevice *dev,
> +                                      target_phys_addr_t paddr,
> +                                      dma_addr_t len)
> +{
> +    DMAMemoryMap *map;
> +
> +    QLIST_FOREACH(map, &dev->mmu->memory_maps, list) {
> +        if (map->paddr == paddr && map->len == len) {
> +            QLIST_REMOVE(map, list);
> +            free(map);
> +        }
> +    }
> +}
> +
> +void dma_invalidate_memory_range(DMADevice *dev,
> +                                 dma_addr_t addr,
> +                                 dma_addr_t len)
> +{
> +    DMAMemoryMap *map;
> +
> +    QLIST_FOREACH(map, &dev->mmu->memory_maps, list) {
> +        if (ranges_overlap(addr, len, map->addr, map->len)) {
> +            map->invalidate(map->invalidate_opaque);
> +            QLIST_REMOVE(map, list);
> +            free(map);
> +        }
> +    }
> +}
> +
> +void *dma_memory_map(DMADevice *dev,
> +                     DMAInvalidateMapFunc *cb,
> +                     void *opaque,
> +                     dma_addr_t addr,
> +                     dma_addr_t *len,
> +                     int is_write)
> +{
> +    int err;
> +    target_phys_addr_t paddr, plen;
> +
> +    if (!dev || !dev->mmu) {
> +        return cpu_physical_memory_map(addr, len, is_write);
> +    }
> +
> +    plen = *len;
> +    err = dev->mmu->translate(dev, addr, &paddr, &plen, is_write);
> +    if (err) {
> +        return NULL;
> +    }
> +
> +    /*
> +     * If this is true, the virtual region is contiguous,
> +     * but the translated physical region isn't. We just
> +     * clamp *len, much like cpu_physical_memory_map() does.
> +     */
> +    if (plen < *len) {
> +        *len = plen;
> +    }
> +
> +    /* We treat maps as remote TLBs to cope with stuff like AIO. */
> +    if (cb) {
> +        dma_register_memory_map(dev, addr, *len, paddr, cb, opaque);
> +    }
> +
> +    return cpu_physical_memory_map(paddr, len, is_write);
> +}
> +
> +void dma_memory_unmap(DMADevice *dev,
> +                      void *buffer,
> +                      dma_addr_t len,
> +                      int is_write,
> +                      dma_addr_t access_len)
> +{
> +    cpu_physical_memory_unmap(buffer, len, is_write, access_len);
> +    if (dev && dev->mmu) {
> +        dma_unregister_memory_map(dev, (target_phys_addr_t) buffer, len);
> +    }
> +}
> +
> diff --git a/hw/dma_rw.h b/hw/dma_rw.h
> new file mode 100644
> index 0000000..bc93511
> --- /dev/null
> +++ b/hw/dma_rw.h
> @@ -0,0 +1,157 @@
> +#ifndef DMA_RW_H
> +#define DMA_RW_H
> +
> +#include "qemu-common.h"
> +
> +typedef uint64_t dma_addr_t;
> +
> +typedef struct DMAMmu DMAMmu;
> +typedef struct DMADevice DMADevice;
> +typedef struct DMAMemoryMap DMAMemoryMap;
> +
> +typedef int DMATranslateFunc(DMADevice *dev,
> +                             dma_addr_t addr,
> +                             dma_addr_t *paddr,
> +                             dma_addr_t *len,
> +                             int is_write);

So len is an in/out parameter here, which is a bit confusing,
and apparently not documented: you only see it by looking at the usage.
I also don't think it needs to be dma_addr_t - it's not
an address. I don't believe we ever need to
translate more than 2G in one go: how about returning
the length on success and a negative value on error?

Or add a comment.

> +
> +typedef void DMAInvalidateMapFunc(void *);
> +
> +struct DMAMmu {
> +    DeviceState *iommu;
> +    DMATranslateFunc *translate;
> +    QLIST_HEAD(memory_maps, DMAMemoryMap) memory_maps;
> +};
> +
> +struct DMADevice {
> +    DMAMmu *mmu;
> +};
> +
> +struct DMAMemoryMap {
> +    dma_addr_t              addr;
> +    dma_addr_t              len;
> +    target_phys_addr_t      paddr;
> +    DMAInvalidateMapFunc    *invalidate;
> +    void                    *invalidate_opaque;
> +
> +    QLIST_ENTRY(DMAMemoryMap) list;
> +};
> +
> +static inline void dma_memory_rw(DMADevice *dev,
> +                                 dma_addr_t addr,
> +                                 void *buf,
> +                                 dma_addr_t len,
> +                                 int is_write)
> +{
> +    dma_addr_t paddr, plen;
> +    int err;
> +
> +    /*
> +     * Fast-path non-iommu.
> +     * More importantly, makes it obvious what this function does.
> +     */
> +    if (!dev || !dev->mmu) {
> +        cpu_physical_memory_rw(addr, buf, plen, is_write);
> +        return;
> +    }
> +
> +    while (len) {
> +        err = dev->mmu->translate(dev, addr, &paddr, &plen, is_write);
> +        if (err) {
> +            return;
> +        }
> +
> +        /* The translation might be valid for larger regions. */
> +        if (plen > len) {
> +            plen = len;
> +        }
> +
> +        cpu_physical_memory_rw(paddr, buf, plen, is_write);
> +
> +        len -= plen;
> +        addr += plen;
> +        buf += plen;
> +    }
> +}
> +
> +static inline void dma_memory_read(DMADevice *dev,
> +                                   dma_addr_t addr,
> +                                   void *buf,
> +                                   dma_addr_t len)
> +{
> +    dma_memory_rw(dev, addr, buf, len, 0);
> +}
> +
> +static inline void dma_memory_write(DMADevice *dev,
> +                                    dma_addr_t addr,
> +                                    const void *buf,
> +                                    dma_addr_t len)
> +{
> +    dma_memory_rw(dev, addr, (void *) buf, len, 1);
> +}
> +
> +void *dma_memory_map(DMADevice *dev,
> +                     DMAInvalidateMapFunc *cb,
> +                     void *opaque,
> +                     dma_addr_t addr,
> +                     dma_addr_t *len,
> +                     int is_write);
> +void dma_memory_unmap(DMADevice *dev,
> +                      void *buffer,
> +                      dma_addr_t len,
> +                      int is_write,
> +                      dma_addr_t access_len);
> +
> +
> +void dma_invalidate_memory_range(DMADevice *dev,
> +                                 dma_addr_t addr,
> +                                 dma_addr_t len);
> +
> +
> +#define DEFINE_DMA_LD(suffix, size)                                       \
> +static inline uint##size##_t                                              \
> +dma_ld##suffix(DMADevice *dev, dma_addr_t addr)                           \
> +{                                                                         \
> +    int err;                                                              \
> +    dma_addr_t paddr, plen;                                               \
> +                                                                          \
> +    if (!dev || !dev->mmu) {                                              \
> +        return ld##suffix##_phys(addr);                                   \
> +    }                                                                     \
> +                                                                          \
> +    err = dev->mmu->translate(dev, addr, &paddr, &plen, 0);               \
> +    if (err || (plen < size / 8))                                         \
> +        return 0;                                                         \
> +                                                                          \
> +    return ld##suffix##_phys(paddr);                                      \
> +}
> +
> +#define DEFINE_DMA_ST(suffix, size)                                       \
> +static inline void                                                        \
> +dma_st##suffix(DMADevice *dev, dma_addr_t addr, uint##size##_t val)       \
> +{                                                                         \
> +    int err;                                                              \
> +    target_phys_addr_t paddr, plen;                                       \
> +                                                                          \
> +    if (!dev || !dev->mmu) {                                              \
> +        st##suffix##_phys(addr, val);                                     \
> +        return;                                                           \
> +    }                                                                     \
> +    err = dev->mmu->translate(dev, addr, &paddr, &plen, 1);               \
> +    if (err || (plen < size / 8))                                         \
> +        return;                                                           \
> +                                                                          \
> +    st##suffix##_phys(paddr, val);                                        \
> +}
> +
> +DEFINE_DMA_LD(ub, 8)
> +DEFINE_DMA_LD(uw, 16)
> +DEFINE_DMA_LD(l, 32)
> +DEFINE_DMA_LD(q, 64)
> +
> +DEFINE_DMA_ST(b, 8)
> +DEFINE_DMA_ST(w, 16)
> +DEFINE_DMA_ST(l, 32)
> +DEFINE_DMA_ST(q, 64)
> +
> +#endif

I am guessing the assumption is that the address is size-aligned
(which is right), so translation will either fail for all addresses
in the access or pass for all of them. But in that case,
wouldn't an assert() be better?

> -- 
> 1.7.3.4
Michael S. Tsirkin - Feb. 6, 2011, 11:16 a.m.
On Fri, Feb 04, 2011 at 01:32:55AM +0200, Eduard - Gabriel Munteanu wrote:
> This introduces replacements for memory access functions like
> cpu_physical_memory_read(). The new interface can handle address
> translation and access checking through an IOMMU.
> 
> Signed-off-by: Eduard - Gabriel Munteanu <eduard.munteanu@linux360.ro>
> ---
>  Makefile.target |    2 +-
>  hw/dma_rw.c     |  124 +++++++++++++++++++++++++++++++++++++++++++
>  hw/dma_rw.h     |  157 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 282 insertions(+), 1 deletions(-)
>  create mode 100644 hw/dma_rw.c
>  create mode 100644 hw/dma_rw.h
> 
> diff --git a/Makefile.target b/Makefile.target
> index e15b1c4..e5817ab 100644
> --- a/Makefile.target
> +++ b/Makefile.target
> @@ -218,7 +218,7 @@ obj-i386-y += cirrus_vga.o apic.o ioapic.o piix_pci.o
>  obj-i386-y += vmmouse.o vmport.o hpet.o applesmc.o
>  obj-i386-y += device-hotplug.o pci-hotplug.o smbios.o wdt_ib700.o
>  obj-i386-y += debugcon.o multiboot.o
> -obj-i386-y += pc_piix.o
> +obj-i386-y += pc_piix.o dma_rw.o

Does this need to be target-specific?

>  obj-i386-$(CONFIG_SPICE) += qxl.o qxl-logger.o qxl-render.o
>  
>  # shared objects
> diff --git a/hw/dma_rw.c b/hw/dma_rw.c
> new file mode 100644
> index 0000000..ef8e7f8
> --- /dev/null
> +++ b/hw/dma_rw.c
> @@ -0,0 +1,124 @@
> +/*
> + * Generic DMA memory access interface.
> + *
> + * Copyright (c) 2011 Eduard - Gabriel Munteanu
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a copy
> + * of this software and associated documentation files (the "Software"), to deal
> + * in the Software without restriction, including without limitation the rights
> + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
> + * copies of the Software, and to permit persons to whom the Software is
> + * furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
> + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
> + * THE SOFTWARE.
> + */
> +
> +#include "dma_rw.h"
> +#include "range.h"
> +
> +static void dma_register_memory_map(DMADevice *dev,
> +                                    dma_addr_t addr,
> +                                    dma_addr_t len,
> +                                    target_phys_addr_t paddr,
> +                                    DMAInvalidateMapFunc *invalidate,
> +                                    void *invalidate_opaque)
> +{
> +    DMAMemoryMap *map;
> +
> +    map = qemu_malloc(sizeof(DMAMemoryMap));
> +    map->addr               = addr;
> +    map->len                = len;
> +    map->paddr              = paddr;
> +    map->invalidate         = invalidate;
> +    map->invalidate_opaque  = invalidate_opaque;
> +
> +    QLIST_INSERT_HEAD(&dev->mmu->memory_maps, map, list);
> +}
> +
> +static void dma_unregister_memory_map(DMADevice *dev,
> +                                      target_phys_addr_t paddr,
> +                                      dma_addr_t len)
> +{
> +    DMAMemoryMap *map;
> +
> +    QLIST_FOREACH(map, &dev->mmu->memory_maps, list) {
> +        if (map->paddr == paddr && map->len == len) {
> +            QLIST_REMOVE(map, list);
> +            free(map);
> +        }
> +    }
> +}
> +
> +void dma_invalidate_memory_range(DMADevice *dev,
> +                                 dma_addr_t addr,
> +                                 dma_addr_t len)
> +{
> +    DMAMemoryMap *map;
> +
> +    QLIST_FOREACH(map, &dev->mmu->memory_maps, list) {
> +        if (ranges_overlap(addr, len, map->addr, map->len)) {
> +            map->invalidate(map->invalidate_opaque);
> +            QLIST_REMOVE(map, list);
> +            free(map);
> +        }
> +    }
> +}
> +
> +void *dma_memory_map(DMADevice *dev,
> +                     DMAInvalidateMapFunc *cb,
> +                     void *opaque,
> +                     dma_addr_t addr,
> +                     dma_addr_t *len,
> +                     int is_write)
> +{
> +    int err;
> +    target_phys_addr_t paddr, plen;
> +
> +    if (!dev || !dev->mmu) {
> +        return cpu_physical_memory_map(addr, len, is_write);
> +    }
> +
> +    plen = *len;
> +    err = dev->mmu->translate(dev, addr, &paddr, &plen, is_write);
> +    if (err) {
> +        return NULL;
> +    }
> +
> +    /*
> +     * If this is true, the virtual region is contiguous,
> +     * but the translated physical region isn't. We just
> +     * clamp *len, much like cpu_physical_memory_map() does.
> +     */
> +    if (plen < *len) {
> +        *len = plen;
> +    }
> +
> +    /* We treat maps as remote TLBs to cope with stuff like AIO. */
> +    if (cb) {
> +        dma_register_memory_map(dev, addr, *len, paddr, cb, opaque);
> +    }
> +
> +    return cpu_physical_memory_map(paddr, len, is_write);
> +}
> +
> +void dma_memory_unmap(DMADevice *dev,
> +                      void *buffer,
> +                      dma_addr_t len,
> +                      int is_write,
> +                      dma_addr_t access_len)
> +{
> +    cpu_physical_memory_unmap(buffer, len, is_write, access_len);
> +    if (dev && dev->mmu) {
> +        dma_unregister_memory_map(dev, (target_phys_addr_t) buffer, len);
> +    }
> +}
> +
> diff --git a/hw/dma_rw.h b/hw/dma_rw.h

Can we have a configure option to disable this
at compile time? Add stubs to avoid propagating ifdefs
all over the code.


> new file mode 100644
> index 0000000..bc93511
> --- /dev/null
> +++ b/hw/dma_rw.h
> @@ -0,0 +1,157 @@
> +#ifndef DMA_RW_H
> +#define DMA_RW_H
> +
> +#include "qemu-common.h"
> +
> +typedef uint64_t dma_addr_t;
> +
> +typedef struct DMAMmu DMAMmu;
> +typedef struct DMADevice DMADevice;
> +typedef struct DMAMemoryMap DMAMemoryMap;
> +
> +typedef int DMATranslateFunc(DMADevice *dev,
> +                             dma_addr_t addr,
> +                             dma_addr_t *paddr,
> +                             dma_addr_t *len,
> +                             int is_write);
> +
> +typedef void DMAInvalidateMapFunc(void *);
> +
> +struct DMAMmu {
> +    DeviceState *iommu;
> +    DMATranslateFunc *translate;
> +    QLIST_HEAD(memory_maps, DMAMemoryMap) memory_maps;
> +};
> +
> +struct DMADevice {
> +    DMAMmu *mmu;
> +};
> +
> +struct DMAMemoryMap {
> +    dma_addr_t              addr;
> +    dma_addr_t              len;
> +    target_phys_addr_t      paddr;
> +    DMAInvalidateMapFunc    *invalidate;
> +    void                    *invalidate_opaque;
> +
> +    QLIST_ENTRY(DMAMemoryMap) list;
> +};
> +
> +static inline void dma_memory_rw(DMADevice *dev,
> +                                 dma_addr_t addr,
> +                                 void *buf,
> +                                 dma_addr_t len,
> +                                 int is_write)
> +{
> +    dma_addr_t paddr, plen;
> +    int err;
> +
> +    /*
> +     * Fast-path non-iommu.
> +     * More importantly, makes it obvious what this function does.
> +     */
> +    if (!dev || !dev->mmu) {
> +        cpu_physical_memory_rw(addr, buf, plen, is_write);
> +        return;
> +    }
> +
> +    while (len) {
> +        err = dev->mmu->translate(dev, addr, &paddr, &plen, is_write);
> +        if (err) {
> +            return;
> +        }
> +
> +        /* The translation might be valid for larger regions. */
> +        if (plen > len) {
> +            plen = len;
> +        }
> +
> +        cpu_physical_memory_rw(paddr, buf, plen, is_write);
> +
> +        len -= plen;
> +        addr += plen;
> +        buf += plen;
> +    }
> +}
> +
> +static inline void dma_memory_read(DMADevice *dev,
> +                                   dma_addr_t addr,
> +                                   void *buf,
> +                                   dma_addr_t len)
> +{
> +    dma_memory_rw(dev, addr, buf, len, 0);
> +}
> +
> +static inline void dma_memory_write(DMADevice *dev,
> +                                    dma_addr_t addr,
> +                                    const void *buf,
> +                                    dma_addr_t len)
> +{
> +    dma_memory_rw(dev, addr, (void *) buf, len, 1);
> +}
> +
> +void *dma_memory_map(DMADevice *dev,
> +                     DMAInvalidateMapFunc *cb,
> +                     void *opaque,
> +                     dma_addr_t addr,
> +                     dma_addr_t *len,
> +                     int is_write);
> +void dma_memory_unmap(DMADevice *dev,
> +                      void *buffer,
> +                      dma_addr_t len,
> +                      int is_write,
> +                      dma_addr_t access_len);
> +
> +
> +void dma_invalidate_memory_range(DMADevice *dev,
> +                                 dma_addr_t addr,
> +                                 dma_addr_t len);
> +
> +
> +#define DEFINE_DMA_LD(suffix, size)                                       \
> +static inline uint##size##_t                                              \
> +dma_ld##suffix(DMADevice *dev, dma_addr_t addr)                           \
> +{                                                                         \
> +    int err;                                                              \
> +    dma_addr_t paddr, plen;                                               \
> +                                                                          \
> +    if (!dev || !dev->mmu) {                                              \
> +        return ld##suffix##_phys(addr);                                   \
> +    }                                                                     \
> +                                                                          \
> +    err = dev->mmu->translate(dev, addr, &paddr, &plen, 0);               \
> +    if (err || (plen < size / 8))                                         \
> +        return 0;                                                         \
> +                                                                          \
> +    return ld##suffix##_phys(paddr);                                      \
> +}
> +
> +#define DEFINE_DMA_ST(suffix, size)                                       \
> +static inline void                                                        \
> +dma_st##suffix(DMADevice *dev, dma_addr_t addr, uint##size##_t val)       \
> +{                                                                         \
> +    int err;                                                              \
> +    target_phys_addr_t paddr, plen;                                       \
> +                                                                          \
> +    if (!dev || !dev->mmu) {                                              \
> +        st##suffix##_phys(addr, val);                                     \
> +        return;                                                           \
> +    }                                                                     \
> +    err = dev->mmu->translate(dev, addr, &paddr, &plen, 1);               \
> +    if (err || (plen < size / 8))                                         \
> +        return;                                                           \
> +                                                                          \
> +    st##suffix##_phys(paddr, val);                                        \
> +}
> +
> +DEFINE_DMA_LD(ub, 8)
> +DEFINE_DMA_LD(uw, 16)
> +DEFINE_DMA_LD(l, 32)
> +DEFINE_DMA_LD(q, 64)
> +
> +DEFINE_DMA_ST(b, 8)
> +DEFINE_DMA_ST(w, 16)
> +DEFINE_DMA_ST(l, 32)
> +DEFINE_DMA_ST(q, 64)
> +
> +#endif
> -- 
> 1.7.3.4

Patch

diff --git a/Makefile.target b/Makefile.target
index e15b1c4..e5817ab 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -218,7 +218,7 @@  obj-i386-y += cirrus_vga.o apic.o ioapic.o piix_pci.o
 obj-i386-y += vmmouse.o vmport.o hpet.o applesmc.o
 obj-i386-y += device-hotplug.o pci-hotplug.o smbios.o wdt_ib700.o
 obj-i386-y += debugcon.o multiboot.o
-obj-i386-y += pc_piix.o
+obj-i386-y += pc_piix.o dma_rw.o
 obj-i386-$(CONFIG_SPICE) += qxl.o qxl-logger.o qxl-render.o
 
 # shared objects
diff --git a/hw/dma_rw.c b/hw/dma_rw.c
new file mode 100644
index 0000000..ef8e7f8
--- /dev/null
+++ b/hw/dma_rw.c
@@ -0,0 +1,124 @@ 
+/*
+ * Generic DMA memory access interface.
+ *
+ * Copyright (c) 2011 Eduard - Gabriel Munteanu
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "dma_rw.h"
+#include "range.h"
+
+/* Remember a live map on dev's MMU so it can be found and invalidated. */
+static void dma_register_memory_map(DMADevice *dev,
+                                    dma_addr_t addr, dma_addr_t len,
+                                    target_phys_addr_t paddr,
+                                    DMAInvalidateMapFunc *invalidate,
+                                    void *invalidate_opaque)
+{
+    DMAMemoryMap *entry = qemu_malloc(sizeof(DMAMemoryMap));
+
+    /* Plain field-by-field copy of the arguments. */
+    entry->invalidate_opaque = invalidate_opaque;
+    entry->invalidate = invalidate;
+    entry->paddr = paddr;
+    entry->len = len;
+    entry->addr = addr;
+
+    QLIST_INSERT_HEAD(&dev->mmu->memory_maps, entry, list);
+}
+
+/* Unlink and free every tracked map whose (paddr, len) matches. */
+static void dma_unregister_memory_map(DMADevice *dev,
+                                      target_phys_addr_t paddr, dma_addr_t len)
+{
+    DMAMemoryMap *map, *next;
+
+    QLIST_FOREACH_SAFE(map, &dev->mmu->memory_maps, list, next) {
+        if (map->paddr == paddr && map->len == len) {
+            QLIST_REMOVE(map, list);
+            qemu_free(map);
+        }
+    }
+}
+
+/* Invalidate (callback, unlink, free) all maps overlapping (addr, len). */
+void dma_invalidate_memory_range(DMADevice *dev,
+                                 dma_addr_t addr, dma_addr_t len)
+{
+    DMAMemoryMap *map, *next;
+
+    QLIST_FOREACH_SAFE(map, &dev->mmu->memory_maps, list, next) {
+        if (ranges_overlap(addr, len, map->addr, map->len)) {
+            map->invalidate(map->invalidate_opaque);
+            QLIST_REMOVE(map, list);
+            qemu_free(map);
+        }
+    }
+}
+
+/*
+ * Map a DMA region for direct host access, translating through dev's MMU
+ * if it has one. *len is clamped to what was mapped; NULL means failure.
+ * With a non-NULL cb, the map is tracked and cb(opaque) runs on invalidation.
+ */
+void *dma_memory_map(DMADevice *dev, DMAInvalidateMapFunc *cb, void *opaque,
+                     dma_addr_t addr, dma_addr_t *len, int is_write)
+{
+    int err;
+    dma_addr_t paddr, plen;
+    void *buf;
+
+    if (!dev || !dev->mmu) {
+        return cpu_physical_memory_map(addr, len, is_write);
+    }
+
+    plen = *len;
+    err = dev->mmu->translate(dev, addr, &paddr, &plen, is_write);
+    if (err) {
+        return NULL;
+    }
+
+    /* Virtually contiguous but physically not: clamp *len. */
+    if (plen < *len) {
+        *len = plen;
+    }
+
+    /* Map before tracking: it can fail or shrink *len further. */
+    buf = cpu_physical_memory_map(paddr, len, is_write);
+    if (buf && cb) {
+        /* We treat maps as remote TLBs to cope with stuff like AIO. */
+        dma_register_memory_map(dev, addr, *len, paddr, cb, opaque);
+    }
+
+    return buf;
+}
+
+/* Release a mapping made by dma_memory_map() and stop tracking it. */
+void dma_memory_unmap(DMADevice *dev, void *buffer, dma_addr_t len,
+                      int is_write, dma_addr_t access_len)
+{
+    cpu_physical_memory_unmap(buffer, len, is_write, access_len);
+    if (!dev || !dev->mmu) {
+        return;
+    }
+    /* NOTE(review): key is the host pointer; confirm it matches map->paddr. */
+    dma_unregister_memory_map(dev, (target_phys_addr_t) buffer, len);
+}
+
diff --git a/hw/dma_rw.h b/hw/dma_rw.h
new file mode 100644
index 0000000..bc93511
--- /dev/null
+++ b/hw/dma_rw.h
@@ -0,0 +1,157 @@ 
+#ifndef DMA_RW_H
+#define DMA_RW_H
+
+#include "qemu-common.h"
+
+typedef uint64_t dma_addr_t;
+
+typedef struct DMAMmu DMAMmu;
+typedef struct DMADevice DMADevice;
+typedef struct DMAMemoryMap DMAMemoryMap;
+
+typedef int DMATranslateFunc(DMADevice *dev,  /* returns nonzero on error */
+                             dma_addr_t addr,  /* address to translate */
+                             dma_addr_t *paddr,  /* out: translated address */
+                             dma_addr_t *len,  /* out: length valid there */
+                             int is_write);
+
+typedef void DMAInvalidateMapFunc(void *);  /* arg: the map's opaque */
+
+struct DMAMmu {
+    DeviceState *iommu;
+    DMATranslateFunc *translate;  /* address translation hook */
+    QLIST_HEAD(memory_maps, DMAMemoryMap) memory_maps;  /* tracked live maps */
+};
+
+struct DMADevice {
+    DMAMmu *mmu;  /* NULL: accesses bypass translation */
+};
+
+struct DMAMemoryMap {
+    dma_addr_t              addr;  /* start of the mapped DMA range */
+    dma_addr_t              len;  /* length of the mapped range */
+    target_phys_addr_t      paddr;  /* as passed at registration */
+    DMAInvalidateMapFunc    *invalidate;  /* invalidation callback */
+    void                    *invalidate_opaque;  /* passed to invalidate() */
+
+    QLIST_ENTRY(DMAMemoryMap) list;  /* link in DMAMmu.memory_maps */
+};
+
+/*
+ * Copy len bytes between buf and DMA address addr, translating through
+ * dev's MMU chunk by chunk if it has one; stops at the first failure.
+ */
+static inline void dma_memory_rw(DMADevice *dev, dma_addr_t addr, void *buf,
+                                 dma_addr_t len, int is_write)
+{
+    dma_addr_t paddr, plen;
+    int err;
+
+    /* Fast-path non-iommu: a plain physical access of the whole range. */
+    if (!dev || !dev->mmu) {
+        cpu_physical_memory_rw(addr, buf, len, is_write);
+        return;
+    }
+
+    while (len) {
+        /* Seed plen with the request, as dma_memory_map() does. */
+        plen = len;
+        err = dev->mmu->translate(dev, addr, &paddr, &plen, is_write);
+        /* A zero-length translation would make no progress: bail too. */
+        if (err || !plen) {
+            return;
+        }
+
+        /* The translation might be valid for larger regions. */
+        if (plen > len) {
+            plen = len;
+        }
+
+        cpu_physical_memory_rw(paddr, buf, plen, is_write);
+        len -= plen;
+        addr += plen;
+        buf = (uint8_t *) buf + plen;
+    }
+}
+
+static inline void dma_memory_read(DMADevice *dev,
+                                   dma_addr_t addr,
+                                   void *buf,
+                                   dma_addr_t len)
+{
+    dma_memory_rw(dev, addr, buf, len, 0);  /* 0 = is_write */
+}
+
+static inline void dma_memory_write(DMADevice *dev,
+                                    dma_addr_t addr,
+                                    const void *buf,
+                                    dma_addr_t len)
+{
+    dma_memory_rw(dev, addr, (void *) buf, len, 1);  /* 1 = is_write */
+}
+
+void *dma_memory_map(DMADevice *dev,
+                     DMAInvalidateMapFunc *cb,
+                     void *opaque,
+                     dma_addr_t addr,
+                     dma_addr_t *len,
+                     int is_write);
+void dma_memory_unmap(DMADevice *dev,
+                      void *buffer,
+                      dma_addr_t len,
+                      int is_write,
+                      dma_addr_t access_len);
+
+
+void dma_invalidate_memory_range(DMADevice *dev,
+                                 dma_addr_t addr,
+                                 dma_addr_t len);
+
+
+/* dma_ld<suffix>(): size-bit DMA load; yields 0 if translation fails. */
+#define DEFINE_DMA_LD(suffix, size)                                       \
+static inline uint##size##_t                                              \
+dma_ld##suffix(DMADevice *dev, dma_addr_t addr)                           \
+{                                                                         \
+    dma_addr_t paddr, plen = size / 8;                                    \
+                                                                          \
+    if (!dev || !dev->mmu) {                                              \
+        return ld##suffix##_phys(addr);                                   \
+    }                                                                     \
+                                                                          \
+    if (dev->mmu->translate(dev, addr, &paddr, &plen, 0) ||               \
+        plen < size / 8) {                                                \
+        return 0;                                                         \
+    }                                                                     \
+    return ld##suffix##_phys(paddr);                                      \
+}
+
+/* dma_st<suffix>(): size-bit DMA store; dropped if translation fails. */
+#define DEFINE_DMA_ST(suffix, size)                                       \
+static inline void                                                        \
+dma_st##suffix(DMADevice *dev, dma_addr_t addr, uint##size##_t val)       \
+{                                                                         \
+    dma_addr_t paddr, plen = size / 8;                                    \
+                                                                          \
+    if (!dev || !dev->mmu) {                                              \
+        st##suffix##_phys(addr, val);                                     \
+        return;                                                           \
+    }                                                                     \
+    if (dev->mmu->translate(dev, addr, &paddr, &plen, 1) ||               \
+        plen < size / 8) {                                                \
+        return;                                                           \
+    }                                                                     \
+    st##suffix##_phys(paddr, val);                                        \
+}
+
+DEFINE_DMA_LD(ub, 8)    /* dma_ldub() via ldub_phys() */
+DEFINE_DMA_LD(uw, 16)   /* dma_lduw() via lduw_phys() */
+DEFINE_DMA_LD(l, 32)    /* dma_ldl() via ldl_phys() */
+DEFINE_DMA_LD(q, 64)    /* dma_ldq() via ldq_phys() */
+
+DEFINE_DMA_ST(b, 8)     /* dma_stb() via stb_phys() */
+DEFINE_DMA_ST(w, 16)    /* dma_stw() via stw_phys() */
+DEFINE_DMA_ST(l, 32)    /* dma_stl() via stl_phys() */
+DEFINE_DMA_ST(q, 64)    /* dma_stq() via stq_phys() */
+
+#endif