From patchwork Thu Mar 1 05:36:06 2012
From: David Gibson <david@gibson.dropbear.id.au>
To: qemu-devel@nongnu.org
Cc: eduard.munteanu@linux360.ro, rth@twiddle.net, mst@redhat.com
Date: Thu, 1 Mar 2012 16:36:06 +1100
Message-Id: <1330580169-15446-11-git-send-email-david@gibson.dropbear.id.au>
In-Reply-To: <1330580169-15446-1-git-send-email-david@gibson.dropbear.id.au>
References: <1330580169-15446-1-git-send-email-david@gibson.dropbear.id.au>
Subject: [Qemu-devel] [PATCH 10/13] iommu: Introduce IOMMU emulation infrastructure

This patch adds the basic infrastructure necessary to emulate an IOMMU
visible to the guest.  The DMAContext structure is extended with
information and a callback describing the translation, and the various
DMA functions used by devices now perform IOMMU translation through
this callback whenever a context with an IOMMU is supplied.

Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Richard Henderson <rth@twiddle.net>
Signed-off-by: Eduard - Gabriel Munteanu <eduard.munteanu@linux360.ro>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
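A note for reviewers, not part of the commit message: below is a minimal
sketch of how a platform IOMMU might plug into this infrastructure.  It
assumes a hypothetical ExampleIOMMUState exposing a single contiguous
window of valid bus addresses; the type and field names are invented
for illustration only.

    /* Hypothetical translate callback: one valid bus-address window,
     * mapped 1:1 onto physical memory at a fixed offset. */
    typedef struct ExampleIOMMUState {
        dma_addr_t window_start;        /* first valid bus address */
        dma_addr_t window_size;         /* length of the valid window */
        target_phys_addr_t offset;      /* bus-to-physical offset */
    } ExampleIOMMUState;

    static int example_iommu_translate(DMAContext *dma, dma_addr_t addr,
                                       target_phys_addr_t *paddr,
                                       target_phys_addr_t *len,
                                       DMADirection dir)
    {
        ExampleIOMMUState *s = dma->opaque;

        if ((addr < s->window_start)
            || (addr - s->window_start >= s->window_size)) {
            return -1;      /* outside the window: fault the access */
        }

        *paddr = (addr - s->window_start) + s->offset;
        /* Report validity up to the end of the window; the helpers in
         * dma-helpers.c clamp *len back to what the caller asked for. */
        *len = s->window_size - (addr - s->window_start);
        return 0;
    }

The platform would then hand the context out to its devices after:

    dma_context_init(&s->dma_ctx, example_iommu_translate, s);

where s->dma_ctx is whatever DMAContext the platform embeds (again, an
invented name).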
 configure     |   12 ++++
 dma-helpers.c |  189 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 dma.h         |  126 +++++++++++++++++++++++++++++++-------
 3 files changed, 305 insertions(+), 22 deletions(-)

diff --git a/configure b/configure
index fb0e18e..3eff89c 100755
--- a/configure
+++ b/configure
@@ -138,6 +138,7 @@ linux_aio=""
 cap_ng=""
 attr=""
 libattr=""
+iommu="yes"
 xfs=""
 
 vhost_net="no"
@@ -784,6 +785,10 @@ for opt do
   ;;
   --enable-vhost-net) vhost_net="yes"
   ;;
+  --enable-iommu) iommu="yes"
+  ;;
+  --disable-iommu) iommu="no"
+  ;;
   --disable-opengl) opengl="no"
   ;;
   --enable-opengl) opengl="yes"
@@ -1085,6 +1090,8 @@ echo "  --enable-docs            enable documentation build"
 echo "  --disable-docs           disable documentation build"
 echo "  --disable-vhost-net      disable vhost-net acceleration support"
 echo "  --enable-vhost-net       enable vhost-net acceleration support"
+echo "  --disable-iommu          disable IOMMU emulation support"
+echo "  --enable-iommu           enable IOMMU emulation support"
 echo "  --enable-trace-backend=B Set trace backend"
 echo "                           Available backends:" $("$source_path"/scripts/tracetool --list-backends)
 echo "  --with-trace-file=NAME   Full PATH,NAME of file to store traces"
@@ -2935,6 +2942,7 @@ echo "posix_madvise     $posix_madvise"
 echo "uuid support      $uuid"
 echo "libcap-ng support $cap_ng"
 echo "vhost-net support $vhost_net"
+echo "IOMMU support     $iommu"
 echo "Trace backend     $trace_backend"
 echo "Trace output file $trace_file-<pid>"
 echo "spice support     $spice"
@@ -3812,6 +3820,10 @@ if test "$target_softmmu" = "yes" -a \( \
   echo "CONFIG_NEED_MMU=y" >> $config_target_mak
 fi
 
+if test "$iommu" = "yes" ; then
+  echo "CONFIG_IOMMU=y" >> $config_host_mak
+fi
+
 if test "$gprof" = "yes" ; then
   echo "TARGET_GPROF=yes" >> $config_target_mak
   if test "$target_linux_user" = "yes" ; then
diff --git a/dma-helpers.c b/dma-helpers.c
index 9dcfb2c..8269d60 100644
--- a/dma-helpers.c
+++ b/dma-helpers.c
@@ -10,6 +10,9 @@
 #include "dma.h"
 #include "block_int.h"
 #include "trace.h"
+#include "range.h"
+
+/*#define DEBUG_IOMMU*/
 
 void qemu_sglist_init(QEMUSGList *qsg, int alloc_hint, DMAContext *dma)
 {
@@ -255,3 +258,189 @@ void dma_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie,
 {
     bdrv_acct_start(bs, cookie, sg->size, type);
 }
+
+#ifdef CONFIG_IOMMU
+bool __dma_memory_valid(DMAContext *dma, dma_addr_t addr, dma_addr_t len,
+                        DMADirection dir)
+{
+    target_phys_addr_t paddr, plen;
+
+#ifdef DEBUG_IOMMU
+    fprintf(stderr, "dma_memory_check iommu=%p addr=0x%llx len=0x%llx dir=%d\n",
+            dma, (unsigned long long)addr, (unsigned long long)len, dir);
+#endif
+
+    while (len) {
+        if (dma->translate(dma, addr, &paddr, &plen, dir) != 0) {
+            return false;
+        }
+
+        /* The translation might be valid for larger regions. */
+        if (plen > len) {
+            plen = len;
+        }
+
+        len -= plen;
+        addr += plen;
+    }
+
+    return true;
+}
+
+int __dma_memory_rw(DMAContext *dma, dma_addr_t addr,
+                    void *buf, dma_addr_t len, DMADirection dir)
+{
+    target_phys_addr_t paddr, plen;
+    int err;
+
+#ifdef DEBUG_IOMMU
+    fprintf(stderr, "dma_memory_rw iommu=%p addr=0x%llx len=0x%llx dir=%d\n",
+            dma, (unsigned long long)addr, (unsigned long long)len, dir);
+#endif
+
+    while (len) {
+        err = dma->translate(dma, addr, &paddr, &plen, dir);
+        if (err) {
+            return -1;
+        }
+
+        /* The translation might be valid for larger regions. */
+        if (plen > len) {
+            plen = len;
+        }
+
+        cpu_physical_memory_rw(paddr, buf, plen,
+                               dir == DMA_DIRECTION_FROM_DEVICE);
+
+        len -= plen;
+        addr += plen;
+        buf += plen;
+    }
+
+    return 0;
+}
+
+int __dma_memory_zero(DMAContext *dma, dma_addr_t addr, dma_addr_t len)
+{
+    target_phys_addr_t paddr, plen;
+    int err;
+
+#ifdef DEBUG_IOMMU
+    fprintf(stderr, "dma_memory_zero iommu=%p addr=0x%llx len=0x%llx\n",
+            dma, (unsigned long long)addr, (unsigned long long)len);
+#endif
+
+    while (len) {
+        err = dma->translate(dma, addr, &paddr, &plen,
+                             DMA_DIRECTION_FROM_DEVICE);
+        if (err) {
+            return -1;
+        }
+
+        /* The translation might be valid for larger regions. */
+        if (plen > len) {
+            plen = len;
+        }
+
+        cpu_physical_memory_zero(paddr, plen);
+
+        len -= plen;
+        addr += plen;
+    }
+
+    return 0;
+}
+
+typedef struct DMAMemoryMap DMAMemoryMap;
+struct DMAMemoryMap {
+    dma_addr_t addr;
+    size_t len;
+    void *buf;
+    DMAInvalidateMapFunc *invalidate;
+    void *invalidate_opaque;
+
+    QLIST_ENTRY(DMAMemoryMap) list;
+};
+
+void dma_context_init(DMAContext *dma, DMATranslateFunc fn, void *opaque)
+{
+    dma->translate = fn;
+    dma->opaque = opaque;
+    QLIST_INIT(&dma->memory_maps);
+}
+
+void dma_invalidate_memory_range(DMAContext *dma,
+                                 dma_addr_t addr, dma_addr_t len)
+{
+    DMAMemoryMap *map, *next;
+
+    QLIST_FOREACH_SAFE(map, &dma->memory_maps, list, next) {
+        if (ranges_overlap(addr, len, map->addr, map->len)) {
+            map->invalidate(map->invalidate_opaque);
+            QLIST_REMOVE(map, list);
+            g_free(map);
+        }
+    }
+}
+
+void *__dma_memory_map(DMAContext *dma, DMAInvalidateMapFunc *cb,
+                       void *cb_opaque, dma_addr_t addr, dma_addr_t *len,
+                       DMADirection dir)
+{
+    int err;
+    target_phys_addr_t paddr, plen;
+    void *buf;
+
+    plen = *len;
+    err = dma->translate(dma, addr, &paddr, &plen, dir);
+    if (err) {
+        return NULL;
+    }
+
+    /*
+     * If this is true, the virtual region is contiguous,
+     * but the translated physical region isn't. We just
+     * clamp *len, much like cpu_physical_memory_map() does.
+     */
+    if (plen < *len) {
+        *len = plen;
+    }
+
+    buf = cpu_physical_memory_map(paddr, &plen,
+                                  dir == DMA_DIRECTION_FROM_DEVICE);
+    *len = plen;
+
+    /* We treat maps as remote TLBs to cope with stuff like AIO. */
+    if (cb) {
+        DMAMemoryMap *map;
+
+        map = g_malloc(sizeof(DMAMemoryMap));
+        map->addr = addr;
+        map->len = *len;
+        map->buf = buf;
+        map->invalidate = cb;
+        map->invalidate_opaque = cb_opaque;
+
+        QLIST_INSERT_HEAD(&dma->memory_maps, map, list);
+    }
+
+    return buf;
+}
+
+void __dma_memory_unmap(DMAContext *dma, void *buffer, dma_addr_t len,
+                        DMADirection dir, dma_addr_t access_len)
+{
+    DMAMemoryMap *map, *next;
+
+    cpu_physical_memory_unmap(buffer, len,
+                              dir == DMA_DIRECTION_FROM_DEVICE,
+                              access_len);
+
+    QLIST_FOREACH_SAFE(map, &dma->memory_maps, list, next) {
+        if ((map->buf == buffer) && (map->len == len)) {
+            QLIST_REMOVE(map, list);
+            g_free(map);
+        }
+    }
+}
+#endif /* CONFIG_IOMMU */
diff --git a/dma.h b/dma.h
index a66e3d7..c27f8b3 100644
--- a/dma.h
+++ b/dma.h
@@ -15,6 +15,7 @@
 #include "hw/hw.h"
 #include "block.h"
 
+typedef struct DMAContext DMAContext;
 typedef struct ScatterGatherEntry ScatterGatherEntry;
 
 typedef enum {
@@ -31,30 +32,83 @@ struct QEMUSGList {
 };
 
 #if defined(TARGET_PHYS_ADDR_BITS)
+
+#ifdef CONFIG_IOMMU
+/*
+ * When an IOMMU is present, bus addresses become distinct from
+ * CPU/memory physical addresses and may be a different size.  Because
+ * the IOVA size depends more on the bus than on the platform, we more
+ * or less have to treat these as 64-bit always to cover all (or at
+ * least most) cases.
+ */
+typedef uint64_t dma_addr_t;
+
+#define DMA_ADDR_BITS 64
+#define DMA_ADDR_FMT "%" PRIx64
+#else
 typedef target_phys_addr_t dma_addr_t;
 
 #define DMA_ADDR_BITS TARGET_PHYS_ADDR_BITS
 #define DMA_ADDR_FMT TARGET_FMT_plx
+#endif
+
+typedef int DMATranslateFunc(DMAContext *dma,
+                             dma_addr_t addr,
+                             target_phys_addr_t *paddr,
+                             target_phys_addr_t *len,
+                             DMADirection dir);
+
+struct DMAContext {
+#ifdef CONFIG_IOMMU
+    DMATranslateFunc *translate;
+    void *opaque;
+    QLIST_HEAD(memory_maps, DMAMemoryMap) memory_maps;
+#endif
+};
+
+#ifdef CONFIG_IOMMU
+static inline bool dma_has_iommu(DMAContext *dma)
+{
+    return (dma != NULL);
+}
+#else
+static inline bool dma_has_iommu(DMAContext *dma)
+{
+    return false;
+}
+#endif
 
 typedef void DMAInvalidateMapFunc(void *);
 
 /* Checks that the given range of addresses is valid for DMA.  This is
  * useful for certain cases, but usually you should just use
  * dma_memory_{read,write}() and check for errors */
-static inline bool dma_memory_valid(DMAContext *dma, dma_addr_t addr,
-                                    dma_addr_t len, DMADirection dir)
+bool __dma_memory_valid(DMAContext *dma, dma_addr_t addr, dma_addr_t len,
+                        DMADirection dir);
+static inline bool dma_memory_valid(DMAContext *dma,
+                                    dma_addr_t addr, dma_addr_t len,
+                                    DMADirection dir)
 {
-    /* Stub version, with no iommu we assume all bus addresses are valid */
-    return true;
+    if (!dma_has_iommu(dma)) {
+        return true;
+    } else {
+        return __dma_memory_valid(dma, addr, len, dir);
+    }
 }
 
+int __dma_memory_rw(DMAContext *dma, dma_addr_t addr,
+                    void *buf, dma_addr_t len, DMADirection dir);
 static inline int dma_memory_rw(DMAContext *dma, dma_addr_t addr,
                                 void *buf, dma_addr_t len, DMADirection dir)
 {
-    /* Stub version when we have no iommu support */
-    cpu_physical_memory_rw(addr, buf, (target_phys_addr_t)len,
-                           dir == DMA_DIRECTION_FROM_DEVICE);
-    return 0;
+    if (!dma_has_iommu(dma)) {
+        /* Fast-path for no IOMMU */
+        cpu_physical_memory_rw(addr, buf, (target_phys_addr_t)len,
+                               dir == DMA_DIRECTION_FROM_DEVICE);
+        return 0;
+    } else {
+        return __dma_memory_rw(dma, addr, buf, len, dir);
+    }
 }
 
 static inline int dma_memory_read(DMAContext *dma, dma_addr_t addr,
@@ -70,35 +124,55 @@ static inline int dma_memory_write(DMAContext *dma, dma_addr_t addr,
                            DMA_DIRECTION_FROM_DEVICE);
 }
 
+int __dma_memory_zero(DMAContext *dma, dma_addr_t addr, dma_addr_t len);
 static inline int dma_memory_zero(DMAContext *dma, dma_addr_t addr,
                                   dma_addr_t len)
 {
-    /* Stub version when we have no iommu support */
-    cpu_physical_memory_zero(addr, len);
-    return 0;
+    if (!dma_has_iommu(dma)) {
+        /* Fast-path for no IOMMU */
+        cpu_physical_memory_zero(addr, len);
+        return 0;
+    } else {
+        return __dma_memory_zero(dma, addr, len);
+    }
 }
 
+void *__dma_memory_map(DMAContext *dma,
+                       DMAInvalidateMapFunc *cb, void *cb_opaque,
+                       dma_addr_t addr, dma_addr_t *len,
+                       DMADirection dir);
 static inline void *dma_memory_map(DMAContext *dma,
-                                   DMAInvalidateMapFunc *cb, void *opaque,
+                                   DMAInvalidateMapFunc *cb, void *cb_opaque,
                                    dma_addr_t addr, dma_addr_t *len,
                                    DMADirection dir)
 {
-    target_phys_addr_t xlen = *len;
-    void *p;
-
-    p = cpu_physical_memory_map(addr, &xlen,
-                                dir == DMA_DIRECTION_FROM_DEVICE);
-    *len = xlen;
-    return p;
+    if (!dma_has_iommu(dma)) {
+        target_phys_addr_t xlen = *len;
+        void *p;
+
+        p = cpu_physical_memory_map(addr, &xlen,
+                                    dir == DMA_DIRECTION_FROM_DEVICE);
+        *len = xlen;
+        return p;
+    } else {
+        return __dma_memory_map(dma, cb, cb_opaque, addr, len, dir);
+    }
 }
 
+void __dma_memory_unmap(DMAContext *dma,
+                        void *buffer, dma_addr_t len,
+                        DMADirection dir, dma_addr_t access_len);
 static inline void dma_memory_unmap(DMAContext *dma,
                                     void *buffer, dma_addr_t len,
                                     DMADirection dir, dma_addr_t access_len)
 {
-    return cpu_physical_memory_unmap(buffer, (target_phys_addr_t)len,
-                                     dir == DMA_DIRECTION_FROM_DEVICE,
-                                     access_len);
+    if (!dma_has_iommu(dma)) {
+        cpu_physical_memory_unmap(buffer, (target_phys_addr_t)len,
+                                  dir == DMA_DIRECTION_FROM_DEVICE,
+                                  access_len);
+    } else {
+        __dma_memory_unmap(dma, buffer, len, dir, access_len);
+    }
 }
 
 #define DEFINE_LDST_DMA(_lname, _sname, _bits, _end) \
@@ -139,6 +213,14 @@ DEFINE_LDST_DMA(q, q, 64, be);
 
 #undef DEFINE_LDST_DMA
 
+#ifdef CONFIG_IOMMU
+
+void dma_context_init(DMAContext *dma, DMATranslateFunc fn, void *opaque);
+void dma_invalidate_memory_range(DMAContext *dma,
+                                 dma_addr_t addr, dma_addr_t len);
+
+#endif /* CONFIG_IOMMU */
+
 struct ScatterGatherEntry {
     dma_addr_t base;
     dma_addr_t len;
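
For completeness, a sketch of the device-side usage (hypothetical
device code, invented for illustration; it is not part of this patch).
With dma == NULL the inline wrappers above keep the existing behaviour
and go straight to cpu_physical_memory_*(); with a non-NULL context
every access is funnelled through the translate callback:

    static void example_device_read_desc(DMAContext *dma, dma_addr_t desc)
    {
        uint8_t buf[64];

        /* Optional pre-check; reads/writes also report errors. */
        if (!dma_memory_valid(dma, desc, sizeof(buf),
                              DMA_DIRECTION_TO_DEVICE)) {
            return;     /* raise a device-specific DMA fault instead */
        }

        if (dma_memory_read(dma, desc, buf, sizeof(buf))) {
            return;     /* translation failed part-way through */
        }

        /* ... parse the descriptor in buf ... */
    }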