Patchwork [07/13] iommu: Make sglists and dma_bdrv helpers use new universal DMA helpers

login
register
mail settings
Submitter David Gibson
Date March 9, 2012, 5:01 a.m.
Message ID <1331269308-22372-8-git-send-email-david@gibson.dropbear.id.au>
Download mbox | patch
Permalink /patch/145680/
State New
Headers show

Comments

David Gibson - March 9, 2012, 5:01 a.m.
dma-helpers.c contains a number of helper functions for doing
scatter/gather DMA, and various block device related DMA.  Currently,
these directly access guest memory using cpu_physical_memory_*(),
assuming no IOMMU translation.

This patch updates this code to use the new universal DMA helper
functions.  qemu_sglist_init() now takes a DMAContext * to describe
the DMA address space in which the scatter/gather will take place.

We minimally update the callers of qemu_sglist_init() to pass NULL
(i.e. no translation, same as current behaviour).  Some of those
callers should pass something else in some cases to allow proper IOMMU
translation in future, but that will be fixed in later patches.

Cc: Kevin Wolf <kwolf@redhat.com>
Cc: Michael S. Tsirkin <mst@redhat.com>

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 dma-helpers.c  |   26 ++++++++++++++++++--------
 dma.h          |    3 ++-
 hw/ide/ahci.c  |    3 ++-
 hw/ide/macio.c |    4 ++--
 hw/pci.h       |    2 +-
 5 files changed, 25 insertions(+), 13 deletions(-)
Kevin Wolf - March 9, 2012, 9:43 a.m.
Am 09.03.2012 06:01, schrieb David Gibson:
> dma-helpers.c contains a number of helper functions for doing
> scatter/gather DMA, and various block device related DMA.  Currently,
> these directly access guest memory using cpu_physical_memory_*(),
> assuming no IOMMU translation.
> 
> This patch updates this code to use the new universal DMA helper
> functions.  qemu_sglist_init() now takes a DMAContext * to describe
> the DMA address space in which the scatter/gather will take place.
> 
> We minimally update the callers qemu_sglist_init() to pass NULL
> (i.e. no translation, same as current behaviour).  Some of those
> callers should pass something else in some cases to allow proper IOMMU
> translation in future, but that will be fixed in later patches.
> 
> Cc: Kevin Wolf <kwolf@redhat.com>
> Cc: Michael S. Tsirkin <mst@redhat.com>
> 
> Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> ---
>  dma-helpers.c  |   26 ++++++++++++++++++--------
>  dma.h          |    3 ++-
>  hw/ide/ahci.c  |    3 ++-
>  hw/ide/macio.c |    4 ++--
>  hw/pci.h       |    2 +-
>  5 files changed, 25 insertions(+), 13 deletions(-)
> 
> diff --git a/dma-helpers.c b/dma-helpers.c
> index 5f19a85..9dcfb2c 100644
> --- a/dma-helpers.c
> +++ b/dma-helpers.c
> @@ -11,12 +11,13 @@
>  #include "block_int.h"
>  #include "trace.h"
>  
> -void qemu_sglist_init(QEMUSGList *qsg, int alloc_hint)
> +void qemu_sglist_init(QEMUSGList *qsg, int alloc_hint, DMAContext *dma)
>  {
>      qsg->sg = g_malloc(alloc_hint * sizeof(ScatterGatherEntry));
>      qsg->nsg = 0;
>      qsg->nalloc = alloc_hint;
>      qsg->size = 0;
> +    qsg->dma = dma;
>  }
>  
>  void qemu_sglist_add(QEMUSGList *qsg, dma_addr_t base, dma_addr_t len)
> @@ -75,10 +76,9 @@ static void dma_bdrv_unmap(DMAAIOCB *dbs)
>      int i;
>  
>      for (i = 0; i < dbs->iov.niov; ++i) {
> -        cpu_physical_memory_unmap(dbs->iov.iov[i].iov_base,
> -                                  dbs->iov.iov[i].iov_len,
> -                                  dbs->dir != DMA_DIRECTION_TO_DEVICE,
> -                                  dbs->iov.iov[i].iov_len);
> +        dma_memory_unmap(dbs->sg->dma, dbs->iov.iov[i].iov_base,
> +                         dbs->iov.iov[i].iov_len, dbs->dir,
> +                         dbs->iov.iov[i].iov_len);
>      }
>      qemu_iovec_reset(&dbs->iov);
>  }
> @@ -104,10 +104,20 @@ static void dma_complete(DMAAIOCB *dbs, int ret)
>      }
>  }
>  
> +static void dma_bdrv_cancel(void *opaque)
> +{
> +    DMAAIOCB *dbs = opaque;
> +
> +    bdrv_aio_cancel(dbs->acb);
> +    dma_bdrv_unmap(dbs);
> +    qemu_iovec_destroy(&dbs->iov);
> +    qemu_aio_release(dbs);
> +}

I'm lacking the context to know when this is actually called, but it
looks suspicious. Did you consider that bdrv_aio_cancel() can actually
invoke the completion callback?

What's the difference between the existing dma_aio_cancel() and the
function that you need here?

Kevin
Paolo Bonzini - March 9, 2012, 10:11 a.m.
Il 09/03/2012 06:01, David Gibson ha scritto:
> dma-helpers.c contains a number of helper functions for doing
> scatter/gather DMA, and various block device related DMA.  Currently,
> these directly access guest memory using cpu_physical_memory_*(),
> assuming no IOMMU translation.
> 
> This patch updates this code to use the new universal DMA helper
> functions.  qemu_sglist_init() now takes a DMAContext * to describe
> the DMA address space in which the scatter/gather will take place.
> 
> We minimally update the callers qemu_sglist_init() to pass NULL
> (i.e. no translation, same as current behaviour).  Some of those
> callers should pass something else in some cases to allow proper IOMMU
> translation in future, but that will be fixed in later patches.
> 
> Cc: Kevin Wolf <kwolf@redhat.com>
> Cc: Michael S. Tsirkin <mst@redhat.com>
> 
> Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> ---
>  dma-helpers.c  |   26 ++++++++++++++++++--------
>  dma.h          |    3 ++-
>  hw/ide/ahci.c  |    3 ++-
>  hw/ide/macio.c |    4 ++--
>  hw/pci.h       |    2 +-
>  5 files changed, 25 insertions(+), 13 deletions(-)
> 
> diff --git a/dma-helpers.c b/dma-helpers.c
> index 5f19a85..9dcfb2c 100644
> --- a/dma-helpers.c
> +++ b/dma-helpers.c
> @@ -11,12 +11,13 @@
>  #include "block_int.h"
>  #include "trace.h"
>  
> -void qemu_sglist_init(QEMUSGList *qsg, int alloc_hint)
> +void qemu_sglist_init(QEMUSGList *qsg, int alloc_hint, DMAContext *dma)
>  {
>      qsg->sg = g_malloc(alloc_hint * sizeof(ScatterGatherEntry));
>      qsg->nsg = 0;
>      qsg->nalloc = alloc_hint;
>      qsg->size = 0;
> +    qsg->dma = dma;
>  }
>  
>  void qemu_sglist_add(QEMUSGList *qsg, dma_addr_t base, dma_addr_t len)
> @@ -75,10 +76,9 @@ static void dma_bdrv_unmap(DMAAIOCB *dbs)
>      int i;
>  
>      for (i = 0; i < dbs->iov.niov; ++i) {
> -        cpu_physical_memory_unmap(dbs->iov.iov[i].iov_base,
> -                                  dbs->iov.iov[i].iov_len,
> -                                  dbs->dir != DMA_DIRECTION_TO_DEVICE,
> -                                  dbs->iov.iov[i].iov_len);
> +        dma_memory_unmap(dbs->sg->dma, dbs->iov.iov[i].iov_base,
> +                         dbs->iov.iov[i].iov_len, dbs->dir,
> +                         dbs->iov.iov[i].iov_len);
>      }
>      qemu_iovec_reset(&dbs->iov);
>  }
> @@ -104,10 +104,20 @@ static void dma_complete(DMAAIOCB *dbs, int ret)
>      }
>  }
>  
> +static void dma_bdrv_cancel(void *opaque)
> +{
> +    DMAAIOCB *dbs = opaque;
> +
> +    bdrv_aio_cancel(dbs->acb);
> +    dma_bdrv_unmap(dbs);
> +    qemu_iovec_destroy(&dbs->iov);
> +    qemu_aio_release(dbs);
> +}

What Kevin said.  Instead of a generic callback, dma_memory_map should
probably just receive the AIOCB (in this case &dbs->common) and call
bdrv_aio_cancel on it.

>  static void dma_bdrv_cb(void *opaque, int ret)
>  {
>      DMAAIOCB *dbs = (DMAAIOCB *)opaque;
> -    target_phys_addr_t cur_addr, cur_len;
> +    dma_addr_t cur_addr, cur_len;
>      void *mem;
>  
>      trace_dma_bdrv_cb(dbs, ret);
> @@ -124,8 +134,8 @@ static void dma_bdrv_cb(void *opaque, int ret)
>      while (dbs->sg_cur_index < dbs->sg->nsg) {
>          cur_addr = dbs->sg->sg[dbs->sg_cur_index].base + dbs->sg_cur_byte;
>          cur_len = dbs->sg->sg[dbs->sg_cur_index].len - dbs->sg_cur_byte;
> -        mem = cpu_physical_memory_map(cur_addr, &cur_len,
> -                                      dbs->dir != DMA_DIRECTION_TO_DEVICE);
> +        mem = dma_memory_map(dbs->sg->dma, dma_bdrv_cancel, dbs,
> +                             cur_addr, &cur_len, dbs->dir);
>          if (!mem)
>              break;
>          qemu_iovec_add(&dbs->iov, mem, cur_len);

dma_buf_rw should also use the DMAContext here (passing a NULL
invalidate function).

Paolo
David Gibson - March 13, 2012, 6:37 a.m.
On Fri, Mar 09, 2012 at 11:11:41AM +0100, Paolo Bonzini wrote:
> Il 09/03/2012 06:01, David Gibson ha scritto:
[snip]
> > +static void dma_bdrv_cancel(void *opaque)
> > +{
> > +    DMAAIOCB *dbs = opaque;
> > +
> > +    bdrv_aio_cancel(dbs->acb);
> > +    dma_bdrv_unmap(dbs);
> > +    qemu_iovec_destroy(&dbs->iov);
> > +    qemu_aio_release(dbs);
> > +}
> 
> What Kevin said.  Instead of a generic callback, dma_memory_map should
> probably just receive the AIOCB (in this case &dbs->common) and call
> bdrv_aio_cancel on it.

Uh, that doesn't really make sense.  There's no guarantee that a
dma_memory_map() invocation will be associated with an AIOCB, although
that is a major use.

> >  static void dma_bdrv_cb(void *opaque, int ret)
> >  {
> >      DMAAIOCB *dbs = (DMAAIOCB *)opaque;
> > -    target_phys_addr_t cur_addr, cur_len;
> > +    dma_addr_t cur_addr, cur_len;
> >      void *mem;
> >  
> >      trace_dma_bdrv_cb(dbs, ret);
> > @@ -124,8 +134,8 @@ static void dma_bdrv_cb(void *opaque, int ret)
> >      while (dbs->sg_cur_index < dbs->sg->nsg) {
> >          cur_addr = dbs->sg->sg[dbs->sg_cur_index].base + dbs->sg_cur_byte;
> >          cur_len = dbs->sg->sg[dbs->sg_cur_index].len - dbs->sg_cur_byte;
> > -        mem = cpu_physical_memory_map(cur_addr, &cur_len,
> > -                                      dbs->dir != DMA_DIRECTION_TO_DEVICE);
> > +        mem = dma_memory_map(dbs->sg->dma, dma_bdrv_cancel, dbs,
> > +                             cur_addr, &cur_len, dbs->dir);
> >          if (!mem)
> >              break;
> >          qemu_iovec_add(&dbs->iov, mem, cur_len);
> 
> dma_buf_rw should also use the DMAContext here (passing a NULL
> invalidate function).

Done in the next spin.  Although no invalidate function is needed,
NULL or otherwise, since dma_buf_rw uses plain _rw, rather than
map/unmap.
David Gibson - March 13, 2012, 10:42 a.m.
On Fri, Mar 09, 2012 at 10:43:35AM +0100, Kevin Wolf wrote:
> Am 09.03.2012 06:01, schrieb David Gibson:
[snip]
> > @@ -104,10 +104,20 @@ static void dma_complete(DMAAIOCB *dbs, int ret)
> >      }
> >  }
> >  
> > +static void dma_bdrv_cancel(void *opaque)
> > +{
> > +    DMAAIOCB *dbs = opaque;
> > +
> > +    bdrv_aio_cancel(dbs->acb);
> > +    dma_bdrv_unmap(dbs);
> > +    qemu_iovec_destroy(&dbs->iov);
> > +    qemu_aio_release(dbs);
> > +}
> 
> I'm lacking the context to know when this is actually called, but it
> looks suspicious. Did you consider that bdrv_aio_cancel() can actually
> invoke the completion callback?
> 
> What's the difference between the existing dma_aio_cancel() and the
> function that you need here?

So, first thing to note is that as I said in another sub-thread, there
are several approaches we could take for handling invalidation of
IOMMU mappings while they're in use by drivers, and I'm not sure which
is best yet.

Second is that this piece of code comes from Eduard - Gabriel's
original and I haven't actually understood it as well as I should have
:).

So, examining in more detail it looks like dma_aio_cancel() is the
right thing to do (I don't know if it existed when Eduard - Gabriel
wrote the initial version).

The semantics of the callback are that after it's complete, there
should be no further access to the dma_memory_map()ed memory areas.  I
haven't yet understood the bdrv stuff sufficiently well to be
completely sure that's true.
Kevin Wolf - March 13, 2012, 11:55 a.m.
Am 13.03.2012 11:42, schrieb David Gibson:
> On Fri, Mar 09, 2012 at 10:43:35AM +0100, Kevin Wolf wrote:
>> Am 09.03.2012 06:01, schrieb David Gibson:
> [snip]
>>> @@ -104,10 +104,20 @@ static void dma_complete(DMAAIOCB *dbs, int ret)
>>>      }
>>>  }
>>>  
>>> +static void dma_bdrv_cancel(void *opaque)
>>> +{
>>> +    DMAAIOCB *dbs = opaque;
>>> +
>>> +    bdrv_aio_cancel(dbs->acb);
>>> +    dma_bdrv_unmap(dbs);
>>> +    qemu_iovec_destroy(&dbs->iov);
>>> +    qemu_aio_release(dbs);
>>> +}
>>
>> I'm lacking the context to know when this is actually called, but it
>> looks suspicious. Did you consider that bdrv_aio_cancel() can actually
>> invoke the completion callback?
>>
>> What's the difference between the existing dma_aio_cancel() and the
>> function that you need here?
> 
> So, first thing to note is that as I said in another sub-thread, there
> are several approaches we could take for handling invalidation of
> IOMMU mappings while they're in use by drivers, and I'm not sure which
> is best yet.
> 
> Second is that this piece of code comes from Eduard - Gabriel's
> original and I haven't actually understood it as well as I should have
> :).
> 
> So, examining in more detail it looks like dma_aio_cancel() is the
> right thing to do (I don't know if it existed when Eduard - Gabriel
> wrote the initial version).

I'm pretty sure that it's older than these patches.

> The semantics of the callback are that after it's complete, there
> should be no further access to the dma_memory_map()ed memory areas.  I
> haven't yet understood the bdrv stuff sufficiently well to be
> completely sure that's true.

Yes, I think it is. Once bdrv_aio_cancel() returns, the block layer
doesn't do anything with the request any more. The caller (e.g. IDE)
can't use the memory areas either because it doesn't even know them.

Kevin

Patch

diff --git a/dma-helpers.c b/dma-helpers.c
index 5f19a85..9dcfb2c 100644
--- a/dma-helpers.c
+++ b/dma-helpers.c
@@ -11,12 +11,13 @@ 
 #include "block_int.h"
 #include "trace.h"
 
-void qemu_sglist_init(QEMUSGList *qsg, int alloc_hint)
+void qemu_sglist_init(QEMUSGList *qsg, int alloc_hint, DMAContext *dma)
 {
     qsg->sg = g_malloc(alloc_hint * sizeof(ScatterGatherEntry));
     qsg->nsg = 0;
     qsg->nalloc = alloc_hint;
     qsg->size = 0;
+    qsg->dma = dma;
 }
 
 void qemu_sglist_add(QEMUSGList *qsg, dma_addr_t base, dma_addr_t len)
@@ -75,10 +76,9 @@  static void dma_bdrv_unmap(DMAAIOCB *dbs)
     int i;
 
     for (i = 0; i < dbs->iov.niov; ++i) {
-        cpu_physical_memory_unmap(dbs->iov.iov[i].iov_base,
-                                  dbs->iov.iov[i].iov_len,
-                                  dbs->dir != DMA_DIRECTION_TO_DEVICE,
-                                  dbs->iov.iov[i].iov_len);
+        dma_memory_unmap(dbs->sg->dma, dbs->iov.iov[i].iov_base,
+                         dbs->iov.iov[i].iov_len, dbs->dir,
+                         dbs->iov.iov[i].iov_len);
     }
     qemu_iovec_reset(&dbs->iov);
 }
@@ -104,10 +104,20 @@  static void dma_complete(DMAAIOCB *dbs, int ret)
     }
 }
 
+static void dma_bdrv_cancel(void *opaque)
+{
+    DMAAIOCB *dbs = opaque;
+
+    bdrv_aio_cancel(dbs->acb);
+    dma_bdrv_unmap(dbs);
+    qemu_iovec_destroy(&dbs->iov);
+    qemu_aio_release(dbs);
+}
+
 static void dma_bdrv_cb(void *opaque, int ret)
 {
     DMAAIOCB *dbs = (DMAAIOCB *)opaque;
-    target_phys_addr_t cur_addr, cur_len;
+    dma_addr_t cur_addr, cur_len;
     void *mem;
 
     trace_dma_bdrv_cb(dbs, ret);
@@ -124,8 +134,8 @@  static void dma_bdrv_cb(void *opaque, int ret)
     while (dbs->sg_cur_index < dbs->sg->nsg) {
         cur_addr = dbs->sg->sg[dbs->sg_cur_index].base + dbs->sg_cur_byte;
         cur_len = dbs->sg->sg[dbs->sg_cur_index].len - dbs->sg_cur_byte;
-        mem = cpu_physical_memory_map(cur_addr, &cur_len,
-                                      dbs->dir != DMA_DIRECTION_TO_DEVICE);
+        mem = dma_memory_map(dbs->sg->dma, dma_bdrv_cancel, dbs,
+                             cur_addr, &cur_len, dbs->dir);
         if (!mem)
             break;
         qemu_iovec_add(&dbs->iov, mem, cur_len);
diff --git a/dma.h b/dma.h
index 8b6ef44..a66e3d7 100644
--- a/dma.h
+++ b/dma.h
@@ -27,6 +27,7 @@  struct QEMUSGList {
     int nsg;
     int nalloc;
     size_t size;
+    DMAContext *dma;
 };
 
 #if defined(TARGET_PHYS_ADDR_BITS)
@@ -143,7 +144,7 @@  struct ScatterGatherEntry {
     dma_addr_t len;
 };
 
-void qemu_sglist_init(QEMUSGList *qsg, int alloc_hint);
+void qemu_sglist_init(QEMUSGList *qsg, int alloc_hint, DMAContext *dma);
 void qemu_sglist_add(QEMUSGList *qsg, dma_addr_t base, dma_addr_t len);
 void qemu_sglist_destroy(QEMUSGList *qsg);
 #endif
diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c
index 041ce1e..6a218b5 100644
--- a/hw/ide/ahci.c
+++ b/hw/ide/ahci.c
@@ -667,7 +667,8 @@  static int ahci_populate_sglist(AHCIDevice *ad, QEMUSGList *sglist)
     if (sglist_alloc_hint > 0) {
         AHCI_SG *tbl = (AHCI_SG *)prdt;
 
-        qemu_sglist_init(sglist, sglist_alloc_hint);
+        /* FIXME: pass the correct DMAContext */
+        qemu_sglist_init(sglist, sglist_alloc_hint, NULL);
         for (i = 0; i < sglist_alloc_hint; i++) {
             /* flags_size is zero-based */
             qemu_sglist_add(sglist, le64_to_cpu(tbl[i].addr),
diff --git a/hw/ide/macio.c b/hw/ide/macio.c
index edcf885..568a299 100644
--- a/hw/ide/macio.c
+++ b/hw/ide/macio.c
@@ -76,7 +76,7 @@  static void pmac_ide_atapi_transfer_cb(void *opaque, int ret)
 
     s->io_buffer_size = io->len;
 
-    qemu_sglist_init(&s->sg, io->len / MACIO_PAGE_SIZE + 1);
+    qemu_sglist_init(&s->sg, io->len / MACIO_PAGE_SIZE + 1, NULL);
     qemu_sglist_add(&s->sg, io->addr, io->len);
     io->addr += io->len;
     io->len = 0;
@@ -133,7 +133,7 @@  static void pmac_ide_transfer_cb(void *opaque, int ret)
     s->io_buffer_index = 0;
     s->io_buffer_size = io->len;
 
-    qemu_sglist_init(&s->sg, io->len / MACIO_PAGE_SIZE + 1);
+    qemu_sglist_init(&s->sg, io->len / MACIO_PAGE_SIZE + 1, NULL);
     qemu_sglist_add(&s->sg, io->addr, io->len);
     io->addr += io->len;
     io->len = 0;
diff --git a/hw/pci.h b/hw/pci.h
index c021805..41dcd05 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -611,7 +611,7 @@  static inline void pci_dma_unmap(PCIDevice *dev, void *buffer, dma_addr_t len,
 static inline void pci_dma_sglist_init(QEMUSGList *qsg, PCIDevice *dev,
                                        int alloc_hint)
 {
-    qemu_sglist_init(qsg, alloc_hint);
+    qemu_sglist_init(qsg, alloc_hint, pci_dma_context(dev));
 }
 
 extern const VMStateDescription vmstate_pci_device;