diff mbox

[RFC,v2,2/2] virtio-iommu: vfio integration with virtio-iommu

Message ID 1500017104-3574-3-git-send-email-Bharat.Bhushan@nxp.com
State New
Headers show

Commit Message

Bharat Bhushan July 14, 2017, 7:25 a.m. UTC
This patch allows virtio-iommu protection for PCI
device-passthrough.

MSI region is mapped by current version of virtio-iommu driver.
This MSI region mapping in not getting pushed on hw iommu
vfio_get_vaddr() allows only ram-region. This RFC patch needed
to be improved.

Signed-off-by: Bharat Bhushan <Bharat.Bhushan@nxp.com>
---
v1-v2:
  - Added trace events

 hw/virtio/trace-events           |   5 ++
 hw/virtio/virtio-iommu.c         | 133 +++++++++++++++++++++++++++++++++++++++
 include/hw/virtio/virtio-iommu.h |   6 ++
 3 files changed, 144 insertions(+)

Comments

Eric Auger Aug. 17, 2017, 3:33 p.m. UTC | #1
Hi Bharat,

On 14/07/2017 09:25, Bharat Bhushan wrote:
> This patch allows virtio-iommu protection for PCI
> device-passthrough.
> 
> MSI region is mapped by current version of virtio-iommu driver.
> This MSI region mapping in not getting pushed on hw iommu
> vfio_get_vaddr() allows only ram-region.
Why is it an issue. As far as I understand this is not needed actually
as the guest MSI doorbell is not used by the host.
 This RFC patch needed
> to be improved.
> 
> Signed-off-by: Bharat Bhushan <Bharat.Bhushan@nxp.com>
> ---
> v1-v2:
>   - Added trace events
> 
>  hw/virtio/trace-events           |   5 ++
>  hw/virtio/virtio-iommu.c         | 133 +++++++++++++++++++++++++++++++++++++++
>  include/hw/virtio/virtio-iommu.h |   6 ++
>  3 files changed, 144 insertions(+)
> 
> diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events
> index 9196b63..3a3968b 100644
> --- a/hw/virtio/trace-events
> +++ b/hw/virtio/trace-events
> @@ -39,3 +39,8 @@ virtio_iommu_unmap_left_interval(uint64_t low, uint64_t high, uint64_t next_low,
>  virtio_iommu_unmap_right_interval(uint64_t low, uint64_t high, uint64_t next_low, uint64_t next_high) "Unmap right [0x%"PRIx64",0x%"PRIx64"], new interval=[0x%"PRIx64",0x%"PRIx64"]"
>  virtio_iommu_unmap_inc_interval(uint64_t low, uint64_t high) "Unmap inc [0x%"PRIx64",0x%"PRIx64"]"
>  virtio_iommu_translate_result(uint64_t virt_addr, uint64_t phys_addr, uint32_t sid) "0x%"PRIx64" -> 0x%"PRIx64 " for sid=%d"
> +virtio_iommu_notify_flag_add(const char *iommu) "Add virtio-iommu notifier node for memory region %s"
> +virtio_iommu_notify_flag_del(const char *iommu) "Del virtio-iommu notifier node for memory region %s"
> +virtio_iommu_remap(hwaddr iova, hwaddr pa, hwaddr size) "iova=0x%"PRIx64" pa=0x%" PRIx64" size=0x%"PRIx64""
> +virtio_iommu_map_region(hwaddr iova, hwaddr paddr, hwaddr map_size) "iova=0x%"PRIx64" pa=0x%" PRIx64" size=0x%"PRIx64""
> +virtio_iommu_unmap_region(hwaddr iova, hwaddr paddr, hwaddr map_size) "iova=0x%"PRIx64" pa=0x%" PRIx64" size=0x%"PRIx64""
> diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c
> index cd188fc..61f33cb 100644
> --- a/hw/virtio/virtio-iommu.c
> +++ b/hw/virtio/virtio-iommu.c
> @@ -129,6 +129,48 @@ static gint interval_cmp(gconstpointer a, gconstpointer b, gpointer user_data)
>      }
>  }
>  
> +static void virtio_iommu_map_region(VirtIOIOMMU *s, hwaddr iova, hwaddr paddr,
> +                                    hwaddr size, int map)
bool map?

the function name is a bit misleading to me and does not really explain
what the function does. It "notifies" so why not using something like
virtio_iommu_map_notify and virtio_iommu_unmap_notify. I tend to think
having separate proto is cleaner and more standard.

Binding should happen on a specific IOMMUmemoryRegion (see next comment).

> +{
> +    VirtioIOMMUNotifierNode *node;
> +    IOMMUTLBEntry entry;
> +    uint64_t map_size = (1 << 12);
TODO: handle something else than 4K page.
> +    int npages;
> +    int i;
> +
> +    npages = size / map_size;
> +    entry.target_as = &address_space_memory;
> +    entry.addr_mask = map_size - 1;
> +
> +    for (i = 0; i < npages; i++) {
Although I understand we currently fail checking the consistency between
pIOMMU and vIOMMU page sizes, this will be very slow for guest DPDK use
case where hugepages are used.

Why not directly using the full size?  vfio_iommu_map_notify will report
errors if vfio_dma_map/unmap() fail.
> +        entry.iova = iova + (i * map_size);
> +        if (map) {
> +                trace_virtio_iommu_map_region(iova, paddr, map_size);
> +                entry.perm = IOMMU_RW;
> +                entry.translated_addr = paddr + (i * map_size);
> +        } else {
> +                trace_virtio_iommu_unmap_region(iova, paddr, map_size);
> +                entry.perm = IOMMU_NONE;
> +                entry.translated_addr = 0;
> +        }
> +
> +        QLIST_FOREACH(node, &s->notifiers_list, next) {
> +            memory_region_notify_iommu(&node->iommu_dev->iommu_mr, entry);
So as discussed this will notify *all* IOMMU memory regions and all
their notifiers which is not what we want. You may have a look at
vsmmuv3 v6 (or intel_iommu) where smmuv3_context_device_invalidate
retrieves the mr from the sid.

> +        }
> +    }
> +}
> +
> +static gboolean virtio_iommu_unmap_single(gpointer key, gpointer value,
> +                                          gpointer data)
> +{
> +    viommu_mapping *mapping = (viommu_mapping *) value;
> +    VirtIOIOMMU *s = (VirtIOIOMMU *) data;
> +
> +    virtio_iommu_map_region(s, mapping->virt_addr, 0, mapping->size, 0);
paddr=0? mapping->phys_addr as the trace() will be misleading. But as
mentioned earlier better use unmap() separate function.
> +
> +    return true;
> +}
> +
>  static int virtio_iommu_attach(VirtIOIOMMU *s,
>                                 struct virtio_iommu_req_attach *req)
>  {
> @@ -170,10 +212,26 @@ static int virtio_iommu_detach(VirtIOIOMMU *s,
>  {
>      uint32_t devid = le32_to_cpu(req->device);
>      uint32_t reserved = le32_to_cpu(req->reserved);
> +    viommu_dev *dev;
>      int ret;
>  
>      trace_virtio_iommu_detach(devid, reserved);
>  
> +    dev = g_tree_lookup(s->devices, GUINT_TO_POINTER(devid));
> +    if (!dev || !dev->as) {
> +        return -EINVAL;
> +    }
> +
> +    dev->as->nr_devices--;
> +
> +    /* Unmap all if this is last device detached */
> +    if (dev->as->nr_devices == 0) {
> +        g_tree_foreach(dev->as->mappings, virtio_iommu_unmap_single, s);
> +
> +        g_tree_remove(s->address_spaces, GUINT_TO_POINTER(dev->as->id));
> +        g_tree_destroy(dev->as->mappings);
> +    }
so this should be rebased on new ref count code.
> +
>      ret = g_tree_remove(s->devices, GUINT_TO_POINTER(devid));
>  
>      return ret ? VIRTIO_IOMMU_S_OK : VIRTIO_IOMMU_S_INVAL;
> @@ -217,6 +275,7 @@ static int virtio_iommu_map(VirtIOIOMMU *s,
>  
>      g_tree_insert(as->mappings, interval, mapping);
>  
> +    virtio_iommu_map_region(s, virt_addr, phys_addr, size, 1);
>      return VIRTIO_IOMMU_S_OK;
>  }
>  
> @@ -267,7 +326,9 @@ static int virtio_iommu_unmap(VirtIOIOMMU *s,
>          } else {
>              break;
>          }
> +
>          if (interval.low >= interval.high) {
> +            virtio_iommu_map_region(s, virt_addr, 0, size, 0);
>              return VIRTIO_IOMMU_S_OK;
>          } else {
>              mapping = g_tree_lookup(as->mappings, (gpointer)&interval);
> @@ -410,6 +471,37 @@ static void virtio_iommu_handle_command(VirtIODevice *vdev, VirtQueue *vq)
>      }
>  }
>  
> +static void virtio_iommu_notify_flag_changed(MemoryRegion *iommu,
> +                                          IOMMUNotifierFlag old,
> +                                          IOMMUNotifierFlag new)
> +{
> +    IOMMUDevice *sdev = container_of(iommu, IOMMUDevice, iommu_mr);
> +    VirtIOIOMMU *s = sdev->viommu;
> +    VirtioIOMMUNotifierNode *node = NULL;
> +    VirtioIOMMUNotifierNode *next_node = NULL;
> +
> +    if (old == IOMMU_NOTIFIER_NONE) {
> +        trace_virtio_iommu_notify_flag_add(iommu->name);
> +        node = g_malloc0(sizeof(*node));
> +        node->iommu_dev = sdev;
> +        QLIST_INSERT_HEAD(&s->notifiers_list, node, next);
> +        return;
> +    }
> +
> +    /* update notifier node with new flags */
> +    QLIST_FOREACH_SAFE(node, &s->notifiers_list, next, next_node) {
> +        if (node->iommu_dev == sdev) {
> +            if (new == IOMMU_NOTIFIER_NONE) {
> +                trace_virtio_iommu_notify_flag_del(iommu->name);
> +                QLIST_REMOVE(node, next);
> +                g_free(node);
> +            }
> +            return;
> +        }
> +    }
> +}
I think all that mechanics should be factorized somewhere else as all
vIOMMUs use that but this goes beyond the scope of this series.
> +
> +
>  static IOMMUTLBEntry virtio_iommu_translate(MemoryRegion *mr, hwaddr addr,
>                                              IOMMUAccessFlags flag)
>  {
> @@ -523,11 +615,50 @@ static gint int_cmp(gconstpointer a, gconstpointer b, gpointer user_data)
>      return (ua > ub) - (ua < ub);
>  }
>  
> +static gboolean virtio_iommu_remap(gpointer key, gpointer value, gpointer data)
> +{
> +    viommu_mapping *mapping = (viommu_mapping *) value;
> +    VirtIOIOMMU *s = (VirtIOIOMMU *) data;
> +
> +    trace_virtio_iommu_remap(mapping->virt_addr, mapping->phys_addr,
> +                             mapping->size);
> +    /* unmap previous entry and map again */
> +    virtio_iommu_map_region(s, mapping->virt_addr, 0, mapping->size, 0);
> +
> +    virtio_iommu_map_region(s, mapping->virt_addr, mapping->phys_addr,
> +                            mapping->size, 1);
> +    return true;
> +}
> +
> +static void virtio_iommu_replay(MemoryRegion *mr, IOMMUNotifier *n)
> +{
> +    IOMMUDevice *sdev = container_of(mr, IOMMUDevice, iommu_mr);
> +    VirtIOIOMMU *s = sdev->viommu;
> +    uint32_t sid;
> +    viommu_dev *dev;
> +
> +    sid = smmu_get_sid(sdev);
> +
> +    qemu_mutex_lock(&s->mutex);
> +
> +    dev = g_tree_lookup(s->devices, GUINT_TO_POINTER(sid));
> +    if (!dev) {
> +        goto unlock;
> +    }
> +
> +    g_tree_foreach(dev->as->mappings, virtio_iommu_remap, s);
> +
> +unlock:
> +    qemu_mutex_unlock(&s->mutex);
> +    return;
not needed

Thanks

Eric
> +}
> +
>  static void virtio_iommu_device_realize(DeviceState *dev, Error **errp)
>  {
>      VirtIODevice *vdev = VIRTIO_DEVICE(dev);
>      VirtIOIOMMU *s = VIRTIO_IOMMU(dev);
>  
> +    QLIST_INIT(&s->notifiers_list);
>      virtio_init(vdev, "virtio-iommu", VIRTIO_ID_IOMMU,
>                  sizeof(struct virtio_iommu_config));
>  
> @@ -538,6 +669,8 @@ static void virtio_iommu_device_realize(DeviceState *dev, Error **errp)
>      s->config.input_range.end = -1UL;
>  
>      s->iommu_ops.translate = virtio_iommu_translate;
> +    s->iommu_ops.notify_flag_changed = virtio_iommu_notify_flag_changed;
> +    s->iommu_ops.replay = virtio_iommu_replay;
>      memset(s->as_by_bus_num, 0, sizeof(s->as_by_bus_num));
>      s->as_by_busptr = g_hash_table_new_full(as_uint64_hash,
>                                              as_uint64_equal,
> diff --git a/include/hw/virtio/virtio-iommu.h b/include/hw/virtio/virtio-iommu.h
> index 2259413..76c758d 100644
> --- a/include/hw/virtio/virtio-iommu.h
> +++ b/include/hw/virtio/virtio-iommu.h
> @@ -44,6 +44,11 @@ typedef struct IOMMUPciBus {
>      IOMMUDevice  *pbdev[0]; /* Parent array is sparse, so dynamically alloc */
>  } IOMMUPciBus;
>  
> +typedef struct VirtioIOMMUNotifierNode {
> +    IOMMUDevice *iommu_dev;
> +    QLIST_ENTRY(VirtioIOMMUNotifierNode) next;
> +} VirtioIOMMUNotifierNode;
> +
>  typedef struct VirtIOIOMMU {
>      VirtIODevice parent_obj;
>      VirtQueue *vq;
> @@ -55,6 +60,7 @@ typedef struct VirtIOIOMMU {
>      GTree *address_spaces;
>      QemuMutex mutex;
>      GTree *devices;
> +    QLIST_HEAD(, VirtioIOMMUNotifierNode) notifiers_list;
>  } VirtIOIOMMU;
>  
>  #endif
>
Bharat Bhushan Aug. 18, 2017, 4:49 a.m. UTC | #2
> -----Original Message-----

> From: Auger Eric [mailto:eric.auger@redhat.com]

> Sent: Thursday, August 17, 2017 9:03 PM

> To: Bharat Bhushan <bharat.bhushan@nxp.com>;

> eric.auger.pro@gmail.com; peter.maydell@linaro.org;

> alex.williamson@redhat.com; mst@redhat.com; qemu-arm@nongnu.org;

> qemu-devel@nongnu.org

> Cc: wei@redhat.com; kevin.tian@intel.com; marc.zyngier@arm.com;

> tn@semihalf.com; will.deacon@arm.com; drjones@redhat.com;

> robin.murphy@arm.com; christoffer.dall@linaro.org

> Subject: Re: [Qemu-devel] [RFC v2 PATCH 2/2] virtio-iommu: vfio integration

> with virtio-iommu

> 

> Hi Bharat,

> 

> On 14/07/2017 09:25, Bharat Bhushan wrote:

> > This patch allows virtio-iommu protection for PCI device-passthrough.

> >

> > MSI region is mapped by current version of virtio-iommu driver.

> > This MSI region mapping in not getting pushed on hw iommu

> > vfio_get_vaddr() allows only ram-region.

> Why is it an issue. As far as I understand this is not needed actually as the

> guest MSI doorbell is not used by the host.

>  This RFC patch needed

> > to be improved.

> >

> > Signed-off-by: Bharat Bhushan <Bharat.Bhushan@nxp.com>

> > ---

> > v1-v2:

> >   - Added trace events

> >

> >  hw/virtio/trace-events           |   5 ++

> >  hw/virtio/virtio-iommu.c         | 133

> +++++++++++++++++++++++++++++++++++++++

> >  include/hw/virtio/virtio-iommu.h |   6 ++

> >  3 files changed, 144 insertions(+)

> >

> > diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events index

> > 9196b63..3a3968b 100644

> > --- a/hw/virtio/trace-events

> > +++ b/hw/virtio/trace-events

> > @@ -39,3 +39,8 @@ virtio_iommu_unmap_left_interval(uint64_t low,

> > uint64_t high, uint64_t next_low,

> virtio_iommu_unmap_right_interval(uint64_t low, uint64_t high, uint64_t

> next_low, uint64_t next_high) "Unmap right [0x%"PRIx64",0x%"PRIx64"],

> new interval=[0x%"PRIx64",0x%"PRIx64"]"

> >  virtio_iommu_unmap_inc_interval(uint64_t low, uint64_t high) "Unmap

> inc [0x%"PRIx64",0x%"PRIx64"]"

> >  virtio_iommu_translate_result(uint64_t virt_addr, uint64_t phys_addr,

> uint32_t sid) "0x%"PRIx64" -> 0x%"PRIx64 " for sid=%d"

> > +virtio_iommu_notify_flag_add(const char *iommu) "Add virtio-iommu

> notifier node for memory region %s"

> > +virtio_iommu_notify_flag_del(const char *iommu) "Del virtio-iommu

> notifier node for memory region %s"

> > +virtio_iommu_remap(hwaddr iova, hwaddr pa, hwaddr size)

> "iova=0x%"PRIx64" pa=0x%" PRIx64" size=0x%"PRIx64""

> > +virtio_iommu_map_region(hwaddr iova, hwaddr paddr, hwaddr

> map_size) "iova=0x%"PRIx64" pa=0x%" PRIx64" size=0x%"PRIx64""

> > +virtio_iommu_unmap_region(hwaddr iova, hwaddr paddr, hwaddr

> map_size) "iova=0x%"PRIx64" pa=0x%" PRIx64" size=0x%"PRIx64""

> > diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c index

> > cd188fc..61f33cb 100644

> > --- a/hw/virtio/virtio-iommu.c

> > +++ b/hw/virtio/virtio-iommu.c

> > @@ -129,6 +129,48 @@ static gint interval_cmp(gconstpointer a,

> gconstpointer b, gpointer user_data)

> >      }

> >  }

> >

> > +static void virtio_iommu_map_region(VirtIOIOMMU *s, hwaddr iova,

> hwaddr paddr,

> > +                                    hwaddr size, int map)

> bool map?

> 

> the function name is a bit misleading to me and does not really explain what

> the function does. It "notifies" so why not using something like

> virtio_iommu_map_notify and virtio_iommu_unmap_notify. I tend to think

> having separate proto is cleaner and more standard.

> 

> Binding should happen on a specific IOMMUmemoryRegion (see next

> comment).

> 

> > +{

> > +    VirtioIOMMUNotifierNode *node;

> > +    IOMMUTLBEntry entry;

> > +    uint64_t map_size = (1 << 12);

> TODO: handle something else than 4K page.

> > +    int npages;

> > +    int i;

> > +

> > +    npages = size / map_size;

> > +    entry.target_as = &address_space_memory;

> > +    entry.addr_mask = map_size - 1;

> > +

> > +    for (i = 0; i < npages; i++) {

> Although I understand we currently fail checking the consistency between

> pIOMMU and vIOMMU page sizes, this will be very slow for guest DPDK use

> case where hugepages are used.

> 

> Why not directly using the full size?  vfio_iommu_map_notify will report

> errors if vfio_dma_map/unmap() fail.


Yes, just for understanding, VFIO/IOMMU will map at page granularity and QEMU rely on that?

> > +        entry.iova = iova + (i * map_size);

> > +        if (map) {

> > +                trace_virtio_iommu_map_region(iova, paddr, map_size);

> > +                entry.perm = IOMMU_RW;

> > +                entry.translated_addr = paddr + (i * map_size);

> > +        } else {

> > +                trace_virtio_iommu_unmap_region(iova, paddr, map_size);

> > +                entry.perm = IOMMU_NONE;

> > +                entry.translated_addr = 0;

> > +        }

> > +

> > +        QLIST_FOREACH(node, &s->notifiers_list, next) {

> > +            memory_region_notify_iommu(&node->iommu_dev->iommu_mr,

> > + entry);

> So as discussed this will notify *all* IOMMU memory regions and all their

> notifiers which is not what we want. You may have a look at

> vsmmuv3 v6 (or intel_iommu) where smmuv3_context_device_invalidate

> retrieves the mr from the sid.


Yes, this is one of the reason why multiple interfaces fails to work ☹
Will fix this.

> 

> > +        }

> > +    }

> > +}

> > +

> > +static gboolean virtio_iommu_unmap_single(gpointer key, gpointer

> value,

> > +                                          gpointer data) {

> > +    viommu_mapping *mapping = (viommu_mapping *) value;

> > +    VirtIOIOMMU *s = (VirtIOIOMMU *) data;

> > +

> > +    virtio_iommu_map_region(s, mapping->virt_addr, 0, mapping->size,

> > + 0);

> paddr=0? mapping->phys_addr as the trace() will be misleading. But as

> mentioned earlier better use unmap() separate function.

> > +

> > +    return true;

> > +}

> > +

> >  static int virtio_iommu_attach(VirtIOIOMMU *s,

> >                                 struct virtio_iommu_req_attach *req)

> > { @@ -170,10 +212,26 @@ static int virtio_iommu_detach(VirtIOIOMMU

> *s,

> > {

> >      uint32_t devid = le32_to_cpu(req->device);

> >      uint32_t reserved = le32_to_cpu(req->reserved);

> > +    viommu_dev *dev;

> >      int ret;

> >

> >      trace_virtio_iommu_detach(devid, reserved);

> >

> > +    dev = g_tree_lookup(s->devices, GUINT_TO_POINTER(devid));

> > +    if (!dev || !dev->as) {

> > +        return -EINVAL;

> > +    }

> > +

> > +    dev->as->nr_devices--;

> > +

> > +    /* Unmap all if this is last device detached */

> > +    if (dev->as->nr_devices == 0) {

> > +        g_tree_foreach(dev->as->mappings, virtio_iommu_unmap_single,

> > + s);

> > +

> > +        g_tree_remove(s->address_spaces, GUINT_TO_POINTER(dev->as-

> >id));

> > +        g_tree_destroy(dev->as->mappings);

> > +    }

> so this should be rebased on new ref count code.

> > +

> >      ret = g_tree_remove(s->devices, GUINT_TO_POINTER(devid));

> >

> >      return ret ? VIRTIO_IOMMU_S_OK : VIRTIO_IOMMU_S_INVAL; @@ -

> 217,6

> > +275,7 @@ static int virtio_iommu_map(VirtIOIOMMU *s,

> >

> >      g_tree_insert(as->mappings, interval, mapping);

> >

> > +    virtio_iommu_map_region(s, virt_addr, phys_addr, size, 1);

> >      return VIRTIO_IOMMU_S_OK;

> >  }

> >

> > @@ -267,7 +326,9 @@ static int virtio_iommu_unmap(VirtIOIOMMU *s,

> >          } else {

> >              break;

> >          }

> > +

> >          if (interval.low >= interval.high) {

> > +            virtio_iommu_map_region(s, virt_addr, 0, size, 0);

> >              return VIRTIO_IOMMU_S_OK;

> >          } else {

> >              mapping = g_tree_lookup(as->mappings,

> > (gpointer)&interval); @@ -410,6 +471,37 @@ static void

> virtio_iommu_handle_command(VirtIODevice *vdev, VirtQueue *vq)

> >      }

> >  }

> >

> > +static void virtio_iommu_notify_flag_changed(MemoryRegion *iommu,

> > +                                          IOMMUNotifierFlag old,

> > +                                          IOMMUNotifierFlag new) {

> > +    IOMMUDevice *sdev = container_of(iommu, IOMMUDevice,

> iommu_mr);

> > +    VirtIOIOMMU *s = sdev->viommu;

> > +    VirtioIOMMUNotifierNode *node = NULL;

> > +    VirtioIOMMUNotifierNode *next_node = NULL;

> > +

> > +    if (old == IOMMU_NOTIFIER_NONE) {

> > +        trace_virtio_iommu_notify_flag_add(iommu->name);

> > +        node = g_malloc0(sizeof(*node));

> > +        node->iommu_dev = sdev;

> > +        QLIST_INSERT_HEAD(&s->notifiers_list, node, next);

> > +        return;

> > +    }

> > +

> > +    /* update notifier node with new flags */

> > +    QLIST_FOREACH_SAFE(node, &s->notifiers_list, next, next_node) {

> > +        if (node->iommu_dev == sdev) {

> > +            if (new == IOMMU_NOTIFIER_NONE) {

> > +                trace_virtio_iommu_notify_flag_del(iommu->name);

> > +                QLIST_REMOVE(node, next);

> > +                g_free(node);

> > +            }

> > +            return;

> > +        }

> > +    }

> > +}

> I think all that mechanics should be factorized somewhere else as all

> vIOMMUs use that but this goes beyond the scope of this series.

> > +

> > +

> >  static IOMMUTLBEntry virtio_iommu_translate(MemoryRegion *mr,

> hwaddr addr,

> >                                              IOMMUAccessFlags flag)  {

> > @@ -523,11 +615,50 @@ static gint int_cmp(gconstpointer a, gconstpointer

> b, gpointer user_data)

> >      return (ua > ub) - (ua < ub);

> >  }

> >

> > +static gboolean virtio_iommu_remap(gpointer key, gpointer value,

> > +gpointer data) {

> > +    viommu_mapping *mapping = (viommu_mapping *) value;

> > +    VirtIOIOMMU *s = (VirtIOIOMMU *) data;

> > +

> > +    trace_virtio_iommu_remap(mapping->virt_addr, mapping->phys_addr,

> > +                             mapping->size);

> > +    /* unmap previous entry and map again */

> > +    virtio_iommu_map_region(s, mapping->virt_addr, 0, mapping->size,

> > + 0);

> > +

> > +    virtio_iommu_map_region(s, mapping->virt_addr, mapping-

> >phys_addr,

> > +                            mapping->size, 1);

> > +    return true;

> > +}

> > +

> > +static void virtio_iommu_replay(MemoryRegion *mr, IOMMUNotifier *n)

> {

> > +    IOMMUDevice *sdev = container_of(mr, IOMMUDevice, iommu_mr);

> > +    VirtIOIOMMU *s = sdev->viommu;

> > +    uint32_t sid;

> > +    viommu_dev *dev;

> > +

> > +    sid = smmu_get_sid(sdev);

> > +

> > +    qemu_mutex_lock(&s->mutex);

> > +

> > +    dev = g_tree_lookup(s->devices, GUINT_TO_POINTER(sid));

> > +    if (!dev) {

> > +        goto unlock;

> > +    }

> > +

> > +    g_tree_foreach(dev->as->mappings, virtio_iommu_remap, s);

> > +

> > +unlock:

> > +    qemu_mutex_unlock(&s->mutex);

> > +    return;

> not needed


Thanks
-Bharat

> 

> Thanks

> 

> Eric

> > +}

> > +

> >  static void virtio_iommu_device_realize(DeviceState *dev, Error

> > **errp)  {

> >      VirtIODevice *vdev = VIRTIO_DEVICE(dev);

> >      VirtIOIOMMU *s = VIRTIO_IOMMU(dev);

> >

> > +    QLIST_INIT(&s->notifiers_list);

> >      virtio_init(vdev, "virtio-iommu", VIRTIO_ID_IOMMU,

> >                  sizeof(struct virtio_iommu_config));

> >

> > @@ -538,6 +669,8 @@ static void

> virtio_iommu_device_realize(DeviceState *dev, Error **errp)

> >      s->config.input_range.end = -1UL;

> >

> >      s->iommu_ops.translate = virtio_iommu_translate;

> > +    s->iommu_ops.notify_flag_changed =

> virtio_iommu_notify_flag_changed;

> > +    s->iommu_ops.replay = virtio_iommu_replay;

> >      memset(s->as_by_bus_num, 0, sizeof(s->as_by_bus_num));

> >      s->as_by_busptr = g_hash_table_new_full(as_uint64_hash,

> >                                              as_uint64_equal, diff

> > --git a/include/hw/virtio/virtio-iommu.h

> > b/include/hw/virtio/virtio-iommu.h

> > index 2259413..76c758d 100644

> > --- a/include/hw/virtio/virtio-iommu.h

> > +++ b/include/hw/virtio/virtio-iommu.h

> > @@ -44,6 +44,11 @@ typedef struct IOMMUPciBus {

> >      IOMMUDevice  *pbdev[0]; /* Parent array is sparse, so dynamically

> > alloc */  } IOMMUPciBus;

> >

> > +typedef struct VirtioIOMMUNotifierNode {

> > +    IOMMUDevice *iommu_dev;

> > +    QLIST_ENTRY(VirtioIOMMUNotifierNode) next; }

> > +VirtioIOMMUNotifierNode;

> > +

> >  typedef struct VirtIOIOMMU {

> >      VirtIODevice parent_obj;

> >      VirtQueue *vq;

> > @@ -55,6 +60,7 @@ typedef struct VirtIOIOMMU {

> >      GTree *address_spaces;

> >      QemuMutex mutex;

> >      GTree *devices;

> > +    QLIST_HEAD(, VirtioIOMMUNotifierNode) notifiers_list;

> >  } VirtIOIOMMU;

> >

> >  #endif

> >
Bharat Bhushan Aug. 21, 2017, 10:57 a.m. UTC | #3
Hi Eric,

> -----Original Message-----
> From: Auger Eric [mailto:eric.auger@redhat.com]
> Sent: Thursday, August 17, 2017 9:03 PM
> To: Bharat Bhushan <bharat.bhushan@nxp.com>;
> eric.auger.pro@gmail.com; peter.maydell@linaro.org;
> alex.williamson@redhat.com; mst@redhat.com; qemu-arm@nongnu.org;
> qemu-devel@nongnu.org
> Cc: wei@redhat.com; kevin.tian@intel.com; marc.zyngier@arm.com;
> tn@semihalf.com; will.deacon@arm.com; drjones@redhat.com;
> robin.murphy@arm.com; christoffer.dall@linaro.org
> Subject: Re: [Qemu-devel] [RFC v2 PATCH 2/2] virtio-iommu: vfio integration
> with virtio-iommu
> 
> Hi Bharat,
> 
> On 14/07/2017 09:25, Bharat Bhushan wrote:
> > This patch allows virtio-iommu protection for PCI device-passthrough.
> >
> > MSI region is mapped by current version of virtio-iommu driver.
> > This MSI region mapping in not getting pushed on hw iommu
> > vfio_get_vaddr() allows only ram-region.
> Why is it an issue. As far as I understand this is not needed actually as the
> guest MSI doorbell is not used by the host.
>  This RFC patch needed
> > to be improved.
> >
> > Signed-off-by: Bharat Bhushan <Bharat.Bhushan@nxp.com>
> > ---
> > v1-v2:
> >   - Added trace events
> >
> >  hw/virtio/trace-events           |   5 ++
> >  hw/virtio/virtio-iommu.c         | 133
> +++++++++++++++++++++++++++++++++++++++
> >  include/hw/virtio/virtio-iommu.h |   6 ++
> >  3 files changed, 144 insertions(+)
> >
> > diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events index
> > 9196b63..3a3968b 100644
> > --- a/hw/virtio/trace-events
> > +++ b/hw/virtio/trace-events
> > @@ -39,3 +39,8 @@ virtio_iommu_unmap_left_interval(uint64_t low,
> > uint64_t high, uint64_t next_low,
> virtio_iommu_unmap_right_interval(uint64_t low, uint64_t high, uint64_t
> next_low, uint64_t next_high) "Unmap right [0x%"PRIx64",0x%"PRIx64"],
> new interval=[0x%"PRIx64",0x%"PRIx64"]"
> >  virtio_iommu_unmap_inc_interval(uint64_t low, uint64_t high) "Unmap
> inc [0x%"PRIx64",0x%"PRIx64"]"
> >  virtio_iommu_translate_result(uint64_t virt_addr, uint64_t phys_addr,
> uint32_t sid) "0x%"PRIx64" -> 0x%"PRIx64 " for sid=%d"
> > +virtio_iommu_notify_flag_add(const char *iommu) "Add virtio-iommu
> notifier node for memory region %s"
> > +virtio_iommu_notify_flag_del(const char *iommu) "Del virtio-iommu
> notifier node for memory region %s"
> > +virtio_iommu_remap(hwaddr iova, hwaddr pa, hwaddr size)
> "iova=0x%"PRIx64" pa=0x%" PRIx64" size=0x%"PRIx64""
> > +virtio_iommu_map_region(hwaddr iova, hwaddr paddr, hwaddr
> map_size) "iova=0x%"PRIx64" pa=0x%" PRIx64" size=0x%"PRIx64""
> > +virtio_iommu_unmap_region(hwaddr iova, hwaddr paddr, hwaddr
> map_size) "iova=0x%"PRIx64" pa=0x%" PRIx64" size=0x%"PRIx64""
> > diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c index
> > cd188fc..61f33cb 100644
> > --- a/hw/virtio/virtio-iommu.c
> > +++ b/hw/virtio/virtio-iommu.c
> > @@ -129,6 +129,48 @@ static gint interval_cmp(gconstpointer a,
> gconstpointer b, gpointer user_data)
> >      }
> >  }
> >
> > +static void virtio_iommu_map_region(VirtIOIOMMU *s, hwaddr iova,
> hwaddr paddr,
> > +                                    hwaddr size, int map)
> bool map?
> 
> the function name is a bit misleading to me and does not really explain what
> the function does. It "notifies" so why not using something like
> virtio_iommu_map_notify and virtio_iommu_unmap_notify. I tend to think
> having separate proto is cleaner and more standard.
> 
> Binding should happen on a specific IOMMUmemoryRegion (see next
> comment).
> 
> > +{
> > +    VirtioIOMMUNotifierNode *node;
> > +    IOMMUTLBEntry entry;
> > +    uint64_t map_size = (1 << 12);
> TODO: handle something else than 4K page.
> > +    int npages;
> > +    int i;
> > +
> > +    npages = size / map_size;
> > +    entry.target_as = &address_space_memory;
> > +    entry.addr_mask = map_size - 1;
> > +
> > +    for (i = 0; i < npages; i++) {
> Although I understand we currently fail checking the consistency between
> pIOMMU and vIOMMU page sizes, this will be very slow for guest DPDK use
> case where hugepages are used.
> 
> Why not directly using the full size?  vfio_iommu_map_notify will report
> errors if vfio_dma_map/unmap() fail.
> > +        entry.iova = iova + (i * map_size);
> > +        if (map) {
> > +                trace_virtio_iommu_map_region(iova, paddr, map_size);
> > +                entry.perm = IOMMU_RW;
> > +                entry.translated_addr = paddr + (i * map_size);
> > +        } else {
> > +                trace_virtio_iommu_unmap_region(iova, paddr, map_size);
> > +                entry.perm = IOMMU_NONE;
> > +                entry.translated_addr = 0;
> > +        }
> > +
> > +        QLIST_FOREACH(node, &s->notifiers_list, next) {
> > +            memory_region_notify_iommu(&node->iommu_dev->iommu_mr,
> > + entry);
> So as discussed this will notify *all* IOMMU memory regions and all their
> notifiers which is not what we want. You may have a look at
> vsmmuv3 v6 (or intel_iommu) where smmuv3_context_device_invalidate
> retrieves the mr from the sid.

I sent out next version of the patch and I took different approach, I assumed that device-id in
virtio_iommu_attach is stream-id, and same as requested-id. This assumption is because the
"iommu-map" property maps 1:1. Looking forward your view about this.

Hopefully I addressed other comments and fixed/code-rework planned.

Thanks
-Bharat

> 
> > +        }
> > +    }
> > +}
> > +
> > +static gboolean virtio_iommu_unmap_single(gpointer key, gpointer
> value,
> > +                                          gpointer data) {
> > +    viommu_mapping *mapping = (viommu_mapping *) value;
> > +    VirtIOIOMMU *s = (VirtIOIOMMU *) data;
> > +
> > +    virtio_iommu_map_region(s, mapping->virt_addr, 0, mapping->size,
> > + 0);
> paddr=0? mapping->phys_addr as the trace() will be misleading. But as
> mentioned earlier better use unmap() separate function.
> > +
> > +    return true;
> > +}
> > +
> >  static int virtio_iommu_attach(VirtIOIOMMU *s,
> >                                 struct virtio_iommu_req_attach *req)
> > { @@ -170,10 +212,26 @@ static int virtio_iommu_detach(VirtIOIOMMU
> *s,
> > {
> >      uint32_t devid = le32_to_cpu(req->device);
> >      uint32_t reserved = le32_to_cpu(req->reserved);
> > +    viommu_dev *dev;
> >      int ret;
> >
> >      trace_virtio_iommu_detach(devid, reserved);
> >
> > +    dev = g_tree_lookup(s->devices, GUINT_TO_POINTER(devid));
> > +    if (!dev || !dev->as) {
> > +        return -EINVAL;
> > +    }
> > +
> > +    dev->as->nr_devices--;
> > +
> > +    /* Unmap all if this is last device detached */
> > +    if (dev->as->nr_devices == 0) {
> > +        g_tree_foreach(dev->as->mappings, virtio_iommu_unmap_single,
> > + s);
> > +
> > +        g_tree_remove(s->address_spaces, GUINT_TO_POINTER(dev->as-
> >id));
> > +        g_tree_destroy(dev->as->mappings);
> > +    }
> so this should be rebased on new ref count code.
> > +
> >      ret = g_tree_remove(s->devices, GUINT_TO_POINTER(devid));
> >
> >      return ret ? VIRTIO_IOMMU_S_OK : VIRTIO_IOMMU_S_INVAL; @@ -
> 217,6
> > +275,7 @@ static int virtio_iommu_map(VirtIOIOMMU *s,
> >
> >      g_tree_insert(as->mappings, interval, mapping);
> >
> > +    virtio_iommu_map_region(s, virt_addr, phys_addr, size, 1);
> >      return VIRTIO_IOMMU_S_OK;
> >  }
> >
> > @@ -267,7 +326,9 @@ static int virtio_iommu_unmap(VirtIOIOMMU *s,
> >          } else {
> >              break;
> >          }
> > +
> >          if (interval.low >= interval.high) {
> > +            virtio_iommu_map_region(s, virt_addr, 0, size, 0);
> >              return VIRTIO_IOMMU_S_OK;
> >          } else {
> >              mapping = g_tree_lookup(as->mappings,
> > (gpointer)&interval); @@ -410,6 +471,37 @@ static void
> virtio_iommu_handle_command(VirtIODevice *vdev, VirtQueue *vq)
> >      }
> >  }
> >
> > +static void virtio_iommu_notify_flag_changed(MemoryRegion *iommu,
> > +                                          IOMMUNotifierFlag old,
> > +                                          IOMMUNotifierFlag new) {
> > +    IOMMUDevice *sdev = container_of(iommu, IOMMUDevice,
> iommu_mr);
> > +    VirtIOIOMMU *s = sdev->viommu;
> > +    VirtioIOMMUNotifierNode *node = NULL;
> > +    VirtioIOMMUNotifierNode *next_node = NULL;
> > +
> > +    if (old == IOMMU_NOTIFIER_NONE) {
> > +        trace_virtio_iommu_notify_flag_add(iommu->name);
> > +        node = g_malloc0(sizeof(*node));
> > +        node->iommu_dev = sdev;
> > +        QLIST_INSERT_HEAD(&s->notifiers_list, node, next);
> > +        return;
> > +    }
> > +
> > +    /* update notifier node with new flags */
> > +    QLIST_FOREACH_SAFE(node, &s->notifiers_list, next, next_node) {
> > +        if (node->iommu_dev == sdev) {
> > +            if (new == IOMMU_NOTIFIER_NONE) {
> > +                trace_virtio_iommu_notify_flag_del(iommu->name);
> > +                QLIST_REMOVE(node, next);
> > +                g_free(node);
> > +            }
> > +            return;
> > +        }
> > +    }
> > +}
> I think all that mechanics should be factorized somewhere else as all
> vIOMMUs use that but this goes beyond the scope of this series.
> > +
> > +
> >  static IOMMUTLBEntry virtio_iommu_translate(MemoryRegion *mr,
> hwaddr addr,
> >                                              IOMMUAccessFlags flag)  {
> > @@ -523,11 +615,50 @@ static gint int_cmp(gconstpointer a, gconstpointer
> b, gpointer user_data)
> >      return (ua > ub) - (ua < ub);
> >  }
> >
> > +static gboolean virtio_iommu_remap(gpointer key, gpointer value,
> > +gpointer data) {
> > +    viommu_mapping *mapping = (viommu_mapping *) value;
> > +    VirtIOIOMMU *s = (VirtIOIOMMU *) data;
> > +
> > +    trace_virtio_iommu_remap(mapping->virt_addr, mapping->phys_addr,
> > +                             mapping->size);
> > +    /* unmap previous entry and map again */
> > +    virtio_iommu_map_region(s, mapping->virt_addr, 0, mapping->size,
> > + 0);
> > +
> > +    virtio_iommu_map_region(s, mapping->virt_addr, mapping-
> >phys_addr,
> > +                            mapping->size, 1);
> > +    return true;
> > +}
> > +
> > +static void virtio_iommu_replay(MemoryRegion *mr, IOMMUNotifier *n)
> {
> > +    IOMMUDevice *sdev = container_of(mr, IOMMUDevice, iommu_mr);
> > +    VirtIOIOMMU *s = sdev->viommu;
> > +    uint32_t sid;
> > +    viommu_dev *dev;
> > +
> > +    sid = smmu_get_sid(sdev);
> > +
> > +    qemu_mutex_lock(&s->mutex);
> > +
> > +    dev = g_tree_lookup(s->devices, GUINT_TO_POINTER(sid));
> > +    if (!dev) {
> > +        goto unlock;
> > +    }
> > +
> > +    g_tree_foreach(dev->as->mappings, virtio_iommu_remap, s);
> > +
> > +unlock:
> > +    qemu_mutex_unlock(&s->mutex);
> > +    return;
> not needed
> 
> Thanks
> 
> Eric
> > +}
> > +
> >  static void virtio_iommu_device_realize(DeviceState *dev, Error
> > **errp)  {
> >      VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> >      VirtIOIOMMU *s = VIRTIO_IOMMU(dev);
> >
> > +    QLIST_INIT(&s->notifiers_list);
> >      virtio_init(vdev, "virtio-iommu", VIRTIO_ID_IOMMU,
> >                  sizeof(struct virtio_iommu_config));
> >
> > @@ -538,6 +669,8 @@ static void
> virtio_iommu_device_realize(DeviceState *dev, Error **errp)
> >      s->config.input_range.end = -1UL;
> >
> >      s->iommu_ops.translate = virtio_iommu_translate;
> > +    s->iommu_ops.notify_flag_changed =
> virtio_iommu_notify_flag_changed;
> > +    s->iommu_ops.replay = virtio_iommu_replay;
> >      memset(s->as_by_bus_num, 0, sizeof(s->as_by_bus_num));
> >      s->as_by_busptr = g_hash_table_new_full(as_uint64_hash,
> >                                              as_uint64_equal, diff
> > --git a/include/hw/virtio/virtio-iommu.h
> > b/include/hw/virtio/virtio-iommu.h
> > index 2259413..76c758d 100644
> > --- a/include/hw/virtio/virtio-iommu.h
> > +++ b/include/hw/virtio/virtio-iommu.h
> > @@ -44,6 +44,11 @@ typedef struct IOMMUPciBus {
> >      IOMMUDevice  *pbdev[0]; /* Parent array is sparse, so dynamically
> > alloc */  } IOMMUPciBus;
> >
> > +typedef struct VirtioIOMMUNotifierNode {
> > +    IOMMUDevice *iommu_dev;
> > +    QLIST_ENTRY(VirtioIOMMUNotifierNode) next; }
> > +VirtioIOMMUNotifierNode;
> > +
> >  typedef struct VirtIOIOMMU {
> >      VirtIODevice parent_obj;
> >      VirtQueue *vq;
> > @@ -55,6 +60,7 @@ typedef struct VirtIOIOMMU {
> >      GTree *address_spaces;
> >      QemuMutex mutex;
> >      GTree *devices;
> > +    QLIST_HEAD(, VirtioIOMMUNotifierNode) notifiers_list;
> >  } VirtIOIOMMU;
> >
> >  #endif
> >
diff mbox

Patch

diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events
index 9196b63..3a3968b 100644
--- a/hw/virtio/trace-events
+++ b/hw/virtio/trace-events
@@ -39,3 +39,8 @@  virtio_iommu_unmap_left_interval(uint64_t low, uint64_t high, uint64_t next_low,
 virtio_iommu_unmap_right_interval(uint64_t low, uint64_t high, uint64_t next_low, uint64_t next_high) "Unmap right [0x%"PRIx64",0x%"PRIx64"], new interval=[0x%"PRIx64",0x%"PRIx64"]"
 virtio_iommu_unmap_inc_interval(uint64_t low, uint64_t high) "Unmap inc [0x%"PRIx64",0x%"PRIx64"]"
 virtio_iommu_translate_result(uint64_t virt_addr, uint64_t phys_addr, uint32_t sid) "0x%"PRIx64" -> 0x%"PRIx64 " for sid=%d"
+virtio_iommu_notify_flag_add(const char *iommu) "Add virtio-iommu notifier node for memory region %s"
+virtio_iommu_notify_flag_del(const char *iommu) "Del virtio-iommu notifier node for memory region %s"
+virtio_iommu_remap(hwaddr iova, hwaddr pa, hwaddr size) "iova=0x%"PRIx64" pa=0x%" PRIx64" size=0x%"PRIx64""
+virtio_iommu_map_region(hwaddr iova, hwaddr paddr, hwaddr map_size) "iova=0x%"PRIx64" pa=0x%" PRIx64" size=0x%"PRIx64""
+virtio_iommu_unmap_region(hwaddr iova, hwaddr paddr, hwaddr map_size) "iova=0x%"PRIx64" pa=0x%" PRIx64" size=0x%"PRIx64""
diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c
index cd188fc..61f33cb 100644
--- a/hw/virtio/virtio-iommu.c
+++ b/hw/virtio/virtio-iommu.c
@@ -129,6 +129,48 @@  static gint interval_cmp(gconstpointer a, gconstpointer b, gpointer user_data)
     }
 }
 
+static void virtio_iommu_map_region(VirtIOIOMMU *s, hwaddr iova, hwaddr paddr,
+                                    hwaddr size, int map)
+{
+    VirtioIOMMUNotifierNode *node;
+    IOMMUTLBEntry entry;
+    uint64_t map_size = (1 << 12);
+    int npages;
+    int i;
+
+    npages = size / map_size;
+    entry.target_as = &address_space_memory;
+    entry.addr_mask = map_size - 1;
+
+    for (i = 0; i < npages; i++) {
+        entry.iova = iova + (i * map_size);
+        if (map) {
+                trace_virtio_iommu_map_region(iova, paddr, map_size);
+                entry.perm = IOMMU_RW;
+                entry.translated_addr = paddr + (i * map_size);
+        } else {
+                trace_virtio_iommu_unmap_region(iova, paddr, map_size);
+                entry.perm = IOMMU_NONE;
+                entry.translated_addr = 0;
+        }
+
+        QLIST_FOREACH(node, &s->notifiers_list, next) {
+            memory_region_notify_iommu(&node->iommu_dev->iommu_mr, entry);
+        }
+    }
+}
+
+static gboolean virtio_iommu_unmap_single(gpointer key, gpointer value,
+                                          gpointer data)
+{
+    viommu_mapping *mapping = (viommu_mapping *) value;
+    VirtIOIOMMU *s = (VirtIOIOMMU *) data;
+
+    virtio_iommu_map_region(s, mapping->virt_addr, 0, mapping->size, 0);
+
+    return true;
+}
+
 static int virtio_iommu_attach(VirtIOIOMMU *s,
                                struct virtio_iommu_req_attach *req)
 {
@@ -170,10 +212,26 @@  static int virtio_iommu_detach(VirtIOIOMMU *s,
 {
     uint32_t devid = le32_to_cpu(req->device);
     uint32_t reserved = le32_to_cpu(req->reserved);
+    viommu_dev *dev;
     int ret;
 
     trace_virtio_iommu_detach(devid, reserved);
 
+    dev = g_tree_lookup(s->devices, GUINT_TO_POINTER(devid));
+    if (!dev || !dev->as) {
+        return -EINVAL;
+    }
+
+    dev->as->nr_devices--;
+
+    /* Unmap all if this is last device detached */
+    if (dev->as->nr_devices == 0) {
+        g_tree_foreach(dev->as->mappings, virtio_iommu_unmap_single, s);
+
+        g_tree_remove(s->address_spaces, GUINT_TO_POINTER(dev->as->id));
+        g_tree_destroy(dev->as->mappings);
+    }
+
     ret = g_tree_remove(s->devices, GUINT_TO_POINTER(devid));
 
     return ret ? VIRTIO_IOMMU_S_OK : VIRTIO_IOMMU_S_INVAL;
@@ -217,6 +275,7 @@  static int virtio_iommu_map(VirtIOIOMMU *s,
 
     g_tree_insert(as->mappings, interval, mapping);
 
+    virtio_iommu_map_region(s, virt_addr, phys_addr, size, 1);
     return VIRTIO_IOMMU_S_OK;
 }
 
@@ -267,7 +326,9 @@  static int virtio_iommu_unmap(VirtIOIOMMU *s,
         } else {
             break;
         }
+
         if (interval.low >= interval.high) {
+            virtio_iommu_map_region(s, virt_addr, 0, size, 0);
             return VIRTIO_IOMMU_S_OK;
         } else {
             mapping = g_tree_lookup(as->mappings, (gpointer)&interval);
@@ -410,6 +471,37 @@  static void virtio_iommu_handle_command(VirtIODevice *vdev, VirtQueue *vq)
     }
 }
 
+static void virtio_iommu_notify_flag_changed(MemoryRegion *iommu,
+                                          IOMMUNotifierFlag old,
+                                          IOMMUNotifierFlag new)
+{
+    IOMMUDevice *sdev = container_of(iommu, IOMMUDevice, iommu_mr);
+    VirtIOIOMMU *s = sdev->viommu;
+    VirtioIOMMUNotifierNode *node = NULL;
+    VirtioIOMMUNotifierNode *next_node = NULL;
+
+    if (old == IOMMU_NOTIFIER_NONE) {
+        trace_virtio_iommu_notify_flag_add(iommu->name);
+        node = g_malloc0(sizeof(*node));
+        node->iommu_dev = sdev;
+        QLIST_INSERT_HEAD(&s->notifiers_list, node, next);
+        return;
+    }
+
+    /* update notifier node with new flags */
+    QLIST_FOREACH_SAFE(node, &s->notifiers_list, next, next_node) {
+        if (node->iommu_dev == sdev) {
+            if (new == IOMMU_NOTIFIER_NONE) {
+                trace_virtio_iommu_notify_flag_del(iommu->name);
+                QLIST_REMOVE(node, next);
+                g_free(node);
+            }
+            return;
+        }
+    }
+}
+
+
 static IOMMUTLBEntry virtio_iommu_translate(MemoryRegion *mr, hwaddr addr,
                                             IOMMUAccessFlags flag)
 {
@@ -523,11 +615,50 @@  static gint int_cmp(gconstpointer a, gconstpointer b, gpointer user_data)
     return (ua > ub) - (ua < ub);
 }
 
+static gboolean virtio_iommu_remap(gpointer key, gpointer value, gpointer data)
+{
+    viommu_mapping *mapping = (viommu_mapping *) value;
+    VirtIOIOMMU *s = (VirtIOIOMMU *) data;
+
+    trace_virtio_iommu_remap(mapping->virt_addr, mapping->phys_addr,
+                             mapping->size);
+    /* unmap previous entry and map again */
+    virtio_iommu_map_region(s, mapping->virt_addr, 0, mapping->size, 0);
+
+    virtio_iommu_map_region(s, mapping->virt_addr, mapping->phys_addr,
+                            mapping->size, 1);
+    return true;
+}
+
+static void virtio_iommu_replay(MemoryRegion *mr, IOMMUNotifier *n)
+{
+    IOMMUDevice *sdev = container_of(mr, IOMMUDevice, iommu_mr);
+    VirtIOIOMMU *s = sdev->viommu;
+    uint32_t sid;
+    viommu_dev *dev;
+
+    sid = smmu_get_sid(sdev);
+
+    qemu_mutex_lock(&s->mutex);
+
+    dev = g_tree_lookup(s->devices, GUINT_TO_POINTER(sid));
+    if (!dev) {
+        goto unlock;
+    }
+
+    g_tree_foreach(dev->as->mappings, virtio_iommu_remap, s);
+
+unlock:
+    qemu_mutex_unlock(&s->mutex);
+    return;
+}
+
 static void virtio_iommu_device_realize(DeviceState *dev, Error **errp)
 {
     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
     VirtIOIOMMU *s = VIRTIO_IOMMU(dev);
 
+    QLIST_INIT(&s->notifiers_list);
     virtio_init(vdev, "virtio-iommu", VIRTIO_ID_IOMMU,
                 sizeof(struct virtio_iommu_config));
 
@@ -538,6 +669,8 @@  static void virtio_iommu_device_realize(DeviceState *dev, Error **errp)
     s->config.input_range.end = -1UL;
 
     s->iommu_ops.translate = virtio_iommu_translate;
+    s->iommu_ops.notify_flag_changed = virtio_iommu_notify_flag_changed;
+    s->iommu_ops.replay = virtio_iommu_replay;
     memset(s->as_by_bus_num, 0, sizeof(s->as_by_bus_num));
     s->as_by_busptr = g_hash_table_new_full(as_uint64_hash,
                                             as_uint64_equal,
diff --git a/include/hw/virtio/virtio-iommu.h b/include/hw/virtio/virtio-iommu.h
index 2259413..76c758d 100644
--- a/include/hw/virtio/virtio-iommu.h
+++ b/include/hw/virtio/virtio-iommu.h
@@ -44,6 +44,11 @@  typedef struct IOMMUPciBus {
     IOMMUDevice  *pbdev[0]; /* Parent array is sparse, so dynamically alloc */
 } IOMMUPciBus;
 
+typedef struct VirtioIOMMUNotifierNode {
+    IOMMUDevice *iommu_dev;
+    QLIST_ENTRY(VirtioIOMMUNotifierNode) next;
+} VirtioIOMMUNotifierNode;
+
 typedef struct VirtIOIOMMU {
     VirtIODevice parent_obj;
     VirtQueue *vq;
@@ -55,6 +60,7 @@  typedef struct VirtIOIOMMU {
     GTree *address_spaces;
     QemuMutex mutex;
     GTree *devices;
+    QLIST_HEAD(, VirtioIOMMUNotifierNode) notifiers_list;
 } VirtIOIOMMU;
 
 #endif