[RFC,v2,6/8] virtio-iommu: Implement the translation and commands
diff mbox

Message ID 1496851287-9428-7-git-send-email-eric.auger@redhat.com
State New
Headers show

Commit Message

Auger Eric June 7, 2017, 4:01 p.m. UTC
This patch adds the actual implementation for the translation routine
and the virtio-iommu commands.

Signed-off-by: Eric Auger <eric.auger@redhat.com>

---

v1 -> v2:
- fix compilation issue reported by autobuild system
---
 hw/virtio/trace-events   |   6 ++
 hw/virtio/virtio-iommu.c | 202 +++++++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 202 insertions(+), 6 deletions(-)

Comments

Jean-Philippe Brucker June 23, 2017, 4:09 p.m. UTC | #1
On 07/06/17 17:01, Eric Auger wrote:
> This patch adds the actual implementation for the translation routine
> and the virtio-iommu commands.
> 
> Signed-off-by: Eric Auger <eric.auger@redhat.com>
> 
> ---[...]
>  static int virtio_iommu_attach(VirtIOIOMMU *s,
>                                 struct virtio_iommu_req_attach *req)
> @@ -95,10 +135,34 @@ static int virtio_iommu_attach(VirtIOIOMMU *s,
>      uint32_t asid = le32_to_cpu(req->address_space);
>      uint32_t devid = le32_to_cpu(req->device);
>      uint32_t reserved = le32_to_cpu(req->reserved);
> +    viommu_as *as;
> +    viommu_dev *dev;
>  
>      trace_virtio_iommu_attach(asid, devid, reserved);
>  
> -    return VIRTIO_IOMMU_S_UNSUPP;
> +    dev = g_tree_lookup(s->devices, GUINT_TO_POINTER(devid));
> +    if (dev) {
> +        return -1;

I guess you could return S_INVAL here. However, if the device is already
attached to AS0, it should be detached and attached to AS1. The Linux
driver relies on this behavior when moving a device from kernel to user
address space and back.

Thanks,
Jean
Bharat Bhushan July 4, 2017, 9:13 a.m. UTC | #2
Hi Eric,

> -----Original Message-----
> From: Eric Auger [mailto:eric.auger@redhat.com]
> Sent: Wednesday, June 07, 2017 9:31 PM
> To: eric.auger.pro@gmail.com; eric.auger@redhat.com;
> peter.maydell@linaro.org; alex.williamson@redhat.com; mst@redhat.com;
> qemu-arm@nongnu.org; qemu-devel@nongnu.org; jean-
> philippe.brucker@arm.com
> Cc: will.deacon@arm.com; robin.murphy@arm.com; kevin.tian@intel.com;
> marc.zyngier@arm.com; christoffer.dall@linaro.org; drjones@redhat.com;
> wei@redhat.com; tn@semihalf.com; Bharat Bhushan
> <bharat.bhushan@nxp.com>
> Subject: [RFC v2 6/8] virtio-iommu: Implement the translation and
> commands
> 
> This patch adds the actual implementation for the translation routine
> and the virtio-iommu commands.
> 
> Signed-off-by: Eric Auger <eric.auger@redhat.com>
> 
> ---
> 
> v1 -> v2:
> - fix compilation issue reported by autobuild system
> ---
>  hw/virtio/trace-events   |   6 ++
>  hw/virtio/virtio-iommu.c | 202
> +++++++++++++++++++++++++++++++++++++++++++++--
>  2 files changed, 202 insertions(+), 6 deletions(-)
> 
> diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events
> index 341dbdf..9196b63 100644
> --- a/hw/virtio/trace-events
> +++ b/hw/virtio/trace-events
> @@ -33,3 +33,9 @@ virtio_iommu_detach(uint32_t dev, uint32_t flags)
> "dev=%d flags=%d"
>  virtio_iommu_map(uint32_t as, uint64_t phys_addr, uint64_t virt_addr,
> uint64_t size, uint32_t flags) "as= %d phys_addr=0x%"PRIx64"
> virt_addr=0x%"PRIx64" size=0x%"PRIx64" flags=%d"
>  virtio_iommu_unmap(uint32_t as, uint64_t virt_addr, uint64_t size, uint32_t
> reserved) "as= %d virt_addr=0x%"PRIx64" size=0x%"PRIx64" reserved=%d"
>  virtio_iommu_translate(const char *name, uint32_t rid, uint64_t iova, int
> flag) "mr=%s rid=%d addr=0x%"PRIx64" flag=%d"
> +virtio_iommu_new_asid(uint32_t asid) "Allocate a new asid=%d"
> +virtio_iommu_new_devid(uint32_t devid) "Allocate a new devid=%d"
> +virtio_iommu_unmap_left_interval(uint64_t low, uint64_t high, uint64_t
> next_low, uint64_t next_high) "Unmap left [0x%"PRIx64",0x%"PRIx64"],
> new interval=[0x%"PRIx64",0x%"PRIx64"]"
> +virtio_iommu_unmap_right_interval(uint64_t low, uint64_t high, uint64_t
> next_low, uint64_t next_high) "Unmap right [0x%"PRIx64",0x%"PRIx64"],
> new interval=[0x%"PRIx64",0x%"PRIx64"]"
> +virtio_iommu_unmap_inc_interval(uint64_t low, uint64_t high) "Unmap inc
> [0x%"PRIx64",0x%"PRIx64"]"
> +virtio_iommu_translate_result(uint64_t virt_addr, uint64_t phys_addr,
> uint32_t sid) "0x%"PRIx64" -> 0x%"PRIx64 " for sid=%d"
> diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c
> index 902c779..cd188fc 100644
> --- a/hw/virtio/virtio-iommu.c
> +++ b/hw/virtio/virtio-iommu.c
> @@ -32,10 +32,37 @@
>  #include "hw/virtio/virtio-bus.h"
>  #include "hw/virtio/virtio-access.h"
>  #include "hw/virtio/virtio-iommu.h"
> +#include "hw/pci/pci_bus.h"
> +#include "hw/pci/pci.h"
> 
>  /* Max size */
>  #define VIOMMU_DEFAULT_QUEUE_SIZE 256
> 
> +typedef struct viommu_as viommu_as;
> +
> +typedef struct viommu_mapping {
> +    uint64_t virt_addr;
> +    uint64_t phys_addr;
> +    uint64_t size;
> +    uint32_t flags;
> +} viommu_mapping;
> +
> +typedef struct viommu_interval {
> +    uint64_t low;
> +    uint64_t high;
> +} viommu_interval;
> +
> +typedef struct viommu_dev {
> +    uint32_t id;
> +    viommu_as *as;
> +} viommu_dev;
> +
> +struct viommu_as {
> +    uint32_t id;
> +    uint32_t nr_devices;
> +    GTree *mappings;
> +};
> +
>  static inline uint16_t smmu_get_sid(IOMMUDevice *dev)
>  {
>      return  ((pci_bus_num(dev->bus) & 0xff) << 8) | dev->devfn;
> @@ -88,6 +115,19 @@ static void virtio_iommu_init_as(VirtIOIOMMU *s)
>      }
>  }
> 
> +static gint interval_cmp(gconstpointer a, gconstpointer b, gpointer
> user_data)
> +{
> +    viommu_interval *inta = (viommu_interval *)a;
> +    viommu_interval *intb = (viommu_interval *)b;
> +
> +    if (inta->high <= intb->low) {
> +        return -1;
> +    } else if (intb->high <= inta->low) {
> +        return 1;
> +    } else {
> +        return 0;
> +    }
> +}
> 
>  static int virtio_iommu_attach(VirtIOIOMMU *s,
>                                 struct virtio_iommu_req_attach *req)
> @@ -95,10 +135,34 @@ static int virtio_iommu_attach(VirtIOIOMMU *s,
>      uint32_t asid = le32_to_cpu(req->address_space);
>      uint32_t devid = le32_to_cpu(req->device);
>      uint32_t reserved = le32_to_cpu(req->reserved);
> +    viommu_as *as;
> +    viommu_dev *dev;
> 
>      trace_virtio_iommu_attach(asid, devid, reserved);
> 
> -    return VIRTIO_IOMMU_S_UNSUPP;
> +    dev = g_tree_lookup(s->devices, GUINT_TO_POINTER(devid));
> +    if (dev) {
> +        return -1;
> +    }
> +
> +    as = g_tree_lookup(s->address_spaces, GUINT_TO_POINTER(asid));
> +    if (!as) {
> +        as = g_malloc0(sizeof(*as));
> +        as->id = asid;
> +        as->mappings = g_tree_new_full((GCompareDataFunc)interval_cmp,
> +                                         NULL, NULL, (GDestroyNotify)g_free);

Created the tree here but seems like missed destroy tree on detach.

Thanks
-Bharat

> +        g_tree_insert(s->address_spaces, GUINT_TO_POINTER(asid), as);
> +        trace_virtio_iommu_new_asid(asid);
> +    }
> +
> +    dev = g_malloc0(sizeof(*dev));
> +    dev->as = as;
> +    dev->id = devid;
> +    as->nr_devices++;
> +    trace_virtio_iommu_new_devid(devid);
> +    g_tree_insert(s->devices, GUINT_TO_POINTER(devid), dev);
> +
> +    return VIRTIO_IOMMU_S_OK;
>  }
> 
>  static int virtio_iommu_detach(VirtIOIOMMU *s,
> @@ -106,10 +170,13 @@ static int virtio_iommu_detach(VirtIOIOMMU *s,
>  {
>      uint32_t devid = le32_to_cpu(req->device);
>      uint32_t reserved = le32_to_cpu(req->reserved);
> +    int ret;
> 
>      trace_virtio_iommu_detach(devid, reserved);
> 
> -    return VIRTIO_IOMMU_S_UNSUPP;
> +    ret = g_tree_remove(s->devices, GUINT_TO_POINTER(devid));
> +
> +    return ret ? VIRTIO_IOMMU_S_OK : VIRTIO_IOMMU_S_INVAL;
>  }
> 
>  static int virtio_iommu_map(VirtIOIOMMU *s,
> @@ -120,10 +187,37 @@ static int virtio_iommu_map(VirtIOIOMMU *s,
>      uint64_t virt_addr = le64_to_cpu(req->virt_addr);
>      uint64_t size = le64_to_cpu(req->size);
>      uint32_t flags = le32_to_cpu(req->flags);
> +    viommu_as *as;
> +    viommu_interval *interval;
> +    viommu_mapping *mapping;
> +
> +    interval = g_malloc0(sizeof(*interval));
> +
> +    interval->low = virt_addr;
> +    interval->high = virt_addr + size - 1;
> +
> +    as = g_tree_lookup(s->address_spaces, GUINT_TO_POINTER(asid));
> +    if (!as) {
> +        return VIRTIO_IOMMU_S_INVAL;
> +    }
> +
> +    mapping = g_tree_lookup(as->mappings, (gpointer)interval);
> +    if (mapping) {
> +        g_free(interval);
> +        return VIRTIO_IOMMU_S_INVAL;
> +    }
> 
>      trace_virtio_iommu_map(asid, phys_addr, virt_addr, size, flags);
> 
> -    return VIRTIO_IOMMU_S_UNSUPP;
> +    mapping = g_malloc0(sizeof(*mapping));
> +    mapping->virt_addr = virt_addr;
> +    mapping->phys_addr = phys_addr;
> +    mapping->size = size;
> +    mapping->flags = flags;
> +
> +    g_tree_insert(as->mappings, interval, mapping);
> +
> +    return VIRTIO_IOMMU_S_OK;
>  }
> 
>  static int virtio_iommu_unmap(VirtIOIOMMU *s,
> @@ -133,10 +227,64 @@ static int virtio_iommu_unmap(VirtIOIOMMU *s,
>      uint64_t virt_addr = le64_to_cpu(req->virt_addr);
>      uint64_t size = le64_to_cpu(req->size);
>      uint32_t flags = le32_to_cpu(req->flags);
> +    viommu_mapping *mapping;
> +    viommu_interval interval;
> +    viommu_as *as;
> 
>      trace_virtio_iommu_unmap(asid, virt_addr, size, flags);
> 
> -    return VIRTIO_IOMMU_S_UNSUPP;
> +    as = g_tree_lookup(s->address_spaces, GUINT_TO_POINTER(asid));
> +    if (!as) {
> +        error_report("%s: no as", __func__);
> +        return VIRTIO_IOMMU_S_INVAL;
> +    }
> +    interval.low = virt_addr;
> +    interval.high = virt_addr + size - 1;
> +
> +    mapping = g_tree_lookup(as->mappings, (gpointer)&interval);
> +
> +    while (mapping) {
> +        viommu_interval current;
> +        uint64_t low  = mapping->virt_addr;
> +        uint64_t high = mapping->virt_addr + mapping->size - 1;
> +
> +        current.low = low;
> +        current.high = high;
> +
> +        if (low == interval.low && size >= mapping->size) {
> +            g_tree_remove(as->mappings, (gpointer)&current);
> +            interval.low = high + 1;
> +            trace_virtio_iommu_unmap_left_interval(current.low, current.high,
> +                interval.low, interval.high);
> +        } else if (high == interval.high && size >= mapping->size) {
> +            trace_virtio_iommu_unmap_right_interval(current.low, current.high,
> +                interval.low, interval.high);
> +            g_tree_remove(as->mappings, (gpointer)&current);
> +            interval.high = low - 1;
> +        } else if (low > interval.low && high < interval.high) {
> +            trace_virtio_iommu_unmap_inc_interval(current.low, current.high);
> +            g_tree_remove(as->mappings, (gpointer)&current);
> +        } else {
> +            break;
> +        }
> +        if (interval.low >= interval.high) {
> +            return VIRTIO_IOMMU_S_OK;
> +        } else {
> +            mapping = g_tree_lookup(as->mappings, (gpointer)&interval);
> +        }
> +    }
> +
> +    if (mapping) {
> +        error_report("****** %s: Unmap 0x%"PRIx64" size=0x%"PRIx64
> +                     " from 0x%"PRIx64" size=0x%"PRIx64" is not supported",
> +                     __func__, interval.low, size,
> +                     mapping->virt_addr, mapping->size);
> +    } else {
> +        error_report("****** %s: no mapping for
> [0x%"PRIx64",0x%"PRIx64"]",
> +                     __func__, interval.low, interval.high);
> +    }
> +
> +    return VIRTIO_IOMMU_S_INVAL;
>  }
> 
>  #define get_payload_size(req) (\
> @@ -266,19 +414,46 @@ static IOMMUTLBEntry
> virtio_iommu_translate(MemoryRegion *mr, hwaddr addr,
>                                              IOMMUAccessFlags flag)
>  {
>      IOMMUDevice *sdev = container_of(mr, IOMMUDevice, iommu_mr);
> +    VirtIOIOMMU *s = sdev->viommu;
>      uint32_t sid;
> +    viommu_dev *dev;
> +    viommu_mapping *mapping;
> +    viommu_interval interval;
> +
> +    interval.low = addr;
> +    interval.high = addr + 1;
> 
>      IOMMUTLBEntry entry = {
>          .target_as = &address_space_memory,
>          .iova = addr,
>          .translated_addr = addr,
> -        .addr_mask = ~(hwaddr)0,
> -        .perm = IOMMU_NONE,
> +        .addr_mask = (1 << 12) - 1, /* TODO */
> +        .perm = 3,
>      };
> 
>      sid = smmu_get_sid(sdev);
> 
>      trace_virtio_iommu_translate(mr->name, sid, addr, flag);
> +    qemu_mutex_lock(&s->mutex);
> +
> +    dev = g_tree_lookup(s->devices, GUINT_TO_POINTER(sid));
> +    if (!dev) {
> +        /* device cannot be attached to another as */
> +        printf("%s sid=%d is not known!!\n", __func__, sid);
> +        goto unlock;
> +    }
> +
> +    mapping = g_tree_lookup(dev->as->mappings, (gpointer)&interval);
> +    if (!mapping) {
> +        printf("%s no mapping for 0x%"PRIx64" for sid=%d\n", __func__,
> +               addr, sid);
> +        goto unlock;
> +    }
> +    entry.translated_addr = addr - mapping->virt_addr + mapping-
> >phys_addr,
> +    trace_virtio_iommu_translate_result(addr, entry.translated_addr, sid);
> +
> +unlock:
> +    qemu_mutex_unlock(&s->mutex);
>      return entry;
>  }
> 
> @@ -341,6 +516,12 @@ static inline guint as_uint64_hash(gconstpointer v)
>      return (guint)*(const uint64_t *)v;
>  }
> 
> +static gint int_cmp(gconstpointer a, gconstpointer b, gpointer user_data)
> +{
> +    uint ua = GPOINTER_TO_UINT(a);
> +    uint ub = GPOINTER_TO_UINT(b);
> +    return (ua > ub) - (ua < ub);
> +}
> 
>  static void virtio_iommu_device_realize(DeviceState *dev, Error **errp)
>  {
> @@ -362,12 +543,21 @@ static void
> virtio_iommu_device_realize(DeviceState *dev, Error **errp)
>                                              as_uint64_equal,
>                                              g_free, g_free);
> 
> +    s->address_spaces = g_tree_new_full((GCompareDataFunc)int_cmp,
> +                                         NULL, NULL, (GDestroyNotify)g_free);
> +    s->devices = g_tree_new_full((GCompareDataFunc)int_cmp,
> +                                         NULL, NULL, (GDestroyNotify)g_free);
> +
>      virtio_iommu_init_as(s);
>  }
> 
>  static void virtio_iommu_device_unrealize(DeviceState *dev, Error **errp)
>  {
>      VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> +    VirtIOIOMMU *s = VIRTIO_IOMMU(dev);
> +
> +    g_tree_destroy(s->address_spaces);
> +    g_tree_destroy(s->devices);
> 
>      virtio_cleanup(vdev);
>  }
> --
> 2.5.5
Auger Eric July 5, 2017, 6:40 a.m. UTC | #3
Hi Bharat,

On 04/07/2017 11:13, Bharat Bhushan wrote:
> Hi Eric,
> 
>> -----Original Message-----
>> From: Eric Auger [mailto:eric.auger@redhat.com]
>> Sent: Wednesday, June 07, 2017 9:31 PM
>> To: eric.auger.pro@gmail.com; eric.auger@redhat.com;
>> peter.maydell@linaro.org; alex.williamson@redhat.com; mst@redhat.com;
>> qemu-arm@nongnu.org; qemu-devel@nongnu.org; jean-
>> philippe.brucker@arm.com
>> Cc: will.deacon@arm.com; robin.murphy@arm.com; kevin.tian@intel.com;
>> marc.zyngier@arm.com; christoffer.dall@linaro.org; drjones@redhat.com;
>> wei@redhat.com; tn@semihalf.com; Bharat Bhushan
>> <bharat.bhushan@nxp.com>
>> Subject: [RFC v2 6/8] virtio-iommu: Implement the translation and
>> commands
>>
>> This patch adds the actual implementation for the translation routine
>> and the virtio-iommu commands.
>>
>> Signed-off-by: Eric Auger <eric.auger@redhat.com>
>>
>> ---
>>
>> v1 -> v2:
>> - fix compilation issue reported by autobuild system
>> ---
>>  hw/virtio/trace-events   |   6 ++
>>  hw/virtio/virtio-iommu.c | 202
>> +++++++++++++++++++++++++++++++++++++++++++++--
>>  2 files changed, 202 insertions(+), 6 deletions(-)
>>
>> diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events
>> index 341dbdf..9196b63 100644
>> --- a/hw/virtio/trace-events
>> +++ b/hw/virtio/trace-events
>> @@ -33,3 +33,9 @@ virtio_iommu_detach(uint32_t dev, uint32_t flags)
>> "dev=%d flags=%d"
>>  virtio_iommu_map(uint32_t as, uint64_t phys_addr, uint64_t virt_addr,
>> uint64_t size, uint32_t flags) "as= %d phys_addr=0x%"PRIx64"
>> virt_addr=0x%"PRIx64" size=0x%"PRIx64" flags=%d"
>>  virtio_iommu_unmap(uint32_t as, uint64_t virt_addr, uint64_t size, uint32_t
>> reserved) "as= %d virt_addr=0x%"PRIx64" size=0x%"PRIx64" reserved=%d"
>>  virtio_iommu_translate(const char *name, uint32_t rid, uint64_t iova, int
>> flag) "mr=%s rid=%d addr=0x%"PRIx64" flag=%d"
>> +virtio_iommu_new_asid(uint32_t asid) "Allocate a new asid=%d"
>> +virtio_iommu_new_devid(uint32_t devid) "Allocate a new devid=%d"
>> +virtio_iommu_unmap_left_interval(uint64_t low, uint64_t high, uint64_t
>> next_low, uint64_t next_high) "Unmap left [0x%"PRIx64",0x%"PRIx64"],
>> new interval=[0x%"PRIx64",0x%"PRIx64"]"
>> +virtio_iommu_unmap_right_interval(uint64_t low, uint64_t high, uint64_t
>> next_low, uint64_t next_high) "Unmap right [0x%"PRIx64",0x%"PRIx64"],
>> new interval=[0x%"PRIx64",0x%"PRIx64"]"
>> +virtio_iommu_unmap_inc_interval(uint64_t low, uint64_t high) "Unmap inc
>> [0x%"PRIx64",0x%"PRIx64"]"
>> +virtio_iommu_translate_result(uint64_t virt_addr, uint64_t phys_addr,
>> uint32_t sid) "0x%"PRIx64" -> 0x%"PRIx64 " for sid=%d"
>> diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c
>> index 902c779..cd188fc 100644
>> --- a/hw/virtio/virtio-iommu.c
>> +++ b/hw/virtio/virtio-iommu.c
>> @@ -32,10 +32,37 @@
>>  #include "hw/virtio/virtio-bus.h"
>>  #include "hw/virtio/virtio-access.h"
>>  #include "hw/virtio/virtio-iommu.h"
>> +#include "hw/pci/pci_bus.h"
>> +#include "hw/pci/pci.h"
>>
>>  /* Max size */
>>  #define VIOMMU_DEFAULT_QUEUE_SIZE 256
>>
>> +typedef struct viommu_as viommu_as;
>> +
>> +typedef struct viommu_mapping {
>> +    uint64_t virt_addr;
>> +    uint64_t phys_addr;
>> +    uint64_t size;
>> +    uint32_t flags;
>> +} viommu_mapping;
>> +
>> +typedef struct viommu_interval {
>> +    uint64_t low;
>> +    uint64_t high;
>> +} viommu_interval;
>> +
>> +typedef struct viommu_dev {
>> +    uint32_t id;
>> +    viommu_as *as;
>> +} viommu_dev;
>> +
>> +struct viommu_as {
>> +    uint32_t id;
>> +    uint32_t nr_devices;
>> +    GTree *mappings;
>> +};
>> +
>>  static inline uint16_t smmu_get_sid(IOMMUDevice *dev)
>>  {
>>      return  ((pci_bus_num(dev->bus) & 0xff) << 8) | dev->devfn;
>> @@ -88,6 +115,19 @@ static void virtio_iommu_init_as(VirtIOIOMMU *s)
>>      }
>>  }
>>
>> +static gint interval_cmp(gconstpointer a, gconstpointer b, gpointer
>> user_data)
>> +{
>> +    viommu_interval *inta = (viommu_interval *)a;
>> +    viommu_interval *intb = (viommu_interval *)b;
>> +
>> +    if (inta->high <= intb->low) {
>> +        return -1;
>> +    } else if (intb->high <= inta->low) {
>> +        return 1;
>> +    } else {
>> +        return 0;
>> +    }
>> +}
>>
>>  static int virtio_iommu_attach(VirtIOIOMMU *s,
>>                                 struct virtio_iommu_req_attach *req)
>> @@ -95,10 +135,34 @@ static int virtio_iommu_attach(VirtIOIOMMU *s,
>>      uint32_t asid = le32_to_cpu(req->address_space);
>>      uint32_t devid = le32_to_cpu(req->device);
>>      uint32_t reserved = le32_to_cpu(req->reserved);
>> +    viommu_as *as;
>> +    viommu_dev *dev;
>>
>>      trace_virtio_iommu_attach(asid, devid, reserved);
>>
>> -    return VIRTIO_IOMMU_S_UNSUPP;
>> +    dev = g_tree_lookup(s->devices, GUINT_TO_POINTER(devid));
>> +    if (dev) {
>> +        return -1;
>> +    }
>> +
>> +    as = g_tree_lookup(s->address_spaces, GUINT_TO_POINTER(asid));
>> +    if (!as) {
>> +        as = g_malloc0(sizeof(*as));
>> +        as->id = asid;
>> +        as->mappings = g_tree_new_full((GCompareDataFunc)interval_cmp,
>> +                                         NULL, NULL, (GDestroyNotify)g_free);
> 
> Created the tree here but seems like missed destroy tree on detach.
Sure, I will fix that.

Thanks!

Eric
> 
> Thanks
> -Bharat
> 
>> +        g_tree_insert(s->address_spaces, GUINT_TO_POINTER(asid), as);
>> +        trace_virtio_iommu_new_asid(asid);
>> +    }
>> +
>> +    dev = g_malloc0(sizeof(*dev));
>> +    dev->as = as;
>> +    dev->id = devid;
>> +    as->nr_devices++;
>> +    trace_virtio_iommu_new_devid(devid);
>> +    g_tree_insert(s->devices, GUINT_TO_POINTER(devid), dev);
>> +
>> +    return VIRTIO_IOMMU_S_OK;
>>  }
>>
>>  static int virtio_iommu_detach(VirtIOIOMMU *s,
>> @@ -106,10 +170,13 @@ static int virtio_iommu_detach(VirtIOIOMMU *s,
>>  {
>>      uint32_t devid = le32_to_cpu(req->device);
>>      uint32_t reserved = le32_to_cpu(req->reserved);
>> +    int ret;
>>
>>      trace_virtio_iommu_detach(devid, reserved);
>>
>> -    return VIRTIO_IOMMU_S_UNSUPP;
>> +    ret = g_tree_remove(s->devices, GUINT_TO_POINTER(devid));
>> +
>> +    return ret ? VIRTIO_IOMMU_S_OK : VIRTIO_IOMMU_S_INVAL;
>>  }
>>
>>  static int virtio_iommu_map(VirtIOIOMMU *s,
>> @@ -120,10 +187,37 @@ static int virtio_iommu_map(VirtIOIOMMU *s,
>>      uint64_t virt_addr = le64_to_cpu(req->virt_addr);
>>      uint64_t size = le64_to_cpu(req->size);
>>      uint32_t flags = le32_to_cpu(req->flags);
>> +    viommu_as *as;
>> +    viommu_interval *interval;
>> +    viommu_mapping *mapping;
>> +
>> +    interval = g_malloc0(sizeof(*interval));
>> +
>> +    interval->low = virt_addr;
>> +    interval->high = virt_addr + size - 1;
>> +
>> +    as = g_tree_lookup(s->address_spaces, GUINT_TO_POINTER(asid));
>> +    if (!as) {
>> +        return VIRTIO_IOMMU_S_INVAL;
>> +    }
>> +
>> +    mapping = g_tree_lookup(as->mappings, (gpointer)interval);
>> +    if (mapping) {
>> +        g_free(interval);
>> +        return VIRTIO_IOMMU_S_INVAL;
>> +    }
>>
>>      trace_virtio_iommu_map(asid, phys_addr, virt_addr, size, flags);
>>
>> -    return VIRTIO_IOMMU_S_UNSUPP;
>> +    mapping = g_malloc0(sizeof(*mapping));
>> +    mapping->virt_addr = virt_addr;
>> +    mapping->phys_addr = phys_addr;
>> +    mapping->size = size;
>> +    mapping->flags = flags;
>> +
>> +    g_tree_insert(as->mappings, interval, mapping);
>> +
>> +    return VIRTIO_IOMMU_S_OK;
>>  }
>>
>>  static int virtio_iommu_unmap(VirtIOIOMMU *s,
>> @@ -133,10 +227,64 @@ static int virtio_iommu_unmap(VirtIOIOMMU *s,
>>      uint64_t virt_addr = le64_to_cpu(req->virt_addr);
>>      uint64_t size = le64_to_cpu(req->size);
>>      uint32_t flags = le32_to_cpu(req->flags);
>> +    viommu_mapping *mapping;
>> +    viommu_interval interval;
>> +    viommu_as *as;
>>
>>      trace_virtio_iommu_unmap(asid, virt_addr, size, flags);
>>
>> -    return VIRTIO_IOMMU_S_UNSUPP;
>> +    as = g_tree_lookup(s->address_spaces, GUINT_TO_POINTER(asid));
>> +    if (!as) {
>> +        error_report("%s: no as", __func__);
>> +        return VIRTIO_IOMMU_S_INVAL;
>> +    }
>> +    interval.low = virt_addr;
>> +    interval.high = virt_addr + size - 1;
>> +
>> +    mapping = g_tree_lookup(as->mappings, (gpointer)&interval);
>> +
>> +    while (mapping) {
>> +        viommu_interval current;
>> +        uint64_t low  = mapping->virt_addr;
>> +        uint64_t high = mapping->virt_addr + mapping->size - 1;
>> +
>> +        current.low = low;
>> +        current.high = high;
>> +
>> +        if (low == interval.low && size >= mapping->size) {
>> +            g_tree_remove(as->mappings, (gpointer)&current);
>> +            interval.low = high + 1;
>> +            trace_virtio_iommu_unmap_left_interval(current.low, current.high,
>> +                interval.low, interval.high);
>> +        } else if (high == interval.high && size >= mapping->size) {
>> +            trace_virtio_iommu_unmap_right_interval(current.low, current.high,
>> +                interval.low, interval.high);
>> +            g_tree_remove(as->mappings, (gpointer)&current);
>> +            interval.high = low - 1;
>> +        } else if (low > interval.low && high < interval.high) {
>> +            trace_virtio_iommu_unmap_inc_interval(current.low, current.high);
>> +            g_tree_remove(as->mappings, (gpointer)&current);
>> +        } else {
>> +            break;
>> +        }
>> +        if (interval.low >= interval.high) {
>> +            return VIRTIO_IOMMU_S_OK;
>> +        } else {
>> +            mapping = g_tree_lookup(as->mappings, (gpointer)&interval);
>> +        }
>> +    }
>> +
>> +    if (mapping) {
>> +        error_report("****** %s: Unmap 0x%"PRIx64" size=0x%"PRIx64
>> +                     " from 0x%"PRIx64" size=0x%"PRIx64" is not supported",
>> +                     __func__, interval.low, size,
>> +                     mapping->virt_addr, mapping->size);
>> +    } else {
>> +        error_report("****** %s: no mapping for
>> [0x%"PRIx64",0x%"PRIx64"]",
>> +                     __func__, interval.low, interval.high);
>> +    }
>> +
>> +    return VIRTIO_IOMMU_S_INVAL;
>>  }
>>
>>  #define get_payload_size(req) (\
>> @@ -266,19 +414,46 @@ static IOMMUTLBEntry
>> virtio_iommu_translate(MemoryRegion *mr, hwaddr addr,
>>                                              IOMMUAccessFlags flag)
>>  {
>>      IOMMUDevice *sdev = container_of(mr, IOMMUDevice, iommu_mr);
>> +    VirtIOIOMMU *s = sdev->viommu;
>>      uint32_t sid;
>> +    viommu_dev *dev;
>> +    viommu_mapping *mapping;
>> +    viommu_interval interval;
>> +
>> +    interval.low = addr;
>> +    interval.high = addr + 1;
>>
>>      IOMMUTLBEntry entry = {
>>          .target_as = &address_space_memory,
>>          .iova = addr,
>>          .translated_addr = addr,
>> -        .addr_mask = ~(hwaddr)0,
>> -        .perm = IOMMU_NONE,
>> +        .addr_mask = (1 << 12) - 1, /* TODO */
>> +        .perm = 3,
>>      };
>>
>>      sid = smmu_get_sid(sdev);
>>
>>      trace_virtio_iommu_translate(mr->name, sid, addr, flag);
>> +    qemu_mutex_lock(&s->mutex);
>> +
>> +    dev = g_tree_lookup(s->devices, GUINT_TO_POINTER(sid));
>> +    if (!dev) {
>> +        /* device cannot be attached to another as */
>> +        printf("%s sid=%d is not known!!\n", __func__, sid);
>> +        goto unlock;
>> +    }
>> +
>> +    mapping = g_tree_lookup(dev->as->mappings, (gpointer)&interval);
>> +    if (!mapping) {
>> +        printf("%s no mapping for 0x%"PRIx64" for sid=%d\n", __func__,
>> +               addr, sid);
>> +        goto unlock;
>> +    }
>> +    entry.translated_addr = addr - mapping->virt_addr + mapping-
>>> phys_addr,
>> +    trace_virtio_iommu_translate_result(addr, entry.translated_addr, sid);
>> +
>> +unlock:
>> +    qemu_mutex_unlock(&s->mutex);
>>      return entry;
>>  }
>>
>> @@ -341,6 +516,12 @@ static inline guint as_uint64_hash(gconstpointer v)
>>      return (guint)*(const uint64_t *)v;
>>  }
>>
>> +static gint int_cmp(gconstpointer a, gconstpointer b, gpointer user_data)
>> +{
>> +    uint ua = GPOINTER_TO_UINT(a);
>> +    uint ub = GPOINTER_TO_UINT(b);
>> +    return (ua > ub) - (ua < ub);
>> +}
>>
>>  static void virtio_iommu_device_realize(DeviceState *dev, Error **errp)
>>  {
>> @@ -362,12 +543,21 @@ static void
>> virtio_iommu_device_realize(DeviceState *dev, Error **errp)
>>                                              as_uint64_equal,
>>                                              g_free, g_free);
>>
>> +    s->address_spaces = g_tree_new_full((GCompareDataFunc)int_cmp,
>> +                                         NULL, NULL, (GDestroyNotify)g_free);
>> +    s->devices = g_tree_new_full((GCompareDataFunc)int_cmp,
>> +                                         NULL, NULL, (GDestroyNotify)g_free);
>> +
>>      virtio_iommu_init_as(s);
>>  }
>>
>>  static void virtio_iommu_device_unrealize(DeviceState *dev, Error **errp)
>>  {
>>      VirtIODevice *vdev = VIRTIO_DEVICE(dev);
>> +    VirtIOIOMMU *s = VIRTIO_IOMMU(dev);
>> +
>> +    g_tree_destroy(s->address_spaces);
>> +    g_tree_destroy(s->devices);
>>
>>      virtio_cleanup(vdev);
>>  }
>> --
>> 2.5.5
> 
>
Peter Xu July 14, 2017, 2:17 a.m. UTC | #4
On Wed, Jun 07, 2017 at 06:01:25PM +0200, Eric Auger wrote:
> This patch adds the actual implementation for the translation routine
> and the virtio-iommu commands.
> 
> Signed-off-by: Eric Auger <eric.auger@redhat.com>

[...]

>  static int virtio_iommu_attach(VirtIOIOMMU *s,
>                                 struct virtio_iommu_req_attach *req)
> @@ -95,10 +135,34 @@ static int virtio_iommu_attach(VirtIOIOMMU *s,
>      uint32_t asid = le32_to_cpu(req->address_space);
>      uint32_t devid = le32_to_cpu(req->device);
>      uint32_t reserved = le32_to_cpu(req->reserved);
> +    viommu_as *as;
> +    viommu_dev *dev;
>  
>      trace_virtio_iommu_attach(asid, devid, reserved);
>  
> -    return VIRTIO_IOMMU_S_UNSUPP;
> +    dev = g_tree_lookup(s->devices, GUINT_TO_POINTER(devid));
> +    if (dev) {
> +        return -1;
> +    }
> +
> +    as = g_tree_lookup(s->address_spaces, GUINT_TO_POINTER(asid));
> +    if (!as) {
> +        as = g_malloc0(sizeof(*as));
> +        as->id = asid;
> +        as->mappings = g_tree_new_full((GCompareDataFunc)interval_cmp,
> +                                         NULL, NULL, (GDestroyNotify)g_free);
> +        g_tree_insert(s->address_spaces, GUINT_TO_POINTER(asid), as);
> +        trace_virtio_iommu_new_asid(asid);
> +    }
> +
> +    dev = g_malloc0(sizeof(*dev));
> +    dev->as = as;
> +    dev->id = devid;
> +    as->nr_devices++;
> +    trace_virtio_iommu_new_devid(devid);
> +    g_tree_insert(s->devices, GUINT_TO_POINTER(devid), dev);

Here do we need to record something like a refcount for address space?
Since...

> +
> +    return VIRTIO_IOMMU_S_OK;
>  }
>  
>  static int virtio_iommu_detach(VirtIOIOMMU *s,
> @@ -106,10 +170,13 @@ static int virtio_iommu_detach(VirtIOIOMMU *s,
>  {
>      uint32_t devid = le32_to_cpu(req->device);
>      uint32_t reserved = le32_to_cpu(req->reserved);
> +    int ret;
>  
>      trace_virtio_iommu_detach(devid, reserved);
>  
> -    return VIRTIO_IOMMU_S_UNSUPP;
> +    ret = g_tree_remove(s->devices, GUINT_TO_POINTER(devid));

... here when detach, imho we should check the refcount: if there is
no device using specific address space, we should release the address
space as well.

Otherwise there would have no way to destroy an address space?

> +
> +    return ret ? VIRTIO_IOMMU_S_OK : VIRTIO_IOMMU_S_INVAL;
>  }

[...]

>  static int virtio_iommu_unmap(VirtIOIOMMU *s,
> @@ -133,10 +227,64 @@ static int virtio_iommu_unmap(VirtIOIOMMU *s,
>      uint64_t virt_addr = le64_to_cpu(req->virt_addr);
>      uint64_t size = le64_to_cpu(req->size);
>      uint32_t flags = le32_to_cpu(req->flags);
> +    viommu_mapping *mapping;
> +    viommu_interval interval;
> +    viommu_as *as;
>  
>      trace_virtio_iommu_unmap(asid, virt_addr, size, flags);
>  
> -    return VIRTIO_IOMMU_S_UNSUPP;
> +    as = g_tree_lookup(s->address_spaces, GUINT_TO_POINTER(asid));
> +    if (!as) {
> +        error_report("%s: no as", __func__);
> +        return VIRTIO_IOMMU_S_INVAL;
> +    }
> +    interval.low = virt_addr;
> +    interval.high = virt_addr + size - 1;
> +
> +    mapping = g_tree_lookup(as->mappings, (gpointer)&interval);
> +
> +    while (mapping) {
> +        viommu_interval current;
> +        uint64_t low  = mapping->virt_addr;
> +        uint64_t high = mapping->virt_addr + mapping->size - 1;
> +
> +        current.low = low;
> +        current.high = high;
> +
> +        if (low == interval.low && size >= mapping->size) {
> +            g_tree_remove(as->mappings, (gpointer)&current);
> +            interval.low = high + 1;
> +            trace_virtio_iommu_unmap_left_interval(current.low, current.high,
> +                interval.low, interval.high);
> +        } else if (high == interval.high && size >= mapping->size) {
> +            trace_virtio_iommu_unmap_right_interval(current.low, current.high,
> +                interval.low, interval.high);
> +            g_tree_remove(as->mappings, (gpointer)&current);
> +            interval.high = low - 1;
> +        } else if (low > interval.low && high < interval.high) {
> +            trace_virtio_iommu_unmap_inc_interval(current.low, current.high);
> +            g_tree_remove(as->mappings, (gpointer)&current);
> +        } else {
> +            break;
> +        }
> +        if (interval.low >= interval.high) {
> +            return VIRTIO_IOMMU_S_OK;
> +        } else {
> +            mapping = g_tree_lookup(as->mappings, (gpointer)&interval);
> +        }
> +    }

IIUC for unmap here we are very strict - a extreme case is that when
an address space is just created (so no mapping inside), if we send
one UNMAP to a range, it'll fail currently, right? Should we loosen
it?

IMHO as long as we make sure all the mappings in the range of an UNMAP
request are destroyed, then we are good. I think at least both vfio
api and vt-d emuation have this assumption. But maybe I am wrong.
Please correct me if so.

> +
> +    if (mapping) {
> +        error_report("****** %s: Unmap 0x%"PRIx64" size=0x%"PRIx64
> +                     " from 0x%"PRIx64" size=0x%"PRIx64" is not supported",
> +                     __func__, interval.low, size,
> +                     mapping->virt_addr, mapping->size);
> +    } else {
> +        error_report("****** %s: no mapping for [0x%"PRIx64",0x%"PRIx64"]",
> +                     __func__, interval.low, interval.high);
> +    }
> +
> +    return VIRTIO_IOMMU_S_INVAL;
>  }

Thanks,
Bharat Bhushan July 14, 2017, 6:40 a.m. UTC | #5
Hi Peter,

> -----Original Message-----

> From: Peter Xu [mailto:peterx@redhat.com]

> Sent: Friday, July 14, 2017 7:48 AM

> To: Eric Auger <eric.auger@redhat.com>

> Cc: eric.auger.pro@gmail.com; peter.maydell@linaro.org;

> alex.williamson@redhat.com; mst@redhat.com; qemu-arm@nongnu.org;

> qemu-devel@nongnu.org; jean-philippe.brucker@arm.com;

> wei@redhat.com; kevin.tian@intel.com; Bharat Bhushan

> <bharat.bhushan@nxp.com>; marc.zyngier@arm.com; tn@semihalf.com;

> will.deacon@arm.com; drjones@redhat.com; robin.murphy@arm.com;

> christoffer.dall@linaro.org

> Subject: Re: [Qemu-devel] [RFC v2 6/8] virtio-iommu: Implement the

> translation and commands

> 

> On Wed, Jun 07, 2017 at 06:01:25PM +0200, Eric Auger wrote:

> > This patch adds the actual implementation for the translation routine

> > and the virtio-iommu commands.

> >

> > Signed-off-by: Eric Auger <eric.auger@redhat.com>

> 

> [...]

> 

> >  static int virtio_iommu_attach(VirtIOIOMMU *s,

> >                                 struct virtio_iommu_req_attach *req)

> > @@ -95,10 +135,34 @@ static int virtio_iommu_attach(VirtIOIOMMU *s,

> >      uint32_t asid = le32_to_cpu(req->address_space);

> >      uint32_t devid = le32_to_cpu(req->device);

> >      uint32_t reserved = le32_to_cpu(req->reserved);

> > +    viommu_as *as;

> > +    viommu_dev *dev;

> >

> >      trace_virtio_iommu_attach(asid, devid, reserved);

> >

> > -    return VIRTIO_IOMMU_S_UNSUPP;

> > +    dev = g_tree_lookup(s->devices, GUINT_TO_POINTER(devid));

> > +    if (dev) {

> > +        return -1;

> > +    }

> > +

> > +    as = g_tree_lookup(s->address_spaces, GUINT_TO_POINTER(asid));

> > +    if (!as) {

> > +        as = g_malloc0(sizeof(*as));

> > +        as->id = asid;

> > +        as->mappings = g_tree_new_full((GCompareDataFunc)interval_cmp,

> > +                                         NULL, NULL, (GDestroyNotify)g_free);

> > +        g_tree_insert(s->address_spaces, GUINT_TO_POINTER(asid), as);

> > +        trace_virtio_iommu_new_asid(asid);

> > +    }

> > +

> > +    dev = g_malloc0(sizeof(*dev));

> > +    dev->as = as;

> > +    dev->id = devid;

> > +    as->nr_devices++;

> > +    trace_virtio_iommu_new_devid(devid);

> > +    g_tree_insert(s->devices, GUINT_TO_POINTER(devid), dev);

> 

> Here do we need to record something like a refcount for address space?

> Since...


We are using "nr_devices" as number of devices attached to an address-space

> 

> > +

> > +    return VIRTIO_IOMMU_S_OK;

> >  }

> >

> >  static int virtio_iommu_detach(VirtIOIOMMU *s, @@ -106,10 +170,13 @@

> > static int virtio_iommu_detach(VirtIOIOMMU *s,  {

> >      uint32_t devid = le32_to_cpu(req->device);

> >      uint32_t reserved = le32_to_cpu(req->reserved);

> > +    int ret;

> >

> >      trace_virtio_iommu_detach(devid, reserved);

> >

> > -    return VIRTIO_IOMMU_S_UNSUPP;

> > +    ret = g_tree_remove(s->devices, GUINT_TO_POINTER(devid));

> 

> ... here when detach, imho we should check the refcount: if there is no

> device using specific address space, we should release the address space as

> well.

> 

> Otherwise there would have no way to destroy an address space?



Here if nr_devices == 0 then release the address space, is that ok? 

This is how I implemented as part of VFIO integration over this patch series.
	"[RFC PATCH 2/2] virtio-iommu: vfio integration with virtio-iommu"

Thanks
-Bharat
> 

> > +

> > +    return ret ? VIRTIO_IOMMU_S_OK : VIRTIO_IOMMU_S_INVAL;

> >  }

> 

> [...]

> 

> >  static int virtio_iommu_unmap(VirtIOIOMMU *s, @@ -133,10 +227,64 @@

> > static int virtio_iommu_unmap(VirtIOIOMMU *s,

> >      uint64_t virt_addr = le64_to_cpu(req->virt_addr);

> >      uint64_t size = le64_to_cpu(req->size);

> >      uint32_t flags = le32_to_cpu(req->flags);

> > +    viommu_mapping *mapping;

> > +    viommu_interval interval;

> > +    viommu_as *as;

> >

> >      trace_virtio_iommu_unmap(asid, virt_addr, size, flags);

> >

> > -    return VIRTIO_IOMMU_S_UNSUPP;

> > +    as = g_tree_lookup(s->address_spaces, GUINT_TO_POINTER(asid));

> > +    if (!as) {

> > +        error_report("%s: no as", __func__);

> > +        return VIRTIO_IOMMU_S_INVAL;

> > +    }

> > +    interval.low = virt_addr;

> > +    interval.high = virt_addr + size - 1;

> > +

> > +    mapping = g_tree_lookup(as->mappings, (gpointer)&interval);

> > +

> > +    while (mapping) {

> > +        viommu_interval current;

> > +        uint64_t low  = mapping->virt_addr;

> > +        uint64_t high = mapping->virt_addr + mapping->size - 1;

> > +

> > +        current.low = low;

> > +        current.high = high;

> > +

> > +        if (low == interval.low && size >= mapping->size) {

> > +            g_tree_remove(as->mappings, (gpointer)&current);

> > +            interval.low = high + 1;

> > +            trace_virtio_iommu_unmap_left_interval(current.low,

> current.high,

> > +                interval.low, interval.high);

> > +        } else if (high == interval.high && size >= mapping->size) {

> > +            trace_virtio_iommu_unmap_right_interval(current.low,

> current.high,

> > +                interval.low, interval.high);

> > +            g_tree_remove(as->mappings, (gpointer)&current);

> > +            interval.high = low - 1;

> > +        } else if (low > interval.low && high < interval.high) {

> > +            trace_virtio_iommu_unmap_inc_interval(current.low,

> current.high);

> > +            g_tree_remove(as->mappings, (gpointer)&current);

> > +        } else {

> > +            break;

> > +        }

> > +        if (interval.low >= interval.high) {

> > +            return VIRTIO_IOMMU_S_OK;

> > +        } else {

> > +            mapping = g_tree_lookup(as->mappings, (gpointer)&interval);

> > +        }

> > +    }

> 

> IIUC for unmap here we are very strict - a extreme case is that when an

> address space is just created (so no mapping inside), if we send one UNMAP

> to a range, it'll fail currently, right? Should we loosen it?

> 

> IMHO as long as we make sure all the mappings in the range of an UNMAP

> request are destroyed, then we are good. I think at least both vfio api and vt-

> d emuation have this assumption. But maybe I am wrong.

> Please correct me if so.

> 

> > +

> > +    if (mapping) {

> > +        error_report("****** %s: Unmap 0x%"PRIx64" size=0x%"PRIx64

> > +                     " from 0x%"PRIx64" size=0x%"PRIx64" is not supported",

> > +                     __func__, interval.low, size,

> > +                     mapping->virt_addr, mapping->size);

> > +    } else {

> > +        error_report("****** %s: no mapping for

> [0x%"PRIx64",0x%"PRIx64"]",

> > +                     __func__, interval.low, interval.high);

> > +    }

> > +

> > +    return VIRTIO_IOMMU_S_INVAL;

> >  }

> 

> Thanks,

> 

> --

> Peter Xu
Jean-Philippe Brucker July 14, 2017, 11:25 a.m. UTC | #6
Hi Peter,

On 14/07/17 03:17, Peter Xu wrote:
>
> [...]
> 
>>  static int virtio_iommu_unmap(VirtIOIOMMU *s,
>> @@ -133,10 +227,64 @@ static int virtio_iommu_unmap(VirtIOIOMMU *s,
>>      uint64_t virt_addr = le64_to_cpu(req->virt_addr);
>>      uint64_t size = le64_to_cpu(req->size);
>>      uint32_t flags = le32_to_cpu(req->flags);
>> +    viommu_mapping *mapping;
>> +    viommu_interval interval;
>> +    viommu_as *as;
>>  
>>      trace_virtio_iommu_unmap(asid, virt_addr, size, flags);
>>  
>> -    return VIRTIO_IOMMU_S_UNSUPP;
>> +    as = g_tree_lookup(s->address_spaces, GUINT_TO_POINTER(asid));
>> +    if (!as) {
>> +        error_report("%s: no as", __func__);
>> +        return VIRTIO_IOMMU_S_INVAL;
>> +    }
>> +    interval.low = virt_addr;
>> +    interval.high = virt_addr + size - 1;
>> +
>> +    mapping = g_tree_lookup(as->mappings, (gpointer)&interval);
>> +
>> +    while (mapping) {
>> +        viommu_interval current;
>> +        uint64_t low  = mapping->virt_addr;
>> +        uint64_t high = mapping->virt_addr + mapping->size - 1;
>> +
>> +        current.low = low;
>> +        current.high = high;
>> +
>> +        if (low == interval.low && size >= mapping->size) {
>> +            g_tree_remove(as->mappings, (gpointer)&current);
>> +            interval.low = high + 1;
>> +            trace_virtio_iommu_unmap_left_interval(current.low, current.high,
>> +                interval.low, interval.high);
>> +        } else if (high == interval.high && size >= mapping->size) {
>> +            trace_virtio_iommu_unmap_right_interval(current.low, current.high,
>> +                interval.low, interval.high);
>> +            g_tree_remove(as->mappings, (gpointer)&current);
>> +            interval.high = low - 1;
>> +        } else if (low > interval.low && high < interval.high) {
>> +            trace_virtio_iommu_unmap_inc_interval(current.low, current.high);
>> +            g_tree_remove(as->mappings, (gpointer)&current);
>> +        } else {
>> +            break;
>> +        }
>> +        if (interval.low >= interval.high) {
>> +            return VIRTIO_IOMMU_S_OK;
>> +        } else {
>> +            mapping = g_tree_lookup(as->mappings, (gpointer)&interval);
>> +        }
>> +    }
> 
> IIUC for unmap here we are very strict - a extreme case is that when
> an address space is just created (so no mapping inside), if we send
> one UNMAP to a range, it'll fail currently, right? Should we loosen
> it?

In the next specification version I'd like to slightly redefine unmap
semantics (to make them more deterministic). Unmapping a range that is
only partially mapped or not mapped at all would not return an error, and
would unmap everything that is covered by the range.

Note that unmapping a partial range (splitting a single mapping) is still
forbidden. The following pseudocode sequences attempt to illustrate the
rules I'd like to set. There are no mappings in the address space prior to
each sequence.

(1) unmap(addr=0, size=5)        -> succeeds, doesn't unmap anything

(2) a = map(addr=0, size=10);
    unmap(0, 10)                 -> succeeds, unmaps a

(3) a = map(0, 5);
    b = map(5, 5);
    unmap(0, 10)                 -> succeeds, unmaps a and b

(4) a = map(0, 10);
    unmap(0, 5)                  -> faults, doesn't unmap anything

(5) a = map(0, 5);
    b = map(5, 5);
    unmap(0, 5)                  -> succeeds, unmaps a

(6) a = map(0, 5);
    unmap(0, 10)                 -> succeeds, unmaps a

(7) a = map(0, 5);
    b = map(10, 5);
    unmap(0, 15)                 -> succeeds, unmaps a and b

Previously I left (1), (6) and (7) as an implementation choice. The device
could either succeed and unmap, or fail and not unmap. This means that if
a driver wanted guarantees, it had to issue strict map/unmap sequences.

I believe that VFIO type1 v2 has these semantics, but "v1" allowed (4) to
succeed and unmap a.

Thanks,
Jean


> IMHO as long as we make sure all the mappings in the range of an UNMAP
> request are destroyed, then we are good. I think at least both vfio
> api and vt-d emuation have this assumption. But maybe I am wrong.
> Please correct me if so.
> 
>> +
>> +    if (mapping) {
>> +        error_report("****** %s: Unmap 0x%"PRIx64" size=0x%"PRIx64
>> +                     " from 0x%"PRIx64" size=0x%"PRIx64" is not supported",
>> +                     __func__, interval.low, size,
>> +                     mapping->virt_addr, mapping->size);
>> +    } else {
>> +        error_report("****** %s: no mapping for [0x%"PRIx64",0x%"PRIx64"]",
>> +                     __func__, interval.low, interval.high);
>> +    }
>> +
>> +    return VIRTIO_IOMMU_S_INVAL;
>>  }
> 
> Thanks,
>
Peter Xu July 17, 2017, 1:28 a.m. UTC | #7
On Fri, Jul 14, 2017 at 06:40:34AM +0000, Bharat Bhushan wrote:
> Hi Peter,
> 
> > -----Original Message-----
> > From: Peter Xu [mailto:peterx@redhat.com]
> > Sent: Friday, July 14, 2017 7:48 AM
> > To: Eric Auger <eric.auger@redhat.com>
> > Cc: eric.auger.pro@gmail.com; peter.maydell@linaro.org;
> > alex.williamson@redhat.com; mst@redhat.com; qemu-arm@nongnu.org;
> > qemu-devel@nongnu.org; jean-philippe.brucker@arm.com;
> > wei@redhat.com; kevin.tian@intel.com; Bharat Bhushan
> > <bharat.bhushan@nxp.com>; marc.zyngier@arm.com; tn@semihalf.com;
> > will.deacon@arm.com; drjones@redhat.com; robin.murphy@arm.com;
> > christoffer.dall@linaro.org
> > Subject: Re: [Qemu-devel] [RFC v2 6/8] virtio-iommu: Implement the
> > translation and commands
> > 
> > On Wed, Jun 07, 2017 at 06:01:25PM +0200, Eric Auger wrote:
> > > This patch adds the actual implementation for the translation routine
> > > and the virtio-iommu commands.
> > >
> > > Signed-off-by: Eric Auger <eric.auger@redhat.com>
> > 
> > [...]
> > 
> > >  static int virtio_iommu_attach(VirtIOIOMMU *s,
> > >                                 struct virtio_iommu_req_attach *req)
> > > @@ -95,10 +135,34 @@ static int virtio_iommu_attach(VirtIOIOMMU *s,
> > >      uint32_t asid = le32_to_cpu(req->address_space);
> > >      uint32_t devid = le32_to_cpu(req->device);
> > >      uint32_t reserved = le32_to_cpu(req->reserved);
> > > +    viommu_as *as;
> > > +    viommu_dev *dev;
> > >
> > >      trace_virtio_iommu_attach(asid, devid, reserved);
> > >
> > > -    return VIRTIO_IOMMU_S_UNSUPP;
> > > +    dev = g_tree_lookup(s->devices, GUINT_TO_POINTER(devid));
> > > +    if (dev) {
> > > +        return -1;
> > > +    }
> > > +
> > > +    as = g_tree_lookup(s->address_spaces, GUINT_TO_POINTER(asid));
> > > +    if (!as) {
> > > +        as = g_malloc0(sizeof(*as));
> > > +        as->id = asid;
> > > +        as->mappings = g_tree_new_full((GCompareDataFunc)interval_cmp,
> > > +                                         NULL, NULL, (GDestroyNotify)g_free);
> > > +        g_tree_insert(s->address_spaces, GUINT_TO_POINTER(asid), as);
> > > +        trace_virtio_iommu_new_asid(asid);
> > > +    }
> > > +
> > > +    dev = g_malloc0(sizeof(*dev));
> > > +    dev->as = as;
> > > +    dev->id = devid;
> > > +    as->nr_devices++;
> > > +    trace_virtio_iommu_new_devid(devid);
> > > +    g_tree_insert(s->devices, GUINT_TO_POINTER(devid), dev);
> > 
> > Here do we need to record something like a refcount for address space?
> > Since...
> 
> We are using "nr_devices" as number of devices attached to an address-space
> 
> > 
> > > +
> > > +    return VIRTIO_IOMMU_S_OK;
> > >  }
> > >
> > >  static int virtio_iommu_detach(VirtIOIOMMU *s, @@ -106,10 +170,13 @@
> > > static int virtio_iommu_detach(VirtIOIOMMU *s,  {
> > >      uint32_t devid = le32_to_cpu(req->device);
> > >      uint32_t reserved = le32_to_cpu(req->reserved);
> > > +    int ret;
> > >
> > >      trace_virtio_iommu_detach(devid, reserved);
> > >
> > > -    return VIRTIO_IOMMU_S_UNSUPP;
> > > +    ret = g_tree_remove(s->devices, GUINT_TO_POINTER(devid));
> > 
> > ... here when detach, imho we should check the refcount: if there is no
> > device using specific address space, we should release the address space as
> > well.
> > 
> > Otherwise there would have no way to destroy an address space?
> 
> 
> Here if nr_devices == 0 then release the address space, is that ok? 
> 
> This is how I implemented as part of VFIO integration over this patch series.
> 	"[RFC PATCH 2/2] virtio-iommu: vfio integration with virtio-iommu"

Sorry I didn't read that when posting. It is what I mean.  Thanks,
Peter Xu July 17, 2017, 1:37 a.m. UTC | #8
On Fri, Jul 14, 2017 at 12:25:13PM +0100, Jean-Philippe Brucker wrote:
> Hi Peter,
> 
> On 14/07/17 03:17, Peter Xu wrote:
> >
> > [...]
> > 
> >>  static int virtio_iommu_unmap(VirtIOIOMMU *s,
> >> @@ -133,10 +227,64 @@ static int virtio_iommu_unmap(VirtIOIOMMU *s,
> >>      uint64_t virt_addr = le64_to_cpu(req->virt_addr);
> >>      uint64_t size = le64_to_cpu(req->size);
> >>      uint32_t flags = le32_to_cpu(req->flags);
> >> +    viommu_mapping *mapping;
> >> +    viommu_interval interval;
> >> +    viommu_as *as;
> >>  
> >>      trace_virtio_iommu_unmap(asid, virt_addr, size, flags);
> >>  
> >> -    return VIRTIO_IOMMU_S_UNSUPP;
> >> +    as = g_tree_lookup(s->address_spaces, GUINT_TO_POINTER(asid));
> >> +    if (!as) {
> >> +        error_report("%s: no as", __func__);
> >> +        return VIRTIO_IOMMU_S_INVAL;
> >> +    }
> >> +    interval.low = virt_addr;
> >> +    interval.high = virt_addr + size - 1;
> >> +
> >> +    mapping = g_tree_lookup(as->mappings, (gpointer)&interval);
> >> +
> >> +    while (mapping) {
> >> +        viommu_interval current;
> >> +        uint64_t low  = mapping->virt_addr;
> >> +        uint64_t high = mapping->virt_addr + mapping->size - 1;
> >> +
> >> +        current.low = low;
> >> +        current.high = high;
> >> +
> >> +        if (low == interval.low && size >= mapping->size) {
> >> +            g_tree_remove(as->mappings, (gpointer)&current);
> >> +            interval.low = high + 1;
> >> +            trace_virtio_iommu_unmap_left_interval(current.low, current.high,
> >> +                interval.low, interval.high);
> >> +        } else if (high == interval.high && size >= mapping->size) {
> >> +            trace_virtio_iommu_unmap_right_interval(current.low, current.high,
> >> +                interval.low, interval.high);
> >> +            g_tree_remove(as->mappings, (gpointer)&current);
> >> +            interval.high = low - 1;
> >> +        } else if (low > interval.low && high < interval.high) {
> >> +            trace_virtio_iommu_unmap_inc_interval(current.low, current.high);
> >> +            g_tree_remove(as->mappings, (gpointer)&current);
> >> +        } else {
> >> +            break;
> >> +        }
> >> +        if (interval.low >= interval.high) {
> >> +            return VIRTIO_IOMMU_S_OK;
> >> +        } else {
> >> +            mapping = g_tree_lookup(as->mappings, (gpointer)&interval);
> >> +        }
> >> +    }
> > 
> > IIUC for unmap here we are very strict - a extreme case is that when
> > an address space is just created (so no mapping inside), if we send
> > one UNMAP to a range, it'll fail currently, right? Should we loosen
> > it?
> 
> In the next specification version I'd like to slightly redefine unmap
> semantics (to make them more deterministic). Unmapping a range that is
> only partially mapped or not mapped at all would not return an error, and
> would unmap everything that is covered by the range.
> 
> Note that unmapping a partial range (splitting a single mapping) is still
> forbidden. The following pseudocode sequences attempt to illustrate the
> rules I'd like to set. There are no mappings in the address space prior to
> each sequence.
> 
> (1) unmap(addr=0, size=5)        -> succeeds, doesn't unmap anything
> 
> (2) a = map(addr=0, size=10);
>     unmap(0, 10)                 -> succeeds, unmaps a
> 
> (3) a = map(0, 5);
>     b = map(5, 5);
>     unmap(0, 10)                 -> succeeds, unmaps a and b
> 
> (4) a = map(0, 10);
>     unmap(0, 5)                  -> faults, doesn't unmap anything
> 
> (5) a = map(0, 5);
>     b = map(5, 5);
>     unmap(0, 5)                  -> succeeds, unmaps a
> 
> (6) a = map(0, 5);
>     unmap(0, 10)                 -> succeeds, unmaps a
> 
> (7) a = map(0, 5);
>     b = map(10, 5);
>     unmap(0, 15)                 -> succeeds, unmaps a and b
> 
> Previously I left (1), (6) and (7) as an implementation choice. The device
> could either succeed and unmap, or fail and not unmap. This means that if
> a driver wanted guarantees, it had to issue strict map/unmap sequences.
> 
> I believe that VFIO type1 v2 has these semantics, but "v1" allowed (4) to
> succeed and unmap a.

Thanks Jean. It looks good.

Actually I asked mostly for (7). IMHO it is really helpful sometimes
to allow the upper layer to release the things it holds in some easy
way.
Auger Eric July 31, 2017, 1:08 p.m. UTC | #9
Hi Peter, Bharat,

On 17/07/2017 03:28, Peter Xu wrote:
> On Fri, Jul 14, 2017 at 06:40:34AM +0000, Bharat Bhushan wrote:
>> Hi Peter,
>>
>>> -----Original Message-----
>>> From: Peter Xu [mailto:peterx@redhat.com]
>>> Sent: Friday, July 14, 2017 7:48 AM
>>> To: Eric Auger <eric.auger@redhat.com>
>>> Cc: eric.auger.pro@gmail.com; peter.maydell@linaro.org;
>>> alex.williamson@redhat.com; mst@redhat.com; qemu-arm@nongnu.org;
>>> qemu-devel@nongnu.org; jean-philippe.brucker@arm.com;
>>> wei@redhat.com; kevin.tian@intel.com; Bharat Bhushan
>>> <bharat.bhushan@nxp.com>; marc.zyngier@arm.com; tn@semihalf.com;
>>> will.deacon@arm.com; drjones@redhat.com; robin.murphy@arm.com;
>>> christoffer.dall@linaro.org
>>> Subject: Re: [Qemu-devel] [RFC v2 6/8] virtio-iommu: Implement the
>>> translation and commands
>>>
>>> On Wed, Jun 07, 2017 at 06:01:25PM +0200, Eric Auger wrote:
>>>> This patch adds the actual implementation for the translation routine
>>>> and the virtio-iommu commands.
>>>>
>>>> Signed-off-by: Eric Auger <eric.auger@redhat.com>
>>>
>>> [...]
>>>
>>>>  static int virtio_iommu_attach(VirtIOIOMMU *s,
>>>>                                 struct virtio_iommu_req_attach *req)
>>>> @@ -95,10 +135,34 @@ static int virtio_iommu_attach(VirtIOIOMMU *s,
>>>>      uint32_t asid = le32_to_cpu(req->address_space);
>>>>      uint32_t devid = le32_to_cpu(req->device);
>>>>      uint32_t reserved = le32_to_cpu(req->reserved);
>>>> +    viommu_as *as;
>>>> +    viommu_dev *dev;
>>>>
>>>>      trace_virtio_iommu_attach(asid, devid, reserved);
>>>>
>>>> -    return VIRTIO_IOMMU_S_UNSUPP;
>>>> +    dev = g_tree_lookup(s->devices, GUINT_TO_POINTER(devid));
>>>> +    if (dev) {
>>>> +        return -1;
>>>> +    }
>>>> +
>>>> +    as = g_tree_lookup(s->address_spaces, GUINT_TO_POINTER(asid));
>>>> +    if (!as) {
>>>> +        as = g_malloc0(sizeof(*as));
>>>> +        as->id = asid;
>>>> +        as->mappings = g_tree_new_full((GCompareDataFunc)interval_cmp,
>>>> +                                         NULL, NULL, (GDestroyNotify)g_free);
>>>> +        g_tree_insert(s->address_spaces, GUINT_TO_POINTER(asid), as);
>>>> +        trace_virtio_iommu_new_asid(asid);
>>>> +    }
>>>> +
>>>> +    dev = g_malloc0(sizeof(*dev));
>>>> +    dev->as = as;
>>>> +    dev->id = devid;
>>>> +    as->nr_devices++;
>>>> +    trace_virtio_iommu_new_devid(devid);
>>>> +    g_tree_insert(s->devices, GUINT_TO_POINTER(devid), dev);
>>>
>>> Here do we need to record something like a refcount for address space?
>>> Since...
>>
>> We are using "nr_devices" as number of devices attached to an address-space
>>
>>>
>>>> +
>>>> +    return VIRTIO_IOMMU_S_OK;
>>>>  }
>>>>
>>>>  static int virtio_iommu_detach(VirtIOIOMMU *s, @@ -106,10 +170,13 @@
>>>> static int virtio_iommu_detach(VirtIOIOMMU *s,  {
>>>>      uint32_t devid = le32_to_cpu(req->device);
>>>>      uint32_t reserved = le32_to_cpu(req->reserved);
>>>> +    int ret;
>>>>
>>>>      trace_virtio_iommu_detach(devid, reserved);
>>>>
>>>> -    return VIRTIO_IOMMU_S_UNSUPP;
>>>> +    ret = g_tree_remove(s->devices, GUINT_TO_POINTER(devid));
>>>
>>> ... here when detach, imho we should check the refcount: if there is no
>>> device using specific address space, we should release the address space as
>>> well.
>>>
>>> Otherwise there would have no way to destroy an address space?
>>
>>
>> Here if nr_devices == 0 then release the address space, is that ok? 
>>
>> This is how I implemented as part of VFIO integration over this patch series.
>> 	"[RFC PATCH 2/2] virtio-iommu: vfio integration with virtio-iommu"

glib provides g_tree_ref/g_tree_unref. I think the most elegant is to
use g_tree_ref when adding a device and decrementing when removing a
device, as suggested by Peter.

"if the reference count drops to 0, all keys and values will be
destroyed (if destroy functions were specified) and all memory allocated
by tree will be released."

That way we should be able to remove nr_devices from viommu_as

Bharat, if this breaks your implementation I will let you re-introduce
nr_devices as part of the VFIO patch.

Thanks

Eric
> 
> Sorry I didn't read that when posting. It is what I mean.  Thanks,
>
Bharat Bhushan Aug. 3, 2017, 10:48 a.m. UTC | #10
Hi Eric,

> -----Original Message-----

> From: Auger Eric [mailto:eric.auger@redhat.com]

> Sent: Monday, July 31, 2017 6:38 PM

> To: Peter Xu <peterx@redhat.com>; Bharat Bhushan

> <bharat.bhushan@nxp.com>

> Cc: wei@redhat.com; peter.maydell@linaro.org; kevin.tian@intel.com;

> drjones@redhat.com; mst@redhat.com; jean-philippe.brucker@arm.com;

> tn@semihalf.com; will.deacon@arm.com; qemu-devel@nongnu.org;

> alex.williamson@redhat.com; qemu-arm@nongnu.org;

> marc.zyngier@arm.com; robin.murphy@arm.com;

> christoffer.dall@linaro.org; eric.auger.pro@gmail.com

> Subject: Re: [Qemu-devel] [RFC v2 6/8] virtio-iommu: Implement the

> translation and commands

> 

> Hi Peter, Bharat,

> 

> On 17/07/2017 03:28, Peter Xu wrote:

> > On Fri, Jul 14, 2017 at 06:40:34AM +0000, Bharat Bhushan wrote:

> >> Hi Peter,

> >>

> >>> -----Original Message-----

> >>> From: Peter Xu [mailto:peterx@redhat.com]

> >>> Sent: Friday, July 14, 2017 7:48 AM

> >>> To: Eric Auger <eric.auger@redhat.com>

> >>> Cc: eric.auger.pro@gmail.com; peter.maydell@linaro.org;

> >>> alex.williamson@redhat.com; mst@redhat.com; qemu-

> arm@nongnu.org;

> >>> qemu-devel@nongnu.org; jean-philippe.brucker@arm.com;

> >>> wei@redhat.com; kevin.tian@intel.com; Bharat Bhushan

> >>> <bharat.bhushan@nxp.com>; marc.zyngier@arm.com;

> tn@semihalf.com;

> >>> will.deacon@arm.com; drjones@redhat.com; robin.murphy@arm.com;

> >>> christoffer.dall@linaro.org

> >>> Subject: Re: [Qemu-devel] [RFC v2 6/8] virtio-iommu: Implement the

> >>> translation and commands

> >>>

> >>> On Wed, Jun 07, 2017 at 06:01:25PM +0200, Eric Auger wrote:

> >>>> This patch adds the actual implementation for the translation

> >>>> routine and the virtio-iommu commands.

> >>>>

> >>>> Signed-off-by: Eric Auger <eric.auger@redhat.com>

> >>>

> >>> [...]

> >>>

> >>>>  static int virtio_iommu_attach(VirtIOIOMMU *s,

> >>>>                                 struct virtio_iommu_req_attach

> >>>> *req) @@ -95,10 +135,34 @@ static int

> virtio_iommu_attach(VirtIOIOMMU *s,

> >>>>      uint32_t asid = le32_to_cpu(req->address_space);

> >>>>      uint32_t devid = le32_to_cpu(req->device);

> >>>>      uint32_t reserved = le32_to_cpu(req->reserved);

> >>>> +    viommu_as *as;

> >>>> +    viommu_dev *dev;

> >>>>

> >>>>      trace_virtio_iommu_attach(asid, devid, reserved);

> >>>>

> >>>> -    return VIRTIO_IOMMU_S_UNSUPP;

> >>>> +    dev = g_tree_lookup(s->devices, GUINT_TO_POINTER(devid));

> >>>> +    if (dev) {

> >>>> +        return -1;

> >>>> +    }

> >>>> +

> >>>> +    as = g_tree_lookup(s->address_spaces,

> GUINT_TO_POINTER(asid));

> >>>> +    if (!as) {

> >>>> +        as = g_malloc0(sizeof(*as));

> >>>> +        as->id = asid;

> >>>> +        as->mappings =

> g_tree_new_full((GCompareDataFunc)interval_cmp,

> >>>> +                                         NULL, NULL, (GDestroyNotify)g_free);

> >>>> +        g_tree_insert(s->address_spaces, GUINT_TO_POINTER(asid), as);

> >>>> +        trace_virtio_iommu_new_asid(asid);

> >>>> +    }

> >>>> +

> >>>> +    dev = g_malloc0(sizeof(*dev));

> >>>> +    dev->as = as;

> >>>> +    dev->id = devid;

> >>>> +    as->nr_devices++;

> >>>> +    trace_virtio_iommu_new_devid(devid);

> >>>> +    g_tree_insert(s->devices, GUINT_TO_POINTER(devid), dev);

> >>>

> >>> Here do we need to record something like a refcount for address space?

> >>> Since...

> >>

> >> We are using "nr_devices" as number of devices attached to an

> >> address-space

> >>

> >>>

> >>>> +

> >>>> +    return VIRTIO_IOMMU_S_OK;

> >>>>  }

> >>>>

> >>>>  static int virtio_iommu_detach(VirtIOIOMMU *s, @@ -106,10 +170,13

> >>>> @@ static int virtio_iommu_detach(VirtIOIOMMU *s,  {

> >>>>      uint32_t devid = le32_to_cpu(req->device);

> >>>>      uint32_t reserved = le32_to_cpu(req->reserved);

> >>>> +    int ret;

> >>>>

> >>>>      trace_virtio_iommu_detach(devid, reserved);

> >>>>

> >>>> -    return VIRTIO_IOMMU_S_UNSUPP;

> >>>> +    ret = g_tree_remove(s->devices, GUINT_TO_POINTER(devid));

> >>>

> >>> ... here when detach, imho we should check the refcount: if there is

> >>> no device using specific address space, we should release the

> >>> address space as well.

> >>>

> >>> Otherwise there would have no way to destroy an address space?

> >>

> >>

> >> Here if nr_devices == 0 then release the address space, is that ok?

> >>

> >> This is how I implemented as part of VFIO integration over this patch

> series.

> >> 	"[RFC PATCH 2/2] virtio-iommu: vfio integration with virtio-iommu"

> 

> glib provides g_tree_ref/g_tree_unref. I think the most elegant is to use

> g_tree_ref when adding a device and decrementing when removing a

> device, as suggested by Peter.

> 

> "if the reference count drops to 0, all keys and values will be destroyed (if

> destroy functions were specified) and all memory allocated by tree will be

> released."

> 

> That way we should be able to remove nr_devices from viommu_as

> 

> Bharat, if this breaks your implementation I will let you re-introduce

> nr_devices as part of the VFIO patch.


We need to unmap() everything in physical-iommu when last device is detached, seems like g_tree_ref/unref() does not give any handle for this. I will go ahead with adding nr_devices with VFIO integration patches.

Thanks
-Bharat

> 

> Thanks

> 

> Eric

> >

> > Sorry I didn't read that when posting. It is what I mean.  Thanks,

> >

Patch
diff mbox

diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events
index 341dbdf..9196b63 100644
--- a/hw/virtio/trace-events
+++ b/hw/virtio/trace-events
@@ -33,3 +33,9 @@  virtio_iommu_detach(uint32_t dev, uint32_t flags) "dev=%d flags=%d"
 virtio_iommu_map(uint32_t as, uint64_t phys_addr, uint64_t virt_addr, uint64_t size, uint32_t flags) "as= %d phys_addr=0x%"PRIx64" virt_addr=0x%"PRIx64" size=0x%"PRIx64" flags=%d"
 virtio_iommu_unmap(uint32_t as, uint64_t virt_addr, uint64_t size, uint32_t reserved) "as= %d virt_addr=0x%"PRIx64" size=0x%"PRIx64" reserved=%d"
 virtio_iommu_translate(const char *name, uint32_t rid, uint64_t iova, int flag) "mr=%s rid=%d addr=0x%"PRIx64" flag=%d"
+virtio_iommu_new_asid(uint32_t asid) "Allocate a new asid=%d"
+virtio_iommu_new_devid(uint32_t devid) "Allocate a new devid=%d"
+virtio_iommu_unmap_left_interval(uint64_t low, uint64_t high, uint64_t next_low, uint64_t next_high) "Unmap left [0x%"PRIx64",0x%"PRIx64"], new interval=[0x%"PRIx64",0x%"PRIx64"]"
+virtio_iommu_unmap_right_interval(uint64_t low, uint64_t high, uint64_t next_low, uint64_t next_high) "Unmap right [0x%"PRIx64",0x%"PRIx64"], new interval=[0x%"PRIx64",0x%"PRIx64"]"
+virtio_iommu_unmap_inc_interval(uint64_t low, uint64_t high) "Unmap inc [0x%"PRIx64",0x%"PRIx64"]"
+virtio_iommu_translate_result(uint64_t virt_addr, uint64_t phys_addr, uint32_t sid) "0x%"PRIx64" -> 0x%"PRIx64 " for sid=%d"
diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c
index 902c779..cd188fc 100644
--- a/hw/virtio/virtio-iommu.c
+++ b/hw/virtio/virtio-iommu.c
@@ -32,10 +32,37 @@ 
 #include "hw/virtio/virtio-bus.h"
 #include "hw/virtio/virtio-access.h"
 #include "hw/virtio/virtio-iommu.h"
+#include "hw/pci/pci_bus.h"
+#include "hw/pci/pci.h"
 
 /* Max size */
 #define VIOMMU_DEFAULT_QUEUE_SIZE 256
 
+typedef struct viommu_as viommu_as;
+
+typedef struct viommu_mapping {
+    uint64_t virt_addr;
+    uint64_t phys_addr;
+    uint64_t size;
+    uint32_t flags;
+} viommu_mapping;
+
+typedef struct viommu_interval {
+    uint64_t low;
+    uint64_t high;
+} viommu_interval;
+
+typedef struct viommu_dev {
+    uint32_t id;
+    viommu_as *as;
+} viommu_dev;
+
+struct viommu_as {
+    uint32_t id;
+    uint32_t nr_devices;
+    GTree *mappings;
+};
+
 static inline uint16_t smmu_get_sid(IOMMUDevice *dev)
 {
     return  ((pci_bus_num(dev->bus) & 0xff) << 8) | dev->devfn;
@@ -88,6 +115,19 @@  static void virtio_iommu_init_as(VirtIOIOMMU *s)
     }
 }
 
+static gint interval_cmp(gconstpointer a, gconstpointer b, gpointer user_data)
+{
+    viommu_interval *inta = (viommu_interval *)a;
+    viommu_interval *intb = (viommu_interval *)b;
+
+    if (inta->high <= intb->low) {
+        return -1;
+    } else if (intb->high <= inta->low) {
+        return 1;
+    } else {
+        return 0;
+    }
+}
 
 static int virtio_iommu_attach(VirtIOIOMMU *s,
                                struct virtio_iommu_req_attach *req)
@@ -95,10 +135,34 @@  static int virtio_iommu_attach(VirtIOIOMMU *s,
     uint32_t asid = le32_to_cpu(req->address_space);
     uint32_t devid = le32_to_cpu(req->device);
     uint32_t reserved = le32_to_cpu(req->reserved);
+    viommu_as *as;
+    viommu_dev *dev;
 
     trace_virtio_iommu_attach(asid, devid, reserved);
 
-    return VIRTIO_IOMMU_S_UNSUPP;
+    dev = g_tree_lookup(s->devices, GUINT_TO_POINTER(devid));
+    if (dev) {
+        return -1;
+    }
+
+    as = g_tree_lookup(s->address_spaces, GUINT_TO_POINTER(asid));
+    if (!as) {
+        as = g_malloc0(sizeof(*as));
+        as->id = asid;
+        as->mappings = g_tree_new_full((GCompareDataFunc)interval_cmp,
+                                         NULL, NULL, (GDestroyNotify)g_free);
+        g_tree_insert(s->address_spaces, GUINT_TO_POINTER(asid), as);
+        trace_virtio_iommu_new_asid(asid);
+    }
+
+    dev = g_malloc0(sizeof(*dev));
+    dev->as = as;
+    dev->id = devid;
+    as->nr_devices++;
+    trace_virtio_iommu_new_devid(devid);
+    g_tree_insert(s->devices, GUINT_TO_POINTER(devid), dev);
+
+    return VIRTIO_IOMMU_S_OK;
 }
 
 static int virtio_iommu_detach(VirtIOIOMMU *s,
@@ -106,10 +170,13 @@  static int virtio_iommu_detach(VirtIOIOMMU *s,
 {
     uint32_t devid = le32_to_cpu(req->device);
     uint32_t reserved = le32_to_cpu(req->reserved);
+    int ret;
 
     trace_virtio_iommu_detach(devid, reserved);
 
-    return VIRTIO_IOMMU_S_UNSUPP;
+    ret = g_tree_remove(s->devices, GUINT_TO_POINTER(devid));
+
+    return ret ? VIRTIO_IOMMU_S_OK : VIRTIO_IOMMU_S_INVAL;
 }
 
 static int virtio_iommu_map(VirtIOIOMMU *s,
@@ -120,10 +187,37 @@  static int virtio_iommu_map(VirtIOIOMMU *s,
     uint64_t virt_addr = le64_to_cpu(req->virt_addr);
     uint64_t size = le64_to_cpu(req->size);
     uint32_t flags = le32_to_cpu(req->flags);
+    viommu_as *as;
+    viommu_interval *interval;
+    viommu_mapping *mapping;
+
+    interval = g_malloc0(sizeof(*interval));
+
+    interval->low = virt_addr;
+    interval->high = virt_addr + size - 1;
+
+    as = g_tree_lookup(s->address_spaces, GUINT_TO_POINTER(asid));
+    if (!as) {
+        return VIRTIO_IOMMU_S_INVAL;
+    }
+
+    mapping = g_tree_lookup(as->mappings, (gpointer)interval);
+    if (mapping) {
+        g_free(interval);
+        return VIRTIO_IOMMU_S_INVAL;
+    }
 
     trace_virtio_iommu_map(asid, phys_addr, virt_addr, size, flags);
 
-    return VIRTIO_IOMMU_S_UNSUPP;
+    mapping = g_malloc0(sizeof(*mapping));
+    mapping->virt_addr = virt_addr;
+    mapping->phys_addr = phys_addr;
+    mapping->size = size;
+    mapping->flags = flags;
+
+    g_tree_insert(as->mappings, interval, mapping);
+
+    return VIRTIO_IOMMU_S_OK;
 }
 
 static int virtio_iommu_unmap(VirtIOIOMMU *s,
@@ -133,10 +227,64 @@  static int virtio_iommu_unmap(VirtIOIOMMU *s,
     uint64_t virt_addr = le64_to_cpu(req->virt_addr);
     uint64_t size = le64_to_cpu(req->size);
     uint32_t flags = le32_to_cpu(req->flags);
+    viommu_mapping *mapping;
+    viommu_interval interval;
+    viommu_as *as;
 
     trace_virtio_iommu_unmap(asid, virt_addr, size, flags);
 
-    return VIRTIO_IOMMU_S_UNSUPP;
+    as = g_tree_lookup(s->address_spaces, GUINT_TO_POINTER(asid));
+    if (!as) {
+        error_report("%s: no as", __func__);
+        return VIRTIO_IOMMU_S_INVAL;
+    }
+    interval.low = virt_addr;
+    interval.high = virt_addr + size - 1;
+
+    mapping = g_tree_lookup(as->mappings, (gpointer)&interval);
+
+    while (mapping) {
+        viommu_interval current;
+        uint64_t low  = mapping->virt_addr;
+        uint64_t high = mapping->virt_addr + mapping->size - 1;
+
+        current.low = low;
+        current.high = high;
+
+        if (low == interval.low && size >= mapping->size) {
+            g_tree_remove(as->mappings, (gpointer)&current);
+            interval.low = high + 1;
+            trace_virtio_iommu_unmap_left_interval(current.low, current.high,
+                interval.low, interval.high);
+        } else if (high == interval.high && size >= mapping->size) {
+            trace_virtio_iommu_unmap_right_interval(current.low, current.high,
+                interval.low, interval.high);
+            g_tree_remove(as->mappings, (gpointer)&current);
+            interval.high = low - 1;
+        } else if (low > interval.low && high < interval.high) {
+            trace_virtio_iommu_unmap_inc_interval(current.low, current.high);
+            g_tree_remove(as->mappings, (gpointer)&current);
+        } else {
+            break;
+        }
+        if (interval.low >= interval.high) {
+            return VIRTIO_IOMMU_S_OK;
+        } else {
+            mapping = g_tree_lookup(as->mappings, (gpointer)&interval);
+        }
+    }
+
+    if (mapping) {
+        error_report("****** %s: Unmap 0x%"PRIx64" size=0x%"PRIx64
+                     " from 0x%"PRIx64" size=0x%"PRIx64" is not supported",
+                     __func__, interval.low, size,
+                     mapping->virt_addr, mapping->size);
+    } else {
+        error_report("****** %s: no mapping for [0x%"PRIx64",0x%"PRIx64"]",
+                     __func__, interval.low, interval.high);
+    }
+
+    return VIRTIO_IOMMU_S_INVAL;
 }
 
 #define get_payload_size(req) (\
@@ -266,19 +414,46 @@  static IOMMUTLBEntry virtio_iommu_translate(MemoryRegion *mr, hwaddr addr,
                                             IOMMUAccessFlags flag)
 {
     IOMMUDevice *sdev = container_of(mr, IOMMUDevice, iommu_mr);
+    VirtIOIOMMU *s = sdev->viommu;
     uint32_t sid;
+    viommu_dev *dev;
+    viommu_mapping *mapping;
+    viommu_interval interval;
+
+    interval.low = addr;
+    interval.high = addr + 1;
 
     IOMMUTLBEntry entry = {
         .target_as = &address_space_memory,
         .iova = addr,
         .translated_addr = addr,
-        .addr_mask = ~(hwaddr)0,
-        .perm = IOMMU_NONE,
+        .addr_mask = (1 << 12) - 1, /* TODO */
+        .perm = 3,
     };
 
     sid = smmu_get_sid(sdev);
 
     trace_virtio_iommu_translate(mr->name, sid, addr, flag);
+    qemu_mutex_lock(&s->mutex);
+
+    dev = g_tree_lookup(s->devices, GUINT_TO_POINTER(sid));
+    if (!dev) {
+        /* device cannot be attached to another as */
+        printf("%s sid=%d is not known!!\n", __func__, sid);
+        goto unlock;
+    }
+
+    mapping = g_tree_lookup(dev->as->mappings, (gpointer)&interval);
+    if (!mapping) {
+        printf("%s no mapping for 0x%"PRIx64" for sid=%d\n", __func__,
+               addr, sid);
+        goto unlock;
+    }
+    entry.translated_addr = addr - mapping->virt_addr + mapping->phys_addr,
+    trace_virtio_iommu_translate_result(addr, entry.translated_addr, sid);
+
+unlock:
+    qemu_mutex_unlock(&s->mutex);
     return entry;
 }
 
@@ -341,6 +516,12 @@  static inline guint as_uint64_hash(gconstpointer v)
     return (guint)*(const uint64_t *)v;
 }
 
+static gint int_cmp(gconstpointer a, gconstpointer b, gpointer user_data)
+{
+    uint ua = GPOINTER_TO_UINT(a);
+    uint ub = GPOINTER_TO_UINT(b);
+    return (ua > ub) - (ua < ub);
+}
 
 static void virtio_iommu_device_realize(DeviceState *dev, Error **errp)
 {
@@ -362,12 +543,21 @@  static void virtio_iommu_device_realize(DeviceState *dev, Error **errp)
                                             as_uint64_equal,
                                             g_free, g_free);
 
+    s->address_spaces = g_tree_new_full((GCompareDataFunc)int_cmp,
+                                         NULL, NULL, (GDestroyNotify)g_free);
+    s->devices = g_tree_new_full((GCompareDataFunc)int_cmp,
+                                         NULL, NULL, (GDestroyNotify)g_free);
+
     virtio_iommu_init_as(s);
 }
 
 static void virtio_iommu_device_unrealize(DeviceState *dev, Error **errp)
 {
     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
+    VirtIOIOMMU *s = VIRTIO_IOMMU(dev);
+
+    g_tree_destroy(s->address_spaces);
+    g_tree_destroy(s->devices);
 
     virtio_cleanup(vdev);
 }