Patchwork [3/8] vfio: Introduce VFIO address spaces

login
register
mail settings
Submitter Alexey Kardashevskiy
Date Aug. 7, 2013, 8:21 a.m.
Message ID <1375863692-12207-4-git-send-email-aik@ozlabs.ru>
Download mbox | patch
Permalink /patch/265394/
State New
Headers show

Comments

Alexey Kardashevskiy - Aug. 7, 2013, 8:21 a.m.
From: David Gibson <david@gibson.dropbear.id.au>

The only model so far supported for VFIO passthrough devices is the model
usually used on x86, where all of the guest's RAM is mapped into the
(host) IOMMU and there is no IOMMU visible in the guest.

This patch begins to relax this model, introducing the notion of a
VFIOAddressSpace.  This represents a logical DMA address space which will
be visible to one or more VFIO devices by appropriate mapping in the (host)
IOMMU.  Thus the currently global list of containers becomes local to
a VFIOAddressSpace, and we verify that we don't attempt to add a VFIO
group to multiple address spaces.

For now, only one VFIOAddressSpace is created and used, corresponding to
main system memory; that will change in future patches.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 hw/misc/vfio.c | 57 +++++++++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 45 insertions(+), 12 deletions(-)
Alex Williamson - Aug. 12, 2013, 10:07 p.m.
On Wed, 2013-08-07 at 18:21 +1000, Alexey Kardashevskiy wrote:
> From: David Gibson <david@gibson.dropbear.id.au>
> 
> The only model so far supported for VFIO passthrough devices is the model
> usually used on x86, where all of the guest's RAM is mapped into the
> (host) IOMMU and there is no IOMMU visible in the guest.
> 
> This patch begins to relax this model, introducing the notion of a
> VFIOAddressSpace.  This represents a logical DMA address space which will
> be visible to one or more VFIO devices by appropriate mapping in the (host)
> IOMMU.  Thus the currently global list of containers becomes local to
> a VFIOAddressSpace, and we verify that we don't attempt to add a VFIO
> group to multiple address spaces.
> 
> For now, only one VFIOAddressSpace is created and used, corresponding to
> main system memory, that will change in future patches.
> 
> Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> ---
>  hw/misc/vfio.c | 57 +++++++++++++++++++++++++++++++++++++++++++++------------
>  1 file changed, 45 insertions(+), 12 deletions(-)
> 
> diff --git a/hw/misc/vfio.c b/hw/misc/vfio.c
> index adcd23d..bcd67a0 100644
> --- a/hw/misc/vfio.c
> +++ b/hw/misc/vfio.c
> @@ -130,9 +130,17 @@ enum {
>      VFIO_INT_MSIX = 3,
>  };
>  
> +typedef struct VFIOAddressSpace {
> +    AddressSpace *as;
> +    QLIST_HEAD(, VFIOContainer) containers;
> +} VFIOAddressSpace;
> +
> +static VFIOAddressSpace vfio_address_space_memory;
> +
>  struct VFIOGroup;
>  
>  typedef struct VFIOContainer {
> +    VFIOAddressSpace *space;
>      int fd; /* /dev/vfio/vfio, empowered by the attached groups */
>      struct {
>          /* enable abstraction to support various iommu backends */
> @@ -197,9 +205,6 @@ typedef struct VFIOGroup {
>  
>  #define MSIX_CAP_LENGTH 12
>  
> -static QLIST_HEAD(, VFIOContainer)
> -    container_list = QLIST_HEAD_INITIALIZER(container_list);
> -
>  static QLIST_HEAD(, VFIOGroup)
>      group_list = QLIST_HEAD_INITIALIZER(group_list);
>  
> @@ -2604,16 +2609,28 @@ static int vfio_load_rom(VFIODevice *vdev)
>      return 0;
>  }
>  
> -static int vfio_connect_container(VFIOGroup *group)
> +static void vfio_address_space_init(VFIOAddressSpace *space, AddressSpace *as)
> +{
> +    space->as = as;
> +    QLIST_INIT(&space->containers);
> +}
> +
> +static int vfio_connect_container(VFIOGroup *group, VFIOAddressSpace *space)
>  {
>      VFIOContainer *container;
>      int ret, fd;
>  
>      if (group->container) {
> -        return 0;
> +        if (group->container->space == space) {
> +            return 0;
> +        } else {
> +            error_report("vfio: group %d used in multiple address spaces",
> +                         group->groupid);
> +            return -EBUSY;
> +        }
>      }

This original group->container test seems bogus to me; I don't think we
can get here with a container already attached to a group.
>  
> -    QLIST_FOREACH(container, &container_list, next) {
> +    QLIST_FOREACH(container, &space->containers, next) {
>          if (!ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &container->fd)) {
>              group->container = container;
>              QLIST_INSERT_HEAD(&container->group_list, group, container_next);
> @@ -2636,6 +2653,7 @@ static int vfio_connect_container(VFIOGroup *group)
>      }
>  
>      container = g_malloc0(sizeof(*container));
> +    container->space = space;
>      container->fd = fd;
>  
>      if (ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_TYPE1_IOMMU)) {
> @@ -2658,7 +2676,8 @@ static int vfio_connect_container(VFIOGroup *group)
>          container->iommu_data.listener = vfio_memory_listener;
>          container->iommu_data.release = vfio_listener_release;
>  
> -        memory_listener_register(&container->iommu_data.listener, &address_space_memory);
> +        memory_listener_register(&container->iommu_data.listener,
> +                                 container->space->as);
>      } else {
>          error_report("vfio: No available IOMMU models");
>          g_free(container);
> @@ -2667,7 +2686,7 @@ static int vfio_connect_container(VFIOGroup *group)
>      }
>  
>      QLIST_INIT(&container->group_list);
> -    QLIST_INSERT_HEAD(&container_list, container, next);
> +    QLIST_INSERT_HEAD(&space->containers, container, next);
>  
>      group->container = container;
>      QLIST_INSERT_HEAD(&container->group_list, group, container_next);
> @@ -2698,7 +2717,7 @@ static void vfio_disconnect_container(VFIOGroup *group)
>      }
>  }
>  
> -static VFIOGroup *vfio_get_group(int groupid)
> +static VFIOGroup *vfio_get_group(int groupid, VFIOAddressSpace *space)
>  {
>      VFIOGroup *group;
>      char path[32];
> @@ -2706,7 +2725,15 @@ static VFIOGroup *vfio_get_group(int groupid)
>  
>      QLIST_FOREACH(group, &group_list, next) {
>          if (group->groupid == groupid) {
> -            return group;
> +            /* Found it.  Now is it already in the right context? */
> +            assert(group->container);

How would a group w/o a container exist?  (I really don't like asserts -
note the only assert in vfio was added by Avi).

> +            if (group->container->space == space) {
> +                return group;
> +            } else {
> +                error_report("vfio: group %d used in multiple address spaces",
> +                             group->groupid);
> +                return NULL;
> +            }
>          }
>      }
>  
> @@ -2739,7 +2766,7 @@ static VFIOGroup *vfio_get_group(int groupid)
>      group->groupid = groupid;
>      QLIST_INIT(&group->device_list);
>  
> -    if (vfio_connect_container(group)) {
> +    if (vfio_connect_container(group, space)) {
>          error_report("vfio: failed to setup container for group %d", groupid);
>          close(group->fd);
>          g_free(group);
> @@ -3093,7 +3120,12 @@ static int vfio_initfn(PCIDevice *pdev)
>      DPRINTF("%s(%04x:%02x:%02x.%x) group %d\n", __func__, vdev->host.domain,
>              vdev->host.bus, vdev->host.slot, vdev->host.function, groupid);
>  
> -    group = vfio_get_group(groupid);
> +    if (pci_iommu_as(pdev) != &address_space_memory) {
> +        error_report("vfio: DMA address space must be system memory");
> +        return -ENXIO;

-EFAULT?  It's a bad address of sorts.

> +    }
> +
> +    group = vfio_get_group(groupid, &vfio_address_space_memory);
>      if (!group) {
>          error_report("vfio: failed to get group %d", groupid);
>          return -ENOENT;
> @@ -3316,6 +3348,7 @@ static const TypeInfo vfio_pci_dev_info = {
>  
>  static void register_vfio_pci_dev_type(void)
>  {
> +    vfio_address_space_init(&vfio_address_space_memory, &address_space_memory);


Not a fan of this here, but it's short-lived, so OK.

>      type_register_static(&vfio_pci_dev_info);
>  }
>
Paolo Bonzini - Aug. 19, 2013, 1:15 p.m.
Il 13/08/2013 00:07, Alex Williamson ha scritto:
>> > +    if (pci_iommu_as(pdev) != &address_space_memory) {
>> > +        error_report("vfio: DMA address space must be system memory");
>> > +        return -ENXIO;
> -EFAULT?  It's a bad address of sorts.
> 

Accessing it would SIGSEGV, so it is not really EFAULT.  I would just
use EINVAL; the numeric error code will go away as soon as initfn is
changed to use "Error *" (which is needed to propagate sensible error
messages to the QMP client).

Paolo

Patch

diff --git a/hw/misc/vfio.c b/hw/misc/vfio.c
index adcd23d..bcd67a0 100644
--- a/hw/misc/vfio.c
+++ b/hw/misc/vfio.c
@@ -130,9 +130,17 @@  enum {
     VFIO_INT_MSIX = 3,
 };
 
+typedef struct VFIOAddressSpace {
+    AddressSpace *as;
+    QLIST_HEAD(, VFIOContainer) containers;
+} VFIOAddressSpace;
+
+static VFIOAddressSpace vfio_address_space_memory;
+
 struct VFIOGroup;
 
 typedef struct VFIOContainer {
+    VFIOAddressSpace *space;
     int fd; /* /dev/vfio/vfio, empowered by the attached groups */
     struct {
         /* enable abstraction to support various iommu backends */
@@ -197,9 +205,6 @@  typedef struct VFIOGroup {
 
 #define MSIX_CAP_LENGTH 12
 
-static QLIST_HEAD(, VFIOContainer)
-    container_list = QLIST_HEAD_INITIALIZER(container_list);
-
 static QLIST_HEAD(, VFIOGroup)
     group_list = QLIST_HEAD_INITIALIZER(group_list);
 
@@ -2604,16 +2609,28 @@  static int vfio_load_rom(VFIODevice *vdev)
     return 0;
 }
 
-static int vfio_connect_container(VFIOGroup *group)
+static void vfio_address_space_init(VFIOAddressSpace *space, AddressSpace *as)
+{
+    space->as = as;
+    QLIST_INIT(&space->containers);
+}
+
+static int vfio_connect_container(VFIOGroup *group, VFIOAddressSpace *space)
 {
     VFIOContainer *container;
     int ret, fd;
 
     if (group->container) {
-        return 0;
+        if (group->container->space == space) {
+            return 0;
+        } else {
+            error_report("vfio: group %d used in multiple address spaces",
+                         group->groupid);
+            return -EBUSY;
+        }
     }
 
-    QLIST_FOREACH(container, &container_list, next) {
+    QLIST_FOREACH(container, &space->containers, next) {
         if (!ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &container->fd)) {
             group->container = container;
             QLIST_INSERT_HEAD(&container->group_list, group, container_next);
@@ -2636,6 +2653,7 @@  static int vfio_connect_container(VFIOGroup *group)
     }
 
     container = g_malloc0(sizeof(*container));
+    container->space = space;
     container->fd = fd;
 
     if (ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_TYPE1_IOMMU)) {
@@ -2658,7 +2676,8 @@  static int vfio_connect_container(VFIOGroup *group)
         container->iommu_data.listener = vfio_memory_listener;
         container->iommu_data.release = vfio_listener_release;
 
-        memory_listener_register(&container->iommu_data.listener, &address_space_memory);
+        memory_listener_register(&container->iommu_data.listener,
+                                 container->space->as);
     } else {
         error_report("vfio: No available IOMMU models");
         g_free(container);
@@ -2667,7 +2686,7 @@  static int vfio_connect_container(VFIOGroup *group)
     }
 
     QLIST_INIT(&container->group_list);
-    QLIST_INSERT_HEAD(&container_list, container, next);
+    QLIST_INSERT_HEAD(&space->containers, container, next);
 
     group->container = container;
     QLIST_INSERT_HEAD(&container->group_list, group, container_next);
@@ -2698,7 +2717,7 @@  static void vfio_disconnect_container(VFIOGroup *group)
     }
 }
 
-static VFIOGroup *vfio_get_group(int groupid)
+static VFIOGroup *vfio_get_group(int groupid, VFIOAddressSpace *space)
 {
     VFIOGroup *group;
     char path[32];
@@ -2706,7 +2725,15 @@  static VFIOGroup *vfio_get_group(int groupid)
 
     QLIST_FOREACH(group, &group_list, next) {
         if (group->groupid == groupid) {
-            return group;
+            /* Found it.  Now is it already in the right context? */
+            assert(group->container);
+            if (group->container->space == space) {
+                return group;
+            } else {
+                error_report("vfio: group %d used in multiple address spaces",
+                             group->groupid);
+                return NULL;
+            }
         }
     }
 
@@ -2739,7 +2766,7 @@  static VFIOGroup *vfio_get_group(int groupid)
     group->groupid = groupid;
     QLIST_INIT(&group->device_list);
 
-    if (vfio_connect_container(group)) {
+    if (vfio_connect_container(group, space)) {
         error_report("vfio: failed to setup container for group %d", groupid);
         close(group->fd);
         g_free(group);
@@ -3093,7 +3120,12 @@  static int vfio_initfn(PCIDevice *pdev)
     DPRINTF("%s(%04x:%02x:%02x.%x) group %d\n", __func__, vdev->host.domain,
             vdev->host.bus, vdev->host.slot, vdev->host.function, groupid);
 
-    group = vfio_get_group(groupid);
+    if (pci_iommu_as(pdev) != &address_space_memory) {
+        error_report("vfio: DMA address space must be system memory");
+        return -ENXIO;
+    }
+
+    group = vfio_get_group(groupid, &vfio_address_space_memory);
     if (!group) {
         error_report("vfio: failed to get group %d", groupid);
         return -ENOENT;
@@ -3316,6 +3348,7 @@  static const TypeInfo vfio_pci_dev_info = {
 
 static void register_vfio_pci_dev_type(void)
 {
+    vfio_address_space_init(&vfio_address_space_memory, &address_space_memory);
     type_register_static(&vfio_pci_dev_info);
 }