Message ID | 1427779727-13353-5-git-send-email-aik@ozlabs.ru |
---|---|
State | New |
Headers | show |
On Tue, Mar 31, 2015 at 04:28:39PM +1100, Alexey Kardashevskiy wrote: > This enables multiple IOMMU groups in one VFIO container which means > that multiple devices from different groups can share the same IOMMU > table (or tables if DDW). > > This removes a group id from vfio_container_ioctl(). The kernel support > is required for this; if the host kernel does not have the support, > it will allow only one group per container. The PHB's "iommuid" property > is ignored. > > This adds a sanity check that there is just one VFIO container per > PHB address space. > > Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru> [snip] > diff --git a/hw/vfio/common.c b/hw/vfio/common.c > index b012620..99e1900 100644 > --- a/hw/vfio/common.c > +++ b/hw/vfio/common.c > @@ -915,21 +915,23 @@ void vfio_put_base_device(VFIODevice *vbasedev) > close(vbasedev->fd); > } > > -static int vfio_container_do_ioctl(AddressSpace *as, int32_t groupid, > +static int vfio_container_do_ioctl(AddressSpace *as, > int req, void *param) > { > - VFIOGroup *group; > VFIOContainer *container; > - int ret = -1; > + int ret; > + VFIOAddressSpace *space; > > - group = vfio_get_group(groupid, as); > - if (!group) { > - error_report("vfio: group %d not registered", groupid); > - return ret; > - } > + space = vfio_get_address_space(as); > + container = QLIST_FIRST(&space->containers); So getting the container handle from the address space, rather than the group id certainly makes more sense to me. > - container = group->container; > - if (group->container) { > + if (!container) { > + error_report("vfio: container is not set"); > + return -1; > + } else if (QLIST_NEXT(container, next)) { > + error_report("vfio: multiple containers per PHB are not supported"); > + return -1; But if only one PHB per address space is possible, why is the containers field a list in the first place? 
> + } else { > ret = ioctl(container->fd, req, param); > if (ret < 0) { > error_report("vfio: failed to ioctl %d to container: ret=%d, %s", > @@ -937,12 +939,10 @@ static int vfio_container_do_ioctl(AddressSpace *as, int32_t groupid, > } > } > > - vfio_put_group(group); > - > return ret; > } > > -int vfio_container_ioctl(AddressSpace *as, int32_t groupid, > +int vfio_container_ioctl(AddressSpace *as, > int req, void *param) > { > /* We allow only certain ioctls to the container */ > @@ -957,5 +957,5 @@ int vfio_container_ioctl(AddressSpace *as, int32_t groupid, > return -1; > } > > - return vfio_container_do_ioctl(as, groupid, req, param); > + return vfio_container_do_ioctl(as, req, param); > } > diff --git a/include/hw/vfio/vfio.h b/include/hw/vfio/vfio.h > index 0b26cd8..76b5744 100644 > --- a/include/hw/vfio/vfio.h > +++ b/include/hw/vfio/vfio.h > @@ -3,7 +3,7 @@ > > #include "qemu/typedefs.h" > > -extern int vfio_container_ioctl(AddressSpace *as, int32_t groupid, > +extern int vfio_container_ioctl(AddressSpace *as, > int req, void *param); > > #endif
On 04/08/2015 12:01 PM, David Gibson wrote: > On Tue, Mar 31, 2015 at 04:28:39PM +1100, Alexey Kardashevskiy wrote: >> This enables multiple IOMMU groups in one VFIO container which means >> that multiple devices from different groups can share the same IOMMU >> table (or tables if DDW). >> >> This removes a group id from vfio_container_ioctl(). The kernel support >> is required for this; if the host kernel does not have the support, >> it will allow only one group per container. The PHB's "iommuid" property >> is ignored. >> >> This adds a sanity check that there is just one VFIO container per >> PHB address space. >> >> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru> > > [snip] >> diff --git a/hw/vfio/common.c b/hw/vfio/common.c >> index b012620..99e1900 100644 >> --- a/hw/vfio/common.c >> +++ b/hw/vfio/common.c >> @@ -915,21 +915,23 @@ void vfio_put_base_device(VFIODevice *vbasedev) >> close(vbasedev->fd); >> } >> >> -static int vfio_container_do_ioctl(AddressSpace *as, int32_t groupid, >> +static int vfio_container_do_ioctl(AddressSpace *as, >> int req, void *param) >> { >> - VFIOGroup *group; >> VFIOContainer *container; >> - int ret = -1; >> + int ret; >> + VFIOAddressSpace *space; >> >> - group = vfio_get_group(groupid, as); >> - if (!group) { >> - error_report("vfio: group %d not registered", groupid); >> - return ret; >> - } >> + space = vfio_get_address_space(as); >> + container = QLIST_FIRST(&space->containers); > > So getting the container handle from the address space, rather than > the group id certainly makes more sense to me. > >> - container = group->container; >> - if (group->container) { >> + if (!container) { >> + error_report("vfio: container is not set"); >> + return -1; >> + } else if (QLIST_NEXT(container, next)) { >> + error_report("vfio: multiple containers per PHB are not supported"); >> + return -1; > > But if only one PHB per address space is possible, why is the > containers field a list in the first place? 
Historically the list was added in 3df3e0a5872 (a patch of yours :) ). In theory we could implement spapr-pci-bridge (derived from pci-bridge) with isolation capability (i.e. its own LIOBN/DMA window); in this case there could be multiple containers per PHB address space. Other archs could want multiple containers for some other reason. It would help me a lot if you remembered why you kept the list in the first place :) For now I guess I'll move the next patch ("vfio: spapr: Move SPAPR-related code to a separate file") before this one, do s/vfio_container_do_ioctl/vfio_spapr_container_do_ioctl/ and move it to hw/vfio/spapr.c. Makes sense? >> + } else { >> ret = ioctl(container->fd, req, param); >> if (ret < 0) { >> error_report("vfio: failed to ioctl %d to container: ret=%d, %s", >> @@ -937,12 +939,10 @@ static int vfio_container_do_ioctl(AddressSpace *as, int32_t groupid, >> } >> } >> >> - vfio_put_group(group); >> - >> return ret; >> } >> >> -int vfio_container_ioctl(AddressSpace *as, int32_t groupid, >> +int vfio_container_ioctl(AddressSpace *as, >> int req, void *param) >> { >> /* We allow only certain ioctls to the container */ >> @@ -957,5 +957,5 @@ int vfio_container_ioctl(AddressSpace *as, int32_t groupid, >> return -1; >> } >> >> - return vfio_container_do_ioctl(as, groupid, req, param); >> + return vfio_container_do_ioctl(as, req, param); >> } >> diff --git a/include/hw/vfio/vfio.h b/include/hw/vfio/vfio.h >> index 0b26cd8..76b5744 100644 >> --- a/include/hw/vfio/vfio.h >> +++ b/include/hw/vfio/vfio.h >> @@ -3,7 +3,7 @@ >> >> #include "qemu/typedefs.h" >> >> -extern int vfio_container_ioctl(AddressSpace *as, int32_t groupid, >> +extern int vfio_container_ioctl(AddressSpace *as, >> int req, void *param); >> >> #endif >
On Wed, Apr 08, 2015 at 01:45:19PM +1000, Alexey Kardashevskiy wrote: > On 04/08/2015 12:01 PM, David Gibson wrote: > >On Tue, Mar 31, 2015 at 04:28:39PM +1100, Alexey Kardashevskiy wrote: > >>This enables multiple IOMMU groups in one VFIO container which means > >>that multiple devices from different groups can share the same IOMMU > >>table (or tables if DDW). > >> > >>This removes a group id from vfio_container_ioctl(). The kernel support > >>is required for this; if the host kernel does not have the support, > >>it will allow only one group per container. The PHB's "iommuid" property > >>is ignored. > >> > >>This adds a sanity check that there is just one VFIO container per > >>PHB address space. > >> > >>Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru> > > > >[snip] > >>diff --git a/hw/vfio/common.c b/hw/vfio/common.c > >>index b012620..99e1900 100644 > >>--- a/hw/vfio/common.c > >>+++ b/hw/vfio/common.c > >>@@ -915,21 +915,23 @@ void vfio_put_base_device(VFIODevice *vbasedev) > >> close(vbasedev->fd); > >> } > >> > >>-static int vfio_container_do_ioctl(AddressSpace *as, int32_t groupid, > >>+static int vfio_container_do_ioctl(AddressSpace *as, > >> int req, void *param) > >> { > >>- VFIOGroup *group; > >> VFIOContainer *container; > >>- int ret = -1; > >>+ int ret; > >>+ VFIOAddressSpace *space; > >> > >>- group = vfio_get_group(groupid, as); > >>- if (!group) { > >>- error_report("vfio: group %d not registered", groupid); > >>- return ret; > >>- } > >>+ space = vfio_get_address_space(as); > >>+ container = QLIST_FIRST(&space->containers); > > > >So getting the container handle from the address space, rather than > >the group id certainly makes more sense to me. 
> > > >>- container = group->container; > >>- if (group->container) { > >>+ if (!container) { > >>+ error_report("vfio: container is not set"); > >>+ return -1; > >>+ } else if (QLIST_NEXT(container, next)) { > >>+ error_report("vfio: multiple containers per PHB are not supported"); > >>+ return -1; > > > >But if only one PHB per address space is possible, why is the > >containers field a list in the first place? > > > Historically the list was added in 3df3e0a5872 (the patch of yours > :) ). Heh. > In theory we could implement spapr-pci-bridge (derived from pci-bridge) with > isolation capability (i.e. its own LIOBN/DMA window), in this case there > could be multiple containers per PHB address space. Other archs could want > multiple containers for some other reason. It would help me a lot if you > remembered why you kept the list at the first place :) Ok, I've looked over the patch and it has jogged my memory a bit. So the dumb answer is that it's because the per address-space list was replacing a global list of containers. The more useful answer is that I think it was because I was anticipating the possibility of working around the one-group-per-container limit by allowing a single VFIOAddressSpace in qemu to be backed by several containers, whose mappings would be kept in sync from the userspace side by duplicating all mappings. Anyway, I think that means the right way to implement this is by duplicating the ioctl() across all the attached containers, rather than picking just one. > For now I guess I'll move the next patch ("vfio: spapr: Move SPAPR-related > code to a separate file") before this one, do s/vfio_container_do_ioctl/ > vfio_spapr_container_do_ioctl/ and move it to hw/vfio/spapr.c. Makes > sense? That sounds fine, though I don't see that it really addresses the question here. 
> > > >>+ } else { > >> ret = ioctl(container->fd, req, param); > >> if (ret < 0) { > >> error_report("vfio: failed to ioctl %d to container: ret=%d, %s", > >>@@ -937,12 +939,10 @@ static int vfio_container_do_ioctl(AddressSpace *as, int32_t groupid, > >> } > >> } > >> > >>- vfio_put_group(group); > >>- > >> return ret; > >> } > >> > >>-int vfio_container_ioctl(AddressSpace *as, int32_t groupid, > >>+int vfio_container_ioctl(AddressSpace *as, > >> int req, void *param) > >> { > >> /* We allow only certain ioctls to the container */ > >>@@ -957,5 +957,5 @@ int vfio_container_ioctl(AddressSpace *as, int32_t groupid, > >> return -1; > >> } > >> > >>- return vfio_container_do_ioctl(as, groupid, req, param); > >>+ return vfio_container_do_ioctl(as, req, param); > >> } > >>diff --git a/include/hw/vfio/vfio.h b/include/hw/vfio/vfio.h > >>index 0b26cd8..76b5744 100644 > >>--- a/include/hw/vfio/vfio.h > >>+++ b/include/hw/vfio/vfio.h > >>@@ -3,7 +3,7 @@ > >> > >> #include "qemu/typedefs.h" > >> > >>-extern int vfio_container_ioctl(AddressSpace *as, int32_t groupid, > >>+extern int vfio_container_ioctl(AddressSpace *as, > >> int req, void *param); > >> > >> #endif > > > >
On 04/09/2015 04:43 PM, David Gibson wrote: > On Wed, Apr 08, 2015 at 01:45:19PM +1000, Alexey Kardashevskiy wrote: >> On 04/08/2015 12:01 PM, David Gibson wrote: >>> On Tue, Mar 31, 2015 at 04:28:39PM +1100, Alexey Kardashevskiy wrote: >>>> This enables multiple IOMMU groups in one VFIO container which means >>>> that multiple devices from different groups can share the same IOMMU >>>> table (or tables if DDW). >>>> >>>> This removes a group id from vfio_container_ioctl(). The kernel support >>>> is required for this; if the host kernel does not have the support, >>>> it will allow only one group per container. The PHB's "iommuid" property >>>> is ignored. >>>> >>>> This adds a sanity check that there is just one VFIO container per >>>> PHB address space. >>>> >>>> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru> >>> >>> [snip] >>>> diff --git a/hw/vfio/common.c b/hw/vfio/common.c >>>> index b012620..99e1900 100644 >>>> --- a/hw/vfio/common.c >>>> +++ b/hw/vfio/common.c >>>> @@ -915,21 +915,23 @@ void vfio_put_base_device(VFIODevice *vbasedev) >>>> close(vbasedev->fd); >>>> } >>>> >>>> -static int vfio_container_do_ioctl(AddressSpace *as, int32_t groupid, >>>> +static int vfio_container_do_ioctl(AddressSpace *as, >>>> int req, void *param) >>>> { >>>> - VFIOGroup *group; >>>> VFIOContainer *container; >>>> - int ret = -1; >>>> + int ret; >>>> + VFIOAddressSpace *space; >>>> >>>> - group = vfio_get_group(groupid, as); >>>> - if (!group) { >>>> - error_report("vfio: group %d not registered", groupid); >>>> - return ret; >>>> - } >>>> + space = vfio_get_address_space(as); >>>> + container = QLIST_FIRST(&space->containers); >>> >>> So getting the container handle from the address space, rather than >>> the group id certainly makes more sense to me. 
>>> >>>> - container = group->container; >>>> - if (group->container) { >>>> + if (!container) { >>>> + error_report("vfio: container is not set"); >>>> + return -1; >>>> + } else if (QLIST_NEXT(container, next)) { >>>> + error_report("vfio: multiple containers per PHB are not supported"); >>>> + return -1; >>> >>> But if only one PHB per address space is possible, why is the >>> containers field a list in the first place? >> >> >> Historically the list was added in 3df3e0a5872 (the patch of yours >> :) ). > > Heh. > >> In theory we could implement spapr-pci-bridge (derived from pci-bridge) with >> isolation capability (i.e. its own LIOBN/DMA window), in this case there >> could be multiple containers per PHB address space. Other archs could want >> multiple containers for some other reason. It would help me a lot if you >> remembered why you kept the list at the first place :) > > Ok, I've looked over the patch and it has jogged my memory a bit. So > the dumb answer is that it's because the per address-space list was > replacing a global list of containers > > The more useful answer is that I think it was because I was > anticipating the possibility of working around the > one-group-per-container limit by allowing a single VFIOAddressSpace in > qemu to be backed by several containers, whose mappings would be kept > in sync from the userspace side by duplicating all mappings. > > Anyway, I think that means the right way to implement this is by > duplicating the ioctl() across all the attached containers, rather > than picking just one. Right. I will do that. >> For now I guess I'll move the next patch ("vfio: spapr: Move SPAPR-related >> code to a separate file") before this one, do s/vfio_container_do_ioctl/ >> vfio_spapr_container_do_ioctl/ and move it to hw/vfio/spapr.c. Makes >> sense? > > That sounds fine, though I don't see that it really addresses the > question here. You are right, it does not. I won't do it in this patchset then. Thanks. 
> > >> >> >>>> + } else { >>>> ret = ioctl(container->fd, req, param); >>>> if (ret < 0) { >>>> error_report("vfio: failed to ioctl %d to container: ret=%d, %s", >>>> @@ -937,12 +939,10 @@ static int vfio_container_do_ioctl(AddressSpace *as, int32_t groupid, >>>> } >>>> } >>>> >>>> - vfio_put_group(group); >>>> - >>>> return ret; >>>> } >>>> >>>> -int vfio_container_ioctl(AddressSpace *as, int32_t groupid, >>>> +int vfio_container_ioctl(AddressSpace *as, >>>> int req, void *param) >>>> { >>>> /* We allow only certain ioctls to the container */ >>>> @@ -957,5 +957,5 @@ int vfio_container_ioctl(AddressSpace *as, int32_t groupid, >>>> return -1; >>>> } >>>> >>>> - return vfio_container_do_ioctl(as, groupid, req, param); >>>> + return vfio_container_do_ioctl(as, req, param); >>>> } >>>> diff --git a/include/hw/vfio/vfio.h b/include/hw/vfio/vfio.h >>>> index 0b26cd8..76b5744 100644 >>>> --- a/include/hw/vfio/vfio.h >>>> +++ b/include/hw/vfio/vfio.h >>>> @@ -3,7 +3,7 @@ >>>> >>>> #include "qemu/typedefs.h" >>>> >>>> -extern int vfio_container_ioctl(AddressSpace *as, int32_t groupid, >>>> +extern int vfio_container_ioctl(AddressSpace *as, >>>> int req, void *param); >>>> >>>> #endif >>> >> >> >
diff --git a/hw/ppc/spapr_pci_vfio.c b/hw/ppc/spapr_pci_vfio.c index 99a1be5..f8b503e 100644 --- a/hw/ppc/spapr_pci_vfio.c +++ b/hw/ppc/spapr_pci_vfio.c @@ -35,12 +35,7 @@ static void spapr_phb_vfio_finish_realize(sPAPRPHBState *sphb, Error **errp) sPAPRTCETable *tcet; uint32_t liobn = svphb->phb.dma_liobn; - if (svphb->iommugroupid == -1) { - error_setg(errp, "Wrong IOMMU group ID %d", svphb->iommugroupid); - return; - } - - ret = vfio_container_ioctl(&svphb->phb.iommu_as, svphb->iommugroupid, + ret = vfio_container_ioctl(&svphb->phb.iommu_as, VFIO_CHECK_EXTENSION, (void *) VFIO_SPAPR_TCE_IOMMU); if (ret != 1) { @@ -49,7 +44,7 @@ static void spapr_phb_vfio_finish_realize(sPAPRPHBState *sphb, Error **errp) return; } - ret = vfio_container_ioctl(&svphb->phb.iommu_as, svphb->iommugroupid, + ret = vfio_container_ioctl(&sphb->iommu_as, VFIO_IOMMU_SPAPR_TCE_GET_INFO, &info); if (ret) { error_setg_errno(errp, -ret, @@ -116,7 +111,7 @@ static int spapr_phb_vfio_eeh_set_option(sPAPRPHBState *sphb, return RTAS_OUT_PARAM_ERROR; } - ret = vfio_container_ioctl(&svphb->phb.iommu_as, svphb->iommugroupid, + ret = vfio_container_ioctl(&svphb->phb.iommu_as, VFIO_EEH_PE_OP, &op); if (ret < 0) { return RTAS_OUT_HW_ERROR; @@ -132,7 +127,7 @@ static int spapr_phb_vfio_eeh_get_state(sPAPRPHBState *sphb, int *state) int ret; op.op = VFIO_EEH_PE_GET_STATE; - ret = vfio_container_ioctl(&svphb->phb.iommu_as, svphb->iommugroupid, + ret = vfio_container_ioctl(&svphb->phb.iommu_as, VFIO_EEH_PE_OP, &op); if (ret < 0) { return RTAS_OUT_PARAM_ERROR; @@ -162,7 +157,7 @@ static int spapr_phb_vfio_eeh_reset(sPAPRPHBState *sphb, int option) return RTAS_OUT_PARAM_ERROR; } - ret = vfio_container_ioctl(&svphb->phb.iommu_as, svphb->iommugroupid, + ret = vfio_container_ioctl(&svphb->phb.iommu_as, VFIO_EEH_PE_OP, &op); if (ret < 0) { return RTAS_OUT_HW_ERROR; @@ -178,7 +173,7 @@ static int spapr_phb_vfio_eeh_configure(sPAPRPHBState *sphb) int ret; op.op = VFIO_EEH_PE_CONFIGURE; - ret = 
vfio_container_ioctl(&svphb->phb.iommu_as, svphb->iommugroupid, + ret = vfio_container_ioctl(&svphb->phb.iommu_as, VFIO_EEH_PE_OP, &op); if (ret < 0) { return RTAS_OUT_PARAM_ERROR; diff --git a/hw/vfio/common.c b/hw/vfio/common.c index b012620..99e1900 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -915,21 +915,23 @@ void vfio_put_base_device(VFIODevice *vbasedev) close(vbasedev->fd); } -static int vfio_container_do_ioctl(AddressSpace *as, int32_t groupid, +static int vfio_container_do_ioctl(AddressSpace *as, int req, void *param) { - VFIOGroup *group; VFIOContainer *container; - int ret = -1; + int ret; + VFIOAddressSpace *space; - group = vfio_get_group(groupid, as); - if (!group) { - error_report("vfio: group %d not registered", groupid); - return ret; - } + space = vfio_get_address_space(as); + container = QLIST_FIRST(&space->containers); - container = group->container; - if (group->container) { + if (!container) { + error_report("vfio: container is not set"); + return -1; + } else if (QLIST_NEXT(container, next)) { + error_report("vfio: multiple containers per PHB are not supported"); + return -1; + } else { ret = ioctl(container->fd, req, param); if (ret < 0) { error_report("vfio: failed to ioctl %d to container: ret=%d, %s", @@ -937,12 +939,10 @@ static int vfio_container_do_ioctl(AddressSpace *as, int32_t groupid, } } - vfio_put_group(group); - return ret; } -int vfio_container_ioctl(AddressSpace *as, int32_t groupid, +int vfio_container_ioctl(AddressSpace *as, int req, void *param) { /* We allow only certain ioctls to the container */ @@ -957,5 +957,5 @@ int vfio_container_ioctl(AddressSpace *as, int32_t groupid, return -1; } - return vfio_container_do_ioctl(as, groupid, req, param); + return vfio_container_do_ioctl(as, req, param); } diff --git a/include/hw/vfio/vfio.h b/include/hw/vfio/vfio.h index 0b26cd8..76b5744 100644 --- a/include/hw/vfio/vfio.h +++ b/include/hw/vfio/vfio.h @@ -3,7 +3,7 @@ #include "qemu/typedefs.h" -extern int 
vfio_container_ioctl(AddressSpace *as, int32_t groupid, +extern int vfio_container_ioctl(AddressSpace *as, int req, void *param); #endif
This enables multiple IOMMU groups in one VFIO container, which means that multiple devices from different groups can share the same IOMMU table (or tables if DDW). This removes the group id parameter from vfio_container_ioctl(). Kernel support is required for this; if the host kernel does not have the support, it will allow only one group per container. The PHB's "iommuid" property is ignored. This adds a sanity check that there is just one VFIO container per PHB address space. Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru> --- hw/ppc/spapr_pci_vfio.c | 17 ++++++----------- hw/vfio/common.c | 28 ++++++++++++++-------------- include/hw/vfio/vfio.h | 2 +- 3 files changed, 21 insertions(+), 26 deletions(-)