diff mbox

[RFC,11/14] spapr_pci: Allow EEH on spapr-pci-host-bridge

Message ID 1442647117-2726-12-git-send-email-david@gibson.dropbear.id.au
State New
Headers show

Commit Message

David Gibson Sept. 19, 2015, 7:18 a.m. UTC
The pseries machine type has two variants of the PCI Host Bridge device:
spapr-pci-host-bridge and spapr-pci-vfio-host-bridge.  Originally, only the
latter could support VFIO devices, but we've now extended the VFIO code so
that they both can.

However, it's still only spapr-pci-vfio-host-bridge which supports the PAPR
Enhanced Error Handling (EEH) interfaces.  The reason is that we don't yet
have a way to determine the correct VFIOGroup for EEH operations on the
"plain" host bridge.

Handling this in general is problematic due to some limitations in the
current code, and some bugs in the kernel EEH interfaces.  However, it's
easy enough to do for the unambiguous case: where there is only one VFIO
group used on the whole host bridge i.e. one Partitionable Endpoint (PE).

This re-implements spapr_phb_check_vfio_group() in terms of the host
bridge's Partitionable Endpoints, allowing EEH on any host bridge in the
unambiguous case.  This is enough to make spapr-pci-host-bridge support
EEH as well as spapr-pci-vfio-host-bridge, since the latter only supported
devices from one IOMMU group anyway (although it didn't properly
enforce that).

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 hw/ppc/spapr_pci.c          | 36 ++++++++++++++++++++++++++++++++++++
 hw/ppc/spapr_pci_vfio.c     | 18 ------------------
 include/hw/pci-host/spapr.h |  1 -
 3 files changed, 36 insertions(+), 19 deletions(-)

Comments

Alex Williamson Sept. 23, 2015, 5:28 p.m. UTC | #1
On Sat, 2015-09-19 at 17:18 +1000, David Gibson wrote:
> The pseries machine type has two variants of the PCI Host Bridge device:
> spapr-pci-host-bridge and spapr-pci-vfio-host-bridge.  Originally, only the
> latter could support VFIO devices, but we've now extended the VFIO code so
> that they both can.
> 
> However, it's still only spapr-pci-vfio-host-bridge which supports the PAPR
> Enhanced Error Handling (EEH) interfaces.  The reason is that we don't yet
> have a way to determine the correct VFIOGroup for EEH operations on the
> "plain" host bridge.
> 
> Handling this in general is problematic due to some limitations in the
> current code, and some bugs in the kernel EEH interfaces.  However, it's
> easy enough to do for the unambiguous case: where there is only one VFIO
> group used on the whole host bridge i.e. one Partitionable Endpoint (PE).
> 
> This re-implements spapr_phb_check_vfio_group() in terms of the host
> bridge's Partitionable Endpoints, allowing EEH on any host bridge in the
> unambiguous case.  This is enough to make spapr-pci-host-bridge support
> EEH as well as spapr-pci-vfio-host-bridge, since the latter only supported
> devices from one IOMMU group anyway (although it didn't properly
> enforce that).
> 
> Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> ---
>  hw/ppc/spapr_pci.c          | 36 ++++++++++++++++++++++++++++++++++++
>  hw/ppc/spapr_pci_vfio.c     | 18 ------------------
>  include/hw/pci-host/spapr.h |  1 -
>  3 files changed, 36 insertions(+), 19 deletions(-)
> 
> diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
> index 81ad3ae..5614b45 100644
> --- a/hw/ppc/spapr_pci.c
> +++ b/hw/ppc/spapr_pci.c
> @@ -41,6 +41,8 @@
>  #include "hw/ppc/spapr_drc.h"
>  #include "sysemu/device_tree.h"
>  
> +#include "hw/vfio/vfio-common.h"
> +
>  /* Copied from the kernel arch/powerpc/platforms/pseries/msi.c */
>  #define RTAS_QUERY_FN           0
>  #define RTAS_CHANGE_FN          1
> @@ -446,6 +448,40 @@ static sPAPRPHBGuestPE *spapr_phb_pe_by_device(sPAPRPHBState *phb,
>      return pe;
>  }
>  
> +static int spapr_phb_check_vfio_group(sPAPRPHBState *phb, VFIOGroup **gpp)
> +{
> +    sPAPRPHBGuestPE *pe;
> +
> +    if (QLIST_EMPTY(&phb->pe_list)) {
> +        /* No EEH capable devices on this PHB */
> +        return RTAS_OUT_PARAM_ERROR;
> +    }
> +
> +    /* Limitations in both qemu and the kernel mean that, for now, EEH
> +     * won't work if there are devices from more than one PE
> +     * (i.e. IOMMU group) on the same PHB */
> +    pe = QLIST_FIRST(&phb->pe_list);
> +    if (QLIST_NEXT(pe, list)) {
> +        error_report("spapr-pci-host-bridge: EEH attempted on PHB with multiple"
> +                     " IOMMU groups");

Don't wrap lines with search-able strings

> +        return RTAS_OUT_HW_ERROR;
> +    }
> +
> +    if (!object_dynamic_cast(OBJECT(phb), "spapr-pci-vfio-host-bridge")) {
> +        sPAPRPHBVFIOState *svphb = SPAPR_PCI_VFIO_HOST_BRIDGE(phb);
> +        /* FIXME: this is an abstraction violation */
> +        if (pe->group->groupid != svphb->iommugroupid) {
> +            error_report("spapr-pci-vfio-host-bridge: Bad IOMMU group");
> +            return RTAS_OUT_HW_ERROR;
> +        }
> +    }
> +
> +    if (gpp) {
> +        *gpp = pe->group;
> +    }
> +    return RTAS_OUT_SUCCESS;


The structure of this function finally makes some sense once we get
here.  I'm still not sure whether RTAS error codes make sense though,
but it's just a nit.  I'd still prefer the function was renamed, "check"
indicates a verification, not a return.  The primary purpose of calling
this function is to get a group, not simply to check the validity.

> +}
> +
>  static void rtas_ibm_set_eeh_option(PowerPCCPU *cpu,
>                                      sPAPRMachineState *spapr,
>                                      uint32_t token, uint32_t nargs,
> diff --git a/hw/ppc/spapr_pci_vfio.c b/hw/ppc/spapr_pci_vfio.c
> index b61923c..a08292a 100644
> --- a/hw/ppc/spapr_pci_vfio.c
> +++ b/hw/ppc/spapr_pci_vfio.c
> @@ -24,29 +24,11 @@
>  #include "hw/vfio/vfio.h"
>  #include "hw/vfio/vfio-eeh.h"
>  
> -#include "hw/vfio/vfio-common.h"
> -
>  static Property spapr_phb_vfio_properties[] = {
>      DEFINE_PROP_INT32("iommu", sPAPRPHBVFIOState, iommugroupid, -1),
>      DEFINE_PROP_END_OF_LIST(),
>  };
>  
> -int spapr_phb_check_vfio_group(sPAPRPHBState *phb, VFIOGroup **gpp)
> -{
> -    VFIOGroup *group;
> -
> -    if (!object_dynamic_cast(OBJECT(phb), "spapr-pci-vfio-host-bridge")) {
> -        return RTAS_OUT_PARAM_ERROR;
> -    }
> -
> -    /* FIXME: this is an abstraction violation */
> -    group = vfio_get_group(SPAPR_PCI_VFIO_HOST_BRIDGE(phb)->iommugroupid,
> -                           &phb->iommu_as);
> -    if (gpp)
> -        *gpp = group;
> -    return RTAS_OUT_SUCCESS;
> -}
> -
>  static void spapr_phb_vfio_finish_realize(sPAPRPHBState *sphb, Error **errp)
>  {
>      sPAPRPHBVFIOState *svphb = SPAPR_PCI_VFIO_HOST_BRIDGE(sphb);
> diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h
> index 535e5ef..8c8d187 100644
> --- a/include/hw/pci-host/spapr.h
> +++ b/include/hw/pci-host/spapr.h
> @@ -138,7 +138,6 @@ void spapr_pci_msi_init(sPAPRMachineState *spapr, hwaddr addr);
>  
>  void spapr_pci_rtas_init(void);
>  
> -int spapr_phb_check_vfio_group(sPAPRPHBState *phb, VFIOGroup **gpp);
>  sPAPRPHBState *spapr_pci_find_phb(sPAPRMachineState *spapr, uint64_t buid);
>  PCIDevice *spapr_pci_find_dev(sPAPRMachineState *spapr, uint64_t buid,
>                                uint32_t config_addr);
David Gibson Sept. 24, 2015, 1:49 a.m. UTC | #2
On Wed, Sep 23, 2015 at 11:28:01AM -0600, Alex Williamson wrote:
> On Sat, 2015-09-19 at 17:18 +1000, David Gibson wrote:
> > The pseries machine type has two variants of the PCI Host Bridge device:
> > spapr-pci-host-bridge and spapr-pci-vfio-host-bridge.  Originally, only the
> > latter could support VFIO devices, but we've now extended the VFIO code so
> > that they both can.
> > 
> > However, it's still only spapr-pci-vfio-host-bridge which supports the PAPR
> > Enhanced Error Handling (EEH) interfaces.  The reason is that we don't yet
> > have a way to determine the correct VFIOGroup for EEH operations on the
> > "plain" host bridge.
> > 
> > Handling this in general is problematic due to some limitations in the
> > current code, and some bugs in the kernel EEH interfaces.  However, it's
> > easy enough to do for the unambiguous case: where there is only one VFIO
> > group used on the whole host bridge i.e. one Partitionable Endpoint (PE).
> > 
> > This re-implements spapr_phb_check_vfio_group() in terms of the host
> > bridge's Partitionable Endpoints, allowing EEH on any host bridge in the
> > unambiguous case.  This is enough to make spapr-pci-host-bridge support
> > EEH as well as spapr-pci-vfio-host-bridge, since the latter only supported
> > devices from one IOMMU group anyway (although it didn't properly
> > enforce that).
> > 
> > Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> > ---
> >  hw/ppc/spapr_pci.c          | 36 ++++++++++++++++++++++++++++++++++++
> >  hw/ppc/spapr_pci_vfio.c     | 18 ------------------
> >  include/hw/pci-host/spapr.h |  1 -
> >  3 files changed, 36 insertions(+), 19 deletions(-)
> > 
> > diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
> > index 81ad3ae..5614b45 100644
> > --- a/hw/ppc/spapr_pci.c
> > +++ b/hw/ppc/spapr_pci.c
> > @@ -41,6 +41,8 @@
> >  #include "hw/ppc/spapr_drc.h"
> >  #include "sysemu/device_tree.h"
> >  
> > +#include "hw/vfio/vfio-common.h"
> > +
> >  /* Copied from the kernel arch/powerpc/platforms/pseries/msi.c */
> >  #define RTAS_QUERY_FN           0
> >  #define RTAS_CHANGE_FN          1
> > @@ -446,6 +448,40 @@ static sPAPRPHBGuestPE *spapr_phb_pe_by_device(sPAPRPHBState *phb,
> >      return pe;
> >  }
> >  
> > +static int spapr_phb_check_vfio_group(sPAPRPHBState *phb, VFIOGroup **gpp)
> > +{
> > +    sPAPRPHBGuestPE *pe;
> > +
> > +    if (QLIST_EMPTY(&phb->pe_list)) {
> > +        /* No EEH capable devices on this PHB */
> > +        return RTAS_OUT_PARAM_ERROR;
> > +    }
> > +
> > +    /* Limitations in both qemu and the kernel mean that, for now, EEH
> > +     * won't work if there are devices from more than one PE
> > +     * (i.e. IOMMU group) on the same PHB */
> > +    pe = QLIST_FIRST(&phb->pe_list);
> > +    if (QLIST_NEXT(pe, list)) {
> > +        error_report("spapr-pci-host-bridge: EEH attempted on PHB with multiple"
> > +                     " IOMMU groups");
> 
> Don't wrap lines with search-able strings

Noted.

> > +        return RTAS_OUT_HW_ERROR;
> > +    }
> > +
> > +    if (!object_dynamic_cast(OBJECT(phb), "spapr-pci-vfio-host-bridge")) {
> > +        sPAPRPHBVFIOState *svphb = SPAPR_PCI_VFIO_HOST_BRIDGE(phb);
> > +        /* FIXME: this is an abstraction violation */
> > +        if (pe->group->groupid != svphb->iommugroupid) {
> > +            error_report("spapr-pci-vfio-host-bridge: Bad IOMMU group");
> > +            return RTAS_OUT_HW_ERROR;
> > +        }
> > +    }
> > +
> > +    if (gpp) {
> > +        *gpp = pe->group;
> > +    }
> > +    return RTAS_OUT_SUCCESS;
> 
> 
> The structure of this function finally makes some sense once we get
> here.

Yeah, I started off with something more like what you suggested in
earlier comments, but I changed to this rather awkward structure to
avoid churning the callers as this moves towards its final form

> I'm still not sure whether RTAS error codes make sense though,
> but it's just a nit.

Well, (nearly) all the callers are in RTAS, and translating the error
codes there seems pretty ugly too.

> I'd still prefer the function was renamed, "check"
> indicates a verification, not a return.  The primary purpose of calling
> this function is to get a group, not simply to check the validity.

Yeah, it's a crap name.  How about spapr_phb_find_vfio_group().

Fwiw, I plan to make this function go away to as I rework things to
remove the one-group-per-PHB restriction.

> > +}
> > +
> >  static void rtas_ibm_set_eeh_option(PowerPCCPU *cpu,
> >                                      sPAPRMachineState *spapr,
> >                                      uint32_t token, uint32_t nargs,
> > diff --git a/hw/ppc/spapr_pci_vfio.c b/hw/ppc/spapr_pci_vfio.c
> > index b61923c..a08292a 100644
> > --- a/hw/ppc/spapr_pci_vfio.c
> > +++ b/hw/ppc/spapr_pci_vfio.c
> > @@ -24,29 +24,11 @@
> >  #include "hw/vfio/vfio.h"
> >  #include "hw/vfio/vfio-eeh.h"
> >  
> > -#include "hw/vfio/vfio-common.h"
> > -
> >  static Property spapr_phb_vfio_properties[] = {
> >      DEFINE_PROP_INT32("iommu", sPAPRPHBVFIOState, iommugroupid, -1),
> >      DEFINE_PROP_END_OF_LIST(),
> >  };
> >  
> > -int spapr_phb_check_vfio_group(sPAPRPHBState *phb, VFIOGroup **gpp)
> > -{
> > -    VFIOGroup *group;
> > -
> > -    if (!object_dynamic_cast(OBJECT(phb), "spapr-pci-vfio-host-bridge")) {
> > -        return RTAS_OUT_PARAM_ERROR;
> > -    }
> > -
> > -    /* FIXME: this is an abstraction violation */
> > -    group = vfio_get_group(SPAPR_PCI_VFIO_HOST_BRIDGE(phb)->iommugroupid,
> > -                           &phb->iommu_as);
> > -    if (gpp)
> > -        *gpp = group;
> > -    return RTAS_OUT_SUCCESS;
> > -}
> > -
> >  static void spapr_phb_vfio_finish_realize(sPAPRPHBState *sphb, Error **errp)
> >  {
> >      sPAPRPHBVFIOState *svphb = SPAPR_PCI_VFIO_HOST_BRIDGE(sphb);
> > diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h
> > index 535e5ef..8c8d187 100644
> > --- a/include/hw/pci-host/spapr.h
> > +++ b/include/hw/pci-host/spapr.h
> > @@ -138,7 +138,6 @@ void spapr_pci_msi_init(sPAPRMachineState *spapr, hwaddr addr);
> >  
> >  void spapr_pci_rtas_init(void);
> >  
> > -int spapr_phb_check_vfio_group(sPAPRPHBState *phb, VFIOGroup **gpp);
> >  sPAPRPHBState *spapr_pci_find_phb(sPAPRMachineState *spapr, uint64_t buid);
> >  PCIDevice *spapr_pci_find_dev(sPAPRMachineState *spapr, uint64_t buid,
> >                                uint32_t config_addr);
> 
> 
>
Alex Williamson Sept. 24, 2015, 2:19 a.m. UTC | #3
On Thu, 2015-09-24 at 11:49 +1000, David Gibson wrote:
> On Wed, Sep 23, 2015 at 11:28:01AM -0600, Alex Williamson wrote:
> > On Sat, 2015-09-19 at 17:18 +1000, David Gibson wrote:
> > > The pseries machine type has two variants of the PCI Host Bridge device:
> > > spapr-pci-host-bridge and spapr-pci-vfio-host-bridge.  Originally, only the
> > > latter could support VFIO devices, but we've now extended the VFIO code so
> > > that they both can.
> > > 
> > > However, it's still only spapr-pci-vfio-host-bridge which supports the PAPR
> > > Enhanced Error Handling (EEH) interfaces.  The reason is that we don't yet
> > > have a way to determine the correct VFIOGroup for EEH operations on the
> > > "plain" host bridge.
> > > 
> > > Handling this in general is problematic due to some limitations in the
> > > current code, and some bugs in the kernel EEH interfaces.  However, it's
> > > easy enough to do for the unambiguous case: where there is only one VFIO
> > > group used on the whole host bridge i.e. one Partitionable Endpoint (PE).
> > > 
> > > This re-implements spapr_phb_check_vfio_group() in terms of the host
> > > bridge's Partitionable Endpoints, allowing EEH on any host bridge in the
> > > unambiguous case.  This is enough to make spapr-pci-host-bridge support
> > > EEH as well as spapr-pci-vfio-host-bridge, since the latter only supported
> > > devices from one IOMMU group anyway (although it didn't properly
> > > enforce that).
> > > 
> > > Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> > > ---
> > >  hw/ppc/spapr_pci.c          | 36 ++++++++++++++++++++++++++++++++++++
> > >  hw/ppc/spapr_pci_vfio.c     | 18 ------------------
> > >  include/hw/pci-host/spapr.h |  1 -
> > >  3 files changed, 36 insertions(+), 19 deletions(-)
> > > 
> > > diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
> > > index 81ad3ae..5614b45 100644
> > > --- a/hw/ppc/spapr_pci.c
> > > +++ b/hw/ppc/spapr_pci.c
> > > @@ -41,6 +41,8 @@
> > >  #include "hw/ppc/spapr_drc.h"
> > >  #include "sysemu/device_tree.h"
> > >  
> > > +#include "hw/vfio/vfio-common.h"
> > > +
> > >  /* Copied from the kernel arch/powerpc/platforms/pseries/msi.c */
> > >  #define RTAS_QUERY_FN           0
> > >  #define RTAS_CHANGE_FN          1
> > > @@ -446,6 +448,40 @@ static sPAPRPHBGuestPE *spapr_phb_pe_by_device(sPAPRPHBState *phb,
> > >      return pe;
> > >  }
> > >  
> > > +static int spapr_phb_check_vfio_group(sPAPRPHBState *phb, VFIOGroup **gpp)
> > > +{
> > > +    sPAPRPHBGuestPE *pe;
> > > +
> > > +    if (QLIST_EMPTY(&phb->pe_list)) {
> > > +        /* No EEH capable devices on this PHB */
> > > +        return RTAS_OUT_PARAM_ERROR;
> > > +    }
> > > +
> > > +    /* Limitations in both qemu and the kernel mean that, for now, EEH
> > > +     * won't work if there are devices from more than one PE
> > > +     * (i.e. IOMMU group) on the same PHB */
> > > +    pe = QLIST_FIRST(&phb->pe_list);
> > > +    if (QLIST_NEXT(pe, list)) {
> > > +        error_report("spapr-pci-host-bridge: EEH attempted on PHB with multiple"
> > > +                     " IOMMU groups");
> > 
> > Don't wrap lines with search-able strings
> 
> Noted.
> 
> > > +        return RTAS_OUT_HW_ERROR;
> > > +    }
> > > +
> > > +    if (!object_dynamic_cast(OBJECT(phb), "spapr-pci-vfio-host-bridge")) {
> > > +        sPAPRPHBVFIOState *svphb = SPAPR_PCI_VFIO_HOST_BRIDGE(phb);
> > > +        /* FIXME: this is an abstraction violation */
> > > +        if (pe->group->groupid != svphb->iommugroupid) {
> > > +            error_report("spapr-pci-vfio-host-bridge: Bad IOMMU group");
> > > +            return RTAS_OUT_HW_ERROR;
> > > +        }
> > > +    }
> > > +
> > > +    if (gpp) {
> > > +        *gpp = pe->group;
> > > +    }
> > > +    return RTAS_OUT_SUCCESS;
> > 
> > 
> > The structure of this function finally makes some sense once we get
> > here.
> 
> Yeah, I started off with something more like what you suggested in
> earlier comments, but I changed to this rather awkward structure to
> avoid churning the callers as this moves towards its final form
> 
> > I'm still not sure whether RTAS error codes make sense though,
> > but it's just a nit.
> 
> Well, (nearly) all the callers are in RTAS, and translating the error
> codes there seems pretty ugly too.
> 
> > I'd still prefer the function was renamed, "check"
> > indicates a verification, not a return.  The primary purpose of calling
> > this function is to get a group, not simply to check the validity.
> 
> Yeah, it's a crap name.  How about spapr_phb_find_vfio_group().

Why not just spapr_phb_get_vfio_group() since this started out as just a
wrapper for vfio_get_group() and that's still what it's doing even
though it's now cached on sPAPRPHBGuestPE rather than retrieved each
time.

> Fwiw, I plan to make this function go away to as I rework things to
> remove the one-group-per-PHB restriction.

I figured, thanks for taking on the challenge of cleaning up this
layering.

Alex

> > > +}
> > > +
> > >  static void rtas_ibm_set_eeh_option(PowerPCCPU *cpu,
> > >                                      sPAPRMachineState *spapr,
> > >                                      uint32_t token, uint32_t nargs,
> > > diff --git a/hw/ppc/spapr_pci_vfio.c b/hw/ppc/spapr_pci_vfio.c
> > > index b61923c..a08292a 100644
> > > --- a/hw/ppc/spapr_pci_vfio.c
> > > +++ b/hw/ppc/spapr_pci_vfio.c
> > > @@ -24,29 +24,11 @@
> > >  #include "hw/vfio/vfio.h"
> > >  #include "hw/vfio/vfio-eeh.h"
> > >  
> > > -#include "hw/vfio/vfio-common.h"
> > > -
> > >  static Property spapr_phb_vfio_properties[] = {
> > >      DEFINE_PROP_INT32("iommu", sPAPRPHBVFIOState, iommugroupid, -1),
> > >      DEFINE_PROP_END_OF_LIST(),
> > >  };
> > >  
> > > -int spapr_phb_check_vfio_group(sPAPRPHBState *phb, VFIOGroup **gpp)
> > > -{
> > > -    VFIOGroup *group;
> > > -
> > > -    if (!object_dynamic_cast(OBJECT(phb), "spapr-pci-vfio-host-bridge")) {
> > > -        return RTAS_OUT_PARAM_ERROR;
> > > -    }
> > > -
> > > -    /* FIXME: this is an abstraction violation */
> > > -    group = vfio_get_group(SPAPR_PCI_VFIO_HOST_BRIDGE(phb)->iommugroupid,
> > > -                           &phb->iommu_as);
> > > -    if (gpp)
> > > -        *gpp = group;
> > > -    return RTAS_OUT_SUCCESS;
> > > -}
> > > -
> > >  static void spapr_phb_vfio_finish_realize(sPAPRPHBState *sphb, Error **errp)
> > >  {
> > >      sPAPRPHBVFIOState *svphb = SPAPR_PCI_VFIO_HOST_BRIDGE(sphb);
> > > diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h
> > > index 535e5ef..8c8d187 100644
> > > --- a/include/hw/pci-host/spapr.h
> > > +++ b/include/hw/pci-host/spapr.h
> > > @@ -138,7 +138,6 @@ void spapr_pci_msi_init(sPAPRMachineState *spapr, hwaddr addr);
> > >  
> > >  void spapr_pci_rtas_init(void);
> > >  
> > > -int spapr_phb_check_vfio_group(sPAPRPHBState *phb, VFIOGroup **gpp);
> > >  sPAPRPHBState *spapr_pci_find_phb(sPAPRMachineState *spapr, uint64_t buid);
> > >  PCIDevice *spapr_pci_find_dev(sPAPRMachineState *spapr, uint64_t buid,
> > >                                uint32_t config_addr);
> > 
> > 
> > 
>
David Gibson Sept. 24, 2015, 4:11 a.m. UTC | #4
On Wed, Sep 23, 2015 at 08:19:36PM -0600, Alex Williamson wrote:
> On Thu, 2015-09-24 at 11:49 +1000, David Gibson wrote:
> > On Wed, Sep 23, 2015 at 11:28:01AM -0600, Alex Williamson wrote:
> > > On Sat, 2015-09-19 at 17:18 +1000, David Gibson wrote:
> > > > The pseries machine type has two variants of the PCI Host Bridge device:
> > > > spapr-pci-host-bridge and spapr-pci-vfio-host-bridge.  Originally, only the
> > > > latter could support VFIO devices, but we've now extended the VFIO code so
> > > > that they both can.
> > > > 
> > > > However, it's still only spapr-pci-vfio-host-bridge which supports the PAPR
> > > > Enhanced Error Handling (EEH) interfaces.  The reason is that we don't yet
> > > > have a way to determine the correct VFIOGroup for EEH operations on the
> > > > "plain" host bridge.
> > > > 
> > > > Handling this in general is problematic due to some limitations in the
> > > > current code, and some bugs in the kernel EEH interfaces.  However, it's
> > > > easy enough to do for the unambiguous case: where there is only one VFIO
> > > > group used on the whole host bridge i.e. one Partitionable Endpoint (PE).
> > > > 
> > > > This re-implements spapr_phb_check_vfio_group() in terms of the host
> > > > bridge's Partitionable Endpoints, allowing EEH on any host bridge in the
> > > > unambiguous case.  This is enough to make spapr-pci-host-bridge support
> > > > EEH as well as spapr-pci-vfio-host-bridge, since the latter only supported
> > > > devices from one IOMMU group anyway (although it didn't properly
> > > > enforce that).
> > > > 
> > > > Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> > > > ---
> > > >  hw/ppc/spapr_pci.c          | 36 ++++++++++++++++++++++++++++++++++++
> > > >  hw/ppc/spapr_pci_vfio.c     | 18 ------------------
> > > >  include/hw/pci-host/spapr.h |  1 -
> > > >  3 files changed, 36 insertions(+), 19 deletions(-)
> > > > 
> > > > diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
> > > > index 81ad3ae..5614b45 100644
> > > > --- a/hw/ppc/spapr_pci.c
> > > > +++ b/hw/ppc/spapr_pci.c
> > > > @@ -41,6 +41,8 @@
> > > >  #include "hw/ppc/spapr_drc.h"
> > > >  #include "sysemu/device_tree.h"
> > > >  
> > > > +#include "hw/vfio/vfio-common.h"
> > > > +
> > > >  /* Copied from the kernel arch/powerpc/platforms/pseries/msi.c */
> > > >  #define RTAS_QUERY_FN           0
> > > >  #define RTAS_CHANGE_FN          1
> > > > @@ -446,6 +448,40 @@ static sPAPRPHBGuestPE *spapr_phb_pe_by_device(sPAPRPHBState *phb,
> > > >      return pe;
> > > >  }
> > > >  
> > > > +static int spapr_phb_check_vfio_group(sPAPRPHBState *phb, VFIOGroup **gpp)
> > > > +{
> > > > +    sPAPRPHBGuestPE *pe;
> > > > +
> > > > +    if (QLIST_EMPTY(&phb->pe_list)) {
> > > > +        /* No EEH capable devices on this PHB */
> > > > +        return RTAS_OUT_PARAM_ERROR;
> > > > +    }
> > > > +
> > > > +    /* Limitations in both qemu and the kernel mean that, for now, EEH
> > > > +     * won't work if there are devices from more than one PE
> > > > +     * (i.e. IOMMU group) on the same PHB */
> > > > +    pe = QLIST_FIRST(&phb->pe_list);
> > > > +    if (QLIST_NEXT(pe, list)) {
> > > > +        error_report("spapr-pci-host-bridge: EEH attempted on PHB with multiple"
> > > > +                     " IOMMU groups");
> > > 
> > > Don't wrap lines with search-able strings
> > 
> > Noted.
> > 
> > > > +        return RTAS_OUT_HW_ERROR;
> > > > +    }
> > > > +
> > > > +    if (!object_dynamic_cast(OBJECT(phb), "spapr-pci-vfio-host-bridge")) {
> > > > +        sPAPRPHBVFIOState *svphb = SPAPR_PCI_VFIO_HOST_BRIDGE(phb);
> > > > +        /* FIXME: this is an abstraction violation */
> > > > +        if (pe->group->groupid != svphb->iommugroupid) {
> > > > +            error_report("spapr-pci-vfio-host-bridge: Bad IOMMU group");
> > > > +            return RTAS_OUT_HW_ERROR;
> > > > +        }
> > > > +    }
> > > > +
> > > > +    if (gpp) {
> > > > +        *gpp = pe->group;
> > > > +    }
> > > > +    return RTAS_OUT_SUCCESS;
> > > 
> > > 
> > > The structure of this function finally makes some sense once we get
> > > here.
> > 
> > Yeah, I started off with something more like what you suggested in
> > earlier comments, but I changed to this rather awkward structure to
> > avoid churning the callers as this moves towards its final form
> > 
> > > I'm still not sure whether RTAS error codes make sense though,
> > > but it's just a nit.
> > 
> > Well, (nearly) all the callers are in RTAS, and translating the error
> > codes there seems pretty ugly too.
> > 
> > > I'd still prefer the function was renamed, "check"
> > > indicates a verification, not a return.  The primary purpose of calling
> > > this function is to get a group, not simply to check the validity.
> > 
> > Yeah, it's a crap name.  How about spapr_phb_find_vfio_group().
> 
> Why not just spapr_phb_get_vfio_group() since this started out as just a
> wrapper for vfio_get_group() and that's still what it's doing even
> though it's now cached on sPAPRPHBGuestPE rather than retrieved each
> time.

Well, "get" often implies taking a persistent reference in some way,
which is why I avoided it.
diff mbox

Patch

diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index 81ad3ae..5614b45 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -41,6 +41,8 @@ 
 #include "hw/ppc/spapr_drc.h"
 #include "sysemu/device_tree.h"
 
+#include "hw/vfio/vfio-common.h"
+
 /* Copied from the kernel arch/powerpc/platforms/pseries/msi.c */
 #define RTAS_QUERY_FN           0
 #define RTAS_CHANGE_FN          1
@@ -446,6 +448,40 @@  static sPAPRPHBGuestPE *spapr_phb_pe_by_device(sPAPRPHBState *phb,
     return pe;
 }
 
+static int spapr_phb_check_vfio_group(sPAPRPHBState *phb, VFIOGroup **gpp)
+{
+    sPAPRPHBGuestPE *pe;
+
+    if (QLIST_EMPTY(&phb->pe_list)) {
+        /* No EEH capable devices on this PHB */
+        return RTAS_OUT_PARAM_ERROR;
+    }
+
+    /* Limitations in both qemu and the kernel mean that, for now, EEH
+     * won't work if there are devices from more than one PE
+     * (i.e. IOMMU group) on the same PHB */
+    pe = QLIST_FIRST(&phb->pe_list);
+    if (QLIST_NEXT(pe, list)) {
+        error_report("spapr-pci-host-bridge: EEH attempted on PHB with multiple"
+                     " IOMMU groups");
+        return RTAS_OUT_HW_ERROR;
+    }
+
+    if (!object_dynamic_cast(OBJECT(phb), "spapr-pci-vfio-host-bridge")) {
+        sPAPRPHBVFIOState *svphb = SPAPR_PCI_VFIO_HOST_BRIDGE(phb);
+        /* FIXME: this is an abstraction violation */
+        if (pe->group->groupid != svphb->iommugroupid) {
+            error_report("spapr-pci-vfio-host-bridge: Bad IOMMU group");
+            return RTAS_OUT_HW_ERROR;
+        }
+    }
+
+    if (gpp) {
+        *gpp = pe->group;
+    }
+    return RTAS_OUT_SUCCESS;
+}
+
 static void rtas_ibm_set_eeh_option(PowerPCCPU *cpu,
                                     sPAPRMachineState *spapr,
                                     uint32_t token, uint32_t nargs,
diff --git a/hw/ppc/spapr_pci_vfio.c b/hw/ppc/spapr_pci_vfio.c
index b61923c..a08292a 100644
--- a/hw/ppc/spapr_pci_vfio.c
+++ b/hw/ppc/spapr_pci_vfio.c
@@ -24,29 +24,11 @@ 
 #include "hw/vfio/vfio.h"
 #include "hw/vfio/vfio-eeh.h"
 
-#include "hw/vfio/vfio-common.h"
-
 static Property spapr_phb_vfio_properties[] = {
     DEFINE_PROP_INT32("iommu", sPAPRPHBVFIOState, iommugroupid, -1),
     DEFINE_PROP_END_OF_LIST(),
 };
 
-int spapr_phb_check_vfio_group(sPAPRPHBState *phb, VFIOGroup **gpp)
-{
-    VFIOGroup *group;
-
-    if (!object_dynamic_cast(OBJECT(phb), "spapr-pci-vfio-host-bridge")) {
-        return RTAS_OUT_PARAM_ERROR;
-    }
-
-    /* FIXME: this is an abstraction violation */
-    group = vfio_get_group(SPAPR_PCI_VFIO_HOST_BRIDGE(phb)->iommugroupid,
-                           &phb->iommu_as);
-    if (gpp)
-        *gpp = group;
-    return RTAS_OUT_SUCCESS;
-}
-
 static void spapr_phb_vfio_finish_realize(sPAPRPHBState *sphb, Error **errp)
 {
     sPAPRPHBVFIOState *svphb = SPAPR_PCI_VFIO_HOST_BRIDGE(sphb);
diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h
index 535e5ef..8c8d187 100644
--- a/include/hw/pci-host/spapr.h
+++ b/include/hw/pci-host/spapr.h
@@ -138,7 +138,6 @@  void spapr_pci_msi_init(sPAPRMachineState *spapr, hwaddr addr);
 
 void spapr_pci_rtas_init(void);
 
-int spapr_phb_check_vfio_group(sPAPRPHBState *phb, VFIOGroup **gpp);
 sPAPRPHBState *spapr_pci_find_phb(sPAPRMachineState *spapr, uint64_t buid);
 PCIDevice *spapr_pci_find_dev(sPAPRMachineState *spapr, uint64_t buid,
                               uint32_t config_addr);