diff mbox

[1/3] VFIO: Clear stale MSIx table during EEH reset

Message ID 1426054314-19564-1-git-send-email-gwshan@linux.vnet.ibm.com
State New
Headers show

Commit Message

Gavin Shan March 11, 2015, 6:11 a.m. UTC
The PCI device MSIx table is cleaned out in hardware after EEH PE
reset. However, we still hold the stale MSIx entries in QEMU, which
should be cleared accordingly. Otherwise, we will run into another
(recursive) EEH error and the PCI devices contained in the PE have
to be offlined exceptionally.

The patch clears stale MSIx table before EEH PE reset so that MSIx
table could be restored properly after EEH PE reset.

Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
---
 hw/vfio/common.c       |  6 +++++-
 hw/vfio/pci.c          | 39 +++++++++++++++++++++++++++++++++++++++
 include/hw/vfio/vfio.h |  3 ++-
 3 files changed, 46 insertions(+), 2 deletions(-)

Comments

David Gibson March 12, 2015, 1:04 a.m. UTC | #1
On Wed, Mar 11, 2015 at 05:11:52PM +1100, Gavin Shan wrote:
> The PCI device MSIx table is cleaned out in hardware after EEH PE
> reset. However, we still hold the stale MSIx entries in QEMU, which
> should be cleared accordingly. Otherwise, we will run into another
> (recursive) EEH error and the PCI devices contained in the PE have
> to be offlined exceptionally.
> 
> The patch clears stale MSIx table before EEH PE reset so that MSIx
> table could be restored properly after EEH PE reset.
> 
> Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
> ---
>  hw/vfio/common.c       |  6 +++++-
>  hw/vfio/pci.c          | 39 +++++++++++++++++++++++++++++++++++++++
>  include/hw/vfio/vfio.h |  3 ++-
>  3 files changed, 46 insertions(+), 2 deletions(-)
> 
> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> index 148eb53..e3833f4 100644
> --- a/hw/vfio/common.c
> +++ b/hw/vfio/common.c
> @@ -949,8 +949,12 @@ int vfio_container_ioctl(AddressSpace *as, int32_t groupid,
>      switch (req) {
>      case VFIO_CHECK_EXTENSION:
>      case VFIO_IOMMU_SPAPR_TCE_GET_INFO:
> -    case VFIO_EEH_PE_OP:
>          break;
> +    case VFIO_EEH_PE_OP:
> +        if (!vfio_container_eeh_event(as, groupid, param)) {

Please use == 0 not !, remembering that !some_function() is the
success case hurts my brain.

> +            break;
> +        }
> +        /* fallthru */

It doesn't look like the fallthrough will generate the correct error
message: it will say "unsupported ioctl" but
vfio_container_eeh_event() could fail for some other reason.

>      default:
>          /* Return an error on unknown requests */
>          error_report("vfio: unsupported ioctl %X", req);
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index 6b80539..8c4a8cb 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -3319,6 +3319,45 @@ static void vfio_unregister_req_notifier(VFIOPCIDevice *vdev)
>      vdev->req_enabled = false;
>  }
>  
> +int vfio_container_eeh_event(AddressSpace *as, int32_t groupid,
> +                             struct vfio_eeh_pe_op *op)
> +{
> +    VFIOGroup *group;
> +    VFIODevice *vbasedev;
> +    VFIOPCIDevice *vdev;
> +
> +    group = vfio_get_group(groupid, as);
> +    if (!group) {
> +        vfio_put_group(group);

Is vfio_put_group(NULL) really what you want?

> +        error_report("vfio: group %d not found\n", groupid);
> +        return -1;
> +    }
> +
> +    switch (op->op) {
> +    case VFIO_EEH_PE_RESET_HOT:
> +    case VFIO_EEH_PE_RESET_FUNDAMENTAL:
> +        /*
> +         * The MSIx table will be cleaned out by reset. We need
> +         * disable it so that it can be reenabled properly. Also,
> +         * the cached MSIx table should be cleared as it's not
> +         * reflecting the contents in hardware.
> +         */
> +        QLIST_FOREACH(vbasedev, &group->device_list, next) {
> +            vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
> +            if (msix_enabled(&vdev->pdev)) {
> +                vfio_disable_msix(vdev);
> +            }
> +
> +            msix_reset(&vdev->pdev);
> +        }
> +
> +        break;
> +    }
> +
> +    vfio_put_group(group);
> +    return 0;
> +}
> +
>  static int vfio_initfn(PCIDevice *pdev)
>  {
>      VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
> diff --git a/include/hw/vfio/vfio.h b/include/hw/vfio/vfio.h
> index 0b26cd8..99528a3 100644
> --- a/include/hw/vfio/vfio.h
> +++ b/include/hw/vfio/vfio.h
> @@ -5,5 +5,6 @@
>  
>  extern int vfio_container_ioctl(AddressSpace *as, int32_t groupid,
>                                  int req, void *param);
> -
> +extern int vfio_container_eeh_event(AddressSpace *as, int32_t groupid,
> +                                    struct vfio_eeh_pe_op *op);
>  #endif
Gavin Shan March 12, 2015, 3:02 a.m. UTC | #2
On Thu, Mar 12, 2015 at 12:04:59PM +1100, David Gibson wrote:
>On Wed, Mar 11, 2015 at 05:11:52PM +1100, Gavin Shan wrote:
>> The PCI device MSIx table is cleaned out in hardware after EEH PE
>> reset. However, we still hold the stale MSIx entries in QEMU, which
>> should be cleared accordingly. Otherwise, we will run into another
>> (recursive) EEH error and the PCI devices contained in the PE have
>> to be offlined exceptionally.
>> 
>> The patch clears stale MSIx table before EEH PE reset so that MSIx
>> table could be restored properly after EEH PE reset.
>> 
>> Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
>> ---
>>  hw/vfio/common.c       |  6 +++++-
>>  hw/vfio/pci.c          | 39 +++++++++++++++++++++++++++++++++++++++
>>  include/hw/vfio/vfio.h |  3 ++-
>>  3 files changed, 46 insertions(+), 2 deletions(-)
>> 
>> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
>> index 148eb53..e3833f4 100644
>> --- a/hw/vfio/common.c
>> +++ b/hw/vfio/common.c
>> @@ -949,8 +949,12 @@ int vfio_container_ioctl(AddressSpace *as, int32_t groupid,
>>      switch (req) {
>>      case VFIO_CHECK_EXTENSION:
>>      case VFIO_IOMMU_SPAPR_TCE_GET_INFO:
>> -    case VFIO_EEH_PE_OP:
>>          break;
>> +    case VFIO_EEH_PE_OP:
>> +        if (!vfio_container_eeh_event(as, groupid, param)) {
>
>Please use == 0 not !, remembering that !some_function() is the
>success case hurts my brain.
>

Yes, I'll fix it as below.

>> +            break;
>> +        }
>> +        /* fallthru */
>
>It doesn't look like the fallthrough will generate the correct error
>message: it will say "unsupported ioctl" but
>vfio_container_eeh_event() could fail for some other reason.
>

For now, vfio_container_eeh_event() fails when VFIO group can't be
found, which is checked by vfio_container_do_ioctl() as well. However,
it's worthy to have precise message as follows:

	case VFIO_EEH_PE_OP:
	    if (vfio_container_eeh_event(as, groupid, param) != 0) {
                error_report("vfio: cannot handle EEH event on group %d\n",
                             groupid);
            }

            break;

>>      default:
>>          /* Return an error on unknown requests */
>>          error_report("vfio: unsupported ioctl %X", req);
>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
>> index 6b80539..8c4a8cb 100644
>> --- a/hw/vfio/pci.c
>> +++ b/hw/vfio/pci.c
>> @@ -3319,6 +3319,45 @@ static void vfio_unregister_req_notifier(VFIOPCIDevice *vdev)
>>      vdev->req_enabled = false;
>>  }
>>  
>> +int vfio_container_eeh_event(AddressSpace *as, int32_t groupid,
>> +                             struct vfio_eeh_pe_op *op)
>> +{
>> +    VFIOGroup *group;
>> +    VFIODevice *vbasedev;
>> +    VFIOPCIDevice *vdev;
>> +
>> +    group = vfio_get_group(groupid, as);
>> +    if (!group) {
>> +        vfio_put_group(group);
>
>Is vfio_put_group(NULL) really what you want?
>

No, it should be dropped.

Thanks,
Gavin

>> +        error_report("vfio: group %d not found\n", groupid);
>> +        return -1;
>> +    }
>> +
>> +    switch (op->op) {
>> +    case VFIO_EEH_PE_RESET_HOT:
>> +    case VFIO_EEH_PE_RESET_FUNDAMENTAL:
>> +        /*
>> +         * The MSIx table will be cleaned out by reset. We need
>> +         * disable it so that it can be reenabled properly. Also,
>> +         * the cached MSIx table should be cleared as it's not
>> +         * reflecting the contents in hardware.
>> +         */
>> +        QLIST_FOREACH(vbasedev, &group->device_list, next) {
>> +            vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
>> +            if (msix_enabled(&vdev->pdev)) {
>> +                vfio_disable_msix(vdev);
>> +            }
>> +
>> +            msix_reset(&vdev->pdev);
>> +        }
>> +
>> +        break;
>> +    }
>> +
>> +    vfio_put_group(group);
>> +    return 0;
>> +}
>> +
>>  static int vfio_initfn(PCIDevice *pdev)
>>  {
>>      VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
>> diff --git a/include/hw/vfio/vfio.h b/include/hw/vfio/vfio.h
>> index 0b26cd8..99528a3 100644
>> --- a/include/hw/vfio/vfio.h
>> +++ b/include/hw/vfio/vfio.h
>> @@ -5,5 +5,6 @@
>>  
>>  extern int vfio_container_ioctl(AddressSpace *as, int32_t groupid,
>>                                  int req, void *param);
>> -
>> +extern int vfio_container_eeh_event(AddressSpace *as, int32_t groupid,
>> +                                    struct vfio_eeh_pe_op *op);
>>  #endif
>
>-- 
>David Gibson			| I'll have my music baroque, and my code
>david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
>				| _way_ _around_!
>http://www.ozlabs.org/~dgibson
Alex Williamson March 13, 2015, 9:33 p.m. UTC | #3
On Wed, 2015-03-11 at 17:11 +1100, Gavin Shan wrote:
> The PCI device MSIx table is cleaned out in hardware after EEH PE
> reset. However, we still hold the stale MSIx entries in QEMU, which
> should be cleared accordingly. Otherwise, we will run into another
> (recursive) EEH error and the PCI devices contained in the PE have
> to be offlined exceptionally.
> 
> The patch clears stale MSIx table before EEH PE reset so that MSIx
> table could be restored properly after EEH PE reset.
> 
> Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
> ---
>  hw/vfio/common.c       |  6 +++++-
>  hw/vfio/pci.c          | 39 +++++++++++++++++++++++++++++++++++++++
>  include/hw/vfio/vfio.h |  3 ++-
>  3 files changed, 46 insertions(+), 2 deletions(-)
> 
> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> index 148eb53..e3833f4 100644
> --- a/hw/vfio/common.c
> +++ b/hw/vfio/common.c
> @@ -949,8 +949,12 @@ int vfio_container_ioctl(AddressSpace *as, int32_t groupid,
>      switch (req) {
>      case VFIO_CHECK_EXTENSION:
>      case VFIO_IOMMU_SPAPR_TCE_GET_INFO:
> -    case VFIO_EEH_PE_OP:
>          break;
> +    case VFIO_EEH_PE_OP:
> +        if (!vfio_container_eeh_event(as, groupid, param)) {
> +            break;
> +        }
> +        /* fallthru */
>      default:
>          /* Return an error on unknown requests */
>          error_report("vfio: unsupported ioctl %X", req);
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index 6b80539..8c4a8cb 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -3319,6 +3319,45 @@ static void vfio_unregister_req_notifier(VFIOPCIDevice *vdev)
>      vdev->req_enabled = false;
>  }
>  
> +int vfio_container_eeh_event(AddressSpace *as, int32_t groupid,
> +                             struct vfio_eeh_pe_op *op)

Called by common, but lives in pci.  This would need a stub if QEMU is
built without CONFIG_PCI.  Thanks,

Alex

> +{
> +    VFIOGroup *group;
> +    VFIODevice *vbasedev;
> +    VFIOPCIDevice *vdev;
> +
> +    group = vfio_get_group(groupid, as);
> +    if (!group) {
> +        vfio_put_group(group);
> +        error_report("vfio: group %d not found\n", groupid);
> +        return -1;
> +    }
> +
> +    switch (op->op) {
> +    case VFIO_EEH_PE_RESET_HOT:
> +    case VFIO_EEH_PE_RESET_FUNDAMENTAL:
> +        /*
> +         * The MSIx table will be cleaned out by reset. We need
> +         * disable it so that it can be reenabled properly. Also,
> +         * the cached MSIx table should be cleared as it's not
> +         * reflecting the contents in hardware.
> +         */
> +        QLIST_FOREACH(vbasedev, &group->device_list, next) {
> +            vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
> +            if (msix_enabled(&vdev->pdev)) {
> +                vfio_disable_msix(vdev);
> +            }
> +
> +            msix_reset(&vdev->pdev);
> +        }
> +
> +        break;
> +    }
> +
> +    vfio_put_group(group);
> +    return 0;
> +}
> +
>  static int vfio_initfn(PCIDevice *pdev)
>  {
>      VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
> diff --git a/include/hw/vfio/vfio.h b/include/hw/vfio/vfio.h
> index 0b26cd8..99528a3 100644
> --- a/include/hw/vfio/vfio.h
> +++ b/include/hw/vfio/vfio.h
> @@ -5,5 +5,6 @@
>  
>  extern int vfio_container_ioctl(AddressSpace *as, int32_t groupid,
>                                  int req, void *param);
> -
> +extern int vfio_container_eeh_event(AddressSpace *as, int32_t groupid,
> +                                    struct vfio_eeh_pe_op *op);
>  #endif
Gavin Shan March 15, 2015, 10:27 p.m. UTC | #4
On Fri, Mar 13, 2015 at 03:33:50PM -0600, Alex Williamson wrote:
>On Wed, 2015-03-11 at 17:11 +1100, Gavin Shan wrote:
>> The PCI device MSIx table is cleaned out in hardware after EEH PE
>> reset. However, we still hold the stale MSIx entries in QEMU, which
>> should be cleared accordingly. Otherwise, we will run into another
>> (recursive) EEH error and the PCI devices contained in the PE have
>> to be offlined exceptionally.
>> 
>> The patch clears stale MSIx table before EEH PE reset so that MSIx
>> table could be restored properly after EEH PE reset.
>> 
>> Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
>> ---
>>  hw/vfio/common.c       |  6 +++++-
>>  hw/vfio/pci.c          | 39 +++++++++++++++++++++++++++++++++++++++
>>  include/hw/vfio/vfio.h |  3 ++-
>>  3 files changed, 46 insertions(+), 2 deletions(-)
>> 
>> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
>> index 148eb53..e3833f4 100644
>> --- a/hw/vfio/common.c
>> +++ b/hw/vfio/common.c
>> @@ -949,8 +949,12 @@ int vfio_container_ioctl(AddressSpace *as, int32_t groupid,
>>      switch (req) {
>>      case VFIO_CHECK_EXTENSION:
>>      case VFIO_IOMMU_SPAPR_TCE_GET_INFO:
>> -    case VFIO_EEH_PE_OP:
>>          break;
>> +    case VFIO_EEH_PE_OP:
>> +        if (!vfio_container_eeh_event(as, groupid, param)) {
>> +            break;
>> +        }
>> +        /* fallthru */
>>      default:
>>          /* Return an error on unknown requests */
>>          error_report("vfio: unsupported ioctl %X", req);
>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
>> index 6b80539..8c4a8cb 100644
>> --- a/hw/vfio/pci.c
>> +++ b/hw/vfio/pci.c
>> @@ -3319,6 +3319,45 @@ static void vfio_unregister_req_notifier(VFIOPCIDevice *vdev)
>>      vdev->req_enabled = false;
>>  }
>>  
>> +int vfio_container_eeh_event(AddressSpace *as, int32_t groupid,
>> +                             struct vfio_eeh_pe_op *op)
>
>Called by common, but lives in pci.  This would need a stub if QEMU is
>built without CONFIG_PCI.  Thanks,
>

Right. I'll add a stub for !CONFIG_PCI.

Thanks,
Gavin

>Alex
>
>> +{
>> +    VFIOGroup *group;
>> +    VFIODevice *vbasedev;
>> +    VFIOPCIDevice *vdev;
>> +
>> +    group = vfio_get_group(groupid, as);
>> +    if (!group) {
>> +        vfio_put_group(group);
>> +        error_report("vfio: group %d not found\n", groupid);
>> +        return -1;
>> +    }
>> +
>> +    switch (op->op) {
>> +    case VFIO_EEH_PE_RESET_HOT:
>> +    case VFIO_EEH_PE_RESET_FUNDAMENTAL:
>> +        /*
>> +         * The MSIx table will be cleaned out by reset. We need
>> +         * disable it so that it can be reenabled properly. Also,
>> +         * the cached MSIx table should be cleared as it's not
>> +         * reflecting the contents in hardware.
>> +         */
>> +        QLIST_FOREACH(vbasedev, &group->device_list, next) {
>> +            vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
>> +            if (msix_enabled(&vdev->pdev)) {
>> +                vfio_disable_msix(vdev);
>> +            }
>> +
>> +            msix_reset(&vdev->pdev);
>> +        }
>> +
>> +        break;
>> +    }
>> +
>> +    vfio_put_group(group);
>> +    return 0;
>> +}
>> +
>>  static int vfio_initfn(PCIDevice *pdev)
>>  {
>>      VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
>> diff --git a/include/hw/vfio/vfio.h b/include/hw/vfio/vfio.h
>> index 0b26cd8..99528a3 100644
>> --- a/include/hw/vfio/vfio.h
>> +++ b/include/hw/vfio/vfio.h
>> @@ -5,5 +5,6 @@
>>  
>>  extern int vfio_container_ioctl(AddressSpace *as, int32_t groupid,
>>                                  int req, void *param);
>> -
>> +extern int vfio_container_eeh_event(AddressSpace *as, int32_t groupid,
>> +                                    struct vfio_eeh_pe_op *op);
>>  #endif
>
>
>
diff mbox

Patch

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 148eb53..e3833f4 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -949,8 +949,12 @@  int vfio_container_ioctl(AddressSpace *as, int32_t groupid,
     switch (req) {
     case VFIO_CHECK_EXTENSION:
     case VFIO_IOMMU_SPAPR_TCE_GET_INFO:
-    case VFIO_EEH_PE_OP:
         break;
+    case VFIO_EEH_PE_OP:
+        if (!vfio_container_eeh_event(as, groupid, param)) {
+            break;
+        }
+        /* fallthru */
     default:
         /* Return an error on unknown requests */
         error_report("vfio: unsupported ioctl %X", req);
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 6b80539..8c4a8cb 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -3319,6 +3319,45 @@  static void vfio_unregister_req_notifier(VFIOPCIDevice *vdev)
     vdev->req_enabled = false;
 }
 
+int vfio_container_eeh_event(AddressSpace *as, int32_t groupid,
+                             struct vfio_eeh_pe_op *op)
+{
+    VFIOGroup *group;
+    VFIODevice *vbasedev;
+    VFIOPCIDevice *vdev;
+
+    group = vfio_get_group(groupid, as);
+    if (!group) {
+        vfio_put_group(group);
+        error_report("vfio: group %d not found\n", groupid);
+        return -1;
+    }
+
+    switch (op->op) {
+    case VFIO_EEH_PE_RESET_HOT:
+    case VFIO_EEH_PE_RESET_FUNDAMENTAL:
+        /*
+         * The MSIx table will be cleaned out by reset. We need
+         * disable it so that it can be reenabled properly. Also,
+         * the cached MSIx table should be cleared as it's not
+         * reflecting the contents in hardware.
+         */
+        QLIST_FOREACH(vbasedev, &group->device_list, next) {
+            vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
+            if (msix_enabled(&vdev->pdev)) {
+                vfio_disable_msix(vdev);
+            }
+
+            msix_reset(&vdev->pdev);
+        }
+
+        break;
+    }
+
+    vfio_put_group(group);
+    return 0;
+}
+
 static int vfio_initfn(PCIDevice *pdev)
 {
     VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
diff --git a/include/hw/vfio/vfio.h b/include/hw/vfio/vfio.h
index 0b26cd8..99528a3 100644
--- a/include/hw/vfio/vfio.h
+++ b/include/hw/vfio/vfio.h
@@ -5,5 +5,6 @@ 
 
 extern int vfio_container_ioctl(AddressSpace *as, int32_t groupid,
                                 int req, void *param);
-
+extern int vfio_container_eeh_event(AddressSpace *as, int32_t groupid,
+                                    struct vfio_eeh_pe_op *op);
 #endif