diff mbox

[RFC,4/4] vfio-pci: pass the aer error to guest

Message ID 33e32b09871167f1a258330511acbf9c26f6b479.1421028274.git.chen.fan.fnst@cn.fujitsu.com
State New
Headers show

Commit Message

chenfan Jan. 12, 2015, 3:04 a.m. UTC
when the vfio device encounters an uncorrectable error in host,
 the vfio_pci driver will signal the eventfd registered by this
 vfio device, the results in the qemu eventfd handler getting
 invoked.

this patch is to pass the error to guest and have the guest driver
recover from the error.

Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
 hw/pci/pcie_aer.c         |  2 +-
 hw/vfio/pci.c             | 35 ++++++++++++++++++++++++++++-------
 include/hw/pci/pcie_aer.h |  3 ++-
 3 files changed, 31 insertions(+), 9 deletions(-)

Comments

Alex Williamson Jan. 12, 2015, 3:24 p.m. UTC | #1
On Mon, 2015-01-12 at 11:04 +0800, Chen Fan wrote:
> when the vfio device encounters an uncorrectable error in host,
>  the vfio_pci driver will signal the eventfd registered by this
>  vfio device, the results in the qemu eventfd handler getting
>  invoked.
> 
> this patch is to pass the error to guest and have the guest driver
> recover from the error.
> 
> Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> ---
>  hw/pci/pcie_aer.c         |  2 +-
>  hw/vfio/pci.c             | 35 ++++++++++++++++++++++++++++-------
>  include/hw/pci/pcie_aer.h |  3 ++-
>  3 files changed, 31 insertions(+), 9 deletions(-)
> 
> diff --git a/hw/pci/pcie_aer.c b/hw/pci/pcie_aer.c
> index 571dc92..4812e3d 100644
> --- a/hw/pci/pcie_aer.c
> +++ b/hw/pci/pcie_aer.c
> @@ -371,7 +371,7 @@ static void pcie_aer_msg_root_port(PCIDevice *dev, const PCIEAERMsg *msg)
>   *
>   * Walk up the bus tree from the device, propagate the error message.
>   */
> -static void pcie_aer_msg(PCIDevice *dev, const PCIEAERMsg *msg)
> +void pcie_aer_msg(PCIDevice *dev, const PCIEAERMsg *msg)
>  {
>      uint8_t type;
>  

Better to split the AER core changes to a separate patch.

> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index 0ee6326..6f0b265 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -3106,20 +3106,41 @@ static void vfio_put_device(VFIOPCIDevice *vdev)
>  static void vfio_err_notifier_handler(void *opaque)
>  {
>      VFIOPCIDevice *vdev = opaque;
> +    PCIDevice *dev = &vdev->pdev;
> +    PCIEAERMsg msg = {
> +        .severity = 0,
> +        .source_id = (pci_bus_num(dev->bus) << 8) | dev->devfn,
> +    };
>  
>      if (!event_notifier_test_and_clear(&vdev->err_notifier)) {
>          return;
>      }
>  
> -    /*
> -     * TBD. Retrieve the error details and decide what action
> -     * needs to be taken. One of the actions could be to pass
> -     * the error to the guest and have the guest driver recover
> -     * from the error. This requires that PCIe capabilities be
> -     * exposed to the guest. For now, we just terminate the
> -     * guest to contain the error.
> +    /* we should read the error details from the real hardware
> +     * configration spaces, here we only need to do is signaling

s/configration/configuration/

> +     * to guest an uncorrectable error has occurred.
>       */
> +    if (dev->exp.aer_cap) {
> +        uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
> +        uint32_t uncor_status;
> +        bool isfatal;
> +
> +        uncor_status = vfio_pci_read_config(dev,
> +                           dev->exp.aer_cap + PCI_ERR_UNCOR_STATUS, 4);
> +
> +        isfatal = uncor_status & pci_get_long(aer_cap + PCI_ERR_UNCOR_SEVER);
> +
> +        msg.severity = isfatal ? PCI_ERR_ROOT_CMD_FATAL_EN :
> +                                 PCI_ERR_ROOT_CMD_NONFATAL_EN;
>  
> +        pcie_aer_msg(dev, &msg);
> +        return;

Two things here, first, what happens when the guest attempts to do bus
resets or any sort of operation with the parent downstream port to
recover the device?  Those ops aren't passed through to hardware.  Is a
guest reboot necessarily going to be sufficient to clear the error and
doesn't that depend on what kind of reset vfio can do to the device on
the host?

Second, what happens on 440fx?  It looks like the message goes nowhere,
which causes a regression from our current error containment there.  So
any 440fx machine would need to disable the property that Paolo is
requesting.

> +    }
> +
> +    /*
> +     * If the aer capability is not exposed to the guest. we just
> +     * terminate the guest to contain the error.
> +     */
>      error_report("%s(%04x:%02x:%02x.%x) Unrecoverable error detected.  "
>                   "Please collect any data possible and then kill the guest",
>                   __func__, vdev->host.domain, vdev->host.bus,
> diff --git a/include/hw/pci/pcie_aer.h b/include/hw/pci/pcie_aer.h
> index bcac80a..6c4bf3b 100644
> --- a/include/hw/pci/pcie_aer.h
> +++ b/include/hw/pci/pcie_aer.h
> @@ -51,7 +51,7 @@ struct PCIEAERLog {
>      PCIEAERErr *log;
>  };
>  
> -/* aer error message: error signaling message has only error sevirity and
> +/* aer error message: error signaling message has only error severity and
>     source id. See 2.2.8.3 error signaling messages */
>  struct PCIEAERMsg {
>      /*
> @@ -102,5 +102,6 @@ void pcie_aer_root_write_config(PCIDevice *dev,
>  
>  /* error injection */
>  int pcie_aer_inject_error(PCIDevice *dev, const PCIEAERErr *err);
> +void pcie_aer_msg(PCIDevice *dev, const PCIEAERMsg *msg);
>  
>  #endif /* QEMU_PCIE_AER_H */
chenfan Jan. 28, 2015, 8:51 a.m. UTC | #2
On 01/12/2015 11:24 PM, Alex Williamson wrote:
> On Mon, 2015-01-12 at 11:04 +0800, Chen Fan wrote:
>> when the vfio device encounters an uncorrectable error in host,
>>   the vfio_pci driver will signal the eventfd registered by this
>>   vfio device, the results in the qemu eventfd handler getting
>>   invoked.
>>
>> this patch is to pass the error to guest and have the guest driver
>> recover from the error.
>>
>> Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
>> ---
>>   hw/pci/pcie_aer.c         |  2 +-
>>   hw/vfio/pci.c             | 35 ++++++++++++++++++++++++++++-------
>>   include/hw/pci/pcie_aer.h |  3 ++-
>>   3 files changed, 31 insertions(+), 9 deletions(-)
>>
>> diff --git a/hw/pci/pcie_aer.c b/hw/pci/pcie_aer.c
>> index 571dc92..4812e3d 100644
>> --- a/hw/pci/pcie_aer.c
>> +++ b/hw/pci/pcie_aer.c
>> @@ -371,7 +371,7 @@ static void pcie_aer_msg_root_port(PCIDevice *dev, const PCIEAERMsg *msg)
>>    *
>>    * Walk up the bus tree from the device, propagate the error message.
>>    */
>> -static void pcie_aer_msg(PCIDevice *dev, const PCIEAERMsg *msg)
>> +void pcie_aer_msg(PCIDevice *dev, const PCIEAERMsg *msg)
>>   {
>>       uint8_t type;
>>   
> Better to split the AER core changes to a separate patch.
>
>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
>> index 0ee6326..6f0b265 100644
>> --- a/hw/vfio/pci.c
>> +++ b/hw/vfio/pci.c
>> @@ -3106,20 +3106,41 @@ static void vfio_put_device(VFIOPCIDevice *vdev)
>>   static void vfio_err_notifier_handler(void *opaque)
>>   {
>>       VFIOPCIDevice *vdev = opaque;
>> +    PCIDevice *dev = &vdev->pdev;
>> +    PCIEAERMsg msg = {
>> +        .severity = 0,
>> +        .source_id = (pci_bus_num(dev->bus) << 8) | dev->devfn,
>> +    };
>>   
>>       if (!event_notifier_test_and_clear(&vdev->err_notifier)) {
>>           return;
>>       }
>>   
>> -    /*
>> -     * TBD. Retrieve the error details and decide what action
>> -     * needs to be taken. One of the actions could be to pass
>> -     * the error to the guest and have the guest driver recover
>> -     * from the error. This requires that PCIe capabilities be
>> -     * exposed to the guest. For now, we just terminate the
>> -     * guest to contain the error.
>> +    /* we should read the error details from the real hardware
>> +     * configration spaces, here we only need to do is signaling
> s/configration/configuration/
>
>> +     * to guest an uncorrectable error has occurred.
>>        */
>> +    if (dev->exp.aer_cap) {
>> +        uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
>> +        uint32_t uncor_status;
>> +        bool isfatal;
>> +
>> +        uncor_status = vfio_pci_read_config(dev,
>> +                           dev->exp.aer_cap + PCI_ERR_UNCOR_STATUS, 4);
>> +
>> +        isfatal = uncor_status & pci_get_long(aer_cap + PCI_ERR_UNCOR_SEVER);
>> +
>> +        msg.severity = isfatal ? PCI_ERR_ROOT_CMD_FATAL_EN :
>> +                                 PCI_ERR_ROOT_CMD_NONFATAL_EN;
>>   
>> +        pcie_aer_msg(dev, &msg);
>> +        return;
Hi Alex,

> Two things here, first, what happens when the guest attempts to do bus
> resets or any sort of operation with the parent downstream port to
> recover the device?  Those ops aren't passed through to hardware.  Is a
> guest reboot necessarily going to be sufficient to clear the error and
> doesn't that depend on what kind of reset vfio can do to the device on
> the host?
for now, the supported aer capability bridges are ioh3420, and 
upstream/downstream,
when need to reset link for fatal error, which will cause these device 
reset by trigger secondary bus reset,
then do qbus_reset_all(): which will trigger vfio_pci_reset to reset 
vfio device,
in vfio_pci_reset, vfio device will via ioctl to notify hardware to 
reset by method VFIO_DEVICE_RESET.
I think it seems be ok.

>
> Second, what happens on 440fx?  It looks like the message goes nowhere,
> which causes a regression from our current error containment there.  So
> any 440fx machine would need to disable the property that Paolo is
> requesting.
For 440fx, it don't support pcie root port, so there will not trigger
aer report in qemu.

Thanks,
Chen

>> +    }
>> +
>> +    /*
>> +     * If the aer capability is not exposed to the guest. we just
>> +     * terminate the guest to contain the error.
>> +     */
>>       error_report("%s(%04x:%02x:%02x.%x) Unrecoverable error detected.  "
>>                    "Please collect any data possible and then kill the guest",
>>                    __func__, vdev->host.domain, vdev->host.bus,
>> diff --git a/include/hw/pci/pcie_aer.h b/include/hw/pci/pcie_aer.h
>> index bcac80a..6c4bf3b 100644
>> --- a/include/hw/pci/pcie_aer.h
>> +++ b/include/hw/pci/pcie_aer.h
>> @@ -51,7 +51,7 @@ struct PCIEAERLog {
>>       PCIEAERErr *log;
>>   };
>>   
>> -/* aer error message: error signaling message has only error sevirity and
>> +/* aer error message: error signaling message has only error severity and
>>      source id. See 2.2.8.3 error signaling messages */
>>   struct PCIEAERMsg {
>>       /*
>> @@ -102,5 +102,6 @@ void pcie_aer_root_write_config(PCIDevice *dev,
>>   
>>   /* error injection */
>>   int pcie_aer_inject_error(PCIDevice *dev, const PCIEAERErr *err);
>> +void pcie_aer_msg(PCIDevice *dev, const PCIEAERMsg *msg);
>>   
>>   #endif /* QEMU_PCIE_AER_H */
>
>
> .
>
diff mbox

Patch

diff --git a/hw/pci/pcie_aer.c b/hw/pci/pcie_aer.c
index 571dc92..4812e3d 100644
--- a/hw/pci/pcie_aer.c
+++ b/hw/pci/pcie_aer.c
@@ -371,7 +371,7 @@  static void pcie_aer_msg_root_port(PCIDevice *dev, const PCIEAERMsg *msg)
  *
  * Walk up the bus tree from the device, propagate the error message.
  */
-static void pcie_aer_msg(PCIDevice *dev, const PCIEAERMsg *msg)
+void pcie_aer_msg(PCIDevice *dev, const PCIEAERMsg *msg)
 {
     uint8_t type;
 
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 0ee6326..6f0b265 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -3106,20 +3106,41 @@  static void vfio_put_device(VFIOPCIDevice *vdev)
 static void vfio_err_notifier_handler(void *opaque)
 {
     VFIOPCIDevice *vdev = opaque;
+    PCIDevice *dev = &vdev->pdev;
+    PCIEAERMsg msg = {
+        .severity = 0,
+        .source_id = (pci_bus_num(dev->bus) << 8) | dev->devfn,
+    };
 
     if (!event_notifier_test_and_clear(&vdev->err_notifier)) {
         return;
     }
 
-    /*
-     * TBD. Retrieve the error details and decide what action
-     * needs to be taken. One of the actions could be to pass
-     * the error to the guest and have the guest driver recover
-     * from the error. This requires that PCIe capabilities be
-     * exposed to the guest. For now, we just terminate the
-     * guest to contain the error.
+    /* we should read the error details from the real hardware
+     * configration spaces, here we only need to do is signaling
+     * to guest an uncorrectable error has occurred.
      */
+    if (dev->exp.aer_cap) {
+        uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
+        uint32_t uncor_status;
+        bool isfatal;
+
+        uncor_status = vfio_pci_read_config(dev,
+                           dev->exp.aer_cap + PCI_ERR_UNCOR_STATUS, 4);
+
+        isfatal = uncor_status & pci_get_long(aer_cap + PCI_ERR_UNCOR_SEVER);
+
+        msg.severity = isfatal ? PCI_ERR_ROOT_CMD_FATAL_EN :
+                                 PCI_ERR_ROOT_CMD_NONFATAL_EN;
 
+        pcie_aer_msg(dev, &msg);
+        return;
+    }
+
+    /*
+     * If the aer capability is not exposed to the guest. we just
+     * terminate the guest to contain the error.
+     */
     error_report("%s(%04x:%02x:%02x.%x) Unrecoverable error detected.  "
                  "Please collect any data possible and then kill the guest",
                  __func__, vdev->host.domain, vdev->host.bus,
diff --git a/include/hw/pci/pcie_aer.h b/include/hw/pci/pcie_aer.h
index bcac80a..6c4bf3b 100644
--- a/include/hw/pci/pcie_aer.h
+++ b/include/hw/pci/pcie_aer.h
@@ -51,7 +51,7 @@  struct PCIEAERLog {
     PCIEAERErr *log;
 };
 
-/* aer error message: error signaling message has only error sevirity and
+/* aer error message: error signaling message has only error severity and
    source id. See 2.2.8.3 error signaling messages */
 struct PCIEAERMsg {
     /*
@@ -102,5 +102,6 @@  void pcie_aer_root_write_config(PCIDevice *dev,
 
 /* error injection */
 int pcie_aer_inject_error(PCIDevice *dev, const PCIEAERErr *err);
+void pcie_aer_msg(PCIDevice *dev, const PCIEAERMsg *msg);
 
 #endif /* QEMU_PCIE_AER_H */