diff mbox

[v9,03/11] vfio: add aer support for vfio device

Message ID 1468913909-21811-4-git-send-email-zhoujie2011@cn.fujitsu.com
State New
Headers show

Commit Message

Zhou Jie July 19, 2016, 7:38 a.m. UTC
From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>

Calling pcie_aer_init to initilize aer related registers for
vfio device, then reload physical related registers to expose
device capability.

Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
 hw/vfio/pci.c | 75 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 hw/vfio/pci.h |  3 +++
 2 files changed, 77 insertions(+), 1 deletion(-)

Comments

Alex Williamson Aug. 31, 2016, 7:44 p.m. UTC | #1
On Tue, 19 Jul 2016 15:38:21 +0800
Zhou Jie <zhoujie2011@cn.fujitsu.com> wrote:

> From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> 
> Calling pcie_aer_init to initilize aer related registers for
> vfio device, then reload physical related registers to expose
> device capability.
> 
> Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> ---
>  hw/vfio/pci.c | 75 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
>  hw/vfio/pci.h |  3 +++
>  2 files changed, 77 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index 6a6160b..11c895c 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -1854,6 +1854,66 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos)
>      return 0;
>  }
>  
> +static int vfio_setup_aer(VFIOPCIDevice *vdev, uint8_t cap_ver,
> +                          int pos, uint16_t size)
> +{
> +    PCIDevice *pdev = &vdev->pdev;
> +    PCIDevice *dev_iter;
> +    uint8_t type;
> +    uint32_t errcap;
> +
> +    if (!(vdev->features & VFIO_FEATURE_ENABLE_AER)) {
> +        pcie_add_capability(pdev, PCI_EXT_CAP_ID_ERR,
> +                            cap_ver, pos, size);
> +        return 0;
> +    }
> +
> +    dev_iter = pci_bridge_get_device(pdev->bus);
> +    if (!dev_iter) {
> +        goto error;
> +    }
> +
> +    while (dev_iter) {
> +        if (!pci_is_express(dev_iter)) {
> +            goto error;
> +        }
> +
> +        type = pcie_cap_get_type(dev_iter);
> +        if ((type != PCI_EXP_TYPE_ROOT_PORT &&
> +             type != PCI_EXP_TYPE_UPSTREAM &&
> +             type != PCI_EXP_TYPE_DOWNSTREAM)) {
> +            goto error;
> +        }
> +
> +        if (!dev_iter->exp.aer_cap) {
> +            goto error;
> +        }
> +
> +        dev_iter = pci_bridge_get_device(dev_iter->bus);
> +    }
> +
> +    errcap = vfio_pci_read_config(pdev, pos + PCI_ERR_CAP, 4);
> +    /*
> +     * The ability to record multiple headers is depending on
> +     * the state of the Multiple Header Recording Capable bit and
> +     * enabled by the Multiple Header Recording Enable bit.
> +     */
> +    if ((errcap & PCI_ERR_CAP_MHRC) &&
> +        (errcap & PCI_ERR_CAP_MHRE)) {
> +        pdev->exp.aer_log.log_max = PCIE_AER_LOG_MAX_DEFAULT;
> +    } else {
> +        pdev->exp.aer_log.log_max = 0;
> +    }
> +
> +    pcie_cap_deverr_init(pdev);
> +    return pcie_aer_init(pdev, pos, size);

pcie_aer_init() adds a v2 AER capability regardless of the version of
the AER capability on the device.  Is this expected?  v2 defines a lot
more bits in various registers than v1, so are we simply hoping that
devices have reserved bits as zero like they're supposed to?  It's a
bit strange that for an Intel 82576 NIC I get a v1 AER capability w/o
aer=on and a v2 with.  Thanks,

Alex

> +
> +error:
> +    error_report("vfio: Unable to enable AER for device %s, parent bus "
> +                 "does not support AER signaling", vdev->vbasedev.name);
> +    return -1;
> +}
> +
>  static int vfio_add_ext_cap(VFIOPCIDevice *vdev)
>  {
>      PCIDevice *pdev = &vdev->pdev;
> @@ -1861,6 +1921,7 @@ static int vfio_add_ext_cap(VFIOPCIDevice *vdev)
>      uint16_t cap_id, next, size;
>      uint8_t cap_ver;
>      uint8_t *config;
> +    int ret = 0;
>  
>      /* Only add extended caps if we have them and the guest can see them */
>      if (!pci_is_express(pdev) || !pci_bus_is_express(pdev->bus) ||
> @@ -1914,6 +1975,9 @@ static int vfio_add_ext_cap(VFIOPCIDevice *vdev)
>                                     PCI_EXT_CAP_NEXT_MASK);
>  
>          switch (cap_id) {
> +        case PCI_EXT_CAP_ID_ERR:
> +            ret = vfio_setup_aer(vdev, cap_ver, next, size);
> +            break;
>          case PCI_EXT_CAP_ID_SRIOV: /* Read-only VF BARs confuse OVMF */
>              trace_vfio_add_ext_cap_dropped(vdev->vbasedev.name, cap_id, next);
>              break;
> @@ -1921,6 +1985,9 @@ static int vfio_add_ext_cap(VFIOPCIDevice *vdev)
>              pcie_add_capability(pdev, cap_id, cap_ver, next, size);
>          }
>  
> +        if (ret) {
> +            goto out;
> +        }
>      }
>  
>      /* Cleanup chain head ID if necessary */
> @@ -1928,8 +1995,9 @@ static int vfio_add_ext_cap(VFIOPCIDevice *vdev)
>          pci_set_word(pdev->config + PCI_CONFIG_SPACE_SIZE, 0);
>      }
>  
> +out:
>      g_free(config);
> -    return 0;
> +    return ret;
>  }
>  
>  static int vfio_add_capabilities(VFIOPCIDevice *vdev)
> @@ -2698,6 +2766,11 @@ static int vfio_initfn(PCIDevice *pdev)
>          goto out_teardown;
>      }
>  
> +    if ((vdev->features & VFIO_FEATURE_ENABLE_AER) &&
> +        !pdev->exp.aer_cap) {
> +        goto out_teardown;
> +    }
> +
>      if (vdev->vga) {
>          vfio_vga_quirk_setup(vdev);
>      }
> diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
> index 7d482d9..5483044 100644
> --- a/hw/vfio/pci.h
> +++ b/hw/vfio/pci.h
> @@ -15,6 +15,7 @@
>  #include "qemu-common.h"
>  #include "exec/memory.h"
>  #include "hw/pci/pci.h"
> +#include "hw/pci/pci_bridge.h"
>  #include "hw/vfio/vfio-common.h"
>  #include "qemu/event_notifier.h"
>  #include "qemu/queue.h"
> @@ -132,6 +133,8 @@ typedef struct VFIOPCIDevice {
>  #define VFIO_FEATURE_ENABLE_IGD_OPREGION_BIT 2
>  #define VFIO_FEATURE_ENABLE_IGD_OPREGION \
>                                  (1 << VFIO_FEATURE_ENABLE_IGD_OPREGION_BIT)
> +#define VFIO_FEATURE_ENABLE_AER_BIT 3
> +#define VFIO_FEATURE_ENABLE_AER (1 << VFIO_FEATURE_ENABLE_AER_BIT)
>      int32_t bootindex;
>      uint32_t igd_gms;
>      uint8_t pm_cap;
Dou Liyang Sept. 13, 2016, 6:56 a.m. UTC | #2
Hi Alex,

I am Dou.
I am testing these patches.

At 09/01/2016 03:44 AM, Alex Williamson wrote:

[...]

>> +
>> +    pcie_cap_deverr_init(pdev);
>> +    return pcie_aer_init(pdev, pos, size);
>
> pcie_aer_init() adds a v2 AER capability regardless of the version of
> the AER capability on the device.  Is this expected?

I think it is not good.

  v2 defines a lot
> more bits in various registers than v1, so are we simply hoping that
> devices have reserved bits as zero like they're supposed to?

I read the Capability Version in PCIe Spec. it says:

Capability Version – This field is a PCI-SIG defined version number
that indicates the version of the Capability structure present.
OR
This field must be 2h if the End-End TLP Prefix Supported bit (see
Section 7.8.15) is Set and must be 1h or 2h otherwise.

In my option, I guess that the spec may mean that v1/v2 is used for the
End-End TLP Prefix Supported bit supported.

And I find that Kernel and QEmu don't do some special work for it. this
feature may not affect the registers in AER.

can you give me some AER bits that are defined in v2 ,but not in v1?

It's a
> bit strange that for an Intel 82576 NIC I get a v1 AER capability w/o
> aer=on and a v2 with.  Thanks,
>

Yes, I do the test, and get the same result for me.

I use the following device to test:

06:00.0 Ethernet controller: Intel Corporation 82576 Gigabit Network
Connection (rev 01)

Firstly, In host, I use the command "lspci -vvv -s 06:00.0":

[...]
Capabilities: [100 v1] Advanced Error Reporting
UESta:	DLP- SDES- TLP- FCP- CmpltTO- CmpltAbrt- UnxCmplt- RxOF-
  MalfTLP- ECRC- UnsupReq- ACSViol-
UEMsk:	DLP- SDES- TLP- FCP- CmpltTO- CmpltAbrt- UnxCmplt- RxOF-
MalfTLP- ECRC- UnsupReq+ ACSViol-
UESvrt:	DLP+ SDES- TLP- FCP+ CmpltTO- CmpltAbrt- UnxCmplt- RxOF+
MalfTLP+ ECRC- UnsupReq- ACSViol-
CESta:	RxErr- BadTLP- BadDLLP- Rollover- Timeout- NonFatalErr-
CEMsk:	RxErr- BadTLP+ BadDLLP- Rollover- Timeout- NonFatalErr+
AERCap:	First Error Pointer: 12, GenCap- CGenEn- ChkCap- ChkEn-
[...]

Secondly, I pass through the device to the guest. In the guest:

[...]
Capabilities: [100 v2] Advanced Error Reporting
UESta:	DLP- SDES- TLP- FCP- CmpltTO- CmpltAbrt- UnxCmplt- RxOF-
  MalfTLP- ECRC- UnsupReq- ACSViol-
UEMsk:	DLP- SDES- TLP- FCP- CmpltTO- CmpltAbrt- UnxCmplt- RxOF-
MalfTLP- ECRC- UnsupReq+ ACSViol-
UESvrt:	DLP+ SDES- TLP- FCP+ CmpltTO- CmpltAbrt- UnxCmplt- RxOF+
MalfTLP+ ECRC- UnsupReq- ACSViol-
CESta:	RxErr- BadTLP- BadDLLP- Rollover- Timeout- NonFatalErr-
CEMsk:	RxErr- BadTLP+ BadDLLP- Rollover- Timeout- NonFatalErr+
AERCap:	First Error Pointer: 12, GenCap- CGenEn- ChkCap- ChkEn-
[...]

They look the same, except the capabilities version.

Thirdly, I drop the patches and do the test. After I launch QEmu,
In guest:

[...]
Capabilities: [100 v1] Advanced Error Reporting
[...]

Fourthly, I modify the PCI_ERR_VER: #define PCI_ERR_VER  1.
In guest:
[...]
Capabilities: [100 v1] Advanced Error Reporting
[...]

They also have the same features.

At last, I think the v1/v2 is not influence on our AER function.
But, it is actually strange that we can get a v1 AER capability
w/o aer=on and a v2 with.
I suggest that we add a capability version parameter to extend the
pcie_aer_init(),or add a new pcie_aer_v1_init() for the devices with
v1 AER cap.

[...]

Thanks,
Dou.
Alex Williamson Sept. 13, 2016, 3:14 p.m. UTC | #3
Hi Dou,

On Tue, 13 Sep 2016 14:56:15 +0800
Dou Liyang <douly.fnst@cn.fujitsu.com> wrote:

> Hi Alex,
> 
> I am Dou.
> I am testing these patches.
> 
> At 09/01/2016 03:44 AM, Alex Williamson wrote:
> 
> [...]
> 
> >> +
> >> +    pcie_cap_deverr_init(pdev);
> >> +    return pcie_aer_init(pdev, pos, size);  
> >
> > pcie_aer_init() adds a v2 AER capability regardless of the version of
> > the AER capability on the device.  Is this expected?  
> 
> I think it is not good.
> 
>   v2 defines a lot
> > more bits in various registers than v1, so are we simply hoping that
> > devices have reserved bits as zero like they're supposed to?  
> 
> I read the Capability Version in PCIe Spec. it says:
> 
> Capability Version – This field is a PCI-SIG defined version number
> that indicates the version of the Capability structure present.
> OR
> This field must be 2h if the End-End TLP Prefix Supported bit (see
> Section 7.8.15) is Set and must be 1h or 2h otherwise.
> 
> In my option, I guess that the spec may mean that v1/v2 is used for the
> End-End TLP Prefix Supported bit supported.
> 
> And I find that Kernel and QEmu don't do some special work for it. this
> feature may not affect the registers in AER.
> 
> can you give me some AER bits that are defined in v2 ,but not in v1?

There are quite a lot.  In the uncorrectable error status register bits
21-25 are only defined for v2, same for the mask and severity for this
register.  Bits 14 & 15 are new for v2 in the correctable error status
registers, status and mask.  Bits 9-11 are new in the control register.

> It's a
> > bit strange that for an Intel 82576 NIC I get a v1 AER capability w/o
> > aer=on and a v2 with.  Thanks,
> >  
> 
> Yes, I do the test, and get the same result for me.
> 
> I use the following device to test:
> 
> 06:00.0 Ethernet controller: Intel Corporation 82576 Gigabit Network
> Connection (rev 01)
> 
> Firstly, In host, I use the command "lspci -vvv -s 06:00.0":
> 
> [...]
> Capabilities: [100 v1] Advanced Error Reporting
> UESta:	DLP- SDES- TLP- FCP- CmpltTO- CmpltAbrt- UnxCmplt- RxOF-
>   MalfTLP- ECRC- UnsupReq- ACSViol-
> UEMsk:	DLP- SDES- TLP- FCP- CmpltTO- CmpltAbrt- UnxCmplt- RxOF-
> MalfTLP- ECRC- UnsupReq+ ACSViol-
> UESvrt:	DLP+ SDES- TLP- FCP+ CmpltTO- CmpltAbrt- UnxCmplt- RxOF+
> MalfTLP+ ECRC- UnsupReq- ACSViol-
> CESta:	RxErr- BadTLP- BadDLLP- Rollover- Timeout- NonFatalErr-
> CEMsk:	RxErr- BadTLP+ BadDLLP- Rollover- Timeout- NonFatalErr+
> AERCap:	First Error Pointer: 12, GenCap- CGenEn- ChkCap- ChkEn-
> [...]
> 
> Secondly, I pass through the device to the guest. In the guest:
> 
> [...]
> Capabilities: [100 v2] Advanced Error Reporting
> UESta:	DLP- SDES- TLP- FCP- CmpltTO- CmpltAbrt- UnxCmplt- RxOF-
>   MalfTLP- ECRC- UnsupReq- ACSViol-
> UEMsk:	DLP- SDES- TLP- FCP- CmpltTO- CmpltAbrt- UnxCmplt- RxOF-
> MalfTLP- ECRC- UnsupReq+ ACSViol-
> UESvrt:	DLP+ SDES- TLP- FCP+ CmpltTO- CmpltAbrt- UnxCmplt- RxOF+
> MalfTLP+ ECRC- UnsupReq- ACSViol-
> CESta:	RxErr- BadTLP- BadDLLP- Rollover- Timeout- NonFatalErr-
> CEMsk:	RxErr- BadTLP+ BadDLLP- Rollover- Timeout- NonFatalErr+
> AERCap:	First Error Pointer: 12, GenCap- CGenEn- ChkCap- ChkEn-
> [...]
> 
> They look the same, except the capabilities version.
> 
> Thirdly, I drop the patches and do the test. After I launch QEmu,
> In guest:
> 
> [...]
> Capabilities: [100 v1] Advanced Error Reporting
> [...]
> 
> Fourthly, I modify the PCI_ERR_VER: #define PCI_ERR_VER  1.
> In guest:
> [...]
> Capabilities: [100 v1] Advanced Error Reporting
> [...]
> 
> They also have the same features.
> 
> At last, I think the v1/v2 is not influence on our AER function.
> But, it is actually strange that we can get a v1 AER capability
> w/o aer=on and a v2 with.
> I suggest that we add a capability version parameter to extend the
> pcie_aer_init(),or add a new pcie_aer_v1_init() for the devices with
> v1 AER cap.

Agreed.  Thanks,

Alex
diff mbox

Patch

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 6a6160b..11c895c 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -1854,6 +1854,66 @@  static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos)
     return 0;
 }
 
+static int vfio_setup_aer(VFIOPCIDevice *vdev, uint8_t cap_ver,
+                          int pos, uint16_t size)
+{
+    PCIDevice *pdev = &vdev->pdev;
+    PCIDevice *dev_iter;
+    uint8_t type;
+    uint32_t errcap;
+
+    if (!(vdev->features & VFIO_FEATURE_ENABLE_AER)) {
+        pcie_add_capability(pdev, PCI_EXT_CAP_ID_ERR,
+                            cap_ver, pos, size);
+        return 0;
+    }
+
+    dev_iter = pci_bridge_get_device(pdev->bus);
+    if (!dev_iter) {
+        goto error;
+    }
+
+    while (dev_iter) {
+        if (!pci_is_express(dev_iter)) {
+            goto error;
+        }
+
+        type = pcie_cap_get_type(dev_iter);
+        if ((type != PCI_EXP_TYPE_ROOT_PORT &&
+             type != PCI_EXP_TYPE_UPSTREAM &&
+             type != PCI_EXP_TYPE_DOWNSTREAM)) {
+            goto error;
+        }
+
+        if (!dev_iter->exp.aer_cap) {
+            goto error;
+        }
+
+        dev_iter = pci_bridge_get_device(dev_iter->bus);
+    }
+
+    errcap = vfio_pci_read_config(pdev, pos + PCI_ERR_CAP, 4);
+    /*
+     * The ability to record multiple headers is depending on
+     * the state of the Multiple Header Recording Capable bit and
+     * enabled by the Multiple Header Recording Enable bit.
+     */
+    if ((errcap & PCI_ERR_CAP_MHRC) &&
+        (errcap & PCI_ERR_CAP_MHRE)) {
+        pdev->exp.aer_log.log_max = PCIE_AER_LOG_MAX_DEFAULT;
+    } else {
+        pdev->exp.aer_log.log_max = 0;
+    }
+
+    pcie_cap_deverr_init(pdev);
+    return pcie_aer_init(pdev, pos, size);
+
+error:
+    error_report("vfio: Unable to enable AER for device %s, parent bus "
+                 "does not support AER signaling", vdev->vbasedev.name);
+    return -1;
+}
+
 static int vfio_add_ext_cap(VFIOPCIDevice *vdev)
 {
     PCIDevice *pdev = &vdev->pdev;
@@ -1861,6 +1921,7 @@  static int vfio_add_ext_cap(VFIOPCIDevice *vdev)
     uint16_t cap_id, next, size;
     uint8_t cap_ver;
     uint8_t *config;
+    int ret = 0;
 
     /* Only add extended caps if we have them and the guest can see them */
     if (!pci_is_express(pdev) || !pci_bus_is_express(pdev->bus) ||
@@ -1914,6 +1975,9 @@  static int vfio_add_ext_cap(VFIOPCIDevice *vdev)
                                    PCI_EXT_CAP_NEXT_MASK);
 
         switch (cap_id) {
+        case PCI_EXT_CAP_ID_ERR:
+            ret = vfio_setup_aer(vdev, cap_ver, next, size);
+            break;
         case PCI_EXT_CAP_ID_SRIOV: /* Read-only VF BARs confuse OVMF */
             trace_vfio_add_ext_cap_dropped(vdev->vbasedev.name, cap_id, next);
             break;
@@ -1921,6 +1985,9 @@  static int vfio_add_ext_cap(VFIOPCIDevice *vdev)
             pcie_add_capability(pdev, cap_id, cap_ver, next, size);
         }
 
+        if (ret) {
+            goto out;
+        }
     }
 
     /* Cleanup chain head ID if necessary */
@@ -1928,8 +1995,9 @@  static int vfio_add_ext_cap(VFIOPCIDevice *vdev)
         pci_set_word(pdev->config + PCI_CONFIG_SPACE_SIZE, 0);
     }
 
+out:
     g_free(config);
-    return 0;
+    return ret;
 }
 
 static int vfio_add_capabilities(VFIOPCIDevice *vdev)
@@ -2698,6 +2766,11 @@  static int vfio_initfn(PCIDevice *pdev)
         goto out_teardown;
     }
 
+    if ((vdev->features & VFIO_FEATURE_ENABLE_AER) &&
+        !pdev->exp.aer_cap) {
+        goto out_teardown;
+    }
+
     if (vdev->vga) {
         vfio_vga_quirk_setup(vdev);
     }
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index 7d482d9..5483044 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -15,6 +15,7 @@ 
 #include "qemu-common.h"
 #include "exec/memory.h"
 #include "hw/pci/pci.h"
+#include "hw/pci/pci_bridge.h"
 #include "hw/vfio/vfio-common.h"
 #include "qemu/event_notifier.h"
 #include "qemu/queue.h"
@@ -132,6 +133,8 @@  typedef struct VFIOPCIDevice {
 #define VFIO_FEATURE_ENABLE_IGD_OPREGION_BIT 2
 #define VFIO_FEATURE_ENABLE_IGD_OPREGION \
                                 (1 << VFIO_FEATURE_ENABLE_IGD_OPREGION_BIT)
+#define VFIO_FEATURE_ENABLE_AER_BIT 3
+#define VFIO_FEATURE_ENABLE_AER (1 << VFIO_FEATURE_ENABLE_AER_BIT)
     int32_t bootindex;
     uint32_t igd_gms;
     uint8_t pm_cap;