diff mbox

[RFC,v2,2/8] vfio-pci: add aer capability support

Message ID 515cec0e8873b456341be9a0272f5fbea805c65f.1422433767.git.chen.fan.fnst@cn.fujitsu.com
State New
Headers show

Commit Message

chenfan Jan. 28, 2015, 8:37 a.m. UTC
if we detect the aer capability in vfio device, then
we should initialize the vfio device aer rigister bits.
so guest OS can set this bits as needed.

Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
 hw/vfio/pci.c | 72 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 72 insertions(+)

Comments

Alex Williamson Feb. 2, 2015, 8:15 p.m. UTC | #1
On Wed, 2015-01-28 at 16:37 +0800, Chen Fan wrote:
> if we detect the aer capability in vfio device, then
> we should initialize the vfio device aer rigister bits.
> so guest OS can set this bits as needed.

s/rigister/register/

> Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> ---
>  hw/vfio/pci.c | 72 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 72 insertions(+)
> 
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index 014a92c..2072261 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -2670,6 +2670,73 @@ static int vfio_add_capabilities(VFIOPCIDevice *vdev)
>      return vfio_add_std_cap(vdev, pdev->config[PCI_CAPABILITY_LIST]);
>  }
>  
> +static void vfio_pci_aer_init(VFIOPCIDevice *vdev)
> +{
> +    PCIDevice *pdev = &vdev->pdev;
> +    PCIExpressDevice *exp = &pdev->exp;
> +    uint16_t offset = exp->aer_cap;
> +
> +    if (!offset) {
> +        return;
> +    }
> +

All of these need to be documented with comments.

> +    memset(pdev->wmask + offset, 0, PCI_ERR_SIZEOF);
> +    memset(pdev->w1cmask + offset, 0, PCI_ERR_SIZEOF);
> +    memset(pdev->cmask + offset, 0xFF, PCI_ERR_SIZEOF);
> +
> +    pci_long_test_and_set_mask(pdev->wmask + exp->exp_cap + PCI_EXP_DEVCTL,
> +                               PCI_EXP_DEVCTL_CERE | PCI_EXP_DEVCTL_NFERE |
> +                               PCI_EXP_DEVCTL_FERE | PCI_EXP_DEVCTL_URRE);
> +    pci_long_test_and_set_mask(pdev->w1cmask + exp->exp_cap + PCI_EXP_DEVSTA,
> +                               PCI_EXP_DEVSTA_CED | PCI_EXP_DEVSTA_NFED |
> +                               PCI_EXP_DEVSTA_FED | PCI_EXP_DEVSTA_URD);
> +
> +    pci_set_long(pdev->w1cmask + offset + PCI_ERR_UNCOR_STATUS,
> +                 PCI_ERR_UNC_SUPPORTED);
> +    pci_set_long(pdev->wmask + offset + PCI_ERR_UNCOR_SEVER,
> +                 PCI_ERR_UNC_SUPPORTED);
> +    pci_long_test_and_set_mask(pdev->w1cmask + offset + PCI_ERR_COR_STATUS,
> +                               PCI_ERR_COR_STATUS);
> +    pci_set_long(pdev->wmask + offset + PCI_ERR_COR_MASK,
> +                 PCI_ERR_COR_SUPPORTED);
> +
> +    pci_set_long(pdev->wmask + offset + PCI_ERR_CAP,
> +                 PCI_ERR_CAP_ECRC_GENE | PCI_ERR_CAP_ECRC_CHKE |
> +                 PCI_ERR_CAP_MHRE);
> +}
> +
> +static int vfio_add_ext_capabilities(VFIOPCIDevice *vdev)
> +{
> +    PCIDevice *pdev = &vdev->pdev;
> +    PCIExpressDevice *exp;
> +    uint32_t header;
> +    uint16_t next = PCI_CONFIG_SPACE_SIZE;
> +
> +    if (pci_config_size(pdev) <= PCI_CONFIG_SPACE_SIZE) {
> +        return 0;
> +    }
> +
> +    header = pci_get_long(pdev->config + next);
> +    while (header) {
> +        switch (PCI_EXT_CAP_ID(header)) {
> +        case PCI_EXT_CAP_ID_ERR:
> +             exp = &pdev->exp;
> +             exp->aer_cap = next;

Shouldn't we call pcie_aer_init() here?

> +
> +             vfio_pci_aer_init(vdev);
> +             break;
> +        };
> +
> +        next = PCI_EXT_CAP_NEXT(header);
> +        if (!next) {
> +            return 0;
> +        }
> +        header = pci_get_long(pdev->config + next);

I'd like to see this look more like vfio_add_std_cap(), registering
every capability with the QEMU PCIe-core and setting up emulation to
allow QEMU to skip capabilities that it doesn't want to expose.

> +    }
> +
> +    return 0;
> +}
> +
>  static void vfio_pci_pre_reset(VFIOPCIDevice *vdev)
>  {
>      PCIDevice *pdev = &vdev->pdev;
> @@ -3296,6 +3363,11 @@ static int vfio_initfn(PCIDevice *pdev)
>          goto out_teardown;
>      }
>  
> +    ret = vfio_add_ext_capabilities(vdev);
> +    if (ret) {
> +        goto out_teardown;
> +    }
> +

Why not extend vfio_add_capabilities()?  It specifically calls
vfio_add_std_cap() in order that there could be a vfio_add_ext_cap().

>      /* QEMU emulates all of MSI & MSIX */
>      if (pdev->cap_present & QEMU_PCI_CAP_MSIX) {
>          memset(vdev->emulated_config_bits + pdev->msix_cap, 0xff,
chenfan Feb. 6, 2015, 7:03 a.m. UTC | #2
On 02/03/2015 04:15 AM, Alex Williamson wrote:
> On Wed, 2015-01-28 at 16:37 +0800, Chen Fan wrote:
>> if we detect the aer capability in vfio device, then
>> we should initialize the vfio device aer rigister bits.
>> so guest OS can set this bits as needed.
> s/rigister/register/
>
>> Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
>> ---
>>   hw/vfio/pci.c | 72 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>>   1 file changed, 72 insertions(+)
>>
>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
>> index 014a92c..2072261 100644
>> --- a/hw/vfio/pci.c
>> +++ b/hw/vfio/pci.c
>> @@ -2670,6 +2670,73 @@ static int vfio_add_capabilities(VFIOPCIDevice *vdev)
>>       return vfio_add_std_cap(vdev, pdev->config[PCI_CAPABILITY_LIST]);
>>   }
>>   
>> +static void vfio_pci_aer_init(VFIOPCIDevice *vdev)
>> +{
>> +    PCIDevice *pdev = &vdev->pdev;
>> +    PCIExpressDevice *exp = &pdev->exp;
>> +    uint16_t offset = exp->aer_cap;
>> +
>> +    if (!offset) {
>> +        return;
>> +    }
>> +
> All of these need to be documented with comments.
>
>> +    memset(pdev->wmask + offset, 0, PCI_ERR_SIZEOF);
>> +    memset(pdev->w1cmask + offset, 0, PCI_ERR_SIZEOF);
>> +    memset(pdev->cmask + offset, 0xFF, PCI_ERR_SIZEOF);
>> +
>> +    pci_long_test_and_set_mask(pdev->wmask + exp->exp_cap + PCI_EXP_DEVCTL,
>> +                               PCI_EXP_DEVCTL_CERE | PCI_EXP_DEVCTL_NFERE |
>> +                               PCI_EXP_DEVCTL_FERE | PCI_EXP_DEVCTL_URRE);
>> +    pci_long_test_and_set_mask(pdev->w1cmask + exp->exp_cap + PCI_EXP_DEVSTA,
>> +                               PCI_EXP_DEVSTA_CED | PCI_EXP_DEVSTA_NFED |
>> +                               PCI_EXP_DEVSTA_FED | PCI_EXP_DEVSTA_URD);
>> +
>> +    pci_set_long(pdev->w1cmask + offset + PCI_ERR_UNCOR_STATUS,
>> +                 PCI_ERR_UNC_SUPPORTED);
>> +    pci_set_long(pdev->wmask + offset + PCI_ERR_UNCOR_SEVER,
>> +                 PCI_ERR_UNC_SUPPORTED);
>> +    pci_long_test_and_set_mask(pdev->w1cmask + offset + PCI_ERR_COR_STATUS,
>> +                               PCI_ERR_COR_STATUS);
>> +    pci_set_long(pdev->wmask + offset + PCI_ERR_COR_MASK,
>> +                 PCI_ERR_COR_SUPPORTED);
>> +
>> +    pci_set_long(pdev->wmask + offset + PCI_ERR_CAP,
>> +                 PCI_ERR_CAP_ECRC_GENE | PCI_ERR_CAP_ECRC_CHKE |
>> +                 PCI_ERR_CAP_MHRE);
>> +}
>> +
>> +static int vfio_add_ext_capabilities(VFIOPCIDevice *vdev)
>> +{
>> +    PCIDevice *pdev = &vdev->pdev;
>> +    PCIExpressDevice *exp;
>> +    uint32_t header;
>> +    uint16_t next = PCI_CONFIG_SPACE_SIZE;
>> +
>> +    if (pci_config_size(pdev) <= PCI_CONFIG_SPACE_SIZE) {
>> +        return 0;
>> +    }
>> +
>> +    header = pci_get_long(pdev->config + next);
>> +    while (header) {
>> +        switch (PCI_EXT_CAP_ID(header)) {
>> +        case PCI_EXT_CAP_ID_ERR:
>> +             exp = &pdev->exp;
>> +             exp->aer_cap = next;
> Shouldn't we call pcie_aer_init() here?
I am afraid pcie_aer_init() maybe impact the corresponding values
in pdev->config.

>
>> +
>> +             vfio_pci_aer_init(vdev);
>> +             break;
>> +        };
>> +
>> +        next = PCI_EXT_CAP_NEXT(header);
>> +        if (!next) {
>> +            return 0;
>> +        }
>> +        header = pci_get_long(pdev->config + next);
> I'd like to see this look more like vfio_add_std_cap(), registering
> every capability with the QEMU PCIe-core and setting up emulation to
> allow QEMU to skip capabilities that it doesn't want to expose.
>
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>>   static void vfio_pci_pre_reset(VFIOPCIDevice *vdev)
>>   {
>>       PCIDevice *pdev = &vdev->pdev;
>> @@ -3296,6 +3363,11 @@ static int vfio_initfn(PCIDevice *pdev)
>>           goto out_teardown;
>>       }
>>   
>> +    ret = vfio_add_ext_capabilities(vdev);
>> +    if (ret) {
>> +        goto out_teardown;
>> +    }
>> +
> Why not extend vfio_add_capabilities()?  It specifically calls
> vfio_add_std_cap() in order that there could be a vfio_add_ext_cap().
it is a good idea.

Thanks,
Chen


>
>>       /* QEMU emulates all of MSI & MSIX */
>>       if (pdev->cap_present & QEMU_PCI_CAP_MSIX) {
>>           memset(vdev->emulated_config_bits + pdev->msix_cap, 0xff,
>
>
> .
>
Alex Williamson Feb. 6, 2015, 1:43 p.m. UTC | #3
On Fri, 2015-02-06 at 15:03 +0800, Chen Fan wrote:
> On 02/03/2015 04:15 AM, Alex Williamson wrote:
> > On Wed, 2015-01-28 at 16:37 +0800, Chen Fan wrote:
> >> if we detect the aer capability in vfio device, then
> >> we should initialize the vfio device aer rigister bits.
> >> so guest OS can set this bits as needed.
> > s/rigister/register/
> >
> >> Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> >> ---
> >>   hw/vfio/pci.c | 72 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
> >>   1 file changed, 72 insertions(+)
> >>
> >> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> >> index 014a92c..2072261 100644
> >> --- a/hw/vfio/pci.c
> >> +++ b/hw/vfio/pci.c
> >> @@ -2670,6 +2670,73 @@ static int vfio_add_capabilities(VFIOPCIDevice *vdev)
> >>       return vfio_add_std_cap(vdev, pdev->config[PCI_CAPABILITY_LIST]);
> >>   }
> >>   
> >> +static void vfio_pci_aer_init(VFIOPCIDevice *vdev)
> >> +{
> >> +    PCIDevice *pdev = &vdev->pdev;
> >> +    PCIExpressDevice *exp = &pdev->exp;
> >> +    uint16_t offset = exp->aer_cap;
> >> +
> >> +    if (!offset) {
> >> +        return;
> >> +    }
> >> +
> > All of these need to be documented with comments.
> >
> >> +    memset(pdev->wmask + offset, 0, PCI_ERR_SIZEOF);
> >> +    memset(pdev->w1cmask + offset, 0, PCI_ERR_SIZEOF);
> >> +    memset(pdev->cmask + offset, 0xFF, PCI_ERR_SIZEOF);
> >> +
> >> +    pci_long_test_and_set_mask(pdev->wmask + exp->exp_cap + PCI_EXP_DEVCTL,
> >> +                               PCI_EXP_DEVCTL_CERE | PCI_EXP_DEVCTL_NFERE |
> >> +                               PCI_EXP_DEVCTL_FERE | PCI_EXP_DEVCTL_URRE);
> >> +    pci_long_test_and_set_mask(pdev->w1cmask + exp->exp_cap + PCI_EXP_DEVSTA,
> >> +                               PCI_EXP_DEVSTA_CED | PCI_EXP_DEVSTA_NFED |
> >> +                               PCI_EXP_DEVSTA_FED | PCI_EXP_DEVSTA_URD);
> >> +
> >> +    pci_set_long(pdev->w1cmask + offset + PCI_ERR_UNCOR_STATUS,
> >> +                 PCI_ERR_UNC_SUPPORTED);
> >> +    pci_set_long(pdev->wmask + offset + PCI_ERR_UNCOR_SEVER,
> >> +                 PCI_ERR_UNC_SUPPORTED);
> >> +    pci_long_test_and_set_mask(pdev->w1cmask + offset + PCI_ERR_COR_STATUS,
> >> +                               PCI_ERR_COR_STATUS);
> >> +    pci_set_long(pdev->wmask + offset + PCI_ERR_COR_MASK,
> >> +                 PCI_ERR_COR_SUPPORTED);
> >> +
> >> +    pci_set_long(pdev->wmask + offset + PCI_ERR_CAP,
> >> +                 PCI_ERR_CAP_ECRC_GENE | PCI_ERR_CAP_ECRC_CHKE |
> >> +                 PCI_ERR_CAP_MHRE);
> >> +}
> >> +
> >> +static int vfio_add_ext_capabilities(VFIOPCIDevice *vdev)
> >> +{
> >> +    PCIDevice *pdev = &vdev->pdev;
> >> +    PCIExpressDevice *exp;
> >> +    uint32_t header;
> >> +    uint16_t next = PCI_CONFIG_SPACE_SIZE;
> >> +
> >> +    if (pci_config_size(pdev) <= PCI_CONFIG_SPACE_SIZE) {
> >> +        return 0;
> >> +    }
> >> +
> >> +    header = pci_get_long(pdev->config + next);
> >> +    while (header) {
> >> +        switch (PCI_EXT_CAP_ID(header)) {
> >> +        case PCI_EXT_CAP_ID_ERR:
> >> +             exp = &pdev->exp;
> >> +             exp->aer_cap = next;
> > Shouldn't we call pcie_aer_init() here?
> I am afraid pcie_aer_init() maybe impact the corresponding values
> in pdev->config.

Then we should do cleanup after calling pcie_aer_init() or modify
pcie_aer_init() to do what we need.  Doing raw aer manipulation outside
of core support just causes maintenance issues down the road.  Thanks,

Alex

> >> +
> >> +             vfio_pci_aer_init(vdev);
> >> +             break;
> >> +        };
> >> +
> >> +        next = PCI_EXT_CAP_NEXT(header);
> >> +        if (!next) {
> >> +            return 0;
> >> +        }
> >> +        header = pci_get_long(pdev->config + next);
> > I'd like to see this look more like vfio_add_std_cap(), registering
> > every capability with the QEMU PCIe-core and setting up emulation to
> > allow QEMU to skip capabilities that it doesn't want to expose.
> >
> >> +    }
> >> +
> >> +    return 0;
> >> +}
> >> +
> >>   static void vfio_pci_pre_reset(VFIOPCIDevice *vdev)
> >>   {
> >>       PCIDevice *pdev = &vdev->pdev;
> >> @@ -3296,6 +3363,11 @@ static int vfio_initfn(PCIDevice *pdev)
> >>           goto out_teardown;
> >>       }
> >>   
> >> +    ret = vfio_add_ext_capabilities(vdev);
> >> +    if (ret) {
> >> +        goto out_teardown;
> >> +    }
> >> +
> > Why not extend vfio_add_capabilities()?  It specifically calls
> > vfio_add_std_cap() in order that there could be a vfio_add_ext_cap().
> it is a good idea.
> 
> Thanks,
> Chen
> 
> 
> >
> >>       /* QEMU emulates all of MSI & MSIX */
> >>       if (pdev->cap_present & QEMU_PCI_CAP_MSIX) {
> >>           memset(vdev->emulated_config_bits + pdev->msix_cap, 0xff,
> >
> >
> > .
> >
>
diff mbox

Patch

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 014a92c..2072261 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -2670,6 +2670,73 @@  static int vfio_add_capabilities(VFIOPCIDevice *vdev)
     return vfio_add_std_cap(vdev, pdev->config[PCI_CAPABILITY_LIST]);
 }
 
+static void vfio_pci_aer_init(VFIOPCIDevice *vdev)
+{
+    PCIDevice *pdev = &vdev->pdev;
+    PCIExpressDevice *exp = &pdev->exp;
+    uint16_t offset = exp->aer_cap;
+
+    if (!offset) {
+        return;
+    }
+
+    memset(pdev->wmask + offset, 0, PCI_ERR_SIZEOF);
+    memset(pdev->w1cmask + offset, 0, PCI_ERR_SIZEOF);
+    memset(pdev->cmask + offset, 0xFF, PCI_ERR_SIZEOF);
+
+    pci_long_test_and_set_mask(pdev->wmask + exp->exp_cap + PCI_EXP_DEVCTL,
+                               PCI_EXP_DEVCTL_CERE | PCI_EXP_DEVCTL_NFERE |
+                               PCI_EXP_DEVCTL_FERE | PCI_EXP_DEVCTL_URRE);
+    pci_long_test_and_set_mask(pdev->w1cmask + exp->exp_cap + PCI_EXP_DEVSTA,
+                               PCI_EXP_DEVSTA_CED | PCI_EXP_DEVSTA_NFED |
+                               PCI_EXP_DEVSTA_FED | PCI_EXP_DEVSTA_URD);
+
+    pci_set_long(pdev->w1cmask + offset + PCI_ERR_UNCOR_STATUS,
+                 PCI_ERR_UNC_SUPPORTED);
+    pci_set_long(pdev->wmask + offset + PCI_ERR_UNCOR_SEVER,
+                 PCI_ERR_UNC_SUPPORTED);
+    pci_long_test_and_set_mask(pdev->w1cmask + offset + PCI_ERR_COR_STATUS,
+                               PCI_ERR_COR_STATUS);
+    pci_set_long(pdev->wmask + offset + PCI_ERR_COR_MASK,
+                 PCI_ERR_COR_SUPPORTED);
+
+    pci_set_long(pdev->wmask + offset + PCI_ERR_CAP,
+                 PCI_ERR_CAP_ECRC_GENE | PCI_ERR_CAP_ECRC_CHKE |
+                 PCI_ERR_CAP_MHRE);
+}
+
+static int vfio_add_ext_capabilities(VFIOPCIDevice *vdev)
+{
+    PCIDevice *pdev = &vdev->pdev;
+    PCIExpressDevice *exp;
+    uint32_t header;
+    uint16_t next = PCI_CONFIG_SPACE_SIZE;
+
+    if (pci_config_size(pdev) <= PCI_CONFIG_SPACE_SIZE) {
+        return 0;
+    }
+
+    header = pci_get_long(pdev->config + next);
+    while (header) {
+        switch (PCI_EXT_CAP_ID(header)) {
+        case PCI_EXT_CAP_ID_ERR:
+             exp = &pdev->exp;
+             exp->aer_cap = next;
+
+             vfio_pci_aer_init(vdev);
+             break;
+        };
+
+        next = PCI_EXT_CAP_NEXT(header);
+        if (!next) {
+            return 0;
+        }
+        header = pci_get_long(pdev->config + next);
+    }
+
+    return 0;
+}
+
 static void vfio_pci_pre_reset(VFIOPCIDevice *vdev)
 {
     PCIDevice *pdev = &vdev->pdev;
@@ -3296,6 +3363,11 @@  static int vfio_initfn(PCIDevice *pdev)
         goto out_teardown;
     }
 
+    ret = vfio_add_ext_capabilities(vdev);
+    if (ret) {
+        goto out_teardown;
+    }
+
     /* QEMU emulates all of MSI & MSIX */
     if (pdev->cap_present & QEMU_PCI_CAP_MSIX) {
         memset(vdev->emulated_config_bits + pdev->msix_cap, 0xff,