diff mbox

[2/2,v2] vfio: blacklist loading of unstable roms

Message ID 1392841255-22741-3-git-send-email-bsd@redhat.com
State New
Headers show

Commit Message

Bandan Das Feb. 19, 2014, 8:20 p.m. UTC
Certain cards such as the Broadcom BCM57810 have rom quirks
that exhibit unstable system behavior duing device assignment. In
the particular case of 57810, rom execution hangs and if a FLR
follows, the device becomes inoperable until a power cycle. This
is a simple change to blacklist loading of rom for such cards
unless the user specifies a romfile or rombar=1 on the cmd line

Signed-off-by: Bandan Das <bsd@redhat.com>
---
 hw/misc/vfio.c | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 58 insertions(+)

Comments

Alex Williamson Feb. 19, 2014, 8:58 p.m. UTC | #1
On Wed, 2014-02-19 at 15:20 -0500, Bandan Das wrote:
> Certain cards such as the Broadcom BCM57810 have rom quirks
> that exhibit unstable system behavior duing device assignment. In
> the particular case of 57810, rom execution hangs and if a FLR
> follows, the device becomes inoperable until a power cycle. This
> is a simple change to blacklist loading of rom for such cards
> unless the user specifies a romfile or rombar=1 on the cmd line
> 
> Signed-off-by: Bandan Das <bsd@redhat.com>
> ---
>  hw/misc/vfio.c | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 58 insertions(+)
> 
> diff --git a/hw/misc/vfio.c b/hw/misc/vfio.c
> index 8db182f..58f348e 100644
> --- a/hw/misc/vfio.c
> +++ b/hw/misc/vfio.c
> @@ -209,6 +209,16 @@ typedef struct VFIOGroup {
>      QLIST_ENTRY(VFIOGroup) container_next;
>  } VFIOGroup;
>  
> +typedef struct VFIORomBlacklistEntry {
> +    uint16_t vendor_id;
> +    uint16_t device_id;
> +} VFIORomBlacklistEntry;
> +
> +static const VFIORomBlacklistEntry romblacklist[] = {
> +    /* Broadcom BCM 57810 */
> +    { 0x14e4, 0x168e }
> +};
> +

Ideally we'd want to be able to reference a bugzilla here so we have
some reference in the code to track developments.  Also, can we capture
the ROM version known to cause this problem so if somebody later says
that it works we can have something to compare?  The PCI firmware spec
defines the data structure.  Effectively you can dump the ROM from the
device, run xxd on it and look for the "PCIR" string that defines the
start of the PCI data structure.  The 2 bytes at offset 12h (where 0h is
the 'P' in "PCIR") have a revision level field.

>  #define MSIX_CAP_LENGTH 12
>  
>  static QLIST_HEAD(, VFIOContainer)
> @@ -1197,6 +1207,26 @@ static const MemoryRegionOps vfio_rom_ops = {
>      .endianness = DEVICE_LITTLE_ENDIAN,
>  };
>  
> +static bool vfio_blacklist_opt_rom(VFIODevice *vdev)
> +{
> +    PCIDevice *pdev = &vdev->pdev;
> +    uint16_t vendor_id, device_id;
> +    int count = 0;
> +
> +    vendor_id = pci_get_word(pdev->config + PCI_VENDOR_ID);
> +    device_id = pci_get_word(pdev->config + PCI_DEVICE_ID);
> +
> +    while (count < ARRAY_SIZE(romblacklist)) {
> +        if (romblacklist[count].vendor_id == vendor_id &&
> +            romblacklist[count].device_id == device_id) {
> +                return true;
> +        }
> +        count++;
> +    }
> +
> +    return false;
> +}
> +
>  static void vfio_pci_size_rom(VFIODevice *vdev)
>  {
>      uint32_t orig, size = cpu_to_le32((uint32_t)PCI_ROM_ADDRESS_MASK);
> @@ -1204,9 +1234,37 @@ static void vfio_pci_size_rom(VFIODevice *vdev)
>      char name[32];
>  
>      if (vdev->pdev.romfile || !vdev->pdev.rom_bar) {
> +        /* Since pci handles romfile, just print a message and return */
> +        if (vfio_blacklist_opt_rom(vdev) && vdev->pdev.romfile) {
> +            error_printf("Warning : Device at %04x:%02x:%02x.%x "
> +                         "is known to cause system instability issues during "
> +                         "option rom execution. "
> +                         "Proceeding anyway since user specified romfile\n",
> +                         vdev->host.domain, vdev->host.bus, vdev->host.slot,
> +                         vdev->host.function);
> +        }
>          return;
>      }
>  
> +    if (vfio_blacklist_opt_rom(vdev) && vdev->pdev.rom_bar) {
> +        if (vdev->pdev.rom_bar == 1) {
> +            error_printf("Warning : Device at %04x:%02x:%02x.%x "
> +                         "is known to cause system instability issues during "
> +                         "option rom execution. "
> +                         "Proceeding anyway since user specified rombar=1\n",
> +                         vdev->host.domain, vdev->host.bus, vdev->host.slot,
> +                         vdev->host.function);
> +        } else {
> +            error_printf("Warning : Rom loading for device at "
> +                         "%04x:%02x:%02x.%x has been disabled due to "
> +                         "system instability issues. "
> +                         "Specify rombar=1 or romfile to force\n",
> +                         vdev->host.domain, vdev->host.bus, vdev->host.slot,
> +                         vdev->host.function);
> +            return;
> +        }
> +    }
> +

What happens if this card, or some later user of this blacklisting, has
SKUs that don't have a ROM?  Aren't we going to print this last warning
regardless of whether we found anything to load?  Just shifting this
whole block down below the next couple tests is probably sufficient.
Thanks,

Alex

>      /*
>       * Use the same size ROM BAR as the physical device.  The contents
>       * will get filled in later when the guest tries to read it.
Bandan Das Feb. 20, 2014, 5:27 p.m. UTC | #2
Alex Williamson <alex.williamson@redhat.com> writes:

> On Wed, 2014-02-19 at 15:20 -0500, Bandan Das wrote:
>> Certain cards such as the Broadcom BCM57810 have rom quirks
>> that exhibit unstable system behavior duing device assignment. In
>> the particular case of 57810, rom execution hangs and if a FLR
>> follows, the device becomes inoperable until a power cycle. This
>> is a simple change to blacklist loading of rom for such cards
>> unless the user specifies a romfile or rombar=1 on the cmd line
>> 
>> Signed-off-by: Bandan Das <bsd@redhat.com>
>> ---
>>  hw/misc/vfio.c | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>>  1 file changed, 58 insertions(+)
>> 
>> diff --git a/hw/misc/vfio.c b/hw/misc/vfio.c
>> index 8db182f..58f348e 100644
>> --- a/hw/misc/vfio.c
>> +++ b/hw/misc/vfio.c
>> @@ -209,6 +209,16 @@ typedef struct VFIOGroup {
>>      QLIST_ENTRY(VFIOGroup) container_next;
>>  } VFIOGroup;
>>  
>> +typedef struct VFIORomBlacklistEntry {
>> +    uint16_t vendor_id;
>> +    uint16_t device_id;
>> +} VFIORomBlacklistEntry;
>> +
>> +static const VFIORomBlacklistEntry romblacklist[] = {
>> +    /* Broadcom BCM 57810 */
>> +    { 0x14e4, 0x168e }
>> +};
>> +
>
> Ideally we'd want to be able to reference a bugzilla here so we have
> some reference in the code to track developments.  Also, can we capture
> the ROM version known to cause this problem so if somebody later says
> that it works we can have something to compare?  The PCI firmware spec
> defines the data structure.  Effectively you can dump the ROM from the
> device, run xxd on it and look for the "PCIR" string that defines the
> start of the PCI data structure.  The 2 bytes at offset 12h (where 0h is
> the 'P' in "PCIR") have a revision level field.
>
>>  #define MSIX_CAP_LENGTH 12
>>  
>>  static QLIST_HEAD(, VFIOContainer)
>> @@ -1197,6 +1207,26 @@ static const MemoryRegionOps vfio_rom_ops = {
>>      .endianness = DEVICE_LITTLE_ENDIAN,
>>  };
>>  
>> +static bool vfio_blacklist_opt_rom(VFIODevice *vdev)
>> +{
>> +    PCIDevice *pdev = &vdev->pdev;
>> +    uint16_t vendor_id, device_id;
>> +    int count = 0;
>> +
>> +    vendor_id = pci_get_word(pdev->config + PCI_VENDOR_ID);
>> +    device_id = pci_get_word(pdev->config + PCI_DEVICE_ID);
>> +
>> +    while (count < ARRAY_SIZE(romblacklist)) {
>> +        if (romblacklist[count].vendor_id == vendor_id &&
>> +            romblacklist[count].device_id == device_id) {
>> +                return true;
>> +        }
>> +        count++;
>> +    }
>> +
>> +    return false;
>> +}
>> +
>>  static void vfio_pci_size_rom(VFIODevice *vdev)
>>  {
>>      uint32_t orig, size = cpu_to_le32((uint32_t)PCI_ROM_ADDRESS_MASK);
>> @@ -1204,9 +1234,37 @@ static void vfio_pci_size_rom(VFIODevice *vdev)
>>      char name[32];
>>  
>>      if (vdev->pdev.romfile || !vdev->pdev.rom_bar) {
>> +        /* Since pci handles romfile, just print a message and return */
>> +        if (vfio_blacklist_opt_rom(vdev) && vdev->pdev.romfile) {
>> +            error_printf("Warning : Device at %04x:%02x:%02x.%x "
>> +                         "is known to cause system instability issues during "
>> +                         "option rom execution. "
>> +                         "Proceeding anyway since user specified romfile\n",
>> +                         vdev->host.domain, vdev->host.bus, vdev->host.slot,
>> +                         vdev->host.function);
>> +        }
>>          return;
>>      }
>>  
>> +    if (vfio_blacklist_opt_rom(vdev) && vdev->pdev.rom_bar) {
>> +        if (vdev->pdev.rom_bar == 1) {
>> +            error_printf("Warning : Device at %04x:%02x:%02x.%x "
>> +                         "is known to cause system instability issues during "
>> +                         "option rom execution. "
>> +                         "Proceeding anyway since user specified rombar=1\n",
>> +                         vdev->host.domain, vdev->host.bus, vdev->host.slot,
>> +                         vdev->host.function);
>> +        } else {
>> +            error_printf("Warning : Rom loading for device at "
>> +                         "%04x:%02x:%02x.%x has been disabled due to "
>> +                         "system instability issues. "
>> +                         "Specify rombar=1 or romfile to force\n",
>> +                         vdev->host.domain, vdev->host.bus, vdev->host.slot,
>> +                         vdev->host.function);
>> +            return;
>> +        }
>> +    }
>> +
>
> What happens if this card, or some later user of this blacklisting, has
> SKUs that don't have a ROM?  Aren't we going to print this last warning
> regardless of whether we found anything to load?  Just shifting this
> whole block down below the next couple tests is probably sufficient.
> Thanks,

Thanks for catching this! I will shift this down for the next version..

> Alex
>
>>      /*
>>       * Use the same size ROM BAR as the physical device.  The contents
>>       * will get filled in later when the guest tries to read it.
diff mbox

Patch

diff --git a/hw/misc/vfio.c b/hw/misc/vfio.c
index 8db182f..58f348e 100644
--- a/hw/misc/vfio.c
+++ b/hw/misc/vfio.c
@@ -209,6 +209,16 @@  typedef struct VFIOGroup {
     QLIST_ENTRY(VFIOGroup) container_next;
 } VFIOGroup;
 
+typedef struct VFIORomBlacklistEntry {
+    uint16_t vendor_id;
+    uint16_t device_id;
+} VFIORomBlacklistEntry;
+
+static const VFIORomBlacklistEntry romblacklist[] = {
+    /* Broadcom BCM 57810 */
+    { 0x14e4, 0x168e }
+};
+
 #define MSIX_CAP_LENGTH 12
 
 static QLIST_HEAD(, VFIOContainer)
@@ -1197,6 +1207,26 @@  static const MemoryRegionOps vfio_rom_ops = {
     .endianness = DEVICE_LITTLE_ENDIAN,
 };
 
+static bool vfio_blacklist_opt_rom(VFIODevice *vdev)
+{
+    PCIDevice *pdev = &vdev->pdev;
+    uint16_t vendor_id, device_id;
+    int count = 0;
+
+    vendor_id = pci_get_word(pdev->config + PCI_VENDOR_ID);
+    device_id = pci_get_word(pdev->config + PCI_DEVICE_ID);
+
+    while (count < ARRAY_SIZE(romblacklist)) {
+        if (romblacklist[count].vendor_id == vendor_id &&
+            romblacklist[count].device_id == device_id) {
+                return true;
+        }
+        count++;
+    }
+
+    return false;
+}
+
 static void vfio_pci_size_rom(VFIODevice *vdev)
 {
     uint32_t orig, size = cpu_to_le32((uint32_t)PCI_ROM_ADDRESS_MASK);
@@ -1204,9 +1234,37 @@  static void vfio_pci_size_rom(VFIODevice *vdev)
     char name[32];
 
     if (vdev->pdev.romfile || !vdev->pdev.rom_bar) {
+        /* Since pci handles romfile, just print a message and return */
+        if (vfio_blacklist_opt_rom(vdev) && vdev->pdev.romfile) {
+            error_printf("Warning : Device at %04x:%02x:%02x.%x "
+                         "is known to cause system instability issues during "
+                         "option rom execution. "
+                         "Proceeding anyway since user specified romfile\n",
+                         vdev->host.domain, vdev->host.bus, vdev->host.slot,
+                         vdev->host.function);
+        }
         return;
     }
 
+    if (vfio_blacklist_opt_rom(vdev) && vdev->pdev.rom_bar) {
+        if (vdev->pdev.rom_bar == 1) {
+            error_printf("Warning : Device at %04x:%02x:%02x.%x "
+                         "is known to cause system instability issues during "
+                         "option rom execution. "
+                         "Proceeding anyway since user specified rombar=1\n",
+                         vdev->host.domain, vdev->host.bus, vdev->host.slot,
+                         vdev->host.function);
+        } else {
+            error_printf("Warning : Rom loading for device at "
+                         "%04x:%02x:%02x.%x has been disabled due to "
+                         "system instability issues. "
+                         "Specify rombar=1 or romfile to force\n",
+                         vdev->host.domain, vdev->host.bus, vdev->host.slot,
+                         vdev->host.function);
+            return;
+        }
+    }
+
     /*
      * Use the same size ROM BAR as the physical device.  The contents
      * will get filled in later when the guest tries to read it.