diff mbox

[2/2,v3] vfio: blacklist loading of unstable roms

Message ID 1393302864-11348-3-git-send-email-bsd@redhat.com
State New
Headers show

Commit Message

Bandan Das Feb. 25, 2014, 4:34 a.m. UTC
Certain cards such as the Broadcom BCM57810 have rom quirks
that exhibit unstable system behavior duing device assignment. In
the particular case of 57810, rom execution hangs and if a FLR
follows, the device becomes inoperable until a power cycle. This
change blacklists loading of rom for such cards unless the user
specifies a romfile or rombar=1 on the cmd line

Signed-off-by: Bandan Das <bsd@redhat.com>
---
 hw/misc/vfio.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 60 insertions(+)

Comments

Alex Williamson Feb. 25, 2014, 3:41 p.m. UTC | #1
On Mon, 2014-02-24 at 23:34 -0500, Bandan Das wrote:
> Certain cards such as the Broadcom BCM57810 have rom quirks
> that exhibit unstable system behavior duing device assignment. In
> the particular case of 57810, rom execution hangs and if a FLR
> follows, the device becomes inoperable until a power cycle. This
> change blacklists loading of rom for such cards unless the user
> specifies a romfile or rombar=1 on the cmd line
> 
> Signed-off-by: Bandan Das <bsd@redhat.com>
> ---
>  hw/misc/vfio.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 60 insertions(+)
> 
> diff --git a/hw/misc/vfio.c b/hw/misc/vfio.c
> index 8db182f..df3ceee 100644
> --- a/hw/misc/vfio.c
> +++ b/hw/misc/vfio.c
> @@ -209,6 +209,16 @@ typedef struct VFIOGroup {
>      QLIST_ENTRY(VFIOGroup) container_next;
>  } VFIOGroup;
>  
> +typedef struct VFIORomBlacklistEntry {
> +    uint16_t vendor_id;
> +    uint16_t device_id;
> +} VFIORomBlacklistEntry;
> +
> +static const VFIORomBlacklistEntry romblacklist[] = {
> +    /* Broadcom BCM 57810 */
> +    { 0x14e4, 0x168e }
> +};
> +

Any progress on a bug reference or trying to extract a version from the
ROM so we can compare against future ROMs?  We can always file a new bug
in launchpad for tracking if needed.

>  #define MSIX_CAP_LENGTH 12
>  
>  static QLIST_HEAD(, VFIOContainer)
> @@ -1197,13 +1207,43 @@ static const MemoryRegionOps vfio_rom_ops = {
>      .endianness = DEVICE_LITTLE_ENDIAN,
>  };
>  
> +static bool vfio_blacklist_opt_rom(VFIODevice *vdev)
> +{
> +    PCIDevice *pdev = &vdev->pdev;
> +    uint16_t vendor_id, device_id;
> +    int count = 0;
> +
> +    vendor_id = pci_get_word(pdev->config + PCI_VENDOR_ID);
> +    device_id = pci_get_word(pdev->config + PCI_DEVICE_ID);
> +
> +    while (count < ARRAY_SIZE(romblacklist)) {
> +        if (romblacklist[count].vendor_id == vendor_id &&
> +            romblacklist[count].device_id == device_id) {
> +                return true;
> +        }
> +        count++;
> +    }
> +
> +    return false;
> +}
> +
>  static void vfio_pci_size_rom(VFIODevice *vdev)
>  {
>      uint32_t orig, size = cpu_to_le32((uint32_t)PCI_ROM_ADDRESS_MASK);
>      off_t offset = vdev->config_offset + PCI_ROM_ADDRESS;
> +    DeviceState *dev = DEVICE(vdev);
>      char name[32];
>  
>      if (vdev->pdev.romfile || !vdev->pdev.rom_bar) {
> +        /* Since pci handles romfile, just print a message and return */
> +        if (vfio_blacklist_opt_rom(vdev) && vdev->pdev.romfile) {
> +            error_printf("Warning : Device at %04x:%02x:%02x.%x "
> +                         "is known to cause system instability issues during "
> +                         "option rom execution. "
> +                         "Proceeding anyway since user specified romfile\n",
> +                         vdev->host.domain, vdev->host.bus, vdev->host.slot,
> +                         vdev->host.function);
> +        }
>          return;
>      }
>  
> @@ -1227,6 +1267,26 @@ static void vfio_pci_size_rom(VFIODevice *vdev)
>          return;
>      }
>  
> +    if (vfio_blacklist_opt_rom(vdev) && vdev->pdev.rom_bar) {

We would have taken the return above if !rom_bar, so that test is
unnecessary here.  Thanks,

Alex

> +        if (dev->opts && qemu_opt_get(dev->opts, "rombar")) {
> +            error_printf("Warning : Device at %04x:%02x:%02x.%x "
> +                         "is known to cause system instability issues during "
> +                         "option rom execution. "
> +                         "Proceeding anyway since user specified non zero value for "
> +                         "rombar\n",
> +                         vdev->host.domain, vdev->host.bus, vdev->host.slot,
> +                         vdev->host.function);
> +        } else {
> +            error_printf("Warning : Rom loading for device at "
> +                         "%04x:%02x:%02x.%x has been disabled due to "
> +                         "system instability issues. "
> +                         "Specify rombar=1 or romfile to force\n",
> +                         vdev->host.domain, vdev->host.bus, vdev->host.slot,
> +                         vdev->host.function);
> +            return;
> +        }
> +    }
> +
>      DPRINTF("%04x:%02x:%02x.%x ROM size 0x%x\n", vdev->host.domain,
>              vdev->host.bus, vdev->host.slot, vdev->host.function, size);
>
Bandan Das Feb. 25, 2014, 4:51 p.m. UTC | #2
Alex Williamson <alex.williamson@redhat.com> writes:

> On Mon, 2014-02-24 at 23:34 -0500, Bandan Das wrote:
>> Certain cards such as the Broadcom BCM57810 have rom quirks
>> that exhibit unstable system behavior duing device assignment. In
>> the particular case of 57810, rom execution hangs and if a FLR
>> follows, the device becomes inoperable until a power cycle. This
>> change blacklists loading of rom for such cards unless the user
>> specifies a romfile or rombar=1 on the cmd line
>> 
>> Signed-off-by: Bandan Das <bsd@redhat.com>
>> ---
>>  hw/misc/vfio.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>>  1 file changed, 60 insertions(+)
>> 
>> diff --git a/hw/misc/vfio.c b/hw/misc/vfio.c
>> index 8db182f..df3ceee 100644
>> --- a/hw/misc/vfio.c
>> +++ b/hw/misc/vfio.c
>> @@ -209,6 +209,16 @@ typedef struct VFIOGroup {
>>      QLIST_ENTRY(VFIOGroup) container_next;
>>  } VFIOGroup;
>>  
>> +typedef struct VFIORomBlacklistEntry {
>> +    uint16_t vendor_id;
>> +    uint16_t device_id;
>> +} VFIORomBlacklistEntry;
>> +
>> +static const VFIORomBlacklistEntry romblacklist[] = {
>> +    /* Broadcom BCM 57810 */
>> +    { 0x14e4, 0x168e }
>> +};
>> +
>
> Any progress on a bug reference or trying to extract a version from the
> ROM so we can compare against future ROMs?

I will get to it when there is actually a version of rom code that fixes it.
AFAIK Broadcom is looking into it but I am not sure if there will be a fix. 
Comparing versions (or something else in the rom that can be compared) is 
probably not a very good idea considering there could be other cards in the 
wild with this issue and we then have to manage a three item list for all these
cards - devid, vendorid, "version which fixed the problem", not to mention the 
comparision method might be different based on conventions that each vendor 
has adopted. (It appears I am seeing something similar with an Emulex card too!,
still investigating)
But anyway, I don't have a better idea to offer as of now.

> We can always file a new bug
> in launchpad for tracking if needed.

Sure. Should I file a bug at https://bugs.launchpad.net/qemu/ and post
a link to it in the code comments ?

>>  #define MSIX_CAP_LENGTH 12
>>  
>>  static QLIST_HEAD(, VFIOContainer)
>> @@ -1197,13 +1207,43 @@ static const MemoryRegionOps vfio_rom_ops = {
>>      .endianness = DEVICE_LITTLE_ENDIAN,
>>  };
>>  
>> +static bool vfio_blacklist_opt_rom(VFIODevice *vdev)
>> +{
>> +    PCIDevice *pdev = &vdev->pdev;
>> +    uint16_t vendor_id, device_id;
>> +    int count = 0;
>> +
>> +    vendor_id = pci_get_word(pdev->config + PCI_VENDOR_ID);
>> +    device_id = pci_get_word(pdev->config + PCI_DEVICE_ID);
>> +
>> +    while (count < ARRAY_SIZE(romblacklist)) {
>> +        if (romblacklist[count].vendor_id == vendor_id &&
>> +            romblacklist[count].device_id == device_id) {
>> +                return true;
>> +        }
>> +        count++;
>> +    }
>> +
>> +    return false;
>> +}
>> +
>>  static void vfio_pci_size_rom(VFIODevice *vdev)
>>  {
>>      uint32_t orig, size = cpu_to_le32((uint32_t)PCI_ROM_ADDRESS_MASK);
>>      off_t offset = vdev->config_offset + PCI_ROM_ADDRESS;
>> +    DeviceState *dev = DEVICE(vdev);
>>      char name[32];
>>  
>>      if (vdev->pdev.romfile || !vdev->pdev.rom_bar) {
>> +        /* Since pci handles romfile, just print a message and return */
>> +        if (vfio_blacklist_opt_rom(vdev) && vdev->pdev.romfile) {
>> +            error_printf("Warning : Device at %04x:%02x:%02x.%x "
>> +                         "is known to cause system instability issues during "
>> +                         "option rom execution. "
>> +                         "Proceeding anyway since user specified romfile\n",
>> +                         vdev->host.domain, vdev->host.bus, vdev->host.slot,
>> +                         vdev->host.function);
>> +        }
>>          return;
>>      }
>>  
>> @@ -1227,6 +1267,26 @@ static void vfio_pci_size_rom(VFIODevice *vdev)
>>          return;
>>      }
>>  
>> +    if (vfio_blacklist_opt_rom(vdev) && vdev->pdev.rom_bar) {
>
> We would have taken the return above if !rom_bar, so that test is
> unnecessary here.  Thanks,

Agreed, sorry! will fix in the next version.

> Alex
>
>> +        if (dev->opts && qemu_opt_get(dev->opts, "rombar")) {
>> +            error_printf("Warning : Device at %04x:%02x:%02x.%x "
>> +                         "is known to cause system instability issues during "
>> +                         "option rom execution. "
>> +                         "Proceeding anyway since user specified non zero value for "
>> +                         "rombar\n",
>> +                         vdev->host.domain, vdev->host.bus, vdev->host.slot,
>> +                         vdev->host.function);
>> +        } else {
>> +            error_printf("Warning : Rom loading for device at "
>> +                         "%04x:%02x:%02x.%x has been disabled due to "
>> +                         "system instability issues. "
>> +                         "Specify rombar=1 or romfile to force\n",
>> +                         vdev->host.domain, vdev->host.bus, vdev->host.slot,
>> +                         vdev->host.function);
>> +            return;
>> +        }
>> +    }
>> +
>>      DPRINTF("%04x:%02x:%02x.%x ROM size 0x%x\n", vdev->host.domain,
>>              vdev->host.bus, vdev->host.slot, vdev->host.function, size);
>>
diff mbox

Patch

diff --git a/hw/misc/vfio.c b/hw/misc/vfio.c
index 8db182f..df3ceee 100644
--- a/hw/misc/vfio.c
+++ b/hw/misc/vfio.c
@@ -209,6 +209,16 @@  typedef struct VFIOGroup {
     QLIST_ENTRY(VFIOGroup) container_next;
 } VFIOGroup;
 
+typedef struct VFIORomBlacklistEntry {
+    uint16_t vendor_id;
+    uint16_t device_id;
+} VFIORomBlacklistEntry;
+
+static const VFIORomBlacklistEntry romblacklist[] = {
+    /* Broadcom BCM 57810 */
+    { 0x14e4, 0x168e }
+};
+
 #define MSIX_CAP_LENGTH 12
 
 static QLIST_HEAD(, VFIOContainer)
@@ -1197,13 +1207,43 @@  static const MemoryRegionOps vfio_rom_ops = {
     .endianness = DEVICE_LITTLE_ENDIAN,
 };
 
+static bool vfio_blacklist_opt_rom(VFIODevice *vdev)
+{
+    PCIDevice *pdev = &vdev->pdev;
+    uint16_t vendor_id, device_id;
+    int count = 0;
+
+    vendor_id = pci_get_word(pdev->config + PCI_VENDOR_ID);
+    device_id = pci_get_word(pdev->config + PCI_DEVICE_ID);
+
+    while (count < ARRAY_SIZE(romblacklist)) {
+        if (romblacklist[count].vendor_id == vendor_id &&
+            romblacklist[count].device_id == device_id) {
+                return true;
+        }
+        count++;
+    }
+
+    return false;
+}
+
 static void vfio_pci_size_rom(VFIODevice *vdev)
 {
     uint32_t orig, size = cpu_to_le32((uint32_t)PCI_ROM_ADDRESS_MASK);
     off_t offset = vdev->config_offset + PCI_ROM_ADDRESS;
+    DeviceState *dev = DEVICE(vdev);
     char name[32];
 
     if (vdev->pdev.romfile || !vdev->pdev.rom_bar) {
+        /* Since pci handles romfile, just print a message and return */
+        if (vfio_blacklist_opt_rom(vdev) && vdev->pdev.romfile) {
+            error_printf("Warning : Device at %04x:%02x:%02x.%x "
+                         "is known to cause system instability issues during "
+                         "option rom execution. "
+                         "Proceeding anyway since user specified romfile\n",
+                         vdev->host.domain, vdev->host.bus, vdev->host.slot,
+                         vdev->host.function);
+        }
         return;
     }
 
@@ -1227,6 +1267,26 @@  static void vfio_pci_size_rom(VFIODevice *vdev)
         return;
     }
 
+    if (vfio_blacklist_opt_rom(vdev) && vdev->pdev.rom_bar) {
+        if (dev->opts && qemu_opt_get(dev->opts, "rombar")) {
+            error_printf("Warning : Device at %04x:%02x:%02x.%x "
+                         "is known to cause system instability issues during "
+                         "option rom execution. "
+                         "Proceeding anyway since user specified non zero value for "
+                         "rombar\n",
+                         vdev->host.domain, vdev->host.bus, vdev->host.slot,
+                         vdev->host.function);
+        } else {
+            error_printf("Warning : Rom loading for device at "
+                         "%04x:%02x:%02x.%x has been disabled due to "
+                         "system instability issues. "
+                         "Specify rombar=1 or romfile to force\n",
+                         vdev->host.domain, vdev->host.bus, vdev->host.slot,
+                         vdev->host.function);
+            return;
+        }
+    }
+
     DPRINTF("%04x:%02x:%02x.%x ROM size 0x%x\n", vdev->host.domain,
             vdev->host.bus, vdev->host.slot, vdev->host.function, size);