Patchwork [PULL,3/6] vfio-pci: Lazy PCI option ROM loading

login
register
mail settings
Submitter Alex Williamson
Date Oct. 4, 2013, 2:30 p.m.
Message ID <1380897010.2673.186.camel@ul30vt.home>
Download mbox | patch
Permalink /patch/280645/
State New
Headers show

Comments

Alex Williamson - Oct. 4, 2013, 2:30 p.m.
On Fri, 2013-10-04 at 22:13 +1000, Alexey Kardashevskiy wrote:
> On 10/04/2013 01:39 AM, Alex Williamson wrote:
> > During vfio-pci initfn, the device is not always in a state where the
> > option ROM can be read.  In the case of graphics cards, there's often
> > no per function reset, which means we have host driver state affecting
> > whether the option ROM is usable.  Ideally we want to move reading the
> > option ROM past any co-assigned device resets to the point where the
> > guest first tries to read the ROM itself.
> > 
> > To accomplish this, we switch the memory region for the option rom to
> > an I/O region rather than a memory mapped region.  This has the side
> > benefit that we don't waste KVM memory slots for a BAR where we don't
> > care about performance.  This also allows us to delay loading the ROM
> > from the device until the first read by the guest.  We then use the
> > PCI config space size of the ROM BAR when setting up the BAR through
> > QEMU PCI.
> > 
> > Another benefit of this approach is that previously when a user set
> > the ROM to a file using the romfile= option, we still probed VFIO for
> > the parameters of the ROM, which can result in dmesg errors about an
> > invalid ROM.  We now only probe VFIO to get the ROM contents if the
> > guest actually tries to read the ROM.
> > 
> > Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
> > ---
> >  hw/misc/vfio.c |  184 +++++++++++++++++++++++++++++++++++++-------------------
> >  1 file changed, 122 insertions(+), 62 deletions(-)
> > 
> > diff --git a/hw/misc/vfio.c b/hw/misc/vfio.c
> > index ede026d..730dec5 100644
> > --- a/hw/misc/vfio.c
> > +++ b/hw/misc/vfio.c
> > @@ -166,6 +166,7 @@ typedef struct VFIODevice {
> >      off_t config_offset; /* Offset of config space region within device fd */
> >      unsigned int rom_size;
> >      off_t rom_offset; /* Offset of ROM region within device fd */
> > +    void *rom;
> >      int msi_cap_size;
> >      VFIOMSIVector *msi_vectors;
> >      VFIOMSIXInfo *msix;
> > @@ -1058,6 +1059,125 @@ static const MemoryRegionOps vfio_bar_ops = {
> >      .endianness = DEVICE_LITTLE_ENDIAN,
> >  };
> >  
> > +static void vfio_pci_load_rom(VFIODevice *vdev)
> > +{
> > +    struct vfio_region_info reg_info = {
> > +        .argsz = sizeof(reg_info),
> > +        .index = VFIO_PCI_ROM_REGION_INDEX
> > +    };
> > +    uint64_t size;
> > +    off_t off = 0;
> > +    size_t bytes;
> > +
> > +    if (ioctl(vdev->fd, VFIO_DEVICE_GET_REGION_INFO, &reg_info)) {
> > +        error_report("vfio: Error getting ROM info: %m");
> > +        return;
> > +    }
> > +
> > +    DPRINTF("Device %04x:%02x:%02x.%x ROM:\n", vdev->host.domain,
> > +            vdev->host.bus, vdev->host.slot, vdev->host.function);
> > +    DPRINTF("  size: 0x%lx, offset: 0x%lx, flags: 0x%lx\n",
> > +            (unsigned long)reg_info.size, (unsigned long)reg_info.offset,
> > +            (unsigned long)reg_info.flags);
> > +
> > +    vdev->rom_size = size = reg_info.size;
> > +    vdev->rom_offset = reg_info.offset;
> > +
> > +    if (!vdev->rom_size) {
> > +        return;
> > +    }
> > +
> > +    vdev->rom = g_malloc(size);
> > +    memset(vdev->rom, 0xff, size);
> > +
> > +    while (size) {
> > +        bytes = pread(vdev->fd, vdev->rom + off, size, vdev->rom_offset + off);
> > +        if (bytes == 0) {
> > +            break;
> > +        } else if (bytes > 0) {
> > +            off += bytes;
> > +            size -= bytes;
> > +        } else {
> > +            if (errno == EINTR || errno == EAGAIN) {
> > +                continue;
> > +            }
> > +            error_report("vfio: Error reading device ROM: %m");
> > +            break;
> > +        }
> > +    }
> > +}
> > +
> > +static uint64_t vfio_rom_read(void *opaque, hwaddr addr, unsigned size)
> > +{
> > +    VFIODevice *vdev = opaque;
> > +    uint64_t val = ((uint64_t)1 << (size * 8)) - 1;
> > +
> > +    /* Load the ROM lazily when the guest tries to read it */
> > +    if (unlikely(!vdev->rom)) {
> > +        vfio_pci_load_rom(vdev);
> > +    }
> > +
> > +    memcpy(&val, vdev->rom + addr,
> > +           (addr < vdev->rom_size) ? MIN(size, vdev->rom_size - addr) : 0);
> > +
> > +    DPRINTF("%s(%04x:%02x:%02x.%x, 0x%"HWADDR_PRIx", 0x%x) = 0x%"PRIx64"\n",
> > +            __func__, vdev->host.domain, vdev->host.bus, vdev->host.slot,
> > +            vdev->host.function, addr, size, val);
> > +
> > +    return val;
> > +}
> > +
> > +static const MemoryRegionOps vfio_rom_ops = {
> > +    .read = vfio_rom_read,
> > +    .endianness = DEVICE_LITTLE_ENDIAN,
> > +};
> > +
> > +static void vfio_pci_size_rom(VFIODevice *vdev)
> > +{
> > +    uint32_t orig, size = (uint32_t)PCI_ROM_ADDRESS_MASK;
> > +    off_t offset = vdev->config_offset + PCI_ROM_ADDRESS;
> > +    char name[32];
> > +
> > +    if (vdev->pdev.romfile || !vdev->pdev.rom_bar) {
> > +        return;
> > +    }
> > +
> > +    /*
> > +     * Use the same size ROM BAR as the physical device.  The contents
> > +     * will get filled in later when the guest tries to read it.
> > +     */
> > +    if (pread(vdev->fd, &orig, 4, offset) != 4 ||
> > +        pwrite(vdev->fd, &size, 4, offset) != 4 ||
> > +        pread(vdev->fd, &size, 4, offset) != 4 ||
> > +        pwrite(vdev->fd, &orig, 4, offset) != 4) {
> 
> 
> Do not you want to do byteswapping here?
> This chunk fails on powerpc64. The rest works (with the corresponding
> kernel changes).

Yes, I think we just need this:


Does that work for you?  Thanks,

Alex

Patch

--- a/hw/misc/vfio.c
+++ b/hw/misc/vfio.c
@@ -1142,7 +1142,7 @@  static const MemoryRegionOps vfio_rom_ops = {
 
 static void vfio_pci_size_rom(VFIODevice *vdev)
 {
-    uint32_t orig, size = (uint32_t)PCI_ROM_ADDRESS_MASK;
+    uint32_t orig, size = cpu_to_le32((uint32_t)PCI_ROM_ADDRESS_MASK);
     off_t offset = vdev->config_offset + PCI_ROM_ADDRESS;
     char name[32];
 
@@ -1164,7 +1164,7 @@  static void vfio_pci_size_rom(VFIODevice *vdev)
         return;
     }
 
-    size = ~(size & PCI_ROM_ADDRESS_MASK) + 1;
+    size = ~(le32_to_cpu(size) & PCI_ROM_ADDRESS_MASK) + 1;
 
     if (!size) {
         return;