diff mbox

[RFC] fw/pci: Add support for mapping Intel IGD OpRegion via QEMU

Message ID 20160202041510.32092.58972.stgit@gimli.home
State New
Headers show

Commit Message

Alex Williamson Feb. 2, 2016, 4:15 a.m. UTC
The proposed IGD OpRegion support in QEMU via vfio maps the host
OpRegion into VM system memory at the address written to the ASL
Storage register (0xFC).  The OpRegion contains a 16-byte signature
followed by a 4-byte size field.  Therefore SeaBIOS can allocate a
buffer of the typical size (8KB), this results in a matching e820
reserved entry for the range, then write the buffer address to the ASL
Storage register, blinking the OpRegion into the VM address space.  At
that point SeaBIOS can validate the signature and size and remap if
necessary.  If the OpRegion is larger than 8KB, this would result in
any conflicting ranges being temporarily mapped with the OpRegion,
which probably needs further discussion on what that could break.
Other options might be to use the same algorithm with an obscenely
sized initial buffer to make sure we do not overlap, always
re-allocating the proper sized buffer, or perhaps we could pass the
OpRegion itself or information about the OpRegion through fw_cfg.

With the posted kernel series[1] and QEMU series[2] (on top of Gerd's
IGD patches[3]), this makes the OpRegion available to the VM and
tracing shows that the guest driver does access it.

[1] https://lkml.org/lkml/2016/2/1/884
[2] https://lists.gnu.org/archive/html/qemu-devel/2016-02/msg00202.html
[3] https://lists.gnu.org/archive/html/qemu-devel/2016-01/msg00244.html

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 src/fw/pciinit.c |   50 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 50 insertions(+)

Comments

Laszlo Ersek Feb. 2, 2016, 11:43 a.m. UTC | #1
On 02/02/16 05:15, Alex Williamson wrote:
> The proposed IGD OpRegion support in QEMU via vfio maps the host
> OpRegion into VM system memory at the address written to the ASL
> Storage register (0xFC).  The OpRegion contains a 16-byte signature
> followed by a 4-byte size field.  Therefore SeaBIOS can allocate a
> buffer of the typical size (8KB), this results in a matching e820
> reserved entry for the range, then write the buffer address to the ASL
> Storage register, blinking the OpRegion into the VM address space.  At
> that point SeaBIOS can validate the signature and size and remap if
> necessary.  If the OpRegion is larger than 8KB, this would result in
> any conflicting ranges being temporarily mapped with the OpRegion,
> which probably needs further discussion on what that could break.
> Other options might be to use the same algorithm with an obscenely
> sized initial buffer to make sure we do not overlap, always
> re-allocating the proper sized buffer, or perhaps we could pass the
> OpRegion itself or information about the OpRegion through fw_cfg.
> 
> With the posted kernel series[1] and QEMU series[2] (on top of Gerd's
> IGD patches[3]), this makes the OpRegion available to the VM and
> tracing shows that the guest driver does access it.
> 
> [1] https://lkml.org/lkml/2016/2/1/884
> [2] https://lists.gnu.org/archive/html/qemu-devel/2016-02/msg00202.html
> [3] https://lists.gnu.org/archive/html/qemu-devel/2016-01/msg00244.html
> 
> Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
> ---
>  src/fw/pciinit.c |   50 ++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 50 insertions(+)
> 
> diff --git a/src/fw/pciinit.c b/src/fw/pciinit.c
> index c31c2fa..4f3251e 100644
> --- a/src/fw/pciinit.c
> +++ b/src/fw/pciinit.c
> @@ -257,6 +257,52 @@ static void ich9_smbus_setup(struct pci_device *dev, void *arg)
>      pci_config_writeb(bdf, ICH9_SMB_HOSTC, ICH9_SMB_HOSTC_HST_EN);
>  }
>  
> +static void intel_igd_opregion_setup(struct pci_device *dev, void *arg)
> +{
> +    u16 bdf = dev->bdf;
> +    u32 orig;
> +    void *opregion;
> +    int size = 8;
> +
> +    if (!CONFIG_QEMU)
> +        return;
> +
> +    orig = pci_config_readl(bdf, 0xFC);
> +
> +realloc:
> +    opregion = malloc_high(size * 1024);
> +    if (!opregion) {
> +        warn_noalloc();
> +        return;
> +    }
> +
> +    /*
> +     * QEMU maps the OpRegion into system memory at the address written here,
> +     * this overlaps our malloc, which marks the range e820 reserved.
> +     */
> +    pci_config_writel(bdf, 0xFC, cpu_to_le32((u32)opregion));
> +
> +    if (memcmp(opregion, "IntelGraphicsMem", 16)) {
> +        pci_config_writel(bdf, 0xFC, orig);
> +        free(opregion);
> +        return; /* the opregion didn't magically appear, not supported */
> +    }
> +
> +    if (size == le32_to_cpu(*(u32 *)(opregion + 16))) {
> +        dprintf(1, "Intel IGD OpRegion enabled on %02x:%02x.%x\n",
> +                pci_bdf_to_bus(bdf), pci_bdf_to_dev(bdf), pci_bdf_to_fn(bdf));
> +        return; /* success! */
> +    }
> +
> +    pci_config_writel(bdf, 0xFC, orig);
> +    free(opregion);
> +
> +    if (size == 8) { /* try once more with a new size */
> +        size = le32_to_cpu(*(u32 *)(opregion + 16));
> +        goto realloc;

Is this idiomatic SeaBIOS coding style?

How about "for (;;)" -- the code has many instances -- and reworking
this last branch accordingly?

(Apologies, not a very important comment.)

Thanks
Laszlo

> +    }
> +}
> +
>  static const struct pci_device_id pci_device_tbl[] = {
>      /* PIIX3/PIIX4 PCI to ISA bridge */
>      PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371SB_0,
> @@ -290,6 +336,10 @@ static const struct pci_device_id pci_device_tbl[] = {
>      PCI_DEVICE_CLASS(PCI_VENDOR_ID_APPLE, 0x0017, 0xff00, apple_macio_setup),
>      PCI_DEVICE_CLASS(PCI_VENDOR_ID_APPLE, 0x0022, 0xff00, apple_macio_setup),
>  
> +    /* Intel IGD OpRegion setup */
> +    PCI_DEVICE_CLASS(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA,
> +                     intel_igd_opregion_setup),
> +
>      PCI_DEVICE_END,
>  };
>  
> 
> 
> _______________________________________________
> SeaBIOS mailing list
> SeaBIOS@seabios.org
> http://www.seabios.org/mailman/listinfo/seabios
>
Alex Williamson Feb. 2, 2016, 3:56 p.m. UTC | #2
On Tue, 2016-02-02 at 12:43 +0100, Laszlo Ersek wrote:
> On 02/02/16 05:15, Alex Williamson wrote:
> > The proposed IGD OpRegion support in QEMU via vfio maps the host
> > OpRegion into VM system memory at the address written to the ASL
> > Storage register (0xFC).  The OpRegion contains a 16-byte signature
> > followed by a 4-byte size field.  Therefore SeaBIOS can allocate a
> > buffer of the typical size (8KB), this results in a matching e820
> > reserved entry for the range, then write the buffer address to the ASL
> > Storage register, blinking the OpRegion into the VM address space.  At
> > that point SeaBIOS can validate the signature and size and remap if
> > necessary.  If the OpRegion is larger than 8KB, this would result in
> > any conflicting ranges being temporarily mapped with the OpRegion,
> > which probably needs further discussion on what that could break.
> > Other options might be to use the same algorithm with an obscenely
> > sized initial buffer to make sure we do not overlap, always
> > re-allocating the proper sized buffer, or perhaps we could pass the
> > OpRegion itself or information about the OpRegion through fw_cfg.
> > 
> > With the posted kernel series[1] and QEMU series[2] (on top of Gerd's
> > IGD patches[3]), this makes the OpRegion available to the VM and
> > tracing shows that the guest driver does access it.
> > 
> > [1] https://lkml.org/lkml/2016/2/1/884
> > [2] https://lists.gnu.org/archive/html/qemu-devel/2016-02/msg00202.html
> > [3] https://lists.gnu.org/archive/html/qemu-devel/2016-01/msg00244.html
> > 
> > Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
> > ---
> >  src/fw/pciinit.c |   50 ++++++++++++++++++++++++++++++++++++++++++++++++++
> >  1 file changed, 50 insertions(+)
> > 
> > diff --git a/src/fw/pciinit.c b/src/fw/pciinit.c
> > index c31c2fa..4f3251e 100644
> > --- a/src/fw/pciinit.c
> > +++ b/src/fw/pciinit.c
> > @@ -257,6 +257,52 @@ static void ich9_smbus_setup(struct pci_device *dev, void *arg)
> >      pci_config_writeb(bdf, ICH9_SMB_HOSTC, ICH9_SMB_HOSTC_HST_EN);
> >  }
> >  
> > +static void intel_igd_opregion_setup(struct pci_device *dev, void *arg)
> > +{
> > +    u16 bdf = dev->bdf;
> > +    u32 orig;
> > +    void *opregion;
> > +    int size = 8;
> > +
> > +    if (!CONFIG_QEMU)
> > +        return;
> > +
> > +    orig = pci_config_readl(bdf, 0xFC);
> > +
> > +realloc:
> > +    opregion = malloc_high(size * 1024);
> > +    if (!opregion) {
> > +        warn_noalloc();
> > +        return;
> > +    }
> > +
> > +    /*
> > +     * QEMU maps the OpRegion into system memory at the address written here,
> > +     * this overlaps our malloc, which marks the range e820 reserved.
> > +     */
> > +    pci_config_writel(bdf, 0xFC, cpu_to_le32((u32)opregion));
> > +
> > +    if (memcmp(opregion, "IntelGraphicsMem", 16)) {
> > +        pci_config_writel(bdf, 0xFC, orig);
> > +        free(opregion);
> > +        return; /* the opregion didn't magically appear, not supported */
> > +    }
> > +
> > +    if (size == le32_to_cpu(*(u32 *)(opregion + 16))) {
> > +        dprintf(1, "Intel IGD OpRegion enabled on %02x:%02x.%x\n",
> > +                pci_bdf_to_bus(bdf), pci_bdf_to_dev(bdf), pci_bdf_to_fn(bdf));
> > +        return; /* success! */
> > +    }
> > +
> > +    pci_config_writel(bdf, 0xFC, orig);
> > +    free(opregion);
> > +
> > +    if (size == 8) { /* try once more with a new size */
> > +        size = le32_to_cpu(*(u32 *)(opregion + 16));
> > +        goto realloc;

> Is this idiomatic SeaBIOS coding style?

> How about "for (;;)" -- the code has many instances -- and reworking
> this last branch accordingly?

> (Apologies, not a very important comment.)

I did check that I wasn't the only one using a goto in SeaBIOS and I
don't see any sort of CodingStyle doc precluding it, but I apologize if
there have been previous discussions that I've missed.  I don't have the
same aversion to gotos as many people do, but of course the loop can be
reworked in any number of ways if there's a preference to avoid them.
Thanks,

Alex


> > +    }
> > +}
> > +
> >  static const struct pci_device_id pci_device_tbl[] = {
> >      /* PIIX3/PIIX4 PCI to ISA bridge */
> >      PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371SB_0,
> > @@ -290,6 +336,10 @@ static const struct pci_device_id pci_device_tbl[] = {
> >      PCI_DEVICE_CLASS(PCI_VENDOR_ID_APPLE, 0x0017, 0xff00, apple_macio_setup),
> >      PCI_DEVICE_CLASS(PCI_VENDOR_ID_APPLE, 0x0022, 0xff00, apple_macio_setup),
> >  
> > +    /* Intel IGD OpRegion setup */
> > +    PCI_DEVICE_CLASS(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA,
> > +                     intel_igd_opregion_setup),
> > +
> >      PCI_DEVICE_END,
> >  };
> >  
> > 
> > 
> > _______________________________________________
> > SeaBIOS mailing list
> > SeaBIOS@seabios.org
> > http://www.seabios.org/mailman/listinfo/seabios
> > 
>
Laszlo Ersek Feb. 2, 2016, 4:13 p.m. UTC | #3
On 02/02/16 16:56, Alex Williamson wrote:
> On Tue, 2016-02-02 at 12:43 +0100, Laszlo Ersek wrote:
>> On 02/02/16 05:15, Alex Williamson wrote:
>>> The proposed IGD OpRegion support in QEMU via vfio maps the host
>>> OpRegion into VM system memory at the address written to the ASL
>>> Storage register (0xFC).  The OpRegion contains a 16-byte signature
>>> followed by a 4-byte size field.  Therefore SeaBIOS can allocate a
>>> buffer of the typical size (8KB), this results in a matching e820
>>> reserved entry for the range, then write the buffer address to the ASL
>>> Storage register, blinking the OpRegion into the VM address space.  At
>>> that point SeaBIOS can validate the signature and size and remap if
>>> necessary.  If the OpRegion is larger than 8KB, this would result in
>>> any conflicting ranges being temporarily mapped with the OpRegion,
>>> which probably needs further discussion on what that could break.
>>> Other options might be to use the same algorithm with an obscenely
>>> sized initial buffer to make sure we do not overlap, always
>>> re-allocating the proper sized buffer, or perhaps we could pass the
>>> OpRegion itself or information about the OpRegion through fw_cfg.
>>>  
>>> With the posted kernel series[1] and QEMU series[2] (on top of Gerd's
>>> IGD patches[3]), this makes the OpRegion available to the VM and
>>> tracing shows that the guest driver does access it.
>>>  
>>> [1] https://lkml.org/lkml/2016/2/1/884
>>> [2] https://lists.gnu.org/archive/html/qemu-devel/2016-02/msg00202.html
>>> [3] https://lists.gnu.org/archive/html/qemu-devel/2016-01/msg00244.html
>>>  
>>> Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
>>> ---
>>>  src/fw/pciinit.c |   50 ++++++++++++++++++++++++++++++++++++++++++++++++++
>>>  1 file changed, 50 insertions(+)
>>>  
>>> diff --git a/src/fw/pciinit.c b/src/fw/pciinit.c
>>> index c31c2fa..4f3251e 100644
>>> --- a/src/fw/pciinit.c
>>> +++ b/src/fw/pciinit.c
>>> @@ -257,6 +257,52 @@ static void ich9_smbus_setup(struct pci_device *dev, void *arg)
>>>      pci_config_writeb(bdf, ICH9_SMB_HOSTC, ICH9_SMB_HOSTC_HST_EN);
>>>  }
>>>  
>>> +static void intel_igd_opregion_setup(struct pci_device *dev, void *arg)
>>> +{
>>> +    u16 bdf = dev->bdf;
>>> +    u32 orig;
>>> +    void *opregion;
>>> +    int size = 8;
>>> +
>>> +    if (!CONFIG_QEMU)
>>> +        return;
>>> +
>>> +    orig = pci_config_readl(bdf, 0xFC);
>>> +
>>> +realloc:
>>> +    opregion = malloc_high(size * 1024);
>>> +    if (!opregion) {
>>> +        warn_noalloc();
>>> +        return;
>>> +    }
>>> +
>>> +    /*
>>> +     * QEMU maps the OpRegion into system memory at the address written here,
>>> +     * this overlaps our malloc, which marks the range e820 reserved.
>>> +     */
>>> +    pci_config_writel(bdf, 0xFC, cpu_to_le32((u32)opregion));
>>> +
>>> +    if (memcmp(opregion, "IntelGraphicsMem", 16)) {
>>> +        pci_config_writel(bdf, 0xFC, orig);
>>> +        free(opregion);
>>> +        return; /* the opregion didn't magically appear, not supported */
>>> +    }
>>> +
>>> +    if (size == le32_to_cpu(*(u32 *)(opregion + 16))) {
>>> +        dprintf(1, "Intel IGD OpRegion enabled on %02x:%02x.%x\n",
>>> +                pci_bdf_to_bus(bdf), pci_bdf_to_dev(bdf), pci_bdf_to_fn(bdf));
>>> +        return; /* success! */
>>> +    }
>>> +
>>> +    pci_config_writel(bdf, 0xFC, orig);
>>> +    free(opregion);
>>> +
>>> +    if (size == 8) { /* try once more with a new size */
>>> +        size = le32_to_cpu(*(u32 *)(opregion + 16));
>>> +        goto realloc;
>>  
>> Is this idiomatic SeaBIOS coding style?
>>  
>> How about "for (;;)" -- the code has many instances -- and reworking
>> this last branch accordingly?
>>  
>> (Apologies, not a very important comment.)
> 
> I did check that I wasn't the only one using a goto in SeaBIOS and I
> don't see any sort of CodingStyle doc precluding it, but I apologize if
> there have been previous discussions that I've missed.

Not that I know of...

> I don't have the
> same aversion to gotos as many people do, but of course the loop can be
> reworked in any number of ways if there's a preference to avoid them.

... but I thought that goto's were mostly reserved for cleanup / error
paths in SeaBIOS (a quick grep seemed to confirm).

I recall seeing this kind of construct in the kernel source (especially
if the jump is from within a deeply nested conditional). I'm not against
it, just thought that it could count as unusual for SeaBIOS. Kevin will
know. :)

Thanks
Laszlo

> Thanks,
> 
> Alex
> 
> 
>>> +    }
>>> +}
>>> +
>>>  static const struct pci_device_id pci_device_tbl[] = {
>>>      /* PIIX3/PIIX4 PCI to ISA bridge */
>>>      PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371SB_0,
>>> @@ -290,6 +336,10 @@ static const struct pci_device_id pci_device_tbl[] = {
>>>      PCI_DEVICE_CLASS(PCI_VENDOR_ID_APPLE, 0x0017, 0xff00, apple_macio_setup),
>>>      PCI_DEVICE_CLASS(PCI_VENDOR_ID_APPLE, 0x0022, 0xff00, apple_macio_setup),
>>>  
>>> +    /* Intel IGD OpRegion setup */
>>> +    PCI_DEVICE_CLASS(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA,
>>> +                     intel_igd_opregion_setup),
>>> +
>>>      PCI_DEVICE_END,
>>>  };
>>>  
>>>  
>>>  
>>> _______________________________________________
>>> SeaBIOS mailing list
>>> SeaBIOS@seabios.org
>>> http://www.seabios.org/mailman/listinfo/seabios
>>>  
>>  
>
diff mbox

Patch

diff --git a/src/fw/pciinit.c b/src/fw/pciinit.c
index c31c2fa..4f3251e 100644
--- a/src/fw/pciinit.c
+++ b/src/fw/pciinit.c
@@ -257,6 +257,52 @@  static void ich9_smbus_setup(struct pci_device *dev, void *arg)
     pci_config_writeb(bdf, ICH9_SMB_HOSTC, ICH9_SMB_HOSTC_HST_EN);
 }
 
+static void intel_igd_opregion_setup(struct pci_device *dev, void *arg)
+{
+    u16 bdf = dev->bdf;
+    u32 orig;
+    void *opregion;
+    int size = 8;
+
+    if (!CONFIG_QEMU)
+        return;
+
+    orig = pci_config_readl(bdf, 0xFC);
+
+realloc:
+    opregion = malloc_high(size * 1024);
+    if (!opregion) {
+        warn_noalloc();
+        return;
+    }
+
+    /*
+     * QEMU maps the OpRegion into system memory at the address written here,
+     * this overlaps our malloc, which marks the range e820 reserved.
+     */
+    pci_config_writel(bdf, 0xFC, cpu_to_le32((u32)opregion));
+
+    if (memcmp(opregion, "IntelGraphicsMem", 16)) {
+        pci_config_writel(bdf, 0xFC, orig);
+        free(opregion);
+        return; /* the opregion didn't magically appear, not supported */
+    }
+
+    if (size == le32_to_cpu(*(u32 *)(opregion + 16))) {
+        dprintf(1, "Intel IGD OpRegion enabled on %02x:%02x.%x\n",
+                pci_bdf_to_bus(bdf), pci_bdf_to_dev(bdf), pci_bdf_to_fn(bdf));
+        return; /* success! */
+    }
+
+    pci_config_writel(bdf, 0xFC, orig);
+    free(opregion);
+
+    if (size == 8) { /* try once more with a new size */
+        size = le32_to_cpu(*(u32 *)(opregion + 16));
+        goto realloc;
+    }
+}
+
 static const struct pci_device_id pci_device_tbl[] = {
     /* PIIX3/PIIX4 PCI to ISA bridge */
     PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371SB_0,
@@ -290,6 +336,10 @@  static const struct pci_device_id pci_device_tbl[] = {
     PCI_DEVICE_CLASS(PCI_VENDOR_ID_APPLE, 0x0017, 0xff00, apple_macio_setup),
     PCI_DEVICE_CLASS(PCI_VENDOR_ID_APPLE, 0x0022, 0xff00, apple_macio_setup),
 
+    /* Intel IGD OpRegion setup */
+    PCI_DEVICE_CLASS(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA,
+                     intel_igd_opregion_setup),
+
     PCI_DEVICE_END,
 };