diff mbox

[IGDVFIO,3/8] RFC and help completing: Intel IGD Direct Assignment with VFIO

Message ID CAKJ_wKQs9h4aJZy7LFUey05kG8c25fYp-_5Q71SQETEF90UF8Q@mail.gmail.com
State New
Headers show

Commit Message

Andrew Barnes Sept. 24, 2014, 1:20 p.m. UTC
hw/pci-host/q35.c

this patch adds:
* redirect some PCI config reads/writes to host
* memory map BSDM, BGSM, TSEG

patch
---------------------

     PCIHostState *pci = PCI_HOST_BRIDGE(dev);
@@ -284,11 +300,193 @@ static void mch_set_smm(int smm, void *arg)
     memory_region_transaction_commit();
 }

-static void mch_write_config(PCIDevice *d,
+/* TODO: Move these variables/defines to be consistent with Q35 coding
style */
+MemoryRegion bdsm;
+uint32_t bdsm_host;
+MemoryRegion bgsm;
+uint32_t bgsm_host;
+MemoryRegion tseg;
+uint32_t tseg_host;
+#define MCH_CONFIG_BDSM 0xb0
+#define MCH_CONFIG_BGSM 0xb4
+#define MCH_CONFIG_TSEG 0xb8
+#define MCH_CONFIG_GMCH 0x50
+
+/* Setup memory map for Stolen Memory */
+static void mch_setup_bdsm(PCIDevice *d)
+{
+    int fd;
+    char name[64];
+    void *map;
+    bdsm_host =
host_pci_read_config(0,PCI_SLOT(d->devfn),PCI_FUNC(d->devfn),MCH_CONFIG_BDSM,4);
+    int dsm_size;
+    uint16_t gmch_host =
host_pci_read_config(0,PCI_SLOT(d->devfn),PCI_FUNC(d->devfn),MCH_CONFIG_GMCH,2);
+    uint16_t gmch_dsm = (gmch_host & 0xF8) >> 3;
+    MCHPCIState *mch = MCH_PCI_DEVICE(d);
+
+    Q35_DPRINTF("%s Setup BDSM: %x\n",__func__,bdsm_host);
+
+    snprintf(name, sizeof(name), "MCH-BDSM-mmap");
+    fd = open("/dev/mem", O_RDWR);
+
+    switch (gmch_dsm)
+    {
+      case 0x0: dsm_size = 0; break;
+      case 0x1: dsm_size = 32; break;
+      case 0x2: dsm_size = 64; break;
+      case 0x3: dsm_size = 96; break;
+      case 0x4: dsm_size = 128; break;
+      case 0x5: dsm_size = 160; break;
+      case 0x6: dsm_size = 192; break;
+      case 0x7: dsm_size = 224; break;
+      case 0x8: dsm_size = 256; break;
+      case 0x9: dsm_size = 288; break;
+      case 0xa: dsm_size = 320; break;
+      case 0xb: dsm_size = 352; break;
+      case 0xc: dsm_size = 384; break;
+      case 0xd: dsm_size = 416; break;
+      case 0xe: dsm_size = 448; break;
+      case 0xf: dsm_size = 480; break;
+      case 0x10: dsm_size = 512; break;
+     // default: /* panic */
+    }
+
+    dsm_size = dsm_size * 1024 * 1024;
+    map = mmap(NULL,dsm_size,PROT_READ|PROT_WRITE,MAP_SHARED,fd,(bdsm_host
& 0xFFF00000));
+
+    if (map == MAP_FAILED)
+    {
+        map = NULL;
+        Q35_DPRINTF("%s Setup BDSM: MAP_FAILED\n",__func__);
+    }
+
+    memory_region_init_ram_ptr(&bdsm, OBJECT(mch), name, dsm_size, map);
+}
+
+/* this function memory maps the region of host memory assigned to Stolen
memory
+ * to the guest location provided by seabios. */
+static void mch_map_bdsm(PCIDevice *d, uint32_t bdsm_new)
+{
+    MemoryRegion *guest_memory = get_system_memory();
+    uint32_t bdsm_current = pci_default_read_config(d,MCH_CONFIG_BDSM,4);
+
+    if ( bdsm_current != bdsm_host && bdsm_current != 0 )
+    {
+        // remap
+        Q35_DPRINTF("%s Delete BDSM mapping: %x\n",__func__,(bdsm_current
& 0xFFF00000));
+        memory_region_del_subregion(guest_memory, &bdsm);
+    }
+    Q35_DPRINTF("%s Add BDSM mapping: %x ->
%x\n",__func__,bdsm_host,bdsm_new);
+    memory_region_add_subregion(guest_memory, (bdsm_new & 0xFFF00000),
&bdsm);
+}
+
+/* Setup memory map for GTT Stolen Memory */
+static void mch_setup_bgsm(PCIDevice *d)
+{
+    int fd;
+    char name[64];
+    void *map;
+    bgsm_host =
host_pci_read_config(0,PCI_SLOT(d->devfn),PCI_FUNC(d->devfn),MCH_CONFIG_BGSM,4);
+    int gsm_size;
+    uint16_t gmch_host =
host_pci_read_config(0,PCI_SLOT(d->devfn),PCI_FUNC(d->devfn),MCH_CONFIG_GMCH,2);
+    uint16_t gmch_gsm = (gmch_host & 0x300) >> 8;
+    MCHPCIState *mch = MCH_PCI_DEVICE(d);
+
+    Q35_DPRINTF("%s Setup BGSM: %x\n",__func__,bgsm_host);
+
+    snprintf(name, sizeof(name), "MCH %04x:%02x:%02x.%x BGSM mmap",
+             0, 0, 0, 0);
+
+    fd = open("/dev/mem", O_RDWR);
+
+    switch (gmch_gsm)
+    {
+      case 0x2: gsm_size = 2; break;
+      case 0x1: gsm_size = 1; break;
+      case 0x0: gsm_size = 0; break;
+     // default: /* panic */
+    }
+
+    gsm_size = gsm_size * 1024 * 1024;
+
+    map = mmap(NULL,gsm_size,PROT_READ|PROT_WRITE,MAP_SHARED,fd,(bgsm_host
& 0xFFF00000));
+    if (map == MAP_FAILED)
+    {
+        map = NULL;
+        Q35_DPRINTF("%s Setup BGSM: MAP_FAILED\n",__func__);
+    }
+
+    memory_region_init_ram_ptr(&bgsm, OBJECT(mch), name, gsm_size, map);
+}
+
+/* this function memory maps the region of host memory assigned to GTT
Stolen memory
+ * to the guest location provided by seabios. */
+static void mch_map_bgsm(PCIDevice *d, uint32_t bgsm_new)
+{
+    MemoryRegion *guest_memory = get_system_memory();
+    uint32_t bgsm_current = pci_default_read_config(d,MCH_CONFIG_BGSM,4);
+
+    if ( bgsm_current != bgsm_host && bgsm_current != 0)
+    {
+        // remap
+        Q35_DPRINTF("%s Delete BGSM mapping: %x\n",__func__,(bgsm_current
& 0xFFF00000));
+        memory_region_del_subregion(guest_memory, &bgsm);
+    }
+    Q35_DPRINTF("%s Add BGSM mapping: %x ->
%x\n",__func__,bgsm_host,bgsm_new);
+    memory_region_add_subregion(guest_memory, (bgsm_new & 0xFFF00000),
&bgsm);
+}
+
+/* Setup memory map for TSEG */
+static void mch_setup_tseg(PCIDevice *d)
+{
+    int fd;
+    char name[64];
+    void *map;
+    tseg_host =
host_pci_read_config(0,PCI_SLOT(d->devfn),PCI_FUNC(d->devfn),MCH_CONFIG_TSEG,4);
+    int tseg_size = 8 * 1024 * 1024;
+    MCHPCIState *mch = MCH_PCI_DEVICE(d);
+
+    Q35_DPRINTF("%s Setup TSEG: %x\n",__func__,tseg_host);
+
+    snprintf(name, sizeof(name), "MCH-TSEG-mmap");
+
+    fd = open("/dev/mem", O_RDWR);
+
+    map =
mmap(NULL,tseg_size,PROT_READ|PROT_WRITE,MAP_SHARED,fd,(tseg_host &
0xFFF00000));
+    if (map == MAP_FAILED)
+    {
+        map = NULL;
+        Q35_DPRINTF("%s Setup TSEG: MAP_FAILED\n",__func__);
+    }
+
+    memory_region_init_ram_ptr(&tseg, OBJECT(mch), name, tseg_size, map);
+}
+
+/* this function memory maps the region of host memory assigned to TSEG
+ * to the guest location provided by seabios. */
+static void mch_map_tseg(PCIDevice *d, uint32_t tseg_new)
+{
+    MemoryRegion *guest_memory = get_system_memory();
+    uint32_t tseg_current = pci_default_read_config(d,MCH_CONFIG_TSEG,4);
+
+    if ( tseg_current != tseg_host && tseg_current != 0)
+    {
+        // remap
+        Q35_DPRINTF("%s Delete TSEG mapping: %x\n",__func__,(tseg_current
& 0xFFF00000));
+        memory_region_del_subregion(guest_memory, &tseg);
+    }
+    Q35_DPRINTF("%s Add TSEG mapping: %x ->
%x\n",__func__,tseg_host,tseg_new);
+    memory_region_add_subregion(guest_memory, (tseg_new & 0xFFF00000),
&tseg);
+}
+
+static void mch_write_config_default(PCIDevice *d,
                               uint32_t address, uint32_t val, int len)
 {
     MCHPCIState *mch = MCH_PCI_DEVICE(d);

+    Q35_DPRINTF("%s(%04x:%02x:%02x.%x, @0x%x, len=0x%x) %x\n", __func__,
+                0000, 00, PCI_SLOT(d->devfn),PCI_FUNC(d->devfn), address,
len, val);
+
     /* XXX: implement SMRAM.D_LOCK */
     pci_default_write_config(d, address, val, len);

@@ -308,6 +506,73 @@ static void mch_write_config(PCIDevice *d,
     }
 }

+static void mch_write_config(PCIDevice *d,
+                             uint32_t address, uint32_t val, int len)
+{
+    Q35_DPRINTF("%s(%04x:%02x:%02x.%x, @0x%x, len=0x%x) %x\n", __func__,
+                0000, 00, PCI_SLOT(d->devfn),PCI_FUNC(d->devfn), address,
len, val);
+
+    /* PAVPC - Protected Audio Video Path Control */
+    if (IGD_PASSTHROUGH && ranges_overlap(address, len, 0x58, 4))
+    {
+
 host_pci_write_config(0,PCI_SLOT(d->devfn),PCI_FUNC(d->devfn),address,len,val);
+    }
+    /* BDSM - */
+    else if (IGD_PASSTHROUGH && ranges_overlap(address, len,
MCH_CONFIG_BDSM, 4))
+    {
+        mch_map_bdsm(d,val);
+        mch_write_config_default(d,address,val,len);
+    }
+    /* BGSM - */
+    else if (IGD_PASSTHROUGH && ranges_overlap(address, len,
MCH_CONFIG_BGSM, 4))
+    {
+        mch_map_bgsm(d,val);
+        mch_write_config_default(d,address,val,len);
+    }
+    /* TSEG - */
+    else if (IGD_PASSTHROUGH && ranges_overlap(address, len,
MCH_CONFIG_TSEG, 4))
+    {
+        mch_map_tseg(d,val);
+        mch_write_config_default(d,address,val,len);
+    }
+    else
+    {
+        mch_write_config_default(d,address,val,len);
+    }
+}
+
+static uint32_t mch_read_config(PCIDevice *d,
+                                uint32_t address, int len)
+{
+    uint32_t val;
+
+    if (IGD_PASSTHROUGH)
+    {
+        if (ranges_overlap(address, len, 0x2c, 2) || /* SVID - Subsystem
Vendor Identification */
+            ranges_overlap(address, len, 0x2e, 2) || /* SID - Subsystem
Identification */
+            ranges_overlap(address, len, 0x50, 2) || /* GMCH - SNB
Graphics Control Register */
+            ranges_overlap(address, len, 0x58, 4))   /* PAVPC - Protected
Audio Video Path Control  */
+        {
+            val =
host_pci_read_config(0,PCI_SLOT(d->devfn),PCI_FUNC(d->devfn),address,len);
+        }
+        else
+        {
+            goto defaultread;
+        }
+    }
+    else
+    {
+defaultread:
+        val = pci_default_read_config(d,address,len);
+    }
+
+    Q35_DPRINTF("%s(%04x:%02x:%02x.%x, @0x%x, len=0x%x) %x\n", __func__,
+                0000, 00, PCI_SLOT(d->devfn),PCI_FUNC(d->devfn), address,
len, val);
+
+    return val;
+
+}
+
 static void mch_update(MCHPCIState *mch)
 {
     mch_update_pciexbar(mch);
@@ -370,6 +635,69 @@ static int mch_init(PCIDevice *d)
                  &mch->pam_regions[i+1], PAM_EXPAN_BASE + i *
PAM_EXPAN_SIZE,
                  PAM_EXPAN_SIZE);
     }
+
+    mch_setup_bdsm(d);
+    mch_setup_bgsm(d);
+    mch_setup_tseg(d);
+
+    /* Unsure if this is important. but the following is as per INTEL spec
+     * which otherwise is non-conforming. */
+#ifdef CORRECT_CONFIG
+    /* PCICMD Register */
+    pci_set_word(d->wmask + D0F0_PCICMD,(D0F0_PCICMD_SERR |
D0F0_PCICMD_PERR)); // set writable
+    pci_set_word(d->config + D0F0_PCICMD,(D0F0_PCICMD_MAE |
D0F0_PCICMD_BME)); // set 1
+
+    /* PCISTS Register */
+    pci_set_word(d->w1cmask + D0F0_PCISTS,(D0F0_PCISTS_DPE |
D0F0_PCISTS_SSE | D0F0_PCISTS_RMAS |
+                  D0F0_PCISTS_RTAS | D0F0_PCISTS_DPD));
+    pci_set_word(d->config + D0F0_PCISTS,(D0F0_PCISTS_CLIST |
D0F0_PCISTS_FB2B));
+
+    /* CC Register */
+    /* No RW Registers */
+    //pci_set_byte(d->config + D0F0_CC + D0F0_CC_BCC, 0x06); /* indicating
a bridge device */
+
+    /* HDR Register */
+    /* No RW Registers */
+    /* !nnz */
+
+    /* SVID and SID need not be changed */
+
+    /* PXPEPBAR - TODO? */
+    pci_set_quad(d->wmask + D0F0_PXPEPBAR, (D0F0_PXPEPBAR_PXPEPBAR |
D0F0_PXPEPBAR_PXPEPBAREN)); // set writable
+
+    /* MCHBAR - TODO? */
+    pci_set_quad(d->wmask + D0F0_MCHBAR, (D0F0_MCHBAR_MCHBAR |
D0F0_MCHBAR_MCHBAREN)); // set writable
+
+    /* GGC-GMCH */
+    /* RW Registers can be locked by GCCLCK - TODO, assumed RO */
+    //pci_set_word(d->config + D0F0_GGC, (data & (D0F0_GGC_VAMEN |
D0F0_GGC_GGMS | D0F0_GGC_GMS | D0F0_GGC_GGCLCK)));
+
+    /* DEVEN */
+    pci_set_long(d->wmask + D0F0_DEVEN, D0F0_DEVEN_D0EN);
+    pci_set_long(d->config + D0F0_DEVEN, D0F0_DEVEN_D2EN);
+
+    /* DMIBAR */
+    pci_set_quad(d->wmask + D0F0_DMIBAR, (D0F0_DMIBAR_DMIBAR |
D0F0_DMIBAR_DMIBAREN)); // set writable
+
+    /* TOM - Top Of Memory Register */
+    pci_set_quad(d->wmask + D0F0_TOM, (D0F0_TOM_TOM | D0F0_TOM_LOCK ));
+
+    /* TOUUD - Top Of Upper Usable DRAM Register */
+    pci_set_quad(d->wmask + D0F0_TOUUD, (D0F0_TOUUD_TOUUD |
D0F0_TOUUD_LOCK ));
+
+    /* BDSM - Base Data of Stolen Memory Register */
+    pci_set_long(d->wmask + D0F0_BDSM, (D0F0_BDSM_BDSM | D0F0_BDSM_LOCK));
+
+    /* BGSM - Base of GTT Stolen Memory Register */
+    pci_set_long(d->wmask + D0F0_BGSM, (D0F0_BGSM_BGSM | D0F0_BGSM_LOCK));
+
+    /* TSEG - G Memory Base Register */
+    pci_set_long(d->wmask + D0F0_TSEG, (D0F0_TSEG_TSEGMB |
D0F0_TSEG_LOCK));
+
+    /* TOLUD - Top of Low Usable DRAM */
+    pci_set_long(d->wmask + D0F0_TOLUD, (D0F0_TOLUD_TOLUD |
D0F0_TOLUD_LOCK));
+#endif
+
     return 0;
 }

@@ -390,6 +718,7 @@ static void mch_class_init(ObjectClass *klass, void
*data)

     k->init = mch_init;
     k->config_write = mch_write_config;
+    k->config_read = mch_read_config;
     dc->reset = mch_reset;
     set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
     dc->desc = "Host bridge";
@@ -403,6 +732,16 @@ static void mch_class_init(ObjectClass *klass, void
*data)
      * host-facing part, which can't be device_add'ed, yet.
      */
     dc->cannot_instantiate_with_device_add_yet = true;
+
+    /* XEN code suggests these values should match the host.
+     * However, it's not that simple because currently seabios expects
+     * either PII3X or Q35.
+     * Further more, I have only found reason for the LPC to match the host
+     * for the PCH identification. No mention about the host-bridge. */
+
+//    k->vendor_id = host_pci_read_config(0,0,0,0x00,2);
+//    k->device_id = host_pci_read_config(0,0,0,0x02,2);
+//    k->revision = host_pci_read_config(0,0,0,0x08,1);
 }

 static const TypeInfo mch_info = {

Comments

Alex Williamson Sept. 24, 2014, 7:47 p.m. UTC | #1
On Wed, 2014-09-24 at 14:20 +0100, Andrew Barnes wrote:
> hw/pci-host/q35.c
> 
> this patch adds:
> * redirect some PCI config reads/writes to host
> * memory map BSDM, BGSM, TSEG
> 
> patch
> ---------------------
> 
> diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c
> index a0a3068..05348ac 100644
> --- a/hw/pci-host/q35.c
> +++ b/hw/pci-host/q35.c
> @@ -6,6 +6,7 @@
>   *               Isaku Yamahata <yamahata at valinux co jp>
>   *               VA Linux Systems Japan K.K.
>   * Copyright (C) 2012 Jason Baron <jbaron@redhat.com>
> + *               2014 Andrew barnes <andy@outsideglobe.com> IGD Support
>   *
>   * This is based on piix_pci.c, but heavily modified.
>   *
> @@ -30,11 +31,26 @@
>  #include "hw/hw.h"
>  #include "hw/pci-host/q35.h"
>  #include "qapi/visitor.h"
> +#include "hw/pci/pci.h"
> +#include <sys/mman.h> /* memory map functions */
> +
> +/* #define DEBUG_Q35 */
> +#ifdef DEBUG_Q35
> +# define Q35_DPRINTF(format, ...)       printf("Q35: " format, ##
> __VA_ARGS__)
> +#else
> +# define Q35_DPRINTF(format, ...)       do { } while (0)
> +#endif
> +
> +/* for intel-spec conforming config */
> +/* #define CORRECT_CONFIG */
> 
>  /****************************************************************************
>   * Q35 host
>   */
> 
> +/* BEWARE: only set this if you are passing IGD through */
> +static bool IGD_PASSTHROUGH = true;
> +
>  static void q35_host_realize(DeviceState *dev, Error **errp)
>  {
>      PCIHostState *pci = PCI_HOST_BRIDGE(dev);
> @@ -284,11 +300,193 @@ static void mch_set_smm(int smm, void *arg)
>      memory_region_transaction_commit();
>  }
> 
> -static void mch_write_config(PCIDevice *d,
> +/* TODO: Move these variables/defines to be consistent with Q35 coding
> style */
> +MemoryRegion bdsm;
> +uint32_t bdsm_host;
> +MemoryRegion bgsm;
> +uint32_t bgsm_host;
> +MemoryRegion tseg;
> +uint32_t tseg_host;
> +#define MCH_CONFIG_BDSM 0xb0
> +#define MCH_CONFIG_BGSM 0xb4
> +#define MCH_CONFIG_TSEG 0xb8
> +#define MCH_CONFIG_GMCH 0x50
> +
> +/* Setup memory map for Stolen Memory */
> +static void mch_setup_bdsm(PCIDevice *d)
> +{
> +    int fd;
> +    char name[64];
> +    void *map;
> +    bdsm_host =
> host_pci_read_config(0,PCI_SLOT(d->devfn),PCI_FUNC(d->devfn),MCH_CONFIG_BDSM,4);
> +    int dsm_size;
> +    uint16_t gmch_host =
> host_pci_read_config(0,PCI_SLOT(d->devfn),PCI_FUNC(d->devfn),MCH_CONFIG_GMCH,2);
> +    uint16_t gmch_dsm = (gmch_host & 0xF8) >> 3;
> +    MCHPCIState *mch = MCH_PCI_DEVICE(d);
> +
> +    Q35_DPRINTF("%s Setup BDSM: %x\n",__func__,bdsm_host);
> +
> +    snprintf(name, sizeof(name), "MCH-BDSM-mmap");
> +    fd = open("/dev/mem", O_RDWR);
> +
> +    switch (gmch_dsm)
> +    {
> +      case 0x0: dsm_size = 0; break;
> +      case 0x1: dsm_size = 32; break;
> +      case 0x2: dsm_size = 64; break;
> +      case 0x3: dsm_size = 96; break;
> +      case 0x4: dsm_size = 128; break;
> +      case 0x5: dsm_size = 160; break;
> +      case 0x6: dsm_size = 192; break;
> +      case 0x7: dsm_size = 224; break;
> +      case 0x8: dsm_size = 256; break;
> +      case 0x9: dsm_size = 288; break;
> +      case 0xa: dsm_size = 320; break;
> +      case 0xb: dsm_size = 352; break;
> +      case 0xc: dsm_size = 384; break;
> +      case 0xd: dsm_size = 416; break;
> +      case 0xe: dsm_size = 448; break;
> +      case 0xf: dsm_size = 480; break;
> +      case 0x10: dsm_size = 512; break;
> +     // default: /* panic */

Certainly do something other than pass a multiple of an uninitialized
variable through to mmap.

> +    }
> +
> +    dsm_size = dsm_size * 1024 * 1024;
> +    map = mmap(NULL,dsm_size,PROT_READ|PROT_WRITE,MAP_SHARED,fd,(bdsm_host
> & 0xFFF00000));
> +
> +    if (map == MAP_FAILED)
> +    {
> +        map = NULL;
> +        Q35_DPRINTF("%s Setup BDSM: MAP_FAILED\n",__func__);
> +    }
> +
> +    memory_region_init_ram_ptr(&bdsm, OBJECT(mch), name, dsm_size, map);
> +}

Presumably this is something else we'd need to provide through the vfio
device.  So we're up to

1. opregion (3 pages... wasn't it 2 before?)
2. stolen memory (up to 512M)


> +
> +/* this function memory maps the region of host memory assigned to Stolen
> memory
> + * to the guest location provided by seabios. */
> +static void mch_map_bdsm(PCIDevice *d, uint32_t bdsm_new)
> +{
> +    MemoryRegion *guest_memory = get_system_memory();
> +    uint32_t bdsm_current = pci_default_read_config(d,MCH_CONFIG_BDSM,4);
> +
> +    if ( bdsm_current != bdsm_host && bdsm_current != 0 )
> +    {
> +        // remap
> +        Q35_DPRINTF("%s Delete BDSM mapping: %x\n",__func__,(bdsm_current
> & 0xFFF00000));
> +        memory_region_del_subregion(guest_memory, &bdsm);
> +    }
> +    Q35_DPRINTF("%s Add BDSM mapping: %x ->
> %x\n",__func__,bdsm_host,bdsm_new);
> +    memory_region_add_subregion(guest_memory, (bdsm_new & 0xFFF00000),
> &bdsm);
> +}
> +
> +/* Setup memory map for GTT Stolen Memory */
> +static void mch_setup_bgsm(PCIDevice *d)
> +{
> +    int fd;
> +    char name[64];
> +    void *map;
> +    bgsm_host =
> host_pci_read_config(0,PCI_SLOT(d->devfn),PCI_FUNC(d->devfn),MCH_CONFIG_BGSM,4);
> +    int gsm_size;
> +    uint16_t gmch_host =
> host_pci_read_config(0,PCI_SLOT(d->devfn),PCI_FUNC(d->devfn),MCH_CONFIG_GMCH,2);
> +    uint16_t gmch_gsm = (gmch_host & 0x300) >> 8;
> +    MCHPCIState *mch = MCH_PCI_DEVICE(d);
> +
> +    Q35_DPRINTF("%s Setup BGSM: %x\n",__func__,bgsm_host);
> +
> +    snprintf(name, sizeof(name), "MCH %04x:%02x:%02x.%x BGSM mmap",
> +             0, 0, 0, 0);
> +
> +    fd = open("/dev/mem", O_RDWR);
> +
> +    switch (gmch_gsm)
> +    {
> +      case 0x2: gsm_size = 2; break;
> +      case 0x1: gsm_size = 1; break;
> +      case 0x0: gsm_size = 0; break;
> +     // default: /* panic */
> +    }
> +
> +    gsm_size = gsm_size * 1024 * 1024;
> +
> +    map = mmap(NULL,gsm_size,PROT_READ|PROT_WRITE,MAP_SHARED,fd,(bgsm_host
> & 0xFFF00000));
> +    if (map == MAP_FAILED)
> +    {
> +        map = NULL;
> +        Q35_DPRINTF("%s Setup BGSM: MAP_FAILED\n",__func__);
> +    }
> +
> +    memory_region_init_ram_ptr(&bgsm, OBJECT(mch), name, gsm_size, map);
> +}

And a third:

3. GTT stolen memory (up to 2M)

Somehow vfio needs to be in control of these mappings since they'll go
through a vfio device file descriptor.

> +
> +/* this function memory maps the region of host memory assigned to GTT
> Stolen memory
> + * to the guest location provided by seabios. */
> +static void mch_map_bgsm(PCIDevice *d, uint32_t bgsm_new)
> +{
> +    MemoryRegion *guest_memory = get_system_memory();
> +    uint32_t bgsm_current = pci_default_read_config(d,MCH_CONFIG_BGSM,4);
> +
> +    if ( bgsm_current != bgsm_host && bgsm_current != 0)
> +    {
> +        // remap
> +        Q35_DPRINTF("%s Delete BGSM mapping: %x\n",__func__,(bgsm_current
> & 0xFFF00000));
> +        memory_region_del_subregion(guest_memory, &bgsm);
> +    }
> +    Q35_DPRINTF("%s Add BGSM mapping: %x ->
> %x\n",__func__,bgsm_host,bgsm_new);
> +    memory_region_add_subregion(guest_memory, (bgsm_new & 0xFFF00000),
> &bgsm);
> +}
> +
> +/* Setup memory map for TSEG */
> +static void mch_setup_tseg(PCIDevice *d)
> +{
> +    int fd;
> +    char name[64];
> +    void *map;
> +    tseg_host =
> host_pci_read_config(0,PCI_SLOT(d->devfn),PCI_FUNC(d->devfn),MCH_CONFIG_TSEG,4);
> +    int tseg_size = 8 * 1024 * 1024;
> +    MCHPCIState *mch = MCH_PCI_DEVICE(d);
> +
> +    Q35_DPRINTF("%s Setup TSEG: %x\n",__func__,tseg_host);
> +
> +    snprintf(name, sizeof(name), "MCH-TSEG-mmap");
> +
> +    fd = open("/dev/mem", O_RDWR);
> +
> +    map =
> mmap(NULL,tseg_size,PROT_READ|PROT_WRITE,MAP_SHARED,fd,(tseg_host &
> 0xFFF00000));
> +    if (map == MAP_FAILED)
> +    {
> +        map = NULL;
> +        Q35_DPRINTF("%s Setup TSEG: MAP_FAILED\n",__func__);
> +    }
> +
> +    memory_region_init_ram_ptr(&tseg, OBJECT(mch), name, tseg_size, map);
> +}


Ugh, number four?!

4. tseg (8M)

> +
> +/* this function memory maps the region of host memory assigned to TSEG
> + * to the guest location provided by seabios. */
> +static void mch_map_tseg(PCIDevice *d, uint32_t tseg_new)
> +{
> +    MemoryRegion *guest_memory = get_system_memory();
> +    uint32_t tseg_current = pci_default_read_config(d,MCH_CONFIG_TSEG,4);
> +
> +    if ( tseg_current != tseg_host && tseg_current != 0)
> +    {
> +        // remap
> +        Q35_DPRINTF("%s Delete TSEG mapping: %x\n",__func__,(tseg_current
> & 0xFFF00000));
> +        memory_region_del_subregion(guest_memory, &tseg);
> +    }
> +    Q35_DPRINTF("%s Add TSEG mapping: %x ->
> %x\n",__func__,tseg_host,tseg_new);
> +    memory_region_add_subregion(guest_memory, (tseg_new & 0xFFF00000),
> &tseg);
> +}
> +
> +static void mch_write_config_default(PCIDevice *d,
>                                uint32_t address, uint32_t val, int len)
>  {
>      MCHPCIState *mch = MCH_PCI_DEVICE(d);
> 
> +    Q35_DPRINTF("%s(%04x:%02x:%02x.%x, @0x%x, len=0x%x) %x\n", __func__,
> +                0000, 00, PCI_SLOT(d->devfn),PCI_FUNC(d->devfn), address,
> len, val);
> +
>      /* XXX: implement SMRAM.D_LOCK */
>      pci_default_write_config(d, address, val, len);
> 
> @@ -308,6 +506,73 @@ static void mch_write_config(PCIDevice *d,
>      }
>  }
> 
> +static void mch_write_config(PCIDevice *d,
> +                             uint32_t address, uint32_t val, int len)
> +{
> +    Q35_DPRINTF("%s(%04x:%02x:%02x.%x, @0x%x, len=0x%x) %x\n", __func__,
> +                0000, 00, PCI_SLOT(d->devfn),PCI_FUNC(d->devfn), address,
> len, val);
> +
> +    /* PAVPC - Protected Audio Video Path Control */
> +    if (IGD_PASSTHROUGH && ranges_overlap(address, len, 0x58, 4))
> +    {
> +
>  host_pci_write_config(0,PCI_SLOT(d->devfn),PCI_FUNC(d->devfn),address,len,val);

Direct access to a completely different host device than assigned.
Lovely.  Somehow this likely needs to get bundled into vfio too unless
Intel modifies their driver to not need it.

> +    }
> +    /* BDSM - */
> +    else if (IGD_PASSTHROUGH && ranges_overlap(address, len,
> MCH_CONFIG_BDSM, 4))
> +    {
> +        mch_map_bdsm(d,val);
> +        mch_write_config_default(d,address,val,len);
> +    }
> +    /* BGSM - */
> +    else if (IGD_PASSTHROUGH && ranges_overlap(address, len,
> MCH_CONFIG_BGSM, 4))
> +    {
> +        mch_map_bgsm(d,val);
> +        mch_write_config_default(d,address,val,len);
> +    }
> +    /* TSEG - */
> +    else if (IGD_PASSTHROUGH && ranges_overlap(address, len,
> MCH_CONFIG_TSEG, 4))
> +    {
> +        mch_map_tseg(d,val);
> +        mch_write_config_default(d,address,val,len);
> +    }

So the opregion is mapped by a config write on the IGD device itself and
the other 3 regions, that we know about so far, are mapped via writes to
the host bridge.  This is pretty ridiculous, Intel.

> +    else
> +    {
> +        mch_write_config_default(d,address,val,len);
> +    }
> +}
> +
> +static uint32_t mch_read_config(PCIDevice *d,
> +                                uint32_t address, int len)
> +{
> +    uint32_t val;
> +
> +    if (IGD_PASSTHROUGH)
> +    {
> +        if (ranges_overlap(address, len, 0x2c, 2) || /* SVID - Subsystem
> Vendor Identification */
> +            ranges_overlap(address, len, 0x2e, 2) || /* SID - Subsystem
> Identification */
> +            ranges_overlap(address, len, 0x50, 2) || /* GMCH - SNB
> Graphics Control Register */
> +            ranges_overlap(address, len, 0x58, 4))   /* PAVPC - Protected
> Audio Video Path Control  */
> +        {
> +            val =
> host_pci_read_config(0,PCI_SLOT(d->devfn),PCI_FUNC(d->devfn),address,len);
> +        }
> +        else
> +        {
> +            goto defaultread;
> +        }
> +    }
> +    else
> +    {
> +defaultread:
> +        val = pci_default_read_config(d,address,len);
> +    }
> +
> +    Q35_DPRINTF("%s(%04x:%02x:%02x.%x, @0x%x, len=0x%x) %x\n", __func__,
> +                0000, 00, PCI_SLOT(d->devfn),PCI_FUNC(d->devfn), address,
> len, val);
> +
> +    return val;
> +
> +}
> +
>  static void mch_update(MCHPCIState *mch)
>  {
>      mch_update_pciexbar(mch);
> @@ -370,6 +635,69 @@ static int mch_init(PCIDevice *d)
>                   &mch->pam_regions[i+1], PAM_EXPAN_BASE + i *
> PAM_EXPAN_SIZE,
>                   PAM_EXPAN_SIZE);
>      }
> +
> +    mch_setup_bdsm(d);
> +    mch_setup_bgsm(d);
> +    mch_setup_tseg(d);
> +
> +    /* Unsure if this is important. but the following is as per INTEL spec
> +     * which otherwise is non-conforming. */
> +#ifdef CORRECT_CONFIG
> +    /* PCICMD Register */
> +    pci_set_word(d->wmask + D0F0_PCICMD,(D0F0_PCICMD_SERR |
> D0F0_PCICMD_PERR)); // set writable
> +    pci_set_word(d->config + D0F0_PCICMD,(D0F0_PCICMD_MAE |
> D0F0_PCICMD_BME)); // set 1
> +
> +    /* PCISTS Register */
> +    pci_set_word(d->w1cmask + D0F0_PCISTS,(D0F0_PCISTS_DPE |
> D0F0_PCISTS_SSE | D0F0_PCISTS_RMAS |
> +                  D0F0_PCISTS_RTAS | D0F0_PCISTS_DPD));
> +    pci_set_word(d->config + D0F0_PCISTS,(D0F0_PCISTS_CLIST |
> D0F0_PCISTS_FB2B));
> +
> +    /* CC Register */
> +    /* No RW Registers */
> +    //pci_set_byte(d->config + D0F0_CC + D0F0_CC_BCC, 0x06); /* indicating
> a bridge device */
> +
> +    /* HDR Register */
> +    /* No RW Registers */
> +    /* !nnz */
> +
> +    /* SVID and SID need not be changed */
> +
> +    /* PXPEPBAR - TODO? */
> +    pci_set_quad(d->wmask + D0F0_PXPEPBAR, (D0F0_PXPEPBAR_PXPEPBAR |
> D0F0_PXPEPBAR_PXPEPBAREN)); // set writable
> +
> +    /* MCHBAR - TODO? */
> +    pci_set_quad(d->wmask + D0F0_MCHBAR, (D0F0_MCHBAR_MCHBAR |
> D0F0_MCHBAR_MCHBAREN)); // set writable
> +
> +    /* GGC-GMCH */
> +    /* RW Registers can be locked by GCCLCK - TODO, assumed RO */
> +    //pci_set_word(d->config + D0F0_GGC, (data & (D0F0_GGC_VAMEN |
> D0F0_GGC_GGMS | D0F0_GGC_GMS | D0F0_GGC_GGCLCK)));
> +
> +    /* DEVEN */
> +    pci_set_long(d->wmask + D0F0_DEVEN, D0F0_DEVEN_D0EN);
> +    pci_set_long(d->config + D0F0_DEVEN, D0F0_DEVEN_D2EN);
> +
> +    /* DMIBAR */
> +    pci_set_quad(d->wmask + D0F0_DMIBAR, (D0F0_DMIBAR_DMIBAR |
> D0F0_DMIBAR_DMIBAREN)); // set writable
> +
> +    /* TOM - Top Of Memory Register */
> +    pci_set_quad(d->wmask + D0F0_TOM, (D0F0_TOM_TOM | D0F0_TOM_LOCK ));
> +
> +    /* TOUUD - Top Of Upper Usable DRAM Register */
> +    pci_set_quad(d->wmask + D0F0_TOUUD, (D0F0_TOUUD_TOUUD |
> D0F0_TOUUD_LOCK ));
> +
> +    /* BDSM - Base Data of Stolen Memory Register */
> +    pci_set_long(d->wmask + D0F0_BDSM, (D0F0_BDSM_BDSM | D0F0_BDSM_LOCK));
> +
> +    /* BGSM - Base of GTT Stolen Memory Register */
> +    pci_set_long(d->wmask + D0F0_BGSM, (D0F0_BGSM_BGSM | D0F0_BGSM_LOCK));
> +
> +    /* TSEG - G Memory Base Register */
> +    pci_set_long(d->wmask + D0F0_TSEG, (D0F0_TSEG_TSEGMB |
> D0F0_TSEG_LOCK));
> +
> +    /* TOLUD - Top of Low Usable DRAM */
> +    pci_set_long(d->wmask + D0F0_TOLUD, (D0F0_TOLUD_TOLUD |
> D0F0_TOLUD_LOCK));
> +#endif
> +

A huge pile of migration breaks if nothing else.

>      return 0;
>  }
> 
> @@ -390,6 +718,7 @@ static void mch_class_init(ObjectClass *klass, void
> *data)
> 
>      k->init = mch_init;
>      k->config_write = mch_write_config;
> +    k->config_read = mch_read_config;
>      dc->reset = mch_reset;
>      set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
>      dc->desc = "Host bridge";
> @@ -403,6 +732,16 @@ static void mch_class_init(ObjectClass *klass, void
> *data)
>       * host-facing part, which can't be device_add'ed, yet.
>       */
>      dc->cannot_instantiate_with_device_add_yet = true;
> +
> +    /* XEN code suggests these values should match the host.
> +     * However, it's not that simple because currently seabios expects
> +     * either PII3X or Q35.
> +     * Further more, I have only found reason for the LPC to match the host
> +     * for the PCH identification. No mention about the host-bridge. */
> +
> +//    k->vendor_id = host_pci_read_config(0,0,0,0x00,2);
> +//    k->device_id = host_pci_read_config(0,0,0,0x02,2);
> +//    k->revision = host_pci_read_config(0,0,0,0x08,1);
>  }
> 
>  static const TypeInfo mch_info = {
Paolo Bonzini Sept. 24, 2014, 8:31 p.m. UTC | #2
Il 24/09/2014 21:47, Alex Williamson ha scritto:
> So the opregion is mapped by a config write on the IGD device itself and
> the other 3 regions, that we know about so far, are mapped via writes to
> the host bridge.

AFAIU the opregion is mapped by the (host) BIOS, that writes the address
to a well-known scratch dword in the configuration space.  The host
reads from the dword and finds the opregion that way.

Paolo
Alex Williamson Sept. 24, 2014, 8:57 p.m. UTC | #3
On Wed, 2014-09-24 at 22:31 +0200, Paolo Bonzini wrote:
> Il 24/09/2014 21:47, Alex Williamson ha scritto:
> > So the opregion is mapped by a config write on the IGD device itself and
> > the other 3 regions, that we know about so far, are mapped via writes to
> > the host bridge.
> 
> AFAIU the opregion is mapped by the (host) BIOS, that writes the address
> to a well-known scratch dword in the configuration space.  The host
> reads from the dword and finds the opregion that way.

Right, that's the physical mapping, Andy's patches are mimicking that
behavior virtually.  Seabios reserves memory, creates e820 entries, and
"maps" the hardware by writing to these registers.  That triggers QEMU
to adjust the MemoryRegion in the guest address space which is an mmap
to the host address space, using /dev/mem for now, but hopefully the
vfio file descriptor in the future (I should be careful what I hope
for).

The opregion is pretty trivial because the write is to the IGD itself.
The others are to the host bridge, so we need to figure out what sort of
abstraction makes sense to get that back into vfio code.

Perhaps vfio creates all the memory regions and registers them into an
igd service and the host bridge can make calls like:

gtt = igd_get_gtt_mr();

which returns NULL and nothing happens or the registered MemoryRegion
and the host bridge places it into the address space.  Thanks,

Alex
Paolo Bonzini Sept. 24, 2014, 9:18 p.m. UTC | #4
Il 24/09/2014 22:57, Alex Williamson ha scritto:
> Right, that's the physical mapping, Andy's patches are mimicking that
> behavior virtually.  Seabios reserves memory, creates e820 entries, and
> "maps" the hardware by writing to these registers.  That triggers QEMU
> to adjust the MemoryRegion in the guest address space which is an mmap
> to the host address space, using /dev/mem for now, but hopefully the
> vfio file descriptor in the future (I should be careful what I hope
> for).

Yeah, I remember discussing that with Andrew on IRC.  So he did
implement that idea.

> The opregion is pretty trivial because the write is to the IGD itself.
> The others are to the host bridge, so we need to figure out what sort of
> abstraction makes sense to get that back into vfio code.

Do we have to support all chipsets?  IIUC the more recent devices need
fewer and fewer "backdoors".

Paolo

> Perhaps vfio creates all the memory regions and registers them into an
> igd service and the host bridge can make calls like:
> 
> gtt = igd_get_gtt_mr();
> 
> which returns NULL and nothing happens or the registered MemoryRegion
> and the host bridge places it into the address space.  Thanks,
diff mbox

Patch

diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c
index a0a3068..05348ac 100644
--- a/hw/pci-host/q35.c
+++ b/hw/pci-host/q35.c
@@ -6,6 +6,7 @@ 
  *               Isaku Yamahata <yamahata at valinux co jp>
  *               VA Linux Systems Japan K.K.
  * Copyright (C) 2012 Jason Baron <jbaron@redhat.com>
+ *               2014 Andrew barnes <andy@outsideglobe.com> IGD Support
  *
  * This is based on piix_pci.c, but heavily modified.
  *
@@ -30,11 +31,26 @@ 
 #include "hw/hw.h"
 #include "hw/pci-host/q35.h"
 #include "qapi/visitor.h"
+#include "hw/pci/pci.h"
+#include <sys/mman.h> /* memory map functions */
+
+/* #define DEBUG_Q35 */
+#ifdef DEBUG_Q35
+# define Q35_DPRINTF(format, ...)       printf("Q35: " format, ##
__VA_ARGS__)
+#else
+# define Q35_DPRINTF(format, ...)       do { } while (0)
+#endif
+
+/* for intel-spec conforming config */
+/* #define CORRECT_CONFIG */

 /****************************************************************************
  * Q35 host
  */

+/* BEWARE: only set this if you are passing IGD through */
+static bool IGD_PASSTHROUGH = true;
+
 static void q35_host_realize(DeviceState *dev, Error **errp)
 {