diff mbox

[2/2] intel_iommu: Add support for translation for devices behind bridges.

Message ID 1413844443-28894-3-git-send-email-knut.omang@oracle.com
State New
Headers show

Commit Message

Knut Omang Oct. 20, 2014, 10:34 p.m. UTC
- Add call to pci_setup_iommu for the secondary bus in a bridge.
- Refactor IntelIOMMUState to use a list instead of tables based on
  bus/devfn, as bus numbers can change dynamically.
- Instead store reference to the VTDAddressSpace as an AddressSpace
  pointer, dma_as within PCIDevice.
- Use NULL dev to q35_host_dma_iommu to indicate a special (non-pci)
  device (needed by interrupt remapping logic)

Signed-off-by: Knut Omang <knut.omang@oracle.com>
---
 hw/i386/intel_iommu.c         | 58 ++++++++++++++++++-------------------------
 hw/pci-host/q35.c             | 40 ++++++++++++-----------------
 hw/pci/pci_bridge.c           |  6 +++++
 include/hw/i386/intel_iommu.h |  6 +++--
 4 files changed, 50 insertions(+), 60 deletions(-)

Comments

Jan Kiszka Oct. 25, 2014, 11:36 a.m. UTC | #1
On 2014-10-21 00:34, Knut Omang wrote:
> diff --git a/hw/pci/pci_bridge.c b/hw/pci/pci_bridge.c
> index 40c97b1..e6832c4 100644
> --- a/hw/pci/pci_bridge.c
> +++ b/hw/pci/pci_bridge.c
> @@ -376,8 +376,14 @@ int pci_bridge_initfn(PCIDevice *dev, const char *typename)
>      sec_bus->address_space_io = &br->address_space_io;
>      memory_region_init(&br->address_space_io, OBJECT(br), "pci_bridge_io", 65536);
>      br->windows = pci_bridge_region_init(br);
> +
>      QLIST_INIT(&sec_bus->child);
>      QLIST_INSERT_HEAD(&parent->child, sec_bus, sibling);
> +
> +    if (dev->bus->iommu_opaque) {
> +        pci_setup_iommu(sec_bus, dev->bus->iommu_fn, dev->bus->iommu_opaque);
> +    }
> +

So, if I followed the discussion in the cover-letter thread correctly,
this should rather move into the bridge device init functions because
the PCI[e]-PCI bridge ("pci-bridge") would not call it, right?

Jan
Jan Kiszka Oct. 25, 2014, 12:24 p.m. UTC | #2
On 2014-10-25 13:36, Jan Kiszka wrote:
> On 2014-10-21 00:34, Knut Omang wrote:
>> diff --git a/hw/pci/pci_bridge.c b/hw/pci/pci_bridge.c
>> index 40c97b1..e6832c4 100644
>> --- a/hw/pci/pci_bridge.c
>> +++ b/hw/pci/pci_bridge.c
>> @@ -376,8 +376,14 @@ int pci_bridge_initfn(PCIDevice *dev, const char *typename)
>>      sec_bus->address_space_io = &br->address_space_io;
>>      memory_region_init(&br->address_space_io, OBJECT(br), "pci_bridge_io", 65536);
>>      br->windows = pci_bridge_region_init(br);
>> +
>>      QLIST_INIT(&sec_bus->child);
>>      QLIST_INSERT_HEAD(&parent->child, sec_bus, sibling);
>> +
>> +    if (dev->bus->iommu_opaque) {
>> +        pci_setup_iommu(sec_bus, dev->bus->iommu_fn, dev->bus->iommu_opaque);
>> +    }
>> +
> 
> So, if I followed the discussion in the cover-letter thread correctly,
> this should rather move into the bridge device init functions because
> the PCI[e]-PCI bridge ("pci-bridge") would not call it, right?

Not right. We need the setup in any case (except for the virtio bridges
I'm currently thinking of for encapsulating non-translatable virtio
devices). But something still has to change to reflect the requester ID
aliasing of the PCIe-PCI bridge, no?

Jan
Jan Kiszka Oct. 25, 2014, 12:28 p.m. UTC | #3
On 2014-10-21 00:34, Knut Omang wrote:
> @@ -1801,8 +1792,7 @@ static IOMMUTLBEntry vtd_iommu_translate(MemoryRegion *iommu, hwaddr addr,
>          return ret;
>      }
>  
> -    vtd_do_iommu_translate(vtd_as, vtd_as->bus_num, vtd_as->devfn, addr,
> -                           is_write, &ret);
> +    vtd_do_iommu_translate(vtd_as, addr, is_write, &ret);
>      VTD_DPRINTF(MMU,
>                  "bus %"PRIu8 " slot %"PRIu8 " func %"PRIu8 " devfn %"PRIu8
>                  " gpa 0x%"PRIx64 " hpa 0x%"PRIx64, vtd_as->bus_num,

You need to update the VTD_DPRINTF as well when removing bus_num from
VTDAddressSpace.

Jan
Knut Omang Oct. 26, 2014, 4:46 a.m. UTC | #4
On Sat, 2014-10-25 at 14:24 +0200, Jan Kiszka wrote:
> On 2014-10-25 13:36, Jan Kiszka wrote:
> > On 2014-10-21 00:34, Knut Omang wrote:
> >> diff --git a/hw/pci/pci_bridge.c b/hw/pci/pci_bridge.c
> >> index 40c97b1..e6832c4 100644
> >> --- a/hw/pci/pci_bridge.c
> >> +++ b/hw/pci/pci_bridge.c
> >> @@ -376,8 +376,14 @@ int pci_bridge_initfn(PCIDevice *dev, const char *typename)
> >>      sec_bus->address_space_io = &br->address_space_io;
> >>      memory_region_init(&br->address_space_io, OBJECT(br), "pci_bridge_io", 65536);
> >>      br->windows = pci_bridge_region_init(br);
> >> +
> >>      QLIST_INIT(&sec_bus->child);
> >>      QLIST_INSERT_HEAD(&parent->child, sec_bus, sibling);
> >> +
> >> +    if (dev->bus->iommu_opaque) {
> >> +        pci_setup_iommu(sec_bus, dev->bus->iommu_fn, dev->bus->iommu_opaque);
> >> +    }
> >> +
> > 
> > So, if I followed the discussion in the cover-letter thread correctly,
> > this should rather move into the bridge device init functions because
> > the PCI[e]-PCI bridge ("pci-bridge") would not call it, right?
> 
> Not right. We need the setup in any case (except for the virtio bridges
> I'm currently thinking of for encapsulating non-translatable virtio
> devices). But something still has to change to reflect the requester ID
> aliasing of the PCIe-PCI bridge, no?

Yes, that's my understanding too.

Knut
Jan Kiszka Oct. 26, 2014, 12:06 p.m. UTC | #5
On 2014-10-21 00:34, Knut Omang wrote:
> @@ -65,11 +66,12 @@ struct VTDContextCacheEntry {
>  };
>  
>  struct VTDAddressSpace {
> -    uint8_t bus_num;
> +    PCIDevice *dev;

This change is not helpful for clean handling of non-PCI devices (i.e.
platform device interrupt remapping => you had to pull
Q35_PSEUDO_BUS_PLATFORM into intel_iommu, which is violating the
layering). Please leave bus_num in place - or convert to a 16-bit SID.

>      uint8_t devfn;
>      AddressSpace as;
>      MemoryRegion iommu;
>      IntelIOMMUState *iommu_state;
> +    QLIST_ENTRY(VTDAddressSpace) iommu_next; /* For traversal by the iommu */
>      VTDContextCacheEntry context_cache_entry;
>  };
>  

Jan
Knut Omang Oct. 26, 2014, 1:15 p.m. UTC | #6
On Sun, 2014-10-26 at 13:06 +0100, Jan Kiszka wrote:
> On 2014-10-21 00:34, Knut Omang wrote:
> > @@ -65,11 +66,12 @@ struct VTDContextCacheEntry {
> >  };
> >  
> >  struct VTDAddressSpace {
> > -    uint8_t bus_num;
> > +    PCIDevice *dev;
> 
> This change is not helpful for clean handling of non-PCI devices (i.e.
> platform device interrupt remapping => you had to pull
> Q35_PSEUDO_BUS_PLATFORM into intel_iommu, which is violating the
> layering). Please leave bus_num in place - or convert to a 16-bit SID.

Hmm - I see..
- the problem I tried to solve is that the bus number of devices below a
root port or downstream switch has not been initialized when
q35_host_dma_iommu is called, so what happens is that the device in the
root port gets indexed as if it were on bus 0.

I am not that familiar with what type of non-pci devices that exists but
I suppose moving this up to the most generic device type that has a bus
associated with it is one way to go.

An alternative implementation that would work in the intel case would be
to keep the list in intel_iommu but provide a callback that iommus can
subscribe to to get notifications when bus numbers change?

Knut

> >      uint8_t devfn;
> >      AddressSpace as;
> >      MemoryRegion iommu;
> >      IntelIOMMUState *iommu_state;
> > +    QLIST_ENTRY(VTDAddressSpace) iommu_next; /* For traversal by the iommu */
> >      VTDContextCacheEntry context_cache_entry;
> >  };
> >  
> 
> Jan
> 
>
Michael S. Tsirkin Oct. 26, 2014, 3:20 p.m. UTC | #7
On Sun, Oct 26, 2014 at 02:15:24PM +0100, Knut Omang wrote:
> On Sun, 2014-10-26 at 13:06 +0100, Jan Kiszka wrote:
> > On 2014-10-21 00:34, Knut Omang wrote:
> > > @@ -65,11 +66,12 @@ struct VTDContextCacheEntry {
> > >  };
> > >  
> > >  struct VTDAddressSpace {
> > > -    uint8_t bus_num;
> > > +    PCIDevice *dev;
> > 
> > This change is not helpful for clean handling of non-PCI devices (i.e.
> > platform device interrupt remapping => you had to pull
> > Q35_PSEUDO_BUS_PLATFORM into intel_iommu, which is violating the
> > layering). Please leave bus_num in place - or convert to a 16-bit SID.
> 
> Hmm - I see..
> - the problem I tried to solve is that the bus number of devices below a
> root port or downstream switch has not been initialized when
> q35_host_dma_iommu is called, so what happens is that the device in the
> root port gets indexed as if it were on bus 0.
> I am not that familiar with what type of non-pci devices that exists but
> I suppose moving this up to the most generic device type that has a bus
> associated with it is one way to go.
> 
> An alternative implementation that would work in the intel case would be
> to keep the list in intel_iommu but provide a callback that iommus can
> subscribe to to get notifications when bus numbers change?
> 
> Knut

I dislike callbacks.

IMO the right thing as usual is to do what real hardware does.

After all it's devices put the requester id in transactions.

How about we add "source id" in AddressSpace structure,
or add a wrapper structure including AddressSpace and
source id.
Update in pci core on config writes into bus number.

Paolo, would that be ok with you?


> > >      uint8_t devfn;
> > >      AddressSpace as;
> > >      MemoryRegion iommu;
> > >      IntelIOMMUState *iommu_state;
> > > +    QLIST_ENTRY(VTDAddressSpace) iommu_next; /* For traversal by the iommu */
> > >      VTDContextCacheEntry context_cache_entry;
> > >  };
> > >  
> > 
> > Jan
> > 
> > 
>
diff mbox

Patch

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 0a4282a..d23c019 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -20,6 +20,7 @@ 
  */
 
 #include "hw/sysbus.h"
+#include "hw/pci/pci.h"
 #include "exec/address-spaces.h"
 #include "intel_iommu_internal.h"
 
@@ -30,6 +31,7 @@  enum {
     DEBUG_CACHE,
 };
 #define VTD_DBGBIT(x)   (1 << DEBUG_##x)
+
 static int vtd_dbgflags = VTD_DBGBIT(GENERAL) | VTD_DBGBIT(CSR);
 
 #define VTD_DPRINTF(what, fmt, ...) do { \
@@ -166,24 +168,11 @@  static gboolean vtd_hash_remove_by_page(gpointer key, gpointer value,
  */
 static void vtd_reset_context_cache(IntelIOMMUState *s)
 {
-    VTDAddressSpace **pvtd_as;
     VTDAddressSpace *vtd_as;
-    uint32_t bus_it;
-    uint32_t devfn_it;
 
     VTD_DPRINTF(CACHE, "global context_cache_gen=1");
-    for (bus_it = 0; bus_it < VTD_PCI_BUS_MAX; ++bus_it) {
-        pvtd_as = s->address_spaces[bus_it];
-        if (!pvtd_as) {
-            continue;
-        }
-        for (devfn_it = 0; devfn_it < VTD_PCI_DEVFN_MAX; ++devfn_it) {
-            vtd_as = pvtd_as[devfn_it];
-            if (!vtd_as) {
-                continue;
-            }
-            vtd_as->context_cache_entry.context_cache_gen = 0;
-        }
+    QLIST_FOREACH(vtd_as, &s->address_spaces, iommu_next) {
+        vtd_as->context_cache_entry.context_cache_gen = 0;
     }
     s->context_cache_gen = 1;
 }
@@ -745,13 +734,11 @@  static inline bool vtd_is_interrupt_addr(hwaddr addr)
 
 /* Map dev to context-entry then do a paging-structures walk to do a iommu
  * translation.
- * @bus_num: The bus number
- * @devfn: The devfn, which is the  combined of device and function number
+ * @vtd_as: The address space to translate against
  * @is_write: The access is a write operation
  * @entry: IOMMUTLBEntry that contain the addr to be translated and result
  */
-static void vtd_do_iommu_translate(VTDAddressSpace *vtd_as, uint8_t bus_num,
-                                   uint8_t devfn, hwaddr addr, bool is_write,
+static void vtd_do_iommu_translate(VTDAddressSpace *vtd_as, hwaddr addr, bool is_write,
                                    IOMMUTLBEntry *entry)
 {
     IntelIOMMUState *s = vtd_as->iommu_state;
@@ -759,6 +746,8 @@  static void vtd_do_iommu_translate(VTDAddressSpace *vtd_as, uint8_t bus_num,
     VTDContextCacheEntry *cc_entry = &vtd_as->context_cache_entry;
     uint64_t slpte;
     uint32_t level;
+    uint8_t bus_num = pci_bus_num(vtd_as->dev->bus);
+    uint8_t devfn = vtd_as->devfn;
     uint16_t source_id = vtd_make_source_id(bus_num, devfn);
     int ret_fr;
     bool is_fpd_set = false;
@@ -878,10 +867,10 @@  static void vtd_context_device_invalidate(IntelIOMMUState *s,
                                           uint16_t func_mask)
 {
     uint16_t mask;
-    VTDAddressSpace **pvtd_as;
     VTDAddressSpace *vtd_as;
     uint16_t devfn;
-    uint16_t devfn_it;
+    uint16_t devfn_it = 0;
+    uint8_t bus_num, bus_num_it;
 
     switch (func_mask & 3) {
     case 0:
@@ -899,16 +888,18 @@  static void vtd_context_device_invalidate(IntelIOMMUState *s,
     }
     VTD_DPRINTF(INV, "device-selective invalidation source 0x%"PRIx16
                     " mask %"PRIu16, source_id, mask);
-    pvtd_as = s->address_spaces[VTD_SID_TO_BUS(source_id)];
-    if (pvtd_as) {
-        devfn = VTD_SID_TO_DEVFN(source_id);
-        for (devfn_it = 0; devfn_it < VTD_PCI_DEVFN_MAX; ++devfn_it) {
-            vtd_as = pvtd_as[devfn_it];
-            if (vtd_as && ((devfn_it & mask) == (devfn & mask))) {
-                VTD_DPRINTF(INV, "invalidate context-cahce of devfn 0x%"PRIx16,
-                            devfn_it);
-                vtd_as->context_cache_entry.context_cache_gen = 0;
-            }
+    bus_num = VTD_SID_TO_BUS(source_id);
+    devfn = VTD_SID_TO_DEVFN(source_id);
+
+    QLIST_FOREACH(vtd_as, &s->address_spaces, iommu_next) {
+        bus_num_it = pci_bus_num(vtd_as->dev->bus);
+        if (bus_num_it != bus_num) {
+            continue;
+        }
+        if ((devfn_it & mask) == (devfn & mask)) {
+            VTD_DPRINTF(INV, "invalidate context-cahce of devfn 0x%"PRIx16,
+                        devfn_it);
+            vtd_as->context_cache_entry.context_cache_gen = 0;
         }
     }
 }
@@ -1801,8 +1792,7 @@  static IOMMUTLBEntry vtd_iommu_translate(MemoryRegion *iommu, hwaddr addr,
         return ret;
     }
 
-    vtd_do_iommu_translate(vtd_as, vtd_as->bus_num, vtd_as->devfn, addr,
-                           is_write, &ret);
+    vtd_do_iommu_translate(vtd_as, addr, is_write, &ret);
     VTD_DPRINTF(MMU,
                 "bus %"PRIu8 " slot %"PRIu8 " func %"PRIu8 " devfn %"PRIu8
                 " gpa 0x%"PRIx64 " hpa 0x%"PRIx64, vtd_as->bus_num,
@@ -1927,7 +1917,7 @@  static void vtd_realize(DeviceState *dev, Error **errp)
     IntelIOMMUState *s = INTEL_IOMMU_DEVICE(dev);
 
     VTD_DPRINTF(GENERAL, "");
-    memset(s->address_spaces, 0, sizeof(s->address_spaces));
+    QLIST_INIT(&s->address_spaces);
     memory_region_init_io(&s->csrmem, OBJECT(s), &vtd_mem_ops, s,
                           "intel_iommu", DMAR_REG_SIZE);
     sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->csrmem);
diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c
index c087c96..ae90a84 100644
--- a/hw/pci-host/q35.c
+++ b/hw/pci-host/q35.c
@@ -349,33 +349,25 @@  static void mch_reset(DeviceState *qdev)
 
 static AddressSpace *q35_host_dma_iommu(PCIDevice *dev, void *opaque)
 {
-    IntelIOMMUState *s = opaque;
-    VTDAddressSpace **pvtd_as;
-    int bus_num = pci_bus_num(dev->bus);
-    int devfn = dev->devfn;
-
-    assert(0 <= bus_num && bus_num <= VTD_PCI_BUS_MAX);
-    assert(0 <= devfn && devfn <= VTD_PCI_DEVFN_MAX);
-
-    pvtd_as = s->address_spaces[bus_num];
-    if (!pvtd_as) {
-        /* No corresponding free() */
-        pvtd_as = g_malloc0(sizeof(VTDAddressSpace *) * VTD_PCI_DEVFN_MAX);
-        s->address_spaces[bus_num] = pvtd_as;
+    VTDAddressSpace *as = NULL;
+    struct IntelIOMMUState *s = opaque;
+
+    if (dev && dev->dma_as) {
+        as = container_of(dev->dma_as, VTDAddressSpace, as);
     }
-    if (!pvtd_as[devfn]) {
-        pvtd_as[devfn] = g_malloc0(sizeof(VTDAddressSpace));
-
-        pvtd_as[devfn]->bus_num = (uint8_t)bus_num;
-        pvtd_as[devfn]->devfn = (uint8_t)devfn;
-        pvtd_as[devfn]->iommu_state = s;
-        pvtd_as[devfn]->context_cache_entry.context_cache_gen = 0;
-        memory_region_init_iommu(&pvtd_as[devfn]->iommu, OBJECT(s),
+    if (!as) {
+        as = g_malloc0(sizeof(VTDAddressSpace));
+        as->dev = dev;
+        as->devfn = dev->devfn;
+        as->iommu_state = s;
+        as->context_cache_entry.context_cache_gen = 0;
+        memory_region_init_iommu(&as->iommu, OBJECT(s),
                                  &s->iommu_ops, "intel_iommu", UINT64_MAX);
-        address_space_init(&pvtd_as[devfn]->as,
-                           &pvtd_as[devfn]->iommu, "intel_iommu");
+        address_space_init(&as->as,
+                           &as->iommu, "intel_iommu");
+        QLIST_INSERT_HEAD(&s->address_spaces, as, iommu_next);
     }
-    return &pvtd_as[devfn]->as;
+    return &as->as;
 }
 
 static void mch_init_dmar(MCHPCIState *mch)
diff --git a/hw/pci/pci_bridge.c b/hw/pci/pci_bridge.c
index 40c97b1..e6832c4 100644
--- a/hw/pci/pci_bridge.c
+++ b/hw/pci/pci_bridge.c
@@ -376,8 +376,14 @@  int pci_bridge_initfn(PCIDevice *dev, const char *typename)
     sec_bus->address_space_io = &br->address_space_io;
     memory_region_init(&br->address_space_io, OBJECT(br), "pci_bridge_io", 65536);
     br->windows = pci_bridge_region_init(br);
+
     QLIST_INIT(&sec_bus->child);
     QLIST_INSERT_HEAD(&parent->child, sec_bus, sibling);
+
+    if (dev->bus->iommu_opaque) {
+        pci_setup_iommu(sec_bus, dev->bus->iommu_fn, dev->bus->iommu_opaque);
+    }
+
     return 0;
 }
 
diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
index f4701e1..b349c6e 100644
--- a/include/hw/i386/intel_iommu.h
+++ b/include/hw/i386/intel_iommu.h
@@ -21,6 +21,7 @@ 
 
 #ifndef INTEL_IOMMU_H
 #define INTEL_IOMMU_H
+#include "qemu/queue.h"
 #include "hw/qdev.h"
 #include "sysemu/dma.h"
 
@@ -65,11 +66,12 @@  struct VTDContextCacheEntry {
 };
 
 struct VTDAddressSpace {
-    uint8_t bus_num;
+    PCIDevice *dev;
     uint8_t devfn;
     AddressSpace as;
     MemoryRegion iommu;
     IntelIOMMUState *iommu_state;
+    QLIST_ENTRY(VTDAddressSpace) iommu_next; /* For traversal by the iommu */
     VTDContextCacheEntry context_cache_entry;
 };
 
@@ -114,7 +116,7 @@  struct IntelIOMMUState {
     GHashTable *iotlb;              /* IOTLB */
 
     MemoryRegionIOMMUOps iommu_ops;
-    VTDAddressSpace **address_spaces[VTD_PCI_BUS_MAX];
+    QLIST_HEAD(, VTDAddressSpace) address_spaces;
 };
 
 #endif