diff mbox

[v5,08/11] spapr-iommu: add SPAPR VFIO IOMMU device

Message ID 1394603550-11556-9-git-send-email-aik@ozlabs.ru
State New
Headers show

Commit Message

Alexey Kardashevskiy March 12, 2014, 5:52 a.m. UTC
This adds a SPAPR VFIO IOMMU device in order to support DMA operations
for VFIO devices.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
---
Changes:
v7:
* fixed to adjust changes to support VFIO KVM device
---
 hw/ppc/spapr_iommu.c   | 97 ++++++++++++++++++++++++++++++++++++++++++++++++++
 include/hw/ppc/spapr.h |  5 +++
 2 files changed, 102 insertions(+)

Comments

Alexander Graf April 3, 2014, 12:17 p.m. UTC | #1
On 12.03.14 06:52, Alexey Kardashevskiy wrote:
> This adds SPAPR VFIO IOMMU device in order to support DMA operations
> for VFIO devices.

Sorry if this has been mentioned before, but why exactly do you need a 
separate IOMMU for VFIO? Couldn't the existing IOMMU backend drive things?


Alex
Alexey Kardashevskiy April 7, 2014, 4:07 a.m. UTC | #2
On 04/03/2014 11:17 PM, Alexander Graf wrote:
> 
> On 12.03.14 06:52, Alexey Kardashevskiy wrote:
>> This adds SPAPR VFIO IOMMU device in order to support DMA operations
>> for VFIO devices.
> 
> Sorry if this has been mentioned before, but why exactly do you need a
> separate IOMMU for VFIO? Couldn't the existing IOMMU backend drive things?

Well... Since I started VFIO on SPAPR, the emulated and VFIO IOMMU became
almost the same thing and I'll rework that too before I post things again.

However one difference still remains - IOMMU for emulated PCI and VIO keeps
a TCE table (allocated in QEMU or mmap'ed from the host kernel) and VFIO
IOMMU works with the table which is allocated and owned by the host kernel.

Since TCE tables are used only by devices, the IOMMU translation callback
is never called by VFIO devices and that's ok and I checked - it works.

So I need either a property in the IOMMU device to tell whether a TCE table
and MemoryRegionIOMMUOps::translate() are required, or a new IOMMU device
class. Which one should I choose?

Oh. btw. There is H_GET_TCE now which I have to implement for VFIO :( This
will never ever end.
Alexander Graf April 10, 2014, 12:13 p.m. UTC | #3
On 07.04.14 06:07, Alexey Kardashevskiy wrote:
> On 04/03/2014 11:17 PM, Alexander Graf wrote:
>> On 12.03.14 06:52, Alexey Kardashevskiy wrote:
>>> This adds SPAPR VFIO IOMMU device in order to support DMA operations
>>> for VFIO devices.
>> Sorry if this has been mentioned before, but why exactly do you need a
>> separate IOMMU for VFIO? Couldn't the existing IOMMU backend drive things?
> Well... Since I started VFIO on SPAPR, the emulated and VFIO IOMMU became
> almost the same thing and I'll rework that too before I post things again.
>
> However one difference still remains - IOMMU for emulated PCI and VIO keeps
> a TCE table (allocated in QEMU or mmap'ed from the host kernel) and VFIO
> IOMMU works with the table which is allocated and owned by the host kernel.
>
> Since TCE tables are used only by devices, the IOMMU translation callback
> is never called by VFIO devices and that's ok and I checked - it works.
>
> So I need either a property in the IOMMU device to tell whether a TCE table
> and MemoryRegionIOMMUOps::translate() are required, or a new IOMMU device
> class. Which one should I choose?

We need to handle in-kernel TCE tables with the emulated device IOMMU as 
well, so I'd

> Oh. btw. There is H_GET_TCE now which I have to implement for VFIO :( This
> will never ever end.

... which means you get H_GET_TCE for free as well ;).


Alex
diff mbox

Patch

diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
index a54f96f..f39cc4a 100644
--- a/hw/ppc/spapr_iommu.c
+++ b/hw/ppc/spapr_iommu.c
@@ -445,9 +445,106 @@  static TypeInfo spapr_tce_table_info = {
     .instance_finalize = spapr_tce_table_finalize,
 };
 
+/*
+ * SPAPR TCE VFIO IOMMU
+ */
+static IOMMUTLBEntry spapr_vfio_translate_iommu(MemoryRegion *iommu,
+                                                hwaddr addr)
+{
+    IOMMUTLBEntry entry = { 0 }; /* defined even if assert() is compiled out */
+    /*
+     * Translate callback for the VFIO IOMMU region; never expected to run.
+     * Normally a QEMU device would use it for DMA, but the vfio-pci device
+     * does no DMA itself: the real hardware does, and the hardware TCE
+     * table performs the address translation. Reaching here is a bug.
+     */
+    assert(0);
+    return entry;
+}
+
+static MemoryRegionIOMMUOps spapr_vfio_iommu_ops = {
+    .translate = spapr_vfio_translate_iommu, /* asserts; not expected to run */
+};
+
+static int spapr_tce_table_vfio_realize(DeviceState *dev)
+{
+    sPAPRTCETable *tcet = SPAPR_TCE_TABLE(dev);
+    /* Init the table's IOMMU region with the VFIO ops declared above. */
+    memory_region_init_iommu(&tcet->iommu, NULL, &spapr_vfio_iommu_ops,
+                             "iommu-vfio-spapr", UINT64_MAX);
+    /* Link into spapr_tce_tables; the type's finalizer unlinks it again. */
+    QLIST_INSERT_HEAD(&spapr_tce_tables, tcet, list);
+
+    return 0; /* unconditional success */
+}
+
+sPAPRTCETable *spapr_vfio_new_table(DeviceState *owner, uint32_t liobn)
+{
+    sPAPRTCETable *tcet;
+    /* New VFIO TCE table as a child of owner; NULL if liobn is in use. */
+    if (spapr_tce_find_by_liobn(liobn)) {
+        fprintf(stderr, "Attempted to create TCE table with duplicate"
+                " LIOBN 0x%x\n", liobn);
+        return NULL;
+    }
+    tcet = SPAPR_TCE_TABLE(object_new(TYPE_SPAPR_TCE_TABLE_VFIO));
+    tcet->liobn = liobn;
+    object_property_add_child(OBJECT(owner), "tce-table", OBJECT(tcet), NULL);
+    /* Realize right away; NOTE(review): failures are not checked here. */
+    object_property_set_bool(OBJECT(tcet), true, "realized", NULL);
+
+    return tcet;
+}
+
+static target_ulong put_tce_vfio(sPAPRTCETable *tcet, target_ulong ioba,
+                                 target_ulong tce)
+{
+    IOMMUTLBEntry entry = { 0 }; /* members not set below stay zero */
+    /* Build a page-aligned entry from the guest's TCE and notify listeners. */
+    entry.iova = ioba & ~SPAPR_TCE_PAGE_MASK;
+    entry.translated_addr = tce & ~SPAPR_TCE_PAGE_MASK;
+    entry.addr_mask = SPAPR_TCE_PAGE_MASK;
+    entry.perm = 0;
+    if ((tce & SPAPR_TCE_RO) == SPAPR_TCE_RO) {
+        entry.perm |= IOMMU_RO;
+    }
+    if ((tce & SPAPR_TCE_WO) == SPAPR_TCE_WO) {
+        entry.perm |= IOMMU_WO;
+    }
+    memory_region_notify_iommu(&tcet->iommu, entry); /* forwarded, not stored */
+    /* NOTE(review): confirm notifier consumers need no further entry fields. */
+    return H_SUCCESS;
+}
+
+static void spapr_tce_table_vfio_finalize(Object *obj)
+{
+    sPAPRTCETable *tcet = SPAPR_TCE_TABLE(obj);
+    /* NOTE(review): parent type has its own finalizer; confirm no double unlink. */
+    QLIST_REMOVE(tcet, list); /* drop the table from the list it was added to */
+}
+
+static void spapr_tce_table_vfio_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    sPAPRTCETableClass *stc = SPAPR_TCE_TABLE_CLASS(klass);
+    /* Install the VFIO-specific init callback and TCE-put handler. */
+    dc->init = spapr_tce_table_vfio_realize;
+    stc->put_tce = put_tce_vfio;
+}
+
+static TypeInfo spapr_tce_table_vfio_info = {
+    .name = TYPE_SPAPR_TCE_TABLE_VFIO,
+    .parent = TYPE_SPAPR_TCE_TABLE, /* subtype of the existing TCE table type */
+    .instance_size = sizeof(sPAPRTCETable), /* reuses sPAPRTCETable as-is */
+    .class_init = spapr_tce_table_vfio_class_init,
+    .class_size = sizeof(sPAPRTCETableClass),
+    .instance_finalize = spapr_tce_table_vfio_finalize,
+};
+
 static void register_types(void)
 {
     type_register_static(&spapr_tce_table_info);
+    type_register_static(&spapr_tce_table_vfio_info); /* new VFIO variant */
 }
 
 type_init(register_types);
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index ebcef7f..ceda354 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -383,6 +383,10 @@  typedef struct sPAPRTCETable sPAPRTCETable;
 #define SPAPR_TCE_TABLE(obj) \
     OBJECT_CHECK(sPAPRTCETable, (obj), TYPE_SPAPR_TCE_TABLE)
 
+#define TYPE_SPAPR_TCE_TABLE_VFIO "spapr-tce-table-vfio" /* VFIO table type name */
+#define SPAPR_TCE_TABLE_VFIO(obj) \
+    OBJECT_CHECK(sPAPRTCETable, (obj), TYPE_SPAPR_TCE_TABLE_VFIO)
+
 #define SPAPR_TCE_TABLE_CLASS(klass) \
      OBJECT_CLASS_CHECK(sPAPRTCETableClass, (klass), TYPE_SPAPR_TCE_TABLE)
 #define SPAPR_TCE_TABLE_GET_CLASS(obj) \
@@ -411,6 +415,7 @@  void spapr_events_init(sPAPREnvironment *spapr);
 void spapr_events_fdt_skel(void *fdt, uint32_t epow_irq);
 sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn,
                                    size_t window_size);
+sPAPRTCETable *spapr_vfio_new_table(DeviceState *owner, uint32_t liobn);
 MemoryRegion *spapr_tce_get_iommu(sPAPRTCETable *tcet);
 void spapr_tce_set_bypass(sPAPRTCETable *tcet, bool bypass);
 int spapr_dma_dt(void *fdt, int node_off, const char *propname,