Patchwork [21/34] pseries: Rework implementation of TCE bypass

login
register
mail settings
Submitter Alexander Graf
Date Oct. 4, 2012, 1:56 p.m.
Message ID <1349359016-13107-22-git-send-email-agraf@suse.de>
Download mbox | patch
Permalink /patch/189171/
State New
Headers show

Comments

Alexander Graf - Oct. 4, 2012, 1:56 p.m.
From: David Gibson <david@gibson.dropbear.id.au>

On the pseries machine the IOMMU (aka TCE tables) is always active for all
PCI and VIO devices.  Mostly to simplify the SLOF firmware, we implement an
extension which allows the IOMMU to be temporarily disabled for certain
devices.

Currently this is implemented by setting the device's DMAContext pointer to
NULL (thus reverting to qemu's default no-IOMMU DMA behaviour), then
replacing it when bypass mode is disabled.

This approach causes a bunch of complications though.  It complexifies the
management of the DMAContext lifetimes, it's problematic for savevm/loadvm,
and it means that while bypass is active we have nowhere to store the
device's LIOBN (Logical IO Bus Number, used to identify DMA address
spaces).  At present we regenerate the LIOBN from other address information
but this restricts how we can allocate LIOBNs.

This patch gives up on this approach, replacing it with the much simpler
one of having a 'bypass' boolean flag in the TCE state structure.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 hw/spapr.h       |    1 +
 hw/spapr_iommu.c |   25 +++++++++++++++++++------
 hw/spapr_vio.c   |   26 ++++++++++----------------
 hw/spapr_vio.h   |    1 -
 4 files changed, 30 insertions(+), 23 deletions(-)

Patch

diff --git a/hw/spapr.h b/hw/spapr.h
index 51a966b..e984e3f 100644
--- a/hw/spapr.h
+++ b/hw/spapr.h
@@ -339,6 +339,7 @@  void spapr_iommu_init(void);
 DMAContext *spapr_tce_new_dma_context(uint32_t liobn, size_t window_size);
 void spapr_tce_free(DMAContext *dma);
 void spapr_tce_reset(DMAContext *dma);
+void spapr_tce_set_bypass(DMAContext *dma, bool bypass);
 int spapr_dma_dt(void *fdt, int node_off, const char *propname,
                  uint32_t liobn, uint64_t window, uint32_t size);
 int spapr_tcet_dma_dt(void *fdt, int node_off, const char *propname,
diff --git a/hw/spapr_iommu.c b/hw/spapr_iommu.c
index 216aa06..38034c0 100644
--- a/hw/spapr_iommu.c
+++ b/hw/spapr_iommu.c
@@ -42,6 +42,7 @@  struct sPAPRTCETable {
     uint32_t liobn;
     uint32_t window_size;
     sPAPRTCE *table;
+    bool bypass;
     int fd;
     QLIST_ENTRY(sPAPRTCETable) list;
 };
@@ -78,6 +79,12 @@  static int spapr_tce_translate(DMAContext *dma,
             DMA_ADDR_FMT "\n", tcet->liobn, addr);
 #endif
 
+    if (tcet->bypass) {
+        *paddr = addr;
+        *len = (target_phys_addr_t)-1;
+        return 0;
+    }
+
     /* Check if we are in bound */
     if (addr >= tcet->window_size) {
 #ifdef DEBUG_TCE
@@ -162,15 +169,21 @@  void spapr_tce_free(DMAContext *dma)
     }
 }
 
+void spapr_tce_set_bypass(DMAContext *dma, bool bypass)
+{
+    sPAPRTCETable *tcet = DO_UPCAST(sPAPRTCETable, dma, dma);
+
+    tcet->bypass = bypass;
+}
+
 void spapr_tce_reset(DMAContext *dma)
 {
-    if (dma) {
-        sPAPRTCETable *tcet = DO_UPCAST(sPAPRTCETable, dma, dma);
-        size_t table_size = (tcet->window_size >> SPAPR_TCE_PAGE_SHIFT)
-            * sizeof(sPAPRTCE);
+    sPAPRTCETable *tcet = DO_UPCAST(sPAPRTCETable, dma, dma);
+    size_t table_size = (tcet->window_size >> SPAPR_TCE_PAGE_SHIFT)
+        * sizeof(sPAPRTCE);
 
-        memset(tcet->table, 0, table_size);
-    }
+    tcet->bypass = false;
+    memset(tcet->table, 0, table_size);
 }
 
 static target_ulong put_tce_emu(sPAPRTCETable *tcet, target_ulong ioba,
diff --git a/hw/spapr_vio.c b/hw/spapr_vio.c
index 752836e..848806d 100644
--- a/hw/spapr_vio.c
+++ b/hw/spapr_vio.c
@@ -316,14 +316,9 @@  int spapr_vio_send_crq(VIOsPAPRDevice *dev, uint8_t *crq)
 
 static void spapr_vio_quiesce_one(VIOsPAPRDevice *dev)
 {
-    VIOsPAPRDeviceClass *pc = VIO_SPAPR_DEVICE_GET_CLASS(dev);
-    uint32_t liobn = SPAPR_VIO_BASE_LIOBN | dev->reg;
-
     if (dev->dma) {
-        spapr_tce_free(dev->dma);
+        spapr_tce_reset(dev->dma);
     }
-    dev->dma = spapr_tce_new_dma_context(liobn, pc->rtce_window_size);
-
     free_crq(dev);
 }
 
@@ -346,16 +341,14 @@  static void rtas_set_tce_bypass(sPAPREnvironment *spapr, uint32_t token,
         rtas_st(rets, 0, -3);
         return;
     }
-    if (enable) {
-        spapr_tce_free(dev->dma);
-        dev->dma = NULL;
-    } else {
-        VIOsPAPRDeviceClass *pc = VIO_SPAPR_DEVICE_GET_CLASS(dev);
-        uint32_t liobn = SPAPR_VIO_BASE_LIOBN | dev->reg;
 
-        dev->dma = spapr_tce_new_dma_context(liobn, pc->rtce_window_size);
+    if (!dev->dma) {
+        rtas_st(rets, 0, -3);
+        return;
     }
 
+    spapr_tce_set_bypass(dev->dma, !!enable);
+
     rtas_st(rets, 0, 0);
 }
 
@@ -421,7 +414,6 @@  static int spapr_vio_busdev_init(DeviceState *qdev)
 {
     VIOsPAPRDevice *dev = (VIOsPAPRDevice *)qdev;
     VIOsPAPRDeviceClass *pc = VIO_SPAPR_DEVICE_GET_CLASS(dev);
-    uint32_t liobn;
     char *id;
 
     if (dev->reg != -1) {
@@ -463,8 +455,10 @@  static int spapr_vio_busdev_init(DeviceState *qdev)
         return -1;
     }
 
-    liobn = SPAPR_VIO_BASE_LIOBN | dev->reg;
-    dev->dma = spapr_tce_new_dma_context(liobn, pc->rtce_window_size);
+    if (pc->rtce_window_size) {
+        uint32_t liobn = SPAPR_VIO_BASE_LIOBN | dev->reg;
+        dev->dma = spapr_tce_new_dma_context(liobn, pc->rtce_window_size);
+    }
 
     return pc->init(dev);
 }
diff --git a/hw/spapr_vio.h b/hw/spapr_vio.h
index acef65e..cc85d26 100644
--- a/hw/spapr_vio.h
+++ b/hw/spapr_vio.h
@@ -131,7 +131,6 @@  void spapr_vscsi_create(VIOsPAPRBus *bus);
 
 VIOsPAPRDevice *spapr_vty_get_default(VIOsPAPRBus *bus);
 
-int spapr_tce_set_bypass(uint32_t unit, uint32_t enable);
 void spapr_vio_quiesce(void);
 
 #endif /* _HW_SPAPR_VIO_H */