Patchwork [5/5,RFC] vfio: setup iova-base for msi interrupts for vfio assigned device

login
register
mail settings
Submitter Bharat Bhushan
Date Oct. 29, 2013, 11:27 a.m.
Message ID <1383046062-16520-5-git-send-email-Bharat.Bhushan@freescale.com>
Download mbox | patch
Permalink /patch/286804/
State Superseded
Headers show

Comments

Bharat Bhushan - Oct. 29, 2013, 11:27 a.m.
PAMU (the FSL IOMMU) has a concept of a primary window and subwindows.
The primary window corresponds to the complete guest iova address space
(including the MSI space); in IOMMU API terms this is called the
geometry. The iova base of each subwindow is determined by the number of
subwindows (configurable using the IOMMU API).
The MSI I/O page must lie within the geometry and within the maximum
number of supported subwindows, so the MSI I/O page is set up just after
the guest memory iova space.

This patch sets up the MSI iova base for vfio-assigned devices in the
MSI subsystem, so that the configured iova is used when the MSI message
is composed.

With this design, vfio makes the msi_set_iova() MSI API call to set up
the iova for a device. The MSI code keeps track of the iova base of every
device under an MSI bank. When the MSI address and data are composed,
this list is traversed: if the device is found in the list, it is in use
by vfio and its iova base is taken from the list; otherwise the address
is computed as before.

This is a draft patch that describes the interface for setting up the
iova in MSI (as Alex Williamson proposed earlier on a related patchset).
For now I have bundled all changes into one patch to get initial review
comments on the design. I will split this into multiple logical
patches once the design is accepted.

Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
---
 arch/powerpc/include/asm/machdep.h |    2 +
 arch/powerpc/kernel/msi.c          |   10 ++++++
 arch/powerpc/sysdev/fsl_msi.c      |   64 ++++++++++++++++++++++++++++++++++++
 arch/powerpc/sysdev/fsl_msi.h      |   10 ++++-
 drivers/pci/msi.c                  |   12 +++++++
 include/linux/pci.h                |    8 ++++
 6 files changed, 104 insertions(+), 2 deletions(-)

Patch

diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
index 8d1b787..e87b806 100644
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h
@@ -132,6 +132,8 @@  struct machdep_calls {
 	/* Returns the requested region's address and size */
 	int		(*msi_get_region)(int region_num,
 					  struct msi_region *region);
+	int		(*msi_set_iova)(struct pci_dev *pdev, int region_num,
+					dma_addr_t iova, bool set);
 #endif
 
 	void		(*restart)(char *cmd);
diff --git a/arch/powerpc/kernel/msi.c b/arch/powerpc/kernel/msi.c
index 1a67787..e2bd555 100644
--- a/arch/powerpc/kernel/msi.c
+++ b/arch/powerpc/kernel/msi.c
@@ -13,6 +13,16 @@ 
 
 #include <asm/machdep.h>
 
+/* Hand an MSI iova set/clear request to the platform hook; 0 if none is set. */
+int arch_msi_set_iova(struct pci_dev *pdev, int region_num,
+		      dma_addr_t iova, bool set)
+{
+	if (!ppc_md.msi_set_iova)
+		return 0;
+	pr_debug("msi: Using platform set_iova routine.\n");
+	return ppc_md.msi_set_iova(pdev, region_num, iova, set);
+}
+
 int arch_msi_get_region_count(void)
 {
 	if (ppc_md.msi_get_region_count) {
diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c
index eeebbf0..ad22d74 100644
--- a/arch/powerpc/sysdev/fsl_msi.c
+++ b/arch/powerpc/sysdev/fsl_msi.c
@@ -137,6 +137,46 @@  static int fsl_msi_get_region(int region_num, struct msi_region *region)
 	return -ENODEV;
 }
 
+/* Set, or drop when !set, pdev's MSI iova in bank region_num; 0 or -ENOMEM. */
+static int fsl_msi_set_iova(struct pci_dev *pdev, int region_num,
+			    dma_addr_t iova, bool set)
+{
+	struct fsl_msi *msi_data;
+	struct fsl_msi_device *device, *found = NULL;
+	int ret = 0;
+
+	list_for_each_entry(msi_data, &msi_head, list) {
+		if (msi_data->bank_index != region_num)
+			continue;
+		mutex_lock(&msi_data->lock);
+		list_for_each_entry(device, &msi_data->device_list, list) {
+			if (device->dev == pdev) {
+				found = device;
+				break;
+			}
+		}
+		if (found && set) {		/* known device: update its iova */
+			found->iova = iova;
+		} else if (found) {		/* known device: forget it */
+			list_del(&found->list);
+			kfree(found);
+		} else if (set) {		/* new device: remember it */
+			found = kzalloc(sizeof(*found), GFP_KERNEL);
+			if (found) {
+				found->dev = pdev;
+				found->iova = iova;
+				list_add_tail(&found->list,
+					      &msi_data->device_list);
+			} else {
+				ret = -ENOMEM;	/* nothing was added */
+			}
+		}
+		mutex_unlock(&msi_data->lock);
+		break;	/* only the first bank with this index is used */
+	}
+	return ret;
+}
+
 static int fsl_msi_check_device(struct pci_dev *pdev, int nvec, int type)
 {
 	if (type == PCI_CAP_ID_MSIX)
@@ -167,6 +207,7 @@  static void fsl_compose_msi_msg(struct pci_dev *pdev, int hwirq,
 				struct msi_msg *msg,
 				struct fsl_msi *fsl_msi_data)
 {
+	struct fsl_msi_device *device;
 	struct fsl_msi *msi_data = fsl_msi_data;
 	struct pci_controller *hose = pci_bus_to_host(pdev->bus);
 	u64 address; /* Physical address of the MSIIR */
@@ -181,6 +222,18 @@  static void fsl_compose_msi_msg(struct pci_dev *pdev, int hwirq,
 		address = fsl_pci_immrbar_base(hose) +
 			   (msi_data->msiir & 0xfffff);
 
+	pr_debug("%s address = %llx\n", __func__, address);
+	/* If an iova was set for pdev, use it plus the low 12 bits of msiir. */
+	mutex_lock(&msi_data->lock);
+	list_for_each_entry(device, &msi_data->device_list, list) {
+		if (device->dev == pdev) {
+			address = device->iova | (msi_data->msiir & 0xfff);
+			break;
+		}
+	}
+	mutex_unlock(&msi_data->lock);
+	pr_debug("%s address = %llx\n", __func__, address);
+
 	msg->address_lo = lower_32_bits(address);
 	msg->address_hi = upper_32_bits(address);
 
@@ -356,6 +409,7 @@  static int fsl_of_msi_remove(struct platform_device *ofdev)
 	struct fsl_msi *msi = platform_get_drvdata(ofdev);
 	int virq, i;
 	struct fsl_msi_cascade_data *cascade_data;
+	struct fsl_msi_device *device;
 
 	if (msi->list.prev != NULL)
 		list_del(&msi->list);
@@ -371,6 +425,13 @@  static int fsl_of_msi_remove(struct platform_device *ofdev)
 		msi_bitmap_free(&msi->bitmap);
 	if ((msi->feature & FSL_PIC_IP_MASK) != FSL_PIC_IP_VMPIC)
 		iounmap(msi->msi_regs);
+	mutex_lock(&msi->lock);
+	while (!list_empty(&msi->device_list)) {	/* pop head until empty */
+		device = list_first_entry(&msi->device_list, typeof(*device), list);
+		list_del(&device->list);
+		kfree(device);	/* already unlinked, so it is never visited again */
+	}
+	mutex_unlock(&msi->lock);
 	kfree(msi);
 
 	return 0;
@@ -436,6 +497,8 @@  static int fsl_of_msi_probe(struct platform_device *dev)
 		dev_err(&dev->dev, "No memory for MSI structure\n");
 		return -ENOMEM;
 	}
+	INIT_LIST_HEAD(&msi->device_list);
+	mutex_init(&msi->lock);
 	platform_set_drvdata(dev, msi);
 
 	msi->irqhost = irq_domain_add_linear(dev->dev.of_node,
@@ -558,6 +621,7 @@  static int fsl_of_msi_probe(struct platform_device *dev)
 		ppc_md.msi_check_device = fsl_msi_check_device;
 		ppc_md.msi_get_region_count = fsl_msi_get_region_count;
 		ppc_md.msi_get_region = fsl_msi_get_region;
+		ppc_md.msi_set_iova = fsl_msi_set_iova;
 	} else if (ppc_md.setup_msi_irqs != fsl_setup_msi_irqs) {
 		dev_err(&dev->dev, "Different MSI driver already installed!\n");
 		err = -ENODEV;
diff --git a/arch/powerpc/sysdev/fsl_msi.h b/arch/powerpc/sysdev/fsl_msi.h
index a2cc5a2..adda5c3 100644
--- a/arch/powerpc/sysdev/fsl_msi.h
+++ b/arch/powerpc/sysdev/fsl_msi.h
@@ -27,9 +27,15 @@ 
 #define FSL_PIC_IP_IPIC   0x00000002
 #define FSL_PIC_IP_VMPIC  0x00000003
 
+struct fsl_msi_device {
+	struct list_head list;	/* link in the owning bank's fsl_msi.device_list */
+	struct pci_dev *dev;	/* device this entry was registered for */
+	dma_addr_t iova;	/* iova used when composing this device's MSI address */
+};
+
 struct fsl_msi {
 	struct irq_domain *irqhost;
-
+	struct mutex lock;
 	unsigned long cascade_irq;
 	phys_addr_t msiir; /* MSIIR Address in CCSR */
 	u32 ibs_shift; /* Shift of interrupt bit select */
@@ -37,7 +43,7 @@  struct fsl_msi {
 	void __iomem *msi_regs;
 	u32 feature;
 	int msi_virqs[NR_MSI_REG_MAX];
-
+	struct list_head device_list;
 	/*
 	 * During probe each bank is assigned a index number.
 	 * index number start from 0.
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index 2643a29..59ec465 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -77,6 +77,18 @@  int __weak arch_msi_get_region(int region_num, struct msi_region *region)
 	return 0;
 }
 
+int __weak arch_msi_set_iova(struct pci_dev *pdev, int region_num,
+			     dma_addr_t iova, bool set)
+{
+	return 0;	/* weak default: report success without recording anything */
+}
+
+int msi_set_iova(struct pci_dev *pdev, int region_num, dma_addr_t iova, bool set)
+{
+	return arch_msi_set_iova(pdev, region_num, iova, set);	/* arch hook's result */
+}
+EXPORT_SYMBOL(msi_set_iova);	/* exported so that modules can call it */
+
 int msi_get_region_count(void)
 {
 	return arch_msi_get_region_count();
diff --git a/include/linux/pci.h b/include/linux/pci.h
index c587034..c6d3e58 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1195,6 +1195,12 @@  static inline int msi_get_region(int region_num, struct msi_region *region)
 {
 	return 0;
 }
+
+static inline int msi_set_iova(struct pci_dev *pdev, int region_num,
+			       dma_addr_t iova, bool set)
+{
+	return 0;	/* stub variant: accepts the request and does nothing */
+}
 #else
 int pci_enable_msi_block(struct pci_dev *dev, unsigned int nvec);
 int pci_enable_msi_block_auto(struct pci_dev *dev, unsigned int *maxvec);
@@ -1209,6 +1215,8 @@  void pci_restore_msi_state(struct pci_dev *dev);
 int pci_msi_enabled(void);
 int msi_get_region_count(void);
 int msi_get_region(int region_num, struct msi_region *region);
+int msi_set_iova(struct pci_dev *pdev, int region_num,
+		 dma_addr_t iova, bool set);
 #endif
 
 #ifdef CONFIG_PCIEPORTBUS