diff mbox series

[v2] nvme-pci: Use PCI bus address for data/queues in CMB

Message ID 1506763722-10687-1-git-send-email-abhishek.shah@broadcom.com
State Not Applicable
Headers show
Series [v2] nvme-pci: Use PCI bus address for data/queues in CMB | expand

Commit Message

Abhishek Shah Sept. 30, 2017, 9:28 a.m. UTC
Currently, NVMe PCI host driver is programming CMB dma address as
I/O SQs addresses. This results in failures on systems where 1:1
outbound mapping is not used (example Broadcom iProc SOCs) because
CMB BAR will be programmed with PCI bus address but NVMe PCI EP will
try to access CMB using dma address.

To have CMB working on systems without 1:1 outbound mapping, we
program PCI bus address for I/O SQs instead of dma address. This
approach will work on systems with/without 1:1 outbound mapping.

The patch is tested on Broadcom Stingray platform (arm64), which
does not have 1:1 outbound mapping, as well as on x86 platform,
which has 1:1 outbound mapping.

Fixes: 8ffaadf7 ("NVMe: Use CMB for the IO SQes if available")
Cc: stable@vger.kernel.org
Signed-off-by: Abhishek Shah <abhishek.shah@broadcom.com>
Reviewed-by: Anup Patel <anup.patel@broadcom.com>
Reviewed-by: Ray Jui <ray.jui@broadcom.com>
Reviewed-by: Scott Branden <scott.branden@broadcom.com>
---
 drivers/nvme/host/pci.c | 25 ++++++++++++++++++++++++-
 1 file changed, 24 insertions(+), 1 deletion(-)

Comments

Christoph Hellwig Oct. 1, 2017, 7:42 a.m. UTC | #1
This looks very convoluted, mostly because the existing code is
doing weird things.  For one thing what is sq_dma_addr currently
is not a DMA address - we either need the resource address
for the ioremap, but we don't need to stash that away, and second
the one programmed into the controller should be a pci_bus_addr_t.

Second we already have a nice PCI-layer helper called pci_bus_address
to get the bus address for us and we should use it.

Something like the patch below should solve the issue:

---
From b78f4164881125c4fecfdb87878d0120b2177c53 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Sun, 1 Oct 2017 09:37:35 +0200
Subject: nvme-pci: Use PCI bus address for data/queues in CMB

Currently, NVMe PCI host driver is programming CMB dma address as
I/O SQs addresses. This results in failures on systems where 1:1
outbound mapping is not used (example Broadcom iProc SOCs) because
CMB BAR will be programmed with PCI bus address but NVMe PCI EP will
try to access CMB using dma address.

To have CMB working on systems without 1:1 outbound mapping, we
program PCI bus address for I/O SQs instead of dma address. This
approach will work on systems with/without 1:1 outbound mapping.

Based on a report and previous patch from Abhishek Shah.

Fixes: 8ffaadf7 ("NVMe: Use CMB for the IO SQes if available")
Cc: stable@vger.kernel.org
Reported-by: Abhishek Shah <abhishek.shah@broadcom.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/pci.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index cb73bc8cad3b..3f5a04c586ce 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -94,7 +94,7 @@ struct nvme_dev {
 	struct mutex shutdown_lock;
 	bool subsystem;
 	void __iomem *cmb;
-	dma_addr_t cmb_dma_addr;
+	pci_bus_addr_t cmb_bus_addr;
 	u64 cmb_size;
 	u32 cmbsz;
 	u32 cmbloc;
@@ -1226,7 +1226,7 @@ static int nvme_alloc_sq_cmds(struct nvme_dev *dev, struct nvme_queue *nvmeq,
 	if (qid && dev->cmb && use_cmb_sqes && NVME_CMB_SQS(dev->cmbsz)) {
 		unsigned offset = (qid - 1) * roundup(SQ_SIZE(depth),
 						      dev->ctrl.page_size);
-		nvmeq->sq_dma_addr = dev->cmb_dma_addr + offset;
+		nvmeq->sq_dma_addr = dev->cmb_bus_addr + offset;
 		nvmeq->sq_cmds_io = dev->cmb + offset;
 	} else {
 		nvmeq->sq_cmds = dma_alloc_coherent(dev->dev, SQ_SIZE(depth),
@@ -1527,7 +1527,7 @@ static void __iomem *nvme_map_cmb(struct nvme_dev *dev)
 	resource_size_t bar_size;
 	struct pci_dev *pdev = to_pci_dev(dev->dev);
 	void __iomem *cmb;
-	dma_addr_t dma_addr;
+	int bar;
 
 	dev->cmbsz = readl(dev->bar + NVME_REG_CMBSZ);
 	if (!(NVME_CMB_SZ(dev->cmbsz)))
@@ -1540,7 +1540,8 @@ static void __iomem *nvme_map_cmb(struct nvme_dev *dev)
 	szu = (u64)1 << (12 + 4 * NVME_CMB_SZU(dev->cmbsz));
 	size = szu * NVME_CMB_SZ(dev->cmbsz);
 	offset = szu * NVME_CMB_OFST(dev->cmbloc);
-	bar_size = pci_resource_len(pdev, NVME_CMB_BIR(dev->cmbloc));
+	bar = NVME_CMB_BIR(dev->cmbloc);
+	bar_size = pci_resource_len(pdev, bar);
 
 	if (offset > bar_size)
 		return NULL;
@@ -1553,12 +1554,11 @@ static void __iomem *nvme_map_cmb(struct nvme_dev *dev)
 	if (size > bar_size - offset)
 		size = bar_size - offset;
 
-	dma_addr = pci_resource_start(pdev, NVME_CMB_BIR(dev->cmbloc)) + offset;
-	cmb = ioremap_wc(dma_addr, size);
+	cmb = ioremap_wc(pci_resource_start(pdev, bar) + offset, size);
 	if (!cmb)
 		return NULL;
 
-	dev->cmb_dma_addr = dma_addr;
+	dev->cmb_bus_addr = pci_bus_address(pdev, bar) + offset;
 	dev->cmb_size = size;
 	return cmb;
 }
Keith Busch Oct. 2, 2017, 5:21 p.m. UTC | #2
On Sun, Oct 01, 2017 at 09:42:03AM +0200, Christoph Hellwig wrote:
> This looks very convoluted, mostly because the existing code is
> doing weird things.  For one thing what is sq_dma_addr currently
> is not a DMA address - we either need the resource address
> for the ioremap, but we don't need to stash that away, and second
> the one programmed into the controller should be a pci_bus_addr_t.
> 
> Second we already have a nice PCI-layer helper called pci_bus_address
> to get the bus address for us and we should use it.
> 
> Something like the patch below should solve the issue:

Yah, calling this a DMA address was a misnomer and confusing.

> ---
> From b78f4164881125c4fecfdb87878d0120b2177c53 Mon Sep 17 00:00:00 2001
> From: Christoph Hellwig <hch@lst.de>
> Date: Sun, 1 Oct 2017 09:37:35 +0200
> Subject: nvme-pci: Use PCI bus address for data/queues in CMB
> 
> Currently, NVMe PCI host driver is programming CMB dma address as
> I/O SQs addresses. This results in failures on systems where 1:1
> outbound mapping is not used (example Broadcom iProc SOCs) because
> CMB BAR will be programmed with PCI bus address but NVMe PCI EP will
> try to access CMB using dma address.
> 
> To have CMB working on systems without 1:1 outbound mapping, we
> program PCI bus address for I/O SQs instead of dma address. This
> approach will work on systems with/without 1:1 outbound mapping.
> 
> Based on a report and previous patch from Abhishek Shah.
> 
> Fixes: 8ffaadf7 ("NVMe: Use CMB for the IO SQes if available")
> Cc: stable@vger.kernel.org
> Reported-by: Abhishek Shah <abhishek.shah@broadcom.com>
> Signed-off-by: Christoph Hellwig <hch@lst.de>

This looks good.

Reviewed-by: Keith Busch <keith.busch@intel.com>
Christoph Hellwig Oct. 4, 2017, 6:30 a.m. UTC | #3
On Mon, Oct 02, 2017 at 11:21:29AM -0600, Keith Busch wrote:
> Yah, calling this a DMA address was a misnomer and confusing.

Abhishek, can you test if this works for you?
Abhishek Shah Oct. 4, 2017, 9:37 a.m. UTC | #4
yes, this patch works for our platform.

On Wed, Oct 4, 2017 at 12:00 PM, Christoph Hellwig <hch@lst.de> wrote:
> On Mon, Oct 02, 2017 at 11:21:29AM -0600, Keith Busch wrote:
>> Yah, calling this a DMA address was a misnomer and confusing.
>
> Abhishek, can you test if this works for you?
Sagi Grimberg Oct. 11, 2017, 10 a.m. UTC | #5
> ---
>  From b78f4164881125c4fecfdb87878d0120b2177c53 Mon Sep 17 00:00:00 2001
> From: Christoph Hellwig <hch@lst.de>
> Date: Sun, 1 Oct 2017 09:37:35 +0200
> Subject: nvme-pci: Use PCI bus address for data/queues in CMB
> 
> Currently, NVMe PCI host driver is programming CMB dma address as
> I/O SQs addresses. This results in failures on systems where 1:1
> outbound mapping is not used (example Broadcom iProc SOCs) because
> CMB BAR will be programmed with PCI bus address but NVMe PCI EP will
> try to access CMB using dma address.
> 
> To have CMB working on systems without 1:1 outbound mapping, we
> program PCI bus address for I/O SQs instead of dma address. This
> approach will work on systems with/without 1:1 outbound mapping.
> 
> Based on a report and previous patch from Abhishek Shah.
> 
> Fixes: 8ffaadf7 ("NVMe: Use CMB for the IO SQes if available")
> Cc: stable@vger.kernel.org
> Reported-by: Abhishek Shah <abhishek.shah@broadcom.com>
> Signed-off-by: Christoph Hellwig <hch@lst.de>

This looks good,

Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
diff mbox series

Patch

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 4a21213..1387050 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -94,6 +94,7 @@  struct nvme_dev {
 	bool subsystem;
 	void __iomem *cmb;
 	dma_addr_t cmb_dma_addr;
+	pci_bus_addr_t cmb_bus_addr;
 	u64 cmb_size;
 	u32 cmbsz;
 	u32 cmbloc;
@@ -1220,7 +1221,7 @@  static int nvme_alloc_sq_cmds(struct nvme_dev *dev, struct nvme_queue *nvmeq,
 	if (qid && dev->cmb && use_cmb_sqes && NVME_CMB_SQS(dev->cmbsz)) {
 		unsigned offset = (qid - 1) * roundup(SQ_SIZE(depth),
 						      dev->ctrl.page_size);
-		nvmeq->sq_dma_addr = dev->cmb_dma_addr + offset;
+		nvmeq->sq_dma_addr = dev->cmb_bus_addr + offset;
 		nvmeq->sq_cmds_io = dev->cmb + offset;
 	} else {
 		nvmeq->sq_cmds = dma_alloc_coherent(dev->dev, SQ_SIZE(depth),
@@ -1514,6 +1515,25 @@  static ssize_t nvme_cmb_show(struct device *dev,
 }
 static DEVICE_ATTR(cmb, S_IRUGO, nvme_cmb_show, NULL);
 
+static int nvme_find_cmb_bus_addr(struct pci_dev *pdev,
+				  dma_addr_t dma_addr,
+				  u64 size,
+				  pci_bus_addr_t *bus_addr)
+{
+	struct resource *res;
+	struct pci_bus_region region;
+	struct resource tres = DEFINE_RES_MEM(dma_addr, size);
+
+	res = pci_find_resource(pdev, &tres);
+	if (!res)
+		return -EIO;
+
+	pcibios_resource_to_bus(pdev->bus, &region, res);
+	*bus_addr = region.start + (dma_addr - res->start);
+
+	return 0;
+}
+
 static void __iomem *nvme_map_cmb(struct nvme_dev *dev)
 {
 	u64 szu, size, offset;
@@ -1547,6 +1567,9 @@  static void __iomem *nvme_map_cmb(struct nvme_dev *dev)
 		size = bar_size - offset;
 
 	dma_addr = pci_resource_start(pdev, NVME_CMB_BIR(dev->cmbloc)) + offset;
+	if (nvme_find_cmb_bus_addr(pdev, dma_addr, size, &dev->cmb_bus_addr))
+		return NULL;
+
 	cmb = ioremap_wc(dma_addr, size);
 	if (!cmb)
 		return NULL;