diff mbox series

[RFC,4/4] nvme: translate virtual addresses into the bus's address space

Message ID ad0644a33852f459dc32e77bd0452397ba95ab1d.1632439220.git.stefan@agner.ch
State RFC
Delegated to: Tom Rini
Headers show
Series [RFC,1/4] Revert "nvme: Correct the prps per page calculation method" | expand

Commit Message

Stefan Agner Sept. 23, 2021, 11:20 p.m. UTC
So far we've been content with passing physical/CPU addresses when
configuring memory addresses into NVMe controllers, but not all
platforms have buses with transparent mappings. Specifically the
Raspberry Pi 4 might introduce an offset to memory accesses incoming
from its PCIe port.

Introduce nvme_virt_to_bus() and nvme_bus_to_virt() to cater with these
limitations, and make sure we don't break non DM users.
For devices where PCIe's view of host memory doesn't match the memory
as seen by the CPU.

A similar change has been introduced for XHCI controller with
commit 1a474559d90a ("xhci: translate virtual addresses into the bus's
address space").

Signed-off-by: Stefan Agner <stefan@agner.ch>
---

 drivers/nvme/nvme.c | 32 ++++++++++++++++++--------------
 drivers/nvme/nvme.h | 15 +++++++++++++++
 2 files changed, 33 insertions(+), 14 deletions(-)

Comments

Stefan Agner Sept. 23, 2021, 11:22 p.m. UTC | #1
On 2021-09-24 01:20, Stefan Agner wrote:
> So far we've been content with passing physical/CPU addresses when
> configuring memory addresses into NVMe controllers, but not all
> platforms have buses with transparent mappings. Specifically the
> Raspberry Pi 4 might introduce an offset to memory accesses incoming
> from its PCIe port.
> 
> Introduce nvme_virt_to_bus() and nvme_bus_to_virt() to cater with these
> limitations, and make sure we don't break non DM users.
> For devices where PCIe's view of host memory doesn't match the memory
> as seen by the CPU.
> 
> A similar change has been introduced for XHCI controller with
> commit 1a474559d90a ("xhci: translate virtual addresses into the bus's
> address space").
> 
> Signed-off-by: Stefan Agner <stefan@agner.ch>
> ---
> 
>  drivers/nvme/nvme.c | 32 ++++++++++++++++++--------------
>  drivers/nvme/nvme.h | 15 +++++++++++++++
>  2 files changed, 33 insertions(+), 14 deletions(-)
> 
> diff --git a/drivers/nvme/nvme.c b/drivers/nvme/nvme.c
> index 4c4dc7cc4d..0b7082d71b 100644
> --- a/drivers/nvme/nvme.c
> +++ b/drivers/nvme/nvme.c
> @@ -95,7 +95,7 @@ static int nvme_setup_prps(struct nvme_dev *dev, u64 *prp2,
>  		buffer += (page_size - offset);
>  
>  	if (length <= page_size) {
> -		*prp2 = (u64)buffer;
> +		*prp2 = nvme_virt_to_bus(dev, buffer);
>  		return 0;
>  	}
>  
> @@ -120,16 +120,16 @@ static int nvme_setup_prps(struct nvme_dev *dev,
> u64 *prp2,
>  	i = 0;
>  	while (nprps) {
>  		if (i == prps_per_page) {
> -			u64 next_prp_list = (u64)prp_pool + page_size;
> -			*(prp_pool + i) = cpu_to_le64(next_prp_list);
> +			u64 next = nvme_virt_to_bus(dev, prp_pool + page_size);
> +			*(prp_pool + i) = cpu_to_le64(next);
>  			i = 0;
>  			prp_pool += page_size;
>  		}
> -		*(prp_pool + i++) = cpu_to_le64((u64)buffer);
> +		*(prp_pool + i++) = cpu_to_le64(nvme_virt_to_bus(dev, buffer));
>  		buffer += page_size;
>  		nprps--;
>  	}
> -	*prp2 = (u64)dev->prp_pool;
> +	*prp2 = nvme_virt_to_bus(dev, dev->prp_pool);
>  
>  	flush_dcache_range((ulong)dev->prp_pool, (ulong)dev->prp_pool +
>  			   dev->prp_entry_num * sizeof(u64));
> @@ -356,6 +356,7 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
>  	int result;
>  	u32 aqa;
>  	u64 cap = dev->cap;
> +	u64 dma_addr;
>  	struct nvme_queue *nvmeq;
>  	/* most architectures use 4KB as the page size */
>  	unsigned page_shift = 12;
> @@ -396,8 +397,10 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
>  	dev->ctrl_config |= NVME_CC_IOSQES | NVME_CC_IOCQES;
>  
>  	writel(aqa, &dev->bar->aqa);
> -	nvme_writeq((ulong)nvmeq->sq_cmds, &dev->bar->asq);
> -	nvme_writeq((ulong)nvmeq->cqes, &dev->bar->acq);
> +	dma_addr = nvme_virt_to_bus(dev, nvmeq->sq_cmds);
> +	nvme_writeq(dma_addr, &dev->bar->asq);
> +	dma_addr = nvme_virt_to_bus(dev, nvmeq->cqes);
> +	nvme_writeq(dma_addr, &dev->bar->acq);
>  
>  	result = nvme_enable_ctrl(dev);
>  	if (result)
> @@ -423,7 +426,7 @@ static int nvme_alloc_cq(struct nvme_dev *dev, u16 qid,
>  
>  	memset(&c, 0, sizeof(c));
>  	c.create_cq.opcode = nvme_admin_create_cq;
> -	c.create_cq.prp1 = cpu_to_le64((ulong)nvmeq->cqes);
> +	c.create_cq.prp1 = cpu_to_le64(nvme_virt_to_bus(dev, nvmeq->cqes));
>  	c.create_cq.cqid = cpu_to_le16(qid);
>  	c.create_cq.qsize = cpu_to_le16(nvmeq->q_depth - 1);
>  	c.create_cq.cq_flags = cpu_to_le16(flags);
> @@ -440,7 +443,7 @@ static int nvme_alloc_sq(struct nvme_dev *dev, u16 qid,
>  
>  	memset(&c, 0, sizeof(c));
>  	c.create_sq.opcode = nvme_admin_create_sq;
> -	c.create_sq.prp1 = cpu_to_le64((ulong)nvmeq->sq_cmds);
> +	c.create_sq.prp1 = cpu_to_le64(nvme_virt_to_bus(dev, nvmeq->sq_cmds));
>  	c.create_sq.sqid = cpu_to_le16(qid);
>  	c.create_sq.qsize = cpu_to_le16(nvmeq->q_depth - 1);
>  	c.create_sq.sq_flags = cpu_to_le16(flags);
> @@ -461,14 +464,14 @@ int nvme_identify(struct nvme_dev *dev, unsigned nsid,
>  	memset(&c, 0, sizeof(c));
>  	c.identify.opcode = nvme_admin_identify;
>  	c.identify.nsid = cpu_to_le32(nsid);
> -	c.identify.prp1 = cpu_to_le64((u64)buffer);
> +	c.identify.prp1 = cpu_to_le64(nvme_virt_to_bus(dev, buffer));
>  
>  	length -= (page_size - offset);
>  	if (length <= 0) {
>  		c.identify.prp2 = 0;
>  	} else {
>  		buffer += (page_size - offset);
> -		c.identify.prp2 = cpu_to_le64((u64)buffer);
> +		c.identify.prp2 = cpu_to_le64(nvme_virt_to_bus(dev, buffer));
>  	}
>  
>  	c.identify.cns = cpu_to_le32(cns);
> @@ -493,7 +496,7 @@ int nvme_get_features(struct nvme_dev *dev,
> unsigned fid, unsigned nsid,
>  	memset(&c, 0, sizeof(c));
>  	c.features.opcode = nvme_admin_get_features;
>  	c.features.nsid = cpu_to_le32(nsid);
> -	c.features.prp1 = cpu_to_le64((u64)buffer);
> +	c.features.prp1 = cpu_to_le64(nvme_virt_to_bus(dev, buffer));
>  	c.features.fid = cpu_to_le32(fid);
>  
>  	ret = nvme_submit_admin_cmd(dev, &c, result);
> @@ -519,7 +522,7 @@ int nvme_set_features(struct nvme_dev *dev,
> unsigned fid, unsigned dword11,
>  
>  	memset(&c, 0, sizeof(c));
>  	c.features.opcode = nvme_admin_set_features;
> -	c.features.prp1 = cpu_to_le64((u64)buffer);
> +	c.features.prp1 = cpu_to_le64(nvme_virt_to_bus(dev, buffer));
>  	c.features.fid = cpu_to_le32(fid);
>  	c.features.dword11 = cpu_to_le32(dword11);
>  
> @@ -775,7 +778,7 @@ static ulong nvme_blk_rw(struct udevice *udev,
> lbaint_t blknr,
>  		c.rw.slba = cpu_to_le64(slba);
>  		slba += lbas;
>  		c.rw.length = cpu_to_le16(lbas - 1);
> -		c.rw.prp1 = cpu_to_le64((ulong)buffer);
> +		c.rw.prp1 = cpu_to_le64(nvme_virt_to_bus(dev, buffer));
>  		c.rw.prp2 = cpu_to_le64(prp2);
>  		status = nvme_submit_sync_cmd(dev->queues[NVME_IO_Q],
>  				&c, NULL, IO_TIMEOUT);
> @@ -834,6 +837,7 @@ static int nvme_probe(struct udevice *udev)
>  	struct nvme_id_ns *id;
>  
>  	ndev->instance = trailing_strtol(udev->name);
> +	ndev->dev = udev->parent;

It only translates addresses correctly once I chose the parent device. I
am pretty sure that this is not the right way to fix it. Shouldn't the
child automatically assume that the same translation is required?

--
Stefan

>  
>  	INIT_LIST_HEAD(&ndev->namespaces);
>  	ndev->bar = dm_pci_map_bar(udev, PCI_BASE_ADDRESS_0,
> diff --git a/drivers/nvme/nvme.h b/drivers/nvme/nvme.h
> index c6aae4da5d..31e6899bca 100644
> --- a/drivers/nvme/nvme.h
> +++ b/drivers/nvme/nvme.h
> @@ -7,8 +7,15 @@
>  #ifndef __DRIVER_NVME_H__
>  #define __DRIVER_NVME_H__
>  
> +#include <phys2bus.h>
>  #include <asm/io.h>
>  
> +#if CONFIG_IS_ENABLED(DM_USB)
> +#define nvme_to_dev(_dev)     _dev->dev
> +#else
> +#define nvme_to_dev(_dev)     NULL
> +#endif
> +
>  struct nvme_id_power_state {
>  	__le16			max_power;	/* centiwatts */
>  	__u8			rsvd2;
> @@ -596,6 +603,9 @@ enum {
>  
>  /* Represents an NVM Express device. Each nvme_dev is a PCI function. */
>  struct nvme_dev {
> +#if CONFIG_IS_ENABLED(DM_USB)
> +	struct udevice *dev;
> +#endif
>  	struct list_head node;
>  	struct nvme_queue **queues;
>  	u32 __iomem *dbs;
> @@ -635,4 +645,9 @@ struct nvme_ns {
>  	u8 flbas;
>  };
>  
> +static inline dma_addr_t nvme_virt_to_bus(struct nvme_dev *dev, void *addr)
> +{
> +	return dev_phys_to_bus(nvme_to_dev(dev), virt_to_phys(addr));
> +}
> +
>  #endif /* __DRIVER_NVME_H__ */
diff mbox series

Patch

diff --git a/drivers/nvme/nvme.c b/drivers/nvme/nvme.c
index 4c4dc7cc4d..0b7082d71b 100644
--- a/drivers/nvme/nvme.c
+++ b/drivers/nvme/nvme.c
@@ -95,7 +95,7 @@  static int nvme_setup_prps(struct nvme_dev *dev, u64 *prp2,
 		buffer += (page_size - offset);
 
 	if (length <= page_size) {
-		*prp2 = (u64)buffer;
+		*prp2 = nvme_virt_to_bus(dev, buffer);
 		return 0;
 	}
 
@@ -120,16 +120,16 @@  static int nvme_setup_prps(struct nvme_dev *dev, u64 *prp2,
 	i = 0;
 	while (nprps) {
 		if (i == prps_per_page) {
-			u64 next_prp_list = (u64)prp_pool + page_size;
-			*(prp_pool + i) = cpu_to_le64(next_prp_list);
+			u64 next = nvme_virt_to_bus(dev, prp_pool + page_size);
+			*(prp_pool + i) = cpu_to_le64(next);
 			i = 0;
 			prp_pool += page_size;
 		}
-		*(prp_pool + i++) = cpu_to_le64((u64)buffer);
+		*(prp_pool + i++) = cpu_to_le64(nvme_virt_to_bus(dev, buffer));
 		buffer += page_size;
 		nprps--;
 	}
-	*prp2 = (u64)dev->prp_pool;
+	*prp2 = nvme_virt_to_bus(dev, dev->prp_pool);
 
 	flush_dcache_range((ulong)dev->prp_pool, (ulong)dev->prp_pool +
 			   dev->prp_entry_num * sizeof(u64));
@@ -356,6 +356,7 @@  static int nvme_configure_admin_queue(struct nvme_dev *dev)
 	int result;
 	u32 aqa;
 	u64 cap = dev->cap;
+	u64 dma_addr;
 	struct nvme_queue *nvmeq;
 	/* most architectures use 4KB as the page size */
 	unsigned page_shift = 12;
@@ -396,8 +397,10 @@  static int nvme_configure_admin_queue(struct nvme_dev *dev)
 	dev->ctrl_config |= NVME_CC_IOSQES | NVME_CC_IOCQES;
 
 	writel(aqa, &dev->bar->aqa);
-	nvme_writeq((ulong)nvmeq->sq_cmds, &dev->bar->asq);
-	nvme_writeq((ulong)nvmeq->cqes, &dev->bar->acq);
+	dma_addr = nvme_virt_to_bus(dev, nvmeq->sq_cmds);
+	nvme_writeq(dma_addr, &dev->bar->asq);
+	dma_addr = nvme_virt_to_bus(dev, nvmeq->cqes);
+	nvme_writeq(dma_addr, &dev->bar->acq);
 
 	result = nvme_enable_ctrl(dev);
 	if (result)
@@ -423,7 +426,7 @@  static int nvme_alloc_cq(struct nvme_dev *dev, u16 qid,
 
 	memset(&c, 0, sizeof(c));
 	c.create_cq.opcode = nvme_admin_create_cq;
-	c.create_cq.prp1 = cpu_to_le64((ulong)nvmeq->cqes);
+	c.create_cq.prp1 = cpu_to_le64(nvme_virt_to_bus(dev, nvmeq->cqes));
 	c.create_cq.cqid = cpu_to_le16(qid);
 	c.create_cq.qsize = cpu_to_le16(nvmeq->q_depth - 1);
 	c.create_cq.cq_flags = cpu_to_le16(flags);
@@ -440,7 +443,7 @@  static int nvme_alloc_sq(struct nvme_dev *dev, u16 qid,
 
 	memset(&c, 0, sizeof(c));
 	c.create_sq.opcode = nvme_admin_create_sq;
-	c.create_sq.prp1 = cpu_to_le64((ulong)nvmeq->sq_cmds);
+	c.create_sq.prp1 = cpu_to_le64(nvme_virt_to_bus(dev, nvmeq->sq_cmds));
 	c.create_sq.sqid = cpu_to_le16(qid);
 	c.create_sq.qsize = cpu_to_le16(nvmeq->q_depth - 1);
 	c.create_sq.sq_flags = cpu_to_le16(flags);
@@ -461,14 +464,14 @@  int nvme_identify(struct nvme_dev *dev, unsigned nsid,
 	memset(&c, 0, sizeof(c));
 	c.identify.opcode = nvme_admin_identify;
 	c.identify.nsid = cpu_to_le32(nsid);
-	c.identify.prp1 = cpu_to_le64((u64)buffer);
+	c.identify.prp1 = cpu_to_le64(nvme_virt_to_bus(dev, buffer));
 
 	length -= (page_size - offset);
 	if (length <= 0) {
 		c.identify.prp2 = 0;
 	} else {
 		buffer += (page_size - offset);
-		c.identify.prp2 = cpu_to_le64((u64)buffer);
+		c.identify.prp2 = cpu_to_le64(nvme_virt_to_bus(dev, buffer));
 	}
 
 	c.identify.cns = cpu_to_le32(cns);
@@ -493,7 +496,7 @@  int nvme_get_features(struct nvme_dev *dev, unsigned fid, unsigned nsid,
 	memset(&c, 0, sizeof(c));
 	c.features.opcode = nvme_admin_get_features;
 	c.features.nsid = cpu_to_le32(nsid);
-	c.features.prp1 = cpu_to_le64((u64)buffer);
+	c.features.prp1 = cpu_to_le64(nvme_virt_to_bus(dev, buffer));
 	c.features.fid = cpu_to_le32(fid);
 
 	ret = nvme_submit_admin_cmd(dev, &c, result);
@@ -519,7 +522,7 @@  int nvme_set_features(struct nvme_dev *dev, unsigned fid, unsigned dword11,
 
 	memset(&c, 0, sizeof(c));
 	c.features.opcode = nvme_admin_set_features;
-	c.features.prp1 = cpu_to_le64((u64)buffer);
+	c.features.prp1 = cpu_to_le64(nvme_virt_to_bus(dev, buffer));
 	c.features.fid = cpu_to_le32(fid);
 	c.features.dword11 = cpu_to_le32(dword11);
 
@@ -775,7 +778,7 @@  static ulong nvme_blk_rw(struct udevice *udev, lbaint_t blknr,
 		c.rw.slba = cpu_to_le64(slba);
 		slba += lbas;
 		c.rw.length = cpu_to_le16(lbas - 1);
-		c.rw.prp1 = cpu_to_le64((ulong)buffer);
+		c.rw.prp1 = cpu_to_le64(nvme_virt_to_bus(dev, buffer));
 		c.rw.prp2 = cpu_to_le64(prp2);
 		status = nvme_submit_sync_cmd(dev->queues[NVME_IO_Q],
 				&c, NULL, IO_TIMEOUT);
@@ -834,6 +837,7 @@  static int nvme_probe(struct udevice *udev)
 	struct nvme_id_ns *id;
 
 	ndev->instance = trailing_strtol(udev->name);
+	ndev->dev = udev->parent;
 
 	INIT_LIST_HEAD(&ndev->namespaces);
 	ndev->bar = dm_pci_map_bar(udev, PCI_BASE_ADDRESS_0,
diff --git a/drivers/nvme/nvme.h b/drivers/nvme/nvme.h
index c6aae4da5d..31e6899bca 100644
--- a/drivers/nvme/nvme.h
+++ b/drivers/nvme/nvme.h
@@ -7,8 +7,15 @@ 
 #ifndef __DRIVER_NVME_H__
 #define __DRIVER_NVME_H__
 
+#include <phys2bus.h>
 #include <asm/io.h>
 
+#if CONFIG_IS_ENABLED(DM_USB)
+#define nvme_to_dev(_dev)     _dev->dev
+#else
+#define nvme_to_dev(_dev)     NULL
+#endif
+
 struct nvme_id_power_state {
 	__le16			max_power;	/* centiwatts */
 	__u8			rsvd2;
@@ -596,6 +603,9 @@  enum {
 
 /* Represents an NVM Express device. Each nvme_dev is a PCI function. */
 struct nvme_dev {
+#if CONFIG_IS_ENABLED(DM_USB)
+	struct udevice *dev;
+#endif
 	struct list_head node;
 	struct nvme_queue **queues;
 	u32 __iomem *dbs;
@@ -635,4 +645,9 @@  struct nvme_ns {
 	u8 flbas;
 };
 
+static inline dma_addr_t nvme_virt_to_bus(struct nvme_dev *dev, void *addr)
+{
+	return dev_phys_to_bus(nvme_to_dev(dev), virt_to_phys(addr));
+}
+
 #endif /* __DRIVER_NVME_H__ */