Message ID | ad0644a33852f459dc32e77bd0452397ba95ab1d.1632439220.git.stefan@agner.ch |
---|---|
State | RFC |
Delegated to: | Tom Rini |
Headers | show |
Series | [RFC,1/4] Revert "nvme: Correct the prps per page calculation method" | expand |
On 2021-09-24 01:20, Stefan Agner wrote: > So far we've been content with passing physical/CPU addresses when > configuring memory addresses into NVMe controllers, but not all > platforms have buses with transparent mappings. Specifically the > Raspberry Pi 4 might introduce an offset to memory accesses incoming > from its PCIe port. > > Introduce nvme_virt_to_bus() and nvme_bus_to_virt() to cater with these > limitations, and make sure we don't break non DM users. > For devices where PCIe's view of host memory doesn't match the memory > as seen by the CPU. > > A similar change has been introduced for XHCI controller with > commit 1a474559d90a ("xhci: translate virtual addresses into the bus's > address space"). > > Signed-off-by: Stefan Agner <stefan@agner.ch> > --- > > drivers/nvme/nvme.c | 32 ++++++++++++++++++-------------- > drivers/nvme/nvme.h | 15 +++++++++++++++ > 2 files changed, 33 insertions(+), 14 deletions(-) > > diff --git a/drivers/nvme/nvme.c b/drivers/nvme/nvme.c > index 4c4dc7cc4d..0b7082d71b 100644 > --- a/drivers/nvme/nvme.c > +++ b/drivers/nvme/nvme.c > @@ -95,7 +95,7 @@ static int nvme_setup_prps(struct nvme_dev *dev, u64 *prp2, > buffer += (page_size - offset); > > if (length <= page_size) { > - *prp2 = (u64)buffer; > + *prp2 = nvme_virt_to_bus(dev, buffer); > return 0; > } > > @@ -120,16 +120,16 @@ static int nvme_setup_prps(struct nvme_dev *dev, > u64 *prp2, > i = 0; > while (nprps) { > if (i == prps_per_page) { > - u64 next_prp_list = (u64)prp_pool + page_size; > - *(prp_pool + i) = cpu_to_le64(next_prp_list); > + u64 next = nvme_virt_to_bus(dev, prp_pool + page_size); > + *(prp_pool + i) = cpu_to_le64(next); > i = 0; > prp_pool += page_size; > } > - *(prp_pool + i++) = cpu_to_le64((u64)buffer); > + *(prp_pool + i++) = cpu_to_le64(nvme_virt_to_bus(dev, buffer)); > buffer += page_size; > nprps--; > } > - *prp2 = (u64)dev->prp_pool; > + *prp2 = nvme_virt_to_bus(dev, dev->prp_pool); > > flush_dcache_range((ulong)dev->prp_pool, (ulong)dev->prp_pool + > dev->prp_entry_num * sizeof(u64)); > @@ -356,6 +356,7 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev) > int result; > u32 aqa; > u64 cap = dev->cap; > + u64 dma_addr; > struct nvme_queue *nvmeq; > /* most architectures use 4KB as the page size */ > unsigned page_shift = 12; > @@ -396,8 +397,10 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev) > dev->ctrl_config |= NVME_CC_IOSQES | NVME_CC_IOCQES; > > writel(aqa, &dev->bar->aqa); > - nvme_writeq((ulong)nvmeq->sq_cmds, &dev->bar->asq); > - nvme_writeq((ulong)nvmeq->cqes, &dev->bar->acq); > + dma_addr = nvme_virt_to_bus(dev, nvmeq->sq_cmds); > + nvme_writeq(dma_addr, &dev->bar->asq); > + dma_addr = nvme_virt_to_bus(dev, nvmeq->cqes); > + nvme_writeq(dma_addr, &dev->bar->acq); > > result = nvme_enable_ctrl(dev); > if (result) > @@ -423,7 +426,7 @@ static int nvme_alloc_cq(struct nvme_dev *dev, u16 qid, > > memset(&c, 0, sizeof(c)); > c.create_cq.opcode = nvme_admin_create_cq; > - c.create_cq.prp1 = cpu_to_le64((ulong)nvmeq->cqes); > + c.create_cq.prp1 = cpu_to_le64(nvme_virt_to_bus(dev, nvmeq->cqes)); > c.create_cq.cqid = cpu_to_le16(qid); > c.create_cq.qsize = cpu_to_le16(nvmeq->q_depth - 1); > c.create_cq.cq_flags = cpu_to_le16(flags); > @@ -440,7 +443,7 @@ static int nvme_alloc_sq(struct nvme_dev *dev, u16 qid, > > memset(&c, 0, sizeof(c)); > c.create_sq.opcode = nvme_admin_create_sq; > - c.create_sq.prp1 = cpu_to_le64((ulong)nvmeq->sq_cmds); > + c.create_sq.prp1 = cpu_to_le64(nvme_virt_to_bus(dev, nvmeq->sq_cmds)); > c.create_sq.sqid = cpu_to_le16(qid); > c.create_sq.qsize = cpu_to_le16(nvmeq->q_depth - 1); > c.create_sq.sq_flags = cpu_to_le16(flags); > @@ -461,14 +464,14 @@ int nvme_identify(struct nvme_dev *dev, unsigned nsid, > memset(&c, 0, sizeof(c)); > c.identify.opcode = nvme_admin_identify; > c.identify.nsid = cpu_to_le32(nsid); > - c.identify.prp1 = cpu_to_le64((u64)buffer); > + c.identify.prp1 = cpu_to_le64(nvme_virt_to_bus(dev, buffer)); > > length -= (page_size - offset); > if (length <= 0) { > c.identify.prp2 = 0; > } else { > buffer += (page_size - offset); > - c.identify.prp2 = cpu_to_le64((u64)buffer); > + c.identify.prp2 = cpu_to_le64(nvme_virt_to_bus(dev, buffer)); > } > > c.identify.cns = cpu_to_le32(cns); > @@ -493,7 +496,7 @@ int nvme_get_features(struct nvme_dev *dev, > unsigned fid, unsigned nsid, > memset(&c, 0, sizeof(c)); > c.features.opcode = nvme_admin_get_features; > c.features.nsid = cpu_to_le32(nsid); > - c.features.prp1 = cpu_to_le64((u64)buffer); > + c.features.prp1 = cpu_to_le64(nvme_virt_to_bus(dev, buffer)); > c.features.fid = cpu_to_le32(fid); > > ret = nvme_submit_admin_cmd(dev, &c, result); > @@ -519,7 +522,7 @@ int nvme_set_features(struct nvme_dev *dev, > unsigned fid, unsigned dword11, > > memset(&c, 0, sizeof(c)); > c.features.opcode = nvme_admin_set_features; > - c.features.prp1 = cpu_to_le64((u64)buffer); > + c.features.prp1 = cpu_to_le64(nvme_virt_to_bus(dev, buffer)); > c.features.fid = cpu_to_le32(fid); > c.features.dword11 = cpu_to_le32(dword11); > > @@ -775,7 +778,7 @@ static ulong nvme_blk_rw(struct udevice *udev, > lbaint_t blknr, > c.rw.slba = cpu_to_le64(slba); > slba += lbas; > c.rw.length = cpu_to_le16(lbas - 1); > - c.rw.prp1 = cpu_to_le64((ulong)buffer); > + c.rw.prp1 = cpu_to_le64(nvme_virt_to_bus(dev, buffer)); > c.rw.prp2 = cpu_to_le64(prp2); > status = nvme_submit_sync_cmd(dev->queues[NVME_IO_Q], > &c, NULL, IO_TIMEOUT); > @@ -834,6 +837,7 @@ static int nvme_probe(struct udevice *udev) > struct nvme_id_ns *id; > > ndev->instance = trailing_strtol(udev->name); > + ndev->dev = udev->parent; It only translates addresses correctly once I chose the parent device. I am pretty sure that this is not the right way to fix it. Shouldn't the child automatically assume that the same translation is required? -- Stefan > > INIT_LIST_HEAD(&ndev->namespaces); > ndev->bar = dm_pci_map_bar(udev, PCI_BASE_ADDRESS_0, > diff --git a/drivers/nvme/nvme.h b/drivers/nvme/nvme.h > index c6aae4da5d..31e6899bca 100644 > --- a/drivers/nvme/nvme.h > +++ b/drivers/nvme/nvme.h > @@ -7,8 +7,15 @@ > #ifndef __DRIVER_NVME_H__ > #define __DRIVER_NVME_H__ > > +#include <phys2bus.h> > #include <asm/io.h> > > +#if CONFIG_IS_ENABLED(DM_USB) > +#define nvme_to_dev(_dev) _dev->dev > +#else > +#define nvme_to_dev(_dev) NULL > +#endif > + > struct nvme_id_power_state { > __le16 max_power; /* centiwatts */ > __u8 rsvd2; > @@ -596,6 +603,9 @@ enum { > > /* Represents an NVM Express device. Each nvme_dev is a PCI function. */ > struct nvme_dev { > +#if CONFIG_IS_ENABLED(DM_USB) > + struct udevice *dev; > +#endif > struct list_head node; > struct nvme_queue **queues; > u32 __iomem *dbs; > @@ -635,4 +645,9 @@ struct nvme_ns { > u8 flbas; > }; > > +static inline dma_addr_t nvme_virt_to_bus(struct nvme_dev *dev, void *addr) > +{ > + return dev_phys_to_bus(nvme_to_dev(dev), virt_to_phys(addr)); > +} > + > #endif /* __DRIVER_NVME_H__ */
diff --git a/drivers/nvme/nvme.c b/drivers/nvme/nvme.c index 4c4dc7cc4d..0b7082d71b 100644 --- a/drivers/nvme/nvme.c +++ b/drivers/nvme/nvme.c @@ -95,7 +95,7 @@ static int nvme_setup_prps(struct nvme_dev *dev, u64 *prp2, buffer += (page_size - offset); if (length <= page_size) { - *prp2 = (u64)buffer; + *prp2 = nvme_virt_to_bus(dev, buffer); return 0; } @@ -120,16 +120,16 @@ static int nvme_setup_prps(struct nvme_dev *dev, u64 *prp2, i = 0; while (nprps) { if (i == prps_per_page) { - u64 next_prp_list = (u64)prp_pool + page_size; - *(prp_pool + i) = cpu_to_le64(next_prp_list); + u64 next = nvme_virt_to_bus(dev, prp_pool + page_size); + *(prp_pool + i) = cpu_to_le64(next); i = 0; prp_pool += page_size; } - *(prp_pool + i++) = cpu_to_le64((u64)buffer); + *(prp_pool + i++) = cpu_to_le64(nvme_virt_to_bus(dev, buffer)); buffer += page_size; nprps--; } - *prp2 = (u64)dev->prp_pool; + *prp2 = nvme_virt_to_bus(dev, dev->prp_pool); flush_dcache_range((ulong)dev->prp_pool, (ulong)dev->prp_pool + dev->prp_entry_num * sizeof(u64)); @@ -356,6 +356,7 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev) int result; u32 aqa; u64 cap = dev->cap; + u64 dma_addr; struct nvme_queue *nvmeq; /* most architectures use 4KB as the page size */ unsigned page_shift = 12; @@ -396,8 +397,10 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev) dev->ctrl_config |= NVME_CC_IOSQES | NVME_CC_IOCQES; writel(aqa, &dev->bar->aqa); - nvme_writeq((ulong)nvmeq->sq_cmds, &dev->bar->asq); - nvme_writeq((ulong)nvmeq->cqes, &dev->bar->acq); + dma_addr = nvme_virt_to_bus(dev, nvmeq->sq_cmds); + nvme_writeq(dma_addr, &dev->bar->asq); + dma_addr = nvme_virt_to_bus(dev, nvmeq->cqes); + nvme_writeq(dma_addr, &dev->bar->acq); result = nvme_enable_ctrl(dev); if (result) @@ -423,7 +426,7 @@ static int nvme_alloc_cq(struct nvme_dev *dev, u16 qid, memset(&c, 0, sizeof(c)); c.create_cq.opcode = nvme_admin_create_cq; - c.create_cq.prp1 = cpu_to_le64((ulong)nvmeq->cqes); + c.create_cq.prp1 = cpu_to_le64(nvme_virt_to_bus(dev, nvmeq->cqes)); c.create_cq.cqid = cpu_to_le16(qid); c.create_cq.qsize = cpu_to_le16(nvmeq->q_depth - 1); c.create_cq.cq_flags = cpu_to_le16(flags); @@ -440,7 +443,7 @@ static int nvme_alloc_sq(struct nvme_dev *dev, u16 qid, memset(&c, 0, sizeof(c)); c.create_sq.opcode = nvme_admin_create_sq; - c.create_sq.prp1 = cpu_to_le64((ulong)nvmeq->sq_cmds); + c.create_sq.prp1 = cpu_to_le64(nvme_virt_to_bus(dev, nvmeq->sq_cmds)); c.create_sq.sqid = cpu_to_le16(qid); c.create_sq.qsize = cpu_to_le16(nvmeq->q_depth - 1); c.create_sq.sq_flags = cpu_to_le16(flags); @@ -461,14 +464,14 @@ int nvme_identify(struct nvme_dev *dev, unsigned nsid, memset(&c, 0, sizeof(c)); c.identify.opcode = nvme_admin_identify; c.identify.nsid = cpu_to_le32(nsid); - c.identify.prp1 = cpu_to_le64((u64)buffer); + c.identify.prp1 = cpu_to_le64(nvme_virt_to_bus(dev, buffer)); length -= (page_size - offset); if (length <= 0) { c.identify.prp2 = 0; } else { buffer += (page_size - offset); - c.identify.prp2 = cpu_to_le64((u64)buffer); + c.identify.prp2 = cpu_to_le64(nvme_virt_to_bus(dev, buffer)); } c.identify.cns = cpu_to_le32(cns); @@ -493,7 +496,7 @@ int nvme_get_features(struct nvme_dev *dev, unsigned fid, unsigned nsid, memset(&c, 0, sizeof(c)); c.features.opcode = nvme_admin_get_features; c.features.nsid = cpu_to_le32(nsid); - c.features.prp1 = cpu_to_le64((u64)buffer); + c.features.prp1 = cpu_to_le64(nvme_virt_to_bus(dev, buffer)); c.features.fid = cpu_to_le32(fid); ret = nvme_submit_admin_cmd(dev, &c, result); @@ -519,7 +522,7 @@ int nvme_set_features(struct nvme_dev *dev, unsigned fid, unsigned dword11, memset(&c, 0, sizeof(c)); c.features.opcode = nvme_admin_set_features; - c.features.prp1 = cpu_to_le64((u64)buffer); + c.features.prp1 = cpu_to_le64(nvme_virt_to_bus(dev, buffer)); c.features.fid = cpu_to_le32(fid); c.features.dword11 = cpu_to_le32(dword11); @@ -775,7 +778,7 @@ static ulong nvme_blk_rw(struct udevice *udev, lbaint_t blknr, c.rw.slba = cpu_to_le64(slba); slba += lbas; c.rw.length = cpu_to_le16(lbas - 1); - c.rw.prp1 = cpu_to_le64((ulong)buffer); + c.rw.prp1 = cpu_to_le64(nvme_virt_to_bus(dev, buffer)); c.rw.prp2 = cpu_to_le64(prp2); status = nvme_submit_sync_cmd(dev->queues[NVME_IO_Q], &c, NULL, IO_TIMEOUT); @@ -834,6 +837,7 @@ static int nvme_probe(struct udevice *udev) struct nvme_id_ns *id; ndev->instance = trailing_strtol(udev->name); + ndev->dev = udev->parent; INIT_LIST_HEAD(&ndev->namespaces); ndev->bar = dm_pci_map_bar(udev, PCI_BASE_ADDRESS_0, diff --git a/drivers/nvme/nvme.h b/drivers/nvme/nvme.h index c6aae4da5d..31e6899bca 100644 --- a/drivers/nvme/nvme.h +++ b/drivers/nvme/nvme.h @@ -7,8 +7,15 @@ #ifndef __DRIVER_NVME_H__ #define __DRIVER_NVME_H__ +#include <phys2bus.h> #include <asm/io.h> +#if CONFIG_IS_ENABLED(DM_USB) +#define nvme_to_dev(_dev) _dev->dev +#else +#define nvme_to_dev(_dev) NULL +#endif + struct nvme_id_power_state { __le16 max_power; /* centiwatts */ __u8 rsvd2; @@ -596,6 +603,9 @@ enum { /* Represents an NVM Express device. Each nvme_dev is a PCI function. */ struct nvme_dev { +#if CONFIG_IS_ENABLED(DM_USB) + struct udevice *dev; +#endif struct list_head node; struct nvme_queue **queues; u32 __iomem *dbs; @@ -635,4 +645,9 @@ struct nvme_ns { u8 flbas; }; +static inline dma_addr_t nvme_virt_to_bus(struct nvme_dev *dev, void *addr) +{ + return dev_phys_to_bus(nvme_to_dev(dev), virt_to_phys(addr)); +} + #endif /* __DRIVER_NVME_H__ */
So far we've been content with passing physical/CPU addresses when configuring memory addresses into NVMe controllers, but not all platforms have buses with transparent mappings. Specifically the Raspberry Pi 4 might introduce an offset to memory accesses incoming from its PCIe port. Introduce nvme_virt_to_bus() and nvme_bus_to_virt() to cater with these limitations, and make sure we don't break non DM users. For devices where PCIe's view of host memory doesn't match the memory as seen by the CPU. A similar change has been introduced for XHCI controller with commit 1a474559d90a ("xhci: translate virtual addresses into the bus's address space"). Signed-off-by: Stefan Agner <stefan@agner.ch> --- drivers/nvme/nvme.c | 32 ++++++++++++++++++-------------- drivers/nvme/nvme.h | 15 +++++++++++++++ 2 files changed, 33 insertions(+), 14 deletions(-)