
[v2] PCI: dwc: Strengthen the MSI address allocation logic

Message ID 20240111042103.392939-1-ajayagarwal@google.com
State New
Series [v2] PCI: dwc: Strengthen the MSI address allocation logic

Commit Message

Ajay Agarwal Jan. 11, 2024, 4:21 a.m. UTC
There can be platforms that do not use/have 32-bit DMA addresses
but want to enumerate endpoints which support only 32-bit MSI
addresses. The current implementation of 32-bit IOVA allocation can
fail for such platforms, eventually leading to probe failure.

If there is a memory region reserved for the pci->dev, pick up
the MSI data from this region. This can be used by the platforms
described above.

Otherwise, if no memory region is reserved, try to allocate a
32-bit IOVA. If this allocation also fails, attempt a 64-bit
allocation so that the probe can succeed. If a 64-bit MSI address
is allocated, then EPs that support only 32-bit MSI addresses
will not work.

Signed-off-by: Ajay Agarwal <ajayagarwal@google.com>
---
Changelog since v1:
 - Use reserved memory, if it exists, to set up the MSI data
 - Fall back to a 64-bit IOVA allocation if the 32-bit allocation fails

 .../pci/controller/dwc/pcie-designware-host.c | 50 ++++++++++++++-----
 drivers/pci/controller/dwc/pcie-designware.h  |  1 +
 2 files changed, 39 insertions(+), 12 deletions(-)

Comments

William McVicker Jan. 11, 2024, 5:38 p.m. UTC | #1
Hi Ajay,

Thanks for sending the patch!

On 01/11/2024, Ajay Agarwal wrote:
> There can be platforms that do not use/have 32-bit DMA addresses
> but want to enumerate endpoints which support only 32-bit MSI
> address. The current implementation of 32-bit IOVA allocation can
> fail for such platforms, eventually leading to the probe failure.
> 
> If there is a memory region reserved for the pci->dev, pick up
> the MSI data from this region. This can be used by the platforms
> described above.
> 
> Else, if the memory region is not reserved, try to allocate a
> 32-bit IOVA. Additionally, if this allocation also fails, attempt
> a 64-bit allocation for probe to be successful. If the 64-bit MSI
> address is allocated, then the EPs supporting 32-bit MSI address
> will not work.
> 
> Signed-off-by: Ajay Agarwal <ajayagarwal@google.com>
> ---
> Changelog since v1:
>  - Use reserved memory, if it exists, to setup the MSI data
>  - Fallback to 64-bit IOVA allocation if 32-bit allocation fails
> 
>  .../pci/controller/dwc/pcie-designware-host.c | 50 ++++++++++++++-----
>  drivers/pci/controller/dwc/pcie-designware.h  |  1 +
>  2 files changed, 39 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/pci/controller/dwc/pcie-designware-host.c b/drivers/pci/controller/dwc/pcie-designware-host.c
> index 7991f0e179b2..8c7c77b49ca8 100644
> --- a/drivers/pci/controller/dwc/pcie-designware-host.c
> +++ b/drivers/pci/controller/dwc/pcie-designware-host.c
> @@ -331,6 +331,8 @@ static int dw_pcie_msi_host_init(struct dw_pcie_rp *pp)
>  	u64 *msi_vaddr;
>  	int ret;
>  	u32 ctrl, num_ctrls;
> +	struct device_node *np;
> +	struct resource r;
>  
>  	for (ctrl = 0; ctrl < MAX_MSI_CTRLS; ctrl++)
>  		pp->irq_mask[ctrl] = ~0;
> @@ -374,20 +376,44 @@ static int dw_pcie_msi_host_init(struct dw_pcie_rp *pp)
>  	 * order not to miss MSI TLPs from those devices the MSI target
>  	 * address has to be within the lowest 4GB.
>  	 *
> -	 * Note until there is a better alternative found the reservation is
> -	 * done by allocating from the artificially limited DMA-coherent
> -	 * memory.
> +	 * Check if there is memory region reserved for this device. If yes,
> +	 * pick up the msi_data from this region. This will be helpful for
> +	 * platforms that do not use/have 32-bit DMA addresses but want to use
> +	 * endpoints which support only 32-bit MSI address.
> +	 * Else, if the memory region is not reserved, try to allocate a 32-bit
> +	 * IOVA. Additionally, if this allocation also fails, attempt a 64-bit
> +	 * allocation. If the 64-bit MSI address is allocated, then the EPs
> +	 * supporting 32-bit MSI address will not work.
>  	 */
> -	ret = dma_set_coherent_mask(dev, DMA_BIT_MASK(32));
> -	if (ret)
> -		dev_warn(dev, "Failed to set DMA mask to 32-bit. Devices with only 32-bit MSI support may not work properly\n");
> +	np = of_parse_phandle(dev->of_node, "memory-region", 0);
> +	if (np) {
> +		ret = of_address_to_resource(np, 0, &r);
> +		if (ret) {
> +			dev_err(dev, "No memory address assigned to the region\n");
> +			return ret;
> +		}
>  
> -	msi_vaddr = dmam_alloc_coherent(dev, sizeof(u64), &pp->msi_data,
> -					GFP_KERNEL);
> -	if (!msi_vaddr) {
> -		dev_err(dev, "Failed to alloc and map MSI data\n");
> -		dw_pcie_free_msi(pp);
> -		return -ENOMEM;
> +		pp->msi_data = r.start;
> +	} else {
> +		dev_dbg(dev, "No %s specified\n", "memory-region");
> +		ret = dma_set_coherent_mask(dev, DMA_BIT_MASK(32));
> +		if (ret)
> +			dev_warn(dev, "Failed to set DMA mask to 32-bit. Devices with only 32-bit MSI support may not work properly\n");
> +
> +		msi_vaddr = dmam_alloc_coherent(dev, sizeof(u64), &pp->msi_data,
> +						GFP_KERNEL);
> +		if (!msi_vaddr) {
> +			dev_warn(dev, "Failed to alloc 32-bit MSI data. Attempting 64-bit now\n");
> +			dma_set_coherent_mask(dev, DMA_BIT_MASK(64));
> +			msi_vaddr = dmam_alloc_coherent(dev, sizeof(u64), &pp->msi_data,
> +							GFP_KERNEL);
> +		}
> +
> +		if (!msi_vaddr) {
> +			dev_err(dev, "Failed to alloc and map MSI data\n");
> +			dw_pcie_free_msi(pp);
> +			return -ENOMEM;
> +		}

Should we just put this second if-check inside the above fallback?
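For reference, a minimal sketch of what folding that check into the
fallback path could look like (same variables as the hunk above; an
untested illustration, not a proposed patch):

		msi_vaddr = dmam_alloc_coherent(dev, sizeof(u64), &pp->msi_data,
						GFP_KERNEL);
		if (!msi_vaddr) {
			dev_warn(dev, "Failed to alloc 32-bit MSI data. Attempting 64-bit now\n");
			dma_set_coherent_mask(dev, DMA_BIT_MASK(64));
			msi_vaddr = dmam_alloc_coherent(dev, sizeof(u64), &pp->msi_data,
							GFP_KERNEL);
			/* Bail out only if the 64-bit retry failed as well */
			if (!msi_vaddr) {
				dev_err(dev, "Failed to alloc and map MSI data\n");
				dw_pcie_free_msi(pp);
				return -ENOMEM;
			}
		}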

>  	}
>  
>  	return 0;
> diff --git a/drivers/pci/controller/dwc/pcie-designware.h b/drivers/pci/controller/dwc/pcie-designware.h
> index 55ff76e3d384..c85cf4d56e98 100644
> --- a/drivers/pci/controller/dwc/pcie-designware.h
> +++ b/drivers/pci/controller/dwc/pcie-designware.h
> @@ -317,6 +317,7 @@ struct dw_pcie_rp {
>  	phys_addr_t		io_bus_addr;
>  	u32			io_size;
>  	int			irq;
> +	u8			coherent_dma_bits;
>  	const struct dw_pcie_host_ops *ops;
>  	int			msi_irq[MAX_MSI_CTRLS];
>  	struct irq_domain	*irq_domain;

Looks like this is a lingering change? Please drop.

Thanks,
Will

> -- 
> 2.43.0.275.g3460e3d667-goog
>
Ajay Agarwal Jan. 11, 2024, 6:02 p.m. UTC | #2
On Thu, Jan 11, 2024 at 09:38:03AM -0800, William McVicker wrote:
> Hi Ajay,
> 
> Thanks for sending the patch!
> 
> On 01/11/2024, Ajay Agarwal wrote:
> > There can be platforms that do not use/have 32-bit DMA addresses
> > but want to enumerate endpoints which support only 32-bit MSI
> > address. The current implementation of 32-bit IOVA allocation can
> > fail for such platforms, eventually leading to the probe failure.
> > 
> > If there is a memory region reserved for the pci->dev, pick up
> > the MSI data from this region. This can be used by the platforms
> > described above.
> > 
> > Else, if the memory region is not reserved, try to allocate a
> > 32-bit IOVA. Additionally, if this allocation also fails, attempt
> > a 64-bit allocation for probe to be successful. If the 64-bit MSI
> > address is allocated, then the EPs supporting 32-bit MSI address
> > will not work.
> > 
> > Signed-off-by: Ajay Agarwal <ajayagarwal@google.com>
> > ---
> > Changelog since v1:
> >  - Use reserved memory, if it exists, to setup the MSI data
> >  - Fallback to 64-bit IOVA allocation if 32-bit allocation fails
> > 
> >  .../pci/controller/dwc/pcie-designware-host.c | 50 ++++++++++++++-----
> >  drivers/pci/controller/dwc/pcie-designware.h  |  1 +
> >  2 files changed, 39 insertions(+), 12 deletions(-)
> > 
> > diff --git a/drivers/pci/controller/dwc/pcie-designware-host.c b/drivers/pci/controller/dwc/pcie-designware-host.c
> > index 7991f0e179b2..8c7c77b49ca8 100644
> > --- a/drivers/pci/controller/dwc/pcie-designware-host.c
> > +++ b/drivers/pci/controller/dwc/pcie-designware-host.c
> > @@ -331,6 +331,8 @@ static int dw_pcie_msi_host_init(struct dw_pcie_rp *pp)
> >  	u64 *msi_vaddr;
> >  	int ret;
> >  	u32 ctrl, num_ctrls;
> > +	struct device_node *np;
> > +	struct resource r;
> >  
> >  	for (ctrl = 0; ctrl < MAX_MSI_CTRLS; ctrl++)
> >  		pp->irq_mask[ctrl] = ~0;
> > @@ -374,20 +376,44 @@ static int dw_pcie_msi_host_init(struct dw_pcie_rp *pp)
> >  	 * order not to miss MSI TLPs from those devices the MSI target
> >  	 * address has to be within the lowest 4GB.
> >  	 *
> > -	 * Note until there is a better alternative found the reservation is
> > -	 * done by allocating from the artificially limited DMA-coherent
> > -	 * memory.
> > +	 * Check if there is memory region reserved for this device. If yes,
> > +	 * pick up the msi_data from this region. This will be helpful for
> > +	 * platforms that do not use/have 32-bit DMA addresses but want to use
> > +	 * endpoints which support only 32-bit MSI address.
> > +	 * Else, if the memory region is not reserved, try to allocate a 32-bit
> > +	 * IOVA. Additionally, if this allocation also fails, attempt a 64-bit
> > +	 * allocation. If the 64-bit MSI address is allocated, then the EPs
> > +	 * supporting 32-bit MSI address will not work.
> >  	 */
> > -	ret = dma_set_coherent_mask(dev, DMA_BIT_MASK(32));
> > -	if (ret)
> > -		dev_warn(dev, "Failed to set DMA mask to 32-bit. Devices with only 32-bit MSI support may not work properly\n");
> > +	np = of_parse_phandle(dev->of_node, "memory-region", 0);
> > +	if (np) {
> > +		ret = of_address_to_resource(np, 0, &r);
> > +		if (ret) {
> > +			dev_err(dev, "No memory address assigned to the region\n");
> > +			return ret;
> > +		}
> >  
> > -	msi_vaddr = dmam_alloc_coherent(dev, sizeof(u64), &pp->msi_data,
> > -					GFP_KERNEL);
> > -	if (!msi_vaddr) {
> > -		dev_err(dev, "Failed to alloc and map MSI data\n");
> > -		dw_pcie_free_msi(pp);
> > -		return -ENOMEM;
> > +		pp->msi_data = r.start;
> > +	} else {
> > +		dev_dbg(dev, "No %s specified\n", "memory-region");
> > +		ret = dma_set_coherent_mask(dev, DMA_BIT_MASK(32));
> > +		if (ret)
> > +			dev_warn(dev, "Failed to set DMA mask to 32-bit. Devices with only 32-bit MSI support may not work properly\n");
> > +
> > +		msi_vaddr = dmam_alloc_coherent(dev, sizeof(u64), &pp->msi_data,
> > +						GFP_KERNEL);
> > +		if (!msi_vaddr) {
> > +			dev_warn(dev, "Failed to alloc 32-bit MSI data. Attempting 64-bit now\n");
> > +			dma_set_coherent_mask(dev, DMA_BIT_MASK(64));
> > +			msi_vaddr = dmam_alloc_coherent(dev, sizeof(u64), &pp->msi_data,
> > +							GFP_KERNEL);
> > +		}
> > +
> > +		if (!msi_vaddr) {
> > +			dev_err(dev, "Failed to alloc and map MSI data\n");
> > +			dw_pcie_free_msi(pp);
> > +			return -ENOMEM;
> > +		}
> 
> Should we just put this second if-check inside the above fallback?
>
Yeah, we can do that. Will fix it in the next version after waiting for
comments from others.
> >  	}
> >  
> >  	return 0;
> > diff --git a/drivers/pci/controller/dwc/pcie-designware.h b/drivers/pci/controller/dwc/pcie-designware.h
> > index 55ff76e3d384..c85cf4d56e98 100644
> > --- a/drivers/pci/controller/dwc/pcie-designware.h
> > +++ b/drivers/pci/controller/dwc/pcie-designware.h
> > @@ -317,6 +317,7 @@ struct dw_pcie_rp {
> >  	phys_addr_t		io_bus_addr;
> >  	u32			io_size;
> >  	int			irq;
> > +	u8			coherent_dma_bits;
> >  	const struct dw_pcie_host_ops *ops;
> >  	int			msi_irq[MAX_MSI_CTRLS];
> >  	struct irq_domain	*irq_domain;
> 
> Looks like this is a lingering change? Please drop.
> 
Sorry about that. Will remove in the next version.
> Thanks,
> Will
> 
> > -- 
> > 2.43.0.275.g3460e3d667-goog
> >
Serge Semin Jan. 12, 2024, 10:04 a.m. UTC | #3
On Thu, Jan 11, 2024 at 09:51:03AM +0530, Ajay Agarwal wrote:
> There can be platforms that do not use/have 32-bit DMA addresses
> but want to enumerate endpoints which support only 32-bit MSI
> address. The current implementation of 32-bit IOVA allocation can
> fail for such platforms, eventually leading to the probe failure.
> 
> If there is a memory region reserved for the pci->dev, pick up
> the MSI data from this region. This can be used by the platforms
> described above.

I don't like this part of the change. Here is why

1. Once again, DW PCIe iMSI-RX doesn't need any actual _system
memory_ to work! All that is needed is a single dword within the PCIe
bus address space! The solution of using a coherent DMA allocation
is mainly a hack/workaround to make sure the system memory behind the
MSI address isn't utilized for something else. The correct solution
would be to reserve PCIe bus address space for MSIs with no RAM behind
it at all. For instance, if there is no RAM below 4GB, what prevents us
from using the lowest PCIe bus addresses for iMSI-RX? (Not to mention
that the IOMMU and things like the in-/outbound iATUs can also be
utilized to free up the lowest PCIe bus address space.)

You, on the contrary, suggest converting a temporary workaround into a
platform DT-bindings convention by defining new "memory-region"
property semantics. This basically propagates a weak software solution
into the DT bindings, which isn't right.

2. Even if we stick with the solution of always allocating coherent DMA
memory for iMSI-RX, I don't really see much benefit in reserving
specific system memory for it. If there is no actual RAM below 4GB
then the reservation won't work. If there is, what's the point in
reserving it when a normal DMA mask and dma_alloc_coherent() will work
just fine?

3. If, as an emergency solution for this problem, you wish to assign a
specific DMA buffer, then you don't need to define new non-standard
"memory-region" property semantics. What about using the
"restricted-dma-pool" reserved-memory region?

https://www.kernel.org/doc/Documentation/devicetree/bindings/reserved-memory/shared-dma-pool.yaml

-Serge(y)

> 
> Else, if the memory region is not reserved, try to allocate a
> 32-bit IOVA. Additionally, if this allocation also fails, attempt
> a 64-bit allocation for probe to be successful. If the 64-bit MSI
> address is allocated, then the EPs supporting 32-bit MSI address
> will not work.

> 
> Signed-off-by: Ajay Agarwal <ajayagarwal@google.com>
> ---
> Changelog since v1:
>  - Use reserved memory, if it exists, to setup the MSI data
>  - Fallback to 64-bit IOVA allocation if 32-bit allocation fails
> 
>  .../pci/controller/dwc/pcie-designware-host.c | 50 ++++++++++++++-----
>  drivers/pci/controller/dwc/pcie-designware.h  |  1 +
>  2 files changed, 39 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/pci/controller/dwc/pcie-designware-host.c b/drivers/pci/controller/dwc/pcie-designware-host.c
> index 7991f0e179b2..8c7c77b49ca8 100644
> --- a/drivers/pci/controller/dwc/pcie-designware-host.c
> +++ b/drivers/pci/controller/dwc/pcie-designware-host.c
> @@ -331,6 +331,8 @@ static int dw_pcie_msi_host_init(struct dw_pcie_rp *pp)
>  	u64 *msi_vaddr;
>  	int ret;
>  	u32 ctrl, num_ctrls;
> +	struct device_node *np;
> +	struct resource r;
>  
>  	for (ctrl = 0; ctrl < MAX_MSI_CTRLS; ctrl++)
>  		pp->irq_mask[ctrl] = ~0;
> @@ -374,20 +376,44 @@ static int dw_pcie_msi_host_init(struct dw_pcie_rp *pp)
>  	 * order not to miss MSI TLPs from those devices the MSI target
>  	 * address has to be within the lowest 4GB.
>  	 *
> -	 * Note until there is a better alternative found the reservation is
> -	 * done by allocating from the artificially limited DMA-coherent
> -	 * memory.
> +	 * Check if there is memory region reserved for this device. If yes,
> +	 * pick up the msi_data from this region. This will be helpful for
> +	 * platforms that do not use/have 32-bit DMA addresses but want to use
> +	 * endpoints which support only 32-bit MSI address.
> +	 * Else, if the memory region is not reserved, try to allocate a 32-bit
> +	 * IOVA. Additionally, if this allocation also fails, attempt a 64-bit
> +	 * allocation. If the 64-bit MSI address is allocated, then the EPs
> +	 * supporting 32-bit MSI address will not work.
>  	 */
> -	ret = dma_set_coherent_mask(dev, DMA_BIT_MASK(32));
> -	if (ret)
> -		dev_warn(dev, "Failed to set DMA mask to 32-bit. Devices with only 32-bit MSI support may not work properly\n");
> +	np = of_parse_phandle(dev->of_node, "memory-region", 0);
> +	if (np) {
> +		ret = of_address_to_resource(np, 0, &r);
> +		if (ret) {
> +			dev_err(dev, "No memory address assigned to the region\n");
> +			return ret;
> +		}
>  
> -	msi_vaddr = dmam_alloc_coherent(dev, sizeof(u64), &pp->msi_data,
> -					GFP_KERNEL);
> -	if (!msi_vaddr) {
> -		dev_err(dev, "Failed to alloc and map MSI data\n");
> -		dw_pcie_free_msi(pp);
> -		return -ENOMEM;
> +		pp->msi_data = r.start;
> +	} else {
> +		dev_dbg(dev, "No %s specified\n", "memory-region");
> +		ret = dma_set_coherent_mask(dev, DMA_BIT_MASK(32));
> +		if (ret)
> +			dev_warn(dev, "Failed to set DMA mask to 32-bit. Devices with only 32-bit MSI support may not work properly\n");
> +
> +		msi_vaddr = dmam_alloc_coherent(dev, sizeof(u64), &pp->msi_data,
> +						GFP_KERNEL);
> +		if (!msi_vaddr) {
> +			dev_warn(dev, "Failed to alloc 32-bit MSI data. Attempting 64-bit now\n");
> +			dma_set_coherent_mask(dev, DMA_BIT_MASK(64));
> +			msi_vaddr = dmam_alloc_coherent(dev, sizeof(u64), &pp->msi_data,
> +							GFP_KERNEL);
> +		}
> +
> +		if (!msi_vaddr) {
> +			dev_err(dev, "Failed to alloc and map MSI data\n");
> +			dw_pcie_free_msi(pp);
> +			return -ENOMEM;
> +		}
>  	}
>  
>  	return 0;
> diff --git a/drivers/pci/controller/dwc/pcie-designware.h b/drivers/pci/controller/dwc/pcie-designware.h
> index 55ff76e3d384..c85cf4d56e98 100644
> --- a/drivers/pci/controller/dwc/pcie-designware.h
> +++ b/drivers/pci/controller/dwc/pcie-designware.h
> @@ -317,6 +317,7 @@ struct dw_pcie_rp {
>  	phys_addr_t		io_bus_addr;
>  	u32			io_size;
>  	int			irq;
> +	u8			coherent_dma_bits;
>  	const struct dw_pcie_host_ops *ops;
>  	int			msi_irq[MAX_MSI_CTRLS];
>  	struct irq_domain	*irq_domain;
> -- 
> 2.43.0.275.g3460e3d667-goog
>
Robin Murphy Jan. 16, 2024, 1:30 p.m. UTC | #4
On 2024-01-11 4:21 am, Ajay Agarwal wrote:
> There can be platforms that do not use/have 32-bit DMA addresses
> but want to enumerate endpoints which support only 32-bit MSI
> address. The current implementation of 32-bit IOVA allocation can
> fail for such platforms, eventually leading to the probe failure.
> 
> If there is a memory region reserved for the pci->dev, pick up
> the MSI data from this region. This can be used by the platforms
> described above.
> 
> Else, if the memory region is not reserved, try to allocate a
> 32-bit IOVA. Additionally, if this allocation also fails, attempt
> a 64-bit allocation for probe to be successful. If the 64-bit MSI
> address is allocated, then the EPs supporting 32-bit MSI address
> will not work.
> 
> Signed-off-by: Ajay Agarwal <ajayagarwal@google.com>
> ---
> Changelog since v1:
>   - Use reserved memory, if it exists, to setup the MSI data
>   - Fallback to 64-bit IOVA allocation if 32-bit allocation fails
> 
>   .../pci/controller/dwc/pcie-designware-host.c | 50 ++++++++++++++-----
>   drivers/pci/controller/dwc/pcie-designware.h  |  1 +
>   2 files changed, 39 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/pci/controller/dwc/pcie-designware-host.c b/drivers/pci/controller/dwc/pcie-designware-host.c
> index 7991f0e179b2..8c7c77b49ca8 100644
> --- a/drivers/pci/controller/dwc/pcie-designware-host.c
> +++ b/drivers/pci/controller/dwc/pcie-designware-host.c
> @@ -331,6 +331,8 @@ static int dw_pcie_msi_host_init(struct dw_pcie_rp *pp)
>   	u64 *msi_vaddr;
>   	int ret;
>   	u32 ctrl, num_ctrls;
> +	struct device_node *np;
> +	struct resource r;
>   
>   	for (ctrl = 0; ctrl < MAX_MSI_CTRLS; ctrl++)
>   		pp->irq_mask[ctrl] = ~0;
> @@ -374,20 +376,44 @@ static int dw_pcie_msi_host_init(struct dw_pcie_rp *pp)
>   	 * order not to miss MSI TLPs from those devices the MSI target
>   	 * address has to be within the lowest 4GB.
>   	 *
> -	 * Note until there is a better alternative found the reservation is
> -	 * done by allocating from the artificially limited DMA-coherent
> -	 * memory.
> +	 * Check if there is memory region reserved for this device. If yes,
> +	 * pick up the msi_data from this region. This will be helpful for
> +	 * platforms that do not use/have 32-bit DMA addresses but want to use
> +	 * endpoints which support only 32-bit MSI address.
> +	 * Else, if the memory region is not reserved, try to allocate a 32-bit
> +	 * IOVA. Additionally, if this allocation also fails, attempt a 64-bit
> +	 * allocation. If the 64-bit MSI address is allocated, then the EPs
> +	 * supporting 32-bit MSI address will not work.
>   	 */
> -	ret = dma_set_coherent_mask(dev, DMA_BIT_MASK(32));
> -	if (ret)
> -		dev_warn(dev, "Failed to set DMA mask to 32-bit. Devices with only 32-bit MSI support may not work properly\n");
> +	np = of_parse_phandle(dev->of_node, "memory-region", 0);
> +	if (np) {
> +		ret = of_address_to_resource(np, 0, &r);

This is incorrect in several ways - reserved-memory nodes represent 
actual system memory, so can't be used to reserve arbitrary PCI memory 
space (which may be different if DMA offsets are involved); the whole 
purpose of going through the DMA API is to ensure we get a unique *bus* 
address. Obviously we don't want to reserve actual memory for something 
that functionally doesn't need it, but conversely having a 
reserved-memory region for an address which isn't memory would be 
nonsensical. And even if this *were* a viable approach, you haven't 
updated the DWC binding to allow it, nor defined a reserved-memory 
binding for the node itself.

If it was reasonable to put something in DT at all, then the logical 
thing would be a property expressing an MSI address directly on the 
controller node itself, but even that would be dictating software policy 
rather than describing an aspect of the platform itself. Plus this is 
far from the only driver with this concern, so it wouldn't make much 
sense to hack just one binding without all the others as well. The rest 
of the DT already describes everything an OS needs to know in order to 
decide an MSI address to use, it's just a matter of making this 
particular OS do a better job of putting it all together.

Thanks,
Robin.

> +		if (ret) {
> +			dev_err(dev, "No memory address assigned to the region\n");
> +			return ret;
> +		}
>   
> -	msi_vaddr = dmam_alloc_coherent(dev, sizeof(u64), &pp->msi_data,
> -					GFP_KERNEL);
> -	if (!msi_vaddr) {
> -		dev_err(dev, "Failed to alloc and map MSI data\n");
> -		dw_pcie_free_msi(pp);
> -		return -ENOMEM;
> +		pp->msi_data = r.start;
> +	} else {
> +		dev_dbg(dev, "No %s specified\n", "memory-region");
> +		ret = dma_set_coherent_mask(dev, DMA_BIT_MASK(32));
> +		if (ret)
> +			dev_warn(dev, "Failed to set DMA mask to 32-bit. Devices with only 32-bit MSI support may not work properly\n");
> +
> +		msi_vaddr = dmam_alloc_coherent(dev, sizeof(u64), &pp->msi_data,
> +						GFP_KERNEL);
> +		if (!msi_vaddr) {
> +			dev_warn(dev, "Failed to alloc 32-bit MSI data. Attempting 64-bit now\n");
> +			dma_set_coherent_mask(dev, DMA_BIT_MASK(64));
> +			msi_vaddr = dmam_alloc_coherent(dev, sizeof(u64), &pp->msi_data,
> +							GFP_KERNEL);
> +		}
> +
> +		if (!msi_vaddr) {
> +			dev_err(dev, "Failed to alloc and map MSI data\n");
> +			dw_pcie_free_msi(pp);
> +			return -ENOMEM;
> +		}
>   	}
>   
>   	return 0;
> diff --git a/drivers/pci/controller/dwc/pcie-designware.h b/drivers/pci/controller/dwc/pcie-designware.h
> index 55ff76e3d384..c85cf4d56e98 100644
> --- a/drivers/pci/controller/dwc/pcie-designware.h
> +++ b/drivers/pci/controller/dwc/pcie-designware.h
> @@ -317,6 +317,7 @@ struct dw_pcie_rp {
>   	phys_addr_t		io_bus_addr;
>   	u32			io_size;
>   	int			irq;
> +	u8			coherent_dma_bits;
>   	const struct dw_pcie_host_ops *ops;
>   	int			msi_irq[MAX_MSI_CTRLS];
>   	struct irq_domain	*irq_domain;
Sajid Dalvi Jan. 16, 2024, 8:47 p.m. UTC | #5
On Tue, Jan 16, 2024 at 7:30 AM Robin Murphy <robin.murphy@arm.com> wrote:
>
> On 2024-01-11 4:21 am, Ajay Agarwal wrote:
> > There can be platforms that do not use/have 32-bit DMA addresses
> > but want to enumerate endpoints which support only 32-bit MSI
> > address. The current implementation of 32-bit IOVA allocation can
> > fail for such platforms, eventually leading to the probe failure.
> >
> > If there is a memory region reserved for the pci->dev, pick up
> > the MSI data from this region. This can be used by the platforms
> > described above.
> >
> > Else, if the memory region is not reserved, try to allocate a
> > 32-bit IOVA. Additionally, if this allocation also fails, attempt
> > a 64-bit allocation for probe to be successful. If the 64-bit MSI
> > address is allocated, then the EPs supporting 32-bit MSI address
> > will not work.
> >
> > Signed-off-by: Ajay Agarwal <ajayagarwal@google.com>
> > ---
> > Changelog since v1:
> >   - Use reserved memory, if it exists, to setup the MSI data
> >   - Fallback to 64-bit IOVA allocation if 32-bit allocation fails
> >
> >   .../pci/controller/dwc/pcie-designware-host.c | 50 ++++++++++++++-----
> >   drivers/pci/controller/dwc/pcie-designware.h  |  1 +
> >   2 files changed, 39 insertions(+), 12 deletions(-)
> >
> > diff --git a/drivers/pci/controller/dwc/pcie-designware-host.c b/drivers/pci/controller/dwc/pcie-designware-host.c
> > index 7991f0e179b2..8c7c77b49ca8 100644
> > --- a/drivers/pci/controller/dwc/pcie-designware-host.c
> > +++ b/drivers/pci/controller/dwc/pcie-designware-host.c
> > @@ -331,6 +331,8 @@ static int dw_pcie_msi_host_init(struct dw_pcie_rp *pp)
> >       u64 *msi_vaddr;
> >       int ret;
> >       u32 ctrl, num_ctrls;
> > +     struct device_node *np;
> > +     struct resource r;
> >
> >       for (ctrl = 0; ctrl < MAX_MSI_CTRLS; ctrl++)
> >               pp->irq_mask[ctrl] = ~0;
> > @@ -374,20 +376,44 @@ static int dw_pcie_msi_host_init(struct dw_pcie_rp *pp)
> >        * order not to miss MSI TLPs from those devices the MSI target
> >        * address has to be within the lowest 4GB.
> >        *
> > -      * Note until there is a better alternative found the reservation is
> > -      * done by allocating from the artificially limited DMA-coherent
> > -      * memory.
> > +      * Check if there is memory region reserved for this device. If yes,
> > +      * pick up the msi_data from this region. This will be helpful for
> > +      * platforms that do not use/have 32-bit DMA addresses but want to use
> > +      * endpoints which support only 32-bit MSI address.
> > +      * Else, if the memory region is not reserved, try to allocate a 32-bit
> > +      * IOVA. Additionally, if this allocation also fails, attempt a 64-bit
> > +      * allocation. If the 64-bit MSI address is allocated, then the EPs
> > +      * supporting 32-bit MSI address will not work.
> >        */
> > -     ret = dma_set_coherent_mask(dev, DMA_BIT_MASK(32));
> > -     if (ret)
> > -             dev_warn(dev, "Failed to set DMA mask to 32-bit. Devices with only 32-bit MSI support may not work properly\n");
> > +     np = of_parse_phandle(dev->of_node, "memory-region", 0);
> > +     if (np) {
> > +             ret = of_address_to_resource(np, 0, &r);
>
> This is incorrect in several ways - reserved-memory nodes represent
> actual system memory, so can't be used to reserve arbitrary PCI memory
> space (which may be different if DMA offsets are involved); the whole
> purpose of going through the DMA API is to ensure we get a unique *bus*
> address. Obviously we don't want to reserve actual memory for something
> that functionally doesn't need it, but conversely having a
> reserved-memory region for an address which isn't memory would be
> nonsensical. And even if this *were* a viable approach, you haven't
> updated the DWC binding to allow it, nor defined a reserved-memory
> binding for the node itself.
>
> If it was reasonable to put something in DT at all, then the logical
> thing would be a property expressing an MSI address directly on the
> controller node itself, but even that would be dictating software policy
> rather than describing an aspect of the platform itself. Plus this is
> far from the only driver with this concern, so it wouldn't make much
> sense to hack just one binding without all the others as well. The rest
> of the DT already describes everything an OS needs to know in order to
> decide an MSI address to use, it's just a matter of making this
> particular OS do a better job of putting it all together.
>
> Thanks,
> Robin.
>
> > +             if (ret) {
> > +                     dev_err(dev, "No memory address assigned to the region\n");
> > +                     return ret;
> > +             }
> >
> > -     msi_vaddr = dmam_alloc_coherent(dev, sizeof(u64), &pp->msi_data,
> > -                                     GFP_KERNEL);
> > -     if (!msi_vaddr) {
> > -             dev_err(dev, "Failed to alloc and map MSI data\n");
> > -             dw_pcie_free_msi(pp);
> > -             return -ENOMEM;
> > +             pp->msi_data = r.start;
> > +     } else {
> > +             dev_dbg(dev, "No %s specified\n", "memory-region");
> > +             ret = dma_set_coherent_mask(dev, DMA_BIT_MASK(32));
> > +             if (ret)
> > +                     dev_warn(dev, "Failed to set DMA mask to 32-bit. Devices with only 32-bit MSI support may not work properly\n");
> > +
> > +             msi_vaddr = dmam_alloc_coherent(dev, sizeof(u64), &pp->msi_data,
> > +                                             GFP_KERNEL);
> > +             if (!msi_vaddr) {
> > +                     dev_warn(dev, "Failed to alloc 32-bit MSI data. Attempting 64-bit now\n");
> > +                     dma_set_coherent_mask(dev, DMA_BIT_MASK(64));
> > +                     msi_vaddr = dmam_alloc_coherent(dev, sizeof(u64), &pp->msi_data,
> > +                                                     GFP_KERNEL);
> > +             }
> > +
> > +             if (!msi_vaddr) {
> > +                     dev_err(dev, "Failed to alloc and map MSI data\n");
> > +                     dw_pcie_free_msi(pp);
> > +                     return -ENOMEM;
> > +             }
> >       }
> >
> >       return 0;
> > diff --git a/drivers/pci/controller/dwc/pcie-designware.h b/drivers/pci/controller/dwc/pcie-designware.h
> > index 55ff76e3d384..c85cf4d56e98 100644
> > --- a/drivers/pci/controller/dwc/pcie-designware.h
> > +++ b/drivers/pci/controller/dwc/pcie-designware.h
> > @@ -317,6 +317,7 @@ struct dw_pcie_rp {
> >       phys_addr_t             io_bus_addr;
> >       u32                     io_size;
> >       int                     irq;
> > +     u8                      coherent_dma_bits;
> >       const struct dw_pcie_host_ops *ops;
> >       int                     msi_irq[MAX_MSI_CTRLS];
> >       struct irq_domain       *irq_domain;


Robin,
Needed some clarification.
It seems you are implying that the PCIe device tree node should define a
property for the MSI address within the PCIe address space.
However, you also state that this would not be an ideal solution and
that you would prefer using existing device tree constructs.
I am not sure what you mean by "The rest of the DT already describes
everything."
Do you mean adding an "msi" reg to reg-names and defining the address
in the reg list?

Thanks,
Sajid
Robin Murphy Jan. 16, 2024, 9:02 p.m. UTC | #6
On 2024-01-16 5:18 pm, Sajid Dalvi wrote:
> On Tue, Jan 16, 2024 at 7:30 AM Robin Murphy <robin.murphy@arm.com> wrote:
>>
>> On 2024-01-11 4:21 am, Ajay Agarwal wrote:
>>> There can be platforms that do not use/have 32-bit DMA addresses
>>> but want to enumerate endpoints which support only 32-bit MSI
>>> address. The current implementation of 32-bit IOVA allocation can
>>> fail for such platforms, eventually leading to the probe failure.
>>>
>>> If there is a memory region reserved for the pci->dev, pick up
>>> the MSI data from this region. This can be used by the platforms
>>> described above.
>>>
>>> Else, if the memory region is not reserved, try to allocate a
>>> 32-bit IOVA. Additionally, if this allocation also fails, attempt
>>> a 64-bit allocation for probe to be successful. If the 64-bit MSI
>>> address is allocated, then the EPs supporting 32-bit MSI address
>>> will not work.
>>>
>>> Signed-off-by: Ajay Agarwal <ajayagarwal@google.com>
>>> ---
>>> Changelog since v1:
>>>    - Use reserved memory, if it exists, to setup the MSI data
>>>    - Fallback to 64-bit IOVA allocation if 32-bit allocation fails
>>>
>>>    .../pci/controller/dwc/pcie-designware-host.c | 50 ++++++++++++++-----
>>>    drivers/pci/controller/dwc/pcie-designware.h  |  1 +
>>>    2 files changed, 39 insertions(+), 12 deletions(-)
>>>
>>> diff --git a/drivers/pci/controller/dwc/pcie-designware-host.c
> b/drivers/pci/controller/dwc/pcie-designware-host.c
>>> index 7991f0e179b2..8c7c77b49ca8 100644
>>> --- a/drivers/pci/controller/dwc/pcie-designware-host.c
>>> +++ b/drivers/pci/controller/dwc/pcie-designware-host.c
>>> @@ -331,6 +331,8 @@ static int dw_pcie_msi_host_init(struct dw_pcie_rp
> *pp)
>>>        u64 *msi_vaddr;
>>>        int ret;
>>>        u32 ctrl, num_ctrls;
>>> +     struct device_node *np;
>>> +     struct resource r;
>>>
>>>        for (ctrl = 0; ctrl < MAX_MSI_CTRLS; ctrl++)
>>>                pp->irq_mask[ctrl] = ~0;
>>> @@ -374,20 +376,44 @@ static int dw_pcie_msi_host_init(struct
> dw_pcie_rp *pp)
>>>         * order not to miss MSI TLPs from those devices the MSI target
>>>         * address has to be within the lowest 4GB.
>>>         *
>>> -      * Note until there is a better alternative found the reservation
> is
>>> -      * done by allocating from the artificially limited DMA-coherent
>>> -      * memory.
>>> +      * Check if there is memory region reserved for this device. If
> yes,
>>> +      * pick up the msi_data from this region. This will be helpful for
>>> +      * platforms that do not use/have 32-bit DMA addresses but want
> to use
>>> +      * endpoints which support only 32-bit MSI address.
>>> +      * Else, if the memory region is not reserved, try to allocate a
> 32-bit
>>> +      * IOVA. Additionally, if this allocation also fails, attempt a
> 64-bit
>>> +      * allocation. If the 64-bit MSI address is allocated, then the
> EPs
>>> +      * supporting 32-bit MSI address will not work.
>>>         */
>>> -     ret = dma_set_coherent_mask(dev, DMA_BIT_MASK(32));
>>> -     if (ret)
>>> -             dev_warn(dev, "Failed to set DMA mask to 32-bit. Devices
> with only 32-bit MSI support may not work properly\n");
>>> +     np = of_parse_phandle(dev->of_node, "memory-region", 0);
>>> +     if (np) {
>>> +             ret = of_address_to_resource(np, 0, &r);
>>
>> This is incorrect in several ways - reserved-memory nodes represent
>> actual system memory, so can't be used to reserve arbitrary PCI memory
>> space (which may be different if DMA offsets are involved); the whole
>> purpose of going through the DMA API is to ensure we get a unique *bus*
>> address. Obviously we don't want to reserve actual memory for something
>> that functionally doesn't need it, but conversely having a
>> reserved-memory region for an address which isn't memory would be
>> nonsensical. And even if this *were* a viable approach, you haven't
>> updated the DWC binding to allow it, nor defined a reserved-memory
>> binding for the node itself.
>>
>> If it was reasonable to put something in DT at all, then the logical
>> thing would be a property expressing an MSI address directly on the
>> controller node itself, but even that would be dictating software policy
>> rather than describing an aspect of the platform itself. Plus this is
>> far from the only driver with this concern, so it wouldn't make much
>> sense to hack just one binding without all the others as well. The rest
>> of the DT already describes everything an OS needs to know in order to
>> decide an MSI address to use, it's just a matter of making this
>> particular OS do a better job of putting it all together.
>>
>> Thanks,
>> Robin.
>>
> 
> Robin,
> Needed some clarification.
> It seems you are implying that the pcie device tree node should define a
> property for the MSI address within the PCIe address space.
> However, you also state that this would not be an ideal solution, and
> would prefer using existing device tree constructs.
> I am not sure what you mean by, " The rest of the DT already describes
> everything."
> Do you mean adding an "msi" reg to reg-names and defining the address
> in the reg list?

No, I'm saying the closest this should come to DT at all is the 
possibility of the low-level driver hard-coding a platform-specific 
value for pp->msi_data based on some platform-specific compatible, as 
Serge pointed to on v1.

Otherwise, based on the system memory layout and dma-ranges of the 
controller node we have enough information to figure out what PCI bus 
address ranges can't collide with any valid DMA mapping of RAM, and thus 
generate a suitable MSI address, but that really wants to be a generic 
PCI-layer helper (which could also generically implement the various DMA 
API tricks as a fallback if necessary).

Thanks,
Robin.
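For illustration of that first option, here is a rough, hypothetical
glue-driver fragment (the "example,pcie-rc" compatible and the
0xa0000000 bus address are invented, not taken from any real platform);
it also assumes the "if (pp->msi_data) return 0;" early-out proposed in
the reply below, so the core code would not overwrite the value:

/* Hypothetical vendor glue driver, setup and error handling trimmed */
static int example_pcie_probe(struct platform_device *pdev)
{
	struct dw_pcie *pci;

	pci = devm_kzalloc(&pdev->dev, sizeof(*pci), GFP_KERNEL);
	if (!pci)
		return -ENOMEM;

	pci->dev = &pdev->dev;

	/*
	 * Hard-code a platform-specific MSI target, i.e. a PCIe bus
	 * address known to have no RAM/DMA mapping behind it on this
	 * SoC, keyed off the platform-specific compatible string.
	 */
	if (of_device_is_compatible(pdev->dev.of_node, "example,pcie-rc"))
		pci->pp.msi_data = 0xa0000000;

	return dw_pcie_host_init(&pci->pp);
}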
Ajay Agarwal Jan. 19, 2024, 5:58 a.m. UTC | #7
On Tue, Jan 16, 2024 at 09:02:48PM +0000, Robin Murphy wrote:
> On 2024-01-16 5:18 pm, Sajid Dalvi wrote:
> > On Tue, Jan 16, 2024 at 7:30 AM Robin Murphy <robin.murphy@arm.com> wrote:
> > > 
> > > On 2024-01-11 4:21 am, Ajay Agarwal wrote:
> > > > There can be platforms that do not use/have 32-bit DMA addresses
> > > > but want to enumerate endpoints which support only 32-bit MSI
> > > > address. The current implementation of 32-bit IOVA allocation can
> > > > fail for such platforms, eventually leading to the probe failure.
> > > > 
> > > > If there is a memory region reserved for the pci->dev, pick up
> > > > the MSI data from this region. This can be used by the platforms
> > > > described above.
> > > > 
> > > > Else, if the memory region is not reserved, try to allocate a
> > > > 32-bit IOVA. Additionally, if this allocation also fails, attempt
> > > > a 64-bit allocation for probe to be successful. If the 64-bit MSI
> > > > address is allocated, then the EPs supporting 32-bit MSI address
> > > > will not work.
> > > > 
> > > > Signed-off-by: Ajay Agarwal <ajayagarwal@google.com>
> > > > ---
> > > > Changelog since v1:
> > > >    - Use reserved memory, if it exists, to setup the MSI data
> > > >    - Fallback to 64-bit IOVA allocation if 32-bit allocation fails
> > > > 
> > > >    .../pci/controller/dwc/pcie-designware-host.c | 50 ++++++++++++++-----
> > > >    drivers/pci/controller/dwc/pcie-designware.h  |  1 +
> > > >    2 files changed, 39 insertions(+), 12 deletions(-)
> > > > 
> > > > diff --git a/drivers/pci/controller/dwc/pcie-designware-host.c
> > b/drivers/pci/controller/dwc/pcie-designware-host.c
> > > > index 7991f0e179b2..8c7c77b49ca8 100644
> > > > --- a/drivers/pci/controller/dwc/pcie-designware-host.c
> > > > +++ b/drivers/pci/controller/dwc/pcie-designware-host.c
> > > > @@ -331,6 +331,8 @@ static int dw_pcie_msi_host_init(struct dw_pcie_rp
> > *pp)
> > > >        u64 *msi_vaddr;
> > > >        int ret;
> > > >        u32 ctrl, num_ctrls;
> > > > +     struct device_node *np;
> > > > +     struct resource r;
> > > > 
> > > >        for (ctrl = 0; ctrl < MAX_MSI_CTRLS; ctrl++)
> > > >                pp->irq_mask[ctrl] = ~0;
> > > > @@ -374,20 +376,44 @@ static int dw_pcie_msi_host_init(struct
> > dw_pcie_rp *pp)
> > > >         * order not to miss MSI TLPs from those devices the MSI target
> > > >         * address has to be within the lowest 4GB.
> > > >         *
> > > > -      * Note until there is a better alternative found the reservation
> > is
> > > > -      * done by allocating from the artificially limited DMA-coherent
> > > > -      * memory.
> > > > +      * Check if there is memory region reserved for this device. If
> > yes,
> > > > +      * pick up the msi_data from this region. This will be helpful for
> > > > +      * platforms that do not use/have 32-bit DMA addresses but want
> > to use
> > > > +      * endpoints which support only 32-bit MSI address.
> > > > +      * Else, if the memory region is not reserved, try to allocate a
> > 32-bit
> > > > +      * IOVA. Additionally, if this allocation also fails, attempt a
> > 64-bit
> > > > +      * allocation. If the 64-bit MSI address is allocated, then the
> > EPs
> > > > +      * supporting 32-bit MSI address will not work.
> > > >         */
> > > > -     ret = dma_set_coherent_mask(dev, DMA_BIT_MASK(32));
> > > > -     if (ret)
> > > > -             dev_warn(dev, "Failed to set DMA mask to 32-bit. Devices
> > with only 32-bit MSI support may not work properly\n");
> > > > +     np = of_parse_phandle(dev->of_node, "memory-region", 0);
> > > > +     if (np) {
> > > > +             ret = of_address_to_resource(np, 0, &r);
> > > 
> > > This is incorrect in several ways - reserved-memory nodes represent
> > > actual system memory, so can't be used to reserve arbitrary PCI memory
> > > space (which may be different if DMA offsets are involved); the whole
> > > purpose of going through the DMA API is to ensure we get a unique *bus*
> > > address. Obviously we don't want to reserve actual memory for something
> > > that functionally doesn't need it, but conversely having a
> > > reserved-memory region for an address which isn't memory would be
> > > nonsensical. And even if this *were* a viable approach, you haven't
> > > updated the DWC binding to allow it, nor defined a reserved-memory
> > > binding for the node itself.
> > > 
> > > If it was reasonable to put something in DT at all, then the logical
> > > thing would be a property expressing an MSI address directly on the
> > > controller node itself, but even that would be dictating software policy
> > > rather than describing an aspect of the platform itself. Plus this is
> > > far from the only driver with this concern, so it wouldn't make much
> > > sense to hack just one binding without all the others as well. The rest
> > > of the DT already describes everything an OS needs to know in order to
> > > decide an MSI address to use, it's just a matter of making this
> > > particular OS do a better job of putting it all together.
> > > 
> > > Thanks,
> > > Robin.
> > > 
> > 
> > Robin,
> > Needed some clarification.
> > It seems you are implying that the pcie device tree node should define a
> > property for the MSI address within the PCIe address space.
> > However, you also state that this would not be an ideal solution, and
> > would prefer using existing device tree constructs.
> > I am not sure what you mean by, " The rest of the DT already describes
> > everything."
> > Do you mean adding an "msi" reg to reg-names and defining the address
> > in the reg list?
> 
> No, I'm saying the closest this should come to DT at all is the possibility
> of the low-level driver hard-coding a platform-specific value for

I am assuming that you mean the platform driver (IOW, vendor driver) by
the "low-level" driver? Please confirm.

> pp->msi_data based on some platform-specific compatible, as Serge pointed to
> on v1.
>
Does this look OK to you? The expectation is that pp->msi_data will
have to be populated by the platform driver if it wants to ensure
support for all kinds of endpoints.

+       if (pp->msi_data)
+               return 0;
+
        ret = dma_set_coherent_mask(dev, DMA_BIT_MASK(32));
        if (ret)
                dev_warn(dev, "Failed to set DMA mask to 32-bit. Devices with only 32-bit MSI support may not work properly\n");
 
        msi_vaddr = dmam_alloc_coherent(dev, sizeof(u64), &pp->msi_data,
                                        GFP_KERNEL);
+       if (!msi_vaddr) {
+               dev_warn(dev, "Failed to alloc 32-bit MSI data. Attempting 64-bit now\n");
+               dma_set_coherent_mask(dev, DMA_BIT_MASK(64));
+               msi_vaddr = dmam_alloc_coherent(dev, sizeof(u64), &pp->msi_data,
+                                               GFP_KERNEL);
+       }
+
        if (!msi_vaddr) {
                dev_err(dev, "Failed to alloc and map MSI data\n");
                dw_pcie_free_msi(pp);

> Otherwise, based on the system memory layout and dma-ranges of the
> controller node we have enough information to figure out what PCI bus
> address ranges can't collide with any valid DMA mapping of RAM, and thus
> generate a suitable MSI address, but that really wants to be a generic
> PCI-layer helper (which could also generically implement the various DMA API
> tricks as a fallback if necessary).
> 
> Thanks,
> Robin.
Ajay Agarwal Jan. 30, 2024, 5:22 p.m. UTC | #8
Hi Robin, Sergey
A friendly ping for your review.
Robin Murphy Jan. 30, 2024, 5:39 p.m. UTC | #9
On 19/01/2024 5:58 am, Ajay Agarwal wrote:
> On Tue, Jan 16, 2024 at 09:02:48PM +0000, Robin Murphy wrote:
>> On 2024-01-16 5:18 pm, Sajid Dalvi wrote:
>>> On Tue, Jan 16, 2024 at 7:30 AM Robin Murphy <robin.murphy@arm.com> wrote:
>>>>
>>>> On 2024-01-11 4:21 am, Ajay Agarwal wrote:
>>>>> There can be platforms that do not use/have 32-bit DMA addresses
>>>>> but want to enumerate endpoints which support only 32-bit MSI
>>>>> address. The current implementation of 32-bit IOVA allocation can
>>>>> fail for such platforms, eventually leading to the probe failure.
>>>>>
>>>>> If there is a memory region reserved for the pci->dev, pick up
>>>>> the MSI data from this region. This can be used by the platforms
>>>>> described above.
>>>>>
>>>>> Else, if the memory region is not reserved, try to allocate a
>>>>> 32-bit IOVA. Additionally, if this allocation also fails, attempt
>>>>> a 64-bit allocation for probe to be successful. If the 64-bit MSI
>>>>> address is allocated, then the EPs supporting 32-bit MSI address
>>>>> will not work.
>>>>>
>>>>> Signed-off-by: Ajay Agarwal <ajayagarwal@google.com>
>>>>> ---
>>>>> Changelog since v1:
>>>>>     - Use reserved memory, if it exists, to setup the MSI data
>>>>>     - Fallback to 64-bit IOVA allocation if 32-bit allocation fails
>>>>>
>>>>>     .../pci/controller/dwc/pcie-designware-host.c | 50 ++++++++++++++-----
>>>>>     drivers/pci/controller/dwc/pcie-designware.h  |  1 +
>>>>>     2 files changed, 39 insertions(+), 12 deletions(-)
>>>>>
>>>>> diff --git a/drivers/pci/controller/dwc/pcie-designware-host.c
>>> b/drivers/pci/controller/dwc/pcie-designware-host.c
>>>>> index 7991f0e179b2..8c7c77b49ca8 100644
>>>>> --- a/drivers/pci/controller/dwc/pcie-designware-host.c
>>>>> +++ b/drivers/pci/controller/dwc/pcie-designware-host.c
>>>>> @@ -331,6 +331,8 @@ static int dw_pcie_msi_host_init(struct dw_pcie_rp
>>> *pp)
>>>>>         u64 *msi_vaddr;
>>>>>         int ret;
>>>>>         u32 ctrl, num_ctrls;
>>>>> +     struct device_node *np;
>>>>> +     struct resource r;
>>>>>
>>>>>         for (ctrl = 0; ctrl < MAX_MSI_CTRLS; ctrl++)
>>>>>                 pp->irq_mask[ctrl] = ~0;
>>>>> @@ -374,20 +376,44 @@ static int dw_pcie_msi_host_init(struct
>>> dw_pcie_rp *pp)
>>>>>          * order not to miss MSI TLPs from those devices the MSI target
>>>>>          * address has to be within the lowest 4GB.
>>>>>          *
>>>>> -      * Note until there is a better alternative found the reservation
>>> is
>>>>> -      * done by allocating from the artificially limited DMA-coherent
>>>>> -      * memory.
>>>>> +      * Check if there is memory region reserved for this device. If
>>> yes,
>>>>> +      * pick up the msi_data from this region. This will be helpful for
>>>>> +      * platforms that do not use/have 32-bit DMA addresses but want
>>> to use
>>>>> +      * endpoints which support only 32-bit MSI address.
>>>>> +      * Else, if the memory region is not reserved, try to allocate a
>>> 32-bit
>>>>> +      * IOVA. Additionally, if this allocation also fails, attempt a
>>> 64-bit
>>>>> +      * allocation. If the 64-bit MSI address is allocated, then the
>>> EPs
>>>>> +      * supporting 32-bit MSI address will not work.
>>>>>          */
>>>>> -     ret = dma_set_coherent_mask(dev, DMA_BIT_MASK(32));
>>>>> -     if (ret)
>>>>> -             dev_warn(dev, "Failed to set DMA mask to 32-bit. Devices
>>> with only 32-bit MSI support may not work properly\n");
>>>>> +     np = of_parse_phandle(dev->of_node, "memory-region", 0);
>>>>> +     if (np) {
>>>>> +             ret = of_address_to_resource(np, 0, &r);
>>>>
>>>> This is incorrect in several ways - reserved-memory nodes represent
>>>> actual system memory, so can't be used to reserve arbitrary PCI memory
>>>> space (which may be different if DMA offsets are involved); the whole
>>>> purpose of going through the DMA API is to ensure we get a unique *bus*
>>>> address. Obviously we don't want to reserve actual memory for something
>>>> that functionally doesn't need it, but conversely having a
>>>> reserved-memory region for an address which isn't memory would be
>>>> nonsensical. And even if this *were* a viable approach, you haven't
>>>> updated the DWC binding to allow it, nor defined a reserved-memory
>>>> binding for the node itself.
>>>>
>>>> If it was reasonable to put something in DT at all, then the logical
>>>> thing would be a property expressing an MSI address directly on the
>>>> controller node itself, but even that would be dictating software policy
>>>> rather than describing an aspect of the platform itself. Plus this is
>>>> far from the only driver with this concern, so it wouldn't make much
>>>> sense to hack just one binding without all the others as well. The rest
>>>> of the DT already describes everything an OS needs to know in order to
>>>> decide an MSI address to use, it's just a matter of making this
>>>> particular OS do a better job of putting it all together.
>>>>
>>>> Thanks,
>>>> Robin.
>>>>
>>>
>>> Robin,
>>> Needed some clarification.
>>> It seems you are implying that the pcie device tree node should define a
>>> property for the MSI address within the PCIe address space.
>>> However, you also state that this would not be an ideal solution, and
>>> would prefer using existing device tree constructs.
>>> I am not sure what you mean by, " The rest of the DT already describes
>>> everything."
>>> Do you mean adding an "msi" reg to reg-names and defining the address
>>> in the reg list?
>>
>> No, I'm saying the closest this should come to DT at all is the possibility
>> of the low-level driver hard-coding a platform-specific value for
> 
> I am assuming that you mean the platform driver (IOW, vendor driver) by
> the "low-level" driver? Please confirm.

Indeed, I mean the vendor/platform driver (sorry if I'm not aware of any 
standard terminology here - I think I picked up "low-level driver" from 
one of the previous threads).

>> pp->msi_data based on some platform-specific compatible, as Serge pointed to
>> on v1.
>>
> Does this look ok to you? The expectation is that the pp->msi_data will
> have to be populated by the platform driver if it wants to ensure the
> support for all kinds of endpoints.
> 
> +       if (pp->msi_data)
> +               return 0;
> +
>          ret = dma_set_coherent_mask(dev, DMA_BIT_MASK(32));
>          if (ret)
>                  dev_warn(dev, "Failed to set DMA mask to 32-bit. Devices with only 32-bit MSI support may not work properly\n");
>   
>          msi_vaddr = dmam_alloc_coherent(dev, sizeof(u64), &pp->msi_data,
>                                          GFP_KERNEL);
> +       if (!msi_vaddr) {
> +               dev_warn(dev, "Failed to alloc 32-bit MSI data. Attempting 64-bit now\n");
> +               dma_set_coherent_mask(dev, DMA_BIT_MASK(64));
> +               msi_vaddr = dmam_alloc_coherent(dev, sizeof(u64), &pp->msi_data,
> +                                               GFP_KERNEL);
> +       }
> +
>          if (!msi_vaddr) {
>                  dev_err(dev, "Failed to alloc and map MSI data\n");
>                  dw_pcie_free_msi(pp);

Yeah, something like that. Personally I'd still be tempted to try some 
mildly more involved logic to just have a single dev_warn(), but I think 
that's less important than just having something which clearly works.

Thanks,
Robin.
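For illustration, one possible shape for such single-warning logic,
reusing dev/pp/msi_vaddr from dw_pcie_msi_host_init() above (a sketch
only, neither tested nor proposed in this thread):

	static const unsigned int widths[] = { 32, 64 };
	int i;

	msi_vaddr = NULL;
	for (i = 0; i < ARRAY_SIZE(widths); i++) {
		if (dma_set_coherent_mask(dev, DMA_BIT_MASK(widths[i])))
			continue;
		msi_vaddr = dmam_alloc_coherent(dev, sizeof(u64), &pp->msi_data,
						GFP_KERNEL);
		if (msi_vaddr)
			break;
	}

	if (!msi_vaddr) {
		dev_err(dev, "Failed to alloc and map MSI data\n");
		dw_pcie_free_msi(pp);
		return -ENOMEM;
	}

	/* Warn once, and only if the MSI target ended up above 4GB */
	if (widths[i] > 32)
		dev_warn(dev, "Devices with only 32-bit MSI support may not work properly\n");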

Patch

diff --git a/drivers/pci/controller/dwc/pcie-designware-host.c b/drivers/pci/controller/dwc/pcie-designware-host.c
index 7991f0e179b2..8c7c77b49ca8 100644
--- a/drivers/pci/controller/dwc/pcie-designware-host.c
+++ b/drivers/pci/controller/dwc/pcie-designware-host.c
@@ -331,6 +331,8 @@  static int dw_pcie_msi_host_init(struct dw_pcie_rp *pp)
 	u64 *msi_vaddr;
 	int ret;
 	u32 ctrl, num_ctrls;
+	struct device_node *np;
+	struct resource r;
 
 	for (ctrl = 0; ctrl < MAX_MSI_CTRLS; ctrl++)
 		pp->irq_mask[ctrl] = ~0;
@@ -374,20 +376,44 @@  static int dw_pcie_msi_host_init(struct dw_pcie_rp *pp)
 	 * order not to miss MSI TLPs from those devices the MSI target
 	 * address has to be within the lowest 4GB.
 	 *
-	 * Note until there is a better alternative found the reservation is
-	 * done by allocating from the artificially limited DMA-coherent
-	 * memory.
+	 * Check if there is memory region reserved for this device. If yes,
+	 * pick up the msi_data from this region. This will be helpful for
+	 * platforms that do not use/have 32-bit DMA addresses but want to use
+	 * endpoints which support only 32-bit MSI address.
+	 * Else, if the memory region is not reserved, try to allocate a 32-bit
+	 * IOVA. Additionally, if this allocation also fails, attempt a 64-bit
+	 * allocation. If the 64-bit MSI address is allocated, then the EPs
+	 * supporting 32-bit MSI address will not work.
 	 */
-	ret = dma_set_coherent_mask(dev, DMA_BIT_MASK(32));
-	if (ret)
-		dev_warn(dev, "Failed to set DMA mask to 32-bit. Devices with only 32-bit MSI support may not work properly\n");
+	np = of_parse_phandle(dev->of_node, "memory-region", 0);
+	if (np) {
+		ret = of_address_to_resource(np, 0, &r);
+		if (ret) {
+			dev_err(dev, "No memory address assigned to the region\n");
+			return ret;
+		}
 
-	msi_vaddr = dmam_alloc_coherent(dev, sizeof(u64), &pp->msi_data,
-					GFP_KERNEL);
-	if (!msi_vaddr) {
-		dev_err(dev, "Failed to alloc and map MSI data\n");
-		dw_pcie_free_msi(pp);
-		return -ENOMEM;
+		pp->msi_data = r.start;
+	} else {
+		dev_dbg(dev, "No %s specified\n", "memory-region");
+		ret = dma_set_coherent_mask(dev, DMA_BIT_MASK(32));
+		if (ret)
+			dev_warn(dev, "Failed to set DMA mask to 32-bit. Devices with only 32-bit MSI support may not work properly\n");
+
+		msi_vaddr = dmam_alloc_coherent(dev, sizeof(u64), &pp->msi_data,
+						GFP_KERNEL);
+		if (!msi_vaddr) {
+			dev_warn(dev, "Failed to alloc 32-bit MSI data. Attempting 64-bit now\n");
+			dma_set_coherent_mask(dev, DMA_BIT_MASK(64));
+			msi_vaddr = dmam_alloc_coherent(dev, sizeof(u64), &pp->msi_data,
+							GFP_KERNEL);
+		}
+
+		if (!msi_vaddr) {
+			dev_err(dev, "Failed to alloc and map MSI data\n");
+			dw_pcie_free_msi(pp);
+			return -ENOMEM;
+		}
 	}
 
 	return 0;
diff --git a/drivers/pci/controller/dwc/pcie-designware.h b/drivers/pci/controller/dwc/pcie-designware.h
index 55ff76e3d384..c85cf4d56e98 100644
--- a/drivers/pci/controller/dwc/pcie-designware.h
+++ b/drivers/pci/controller/dwc/pcie-designware.h
@@ -317,6 +317,7 @@  struct dw_pcie_rp {
 	phys_addr_t		io_bus_addr;
 	u32			io_size;
 	int			irq;
+	u8			coherent_dma_bits;
 	const struct dw_pcie_host_ops *ops;
 	int			msi_irq[MAX_MSI_CTRLS];
 	struct irq_domain	*irq_domain;