diff mbox series

[v5,5/5] PCI: dwc: Add common send PME_Turn_Off message method

Message ID 20240319-pme_msg-v5-5-af9ffe57f432@nxp.com
State New
Headers show
Series PCI: dwc: Add common pme_turn_off message by using outbound iATU | expand

Commit Message

Frank Li March 19, 2024, 4:07 p.m. UTC
Reserve space at end of first IORESOURCE_MEM window as message TLP MMIO
window. This space's size is 'region_align'.

Set outbound ATU map memory write to send PCI message. So one MMIO write
can trigger a PCI message, such as PME_Turn_Off.

Add common dwc_pme_turn_off() function.

Call dwc_pme_turn_off() to send out PME_Turn_Off message in general
dw_pcie_suspend_noirq() if there are not platform callback pme_turn_off()
exist.

Signed-off-by: Frank Li <Frank.Li@nxp.com>
---
 drivers/pci/controller/dwc/pcie-designware-host.c | 94 ++++++++++++++++++++++-
 drivers/pci/controller/dwc/pcie-designware.h      |  3 +
 2 files changed, 93 insertions(+), 4 deletions(-)

Comments

Manivannan Sadhasivam April 5, 2024, 6:24 a.m. UTC | #1
On Tue, Mar 19, 2024 at 12:07:15PM -0400, Frank Li wrote:

PCI: dwc: Add generic MSG TLP support for sending PME_Turn_Off during system suspend

> Reserve space at end of first IORESOURCE_MEM window as message TLP MMIO
> window. This space's size is 'region_align'.
> 
> Set outbound ATU map memory write to send PCI message. So one MMIO write
> can trigger a PCI message, such as PME_Turn_Off.
> 
> Add common dwc_pme_turn_off() function.
> 
> Call dwc_pme_turn_off() to send out PME_Turn_Off message in general
> dw_pcie_suspend_noirq() if there are not platform callback pme_turn_off()
> exist.
> 

How about:

"Instead of relying on the vendor specific implementations to send the
PME_Turn_Off message, let's introduce a generic way of sending the message using
the MSG TLP.

This is achieved by reserving a region for MSG TLP of size 'pci->region_align',
at the end of the first IORESOURCE_MEM window of the host bridge. And then
sending the PME_Turn_Off message during system suspend with the help of iATU.

It should be noted that this generic implementation is optional for the glue
drivers and can be overridden by a custom 'pme_turn_off' callback."

> Signed-off-by: Frank Li <Frank.Li@nxp.com>
> ---
>  drivers/pci/controller/dwc/pcie-designware-host.c | 94 ++++++++++++++++++++++-
>  drivers/pci/controller/dwc/pcie-designware.h      |  3 +
>  2 files changed, 93 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/pci/controller/dwc/pcie-designware-host.c b/drivers/pci/controller/dwc/pcie-designware-host.c
> index 267687ab33cbc..d5723fce7a894 100644
> --- a/drivers/pci/controller/dwc/pcie-designware-host.c
> +++ b/drivers/pci/controller/dwc/pcie-designware-host.c
> @@ -393,6 +393,31 @@ static int dw_pcie_msi_host_init(struct dw_pcie_rp *pp)
>  	return 0;
>  }
>  
> +static void dw_pcie_host_request_msg_tlp_res(struct dw_pcie_rp *pp)
> +{
> +	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
> +	struct resource_entry *win;
> +	struct resource *res;
> +
> +	win = resource_list_first_type(&pp->bridge->windows, IORESOURCE_MEM);
> +	if (win) {
> +		res = devm_kzalloc(pci->dev, sizeof(*res), GFP_KERNEL);
> +		if (!res)
> +			return;
> +
> +		/* Reserve last region_align block as message TLP space */
> +		res->start = win->res->end - pci->region_align + 1;
> +		res->end = win->res->end;

Don't you need to adjust the host bridge window size and end address?

> +		res->name = "msg";
> +		res->flags = win->res->flags | IORESOURCE_BUSY;
> +

Shouldn't this resource be added back to the host bridge?

> +		if (!request_resource(win->res, res))

Why can't you use devm_ helper to manage the resource, since the lifetime of the
resource is till dw_pcie_host_deinit()?

> +			pp->msg_res = res;
> +		else
> +			devm_kfree(pci->dev, res);
> +	}
> +}
> +
>  int dw_pcie_host_init(struct dw_pcie_rp *pp)
>  {
>  	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
> @@ -479,6 +504,10 @@ int dw_pcie_host_init(struct dw_pcie_rp *pp)
>  
>  	dw_pcie_iatu_detect(pci);
>  
> +	/* Need call after dw_pcie_iatu_detect() before dw_pcie_setup_rc() */

It'd be better to add the reason also i.,e

	/*
	 * Allocate the resource for MSG TLP before programming the iATU
	 * outbound window in dw_pcie_setup_rc(). Since the allocation depends
	 * on the value of 'region_align', this has to be done after
	 * dw_pcie_iatu_detect().
	 */

> +	if (pp->use_atu_msg)

Who is setting this flag?

> +		dw_pcie_host_request_msg_tlp_res(pp);
> +
>  	ret = dw_pcie_edma_detect(pci);
>  	if (ret)
>  		goto err_free_msi;
> @@ -536,6 +565,11 @@ void dw_pcie_host_deinit(struct dw_pcie_rp *pp)
>  
>  	dw_pcie_edma_remove(pci);
>  
> +	if (pp->msg_res) {
> +		release_resource(pp->msg_res);
> +		devm_kfree(pci->dev, pp->msg_res);
> +	}
> +
>  	if (pp->has_msi_ctrl)
>  		dw_pcie_free_msi(pp);
>  
> @@ -697,6 +731,10 @@ static int dw_pcie_iatu_setup(struct dw_pcie_rp *pp)
>  		atu.pci_addr = entry->res->start - entry->offset;
>  		atu.size = resource_size(entry->res);
>  
> +		/* MSG TLB window resource reserve at one of end of IORESOURCE_MEM resource */
> +		if (pp->msg_res && pp->msg_res->parent == entry->res)
> +			atu.size -= resource_size(pp->msg_res);

If you adjust the bridge window above, then this won't be needed.

> +
>  		ret = dw_pcie_prog_outbound_atu(pci, &atu);
>  		if (ret) {
>  			dev_err(pci->dev, "Failed to set MEM range %pr\n",
> @@ -728,6 +766,8 @@ static int dw_pcie_iatu_setup(struct dw_pcie_rp *pp)
>  		dev_warn(pci->dev, "Ranges exceed outbound iATU size (%d)\n",
>  			 pci->num_ob_windows);
>  
> +	pp->msg_atu_index = i;
> +
>  	i = 0;
>  	resource_list_for_each_entry(entry, &pp->bridge->dma_ranges) {
>  		if (resource_type(entry->res) != IORESOURCE_MEM)
> @@ -833,11 +873,54 @@ int dw_pcie_setup_rc(struct dw_pcie_rp *pp)
>  }
>  EXPORT_SYMBOL_GPL(dw_pcie_setup_rc);
>  
> +/* Using message outbound ATU to send out PME_Turn_Off message for dwc PCIe controller */
> +static int dw_pcie_pme_turn_off(struct dw_pcie *pci)
> +{
> +	struct dw_pcie_ob_atu_cfg atu = { 0 };
> +	void __iomem *m;

*mem

> +	int ret;
> +
> +	if (pci->num_ob_windows <= pci->pp.msg_atu_index)
> +		return -EINVAL;
> +
> +	if (!pci->pp.msg_res)
> +		return -EINVAL;
> +
> +	atu.code = PCIE_MSG_CODE_PME_TURN_OFF;
> +	atu.routing = PCIE_MSG_TYPE_R_BC;
> +	atu.type = PCIE_ATU_TYPE_MSG;
> +	atu.size = resource_size(pci->pp.msg_res);
> +	atu.index = pci->pp.msg_atu_index;
> +
> +	if (!atu.size) {
> +		dev_dbg(pci->dev,
> +			"atu memory map windows is zero, please check 'msg' reg in dts\n");

You are already checking the existence of the 'pci->pp.msg_res' region above. So
shouldn't that be sufficient enough? Can the size be 0, if the region exist?

- Mani

> +		return -ENOMEM;
> +	}
> +
> +	atu.cpu_addr = pci->pp.msg_res->start;
> +
> +	ret = dw_pcie_prog_outbound_atu(pci, &atu);
> +	if (ret)
> +		return ret;
> +
> +	m = ioremap(atu.cpu_addr, pci->region_align);
> +	if (!m)
> +		return -ENOMEM;
> +
> +	/* A dummy write is converted to a Msg TLP */
> +	writel(0, m);
> +
> +	iounmap(m);
> +
> +	return 0;
> +}
> +
>  int dw_pcie_suspend_noirq(struct dw_pcie *pci)
>  {
>  	u8 offset = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP);
>  	u32 val;
> -	int ret;
> +	int ret = 0;
>  
>  	/*
>  	 * If L1SS is supported, then do not put the link into L2 as some
> @@ -849,10 +932,13 @@ int dw_pcie_suspend_noirq(struct dw_pcie *pci)
>  	if (dw_pcie_get_ltssm(pci) <= DW_PCIE_LTSSM_DETECT_ACT)
>  		return 0;
>  
> -	if (!pci->pp.ops->pme_turn_off)
> -		return 0;
> +	if (pci->pp.ops->pme_turn_off)
> +		pci->pp.ops->pme_turn_off(&pci->pp);
> +	else
> +		ret = dw_pcie_pme_turn_off(pci);
>  
> -	pci->pp.ops->pme_turn_off(&pci->pp);
> +	if (ret)
> +		return ret;
>  
>  	ret = read_poll_timeout(dw_pcie_get_ltssm, val, val == DW_PCIE_LTSSM_L2_IDLE,
>  				PCIE_PME_TO_L2_TIMEOUT_US/10,
> diff --git a/drivers/pci/controller/dwc/pcie-designware.h b/drivers/pci/controller/dwc/pcie-designware.h
> index 703b50bc5e0f1..dca5de4c6e877 100644
> --- a/drivers/pci/controller/dwc/pcie-designware.h
> +++ b/drivers/pci/controller/dwc/pcie-designware.h
> @@ -341,6 +341,9 @@ struct dw_pcie_rp {
>  	struct pci_host_bridge  *bridge;
>  	raw_spinlock_t		lock;
>  	DECLARE_BITMAP(msi_irq_in_use, MAX_MSI_IRQS);
> +	bool			use_atu_msg;
> +	int			msg_atu_index;
> +	struct resource		*msg_res;
>  };
>  
>  struct dw_pcie_ep_ops {
> 
> -- 
> 2.34.1
>
Frank Li April 5, 2024, 2:31 p.m. UTC | #2
On Fri, Apr 05, 2024 at 11:54:26AM +0530, Manivannan Sadhasivam wrote:
> On Tue, Mar 19, 2024 at 12:07:15PM -0400, Frank Li wrote:
> 
> PCI: dwc: Add generic MSG TLP support for sending PME_Turn_Off during system suspend
> 
> > Reserve space at end of first IORESOURCE_MEM window as message TLP MMIO
> > window. This space's size is 'region_align'.
> > 
> > Set outbound ATU map memory write to send PCI message. So one MMIO write
> > can trigger a PCI message, such as PME_Turn_Off.
> > 
> > Add common dwc_pme_turn_off() function.
> > 
> > Call dwc_pme_turn_off() to send out PME_Turn_Off message in general
> > dw_pcie_suspend_noirq() if there are not platform callback pme_turn_off()
> > exist.
> > 
> 
> How about:
> 
> "Instead of relying on the vendor specific implementations to send the
> PME_Turn_Off message, let's introduce a generic way of sending the message using
> the MSG TLP.
> 
> This is achieved by reserving a region for MSG TLP of size 'pci->region_align',
> at the end of the first IORESOURCE_MEM window of the host bridge. And then
> sending the PME_Turn_Off message during system suspend with the help of iATU.
> 
> It should be noted that this generic implementation is optional for the glue
> drivers and can be overridden by a custom 'pme_turn_off' callback."
> 
> > Signed-off-by: Frank Li <Frank.Li@nxp.com>
> > ---
> >  drivers/pci/controller/dwc/pcie-designware-host.c | 94 ++++++++++++++++++++++-
> >  drivers/pci/controller/dwc/pcie-designware.h      |  3 +
> >  2 files changed, 93 insertions(+), 4 deletions(-)
> > 
> > diff --git a/drivers/pci/controller/dwc/pcie-designware-host.c b/drivers/pci/controller/dwc/pcie-designware-host.c
> > index 267687ab33cbc..d5723fce7a894 100644
> > --- a/drivers/pci/controller/dwc/pcie-designware-host.c
> > +++ b/drivers/pci/controller/dwc/pcie-designware-host.c
> > @@ -393,6 +393,31 @@ static int dw_pcie_msi_host_init(struct dw_pcie_rp *pp)
> >  	return 0;
> >  }
> >  
> > +static void dw_pcie_host_request_msg_tlp_res(struct dw_pcie_rp *pp)
> > +{
> > +	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
> > +	struct resource_entry *win;
> > +	struct resource *res;
> > +
> > +	win = resource_list_first_type(&pp->bridge->windows, IORESOURCE_MEM);
> > +	if (win) {
> > +		res = devm_kzalloc(pci->dev, sizeof(*res), GFP_KERNEL);
> > +		if (!res)
> > +			return;
> > +
> > +		/* Reserve last region_align block as message TLP space */
> > +		res->start = win->res->end - pci->region_align + 1;
> > +		res->end = win->res->end;
> 
> Don't you need to adjust the host bridge window size and end address?

Needn't. request_resource will reserve it from bridge window. Like malloc,
if you malloc to get a region of memory, which will never get by malloc
again utill you call free.

> 
> > +		res->name = "msg";
> > +		res->flags = win->res->flags | IORESOURCE_BUSY;
> > +
> 
> Shouldn't this resource be added back to the host bridge?

No, this resource will reserver for msg only for whole bridge life cycle.
Genenally alloc resource only happen at PCI devices probe. All pci space
will be fixed after system probe.

> 
> > +		if (!request_resource(win->res, res))
> 
> Why can't you use devm_ helper to manage the resource, since the lifetime of the
> resource is till dw_pcie_host_deinit()?
> 
> > +			pp->msg_res = res;
> > +		else
> > +			devm_kfree(pci->dev, res);
> > +	}
> > +}
> > +
> >  int dw_pcie_host_init(struct dw_pcie_rp *pp)
> >  {
> >  	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
> > @@ -479,6 +504,10 @@ int dw_pcie_host_init(struct dw_pcie_rp *pp)
> >  
> >  	dw_pcie_iatu_detect(pci);
> >  
> > +	/* Need call after dw_pcie_iatu_detect() before dw_pcie_setup_rc() */
> 
> It'd be better to add the reason also i.,e
> 
> 	/*
> 	 * Allocate the resource for MSG TLP before programming the iATU
> 	 * outbound window in dw_pcie_setup_rc(). Since the allocation depends
> 	 * on the value of 'region_align', this has to be done after
> 	 * dw_pcie_iatu_detect().
> 	 */
> 
> > +	if (pp->use_atu_msg)
> 
> Who is setting this flag?
> 
> > +		dw_pcie_host_request_msg_tlp_res(pp);
> > +
> >  	ret = dw_pcie_edma_detect(pci);
> >  	if (ret)
> >  		goto err_free_msi;
> > @@ -536,6 +565,11 @@ void dw_pcie_host_deinit(struct dw_pcie_rp *pp)
> >  
> >  	dw_pcie_edma_remove(pci);
> >  
> > +	if (pp->msg_res) {
> > +		release_resource(pp->msg_res);
> > +		devm_kfree(pci->dev, pp->msg_res);
> > +	}
> > +
> >  	if (pp->has_msi_ctrl)
> >  		dw_pcie_free_msi(pp);
> >  
> > @@ -697,6 +731,10 @@ static int dw_pcie_iatu_setup(struct dw_pcie_rp *pp)
> >  		atu.pci_addr = entry->res->start - entry->offset;
> >  		atu.size = resource_size(entry->res);
> >  
> > +		/* MSG TLB window resource reserve at one of end of IORESOURCE_MEM resource */
> > +		if (pp->msg_res && pp->msg_res->parent == entry->res)
> > +			atu.size -= resource_size(pp->msg_res);
> 
> If you adjust the bridge window above, then this won't be needed.
> 
> > +
> >  		ret = dw_pcie_prog_outbound_atu(pci, &atu);
> >  		if (ret) {
> >  			dev_err(pci->dev, "Failed to set MEM range %pr\n",
> > @@ -728,6 +766,8 @@ static int dw_pcie_iatu_setup(struct dw_pcie_rp *pp)
> >  		dev_warn(pci->dev, "Ranges exceed outbound iATU size (%d)\n",
> >  			 pci->num_ob_windows);
> >  
> > +	pp->msg_atu_index = i;
> > +
> >  	i = 0;
> >  	resource_list_for_each_entry(entry, &pp->bridge->dma_ranges) {
> >  		if (resource_type(entry->res) != IORESOURCE_MEM)
> > @@ -833,11 +873,54 @@ int dw_pcie_setup_rc(struct dw_pcie_rp *pp)
> >  }
> >  EXPORT_SYMBOL_GPL(dw_pcie_setup_rc);
> >  
> > +/* Using message outbound ATU to send out PME_Turn_Off message for dwc PCIe controller */
> > +static int dw_pcie_pme_turn_off(struct dw_pcie *pci)
> > +{
> > +	struct dw_pcie_ob_atu_cfg atu = { 0 };
> > +	void __iomem *m;
> 
> *mem
> 
> > +	int ret;
> > +
> > +	if (pci->num_ob_windows <= pci->pp.msg_atu_index)
> > +		return -EINVAL;
> > +
> > +	if (!pci->pp.msg_res)
> > +		return -EINVAL;
> > +
> > +	atu.code = PCIE_MSG_CODE_PME_TURN_OFF;
> > +	atu.routing = PCIE_MSG_TYPE_R_BC;
> > +	atu.type = PCIE_ATU_TYPE_MSG;
> > +	atu.size = resource_size(pci->pp.msg_res);
> > +	atu.index = pci->pp.msg_atu_index;
> > +
> > +	if (!atu.size) {
> > +		dev_dbg(pci->dev,
> > +			"atu memory map windows is zero, please check 'msg' reg in dts\n");
> 
> You are already checking the existence of the 'pci->pp.msg_res' region above. So
> shouldn't that be sufficient enough? Can the size be 0, if the region exist?
> 
> - Mani
> 
> > +		return -ENOMEM;
> > +	}
> > +
> > +	atu.cpu_addr = pci->pp.msg_res->start;
> > +
> > +	ret = dw_pcie_prog_outbound_atu(pci, &atu);
> > +	if (ret)
> > +		return ret;
> > +
> > +	m = ioremap(atu.cpu_addr, pci->region_align);
> > +	if (!m)
> > +		return -ENOMEM;
> > +
> > +	/* A dummy write is converted to a Msg TLP */
> > +	writel(0, m);
> > +
> > +	iounmap(m);
> > +
> > +	return 0;
> > +}
> > +
> >  int dw_pcie_suspend_noirq(struct dw_pcie *pci)
> >  {
> >  	u8 offset = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP);
> >  	u32 val;
> > -	int ret;
> > +	int ret = 0;
> >  
> >  	/*
> >  	 * If L1SS is supported, then do not put the link into L2 as some
> > @@ -849,10 +932,13 @@ int dw_pcie_suspend_noirq(struct dw_pcie *pci)
> >  	if (dw_pcie_get_ltssm(pci) <= DW_PCIE_LTSSM_DETECT_ACT)
> >  		return 0;
> >  
> > -	if (!pci->pp.ops->pme_turn_off)
> > -		return 0;
> > +	if (pci->pp.ops->pme_turn_off)
> > +		pci->pp.ops->pme_turn_off(&pci->pp);
> > +	else
> > +		ret = dw_pcie_pme_turn_off(pci);
> >  
> > -	pci->pp.ops->pme_turn_off(&pci->pp);
> > +	if (ret)
> > +		return ret;
> >  
> >  	ret = read_poll_timeout(dw_pcie_get_ltssm, val, val == DW_PCIE_LTSSM_L2_IDLE,
> >  				PCIE_PME_TO_L2_TIMEOUT_US/10,
> > diff --git a/drivers/pci/controller/dwc/pcie-designware.h b/drivers/pci/controller/dwc/pcie-designware.h
> > index 703b50bc5e0f1..dca5de4c6e877 100644
> > --- a/drivers/pci/controller/dwc/pcie-designware.h
> > +++ b/drivers/pci/controller/dwc/pcie-designware.h
> > @@ -341,6 +341,9 @@ struct dw_pcie_rp {
> >  	struct pci_host_bridge  *bridge;
> >  	raw_spinlock_t		lock;
> >  	DECLARE_BITMAP(msi_irq_in_use, MAX_MSI_IRQS);
> > +	bool			use_atu_msg;
> > +	int			msg_atu_index;
> > +	struct resource		*msg_res;
> >  };
> >  
> >  struct dw_pcie_ep_ops {
> > 
> > -- 
> > 2.34.1
> > 
> 
> -- 
> மணிவண்ணன் சதாசிவம்
Manivannan Sadhasivam April 6, 2024, 4:01 a.m. UTC | #3
On Fri, Apr 05, 2024 at 10:31:16AM -0400, Frank Li wrote:
> On Fri, Apr 05, 2024 at 11:54:26AM +0530, Manivannan Sadhasivam wrote:
> > On Tue, Mar 19, 2024 at 12:07:15PM -0400, Frank Li wrote:
> > 
> > PCI: dwc: Add generic MSG TLP support for sending PME_Turn_Off during system suspend
> > 
> > > Reserve space at end of first IORESOURCE_MEM window as message TLP MMIO
> > > window. This space's size is 'region_align'.
> > > 
> > > Set outbound ATU map memory write to send PCI message. So one MMIO write
> > > can trigger a PCI message, such as PME_Turn_Off.
> > > 
> > > Add common dwc_pme_turn_off() function.
> > > 
> > > Call dwc_pme_turn_off() to send out PME_Turn_Off message in general
> > > dw_pcie_suspend_noirq() if there are not platform callback pme_turn_off()
> > > exist.
> > > 
> > 
> > How about:
> > 
> > "Instead of relying on the vendor specific implementations to send the
> > PME_Turn_Off message, let's introduce a generic way of sending the message using
> > the MSG TLP.
> > 
> > This is achieved by reserving a region for MSG TLP of size 'pci->region_align',
> > at the end of the first IORESOURCE_MEM window of the host bridge. And then
> > sending the PME_Turn_Off message during system suspend with the help of iATU.
> > 
> > It should be noted that this generic implementation is optional for the glue
> > drivers and can be overridden by a custom 'pme_turn_off' callback."
> > 
> > > Signed-off-by: Frank Li <Frank.Li@nxp.com>
> > > ---
> > >  drivers/pci/controller/dwc/pcie-designware-host.c | 94 ++++++++++++++++++++++-
> > >  drivers/pci/controller/dwc/pcie-designware.h      |  3 +
> > >  2 files changed, 93 insertions(+), 4 deletions(-)
> > > 
> > > diff --git a/drivers/pci/controller/dwc/pcie-designware-host.c b/drivers/pci/controller/dwc/pcie-designware-host.c
> > > index 267687ab33cbc..d5723fce7a894 100644
> > > --- a/drivers/pci/controller/dwc/pcie-designware-host.c
> > > +++ b/drivers/pci/controller/dwc/pcie-designware-host.c
> > > @@ -393,6 +393,31 @@ static int dw_pcie_msi_host_init(struct dw_pcie_rp *pp)
> > >  	return 0;
> > >  }
> > >  
> > > +static void dw_pcie_host_request_msg_tlp_res(struct dw_pcie_rp *pp)
> > > +{
> > > +	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
> > > +	struct resource_entry *win;
> > > +	struct resource *res;
> > > +
> > > +	win = resource_list_first_type(&pp->bridge->windows, IORESOURCE_MEM);
> > > +	if (win) {
> > > +		res = devm_kzalloc(pci->dev, sizeof(*res), GFP_KERNEL);
> > > +		if (!res)
> > > +			return;
> > > +
> > > +		/* Reserve last region_align block as message TLP space */
> > > +		res->start = win->res->end - pci->region_align + 1;
> > > +		res->end = win->res->end;
> > 
> > Don't you need to adjust the host bridge window size and end address?
> 
> Needn't. request_resource will reserve it from bridge window. Like malloc,
> if you malloc to get a region of memory, which will never get by malloc
> again utill you call free.
> 

Hmm, will that modify the window->res->end address and size?

> > 
> > > +		res->name = "msg";
> > > +		res->flags = win->res->flags | IORESOURCE_BUSY;
> > > +
> > 
> > Shouldn't this resource be added back to the host bridge?
> 
> No, this resource will reserver for msg only for whole bridge life cycle.
> Genenally alloc resource only happen at PCI devices probe. All pci space
> will be fixed after system probe.
> 

I don't think so. This resource still belongs to the host bridge, so we should
add it back.

- Mani
Frank Li April 8, 2024, 3:11 p.m. UTC | #4
On Sat, Apr 06, 2024 at 09:31:31AM +0530, Manivannan Sadhasivam wrote:
> On Fri, Apr 05, 2024 at 10:31:16AM -0400, Frank Li wrote:
> > On Fri, Apr 05, 2024 at 11:54:26AM +0530, Manivannan Sadhasivam wrote:
> > > On Tue, Mar 19, 2024 at 12:07:15PM -0400, Frank Li wrote:
> > > 
> > > PCI: dwc: Add generic MSG TLP support for sending PME_Turn_Off during system suspend
> > > 
> > > > Reserve space at end of first IORESOURCE_MEM window as message TLP MMIO
> > > > window. This space's size is 'region_align'.
> > > > 
> > > > Set outbound ATU map memory write to send PCI message. So one MMIO write
> > > > can trigger a PCI message, such as PME_Turn_Off.
> > > > 
> > > > Add common dwc_pme_turn_off() function.
> > > > 
> > > > Call dwc_pme_turn_off() to send out PME_Turn_Off message in general
> > > > dw_pcie_suspend_noirq() if there are not platform callback pme_turn_off()
> > > > exist.
> > > > 
> > > 
> > > How about:
> > > 
> > > "Instead of relying on the vendor specific implementations to send the
> > > PME_Turn_Off message, let's introduce a generic way of sending the message using
> > > the MSG TLP.
> > > 
> > > This is achieved by reserving a region for MSG TLP of size 'pci->region_align',
> > > at the end of the first IORESOURCE_MEM window of the host bridge. And then
> > > sending the PME_Turn_Off message during system suspend with the help of iATU.
> > > 
> > > It should be noted that this generic implementation is optional for the glue
> > > drivers and can be overridden by a custom 'pme_turn_off' callback."
> > > 
> > > > Signed-off-by: Frank Li <Frank.Li@nxp.com>
> > > > ---
> > > >  drivers/pci/controller/dwc/pcie-designware-host.c | 94 ++++++++++++++++++++++-
> > > >  drivers/pci/controller/dwc/pcie-designware.h      |  3 +
> > > >  2 files changed, 93 insertions(+), 4 deletions(-)
> > > > 
> > > > diff --git a/drivers/pci/controller/dwc/pcie-designware-host.c b/drivers/pci/controller/dwc/pcie-designware-host.c
> > > > index 267687ab33cbc..d5723fce7a894 100644
> > > > --- a/drivers/pci/controller/dwc/pcie-designware-host.c
> > > > +++ b/drivers/pci/controller/dwc/pcie-designware-host.c
> > > > @@ -393,6 +393,31 @@ static int dw_pcie_msi_host_init(struct dw_pcie_rp *pp)
> > > >  	return 0;
> > > >  }
> > > >  
> > > > +static void dw_pcie_host_request_msg_tlp_res(struct dw_pcie_rp *pp)
> > > > +{
> > > > +	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
> > > > +	struct resource_entry *win;
> > > > +	struct resource *res;
> > > > +
> > > > +	win = resource_list_first_type(&pp->bridge->windows, IORESOURCE_MEM);
> > > > +	if (win) {
> > > > +		res = devm_kzalloc(pci->dev, sizeof(*res), GFP_KERNEL);
> > > > +		if (!res)
> > > > +			return;
> > > > +
> > > > +		/* Reserve last region_align block as message TLP space */
> > > > +		res->start = win->res->end - pci->region_align + 1;
> > > > +		res->end = win->res->end;
> > > 
> > > Don't you need to adjust the host bridge window size and end address?
> > 
> > Needn't. request_resource will reserve it from bridge window. Like malloc,
> > if you malloc to get a region of memory, which will never get by malloc
> > again utill you call free.
> > 
> 
> Hmm, will that modify the window->res->end address and size?

No. This windows already reported to pci system before this function. It is
not good to modify window-res-end. It just add child resource like below.

windows is root resource, which will create may child when call
request_resource.
          bridge -> windows
		child1 -> msg
		child2 -> pci ep1
		child3 -> pci_ep2.
		...

Although you see whole bridge window, 'msg' already used and put under root
resource,  new pci devices will never use 'msg' resource. 

If change windows->res->end here, I worry about it may broken resource
tree.

> 
> > > 
> > > > +		res->name = "msg";
> > > > +		res->flags = win->res->flags | IORESOURCE_BUSY;
> > > > +
> > > 
> > > Shouldn't this resource be added back to the host bridge?
> > 
> > No, this resource will reserver for msg only for whole bridge life cycle.
> > Genenally alloc resource only happen at PCI devices probe. All pci space
> > will be fixed after system probe.
> > 
> 
> I don't think so. This resource still belongs to the host bridge, so we should
> add it back.

When add back?  It was reserved at bridge probe. When bridge remove, all
resource will released. 

> 
> - Mani
> 
> -- 
> மணிவண்ணன் சதாசிவம்
Manivannan Sadhasivam April 12, 2024, 5:05 p.m. UTC | #5
On Mon, Apr 08, 2024 at 11:11:11AM -0400, Frank Li wrote:
> On Sat, Apr 06, 2024 at 09:31:31AM +0530, Manivannan Sadhasivam wrote:
> > On Fri, Apr 05, 2024 at 10:31:16AM -0400, Frank Li wrote:
> > > On Fri, Apr 05, 2024 at 11:54:26AM +0530, Manivannan Sadhasivam wrote:
> > > > On Tue, Mar 19, 2024 at 12:07:15PM -0400, Frank Li wrote:
> > > > 
> > > > PCI: dwc: Add generic MSG TLP support for sending PME_Turn_Off during system suspend
> > > > 
> > > > > Reserve space at end of first IORESOURCE_MEM window as message TLP MMIO
> > > > > window. This space's size is 'region_align'.
> > > > > 
> > > > > Set outbound ATU map memory write to send PCI message. So one MMIO write
> > > > > can trigger a PCI message, such as PME_Turn_Off.
> > > > > 
> > > > > Add common dwc_pme_turn_off() function.
> > > > > 
> > > > > Call dwc_pme_turn_off() to send out PME_Turn_Off message in general
> > > > > dw_pcie_suspend_noirq() if there are not platform callback pme_turn_off()
> > > > > exist.
> > > > > 
> > > > 
> > > > How about:
> > > > 
> > > > "Instead of relying on the vendor specific implementations to send the
> > > > PME_Turn_Off message, let's introduce a generic way of sending the message using
> > > > the MSG TLP.
> > > > 
> > > > This is achieved by reserving a region for MSG TLP of size 'pci->region_align',
> > > > at the end of the first IORESOURCE_MEM window of the host bridge. And then
> > > > sending the PME_Turn_Off message during system suspend with the help of iATU.
> > > > 
> > > > It should be noted that this generic implementation is optional for the glue
> > > > drivers and can be overridden by a custom 'pme_turn_off' callback."
> > > > 
> > > > > Signed-off-by: Frank Li <Frank.Li@nxp.com>
> > > > > ---
> > > > >  drivers/pci/controller/dwc/pcie-designware-host.c | 94 ++++++++++++++++++++++-
> > > > >  drivers/pci/controller/dwc/pcie-designware.h      |  3 +
> > > > >  2 files changed, 93 insertions(+), 4 deletions(-)
> > > > > 
> > > > > diff --git a/drivers/pci/controller/dwc/pcie-designware-host.c b/drivers/pci/controller/dwc/pcie-designware-host.c
> > > > > index 267687ab33cbc..d5723fce7a894 100644
> > > > > --- a/drivers/pci/controller/dwc/pcie-designware-host.c
> > > > > +++ b/drivers/pci/controller/dwc/pcie-designware-host.c
> > > > > @@ -393,6 +393,31 @@ static int dw_pcie_msi_host_init(struct dw_pcie_rp *pp)
> > > > >  	return 0;
> > > > >  }
> > > > >  
> > > > > +static void dw_pcie_host_request_msg_tlp_res(struct dw_pcie_rp *pp)
> > > > > +{
> > > > > +	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
> > > > > +	struct resource_entry *win;
> > > > > +	struct resource *res;
> > > > > +
> > > > > +	win = resource_list_first_type(&pp->bridge->windows, IORESOURCE_MEM);
> > > > > +	if (win) {
> > > > > +		res = devm_kzalloc(pci->dev, sizeof(*res), GFP_KERNEL);
> > > > > +		if (!res)
> > > > > +			return;
> > > > > +
> > > > > +		/* Reserve last region_align block as message TLP space */
> > > > > +		res->start = win->res->end - pci->region_align + 1;
> > > > > +		res->end = win->res->end;
> > > > 
> > > > Don't you need to adjust the host bridge window size and end address?
> > > 
> > > Needn't. request_resource will reserve it from bridge window. Like malloc,
> > > if you malloc to get a region of memory, which will never get by malloc
> > > again utill you call free.
> > > 
> > 
> > Hmm, will that modify the window->res->end address and size?
> 
> No. This windows already reported to pci system before this function. It is
> not good to modify window-res-end. It just add child resource like below.
> 
> windows is root resource, which will create may child when call
> request_resource.
>           bridge -> windows
> 		child1 -> msg
> 		child2 -> pci ep1
> 		child3 -> pci_ep2.
> 		...
> 
> Although you see whole bridge window, 'msg' already used and put under root
> resource,  new pci devices will never use 'msg' resource. 
> 
> If change windows->res->end here, I worry about it may broken resource
> tree.
> 

Hmm, I think your argument is fair. I was worrying that if someone try to
map separately by referencing win->res->end, then they will see access
violation.

But why can't you just allocate the resource using 'alloc_resource()' API
instead of always allocating at the end?

- Mani

> > 
> > > > 
> > > > > +		res->name = "msg";
> > > > > +		res->flags = win->res->flags | IORESOURCE_BUSY;
> > > > > +
> > > > 
> > > > Shouldn't this resource be added back to the host bridge?
> > > 
> > > No, this resource will reserver for msg only for whole bridge life cycle.
> > > Genenally alloc resource only happen at PCI devices probe. All pci space
> > > will be fixed after system probe.
> > > 
> > 
> > I don't think so. This resource still belongs to the host bridge, so we should
> > add it back.
> 
> When add back?  It was reserved at bridge probe. When bridge remove, all
> resource will released. 
> 
> > 
> > - Mani
> > 
> > -- 
> > மணிவண்ணன் சதாசிவம்
Frank Li April 12, 2024, 9:08 p.m. UTC | #6
On Fri, Apr 12, 2024 at 10:35:48PM +0530, Manivannan Sadhasivam wrote:
> On Mon, Apr 08, 2024 at 11:11:11AM -0400, Frank Li wrote:
> > On Sat, Apr 06, 2024 at 09:31:31AM +0530, Manivannan Sadhasivam wrote:
> > > On Fri, Apr 05, 2024 at 10:31:16AM -0400, Frank Li wrote:
> > > > On Fri, Apr 05, 2024 at 11:54:26AM +0530, Manivannan Sadhasivam wrote:
> > > > > On Tue, Mar 19, 2024 at 12:07:15PM -0400, Frank Li wrote:
> > > > > 
> > > > > PCI: dwc: Add generic MSG TLP support for sending PME_Turn_Off during system suspend
> > > > > 
> > > > > > Reserve space at end of first IORESOURCE_MEM window as message TLP MMIO
> > > > > > window. This space's size is 'region_align'.
> > > > > > 
> > > > > > Set outbound ATU map memory write to send PCI message. So one MMIO write
> > > > > > can trigger a PCI message, such as PME_Turn_Off.
> > > > > > 
> > > > > > Add common dwc_pme_turn_off() function.
> > > > > > 
> > > > > > Call dwc_pme_turn_off() to send out PME_Turn_Off message in general
> > > > > > dw_pcie_suspend_noirq() if there are not platform callback pme_turn_off()
> > > > > > exist.
> > > > > > 
> > > > > 
> > > > > How about:
> > > > > 
> > > > > "Instead of relying on the vendor specific implementations to send the
> > > > > PME_Turn_Off message, let's introduce a generic way of sending the message using
> > > > > the MSG TLP.
> > > > > 
> > > > > This is achieved by reserving a region for MSG TLP of size 'pci->region_align',
> > > > > at the end of the first IORESOURCE_MEM window of the host bridge. And then
> > > > > sending the PME_Turn_Off message during system suspend with the help of iATU.
> > > > > 
> > > > > It should be noted that this generic implementation is optional for the glue
> > > > > drivers and can be overridden by a custom 'pme_turn_off' callback."
> > > > > 
> > > > > > Signed-off-by: Frank Li <Frank.Li@nxp.com>
> > > > > > ---
> > > > > >  drivers/pci/controller/dwc/pcie-designware-host.c | 94 ++++++++++++++++++++++-
> > > > > >  drivers/pci/controller/dwc/pcie-designware.h      |  3 +
> > > > > >  2 files changed, 93 insertions(+), 4 deletions(-)
> > > > > > 
> > > > > > diff --git a/drivers/pci/controller/dwc/pcie-designware-host.c b/drivers/pci/controller/dwc/pcie-designware-host.c
> > > > > > index 267687ab33cbc..d5723fce7a894 100644
> > > > > > --- a/drivers/pci/controller/dwc/pcie-designware-host.c
> > > > > > +++ b/drivers/pci/controller/dwc/pcie-designware-host.c
> > > > > > @@ -393,6 +393,31 @@ static int dw_pcie_msi_host_init(struct dw_pcie_rp *pp)
> > > > > >  	return 0;
> > > > > >  }
> > > > > >  
> > > > > > +static void dw_pcie_host_request_msg_tlp_res(struct dw_pcie_rp *pp)
> > > > > > +{
> > > > > > +	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
> > > > > > +	struct resource_entry *win;
> > > > > > +	struct resource *res;
> > > > > > +
> > > > > > +	win = resource_list_first_type(&pp->bridge->windows, IORESOURCE_MEM);
> > > > > > +	if (win) {
> > > > > > +		res = devm_kzalloc(pci->dev, sizeof(*res), GFP_KERNEL);
> > > > > > +		if (!res)
> > > > > > +			return;
> > > > > > +
> > > > > > +		/* Reserve last region_align block as message TLP space */
> > > > > > +		res->start = win->res->end - pci->region_align + 1;
> > > > > > +		res->end = win->res->end;
> > > > > 
> > > > > Don't you need to adjust the host bridge window size and end address?
> > > > 
> > > > Needn't. request_resource will reserve it from bridge window. Like malloc,
> > > > if you malloc to get a region of memory, which will never get by malloc
> > > > again utill you call free.
> > > > 
> > > 
> > > Hmm, will that modify the window->res->end address and size?
> > 
> > No. This windows already reported to pci system before this function. It is
> > not good to modify window-res-end. It just add child resource like below.
> > 
> > windows is root resource, which will create may child when call
> > request_resource.
> >           bridge -> windows
> > 		child1 -> msg
> > 		child2 -> pci ep1
> > 		child3 -> pci_ep2.
> > 		...
> > 
> > Although you see whole bridge window, 'msg' already used and put under root
> > resource,  new pci devices will never use 'msg' resource. 
> > 
> > If change windows->res->end here, I worry about it may broken resource
> > tree.
> > 
> 
> Hmm, I think your argument is fair. I was worrying that if someone try to
> map separately by referencing win->res->end, then they will see access
> violation.

It should be wrong if use it without request resource.

> 
> But why can't you just allocate the resource using 'alloc_resource()' API
> instead of always allocating at the end?

Alloc will start from windows (for example: 0x8000_0000). 
0x8000_0000 -> 0x8001_0000 will be allocated to 'msg'.

If ep1 want to get 1MB windows, 0x8010_0000 will return. There is a big
hole between 0x8001_0000 to 0x8010_0000.

I just want to reduce impact to existed system. So PCIe memory layout will
be kept the same w/o this patch.

There are not big difference between allocate resource and reserve resource
for 'msg'. Just little better friendly for exist system.

Frank

> 
> - Mani
> 
> > > 
> > > > > 
> > > > > > +		res->name = "msg";
> > > > > > +		res->flags = win->res->flags | IORESOURCE_BUSY;
> > > > > > +
> > > > > 
> > > > > Shouldn't this resource be added back to the host bridge?
> > > > 
> > > > No, this resource will reserver for msg only for whole bridge life cycle.
> > > > Genenally alloc resource only happen at PCI devices probe. All pci space
> > > > will be fixed after system probe.
> > > > 
> > > 
> > > I don't think so. This resource still belongs to the host bridge, so we should
> > > add it back.
> > 
> > When add back?  It was reserved at bridge probe. When bridge remove, all
> > resource will released. 
> > 
> > > 
> > > - Mani
> > > 
> > > -- 
> > > மணிவண்ணன் சதாசிவம்
> 
> -- 
> மணிவண்ணன் சதாசிவம்
Manivannan Sadhasivam April 14, 2024, 10:32 a.m. UTC | #7
On Fri, Apr 12, 2024 at 05:08:33PM -0400, Frank Li wrote:
> On Fri, Apr 12, 2024 at 10:35:48PM +0530, Manivannan Sadhasivam wrote:
> > On Mon, Apr 08, 2024 at 11:11:11AM -0400, Frank Li wrote:
> > > On Sat, Apr 06, 2024 at 09:31:31AM +0530, Manivannan Sadhasivam wrote:
> > > > On Fri, Apr 05, 2024 at 10:31:16AM -0400, Frank Li wrote:
> > > > > On Fri, Apr 05, 2024 at 11:54:26AM +0530, Manivannan Sadhasivam wrote:
> > > > > > On Tue, Mar 19, 2024 at 12:07:15PM -0400, Frank Li wrote:
> > > > > > 
> > > > > > PCI: dwc: Add generic MSG TLP support for sending PME_Turn_Off during system suspend
> > > > > > 
> > > > > > > Reserve space at end of first IORESOURCE_MEM window as message TLP MMIO
> > > > > > > window. This space's size is 'region_align'.
> > > > > > > 
> > > > > > > Set outbound ATU map memory write to send PCI message. So one MMIO write
> > > > > > > can trigger a PCI message, such as PME_Turn_Off.
> > > > > > > 
> > > > > > > Add common dwc_pme_turn_off() function.
> > > > > > > 
> > > > > > > Call dwc_pme_turn_off() to send out PME_Turn_Off message in general
> > > > > > > dw_pcie_suspend_noirq() if there are not platform callback pme_turn_off()
> > > > > > > exist.
> > > > > > > 
> > > > > > 
> > > > > > How about:
> > > > > > 
> > > > > > "Instead of relying on the vendor specific implementations to send the
> > > > > > PME_Turn_Off message, let's introduce a generic way of sending the message using
> > > > > > the MSG TLP.
> > > > > > 
> > > > > > This is achieved by reserving a region for MSG TLP of size 'pci->region_align',
> > > > > > at the end of the first IORESOURCE_MEM window of the host bridge. And then
> > > > > > sending the PME_Turn_Off message during system suspend with the help of iATU.
> > > > > > 
> > > > > > It should be noted that this generic implementation is optional for the glue
> > > > > > drivers and can be overridden by a custom 'pme_turn_off' callback."
> > > > > > 
> > > > > > > Signed-off-by: Frank Li <Frank.Li@nxp.com>
> > > > > > > ---
> > > > > > >  drivers/pci/controller/dwc/pcie-designware-host.c | 94 ++++++++++++++++++++++-
> > > > > > >  drivers/pci/controller/dwc/pcie-designware.h      |  3 +
> > > > > > >  2 files changed, 93 insertions(+), 4 deletions(-)
> > > > > > > 
> > > > > > > diff --git a/drivers/pci/controller/dwc/pcie-designware-host.c b/drivers/pci/controller/dwc/pcie-designware-host.c
> > > > > > > index 267687ab33cbc..d5723fce7a894 100644
> > > > > > > --- a/drivers/pci/controller/dwc/pcie-designware-host.c
> > > > > > > +++ b/drivers/pci/controller/dwc/pcie-designware-host.c
> > > > > > > @@ -393,6 +393,31 @@ static int dw_pcie_msi_host_init(struct dw_pcie_rp *pp)
> > > > > > >  	return 0;
> > > > > > >  }
> > > > > > >  
> > > > > > > +static void dw_pcie_host_request_msg_tlp_res(struct dw_pcie_rp *pp)
> > > > > > > +{
> > > > > > > +	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
> > > > > > > +	struct resource_entry *win;
> > > > > > > +	struct resource *res;
> > > > > > > +
> > > > > > > +	win = resource_list_first_type(&pp->bridge->windows, IORESOURCE_MEM);
> > > > > > > +	if (win) {
> > > > > > > +		res = devm_kzalloc(pci->dev, sizeof(*res), GFP_KERNEL);
> > > > > > > +		if (!res)
> > > > > > > +			return;
> > > > > > > +
> > > > > > > +		/* Reserve last region_align block as message TLP space */
> > > > > > > +		res->start = win->res->end - pci->region_align + 1;
> > > > > > > +		res->end = win->res->end;
> > > > > > 
> > > > > > Don't you need to adjust the host bridge window size and end address?
> > > > > 
> > > > > Needn't. request_resource will reserve it from bridge window. Like malloc,
> > > > > if you malloc to get a region of memory, which will never get by malloc
> > > > > again utill you call free.
> > > > > 
> > > > 
> > > > Hmm, will that modify the window->res->end address and size?
> > > 
> > > No. This windows already reported to pci system before this function. It is
> > > not good to modify window-res-end. It just add child resource like below.
> > > 
> > > windows is root resource, which will create may child when call
> > > request_resource.
> > >           bridge -> windows
> > > 		child1 -> msg
> > > 		child2 -> pci ep1
> > > 		child3 -> pci_ep2.
> > > 		...
> > > 
> > > Although you see whole bridge window, 'msg' already used and put under root
> > > resource,  new pci devices will never use 'msg' resource. 
> > > 
> > > If change windows->res->end here, I worry about it may broken resource
> > > tree.
> > > 
> > 
> > Hmm, I think your argument is fair. I was worrying that if someone try to
> > map separately by referencing win->res->end, then they will see access
> > violation.
> 
> It should be wrong if use it without request resource.
> 
> > 
> > But why can't you just allocate the resource using 'alloc_resource()' API
> > instead of always allocating at the end?
> 
> Alloc will start from windows (for example: 0x8000_0000). 
> 0x8000_0000 -> 0x8001_0000 will be allocated to 'msg'.
> 
> If ep1 want to get 1MB windows, 0x8010_0000 will return. There is a big
> hole between 0x8001_0000 to 0x8010_0000.
> 
> I just want to reduce impact to existed system. So PCIe memory layout will
> be kept the same w/o this patch.
> 
> There are not big difference between allocate resource and reserve resource
> for 'msg'. Just little better friendly for exist system.
> 

Ok. This sounds fine to me. Please add this information as a comment so that
everyone will be aware of the reasoning.

- Mani

> Frank
> 
> > 
> > - Mani
> > 
> > > > 
> > > > > > 
> > > > > > > +		res->name = "msg";
> > > > > > > +		res->flags = win->res->flags | IORESOURCE_BUSY;
> > > > > > > +
> > > > > > 
> > > > > > Shouldn't this resource be added back to the host bridge?
> > > > > 
> > > > > No, this resource will reserver for msg only for whole bridge life cycle.
> > > > > Genenally alloc resource only happen at PCI devices probe. All pci space
> > > > > will be fixed after system probe.
> > > > > 
> > > > 
> > > > I don't think so. This resource still belongs to the host bridge, so we should
> > > > add it back.
> > > 
> > > When add back?  It was reserved at bridge probe. When bridge remove, all
> > > resource will released. 
> > > 
> > > > 
> > > > - Mani
> > > > 
> > > > -- 
> > > > மணிவண்ணன் சதாசிவம்
> > 
> > -- 
> > மணிவண்ணன் சதாசிவம்
diff mbox series

Patch

diff --git a/drivers/pci/controller/dwc/pcie-designware-host.c b/drivers/pci/controller/dwc/pcie-designware-host.c
index 267687ab33cbc..d5723fce7a894 100644
--- a/drivers/pci/controller/dwc/pcie-designware-host.c
+++ b/drivers/pci/controller/dwc/pcie-designware-host.c
@@ -393,6 +393,31 @@  static int dw_pcie_msi_host_init(struct dw_pcie_rp *pp)
 	return 0;
 }
 
+static void dw_pcie_host_request_msg_tlp_res(struct dw_pcie_rp *pp)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+	struct resource_entry *win;
+	struct resource *res;
+
+	win = resource_list_first_type(&pp->bridge->windows, IORESOURCE_MEM);
+	if (win) {
+		res = devm_kzalloc(pci->dev, sizeof(*res), GFP_KERNEL);
+		if (!res)
+			return;
+
+		/* Reserve last region_align block as message TLP space */
+		res->start = win->res->end - pci->region_align + 1;
+		res->end = win->res->end;
+		res->name = "msg";
+		res->flags = win->res->flags | IORESOURCE_BUSY;
+
+		if (!request_resource(win->res, res))
+			pp->msg_res = res;
+		else
+			devm_kfree(pci->dev, res);
+	}
+}
+
 int dw_pcie_host_init(struct dw_pcie_rp *pp)
 {
 	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
@@ -479,6 +504,10 @@  int dw_pcie_host_init(struct dw_pcie_rp *pp)
 
 	dw_pcie_iatu_detect(pci);
 
+	/* Need call after dw_pcie_iatu_detect() before dw_pcie_setup_rc() */
+	if (pp->use_atu_msg)
+		dw_pcie_host_request_msg_tlp_res(pp);
+
 	ret = dw_pcie_edma_detect(pci);
 	if (ret)
 		goto err_free_msi;
@@ -536,6 +565,11 @@  void dw_pcie_host_deinit(struct dw_pcie_rp *pp)
 
 	dw_pcie_edma_remove(pci);
 
+	if (pp->msg_res) {
+		release_resource(pp->msg_res);
+		devm_kfree(pci->dev, pp->msg_res);
+	}
+
 	if (pp->has_msi_ctrl)
 		dw_pcie_free_msi(pp);
 
@@ -697,6 +731,10 @@  static int dw_pcie_iatu_setup(struct dw_pcie_rp *pp)
 		atu.pci_addr = entry->res->start - entry->offset;
 		atu.size = resource_size(entry->res);
 
+		/* MSG TLB window resource reserve at one of end of IORESOURCE_MEM resource */
+		if (pp->msg_res && pp->msg_res->parent == entry->res)
+			atu.size -= resource_size(pp->msg_res);
+
 		ret = dw_pcie_prog_outbound_atu(pci, &atu);
 		if (ret) {
 			dev_err(pci->dev, "Failed to set MEM range %pr\n",
@@ -728,6 +766,8 @@  static int dw_pcie_iatu_setup(struct dw_pcie_rp *pp)
 		dev_warn(pci->dev, "Ranges exceed outbound iATU size (%d)\n",
 			 pci->num_ob_windows);
 
+	pp->msg_atu_index = i;
+
 	i = 0;
 	resource_list_for_each_entry(entry, &pp->bridge->dma_ranges) {
 		if (resource_type(entry->res) != IORESOURCE_MEM)
@@ -833,11 +873,54 @@  int dw_pcie_setup_rc(struct dw_pcie_rp *pp)
 }
 EXPORT_SYMBOL_GPL(dw_pcie_setup_rc);
 
+/* Using message outbound ATU to send out PME_Turn_Off message for dwc PCIe controller */
+static int dw_pcie_pme_turn_off(struct dw_pcie *pci)
+{
+	struct dw_pcie_ob_atu_cfg atu = { 0 };
+	void __iomem *m;
+	int ret;
+
+	if (pci->num_ob_windows <= pci->pp.msg_atu_index)
+		return -EINVAL;
+
+	if (!pci->pp.msg_res)
+		return -EINVAL;
+
+	atu.code = PCIE_MSG_CODE_PME_TURN_OFF;
+	atu.routing = PCIE_MSG_TYPE_R_BC;
+	atu.type = PCIE_ATU_TYPE_MSG;
+	atu.size = resource_size(pci->pp.msg_res);
+	atu.index = pci->pp.msg_atu_index;
+
+	if (!atu.size) {
+		dev_dbg(pci->dev,
+			"atu memory map windows is zero, please check 'msg' reg in dts\n");
+		return -ENOMEM;
+	}
+
+	atu.cpu_addr = pci->pp.msg_res->start;
+
+	ret = dw_pcie_prog_outbound_atu(pci, &atu);
+	if (ret)
+		return ret;
+
+	m = ioremap(atu.cpu_addr, pci->region_align);
+	if (!m)
+		return -ENOMEM;
+
+	/* A dummy write is converted to a Msg TLP */
+	writel(0, m);
+
+	iounmap(m);
+
+	return 0;
+}
+
 int dw_pcie_suspend_noirq(struct dw_pcie *pci)
 {
 	u8 offset = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP);
 	u32 val;
-	int ret;
+	int ret = 0;
 
 	/*
 	 * If L1SS is supported, then do not put the link into L2 as some
@@ -849,10 +932,13 @@  int dw_pcie_suspend_noirq(struct dw_pcie *pci)
 	if (dw_pcie_get_ltssm(pci) <= DW_PCIE_LTSSM_DETECT_ACT)
 		return 0;
 
-	if (!pci->pp.ops->pme_turn_off)
-		return 0;
+	if (pci->pp.ops->pme_turn_off)
+		pci->pp.ops->pme_turn_off(&pci->pp);
+	else
+		ret = dw_pcie_pme_turn_off(pci);
 
-	pci->pp.ops->pme_turn_off(&pci->pp);
+	if (ret)
+		return ret;
 
 	ret = read_poll_timeout(dw_pcie_get_ltssm, val, val == DW_PCIE_LTSSM_L2_IDLE,
 				PCIE_PME_TO_L2_TIMEOUT_US/10,
diff --git a/drivers/pci/controller/dwc/pcie-designware.h b/drivers/pci/controller/dwc/pcie-designware.h
index 703b50bc5e0f1..dca5de4c6e877 100644
--- a/drivers/pci/controller/dwc/pcie-designware.h
+++ b/drivers/pci/controller/dwc/pcie-designware.h
@@ -341,6 +341,9 @@  struct dw_pcie_rp {
 	struct pci_host_bridge  *bridge;
 	raw_spinlock_t		lock;
 	DECLARE_BITMAP(msi_irq_in_use, MAX_MSI_IRQS);
+	bool			use_atu_msg;
+	int			msg_atu_index;
+	struct resource		*msg_res;
 };
 
 struct dw_pcie_ep_ops {